[C#] HTML에서 Text만 추출

CSharp
/// <summary>
/// Extracts the plain text from an HTML fragment.
/// </summary>
/// <remarks>
/// Line-break tags are converted to <see cref="Environment.NewLine"/>, every other
/// tag is removed, whitespace-only lines are dropped, and non-breaking spaces are
/// replaced with ordinary spaces. Character entities other than the non-breaking
/// space are left as they are.
/// </remarks>
/// <param name="Html">The HTML markup to strip. May be null or empty.</param>
/// <returns>
/// The text content of <paramref name="Html"/>, or an empty string when the input
/// is null or empty.
/// </returns>
public static string StripHtml(string Html)
{
    // Regex.Replace throws on null input; treat "no markup" as "no text" instead.
    if (string.IsNullOrEmpty(Html))
    {
        return string.Empty;
    }

    string output = Html;

    // Turn every spelling of the line-break tag (<br>, <br/>, <br />, <BR>, ...)
    // into a real line break before the generic tag removal below discards it.
    output = System.Text.RegularExpressions.Regex.Replace(
        output,
        @"<br\s*/?>",
        Environment.NewLine,
        System.Text.RegularExpressions.RegexOptions.IgnoreCase);

    // Get rid of the remaining HTML tags.
    output = System.Text.RegularExpressions.Regex.Replace(output, "<[^>]*>", string.Empty);

    // Get rid of blank (whitespace-only) lines.
    output = System.Text.RegularExpressions.Regex.Replace(
        output,
        @"^\s*$\n",
        string.Empty,
        System.Text.RegularExpressions.RegexOptions.Multiline);

    // Normalize non-breaking spaces, whether they arrive as the "&nbsp;" entity
    // or as the literal U+00A0 character, to ordinary spaces.
    output = output.Replace("&nbsp;", " ").Replace('\u00A0', ' ');

    return output;
}
Read More