#region GetOnlyTextFromHtmlCode + RemoveHtmlChars + RemoveTagFromHtmlCode

        // Adapted from the ZetaHtmlEditControl sample (HtmlEditControl.cs):
        // http://www.codeproject.com/script/Content/ViewAssociatedFile.aspx?rzp=%2FKB%2Fedit%2FZetaHtmlEditControl%2F%2FZetaHtmlEditControl-Source.zip&zep=Control%2FHtmlEditControl.cs&obid=43954&obtid=2&ovid=13

        /// <summary>
        /// Named HTML entities handled by <see cref="unescapeHtmlEntities"/>,
        /// stored as { entity, replacement text } rows.
        /// Entity names follow http://dev.w3.org/html5/html-author/charref
        /// </summary>
        private static readonly string[,] _htmlEntityReplacements =
        {
            { "&nbsp;", " " },

            // German umlauts and sharp s.
            { "&Auml;", "Ä" },
            { "&auml;", "ä" },
            { "&ouml;", "ö" },
            { "&Ouml;", "Ö" },
            { "&uuml;", "ü" },
            { "&Uuml;", "Ü" },
            { "&szlig;", "ß" },

            // Symbols.
            { "&pound;", "£" },
            { "&sect;", "§" },
            { "&copy;", "©" },
            { "&reg;", "®" },
            { "&micro;", "µ" },
            { "&para;", "¶" },
            { "&Oslash;", "Ø" },
            { "&oslash;", "ø" },
            { "&divide;", "÷" },
            { "&times;", "×" },
        };

        /// <summary>
        /// Reduces an HTML fragment to plain text: paragraph ends and line-break
        /// tags become line breaks, comments, style/script elements and all
        /// remaining tags are dropped, the known entities are decoded and runs of
        /// spaces are collapsed.
        /// </summary>
        /// <param name="htmlCode">The HTML markup to convert.</param>
        /// <returns>
        /// The plain-text rendition; an empty string when
        /// <paramref name="htmlCode"/> is null or empty.
        /// </returns>
        private static string getOnlyTextFromHtmlCode(string htmlCode)
        {
            if (string.IsNullOrEmpty(htmlCode))
            {
                return string.Empty;
            }

            const RegexOptions options =
                RegexOptions.Singleline | RegexOptions.IgnoreCase;

            // Line breaks in the markup itself carry no meaning in HTML, so they
            // are flattened to spaces before the real breaks are inserted.
            htmlCode = htmlCode.Replace("\r\n", @" ");
            htmlCode = htmlCode.Replace("\r", @" ");
            htmlCode = htmlCode.Replace("\n", @" ");

            // End of paragraph -> blank line.
            htmlCode = htmlCode.Replace(@"</p>", Environment.NewLine + Environment.NewLine);
            htmlCode = htmlCode.Replace(@"</P>", Environment.NewLine + Environment.NewLine);

            // HTML comments.
            htmlCode = Regex.Replace(htmlCode, @"<!--.*?-->", string.Empty, options);

            // Line-break tags (with or without attributes / self-closing slash).
            htmlCode = Regex.Replace(htmlCode, @"<br[^>]*>", Environment.NewLine, options);

            // Elements whose content must not show up as text.
            htmlCode = removeTagFromHtmlCode(@"style", htmlCode);
            htmlCode = removeTagFromHtmlCode(@"script", htmlCode);

            // Every remaining tag.
            htmlCode = Regex.Replace(htmlCode, "<(.|\n)+?>", string.Empty, options);

            // Entities must be decoded after the tags are gone so that decoded
            // text is never mistaken for markup.
            htmlCode = unescapeHtmlEntities(htmlCode);

            // Collapse runs of spaces (line breaks are left untouched).
            htmlCode = Regex.Replace(htmlCode, @" +", @" ", options);

            return htmlCode;
        }

        /// <summary>
        /// Replaces the named HTML entities listed in
        /// <c>_htmlEntityReplacements</c> with their literal characters.
        /// Entities outside that table are left unchanged.
        /// </summary>
        /// <param name="htmlCode">Text that may contain named entities.</param>
        /// <returns>
        /// The text with the known entities decoded; the input itself when it is
        /// null or empty.
        /// </returns>
        private static string unescapeHtmlEntities(string htmlCode)
        {
            if (string.IsNullOrEmpty(htmlCode))
            {
                return htmlCode;
            }

            for (int row = 0; row < _htmlEntityReplacements.GetLength(0); row++)
            {
                htmlCode = htmlCode.Replace(
                    _htmlEntityReplacements[row, 0],
                    _htmlEntityReplacements[row, 1]);
            }

            return htmlCode;
        }

        /// <summary>
        /// Removes every element with the given tag name, including its
        /// attributes and everything between the opening and closing tag.
        /// </summary>
        /// <param name="tag">Tag name without angle brackets, e.g. "script".</param>
        /// <param name="htmlCode">The HTML markup to clean.</param>
        /// <returns>The markup without the matching elements.</returns>
        private static string removeTagFromHtmlCode(
            string tag,
            string htmlCode)
        {
            // Escape the name so it is matched literally, and require a word
            // boundary so that a short name cannot match a longer tag name.
            string escapedTag = Regex.Escape(tag);
            string pattern =
                @"<" + escapedTag + @"\b.*?</" + escapedTag + @"\s*>";

            return Regex.Replace(
                htmlCode,
                pattern,
                string.Empty,
                RegexOptions.Singleline | RegexOptions.IgnoreCase);
        }

#endregion