]*)\\sclass=\"MsoNormal\")",
RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.IgnoreCase | RegexOptions.Compiled);
Regex whitelistRegex = new Regex("^?(" + whiteListTags + ")>$|^<(b|h)r\\s?/?>$",
RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.IgnorePatternWhitespace);
Regex whitelistAnchorRegex = new Regex(@"
^]+"")|
(\srel=""nofollow""))*
\s?>$|
^$",
RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.IgnorePatternWhitespace);
Regex whitelistImageRegex = new Regex(@"
^]*"")|
(\stitle=""[^""<>]*""))*
\s?/?>$",
RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.IgnorePatternWhitespace);
#endregion
if (String.IsNullOrEmpty(html))
return html;
// Preliminary cleanup: remove disallowed attributes that come from Word.
html = cleanupRegex.Replace(html, "");
string tagname;
Match tag;
// match every HTML tag in the input
MatchCollection tags = tagsRegex.Matches(html);
for (int i = tags.Count - 1; i > -1; i--)
{
tag = tags[i];
tagname = tag.Value.ToLowerInvariant();
if (!(whitelistRegex.IsMatch(tagname) || whitelistAnchorRegex.IsMatch(tagname) || whitelistImageRegex.IsMatch(tagname)))
{
html = html.Remove(tag.Index, tag.Length);
System.Diagnostics.Debug.WriteLine("tag sanitized: " + tagname);
}
}
return html;
}
}
}