Mega Code Archive

 
Categories / C# / Network
 

Get Links From HTML

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Collections.Specialized;
using System.Text.RegularExpressions;
using System.Net;

/// <summary>
/// Helpers for extracting hyperlinks from HTML markup.
/// </summary>
public static class LinkCheckerUtilities
{
    // Matches a complete anchor element (<a ...> ... </a>). The \b after "a" keeps
    // the pattern from starting on other tags whose name merely begins with "a"
    // (abbr, area, article, ...). Matching is case-insensitive so the input never
    // has to be lowercased, which would corrupt case-sensitive URLs.
    private static readonly Regex AnchorPattern = new Regex(
        @"(<a\b.*?>.*?</a>)",
        RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);

    // Matches href = "value" or href = 'value'. Group 1 is the opening quote
    // character (reused via \1 as the closing quote); group 2 is the attribute value.
    private static readonly Regex HrefPattern = new Regex(
        @"href\s*=\s*([""'])(.*?)\1",
        RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);

    /// <summary>
    /// Collects the distinct <c>href</c> values of the anchor elements found in an HTML string.
    /// </summary>
    /// <param name="HtmlContent">The HTML markup to scan.</param>
    /// <returns>
    /// The href values in order of first appearance, with their original casing preserved.
    /// Duplicates (compared ordinally) are returned once. The collection is empty when no
    /// anchor with a quoted href is found.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="HtmlContent"/> is null.</exception>
    /// <remarks>
    /// This is a regex-based scan, not an HTML parser: only anchors that have a closing
    /// <c>&lt;/a&gt;</c> tag and a quoted href are recognised, and only the first href
    /// inside each matched anchor is taken.
    /// </remarks>
    public static StringCollection GetLinksFromHTML(string HtmlContent)
    {
        if (HtmlContent == null)
        {
            throw new ArgumentNullException("HtmlContent");
        }

        StringCollection links = new StringCollection();

        // Tracks what has already been added so de-duplication is O(1) per link
        // instead of a linear StringCollection.Contains scan.
        HashSet<string> seen = new HashSet<string>(StringComparer.Ordinal);

        foreach (Match anchorTag in AnchorPattern.Matches(HtmlContent))
        {
            Match hrefAttribute = HrefPattern.Match(anchorTag.Groups[1].Value);
            if (!hrefAttribute.Success)
            {
                continue;
            }

            string hrefValue = hrefAttribute.Groups[2].Value;
            if (seen.Add(hrefValue))
            {
                links.Add(hrefValue);
            }
        }

        return links;
    }
}