Mega Code Archive

 
Categories / Php / HTML
 

Extracting URLs Using Tidy

<?php

/**
 * Recursively collect the href value of every <a> element in a tidy subtree.
 *
 * The tree is walked depth-first, so links are returned in document order.
 * Anchors that carry no href attribute (for example <a name="top">) are
 * skipped instead of contributing a null entry.
 *
 * @param tidyNode   $node Root of the subtree to scan.
 * @param array|null $urls Accumulator, passed by reference. Any non-array
 *                         value is replaced by an empty array before use, so
 *                         callers may omit it; an existing array is appended to.
 *
 * @return array List of href strings found in $node and its descendants
 *               (preceded by whatever $urls already contained).
 */
function dump_urls(tidyNode $node, &$urls = null)
{
    if (!is_array($urls)) {
        $urls = array();
    }

    if (isset($node->id) && $node->id === TIDY_TAG_A) {
        // attribute is null for elements without attributes, and an anchor
        // may legitimately lack href, so check before reading the key.
        $attributes = $node->attribute;
        if (is_array($attributes) && isset($attributes['href'])) {
            $urls[] = $attributes['href'];
        }
    }

    if ($node->hasChildren()) {
        foreach ($node->child as $child) {
            // $urls is shared by reference, so the recursive call appends in place.
            dump_urls($child, $urls);
        }
    }

    return $urls;
}

$tidy = tidy_parse_file('http://www.php.net/');

if ($tidy === false) {
    // tidy_parse_file() returns false when the document cannot be fetched or parsed.
    fwrite(STDERR, "Unable to fetch or parse http://www.php.net/\n");
} else {
    $body = $tidy->body();
    // body() yields null when the parsed document has no <body> element.
    $urls = ($body === null) ? array() : dump_urls($body);
    print_r($urls);
}