Mega Code Archive

 
Categories / Php / HTML
 

Parse an HTML document for a certain tag and display its content and attributes

This could be used, for example, to extract all links (<a href's) from a page.
<?php

/**
 * Example element handler: prints the content and the attributes of one
 * matched element as HTML.
 *
 * Content, attribute names and attribute values all originate from the parsed
 * document, so every one of them is HTML-escaped before being echoed.
 *
 * @param array  $attribs Attribute name => attribute value of the element.
 * @param string $content Everything found between the opening and closing tag.
 * @return void
 */
function html_element_handler($attribs, $content)
{
    $flags = ENT_QUOTES | ENT_SUBSTITUTE;

    echo "<b>Content:</b><br> " . htmlspecialchars((string) $content, $flags) . "<br>";
    echo "<b>Attributes:</b><br>";
    if (is_array($attribs)) {
        foreach ($attribs as $key => $value) {
            echo htmlspecialchars((string) $key, $flags)
                . " = "
                . htmlspecialchars((string) $value, $flags)
                . " <br>";
        }
    }
    echo "<p>";
}

/**
 * Turns the attribute part of an opening tag (the text between the tag name
 * and the closing ">") into a name => value map.
 *
 * Supports double-quoted, single-quoted and unquoted values as well as
 * attributes without a value (reported with an empty string). Attribute names
 * are kept exactly as written in the document; values are trimmed.
 *
 * @param string $attribute_string e.g. ' href="index.php?a=1" class=nav'
 * @return array Attribute name => attribute value.
 */
function _html_parse_attributes($attribute_string)
{
    $attribs = [];
    $pattern = '~([^\s=/"\'<>]+)(?:\s*=\s*(?:"([^"]*)"|\'([^\']*)\'|([^\s"\'>]+)))?~';

    if (!preg_match_all($pattern, (string) $attribute_string, $matches, PREG_SET_ORDER)) {
        return $attribs;
    }

    foreach ($matches as $match) {
        // Groups 2-4 are the three alternative value syntaxes; at most one of
        // them is non-empty for any given attribute.
        $value = '';
        for ($group = 2; $group <= 4; $group++) {
            if (isset($match[$group]) && $match[$group] !== '') {
                $value = $match[$group];
                break;
            }
        }
        $attribs[$match[1]] = trim($value);
    }

    return $attribs;
}

/**
 * Opens $html_file, looks for every <$tag ...> ... </$tag> element and passes
 * its attributes and its content to the callback $element_handler.
 *
 * $element_handler is called once for every element that is properly closed
 * and must accept the following parameters, in this order:
 *  - $attribs (array, attribute name => value of the opening tag)
 *  - $content (string, everything between the opening and the closing tag)
 *
 * Tag names are matched case-insensitively and as whole names, so looking for
 * "a" does not report <abbr> elements. An opening tag without a matching
 * closing tag is not reported.
 *
 * @param string   $html_file       Path (or stream URL) of the document to read.
 * @param callable $element_handler Callback receiving ($attribs, $content).
 * @param string   $tag             Tag name to look for, without angle brackets.
 * @return void
 * @throws InvalidArgumentException If $element_handler is not callable.
 * @throws RuntimeException         If $html_file cannot be opened.
 */
function html_parse($html_file, $element_handler, $tag)
{
    if (!is_callable($element_handler)) {
        throw new InvalidArgumentException("Error: element handler is not callable");
    }

    $fd = fopen($html_file, "r");
    if ($fd === false) {
        throw new RuntimeException("Error: Unable to open file $html_file");
    }
    $html = stream_get_contents($fd);
    fclose($fd);

    // Nothing to scan for: an empty document or an empty tag name.
    if ($html === false || $html === '' || (string) $tag === '') {
        return;
    }

    // The tag name is quoted so that it is always matched literally; the
    // lookahead makes sure the name ends there (whitespace, "/" or ">").
    $name = preg_quote((string) $tag, '~');
    $open_pattern = '~<' . $name . '(?=[\s/>])([^>]*)>~i';
    $close_pattern = '~</' . $name . '\s*>~i';

    $offset = 0;
    while (preg_match($open_pattern, $html, $open, PREG_OFFSET_CAPTURE, $offset) === 1) {
        $content_start = $open[0][1] + strlen($open[0][0]);

        if (preg_match($close_pattern, $html, $close, PREG_OFFSET_CAPTURE, $content_start) !== 1) {
            // The handler is only invoked for elements that are closed.
            break;
        }

        $content = substr($html, $content_start, $close[0][1] - $content_start);
        $element_handler(_html_parse_attributes($open[1][0]), $content);

        // Continue scanning right after the closing tag.
        $offset = $close[0][1] + strlen($close[0][0]);
    }
}

// Example: print every <test> element found in anfrage.html.
try {
    html_parse("anfrage.html", "html_element_handler", "test");
} catch (RuntimeException $e) {
    echo $e->getMessage();
}
?>