Mega Code Archive
Categories
/
Java
/
Regular Expressions
Display all URLs in a web page by matching a regular expression that describes the a href= HTML tag
/* This program is a part of the companion code for Core Java 8th ed. (http://horstmann.com/corejava) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see
. */ import java.io.IOException; import java.io.InputStreamReader; import java.net.URL; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; /** * This program displays all URLs in a web page by matching a regular expression that describes the *
HTML tag. Start the program as
* java HrefMatch URL * @version 1.01 2004-06-04 * @author Cay Horstmann */ public class HrefMatch { public static void main(String[] args) { try { // get URL string from command line or use default String urlString; if (args.length > 0) urlString = args[0]; else urlString = "http://java.sun.com"; // open reader for URL InputStreamReader in = new InputStreamReader(new URL(urlString).openStream()); // read contents into string builder StringBuilder input = new StringBuilder(); int ch; while ((ch = in.read()) != -1) input.append((char) ch); // search for all occurrences of pattern String patternString = "
]*)\\s*>"; Pattern pattern = Pattern.compile(patternString, Pattern.CASE_INSENSITIVE); Matcher matcher = pattern.matcher(input); while (matcher.find()) { int start = matcher.start(); int end = matcher.end(); String match = input.substring(start, end); System.out.println(match); } } catch (IOException e) { e.printStackTrace(); } catch (PatternSyntaxException e) { e.printStackTrace(); } } }