Mega Code Archive

 
Categories / Python Tutorial / Network
 

Retrieving Text from HTML Documents

# Retrieve the text content of an HTML document.
#
# The page is downloaded, run through an HTMLParser subclass that collects
# every text node, and the collected text is printed one node per line.

from contextlib import closing

try:  # Python 3 module layout
    from html.parser import HTMLParser
    from urllib.request import urlopen
except ImportError:  # Python 2 module layout
    from HTMLParser import HTMLParser
    from urllib import urlopen

DEFAULT_URL = "http://www.python.org/index.html"

# Text nodes gathered by every parseText instance, in document order.
urlText = []


class parseText(HTMLParser):
    """HTML parser that appends each text node it sees to ``urlText``."""

    def handle_data(self, data):
        """Record ``data`` unless it is exactly a single newline.

        Args:
            data: The text found between two pieces of markup.
        """
        if data != '\n':
            urlText.append(data)


def _fetch_html(url):
    """Download ``url`` and return its body as a native ``str``.

    The response is always closed, even if reading fails.
    """
    # closing() is used because the Python 2 response object is not a
    # context manager on its own.
    with closing(urlopen(url)) as response:
        raw = response.read()
        if isinstance(raw, str):
            # Python 2: read() already yields a native str; pass it through.
            return raw
        # Python 3: read() yields bytes, which HTMLParser.feed() rejects.
        charset = response.headers.get_content_charset() or "utf-8"
    return raw.decode(charset, "replace")


def main(url=DEFAULT_URL):
    """Parse the document at ``url`` and print each collected text node.

    Args:
        url: Address of the HTML document to download.
    """
    parser = parseText()
    parser.feed(_fetch_html(url))
    parser.close()
    for item in urlText:
        # Single-argument call form prints identically on Python 2 and 3.
        print(item)


if __name__ == "__main__":
    main()