Mega Code Archive

 
Categories / Python / XML
 

Normalizing Character Data Before Output

import sys
from xml.parsers import expat


def normalize_whitespace(text):
    """Collapse every run of whitespace in *text* to a single space.

    Leading and trailing whitespace is removed; a whitespace-only string
    becomes the empty string.
    """
    return " ".join(text.split())


class SimpleParse:
    """Print the element and text events of an XML document via expat.

    Character data is buffered in ``self.cdata`` and only printed (after
    whitespace normalisation) when the next start or end tag is seen, so
    text delivered over several ``character_data`` callbacks is emitted
    as one line.
    """

    def __init__(self):
        self.parser = expat.ParserCreate()
        self.parser.StartElementHandler = self.start_element
        self.parser.EndElementHandler = self.end_element
        self.parser.CharacterDataHandler = self.character_data
        # Text fragments collected since the last tag boundary.
        self.cdata = []

    def parse(self, file):
        """Parse the XML document read from the open file object *file*.

        On Python 3 the object's ``read`` must return bytes, i.e. the
        file has to be opened in binary mode.
        """
        self.parser.ParseFile(file)

    def print_cdata(self):
        """Print the buffered text, if any is left after normalisation,
        and reset the buffer."""
        txt = normalize_whitespace("".join(self.cdata))
        if txt:
            # Already normalised above; no need to normalise again.
            print(txt)
        self.cdata = []

    def start_element(self, name, attrs):
        """Flush pending text, then report the opening tag and its attributes."""
        self.print_cdata()
        # Single-argument print keeps the output identical on Python 2 and 3.
        print("Start: %s %s" % (name, attrs))

    def character_data(self, data):
        """Buffer one fragment of character data for later printing."""
        self.cdata.append(data)

    def end_element(self, name):
        """Flush pending text, then report the closing tag."""
        self.print_cdata()
        print("End: %s" % (name,))


def main(argv=None):
    """Parse the XML file named by the first command-line argument.

    Returns a process exit status: 0 on success, 2 when no file name
    was supplied.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        prog = argv[0] if argv else "simpleparse"
        sys.stderr.write("usage: %s FILE.xml\n" % prog)
        return 2
    # Binary mode: expat detects the encoding itself and needs bytes on
    # Python 3. The with-block guarantees the handle is closed.
    with open(argv[1], "rb") as source:
        SimpleParse().parse(source)
    return 0


if __name__ == "__main__":
    sys.exit(main())