##
## Deja vu, Batman -- didn't I write this before?
## This parser is designed to parse an SGML document, and the default action
## is just to pass the data through.  Based on TestSGMLParser from sgmllib.py.
## Whether unknown elements are passed through is controlled by a flag.
##

from sgmllib import SGMLParser


class PassSGMLParser(SGMLParser):
    """SGML parser that re-emits the parsed document to a file-like object.

    Character data is buffered in ``self.data`` and written out (via
    ``flush``) just before any markup is written, so text and markup reach
    ``fp`` in document order.  Comments are dropped.  Tags that reach
    ``unknown_starttag`` / ``unknown_endtag`` are only re-emitted when
    ``pass_unknown`` is true.
    """

    def __init__(self, fp, pass_unknown=0, verbose=0):
        """Set up the parser.

        fp           -- output object; only its ``write(data)`` method is used.
        pass_unknown -- when true, unknown start/end tags are written to
                        ``fp``; when false they are silently discarded.
        verbose      -- passed straight through to ``SGMLParser.__init__``.
        """
        self.pass_unknown = pass_unknown
        self.data = ""
        self.fp = fp
        SGMLParser.__init__(self, verbose)

    def handle_data(self, data):
        """Buffer character data until the next ``flush``."""
        self.data = self.data + data

    def flush(self):
        """Write any buffered character data to the output and clear it."""
        pending = self.data
        if pending:
            # Clear the buffer before writing so that a failing write does
            # not cause the same data to be emitted twice on a later flush.
            self.data = ""
            self.write(pending)

    def write(self, data):
        """Write ``data`` to ``fp`` and return whatever ``fp.write`` returns."""
        return self.fp.write(data)

    def write_starttag(self, tag, attrs):
        """Flush pending text, then write ``<tag name="value" ...>``.

        ``attrs`` is a sequence of ``(name, value)`` pairs; a false value
        (empty sequence or None) produces a bare ``<tag>``.
        """
        self.flush()
        self.write("<" + tag)
        for name, value in attrs or ():
            # The value is always emitted inside double quotes, so a literal
            # double quote in it must be escaped or the output is malformed.
            # '&' is intentionally left alone: the value may still contain
            # entity references that must not be double-encoded.
            quoted = value.replace('"', "&quot;")
            self.write(" " + name + '="' + quoted + '"')
        self.write(">")

    def write_endtag(self, tag):
        """Flush pending text, then write ``</tag>``."""
        self.flush()
        self.write("</%s>" % tag)

    def handle_comment(self, data):
        """Discard comments; they are never passed through."""
        pass

    def unknown_starttag(self, tag, attrs):
        """Re-emit an unknown start tag if ``pass_unknown`` is set."""
        if self.pass_unknown:
            self.write_starttag(tag, attrs)

    def unknown_endtag(self, tag):
        """Re-emit an unknown end tag if ``pass_unknown`` is set."""
        if self.pass_unknown:
            self.write_endtag(tag)

    def handle_entityref(self, ref):
        """Flush pending text, then write the entity reference as ``&ref;``."""
        self.flush()
        self.write("&%s;" % ref)

    def handle_charref(self, ref):
        """Flush pending text, then write the character reference as ``&#ref;``."""
        self.flush()
        self.write("&#%s;" % ref)

    def close(self):
        """Finish parsing, then flush any character data still buffered."""
        SGMLParser.close(self)
        self.flush()