• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
##
## Deja vu, Batman -- didn't I write this before?
## This parser is designed to parse an SGML document; its default action
## is simply to pass the data through.  Based on TestSGMLParser from
## sgmllib.py.  A constructor flag controls whether unknown elements are
## passed through to the output or dropped.
##
7
8from sgmllib import SGMLParser
9
10class PassSGMLParser(SGMLParser):
11  def __init__(self, fp, pass_unknown=0, verbose=0):
12    self.pass_unknown = pass_unknown
13    self.data = ""
14    self.fp = fp
15    SGMLParser.__init__(self, verbose)
16
17  def handle_data(self, data):
18    self.data = self.data + data
19
20  def flush(self):
21    data = self.data
22    if data:
23      self.data = ""
24      self.write(data)
25
26  def write (self, data):
27    return self.fp.write(data)
28
29  def write_starttag (self, tag, attrs):
30    self.flush()
31    if not attrs:
32      self.write ("<%s>" % tag)
33    else:
34      self.write ("<" + tag)
35      for name, value in attrs:
36        self.write (" " + name + '=' + '"' + value + '"')
37      self.write (">")
38
39  def write_endtag (self, tag):
40    self.flush()
41    self.write ("</%s>" % tag)
42
43  def handle_comment(self, data):
44    # don't pass comments
45    pass
46
47  def unknown_starttag(self, tag, attrs):
48    if self.pass_unknown:
49      self.write_starttag (tag, attrs)
50
51  def unknown_endtag(self, tag):
52    if self.pass_unknown:
53      self.write_endtag(tag)
54
55  def handle_entityref(self, ref):
56    self.flush()
57    self.write ("&%s;" % ref)
58
59  def handle_charref(self, ref):
60    self.flush()
61    self.write ("&#%s;" % ref)
62
63  def close(self):
64    SGMLParser.close(self)
65    self.flush()
66