• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""An XML Reader is the SAX 2 name for an XML parser. XML Parsers
2should be based on this code. """
3
4from . import handler
5
6from ._exceptions import SAXNotSupportedException, SAXNotRecognizedException
7
8
9# ===== XMLREADER =====
10
class XMLReader:
    """Callback-based interface for reading an XML document.

    A SAX2 driver for an XML parser implements this interface.  Through
    it an application can query and set parser features and properties,
    register the handlers that receive document events, and start a
    parse.

    SAX interfaces are assumed to be synchronous: a parse method does
    not return until parsing is complete, and a reader waits for each
    event-handler callback to return before it reports the next event.
    """

    def __init__(self):
        # Every handler slot starts out holding a default instance taken
        # from the handler module; the set*() methods below replace them.
        self._cont_handler = handler.ContentHandler()
        self._dtd_handler = handler.DTDHandler()
        self._ent_handler = handler.EntityResolver()
        self._err_handler = handler.ErrorHandler()

    def parse(self, source):
        """Parse an XML document from a system identifier or an InputSource.

        This base implementation always raises NotImplementedError.
        """
        message = "This method must be implemented!"
        raise NotImplementedError(message)

    def getContentHandler(self):
        """Return the ContentHandler currently registered."""
        return self._cont_handler

    def setContentHandler(self, handler):
        """Register the object that will receive document content events."""
        self._cont_handler = handler

    def getDTDHandler(self):
        """Return the DTD handler currently registered."""
        return self._dtd_handler

    def setDTDHandler(self, handler):
        """Register the object that will receive basic DTD-related events."""
        self._dtd_handler = handler

    def getEntityResolver(self):
        """Return the EntityResolver currently registered."""
        return self._ent_handler

    def setEntityResolver(self, resolver):
        """Register the object used to resolve external entities."""
        self._ent_handler = resolver

    def getErrorHandler(self):
        """Return the ErrorHandler currently registered."""
        return self._err_handler

    def setErrorHandler(self, handler):
        """Register the object that will receive error-message events."""
        self._err_handler = handler

    def setLocale(self, locale):
        """Let an application choose the locale for errors and warnings.

        SAX parsers need not localize errors and warnings, but a parser
        that cannot honour the requested locale must raise a SAX
        exception.  Applications may ask for a locale change in the
        middle of a parse.

        This base implementation always raises SAXNotSupportedException.
        """
        message = "Locale support not implemented"
        raise SAXNotSupportedException(message)

    def getFeature(self, name):
        """Look up and return the state of a SAX2 feature.

        This base implementation recognizes no feature and always raises
        SAXNotRecognizedException.
        """
        message = "Feature '%s' not recognized" % name
        raise SAXNotRecognizedException(message)

    def setFeature(self, name, state):
        """Set the state of a SAX2 feature.

        This base implementation recognizes no feature and always raises
        SAXNotRecognizedException.
        """
        message = "Feature '%s' not recognized" % name
        raise SAXNotRecognizedException(message)

    def getProperty(self, name):
        """Look up and return the value of a SAX2 property.

        This base implementation recognizes no property and always
        raises SAXNotRecognizedException.
        """
        message = "Property '%s' not recognized" % name
        raise SAXNotRecognizedException(message)

    def setProperty(self, name, value):
        """Set the value of a SAX2 property.

        This base implementation recognizes no property and always
        raises SAXNotRecognizedException.
        """
        message = "Property '%s' not recognized" % name
        raise SAXNotRecognizedException(message)
90
class IncrementalParser(XMLReader):
    """XMLReader extended with methods for incremental parsing.

    Three extra methods (feed, close and reset) let a parser accept a
    document piece by piece.  Supporting this interface is optional,
    because not every underlying XML parser can work this way.

    A freshly created parser can take data from feed right away.  Once
    parsing has been finished with a call to close, reset must be called
    before the parser accepts new data, whether through feed or through
    parse.

    These methods must _not_ be called during parsing, that is, between
    the call to parse and its return.

    As a convenience to SAX 2.0 driver writers, the class also provides
    a default parse method built on prepareParser, feed and close.
    """

    def __init__(self, bufsize=2**16):
        # bufsize is the amount passed to each read() call made by parse().
        self._bufsize = bufsize
        XMLReader.__init__(self)

    def parse(self, source):
        """Parse *source* by reading it in chunks and feeding each to feed.

        The source is normalised with saxutils.prepare_input_source and
        handed to prepareParser.  Its character stream is used when one
        is set, otherwise its byte stream.  Reading stops at the first
        empty chunk, after which close is called.
        """
        from . import saxutils

        source = saxutils.prepare_input_source(source)
        self.prepareParser(source)

        stream = source.getCharacterStream()
        if stream is None:
            stream = source.getByteStream()

        while True:
            chunk = stream.read(self._bufsize)
            if not chunk:
                break
            self.feed(chunk)
        self.close()

    def feed(self, data):
        """Hand raw XML data to the parser and have it parse that data,
        emitting the corresponding events.  An XML construct may be
        split across several calls to feed.

        feed may raise SAXException.  This base implementation always
        raises NotImplementedError."""
        message = "This method must be implemented!"
        raise NotImplementedError(message)

    def prepareParser(self, source):
        """Hook called by parse so that the SAX 2.0 driver can prepare
        itself for parsing.

        This base implementation always raises NotImplementedError."""
        message = "prepareParser must be overridden!"
        raise NotImplementedError(message)

    def close(self):
        """Signal that the whole document has been passed in through
        feed and that no more data will follow.  This lets the parser
        run its final checks on the document and empty its internal
        data buffer.

        The parser is not ready for another document until reset has
        been called.

        close may raise SAXException.  This base implementation always
        raises NotImplementedError."""
        message = "This method must be implemented!"
        raise NotImplementedError(message)

    def reset(self):
        """Make the parser ready for a new document after close has been
        called.  Calling parse or feed after close without calling
        reset first gives undefined results.

        This base implementation always raises NotImplementedError."""
        message = "This method must be implemented!"
        raise NotImplementedError(message)
162
163# ===== LOCATOR =====
164
class Locator:
    """Interface that ties a SAX event to a location in the document.

    A locator gives valid results only during calls to DocumentHandler
    methods; at any other time the results are unpredictable.  The
    methods of this base class return placeholder values (-1 or None).
    """

    def getColumnNumber(self):
        """Return the column number for the current event (-1 here)."""
        return -1

    def getLineNumber(self):
        """Return the line number for the current event (-1 here)."""
        return -1

    def getPublicId(self):
        """Return the public identifier for the current event (None here)."""
        return None

    def getSystemId(self):
        """Return the system identifier for the current event (None here)."""
        return None
186
187# ===== INPUTSOURCE =====
188
class InputSource:
    """Bundle of the information an XMLReader needs to read an entity.

    An instance can carry a public identifier, a system identifier, a
    byte stream (optionally with a character encoding) and/or a
    character stream for the entity.

    Applications create these objects to pass to XMLReader.parse and to
    return from EntityResolver.resolveEntity.

    The InputSource belongs to the application: an XMLReader must not
    modify an InputSource it was given, though it may copy it and modify
    the copy.
    """

    def __init__(self, system_id=None):
        # Only the system identifier can be supplied at construction
        # time; everything else starts unset and is filled in through
        # the setters.
        self.__system_id = system_id
        self.__public_id = None
        self.__encoding = None
        self.__bytefile = None
        self.__charfile = None

    def setPublicId(self, public_id):
        """Store the public identifier of this InputSource."""
        self.__public_id = public_id

    def getPublicId(self):
        """Return the public identifier of this InputSource."""
        return self.__public_id

    def setSystemId(self, system_id):
        """Store the system identifier of this InputSource."""
        self.__system_id = system_id

    def getSystemId(self):
        """Return the system identifier of this InputSource."""
        return self.__system_id

    def setEncoding(self, encoding):
        """Store the character encoding of this InputSource.

        The value must be a string that is acceptable in an XML
        encoding declaration (section 4.3.3 of the XML recommendation).

        The encoding is ignored when the InputSource also holds a
        character stream.
        """
        self.__encoding = encoding

    def getEncoding(self):
        """Return the character encoding of this InputSource."""
        return self.__encoding

    def setByteStream(self, bytefile):
        """Store the byte stream for this input source: a Python
        file-like object that does not convert bytes to characters.

        The SAX parser ignores it when a character stream is also
        specified, but prefers it over opening a URI connection itself.

        An application that knows the encoding of the byte stream
        should declare it with setEncoding.
        """
        self.__bytefile = bytefile

    def getByteStream(self):
        """Return the byte stream for this input source.

        getEncoding returns the character encoding for this byte
        stream, or None when it is unknown.
        """
        return self.__bytefile

    def setCharacterStream(self, charfile):
        """Store the character stream for this input source.  (The
        stream must be a Python 2.0 Unicode-wrapped file-like object
        that converts its data to Unicode strings.)

        When a character stream is specified, the SAX parser ignores
        any byte stream and does not try to open a URI connection to
        the system identifier.
        """
        self.__charfile = charfile

    def getCharacterStream(self):
        """Return the character stream for this input source."""
        return self.__charfile
275
276# ===== ATTRIBUTESIMPL =====
277
class AttributesImpl:
    """Attribute collection for elements handled without namespaces.

    Wraps a ``{name: value}`` mapping and exposes it through the SAX
    accessor methods plus part of the mapping protocol.  Attribute
    names double as qualified names here, so the name/QName
    conversions simply validate the key and return it unchanged.
    """

    def __init__(self, attrs):
        """Non-NS-aware implementation.

        attrs should be of the form {name : value}.  The mapping is
        stored as given, not copied."""
        self._attrs = attrs

    def getLength(self):
        """Return the number of attributes."""
        return len(self._attrs)

    def getType(self, name):
        """Return the attribute type, which is always "CDATA" here.

        name is not checked against the stored attributes."""
        return "CDATA"

    def getValue(self, name):
        """Return the value of attribute name; KeyError if it is absent."""
        return self._attrs[name]

    def getValueByQName(self, name):
        """Return the value for qualified name; KeyError if it is absent."""
        return self._attrs[name]

    def getNameByQName(self, name):
        """Return the attribute name for a qualified name.

        Both are the same string in this implementation; KeyError is
        raised when no such attribute is stored."""
        if name not in self._attrs:
            raise KeyError(name)
        return name

    def getQNameByName(self, name):
        """Return the qualified name for an attribute name.

        Both are the same string in this implementation; KeyError is
        raised when no such attribute is stored."""
        if name not in self._attrs:
            raise KeyError(name)
        return name

    def getNames(self):
        """Return a list of the attribute names."""
        return list(self._attrs.keys())

    def getQNames(self):
        """Return a list of the qualified names (same as the names)."""
        return list(self._attrs.keys())

    def __len__(self):
        return len(self._attrs)

    def __getitem__(self, name):
        return self._attrs[name]

    def keys(self):
        """Return a list of the attribute names."""
        return list(self._attrs.keys())

    def __contains__(self, name):
        return name in self._attrs

    def get(self, name, alternative=None):
        """Return the value of attribute name, or alternative if absent."""
        return self._attrs.get(name, alternative)

    def copy(self):
        """Return a new object of the same class with its own attribute dict.

        The mapping is shallow-copied so that later changes to the
        container this object wraps do not show up in the copy."""
        # Passing self._attrs itself would make the "copy" alias this
        # object's state instead of snapshotting it.
        return self.__class__(dict(self._attrs))

    def items(self):
        """Return a list of (name, value) pairs."""
        return list(self._attrs.items())

    def values(self):
        """Return a list of the attribute values."""
        return list(self._attrs.values())
337
338# ===== ATTRIBUTESNSIMPL =====
339
class AttributesNSImpl(AttributesImpl):
    """Namespace-aware attribute collection.

    Attributes are keyed by ``(ns_uri, lname)`` pairs, and a second
    mapping records the qualified name belonging to each pair.  The
    methods that do not involve qualified names are inherited from
    AttributesImpl and operate on the pair-keyed mapping.
    """

    def __init__(self, attrs, qnames):
        """NS-aware implementation.

        attrs should be of the form {(ns_uri, lname): value, ...}.
        qnames of the form {(ns_uri, lname): qname, ...}.  Both
        mappings are stored as given, not copied."""
        self._attrs = attrs
        self._qnames = qnames

    def getValueByQName(self, name):
        """Return the value of the attribute whose qualified name is name.

        Raises KeyError when no attribute has that qualified name."""
        # _qnames maps pair -> qname, so finding a qname needs a scan.
        for (nsname, qname) in self._qnames.items():
            if qname == name:
                return self._attrs[nsname]

        raise KeyError(name)

    def getNameByQName(self, name):
        """Return the (ns_uri, lname) pair for the qualified name name.

        Raises KeyError when no attribute has that qualified name."""
        for (nsname, qname) in self._qnames.items():
            if qname == name:
                return nsname

        raise KeyError(name)

    def getQNameByName(self, name):
        """Return the qualified name for the (ns_uri, lname) pair name.

        Raises KeyError when the pair is not present."""
        return self._qnames[name]

    def getQNames(self):
        """Return a list of the qualified names."""
        return list(self._qnames.values())

    def copy(self):
        """Return a new object of the same class with its own dicts.

        Both mappings are shallow-copied so that later changes to the
        containers this object wraps do not show up in the copy."""
        # Passing the stored mappings themselves would make the "copy"
        # alias this object's state instead of snapshotting it.
        return self.__class__(dict(self._attrs), dict(self._qnames))
372
373
def _test():
    """Smoke test: instantiate XMLReader, IncrementalParser and Locator
    with no arguments, in that order, and discard the instances."""
    for cls in (XMLReader, IncrementalParser, Locator):
        cls()


if __name__ == "__main__":
    _test()
381