"""
This module contains the core classes of version 2.0 of SAX for Python.
This file provides only default classes with absolutely minimum
functionality, from which drivers and applications can be subclassed.

Many of these classes are empty and are included only as documentation
of the interfaces.

$Id$
"""

# Version string of the SAX interface set defined by this module.
version = '2.0beta'
13
#============================================================================
#
# HANDLER INTERFACES
#
#============================================================================

# ===== ERRORHANDLER =====
21
class ErrorHandler:
    """Basic interface for SAX error handlers.

    If you create an object that implements this interface and then
    register the object with your XMLReader, the parser will call the
    methods in your object to report all warnings and errors. There
    are three levels of errors available: warnings, (possibly)
    recoverable errors, and unrecoverable errors. All methods take a
    SAXParseException as the only parameter.

    The default implementations raise the exception for errors and
    fatal errors, and print the exception for warnings."""

    def error(self, exception):
        """Handle a recoverable error.

        The default implementation re-raises the given exception."""
        raise exception

    def fatalError(self, exception):
        """Handle a non-recoverable error.

        The default implementation re-raises the given exception."""
        raise exception

    def warning(self, exception):
        """Handle a warning.

        The default implementation prints the exception to standard
        output and returns normally."""
        print(exception)
43
44
# ===== CONTENTHANDLER =====
46
class ContentHandler:
    """Interface for receiving logical document content events.

    This is the main callback interface in SAX, and the one most
    important to applications. The order of events in this interface
    mirrors the order of the information in the document.

    Apart from setDocumentLocator, which stores the locator, every
    method in this default implementation does nothing; subclasses
    override the events they are interested in."""

    def __init__(self):
        """Create a handler with no document locator set."""
        # Replaced by setDocumentLocator() if the parser supplies one.
        self._locator = None

    def setDocumentLocator(self, locator):
        """Called by the parser to give the application a locator for
        locating the origin of document events.

        SAX parsers are strongly encouraged (though not absolutely
        required) to supply a locator: if it does so, it must supply
        the locator to the application by invoking this method before
        invoking any of the other methods in the ContentHandler
        interface.

        The locator allows the application to determine the end
        position of any document-related event, even if the parser is
        not reporting an error. Typically, the application will use
        this information for reporting its own errors (such as
        character content that does not match an application's
        business rules). The information returned by the locator is
        probably not sufficient for use with a search engine.

        Note that the locator will return correct information only
        during the invocation of the events in this interface. The
        application should not attempt to use it at any other time.

        The default implementation stores the locator on the
        instance as self._locator."""
        self._locator = locator

    def startDocument(self):
        """Receive notification of the beginning of a document.

        The SAX parser will invoke this method only once, before any
        other methods in this interface or in DTDHandler (except for
        setDocumentLocator)."""

    def endDocument(self):
        """Receive notification of the end of a document.

        The SAX parser will invoke this method only once, and it will
        be the last method invoked during the parse. The parser shall
        not invoke this method until it has either abandoned parsing
        (because of an unrecoverable error) or reached the end of
        input."""

    def startPrefixMapping(self, prefix, uri):
        """Begin the scope of a prefix-URI Namespace mapping.

        The information from this event is not necessary for normal
        Namespace processing: the SAX XML reader will automatically
        replace prefixes for element and attribute names when the
        http://xml.org/sax/features/namespaces feature is true (the
        default).

        There are cases, however, when applications need to use
        prefixes in character data or in attribute values, where they
        cannot safely be expanded automatically; the
        start/endPrefixMapping event supplies the information to the
        application to expand prefixes in those contexts itself, if
        necessary.

        Note that start/endPrefixMapping events are not guaranteed to
        be properly nested relative to each other: all
        startPrefixMapping events will occur before the corresponding
        startElement event, and all endPrefixMapping events will occur
        after the corresponding endElement event, but their order is
        not guaranteed."""

    def endPrefixMapping(self, prefix):
        """End the scope of a prefix-URI mapping.

        See startPrefixMapping for details. This event will always
        occur after the corresponding endElement event, but the order
        of endPrefixMapping events is not otherwise guaranteed."""

    def startElement(self, name, attrs):
        """Signals the start of an element in non-namespace mode.

        The name parameter contains the raw XML 1.0 name of the
        element type as a string and the attrs parameter holds an
        instance of the Attributes class containing the attributes of
        the element."""

    def endElement(self, name):
        """Signals the end of an element in non-namespace mode.

        The name parameter contains the name of the element type, just
        as with the startElement event."""

    def startElementNS(self, name, qname, attrs):
        """Signals the start of an element in namespace mode.

        The name parameter contains the name of the element type as a
        (uri, localname) tuple, the qname parameter the raw XML 1.0
        name used in the source document, and the attrs parameter
        holds an instance of the Attributes class containing the
        attributes of the element.

        The uri part of the name tuple is None for elements which have
        no namespace."""

    def endElementNS(self, name, qname):
        """Signals the end of an element in namespace mode.

        The name parameter contains the name of the element type, just
        as with the startElementNS event."""

    def characters(self, content):
        """Receive notification of character data.

        The Parser will call this method to report each chunk of
        character data. SAX parsers may return all contiguous
        character data in a single chunk, or they may split it into
        several chunks; however, all of the characters in any single
        event must come from the same external entity so that the
        Locator provides useful information."""

    def ignorableWhitespace(self, whitespace):
        """Receive notification of ignorable whitespace in element content.

        Validating Parsers must use this method to report each chunk
        of ignorable whitespace (see the W3C XML 1.0 recommendation,
        section 2.10): non-validating parsers may also use this method
        if they are capable of parsing and using content models.

        SAX parsers may return all contiguous whitespace in a single
        chunk, or they may split it into several chunks; however, all
        of the characters in any single event must come from the same
        external entity, so that the Locator provides useful
        information."""

    def processingInstruction(self, target, data):
        """Receive notification of a processing instruction.

        The Parser will invoke this method once for each processing
        instruction found: note that processing instructions may occur
        before or after the main document element.

        A SAX parser should never report an XML declaration (XML 1.0,
        section 2.8) or a text declaration (XML 1.0, section 4.3.1)
        using this method."""

    def skippedEntity(self, name):
        """Receive notification of a skipped entity.

        The Parser will invoke this method once for each entity
        skipped. Non-validating processors may skip entities if they
        have not seen the declarations (because, for example, the
        entity was declared in an external DTD subset). All processors
        may skip external entities, depending on the values of the
        http://xml.org/sax/features/external-general-entities and the
        http://xml.org/sax/features/external-parameter-entities
        properties."""
204
205
# ===== DTDHandler =====
207
class DTDHandler:
    """Handle DTD events.

    This interface specifies only those DTD events required for basic
    parsing (unparsed entities and attributes).

    Both default implementations do nothing."""

    def notationDecl(self, name, publicId, systemId):
        """Handle a notation declaration event."""

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        """Handle an unparsed entity declaration event."""
219
220
# ===== ENTITYRESOLVER =====
222
class EntityResolver:
    """Basic interface for resolving entities. If you create an object
    implementing this interface, then register the object with your
    Parser, the parser will call the method in your object to
    resolve all external entities. Note that DefaultHandler implements
    this interface with the default behaviour."""

    def resolveEntity(self, publicId, systemId):
        """Resolve the system identifier of an entity and return either
        the system identifier to read from as a string, or an InputSource
        to read from.

        The default implementation ignores publicId and returns
        systemId unchanged."""
        return systemId
235
236
#============================================================================
#
# CORE FEATURES
#
#============================================================================
242
feature_namespaces = "http://xml.org/sax/features/namespaces"
# true: Perform Namespace processing (default).
# false: Optionally do not perform Namespace processing
#        (implies namespace-prefixes).
# access: (parsing) read-only; (not parsing) read/write

feature_namespace_prefixes = "http://xml.org/sax/features/namespace-prefixes"
# true: Report the original prefixed names and attributes used for Namespace
#       declarations.
# false: Do not report attributes used for Namespace declarations, and
#        optionally do not report original prefixed names (default).
# access: (parsing) read-only; (not parsing) read/write

feature_string_interning = "http://xml.org/sax/features/string-interning"
# true: All element names, prefixes, attribute names, Namespace URIs, and
#       local names are interned using the built-in intern function.
# false: Names are not necessarily interned, although they may be (default).
# access: (parsing) read-only; (not parsing) read/write

feature_validation = "http://xml.org/sax/features/validation"
# true: Report all validation errors (implies external-general-entities and
#       external-parameter-entities).
# false: Do not report validation errors.
# access: (parsing) read-only; (not parsing) read/write

feature_external_ges = "http://xml.org/sax/features/external-general-entities"
# true: Include all external general (text) entities.
# false: Do not include external general entities.
# access: (parsing) read-only; (not parsing) read/write

feature_external_pes = "http://xml.org/sax/features/external-parameter-entities"
# true: Include all external parameter entities, including the external
#       DTD subset.
# false: Do not include any external parameter entities, even the external
#        DTD subset.
# access: (parsing) read-only; (not parsing) read/write

# Every core feature identifier defined above, in declaration order.
all_features = [feature_namespaces,
                feature_namespace_prefixes,
                feature_string_interning,
                feature_validation,
                feature_external_ges,
                feature_external_pes]
286
287
#============================================================================
#
# CORE PROPERTIES
#
#============================================================================
293
property_lexical_handler = "http://xml.org/sax/properties/lexical-handler"
# data type: xml.sax.sax2lib.LexicalHandler
# description: An optional extension handler for lexical events like comments.
# access: read/write

property_declaration_handler = "http://xml.org/sax/properties/declaration-handler"
# data type: xml.sax.sax2lib.DeclHandler
# description: An optional extension handler for DTD-related events other
#              than notations and unparsed entities.
# access: read/write

property_dom_node = "http://xml.org/sax/properties/dom-node"
# data type: org.w3c.dom.Node
# description: When parsing, the current DOM node being visited if this is
#              a DOM iterator; when not parsing, the root DOM node for
#              iteration.
# access: (parsing) read-only; (not parsing) read/write

property_xml_string = "http://xml.org/sax/properties/xml-string"
# data type: String
# description: The literal string of characters that was the source for
#              the current event.
# access: read-only

property_encoding = "http://www.python.org/sax/properties/encoding"
# data type: String
# description: The name of the encoding to assume for input data.
# access: write: set the encoding, e.g. established by a higher-level
#                protocol. May change during parsing (e.g. after
#                processing a META tag)
#         read:  return the current encoding (possibly established through
#                auto-detection).
# initial value: UTF-8
#

property_interning_dict = "http://www.python.org/sax/properties/interning-dict"
# data type: Dictionary
# description: The dictionary used to intern common strings in the document
# access: write: Request that the parser uses a specific dictionary, to
#                allow interning across different documents
#         read:  return the current interning dictionary, or None
#

# Every core property identifier defined above. Note that the list order
# (dom-node before declaration-handler) differs from declaration order.
all_properties = [property_lexical_handler,
                  property_dom_node,
                  property_declaration_handler,
                  property_xml_string,
                  property_encoding,
                  property_interning_dict]
343
344
class LexicalHandler:
    """Optional SAX2 handler for lexical events.

    This handler is used to obtain lexical information about an XML
    document, that is, information about how the document was encoded
    (as opposed to what it contains, which is reported to the
    ContentHandler), such as comments and CDATA marked section
    boundaries.

    To set the LexicalHandler of an XMLReader, use the setProperty
    method with the property identifier
    'http://xml.org/sax/properties/lexical-handler'.

    Every method in this default implementation does nothing."""

    def comment(self, content):
        """Reports a comment anywhere in the document (including the
        DTD and outside the document element).

        content is a string that holds the contents of the comment."""

    def startDTD(self, name, public_id, system_id):
        """Report the start of the DTD declarations, if the document
        has an associated DTD.

        A startEntity event will be reported before declaration events
        from the external DTD subset are reported, and this can be
        used to infer from which subset DTD declarations derive.

        name is the name of the document element type, public_id the
        public identifier of the DTD (or None if none were supplied)
        and system_id the system identifier of the external subset (or
        None if none were supplied)."""

    def endDTD(self):
        """Signals the end of DTD declarations."""

    def startCDATA(self):
        """Reports the beginning of a CDATA marked section.

        The contents of the CDATA marked section will be reported
        through the characters event."""

    def endCDATA(self):
        """Reports the end of a CDATA marked section."""
388