• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1import libxml2mod
2import types
3import sys
4
5# The root of all libxml2 errors.
class libxmlError(Exception):
    """Common base class of every error raised by the libxml2 wrappers."""
7
8# Type of the wrapper class for the C objects wrappers
def checkWrapper(obj):
    """Check that obj has the type used to wrap C objects.

    Returns 0 when obj is acceptable: either None (the "no object yet"
    default of the wrapper constructors in this file) or an instance of a
    type named 'PyCObject' or 'PyCapsule'.  Returns 1 for anything else.

    The previous implementation inspected an undefined name (_obj) inside a
    bare try/except, so the NameError was swallowed and every object was
    reported as valid.
    """
    if obj is None:
        return 0
    if type(obj).__name__ in ('PyCObject', 'PyCapsule'):
        return 0
    return 1
17
18#
19# id() is sometimes negative ...
20#
def pos_id(o):
    """Return id(o), remapped to sys.maxsize - id(o) when id() is negative."""
    ident = id(o)
    return (sys.maxsize - ident) if ident < 0 else ident
26
27#
28# Errors raised by the wrappers when some tree handling failed.
29#
class treeError(libxmlError):
    """Error raised by the wrappers when some tree handling failed.

    msg is kept on the instance and is what str() returns.
    """
    def __init__(self, msg):
        # Hand msg to the base class as well, so that the standard
        # exception state (args, repr()) carries the message instead of
        # staying empty.
        libxmlError.__init__(self, msg)
        self.msg = msg
    def __str__(self):
        return self.msg
35
class parserError(libxmlError):
    """Error raised for parser failures.

    SAXCallback.error() and SAXCallback.fatalError() raise it with the
    message they receive.  msg is kept on the instance and is what str()
    returns.
    """
    def __init__(self, msg):
        # Hand msg to the base class as well, so that the standard
        # exception state (args, repr()) carries the message instead of
        # staying empty.
        libxmlError.__init__(self, msg)
        self.msg = msg
    def __str__(self):
        return self.msg
41
class uriError(libxmlError):
    """libxml2 error carrying a message; by its name, meant for URI failures.

    msg is kept on the instance and is what str() returns.
    """
    def __init__(self, msg):
        # Hand msg to the base class as well, so that the standard
        # exception state (args, repr()) carries the message instead of
        # staying empty.
        libxmlError.__init__(self, msg)
        self.msg = msg
    def __str__(self):
        return self.msg
47
class xpathError(libxmlError):
    """libxml2 error carrying a message; by its name, meant for XPath failures.

    msg is kept on the instance and is what str() returns.
    """
    def __init__(self, msg):
        # Hand msg to the base class as well, so that the standard
        # exception state (args, repr()) carries the message instead of
        # staying empty.
        libxmlError.__init__(self, msg)
        self.msg = msg
    def __str__(self):
        return self.msg
53
class ioWrapper:
    """Expose a Python file-like object through io_* methods.

    The wrapped object is kept privately; once it has been closed, or a
    read on it has failed, it is dropped and every io_* method returns -1.
    """
    def __init__(self, _obj):
        self.__io = _obj
        # Handle of the associated libxml2 buffer; set by the subclasses.
        self._o = None

    def io_close(self):
        """Close and forget the wrapped object.

        Returns 0 on success, -1 when there is no object (any more).
        """
        if self.__io is None:
            return(-1)
        self.__io.close()
        self.__io = None
        return(0)

    def io_flush(self):
        """Flush the wrapped object.

        Returns 0 on success, -1 when there is no object (any more).
        """
        if self.__io is None:
            return(-1)
        self.__io.flush()
        return(0)

    def io_read(self, len = -1):
        """Read from the wrapped object.

        A negative len reads everything available, otherwise at most len
        units are requested.  Returns the data read, or -1 when there is no
        object or the read raised an exception.
        """
        if self.__io is None:
            return(-1)
        try:
            if len < 0:
                ret = self.__io.read()
            else:
                ret = self.__io.read(len)
        except Exception:
            import sys
            e = sys.exc_info()[1]
            print("failed to read from Python:", type(e))
            print("on IO:", self.__io)
            # Forget the failing object so that later calls return -1.
            # (This used to be the no-op comparison "self.__io == None".)
            self.__io = None
            return(-1)

        return(ret)

    def io_write(self, str, len = -1):
        """Write str to the wrapped object and return what its write() returns.

        Returns -1 when there is no object (any more).
        """
        if self.__io is None:
            return(-1)
        if len < 0:
            return(self.__io.write(str))
        # NOTE(review): this passes len as a second argument to write(),
        # which standard file objects do not accept - confirm which kind of
        # object is expected on this path.
        return(self.__io.write(str, len))
96
class ioReadWrapper(ioWrapper):
    """ioWrapper bound to a libxml2 parser input buffer.

    The input buffer is created from this wrapper at construction time and
    released, together with the wrapped Python object, by close() or when
    the wrapper is garbage collected.
    """
    def __init__(self, _obj, enc = ""):
        ioWrapper.__init__(self, _obj)
        self._o = libxml2mod.xmlCreateInputBuffer(self, enc)

    def __del__(self):
        # Same teardown as close(); the debug print that used to be emitted
        # here on every collection has been removed.
        self.__release()

    def close(self):
        """Close the wrapped object and free the input buffer."""
        self.__release()

    def __release(self):
        # Shared teardown: close the Python side first, then free the C
        # buffer once and forget its handle so a second call is harmless.
        self.io_close()
        if self._o is not None:
            libxml2mod.xmlFreeParserInputBuffer(self._o)
        self._o = None
114
class ioWriteWrapper(ioWrapper):
    """ioWrapper bound to a libxml2 output buffer.

    _obj is normally an output buffer handle; the Python file attached to
    it (if any) becomes the wrapped I/O object.  enc is accepted but not
    used by the code paths that are currently enabled.
    """
    def __init__(self, _obj, enc = ""):
#        print "ioWriteWrapper.__init__", _obj
        if isinstance(_obj, str):
            print("write io from a string")
            # No file object and no output buffer in this case.  Run the
            # base initializer so that the attributes tested by flush(),
            # close() and __del__ exist (this branch used to set a
            # misspelled "o" attribute and skip the base initializer, which
            # made those methods raise AttributeError).
            ioWrapper.__init__(self, None)
#        elif type(_obj) == types.InstanceType:
#            print(("write io from instance of %s" % (_obj.__class__)))
#            ioWrapper.__init__(self, _obj)
#            self._o = libxml2mod.xmlCreateOutputBuffer(self, enc)
        else:
            # Prefer the Python file attached to the output buffer; fall
            # back to wrapping the buffer handle itself.
            pyfile = libxml2mod.outputBufferGetPythonFile(_obj)
            if pyfile is not None:
                ioWrapper.__init__(self, pyfile)
            else:
                ioWrapper.__init__(self, _obj)
            self._o = _obj

    def __del__(self):
#        print "__del__"
        self.io_close()
        if self._o is not None:
            libxml2mod.xmlOutputBufferClose(self._o)
        self._o = None

    def flush(self):
        """Flush the wrapped object, then close the output buffer."""
        # NOTE(review): flush() closes the output buffer exactly like
        # close() does - confirm this is intended.
        self.io_flush()
        if self._o is not None:
            libxml2mod.xmlOutputBufferClose(self._o)
        self._o = None

    def close(self):
        """Flush the wrapped object and close the output buffer."""
        self.io_flush()
        if self._o is not None:
            libxml2mod.xmlOutputBufferClose(self._o)
        self._o = None
158
159#
160# Example of a class to handle SAX events
161#
class SAXCallback:
    """Base class for SAX handlers.

    Every event handler is a no-op meant to be overridden, except error()
    and fatalError() which raise parserError.
    """
    def startDocument(self):
        """called at the start of the document"""
        pass

    def endDocument(self):
        """called at the end of the document"""
        pass

    def startElement(self, tag, attrs):
        """called at the start of every element, tag is the name of
           the element, attrs is a dictionary of the element's attributes"""
        pass

    def endElement(self, tag):
        """called at the end of every element, tag is the name of
           the element"""
        pass

    def characters(self, data):
        """called when character data have been read, data is the string
           containing the data, multiple consecutive characters() callback
           are possible."""
        pass

    def cdataBlock(self, data):
        """called when CDATA section have been read, data is the string
           containing the data, multiple consecutive cdataBlock() callback
           are possible."""
        pass

    def reference(self, name):
        """called when an entity reference has been found"""
        pass

    def ignorableWhitespace(self, data):
        """called when potentially ignorable white spaces have been found"""
        pass

    def processingInstruction(self, target, data):
        """called when a PI has been found, target contains the PI name and
           data is the associated data in the PI"""
        pass

    def comment(self, content):
        """called when a comment has been found, content contains the comment"""
        pass

    def externalSubset(self, name, externalID, systemID):
        """called when a DOCTYPE declaration has been found, name is the
           DTD name and externalID, systemID are the DTD public and system
           identifier for that DTD if available"""
        pass

    def internalSubset(self, name, externalID, systemID):
        """called when a DOCTYPE declaration has been found, name is the
           DTD name and externalID, systemID are the DTD public and system
           identifier for that DTD if available"""
        pass

    def entityDecl(self, name, type, externalID, systemID, content):
        """called when an ENTITY declaration has been found, name is the
           entity name and externalID, systemID are the entity public and
           system identifier for that entity if available, type indicates
           the entity type, and content reports its string content"""
        pass

    def notationDecl(self, name, externalID, systemID):
        """called when a NOTATION declaration has been found, name is the
           notation name and externalID, systemID are the notation public and
           system identifier for that notation if available"""
        pass

    def attributeDecl(self, elem, name, type, defi, defaultValue, nameList):
        """called when an ATTRIBUTE definition has been found"""
        pass

    def elementDecl(self, name, type, content):
        """called when an ELEMENT definition has been found"""
        pass

    # This handler used to be declared under the name entityDecl as well,
    # which silently replaced the five-argument entityDecl() above.
    def unparsedEntityDecl(self, name, publicId, systemID, notationName):
        """called when an unparsed ENTITY declaration has been found,
           name is the entity name and publicId, systemID are the entity
           public and system identifier for that entity if available,
           and notationName indicate the associated NOTATION"""
        pass

    def warning(self, msg):
        """called with a warning message; ignored by default"""
        #print msg
        pass

    def error(self, msg):
        """called with an error message; raises parserError(msg)"""
        raise parserError(msg)

    def fatalError(self, msg):
        """called with a fatal error message; raises parserError(msg)"""
        raise parserError(msg)
260
261#
262# This class is the ancestor of all the Node classes. It provides
263# the basic functionalities shared by all nodes (and handle
# gracefully the exception), like name, navigation in the tree,
265# doc reference, content access and serializing to a string or URI
266#
class xmlCore:
    """Ancestor of all the node classes.

    Holds the C node handle in _o and provides what all nodes share: name,
    navigation in the tree, document reference, content access,
    serialization, canonicalization, XPath evaluation and iteration.
    """
    def __init__(self, _obj=None):
        # _o is the handle passed to every libxml2mod call; None when unset.
        self._o = _obj

    def __eq__(self, other):
        if other is None:
            return False
        try:
            other_o = other._o
        except AttributeError:
            # other is not a wrapped node: unequal rather than an error
            return False
        ret = libxml2mod.compareNodesEqual(self._o, other_o)
        if ret is None:
            return False
        return ret == True
    def __ne__(self, other):
        if other is None:
            return True
        try:
            other_o = other._o
        except AttributeError:
            # other is not a wrapped node: different rather than an error
            return True
        ret = libxml2mod.compareNodesEqual(self._o, other_o)
        return not ret
    def __hash__(self):
        ret = libxml2mod.nodeHash(self._o)
        return ret

    def __str__(self):
        return self.serialize()
    def get_parent(self):
        ret = libxml2mod.parent(self._o)
        if ret is None:
            return None
        return nodeWrap(ret)
    def get_children(self):
        ret = libxml2mod.children(self._o)
        if ret is None:
            return None
        return nodeWrap(ret)
    def get_last(self):
        ret = libxml2mod.last(self._o)
        if ret is None:
            return None
        return nodeWrap(ret)
    def get_next(self):
        ret = libxml2mod.next(self._o)
        if ret is None:
            return None
        return nodeWrap(ret)
    def get_properties(self):
        ret = libxml2mod.properties(self._o)
        if ret is None:
            return None
        return xmlAttr(_obj=ret)
    def get_prev(self):
        ret = libxml2mod.prev(self._o)
        if ret is None:
            return None
        return nodeWrap(ret)
    def get_content(self):
        return libxml2mod.xmlNodeGetContent(self._o)
    getContent = get_content  # why is this duplicate naming needed ?
    def get_name(self):
        return libxml2mod.name(self._o)
    def get_type(self):
        return libxml2mod.type(self._o)
    def get_doc(self):
        ret = libxml2mod.doc(self._o)
        if ret is None:
            # A document node has no owning document: it is its own.
            if self.type in ["document_xml", "document_html"]:
                return xmlDoc(_obj=self._o)
            else:
                return None
        return xmlDoc(_obj=ret)
    #
    # Those are common attributes to nearly all type of nodes,
    # defined as properties.  The __getattr__ emulation that served
    # interpreters older than Python 2.2 is gone: the rest of this file
    # (reversed(), sys.maxsize) cannot run on such interpreters anyway.
    #
    parent = property(get_parent, None, None, "Parent node")
    children = property(get_children, None, None, "First child node")
    last = property(get_last, None, None, "Last sibling node")
    next = property(get_next, None, None, "Next sibling node")
    prev = property(get_prev, None, None, "Previous sibling node")
    properties = property(get_properties, None, None, "List of properies")
    content = property(get_content, None, None, "Content of this node")
    name = property(get_name, None, None, "Node name")
    type = property(get_type, None, None, "Node type")
    doc = property(get_doc, None, None, "The document this node belongs to")

    #
    # Serialization routines, the optional arguments have the following
    # meaning:
    #     encoding: string to ask saving in a specific encoding
    #     format: if 1 the serializer is asked to indent the output
    #
    def serialize(self, encoding = None, format = 0):
        return libxml2mod.serializeNode(self._o, encoding, format)
    def saveTo(self, file, encoding = None, format = 0):
        return libxml2mod.saveNodeTo(self._o, file, encoding, format)

    #
    # Canonicalization routines:
    #
    #   nodes: the node set (tuple or list) to be included in the
    #     canonized image or None if all document nodes should be
    #     included.
    #   exclusive: the exclusive flag (0 - non-exclusive
    #     canonicalization; otherwise - exclusive canonicalization)
    #   prefixes: the list of inclusive namespace prefixes (strings),
    #     or None if there is no inclusive namespaces (only for
    #     exclusive canonicalization, ignored otherwise)
    #   with_comments: include comments in the result (!=0) or not
    #     (==0)
    def c14nMemory(self,
                   nodes=None,
                   exclusive=0,
                   prefixes=None,
                   with_comments=0):
        if nodes:
            # the C layer works on the raw handles, not on the wrappers
            nodes = [n._o for n in nodes]
        return libxml2mod.xmlC14NDocDumpMemory(
            self.get_doc()._o,
            nodes,
            exclusive != 0,
            prefixes,
            with_comments != 0)
    def c14nSaveTo(self,
                   file,
                   nodes=None,
                   exclusive=0,
                   prefixes=None,
                   with_comments=0):
        if nodes:
            # the C layer works on the raw handles, not on the wrappers
            nodes = [n._o for n in nodes]
        return libxml2mod.xmlC14NDocSaveTo(
            self.get_doc()._o,
            nodes,
            exclusive != 0,
            prefixes,
            with_comments != 0,
            file)

    #
    # Selecting nodes using XPath, a bit slow because the context
    # is allocated/freed every time but convenient.
    #
    def xpathEval(self, expr):
        doc = self.doc
        if doc is None:
            return None
        ctxt = doc.xpathNewContext()
        try:
            ctxt.setContextNode(self)
            res = ctxt.xpathEval(expr)
        finally:
            # free the context even when the evaluation raises
            ctxt.xpathFreeContext()
        return res

    #
    # A faster variant caching one context per xmlDoc (in doc._ctxt) was
    # removed because of memory leaks, c.f. #126735; the name is kept as
    # a plain alias of xpathEval().
    #
    def xpathEval2(self, expr):
        return self.xpathEval(expr)

    # Remove namespaces
    def removeNsDef(self, href):
        """
        Remove a namespace definition from a node.  If href is None,
        remove all of the ns definitions on that node.  The removed
        namespaces are returned as a linked list.

        Note: If any child nodes referred to the removed namespaces,
        they will be left with dangling links.  You should call
        reconciliateNs() to fix those pointers.

        Note: This method does not free memory taken by the ns
        definitions.  You will need to free it manually with the
        freeNsList() method on the returned xmlNs object.
        """

        ret = libxml2mod.xmlNodeRemoveNsDef(self._o, href)
        if ret is None:
            return None
        return xmlNs(_obj=ret)

    # support for python iterators
    def walk_depth_first(self):
        return xmlCoreDepthFirstItertor(self)
    def walk_breadth_first(self):
        return xmlCoreBreadthFirstItertor(self)
    __iter__ = walk_depth_first

    def free(self):
        # Best effort: a cached XPath context only exists on the document
        # if something stored one in doc._ctxt, so a failure here is
        # expected and ignored.
        try:
            self.doc._ctxt.xpathFreeContext()
        except Exception:
            pass
        # NOTE(review): this hands self._o to xmlFreeDoc whatever the node
        # type is - presumably only meant to be called on documents.
        libxml2mod.xmlFreeDoc(self._o)
522
523
524#
525# implements the depth-first iterator for libxml2 DOM tree
526#
class xmlCoreDepthFirstItertor:
    """Iterator yielding a node, then its children, then its next siblings."""
    def __init__(self, node):
        self.node = node
        self.parents = []
    def __iter__(self):
        return self
    def __next__(self):
        # Nothing pending: back up through the visited nodes until one of
        # them has a following sibling, or stop when none is left.
        while not self.node:
            if not self.parents:
                raise StopIteration
            self.node = self.parents.pop().next
        current = self.node
        self.parents.append(current)
        # descend first; siblings are picked up when backing up
        self.node = current.children
        return current
    next = __next__
546
547#
548# implements the breadth-first iterator for libxml2 DOM tree
549#
class xmlCoreBreadthFirstItertor:
    """Iterator yielding a node, then its next siblings, then their children."""
    def __init__(self, node):
        self.node = node
        self.parents = []
    def __iter__(self):
        return self
    def __next__(self):
        # Nothing pending: back up through the visited nodes until one of
        # them has children, or stop when none is left.
        while not self.node:
            if not self.parents:
                raise StopIteration
            self.node = self.parents.pop().children
        current = self.node
        self.parents.append(current)
        # walk the sibling chain first; children are picked up when backing up
        self.node = current.next
        return current
    next = __next__
569
570#
571# converters to present a nicer view of the XPath returns
572#
def nodeWrap(o):
    """Wrap the C node o in the Python class matching its node type.

    Node types without a dedicated class are wrapped as xmlNode.
    """
    # TODO try to cast to the most appropriate node class
    kind = libxml2mod.type(o)
    if kind in ("element", "text"):
        wrapper = xmlNode
    elif kind == "attribute":
        wrapper = xmlAttr
    elif kind[0:8] == "document":
        # any type name beginning with "document"
        wrapper = xmlDoc
    elif kind == "namespace":
        wrapper = xmlNs
    elif kind == "elem_decl":
        wrapper = xmlElement
    elif kind == "attribute_decl":
        wrapper = xmlAttribute
    elif kind == "entity_decl":
        wrapper = xmlEntity
    elif kind == "dtd":
        wrapper = xmlDtd
    else:
        wrapper = xmlNode
    return wrapper(_obj=o)
593
def xpathObjectRet(o):
    """Convert an XPath result into Python objects.

    Lists and tuples are converted element by element (keeping the
    container type), strings, ints and floats are returned unchanged and
    anything else is wrapped with nodeWrap().
    """
    kind = type(o)
    if kind is list:
        return [xpathObjectRet(item) for item in o]
    if kind is tuple:
        return tuple([xpathObjectRet(item) for item in o])
    if kind in (str, int, float):
        return o
    return nodeWrap(o)
606
607#
608# register an XPath function
609#
def registerXPathFunction(ctxt, name, ns_uri, f):
    """Register the Python callable f as an XPath function on ctxt.

    name and ns_uri are passed through to libxml2mod as the function name
    and its namespace URI.  Returns the result of
    libxml2mod.xmlRegisterXPathFunction, which was previously computed and
    then dropped, so callers can now check the outcome of the registration.
    """
    return libxml2mod.xmlRegisterXPathFunction(ctxt, name, ns_uri, f)
612
613#
614# For the xmlTextReader parser configuration
615#
# xmlTextReader parser configuration values
(PARSER_LOADDTD,
 PARSER_DEFAULTATTRS,
 PARSER_VALIDATE,
 PARSER_SUBST_ENTITIES) = (1, 2, 3, 4)
620
621#
622# For the error callback severities
623#
# severities passed to the error callbacks
(PARSER_SEVERITY_VALIDITY_WARNING,
 PARSER_SEVERITY_VALIDITY_ERROR,
 PARSER_SEVERITY_WARNING,
 PARSER_SEVERITY_ERROR) = (1, 2, 3, 4)
628
629#
630# register the libxml2 error handler
631#
def registerErrorHandler(f, ctx):
    """Register a function written in Python for error reporting.
       The function is called back as f(ctx, error). """
    import sys
    if 'libxslt' in sys.modules:
        # once libxslt has been imported, its own error handler
        # must be used instead
        import libxslt
        return libxslt.registerErrorHandler(f,ctx)
    # normal behaviour when libxslt is not imported
    return libxml2mod.xmlRegisterErrorHandler(f,ctx)
645
class parserCtxtCore:
    """Base of the parser context wrapper; owns the C context in _o."""

    def __init__(self, _obj=None):
        self._o = _obj if _obj != None else None

    def __del__(self):
        # release the C parser context, if any, and forget its handle
        handle = self._o
        if handle != None:
            libxml2mod.xmlFreeParserCtxt(handle)
        self._o = None

    def setErrorHandler(self,f,arg):
        """Install f as the error handler of this context; it will be
           called back as f(arg,msg,severity,reserved).

           @reserved is currently always None."""
        libxml2mod.xmlParserCtxtSetErrorHandler(self._o,f,arg)

    def getErrorHandler(self):
        """Return the (f,arg) pair previously given to setErrorHandler,
           or (None,None)."""
        return libxml2mod.xmlParserCtxtGetErrorHandler(self._o)

    def addLocalCatalog(self, uri):
        """Register the local catalog at uri with the parser"""
        return libxml2mod.addLocalCatalog(self._o, uri)
674
675
class ValidCtxtCore:
    """Validation context helper adding DTD validation error handlers.

    The handle in self._o is expected to be provided by whoever
    combines this class with a wrapper; the constructor accepts and
    ignores any arguments.
    """

    def __init__(self, *args, **kw):
        return None

    def setValidityErrorHandler(self, err_func, warn_func, arg=None):
        """
        Install the error and warning handlers used during DTD
        validation.  These will be called back as f(msg,arg)
        """
        handle = self._o
        libxml2mod.xmlSetValidErrors(handle, err_func, warn_func, arg)
687
688
class SchemaValidCtxtCore:
    """Validation context helper adding Schema validation error handlers.

    The handle in self._o is expected to be provided by whoever
    combines this class with a wrapper; the constructor accepts and
    ignores any arguments.
    """

    def __init__(self, *args, **kw):
        return None

    def setValidityErrorHandler(self, err_func, warn_func, arg=None):
        """
        Install the error and warning handlers used during Schema
        validation.  These will be called back as f(msg,arg)
        """
        handle = self._o
        libxml2mod.xmlSchemaSetValidErrors(handle, err_func, warn_func, arg)
700
701
class relaxNgValidCtxtCore:
    """Validation context helper adding RelaxNG validation error handlers.

    The handle in self._o is expected to be provided by whoever
    combines this class with a wrapper; the constructor accepts and
    ignores any arguments.
    """

    def __init__(self, *args, **kw):
        return None

    def setValidityErrorHandler(self, err_func, warn_func, arg=None):
        """
        Install the error and warning handlers used during RelaxNG
        validation.  These will be called back as f(msg,arg)
        """
        handle = self._o
        libxml2mod.xmlRelaxNGSetValidErrors(handle, err_func, warn_func, arg)
713
714
def _xmlTextReaderErrorFunc(xxx_todo_changeme,msg,severity,locator):
    """Intermediate callback: wrap the raw locator in an
       xmlTextReaderLocator and forward the event to the user handler.
       The first argument is the (f,arg) pair to call back."""
    handler, user_arg = xxx_todo_changeme
    wrapped = xmlTextReaderLocator(locator)
    return handler(user_arg, msg, severity, wrapped)
719
class xmlTextReaderCore:
    """Base of the text reader wrapper; owns the C reader in _o."""

    def __init__(self, _obj=None):
        self.input = None
        self._o = _obj if _obj != None else None

    def __del__(self):
        # release the C reader, if any, and forget its handle
        handle = self._o
        if handle != None:
            libxml2mod.xmlFreeTextReader(handle)
        self._o = None

    def SetErrorHandler(self,f,arg):
        """Install f as the error handler of this reader; it will be
           called back as f(arg,msg,severity,locator).  Passing None
           as f removes the handler."""
        if f is None:
            callback, payload = None, None
        else:
            # the user pair travels as the callback argument and is
            # unpacked again by _xmlTextReaderErrorFunc
            callback, payload = _xmlTextReaderErrorFunc, (f,arg)
        libxml2mod.xmlTextReaderSetErrorHandler(self._o, callback, payload)

    def GetErrorHandler(self):
        """Return the (f,arg) pair previously given to SetErrorHandler,
           or (None,None)."""
        registered, payload = libxml2mod.xmlTextReaderGetErrorHandler(self._o)
        if registered is not None:
            # assert registered is _xmlTextReaderErrorFunc
            return payload
        return None,None
751
752#
# The cleanup now goes through a wrapper in libxml.c
754#
def cleanupParser():
    """Run the parser cleanup through the libxml2mod wrapper."""
    libxml2mod.xmlPythonCleanupParser()
757
758#
759# The interface to xmlRegisterInputCallbacks.
760# Since this API does not allow to pass a data object along with
761# match/open callbacks, it is necessary to maintain a list of all
762# Python callbacks.
763#
__input_callbacks = []
def registerInputCallback(func):
    """Add func to the Python-level input callbacks.

    func is called with a URI and returns an object, or None to let the
    callbacks registered before it try.
    """
    def findOpenCallback(URI):
        # the most recently registered callback gets the first chance
        newest_first = reversed(__input_callbacks)
        for candidate in newest_first:
            opened = candidate(URI)
            if opened is not None:
                return opened
        return None
    libxml2mod.xmlRegisterInputCallback(findOpenCallback)
    __input_callbacks.append(func)
773
def popInputCallbacks():
    """Remove the most recently registered input callback.

    Python-level callbacks are popped first; once none is left the
    unregistration is passed on to libxml2mod, which starts popping the
    built-in ones.
    """
    if __input_callbacks:
        __input_callbacks.pop()
    if not __input_callbacks:
        libxml2mod.xmlUnregisterInputCallback()
781
782# WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
783#
784# Everything before this line comes from libxml.py
785# Everything after this line is automatically generated
786#
787# WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
788
789