• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*--------------------------------------------------------------------
2  * Licensed to PSF under a Contributor Agreement.
3  * See https://www.python.org/psf/license for licensing details.
4  *
5  * _elementtree - C accelerator for xml.etree.ElementTree
6  * Copyright (c) 1999-2009 by Secret Labs AB.  All rights reserved.
7  * Copyright (c) 1999-2009 by Fredrik Lundh.
8  *
9  * info@pythonware.com
10  * http://www.pythonware.com
11  *--------------------------------------------------------------------
12  */
13 
14 #define PY_SSIZE_T_CLEAN
15 
16 #include "Python.h"
17 #include "structmember.h"         // PyMemberDef
18 
19 /* -------------------------------------------------------------------- */
20 /* configuration */
21 
/* Number of child slots embedded directly in the extra block; an element
   with at most this many children needs no separate child array. */
#define STATIC_CHILDREN 4
25 
26 /* For best performance, choose a value so that 80-90% of all nodes
27    have no more than the given number of children.  Set this to zero
28    to minimize the size of the element structure itself (this only
29    helps if you have lots of leaf nodes with attributes). */
30 
31 /* Also note that pymalloc always allocates blocks in multiples of
32    eight bytes.  For the current C version of ElementTree, this means
33    that the number of children should be an even number, at least on
34    32-bit platforms. */
35 
36 /* -------------------------------------------------------------------- */
37 
/* Optional allocation tracing.  With the first branch enabled, ALLOC and
   RELEASE keep a running byte counter and print it together with a short
   comment; in the default configuration both expand to nothing. */
#if 0
static int memory = 0;
#  define ALLOC(size, comment) \
    do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#  define RELEASE(size, comment) \
    do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#  define ALLOC(size, comment)
#  define RELEASE(size, comment)
#endif
48 
/* compiler tweaks: LOCAL(type) introduces a file-private helper returning
   `type`.  Under MSVC it is additionally marked __inline and __fastcall. */
#ifdef _MSC_VER
#  define LOCAL(type) static __inline type __fastcall
#else
#  define LOCAL(type) static type
#endif
55 
/* Tagged-pointer helpers for the 'join' flag, which lives in the lowest bit
   of the text and tail pointers (see the ElementObject definition).

   JOIN_OBJ(p)        strip the flag and yield the real PyObject pointer
   JOIN_GET(p)        yield the flag bit (0 or 1)
   JOIN_SET(p, flag)  yield p's object pointer with the flag bit or-ed in

   Any use of text/tail as an object pointer must go through JOIN_OBJ. */
#define JOIN_OBJ(p) ((PyObject*) ((uintptr_t) (p) & ~(uintptr_t)1))
#define JOIN_GET(p) ((uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((uintptr_t) (JOIN_OBJ(p)) | (flag)))
63 
64 /* Py_SETREF for a PyObject* that uses a join flag. */
65 Py_LOCAL_INLINE(void)
_set_joined_ptr(PyObject ** p,PyObject * new_joined_ptr)66 _set_joined_ptr(PyObject **p, PyObject *new_joined_ptr)
67 {
68     PyObject *tmp = JOIN_OBJ(*p);
69     *p = new_joined_ptr;
70     Py_DECREF(tmp);
71 }
72 
73 /* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
74  * reference since this function sets it to NULL.
75 */
_clear_joined_ptr(PyObject ** p)76 static void _clear_joined_ptr(PyObject **p)
77 {
78     if (*p) {
79         _set_joined_ptr(p, NULL);
80     }
81 }
82 
83 /* Types defined by this extension */
84 static PyTypeObject Element_Type;
85 static PyTypeObject ElementIter_Type;
86 static PyTypeObject TreeBuilder_Type;
87 static PyTypeObject XMLParser_Type;
88 
89 
90 /* Per-module state; PEP 3121 */
91 typedef struct {
92     PyObject *parseerror_obj;
93     PyObject *deepcopy_obj;
94     PyObject *elementpath_obj;
95     PyObject *comment_factory;
96     PyObject *pi_factory;
97 } elementtreestate;
98 
99 static struct PyModuleDef elementtreemodule;
100 
101 /* Given a module object (assumed to be _elementtree), get its per-module
102  * state.
103  */
104 static inline elementtreestate*
get_elementtree_state(PyObject * module)105 get_elementtree_state(PyObject *module)
106 {
107     void *state = PyModule_GetState(module);
108     assert(state != NULL);
109     return (elementtreestate *)state;
110 }
111 
/* Look up the _elementtree module instance of the currently running
 * sub-interpreter via PyState_FindModule() and yield its state pointer.
 */
#define ET_STATE_GLOBAL \
    ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
117 
118 static int
elementtree_clear(PyObject * m)119 elementtree_clear(PyObject *m)
120 {
121     elementtreestate *st = get_elementtree_state(m);
122     Py_CLEAR(st->parseerror_obj);
123     Py_CLEAR(st->deepcopy_obj);
124     Py_CLEAR(st->elementpath_obj);
125     Py_CLEAR(st->comment_factory);
126     Py_CLEAR(st->pi_factory);
127     return 0;
128 }
129 
130 static int
elementtree_traverse(PyObject * m,visitproc visit,void * arg)131 elementtree_traverse(PyObject *m, visitproc visit, void *arg)
132 {
133     elementtreestate *st = get_elementtree_state(m);
134     Py_VISIT(st->parseerror_obj);
135     Py_VISIT(st->deepcopy_obj);
136     Py_VISIT(st->elementpath_obj);
137     Py_VISIT(st->comment_factory);
138     Py_VISIT(st->pi_factory);
139     return 0;
140 }
141 
142 static void
elementtree_free(void * m)143 elementtree_free(void *m)
144 {
145     elementtree_clear((PyObject *)m);
146 }
147 
148 /* helpers */
149 
150 LOCAL(PyObject*)
list_join(PyObject * list)151 list_join(PyObject* list)
152 {
153     /* join list elements */
154     PyObject* joiner;
155     PyObject* result;
156 
157     joiner = PyUnicode_FromStringAndSize("", 0);
158     if (!joiner)
159         return NULL;
160     result = PyUnicode_Join(joiner, list);
161     Py_DECREF(joiner);
162     return result;
163 }
164 
165 /* Is the given object an empty dictionary?
166 */
167 static int
is_empty_dict(PyObject * obj)168 is_empty_dict(PyObject *obj)
169 {
170     return PyDict_CheckExact(obj) && PyDict_GET_SIZE(obj) == 0;
171 }
172 
173 
174 /* -------------------------------------------------------------------- */
175 /* the Element type */
176 
177 typedef struct {
178 
179     /* attributes (a dictionary object), or NULL if no attributes */
180     PyObject* attrib;
181 
182     /* child elements */
183     Py_ssize_t length; /* actual number of items */
184     Py_ssize_t allocated; /* allocated items */
185 
186     /* this either points to _children or to a malloced buffer */
187     PyObject* *children;
188 
189     PyObject* _children[STATIC_CHILDREN];
190 
191 } ElementObjectExtra;
192 
193 typedef struct {
194     PyObject_HEAD
195 
196     /* element tag (a string). */
197     PyObject* tag;
198 
199     /* text before first child.  note that this is a tagged pointer;
200        use JOIN_OBJ to get the object pointer.  the join flag is used
201        to distinguish lists created by the tree builder from lists
202        assigned to the attribute by application code; the former
203        should be joined before being returned to the user, the latter
204        should be left intact. */
205     PyObject* text;
206 
207     /* text after this element, in parent.  note that this is a tagged
208        pointer; use JOIN_OBJ to get the object pointer. */
209     PyObject* tail;
210 
211     ElementObjectExtra* extra;
212 
213     PyObject *weakreflist; /* For tp_weaklistoffset */
214 
215 } ElementObject;
216 
217 
/* Element_Check accepts Element and its subclasses; Element_CheckExact
   accepts only objects whose type is exactly Element_Type. */
#define Element_Check(op) PyObject_TypeCheck(op, &Element_Type)
#define Element_CheckExact(op) Py_IS_TYPE(op, &Element_Type)
220 
221 
222 /* -------------------------------------------------------------------- */
223 /* Element constructors and destructor */
224 
225 LOCAL(int)
create_extra(ElementObject * self,PyObject * attrib)226 create_extra(ElementObject* self, PyObject* attrib)
227 {
228     self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
229     if (!self->extra) {
230         PyErr_NoMemory();
231         return -1;
232     }
233 
234     Py_XINCREF(attrib);
235     self->extra->attrib = attrib;
236 
237     self->extra->length = 0;
238     self->extra->allocated = STATIC_CHILDREN;
239     self->extra->children = self->extra->_children;
240 
241     return 0;
242 }
243 
244 LOCAL(void)
dealloc_extra(ElementObjectExtra * extra)245 dealloc_extra(ElementObjectExtra *extra)
246 {
247     Py_ssize_t i;
248 
249     if (!extra)
250         return;
251 
252     Py_XDECREF(extra->attrib);
253 
254     for (i = 0; i < extra->length; i++)
255         Py_DECREF(extra->children[i]);
256 
257     if (extra->children != extra->_children)
258         PyObject_Free(extra->children);
259 
260     PyObject_Free(extra);
261 }
262 
263 LOCAL(void)
clear_extra(ElementObject * self)264 clear_extra(ElementObject* self)
265 {
266     ElementObjectExtra *myextra;
267 
268     if (!self->extra)
269         return;
270 
271     /* Avoid DECREFs calling into this code again (cycles, etc.)
272     */
273     myextra = self->extra;
274     self->extra = NULL;
275 
276     dealloc_extra(myextra);
277 }
278 
279 /* Convenience internal function to create new Element objects with the given
280  * tag and attributes.
281 */
282 LOCAL(PyObject*)
create_new_element(PyObject * tag,PyObject * attrib)283 create_new_element(PyObject* tag, PyObject* attrib)
284 {
285     ElementObject* self;
286 
287     self = PyObject_GC_New(ElementObject, &Element_Type);
288     if (self == NULL)
289         return NULL;
290     self->extra = NULL;
291 
292     Py_INCREF(tag);
293     self->tag = tag;
294 
295     Py_INCREF(Py_None);
296     self->text = Py_None;
297 
298     Py_INCREF(Py_None);
299     self->tail = Py_None;
300 
301     self->weakreflist = NULL;
302 
303     ALLOC(sizeof(ElementObject), "create element");
304     PyObject_GC_Track(self);
305 
306     if (attrib != NULL && !is_empty_dict(attrib)) {
307         if (create_extra(self, attrib) < 0) {
308             Py_DECREF(self);
309             return NULL;
310         }
311     }
312 
313     return (PyObject*) self;
314 }
315 
316 static PyObject *
element_new(PyTypeObject * type,PyObject * args,PyObject * kwds)317 element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
318 {
319     ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
320     if (e != NULL) {
321         Py_INCREF(Py_None);
322         e->tag = Py_None;
323 
324         Py_INCREF(Py_None);
325         e->text = Py_None;
326 
327         Py_INCREF(Py_None);
328         e->tail = Py_None;
329 
330         e->extra = NULL;
331         e->weakreflist = NULL;
332     }
333     return (PyObject *)e;
334 }
335 
336 /* Helper function for extracting the attrib dictionary from a keywords dict.
337  * This is required by some constructors/functions in this module that can
338  * either accept attrib as a keyword argument or all attributes splashed
339  * directly into *kwds.
340  *
341  * Return a dictionary with the content of kwds merged into the content of
342  * attrib. If there is no attrib keyword, return a copy of kwds.
343  */
344 static PyObject*
get_attrib_from_keywords(PyObject * kwds)345 get_attrib_from_keywords(PyObject *kwds)
346 {
347     PyObject *attrib_str = PyUnicode_FromString("attrib");
348     if (attrib_str == NULL) {
349         return NULL;
350     }
351     PyObject *attrib = PyDict_GetItemWithError(kwds, attrib_str);
352 
353     if (attrib) {
354         /* If attrib was found in kwds, copy its value and remove it from
355          * kwds
356          */
357         if (!PyDict_Check(attrib)) {
358             Py_DECREF(attrib_str);
359             PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
360                          Py_TYPE(attrib)->tp_name);
361             return NULL;
362         }
363         attrib = PyDict_Copy(attrib);
364         if (attrib && PyDict_DelItem(kwds, attrib_str) < 0) {
365             Py_DECREF(attrib);
366             attrib = NULL;
367         }
368     }
369     else if (!PyErr_Occurred()) {
370         attrib = PyDict_New();
371     }
372 
373     Py_DECREF(attrib_str);
374 
375     if (attrib != NULL && PyDict_Update(attrib, kwds) < 0) {
376         Py_DECREF(attrib);
377         return NULL;
378     }
379     return attrib;
380 }
381 
382 /*[clinic input]
383 module _elementtree
384 class _elementtree.Element "ElementObject *" "&Element_Type"
385 class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
386 class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
387 [clinic start generated code]*/
388 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
389 
390 static int
element_init(PyObject * self,PyObject * args,PyObject * kwds)391 element_init(PyObject *self, PyObject *args, PyObject *kwds)
392 {
393     PyObject *tag;
394     PyObject *attrib = NULL;
395     ElementObject *self_elem;
396 
397     if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
398         return -1;
399 
400     if (attrib) {
401         /* attrib passed as positional arg */
402         attrib = PyDict_Copy(attrib);
403         if (!attrib)
404             return -1;
405         if (kwds) {
406             if (PyDict_Update(attrib, kwds) < 0) {
407                 Py_DECREF(attrib);
408                 return -1;
409             }
410         }
411     } else if (kwds) {
412         /* have keywords args */
413         attrib = get_attrib_from_keywords(kwds);
414         if (!attrib)
415             return -1;
416     }
417 
418     self_elem = (ElementObject *)self;
419 
420     if (attrib != NULL && !is_empty_dict(attrib)) {
421         if (create_extra(self_elem, attrib) < 0) {
422             Py_DECREF(attrib);
423             return -1;
424         }
425     }
426 
427     /* We own a reference to attrib here and it's no longer needed. */
428     Py_XDECREF(attrib);
429 
430     /* Replace the objects already pointed to by tag, text and tail. */
431     Py_INCREF(tag);
432     Py_XSETREF(self_elem->tag, tag);
433 
434     Py_INCREF(Py_None);
435     _set_joined_ptr(&self_elem->text, Py_None);
436 
437     Py_INCREF(Py_None);
438     _set_joined_ptr(&self_elem->tail, Py_None);
439 
440     return 0;
441 }
442 
443 LOCAL(int)
element_resize(ElementObject * self,Py_ssize_t extra)444 element_resize(ElementObject* self, Py_ssize_t extra)
445 {
446     Py_ssize_t size;
447     PyObject* *children;
448 
449     assert(extra >= 0);
450     /* make sure self->children can hold the given number of extra
451        elements.  set an exception and return -1 if allocation failed */
452 
453     if (!self->extra) {
454         if (create_extra(self, NULL) < 0)
455             return -1;
456     }
457 
458     size = self->extra->length + extra;  /* never overflows */
459 
460     if (size > self->extra->allocated) {
461         /* use Python 2.4's list growth strategy */
462         size = (size >> 3) + (size < 9 ? 3 : 6) + size;
463         /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
464          * which needs at least 4 bytes.
465          * Although it's a false alarm always assume at least one child to
466          * be safe.
467          */
468         size = size ? size : 1;
469         if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
470             goto nomemory;
471         if (self->extra->children != self->extra->_children) {
472             /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
473              * "children", which needs at least 4 bytes. Although it's a
474              * false alarm always assume at least one child to be safe.
475              */
476             children = PyObject_Realloc(self->extra->children,
477                                         size * sizeof(PyObject*));
478             if (!children)
479                 goto nomemory;
480         } else {
481             children = PyObject_Malloc(size * sizeof(PyObject*));
482             if (!children)
483                 goto nomemory;
484             /* copy existing children from static area to malloc buffer */
485             memcpy(children, self->extra->children,
486                    self->extra->length * sizeof(PyObject*));
487         }
488         self->extra->children = children;
489         self->extra->allocated = size;
490     }
491 
492     return 0;
493 
494   nomemory:
495     PyErr_NoMemory();
496     return -1;
497 }
498 
499 LOCAL(void)
raise_type_error(PyObject * element)500 raise_type_error(PyObject *element)
501 {
502     PyErr_Format(PyExc_TypeError,
503                  "expected an Element, not \"%.200s\"",
504                  Py_TYPE(element)->tp_name);
505 }
506 
507 LOCAL(int)
element_add_subelement(ElementObject * self,PyObject * element)508 element_add_subelement(ElementObject* self, PyObject* element)
509 {
510     /* add a child element to a parent */
511 
512     if (!Element_Check(element)) {
513         raise_type_error(element);
514         return -1;
515     }
516 
517     if (element_resize(self, 1) < 0)
518         return -1;
519 
520     Py_INCREF(element);
521     self->extra->children[self->extra->length] = element;
522 
523     self->extra->length++;
524 
525     return 0;
526 }
527 
528 LOCAL(PyObject*)
element_get_attrib(ElementObject * self)529 element_get_attrib(ElementObject* self)
530 {
531     /* return borrowed reference to attrib dictionary */
532     /* note: this function assumes that the extra section exists */
533 
534     PyObject* res = self->extra->attrib;
535 
536     if (!res) {
537         /* create missing dictionary */
538         res = self->extra->attrib = PyDict_New();
539     }
540 
541     return res;
542 }
543 
544 LOCAL(PyObject*)
element_get_text(ElementObject * self)545 element_get_text(ElementObject* self)
546 {
547     /* return borrowed reference to text attribute */
548 
549     PyObject *res = self->text;
550 
551     if (JOIN_GET(res)) {
552         res = JOIN_OBJ(res);
553         if (PyList_CheckExact(res)) {
554             PyObject *tmp = list_join(res);
555             if (!tmp)
556                 return NULL;
557             self->text = tmp;
558             Py_DECREF(res);
559             res = tmp;
560         }
561     }
562 
563     return res;
564 }
565 
566 LOCAL(PyObject*)
element_get_tail(ElementObject * self)567 element_get_tail(ElementObject* self)
568 {
569     /* return borrowed reference to text attribute */
570 
571     PyObject *res = self->tail;
572 
573     if (JOIN_GET(res)) {
574         res = JOIN_OBJ(res);
575         if (PyList_CheckExact(res)) {
576             PyObject *tmp = list_join(res);
577             if (!tmp)
578                 return NULL;
579             self->tail = tmp;
580             Py_DECREF(res);
581             res = tmp;
582         }
583     }
584 
585     return res;
586 }
587 
588 static PyObject*
subelement(PyObject * self,PyObject * args,PyObject * kwds)589 subelement(PyObject *self, PyObject *args, PyObject *kwds)
590 {
591     PyObject* elem;
592 
593     ElementObject* parent;
594     PyObject* tag;
595     PyObject* attrib = NULL;
596     if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
597                           &Element_Type, &parent, &tag,
598                           &PyDict_Type, &attrib)) {
599         return NULL;
600     }
601 
602     if (attrib) {
603         /* attrib passed as positional arg */
604         attrib = PyDict_Copy(attrib);
605         if (!attrib)
606             return NULL;
607         if (kwds != NULL && PyDict_Update(attrib, kwds) < 0) {
608             Py_DECREF(attrib);
609             return NULL;
610         }
611     } else if (kwds) {
612         /* have keyword args */
613         attrib = get_attrib_from_keywords(kwds);
614         if (!attrib)
615             return NULL;
616     } else {
617         /* no attrib arg, no kwds, so no attribute */
618     }
619 
620     elem = create_new_element(tag, attrib);
621     Py_XDECREF(attrib);
622     if (elem == NULL)
623         return NULL;
624 
625     if (element_add_subelement(parent, elem) < 0) {
626         Py_DECREF(elem);
627         return NULL;
628     }
629 
630     return elem;
631 }
632 
633 static int
element_gc_traverse(ElementObject * self,visitproc visit,void * arg)634 element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
635 {
636     Py_VISIT(self->tag);
637     Py_VISIT(JOIN_OBJ(self->text));
638     Py_VISIT(JOIN_OBJ(self->tail));
639 
640     if (self->extra) {
641         Py_ssize_t i;
642         Py_VISIT(self->extra->attrib);
643 
644         for (i = 0; i < self->extra->length; ++i)
645             Py_VISIT(self->extra->children[i]);
646     }
647     return 0;
648 }
649 
650 static int
element_gc_clear(ElementObject * self)651 element_gc_clear(ElementObject *self)
652 {
653     Py_CLEAR(self->tag);
654     _clear_joined_ptr(&self->text);
655     _clear_joined_ptr(&self->tail);
656 
657     /* After dropping all references from extra, it's no longer valid anyway,
658      * so fully deallocate it.
659     */
660     clear_extra(self);
661     return 0;
662 }
663 
664 static void
element_dealloc(ElementObject * self)665 element_dealloc(ElementObject* self)
666 {
667     /* bpo-31095: UnTrack is needed before calling any callbacks */
668     PyObject_GC_UnTrack(self);
669     Py_TRASHCAN_BEGIN(self, element_dealloc)
670 
671     if (self->weakreflist != NULL)
672         PyObject_ClearWeakRefs((PyObject *) self);
673 
674     /* element_gc_clear clears all references and deallocates extra
675     */
676     element_gc_clear(self);
677 
678     RELEASE(sizeof(ElementObject), "destroy element");
679     Py_TYPE(self)->tp_free((PyObject *)self);
680     Py_TRASHCAN_END
681 }
682 
683 /* -------------------------------------------------------------------- */
684 
685 /*[clinic input]
686 _elementtree.Element.append
687 
688     subelement: object(subclass_of='&Element_Type')
689     /
690 
691 [clinic start generated code]*/
692 
693 static PyObject *
_elementtree_Element_append_impl(ElementObject * self,PyObject * subelement)694 _elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
695 /*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
696 {
697     if (element_add_subelement(self, subelement) < 0)
698         return NULL;
699 
700     Py_RETURN_NONE;
701 }
702 
703 /*[clinic input]
704 _elementtree.Element.clear
705 
706 [clinic start generated code]*/
707 
708 static PyObject *
_elementtree_Element_clear_impl(ElementObject * self)709 _elementtree_Element_clear_impl(ElementObject *self)
710 /*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
711 {
712     clear_extra(self);
713 
714     Py_INCREF(Py_None);
715     _set_joined_ptr(&self->text, Py_None);
716 
717     Py_INCREF(Py_None);
718     _set_joined_ptr(&self->tail, Py_None);
719 
720     Py_RETURN_NONE;
721 }
722 
723 /*[clinic input]
724 _elementtree.Element.__copy__
725 
726 [clinic start generated code]*/
727 
728 static PyObject *
_elementtree_Element___copy___impl(ElementObject * self)729 _elementtree_Element___copy___impl(ElementObject *self)
730 /*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
731 {
732     Py_ssize_t i;
733     ElementObject* element;
734 
735     element = (ElementObject*) create_new_element(
736         self->tag, self->extra ? self->extra->attrib : NULL);
737     if (!element)
738         return NULL;
739 
740     Py_INCREF(JOIN_OBJ(self->text));
741     _set_joined_ptr(&element->text, self->text);
742 
743     Py_INCREF(JOIN_OBJ(self->tail));
744     _set_joined_ptr(&element->tail, self->tail);
745 
746     assert(!element->extra || !element->extra->length);
747     if (self->extra) {
748         if (element_resize(element, self->extra->length) < 0) {
749             Py_DECREF(element);
750             return NULL;
751         }
752 
753         for (i = 0; i < self->extra->length; i++) {
754             Py_INCREF(self->extra->children[i]);
755             element->extra->children[i] = self->extra->children[i];
756         }
757 
758         assert(!element->extra->length);
759         element->extra->length = self->extra->length;
760     }
761 
762     return (PyObject*) element;
763 }
764 
765 /* Helper for a deep copy. */
766 LOCAL(PyObject *) deepcopy(PyObject *, PyObject *);
767 
768 /*[clinic input]
769 _elementtree.Element.__deepcopy__
770 
771     memo: object(subclass_of="&PyDict_Type")
772     /
773 
774 [clinic start generated code]*/
775 
776 static PyObject *
_elementtree_Element___deepcopy___impl(ElementObject * self,PyObject * memo)777 _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo)
778 /*[clinic end generated code: output=eefc3df50465b642 input=a2d40348c0aade10]*/
779 {
780     Py_ssize_t i;
781     ElementObject* element;
782     PyObject* tag;
783     PyObject* attrib;
784     PyObject* text;
785     PyObject* tail;
786     PyObject* id;
787 
788     tag = deepcopy(self->tag, memo);
789     if (!tag)
790         return NULL;
791 
792     if (self->extra && self->extra->attrib) {
793         attrib = deepcopy(self->extra->attrib, memo);
794         if (!attrib) {
795             Py_DECREF(tag);
796             return NULL;
797         }
798     } else {
799         attrib = NULL;
800     }
801 
802     element = (ElementObject*) create_new_element(tag, attrib);
803 
804     Py_DECREF(tag);
805     Py_XDECREF(attrib);
806 
807     if (!element)
808         return NULL;
809 
810     text = deepcopy(JOIN_OBJ(self->text), memo);
811     if (!text)
812         goto error;
813     _set_joined_ptr(&element->text, JOIN_SET(text, JOIN_GET(self->text)));
814 
815     tail = deepcopy(JOIN_OBJ(self->tail), memo);
816     if (!tail)
817         goto error;
818     _set_joined_ptr(&element->tail, JOIN_SET(tail, JOIN_GET(self->tail)));
819 
820     assert(!element->extra || !element->extra->length);
821     if (self->extra) {
822         if (element_resize(element, self->extra->length) < 0)
823             goto error;
824 
825         for (i = 0; i < self->extra->length; i++) {
826             PyObject* child = deepcopy(self->extra->children[i], memo);
827             if (!child || !Element_Check(child)) {
828                 if (child) {
829                     raise_type_error(child);
830                     Py_DECREF(child);
831                 }
832                 element->extra->length = i;
833                 goto error;
834             }
835             element->extra->children[i] = child;
836         }
837 
838         assert(!element->extra->length);
839         element->extra->length = self->extra->length;
840     }
841 
842     /* add object to memo dictionary (so deepcopy won't visit it again) */
843     id = PyLong_FromSsize_t((uintptr_t) self);
844     if (!id)
845         goto error;
846 
847     i = PyDict_SetItem(memo, id, (PyObject*) element);
848 
849     Py_DECREF(id);
850 
851     if (i < 0)
852         goto error;
853 
854     return (PyObject*) element;
855 
856   error:
857     Py_DECREF(element);
858     return NULL;
859 }
860 
861 LOCAL(PyObject *)
deepcopy(PyObject * object,PyObject * memo)862 deepcopy(PyObject *object, PyObject *memo)
863 {
864     /* do a deep copy of the given object */
865     elementtreestate *st;
866     PyObject *stack[2];
867 
868     /* Fast paths */
869     if (object == Py_None || PyUnicode_CheckExact(object)) {
870         Py_INCREF(object);
871         return object;
872     }
873 
874     if (Py_REFCNT(object) == 1) {
875         if (PyDict_CheckExact(object)) {
876             PyObject *key, *value;
877             Py_ssize_t pos = 0;
878             int simple = 1;
879             while (PyDict_Next(object, &pos, &key, &value)) {
880                 if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) {
881                     simple = 0;
882                     break;
883                 }
884             }
885             if (simple)
886                 return PyDict_Copy(object);
887             /* Fall through to general case */
888         }
889         else if (Element_CheckExact(object)) {
890             return _elementtree_Element___deepcopy___impl(
891                 (ElementObject *)object, memo);
892         }
893     }
894 
895     /* General case */
896     st = ET_STATE_GLOBAL;
897     if (!st->deepcopy_obj) {
898         PyErr_SetString(PyExc_RuntimeError,
899                         "deepcopy helper not found");
900         return NULL;
901     }
902 
903     stack[0] = object;
904     stack[1] = memo;
905     return _PyObject_FastCall(st->deepcopy_obj, stack, 2);
906 }
907 
908 
909 /*[clinic input]
910 _elementtree.Element.__sizeof__ -> Py_ssize_t
911 
912 [clinic start generated code]*/
913 
914 static Py_ssize_t
_elementtree_Element___sizeof___impl(ElementObject * self)915 _elementtree_Element___sizeof___impl(ElementObject *self)
916 /*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
917 {
918     Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
919     if (self->extra) {
920         result += sizeof(ElementObjectExtra);
921         if (self->extra->children != self->extra->_children)
922             result += sizeof(PyObject*) * self->extra->allocated;
923     }
924     return result;
925 }
926 
/* Keys of the state dictionary exchanged by __getstate__/__setstate__. */
#define PICKLED_TAG      "tag"
#define PICKLED_CHILDREN "_children"
#define PICKLED_ATTRIB   "attrib"
#define PICKLED_TAIL     "tail"
#define PICKLED_TEXT     "text"
933 
934 /* __getstate__ returns a fabricated instance dict as in the pure-Python
935  * Element implementation, for interoperability/interchangeability.  This
936  * makes the pure-Python implementation details an API, but (a) there aren't
937  * any unnecessary structures there; and (b) it buys compatibility with 3.2
938  * pickles.  See issue #16076.
939  */
940 /*[clinic input]
941 _elementtree.Element.__getstate__
942 
943 [clinic start generated code]*/
944 
945 static PyObject *
_elementtree_Element___getstate___impl(ElementObject * self)946 _elementtree_Element___getstate___impl(ElementObject *self)
947 /*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
948 {
949     Py_ssize_t i;
950     PyObject *children, *attrib;
951 
952     /* Build a list of children. */
953     children = PyList_New(self->extra ? self->extra->length : 0);
954     if (!children)
955         return NULL;
956     for (i = 0; i < PyList_GET_SIZE(children); i++) {
957         PyObject *child = self->extra->children[i];
958         Py_INCREF(child);
959         PyList_SET_ITEM(children, i, child);
960     }
961 
962     if (self->extra && self->extra->attrib) {
963         attrib = self->extra->attrib;
964         Py_INCREF(attrib);
965     }
966     else {
967         attrib = PyDict_New();
968         if (!attrib) {
969             Py_DECREF(children);
970             return NULL;
971         }
972     }
973 
974     return Py_BuildValue("{sOsNsNsOsO}",
975                          PICKLED_TAG, self->tag,
976                          PICKLED_CHILDREN, children,
977                          PICKLED_ATTRIB, attrib,
978                          PICKLED_TEXT, JOIN_OBJ(self->text),
979                          PICKLED_TAIL, JOIN_OBJ(self->tail));
980 }
981 
982 static PyObject *
element_setstate_from_attributes(ElementObject * self,PyObject * tag,PyObject * attrib,PyObject * text,PyObject * tail,PyObject * children)983 element_setstate_from_attributes(ElementObject *self,
984                                  PyObject *tag,
985                                  PyObject *attrib,
986                                  PyObject *text,
987                                  PyObject *tail,
988                                  PyObject *children)
989 {
990     Py_ssize_t i, nchildren;
991     ElementObjectExtra *oldextra = NULL;
992 
993     if (!tag) {
994         PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
995         return NULL;
996     }
997 
998     Py_INCREF(tag);
999     Py_XSETREF(self->tag, tag);
1000 
1001     text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
1002     Py_INCREF(JOIN_OBJ(text));
1003     _set_joined_ptr(&self->text, text);
1004 
1005     tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
1006     Py_INCREF(JOIN_OBJ(tail));
1007     _set_joined_ptr(&self->tail, tail);
1008 
1009     /* Handle ATTRIB and CHILDREN. */
1010     if (!children && !attrib) {
1011         Py_RETURN_NONE;
1012     }
1013 
1014     /* Compute 'nchildren'. */
1015     if (children) {
1016         if (!PyList_Check(children)) {
1017             PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
1018             return NULL;
1019         }
1020         nchildren = PyList_GET_SIZE(children);
1021 
1022         /* (Re-)allocate 'extra'.
1023            Avoid DECREFs calling into this code again (cycles, etc.)
1024          */
1025         oldextra = self->extra;
1026         self->extra = NULL;
1027         if (element_resize(self, nchildren)) {
1028             assert(!self->extra || !self->extra->length);
1029             clear_extra(self);
1030             self->extra = oldextra;
1031             return NULL;
1032         }
1033         assert(self->extra);
1034         assert(self->extra->allocated >= nchildren);
1035         if (oldextra) {
1036             assert(self->extra->attrib == NULL);
1037             self->extra->attrib = oldextra->attrib;
1038             oldextra->attrib = NULL;
1039         }
1040 
1041         /* Copy children */
1042         for (i = 0; i < nchildren; i++) {
1043             PyObject *child = PyList_GET_ITEM(children, i);
1044             if (!Element_Check(child)) {
1045                 raise_type_error(child);
1046                 self->extra->length = i;
1047                 dealloc_extra(oldextra);
1048                 return NULL;
1049             }
1050             Py_INCREF(child);
1051             self->extra->children[i] = child;
1052         }
1053 
1054         assert(!self->extra->length);
1055         self->extra->length = nchildren;
1056     }
1057     else {
1058         if (element_resize(self, 0)) {
1059             return NULL;
1060         }
1061     }
1062 
1063     /* Stash attrib. */
1064     Py_XINCREF(attrib);
1065     Py_XSETREF(self->extra->attrib, attrib);
1066     dealloc_extra(oldextra);
1067 
1068     Py_RETURN_NONE;
1069 }
1070 
1071 /* __setstate__ for Element instance from the Python implementation.
1072  * 'state' should be the instance dict.
1073  */
1074 
1075 static PyObject *
element_setstate_from_Python(ElementObject * self,PyObject * state)1076 element_setstate_from_Python(ElementObject *self, PyObject *state)
1077 {
1078     static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
1079                              PICKLED_TAIL, PICKLED_CHILDREN, 0};
1080     PyObject *args;
1081     PyObject *tag, *attrib, *text, *tail, *children;
1082     PyObject *retval;
1083 
1084     tag = attrib = text = tail = children = NULL;
1085     args = PyTuple_New(0);
1086     if (!args)
1087         return NULL;
1088 
1089     if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1090                                     &attrib, &text, &tail, &children))
1091         retval = element_setstate_from_attributes(self, tag, attrib, text,
1092                                                   tail, children);
1093     else
1094         retval = NULL;
1095 
1096     Py_DECREF(args);
1097     return retval;
1098 }
1099 
1100 /*[clinic input]
1101 _elementtree.Element.__setstate__
1102 
1103     state: object
1104     /
1105 
1106 [clinic start generated code]*/
1107 
1108 static PyObject *
_elementtree_Element___setstate__(ElementObject * self,PyObject * state)1109 _elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1110 /*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
1111 {
1112     if (!PyDict_CheckExact(state)) {
1113         PyErr_Format(PyExc_TypeError,
1114                      "Don't know how to unpickle \"%.200R\" as an Element",
1115                      state);
1116         return NULL;
1117     }
1118     else
1119         return element_setstate_from_Python(self, state);
1120 }
1121 
1122 LOCAL(int)
checkpath(PyObject * tag)1123 checkpath(PyObject* tag)
1124 {
1125     Py_ssize_t i;
1126     int check = 1;
1127 
1128     /* check if a tag contains an xpath character */
1129 
1130 #define PATHCHAR(ch) \
1131     (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
1132 
1133     if (PyUnicode_Check(tag)) {
1134         const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1135         const void *data = PyUnicode_DATA(tag);
1136         unsigned int kind = PyUnicode_KIND(tag);
1137         if (len >= 3 && PyUnicode_READ(kind, data, 0) == '{' && (
1138                 PyUnicode_READ(kind, data, 1) == '}' || (
1139                 PyUnicode_READ(kind, data, 1) == '*' &&
1140                 PyUnicode_READ(kind, data, 2) == '}'))) {
1141             /* wildcard: '{}tag' or '{*}tag' */
1142             return 1;
1143         }
1144         for (i = 0; i < len; i++) {
1145             Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1146             if (ch == '{')
1147                 check = 0;
1148             else if (ch == '}')
1149                 check = 1;
1150             else if (check && PATHCHAR(ch))
1151                 return 1;
1152         }
1153         return 0;
1154     }
1155     if (PyBytes_Check(tag)) {
1156         const char *p = PyBytes_AS_STRING(tag);
1157         const Py_ssize_t len = PyBytes_GET_SIZE(tag);
1158         if (len >= 3 && p[0] == '{' && (
1159                 p[1] == '}' || (p[1] == '*' && p[2] == '}'))) {
1160             /* wildcard: '{}tag' or '{*}tag' */
1161             return 1;
1162         }
1163         for (i = 0; i < len; i++) {
1164             if (p[i] == '{')
1165                 check = 0;
1166             else if (p[i] == '}')
1167                 check = 1;
1168             else if (check && PATHCHAR(p[i]))
1169                 return 1;
1170         }
1171         return 0;
1172     }
1173 
1174     return 1; /* unknown type; might be path expression */
1175 }
1176 
1177 /*[clinic input]
1178 _elementtree.Element.extend
1179 
1180     elements: object
1181     /
1182 
1183 [clinic start generated code]*/
1184 
1185 static PyObject *
_elementtree_Element_extend(ElementObject * self,PyObject * elements)1186 _elementtree_Element_extend(ElementObject *self, PyObject *elements)
1187 /*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
1188 {
1189     PyObject* seq;
1190     Py_ssize_t i;
1191 
1192     seq = PySequence_Fast(elements, "");
1193     if (!seq) {
1194         PyErr_Format(
1195             PyExc_TypeError,
1196             "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
1197             );
1198         return NULL;
1199     }
1200 
1201     for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
1202         PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1203         Py_INCREF(element);
1204         if (element_add_subelement(self, element) < 0) {
1205             Py_DECREF(seq);
1206             Py_DECREF(element);
1207             return NULL;
1208         }
1209         Py_DECREF(element);
1210     }
1211 
1212     Py_DECREF(seq);
1213 
1214     Py_RETURN_NONE;
1215 }
1216 
1217 /*[clinic input]
1218 _elementtree.Element.find
1219 
1220     path: object
1221     namespaces: object = None
1222 
1223 [clinic start generated code]*/
1224 
1225 static PyObject *
_elementtree_Element_find_impl(ElementObject * self,PyObject * path,PyObject * namespaces)1226 _elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1227                                PyObject *namespaces)
1228 /*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
1229 {
1230     Py_ssize_t i;
1231     elementtreestate *st = ET_STATE_GLOBAL;
1232 
1233     if (checkpath(path) || namespaces != Py_None) {
1234         _Py_IDENTIFIER(find);
1235         return _PyObject_CallMethodIdObjArgs(
1236             st->elementpath_obj, &PyId_find, self, path, namespaces, NULL
1237             );
1238     }
1239 
1240     if (!self->extra)
1241         Py_RETURN_NONE;
1242 
1243     for (i = 0; i < self->extra->length; i++) {
1244         PyObject* item = self->extra->children[i];
1245         int rc;
1246         assert(Element_Check(item));
1247         Py_INCREF(item);
1248         rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
1249         if (rc > 0)
1250             return item;
1251         Py_DECREF(item);
1252         if (rc < 0)
1253             return NULL;
1254     }
1255 
1256     Py_RETURN_NONE;
1257 }
1258 
1259 /*[clinic input]
1260 _elementtree.Element.findtext
1261 
1262     path: object
1263     default: object = None
1264     namespaces: object = None
1265 
1266 [clinic start generated code]*/
1267 
1268 static PyObject *
_elementtree_Element_findtext_impl(ElementObject * self,PyObject * path,PyObject * default_value,PyObject * namespaces)1269 _elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1270                                    PyObject *default_value,
1271                                    PyObject *namespaces)
1272 /*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
1273 {
1274     Py_ssize_t i;
1275     _Py_IDENTIFIER(findtext);
1276     elementtreestate *st = ET_STATE_GLOBAL;
1277 
1278     if (checkpath(path) || namespaces != Py_None)
1279         return _PyObject_CallMethodIdObjArgs(
1280             st->elementpath_obj, &PyId_findtext,
1281             self, path, default_value, namespaces, NULL
1282             );
1283 
1284     if (!self->extra) {
1285         Py_INCREF(default_value);
1286         return default_value;
1287     }
1288 
1289     for (i = 0; i < self->extra->length; i++) {
1290         PyObject *item = self->extra->children[i];
1291         int rc;
1292         assert(Element_Check(item));
1293         Py_INCREF(item);
1294         rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
1295         if (rc > 0) {
1296             PyObject* text = element_get_text((ElementObject*)item);
1297             if (text == Py_None) {
1298                 Py_DECREF(item);
1299                 return PyUnicode_New(0, 0);
1300             }
1301             Py_XINCREF(text);
1302             Py_DECREF(item);
1303             return text;
1304         }
1305         Py_DECREF(item);
1306         if (rc < 0)
1307             return NULL;
1308     }
1309 
1310     Py_INCREF(default_value);
1311     return default_value;
1312 }
1313 
1314 /*[clinic input]
1315 _elementtree.Element.findall
1316 
1317     path: object
1318     namespaces: object = None
1319 
1320 [clinic start generated code]*/
1321 
1322 static PyObject *
_elementtree_Element_findall_impl(ElementObject * self,PyObject * path,PyObject * namespaces)1323 _elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1324                                   PyObject *namespaces)
1325 /*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
1326 {
1327     Py_ssize_t i;
1328     PyObject* out;
1329     elementtreestate *st = ET_STATE_GLOBAL;
1330 
1331     if (checkpath(path) || namespaces != Py_None) {
1332         _Py_IDENTIFIER(findall);
1333         return _PyObject_CallMethodIdObjArgs(
1334             st->elementpath_obj, &PyId_findall, self, path, namespaces, NULL
1335             );
1336     }
1337 
1338     out = PyList_New(0);
1339     if (!out)
1340         return NULL;
1341 
1342     if (!self->extra)
1343         return out;
1344 
1345     for (i = 0; i < self->extra->length; i++) {
1346         PyObject* item = self->extra->children[i];
1347         int rc;
1348         assert(Element_Check(item));
1349         Py_INCREF(item);
1350         rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
1351         if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1352             Py_DECREF(item);
1353             Py_DECREF(out);
1354             return NULL;
1355         }
1356         Py_DECREF(item);
1357     }
1358 
1359     return out;
1360 }
1361 
1362 /*[clinic input]
1363 _elementtree.Element.iterfind
1364 
1365     path: object
1366     namespaces: object = None
1367 
1368 [clinic start generated code]*/
1369 
1370 static PyObject *
_elementtree_Element_iterfind_impl(ElementObject * self,PyObject * path,PyObject * namespaces)1371 _elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1372                                    PyObject *namespaces)
1373 /*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1374 {
1375     PyObject* tag = path;
1376     _Py_IDENTIFIER(iterfind);
1377     elementtreestate *st = ET_STATE_GLOBAL;
1378 
1379     return _PyObject_CallMethodIdObjArgs(
1380         st->elementpath_obj, &PyId_iterfind, self, tag, namespaces, NULL);
1381 }
1382 
1383 /*[clinic input]
1384 _elementtree.Element.get
1385 
1386     key: object
1387     default: object = None
1388 
1389 [clinic start generated code]*/
1390 
1391 static PyObject *
_elementtree_Element_get_impl(ElementObject * self,PyObject * key,PyObject * default_value)1392 _elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1393                               PyObject *default_value)
1394 /*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
1395 {
1396     if (self->extra && self->extra->attrib) {
1397         PyObject *attrib = self->extra->attrib;
1398         Py_INCREF(attrib);
1399         PyObject *value = PyDict_GetItemWithError(attrib, key);
1400         Py_XINCREF(value);
1401         Py_DECREF(attrib);
1402         if (value != NULL || PyErr_Occurred()) {
1403             return value;
1404         }
1405     }
1406 
1407     Py_INCREF(default_value);
1408     return default_value;
1409 }
1410 
1411 static PyObject *
1412 create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1413 
1414 
1415 /*[clinic input]
1416 _elementtree.Element.iter
1417 
1418     tag: object = None
1419 
1420 [clinic start generated code]*/
1421 
1422 static PyObject *
_elementtree_Element_iter_impl(ElementObject * self,PyObject * tag)1423 _elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1424 /*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
1425 {
1426     if (PyUnicode_Check(tag)) {
1427         if (PyUnicode_READY(tag) < 0)
1428             return NULL;
1429         if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1430             tag = Py_None;
1431     }
1432     else if (PyBytes_Check(tag)) {
1433         if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1434             tag = Py_None;
1435     }
1436 
1437     return create_elementiter(self, tag, 0);
1438 }
1439 
1440 
1441 /*[clinic input]
1442 _elementtree.Element.itertext
1443 
1444 [clinic start generated code]*/
1445 
1446 static PyObject *
_elementtree_Element_itertext_impl(ElementObject * self)1447 _elementtree_Element_itertext_impl(ElementObject *self)
1448 /*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1449 {
1450     return create_elementiter(self, Py_None, 1);
1451 }
1452 
1453 
1454 static PyObject*
element_getitem(PyObject * self_,Py_ssize_t index)1455 element_getitem(PyObject* self_, Py_ssize_t index)
1456 {
1457     ElementObject* self = (ElementObject*) self_;
1458 
1459     if (!self->extra || index < 0 || index >= self->extra->length) {
1460         PyErr_SetString(
1461             PyExc_IndexError,
1462             "child index out of range"
1463             );
1464         return NULL;
1465     }
1466 
1467     Py_INCREF(self->extra->children[index]);
1468     return self->extra->children[index];
1469 }
1470 
1471 /*[clinic input]
1472 _elementtree.Element.insert
1473 
1474     index: Py_ssize_t
1475     subelement: object(subclass_of='&Element_Type')
1476     /
1477 
1478 [clinic start generated code]*/
1479 
1480 static PyObject *
_elementtree_Element_insert_impl(ElementObject * self,Py_ssize_t index,PyObject * subelement)1481 _elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1482                                  PyObject *subelement)
1483 /*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
1484 {
1485     Py_ssize_t i;
1486 
1487     if (!self->extra) {
1488         if (create_extra(self, NULL) < 0)
1489             return NULL;
1490     }
1491 
1492     if (index < 0) {
1493         index += self->extra->length;
1494         if (index < 0)
1495             index = 0;
1496     }
1497     if (index > self->extra->length)
1498         index = self->extra->length;
1499 
1500     if (element_resize(self, 1) < 0)
1501         return NULL;
1502 
1503     for (i = self->extra->length; i > index; i--)
1504         self->extra->children[i] = self->extra->children[i-1];
1505 
1506     Py_INCREF(subelement);
1507     self->extra->children[index] = subelement;
1508 
1509     self->extra->length++;
1510 
1511     Py_RETURN_NONE;
1512 }
1513 
1514 /*[clinic input]
1515 _elementtree.Element.items
1516 
1517 [clinic start generated code]*/
1518 
1519 static PyObject *
_elementtree_Element_items_impl(ElementObject * self)1520 _elementtree_Element_items_impl(ElementObject *self)
1521 /*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1522 {
1523     if (!self->extra || !self->extra->attrib)
1524         return PyList_New(0);
1525 
1526     return PyDict_Items(self->extra->attrib);
1527 }
1528 
1529 /*[clinic input]
1530 _elementtree.Element.keys
1531 
1532 [clinic start generated code]*/
1533 
1534 static PyObject *
_elementtree_Element_keys_impl(ElementObject * self)1535 _elementtree_Element_keys_impl(ElementObject *self)
1536 /*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1537 {
1538     if (!self->extra || !self->extra->attrib)
1539         return PyList_New(0);
1540 
1541     return PyDict_Keys(self->extra->attrib);
1542 }
1543 
1544 static Py_ssize_t
element_length(ElementObject * self)1545 element_length(ElementObject* self)
1546 {
1547     if (!self->extra)
1548         return 0;
1549 
1550     return self->extra->length;
1551 }
1552 
1553 /*[clinic input]
1554 _elementtree.Element.makeelement
1555 
1556     tag: object
1557     attrib: object(subclass_of='&PyDict_Type')
1558     /
1559 
1560 [clinic start generated code]*/
1561 
1562 static PyObject *
_elementtree_Element_makeelement_impl(ElementObject * self,PyObject * tag,PyObject * attrib)1563 _elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1564                                       PyObject *attrib)
1565 /*[clinic end generated code: output=4109832d5bb789ef input=2279d974529c3861]*/
1566 {
1567     PyObject* elem;
1568 
1569     attrib = PyDict_Copy(attrib);
1570     if (!attrib)
1571         return NULL;
1572 
1573     elem = create_new_element(tag, attrib);
1574 
1575     Py_DECREF(attrib);
1576 
1577     return elem;
1578 }
1579 
1580 /*[clinic input]
1581 _elementtree.Element.remove
1582 
1583     subelement: object(subclass_of='&Element_Type')
1584     /
1585 
1586 [clinic start generated code]*/
1587 
1588 static PyObject *
_elementtree_Element_remove_impl(ElementObject * self,PyObject * subelement)1589 _elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1590 /*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
1591 {
1592     Py_ssize_t i;
1593     int rc;
1594     PyObject *found;
1595 
1596     if (!self->extra) {
1597         /* element has no children, so raise exception */
1598         PyErr_SetString(
1599             PyExc_ValueError,
1600             "list.remove(x): x not in list"
1601             );
1602         return NULL;
1603     }
1604 
1605     for (i = 0; i < self->extra->length; i++) {
1606         if (self->extra->children[i] == subelement)
1607             break;
1608         rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
1609         if (rc > 0)
1610             break;
1611         if (rc < 0)
1612             return NULL;
1613     }
1614 
1615     if (i >= self->extra->length) {
1616         /* subelement is not in children, so raise exception */
1617         PyErr_SetString(
1618             PyExc_ValueError,
1619             "list.remove(x): x not in list"
1620             );
1621         return NULL;
1622     }
1623 
1624     found = self->extra->children[i];
1625 
1626     self->extra->length--;
1627     for (; i < self->extra->length; i++)
1628         self->extra->children[i] = self->extra->children[i+1];
1629 
1630     Py_DECREF(found);
1631     Py_RETURN_NONE;
1632 }
1633 
1634 static PyObject*
element_repr(ElementObject * self)1635 element_repr(ElementObject* self)
1636 {
1637     int status;
1638 
1639     if (self->tag == NULL)
1640         return PyUnicode_FromFormat("<Element at %p>", self);
1641 
1642     status = Py_ReprEnter((PyObject *)self);
1643     if (status == 0) {
1644         PyObject *res;
1645         res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1646         Py_ReprLeave((PyObject *)self);
1647         return res;
1648     }
1649     if (status > 0)
1650         PyErr_Format(PyExc_RuntimeError,
1651                      "reentrant call inside %s.__repr__",
1652                      Py_TYPE(self)->tp_name);
1653     return NULL;
1654 }
1655 
1656 /*[clinic input]
1657 _elementtree.Element.set
1658 
1659     key: object
1660     value: object
1661     /
1662 
1663 [clinic start generated code]*/
1664 
1665 static PyObject *
_elementtree_Element_set_impl(ElementObject * self,PyObject * key,PyObject * value)1666 _elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1667                               PyObject *value)
1668 /*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
1669 {
1670     PyObject* attrib;
1671 
1672     if (!self->extra) {
1673         if (create_extra(self, NULL) < 0)
1674             return NULL;
1675     }
1676 
1677     attrib = element_get_attrib(self);
1678     if (!attrib)
1679         return NULL;
1680 
1681     if (PyDict_SetItem(attrib, key, value) < 0)
1682         return NULL;
1683 
1684     Py_RETURN_NONE;
1685 }
1686 
1687 static int
element_setitem(PyObject * self_,Py_ssize_t index,PyObject * item)1688 element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
1689 {
1690     ElementObject* self = (ElementObject*) self_;
1691     Py_ssize_t i;
1692     PyObject* old;
1693 
1694     if (!self->extra || index < 0 || index >= self->extra->length) {
1695         PyErr_SetString(
1696             PyExc_IndexError,
1697             "child assignment index out of range");
1698         return -1;
1699     }
1700 
1701     old = self->extra->children[index];
1702 
1703     if (item) {
1704         if (!Element_Check(item)) {
1705             raise_type_error(item);
1706             return -1;
1707         }
1708         Py_INCREF(item);
1709         self->extra->children[index] = item;
1710     } else {
1711         self->extra->length--;
1712         for (i = index; i < self->extra->length; i++)
1713             self->extra->children[i] = self->extra->children[i+1];
1714     }
1715 
1716     Py_DECREF(old);
1717 
1718     return 0;
1719 }
1720 
1721 static PyObject*
element_subscr(PyObject * self_,PyObject * item)1722 element_subscr(PyObject* self_, PyObject* item)
1723 {
1724     ElementObject* self = (ElementObject*) self_;
1725 
1726     if (PyIndex_Check(item)) {
1727         Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1728 
1729         if (i == -1 && PyErr_Occurred()) {
1730             return NULL;
1731         }
1732         if (i < 0 && self->extra)
1733             i += self->extra->length;
1734         return element_getitem(self_, i);
1735     }
1736     else if (PySlice_Check(item)) {
1737         Py_ssize_t start, stop, step, slicelen, i;
1738         size_t cur;
1739         PyObject* list;
1740 
1741         if (!self->extra)
1742             return PyList_New(0);
1743 
1744         if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
1745             return NULL;
1746         }
1747         slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1748                                          step);
1749 
1750         if (slicelen <= 0)
1751             return PyList_New(0);
1752         else {
1753             list = PyList_New(slicelen);
1754             if (!list)
1755                 return NULL;
1756 
1757             for (cur = start, i = 0; i < slicelen;
1758                  cur += step, i++) {
1759                 PyObject* item = self->extra->children[cur];
1760                 Py_INCREF(item);
1761                 PyList_SET_ITEM(list, i, item);
1762             }
1763 
1764             return list;
1765         }
1766     }
1767     else {
1768         PyErr_SetString(PyExc_TypeError,
1769                 "element indices must be integers");
1770         return NULL;
1771     }
1772 }
1773 
1774 static int
element_ass_subscr(PyObject * self_,PyObject * item,PyObject * value)1775 element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1776 {
1777     ElementObject* self = (ElementObject*) self_;
1778 
1779     if (PyIndex_Check(item)) {
1780         Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1781 
1782         if (i == -1 && PyErr_Occurred()) {
1783             return -1;
1784         }
1785         if (i < 0 && self->extra)
1786             i += self->extra->length;
1787         return element_setitem(self_, i, value);
1788     }
1789     else if (PySlice_Check(item)) {
1790         Py_ssize_t start, stop, step, slicelen, newlen, i;
1791         size_t cur;
1792 
1793         PyObject* recycle = NULL;
1794         PyObject* seq;
1795 
1796         if (!self->extra) {
1797             if (create_extra(self, NULL) < 0)
1798                 return -1;
1799         }
1800 
1801         if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
1802             return -1;
1803         }
1804         slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1805                                          step);
1806 
1807         if (value == NULL) {
1808             /* Delete slice */
1809             size_t cur;
1810             Py_ssize_t i;
1811 
1812             if (slicelen <= 0)
1813                 return 0;
1814 
1815             /* Since we're deleting, the direction of the range doesn't matter,
1816              * so for simplicity make it always ascending.
1817             */
1818             if (step < 0) {
1819                 stop = start + 1;
1820                 start = stop + step * (slicelen - 1) - 1;
1821                 step = -step;
1822             }
1823 
1824             assert((size_t)slicelen <= SIZE_MAX / sizeof(PyObject *));
1825 
1826             /* recycle is a list that will contain all the children
1827              * scheduled for removal.
1828             */
1829             if (!(recycle = PyList_New(slicelen))) {
1830                 return -1;
1831             }
1832 
1833             /* This loop walks over all the children that have to be deleted,
1834              * with cur pointing at them. num_moved is the amount of children
1835              * until the next deleted child that have to be "shifted down" to
1836              * occupy the deleted's places.
1837              * Note that in the ith iteration, shifting is done i+i places down
1838              * because i children were already removed.
1839             */
1840             for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1841                 /* Compute how many children have to be moved, clipping at the
1842                  * list end.
1843                 */
1844                 Py_ssize_t num_moved = step - 1;
1845                 if (cur + step >= (size_t)self->extra->length) {
1846                     num_moved = self->extra->length - cur - 1;
1847                 }
1848 
1849                 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1850 
1851                 memmove(
1852                     self->extra->children + cur - i,
1853                     self->extra->children + cur + 1,
1854                     num_moved * sizeof(PyObject *));
1855             }
1856 
1857             /* Leftover "tail" after the last removed child */
1858             cur = start + (size_t)slicelen * step;
1859             if (cur < (size_t)self->extra->length) {
1860                 memmove(
1861                     self->extra->children + cur - slicelen,
1862                     self->extra->children + cur,
1863                     (self->extra->length - cur) * sizeof(PyObject *));
1864             }
1865 
1866             self->extra->length -= slicelen;
1867 
1868             /* Discard the recycle list with all the deleted sub-elements */
1869             Py_DECREF(recycle);
1870             return 0;
1871         }
1872 
1873         /* A new slice is actually being assigned */
1874         seq = PySequence_Fast(value, "");
1875         if (!seq) {
1876             PyErr_Format(
1877                 PyExc_TypeError,
1878                 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1879                 );
1880             return -1;
1881         }
1882         newlen = PySequence_Fast_GET_SIZE(seq);
1883 
1884         if (step !=  1 && newlen != slicelen)
1885         {
1886             Py_DECREF(seq);
1887             PyErr_Format(PyExc_ValueError,
1888                 "attempt to assign sequence of size %zd "
1889                 "to extended slice of size %zd",
1890                 newlen, slicelen
1891                 );
1892             return -1;
1893         }
1894 
1895         /* Resize before creating the recycle bin, to prevent refleaks. */
1896         if (newlen > slicelen) {
1897             if (element_resize(self, newlen - slicelen) < 0) {
1898                 Py_DECREF(seq);
1899                 return -1;
1900             }
1901         }
1902 
1903         for (i = 0; i < newlen; i++) {
1904             PyObject *element = PySequence_Fast_GET_ITEM(seq, i);
1905             if (!Element_Check(element)) {
1906                 raise_type_error(element);
1907                 Py_DECREF(seq);
1908                 return -1;
1909             }
1910         }
1911 
1912         if (slicelen > 0) {
1913             /* to avoid recursive calls to this method (via decref), move
1914                old items to the recycle bin here, and get rid of them when
1915                we're done modifying the element */
1916             recycle = PyList_New(slicelen);
1917             if (!recycle) {
1918                 Py_DECREF(seq);
1919                 return -1;
1920             }
1921             for (cur = start, i = 0; i < slicelen;
1922                  cur += step, i++)
1923                 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1924         }
1925 
1926         if (newlen < slicelen) {
1927             /* delete slice */
1928             for (i = stop; i < self->extra->length; i++)
1929                 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1930         } else if (newlen > slicelen) {
1931             /* insert slice */
1932             for (i = self->extra->length-1; i >= stop; i--)
1933                 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1934         }
1935 
1936         /* replace the slice */
1937         for (cur = start, i = 0; i < newlen;
1938              cur += step, i++) {
1939             PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1940             Py_INCREF(element);
1941             self->extra->children[cur] = element;
1942         }
1943 
1944         self->extra->length += newlen - slicelen;
1945 
1946         Py_DECREF(seq);
1947 
1948         /* discard the recycle bin, and everything in it */
1949         Py_XDECREF(recycle);
1950 
1951         return 0;
1952     }
1953     else {
1954         PyErr_SetString(PyExc_TypeError,
1955                 "element indices must be integers");
1956         return -1;
1957     }
1958 }
1959 
1960 static PyObject*
element_tag_getter(ElementObject * self,void * closure)1961 element_tag_getter(ElementObject *self, void *closure)
1962 {
1963     PyObject *res = self->tag;
1964     Py_INCREF(res);
1965     return res;
1966 }
1967 
1968 static PyObject*
element_text_getter(ElementObject * self,void * closure)1969 element_text_getter(ElementObject *self, void *closure)
1970 {
1971     PyObject *res = element_get_text(self);
1972     Py_XINCREF(res);
1973     return res;
1974 }
1975 
1976 static PyObject*
element_tail_getter(ElementObject * self,void * closure)1977 element_tail_getter(ElementObject *self, void *closure)
1978 {
1979     PyObject *res = element_get_tail(self);
1980     Py_XINCREF(res);
1981     return res;
1982 }
1983 
1984 static PyObject*
element_attrib_getter(ElementObject * self,void * closure)1985 element_attrib_getter(ElementObject *self, void *closure)
1986 {
1987     PyObject *res;
1988     if (!self->extra) {
1989         if (create_extra(self, NULL) < 0)
1990             return NULL;
1991     }
1992     res = element_get_attrib(self);
1993     Py_XINCREF(res);
1994     return res;
1995 }
1996 
/* Macro for setter validation: when V is NULL, set AttributeError
   ("can't delete element attribute") and make the enclosing function
   return -1.  Wrapped in do { } while (0) so that the expansion plus the
   caller's trailing semicolon forms exactly one statement, which keeps the
   macro safe inside unbraced if/else bodies. */
#define _VALIDATE_ATTR_VALUE(V)                     \
    do {                                            \
        if ((V) == NULL) {                          \
            PyErr_SetString(                        \
                PyExc_AttributeError,               \
                "can't delete element attribute");  \
            return -1;                              \
        }                                           \
    } while (0)
2005 
2006 static int
element_tag_setter(ElementObject * self,PyObject * value,void * closure)2007 element_tag_setter(ElementObject *self, PyObject *value, void *closure)
2008 {
2009     _VALIDATE_ATTR_VALUE(value);
2010     Py_INCREF(value);
2011     Py_SETREF(self->tag, value);
2012     return 0;
2013 }
2014 
2015 static int
element_text_setter(ElementObject * self,PyObject * value,void * closure)2016 element_text_setter(ElementObject *self, PyObject *value, void *closure)
2017 {
2018     _VALIDATE_ATTR_VALUE(value);
2019     Py_INCREF(value);
2020     _set_joined_ptr(&self->text, value);
2021     return 0;
2022 }
2023 
2024 static int
element_tail_setter(ElementObject * self,PyObject * value,void * closure)2025 element_tail_setter(ElementObject *self, PyObject *value, void *closure)
2026 {
2027     _VALIDATE_ATTR_VALUE(value);
2028     Py_INCREF(value);
2029     _set_joined_ptr(&self->tail, value);
2030     return 0;
2031 }
2032 
2033 static int
element_attrib_setter(ElementObject * self,PyObject * value,void * closure)2034 element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
2035 {
2036     _VALIDATE_ATTR_VALUE(value);
2037     if (!PyDict_Check(value)) {
2038         PyErr_Format(PyExc_TypeError,
2039                      "attrib must be dict, not %.200s",
2040                      Py_TYPE(value)->tp_name);
2041         return -1;
2042     }
2043     if (!self->extra) {
2044         if (create_extra(self, NULL) < 0)
2045             return -1;
2046     }
2047     Py_INCREF(value);
2048     Py_XSETREF(self->extra->attrib, value);
2049     return 0;
2050 }
2051 
2052 static PySequenceMethods element_as_sequence = {
2053     (lenfunc) element_length,
2054     0, /* sq_concat */
2055     0, /* sq_repeat */
2056     element_getitem,
2057     0,
2058     element_setitem,
2059     0,
2060 };
2061 
2062 /******************************* Element iterator ****************************/
2063 
/* ElementIterObject represents the iteration state over an XML element in
 * pre-order traversal. To keep track of which sub-element should be returned
 * next, a stack of parents is maintained. This is a standard stack-based
 * iterative pre-order traversal of a tree.
 * The stack is managed using a contiguous array.
 * Each stack item contains the saved parent to which we should return after
 * the current one is exhausted, and the next child to examine in that parent.
 */
typedef struct ParentLocator_t {
    /* saved parent; the stack holds a reference to it (taken in
       parent_stack_push_new) */
    ElementObject *parent;
    /* index of the next child of `parent` to examine */
    Py_ssize_t child_index;
} ParentLocator;
2076 
/* Iterator state shared by the element and text iterations driven by
   elementiter_next(). */
typedef struct {
    PyObject_HEAD
    /* array of saved parents, allocated with PyMem_New and grown with
       PyMem_Resize */
    ParentLocator *parent_stack;
    /* number of entries of parent_stack currently in use */
    Py_ssize_t parent_stack_used;
    /* number of entries allocated for parent_stack */
    Py_ssize_t parent_stack_size;
    /* element the iteration starts from; elementiter_next() takes it over
       and resets this field to NULL on its first pass */
    ElementObject *root_element;
    /* tag compared against each element's tag; Py_None yields every
       element */
    PyObject *sought_tag;
    /* nonzero: yield text/tail objects instead of elements */
    int gettext;
} ElementIterObject;
2086 
2087 
2088 static void
elementiter_dealloc(ElementIterObject * it)2089 elementiter_dealloc(ElementIterObject *it)
2090 {
2091     Py_ssize_t i = it->parent_stack_used;
2092     it->parent_stack_used = 0;
2093     /* bpo-31095: UnTrack is needed before calling any callbacks */
2094     PyObject_GC_UnTrack(it);
2095     while (i--)
2096         Py_XDECREF(it->parent_stack[i].parent);
2097     PyMem_Free(it->parent_stack);
2098 
2099     Py_XDECREF(it->sought_tag);
2100     Py_XDECREF(it->root_element);
2101 
2102     PyObject_GC_Del(it);
2103 }
2104 
2105 static int
elementiter_traverse(ElementIterObject * it,visitproc visit,void * arg)2106 elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2107 {
2108     Py_ssize_t i = it->parent_stack_used;
2109     while (i--)
2110         Py_VISIT(it->parent_stack[i].parent);
2111 
2112     Py_VISIT(it->root_element);
2113     Py_VISIT(it->sought_tag);
2114     return 0;
2115 }
2116 
2117 /* Helper function for elementiter_next. Add a new parent to the parent stack.
2118  */
2119 static int
parent_stack_push_new(ElementIterObject * it,ElementObject * parent)2120 parent_stack_push_new(ElementIterObject *it, ElementObject *parent)
2121 {
2122     ParentLocator *item;
2123 
2124     if (it->parent_stack_used >= it->parent_stack_size) {
2125         Py_ssize_t new_size = it->parent_stack_size * 2;  /* never overflow */
2126         ParentLocator *parent_stack = it->parent_stack;
2127         PyMem_Resize(parent_stack, ParentLocator, new_size);
2128         if (parent_stack == NULL)
2129             return -1;
2130         it->parent_stack = parent_stack;
2131         it->parent_stack_size = new_size;
2132     }
2133     item = it->parent_stack + it->parent_stack_used++;
2134     Py_INCREF(parent);
2135     item->parent = parent;
2136     item->child_index = 0;
2137     return 0;
2138 }
2139 
/* tp_iternext implementation for the element iterator.
 *
 * Returns a new reference to the next element (or, when it->gettext is set,
 * to the next text/tail object that tests true).  Returns NULL with
 * StopIteration set once both the parent stack and root_element are empty.
 * Also returns NULL when pushing onto the parent stack fails (MemoryError is
 * set here), or when the tag comparison, the text lookup or the truth test
 * reports an error.
 */
static PyObject *
elementiter_next(ElementIterObject *it)
{
    /* Sub-element iterator.
     *
     * A short note on gettext: this function serves both the iter() and
     * itertext() methods to avoid code duplication. However, there are a few
     * small differences in the way these iterations work. Namely:
     *   - itertext() only yields text from nodes that have it, and continues
     *     iterating when a node doesn't have text (so it doesn't return any
     *     node like iter())
     *   - itertext() also has to handle tail, after finishing with all the
     *     children of a node.
     */
    int rc;
    ElementObject *elem;
    PyObject *text;

    while (1) {
        /* Handle the case reached in the beginning and end of iteration, where
         * the parent stack is empty. If root_element is NULL and we're here, the
         * iterator is exhausted.
         */
        if (!it->parent_stack_used) {
            if (!it->root_element) {
                PyErr_SetNone(PyExc_StopIteration);
                return NULL;
            }

            elem = it->root_element;  /* steals a reference */
            it->root_element = NULL;
        }
        else {
            /* See if there are children left to traverse in the current parent. If
             * yes, visit the next child. If not, pop the stack and try again.
             */
            ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1];
            Py_ssize_t child_index = item->child_index;
            ElementObjectExtra *extra;
            elem = item->parent;
            extra = elem->extra;
            if (!extra || child_index >= extra->length) {
                /* pop: from here on `elem` carries the reference that the
                 * stack entry used to hold */
                it->parent_stack_used--;
                /* Note that extra condition on it->parent_stack_used here;
                 * this is because itertext() is supposed to only return *inner*
                 * text, not text following the element it began iteration with.
                 */
                if (it->gettext && it->parent_stack_used) {
                    text = element_get_tail(elem);
                    goto gettext;
                }
                Py_DECREF(elem);
                continue;
            }

            assert(Element_Check(extra->children[child_index]));
            elem = (ElementObject *)extra->children[child_index];
            item->child_index++;
            /* own reference for this function; the stack takes a separate
             * one in parent_stack_push_new() below */
            Py_INCREF(elem);
        }

        if (parent_stack_push_new(it, elem) < 0) {
            Py_DECREF(elem);
            PyErr_NoMemory();
            return NULL;
        }
        if (it->gettext) {
            text = element_get_text(elem);
            goto gettext;
        }

        /* element iteration: Py_None as sought tag yields every element */
        if (it->sought_tag == Py_None)
            return (PyObject *)elem;

        rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ);
        if (rc > 0)
            return (PyObject *)elem;

        /* no match (rc == 0) or comparison error (rc < 0) */
        Py_DECREF(elem);
        if (rc < 0)
            return NULL;
        continue;

gettext:
        /* Reached with `text` borrowed from `elem` and with this function
         * owning one reference to `elem`. */
        if (!text) {
            Py_DECREF(elem);
            return NULL;
        }
        if (text == Py_None) {
            Py_DECREF(elem);
        }
        else {
            /* take our own reference to text before letting go of elem */
            Py_INCREF(text);
            Py_DECREF(elem);
            rc = PyObject_IsTrue(text);
            if (rc > 0)
                return text;
            Py_DECREF(text);
            if (rc < 0)
                return NULL;
        }
    }

    return NULL;
}
2245 
2246 
2247 static PyTypeObject ElementIter_Type = {
2248     PyVarObject_HEAD_INIT(NULL, 0)
2249     /* Using the module's name since the pure-Python implementation does not
2250        have such a type. */
2251     "_elementtree._element_iterator",           /* tp_name */
2252     sizeof(ElementIterObject),                  /* tp_basicsize */
2253     0,                                          /* tp_itemsize */
2254     /* methods */
2255     (destructor)elementiter_dealloc,            /* tp_dealloc */
2256     0,                                          /* tp_vectorcall_offset */
2257     0,                                          /* tp_getattr */
2258     0,                                          /* tp_setattr */
2259     0,                                          /* tp_as_async */
2260     0,                                          /* tp_repr */
2261     0,                                          /* tp_as_number */
2262     0,                                          /* tp_as_sequence */
2263     0,                                          /* tp_as_mapping */
2264     0,                                          /* tp_hash */
2265     0,                                          /* tp_call */
2266     0,                                          /* tp_str */
2267     0,                                          /* tp_getattro */
2268     0,                                          /* tp_setattro */
2269     0,                                          /* tp_as_buffer */
2270     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,    /* tp_flags */
2271     0,                                          /* tp_doc */
2272     (traverseproc)elementiter_traverse,         /* tp_traverse */
2273     0,                                          /* tp_clear */
2274     0,                                          /* tp_richcompare */
2275     0,                                          /* tp_weaklistoffset */
2276     PyObject_SelfIter,                          /* tp_iter */
2277     (iternextfunc)elementiter_next,             /* tp_iternext */
2278     0,                                          /* tp_methods */
2279     0,                                          /* tp_members */
2280     0,                                          /* tp_getset */
2281     0,                                          /* tp_base */
2282     0,                                          /* tp_dict */
2283     0,                                          /* tp_descr_get */
2284     0,                                          /* tp_descr_set */
2285     0,                                          /* tp_dictoffset */
2286     0,                                          /* tp_init */
2287     0,                                          /* tp_alloc */
2288     0,                                          /* tp_new */
2289 };
2290 
2291 #define INIT_PARENT_STACK_SIZE 8
2292 
2293 static PyObject *
create_elementiter(ElementObject * self,PyObject * tag,int gettext)2294 create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2295 {
2296     ElementIterObject *it;
2297 
2298     it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2299     if (!it)
2300         return NULL;
2301 
2302     Py_INCREF(tag);
2303     it->sought_tag = tag;
2304     it->gettext = gettext;
2305     Py_INCREF(self);
2306     it->root_element = self;
2307 
2308     it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE);
2309     if (it->parent_stack == NULL) {
2310         Py_DECREF(it);
2311         PyErr_NoMemory();
2312         return NULL;
2313     }
2314     it->parent_stack_used = 0;
2315     it->parent_stack_size = INIT_PARENT_STACK_SIZE;
2316 
2317     PyObject_GC_Track(it);
2318 
2319     return (PyObject *)it;
2320 }
2321 
2322 
2323 /* ==================================================================== */
2324 /* the tree builder type */
2325 
/* State of a tree builder: the tree under construction, the stack of open
   elements, pending character data, the factories used to create nodes and
   the optional event-reporting hooks. */
typedef struct {
    PyObject_HEAD

    PyObject *root; /* root node (first created node) */

    PyObject *this; /* current node */
    PyObject *last; /* most recently created node */
    PyObject *last_for_tail; /* most recently created node that takes a tail */

    PyObject *data; /* data collector (string or list), or NULL */

    PyObject *stack; /* element stack */
    Py_ssize_t index; /* current stack size (0 means empty) */

    /* callables used to create nodes; NULL element_factory means
       create_new_element() is used (see treebuilder_handle_start) */
    PyObject *element_factory;
    PyObject *comment_factory;
    PyObject *pi_factory;

    /* element tracing */
    PyObject *events_append; /* the append method of the list of events, or NULL */
    PyObject *start_event_obj; /* event objects (NULL to ignore) */
    PyObject *end_event_obj;
    PyObject *start_ns_event_obj;
    PyObject *end_ns_event_obj;
    PyObject *comment_event_obj;
    PyObject *pi_event_obj;

    /* nonzero: created comments / processing instructions are also added
       to the current element */
    char insert_comments;
    char insert_pis;
} TreeBuilderObject;

/* True when op's type is exactly TreeBuilder_Type (subclasses excluded). */
#define TreeBuilder_CheckExact(op) Py_IS_TYPE((op), &TreeBuilder_Type)
2358 
2359 /* -------------------------------------------------------------------- */
2360 /* constructor and destructor */
2361 
2362 static PyObject *
treebuilder_new(PyTypeObject * type,PyObject * args,PyObject * kwds)2363 treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2364 {
2365     TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2366     if (t != NULL) {
2367         t->root = NULL;
2368 
2369         Py_INCREF(Py_None);
2370         t->this = Py_None;
2371         Py_INCREF(Py_None);
2372         t->last = Py_None;
2373 
2374         t->data = NULL;
2375         t->element_factory = NULL;
2376         t->comment_factory = NULL;
2377         t->pi_factory = NULL;
2378         t->stack = PyList_New(20);
2379         if (!t->stack) {
2380             Py_DECREF(t->this);
2381             Py_DECREF(t->last);
2382             Py_DECREF((PyObject *) t);
2383             return NULL;
2384         }
2385         t->index = 0;
2386 
2387         t->events_append = NULL;
2388         t->start_event_obj = t->end_event_obj = NULL;
2389         t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2390         t->comment_event_obj = t->pi_event_obj = NULL;
2391         t->insert_comments = t->insert_pis = 0;
2392     }
2393     return (PyObject *)t;
2394 }
2395 
2396 /*[clinic input]
2397 _elementtree.TreeBuilder.__init__
2398 
2399     element_factory: object = None
2400     *
2401     comment_factory: object = None
2402     pi_factory: object = None
2403     insert_comments: bool = False
2404     insert_pis: bool = False
2405 
2406 [clinic start generated code]*/
2407 
2408 static int
_elementtree_TreeBuilder___init___impl(TreeBuilderObject * self,PyObject * element_factory,PyObject * comment_factory,PyObject * pi_factory,int insert_comments,int insert_pis)2409 _elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
2410                                        PyObject *element_factory,
2411                                        PyObject *comment_factory,
2412                                        PyObject *pi_factory,
2413                                        int insert_comments, int insert_pis)
2414 /*[clinic end generated code: output=8571d4dcadfdf952 input=ae98a94df20b5cc3]*/
2415 {
2416     if (element_factory != Py_None) {
2417         Py_INCREF(element_factory);
2418         Py_XSETREF(self->element_factory, element_factory);
2419     } else {
2420         Py_CLEAR(self->element_factory);
2421     }
2422 
2423     if (comment_factory == Py_None) {
2424         elementtreestate *st = ET_STATE_GLOBAL;
2425         comment_factory = st->comment_factory;
2426     }
2427     if (comment_factory) {
2428         Py_INCREF(comment_factory);
2429         Py_XSETREF(self->comment_factory, comment_factory);
2430         self->insert_comments = insert_comments;
2431     } else {
2432         Py_CLEAR(self->comment_factory);
2433         self->insert_comments = 0;
2434     }
2435 
2436     if (pi_factory == Py_None) {
2437         elementtreestate *st = ET_STATE_GLOBAL;
2438         pi_factory = st->pi_factory;
2439     }
2440     if (pi_factory) {
2441         Py_INCREF(pi_factory);
2442         Py_XSETREF(self->pi_factory, pi_factory);
2443         self->insert_pis = insert_pis;
2444     } else {
2445         Py_CLEAR(self->pi_factory);
2446         self->insert_pis = 0;
2447     }
2448 
2449     return 0;
2450 }
2451 
2452 static int
treebuilder_gc_traverse(TreeBuilderObject * self,visitproc visit,void * arg)2453 treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2454 {
2455     Py_VISIT(self->pi_event_obj);
2456     Py_VISIT(self->comment_event_obj);
2457     Py_VISIT(self->end_ns_event_obj);
2458     Py_VISIT(self->start_ns_event_obj);
2459     Py_VISIT(self->end_event_obj);
2460     Py_VISIT(self->start_event_obj);
2461     Py_VISIT(self->events_append);
2462     Py_VISIT(self->root);
2463     Py_VISIT(self->this);
2464     Py_VISIT(self->last);
2465     Py_VISIT(self->last_for_tail);
2466     Py_VISIT(self->data);
2467     Py_VISIT(self->stack);
2468     Py_VISIT(self->pi_factory);
2469     Py_VISIT(self->comment_factory);
2470     Py_VISIT(self->element_factory);
2471     return 0;
2472 }
2473 
2474 static int
treebuilder_gc_clear(TreeBuilderObject * self)2475 treebuilder_gc_clear(TreeBuilderObject *self)
2476 {
2477     Py_CLEAR(self->pi_event_obj);
2478     Py_CLEAR(self->comment_event_obj);
2479     Py_CLEAR(self->end_ns_event_obj);
2480     Py_CLEAR(self->start_ns_event_obj);
2481     Py_CLEAR(self->end_event_obj);
2482     Py_CLEAR(self->start_event_obj);
2483     Py_CLEAR(self->events_append);
2484     Py_CLEAR(self->stack);
2485     Py_CLEAR(self->data);
2486     Py_CLEAR(self->last);
2487     Py_CLEAR(self->last_for_tail);
2488     Py_CLEAR(self->this);
2489     Py_CLEAR(self->pi_factory);
2490     Py_CLEAR(self->comment_factory);
2491     Py_CLEAR(self->element_factory);
2492     Py_CLEAR(self->root);
2493     return 0;
2494 }
2495 
2496 static void
treebuilder_dealloc(TreeBuilderObject * self)2497 treebuilder_dealloc(TreeBuilderObject *self)
2498 {
2499     PyObject_GC_UnTrack(self);
2500     treebuilder_gc_clear(self);
2501     Py_TYPE(self)->tp_free((PyObject *)self);
2502 }
2503 
2504 /* -------------------------------------------------------------------- */
2505 /* helpers for handling of arbitrary element-like objects */
2506 
2507 /*[clinic input]
2508 _elementtree._set_factories
2509 
2510     comment_factory: object
2511     pi_factory: object
2512     /
2513 
2514 Change the factories used to create comments and processing instructions.
2515 
2516 For internal use only.
2517 [clinic start generated code]*/
2518 
2519 static PyObject *
_elementtree__set_factories_impl(PyObject * module,PyObject * comment_factory,PyObject * pi_factory)2520 _elementtree__set_factories_impl(PyObject *module, PyObject *comment_factory,
2521                                  PyObject *pi_factory)
2522 /*[clinic end generated code: output=813b408adee26535 input=99d17627aea7fb3b]*/
2523 {
2524     elementtreestate *st = ET_STATE_GLOBAL;
2525     PyObject *old;
2526 
2527     if (!PyCallable_Check(comment_factory) && comment_factory != Py_None) {
2528         PyErr_Format(PyExc_TypeError, "Comment factory must be callable, not %.100s",
2529                      Py_TYPE(comment_factory)->tp_name);
2530         return NULL;
2531     }
2532     if (!PyCallable_Check(pi_factory) && pi_factory != Py_None) {
2533         PyErr_Format(PyExc_TypeError, "PI factory must be callable, not %.100s",
2534                      Py_TYPE(pi_factory)->tp_name);
2535         return NULL;
2536     }
2537 
2538     old = PyTuple_Pack(2,
2539         st->comment_factory ? st->comment_factory : Py_None,
2540         st->pi_factory ? st->pi_factory : Py_None);
2541 
2542     if (comment_factory == Py_None) {
2543         Py_CLEAR(st->comment_factory);
2544     } else {
2545         Py_INCREF(comment_factory);
2546         Py_XSETREF(st->comment_factory, comment_factory);
2547     }
2548     if (pi_factory == Py_None) {
2549         Py_CLEAR(st->pi_factory);
2550     } else {
2551         Py_INCREF(pi_factory);
2552         Py_XSETREF(st->pi_factory, pi_factory);
2553     }
2554 
2555     return old;
2556 }
2557 
2558 static int
treebuilder_extend_element_text_or_tail(PyObject * element,PyObject ** data,PyObject ** dest,_Py_Identifier * name)2559 treebuilder_extend_element_text_or_tail(PyObject *element, PyObject **data,
2560                                         PyObject **dest, _Py_Identifier *name)
2561 {
2562     /* Fast paths for the "almost always" cases. */
2563     if (Element_CheckExact(element)) {
2564         PyObject *dest_obj = JOIN_OBJ(*dest);
2565         if (dest_obj == Py_None) {
2566             *dest = JOIN_SET(*data, PyList_CheckExact(*data));
2567             *data = NULL;
2568             Py_DECREF(dest_obj);
2569             return 0;
2570         }
2571         else if (JOIN_GET(*dest)) {
2572             if (PyList_SetSlice(dest_obj, PY_SSIZE_T_MAX, PY_SSIZE_T_MAX, *data) < 0) {
2573                 return -1;
2574             }
2575             Py_CLEAR(*data);
2576             return 0;
2577         }
2578     }
2579 
2580     /*  Fallback for the non-Element / non-trivial cases. */
2581     {
2582         int r;
2583         PyObject* joined;
2584         PyObject* previous = _PyObject_GetAttrId(element, name);
2585         if (!previous)
2586             return -1;
2587         joined = list_join(*data);
2588         if (!joined) {
2589             Py_DECREF(previous);
2590             return -1;
2591         }
2592         if (previous != Py_None) {
2593             PyObject *tmp = PyNumber_Add(previous, joined);
2594             Py_DECREF(joined);
2595             Py_DECREF(previous);
2596             if (!tmp)
2597                 return -1;
2598             joined = tmp;
2599         } else {
2600             Py_DECREF(previous);
2601         }
2602 
2603         r = _PyObject_SetAttrId(element, name, joined);
2604         Py_DECREF(joined);
2605         if (r < 0)
2606             return -1;
2607         Py_CLEAR(*data);
2608         return 0;
2609     }
2610 }
2611 
2612 LOCAL(int)
treebuilder_flush_data(TreeBuilderObject * self)2613 treebuilder_flush_data(TreeBuilderObject* self)
2614 {
2615     if (!self->data) {
2616         return 0;
2617     }
2618 
2619     if (!self->last_for_tail) {
2620         PyObject *element = self->last;
2621         _Py_IDENTIFIER(text);
2622         return treebuilder_extend_element_text_or_tail(
2623                 element, &self->data,
2624                 &((ElementObject *) element)->text, &PyId_text);
2625     }
2626     else {
2627         PyObject *element = self->last_for_tail;
2628         _Py_IDENTIFIER(tail);
2629         return treebuilder_extend_element_text_or_tail(
2630                 element, &self->data,
2631                 &((ElementObject *) element)->tail, &PyId_tail);
2632     }
2633 }
2634 
2635 static int
treebuilder_add_subelement(PyObject * element,PyObject * child)2636 treebuilder_add_subelement(PyObject *element, PyObject *child)
2637 {
2638     _Py_IDENTIFIER(append);
2639     if (Element_CheckExact(element)) {
2640         ElementObject *elem = (ElementObject *) element;
2641         return element_add_subelement(elem, child);
2642     }
2643     else {
2644         PyObject *res;
2645         res = _PyObject_CallMethodIdOneArg(element, &PyId_append, child);
2646         if (res == NULL)
2647             return -1;
2648         Py_DECREF(res);
2649         return 0;
2650     }
2651 }
2652 
2653 LOCAL(int)
treebuilder_append_event(TreeBuilderObject * self,PyObject * action,PyObject * node)2654 treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2655                          PyObject *node)
2656 {
2657     if (action != NULL) {
2658         PyObject *res;
2659         PyObject *event = PyTuple_Pack(2, action, node);
2660         if (event == NULL)
2661             return -1;
2662         res = PyObject_CallOneArg(self->events_append, event);
2663         Py_DECREF(event);
2664         if (res == NULL)
2665             return -1;
2666         Py_DECREF(res);
2667     }
2668     return 0;
2669 }
2670 
2671 /* -------------------------------------------------------------------- */
2672 /* handlers */
2673 
2674 LOCAL(PyObject*)
treebuilder_handle_start(TreeBuilderObject * self,PyObject * tag,PyObject * attrib)2675 treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2676                          PyObject* attrib)
2677 {
2678     PyObject* node;
2679     PyObject* this;
2680     elementtreestate *st = ET_STATE_GLOBAL;
2681 
2682     if (treebuilder_flush_data(self) < 0) {
2683         return NULL;
2684     }
2685 
2686     if (!self->element_factory) {
2687         node = create_new_element(tag, attrib);
2688     } else if (attrib == NULL) {
2689         attrib = PyDict_New();
2690         if (!attrib)
2691             return NULL;
2692         node = PyObject_CallFunctionObjArgs(self->element_factory,
2693                                             tag, attrib, NULL);
2694         Py_DECREF(attrib);
2695     }
2696     else {
2697         node = PyObject_CallFunctionObjArgs(self->element_factory,
2698                                             tag, attrib, NULL);
2699     }
2700     if (!node) {
2701         return NULL;
2702     }
2703 
2704     this = self->this;
2705     Py_CLEAR(self->last_for_tail);
2706 
2707     if (this != Py_None) {
2708         if (treebuilder_add_subelement(this, node) < 0)
2709             goto error;
2710     } else {
2711         if (self->root) {
2712             PyErr_SetString(
2713                 st->parseerror_obj,
2714                 "multiple elements on top level"
2715                 );
2716             goto error;
2717         }
2718         Py_INCREF(node);
2719         self->root = node;
2720     }
2721 
2722     if (self->index < PyList_GET_SIZE(self->stack)) {
2723         if (PyList_SetItem(self->stack, self->index, this) < 0)
2724             goto error;
2725         Py_INCREF(this);
2726     } else {
2727         if (PyList_Append(self->stack, this) < 0)
2728             goto error;
2729     }
2730     self->index++;
2731 
2732     Py_INCREF(node);
2733     Py_SETREF(self->this, node);
2734     Py_INCREF(node);
2735     Py_SETREF(self->last, node);
2736 
2737     if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2738         goto error;
2739 
2740     return node;
2741 
2742   error:
2743     Py_DECREF(node);
2744     return NULL;
2745 }
2746 
2747 LOCAL(PyObject*)
treebuilder_handle_data(TreeBuilderObject * self,PyObject * data)2748 treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2749 {
2750     if (!self->data) {
2751         if (self->last == Py_None) {
2752             /* ignore calls to data before the first call to start */
2753             Py_RETURN_NONE;
2754         }
2755         /* store the first item as is */
2756         Py_INCREF(data); self->data = data;
2757     } else {
2758         /* more than one item; use a list to collect items */
2759         if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2760             PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
2761             /* XXX this code path unused in Python 3? */
2762             /* expat often generates single character data sections; handle
2763                the most common case by resizing the existing string... */
2764             Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2765             if (_PyBytes_Resize(&self->data, size + 1) < 0)
2766                 return NULL;
2767             PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
2768         } else if (PyList_CheckExact(self->data)) {
2769             if (PyList_Append(self->data, data) < 0)
2770                 return NULL;
2771         } else {
2772             PyObject* list = PyList_New(2);
2773             if (!list)
2774                 return NULL;
2775             PyList_SET_ITEM(list, 0, self->data);
2776             Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2777             self->data = list;
2778         }
2779     }
2780 
2781     Py_RETURN_NONE;
2782 }
2783 
/* Handle the end of an element.
 *
 * Flushes pending data, makes the element being closed both `last` and
 * `last_for_tail`, and restores `this` from the element stack.  An "end"
 * event is reported when end_event_obj is set.  The `tag` argument is not
 * examined here.
 *
 * Returns a new reference to the closed element, or NULL with an exception
 * set (IndexError "pop from empty stack" when no element is open).
 */
LOCAL(PyObject*)
treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
{
    PyObject* item;

    if (treebuilder_flush_data(self) < 0) {
        return NULL;
    }

    if (self->index == 0) {
        PyErr_SetString(
            PyExc_IndexError,
            "pop from empty stack"
            );
        return NULL;
    }

    /* `last` takes over the reference that `this` held; the previous `last`
       is remembered in `item` and released once the new state is in place */
    item = self->last;
    self->last = self->this;
    /* extra reference for last_for_tail */
    Py_INCREF(self->last);
    Py_XSETREF(self->last_for_tail, self->last);
    /* pop: the stack slot keeps its item, `this` gets its own reference */
    self->index--;
    self->this = PyList_GET_ITEM(self->stack, self->index);
    Py_INCREF(self->this);
    Py_DECREF(item);

    if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
        return NULL;

    Py_INCREF(self->last);
    return (PyObject*) self->last;
}
2816 
2817 LOCAL(PyObject*)
treebuilder_handle_comment(TreeBuilderObject * self,PyObject * text)2818 treebuilder_handle_comment(TreeBuilderObject* self, PyObject* text)
2819 {
2820     PyObject* comment;
2821     PyObject* this;
2822 
2823     if (treebuilder_flush_data(self) < 0) {
2824         return NULL;
2825     }
2826 
2827     if (self->comment_factory) {
2828         comment = PyObject_CallOneArg(self->comment_factory, text);
2829         if (!comment)
2830             return NULL;
2831 
2832         this = self->this;
2833         if (self->insert_comments && this != Py_None) {
2834             if (treebuilder_add_subelement(this, comment) < 0)
2835                 goto error;
2836             Py_INCREF(comment);
2837             Py_XSETREF(self->last_for_tail, comment);
2838         }
2839     } else {
2840         Py_INCREF(text);
2841         comment = text;
2842     }
2843 
2844     if (self->events_append && self->comment_event_obj) {
2845         if (treebuilder_append_event(self, self->comment_event_obj, comment) < 0)
2846             goto error;
2847     }
2848 
2849     return comment;
2850 
2851   error:
2852     Py_DECREF(comment);
2853     return NULL;
2854 }
2855 
2856 LOCAL(PyObject*)
treebuilder_handle_pi(TreeBuilderObject * self,PyObject * target,PyObject * text)2857 treebuilder_handle_pi(TreeBuilderObject* self, PyObject* target, PyObject* text)
2858 {
2859     PyObject* pi;
2860     PyObject* this;
2861     PyObject* stack[2] = {target, text};
2862 
2863     if (treebuilder_flush_data(self) < 0) {
2864         return NULL;
2865     }
2866 
2867     if (self->pi_factory) {
2868         pi = _PyObject_FastCall(self->pi_factory, stack, 2);
2869         if (!pi) {
2870             return NULL;
2871         }
2872 
2873         this = self->this;
2874         if (self->insert_pis && this != Py_None) {
2875             if (treebuilder_add_subelement(this, pi) < 0)
2876                 goto error;
2877             Py_INCREF(pi);
2878             Py_XSETREF(self->last_for_tail, pi);
2879         }
2880     } else {
2881         pi = PyTuple_Pack(2, target, text);
2882         if (!pi) {
2883             return NULL;
2884         }
2885     }
2886 
2887     if (self->events_append && self->pi_event_obj) {
2888         if (treebuilder_append_event(self, self->pi_event_obj, pi) < 0)
2889             goto error;
2890     }
2891 
2892     return pi;
2893 
2894   error:
2895     Py_DECREF(pi);
2896     return NULL;
2897 }
2898 
2899 LOCAL(PyObject*)
treebuilder_handle_start_ns(TreeBuilderObject * self,PyObject * prefix,PyObject * uri)2900 treebuilder_handle_start_ns(TreeBuilderObject* self, PyObject* prefix, PyObject* uri)
2901 {
2902     PyObject* parcel;
2903 
2904     if (self->events_append && self->start_ns_event_obj) {
2905         parcel = PyTuple_Pack(2, prefix, uri);
2906         if (!parcel) {
2907             return NULL;
2908         }
2909 
2910         if (treebuilder_append_event(self, self->start_ns_event_obj, parcel) < 0) {
2911             Py_DECREF(parcel);
2912             return NULL;
2913         }
2914         Py_DECREF(parcel);
2915     }
2916 
2917     Py_RETURN_NONE;
2918 }
2919 
2920 LOCAL(PyObject*)
treebuilder_handle_end_ns(TreeBuilderObject * self,PyObject * prefix)2921 treebuilder_handle_end_ns(TreeBuilderObject* self, PyObject* prefix)
2922 {
2923     if (self->events_append && self->end_ns_event_obj) {
2924         if (treebuilder_append_event(self, self->end_ns_event_obj, prefix) < 0) {
2925             return NULL;
2926         }
2927     }
2928 
2929     Py_RETURN_NONE;
2930 }
2931 
2932 /* -------------------------------------------------------------------- */
2933 /* methods (in alphabetical order) */
2934 
2935 /*[clinic input]
2936 _elementtree.TreeBuilder.data
2937 
2938     data: object
2939     /
2940 
2941 [clinic start generated code]*/
2942 
2943 static PyObject *
_elementtree_TreeBuilder_data(TreeBuilderObject * self,PyObject * data)2944 _elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
2945 /*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
2946 {
2947     return treebuilder_handle_data(self, data);
2948 }
2949 
2950 /*[clinic input]
2951 _elementtree.TreeBuilder.end
2952 
2953     tag: object
2954     /
2955 
2956 [clinic start generated code]*/
2957 
2958 static PyObject *
_elementtree_TreeBuilder_end(TreeBuilderObject * self,PyObject * tag)2959 _elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
2960 /*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
2961 {
2962     return treebuilder_handle_end(self, tag);
2963 }
2964 
2965 /*[clinic input]
2966 _elementtree.TreeBuilder.comment
2967 
2968     text: object
2969     /
2970 
2971 [clinic start generated code]*/
2972 
2973 static PyObject *
_elementtree_TreeBuilder_comment(TreeBuilderObject * self,PyObject * text)2974 _elementtree_TreeBuilder_comment(TreeBuilderObject *self, PyObject *text)
2975 /*[clinic end generated code: output=22835be41deeaa27 input=47e7ebc48ed01dfa]*/
2976 {
2977     return treebuilder_handle_comment(self, text);
2978 }
2979 
2980 /*[clinic input]
2981 _elementtree.TreeBuilder.pi
2982 
2983     target: object
2984     text: object = None
2985     /
2986 
2987 [clinic start generated code]*/
2988 
2989 static PyObject *
_elementtree_TreeBuilder_pi_impl(TreeBuilderObject * self,PyObject * target,PyObject * text)2990 _elementtree_TreeBuilder_pi_impl(TreeBuilderObject *self, PyObject *target,
2991                                  PyObject *text)
2992 /*[clinic end generated code: output=21eb95ec9d04d1d9 input=349342bd79c35570]*/
2993 {
2994     return treebuilder_handle_pi(self, target, text);
2995 }
2996 
2997 LOCAL(PyObject*)
treebuilder_done(TreeBuilderObject * self)2998 treebuilder_done(TreeBuilderObject* self)
2999 {
3000     PyObject* res;
3001 
3002     /* FIXME: check stack size? */
3003 
3004     if (self->root)
3005         res = self->root;
3006     else
3007         res = Py_None;
3008 
3009     Py_INCREF(res);
3010     return res;
3011 }
3012 
3013 /*[clinic input]
3014 _elementtree.TreeBuilder.close
3015 
3016 [clinic start generated code]*/
3017 
3018 static PyObject *
_elementtree_TreeBuilder_close_impl(TreeBuilderObject * self)3019 _elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
3020 /*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
3021 {
3022     return treebuilder_done(self);
3023 }
3024 
3025 /*[clinic input]
3026 _elementtree.TreeBuilder.start
3027 
3028     tag: object
3029     attrs: object(subclass_of='&PyDict_Type')
3030     /
3031 
3032 [clinic start generated code]*/
3033 
3034 static PyObject *
_elementtree_TreeBuilder_start_impl(TreeBuilderObject * self,PyObject * tag,PyObject * attrs)3035 _elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
3036                                     PyObject *attrs)
3037 /*[clinic end generated code: output=e7e9dc2861349411 input=7288e9e38e63b2b6]*/
3038 {
3039     return treebuilder_handle_start(self, tag, attrs);
3040 }
3041 
3042 /* ==================================================================== */
3043 /* the expat interface */
3044 
3045 #include "expat.h"
3046 #include "pyexpat.h"
3047 
/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
 * cached globally without being in per-module state.
 */
static struct PyExpat_CAPI *expat_capi;
/* EXPAT(func) selects the entry named `func` from the cached dispatch
   table; expat_capi must have been set before any use. */
#define EXPAT(func) (expat_capi->func)

/* Memory-handling suite passed to ParserCreate_MM when an XMLParser is
   initialised; it names the PyObject_* allocator functions. */
static XML_Memory_Handling_Suite ExpatMemoryHandler = {
    PyObject_Malloc, PyObject_Realloc, PyObject_Free};
3056 
/* Instance layout of the XMLParser type.  Every pointer below is set to
   NULL by xmlparser_new() and filled in by __init__. */
typedef struct {
    PyObject_HEAD

    /* expat parser handle; created in __init__, freed in xmlparser_gc_clear */
    XML_Parser parser;

    /* event target; still NULL means __init__ has not been called */
    PyObject *target;
    /* dict consulted by the default handler to resolve "&name;" references */
    PyObject *entity;

    /* dict cache used by makeuniversal(): raw bytes name -> decoded name */
    PyObject *names;

    /* Attributes fetched from the target by name in __init__; a slot stays
       NULL when the target does not provide that attribute. */
    PyObject *handle_start_ns;
    PyObject *handle_end_ns;
    PyObject *handle_start;
    PyObject *handle_data;
    PyObject *handle_end;

    PyObject *handle_comment;
    PyObject *handle_pi;
    PyObject *handle_doctype;

    PyObject *handle_close;

} XMLParserObject;
3080 
3081 /* helpers */
3082 
3083 LOCAL(PyObject*)
makeuniversal(XMLParserObject * self,const char * string)3084 makeuniversal(XMLParserObject* self, const char* string)
3085 {
3086     /* convert a UTF-8 tag/attribute name from the expat parser
3087        to a universal name string */
3088 
3089     Py_ssize_t size = (Py_ssize_t) strlen(string);
3090     PyObject* key;
3091     PyObject* value;
3092 
3093     /* look the 'raw' name up in the names dictionary */
3094     key = PyBytes_FromStringAndSize(string, size);
3095     if (!key)
3096         return NULL;
3097 
3098     value = PyDict_GetItemWithError(self->names, key);
3099 
3100     if (value) {
3101         Py_INCREF(value);
3102     }
3103     else if (!PyErr_Occurred()) {
3104         /* new name.  convert to universal name, and decode as
3105            necessary */
3106 
3107         PyObject* tag;
3108         char* p;
3109         Py_ssize_t i;
3110 
3111         /* look for namespace separator */
3112         for (i = 0; i < size; i++)
3113             if (string[i] == '}')
3114                 break;
3115         if (i != size) {
3116             /* convert to universal name */
3117             tag = PyBytes_FromStringAndSize(NULL, size+1);
3118             if (tag == NULL) {
3119                 Py_DECREF(key);
3120                 return NULL;
3121             }
3122             p = PyBytes_AS_STRING(tag);
3123             p[0] = '{';
3124             memcpy(p+1, string, size);
3125             size++;
3126         } else {
3127             /* plain name; use key as tag */
3128             Py_INCREF(key);
3129             tag = key;
3130         }
3131 
3132         /* decode universal name */
3133         p = PyBytes_AS_STRING(tag);
3134         value = PyUnicode_DecodeUTF8(p, size, "strict");
3135         Py_DECREF(tag);
3136         if (!value) {
3137             Py_DECREF(key);
3138             return NULL;
3139         }
3140 
3141         /* add to names dictionary */
3142         if (PyDict_SetItem(self->names, key, value) < 0) {
3143             Py_DECREF(key);
3144             Py_DECREF(value);
3145             return NULL;
3146         }
3147     }
3148 
3149     Py_DECREF(key);
3150     return value;
3151 }
3152 
3153 /* Set the ParseError exception with the given parameters.
3154  * If message is not NULL, it's used as the error string. Otherwise, the
3155  * message string is the default for the given error_code.
3156 */
3157 static void
expat_set_error(enum XML_Error error_code,Py_ssize_t line,Py_ssize_t column,const char * message)3158 expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
3159                 const char *message)
3160 {
3161     PyObject *errmsg, *error, *position, *code;
3162     elementtreestate *st = ET_STATE_GLOBAL;
3163 
3164     errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
3165                 message ? message : EXPAT(ErrorString)(error_code),
3166                 line, column);
3167     if (errmsg == NULL)
3168         return;
3169 
3170     error = PyObject_CallOneArg(st->parseerror_obj, errmsg);
3171     Py_DECREF(errmsg);
3172     if (!error)
3173         return;
3174 
3175     /* Add code and position attributes */
3176     code = PyLong_FromLong((long)error_code);
3177     if (!code) {
3178         Py_DECREF(error);
3179         return;
3180     }
3181     if (PyObject_SetAttrString(error, "code", code) == -1) {
3182         Py_DECREF(error);
3183         Py_DECREF(code);
3184         return;
3185     }
3186     Py_DECREF(code);
3187 
3188     position = Py_BuildValue("(nn)", line, column);
3189     if (!position) {
3190         Py_DECREF(error);
3191         return;
3192     }
3193     if (PyObject_SetAttrString(error, "position", position) == -1) {
3194         Py_DECREF(error);
3195         Py_DECREF(position);
3196         return;
3197     }
3198     Py_DECREF(position);
3199 
3200     PyErr_SetObject(st->parseerror_obj, error);
3201     Py_DECREF(error);
3202 }
3203 
3204 /* -------------------------------------------------------------------- */
3205 /* handlers */
3206 
3207 static void
expat_default_handler(XMLParserObject * self,const XML_Char * data_in,int data_len)3208 expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
3209                       int data_len)
3210 {
3211     PyObject* key;
3212     PyObject* value;
3213     PyObject* res;
3214 
3215     if (data_len < 2 || data_in[0] != '&')
3216         return;
3217 
3218     if (PyErr_Occurred())
3219         return;
3220 
3221     key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
3222     if (!key)
3223         return;
3224 
3225     value = PyDict_GetItemWithError(self->entity, key);
3226 
3227     if (value) {
3228         if (TreeBuilder_CheckExact(self->target))
3229             res = treebuilder_handle_data(
3230                 (TreeBuilderObject*) self->target, value
3231                 );
3232         else if (self->handle_data)
3233             res = PyObject_CallOneArg(self->handle_data, value);
3234         else
3235             res = NULL;
3236         Py_XDECREF(res);
3237     } else if (!PyErr_Occurred()) {
3238         /* Report the first error, not the last */
3239         char message[128] = "undefined entity ";
3240         strncat(message, data_in, data_len < 100?data_len:100);
3241         expat_set_error(
3242             XML_ERROR_UNDEFINED_ENTITY,
3243             EXPAT(GetErrorLineNumber)(self->parser),
3244             EXPAT(GetErrorColumnNumber)(self->parser),
3245             message
3246             );
3247     }
3248 
3249     Py_DECREF(key);
3250 }
3251 
3252 static void
expat_start_handler(XMLParserObject * self,const XML_Char * tag_in,const XML_Char ** attrib_in)3253 expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
3254                     const XML_Char **attrib_in)
3255 {
3256     PyObject* res;
3257     PyObject* tag;
3258     PyObject* attrib;
3259     int ok;
3260 
3261     if (PyErr_Occurred())
3262         return;
3263 
3264     /* tag name */
3265     tag = makeuniversal(self, tag_in);
3266     if (!tag)
3267         return; /* parser will look for errors */
3268 
3269     /* attributes */
3270     if (attrib_in[0]) {
3271         attrib = PyDict_New();
3272         if (!attrib) {
3273             Py_DECREF(tag);
3274             return;
3275         }
3276         while (attrib_in[0] && attrib_in[1]) {
3277             PyObject* key = makeuniversal(self, attrib_in[0]);
3278             PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
3279             if (!key || !value) {
3280                 Py_XDECREF(value);
3281                 Py_XDECREF(key);
3282                 Py_DECREF(attrib);
3283                 Py_DECREF(tag);
3284                 return;
3285             }
3286             ok = PyDict_SetItem(attrib, key, value);
3287             Py_DECREF(value);
3288             Py_DECREF(key);
3289             if (ok < 0) {
3290                 Py_DECREF(attrib);
3291                 Py_DECREF(tag);
3292                 return;
3293             }
3294             attrib_in += 2;
3295         }
3296     } else {
3297         attrib = NULL;
3298     }
3299 
3300     if (TreeBuilder_CheckExact(self->target)) {
3301         /* shortcut */
3302         res = treebuilder_handle_start((TreeBuilderObject*) self->target,
3303                                        tag, attrib);
3304     }
3305     else if (self->handle_start) {
3306         if (attrib == NULL) {
3307             attrib = PyDict_New();
3308             if (!attrib) {
3309                 Py_DECREF(tag);
3310                 return;
3311             }
3312         }
3313         res = PyObject_CallFunctionObjArgs(self->handle_start,
3314                                            tag, attrib, NULL);
3315     } else
3316         res = NULL;
3317 
3318     Py_DECREF(tag);
3319     Py_XDECREF(attrib);
3320 
3321     Py_XDECREF(res);
3322 }
3323 
3324 static void
expat_data_handler(XMLParserObject * self,const XML_Char * data_in,int data_len)3325 expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
3326                    int data_len)
3327 {
3328     PyObject* data;
3329     PyObject* res;
3330 
3331     if (PyErr_Occurred())
3332         return;
3333 
3334     data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
3335     if (!data)
3336         return; /* parser will look for errors */
3337 
3338     if (TreeBuilder_CheckExact(self->target))
3339         /* shortcut */
3340         res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3341     else if (self->handle_data)
3342         res = PyObject_CallOneArg(self->handle_data, data);
3343     else
3344         res = NULL;
3345 
3346     Py_DECREF(data);
3347 
3348     Py_XDECREF(res);
3349 }
3350 
3351 static void
expat_end_handler(XMLParserObject * self,const XML_Char * tag_in)3352 expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3353 {
3354     PyObject* tag;
3355     PyObject* res = NULL;
3356 
3357     if (PyErr_Occurred())
3358         return;
3359 
3360     if (TreeBuilder_CheckExact(self->target))
3361         /* shortcut */
3362         /* the standard tree builder doesn't look at the end tag */
3363         res = treebuilder_handle_end(
3364             (TreeBuilderObject*) self->target, Py_None
3365             );
3366     else if (self->handle_end) {
3367         tag = makeuniversal(self, tag_in);
3368         if (tag) {
3369             res = PyObject_CallOneArg(self->handle_end, tag);
3370             Py_DECREF(tag);
3371         }
3372     }
3373 
3374     Py_XDECREF(res);
3375 }
3376 
3377 static void
expat_start_ns_handler(XMLParserObject * self,const XML_Char * prefix_in,const XML_Char * uri_in)3378 expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix_in,
3379                        const XML_Char *uri_in)
3380 {
3381     PyObject* res = NULL;
3382     PyObject* uri;
3383     PyObject* prefix;
3384     PyObject* stack[2];
3385 
3386     if (PyErr_Occurred())
3387         return;
3388 
3389     if (!uri_in)
3390         uri_in = "";
3391     if (!prefix_in)
3392         prefix_in = "";
3393 
3394     if (TreeBuilder_CheckExact(self->target)) {
3395         /* shortcut - TreeBuilder does not actually implement .start_ns() */
3396         TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3397 
3398         if (target->events_append && target->start_ns_event_obj) {
3399             prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
3400             if (!prefix)
3401                 return;
3402             uri = PyUnicode_DecodeUTF8(uri_in, strlen(uri_in), "strict");
3403             if (!uri) {
3404                 Py_DECREF(prefix);
3405                 return;
3406             }
3407 
3408             res = treebuilder_handle_start_ns(target, prefix, uri);
3409             Py_DECREF(uri);
3410             Py_DECREF(prefix);
3411         }
3412     } else if (self->handle_start_ns) {
3413         prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
3414         if (!prefix)
3415             return;
3416         uri = PyUnicode_DecodeUTF8(uri_in, strlen(uri_in), "strict");
3417         if (!uri) {
3418             Py_DECREF(prefix);
3419             return;
3420         }
3421 
3422         stack[0] = prefix;
3423         stack[1] = uri;
3424         res = _PyObject_FastCall(self->handle_start_ns, stack, 2);
3425         Py_DECREF(uri);
3426         Py_DECREF(prefix);
3427     }
3428 
3429     Py_XDECREF(res);
3430 }
3431 
3432 static void
expat_end_ns_handler(XMLParserObject * self,const XML_Char * prefix_in)3433 expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3434 {
3435     PyObject *res = NULL;
3436     PyObject* prefix;
3437 
3438     if (PyErr_Occurred())
3439         return;
3440 
3441     if (!prefix_in)
3442         prefix_in = "";
3443 
3444     if (TreeBuilder_CheckExact(self->target)) {
3445         /* shortcut - TreeBuilder does not actually implement .end_ns() */
3446         TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3447 
3448         if (target->events_append && target->end_ns_event_obj) {
3449             res = treebuilder_handle_end_ns(target, Py_None);
3450         }
3451     } else if (self->handle_end_ns) {
3452         prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
3453         if (!prefix)
3454             return;
3455 
3456         res = PyObject_CallOneArg(self->handle_end_ns, prefix);
3457         Py_DECREF(prefix);
3458     }
3459 
3460     Py_XDECREF(res);
3461 }
3462 
3463 static void
expat_comment_handler(XMLParserObject * self,const XML_Char * comment_in)3464 expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3465 {
3466     PyObject* comment;
3467     PyObject* res;
3468 
3469     if (PyErr_Occurred())
3470         return;
3471 
3472     if (TreeBuilder_CheckExact(self->target)) {
3473         /* shortcut */
3474         TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3475 
3476         comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
3477         if (!comment)
3478             return; /* parser will look for errors */
3479 
3480         res = treebuilder_handle_comment(target,  comment);
3481         Py_XDECREF(res);
3482         Py_DECREF(comment);
3483     } else if (self->handle_comment) {
3484         comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
3485         if (!comment)
3486             return;
3487 
3488         res = PyObject_CallOneArg(self->handle_comment, comment);
3489         Py_XDECREF(res);
3490         Py_DECREF(comment);
3491     }
3492 }
3493 
3494 static void
expat_start_doctype_handler(XMLParserObject * self,const XML_Char * doctype_name,const XML_Char * sysid,const XML_Char * pubid,int has_internal_subset)3495 expat_start_doctype_handler(XMLParserObject *self,
3496                             const XML_Char *doctype_name,
3497                             const XML_Char *sysid,
3498                             const XML_Char *pubid,
3499                             int has_internal_subset)
3500 {
3501     _Py_IDENTIFIER(doctype);
3502     PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3503     PyObject *res;
3504 
3505     if (PyErr_Occurred())
3506         return;
3507 
3508     doctype_name_obj = makeuniversal(self, doctype_name);
3509     if (!doctype_name_obj)
3510         return;
3511 
3512     if (sysid) {
3513         sysid_obj = makeuniversal(self, sysid);
3514         if (!sysid_obj) {
3515             Py_DECREF(doctype_name_obj);
3516             return;
3517         }
3518     } else {
3519         Py_INCREF(Py_None);
3520         sysid_obj = Py_None;
3521     }
3522 
3523     if (pubid) {
3524         pubid_obj = makeuniversal(self, pubid);
3525         if (!pubid_obj) {
3526             Py_DECREF(doctype_name_obj);
3527             Py_DECREF(sysid_obj);
3528             return;
3529         }
3530     } else {
3531         Py_INCREF(Py_None);
3532         pubid_obj = Py_None;
3533     }
3534 
3535     /* If the target has a handler for doctype, call it. */
3536     if (self->handle_doctype) {
3537         res = PyObject_CallFunctionObjArgs(self->handle_doctype,
3538                                            doctype_name_obj, pubid_obj,
3539                                            sysid_obj, NULL);
3540         Py_XDECREF(res);
3541     }
3542     else if (_PyObject_LookupAttrId((PyObject *)self, &PyId_doctype, &res) > 0) {
3543         (void)PyErr_WarnEx(PyExc_RuntimeWarning,
3544                 "The doctype() method of XMLParser is ignored.  "
3545                 "Define doctype() method on the TreeBuilder target.",
3546                 1);
3547         Py_DECREF(res);
3548     }
3549 
3550     Py_DECREF(doctype_name_obj);
3551     Py_DECREF(pubid_obj);
3552     Py_DECREF(sysid_obj);
3553 }
3554 
3555 static void
expat_pi_handler(XMLParserObject * self,const XML_Char * target_in,const XML_Char * data_in)3556 expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3557                  const XML_Char* data_in)
3558 {
3559     PyObject* pi_target;
3560     PyObject* data;
3561     PyObject* res;
3562     PyObject* stack[2];
3563 
3564     if (PyErr_Occurred())
3565         return;
3566 
3567     if (TreeBuilder_CheckExact(self->target)) {
3568         /* shortcut */
3569         TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3570 
3571         if ((target->events_append && target->pi_event_obj) || target->insert_pis) {
3572             pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3573             if (!pi_target)
3574                 goto error;
3575             data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
3576             if (!data)
3577                 goto error;
3578             res = treebuilder_handle_pi(target, pi_target, data);
3579             Py_XDECREF(res);
3580             Py_DECREF(data);
3581             Py_DECREF(pi_target);
3582         }
3583     } else if (self->handle_pi) {
3584         pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3585         if (!pi_target)
3586             goto error;
3587         data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
3588         if (!data)
3589             goto error;
3590 
3591         stack[0] = pi_target;
3592         stack[1] = data;
3593         res = _PyObject_FastCall(self->handle_pi, stack, 2);
3594         Py_XDECREF(res);
3595         Py_DECREF(data);
3596         Py_DECREF(pi_target);
3597     }
3598 
3599     return;
3600 
3601   error:
3602     Py_XDECREF(pi_target);
3603     return;
3604 }
3605 
3606 /* -------------------------------------------------------------------- */
3607 
3608 static PyObject *
xmlparser_new(PyTypeObject * type,PyObject * args,PyObject * kwds)3609 xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3610 {
3611     XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3612     if (self) {
3613         self->parser = NULL;
3614         self->target = self->entity = self->names = NULL;
3615         self->handle_start_ns = self->handle_end_ns = NULL;
3616         self->handle_start = self->handle_data = self->handle_end = NULL;
3617         self->handle_comment = self->handle_pi = self->handle_close = NULL;
3618         self->handle_doctype = NULL;
3619     }
3620     return (PyObject *)self;
3621 }
3622 
3623 static int
ignore_attribute_error(PyObject * value)3624 ignore_attribute_error(PyObject *value)
3625 {
3626     if (value == NULL) {
3627         if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
3628             return -1;
3629         }
3630         PyErr_Clear();
3631     }
3632     return 0;
3633 }
3634 
3635 /*[clinic input]
3636 _elementtree.XMLParser.__init__
3637 
3638     *
3639     target: object = NULL
3640     encoding: str(accept={str, NoneType}) = None
3641 
3642 [clinic start generated code]*/
3643 
3644 static int
_elementtree_XMLParser___init___impl(XMLParserObject * self,PyObject * target,const char * encoding)3645 _elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *target,
3646                                      const char *encoding)
3647 /*[clinic end generated code: output=3ae45ec6cdf344e4 input=53e35a829ae043e8]*/
3648 {
3649     self->entity = PyDict_New();
3650     if (!self->entity)
3651         return -1;
3652 
3653     self->names = PyDict_New();
3654     if (!self->names) {
3655         Py_CLEAR(self->entity);
3656         return -1;
3657     }
3658 
3659     self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3660     if (!self->parser) {
3661         Py_CLEAR(self->entity);
3662         Py_CLEAR(self->names);
3663         PyErr_NoMemory();
3664         return -1;
3665     }
3666     /* expat < 2.1.0 has no XML_SetHashSalt() */
3667     if (EXPAT(SetHashSalt) != NULL) {
3668         EXPAT(SetHashSalt)(self->parser,
3669                            (unsigned long)_Py_HashSecret.expat.hashsalt);
3670     }
3671 
3672     if (target) {
3673         Py_INCREF(target);
3674     } else {
3675         target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
3676         if (!target) {
3677             Py_CLEAR(self->entity);
3678             Py_CLEAR(self->names);
3679             return -1;
3680         }
3681     }
3682     self->target = target;
3683 
3684     self->handle_start_ns = PyObject_GetAttrString(target, "start_ns");
3685     if (ignore_attribute_error(self->handle_start_ns)) {
3686         return -1;
3687     }
3688     self->handle_end_ns = PyObject_GetAttrString(target, "end_ns");
3689     if (ignore_attribute_error(self->handle_end_ns)) {
3690         return -1;
3691     }
3692     self->handle_start = PyObject_GetAttrString(target, "start");
3693     if (ignore_attribute_error(self->handle_start)) {
3694         return -1;
3695     }
3696     self->handle_data = PyObject_GetAttrString(target, "data");
3697     if (ignore_attribute_error(self->handle_data)) {
3698         return -1;
3699     }
3700     self->handle_end = PyObject_GetAttrString(target, "end");
3701     if (ignore_attribute_error(self->handle_end)) {
3702         return -1;
3703     }
3704     self->handle_comment = PyObject_GetAttrString(target, "comment");
3705     if (ignore_attribute_error(self->handle_comment)) {
3706         return -1;
3707     }
3708     self->handle_pi = PyObject_GetAttrString(target, "pi");
3709     if (ignore_attribute_error(self->handle_pi)) {
3710         return -1;
3711     }
3712     self->handle_close = PyObject_GetAttrString(target, "close");
3713     if (ignore_attribute_error(self->handle_close)) {
3714         return -1;
3715     }
3716     self->handle_doctype = PyObject_GetAttrString(target, "doctype");
3717     if (ignore_attribute_error(self->handle_doctype)) {
3718         return -1;
3719     }
3720 
3721     /* configure parser */
3722     EXPAT(SetUserData)(self->parser, self);
3723     if (self->handle_start_ns || self->handle_end_ns)
3724         EXPAT(SetNamespaceDeclHandler)(
3725             self->parser,
3726             (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3727             (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3728             );
3729     EXPAT(SetElementHandler)(
3730         self->parser,
3731         (XML_StartElementHandler) expat_start_handler,
3732         (XML_EndElementHandler) expat_end_handler
3733         );
3734     EXPAT(SetDefaultHandlerExpand)(
3735         self->parser,
3736         (XML_DefaultHandler) expat_default_handler
3737         );
3738     EXPAT(SetCharacterDataHandler)(
3739         self->parser,
3740         (XML_CharacterDataHandler) expat_data_handler
3741         );
3742     if (self->handle_comment)
3743         EXPAT(SetCommentHandler)(
3744             self->parser,
3745             (XML_CommentHandler) expat_comment_handler
3746             );
3747     if (self->handle_pi)
3748         EXPAT(SetProcessingInstructionHandler)(
3749             self->parser,
3750             (XML_ProcessingInstructionHandler) expat_pi_handler
3751             );
3752     EXPAT(SetStartDoctypeDeclHandler)(
3753         self->parser,
3754         (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3755         );
3756     EXPAT(SetUnknownEncodingHandler)(
3757         self->parser,
3758         EXPAT(DefaultUnknownEncodingHandler), NULL
3759         );
3760 
3761     return 0;
3762 }
3763 
3764 static int
xmlparser_gc_traverse(XMLParserObject * self,visitproc visit,void * arg)3765 xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3766 {
3767     Py_VISIT(self->handle_close);
3768     Py_VISIT(self->handle_pi);
3769     Py_VISIT(self->handle_comment);
3770     Py_VISIT(self->handle_end);
3771     Py_VISIT(self->handle_data);
3772     Py_VISIT(self->handle_start);
3773     Py_VISIT(self->handle_start_ns);
3774     Py_VISIT(self->handle_end_ns);
3775     Py_VISIT(self->handle_doctype);
3776 
3777     Py_VISIT(self->target);
3778     Py_VISIT(self->entity);
3779     Py_VISIT(self->names);
3780 
3781     return 0;
3782 }
3783 
3784 static int
xmlparser_gc_clear(XMLParserObject * self)3785 xmlparser_gc_clear(XMLParserObject *self)
3786 {
3787     if (self->parser != NULL) {
3788         XML_Parser parser = self->parser;
3789         self->parser = NULL;
3790         EXPAT(ParserFree)(parser);
3791     }
3792 
3793     Py_CLEAR(self->handle_close);
3794     Py_CLEAR(self->handle_pi);
3795     Py_CLEAR(self->handle_comment);
3796     Py_CLEAR(self->handle_end);
3797     Py_CLEAR(self->handle_data);
3798     Py_CLEAR(self->handle_start);
3799     Py_CLEAR(self->handle_start_ns);
3800     Py_CLEAR(self->handle_end_ns);
3801     Py_CLEAR(self->handle_doctype);
3802 
3803     Py_CLEAR(self->target);
3804     Py_CLEAR(self->entity);
3805     Py_CLEAR(self->names);
3806 
3807     return 0;
3808 }
3809 
3810 static void
xmlparser_dealloc(XMLParserObject * self)3811 xmlparser_dealloc(XMLParserObject* self)
3812 {
3813     PyObject_GC_UnTrack(self);
3814     xmlparser_gc_clear(self);
3815     Py_TYPE(self)->tp_free((PyObject *)self);
3816 }
3817 
3818 Py_LOCAL_INLINE(int)
_check_xmlparser(XMLParserObject * self)3819 _check_xmlparser(XMLParserObject* self)
3820 {
3821     if (self->target == NULL) {
3822         PyErr_SetString(PyExc_ValueError,
3823                         "XMLParser.__init__() wasn't called");
3824         return 0;
3825     }
3826     return 1;
3827 }
3828 
3829 LOCAL(PyObject*)
expat_parse(XMLParserObject * self,const char * data,int data_len,int final)3830 expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
3831 {
3832     int ok;
3833 
3834     assert(!PyErr_Occurred());
3835     ok = EXPAT(Parse)(self->parser, data, data_len, final);
3836 
3837     if (PyErr_Occurred())
3838         return NULL;
3839 
3840     if (!ok) {
3841         expat_set_error(
3842             EXPAT(GetErrorCode)(self->parser),
3843             EXPAT(GetErrorLineNumber)(self->parser),
3844             EXPAT(GetErrorColumnNumber)(self->parser),
3845             NULL
3846             );
3847         return NULL;
3848     }
3849 
3850     Py_RETURN_NONE;
3851 }
3852 
3853 /*[clinic input]
3854 _elementtree.XMLParser.close
3855 
3856 [clinic start generated code]*/
3857 
3858 static PyObject *
_elementtree_XMLParser_close_impl(XMLParserObject * self)3859 _elementtree_XMLParser_close_impl(XMLParserObject *self)
3860 /*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
3861 {
3862     /* end feeding data to parser */
3863 
3864     PyObject* res;
3865 
3866     if (!_check_xmlparser(self)) {
3867         return NULL;
3868     }
3869     res = expat_parse(self, "", 0, 1);
3870     if (!res)
3871         return NULL;
3872 
3873     if (TreeBuilder_CheckExact(self->target)) {
3874         Py_DECREF(res);
3875         return treebuilder_done((TreeBuilderObject*) self->target);
3876     }
3877     else if (self->handle_close) {
3878         Py_DECREF(res);
3879         return PyObject_CallNoArgs(self->handle_close);
3880     }
3881     else {
3882         return res;
3883     }
3884 }
3885 
3886 /*[clinic input]
3887 _elementtree.XMLParser.feed
3888 
3889     data: object
3890     /
3891 
3892 [clinic start generated code]*/
3893 
3894 static PyObject *
_elementtree_XMLParser_feed(XMLParserObject * self,PyObject * data)3895 _elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3896 /*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
3897 {
3898     /* feed data to parser */
3899 
3900     if (!_check_xmlparser(self)) {
3901         return NULL;
3902     }
3903     if (PyUnicode_Check(data)) {
3904         Py_ssize_t data_len;
3905         const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3906         if (data_ptr == NULL)
3907             return NULL;
3908         if (data_len > INT_MAX) {
3909             PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3910             return NULL;
3911         }
3912         /* Explicitly set UTF-8 encoding. Return code ignored. */
3913         (void)EXPAT(SetEncoding)(self->parser, "utf-8");
3914         return expat_parse(self, data_ptr, (int)data_len, 0);
3915     }
3916     else {
3917         Py_buffer view;
3918         PyObject *res;
3919         if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
3920             return NULL;
3921         if (view.len > INT_MAX) {
3922             PyBuffer_Release(&view);
3923             PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3924             return NULL;
3925         }
3926         res = expat_parse(self, view.buf, (int)view.len, 0);
3927         PyBuffer_Release(&view);
3928         return res;
3929     }
3930 }
3931 
3932 /*[clinic input]
3933 _elementtree.XMLParser._parse_whole
3934 
3935     file: object
3936     /
3937 
3938 [clinic start generated code]*/
3939 
3940 static PyObject *
_elementtree_XMLParser__parse_whole(XMLParserObject * self,PyObject * file)3941 _elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3942 /*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
3943 {
3944     /* (internal) parse the whole input, until end of stream */
3945     PyObject* reader;
3946     PyObject* buffer;
3947     PyObject* temp;
3948     PyObject* res;
3949 
3950     if (!_check_xmlparser(self)) {
3951         return NULL;
3952     }
3953     reader = PyObject_GetAttrString(file, "read");
3954     if (!reader)
3955         return NULL;
3956 
3957     /* read from open file object */
3958     for (;;) {
3959 
3960         buffer = PyObject_CallFunction(reader, "i", 64*1024);
3961 
3962         if (!buffer) {
3963             /* read failed (e.g. due to KeyboardInterrupt) */
3964             Py_DECREF(reader);
3965             return NULL;
3966         }
3967 
3968         if (PyUnicode_CheckExact(buffer)) {
3969             /* A unicode object is encoded into bytes using UTF-8 */
3970             if (PyUnicode_GET_LENGTH(buffer) == 0) {
3971                 Py_DECREF(buffer);
3972                 break;
3973             }
3974             temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
3975             Py_DECREF(buffer);
3976             if (!temp) {
3977                 /* Propagate exception from PyUnicode_AsEncodedString */
3978                 Py_DECREF(reader);
3979                 return NULL;
3980             }
3981             buffer = temp;
3982         }
3983         else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
3984             Py_DECREF(buffer);
3985             break;
3986         }
3987 
3988         if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3989             Py_DECREF(buffer);
3990             Py_DECREF(reader);
3991             PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3992             return NULL;
3993         }
3994         res = expat_parse(
3995             self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
3996             );
3997 
3998         Py_DECREF(buffer);
3999 
4000         if (!res) {
4001             Py_DECREF(reader);
4002             return NULL;
4003         }
4004         Py_DECREF(res);
4005 
4006     }
4007 
4008     Py_DECREF(reader);
4009 
4010     res = expat_parse(self, "", 0, 1);
4011 
4012     if (res && TreeBuilder_CheckExact(self->target)) {
4013         Py_DECREF(res);
4014         return treebuilder_done((TreeBuilderObject*) self->target);
4015     }
4016 
4017     return res;
4018 }
4019 
4020 /*[clinic input]
4021 _elementtree.XMLParser._setevents
4022 
4023     events_queue: object
4024     events_to_report: object = None
4025     /
4026 
4027 [clinic start generated code]*/
4028 
4029 static PyObject *
_elementtree_XMLParser__setevents_impl(XMLParserObject * self,PyObject * events_queue,PyObject * events_to_report)4030 _elementtree_XMLParser__setevents_impl(XMLParserObject *self,
4031                                        PyObject *events_queue,
4032                                        PyObject *events_to_report)
4033 /*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
4034 {
4035     /* activate element event reporting */
4036     Py_ssize_t i;
4037     TreeBuilderObject *target;
4038     PyObject *events_append, *events_seq;
4039 
4040     if (!_check_xmlparser(self)) {
4041         return NULL;
4042     }
4043     if (!TreeBuilder_CheckExact(self->target)) {
4044         PyErr_SetString(
4045             PyExc_TypeError,
4046             "event handling only supported for ElementTree.TreeBuilder "
4047             "targets"
4048             );
4049         return NULL;
4050     }
4051 
4052     target = (TreeBuilderObject*) self->target;
4053 
4054     events_append = PyObject_GetAttrString(events_queue, "append");
4055     if (events_append == NULL)
4056         return NULL;
4057     Py_XSETREF(target->events_append, events_append);
4058 
4059     /* clear out existing events */
4060     Py_CLEAR(target->start_event_obj);
4061     Py_CLEAR(target->end_event_obj);
4062     Py_CLEAR(target->start_ns_event_obj);
4063     Py_CLEAR(target->end_ns_event_obj);
4064     Py_CLEAR(target->comment_event_obj);
4065     Py_CLEAR(target->pi_event_obj);
4066 
4067     if (events_to_report == Py_None) {
4068         /* default is "end" only */
4069         target->end_event_obj = PyUnicode_FromString("end");
4070         Py_RETURN_NONE;
4071     }
4072 
4073     if (!(events_seq = PySequence_Fast(events_to_report,
4074                                        "events must be a sequence"))) {
4075         return NULL;
4076     }
4077 
4078     for (i = 0; i < PySequence_Fast_GET_SIZE(events_seq); ++i) {
4079         PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
4080         const char *event_name = NULL;
4081         if (PyUnicode_Check(event_name_obj)) {
4082             event_name = PyUnicode_AsUTF8(event_name_obj);
4083         } else if (PyBytes_Check(event_name_obj)) {
4084             event_name = PyBytes_AS_STRING(event_name_obj);
4085         }
4086         if (event_name == NULL) {
4087             Py_DECREF(events_seq);
4088             PyErr_Format(PyExc_ValueError, "invalid events sequence");
4089             return NULL;
4090         }
4091 
4092         Py_INCREF(event_name_obj);
4093         if (strcmp(event_name, "start") == 0) {
4094             Py_XSETREF(target->start_event_obj, event_name_obj);
4095         } else if (strcmp(event_name, "end") == 0) {
4096             Py_XSETREF(target->end_event_obj, event_name_obj);
4097         } else if (strcmp(event_name, "start-ns") == 0) {
4098             Py_XSETREF(target->start_ns_event_obj, event_name_obj);
4099             EXPAT(SetNamespaceDeclHandler)(
4100                 self->parser,
4101                 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
4102                 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
4103                 );
4104         } else if (strcmp(event_name, "end-ns") == 0) {
4105             Py_XSETREF(target->end_ns_event_obj, event_name_obj);
4106             EXPAT(SetNamespaceDeclHandler)(
4107                 self->parser,
4108                 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
4109                 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
4110                 );
4111         } else if (strcmp(event_name, "comment") == 0) {
4112             Py_XSETREF(target->comment_event_obj, event_name_obj);
4113             EXPAT(SetCommentHandler)(
4114                 self->parser,
4115                 (XML_CommentHandler) expat_comment_handler
4116                 );
4117         } else if (strcmp(event_name, "pi") == 0) {
4118             Py_XSETREF(target->pi_event_obj, event_name_obj);
4119             EXPAT(SetProcessingInstructionHandler)(
4120                 self->parser,
4121                 (XML_ProcessingInstructionHandler) expat_pi_handler
4122                 );
4123         } else {
4124             Py_DECREF(event_name_obj);
4125             Py_DECREF(events_seq);
4126             PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
4127             return NULL;
4128         }
4129     }
4130 
4131     Py_DECREF(events_seq);
4132     Py_RETURN_NONE;
4133 }
4134 
4135 static PyMemberDef xmlparser_members[] = {
4136     {"entity", T_OBJECT, offsetof(XMLParserObject, entity), READONLY, NULL},
4137     {"target", T_OBJECT, offsetof(XMLParserObject, target), READONLY, NULL},
4138     {NULL}
4139 };
4140 
4141 static PyObject*
xmlparser_version_getter(XMLParserObject * self,void * closure)4142 xmlparser_version_getter(XMLParserObject *self, void *closure)
4143 {
4144     return PyUnicode_FromFormat(
4145         "Expat %d.%d.%d", XML_MAJOR_VERSION,
4146         XML_MINOR_VERSION, XML_MICRO_VERSION);
4147 }
4148 
4149 static PyGetSetDef xmlparser_getsetlist[] = {
4150     {"version", (getter)xmlparser_version_getter, NULL, NULL},
4151     {NULL},
4152 };
4153 
4154 #include "clinic/_elementtree.c.h"
4155 
/* Method table for Element objects (installed as Element_Type.tp_methods).
   Every entry is a *_METHODDEF macro; these are presumably supplied by the
   "clinic/_elementtree.c.h" include above -- confirm against that header. */
static PyMethodDef element_methods[] = {

    _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF

    _ELEMENTTREE_ELEMENT_GET_METHODDEF
    _ELEMENTTREE_ELEMENT_SET_METHODDEF

    _ELEMENTTREE_ELEMENT_FIND_METHODDEF
    _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
    _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF

    _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
    _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
    _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
    _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF

    _ELEMENTTREE_ELEMENT_ITER_METHODDEF
    _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
    _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF

    _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
    _ELEMENTTREE_ELEMENT_KEYS_METHODDEF

    _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF

    _ELEMENTTREE_ELEMENT___COPY___METHODDEF
    _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
    _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
    _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
    _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF

    /* sentinel terminating the table */
    {NULL, NULL}
};
4189 
4190 static PyMappingMethods element_as_mapping = {
4191     (lenfunc) element_length,
4192     (binaryfunc) element_subscr,
4193     (objobjargproc) element_ass_subscr,
4194 };
4195 
4196 static PyGetSetDef element_getsetlist[] = {
4197     {"tag",
4198         (getter)element_tag_getter,
4199         (setter)element_tag_setter,
4200         "A string identifying what kind of data this element represents"},
4201     {"text",
4202         (getter)element_text_getter,
4203         (setter)element_text_setter,
4204         "A string of text directly after the start tag, or None"},
4205     {"tail",
4206         (getter)element_tail_getter,
4207         (setter)element_tail_setter,
4208         "A string of text directly after the end tag, or None"},
4209     {"attrib",
4210         (getter)element_attrib_getter,
4211         (setter)element_attrib_setter,
4212         "A dictionary containing the element's attributes"},
4213     {NULL},
4214 };
4215 
4216 static PyTypeObject Element_Type = {
4217     PyVarObject_HEAD_INIT(NULL, 0)
4218     "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
4219     /* methods */
4220     (destructor)element_dealloc,                    /* tp_dealloc */
4221     0,                                              /* tp_vectorcall_offset */
4222     0,                                              /* tp_getattr */
4223     0,                                              /* tp_setattr */
4224     0,                                              /* tp_as_async */
4225     (reprfunc)element_repr,                         /* tp_repr */
4226     0,                                              /* tp_as_number */
4227     &element_as_sequence,                           /* tp_as_sequence */
4228     &element_as_mapping,                            /* tp_as_mapping */
4229     0,                                              /* tp_hash */
4230     0,                                              /* tp_call */
4231     0,                                              /* tp_str */
4232     PyObject_GenericGetAttr,                        /* tp_getattro */
4233     0,                                              /* tp_setattro */
4234     0,                                              /* tp_as_buffer */
4235     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4236                                                     /* tp_flags */
4237     0,                                              /* tp_doc */
4238     (traverseproc)element_gc_traverse,              /* tp_traverse */
4239     (inquiry)element_gc_clear,                      /* tp_clear */
4240     0,                                              /* tp_richcompare */
4241     offsetof(ElementObject, weakreflist),           /* tp_weaklistoffset */
4242     0,                                              /* tp_iter */
4243     0,                                              /* tp_iternext */
4244     element_methods,                                /* tp_methods */
4245     0,                                              /* tp_members */
4246     element_getsetlist,                             /* tp_getset */
4247     0,                                              /* tp_base */
4248     0,                                              /* tp_dict */
4249     0,                                              /* tp_descr_get */
4250     0,                                              /* tp_descr_set */
4251     0,                                              /* tp_dictoffset */
4252     (initproc)element_init,                         /* tp_init */
4253     PyType_GenericAlloc,                            /* tp_alloc */
4254     element_new,                                    /* tp_new */
4255     0,                                              /* tp_free */
4256 };
4257 
/* Method table for TreeBuilder objects (installed as
   TreeBuilder_Type.tp_methods).  The *_METHODDEF macros are presumably
   supplied by the "clinic/_elementtree.c.h" include -- confirm there. */
static PyMethodDef treebuilder_methods[] = {
    _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
    _ELEMENTTREE_TREEBUILDER_START_METHODDEF
    _ELEMENTTREE_TREEBUILDER_END_METHODDEF
    _ELEMENTTREE_TREEBUILDER_COMMENT_METHODDEF
    _ELEMENTTREE_TREEBUILDER_PI_METHODDEF
    _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
    /* sentinel terminating the table */
    {NULL, NULL}
};
4267 
4268 static PyTypeObject TreeBuilder_Type = {
4269     PyVarObject_HEAD_INIT(NULL, 0)
4270     "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
4271     /* methods */
4272     (destructor)treebuilder_dealloc,                /* tp_dealloc */
4273     0,                                              /* tp_vectorcall_offset */
4274     0,                                              /* tp_getattr */
4275     0,                                              /* tp_setattr */
4276     0,                                              /* tp_as_async */
4277     0,                                              /* tp_repr */
4278     0,                                              /* tp_as_number */
4279     0,                                              /* tp_as_sequence */
4280     0,                                              /* tp_as_mapping */
4281     0,                                              /* tp_hash */
4282     0,                                              /* tp_call */
4283     0,                                              /* tp_str */
4284     0,                                              /* tp_getattro */
4285     0,                                              /* tp_setattro */
4286     0,                                              /* tp_as_buffer */
4287     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4288                                                     /* tp_flags */
4289     0,                                              /* tp_doc */
4290     (traverseproc)treebuilder_gc_traverse,          /* tp_traverse */
4291     (inquiry)treebuilder_gc_clear,                  /* tp_clear */
4292     0,                                              /* tp_richcompare */
4293     0,                                              /* tp_weaklistoffset */
4294     0,                                              /* tp_iter */
4295     0,                                              /* tp_iternext */
4296     treebuilder_methods,                            /* tp_methods */
4297     0,                                              /* tp_members */
4298     0,                                              /* tp_getset */
4299     0,                                              /* tp_base */
4300     0,                                              /* tp_dict */
4301     0,                                              /* tp_descr_get */
4302     0,                                              /* tp_descr_set */
4303     0,                                              /* tp_dictoffset */
4304     _elementtree_TreeBuilder___init__,              /* tp_init */
4305     PyType_GenericAlloc,                            /* tp_alloc */
4306     treebuilder_new,                                /* tp_new */
4307     0,                                              /* tp_free */
4308 };
4309 
/* Method table for XMLParser objects (installed as
   XMLParser_Type.tp_methods): feed, close, _parse_whole and _setevents.
   The *_METHODDEF macros are presumably supplied by the
   "clinic/_elementtree.c.h" include -- confirm there. */
static PyMethodDef xmlparser_methods[] = {
    _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
    _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
    _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
    _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
    /* sentinel terminating the table */
    {NULL, NULL}
};
4317 
4318 static PyTypeObject XMLParser_Type = {
4319     PyVarObject_HEAD_INIT(NULL, 0)
4320     "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
4321     /* methods */
4322     (destructor)xmlparser_dealloc,                  /* tp_dealloc */
4323     0,                                              /* tp_vectorcall_offset */
4324     0,                                              /* tp_getattr */
4325     0,                                              /* tp_setattr */
4326     0,                                              /* tp_as_async */
4327     0,                                              /* tp_repr */
4328     0,                                              /* tp_as_number */
4329     0,                                              /* tp_as_sequence */
4330     0,                                              /* tp_as_mapping */
4331     0,                                              /* tp_hash */
4332     0,                                              /* tp_call */
4333     0,                                              /* tp_str */
4334     0,                                              /* tp_getattro */
4335     0,                                              /* tp_setattro */
4336     0,                                              /* tp_as_buffer */
4337     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4338                                                     /* tp_flags */
4339     0,                                              /* tp_doc */
4340     (traverseproc)xmlparser_gc_traverse,            /* tp_traverse */
4341     (inquiry)xmlparser_gc_clear,                    /* tp_clear */
4342     0,                                              /* tp_richcompare */
4343     0,                                              /* tp_weaklistoffset */
4344     0,                                              /* tp_iter */
4345     0,                                              /* tp_iternext */
4346     xmlparser_methods,                              /* tp_methods */
4347     xmlparser_members,                              /* tp_members */
4348     xmlparser_getsetlist,                           /* tp_getset */
4349     0,                                              /* tp_base */
4350     0,                                              /* tp_dict */
4351     0,                                              /* tp_descr_get */
4352     0,                                              /* tp_descr_set */
4353     0,                                              /* tp_dictoffset */
4354     _elementtree_XMLParser___init__,                /* tp_init */
4355     PyType_GenericAlloc,                            /* tp_alloc */
4356     xmlparser_new,                                  /* tp_new */
4357     0,                                              /* tp_free */
4358 };
4359 
4360 /* ==================================================================== */
4361 /* python module interface */
4362 
4363 static PyMethodDef _functions[] = {
4364     {"SubElement", (PyCFunction)(void(*)(void)) subelement, METH_VARARGS | METH_KEYWORDS},
4365     _ELEMENTTREE__SET_FACTORIES_METHODDEF
4366     {NULL, NULL}
4367 };
4368 
4369 
4370 static struct PyModuleDef elementtreemodule = {
4371     PyModuleDef_HEAD_INIT,
4372     "_elementtree",
4373     NULL,
4374     sizeof(elementtreestate),
4375     _functions,
4376     NULL,
4377     elementtree_traverse,
4378     elementtree_clear,
4379     elementtree_free
4380 };
4381 
4382 PyMODINIT_FUNC
PyInit__elementtree(void)4383 PyInit__elementtree(void)
4384 {
4385     PyObject *m, *temp;
4386     elementtreestate *st;
4387 
4388     m = PyState_FindModule(&elementtreemodule);
4389     if (m) {
4390         Py_INCREF(m);
4391         return m;
4392     }
4393 
4394     /* Initialize object types */
4395     if (PyType_Ready(&ElementIter_Type) < 0)
4396         return NULL;
4397     if (PyType_Ready(&TreeBuilder_Type) < 0)
4398         return NULL;
4399     if (PyType_Ready(&Element_Type) < 0)
4400         return NULL;
4401     if (PyType_Ready(&XMLParser_Type) < 0)
4402         return NULL;
4403 
4404     m = PyModule_Create(&elementtreemodule);
4405     if (!m)
4406         return NULL;
4407     st = get_elementtree_state(m);
4408 
4409     if (!(temp = PyImport_ImportModule("copy")))
4410         return NULL;
4411     st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
4412     Py_XDECREF(temp);
4413 
4414     if (st->deepcopy_obj == NULL) {
4415         return NULL;
4416     }
4417 
4418     assert(!PyErr_Occurred());
4419     if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
4420         return NULL;
4421 
4422     /* link against pyexpat */
4423     expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
4424     if (expat_capi) {
4425         /* check that it's usable */
4426         if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
4427             (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
4428             expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
4429             expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
4430             expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
4431             PyErr_SetString(PyExc_ImportError,
4432                             "pyexpat version is incompatible");
4433             return NULL;
4434         }
4435     } else {
4436         return NULL;
4437     }
4438 
4439     st->parseerror_obj = PyErr_NewException(
4440         "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
4441         );
4442     Py_INCREF(st->parseerror_obj);
4443     if (PyModule_AddObject(m, "ParseError", st->parseerror_obj) < 0) {
4444         Py_DECREF(st->parseerror_obj);
4445         return NULL;
4446     }
4447 
4448     PyTypeObject *types[] = {
4449         &Element_Type,
4450         &TreeBuilder_Type,
4451         &XMLParser_Type
4452     };
4453 
4454     for (size_t i = 0; i < Py_ARRAY_LENGTH(types); i++) {
4455         if (PyModule_AddType(m, types[i]) < 0) {
4456             return NULL;
4457         }
4458     }
4459 
4460     return m;
4461 }
4462