1 /*
2 * ElementTree
3 * $Id: _elementtree.c 3473 2009-01-11 22:53:55Z fredrik $
4 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
10 * 2003-02-27 fl elementtree edition (alpha)
11 * 2004-06-03 fl updates for elementtree 1.2
12 * 2005-01-05 fl major optimization effort
13 * 2005-01-11 fl first public release (cElementTree 0.8)
14 * 2005-01-12 fl split element object into base and extras
15 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
16 * 2005-01-17 fl added treebuilder close method
17 * 2005-01-17 fl fixed crash in getchildren
18 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
19 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
20 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
21 * 2005-01-28 fl added remove method (1.0.1)
22 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
23 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
24 * 2005-03-26 fl added Comment and PI support to XMLParser
25 * 2005-03-27 fl event optimizations; complain about bogus events
26 * 2005-08-08 fl fixed read error handling in parse
27 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
28 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
29 * 2005-12-16 fl added support for non-standard encodings
30 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
31 * 2006-03-12 fl merge in 2.5 ssize_t changes
32 * 2007-08-25 fl call custom builder's close method from XMLParser
33 * 2007-08-31 fl added iter, extend from ET 1.3
34 * 2007-09-01 fl fixed ParseError exception, setslice source type, etc
35 * 2007-09-03 fl fixed handling of negative insert indexes
36 * 2007-09-04 fl added itertext from ET 1.3
37 * 2007-09-06 fl added position attribute to ParseError exception
38 * 2008-06-06 fl delay error reporting in iterparse (from Hrvoje Niksic)
39 *
40 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
41 * Copyright (c) 1999-2009 by Fredrik Lundh.
42 *
43 * info@pythonware.com
44 * http://www.pythonware.com
45 */
46
47 /* Licensed to PSF under a Contributor Agreement. */
48 /* See http://www.python.org/psf/license for licensing details. */
49
50 #include "Python.h"
51
#define VERSION "1.0.6"

/* -------------------------------------------------------------------- */
/* configuration */

/* Leave defined to include the expat-based XMLParser type */
#define USE_EXPAT

/* Define to do all expat calls via pyexpat's embedded expat library */
/* #define USE_PYEXPAT_CAPI */

/* An element can hold this many children without extra memory
   allocations (this is the size of the inline _children array in
   ElementObjectExtra). */
#define STATIC_CHILDREN 4

/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */

/* Also note that pymalloc always allocates blocks in multiples of
   eight bytes.  For the current version of cElementTree, this means
   that the number of children should be an even number, at least on
   32-bit platforms. */
76
77 /* -------------------------------------------------------------------- */
78
/* Optional allocation tracing.  With the "#if 0" branch enabled, ALLOC
   and RELEASE keep a running byte count in "memory" and print it
   together with the given comment; as shipped, both macros expand to
   nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment)\
do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
89
/* compiler tweaks */

/* LOCAL(type) opens the declaration of a file-local function returning
   "type".  When _MSC_VER is defined it also adds __inline and the
   __fastcall calling convention; otherwise it is plain "static". */
#if defined(_MSC_VER)
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
96
/* compatibility macros */

/* when building against Python older than 2.6, supply the Py_REFCNT
   and Py_TYPE accessors as direct field accesses */
#if (PY_VERSION_HEX < 0x02060000)
#define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt)
#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
#endif

/* when building against Python older than 2.5, map Py_ssize_t to int
   and lenfunc to inquiry */
#if (PY_VERSION_HEX < 0x02050000)
typedef int Py_ssize_t;
#define lenfunc inquiry
#endif

/* when building against Python older than 2.4, fall back to
   PyDict_Check for the exact-type test and supply Py_RETURN_NONE
   unless it is already defined */
#if (PY_VERSION_HEX < 0x02040000)
#define PyDict_CheckExact PyDict_Check

#if !defined(Py_RETURN_NONE)
#define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None
#endif
#endif
115
/* macros used to store 'join' flags in string object pointers.  note
   that all use of text and tail as object pointers must be wrapped in
   JOIN_OBJ.  see comments in the ElementObject definition for more
   info.

   JOIN_GET(p)        the flag: bit 0 of the pointer value (0 or 1)
   JOIN_SET(p, flag)  p with its current flag bit cleared and "flag"
                      or'ed in, as a void*
   JOIN_OBJ(p)        p with bit 0 cleared, as a PyObject* */
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~1))
123
/* glue functions (see the init function for details) */
static PyObject* elementtree_parseerror_obj; /* not referenced by the element code in this section */
static PyObject* elementtree_copyelement_obj; /* callable placed in the tuple built by element_reduce */
static PyObject* elementtree_deepcopy_obj; /* called as f(object, memo) by deepcopy() */
static PyObject* elementtree_iter_obj; /* called as f(element, tag) by element_iter_impl() */
static PyObject* elementtree_itertext_obj; /* called as f(element) by element_itertext() */
static PyObject* elementpath_obj; /* object whose find/findtext/findall/iterfind methods are called */
131
132 /* helpers */
133
134 /* Py_SETREF for a PyObject* that uses a join flag. */
135 Py_LOCAL_INLINE(void)
_set_joined_ptr(PyObject ** p,PyObject * new_joined_ptr)136 _set_joined_ptr(PyObject **p, PyObject *new_joined_ptr)
137 {
138 PyObject *tmp = JOIN_OBJ(*p);
139 *p = new_joined_ptr;
140 Py_DECREF(tmp);
141 }
142
143 LOCAL(PyObject*)
deepcopy(PyObject * object,PyObject * memo)144 deepcopy(PyObject* object, PyObject* memo)
145 {
146 /* do a deep copy of the given object */
147
148 PyObject* args;
149 PyObject* result;
150
151 if (!elementtree_deepcopy_obj) {
152 PyErr_SetString(
153 PyExc_RuntimeError,
154 "deepcopy helper not found"
155 );
156 return NULL;
157 }
158
159 args = PyTuple_New(2);
160 if (!args)
161 return NULL;
162
163 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
164 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
165
166 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
167
168 Py_DECREF(args);
169
170 return result;
171 }
172
173 LOCAL(PyObject*)
list_join(PyObject * list)174 list_join(PyObject* list)
175 {
176 /* join list elements */
177 PyObject* joiner;
178 PyObject* function;
179 PyObject* args;
180 PyObject* result;
181
182 switch (PyList_GET_SIZE(list)) {
183 case 0:
184 return PyString_FromString("");
185 case 1:
186 result = PyList_GET_ITEM(list, 0);
187 Py_INCREF(result);
188 return result;
189 }
190
191 /* two or more elements: slice out a suitable separator from the
192 first member, and use that to join the entire list */
193
194 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
195 if (!joiner)
196 return NULL;
197
198 function = PyObject_GetAttrString(joiner, "join");
199 if (!function) {
200 Py_DECREF(joiner);
201 return NULL;
202 }
203
204 args = PyTuple_New(1);
205 if (!args) {
206 Py_DECREF(function);
207 Py_DECREF(joiner);
208 return NULL;
209 }
210
211 Py_INCREF(list);
212 PyTuple_SET_ITEM(args, 0, list);
213
214 result = PyObject_CallObject(function, args);
215
216 Py_DECREF(args); /* also removes list */
217 Py_DECREF(function);
218 Py_DECREF(joiner);
219
220 return result;
221 }
222
223 /* -------------------------------------------------------------------- */
224 /* the element type */
225
/* Attributes and children of an element.  Allocated by
   element_new_extra and released by element_dealloc_extra. */
typedef struct {

    /* attributes (a dictionary object), or None if no attributes */
    PyObject* attrib;

    /* child elements */
    int length; /* actual number of items */
    int allocated; /* allocated items */

    /* this either points to _children or to a malloced buffer */
    PyObject* *children;

    /* inline storage for the first STATIC_CHILDREN children */
    PyObject* _children[STATIC_CHILDREN];

} ElementObjectExtra;
241
/* The element object itself.  Attributes and children live in the
   separately allocated "extra" block. */
typedef struct {
    PyObject_HEAD

    /* element tag (a string). */
    PyObject* tag;

    /* text before first child.  note that this is a tagged pointer;
       use JOIN_OBJ to get the object pointer.  the join flag is used
       to distinguish lists created by the tree builder from lists
       assigned to the attribute by application code; the former
       should be joined before being returned to the user, the latter
       should be left intact. */
    PyObject* text;

    /* text after this element, in parent.  note that this is a tagged
       pointer; use JOIN_OBJ to get the object pointer. */
    PyObject* tail;

    /* attributes and children; NULL until element_new_extra has been
       called for this element, and again after element_clear */
    ElementObjectExtra* extra;

} ElementObject;
263
/* forward declaration; the type object is defined further down */
staticforward PyTypeObject Element_Type;

/* true only if op's type is exactly Element_Type */
#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
267
268 /* -------------------------------------------------------------------- */
269 /* element constructor and destructor */
270
271 LOCAL(int)
element_new_extra(ElementObject * self,PyObject * attrib)272 element_new_extra(ElementObject* self, PyObject* attrib)
273 {
274 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
275 if (!self->extra)
276 return -1;
277
278 if (!attrib)
279 attrib = Py_None;
280
281 Py_INCREF(attrib);
282 self->extra->attrib = attrib;
283
284 self->extra->length = 0;
285 self->extra->allocated = STATIC_CHILDREN;
286 self->extra->children = self->extra->_children;
287
288 return 0;
289 }
290
291 LOCAL(void)
element_dealloc_extra(ElementObject * self)292 element_dealloc_extra(ElementObject* self)
293 {
294 int i;
295
296 Py_DECREF(self->extra->attrib);
297
298 for (i = 0; i < self->extra->length; i++)
299 Py_DECREF(self->extra->children[i]);
300
301 if (self->extra->children != self->extra->_children)
302 PyObject_Free(self->extra->children);
303
304 PyObject_Free(self->extra);
305 }
306
307 LOCAL(PyObject*)
element_new(PyObject * tag,PyObject * attrib)308 element_new(PyObject* tag, PyObject* attrib)
309 {
310 ElementObject* self;
311
312 self = PyObject_New(ElementObject, &Element_Type);
313 if (self == NULL)
314 return NULL;
315
316 /* use None for empty dictionaries */
317 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
318 attrib = Py_None;
319
320 self->extra = NULL;
321
322 if (attrib != Py_None) {
323
324 if (element_new_extra(self, attrib) < 0) {
325 PyObject_Del(self);
326 return NULL;
327 }
328
329 self->extra->length = 0;
330 self->extra->allocated = STATIC_CHILDREN;
331 self->extra->children = self->extra->_children;
332
333 }
334
335 Py_INCREF(tag);
336 self->tag = tag;
337
338 Py_INCREF(Py_None);
339 self->text = Py_None;
340
341 Py_INCREF(Py_None);
342 self->tail = Py_None;
343
344 ALLOC(sizeof(ElementObject), "create element");
345
346 return (PyObject*) self;
347 }
348
349 LOCAL(int)
element_resize(ElementObject * self,Py_ssize_t extra)350 element_resize(ElementObject* self, Py_ssize_t extra)
351 {
352 Py_ssize_t size;
353 PyObject* *children;
354
355 /* make sure self->children can hold the given number of extra
356 elements. set an exception and return -1 if allocation failed */
357
358 if (!self->extra)
359 element_new_extra(self, NULL);
360
361 size = self->extra->length + extra;
362
363 if (size > self->extra->allocated) {
364 /* use Python 2.4's list growth strategy */
365 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
366 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
367 * which needs at least 4 bytes.
368 * Although it's a false alarm always assume at least one child to
369 * be safe.
370 */
371 size = size ? size : 1;
372 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
373 goto nomemory;
374 if (size > INT_MAX) {
375 PyErr_SetString(PyExc_OverflowError,
376 "too many children");
377 return -1;
378 }
379 if (self->extra->children != self->extra->_children) {
380 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
381 * "children", which needs at least 4 bytes. Although it's a
382 * false alarm always assume at least one child to be safe.
383 */
384 children = PyObject_Realloc(self->extra->children,
385 size * sizeof(PyObject*));
386 if (!children)
387 goto nomemory;
388 } else {
389 children = PyObject_Malloc(size * sizeof(PyObject*));
390 if (!children)
391 goto nomemory;
392 /* copy existing children from static area to malloc buffer */
393 memcpy(children, self->extra->children,
394 self->extra->length * sizeof(PyObject*));
395 }
396 self->extra->children = children;
397 self->extra->allocated = size;
398 }
399
400 return 0;
401
402 nomemory:
403 PyErr_NoMemory();
404 return -1;
405 }
406
407 LOCAL(int)
element_add_subelement(ElementObject * self,PyObject * element)408 element_add_subelement(ElementObject* self, PyObject* element)
409 {
410 /* add a child element to a parent */
411
412 if (element_resize(self, 1) < 0)
413 return -1;
414
415 Py_INCREF(element);
416 self->extra->children[self->extra->length] = element;
417
418 self->extra->length++;
419
420 return 0;
421 }
422
423 LOCAL(PyObject*)
element_get_attrib(ElementObject * self)424 element_get_attrib(ElementObject* self)
425 {
426 /* return borrowed reference to attrib dictionary */
427 /* note: this function assumes that the extra section exists */
428
429 PyObject* res = self->extra->attrib;
430
431 if (res == Py_None) {
432 Py_DECREF(res);
433 /* create missing dictionary */
434 res = PyDict_New();
435 if (!res)
436 return NULL;
437 self->extra->attrib = res;
438 }
439
440 return res;
441 }
442
443 LOCAL(PyObject*)
element_get_text(ElementObject * self)444 element_get_text(ElementObject* self)
445 {
446 /* return borrowed reference to text attribute */
447
448 PyObject *res = self->text;
449
450 if (JOIN_GET(res)) {
451 res = JOIN_OBJ(res);
452 if (PyList_CheckExact(res)) {
453 PyObject *tmp = list_join(res);
454 if (!tmp)
455 return NULL;
456 self->text = tmp;
457 Py_DECREF(res);
458 res = tmp;
459 }
460 }
461
462 return res;
463 }
464
465 LOCAL(PyObject*)
element_get_tail(ElementObject * self)466 element_get_tail(ElementObject* self)
467 {
468 /* return borrowed reference to text attribute */
469
470 PyObject *res = self->tail;
471
472 if (JOIN_GET(res)) {
473 res = JOIN_OBJ(res);
474 if (PyList_CheckExact(res)) {
475 PyObject *tmp = list_join(res);
476 if (!tmp)
477 return NULL;
478 self->tail = tmp;
479 Py_DECREF(res);
480 res = tmp;
481 }
482 }
483
484 return res;
485 }
486
487 static PyObject*
element(PyObject * self,PyObject * args,PyObject * kw)488 element(PyObject* self, PyObject* args, PyObject* kw)
489 {
490 PyObject* elem;
491
492 PyObject* tag;
493 PyObject* attrib = NULL;
494 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag,
495 &PyDict_Type, &attrib))
496 return NULL;
497
498 if (attrib || kw) {
499 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
500 if (!attrib)
501 return NULL;
502 if (kw)
503 PyDict_Update(attrib, kw);
504 } else {
505 Py_INCREF(Py_None);
506 attrib = Py_None;
507 }
508
509 elem = element_new(tag, attrib);
510
511 Py_DECREF(attrib);
512
513 return elem;
514 }
515
516 static PyObject*
subelement(PyObject * self,PyObject * args,PyObject * kw)517 subelement(PyObject* self, PyObject* args, PyObject* kw)
518 {
519 PyObject* elem;
520
521 ElementObject* parent;
522 PyObject* tag;
523 PyObject* attrib = NULL;
524 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
525 &Element_Type, &parent, &tag,
526 &PyDict_Type, &attrib))
527 return NULL;
528
529 if (attrib || kw) {
530 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
531 if (!attrib)
532 return NULL;
533 if (kw)
534 PyDict_Update(attrib, kw);
535 } else {
536 Py_INCREF(Py_None);
537 attrib = Py_None;
538 }
539
540 elem = element_new(tag, attrib);
541 Py_DECREF(attrib);
542 if (elem == NULL)
543 return NULL;
544
545 if (element_add_subelement(parent, elem) < 0) {
546 Py_DECREF(elem);
547 return NULL;
548 }
549
550 return elem;
551 }
552
553 static void
element_dealloc(ElementObject * self)554 element_dealloc(ElementObject* self)
555 {
556 if (self->extra)
557 element_dealloc_extra(self);
558
559 /* discard attributes */
560 Py_DECREF(self->tag);
561 Py_DECREF(JOIN_OBJ(self->text));
562 Py_DECREF(JOIN_OBJ(self->tail));
563
564 RELEASE(sizeof(ElementObject), "destroy element");
565
566 PyObject_Del(self);
567 }
568
569 /* -------------------------------------------------------------------- */
570 /* methods (in alphabetical order) */
571
572 static PyObject*
element_append(ElementObject * self,PyObject * args)573 element_append(ElementObject* self, PyObject* args)
574 {
575 PyObject* element;
576 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
577 return NULL;
578
579 if (element_add_subelement(self, element) < 0)
580 return NULL;
581
582 Py_RETURN_NONE;
583 }
584
585 static PyObject*
element_clear(ElementObject * self,PyObject * args)586 element_clear(ElementObject* self, PyObject* args)
587 {
588 if (!PyArg_ParseTuple(args, ":clear"))
589 return NULL;
590
591 if (self->extra) {
592 element_dealloc_extra(self);
593 self->extra = NULL;
594 }
595
596 Py_INCREF(Py_None);
597 _set_joined_ptr(&self->text, Py_None);
598
599 Py_INCREF(Py_None);
600 _set_joined_ptr(&self->tail, Py_None);
601
602 Py_RETURN_NONE;
603 }
604
605 static PyObject*
element_copy(ElementObject * self,PyObject * args)606 element_copy(ElementObject* self, PyObject* args)
607 {
608 int i;
609 ElementObject* element;
610
611 if (!PyArg_ParseTuple(args, ":__copy__"))
612 return NULL;
613
614 element = (ElementObject*) element_new(
615 self->tag, (self->extra) ? self->extra->attrib : Py_None
616 );
617 if (!element)
618 return NULL;
619
620 Py_INCREF(JOIN_OBJ(self->text));
621 _set_joined_ptr(&element->text, self->text);
622
623 Py_INCREF(JOIN_OBJ(self->tail));
624 _set_joined_ptr(&element->tail, self->tail);
625
626 if (self->extra) {
627
628 if (element_resize(element, self->extra->length) < 0) {
629 Py_DECREF(element);
630 return NULL;
631 }
632
633 for (i = 0; i < self->extra->length; i++) {
634 Py_INCREF(self->extra->children[i]);
635 element->extra->children[i] = self->extra->children[i];
636 }
637
638 element->extra->length = self->extra->length;
639
640 }
641
642 return (PyObject*) element;
643 }
644
645 static PyObject*
element_deepcopy(ElementObject * self,PyObject * args)646 element_deepcopy(ElementObject* self, PyObject* args)
647 {
648 int i;
649 ElementObject* element;
650 PyObject* tag;
651 PyObject* attrib;
652 PyObject* text;
653 PyObject* tail;
654 PyObject* id;
655
656 PyObject* memo;
657 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
658 return NULL;
659
660 tag = deepcopy(self->tag, memo);
661 if (!tag)
662 return NULL;
663
664 if (self->extra) {
665 attrib = deepcopy(self->extra->attrib, memo);
666 if (!attrib) {
667 Py_DECREF(tag);
668 return NULL;
669 }
670 } else {
671 Py_INCREF(Py_None);
672 attrib = Py_None;
673 }
674
675 element = (ElementObject*) element_new(tag, attrib);
676
677 Py_DECREF(tag);
678 Py_DECREF(attrib);
679
680 if (!element)
681 return NULL;
682
683 text = deepcopy(JOIN_OBJ(self->text), memo);
684 if (!text)
685 goto error;
686 _set_joined_ptr(&element->text, JOIN_SET(text, JOIN_GET(self->text)));
687
688 tail = deepcopy(JOIN_OBJ(self->tail), memo);
689 if (!tail)
690 goto error;
691 _set_joined_ptr(&element->tail, JOIN_SET(tail, JOIN_GET(self->tail)));
692
693 if (self->extra) {
694
695 if (element_resize(element, self->extra->length) < 0)
696 goto error;
697
698 for (i = 0; i < self->extra->length; i++) {
699 PyObject* child = deepcopy(self->extra->children[i], memo);
700 if (!child) {
701 element->extra->length = i;
702 goto error;
703 }
704 element->extra->children[i] = child;
705 }
706
707 element->extra->length = self->extra->length;
708
709 }
710
711 /* add object to memo dictionary (so deepcopy won't visit it again) */
712 id = PyInt_FromLong((Py_uintptr_t) self);
713 if (!id)
714 goto error;
715
716 i = PyDict_SetItem(memo, id, (PyObject*) element);
717
718 Py_DECREF(id);
719
720 if (i < 0)
721 goto error;
722
723 return (PyObject*) element;
724
725 error:
726 Py_DECREF(element);
727 return NULL;
728 }
729
730 LOCAL(int)
checkpath(PyObject * tag)731 checkpath(PyObject* tag)
732 {
733 Py_ssize_t i;
734 int check = 1;
735
736 /* check if a tag contains an xpath character */
737
738 #define PATHCHAR(ch) \
739 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
740
741 #if defined(Py_USING_UNICODE)
742 if (PyUnicode_Check(tag)) {
743 Py_UNICODE *p = PyUnicode_AS_UNICODE(tag);
744 for (i = 0; i < PyUnicode_GET_SIZE(tag); i++) {
745 if (p[i] == '{')
746 check = 0;
747 else if (p[i] == '}')
748 check = 1;
749 else if (check && PATHCHAR(p[i]))
750 return 1;
751 }
752 return 0;
753 }
754 #endif
755 if (PyString_Check(tag)) {
756 char *p = PyString_AS_STRING(tag);
757 for (i = 0; i < PyString_GET_SIZE(tag); i++) {
758 if (p[i] == '{')
759 check = 0;
760 else if (p[i] == '}')
761 check = 1;
762 else if (check && PATHCHAR(p[i]))
763 return 1;
764 }
765 return 0;
766 }
767
768 return 1; /* unknown type; might be path expression */
769 }
770
771 static PyObject*
element_extend(ElementObject * self,PyObject * args)772 element_extend(ElementObject* self, PyObject* args)
773 {
774 PyObject* seq;
775 Py_ssize_t i;
776
777 PyObject* seq_in;
778 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
779 return NULL;
780
781 seq = PySequence_Fast(seq_in, "");
782 if (!seq) {
783 PyErr_Format(
784 PyExc_TypeError,
785 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
786 );
787 return NULL;
788 }
789
790 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
791 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
792 if (element_add_subelement(self, element) < 0) {
793 Py_DECREF(seq);
794 return NULL;
795 }
796 }
797
798 Py_DECREF(seq);
799
800 Py_RETURN_NONE;
801 }
802
803 static PyObject*
element_find(ElementObject * self,PyObject * args)804 element_find(ElementObject* self, PyObject* args)
805 {
806 int i;
807
808 PyObject* tag;
809 PyObject* namespaces = Py_None;
810 if (!PyArg_ParseTuple(args, "O|O:find", &tag, &namespaces))
811 return NULL;
812
813 if (checkpath(tag) || namespaces != Py_None)
814 return PyObject_CallMethod(
815 elementpath_obj, "find", "OOO", self, tag, namespaces
816 );
817
818 if (!self->extra)
819 Py_RETURN_NONE;
820
821 for (i = 0; i < self->extra->length; i++) {
822 PyObject* item = self->extra->children[i];
823 int rc;
824 if (!Element_CheckExact(item))
825 continue;
826 Py_INCREF(item);
827 rc = PyObject_Compare(((ElementObject*)item)->tag, tag);
828 if (rc == 0)
829 return item;
830 Py_DECREF(item);
831 if (rc < 0 && PyErr_Occurred())
832 return NULL;
833 }
834
835 Py_RETURN_NONE;
836 }
837
838 static PyObject*
element_findtext(ElementObject * self,PyObject * args)839 element_findtext(ElementObject* self, PyObject* args)
840 {
841 int i;
842
843 PyObject* tag;
844 PyObject* default_value = Py_None;
845 PyObject* namespaces = Py_None;
846 if (!PyArg_ParseTuple(args, "O|OO:findtext", &tag, &default_value, &namespaces))
847 return NULL;
848
849 if (checkpath(tag) || namespaces != Py_None)
850 return PyObject_CallMethod(
851 elementpath_obj, "findtext", "OOOO", self, tag, default_value, namespaces
852 );
853
854 if (!self->extra) {
855 Py_INCREF(default_value);
856 return default_value;
857 }
858
859 for (i = 0; i < self->extra->length; i++) {
860 ElementObject* item = (ElementObject*) self->extra->children[i];
861 int rc;
862 if (!Element_CheckExact(item))
863 continue;
864 Py_INCREF(item);
865 rc = PyObject_Compare(item->tag, tag);
866 if (rc == 0) {
867 PyObject* text = element_get_text(item);
868 if (text == Py_None) {
869 Py_DECREF(item);
870 return PyString_FromString("");
871 }
872 Py_XINCREF(text);
873 Py_DECREF(item);
874 return text;
875 }
876 Py_DECREF(item);
877 if (rc < 0 && PyErr_Occurred())
878 return NULL;
879 }
880
881 Py_INCREF(default_value);
882 return default_value;
883 }
884
885 static PyObject*
element_findall(ElementObject * self,PyObject * args)886 element_findall(ElementObject* self, PyObject* args)
887 {
888 int i;
889 PyObject* out;
890
891 PyObject* tag;
892 PyObject* namespaces = Py_None;
893 if (!PyArg_ParseTuple(args, "O|O:findall", &tag, &namespaces))
894 return NULL;
895
896 if (checkpath(tag) || namespaces != Py_None)
897 return PyObject_CallMethod(
898 elementpath_obj, "findall", "OOO", self, tag, namespaces
899 );
900
901 out = PyList_New(0);
902 if (!out)
903 return NULL;
904
905 if (!self->extra)
906 return out;
907
908 for (i = 0; i < self->extra->length; i++) {
909 PyObject* item = self->extra->children[i];
910 int rc;
911 if (!Element_CheckExact(item))
912 continue;
913 Py_INCREF(item);
914 rc = PyObject_Compare(((ElementObject*)item)->tag, tag);
915 if (rc == 0)
916 rc = PyList_Append(out, item);
917 Py_DECREF(item);
918 if (rc < 0 && PyErr_Occurred()) {
919 Py_DECREF(out);
920 return NULL;
921 }
922 }
923
924 return out;
925 }
926
927 static PyObject*
element_iterfind(ElementObject * self,PyObject * args)928 element_iterfind(ElementObject* self, PyObject* args)
929 {
930 PyObject* tag;
931 PyObject* namespaces = Py_None;
932 if (!PyArg_ParseTuple(args, "O|O:iterfind", &tag, &namespaces))
933 return NULL;
934
935 return PyObject_CallMethod(
936 elementpath_obj, "iterfind", "OOO", self, tag, namespaces
937 );
938 }
939
940 static PyObject*
element_get(ElementObject * self,PyObject * args)941 element_get(ElementObject* self, PyObject* args)
942 {
943 PyObject* value;
944
945 PyObject* key;
946 PyObject* default_value = Py_None;
947 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
948 return NULL;
949
950 if (!self->extra || self->extra->attrib == Py_None)
951 value = default_value;
952 else {
953 value = PyDict_GetItem(self->extra->attrib, key);
954 if (!value)
955 value = default_value;
956 }
957
958 Py_INCREF(value);
959 return value;
960 }
961
962 static PyObject*
element_getchildren(ElementObject * self,PyObject * args)963 element_getchildren(ElementObject* self, PyObject* args)
964 {
965 int i;
966 PyObject* list;
967
968 if (PyErr_WarnPy3k("This method will be removed in future versions. "
969 "Use 'list(elem)' or iteration over elem instead.",
970 1) < 0) {
971 return NULL;
972 }
973
974 if (!PyArg_ParseTuple(args, ":getchildren"))
975 return NULL;
976
977 if (!self->extra)
978 return PyList_New(0);
979
980 list = PyList_New(self->extra->length);
981 if (!list)
982 return NULL;
983
984 for (i = 0; i < self->extra->length; i++) {
985 PyObject* item = self->extra->children[i];
986 Py_INCREF(item);
987 PyList_SET_ITEM(list, i, item);
988 }
989
990 return list;
991 }
992
993 static PyObject*
element_iter_impl(ElementObject * self,PyObject * tag)994 element_iter_impl(ElementObject* self, PyObject* tag)
995 {
996 PyObject* args;
997 PyObject* result;
998
999 if (!elementtree_iter_obj) {
1000 PyErr_SetString(
1001 PyExc_RuntimeError,
1002 "iter helper not found"
1003 );
1004 return NULL;
1005 }
1006
1007 args = PyTuple_New(2);
1008 if (!args)
1009 return NULL;
1010
1011 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1012 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
1013
1014 result = PyObject_CallObject(elementtree_iter_obj, args);
1015
1016 Py_DECREF(args);
1017
1018 return result;
1019 }
1020
1021 static PyObject*
element_iter(ElementObject * self,PyObject * args)1022 element_iter(ElementObject* self, PyObject* args)
1023 {
1024 PyObject* tag = Py_None;
1025 if (!PyArg_ParseTuple(args, "|O:iter", &tag))
1026 return NULL;
1027
1028 return element_iter_impl(self, tag);
1029 }
1030
1031 static PyObject*
element_getiterator(ElementObject * self,PyObject * args)1032 element_getiterator(ElementObject* self, PyObject* args)
1033 {
1034 PyObject* tag = Py_None;
1035 if (!PyArg_ParseTuple(args, "|O:getiterator", &tag))
1036 return NULL;
1037
1038 /* Change for a DeprecationWarning in 1.4 */
1039 if (Py_Py3kWarningFlag &&
1040 PyErr_WarnEx(PyExc_PendingDeprecationWarning,
1041 "This method will be removed in future versions. "
1042 "Use 'tree.iter()' or 'list(tree.iter())' instead.",
1043 1) < 0) {
1044 return NULL;
1045 }
1046 return element_iter_impl(self, tag);
1047 }
1048
1049
1050 static PyObject*
element_itertext(ElementObject * self,PyObject * args)1051 element_itertext(ElementObject* self, PyObject* args)
1052 {
1053 PyObject* result;
1054
1055 if (!PyArg_ParseTuple(args, ":itertext"))
1056 return NULL;
1057
1058 if (!elementtree_itertext_obj) {
1059 PyErr_SetString(
1060 PyExc_RuntimeError,
1061 "itertext helper not found"
1062 );
1063 return NULL;
1064 }
1065
1066 args = PyTuple_New(1);
1067 if (!args)
1068 return NULL;
1069
1070 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1071
1072 result = PyObject_CallObject(elementtree_itertext_obj, args);
1073
1074 Py_DECREF(args);
1075
1076 return result;
1077 }
1078
1079 static PyObject*
element_getitem(PyObject * self_,Py_ssize_t index)1080 element_getitem(PyObject* self_, Py_ssize_t index)
1081 {
1082 ElementObject* self = (ElementObject*) self_;
1083
1084 if (!self->extra || index < 0 || index >= self->extra->length) {
1085 PyErr_SetString(
1086 PyExc_IndexError,
1087 "child index out of range"
1088 );
1089 return NULL;
1090 }
1091
1092 Py_INCREF(self->extra->children[index]);
1093 return self->extra->children[index];
1094 }
1095
1096 static PyObject*
element_insert(ElementObject * self,PyObject * args)1097 element_insert(ElementObject* self, PyObject* args)
1098 {
1099 int i;
1100
1101 int index;
1102 PyObject* element;
1103 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1104 &Element_Type, &element))
1105 return NULL;
1106
1107 if (!self->extra)
1108 element_new_extra(self, NULL);
1109
1110 if (index < 0) {
1111 index += self->extra->length;
1112 if (index < 0)
1113 index = 0;
1114 }
1115 if (index > self->extra->length)
1116 index = self->extra->length;
1117
1118 if (element_resize(self, 1) < 0)
1119 return NULL;
1120
1121 for (i = self->extra->length; i > index; i--)
1122 self->extra->children[i] = self->extra->children[i-1];
1123
1124 Py_INCREF(element);
1125 self->extra->children[index] = element;
1126
1127 self->extra->length++;
1128
1129 Py_RETURN_NONE;
1130 }
1131
1132 static PyObject*
element_items(ElementObject * self,PyObject * args)1133 element_items(ElementObject* self, PyObject* args)
1134 {
1135 if (!PyArg_ParseTuple(args, ":items"))
1136 return NULL;
1137
1138 if (!self->extra || self->extra->attrib == Py_None)
1139 return PyList_New(0);
1140
1141 return PyDict_Items(self->extra->attrib);
1142 }
1143
1144 static PyObject*
element_keys(ElementObject * self,PyObject * args)1145 element_keys(ElementObject* self, PyObject* args)
1146 {
1147 if (!PyArg_ParseTuple(args, ":keys"))
1148 return NULL;
1149
1150 if (!self->extra || self->extra->attrib == Py_None)
1151 return PyList_New(0);
1152
1153 return PyDict_Keys(self->extra->attrib);
1154 }
1155
1156 static Py_ssize_t
element_length(ElementObject * self)1157 element_length(ElementObject* self)
1158 {
1159 if (!self->extra)
1160 return 0;
1161
1162 return self->extra->length;
1163 }
1164
1165 static PyObject*
element_makeelement(PyObject * self,PyObject * args,PyObject * kw)1166 element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1167 {
1168 PyObject* elem;
1169
1170 PyObject* tag;
1171 PyObject* attrib;
1172 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1173 return NULL;
1174
1175 attrib = PyDict_Copy(attrib);
1176 if (!attrib)
1177 return NULL;
1178
1179 elem = element_new(tag, attrib);
1180
1181 Py_DECREF(attrib);
1182
1183 return elem;
1184 }
1185
1186 static PyObject*
element_reduce(ElementObject * self,PyObject * args)1187 element_reduce(ElementObject* self, PyObject* args)
1188 {
1189 if (!PyArg_ParseTuple(args, ":__reduce__"))
1190 return NULL;
1191
1192 /* Hack alert: This method is used to work around a __copy__
1193 problem on certain 2.3 and 2.4 versions. To save time and
1194 simplify the code, we create the copy in here, and use a dummy
1195 copyelement helper to trick the copy module into doing the
1196 right thing. */
1197
1198 if (!elementtree_copyelement_obj) {
1199 PyErr_SetString(
1200 PyExc_RuntimeError,
1201 "copyelement helper not found"
1202 );
1203 return NULL;
1204 }
1205
1206 return Py_BuildValue(
1207 "O(N)", elementtree_copyelement_obj, element_copy(self, args)
1208 );
1209 }
1210
1211 static PyObject*
element_remove(ElementObject * self,PyObject * args)1212 element_remove(ElementObject* self, PyObject* args)
1213 {
1214 int i;
1215 int rc;
1216 PyObject* element;
1217 PyObject* found;
1218
1219 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1220 return NULL;
1221
1222 if (!self->extra) {
1223 /* element has no children, so raise exception */
1224 PyErr_SetString(
1225 PyExc_ValueError,
1226 "list.remove(x): x not in list"
1227 );
1228 return NULL;
1229 }
1230
1231 for (i = 0; i < self->extra->length; i++) {
1232 if (self->extra->children[i] == element)
1233 break;
1234 rc = PyObject_Compare(self->extra->children[i], element);
1235 if (rc == 0)
1236 break;
1237 if (rc < 0 && PyErr_Occurred())
1238 return NULL;
1239 }
1240
1241 if (i >= self->extra->length) {
1242 /* element is not in children, so raise exception */
1243 PyErr_SetString(
1244 PyExc_ValueError,
1245 "list.remove(x): x not in list"
1246 );
1247 return NULL;
1248 }
1249
1250 found = self->extra->children[i];
1251
1252 self->extra->length--;
1253 for (; i < self->extra->length; i++)
1254 self->extra->children[i] = self->extra->children[i+1];
1255
1256 Py_DECREF(found);
1257 Py_RETURN_NONE;
1258 }
1259
1260 static PyObject*
element_repr(ElementObject * self)1261 element_repr(ElementObject* self)
1262 {
1263 int status;
1264
1265 if (self->tag == NULL)
1266 return PyUnicode_FromFormat("<Element at %p>", self);
1267
1268 status = Py_ReprEnter((PyObject *)self);
1269 if (status == 0) {
1270 PyObject *repr, *tag;
1271 tag = PyObject_Repr(self->tag);
1272 if (!tag) {
1273 Py_ReprLeave((PyObject *)self);
1274 return NULL;
1275 }
1276
1277 repr = PyString_FromFormat("<Element %s at %p>",
1278 PyString_AS_STRING(tag), self);
1279 Py_ReprLeave((PyObject *)self);
1280 Py_DECREF(tag);
1281 return repr;
1282 }
1283 if (status > 0)
1284 PyErr_Format(PyExc_RuntimeError,
1285 "reentrant call inside %s.__repr__",
1286 Py_TYPE(self)->tp_name);
1287 return NULL;
1288 }
1289
1290 static PyObject*
element_set(ElementObject * self,PyObject * args)1291 element_set(ElementObject* self, PyObject* args)
1292 {
1293 PyObject* attrib;
1294
1295 PyObject* key;
1296 PyObject* value;
1297 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1298 return NULL;
1299
1300 if (!self->extra)
1301 element_new_extra(self, NULL);
1302
1303 attrib = element_get_attrib(self);
1304 if (!attrib)
1305 return NULL;
1306
1307 if (PyDict_SetItem(attrib, key, value) < 0)
1308 return NULL;
1309
1310 Py_RETURN_NONE;
1311 }
1312
1313 static int
element_setitem(PyObject * self_,Py_ssize_t index_,PyObject * item)1314 element_setitem(PyObject* self_, Py_ssize_t index_, PyObject* item)
1315 {
1316 ElementObject* self = (ElementObject*) self_;
1317 int i, index;
1318 PyObject* old;
1319
1320 if (!self->extra || index_ < 0 || index_ >= self->extra->length) {
1321 PyErr_SetString(
1322 PyExc_IndexError,
1323 "child assignment index out of range");
1324 return -1;
1325 }
1326 index = (int)index_;
1327
1328 old = self->extra->children[index];
1329
1330 if (item) {
1331 Py_INCREF(item);
1332 self->extra->children[index] = item;
1333 } else {
1334 self->extra->length--;
1335 for (i = index; i < self->extra->length; i++)
1336 self->extra->children[i] = self->extra->children[i+1];
1337 }
1338
1339 Py_DECREF(old);
1340
1341 return 0;
1342 }
1343
1344 static PyObject*
element_subscr(PyObject * self_,PyObject * item)1345 element_subscr(PyObject* self_, PyObject* item)
1346 {
1347 ElementObject* self = (ElementObject*) self_;
1348
1349 #if (PY_VERSION_HEX < 0x02050000)
1350 if (_PyAnyInt_Check(item)) {
1351 long i = PyInt_AsLong(item);
1352 #else
1353 if (PyIndex_Check(item)) {
1354 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1355 #endif
1356
1357 if (i == -1 && PyErr_Occurred()) {
1358 return NULL;
1359 }
1360 if (i < 0 && self->extra)
1361 i += self->extra->length;
1362 return element_getitem(self_, i);
1363 }
1364 else if (PySlice_Check(item)) {
1365 Py_ssize_t start, stop, step, slicelen, cur, i;
1366 PyObject* list;
1367
1368 if (!self->extra)
1369 return PyList_New(0);
1370
1371 if (_PySlice_Unpack(item, &start, &stop, &step) < 0) {
1372 return NULL;
1373 }
1374 slicelen = _PySlice_AdjustIndices(self->extra->length, &start, &stop,
1375 step);
1376
1377 if (slicelen <= 0)
1378 return PyList_New(0);
1379 else {
1380 list = PyList_New(slicelen);
1381 if (!list)
1382 return NULL;
1383
1384 for (cur = start, i = 0; i < slicelen;
1385 cur += step, i++) {
1386 PyObject* item = self->extra->children[cur];
1387 Py_INCREF(item);
1388 PyList_SET_ITEM(list, i, item);
1389 }
1390
1391 return list;
1392 }
1393 }
1394 else {
1395 PyErr_SetString(PyExc_TypeError,
1396 "element indices must be integers");
1397 return NULL;
1398 }
1399 }
1400
1401 static int
1402 element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1403 {
1404 ElementObject* self = (ElementObject*) self_;
1405
1406 #if (PY_VERSION_HEX < 0x02050000)
1407 if (_PyAnyInt_Check(item)) {
1408 long i = PyInt_AsLong(item);
1409 #else
1410 if (PyIndex_Check(item)) {
1411 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1412 #endif
1413
1414 if (i == -1 && PyErr_Occurred()) {
1415 return -1;
1416 }
1417 if (i < 0 && self->extra)
1418 i += self->extra->length;
1419 return element_setitem(self_, i, value);
1420 }
1421 else if (PySlice_Check(item)) {
1422 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1423
1424 PyObject* recycle = NULL;
1425 PyObject* seq = NULL;
1426
1427 if (!self->extra)
1428 element_new_extra(self, NULL);
1429
1430 if (_PySlice_Unpack(item, &start, &stop, &step) < 0) {
1431 return -1;
1432 }
1433 slicelen = _PySlice_AdjustIndices(self->extra->length, &start, &stop,
1434 step);
1435 assert(slicelen <= self->extra->length);
1436
1437 if (value == NULL)
1438 newlen = 0;
1439 else {
1440 seq = PySequence_Fast(value, "");
1441 if (!seq) {
1442 PyErr_Format(
1443 PyExc_TypeError,
1444 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1445 );
1446 return -1;
1447 }
1448 newlen = PySequence_Size(seq);
1449 }
1450
1451 if (step != 1 && newlen != slicelen)
1452 {
1453 Py_XDECREF(seq);
1454 PyErr_Format(PyExc_ValueError,
1455 #if (PY_VERSION_HEX < 0x02050000)
1456 "attempt to assign sequence of size %d "
1457 "to extended slice of size %d",
1458 (int)newlen, (int)slicelen
1459 #else
1460 "attempt to assign sequence of size %zd "
1461 "to extended slice of size %zd",
1462 newlen, slicelen
1463 #endif
1464 );
1465 return -1;
1466 }
1467
1468
1469 /* Resize before creating the recycle bin, to prevent refleaks. */
1470 if (newlen > slicelen) {
1471 if (element_resize(self, newlen - slicelen) < 0) {
1472 Py_XDECREF(seq);
1473 return -1;
1474 }
1475 }
1476 assert(newlen - slicelen <= INT_MAX - self->extra->length);
1477 assert(newlen - slicelen >= -self->extra->length);
1478
1479 if (slicelen > 0) {
1480 /* to avoid recursive calls to this method (via decref), move
1481 old items to the recycle bin here, and get rid of them when
1482 we're done modifying the element */
1483 recycle = PyList_New(slicelen);
1484 if (!recycle) {
1485 Py_XDECREF(seq);
1486 return -1;
1487 }
1488 for (cur = start, i = 0; i < slicelen;
1489 cur += step, i++)
1490 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1491 }
1492
1493 if (newlen < slicelen) {
1494 /* delete slice */
1495 for (i = stop; i < self->extra->length; i++)
1496 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1497 } else if (newlen > slicelen) {
1498 /* insert slice */
1499 for (i = self->extra->length-1; i >= stop; i--)
1500 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1501 }
1502
1503 /* replace the slice */
1504 for (cur = start, i = 0; i < newlen;
1505 cur += step, i++) {
1506 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1507 Py_INCREF(element);
1508 self->extra->children[cur] = element;
1509 }
1510
1511 self->extra->length += (int)(newlen - slicelen);
1512
1513 Py_XDECREF(seq);
1514
1515 /* discard the recycle bin, and everything in it */
1516 Py_XDECREF(recycle);
1517
1518 return 0;
1519 }
1520 else {
1521 PyErr_SetString(PyExc_TypeError,
1522 "element indices must be integers");
1523 return -1;
1524 }
1525 }
1526
/* Method table for Element objects.  element_getattr looks names up in
   this table via Py_FindMethod.  Every entry uses the METH_VARARGS
   calling convention; the table ends with a NULL sentinel. */
static PyMethodDef element_methods[] = {

    {"clear", (PyCFunction) element_clear, METH_VARARGS},

    /* attribute access */
    {"get", (PyCFunction) element_get, METH_VARARGS},
    {"set", (PyCFunction) element_set, METH_VARARGS},

    /* searching */
    {"find", (PyCFunction) element_find, METH_VARARGS},
    {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
    {"findall", (PyCFunction) element_findall, METH_VARARGS},

    /* child list manipulation */
    {"append", (PyCFunction) element_append, METH_VARARGS},
    {"extend", (PyCFunction) element_extend, METH_VARARGS},
    {"insert", (PyCFunction) element_insert, METH_VARARGS},
    {"remove", (PyCFunction) element_remove, METH_VARARGS},

    /* iteration */
    {"iter", (PyCFunction) element_iter, METH_VARARGS},
    {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
    {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS},

    {"getiterator", (PyCFunction) element_getiterator, METH_VARARGS},
    {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},

    {"items", (PyCFunction) element_items, METH_VARARGS},
    {"keys", (PyCFunction) element_keys, METH_VARARGS},

    {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},

    {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
    {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},

    /* Some 2.3 and 2.4 versions do not handle the __copy__ method on
       C objects correctly, so we have to fake it using a __reduce__-
       based hack (see the element_reduce implementation above for
       details). */

    /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're
       using a runtime test to figure out if we need to fake things
       or not (see the init code below).  The following entry is
       enabled only if the hack is needed; the leading "!" keeps the
       name from matching a normal "__reduce__" lookup until then. */

    {"!__reduce__", (PyCFunction) element_reduce, METH_VARARGS},

    {NULL, NULL}
};
1572
1573 static PyObject*
1574 element_getattr(ElementObject* self, char* name)
1575 {
1576 PyObject* res;
1577
1578 /* handle common attributes first */
1579 if (strcmp(name, "tag") == 0) {
1580 res = self->tag;
1581 Py_INCREF(res);
1582 return res;
1583 } else if (strcmp(name, "text") == 0) {
1584 res = element_get_text(self);
1585 Py_XINCREF(res);
1586 return res;
1587 }
1588
1589 /* methods */
1590 res = Py_FindMethod(element_methods, (PyObject*) self, name);
1591 if (res)
1592 return res;
1593
1594 PyErr_Clear();
1595
1596 /* less common attributes */
1597 if (strcmp(name, "tail") == 0) {
1598 res = element_get_tail(self);
1599 } else if (strcmp(name, "attrib") == 0) {
1600 if (!self->extra)
1601 element_new_extra(self, NULL);
1602 res = element_get_attrib(self);
1603 } else {
1604 PyErr_SetString(PyExc_AttributeError, name);
1605 return NULL;
1606 }
1607
1608 if (!res)
1609 return NULL;
1610
1611 Py_INCREF(res);
1612 return res;
1613 }
1614
1615 static int
1616 element_setattr(ElementObject* self, const char* name, PyObject* value)
1617 {
1618 if (value == NULL) {
1619 PyErr_SetString(
1620 PyExc_AttributeError,
1621 "can't delete element attributes"
1622 );
1623 return -1;
1624 }
1625
1626 if (strcmp(name, "tag") == 0) {
1627 Py_INCREF(value);
1628 Py_SETREF(self->tag, value);
1629 } else if (strcmp(name, "text") == 0) {
1630 Py_INCREF(value);
1631 _set_joined_ptr(&self->text, value);
1632 } else if (strcmp(name, "tail") == 0) {
1633 Py_INCREF(value);
1634 _set_joined_ptr(&self->tail, value);
1635 } else if (strcmp(name, "attrib") == 0) {
1636 if (!self->extra)
1637 element_new_extra(self, NULL);
1638 Py_INCREF(value);
1639 Py_SETREF(self->extra->attrib, value);
1640 } else {
1641 PyErr_SetString(PyExc_AttributeError, name);
1642 return -1;
1643 }
1644
1645 return 0;
1646 }
1647
/* Sequence protocol for Element objects: length, item access and item
   assignment/deletion.  Concatenation, repetition and the slice slots
   are left empty. */
static PySequenceMethods element_as_sequence = {
    (lenfunc) element_length, /* sq_length */
    0, /* sq_concat */
    0, /* sq_repeat */
    element_getitem, /* sq_item */
    0, /* sq_slice */
    element_setitem, /* sq_ass_item */
    0, /* sq_ass_slice */
};
1657
/* Mapping protocol for Element objects; the subscript functions accept
   both integer indexes and slice objects. */
static PyMappingMethods element_as_mapping = {
    (lenfunc) element_length, /* mp_length */
    (binaryfunc) element_subscr, /* mp_subscript */
    (objobjargproc) element_ass_subscr, /* mp_ass_subscript */
};
1663
/* Type object for Element.  Only the slots up to tp_as_mapping are
   listed; the remaining slots are left to static zero-initialization. */
statichere PyTypeObject Element_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "Element", sizeof(ElementObject), 0, /* tp_name, tp_basicsize, tp_itemsize */
    /* methods */
    (destructor)element_dealloc, /* tp_dealloc */
    0, /* tp_print */
    (getattrfunc)element_getattr, /* tp_getattr */
    (setattrfunc)element_setattr, /* tp_setattr */
    0, /* tp_compare */
    (reprfunc)element_repr, /* tp_repr */
    0, /* tp_as_number */
    &element_as_sequence, /* tp_as_sequence */
    &element_as_mapping, /* tp_as_mapping */
};
1678
1679 /* ==================================================================== */
1680 /* the tree builder type */
1681
/* State of a tree builder: the tree under construction, the current and
   most recent nodes, pending character data, the stack of open elements,
   and the optional event-collection settings. */
typedef struct {
    PyObject_HEAD

    PyObject* root; /* root node (first created node) */

    ElementObject* this; /* current node */
    ElementObject* last; /* most recently created node */

    PyObject* data; /* data collector (string or list), or NULL */

    PyObject* stack; /* element stack */
    Py_ssize_t index; /* current stack size (0=empty) */

    /* element tracing */
    PyObject* events; /* list of events, or NULL if not collecting */
    PyObject* start_event_obj; /* event objects (NULL to ignore) */
    PyObject* end_event_obj;
    PyObject* start_ns_event_obj;
    PyObject* end_ns_event_obj;

} TreeBuilderObject;
1703
1704 staticforward PyTypeObject TreeBuilder_Type;
1705
1706 #define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
1707
1708 /* -------------------------------------------------------------------- */
1709 /* constructor and destructor */
1710
1711 LOCAL(PyObject*)
1712 treebuilder_new(void)
1713 {
1714 TreeBuilderObject* self;
1715
1716 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
1717 if (self == NULL)
1718 return NULL;
1719
1720 self->root = NULL;
1721
1722 Py_INCREF(Py_None);
1723 self->this = (ElementObject*) Py_None;
1724
1725 Py_INCREF(Py_None);
1726 self->last = (ElementObject*) Py_None;
1727
1728 self->data = NULL;
1729
1730 self->stack = PyList_New(20);
1731 self->index = 0;
1732
1733 self->events = NULL;
1734 self->start_event_obj = self->end_event_obj = NULL;
1735 self->start_ns_event_obj = self->end_ns_event_obj = NULL;
1736
1737 ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
1738
1739 return (PyObject*) self;
1740 }
1741
1742 static PyObject*
1743 treebuilder(PyObject* self_, PyObject* args)
1744 {
1745 if (!PyArg_ParseTuple(args, ":TreeBuilder"))
1746 return NULL;
1747
1748 return treebuilder_new();
1749 }
1750
/* Destructor for tree builders: release every object the builder refers
   to, then free the builder itself. */
static void
treebuilder_dealloc(TreeBuilderObject* self)
{
    /* the event objects, event list, data and root may be NULL, hence
       Py_XDECREF; stack, last and this are released with Py_DECREF and
       so must be non-NULL here */
    Py_XDECREF(self->end_ns_event_obj);
    Py_XDECREF(self->start_ns_event_obj);
    Py_XDECREF(self->end_event_obj);
    Py_XDECREF(self->start_event_obj);
    Py_XDECREF(self->events);
    Py_DECREF(self->stack);
    Py_XDECREF(self->data);
    Py_DECREF(self->last);
    Py_DECREF(self->this);
    Py_XDECREF(self->root);

    RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");

    PyObject_Del(self);
}
1769
1770 /* -------------------------------------------------------------------- */
1771 /* helpers for handling of arbitrary element-like objects */
1772
1773 static void
1774 treebuilder_set_element_text_or_tail(PyObject **data, PyObject **dest)
1775 {
1776 PyObject *tmp = JOIN_OBJ(*dest);
1777 *dest = JOIN_SET(*data, PyList_CheckExact(*data));
1778 *data = NULL;
1779 Py_DECREF(tmp);
1780 }
1781
1782 LOCAL(void)
1783 treebuilder_flush_data(TreeBuilderObject* self)
1784 {
1785 ElementObject *element = self->last;
1786
1787 if (self->data) {
1788 if (self->this == element) {
1789 treebuilder_set_element_text_or_tail(
1790 &self->data,
1791 &element->text);
1792 }
1793 else {
1794 treebuilder_set_element_text_or_tail(
1795 &self->data,
1796 &element->tail);
1797 }
1798 }
1799 }
1800
1801 LOCAL(int)
1802 treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
1803 PyObject *node)
1804 {
1805 if (action != NULL) {
1806 PyObject *res = PyTuple_Pack(2, action, node);
1807 if (res == NULL)
1808 return -1;
1809 if (PyList_Append(self->events, res) < 0) {
1810 Py_DECREF(res);
1811 return -1;
1812 }
1813 Py_DECREF(res);
1814 }
1815 return 0;
1816 }
1817
1818 /* -------------------------------------------------------------------- */
1819 /* handlers */
1820
1821 LOCAL(PyObject*)
1822 treebuilder_handle_xml(TreeBuilderObject* self, PyObject* encoding,
1823 PyObject* standalone)
1824 {
1825 Py_RETURN_NONE;
1826 }
1827
/* Handle the start of an element: flush pending data, create the new
   node, link it under the current node (or make it the root), push the
   current node on the stack, and make the new node both the current and
   the most recent one.  A "start" event is recorded when requested.

   Returns a new reference to the node, or NULL with an exception set.
   A second top-level element raises the module's parse error. */
LOCAL(PyObject*)
treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
                         PyObject* attrib)
{
    PyObject* node;
    PyObject* this;

    treebuilder_flush_data(self);

    node = element_new(tag, attrib);
    if (!node)
        return NULL;

    this = (PyObject*) self->this;

    if (this != Py_None) {
        /* inside an open element: the node becomes its child */
        if (element_add_subelement((ElementObject*) this, node) < 0)
            goto error;
    } else {
        /* no open element: the node must be the one and only root */
        if (self->root) {
            PyErr_SetString(
                elementtree_parseerror_obj,
                "multiple elements on top level"
                );
            goto error;
        }
        Py_INCREF(node);
        self->root = node;
    }

    /* push the current node; reuse a pre-allocated slot when one exists */
    if (self->index < PyList_GET_SIZE(self->stack)) {
        if (PyList_SetItem(self->stack, self->index, this) < 0)
            goto error;
        /* PyList_SetItem took over a reference, so add one for the list */
        Py_INCREF(this);
    } else {
        if (PyList_Append(self->stack, this) < 0)
            goto error;
    }
    self->index++;

    Py_INCREF(node);
    Py_SETREF(self->this, (ElementObject*) node);

    Py_INCREF(node);
    Py_SETREF(self->last, (ElementObject*) node);

    if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
        goto error;

    return node;

  error:
    /* give up the reference obtained from element_new */
    Py_DECREF(node);
    return NULL;
}
1883
1884 LOCAL(PyObject*)
1885 treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1886 {
1887 if (!self->data) {
1888 if (self->last == (ElementObject*) Py_None) {
1889 /* ignore calls to data before the first call to start */
1890 Py_RETURN_NONE;
1891 }
1892 /* store the first item as is */
1893 Py_INCREF(data); self->data = data;
1894 } else {
1895 /* more than one item; use a list to collect items */
1896 if (PyString_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
1897 PyString_CheckExact(data) && PyString_GET_SIZE(data) == 1) {
1898 /* expat often generates single character data sections; handle
1899 the most common case by resizing the existing string... */
1900 Py_ssize_t size = PyString_GET_SIZE(self->data);
1901 if (_PyString_Resize(&self->data, size + 1) < 0)
1902 return NULL;
1903 PyString_AS_STRING(self->data)[size] = PyString_AS_STRING(data)[0];
1904 } else if (PyList_CheckExact(self->data)) {
1905 if (PyList_Append(self->data, data) < 0)
1906 return NULL;
1907 } else {
1908 PyObject* list = PyList_New(2);
1909 if (!list)
1910 return NULL;
1911 PyList_SET_ITEM(list, 0, self->data);
1912 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
1913 self->data = list;
1914 }
1915 }
1916
1917 Py_RETURN_NONE;
1918 }
1919
1920 LOCAL(PyObject*)
1921 treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
1922 {
1923 ElementObject *item;
1924
1925 treebuilder_flush_data(self);
1926
1927 if (self->index == 0) {
1928 PyErr_SetString(
1929 PyExc_IndexError,
1930 "pop from empty stack"
1931 );
1932 return NULL;
1933 }
1934
1935 item = self->last;
1936 self->last = self->this;
1937 self->index--;
1938 self->this = (ElementObject *) PyList_GET_ITEM(self->stack, self->index);
1939 Py_INCREF(self->this);
1940 Py_DECREF(item);
1941
1942 if (treebuilder_append_event(self, self->end_event_obj, (PyObject*)self->last) < 0)
1943 return NULL;
1944
1945 Py_INCREF(self->last);
1946 return (PyObject*) self->last;
1947 }
1948
1949 /* -------------------------------------------------------------------- */
1950 /* methods (in alphabetical order) */
1951
1952 static PyObject*
1953 treebuilder_data(TreeBuilderObject* self, PyObject* args)
1954 {
1955 PyObject* data;
1956 if (!PyArg_ParseTuple(args, "O:data", &data))
1957 return NULL;
1958
1959 return treebuilder_handle_data(self, data);
1960 }
1961
1962 static PyObject*
1963 treebuilder_end(TreeBuilderObject* self, PyObject* args)
1964 {
1965 PyObject* tag;
1966 if (!PyArg_ParseTuple(args, "O:end", &tag))
1967 return NULL;
1968
1969 return treebuilder_handle_end(self, tag);
1970 }
1971
1972 LOCAL(PyObject*)
1973 treebuilder_done(TreeBuilderObject* self)
1974 {
1975 PyObject* res;
1976
1977 /* FIXME: check stack size? */
1978
1979 if (self->root)
1980 res = self->root;
1981 else
1982 res = Py_None;
1983
1984 Py_INCREF(res);
1985 return res;
1986 }
1987
1988 static PyObject*
1989 treebuilder_close(TreeBuilderObject* self, PyObject* args)
1990 {
1991 if (!PyArg_ParseTuple(args, ":close"))
1992 return NULL;
1993
1994 return treebuilder_done(self);
1995 }
1996
1997 static PyObject*
1998 treebuilder_start(TreeBuilderObject* self, PyObject* args)
1999 {
2000 PyObject* tag;
2001 PyObject* attrib = Py_None;
2002 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
2003 return NULL;
2004
2005 return treebuilder_handle_start(self, tag, attrib);
2006 }
2007
2008 static PyObject*
2009 treebuilder_xml(TreeBuilderObject* self, PyObject* args)
2010 {
2011 PyObject* encoding;
2012 PyObject* standalone;
2013 if (!PyArg_ParseTuple(args, "OO:xml", &encoding, &standalone))
2014 return NULL;
2015
2016 return treebuilder_handle_xml(self, encoding, standalone);
2017 }
2018
/* Method table for TreeBuilder objects, consulted by treebuilder_getattr.
   Every entry uses METH_VARARGS; the table ends with a NULL sentinel. */
static PyMethodDef treebuilder_methods[] = {
    {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
    {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
    {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
    {"xml", (PyCFunction) treebuilder_xml, METH_VARARGS},
    {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
    {NULL, NULL}
};
2027
2028 static PyObject*
2029 treebuilder_getattr(TreeBuilderObject* self, char* name)
2030 {
2031 return Py_FindMethod(treebuilder_methods, (PyObject*) self, name);
2032 }
2033
/* Type object for TreeBuilder.  Only the destructor and the attribute
   lookup are provided; the remaining slots are left to static
   zero-initialization. */
statichere PyTypeObject TreeBuilder_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "TreeBuilder", sizeof(TreeBuilderObject), 0, /* tp_name, tp_basicsize, tp_itemsize */
    /* methods */
    (destructor)treebuilder_dealloc, /* tp_dealloc */
    0, /* tp_print */
    (getattrfunc)treebuilder_getattr, /* tp_getattr */
};
2042
2043 /* ==================================================================== */
2044 /* the expat interface */
2045
2046 #if defined(USE_EXPAT)
2047
2048 #include "expat.h"
2049
2050 #if defined(USE_PYEXPAT_CAPI)
2051 #include "pyexpat.h"
2052 static struct PyExpat_CAPI* expat_capi;
2053 #define EXPAT(func) (expat_capi->func)
2054 #else
2055 #define EXPAT(func) (XML_##func)
2056 #endif
2057
/* State of an XML parser object: the expat parser, the target that
   receives the parse events, and the callables used to deliver them. */
typedef struct {
    PyObject_HEAD

    XML_Parser parser; /* expat parser handle */

    PyObject* target; /* receiver of parse events; a TreeBuilder gets a
                         direct-call shortcut in the handlers */
    PyObject* entity; /* dictionary used to resolve entity names in the
                         default handler */

    PyObject* names; /* dictionary caching raw expat names and their
                        converted form (see makeuniversal) */

    /* callables invoked by the expat handlers; the handlers test the
       start/data/end/comment/pi entries for NULL before calling them */

    PyObject* handle_xml; /* NOTE(review): presumably for the XML
                             declaration -- not used in this part of
                             the file */

    PyObject* handle_start;
    PyObject* handle_data;
    PyObject* handle_end;

    PyObject* handle_comment;
    PyObject* handle_pi;

    PyObject* handle_close; /* NOTE(review): presumably called when the
                               parser is closed -- confirm */

} XMLParserObject;
2080
2081 staticforward PyTypeObject XMLParser_Type;
2082
2083 /* helpers */
2084
#if defined(Py_USING_UNICODE)
/* Return 1 when any of the first size bytes of string has its high bit
   set (that is, the data is not plain 7-bit ASCII), otherwise 0. */
LOCAL(int)
checkstring(const char* string, int size)
{
    int pos = 0;

    /* check if an 8-bit string contains UTF-8 characters */
    while (pos < size) {
        if (string[pos] & 0x80)
            return 1;
        pos++;
    }

    return 0;
}
#endif
2099
2100 LOCAL(PyObject*)
2101 makestring(const char* string, int size)
2102 {
2103 /* convert a UTF-8 string to either a 7-bit ascii string or a
2104 Unicode string */
2105
2106 #if defined(Py_USING_UNICODE)
2107 if (checkstring(string, size))
2108 return PyUnicode_DecodeUTF8(string, size, "strict");
2109 #endif
2110
2111 return PyString_FromStringAndSize(string, size);
2112 }
2113
2114 LOCAL(PyObject*)
2115 makeuniversal(XMLParserObject* self, const char* string)
2116 {
2117 /* convert a UTF-8 tag/attribute name from the expat parser
2118 to a universal name string */
2119
2120 int size = strlen(string);
2121 PyObject* key;
2122 PyObject* value;
2123
2124 /* look the 'raw' name up in the names dictionary */
2125 key = PyString_FromStringAndSize(string, size);
2126 if (!key)
2127 return NULL;
2128
2129 value = PyDict_GetItem(self->names, key);
2130
2131 if (value) {
2132 Py_INCREF(value);
2133 } else {
2134 /* new name. convert to universal name, and decode as
2135 necessary */
2136
2137 PyObject* tag;
2138 char* p;
2139 int i;
2140
2141 /* look for namespace separator */
2142 for (i = 0; i < size; i++)
2143 if (string[i] == '}')
2144 break;
2145 if (i != size) {
2146 /* convert to universal name */
2147 tag = PyString_FromStringAndSize(NULL, size+1);
2148 p = PyString_AS_STRING(tag);
2149 p[0] = '{';
2150 memcpy(p+1, string, size);
2151 size++;
2152 } else {
2153 /* plain name; use key as tag */
2154 Py_INCREF(key);
2155 tag = key;
2156 }
2157
2158 /* decode universal name */
2159 #if defined(Py_USING_UNICODE)
2160 /* inline makestring, to avoid duplicating the source string if
2161 it's not a utf-8 string */
2162 p = PyString_AS_STRING(tag);
2163 if (checkstring(p, size)) {
2164 value = PyUnicode_DecodeUTF8(p, size, "strict");
2165 Py_DECREF(tag);
2166 if (!value) {
2167 Py_DECREF(key);
2168 return NULL;
2169 }
2170 } else
2171 #endif
2172 value = tag; /* use tag as is */
2173
2174 /* add to names dictionary */
2175 if (PyDict_SetItem(self->names, key, value) < 0) {
2176 Py_DECREF(key);
2177 Py_DECREF(value);
2178 return NULL;
2179 }
2180 }
2181
2182 Py_DECREF(key);
2183 return value;
2184 }
2185
2186 static void
2187 expat_set_error(const char* message, int line, int column)
2188 {
2189 PyObject *error;
2190 PyObject *position;
2191 char buffer[256];
2192
2193 sprintf(buffer, "%s: line %d, column %d", message, line, column);
2194
2195 error = PyObject_CallFunction(elementtree_parseerror_obj, "s", buffer);
2196 if (!error)
2197 return;
2198
2199 /* add position attribute */
2200 position = Py_BuildValue("(ii)", line, column);
2201 if (!position) {
2202 Py_DECREF(error);
2203 return;
2204 }
2205 if (PyObject_SetAttrString(error, "position", position) == -1) {
2206 Py_DECREF(error);
2207 Py_DECREF(position);
2208 return;
2209 }
2210 Py_DECREF(position);
2211
2212 PyErr_SetObject(elementtree_parseerror_obj, error);
2213 Py_DECREF(error);
2214 }
2215
2216 /* -------------------------------------------------------------------- */
2217 /* handlers */
2218
2219 static void
2220 expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2221 int data_len)
2222 {
2223 PyObject* key;
2224 PyObject* value;
2225 PyObject* res;
2226
2227 if (data_len < 2 || data_in[0] != '&')
2228 return;
2229
2230 key = makestring(data_in + 1, data_len - 2);
2231 if (!key)
2232 return;
2233
2234 value = PyDict_GetItem(self->entity, key);
2235
2236 if (value) {
2237 if (TreeBuilder_CheckExact(self->target))
2238 res = treebuilder_handle_data(
2239 (TreeBuilderObject*) self->target, value
2240 );
2241 else if (self->handle_data)
2242 res = PyObject_CallFunction(self->handle_data, "O", value);
2243 else
2244 res = NULL;
2245 Py_XDECREF(res);
2246 } else if (!PyErr_Occurred()) {
2247 /* Report the first error, not the last */
2248 char message[128];
2249 sprintf(message, "undefined entity &%.100s;", PyString_AS_STRING(key));
2250 expat_set_error(
2251 message,
2252 EXPAT(GetErrorLineNumber)(self->parser),
2253 EXPAT(GetErrorColumnNumber)(self->parser)
2254 );
2255 }
2256
2257 Py_DECREF(key);
2258 }
2259
2260 static void
2261 expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2262 const XML_Char **attrib_in)
2263 {
2264 PyObject* res;
2265 PyObject* tag;
2266 PyObject* attrib;
2267 int ok;
2268
2269 /* tag name */
2270 tag = makeuniversal(self, tag_in);
2271 if (!tag)
2272 return; /* parser will look for errors */
2273
2274 /* attributes */
2275 if (attrib_in[0]) {
2276 attrib = PyDict_New();
2277 if (!attrib) {
2278 Py_DECREF(tag);
2279 return;
2280 }
2281 while (attrib_in[0] && attrib_in[1]) {
2282 PyObject* key = makeuniversal(self, attrib_in[0]);
2283 PyObject* value = makestring(attrib_in[1], strlen(attrib_in[1]));
2284 if (!key || !value) {
2285 Py_XDECREF(value);
2286 Py_XDECREF(key);
2287 Py_DECREF(attrib);
2288 Py_DECREF(tag);
2289 return;
2290 }
2291 ok = PyDict_SetItem(attrib, key, value);
2292 Py_DECREF(value);
2293 Py_DECREF(key);
2294 if (ok < 0) {
2295 Py_DECREF(attrib);
2296 Py_DECREF(tag);
2297 return;
2298 }
2299 attrib_in += 2;
2300 }
2301 } else {
2302 Py_INCREF(Py_None);
2303 attrib = Py_None;
2304 }
2305
2306 if (TreeBuilder_CheckExact(self->target))
2307 /* shortcut */
2308 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2309 tag, attrib);
2310 else if (self->handle_start) {
2311 if (attrib == Py_None) {
2312 Py_DECREF(attrib);
2313 attrib = PyDict_New();
2314 if (!attrib) {
2315 Py_DECREF(tag);
2316 return;
2317 }
2318 }
2319 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
2320 } else
2321 res = NULL;
2322
2323 Py_DECREF(tag);
2324 Py_DECREF(attrib);
2325
2326 Py_XDECREF(res);
2327 }
2328
2329 static void
2330 expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2331 int data_len)
2332 {
2333 PyObject* data;
2334 PyObject* res;
2335
2336 data = makestring(data_in, data_len);
2337 if (!data)
2338 return; /* parser will look for errors */
2339
2340 if (TreeBuilder_CheckExact(self->target))
2341 /* shortcut */
2342 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2343 else if (self->handle_data)
2344 res = PyObject_CallFunction(self->handle_data, "O", data);
2345 else
2346 res = NULL;
2347
2348 Py_DECREF(data);
2349
2350 Py_XDECREF(res);
2351 }
2352
2353 static void
2354 expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2355 {
2356 PyObject* tag;
2357 PyObject* res = NULL;
2358
2359 if (TreeBuilder_CheckExact(self->target))
2360 /* shortcut */
2361 /* the standard tree builder doesn't look at the end tag */
2362 res = treebuilder_handle_end(
2363 (TreeBuilderObject*) self->target, Py_None
2364 );
2365 else if (self->handle_end) {
2366 tag = makeuniversal(self, tag_in);
2367 if (tag) {
2368 res = PyObject_CallFunction(self->handle_end, "O", tag);
2369 Py_DECREF(tag);
2370 }
2371 }
2372
2373 Py_XDECREF(res);
2374 }
2375
2376 static void
2377 expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2378 const XML_Char *uri)
2379 {
2380 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
2381 PyObject *parcel;
2382 PyObject *sprefix = NULL;
2383 PyObject *suri = NULL;
2384
2385 if (PyErr_Occurred())
2386 return;
2387
2388 if (!target->events || !target->start_ns_event_obj)
2389 return;
2390
2391 if (uri)
2392 suri = makestring(uri, strlen(uri));
2393 else
2394 suri = PyString_FromStringAndSize("", 0);
2395 if (!suri)
2396 return;
2397
2398 if (prefix)
2399 sprefix = makestring(prefix, strlen(prefix));
2400 else
2401 sprefix = PyString_FromStringAndSize("", 0);
2402 if (!sprefix) {
2403 Py_DECREF(suri);
2404 return;
2405 }
2406
2407 parcel = PyTuple_Pack(2, sprefix, suri);
2408 Py_DECREF(sprefix);
2409 Py_DECREF(suri);
2410 if (!parcel)
2411 return;
2412 treebuilder_append_event(target, target->start_ns_event_obj, parcel);
2413 Py_DECREF(parcel);
2414 }
2415
2416 static void
2417 expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2418 {
2419 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
2420
2421 if (PyErr_Occurred())
2422 return;
2423
2424 if (!target->events)
2425 return;
2426
2427 treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
2428 }
2429
2430 static void
2431 expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2432 {
2433 PyObject* comment;
2434 PyObject* res;
2435
2436 if (self->handle_comment) {
2437 comment = makestring(comment_in, strlen(comment_in));
2438 if (comment) {
2439 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2440 Py_XDECREF(res);
2441 Py_DECREF(comment);
2442 }
2443 }
2444 }
2445
2446 static void
2447 expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2448 const XML_Char* data_in)
2449 {
2450 PyObject* target;
2451 PyObject* data;
2452 PyObject* res;
2453
2454 if (self->handle_pi) {
2455 target = makestring(target_in, strlen(target_in));
2456 data = makestring(data_in, strlen(data_in));
2457 if (target && data) {
2458 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2459 Py_XDECREF(res);
2460 Py_DECREF(data);
2461 Py_DECREF(target);
2462 } else {
2463 Py_XDECREF(data);
2464 Py_XDECREF(target);
2465 }
2466 }
2467 }
2468
#if defined(Py_USING_UNICODE)
/* Expat callback for an encoding it does not know itself.
 *
 * Decodes all 256 byte values with the Python codec called `name` and
 * stores the resulting code points in info->map; bytes the codec turned
 * into the Unicode replacement character are stored as -1.
 *
 * Returns XML_STATUS_OK on success.  Returns XML_STATUS_ERROR when the
 * decode fails, or (with ValueError set) when the decoded string is not
 * exactly 256 characters long.
 */
static int
expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
                               XML_Encoding *info)
{
    unsigned char bytes[256];
    PyObject* decoded;
    Py_UNICODE* chars;
    int code;

    memset(info, 0, sizeof(XML_Encoding));

    /* one probe string containing every byte value exactly once */
    for (code = 0; code < 256; code++)
        bytes[code] = code;

    decoded = PyUnicode_Decode((char*) bytes, 256, name, "replace");
    if (!decoded)
        return XML_STATUS_ERROR;

    if (PyUnicode_GET_SIZE(decoded) != 256) {
        Py_DECREF(decoded);
        PyErr_SetString(PyExc_ValueError,
                        "multi-byte encodings are not supported");
        return XML_STATUS_ERROR;
    }

    chars = PyUnicode_AS_UNICODE(decoded);

    for (code = 0; code < 256; code++) {
        if (chars[code] == Py_UNICODE_REPLACEMENT_CHARACTER)
            info->map[code] = -1;
        else
            info->map[code] = chars[code];
    }

    Py_DECREF(decoded);

    return XML_STATUS_OK;
}
#endif
2509
2510 /* -------------------------------------------------------------------- */
2511 /* constructor and destructor */
2512
2513 static int
2514 ignore_attribute_error(PyObject *value)
2515 {
2516 if (value == NULL) {
2517 if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
2518 return -1;
2519 }
2520 PyErr_Clear();
2521 }
2522 return 0;
2523 }
2524
2525 static PyObject*
2526 xmlparser(PyObject* self_, PyObject* args, PyObject* kw)
2527 {
2528 XMLParserObject* self;
2529 /* FIXME: does this need to be static? */
2530 static XML_Memory_Handling_Suite memory_handler;
2531
2532 PyObject* target = NULL;
2533 char* encoding = NULL;
2534 static char* kwlist[] = { "target", "encoding", NULL };
2535 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2536 &target, &encoding))
2537 return NULL;
2538
2539 #if defined(USE_PYEXPAT_CAPI)
2540 if (!expat_capi) {
2541 PyErr_SetString(
2542 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2543 );
2544 return NULL;
2545 }
2546 #endif
2547
2548 self = PyObject_New(XMLParserObject, &XMLParser_Type);
2549 if (self == NULL)
2550 return NULL;
2551
2552 self->entity = PyDict_New();
2553 if (!self->entity) {
2554 PyObject_Del(self);
2555 return NULL;
2556 }
2557
2558 self->names = PyDict_New();
2559 if (!self->names) {
2560 PyObject_Del(self->entity);
2561 PyObject_Del(self);
2562 return NULL;
2563 }
2564
2565 memory_handler.malloc_fcn = PyObject_Malloc;
2566 memory_handler.realloc_fcn = PyObject_Realloc;
2567 memory_handler.free_fcn = PyObject_Free;
2568
2569 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2570 if (!self->parser) {
2571 PyObject_Del(self->names);
2572 PyObject_Del(self->entity);
2573 PyObject_Del(self);
2574 PyErr_NoMemory();
2575 return NULL;
2576 }
2577
2578 ALLOC(sizeof(XMLParserObject), "create expatparser");
2579
2580 /* Init to NULL to keep the error handling below manageable. */
2581 self->target =
2582 self->handle_xml =
2583 self->handle_start =
2584 self->handle_data =
2585 self->handle_end =
2586 self->handle_comment =
2587 self->handle_pi =
2588 self->handle_close =
2589 NULL;
2590
2591 /* setup target handlers */
2592 if (!target) {
2593 target = treebuilder_new();
2594 if (!target) {
2595 Py_DECREF(self);
2596 return NULL;
2597 }
2598 } else
2599 Py_INCREF(target);
2600 self->target = target;
2601
2602 self->handle_xml = PyObject_GetAttrString(target, "xml");
2603 if (ignore_attribute_error(self->handle_xml)) {
2604 Py_DECREF(self);
2605 return NULL;
2606 }
2607 self->handle_start = PyObject_GetAttrString(target, "start");
2608 if (ignore_attribute_error(self->handle_start)) {
2609 Py_DECREF(self);
2610 return NULL;
2611 }
2612 self->handle_data = PyObject_GetAttrString(target, "data");
2613 if (ignore_attribute_error(self->handle_data)) {
2614 Py_DECREF(self);
2615 return NULL;
2616 }
2617 self->handle_end = PyObject_GetAttrString(target, "end");
2618 if (ignore_attribute_error(self->handle_end)) {
2619 Py_DECREF(self);
2620 return NULL;
2621 }
2622 self->handle_comment = PyObject_GetAttrString(target, "comment");
2623 if (ignore_attribute_error(self->handle_comment)) {
2624 Py_DECREF(self);
2625 return NULL;
2626 }
2627 self->handle_pi = PyObject_GetAttrString(target, "pi");
2628 if (ignore_attribute_error(self->handle_pi)) {
2629 Py_DECREF(self);
2630 return NULL;
2631 }
2632 self->handle_close = PyObject_GetAttrString(target, "close");
2633 if (ignore_attribute_error(self->handle_close)) {
2634 Py_DECREF(self);
2635 return NULL;
2636 }
2637
2638 /* configure parser */
2639 EXPAT(SetUserData)(self->parser, self);
2640 EXPAT(SetElementHandler)(
2641 self->parser,
2642 (XML_StartElementHandler) expat_start_handler,
2643 (XML_EndElementHandler) expat_end_handler
2644 );
2645 EXPAT(SetDefaultHandlerExpand)(
2646 self->parser,
2647 (XML_DefaultHandler) expat_default_handler
2648 );
2649 EXPAT(SetCharacterDataHandler)(
2650 self->parser,
2651 (XML_CharacterDataHandler) expat_data_handler
2652 );
2653 if (self->handle_comment)
2654 EXPAT(SetCommentHandler)(
2655 self->parser,
2656 (XML_CommentHandler) expat_comment_handler
2657 );
2658 if (self->handle_pi)
2659 EXPAT(SetProcessingInstructionHandler)(
2660 self->parser,
2661 (XML_ProcessingInstructionHandler) expat_pi_handler
2662 );
2663 #if defined(Py_USING_UNICODE)
2664 EXPAT(SetUnknownEncodingHandler)(
2665 self->parser,
2666 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2667 );
2668 #endif
2669
2670 return (PyObject*) self;
2671 }
2672
2673 static void
2674 xmlparser_dealloc(XMLParserObject* self)
2675 {
2676 EXPAT(ParserFree)(self->parser);
2677
2678 Py_XDECREF(self->handle_close);
2679 Py_XDECREF(self->handle_pi);
2680 Py_XDECREF(self->handle_comment);
2681 Py_XDECREF(self->handle_end);
2682 Py_XDECREF(self->handle_data);
2683 Py_XDECREF(self->handle_start);
2684 Py_XDECREF(self->handle_xml);
2685
2686 Py_DECREF(self->target);
2687 Py_DECREF(self->entity);
2688 Py_DECREF(self->names);
2689
2690 RELEASE(sizeof(XMLParserObject), "destroy expatparser");
2691
2692 PyObject_Del(self);
2693 }
2694
2695 /* -------------------------------------------------------------------- */
2696 /* methods (in alphabetical order) */
2697
2698 LOCAL(PyObject*)
2699 expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2700 {
2701 int ok;
2702
2703 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2704
2705 if (PyErr_Occurred())
2706 return NULL;
2707
2708 if (!ok) {
2709 expat_set_error(
2710 EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)),
2711 EXPAT(GetErrorLineNumber)(self->parser),
2712 EXPAT(GetErrorColumnNumber)(self->parser)
2713 );
2714 return NULL;
2715 }
2716
2717 Py_RETURN_NONE;
2718 }
2719
2720 static PyObject*
2721 xmlparser_close(XMLParserObject* self, PyObject* args)
2722 {
2723 /* end feeding data to parser */
2724
2725 PyObject* res;
2726 if (!PyArg_ParseTuple(args, ":close"))
2727 return NULL;
2728
2729 res = expat_parse(self, "", 0, 1);
2730 if (!res)
2731 return NULL;
2732
2733 if (TreeBuilder_CheckExact(self->target)) {
2734 Py_DECREF(res);
2735 return treebuilder_done((TreeBuilderObject*) self->target);
2736 } if (self->handle_close) {
2737 Py_DECREF(res);
2738 return PyObject_CallFunction(self->handle_close, "");
2739 } else
2740 return res;
2741 }
2742
2743 static PyObject*
2744 xmlparser_feed(XMLParserObject* self, PyObject* args)
2745 {
2746 /* feed data to parser */
2747
2748 char* data;
2749 int data_len;
2750 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2751 return NULL;
2752
2753 return expat_parse(self, data, data_len, 0);
2754 }
2755
2756 static PyObject*
2757 xmlparser_parse(XMLParserObject* self, PyObject* args)
2758 {
2759 /* (internal) parse until end of input stream */
2760
2761 PyObject* reader;
2762 PyObject* buffer;
2763 PyObject* res;
2764
2765 PyObject* fileobj;
2766 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2767 return NULL;
2768
2769 reader = PyObject_GetAttrString(fileobj, "read");
2770 if (!reader)
2771 return NULL;
2772
2773 /* read from open file object */
2774 for (;;) {
2775
2776 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2777
2778 if (!buffer) {
2779 /* read failed (e.g. due to KeyboardInterrupt) */
2780 Py_DECREF(reader);
2781 return NULL;
2782 }
2783
2784 if (!PyString_CheckExact(buffer) || PyString_GET_SIZE(buffer) == 0) {
2785 Py_DECREF(buffer);
2786 break;
2787 }
2788
2789 if (PyString_GET_SIZE(buffer) > INT_MAX) {
2790 Py_DECREF(buffer);
2791 Py_DECREF(reader);
2792 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
2793 return NULL;
2794 }
2795 res = expat_parse(
2796 self, PyString_AS_STRING(buffer), (int)PyString_GET_SIZE(buffer), 0
2797 );
2798
2799 Py_DECREF(buffer);
2800
2801 if (!res) {
2802 Py_DECREF(reader);
2803 return NULL;
2804 }
2805 Py_DECREF(res);
2806
2807 }
2808
2809 Py_DECREF(reader);
2810
2811 res = expat_parse(self, "", 0, 1);
2812
2813 if (res && TreeBuilder_CheckExact(self->target)) {
2814 Py_DECREF(res);
2815 return treebuilder_done((TreeBuilderObject*) self->target);
2816 }
2817
2818 return res;
2819 }
2820
2821 static PyObject*
2822 xmlparser_setevents(XMLParserObject* self, PyObject* args)
2823 {
2824 /* activate element event reporting */
2825
2826 Py_ssize_t i;
2827 TreeBuilderObject* target;
2828
2829 PyObject* events; /* event collector */
2830 PyObject* event_set = Py_None;
2831 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2832 &event_set))
2833 return NULL;
2834
2835 if (!TreeBuilder_CheckExact(self->target)) {
2836 PyErr_SetString(
2837 PyExc_TypeError,
2838 "event handling only supported for cElementTree.Treebuilder "
2839 "targets"
2840 );
2841 return NULL;
2842 }
2843
2844 target = (TreeBuilderObject*) self->target;
2845
2846 Py_INCREF(events);
2847 Py_XSETREF(target->events, events);
2848
2849 /* clear out existing events */
2850 Py_CLEAR(target->start_event_obj);
2851 Py_CLEAR(target->end_event_obj);
2852 Py_CLEAR(target->start_ns_event_obj);
2853 Py_CLEAR(target->end_ns_event_obj);
2854
2855 if (event_set == Py_None) {
2856 /* default is "end" only */
2857 target->end_event_obj = PyString_FromString("end");
2858 Py_RETURN_NONE;
2859 }
2860
2861 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2862 goto error;
2863
2864 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2865 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2866 char* event;
2867 if (!PyString_Check(item))
2868 goto error;
2869 Py_INCREF(item);
2870 event = PyString_AS_STRING(item);
2871 if (strcmp(event, "start") == 0) {
2872 Py_XSETREF(target->start_event_obj, item);
2873 } else if (strcmp(event, "end") == 0) {
2874 Py_XSETREF(target->end_event_obj, item);
2875 } else if (strcmp(event, "start-ns") == 0) {
2876 Py_XSETREF(target->start_ns_event_obj, item);
2877 EXPAT(SetNamespaceDeclHandler)(
2878 self->parser,
2879 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2880 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2881 );
2882 } else if (strcmp(event, "end-ns") == 0) {
2883 Py_XSETREF(target->end_ns_event_obj, item);
2884 EXPAT(SetNamespaceDeclHandler)(
2885 self->parser,
2886 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2887 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2888 );
2889 } else {
2890 Py_DECREF(item);
2891 PyErr_Format(
2892 PyExc_ValueError,
2893 "unknown event '%s'", event
2894 );
2895 return NULL;
2896 }
2897 }
2898
2899 Py_RETURN_NONE;
2900
2901 error:
2902 PyErr_SetString(
2903 PyExc_TypeError,
2904 "invalid event tuple"
2905 );
2906 return NULL;
2907 }
2908
2909 static PyMethodDef xmlparser_methods[] = {
2910 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
2911 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
2912 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
2913 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
2914 {NULL, NULL}
2915 };
2916
2917 static PyObject*
2918 xmlparser_getattr(XMLParserObject* self, char* name)
2919 {
2920 PyObject* res;
2921
2922 res = Py_FindMethod(xmlparser_methods, (PyObject*) self, name);
2923 if (res)
2924 return res;
2925
2926 PyErr_Clear();
2927
2928 if (strcmp(name, "entity") == 0)
2929 res = self->entity;
2930 else if (strcmp(name, "target") == 0)
2931 res = self->target;
2932 else if (strcmp(name, "version") == 0) {
2933 char buffer[100];
2934 sprintf(buffer, "Expat %d.%d.%d", XML_MAJOR_VERSION,
2935 XML_MINOR_VERSION, XML_MICRO_VERSION);
2936 return PyString_FromString(buffer);
2937 } else {
2938 PyErr_SetString(PyExc_AttributeError, name);
2939 return NULL;
2940 }
2941
2942 Py_INCREF(res);
2943 return res;
2944 }
2945
/* Type object for XMLParser instances.  Only the name, the instance
   size, the deallocator and the getattr hook are spelled out; all later
   slots are left to the initializer's implicit zero fill.  The type
   pointer is NULL here and is patched to &PyType_Type in
   init_elementtree(). */
statichere PyTypeObject XMLParser_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    /* type name, basic instance size, per-item size */
    "XMLParser", sizeof(XMLParserObject), 0,
    /* methods */
    (destructor)xmlparser_dealloc, /* tp_dealloc */
    0, /* tp_print */
    (getattrfunc)xmlparser_getattr, /* tp_getattr */
};
2954
2955 #endif
2956
2957 /* ==================================================================== */
2958 /* python module interface */
2959
2960 static PyMethodDef _functions[] = {
2961 {"Element", (PyCFunction) element, METH_VARARGS|METH_KEYWORDS},
2962 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
2963 {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
2964 #if defined(USE_EXPAT)
2965 {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2966 {"XMLTreeBuilder", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2967 #endif
2968 {NULL, NULL}
2969 };
2970
2971 DL_EXPORT(void)
2972 init_elementtree(void)
2973 {
2974 PyObject* m;
2975 PyObject* g;
2976 char* bootstrap;
2977
2978 /* Patch object type */
2979 Py_TYPE(&Element_Type) = Py_TYPE(&TreeBuilder_Type) = &PyType_Type;
2980 #if defined(USE_EXPAT)
2981 Py_TYPE(&XMLParser_Type) = &PyType_Type;
2982 #endif
2983
2984 m = Py_InitModule("_elementtree", _functions);
2985 if (!m)
2986 return;
2987
2988 /* python glue code */
2989
2990 g = PyDict_New();
2991 if (!g)
2992 return;
2993
2994 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
2995
2996 bootstrap = (
2997
2998 "from copy import copy, deepcopy\n"
2999
3000 "try:\n"
3001 " from xml.etree import ElementTree\n"
3002 "except ImportError:\n"
3003 " import ElementTree\n"
3004 "ET = ElementTree\n"
3005 "del ElementTree\n"
3006
3007 "import _elementtree as cElementTree\n"
3008
3009 "try:\n" /* check if copy works as is */
3010 " copy(cElementTree.Element('x'))\n"
3011 "except:\n"
3012 " def copyelement(elem):\n"
3013 " return elem\n"
3014
3015 "class CommentProxy:\n"
3016 " def __call__(self, text=None):\n"
3017 " element = cElementTree.Element(ET.Comment)\n"
3018 " element.text = text\n"
3019 " return element\n"
3020 " def __cmp__(self, other):\n"
3021 " return cmp(ET.Comment, other)\n"
3022 "cElementTree.Comment = CommentProxy()\n"
3023
3024 "class ElementTree(ET.ElementTree):\n" /* public */
3025 " def parse(self, source, parser=None):\n"
3026 " close_source = False\n"
3027 " if not hasattr(source, 'read'):\n"
3028 " source = open(source, 'rb')\n"
3029 " close_source = False\n"
3030 " try:\n"
3031 " if parser is not None:\n"
3032 " while 1:\n"
3033 " data = source.read(65536)\n"
3034 " if not data:\n"
3035 " break\n"
3036 " parser.feed(data)\n"
3037 " self._root = parser.close()\n"
3038 " else:\n"
3039 " parser = cElementTree.XMLParser()\n"
3040 " self._root = parser._parse(source)\n"
3041 " return self._root\n"
3042 " finally:\n"
3043 " if close_source:\n"
3044 " source.close()\n"
3045 "cElementTree.ElementTree = ElementTree\n"
3046
3047 "def iter(node, tag=None):\n" /* helper */
3048 " if tag == '*':\n"
3049 " tag = None\n"
3050 " if tag is None or node.tag == tag:\n"
3051 " yield node\n"
3052 " for node in node:\n"
3053 " for node in iter(node, tag):\n"
3054 " yield node\n"
3055
3056 "def itertext(node):\n" /* helper */
3057 " if node.text:\n"
3058 " yield node.text\n"
3059 " for e in node:\n"
3060 " for s in e.itertext():\n"
3061 " yield s\n"
3062 " if e.tail:\n"
3063 " yield e.tail\n"
3064
3065 "def parse(source, parser=None):\n" /* public */
3066 " tree = ElementTree()\n"
3067 " tree.parse(source, parser)\n"
3068 " return tree\n"
3069 "cElementTree.parse = parse\n"
3070
3071 "class iterparse(object):\n"
3072 " root = None\n"
3073 " def __init__(self, file, events=None):\n"
3074 " self._close_file = False\n"
3075 " if not hasattr(file, 'read'):\n"
3076 " file = open(file, 'rb')\n"
3077 " self._close_file = True\n"
3078 " self._file = file\n"
3079 " self._events = []\n"
3080 " self._index = 0\n"
3081 " self._error = None\n"
3082 " self.root = self._root = None\n"
3083 " b = cElementTree.TreeBuilder()\n"
3084 " self._parser = cElementTree.XMLParser(b)\n"
3085 " self._parser._setevents(self._events, events)\n"
3086 " def next(self):\n"
3087 " while 1:\n"
3088 " try:\n"
3089 " item = self._events[self._index]\n"
3090 " self._index += 1\n"
3091 " return item\n"
3092 " except IndexError:\n"
3093 " pass\n"
3094 " if self._error:\n"
3095 " e = self._error\n"
3096 " self._error = None\n"
3097 " raise e\n"
3098 " if self._parser is None:\n"
3099 " self.root = self._root\n"
3100 " if self._close_file:\n"
3101 " self._file.close()\n"
3102 " raise StopIteration\n"
3103 " # load event buffer\n"
3104 " del self._events[:]\n"
3105 " self._index = 0\n"
3106 " data = self._file.read(16384)\n"
3107 " if data:\n"
3108 " try:\n"
3109 " self._parser.feed(data)\n"
3110 " except SyntaxError as exc:\n"
3111 " self._error = exc\n"
3112 " else:\n"
3113 " self._root = self._parser.close()\n"
3114 " self._parser = None\n"
3115 " def __iter__(self):\n"
3116 " return self\n"
3117 "cElementTree.iterparse = iterparse\n"
3118
3119 "class PIProxy:\n"
3120 " def __call__(self, target, text=None):\n"
3121 " element = cElementTree.Element(ET.PI)\n"
3122 " element.text = target\n"
3123 " if text:\n"
3124 " element.text = element.text + ' ' + text\n"
3125 " return element\n"
3126 " def __cmp__(self, other):\n"
3127 " return cmp(ET.PI, other)\n"
3128 "cElementTree.PI = cElementTree.ProcessingInstruction = PIProxy()\n"
3129
3130 "def XML(text):\n" /* public */
3131 " parser = cElementTree.XMLParser()\n"
3132 " parser.feed(text)\n"
3133 " return parser.close()\n"
3134 "cElementTree.XML = cElementTree.fromstring = XML\n"
3135
3136 "def XMLID(text):\n" /* public */
3137 " tree = XML(text)\n"
3138 " ids = {}\n"
3139 " for elem in tree.iter():\n"
3140 " id = elem.get('id')\n"
3141 " if id:\n"
3142 " ids[id] = elem\n"
3143 " return tree, ids\n"
3144 "cElementTree.XMLID = XMLID\n"
3145
3146 "try:\n"
3147 " register_namespace = ET.register_namespace\n"
3148 "except AttributeError:\n"
3149 " def register_namespace(prefix, uri):\n"
3150 " ET._namespace_map[uri] = prefix\n"
3151 "cElementTree.register_namespace = register_namespace\n"
3152
3153 "cElementTree.dump = ET.dump\n"
3154 "cElementTree.ElementPath = ElementPath = ET.ElementPath\n"
3155 "cElementTree.iselement = ET.iselement\n"
3156 "cElementTree.QName = ET.QName\n"
3157 "cElementTree.tostring = ET.tostring\n"
3158 "cElementTree.fromstringlist = ET.fromstringlist\n"
3159 "cElementTree.tostringlist = ET.tostringlist\n"
3160 "cElementTree.VERSION = '" VERSION "'\n"
3161 "cElementTree.__version__ = '" VERSION "'\n"
3162
3163 );
3164
3165 if (!PyRun_String(bootstrap, Py_file_input, g, NULL))
3166 return;
3167
3168 elementpath_obj = PyDict_GetItemString(g, "ElementPath");
3169
3170 elementtree_copyelement_obj = PyDict_GetItemString(g, "copyelement");
3171 if (elementtree_copyelement_obj) {
3172 /* reduce hack needed; enable reduce method */
3173 PyMethodDef* mp;
3174 for (mp = element_methods; mp->ml_name; mp++)
3175 if (mp->ml_meth == (PyCFunction) element_reduce) {
3176 mp->ml_name = "__reduce__";
3177 break;
3178 }
3179 } else
3180 PyErr_Clear();
3181
3182 elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy");
3183 elementtree_iter_obj = PyDict_GetItemString(g, "iter");
3184 elementtree_itertext_obj = PyDict_GetItemString(g, "itertext");
3185
3186 #if defined(USE_PYEXPAT_CAPI)
3187 /* link against pyexpat, if possible */
3188 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3189 if (expat_capi) {
3190 /* check that it's usable */
3191 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3192 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3193 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3194 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
3195 expat_capi->MICRO_VERSION != XML_MICRO_VERSION)
3196 expat_capi = NULL;
3197 }
3198 #endif
3199
3200 elementtree_parseerror_obj = PyErr_NewException(
3201 "cElementTree.ParseError", PyExc_SyntaxError, NULL
3202 );
3203 Py_INCREF(elementtree_parseerror_obj);
3204 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
3205 }
3206