• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include "Python.h"
2 #include <ctype.h>
3 
4 #include "structmember.h"         // PyMemberDef
5 #include "frameobject.h"
6 #include "expat.h"
7 
8 #include "pyexpat.h"
9 
10 /* Do not emit Clinic output to a file as that wreaks havoc with conditionally
11    included methods. */
12 /*[clinic input]
13 module pyexpat
14 [clinic start generated code]*/
15 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=b168d503a4490c15]*/
16 
/* Expat's major/minor/micro version folded into one integer
   (major*10000 + minor*100 + micro) so that preprocessor conditionals can
   compare against a single number, e.g. ">= 19504". */
#define XML_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)
18 
/* Expat memory-handling suite populated with the PyObject_* allocator
   functions, in malloc / realloc / free order.
   NOTE(review): presumably passed to Expat when a parser is created; the
   use site is outside this part of the file -- confirm. */
static XML_Memory_Handling_Suite ExpatMemoryHandler = {
    PyObject_Malloc, PyObject_Realloc, PyObject_Free};
21 
/* Slot numbers for the Python-level callbacks.  Each enumerator is used as
   an index into xmlparseobject.handlers (see have_handler()), and the
   my_<Name>Handler trampolines generated by RC_HANDLER look up their
   callable with the enumerator of the same name.
   SkippedEntity only exists when building against Expat >= 1.95.4.
   NOTE(review): _DummyDecl looks like an end-of-list sentinel; its use is
   not visible here -- confirm. */
enum HandlerTypes {
    StartElement,
    EndElement,
    ProcessingInstruction,
    CharacterData,
    UnparsedEntityDecl,
    NotationDecl,
    StartNamespaceDecl,
    EndNamespaceDecl,
    Comment,
    StartCdataSection,
    EndCdataSection,
    Default,
    DefaultHandlerExpand,
    NotStandalone,
    ExternalEntityRef,
    StartDoctypeDecl,
    EndDoctypeDecl,
    EntityDecl,
    XmlDecl,
    ElementDecl,
    AttlistDecl,
#if XML_COMBINED_VERSION >= 19504
    SkippedEntity,
#endif
    _DummyDecl
};
49 
/* Per-module state, retrieved with pyexpat_get_state() or
   PyType_GetModuleState(). */
typedef struct {
    /* Type object used to allocate xmlparseobject instances. */
    PyTypeObject *xml_parse_type;
    /* Exception class instantiated and raised by set_error(). */
    PyObject *error;
} pyexpat_state;
54 
55 static inline pyexpat_state*
pyexpat_get_state(PyObject * module)56 pyexpat_get_state(PyObject *module)
57 {
58     void *state = PyModule_GetState(module);
59     assert(state != NULL);
60     return (pyexpat_state *)state;
61 }
62 
63 /* ----------------------------------------------------- */
64 
65 /* Declarations for objects of type xmlparser */
66 
/* Instance layout of a parser object: the wrapped Expat parser plus the
   Python-side options, the optional character-data accumulation buffer,
   the interning dictionary and the table of Python callbacks. */
typedef struct {
    PyObject_HEAD

    XML_Parser itself;          /* The underlying Expat parser */
    int ordered_attributes;     /* Return attributes as a list. */
    int specified_attributes;   /* Report only specified attributes. */
    int in_callback;            /* Is a callback active? */
    int ns_prefixes;            /* Namespace-triplets mode? */
    XML_Char *buffer;           /* Buffer used when accumulating characters */
                                /* NULL if not enabled */
    int buffer_size;            /* Size of buffer, in XML_Char units */
    int buffer_used;            /* Buffer units in use */
    PyObject *intern;           /* Dictionary to intern strings */
    PyObject **handlers;        /* Callables, indexed by enum HandlerTypes;
                                   a NULL slot means "no handler set" */
} xmlparseobject;
82 
83 #include "clinic/pyexpat.c.h"
84 
/* NOTE(review): presumably the default size (in XML_Char units) of
   xmlparseobject.buffer; the use site is not in this part of the file. */
#define CHARACTER_DATA_BUFFER_SIZE 8192

/* Signature of a function that installs callback `meth` on an Expat
   parser. */
typedef void (*xmlhandlersetter)(XML_Parser self, void *meth);
/* Type-erased pointer to one of the C trampoline handlers. */
typedef void* xmlhandler;

/* One record per supported handler.
   NOTE(review): the fields look like the Python-visible attribute name, the
   Expat setter, the C trampoline and the matching getset entry; the code
   that fills the table is outside this excerpt -- confirm. */
struct HandlerInfo {
    const char *name;
    xmlhandlersetter setter;
    xmlhandler handler;
    PyGetSetDef getset;
};

/* Handler table; only declared here (zero-initialized static storage). */
static struct HandlerInfo handler_info[64];
98 
99 /* Set an integer attribute on the error object; return true on success,
100  * false on an exception.
101  */
102 static int
set_error_attr(PyObject * err,const char * name,int value)103 set_error_attr(PyObject *err, const char *name, int value)
104 {
105     PyObject *v = PyLong_FromLong(value);
106 
107     if (v == NULL || PyObject_SetAttrString(err, name, v) == -1) {
108         Py_XDECREF(v);
109         return 0;
110     }
111     Py_DECREF(v);
112     return 1;
113 }
114 
115 /* Build and set an Expat exception, including positioning
116  * information.  Always returns NULL.
117  */
118 static PyObject *
set_error(pyexpat_state * state,xmlparseobject * self,enum XML_Error code)119 set_error(pyexpat_state *state, xmlparseobject *self, enum XML_Error code)
120 {
121     PyObject *err;
122     PyObject *buffer;
123     XML_Parser parser = self->itself;
124     int lineno = XML_GetErrorLineNumber(parser);
125     int column = XML_GetErrorColumnNumber(parser);
126 
127     buffer = PyUnicode_FromFormat("%s: line %i, column %i",
128                                   XML_ErrorString(code), lineno, column);
129     if (buffer == NULL)
130         return NULL;
131     err = PyObject_CallOneArg(state->error, buffer);
132     Py_DECREF(buffer);
133     if (  err != NULL
134           && set_error_attr(err, "code", code)
135           && set_error_attr(err, "offset", column)
136           && set_error_attr(err, "lineno", lineno)) {
137         PyErr_SetObject(state->error, err);
138     }
139     Py_XDECREF(err);
140     return NULL;
141 }
142 
143 static int
have_handler(xmlparseobject * self,int type)144 have_handler(xmlparseobject *self, int type)
145 {
146     PyObject *handler = self->handlers[type];
147     return handler != NULL;
148 }
149 
150 /* Convert a string of XML_Chars into a Unicode string.
151    Returns None if str is a null pointer. */
152 
153 static PyObject *
conv_string_to_unicode(const XML_Char * str)154 conv_string_to_unicode(const XML_Char *str)
155 {
156     /* XXX currently this code assumes that XML_Char is 8-bit,
157        and hence in UTF-8.  */
158     /* UTF-8 from Expat, Unicode desired */
159     if (str == NULL) {
160         Py_RETURN_NONE;
161     }
162     return PyUnicode_DecodeUTF8(str, strlen(str), "strict");
163 }
164 
165 static PyObject *
conv_string_len_to_unicode(const XML_Char * str,int len)166 conv_string_len_to_unicode(const XML_Char *str, int len)
167 {
168     /* XXX currently this code assumes that XML_Char is 8-bit,
169        and hence in UTF-8.  */
170     /* UTF-8 from Expat, Unicode desired */
171     if (str == NULL) {
172         Py_RETURN_NONE;
173     }
174     return PyUnicode_DecodeUTF8((const char *)str, len, "strict");
175 }
176 
177 /* Callback routines */
178 
/* Forward declaration: flag_error() below calls this before a definition
   has appeared in the file. */
static void clear_handlers(xmlparseobject *self, int initial);
180 
181 /* This handler is used when an error has been detected, in the hope
182    that actual parsing can be terminated early.  This will only help
183    if an external entity reference is encountered. */
184 static int
error_external_entity_ref_handler(XML_Parser parser,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)185 error_external_entity_ref_handler(XML_Parser parser,
186                                   const XML_Char *context,
187                                   const XML_Char *base,
188                                   const XML_Char *systemId,
189                                   const XML_Char *publicId)
190 {
191     return 0;
192 }
193 
194 /* Dummy character data handler used when an error (exception) has
195    been detected, and the actual parsing can be terminated early.
196    This is needed since character data handler can't be safely removed
197    from within the character data handler, but can be replaced.  It is
198    used only from the character data handler trampoline, and must be
199    used right after `flag_error()` is called. */
200 static void
noop_character_data_handler(void * userData,const XML_Char * data,int len)201 noop_character_data_handler(void *userData, const XML_Char *data, int len)
202 {
203     /* Do nothing. */
204 }
205 
206 static void
flag_error(xmlparseobject * self)207 flag_error(xmlparseobject *self)
208 {
209     clear_handlers(self, 0);
210     XML_SetExternalEntityRefHandler(self->itself,
211                                     error_external_entity_ref_handler);
212 }
213 
214 static PyObject*
call_with_frame(const char * funcname,int lineno,PyObject * func,PyObject * args,xmlparseobject * self)215 call_with_frame(const char *funcname, int lineno, PyObject* func, PyObject* args,
216                 xmlparseobject *self)
217 {
218     PyObject *res;
219 
220     res = PyObject_Call(func, args, NULL);
221     if (res == NULL) {
222         _PyTraceback_Add(funcname, __FILE__, lineno);
223         XML_StopParser(self->itself, XML_FALSE);
224     }
225     return res;
226 }
227 
228 static PyObject*
string_intern(xmlparseobject * self,const char * str)229 string_intern(xmlparseobject *self, const char* str)
230 {
231     PyObject *result = conv_string_to_unicode(str);
232     PyObject *value;
233     /* result can be NULL if the unicode conversion failed. */
234     if (!result)
235         return result;
236     if (!self->intern)
237         return result;
238     value = PyDict_GetItemWithError(self->intern, result);
239     if (!value) {
240         if (!PyErr_Occurred() &&
241             PyDict_SetItem(self->intern, result, result) == 0)
242         {
243             return result;
244         }
245         else {
246             Py_DECREF(result);
247             return NULL;
248         }
249     }
250     Py_INCREF(value);
251     Py_DECREF(result);
252     return value;
253 }
254 
255 /* Return 0 on success, -1 on exception.
256  * flag_error() will be called before return if needed.
257  */
258 static int
call_character_handler(xmlparseobject * self,const XML_Char * buffer,int len)259 call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len)
260 {
261     PyObject *args;
262     PyObject *temp;
263 
264     if (!have_handler(self, CharacterData))
265         return -1;
266 
267     args = PyTuple_New(1);
268     if (args == NULL)
269         return -1;
270     temp = (conv_string_len_to_unicode(buffer, len));
271     if (temp == NULL) {
272         Py_DECREF(args);
273         flag_error(self);
274         XML_SetCharacterDataHandler(self->itself,
275                                     noop_character_data_handler);
276         return -1;
277     }
278     PyTuple_SET_ITEM(args, 0, temp);
279     /* temp is now a borrowed reference; consider it unused. */
280     self->in_callback = 1;
281     temp = call_with_frame("CharacterData", __LINE__,
282                            self->handlers[CharacterData], args, self);
283     /* temp is an owned reference again, or NULL */
284     self->in_callback = 0;
285     Py_DECREF(args);
286     if (temp == NULL) {
287         flag_error(self);
288         XML_SetCharacterDataHandler(self->itself,
289                                     noop_character_data_handler);
290         return -1;
291     }
292     Py_DECREF(temp);
293     return 0;
294 }
295 
296 static int
flush_character_buffer(xmlparseobject * self)297 flush_character_buffer(xmlparseobject *self)
298 {
299     int rc;
300     if (self->buffer == NULL || self->buffer_used == 0)
301         return 0;
302     rc = call_character_handler(self, self->buffer, self->buffer_used);
303     self->buffer_used = 0;
304     return rc;
305 }
306 
307 static void
my_CharacterDataHandler(void * userData,const XML_Char * data,int len)308 my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
309 {
310     xmlparseobject *self = (xmlparseobject *) userData;
311 
312     if (PyErr_Occurred())
313         return;
314 
315     if (self->buffer == NULL)
316         call_character_handler(self, data, len);
317     else {
318         if ((self->buffer_used + len) > self->buffer_size) {
319             if (flush_character_buffer(self) < 0)
320                 return;
321             /* handler might have changed; drop the rest on the floor
322              * if there isn't a handler anymore
323              */
324             if (!have_handler(self, CharacterData))
325                 return;
326         }
327         if (len > self->buffer_size) {
328             call_character_handler(self, data, len);
329             self->buffer_used = 0;
330         }
331         else {
332             memcpy(self->buffer + self->buffer_used,
333                    data, len * sizeof(XML_Char));
334             self->buffer_used += len;
335         }
336     }
337 }
338 
339 static void
my_StartElementHandler(void * userData,const XML_Char * name,const XML_Char * atts[])340 my_StartElementHandler(void *userData,
341                        const XML_Char *name, const XML_Char *atts[])
342 {
343     xmlparseobject *self = (xmlparseobject *)userData;
344 
345     if (have_handler(self, StartElement)) {
346         PyObject *container, *rv, *args;
347         int i, max;
348 
349         if (PyErr_Occurred())
350             return;
351 
352         if (flush_character_buffer(self) < 0)
353             return;
354         /* Set max to the number of slots filled in atts[]; max/2 is
355          * the number of attributes we need to process.
356          */
357         if (self->specified_attributes) {
358             max = XML_GetSpecifiedAttributeCount(self->itself);
359         }
360         else {
361             max = 0;
362             while (atts[max] != NULL)
363                 max += 2;
364         }
365         /* Build the container. */
366         if (self->ordered_attributes)
367             container = PyList_New(max);
368         else
369             container = PyDict_New();
370         if (container == NULL) {
371             flag_error(self);
372             return;
373         }
374         for (i = 0; i < max; i += 2) {
375             PyObject *n = string_intern(self, (XML_Char *) atts[i]);
376             PyObject *v;
377             if (n == NULL) {
378                 flag_error(self);
379                 Py_DECREF(container);
380                 return;
381             }
382             v = conv_string_to_unicode((XML_Char *) atts[i+1]);
383             if (v == NULL) {
384                 flag_error(self);
385                 Py_DECREF(container);
386                 Py_DECREF(n);
387                 return;
388             }
389             if (self->ordered_attributes) {
390                 PyList_SET_ITEM(container, i, n);
391                 PyList_SET_ITEM(container, i+1, v);
392             }
393             else if (PyDict_SetItem(container, n, v)) {
394                 flag_error(self);
395                 Py_DECREF(n);
396                 Py_DECREF(v);
397                 Py_DECREF(container);
398                 return;
399             }
400             else {
401                 Py_DECREF(n);
402                 Py_DECREF(v);
403             }
404         }
405         args = string_intern(self, name);
406         if (args == NULL) {
407             Py_DECREF(container);
408             return;
409         }
410         args = Py_BuildValue("(NN)", args, container);
411         if (args == NULL) {
412             return;
413         }
414         /* Container is now a borrowed reference; ignore it. */
415         self->in_callback = 1;
416         rv = call_with_frame("StartElement", __LINE__,
417                              self->handlers[StartElement], args, self);
418         self->in_callback = 0;
419         Py_DECREF(args);
420         if (rv == NULL) {
421             flag_error(self);
422             return;
423         }
424         Py_DECREF(rv);
425     }
426 }
427 
/* Generate a trampoline `static RC my_<NAME>Handler PARAMS`.
 *
 *   RC           - C return type of the generated function
 *   NAME         - enum HandlerTypes member; also names the function and is
 *                  stringified for the traceback entry
 *   PARAMS       - parenthesized C parameter list
 *   INIT         - extra declarations placed at the top of the body
 *   PARAM_FORMAT - parenthesized argument list for Py_BuildValue
 *   CONVERSION   - statement(s) run after a successful call, while `rv` is
 *                  still owned
 *   RETURN       - expression returned on every exit path (may be empty)
 *   GETUSERDATA  - expression yielding the xmlparseobject pointer
 *
 * The generated body does nothing unless a handler is set for NAME.  It
 * returns early when an exception is already pending or when flushing the
 * character buffer fails, builds the argument tuple, calls the handler with
 * in_callback set around the call, and calls flag_error() if building the
 * arguments or the call itself fails.
 */
#define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
                RETURN, GETUSERDATA) \
static RC \
my_##NAME##Handler PARAMS {\
    xmlparseobject *self = GETUSERDATA ; \
    PyObject *args = NULL; \
    PyObject *rv = NULL; \
    INIT \
\
    if (have_handler(self, NAME)) { \
        if (PyErr_Occurred()) \
            return RETURN; \
        if (flush_character_buffer(self) < 0) \
            return RETURN; \
        args = Py_BuildValue PARAM_FORMAT ;\
        if (!args) { flag_error(self); return RETURN;} \
        self->in_callback = 1; \
        rv = call_with_frame(#NAME,__LINE__, \
                             self->handlers[NAME], args, self); \
        self->in_callback = 0; \
        Py_DECREF(args); \
        if (rv == NULL) { \
            flag_error(self); \
            return RETURN; \
        } \
        CONVERSION \
        Py_DECREF(rv); \
    } \
    return RETURN; \
}
458 
/* Trampoline returning void: no extra state, no result conversion.  The
   parser object is taken from the `userData` parameter. */
#define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
        RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;,\
        (xmlparseobject *)userData)

/* Trampoline returning int: `rc` starts at 0 and, after a successful call,
   is set from the handler's result with PyLong_AsLong().  Note that the
   conversion's error return is not checked here, and the long result is
   narrowed to int. */
#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT)\
        RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
                        rc = PyLong_AsLong(rv);, rc, \
        (xmlparseobject *)userData)
467 
/* In the instantiations below, "N" arguments are new references produced
   by string_intern() / conv_string_len_to_unicode() that Py_BuildValue
   consumes, and "O&" pairs pass conv_string_to_unicode as the converter
   for the raw string that follows it. */

/* my_EndElementHandler: handler(name) */
VOID_HANDLER(EndElement,
             (void *userData, const XML_Char *name),
             ("(N)", string_intern(self, name)))

/* my_ProcessingInstructionHandler: handler(target, data) */
VOID_HANDLER(ProcessingInstruction,
             (void *userData,
              const XML_Char *target,
              const XML_Char *data),
             ("(NO&)", string_intern(self, target), conv_string_to_unicode ,data))

/* my_UnparsedEntityDeclHandler:
   handler(entityName, base, systemId, publicId, notationName) */
VOID_HANDLER(UnparsedEntityDecl,
             (void *userData,
              const XML_Char *entityName,
              const XML_Char *base,
              const XML_Char *systemId,
              const XML_Char *publicId,
              const XML_Char *notationName),
             ("(NNNNN)",
              string_intern(self, entityName), string_intern(self, base),
              string_intern(self, systemId), string_intern(self, publicId),
              string_intern(self, notationName)))

/* my_EntityDeclHandler:
   handler(entityName, is_parameter_entity, value, base, systemId,
           publicId, notationName); `value` is length-counted. */
VOID_HANDLER(EntityDecl,
             (void *userData,
              const XML_Char *entityName,
              int is_parameter_entity,
              const XML_Char *value,
              int value_length,
              const XML_Char *base,
              const XML_Char *systemId,
              const XML_Char *publicId,
              const XML_Char *notationName),
             ("NiNNNNN",
              string_intern(self, entityName), is_parameter_entity,
              (conv_string_len_to_unicode(value, value_length)),
              string_intern(self, base), string_intern(self, systemId),
              string_intern(self, publicId),
              string_intern(self, notationName)))

/* my_XmlDeclHandler: handler(version, encoding, standalone) */
VOID_HANDLER(XmlDecl,
             (void *userData,
              const XML_Char *version,
              const XML_Char *encoding,
              int standalone),
             ("(O&O&i)",
              conv_string_to_unicode ,version, conv_string_to_unicode ,encoding,
              standalone))
515 
516 static PyObject *
conv_content_model(XML_Content * const model,PyObject * (* conv_string)(const XML_Char *))517 conv_content_model(XML_Content * const model,
518                    PyObject *(*conv_string)(const XML_Char *))
519 {
520     PyObject *result = NULL;
521     PyObject *children = PyTuple_New(model->numchildren);
522     int i;
523 
524     if (children != NULL) {
525         assert(model->numchildren < INT_MAX);
526         for (i = 0; i < (int)model->numchildren; ++i) {
527             PyObject *child = conv_content_model(&model->children[i],
528                                                  conv_string);
529             if (child == NULL) {
530                 Py_XDECREF(children);
531                 return NULL;
532             }
533             PyTuple_SET_ITEM(children, i, child);
534         }
535         result = Py_BuildValue("(iiO&N)",
536                                model->type, model->quant,
537                                conv_string,model->name, children);
538     }
539     return result;
540 }
541 
542 static void
my_ElementDeclHandler(void * userData,const XML_Char * name,XML_Content * model)543 my_ElementDeclHandler(void *userData,
544                       const XML_Char *name,
545                       XML_Content *model)
546 {
547     xmlparseobject *self = (xmlparseobject *)userData;
548     PyObject *args = NULL;
549 
550     if (have_handler(self, ElementDecl)) {
551         PyObject *rv = NULL;
552         PyObject *modelobj, *nameobj;
553 
554         if (PyErr_Occurred())
555             return;
556 
557         if (flush_character_buffer(self) < 0)
558             goto finally;
559         modelobj = conv_content_model(model, (conv_string_to_unicode));
560         if (modelobj == NULL) {
561             flag_error(self);
562             goto finally;
563         }
564         nameobj = string_intern(self, name);
565         if (nameobj == NULL) {
566             Py_DECREF(modelobj);
567             flag_error(self);
568             goto finally;
569         }
570         args = Py_BuildValue("NN", nameobj, modelobj);
571         if (args == NULL) {
572             flag_error(self);
573             goto finally;
574         }
575         self->in_callback = 1;
576         rv = call_with_frame("ElementDecl", __LINE__,
577                              self->handlers[ElementDecl], args, self);
578         self->in_callback = 0;
579         if (rv == NULL) {
580             flag_error(self);
581             goto finally;
582         }
583         Py_DECREF(rv);
584     }
585  finally:
586     Py_XDECREF(args);
587     XML_FreeContentModel(self->itself, model);
588     return;
589 }
590 
/* my_AttlistDeclHandler:
   handler(elname, attname, att_type, dflt, isrequired) */
VOID_HANDLER(AttlistDecl,
             (void *userData,
              const XML_Char *elname,
              const XML_Char *attname,
              const XML_Char *att_type,
              const XML_Char *dflt,
              int isrequired),
             ("(NNO&O&i)",
              string_intern(self, elname), string_intern(self, attname),
              conv_string_to_unicode ,att_type, conv_string_to_unicode ,dflt,
              isrequired))

/* my_SkippedEntityHandler: handler(entityName, is_parameter_entity).
   Only generated for Expat >= 1.95.4, matching the enum entry. */
#if XML_COMBINED_VERSION >= 19504
VOID_HANDLER(SkippedEntity,
             (void *userData,
              const XML_Char *entityName,
              int is_parameter_entity),
             ("Ni",
              string_intern(self, entityName), is_parameter_entity))
#endif

/* my_NotationDeclHandler:
   handler(notationName, base, systemId, publicId) */
VOID_HANDLER(NotationDecl,
                (void *userData,
                        const XML_Char *notationName,
                        const XML_Char *base,
                        const XML_Char *systemId,
                        const XML_Char *publicId),
                ("(NNNN)",
                 string_intern(self, notationName), string_intern(self, base),
                 string_intern(self, systemId), string_intern(self, publicId)))

/* my_StartNamespaceDeclHandler: handler(prefix, uri) */
VOID_HANDLER(StartNamespaceDecl,
                (void *userData,
                      const XML_Char *prefix,
                      const XML_Char *uri),
                ("(NN)",
                 string_intern(self, prefix), string_intern(self, uri)))

/* my_EndNamespaceDeclHandler: handler(prefix) */
VOID_HANDLER(EndNamespaceDecl,
                (void *userData,
                    const XML_Char *prefix),
                ("(N)", string_intern(self, prefix)))

/* my_CommentHandler: handler(data) */
VOID_HANDLER(Comment,
               (void *userData, const XML_Char *data),
                ("(O&)", conv_string_to_unicode ,data))

/* my_StartCdataSectionHandler: handler() */
VOID_HANDLER(StartCdataSection,
               (void *userData),
                ("()"))

/* my_EndCdataSectionHandler: handler() */
VOID_HANDLER(EndCdataSection,
               (void *userData),
                ("()"))

/* my_DefaultHandler: handler(text), text being the `len` units at `s` */
VOID_HANDLER(Default,
              (void *userData, const XML_Char *s, int len),
              ("(N)", (conv_string_len_to_unicode(s,len))))

/* my_DefaultHandlerExpandHandler: same arguments as my_DefaultHandler.
   The #define below gives it the shorter name my_DefaultHandlerExpand. */
VOID_HANDLER(DefaultHandlerExpand,
              (void *userData, const XML_Char *s, int len),
              ("(N)", (conv_string_len_to_unicode(s,len))))
#define my_DefaultHandlerExpand my_DefaultHandlerExpandHandler

/* my_NotStandaloneHandler: handler(); the int result is returned to Expat */
INT_HANDLER(NotStandalone,
                (void *userData),
                ("()"))

/* my_ExternalEntityRefHandler: handler(context, base, systemId, publicId).
   Spelled out with RC_HANDLER because this callback receives the parser
   itself, so the object is fetched with XML_GetUserData(parser) instead of
   from a userData parameter. */
RC_HANDLER(int, ExternalEntityRef,
                (XML_Parser parser,
                    const XML_Char *context,
                    const XML_Char *base,
                    const XML_Char *systemId,
                    const XML_Char *publicId),
                int rc=0;,
                ("(O&NNN)",
                 conv_string_to_unicode ,context, string_intern(self, base),
                 string_intern(self, systemId), string_intern(self, publicId)),
                rc = PyLong_AsLong(rv);, rc,
                XML_GetUserData(parser))

/* XXX UnknownEncodingHandler */

/* my_StartDoctypeDeclHandler:
   handler(doctypeName, sysid, pubid, has_internal_subset) */
VOID_HANDLER(StartDoctypeDecl,
             (void *userData, const XML_Char *doctypeName,
              const XML_Char *sysid, const XML_Char *pubid,
              int has_internal_subset),
             ("(NNNi)", string_intern(self, doctypeName),
              string_intern(self, sysid), string_intern(self, pubid),
              has_internal_subset))

/* my_EndDoctypeDeclHandler: handler() */
VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
683 
684 /* ---------------------------------------------------------------- */
685 /*[clinic input]
686 class pyexpat.xmlparser "xmlparseobject *" "&Xmlparsetype"
687 [clinic start generated code]*/
688 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=2393162385232e1c]*/
689 
690 
691 static PyObject *
get_parse_result(pyexpat_state * state,xmlparseobject * self,int rv)692 get_parse_result(pyexpat_state *state, xmlparseobject *self, int rv)
693 {
694     if (PyErr_Occurred()) {
695         return NULL;
696     }
697     if (rv == 0) {
698         return set_error(state, self, XML_GetErrorCode(self->itself));
699     }
700     if (flush_character_buffer(self) < 0) {
701         return NULL;
702     }
703     return PyLong_FromLong(rv);
704 }
705 
/* Largest number of bytes passed to a single XML_Parse() call by Parse();
   longer input is fed to Expat in pieces of this size (1 MiB). */
#define MAX_CHUNK_SIZE (1 << 20)
707 
708 /*[clinic input]
709 pyexpat.xmlparser.Parse
710 
711     cls: defining_class
712     data: object
713     isfinal: bool(accept={int}) = False
714     /
715 
716 Parse XML data.
717 
718 `isfinal' should be true at end of input.
719 [clinic start generated code]*/
720 
721 static PyObject *
pyexpat_xmlparser_Parse_impl(xmlparseobject * self,PyTypeObject * cls,PyObject * data,int isfinal)722 pyexpat_xmlparser_Parse_impl(xmlparseobject *self, PyTypeObject *cls,
723                              PyObject *data, int isfinal)
724 /*[clinic end generated code: output=8faffe07fe1f862a input=fc97f833558ca715]*/
725 {
726     const char *s;
727     Py_ssize_t slen;
728     Py_buffer view;
729     int rc;
730     pyexpat_state *state = PyType_GetModuleState(cls);
731 
732     if (PyUnicode_Check(data)) {
733         view.buf = NULL;
734         s = PyUnicode_AsUTF8AndSize(data, &slen);
735         if (s == NULL)
736             return NULL;
737         /* Explicitly set UTF-8 encoding. Return code ignored. */
738         (void)XML_SetEncoding(self->itself, "utf-8");
739     }
740     else {
741         if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
742             return NULL;
743         s = view.buf;
744         slen = view.len;
745     }
746 
747     while (slen > MAX_CHUNK_SIZE) {
748         rc = XML_Parse(self->itself, s, MAX_CHUNK_SIZE, 0);
749         if (!rc)
750             goto done;
751         s += MAX_CHUNK_SIZE;
752         slen -= MAX_CHUNK_SIZE;
753     }
754     Py_BUILD_ASSERT(MAX_CHUNK_SIZE <= INT_MAX);
755     assert(slen <= INT_MAX);
756     rc = XML_Parse(self->itself, s, (int)slen, isfinal);
757 
758 done:
759     if (view.buf != NULL) {
760         PyBuffer_Release(&view);
761     }
762     return get_parse_result(state, self, rc);
763 }
764 
765 /* File reading copied from cPickle */
766 
/* Number of bytes requested from the file's read() per iteration of
   ParseFile(), and the size of the Expat buffer obtained for each read. */
#define BUF_SIZE 2048
768 
769 static int
readinst(char * buf,int buf_size,PyObject * meth)770 readinst(char *buf, int buf_size, PyObject *meth)
771 {
772     PyObject *str;
773     Py_ssize_t len;
774     const char *ptr;
775 
776     str = PyObject_CallFunction(meth, "n", buf_size);
777     if (str == NULL)
778         goto error;
779 
780     if (PyBytes_Check(str))
781         ptr = PyBytes_AS_STRING(str);
782     else if (PyByteArray_Check(str))
783         ptr = PyByteArray_AS_STRING(str);
784     else {
785         PyErr_Format(PyExc_TypeError,
786                      "read() did not return a bytes object (type=%.400s)",
787                      Py_TYPE(str)->tp_name);
788         goto error;
789     }
790     len = Py_SIZE(str);
791     if (len > buf_size) {
792         PyErr_Format(PyExc_ValueError,
793                      "read() returned too much data: "
794                      "%i bytes requested, %zd returned",
795                      buf_size, len);
796         goto error;
797     }
798     memcpy(buf, ptr, len);
799     Py_DECREF(str);
800     /* len <= buf_size <= INT_MAX */
801     return (int)len;
802 
803 error:
804     Py_XDECREF(str);
805     return -1;
806 }
807 
808 /*[clinic input]
809 pyexpat.xmlparser.ParseFile
810 
811     cls: defining_class
812     file: object
813     /
814 
815 Parse XML data from file-like object.
816 [clinic start generated code]*/
817 
818 static PyObject *
pyexpat_xmlparser_ParseFile_impl(xmlparseobject * self,PyTypeObject * cls,PyObject * file)819 pyexpat_xmlparser_ParseFile_impl(xmlparseobject *self, PyTypeObject *cls,
820                                  PyObject *file)
821 /*[clinic end generated code: output=34780a094c8ca3ae input=ba4bc9c541684793]*/
822 {
823     int rv = 1;
824     PyObject *readmethod = NULL;
825     _Py_IDENTIFIER(read);
826 
827     pyexpat_state *state = PyType_GetModuleState(cls);
828 
829     if (_PyObject_LookupAttrId(file, &PyId_read, &readmethod) < 0) {
830         return NULL;
831     }
832     if (readmethod == NULL) {
833         PyErr_SetString(PyExc_TypeError,
834                         "argument must have 'read' attribute");
835         return NULL;
836     }
837     for (;;) {
838         int bytes_read;
839         void *buf = XML_GetBuffer(self->itself, BUF_SIZE);
840         if (buf == NULL) {
841             Py_XDECREF(readmethod);
842             return get_parse_result(state, self, 0);
843         }
844 
845         bytes_read = readinst(buf, BUF_SIZE, readmethod);
846         if (bytes_read < 0) {
847             Py_DECREF(readmethod);
848             return NULL;
849         }
850         rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0);
851         if (PyErr_Occurred()) {
852             Py_XDECREF(readmethod);
853             return NULL;
854         }
855 
856         if (!rv || bytes_read == 0)
857             break;
858     }
859     Py_XDECREF(readmethod);
860     return get_parse_result(state, self, rv);
861 }
862 
863 /*[clinic input]
864 pyexpat.xmlparser.SetBase
865 
866     base: str
867     /
868 
869 Set the base URL for the parser.
870 [clinic start generated code]*/
871 
872 static PyObject *
pyexpat_xmlparser_SetBase_impl(xmlparseobject * self,const char * base)873 pyexpat_xmlparser_SetBase_impl(xmlparseobject *self, const char *base)
874 /*[clinic end generated code: output=c212ddceb607b539 input=c684e5de895ee1a8]*/
875 {
876     if (!XML_SetBase(self->itself, base)) {
877         return PyErr_NoMemory();
878     }
879     Py_RETURN_NONE;
880 }
881 
882 /*[clinic input]
883 pyexpat.xmlparser.GetBase
884 
885 Return base URL string for the parser.
886 [clinic start generated code]*/
887 
888 static PyObject *
pyexpat_xmlparser_GetBase_impl(xmlparseobject * self)889 pyexpat_xmlparser_GetBase_impl(xmlparseobject *self)
890 /*[clinic end generated code: output=2886cb21f9a8739a input=918d71c38009620e]*/
891 {
892     return Py_BuildValue("z", XML_GetBase(self->itself));
893 }
894 
895 /*[clinic input]
896 pyexpat.xmlparser.GetInputContext
897 
898 Return the untranslated text of the input that caused the current event.
899 
900 If the event was generated by a large amount of text (such as a start tag
901 for an element with many attributes), not all of the text may be available.
902 [clinic start generated code]*/
903 
904 static PyObject *
pyexpat_xmlparser_GetInputContext_impl(xmlparseobject * self)905 pyexpat_xmlparser_GetInputContext_impl(xmlparseobject *self)
906 /*[clinic end generated code: output=a88026d683fc22cc input=034df8712db68379]*/
907 {
908     if (self->in_callback) {
909         int offset, size;
910         const char *buffer
911             = XML_GetInputContext(self->itself, &offset, &size);
912 
913         if (buffer != NULL)
914             return PyBytes_FromStringAndSize(buffer + offset,
915                                               size - offset);
916         else
917             Py_RETURN_NONE;
918     }
919     else
920         Py_RETURN_NONE;
921 }
922 
923 /*[clinic input]
924 pyexpat.xmlparser.ExternalEntityParserCreate
925 
926     cls: defining_class
927     context: str(accept={str, NoneType})
928     encoding: str = NULL
929     /
930 
931 Create a parser for parsing an external entity based on the information passed to the ExternalEntityRefHandler.
932 [clinic start generated code]*/
933 
934 static PyObject *
pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject * self,PyTypeObject * cls,const char * context,const char * encoding)935 pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
936                                                   PyTypeObject *cls,
937                                                   const char *context,
938                                                   const char *encoding)
939 /*[clinic end generated code: output=01d4472b49cb3f92 input=ec70c6b9e6e9619a]*/
940 {
941     xmlparseobject *new_parser;
942     int i;
943 
944     pyexpat_state *state = PyType_GetModuleState(cls);
945 
946     new_parser = PyObject_GC_New(xmlparseobject, state->xml_parse_type);
947     if (new_parser == NULL) {
948         return NULL;
949     }
950 
951     new_parser->buffer_size = self->buffer_size;
952     new_parser->buffer_used = 0;
953     new_parser->buffer = NULL;
954     new_parser->ordered_attributes = self->ordered_attributes;
955     new_parser->specified_attributes = self->specified_attributes;
956     new_parser->in_callback = 0;
957     new_parser->ns_prefixes = self->ns_prefixes;
958     new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
959                                                         encoding);
960     new_parser->handlers = 0;
961     new_parser->intern = self->intern;
962     Py_XINCREF(new_parser->intern);
963 
964     if (self->buffer != NULL) {
965         new_parser->buffer = PyMem_Malloc(new_parser->buffer_size);
966         if (new_parser->buffer == NULL) {
967             Py_DECREF(new_parser);
968             return PyErr_NoMemory();
969         }
970     }
971     if (!new_parser->itself) {
972         Py_DECREF(new_parser);
973         return PyErr_NoMemory();
974     }
975 
976     XML_SetUserData(new_parser->itself, (void *)new_parser);
977 
978     /* allocate and clear handlers first */
979     for (i = 0; handler_info[i].name != NULL; i++)
980         /* do nothing */;
981 
982     new_parser->handlers = PyMem_New(PyObject *, i);
983     if (!new_parser->handlers) {
984         Py_DECREF(new_parser);
985         return PyErr_NoMemory();
986     }
987     clear_handlers(new_parser, 1);
988 
989     /* then copy handlers from self */
990     for (i = 0; handler_info[i].name != NULL; i++) {
991         PyObject *handler = self->handlers[i];
992         if (handler != NULL) {
993             Py_INCREF(handler);
994             new_parser->handlers[i] = handler;
995             handler_info[i].setter(new_parser->itself,
996                                    handler_info[i].handler);
997         }
998     }
999 
1000     PyObject_GC_Track(new_parser);
1001     return (PyObject *)new_parser;
1002 }
1003 
1004 /*[clinic input]
1005 pyexpat.xmlparser.SetParamEntityParsing
1006 
1007     flag: int
1008     /
1009 
1010 Controls parsing of parameter entities (including the external DTD subset).
1011 
1012 Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,
1013 XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and
1014 XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag
1015 was successful.
1016 [clinic start generated code]*/
1017 
1018 static PyObject *
pyexpat_xmlparser_SetParamEntityParsing_impl(xmlparseobject * self,int flag)1019 pyexpat_xmlparser_SetParamEntityParsing_impl(xmlparseobject *self, int flag)
1020 /*[clinic end generated code: output=18668ee8e760d64c input=8aea19b4b15e9af1]*/
1021 {
1022     flag = XML_SetParamEntityParsing(self->itself, flag);
1023     return PyLong_FromLong(flag);
1024 }
1025 
1026 
1027 #if XML_COMBINED_VERSION >= 19505
1028 /*[clinic input]
1029 pyexpat.xmlparser.UseForeignDTD
1030 
1031     cls: defining_class
1032     flag: bool = True
1033     /
1034 
1035 Allows the application to provide an artificial external subset if one is not specified as part of the document instance.
1036 
1037 This readily allows the use of a 'default' document type controlled by the
1038 application, while still getting the advantage of providing document type
1039 information to the parser. 'flag' defaults to True if not provided.
1040 [clinic start generated code]*/
1041 
1042 static PyObject *
pyexpat_xmlparser_UseForeignDTD_impl(xmlparseobject * self,PyTypeObject * cls,int flag)1043 pyexpat_xmlparser_UseForeignDTD_impl(xmlparseobject *self, PyTypeObject *cls,
1044                                      int flag)
1045 /*[clinic end generated code: output=d7d98252bd25a20f input=23440ecb0573fb29]*/
1046 {
1047     pyexpat_state *state = PyType_GetModuleState(cls);
1048     enum XML_Error rc;
1049 
1050     rc = XML_UseForeignDTD(self->itself, flag ? XML_TRUE : XML_FALSE);
1051     if (rc != XML_ERROR_NONE) {
1052         return set_error(state, self, rc);
1053     }
1054     Py_RETURN_NONE;
1055 }
1056 #endif
1057 
1058 static struct PyMethodDef xmlparse_methods[] = {
1059     PYEXPAT_XMLPARSER_PARSE_METHODDEF
1060     PYEXPAT_XMLPARSER_PARSEFILE_METHODDEF
1061     PYEXPAT_XMLPARSER_SETBASE_METHODDEF
1062     PYEXPAT_XMLPARSER_GETBASE_METHODDEF
1063     PYEXPAT_XMLPARSER_GETINPUTCONTEXT_METHODDEF
1064     PYEXPAT_XMLPARSER_EXTERNALENTITYPARSERCREATE_METHODDEF
1065     PYEXPAT_XMLPARSER_SETPARAMENTITYPARSING_METHODDEF
1066 #if XML_COMBINED_VERSION >= 19505
1067     PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF
1068 #endif
1069     {NULL, NULL}  /* sentinel */
1070 };
1071 
1072 /* ---------- */
1073 
1074 
1075 
1076 /* pyexpat international encoding support.
1077    Make it as simple as possible.
1078 */
1079 
1080 static int
PyUnknownEncodingHandler(void * encodingHandlerData,const XML_Char * name,XML_Encoding * info)1081 PyUnknownEncodingHandler(void *encodingHandlerData,
1082                          const XML_Char *name,
1083                          XML_Encoding *info)
1084 {
1085     static unsigned char template_buffer[256] = {0};
1086     PyObject* u;
1087     int i;
1088     const void *data;
1089     unsigned int kind;
1090 
1091     if (PyErr_Occurred())
1092         return XML_STATUS_ERROR;
1093 
1094     if (template_buffer[1] == 0) {
1095         for (i = 0; i < 256; i++)
1096             template_buffer[i] = i;
1097     }
1098 
1099     u = PyUnicode_Decode((char*) template_buffer, 256, name, "replace");
1100     if (u == NULL || PyUnicode_READY(u)) {
1101         Py_XDECREF(u);
1102         return XML_STATUS_ERROR;
1103     }
1104 
1105     if (PyUnicode_GET_LENGTH(u) != 256) {
1106         Py_DECREF(u);
1107         PyErr_SetString(PyExc_ValueError,
1108                         "multi-byte encodings are not supported");
1109         return XML_STATUS_ERROR;
1110     }
1111 
1112     kind = PyUnicode_KIND(u);
1113     data = PyUnicode_DATA(u);
1114     for (i = 0; i < 256; i++) {
1115         Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1116         if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
1117             info->map[i] = ch;
1118         else
1119             info->map[i] = -1;
1120     }
1121 
1122     info->data = NULL;
1123     info->convert = NULL;
1124     info->release = NULL;
1125     Py_DECREF(u);
1126 
1127     return XML_STATUS_OK;
1128 }
1129 
1130 
1131 static PyObject *
newxmlparseobject(pyexpat_state * state,const char * encoding,const char * namespace_separator,PyObject * intern)1132 newxmlparseobject(pyexpat_state *state, const char *encoding,
1133                   const char *namespace_separator, PyObject *intern)
1134 {
1135     int i;
1136     xmlparseobject *self;
1137 
1138     self = PyObject_GC_New(xmlparseobject, state->xml_parse_type);
1139     if (self == NULL)
1140         return NULL;
1141 
1142     self->buffer = NULL;
1143     self->buffer_size = CHARACTER_DATA_BUFFER_SIZE;
1144     self->buffer_used = 0;
1145     self->ordered_attributes = 0;
1146     self->specified_attributes = 0;
1147     self->in_callback = 0;
1148     self->ns_prefixes = 0;
1149     self->handlers = NULL;
1150     self->intern = intern;
1151     Py_XINCREF(self->intern);
1152 
1153     /* namespace_separator is either NULL or contains one char + \0 */
1154     self->itself = XML_ParserCreate_MM(encoding, &ExpatMemoryHandler,
1155                                        namespace_separator);
1156     if (self->itself == NULL) {
1157         PyErr_SetString(PyExc_RuntimeError,
1158                         "XML_ParserCreate failed");
1159         Py_DECREF(self);
1160         return NULL;
1161     }
1162 #if XML_COMBINED_VERSION >= 20100
1163     /* This feature was added upstream in libexpat 2.1.0. */
1164     XML_SetHashSalt(self->itself,
1165                     (unsigned long)_Py_HashSecret.expat.hashsalt);
1166 #endif
1167     XML_SetUserData(self->itself, (void *)self);
1168     XML_SetUnknownEncodingHandler(self->itself,
1169                   (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL);
1170 
1171     for (i = 0; handler_info[i].name != NULL; i++)
1172         /* do nothing */;
1173 
1174     self->handlers = PyMem_New(PyObject *, i);
1175     if (!self->handlers) {
1176         Py_DECREF(self);
1177         return PyErr_NoMemory();
1178     }
1179     clear_handlers(self, 1);
1180 
1181     PyObject_GC_Track(self);
1182     return (PyObject*)self;
1183 }
1184 
1185 static int
xmlparse_traverse(xmlparseobject * op,visitproc visit,void * arg)1186 xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
1187 {
1188     for (int i = 0; handler_info[i].name != NULL; i++) {
1189         Py_VISIT(op->handlers[i]);
1190     }
1191     Py_VISIT(Py_TYPE(op));
1192     return 0;
1193 }
1194 
1195 static int
xmlparse_clear(xmlparseobject * op)1196 xmlparse_clear(xmlparseobject *op)
1197 {
1198     clear_handlers(op, 0);
1199     Py_CLEAR(op->intern);
1200     return 0;
1201 }
1202 
1203 static void
xmlparse_dealloc(xmlparseobject * self)1204 xmlparse_dealloc(xmlparseobject *self)
1205 {
1206     PyObject_GC_UnTrack(self);
1207     (void)xmlparse_clear(self);
1208     if (self->itself != NULL)
1209         XML_ParserFree(self->itself);
1210     self->itself = NULL;
1211 
1212     if (self->handlers != NULL) {
1213         PyMem_Free(self->handlers);
1214         self->handlers = NULL;
1215     }
1216     if (self->buffer != NULL) {
1217         PyMem_Free(self->buffer);
1218         self->buffer = NULL;
1219     }
1220     PyTypeObject *tp = Py_TYPE(self);
1221     PyObject_GC_Del(self);
1222     Py_DECREF(tp);
1223 }
1224 
1225 
1226 static PyObject *
xmlparse_handler_getter(xmlparseobject * self,struct HandlerInfo * hi)1227 xmlparse_handler_getter(xmlparseobject *self, struct HandlerInfo *hi)
1228 {
1229     assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info));
1230     int handlernum = (int)(hi - handler_info);
1231     PyObject *result = self->handlers[handlernum];
1232     if (result == NULL)
1233         result = Py_None;
1234     Py_INCREF(result);
1235     return result;
1236 }
1237 
1238 static int
xmlparse_handler_setter(xmlparseobject * self,PyObject * v,struct HandlerInfo * hi)1239 xmlparse_handler_setter(xmlparseobject *self, PyObject *v, struct HandlerInfo *hi)
1240 {
1241     assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info));
1242     int handlernum = (int)(hi - handler_info);
1243     if (v == NULL) {
1244         PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1245         return -1;
1246     }
1247     if (handlernum == CharacterData) {
1248         /* If we're changing the character data handler, flush all
1249          * cached data with the old handler.  Not sure there's a
1250          * "right" thing to do, though, but this probably won't
1251          * happen.
1252          */
1253         if (flush_character_buffer(self) < 0)
1254             return -1;
1255     }
1256 
1257     xmlhandler c_handler = NULL;
1258     if (v == Py_None) {
1259         /* If this is the character data handler, and a character
1260            data handler is already active, we need to be more
1261            careful.  What we can safely do is replace the existing
1262            character data handler callback function with a no-op
1263            function that will refuse to call Python.  The downside
1264            is that this doesn't completely remove the character
1265            data handler from the C layer if there's any callback
1266            active, so Expat does a little more work than it
1267            otherwise would, but that's really an odd case.  A more
1268            elaborate system of handlers and state could remove the
1269            C handler more effectively. */
1270         if (handlernum == CharacterData && self->in_callback)
1271             c_handler = noop_character_data_handler;
1272         v = NULL;
1273     }
1274     else if (v != NULL) {
1275         Py_INCREF(v);
1276         c_handler = handler_info[handlernum].handler;
1277     }
1278     Py_XSETREF(self->handlers[handlernum], v);
1279     handler_info[handlernum].setter(self->itself, c_handler);
1280     return 0;
1281 }
1282 
1283 #define INT_GETTER(name) \
1284     static PyObject * \
1285     xmlparse_##name##_getter(xmlparseobject *self, void *closure) \
1286     { \
1287         return PyLong_FromLong((long) XML_Get##name(self->itself)); \
1288     }
1289 INT_GETTER(ErrorCode)
INT_GETTER(ErrorLineNumber)1290 INT_GETTER(ErrorLineNumber)
1291 INT_GETTER(ErrorColumnNumber)
1292 INT_GETTER(ErrorByteIndex)
1293 INT_GETTER(CurrentLineNumber)
1294 INT_GETTER(CurrentColumnNumber)
1295 INT_GETTER(CurrentByteIndex)
1296 
1297 #undef INT_GETTER
1298 
1299 static PyObject *
1300 xmlparse_buffer_text_getter(xmlparseobject *self, void *closure)
1301 {
1302     return PyBool_FromLong(self->buffer != NULL);
1303 }
1304 
1305 static int
xmlparse_buffer_text_setter(xmlparseobject * self,PyObject * v,void * closure)1306 xmlparse_buffer_text_setter(xmlparseobject *self, PyObject *v, void *closure)
1307 {
1308     if (v == NULL) {
1309         PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1310         return -1;
1311     }
1312     int b = PyObject_IsTrue(v);
1313     if (b < 0)
1314         return -1;
1315     if (b) {
1316         if (self->buffer == NULL) {
1317             self->buffer = PyMem_Malloc(self->buffer_size);
1318             if (self->buffer == NULL) {
1319                 PyErr_NoMemory();
1320                 return -1;
1321             }
1322             self->buffer_used = 0;
1323         }
1324     }
1325     else if (self->buffer != NULL) {
1326         if (flush_character_buffer(self) < 0)
1327             return -1;
1328         PyMem_Free(self->buffer);
1329         self->buffer = NULL;
1330     }
1331     return 0;
1332 }
1333 
1334 static PyObject *
xmlparse_buffer_size_getter(xmlparseobject * self,void * closure)1335 xmlparse_buffer_size_getter(xmlparseobject *self, void *closure)
1336 {
1337     return PyLong_FromLong((long) self->buffer_size);
1338 }
1339 
1340 static int
xmlparse_buffer_size_setter(xmlparseobject * self,PyObject * v,void * closure)1341 xmlparse_buffer_size_setter(xmlparseobject *self, PyObject *v, void *closure)
1342 {
1343     if (v == NULL) {
1344         PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1345         return -1;
1346     }
1347     long new_buffer_size;
1348     if (!PyLong_Check(v)) {
1349         PyErr_SetString(PyExc_TypeError, "buffer_size must be an integer");
1350         return -1;
1351     }
1352 
1353     new_buffer_size = PyLong_AsLong(v);
1354     if (new_buffer_size <= 0) {
1355         if (!PyErr_Occurred())
1356             PyErr_SetString(PyExc_ValueError, "buffer_size must be greater than zero");
1357         return -1;
1358     }
1359 
1360     /* trivial case -- no change */
1361     if (new_buffer_size == self->buffer_size) {
1362         return 0;
1363     }
1364 
1365     /* check maximum */
1366     if (new_buffer_size > INT_MAX) {
1367         char errmsg[100];
1368         sprintf(errmsg, "buffer_size must not be greater than %i", INT_MAX);
1369         PyErr_SetString(PyExc_ValueError, errmsg);
1370         return -1;
1371     }
1372 
1373     if (self->buffer != NULL) {
1374         /* there is already a buffer */
1375         if (self->buffer_used != 0) {
1376             if (flush_character_buffer(self) < 0) {
1377                 return -1;
1378             }
1379         }
1380         /* free existing buffer */
1381         PyMem_Free(self->buffer);
1382     }
1383     self->buffer = PyMem_Malloc(new_buffer_size);
1384     if (self->buffer == NULL) {
1385         PyErr_NoMemory();
1386         return -1;
1387     }
1388     self->buffer_size = new_buffer_size;
1389     return 0;
1390 }
1391 
1392 static PyObject *
xmlparse_buffer_used_getter(xmlparseobject * self,void * closure)1393 xmlparse_buffer_used_getter(xmlparseobject *self, void *closure)
1394 {
1395     return PyLong_FromLong((long) self->buffer_used);
1396 }
1397 
1398 static PyObject *
xmlparse_namespace_prefixes_getter(xmlparseobject * self,void * closure)1399 xmlparse_namespace_prefixes_getter(xmlparseobject *self, void *closure)
1400 {
1401     return PyBool_FromLong(self->ns_prefixes);
1402 }
1403 
1404 static int
xmlparse_namespace_prefixes_setter(xmlparseobject * self,PyObject * v,void * closure)1405 xmlparse_namespace_prefixes_setter(xmlparseobject *self, PyObject *v, void *closure)
1406 {
1407     if (v == NULL) {
1408         PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1409         return -1;
1410     }
1411     int b = PyObject_IsTrue(v);
1412     if (b < 0)
1413         return -1;
1414     self->ns_prefixes = b;
1415     XML_SetReturnNSTriplet(self->itself, self->ns_prefixes);
1416     return 0;
1417 }
1418 
1419 static PyObject *
xmlparse_ordered_attributes_getter(xmlparseobject * self,void * closure)1420 xmlparse_ordered_attributes_getter(xmlparseobject *self, void *closure)
1421 {
1422     return PyBool_FromLong(self->ordered_attributes);
1423 }
1424 
1425 static int
xmlparse_ordered_attributes_setter(xmlparseobject * self,PyObject * v,void * closure)1426 xmlparse_ordered_attributes_setter(xmlparseobject *self, PyObject *v, void *closure)
1427 {
1428     if (v == NULL) {
1429         PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1430         return -1;
1431     }
1432     int b = PyObject_IsTrue(v);
1433     if (b < 0)
1434         return -1;
1435     self->ordered_attributes = b;
1436     return 0;
1437 }
1438 
1439 static PyObject *
xmlparse_specified_attributes_getter(xmlparseobject * self,void * closure)1440 xmlparse_specified_attributes_getter(xmlparseobject *self, void *closure)
1441 {
1442     return PyBool_FromLong((long) self->specified_attributes);
1443 }
1444 
1445 static int
xmlparse_specified_attributes_setter(xmlparseobject * self,PyObject * v,void * closure)1446 xmlparse_specified_attributes_setter(xmlparseobject *self, PyObject *v, void *closure)
1447 {
1448     if (v == NULL) {
1449         PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1450         return -1;
1451     }
1452     int b = PyObject_IsTrue(v);
1453     if (b < 0)
1454         return -1;
1455     self->specified_attributes = b;
1456     return 0;
1457 }
1458 
1459 static PyMemberDef xmlparse_members[] = {
1460     {"intern", T_OBJECT, offsetof(xmlparseobject, intern), READONLY, NULL},
1461     {NULL}
1462 };
1463 
1464 #define XMLPARSE_GETTER_DEF(name) \
1465     {#name, (getter)xmlparse_##name##_getter, NULL, NULL},
1466 #define XMLPARSE_GETTER_SETTER_DEF(name) \
1467     {#name, (getter)xmlparse_##name##_getter, \
1468             (setter)xmlparse_##name##_setter, NULL},
1469 
1470 static PyGetSetDef xmlparse_getsetlist[] = {
1471     XMLPARSE_GETTER_DEF(ErrorCode)
1472     XMLPARSE_GETTER_DEF(ErrorLineNumber)
1473     XMLPARSE_GETTER_DEF(ErrorColumnNumber)
1474     XMLPARSE_GETTER_DEF(ErrorByteIndex)
1475     XMLPARSE_GETTER_DEF(CurrentLineNumber)
1476     XMLPARSE_GETTER_DEF(CurrentColumnNumber)
1477     XMLPARSE_GETTER_DEF(CurrentByteIndex)
1478     XMLPARSE_GETTER_SETTER_DEF(buffer_size)
1479     XMLPARSE_GETTER_SETTER_DEF(buffer_text)
1480     XMLPARSE_GETTER_DEF(buffer_used)
1481     XMLPARSE_GETTER_SETTER_DEF(namespace_prefixes)
1482     XMLPARSE_GETTER_SETTER_DEF(ordered_attributes)
1483     XMLPARSE_GETTER_SETTER_DEF(specified_attributes)
1484     {NULL},
1485 };
1486 
1487 #undef XMLPARSE_GETTER_DEF
1488 #undef XMLPARSE_GETTER_SETTER_DEF
1489 
1490 PyDoc_STRVAR(Xmlparsetype__doc__, "XML parser");
1491 
1492 static PyType_Slot _xml_parse_type_spec_slots[] = {
1493     {Py_tp_dealloc, xmlparse_dealloc},
1494     {Py_tp_doc, (void *)Xmlparsetype__doc__},
1495     {Py_tp_traverse, xmlparse_traverse},
1496     {Py_tp_clear, xmlparse_clear},
1497     {Py_tp_methods, xmlparse_methods},
1498     {Py_tp_members, xmlparse_members},
1499     {Py_tp_getset, xmlparse_getsetlist},
1500     {0, 0}
1501 };
1502 
1503 static PyType_Spec _xml_parse_type_spec = {
1504     .name = "pyexpat.xmlparser",
1505     .basicsize = sizeof(xmlparseobject),
1506     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
1507               Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_IMMUTABLETYPE),
1508     .slots = _xml_parse_type_spec_slots,
1509 };
1510 
1511 /* End of code for xmlparser objects */
1512 /* -------------------------------------------------------- */
1513 
1514 /*[clinic input]
1515 pyexpat.ParserCreate
1516 
1517     encoding: str(accept={str, NoneType}) = None
1518     namespace_separator: str(accept={str, NoneType}) = None
1519     intern: object = NULL
1520 
1521 Return a new XML parser object.
1522 [clinic start generated code]*/
1523 
1524 static PyObject *
pyexpat_ParserCreate_impl(PyObject * module,const char * encoding,const char * namespace_separator,PyObject * intern)1525 pyexpat_ParserCreate_impl(PyObject *module, const char *encoding,
1526                           const char *namespace_separator, PyObject *intern)
1527 /*[clinic end generated code: output=295c0cf01ab1146c input=e8da8e8d7122cb5d]*/
1528 {
1529     PyObject *result;
1530     int intern_decref = 0;
1531 
1532     if (namespace_separator != NULL
1533         && strlen(namespace_separator) > 1) {
1534         PyErr_SetString(PyExc_ValueError,
1535                         "namespace_separator must be at most one"
1536                         " character, omitted, or None");
1537         return NULL;
1538     }
1539     /* Explicitly passing None means no interning is desired.
1540        Not passing anything means that a new dictionary is used. */
1541     if (intern == Py_None)
1542         intern = NULL;
1543     else if (intern == NULL) {
1544         intern = PyDict_New();
1545         if (!intern)
1546             return NULL;
1547         intern_decref = 1;
1548     }
1549     else if (!PyDict_Check(intern)) {
1550         PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
1551         return NULL;
1552     }
1553 
1554     pyexpat_state *state = pyexpat_get_state(module);
1555     result = newxmlparseobject(state, encoding, namespace_separator, intern);
1556     if (intern_decref) {
1557         Py_DECREF(intern);
1558     }
1559     return result;
1560 }
1561 
1562 /*[clinic input]
1563 pyexpat.ErrorString
1564 
1565     code: long
1566     /
1567 
1568 Returns string error for given number.
1569 [clinic start generated code]*/
1570 
1571 static PyObject *
pyexpat_ErrorString_impl(PyObject * module,long code)1572 pyexpat_ErrorString_impl(PyObject *module, long code)
1573 /*[clinic end generated code: output=2feae50d166f2174 input=cc67de010d9e62b3]*/
1574 {
1575     return Py_BuildValue("z", XML_ErrorString((int)code));
1576 }
1577 
1578 /* List of methods defined in the module */
1579 
1580 static struct PyMethodDef pyexpat_methods[] = {
1581     PYEXPAT_PARSERCREATE_METHODDEF
1582     PYEXPAT_ERRORSTRING_METHODDEF
1583     {NULL, NULL}  /* sentinel */
1584 };
1585 
1586 /* Module docstring */
1587 
1588 PyDoc_STRVAR(pyexpat_module_documentation,
1589 "Python wrapper for Expat parser.");
1590 
1591 /* Initialization function for the module */
1592 
/* Name used as the prefix for the submodules; can be overridden by
   defining MODULE_NAME before this point. */
#if !defined(MODULE_NAME)
#  define MODULE_NAME "pyexpat"
#endif
1596 
/* Add one get/set descriptor per handler_info entry to the xmlparser type.

   Each entry's embedded PyGetSetDef is filled in with the generic handler
   getter and setter, using the entry itself as the closure.  Names already
   present in the type dictionary are left as they are.  Returns 0 on
   success, -1 with an exception set on failure. */
static int
init_handler_descrs(pyexpat_state *state)
{
    PyTypeObject *parser_type = state->xml_parse_type;

    assert(!PyType_HasFeature(parser_type, Py_TPFLAGS_VALID_VERSION_TAG));

    for (struct HandlerInfo *hi = handler_info; hi->name != NULL; hi++) {
        hi->getset.name = hi->name;
        hi->getset.get = (getter)xmlparse_handler_getter;
        hi->getset.set = (setter)xmlparse_handler_setter;
        hi->getset.closure = hi;

        PyObject *descr = PyDescr_NewGetSet(parser_type, &hi->getset);
        if (descr == NULL) {
            return -1;
        }

        /* PyDict_SetDefault() returns a borrowed reference (NULL on
           error); the local reference to the descriptor is released
           either way. */
        PyObject *stored = PyDict_SetDefault(parser_type->tp_dict,
                                             PyDescr_NAME(descr), descr);
        Py_DECREF(descr);
        if (stored == NULL) {
            return -1;
        }
    }
    return 0;
}
1620 
1621 static PyObject *
add_submodule(PyObject * mod,const char * fullname)1622 add_submodule(PyObject *mod, const char *fullname)
1623 {
1624     const char *name = strrchr(fullname, '.') + 1;
1625 
1626     PyObject *submodule = PyModule_New(fullname);
1627     if (submodule == NULL) {
1628         return NULL;
1629     }
1630 
1631     PyObject *mod_name = PyUnicode_FromString(fullname);
1632     if (mod_name == NULL) {
1633         Py_DECREF(submodule);
1634         return NULL;
1635     }
1636 
1637     if (_PyImport_SetModule(mod_name, submodule) < 0) {
1638         Py_DECREF(submodule);
1639         Py_DECREF(mod_name);
1640         return NULL;
1641     }
1642     Py_DECREF(mod_name);
1643 
1644     /* gives away the reference to the submodule */
1645     if (PyModule_AddObject(mod, name, submodule) < 0) {
1646         Py_DECREF(submodule);
1647         return NULL;
1648     }
1649 
1650     return submodule;
1651 }
1652 
1653 static int
add_error(PyObject * errors_module,PyObject * codes_dict,PyObject * rev_codes_dict,const char * name,int value)1654 add_error(PyObject *errors_module, PyObject *codes_dict,
1655           PyObject *rev_codes_dict, const char *name, int value)
1656 {
1657     const char *error_string = XML_ErrorString(value);
1658     if (PyModule_AddStringConstant(errors_module, name, error_string) < 0) {
1659         return -1;
1660     }
1661 
1662     PyObject *num = PyLong_FromLong(value);
1663     if (num == NULL) {
1664         return -1;
1665     }
1666 
1667     if (PyDict_SetItemString(codes_dict, error_string, num) < 0) {
1668         Py_DECREF(num);
1669         return -1;
1670     }
1671 
1672     PyObject *str = PyUnicode_FromString(error_string);
1673     if (str == NULL) {
1674         Py_DECREF(num);
1675         return -1;
1676     }
1677 
1678     int res = PyDict_SetItem(rev_codes_dict, num, str);
1679     Py_DECREF(str);
1680     Py_DECREF(num);
1681     if (res < 0) {
1682         return -1;
1683     }
1684 
1685     return 0;
1686 }
1687 
1688 static int
add_errors_module(PyObject * mod)1689 add_errors_module(PyObject *mod)
1690 {
1691     PyObject *errors_module = add_submodule(mod, MODULE_NAME ".errors");
1692     if (errors_module == NULL) {
1693         return -1;
1694     }
1695 
1696     PyObject *codes_dict = PyDict_New();
1697     PyObject *rev_codes_dict = PyDict_New();
1698     if (codes_dict == NULL || rev_codes_dict == NULL) {
1699         goto error;
1700     }
1701 
1702 #define ADD_CONST(name) do {                                        \
1703         if (add_error(errors_module, codes_dict, rev_codes_dict,    \
1704                       #name, name) < 0) {                           \
1705             goto error;                                             \
1706         }                                                           \
1707     } while(0)
1708 
1709     ADD_CONST(XML_ERROR_NO_MEMORY);
1710     ADD_CONST(XML_ERROR_SYNTAX);
1711     ADD_CONST(XML_ERROR_NO_ELEMENTS);
1712     ADD_CONST(XML_ERROR_INVALID_TOKEN);
1713     ADD_CONST(XML_ERROR_UNCLOSED_TOKEN);
1714     ADD_CONST(XML_ERROR_PARTIAL_CHAR);
1715     ADD_CONST(XML_ERROR_TAG_MISMATCH);
1716     ADD_CONST(XML_ERROR_DUPLICATE_ATTRIBUTE);
1717     ADD_CONST(XML_ERROR_JUNK_AFTER_DOC_ELEMENT);
1718     ADD_CONST(XML_ERROR_PARAM_ENTITY_REF);
1719     ADD_CONST(XML_ERROR_UNDEFINED_ENTITY);
1720     ADD_CONST(XML_ERROR_RECURSIVE_ENTITY_REF);
1721     ADD_CONST(XML_ERROR_ASYNC_ENTITY);
1722     ADD_CONST(XML_ERROR_BAD_CHAR_REF);
1723     ADD_CONST(XML_ERROR_BINARY_ENTITY_REF);
1724     ADD_CONST(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF);
1725     ADD_CONST(XML_ERROR_MISPLACED_XML_PI);
1726     ADD_CONST(XML_ERROR_UNKNOWN_ENCODING);
1727     ADD_CONST(XML_ERROR_INCORRECT_ENCODING);
1728     ADD_CONST(XML_ERROR_UNCLOSED_CDATA_SECTION);
1729     ADD_CONST(XML_ERROR_EXTERNAL_ENTITY_HANDLING);
1730     ADD_CONST(XML_ERROR_NOT_STANDALONE);
1731     ADD_CONST(XML_ERROR_UNEXPECTED_STATE);
1732     ADD_CONST(XML_ERROR_ENTITY_DECLARED_IN_PE);
1733     ADD_CONST(XML_ERROR_FEATURE_REQUIRES_XML_DTD);
1734     ADD_CONST(XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING);
1735     /* Added in Expat 1.95.7. */
1736     ADD_CONST(XML_ERROR_UNBOUND_PREFIX);
1737     /* Added in Expat 1.95.8. */
1738     ADD_CONST(XML_ERROR_UNDECLARING_PREFIX);
1739     ADD_CONST(XML_ERROR_INCOMPLETE_PE);
1740     ADD_CONST(XML_ERROR_XML_DECL);
1741     ADD_CONST(XML_ERROR_TEXT_DECL);
1742     ADD_CONST(XML_ERROR_PUBLICID);
1743     ADD_CONST(XML_ERROR_SUSPENDED);
1744     ADD_CONST(XML_ERROR_NOT_SUSPENDED);
1745     ADD_CONST(XML_ERROR_ABORTED);
1746     ADD_CONST(XML_ERROR_FINISHED);
1747     ADD_CONST(XML_ERROR_SUSPEND_PE);
1748 #undef ADD_CONST
1749 
1750     if (PyModule_AddStringConstant(errors_module, "__doc__",
1751                                    "Constants used to describe "
1752                                    "error conditions.") < 0) {
1753         goto error;
1754     }
1755 
1756     Py_INCREF(codes_dict);
1757     if (PyModule_AddObject(errors_module, "codes", codes_dict) < 0) {
1758         Py_DECREF(codes_dict);
1759         goto error;
1760     }
1761     Py_CLEAR(codes_dict);
1762 
1763     Py_INCREF(rev_codes_dict);
1764     if (PyModule_AddObject(errors_module, "messages", rev_codes_dict) < 0) {
1765         Py_DECREF(rev_codes_dict);
1766         goto error;
1767     }
1768     Py_CLEAR(rev_codes_dict);
1769 
1770     return 0;
1771 
1772 error:
1773     Py_XDECREF(codes_dict);
1774     Py_XDECREF(rev_codes_dict);
1775     return -1;
1776 }
1777 
1778 static int
add_model_module(PyObject * mod)1779 add_model_module(PyObject *mod)
1780 {
1781     PyObject *model_module = add_submodule(mod, MODULE_NAME ".model");
1782     if (model_module == NULL) {
1783         return -1;
1784     }
1785 
1786 #define MYCONST(c)  do {                                        \
1787         if (PyModule_AddIntConstant(model_module, #c, c) < 0) { \
1788             return -1;                                          \
1789         }                                                       \
1790     } while(0)
1791 
1792     if (PyModule_AddStringConstant(
1793         model_module, "__doc__",
1794         "Constants used to interpret content model information.") < 0) {
1795         return -1;
1796     }
1797 
1798     MYCONST(XML_CTYPE_EMPTY);
1799     MYCONST(XML_CTYPE_ANY);
1800     MYCONST(XML_CTYPE_MIXED);
1801     MYCONST(XML_CTYPE_NAME);
1802     MYCONST(XML_CTYPE_CHOICE);
1803     MYCONST(XML_CTYPE_SEQ);
1804 
1805     MYCONST(XML_CQUANT_NONE);
1806     MYCONST(XML_CQUANT_OPT);
1807     MYCONST(XML_CQUANT_REP);
1808     MYCONST(XML_CQUANT_PLUS);
1809 #undef MYCONST
1810     return 0;
1811 }
1812 
#if XML_COMBINED_VERSION > 19505
/* Build a list of (name, value) tuples from XML_GetFeatureList() and attach
   it to `mod` as the "features" attribute.

   The feature array is walked until the XML_FEATURE_END entry is reached.

   Returns 0 on success; on failure the partially built list is released
   and -1 is returned. */
static int
add_features(PyObject *mod)
{
    PyObject *list = PyList_New(0);
    if (list == NULL) {
        return -1;
    }

    const XML_Feature *features = XML_GetFeatureList();
    for (size_t i = 0; features[i].feature != XML_FEATURE_END; ++i) {
        /* XML_Feature.value is declared `long int` in expat.h, so it must
           be passed with the "l" format unit; "i" would read it as a C int
           through varargs, which is a type mismatch. */
        PyObject *item = Py_BuildValue("sl", features[i].name,
                                       features[i].value);
        if (item == NULL) {
            goto error;
        }
        /* PyList_Append takes its own reference; drop ours either way. */
        int ok = PyList_Append(list, item);
        Py_DECREF(item);
        if (ok < 0) {
            goto error;
        }
    }
    /* PyModule_AddObject only takes over the reference on success, so the
       error path below still has to release the list. */
    if (PyModule_AddObject(mod, "features", list) < 0) {
        goto error;
    }
    return 0;

error:
    Py_DECREF(list);
    return -1;
}
#endif
1845 
1846 static void
pyexpat_destructor(PyObject * op)1847 pyexpat_destructor(PyObject *op)
1848 {
1849     void *p = PyCapsule_GetPointer(op, PyExpat_CAPSULE_NAME);
1850     PyMem_Free(p);
1851 }
1852 
1853 static int
pyexpat_exec(PyObject * mod)1854 pyexpat_exec(PyObject *mod)
1855 {
1856     pyexpat_state *state = pyexpat_get_state(mod);
1857     state->xml_parse_type = (PyTypeObject *)PyType_FromModuleAndSpec(
1858         mod, &_xml_parse_type_spec, NULL);
1859 
1860     if (state->xml_parse_type == NULL) {
1861         return -1;
1862     }
1863 
1864     if (init_handler_descrs(state) < 0) {
1865         return -1;
1866     }
1867     state->error = PyErr_NewException("xml.parsers.expat.ExpatError",
1868                                       NULL, NULL);
1869     if (state->error == NULL) {
1870         return -1;
1871     }
1872 
1873     /* Add some symbolic constants to the module */
1874 
1875     if (PyModule_AddObjectRef(mod, "error", state->error) < 0) {
1876         return -1;
1877     }
1878 
1879     if (PyModule_AddObjectRef(mod, "ExpatError", state->error) < 0) {
1880         return -1;
1881     }
1882 
1883     if (PyModule_AddObjectRef(mod, "XMLParserType",
1884                            (PyObject *) state->xml_parse_type) < 0) {
1885         return -1;
1886     }
1887 
1888     if (PyModule_AddStringConstant(mod, "EXPAT_VERSION",
1889                                    XML_ExpatVersion()) < 0) {
1890         return -1;
1891     }
1892     {
1893         XML_Expat_Version info = XML_ExpatVersionInfo();
1894         PyObject *versionInfo = Py_BuildValue("(iii)",
1895                                               info.major,
1896                                               info.minor,
1897                                               info.micro);
1898         if (PyModule_AddObject(mod, "version_info", versionInfo) < 0) {
1899             Py_DECREF(versionInfo);
1900             return -1;
1901         }
1902     }
1903     /* XXX When Expat supports some way of figuring out how it was
1904        compiled, this should check and set native_encoding
1905        appropriately.
1906     */
1907     if (PyModule_AddStringConstant(mod, "native_encoding", "UTF-8") < 0) {
1908         return -1;
1909     }
1910 
1911     if (add_errors_module(mod) < 0) {
1912         return -1;
1913     }
1914 
1915     if (add_model_module(mod) < 0) {
1916         return -1;
1917     }
1918 
1919 #if XML_COMBINED_VERSION > 19505
1920     if (add_features(mod) < 0) {
1921         return -1;
1922     }
1923 #endif
1924 
1925 #define MYCONST(c) do {                                 \
1926         if (PyModule_AddIntConstant(mod, #c, c) < 0) {  \
1927             return -1;                                  \
1928         }                                               \
1929     } while(0)
1930 
1931     MYCONST(XML_PARAM_ENTITY_PARSING_NEVER);
1932     MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1933     MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS);
1934 #undef MYCONST
1935 
1936     struct PyExpat_CAPI *capi = PyMem_Calloc(1, sizeof(struct PyExpat_CAPI));
1937     if (capi == NULL) {
1938         PyErr_NoMemory();
1939         return -1;
1940     }
1941     /* initialize pyexpat dispatch table */
1942     capi->size = sizeof(*capi);
1943     capi->magic = PyExpat_CAPI_MAGIC;
1944     capi->MAJOR_VERSION = XML_MAJOR_VERSION;
1945     capi->MINOR_VERSION = XML_MINOR_VERSION;
1946     capi->MICRO_VERSION = XML_MICRO_VERSION;
1947     capi->ErrorString = XML_ErrorString;
1948     capi->GetErrorCode = XML_GetErrorCode;
1949     capi->GetErrorColumnNumber = XML_GetErrorColumnNumber;
1950     capi->GetErrorLineNumber = XML_GetErrorLineNumber;
1951     capi->Parse = XML_Parse;
1952     capi->ParserCreate_MM = XML_ParserCreate_MM;
1953     capi->ParserFree = XML_ParserFree;
1954     capi->SetCharacterDataHandler = XML_SetCharacterDataHandler;
1955     capi->SetCommentHandler = XML_SetCommentHandler;
1956     capi->SetDefaultHandlerExpand = XML_SetDefaultHandlerExpand;
1957     capi->SetElementHandler = XML_SetElementHandler;
1958     capi->SetNamespaceDeclHandler = XML_SetNamespaceDeclHandler;
1959     capi->SetProcessingInstructionHandler = XML_SetProcessingInstructionHandler;
1960     capi->SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler;
1961     capi->SetUserData = XML_SetUserData;
1962     capi->SetStartDoctypeDeclHandler = XML_SetStartDoctypeDeclHandler;
1963     capi->SetEncoding = XML_SetEncoding;
1964     capi->DefaultUnknownEncodingHandler = PyUnknownEncodingHandler;
1965 #if XML_COMBINED_VERSION >= 20100
1966     capi->SetHashSalt = XML_SetHashSalt;
1967 #else
1968     capi->SetHashSalt = NULL;
1969 #endif
1970 
1971     /* export using capsule */
1972     PyObject *capi_object = PyCapsule_New(capi, PyExpat_CAPSULE_NAME,
1973                                           pyexpat_destructor);
1974     if (capi_object == NULL) {
1975         PyMem_Free(capi);
1976         return -1;
1977     }
1978 
1979     if (PyModule_AddObject(mod, "expat_CAPI", capi_object) < 0) {
1980         Py_DECREF(capi_object);
1981         return -1;
1982     }
1983 
1984     return 0;
1985 }
1986 
1987 static int
pyexpat_traverse(PyObject * module,visitproc visit,void * arg)1988 pyexpat_traverse(PyObject *module, visitproc visit, void *arg)
1989 {
1990     pyexpat_state *state = pyexpat_get_state(module);
1991     Py_VISIT(state->xml_parse_type);
1992     Py_VISIT(state->error);
1993     return 0;
1994 }
1995 
1996 static int
pyexpat_clear(PyObject * module)1997 pyexpat_clear(PyObject *module)
1998 {
1999     pyexpat_state *state = pyexpat_get_state(module);
2000     Py_CLEAR(state->xml_parse_type);
2001     Py_CLEAR(state->error);
2002     return 0;
2003 }
2004 
2005 static void
pyexpat_free(void * module)2006 pyexpat_free(void *module)
2007 {
2008     pyexpat_clear((PyObject *)module);
2009 }
2010 
2011 static PyModuleDef_Slot pyexpat_slots[] = {
2012     {Py_mod_exec, pyexpat_exec},
2013     {0, NULL}
2014 };
2015 
2016 static struct PyModuleDef pyexpatmodule = {
2017     PyModuleDef_HEAD_INIT,
2018     .m_name = MODULE_NAME,
2019     .m_doc = pyexpat_module_documentation,
2020     .m_size = sizeof(pyexpat_state),
2021     .m_methods = pyexpat_methods,
2022     .m_slots = pyexpat_slots,
2023     .m_traverse = pyexpat_traverse,
2024     .m_clear = pyexpat_clear,
2025     .m_free = pyexpat_free
2026 };
2027 
2028 PyMODINIT_FUNC
PyInit_pyexpat(void)2029 PyInit_pyexpat(void)
2030 {
2031     return PyModuleDef_Init(&pyexpatmodule);
2032 }
2033 
2034 static void
clear_handlers(xmlparseobject * self,int initial)2035 clear_handlers(xmlparseobject *self, int initial)
2036 {
2037     int i = 0;
2038 
2039     for (; handler_info[i].name != NULL; i++) {
2040         if (initial)
2041             self->handlers[i] = NULL;
2042         else {
2043             Py_CLEAR(self->handlers[i]);
2044             handler_info[i].setter(self->itself, NULL);
2045         }
2046     }
2047 }
2048 
2049 static struct HandlerInfo handler_info[] = {
2050 
2051 #define HANDLER_INFO(name) \
2052     {#name, (xmlhandlersetter)XML_Set##name, (xmlhandler)my_##name},
2053 
2054     HANDLER_INFO(StartElementHandler)
2055     HANDLER_INFO(EndElementHandler)
2056     HANDLER_INFO(ProcessingInstructionHandler)
2057     HANDLER_INFO(CharacterDataHandler)
2058     HANDLER_INFO(UnparsedEntityDeclHandler)
2059     HANDLER_INFO(NotationDeclHandler)
2060     HANDLER_INFO(StartNamespaceDeclHandler)
2061     HANDLER_INFO(EndNamespaceDeclHandler)
2062     HANDLER_INFO(CommentHandler)
2063     HANDLER_INFO(StartCdataSectionHandler)
2064     HANDLER_INFO(EndCdataSectionHandler)
2065     HANDLER_INFO(DefaultHandler)
2066     HANDLER_INFO(DefaultHandlerExpand)
2067     HANDLER_INFO(NotStandaloneHandler)
2068     HANDLER_INFO(ExternalEntityRefHandler)
2069     HANDLER_INFO(StartDoctypeDeclHandler)
2070     HANDLER_INFO(EndDoctypeDeclHandler)
2071     HANDLER_INFO(EntityDeclHandler)
2072     HANDLER_INFO(XmlDeclHandler)
2073     HANDLER_INFO(ElementDeclHandler)
2074     HANDLER_INFO(AttlistDeclHandler)
2075 #if XML_COMBINED_VERSION >= 19504
2076     HANDLER_INFO(SkippedEntityHandler)
2077 #endif
2078 
2079 #undef HANDLER_INFO
2080 
2081     {NULL, NULL, NULL} /* sentinel */
2082 };
2083