• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #ifndef Py_BUILD_CORE_BUILTIN
2 #  define Py_BUILD_CORE_MODULE 1
3 #endif
4 
5 #include "Python.h"
6 #include "pycore_import.h"        // _PyImport_SetModule()
7 #include "pycore_pyhash.h"        // _Py_HashSecret
8 #include "pycore_traceback.h"     // _PyTraceback_Add()
9 
10 #include <stdbool.h>
11 #include <stddef.h>               // offsetof()
12 #include "expat.h"
13 #include "pyexpat.h"
14 
15 /* Do not emit Clinic output to a file as that wreaks havoc with conditionally
16    included methods. */
17 /*[clinic input]
18 module pyexpat
19 [clinic start generated code]*/
20 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=b168d503a4490c15]*/
21 
22 #define XML_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)
23 
24 static XML_Memory_Handling_Suite ExpatMemoryHandler = {
25     PyMem_Malloc, PyMem_Realloc, PyMem_Free};
26 
/* One enumerator per kind of Expat callback.  The values are used as
   indices into xmlparseobject.handlers (see have_handler()), so the
   order below must not change. */
enum HandlerTypes {
    /* Element and character content. */
    StartElement,
    EndElement,
    ProcessingInstruction,
    CharacterData,

    /* DTD entity / notation declarations. */
    UnparsedEntityDecl,
    NotationDecl,

    /* Namespace scopes. */
    StartNamespaceDecl,
    EndNamespaceDecl,

    /* Comments and CDATA sections. */
    Comment,
    StartCdataSection,
    EndCdataSection,

    /* Default handlers. */
    Default,
    DefaultHandlerExpand,

    /* Document-level queries answered by the application. */
    NotStandalone,
    ExternalEntityRef,

    /* DOCTYPE and the remaining declarations. */
    StartDoctypeDecl,
    EndDoctypeDecl,
    EntityDecl,
    XmlDecl,
    ElementDecl,
    AttlistDecl,
#if XML_COMBINED_VERSION >= 19504
    /* Only present when building against Expat 1.95.4 or newer. */
    SkippedEntity,
#endif
    /* Last enumerator; its use is not visible in this part of the file. */
    _DummyDecl
};
54 
55 typedef struct {
56     PyTypeObject *xml_parse_type;
57     PyObject *error;
58     PyObject *str_read;
59 } pyexpat_state;
60 
61 static inline pyexpat_state*
pyexpat_get_state(PyObject * module)62 pyexpat_get_state(PyObject *module)
63 {
64     void *state = PyModule_GetState(module);
65     assert(state != NULL);
66     return (pyexpat_state *)state;
67 }
68 
69 /* ----------------------------------------------------- */
70 
71 /* Declarations for objects of type xmlparser */
72 
73 typedef struct {
74     PyObject_HEAD
75 
76     XML_Parser itself;
77     int ordered_attributes;     /* Return attributes as a list. */
78     int specified_attributes;   /* Report only specified attributes. */
79     int in_callback;            /* Is a callback active? */
80     int ns_prefixes;            /* Namespace-triplets mode? */
81     XML_Char *buffer;           /* Buffer used when accumulating characters */
82                                 /* NULL if not enabled */
83     int buffer_size;            /* Size of buffer, in XML_Char units */
84     int buffer_used;            /* Buffer units in use */
85     bool reparse_deferral_enabled; /* Whether to defer reparsing of
86                                    unfinished XML tokens; a de-facto cache of
87                                    what Expat has the authority on, for lack
88                                    of a getter API function
89                                    "XML_GetReparseDeferralEnabled" in Expat
90                                    2.6.0 */
91     PyObject *intern;           /* Dictionary to intern strings */
92     PyObject **handlers;
93 } xmlparseobject;
94 
95 #include "clinic/pyexpat.c.h"
96 
97 #define CHARACTER_DATA_BUFFER_SIZE 8192
98 
99 typedef void (*xmlhandlersetter)(XML_Parser self, void *meth);
100 typedef void* xmlhandler;
101 
102 struct HandlerInfo {
103     const char *name;
104     xmlhandlersetter setter;
105     xmlhandler handler;
106     PyGetSetDef getset;
107 };
108 
109 static struct HandlerInfo handler_info[64];
110 
111 /* Set an integer attribute on the error object; return true on success,
112  * false on an exception.
113  */
114 static int
set_error_attr(PyObject * err,const char * name,int value)115 set_error_attr(PyObject *err, const char *name, int value)
116 {
117     PyObject *v = PyLong_FromLong(value);
118 
119     if (v == NULL || PyObject_SetAttrString(err, name, v) == -1) {
120         Py_XDECREF(v);
121         return 0;
122     }
123     Py_DECREF(v);
124     return 1;
125 }
126 
127 /* Build and set an Expat exception, including positioning
128  * information.  Always returns NULL.
129  */
130 static PyObject *
set_error(pyexpat_state * state,xmlparseobject * self,enum XML_Error code)131 set_error(pyexpat_state *state, xmlparseobject *self, enum XML_Error code)
132 {
133     PyObject *err;
134     PyObject *buffer;
135     XML_Parser parser = self->itself;
136     int lineno = XML_GetErrorLineNumber(parser);
137     int column = XML_GetErrorColumnNumber(parser);
138 
139     buffer = PyUnicode_FromFormat("%s: line %i, column %i",
140                                   XML_ErrorString(code), lineno, column);
141     if (buffer == NULL)
142         return NULL;
143     err = PyObject_CallOneArg(state->error, buffer);
144     Py_DECREF(buffer);
145     if (  err != NULL
146           && set_error_attr(err, "code", code)
147           && set_error_attr(err, "offset", column)
148           && set_error_attr(err, "lineno", lineno)) {
149         PyErr_SetObject(state->error, err);
150     }
151     Py_XDECREF(err);
152     return NULL;
153 }
154 
155 static int
have_handler(xmlparseobject * self,int type)156 have_handler(xmlparseobject *self, int type)
157 {
158     PyObject *handler = self->handlers[type];
159     return handler != NULL;
160 }
161 
162 /* Convert a string of XML_Chars into a Unicode string.
163    Returns None if str is a null pointer. */
164 
165 static PyObject *
conv_string_to_unicode(const XML_Char * str)166 conv_string_to_unicode(const XML_Char *str)
167 {
168     /* XXX currently this code assumes that XML_Char is 8-bit,
169        and hence in UTF-8.  */
170     /* UTF-8 from Expat, Unicode desired */
171     if (str == NULL) {
172         Py_RETURN_NONE;
173     }
174     return PyUnicode_DecodeUTF8(str, strlen(str), "strict");
175 }
176 
177 static PyObject *
conv_string_len_to_unicode(const XML_Char * str,int len)178 conv_string_len_to_unicode(const XML_Char *str, int len)
179 {
180     /* XXX currently this code assumes that XML_Char is 8-bit,
181        and hence in UTF-8.  */
182     /* UTF-8 from Expat, Unicode desired */
183     if (str == NULL) {
184         Py_RETURN_NONE;
185     }
186     return PyUnicode_DecodeUTF8((const char *)str, len, "strict");
187 }
188 
189 /* Callback routines */
190 
191 static void clear_handlers(xmlparseobject *self, int initial);
192 
193 /* This handler is used when an error has been detected, in the hope
194    that actual parsing can be terminated early.  This will only help
195    if an external entity reference is encountered. */
196 static int
error_external_entity_ref_handler(XML_Parser parser,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)197 error_external_entity_ref_handler(XML_Parser parser,
198                                   const XML_Char *context,
199                                   const XML_Char *base,
200                                   const XML_Char *systemId,
201                                   const XML_Char *publicId)
202 {
203     return 0;
204 }
205 
206 /* Dummy character data handler used when an error (exception) has
207    been detected, and the actual parsing can be terminated early.
208    This is needed since character data handler can't be safely removed
209    from within the character data handler, but can be replaced.  It is
210    used only from the character data handler trampoline, and must be
211    used right after `flag_error()` is called. */
212 static void
noop_character_data_handler(void * userData,const XML_Char * data,int len)213 noop_character_data_handler(void *userData, const XML_Char *data, int len)
214 {
215     /* Do nothing. */
216 }
217 
218 static void
flag_error(xmlparseobject * self)219 flag_error(xmlparseobject *self)
220 {
221     clear_handlers(self, 0);
222     XML_SetExternalEntityRefHandler(self->itself,
223                                     error_external_entity_ref_handler);
224 }
225 
226 static PyObject*
call_with_frame(const char * funcname,int lineno,PyObject * func,PyObject * args,xmlparseobject * self)227 call_with_frame(const char *funcname, int lineno, PyObject* func, PyObject* args,
228                 xmlparseobject *self)
229 {
230     PyObject *res;
231 
232     res = PyObject_Call(func, args, NULL);
233     if (res == NULL) {
234         _PyTraceback_Add(funcname, __FILE__, lineno);
235         XML_StopParser(self->itself, XML_FALSE);
236     }
237     return res;
238 }
239 
240 static PyObject*
string_intern(xmlparseobject * self,const char * str)241 string_intern(xmlparseobject *self, const char* str)
242 {
243     PyObject *result = conv_string_to_unicode(str);
244     PyObject *value;
245     /* result can be NULL if the unicode conversion failed. */
246     if (!result)
247         return result;
248     if (!self->intern)
249         return result;
250     if (PyDict_GetItemRef(self->intern, result, &value) == 0 &&
251         PyDict_SetItem(self->intern, result, result) == 0)
252     {
253         return result;
254     }
255     assert((value != NULL) == !PyErr_Occurred());
256     Py_DECREF(result);
257     return value;
258 }
259 
260 /* Return 0 on success, -1 on exception.
261  * flag_error() will be called before return if needed.
262  */
263 static int
call_character_handler(xmlparseobject * self,const XML_Char * buffer,int len)264 call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len)
265 {
266     PyObject *args;
267     PyObject *temp;
268 
269     if (!have_handler(self, CharacterData))
270         return -1;
271 
272     args = PyTuple_New(1);
273     if (args == NULL)
274         return -1;
275     temp = (conv_string_len_to_unicode(buffer, len));
276     if (temp == NULL) {
277         Py_DECREF(args);
278         flag_error(self);
279         XML_SetCharacterDataHandler(self->itself,
280                                     noop_character_data_handler);
281         return -1;
282     }
283     PyTuple_SET_ITEM(args, 0, temp);
284     /* temp is now a borrowed reference; consider it unused. */
285     self->in_callback = 1;
286     temp = call_with_frame("CharacterData", __LINE__,
287                            self->handlers[CharacterData], args, self);
288     /* temp is an owned reference again, or NULL */
289     self->in_callback = 0;
290     Py_DECREF(args);
291     if (temp == NULL) {
292         flag_error(self);
293         XML_SetCharacterDataHandler(self->itself,
294                                     noop_character_data_handler);
295         return -1;
296     }
297     Py_DECREF(temp);
298     return 0;
299 }
300 
301 static int
flush_character_buffer(xmlparseobject * self)302 flush_character_buffer(xmlparseobject *self)
303 {
304     int rc;
305     if (self->buffer == NULL || self->buffer_used == 0)
306         return 0;
307     rc = call_character_handler(self, self->buffer, self->buffer_used);
308     self->buffer_used = 0;
309     return rc;
310 }
311 
312 static void
my_CharacterDataHandler(void * userData,const XML_Char * data,int len)313 my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
314 {
315     xmlparseobject *self = (xmlparseobject *) userData;
316 
317     if (PyErr_Occurred())
318         return;
319 
320     if (self->buffer == NULL)
321         call_character_handler(self, data, len);
322     else {
323         if ((self->buffer_used + len) > self->buffer_size) {
324             if (flush_character_buffer(self) < 0)
325                 return;
326             /* handler might have changed; drop the rest on the floor
327              * if there isn't a handler anymore
328              */
329             if (!have_handler(self, CharacterData))
330                 return;
331         }
332         if (len > self->buffer_size) {
333             call_character_handler(self, data, len);
334             self->buffer_used = 0;
335         }
336         else {
337             memcpy(self->buffer + self->buffer_used,
338                    data, len * sizeof(XML_Char));
339             self->buffer_used += len;
340         }
341     }
342 }
343 
344 static void
my_StartElementHandler(void * userData,const XML_Char * name,const XML_Char * atts[])345 my_StartElementHandler(void *userData,
346                        const XML_Char *name, const XML_Char *atts[])
347 {
348     xmlparseobject *self = (xmlparseobject *)userData;
349 
350     if (have_handler(self, StartElement)) {
351         PyObject *container, *rv, *args;
352         int i, max;
353 
354         if (PyErr_Occurred())
355             return;
356 
357         if (flush_character_buffer(self) < 0)
358             return;
359         /* Set max to the number of slots filled in atts[]; max/2 is
360          * the number of attributes we need to process.
361          */
362         if (self->specified_attributes) {
363             max = XML_GetSpecifiedAttributeCount(self->itself);
364         }
365         else {
366             max = 0;
367             while (atts[max] != NULL)
368                 max += 2;
369         }
370         /* Build the container. */
371         if (self->ordered_attributes)
372             container = PyList_New(max);
373         else
374             container = PyDict_New();
375         if (container == NULL) {
376             flag_error(self);
377             return;
378         }
379         for (i = 0; i < max; i += 2) {
380             PyObject *n = string_intern(self, (XML_Char *) atts[i]);
381             PyObject *v;
382             if (n == NULL) {
383                 flag_error(self);
384                 Py_DECREF(container);
385                 return;
386             }
387             v = conv_string_to_unicode((XML_Char *) atts[i+1]);
388             if (v == NULL) {
389                 flag_error(self);
390                 Py_DECREF(container);
391                 Py_DECREF(n);
392                 return;
393             }
394             if (self->ordered_attributes) {
395                 PyList_SET_ITEM(container, i, n);
396                 PyList_SET_ITEM(container, i+1, v);
397             }
398             else if (PyDict_SetItem(container, n, v)) {
399                 flag_error(self);
400                 Py_DECREF(n);
401                 Py_DECREF(v);
402                 Py_DECREF(container);
403                 return;
404             }
405             else {
406                 Py_DECREF(n);
407                 Py_DECREF(v);
408             }
409         }
410         args = string_intern(self, name);
411         if (args == NULL) {
412             Py_DECREF(container);
413             return;
414         }
415         args = Py_BuildValue("(NN)", args, container);
416         if (args == NULL) {
417             return;
418         }
419         /* Container is now a borrowed reference; ignore it. */
420         self->in_callback = 1;
421         rv = call_with_frame("StartElement", __LINE__,
422                              self->handlers[StartElement], args, self);
423         self->in_callback = 0;
424         Py_DECREF(args);
425         if (rv == NULL) {
426             flag_error(self);
427             return;
428         }
429         Py_DECREF(rv);
430     }
431 }
432 
/* Generate the C callback my_<NAME>Handler for handler kind NAME.
 *
 *   RC            return type of the generated function
 *   NAME          enum HandlerTypes value; also names the function
 *   PARAMS        parenthesised parameter list of the function
 *   INIT          extra declarations placed before the body
 *   PARAM_FORMAT  parenthesised Py_BuildValue() argument list that builds
 *                 the arguments for the Python handler
 *   CONVERSION    statement(s) run on the handler's result `rv`
 *   RETURN        expression returned from every exit
 *   GETUSERDATA   expression yielding the xmlparseobject pointer
 *
 * The generated function does nothing when no handler is installed or an
 * exception is pending.  It flushes buffered character data first, then
 * re-checks the handler slot, because the flush may run the character
 * data callback and the handler might have changed.  flag_error() is
 * called when building the arguments or calling the handler fails.
 */
#define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
                RETURN, GETUSERDATA) \
static RC \
my_##NAME##Handler PARAMS {\
    xmlparseobject *self = GETUSERDATA ; \
    PyObject *args = NULL; \
    PyObject *rv = NULL; \
    INIT \
\
    if (have_handler(self, NAME)) { \
        if (PyErr_Occurred()) \
            return RETURN; \
        if (flush_character_buffer(self) < 0) \
            return RETURN; \
        if (!have_handler(self, NAME)) \
            return RETURN; \
        args = Py_BuildValue PARAM_FORMAT ;\
        if (!args) { flag_error(self); return RETURN;} \
        self->in_callback = 1; \
        rv = call_with_frame(#NAME,__LINE__, \
                             self->handlers[NAME], args, self); \
        self->in_callback = 0; \
        Py_DECREF(args); \
        if (rv == NULL) { \
            flag_error(self); \
            return RETURN; \
        } \
        CONVERSION \
        Py_DECREF(rv); \
    } \
    return RETURN; \
}

/* Callback returning void; the xmlparseobject is the `userData`
   parameter and the handler's result is discarded. */
#define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
        RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;,\
        (xmlparseobject *)userData)

/* Callback returning int: the handler's result converted with
   PyLong_AsLong(), or 0 when the handler is not called or fails. */
#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT)\
        RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
                        rc = PyLong_AsLong(rv);, rc, \
        (xmlparseobject *)userData)
472 
473 VOID_HANDLER(EndElement,
474              (void *userData, const XML_Char *name),
475              ("(N)", string_intern(self, name)))
476 
477 VOID_HANDLER(ProcessingInstruction,
478              (void *userData,
479               const XML_Char *target,
480               const XML_Char *data),
481              ("(NO&)", string_intern(self, target), conv_string_to_unicode ,data))
482 
483 VOID_HANDLER(UnparsedEntityDecl,
484              (void *userData,
485               const XML_Char *entityName,
486               const XML_Char *base,
487               const XML_Char *systemId,
488               const XML_Char *publicId,
489               const XML_Char *notationName),
490              ("(NNNNN)",
491               string_intern(self, entityName), string_intern(self, base),
492               string_intern(self, systemId), string_intern(self, publicId),
493               string_intern(self, notationName)))
494 
495 VOID_HANDLER(EntityDecl,
496              (void *userData,
497               const XML_Char *entityName,
498               int is_parameter_entity,
499               const XML_Char *value,
500               int value_length,
501               const XML_Char *base,
502               const XML_Char *systemId,
503               const XML_Char *publicId,
504               const XML_Char *notationName),
505              ("NiNNNNN",
506               string_intern(self, entityName), is_parameter_entity,
507               (conv_string_len_to_unicode(value, value_length)),
508               string_intern(self, base), string_intern(self, systemId),
509               string_intern(self, publicId),
510               string_intern(self, notationName)))
511 
512 VOID_HANDLER(XmlDecl,
513              (void *userData,
514               const XML_Char *version,
515               const XML_Char *encoding,
516               int standalone),
517              ("(O&O&i)",
518               conv_string_to_unicode ,version, conv_string_to_unicode ,encoding,
519               standalone))
520 
521 static PyObject *
conv_content_model(XML_Content * const model,PyObject * (* conv_string)(const XML_Char *))522 conv_content_model(XML_Content * const model,
523                    PyObject *(*conv_string)(const XML_Char *))
524 {
525     PyObject *result = NULL;
526     PyObject *children = PyTuple_New(model->numchildren);
527     int i;
528 
529     if (children != NULL) {
530         assert(model->numchildren < INT_MAX);
531         for (i = 0; i < (int)model->numchildren; ++i) {
532             PyObject *child = conv_content_model(&model->children[i],
533                                                  conv_string);
534             if (child == NULL) {
535                 Py_XDECREF(children);
536                 return NULL;
537             }
538             PyTuple_SET_ITEM(children, i, child);
539         }
540         result = Py_BuildValue("(iiO&N)",
541                                model->type, model->quant,
542                                conv_string,model->name, children);
543     }
544     return result;
545 }
546 
547 static void
my_ElementDeclHandler(void * userData,const XML_Char * name,XML_Content * model)548 my_ElementDeclHandler(void *userData,
549                       const XML_Char *name,
550                       XML_Content *model)
551 {
552     xmlparseobject *self = (xmlparseobject *)userData;
553     PyObject *args = NULL;
554 
555     if (have_handler(self, ElementDecl)) {
556         PyObject *rv = NULL;
557         PyObject *modelobj, *nameobj;
558 
559         if (PyErr_Occurred())
560             return;
561 
562         if (flush_character_buffer(self) < 0)
563             goto finally;
564         modelobj = conv_content_model(model, (conv_string_to_unicode));
565         if (modelobj == NULL) {
566             flag_error(self);
567             goto finally;
568         }
569         nameobj = string_intern(self, name);
570         if (nameobj == NULL) {
571             Py_DECREF(modelobj);
572             flag_error(self);
573             goto finally;
574         }
575         args = Py_BuildValue("NN", nameobj, modelobj);
576         if (args == NULL) {
577             flag_error(self);
578             goto finally;
579         }
580         self->in_callback = 1;
581         rv = call_with_frame("ElementDecl", __LINE__,
582                              self->handlers[ElementDecl], args, self);
583         self->in_callback = 0;
584         if (rv == NULL) {
585             flag_error(self);
586             goto finally;
587         }
588         Py_DECREF(rv);
589     }
590  finally:
591     Py_XDECREF(args);
592     XML_FreeContentModel(self->itself, model);
593     return;
594 }
595 
596 VOID_HANDLER(AttlistDecl,
597              (void *userData,
598               const XML_Char *elname,
599               const XML_Char *attname,
600               const XML_Char *att_type,
601               const XML_Char *dflt,
602               int isrequired),
603              ("(NNO&O&i)",
604               string_intern(self, elname), string_intern(self, attname),
605               conv_string_to_unicode ,att_type, conv_string_to_unicode ,dflt,
606               isrequired))
607 
608 #if XML_COMBINED_VERSION >= 19504
609 VOID_HANDLER(SkippedEntity,
610              (void *userData,
611               const XML_Char *entityName,
612               int is_parameter_entity),
613              ("Ni",
614               string_intern(self, entityName), is_parameter_entity))
615 #endif
616 
617 VOID_HANDLER(NotationDecl,
618                 (void *userData,
619                         const XML_Char *notationName,
620                         const XML_Char *base,
621                         const XML_Char *systemId,
622                         const XML_Char *publicId),
623                 ("(NNNN)",
624                  string_intern(self, notationName), string_intern(self, base),
625                  string_intern(self, systemId), string_intern(self, publicId)))
626 
627 VOID_HANDLER(StartNamespaceDecl,
628                 (void *userData,
629                       const XML_Char *prefix,
630                       const XML_Char *uri),
631                 ("(NN)",
632                  string_intern(self, prefix), string_intern(self, uri)))
633 
634 VOID_HANDLER(EndNamespaceDecl,
635                 (void *userData,
636                     const XML_Char *prefix),
637                 ("(N)", string_intern(self, prefix)))
638 
639 VOID_HANDLER(Comment,
640                (void *userData, const XML_Char *data),
641                 ("(O&)", conv_string_to_unicode ,data))
642 
643 VOID_HANDLER(StartCdataSection,
644                (void *userData),
645                 ("()"))
646 
647 VOID_HANDLER(EndCdataSection,
648                (void *userData),
649                 ("()"))
650 
651 VOID_HANDLER(Default,
652               (void *userData, const XML_Char *s, int len),
653               ("(N)", (conv_string_len_to_unicode(s,len))))
654 
655 VOID_HANDLER(DefaultHandlerExpand,
656               (void *userData, const XML_Char *s, int len),
657               ("(N)", (conv_string_len_to_unicode(s,len))))
658 #define my_DefaultHandlerExpand my_DefaultHandlerExpandHandler
659 
660 INT_HANDLER(NotStandalone,
661                 (void *userData),
662                 ("()"))
663 
664 RC_HANDLER(int, ExternalEntityRef,
665                 (XML_Parser parser,
666                     const XML_Char *context,
667                     const XML_Char *base,
668                     const XML_Char *systemId,
669                     const XML_Char *publicId),
670                 int rc=0;,
671                 ("(O&NNN)",
672                  conv_string_to_unicode ,context, string_intern(self, base),
673                  string_intern(self, systemId), string_intern(self, publicId)),
674                 rc = PyLong_AsLong(rv);, rc,
675                 XML_GetUserData(parser))
676 
677 /* XXX UnknownEncodingHandler */
678 
679 VOID_HANDLER(StartDoctypeDecl,
680              (void *userData, const XML_Char *doctypeName,
681               const XML_Char *sysid, const XML_Char *pubid,
682               int has_internal_subset),
683              ("(NNNi)", string_intern(self, doctypeName),
684               string_intern(self, sysid), string_intern(self, pubid),
685               has_internal_subset))
686 
687 VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
688 
689 /* ---------------------------------------------------------------- */
690 /*[clinic input]
691 class pyexpat.xmlparser "xmlparseobject *" "&Xmlparsetype"
692 [clinic start generated code]*/
693 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=2393162385232e1c]*/
694 
695 
696 static PyObject *
get_parse_result(pyexpat_state * state,xmlparseobject * self,int rv)697 get_parse_result(pyexpat_state *state, xmlparseobject *self, int rv)
698 {
699     if (PyErr_Occurred()) {
700         return NULL;
701     }
702     if (rv == 0) {
703         return set_error(state, self, XML_GetErrorCode(self->itself));
704     }
705     if (flush_character_buffer(self) < 0) {
706         return NULL;
707     }
708     return PyLong_FromLong(rv);
709 }
710 
711 #define MAX_CHUNK_SIZE (1 << 20)
712 
713 /*[clinic input]
714 pyexpat.xmlparser.SetReparseDeferralEnabled
715 
716     enabled: bool
717     /
718 
719 Enable/Disable reparse deferral; enabled by default with Expat >=2.6.0.
720 [clinic start generated code]*/
721 
722 static PyObject *
pyexpat_xmlparser_SetReparseDeferralEnabled_impl(xmlparseobject * self,int enabled)723 pyexpat_xmlparser_SetReparseDeferralEnabled_impl(xmlparseobject *self,
724                                                  int enabled)
725 /*[clinic end generated code: output=5ec539e3b63c8c49 input=021eb9e0bafc32c5]*/
726 {
727 #if XML_COMBINED_VERSION >= 20600
728     XML_SetReparseDeferralEnabled(self->itself, enabled ? XML_TRUE : XML_FALSE);
729     self->reparse_deferral_enabled = (bool)enabled;
730 #endif
731     Py_RETURN_NONE;
732 }
733 
734 /*[clinic input]
735 pyexpat.xmlparser.GetReparseDeferralEnabled
736 
737 Retrieve reparse deferral enabled status; always returns false with Expat <2.6.0.
738 [clinic start generated code]*/
739 
740 static PyObject *
pyexpat_xmlparser_GetReparseDeferralEnabled_impl(xmlparseobject * self)741 pyexpat_xmlparser_GetReparseDeferralEnabled_impl(xmlparseobject *self)
742 /*[clinic end generated code: output=4e91312e88a595a8 input=54b5f11d32b20f3e]*/
743 {
744     return PyBool_FromLong(self->reparse_deferral_enabled);
745 }
746 
747 /*[clinic input]
748 pyexpat.xmlparser.Parse
749 
750     cls: defining_class
751     data: object
752     isfinal: bool = False
753     /
754 
755 Parse XML data.
756 
757 `isfinal' should be true at end of input.
758 [clinic start generated code]*/
759 
760 static PyObject *
pyexpat_xmlparser_Parse_impl(xmlparseobject * self,PyTypeObject * cls,PyObject * data,int isfinal)761 pyexpat_xmlparser_Parse_impl(xmlparseobject *self, PyTypeObject *cls,
762                              PyObject *data, int isfinal)
763 /*[clinic end generated code: output=8faffe07fe1f862a input=d0eb2a69fab3b9f1]*/
764 {
765     const char *s;
766     Py_ssize_t slen;
767     Py_buffer view;
768     int rc;
769     pyexpat_state *state = PyType_GetModuleState(cls);
770 
771     if (PyUnicode_Check(data)) {
772         view.buf = NULL;
773         s = PyUnicode_AsUTF8AndSize(data, &slen);
774         if (s == NULL)
775             return NULL;
776         /* Explicitly set UTF-8 encoding. Return code ignored. */
777         (void)XML_SetEncoding(self->itself, "utf-8");
778     }
779     else {
780         if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
781             return NULL;
782         s = view.buf;
783         slen = view.len;
784     }
785 
786     static_assert(MAX_CHUNK_SIZE <= INT_MAX,
787                   "MAX_CHUNK_SIZE is larger than INT_MAX");
788     while (slen > MAX_CHUNK_SIZE) {
789         rc = XML_Parse(self->itself, s, MAX_CHUNK_SIZE, 0);
790         if (!rc)
791             goto done;
792         s += MAX_CHUNK_SIZE;
793         slen -= MAX_CHUNK_SIZE;
794     }
795 
796     assert(slen <= INT_MAX);
797     rc = XML_Parse(self->itself, s, (int)slen, isfinal);
798 
799 done:
800     if (view.buf != NULL) {
801         PyBuffer_Release(&view);
802     }
803     return get_parse_result(state, self, rc);
804 }
805 
806 /* File reading copied from cPickle */
807 
808 #define BUF_SIZE 2048
809 
810 static int
readinst(char * buf,int buf_size,PyObject * meth)811 readinst(char *buf, int buf_size, PyObject *meth)
812 {
813     PyObject *str;
814     Py_ssize_t len;
815     const char *ptr;
816 
817     str = PyObject_CallFunction(meth, "i", buf_size);
818     if (str == NULL)
819         goto error;
820 
821     if (PyBytes_Check(str))
822         ptr = PyBytes_AS_STRING(str);
823     else if (PyByteArray_Check(str))
824         ptr = PyByteArray_AS_STRING(str);
825     else {
826         PyErr_Format(PyExc_TypeError,
827                      "read() did not return a bytes object (type=%.400s)",
828                      Py_TYPE(str)->tp_name);
829         goto error;
830     }
831     len = Py_SIZE(str);
832     if (len > buf_size) {
833         PyErr_Format(PyExc_ValueError,
834                      "read() returned too much data: "
835                      "%i bytes requested, %zd returned",
836                      buf_size, len);
837         goto error;
838     }
839     memcpy(buf, ptr, len);
840     Py_DECREF(str);
841     /* len <= buf_size <= INT_MAX */
842     return (int)len;
843 
844 error:
845     Py_XDECREF(str);
846     return -1;
847 }
848 
849 /*[clinic input]
850 pyexpat.xmlparser.ParseFile
851 
852     cls: defining_class
853     file: object
854     /
855 
856 Parse XML data from file-like object.
857 [clinic start generated code]*/
858 
859 static PyObject *
pyexpat_xmlparser_ParseFile_impl(xmlparseobject * self,PyTypeObject * cls,PyObject * file)860 pyexpat_xmlparser_ParseFile_impl(xmlparseobject *self, PyTypeObject *cls,
861                                  PyObject *file)
862 /*[clinic end generated code: output=34780a094c8ca3ae input=ba4bc9c541684793]*/
863 {
864     int rv = 1;
865     PyObject *readmethod = NULL;
866 
867     pyexpat_state *state = PyType_GetModuleState(cls);
868 
869     if (PyObject_GetOptionalAttr(file, state->str_read, &readmethod) < 0) {
870         return NULL;
871     }
872     if (readmethod == NULL) {
873         PyErr_SetString(PyExc_TypeError,
874                         "argument must have 'read' attribute");
875         return NULL;
876     }
877     for (;;) {
878         int bytes_read;
879         void *buf = XML_GetBuffer(self->itself, BUF_SIZE);
880         if (buf == NULL) {
881             Py_XDECREF(readmethod);
882             return get_parse_result(state, self, 0);
883         }
884 
885         bytes_read = readinst(buf, BUF_SIZE, readmethod);
886         if (bytes_read < 0) {
887             Py_DECREF(readmethod);
888             return NULL;
889         }
890         rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0);
891         if (PyErr_Occurred()) {
892             Py_XDECREF(readmethod);
893             return NULL;
894         }
895 
896         if (!rv || bytes_read == 0)
897             break;
898     }
899     Py_XDECREF(readmethod);
900     return get_parse_result(state, self, rv);
901 }
902 
903 /*[clinic input]
904 pyexpat.xmlparser.SetBase
905 
906     base: str
907     /
908 
909 Set the base URL for the parser.
910 [clinic start generated code]*/
911 
912 static PyObject *
pyexpat_xmlparser_SetBase_impl(xmlparseobject * self,const char * base)913 pyexpat_xmlparser_SetBase_impl(xmlparseobject *self, const char *base)
914 /*[clinic end generated code: output=c212ddceb607b539 input=c684e5de895ee1a8]*/
915 {
916     if (!XML_SetBase(self->itself, base)) {
917         return PyErr_NoMemory();
918     }
919     Py_RETURN_NONE;
920 }
921 
922 /*[clinic input]
923 pyexpat.xmlparser.GetBase
924 
925 Return base URL string for the parser.
926 [clinic start generated code]*/
927 
928 static PyObject *
pyexpat_xmlparser_GetBase_impl(xmlparseobject * self)929 pyexpat_xmlparser_GetBase_impl(xmlparseobject *self)
930 /*[clinic end generated code: output=2886cb21f9a8739a input=918d71c38009620e]*/
931 {
932     return conv_string_to_unicode(XML_GetBase(self->itself));
933 }
934 
935 /*[clinic input]
936 pyexpat.xmlparser.GetInputContext
937 
938 Return the untranslated text of the input that caused the current event.
939 
940 If the event was generated by a large amount of text (such as a start tag
941 for an element with many attributes), not all of the text may be available.
942 [clinic start generated code]*/
943 
944 static PyObject *
pyexpat_xmlparser_GetInputContext_impl(xmlparseobject * self)945 pyexpat_xmlparser_GetInputContext_impl(xmlparseobject *self)
946 /*[clinic end generated code: output=a88026d683fc22cc input=034df8712db68379]*/
947 {
948     if (self->in_callback) {
949         int offset, size;
950         const char *buffer
951             = XML_GetInputContext(self->itself, &offset, &size);
952 
953         if (buffer != NULL)
954             return PyBytes_FromStringAndSize(buffer + offset,
955                                               size - offset);
956         else
957             Py_RETURN_NONE;
958     }
959     else
960         Py_RETURN_NONE;
961 }
962 
963 /*[clinic input]
964 pyexpat.xmlparser.ExternalEntityParserCreate
965 
966     cls: defining_class
967     context: str(accept={str, NoneType})
968     encoding: str = NULL
969     /
970 
971 Create a parser for parsing an external entity based on the information passed to the ExternalEntityRefHandler.
972 [clinic start generated code]*/
973 
974 static PyObject *
pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject * self,PyTypeObject * cls,const char * context,const char * encoding)975 pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
976                                                   PyTypeObject *cls,
977                                                   const char *context,
978                                                   const char *encoding)
979 /*[clinic end generated code: output=01d4472b49cb3f92 input=ec70c6b9e6e9619a]*/
980 {
981     xmlparseobject *new_parser;
982     int i;
983 
984     pyexpat_state *state = PyType_GetModuleState(cls);
985 
986     new_parser = PyObject_GC_New(xmlparseobject, state->xml_parse_type);
987     if (new_parser == NULL) {
988         return NULL;
989     }
990 
991     new_parser->buffer_size = self->buffer_size;
992     new_parser->buffer_used = 0;
993     new_parser->buffer = NULL;
994     new_parser->ordered_attributes = self->ordered_attributes;
995     new_parser->specified_attributes = self->specified_attributes;
996     new_parser->in_callback = 0;
997     new_parser->ns_prefixes = self->ns_prefixes;
998     new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
999                                                         encoding);
1000     new_parser->handlers = 0;
1001     new_parser->intern = Py_XNewRef(self->intern);
1002 
1003     if (self->buffer != NULL) {
1004         new_parser->buffer = PyMem_Malloc(new_parser->buffer_size);
1005         if (new_parser->buffer == NULL) {
1006             Py_DECREF(new_parser);
1007             return PyErr_NoMemory();
1008         }
1009     }
1010     if (!new_parser->itself) {
1011         Py_DECREF(new_parser);
1012         return PyErr_NoMemory();
1013     }
1014 
1015     XML_SetUserData(new_parser->itself, (void *)new_parser);
1016 
1017     /* allocate and clear handlers first */
1018     for (i = 0; handler_info[i].name != NULL; i++)
1019         /* do nothing */;
1020 
1021     new_parser->handlers = PyMem_New(PyObject *, i);
1022     if (!new_parser->handlers) {
1023         Py_DECREF(new_parser);
1024         return PyErr_NoMemory();
1025     }
1026     clear_handlers(new_parser, 1);
1027 
1028     /* then copy handlers from self */
1029     for (i = 0; handler_info[i].name != NULL; i++) {
1030         PyObject *handler = self->handlers[i];
1031         if (handler != NULL) {
1032             new_parser->handlers[i] = Py_NewRef(handler);
1033             handler_info[i].setter(new_parser->itself,
1034                                    handler_info[i].handler);
1035         }
1036     }
1037 
1038     PyObject_GC_Track(new_parser);
1039     return (PyObject *)new_parser;
1040 }
1041 
1042 /*[clinic input]
1043 pyexpat.xmlparser.SetParamEntityParsing
1044 
1045     flag: int
1046     /
1047 
1048 Controls parsing of parameter entities (including the external DTD subset).
1049 
1050 Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,
1051 XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and
1052 XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag
1053 was successful.
1054 [clinic start generated code]*/
1055 
1056 static PyObject *
pyexpat_xmlparser_SetParamEntityParsing_impl(xmlparseobject * self,int flag)1057 pyexpat_xmlparser_SetParamEntityParsing_impl(xmlparseobject *self, int flag)
1058 /*[clinic end generated code: output=18668ee8e760d64c input=8aea19b4b15e9af1]*/
1059 {
1060     flag = XML_SetParamEntityParsing(self->itself, flag);
1061     return PyLong_FromLong(flag);
1062 }
1063 
1064 
1065 #if XML_COMBINED_VERSION >= 19505
1066 /*[clinic input]
1067 pyexpat.xmlparser.UseForeignDTD
1068 
1069     cls: defining_class
1070     flag: bool = True
1071     /
1072 
1073 Allows the application to provide an artificial external subset if one is not specified as part of the document instance.
1074 
1075 This readily allows the use of a 'default' document type controlled by the
1076 application, while still getting the advantage of providing document type
1077 information to the parser. 'flag' defaults to True if not provided.
1078 [clinic start generated code]*/
1079 
1080 static PyObject *
pyexpat_xmlparser_UseForeignDTD_impl(xmlparseobject * self,PyTypeObject * cls,int flag)1081 pyexpat_xmlparser_UseForeignDTD_impl(xmlparseobject *self, PyTypeObject *cls,
1082                                      int flag)
1083 /*[clinic end generated code: output=d7d98252bd25a20f input=23440ecb0573fb29]*/
1084 {
1085     pyexpat_state *state = PyType_GetModuleState(cls);
1086     enum XML_Error rc;
1087 
1088     rc = XML_UseForeignDTD(self->itself, flag ? XML_TRUE : XML_FALSE);
1089     if (rc != XML_ERROR_NONE) {
1090         return set_error(state, self, rc);
1091     }
1092     Py_RETURN_NONE;
1093 }
1094 #endif
1095 
1096 static struct PyMethodDef xmlparse_methods[] = {
1097     PYEXPAT_XMLPARSER_PARSE_METHODDEF
1098     PYEXPAT_XMLPARSER_PARSEFILE_METHODDEF
1099     PYEXPAT_XMLPARSER_SETBASE_METHODDEF
1100     PYEXPAT_XMLPARSER_GETBASE_METHODDEF
1101     PYEXPAT_XMLPARSER_GETINPUTCONTEXT_METHODDEF
1102     PYEXPAT_XMLPARSER_EXTERNALENTITYPARSERCREATE_METHODDEF
1103     PYEXPAT_XMLPARSER_SETPARAMENTITYPARSING_METHODDEF
1104 #if XML_COMBINED_VERSION >= 19505
1105     PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF
1106 #endif
1107     PYEXPAT_XMLPARSER_SETREPARSEDEFERRALENABLED_METHODDEF
1108     PYEXPAT_XMLPARSER_GETREPARSEDEFERRALENABLED_METHODDEF
1109     {NULL, NULL}  /* sentinel */
1110 };
1111 
1112 /* ---------- */
1113 
1114 
1115 
1116 /* pyexpat international encoding support.
1117    Make it as simple as possible.
1118 */
1119 
/* Identity table: entry i holds byte value i.  It is decoded with a
   candidate codec to find out which character each byte stands for. */
static const unsigned char template_buffer[256] = {
      0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,
     16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,
     32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,
     48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,
     64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,
     80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,
     96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
    112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
    128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
    144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
    160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
    176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
    192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
    208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
    224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
    240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
};
1137 
1138 
1139 static int
PyUnknownEncodingHandler(void * encodingHandlerData,const XML_Char * name,XML_Encoding * info)1140 PyUnknownEncodingHandler(void *encodingHandlerData,
1141                          const XML_Char *name,
1142                          XML_Encoding *info)
1143 {
1144     PyObject *u;
1145     int i;
1146     const void *data;
1147     int kind;
1148 
1149     if (PyErr_Occurred())
1150         return XML_STATUS_ERROR;
1151 
1152     u = PyUnicode_Decode((const char*) template_buffer, 256, name, "replace");
1153     if (u == NULL) {
1154         Py_XDECREF(u);
1155         return XML_STATUS_ERROR;
1156     }
1157 
1158     if (PyUnicode_GET_LENGTH(u) != 256) {
1159         Py_DECREF(u);
1160         PyErr_SetString(PyExc_ValueError,
1161                         "multi-byte encodings are not supported");
1162         return XML_STATUS_ERROR;
1163     }
1164 
1165     kind = PyUnicode_KIND(u);
1166     data = PyUnicode_DATA(u);
1167     for (i = 0; i < 256; i++) {
1168         Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1169         if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
1170             info->map[i] = ch;
1171         else
1172             info->map[i] = -1;
1173     }
1174 
1175     info->data = NULL;
1176     info->convert = NULL;
1177     info->release = NULL;
1178     Py_DECREF(u);
1179 
1180     return XML_STATUS_OK;
1181 }
1182 
1183 
1184 static PyObject *
newxmlparseobject(pyexpat_state * state,const char * encoding,const char * namespace_separator,PyObject * intern)1185 newxmlparseobject(pyexpat_state *state, const char *encoding,
1186                   const char *namespace_separator, PyObject *intern)
1187 {
1188     int i;
1189     xmlparseobject *self;
1190 
1191     self = PyObject_GC_New(xmlparseobject, state->xml_parse_type);
1192     if (self == NULL)
1193         return NULL;
1194 
1195     self->buffer = NULL;
1196     self->buffer_size = CHARACTER_DATA_BUFFER_SIZE;
1197     self->buffer_used = 0;
1198     self->ordered_attributes = 0;
1199     self->specified_attributes = 0;
1200     self->in_callback = 0;
1201     self->ns_prefixes = 0;
1202     self->handlers = NULL;
1203     self->intern = Py_XNewRef(intern);
1204 #if XML_COMBINED_VERSION >= 20600
1205     self->reparse_deferral_enabled = true;
1206 #else
1207     self->reparse_deferral_enabled = false;
1208 #endif
1209 
1210     /* namespace_separator is either NULL or contains one char + \0 */
1211     self->itself = XML_ParserCreate_MM(encoding, &ExpatMemoryHandler,
1212                                        namespace_separator);
1213     if (self->itself == NULL) {
1214         PyErr_SetString(PyExc_RuntimeError,
1215                         "XML_ParserCreate failed");
1216         Py_DECREF(self);
1217         return NULL;
1218     }
1219 #if XML_COMBINED_VERSION >= 20100
1220     /* This feature was added upstream in libexpat 2.1.0. */
1221     XML_SetHashSalt(self->itself,
1222                     (unsigned long)_Py_HashSecret.expat.hashsalt);
1223 #endif
1224     XML_SetUserData(self->itself, (void *)self);
1225     XML_SetUnknownEncodingHandler(self->itself,
1226                   (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL);
1227 
1228     for (i = 0; handler_info[i].name != NULL; i++)
1229         /* do nothing */;
1230 
1231     self->handlers = PyMem_New(PyObject *, i);
1232     if (!self->handlers) {
1233         Py_DECREF(self);
1234         return PyErr_NoMemory();
1235     }
1236     clear_handlers(self, 1);
1237 
1238     PyObject_GC_Track(self);
1239     return (PyObject*)self;
1240 }
1241 
1242 static int
xmlparse_traverse(xmlparseobject * op,visitproc visit,void * arg)1243 xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
1244 {
1245     for (int i = 0; handler_info[i].name != NULL; i++) {
1246         Py_VISIT(op->handlers[i]);
1247     }
1248     Py_VISIT(Py_TYPE(op));
1249     return 0;
1250 }
1251 
1252 static int
xmlparse_clear(xmlparseobject * op)1253 xmlparse_clear(xmlparseobject *op)
1254 {
1255     clear_handlers(op, 0);
1256     Py_CLEAR(op->intern);
1257     return 0;
1258 }
1259 
1260 static void
xmlparse_dealloc(xmlparseobject * self)1261 xmlparse_dealloc(xmlparseobject *self)
1262 {
1263     PyObject_GC_UnTrack(self);
1264     (void)xmlparse_clear(self);
1265     if (self->itself != NULL)
1266         XML_ParserFree(self->itself);
1267     self->itself = NULL;
1268 
1269     if (self->handlers != NULL) {
1270         PyMem_Free(self->handlers);
1271         self->handlers = NULL;
1272     }
1273     if (self->buffer != NULL) {
1274         PyMem_Free(self->buffer);
1275         self->buffer = NULL;
1276     }
1277     PyTypeObject *tp = Py_TYPE(self);
1278     PyObject_GC_Del(self);
1279     Py_DECREF(tp);
1280 }
1281 
1282 
1283 static PyObject *
xmlparse_handler_getter(xmlparseobject * self,struct HandlerInfo * hi)1284 xmlparse_handler_getter(xmlparseobject *self, struct HandlerInfo *hi)
1285 {
1286     assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info));
1287     int handlernum = (int)(hi - handler_info);
1288     PyObject *result = self->handlers[handlernum];
1289     if (result == NULL)
1290         result = Py_None;
1291     return Py_NewRef(result);
1292 }
1293 
1294 static int
xmlparse_handler_setter(xmlparseobject * self,PyObject * v,struct HandlerInfo * hi)1295 xmlparse_handler_setter(xmlparseobject *self, PyObject *v, struct HandlerInfo *hi)
1296 {
1297     assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info));
1298     int handlernum = (int)(hi - handler_info);
1299     if (v == NULL) {
1300         PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1301         return -1;
1302     }
1303     if (handlernum == CharacterData) {
1304         /* If we're changing the character data handler, flush all
1305          * cached data with the old handler.  Not sure there's a
1306          * "right" thing to do, though, but this probably won't
1307          * happen.
1308          */
1309         if (flush_character_buffer(self) < 0)
1310             return -1;
1311     }
1312 
1313     xmlhandler c_handler = NULL;
1314     if (v == Py_None) {
1315         /* If this is the character data handler, and a character
1316            data handler is already active, we need to be more
1317            careful.  What we can safely do is replace the existing
1318            character data handler callback function with a no-op
1319            function that will refuse to call Python.  The downside
1320            is that this doesn't completely remove the character
1321            data handler from the C layer if there's any callback
1322            active, so Expat does a little more work than it
1323            otherwise would, but that's really an odd case.  A more
1324            elaborate system of handlers and state could remove the
1325            C handler more effectively. */
1326         if (handlernum == CharacterData && self->in_callback)
1327             c_handler = noop_character_data_handler;
1328         v = NULL;
1329     }
1330     else if (v != NULL) {
1331         Py_INCREF(v);
1332         c_handler = handler_info[handlernum].handler;
1333     }
1334     Py_XSETREF(self->handlers[handlernum], v);
1335     handler_info[handlernum].setter(self->itself, c_handler);
1336     return 0;
1337 }
1338 
1339 #define INT_GETTER(name) \
1340     static PyObject * \
1341     xmlparse_##name##_getter(xmlparseobject *self, void *closure) \
1342     { \
1343         return PyLong_FromLong((long) XML_Get##name(self->itself)); \
1344     }
1345 INT_GETTER(ErrorCode)
INT_GETTER(ErrorLineNumber)1346 INT_GETTER(ErrorLineNumber)
1347 INT_GETTER(ErrorColumnNumber)
1348 INT_GETTER(ErrorByteIndex)
1349 INT_GETTER(CurrentLineNumber)
1350 INT_GETTER(CurrentColumnNumber)
1351 INT_GETTER(CurrentByteIndex)
1352 
1353 #undef INT_GETTER
1354 
1355 static PyObject *
1356 xmlparse_buffer_text_getter(xmlparseobject *self, void *closure)
1357 {
1358     return PyBool_FromLong(self->buffer != NULL);
1359 }
1360 
1361 static int
xmlparse_buffer_text_setter(xmlparseobject * self,PyObject * v,void * closure)1362 xmlparse_buffer_text_setter(xmlparseobject *self, PyObject *v, void *closure)
1363 {
1364     if (v == NULL) {
1365         PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1366         return -1;
1367     }
1368     int b = PyObject_IsTrue(v);
1369     if (b < 0)
1370         return -1;
1371     if (b) {
1372         if (self->buffer == NULL) {
1373             self->buffer = PyMem_Malloc(self->buffer_size);
1374             if (self->buffer == NULL) {
1375                 PyErr_NoMemory();
1376                 return -1;
1377             }
1378             self->buffer_used = 0;
1379         }
1380     }
1381     else if (self->buffer != NULL) {
1382         if (flush_character_buffer(self) < 0)
1383             return -1;
1384         PyMem_Free(self->buffer);
1385         self->buffer = NULL;
1386     }
1387     return 0;
1388 }
1389 
1390 static PyObject *
xmlparse_buffer_size_getter(xmlparseobject * self,void * closure)1391 xmlparse_buffer_size_getter(xmlparseobject *self, void *closure)
1392 {
1393     return PyLong_FromLong((long) self->buffer_size);
1394 }
1395 
1396 static int
xmlparse_buffer_size_setter(xmlparseobject * self,PyObject * v,void * closure)1397 xmlparse_buffer_size_setter(xmlparseobject *self, PyObject *v, void *closure)
1398 {
1399     if (v == NULL) {
1400         PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1401         return -1;
1402     }
1403     long new_buffer_size;
1404     if (!PyLong_Check(v)) {
1405         PyErr_SetString(PyExc_TypeError, "buffer_size must be an integer");
1406         return -1;
1407     }
1408 
1409     new_buffer_size = PyLong_AsLong(v);
1410     if (new_buffer_size <= 0) {
1411         if (!PyErr_Occurred())
1412             PyErr_SetString(PyExc_ValueError, "buffer_size must be greater than zero");
1413         return -1;
1414     }
1415 
1416     /* trivial case -- no change */
1417     if (new_buffer_size == self->buffer_size) {
1418         return 0;
1419     }
1420 
1421     /* check maximum */
1422     if (new_buffer_size > INT_MAX) {
1423         PyErr_Format(PyExc_ValueError, "buffer_size must not be greater than %i", INT_MAX);
1424         return -1;
1425     }
1426 
1427     if (self->buffer != NULL) {
1428         /* there is already a buffer */
1429         if (self->buffer_used != 0) {
1430             if (flush_character_buffer(self) < 0) {
1431                 return -1;
1432             }
1433         }
1434         /* free existing buffer */
1435         PyMem_Free(self->buffer);
1436     }
1437     self->buffer = PyMem_Malloc(new_buffer_size);
1438     if (self->buffer == NULL) {
1439         PyErr_NoMemory();
1440         return -1;
1441     }
1442     self->buffer_size = new_buffer_size;
1443     return 0;
1444 }
1445 
1446 static PyObject *
xmlparse_buffer_used_getter(xmlparseobject * self,void * closure)1447 xmlparse_buffer_used_getter(xmlparseobject *self, void *closure)
1448 {
1449     return PyLong_FromLong((long) self->buffer_used);
1450 }
1451 
1452 static PyObject *
xmlparse_namespace_prefixes_getter(xmlparseobject * self,void * closure)1453 xmlparse_namespace_prefixes_getter(xmlparseobject *self, void *closure)
1454 {
1455     return PyBool_FromLong(self->ns_prefixes);
1456 }
1457 
1458 static int
xmlparse_namespace_prefixes_setter(xmlparseobject * self,PyObject * v,void * closure)1459 xmlparse_namespace_prefixes_setter(xmlparseobject *self, PyObject *v, void *closure)
1460 {
1461     if (v == NULL) {
1462         PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1463         return -1;
1464     }
1465     int b = PyObject_IsTrue(v);
1466     if (b < 0)
1467         return -1;
1468     self->ns_prefixes = b;
1469     XML_SetReturnNSTriplet(self->itself, self->ns_prefixes);
1470     return 0;
1471 }
1472 
1473 static PyObject *
xmlparse_ordered_attributes_getter(xmlparseobject * self,void * closure)1474 xmlparse_ordered_attributes_getter(xmlparseobject *self, void *closure)
1475 {
1476     return PyBool_FromLong(self->ordered_attributes);
1477 }
1478 
1479 static int
xmlparse_ordered_attributes_setter(xmlparseobject * self,PyObject * v,void * closure)1480 xmlparse_ordered_attributes_setter(xmlparseobject *self, PyObject *v, void *closure)
1481 {
1482     if (v == NULL) {
1483         PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1484         return -1;
1485     }
1486     int b = PyObject_IsTrue(v);
1487     if (b < 0)
1488         return -1;
1489     self->ordered_attributes = b;
1490     return 0;
1491 }
1492 
1493 static PyObject *
xmlparse_specified_attributes_getter(xmlparseobject * self,void * closure)1494 xmlparse_specified_attributes_getter(xmlparseobject *self, void *closure)
1495 {
1496     return PyBool_FromLong((long) self->specified_attributes);
1497 }
1498 
1499 static int
xmlparse_specified_attributes_setter(xmlparseobject * self,PyObject * v,void * closure)1500 xmlparse_specified_attributes_setter(xmlparseobject *self, PyObject *v, void *closure)
1501 {
1502     if (v == NULL) {
1503         PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1504         return -1;
1505     }
1506     int b = PyObject_IsTrue(v);
1507     if (b < 0)
1508         return -1;
1509     self->specified_attributes = b;
1510     return 0;
1511 }
1512 
1513 static PyMemberDef xmlparse_members[] = {
1514     {"intern", _Py_T_OBJECT, offsetof(xmlparseobject, intern), Py_READONLY, NULL},
1515     {NULL}
1516 };
1517 
1518 #define XMLPARSE_GETTER_DEF(name) \
1519     {#name, (getter)xmlparse_##name##_getter, NULL, NULL},
1520 #define XMLPARSE_GETTER_SETTER_DEF(name) \
1521     {#name, (getter)xmlparse_##name##_getter, \
1522             (setter)xmlparse_##name##_setter, NULL},
1523 
1524 static PyGetSetDef xmlparse_getsetlist[] = {
1525     XMLPARSE_GETTER_DEF(ErrorCode)
1526     XMLPARSE_GETTER_DEF(ErrorLineNumber)
1527     XMLPARSE_GETTER_DEF(ErrorColumnNumber)
1528     XMLPARSE_GETTER_DEF(ErrorByteIndex)
1529     XMLPARSE_GETTER_DEF(CurrentLineNumber)
1530     XMLPARSE_GETTER_DEF(CurrentColumnNumber)
1531     XMLPARSE_GETTER_DEF(CurrentByteIndex)
1532     XMLPARSE_GETTER_SETTER_DEF(buffer_size)
1533     XMLPARSE_GETTER_SETTER_DEF(buffer_text)
1534     XMLPARSE_GETTER_DEF(buffer_used)
1535     XMLPARSE_GETTER_SETTER_DEF(namespace_prefixes)
1536     XMLPARSE_GETTER_SETTER_DEF(ordered_attributes)
1537     XMLPARSE_GETTER_SETTER_DEF(specified_attributes)
1538     {NULL},
1539 };
1540 
1541 #undef XMLPARSE_GETTER_DEF
1542 #undef XMLPARSE_GETTER_SETTER_DEF
1543 
1544 PyDoc_STRVAR(Xmlparsetype__doc__, "XML parser");
1545 
1546 static PyType_Slot _xml_parse_type_spec_slots[] = {
1547     {Py_tp_dealloc, xmlparse_dealloc},
1548     {Py_tp_doc, (void *)Xmlparsetype__doc__},
1549     {Py_tp_traverse, xmlparse_traverse},
1550     {Py_tp_clear, xmlparse_clear},
1551     {Py_tp_methods, xmlparse_methods},
1552     {Py_tp_members, xmlparse_members},
1553     {Py_tp_getset, xmlparse_getsetlist},
1554     {0, 0}
1555 };
1556 
1557 static PyType_Spec _xml_parse_type_spec = {
1558     .name = "pyexpat.xmlparser",
1559     .basicsize = sizeof(xmlparseobject),
1560     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
1561               Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_IMMUTABLETYPE),
1562     .slots = _xml_parse_type_spec_slots,
1563 };
1564 
1565 /* End of code for xmlparser objects */
1566 /* -------------------------------------------------------- */
1567 
1568 /*[clinic input]
1569 pyexpat.ParserCreate
1570 
1571     encoding: str(accept={str, NoneType}) = None
1572     namespace_separator: str(accept={str, NoneType}) = None
1573     intern: object = NULL
1574 
1575 Return a new XML parser object.
1576 [clinic start generated code]*/
1577 
1578 static PyObject *
pyexpat_ParserCreate_impl(PyObject * module,const char * encoding,const char * namespace_separator,PyObject * intern)1579 pyexpat_ParserCreate_impl(PyObject *module, const char *encoding,
1580                           const char *namespace_separator, PyObject *intern)
1581 /*[clinic end generated code: output=295c0cf01ab1146c input=e8da8e8d7122cb5d]*/
1582 {
1583     PyObject *result;
1584     int intern_decref = 0;
1585 
1586     if (namespace_separator != NULL
1587         && strlen(namespace_separator) > 1) {
1588         PyErr_SetString(PyExc_ValueError,
1589                         "namespace_separator must be at most one"
1590                         " character, omitted, or None");
1591         return NULL;
1592     }
1593     /* Explicitly passing None means no interning is desired.
1594        Not passing anything means that a new dictionary is used. */
1595     if (intern == Py_None)
1596         intern = NULL;
1597     else if (intern == NULL) {
1598         intern = PyDict_New();
1599         if (!intern)
1600             return NULL;
1601         intern_decref = 1;
1602     }
1603     else if (!PyDict_Check(intern)) {
1604         PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
1605         return NULL;
1606     }
1607 
1608     pyexpat_state *state = pyexpat_get_state(module);
1609     result = newxmlparseobject(state, encoding, namespace_separator, intern);
1610     if (intern_decref) {
1611         Py_DECREF(intern);
1612     }
1613     return result;
1614 }
1615 
1616 /*[clinic input]
1617 pyexpat.ErrorString
1618 
1619     code: long
1620     /
1621 
1622 Returns string error for given number.
1623 [clinic start generated code]*/
1624 
1625 static PyObject *
pyexpat_ErrorString_impl(PyObject * module,long code)1626 pyexpat_ErrorString_impl(PyObject *module, long code)
1627 /*[clinic end generated code: output=2feae50d166f2174 input=cc67de010d9e62b3]*/
1628 {
1629     return conv_string_to_unicode(XML_ErrorString((int)code));
1630 }
1631 
1632 /* List of methods defined in the module */
1633 
1634 static struct PyMethodDef pyexpat_methods[] = {
1635     PYEXPAT_PARSERCREATE_METHODDEF
1636     PYEXPAT_ERRORSTRING_METHODDEF
1637     {NULL, NULL}  /* sentinel */
1638 };
1639 
1640 /* Module docstring */
1641 
1642 PyDoc_STRVAR(pyexpat_module_documentation,
1643 "Python wrapper for Expat parser.");
1644 
1645 /* Initialization function for the module */
1646 
1647 #ifndef MODULE_NAME
1648 #define MODULE_NAME "pyexpat"
1649 #endif
1650 
/* Create one getset descriptor per handler_info entry and add it to the
   xmlparser type's dictionary (existing keys are left in place).
   Returns 0 on success, -1 with an exception set on failure. */
static int init_handler_descrs(pyexpat_state *state)
{
    assert(state->xml_parse_type->tp_version_tag == 0);

    for (struct HandlerInfo *hi = handler_info; hi->name != NULL; hi++) {
        /* The entry serves as its own closure, which lets the shared
           getter/setter recover the handler slot. */
        hi->getset.name = hi->name;
        hi->getset.get = (getter)xmlparse_handler_getter;
        hi->getset.set = (setter)xmlparse_handler_setter;
        hi->getset.closure = hi;

        PyObject *descr = PyDescr_NewGetSet(state->xml_parse_type, &hi->getset);
        if (descr == NULL) {
            return -1;
        }

        int rc = PyDict_SetDefaultRef(state->xml_parse_type->tp_dict,
                                      PyDescr_NAME(descr), descr, NULL);
        Py_DECREF(descr);
        if (rc < 0) {
            return -1;
        }
    }
    return 0;
}
1675 
1676 static PyObject *
add_submodule(PyObject * mod,const char * fullname)1677 add_submodule(PyObject *mod, const char *fullname)
1678 {
1679     const char *name = strrchr(fullname, '.') + 1;
1680 
1681     PyObject *submodule = PyModule_New(fullname);
1682     if (submodule == NULL) {
1683         return NULL;
1684     }
1685 
1686     PyObject *mod_name = PyUnicode_FromString(fullname);
1687     if (mod_name == NULL) {
1688         Py_DECREF(submodule);
1689         return NULL;
1690     }
1691 
1692     if (_PyImport_SetModule(mod_name, submodule) < 0) {
1693         Py_DECREF(submodule);
1694         Py_DECREF(mod_name);
1695         return NULL;
1696     }
1697     Py_DECREF(mod_name);
1698 
1699     /* gives away the reference to the submodule */
1700     if (PyModule_Add(mod, name, submodule) < 0) {
1701         return NULL;
1702     }
1703 
1704     return submodule;
1705 }
1706 
/* One row of the error table below. */
struct ErrorInfo {
    const char *name;         /* Error constant name, e.g. "XML_ERROR_NO_MEMORY" */
    const char *description;  /* Error description as returned by XML_ErrorString(<int>) */
};

/* Error names and descriptions; the comments record the Expat release
   in which each group was added. */
static struct ErrorInfo error_info_of[] = {
    /* XML_ERROR_NONE (value 0) is not exposed */
    {NULL, NULL},

    {"XML_ERROR_NO_MEMORY",
     "out of memory"},
    {"XML_ERROR_SYNTAX",
     "syntax error"},
    {"XML_ERROR_NO_ELEMENTS",
     "no element found"},
    {"XML_ERROR_INVALID_TOKEN",
     "not well-formed (invalid token)"},
    {"XML_ERROR_UNCLOSED_TOKEN",
     "unclosed token"},
    {"XML_ERROR_PARTIAL_CHAR",
     "partial character"},
    {"XML_ERROR_TAG_MISMATCH",
     "mismatched tag"},
    {"XML_ERROR_DUPLICATE_ATTRIBUTE",
     "duplicate attribute"},
    {"XML_ERROR_JUNK_AFTER_DOC_ELEMENT",
     "junk after document element"},
    {"XML_ERROR_PARAM_ENTITY_REF",
     "illegal parameter entity reference"},
    {"XML_ERROR_UNDEFINED_ENTITY",
     "undefined entity"},
    {"XML_ERROR_RECURSIVE_ENTITY_REF",
     "recursive entity reference"},
    {"XML_ERROR_ASYNC_ENTITY",
     "asynchronous entity"},
    {"XML_ERROR_BAD_CHAR_REF",
     "reference to invalid character number"},
    {"XML_ERROR_BINARY_ENTITY_REF",
     "reference to binary entity"},
    {"XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF",
     "reference to external entity in attribute"},
    {"XML_ERROR_MISPLACED_XML_PI",
     "XML or text declaration not at start of entity"},
    {"XML_ERROR_UNKNOWN_ENCODING",
     "unknown encoding"},
    {"XML_ERROR_INCORRECT_ENCODING",
     "encoding specified in XML declaration is incorrect"},
    {"XML_ERROR_UNCLOSED_CDATA_SECTION",
     "unclosed CDATA section"},
    {"XML_ERROR_EXTERNAL_ENTITY_HANDLING",
     "error in processing external entity reference"},
    {"XML_ERROR_NOT_STANDALONE",
     "document is not standalone"},
    {"XML_ERROR_UNEXPECTED_STATE",
     "unexpected parser state - please send a bug report"},
    {"XML_ERROR_ENTITY_DECLARED_IN_PE",
     "entity declared in parameter entity"},
    {"XML_ERROR_FEATURE_REQUIRES_XML_DTD",
     "requested feature requires XML_DTD support in Expat"},
    {"XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING",
     "cannot change setting once parsing has begun"},

    /* Added in Expat 1.95.7. */
    {"XML_ERROR_UNBOUND_PREFIX",
     "unbound prefix"},

    /* Added in Expat 1.95.8. */
    {"XML_ERROR_UNDECLARING_PREFIX",
     "must not undeclare prefix"},
    {"XML_ERROR_INCOMPLETE_PE",
     "incomplete markup in parameter entity"},
    {"XML_ERROR_XML_DECL",
     "XML declaration not well-formed"},
    {"XML_ERROR_TEXT_DECL",
     "text declaration not well-formed"},
    {"XML_ERROR_PUBLICID",
     "illegal character(s) in public id"},
    {"XML_ERROR_SUSPENDED",
     "parser suspended"},
    {"XML_ERROR_NOT_SUSPENDED",
     "parser not suspended"},
    {"XML_ERROR_ABORTED",
     "parsing aborted"},
    {"XML_ERROR_FINISHED",
     "parsing finished"},
    {"XML_ERROR_SUSPEND_PE",
     "cannot suspend in external parameter entity"},

    /* Added in 2.0.0. */
    {"XML_ERROR_RESERVED_PREFIX_XML",
     "reserved prefix (xml) must not be undeclared or bound to another namespace name"},
    {"XML_ERROR_RESERVED_PREFIX_XMLNS",
     "reserved prefix (xmlns) must not be declared or undeclared"},
    {"XML_ERROR_RESERVED_NAMESPACE_URI",
     "prefix must not be bound to one of the reserved namespace names"},

    /* Added in 2.2.1. */
    {"XML_ERROR_INVALID_ARGUMENT",
     "invalid argument"},

    /* Added in 2.3.0. */
    {"XML_ERROR_NO_BUFFER",
     "a successful prior call to function XML_GetBuffer is required"},

    /* Added in 2.4.0. */
    {"XML_ERROR_AMPLIFICATION_LIMIT_BREACH",
     "limit on input amplification factor (from DTD and entities) breached"}
};
1772 
1773 static int
add_error(PyObject * errors_module,PyObject * codes_dict,PyObject * rev_codes_dict,size_t error_index)1774 add_error(PyObject *errors_module, PyObject *codes_dict,
1775           PyObject *rev_codes_dict, size_t error_index)
1776 {
1777     const char * const name = error_info_of[error_index].name;
1778     const int error_code = (int)error_index;
1779 
1780     /* NOTE: This keeps the source of truth regarding error
1781      *       messages with libexpat and (by definition) in bulletproof sync
1782      *       with the other uses of the XML_ErrorString function
1783      *       elsewhere within this file.  pyexpat's copy of the messages
1784      *       only acts as a fallback in case of outdated runtime libexpat,
1785      *       where it returns NULL. */
1786     const char *error_string = XML_ErrorString(error_code);
1787     if (error_string == NULL) {
1788         error_string = error_info_of[error_index].description;
1789     }
1790 
1791     if (PyModule_AddStringConstant(errors_module, name, error_string) < 0) {
1792         return -1;
1793     }
1794 
1795     PyObject *num = PyLong_FromLong(error_code);
1796     if (num == NULL) {
1797         return -1;
1798     }
1799 
1800     if (PyDict_SetItemString(codes_dict, error_string, num) < 0) {
1801         Py_DECREF(num);
1802         return -1;
1803     }
1804 
1805     PyObject *str = PyUnicode_FromString(error_string);
1806     if (str == NULL) {
1807         Py_DECREF(num);
1808         return -1;
1809     }
1810 
1811     int res = PyDict_SetItem(rev_codes_dict, num, str);
1812     Py_DECREF(str);
1813     Py_DECREF(num);
1814     if (res < 0) {
1815         return -1;
1816     }
1817 
1818     return 0;
1819 }
1820 
1821 static int
add_errors_module(PyObject * mod)1822 add_errors_module(PyObject *mod)
1823 {
1824     // add_submodule() returns a borrowed ref.
1825     PyObject *errors_module = add_submodule(mod, MODULE_NAME ".errors");
1826     if (errors_module == NULL) {
1827         return -1;
1828     }
1829 
1830     PyObject *codes_dict = PyDict_New();
1831     if (codes_dict == NULL) {
1832         return -1;
1833     }
1834     PyObject *rev_codes_dict = PyDict_New();
1835     if (rev_codes_dict == NULL) {
1836         goto error;
1837     }
1838 
1839     size_t error_index = 0;
1840     for (; error_index < sizeof(error_info_of) / sizeof(struct ErrorInfo); error_index++) {
1841         if (error_info_of[error_index].name == NULL) {
1842             continue;
1843         }
1844 
1845         if (add_error(errors_module, codes_dict, rev_codes_dict, error_index) < 0) {
1846             goto error;
1847         }
1848     }
1849 
1850     if (PyModule_AddStringConstant(errors_module, "__doc__",
1851                                    "Constants used to describe "
1852                                    "error conditions.") < 0) {
1853         goto error;
1854     }
1855 
1856     if (PyModule_Add(errors_module, "codes", codes_dict) < 0) {
1857         Py_DECREF(rev_codes_dict);
1858         return -1;
1859     }
1860 
1861     if (PyModule_Add(errors_module, "messages", rev_codes_dict) < 0) {
1862         return -1;
1863     }
1864 
1865     return 0;
1866 
1867 error:
1868     Py_XDECREF(codes_dict);
1869     Py_XDECREF(rev_codes_dict);
1870     return -1;
1871 }
1872 
1873 static int
add_model_module(PyObject * mod)1874 add_model_module(PyObject *mod)
1875 {
1876     PyObject *model_module = add_submodule(mod, MODULE_NAME ".model");
1877     if (model_module == NULL) {
1878         return -1;
1879     }
1880 
1881 #define MYCONST(c)  do {                                        \
1882         if (PyModule_AddIntConstant(model_module, #c, c) < 0) { \
1883             return -1;                                          \
1884         }                                                       \
1885     } while(0)
1886 
1887     if (PyModule_AddStringConstant(
1888         model_module, "__doc__",
1889         "Constants used to interpret content model information.") < 0) {
1890         return -1;
1891     }
1892 
1893     MYCONST(XML_CTYPE_EMPTY);
1894     MYCONST(XML_CTYPE_ANY);
1895     MYCONST(XML_CTYPE_MIXED);
1896     MYCONST(XML_CTYPE_NAME);
1897     MYCONST(XML_CTYPE_CHOICE);
1898     MYCONST(XML_CTYPE_SEQ);
1899 
1900     MYCONST(XML_CQUANT_NONE);
1901     MYCONST(XML_CQUANT_OPT);
1902     MYCONST(XML_CQUANT_REP);
1903     MYCONST(XML_CQUANT_PLUS);
1904 #undef MYCONST
1905     return 0;
1906 }
1907 
#if XML_COMBINED_VERSION > 19505
/* Add a "features" attribute to `mod`: a list of (name, value) tuples, one
 * per entry reported by XML_GetFeatureList(), in the order libexpat returns
 * them (the XML_FEATURE_END terminator is not included).
 *
 * Returns 0 on success and -1 on failure. */
static int
add_features(PyObject *mod)
{
    PyObject *list = PyList_New(0);
    if (list == NULL) {
        return -1;
    }

    const XML_Feature *features = XML_GetFeatureList();
    for (size_t i = 0; features[i].feature != XML_FEATURE_END; ++i) {
        /* XML_Feature.value is declared as `long int` by Expat, so it must
         * be consumed with the "l" format unit; "i" would read an `int`
         * from a variadic argument of a different type. */
        PyObject *item = Py_BuildValue("sl", features[i].name,
                                       features[i].value);
        if (item == NULL) {
            goto error;
        }
        int ok = PyList_Append(list, item);
        /* The list holds its own reference on success. */
        Py_DECREF(item);
        if (ok < 0) {
            goto error;
        }
    }
    /* PyModule_Add() takes over the reference to `list`, including on
     * failure, so there is nothing left to release here. */
    return PyModule_Add(mod, "features", list);

error:
    Py_DECREF(list);
    return -1;
}
#endif
1937 
1938 static void
pyexpat_capsule_destructor(PyObject * capsule)1939 pyexpat_capsule_destructor(PyObject *capsule)
1940 {
1941     void *p = PyCapsule_GetPointer(capsule, PyExpat_CAPSULE_NAME);
1942     if (p == NULL) {
1943         PyErr_WriteUnraisable(capsule);
1944         return;
1945     }
1946     PyMem_Free(p);
1947 }
1948 
1949 
/* Py_mod_exec slot: populate a freshly created pyexpat module object.
 *
 * In order, this
 *   1. fills the per-module state (interned "read" string, the parser type
 *      and the ExpatError exception class),
 *   2. publishes the exception, the parser type, version information and
 *      "native_encoding" as module attributes,
 *   3. creates the "errors" and "model" submodules and, for Expat newer than
 *      1.95.5, the "features" list,
 *   4. adds the XML_PARAM_ENTITY_PARSING_* integer constants,
 *   5. builds the PyExpat_CAPI dispatch table and exports it as the
 *      "expat_CAPI" capsule.
 *
 * Returns 0 on success and -1 on failure.  State fields assigned before a
 * failure are not released here; pyexpat_clear() releases them. */
static int
pyexpat_exec(PyObject *mod)
{
    pyexpat_state *state = pyexpat_get_state(mod);
    state->str_read = PyUnicode_InternFromString("read");
    if (state->str_read == NULL) {
        return -1;
    }
    /* Create the parser type as a heap type bound to this module. */
    state->xml_parse_type = (PyTypeObject *)PyType_FromModuleAndSpec(
        mod, &_xml_parse_type_spec, NULL);

    if (state->xml_parse_type == NULL) {
        return -1;
    }

    /* Runs only after xml_parse_type has been stored in the state. */
    if (init_handler_descrs(state) < 0) {
        return -1;
    }
    state->error = PyErr_NewException("xml.parsers.expat.ExpatError",
                                      NULL, NULL);
    if (state->error == NULL) {
        return -1;
    }

    /* Add some symbolic constants to the module */

    /* "error" and "ExpatError" are two names for the same exception object.
       PyModule_AddObjectRef() is used for these attributes because the
       module state keeps holding its own pointers to the objects. */
    if (PyModule_AddObjectRef(mod, "error", state->error) < 0) {
        return -1;
    }

    if (PyModule_AddObjectRef(mod, "ExpatError", state->error) < 0) {
        return -1;
    }

    if (PyModule_AddObjectRef(mod, "XMLParserType",
                           (PyObject *) state->xml_parse_type) < 0) {
        return -1;
    }

    if (PyModule_AddStringConstant(mod, "EXPAT_VERSION",
                                   XML_ExpatVersion()) < 0) {
        return -1;
    }
    {
        /* version_info is a (major, minor, micro) tuple.  The result of
           Py_BuildValue() is passed to PyModule_Add() without a separate
           NULL check. */
        XML_Expat_Version info = XML_ExpatVersionInfo();
        PyObject *versionInfo = Py_BuildValue("(iii)",
                                              info.major,
                                              info.minor,
                                              info.micro);
        if (PyModule_Add(mod, "version_info", versionInfo) < 0) {
            return -1;
        }
    }
    /* XXX When Expat supports some way of figuring out how it was
       compiled, this should check and set native_encoding
       appropriately.
    */
    if (PyModule_AddStringConstant(mod, "native_encoding", "UTF-8") < 0) {
        return -1;
    }

    if (add_errors_module(mod) < 0) {
        return -1;
    }

    if (add_model_module(mod) < 0) {
        return -1;
    }

#if XML_COMBINED_VERSION > 19505
    if (add_features(mod) < 0) {
        return -1;
    }
#endif

/* Adds the C constant `c` to the module as an integer attribute whose name
   is the identifier itself; returns -1 from pyexpat_exec() on failure. */
#define MYCONST(c) do {                                 \
        if (PyModule_AddIntConstant(mod, #c, c) < 0) {  \
            return -1;                                  \
        }                                               \
    } while(0)

    MYCONST(XML_PARAM_ENTITY_PARSING_NEVER);
    MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
    MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS);
#undef MYCONST

    /* The dispatch table is heap-allocated; once it is wrapped in the
       capsule below, pyexpat_capsule_destructor() is what frees it. */
    struct PyExpat_CAPI *capi = PyMem_Malloc(sizeof(*capi));
    if (capi == NULL) {
        PyErr_NoMemory();
        return -1;
    }
    /* initialize pyexpat dispatch table */
    capi->size = sizeof(*capi);
    capi->magic = PyExpat_CAPI_MAGIC;
    capi->MAJOR_VERSION = XML_MAJOR_VERSION;
    capi->MINOR_VERSION = XML_MINOR_VERSION;
    capi->MICRO_VERSION = XML_MICRO_VERSION;
    capi->ErrorString = XML_ErrorString;
    capi->GetErrorCode = XML_GetErrorCode;
    capi->GetErrorColumnNumber = XML_GetErrorColumnNumber;
    capi->GetErrorLineNumber = XML_GetErrorLineNumber;
    capi->Parse = XML_Parse;
    capi->ParserCreate_MM = XML_ParserCreate_MM;
    capi->ParserFree = XML_ParserFree;
    capi->SetCharacterDataHandler = XML_SetCharacterDataHandler;
    capi->SetCommentHandler = XML_SetCommentHandler;
    capi->SetDefaultHandlerExpand = XML_SetDefaultHandlerExpand;
    capi->SetElementHandler = XML_SetElementHandler;
    capi->SetNamespaceDeclHandler = XML_SetNamespaceDeclHandler;
    capi->SetProcessingInstructionHandler = XML_SetProcessingInstructionHandler;
    capi->SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler;
    capi->SetUserData = XML_SetUserData;
    capi->SetStartDoctypeDeclHandler = XML_SetStartDoctypeDeclHandler;
    capi->SetEncoding = XML_SetEncoding;
    capi->DefaultUnknownEncodingHandler = PyUnknownEncodingHandler;
    /* Entries for functions missing from the Expat version compiled against
       are set to NULL instead of being left uninitialized. */
#if XML_COMBINED_VERSION >= 20100
    capi->SetHashSalt = XML_SetHashSalt;
#else
    capi->SetHashSalt = NULL;
#endif
#if XML_COMBINED_VERSION >= 20600
    capi->SetReparseDeferralEnabled = XML_SetReparseDeferralEnabled;
#else
    capi->SetReparseDeferralEnabled = NULL;
#endif

    /* export using capsule */
    PyObject *capi_object = PyCapsule_New(capi, PyExpat_CAPSULE_NAME,
                                          pyexpat_capsule_destructor);
    if (capi_object == NULL) {
        /* No capsule exists to own the table, so free it directly. */
        PyMem_Free(capi);
        return -1;
    }

    /* capi_object is not released here after this call, on either path. */
    if (PyModule_Add(mod, "expat_CAPI", capi_object) < 0) {
        return -1;
    }

    return 0;
}
2090 
2091 static int
pyexpat_traverse(PyObject * module,visitproc visit,void * arg)2092 pyexpat_traverse(PyObject *module, visitproc visit, void *arg)
2093 {
2094     pyexpat_state *state = pyexpat_get_state(module);
2095     Py_VISIT(state->xml_parse_type);
2096     Py_VISIT(state->error);
2097     Py_VISIT(state->str_read);
2098     return 0;
2099 }
2100 
2101 static int
pyexpat_clear(PyObject * module)2102 pyexpat_clear(PyObject *module)
2103 {
2104     pyexpat_state *state = pyexpat_get_state(module);
2105     Py_CLEAR(state->xml_parse_type);
2106     Py_CLEAR(state->error);
2107     Py_CLEAR(state->str_read);
2108     return 0;
2109 }
2110 
2111 static void
pyexpat_free(void * module)2112 pyexpat_free(void *module)
2113 {
2114     pyexpat_clear((PyObject *)module);
2115 }
2116 
/* Slots for multi-phase initialization of the module. */
static PyModuleDef_Slot pyexpat_slots[] = {
    /* pyexpat_exec() populates the module object. */
    {Py_mod_exec, pyexpat_exec},
    /* Declares support for interpreters that have their own GIL. */
    {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
    /* Declares that the module does not rely on the GIL. */
    {Py_mod_gil, Py_MOD_GIL_NOT_USED},
    {0, NULL}  /* sentinel */
};
2123 
/* Module definition.  m_size reserves room for one pyexpat_state per module
 * object; the traverse/clear/free hooks manage the references stored in
 * that state. */
static struct PyModuleDef pyexpatmodule = {
    PyModuleDef_HEAD_INIT,
    .m_name = MODULE_NAME,
    .m_doc = pyexpat_module_documentation,
    .m_size = sizeof(pyexpat_state),
    .m_methods = pyexpat_methods,
    .m_slots = pyexpat_slots,
    .m_traverse = pyexpat_traverse,
    .m_clear = pyexpat_clear,
    .m_free = pyexpat_free
};
2135 
2136 PyMODINIT_FUNC
PyInit_pyexpat(void)2137 PyInit_pyexpat(void)
2138 {
2139     return PyModuleDef_Init(&pyexpatmodule);
2140 }
2141 
2142 static void
clear_handlers(xmlparseobject * self,int initial)2143 clear_handlers(xmlparseobject *self, int initial)
2144 {
2145     int i = 0;
2146 
2147     for (; handler_info[i].name != NULL; i++) {
2148         if (initial)
2149             self->handlers[i] = NULL;
2150         else {
2151             Py_CLEAR(self->handlers[i]);
2152             handler_info[i].setter(self->itself, NULL);
2153         }
2154     }
2155 }
2156 
/* Table describing every handler attribute supported by parser objects.
 *
 * Each entry holds the attribute name, the Expat function that installs the
 * callback (XML_Set<name>) and the C callback of this module (my_<name>).
 * The entries are listed in the same order as the members of
 * enum HandlerTypes, including the version-conditional SkippedEntity entry.
 * NOTE(review): presumably the enum values are used as indices into this
 * table, so the two must be kept in sync -- confirm against the callers.
 *
 * The table is terminated by an all-NULL sentinel; clear_handlers() stops at
 * the first entry whose name is NULL. */
static struct HandlerInfo handler_info[] = {

/* Expands to one table entry: {"<name>", XML_Set<name>, my_<name>}, with the
   function pointers cast to the generic setter/handler types. */
#define HANDLER_INFO(name) \
    {#name, (xmlhandlersetter)XML_Set##name, (xmlhandler)my_##name},

    HANDLER_INFO(StartElementHandler)
    HANDLER_INFO(EndElementHandler)
    HANDLER_INFO(ProcessingInstructionHandler)
    HANDLER_INFO(CharacterDataHandler)
    HANDLER_INFO(UnparsedEntityDeclHandler)
    HANDLER_INFO(NotationDeclHandler)
    HANDLER_INFO(StartNamespaceDeclHandler)
    HANDLER_INFO(EndNamespaceDeclHandler)
    HANDLER_INFO(CommentHandler)
    HANDLER_INFO(StartCdataSectionHandler)
    HANDLER_INFO(EndCdataSectionHandler)
    HANDLER_INFO(DefaultHandler)
    HANDLER_INFO(DefaultHandlerExpand)
    HANDLER_INFO(NotStandaloneHandler)
    HANDLER_INFO(ExternalEntityRefHandler)
    HANDLER_INFO(StartDoctypeDeclHandler)
    HANDLER_INFO(EndDoctypeDeclHandler)
    HANDLER_INFO(EntityDeclHandler)
    HANDLER_INFO(XmlDeclHandler)
    HANDLER_INFO(ElementDeclHandler)
    HANDLER_INFO(AttlistDeclHandler)
#if XML_COMBINED_VERSION >= 19504
    HANDLER_INFO(SkippedEntityHandler)
#endif

#undef HANDLER_INFO

    {NULL, NULL, NULL} /* sentinel */
};
2191