1 #ifndef Py_BUILD_CORE_BUILTIN
2 # define Py_BUILD_CORE_MODULE 1
3 #endif
4
5 #include "Python.h"
6 #include "pycore_import.h" // _PyImport_SetModule()
7 #include "pycore_pyhash.h" // _Py_HashSecret
8 #include "pycore_traceback.h" // _PyTraceback_Add()
9
10 #include <stdbool.h>
11 #include <stddef.h> // offsetof()
12 #include "expat.h"
13 #include "pyexpat.h"
14
15 /* Do not emit Clinic output to a file as that wreaks havoc with conditionally
16 included methods. */
17 /*[clinic input]
18 module pyexpat
19 [clinic start generated code]*/
20 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=b168d503a4490c15]*/
21
/* Expat's version folded into one integer so it can be compared in #if
   directives: major*10000 + minor*100 + micro (e.g. 2.6.0 -> 20600). */
#define XML_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)
23
/* Memory-handling suite made of the PyMem_* allocator functions, listed
   as malloc, realloc, free. */
static XML_Memory_Handling_Suite ExpatMemoryHandler = {
    PyMem_Malloc, PyMem_Realloc, PyMem_Free};
26
/* One enumerator per callback slot.  The values are used as indices into
   a parser's `handlers` array (e.g. self->handlers[CharacterData]).
   NOTE(review): the same indices are also applied to `handler_info`, so
   the order presumably has to match that table's initializer -- confirm. */
enum HandlerTypes {
    StartElement,
    EndElement,
    ProcessingInstruction,
    CharacterData,
    UnparsedEntityDecl,
    NotationDecl,
    StartNamespaceDecl,
    EndNamespaceDecl,
    Comment,
    StartCdataSection,
    EndCdataSection,
    Default,
    DefaultHandlerExpand,
    NotStandalone,
    ExternalEntityRef,
    StartDoctypeDecl,
    EndDoctypeDecl,
    EntityDecl,
    XmlDecl,
    ElementDecl,
    AttlistDecl,
#if XML_COMBINED_VERSION >= 19504
    /* Only compiled in for Expat 1.95.4 and newer. */
    SkippedEntity,
#endif
    _DummyDecl
};
54
/* Per-module state of the pyexpat extension. */
typedef struct {
    PyTypeObject *xml_parse_type;   /* type used to allocate xmlparser objects */
    PyObject *error;                /* exception class raised by set_error() */
    PyObject *str_read;             /* attribute name looked up on file objects
                                       by ParseFile(); presumably "read" */
} pyexpat_state;
60
61 static inline pyexpat_state*
pyexpat_get_state(PyObject * module)62 pyexpat_get_state(PyObject *module)
63 {
64 void *state = PyModule_GetState(module);
65 assert(state != NULL);
66 return (pyexpat_state *)state;
67 }
68
69 /* ----------------------------------------------------- */
70
71 /* Declarations for objects of type xmlparser */
72
/* Python-level parser object: wraps one Expat XML_Parser together with
   the options and Python callbacks attached to it. */
typedef struct {
    PyObject_HEAD

    XML_Parser itself;          /* The underlying Expat parser. */
    int ordered_attributes;     /* Return attributes as a list. */
    int specified_attributes;   /* Report only specified attributes. */
    int in_callback;            /* Is a callback active? */
    int ns_prefixes;            /* Namespace-triplets mode? */
    XML_Char *buffer;           /* Buffer used when accumulating characters */
                                /* NULL if not enabled */
    int buffer_size;            /* Size of buffer, in XML_Char units */
    int buffer_used;            /* Buffer units in use */
    bool reparse_deferral_enabled; /* Whether to defer reparsing of
                                   unfinished XML tokens; a de-facto cache of
                                   what Expat has the authority on, for lack
                                   of a getter API function
                                   "XML_GetReparseDeferralEnabled" in Expat
                                   2.6.0 */
    PyObject *intern;           /* Dictionary to intern strings */
    PyObject **handlers;        /* Python callbacks, indexed by
                                   enum HandlerTypes; NULL entry = unset */
} xmlparseobject;
94
95 #include "clinic/pyexpat.c.h"
96
/* NOTE(review): presumably the default size of the character-data
   accumulation buffer; its use is not visible in this part of the file. */
#define CHARACTER_DATA_BUFFER_SIZE 8192

/* Function that installs (or, with NULL, removes) a C callback on an
   Expat parser. */
typedef void (*xmlhandlersetter)(XML_Parser self, void *meth);
/* Type-erased pointer to one of the C callback functions. */
typedef void* xmlhandler;

/* Description of one callback slot. */
struct HandlerInfo {
    const char *name;           /* slot name; NULL terminates the table */
    xmlhandlersetter setter;    /* registers `handler` with Expat */
    xmlhandler handler;         /* C callback passed to `setter` */
    PyGetSetDef getset;         /* NOTE(review): presumably the attribute
                                   descriptor exposing the slot -- confirm */
};

/* Table of callback slots.  Code in this file walks it until an entry
   with a NULL name is found. */
static struct HandlerInfo handler_info[64];
110
111 /* Set an integer attribute on the error object; return true on success,
112 * false on an exception.
113 */
114 static int
set_error_attr(PyObject * err,const char * name,int value)115 set_error_attr(PyObject *err, const char *name, int value)
116 {
117 PyObject *v = PyLong_FromLong(value);
118
119 if (v == NULL || PyObject_SetAttrString(err, name, v) == -1) {
120 Py_XDECREF(v);
121 return 0;
122 }
123 Py_DECREF(v);
124 return 1;
125 }
126
127 /* Build and set an Expat exception, including positioning
128 * information. Always returns NULL.
129 */
130 static PyObject *
set_error(pyexpat_state * state,xmlparseobject * self,enum XML_Error code)131 set_error(pyexpat_state *state, xmlparseobject *self, enum XML_Error code)
132 {
133 PyObject *err;
134 PyObject *buffer;
135 XML_Parser parser = self->itself;
136 int lineno = XML_GetErrorLineNumber(parser);
137 int column = XML_GetErrorColumnNumber(parser);
138
139 buffer = PyUnicode_FromFormat("%s: line %i, column %i",
140 XML_ErrorString(code), lineno, column);
141 if (buffer == NULL)
142 return NULL;
143 err = PyObject_CallOneArg(state->error, buffer);
144 Py_DECREF(buffer);
145 if ( err != NULL
146 && set_error_attr(err, "code", code)
147 && set_error_attr(err, "offset", column)
148 && set_error_attr(err, "lineno", lineno)) {
149 PyErr_SetObject(state->error, err);
150 }
151 Py_XDECREF(err);
152 return NULL;
153 }
154
155 static int
have_handler(xmlparseobject * self,int type)156 have_handler(xmlparseobject *self, int type)
157 {
158 PyObject *handler = self->handlers[type];
159 return handler != NULL;
160 }
161
162 /* Convert a string of XML_Chars into a Unicode string.
163 Returns None if str is a null pointer. */
164
165 static PyObject *
conv_string_to_unicode(const XML_Char * str)166 conv_string_to_unicode(const XML_Char *str)
167 {
168 /* XXX currently this code assumes that XML_Char is 8-bit,
169 and hence in UTF-8. */
170 /* UTF-8 from Expat, Unicode desired */
171 if (str == NULL) {
172 Py_RETURN_NONE;
173 }
174 return PyUnicode_DecodeUTF8(str, strlen(str), "strict");
175 }
176
177 static PyObject *
conv_string_len_to_unicode(const XML_Char * str,int len)178 conv_string_len_to_unicode(const XML_Char *str, int len)
179 {
180 /* XXX currently this code assumes that XML_Char is 8-bit,
181 and hence in UTF-8. */
182 /* UTF-8 from Expat, Unicode desired */
183 if (str == NULL) {
184 Py_RETURN_NONE;
185 }
186 return PyUnicode_DecodeUTF8((const char *)str, len, "strict");
187 }
188
189 /* Callback routines */
190
191 static void clear_handlers(xmlparseobject *self, int initial);
192
193 /* This handler is used when an error has been detected, in the hope
194 that actual parsing can be terminated early. This will only help
195 if an external entity reference is encountered. */
196 static int
error_external_entity_ref_handler(XML_Parser parser,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)197 error_external_entity_ref_handler(XML_Parser parser,
198 const XML_Char *context,
199 const XML_Char *base,
200 const XML_Char *systemId,
201 const XML_Char *publicId)
202 {
203 return 0;
204 }
205
206 /* Dummy character data handler used when an error (exception) has
207 been detected, and the actual parsing can be terminated early.
208 This is needed since character data handler can't be safely removed
209 from within the character data handler, but can be replaced. It is
210 used only from the character data handler trampoline, and must be
211 used right after `flag_error()` is called. */
212 static void
noop_character_data_handler(void * userData,const XML_Char * data,int len)213 noop_character_data_handler(void *userData, const XML_Char *data, int len)
214 {
215 /* Do nothing. */
216 }
217
218 static void
flag_error(xmlparseobject * self)219 flag_error(xmlparseobject *self)
220 {
221 clear_handlers(self, 0);
222 XML_SetExternalEntityRefHandler(self->itself,
223 error_external_entity_ref_handler);
224 }
225
226 static PyObject*
call_with_frame(const char * funcname,int lineno,PyObject * func,PyObject * args,xmlparseobject * self)227 call_with_frame(const char *funcname, int lineno, PyObject* func, PyObject* args,
228 xmlparseobject *self)
229 {
230 PyObject *res;
231
232 res = PyObject_Call(func, args, NULL);
233 if (res == NULL) {
234 _PyTraceback_Add(funcname, __FILE__, lineno);
235 XML_StopParser(self->itself, XML_FALSE);
236 }
237 return res;
238 }
239
240 static PyObject*
string_intern(xmlparseobject * self,const char * str)241 string_intern(xmlparseobject *self, const char* str)
242 {
243 PyObject *result = conv_string_to_unicode(str);
244 PyObject *value;
245 /* result can be NULL if the unicode conversion failed. */
246 if (!result)
247 return result;
248 if (!self->intern)
249 return result;
250 if (PyDict_GetItemRef(self->intern, result, &value) == 0 &&
251 PyDict_SetItem(self->intern, result, result) == 0)
252 {
253 return result;
254 }
255 assert((value != NULL) == !PyErr_Occurred());
256 Py_DECREF(result);
257 return value;
258 }
259
260 /* Return 0 on success, -1 on exception.
261 * flag_error() will be called before return if needed.
262 */
263 static int
call_character_handler(xmlparseobject * self,const XML_Char * buffer,int len)264 call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len)
265 {
266 PyObject *args;
267 PyObject *temp;
268
269 if (!have_handler(self, CharacterData))
270 return -1;
271
272 args = PyTuple_New(1);
273 if (args == NULL)
274 return -1;
275 temp = (conv_string_len_to_unicode(buffer, len));
276 if (temp == NULL) {
277 Py_DECREF(args);
278 flag_error(self);
279 XML_SetCharacterDataHandler(self->itself,
280 noop_character_data_handler);
281 return -1;
282 }
283 PyTuple_SET_ITEM(args, 0, temp);
284 /* temp is now a borrowed reference; consider it unused. */
285 self->in_callback = 1;
286 temp = call_with_frame("CharacterData", __LINE__,
287 self->handlers[CharacterData], args, self);
288 /* temp is an owned reference again, or NULL */
289 self->in_callback = 0;
290 Py_DECREF(args);
291 if (temp == NULL) {
292 flag_error(self);
293 XML_SetCharacterDataHandler(self->itself,
294 noop_character_data_handler);
295 return -1;
296 }
297 Py_DECREF(temp);
298 return 0;
299 }
300
301 static int
flush_character_buffer(xmlparseobject * self)302 flush_character_buffer(xmlparseobject *self)
303 {
304 int rc;
305 if (self->buffer == NULL || self->buffer_used == 0)
306 return 0;
307 rc = call_character_handler(self, self->buffer, self->buffer_used);
308 self->buffer_used = 0;
309 return rc;
310 }
311
312 static void
my_CharacterDataHandler(void * userData,const XML_Char * data,int len)313 my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
314 {
315 xmlparseobject *self = (xmlparseobject *) userData;
316
317 if (PyErr_Occurred())
318 return;
319
320 if (self->buffer == NULL)
321 call_character_handler(self, data, len);
322 else {
323 if ((self->buffer_used + len) > self->buffer_size) {
324 if (flush_character_buffer(self) < 0)
325 return;
326 /* handler might have changed; drop the rest on the floor
327 * if there isn't a handler anymore
328 */
329 if (!have_handler(self, CharacterData))
330 return;
331 }
332 if (len > self->buffer_size) {
333 call_character_handler(self, data, len);
334 self->buffer_used = 0;
335 }
336 else {
337 memcpy(self->buffer + self->buffer_used,
338 data, len * sizeof(XML_Char));
339 self->buffer_used += len;
340 }
341 }
342 }
343
344 static void
my_StartElementHandler(void * userData,const XML_Char * name,const XML_Char * atts[])345 my_StartElementHandler(void *userData,
346 const XML_Char *name, const XML_Char *atts[])
347 {
348 xmlparseobject *self = (xmlparseobject *)userData;
349
350 if (have_handler(self, StartElement)) {
351 PyObject *container, *rv, *args;
352 int i, max;
353
354 if (PyErr_Occurred())
355 return;
356
357 if (flush_character_buffer(self) < 0)
358 return;
359 /* Set max to the number of slots filled in atts[]; max/2 is
360 * the number of attributes we need to process.
361 */
362 if (self->specified_attributes) {
363 max = XML_GetSpecifiedAttributeCount(self->itself);
364 }
365 else {
366 max = 0;
367 while (atts[max] != NULL)
368 max += 2;
369 }
370 /* Build the container. */
371 if (self->ordered_attributes)
372 container = PyList_New(max);
373 else
374 container = PyDict_New();
375 if (container == NULL) {
376 flag_error(self);
377 return;
378 }
379 for (i = 0; i < max; i += 2) {
380 PyObject *n = string_intern(self, (XML_Char *) atts[i]);
381 PyObject *v;
382 if (n == NULL) {
383 flag_error(self);
384 Py_DECREF(container);
385 return;
386 }
387 v = conv_string_to_unicode((XML_Char *) atts[i+1]);
388 if (v == NULL) {
389 flag_error(self);
390 Py_DECREF(container);
391 Py_DECREF(n);
392 return;
393 }
394 if (self->ordered_attributes) {
395 PyList_SET_ITEM(container, i, n);
396 PyList_SET_ITEM(container, i+1, v);
397 }
398 else if (PyDict_SetItem(container, n, v)) {
399 flag_error(self);
400 Py_DECREF(n);
401 Py_DECREF(v);
402 Py_DECREF(container);
403 return;
404 }
405 else {
406 Py_DECREF(n);
407 Py_DECREF(v);
408 }
409 }
410 args = string_intern(self, name);
411 if (args == NULL) {
412 Py_DECREF(container);
413 return;
414 }
415 args = Py_BuildValue("(NN)", args, container);
416 if (args == NULL) {
417 return;
418 }
419 /* Container is now a borrowed reference; ignore it. */
420 self->in_callback = 1;
421 rv = call_with_frame("StartElement", __LINE__,
422 self->handlers[StartElement], args, self);
423 self->in_callback = 0;
424 Py_DECREF(args);
425 if (rv == NULL) {
426 flag_error(self);
427 return;
428 }
429 Py_DECREF(rv);
430 }
431 }
432
/* Generator for the C callback `my_<NAME>Handler`.
 *
 *   RC            C return type of the callback
 *   NAME          enum HandlerTypes member; also the traceback label
 *   PARAMS        parenthesized C parameter list
 *   INIT          extra declarations placed at the top of the body
 *   PARAM_FORMAT  parenthesized Py_BuildValue() argument list used to
 *                 build the arguments of the Python callback
 *   CONVERSION    statement run on the Python result `rv` before it is
 *                 released
 *   RETURN        expression returned on every exit path
 *   GETUSERDATA   expression yielding the xmlparseobject
 *
 * The generated function does nothing beyond INIT when no Python callback
 * is installed.  Otherwise it bails out if an exception is pending or the
 * character buffer cannot be flushed, builds the arguments, calls the
 * Python callback with in_callback set, and calls flag_error() when
 * building the arguments or the call itself fails.
 */
#define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
        RETURN, GETUSERDATA) \
static RC \
my_##NAME##Handler PARAMS {\
    xmlparseobject *self = GETUSERDATA ; \
    PyObject *args = NULL; \
    PyObject *rv = NULL; \
    INIT \
    \
    if (have_handler(self, NAME)) { \
        if (PyErr_Occurred()) \
            return RETURN; \
        if (flush_character_buffer(self) < 0) \
            return RETURN; \
        args = Py_BuildValue PARAM_FORMAT ;\
        if (!args) { flag_error(self); return RETURN;} \
        self->in_callback = 1; \
        rv = call_with_frame(#NAME,__LINE__, \
                             self->handlers[NAME], args, self); \
        self->in_callback = 0; \
        Py_DECREF(args); \
        if (rv == NULL) { \
            flag_error(self); \
            return RETURN; \
        } \
        CONVERSION \
        Py_DECREF(rv); \
    } \
    return RETURN; \
}

/* Callback returning void whose first parameter is `void *userData`;
   the Python result is discarded. */
#define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
        RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;,\
        (xmlparseobject *)userData)

/* Callback returning int: the Python result is converted with
   PyLong_AsLong() and returned; 0 is returned on the early-exit and
   failure paths. */
#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT)\
        RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
                        rc = PyLong_AsLong(rv);, rc, \
        (xmlparseobject *)userData)
472
/* End tag: callback(name), name interned. */
VOID_HANDLER(EndElement,
             (void *userData, const XML_Char *name),
             ("(N)", string_intern(self, name)))

/* Processing instruction: callback(target, data), target interned. */
VOID_HANDLER(ProcessingInstruction,
             (void *userData,
              const XML_Char *target,
              const XML_Char *data),
             ("(NO&)", string_intern(self, target), conv_string_to_unicode ,data))

/* Unparsed entity declaration:
   callback(entityName, base, systemId, publicId, notationName),
   all interned. */
VOID_HANDLER(UnparsedEntityDecl,
             (void *userData,
              const XML_Char *entityName,
              const XML_Char *base,
              const XML_Char *systemId,
              const XML_Char *publicId,
              const XML_Char *notationName),
             ("(NNNNN)",
              string_intern(self, entityName), string_intern(self, base),
              string_intern(self, systemId), string_intern(self, publicId),
              string_intern(self, notationName)))

/* Entity declaration:
   callback(entityName, is_parameter_entity, value, base, systemId,
            publicId, notationName).
   `value` is decoded from value_length units; the other strings are
   interned. */
VOID_HANDLER(EntityDecl,
             (void *userData,
              const XML_Char *entityName,
              int is_parameter_entity,
              const XML_Char *value,
              int value_length,
              const XML_Char *base,
              const XML_Char *systemId,
              const XML_Char *publicId,
              const XML_Char *notationName),
             ("NiNNNNN",
              string_intern(self, entityName), is_parameter_entity,
              (conv_string_len_to_unicode(value, value_length)),
              string_intern(self, base), string_intern(self, systemId),
              string_intern(self, publicId),
              string_intern(self, notationName)))

/* XML declaration: callback(version, encoding, standalone). */
VOID_HANDLER(XmlDecl,
             (void *userData,
              const XML_Char *version,
              const XML_Char *encoding,
              int standalone),
             ("(O&O&i)",
              conv_string_to_unicode ,version, conv_string_to_unicode ,encoding,
              standalone))
520
521 static PyObject *
conv_content_model(XML_Content * const model,PyObject * (* conv_string)(const XML_Char *))522 conv_content_model(XML_Content * const model,
523 PyObject *(*conv_string)(const XML_Char *))
524 {
525 PyObject *result = NULL;
526 PyObject *children = PyTuple_New(model->numchildren);
527 int i;
528
529 if (children != NULL) {
530 assert(model->numchildren < INT_MAX);
531 for (i = 0; i < (int)model->numchildren; ++i) {
532 PyObject *child = conv_content_model(&model->children[i],
533 conv_string);
534 if (child == NULL) {
535 Py_XDECREF(children);
536 return NULL;
537 }
538 PyTuple_SET_ITEM(children, i, child);
539 }
540 result = Py_BuildValue("(iiO&N)",
541 model->type, model->quant,
542 conv_string,model->name, children);
543 }
544 return result;
545 }
546
547 static void
my_ElementDeclHandler(void * userData,const XML_Char * name,XML_Content * model)548 my_ElementDeclHandler(void *userData,
549 const XML_Char *name,
550 XML_Content *model)
551 {
552 xmlparseobject *self = (xmlparseobject *)userData;
553 PyObject *args = NULL;
554
555 if (have_handler(self, ElementDecl)) {
556 PyObject *rv = NULL;
557 PyObject *modelobj, *nameobj;
558
559 if (PyErr_Occurred())
560 return;
561
562 if (flush_character_buffer(self) < 0)
563 goto finally;
564 modelobj = conv_content_model(model, (conv_string_to_unicode));
565 if (modelobj == NULL) {
566 flag_error(self);
567 goto finally;
568 }
569 nameobj = string_intern(self, name);
570 if (nameobj == NULL) {
571 Py_DECREF(modelobj);
572 flag_error(self);
573 goto finally;
574 }
575 args = Py_BuildValue("NN", nameobj, modelobj);
576 if (args == NULL) {
577 flag_error(self);
578 goto finally;
579 }
580 self->in_callback = 1;
581 rv = call_with_frame("ElementDecl", __LINE__,
582 self->handlers[ElementDecl], args, self);
583 self->in_callback = 0;
584 if (rv == NULL) {
585 flag_error(self);
586 goto finally;
587 }
588 Py_DECREF(rv);
589 }
590 finally:
591 Py_XDECREF(args);
592 XML_FreeContentModel(self->itself, model);
593 return;
594 }
595
/* Attribute list declaration:
   callback(elname, attname, att_type, dflt, isrequired);
   elname and attname are interned. */
VOID_HANDLER(AttlistDecl,
             (void *userData,
              const XML_Char *elname,
              const XML_Char *attname,
              const XML_Char *att_type,
              const XML_Char *dflt,
              int isrequired),
             ("(NNO&O&i)",
              string_intern(self, elname), string_intern(self, attname),
              conv_string_to_unicode ,att_type, conv_string_to_unicode ,dflt,
              isrequired))

#if XML_COMBINED_VERSION >= 19504
/* Skipped entity: callback(entityName, is_parameter_entity). */
VOID_HANDLER(SkippedEntity,
             (void *userData,
              const XML_Char *entityName,
              int is_parameter_entity),
             ("Ni",
              string_intern(self, entityName), is_parameter_entity))
#endif

/* Notation declaration:
   callback(notationName, base, systemId, publicId), all interned. */
VOID_HANDLER(NotationDecl,
                (void *userData,
                        const XML_Char *notationName,
                        const XML_Char *base,
                        const XML_Char *systemId,
                        const XML_Char *publicId),
                ("(NNNN)",
                 string_intern(self, notationName), string_intern(self, base),
                 string_intern(self, systemId), string_intern(self, publicId)))

/* Start of a namespace declaration: callback(prefix, uri). */
VOID_HANDLER(StartNamespaceDecl,
                (void *userData,
                      const XML_Char *prefix,
                      const XML_Char *uri),
                ("(NN)",
                 string_intern(self, prefix), string_intern(self, uri)))

/* End of a namespace declaration: callback(prefix). */
VOID_HANDLER(EndNamespaceDecl,
                (void *userData,
                    const XML_Char *prefix),
                ("(N)", string_intern(self, prefix)))

/* Comment: callback(data). */
VOID_HANDLER(Comment,
               (void *userData, const XML_Char *data),
                ("(O&)", conv_string_to_unicode ,data))

/* Start of a CDATA section: callback(). */
VOID_HANDLER(StartCdataSection,
               (void *userData),
                ("()"))

/* End of a CDATA section: callback(). */
VOID_HANDLER(EndCdataSection,
               (void *userData),
                ("()"))

/* Default callback: callback(text), text decoded from `len` units. */
VOID_HANDLER(Default,
              (void *userData, const XML_Char *s, int len),
              ("(N)", (conv_string_len_to_unicode(s,len))))

/* Same arguments as Default, for the DefaultHandlerExpand slot. */
VOID_HANDLER(DefaultHandlerExpand,
              (void *userData, const XML_Char *s, int len),
              ("(N)", (conv_string_len_to_unicode(s,len))))
/* Alias without the trailing "Handler".
   NOTE(review): presumably used by the handler table later in the file. */
#define my_DefaultHandlerExpand my_DefaultHandlerExpandHandler

/* Not-standalone callback: callback(); its integer result is returned
   to Expat. */
INT_HANDLER(NotStandalone,
                (void *userData),
                ("()"))

/* External entity reference:
   callback(context, base, systemId, publicId); its integer result is
   returned to Expat.  This callback receives the XML_Parser rather than
   the user data, so the xmlparseobject is fetched with
   XML_GetUserData(). */
RC_HANDLER(int, ExternalEntityRef,
                (XML_Parser parser,
                    const XML_Char *context,
                    const XML_Char *base,
                    const XML_Char *systemId,
                    const XML_Char *publicId),
                int rc=0;,
                ("(O&NNN)",
                 conv_string_to_unicode ,context, string_intern(self, base),
                 string_intern(self, systemId), string_intern(self, publicId)),
                rc = PyLong_AsLong(rv);, rc,
                XML_GetUserData(parser))

/* XXX UnknownEncodingHandler */

/* Start of a DOCTYPE declaration:
   callback(doctypeName, sysid, pubid, has_internal_subset). */
VOID_HANDLER(StartDoctypeDecl,
             (void *userData, const XML_Char *doctypeName,
              const XML_Char *sysid, const XML_Char *pubid,
              int has_internal_subset),
             ("(NNNi)", string_intern(self, doctypeName),
              string_intern(self, sysid), string_intern(self, pubid),
              has_internal_subset))

/* End of a DOCTYPE declaration: callback(). */
VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
688
689 /* ---------------------------------------------------------------- */
690 /*[clinic input]
691 class pyexpat.xmlparser "xmlparseobject *" "&Xmlparsetype"
692 [clinic start generated code]*/
693 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=2393162385232e1c]*/
694
695
696 static PyObject *
get_parse_result(pyexpat_state * state,xmlparseobject * self,int rv)697 get_parse_result(pyexpat_state *state, xmlparseobject *self, int rv)
698 {
699 if (PyErr_Occurred()) {
700 return NULL;
701 }
702 if (rv == 0) {
703 return set_error(state, self, XML_GetErrorCode(self->itself));
704 }
705 if (flush_character_buffer(self) < 0) {
706 return NULL;
707 }
708 return PyLong_FromLong(rv);
709 }
710
/* Largest number of bytes (1 MiB) that Parse() passes to a single
   XML_Parse() call; longer input is fed in pieces of this size. */
#define MAX_CHUNK_SIZE (1 << 20)
712
713 /*[clinic input]
714 pyexpat.xmlparser.SetReparseDeferralEnabled
715
716 enabled: bool
717 /
718
719 Enable/Disable reparse deferral; enabled by default with Expat >=2.6.0.
720 [clinic start generated code]*/
721
722 static PyObject *
pyexpat_xmlparser_SetReparseDeferralEnabled_impl(xmlparseobject * self,int enabled)723 pyexpat_xmlparser_SetReparseDeferralEnabled_impl(xmlparseobject *self,
724 int enabled)
725 /*[clinic end generated code: output=5ec539e3b63c8c49 input=021eb9e0bafc32c5]*/
726 {
727 #if XML_COMBINED_VERSION >= 20600
728 XML_SetReparseDeferralEnabled(self->itself, enabled ? XML_TRUE : XML_FALSE);
729 self->reparse_deferral_enabled = (bool)enabled;
730 #endif
731 Py_RETURN_NONE;
732 }
733
734 /*[clinic input]
735 pyexpat.xmlparser.GetReparseDeferralEnabled
736
737 Retrieve reparse deferral enabled status; always returns false with Expat <2.6.0.
738 [clinic start generated code]*/
739
740 static PyObject *
pyexpat_xmlparser_GetReparseDeferralEnabled_impl(xmlparseobject * self)741 pyexpat_xmlparser_GetReparseDeferralEnabled_impl(xmlparseobject *self)
742 /*[clinic end generated code: output=4e91312e88a595a8 input=54b5f11d32b20f3e]*/
743 {
744 return PyBool_FromLong(self->reparse_deferral_enabled);
745 }
746
747 /*[clinic input]
748 pyexpat.xmlparser.Parse
749
750 cls: defining_class
751 data: object
752 isfinal: bool = False
753 /
754
755 Parse XML data.
756
757 `isfinal' should be true at end of input.
758 [clinic start generated code]*/
759
760 static PyObject *
pyexpat_xmlparser_Parse_impl(xmlparseobject * self,PyTypeObject * cls,PyObject * data,int isfinal)761 pyexpat_xmlparser_Parse_impl(xmlparseobject *self, PyTypeObject *cls,
762 PyObject *data, int isfinal)
763 /*[clinic end generated code: output=8faffe07fe1f862a input=d0eb2a69fab3b9f1]*/
764 {
765 const char *s;
766 Py_ssize_t slen;
767 Py_buffer view;
768 int rc;
769 pyexpat_state *state = PyType_GetModuleState(cls);
770
771 if (PyUnicode_Check(data)) {
772 view.buf = NULL;
773 s = PyUnicode_AsUTF8AndSize(data, &slen);
774 if (s == NULL)
775 return NULL;
776 /* Explicitly set UTF-8 encoding. Return code ignored. */
777 (void)XML_SetEncoding(self->itself, "utf-8");
778 }
779 else {
780 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
781 return NULL;
782 s = view.buf;
783 slen = view.len;
784 }
785
786 static_assert(MAX_CHUNK_SIZE <= INT_MAX,
787 "MAX_CHUNK_SIZE is larger than INT_MAX");
788 while (slen > MAX_CHUNK_SIZE) {
789 rc = XML_Parse(self->itself, s, MAX_CHUNK_SIZE, 0);
790 if (!rc)
791 goto done;
792 s += MAX_CHUNK_SIZE;
793 slen -= MAX_CHUNK_SIZE;
794 }
795
796 assert(slen <= INT_MAX);
797 rc = XML_Parse(self->itself, s, (int)slen, isfinal);
798
799 done:
800 if (view.buf != NULL) {
801 PyBuffer_Release(&view);
802 }
803 return get_parse_result(state, self, rc);
804 }
805
806 /* File reading copied from cPickle */
807
808 #define BUF_SIZE 2048
809
810 static int
readinst(char * buf,int buf_size,PyObject * meth)811 readinst(char *buf, int buf_size, PyObject *meth)
812 {
813 PyObject *str;
814 Py_ssize_t len;
815 const char *ptr;
816
817 str = PyObject_CallFunction(meth, "i", buf_size);
818 if (str == NULL)
819 goto error;
820
821 if (PyBytes_Check(str))
822 ptr = PyBytes_AS_STRING(str);
823 else if (PyByteArray_Check(str))
824 ptr = PyByteArray_AS_STRING(str);
825 else {
826 PyErr_Format(PyExc_TypeError,
827 "read() did not return a bytes object (type=%.400s)",
828 Py_TYPE(str)->tp_name);
829 goto error;
830 }
831 len = Py_SIZE(str);
832 if (len > buf_size) {
833 PyErr_Format(PyExc_ValueError,
834 "read() returned too much data: "
835 "%i bytes requested, %zd returned",
836 buf_size, len);
837 goto error;
838 }
839 memcpy(buf, ptr, len);
840 Py_DECREF(str);
841 /* len <= buf_size <= INT_MAX */
842 return (int)len;
843
844 error:
845 Py_XDECREF(str);
846 return -1;
847 }
848
849 /*[clinic input]
850 pyexpat.xmlparser.ParseFile
851
852 cls: defining_class
853 file: object
854 /
855
856 Parse XML data from file-like object.
857 [clinic start generated code]*/
858
859 static PyObject *
pyexpat_xmlparser_ParseFile_impl(xmlparseobject * self,PyTypeObject * cls,PyObject * file)860 pyexpat_xmlparser_ParseFile_impl(xmlparseobject *self, PyTypeObject *cls,
861 PyObject *file)
862 /*[clinic end generated code: output=34780a094c8ca3ae input=ba4bc9c541684793]*/
863 {
864 int rv = 1;
865 PyObject *readmethod = NULL;
866
867 pyexpat_state *state = PyType_GetModuleState(cls);
868
869 if (PyObject_GetOptionalAttr(file, state->str_read, &readmethod) < 0) {
870 return NULL;
871 }
872 if (readmethod == NULL) {
873 PyErr_SetString(PyExc_TypeError,
874 "argument must have 'read' attribute");
875 return NULL;
876 }
877 for (;;) {
878 int bytes_read;
879 void *buf = XML_GetBuffer(self->itself, BUF_SIZE);
880 if (buf == NULL) {
881 Py_XDECREF(readmethod);
882 return get_parse_result(state, self, 0);
883 }
884
885 bytes_read = readinst(buf, BUF_SIZE, readmethod);
886 if (bytes_read < 0) {
887 Py_DECREF(readmethod);
888 return NULL;
889 }
890 rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0);
891 if (PyErr_Occurred()) {
892 Py_XDECREF(readmethod);
893 return NULL;
894 }
895
896 if (!rv || bytes_read == 0)
897 break;
898 }
899 Py_XDECREF(readmethod);
900 return get_parse_result(state, self, rv);
901 }
902
903 /*[clinic input]
904 pyexpat.xmlparser.SetBase
905
906 base: str
907 /
908
909 Set the base URL for the parser.
910 [clinic start generated code]*/
911
912 static PyObject *
pyexpat_xmlparser_SetBase_impl(xmlparseobject * self,const char * base)913 pyexpat_xmlparser_SetBase_impl(xmlparseobject *self, const char *base)
914 /*[clinic end generated code: output=c212ddceb607b539 input=c684e5de895ee1a8]*/
915 {
916 if (!XML_SetBase(self->itself, base)) {
917 return PyErr_NoMemory();
918 }
919 Py_RETURN_NONE;
920 }
921
922 /*[clinic input]
923 pyexpat.xmlparser.GetBase
924
925 Return base URL string for the parser.
926 [clinic start generated code]*/
927
928 static PyObject *
pyexpat_xmlparser_GetBase_impl(xmlparseobject * self)929 pyexpat_xmlparser_GetBase_impl(xmlparseobject *self)
930 /*[clinic end generated code: output=2886cb21f9a8739a input=918d71c38009620e]*/
931 {
932 return conv_string_to_unicode(XML_GetBase(self->itself));
933 }
934
935 /*[clinic input]
936 pyexpat.xmlparser.GetInputContext
937
938 Return the untranslated text of the input that caused the current event.
939
940 If the event was generated by a large amount of text (such as a start tag
941 for an element with many attributes), not all of the text may be available.
942 [clinic start generated code]*/
943
944 static PyObject *
pyexpat_xmlparser_GetInputContext_impl(xmlparseobject * self)945 pyexpat_xmlparser_GetInputContext_impl(xmlparseobject *self)
946 /*[clinic end generated code: output=a88026d683fc22cc input=034df8712db68379]*/
947 {
948 if (self->in_callback) {
949 int offset, size;
950 const char *buffer
951 = XML_GetInputContext(self->itself, &offset, &size);
952
953 if (buffer != NULL)
954 return PyBytes_FromStringAndSize(buffer + offset,
955 size - offset);
956 else
957 Py_RETURN_NONE;
958 }
959 else
960 Py_RETURN_NONE;
961 }
962
963 /*[clinic input]
964 pyexpat.xmlparser.ExternalEntityParserCreate
965
966 cls: defining_class
967 context: str(accept={str, NoneType})
968 encoding: str = NULL
969 /
970
971 Create a parser for parsing an external entity based on the information passed to the ExternalEntityRefHandler.
972 [clinic start generated code]*/
973
974 static PyObject *
pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject * self,PyTypeObject * cls,const char * context,const char * encoding)975 pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
976 PyTypeObject *cls,
977 const char *context,
978 const char *encoding)
979 /*[clinic end generated code: output=01d4472b49cb3f92 input=ec70c6b9e6e9619a]*/
980 {
981 xmlparseobject *new_parser;
982 int i;
983
984 pyexpat_state *state = PyType_GetModuleState(cls);
985
986 new_parser = PyObject_GC_New(xmlparseobject, state->xml_parse_type);
987 if (new_parser == NULL) {
988 return NULL;
989 }
990
991 new_parser->buffer_size = self->buffer_size;
992 new_parser->buffer_used = 0;
993 new_parser->buffer = NULL;
994 new_parser->ordered_attributes = self->ordered_attributes;
995 new_parser->specified_attributes = self->specified_attributes;
996 new_parser->in_callback = 0;
997 new_parser->ns_prefixes = self->ns_prefixes;
998 new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
999 encoding);
1000 new_parser->handlers = 0;
1001 new_parser->intern = Py_XNewRef(self->intern);
1002
1003 if (self->buffer != NULL) {
1004 new_parser->buffer = PyMem_Malloc(new_parser->buffer_size);
1005 if (new_parser->buffer == NULL) {
1006 Py_DECREF(new_parser);
1007 return PyErr_NoMemory();
1008 }
1009 }
1010 if (!new_parser->itself) {
1011 Py_DECREF(new_parser);
1012 return PyErr_NoMemory();
1013 }
1014
1015 XML_SetUserData(new_parser->itself, (void *)new_parser);
1016
1017 /* allocate and clear handlers first */
1018 for (i = 0; handler_info[i].name != NULL; i++)
1019 /* do nothing */;
1020
1021 new_parser->handlers = PyMem_New(PyObject *, i);
1022 if (!new_parser->handlers) {
1023 Py_DECREF(new_parser);
1024 return PyErr_NoMemory();
1025 }
1026 clear_handlers(new_parser, 1);
1027
1028 /* then copy handlers from self */
1029 for (i = 0; handler_info[i].name != NULL; i++) {
1030 PyObject *handler = self->handlers[i];
1031 if (handler != NULL) {
1032 new_parser->handlers[i] = Py_NewRef(handler);
1033 handler_info[i].setter(new_parser->itself,
1034 handler_info[i].handler);
1035 }
1036 }
1037
1038 PyObject_GC_Track(new_parser);
1039 return (PyObject *)new_parser;
1040 }
1041
1042 /*[clinic input]
1043 pyexpat.xmlparser.SetParamEntityParsing
1044
1045 flag: int
1046 /
1047
1048 Controls parsing of parameter entities (including the external DTD subset).
1049
1050 Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,
1051 XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and
1052 XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag
1053 was successful.
1054 [clinic start generated code]*/
1055
1056 static PyObject *
pyexpat_xmlparser_SetParamEntityParsing_impl(xmlparseobject * self,int flag)1057 pyexpat_xmlparser_SetParamEntityParsing_impl(xmlparseobject *self, int flag)
1058 /*[clinic end generated code: output=18668ee8e760d64c input=8aea19b4b15e9af1]*/
1059 {
1060 flag = XML_SetParamEntityParsing(self->itself, flag);
1061 return PyLong_FromLong(flag);
1062 }
1063
1064
1065 #if XML_COMBINED_VERSION >= 19505
1066 /*[clinic input]
1067 pyexpat.xmlparser.UseForeignDTD
1068
1069 cls: defining_class
1070 flag: bool = True
1071 /
1072
1073 Allows the application to provide an artificial external subset if one is not specified as part of the document instance.
1074
1075 This readily allows the use of a 'default' document type controlled by the
1076 application, while still getting the advantage of providing document type
1077 information to the parser. 'flag' defaults to True if not provided.
1078 [clinic start generated code]*/
1079
1080 static PyObject *
pyexpat_xmlparser_UseForeignDTD_impl(xmlparseobject * self,PyTypeObject * cls,int flag)1081 pyexpat_xmlparser_UseForeignDTD_impl(xmlparseobject *self, PyTypeObject *cls,
1082 int flag)
1083 /*[clinic end generated code: output=d7d98252bd25a20f input=23440ecb0573fb29]*/
1084 {
1085 pyexpat_state *state = PyType_GetModuleState(cls);
1086 enum XML_Error rc;
1087
1088 rc = XML_UseForeignDTD(self->itself, flag ? XML_TRUE : XML_FALSE);
1089 if (rc != XML_ERROR_NONE) {
1090 return set_error(state, self, rc);
1091 }
1092 Py_RETURN_NONE;
1093 }
1094 #endif
1095
/* Method table of the xmlparser type (installed through Py_tp_methods).
   Each *_METHODDEF macro is defined outside this chunk (presumably
   generated by Argument Clinic) and expands to one PyMethodDef entry.
   UseForeignDTD is only listed when building against Expat >= 1.95.5,
   mirroring the guard around its implementation. */
static struct PyMethodDef xmlparse_methods[] = {
    PYEXPAT_XMLPARSER_PARSE_METHODDEF
    PYEXPAT_XMLPARSER_PARSEFILE_METHODDEF
    PYEXPAT_XMLPARSER_SETBASE_METHODDEF
    PYEXPAT_XMLPARSER_GETBASE_METHODDEF
    PYEXPAT_XMLPARSER_GETINPUTCONTEXT_METHODDEF
    PYEXPAT_XMLPARSER_EXTERNALENTITYPARSERCREATE_METHODDEF
    PYEXPAT_XMLPARSER_SETPARAMENTITYPARSING_METHODDEF
#if XML_COMBINED_VERSION >= 19505
    PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF
#endif
    PYEXPAT_XMLPARSER_SETREPARSEDEFERRALENABLED_METHODDEF
    PYEXPAT_XMLPARSER_GETREPARSEDEFERRALENABLED_METHODDEF
    {NULL, NULL}  /* sentinel */
};
1111
1112 /* ---------- */
1113
1114
1115
1116 /* pyexpat international encoding support.
1117 Make it as simple as possible.
1118 */
1119
/* Identity table: entry i holds the byte value i.  It is decoded with an
   unknown single-byte encoding to discover which code point each byte
   maps to. */
static const unsigned char template_buffer[256] = {
      0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,
     16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,
     32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,
     48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,
     64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,
     80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,
     96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
    112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
    128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
    144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
    160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
    176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
    192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
    208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
    224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
    240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
};
1137
1138
1139 static int
PyUnknownEncodingHandler(void * encodingHandlerData,const XML_Char * name,XML_Encoding * info)1140 PyUnknownEncodingHandler(void *encodingHandlerData,
1141 const XML_Char *name,
1142 XML_Encoding *info)
1143 {
1144 PyObject *u;
1145 int i;
1146 const void *data;
1147 int kind;
1148
1149 if (PyErr_Occurred())
1150 return XML_STATUS_ERROR;
1151
1152 u = PyUnicode_Decode((const char*) template_buffer, 256, name, "replace");
1153 if (u == NULL) {
1154 Py_XDECREF(u);
1155 return XML_STATUS_ERROR;
1156 }
1157
1158 if (PyUnicode_GET_LENGTH(u) != 256) {
1159 Py_DECREF(u);
1160 PyErr_SetString(PyExc_ValueError,
1161 "multi-byte encodings are not supported");
1162 return XML_STATUS_ERROR;
1163 }
1164
1165 kind = PyUnicode_KIND(u);
1166 data = PyUnicode_DATA(u);
1167 for (i = 0; i < 256; i++) {
1168 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1169 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
1170 info->map[i] = ch;
1171 else
1172 info->map[i] = -1;
1173 }
1174
1175 info->data = NULL;
1176 info->convert = NULL;
1177 info->release = NULL;
1178 Py_DECREF(u);
1179
1180 return XML_STATUS_OK;
1181 }
1182
1183
1184 static PyObject *
newxmlparseobject(pyexpat_state * state,const char * encoding,const char * namespace_separator,PyObject * intern)1185 newxmlparseobject(pyexpat_state *state, const char *encoding,
1186 const char *namespace_separator, PyObject *intern)
1187 {
1188 int i;
1189 xmlparseobject *self;
1190
1191 self = PyObject_GC_New(xmlparseobject, state->xml_parse_type);
1192 if (self == NULL)
1193 return NULL;
1194
1195 self->buffer = NULL;
1196 self->buffer_size = CHARACTER_DATA_BUFFER_SIZE;
1197 self->buffer_used = 0;
1198 self->ordered_attributes = 0;
1199 self->specified_attributes = 0;
1200 self->in_callback = 0;
1201 self->ns_prefixes = 0;
1202 self->handlers = NULL;
1203 self->intern = Py_XNewRef(intern);
1204 #if XML_COMBINED_VERSION >= 20600
1205 self->reparse_deferral_enabled = true;
1206 #else
1207 self->reparse_deferral_enabled = false;
1208 #endif
1209
1210 /* namespace_separator is either NULL or contains one char + \0 */
1211 self->itself = XML_ParserCreate_MM(encoding, &ExpatMemoryHandler,
1212 namespace_separator);
1213 if (self->itself == NULL) {
1214 PyErr_SetString(PyExc_RuntimeError,
1215 "XML_ParserCreate failed");
1216 Py_DECREF(self);
1217 return NULL;
1218 }
1219 #if XML_COMBINED_VERSION >= 20100
1220 /* This feature was added upstream in libexpat 2.1.0. */
1221 XML_SetHashSalt(self->itself,
1222 (unsigned long)_Py_HashSecret.expat.hashsalt);
1223 #endif
1224 XML_SetUserData(self->itself, (void *)self);
1225 XML_SetUnknownEncodingHandler(self->itself,
1226 (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL);
1227
1228 for (i = 0; handler_info[i].name != NULL; i++)
1229 /* do nothing */;
1230
1231 self->handlers = PyMem_New(PyObject *, i);
1232 if (!self->handlers) {
1233 Py_DECREF(self);
1234 return PyErr_NoMemory();
1235 }
1236 clear_handlers(self, 1);
1237
1238 PyObject_GC_Track(self);
1239 return (PyObject*)self;
1240 }
1241
1242 static int
xmlparse_traverse(xmlparseobject * op,visitproc visit,void * arg)1243 xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
1244 {
1245 for (int i = 0; handler_info[i].name != NULL; i++) {
1246 Py_VISIT(op->handlers[i]);
1247 }
1248 Py_VISIT(Py_TYPE(op));
1249 return 0;
1250 }
1251
1252 static int
xmlparse_clear(xmlparseobject * op)1253 xmlparse_clear(xmlparseobject *op)
1254 {
1255 clear_handlers(op, 0);
1256 Py_CLEAR(op->intern);
1257 return 0;
1258 }
1259
1260 static void
xmlparse_dealloc(xmlparseobject * self)1261 xmlparse_dealloc(xmlparseobject *self)
1262 {
1263 PyObject_GC_UnTrack(self);
1264 (void)xmlparse_clear(self);
1265 if (self->itself != NULL)
1266 XML_ParserFree(self->itself);
1267 self->itself = NULL;
1268
1269 if (self->handlers != NULL) {
1270 PyMem_Free(self->handlers);
1271 self->handlers = NULL;
1272 }
1273 if (self->buffer != NULL) {
1274 PyMem_Free(self->buffer);
1275 self->buffer = NULL;
1276 }
1277 PyTypeObject *tp = Py_TYPE(self);
1278 PyObject_GC_Del(self);
1279 Py_DECREF(tp);
1280 }
1281
1282
1283 static PyObject *
xmlparse_handler_getter(xmlparseobject * self,struct HandlerInfo * hi)1284 xmlparse_handler_getter(xmlparseobject *self, struct HandlerInfo *hi)
1285 {
1286 assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info));
1287 int handlernum = (int)(hi - handler_info);
1288 PyObject *result = self->handlers[handlernum];
1289 if (result == NULL)
1290 result = Py_None;
1291 return Py_NewRef(result);
1292 }
1293
1294 static int
xmlparse_handler_setter(xmlparseobject * self,PyObject * v,struct HandlerInfo * hi)1295 xmlparse_handler_setter(xmlparseobject *self, PyObject *v, struct HandlerInfo *hi)
1296 {
1297 assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info));
1298 int handlernum = (int)(hi - handler_info);
1299 if (v == NULL) {
1300 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1301 return -1;
1302 }
1303 if (handlernum == CharacterData) {
1304 /* If we're changing the character data handler, flush all
1305 * cached data with the old handler. Not sure there's a
1306 * "right" thing to do, though, but this probably won't
1307 * happen.
1308 */
1309 if (flush_character_buffer(self) < 0)
1310 return -1;
1311 }
1312
1313 xmlhandler c_handler = NULL;
1314 if (v == Py_None) {
1315 /* If this is the character data handler, and a character
1316 data handler is already active, we need to be more
1317 careful. What we can safely do is replace the existing
1318 character data handler callback function with a no-op
1319 function that will refuse to call Python. The downside
1320 is that this doesn't completely remove the character
1321 data handler from the C layer if there's any callback
1322 active, so Expat does a little more work than it
1323 otherwise would, but that's really an odd case. A more
1324 elaborate system of handlers and state could remove the
1325 C handler more effectively. */
1326 if (handlernum == CharacterData && self->in_callback)
1327 c_handler = noop_character_data_handler;
1328 v = NULL;
1329 }
1330 else if (v != NULL) {
1331 Py_INCREF(v);
1332 c_handler = handler_info[handlernum].handler;
1333 }
1334 Py_XSETREF(self->handlers[handlernum], v);
1335 handler_info[handlernum].setter(self->itself, c_handler);
1336 return 0;
1337 }
1338
/* Generate a getter named xmlparse_<name>_getter that calls
   XML_Get<name>() on the wrapped Expat parser, casts the result to long
   and returns it as a Python int.  The closure argument is unused. */
#define INT_GETTER(name) \
    static PyObject * \
    xmlparse_##name##_getter(xmlparseobject *self, void *closure) \
    { \
        return PyLong_FromLong((long) XML_Get##name(self->itself)); \
    }
INT_GETTER(ErrorCode)
INT_GETTER(ErrorLineNumber)
INT_GETTER(ErrorColumnNumber)
INT_GETTER(ErrorByteIndex)
INT_GETTER(CurrentLineNumber)
INT_GETTER(CurrentColumnNumber)
INT_GETTER(CurrentByteIndex)

/* The macro is only needed for the expansions above. */
#undef INT_GETTER
1354
1355 static PyObject *
1356 xmlparse_buffer_text_getter(xmlparseobject *self, void *closure)
1357 {
1358 return PyBool_FromLong(self->buffer != NULL);
1359 }
1360
1361 static int
xmlparse_buffer_text_setter(xmlparseobject * self,PyObject * v,void * closure)1362 xmlparse_buffer_text_setter(xmlparseobject *self, PyObject *v, void *closure)
1363 {
1364 if (v == NULL) {
1365 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1366 return -1;
1367 }
1368 int b = PyObject_IsTrue(v);
1369 if (b < 0)
1370 return -1;
1371 if (b) {
1372 if (self->buffer == NULL) {
1373 self->buffer = PyMem_Malloc(self->buffer_size);
1374 if (self->buffer == NULL) {
1375 PyErr_NoMemory();
1376 return -1;
1377 }
1378 self->buffer_used = 0;
1379 }
1380 }
1381 else if (self->buffer != NULL) {
1382 if (flush_character_buffer(self) < 0)
1383 return -1;
1384 PyMem_Free(self->buffer);
1385 self->buffer = NULL;
1386 }
1387 return 0;
1388 }
1389
1390 static PyObject *
xmlparse_buffer_size_getter(xmlparseobject * self,void * closure)1391 xmlparse_buffer_size_getter(xmlparseobject *self, void *closure)
1392 {
1393 return PyLong_FromLong((long) self->buffer_size);
1394 }
1395
1396 static int
xmlparse_buffer_size_setter(xmlparseobject * self,PyObject * v,void * closure)1397 xmlparse_buffer_size_setter(xmlparseobject *self, PyObject *v, void *closure)
1398 {
1399 if (v == NULL) {
1400 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1401 return -1;
1402 }
1403 long new_buffer_size;
1404 if (!PyLong_Check(v)) {
1405 PyErr_SetString(PyExc_TypeError, "buffer_size must be an integer");
1406 return -1;
1407 }
1408
1409 new_buffer_size = PyLong_AsLong(v);
1410 if (new_buffer_size <= 0) {
1411 if (!PyErr_Occurred())
1412 PyErr_SetString(PyExc_ValueError, "buffer_size must be greater than zero");
1413 return -1;
1414 }
1415
1416 /* trivial case -- no change */
1417 if (new_buffer_size == self->buffer_size) {
1418 return 0;
1419 }
1420
1421 /* check maximum */
1422 if (new_buffer_size > INT_MAX) {
1423 PyErr_Format(PyExc_ValueError, "buffer_size must not be greater than %i", INT_MAX);
1424 return -1;
1425 }
1426
1427 if (self->buffer != NULL) {
1428 /* there is already a buffer */
1429 if (self->buffer_used != 0) {
1430 if (flush_character_buffer(self) < 0) {
1431 return -1;
1432 }
1433 }
1434 /* free existing buffer */
1435 PyMem_Free(self->buffer);
1436 }
1437 self->buffer = PyMem_Malloc(new_buffer_size);
1438 if (self->buffer == NULL) {
1439 PyErr_NoMemory();
1440 return -1;
1441 }
1442 self->buffer_size = new_buffer_size;
1443 return 0;
1444 }
1445
1446 static PyObject *
xmlparse_buffer_used_getter(xmlparseobject * self,void * closure)1447 xmlparse_buffer_used_getter(xmlparseobject *self, void *closure)
1448 {
1449 return PyLong_FromLong((long) self->buffer_used);
1450 }
1451
1452 static PyObject *
xmlparse_namespace_prefixes_getter(xmlparseobject * self,void * closure)1453 xmlparse_namespace_prefixes_getter(xmlparseobject *self, void *closure)
1454 {
1455 return PyBool_FromLong(self->ns_prefixes);
1456 }
1457
1458 static int
xmlparse_namespace_prefixes_setter(xmlparseobject * self,PyObject * v,void * closure)1459 xmlparse_namespace_prefixes_setter(xmlparseobject *self, PyObject *v, void *closure)
1460 {
1461 if (v == NULL) {
1462 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1463 return -1;
1464 }
1465 int b = PyObject_IsTrue(v);
1466 if (b < 0)
1467 return -1;
1468 self->ns_prefixes = b;
1469 XML_SetReturnNSTriplet(self->itself, self->ns_prefixes);
1470 return 0;
1471 }
1472
1473 static PyObject *
xmlparse_ordered_attributes_getter(xmlparseobject * self,void * closure)1474 xmlparse_ordered_attributes_getter(xmlparseobject *self, void *closure)
1475 {
1476 return PyBool_FromLong(self->ordered_attributes);
1477 }
1478
1479 static int
xmlparse_ordered_attributes_setter(xmlparseobject * self,PyObject * v,void * closure)1480 xmlparse_ordered_attributes_setter(xmlparseobject *self, PyObject *v, void *closure)
1481 {
1482 if (v == NULL) {
1483 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1484 return -1;
1485 }
1486 int b = PyObject_IsTrue(v);
1487 if (b < 0)
1488 return -1;
1489 self->ordered_attributes = b;
1490 return 0;
1491 }
1492
1493 static PyObject *
xmlparse_specified_attributes_getter(xmlparseobject * self,void * closure)1494 xmlparse_specified_attributes_getter(xmlparseobject *self, void *closure)
1495 {
1496 return PyBool_FromLong((long) self->specified_attributes);
1497 }
1498
1499 static int
xmlparse_specified_attributes_setter(xmlparseobject * self,PyObject * v,void * closure)1500 xmlparse_specified_attributes_setter(xmlparseobject *self, PyObject *v, void *closure)
1501 {
1502 if (v == NULL) {
1503 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1504 return -1;
1505 }
1506 int b = PyObject_IsTrue(v);
1507 if (b < 0)
1508 return -1;
1509 self->specified_attributes = b;
1510 return 0;
1511 }
1512
/* Plain struct members exposed on xmlparser objects: the `intern` object
   pointer, declared Py_READONLY. */
static PyMemberDef xmlparse_members[] = {
    {"intern", _Py_T_OBJECT, offsetof(xmlparseobject, intern), Py_READONLY, NULL},
    {NULL}
};
1517
/* Build PyGetSetDef entries from the xmlparse_<name>_getter and
   xmlparse_<name>_setter functions.  XMLPARSE_GETTER_DEF leaves the
   setter NULL.  Each expansion ends with a comma so the entries can be
   listed one per line. */
#define XMLPARSE_GETTER_DEF(name) \
    {#name, (getter)xmlparse_##name##_getter, NULL, NULL},
#define XMLPARSE_GETTER_SETTER_DEF(name) \
    {#name, (getter)xmlparse_##name##_getter, \
            (setter)xmlparse_##name##_setter, NULL},

/* Computed attributes of xmlparser objects (installed via Py_tp_getset). */
static PyGetSetDef xmlparse_getsetlist[] = {
    XMLPARSE_GETTER_DEF(ErrorCode)
    XMLPARSE_GETTER_DEF(ErrorLineNumber)
    XMLPARSE_GETTER_DEF(ErrorColumnNumber)
    XMLPARSE_GETTER_DEF(ErrorByteIndex)
    XMLPARSE_GETTER_DEF(CurrentLineNumber)
    XMLPARSE_GETTER_DEF(CurrentColumnNumber)
    XMLPARSE_GETTER_DEF(CurrentByteIndex)
    XMLPARSE_GETTER_SETTER_DEF(buffer_size)
    XMLPARSE_GETTER_SETTER_DEF(buffer_text)
    XMLPARSE_GETTER_DEF(buffer_used)
    XMLPARSE_GETTER_SETTER_DEF(namespace_prefixes)
    XMLPARSE_GETTER_SETTER_DEF(ordered_attributes)
    XMLPARSE_GETTER_SETTER_DEF(specified_attributes)
    {NULL},
};

/* The helper macros are only needed for the table above. */
#undef XMLPARSE_GETTER_DEF
#undef XMLPARSE_GETTER_SETTER_DEF
1543
/* Docstring of the xmlparser type. */
PyDoc_STRVAR(Xmlparsetype__doc__, "XML parser");

/* Slot table of the xmlparser heap type: deallocation and GC hooks plus
   the method, member and getset tables. */
static PyType_Slot _xml_parse_type_spec_slots[] = {
    {Py_tp_dealloc, xmlparse_dealloc},
    {Py_tp_doc, (void *)Xmlparsetype__doc__},
    {Py_tp_traverse, xmlparse_traverse},
    {Py_tp_clear, xmlparse_clear},
    {Py_tp_methods, xmlparse_methods},
    {Py_tp_members, xmlparse_members},
    {Py_tp_getset, xmlparse_getsetlist},
    {0, 0}
};
1556
/* Type specification passed to PyType_FromModuleAndSpec() in
   pyexpat_exec().  The type is GC-aware, immutable, and flagged
   Py_TPFLAGS_DISALLOW_INSTANTIATION; parser objects are built by
   newxmlparseobject(). */
static PyType_Spec _xml_parse_type_spec = {
    .name = "pyexpat.xmlparser",
    .basicsize = sizeof(xmlparseobject),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
              Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_IMMUTABLETYPE),
    .slots = _xml_parse_type_spec_slots,
};
1564
1565 /* End of code for xmlparser objects */
1566 /* -------------------------------------------------------- */
1567
1568 /*[clinic input]
1569 pyexpat.ParserCreate
1570
1571 encoding: str(accept={str, NoneType}) = None
1572 namespace_separator: str(accept={str, NoneType}) = None
1573 intern: object = NULL
1574
1575 Return a new XML parser object.
1576 [clinic start generated code]*/
1577
1578 static PyObject *
pyexpat_ParserCreate_impl(PyObject * module,const char * encoding,const char * namespace_separator,PyObject * intern)1579 pyexpat_ParserCreate_impl(PyObject *module, const char *encoding,
1580 const char *namespace_separator, PyObject *intern)
1581 /*[clinic end generated code: output=295c0cf01ab1146c input=e8da8e8d7122cb5d]*/
1582 {
1583 PyObject *result;
1584 int intern_decref = 0;
1585
1586 if (namespace_separator != NULL
1587 && strlen(namespace_separator) > 1) {
1588 PyErr_SetString(PyExc_ValueError,
1589 "namespace_separator must be at most one"
1590 " character, omitted, or None");
1591 return NULL;
1592 }
1593 /* Explicitly passing None means no interning is desired.
1594 Not passing anything means that a new dictionary is used. */
1595 if (intern == Py_None)
1596 intern = NULL;
1597 else if (intern == NULL) {
1598 intern = PyDict_New();
1599 if (!intern)
1600 return NULL;
1601 intern_decref = 1;
1602 }
1603 else if (!PyDict_Check(intern)) {
1604 PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
1605 return NULL;
1606 }
1607
1608 pyexpat_state *state = pyexpat_get_state(module);
1609 result = newxmlparseobject(state, encoding, namespace_separator, intern);
1610 if (intern_decref) {
1611 Py_DECREF(intern);
1612 }
1613 return result;
1614 }
1615
1616 /*[clinic input]
1617 pyexpat.ErrorString
1618
1619 code: long
1620 /
1621
1622 Returns string error for given number.
1623 [clinic start generated code]*/
1624
1625 static PyObject *
pyexpat_ErrorString_impl(PyObject * module,long code)1626 pyexpat_ErrorString_impl(PyObject *module, long code)
1627 /*[clinic end generated code: output=2feae50d166f2174 input=cc67de010d9e62b3]*/
1628 {
1629 return conv_string_to_unicode(XML_ErrorString((int)code));
1630 }
1631
1632 /* List of methods defined in the module */
1633
/* Module-level functions: ParserCreate() and ErrorString().  The
   *_METHODDEF macros are defined outside this chunk. */
static struct PyMethodDef pyexpat_methods[] = {
    PYEXPAT_PARSERCREATE_METHODDEF
    PYEXPAT_ERRORSTRING_METHODDEF
    {NULL, NULL}  /* sentinel */
};
1639
1640 /* Module docstring */
1641
PyDoc_STRVAR(pyexpat_module_documentation,
"Python wrapper for Expat parser.");

/* Initialization function for the module */

/* Module name used when building the dotted submodule names below;
   defaults to "pyexpat" unless it has already been defined. */
#ifndef MODULE_NAME
#define MODULE_NAME "pyexpat"
#endif
1650
/* Install one getset descriptor per handler_info entry into the xmlparser
   type's dictionary.  Each descriptor uses the shared handler
   getter/setter with the handler_info entry itself as closure.  Names
   already present in the dictionary are left alone
   (PyDict_SetDefaultRef).
   Returns 0 on success, -1 with an exception set on failure. */
static int init_handler_descrs(pyexpat_state *state)
{
    PyTypeObject *type = state->xml_parse_type;
    assert(type->tp_version_tag == 0);

    for (struct HandlerInfo *hi = handler_info; hi->name != NULL; hi++) {
        hi->getset.name = hi->name;
        hi->getset.get = (getter)xmlparse_handler_getter;
        hi->getset.set = (setter)xmlparse_handler_setter;
        hi->getset.closure = hi;

        PyObject *descr = PyDescr_NewGetSet(type, &hi->getset);
        if (descr == NULL) {
            return -1;
        }

        /* The dictionary keeps its own reference on success. */
        int rc = PyDict_SetDefaultRef(type->tp_dict,
                                      PyDescr_NAME(descr), descr, NULL);
        Py_DECREF(descr);
        if (rc < 0) {
            return -1;
        }
    }
    return 0;
}
1675
1676 static PyObject *
add_submodule(PyObject * mod,const char * fullname)1677 add_submodule(PyObject *mod, const char *fullname)
1678 {
1679 const char *name = strrchr(fullname, '.') + 1;
1680
1681 PyObject *submodule = PyModule_New(fullname);
1682 if (submodule == NULL) {
1683 return NULL;
1684 }
1685
1686 PyObject *mod_name = PyUnicode_FromString(fullname);
1687 if (mod_name == NULL) {
1688 Py_DECREF(submodule);
1689 return NULL;
1690 }
1691
1692 if (_PyImport_SetModule(mod_name, submodule) < 0) {
1693 Py_DECREF(submodule);
1694 Py_DECREF(mod_name);
1695 return NULL;
1696 }
1697 Py_DECREF(mod_name);
1698
1699 /* gives away the reference to the submodule */
1700 if (PyModule_Add(mod, name, submodule) < 0) {
1701 return NULL;
1702 }
1703
1704 return submodule;
1705 }
1706
/* One row of the error table used to populate the "errors" submodule. */
struct ErrorInfo {
    const char * name;  /* Error constant name, e.g. "XML_ERROR_NO_MEMORY" */
    const char * description;  /* Error description as returned by XML_ErrorString(<int>) */
};
1711
/* Error table indexed by Expat error code: add_error() uses the array
   index as the code passed to XML_ErrorString(), so the order of the
   entries must not change.  The descriptions only serve as a fallback
   when XML_ErrorString() returns NULL. */
static
struct ErrorInfo error_info_of[] = {
    {NULL, NULL},  /* XML_ERROR_NONE (value 0) is not exposed */

    {"XML_ERROR_NO_MEMORY", "out of memory"},
    {"XML_ERROR_SYNTAX", "syntax error"},
    {"XML_ERROR_NO_ELEMENTS", "no element found"},
    {"XML_ERROR_INVALID_TOKEN", "not well-formed (invalid token)"},
    {"XML_ERROR_UNCLOSED_TOKEN", "unclosed token"},
    {"XML_ERROR_PARTIAL_CHAR", "partial character"},
    {"XML_ERROR_TAG_MISMATCH", "mismatched tag"},
    {"XML_ERROR_DUPLICATE_ATTRIBUTE", "duplicate attribute"},
    {"XML_ERROR_JUNK_AFTER_DOC_ELEMENT", "junk after document element"},
    {"XML_ERROR_PARAM_ENTITY_REF", "illegal parameter entity reference"},
    {"XML_ERROR_UNDEFINED_ENTITY", "undefined entity"},
    {"XML_ERROR_RECURSIVE_ENTITY_REF", "recursive entity reference"},
    {"XML_ERROR_ASYNC_ENTITY", "asynchronous entity"},
    {"XML_ERROR_BAD_CHAR_REF", "reference to invalid character number"},
    {"XML_ERROR_BINARY_ENTITY_REF", "reference to binary entity"},
    {"XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF", "reference to external entity in attribute"},
    {"XML_ERROR_MISPLACED_XML_PI", "XML or text declaration not at start of entity"},
    {"XML_ERROR_UNKNOWN_ENCODING", "unknown encoding"},
    {"XML_ERROR_INCORRECT_ENCODING", "encoding specified in XML declaration is incorrect"},
    {"XML_ERROR_UNCLOSED_CDATA_SECTION", "unclosed CDATA section"},
    {"XML_ERROR_EXTERNAL_ENTITY_HANDLING", "error in processing external entity reference"},
    {"XML_ERROR_NOT_STANDALONE", "document is not standalone"},
    {"XML_ERROR_UNEXPECTED_STATE", "unexpected parser state - please send a bug report"},
    {"XML_ERROR_ENTITY_DECLARED_IN_PE", "entity declared in parameter entity"},
    {"XML_ERROR_FEATURE_REQUIRES_XML_DTD", "requested feature requires XML_DTD support in Expat"},
    {"XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING", "cannot change setting once parsing has begun"},

    /* Added in Expat 1.95.7. */
    {"XML_ERROR_UNBOUND_PREFIX", "unbound prefix"},

    /* Added in Expat 1.95.8. */
    {"XML_ERROR_UNDECLARING_PREFIX", "must not undeclare prefix"},
    {"XML_ERROR_INCOMPLETE_PE", "incomplete markup in parameter entity"},
    {"XML_ERROR_XML_DECL", "XML declaration not well-formed"},
    {"XML_ERROR_TEXT_DECL", "text declaration not well-formed"},
    {"XML_ERROR_PUBLICID", "illegal character(s) in public id"},
    {"XML_ERROR_SUSPENDED", "parser suspended"},
    {"XML_ERROR_NOT_SUSPENDED", "parser not suspended"},
    {"XML_ERROR_ABORTED", "parsing aborted"},
    {"XML_ERROR_FINISHED", "parsing finished"},
    {"XML_ERROR_SUSPEND_PE", "cannot suspend in external parameter entity"},

    /* Added in 2.0.0. */
    {"XML_ERROR_RESERVED_PREFIX_XML", "reserved prefix (xml) must not be undeclared or bound to another namespace name"},
    {"XML_ERROR_RESERVED_PREFIX_XMLNS", "reserved prefix (xmlns) must not be declared or undeclared"},
    {"XML_ERROR_RESERVED_NAMESPACE_URI", "prefix must not be bound to one of the reserved namespace names"},

    /* Added in 2.2.1. */
    {"XML_ERROR_INVALID_ARGUMENT", "invalid argument"},

    /* Added in 2.3.0. */
    {"XML_ERROR_NO_BUFFER", "a successful prior call to function XML_GetBuffer is required"},

    /* Added in 2.4.0. */
    {"XML_ERROR_AMPLIFICATION_LIMIT_BREACH", "limit on input amplification factor (from DTD and entities) breached"}
};
1772
1773 static int
add_error(PyObject * errors_module,PyObject * codes_dict,PyObject * rev_codes_dict,size_t error_index)1774 add_error(PyObject *errors_module, PyObject *codes_dict,
1775 PyObject *rev_codes_dict, size_t error_index)
1776 {
1777 const char * const name = error_info_of[error_index].name;
1778 const int error_code = (int)error_index;
1779
1780 /* NOTE: This keeps the source of truth regarding error
1781 * messages with libexpat and (by definition) in bulletproof sync
1782 * with the other uses of the XML_ErrorString function
1783 * elsewhere within this file. pyexpat's copy of the messages
1784 * only acts as a fallback in case of outdated runtime libexpat,
1785 * where it returns NULL. */
1786 const char *error_string = XML_ErrorString(error_code);
1787 if (error_string == NULL) {
1788 error_string = error_info_of[error_index].description;
1789 }
1790
1791 if (PyModule_AddStringConstant(errors_module, name, error_string) < 0) {
1792 return -1;
1793 }
1794
1795 PyObject *num = PyLong_FromLong(error_code);
1796 if (num == NULL) {
1797 return -1;
1798 }
1799
1800 if (PyDict_SetItemString(codes_dict, error_string, num) < 0) {
1801 Py_DECREF(num);
1802 return -1;
1803 }
1804
1805 PyObject *str = PyUnicode_FromString(error_string);
1806 if (str == NULL) {
1807 Py_DECREF(num);
1808 return -1;
1809 }
1810
1811 int res = PyDict_SetItem(rev_codes_dict, num, str);
1812 Py_DECREF(str);
1813 Py_DECREF(num);
1814 if (res < 0) {
1815 return -1;
1816 }
1817
1818 return 0;
1819 }
1820
1821 static int
add_errors_module(PyObject * mod)1822 add_errors_module(PyObject *mod)
1823 {
1824 // add_submodule() returns a borrowed ref.
1825 PyObject *errors_module = add_submodule(mod, MODULE_NAME ".errors");
1826 if (errors_module == NULL) {
1827 return -1;
1828 }
1829
1830 PyObject *codes_dict = PyDict_New();
1831 if (codes_dict == NULL) {
1832 return -1;
1833 }
1834 PyObject *rev_codes_dict = PyDict_New();
1835 if (rev_codes_dict == NULL) {
1836 goto error;
1837 }
1838
1839 size_t error_index = 0;
1840 for (; error_index < sizeof(error_info_of) / sizeof(struct ErrorInfo); error_index++) {
1841 if (error_info_of[error_index].name == NULL) {
1842 continue;
1843 }
1844
1845 if (add_error(errors_module, codes_dict, rev_codes_dict, error_index) < 0) {
1846 goto error;
1847 }
1848 }
1849
1850 if (PyModule_AddStringConstant(errors_module, "__doc__",
1851 "Constants used to describe "
1852 "error conditions.") < 0) {
1853 goto error;
1854 }
1855
1856 if (PyModule_Add(errors_module, "codes", codes_dict) < 0) {
1857 Py_DECREF(rev_codes_dict);
1858 return -1;
1859 }
1860
1861 if (PyModule_Add(errors_module, "messages", rev_codes_dict) < 0) {
1862 return -1;
1863 }
1864
1865 return 0;
1866
1867 error:
1868 Py_XDECREF(codes_dict);
1869 Py_XDECREF(rev_codes_dict);
1870 return -1;
1871 }
1872
1873 static int
add_model_module(PyObject * mod)1874 add_model_module(PyObject *mod)
1875 {
1876 PyObject *model_module = add_submodule(mod, MODULE_NAME ".model");
1877 if (model_module == NULL) {
1878 return -1;
1879 }
1880
1881 #define MYCONST(c) do { \
1882 if (PyModule_AddIntConstant(model_module, #c, c) < 0) { \
1883 return -1; \
1884 } \
1885 } while(0)
1886
1887 if (PyModule_AddStringConstant(
1888 model_module, "__doc__",
1889 "Constants used to interpret content model information.") < 0) {
1890 return -1;
1891 }
1892
1893 MYCONST(XML_CTYPE_EMPTY);
1894 MYCONST(XML_CTYPE_ANY);
1895 MYCONST(XML_CTYPE_MIXED);
1896 MYCONST(XML_CTYPE_NAME);
1897 MYCONST(XML_CTYPE_CHOICE);
1898 MYCONST(XML_CTYPE_SEQ);
1899
1900 MYCONST(XML_CQUANT_NONE);
1901 MYCONST(XML_CQUANT_OPT);
1902 MYCONST(XML_CQUANT_REP);
1903 MYCONST(XML_CQUANT_PLUS);
1904 #undef MYCONST
1905 return 0;
1906 }
1907
1908 #if XML_COMBINED_VERSION > 19505
1909 static int
add_features(PyObject * mod)1910 add_features(PyObject *mod)
1911 {
1912 PyObject *list = PyList_New(0);
1913 if (list == NULL) {
1914 return -1;
1915 }
1916
1917 const XML_Feature *features = XML_GetFeatureList();
1918 for (size_t i = 0; features[i].feature != XML_FEATURE_END; ++i) {
1919 PyObject *item = Py_BuildValue("si", features[i].name,
1920 features[i].value);
1921 if (item == NULL) {
1922 goto error;
1923 }
1924 int ok = PyList_Append(list, item);
1925 Py_DECREF(item);
1926 if (ok < 0) {
1927 goto error;
1928 }
1929 }
1930 return PyModule_Add(mod, "features", list);
1931
1932 error:
1933 Py_DECREF(list);
1934 return -1;
1935 }
1936 #endif
1937
1938 static void
pyexpat_capsule_destructor(PyObject * capsule)1939 pyexpat_capsule_destructor(PyObject *capsule)
1940 {
1941 void *p = PyCapsule_GetPointer(capsule, PyExpat_CAPSULE_NAME);
1942 if (p == NULL) {
1943 PyErr_WriteUnraisable(capsule);
1944 return;
1945 }
1946 PyMem_Free(p);
1947 }
1948
1949
/* Py_mod_exec slot: populate a freshly created pyexpat module.
 *
 * Fills in the module state (the interned "read" string, the xmlparser
 * heap type and the ExpatError exception), then adds the public
 * attributes, the "errors" and "model" submodules, the feature list
 * (Expat > 1.95.5 only) and the "expat_CAPI" capsule.
 *
 * Returns 0 on success and -1 as soon as any step fails.
 */
static int
pyexpat_exec(PyObject *mod)
{
    pyexpat_state *state = pyexpat_get_state(mod);
    state->str_read = PyUnicode_InternFromString("read");
    if (state->str_read == NULL) {
        return -1;
    }
    state->xml_parse_type = (PyTypeObject *)PyType_FromModuleAndSpec(
        mod, &_xml_parse_type_spec, NULL);

    if (state->xml_parse_type == NULL) {
        return -1;
    }

    /* Installs the per-handler descriptors in the type dict.
       init_handler_descrs() asserts that the type has no version tag
       yet, so this is done right after the type is created. */
    if (init_handler_descrs(state) < 0) {
        return -1;
    }
    state->error = PyErr_NewException("xml.parsers.expat.ExpatError",
                                      NULL, NULL);
    if (state->error == NULL) {
        return -1;
    }

    /* Add some symbolic constants to the module */

    /* "error" and "ExpatError" are two names for the same exception. */
    if (PyModule_AddObjectRef(mod, "error", state->error) < 0) {
        return -1;
    }

    if (PyModule_AddObjectRef(mod, "ExpatError", state->error) < 0) {
        return -1;
    }

    if (PyModule_AddObjectRef(mod, "XMLParserType",
                           (PyObject *) state->xml_parse_type) < 0) {
        return -1;
    }

    if (PyModule_AddStringConstant(mod, "EXPAT_VERSION",
                                   XML_ExpatVersion()) < 0) {
        return -1;
    }
    {
        XML_Expat_Version info = XML_ExpatVersionInfo();
        PyObject *versionInfo = Py_BuildValue("(iii)",
                                              info.major,
                                              info.minor,
                                              info.micro);
        /* A NULL versionInfo is not checked here; it is passed straight
           to PyModule_Add(), whose result is checked. */
        if (PyModule_Add(mod, "version_info", versionInfo) < 0) {
            return -1;
        }
    }
    /* XXX When Expat supports some way of figuring out how it was
       compiled, this should check and set native_encoding
       appropriately.
    */
    if (PyModule_AddStringConstant(mod, "native_encoding", "UTF-8") < 0) {
        return -1;
    }

    if (add_errors_module(mod) < 0) {
        return -1;
    }

    if (add_model_module(mod) < 0) {
        return -1;
    }

#if XML_COMBINED_VERSION > 19505
    if (add_features(mod) < 0) {
        return -1;
    }
#endif

/* Export an Expat constant under its own C identifier. */
#define MYCONST(c) do {                                 \
        if (PyModule_AddIntConstant(mod, #c, c) < 0) {  \
            return -1;                                  \
        }                                               \
    } while(0)

    MYCONST(XML_PARAM_ENTITY_PARSING_NEVER);
    MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
    MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS);
#undef MYCONST

    /* The dispatch table is heap-allocated; once the capsule exists,
       pyexpat_capsule_destructor() is responsible for freeing it. */
    struct PyExpat_CAPI *capi = PyMem_Malloc(sizeof(*capi));
    if (capi == NULL) {
        PyErr_NoMemory();
        return -1;
    }
    /* initialize pyexpat dispatch table */
    capi->size = sizeof(*capi);
    capi->magic = PyExpat_CAPI_MAGIC;
    capi->MAJOR_VERSION = XML_MAJOR_VERSION;
    capi->MINOR_VERSION = XML_MINOR_VERSION;
    capi->MICRO_VERSION = XML_MICRO_VERSION;
    capi->ErrorString = XML_ErrorString;
    capi->GetErrorCode = XML_GetErrorCode;
    capi->GetErrorColumnNumber = XML_GetErrorColumnNumber;
    capi->GetErrorLineNumber = XML_GetErrorLineNumber;
    capi->Parse = XML_Parse;
    capi->ParserCreate_MM = XML_ParserCreate_MM;
    capi->ParserFree = XML_ParserFree;
    capi->SetCharacterDataHandler = XML_SetCharacterDataHandler;
    capi->SetCommentHandler = XML_SetCommentHandler;
    capi->SetDefaultHandlerExpand = XML_SetDefaultHandlerExpand;
    capi->SetElementHandler = XML_SetElementHandler;
    capi->SetNamespaceDeclHandler = XML_SetNamespaceDeclHandler;
    capi->SetProcessingInstructionHandler = XML_SetProcessingInstructionHandler;
    capi->SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler;
    capi->SetUserData = XML_SetUserData;
    capi->SetStartDoctypeDeclHandler = XML_SetStartDoctypeDeclHandler;
    capi->SetEncoding = XML_SetEncoding;
    capi->DefaultUnknownEncodingHandler = PyUnknownEncodingHandler;
    /* Entry points missing from older Expat versions are left NULL. */
#if XML_COMBINED_VERSION >= 20100
    capi->SetHashSalt = XML_SetHashSalt;
#else
    capi->SetHashSalt = NULL;
#endif
#if XML_COMBINED_VERSION >= 20600
    capi->SetReparseDeferralEnabled = XML_SetReparseDeferralEnabled;
#else
    capi->SetReparseDeferralEnabled = NULL;
#endif

    /* export using capsule */
    PyObject *capi_object = PyCapsule_New(capi, PyExpat_CAPSULE_NAME,
                                          pyexpat_capsule_destructor);
    if (capi_object == NULL) {
        /* No capsule took ownership, so free the table here. */
        PyMem_Free(capi);
        return -1;
    }

    if (PyModule_Add(mod, "expat_CAPI", capi_object) < 0) {
        return -1;
    }

    return 0;
}
2090
2091 static int
pyexpat_traverse(PyObject * module,visitproc visit,void * arg)2092 pyexpat_traverse(PyObject *module, visitproc visit, void *arg)
2093 {
2094 pyexpat_state *state = pyexpat_get_state(module);
2095 Py_VISIT(state->xml_parse_type);
2096 Py_VISIT(state->error);
2097 Py_VISIT(state->str_read);
2098 return 0;
2099 }
2100
2101 static int
pyexpat_clear(PyObject * module)2102 pyexpat_clear(PyObject *module)
2103 {
2104 pyexpat_state *state = pyexpat_get_state(module);
2105 Py_CLEAR(state->xml_parse_type);
2106 Py_CLEAR(state->error);
2107 Py_CLEAR(state->str_read);
2108 return 0;
2109 }
2110
2111 static void
pyexpat_free(void * module)2112 pyexpat_free(void *module)
2113 {
2114 pyexpat_clear((PyObject *)module);
2115 }
2116
2117 static PyModuleDef_Slot pyexpat_slots[] = {
2118 {Py_mod_exec, pyexpat_exec},
2119 {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
2120 {Py_mod_gil, Py_MOD_GIL_NOT_USED},
2121 {0, NULL}
2122 };
2123
2124 static struct PyModuleDef pyexpatmodule = {
2125 PyModuleDef_HEAD_INIT,
2126 .m_name = MODULE_NAME,
2127 .m_doc = pyexpat_module_documentation,
2128 .m_size = sizeof(pyexpat_state),
2129 .m_methods = pyexpat_methods,
2130 .m_slots = pyexpat_slots,
2131 .m_traverse = pyexpat_traverse,
2132 .m_clear = pyexpat_clear,
2133 .m_free = pyexpat_free
2134 };
2135
2136 PyMODINIT_FUNC
PyInit_pyexpat(void)2137 PyInit_pyexpat(void)
2138 {
2139 return PyModuleDef_Init(&pyexpatmodule);
2140 }
2141
2142 static void
clear_handlers(xmlparseobject * self,int initial)2143 clear_handlers(xmlparseobject *self, int initial)
2144 {
2145 int i = 0;
2146
2147 for (; handler_info[i].name != NULL; i++) {
2148 if (initial)
2149 self->handlers[i] = NULL;
2150 else {
2151 Py_CLEAR(self->handlers[i]);
2152 handler_info[i].setter(self->itself, NULL);
2153 }
2154 }
2155 }
2156
/* Table describing every Expat handler known to this module.
 *
 * Each row holds the handler's name as a string, the matching XML_Set<name>
 * function (cast to xmlhandlersetter) and the my_<name> function (cast to
 * xmlhandler).  The table is terminated by an all-NULL sentinel row;
 * clear_handlers() iterates until it sees the NULL name and uses the row
 * index to address self->handlers.
 *
 * The rows are listed in the same order as the enumerators of
 * enum HandlerTypes, including the same XML_COMBINED_VERSION guard around
 * the SkippedEntity entry.
 * NOTE(review): presumably those enumerators are used as indices into this
 * table -- keep both in sync; verify against the users of the enum.
 */
static struct HandlerInfo handler_info[] = {

/* Expands to one table row for the handler called `name`. */
#define HANDLER_INFO(name) \
    {#name, (xmlhandlersetter)XML_Set##name, (xmlhandler)my_##name},

    HANDLER_INFO(StartElementHandler)
    HANDLER_INFO(EndElementHandler)
    HANDLER_INFO(ProcessingInstructionHandler)
    HANDLER_INFO(CharacterDataHandler)
    HANDLER_INFO(UnparsedEntityDeclHandler)
    HANDLER_INFO(NotationDeclHandler)
    HANDLER_INFO(StartNamespaceDeclHandler)
    HANDLER_INFO(EndNamespaceDeclHandler)
    HANDLER_INFO(CommentHandler)
    HANDLER_INFO(StartCdataSectionHandler)
    HANDLER_INFO(EndCdataSectionHandler)
    HANDLER_INFO(DefaultHandler)
    HANDLER_INFO(DefaultHandlerExpand)
    HANDLER_INFO(NotStandaloneHandler)
    HANDLER_INFO(ExternalEntityRefHandler)
    HANDLER_INFO(StartDoctypeDeclHandler)
    HANDLER_INFO(EndDoctypeDeclHandler)
    HANDLER_INFO(EntityDeclHandler)
    HANDLER_INFO(XmlDeclHandler)
    HANDLER_INFO(ElementDeclHandler)
    HANDLER_INFO(AttlistDeclHandler)
/* Only emitted when the Expat version check below passes. */
#if XML_COMBINED_VERSION >= 19504
    HANDLER_INFO(SkippedEntityHandler)
#endif

#undef HANDLER_INFO

    {NULL, NULL, NULL} /* sentinel */
};
2191