1 #include "Python.h"
2 #include <ctype.h>
3
4 #include "structmember.h" // PyMemberDef
5 #include "frameobject.h"
6 #include "expat.h"
7
8 #include "pyexpat.h"
9
10 /* Do not emit Clinic output to a file as that wreaks havoc with conditionally
11 included methods. */
12 /*[clinic input]
13 module pyexpat
14 [clinic start generated code]*/
15 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=b168d503a4490c15]*/
16
/* Expat version folded into one integer (major*10000 + minor*100 + micro)
   so that feature checks can be written as a single preprocessor
   comparison, e.g. "#if XML_COMBINED_VERSION >= 19505". */
#define XML_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)

/* Allocator suite handed to XML_ParserCreate_MM() in newxmlparseobject(),
   so parsers created there allocate through PyObject_Malloc,
   PyObject_Realloc and PyObject_Free. */
static XML_Memory_Handling_Suite ExpatMemoryHandler = {
    PyObject_Malloc, PyObject_Realloc, PyObject_Free};
21
/* Identifiers of the handler slots.  Each value is used as an index into
   xmlparseobject.handlers (see have_handler()).
   NOTE(review): the same index is also used for handler_info[] when
   handlers are copied to a sub-parser, so the order presumably has to match
   that table, which is filled in outside this view -- confirm before
   reordering or inserting entries. */
enum HandlerTypes {
    StartElement,
    EndElement,
    ProcessingInstruction,
    CharacterData,
    UnparsedEntityDecl,
    NotationDecl,
    StartNamespaceDecl,
    EndNamespaceDecl,
    Comment,
    StartCdataSection,
    EndCdataSection,
    Default,
    DefaultHandlerExpand,
    NotStandalone,
    ExternalEntityRef,
    StartDoctypeDecl,
    EndDoctypeDecl,
    EntityDecl,
    XmlDecl,
    ElementDecl,
    AttlistDecl,
#if XML_COMBINED_VERSION >= 19504
    SkippedEntity,
#endif
    _DummyDecl
};

/* Exception type instantiated and raised by set_error(). */
static PyObject *ErrorObject;
51
52 /* ----------------------------------------------------- */
53
54 /* Declarations for objects of type xmlparser */
55
/* Instance layout of an xmlparser object: the wrapped Expat parser plus the
   Python-level configuration and callback state attached to it. */
typedef struct {
    PyObject_HEAD

    XML_Parser itself;          /* The wrapped Expat parser */
    int ordered_attributes;     /* Return attributes as a list. */
    int specified_attributes;   /* Report only specified attributes. */
    int in_callback;            /* Is a callback active? */
    int ns_prefixes;            /* Namespace-triplets mode? */
    XML_Char *buffer;           /* Buffer used when accumulating characters */
                                /* NULL if not enabled */
    int buffer_size;            /* Size of buffer, in XML_Char units */
    int buffer_used;            /* Buffer units in use */
    PyObject *intern;           /* Dictionary to intern strings */
    PyObject **handlers;        /* Python handlers indexed by HandlerTypes;
                                   a NULL entry means "no handler set" */
} xmlparseobject;
71
72 #include "clinic/pyexpat.c.h"
73
/* buffer_size assigned to freshly created parsers (newxmlparseobject()). */
#define CHARACTER_DATA_BUFFER_SIZE 8192

static PyTypeObject Xmlparsetype;

/* Function that registers a C callback `meth` on an Expat parser. */
typedef void (*xmlhandlersetter)(XML_Parser self, void *meth);
/* Untyped pointer to a C callback that is passed to an xmlhandlersetter. */
typedef void* xmlhandler;

/* One row of the handler table. */
struct HandlerInfo {
    const char *name;           /* NULL marks the end of the table */
    xmlhandlersetter setter;    /* called as setter(parser, handler) */
    xmlhandler handler;         /* C callback given to `setter` */
    PyGetSetDef getset;         /* NOTE(review): presumably the attribute
                                   descriptor for this handler -- populated
                                   outside this view */
};

/* Handler table; scanned up to the first entry whose name is NULL. */
static struct HandlerInfo handler_info[64];
89
90 /* Set an integer attribute on the error object; return true on success,
91 * false on an exception.
92 */
93 static int
set_error_attr(PyObject * err,const char * name,int value)94 set_error_attr(PyObject *err, const char *name, int value)
95 {
96 PyObject *v = PyLong_FromLong(value);
97
98 if (v == NULL || PyObject_SetAttrString(err, name, v) == -1) {
99 Py_XDECREF(v);
100 return 0;
101 }
102 Py_DECREF(v);
103 return 1;
104 }
105
106 /* Build and set an Expat exception, including positioning
107 * information. Always returns NULL.
108 */
109 static PyObject *
set_error(xmlparseobject * self,enum XML_Error code)110 set_error(xmlparseobject *self, enum XML_Error code)
111 {
112 PyObject *err;
113 PyObject *buffer;
114 XML_Parser parser = self->itself;
115 int lineno = XML_GetErrorLineNumber(parser);
116 int column = XML_GetErrorColumnNumber(parser);
117
118 buffer = PyUnicode_FromFormat("%s: line %i, column %i",
119 XML_ErrorString(code), lineno, column);
120 if (buffer == NULL)
121 return NULL;
122 err = PyObject_CallOneArg(ErrorObject, buffer);
123 Py_DECREF(buffer);
124 if ( err != NULL
125 && set_error_attr(err, "code", code)
126 && set_error_attr(err, "offset", column)
127 && set_error_attr(err, "lineno", lineno)) {
128 PyErr_SetObject(ErrorObject, err);
129 }
130 Py_XDECREF(err);
131 return NULL;
132 }
133
134 static int
have_handler(xmlparseobject * self,int type)135 have_handler(xmlparseobject *self, int type)
136 {
137 PyObject *handler = self->handlers[type];
138 return handler != NULL;
139 }
140
141 /* Convert a string of XML_Chars into a Unicode string.
142 Returns None if str is a null pointer. */
143
144 static PyObject *
conv_string_to_unicode(const XML_Char * str)145 conv_string_to_unicode(const XML_Char *str)
146 {
147 /* XXX currently this code assumes that XML_Char is 8-bit,
148 and hence in UTF-8. */
149 /* UTF-8 from Expat, Unicode desired */
150 if (str == NULL) {
151 Py_RETURN_NONE;
152 }
153 return PyUnicode_DecodeUTF8(str, strlen(str), "strict");
154 }
155
156 static PyObject *
conv_string_len_to_unicode(const XML_Char * str,int len)157 conv_string_len_to_unicode(const XML_Char *str, int len)
158 {
159 /* XXX currently this code assumes that XML_Char is 8-bit,
160 and hence in UTF-8. */
161 /* UTF-8 from Expat, Unicode desired */
162 if (str == NULL) {
163 Py_RETURN_NONE;
164 }
165 return PyUnicode_DecodeUTF8((const char *)str, len, "strict");
166 }
167
168 /* Callback routines */
169
170 static void clear_handlers(xmlparseobject *self, int initial);
171
172 /* This handler is used when an error has been detected, in the hope
173 that actual parsing can be terminated early. This will only help
174 if an external entity reference is encountered. */
175 static int
error_external_entity_ref_handler(XML_Parser parser,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)176 error_external_entity_ref_handler(XML_Parser parser,
177 const XML_Char *context,
178 const XML_Char *base,
179 const XML_Char *systemId,
180 const XML_Char *publicId)
181 {
182 return 0;
183 }
184
185 /* Dummy character data handler used when an error (exception) has
186 been detected, and the actual parsing can be terminated early.
187 This is needed since character data handler can't be safely removed
188 from within the character data handler, but can be replaced. It is
189 used only from the character data handler trampoline, and must be
190 used right after `flag_error()` is called. */
191 static void
noop_character_data_handler(void * userData,const XML_Char * data,int len)192 noop_character_data_handler(void *userData, const XML_Char *data, int len)
193 {
194 /* Do nothing. */
195 }
196
/* Switch the parser into its "error detected" state: clear_handlers(self, 0)
 * is invoked and error_external_entity_ref_handler is installed as the
 * external entity reference handler.
 * NOTE(review): clear_handlers() is defined outside this view; presumably it
 * drops the Python handlers and unregisters the matching Expat callbacks. */
static void
flag_error(xmlparseobject *self)
{
    clear_handlers(self, 0);
    XML_SetExternalEntityRefHandler(self->itself,
                                    error_external_entity_ref_handler);
}
204
205 static PyObject*
call_with_frame(const char * funcname,int lineno,PyObject * func,PyObject * args,xmlparseobject * self)206 call_with_frame(const char *funcname, int lineno, PyObject* func, PyObject* args,
207 xmlparseobject *self)
208 {
209 PyObject *res;
210
211 res = PyObject_Call(func, args, NULL);
212 if (res == NULL) {
213 _PyTraceback_Add(funcname, __FILE__, lineno);
214 XML_StopParser(self->itself, XML_FALSE);
215 }
216 return res;
217 }
218
219 static PyObject*
string_intern(xmlparseobject * self,const char * str)220 string_intern(xmlparseobject *self, const char* str)
221 {
222 PyObject *result = conv_string_to_unicode(str);
223 PyObject *value;
224 /* result can be NULL if the unicode conversion failed. */
225 if (!result)
226 return result;
227 if (!self->intern)
228 return result;
229 value = PyDict_GetItemWithError(self->intern, result);
230 if (!value) {
231 if (!PyErr_Occurred() &&
232 PyDict_SetItem(self->intern, result, result) == 0)
233 {
234 return result;
235 }
236 else {
237 Py_DECREF(result);
238 return NULL;
239 }
240 }
241 Py_INCREF(value);
242 Py_DECREF(result);
243 return value;
244 }
245
246 /* Return 0 on success, -1 on exception.
247 * flag_error() will be called before return if needed.
248 */
249 static int
call_character_handler(xmlparseobject * self,const XML_Char * buffer,int len)250 call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len)
251 {
252 PyObject *args;
253 PyObject *temp;
254
255 if (!have_handler(self, CharacterData))
256 return -1;
257
258 args = PyTuple_New(1);
259 if (args == NULL)
260 return -1;
261 temp = (conv_string_len_to_unicode(buffer, len));
262 if (temp == NULL) {
263 Py_DECREF(args);
264 flag_error(self);
265 XML_SetCharacterDataHandler(self->itself,
266 noop_character_data_handler);
267 return -1;
268 }
269 PyTuple_SET_ITEM(args, 0, temp);
270 /* temp is now a borrowed reference; consider it unused. */
271 self->in_callback = 1;
272 temp = call_with_frame("CharacterData", __LINE__,
273 self->handlers[CharacterData], args, self);
274 /* temp is an owned reference again, or NULL */
275 self->in_callback = 0;
276 Py_DECREF(args);
277 if (temp == NULL) {
278 flag_error(self);
279 XML_SetCharacterDataHandler(self->itself,
280 noop_character_data_handler);
281 return -1;
282 }
283 Py_DECREF(temp);
284 return 0;
285 }
286
287 static int
flush_character_buffer(xmlparseobject * self)288 flush_character_buffer(xmlparseobject *self)
289 {
290 int rc;
291 if (self->buffer == NULL || self->buffer_used == 0)
292 return 0;
293 rc = call_character_handler(self, self->buffer, self->buffer_used);
294 self->buffer_used = 0;
295 return rc;
296 }
297
298 static void
my_CharacterDataHandler(void * userData,const XML_Char * data,int len)299 my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
300 {
301 xmlparseobject *self = (xmlparseobject *) userData;
302
303 if (PyErr_Occurred())
304 return;
305
306 if (self->buffer == NULL)
307 call_character_handler(self, data, len);
308 else {
309 if ((self->buffer_used + len) > self->buffer_size) {
310 if (flush_character_buffer(self) < 0)
311 return;
312 /* handler might have changed; drop the rest on the floor
313 * if there isn't a handler anymore
314 */
315 if (!have_handler(self, CharacterData))
316 return;
317 }
318 if (len > self->buffer_size) {
319 call_character_handler(self, data, len);
320 self->buffer_used = 0;
321 }
322 else {
323 memcpy(self->buffer + self->buffer_used,
324 data, len * sizeof(XML_Char));
325 self->buffer_used += len;
326 }
327 }
328 }
329
330 static void
my_StartElementHandler(void * userData,const XML_Char * name,const XML_Char * atts[])331 my_StartElementHandler(void *userData,
332 const XML_Char *name, const XML_Char *atts[])
333 {
334 xmlparseobject *self = (xmlparseobject *)userData;
335
336 if (have_handler(self, StartElement)) {
337 PyObject *container, *rv, *args;
338 int i, max;
339
340 if (PyErr_Occurred())
341 return;
342
343 if (flush_character_buffer(self) < 0)
344 return;
345 /* Set max to the number of slots filled in atts[]; max/2 is
346 * the number of attributes we need to process.
347 */
348 if (self->specified_attributes) {
349 max = XML_GetSpecifiedAttributeCount(self->itself);
350 }
351 else {
352 max = 0;
353 while (atts[max] != NULL)
354 max += 2;
355 }
356 /* Build the container. */
357 if (self->ordered_attributes)
358 container = PyList_New(max);
359 else
360 container = PyDict_New();
361 if (container == NULL) {
362 flag_error(self);
363 return;
364 }
365 for (i = 0; i < max; i += 2) {
366 PyObject *n = string_intern(self, (XML_Char *) atts[i]);
367 PyObject *v;
368 if (n == NULL) {
369 flag_error(self);
370 Py_DECREF(container);
371 return;
372 }
373 v = conv_string_to_unicode((XML_Char *) atts[i+1]);
374 if (v == NULL) {
375 flag_error(self);
376 Py_DECREF(container);
377 Py_DECREF(n);
378 return;
379 }
380 if (self->ordered_attributes) {
381 PyList_SET_ITEM(container, i, n);
382 PyList_SET_ITEM(container, i+1, v);
383 }
384 else if (PyDict_SetItem(container, n, v)) {
385 flag_error(self);
386 Py_DECREF(n);
387 Py_DECREF(v);
388 Py_DECREF(container);
389 return;
390 }
391 else {
392 Py_DECREF(n);
393 Py_DECREF(v);
394 }
395 }
396 args = string_intern(self, name);
397 if (args == NULL) {
398 Py_DECREF(container);
399 return;
400 }
401 args = Py_BuildValue("(NN)", args, container);
402 if (args == NULL) {
403 return;
404 }
405 /* Container is now a borrowed reference; ignore it. */
406 self->in_callback = 1;
407 rv = call_with_frame("StartElement", __LINE__,
408 self->handlers[StartElement], args, self);
409 self->in_callback = 0;
410 Py_DECREF(args);
411 if (rv == NULL) {
412 flag_error(self);
413 return;
414 }
415 Py_DECREF(rv);
416 }
417 }
418
/* RC_HANDLER generates the Expat callback my_<NAME>Handler, which forwards
 * an event to the Python handler stored in self->handlers[NAME].
 *
 *   RC            C return type of the generated function
 *   NAME          HandlerTypes enumerator; also used for the function name
 *                 and, stringified, as the traceback label
 *   PARAMS        parenthesized parameter list of the generated function
 *   INIT          extra declarations placed at the top of the body
 *   PARAM_FORMAT  parenthesized argument list for Py_BuildValue(); it may
 *                 refer to `self` and to the names in PARAMS
 *   CONVERSION    statement run after a successful call, with the handler
 *                 result available as `rv`
 *   RETURN        expression returned on every exit path
 *   GETUSERDATA   expression yielding the xmlparseobject pointer
 *
 * The generated function does nothing when no handler is installed or an
 * exception is pending.  Otherwise it flushes buffered character data,
 * builds the argument tuple, and calls the handler with in_callback set.
 * flag_error() is called when the arguments cannot be built or the handler
 * raises.
 */
#define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
        RETURN, GETUSERDATA) \
static RC \
my_##NAME##Handler PARAMS {\
    xmlparseobject *self = GETUSERDATA ; \
    PyObject *args = NULL; \
    PyObject *rv = NULL; \
    INIT \
\
    if (have_handler(self, NAME)) { \
        if (PyErr_Occurred()) \
            return RETURN; \
        if (flush_character_buffer(self) < 0) \
            return RETURN; \
        args = Py_BuildValue PARAM_FORMAT ;\
        if (!args) { flag_error(self); return RETURN;} \
        self->in_callback = 1; \
        rv = call_with_frame(#NAME,__LINE__, \
                             self->handlers[NAME], args, self); \
        self->in_callback = 0; \
        Py_DECREF(args); \
        if (rv == NULL) { \
            flag_error(self); \
            return RETURN; \
        } \
        CONVERSION \
        Py_DECREF(rv); \
    } \
    return RETURN; \
}

/* Callback returning void whose user data is the xmlparseobject itself. */
#define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
        RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;,\
        (xmlparseobject *)userData)

/* Callback returning int: the handler result is converted with
 * PyLong_AsLong() and returned; 0 is returned when the handler was not
 * called or failed.
 * NOTE(review): a failure of PyLong_AsLong() itself is not checked here. */
#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT)\
        RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
                        rc = PyLong_AsLong(rv);, rc, \
        (xmlparseobject *)userData)
458
/* EndElement: handler(name), name interned. */
VOID_HANDLER(EndElement,
             (void *userData, const XML_Char *name),
             ("(N)", string_intern(self, name)))

/* ProcessingInstruction: handler(target, data); only target is interned. */
VOID_HANDLER(ProcessingInstruction,
             (void *userData,
              const XML_Char *target,
              const XML_Char *data),
             ("(NO&)", string_intern(self, target), conv_string_to_unicode ,data))

/* UnparsedEntityDecl:
   handler(entityName, base, systemId, publicId, notationName),
   all interned. */
VOID_HANDLER(UnparsedEntityDecl,
             (void *userData,
              const XML_Char *entityName,
              const XML_Char *base,
              const XML_Char *systemId,
              const XML_Char *publicId,
              const XML_Char *notationName),
             ("(NNNNN)",
              string_intern(self, entityName), string_intern(self, base),
              string_intern(self, systemId), string_intern(self, publicId),
              string_intern(self, notationName)))

/* EntityDecl:
   handler(entityName, is_parameter_entity, value, base, systemId, publicId,
           notationName).
   `value` is decoded using its explicit length and is not interned. */
VOID_HANDLER(EntityDecl,
             (void *userData,
              const XML_Char *entityName,
              int is_parameter_entity,
              const XML_Char *value,
              int value_length,
              const XML_Char *base,
              const XML_Char *systemId,
              const XML_Char *publicId,
              const XML_Char *notationName),
             ("NiNNNNN",
              string_intern(self, entityName), is_parameter_entity,
              (conv_string_len_to_unicode(value, value_length)),
              string_intern(self, base), string_intern(self, systemId),
              string_intern(self, publicId),
              string_intern(self, notationName)))

/* XmlDecl: handler(version, encoding, standalone); a null version or
   encoding is passed as None. */
VOID_HANDLER(XmlDecl,
             (void *userData,
              const XML_Char *version,
              const XML_Char *encoding,
              int standalone),
             ("(O&O&i)",
              conv_string_to_unicode ,version, conv_string_to_unicode ,encoding,
              standalone))
506
507 static PyObject *
conv_content_model(XML_Content * const model,PyObject * (* conv_string)(const XML_Char *))508 conv_content_model(XML_Content * const model,
509 PyObject *(*conv_string)(const XML_Char *))
510 {
511 PyObject *result = NULL;
512 PyObject *children = PyTuple_New(model->numchildren);
513 int i;
514
515 if (children != NULL) {
516 assert(model->numchildren < INT_MAX);
517 for (i = 0; i < (int)model->numchildren; ++i) {
518 PyObject *child = conv_content_model(&model->children[i],
519 conv_string);
520 if (child == NULL) {
521 Py_XDECREF(children);
522 return NULL;
523 }
524 PyTuple_SET_ITEM(children, i, child);
525 }
526 result = Py_BuildValue("(iiO&N)",
527 model->type, model->quant,
528 conv_string,model->name, children);
529 }
530 return result;
531 }
532
533 static void
my_ElementDeclHandler(void * userData,const XML_Char * name,XML_Content * model)534 my_ElementDeclHandler(void *userData,
535 const XML_Char *name,
536 XML_Content *model)
537 {
538 xmlparseobject *self = (xmlparseobject *)userData;
539 PyObject *args = NULL;
540
541 if (have_handler(self, ElementDecl)) {
542 PyObject *rv = NULL;
543 PyObject *modelobj, *nameobj;
544
545 if (PyErr_Occurred())
546 return;
547
548 if (flush_character_buffer(self) < 0)
549 goto finally;
550 modelobj = conv_content_model(model, (conv_string_to_unicode));
551 if (modelobj == NULL) {
552 flag_error(self);
553 goto finally;
554 }
555 nameobj = string_intern(self, name);
556 if (nameobj == NULL) {
557 Py_DECREF(modelobj);
558 flag_error(self);
559 goto finally;
560 }
561 args = Py_BuildValue("NN", nameobj, modelobj);
562 if (args == NULL) {
563 flag_error(self);
564 goto finally;
565 }
566 self->in_callback = 1;
567 rv = call_with_frame("ElementDecl", __LINE__,
568 self->handlers[ElementDecl], args, self);
569 self->in_callback = 0;
570 if (rv == NULL) {
571 flag_error(self);
572 goto finally;
573 }
574 Py_DECREF(rv);
575 }
576 finally:
577 Py_XDECREF(args);
578 XML_FreeContentModel(self->itself, model);
579 return;
580 }
581
/* AttlistDecl: handler(elname, attname, att_type, dflt, isrequired);
   the element and attribute names are interned. */
VOID_HANDLER(AttlistDecl,
             (void *userData,
              const XML_Char *elname,
              const XML_Char *attname,
              const XML_Char *att_type,
              const XML_Char *dflt,
              int isrequired),
             ("(NNO&O&i)",
              string_intern(self, elname), string_intern(self, attname),
              conv_string_to_unicode ,att_type, conv_string_to_unicode ,dflt,
              isrequired))

#if XML_COMBINED_VERSION >= 19504
/* SkippedEntity: handler(entityName, is_parameter_entity). */
VOID_HANDLER(SkippedEntity,
             (void *userData,
              const XML_Char *entityName,
              int is_parameter_entity),
             ("Ni",
              string_intern(self, entityName), is_parameter_entity))
#endif

/* NotationDecl: handler(notationName, base, systemId, publicId). */
VOID_HANDLER(NotationDecl,
                (void *userData,
                        const XML_Char *notationName,
                        const XML_Char *base,
                        const XML_Char *systemId,
                        const XML_Char *publicId),
                ("(NNNN)",
                 string_intern(self, notationName), string_intern(self, base),
                 string_intern(self, systemId), string_intern(self, publicId)))

/* StartNamespaceDecl: handler(prefix, uri). */
VOID_HANDLER(StartNamespaceDecl,
                (void *userData,
                      const XML_Char *prefix,
                      const XML_Char *uri),
                ("(NN)",
                 string_intern(self, prefix), string_intern(self, uri)))

/* EndNamespaceDecl: handler(prefix). */
VOID_HANDLER(EndNamespaceDecl,
                (void *userData,
                    const XML_Char *prefix),
                ("(N)", string_intern(self, prefix)))

/* Comment: handler(data); the text is not interned. */
VOID_HANDLER(Comment,
               (void *userData, const XML_Char *data),
                ("(O&)", conv_string_to_unicode ,data))

/* StartCdataSection: handler(). */
VOID_HANDLER(StartCdataSection,
               (void *userData),
                ("()"))

/* EndCdataSection: handler(). */
VOID_HANDLER(EndCdataSection,
               (void *userData),
                ("()"))

/* Default: handler(text), decoded from the first `len` units of `s`. */
VOID_HANDLER(Default,
              (void *userData, const XML_Char *s, int len),
              ("(N)", (conv_string_len_to_unicode(s,len))))

/* DefaultHandlerExpand: same call shape as Default. */
VOID_HANDLER(DefaultHandlerExpand,
              (void *userData, const XML_Char *s, int len),
              ("(N)", (conv_string_len_to_unicode(s,len))))
/* Alias without the "Handler" suffix that the macro appends. */
#define my_DefaultHandlerExpand my_DefaultHandlerExpandHandler

/* NotStandalone: handler(); its integer result is returned to Expat. */
INT_HANDLER(NotStandalone,
                (void *userData),
                ("()"))

/* ExternalEntityRef: handler(context, base, systemId, publicId); its
   integer result is returned to Expat.  Unlike the other callbacks, this
   one receives the parser itself, so the xmlparseobject is fetched with
   XML_GetUserData(). */
RC_HANDLER(int, ExternalEntityRef,
                (XML_Parser parser,
                    const XML_Char *context,
                    const XML_Char *base,
                    const XML_Char *systemId,
                    const XML_Char *publicId),
                int rc=0;,
                ("(O&NNN)",
                 conv_string_to_unicode ,context, string_intern(self, base),
                 string_intern(self, systemId), string_intern(self, publicId)),
                rc = PyLong_AsLong(rv);, rc,
                XML_GetUserData(parser))

/* XXX UnknownEncodingHandler */

/* StartDoctypeDecl:
   handler(doctypeName, sysid, pubid, has_internal_subset). */
VOID_HANDLER(StartDoctypeDecl,
             (void *userData, const XML_Char *doctypeName,
              const XML_Char *sysid, const XML_Char *pubid,
              int has_internal_subset),
             ("(NNNi)", string_intern(self, doctypeName),
              string_intern(self, sysid), string_intern(self, pubid),
              has_internal_subset))

/* EndDoctypeDecl: handler(). */
VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
674
675 /* ---------------------------------------------------------------- */
676 /*[clinic input]
677 class pyexpat.xmlparser "xmlparseobject *" "&Xmlparsetype"
678 [clinic start generated code]*/
679 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=2393162385232e1c]*/
680
681
682 static PyObject *
get_parse_result(xmlparseobject * self,int rv)683 get_parse_result(xmlparseobject *self, int rv)
684 {
685 if (PyErr_Occurred()) {
686 return NULL;
687 }
688 if (rv == 0) {
689 return set_error(self, XML_GetErrorCode(self->itself));
690 }
691 if (flush_character_buffer(self) < 0) {
692 return NULL;
693 }
694 return PyLong_FromLong(rv);
695 }
696
/* Largest number of bytes handed to a single XML_Parse() call by Parse();
   it has to fit in an int (enforced there with Py_BUILD_ASSERT). */
#define MAX_CHUNK_SIZE (1 << 20)
698
699 /*[clinic input]
700 pyexpat.xmlparser.Parse
701
702 data: object
703 isfinal: bool(accept={int}) = False
704 /
705
706 Parse XML data.
707
708 `isfinal' should be true at end of input.
709 [clinic start generated code]*/
710
711 static PyObject *
pyexpat_xmlparser_Parse_impl(xmlparseobject * self,PyObject * data,int isfinal)712 pyexpat_xmlparser_Parse_impl(xmlparseobject *self, PyObject *data,
713 int isfinal)
714 /*[clinic end generated code: output=f4db843dd1f4ed4b input=eb616027bfa9847f]*/
715 {
716 const char *s;
717 Py_ssize_t slen;
718 Py_buffer view;
719 int rc;
720
721 if (PyUnicode_Check(data)) {
722 view.buf = NULL;
723 s = PyUnicode_AsUTF8AndSize(data, &slen);
724 if (s == NULL)
725 return NULL;
726 /* Explicitly set UTF-8 encoding. Return code ignored. */
727 (void)XML_SetEncoding(self->itself, "utf-8");
728 }
729 else {
730 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
731 return NULL;
732 s = view.buf;
733 slen = view.len;
734 }
735
736 while (slen > MAX_CHUNK_SIZE) {
737 rc = XML_Parse(self->itself, s, MAX_CHUNK_SIZE, 0);
738 if (!rc)
739 goto done;
740 s += MAX_CHUNK_SIZE;
741 slen -= MAX_CHUNK_SIZE;
742 }
743 Py_BUILD_ASSERT(MAX_CHUNK_SIZE <= INT_MAX);
744 assert(slen <= INT_MAX);
745 rc = XML_Parse(self->itself, s, (int)slen, isfinal);
746
747 done:
748 if (view.buf != NULL)
749 PyBuffer_Release(&view);
750 return get_parse_result(self, rc);
751 }
752
/* File reading copied from cPickle */

/* Number of bytes requested from the file object's read() method, and the
   size of the Expat buffer obtained, on each iteration of ParseFile(). */
#define BUF_SIZE 2048
756
757 static int
readinst(char * buf,int buf_size,PyObject * meth)758 readinst(char *buf, int buf_size, PyObject *meth)
759 {
760 PyObject *str;
761 Py_ssize_t len;
762 const char *ptr;
763
764 str = PyObject_CallFunction(meth, "n", buf_size);
765 if (str == NULL)
766 goto error;
767
768 if (PyBytes_Check(str))
769 ptr = PyBytes_AS_STRING(str);
770 else if (PyByteArray_Check(str))
771 ptr = PyByteArray_AS_STRING(str);
772 else {
773 PyErr_Format(PyExc_TypeError,
774 "read() did not return a bytes object (type=%.400s)",
775 Py_TYPE(str)->tp_name);
776 goto error;
777 }
778 len = Py_SIZE(str);
779 if (len > buf_size) {
780 PyErr_Format(PyExc_ValueError,
781 "read() returned too much data: "
782 "%i bytes requested, %zd returned",
783 buf_size, len);
784 goto error;
785 }
786 memcpy(buf, ptr, len);
787 Py_DECREF(str);
788 /* len <= buf_size <= INT_MAX */
789 return (int)len;
790
791 error:
792 Py_XDECREF(str);
793 return -1;
794 }
795
796 /*[clinic input]
797 pyexpat.xmlparser.ParseFile
798
799 file: object
800 /
801
802 Parse XML data from file-like object.
803 [clinic start generated code]*/
804
805 static PyObject *
pyexpat_xmlparser_ParseFile(xmlparseobject * self,PyObject * file)806 pyexpat_xmlparser_ParseFile(xmlparseobject *self, PyObject *file)
807 /*[clinic end generated code: output=2adc6a13100cc42b input=fbb5a12b6038d735]*/
808 {
809 int rv = 1;
810 PyObject *readmethod = NULL;
811 _Py_IDENTIFIER(read);
812
813 if (_PyObject_LookupAttrId(file, &PyId_read, &readmethod) < 0) {
814 return NULL;
815 }
816 if (readmethod == NULL) {
817 PyErr_SetString(PyExc_TypeError,
818 "argument must have 'read' attribute");
819 return NULL;
820 }
821 for (;;) {
822 int bytes_read;
823 void *buf = XML_GetBuffer(self->itself, BUF_SIZE);
824 if (buf == NULL) {
825 Py_XDECREF(readmethod);
826 return get_parse_result(self, 0);
827 }
828
829 bytes_read = readinst(buf, BUF_SIZE, readmethod);
830 if (bytes_read < 0) {
831 Py_DECREF(readmethod);
832 return NULL;
833 }
834 rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0);
835 if (PyErr_Occurred()) {
836 Py_XDECREF(readmethod);
837 return NULL;
838 }
839
840 if (!rv || bytes_read == 0)
841 break;
842 }
843 Py_XDECREF(readmethod);
844 return get_parse_result(self, rv);
845 }
846
847 /*[clinic input]
848 pyexpat.xmlparser.SetBase
849
850 base: str
851 /
852
853 Set the base URL for the parser.
854 [clinic start generated code]*/
855
856 static PyObject *
pyexpat_xmlparser_SetBase_impl(xmlparseobject * self,const char * base)857 pyexpat_xmlparser_SetBase_impl(xmlparseobject *self, const char *base)
858 /*[clinic end generated code: output=c212ddceb607b539 input=c684e5de895ee1a8]*/
859 {
860 if (!XML_SetBase(self->itself, base)) {
861 return PyErr_NoMemory();
862 }
863 Py_RETURN_NONE;
864 }
865
866 /*[clinic input]
867 pyexpat.xmlparser.GetBase
868
869 Return base URL string for the parser.
870 [clinic start generated code]*/
871
872 static PyObject *
pyexpat_xmlparser_GetBase_impl(xmlparseobject * self)873 pyexpat_xmlparser_GetBase_impl(xmlparseobject *self)
874 /*[clinic end generated code: output=2886cb21f9a8739a input=918d71c38009620e]*/
875 {
876 return Py_BuildValue("z", XML_GetBase(self->itself));
877 }
878
879 /*[clinic input]
880 pyexpat.xmlparser.GetInputContext
881
882 Return the untranslated text of the input that caused the current event.
883
884 If the event was generated by a large amount of text (such as a start tag
885 for an element with many attributes), not all of the text may be available.
886 [clinic start generated code]*/
887
888 static PyObject *
pyexpat_xmlparser_GetInputContext_impl(xmlparseobject * self)889 pyexpat_xmlparser_GetInputContext_impl(xmlparseobject *self)
890 /*[clinic end generated code: output=a88026d683fc22cc input=034df8712db68379]*/
891 {
892 if (self->in_callback) {
893 int offset, size;
894 const char *buffer
895 = XML_GetInputContext(self->itself, &offset, &size);
896
897 if (buffer != NULL)
898 return PyBytes_FromStringAndSize(buffer + offset,
899 size - offset);
900 else
901 Py_RETURN_NONE;
902 }
903 else
904 Py_RETURN_NONE;
905 }
906
907 /*[clinic input]
908 pyexpat.xmlparser.ExternalEntityParserCreate
909
910 context: str(accept={str, NoneType})
911 encoding: str = NULL
912 /
913
914 Create a parser for parsing an external entity based on the information passed to the ExternalEntityRefHandler.
915 [clinic start generated code]*/
916
static PyObject *
pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
                                                  const char *context,
                                                  const char *encoding)
/*[clinic end generated code: output=535cda9d7a0fbcd6 input=b906714cc122c322]*/
{
    /* Create a sub-parser that inherits the settings, the intern dictionary
     * and the handlers of `self`.  Returns a new reference, or NULL with
     * MemoryError set when any allocation (including the Expat sub-parser)
     * fails.
     */
    xmlparseobject *new_parser;
    int i;

    new_parser = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
    if (new_parser == NULL)
        return NULL;
    /* Every field is initialised before the first Py_DECREF(new_parser)
       below can run. */
    new_parser->buffer_size = self->buffer_size;
    new_parser->buffer_used = 0;
    new_parser->buffer = NULL;
    new_parser->ordered_attributes = self->ordered_attributes;
    new_parser->specified_attributes = self->specified_attributes;
    new_parser->in_callback = 0;
    new_parser->ns_prefixes = self->ns_prefixes;
    new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
                                                        encoding);
    new_parser->handlers = 0;
    new_parser->intern = self->intern;
    Py_XINCREF(new_parser->intern);

    /* Character buffering is enabled on the child only if the parent has
       it enabled. */
    if (self->buffer != NULL) {
        new_parser->buffer = PyMem_Malloc(new_parser->buffer_size);
        if (new_parser->buffer == NULL) {
            Py_DECREF(new_parser);
            return PyErr_NoMemory();
        }
    }
    if (!new_parser->itself) {
        Py_DECREF(new_parser);
        return PyErr_NoMemory();
    }

    XML_SetUserData(new_parser->itself, (void *)new_parser);

    /* allocate and clear handlers first */
    /* (this loop only counts the entries of handler_info[]) */
    for (i = 0; handler_info[i].name != NULL; i++)
        /* do nothing */;

    new_parser->handlers = PyMem_New(PyObject *, i);
    if (!new_parser->handlers) {
        Py_DECREF(new_parser);
        return PyErr_NoMemory();
    }
    clear_handlers(new_parser, 1);

    /* then copy handlers from self */
    for (i = 0; handler_info[i].name != NULL; i++) {
        PyObject *handler = self->handlers[i];
        if (handler != NULL) {
            Py_INCREF(handler);
            new_parser->handlers[i] = handler;
            /* Register the matching C callback on the new Expat parser. */
            handler_info[i].setter(new_parser->itself,
                                   handler_info[i].handler);
        }
    }

    /* Start GC tracking only once the object is fully initialised. */
    PyObject_GC_Track(new_parser);
    return (PyObject *)new_parser;
}
981
982 /*[clinic input]
983 pyexpat.xmlparser.SetParamEntityParsing
984
985 flag: int
986 /
987
988 Controls parsing of parameter entities (including the external DTD subset).
989
990 Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,
991 XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and
992 XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag
993 was successful.
994 [clinic start generated code]*/
995
996 static PyObject *
pyexpat_xmlparser_SetParamEntityParsing_impl(xmlparseobject * self,int flag)997 pyexpat_xmlparser_SetParamEntityParsing_impl(xmlparseobject *self, int flag)
998 /*[clinic end generated code: output=18668ee8e760d64c input=8aea19b4b15e9af1]*/
999 {
1000 flag = XML_SetParamEntityParsing(self->itself, flag);
1001 return PyLong_FromLong(flag);
1002 }
1003
1004
1005 #if XML_COMBINED_VERSION >= 19505
1006 /*[clinic input]
1007 pyexpat.xmlparser.UseForeignDTD
1008
1009 flag: bool = True
1010 /
1011
1012 Allows the application to provide an artificial external subset if one is not specified as part of the document instance.
1013
1014 This readily allows the use of a 'default' document type controlled by the
1015 application, while still getting the advantage of providing document type
1016 information to the parser. 'flag' defaults to True if not provided.
1017 [clinic start generated code]*/
1018
1019 static PyObject *
pyexpat_xmlparser_UseForeignDTD_impl(xmlparseobject * self,int flag)1020 pyexpat_xmlparser_UseForeignDTD_impl(xmlparseobject *self, int flag)
1021 /*[clinic end generated code: output=cfaa9aa50bb0f65c input=78144c519d116a6e]*/
1022 {
1023 enum XML_Error rc;
1024
1025 rc = XML_UseForeignDTD(self->itself, flag ? XML_TRUE : XML_FALSE);
1026 if (rc != XML_ERROR_NONE) {
1027 return set_error(self, rc);
1028 }
1029 Py_RETURN_NONE;
1030 }
1031 #endif
1032
/* Method table of xmlparser objects.
   NOTE(review): the *_METHODDEF macros are presumably defined by the
   Argument Clinic output in clinic/pyexpat.c.h; each is expected to expand
   to one complete table entry including its trailing comma. */
static struct PyMethodDef xmlparse_methods[] = {
    PYEXPAT_XMLPARSER_PARSE_METHODDEF
    PYEXPAT_XMLPARSER_PARSEFILE_METHODDEF
    PYEXPAT_XMLPARSER_SETBASE_METHODDEF
    PYEXPAT_XMLPARSER_GETBASE_METHODDEF
    PYEXPAT_XMLPARSER_GETINPUTCONTEXT_METHODDEF
    PYEXPAT_XMLPARSER_EXTERNALENTITYPARSERCREATE_METHODDEF
    PYEXPAT_XMLPARSER_SETPARAMENTITYPARSING_METHODDEF
#if XML_COMBINED_VERSION >= 19505
    /* Only defined when UseForeignDTD itself is compiled in. */
    PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF
#endif
    {NULL, NULL}  /* sentinel */
};
1046
1047 /* ---------- */
1048
1049
1050
1051 /* pyexpat international encoding support.
1052 Make it as simple as possible.
1053 */
1054
1055 static int
PyUnknownEncodingHandler(void * encodingHandlerData,const XML_Char * name,XML_Encoding * info)1056 PyUnknownEncodingHandler(void *encodingHandlerData,
1057 const XML_Char *name,
1058 XML_Encoding *info)
1059 {
1060 static unsigned char template_buffer[256] = {0};
1061 PyObject* u;
1062 int i;
1063 const void *data;
1064 unsigned int kind;
1065
1066 if (PyErr_Occurred())
1067 return XML_STATUS_ERROR;
1068
1069 if (template_buffer[1] == 0) {
1070 for (i = 0; i < 256; i++)
1071 template_buffer[i] = i;
1072 }
1073
1074 u = PyUnicode_Decode((char*) template_buffer, 256, name, "replace");
1075 if (u == NULL || PyUnicode_READY(u)) {
1076 Py_XDECREF(u);
1077 return XML_STATUS_ERROR;
1078 }
1079
1080 if (PyUnicode_GET_LENGTH(u) != 256) {
1081 Py_DECREF(u);
1082 PyErr_SetString(PyExc_ValueError,
1083 "multi-byte encodings are not supported");
1084 return XML_STATUS_ERROR;
1085 }
1086
1087 kind = PyUnicode_KIND(u);
1088 data = PyUnicode_DATA(u);
1089 for (i = 0; i < 256; i++) {
1090 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1091 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
1092 info->map[i] = ch;
1093 else
1094 info->map[i] = -1;
1095 }
1096
1097 info->data = NULL;
1098 info->convert = NULL;
1099 info->release = NULL;
1100 Py_DECREF(u);
1101
1102 return XML_STATUS_OK;
1103 }
1104
1105
/* Create a new xmlparser object wrapping a fresh Expat parser.
 *
 * `encoding` and `namespace_separator` are passed on to
 * XML_ParserCreate_MM(); `intern` (may be NULL) becomes the parser's intern
 * dictionary and gets its own reference.
 *
 * Returns a new reference, or NULL with RuntimeError (Expat parser
 * creation failed) or MemoryError set.
 */
static PyObject *
newxmlparseobject(const char *encoding, const char *namespace_separator, PyObject *intern)
{
    int i;
    xmlparseobject *self;

    self = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
    if (self == NULL)
        return NULL;

    /* Initialise the fields before the first Py_DECREF(self) below. */
    self->buffer = NULL;
    self->buffer_size = CHARACTER_DATA_BUFFER_SIZE;
    self->buffer_used = 0;
    self->ordered_attributes = 0;
    self->specified_attributes = 0;
    self->in_callback = 0;
    self->ns_prefixes = 0;
    self->handlers = NULL;
    self->intern = intern;
    Py_XINCREF(self->intern);

    /* namespace_separator is either NULL or contains one char + \0 */
    self->itself = XML_ParserCreate_MM(encoding, &ExpatMemoryHandler,
                                       namespace_separator);
    if (self->itself == NULL) {
        PyErr_SetString(PyExc_RuntimeError,
                        "XML_ParserCreate failed");
        Py_DECREF(self);
        return NULL;
    }
#if XML_COMBINED_VERSION >= 20100
    /* This feature was added upstream in libexpat 2.1.0. */
    XML_SetHashSalt(self->itself,
                    (unsigned long)_Py_HashSecret.expat.hashsalt);
#endif
    /* The Expat callbacks get the xmlparseobject back as their user data. */
    XML_SetUserData(self->itself, (void *)self);
    XML_SetUnknownEncodingHandler(self->itself,
                  (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL);

    /* Count the entries of handler_info[] to size the handlers array. */
    for (i = 0; handler_info[i].name != NULL; i++)
        /* do nothing */;

    self->handlers = PyMem_New(PyObject *, i);
    if (!self->handlers) {
        Py_DECREF(self);
        return PyErr_NoMemory();
    }
    clear_handlers(self, 1);

    /* Start GC tracking only once the object is fully initialised. */
    PyObject_GC_Track(self);
    return (PyObject*)self;
}
1158
1159
1160 static void
xmlparse_dealloc(xmlparseobject * self)1161 xmlparse_dealloc(xmlparseobject *self)
1162 {
1163 int i;
1164 PyObject_GC_UnTrack(self);
1165 if (self->itself != NULL)
1166 XML_ParserFree(self->itself);
1167 self->itself = NULL;
1168
1169 if (self->handlers != NULL) {
1170 for (i = 0; handler_info[i].name != NULL; i++)
1171 Py_CLEAR(self->handlers[i]);
1172 PyMem_Free(self->handlers);
1173 self->handlers = NULL;
1174 }
1175 if (self->buffer != NULL) {
1176 PyMem_Free(self->buffer);
1177 self->buffer = NULL;
1178 }
1179 Py_XDECREF(self->intern);
1180 PyObject_GC_Del(self);
1181 }
1182
1183
1184 static PyObject *
xmlparse_handler_getter(xmlparseobject * self,struct HandlerInfo * hi)1185 xmlparse_handler_getter(xmlparseobject *self, struct HandlerInfo *hi)
1186 {
1187 assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info));
1188 int handlernum = (int)(hi - handler_info);
1189 PyObject *result = self->handlers[handlernum];
1190 if (result == NULL)
1191 result = Py_None;
1192 Py_INCREF(result);
1193 return result;
1194 }
1195
1196 static int
xmlparse_handler_setter(xmlparseobject * self,PyObject * v,struct HandlerInfo * hi)1197 xmlparse_handler_setter(xmlparseobject *self, PyObject *v, struct HandlerInfo *hi)
1198 {
1199 assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info));
1200 int handlernum = (int)(hi - handler_info);
1201 if (v == NULL) {
1202 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1203 return -1;
1204 }
1205 if (handlernum == CharacterData) {
1206 /* If we're changing the character data handler, flush all
1207 * cached data with the old handler. Not sure there's a
1208 * "right" thing to do, though, but this probably won't
1209 * happen.
1210 */
1211 if (flush_character_buffer(self) < 0)
1212 return -1;
1213 }
1214
1215 xmlhandler c_handler = NULL;
1216 if (v == Py_None) {
1217 /* If this is the character data handler, and a character
1218 data handler is already active, we need to be more
1219 careful. What we can safely do is replace the existing
1220 character data handler callback function with a no-op
1221 function that will refuse to call Python. The downside
1222 is that this doesn't completely remove the character
1223 data handler from the C layer if there's any callback
1224 active, so Expat does a little more work than it
1225 otherwise would, but that's really an odd case. A more
1226 elaborate system of handlers and state could remove the
1227 C handler more effectively. */
1228 if (handlernum == CharacterData && self->in_callback)
1229 c_handler = noop_character_data_handler;
1230 v = NULL;
1231 }
1232 else if (v != NULL) {
1233 Py_INCREF(v);
1234 c_handler = handler_info[handlernum].handler;
1235 }
1236 Py_XSETREF(self->handlers[handlernum], v);
1237 handler_info[handlernum].setter(self->itself, c_handler);
1238 return 0;
1239 }
1240
1241 #define INT_GETTER(name) \
1242 static PyObject * \
1243 xmlparse_##name##_getter(xmlparseobject *self, void *closure) \
1244 { \
1245 return PyLong_FromLong((long) XML_Get##name(self->itself)); \
1246 }
1247 INT_GETTER(ErrorCode)
INT_GETTER(ErrorLineNumber)1248 INT_GETTER(ErrorLineNumber)
1249 INT_GETTER(ErrorColumnNumber)
1250 INT_GETTER(ErrorByteIndex)
1251 INT_GETTER(CurrentLineNumber)
1252 INT_GETTER(CurrentColumnNumber)
1253 INT_GETTER(CurrentByteIndex)
1254
1255 #undef INT_GETTER
1256
1257 static PyObject *
1258 xmlparse_buffer_text_getter(xmlparseobject *self, void *closure)
1259 {
1260 return PyBool_FromLong(self->buffer != NULL);
1261 }
1262
1263 static int
xmlparse_buffer_text_setter(xmlparseobject * self,PyObject * v,void * closure)1264 xmlparse_buffer_text_setter(xmlparseobject *self, PyObject *v, void *closure)
1265 {
1266 if (v == NULL) {
1267 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1268 return -1;
1269 }
1270 int b = PyObject_IsTrue(v);
1271 if (b < 0)
1272 return -1;
1273 if (b) {
1274 if (self->buffer == NULL) {
1275 self->buffer = PyMem_Malloc(self->buffer_size);
1276 if (self->buffer == NULL) {
1277 PyErr_NoMemory();
1278 return -1;
1279 }
1280 self->buffer_used = 0;
1281 }
1282 }
1283 else if (self->buffer != NULL) {
1284 if (flush_character_buffer(self) < 0)
1285 return -1;
1286 PyMem_Free(self->buffer);
1287 self->buffer = NULL;
1288 }
1289 return 0;
1290 }
1291
1292 static PyObject *
xmlparse_buffer_size_getter(xmlparseobject * self,void * closure)1293 xmlparse_buffer_size_getter(xmlparseobject *self, void *closure)
1294 {
1295 return PyLong_FromLong((long) self->buffer_size);
1296 }
1297
1298 static int
xmlparse_buffer_size_setter(xmlparseobject * self,PyObject * v,void * closure)1299 xmlparse_buffer_size_setter(xmlparseobject *self, PyObject *v, void *closure)
1300 {
1301 if (v == NULL) {
1302 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1303 return -1;
1304 }
1305 long new_buffer_size;
1306 if (!PyLong_Check(v)) {
1307 PyErr_SetString(PyExc_TypeError, "buffer_size must be an integer");
1308 return -1;
1309 }
1310
1311 new_buffer_size = PyLong_AsLong(v);
1312 if (new_buffer_size <= 0) {
1313 if (!PyErr_Occurred())
1314 PyErr_SetString(PyExc_ValueError, "buffer_size must be greater than zero");
1315 return -1;
1316 }
1317
1318 /* trivial case -- no change */
1319 if (new_buffer_size == self->buffer_size) {
1320 return 0;
1321 }
1322
1323 /* check maximum */
1324 if (new_buffer_size > INT_MAX) {
1325 char errmsg[100];
1326 sprintf(errmsg, "buffer_size must not be greater than %i", INT_MAX);
1327 PyErr_SetString(PyExc_ValueError, errmsg);
1328 return -1;
1329 }
1330
1331 if (self->buffer != NULL) {
1332 /* there is already a buffer */
1333 if (self->buffer_used != 0) {
1334 if (flush_character_buffer(self) < 0) {
1335 return -1;
1336 }
1337 }
1338 /* free existing buffer */
1339 PyMem_Free(self->buffer);
1340 }
1341 self->buffer = PyMem_Malloc(new_buffer_size);
1342 if (self->buffer == NULL) {
1343 PyErr_NoMemory();
1344 return -1;
1345 }
1346 self->buffer_size = new_buffer_size;
1347 return 0;
1348 }
1349
1350 static PyObject *
xmlparse_buffer_used_getter(xmlparseobject * self,void * closure)1351 xmlparse_buffer_used_getter(xmlparseobject *self, void *closure)
1352 {
1353 return PyLong_FromLong((long) self->buffer_used);
1354 }
1355
1356 static PyObject *
xmlparse_namespace_prefixes_getter(xmlparseobject * self,void * closure)1357 xmlparse_namespace_prefixes_getter(xmlparseobject *self, void *closure)
1358 {
1359 return PyBool_FromLong(self->ns_prefixes);
1360 }
1361
1362 static int
xmlparse_namespace_prefixes_setter(xmlparseobject * self,PyObject * v,void * closure)1363 xmlparse_namespace_prefixes_setter(xmlparseobject *self, PyObject *v, void *closure)
1364 {
1365 if (v == NULL) {
1366 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1367 return -1;
1368 }
1369 int b = PyObject_IsTrue(v);
1370 if (b < 0)
1371 return -1;
1372 self->ns_prefixes = b;
1373 XML_SetReturnNSTriplet(self->itself, self->ns_prefixes);
1374 return 0;
1375 }
1376
1377 static PyObject *
xmlparse_ordered_attributes_getter(xmlparseobject * self,void * closure)1378 xmlparse_ordered_attributes_getter(xmlparseobject *self, void *closure)
1379 {
1380 return PyBool_FromLong(self->ordered_attributes);
1381 }
1382
1383 static int
xmlparse_ordered_attributes_setter(xmlparseobject * self,PyObject * v,void * closure)1384 xmlparse_ordered_attributes_setter(xmlparseobject *self, PyObject *v, void *closure)
1385 {
1386 if (v == NULL) {
1387 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1388 return -1;
1389 }
1390 int b = PyObject_IsTrue(v);
1391 if (b < 0)
1392 return -1;
1393 self->ordered_attributes = b;
1394 return 0;
1395 }
1396
1397 static PyObject *
xmlparse_specified_attributes_getter(xmlparseobject * self,void * closure)1398 xmlparse_specified_attributes_getter(xmlparseobject *self, void *closure)
1399 {
1400 return PyBool_FromLong((long) self->specified_attributes);
1401 }
1402
1403 static int
xmlparse_specified_attributes_setter(xmlparseobject * self,PyObject * v,void * closure)1404 xmlparse_specified_attributes_setter(xmlparseobject *self, PyObject *v, void *closure)
1405 {
1406 if (v == NULL) {
1407 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1408 return -1;
1409 }
1410 int b = PyObject_IsTrue(v);
1411 if (b < 0)
1412 return -1;
1413 self->specified_attributes = b;
1414 return 0;
1415 }
1416
/* Plain struct members exposed as attributes (installed as tp_members of
   Xmlparsetype).  The list ends with a zeroed sentinel entry. */
static PyMemberDef xmlparse_members[] = {
    /* The intern object stored on the parser; exposed read-only. */
    {"intern", T_OBJECT, offsetof(xmlparseobject, intern), READONLY, NULL},
    {NULL}
};
1421
/* Build a read-only PyGetSetDef entry from xmlparse_<name>_getter. */
#define XMLPARSE_GETTER_DEF(name) \
    {#name, (getter)xmlparse_##name##_getter, NULL, NULL},
/* Build a read/write PyGetSetDef entry from xmlparse_<name>_getter and
   xmlparse_<name>_setter. */
#define XMLPARSE_GETTER_SETTER_DEF(name) \
    {#name, (getter)xmlparse_##name##_getter, \
            (setter)xmlparse_##name##_setter, NULL},

/* Computed attributes of the parser type (installed as tp_getset of
   Xmlparsetype).  The handler attributes are not listed here; they are
   added to the type dict separately by init_handler_descrs(). */
static PyGetSetDef xmlparse_getsetlist[] = {
    /* Read-only values fetched from Expat via XML_Get<name>(). */
    XMLPARSE_GETTER_DEF(ErrorCode)
    XMLPARSE_GETTER_DEF(ErrorLineNumber)
    XMLPARSE_GETTER_DEF(ErrorColumnNumber)
    XMLPARSE_GETTER_DEF(ErrorByteIndex)
    XMLPARSE_GETTER_DEF(CurrentLineNumber)
    XMLPARSE_GETTER_DEF(CurrentColumnNumber)
    XMLPARSE_GETTER_DEF(CurrentByteIndex)
    /* Character-data buffering controls. */
    XMLPARSE_GETTER_SETTER_DEF(buffer_size)
    XMLPARSE_GETTER_SETTER_DEF(buffer_text)
    XMLPARSE_GETTER_DEF(buffer_used)
    /* Boolean parser options. */
    XMLPARSE_GETTER_SETTER_DEF(namespace_prefixes)
    XMLPARSE_GETTER_SETTER_DEF(ordered_attributes)
    XMLPARSE_GETTER_SETTER_DEF(specified_attributes)
    {NULL},  /* sentinel */
};

#undef XMLPARSE_GETTER_DEF
#undef XMLPARSE_GETTER_SETTER_DEF
1447
1448 static int
xmlparse_traverse(xmlparseobject * op,visitproc visit,void * arg)1449 xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
1450 {
1451 int i;
1452 for (i = 0; handler_info[i].name != NULL; i++)
1453 Py_VISIT(op->handlers[i]);
1454 return 0;
1455 }
1456
1457 static int
xmlparse_clear(xmlparseobject * op)1458 xmlparse_clear(xmlparseobject *op)
1459 {
1460 clear_handlers(op, 0);
1461 Py_CLEAR(op->intern);
1462 return 0;
1463 }
1464
1465 PyDoc_STRVAR(Xmlparsetype__doc__, "XML parser");
1466
1467 static PyTypeObject Xmlparsetype = {
1468 PyVarObject_HEAD_INIT(NULL, 0)
1469 "pyexpat.xmlparser", /*tp_name*/
1470 sizeof(xmlparseobject), /*tp_basicsize*/
1471 0, /*tp_itemsize*/
1472 /* methods */
1473 (destructor)xmlparse_dealloc, /*tp_dealloc*/
1474 0, /*tp_vectorcall_offset*/
1475 0, /*tp_getattr*/
1476 0, /*tp_setattr*/
1477 0, /*tp_as_async*/
1478 (reprfunc)0, /*tp_repr*/
1479 0, /*tp_as_number*/
1480 0, /*tp_as_sequence*/
1481 0, /*tp_as_mapping*/
1482 (hashfunc)0, /*tp_hash*/
1483 (ternaryfunc)0, /*tp_call*/
1484 (reprfunc)0, /*tp_str*/
1485 (getattrofunc)0, /* tp_getattro */
1486 (setattrofunc)0, /* tp_setattro */
1487 0, /* tp_as_buffer */
1488 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
1489 Xmlparsetype__doc__, /* tp_doc - Documentation string */
1490 (traverseproc)xmlparse_traverse, /* tp_traverse */
1491 (inquiry)xmlparse_clear, /* tp_clear */
1492 0, /* tp_richcompare */
1493 0, /* tp_weaklistoffset */
1494 0, /* tp_iter */
1495 0, /* tp_iternext */
1496 xmlparse_methods, /* tp_methods */
1497 xmlparse_members, /* tp_members */
1498 xmlparse_getsetlist, /* tp_getset */
1499 };
1500
1501 /* End of code for xmlparser objects */
1502 /* -------------------------------------------------------- */
1503
1504 /*[clinic input]
1505 pyexpat.ParserCreate
1506
1507 encoding: str(accept={str, NoneType}) = None
1508 namespace_separator: str(accept={str, NoneType}) = None
1509 intern: object = NULL
1510
1511 Return a new XML parser object.
1512 [clinic start generated code]*/
1513
1514 static PyObject *
pyexpat_ParserCreate_impl(PyObject * module,const char * encoding,const char * namespace_separator,PyObject * intern)1515 pyexpat_ParserCreate_impl(PyObject *module, const char *encoding,
1516 const char *namespace_separator, PyObject *intern)
1517 /*[clinic end generated code: output=295c0cf01ab1146c input=e8da8e8d7122cb5d]*/
1518 {
1519 PyObject *result;
1520 int intern_decref = 0;
1521
1522 if (namespace_separator != NULL
1523 && strlen(namespace_separator) > 1) {
1524 PyErr_SetString(PyExc_ValueError,
1525 "namespace_separator must be at most one"
1526 " character, omitted, or None");
1527 return NULL;
1528 }
1529 /* Explicitly passing None means no interning is desired.
1530 Not passing anything means that a new dictionary is used. */
1531 if (intern == Py_None)
1532 intern = NULL;
1533 else if (intern == NULL) {
1534 intern = PyDict_New();
1535 if (!intern)
1536 return NULL;
1537 intern_decref = 1;
1538 }
1539 else if (!PyDict_Check(intern)) {
1540 PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
1541 return NULL;
1542 }
1543
1544 result = newxmlparseobject(encoding, namespace_separator, intern);
1545 if (intern_decref) {
1546 Py_DECREF(intern);
1547 }
1548 return result;
1549 }
1550
1551 /*[clinic input]
1552 pyexpat.ErrorString
1553
1554 code: long
1555 /
1556
1557 Returns string error for given number.
1558 [clinic start generated code]*/
1559
1560 static PyObject *
pyexpat_ErrorString_impl(PyObject * module,long code)1561 pyexpat_ErrorString_impl(PyObject *module, long code)
1562 /*[clinic end generated code: output=2feae50d166f2174 input=cc67de010d9e62b3]*/
1563 {
1564 return Py_BuildValue("z", XML_ErrorString((int)code));
1565 }
1566
1567 /* List of methods defined in the module */
1568
/* Module-level functions, referenced from the pyexpatmodule definition.
   Each *_METHODDEF macro expands to one table entry; the macro names
   suggest they come from the Argument Clinic output included near the top
   of the file. */
static struct PyMethodDef pyexpat_methods[] = {
    PYEXPAT_PARSERCREATE_METHODDEF
    PYEXPAT_ERRORSTRING_METHODDEF
    {NULL, NULL}  /* sentinel */
};
1574
1575 /* Module docstring */
1576
1577 PyDoc_STRVAR(pyexpat_module_documentation,
1578 "Python wrapper for Expat parser.");
1579
1580 /* Initialization function for the module */
1581
1582 #ifndef MODULE_NAME
1583 #define MODULE_NAME "pyexpat"
1584 #endif
1585
1586 #ifndef MODULE_INITFUNC
1587 #define MODULE_INITFUNC PyInit_pyexpat
1588 #endif
1589
1590 static struct PyModuleDef pyexpatmodule = {
1591 PyModuleDef_HEAD_INIT,
1592 MODULE_NAME,
1593 pyexpat_module_documentation,
1594 -1,
1595 pyexpat_methods,
1596 NULL,
1597 NULL,
1598 NULL,
1599 NULL
1600 };
1601
init_handler_descrs(void)1602 static int init_handler_descrs(void)
1603 {
1604 int i;
1605 assert(!PyType_HasFeature(&Xmlparsetype, Py_TPFLAGS_VALID_VERSION_TAG));
1606 for (i = 0; handler_info[i].name != NULL; i++) {
1607 struct HandlerInfo *hi = &handler_info[i];
1608 hi->getset.name = hi->name;
1609 hi->getset.get = (getter)xmlparse_handler_getter;
1610 hi->getset.set = (setter)xmlparse_handler_setter;
1611 hi->getset.closure = &handler_info[i];
1612
1613 PyObject *descr = PyDescr_NewGetSet(&Xmlparsetype, &hi->getset);
1614 if (descr == NULL)
1615 return -1;
1616
1617 if (PyDict_GetItemWithError(Xmlparsetype.tp_dict, PyDescr_NAME(descr))) {
1618 Py_DECREF(descr);
1619 continue;
1620 }
1621 else if (PyErr_Occurred()) {
1622 Py_DECREF(descr);
1623 return -1;
1624 }
1625 if (PyDict_SetItem(Xmlparsetype.tp_dict, PyDescr_NAME(descr), descr) < 0) {
1626 Py_DECREF(descr);
1627 return -1;
1628 }
1629 Py_DECREF(descr);
1630 }
1631 return 0;
1632 }
1633
1634 PyMODINIT_FUNC
MODULE_INITFUNC(void)1635 MODULE_INITFUNC(void)
1636 {
1637 PyObject *m, *d;
1638 PyObject *errmod_name = PyUnicode_FromString(MODULE_NAME ".errors");
1639 PyObject *errors_module;
1640 PyObject *modelmod_name;
1641 PyObject *model_module;
1642 PyObject *tmpnum, *tmpstr;
1643 PyObject *codes_dict;
1644 PyObject *rev_codes_dict;
1645 int res;
1646 static struct PyExpat_CAPI capi;
1647 PyObject *capi_object;
1648
1649 if (errmod_name == NULL)
1650 return NULL;
1651 modelmod_name = PyUnicode_FromString(MODULE_NAME ".model");
1652 if (modelmod_name == NULL)
1653 return NULL;
1654
1655 if (PyType_Ready(&Xmlparsetype) < 0 || init_handler_descrs() < 0)
1656 return NULL;
1657
1658 /* Create the module and add the functions */
1659 m = PyModule_Create(&pyexpatmodule);
1660 if (m == NULL)
1661 return NULL;
1662
1663 /* Add some symbolic constants to the module */
1664 if (ErrorObject == NULL) {
1665 ErrorObject = PyErr_NewException("xml.parsers.expat.ExpatError",
1666 NULL, NULL);
1667 if (ErrorObject == NULL)
1668 return NULL;
1669 }
1670 Py_INCREF(ErrorObject);
1671 PyModule_AddObject(m, "error", ErrorObject);
1672 Py_INCREF(ErrorObject);
1673 PyModule_AddObject(m, "ExpatError", ErrorObject);
1674 Py_INCREF(&Xmlparsetype);
1675 PyModule_AddObject(m, "XMLParserType", (PyObject *) &Xmlparsetype);
1676
1677 PyModule_AddStringConstant(m, "EXPAT_VERSION",
1678 XML_ExpatVersion());
1679 {
1680 XML_Expat_Version info = XML_ExpatVersionInfo();
1681 PyModule_AddObject(m, "version_info",
1682 Py_BuildValue("(iii)", info.major,
1683 info.minor, info.micro));
1684 }
1685 /* XXX When Expat supports some way of figuring out how it was
1686 compiled, this should check and set native_encoding
1687 appropriately.
1688 */
1689 PyModule_AddStringConstant(m, "native_encoding", "UTF-8");
1690
1691 d = PyModule_GetDict(m);
1692 if (d == NULL) {
1693 Py_DECREF(m);
1694 return NULL;
1695 }
1696 errors_module = PyDict_GetItemWithError(d, errmod_name);
1697 if (errors_module == NULL && !PyErr_Occurred()) {
1698 errors_module = PyModule_New(MODULE_NAME ".errors");
1699 if (errors_module != NULL) {
1700 _PyImport_SetModule(errmod_name, errors_module);
1701 /* gives away the reference to errors_module */
1702 PyModule_AddObject(m, "errors", errors_module);
1703 }
1704 }
1705 Py_DECREF(errmod_name);
1706 model_module = PyDict_GetItemWithError(d, modelmod_name);
1707 if (model_module == NULL && !PyErr_Occurred()) {
1708 model_module = PyModule_New(MODULE_NAME ".model");
1709 if (model_module != NULL) {
1710 _PyImport_SetModule(modelmod_name, model_module);
1711 /* gives away the reference to model_module */
1712 PyModule_AddObject(m, "model", model_module);
1713 }
1714 }
1715 Py_DECREF(modelmod_name);
1716 if (errors_module == NULL || model_module == NULL) {
1717 /* Don't core dump later! */
1718 Py_DECREF(m);
1719 return NULL;
1720 }
1721
1722 #if XML_COMBINED_VERSION > 19505
1723 {
1724 const XML_Feature *features = XML_GetFeatureList();
1725 PyObject *list = PyList_New(0);
1726 if (list == NULL)
1727 /* just ignore it */
1728 PyErr_Clear();
1729 else {
1730 int i = 0;
1731 for (; features[i].feature != XML_FEATURE_END; ++i) {
1732 int ok;
1733 PyObject *item = Py_BuildValue("si", features[i].name,
1734 features[i].value);
1735 if (item == NULL) {
1736 Py_DECREF(list);
1737 list = NULL;
1738 break;
1739 }
1740 ok = PyList_Append(list, item);
1741 Py_DECREF(item);
1742 if (ok < 0) {
1743 PyErr_Clear();
1744 break;
1745 }
1746 }
1747 if (list != NULL)
1748 PyModule_AddObject(m, "features", list);
1749 }
1750 }
1751 #endif
1752
1753 codes_dict = PyDict_New();
1754 rev_codes_dict = PyDict_New();
1755 if (codes_dict == NULL || rev_codes_dict == NULL) {
1756 Py_XDECREF(codes_dict);
1757 Py_XDECREF(rev_codes_dict);
1758 return NULL;
1759 }
1760
1761 #define MYCONST(name) \
1762 if (PyModule_AddStringConstant(errors_module, #name, \
1763 XML_ErrorString(name)) < 0) \
1764 return NULL; \
1765 tmpnum = PyLong_FromLong(name); \
1766 if (tmpnum == NULL) return NULL; \
1767 res = PyDict_SetItemString(codes_dict, \
1768 XML_ErrorString(name), tmpnum); \
1769 if (res < 0) return NULL; \
1770 tmpstr = PyUnicode_FromString(XML_ErrorString(name)); \
1771 if (tmpstr == NULL) return NULL; \
1772 res = PyDict_SetItem(rev_codes_dict, tmpnum, tmpstr); \
1773 Py_DECREF(tmpstr); \
1774 Py_DECREF(tmpnum); \
1775 if (res < 0) return NULL; \
1776
1777 MYCONST(XML_ERROR_NO_MEMORY);
1778 MYCONST(XML_ERROR_SYNTAX);
1779 MYCONST(XML_ERROR_NO_ELEMENTS);
1780 MYCONST(XML_ERROR_INVALID_TOKEN);
1781 MYCONST(XML_ERROR_UNCLOSED_TOKEN);
1782 MYCONST(XML_ERROR_PARTIAL_CHAR);
1783 MYCONST(XML_ERROR_TAG_MISMATCH);
1784 MYCONST(XML_ERROR_DUPLICATE_ATTRIBUTE);
1785 MYCONST(XML_ERROR_JUNK_AFTER_DOC_ELEMENT);
1786 MYCONST(XML_ERROR_PARAM_ENTITY_REF);
1787 MYCONST(XML_ERROR_UNDEFINED_ENTITY);
1788 MYCONST(XML_ERROR_RECURSIVE_ENTITY_REF);
1789 MYCONST(XML_ERROR_ASYNC_ENTITY);
1790 MYCONST(XML_ERROR_BAD_CHAR_REF);
1791 MYCONST(XML_ERROR_BINARY_ENTITY_REF);
1792 MYCONST(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF);
1793 MYCONST(XML_ERROR_MISPLACED_XML_PI);
1794 MYCONST(XML_ERROR_UNKNOWN_ENCODING);
1795 MYCONST(XML_ERROR_INCORRECT_ENCODING);
1796 MYCONST(XML_ERROR_UNCLOSED_CDATA_SECTION);
1797 MYCONST(XML_ERROR_EXTERNAL_ENTITY_HANDLING);
1798 MYCONST(XML_ERROR_NOT_STANDALONE);
1799 MYCONST(XML_ERROR_UNEXPECTED_STATE);
1800 MYCONST(XML_ERROR_ENTITY_DECLARED_IN_PE);
1801 MYCONST(XML_ERROR_FEATURE_REQUIRES_XML_DTD);
1802 MYCONST(XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING);
1803 /* Added in Expat 1.95.7. */
1804 MYCONST(XML_ERROR_UNBOUND_PREFIX);
1805 /* Added in Expat 1.95.8. */
1806 MYCONST(XML_ERROR_UNDECLARING_PREFIX);
1807 MYCONST(XML_ERROR_INCOMPLETE_PE);
1808 MYCONST(XML_ERROR_XML_DECL);
1809 MYCONST(XML_ERROR_TEXT_DECL);
1810 MYCONST(XML_ERROR_PUBLICID);
1811 MYCONST(XML_ERROR_SUSPENDED);
1812 MYCONST(XML_ERROR_NOT_SUSPENDED);
1813 MYCONST(XML_ERROR_ABORTED);
1814 MYCONST(XML_ERROR_FINISHED);
1815 MYCONST(XML_ERROR_SUSPEND_PE);
1816
1817 if (PyModule_AddStringConstant(errors_module, "__doc__",
1818 "Constants used to describe "
1819 "error conditions.") < 0)
1820 return NULL;
1821
1822 if (PyModule_AddObject(errors_module, "codes", codes_dict) < 0)
1823 return NULL;
1824 if (PyModule_AddObject(errors_module, "messages", rev_codes_dict) < 0)
1825 return NULL;
1826
1827 #undef MYCONST
1828
1829 #define MYCONST(c) PyModule_AddIntConstant(m, #c, c)
1830 MYCONST(XML_PARAM_ENTITY_PARSING_NEVER);
1831 MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1832 MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS);
1833 #undef MYCONST
1834
1835 #define MYCONST(c) PyModule_AddIntConstant(model_module, #c, c)
1836 PyModule_AddStringConstant(model_module, "__doc__",
1837 "Constants used to interpret content model information.");
1838
1839 MYCONST(XML_CTYPE_EMPTY);
1840 MYCONST(XML_CTYPE_ANY);
1841 MYCONST(XML_CTYPE_MIXED);
1842 MYCONST(XML_CTYPE_NAME);
1843 MYCONST(XML_CTYPE_CHOICE);
1844 MYCONST(XML_CTYPE_SEQ);
1845
1846 MYCONST(XML_CQUANT_NONE);
1847 MYCONST(XML_CQUANT_OPT);
1848 MYCONST(XML_CQUANT_REP);
1849 MYCONST(XML_CQUANT_PLUS);
1850 #undef MYCONST
1851
1852 /* initialize pyexpat dispatch table */
1853 capi.size = sizeof(capi);
1854 capi.magic = PyExpat_CAPI_MAGIC;
1855 capi.MAJOR_VERSION = XML_MAJOR_VERSION;
1856 capi.MINOR_VERSION = XML_MINOR_VERSION;
1857 capi.MICRO_VERSION = XML_MICRO_VERSION;
1858 capi.ErrorString = XML_ErrorString;
1859 capi.GetErrorCode = XML_GetErrorCode;
1860 capi.GetErrorColumnNumber = XML_GetErrorColumnNumber;
1861 capi.GetErrorLineNumber = XML_GetErrorLineNumber;
1862 capi.Parse = XML_Parse;
1863 capi.ParserCreate_MM = XML_ParserCreate_MM;
1864 capi.ParserFree = XML_ParserFree;
1865 capi.SetCharacterDataHandler = XML_SetCharacterDataHandler;
1866 capi.SetCommentHandler = XML_SetCommentHandler;
1867 capi.SetDefaultHandlerExpand = XML_SetDefaultHandlerExpand;
1868 capi.SetElementHandler = XML_SetElementHandler;
1869 capi.SetNamespaceDeclHandler = XML_SetNamespaceDeclHandler;
1870 capi.SetProcessingInstructionHandler = XML_SetProcessingInstructionHandler;
1871 capi.SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler;
1872 capi.SetUserData = XML_SetUserData;
1873 capi.SetStartDoctypeDeclHandler = XML_SetStartDoctypeDeclHandler;
1874 capi.SetEncoding = XML_SetEncoding;
1875 capi.DefaultUnknownEncodingHandler = PyUnknownEncodingHandler;
1876 #if XML_COMBINED_VERSION >= 20100
1877 capi.SetHashSalt = XML_SetHashSalt;
1878 #else
1879 capi.SetHashSalt = NULL;
1880 #endif
1881
1882 /* export using capsule */
1883 capi_object = PyCapsule_New(&capi, PyExpat_CAPSULE_NAME, NULL);
1884 if (capi_object)
1885 PyModule_AddObject(m, "expat_CAPI", capi_object);
1886 return m;
1887 }
1888
1889 static void
clear_handlers(xmlparseobject * self,int initial)1890 clear_handlers(xmlparseobject *self, int initial)
1891 {
1892 int i = 0;
1893
1894 for (; handler_info[i].name != NULL; i++) {
1895 if (initial)
1896 self->handlers[i] = NULL;
1897 else {
1898 Py_CLEAR(self->handlers[i]);
1899 handler_info[i].setter(self->itself, NULL);
1900 }
1901 }
1902 }
1903
/* Table describing every handler attribute of the parser, terminated by an
   all-NULL sentinel.  The index of an entry is also the index of its slot
   in xmlparseobject.handlers, and the entries appear in the same order as
   the constants of enum HandlerTypes (code elsewhere compares a table index
   against CharacterData). */
static struct HandlerInfo handler_info[] = {

/* One entry per handler: the attribute name, the XML_Set<name> function
   that registers the C callback with Expat, and the my_<name> C callback
   itself. */
#define HANDLER_INFO(name) \
    {#name, (xmlhandlersetter)XML_Set##name, (xmlhandler)my_##name},

    HANDLER_INFO(StartElementHandler)
    HANDLER_INFO(EndElementHandler)
    HANDLER_INFO(ProcessingInstructionHandler)
    HANDLER_INFO(CharacterDataHandler)
    HANDLER_INFO(UnparsedEntityDeclHandler)
    HANDLER_INFO(NotationDeclHandler)
    HANDLER_INFO(StartNamespaceDeclHandler)
    HANDLER_INFO(EndNamespaceDeclHandler)
    HANDLER_INFO(CommentHandler)
    HANDLER_INFO(StartCdataSectionHandler)
    HANDLER_INFO(EndCdataSectionHandler)
    HANDLER_INFO(DefaultHandler)
    HANDLER_INFO(DefaultHandlerExpand)
    HANDLER_INFO(NotStandaloneHandler)
    HANDLER_INFO(ExternalEntityRefHandler)
    HANDLER_INFO(StartDoctypeDeclHandler)
    HANDLER_INFO(EndDoctypeDeclHandler)
    HANDLER_INFO(EntityDeclHandler)
    HANDLER_INFO(XmlDeclHandler)
    HANDLER_INFO(ElementDeclHandler)
    HANDLER_INFO(AttlistDeclHandler)
/* Guarded by the same version check as the SkippedEntity enum constant. */
#if XML_COMBINED_VERSION >= 19504
    HANDLER_INFO(SkippedEntityHandler)
#endif

#undef HANDLER_INFO

    {NULL, NULL, NULL} /* sentinel */
};
1938