• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include "Python.h"
2 #include <ctype.h>
3 
4 #include "frameobject.h"
5 #include "expat.h"
6 
7 #include "pyexpat.h"
8 
/* Expat's version folded into a single comparable integer:
 * major * 10000 + minor * 100 + micro.
 */
#define XML_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)

/* Fallback docstring macros, defined only when the Python headers did not
 * already provide PyDoc_STRVAR.
 */
#ifndef PyDoc_STRVAR

/*
 * fdrake says:
 * Don't change the PyDoc_STR macro definition to (str), because
 * '''the parentheses cause compile failures
 * ("non-constant static initializer" or something like that)
 * on some platforms (Irix?)'''
 */
#define PyDoc_STR(str)         str
#define PyDoc_VAR(name)        static char name[]
#define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
#endif

#if (PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION < 2)
/* In Python 2.0 and 2.1, disabling Unicode was not possible. */
#define Py_USING_UNICODE
#else
/* Enables the trace_frame()/trace_frame_exc() hooks used by
 * call_with_frame(). */
#define FIX_TRACE
#endif
31 
/* Slot numbers for the supported callbacks.  A value of this type is used
 * as an index into both the per-parser `handlers` array and the global
 * `handler_info` table, so the order of the enumerators matters.
 */
enum HandlerTypes {
    StartElement,
    EndElement,
    ProcessingInstruction,
    CharacterData,
    UnparsedEntityDecl,
    NotationDecl,
    StartNamespaceDecl,
    EndNamespaceDecl,
    Comment,
    StartCdataSection,
    EndCdataSection,
    Default,
    DefaultHandlerExpand,
    NotStandalone,
    ExternalEntityRef,
    StartDoctypeDecl,
    EndDoctypeDecl,
    EntityDecl,
    XmlDecl,
    ElementDecl,
    AttlistDecl,
#if XML_COMBINED_VERSION >= 19504
    /* Only present when building against Expat 1.95.4 or newer. */
    SkippedEntity,
#endif
    _DummyDecl                  /* Trailing enumerator; keep it last. */
};
59 
60 static PyObject *ErrorObject;
61 
62 /* ----------------------------------------------------- */
63 
64 /* Declarations for objects of type xmlparser */
65 
66 typedef struct {
67     PyObject_HEAD
68 
69     XML_Parser itself;
70     int returns_unicode;        /* True if Unicode strings are returned;
71                                    if false, UTF-8 strings are returned */
72     int ordered_attributes;     /* Return attributes as a list. */
73     int specified_attributes;   /* Report only specified attributes. */
74     int in_callback;            /* Is a callback active? */
75     int ns_prefixes;            /* Namespace-triplets mode? */
76     XML_Char *buffer;           /* Buffer used when accumulating characters */
77                                 /* NULL if not enabled */
78     int buffer_size;            /* Size of buffer, in XML_Char units */
79     int buffer_used;            /* Buffer units in use */
80     PyObject *intern;           /* Dictionary to intern strings */
81     PyObject **handlers;
82 } xmlparseobject;
83 
84 #define CHARACTER_DATA_BUFFER_SIZE 8192
85 
86 static PyTypeObject Xmlparsetype;
87 
88 typedef void (*xmlhandlersetter)(XML_Parser self, void *meth);
89 typedef void* xmlhandler;
90 
91 struct HandlerInfo {
92     const char *name;
93     xmlhandlersetter setter;
94     xmlhandler handler;
95     PyCodeObject *tb_code;
96     PyObject *nameobj;
97 };
98 
99 static struct HandlerInfo handler_info[64];
100 
101 /* Set an integer attribute on the error object; return true on success,
102  * false on an exception.
103  */
104 static int
set_error_attr(PyObject * err,char * name,int value)105 set_error_attr(PyObject *err, char *name, int value)
106 {
107     PyObject *v = PyInt_FromLong(value);
108 
109     if (v == NULL || PyObject_SetAttrString(err, name, v) == -1) {
110         Py_XDECREF(v);
111         return 0;
112     }
113     Py_DECREF(v);
114     return 1;
115 }
116 
117 /* Build and set an Expat exception, including positioning
118  * information.  Always returns NULL.
119  */
120 static PyObject *
set_error(xmlparseobject * self,enum XML_Error code)121 set_error(xmlparseobject *self, enum XML_Error code)
122 {
123     PyObject *err;
124     char buffer[256];
125     XML_Parser parser = self->itself;
126     int lineno = XML_GetErrorLineNumber(parser);
127     int column = XML_GetErrorColumnNumber(parser);
128 
129     /* There is no risk of overflowing this buffer, since
130        even for 64-bit integers, there is sufficient space. */
131     sprintf(buffer, "%.200s: line %i, column %i",
132             XML_ErrorString(code), lineno, column);
133     err = PyObject_CallFunction(ErrorObject, "s", buffer);
134     if (  err != NULL
135           && set_error_attr(err, "code", code)
136           && set_error_attr(err, "offset", column)
137           && set_error_attr(err, "lineno", lineno)) {
138         PyErr_SetObject(ErrorObject, err);
139     }
140     Py_XDECREF(err);
141     return NULL;
142 }
143 
144 static int
have_handler(xmlparseobject * self,int type)145 have_handler(xmlparseobject *self, int type)
146 {
147     PyObject *handler = self->handlers[type];
148     return handler != NULL;
149 }
150 
151 static PyObject *
get_handler_name(struct HandlerInfo * hinfo)152 get_handler_name(struct HandlerInfo *hinfo)
153 {
154     PyObject *name = hinfo->nameobj;
155     if (name == NULL) {
156         name = PyString_FromString(hinfo->name);
157         hinfo->nameobj = name;
158     }
159     Py_XINCREF(name);
160     return name;
161 }
162 
163 
#ifdef Py_USING_UNICODE
/* Convert a NUL-terminated string of XML_Chars into a Unicode string.
   Returns a new reference to None if str is a null pointer, and NULL
   (with an exception set) if decoding fails.

   XXX currently this code assumes that XML_Char is 8-bit,
   and hence in UTF-8.  */
static PyObject *
conv_string_to_unicode(const XML_Char *str)
{
    /* UTF-8 from Expat, Unicode desired */
    if (str != NULL)
        return PyUnicode_DecodeUTF8(str, strlen(str), "strict");

    Py_INCREF(Py_None);
    return Py_None;
}

/* Same as conv_string_to_unicode(), but for a buffer of `len` XML_Chars
   that need not be NUL-terminated.

   XXX currently this code assumes that XML_Char is 8-bit,
   and hence in UTF-8.  */
static PyObject *
conv_string_len_to_unicode(const XML_Char *str, int len)
{
    /* UTF-8 from Expat, Unicode desired */
    if (str != NULL)
        return PyUnicode_DecodeUTF8((const char *)str, len, "strict");

    Py_INCREF(Py_None);
    return Py_None;
}
#endif
194 
195 /* Convert a string of XML_Chars into an 8-bit Python string.
196    Returns None if str is a null pointer. */
197 
198 static PyObject *
conv_string_to_utf8(const XML_Char * str)199 conv_string_to_utf8(const XML_Char *str)
200 {
201     /* XXX currently this code assumes that XML_Char is 8-bit,
202        and hence in UTF-8.  */
203     /* UTF-8 from Expat, UTF-8 desired */
204     if (str == NULL) {
205         Py_INCREF(Py_None);
206         return Py_None;
207     }
208     return PyString_FromString(str);
209 }
210 
211 static PyObject *
conv_string_len_to_utf8(const XML_Char * str,int len)212 conv_string_len_to_utf8(const XML_Char *str, int len)
213 {
214     /* XXX currently this code assumes that XML_Char is 8-bit,
215        and hence in UTF-8.  */
216     /* UTF-8 from Expat, UTF-8 desired */
217     if (str == NULL) {
218         Py_INCREF(Py_None);
219         return Py_None;
220     }
221     return PyString_FromStringAndSize((const char *)str, len);
222 }
223 
224 /* Callback routines */
225 
226 static void clear_handlers(xmlparseobject *self, int initial);
227 
228 /* This handler is used when an error has been detected, in the hope
229    that actual parsing can be terminated early.  This will only help
230    if an external entity reference is encountered. */
231 static int
error_external_entity_ref_handler(XML_Parser parser,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)232 error_external_entity_ref_handler(XML_Parser parser,
233                                   const XML_Char *context,
234                                   const XML_Char *base,
235                                   const XML_Char *systemId,
236                                   const XML_Char *publicId)
237 {
238     return 0;
239 }
240 
241 /* Dummy character data handler used when an error (exception) has
242    been detected, and the actual parsing can be terminated early.
243    This is needed since character data handler can't be safely removed
244    from within the character data handler, but can be replaced.  It is
245    used only from the character data handler trampoline, and must be
246    used right after `flag_error()` is called. */
247 static void
noop_character_data_handler(void * userData,const XML_Char * data,int len)248 noop_character_data_handler(void *userData, const XML_Char *data, int len)
249 {
250     /* Do nothing. */
251 }
252 
253 static void
flag_error(xmlparseobject * self)254 flag_error(xmlparseobject *self)
255 {
256     clear_handlers(self, 0);
257     XML_SetExternalEntityRefHandler(self->itself,
258                                     error_external_entity_ref_handler);
259 }
260 
261 static PyCodeObject*
getcode(enum HandlerTypes slot,char * func_name,int lineno)262 getcode(enum HandlerTypes slot, char* func_name, int lineno)
263 {
264     if (handler_info[slot].tb_code == NULL) {
265         handler_info[slot].tb_code =
266             PyCode_NewEmpty(__FILE__, func_name, lineno);
267     }
268     return handler_info[slot].tb_code;
269 }
270 
#ifdef FIX_TRACE
/* Report event `code` with argument `val` for frame `f` to the thread's
 * C-level profile function and then to its trace function, if installed.
 * Nothing is called when tracing is disabled for the thread or a trace
 * call is already in progress.  Returns the result of the last hook that
 * ran (0 if none ran); a non-zero result from the profile function
 * short-circuits the trace function.
 */
static int
trace_frame(PyThreadState *tstate, PyFrameObject *f, int code, PyObject *val)
{
    int status = 0;

    if (tstate->tracing || !tstate->use_tracing)
        return 0;

    if (tstate->c_profilefunc != NULL) {
        tstate->tracing++;
        status = tstate->c_profilefunc(tstate->c_profileobj, f, code, val);
        /* The hook may have installed or removed hooks itself. */
        tstate->use_tracing = (tstate->c_tracefunc != NULL
                               || tstate->c_profilefunc != NULL);
        tstate->tracing--;
        if (status != 0)
            return status;
    }
    if (tstate->c_tracefunc != NULL) {
        tstate->tracing++;
        status = tstate->c_tracefunc(tstate->c_traceobj, f, code, val);
        tstate->use_tracing = (tstate->c_tracefunc != NULL
                               || tstate->c_profilefunc != NULL);
        tstate->tracing--;
    }
    return status;
}

/* Report the pending exception to the trace hooks as a PyTrace_EXCEPTION
 * event on frame `f`.  The exception is fetched, packed into a
 * (type, value, traceback) tuple for the hook, and restored afterwards
 * unless the hook itself reported an error, in which case it is dropped.
 * Returns the hook's result, or 0 if no trace function is installed or
 * the tuple could not be built.
 */
static int
trace_frame_exc(PyThreadState *tstate, PyFrameObject *f)
{
    PyObject *exc_type, *exc_value, *exc_tb, *info;
    int status;

    if (tstate->c_tracefunc == NULL)
        return 0;

    PyErr_Fetch(&exc_type, &exc_value, &exc_tb);
    if (exc_value == NULL) {
        exc_value = Py_None;
        Py_INCREF(exc_value);
    }
#if PY_VERSION_HEX < 0x02040000
    info = Py_BuildValue("(OOO)", exc_type, exc_value, exc_tb);
#else
    info = PyTuple_Pack(3, exc_type, exc_value, exc_tb);
#endif
    if (info == NULL) {
        PyErr_Restore(exc_type, exc_value, exc_tb);
        return 0;
    }

    status = trace_frame(tstate, f, PyTrace_EXCEPTION, info);
    Py_DECREF(info);
    if (status != 0) {
        /* The hook failed: discard the original exception. */
        Py_XDECREF(exc_type);
        Py_XDECREF(exc_value);
        Py_XDECREF(exc_tb);
        return status;
    }
    PyErr_Restore(exc_type, exc_value, exc_tb);
    return status;
}
#endif
334 
335 static PyObject*
call_with_frame(PyCodeObject * c,PyObject * func,PyObject * args,xmlparseobject * self)336 call_with_frame(PyCodeObject *c, PyObject* func, PyObject* args,
337                 xmlparseobject *self)
338 {
339     PyThreadState *tstate = PyThreadState_GET();
340     PyFrameObject *f;
341     PyObject *res;
342 
343     if (c == NULL)
344         return NULL;
345 
346     f = PyFrame_New(tstate, c, PyEval_GetGlobals(), NULL);
347     if (f == NULL)
348         return NULL;
349     tstate->frame = f;
350 #ifdef FIX_TRACE
351     if (trace_frame(tstate, f, PyTrace_CALL, Py_None) < 0) {
352         return NULL;
353     }
354 #endif
355     res = PyEval_CallObject(func, args);
356     if (res == NULL) {
357         if (tstate->curexc_traceback == NULL)
358             PyTraceBack_Here(f);
359         XML_StopParser(self->itself, XML_FALSE);
360 #ifdef FIX_TRACE
361         if (trace_frame_exc(tstate, f) < 0) {
362             return NULL;
363         }
364     }
365     else {
366         if (trace_frame(tstate, f, PyTrace_RETURN, res) < 0) {
367             Py_XDECREF(res);
368             res = NULL;
369         }
370     }
371 #else
372     }
373 #endif
374     tstate->frame = f->f_back;
375     Py_DECREF(f);
376     return res;
377 }
378 
/* STRING_CONV_FUNC evaluates to the conversion function to use for
 * NUL-terminated XML_Char strings.  In Unicode builds the expansion reads
 * `self->returns_unicode`, so a variable named `self` must be in scope
 * wherever the macro is used.
 */
#ifndef Py_USING_UNICODE
#define STRING_CONV_FUNC conv_string_to_utf8
#else
/* Python 2.0 and later versions, when built with Unicode support */
#define STRING_CONV_FUNC (self->returns_unicode \
                          ? conv_string_to_unicode : conv_string_to_utf8)
#endif
386 
387 static PyObject*
string_intern(xmlparseobject * self,const char * str)388 string_intern(xmlparseobject *self, const char* str)
389 {
390     PyObject *result = STRING_CONV_FUNC(str);
391     PyObject *value;
392     /* result can be NULL if the unicode conversion failed. */
393     if (!result)
394         return result;
395     if (!self->intern)
396         return result;
397     value = PyDict_GetItem(self->intern, result);
398     if (!value) {
399         if (PyDict_SetItem(self->intern, result, result) == 0)
400             return result;
401         else
402             return NULL;
403     }
404     Py_INCREF(value);
405     Py_DECREF(result);
406     return value;
407 }
408 
409 /* Return 0 on success, -1 on exception.
410  * flag_error() will be called before return if needed.
411  */
412 static int
call_character_handler(xmlparseobject * self,const XML_Char * buffer,int len)413 call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len)
414 {
415     PyObject *args;
416     PyObject *temp;
417 
418     if (!have_handler(self, CharacterData))
419         return -1;
420 
421     args = PyTuple_New(1);
422     if (args == NULL)
423         return -1;
424 #ifdef Py_USING_UNICODE
425     temp = (self->returns_unicode
426             ? conv_string_len_to_unicode(buffer, len)
427             : conv_string_len_to_utf8(buffer, len));
428 #else
429     temp = conv_string_len_to_utf8(buffer, len);
430 #endif
431     if (temp == NULL) {
432         Py_DECREF(args);
433         flag_error(self);
434         XML_SetCharacterDataHandler(self->itself,
435                                     noop_character_data_handler);
436         return -1;
437     }
438     PyTuple_SET_ITEM(args, 0, temp);
439     /* temp is now a borrowed reference; consider it unused. */
440     self->in_callback = 1;
441     temp = call_with_frame(getcode(CharacterData, "CharacterData", __LINE__),
442                            self->handlers[CharacterData], args, self);
443     /* temp is an owned reference again, or NULL */
444     self->in_callback = 0;
445     Py_DECREF(args);
446     if (temp == NULL) {
447         flag_error(self);
448         XML_SetCharacterDataHandler(self->itself,
449                                     noop_character_data_handler);
450         return -1;
451     }
452     Py_DECREF(temp);
453     return 0;
454 }
455 
456 static int
flush_character_buffer(xmlparseobject * self)457 flush_character_buffer(xmlparseobject *self)
458 {
459     int rc;
460     if (self->buffer == NULL || self->buffer_used == 0)
461         return 0;
462     rc = call_character_handler(self, self->buffer, self->buffer_used);
463     self->buffer_used = 0;
464     return rc;
465 }
466 
467 static void
my_CharacterDataHandler(void * userData,const XML_Char * data,int len)468 my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
469 {
470     xmlparseobject *self = (xmlparseobject *) userData;
471     if (self->buffer == NULL)
472         call_character_handler(self, data, len);
473     else {
474         if ((self->buffer_used + len) > self->buffer_size) {
475             if (flush_character_buffer(self) < 0)
476                 return;
477             /* handler might have changed; drop the rest on the floor
478              * if there isn't a handler anymore
479              */
480             if (!have_handler(self, CharacterData))
481                 return;
482         }
483         if (len > self->buffer_size) {
484             call_character_handler(self, data, len);
485             self->buffer_used = 0;
486         }
487         else {
488             memcpy(self->buffer + self->buffer_used,
489                    data, len * sizeof(XML_Char));
490             self->buffer_used += len;
491         }
492     }
493 }
494 
495 static void
my_StartElementHandler(void * userData,const XML_Char * name,const XML_Char * atts[])496 my_StartElementHandler(void *userData,
497                        const XML_Char *name, const XML_Char *atts[])
498 {
499     xmlparseobject *self = (xmlparseobject *)userData;
500 
501     if (have_handler(self, StartElement)) {
502         PyObject *container, *rv, *args;
503         int i, max;
504 
505         if (flush_character_buffer(self) < 0)
506             return;
507         /* Set max to the number of slots filled in atts[]; max/2 is
508          * the number of attributes we need to process.
509          */
510         if (self->specified_attributes) {
511             max = XML_GetSpecifiedAttributeCount(self->itself);
512         }
513         else {
514             max = 0;
515             while (atts[max] != NULL)
516                 max += 2;
517         }
518         /* Build the container. */
519         if (self->ordered_attributes)
520             container = PyList_New(max);
521         else
522             container = PyDict_New();
523         if (container == NULL) {
524             flag_error(self);
525             return;
526         }
527         for (i = 0; i < max; i += 2) {
528             PyObject *n = string_intern(self, (XML_Char *) atts[i]);
529             PyObject *v;
530             if (n == NULL) {
531                 flag_error(self);
532                 Py_DECREF(container);
533                 return;
534             }
535             v = STRING_CONV_FUNC((XML_Char *) atts[i+1]);
536             if (v == NULL) {
537                 flag_error(self);
538                 Py_DECREF(container);
539                 Py_DECREF(n);
540                 return;
541             }
542             if (self->ordered_attributes) {
543                 PyList_SET_ITEM(container, i, n);
544                 PyList_SET_ITEM(container, i+1, v);
545             }
546             else if (PyDict_SetItem(container, n, v)) {
547                 flag_error(self);
548                 Py_DECREF(n);
549                 Py_DECREF(v);
550                 return;
551             }
552             else {
553                 Py_DECREF(n);
554                 Py_DECREF(v);
555             }
556         }
557         args = string_intern(self, name);
558         if (args != NULL)
559             args = Py_BuildValue("(NN)", args, container);
560         if (args == NULL) {
561             Py_DECREF(container);
562             return;
563         }
564         /* Container is now a borrowed reference; ignore it. */
565         self->in_callback = 1;
566         rv = call_with_frame(getcode(StartElement, "StartElement", __LINE__),
567                              self->handlers[StartElement], args, self);
568         self->in_callback = 0;
569         Py_DECREF(args);
570         if (rv == NULL) {
571             flag_error(self);
572             return;
573         }
574         Py_DECREF(rv);
575     }
576 }
577 
/* Generate a static Expat callback named my_<NAME>Handler.
 *
 *   RC            return type of the generated function
 *   NAME          enum HandlerTypes member; selects self->handlers[NAME]
 *   PARAMS        parenthesized C parameter list
 *   INIT          extra declarations placed after the standard locals
 *   PARAM_FORMAT  parenthesized argument list for Py_BuildValue()
 *   CONVERSION    statement(s) run on the handler's result `rv`
 *   RETURN        expression returned on every exit path
 *   GETUSERDATA   expression yielding the xmlparseobject pointer
 *
 * When a Python handler is installed, the generated function flushes
 * buffered character data, builds the argument tuple, calls the handler
 * through call_with_frame() and calls flag_error() if building the
 * arguments or calling the handler fails.
 */
#define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
                RETURN, GETUSERDATA) \
static RC \
my_##NAME##Handler PARAMS {\
    xmlparseobject *self = GETUSERDATA ; \
    PyObject *args = NULL; \
    PyObject *rv = NULL; \
    INIT \
\
    if (have_handler(self, NAME)) { \
        if (flush_character_buffer(self) < 0) \
            return RETURN; \
        args = Py_BuildValue PARAM_FORMAT ;\
        if (!args) { flag_error(self); return RETURN;} \
        self->in_callback = 1; \
        rv = call_with_frame(getcode(NAME,#NAME,__LINE__), \
                             self->handlers[NAME], args, self); \
        self->in_callback = 0; \
        Py_DECREF(args); \
        if (rv == NULL) { \
            flag_error(self); \
            return RETURN; \
        } \
        CONVERSION \
        Py_DECREF(rv); \
    } \
    return RETURN; \
}

/* Callback returning void whose parser object arrives as `userData`. */
#define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
        RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;,\
        (xmlparseobject *)userData)

/* Callback returning int: the Python handler's result is converted with
 * PyInt_AsLong(); 0 is returned when no handler runs or on failure.
 */
#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT)\
        RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
                        rc = PyInt_AsLong(rv);, rc, \
        (xmlparseobject *)userData)
615 
616 VOID_HANDLER(EndElement,
617              (void *userData, const XML_Char *name),
618              ("(N)", string_intern(self, name)))
619 
620 VOID_HANDLER(ProcessingInstruction,
621              (void *userData,
622               const XML_Char *target,
623               const XML_Char *data),
624              ("(NO&)", string_intern(self, target), STRING_CONV_FUNC,data))
625 
626 VOID_HANDLER(UnparsedEntityDecl,
627              (void *userData,
628               const XML_Char *entityName,
629               const XML_Char *base,
630               const XML_Char *systemId,
631               const XML_Char *publicId,
632               const XML_Char *notationName),
633              ("(NNNNN)",
634               string_intern(self, entityName), string_intern(self, base),
635               string_intern(self, systemId), string_intern(self, publicId),
636               string_intern(self, notationName)))
637 
638 #ifndef Py_USING_UNICODE
639 VOID_HANDLER(EntityDecl,
640              (void *userData,
641               const XML_Char *entityName,
642               int is_parameter_entity,
643               const XML_Char *value,
644               int value_length,
645               const XML_Char *base,
646               const XML_Char *systemId,
647               const XML_Char *publicId,
648               const XML_Char *notationName),
649              ("NiNNNNN",
650               string_intern(self, entityName), is_parameter_entity,
651               conv_string_len_to_utf8(value, value_length),
652               string_intern(self, base), string_intern(self, systemId),
653               string_intern(self, publicId),
654               string_intern(self, notationName)))
655 #else
656 VOID_HANDLER(EntityDecl,
657              (void *userData,
658               const XML_Char *entityName,
659               int is_parameter_entity,
660               const XML_Char *value,
661               int value_length,
662               const XML_Char *base,
663               const XML_Char *systemId,
664               const XML_Char *publicId,
665               const XML_Char *notationName),
666              ("NiNNNNN",
667               string_intern(self, entityName), is_parameter_entity,
668               (self->returns_unicode
669                ? conv_string_len_to_unicode(value, value_length)
670                : conv_string_len_to_utf8(value, value_length)),
671               string_intern(self, base), string_intern(self, systemId),
672               string_intern(self, publicId),
673               string_intern(self, notationName)))
674 #endif
675 
676 VOID_HANDLER(XmlDecl,
677              (void *userData,
678               const XML_Char *version,
679               const XML_Char *encoding,
680               int standalone),
681              ("(O&O&i)",
682               STRING_CONV_FUNC,version, STRING_CONV_FUNC,encoding,
683               standalone))
684 
685 static PyObject *
conv_content_model(XML_Content * const model,PyObject * (* conv_string)(const XML_Char *))686 conv_content_model(XML_Content * const model,
687                    PyObject *(*conv_string)(const XML_Char *))
688 {
689     PyObject *result = NULL;
690     PyObject *children = PyTuple_New(model->numchildren);
691     int i;
692 
693     if (children != NULL) {
694         assert(model->numchildren < INT_MAX);
695         for (i = 0; i < (int)model->numchildren; ++i) {
696             PyObject *child = conv_content_model(&model->children[i],
697                                                  conv_string);
698             if (child == NULL) {
699                 Py_XDECREF(children);
700                 return NULL;
701             }
702             PyTuple_SET_ITEM(children, i, child);
703         }
704         result = Py_BuildValue("(iiO&N)",
705                                model->type, model->quant,
706                                conv_string,model->name, children);
707     }
708     return result;
709 }
710 
711 static void
my_ElementDeclHandler(void * userData,const XML_Char * name,XML_Content * model)712 my_ElementDeclHandler(void *userData,
713                       const XML_Char *name,
714                       XML_Content *model)
715 {
716     xmlparseobject *self = (xmlparseobject *)userData;
717     PyObject *args = NULL;
718 
719     if (have_handler(self, ElementDecl)) {
720         PyObject *rv = NULL;
721         PyObject *modelobj, *nameobj;
722 
723         if (flush_character_buffer(self) < 0)
724             goto finally;
725 #ifdef Py_USING_UNICODE
726         modelobj = conv_content_model(model,
727                                       (self->returns_unicode
728                                        ? conv_string_to_unicode
729                                        : conv_string_to_utf8));
730 #else
731         modelobj = conv_content_model(model, conv_string_to_utf8);
732 #endif
733         if (modelobj == NULL) {
734             flag_error(self);
735             goto finally;
736         }
737         nameobj = string_intern(self, name);
738         if (nameobj == NULL) {
739             Py_DECREF(modelobj);
740             flag_error(self);
741             goto finally;
742         }
743         args = Py_BuildValue("NN", nameobj, modelobj);
744         if (args == NULL) {
745             Py_DECREF(modelobj);
746             flag_error(self);
747             goto finally;
748         }
749         self->in_callback = 1;
750         rv = call_with_frame(getcode(ElementDecl, "ElementDecl", __LINE__),
751                              self->handlers[ElementDecl], args, self);
752         self->in_callback = 0;
753         if (rv == NULL) {
754             flag_error(self);
755             goto finally;
756         }
757         Py_DECREF(rv);
758     }
759  finally:
760     Py_XDECREF(args);
761     XML_FreeContentModel(self->itself, model);
762     return;
763 }
764 
765 VOID_HANDLER(AttlistDecl,
766              (void *userData,
767               const XML_Char *elname,
768               const XML_Char *attname,
769               const XML_Char *att_type,
770               const XML_Char *dflt,
771               int isrequired),
772              ("(NNO&O&i)",
773               string_intern(self, elname), string_intern(self, attname),
774               STRING_CONV_FUNC,att_type, STRING_CONV_FUNC,dflt,
775               isrequired))
776 
777 #if XML_COMBINED_VERSION >= 19504
778 VOID_HANDLER(SkippedEntity,
779              (void *userData,
780               const XML_Char *entityName,
781               int is_parameter_entity),
782              ("Ni",
783               string_intern(self, entityName), is_parameter_entity))
784 #endif
785 
786 VOID_HANDLER(NotationDecl,
787                 (void *userData,
788                         const XML_Char *notationName,
789                         const XML_Char *base,
790                         const XML_Char *systemId,
791                         const XML_Char *publicId),
792                 ("(NNNN)",
793                  string_intern(self, notationName), string_intern(self, base),
794                  string_intern(self, systemId), string_intern(self, publicId)))
795 
796 VOID_HANDLER(StartNamespaceDecl,
797                 (void *userData,
798                       const XML_Char *prefix,
799                       const XML_Char *uri),
800                 ("(NN)",
801                  string_intern(self, prefix), string_intern(self, uri)))
802 
803 VOID_HANDLER(EndNamespaceDecl,
804                 (void *userData,
805                     const XML_Char *prefix),
806                 ("(N)", string_intern(self, prefix)))
807 
808 VOID_HANDLER(Comment,
809                (void *userData, const XML_Char *data),
810                 ("(O&)", STRING_CONV_FUNC,data))
811 
812 VOID_HANDLER(StartCdataSection,
813                (void *userData),
814                 ("()"))
815 
816 VOID_HANDLER(EndCdataSection,
817                (void *userData),
818                 ("()"))
819 
820 #ifndef Py_USING_UNICODE
821 VOID_HANDLER(Default,
822               (void *userData, const XML_Char *s, int len),
823               ("(N)", conv_string_len_to_utf8(s,len)))
824 
825 VOID_HANDLER(DefaultHandlerExpand,
826               (void *userData, const XML_Char *s, int len),
827               ("(N)", conv_string_len_to_utf8(s,len)))
828 #else
829 VOID_HANDLER(Default,
830               (void *userData, const XML_Char *s, int len),
831               ("(N)", (self->returns_unicode
832                        ? conv_string_len_to_unicode(s,len)
833                        : conv_string_len_to_utf8(s,len))))
834 
835 VOID_HANDLER(DefaultHandlerExpand,
836               (void *userData, const XML_Char *s, int len),
837               ("(N)", (self->returns_unicode
838                        ? conv_string_len_to_unicode(s,len)
839                        : conv_string_len_to_utf8(s,len))))
840 #endif
841 
842 INT_HANDLER(NotStandalone,
843                 (void *userData),
844                 ("()"))
845 
846 RC_HANDLER(int, ExternalEntityRef,
847                 (XML_Parser parser,
848                     const XML_Char *context,
849                     const XML_Char *base,
850                     const XML_Char *systemId,
851                     const XML_Char *publicId),
852                 int rc=0;,
853                 ("(O&NNN)",
854                  STRING_CONV_FUNC,context, string_intern(self, base),
855                  string_intern(self, systemId), string_intern(self, publicId)),
856                 rc = PyInt_AsLong(rv);, rc,
857                 XML_GetUserData(parser))
858 
859 /* XXX UnknownEncodingHandler */
860 
861 VOID_HANDLER(StartDoctypeDecl,
862              (void *userData, const XML_Char *doctypeName,
863               const XML_Char *sysid, const XML_Char *pubid,
864               int has_internal_subset),
865              ("(NNNi)", string_intern(self, doctypeName),
866               string_intern(self, sysid), string_intern(self, pubid),
867               has_internal_subset))
868 
869 VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
870 
871 /* ---------------------------------------------------------------- */
872 
873 static PyObject *
get_parse_result(xmlparseobject * self,int rv)874 get_parse_result(xmlparseobject *self, int rv)
875 {
876     if (PyErr_Occurred()) {
877         return NULL;
878     }
879     if (rv == 0) {
880         return set_error(self, XML_GetErrorCode(self->itself));
881     }
882     if (flush_character_buffer(self) < 0) {
883         return NULL;
884     }
885     return PyInt_FromLong(rv);
886 }
887 
888 PyDoc_STRVAR(xmlparse_Parse__doc__,
889 "Parse(data[, isfinal])\n\
890 Parse XML data.  `isfinal' should be true at end of input.");
891 
892 static PyObject *
xmlparse_Parse(xmlparseobject * self,PyObject * args)893 xmlparse_Parse(xmlparseobject *self, PyObject *args)
894 {
895     char *s;
896     int slen;
897     int isFinal = 0;
898 
899     if (!PyArg_ParseTuple(args, "s#|i:Parse", &s, &slen, &isFinal))
900         return NULL;
901 
902     return get_parse_result(self, XML_Parse(self->itself, s, slen, isFinal));
903 }
904 
905 /* File reading copied from cPickle */
906 
907 #define BUF_SIZE 2048
908 
909 static int
readinst(char * buf,int buf_size,PyObject * meth)910 readinst(char *buf, int buf_size, PyObject *meth)
911 {
912     PyObject *arg = NULL;
913     PyObject *bytes = NULL;
914     PyObject *str = NULL;
915     int len = -1;
916 
917     if ((bytes = PyInt_FromLong(buf_size)) == NULL)
918         goto finally;
919 
920     if ((arg = PyTuple_New(1)) == NULL) {
921         Py_DECREF(bytes);
922         goto finally;
923     }
924 
925     PyTuple_SET_ITEM(arg, 0, bytes);
926 
927 #if PY_VERSION_HEX < 0x02020000
928     str = PyObject_CallObject(meth, arg);
929 #else
930     str = PyObject_Call(meth, arg, NULL);
931 #endif
932     if (str == NULL)
933         goto finally;
934 
935     /* XXX what to do if it returns a Unicode string? */
936     if (!PyString_Check(str)) {
937         PyErr_Format(PyExc_TypeError,
938                      "read() did not return a string object (type=%.400s)",
939                      Py_TYPE(str)->tp_name);
940         goto finally;
941     }
942     len = PyString_GET_SIZE(str);
943     if (len > buf_size) {
944         PyErr_Format(PyExc_ValueError,
945                      "read() returned too much data: "
946                      "%i bytes requested, %i returned",
947                      buf_size, len);
948         goto finally;
949     }
950     memcpy(buf, PyString_AsString(str), len);
951 finally:
952     Py_XDECREF(arg);
953     Py_XDECREF(str);
954     return len;
955 }
956 
957 PyDoc_STRVAR(xmlparse_ParseFile__doc__,
958 "ParseFile(file)\n\
959 Parse XML data from file-like object.");
960 
961 static PyObject *
xmlparse_ParseFile(xmlparseobject * self,PyObject * f)962 xmlparse_ParseFile(xmlparseobject *self, PyObject *f)
963 {
964     int rv = 1;
965     PyObject *readmethod = NULL;
966 
967     readmethod = PyObject_GetAttrString(f, "read");
968     if (readmethod == NULL) {
969         PyErr_SetString(PyExc_TypeError,
970                         "argument must have 'read' attribute");
971         return NULL;
972 
973     }
974     for (;;) {
975         int bytes_read;
976         void *buf = XML_GetBuffer(self->itself, BUF_SIZE);
977         if (buf == NULL) {
978             Py_XDECREF(readmethod);
979             return get_parse_result(self, 0);
980         }
981 
982         bytes_read = readinst(buf, BUF_SIZE, readmethod);
983         if (bytes_read < 0) {
984             Py_XDECREF(readmethod);
985             return NULL;
986         }
987 
988         rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0);
989         if (PyErr_Occurred()) {
990             Py_XDECREF(readmethod);
991             return NULL;
992         }
993 
994         if (!rv || bytes_read == 0)
995             break;
996     }
997     Py_XDECREF(readmethod);
998     return get_parse_result(self, rv);
999 }
1000 
1001 PyDoc_STRVAR(xmlparse_SetBase__doc__,
1002 "SetBase(base_url)\n\
1003 Set the base URL for the parser.");
1004 
1005 static PyObject *
xmlparse_SetBase(xmlparseobject * self,PyObject * args)1006 xmlparse_SetBase(xmlparseobject *self, PyObject *args)
1007 {
1008     char *base;
1009 
1010     if (!PyArg_ParseTuple(args, "s:SetBase", &base))
1011         return NULL;
1012     if (!XML_SetBase(self->itself, base)) {
1013         return PyErr_NoMemory();
1014     }
1015     Py_INCREF(Py_None);
1016     return Py_None;
1017 }
1018 
1019 PyDoc_STRVAR(xmlparse_GetBase__doc__,
1020 "GetBase() -> url\n\
1021 Return base URL string for the parser.");
1022 
1023 static PyObject *
xmlparse_GetBase(xmlparseobject * self,PyObject * unused)1024 xmlparse_GetBase(xmlparseobject *self, PyObject *unused)
1025 {
1026     return Py_BuildValue("z", XML_GetBase(self->itself));
1027 }
1028 
1029 PyDoc_STRVAR(xmlparse_GetInputContext__doc__,
1030 "GetInputContext() -> string\n\
1031 Return the untranslated text of the input that caused the current event.\n\
1032 If the event was generated by a large amount of text (such as a start tag\n\
1033 for an element with many attributes), not all of the text may be available.");
1034 
1035 static PyObject *
xmlparse_GetInputContext(xmlparseobject * self,PyObject * unused)1036 xmlparse_GetInputContext(xmlparseobject *self, PyObject *unused)
1037 {
1038     if (self->in_callback) {
1039         int offset, size;
1040         const char *buffer
1041             = XML_GetInputContext(self->itself, &offset, &size);
1042 
1043         if (buffer != NULL)
1044             return PyString_FromStringAndSize(buffer + offset,
1045                                               size - offset);
1046         else
1047             Py_RETURN_NONE;
1048     }
1049     else
1050         Py_RETURN_NONE;
1051 }
1052 
1053 PyDoc_STRVAR(xmlparse_ExternalEntityParserCreate__doc__,
1054 "ExternalEntityParserCreate(context[, encoding])\n\
1055 Create a parser for parsing an external entity based on the\n\
1056 information passed to the ExternalEntityRefHandler.");
1057 
1058 static PyObject *
xmlparse_ExternalEntityParserCreate(xmlparseobject * self,PyObject * args)1059 xmlparse_ExternalEntityParserCreate(xmlparseobject *self, PyObject *args)
1060 {
1061     char *context;
1062     char *encoding = NULL;
1063     xmlparseobject *new_parser;
1064     int i;
1065 
1066     if (!PyArg_ParseTuple(args, "z|s:ExternalEntityParserCreate",
1067                           &context, &encoding)) {
1068         return NULL;
1069     }
1070 
1071 #ifndef Py_TPFLAGS_HAVE_GC
1072     /* Python versions 2.0 and 2.1 */
1073     new_parser = PyObject_New(xmlparseobject, &Xmlparsetype);
1074 #else
1075     /* Python versions 2.2 and later */
1076     new_parser = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
1077 #endif
1078 
1079     if (new_parser == NULL)
1080         return NULL;
1081     new_parser->buffer_size = self->buffer_size;
1082     new_parser->buffer_used = 0;
1083     if (self->buffer != NULL) {
1084         new_parser->buffer = malloc(new_parser->buffer_size);
1085         if (new_parser->buffer == NULL) {
1086 #ifndef Py_TPFLAGS_HAVE_GC
1087             /* Code for versions 2.0 and 2.1 */
1088             PyObject_Del(new_parser);
1089 #else
1090             /* Code for versions 2.2 and later. */
1091             PyObject_GC_Del(new_parser);
1092 #endif
1093             return PyErr_NoMemory();
1094         }
1095     }
1096     else
1097         new_parser->buffer = NULL;
1098     new_parser->returns_unicode = self->returns_unicode;
1099     new_parser->ordered_attributes = self->ordered_attributes;
1100     new_parser->specified_attributes = self->specified_attributes;
1101     new_parser->in_callback = 0;
1102     new_parser->ns_prefixes = self->ns_prefixes;
1103     new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
1104                                                         encoding);
1105     new_parser->handlers = 0;
1106     new_parser->intern = self->intern;
1107     Py_XINCREF(new_parser->intern);
1108 #ifdef Py_TPFLAGS_HAVE_GC
1109     PyObject_GC_Track(new_parser);
1110 #else
1111     PyObject_GC_Init(new_parser);
1112 #endif
1113 
1114     if (!new_parser->itself) {
1115         Py_DECREF(new_parser);
1116         return PyErr_NoMemory();
1117     }
1118 
1119     XML_SetUserData(new_parser->itself, (void *)new_parser);
1120 
1121     /* allocate and clear handlers first */
1122     for (i = 0; handler_info[i].name != NULL; i++)
1123         /* do nothing */;
1124 
1125     new_parser->handlers = malloc(sizeof(PyObject *) * i);
1126     if (!new_parser->handlers) {
1127         Py_DECREF(new_parser);
1128         return PyErr_NoMemory();
1129     }
1130     clear_handlers(new_parser, 1);
1131 
1132     /* then copy handlers from self */
1133     for (i = 0; handler_info[i].name != NULL; i++) {
1134         PyObject *handler = self->handlers[i];
1135         if (handler != NULL) {
1136             Py_INCREF(handler);
1137             new_parser->handlers[i] = handler;
1138             handler_info[i].setter(new_parser->itself,
1139                                    handler_info[i].handler);
1140         }
1141     }
1142     return (PyObject *)new_parser;
1143 }
1144 
1145 PyDoc_STRVAR(xmlparse_SetParamEntityParsing__doc__,
1146 "SetParamEntityParsing(flag) -> success\n\
1147 Controls parsing of parameter entities (including the external DTD\n\
1148 subset). Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,\n\
1149 XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and\n\
1150 XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag\n\
1151 was successful.");
1152 
1153 static PyObject*
xmlparse_SetParamEntityParsing(xmlparseobject * p,PyObject * args)1154 xmlparse_SetParamEntityParsing(xmlparseobject *p, PyObject* args)
1155 {
1156     int flag;
1157     if (!PyArg_ParseTuple(args, "i", &flag))
1158         return NULL;
1159     flag = XML_SetParamEntityParsing(p->itself, flag);
1160     return PyInt_FromLong(flag);
1161 }
1162 
1163 
1164 #if XML_COMBINED_VERSION >= 19505
1165 PyDoc_STRVAR(xmlparse_UseForeignDTD__doc__,
1166 "UseForeignDTD([flag])\n\
1167 Allows the application to provide an artificial external subset if one is\n\
1168 not specified as part of the document instance.  This readily allows the\n\
1169 use of a 'default' document type controlled by the application, while still\n\
1170 getting the advantage of providing document type information to the parser.\n\
1171 'flag' defaults to True if not provided.");
1172 
1173 static PyObject *
xmlparse_UseForeignDTD(xmlparseobject * self,PyObject * args)1174 xmlparse_UseForeignDTD(xmlparseobject *self, PyObject *args)
1175 {
1176     PyObject *flagobj = NULL;
1177     int flag = 1;
1178     enum XML_Error rc;
1179     if (!PyArg_ParseTuple(args, "|O:UseForeignDTD", &flagobj))
1180         return NULL;
1181     if (flagobj != NULL) {
1182         flag = PyObject_IsTrue(flagobj);
1183         if (flag < 0)
1184             return NULL;
1185     }
1186     rc = XML_UseForeignDTD(self->itself, flag ? XML_TRUE : XML_FALSE);
1187     if (rc != XML_ERROR_NONE) {
1188         return set_error(self, rc);
1189     }
1190     Py_INCREF(Py_None);
1191     return Py_None;
1192 }
1193 #endif
1194 
1195 static struct PyMethodDef xmlparse_methods[] = {
1196     {"Parse",     (PyCFunction)xmlparse_Parse,
1197                   METH_VARARGS, xmlparse_Parse__doc__},
1198     {"ParseFile", (PyCFunction)xmlparse_ParseFile,
1199                   METH_O,       xmlparse_ParseFile__doc__},
1200     {"SetBase",   (PyCFunction)xmlparse_SetBase,
1201                   METH_VARARGS, xmlparse_SetBase__doc__},
1202     {"GetBase",   (PyCFunction)xmlparse_GetBase,
1203                   METH_NOARGS, xmlparse_GetBase__doc__},
1204     {"ExternalEntityParserCreate", (PyCFunction)xmlparse_ExternalEntityParserCreate,
1205                   METH_VARARGS, xmlparse_ExternalEntityParserCreate__doc__},
1206     {"SetParamEntityParsing", (PyCFunction)xmlparse_SetParamEntityParsing,
1207                   METH_VARARGS, xmlparse_SetParamEntityParsing__doc__},
1208     {"GetInputContext", (PyCFunction)xmlparse_GetInputContext,
1209                   METH_NOARGS, xmlparse_GetInputContext__doc__},
1210 #if XML_COMBINED_VERSION >= 19505
1211     {"UseForeignDTD", (PyCFunction)xmlparse_UseForeignDTD,
1212                   METH_VARARGS, xmlparse_UseForeignDTD__doc__},
1213 #endif
1214     {NULL,        NULL}         /* sentinel */
1215 };
1216 
1217 /* ---------- */
1218 
1219 
1220 #ifdef Py_USING_UNICODE
1221 
1222 /* pyexpat international encoding support.
1223    Make it as simple as possible.
1224 */
1225 
1226 static char template_buffer[257];
1227 PyObject *template_string = NULL;
1228 
1229 static void
init_template_buffer(void)1230 init_template_buffer(void)
1231 {
1232     int i;
1233     for (i = 0; i < 256; i++) {
1234         template_buffer[i] = i;
1235     }
1236     template_buffer[256] = 0;
1237 }
1238 
1239 static int
PyUnknownEncodingHandler(void * encodingHandlerData,const XML_Char * name,XML_Encoding * info)1240 PyUnknownEncodingHandler(void *encodingHandlerData,
1241                          const XML_Char *name,
1242                          XML_Encoding *info)
1243 {
1244     PyUnicodeObject *_u_string = NULL;
1245     int result = 0;
1246     int i;
1247 
1248     /* Yes, supports only 8bit encodings */
1249     _u_string = (PyUnicodeObject *)
1250         PyUnicode_Decode(template_buffer, 256, name, "replace");
1251 
1252     if (_u_string == NULL)
1253         return result;
1254 
1255     if (PyUnicode_GET_SIZE(_u_string) != 256) {
1256         Py_DECREF(_u_string);
1257         PyErr_SetString(PyExc_ValueError,
1258                         "multi-byte encodings are not supported");
1259         return result;
1260     }
1261 
1262     for (i = 0; i < 256; i++) {
1263         /* Stupid to access directly, but fast */
1264         Py_UNICODE c = _u_string->str[i];
1265         if (c == Py_UNICODE_REPLACEMENT_CHARACTER)
1266             info->map[i] = -1;
1267         else
1268             info->map[i] = c;
1269     }
1270     info->data = NULL;
1271     info->convert = NULL;
1272     info->release = NULL;
1273     result = 1;
1274     Py_DECREF(_u_string);
1275     return result;
1276 }
1277 
1278 #endif
1279 
1280 static PyObject *
newxmlparseobject(char * encoding,char * namespace_separator,PyObject * intern)1281 newxmlparseobject(char *encoding, char *namespace_separator, PyObject *intern)
1282 {
1283     int i;
1284     xmlparseobject *self;
1285 
1286 #ifdef Py_TPFLAGS_HAVE_GC
1287     /* Code for versions 2.2 and later */
1288     self = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
1289 #else
1290     self = PyObject_New(xmlparseobject, &Xmlparsetype);
1291 #endif
1292     if (self == NULL)
1293         return NULL;
1294 
1295 #ifdef Py_USING_UNICODE
1296     self->returns_unicode = 1;
1297 #else
1298     self->returns_unicode = 0;
1299 #endif
1300 
1301     self->buffer = NULL;
1302     self->buffer_size = CHARACTER_DATA_BUFFER_SIZE;
1303     self->buffer_used = 0;
1304     self->ordered_attributes = 0;
1305     self->specified_attributes = 0;
1306     self->in_callback = 0;
1307     self->ns_prefixes = 0;
1308     self->handlers = NULL;
1309     if (namespace_separator != NULL) {
1310         self->itself = XML_ParserCreateNS(encoding, *namespace_separator);
1311     }
1312     else {
1313         self->itself = XML_ParserCreate(encoding);
1314     }
1315 #if ((XML_MAJOR_VERSION >= 2) && (XML_MINOR_VERSION >= 1)) || defined(XML_HAS_SET_HASH_SALT)
1316     /* This feature was added upstream in libexpat 2.1.0.  Our expat copy
1317      * has a backport of this feature where we also define XML_HAS_SET_HASH_SALT
1318      * to indicate that we can still use it. */
1319     XML_SetHashSalt(self->itself,
1320                     (unsigned long)_Py_HashSecret.prefix);
1321 #endif
1322     self->intern = intern;
1323     Py_XINCREF(self->intern);
1324 #ifdef Py_TPFLAGS_HAVE_GC
1325     PyObject_GC_Track(self);
1326 #else
1327     PyObject_GC_Init(self);
1328 #endif
1329     if (self->itself == NULL) {
1330         PyErr_SetString(PyExc_RuntimeError,
1331                         "XML_ParserCreate failed");
1332         Py_DECREF(self);
1333         return NULL;
1334     }
1335     XML_SetUserData(self->itself, (void *)self);
1336 #ifdef Py_USING_UNICODE
1337     XML_SetUnknownEncodingHandler(self->itself,
1338                   (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL);
1339 #endif
1340 
1341     for (i = 0; handler_info[i].name != NULL; i++)
1342         /* do nothing */;
1343 
1344     self->handlers = malloc(sizeof(PyObject *) * i);
1345     if (!self->handlers) {
1346         Py_DECREF(self);
1347         return PyErr_NoMemory();
1348     }
1349     clear_handlers(self, 1);
1350 
1351     return (PyObject*)self;
1352 }
1353 
1354 
1355 static void
xmlparse_dealloc(xmlparseobject * self)1356 xmlparse_dealloc(xmlparseobject *self)
1357 {
1358     int i;
1359 #ifdef Py_TPFLAGS_HAVE_GC
1360     PyObject_GC_UnTrack(self);
1361 #else
1362     PyObject_GC_Fini(self);
1363 #endif
1364     if (self->itself != NULL)
1365         XML_ParserFree(self->itself);
1366     self->itself = NULL;
1367 
1368     if (self->handlers != NULL) {
1369         PyObject *temp;
1370         for (i = 0; handler_info[i].name != NULL; i++) {
1371             temp = self->handlers[i];
1372             self->handlers[i] = NULL;
1373             Py_XDECREF(temp);
1374         }
1375         free(self->handlers);
1376         self->handlers = NULL;
1377     }
1378     if (self->buffer != NULL) {
1379         free(self->buffer);
1380         self->buffer = NULL;
1381     }
1382     Py_XDECREF(self->intern);
1383 #ifndef Py_TPFLAGS_HAVE_GC
1384     /* Code for versions 2.0 and 2.1 */
1385     PyObject_Del(self);
1386 #else
1387     /* Code for versions 2.2 and later. */
1388     PyObject_GC_Del(self);
1389 #endif
1390 }
1391 
1392 static int
handlername2int(const char * name)1393 handlername2int(const char *name)
1394 {
1395     int i;
1396     for (i = 0; handler_info[i].name != NULL; i++) {
1397         if (strcmp(name, handler_info[i].name) == 0) {
1398             return i;
1399         }
1400     }
1401     return -1;
1402 }
1403 
1404 static PyObject *
get_pybool(int istrue)1405 get_pybool(int istrue)
1406 {
1407     PyObject *result = istrue ? Py_True : Py_False;
1408     Py_INCREF(result);
1409     return result;
1410 }
1411 
1412 static PyObject *
xmlparse_getattr(xmlparseobject * self,char * name)1413 xmlparse_getattr(xmlparseobject *self, char *name)
1414 {
1415     int handlernum = handlername2int(name);
1416 
1417     if (handlernum != -1) {
1418         PyObject *result = self->handlers[handlernum];
1419         if (result == NULL)
1420             result = Py_None;
1421         Py_INCREF(result);
1422         return result;
1423     }
1424     if (name[0] == 'E') {
1425         if (strcmp(name, "ErrorCode") == 0)
1426             return PyInt_FromLong((long)
1427                                   XML_GetErrorCode(self->itself));
1428         if (strcmp(name, "ErrorLineNumber") == 0)
1429             return PyInt_FromLong((long)
1430                                   XML_GetErrorLineNumber(self->itself));
1431         if (strcmp(name, "ErrorColumnNumber") == 0)
1432             return PyInt_FromLong((long)
1433                                   XML_GetErrorColumnNumber(self->itself));
1434         if (strcmp(name, "ErrorByteIndex") == 0)
1435             return PyInt_FromLong((long)
1436                                   XML_GetErrorByteIndex(self->itself));
1437     }
1438     if (name[0] == 'C') {
1439         if (strcmp(name, "CurrentLineNumber") == 0)
1440             return PyInt_FromLong((long)
1441                                   XML_GetCurrentLineNumber(self->itself));
1442         if (strcmp(name, "CurrentColumnNumber") == 0)
1443             return PyInt_FromLong((long)
1444                                   XML_GetCurrentColumnNumber(self->itself));
1445         if (strcmp(name, "CurrentByteIndex") == 0)
1446             return PyInt_FromLong((long)
1447                                   XML_GetCurrentByteIndex(self->itself));
1448     }
1449     if (name[0] == 'b') {
1450         if (strcmp(name, "buffer_size") == 0)
1451             return PyInt_FromLong((long) self->buffer_size);
1452         if (strcmp(name, "buffer_text") == 0)
1453             return get_pybool(self->buffer != NULL);
1454         if (strcmp(name, "buffer_used") == 0)
1455             return PyInt_FromLong((long) self->buffer_used);
1456     }
1457     if (strcmp(name, "namespace_prefixes") == 0)
1458         return get_pybool(self->ns_prefixes);
1459     if (strcmp(name, "ordered_attributes") == 0)
1460         return get_pybool(self->ordered_attributes);
1461     if (strcmp(name, "returns_unicode") == 0)
1462         return get_pybool((long) self->returns_unicode);
1463     if (strcmp(name, "specified_attributes") == 0)
1464         return get_pybool((long) self->specified_attributes);
1465     if (strcmp(name, "intern") == 0) {
1466         if (self->intern == NULL) {
1467             Py_INCREF(Py_None);
1468             return Py_None;
1469         }
1470         else {
1471             Py_INCREF(self->intern);
1472             return self->intern;
1473         }
1474     }
1475 
1476 #define APPEND(list, str)                               \
1477         do {                                            \
1478                 PyObject *o = PyString_FromString(str); \
1479                 if (o != NULL)                          \
1480                         PyList_Append(list, o);         \
1481                 Py_XDECREF(o);                          \
1482         } while (0)
1483 
1484     if (strcmp(name, "__members__") == 0) {
1485         int i;
1486         PyObject *rc = PyList_New(0);
1487         if (!rc)
1488                 return NULL;
1489         for (i = 0; handler_info[i].name != NULL; i++) {
1490             PyObject *o = get_handler_name(&handler_info[i]);
1491             if (o != NULL)
1492                 PyList_Append(rc, o);
1493             Py_XDECREF(o);
1494         }
1495         APPEND(rc, "ErrorCode");
1496         APPEND(rc, "ErrorLineNumber");
1497         APPEND(rc, "ErrorColumnNumber");
1498         APPEND(rc, "ErrorByteIndex");
1499         APPEND(rc, "CurrentLineNumber");
1500         APPEND(rc, "CurrentColumnNumber");
1501         APPEND(rc, "CurrentByteIndex");
1502         APPEND(rc, "buffer_size");
1503         APPEND(rc, "buffer_text");
1504         APPEND(rc, "buffer_used");
1505         APPEND(rc, "namespace_prefixes");
1506         APPEND(rc, "ordered_attributes");
1507         APPEND(rc, "returns_unicode");
1508         APPEND(rc, "specified_attributes");
1509         APPEND(rc, "intern");
1510 
1511 #undef APPEND
1512         return rc;
1513     }
1514     return Py_FindMethod(xmlparse_methods, (PyObject *)self, name);
1515 }
1516 
1517 static int
sethandler(xmlparseobject * self,const char * name,PyObject * v)1518 sethandler(xmlparseobject *self, const char *name, PyObject* v)
1519 {
1520     int handlernum = handlername2int(name);
1521     if (handlernum >= 0) {
1522         xmlhandler c_handler = NULL;
1523         PyObject *temp = self->handlers[handlernum];
1524 
1525         if (v == Py_None) {
1526             /* If this is the character data handler, and a character
1527                data handler is already active, we need to be more
1528                careful.  What we can safely do is replace the existing
1529                character data handler callback function with a no-op
1530                function that will refuse to call Python.  The downside
1531                is that this doesn't completely remove the character
1532                data handler from the C layer if there's any callback
1533                active, so Expat does a little more work than it
1534                otherwise would, but that's really an odd case.  A more
1535                elaborate system of handlers and state could remove the
1536                C handler more effectively. */
1537             if (handlernum == CharacterData && self->in_callback)
1538                 c_handler = noop_character_data_handler;
1539             v = NULL;
1540         }
1541         else if (v != NULL) {
1542             Py_INCREF(v);
1543             c_handler = handler_info[handlernum].handler;
1544         }
1545         self->handlers[handlernum] = v;
1546         Py_XDECREF(temp);
1547         handler_info[handlernum].setter(self->itself, c_handler);
1548         return 1;
1549     }
1550     return 0;
1551 }
1552 
1553 static int
xmlparse_setattr(xmlparseobject * self,char * name,PyObject * v)1554 xmlparse_setattr(xmlparseobject *self, char *name, PyObject *v)
1555 {
1556     /* Set attribute 'name' to value 'v'. v==NULL means delete */
1557     if (v == NULL) {
1558         PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1559         return -1;
1560     }
1561     if (strcmp(name, "buffer_text") == 0) {
1562         int b = PyObject_IsTrue(v);
1563         if (b < 0)
1564             return -1;
1565         if (b) {
1566             if (self->buffer == NULL) {
1567                 self->buffer = malloc(self->buffer_size);
1568                 if (self->buffer == NULL) {
1569                     PyErr_NoMemory();
1570                     return -1;
1571                 }
1572                 self->buffer_used = 0;
1573             }
1574         }
1575         else if (self->buffer != NULL) {
1576             if (flush_character_buffer(self) < 0)
1577                 return -1;
1578             free(self->buffer);
1579             self->buffer = NULL;
1580         }
1581         return 0;
1582     }
1583     if (strcmp(name, "namespace_prefixes") == 0) {
1584         int b = PyObject_IsTrue(v);
1585         if (b < 0)
1586             return -1;
1587         self->ns_prefixes = b;
1588         XML_SetReturnNSTriplet(self->itself, self->ns_prefixes);
1589         return 0;
1590     }
1591     if (strcmp(name, "ordered_attributes") == 0) {
1592         int b = PyObject_IsTrue(v);
1593         if (b < 0)
1594             return -1;
1595         self->ordered_attributes = b;
1596         return 0;
1597     }
1598     if (strcmp(name, "returns_unicode") == 0) {
1599         int b = PyObject_IsTrue(v);
1600         if (b < 0)
1601             return -1;
1602 #ifndef Py_USING_UNICODE
1603         if (b) {
1604             PyErr_SetString(PyExc_ValueError,
1605                             "Unicode support not available");
1606             return -1;
1607         }
1608 #endif
1609         self->returns_unicode = b;
1610         return 0;
1611     }
1612     if (strcmp(name, "specified_attributes") == 0) {
1613         int b = PyObject_IsTrue(v);
1614         if (b < 0)
1615             return -1;
1616         self->specified_attributes = b;
1617         return 0;
1618     }
1619 
1620     if (strcmp(name, "buffer_size") == 0) {
1621       long new_buffer_size;
1622       if (!PyInt_Check(v)) {
1623         PyErr_SetString(PyExc_TypeError, "buffer_size must be an integer");
1624         return -1;
1625       }
1626 
1627       new_buffer_size=PyInt_AS_LONG(v);
1628       /* trivial case -- no change */
1629       if (new_buffer_size == self->buffer_size) {
1630         return 0;
1631       }
1632 
1633       if (new_buffer_size <= 0) {
1634         PyErr_SetString(PyExc_ValueError, "buffer_size must be greater than zero");
1635         return -1;
1636       }
1637 
1638       /* check maximum */
1639       if (new_buffer_size > INT_MAX) {
1640         char errmsg[100];
1641         sprintf(errmsg, "buffer_size must not be greater than %i", INT_MAX);
1642         PyErr_SetString(PyExc_ValueError, errmsg);
1643         return -1;
1644       }
1645 
1646       if (self->buffer != NULL) {
1647         /* there is already a buffer */
1648         if (self->buffer_used != 0) {
1649           flush_character_buffer(self);
1650         }
1651         /* free existing buffer */
1652         free(self->buffer);
1653       }
1654       self->buffer = malloc(new_buffer_size);
1655       if (self->buffer == NULL) {
1656         PyErr_NoMemory();
1657         return -1;
1658       }
1659       self->buffer_size = new_buffer_size;
1660       return 0;
1661     }
1662 
1663     if (strcmp(name, "CharacterDataHandler") == 0) {
1664         /* If we're changing the character data handler, flush all
1665          * cached data with the old handler.  Not sure there's a
1666          * "right" thing to do, though, but this probably won't
1667          * happen.
1668          */
1669         if (flush_character_buffer(self) < 0)
1670             return -1;
1671     }
1672     if (sethandler(self, name, v)) {
1673         return 0;
1674     }
1675     PyErr_SetString(PyExc_AttributeError, name);
1676     return -1;
1677 }
1678 
1679 #ifdef WITH_CYCLE_GC
1680 static int
xmlparse_traverse(xmlparseobject * op,visitproc visit,void * arg)1681 xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
1682 {
1683     int i;
1684     for (i = 0; handler_info[i].name != NULL; i++)
1685         Py_VISIT(op->handlers[i]);
1686     return 0;
1687 }
1688 
1689 static int
xmlparse_clear(xmlparseobject * op)1690 xmlparse_clear(xmlparseobject *op)
1691 {
1692     clear_handlers(op, 0);
1693     Py_CLEAR(op->intern);
1694     return 0;
1695 }
1696 #endif
1697 
1698 PyDoc_STRVAR(Xmlparsetype__doc__, "XML parser");
1699 
1700 static PyTypeObject Xmlparsetype = {
1701         PyVarObject_HEAD_INIT(NULL, 0)
1702         "pyexpat.xmlparser",            /*tp_name*/
1703         sizeof(xmlparseobject) + PyGC_HEAD_SIZE,/*tp_basicsize*/
1704         0,                              /*tp_itemsize*/
1705         /* methods */
1706         (destructor)xmlparse_dealloc,   /*tp_dealloc*/
1707         (printfunc)0,           /*tp_print*/
1708         (getattrfunc)xmlparse_getattr,  /*tp_getattr*/
1709         (setattrfunc)xmlparse_setattr,  /*tp_setattr*/
1710         (cmpfunc)0,             /*tp_compare*/
1711         (reprfunc)0,            /*tp_repr*/
1712         0,                      /*tp_as_number*/
1713         0,              /*tp_as_sequence*/
1714         0,              /*tp_as_mapping*/
1715         (hashfunc)0,            /*tp_hash*/
1716         (ternaryfunc)0,         /*tp_call*/
1717         (reprfunc)0,            /*tp_str*/
1718         0,              /* tp_getattro */
1719         0,              /* tp_setattro */
1720         0,              /* tp_as_buffer */
1721 #ifdef Py_TPFLAGS_HAVE_GC
1722         Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
1723 #else
1724         Py_TPFLAGS_DEFAULT | Py_TPFLAGS_GC, /*tp_flags*/
1725 #endif
1726         Xmlparsetype__doc__, /* tp_doc - Documentation string */
1727 #ifdef WITH_CYCLE_GC
1728         (traverseproc)xmlparse_traverse,        /* tp_traverse */
1729         (inquiry)xmlparse_clear         /* tp_clear */
1730 #else
1731         0, 0
1732 #endif
1733 };
1734 
1735 /* End of code for xmlparser objects */
1736 /* -------------------------------------------------------- */
1737 
1738 PyDoc_STRVAR(pyexpat_ParserCreate__doc__,
1739 "ParserCreate([encoding[, namespace_separator]]) -> parser\n\
1740 Return a new XML parser object.");
1741 
1742 static PyObject *
pyexpat_ParserCreate(PyObject * notused,PyObject * args,PyObject * kw)1743 pyexpat_ParserCreate(PyObject *notused, PyObject *args, PyObject *kw)
1744 {
1745     char *encoding = NULL;
1746     char *namespace_separator = NULL;
1747     PyObject *intern = NULL;
1748     PyObject *result;
1749     int intern_decref = 0;
1750     static char *kwlist[] = {"encoding", "namespace_separator",
1751                                    "intern", NULL};
1752 
1753     if (!PyArg_ParseTupleAndKeywords(args, kw, "|zzO:ParserCreate", kwlist,
1754                                      &encoding, &namespace_separator, &intern))
1755         return NULL;
1756     if (namespace_separator != NULL
1757         && strlen(namespace_separator) > 1) {
1758         PyErr_SetString(PyExc_ValueError,
1759                         "namespace_separator must be at most one"
1760                         " character, omitted, or None");
1761         return NULL;
1762     }
1763     /* Explicitly passing None means no interning is desired.
1764        Not passing anything means that a new dictionary is used. */
1765     if (intern == Py_None)
1766         intern = NULL;
1767     else if (intern == NULL) {
1768         intern = PyDict_New();
1769         if (!intern)
1770             return NULL;
1771         intern_decref = 1;
1772     }
1773     else if (!PyDict_Check(intern)) {
1774         PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
1775         return NULL;
1776     }
1777 
1778     result = newxmlparseobject(encoding, namespace_separator, intern);
1779     if (intern_decref) {
1780         Py_DECREF(intern);
1781     }
1782     return result;
1783 }
1784 
1785 PyDoc_STRVAR(pyexpat_ErrorString__doc__,
1786 "ErrorString(errno) -> string\n\
1787 Returns string error for given number.");
1788 
1789 static PyObject *
pyexpat_ErrorString(PyObject * self,PyObject * args)1790 pyexpat_ErrorString(PyObject *self, PyObject *args)
1791 {
1792     long code = 0;
1793 
1794     if (!PyArg_ParseTuple(args, "l:ErrorString", &code))
1795         return NULL;
1796     return Py_BuildValue("z", XML_ErrorString((int)code));
1797 }
1798 
1799 /* List of methods defined in the module */
1800 
1801 static struct PyMethodDef pyexpat_methods[] = {
1802     {"ParserCreate",    (PyCFunction)pyexpat_ParserCreate,
1803      METH_VARARGS|METH_KEYWORDS, pyexpat_ParserCreate__doc__},
1804     {"ErrorString",     (PyCFunction)pyexpat_ErrorString,
1805      METH_VARARGS,      pyexpat_ErrorString__doc__},
1806 
1807     {NULL,       (PyCFunction)NULL, 0, NULL}            /* sentinel */
1808 };
1809 
1810 /* Module docstring */
1811 
1812 PyDoc_STRVAR(pyexpat_module_documentation,
1813 "Python wrapper for Expat parser.");
1814 
1815 /* Initialization function for the module */
1816 
1817 #ifndef MODULE_NAME
1818 #define MODULE_NAME "pyexpat"
1819 #endif
1820 
1821 #ifndef MODULE_INITFUNC
1822 #define MODULE_INITFUNC initpyexpat
1823 #endif
1824 
1825 #ifndef PyMODINIT_FUNC
1826 #   ifdef MS_WINDOWS
1827 #       define PyMODINIT_FUNC __declspec(dllexport) void
1828 #   else
1829 #       define PyMODINIT_FUNC void
1830 #   endif
1831 #endif
1832 
1833 PyMODINIT_FUNC MODULE_INITFUNC(void);  /* avoid compiler warnings */
1834 
1835 PyMODINIT_FUNC
MODULE_INITFUNC(void)1836 MODULE_INITFUNC(void)
1837 {
1838     PyObject *m, *d;
1839     PyObject *errmod_name = PyString_FromString(MODULE_NAME ".errors");
1840     PyObject *errors_module;
1841     PyObject *modelmod_name;
1842     PyObject *model_module;
1843     PyObject *sys_modules;
1844     PyObject *version;
1845     static struct PyExpat_CAPI capi;
1846     PyObject* capi_object;
1847 
1848     if (errmod_name == NULL)
1849         return;
1850     modelmod_name = PyString_FromString(MODULE_NAME ".model");
1851     if (modelmod_name == NULL)
1852         return;
1853 
1854     Py_TYPE(&Xmlparsetype) = &PyType_Type;
1855 
1856     /* Create the module and add the functions */
1857     m = Py_InitModule3(MODULE_NAME, pyexpat_methods,
1858                        pyexpat_module_documentation);
1859     if (m == NULL)
1860         return;
1861 
1862     /* Add some symbolic constants to the module */
1863     if (ErrorObject == NULL) {
1864         ErrorObject = PyErr_NewException("xml.parsers.expat.ExpatError",
1865                                          NULL, NULL);
1866         if (ErrorObject == NULL)
1867             return;
1868     }
1869     Py_INCREF(ErrorObject);
1870     PyModule_AddObject(m, "error", ErrorObject);
1871     Py_INCREF(ErrorObject);
1872     PyModule_AddObject(m, "ExpatError", ErrorObject);
1873     Py_INCREF(&Xmlparsetype);
1874     PyModule_AddObject(m, "XMLParserType", (PyObject *) &Xmlparsetype);
1875 
1876     version = PyString_FromString(PY_VERSION);
1877     if (!version)
1878         return;
1879     PyModule_AddObject(m, "__version__", version);
1880     PyModule_AddStringConstant(m, "EXPAT_VERSION",
1881                                (char *) XML_ExpatVersion());
1882     {
1883         XML_Expat_Version info = XML_ExpatVersionInfo();
1884         PyModule_AddObject(m, "version_info",
1885                            Py_BuildValue("(iii)", info.major,
1886                                          info.minor, info.micro));
1887     }
1888 #ifdef Py_USING_UNICODE
1889     init_template_buffer();
1890 #endif
1891     /* XXX When Expat supports some way of figuring out how it was
1892        compiled, this should check and set native_encoding
1893        appropriately.
1894     */
1895     PyModule_AddStringConstant(m, "native_encoding", "UTF-8");
1896 
1897     sys_modules = PySys_GetObject("modules");
1898     d = PyModule_GetDict(m);
1899     errors_module = PyDict_GetItem(d, errmod_name);
1900     if (errors_module == NULL) {
1901         errors_module = PyModule_New(MODULE_NAME ".errors");
1902         if (errors_module != NULL) {
1903             PyDict_SetItem(sys_modules, errmod_name, errors_module);
1904             /* gives away the reference to errors_module */
1905             PyModule_AddObject(m, "errors", errors_module);
1906         }
1907     }
1908     Py_DECREF(errmod_name);
1909     model_module = PyDict_GetItem(d, modelmod_name);
1910     if (model_module == NULL) {
1911         model_module = PyModule_New(MODULE_NAME ".model");
1912         if (model_module != NULL) {
1913             PyDict_SetItem(sys_modules, modelmod_name, model_module);
1914             /* gives away the reference to model_module */
1915             PyModule_AddObject(m, "model", model_module);
1916         }
1917     }
1918     Py_DECREF(modelmod_name);
1919     if (errors_module == NULL || model_module == NULL)
1920         /* Don't core dump later! */
1921         return;
1922 
1923 #if XML_COMBINED_VERSION > 19505
1924     {
1925         const XML_Feature *features = XML_GetFeatureList();
1926         PyObject *list = PyList_New(0);
1927         if (list == NULL)
1928             /* just ignore it */
1929             PyErr_Clear();
1930         else {
1931             int i = 0;
1932             for (; features[i].feature != XML_FEATURE_END; ++i) {
1933                 int ok;
1934                 PyObject *item = Py_BuildValue("si", features[i].name,
1935                                                features[i].value);
1936                 if (item == NULL) {
1937                     Py_DECREF(list);
1938                     list = NULL;
1939                     break;
1940                 }
1941                 ok = PyList_Append(list, item);
1942                 Py_DECREF(item);
1943                 if (ok < 0) {
1944                     PyErr_Clear();
1945                     break;
1946                 }
1947             }
1948             if (list != NULL)
1949                 PyModule_AddObject(m, "features", list);
1950         }
1951     }
1952 #endif
1953 
1954 #define MYCONST(name) \
1955     PyModule_AddStringConstant(errors_module, #name, \
1956                                (char*)XML_ErrorString(name))
1957 
1958     MYCONST(XML_ERROR_NO_MEMORY);
1959     MYCONST(XML_ERROR_SYNTAX);
1960     MYCONST(XML_ERROR_NO_ELEMENTS);
1961     MYCONST(XML_ERROR_INVALID_TOKEN);
1962     MYCONST(XML_ERROR_UNCLOSED_TOKEN);
1963     MYCONST(XML_ERROR_PARTIAL_CHAR);
1964     MYCONST(XML_ERROR_TAG_MISMATCH);
1965     MYCONST(XML_ERROR_DUPLICATE_ATTRIBUTE);
1966     MYCONST(XML_ERROR_JUNK_AFTER_DOC_ELEMENT);
1967     MYCONST(XML_ERROR_PARAM_ENTITY_REF);
1968     MYCONST(XML_ERROR_UNDEFINED_ENTITY);
1969     MYCONST(XML_ERROR_RECURSIVE_ENTITY_REF);
1970     MYCONST(XML_ERROR_ASYNC_ENTITY);
1971     MYCONST(XML_ERROR_BAD_CHAR_REF);
1972     MYCONST(XML_ERROR_BINARY_ENTITY_REF);
1973     MYCONST(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF);
1974     MYCONST(XML_ERROR_MISPLACED_XML_PI);
1975     MYCONST(XML_ERROR_UNKNOWN_ENCODING);
1976     MYCONST(XML_ERROR_INCORRECT_ENCODING);
1977     MYCONST(XML_ERROR_UNCLOSED_CDATA_SECTION);
1978     MYCONST(XML_ERROR_EXTERNAL_ENTITY_HANDLING);
1979     MYCONST(XML_ERROR_NOT_STANDALONE);
1980     MYCONST(XML_ERROR_UNEXPECTED_STATE);
1981     MYCONST(XML_ERROR_ENTITY_DECLARED_IN_PE);
1982     MYCONST(XML_ERROR_FEATURE_REQUIRES_XML_DTD);
1983     MYCONST(XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING);
1984     /* Added in Expat 1.95.7. */
1985     MYCONST(XML_ERROR_UNBOUND_PREFIX);
1986     /* Added in Expat 1.95.8. */
1987     MYCONST(XML_ERROR_UNDECLARING_PREFIX);
1988     MYCONST(XML_ERROR_INCOMPLETE_PE);
1989     MYCONST(XML_ERROR_XML_DECL);
1990     MYCONST(XML_ERROR_TEXT_DECL);
1991     MYCONST(XML_ERROR_PUBLICID);
1992     MYCONST(XML_ERROR_SUSPENDED);
1993     MYCONST(XML_ERROR_NOT_SUSPENDED);
1994     MYCONST(XML_ERROR_ABORTED);
1995     MYCONST(XML_ERROR_FINISHED);
1996     MYCONST(XML_ERROR_SUSPEND_PE);
1997 
1998     PyModule_AddStringConstant(errors_module, "__doc__",
1999                                "Constants used to describe error conditions.");
2000 
2001 #undef MYCONST
2002 
2003 #define MYCONST(c) PyModule_AddIntConstant(m, #c, c)
2004     MYCONST(XML_PARAM_ENTITY_PARSING_NEVER);
2005     MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
2006     MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS);
2007 #undef MYCONST
2008 
2009 #define MYCONST(c) PyModule_AddIntConstant(model_module, #c, c)
2010     PyModule_AddStringConstant(model_module, "__doc__",
2011                      "Constants used to interpret content model information.");
2012 
2013     MYCONST(XML_CTYPE_EMPTY);
2014     MYCONST(XML_CTYPE_ANY);
2015     MYCONST(XML_CTYPE_MIXED);
2016     MYCONST(XML_CTYPE_NAME);
2017     MYCONST(XML_CTYPE_CHOICE);
2018     MYCONST(XML_CTYPE_SEQ);
2019 
2020     MYCONST(XML_CQUANT_NONE);
2021     MYCONST(XML_CQUANT_OPT);
2022     MYCONST(XML_CQUANT_REP);
2023     MYCONST(XML_CQUANT_PLUS);
2024 #undef MYCONST
2025 
2026     /* initialize pyexpat dispatch table */
2027     capi.size = sizeof(capi);
2028     capi.magic = PyExpat_CAPI_MAGIC;
2029     capi.MAJOR_VERSION = XML_MAJOR_VERSION;
2030     capi.MINOR_VERSION = XML_MINOR_VERSION;
2031     capi.MICRO_VERSION = XML_MICRO_VERSION;
2032     capi.ErrorString = XML_ErrorString;
2033     capi.GetErrorCode = XML_GetErrorCode;
2034     capi.GetErrorColumnNumber = XML_GetErrorColumnNumber;
2035     capi.GetErrorLineNumber = XML_GetErrorLineNumber;
2036     capi.Parse = XML_Parse;
2037     capi.ParserCreate_MM = XML_ParserCreate_MM;
2038     capi.ParserFree = XML_ParserFree;
2039     capi.SetCharacterDataHandler = XML_SetCharacterDataHandler;
2040     capi.SetCommentHandler = XML_SetCommentHandler;
2041     capi.SetDefaultHandlerExpand = XML_SetDefaultHandlerExpand;
2042     capi.SetElementHandler = XML_SetElementHandler;
2043     capi.SetNamespaceDeclHandler = XML_SetNamespaceDeclHandler;
2044     capi.SetProcessingInstructionHandler = XML_SetProcessingInstructionHandler;
2045     capi.SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler;
2046     capi.SetUserData = XML_SetUserData;
2047 
2048     /* export using capsule */
2049     capi_object = PyCapsule_New(&capi, PyExpat_CAPSULE_NAME, NULL);
2050     if (capi_object)
2051         PyModule_AddObject(m, "expat_CAPI", capi_object);
2052 }
2053 
2054 static void
clear_handlers(xmlparseobject * self,int initial)2055 clear_handlers(xmlparseobject *self, int initial)
2056 {
2057     int i = 0;
2058     PyObject *temp;
2059 
2060     for (; handler_info[i].name != NULL; i++) {
2061         if (initial)
2062             self->handlers[i] = NULL;
2063         else {
2064             temp = self->handlers[i];
2065             self->handlers[i] = NULL;
2066             Py_XDECREF(temp);
2067             handler_info[i].setter(self->itself, NULL);
2068         }
2069     }
2070 }
2071 
2072 static struct HandlerInfo handler_info[] = {
2073     {"StartElementHandler",
2074      (xmlhandlersetter)XML_SetStartElementHandler,
2075      (xmlhandler)my_StartElementHandler},
2076     {"EndElementHandler",
2077      (xmlhandlersetter)XML_SetEndElementHandler,
2078      (xmlhandler)my_EndElementHandler},
2079     {"ProcessingInstructionHandler",
2080      (xmlhandlersetter)XML_SetProcessingInstructionHandler,
2081      (xmlhandler)my_ProcessingInstructionHandler},
2082     {"CharacterDataHandler",
2083      (xmlhandlersetter)XML_SetCharacterDataHandler,
2084      (xmlhandler)my_CharacterDataHandler},
2085     {"UnparsedEntityDeclHandler",
2086      (xmlhandlersetter)XML_SetUnparsedEntityDeclHandler,
2087      (xmlhandler)my_UnparsedEntityDeclHandler},
2088     {"NotationDeclHandler",
2089      (xmlhandlersetter)XML_SetNotationDeclHandler,
2090      (xmlhandler)my_NotationDeclHandler},
2091     {"StartNamespaceDeclHandler",
2092      (xmlhandlersetter)XML_SetStartNamespaceDeclHandler,
2093      (xmlhandler)my_StartNamespaceDeclHandler},
2094     {"EndNamespaceDeclHandler",
2095      (xmlhandlersetter)XML_SetEndNamespaceDeclHandler,
2096      (xmlhandler)my_EndNamespaceDeclHandler},
2097     {"CommentHandler",
2098      (xmlhandlersetter)XML_SetCommentHandler,
2099      (xmlhandler)my_CommentHandler},
2100     {"StartCdataSectionHandler",
2101      (xmlhandlersetter)XML_SetStartCdataSectionHandler,
2102      (xmlhandler)my_StartCdataSectionHandler},
2103     {"EndCdataSectionHandler",
2104      (xmlhandlersetter)XML_SetEndCdataSectionHandler,
2105      (xmlhandler)my_EndCdataSectionHandler},
2106     {"DefaultHandler",
2107      (xmlhandlersetter)XML_SetDefaultHandler,
2108      (xmlhandler)my_DefaultHandler},
2109     {"DefaultHandlerExpand",
2110      (xmlhandlersetter)XML_SetDefaultHandlerExpand,
2111      (xmlhandler)my_DefaultHandlerExpandHandler},
2112     {"NotStandaloneHandler",
2113      (xmlhandlersetter)XML_SetNotStandaloneHandler,
2114      (xmlhandler)my_NotStandaloneHandler},
2115     {"ExternalEntityRefHandler",
2116      (xmlhandlersetter)XML_SetExternalEntityRefHandler,
2117      (xmlhandler)my_ExternalEntityRefHandler},
2118     {"StartDoctypeDeclHandler",
2119      (xmlhandlersetter)XML_SetStartDoctypeDeclHandler,
2120      (xmlhandler)my_StartDoctypeDeclHandler},
2121     {"EndDoctypeDeclHandler",
2122      (xmlhandlersetter)XML_SetEndDoctypeDeclHandler,
2123      (xmlhandler)my_EndDoctypeDeclHandler},
2124     {"EntityDeclHandler",
2125      (xmlhandlersetter)XML_SetEntityDeclHandler,
2126      (xmlhandler)my_EntityDeclHandler},
2127     {"XmlDeclHandler",
2128      (xmlhandlersetter)XML_SetXmlDeclHandler,
2129      (xmlhandler)my_XmlDeclHandler},
2130     {"ElementDeclHandler",
2131      (xmlhandlersetter)XML_SetElementDeclHandler,
2132      (xmlhandler)my_ElementDeclHandler},
2133     {"AttlistDeclHandler",
2134      (xmlhandlersetter)XML_SetAttlistDeclHandler,
2135      (xmlhandler)my_AttlistDeclHandler},
2136 #if XML_COMBINED_VERSION >= 19504
2137     {"SkippedEntityHandler",
2138      (xmlhandlersetter)XML_SetSkippedEntityHandler,
2139      (xmlhandler)my_SkippedEntityHandler},
2140 #endif
2141 
2142     {NULL, NULL, NULL} /* sentinel */
2143 };
2144