• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include "Python.h"
2 #include <ctype.h>
3 
4 #include "frameobject.h"
5 #include "expat.h"
6 
7 #include "pyexpat.h"
8 
9 #define XML_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)
10 
11 #ifndef PyDoc_STRVAR
12 
13 /*
14  * fdrake says:
15  * Don't change the PyDoc_STR macro definition to (str), because
16  * '''the parentheses cause compile failures
17  * ("non-constant static initializer" or something like that)
18  * on some platforms (Irix?)'''
19  */
20 #define PyDoc_STR(str)         str
21 #define PyDoc_VAR(name)        static char name[]
22 #define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
23 #endif
24 
25 #if (PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION < 2)
26 /* In Python 2.0 and  2.1, disabling Unicode was not possible. */
27 #define Py_USING_UNICODE
28 #else
29 #define FIX_TRACE
30 #endif
31 
/* Identifies each callback slot.  The values are used as indexes into
 * handler_info[] and into the per-parser `handlers` array, so they are
 * assigned consecutively starting from zero.
 */
enum HandlerTypes {
    /* Element and content events. */
    StartElement = 0,
    EndElement,
    ProcessingInstruction,
    CharacterData,

    /* Declarations and namespace events. */
    UnparsedEntityDecl,
    NotationDecl,
    StartNamespaceDecl,
    EndNamespaceDecl,

    /* Comments and CDATA sections. */
    Comment,
    StartCdataSection,
    EndCdataSection,

    /* Default handlers and entity handling. */
    Default,
    DefaultHandlerExpand,
    NotStandalone,
    ExternalEntityRef,

    /* DTD-related events. */
    StartDoctypeDecl,
    EndDoctypeDecl,
    EntityDecl,
    XmlDecl,
    ElementDecl,
    AttlistDecl,
#if XML_COMBINED_VERSION >= 19504
    /* Only compiled in when the Expat version check above passes. */
    SkippedEntity,
#endif

    /* Sentinel: one past the last real handler slot. */
    _DummyDecl
};
59 
60 static PyObject *ErrorObject;
61 
62 /* ----------------------------------------------------- */
63 
64 /* Declarations for objects of type xmlparser */
65 
66 typedef struct {
67     PyObject_HEAD
68 
69     XML_Parser itself;
70     int returns_unicode;        /* True if Unicode strings are returned;
71                                    if false, UTF-8 strings are returned */
72     int ordered_attributes;     /* Return attributes as a list. */
73     int specified_attributes;   /* Report only specified attributes. */
74     int in_callback;            /* Is a callback active? */
75     int ns_prefixes;            /* Namespace-triplets mode? */
76     XML_Char *buffer;           /* Buffer used when accumulating characters */
77                                 /* NULL if not enabled */
78     int buffer_size;            /* Size of buffer, in XML_Char units */
79     int buffer_used;            /* Buffer units in use */
80     PyObject *intern;           /* Dictionary to intern strings */
81     PyObject **handlers;
82 } xmlparseobject;
83 
84 #define CHARACTER_DATA_BUFFER_SIZE 8192
85 
86 static PyTypeObject Xmlparsetype;
87 
88 typedef void (*xmlhandlersetter)(XML_Parser self, void *meth);
89 typedef void* xmlhandler;
90 
91 struct HandlerInfo {
92     const char *name;
93     xmlhandlersetter setter;
94     xmlhandler handler;
95     PyCodeObject *tb_code;
96     PyObject *nameobj;
97 };
98 
99 static struct HandlerInfo handler_info[64];
100 
101 /* Set an integer attribute on the error object; return true on success,
102  * false on an exception.
103  */
104 static int
set_error_attr(PyObject * err,char * name,int value)105 set_error_attr(PyObject *err, char *name, int value)
106 {
107     PyObject *v = PyInt_FromLong(value);
108 
109     if (v == NULL || PyObject_SetAttrString(err, name, v) == -1) {
110         Py_XDECREF(v);
111         return 0;
112     }
113     Py_DECREF(v);
114     return 1;
115 }
116 
117 /* Build and set an Expat exception, including positioning
118  * information.  Always returns NULL.
119  */
120 static PyObject *
set_error(xmlparseobject * self,enum XML_Error code)121 set_error(xmlparseobject *self, enum XML_Error code)
122 {
123     PyObject *err;
124     char buffer[256];
125     XML_Parser parser = self->itself;
126     int lineno = XML_GetErrorLineNumber(parser);
127     int column = XML_GetErrorColumnNumber(parser);
128 
129     /* There is no risk of overflowing this buffer, since
130        even for 64-bit integers, there is sufficient space. */
131     sprintf(buffer, "%.200s: line %i, column %i",
132             XML_ErrorString(code), lineno, column);
133     err = PyObject_CallFunction(ErrorObject, "s", buffer);
134     if (  err != NULL
135           && set_error_attr(err, "code", code)
136           && set_error_attr(err, "offset", column)
137           && set_error_attr(err, "lineno", lineno)) {
138         PyErr_SetObject(ErrorObject, err);
139     }
140     Py_XDECREF(err);
141     return NULL;
142 }
143 
144 static int
have_handler(xmlparseobject * self,int type)145 have_handler(xmlparseobject *self, int type)
146 {
147     PyObject *handler = self->handlers[type];
148     return handler != NULL;
149 }
150 
151 static PyObject *
get_handler_name(struct HandlerInfo * hinfo)152 get_handler_name(struct HandlerInfo *hinfo)
153 {
154     PyObject *name = hinfo->nameobj;
155     if (name == NULL) {
156         name = PyString_FromString(hinfo->name);
157         hinfo->nameobj = name;
158     }
159     Py_XINCREF(name);
160     return name;
161 }
162 
163 
164 #ifdef Py_USING_UNICODE
165 /* Convert a string of XML_Chars into a Unicode string.
166    Returns None if str is a null pointer. */
167 
168 static PyObject *
conv_string_to_unicode(const XML_Char * str)169 conv_string_to_unicode(const XML_Char *str)
170 {
171     /* XXX currently this code assumes that XML_Char is 8-bit,
172        and hence in UTF-8.  */
173     /* UTF-8 from Expat, Unicode desired */
174     if (str == NULL) {
175         Py_INCREF(Py_None);
176         return Py_None;
177     }
178     return PyUnicode_DecodeUTF8(str, strlen(str), "strict");
179 }
180 
181 static PyObject *
conv_string_len_to_unicode(const XML_Char * str,int len)182 conv_string_len_to_unicode(const XML_Char *str, int len)
183 {
184     /* XXX currently this code assumes that XML_Char is 8-bit,
185        and hence in UTF-8.  */
186     /* UTF-8 from Expat, Unicode desired */
187     if (str == NULL) {
188         Py_INCREF(Py_None);
189         return Py_None;
190     }
191     return PyUnicode_DecodeUTF8((const char *)str, len, "strict");
192 }
193 #endif
194 
195 /* Convert a string of XML_Chars into an 8-bit Python string.
196    Returns None if str is a null pointer. */
197 
198 static PyObject *
conv_string_to_utf8(const XML_Char * str)199 conv_string_to_utf8(const XML_Char *str)
200 {
201     /* XXX currently this code assumes that XML_Char is 8-bit,
202        and hence in UTF-8.  */
203     /* UTF-8 from Expat, UTF-8 desired */
204     if (str == NULL) {
205         Py_INCREF(Py_None);
206         return Py_None;
207     }
208     return PyString_FromString(str);
209 }
210 
211 static PyObject *
conv_string_len_to_utf8(const XML_Char * str,int len)212 conv_string_len_to_utf8(const XML_Char *str, int len)
213 {
214     /* XXX currently this code assumes that XML_Char is 8-bit,
215        and hence in UTF-8.  */
216     /* UTF-8 from Expat, UTF-8 desired */
217     if (str == NULL) {
218         Py_INCREF(Py_None);
219         return Py_None;
220     }
221     return PyString_FromStringAndSize((const char *)str, len);
222 }
223 
224 /* Callback routines */
225 
226 static void clear_handlers(xmlparseobject *self, int initial);
227 
228 /* This handler is used when an error has been detected, in the hope
229    that actual parsing can be terminated early.  This will only help
230    if an external entity reference is encountered. */
231 static int
error_external_entity_ref_handler(XML_Parser parser,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)232 error_external_entity_ref_handler(XML_Parser parser,
233                                   const XML_Char *context,
234                                   const XML_Char *base,
235                                   const XML_Char *systemId,
236                                   const XML_Char *publicId)
237 {
238     return 0;
239 }
240 
241 /* Dummy character data handler used when an error (exception) has
242    been detected, and the actual parsing can be terminated early.
243    This is needed since character data handler can't be safely removed
244    from within the character data handler, but can be replaced.  It is
245    used only from the character data handler trampoline, and must be
246    used right after `flag_error()` is called. */
247 static void
noop_character_data_handler(void * userData,const XML_Char * data,int len)248 noop_character_data_handler(void *userData, const XML_Char *data, int len)
249 {
250     /* Do nothing. */
251 }
252 
253 static void
flag_error(xmlparseobject * self)254 flag_error(xmlparseobject *self)
255 {
256     clear_handlers(self, 0);
257     XML_SetExternalEntityRefHandler(self->itself,
258                                     error_external_entity_ref_handler);
259 }
260 
261 static PyCodeObject*
getcode(enum HandlerTypes slot,char * func_name,int lineno)262 getcode(enum HandlerTypes slot, char* func_name, int lineno)
263 {
264     if (handler_info[slot].tb_code == NULL) {
265         handler_info[slot].tb_code =
266             PyCode_NewEmpty(__FILE__, func_name, lineno);
267     }
268     return handler_info[slot].tb_code;
269 }
270 
271 #ifdef FIX_TRACE
272 static int
trace_frame(PyThreadState * tstate,PyFrameObject * f,int code,PyObject * val)273 trace_frame(PyThreadState *tstate, PyFrameObject *f, int code, PyObject *val)
274 {
275     int result = 0;
276     if (!tstate->use_tracing || tstate->tracing)
277         return 0;
278     if (tstate->c_profilefunc != NULL) {
279         tstate->tracing++;
280         result = tstate->c_profilefunc(tstate->c_profileobj,
281                                        f, code , val);
282         tstate->use_tracing = ((tstate->c_tracefunc != NULL)
283                                || (tstate->c_profilefunc != NULL));
284         tstate->tracing--;
285         if (result)
286             return result;
287     }
288     if (tstate->c_tracefunc != NULL) {
289         tstate->tracing++;
290         result = tstate->c_tracefunc(tstate->c_traceobj,
291                                      f, code , val);
292         tstate->use_tracing = ((tstate->c_tracefunc != NULL)
293                                || (tstate->c_profilefunc != NULL));
294         tstate->tracing--;
295     }
296     return result;
297 }
298 
299 static int
trace_frame_exc(PyThreadState * tstate,PyFrameObject * f)300 trace_frame_exc(PyThreadState *tstate, PyFrameObject *f)
301 {
302     PyObject *type, *value, *traceback, *arg;
303     int err;
304 
305     if (tstate->c_tracefunc == NULL)
306         return 0;
307 
308     PyErr_Fetch(&type, &value, &traceback);
309     if (value == NULL) {
310         value = Py_None;
311         Py_INCREF(value);
312     }
313 #if PY_VERSION_HEX < 0x02040000
314     arg = Py_BuildValue("(OOO)", type, value, traceback);
315 #else
316     arg = PyTuple_Pack(3, type, value, traceback);
317 #endif
318     if (arg == NULL) {
319         PyErr_Restore(type, value, traceback);
320         return 0;
321     }
322     err = trace_frame(tstate, f, PyTrace_EXCEPTION, arg);
323     Py_DECREF(arg);
324     if (err == 0)
325         PyErr_Restore(type, value, traceback);
326     else {
327         Py_XDECREF(type);
328         Py_XDECREF(value);
329         Py_XDECREF(traceback);
330     }
331     return err;
332 }
333 #endif
334 
335 static PyObject*
call_with_frame(PyCodeObject * c,PyObject * func,PyObject * args,xmlparseobject * self)336 call_with_frame(PyCodeObject *c, PyObject* func, PyObject* args,
337                 xmlparseobject *self)
338 {
339     PyThreadState *tstate = PyThreadState_GET();
340     PyFrameObject *f;
341     PyObject *res;
342 
343     if (c == NULL)
344         return NULL;
345 
346     f = PyFrame_New(tstate, c, PyEval_GetGlobals(), NULL);
347     if (f == NULL)
348         return NULL;
349     tstate->frame = f;
350 #ifdef FIX_TRACE
351     if (trace_frame(tstate, f, PyTrace_CALL, Py_None) < 0) {
352         return NULL;
353     }
354 #endif
355     res = PyEval_CallObject(func, args);
356     if (res == NULL) {
357         if (tstate->curexc_traceback == NULL)
358             PyTraceBack_Here(f);
359         XML_StopParser(self->itself, XML_FALSE);
360 #ifdef FIX_TRACE
361         if (trace_frame_exc(tstate, f) < 0) {
362             return NULL;
363         }
364     }
365     else {
366         if (trace_frame(tstate, f, PyTrace_RETURN, res) < 0) {
367             Py_XDECREF(res);
368             res = NULL;
369         }
370     }
371 #else
372     }
373 #endif
374     tstate->frame = f->f_back;
375     Py_DECREF(f);
376     return res;
377 }
378 
379 #ifndef Py_USING_UNICODE
380 #define STRING_CONV_FUNC conv_string_to_utf8
381 #else
382 /* Python 2.0 and later versions, when built with Unicode support */
383 #define STRING_CONV_FUNC (self->returns_unicode \
384                           ? conv_string_to_unicode : conv_string_to_utf8)
385 #endif
386 
387 static PyObject*
string_intern(xmlparseobject * self,const char * str)388 string_intern(xmlparseobject *self, const char* str)
389 {
390     PyObject *result = STRING_CONV_FUNC(str);
391     PyObject *value;
392     /* result can be NULL if the unicode conversion failed. */
393     if (!result)
394         return result;
395     if (!self->intern)
396         return result;
397     value = PyDict_GetItem(self->intern, result);
398     if (!value) {
399         if (PyDict_SetItem(self->intern, result, result) == 0)
400             return result;
401         else
402             return NULL;
403     }
404     Py_INCREF(value);
405     Py_DECREF(result);
406     return value;
407 }
408 
409 /* Return 0 on success, -1 on exception.
410  * flag_error() will be called before return if needed.
411  */
412 static int
call_character_handler(xmlparseobject * self,const XML_Char * buffer,int len)413 call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len)
414 {
415     PyObject *args;
416     PyObject *temp;
417 
418     if (!have_handler(self, CharacterData))
419         return -1;
420 
421     args = PyTuple_New(1);
422     if (args == NULL)
423         return -1;
424 #ifdef Py_USING_UNICODE
425     temp = (self->returns_unicode
426             ? conv_string_len_to_unicode(buffer, len)
427             : conv_string_len_to_utf8(buffer, len));
428 #else
429     temp = conv_string_len_to_utf8(buffer, len);
430 #endif
431     if (temp == NULL) {
432         Py_DECREF(args);
433         flag_error(self);
434         XML_SetCharacterDataHandler(self->itself,
435                                     noop_character_data_handler);
436         return -1;
437     }
438     PyTuple_SET_ITEM(args, 0, temp);
439     /* temp is now a borrowed reference; consider it unused. */
440     self->in_callback = 1;
441     temp = call_with_frame(getcode(CharacterData, "CharacterData", __LINE__),
442                            self->handlers[CharacterData], args, self);
443     /* temp is an owned reference again, or NULL */
444     self->in_callback = 0;
445     Py_DECREF(args);
446     if (temp == NULL) {
447         flag_error(self);
448         XML_SetCharacterDataHandler(self->itself,
449                                     noop_character_data_handler);
450         return -1;
451     }
452     Py_DECREF(temp);
453     return 0;
454 }
455 
456 static int
flush_character_buffer(xmlparseobject * self)457 flush_character_buffer(xmlparseobject *self)
458 {
459     int rc;
460     if (self->buffer == NULL || self->buffer_used == 0)
461         return 0;
462     rc = call_character_handler(self, self->buffer, self->buffer_used);
463     self->buffer_used = 0;
464     return rc;
465 }
466 
467 static void
my_CharacterDataHandler(void * userData,const XML_Char * data,int len)468 my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
469 {
470     xmlparseobject *self = (xmlparseobject *) userData;
471     if (self->buffer == NULL)
472         call_character_handler(self, data, len);
473     else {
474         if ((self->buffer_used + len) > self->buffer_size) {
475             if (flush_character_buffer(self) < 0)
476                 return;
477             /* handler might have changed; drop the rest on the floor
478              * if there isn't a handler anymore
479              */
480             if (!have_handler(self, CharacterData))
481                 return;
482         }
483         if (len > self->buffer_size) {
484             call_character_handler(self, data, len);
485             self->buffer_used = 0;
486         }
487         else {
488             memcpy(self->buffer + self->buffer_used,
489                    data, len * sizeof(XML_Char));
490             self->buffer_used += len;
491         }
492     }
493 }
494 
495 static void
my_StartElementHandler(void * userData,const XML_Char * name,const XML_Char * atts[])496 my_StartElementHandler(void *userData,
497                        const XML_Char *name, const XML_Char *atts[])
498 {
499     xmlparseobject *self = (xmlparseobject *)userData;
500 
501     if (have_handler(self, StartElement)) {
502         PyObject *container, *rv, *args;
503         int i, max;
504 
505         if (flush_character_buffer(self) < 0)
506             return;
507         /* Set max to the number of slots filled in atts[]; max/2 is
508          * the number of attributes we need to process.
509          */
510         if (self->specified_attributes) {
511             max = XML_GetSpecifiedAttributeCount(self->itself);
512         }
513         else {
514             max = 0;
515             while (atts[max] != NULL)
516                 max += 2;
517         }
518         /* Build the container. */
519         if (self->ordered_attributes)
520             container = PyList_New(max);
521         else
522             container = PyDict_New();
523         if (container == NULL) {
524             flag_error(self);
525             return;
526         }
527         for (i = 0; i < max; i += 2) {
528             PyObject *n = string_intern(self, (XML_Char *) atts[i]);
529             PyObject *v;
530             if (n == NULL) {
531                 flag_error(self);
532                 Py_DECREF(container);
533                 return;
534             }
535             v = STRING_CONV_FUNC((XML_Char *) atts[i+1]);
536             if (v == NULL) {
537                 flag_error(self);
538                 Py_DECREF(container);
539                 Py_DECREF(n);
540                 return;
541             }
542             if (self->ordered_attributes) {
543                 PyList_SET_ITEM(container, i, n);
544                 PyList_SET_ITEM(container, i+1, v);
545             }
546             else if (PyDict_SetItem(container, n, v)) {
547                 flag_error(self);
548                 Py_DECREF(n);
549                 Py_DECREF(v);
550                 return;
551             }
552             else {
553                 Py_DECREF(n);
554                 Py_DECREF(v);
555             }
556         }
557         args = string_intern(self, name);
558         if (args != NULL)
559             args = Py_BuildValue("(NN)", args, container);
560         if (args == NULL) {
561             Py_DECREF(container);
562             return;
563         }
564         /* Container is now a borrowed reference; ignore it. */
565         self->in_callback = 1;
566         rv = call_with_frame(getcode(StartElement, "StartElement", __LINE__),
567                              self->handlers[StartElement], args, self);
568         self->in_callback = 0;
569         Py_DECREF(args);
570         if (rv == NULL) {
571             flag_error(self);
572             return;
573         }
574         Py_DECREF(rv);
575     }
576 }
577 
/* Generate the C callback my_<NAME>Handler that forwards an Expat event
 * to the Python handler stored in slot NAME.
 *
 *   RC            C return type of the generated function
 *   NAME          enum HandlerTypes entry (also used for the name)
 *   PARAMS        parenthesized C parameter list
 *   INIT          extra declaration(s) placed after the locals
 *   PARAM_FORMAT  parenthesized argument list for Py_BuildValue; it may
 *                 refer to `self`
 *   CONVERSION    statement(s) run on the handler's result `rv`
 *   RETURN        expression returned on every path
 *   GETUSERDATA   expression yielding the xmlparseobject pointer
 *
 * The generated function does nothing but `return RETURN` when no
 * handler is installed or flushing the character buffer fails, and
 * calls flag_error() when building the arguments or calling the
 * handler fails.
 */
#define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
                RETURN, GETUSERDATA) \
static RC \
my_##NAME##Handler PARAMS \
{ \
    xmlparseobject *self = GETUSERDATA ; \
    PyObject *args = NULL; \
    PyObject *rv = NULL; \
    INIT \
\
    if (!have_handler(self, NAME)) { \
        return RETURN; \
    } \
    if (flush_character_buffer(self) < 0) { \
        return RETURN; \
    } \
    args = Py_BuildValue PARAM_FORMAT ; \
    if (args == NULL) { \
        flag_error(self); \
        return RETURN; \
    } \
    self->in_callback = 1; \
    rv = call_with_frame(getcode(NAME,#NAME,__LINE__), \
                         self->handlers[NAME], args, self); \
    self->in_callback = 0; \
    Py_DECREF(args); \
    if (rv == NULL) { \
        flag_error(self); \
        return RETURN; \
    } \
    CONVERSION \
    Py_DECREF(rv); \
    return RETURN; \
}
606 
/* Handler returning void: the Python result is discarded and the
 * parser object is taken from the `userData` parameter.
 */
#define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
    RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;, \
               (xmlparseobject *)userData)

/* Handler returning int: the Python result is converted with
 * PyInt_AsLong() into `rc`, which starts at 0 and is the value
 * returned; the parser object is taken from the `userData` parameter.
 */
#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
    RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
               rc = PyInt_AsLong(rv);, rc, \
               (xmlparseobject *)userData)
615 
616 VOID_HANDLER(EndElement,
617              (void *userData, const XML_Char *name),
618              ("(N)", string_intern(self, name)))
619 
620 VOID_HANDLER(ProcessingInstruction,
621              (void *userData,
622               const XML_Char *target,
623               const XML_Char *data),
624              ("(NO&)", string_intern(self, target), STRING_CONV_FUNC,data))
625 
626 VOID_HANDLER(UnparsedEntityDecl,
627              (void *userData,
628               const XML_Char *entityName,
629               const XML_Char *base,
630               const XML_Char *systemId,
631               const XML_Char *publicId,
632               const XML_Char *notationName),
633              ("(NNNNN)",
634               string_intern(self, entityName), string_intern(self, base),
635               string_intern(self, systemId), string_intern(self, publicId),
636               string_intern(self, notationName)))
637 
638 #ifndef Py_USING_UNICODE
639 VOID_HANDLER(EntityDecl,
640              (void *userData,
641               const XML_Char *entityName,
642               int is_parameter_entity,
643               const XML_Char *value,
644               int value_length,
645               const XML_Char *base,
646               const XML_Char *systemId,
647               const XML_Char *publicId,
648               const XML_Char *notationName),
649              ("NiNNNNN",
650               string_intern(self, entityName), is_parameter_entity,
651               conv_string_len_to_utf8(value, value_length),
652               string_intern(self, base), string_intern(self, systemId),
653               string_intern(self, publicId),
654               string_intern(self, notationName)))
655 #else
656 VOID_HANDLER(EntityDecl,
657              (void *userData,
658               const XML_Char *entityName,
659               int is_parameter_entity,
660               const XML_Char *value,
661               int value_length,
662               const XML_Char *base,
663               const XML_Char *systemId,
664               const XML_Char *publicId,
665               const XML_Char *notationName),
666              ("NiNNNNN",
667               string_intern(self, entityName), is_parameter_entity,
668               (self->returns_unicode
669                ? conv_string_len_to_unicode(value, value_length)
670                : conv_string_len_to_utf8(value, value_length)),
671               string_intern(self, base), string_intern(self, systemId),
672               string_intern(self, publicId),
673               string_intern(self, notationName)))
674 #endif
675 
676 VOID_HANDLER(XmlDecl,
677              (void *userData,
678               const XML_Char *version,
679               const XML_Char *encoding,
680               int standalone),
681              ("(O&O&i)",
682               STRING_CONV_FUNC,version, STRING_CONV_FUNC,encoding,
683               standalone))
684 
685 static PyObject *
conv_content_model(XML_Content * const model,PyObject * (* conv_string)(const XML_Char *))686 conv_content_model(XML_Content * const model,
687                    PyObject *(*conv_string)(const XML_Char *))
688 {
689     PyObject *result = NULL;
690     PyObject *children = PyTuple_New(model->numchildren);
691     int i;
692 
693     if (children != NULL) {
694         assert(model->numchildren < INT_MAX);
695         for (i = 0; i < (int)model->numchildren; ++i) {
696             PyObject *child = conv_content_model(&model->children[i],
697                                                  conv_string);
698             if (child == NULL) {
699                 Py_XDECREF(children);
700                 return NULL;
701             }
702             PyTuple_SET_ITEM(children, i, child);
703         }
704         result = Py_BuildValue("(iiO&N)",
705                                model->type, model->quant,
706                                conv_string,model->name, children);
707     }
708     return result;
709 }
710 
711 static void
my_ElementDeclHandler(void * userData,const XML_Char * name,XML_Content * model)712 my_ElementDeclHandler(void *userData,
713                       const XML_Char *name,
714                       XML_Content *model)
715 {
716     xmlparseobject *self = (xmlparseobject *)userData;
717     PyObject *args = NULL;
718 
719     if (have_handler(self, ElementDecl)) {
720         PyObject *rv = NULL;
721         PyObject *modelobj, *nameobj;
722 
723         if (flush_character_buffer(self) < 0)
724             goto finally;
725 #ifdef Py_USING_UNICODE
726         modelobj = conv_content_model(model,
727                                       (self->returns_unicode
728                                        ? conv_string_to_unicode
729                                        : conv_string_to_utf8));
730 #else
731         modelobj = conv_content_model(model, conv_string_to_utf8);
732 #endif
733         if (modelobj == NULL) {
734             flag_error(self);
735             goto finally;
736         }
737         nameobj = string_intern(self, name);
738         if (nameobj == NULL) {
739             Py_DECREF(modelobj);
740             flag_error(self);
741             goto finally;
742         }
743         args = Py_BuildValue("NN", nameobj, modelobj);
744         if (args == NULL) {
745             Py_DECREF(modelobj);
746             flag_error(self);
747             goto finally;
748         }
749         self->in_callback = 1;
750         rv = call_with_frame(getcode(ElementDecl, "ElementDecl", __LINE__),
751                              self->handlers[ElementDecl], args, self);
752         self->in_callback = 0;
753         if (rv == NULL) {
754             flag_error(self);
755             goto finally;
756         }
757         Py_DECREF(rv);
758     }
759  finally:
760     Py_XDECREF(args);
761     XML_FreeContentModel(self->itself, model);
762     return;
763 }
764 
765 VOID_HANDLER(AttlistDecl,
766              (void *userData,
767               const XML_Char *elname,
768               const XML_Char *attname,
769               const XML_Char *att_type,
770               const XML_Char *dflt,
771               int isrequired),
772              ("(NNO&O&i)",
773               string_intern(self, elname), string_intern(self, attname),
774               STRING_CONV_FUNC,att_type, STRING_CONV_FUNC,dflt,
775               isrequired))
776 
777 #if XML_COMBINED_VERSION >= 19504
778 VOID_HANDLER(SkippedEntity,
779              (void *userData,
780               const XML_Char *entityName,
781               int is_parameter_entity),
782              ("Ni",
783               string_intern(self, entityName), is_parameter_entity))
784 #endif
785 
786 VOID_HANDLER(NotationDecl,
787                 (void *userData,
788                         const XML_Char *notationName,
789                         const XML_Char *base,
790                         const XML_Char *systemId,
791                         const XML_Char *publicId),
792                 ("(NNNN)",
793                  string_intern(self, notationName), string_intern(self, base),
794                  string_intern(self, systemId), string_intern(self, publicId)))
795 
796 VOID_HANDLER(StartNamespaceDecl,
797                 (void *userData,
798                       const XML_Char *prefix,
799                       const XML_Char *uri),
800                 ("(NN)",
801                  string_intern(self, prefix), string_intern(self, uri)))
802 
803 VOID_HANDLER(EndNamespaceDecl,
804                 (void *userData,
805                     const XML_Char *prefix),
806                 ("(N)", string_intern(self, prefix)))
807 
808 VOID_HANDLER(Comment,
809                (void *userData, const XML_Char *data),
810                 ("(O&)", STRING_CONV_FUNC,data))
811 
812 VOID_HANDLER(StartCdataSection,
813                (void *userData),
814                 ("()"))
815 
816 VOID_HANDLER(EndCdataSection,
817                (void *userData),
818                 ("()"))
819 
820 #ifndef Py_USING_UNICODE
821 VOID_HANDLER(Default,
822               (void *userData, const XML_Char *s, int len),
823               ("(N)", conv_string_len_to_utf8(s,len)))
824 
825 VOID_HANDLER(DefaultHandlerExpand,
826               (void *userData, const XML_Char *s, int len),
827               ("(N)", conv_string_len_to_utf8(s,len)))
828 #else
829 VOID_HANDLER(Default,
830               (void *userData, const XML_Char *s, int len),
831               ("(N)", (self->returns_unicode
832                        ? conv_string_len_to_unicode(s,len)
833                        : conv_string_len_to_utf8(s,len))))
834 
835 VOID_HANDLER(DefaultHandlerExpand,
836               (void *userData, const XML_Char *s, int len),
837               ("(N)", (self->returns_unicode
838                        ? conv_string_len_to_unicode(s,len)
839                        : conv_string_len_to_utf8(s,len))))
840 #endif
841 
842 INT_HANDLER(NotStandalone,
843                 (void *userData),
844                 ("()"))
845 
846 RC_HANDLER(int, ExternalEntityRef,
847                 (XML_Parser parser,
848                     const XML_Char *context,
849                     const XML_Char *base,
850                     const XML_Char *systemId,
851                     const XML_Char *publicId),
852                 int rc=0;,
853                 ("(O&NNN)",
854                  STRING_CONV_FUNC,context, string_intern(self, base),
855                  string_intern(self, systemId), string_intern(self, publicId)),
856                 rc = PyInt_AsLong(rv);, rc,
857                 XML_GetUserData(parser))
858 
859 /* XXX UnknownEncodingHandler */
860 
861 VOID_HANDLER(StartDoctypeDecl,
862              (void *userData, const XML_Char *doctypeName,
863               const XML_Char *sysid, const XML_Char *pubid,
864               int has_internal_subset),
865              ("(NNNi)", string_intern(self, doctypeName),
866               string_intern(self, sysid), string_intern(self, pubid),
867               has_internal_subset))
868 
869 VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
870 
871 /* ---------------------------------------------------------------- */
872 
873 static PyObject *
get_parse_result(xmlparseobject * self,int rv)874 get_parse_result(xmlparseobject *self, int rv)
875 {
876     if (PyErr_Occurred()) {
877         return NULL;
878     }
879     if (rv == 0) {
880         return set_error(self, XML_GetErrorCode(self->itself));
881     }
882     if (flush_character_buffer(self) < 0) {
883         return NULL;
884     }
885     return PyInt_FromLong(rv);
886 }
887 
888 PyDoc_STRVAR(xmlparse_Parse__doc__,
889 "Parse(data[, isfinal])\n\
890 Parse XML data.  `isfinal' should be true at end of input.");
891 
892 static PyObject *
xmlparse_Parse(xmlparseobject * self,PyObject * args)893 xmlparse_Parse(xmlparseobject *self, PyObject *args)
894 {
895     char *s;
896     int slen;
897     int isFinal = 0;
898 
899     if (!PyArg_ParseTuple(args, "s#|i:Parse", &s, &slen, &isFinal))
900         return NULL;
901 
902     return get_parse_result(self, XML_Parse(self->itself, s, slen, isFinal));
903 }
904 
905 /* File reading copied from cPickle */
906 
907 #define BUF_SIZE 2048
908 
909 static int
readinst(char * buf,int buf_size,PyObject * meth)910 readinst(char *buf, int buf_size, PyObject *meth)
911 {
912     PyObject *arg = NULL;
913     PyObject *bytes = NULL;
914     PyObject *str = NULL;
915     int len = -1;
916 
917     if ((bytes = PyInt_FromLong(buf_size)) == NULL)
918         goto finally;
919 
920     if ((arg = PyTuple_New(1)) == NULL) {
921         Py_DECREF(bytes);
922         goto finally;
923     }
924 
925     PyTuple_SET_ITEM(arg, 0, bytes);
926 
927 #if PY_VERSION_HEX < 0x02020000
928     str = PyObject_CallObject(meth, arg);
929 #else
930     str = PyObject_Call(meth, arg, NULL);
931 #endif
932     if (str == NULL)
933         goto finally;
934 
935     /* XXX what to do if it returns a Unicode string? */
936     if (!PyString_Check(str)) {
937         PyErr_Format(PyExc_TypeError,
938                      "read() did not return a string object (type=%.400s)",
939                      Py_TYPE(str)->tp_name);
940         goto finally;
941     }
942     len = PyString_GET_SIZE(str);
943     if (len > buf_size) {
944         PyErr_Format(PyExc_ValueError,
945                      "read() returned too much data: "
946                      "%i bytes requested, %i returned",
947                      buf_size, len);
948         goto finally;
949     }
950     memcpy(buf, PyString_AsString(str), len);
951 finally:
952     Py_XDECREF(arg);
953     Py_XDECREF(str);
954     return len;
955 }
956 
957 PyDoc_STRVAR(xmlparse_ParseFile__doc__,
958 "ParseFile(file)\n\
959 Parse XML data from file-like object.");
960 
961 static PyObject *
xmlparse_ParseFile(xmlparseobject * self,PyObject * f)962 xmlparse_ParseFile(xmlparseobject *self, PyObject *f)
963 {
964     int rv = 1;
965     PyObject *readmethod = NULL;
966 
967     readmethod = PyObject_GetAttrString(f, "read");
968     if (readmethod == NULL) {
969         PyErr_SetString(PyExc_TypeError,
970                         "argument must have 'read' attribute");
971         return NULL;
972 
973     }
974     for (;;) {
975         int bytes_read;
976         void *buf = XML_GetBuffer(self->itself, BUF_SIZE);
977         if (buf == NULL) {
978             Py_XDECREF(readmethod);
979             return PyErr_NoMemory();
980         }
981 
982         bytes_read = readinst(buf, BUF_SIZE, readmethod);
983         if (bytes_read < 0) {
984             Py_XDECREF(readmethod);
985             return NULL;
986         }
987 
988         rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0);
989         if (PyErr_Occurred()) {
990             Py_XDECREF(readmethod);
991             return NULL;
992         }
993 
994         if (!rv || bytes_read == 0)
995             break;
996     }
997     Py_XDECREF(readmethod);
998     return get_parse_result(self, rv);
999 }
1000 
1001 PyDoc_STRVAR(xmlparse_SetBase__doc__,
1002 "SetBase(base_url)\n\
1003 Set the base URL for the parser.");
1004 
1005 static PyObject *
xmlparse_SetBase(xmlparseobject * self,PyObject * args)1006 xmlparse_SetBase(xmlparseobject *self, PyObject *args)
1007 {
1008     char *base;
1009 
1010     if (!PyArg_ParseTuple(args, "s:SetBase", &base))
1011         return NULL;
1012     if (!XML_SetBase(self->itself, base)) {
1013         return PyErr_NoMemory();
1014     }
1015     Py_INCREF(Py_None);
1016     return Py_None;
1017 }
1018 
1019 PyDoc_STRVAR(xmlparse_GetBase__doc__,
1020 "GetBase() -> url\n\
1021 Return base URL string for the parser.");
1022 
1023 static PyObject *
xmlparse_GetBase(xmlparseobject * self,PyObject * unused)1024 xmlparse_GetBase(xmlparseobject *self, PyObject *unused)
1025 {
1026     return Py_BuildValue("z", XML_GetBase(self->itself));
1027 }
1028 
1029 PyDoc_STRVAR(xmlparse_GetInputContext__doc__,
1030 "GetInputContext() -> string\n\
1031 Return the untranslated text of the input that caused the current event.\n\
1032 If the event was generated by a large amount of text (such as a start tag\n\
1033 for an element with many attributes), not all of the text may be available.");
1034 
1035 static PyObject *
xmlparse_GetInputContext(xmlparseobject * self,PyObject * unused)1036 xmlparse_GetInputContext(xmlparseobject *self, PyObject *unused)
1037 {
1038     if (self->in_callback) {
1039         int offset, size;
1040         const char *buffer
1041             = XML_GetInputContext(self->itself, &offset, &size);
1042 
1043         if (buffer != NULL)
1044             return PyString_FromStringAndSize(buffer + offset,
1045                                               size - offset);
1046         else
1047             Py_RETURN_NONE;
1048     }
1049     else
1050         Py_RETURN_NONE;
1051 }
1052 
1053 PyDoc_STRVAR(xmlparse_ExternalEntityParserCreate__doc__,
1054 "ExternalEntityParserCreate(context[, encoding])\n\
1055 Create a parser for parsing an external entity based on the\n\
1056 information passed to the ExternalEntityRefHandler.");
1057 
1058 static PyObject *
xmlparse_ExternalEntityParserCreate(xmlparseobject * self,PyObject * args)1059 xmlparse_ExternalEntityParserCreate(xmlparseobject *self, PyObject *args)
1060 {
1061     char *context;
1062     char *encoding = NULL;
1063     xmlparseobject *new_parser;
1064     int i;
1065 
1066     if (!PyArg_ParseTuple(args, "z|s:ExternalEntityParserCreate",
1067                           &context, &encoding)) {
1068         return NULL;
1069     }
1070 
1071 #ifndef Py_TPFLAGS_HAVE_GC
1072     /* Python versions 2.0 and 2.1 */
1073     new_parser = PyObject_New(xmlparseobject, &Xmlparsetype);
1074 #else
1075     /* Python versions 2.2 and later */
1076     new_parser = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
1077 #endif
1078 
1079     if (new_parser == NULL)
1080         return NULL;
1081     new_parser->buffer_size = self->buffer_size;
1082     new_parser->buffer_used = 0;
1083     if (self->buffer != NULL) {
1084         new_parser->buffer = malloc(new_parser->buffer_size);
1085         if (new_parser->buffer == NULL) {
1086 #ifndef Py_TPFLAGS_HAVE_GC
1087             /* Code for versions 2.0 and 2.1 */
1088             PyObject_Del(new_parser);
1089 #else
1090             /* Code for versions 2.2 and later. */
1091             PyObject_GC_Del(new_parser);
1092 #endif
1093             return PyErr_NoMemory();
1094         }
1095     }
1096     else
1097         new_parser->buffer = NULL;
1098     new_parser->returns_unicode = self->returns_unicode;
1099     new_parser->ordered_attributes = self->ordered_attributes;
1100     new_parser->specified_attributes = self->specified_attributes;
1101     new_parser->in_callback = 0;
1102     new_parser->ns_prefixes = self->ns_prefixes;
1103     new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
1104                                                         encoding);
1105     new_parser->handlers = 0;
1106     new_parser->intern = self->intern;
1107     Py_XINCREF(new_parser->intern);
1108 #ifdef Py_TPFLAGS_HAVE_GC
1109     PyObject_GC_Track(new_parser);
1110 #else
1111     PyObject_GC_Init(new_parser);
1112 #endif
1113 
1114     if (!new_parser->itself) {
1115         Py_DECREF(new_parser);
1116         return PyErr_NoMemory();
1117     }
1118 
1119     XML_SetUserData(new_parser->itself, (void *)new_parser);
1120 
1121     /* allocate and clear handlers first */
1122     for (i = 0; handler_info[i].name != NULL; i++)
1123         /* do nothing */;
1124 
1125     new_parser->handlers = malloc(sizeof(PyObject *) * i);
1126     if (!new_parser->handlers) {
1127         Py_DECREF(new_parser);
1128         return PyErr_NoMemory();
1129     }
1130     clear_handlers(new_parser, 1);
1131 
1132     /* then copy handlers from self */
1133     for (i = 0; handler_info[i].name != NULL; i++) {
1134         PyObject *handler = self->handlers[i];
1135         if (handler != NULL) {
1136             Py_INCREF(handler);
1137             new_parser->handlers[i] = handler;
1138             handler_info[i].setter(new_parser->itself,
1139                                    handler_info[i].handler);
1140         }
1141     }
1142     return (PyObject *)new_parser;
1143 }
1144 
1145 PyDoc_STRVAR(xmlparse_SetParamEntityParsing__doc__,
1146 "SetParamEntityParsing(flag) -> success\n\
1147 Controls parsing of parameter entities (including the external DTD\n\
1148 subset). Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,\n\
1149 XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and\n\
1150 XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag\n\
1151 was successful.");
1152 
1153 static PyObject*
xmlparse_SetParamEntityParsing(xmlparseobject * p,PyObject * args)1154 xmlparse_SetParamEntityParsing(xmlparseobject *p, PyObject* args)
1155 {
1156     int flag;
1157     if (!PyArg_ParseTuple(args, "i", &flag))
1158         return NULL;
1159     flag = XML_SetParamEntityParsing(p->itself, flag);
1160     return PyInt_FromLong(flag);
1161 }
1162 
1163 
1164 #if XML_COMBINED_VERSION >= 19505
1165 PyDoc_STRVAR(xmlparse_UseForeignDTD__doc__,
1166 "UseForeignDTD([flag])\n\
1167 Allows the application to provide an artificial external subset if one is\n\
1168 not specified as part of the document instance.  This readily allows the\n\
1169 use of a 'default' document type controlled by the application, while still\n\
1170 getting the advantage of providing document type information to the parser.\n\
1171 'flag' defaults to True if not provided.");
1172 
1173 static PyObject *
xmlparse_UseForeignDTD(xmlparseobject * self,PyObject * args)1174 xmlparse_UseForeignDTD(xmlparseobject *self, PyObject *args)
1175 {
1176     PyObject *flagobj = NULL;
1177     XML_Bool flag = XML_TRUE;
1178     enum XML_Error rc;
1179     if (!PyArg_UnpackTuple(args, "UseForeignDTD", 0, 1, &flagobj))
1180         return NULL;
1181     if (flagobj != NULL)
1182         flag = PyObject_IsTrue(flagobj) ? XML_TRUE : XML_FALSE;
1183     rc = XML_UseForeignDTD(self->itself, flag);
1184     if (rc != XML_ERROR_NONE) {
1185         return set_error(self, rc);
1186     }
1187     Py_INCREF(Py_None);
1188     return Py_None;
1189 }
1190 #endif
1191 
1192 static struct PyMethodDef xmlparse_methods[] = {
1193     {"Parse",     (PyCFunction)xmlparse_Parse,
1194                   METH_VARARGS, xmlparse_Parse__doc__},
1195     {"ParseFile", (PyCFunction)xmlparse_ParseFile,
1196                   METH_O,       xmlparse_ParseFile__doc__},
1197     {"SetBase",   (PyCFunction)xmlparse_SetBase,
1198                   METH_VARARGS, xmlparse_SetBase__doc__},
1199     {"GetBase",   (PyCFunction)xmlparse_GetBase,
1200                   METH_NOARGS, xmlparse_GetBase__doc__},
1201     {"ExternalEntityParserCreate", (PyCFunction)xmlparse_ExternalEntityParserCreate,
1202                   METH_VARARGS, xmlparse_ExternalEntityParserCreate__doc__},
1203     {"SetParamEntityParsing", (PyCFunction)xmlparse_SetParamEntityParsing,
1204                   METH_VARARGS, xmlparse_SetParamEntityParsing__doc__},
1205     {"GetInputContext", (PyCFunction)xmlparse_GetInputContext,
1206                   METH_NOARGS, xmlparse_GetInputContext__doc__},
1207 #if XML_COMBINED_VERSION >= 19505
1208     {"UseForeignDTD", (PyCFunction)xmlparse_UseForeignDTD,
1209                   METH_VARARGS, xmlparse_UseForeignDTD__doc__},
1210 #endif
1211     {NULL,        NULL}         /* sentinel */
1212 };
1213 
1214 /* ---------- */
1215 
1216 
1217 #ifdef Py_USING_UNICODE
1218 
1219 /* pyexpat international encoding support.
1220    Make it as simple as possible.
1221 */
1222 
1223 static char template_buffer[257];
1224 PyObject *template_string = NULL;
1225 
1226 static void
init_template_buffer(void)1227 init_template_buffer(void)
1228 {
1229     int i;
1230     for (i = 0; i < 256; i++) {
1231         template_buffer[i] = i;
1232     }
1233     template_buffer[256] = 0;
1234 }
1235 
1236 static int
PyUnknownEncodingHandler(void * encodingHandlerData,const XML_Char * name,XML_Encoding * info)1237 PyUnknownEncodingHandler(void *encodingHandlerData,
1238                          const XML_Char *name,
1239                          XML_Encoding *info)
1240 {
1241     PyUnicodeObject *_u_string = NULL;
1242     int result = 0;
1243     int i;
1244 
1245     /* Yes, supports only 8bit encodings */
1246     _u_string = (PyUnicodeObject *)
1247         PyUnicode_Decode(template_buffer, 256, name, "replace");
1248 
1249     if (_u_string == NULL)
1250         return result;
1251 
1252     for (i = 0; i < 256; i++) {
1253         /* Stupid to access directly, but fast */
1254         Py_UNICODE c = _u_string->str[i];
1255         if (c == Py_UNICODE_REPLACEMENT_CHARACTER)
1256             info->map[i] = -1;
1257         else
1258             info->map[i] = c;
1259     }
1260     info->data = NULL;
1261     info->convert = NULL;
1262     info->release = NULL;
1263     result = 1;
1264     Py_DECREF(_u_string);
1265     return result;
1266 }
1267 
1268 #endif
1269 
1270 static PyObject *
newxmlparseobject(char * encoding,char * namespace_separator,PyObject * intern)1271 newxmlparseobject(char *encoding, char *namespace_separator, PyObject *intern)
1272 {
1273     int i;
1274     xmlparseobject *self;
1275 
1276 #ifdef Py_TPFLAGS_HAVE_GC
1277     /* Code for versions 2.2 and later */
1278     self = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
1279 #else
1280     self = PyObject_New(xmlparseobject, &Xmlparsetype);
1281 #endif
1282     if (self == NULL)
1283         return NULL;
1284 
1285 #ifdef Py_USING_UNICODE
1286     self->returns_unicode = 1;
1287 #else
1288     self->returns_unicode = 0;
1289 #endif
1290 
1291     self->buffer = NULL;
1292     self->buffer_size = CHARACTER_DATA_BUFFER_SIZE;
1293     self->buffer_used = 0;
1294     self->ordered_attributes = 0;
1295     self->specified_attributes = 0;
1296     self->in_callback = 0;
1297     self->ns_prefixes = 0;
1298     self->handlers = NULL;
1299     if (namespace_separator != NULL) {
1300         self->itself = XML_ParserCreateNS(encoding, *namespace_separator);
1301     }
1302     else {
1303         self->itself = XML_ParserCreate(encoding);
1304     }
1305     self->intern = intern;
1306     Py_XINCREF(self->intern);
1307 #ifdef Py_TPFLAGS_HAVE_GC
1308     PyObject_GC_Track(self);
1309 #else
1310     PyObject_GC_Init(self);
1311 #endif
1312     if (self->itself == NULL) {
1313         PyErr_SetString(PyExc_RuntimeError,
1314                         "XML_ParserCreate failed");
1315         Py_DECREF(self);
1316         return NULL;
1317     }
1318     XML_SetUserData(self->itself, (void *)self);
1319 #ifdef Py_USING_UNICODE
1320     XML_SetUnknownEncodingHandler(self->itself,
1321                   (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL);
1322 #endif
1323 
1324     for (i = 0; handler_info[i].name != NULL; i++)
1325         /* do nothing */;
1326 
1327     self->handlers = malloc(sizeof(PyObject *) * i);
1328     if (!self->handlers) {
1329         Py_DECREF(self);
1330         return PyErr_NoMemory();
1331     }
1332     clear_handlers(self, 1);
1333 
1334     return (PyObject*)self;
1335 }
1336 
1337 
1338 static void
xmlparse_dealloc(xmlparseobject * self)1339 xmlparse_dealloc(xmlparseobject *self)
1340 {
1341     int i;
1342 #ifdef Py_TPFLAGS_HAVE_GC
1343     PyObject_GC_UnTrack(self);
1344 #else
1345     PyObject_GC_Fini(self);
1346 #endif
1347     if (self->itself != NULL)
1348         XML_ParserFree(self->itself);
1349     self->itself = NULL;
1350 
1351     if (self->handlers != NULL) {
1352         PyObject *temp;
1353         for (i = 0; handler_info[i].name != NULL; i++) {
1354             temp = self->handlers[i];
1355             self->handlers[i] = NULL;
1356             Py_XDECREF(temp);
1357         }
1358         free(self->handlers);
1359         self->handlers = NULL;
1360     }
1361     if (self->buffer != NULL) {
1362         free(self->buffer);
1363         self->buffer = NULL;
1364     }
1365     Py_XDECREF(self->intern);
1366 #ifndef Py_TPFLAGS_HAVE_GC
1367     /* Code for versions 2.0 and 2.1 */
1368     PyObject_Del(self);
1369 #else
1370     /* Code for versions 2.2 and later. */
1371     PyObject_GC_Del(self);
1372 #endif
1373 }
1374 
1375 static int
handlername2int(const char * name)1376 handlername2int(const char *name)
1377 {
1378     int i;
1379     for (i = 0; handler_info[i].name != NULL; i++) {
1380         if (strcmp(name, handler_info[i].name) == 0) {
1381             return i;
1382         }
1383     }
1384     return -1;
1385 }
1386 
1387 static PyObject *
get_pybool(int istrue)1388 get_pybool(int istrue)
1389 {
1390     PyObject *result = istrue ? Py_True : Py_False;
1391     Py_INCREF(result);
1392     return result;
1393 }
1394 
1395 static PyObject *
xmlparse_getattr(xmlparseobject * self,char * name)1396 xmlparse_getattr(xmlparseobject *self, char *name)
1397 {
1398     int handlernum = handlername2int(name);
1399 
1400     if (handlernum != -1) {
1401         PyObject *result = self->handlers[handlernum];
1402         if (result == NULL)
1403             result = Py_None;
1404         Py_INCREF(result);
1405         return result;
1406     }
1407     if (name[0] == 'E') {
1408         if (strcmp(name, "ErrorCode") == 0)
1409             return PyInt_FromLong((long)
1410                                   XML_GetErrorCode(self->itself));
1411         if (strcmp(name, "ErrorLineNumber") == 0)
1412             return PyInt_FromLong((long)
1413                                   XML_GetErrorLineNumber(self->itself));
1414         if (strcmp(name, "ErrorColumnNumber") == 0)
1415             return PyInt_FromLong((long)
1416                                   XML_GetErrorColumnNumber(self->itself));
1417         if (strcmp(name, "ErrorByteIndex") == 0)
1418             return PyInt_FromLong((long)
1419                                   XML_GetErrorByteIndex(self->itself));
1420     }
1421     if (name[0] == 'C') {
1422         if (strcmp(name, "CurrentLineNumber") == 0)
1423             return PyInt_FromLong((long)
1424                                   XML_GetCurrentLineNumber(self->itself));
1425         if (strcmp(name, "CurrentColumnNumber") == 0)
1426             return PyInt_FromLong((long)
1427                                   XML_GetCurrentColumnNumber(self->itself));
1428         if (strcmp(name, "CurrentByteIndex") == 0)
1429             return PyInt_FromLong((long)
1430                                   XML_GetCurrentByteIndex(self->itself));
1431     }
1432     if (name[0] == 'b') {
1433         if (strcmp(name, "buffer_size") == 0)
1434             return PyInt_FromLong((long) self->buffer_size);
1435         if (strcmp(name, "buffer_text") == 0)
1436             return get_pybool(self->buffer != NULL);
1437         if (strcmp(name, "buffer_used") == 0)
1438             return PyInt_FromLong((long) self->buffer_used);
1439     }
1440     if (strcmp(name, "namespace_prefixes") == 0)
1441         return get_pybool(self->ns_prefixes);
1442     if (strcmp(name, "ordered_attributes") == 0)
1443         return get_pybool(self->ordered_attributes);
1444     if (strcmp(name, "returns_unicode") == 0)
1445         return get_pybool((long) self->returns_unicode);
1446     if (strcmp(name, "specified_attributes") == 0)
1447         return get_pybool((long) self->specified_attributes);
1448     if (strcmp(name, "intern") == 0) {
1449         if (self->intern == NULL) {
1450             Py_INCREF(Py_None);
1451             return Py_None;
1452         }
1453         else {
1454             Py_INCREF(self->intern);
1455             return self->intern;
1456         }
1457     }
1458 
1459 #define APPEND(list, str)                               \
1460         do {                                            \
1461                 PyObject *o = PyString_FromString(str); \
1462                 if (o != NULL)                          \
1463                         PyList_Append(list, o);         \
1464                 Py_XDECREF(o);                          \
1465         } while (0)
1466 
1467     if (strcmp(name, "__members__") == 0) {
1468         int i;
1469         PyObject *rc = PyList_New(0);
1470         if (!rc)
1471                 return NULL;
1472         for (i = 0; handler_info[i].name != NULL; i++) {
1473             PyObject *o = get_handler_name(&handler_info[i]);
1474             if (o != NULL)
1475                 PyList_Append(rc, o);
1476             Py_XDECREF(o);
1477         }
1478         APPEND(rc, "ErrorCode");
1479         APPEND(rc, "ErrorLineNumber");
1480         APPEND(rc, "ErrorColumnNumber");
1481         APPEND(rc, "ErrorByteIndex");
1482         APPEND(rc, "CurrentLineNumber");
1483         APPEND(rc, "CurrentColumnNumber");
1484         APPEND(rc, "CurrentByteIndex");
1485         APPEND(rc, "buffer_size");
1486         APPEND(rc, "buffer_text");
1487         APPEND(rc, "buffer_used");
1488         APPEND(rc, "namespace_prefixes");
1489         APPEND(rc, "ordered_attributes");
1490         APPEND(rc, "returns_unicode");
1491         APPEND(rc, "specified_attributes");
1492         APPEND(rc, "intern");
1493 
1494 #undef APPEND
1495         return rc;
1496     }
1497     return Py_FindMethod(xmlparse_methods, (PyObject *)self, name);
1498 }
1499 
1500 static int
sethandler(xmlparseobject * self,const char * name,PyObject * v)1501 sethandler(xmlparseobject *self, const char *name, PyObject* v)
1502 {
1503     int handlernum = handlername2int(name);
1504     if (handlernum >= 0) {
1505         xmlhandler c_handler = NULL;
1506         PyObject *temp = self->handlers[handlernum];
1507 
1508         if (v == Py_None) {
1509             /* If this is the character data handler, and a character
1510                data handler is already active, we need to be more
1511                careful.  What we can safely do is replace the existing
1512                character data handler callback function with a no-op
1513                function that will refuse to call Python.  The downside
1514                is that this doesn't completely remove the character
1515                data handler from the C layer if there's any callback
1516                active, so Expat does a little more work than it
1517                otherwise would, but that's really an odd case.  A more
1518                elaborate system of handlers and state could remove the
1519                C handler more effectively. */
1520             if (handlernum == CharacterData && self->in_callback)
1521                 c_handler = noop_character_data_handler;
1522             v = NULL;
1523         }
1524         else if (v != NULL) {
1525             Py_INCREF(v);
1526             c_handler = handler_info[handlernum].handler;
1527         }
1528         self->handlers[handlernum] = v;
1529         Py_XDECREF(temp);
1530         handler_info[handlernum].setter(self->itself, c_handler);
1531         return 1;
1532     }
1533     return 0;
1534 }
1535 
1536 static int
xmlparse_setattr(xmlparseobject * self,char * name,PyObject * v)1537 xmlparse_setattr(xmlparseobject *self, char *name, PyObject *v)
1538 {
1539     /* Set attribute 'name' to value 'v'. v==NULL means delete */
1540     if (v == NULL) {
1541         PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1542         return -1;
1543     }
1544     if (strcmp(name, "buffer_text") == 0) {
1545         if (PyObject_IsTrue(v)) {
1546             if (self->buffer == NULL) {
1547                 self->buffer = malloc(self->buffer_size);
1548                 if (self->buffer == NULL) {
1549                     PyErr_NoMemory();
1550                     return -1;
1551                 }
1552                 self->buffer_used = 0;
1553             }
1554         }
1555         else if (self->buffer != NULL) {
1556             if (flush_character_buffer(self) < 0)
1557                 return -1;
1558             free(self->buffer);
1559             self->buffer = NULL;
1560         }
1561         return 0;
1562     }
1563     if (strcmp(name, "namespace_prefixes") == 0) {
1564         if (PyObject_IsTrue(v))
1565             self->ns_prefixes = 1;
1566         else
1567             self->ns_prefixes = 0;
1568         XML_SetReturnNSTriplet(self->itself, self->ns_prefixes);
1569         return 0;
1570     }
1571     if (strcmp(name, "ordered_attributes") == 0) {
1572         if (PyObject_IsTrue(v))
1573             self->ordered_attributes = 1;
1574         else
1575             self->ordered_attributes = 0;
1576         return 0;
1577     }
1578     if (strcmp(name, "returns_unicode") == 0) {
1579         if (PyObject_IsTrue(v)) {
1580 #ifndef Py_USING_UNICODE
1581             PyErr_SetString(PyExc_ValueError,
1582                             "Unicode support not available");
1583             return -1;
1584 #else
1585             self->returns_unicode = 1;
1586 #endif
1587         }
1588         else
1589             self->returns_unicode = 0;
1590         return 0;
1591     }
1592     if (strcmp(name, "specified_attributes") == 0) {
1593         if (PyObject_IsTrue(v))
1594             self->specified_attributes = 1;
1595         else
1596             self->specified_attributes = 0;
1597         return 0;
1598     }
1599 
1600     if (strcmp(name, "buffer_size") == 0) {
1601       long new_buffer_size;
1602       if (!PyInt_Check(v)) {
1603         PyErr_SetString(PyExc_TypeError, "buffer_size must be an integer");
1604         return -1;
1605       }
1606 
1607       new_buffer_size=PyInt_AS_LONG(v);
1608       /* trivial case -- no change */
1609       if (new_buffer_size == self->buffer_size) {
1610         return 0;
1611       }
1612 
1613       if (new_buffer_size <= 0) {
1614         PyErr_SetString(PyExc_ValueError, "buffer_size must be greater than zero");
1615         return -1;
1616       }
1617 
1618       /* check maximum */
1619       if (new_buffer_size > INT_MAX) {
1620         char errmsg[100];
1621         sprintf(errmsg, "buffer_size must not be greater than %i", INT_MAX);
1622         PyErr_SetString(PyExc_ValueError, errmsg);
1623         return -1;
1624       }
1625 
1626       if (self->buffer != NULL) {
1627         /* there is already a buffer */
1628         if (self->buffer_used != 0) {
1629           flush_character_buffer(self);
1630         }
1631         /* free existing buffer */
1632         free(self->buffer);
1633       }
1634       self->buffer = malloc(new_buffer_size);
1635       if (self->buffer == NULL) {
1636         PyErr_NoMemory();
1637         return -1;
1638       }
1639       self->buffer_size = new_buffer_size;
1640       return 0;
1641     }
1642 
1643     if (strcmp(name, "CharacterDataHandler") == 0) {
1644         /* If we're changing the character data handler, flush all
1645          * cached data with the old handler.  Not sure there's a
1646          * "right" thing to do, though, but this probably won't
1647          * happen.
1648          */
1649         if (flush_character_buffer(self) < 0)
1650             return -1;
1651     }
1652     if (sethandler(self, name, v)) {
1653         return 0;
1654     }
1655     PyErr_SetString(PyExc_AttributeError, name);
1656     return -1;
1657 }
1658 
1659 #ifdef WITH_CYCLE_GC
1660 static int
xmlparse_traverse(xmlparseobject * op,visitproc visit,void * arg)1661 xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
1662 {
1663     int i;
1664     for (i = 0; handler_info[i].name != NULL; i++)
1665         Py_VISIT(op->handlers[i]);
1666     return 0;
1667 }
1668 
1669 static int
xmlparse_clear(xmlparseobject * op)1670 xmlparse_clear(xmlparseobject *op)
1671 {
1672     clear_handlers(op, 0);
1673     Py_CLEAR(op->intern);
1674     return 0;
1675 }
1676 #endif
1677 
1678 PyDoc_STRVAR(Xmlparsetype__doc__, "XML parser");
1679 
1680 static PyTypeObject Xmlparsetype = {
1681         PyVarObject_HEAD_INIT(NULL, 0)
1682         "pyexpat.xmlparser",            /*tp_name*/
1683         sizeof(xmlparseobject) + PyGC_HEAD_SIZE,/*tp_basicsize*/
1684         0,                              /*tp_itemsize*/
1685         /* methods */
1686         (destructor)xmlparse_dealloc,   /*tp_dealloc*/
1687         (printfunc)0,           /*tp_print*/
1688         (getattrfunc)xmlparse_getattr,  /*tp_getattr*/
1689         (setattrfunc)xmlparse_setattr,  /*tp_setattr*/
1690         (cmpfunc)0,             /*tp_compare*/
1691         (reprfunc)0,            /*tp_repr*/
1692         0,                      /*tp_as_number*/
1693         0,              /*tp_as_sequence*/
1694         0,              /*tp_as_mapping*/
1695         (hashfunc)0,            /*tp_hash*/
1696         (ternaryfunc)0,         /*tp_call*/
1697         (reprfunc)0,            /*tp_str*/
1698         0,              /* tp_getattro */
1699         0,              /* tp_setattro */
1700         0,              /* tp_as_buffer */
1701 #ifdef Py_TPFLAGS_HAVE_GC
1702         Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
1703 #else
1704         Py_TPFLAGS_DEFAULT | Py_TPFLAGS_GC, /*tp_flags*/
1705 #endif
1706         Xmlparsetype__doc__, /* tp_doc - Documentation string */
1707 #ifdef WITH_CYCLE_GC
1708         (traverseproc)xmlparse_traverse,        /* tp_traverse */
1709         (inquiry)xmlparse_clear         /* tp_clear */
1710 #else
1711         0, 0
1712 #endif
1713 };
1714 
1715 /* End of code for xmlparser objects */
1716 /* -------------------------------------------------------- */
1717 
1718 PyDoc_STRVAR(pyexpat_ParserCreate__doc__,
1719 "ParserCreate([encoding[, namespace_separator]]) -> parser\n\
1720 Return a new XML parser object.");
1721 
1722 static PyObject *
pyexpat_ParserCreate(PyObject * notused,PyObject * args,PyObject * kw)1723 pyexpat_ParserCreate(PyObject *notused, PyObject *args, PyObject *kw)
1724 {
1725     char *encoding = NULL;
1726     char *namespace_separator = NULL;
1727     PyObject *intern = NULL;
1728     PyObject *result;
1729     int intern_decref = 0;
1730     static char *kwlist[] = {"encoding", "namespace_separator",
1731                                    "intern", NULL};
1732 
1733     if (!PyArg_ParseTupleAndKeywords(args, kw, "|zzO:ParserCreate", kwlist,
1734                                      &encoding, &namespace_separator, &intern))
1735         return NULL;
1736     if (namespace_separator != NULL
1737         && strlen(namespace_separator) > 1) {
1738         PyErr_SetString(PyExc_ValueError,
1739                         "namespace_separator must be at most one"
1740                         " character, omitted, or None");
1741         return NULL;
1742     }
1743     /* Explicitly passing None means no interning is desired.
1744        Not passing anything means that a new dictionary is used. */
1745     if (intern == Py_None)
1746         intern = NULL;
1747     else if (intern == NULL) {
1748         intern = PyDict_New();
1749         if (!intern)
1750             return NULL;
1751         intern_decref = 1;
1752     }
1753     else if (!PyDict_Check(intern)) {
1754         PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
1755         return NULL;
1756     }
1757 
1758     result = newxmlparseobject(encoding, namespace_separator, intern);
1759     if (intern_decref) {
1760         Py_DECREF(intern);
1761     }
1762     return result;
1763 }
1764 
1765 PyDoc_STRVAR(pyexpat_ErrorString__doc__,
1766 "ErrorString(errno) -> string\n\
1767 Returns string error for given number.");
1768 
1769 static PyObject *
pyexpat_ErrorString(PyObject * self,PyObject * args)1770 pyexpat_ErrorString(PyObject *self, PyObject *args)
1771 {
1772     long code = 0;
1773 
1774     if (!PyArg_ParseTuple(args, "l:ErrorString", &code))
1775         return NULL;
1776     return Py_BuildValue("z", XML_ErrorString((int)code));
1777 }
1778 
1779 /* List of methods defined in the module */
1780 
1781 static struct PyMethodDef pyexpat_methods[] = {
1782     {"ParserCreate",    (PyCFunction)pyexpat_ParserCreate,
1783      METH_VARARGS|METH_KEYWORDS, pyexpat_ParserCreate__doc__},
1784     {"ErrorString",     (PyCFunction)pyexpat_ErrorString,
1785      METH_VARARGS,      pyexpat_ErrorString__doc__},
1786 
1787     {NULL,       (PyCFunction)NULL, 0, NULL}            /* sentinel */
1788 };
1789 
1790 /* Module docstring */
1791 
1792 PyDoc_STRVAR(pyexpat_module_documentation,
1793 "Python wrapper for Expat parser.");
1794 
1795 /* Initialization function for the module */
1796 
1797 #ifndef MODULE_NAME
1798 #define MODULE_NAME "pyexpat"
1799 #endif
1800 
1801 #ifndef MODULE_INITFUNC
1802 #define MODULE_INITFUNC initpyexpat
1803 #endif
1804 
1805 #ifndef PyMODINIT_FUNC
1806 #   ifdef MS_WINDOWS
1807 #       define PyMODINIT_FUNC __declspec(dllexport) void
1808 #   else
1809 #       define PyMODINIT_FUNC void
1810 #   endif
1811 #endif
1812 
1813 PyMODINIT_FUNC MODULE_INITFUNC(void);  /* avoid compiler warnings */
1814 
1815 PyMODINIT_FUNC
MODULE_INITFUNC(void)1816 MODULE_INITFUNC(void)
1817 {
1818     PyObject *m, *d;
1819     PyObject *errmod_name = PyString_FromString(MODULE_NAME ".errors");
1820     PyObject *errors_module;
1821     PyObject *modelmod_name;
1822     PyObject *model_module;
1823     PyObject *sys_modules;
1824     PyObject *version;
1825     static struct PyExpat_CAPI capi;
1826     PyObject* capi_object;
1827 
1828     if (errmod_name == NULL)
1829         return;
1830     modelmod_name = PyString_FromString(MODULE_NAME ".model");
1831     if (modelmod_name == NULL)
1832         return;
1833 
1834     Py_TYPE(&Xmlparsetype) = &PyType_Type;
1835 
1836     /* Create the module and add the functions */
1837     m = Py_InitModule3(MODULE_NAME, pyexpat_methods,
1838                        pyexpat_module_documentation);
1839     if (m == NULL)
1840         return;
1841 
1842     /* Add some symbolic constants to the module */
1843     if (ErrorObject == NULL) {
1844         ErrorObject = PyErr_NewException("xml.parsers.expat.ExpatError",
1845                                          NULL, NULL);
1846         if (ErrorObject == NULL)
1847             return;
1848     }
1849     Py_INCREF(ErrorObject);
1850     PyModule_AddObject(m, "error", ErrorObject);
1851     Py_INCREF(ErrorObject);
1852     PyModule_AddObject(m, "ExpatError", ErrorObject);
1853     Py_INCREF(&Xmlparsetype);
1854     PyModule_AddObject(m, "XMLParserType", (PyObject *) &Xmlparsetype);
1855 
1856     version = PyString_FromString(PY_VERSION);
1857     if (!version)
1858         return;
1859     PyModule_AddObject(m, "__version__", version);
1860     PyModule_AddStringConstant(m, "EXPAT_VERSION",
1861                                (char *) XML_ExpatVersion());
1862     {
1863         XML_Expat_Version info = XML_ExpatVersionInfo();
1864         PyModule_AddObject(m, "version_info",
1865                            Py_BuildValue("(iii)", info.major,
1866                                          info.minor, info.micro));
1867     }
1868 #ifdef Py_USING_UNICODE
1869     init_template_buffer();
1870 #endif
1871     /* XXX When Expat supports some way of figuring out how it was
1872        compiled, this should check and set native_encoding
1873        appropriately.
1874     */
1875     PyModule_AddStringConstant(m, "native_encoding", "UTF-8");
1876 
1877     sys_modules = PySys_GetObject("modules");
1878     d = PyModule_GetDict(m);
1879     errors_module = PyDict_GetItem(d, errmod_name);
1880     if (errors_module == NULL) {
1881         errors_module = PyModule_New(MODULE_NAME ".errors");
1882         if (errors_module != NULL) {
1883             PyDict_SetItem(sys_modules, errmod_name, errors_module);
1884             /* gives away the reference to errors_module */
1885             PyModule_AddObject(m, "errors", errors_module);
1886         }
1887     }
1888     Py_DECREF(errmod_name);
1889     model_module = PyDict_GetItem(d, modelmod_name);
1890     if (model_module == NULL) {
1891         model_module = PyModule_New(MODULE_NAME ".model");
1892         if (model_module != NULL) {
1893             PyDict_SetItem(sys_modules, modelmod_name, model_module);
1894             /* gives away the reference to model_module */
1895             PyModule_AddObject(m, "model", model_module);
1896         }
1897     }
1898     Py_DECREF(modelmod_name);
1899     if (errors_module == NULL || model_module == NULL)
1900         /* Don't core dump later! */
1901         return;
1902 
1903 #if XML_COMBINED_VERSION > 19505
1904     {
1905         const XML_Feature *features = XML_GetFeatureList();
1906         PyObject *list = PyList_New(0);
1907         if (list == NULL)
1908             /* just ignore it */
1909             PyErr_Clear();
1910         else {
1911             int i = 0;
1912             for (; features[i].feature != XML_FEATURE_END; ++i) {
1913                 int ok;
1914                 PyObject *item = Py_BuildValue("si", features[i].name,
1915                                                features[i].value);
1916                 if (item == NULL) {
1917                     Py_DECREF(list);
1918                     list = NULL;
1919                     break;
1920                 }
1921                 ok = PyList_Append(list, item);
1922                 Py_DECREF(item);
1923                 if (ok < 0) {
1924                     PyErr_Clear();
1925                     break;
1926                 }
1927             }
1928             if (list != NULL)
1929                 PyModule_AddObject(m, "features", list);
1930         }
1931     }
1932 #endif
1933 
1934 #define MYCONST(name) \
1935     PyModule_AddStringConstant(errors_module, #name, \
1936                                (char*)XML_ErrorString(name))
1937 
1938     MYCONST(XML_ERROR_NO_MEMORY);
1939     MYCONST(XML_ERROR_SYNTAX);
1940     MYCONST(XML_ERROR_NO_ELEMENTS);
1941     MYCONST(XML_ERROR_INVALID_TOKEN);
1942     MYCONST(XML_ERROR_UNCLOSED_TOKEN);
1943     MYCONST(XML_ERROR_PARTIAL_CHAR);
1944     MYCONST(XML_ERROR_TAG_MISMATCH);
1945     MYCONST(XML_ERROR_DUPLICATE_ATTRIBUTE);
1946     MYCONST(XML_ERROR_JUNK_AFTER_DOC_ELEMENT);
1947     MYCONST(XML_ERROR_PARAM_ENTITY_REF);
1948     MYCONST(XML_ERROR_UNDEFINED_ENTITY);
1949     MYCONST(XML_ERROR_RECURSIVE_ENTITY_REF);
1950     MYCONST(XML_ERROR_ASYNC_ENTITY);
1951     MYCONST(XML_ERROR_BAD_CHAR_REF);
1952     MYCONST(XML_ERROR_BINARY_ENTITY_REF);
1953     MYCONST(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF);
1954     MYCONST(XML_ERROR_MISPLACED_XML_PI);
1955     MYCONST(XML_ERROR_UNKNOWN_ENCODING);
1956     MYCONST(XML_ERROR_INCORRECT_ENCODING);
1957     MYCONST(XML_ERROR_UNCLOSED_CDATA_SECTION);
1958     MYCONST(XML_ERROR_EXTERNAL_ENTITY_HANDLING);
1959     MYCONST(XML_ERROR_NOT_STANDALONE);
1960     MYCONST(XML_ERROR_UNEXPECTED_STATE);
1961     MYCONST(XML_ERROR_ENTITY_DECLARED_IN_PE);
1962     MYCONST(XML_ERROR_FEATURE_REQUIRES_XML_DTD);
1963     MYCONST(XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING);
1964     /* Added in Expat 1.95.7. */
1965     MYCONST(XML_ERROR_UNBOUND_PREFIX);
1966     /* Added in Expat 1.95.8. */
1967     MYCONST(XML_ERROR_UNDECLARING_PREFIX);
1968     MYCONST(XML_ERROR_INCOMPLETE_PE);
1969     MYCONST(XML_ERROR_XML_DECL);
1970     MYCONST(XML_ERROR_TEXT_DECL);
1971     MYCONST(XML_ERROR_PUBLICID);
1972     MYCONST(XML_ERROR_SUSPENDED);
1973     MYCONST(XML_ERROR_NOT_SUSPENDED);
1974     MYCONST(XML_ERROR_ABORTED);
1975     MYCONST(XML_ERROR_FINISHED);
1976     MYCONST(XML_ERROR_SUSPEND_PE);
1977 
1978     PyModule_AddStringConstant(errors_module, "__doc__",
1979                                "Constants used to describe error conditions.");
1980 
1981 #undef MYCONST
1982 
1983 #define MYCONST(c) PyModule_AddIntConstant(m, #c, c)
1984     MYCONST(XML_PARAM_ENTITY_PARSING_NEVER);
1985     MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1986     MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS);
1987 #undef MYCONST
1988 
1989 #define MYCONST(c) PyModule_AddIntConstant(model_module, #c, c)
1990     PyModule_AddStringConstant(model_module, "__doc__",
1991                      "Constants used to interpret content model information.");
1992 
1993     MYCONST(XML_CTYPE_EMPTY);
1994     MYCONST(XML_CTYPE_ANY);
1995     MYCONST(XML_CTYPE_MIXED);
1996     MYCONST(XML_CTYPE_NAME);
1997     MYCONST(XML_CTYPE_CHOICE);
1998     MYCONST(XML_CTYPE_SEQ);
1999 
2000     MYCONST(XML_CQUANT_NONE);
2001     MYCONST(XML_CQUANT_OPT);
2002     MYCONST(XML_CQUANT_REP);
2003     MYCONST(XML_CQUANT_PLUS);
2004 #undef MYCONST
2005 
2006     /* initialize pyexpat dispatch table */
2007     capi.size = sizeof(capi);
2008     capi.magic = PyExpat_CAPI_MAGIC;
2009     capi.MAJOR_VERSION = XML_MAJOR_VERSION;
2010     capi.MINOR_VERSION = XML_MINOR_VERSION;
2011     capi.MICRO_VERSION = XML_MICRO_VERSION;
2012     capi.ErrorString = XML_ErrorString;
2013     capi.GetErrorCode = XML_GetErrorCode;
2014     capi.GetErrorColumnNumber = XML_GetErrorColumnNumber;
2015     capi.GetErrorLineNumber = XML_GetErrorLineNumber;
2016     capi.Parse = XML_Parse;
2017     capi.ParserCreate_MM = XML_ParserCreate_MM;
2018     capi.ParserFree = XML_ParserFree;
2019     capi.SetCharacterDataHandler = XML_SetCharacterDataHandler;
2020     capi.SetCommentHandler = XML_SetCommentHandler;
2021     capi.SetDefaultHandlerExpand = XML_SetDefaultHandlerExpand;
2022     capi.SetElementHandler = XML_SetElementHandler;
2023     capi.SetNamespaceDeclHandler = XML_SetNamespaceDeclHandler;
2024     capi.SetProcessingInstructionHandler = XML_SetProcessingInstructionHandler;
2025     capi.SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler;
2026     capi.SetUserData = XML_SetUserData;
2027 
2028     /* export using capsule */
2029     capi_object = PyCapsule_New(&capi, PyExpat_CAPSULE_NAME, NULL);
2030     if (capi_object)
2031         PyModule_AddObject(m, "expat_CAPI", capi_object);
2032 }
2033 
2034 static void
clear_handlers(xmlparseobject * self,int initial)2035 clear_handlers(xmlparseobject *self, int initial)
2036 {
2037     int i = 0;
2038     PyObject *temp;
2039 
2040     for (; handler_info[i].name != NULL; i++) {
2041         if (initial)
2042             self->handlers[i] = NULL;
2043         else {
2044             temp = self->handlers[i];
2045             self->handlers[i] = NULL;
2046             Py_XDECREF(temp);
2047             handler_info[i].setter(self->itself, NULL);
2048         }
2049     }
2050 }
2051 
2052 static struct HandlerInfo handler_info[] = {
2053     {"StartElementHandler",
2054      (xmlhandlersetter)XML_SetStartElementHandler,
2055      (xmlhandler)my_StartElementHandler},
2056     {"EndElementHandler",
2057      (xmlhandlersetter)XML_SetEndElementHandler,
2058      (xmlhandler)my_EndElementHandler},
2059     {"ProcessingInstructionHandler",
2060      (xmlhandlersetter)XML_SetProcessingInstructionHandler,
2061      (xmlhandler)my_ProcessingInstructionHandler},
2062     {"CharacterDataHandler",
2063      (xmlhandlersetter)XML_SetCharacterDataHandler,
2064      (xmlhandler)my_CharacterDataHandler},
2065     {"UnparsedEntityDeclHandler",
2066      (xmlhandlersetter)XML_SetUnparsedEntityDeclHandler,
2067      (xmlhandler)my_UnparsedEntityDeclHandler},
2068     {"NotationDeclHandler",
2069      (xmlhandlersetter)XML_SetNotationDeclHandler,
2070      (xmlhandler)my_NotationDeclHandler},
2071     {"StartNamespaceDeclHandler",
2072      (xmlhandlersetter)XML_SetStartNamespaceDeclHandler,
2073      (xmlhandler)my_StartNamespaceDeclHandler},
2074     {"EndNamespaceDeclHandler",
2075      (xmlhandlersetter)XML_SetEndNamespaceDeclHandler,
2076      (xmlhandler)my_EndNamespaceDeclHandler},
2077     {"CommentHandler",
2078      (xmlhandlersetter)XML_SetCommentHandler,
2079      (xmlhandler)my_CommentHandler},
2080     {"StartCdataSectionHandler",
2081      (xmlhandlersetter)XML_SetStartCdataSectionHandler,
2082      (xmlhandler)my_StartCdataSectionHandler},
2083     {"EndCdataSectionHandler",
2084      (xmlhandlersetter)XML_SetEndCdataSectionHandler,
2085      (xmlhandler)my_EndCdataSectionHandler},
2086     {"DefaultHandler",
2087      (xmlhandlersetter)XML_SetDefaultHandler,
2088      (xmlhandler)my_DefaultHandler},
2089     {"DefaultHandlerExpand",
2090      (xmlhandlersetter)XML_SetDefaultHandlerExpand,
2091      (xmlhandler)my_DefaultHandlerExpandHandler},
2092     {"NotStandaloneHandler",
2093      (xmlhandlersetter)XML_SetNotStandaloneHandler,
2094      (xmlhandler)my_NotStandaloneHandler},
2095     {"ExternalEntityRefHandler",
2096      (xmlhandlersetter)XML_SetExternalEntityRefHandler,
2097      (xmlhandler)my_ExternalEntityRefHandler},
2098     {"StartDoctypeDeclHandler",
2099      (xmlhandlersetter)XML_SetStartDoctypeDeclHandler,
2100      (xmlhandler)my_StartDoctypeDeclHandler},
2101     {"EndDoctypeDeclHandler",
2102      (xmlhandlersetter)XML_SetEndDoctypeDeclHandler,
2103      (xmlhandler)my_EndDoctypeDeclHandler},
2104     {"EntityDeclHandler",
2105      (xmlhandlersetter)XML_SetEntityDeclHandler,
2106      (xmlhandler)my_EntityDeclHandler},
2107     {"XmlDeclHandler",
2108      (xmlhandlersetter)XML_SetXmlDeclHandler,
2109      (xmlhandler)my_XmlDeclHandler},
2110     {"ElementDeclHandler",
2111      (xmlhandlersetter)XML_SetElementDeclHandler,
2112      (xmlhandler)my_ElementDeclHandler},
2113     {"AttlistDeclHandler",
2114      (xmlhandlersetter)XML_SetAttlistDeclHandler,
2115      (xmlhandler)my_AttlistDeclHandler},
2116 #if XML_COMBINED_VERSION >= 19504
2117     {"SkippedEntityHandler",
2118      (xmlhandlersetter)XML_SetSkippedEntityHandler,
2119      (xmlhandler)my_SkippedEntityHandler},
2120 #endif
2121 
2122     {NULL, NULL, NULL} /* sentinel */
2123 };
2124