1 #include "Python.h"
2 #include <ctype.h>
3
4 #include "frameobject.h"
5 #include "expat.h"
6
7 #include "pyexpat.h"
8
9 #define XML_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)
10
11 #ifndef PyDoc_STRVAR
12
13 /*
14 * fdrake says:
15 * Don't change the PyDoc_STR macro definition to (str), because
16 * '''the parentheses cause compile failures
17 * ("non-constant static initializer" or something like that)
18 * on some platforms (Irix?)'''
19 */
20 #define PyDoc_STR(str) str
21 #define PyDoc_VAR(name) static char name[]
22 #define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
23 #endif
24
25 #if (PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION < 2)
26 /* In Python 2.0 and 2.1, disabling Unicode was not possible. */
27 #define Py_USING_UNICODE
28 #else
29 #define FIX_TRACE
30 #endif
31
/* Identifies each kind of handler this module knows about.
 * The enumerators are used in this file as array indices, e.g.
 * self->handlers[StartElement] and handler_info[slot].tb_code, so their
 * order must stay in sync with whatever populates those arrays.
 * SkippedEntity only exists when building against Expat >= 1.95.4
 * (XML_COMBINED_VERSION >= 19504).  _DummyDecl is the final enumerator.
 */
32 enum HandlerTypes {
33 StartElement,
34 EndElement,
35 ProcessingInstruction,
36 CharacterData,
37 UnparsedEntityDecl,
38 NotationDecl,
39 StartNamespaceDecl,
40 EndNamespaceDecl,
41 Comment,
42 StartCdataSection,
43 EndCdataSection,
44 Default,
45 DefaultHandlerExpand,
46 NotStandalone,
47 ExternalEntityRef,
48 StartDoctypeDecl,
49 EndDoctypeDecl,
50 EntityDecl,
51 XmlDecl,
52 ElementDecl,
53 AttlistDecl,
54 #if XML_COMBINED_VERSION >= 19504
55 SkippedEntity,
56 #endif
57 _DummyDecl
58 };
59
60 static PyObject *ErrorObject;
61
62 /* ----------------------------------------------------- */
63
64 /* Declarations for objects of type xmlparser */
65
/* Per-parser state of the Python-level xmlparser object.
 * `itself` is the Expat parser handle passed to the XML_* calls in this
 * file.  `handlers` is indexed by enum HandlerTypes; a NULL entry means
 * no handler is installed for that slot (see have_handler()).
 * `intern`, when non-NULL, is used by string_intern() to map equal
 * strings onto one shared object.
 */
66 typedef struct {
67 PyObject_HEAD
68
69 XML_Parser itself;
70 int returns_unicode; /* True if Unicode strings are returned;
71 if false, UTF-8 strings are returned */
72 int ordered_attributes; /* Return attributes as a list. */
73 int specified_attributes; /* Report only specified attributes. */
74 int in_callback; /* Is a callback active? */
75 int ns_prefixes; /* Namespace-triplets mode? */
76 XML_Char *buffer; /* Buffer used when accumulating characters */
77 /* NULL if not enabled */
78 int buffer_size; /* Size of buffer, in XML_Char units */
79 int buffer_used; /* Buffer units in use */
80 PyObject *intern; /* Dictionary to intern strings */
81 PyObject **handlers;
82 } xmlparseobject;
83
84 #define CHARACTER_DATA_BUFFER_SIZE 8192
85
86 static PyTypeObject Xmlparsetype;
87
/* xmlhandlersetter: function that installs a C-level handler on an
 * Expat parser; it is invoked in this file as
 * handler_info[i].setter(parser, handler_info[i].handler).
 * xmlhandler: the C-level handler, stored as an untyped pointer.
 */
88 typedef void (*xmlhandlersetter)(XML_Parser self, void *meth);
89 typedef void* xmlhandler;
90
/* One table entry per handler slot.
 *   name    - handler name as a C string; loops in this file stop at the
 *             first entry whose name is NULL.
 *   setter  - installs `handler` on a parser (see above).
 *   handler - the C-level handler passed to `setter`.
 *   tb_code - code object created lazily by getcode().
 *   nameobj - Python string for `name`, created lazily by
 *             get_handler_name().
 */
91 struct HandlerInfo {
92 const char *name;
93 xmlhandlersetter setter;
94 xmlhandler handler;
95 PyCodeObject *tb_code;
96 PyObject *nameobj;
97 };
98
/* Handler table, indexed by enum HandlerTypes; room for 64 entries.
 * NOTE(review): the initialisation is not visible in this part of the file.
 */
99 static struct HandlerInfo handler_info[64];
100
101 /* Set an integer attribute on the error object; return true on success,
102 * false on an exception.
103 */
104 static int
set_error_attr(PyObject * err,char * name,int value)105 set_error_attr(PyObject *err, char *name, int value)
106 {
107 PyObject *v = PyInt_FromLong(value);
108
109 if (v == NULL || PyObject_SetAttrString(err, name, v) == -1) {
110 Py_XDECREF(v);
111 return 0;
112 }
113 Py_DECREF(v);
114 return 1;
115 }
116
117 /* Build and set an Expat exception, including positioning
118 * information. Always returns NULL.
119 */
120 static PyObject *
set_error(xmlparseobject * self,enum XML_Error code)121 set_error(xmlparseobject *self, enum XML_Error code)
122 {
123 PyObject *err;
124 char buffer[256];
125 XML_Parser parser = self->itself;
126 int lineno = XML_GetErrorLineNumber(parser);
127 int column = XML_GetErrorColumnNumber(parser);
128
129 /* There is no risk of overflowing this buffer, since
130 even for 64-bit integers, there is sufficient space. */
131 sprintf(buffer, "%.200s: line %i, column %i",
132 XML_ErrorString(code), lineno, column);
133 err = PyObject_CallFunction(ErrorObject, "s", buffer);
134 if ( err != NULL
135 && set_error_attr(err, "code", code)
136 && set_error_attr(err, "offset", column)
137 && set_error_attr(err, "lineno", lineno)) {
138 PyErr_SetObject(ErrorObject, err);
139 }
140 Py_XDECREF(err);
141 return NULL;
142 }
143
144 static int
have_handler(xmlparseobject * self,int type)145 have_handler(xmlparseobject *self, int type)
146 {
147 PyObject *handler = self->handlers[type];
148 return handler != NULL;
149 }
150
151 static PyObject *
get_handler_name(struct HandlerInfo * hinfo)152 get_handler_name(struct HandlerInfo *hinfo)
153 {
154 PyObject *name = hinfo->nameobj;
155 if (name == NULL) {
156 name = PyString_FromString(hinfo->name);
157 hinfo->nameobj = name;
158 }
159 Py_XINCREF(name);
160 return name;
161 }
162
163
164 #ifdef Py_USING_UNICODE
165 /* Convert a string of XML_Chars into a Unicode string.
166 Returns None if str is a null pointer. */
167
168 static PyObject *
conv_string_to_unicode(const XML_Char * str)169 conv_string_to_unicode(const XML_Char *str)
170 {
171 /* XXX currently this code assumes that XML_Char is 8-bit,
172 and hence in UTF-8. */
173 /* UTF-8 from Expat, Unicode desired */
174 if (str == NULL) {
175 Py_INCREF(Py_None);
176 return Py_None;
177 }
178 return PyUnicode_DecodeUTF8(str, strlen(str), "strict");
179 }
180
181 static PyObject *
conv_string_len_to_unicode(const XML_Char * str,int len)182 conv_string_len_to_unicode(const XML_Char *str, int len)
183 {
184 /* XXX currently this code assumes that XML_Char is 8-bit,
185 and hence in UTF-8. */
186 /* UTF-8 from Expat, Unicode desired */
187 if (str == NULL) {
188 Py_INCREF(Py_None);
189 return Py_None;
190 }
191 return PyUnicode_DecodeUTF8((const char *)str, len, "strict");
192 }
193 #endif
194
195 /* Convert a string of XML_Chars into an 8-bit Python string.
196 Returns None if str is a null pointer. */
197
198 static PyObject *
conv_string_to_utf8(const XML_Char * str)199 conv_string_to_utf8(const XML_Char *str)
200 {
201 /* XXX currently this code assumes that XML_Char is 8-bit,
202 and hence in UTF-8. */
203 /* UTF-8 from Expat, UTF-8 desired */
204 if (str == NULL) {
205 Py_INCREF(Py_None);
206 return Py_None;
207 }
208 return PyString_FromString(str);
209 }
210
211 static PyObject *
conv_string_len_to_utf8(const XML_Char * str,int len)212 conv_string_len_to_utf8(const XML_Char *str, int len)
213 {
214 /* XXX currently this code assumes that XML_Char is 8-bit,
215 and hence in UTF-8. */
216 /* UTF-8 from Expat, UTF-8 desired */
217 if (str == NULL) {
218 Py_INCREF(Py_None);
219 return Py_None;
220 }
221 return PyString_FromStringAndSize((const char *)str, len);
222 }
223
224 /* Callback routines */
225
226 static void clear_handlers(xmlparseobject *self, int initial);
227
228 /* This handler is used when an error has been detected, in the hope
229 that actual parsing can be terminated early. This will only help
230 if an external entity reference is encountered. */
231 static int
error_external_entity_ref_handler(XML_Parser parser,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)232 error_external_entity_ref_handler(XML_Parser parser,
233 const XML_Char *context,
234 const XML_Char *base,
235 const XML_Char *systemId,
236 const XML_Char *publicId)
237 {
238 return 0;
239 }
240
241 /* Dummy character data handler used when an error (exception) has
242 been detected, and the actual parsing can be terminated early.
243 This is needed since character data handler can't be safely removed
244 from within the character data handler, but can be replaced. It is
245 used only from the character data handler trampoline, and must be
246 used right after `flag_error()` is called. */
247 static void
noop_character_data_handler(void * userData,const XML_Char * data,int len)248 noop_character_data_handler(void *userData, const XML_Char *data, int len)
249 {
250 /* Do nothing. */
251 }
252
253 static void
flag_error(xmlparseobject * self)254 flag_error(xmlparseobject *self)
255 {
256 clear_handlers(self, 0);
257 XML_SetExternalEntityRefHandler(self->itself,
258 error_external_entity_ref_handler);
259 }
260
261 static PyCodeObject*
getcode(enum HandlerTypes slot,char * func_name,int lineno)262 getcode(enum HandlerTypes slot, char* func_name, int lineno)
263 {
264 if (handler_info[slot].tb_code == NULL) {
265 handler_info[slot].tb_code =
266 PyCode_NewEmpty(__FILE__, func_name, lineno);
267 }
268 return handler_info[slot].tb_code;
269 }
270
271 #ifdef FIX_TRACE
272 static int
trace_frame(PyThreadState * tstate,PyFrameObject * f,int code,PyObject * val)273 trace_frame(PyThreadState *tstate, PyFrameObject *f, int code, PyObject *val)
274 {
275 int result = 0;
276 if (!tstate->use_tracing || tstate->tracing)
277 return 0;
278 if (tstate->c_profilefunc != NULL) {
279 tstate->tracing++;
280 result = tstate->c_profilefunc(tstate->c_profileobj,
281 f, code , val);
282 tstate->use_tracing = ((tstate->c_tracefunc != NULL)
283 || (tstate->c_profilefunc != NULL));
284 tstate->tracing--;
285 if (result)
286 return result;
287 }
288 if (tstate->c_tracefunc != NULL) {
289 tstate->tracing++;
290 result = tstate->c_tracefunc(tstate->c_traceobj,
291 f, code , val);
292 tstate->use_tracing = ((tstate->c_tracefunc != NULL)
293 || (tstate->c_profilefunc != NULL));
294 tstate->tracing--;
295 }
296 return result;
297 }
298
299 static int
trace_frame_exc(PyThreadState * tstate,PyFrameObject * f)300 trace_frame_exc(PyThreadState *tstate, PyFrameObject *f)
301 {
302 PyObject *type, *value, *traceback, *arg;
303 int err;
304
305 if (tstate->c_tracefunc == NULL)
306 return 0;
307
308 PyErr_Fetch(&type, &value, &traceback);
309 if (value == NULL) {
310 value = Py_None;
311 Py_INCREF(value);
312 }
313 #if PY_VERSION_HEX < 0x02040000
314 arg = Py_BuildValue("(OOO)", type, value, traceback);
315 #else
316 arg = PyTuple_Pack(3, type, value, traceback);
317 #endif
318 if (arg == NULL) {
319 PyErr_Restore(type, value, traceback);
320 return 0;
321 }
322 err = trace_frame(tstate, f, PyTrace_EXCEPTION, arg);
323 Py_DECREF(arg);
324 if (err == 0)
325 PyErr_Restore(type, value, traceback);
326 else {
327 Py_XDECREF(type);
328 Py_XDECREF(value);
329 Py_XDECREF(traceback);
330 }
331 return err;
332 }
333 #endif
334
335 static PyObject*
call_with_frame(PyCodeObject * c,PyObject * func,PyObject * args,xmlparseobject * self)336 call_with_frame(PyCodeObject *c, PyObject* func, PyObject* args,
337 xmlparseobject *self)
338 {
339 PyThreadState *tstate = PyThreadState_GET();
340 PyFrameObject *f;
341 PyObject *res;
342
343 if (c == NULL)
344 return NULL;
345
346 f = PyFrame_New(tstate, c, PyEval_GetGlobals(), NULL);
347 if (f == NULL)
348 return NULL;
349 tstate->frame = f;
350 #ifdef FIX_TRACE
351 if (trace_frame(tstate, f, PyTrace_CALL, Py_None) < 0) {
352 return NULL;
353 }
354 #endif
355 res = PyEval_CallObject(func, args);
356 if (res == NULL) {
357 if (tstate->curexc_traceback == NULL)
358 PyTraceBack_Here(f);
359 XML_StopParser(self->itself, XML_FALSE);
360 #ifdef FIX_TRACE
361 if (trace_frame_exc(tstate, f) < 0) {
362 return NULL;
363 }
364 }
365 else {
366 if (trace_frame(tstate, f, PyTrace_RETURN, res) < 0) {
367 Py_XDECREF(res);
368 res = NULL;
369 }
370 }
371 #else
372 }
373 #endif
374 tstate->frame = f->f_back;
375 Py_DECREF(f);
376 return res;
377 }
378
/* STRING_CONV_FUNC expands to the converter for NUL-terminated XML_Char
 * strings.  Without Unicode support it is always conv_string_to_utf8.
 * With Unicode support the expansion reads `self->returns_unicode`, so
 * it can only be used where a variable named `self` (an xmlparseobject
 * pointer) is in scope.
 */
379 #ifndef Py_USING_UNICODE
380 #define STRING_CONV_FUNC conv_string_to_utf8
381 #else
382 /* Python 2.0 and later versions, when built with Unicode support */
383 #define STRING_CONV_FUNC (self->returns_unicode \
384 ? conv_string_to_unicode : conv_string_to_utf8)
385 #endif
386
387 static PyObject*
string_intern(xmlparseobject * self,const char * str)388 string_intern(xmlparseobject *self, const char* str)
389 {
390 PyObject *result = STRING_CONV_FUNC(str);
391 PyObject *value;
392 /* result can be NULL if the unicode conversion failed. */
393 if (!result)
394 return result;
395 if (!self->intern)
396 return result;
397 value = PyDict_GetItem(self->intern, result);
398 if (!value) {
399 if (PyDict_SetItem(self->intern, result, result) == 0)
400 return result;
401 else
402 return NULL;
403 }
404 Py_INCREF(value);
405 Py_DECREF(result);
406 return value;
407 }
408
409 /* Return 0 on success, -1 on exception.
410 * flag_error() will be called before return if needed.
411 */
412 static int
call_character_handler(xmlparseobject * self,const XML_Char * buffer,int len)413 call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len)
414 {
415 PyObject *args;
416 PyObject *temp;
417
418 if (!have_handler(self, CharacterData))
419 return -1;
420
421 args = PyTuple_New(1);
422 if (args == NULL)
423 return -1;
424 #ifdef Py_USING_UNICODE
425 temp = (self->returns_unicode
426 ? conv_string_len_to_unicode(buffer, len)
427 : conv_string_len_to_utf8(buffer, len));
428 #else
429 temp = conv_string_len_to_utf8(buffer, len);
430 #endif
431 if (temp == NULL) {
432 Py_DECREF(args);
433 flag_error(self);
434 XML_SetCharacterDataHandler(self->itself,
435 noop_character_data_handler);
436 return -1;
437 }
438 PyTuple_SET_ITEM(args, 0, temp);
439 /* temp is now a borrowed reference; consider it unused. */
440 self->in_callback = 1;
441 temp = call_with_frame(getcode(CharacterData, "CharacterData", __LINE__),
442 self->handlers[CharacterData], args, self);
443 /* temp is an owned reference again, or NULL */
444 self->in_callback = 0;
445 Py_DECREF(args);
446 if (temp == NULL) {
447 flag_error(self);
448 XML_SetCharacterDataHandler(self->itself,
449 noop_character_data_handler);
450 return -1;
451 }
452 Py_DECREF(temp);
453 return 0;
454 }
455
456 static int
flush_character_buffer(xmlparseobject * self)457 flush_character_buffer(xmlparseobject *self)
458 {
459 int rc;
460 if (self->buffer == NULL || self->buffer_used == 0)
461 return 0;
462 rc = call_character_handler(self, self->buffer, self->buffer_used);
463 self->buffer_used = 0;
464 return rc;
465 }
466
467 static void
my_CharacterDataHandler(void * userData,const XML_Char * data,int len)468 my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
469 {
470 xmlparseobject *self = (xmlparseobject *) userData;
471 if (self->buffer == NULL)
472 call_character_handler(self, data, len);
473 else {
474 if ((self->buffer_used + len) > self->buffer_size) {
475 if (flush_character_buffer(self) < 0)
476 return;
477 /* handler might have changed; drop the rest on the floor
478 * if there isn't a handler anymore
479 */
480 if (!have_handler(self, CharacterData))
481 return;
482 }
483 if (len > self->buffer_size) {
484 call_character_handler(self, data, len);
485 self->buffer_used = 0;
486 }
487 else {
488 memcpy(self->buffer + self->buffer_used,
489 data, len * sizeof(XML_Char));
490 self->buffer_used += len;
491 }
492 }
493 }
494
495 static void
my_StartElementHandler(void * userData,const XML_Char * name,const XML_Char * atts[])496 my_StartElementHandler(void *userData,
497 const XML_Char *name, const XML_Char *atts[])
498 {
499 xmlparseobject *self = (xmlparseobject *)userData;
500
501 if (have_handler(self, StartElement)) {
502 PyObject *container, *rv, *args;
503 int i, max;
504
505 if (flush_character_buffer(self) < 0)
506 return;
507 /* Set max to the number of slots filled in atts[]; max/2 is
508 * the number of attributes we need to process.
509 */
510 if (self->specified_attributes) {
511 max = XML_GetSpecifiedAttributeCount(self->itself);
512 }
513 else {
514 max = 0;
515 while (atts[max] != NULL)
516 max += 2;
517 }
518 /* Build the container. */
519 if (self->ordered_attributes)
520 container = PyList_New(max);
521 else
522 container = PyDict_New();
523 if (container == NULL) {
524 flag_error(self);
525 return;
526 }
527 for (i = 0; i < max; i += 2) {
528 PyObject *n = string_intern(self, (XML_Char *) atts[i]);
529 PyObject *v;
530 if (n == NULL) {
531 flag_error(self);
532 Py_DECREF(container);
533 return;
534 }
535 v = STRING_CONV_FUNC((XML_Char *) atts[i+1]);
536 if (v == NULL) {
537 flag_error(self);
538 Py_DECREF(container);
539 Py_DECREF(n);
540 return;
541 }
542 if (self->ordered_attributes) {
543 PyList_SET_ITEM(container, i, n);
544 PyList_SET_ITEM(container, i+1, v);
545 }
546 else if (PyDict_SetItem(container, n, v)) {
547 flag_error(self);
548 Py_DECREF(n);
549 Py_DECREF(v);
550 return;
551 }
552 else {
553 Py_DECREF(n);
554 Py_DECREF(v);
555 }
556 }
557 args = string_intern(self, name);
558 if (args != NULL)
559 args = Py_BuildValue("(NN)", args, container);
560 if (args == NULL) {
561 Py_DECREF(container);
562 return;
563 }
564 /* Container is now a borrowed reference; ignore it. */
565 self->in_callback = 1;
566 rv = call_with_frame(getcode(StartElement, "StartElement", __LINE__),
567 self->handlers[StartElement], args, self);
568 self->in_callback = 0;
569 Py_DECREF(args);
570 if (rv == NULL) {
571 flag_error(self);
572 return;
573 }
574 Py_DECREF(rv);
575 }
576 }
577
/* RC_HANDLER generates a static Expat callback named my_<NAME>Handler.
 *
 *   RC           - return type of the generated function.
 *   NAME         - enum HandlerTypes member; selects the handler slot
 *                  and is stringized as the name passed to getcode().
 *   PARAMS       - parenthesized parameter list of the callback.
 *   INIT         - extra declaration(s) placed after the locals.
 *   PARAM_FORMAT - parenthesized argument list for Py_BuildValue; it may
 *                  refer to `self` and the callback parameters.
 *   CONVERSION   - statement(s) run on success while `rv` is still alive.
 *   RETURN       - expression returned on every path (empty for void).
 *   GETUSERDATA  - expression yielding the xmlparseobject pointer.
 *
 * The generated function does nothing but `return RETURN` when no handler
 * is installed.  Otherwise it flushes buffered character data, builds the
 * argument tuple, calls the Python handler through call_with_frame() with
 * in_callback set, and calls flag_error() if argument building or the
 * handler call fails.
 */
578 #define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
579 RETURN, GETUSERDATA) \
580 static RC \
581 my_##NAME##Handler PARAMS {\
582 xmlparseobject *self = GETUSERDATA ; \
583 PyObject *args = NULL; \
584 PyObject *rv = NULL; \
585 INIT \
586 \
587 if (have_handler(self, NAME)) { \
588 if (flush_character_buffer(self) < 0) \
589 return RETURN; \
590 args = Py_BuildValue PARAM_FORMAT ;\
591 if (!args) { flag_error(self); return RETURN;} \
592 self->in_callback = 1; \
593 rv = call_with_frame(getcode(NAME,#NAME,__LINE__), \
594 self->handlers[NAME], args, self); \
595 self->in_callback = 0; \
596 Py_DECREF(args); \
597 if (rv == NULL) { \
598 flag_error(self); \
599 return RETURN; \
600 } \
601 CONVERSION \
602 Py_DECREF(rv); \
603 } \
604 return RETURN; \
605 }
606
/* VOID_HANDLER: RC_HANDLER specialisation for callbacks returning void.
 * The parser object is taken from a callback parameter that must be
 * named `userData`; INIT, CONVERSION and RETURN are empty.
 */
607 #define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
608 RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;,\
609 (xmlparseobject *)userData)
610
/* INT_HANDLER: RC_HANDLER specialisation for callbacks returning int.
 * The result starts at 0 and, when the Python handler succeeds, is set
 * from its return value with PyInt_AsLong().
 * NOTE(review): the PyInt_AsLong() result is not checked for errors here.
 */
611 #define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT)\
612 RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
613 rc = PyInt_AsLong(rv);, rc, \
614 (xmlparseobject *)userData)
615
/* Generated callbacks (see RC_HANDLER / VOID_HANDLER).
 * In the Py_BuildValue formats below, "N" arguments come from
 * string_intern() or a length-taking converter, while "O&" pairs pass
 * STRING_CONV_FUNC together with the raw XML_Char string.
 */

/* my_EndElementHandler: handler(name) */
616 VOID_HANDLER(EndElement,
617 (void *userData, const XML_Char *name),
618 ("(N)", string_intern(self, name)))
619
/* my_ProcessingInstructionHandler: handler(target, data) */
620 VOID_HANDLER(ProcessingInstruction,
621 (void *userData,
622 const XML_Char *target,
623 const XML_Char *data),
624 ("(NO&)", string_intern(self, target), STRING_CONV_FUNC,data))
625
/* my_UnparsedEntityDeclHandler:
   handler(entityName, base, systemId, publicId, notationName) */
626 VOID_HANDLER(UnparsedEntityDecl,
627 (void *userData,
628 const XML_Char *entityName,
629 const XML_Char *base,
630 const XML_Char *systemId,
631 const XML_Char *publicId,
632 const XML_Char *notationName),
633 ("(NNNNN)",
634 string_intern(self, entityName), string_intern(self, base),
635 string_intern(self, systemId), string_intern(self, publicId),
636 string_intern(self, notationName)))
637
/* my_EntityDeclHandler:
   handler(entityName, is_parameter_entity, value, base, systemId,
           publicId, notationName)
   `value` comes with an explicit length, so it is converted with the
   length-taking converters.  The two variants differ only in whether a
   Unicode conversion of `value` is available. */
638 #ifndef Py_USING_UNICODE
639 VOID_HANDLER(EntityDecl,
640 (void *userData,
641 const XML_Char *entityName,
642 int is_parameter_entity,
643 const XML_Char *value,
644 int value_length,
645 const XML_Char *base,
646 const XML_Char *systemId,
647 const XML_Char *publicId,
648 const XML_Char *notationName),
649 ("NiNNNNN",
650 string_intern(self, entityName), is_parameter_entity,
651 conv_string_len_to_utf8(value, value_length),
652 string_intern(self, base), string_intern(self, systemId),
653 string_intern(self, publicId),
654 string_intern(self, notationName)))
655 #else
656 VOID_HANDLER(EntityDecl,
657 (void *userData,
658 const XML_Char *entityName,
659 int is_parameter_entity,
660 const XML_Char *value,
661 int value_length,
662 const XML_Char *base,
663 const XML_Char *systemId,
664 const XML_Char *publicId,
665 const XML_Char *notationName),
666 ("NiNNNNN",
667 string_intern(self, entityName), is_parameter_entity,
668 (self->returns_unicode
669 ? conv_string_len_to_unicode(value, value_length)
670 : conv_string_len_to_utf8(value, value_length)),
671 string_intern(self, base), string_intern(self, systemId),
672 string_intern(self, publicId),
673 string_intern(self, notationName)))
674 #endif
675
/* my_XmlDeclHandler: handler(version, encoding, standalone) */
676 VOID_HANDLER(XmlDecl,
677 (void *userData,
678 const XML_Char *version,
679 const XML_Char *encoding,
680 int standalone),
681 ("(O&O&i)",
682 STRING_CONV_FUNC,version, STRING_CONV_FUNC,encoding,
683 standalone))
684
685 static PyObject *
conv_content_model(XML_Content * const model,PyObject * (* conv_string)(const XML_Char *))686 conv_content_model(XML_Content * const model,
687 PyObject *(*conv_string)(const XML_Char *))
688 {
689 PyObject *result = NULL;
690 PyObject *children = PyTuple_New(model->numchildren);
691 int i;
692
693 if (children != NULL) {
694 assert(model->numchildren < INT_MAX);
695 for (i = 0; i < (int)model->numchildren; ++i) {
696 PyObject *child = conv_content_model(&model->children[i],
697 conv_string);
698 if (child == NULL) {
699 Py_XDECREF(children);
700 return NULL;
701 }
702 PyTuple_SET_ITEM(children, i, child);
703 }
704 result = Py_BuildValue("(iiO&N)",
705 model->type, model->quant,
706 conv_string,model->name, children);
707 }
708 return result;
709 }
710
711 static void
my_ElementDeclHandler(void * userData,const XML_Char * name,XML_Content * model)712 my_ElementDeclHandler(void *userData,
713 const XML_Char *name,
714 XML_Content *model)
715 {
716 xmlparseobject *self = (xmlparseobject *)userData;
717 PyObject *args = NULL;
718
719 if (have_handler(self, ElementDecl)) {
720 PyObject *rv = NULL;
721 PyObject *modelobj, *nameobj;
722
723 if (flush_character_buffer(self) < 0)
724 goto finally;
725 #ifdef Py_USING_UNICODE
726 modelobj = conv_content_model(model,
727 (self->returns_unicode
728 ? conv_string_to_unicode
729 : conv_string_to_utf8));
730 #else
731 modelobj = conv_content_model(model, conv_string_to_utf8);
732 #endif
733 if (modelobj == NULL) {
734 flag_error(self);
735 goto finally;
736 }
737 nameobj = string_intern(self, name);
738 if (nameobj == NULL) {
739 Py_DECREF(modelobj);
740 flag_error(self);
741 goto finally;
742 }
743 args = Py_BuildValue("NN", nameobj, modelobj);
744 if (args == NULL) {
745 Py_DECREF(modelobj);
746 flag_error(self);
747 goto finally;
748 }
749 self->in_callback = 1;
750 rv = call_with_frame(getcode(ElementDecl, "ElementDecl", __LINE__),
751 self->handlers[ElementDecl], args, self);
752 self->in_callback = 0;
753 if (rv == NULL) {
754 flag_error(self);
755 goto finally;
756 }
757 Py_DECREF(rv);
758 }
759 finally:
760 Py_XDECREF(args);
761 XML_FreeContentModel(self->itself, model);
762 return;
763 }
764
/* More generated callbacks (see RC_HANDLER / VOID_HANDLER / INT_HANDLER). */

/* my_AttlistDeclHandler:
   handler(elname, attname, att_type, dflt, isrequired) */
765 VOID_HANDLER(AttlistDecl,
766 (void *userData,
767 const XML_Char *elname,
768 const XML_Char *attname,
769 const XML_Char *att_type,
770 const XML_Char *dflt,
771 int isrequired),
772 ("(NNO&O&i)",
773 string_intern(self, elname), string_intern(self, attname),
774 STRING_CONV_FUNC,att_type, STRING_CONV_FUNC,dflt,
775 isrequired))
776
/* my_SkippedEntityHandler: handler(entityName, is_parameter_entity);
   only compiled for Expat >= 1.95.4. */
777 #if XML_COMBINED_VERSION >= 19504
778 VOID_HANDLER(SkippedEntity,
779 (void *userData,
780 const XML_Char *entityName,
781 int is_parameter_entity),
782 ("Ni",
783 string_intern(self, entityName), is_parameter_entity))
784 #endif
785
/* my_NotationDeclHandler:
   handler(notationName, base, systemId, publicId) */
786 VOID_HANDLER(NotationDecl,
787 (void *userData,
788 const XML_Char *notationName,
789 const XML_Char *base,
790 const XML_Char *systemId,
791 const XML_Char *publicId),
792 ("(NNNN)",
793 string_intern(self, notationName), string_intern(self, base),
794 string_intern(self, systemId), string_intern(self, publicId)))
795
/* my_StartNamespaceDeclHandler: handler(prefix, uri) */
796 VOID_HANDLER(StartNamespaceDecl,
797 (void *userData,
798 const XML_Char *prefix,
799 const XML_Char *uri),
800 ("(NN)",
801 string_intern(self, prefix), string_intern(self, uri)))
802
/* my_EndNamespaceDeclHandler: handler(prefix) */
803 VOID_HANDLER(EndNamespaceDecl,
804 (void *userData,
805 const XML_Char *prefix),
806 ("(N)", string_intern(self, prefix)))
807
/* my_CommentHandler: handler(data) */
808 VOID_HANDLER(Comment,
809 (void *userData, const XML_Char *data),
810 ("(O&)", STRING_CONV_FUNC,data))
811
/* my_StartCdataSectionHandler / my_EndCdataSectionHandler: handler() */
812 VOID_HANDLER(StartCdataSection,
813 (void *userData),
814 ("()"))
815
816 VOID_HANDLER(EndCdataSection,
817 (void *userData),
818 ("()"))
819
/* my_DefaultHandler / my_DefaultHandlerExpandHandler: handler(data),
   where data is the `len` XML_Chars at `s`.  The two preprocessor
   branches differ only in whether a Unicode conversion is available. */
820 #ifndef Py_USING_UNICODE
821 VOID_HANDLER(Default,
822 (void *userData, const XML_Char *s, int len),
823 ("(N)", conv_string_len_to_utf8(s,len)))
824
825 VOID_HANDLER(DefaultHandlerExpand,
826 (void *userData, const XML_Char *s, int len),
827 ("(N)", conv_string_len_to_utf8(s,len)))
828 #else
829 VOID_HANDLER(Default,
830 (void *userData, const XML_Char *s, int len),
831 ("(N)", (self->returns_unicode
832 ? conv_string_len_to_unicode(s,len)
833 : conv_string_len_to_utf8(s,len))))
834
835 VOID_HANDLER(DefaultHandlerExpand,
836 (void *userData, const XML_Char *s, int len),
837 ("(N)", (self->returns_unicode
838 ? conv_string_len_to_unicode(s,len)
839 : conv_string_len_to_utf8(s,len))))
840 #endif
841
/* my_NotStandaloneHandler: handler(); the Python return value becomes
   the int result handed back to Expat. */
842 INT_HANDLER(NotStandalone,
843 (void *userData),
844 ("()"))
845
/* my_ExternalEntityRefHandler:
   handler(context, base, systemId, publicId) -> int.
   This callback receives the XML_Parser rather than the user data, so
   the parser object is looked up with XML_GetUserData(). */
846 RC_HANDLER(int, ExternalEntityRef,
847 (XML_Parser parser,
848 const XML_Char *context,
849 const XML_Char *base,
850 const XML_Char *systemId,
851 const XML_Char *publicId),
852 int rc=0;,
853 ("(O&NNN)",
854 STRING_CONV_FUNC,context, string_intern(self, base),
855 string_intern(self, systemId), string_intern(self, publicId)),
856 rc = PyInt_AsLong(rv);, rc,
857 XML_GetUserData(parser))
858
859 /* XXX UnknownEncodingHandler */
860
/* my_StartDoctypeDeclHandler:
   handler(doctypeName, sysid, pubid, has_internal_subset) */
861 VOID_HANDLER(StartDoctypeDecl,
862 (void *userData, const XML_Char *doctypeName,
863 const XML_Char *sysid, const XML_Char *pubid,
864 int has_internal_subset),
865 ("(NNNi)", string_intern(self, doctypeName),
866 string_intern(self, sysid), string_intern(self, pubid),
867 has_internal_subset))
868
/* my_EndDoctypeDeclHandler: handler() */
869 VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
870
871 /* ---------------------------------------------------------------- */
872
873 static PyObject *
get_parse_result(xmlparseobject * self,int rv)874 get_parse_result(xmlparseobject *self, int rv)
875 {
876 if (PyErr_Occurred()) {
877 return NULL;
878 }
879 if (rv == 0) {
880 return set_error(self, XML_GetErrorCode(self->itself));
881 }
882 if (flush_character_buffer(self) < 0) {
883 return NULL;
884 }
885 return PyInt_FromLong(rv);
886 }
887
888 PyDoc_STRVAR(xmlparse_Parse__doc__,
889 "Parse(data[, isfinal])\n\
890 Parse XML data. `isfinal' should be true at end of input.");
891
892 static PyObject *
xmlparse_Parse(xmlparseobject * self,PyObject * args)893 xmlparse_Parse(xmlparseobject *self, PyObject *args)
894 {
895 char *s;
896 int slen;
897 int isFinal = 0;
898
899 if (!PyArg_ParseTuple(args, "s#|i:Parse", &s, &slen, &isFinal))
900 return NULL;
901
902 return get_parse_result(self, XML_Parse(self->itself, s, slen, isFinal));
903 }
904
905 /* File reading copied from cPickle */
906
907 #define BUF_SIZE 2048
908
909 static int
readinst(char * buf,int buf_size,PyObject * meth)910 readinst(char *buf, int buf_size, PyObject *meth)
911 {
912 PyObject *arg = NULL;
913 PyObject *bytes = NULL;
914 PyObject *str = NULL;
915 int len = -1;
916
917 if ((bytes = PyInt_FromLong(buf_size)) == NULL)
918 goto finally;
919
920 if ((arg = PyTuple_New(1)) == NULL) {
921 Py_DECREF(bytes);
922 goto finally;
923 }
924
925 PyTuple_SET_ITEM(arg, 0, bytes);
926
927 #if PY_VERSION_HEX < 0x02020000
928 str = PyObject_CallObject(meth, arg);
929 #else
930 str = PyObject_Call(meth, arg, NULL);
931 #endif
932 if (str == NULL)
933 goto finally;
934
935 /* XXX what to do if it returns a Unicode string? */
936 if (!PyString_Check(str)) {
937 PyErr_Format(PyExc_TypeError,
938 "read() did not return a string object (type=%.400s)",
939 Py_TYPE(str)->tp_name);
940 goto finally;
941 }
942 len = PyString_GET_SIZE(str);
943 if (len > buf_size) {
944 PyErr_Format(PyExc_ValueError,
945 "read() returned too much data: "
946 "%i bytes requested, %i returned",
947 buf_size, len);
948 goto finally;
949 }
950 memcpy(buf, PyString_AsString(str), len);
951 finally:
952 Py_XDECREF(arg);
953 Py_XDECREF(str);
954 return len;
955 }
956
957 PyDoc_STRVAR(xmlparse_ParseFile__doc__,
958 "ParseFile(file)\n\
959 Parse XML data from file-like object.");
960
961 static PyObject *
xmlparse_ParseFile(xmlparseobject * self,PyObject * f)962 xmlparse_ParseFile(xmlparseobject *self, PyObject *f)
963 {
964 int rv = 1;
965 PyObject *readmethod = NULL;
966
967 readmethod = PyObject_GetAttrString(f, "read");
968 if (readmethod == NULL) {
969 PyErr_SetString(PyExc_TypeError,
970 "argument must have 'read' attribute");
971 return NULL;
972
973 }
974 for (;;) {
975 int bytes_read;
976 void *buf = XML_GetBuffer(self->itself, BUF_SIZE);
977 if (buf == NULL) {
978 Py_XDECREF(readmethod);
979 return get_parse_result(self, 0);
980 }
981
982 bytes_read = readinst(buf, BUF_SIZE, readmethod);
983 if (bytes_read < 0) {
984 Py_XDECREF(readmethod);
985 return NULL;
986 }
987
988 rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0);
989 if (PyErr_Occurred()) {
990 Py_XDECREF(readmethod);
991 return NULL;
992 }
993
994 if (!rv || bytes_read == 0)
995 break;
996 }
997 Py_XDECREF(readmethod);
998 return get_parse_result(self, rv);
999 }
1000
1001 PyDoc_STRVAR(xmlparse_SetBase__doc__,
1002 "SetBase(base_url)\n\
1003 Set the base URL for the parser.");
1004
1005 static PyObject *
xmlparse_SetBase(xmlparseobject * self,PyObject * args)1006 xmlparse_SetBase(xmlparseobject *self, PyObject *args)
1007 {
1008 char *base;
1009
1010 if (!PyArg_ParseTuple(args, "s:SetBase", &base))
1011 return NULL;
1012 if (!XML_SetBase(self->itself, base)) {
1013 return PyErr_NoMemory();
1014 }
1015 Py_INCREF(Py_None);
1016 return Py_None;
1017 }
1018
1019 PyDoc_STRVAR(xmlparse_GetBase__doc__,
1020 "GetBase() -> url\n\
1021 Return base URL string for the parser.");
1022
1023 static PyObject *
xmlparse_GetBase(xmlparseobject * self,PyObject * unused)1024 xmlparse_GetBase(xmlparseobject *self, PyObject *unused)
1025 {
1026 return Py_BuildValue("z", XML_GetBase(self->itself));
1027 }
1028
1029 PyDoc_STRVAR(xmlparse_GetInputContext__doc__,
1030 "GetInputContext() -> string\n\
1031 Return the untranslated text of the input that caused the current event.\n\
1032 If the event was generated by a large amount of text (such as a start tag\n\
1033 for an element with many attributes), not all of the text may be available.");
1034
1035 static PyObject *
xmlparse_GetInputContext(xmlparseobject * self,PyObject * unused)1036 xmlparse_GetInputContext(xmlparseobject *self, PyObject *unused)
1037 {
1038 if (self->in_callback) {
1039 int offset, size;
1040 const char *buffer
1041 = XML_GetInputContext(self->itself, &offset, &size);
1042
1043 if (buffer != NULL)
1044 return PyString_FromStringAndSize(buffer + offset,
1045 size - offset);
1046 else
1047 Py_RETURN_NONE;
1048 }
1049 else
1050 Py_RETURN_NONE;
1051 }
1052
1053 PyDoc_STRVAR(xmlparse_ExternalEntityParserCreate__doc__,
1054 "ExternalEntityParserCreate(context[, encoding])\n\
1055 Create a parser for parsing an external entity based on the\n\
1056 information passed to the ExternalEntityRefHandler.");
1057
1058 static PyObject *
xmlparse_ExternalEntityParserCreate(xmlparseobject * self,PyObject * args)1059 xmlparse_ExternalEntityParserCreate(xmlparseobject *self, PyObject *args)
1060 {
1061 char *context;
1062 char *encoding = NULL;
1063 xmlparseobject *new_parser;
1064 int i;
1065
1066 if (!PyArg_ParseTuple(args, "z|s:ExternalEntityParserCreate",
1067 &context, &encoding)) {
1068 return NULL;
1069 }
1070
1071 #ifndef Py_TPFLAGS_HAVE_GC
1072 /* Python versions 2.0 and 2.1 */
1073 new_parser = PyObject_New(xmlparseobject, &Xmlparsetype);
1074 #else
1075 /* Python versions 2.2 and later */
1076 new_parser = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
1077 #endif
1078
1079 if (new_parser == NULL)
1080 return NULL;
1081 new_parser->buffer_size = self->buffer_size;
1082 new_parser->buffer_used = 0;
1083 if (self->buffer != NULL) {
1084 new_parser->buffer = malloc(new_parser->buffer_size);
1085 if (new_parser->buffer == NULL) {
1086 #ifndef Py_TPFLAGS_HAVE_GC
1087 /* Code for versions 2.0 and 2.1 */
1088 PyObject_Del(new_parser);
1089 #else
1090 /* Code for versions 2.2 and later. */
1091 PyObject_GC_Del(new_parser);
1092 #endif
1093 return PyErr_NoMemory();
1094 }
1095 }
1096 else
1097 new_parser->buffer = NULL;
1098 new_parser->returns_unicode = self->returns_unicode;
1099 new_parser->ordered_attributes = self->ordered_attributes;
1100 new_parser->specified_attributes = self->specified_attributes;
1101 new_parser->in_callback = 0;
1102 new_parser->ns_prefixes = self->ns_prefixes;
1103 new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
1104 encoding);
1105 new_parser->handlers = 0;
1106 new_parser->intern = self->intern;
1107 Py_XINCREF(new_parser->intern);
1108 #ifdef Py_TPFLAGS_HAVE_GC
1109 PyObject_GC_Track(new_parser);
1110 #else
1111 PyObject_GC_Init(new_parser);
1112 #endif
1113
1114 if (!new_parser->itself) {
1115 Py_DECREF(new_parser);
1116 return PyErr_NoMemory();
1117 }
1118
1119 XML_SetUserData(new_parser->itself, (void *)new_parser);
1120
1121 /* allocate and clear handlers first */
1122 for (i = 0; handler_info[i].name != NULL; i++)
1123 /* do nothing */;
1124
1125 new_parser->handlers = malloc(sizeof(PyObject *) * i);
1126 if (!new_parser->handlers) {
1127 Py_DECREF(new_parser);
1128 return PyErr_NoMemory();
1129 }
1130 clear_handlers(new_parser, 1);
1131
1132 /* then copy handlers from self */
1133 for (i = 0; handler_info[i].name != NULL; i++) {
1134 PyObject *handler = self->handlers[i];
1135 if (handler != NULL) {
1136 Py_INCREF(handler);
1137 new_parser->handlers[i] = handler;
1138 handler_info[i].setter(new_parser->itself,
1139 handler_info[i].handler);
1140 }
1141 }
1142 return (PyObject *)new_parser;
1143 }
1144
1145 PyDoc_STRVAR(xmlparse_SetParamEntityParsing__doc__,
1146 "SetParamEntityParsing(flag) -> success\n\
1147 Controls parsing of parameter entities (including the external DTD\n\
1148 subset). Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,\n\
1149 XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and\n\
1150 XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag\n\
1151 was successful.");
1152
1153 static PyObject*
xmlparse_SetParamEntityParsing(xmlparseobject * p,PyObject * args)1154 xmlparse_SetParamEntityParsing(xmlparseobject *p, PyObject* args)
1155 {
1156 int flag;
1157 if (!PyArg_ParseTuple(args, "i", &flag))
1158 return NULL;
1159 flag = XML_SetParamEntityParsing(p->itself, flag);
1160 return PyInt_FromLong(flag);
1161 }
1162
1163
1164 #if XML_COMBINED_VERSION >= 19505
1165 PyDoc_STRVAR(xmlparse_UseForeignDTD__doc__,
1166 "UseForeignDTD([flag])\n\
1167 Allows the application to provide an artificial external subset if one is\n\
1168 not specified as part of the document instance. This readily allows the\n\
1169 use of a 'default' document type controlled by the application, while still\n\
1170 getting the advantage of providing document type information to the parser.\n\
1171 'flag' defaults to True if not provided.");
1172
1173 static PyObject *
xmlparse_UseForeignDTD(xmlparseobject * self,PyObject * args)1174 xmlparse_UseForeignDTD(xmlparseobject *self, PyObject *args)
1175 {
1176 PyObject *flagobj = NULL;
1177 int flag = 1;
1178 enum XML_Error rc;
1179 if (!PyArg_ParseTuple(args, "|O:UseForeignDTD", &flagobj))
1180 return NULL;
1181 if (flagobj != NULL) {
1182 flag = PyObject_IsTrue(flagobj);
1183 if (flag < 0)
1184 return NULL;
1185 }
1186 rc = XML_UseForeignDTD(self->itself, flag ? XML_TRUE : XML_FALSE);
1187 if (rc != XML_ERROR_NONE) {
1188 return set_error(self, rc);
1189 }
1190 Py_INCREF(Py_None);
1191 return Py_None;
1192 }
1193 #endif
1194
1195 static struct PyMethodDef xmlparse_methods[] = {
1196 {"Parse", (PyCFunction)xmlparse_Parse,
1197 METH_VARARGS, xmlparse_Parse__doc__},
1198 {"ParseFile", (PyCFunction)xmlparse_ParseFile,
1199 METH_O, xmlparse_ParseFile__doc__},
1200 {"SetBase", (PyCFunction)xmlparse_SetBase,
1201 METH_VARARGS, xmlparse_SetBase__doc__},
1202 {"GetBase", (PyCFunction)xmlparse_GetBase,
1203 METH_NOARGS, xmlparse_GetBase__doc__},
1204 {"ExternalEntityParserCreate", (PyCFunction)xmlparse_ExternalEntityParserCreate,
1205 METH_VARARGS, xmlparse_ExternalEntityParserCreate__doc__},
1206 {"SetParamEntityParsing", (PyCFunction)xmlparse_SetParamEntityParsing,
1207 METH_VARARGS, xmlparse_SetParamEntityParsing__doc__},
1208 {"GetInputContext", (PyCFunction)xmlparse_GetInputContext,
1209 METH_NOARGS, xmlparse_GetInputContext__doc__},
1210 #if XML_COMBINED_VERSION >= 19505
1211 {"UseForeignDTD", (PyCFunction)xmlparse_UseForeignDTD,
1212 METH_VARARGS, xmlparse_UseForeignDTD__doc__},
1213 #endif
1214 {NULL, NULL} /* sentinel */
1215 };
1216
1217 /* ---------- */
1218
1219
1220 #ifdef Py_USING_UNICODE
1221
1222 /* pyexpat international encoding support.
1223 Make it as simple as possible.
1224 */
1225
1226 static char template_buffer[257];
1227 PyObject *template_string = NULL;
1228
1229 static void
init_template_buffer(void)1230 init_template_buffer(void)
1231 {
1232 int i;
1233 for (i = 0; i < 256; i++) {
1234 template_buffer[i] = i;
1235 }
1236 template_buffer[256] = 0;
1237 }
1238
1239 static int
PyUnknownEncodingHandler(void * encodingHandlerData,const XML_Char * name,XML_Encoding * info)1240 PyUnknownEncodingHandler(void *encodingHandlerData,
1241 const XML_Char *name,
1242 XML_Encoding *info)
1243 {
1244 PyUnicodeObject *_u_string = NULL;
1245 int result = 0;
1246 int i;
1247
1248 /* Yes, supports only 8bit encodings */
1249 _u_string = (PyUnicodeObject *)
1250 PyUnicode_Decode(template_buffer, 256, name, "replace");
1251
1252 if (_u_string == NULL)
1253 return result;
1254
1255 if (PyUnicode_GET_SIZE(_u_string) != 256) {
1256 Py_DECREF(_u_string);
1257 PyErr_SetString(PyExc_ValueError,
1258 "multi-byte encodings are not supported");
1259 return result;
1260 }
1261
1262 for (i = 0; i < 256; i++) {
1263 /* Stupid to access directly, but fast */
1264 Py_UNICODE c = _u_string->str[i];
1265 if (c == Py_UNICODE_REPLACEMENT_CHARACTER)
1266 info->map[i] = -1;
1267 else
1268 info->map[i] = c;
1269 }
1270 info->data = NULL;
1271 info->convert = NULL;
1272 info->release = NULL;
1273 result = 1;
1274 Py_DECREF(_u_string);
1275 return result;
1276 }
1277
1278 #endif
1279
1280 static PyObject *
newxmlparseobject(char * encoding,char * namespace_separator,PyObject * intern)1281 newxmlparseobject(char *encoding, char *namespace_separator, PyObject *intern)
1282 {
1283 int i;
1284 xmlparseobject *self;
1285
1286 #ifdef Py_TPFLAGS_HAVE_GC
1287 /* Code for versions 2.2 and later */
1288 self = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
1289 #else
1290 self = PyObject_New(xmlparseobject, &Xmlparsetype);
1291 #endif
1292 if (self == NULL)
1293 return NULL;
1294
1295 #ifdef Py_USING_UNICODE
1296 self->returns_unicode = 1;
1297 #else
1298 self->returns_unicode = 0;
1299 #endif
1300
1301 self->buffer = NULL;
1302 self->buffer_size = CHARACTER_DATA_BUFFER_SIZE;
1303 self->buffer_used = 0;
1304 self->ordered_attributes = 0;
1305 self->specified_attributes = 0;
1306 self->in_callback = 0;
1307 self->ns_prefixes = 0;
1308 self->handlers = NULL;
1309 if (namespace_separator != NULL) {
1310 self->itself = XML_ParserCreateNS(encoding, *namespace_separator);
1311 }
1312 else {
1313 self->itself = XML_ParserCreate(encoding);
1314 }
1315 if (self->itself == NULL) {
1316 PyErr_SetString(PyExc_RuntimeError,
1317 "XML_ParserCreate failed");
1318 Py_DECREF(self);
1319 return NULL;
1320 }
1321 #if XML_COMBINED_VERSION >= 20100
1322 /* This feature was added upstream in libexpat 2.1.0. */
1323 XML_SetHashSalt(self->itself,
1324 (unsigned long)_Py_HashSecret.prefix);
1325 #endif
1326 self->intern = intern;
1327 Py_XINCREF(self->intern);
1328 #ifdef Py_TPFLAGS_HAVE_GC
1329 PyObject_GC_Track(self);
1330 #else
1331 PyObject_GC_Init(self);
1332 #endif
1333 XML_SetUserData(self->itself, (void *)self);
1334 #ifdef Py_USING_UNICODE
1335 XML_SetUnknownEncodingHandler(self->itself,
1336 (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL);
1337 #endif
1338
1339 for (i = 0; handler_info[i].name != NULL; i++)
1340 /* do nothing */;
1341
1342 self->handlers = malloc(sizeof(PyObject *) * i);
1343 if (!self->handlers) {
1344 Py_DECREF(self);
1345 return PyErr_NoMemory();
1346 }
1347 clear_handlers(self, 1);
1348
1349 return (PyObject*)self;
1350 }
1351
1352
1353 static void
xmlparse_dealloc(xmlparseobject * self)1354 xmlparse_dealloc(xmlparseobject *self)
1355 {
1356 int i;
1357 #ifdef Py_TPFLAGS_HAVE_GC
1358 PyObject_GC_UnTrack(self);
1359 #else
1360 PyObject_GC_Fini(self);
1361 #endif
1362 if (self->itself != NULL)
1363 XML_ParserFree(self->itself);
1364 self->itself = NULL;
1365
1366 if (self->handlers != NULL) {
1367 PyObject *temp;
1368 for (i = 0; handler_info[i].name != NULL; i++) {
1369 temp = self->handlers[i];
1370 self->handlers[i] = NULL;
1371 Py_XDECREF(temp);
1372 }
1373 free(self->handlers);
1374 self->handlers = NULL;
1375 }
1376 if (self->buffer != NULL) {
1377 free(self->buffer);
1378 self->buffer = NULL;
1379 }
1380 Py_XDECREF(self->intern);
1381 #ifndef Py_TPFLAGS_HAVE_GC
1382 /* Code for versions 2.0 and 2.1 */
1383 PyObject_Del(self);
1384 #else
1385 /* Code for versions 2.2 and later. */
1386 PyObject_GC_Del(self);
1387 #endif
1388 }
1389
1390 static int
handlername2int(const char * name)1391 handlername2int(const char *name)
1392 {
1393 int i;
1394 for (i = 0; handler_info[i].name != NULL; i++) {
1395 if (strcmp(name, handler_info[i].name) == 0) {
1396 return i;
1397 }
1398 }
1399 return -1;
1400 }
1401
1402 static PyObject *
get_pybool(int istrue)1403 get_pybool(int istrue)
1404 {
1405 PyObject *result = istrue ? Py_True : Py_False;
1406 Py_INCREF(result);
1407 return result;
1408 }
1409
1410 static PyObject *
xmlparse_getattr(xmlparseobject * self,char * name)1411 xmlparse_getattr(xmlparseobject *self, char *name)
1412 {
1413 int handlernum = handlername2int(name);
1414
1415 if (handlernum != -1) {
1416 PyObject *result = self->handlers[handlernum];
1417 if (result == NULL)
1418 result = Py_None;
1419 Py_INCREF(result);
1420 return result;
1421 }
1422 if (name[0] == 'E') {
1423 if (strcmp(name, "ErrorCode") == 0)
1424 return PyInt_FromLong((long)
1425 XML_GetErrorCode(self->itself));
1426 if (strcmp(name, "ErrorLineNumber") == 0)
1427 return PyInt_FromLong((long)
1428 XML_GetErrorLineNumber(self->itself));
1429 if (strcmp(name, "ErrorColumnNumber") == 0)
1430 return PyInt_FromLong((long)
1431 XML_GetErrorColumnNumber(self->itself));
1432 if (strcmp(name, "ErrorByteIndex") == 0)
1433 return PyInt_FromLong((long)
1434 XML_GetErrorByteIndex(self->itself));
1435 }
1436 if (name[0] == 'C') {
1437 if (strcmp(name, "CurrentLineNumber") == 0)
1438 return PyInt_FromLong((long)
1439 XML_GetCurrentLineNumber(self->itself));
1440 if (strcmp(name, "CurrentColumnNumber") == 0)
1441 return PyInt_FromLong((long)
1442 XML_GetCurrentColumnNumber(self->itself));
1443 if (strcmp(name, "CurrentByteIndex") == 0)
1444 return PyInt_FromLong((long)
1445 XML_GetCurrentByteIndex(self->itself));
1446 }
1447 if (name[0] == 'b') {
1448 if (strcmp(name, "buffer_size") == 0)
1449 return PyInt_FromLong((long) self->buffer_size);
1450 if (strcmp(name, "buffer_text") == 0)
1451 return get_pybool(self->buffer != NULL);
1452 if (strcmp(name, "buffer_used") == 0)
1453 return PyInt_FromLong((long) self->buffer_used);
1454 }
1455 if (strcmp(name, "namespace_prefixes") == 0)
1456 return get_pybool(self->ns_prefixes);
1457 if (strcmp(name, "ordered_attributes") == 0)
1458 return get_pybool(self->ordered_attributes);
1459 if (strcmp(name, "returns_unicode") == 0)
1460 return get_pybool((long) self->returns_unicode);
1461 if (strcmp(name, "specified_attributes") == 0)
1462 return get_pybool((long) self->specified_attributes);
1463 if (strcmp(name, "intern") == 0) {
1464 if (self->intern == NULL) {
1465 Py_INCREF(Py_None);
1466 return Py_None;
1467 }
1468 else {
1469 Py_INCREF(self->intern);
1470 return self->intern;
1471 }
1472 }
1473
1474 #define APPEND(list, str) \
1475 do { \
1476 PyObject *o = PyString_FromString(str); \
1477 if (o != NULL) \
1478 PyList_Append(list, o); \
1479 Py_XDECREF(o); \
1480 } while (0)
1481
1482 if (strcmp(name, "__members__") == 0) {
1483 int i;
1484 PyObject *rc = PyList_New(0);
1485 if (!rc)
1486 return NULL;
1487 for (i = 0; handler_info[i].name != NULL; i++) {
1488 PyObject *o = get_handler_name(&handler_info[i]);
1489 if (o != NULL)
1490 PyList_Append(rc, o);
1491 Py_XDECREF(o);
1492 }
1493 APPEND(rc, "ErrorCode");
1494 APPEND(rc, "ErrorLineNumber");
1495 APPEND(rc, "ErrorColumnNumber");
1496 APPEND(rc, "ErrorByteIndex");
1497 APPEND(rc, "CurrentLineNumber");
1498 APPEND(rc, "CurrentColumnNumber");
1499 APPEND(rc, "CurrentByteIndex");
1500 APPEND(rc, "buffer_size");
1501 APPEND(rc, "buffer_text");
1502 APPEND(rc, "buffer_used");
1503 APPEND(rc, "namespace_prefixes");
1504 APPEND(rc, "ordered_attributes");
1505 APPEND(rc, "returns_unicode");
1506 APPEND(rc, "specified_attributes");
1507 APPEND(rc, "intern");
1508
1509 #undef APPEND
1510 return rc;
1511 }
1512 return Py_FindMethod(xmlparse_methods, (PyObject *)self, name);
1513 }
1514
1515 static int
sethandler(xmlparseobject * self,const char * name,PyObject * v)1516 sethandler(xmlparseobject *self, const char *name, PyObject* v)
1517 {
1518 int handlernum = handlername2int(name);
1519 if (handlernum >= 0) {
1520 xmlhandler c_handler = NULL;
1521 PyObject *temp = self->handlers[handlernum];
1522
1523 if (v == Py_None) {
1524 /* If this is the character data handler, and a character
1525 data handler is already active, we need to be more
1526 careful. What we can safely do is replace the existing
1527 character data handler callback function with a no-op
1528 function that will refuse to call Python. The downside
1529 is that this doesn't completely remove the character
1530 data handler from the C layer if there's any callback
1531 active, so Expat does a little more work than it
1532 otherwise would, but that's really an odd case. A more
1533 elaborate system of handlers and state could remove the
1534 C handler more effectively. */
1535 if (handlernum == CharacterData && self->in_callback)
1536 c_handler = noop_character_data_handler;
1537 v = NULL;
1538 }
1539 else if (v != NULL) {
1540 Py_INCREF(v);
1541 c_handler = handler_info[handlernum].handler;
1542 }
1543 self->handlers[handlernum] = v;
1544 Py_XDECREF(temp);
1545 handler_info[handlernum].setter(self->itself, c_handler);
1546 return 1;
1547 }
1548 return 0;
1549 }
1550
1551 static int
xmlparse_setattr(xmlparseobject * self,char * name,PyObject * v)1552 xmlparse_setattr(xmlparseobject *self, char *name, PyObject *v)
1553 {
1554 /* Set attribute 'name' to value 'v'. v==NULL means delete */
1555 if (v == NULL) {
1556 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1557 return -1;
1558 }
1559 if (strcmp(name, "buffer_text") == 0) {
1560 int b = PyObject_IsTrue(v);
1561 if (b < 0)
1562 return -1;
1563 if (b) {
1564 if (self->buffer == NULL) {
1565 self->buffer = malloc(self->buffer_size);
1566 if (self->buffer == NULL) {
1567 PyErr_NoMemory();
1568 return -1;
1569 }
1570 self->buffer_used = 0;
1571 }
1572 }
1573 else if (self->buffer != NULL) {
1574 if (flush_character_buffer(self) < 0)
1575 return -1;
1576 free(self->buffer);
1577 self->buffer = NULL;
1578 }
1579 return 0;
1580 }
1581 if (strcmp(name, "namespace_prefixes") == 0) {
1582 int b = PyObject_IsTrue(v);
1583 if (b < 0)
1584 return -1;
1585 self->ns_prefixes = b;
1586 XML_SetReturnNSTriplet(self->itself, self->ns_prefixes);
1587 return 0;
1588 }
1589 if (strcmp(name, "ordered_attributes") == 0) {
1590 int b = PyObject_IsTrue(v);
1591 if (b < 0)
1592 return -1;
1593 self->ordered_attributes = b;
1594 return 0;
1595 }
1596 if (strcmp(name, "returns_unicode") == 0) {
1597 int b = PyObject_IsTrue(v);
1598 if (b < 0)
1599 return -1;
1600 #ifndef Py_USING_UNICODE
1601 if (b) {
1602 PyErr_SetString(PyExc_ValueError,
1603 "Unicode support not available");
1604 return -1;
1605 }
1606 #endif
1607 self->returns_unicode = b;
1608 return 0;
1609 }
1610 if (strcmp(name, "specified_attributes") == 0) {
1611 int b = PyObject_IsTrue(v);
1612 if (b < 0)
1613 return -1;
1614 self->specified_attributes = b;
1615 return 0;
1616 }
1617
1618 if (strcmp(name, "buffer_size") == 0) {
1619 long new_buffer_size;
1620 if (!PyInt_Check(v)) {
1621 PyErr_SetString(PyExc_TypeError, "buffer_size must be an integer");
1622 return -1;
1623 }
1624
1625 new_buffer_size=PyInt_AS_LONG(v);
1626 /* trivial case -- no change */
1627 if (new_buffer_size == self->buffer_size) {
1628 return 0;
1629 }
1630
1631 if (new_buffer_size <= 0) {
1632 PyErr_SetString(PyExc_ValueError, "buffer_size must be greater than zero");
1633 return -1;
1634 }
1635
1636 /* check maximum */
1637 if (new_buffer_size > INT_MAX) {
1638 char errmsg[100];
1639 sprintf(errmsg, "buffer_size must not be greater than %i", INT_MAX);
1640 PyErr_SetString(PyExc_ValueError, errmsg);
1641 return -1;
1642 }
1643
1644 if (self->buffer != NULL) {
1645 /* there is already a buffer */
1646 if (self->buffer_used != 0) {
1647 flush_character_buffer(self);
1648 }
1649 /* free existing buffer */
1650 free(self->buffer);
1651 }
1652 self->buffer = malloc(new_buffer_size);
1653 if (self->buffer == NULL) {
1654 PyErr_NoMemory();
1655 return -1;
1656 }
1657 self->buffer_size = new_buffer_size;
1658 return 0;
1659 }
1660
1661 if (strcmp(name, "CharacterDataHandler") == 0) {
1662 /* If we're changing the character data handler, flush all
1663 * cached data with the old handler. Not sure there's a
1664 * "right" thing to do, though, but this probably won't
1665 * happen.
1666 */
1667 if (flush_character_buffer(self) < 0)
1668 return -1;
1669 }
1670 if (sethandler(self, name, v)) {
1671 return 0;
1672 }
1673 PyErr_SetString(PyExc_AttributeError, name);
1674 return -1;
1675 }
1676
1677 #ifdef WITH_CYCLE_GC
1678 static int
xmlparse_traverse(xmlparseobject * op,visitproc visit,void * arg)1679 xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
1680 {
1681 int i;
1682 for (i = 0; handler_info[i].name != NULL; i++)
1683 Py_VISIT(op->handlers[i]);
1684 return 0;
1685 }
1686
1687 static int
xmlparse_clear(xmlparseobject * op)1688 xmlparse_clear(xmlparseobject *op)
1689 {
1690 clear_handlers(op, 0);
1691 Py_CLEAR(op->intern);
1692 return 0;
1693 }
1694 #endif
1695
1696 PyDoc_STRVAR(Xmlparsetype__doc__, "XML parser");
1697
/* Type object for xmlparser instances.
 *
 * The initializer is positional, so the order of the entries below must
 * match the slot order of PyTypeObject exactly.  The type pointer in the
 * header is left NULL here; the module initialization function assigns
 * &PyType_Type to it.  Attribute access is implemented with the
 * name-as-C-string slots (tp_getattr / tp_setattr); the object-based
 * tp_getattro / tp_setattro slots are left empty.
 */
static PyTypeObject Xmlparsetype = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "pyexpat.xmlparser",                /*tp_name*/
    sizeof(xmlparseobject) + PyGC_HEAD_SIZE,/*tp_basicsize*/
    0,                                  /*tp_itemsize*/
    /* methods */
    (destructor)xmlparse_dealloc,       /*tp_dealloc*/
    (printfunc)0,                       /*tp_print*/
    (getattrfunc)xmlparse_getattr,      /*tp_getattr*/
    (setattrfunc)xmlparse_setattr,      /*tp_setattr*/
    (cmpfunc)0,                         /*tp_compare*/
    (reprfunc)0,                        /*tp_repr*/
    0,                                  /*tp_as_number*/
    0,                                  /*tp_as_sequence*/
    0,                                  /*tp_as_mapping*/
    (hashfunc)0,                        /*tp_hash*/
    (ternaryfunc)0,                     /*tp_call*/
    (reprfunc)0,                        /*tp_str*/
    0,                                  /* tp_getattro */
    0,                                  /* tp_setattro */
    0,                                  /* tp_as_buffer */
    /* The GC flag is spelled differently depending on the Python version. */
#ifdef Py_TPFLAGS_HAVE_GC
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
#else
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_GC, /*tp_flags*/
#endif
    Xmlparsetype__doc__,                /* tp_doc - Documentation string */
    /* The traverse/clear functions only exist when WITH_CYCLE_GC is
       defined; otherwise both slots are zero. */
#ifdef WITH_CYCLE_GC
    (traverseproc)xmlparse_traverse,    /* tp_traverse */
    (inquiry)xmlparse_clear             /* tp_clear */
#else
    0, 0
#endif
};
1732
1733 /* End of code for xmlparser objects */
1734 /* -------------------------------------------------------- */
1735
1736 PyDoc_STRVAR(pyexpat_ParserCreate__doc__,
1737 "ParserCreate([encoding[, namespace_separator]]) -> parser\n\
1738 Return a new XML parser object.");
1739
1740 static PyObject *
pyexpat_ParserCreate(PyObject * notused,PyObject * args,PyObject * kw)1741 pyexpat_ParserCreate(PyObject *notused, PyObject *args, PyObject *kw)
1742 {
1743 char *encoding = NULL;
1744 char *namespace_separator = NULL;
1745 PyObject *intern = NULL;
1746 PyObject *result;
1747 int intern_decref = 0;
1748 static char *kwlist[] = {"encoding", "namespace_separator",
1749 "intern", NULL};
1750
1751 if (!PyArg_ParseTupleAndKeywords(args, kw, "|zzO:ParserCreate", kwlist,
1752 &encoding, &namespace_separator, &intern))
1753 return NULL;
1754 if (namespace_separator != NULL
1755 && strlen(namespace_separator) > 1) {
1756 PyErr_SetString(PyExc_ValueError,
1757 "namespace_separator must be at most one"
1758 " character, omitted, or None");
1759 return NULL;
1760 }
1761 /* Explicitly passing None means no interning is desired.
1762 Not passing anything means that a new dictionary is used. */
1763 if (intern == Py_None)
1764 intern = NULL;
1765 else if (intern == NULL) {
1766 intern = PyDict_New();
1767 if (!intern)
1768 return NULL;
1769 intern_decref = 1;
1770 }
1771 else if (!PyDict_Check(intern)) {
1772 PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
1773 return NULL;
1774 }
1775
1776 result = newxmlparseobject(encoding, namespace_separator, intern);
1777 if (intern_decref) {
1778 Py_DECREF(intern);
1779 }
1780 return result;
1781 }
1782
1783 PyDoc_STRVAR(pyexpat_ErrorString__doc__,
1784 "ErrorString(errno) -> string\n\
1785 Returns string error for given number.");
1786
1787 static PyObject *
pyexpat_ErrorString(PyObject * self,PyObject * args)1788 pyexpat_ErrorString(PyObject *self, PyObject *args)
1789 {
1790 long code = 0;
1791
1792 if (!PyArg_ParseTuple(args, "l:ErrorString", &code))
1793 return NULL;
1794 return Py_BuildValue("z", XML_ErrorString((int)code));
1795 }
1796
1797 /* List of methods defined in the module */
1798
1799 static struct PyMethodDef pyexpat_methods[] = {
1800 {"ParserCreate", (PyCFunction)pyexpat_ParserCreate,
1801 METH_VARARGS|METH_KEYWORDS, pyexpat_ParserCreate__doc__},
1802 {"ErrorString", (PyCFunction)pyexpat_ErrorString,
1803 METH_VARARGS, pyexpat_ErrorString__doc__},
1804
1805 {NULL, (PyCFunction)NULL, 0, NULL} /* sentinel */
1806 };
1807
1808 /* Module docstring */
1809
1810 PyDoc_STRVAR(pyexpat_module_documentation,
1811 "Python wrapper for Expat parser.");
1812
1813 /* Initialization function for the module */
1814
1815 #ifndef MODULE_NAME
1816 #define MODULE_NAME "pyexpat"
1817 #endif
1818
1819 #ifndef MODULE_INITFUNC
1820 #define MODULE_INITFUNC initpyexpat
1821 #endif
1822
1823 #ifndef PyMODINIT_FUNC
1824 # ifdef MS_WINDOWS
1825 # define PyMODINIT_FUNC __declspec(dllexport) void
1826 # else
1827 # define PyMODINIT_FUNC void
1828 # endif
1829 #endif
1830
1831 PyMODINIT_FUNC MODULE_INITFUNC(void); /* avoid compiler warnings */
1832
1833 PyMODINIT_FUNC
MODULE_INITFUNC(void)1834 MODULE_INITFUNC(void)
1835 {
1836 PyObject *m, *d;
1837 PyObject *errmod_name = PyString_FromString(MODULE_NAME ".errors");
1838 PyObject *errors_module;
1839 PyObject *modelmod_name;
1840 PyObject *model_module;
1841 PyObject *sys_modules;
1842 PyObject *version;
1843 static struct PyExpat_CAPI capi;
1844 PyObject* capi_object;
1845
1846 if (errmod_name == NULL)
1847 return;
1848 modelmod_name = PyString_FromString(MODULE_NAME ".model");
1849 if (modelmod_name == NULL)
1850 return;
1851
1852 Py_TYPE(&Xmlparsetype) = &PyType_Type;
1853
1854 /* Create the module and add the functions */
1855 m = Py_InitModule3(MODULE_NAME, pyexpat_methods,
1856 pyexpat_module_documentation);
1857 if (m == NULL)
1858 return;
1859
1860 /* Add some symbolic constants to the module */
1861 if (ErrorObject == NULL) {
1862 ErrorObject = PyErr_NewException("xml.parsers.expat.ExpatError",
1863 NULL, NULL);
1864 if (ErrorObject == NULL)
1865 return;
1866 }
1867 Py_INCREF(ErrorObject);
1868 PyModule_AddObject(m, "error", ErrorObject);
1869 Py_INCREF(ErrorObject);
1870 PyModule_AddObject(m, "ExpatError", ErrorObject);
1871 Py_INCREF(&Xmlparsetype);
1872 PyModule_AddObject(m, "XMLParserType", (PyObject *) &Xmlparsetype);
1873
1874 version = PyString_FromString(PY_VERSION);
1875 if (!version)
1876 return;
1877 PyModule_AddObject(m, "__version__", version);
1878 PyModule_AddStringConstant(m, "EXPAT_VERSION",
1879 (char *) XML_ExpatVersion());
1880 {
1881 XML_Expat_Version info = XML_ExpatVersionInfo();
1882 PyModule_AddObject(m, "version_info",
1883 Py_BuildValue("(iii)", info.major,
1884 info.minor, info.micro));
1885 }
1886 #ifdef Py_USING_UNICODE
1887 init_template_buffer();
1888 #endif
1889 /* XXX When Expat supports some way of figuring out how it was
1890 compiled, this should check and set native_encoding
1891 appropriately.
1892 */
1893 PyModule_AddStringConstant(m, "native_encoding", "UTF-8");
1894
1895 sys_modules = PySys_GetObject("modules");
1896 d = PyModule_GetDict(m);
1897 errors_module = PyDict_GetItem(d, errmod_name);
1898 if (errors_module == NULL) {
1899 errors_module = PyModule_New(MODULE_NAME ".errors");
1900 if (errors_module != NULL) {
1901 PyDict_SetItem(sys_modules, errmod_name, errors_module);
1902 /* gives away the reference to errors_module */
1903 PyModule_AddObject(m, "errors", errors_module);
1904 }
1905 }
1906 Py_DECREF(errmod_name);
1907 model_module = PyDict_GetItem(d, modelmod_name);
1908 if (model_module == NULL) {
1909 model_module = PyModule_New(MODULE_NAME ".model");
1910 if (model_module != NULL) {
1911 PyDict_SetItem(sys_modules, modelmod_name, model_module);
1912 /* gives away the reference to model_module */
1913 PyModule_AddObject(m, "model", model_module);
1914 }
1915 }
1916 Py_DECREF(modelmod_name);
1917 if (errors_module == NULL || model_module == NULL)
1918 /* Don't core dump later! */
1919 return;
1920
1921 #if XML_COMBINED_VERSION > 19505
1922 {
1923 const XML_Feature *features = XML_GetFeatureList();
1924 PyObject *list = PyList_New(0);
1925 if (list == NULL)
1926 /* just ignore it */
1927 PyErr_Clear();
1928 else {
1929 int i = 0;
1930 for (; features[i].feature != XML_FEATURE_END; ++i) {
1931 int ok;
1932 PyObject *item = Py_BuildValue("si", features[i].name,
1933 features[i].value);
1934 if (item == NULL) {
1935 Py_DECREF(list);
1936 list = NULL;
1937 break;
1938 }
1939 ok = PyList_Append(list, item);
1940 Py_DECREF(item);
1941 if (ok < 0) {
1942 PyErr_Clear();
1943 break;
1944 }
1945 }
1946 if (list != NULL)
1947 PyModule_AddObject(m, "features", list);
1948 }
1949 }
1950 #endif
1951
1952 #define MYCONST(name) \
1953 PyModule_AddStringConstant(errors_module, #name, \
1954 (char*)XML_ErrorString(name))
1955
1956 MYCONST(XML_ERROR_NO_MEMORY);
1957 MYCONST(XML_ERROR_SYNTAX);
1958 MYCONST(XML_ERROR_NO_ELEMENTS);
1959 MYCONST(XML_ERROR_INVALID_TOKEN);
1960 MYCONST(XML_ERROR_UNCLOSED_TOKEN);
1961 MYCONST(XML_ERROR_PARTIAL_CHAR);
1962 MYCONST(XML_ERROR_TAG_MISMATCH);
1963 MYCONST(XML_ERROR_DUPLICATE_ATTRIBUTE);
1964 MYCONST(XML_ERROR_JUNK_AFTER_DOC_ELEMENT);
1965 MYCONST(XML_ERROR_PARAM_ENTITY_REF);
1966 MYCONST(XML_ERROR_UNDEFINED_ENTITY);
1967 MYCONST(XML_ERROR_RECURSIVE_ENTITY_REF);
1968 MYCONST(XML_ERROR_ASYNC_ENTITY);
1969 MYCONST(XML_ERROR_BAD_CHAR_REF);
1970 MYCONST(XML_ERROR_BINARY_ENTITY_REF);
1971 MYCONST(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF);
1972 MYCONST(XML_ERROR_MISPLACED_XML_PI);
1973 MYCONST(XML_ERROR_UNKNOWN_ENCODING);
1974 MYCONST(XML_ERROR_INCORRECT_ENCODING);
1975 MYCONST(XML_ERROR_UNCLOSED_CDATA_SECTION);
1976 MYCONST(XML_ERROR_EXTERNAL_ENTITY_HANDLING);
1977 MYCONST(XML_ERROR_NOT_STANDALONE);
1978 MYCONST(XML_ERROR_UNEXPECTED_STATE);
1979 MYCONST(XML_ERROR_ENTITY_DECLARED_IN_PE);
1980 MYCONST(XML_ERROR_FEATURE_REQUIRES_XML_DTD);
1981 MYCONST(XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING);
1982 /* Added in Expat 1.95.7. */
1983 MYCONST(XML_ERROR_UNBOUND_PREFIX);
1984 /* Added in Expat 1.95.8. */
1985 MYCONST(XML_ERROR_UNDECLARING_PREFIX);
1986 MYCONST(XML_ERROR_INCOMPLETE_PE);
1987 MYCONST(XML_ERROR_XML_DECL);
1988 MYCONST(XML_ERROR_TEXT_DECL);
1989 MYCONST(XML_ERROR_PUBLICID);
1990 MYCONST(XML_ERROR_SUSPENDED);
1991 MYCONST(XML_ERROR_NOT_SUSPENDED);
1992 MYCONST(XML_ERROR_ABORTED);
1993 MYCONST(XML_ERROR_FINISHED);
1994 MYCONST(XML_ERROR_SUSPEND_PE);
1995
1996 PyModule_AddStringConstant(errors_module, "__doc__",
1997 "Constants used to describe error conditions.");
1998
1999 #undef MYCONST
2000
2001 #define MYCONST(c) PyModule_AddIntConstant(m, #c, c)
2002 MYCONST(XML_PARAM_ENTITY_PARSING_NEVER);
2003 MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
2004 MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS);
2005 #undef MYCONST
2006
2007 #define MYCONST(c) PyModule_AddIntConstant(model_module, #c, c)
2008 PyModule_AddStringConstant(model_module, "__doc__",
2009 "Constants used to interpret content model information.");
2010
2011 MYCONST(XML_CTYPE_EMPTY);
2012 MYCONST(XML_CTYPE_ANY);
2013 MYCONST(XML_CTYPE_MIXED);
2014 MYCONST(XML_CTYPE_NAME);
2015 MYCONST(XML_CTYPE_CHOICE);
2016 MYCONST(XML_CTYPE_SEQ);
2017
2018 MYCONST(XML_CQUANT_NONE);
2019 MYCONST(XML_CQUANT_OPT);
2020 MYCONST(XML_CQUANT_REP);
2021 MYCONST(XML_CQUANT_PLUS);
2022 #undef MYCONST
2023
2024 /* initialize pyexpat dispatch table */
2025 capi.size = sizeof(capi);
2026 capi.magic = PyExpat_CAPI_MAGIC;
2027 capi.MAJOR_VERSION = XML_MAJOR_VERSION;
2028 capi.MINOR_VERSION = XML_MINOR_VERSION;
2029 capi.MICRO_VERSION = XML_MICRO_VERSION;
2030 capi.ErrorString = XML_ErrorString;
2031 capi.GetErrorCode = XML_GetErrorCode;
2032 capi.GetErrorColumnNumber = XML_GetErrorColumnNumber;
2033 capi.GetErrorLineNumber = XML_GetErrorLineNumber;
2034 capi.Parse = XML_Parse;
2035 capi.ParserCreate_MM = XML_ParserCreate_MM;
2036 capi.ParserFree = XML_ParserFree;
2037 capi.SetCharacterDataHandler = XML_SetCharacterDataHandler;
2038 capi.SetCommentHandler = XML_SetCommentHandler;
2039 capi.SetDefaultHandlerExpand = XML_SetDefaultHandlerExpand;
2040 capi.SetElementHandler = XML_SetElementHandler;
2041 capi.SetNamespaceDeclHandler = XML_SetNamespaceDeclHandler;
2042 capi.SetProcessingInstructionHandler = XML_SetProcessingInstructionHandler;
2043 capi.SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler;
2044 capi.SetUserData = XML_SetUserData;
2045
2046 /* export using capsule */
2047 capi_object = PyCapsule_New(&capi, PyExpat_CAPSULE_NAME, NULL);
2048 if (capi_object)
2049 PyModule_AddObject(m, "expat_CAPI", capi_object);
2050 }
2051
2052 static void
clear_handlers(xmlparseobject * self,int initial)2053 clear_handlers(xmlparseobject *self, int initial)
2054 {
2055 int i = 0;
2056 PyObject *temp;
2057
2058 for (; handler_info[i].name != NULL; i++) {
2059 if (initial)
2060 self->handlers[i] = NULL;
2061 else {
2062 temp = self->handlers[i];
2063 self->handlers[i] = NULL;
2064 Py_XDECREF(temp);
2065 handler_info[i].setter(self->itself, NULL);
2066 }
2067 }
2068 }
2069
2070 static struct HandlerInfo handler_info[] = {
2071 {"StartElementHandler",
2072 (xmlhandlersetter)XML_SetStartElementHandler,
2073 (xmlhandler)my_StartElementHandler},
2074 {"EndElementHandler",
2075 (xmlhandlersetter)XML_SetEndElementHandler,
2076 (xmlhandler)my_EndElementHandler},
2077 {"ProcessingInstructionHandler",
2078 (xmlhandlersetter)XML_SetProcessingInstructionHandler,
2079 (xmlhandler)my_ProcessingInstructionHandler},
2080 {"CharacterDataHandler",
2081 (xmlhandlersetter)XML_SetCharacterDataHandler,
2082 (xmlhandler)my_CharacterDataHandler},
2083 {"UnparsedEntityDeclHandler",
2084 (xmlhandlersetter)XML_SetUnparsedEntityDeclHandler,
2085 (xmlhandler)my_UnparsedEntityDeclHandler},
2086 {"NotationDeclHandler",
2087 (xmlhandlersetter)XML_SetNotationDeclHandler,
2088 (xmlhandler)my_NotationDeclHandler},
2089 {"StartNamespaceDeclHandler",
2090 (xmlhandlersetter)XML_SetStartNamespaceDeclHandler,
2091 (xmlhandler)my_StartNamespaceDeclHandler},
2092 {"EndNamespaceDeclHandler",
2093 (xmlhandlersetter)XML_SetEndNamespaceDeclHandler,
2094 (xmlhandler)my_EndNamespaceDeclHandler},
2095 {"CommentHandler",
2096 (xmlhandlersetter)XML_SetCommentHandler,
2097 (xmlhandler)my_CommentHandler},
2098 {"StartCdataSectionHandler",
2099 (xmlhandlersetter)XML_SetStartCdataSectionHandler,
2100 (xmlhandler)my_StartCdataSectionHandler},
2101 {"EndCdataSectionHandler",
2102 (xmlhandlersetter)XML_SetEndCdataSectionHandler,
2103 (xmlhandler)my_EndCdataSectionHandler},
2104 {"DefaultHandler",
2105 (xmlhandlersetter)XML_SetDefaultHandler,
2106 (xmlhandler)my_DefaultHandler},
2107 {"DefaultHandlerExpand",
2108 (xmlhandlersetter)XML_SetDefaultHandlerExpand,
2109 (xmlhandler)my_DefaultHandlerExpandHandler},
2110 {"NotStandaloneHandler",
2111 (xmlhandlersetter)XML_SetNotStandaloneHandler,
2112 (xmlhandler)my_NotStandaloneHandler},
2113 {"ExternalEntityRefHandler",
2114 (xmlhandlersetter)XML_SetExternalEntityRefHandler,
2115 (xmlhandler)my_ExternalEntityRefHandler},
2116 {"StartDoctypeDeclHandler",
2117 (xmlhandlersetter)XML_SetStartDoctypeDeclHandler,
2118 (xmlhandler)my_StartDoctypeDeclHandler},
2119 {"EndDoctypeDeclHandler",
2120 (xmlhandlersetter)XML_SetEndDoctypeDeclHandler,
2121 (xmlhandler)my_EndDoctypeDeclHandler},
2122 {"EntityDeclHandler",
2123 (xmlhandlersetter)XML_SetEntityDeclHandler,
2124 (xmlhandler)my_EntityDeclHandler},
2125 {"XmlDeclHandler",
2126 (xmlhandlersetter)XML_SetXmlDeclHandler,
2127 (xmlhandler)my_XmlDeclHandler},
2128 {"ElementDeclHandler",
2129 (xmlhandlersetter)XML_SetElementDeclHandler,
2130 (xmlhandler)my_ElementDeclHandler},
2131 {"AttlistDeclHandler",
2132 (xmlhandlersetter)XML_SetAttlistDeclHandler,
2133 (xmlhandler)my_AttlistDeclHandler},
2134 #if XML_COMBINED_VERSION >= 19504
2135 {"SkippedEntityHandler",
2136 (xmlhandlersetter)XML_SetSkippedEntityHandler,
2137 (xmlhandler)my_SkippedEntityHandler},
2138 #endif
2139
2140 {NULL, NULL, NULL} /* sentinel */
2141 };
2142