• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * multibytecodec.c: Common Multibyte Codec Implementation
3  *
4  * Written by Hye-Shik Chang <perky@FreeBSD.org>
5  */
6 
7 #define PY_SSIZE_T_CLEAN
8 #include "Python.h"
9 #include "structmember.h"
10 #include "multibytecodec.h"
11 #include "clinic/multibytecodec.c.h"
12 
13 /*[clinic input]
14 module _multibytecodec
15 class _multibytecodec.MultibyteCodec "MultibyteCodecObject *" "&MultibyteCodec_Type"
16 [clinic start generated code]*/
17 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=6ad689546cbb5450]*/
18 
19 typedef struct {
20     PyObject            *inobj;
21     Py_ssize_t          inpos, inlen;
22     unsigned char       *outbuf, *outbuf_end;
23     PyObject            *excobj, *outobj;
24 } MultibyteEncodeBuffer;
25 
26 typedef struct {
27     const unsigned char *inbuf, *inbuf_top, *inbuf_end;
28     PyObject            *excobj;
29     _PyUnicodeWriter    writer;
30 } MultibyteDecodeBuffer;
31 
32 static char *incnewkwarglist[] = {"errors", NULL};
33 static char *streamkwarglist[] = {"stream", "errors", NULL};
34 
35 static PyObject *multibytecodec_encode(MultibyteCodec *,
36                 MultibyteCodec_State *, PyObject *, Py_ssize_t *,
37                 PyObject *, int);
38 
/* Flag bit: reset after an encoding session.  The expansion is parenthesized
 * so it keeps its value next to any neighbouring operator. */
#define MBENC_RESET     (MBENC_MAX << 1)
40 
41 _Py_IDENTIFIER(write);
42 
43 static PyObject *
make_tuple(PyObject * object,Py_ssize_t len)44 make_tuple(PyObject *object, Py_ssize_t len)
45 {
46     PyObject *v, *w;
47 
48     if (object == NULL)
49         return NULL;
50 
51     v = PyTuple_New(2);
52     if (v == NULL) {
53         Py_DECREF(object);
54         return NULL;
55     }
56     PyTuple_SET_ITEM(v, 0, object);
57 
58     w = PyLong_FromSsize_t(len);
59     if (w == NULL) {
60         Py_DECREF(v);
61         return NULL;
62     }
63     PyTuple_SET_ITEM(v, 1, w);
64 
65     return v;
66 }
67 
68 static PyObject *
internal_error_callback(const char * errors)69 internal_error_callback(const char *errors)
70 {
71     if (errors == NULL || strcmp(errors, "strict") == 0)
72         return ERROR_STRICT;
73     else if (strcmp(errors, "ignore") == 0)
74         return ERROR_IGNORE;
75     else if (strcmp(errors, "replace") == 0)
76         return ERROR_REPLACE;
77     else
78         return PyUnicode_FromString(errors);
79 }
80 
81 static PyObject *
call_error_callback(PyObject * errors,PyObject * exc)82 call_error_callback(PyObject *errors, PyObject *exc)
83 {
84     PyObject *args, *cb, *r;
85     const char *str;
86 
87     assert(PyUnicode_Check(errors));
88     str = PyUnicode_AsUTF8(errors);
89     if (str == NULL)
90         return NULL;
91     cb = PyCodec_LookupError(str);
92     if (cb == NULL)
93         return NULL;
94 
95     args = PyTuple_New(1);
96     if (args == NULL) {
97         Py_DECREF(cb);
98         return NULL;
99     }
100 
101     PyTuple_SET_ITEM(args, 0, exc);
102     Py_INCREF(exc);
103 
104     r = PyObject_CallObject(cb, args);
105     Py_DECREF(args);
106     Py_DECREF(cb);
107     return r;
108 }
109 
110 static PyObject *
codecctx_errors_get(MultibyteStatefulCodecContext * self,void * Py_UNUSED (ignored))111 codecctx_errors_get(MultibyteStatefulCodecContext *self, void *Py_UNUSED(ignored))
112 {
113     const char *errors;
114 
115     if (self->errors == ERROR_STRICT)
116         errors = "strict";
117     else if (self->errors == ERROR_IGNORE)
118         errors = "ignore";
119     else if (self->errors == ERROR_REPLACE)
120         errors = "replace";
121     else {
122         Py_INCREF(self->errors);
123         return self->errors;
124     }
125 
126     return PyUnicode_FromString(errors);
127 }
128 
129 static int
codecctx_errors_set(MultibyteStatefulCodecContext * self,PyObject * value,void * closure)130 codecctx_errors_set(MultibyteStatefulCodecContext *self, PyObject *value,
131                     void *closure)
132 {
133     PyObject *cb;
134     const char *str;
135 
136     if (value == NULL) {
137         PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
138         return -1;
139     }
140     if (!PyUnicode_Check(value)) {
141         PyErr_SetString(PyExc_TypeError, "errors must be a string");
142         return -1;
143     }
144 
145     str = PyUnicode_AsUTF8(value);
146     if (str == NULL)
147         return -1;
148 
149     cb = internal_error_callback(str);
150     if (cb == NULL)
151         return -1;
152 
153     ERROR_DECREF(self->errors);
154     self->errors = cb;
155     return 0;
156 }
157 
158 /* This getset handlers list is used by all the stateful codec objects */
159 static PyGetSetDef codecctx_getsets[] = {
160     {"errors",          (getter)codecctx_errors_get,
161                     (setter)codecctx_errors_set,
162                     PyDoc_STR("how to treat errors")},
163     {NULL,}
164 };
165 
166 static int
expand_encodebuffer(MultibyteEncodeBuffer * buf,Py_ssize_t esize)167 expand_encodebuffer(MultibyteEncodeBuffer *buf, Py_ssize_t esize)
168 {
169     Py_ssize_t orgpos, orgsize, incsize;
170 
171     orgpos = (Py_ssize_t)((char *)buf->outbuf -
172                             PyBytes_AS_STRING(buf->outobj));
173     orgsize = PyBytes_GET_SIZE(buf->outobj);
174     incsize = (esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize);
175 
176     if (orgsize > PY_SSIZE_T_MAX - incsize) {
177         PyErr_NoMemory();
178         return -1;
179     }
180 
181     if (_PyBytes_Resize(&buf->outobj, orgsize + incsize) == -1)
182         return -1;
183 
184     buf->outbuf = (unsigned char *)PyBytes_AS_STRING(buf->outobj) +orgpos;
185     buf->outbuf_end = (unsigned char *)PyBytes_AS_STRING(buf->outobj)
186         + PyBytes_GET_SIZE(buf->outobj);
187 
188     return 0;
189 }
/* Ensure at least `s` free bytes in the output of `buf` (a negative `s`
 * always forces growth).  Jumps to the enclosing function's `errorexit`
 * label if the buffer cannot be grown. */
#define REQUIRE_ENCODEBUFFER(buf, s) do {                               \
    if (((s) < 0 || (s) > (buf)->outbuf_end - (buf)->outbuf) &&         \
            expand_encodebuffer(buf, s) == -1) {                        \
        goto errorexit;                                                 \
    }                                                                   \
} while(0)
195 
196 
197 /**
198  * MultibyteCodec object
199  */
200 
201 static int
multibytecodec_encerror(MultibyteCodec * codec,MultibyteCodec_State * state,MultibyteEncodeBuffer * buf,PyObject * errors,Py_ssize_t e)202 multibytecodec_encerror(MultibyteCodec *codec,
203                         MultibyteCodec_State *state,
204                         MultibyteEncodeBuffer *buf,
205                         PyObject *errors, Py_ssize_t e)
206 {
207     PyObject *retobj = NULL, *retstr = NULL, *tobj;
208     Py_ssize_t retstrsize, newpos;
209     Py_ssize_t esize, start, end;
210     const char *reason;
211 
212     if (e > 0) {
213         reason = "illegal multibyte sequence";
214         esize = e;
215     }
216     else {
217         switch (e) {
218         case MBERR_TOOSMALL:
219             REQUIRE_ENCODEBUFFER(buf, -1);
220             return 0; /* retry it */
221         case MBERR_TOOFEW:
222             reason = "incomplete multibyte sequence";
223             esize = (Py_ssize_t)buf->inpos;
224             break;
225         case MBERR_INTERNAL:
226             PyErr_SetString(PyExc_RuntimeError,
227                             "internal codec error");
228             return -1;
229         default:
230             PyErr_SetString(PyExc_RuntimeError,
231                             "unknown runtime error");
232             return -1;
233         }
234     }
235 
236     if (errors == ERROR_REPLACE) {
237         PyObject *replchar;
238         Py_ssize_t r;
239         Py_ssize_t inpos;
240         int kind;
241         void *data;
242 
243         replchar = PyUnicode_FromOrdinal('?');
244         if (replchar == NULL)
245             goto errorexit;
246         kind = PyUnicode_KIND(replchar);
247         data = PyUnicode_DATA(replchar);
248 
249         inpos = 0;
250         for (;;) {
251             Py_ssize_t outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf);
252 
253             r = codec->encode(state, codec->config,
254                               kind, data, &inpos, 1,
255                               &buf->outbuf, outleft, 0);
256             if (r == MBERR_TOOSMALL) {
257                 REQUIRE_ENCODEBUFFER(buf, -1);
258                 continue;
259             }
260             else
261                 break;
262         }
263 
264         Py_DECREF(replchar);
265 
266         if (r != 0) {
267             REQUIRE_ENCODEBUFFER(buf, 1);
268             *buf->outbuf++ = '?';
269         }
270     }
271     if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
272         buf->inpos += esize;
273         return 0;
274     }
275 
276     start = (Py_ssize_t)buf->inpos;
277     end = start + esize;
278 
279     /* use cached exception object if available */
280     if (buf->excobj == NULL) {
281         buf->excobj =  PyObject_CallFunction(PyExc_UnicodeEncodeError,
282                                              "sOnns",
283                                              codec->encoding, buf->inobj,
284                                              start, end, reason);
285         if (buf->excobj == NULL)
286             goto errorexit;
287     }
288     else
289         if (PyUnicodeEncodeError_SetStart(buf->excobj, start) != 0 ||
290             PyUnicodeEncodeError_SetEnd(buf->excobj, end) != 0 ||
291             PyUnicodeEncodeError_SetReason(buf->excobj, reason) != 0)
292             goto errorexit;
293 
294     if (errors == ERROR_STRICT) {
295         PyCodec_StrictErrors(buf->excobj);
296         goto errorexit;
297     }
298 
299     retobj = call_error_callback(errors, buf->excobj);
300     if (retobj == NULL)
301         goto errorexit;
302 
303     if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
304         (!PyUnicode_Check((tobj = PyTuple_GET_ITEM(retobj, 0))) && !PyBytes_Check(tobj)) ||
305         !PyLong_Check(PyTuple_GET_ITEM(retobj, 1))) {
306         PyErr_SetString(PyExc_TypeError,
307                         "encoding error handler must return "
308                         "(str, int) tuple");
309         goto errorexit;
310     }
311 
312     if (PyUnicode_Check(tobj)) {
313         Py_ssize_t inpos;
314 
315         retstr = multibytecodec_encode(codec, state, tobj,
316                         &inpos, ERROR_STRICT,
317                         MBENC_FLUSH);
318         if (retstr == NULL)
319             goto errorexit;
320     }
321     else {
322         Py_INCREF(tobj);
323         retstr = tobj;
324     }
325 
326     assert(PyBytes_Check(retstr));
327     retstrsize = PyBytes_GET_SIZE(retstr);
328     if (retstrsize > 0) {
329         REQUIRE_ENCODEBUFFER(buf, retstrsize);
330         memcpy(buf->outbuf, PyBytes_AS_STRING(retstr), retstrsize);
331         buf->outbuf += retstrsize;
332     }
333 
334     newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
335     if (newpos < 0 && !PyErr_Occurred())
336         newpos += (Py_ssize_t)buf->inlen;
337     if (newpos < 0 || newpos > buf->inlen) {
338         PyErr_Clear();
339         PyErr_Format(PyExc_IndexError,
340                      "position %zd from error handler out of bounds",
341                      newpos);
342         goto errorexit;
343     }
344     buf->inpos = newpos;
345 
346     Py_DECREF(retobj);
347     Py_DECREF(retstr);
348     return 0;
349 
350 errorexit:
351     Py_XDECREF(retobj);
352     Py_XDECREF(retstr);
353     return -1;
354 }
355 
356 static int
multibytecodec_decerror(MultibyteCodec * codec,MultibyteCodec_State * state,MultibyteDecodeBuffer * buf,PyObject * errors,Py_ssize_t e)357 multibytecodec_decerror(MultibyteCodec *codec,
358                         MultibyteCodec_State *state,
359                         MultibyteDecodeBuffer *buf,
360                         PyObject *errors, Py_ssize_t e)
361 {
362     PyObject *retobj = NULL, *retuni = NULL;
363     Py_ssize_t newpos;
364     const char *reason;
365     Py_ssize_t esize, start, end;
366 
367     if (e > 0) {
368         reason = "illegal multibyte sequence";
369         esize = e;
370     }
371     else {
372         switch (e) {
373         case MBERR_TOOSMALL:
374             return 0; /* retry it */
375         case MBERR_TOOFEW:
376             reason = "incomplete multibyte sequence";
377             esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
378             break;
379         case MBERR_INTERNAL:
380             PyErr_SetString(PyExc_RuntimeError,
381                             "internal codec error");
382             return -1;
383         case MBERR_EXCEPTION:
384             return -1;
385         default:
386             PyErr_SetString(PyExc_RuntimeError,
387                             "unknown runtime error");
388             return -1;
389         }
390     }
391 
392     if (errors == ERROR_REPLACE) {
393         if (_PyUnicodeWriter_WriteChar(&buf->writer,
394                                        Py_UNICODE_REPLACEMENT_CHARACTER) < 0)
395             goto errorexit;
396     }
397     if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
398         buf->inbuf += esize;
399         return 0;
400     }
401 
402     start = (Py_ssize_t)(buf->inbuf - buf->inbuf_top);
403     end = start + esize;
404 
405     /* use cached exception object if available */
406     if (buf->excobj == NULL) {
407         buf->excobj = PyUnicodeDecodeError_Create(codec->encoding,
408                         (const char *)buf->inbuf_top,
409                         (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top),
410                         start, end, reason);
411         if (buf->excobj == NULL)
412             goto errorexit;
413     }
414     else
415         if (PyUnicodeDecodeError_SetStart(buf->excobj, start) ||
416             PyUnicodeDecodeError_SetEnd(buf->excobj, end) ||
417             PyUnicodeDecodeError_SetReason(buf->excobj, reason))
418             goto errorexit;
419 
420     if (errors == ERROR_STRICT) {
421         PyCodec_StrictErrors(buf->excobj);
422         goto errorexit;
423     }
424 
425     retobj = call_error_callback(errors, buf->excobj);
426     if (retobj == NULL)
427         goto errorexit;
428 
429     if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
430         !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) ||
431         !PyLong_Check(PyTuple_GET_ITEM(retobj, 1))) {
432         PyErr_SetString(PyExc_TypeError,
433                         "decoding error handler must return "
434                         "(str, int) tuple");
435         goto errorexit;
436     }
437 
438     if (_PyUnicodeWriter_WriteStr(&buf->writer, retuni) < 0)
439         goto errorexit;
440 
441     newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
442     if (newpos < 0 && !PyErr_Occurred())
443         newpos += (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top);
444     if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) {
445         PyErr_Clear();
446         PyErr_Format(PyExc_IndexError,
447                      "position %zd from error handler out of bounds",
448                      newpos);
449         goto errorexit;
450     }
451     buf->inbuf = buf->inbuf_top + newpos;
452     Py_DECREF(retobj);
453     return 0;
454 
455 errorexit:
456     Py_XDECREF(retobj);
457     return -1;
458 }
459 
460 static PyObject *
multibytecodec_encode(MultibyteCodec * codec,MultibyteCodec_State * state,PyObject * text,Py_ssize_t * inpos_t,PyObject * errors,int flags)461 multibytecodec_encode(MultibyteCodec *codec,
462                       MultibyteCodec_State *state,
463                       PyObject *text, Py_ssize_t *inpos_t,
464                       PyObject *errors, int flags)
465 {
466     MultibyteEncodeBuffer buf;
467     Py_ssize_t finalsize, r = 0;
468     Py_ssize_t datalen;
469     int kind;
470     void *data;
471 
472     if (PyUnicode_READY(text) < 0)
473         return NULL;
474     datalen = PyUnicode_GET_LENGTH(text);
475 
476     if (datalen == 0 && !(flags & MBENC_RESET))
477         return PyBytes_FromStringAndSize(NULL, 0);
478 
479     buf.excobj = NULL;
480     buf.outobj = NULL;
481     buf.inobj = text;   /* borrowed reference */
482     buf.inpos = 0;
483     buf.inlen = datalen;
484     kind = PyUnicode_KIND(buf.inobj);
485     data = PyUnicode_DATA(buf.inobj);
486 
487     if (datalen > (PY_SSIZE_T_MAX - 16) / 2) {
488         PyErr_NoMemory();
489         goto errorexit;
490     }
491 
492     buf.outobj = PyBytes_FromStringAndSize(NULL, datalen * 2 + 16);
493     if (buf.outobj == NULL)
494         goto errorexit;
495     buf.outbuf = (unsigned char *)PyBytes_AS_STRING(buf.outobj);
496     buf.outbuf_end = buf.outbuf + PyBytes_GET_SIZE(buf.outobj);
497 
498     while (buf.inpos < buf.inlen) {
499         /* we don't reuse inleft and outleft here.
500          * error callbacks can relocate the cursor anywhere on buffer*/
501         Py_ssize_t outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
502 
503         r = codec->encode(state, codec->config,
504                           kind, data,
505                           &buf.inpos, buf.inlen,
506                           &buf.outbuf, outleft, flags);
507         if ((r == 0) || (r == MBERR_TOOFEW && !(flags & MBENC_FLUSH)))
508             break;
509         else if (multibytecodec_encerror(codec, state, &buf, errors,r))
510             goto errorexit;
511         else if (r == MBERR_TOOFEW)
512             break;
513     }
514 
515     if (codec->encreset != NULL && (flags & MBENC_RESET))
516         for (;;) {
517             Py_ssize_t outleft;
518 
519             outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
520             r = codec->encreset(state, codec->config, &buf.outbuf,
521                                 outleft);
522             if (r == 0)
523                 break;
524             else if (multibytecodec_encerror(codec, state,
525                                              &buf, errors, r))
526                 goto errorexit;
527         }
528 
529     finalsize = (Py_ssize_t)((char *)buf.outbuf -
530                              PyBytes_AS_STRING(buf.outobj));
531 
532     if (finalsize != PyBytes_GET_SIZE(buf.outobj))
533         if (_PyBytes_Resize(&buf.outobj, finalsize) == -1)
534             goto errorexit;
535 
536     if (inpos_t)
537         *inpos_t = buf.inpos;
538     Py_XDECREF(buf.excobj);
539     return buf.outobj;
540 
541 errorexit:
542     Py_XDECREF(buf.excobj);
543     Py_XDECREF(buf.outobj);
544     return NULL;
545 }
546 
547 /*[clinic input]
548 _multibytecodec.MultibyteCodec.encode
549 
550   input: object
551   errors: str(accept={str, NoneType}) = NULL
552 
553 Return an encoded string version of `input'.
554 
555 'errors' may be given to set a different error handling scheme. Default is
556 'strict' meaning that encoding errors raise a UnicodeEncodeError. Other possible
557 values are 'ignore', 'replace' and 'xmlcharrefreplace' as well as any other name
558 registered with codecs.register_error that can handle UnicodeEncodeErrors.
559 [clinic start generated code]*/
560 
561 static PyObject *
_multibytecodec_MultibyteCodec_encode_impl(MultibyteCodecObject * self,PyObject * input,const char * errors)562 _multibytecodec_MultibyteCodec_encode_impl(MultibyteCodecObject *self,
563                                            PyObject *input,
564                                            const char *errors)
565 /*[clinic end generated code: output=7b26652045ba56a9 input=05f6ced3c8dd0582]*/
566 {
567     MultibyteCodec_State state;
568     PyObject *errorcb, *r, *ucvt;
569     Py_ssize_t datalen;
570 
571     if (PyUnicode_Check(input))
572         ucvt = NULL;
573     else {
574         input = ucvt = PyObject_Str(input);
575         if (input == NULL)
576             return NULL;
577         else if (!PyUnicode_Check(input)) {
578             PyErr_SetString(PyExc_TypeError,
579                 "couldn't convert the object to unicode.");
580             Py_DECREF(ucvt);
581             return NULL;
582         }
583     }
584 
585     if (PyUnicode_READY(input) < 0) {
586         Py_XDECREF(ucvt);
587         return NULL;
588     }
589     datalen = PyUnicode_GET_LENGTH(input);
590 
591     errorcb = internal_error_callback(errors);
592     if (errorcb == NULL) {
593         Py_XDECREF(ucvt);
594         return NULL;
595     }
596 
597     if (self->codec->encinit != NULL &&
598         self->codec->encinit(&state, self->codec->config) != 0)
599         goto errorexit;
600     r = multibytecodec_encode(self->codec, &state,
601                     input, NULL, errorcb,
602                     MBENC_FLUSH | MBENC_RESET);
603     if (r == NULL)
604         goto errorexit;
605 
606     ERROR_DECREF(errorcb);
607     Py_XDECREF(ucvt);
608     return make_tuple(r, datalen);
609 
610 errorexit:
611     ERROR_DECREF(errorcb);
612     Py_XDECREF(ucvt);
613     return NULL;
614 }
615 
616 /*[clinic input]
617 _multibytecodec.MultibyteCodec.decode
618 
619   input: Py_buffer
620   errors: str(accept={str, NoneType}) = NULL
621 
622 Decodes 'input'.
623 
624 'errors' may be given to set a different error handling scheme. Default is
625 'strict' meaning that encoding errors raise a UnicodeDecodeError. Other possible
626 values are 'ignore' and 'replace' as well as any other name registered with
627 codecs.register_error that is able to handle UnicodeDecodeErrors."
628 [clinic start generated code]*/
629 
630 static PyObject *
_multibytecodec_MultibyteCodec_decode_impl(MultibyteCodecObject * self,Py_buffer * input,const char * errors)631 _multibytecodec_MultibyteCodec_decode_impl(MultibyteCodecObject *self,
632                                            Py_buffer *input,
633                                            const char *errors)
634 /*[clinic end generated code: output=ff419f65bad6cc77 input=a7d45f87f75e5e02]*/
635 {
636     MultibyteCodec_State state;
637     MultibyteDecodeBuffer buf;
638     PyObject *errorcb, *res;
639     const char *data;
640     Py_ssize_t datalen;
641 
642     data = input->buf;
643     datalen = input->len;
644 
645     errorcb = internal_error_callback(errors);
646     if (errorcb == NULL) {
647         return NULL;
648     }
649 
650     if (datalen == 0) {
651         ERROR_DECREF(errorcb);
652         return make_tuple(PyUnicode_New(0, 0), 0);
653     }
654 
655     _PyUnicodeWriter_Init(&buf.writer);
656     buf.writer.min_length = datalen;
657     buf.excobj = NULL;
658     buf.inbuf = buf.inbuf_top = (unsigned char *)data;
659     buf.inbuf_end = buf.inbuf_top + datalen;
660 
661     if (self->codec->decinit != NULL &&
662         self->codec->decinit(&state, self->codec->config) != 0)
663         goto errorexit;
664 
665     while (buf.inbuf < buf.inbuf_end) {
666         Py_ssize_t inleft, r;
667 
668         inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf);
669 
670         r = self->codec->decode(&state, self->codec->config,
671                         &buf.inbuf, inleft, &buf.writer);
672         if (r == 0)
673             break;
674         else if (multibytecodec_decerror(self->codec, &state,
675                                          &buf, errorcb, r))
676             goto errorexit;
677     }
678 
679     res = _PyUnicodeWriter_Finish(&buf.writer);
680     if (res == NULL)
681         goto errorexit;
682 
683     Py_XDECREF(buf.excobj);
684     ERROR_DECREF(errorcb);
685     return make_tuple(res, datalen);
686 
687 errorexit:
688     ERROR_DECREF(errorcb);
689     Py_XDECREF(buf.excobj);
690     _PyUnicodeWriter_Dealloc(&buf.writer);
691 
692     return NULL;
693 }
694 
695 static struct PyMethodDef multibytecodec_methods[] = {
696     _MULTIBYTECODEC_MULTIBYTECODEC_ENCODE_METHODDEF
697     _MULTIBYTECODEC_MULTIBYTECODEC_DECODE_METHODDEF
698     {NULL, NULL},
699 };
700 
701 static void
multibytecodec_dealloc(MultibyteCodecObject * self)702 multibytecodec_dealloc(MultibyteCodecObject *self)
703 {
704     PyObject_Del(self);
705 }
706 
707 static PyTypeObject MultibyteCodec_Type = {
708     PyVarObject_HEAD_INIT(NULL, 0)
709     "MultibyteCodec",                   /* tp_name */
710     sizeof(MultibyteCodecObject),       /* tp_basicsize */
711     0,                                  /* tp_itemsize */
712     /* methods */
713     (destructor)multibytecodec_dealloc, /* tp_dealloc */
714     0,                                  /* tp_print */
715     0,                                  /* tp_getattr */
716     0,                                  /* tp_setattr */
717     0,                                  /* tp_reserved */
718     0,                                  /* tp_repr */
719     0,                                  /* tp_as_number */
720     0,                                  /* tp_as_sequence */
721     0,                                  /* tp_as_mapping */
722     0,                                  /* tp_hash */
723     0,                                  /* tp_call */
724     0,                                  /* tp_str */
725     PyObject_GenericGetAttr,            /* tp_getattro */
726     0,                                  /* tp_setattro */
727     0,                                  /* tp_as_buffer */
728     Py_TPFLAGS_DEFAULT,                 /* tp_flags */
729     0,                                  /* tp_doc */
730     0,                                  /* tp_traverse */
731     0,                                  /* tp_clear */
732     0,                                  /* tp_richcompare */
733     0,                                  /* tp_weaklistoffset */
734     0,                                  /* tp_iter */
735     0,                                  /* tp_iterext */
736     multibytecodec_methods,             /* tp_methods */
737 };
738 
739 
740 /**
741  * Utility functions for stateful codec mechanism
742  */
743 
/* Cast an object pointer to the stateful decoder / encoder context type. */
#define STATEFUL_DCTX(o) \
    ((MultibyteStatefulDecoderContext *)(o))
#define STATEFUL_ECTX(o) \
    ((MultibyteStatefulEncoderContext *)(o))
746 
747 static PyObject *
encoder_encode_stateful(MultibyteStatefulEncoderContext * ctx,PyObject * unistr,int final)748 encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx,
749                         PyObject *unistr, int final)
750 {
751     PyObject *ucvt, *r = NULL;
752     PyObject *inbuf = NULL;
753     Py_ssize_t inpos, datalen;
754     PyObject *origpending = NULL;
755 
756     if (PyUnicode_Check(unistr))
757         ucvt = NULL;
758     else {
759         unistr = ucvt = PyObject_Str(unistr);
760         if (unistr == NULL)
761             return NULL;
762         else if (!PyUnicode_Check(unistr)) {
763             PyErr_SetString(PyExc_TypeError,
764                 "couldn't convert the object to str.");
765             Py_DECREF(ucvt);
766             return NULL;
767         }
768     }
769 
770     if (ctx->pending) {
771         PyObject *inbuf_tmp;
772 
773         Py_INCREF(ctx->pending);
774         origpending = ctx->pending;
775 
776         Py_INCREF(ctx->pending);
777         inbuf_tmp = ctx->pending;
778         PyUnicode_Append(&inbuf_tmp, unistr);
779         if (inbuf_tmp == NULL)
780             goto errorexit;
781         Py_CLEAR(ctx->pending);
782         inbuf = inbuf_tmp;
783     }
784     else {
785         origpending = NULL;
786 
787         Py_INCREF(unistr);
788         inbuf = unistr;
789     }
790     if (PyUnicode_READY(inbuf) < 0)
791         goto errorexit;
792     inpos = 0;
793     datalen = PyUnicode_GET_LENGTH(inbuf);
794 
795     r = multibytecodec_encode(ctx->codec, &ctx->state,
796                               inbuf, &inpos,
797                               ctx->errors, final ? MBENC_FLUSH | MBENC_RESET : 0);
798     if (r == NULL) {
799         /* recover the original pending buffer */
800         Py_XSETREF(ctx->pending, origpending);
801         origpending = NULL;
802         goto errorexit;
803     }
804     Py_XDECREF(origpending);
805 
806     if (inpos < datalen) {
807         if (datalen - inpos > MAXENCPENDING) {
808             /* normal codecs can't reach here */
809             PyErr_SetString(PyExc_UnicodeError,
810                             "pending buffer overflow");
811             goto errorexit;
812         }
813         ctx->pending = PyUnicode_Substring(inbuf, inpos, datalen);
814         if (ctx->pending == NULL) {
815             /* normal codecs can't reach here */
816             goto errorexit;
817         }
818     }
819 
820     Py_DECREF(inbuf);
821     Py_XDECREF(ucvt);
822     return r;
823 
824 errorexit:
825     Py_XDECREF(r);
826     Py_XDECREF(ucvt);
827     Py_XDECREF(origpending);
828     Py_XDECREF(inbuf);
829     return NULL;
830 }
831 
832 static int
decoder_append_pending(MultibyteStatefulDecoderContext * ctx,MultibyteDecodeBuffer * buf)833 decoder_append_pending(MultibyteStatefulDecoderContext *ctx,
834                        MultibyteDecodeBuffer *buf)
835 {
836     Py_ssize_t npendings;
837 
838     npendings = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
839     if (npendings + ctx->pendingsize > MAXDECPENDING ||
840         npendings > PY_SSIZE_T_MAX - ctx->pendingsize) {
841             PyErr_SetString(PyExc_UnicodeError, "pending buffer overflow");
842             return -1;
843     }
844     memcpy(ctx->pending + ctx->pendingsize, buf->inbuf, npendings);
845     ctx->pendingsize += npendings;
846     return 0;
847 }
848 
849 static int
decoder_prepare_buffer(MultibyteDecodeBuffer * buf,const char * data,Py_ssize_t size)850 decoder_prepare_buffer(MultibyteDecodeBuffer *buf, const char *data,
851                        Py_ssize_t size)
852 {
853     buf->inbuf = buf->inbuf_top = (const unsigned char *)data;
854     buf->inbuf_end = buf->inbuf_top + size;
855     buf->writer.min_length += size;
856     return 0;
857 }
858 
859 static int
decoder_feed_buffer(MultibyteStatefulDecoderContext * ctx,MultibyteDecodeBuffer * buf)860 decoder_feed_buffer(MultibyteStatefulDecoderContext *ctx,
861                     MultibyteDecodeBuffer *buf)
862 {
863     while (buf->inbuf < buf->inbuf_end) {
864         Py_ssize_t inleft;
865         Py_ssize_t r;
866 
867         inleft = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
868 
869         r = ctx->codec->decode(&ctx->state, ctx->codec->config,
870             &buf->inbuf, inleft, &buf->writer);
871         if (r == 0 || r == MBERR_TOOFEW)
872             break;
873         else if (multibytecodec_decerror(ctx->codec, &ctx->state,
874                                          buf, ctx->errors, r))
875             return -1;
876     }
877     return 0;
878 }
879 
880 
881 /*[clinic input]
882  class _multibytecodec.MultibyteIncrementalEncoder "MultibyteIncrementalEncoderObject *" "&MultibyteIncrementalEncoder_Type"
883 [clinic start generated code]*/
884 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=3be82909cd08924d]*/
885 
886 /*[clinic input]
887 _multibytecodec.MultibyteIncrementalEncoder.encode
888 
889     input: object
890     final: bool(accept={int}) = False
891 [clinic start generated code]*/
892 
893 static PyObject *
_multibytecodec_MultibyteIncrementalEncoder_encode_impl(MultibyteIncrementalEncoderObject * self,PyObject * input,int final)894 _multibytecodec_MultibyteIncrementalEncoder_encode_impl(MultibyteIncrementalEncoderObject *self,
895                                                         PyObject *input,
896                                                         int final)
897 /*[clinic end generated code: output=123361b6c505e2c1 input=093a1ddbb2fc6721]*/
898 {
899     return encoder_encode_stateful(STATEFUL_ECTX(self), input, final);
900 }
901 
902 /*[clinic input]
903 _multibytecodec.MultibyteIncrementalEncoder.reset
904 [clinic start generated code]*/
905 
906 static PyObject *
_multibytecodec_MultibyteIncrementalEncoder_reset_impl(MultibyteIncrementalEncoderObject * self)907 _multibytecodec_MultibyteIncrementalEncoder_reset_impl(MultibyteIncrementalEncoderObject *self)
908 /*[clinic end generated code: output=b4125d8f537a253f input=930f06760707b6ea]*/
909 {
910     /* Longest output: 4 bytes (b'\x0F\x1F(B') with ISO 2022 */
911     unsigned char buffer[4], *outbuf;
912     Py_ssize_t r;
913     if (self->codec->encreset != NULL) {
914         outbuf = buffer;
915         r = self->codec->encreset(&self->state, self->codec->config,
916                                   &outbuf, sizeof(buffer));
917         if (r != 0)
918             return NULL;
919     }
920     Py_CLEAR(self->pending);
921     Py_RETURN_NONE;
922 }
923 
924 static struct PyMethodDef mbiencoder_methods[] = {
925     _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_ENCODE_METHODDEF
926     _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_RESET_METHODDEF
927     {NULL, NULL},
928 };
929 
930 static PyObject *
mbiencoder_new(PyTypeObject * type,PyObject * args,PyObject * kwds)931 mbiencoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
932 {
933     MultibyteIncrementalEncoderObject *self;
934     PyObject *codec = NULL;
935     char *errors = NULL;
936 
937     if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalEncoder",
938                                      incnewkwarglist, &errors))
939         return NULL;
940 
941     self = (MultibyteIncrementalEncoderObject *)type->tp_alloc(type, 0);
942     if (self == NULL)
943         return NULL;
944 
945     codec = PyObject_GetAttrString((PyObject *)type, "codec");
946     if (codec == NULL)
947         goto errorexit;
948     if (!MultibyteCodec_Check(codec)) {
949         PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
950         goto errorexit;
951     }
952 
953     self->codec = ((MultibyteCodecObject *)codec)->codec;
954     self->pending = NULL;
955     self->errors = internal_error_callback(errors);
956     if (self->errors == NULL)
957         goto errorexit;
958     if (self->codec->encinit != NULL &&
959         self->codec->encinit(&self->state, self->codec->config) != 0)
960         goto errorexit;
961 
962     Py_DECREF(codec);
963     return (PyObject *)self;
964 
965 errorexit:
966     Py_XDECREF(self);
967     Py_XDECREF(codec);
968     return NULL;
969 }
970 
971 static int
mbiencoder_init(PyObject * self,PyObject * args,PyObject * kwds)972 mbiencoder_init(PyObject *self, PyObject *args, PyObject *kwds)
973 {
974     return 0;
975 }
976 
977 static int
mbiencoder_traverse(MultibyteIncrementalEncoderObject * self,visitproc visit,void * arg)978 mbiencoder_traverse(MultibyteIncrementalEncoderObject *self,
979                     visitproc visit, void *arg)
980 {
981     if (ERROR_ISCUSTOM(self->errors))
982         Py_VISIT(self->errors);
983     return 0;
984 }
985 
986 static void
mbiencoder_dealloc(MultibyteIncrementalEncoderObject * self)987 mbiencoder_dealloc(MultibyteIncrementalEncoderObject *self)
988 {
989     PyObject_GC_UnTrack(self);
990     ERROR_DECREF(self->errors);
991     Py_TYPE(self)->tp_free(self);
992 }
993 
994 static PyTypeObject MultibyteIncrementalEncoder_Type = {
995     PyVarObject_HEAD_INIT(NULL, 0)
996     "MultibyteIncrementalEncoder",      /* tp_name */
997     sizeof(MultibyteIncrementalEncoderObject), /* tp_basicsize */
998     0,                                  /* tp_itemsize */
999     /*  methods  */
1000     (destructor)mbiencoder_dealloc, /* tp_dealloc */
1001     0,                                  /* tp_print */
1002     0,                                  /* tp_getattr */
1003     0,                                  /* tp_setattr */
1004     0,                                  /* tp_reserved */
1005     0,                                  /* tp_repr */
1006     0,                                  /* tp_as_number */
1007     0,                                  /* tp_as_sequence */
1008     0,                                  /* tp_as_mapping */
1009     0,                                  /* tp_hash */
1010     0,                                  /* tp_call */
1011     0,                                  /* tp_str */
1012     PyObject_GenericGetAttr,            /* tp_getattro */
1013     0,                                  /* tp_setattro */
1014     0,                                  /* tp_as_buffer */
1015     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC
1016         | Py_TPFLAGS_BASETYPE,          /* tp_flags */
1017     0,                                  /* tp_doc */
1018     (traverseproc)mbiencoder_traverse,          /* tp_traverse */
1019     0,                                  /* tp_clear */
1020     0,                                  /* tp_richcompare */
1021     0,                                  /* tp_weaklistoffset */
1022     0,                                  /* tp_iter */
1023     0,                                  /* tp_iterext */
1024     mbiencoder_methods,                 /* tp_methods */
1025     0,                                  /* tp_members */
1026     codecctx_getsets,                   /* tp_getset */
1027     0,                                  /* tp_base */
1028     0,                                  /* tp_dict */
1029     0,                                  /* tp_descr_get */
1030     0,                                  /* tp_descr_set */
1031     0,                                  /* tp_dictoffset */
1032     mbiencoder_init,                    /* tp_init */
1033     0,                                  /* tp_alloc */
1034     mbiencoder_new,                     /* tp_new */
1035 };
1036 
1037 
1038 /*[clinic input]
1039  class _multibytecodec.MultibyteIncrementalDecoder "MultibyteIncrementalDecoderObject *" "&MultibyteIncrementalDecoder_Type"
1040 [clinic start generated code]*/
1041 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=f6003faaf2cea692]*/
1042 
1043 /*[clinic input]
1044 _multibytecodec.MultibyteIncrementalDecoder.decode
1045 
1046     input: Py_buffer
1047     final: bool(accept={int}) = False
1048 [clinic start generated code]*/
1049 
1050 static PyObject *
_multibytecodec_MultibyteIncrementalDecoder_decode_impl(MultibyteIncrementalDecoderObject * self,Py_buffer * input,int final)1051 _multibytecodec_MultibyteIncrementalDecoder_decode_impl(MultibyteIncrementalDecoderObject *self,
1052                                                         Py_buffer *input,
1053                                                         int final)
1054 /*[clinic end generated code: output=b9b9090e8a9ce2ba input=c9132b24d503eb1d]*/
1055 {
1056     MultibyteDecodeBuffer buf;
1057     char *data, *wdata = NULL;
1058     Py_ssize_t wsize, size, origpending;
1059     PyObject *res;
1060 
1061     data = input->buf;
1062     size = input->len;
1063 
1064     _PyUnicodeWriter_Init(&buf.writer);
1065     buf.excobj = NULL;
1066     origpending = self->pendingsize;
1067 
1068     if (self->pendingsize == 0) {
1069         wsize = size;
1070         wdata = data;
1071     }
1072     else {
1073         if (size > PY_SSIZE_T_MAX - self->pendingsize) {
1074             PyErr_NoMemory();
1075             goto errorexit;
1076         }
1077         wsize = size + self->pendingsize;
1078         wdata = PyMem_Malloc(wsize);
1079         if (wdata == NULL) {
1080             PyErr_NoMemory();
1081             goto errorexit;
1082         }
1083         memcpy(wdata, self->pending, self->pendingsize);
1084         memcpy(wdata + self->pendingsize, data, size);
1085         self->pendingsize = 0;
1086     }
1087 
1088     if (decoder_prepare_buffer(&buf, wdata, wsize) != 0)
1089         goto errorexit;
1090 
1091     if (decoder_feed_buffer(STATEFUL_DCTX(self), &buf))
1092         goto errorexit;
1093 
1094     if (final && buf.inbuf < buf.inbuf_end) {
1095         if (multibytecodec_decerror(self->codec, &self->state,
1096                         &buf, self->errors, MBERR_TOOFEW)) {
1097             /* recover the original pending buffer */
1098             memcpy(self->pending, wdata, origpending);
1099             self->pendingsize = origpending;
1100             goto errorexit;
1101         }
1102     }
1103 
1104     if (buf.inbuf < buf.inbuf_end) { /* pending sequence still exists */
1105         if (decoder_append_pending(STATEFUL_DCTX(self), &buf) != 0)
1106             goto errorexit;
1107     }
1108 
1109     res = _PyUnicodeWriter_Finish(&buf.writer);
1110     if (res == NULL)
1111         goto errorexit;
1112 
1113     if (wdata != data)
1114         PyMem_Del(wdata);
1115     Py_XDECREF(buf.excobj);
1116     return res;
1117 
1118 errorexit:
1119     if (wdata != NULL && wdata != data)
1120         PyMem_Del(wdata);
1121     Py_XDECREF(buf.excobj);
1122     _PyUnicodeWriter_Dealloc(&buf.writer);
1123     return NULL;
1124 }
1125 
1126 /*[clinic input]
1127 _multibytecodec.MultibyteIncrementalDecoder.reset
1128 [clinic start generated code]*/
1129 
1130 static PyObject *
_multibytecodec_MultibyteIncrementalDecoder_reset_impl(MultibyteIncrementalDecoderObject * self)1131 _multibytecodec_MultibyteIncrementalDecoder_reset_impl(MultibyteIncrementalDecoderObject *self)
1132 /*[clinic end generated code: output=da423b1782c23ed1 input=3b63b3be85b2fb45]*/
1133 {
1134     if (self->codec->decreset != NULL &&
1135         self->codec->decreset(&self->state, self->codec->config) != 0)
1136         return NULL;
1137     self->pendingsize = 0;
1138 
1139     Py_RETURN_NONE;
1140 }
1141 
1142 static struct PyMethodDef mbidecoder_methods[] = {
1143     _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_DECODE_METHODDEF
1144     _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_RESET_METHODDEF
1145     {NULL, NULL},
1146 };
1147 
1148 static PyObject *
mbidecoder_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1149 mbidecoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1150 {
1151     MultibyteIncrementalDecoderObject *self;
1152     PyObject *codec = NULL;
1153     char *errors = NULL;
1154 
1155     if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalDecoder",
1156                                      incnewkwarglist, &errors))
1157         return NULL;
1158 
1159     self = (MultibyteIncrementalDecoderObject *)type->tp_alloc(type, 0);
1160     if (self == NULL)
1161         return NULL;
1162 
1163     codec = PyObject_GetAttrString((PyObject *)type, "codec");
1164     if (codec == NULL)
1165         goto errorexit;
1166     if (!MultibyteCodec_Check(codec)) {
1167         PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1168         goto errorexit;
1169     }
1170 
1171     self->codec = ((MultibyteCodecObject *)codec)->codec;
1172     self->pendingsize = 0;
1173     self->errors = internal_error_callback(errors);
1174     if (self->errors == NULL)
1175         goto errorexit;
1176     if (self->codec->decinit != NULL &&
1177         self->codec->decinit(&self->state, self->codec->config) != 0)
1178         goto errorexit;
1179 
1180     Py_DECREF(codec);
1181     return (PyObject *)self;
1182 
1183 errorexit:
1184     Py_XDECREF(self);
1185     Py_XDECREF(codec);
1186     return NULL;
1187 }
1188 
1189 static int
mbidecoder_init(PyObject * self,PyObject * args,PyObject * kwds)1190 mbidecoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1191 {
1192     return 0;
1193 }
1194 
1195 static int
mbidecoder_traverse(MultibyteIncrementalDecoderObject * self,visitproc visit,void * arg)1196 mbidecoder_traverse(MultibyteIncrementalDecoderObject *self,
1197                     visitproc visit, void *arg)
1198 {
1199     if (ERROR_ISCUSTOM(self->errors))
1200         Py_VISIT(self->errors);
1201     return 0;
1202 }
1203 
1204 static void
mbidecoder_dealloc(MultibyteIncrementalDecoderObject * self)1205 mbidecoder_dealloc(MultibyteIncrementalDecoderObject *self)
1206 {
1207     PyObject_GC_UnTrack(self);
1208     ERROR_DECREF(self->errors);
1209     Py_TYPE(self)->tp_free(self);
1210 }
1211 
1212 static PyTypeObject MultibyteIncrementalDecoder_Type = {
1213     PyVarObject_HEAD_INIT(NULL, 0)
1214     "MultibyteIncrementalDecoder",      /* tp_name */
1215     sizeof(MultibyteIncrementalDecoderObject), /* tp_basicsize */
1216     0,                                  /* tp_itemsize */
1217     /*  methods  */
1218     (destructor)mbidecoder_dealloc, /* tp_dealloc */
1219     0,                                  /* tp_print */
1220     0,                                  /* tp_getattr */
1221     0,                                  /* tp_setattr */
1222     0,                                  /* tp_reserved */
1223     0,                                  /* tp_repr */
1224     0,                                  /* tp_as_number */
1225     0,                                  /* tp_as_sequence */
1226     0,                                  /* tp_as_mapping */
1227     0,                                  /* tp_hash */
1228     0,                                  /* tp_call */
1229     0,                                  /* tp_str */
1230     PyObject_GenericGetAttr,            /* tp_getattro */
1231     0,                                  /* tp_setattro */
1232     0,                                  /* tp_as_buffer */
1233     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC
1234         | Py_TPFLAGS_BASETYPE,          /* tp_flags */
1235     0,                                  /* tp_doc */
1236     (traverseproc)mbidecoder_traverse,          /* tp_traverse */
1237     0,                                  /* tp_clear */
1238     0,                                  /* tp_richcompare */
1239     0,                                  /* tp_weaklistoffset */
1240     0,                                  /* tp_iter */
1241     0,                                  /* tp_iterext */
1242     mbidecoder_methods,                 /* tp_methods */
1243     0,                                  /* tp_members */
1244     codecctx_getsets,                   /* tp_getset */
1245     0,                                  /* tp_base */
1246     0,                                  /* tp_dict */
1247     0,                                  /* tp_descr_get */
1248     0,                                  /* tp_descr_set */
1249     0,                                  /* tp_dictoffset */
1250     mbidecoder_init,                    /* tp_init */
1251     0,                                  /* tp_alloc */
1252     mbidecoder_new,                     /* tp_new */
1253 };
1254 
1255 
1256 /*[clinic input]
1257  class _multibytecodec.MultibyteStreamReader "MultibyteStreamReaderObject *" "MultibyteStreamReader_Type"
1258 [clinic start generated code]*/
1259 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=d323634b74976f09]*/
1260 
1261 static PyObject *
mbstreamreader_iread(MultibyteStreamReaderObject * self,const char * method,Py_ssize_t sizehint)1262 mbstreamreader_iread(MultibyteStreamReaderObject *self,
1263                      const char *method, Py_ssize_t sizehint)
1264 {
1265     MultibyteDecodeBuffer buf;
1266     PyObject *cres, *res;
1267     Py_ssize_t rsize;
1268 
1269     if (sizehint == 0)
1270         return PyUnicode_New(0, 0);
1271 
1272     _PyUnicodeWriter_Init(&buf.writer);
1273     buf.excobj = NULL;
1274     cres = NULL;
1275 
1276     for (;;) {
1277         int endoffile;
1278 
1279         if (sizehint < 0)
1280             cres = PyObject_CallMethod(self->stream,
1281                             method, NULL);
1282         else
1283             cres = PyObject_CallMethod(self->stream,
1284                             method, "i", sizehint);
1285         if (cres == NULL)
1286             goto errorexit;
1287 
1288         if (!PyBytes_Check(cres)) {
1289             PyErr_Format(PyExc_TypeError,
1290                          "stream function returned a "
1291                          "non-bytes object (%.100s)",
1292                          cres->ob_type->tp_name);
1293             goto errorexit;
1294         }
1295 
1296         endoffile = (PyBytes_GET_SIZE(cres) == 0);
1297 
1298         if (self->pendingsize > 0) {
1299             PyObject *ctr;
1300             char *ctrdata;
1301 
1302             if (PyBytes_GET_SIZE(cres) > PY_SSIZE_T_MAX - self->pendingsize) {
1303                 PyErr_NoMemory();
1304                 goto errorexit;
1305             }
1306             rsize = PyBytes_GET_SIZE(cres) + self->pendingsize;
1307             ctr = PyBytes_FromStringAndSize(NULL, rsize);
1308             if (ctr == NULL)
1309                 goto errorexit;
1310             ctrdata = PyBytes_AS_STRING(ctr);
1311             memcpy(ctrdata, self->pending, self->pendingsize);
1312             memcpy(ctrdata + self->pendingsize,
1313                     PyBytes_AS_STRING(cres),
1314                     PyBytes_GET_SIZE(cres));
1315             Py_DECREF(cres);
1316             cres = ctr;
1317             self->pendingsize = 0;
1318         }
1319 
1320         rsize = PyBytes_GET_SIZE(cres);
1321         if (decoder_prepare_buffer(&buf, PyBytes_AS_STRING(cres),
1322                                    rsize) != 0)
1323             goto errorexit;
1324 
1325         if (rsize > 0 && decoder_feed_buffer(
1326                         (MultibyteStatefulDecoderContext *)self, &buf))
1327             goto errorexit;
1328 
1329         if (endoffile || sizehint < 0) {
1330             if (buf.inbuf < buf.inbuf_end &&
1331                 multibytecodec_decerror(self->codec, &self->state,
1332                             &buf, self->errors, MBERR_TOOFEW))
1333                 goto errorexit;
1334         }
1335 
1336         if (buf.inbuf < buf.inbuf_end) { /* pending sequence exists */
1337             if (decoder_append_pending(STATEFUL_DCTX(self),
1338                                        &buf) != 0)
1339                 goto errorexit;
1340         }
1341 
1342         Py_DECREF(cres);
1343         cres = NULL;
1344 
1345         if (sizehint < 0 || buf.writer.pos != 0 || rsize == 0)
1346             break;
1347 
1348         sizehint = 1; /* read 1 more byte and retry */
1349     }
1350 
1351     res = _PyUnicodeWriter_Finish(&buf.writer);
1352     if (res == NULL)
1353         goto errorexit;
1354 
1355     Py_XDECREF(cres);
1356     Py_XDECREF(buf.excobj);
1357     return res;
1358 
1359 errorexit:
1360     Py_XDECREF(cres);
1361     Py_XDECREF(buf.excobj);
1362     _PyUnicodeWriter_Dealloc(&buf.writer);
1363     return NULL;
1364 }
1365 
1366 /*[clinic input]
1367  _multibytecodec.MultibyteStreamReader.read
1368 
1369     sizeobj: object = None
1370     /
1371 [clinic start generated code]*/
1372 
1373 static PyObject *
_multibytecodec_MultibyteStreamReader_read_impl(MultibyteStreamReaderObject * self,PyObject * sizeobj)1374 _multibytecodec_MultibyteStreamReader_read_impl(MultibyteStreamReaderObject *self,
1375                                                 PyObject *sizeobj)
1376 /*[clinic end generated code: output=35621eb75355d5b8 input=015b0d3ff2fca485]*/
1377 {
1378     Py_ssize_t size;
1379 
1380     if (sizeobj == Py_None)
1381         size = -1;
1382     else if (PyLong_Check(sizeobj))
1383         size = PyLong_AsSsize_t(sizeobj);
1384     else {
1385         PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
1386         return NULL;
1387     }
1388 
1389     if (size == -1 && PyErr_Occurred())
1390         return NULL;
1391 
1392     return mbstreamreader_iread(self, "read", size);
1393 }
1394 
1395 /*[clinic input]
1396  _multibytecodec.MultibyteStreamReader.readline
1397 
1398     sizeobj: object = None
1399     /
1400 [clinic start generated code]*/
1401 
1402 static PyObject *
_multibytecodec_MultibyteStreamReader_readline_impl(MultibyteStreamReaderObject * self,PyObject * sizeobj)1403 _multibytecodec_MultibyteStreamReader_readline_impl(MultibyteStreamReaderObject *self,
1404                                                     PyObject *sizeobj)
1405 /*[clinic end generated code: output=4fbfaae1ed457a11 input=41ccc64f9bb0cec3]*/
1406 {
1407     Py_ssize_t size;
1408 
1409     if (sizeobj == Py_None)
1410         size = -1;
1411     else if (PyLong_Check(sizeobj))
1412         size = PyLong_AsSsize_t(sizeobj);
1413     else {
1414         PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
1415         return NULL;
1416     }
1417 
1418     if (size == -1 && PyErr_Occurred())
1419         return NULL;
1420 
1421     return mbstreamreader_iread(self, "readline", size);
1422 }
1423 
1424 /*[clinic input]
1425  _multibytecodec.MultibyteStreamReader.readlines
1426 
1427     sizehintobj: object = None
1428     /
1429 [clinic start generated code]*/
1430 
1431 static PyObject *
_multibytecodec_MultibyteStreamReader_readlines_impl(MultibyteStreamReaderObject * self,PyObject * sizehintobj)1432 _multibytecodec_MultibyteStreamReader_readlines_impl(MultibyteStreamReaderObject *self,
1433                                                      PyObject *sizehintobj)
1434 /*[clinic end generated code: output=e7c4310768ed2ad4 input=54932f5d4d88e880]*/
1435 {
1436     PyObject *r, *sr;
1437     Py_ssize_t sizehint;
1438 
1439     if (sizehintobj == Py_None)
1440         sizehint = -1;
1441     else if (PyLong_Check(sizehintobj))
1442         sizehint = PyLong_AsSsize_t(sizehintobj);
1443     else {
1444         PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
1445         return NULL;
1446     }
1447 
1448     if (sizehint == -1 && PyErr_Occurred())
1449         return NULL;
1450 
1451     r = mbstreamreader_iread(self, "read", sizehint);
1452     if (r == NULL)
1453         return NULL;
1454 
1455     sr = PyUnicode_Splitlines(r, 1);
1456     Py_DECREF(r);
1457     return sr;
1458 }
1459 
1460 /*[clinic input]
1461  _multibytecodec.MultibyteStreamReader.reset
1462 [clinic start generated code]*/
1463 
1464 static PyObject *
_multibytecodec_MultibyteStreamReader_reset_impl(MultibyteStreamReaderObject * self)1465 _multibytecodec_MultibyteStreamReader_reset_impl(MultibyteStreamReaderObject *self)
1466 /*[clinic end generated code: output=138490370a680abc input=5d4140db84b5e1e2]*/
1467 {
1468     if (self->codec->decreset != NULL &&
1469         self->codec->decreset(&self->state, self->codec->config) != 0)
1470         return NULL;
1471     self->pendingsize = 0;
1472 
1473     Py_RETURN_NONE;
1474 }
1475 
1476 static struct PyMethodDef mbstreamreader_methods[] = {
1477     _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READ_METHODDEF
1478     _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READLINE_METHODDEF
1479     _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READLINES_METHODDEF
1480     _MULTIBYTECODEC_MULTIBYTESTREAMREADER_RESET_METHODDEF
1481     {NULL,              NULL},
1482 };
1483 
1484 static PyMemberDef mbstreamreader_members[] = {
1485     {"stream",          T_OBJECT,
1486                     offsetof(MultibyteStreamReaderObject, stream),
1487                     READONLY, NULL},
1488     {NULL,}
1489 };
1490 
1491 static PyObject *
mbstreamreader_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1492 mbstreamreader_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1493 {
1494     MultibyteStreamReaderObject *self;
1495     PyObject *stream, *codec = NULL;
1496     char *errors = NULL;
1497 
1498     if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamReader",
1499                             streamkwarglist, &stream, &errors))
1500         return NULL;
1501 
1502     self = (MultibyteStreamReaderObject *)type->tp_alloc(type, 0);
1503     if (self == NULL)
1504         return NULL;
1505 
1506     codec = PyObject_GetAttrString((PyObject *)type, "codec");
1507     if (codec == NULL)
1508         goto errorexit;
1509     if (!MultibyteCodec_Check(codec)) {
1510         PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1511         goto errorexit;
1512     }
1513 
1514     self->codec = ((MultibyteCodecObject *)codec)->codec;
1515     self->stream = stream;
1516     Py_INCREF(stream);
1517     self->pendingsize = 0;
1518     self->errors = internal_error_callback(errors);
1519     if (self->errors == NULL)
1520         goto errorexit;
1521     if (self->codec->decinit != NULL &&
1522         self->codec->decinit(&self->state, self->codec->config) != 0)
1523         goto errorexit;
1524 
1525     Py_DECREF(codec);
1526     return (PyObject *)self;
1527 
1528 errorexit:
1529     Py_XDECREF(self);
1530     Py_XDECREF(codec);
1531     return NULL;
1532 }
1533 
1534 static int
mbstreamreader_init(PyObject * self,PyObject * args,PyObject * kwds)1535 mbstreamreader_init(PyObject *self, PyObject *args, PyObject *kwds)
1536 {
1537     return 0;
1538 }
1539 
1540 static int
mbstreamreader_traverse(MultibyteStreamReaderObject * self,visitproc visit,void * arg)1541 mbstreamreader_traverse(MultibyteStreamReaderObject *self,
1542                         visitproc visit, void *arg)
1543 {
1544     if (ERROR_ISCUSTOM(self->errors))
1545         Py_VISIT(self->errors);
1546     Py_VISIT(self->stream);
1547     return 0;
1548 }
1549 
1550 static void
mbstreamreader_dealloc(MultibyteStreamReaderObject * self)1551 mbstreamreader_dealloc(MultibyteStreamReaderObject *self)
1552 {
1553     PyObject_GC_UnTrack(self);
1554     ERROR_DECREF(self->errors);
1555     Py_XDECREF(self->stream);
1556     Py_TYPE(self)->tp_free(self);
1557 }
1558 
1559 static PyTypeObject MultibyteStreamReader_Type = {
1560     PyVarObject_HEAD_INIT(NULL, 0)
1561     "MultibyteStreamReader",            /* tp_name */
1562     sizeof(MultibyteStreamReaderObject), /* tp_basicsize */
1563     0,                                  /* tp_itemsize */
1564     /*  methods  */
1565     (destructor)mbstreamreader_dealloc, /* tp_dealloc */
1566     0,                                  /* tp_print */
1567     0,                                  /* tp_getattr */
1568     0,                                  /* tp_setattr */
1569     0,                                  /* tp_reserved */
1570     0,                                  /* tp_repr */
1571     0,                                  /* tp_as_number */
1572     0,                                  /* tp_as_sequence */
1573     0,                                  /* tp_as_mapping */
1574     0,                                  /* tp_hash */
1575     0,                                  /* tp_call */
1576     0,                                  /* tp_str */
1577     PyObject_GenericGetAttr,            /* tp_getattro */
1578     0,                                  /* tp_setattro */
1579     0,                                  /* tp_as_buffer */
1580     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC
1581         | Py_TPFLAGS_BASETYPE,          /* tp_flags */
1582     0,                                  /* tp_doc */
1583     (traverseproc)mbstreamreader_traverse,      /* tp_traverse */
1584     0,                                  /* tp_clear */
1585     0,                                  /* tp_richcompare */
1586     0,                                  /* tp_weaklistoffset */
1587     0,                                  /* tp_iter */
1588     0,                                  /* tp_iterext */
1589     mbstreamreader_methods,             /* tp_methods */
1590     mbstreamreader_members,             /* tp_members */
1591     codecctx_getsets,                   /* tp_getset */
1592     0,                                  /* tp_base */
1593     0,                                  /* tp_dict */
1594     0,                                  /* tp_descr_get */
1595     0,                                  /* tp_descr_set */
1596     0,                                  /* tp_dictoffset */
1597     mbstreamreader_init,                /* tp_init */
1598     0,                                  /* tp_alloc */
1599     mbstreamreader_new,                 /* tp_new */
1600 };
1601 
1602 
1603 /*[clinic input]
1604  class _multibytecodec.MultibyteStreamWriter "MultibyteStreamWriterObject *" "&MultibyteStreamWriter_Type"
1605 [clinic start generated code]*/
1606 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=cde22780a215d6ac]*/
1607 
1608 static int
mbstreamwriter_iwrite(MultibyteStreamWriterObject * self,PyObject * unistr)1609 mbstreamwriter_iwrite(MultibyteStreamWriterObject *self,
1610                       PyObject *unistr)
1611 {
1612     PyObject *str, *wr;
1613 
1614     str = encoder_encode_stateful(STATEFUL_ECTX(self), unistr, 0);
1615     if (str == NULL)
1616         return -1;
1617 
1618     wr = _PyObject_CallMethodIdObjArgs(self->stream, &PyId_write, str, NULL);
1619     Py_DECREF(str);
1620     if (wr == NULL)
1621         return -1;
1622 
1623     Py_DECREF(wr);
1624     return 0;
1625 }
1626 
1627 /*[clinic input]
1628  _multibytecodec.MultibyteStreamWriter.write
1629 
1630     strobj: object
1631     /
1632 [clinic start generated code]*/
1633 
1634 static PyObject *
_multibytecodec_MultibyteStreamWriter_write(MultibyteStreamWriterObject * self,PyObject * strobj)1635 _multibytecodec_MultibyteStreamWriter_write(MultibyteStreamWriterObject *self,
1636                                             PyObject *strobj)
1637 /*[clinic end generated code: output=e13ae841c895251e input=551dc4c018c10a2b]*/
1638 {
1639     if (mbstreamwriter_iwrite(self, strobj))
1640         return NULL;
1641     else
1642         Py_RETURN_NONE;
1643 }
1644 
1645 /*[clinic input]
1646  _multibytecodec.MultibyteStreamWriter.writelines
1647 
1648     lines: object
1649     /
1650 [clinic start generated code]*/
1651 
1652 static PyObject *
_multibytecodec_MultibyteStreamWriter_writelines(MultibyteStreamWriterObject * self,PyObject * lines)1653 _multibytecodec_MultibyteStreamWriter_writelines(MultibyteStreamWriterObject *self,
1654                                                  PyObject *lines)
1655 /*[clinic end generated code: output=e5c4285ac8e7d522 input=57797fe7008d4e96]*/
1656 {
1657     PyObject *strobj;
1658     int i, r;
1659 
1660     if (!PySequence_Check(lines)) {
1661         PyErr_SetString(PyExc_TypeError,
1662                         "arg must be a sequence object");
1663         return NULL;
1664     }
1665 
1666     for (i = 0; i < PySequence_Length(lines); i++) {
1667         /* length can be changed even within this loop */
1668         strobj = PySequence_GetItem(lines, i);
1669         if (strobj == NULL)
1670             return NULL;
1671 
1672         r = mbstreamwriter_iwrite(self, strobj);
1673         Py_DECREF(strobj);
1674         if (r == -1)
1675             return NULL;
1676     }
1677     /* PySequence_Length() can fail */
1678     if (PyErr_Occurred())
1679         return NULL;
1680 
1681     Py_RETURN_NONE;
1682 }
1683 
1684 /*[clinic input]
1685  _multibytecodec.MultibyteStreamWriter.reset
1686 [clinic start generated code]*/
1687 
1688 static PyObject *
_multibytecodec_MultibyteStreamWriter_reset_impl(MultibyteStreamWriterObject * self)1689 _multibytecodec_MultibyteStreamWriter_reset_impl(MultibyteStreamWriterObject *self)
1690 /*[clinic end generated code: output=8f54a4d9b03db5ff input=b56dbcbaf35cc10c]*/
1691 {
1692     PyObject *pwrt;
1693 
1694     if (!self->pending)
1695         Py_RETURN_NONE;
1696 
1697     pwrt = multibytecodec_encode(self->codec, &self->state,
1698                     self->pending, NULL, self->errors,
1699                     MBENC_FLUSH | MBENC_RESET);
1700     /* some pending buffer can be truncated when UnicodeEncodeError is
1701      * raised on 'strict' mode. but, 'reset' method is designed to
1702      * reset the pending buffer or states so failed string sequence
1703      * ought to be missed */
1704     Py_CLEAR(self->pending);
1705     if (pwrt == NULL)
1706         return NULL;
1707 
1708     assert(PyBytes_Check(pwrt));
1709     if (PyBytes_Size(pwrt) > 0) {
1710         PyObject *wr;
1711 
1712         wr = _PyObject_CallMethodIdObjArgs(self->stream, &PyId_write, pwrt);
1713         if (wr == NULL) {
1714             Py_DECREF(pwrt);
1715             return NULL;
1716         }
1717     }
1718     Py_DECREF(pwrt);
1719 
1720     Py_RETURN_NONE;
1721 }
1722 
1723 static PyObject *
mbstreamwriter_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1724 mbstreamwriter_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1725 {
1726     MultibyteStreamWriterObject *self;
1727     PyObject *stream, *codec = NULL;
1728     char *errors = NULL;
1729 
1730     if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamWriter",
1731                             streamkwarglist, &stream, &errors))
1732         return NULL;
1733 
1734     self = (MultibyteStreamWriterObject *)type->tp_alloc(type, 0);
1735     if (self == NULL)
1736         return NULL;
1737 
1738     codec = PyObject_GetAttrString((PyObject *)type, "codec");
1739     if (codec == NULL)
1740         goto errorexit;
1741     if (!MultibyteCodec_Check(codec)) {
1742         PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1743         goto errorexit;
1744     }
1745 
1746     self->codec = ((MultibyteCodecObject *)codec)->codec;
1747     self->stream = stream;
1748     Py_INCREF(stream);
1749     self->pending = NULL;
1750     self->errors = internal_error_callback(errors);
1751     if (self->errors == NULL)
1752         goto errorexit;
1753     if (self->codec->encinit != NULL &&
1754         self->codec->encinit(&self->state, self->codec->config) != 0)
1755         goto errorexit;
1756 
1757     Py_DECREF(codec);
1758     return (PyObject *)self;
1759 
1760 errorexit:
1761     Py_XDECREF(self);
1762     Py_XDECREF(codec);
1763     return NULL;
1764 }
1765 
1766 static int
mbstreamwriter_init(PyObject * self,PyObject * args,PyObject * kwds)1767 mbstreamwriter_init(PyObject *self, PyObject *args, PyObject *kwds)
1768 {
1769     return 0;
1770 }
1771 
1772 static int
mbstreamwriter_traverse(MultibyteStreamWriterObject * self,visitproc visit,void * arg)1773 mbstreamwriter_traverse(MultibyteStreamWriterObject *self,
1774                         visitproc visit, void *arg)
1775 {
1776     if (ERROR_ISCUSTOM(self->errors))
1777         Py_VISIT(self->errors);
1778     Py_VISIT(self->stream);
1779     return 0;
1780 }
1781 
1782 static void
mbstreamwriter_dealloc(MultibyteStreamWriterObject * self)1783 mbstreamwriter_dealloc(MultibyteStreamWriterObject *self)
1784 {
1785     PyObject_GC_UnTrack(self);
1786     ERROR_DECREF(self->errors);
1787     Py_XDECREF(self->stream);
1788     Py_TYPE(self)->tp_free(self);
1789 }
1790 
1791 static struct PyMethodDef mbstreamwriter_methods[] = {
1792     _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_WRITE_METHODDEF
1793     _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_WRITELINES_METHODDEF
1794     _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_RESET_METHODDEF
1795     {NULL, NULL},
1796 };
1797 
1798 static PyMemberDef mbstreamwriter_members[] = {
1799     {"stream",          T_OBJECT,
1800                     offsetof(MultibyteStreamWriterObject, stream),
1801                     READONLY, NULL},
1802     {NULL,}
1803 };
1804 
1805 static PyTypeObject MultibyteStreamWriter_Type = {
1806     PyVarObject_HEAD_INIT(NULL, 0)
1807     "MultibyteStreamWriter",            /* tp_name */
1808     sizeof(MultibyteStreamWriterObject), /* tp_basicsize */
1809     0,                                  /* tp_itemsize */
1810     /*  methods  */
1811     (destructor)mbstreamwriter_dealloc, /* tp_dealloc */
1812     0,                                  /* tp_print */
1813     0,                                  /* tp_getattr */
1814     0,                                  /* tp_setattr */
1815     0,                                  /* tp_reserved */
1816     0,                                  /* tp_repr */
1817     0,                                  /* tp_as_number */
1818     0,                                  /* tp_as_sequence */
1819     0,                                  /* tp_as_mapping */
1820     0,                                  /* tp_hash */
1821     0,                                  /* tp_call */
1822     0,                                  /* tp_str */
1823     PyObject_GenericGetAttr,            /* tp_getattro */
1824     0,                                  /* tp_setattro */
1825     0,                                  /* tp_as_buffer */
1826     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC
1827         | Py_TPFLAGS_BASETYPE,          /* tp_flags */
1828     0,                                  /* tp_doc */
1829     (traverseproc)mbstreamwriter_traverse,      /* tp_traverse */
1830     0,                                  /* tp_clear */
1831     0,                                  /* tp_richcompare */
1832     0,                                  /* tp_weaklistoffset */
1833     0,                                  /* tp_iter */
1834     0,                                  /* tp_iterext */
1835     mbstreamwriter_methods,             /* tp_methods */
1836     mbstreamwriter_members,             /* tp_members */
1837     codecctx_getsets,                   /* tp_getset */
1838     0,                                  /* tp_base */
1839     0,                                  /* tp_dict */
1840     0,                                  /* tp_descr_get */
1841     0,                                  /* tp_descr_set */
1842     0,                                  /* tp_dictoffset */
1843     mbstreamwriter_init,                /* tp_init */
1844     0,                                  /* tp_alloc */
1845     mbstreamwriter_new,                 /* tp_new */
1846 };
1847 
1848 
1849 /*[clinic input]
1850 _multibytecodec.__create_codec
1851 
1852     arg: object
1853     /
1854 [clinic start generated code]*/
1855 
1856 static PyObject *
_multibytecodec___create_codec(PyObject * module,PyObject * arg)1857 _multibytecodec___create_codec(PyObject *module, PyObject *arg)
1858 /*[clinic end generated code: output=cfa3dce8260e809d input=6840b2a6b183fcfa]*/
1859 {
1860     MultibyteCodecObject *self;
1861     MultibyteCodec *codec;
1862 
1863     if (!PyCapsule_IsValid(arg, PyMultibyteCodec_CAPSULE_NAME)) {
1864         PyErr_SetString(PyExc_ValueError, "argument type invalid");
1865         return NULL;
1866     }
1867 
1868     codec = PyCapsule_GetPointer(arg, PyMultibyteCodec_CAPSULE_NAME);
1869     if (codec->codecinit != NULL && codec->codecinit(codec->config) != 0)
1870         return NULL;
1871 
1872     self = PyObject_New(MultibyteCodecObject, &MultibyteCodec_Type);
1873     if (self == NULL)
1874         return NULL;
1875     self->codec = codec;
1876 
1877     return (PyObject *)self;
1878 }
1879 
1880 static struct PyMethodDef __methods[] = {
1881     _MULTIBYTECODEC___CREATE_CODEC_METHODDEF
1882     {NULL, NULL},
1883 };
1884 
1885 
1886 static struct PyModuleDef _multibytecodecmodule = {
1887     PyModuleDef_HEAD_INIT,
1888     "_multibytecodec",
1889     NULL,
1890     -1,
1891     __methods,
1892     NULL,
1893     NULL,
1894     NULL,
1895     NULL
1896 };
1897 
1898 PyMODINIT_FUNC
PyInit__multibytecodec(void)1899 PyInit__multibytecodec(void)
1900 {
1901     int i;
1902     PyObject *m;
1903     PyTypeObject *typelist[] = {
1904         &MultibyteIncrementalEncoder_Type,
1905         &MultibyteIncrementalDecoder_Type,
1906         &MultibyteStreamReader_Type,
1907         &MultibyteStreamWriter_Type,
1908         NULL
1909     };
1910 
1911     if (PyType_Ready(&MultibyteCodec_Type) < 0)
1912         return NULL;
1913 
1914     m = PyModule_Create(&_multibytecodecmodule);
1915     if (m == NULL)
1916         return NULL;
1917 
1918     for (i = 0; typelist[i] != NULL; i++) {
1919         if (PyType_Ready(typelist[i]) < 0)
1920             return NULL;
1921         Py_INCREF(typelist[i]);
1922         PyModule_AddObject(m, typelist[i]->tp_name,
1923                            (PyObject *)typelist[i]);
1924     }
1925 
1926     if (PyErr_Occurred()) {
1927         Py_FatalError("can't initialize the _multibytecodec module");
1928         Py_DECREF(m);
1929         m = NULL;
1930     }
1931     return m;
1932 }
1933