• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * multibytecodec.c: Common Multibyte Codec Implementation
3  *
4  * Written by Hye-Shik Chang <perky@FreeBSD.org>
5  */
6 
7 #define PY_SSIZE_T_CLEAN
8 #include "Python.h"
9 #include "structmember.h"         // PyMemberDef
10 #include "multibytecodec.h"
11 #include "clinic/multibytecodec.c.h"
12 
13 #define MODULE_NAME "_multibytecodec"
14 
15 typedef struct {
16     PyTypeObject *encoder_type;
17     PyTypeObject *decoder_type;
18     PyTypeObject *reader_type;
19     PyTypeObject *writer_type;
20     PyTypeObject *multibytecodec_type;
21 } _multibytecodec_state;
22 
23 static _multibytecodec_state *
_multibytecodec_get_state(PyObject * module)24 _multibytecodec_get_state(PyObject *module)
25 {
26     _multibytecodec_state *state = PyModule_GetState(module);
27     assert(state != NULL);
28     return state;
29 }
30 
31 static struct PyModuleDef _multibytecodecmodule;
32 static _multibytecodec_state *
_multibyte_codec_find_state_by_type(PyTypeObject * type)33 _multibyte_codec_find_state_by_type(PyTypeObject *type)
34 {
35     PyObject *module = _PyType_GetModuleByDef(type, &_multibytecodecmodule);
36     assert(module != NULL);
37     return _multibytecodec_get_state(module);
38 }
39 
40 #define clinic_get_state() _multibyte_codec_find_state_by_type(type)
41 /*[clinic input]
42 module _multibytecodec
43 class _multibytecodec.MultibyteCodec "MultibyteCodecObject *" "clinic_get_state()->multibytecodec_type"
44 class _multibytecodec.MultibyteIncrementalEncoder "MultibyteIncrementalEncoderObject *" "clinic_get_state()->encoder_type"
45 class _multibytecodec.MultibyteIncrementalDecoder "MultibyteIncrementalDecoderObject *" "clinic_get_state()->decoder_type"
46 class _multibytecodec.MultibyteStreamReader "MultibyteStreamReaderObject *" "clinic_get_state()->reader_type"
47 class _multibytecodec.MultibyteStreamWriter "MultibyteStreamWriterObject *" "clinic_get_state()->writer_type"
48 [clinic start generated code]*/
49 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=305a76dfdd24b99c]*/
50 #undef clinic_get_state
51 
52 typedef struct {
53     PyObject            *inobj;
54     Py_ssize_t          inpos, inlen;
55     unsigned char       *outbuf, *outbuf_end;
56     PyObject            *excobj, *outobj;
57 } MultibyteEncodeBuffer;
58 
59 typedef struct {
60     const unsigned char *inbuf, *inbuf_top, *inbuf_end;
61     PyObject            *excobj;
62     _PyUnicodeWriter    writer;
63 } MultibyteDecodeBuffer;
64 
65 static char *incnewkwarglist[] = {"errors", NULL};
66 static char *streamkwarglist[] = {"stream", "errors", NULL};
67 
68 static PyObject *multibytecodec_encode(MultibyteCodec *,
69                 MultibyteCodec_State *, PyObject *, Py_ssize_t *,
70                 PyObject *, int);
71 
72 #define MBENC_RESET     MBENC_MAX<<1 /* reset after an encoding session */
73 
74 _Py_IDENTIFIER(write);
75 
76 static PyObject *
make_tuple(PyObject * object,Py_ssize_t len)77 make_tuple(PyObject *object, Py_ssize_t len)
78 {
79     PyObject *v, *w;
80 
81     if (object == NULL)
82         return NULL;
83 
84     v = PyTuple_New(2);
85     if (v == NULL) {
86         Py_DECREF(object);
87         return NULL;
88     }
89     PyTuple_SET_ITEM(v, 0, object);
90 
91     w = PyLong_FromSsize_t(len);
92     if (w == NULL) {
93         Py_DECREF(v);
94         return NULL;
95     }
96     PyTuple_SET_ITEM(v, 1, w);
97 
98     return v;
99 }
100 
101 static PyObject *
internal_error_callback(const char * errors)102 internal_error_callback(const char *errors)
103 {
104     if (errors == NULL || strcmp(errors, "strict") == 0)
105         return ERROR_STRICT;
106     else if (strcmp(errors, "ignore") == 0)
107         return ERROR_IGNORE;
108     else if (strcmp(errors, "replace") == 0)
109         return ERROR_REPLACE;
110     else
111         return PyUnicode_FromString(errors);
112 }
113 
114 static PyObject *
call_error_callback(PyObject * errors,PyObject * exc)115 call_error_callback(PyObject *errors, PyObject *exc)
116 {
117     PyObject *cb, *r;
118     const char *str;
119 
120     assert(PyUnicode_Check(errors));
121     str = PyUnicode_AsUTF8(errors);
122     if (str == NULL)
123         return NULL;
124     cb = PyCodec_LookupError(str);
125     if (cb == NULL)
126         return NULL;
127 
128     r = PyObject_CallOneArg(cb, exc);
129     Py_DECREF(cb);
130     return r;
131 }
132 
133 static PyObject *
codecctx_errors_get(MultibyteStatefulCodecContext * self,void * Py_UNUSED (ignored))134 codecctx_errors_get(MultibyteStatefulCodecContext *self, void *Py_UNUSED(ignored))
135 {
136     const char *errors;
137 
138     if (self->errors == ERROR_STRICT)
139         errors = "strict";
140     else if (self->errors == ERROR_IGNORE)
141         errors = "ignore";
142     else if (self->errors == ERROR_REPLACE)
143         errors = "replace";
144     else {
145         Py_INCREF(self->errors);
146         return self->errors;
147     }
148 
149     return PyUnicode_FromString(errors);
150 }
151 
152 static int
codecctx_errors_set(MultibyteStatefulCodecContext * self,PyObject * value,void * closure)153 codecctx_errors_set(MultibyteStatefulCodecContext *self, PyObject *value,
154                     void *closure)
155 {
156     PyObject *cb;
157     const char *str;
158 
159     if (value == NULL) {
160         PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
161         return -1;
162     }
163     if (!PyUnicode_Check(value)) {
164         PyErr_SetString(PyExc_TypeError, "errors must be a string");
165         return -1;
166     }
167 
168     str = PyUnicode_AsUTF8(value);
169     if (str == NULL)
170         return -1;
171 
172     cb = internal_error_callback(str);
173     if (cb == NULL)
174         return -1;
175 
176     ERROR_DECREF(self->errors);
177     self->errors = cb;
178     return 0;
179 }
180 
181 /* This getset handlers list is used by all the stateful codec objects */
182 static PyGetSetDef codecctx_getsets[] = {
183     {"errors",          (getter)codecctx_errors_get,
184                     (setter)codecctx_errors_set,
185                     PyDoc_STR("how to treat errors")},
186     {NULL,}
187 };
188 
189 static int
expand_encodebuffer(MultibyteEncodeBuffer * buf,Py_ssize_t esize)190 expand_encodebuffer(MultibyteEncodeBuffer *buf, Py_ssize_t esize)
191 {
192     Py_ssize_t orgpos, orgsize, incsize;
193 
194     orgpos = (Py_ssize_t)((char *)buf->outbuf -
195                             PyBytes_AS_STRING(buf->outobj));
196     orgsize = PyBytes_GET_SIZE(buf->outobj);
197     incsize = (esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize);
198 
199     if (orgsize > PY_SSIZE_T_MAX - incsize) {
200         PyErr_NoMemory();
201         return -1;
202     }
203 
204     if (_PyBytes_Resize(&buf->outobj, orgsize + incsize) == -1)
205         return -1;
206 
207     buf->outbuf = (unsigned char *)PyBytes_AS_STRING(buf->outobj) +orgpos;
208     buf->outbuf_end = (unsigned char *)PyBytes_AS_STRING(buf->outobj)
209         + PyBytes_GET_SIZE(buf->outobj);
210 
211     return 0;
212 }
/* Make sure at least `s` bytes are free in `buf`'s output, growing it if
 * needed; a negative `s` always grows.  Jumps to the caller's `errorexit`
 * label when growing fails.  Both arguments may be evaluated more than
 * once, so pass side-effect-free expressions. */
#define REQUIRE_ENCODEBUFFER(buf, s) do {                               \
    if ((s) < 0 || (s) > (buf)->outbuf_end - (buf)->outbuf) {           \
        if (expand_encodebuffer((buf), (s)) == -1) {                    \
            goto errorexit;                                             \
        }                                                               \
    }                                                                   \
} while(0)
218 
219 
220 /**
221  * MultibyteCodec object
222  */
223 
224 static int
multibytecodec_encerror(MultibyteCodec * codec,MultibyteCodec_State * state,MultibyteEncodeBuffer * buf,PyObject * errors,Py_ssize_t e)225 multibytecodec_encerror(MultibyteCodec *codec,
226                         MultibyteCodec_State *state,
227                         MultibyteEncodeBuffer *buf,
228                         PyObject *errors, Py_ssize_t e)
229 {
230     PyObject *retobj = NULL, *retstr = NULL, *tobj;
231     Py_ssize_t retstrsize, newpos;
232     Py_ssize_t esize, start, end;
233     const char *reason;
234 
235     if (e > 0) {
236         reason = "illegal multibyte sequence";
237         esize = e;
238     }
239     else {
240         switch (e) {
241         case MBERR_TOOSMALL:
242             REQUIRE_ENCODEBUFFER(buf, -1);
243             return 0; /* retry it */
244         case MBERR_TOOFEW:
245             reason = "incomplete multibyte sequence";
246             esize = (Py_ssize_t)buf->inpos;
247             break;
248         case MBERR_INTERNAL:
249             PyErr_SetString(PyExc_RuntimeError,
250                             "internal codec error");
251             return -1;
252         default:
253             PyErr_SetString(PyExc_RuntimeError,
254                             "unknown runtime error");
255             return -1;
256         }
257     }
258 
259     if (errors == ERROR_REPLACE) {
260         PyObject *replchar;
261         Py_ssize_t r;
262         Py_ssize_t inpos;
263         int kind;
264         const void *data;
265 
266         replchar = PyUnicode_FromOrdinal('?');
267         if (replchar == NULL)
268             goto errorexit;
269         kind = PyUnicode_KIND(replchar);
270         data = PyUnicode_DATA(replchar);
271 
272         inpos = 0;
273         for (;;) {
274             Py_ssize_t outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf);
275 
276             r = codec->encode(state, codec->config,
277                               kind, data, &inpos, 1,
278                               &buf->outbuf, outleft, 0);
279             if (r == MBERR_TOOSMALL) {
280                 REQUIRE_ENCODEBUFFER(buf, -1);
281                 continue;
282             }
283             else
284                 break;
285         }
286 
287         Py_DECREF(replchar);
288 
289         if (r != 0) {
290             REQUIRE_ENCODEBUFFER(buf, 1);
291             *buf->outbuf++ = '?';
292         }
293     }
294     if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
295         buf->inpos += esize;
296         return 0;
297     }
298 
299     start = (Py_ssize_t)buf->inpos;
300     end = start + esize;
301 
302     /* use cached exception object if available */
303     if (buf->excobj == NULL) {
304         buf->excobj =  PyObject_CallFunction(PyExc_UnicodeEncodeError,
305                                              "sOnns",
306                                              codec->encoding, buf->inobj,
307                                              start, end, reason);
308         if (buf->excobj == NULL)
309             goto errorexit;
310     }
311     else
312         if (PyUnicodeEncodeError_SetStart(buf->excobj, start) != 0 ||
313             PyUnicodeEncodeError_SetEnd(buf->excobj, end) != 0 ||
314             PyUnicodeEncodeError_SetReason(buf->excobj, reason) != 0)
315             goto errorexit;
316 
317     if (errors == ERROR_STRICT) {
318         PyCodec_StrictErrors(buf->excobj);
319         goto errorexit;
320     }
321 
322     retobj = call_error_callback(errors, buf->excobj);
323     if (retobj == NULL)
324         goto errorexit;
325 
326     if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
327         (!PyUnicode_Check((tobj = PyTuple_GET_ITEM(retobj, 0))) && !PyBytes_Check(tobj)) ||
328         !PyLong_Check(PyTuple_GET_ITEM(retobj, 1))) {
329         PyErr_SetString(PyExc_TypeError,
330                         "encoding error handler must return "
331                         "(str, int) tuple");
332         goto errorexit;
333     }
334 
335     if (PyUnicode_Check(tobj)) {
336         Py_ssize_t inpos;
337 
338         retstr = multibytecodec_encode(codec, state, tobj,
339                         &inpos, ERROR_STRICT,
340                         MBENC_FLUSH);
341         if (retstr == NULL)
342             goto errorexit;
343     }
344     else {
345         Py_INCREF(tobj);
346         retstr = tobj;
347     }
348 
349     assert(PyBytes_Check(retstr));
350     retstrsize = PyBytes_GET_SIZE(retstr);
351     if (retstrsize > 0) {
352         REQUIRE_ENCODEBUFFER(buf, retstrsize);
353         memcpy(buf->outbuf, PyBytes_AS_STRING(retstr), retstrsize);
354         buf->outbuf += retstrsize;
355     }
356 
357     newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
358     if (newpos < 0 && !PyErr_Occurred())
359         newpos += (Py_ssize_t)buf->inlen;
360     if (newpos < 0 || newpos > buf->inlen) {
361         PyErr_Clear();
362         PyErr_Format(PyExc_IndexError,
363                      "position %zd from error handler out of bounds",
364                      newpos);
365         goto errorexit;
366     }
367     buf->inpos = newpos;
368 
369     Py_DECREF(retobj);
370     Py_DECREF(retstr);
371     return 0;
372 
373 errorexit:
374     Py_XDECREF(retobj);
375     Py_XDECREF(retstr);
376     return -1;
377 }
378 
379 static int
multibytecodec_decerror(MultibyteCodec * codec,MultibyteCodec_State * state,MultibyteDecodeBuffer * buf,PyObject * errors,Py_ssize_t e)380 multibytecodec_decerror(MultibyteCodec *codec,
381                         MultibyteCodec_State *state,
382                         MultibyteDecodeBuffer *buf,
383                         PyObject *errors, Py_ssize_t e)
384 {
385     PyObject *retobj = NULL, *retuni = NULL;
386     Py_ssize_t newpos;
387     const char *reason;
388     Py_ssize_t esize, start, end;
389 
390     if (e > 0) {
391         reason = "illegal multibyte sequence";
392         esize = e;
393     }
394     else {
395         switch (e) {
396         case MBERR_TOOSMALL:
397             return 0; /* retry it */
398         case MBERR_TOOFEW:
399             reason = "incomplete multibyte sequence";
400             esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
401             break;
402         case MBERR_INTERNAL:
403             PyErr_SetString(PyExc_RuntimeError,
404                             "internal codec error");
405             return -1;
406         case MBERR_EXCEPTION:
407             return -1;
408         default:
409             PyErr_SetString(PyExc_RuntimeError,
410                             "unknown runtime error");
411             return -1;
412         }
413     }
414 
415     if (errors == ERROR_REPLACE) {
416         if (_PyUnicodeWriter_WriteChar(&buf->writer,
417                                        Py_UNICODE_REPLACEMENT_CHARACTER) < 0)
418             goto errorexit;
419     }
420     if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
421         buf->inbuf += esize;
422         return 0;
423     }
424 
425     start = (Py_ssize_t)(buf->inbuf - buf->inbuf_top);
426     end = start + esize;
427 
428     /* use cached exception object if available */
429     if (buf->excobj == NULL) {
430         buf->excobj = PyUnicodeDecodeError_Create(codec->encoding,
431                         (const char *)buf->inbuf_top,
432                         (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top),
433                         start, end, reason);
434         if (buf->excobj == NULL)
435             goto errorexit;
436     }
437     else
438         if (PyUnicodeDecodeError_SetStart(buf->excobj, start) ||
439             PyUnicodeDecodeError_SetEnd(buf->excobj, end) ||
440             PyUnicodeDecodeError_SetReason(buf->excobj, reason))
441             goto errorexit;
442 
443     if (errors == ERROR_STRICT) {
444         PyCodec_StrictErrors(buf->excobj);
445         goto errorexit;
446     }
447 
448     retobj = call_error_callback(errors, buf->excobj);
449     if (retobj == NULL)
450         goto errorexit;
451 
452     if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
453         !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) ||
454         !PyLong_Check(PyTuple_GET_ITEM(retobj, 1))) {
455         PyErr_SetString(PyExc_TypeError,
456                         "decoding error handler must return "
457                         "(str, int) tuple");
458         goto errorexit;
459     }
460 
461     if (_PyUnicodeWriter_WriteStr(&buf->writer, retuni) < 0)
462         goto errorexit;
463 
464     newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
465     if (newpos < 0 && !PyErr_Occurred())
466         newpos += (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top);
467     if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) {
468         PyErr_Clear();
469         PyErr_Format(PyExc_IndexError,
470                      "position %zd from error handler out of bounds",
471                      newpos);
472         goto errorexit;
473     }
474     buf->inbuf = buf->inbuf_top + newpos;
475     Py_DECREF(retobj);
476     return 0;
477 
478 errorexit:
479     Py_XDECREF(retobj);
480     return -1;
481 }
482 
483 static PyObject *
multibytecodec_encode(MultibyteCodec * codec,MultibyteCodec_State * state,PyObject * text,Py_ssize_t * inpos_t,PyObject * errors,int flags)484 multibytecodec_encode(MultibyteCodec *codec,
485                       MultibyteCodec_State *state,
486                       PyObject *text, Py_ssize_t *inpos_t,
487                       PyObject *errors, int flags)
488 {
489     MultibyteEncodeBuffer buf;
490     Py_ssize_t finalsize, r = 0;
491     Py_ssize_t datalen;
492     int kind;
493     const void *data;
494 
495     if (PyUnicode_READY(text) < 0)
496         return NULL;
497     datalen = PyUnicode_GET_LENGTH(text);
498 
499     if (datalen == 0 && !(flags & MBENC_RESET))
500         return PyBytes_FromStringAndSize(NULL, 0);
501 
502     buf.excobj = NULL;
503     buf.outobj = NULL;
504     buf.inobj = text;   /* borrowed reference */
505     buf.inpos = 0;
506     buf.inlen = datalen;
507     kind = PyUnicode_KIND(buf.inobj);
508     data = PyUnicode_DATA(buf.inobj);
509 
510     if (datalen > (PY_SSIZE_T_MAX - 16) / 2) {
511         PyErr_NoMemory();
512         goto errorexit;
513     }
514 
515     buf.outobj = PyBytes_FromStringAndSize(NULL, datalen * 2 + 16);
516     if (buf.outobj == NULL)
517         goto errorexit;
518     buf.outbuf = (unsigned char *)PyBytes_AS_STRING(buf.outobj);
519     buf.outbuf_end = buf.outbuf + PyBytes_GET_SIZE(buf.outobj);
520 
521     while (buf.inpos < buf.inlen) {
522         /* we don't reuse inleft and outleft here.
523          * error callbacks can relocate the cursor anywhere on buffer*/
524         Py_ssize_t outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
525 
526         r = codec->encode(state, codec->config,
527                           kind, data,
528                           &buf.inpos, buf.inlen,
529                           &buf.outbuf, outleft, flags);
530         if ((r == 0) || (r == MBERR_TOOFEW && !(flags & MBENC_FLUSH)))
531             break;
532         else if (multibytecodec_encerror(codec, state, &buf, errors,r))
533             goto errorexit;
534         else if (r == MBERR_TOOFEW)
535             break;
536     }
537 
538     if (codec->encreset != NULL && (flags & MBENC_RESET))
539         for (;;) {
540             Py_ssize_t outleft;
541 
542             outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
543             r = codec->encreset(state, codec->config, &buf.outbuf,
544                                 outleft);
545             if (r == 0)
546                 break;
547             else if (multibytecodec_encerror(codec, state,
548                                              &buf, errors, r))
549                 goto errorexit;
550         }
551 
552     finalsize = (Py_ssize_t)((char *)buf.outbuf -
553                              PyBytes_AS_STRING(buf.outobj));
554 
555     if (finalsize != PyBytes_GET_SIZE(buf.outobj))
556         if (_PyBytes_Resize(&buf.outobj, finalsize) == -1)
557             goto errorexit;
558 
559     if (inpos_t)
560         *inpos_t = buf.inpos;
561     Py_XDECREF(buf.excobj);
562     return buf.outobj;
563 
564 errorexit:
565     Py_XDECREF(buf.excobj);
566     Py_XDECREF(buf.outobj);
567     return NULL;
568 }
569 
570 /*[clinic input]
571 _multibytecodec.MultibyteCodec.encode
572 
573   input: object
574   errors: str(accept={str, NoneType}) = None
575 
576 Return an encoded string version of `input'.
577 
578 'errors' may be given to set a different error handling scheme. Default is
579 'strict' meaning that encoding errors raise a UnicodeEncodeError. Other possible
580 values are 'ignore', 'replace' and 'xmlcharrefreplace' as well as any other name
581 registered with codecs.register_error that can handle UnicodeEncodeErrors.
582 [clinic start generated code]*/
583 
584 static PyObject *
_multibytecodec_MultibyteCodec_encode_impl(MultibyteCodecObject * self,PyObject * input,const char * errors)585 _multibytecodec_MultibyteCodec_encode_impl(MultibyteCodecObject *self,
586                                            PyObject *input,
587                                            const char *errors)
588 /*[clinic end generated code: output=7b26652045ba56a9 input=606d0e128a577bae]*/
589 {
590     MultibyteCodec_State state;
591     PyObject *errorcb, *r, *ucvt;
592     Py_ssize_t datalen;
593 
594     if (PyUnicode_Check(input))
595         ucvt = NULL;
596     else {
597         input = ucvt = PyObject_Str(input);
598         if (input == NULL)
599             return NULL;
600         else if (!PyUnicode_Check(input)) {
601             PyErr_SetString(PyExc_TypeError,
602                 "couldn't convert the object to unicode.");
603             Py_DECREF(ucvt);
604             return NULL;
605         }
606     }
607 
608     if (PyUnicode_READY(input) < 0) {
609         Py_XDECREF(ucvt);
610         return NULL;
611     }
612     datalen = PyUnicode_GET_LENGTH(input);
613 
614     errorcb = internal_error_callback(errors);
615     if (errorcb == NULL) {
616         Py_XDECREF(ucvt);
617         return NULL;
618     }
619 
620     if (self->codec->encinit != NULL &&
621         self->codec->encinit(&state, self->codec->config) != 0)
622         goto errorexit;
623     r = multibytecodec_encode(self->codec, &state,
624                     input, NULL, errorcb,
625                     MBENC_FLUSH | MBENC_RESET);
626     if (r == NULL)
627         goto errorexit;
628 
629     ERROR_DECREF(errorcb);
630     Py_XDECREF(ucvt);
631     return make_tuple(r, datalen);
632 
633 errorexit:
634     ERROR_DECREF(errorcb);
635     Py_XDECREF(ucvt);
636     return NULL;
637 }
638 
639 /*[clinic input]
640 _multibytecodec.MultibyteCodec.decode
641 
642   input: Py_buffer
643   errors: str(accept={str, NoneType}) = None
644 
645 Decodes 'input'.
646 
647 'errors' may be given to set a different error handling scheme. Default is
648 'strict' meaning that encoding errors raise a UnicodeDecodeError. Other possible
649 values are 'ignore' and 'replace' as well as any other name registered with
650 codecs.register_error that is able to handle UnicodeDecodeErrors."
651 [clinic start generated code]*/
652 
653 static PyObject *
_multibytecodec_MultibyteCodec_decode_impl(MultibyteCodecObject * self,Py_buffer * input,const char * errors)654 _multibytecodec_MultibyteCodec_decode_impl(MultibyteCodecObject *self,
655                                            Py_buffer *input,
656                                            const char *errors)
657 /*[clinic end generated code: output=ff419f65bad6cc77 input=e0c78fc7ab190def]*/
658 {
659     MultibyteCodec_State state;
660     MultibyteDecodeBuffer buf;
661     PyObject *errorcb, *res;
662     const char *data;
663     Py_ssize_t datalen;
664 
665     data = input->buf;
666     datalen = input->len;
667 
668     errorcb = internal_error_callback(errors);
669     if (errorcb == NULL) {
670         return NULL;
671     }
672 
673     if (datalen == 0) {
674         ERROR_DECREF(errorcb);
675         return make_tuple(PyUnicode_New(0, 0), 0);
676     }
677 
678     _PyUnicodeWriter_Init(&buf.writer);
679     buf.writer.min_length = datalen;
680     buf.excobj = NULL;
681     buf.inbuf = buf.inbuf_top = (unsigned char *)data;
682     buf.inbuf_end = buf.inbuf_top + datalen;
683 
684     if (self->codec->decinit != NULL &&
685         self->codec->decinit(&state, self->codec->config) != 0)
686         goto errorexit;
687 
688     while (buf.inbuf < buf.inbuf_end) {
689         Py_ssize_t inleft, r;
690 
691         inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf);
692 
693         r = self->codec->decode(&state, self->codec->config,
694                         &buf.inbuf, inleft, &buf.writer);
695         if (r == 0)
696             break;
697         else if (multibytecodec_decerror(self->codec, &state,
698                                          &buf, errorcb, r))
699             goto errorexit;
700     }
701 
702     res = _PyUnicodeWriter_Finish(&buf.writer);
703     if (res == NULL)
704         goto errorexit;
705 
706     Py_XDECREF(buf.excobj);
707     ERROR_DECREF(errorcb);
708     return make_tuple(res, datalen);
709 
710 errorexit:
711     ERROR_DECREF(errorcb);
712     Py_XDECREF(buf.excobj);
713     _PyUnicodeWriter_Dealloc(&buf.writer);
714 
715     return NULL;
716 }
717 
718 static struct PyMethodDef multibytecodec_methods[] = {
719     _MULTIBYTECODEC_MULTIBYTECODEC_ENCODE_METHODDEF
720     _MULTIBYTECODEC_MULTIBYTECODEC_DECODE_METHODDEF
721     {NULL, NULL},
722 };
723 
724 static int
multibytecodec_traverse(PyObject * self,visitproc visit,void * arg)725 multibytecodec_traverse(PyObject *self, visitproc visit, void *arg)
726 {
727     Py_VISIT(Py_TYPE(self));
728     return 0;
729 }
730 
731 static void
multibytecodec_dealloc(MultibyteCodecObject * self)732 multibytecodec_dealloc(MultibyteCodecObject *self)
733 {
734     PyObject_GC_UnTrack(self);
735     PyTypeObject *tp = Py_TYPE(self);
736     tp->tp_free(self);
737     Py_DECREF(tp);
738 }
739 
740 static PyType_Slot multibytecodec_slots[] = {
741     {Py_tp_dealloc, multibytecodec_dealloc},
742     {Py_tp_getattro, PyObject_GenericGetAttr},
743     {Py_tp_methods, multibytecodec_methods},
744     {Py_tp_traverse, multibytecodec_traverse},
745     {0, NULL},
746 };
747 
748 static PyType_Spec multibytecodec_spec = {
749     .name = MODULE_NAME ".MultibyteCodec",
750     .basicsize = sizeof(MultibyteCodecObject),
751     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
752               Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_IMMUTABLETYPE),
753     .slots = multibytecodec_slots,
754 };
755 
756 
757 /**
758  * Utility functions for stateful codec mechanism
759  */
760 
/* Cast an object pointer to the stateful decoder / encoder context type. */
#define STATEFUL_DCTX(o)        ((MultibyteStatefulDecoderContext *)(o))
#define STATEFUL_ECTX(o)        ((MultibyteStatefulEncoderContext *)(o))
763 
764 static PyObject *
encoder_encode_stateful(MultibyteStatefulEncoderContext * ctx,PyObject * unistr,int final)765 encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx,
766                         PyObject *unistr, int final)
767 {
768     PyObject *ucvt, *r = NULL;
769     PyObject *inbuf = NULL;
770     Py_ssize_t inpos, datalen;
771     PyObject *origpending = NULL;
772 
773     if (PyUnicode_Check(unistr))
774         ucvt = NULL;
775     else {
776         unistr = ucvt = PyObject_Str(unistr);
777         if (unistr == NULL)
778             return NULL;
779         else if (!PyUnicode_Check(unistr)) {
780             PyErr_SetString(PyExc_TypeError,
781                 "couldn't convert the object to str.");
782             Py_DECREF(ucvt);
783             return NULL;
784         }
785     }
786 
787     if (ctx->pending) {
788         PyObject *inbuf_tmp;
789 
790         Py_INCREF(ctx->pending);
791         origpending = ctx->pending;
792 
793         Py_INCREF(ctx->pending);
794         inbuf_tmp = ctx->pending;
795         PyUnicode_Append(&inbuf_tmp, unistr);
796         if (inbuf_tmp == NULL)
797             goto errorexit;
798         Py_CLEAR(ctx->pending);
799         inbuf = inbuf_tmp;
800     }
801     else {
802         origpending = NULL;
803 
804         Py_INCREF(unistr);
805         inbuf = unistr;
806     }
807     if (PyUnicode_READY(inbuf) < 0)
808         goto errorexit;
809     inpos = 0;
810     datalen = PyUnicode_GET_LENGTH(inbuf);
811 
812     r = multibytecodec_encode(ctx->codec, &ctx->state,
813                               inbuf, &inpos,
814                               ctx->errors, final ? MBENC_FLUSH | MBENC_RESET : 0);
815     if (r == NULL) {
816         /* recover the original pending buffer */
817         Py_XSETREF(ctx->pending, origpending);
818         origpending = NULL;
819         goto errorexit;
820     }
821     Py_XDECREF(origpending);
822 
823     if (inpos < datalen) {
824         if (datalen - inpos > MAXENCPENDING) {
825             /* normal codecs can't reach here */
826             PyErr_SetString(PyExc_UnicodeError,
827                             "pending buffer overflow");
828             goto errorexit;
829         }
830         ctx->pending = PyUnicode_Substring(inbuf, inpos, datalen);
831         if (ctx->pending == NULL) {
832             /* normal codecs can't reach here */
833             goto errorexit;
834         }
835     }
836 
837     Py_DECREF(inbuf);
838     Py_XDECREF(ucvt);
839     return r;
840 
841 errorexit:
842     Py_XDECREF(r);
843     Py_XDECREF(ucvt);
844     Py_XDECREF(origpending);
845     Py_XDECREF(inbuf);
846     return NULL;
847 }
848 
849 static int
decoder_append_pending(MultibyteStatefulDecoderContext * ctx,MultibyteDecodeBuffer * buf)850 decoder_append_pending(MultibyteStatefulDecoderContext *ctx,
851                        MultibyteDecodeBuffer *buf)
852 {
853     Py_ssize_t npendings;
854 
855     npendings = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
856     if (npendings + ctx->pendingsize > MAXDECPENDING ||
857         npendings > PY_SSIZE_T_MAX - ctx->pendingsize) {
858             PyErr_SetString(PyExc_UnicodeError, "pending buffer overflow");
859             return -1;
860     }
861     memcpy(ctx->pending + ctx->pendingsize, buf->inbuf, npendings);
862     ctx->pendingsize += npendings;
863     return 0;
864 }
865 
866 static int
decoder_prepare_buffer(MultibyteDecodeBuffer * buf,const char * data,Py_ssize_t size)867 decoder_prepare_buffer(MultibyteDecodeBuffer *buf, const char *data,
868                        Py_ssize_t size)
869 {
870     buf->inbuf = buf->inbuf_top = (const unsigned char *)data;
871     buf->inbuf_end = buf->inbuf_top + size;
872     buf->writer.min_length += size;
873     return 0;
874 }
875 
876 static int
decoder_feed_buffer(MultibyteStatefulDecoderContext * ctx,MultibyteDecodeBuffer * buf)877 decoder_feed_buffer(MultibyteStatefulDecoderContext *ctx,
878                     MultibyteDecodeBuffer *buf)
879 {
880     while (buf->inbuf < buf->inbuf_end) {
881         Py_ssize_t inleft;
882         Py_ssize_t r;
883 
884         inleft = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
885 
886         r = ctx->codec->decode(&ctx->state, ctx->codec->config,
887             &buf->inbuf, inleft, &buf->writer);
888         if (r == 0 || r == MBERR_TOOFEW)
889             break;
890         else if (multibytecodec_decerror(ctx->codec, &ctx->state,
891                                          buf, ctx->errors, r))
892             return -1;
893     }
894     return 0;
895 }
896 
897 
898 /*[clinic input]
899 _multibytecodec.MultibyteIncrementalEncoder.encode
900 
901     input: object
902     final: bool(accept={int}) = False
903 [clinic start generated code]*/
904 
905 static PyObject *
_multibytecodec_MultibyteIncrementalEncoder_encode_impl(MultibyteIncrementalEncoderObject * self,PyObject * input,int final)906 _multibytecodec_MultibyteIncrementalEncoder_encode_impl(MultibyteIncrementalEncoderObject *self,
907                                                         PyObject *input,
908                                                         int final)
909 /*[clinic end generated code: output=123361b6c505e2c1 input=093a1ddbb2fc6721]*/
910 {
911     return encoder_encode_stateful(STATEFUL_ECTX(self), input, final);
912 }
913 
914 /*[clinic input]
915 _multibytecodec.MultibyteIncrementalEncoder.getstate
916 [clinic start generated code]*/
917 
918 static PyObject *
_multibytecodec_MultibyteIncrementalEncoder_getstate_impl(MultibyteIncrementalEncoderObject * self)919 _multibytecodec_MultibyteIncrementalEncoder_getstate_impl(MultibyteIncrementalEncoderObject *self)
920 /*[clinic end generated code: output=9794a5ace70d7048 input=4a2a82874ffa40bb]*/
921 {
922     /* state made up of 1 byte for buffer size, up to MAXENCPENDING*4 bytes
923        for UTF-8 encoded buffer (each character can use up to 4
924        bytes), and required bytes for MultibyteCodec_State.c. A byte
925        array is used to avoid different compilers generating different
926        values for the same state, e.g. as a result of struct padding.
927     */
928     unsigned char statebytes[1 + MAXENCPENDING*4 + sizeof(self->state.c)];
929     Py_ssize_t statesize;
930     const char *pendingbuffer = NULL;
931     Py_ssize_t pendingsize;
932 
933     if (self->pending != NULL) {
934         pendingbuffer = PyUnicode_AsUTF8AndSize(self->pending, &pendingsize);
935         if (pendingbuffer == NULL) {
936             return NULL;
937         }
938         if (pendingsize > MAXENCPENDING*4) {
939             PyErr_SetString(PyExc_UnicodeError, "pending buffer too large");
940             return NULL;
941         }
942         statebytes[0] = (unsigned char)pendingsize;
943         memcpy(statebytes + 1, pendingbuffer, pendingsize);
944         statesize = 1 + pendingsize;
945     } else {
946         statebytes[0] = 0;
947         statesize = 1;
948     }
949     memcpy(statebytes+statesize, self->state.c,
950            sizeof(self->state.c));
951     statesize += sizeof(self->state.c);
952 
953     return (PyObject *)_PyLong_FromByteArray(statebytes, statesize,
954                                              1 /* little-endian */ ,
955                                              0 /* unsigned */ );
956 }
957 
958 /*[clinic input]
959 _multibytecodec.MultibyteIncrementalEncoder.setstate
960     state as statelong: object(type='PyLongObject *', subclass_of='&PyLong_Type')
961     /
962 [clinic start generated code]*/
963 
964 static PyObject *
_multibytecodec_MultibyteIncrementalEncoder_setstate_impl(MultibyteIncrementalEncoderObject * self,PyLongObject * statelong)965 _multibytecodec_MultibyteIncrementalEncoder_setstate_impl(MultibyteIncrementalEncoderObject *self,
966                                                           PyLongObject *statelong)
967 /*[clinic end generated code: output=4e5e98ac1f4039ca input=c80fb5830d4d2f76]*/
968 {
969     PyObject *pending = NULL;
970     unsigned char statebytes[1 + MAXENCPENDING*4 + sizeof(self->state.c)];
971 
972     if (_PyLong_AsByteArray(statelong, statebytes, sizeof(statebytes),
973                             1 /* little-endian */ ,
974                             0 /* unsigned */ ) < 0) {
975         goto errorexit;
976     }
977 
978     if (statebytes[0] > MAXENCPENDING*4) {
979         PyErr_SetString(PyExc_UnicodeError, "pending buffer too large");
980         return NULL;
981     }
982 
983     pending = PyUnicode_DecodeUTF8((const char *)statebytes+1,
984                                    statebytes[0], "strict");
985     if (pending == NULL) {
986         goto errorexit;
987     }
988 
989     Py_CLEAR(self->pending);
990     self->pending = pending;
991     memcpy(self->state.c, statebytes+1+statebytes[0],
992            sizeof(self->state.c));
993 
994     Py_RETURN_NONE;
995 
996 errorexit:
997     Py_XDECREF(pending);
998     return NULL;
999 }
1000 
1001 /*[clinic input]
1002 _multibytecodec.MultibyteIncrementalEncoder.reset
1003 [clinic start generated code]*/
1004 
1005 static PyObject *
_multibytecodec_MultibyteIncrementalEncoder_reset_impl(MultibyteIncrementalEncoderObject * self)1006 _multibytecodec_MultibyteIncrementalEncoder_reset_impl(MultibyteIncrementalEncoderObject *self)
1007 /*[clinic end generated code: output=b4125d8f537a253f input=930f06760707b6ea]*/
1008 {
1009     /* Longest output: 4 bytes (b'\x0F\x1F(B') with ISO 2022 */
1010     unsigned char buffer[4], *outbuf;
1011     Py_ssize_t r;
1012     if (self->codec->encreset != NULL) {
1013         outbuf = buffer;
1014         r = self->codec->encreset(&self->state, self->codec->config,
1015                                   &outbuf, sizeof(buffer));
1016         if (r != 0)
1017             return NULL;
1018     }
1019     Py_CLEAR(self->pending);
1020     Py_RETURN_NONE;
1021 }
1022 
1023 static struct PyMethodDef mbiencoder_methods[] = {
1024     _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_ENCODE_METHODDEF
1025     _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_GETSTATE_METHODDEF
1026     _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_SETSTATE_METHODDEF
1027     _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_RESET_METHODDEF
1028     {NULL, NULL},
1029 };
1030 
1031 static PyObject *
mbiencoder_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1032 mbiencoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1033 {
1034     MultibyteIncrementalEncoderObject *self;
1035     PyObject *codec = NULL;
1036     char *errors = NULL;
1037 
1038     if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalEncoder",
1039                                      incnewkwarglist, &errors))
1040         return NULL;
1041 
1042     self = (MultibyteIncrementalEncoderObject *)type->tp_alloc(type, 0);
1043     if (self == NULL)
1044         return NULL;
1045 
1046     codec = PyObject_GetAttrString((PyObject *)type, "codec");
1047     if (codec == NULL)
1048         goto errorexit;
1049 
1050     _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type);
1051     if (!MultibyteCodec_Check(state, codec)) {
1052         PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1053         goto errorexit;
1054     }
1055 
1056     self->codec = ((MultibyteCodecObject *)codec)->codec;
1057     self->pending = NULL;
1058     self->errors = internal_error_callback(errors);
1059     if (self->errors == NULL)
1060         goto errorexit;
1061     if (self->codec->encinit != NULL &&
1062         self->codec->encinit(&self->state, self->codec->config) != 0)
1063         goto errorexit;
1064 
1065     Py_DECREF(codec);
1066     return (PyObject *)self;
1067 
1068 errorexit:
1069     Py_XDECREF(self);
1070     Py_XDECREF(codec);
1071     return NULL;
1072 }
1073 
1074 static int
mbiencoder_init(PyObject * self,PyObject * args,PyObject * kwds)1075 mbiencoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1076 {
1077     return 0;
1078 }
1079 
1080 static int
mbiencoder_traverse(MultibyteIncrementalEncoderObject * self,visitproc visit,void * arg)1081 mbiencoder_traverse(MultibyteIncrementalEncoderObject *self,
1082                     visitproc visit, void *arg)
1083 {
1084     if (ERROR_ISCUSTOM(self->errors))
1085         Py_VISIT(self->errors);
1086     return 0;
1087 }
1088 
1089 static void
mbiencoder_dealloc(MultibyteIncrementalEncoderObject * self)1090 mbiencoder_dealloc(MultibyteIncrementalEncoderObject *self)
1091 {
1092     PyTypeObject *tp = Py_TYPE(self);
1093     PyObject_GC_UnTrack(self);
1094     ERROR_DECREF(self->errors);
1095     Py_CLEAR(self->pending);
1096     tp->tp_free(self);
1097     Py_DECREF(tp);
1098 }
1099 
1100 static PyType_Slot encoder_slots[] = {
1101     {Py_tp_dealloc, mbiencoder_dealloc},
1102     {Py_tp_getattro, PyObject_GenericGetAttr},
1103     {Py_tp_traverse, mbiencoder_traverse},
1104     {Py_tp_methods, mbiencoder_methods},
1105     {Py_tp_getset, codecctx_getsets},
1106     {Py_tp_init, mbiencoder_init},
1107     {Py_tp_new, mbiencoder_new},
1108     {0, NULL},
1109 };
1110 
1111 static PyType_Spec encoder_spec = {
1112     .name = MODULE_NAME ".MultibyteIncrementalEncoder",
1113     .basicsize = sizeof(MultibyteIncrementalEncoderObject),
1114     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE |
1115               Py_TPFLAGS_IMMUTABLETYPE),
1116     .slots = encoder_slots,
1117 };
1118 
1119 
1120 /*[clinic input]
1121 _multibytecodec.MultibyteIncrementalDecoder.decode
1122 
1123     input: Py_buffer
1124     final: bool(accept={int}) = False
1125 [clinic start generated code]*/
1126 
1127 static PyObject *
_multibytecodec_MultibyteIncrementalDecoder_decode_impl(MultibyteIncrementalDecoderObject * self,Py_buffer * input,int final)1128 _multibytecodec_MultibyteIncrementalDecoder_decode_impl(MultibyteIncrementalDecoderObject *self,
1129                                                         Py_buffer *input,
1130                                                         int final)
1131 /*[clinic end generated code: output=b9b9090e8a9ce2ba input=c9132b24d503eb1d]*/
1132 {
1133     MultibyteDecodeBuffer buf;
1134     char *data, *wdata = NULL;
1135     Py_ssize_t wsize, size, origpending;
1136     PyObject *res;
1137 
1138     data = input->buf;
1139     size = input->len;
1140 
1141     _PyUnicodeWriter_Init(&buf.writer);
1142     buf.excobj = NULL;
1143     origpending = self->pendingsize;
1144 
1145     if (self->pendingsize == 0) {
1146         wsize = size;
1147         wdata = data;
1148     }
1149     else {
1150         if (size > PY_SSIZE_T_MAX - self->pendingsize) {
1151             PyErr_NoMemory();
1152             goto errorexit;
1153         }
1154         wsize = size + self->pendingsize;
1155         wdata = PyMem_Malloc(wsize);
1156         if (wdata == NULL) {
1157             PyErr_NoMemory();
1158             goto errorexit;
1159         }
1160         memcpy(wdata, self->pending, self->pendingsize);
1161         memcpy(wdata + self->pendingsize, data, size);
1162         self->pendingsize = 0;
1163     }
1164 
1165     if (decoder_prepare_buffer(&buf, wdata, wsize) != 0)
1166         goto errorexit;
1167 
1168     if (decoder_feed_buffer(STATEFUL_DCTX(self), &buf))
1169         goto errorexit;
1170 
1171     if (final && buf.inbuf < buf.inbuf_end) {
1172         if (multibytecodec_decerror(self->codec, &self->state,
1173                         &buf, self->errors, MBERR_TOOFEW)) {
1174             /* recover the original pending buffer */
1175             memcpy(self->pending, wdata, origpending);
1176             self->pendingsize = origpending;
1177             goto errorexit;
1178         }
1179     }
1180 
1181     if (buf.inbuf < buf.inbuf_end) { /* pending sequence still exists */
1182         if (decoder_append_pending(STATEFUL_DCTX(self), &buf) != 0)
1183             goto errorexit;
1184     }
1185 
1186     res = _PyUnicodeWriter_Finish(&buf.writer);
1187     if (res == NULL)
1188         goto errorexit;
1189 
1190     if (wdata != data)
1191         PyMem_Free(wdata);
1192     Py_XDECREF(buf.excobj);
1193     return res;
1194 
1195 errorexit:
1196     if (wdata != NULL && wdata != data)
1197         PyMem_Free(wdata);
1198     Py_XDECREF(buf.excobj);
1199     _PyUnicodeWriter_Dealloc(&buf.writer);
1200     return NULL;
1201 }
1202 
1203 /*[clinic input]
1204 _multibytecodec.MultibyteIncrementalDecoder.getstate
1205 [clinic start generated code]*/
1206 
1207 static PyObject *
_multibytecodec_MultibyteIncrementalDecoder_getstate_impl(MultibyteIncrementalDecoderObject * self)1208 _multibytecodec_MultibyteIncrementalDecoder_getstate_impl(MultibyteIncrementalDecoderObject *self)
1209 /*[clinic end generated code: output=255009c4713b7f82 input=4006aa49bddbaa75]*/
1210 {
1211     PyObject *buffer;
1212     PyObject *statelong;
1213 
1214     buffer = PyBytes_FromStringAndSize((const char *)self->pending,
1215                                        self->pendingsize);
1216     if (buffer == NULL) {
1217         return NULL;
1218     }
1219 
1220     statelong = (PyObject *)_PyLong_FromByteArray(self->state.c,
1221                                                   sizeof(self->state.c),
1222                                                   1 /* little-endian */ ,
1223                                                   0 /* unsigned */ );
1224     if (statelong == NULL) {
1225         Py_DECREF(buffer);
1226         return NULL;
1227     }
1228 
1229     return Py_BuildValue("NN", buffer, statelong);
1230 }
1231 
1232 /*[clinic input]
1233 _multibytecodec.MultibyteIncrementalDecoder.setstate
1234     state: object(subclass_of='&PyTuple_Type')
1235     /
1236 [clinic start generated code]*/
1237 
1238 static PyObject *
_multibytecodec_MultibyteIncrementalDecoder_setstate_impl(MultibyteIncrementalDecoderObject * self,PyObject * state)1239 _multibytecodec_MultibyteIncrementalDecoder_setstate_impl(MultibyteIncrementalDecoderObject *self,
1240                                                           PyObject *state)
1241 /*[clinic end generated code: output=106b2fbca3e2dcc2 input=e5d794e8baba1a47]*/
1242 {
1243     PyObject *buffer;
1244     PyLongObject *statelong;
1245     Py_ssize_t buffersize;
1246     const char *bufferstr;
1247     unsigned char statebytes[8];
1248 
1249     if (!PyArg_ParseTuple(state, "SO!;setstate(): illegal state argument",
1250                           &buffer, &PyLong_Type, &statelong))
1251     {
1252         return NULL;
1253     }
1254 
1255     if (_PyLong_AsByteArray(statelong, statebytes, sizeof(statebytes),
1256                             1 /* little-endian */ ,
1257                             0 /* unsigned */ ) < 0) {
1258         return NULL;
1259     }
1260 
1261     buffersize = PyBytes_Size(buffer);
1262     if (buffersize == -1) {
1263         return NULL;
1264     }
1265 
1266     if (buffersize > MAXDECPENDING) {
1267         PyErr_SetString(PyExc_UnicodeError, "pending buffer too large");
1268         return NULL;
1269     }
1270 
1271     bufferstr = PyBytes_AsString(buffer);
1272     if (bufferstr == NULL) {
1273         return NULL;
1274     }
1275     self->pendingsize = buffersize;
1276     memcpy(self->pending, bufferstr, self->pendingsize);
1277     memcpy(self->state.c, statebytes, sizeof(statebytes));
1278 
1279     Py_RETURN_NONE;
1280 }
1281 
1282 /*[clinic input]
1283 _multibytecodec.MultibyteIncrementalDecoder.reset
1284 [clinic start generated code]*/
1285 
1286 static PyObject *
_multibytecodec_MultibyteIncrementalDecoder_reset_impl(MultibyteIncrementalDecoderObject * self)1287 _multibytecodec_MultibyteIncrementalDecoder_reset_impl(MultibyteIncrementalDecoderObject *self)
1288 /*[clinic end generated code: output=da423b1782c23ed1 input=3b63b3be85b2fb45]*/
1289 {
1290     if (self->codec->decreset != NULL &&
1291         self->codec->decreset(&self->state, self->codec->config) != 0)
1292         return NULL;
1293     self->pendingsize = 0;
1294 
1295     Py_RETURN_NONE;
1296 }
1297 
1298 static struct PyMethodDef mbidecoder_methods[] = {
1299     _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_DECODE_METHODDEF
1300     _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_GETSTATE_METHODDEF
1301     _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_SETSTATE_METHODDEF
1302     _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_RESET_METHODDEF
1303     {NULL, NULL},
1304 };
1305 
1306 static PyObject *
mbidecoder_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1307 mbidecoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1308 {
1309     MultibyteIncrementalDecoderObject *self;
1310     PyObject *codec = NULL;
1311     char *errors = NULL;
1312 
1313     if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalDecoder",
1314                                      incnewkwarglist, &errors))
1315         return NULL;
1316 
1317     self = (MultibyteIncrementalDecoderObject *)type->tp_alloc(type, 0);
1318     if (self == NULL)
1319         return NULL;
1320 
1321     codec = PyObject_GetAttrString((PyObject *)type, "codec");
1322     if (codec == NULL)
1323         goto errorexit;
1324 
1325     _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type);
1326     if (!MultibyteCodec_Check(state, codec)) {
1327         PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1328         goto errorexit;
1329     }
1330 
1331     self->codec = ((MultibyteCodecObject *)codec)->codec;
1332     self->pendingsize = 0;
1333     self->errors = internal_error_callback(errors);
1334     if (self->errors == NULL)
1335         goto errorexit;
1336     if (self->codec->decinit != NULL &&
1337         self->codec->decinit(&self->state, self->codec->config) != 0)
1338         goto errorexit;
1339 
1340     Py_DECREF(codec);
1341     return (PyObject *)self;
1342 
1343 errorexit:
1344     Py_XDECREF(self);
1345     Py_XDECREF(codec);
1346     return NULL;
1347 }
1348 
1349 static int
mbidecoder_init(PyObject * self,PyObject * args,PyObject * kwds)1350 mbidecoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1351 {
1352     return 0;
1353 }
1354 
1355 static int
mbidecoder_traverse(MultibyteIncrementalDecoderObject * self,visitproc visit,void * arg)1356 mbidecoder_traverse(MultibyteIncrementalDecoderObject *self,
1357                     visitproc visit, void *arg)
1358 {
1359     if (ERROR_ISCUSTOM(self->errors))
1360         Py_VISIT(self->errors);
1361     return 0;
1362 }
1363 
1364 static void
mbidecoder_dealloc(MultibyteIncrementalDecoderObject * self)1365 mbidecoder_dealloc(MultibyteIncrementalDecoderObject *self)
1366 {
1367     PyTypeObject *tp = Py_TYPE(self);
1368     PyObject_GC_UnTrack(self);
1369     ERROR_DECREF(self->errors);
1370     tp->tp_free(self);
1371     Py_DECREF(tp);
1372 }
1373 
1374 static PyType_Slot decoder_slots[] = {
1375     {Py_tp_dealloc, mbidecoder_dealloc},
1376     {Py_tp_getattro, PyObject_GenericGetAttr},
1377     {Py_tp_traverse, mbidecoder_traverse},
1378     {Py_tp_methods, mbidecoder_methods},
1379     {Py_tp_getset, codecctx_getsets},
1380     {Py_tp_init, mbidecoder_init},
1381     {Py_tp_new, mbidecoder_new},
1382     {0, NULL},
1383 };
1384 
1385 static PyType_Spec decoder_spec = {
1386     .name = MODULE_NAME ".MultibyteIncrementalDecoder",
1387     .basicsize = sizeof(MultibyteIncrementalDecoderObject),
1388     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE |
1389               Py_TPFLAGS_IMMUTABLETYPE),
1390     .slots = decoder_slots,
1391 };
1392 
1393 static PyObject *
mbstreamreader_iread(MultibyteStreamReaderObject * self,const char * method,Py_ssize_t sizehint)1394 mbstreamreader_iread(MultibyteStreamReaderObject *self,
1395                      const char *method, Py_ssize_t sizehint)
1396 {
1397     MultibyteDecodeBuffer buf;
1398     PyObject *cres, *res;
1399     Py_ssize_t rsize;
1400 
1401     if (sizehint == 0)
1402         return PyUnicode_New(0, 0);
1403 
1404     _PyUnicodeWriter_Init(&buf.writer);
1405     buf.excobj = NULL;
1406     cres = NULL;
1407 
1408     for (;;) {
1409         int endoffile;
1410 
1411         if (sizehint < 0)
1412             cres = PyObject_CallMethod(self->stream,
1413                             method, NULL);
1414         else
1415             cres = PyObject_CallMethod(self->stream,
1416                             method, "i", sizehint);
1417         if (cres == NULL)
1418             goto errorexit;
1419 
1420         if (!PyBytes_Check(cres)) {
1421             PyErr_Format(PyExc_TypeError,
1422                          "stream function returned a "
1423                          "non-bytes object (%.100s)",
1424                          Py_TYPE(cres)->tp_name);
1425             goto errorexit;
1426         }
1427 
1428         endoffile = (PyBytes_GET_SIZE(cres) == 0);
1429 
1430         if (self->pendingsize > 0) {
1431             PyObject *ctr;
1432             char *ctrdata;
1433 
1434             if (PyBytes_GET_SIZE(cres) > PY_SSIZE_T_MAX - self->pendingsize) {
1435                 PyErr_NoMemory();
1436                 goto errorexit;
1437             }
1438             rsize = PyBytes_GET_SIZE(cres) + self->pendingsize;
1439             ctr = PyBytes_FromStringAndSize(NULL, rsize);
1440             if (ctr == NULL)
1441                 goto errorexit;
1442             ctrdata = PyBytes_AS_STRING(ctr);
1443             memcpy(ctrdata, self->pending, self->pendingsize);
1444             memcpy(ctrdata + self->pendingsize,
1445                     PyBytes_AS_STRING(cres),
1446                     PyBytes_GET_SIZE(cres));
1447             Py_DECREF(cres);
1448             cres = ctr;
1449             self->pendingsize = 0;
1450         }
1451 
1452         rsize = PyBytes_GET_SIZE(cres);
1453         if (decoder_prepare_buffer(&buf, PyBytes_AS_STRING(cres),
1454                                    rsize) != 0)
1455             goto errorexit;
1456 
1457         if (rsize > 0 && decoder_feed_buffer(
1458                         (MultibyteStatefulDecoderContext *)self, &buf))
1459             goto errorexit;
1460 
1461         if (endoffile || sizehint < 0) {
1462             if (buf.inbuf < buf.inbuf_end &&
1463                 multibytecodec_decerror(self->codec, &self->state,
1464                             &buf, self->errors, MBERR_TOOFEW))
1465                 goto errorexit;
1466         }
1467 
1468         if (buf.inbuf < buf.inbuf_end) { /* pending sequence exists */
1469             if (decoder_append_pending(STATEFUL_DCTX(self),
1470                                        &buf) != 0)
1471                 goto errorexit;
1472         }
1473 
1474         Py_DECREF(cres);
1475         cres = NULL;
1476 
1477         if (sizehint < 0 || buf.writer.pos != 0 || rsize == 0)
1478             break;
1479 
1480         sizehint = 1; /* read 1 more byte and retry */
1481     }
1482 
1483     res = _PyUnicodeWriter_Finish(&buf.writer);
1484     if (res == NULL)
1485         goto errorexit;
1486 
1487     Py_XDECREF(cres);
1488     Py_XDECREF(buf.excobj);
1489     return res;
1490 
1491 errorexit:
1492     Py_XDECREF(cres);
1493     Py_XDECREF(buf.excobj);
1494     _PyUnicodeWriter_Dealloc(&buf.writer);
1495     return NULL;
1496 }
1497 
1498 /*[clinic input]
1499  _multibytecodec.MultibyteStreamReader.read
1500 
1501     sizeobj: object = None
1502     /
1503 [clinic start generated code]*/
1504 
1505 static PyObject *
_multibytecodec_MultibyteStreamReader_read_impl(MultibyteStreamReaderObject * self,PyObject * sizeobj)1506 _multibytecodec_MultibyteStreamReader_read_impl(MultibyteStreamReaderObject *self,
1507                                                 PyObject *sizeobj)
1508 /*[clinic end generated code: output=35621eb75355d5b8 input=015b0d3ff2fca485]*/
1509 {
1510     Py_ssize_t size;
1511 
1512     if (sizeobj == Py_None)
1513         size = -1;
1514     else if (PyLong_Check(sizeobj))
1515         size = PyLong_AsSsize_t(sizeobj);
1516     else {
1517         PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
1518         return NULL;
1519     }
1520 
1521     if (size == -1 && PyErr_Occurred())
1522         return NULL;
1523 
1524     return mbstreamreader_iread(self, "read", size);
1525 }
1526 
1527 /*[clinic input]
1528  _multibytecodec.MultibyteStreamReader.readline
1529 
1530     sizeobj: object = None
1531     /
1532 [clinic start generated code]*/
1533 
1534 static PyObject *
_multibytecodec_MultibyteStreamReader_readline_impl(MultibyteStreamReaderObject * self,PyObject * sizeobj)1535 _multibytecodec_MultibyteStreamReader_readline_impl(MultibyteStreamReaderObject *self,
1536                                                     PyObject *sizeobj)
1537 /*[clinic end generated code: output=4fbfaae1ed457a11 input=41ccc64f9bb0cec3]*/
1538 {
1539     Py_ssize_t size;
1540 
1541     if (sizeobj == Py_None)
1542         size = -1;
1543     else if (PyLong_Check(sizeobj))
1544         size = PyLong_AsSsize_t(sizeobj);
1545     else {
1546         PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
1547         return NULL;
1548     }
1549 
1550     if (size == -1 && PyErr_Occurred())
1551         return NULL;
1552 
1553     return mbstreamreader_iread(self, "readline", size);
1554 }
1555 
1556 /*[clinic input]
1557  _multibytecodec.MultibyteStreamReader.readlines
1558 
1559     sizehintobj: object = None
1560     /
1561 [clinic start generated code]*/
1562 
1563 static PyObject *
_multibytecodec_MultibyteStreamReader_readlines_impl(MultibyteStreamReaderObject * self,PyObject * sizehintobj)1564 _multibytecodec_MultibyteStreamReader_readlines_impl(MultibyteStreamReaderObject *self,
1565                                                      PyObject *sizehintobj)
1566 /*[clinic end generated code: output=e7c4310768ed2ad4 input=54932f5d4d88e880]*/
1567 {
1568     PyObject *r, *sr;
1569     Py_ssize_t sizehint;
1570 
1571     if (sizehintobj == Py_None)
1572         sizehint = -1;
1573     else if (PyLong_Check(sizehintobj))
1574         sizehint = PyLong_AsSsize_t(sizehintobj);
1575     else {
1576         PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
1577         return NULL;
1578     }
1579 
1580     if (sizehint == -1 && PyErr_Occurred())
1581         return NULL;
1582 
1583     r = mbstreamreader_iread(self, "read", sizehint);
1584     if (r == NULL)
1585         return NULL;
1586 
1587     sr = PyUnicode_Splitlines(r, 1);
1588     Py_DECREF(r);
1589     return sr;
1590 }
1591 
1592 /*[clinic input]
1593  _multibytecodec.MultibyteStreamReader.reset
1594 [clinic start generated code]*/
1595 
1596 static PyObject *
_multibytecodec_MultibyteStreamReader_reset_impl(MultibyteStreamReaderObject * self)1597 _multibytecodec_MultibyteStreamReader_reset_impl(MultibyteStreamReaderObject *self)
1598 /*[clinic end generated code: output=138490370a680abc input=5d4140db84b5e1e2]*/
1599 {
1600     if (self->codec->decreset != NULL &&
1601         self->codec->decreset(&self->state, self->codec->config) != 0)
1602         return NULL;
1603     self->pendingsize = 0;
1604 
1605     Py_RETURN_NONE;
1606 }
1607 
1608 static struct PyMethodDef mbstreamreader_methods[] = {
1609     _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READ_METHODDEF
1610     _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READLINE_METHODDEF
1611     _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READLINES_METHODDEF
1612     _MULTIBYTECODEC_MULTIBYTESTREAMREADER_RESET_METHODDEF
1613     {NULL,              NULL},
1614 };
1615 
1616 static PyMemberDef mbstreamreader_members[] = {
1617     {"stream",          T_OBJECT,
1618                     offsetof(MultibyteStreamReaderObject, stream),
1619                     READONLY, NULL},
1620     {NULL,}
1621 };
1622 
1623 static PyObject *
mbstreamreader_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1624 mbstreamreader_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1625 {
1626     MultibyteStreamReaderObject *self;
1627     PyObject *stream, *codec = NULL;
1628     char *errors = NULL;
1629 
1630     if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamReader",
1631                             streamkwarglist, &stream, &errors))
1632         return NULL;
1633 
1634     self = (MultibyteStreamReaderObject *)type->tp_alloc(type, 0);
1635     if (self == NULL)
1636         return NULL;
1637 
1638     codec = PyObject_GetAttrString((PyObject *)type, "codec");
1639     if (codec == NULL)
1640         goto errorexit;
1641 
1642     _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type);
1643     if (!MultibyteCodec_Check(state, codec)) {
1644         PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1645         goto errorexit;
1646     }
1647 
1648     self->codec = ((MultibyteCodecObject *)codec)->codec;
1649     self->stream = stream;
1650     Py_INCREF(stream);
1651     self->pendingsize = 0;
1652     self->errors = internal_error_callback(errors);
1653     if (self->errors == NULL)
1654         goto errorexit;
1655     if (self->codec->decinit != NULL &&
1656         self->codec->decinit(&self->state, self->codec->config) != 0)
1657         goto errorexit;
1658 
1659     Py_DECREF(codec);
1660     return (PyObject *)self;
1661 
1662 errorexit:
1663     Py_XDECREF(self);
1664     Py_XDECREF(codec);
1665     return NULL;
1666 }
1667 
1668 static int
mbstreamreader_init(PyObject * self,PyObject * args,PyObject * kwds)1669 mbstreamreader_init(PyObject *self, PyObject *args, PyObject *kwds)
1670 {
1671     return 0;
1672 }
1673 
1674 static int
mbstreamreader_traverse(MultibyteStreamReaderObject * self,visitproc visit,void * arg)1675 mbstreamreader_traverse(MultibyteStreamReaderObject *self,
1676                         visitproc visit, void *arg)
1677 {
1678     if (ERROR_ISCUSTOM(self->errors))
1679         Py_VISIT(self->errors);
1680     Py_VISIT(self->stream);
1681     return 0;
1682 }
1683 
1684 static void
mbstreamreader_dealloc(MultibyteStreamReaderObject * self)1685 mbstreamreader_dealloc(MultibyteStreamReaderObject *self)
1686 {
1687     PyTypeObject *tp = Py_TYPE(self);
1688     PyObject_GC_UnTrack(self);
1689     ERROR_DECREF(self->errors);
1690     Py_XDECREF(self->stream);
1691     tp->tp_free(self);
1692     Py_DECREF(tp);
1693 }
1694 
1695 static PyType_Slot reader_slots[] = {
1696     {Py_tp_dealloc, mbstreamreader_dealloc},
1697     {Py_tp_getattro, PyObject_GenericGetAttr},
1698     {Py_tp_traverse, mbstreamreader_traverse},
1699     {Py_tp_methods, mbstreamreader_methods},
1700     {Py_tp_members, mbstreamreader_members},
1701     {Py_tp_getset, codecctx_getsets},
1702     {Py_tp_init, mbstreamreader_init},
1703     {Py_tp_new, mbstreamreader_new},
1704     {0, NULL},
1705 };
1706 
1707 static PyType_Spec reader_spec = {
1708     .name = MODULE_NAME ".MultibyteStreamReader",
1709     .basicsize = sizeof(MultibyteStreamReaderObject),
1710     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE |
1711               Py_TPFLAGS_IMMUTABLETYPE),
1712     .slots = reader_slots,
1713 };
1714 
1715 static int
mbstreamwriter_iwrite(MultibyteStreamWriterObject * self,PyObject * unistr)1716 mbstreamwriter_iwrite(MultibyteStreamWriterObject *self,
1717                       PyObject *unistr)
1718 {
1719     PyObject *str, *wr;
1720 
1721     str = encoder_encode_stateful(STATEFUL_ECTX(self), unistr, 0);
1722     if (str == NULL)
1723         return -1;
1724 
1725     wr = _PyObject_CallMethodIdOneArg(self->stream, &PyId_write, str);
1726     Py_DECREF(str);
1727     if (wr == NULL)
1728         return -1;
1729 
1730     Py_DECREF(wr);
1731     return 0;
1732 }
1733 
1734 /*[clinic input]
1735  _multibytecodec.MultibyteStreamWriter.write
1736 
1737     strobj: object
1738     /
1739 [clinic start generated code]*/
1740 
1741 static PyObject *
_multibytecodec_MultibyteStreamWriter_write(MultibyteStreamWriterObject * self,PyObject * strobj)1742 _multibytecodec_MultibyteStreamWriter_write(MultibyteStreamWriterObject *self,
1743                                             PyObject *strobj)
1744 /*[clinic end generated code: output=e13ae841c895251e input=551dc4c018c10a2b]*/
1745 {
1746     if (mbstreamwriter_iwrite(self, strobj))
1747         return NULL;
1748     else
1749         Py_RETURN_NONE;
1750 }
1751 
1752 /*[clinic input]
1753  _multibytecodec.MultibyteStreamWriter.writelines
1754 
1755     lines: object
1756     /
1757 [clinic start generated code]*/
1758 
1759 static PyObject *
_multibytecodec_MultibyteStreamWriter_writelines(MultibyteStreamWriterObject * self,PyObject * lines)1760 _multibytecodec_MultibyteStreamWriter_writelines(MultibyteStreamWriterObject *self,
1761                                                  PyObject *lines)
1762 /*[clinic end generated code: output=e5c4285ac8e7d522 input=57797fe7008d4e96]*/
1763 {
1764     PyObject *strobj;
1765     int i, r;
1766 
1767     if (!PySequence_Check(lines)) {
1768         PyErr_SetString(PyExc_TypeError,
1769                         "arg must be a sequence object");
1770         return NULL;
1771     }
1772 
1773     for (i = 0; i < PySequence_Length(lines); i++) {
1774         /* length can be changed even within this loop */
1775         strobj = PySequence_GetItem(lines, i);
1776         if (strobj == NULL)
1777             return NULL;
1778 
1779         r = mbstreamwriter_iwrite(self, strobj);
1780         Py_DECREF(strobj);
1781         if (r == -1)
1782             return NULL;
1783     }
1784     /* PySequence_Length() can fail */
1785     if (PyErr_Occurred())
1786         return NULL;
1787 
1788     Py_RETURN_NONE;
1789 }
1790 
1791 /*[clinic input]
1792  _multibytecodec.MultibyteStreamWriter.reset
1793 [clinic start generated code]*/
1794 
1795 static PyObject *
_multibytecodec_MultibyteStreamWriter_reset_impl(MultibyteStreamWriterObject * self)1796 _multibytecodec_MultibyteStreamWriter_reset_impl(MultibyteStreamWriterObject *self)
1797 /*[clinic end generated code: output=8f54a4d9b03db5ff input=b56dbcbaf35cc10c]*/
1798 {
1799     PyObject *pwrt;
1800 
1801     if (!self->pending)
1802         Py_RETURN_NONE;
1803 
1804     pwrt = multibytecodec_encode(self->codec, &self->state,
1805                     self->pending, NULL, self->errors,
1806                     MBENC_FLUSH | MBENC_RESET);
1807     /* some pending buffer can be truncated when UnicodeEncodeError is
1808      * raised on 'strict' mode. but, 'reset' method is designed to
1809      * reset the pending buffer or states so failed string sequence
1810      * ought to be missed */
1811     Py_CLEAR(self->pending);
1812     if (pwrt == NULL)
1813         return NULL;
1814 
1815     assert(PyBytes_Check(pwrt));
1816     if (PyBytes_Size(pwrt) > 0) {
1817         PyObject *wr;
1818 
1819         wr = _PyObject_CallMethodIdOneArg(self->stream, &PyId_write, pwrt);
1820         if (wr == NULL) {
1821             Py_DECREF(pwrt);
1822             return NULL;
1823         }
1824     }
1825     Py_DECREF(pwrt);
1826 
1827     Py_RETURN_NONE;
1828 }
1829 
1830 static PyObject *
mbstreamwriter_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1831 mbstreamwriter_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1832 {
1833     MultibyteStreamWriterObject *self;
1834     PyObject *stream, *codec = NULL;
1835     char *errors = NULL;
1836 
1837     if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamWriter",
1838                             streamkwarglist, &stream, &errors))
1839         return NULL;
1840 
1841     self = (MultibyteStreamWriterObject *)type->tp_alloc(type, 0);
1842     if (self == NULL)
1843         return NULL;
1844 
1845     codec = PyObject_GetAttrString((PyObject *)type, "codec");
1846     if (codec == NULL)
1847         goto errorexit;
1848 
1849     _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type);
1850     if (!MultibyteCodec_Check(state, codec)) {
1851         PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1852         goto errorexit;
1853     }
1854 
1855     self->codec = ((MultibyteCodecObject *)codec)->codec;
1856     self->stream = stream;
1857     Py_INCREF(stream);
1858     self->pending = NULL;
1859     self->errors = internal_error_callback(errors);
1860     if (self->errors == NULL)
1861         goto errorexit;
1862     if (self->codec->encinit != NULL &&
1863         self->codec->encinit(&self->state, self->codec->config) != 0)
1864         goto errorexit;
1865 
1866     Py_DECREF(codec);
1867     return (PyObject *)self;
1868 
1869 errorexit:
1870     Py_XDECREF(self);
1871     Py_XDECREF(codec);
1872     return NULL;
1873 }
1874 
1875 static int
mbstreamwriter_init(PyObject * self,PyObject * args,PyObject * kwds)1876 mbstreamwriter_init(PyObject *self, PyObject *args, PyObject *kwds)
1877 {
1878     return 0;
1879 }
1880 
1881 static int
mbstreamwriter_traverse(MultibyteStreamWriterObject * self,visitproc visit,void * arg)1882 mbstreamwriter_traverse(MultibyteStreamWriterObject *self,
1883                         visitproc visit, void *arg)
1884 {
1885     if (ERROR_ISCUSTOM(self->errors))
1886         Py_VISIT(self->errors);
1887     Py_VISIT(self->stream);
1888     return 0;
1889 }
1890 
1891 static void
mbstreamwriter_dealloc(MultibyteStreamWriterObject * self)1892 mbstreamwriter_dealloc(MultibyteStreamWriterObject *self)
1893 {
1894     PyTypeObject *tp = Py_TYPE(self);
1895     PyObject_GC_UnTrack(self);
1896     ERROR_DECREF(self->errors);
1897     Py_XDECREF(self->stream);
1898     tp->tp_free(self);
1899     Py_DECREF(tp);
1900 }
1901 
1902 static struct PyMethodDef mbstreamwriter_methods[] = {
1903     _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_WRITE_METHODDEF
1904     _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_WRITELINES_METHODDEF
1905     _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_RESET_METHODDEF
1906     {NULL, NULL},
1907 };
1908 
1909 static PyMemberDef mbstreamwriter_members[] = {
1910     {"stream",          T_OBJECT,
1911                     offsetof(MultibyteStreamWriterObject, stream),
1912                     READONLY, NULL},
1913     {NULL,}
1914 };
1915 
1916 static PyType_Slot writer_slots[] = {
1917     {Py_tp_dealloc, mbstreamwriter_dealloc},
1918     {Py_tp_getattro, PyObject_GenericGetAttr},
1919     {Py_tp_traverse, mbstreamwriter_traverse},
1920     {Py_tp_methods, mbstreamwriter_methods},
1921     {Py_tp_members, mbstreamwriter_members},
1922     {Py_tp_getset, codecctx_getsets},
1923     {Py_tp_init, mbstreamwriter_init},
1924     {Py_tp_new, mbstreamwriter_new},
1925     {0, NULL},
1926 };
1927 
1928 static PyType_Spec writer_spec = {
1929     .name = MODULE_NAME ".MultibyteStreamWriter",
1930     .basicsize = sizeof(MultibyteStreamWriterObject),
1931     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE |
1932               Py_TPFLAGS_IMMUTABLETYPE),
1933     .slots = writer_slots,
1934 };
1935 
1936 
1937 /*[clinic input]
1938 _multibytecodec.__create_codec
1939 
1940     arg: object
1941     /
1942 [clinic start generated code]*/
1943 
1944 static PyObject *
_multibytecodec___create_codec(PyObject * module,PyObject * arg)1945 _multibytecodec___create_codec(PyObject *module, PyObject *arg)
1946 /*[clinic end generated code: output=cfa3dce8260e809d input=6840b2a6b183fcfa]*/
1947 {
1948     MultibyteCodecObject *self;
1949     MultibyteCodec *codec;
1950 
1951     if (!PyCapsule_IsValid(arg, PyMultibyteCodec_CAPSULE_NAME)) {
1952         PyErr_SetString(PyExc_ValueError, "argument type invalid");
1953         return NULL;
1954     }
1955 
1956     codec = PyCapsule_GetPointer(arg, PyMultibyteCodec_CAPSULE_NAME);
1957     if (codec->codecinit != NULL && codec->codecinit(codec->config) != 0)
1958         return NULL;
1959 
1960     _multibytecodec_state *state = _multibytecodec_get_state(module);
1961     self = PyObject_GC_New(MultibyteCodecObject, state->multibytecodec_type);
1962     if (self == NULL)
1963         return NULL;
1964     self->codec = codec;
1965 
1966     PyObject_GC_Track(self);
1967     return (PyObject *)self;
1968 }
1969 
1970 static int
_multibytecodec_traverse(PyObject * mod,visitproc visit,void * arg)1971 _multibytecodec_traverse(PyObject *mod, visitproc visit, void *arg)
1972 {
1973     _multibytecodec_state *state = _multibytecodec_get_state(mod);
1974     Py_VISIT(state->multibytecodec_type);
1975     Py_VISIT(state->encoder_type);
1976     Py_VISIT(state->decoder_type);
1977     Py_VISIT(state->reader_type);
1978     Py_VISIT(state->writer_type);
1979     return 0;
1980 }
1981 
1982 static int
_multibytecodec_clear(PyObject * mod)1983 _multibytecodec_clear(PyObject *mod)
1984 {
1985     _multibytecodec_state *state = _multibytecodec_get_state(mod);
1986     Py_CLEAR(state->multibytecodec_type);
1987     Py_CLEAR(state->encoder_type);
1988     Py_CLEAR(state->decoder_type);
1989     Py_CLEAR(state->reader_type);
1990     Py_CLEAR(state->writer_type);
1991     return 0;
1992 }
1993 
1994 static void
_multibytecodec_free(void * mod)1995 _multibytecodec_free(void *mod)
1996 {
1997     _multibytecodec_clear((PyObject *)mod);
1998 }
1999 
/* Create a heap type from `spec` bound to `module` and store it in `type`.
 * On failure the ENCLOSING function returns -1. */
#define CREATE_TYPE(module, type, spec)                                  \
    do {                                                                 \
        (type) = (PyTypeObject *)PyType_FromModuleAndSpec((module),      \
                                                          (spec), NULL); \
        if ((type) == NULL) {                                            \
            return -1;                                                   \
        }                                                                \
    } while (0)
2007 
/* Add `type` to `module` as an attribute.  On failure the ENCLOSING
 * function returns -1. */
#define ADD_TYPE(module, type)                        \
    do {                                              \
        if (PyModule_AddType((module), (type)) < 0) { \
            return -1;                                \
        }                                             \
    } while (0)
2014 
2015 static int
_multibytecodec_exec(PyObject * mod)2016 _multibytecodec_exec(PyObject *mod)
2017 {
2018     _multibytecodec_state *state = _multibytecodec_get_state(mod);
2019     CREATE_TYPE(mod, state->multibytecodec_type, &multibytecodec_spec);
2020     CREATE_TYPE(mod, state->encoder_type, &encoder_spec);
2021     CREATE_TYPE(mod, state->decoder_type, &decoder_spec);
2022     CREATE_TYPE(mod, state->reader_type, &reader_spec);
2023     CREATE_TYPE(mod, state->writer_type, &writer_spec);
2024 
2025     ADD_TYPE(mod, state->encoder_type);
2026     ADD_TYPE(mod, state->decoder_type);
2027     ADD_TYPE(mod, state->reader_type);
2028     ADD_TYPE(mod, state->writer_type);
2029     return 0;
2030 }
2031 
2032 #undef CREATE_TYPE
2033 #undef ADD_TYPE
2034 
2035 static struct PyMethodDef _multibytecodec_methods[] = {
2036     _MULTIBYTECODEC___CREATE_CODEC_METHODDEF
2037     {NULL, NULL},
2038 };
2039 
2040 static PyModuleDef_Slot _multibytecodec_slots[] = {
2041     {Py_mod_exec, _multibytecodec_exec},
2042     {0, NULL}
2043 };
2044 
2045 static struct PyModuleDef _multibytecodecmodule = {
2046     .m_base = PyModuleDef_HEAD_INIT,
2047     .m_name = "_multibytecodec",
2048     .m_size = sizeof(_multibytecodec_state),
2049     .m_methods = _multibytecodec_methods,
2050     .m_slots = _multibytecodec_slots,
2051     .m_traverse = _multibytecodec_traverse,
2052     .m_clear = _multibytecodec_clear,
2053     .m_free = _multibytecodec_free,
2054 };
2055 
2056 PyMODINIT_FUNC
PyInit__multibytecodec(void)2057 PyInit__multibytecodec(void)
2058 {
2059     return PyModuleDef_Init(&_multibytecodecmodule);
2060 }
2061