• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * multibytecodec.c: Common Multibyte Codec Implementation
3  *
4  * Written by Hye-Shik Chang <perky@FreeBSD.org>
5  */
6 
7 #ifndef Py_BUILD_CORE_BUILTIN
8 #  define Py_BUILD_CORE_MODULE 1
9 #endif
10 
11 #include "Python.h"
12 
13 #include "multibytecodec.h"
14 #include "clinic/multibytecodec.c.h"
15 
16 #include <stddef.h>               // offsetof()
17 
18 #define MODULE_NAME "_multibytecodec"
19 
20 typedef struct {
21     PyTypeObject *encoder_type;
22     PyTypeObject *decoder_type;
23     PyTypeObject *reader_type;
24     PyTypeObject *writer_type;
25     PyTypeObject *multibytecodec_type;
26     PyObject *str_write;
27 } module_state;
28 
29 static module_state *
get_module_state(PyObject * module)30 get_module_state(PyObject *module)
31 {
32     module_state *state = PyModule_GetState(module);
33     assert(state != NULL);
34     return state;
35 }
36 
37 static struct PyModuleDef _multibytecodecmodule;
38 
39 static module_state *
find_state_by_def(PyTypeObject * type)40 find_state_by_def(PyTypeObject *type)
41 {
42     PyObject *module = PyType_GetModuleByDef(type, &_multibytecodecmodule);
43     assert(module != NULL);
44     return get_module_state(module);
45 }
46 
/* clinic_get_state() is only meaningful inside the Argument Clinic class
 * declarations below, where a `type` variable is in scope; it is undefined
 * again right after them. */
#define clinic_get_state() find_state_by_def(type)
/*[clinic input]
module _multibytecodec
class _multibytecodec.MultibyteCodec "MultibyteCodecObject *" "clinic_get_state()->multibytecodec_type"
class _multibytecodec.MultibyteIncrementalEncoder "MultibyteIncrementalEncoderObject *" "clinic_get_state()->encoder_type"
class _multibytecodec.MultibyteIncrementalDecoder "MultibyteIncrementalDecoderObject *" "clinic_get_state()->decoder_type"
class _multibytecodec.MultibyteStreamReader "MultibyteStreamReaderObject *" "clinic_get_state()->reader_type"
class _multibytecodec.MultibyteStreamWriter "MultibyteStreamWriterObject *" "clinic_get_state()->writer_type"
[clinic start generated code]*/
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=305a76dfdd24b99c]*/
#undef clinic_get_state
58 
59 typedef struct {
60     PyObject            *inobj;
61     Py_ssize_t          inpos, inlen;
62     unsigned char       *outbuf, *outbuf_end;
63     PyObject            *excobj, *outobj;
64 } MultibyteEncodeBuffer;
65 
66 typedef struct {
67     const unsigned char *inbuf, *inbuf_top, *inbuf_end;
68     PyObject            *excobj;
69     _PyUnicodeWriter    writer;
70 } MultibyteDecodeBuffer;
71 
/* NULL-terminated keyword-name lists. */
static char *incnewkwarglist[] = {
    "errors",
    NULL
};
static char *streamkwarglist[] = {
    "stream",
    "errors",
    NULL
};
74 
75 static PyObject *multibytecodec_encode(const MultibyteCodec *,
76                 MultibyteCodec_State *, PyObject *, Py_ssize_t *,
77                 PyObject *, int);
78 
/* Reset after an encoding session.  The expansion is parenthesized so the
 * flag stays a single operand wherever it is used. */
#define MBENC_RESET     (MBENC_MAX << 1)
80 
81 static PyObject *
make_tuple(PyObject * object,Py_ssize_t len)82 make_tuple(PyObject *object, Py_ssize_t len)
83 {
84     PyObject *v, *w;
85 
86     if (object == NULL)
87         return NULL;
88 
89     v = PyTuple_New(2);
90     if (v == NULL) {
91         Py_DECREF(object);
92         return NULL;
93     }
94     PyTuple_SET_ITEM(v, 0, object);
95 
96     w = PyLong_FromSsize_t(len);
97     if (w == NULL) {
98         Py_DECREF(v);
99         return NULL;
100     }
101     PyTuple_SET_ITEM(v, 1, w);
102 
103     return v;
104 }
105 
106 static PyObject *
internal_error_callback(const char * errors)107 internal_error_callback(const char *errors)
108 {
109     if (errors == NULL || strcmp(errors, "strict") == 0)
110         return ERROR_STRICT;
111     else if (strcmp(errors, "ignore") == 0)
112         return ERROR_IGNORE;
113     else if (strcmp(errors, "replace") == 0)
114         return ERROR_REPLACE;
115     else
116         return PyUnicode_FromString(errors);
117 }
118 
119 static PyObject *
call_error_callback(PyObject * errors,PyObject * exc)120 call_error_callback(PyObject *errors, PyObject *exc)
121 {
122     PyObject *cb, *r;
123     const char *str;
124 
125     assert(PyUnicode_Check(errors));
126     str = PyUnicode_AsUTF8(errors);
127     if (str == NULL)
128         return NULL;
129     cb = PyCodec_LookupError(str);
130     if (cb == NULL)
131         return NULL;
132 
133     r = PyObject_CallOneArg(cb, exc);
134     Py_DECREF(cb);
135     return r;
136 }
137 
138 static PyObject *
codecctx_errors_get(MultibyteStatefulCodecContext * self,void * Py_UNUSED (ignored))139 codecctx_errors_get(MultibyteStatefulCodecContext *self, void *Py_UNUSED(ignored))
140 {
141     const char *errors;
142 
143     if (self->errors == ERROR_STRICT)
144         errors = "strict";
145     else if (self->errors == ERROR_IGNORE)
146         errors = "ignore";
147     else if (self->errors == ERROR_REPLACE)
148         errors = "replace";
149     else {
150         return Py_NewRef(self->errors);
151     }
152 
153     return PyUnicode_FromString(errors);
154 }
155 
156 static int
codecctx_errors_set(MultibyteStatefulCodecContext * self,PyObject * value,void * closure)157 codecctx_errors_set(MultibyteStatefulCodecContext *self, PyObject *value,
158                     void *closure)
159 {
160     PyObject *cb;
161     const char *str;
162 
163     if (value == NULL) {
164         PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
165         return -1;
166     }
167     if (!PyUnicode_Check(value)) {
168         PyErr_SetString(PyExc_TypeError, "errors must be a string");
169         return -1;
170     }
171 
172     str = PyUnicode_AsUTF8(value);
173     if (str == NULL)
174         return -1;
175 
176     cb = internal_error_callback(str);
177     if (cb == NULL)
178         return -1;
179 
180     ERROR_DECREF(self->errors);
181     self->errors = cb;
182     return 0;
183 }
184 
185 /* This getset handlers list is used by all the stateful codec objects */
186 static PyGetSetDef codecctx_getsets[] = {
187     {"errors",          (getter)codecctx_errors_get,
188                     (setter)codecctx_errors_set,
189                     PyDoc_STR("how to treat errors")},
190     {NULL,}
191 };
192 
193 static int
expand_encodebuffer(MultibyteEncodeBuffer * buf,Py_ssize_t esize)194 expand_encodebuffer(MultibyteEncodeBuffer *buf, Py_ssize_t esize)
195 {
196     Py_ssize_t orgpos, orgsize, incsize;
197 
198     orgpos = (Py_ssize_t)((char *)buf->outbuf -
199                             PyBytes_AS_STRING(buf->outobj));
200     orgsize = PyBytes_GET_SIZE(buf->outobj);
201     incsize = (esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize);
202 
203     if (orgsize > PY_SSIZE_T_MAX - incsize) {
204         PyErr_NoMemory();
205         return -1;
206     }
207 
208     if (_PyBytes_Resize(&buf->outobj, orgsize + incsize) == -1)
209         return -1;
210 
211     buf->outbuf = (unsigned char *)PyBytes_AS_STRING(buf->outobj) +orgpos;
212     buf->outbuf_end = (unsigned char *)PyBytes_AS_STRING(buf->outobj)
213         + PyBytes_GET_SIZE(buf->outobj);
214 
215     return 0;
216 }
/* Make sure `s` more bytes fit in the output buffer; a negative `s` always
 * forces a grow.  Jumps to the caller's `errorexit` label on failure. */
#define REQUIRE_ENCODEBUFFER(buf, s) do {                               \
    if ((s) < 0 || (s) > (buf)->outbuf_end - (buf)->outbuf) {           \
        if (expand_encodebuffer(buf, s) == -1) {                        \
            goto errorexit;                                             \
        }                                                               \
    }                                                                   \
} while (0)
222 
223 
224 /**
225  * MultibyteCodec object
226  */
227 
228 static int
multibytecodec_encerror(const MultibyteCodec * codec,MultibyteCodec_State * state,MultibyteEncodeBuffer * buf,PyObject * errors,Py_ssize_t e)229 multibytecodec_encerror(const MultibyteCodec *codec,
230                         MultibyteCodec_State *state,
231                         MultibyteEncodeBuffer *buf,
232                         PyObject *errors, Py_ssize_t e)
233 {
234     PyObject *retobj = NULL, *retstr = NULL, *tobj;
235     Py_ssize_t retstrsize, newpos;
236     Py_ssize_t esize, start, end;
237     const char *reason;
238 
239     if (e > 0) {
240         reason = "illegal multibyte sequence";
241         esize = e;
242     }
243     else {
244         switch (e) {
245         case MBERR_TOOSMALL:
246             REQUIRE_ENCODEBUFFER(buf, -1);
247             return 0; /* retry it */
248         case MBERR_TOOFEW:
249             reason = "incomplete multibyte sequence";
250             esize = (Py_ssize_t)buf->inpos;
251             break;
252         case MBERR_INTERNAL:
253             PyErr_SetString(PyExc_RuntimeError,
254                             "internal codec error");
255             return -1;
256         default:
257             PyErr_SetString(PyExc_RuntimeError,
258                             "unknown runtime error");
259             return -1;
260         }
261     }
262 
263     if (errors == ERROR_REPLACE) {
264         PyObject *replchar;
265         Py_ssize_t r;
266         Py_ssize_t inpos;
267         int kind;
268         const void *data;
269 
270         replchar = PyUnicode_FromOrdinal('?');
271         if (replchar == NULL)
272             goto errorexit;
273         kind = PyUnicode_KIND(replchar);
274         data = PyUnicode_DATA(replchar);
275 
276         inpos = 0;
277         for (;;) {
278             Py_ssize_t outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf);
279 
280             r = codec->encode(state, codec,
281                               kind, data, &inpos, 1,
282                               &buf->outbuf, outleft, 0);
283             if (r == MBERR_TOOSMALL) {
284                 REQUIRE_ENCODEBUFFER(buf, -1);
285                 continue;
286             }
287             else
288                 break;
289         }
290 
291         Py_DECREF(replchar);
292 
293         if (r != 0) {
294             REQUIRE_ENCODEBUFFER(buf, 1);
295             *buf->outbuf++ = '?';
296         }
297     }
298     if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
299         buf->inpos += esize;
300         return 0;
301     }
302 
303     start = (Py_ssize_t)buf->inpos;
304     end = start + esize;
305 
306     /* use cached exception object if available */
307     if (buf->excobj == NULL) {
308         buf->excobj =  PyObject_CallFunction(PyExc_UnicodeEncodeError,
309                                              "sOnns",
310                                              codec->encoding, buf->inobj,
311                                              start, end, reason);
312         if (buf->excobj == NULL)
313             goto errorexit;
314     }
315     else
316         if (PyUnicodeEncodeError_SetStart(buf->excobj, start) != 0 ||
317             PyUnicodeEncodeError_SetEnd(buf->excobj, end) != 0 ||
318             PyUnicodeEncodeError_SetReason(buf->excobj, reason) != 0)
319             goto errorexit;
320 
321     if (errors == ERROR_STRICT) {
322         PyCodec_StrictErrors(buf->excobj);
323         goto errorexit;
324     }
325 
326     retobj = call_error_callback(errors, buf->excobj);
327     if (retobj == NULL)
328         goto errorexit;
329 
330     if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
331         (!PyUnicode_Check((tobj = PyTuple_GET_ITEM(retobj, 0))) && !PyBytes_Check(tobj)) ||
332         !PyLong_Check(PyTuple_GET_ITEM(retobj, 1))) {
333         PyErr_SetString(PyExc_TypeError,
334                         "encoding error handler must return "
335                         "(str, int) tuple");
336         goto errorexit;
337     }
338 
339     if (PyUnicode_Check(tobj)) {
340         Py_ssize_t inpos;
341 
342         retstr = multibytecodec_encode(codec, state, tobj,
343                         &inpos, ERROR_STRICT,
344                         MBENC_FLUSH);
345         if (retstr == NULL)
346             goto errorexit;
347     }
348     else {
349         retstr = Py_NewRef(tobj);
350     }
351 
352     assert(PyBytes_Check(retstr));
353     retstrsize = PyBytes_GET_SIZE(retstr);
354     if (retstrsize > 0) {
355         REQUIRE_ENCODEBUFFER(buf, retstrsize);
356         memcpy(buf->outbuf, PyBytes_AS_STRING(retstr), retstrsize);
357         buf->outbuf += retstrsize;
358     }
359 
360     newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
361     if (newpos < 0 && !PyErr_Occurred())
362         newpos += (Py_ssize_t)buf->inlen;
363     if (newpos < 0 || newpos > buf->inlen) {
364         PyErr_Clear();
365         PyErr_Format(PyExc_IndexError,
366                      "position %zd from error handler out of bounds",
367                      newpos);
368         goto errorexit;
369     }
370     buf->inpos = newpos;
371 
372     Py_DECREF(retobj);
373     Py_DECREF(retstr);
374     return 0;
375 
376 errorexit:
377     Py_XDECREF(retobj);
378     Py_XDECREF(retstr);
379     return -1;
380 }
381 
382 static int
multibytecodec_decerror(const MultibyteCodec * codec,MultibyteCodec_State * state,MultibyteDecodeBuffer * buf,PyObject * errors,Py_ssize_t e)383 multibytecodec_decerror(const MultibyteCodec *codec,
384                         MultibyteCodec_State *state,
385                         MultibyteDecodeBuffer *buf,
386                         PyObject *errors, Py_ssize_t e)
387 {
388     PyObject *retobj = NULL, *retuni = NULL;
389     Py_ssize_t newpos;
390     const char *reason;
391     Py_ssize_t esize, start, end;
392 
393     if (e > 0) {
394         reason = "illegal multibyte sequence";
395         esize = e;
396     }
397     else {
398         switch (e) {
399         case MBERR_TOOSMALL:
400             return 0; /* retry it */
401         case MBERR_TOOFEW:
402             reason = "incomplete multibyte sequence";
403             esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
404             break;
405         case MBERR_INTERNAL:
406             PyErr_SetString(PyExc_RuntimeError,
407                             "internal codec error");
408             return -1;
409         case MBERR_EXCEPTION:
410             return -1;
411         default:
412             PyErr_SetString(PyExc_RuntimeError,
413                             "unknown runtime error");
414             return -1;
415         }
416     }
417 
418     if (errors == ERROR_REPLACE) {
419         if (_PyUnicodeWriter_WriteChar(&buf->writer,
420                                        Py_UNICODE_REPLACEMENT_CHARACTER) < 0)
421             goto errorexit;
422     }
423     if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
424         buf->inbuf += esize;
425         return 0;
426     }
427 
428     start = (Py_ssize_t)(buf->inbuf - buf->inbuf_top);
429     end = start + esize;
430 
431     /* use cached exception object if available */
432     if (buf->excobj == NULL) {
433         buf->excobj = PyUnicodeDecodeError_Create(codec->encoding,
434                         (const char *)buf->inbuf_top,
435                         (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top),
436                         start, end, reason);
437         if (buf->excobj == NULL)
438             goto errorexit;
439     }
440     else
441         if (PyUnicodeDecodeError_SetStart(buf->excobj, start) ||
442             PyUnicodeDecodeError_SetEnd(buf->excobj, end) ||
443             PyUnicodeDecodeError_SetReason(buf->excobj, reason))
444             goto errorexit;
445 
446     if (errors == ERROR_STRICT) {
447         PyCodec_StrictErrors(buf->excobj);
448         goto errorexit;
449     }
450 
451     retobj = call_error_callback(errors, buf->excobj);
452     if (retobj == NULL)
453         goto errorexit;
454 
455     if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
456         !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) ||
457         !PyLong_Check(PyTuple_GET_ITEM(retobj, 1))) {
458         PyErr_SetString(PyExc_TypeError,
459                         "decoding error handler must return "
460                         "(str, int) tuple");
461         goto errorexit;
462     }
463 
464     if (_PyUnicodeWriter_WriteStr(&buf->writer, retuni) < 0)
465         goto errorexit;
466 
467     newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
468     if (newpos < 0 && !PyErr_Occurred())
469         newpos += (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top);
470     if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) {
471         PyErr_Clear();
472         PyErr_Format(PyExc_IndexError,
473                      "position %zd from error handler out of bounds",
474                      newpos);
475         goto errorexit;
476     }
477     buf->inbuf = buf->inbuf_top + newpos;
478     Py_DECREF(retobj);
479     return 0;
480 
481 errorexit:
482     Py_XDECREF(retobj);
483     return -1;
484 }
485 
486 static PyObject *
multibytecodec_encode(const MultibyteCodec * codec,MultibyteCodec_State * state,PyObject * text,Py_ssize_t * inpos_t,PyObject * errors,int flags)487 multibytecodec_encode(const MultibyteCodec *codec,
488                       MultibyteCodec_State *state,
489                       PyObject *text, Py_ssize_t *inpos_t,
490                       PyObject *errors, int flags)
491 {
492     MultibyteEncodeBuffer buf;
493     Py_ssize_t finalsize, r = 0;
494     Py_ssize_t datalen;
495     int kind;
496     const void *data;
497 
498     datalen = PyUnicode_GET_LENGTH(text);
499 
500     if (datalen == 0 && !(flags & MBENC_RESET))
501         return PyBytes_FromStringAndSize(NULL, 0);
502 
503     buf.excobj = NULL;
504     buf.outobj = NULL;
505     buf.inobj = text;   /* borrowed reference */
506     buf.inpos = 0;
507     buf.inlen = datalen;
508     kind = PyUnicode_KIND(buf.inobj);
509     data = PyUnicode_DATA(buf.inobj);
510 
511     if (datalen > (PY_SSIZE_T_MAX - 16) / 2) {
512         PyErr_NoMemory();
513         goto errorexit;
514     }
515 
516     buf.outobj = PyBytes_FromStringAndSize(NULL, datalen * 2 + 16);
517     if (buf.outobj == NULL)
518         goto errorexit;
519     buf.outbuf = (unsigned char *)PyBytes_AS_STRING(buf.outobj);
520     buf.outbuf_end = buf.outbuf + PyBytes_GET_SIZE(buf.outobj);
521 
522     while (buf.inpos < buf.inlen) {
523         /* we don't reuse inleft and outleft here.
524          * error callbacks can relocate the cursor anywhere on buffer*/
525         Py_ssize_t outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
526 
527         r = codec->encode(state, codec,
528                           kind, data,
529                           &buf.inpos, buf.inlen,
530                           &buf.outbuf, outleft, flags);
531         if ((r == 0) || (r == MBERR_TOOFEW && !(flags & MBENC_FLUSH)))
532             break;
533         else if (multibytecodec_encerror(codec, state, &buf, errors,r))
534             goto errorexit;
535         else if (r == MBERR_TOOFEW)
536             break;
537     }
538 
539     if (codec->encreset != NULL && (flags & MBENC_RESET))
540         for (;;) {
541             Py_ssize_t outleft;
542 
543             outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
544             r = codec->encreset(state, codec, &buf.outbuf,
545                                 outleft);
546             if (r == 0)
547                 break;
548             else if (multibytecodec_encerror(codec, state,
549                                              &buf, errors, r))
550                 goto errorexit;
551         }
552 
553     finalsize = (Py_ssize_t)((char *)buf.outbuf -
554                              PyBytes_AS_STRING(buf.outobj));
555 
556     if (finalsize != PyBytes_GET_SIZE(buf.outobj))
557         if (_PyBytes_Resize(&buf.outobj, finalsize) == -1)
558             goto errorexit;
559 
560     if (inpos_t)
561         *inpos_t = buf.inpos;
562     Py_XDECREF(buf.excobj);
563     return buf.outobj;
564 
565 errorexit:
566     Py_XDECREF(buf.excobj);
567     Py_XDECREF(buf.outobj);
568     return NULL;
569 }
570 
571 /*[clinic input]
572 _multibytecodec.MultibyteCodec.encode
573 
574   input: object
575   errors: str(accept={str, NoneType}) = None
576 
577 Return an encoded string version of `input'.
578 
579 'errors' may be given to set a different error handling scheme. Default is
580 'strict' meaning that encoding errors raise a UnicodeEncodeError. Other possible
581 values are 'ignore', 'replace' and 'xmlcharrefreplace' as well as any other name
582 registered with codecs.register_error that can handle UnicodeEncodeErrors.
583 [clinic start generated code]*/
584 
585 static PyObject *
_multibytecodec_MultibyteCodec_encode_impl(MultibyteCodecObject * self,PyObject * input,const char * errors)586 _multibytecodec_MultibyteCodec_encode_impl(MultibyteCodecObject *self,
587                                            PyObject *input,
588                                            const char *errors)
589 /*[clinic end generated code: output=7b26652045ba56a9 input=606d0e128a577bae]*/
590 {
591     MultibyteCodec_State state;
592     PyObject *errorcb, *r, *ucvt;
593     Py_ssize_t datalen;
594 
595     if (PyUnicode_Check(input))
596         ucvt = NULL;
597     else {
598         input = ucvt = PyObject_Str(input);
599         if (input == NULL)
600             return NULL;
601         else if (!PyUnicode_Check(input)) {
602             PyErr_SetString(PyExc_TypeError,
603                 "couldn't convert the object to unicode.");
604             Py_DECREF(ucvt);
605             return NULL;
606         }
607     }
608 
609     datalen = PyUnicode_GET_LENGTH(input);
610 
611     errorcb = internal_error_callback(errors);
612     if (errorcb == NULL) {
613         Py_XDECREF(ucvt);
614         return NULL;
615     }
616 
617     if (self->codec->encinit != NULL &&
618         self->codec->encinit(&state, self->codec) != 0)
619         goto errorexit;
620     r = multibytecodec_encode(self->codec, &state,
621                     input, NULL, errorcb,
622                     MBENC_FLUSH | MBENC_RESET);
623     if (r == NULL)
624         goto errorexit;
625 
626     ERROR_DECREF(errorcb);
627     Py_XDECREF(ucvt);
628     return make_tuple(r, datalen);
629 
630 errorexit:
631     ERROR_DECREF(errorcb);
632     Py_XDECREF(ucvt);
633     return NULL;
634 }
635 
636 /*[clinic input]
637 _multibytecodec.MultibyteCodec.decode
638 
639   input: Py_buffer
640   errors: str(accept={str, NoneType}) = None
641 
642 Decodes 'input'.
643 
644 'errors' may be given to set a different error handling scheme. Default is
645 'strict' meaning that encoding errors raise a UnicodeDecodeError. Other possible
646 values are 'ignore' and 'replace' as well as any other name registered with
647 codecs.register_error that is able to handle UnicodeDecodeErrors."
648 [clinic start generated code]*/
649 
650 static PyObject *
_multibytecodec_MultibyteCodec_decode_impl(MultibyteCodecObject * self,Py_buffer * input,const char * errors)651 _multibytecodec_MultibyteCodec_decode_impl(MultibyteCodecObject *self,
652                                            Py_buffer *input,
653                                            const char *errors)
654 /*[clinic end generated code: output=ff419f65bad6cc77 input=e0c78fc7ab190def]*/
655 {
656     MultibyteCodec_State state;
657     MultibyteDecodeBuffer buf;
658     PyObject *errorcb, *res;
659     const char *data;
660     Py_ssize_t datalen;
661 
662     data = input->buf;
663     datalen = input->len;
664 
665     errorcb = internal_error_callback(errors);
666     if (errorcb == NULL) {
667         return NULL;
668     }
669 
670     if (datalen == 0) {
671         ERROR_DECREF(errorcb);
672         return make_tuple(PyUnicode_New(0, 0), 0);
673     }
674 
675     _PyUnicodeWriter_Init(&buf.writer);
676     buf.writer.min_length = datalen;
677     buf.excobj = NULL;
678     buf.inbuf = buf.inbuf_top = (unsigned char *)data;
679     buf.inbuf_end = buf.inbuf_top + datalen;
680 
681     if (self->codec->decinit != NULL &&
682         self->codec->decinit(&state, self->codec) != 0)
683         goto errorexit;
684 
685     while (buf.inbuf < buf.inbuf_end) {
686         Py_ssize_t inleft, r;
687 
688         inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf);
689 
690         r = self->codec->decode(&state, self->codec,
691                         &buf.inbuf, inleft, &buf.writer);
692         if (r == 0)
693             break;
694         else if (multibytecodec_decerror(self->codec, &state,
695                                          &buf, errorcb, r))
696             goto errorexit;
697     }
698 
699     res = _PyUnicodeWriter_Finish(&buf.writer);
700     if (res == NULL)
701         goto errorexit;
702 
703     Py_XDECREF(buf.excobj);
704     ERROR_DECREF(errorcb);
705     return make_tuple(res, datalen);
706 
707 errorexit:
708     ERROR_DECREF(errorcb);
709     Py_XDECREF(buf.excobj);
710     _PyUnicodeWriter_Dealloc(&buf.writer);
711 
712     return NULL;
713 }
714 
715 static struct PyMethodDef multibytecodec_methods[] = {
716     _MULTIBYTECODEC_MULTIBYTECODEC_ENCODE_METHODDEF
717     _MULTIBYTECODEC_MULTIBYTECODEC_DECODE_METHODDEF
718     {NULL, NULL},
719 };
720 
721 static int
multibytecodec_clear(MultibyteCodecObject * self)722 multibytecodec_clear(MultibyteCodecObject *self)
723 {
724     Py_CLEAR(self->cjk_module);
725     return 0;
726 }
727 
728 static int
multibytecodec_traverse(MultibyteCodecObject * self,visitproc visit,void * arg)729 multibytecodec_traverse(MultibyteCodecObject *self, visitproc visit, void *arg)
730 {
731     Py_VISIT(Py_TYPE(self));
732     Py_VISIT(self->cjk_module);
733     return 0;
734 }
735 
736 static void
multibytecodec_dealloc(MultibyteCodecObject * self)737 multibytecodec_dealloc(MultibyteCodecObject *self)
738 {
739     PyObject_GC_UnTrack(self);
740     PyTypeObject *tp = Py_TYPE(self);
741     (void)multibytecodec_clear(self);
742     tp->tp_free(self);
743     Py_DECREF(tp);
744 }
745 
746 static PyType_Slot multibytecodec_slots[] = {
747     {Py_tp_dealloc, multibytecodec_dealloc},
748     {Py_tp_getattro, PyObject_GenericGetAttr},
749     {Py_tp_methods, multibytecodec_methods},
750     {Py_tp_traverse, multibytecodec_traverse},
751     {Py_tp_clear, multibytecodec_clear},
752     {0, NULL},
753 };
754 
755 static PyType_Spec multibytecodec_spec = {
756     .name = MODULE_NAME ".MultibyteCodec",
757     .basicsize = sizeof(MultibyteCodecObject),
758     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
759               Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_IMMUTABLETYPE),
760     .slots = multibytecodec_slots,
761 };
762 
763 
764 /**
765  * Utility functions for stateful codec mechanism
766  */
767 
/* Cast an object pointer to the stateful decoder / encoder context type. */
#define STATEFUL_DCTX(o)        ((MultibyteStatefulDecoderContext *)(o))
#define STATEFUL_ECTX(o)        ((MultibyteStatefulEncoderContext *)(o))
770 
771 static PyObject *
encoder_encode_stateful(MultibyteStatefulEncoderContext * ctx,PyObject * unistr,int final)772 encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx,
773                         PyObject *unistr, int final)
774 {
775     PyObject *ucvt, *r = NULL;
776     PyObject *inbuf = NULL;
777     Py_ssize_t inpos, datalen;
778     PyObject *origpending = NULL;
779 
780     if (PyUnicode_Check(unistr))
781         ucvt = NULL;
782     else {
783         unistr = ucvt = PyObject_Str(unistr);
784         if (unistr == NULL)
785             return NULL;
786         else if (!PyUnicode_Check(unistr)) {
787             PyErr_SetString(PyExc_TypeError,
788                 "couldn't convert the object to str.");
789             Py_DECREF(ucvt);
790             return NULL;
791         }
792     }
793 
794     if (ctx->pending) {
795         PyObject *inbuf_tmp;
796 
797         origpending = Py_NewRef(ctx->pending);
798 
799         inbuf_tmp = Py_NewRef(ctx->pending);
800         PyUnicode_Append(&inbuf_tmp, unistr);
801         if (inbuf_tmp == NULL)
802             goto errorexit;
803         Py_CLEAR(ctx->pending);
804         inbuf = inbuf_tmp;
805     }
806     else {
807         origpending = NULL;
808 
809         inbuf = Py_NewRef(unistr);
810     }
811     inpos = 0;
812     datalen = PyUnicode_GET_LENGTH(inbuf);
813 
814     r = multibytecodec_encode(ctx->codec, &ctx->state,
815                               inbuf, &inpos,
816                               ctx->errors, final ? MBENC_FLUSH | MBENC_RESET : 0);
817     if (r == NULL) {
818         /* recover the original pending buffer */
819         Py_XSETREF(ctx->pending, origpending);
820         origpending = NULL;
821         goto errorexit;
822     }
823     Py_XDECREF(origpending);
824 
825     if (inpos < datalen) {
826         if (datalen - inpos > MAXENCPENDING) {
827             /* normal codecs can't reach here */
828             PyObject *excobj = PyObject_CallFunction(PyExc_UnicodeEncodeError,
829                                                      "sOnns",
830                                                      ctx->codec->encoding,
831                                                      inbuf,
832                                                      inpos, datalen,
833                                                      "pending buffer overflow");
834             if (excobj == NULL) goto errorexit;
835             PyErr_SetObject(PyExc_UnicodeEncodeError, excobj);
836             Py_DECREF(excobj);
837             goto errorexit;
838         }
839         ctx->pending = PyUnicode_Substring(inbuf, inpos, datalen);
840         if (ctx->pending == NULL) {
841             /* normal codecs can't reach here */
842             goto errorexit;
843         }
844     }
845 
846     Py_DECREF(inbuf);
847     Py_XDECREF(ucvt);
848     return r;
849 
850 errorexit:
851     Py_XDECREF(r);
852     Py_XDECREF(ucvt);
853     Py_XDECREF(origpending);
854     Py_XDECREF(inbuf);
855     return NULL;
856 }
857 
858 static int
decoder_append_pending(MultibyteStatefulDecoderContext * ctx,MultibyteDecodeBuffer * buf)859 decoder_append_pending(MultibyteStatefulDecoderContext *ctx,
860                        MultibyteDecodeBuffer *buf)
861 {
862     Py_ssize_t npendings;
863 
864     npendings = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
865     if (npendings + ctx->pendingsize > MAXDECPENDING ||
866         npendings > PY_SSIZE_T_MAX - ctx->pendingsize) {
867             Py_ssize_t bufsize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top);
868             PyObject *excobj = PyUnicodeDecodeError_Create(ctx->codec->encoding,
869                                                            (const char *)buf->inbuf_top,
870                                                            bufsize,
871                                                            0,
872                                                            bufsize,
873                                                            "pending buffer overflow");
874             if (excobj == NULL) return -1;
875             PyErr_SetObject(PyExc_UnicodeDecodeError, excobj);
876             Py_DECREF(excobj);
877             return -1;
878     }
879     memcpy(ctx->pending + ctx->pendingsize, buf->inbuf, npendings);
880     ctx->pendingsize += npendings;
881     return 0;
882 }
883 
884 static int
decoder_prepare_buffer(MultibyteDecodeBuffer * buf,const char * data,Py_ssize_t size)885 decoder_prepare_buffer(MultibyteDecodeBuffer *buf, const char *data,
886                        Py_ssize_t size)
887 {
888     buf->inbuf = buf->inbuf_top = (const unsigned char *)data;
889     buf->inbuf_end = buf->inbuf_top + size;
890     buf->writer.min_length += size;
891     return 0;
892 }
893 
894 static int
decoder_feed_buffer(MultibyteStatefulDecoderContext * ctx,MultibyteDecodeBuffer * buf)895 decoder_feed_buffer(MultibyteStatefulDecoderContext *ctx,
896                     MultibyteDecodeBuffer *buf)
897 {
898     while (buf->inbuf < buf->inbuf_end) {
899         Py_ssize_t inleft;
900         Py_ssize_t r;
901 
902         inleft = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
903 
904         r = ctx->codec->decode(&ctx->state, ctx->codec,
905             &buf->inbuf, inleft, &buf->writer);
906         if (r == 0 || r == MBERR_TOOFEW)
907             break;
908         else if (multibytecodec_decerror(ctx->codec, &ctx->state,
909                                          buf, ctx->errors, r))
910             return -1;
911     }
912     return 0;
913 }
914 
915 
916 /*[clinic input]
917 _multibytecodec.MultibyteIncrementalEncoder.encode
918 
919     input: object
920     final: bool = False
921 [clinic start generated code]*/
922 
923 static PyObject *
_multibytecodec_MultibyteIncrementalEncoder_encode_impl(MultibyteIncrementalEncoderObject * self,PyObject * input,int final)924 _multibytecodec_MultibyteIncrementalEncoder_encode_impl(MultibyteIncrementalEncoderObject *self,
925                                                         PyObject *input,
926                                                         int final)
927 /*[clinic end generated code: output=123361b6c505e2c1 input=bd5f7d40d43e99b0]*/
928 {
929     return encoder_encode_stateful(STATEFUL_ECTX(self), input, final);
930 }
931 
932 /*[clinic input]
933 _multibytecodec.MultibyteIncrementalEncoder.getstate
934 [clinic start generated code]*/
935 
936 static PyObject *
_multibytecodec_MultibyteIncrementalEncoder_getstate_impl(MultibyteIncrementalEncoderObject * self)937 _multibytecodec_MultibyteIncrementalEncoder_getstate_impl(MultibyteIncrementalEncoderObject *self)
938 /*[clinic end generated code: output=9794a5ace70d7048 input=4a2a82874ffa40bb]*/
939 {
940     /* state made up of 1 byte for buffer size, up to MAXENCPENDING*4 bytes
941        for UTF-8 encoded buffer (each character can use up to 4
942        bytes), and required bytes for MultibyteCodec_State.c. A byte
943        array is used to avoid different compilers generating different
944        values for the same state, e.g. as a result of struct padding.
945     */
946     unsigned char statebytes[1 + MAXENCPENDING*4 + sizeof(self->state.c)];
947     Py_ssize_t statesize;
948     const char *pendingbuffer = NULL;
949     Py_ssize_t pendingsize;
950 
951     if (self->pending != NULL) {
952         pendingbuffer = PyUnicode_AsUTF8AndSize(self->pending, &pendingsize);
953         if (pendingbuffer == NULL) {
954             return NULL;
955         }
956         if (pendingsize > MAXENCPENDING*4) {
957             PyObject *excobj = PyObject_CallFunction(PyExc_UnicodeEncodeError,
958                                                      "sOnns",
959                                                      self->codec->encoding,
960                                                      self->pending,
961                                                      0, PyUnicode_GET_LENGTH(self->pending),
962                                                      "pending buffer too large");
963             if (excobj == NULL) {
964                 return NULL;
965             }
966             PyErr_SetObject(PyExc_UnicodeEncodeError, excobj);
967             Py_DECREF(excobj);
968             return NULL;
969         }
970         statebytes[0] = (unsigned char)pendingsize;
971         memcpy(statebytes + 1, pendingbuffer, pendingsize);
972         statesize = 1 + pendingsize;
973     } else {
974         statebytes[0] = 0;
975         statesize = 1;
976     }
977     memcpy(statebytes+statesize, self->state.c,
978            sizeof(self->state.c));
979     statesize += sizeof(self->state.c);
980 
981     return (PyObject *)_PyLong_FromByteArray(statebytes, statesize,
982                                              1 /* little-endian */ ,
983                                              0 /* unsigned */ );
984 }
985 
986 /*[clinic input]
987 _multibytecodec.MultibyteIncrementalEncoder.setstate
988     state as statelong: object(type='PyLongObject *', subclass_of='&PyLong_Type')
989     /
990 [clinic start generated code]*/
991 
992 static PyObject *
_multibytecodec_MultibyteIncrementalEncoder_setstate_impl(MultibyteIncrementalEncoderObject * self,PyLongObject * statelong)993 _multibytecodec_MultibyteIncrementalEncoder_setstate_impl(MultibyteIncrementalEncoderObject *self,
994                                                           PyLongObject *statelong)
995 /*[clinic end generated code: output=4e5e98ac1f4039ca input=c80fb5830d4d2f76]*/
996 {
997     PyObject *pending = NULL;
998     unsigned char statebytes[1 + MAXENCPENDING*4 + sizeof(self->state.c)];
999 
1000     if (_PyLong_AsByteArray(statelong, statebytes, sizeof(statebytes),
1001                             1 /* little-endian */ ,
1002                             0 /* unsigned */ ,
1003                             1 /* with_exceptions */) < 0) {
1004         goto errorexit;
1005     }
1006 
1007     if (statebytes[0] > MAXENCPENDING*4) {
1008         PyErr_SetString(PyExc_UnicodeError, "pending buffer too large");
1009         return NULL;
1010     }
1011 
1012     pending = PyUnicode_DecodeUTF8((const char *)statebytes+1,
1013                                    statebytes[0], "strict");
1014     if (pending == NULL) {
1015         goto errorexit;
1016     }
1017 
1018     Py_XSETREF(self->pending, pending);
1019     memcpy(self->state.c, statebytes+1+statebytes[0],
1020            sizeof(self->state.c));
1021 
1022     Py_RETURN_NONE;
1023 
1024 errorexit:
1025     Py_XDECREF(pending);
1026     return NULL;
1027 }
1028 
1029 /*[clinic input]
1030 _multibytecodec.MultibyteIncrementalEncoder.reset
1031 [clinic start generated code]*/
1032 
1033 static PyObject *
_multibytecodec_MultibyteIncrementalEncoder_reset_impl(MultibyteIncrementalEncoderObject * self)1034 _multibytecodec_MultibyteIncrementalEncoder_reset_impl(MultibyteIncrementalEncoderObject *self)
1035 /*[clinic end generated code: output=b4125d8f537a253f input=930f06760707b6ea]*/
1036 {
1037     /* Longest output: 4 bytes (b'\x0F\x1F(B') with ISO 2022 */
1038     unsigned char buffer[4], *outbuf;
1039     Py_ssize_t r;
1040     if (self->codec->encreset != NULL) {
1041         outbuf = buffer;
1042         r = self->codec->encreset(&self->state, self->codec,
1043                                   &outbuf, sizeof(buffer));
1044         if (r != 0)
1045             return NULL;
1046     }
1047     Py_CLEAR(self->pending);
1048     Py_RETURN_NONE;
1049 }
1050 
1051 static struct PyMethodDef mbiencoder_methods[] = {
1052     _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_ENCODE_METHODDEF
1053     _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_GETSTATE_METHODDEF
1054     _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_SETSTATE_METHODDEF
1055     _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_RESET_METHODDEF
1056     {NULL, NULL},
1057 };
1058 
1059 static PyObject *
mbiencoder_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1060 mbiencoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1061 {
1062     MultibyteIncrementalEncoderObject *self;
1063     PyObject *codec = NULL;
1064     char *errors = NULL;
1065 
1066     if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalEncoder",
1067                                      incnewkwarglist, &errors))
1068         return NULL;
1069 
1070     self = (MultibyteIncrementalEncoderObject *)type->tp_alloc(type, 0);
1071     if (self == NULL)
1072         return NULL;
1073 
1074     codec = PyObject_GetAttrString((PyObject *)type, "codec");
1075     if (codec == NULL)
1076         goto errorexit;
1077 
1078     module_state *state = find_state_by_def(type);
1079     if (!MultibyteCodec_Check(state, codec)) {
1080         PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1081         goto errorexit;
1082     }
1083 
1084     self->codec = ((MultibyteCodecObject *)codec)->codec;
1085     self->pending = NULL;
1086     self->errors = internal_error_callback(errors);
1087     if (self->errors == NULL)
1088         goto errorexit;
1089     if (self->codec->encinit != NULL &&
1090         self->codec->encinit(&self->state, self->codec) != 0)
1091         goto errorexit;
1092 
1093     Py_DECREF(codec);
1094     return (PyObject *)self;
1095 
1096 errorexit:
1097     Py_XDECREF(self);
1098     Py_XDECREF(codec);
1099     return NULL;
1100 }
1101 
1102 static int
mbiencoder_init(PyObject * self,PyObject * args,PyObject * kwds)1103 mbiencoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1104 {
1105     return 0;
1106 }
1107 
1108 static int
mbiencoder_traverse(MultibyteIncrementalEncoderObject * self,visitproc visit,void * arg)1109 mbiencoder_traverse(MultibyteIncrementalEncoderObject *self,
1110                     visitproc visit, void *arg)
1111 {
1112     if (ERROR_ISCUSTOM(self->errors))
1113         Py_VISIT(self->errors);
1114     return 0;
1115 }
1116 
1117 static void
mbiencoder_dealloc(MultibyteIncrementalEncoderObject * self)1118 mbiencoder_dealloc(MultibyteIncrementalEncoderObject *self)
1119 {
1120     PyTypeObject *tp = Py_TYPE(self);
1121     PyObject_GC_UnTrack(self);
1122     ERROR_DECREF(self->errors);
1123     Py_CLEAR(self->pending);
1124     tp->tp_free(self);
1125     Py_DECREF(tp);
1126 }
1127 
1128 static PyType_Slot encoder_slots[] = {
1129     {Py_tp_dealloc, mbiencoder_dealloc},
1130     {Py_tp_getattro, PyObject_GenericGetAttr},
1131     {Py_tp_traverse, mbiencoder_traverse},
1132     {Py_tp_methods, mbiencoder_methods},
1133     {Py_tp_getset, codecctx_getsets},
1134     {Py_tp_init, mbiencoder_init},
1135     {Py_tp_new, mbiencoder_new},
1136     {0, NULL},
1137 };
1138 
1139 static PyType_Spec encoder_spec = {
1140     .name = MODULE_NAME ".MultibyteIncrementalEncoder",
1141     .basicsize = sizeof(MultibyteIncrementalEncoderObject),
1142     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE |
1143               Py_TPFLAGS_IMMUTABLETYPE),
1144     .slots = encoder_slots,
1145 };
1146 
1147 
1148 /*[clinic input]
1149 _multibytecodec.MultibyteIncrementalDecoder.decode
1150 
1151     input: Py_buffer
1152     final: bool = False
1153 [clinic start generated code]*/
1154 
1155 static PyObject *
_multibytecodec_MultibyteIncrementalDecoder_decode_impl(MultibyteIncrementalDecoderObject * self,Py_buffer * input,int final)1156 _multibytecodec_MultibyteIncrementalDecoder_decode_impl(MultibyteIncrementalDecoderObject *self,
1157                                                         Py_buffer *input,
1158                                                         int final)
1159 /*[clinic end generated code: output=b9b9090e8a9ce2ba input=8795fbb20860027a]*/
1160 {
1161     MultibyteDecodeBuffer buf;
1162     char *data, *wdata = NULL;
1163     Py_ssize_t wsize, size, origpending;
1164     PyObject *res;
1165 
1166     data = input->buf;
1167     size = input->len;
1168 
1169     _PyUnicodeWriter_Init(&buf.writer);
1170     buf.excobj = NULL;
1171     origpending = self->pendingsize;
1172 
1173     if (self->pendingsize == 0) {
1174         wsize = size;
1175         wdata = data;
1176     }
1177     else {
1178         if (size > PY_SSIZE_T_MAX - self->pendingsize) {
1179             PyErr_NoMemory();
1180             goto errorexit;
1181         }
1182         wsize = size + self->pendingsize;
1183         wdata = PyMem_Malloc(wsize);
1184         if (wdata == NULL) {
1185             PyErr_NoMemory();
1186             goto errorexit;
1187         }
1188         memcpy(wdata, self->pending, self->pendingsize);
1189         memcpy(wdata + self->pendingsize, data, size);
1190         self->pendingsize = 0;
1191     }
1192 
1193     if (decoder_prepare_buffer(&buf, wdata, wsize) != 0)
1194         goto errorexit;
1195 
1196     if (decoder_feed_buffer(STATEFUL_DCTX(self), &buf))
1197         goto errorexit;
1198 
1199     if (final && buf.inbuf < buf.inbuf_end) {
1200         if (multibytecodec_decerror(self->codec, &self->state,
1201                         &buf, self->errors, MBERR_TOOFEW)) {
1202             /* recover the original pending buffer */
1203             memcpy(self->pending, wdata, origpending);
1204             self->pendingsize = origpending;
1205             goto errorexit;
1206         }
1207     }
1208 
1209     if (buf.inbuf < buf.inbuf_end) { /* pending sequence still exists */
1210         if (decoder_append_pending(STATEFUL_DCTX(self), &buf) != 0)
1211             goto errorexit;
1212     }
1213 
1214     res = _PyUnicodeWriter_Finish(&buf.writer);
1215     if (res == NULL)
1216         goto errorexit;
1217 
1218     if (wdata != data)
1219         PyMem_Free(wdata);
1220     Py_XDECREF(buf.excobj);
1221     return res;
1222 
1223 errorexit:
1224     if (wdata != NULL && wdata != data)
1225         PyMem_Free(wdata);
1226     Py_XDECREF(buf.excobj);
1227     _PyUnicodeWriter_Dealloc(&buf.writer);
1228     return NULL;
1229 }
1230 
1231 /*[clinic input]
1232 _multibytecodec.MultibyteIncrementalDecoder.getstate
1233 [clinic start generated code]*/
1234 
1235 static PyObject *
_multibytecodec_MultibyteIncrementalDecoder_getstate_impl(MultibyteIncrementalDecoderObject * self)1236 _multibytecodec_MultibyteIncrementalDecoder_getstate_impl(MultibyteIncrementalDecoderObject *self)
1237 /*[clinic end generated code: output=255009c4713b7f82 input=4006aa49bddbaa75]*/
1238 {
1239     PyObject *buffer;
1240     PyObject *statelong;
1241 
1242     buffer = PyBytes_FromStringAndSize((const char *)self->pending,
1243                                        self->pendingsize);
1244     if (buffer == NULL) {
1245         return NULL;
1246     }
1247 
1248     statelong = (PyObject *)_PyLong_FromByteArray(self->state.c,
1249                                                   sizeof(self->state.c),
1250                                                   1 /* little-endian */ ,
1251                                                   0 /* unsigned */ );
1252     if (statelong == NULL) {
1253         Py_DECREF(buffer);
1254         return NULL;
1255     }
1256 
1257     return Py_BuildValue("NN", buffer, statelong);
1258 }
1259 
1260 /*[clinic input]
1261 _multibytecodec.MultibyteIncrementalDecoder.setstate
1262     state: object(subclass_of='&PyTuple_Type')
1263     /
1264 [clinic start generated code]*/
1265 
1266 static PyObject *
_multibytecodec_MultibyteIncrementalDecoder_setstate_impl(MultibyteIncrementalDecoderObject * self,PyObject * state)1267 _multibytecodec_MultibyteIncrementalDecoder_setstate_impl(MultibyteIncrementalDecoderObject *self,
1268                                                           PyObject *state)
1269 /*[clinic end generated code: output=106b2fbca3e2dcc2 input=e5d794e8baba1a47]*/
1270 {
1271     PyObject *buffer;
1272     PyLongObject *statelong;
1273     Py_ssize_t buffersize;
1274     const char *bufferstr;
1275     unsigned char statebytes[8];
1276 
1277     if (!PyArg_ParseTuple(state, "SO!;setstate(): illegal state argument",
1278                           &buffer, &PyLong_Type, &statelong))
1279     {
1280         return NULL;
1281     }
1282 
1283     if (_PyLong_AsByteArray(statelong, statebytes, sizeof(statebytes),
1284                             1 /* little-endian */ ,
1285                             0 /* unsigned */ ,
1286                             1 /* with_exceptions */) < 0) {
1287         return NULL;
1288     }
1289 
1290     buffersize = PyBytes_Size(buffer);
1291     if (buffersize == -1) {
1292         return NULL;
1293     }
1294 
1295     if (buffersize > MAXDECPENDING) {
1296         PyObject *excobj = PyUnicodeDecodeError_Create(self->codec->encoding,
1297                                                        PyBytes_AS_STRING(buffer), buffersize,
1298                                                        0, buffersize,
1299                                                        "pending buffer too large");
1300         if (excobj == NULL) return NULL;
1301         PyErr_SetObject(PyExc_UnicodeDecodeError, excobj);
1302         Py_DECREF(excobj);
1303         return NULL;
1304     }
1305 
1306     bufferstr = PyBytes_AsString(buffer);
1307     if (bufferstr == NULL) {
1308         return NULL;
1309     }
1310     self->pendingsize = buffersize;
1311     memcpy(self->pending, bufferstr, self->pendingsize);
1312     memcpy(self->state.c, statebytes, sizeof(statebytes));
1313 
1314     Py_RETURN_NONE;
1315 }
1316 
1317 /*[clinic input]
1318 _multibytecodec.MultibyteIncrementalDecoder.reset
1319 [clinic start generated code]*/
1320 
1321 static PyObject *
_multibytecodec_MultibyteIncrementalDecoder_reset_impl(MultibyteIncrementalDecoderObject * self)1322 _multibytecodec_MultibyteIncrementalDecoder_reset_impl(MultibyteIncrementalDecoderObject *self)
1323 /*[clinic end generated code: output=da423b1782c23ed1 input=3b63b3be85b2fb45]*/
1324 {
1325     if (self->codec->decreset != NULL &&
1326         self->codec->decreset(&self->state, self->codec) != 0)
1327         return NULL;
1328     self->pendingsize = 0;
1329 
1330     Py_RETURN_NONE;
1331 }
1332 
1333 static struct PyMethodDef mbidecoder_methods[] = {
1334     _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_DECODE_METHODDEF
1335     _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_GETSTATE_METHODDEF
1336     _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_SETSTATE_METHODDEF
1337     _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_RESET_METHODDEF
1338     {NULL, NULL},
1339 };
1340 
1341 static PyObject *
mbidecoder_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1342 mbidecoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1343 {
1344     MultibyteIncrementalDecoderObject *self;
1345     PyObject *codec = NULL;
1346     char *errors = NULL;
1347 
1348     if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalDecoder",
1349                                      incnewkwarglist, &errors))
1350         return NULL;
1351 
1352     self = (MultibyteIncrementalDecoderObject *)type->tp_alloc(type, 0);
1353     if (self == NULL)
1354         return NULL;
1355 
1356     codec = PyObject_GetAttrString((PyObject *)type, "codec");
1357     if (codec == NULL)
1358         goto errorexit;
1359 
1360     module_state *state = find_state_by_def(type);
1361     if (!MultibyteCodec_Check(state, codec)) {
1362         PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1363         goto errorexit;
1364     }
1365 
1366     self->codec = ((MultibyteCodecObject *)codec)->codec;
1367     self->pendingsize = 0;
1368     self->errors = internal_error_callback(errors);
1369     if (self->errors == NULL)
1370         goto errorexit;
1371     if (self->codec->decinit != NULL &&
1372         self->codec->decinit(&self->state, self->codec) != 0)
1373         goto errorexit;
1374 
1375     Py_DECREF(codec);
1376     return (PyObject *)self;
1377 
1378 errorexit:
1379     Py_XDECREF(self);
1380     Py_XDECREF(codec);
1381     return NULL;
1382 }
1383 
1384 static int
mbidecoder_init(PyObject * self,PyObject * args,PyObject * kwds)1385 mbidecoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1386 {
1387     return 0;
1388 }
1389 
1390 static int
mbidecoder_traverse(MultibyteIncrementalDecoderObject * self,visitproc visit,void * arg)1391 mbidecoder_traverse(MultibyteIncrementalDecoderObject *self,
1392                     visitproc visit, void *arg)
1393 {
1394     if (ERROR_ISCUSTOM(self->errors))
1395         Py_VISIT(self->errors);
1396     return 0;
1397 }
1398 
1399 static void
mbidecoder_dealloc(MultibyteIncrementalDecoderObject * self)1400 mbidecoder_dealloc(MultibyteIncrementalDecoderObject *self)
1401 {
1402     PyTypeObject *tp = Py_TYPE(self);
1403     PyObject_GC_UnTrack(self);
1404     ERROR_DECREF(self->errors);
1405     tp->tp_free(self);
1406     Py_DECREF(tp);
1407 }
1408 
1409 static PyType_Slot decoder_slots[] = {
1410     {Py_tp_dealloc, mbidecoder_dealloc},
1411     {Py_tp_getattro, PyObject_GenericGetAttr},
1412     {Py_tp_traverse, mbidecoder_traverse},
1413     {Py_tp_methods, mbidecoder_methods},
1414     {Py_tp_getset, codecctx_getsets},
1415     {Py_tp_init, mbidecoder_init},
1416     {Py_tp_new, mbidecoder_new},
1417     {0, NULL},
1418 };
1419 
1420 static PyType_Spec decoder_spec = {
1421     .name = MODULE_NAME ".MultibyteIncrementalDecoder",
1422     .basicsize = sizeof(MultibyteIncrementalDecoderObject),
1423     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE |
1424               Py_TPFLAGS_IMMUTABLETYPE),
1425     .slots = decoder_slots,
1426 };
1427 
1428 static PyObject *
mbstreamreader_iread(MultibyteStreamReaderObject * self,const char * method,Py_ssize_t sizehint)1429 mbstreamreader_iread(MultibyteStreamReaderObject *self,
1430                      const char *method, Py_ssize_t sizehint)
1431 {
1432     MultibyteDecodeBuffer buf;
1433     PyObject *cres, *res;
1434     Py_ssize_t rsize;
1435 
1436     if (sizehint == 0)
1437         return PyUnicode_New(0, 0);
1438 
1439     _PyUnicodeWriter_Init(&buf.writer);
1440     buf.excobj = NULL;
1441     cres = NULL;
1442 
1443     for (;;) {
1444         int endoffile;
1445 
1446         if (sizehint < 0)
1447             cres = PyObject_CallMethod(self->stream,
1448                             method, NULL);
1449         else
1450             cres = PyObject_CallMethod(self->stream,
1451                             method, "i", sizehint);
1452         if (cres == NULL)
1453             goto errorexit;
1454 
1455         if (!PyBytes_Check(cres)) {
1456             PyErr_Format(PyExc_TypeError,
1457                          "stream function returned a "
1458                          "non-bytes object (%.100s)",
1459                          Py_TYPE(cres)->tp_name);
1460             goto errorexit;
1461         }
1462 
1463         endoffile = (PyBytes_GET_SIZE(cres) == 0);
1464 
1465         if (self->pendingsize > 0) {
1466             PyObject *ctr;
1467             char *ctrdata;
1468 
1469             if (PyBytes_GET_SIZE(cres) > PY_SSIZE_T_MAX - self->pendingsize) {
1470                 PyErr_NoMemory();
1471                 goto errorexit;
1472             }
1473             rsize = PyBytes_GET_SIZE(cres) + self->pendingsize;
1474             ctr = PyBytes_FromStringAndSize(NULL, rsize);
1475             if (ctr == NULL)
1476                 goto errorexit;
1477             ctrdata = PyBytes_AS_STRING(ctr);
1478             memcpy(ctrdata, self->pending, self->pendingsize);
1479             memcpy(ctrdata + self->pendingsize,
1480                     PyBytes_AS_STRING(cres),
1481                     PyBytes_GET_SIZE(cres));
1482             Py_SETREF(cres, ctr);
1483             self->pendingsize = 0;
1484         }
1485 
1486         rsize = PyBytes_GET_SIZE(cres);
1487         if (decoder_prepare_buffer(&buf, PyBytes_AS_STRING(cres),
1488                                    rsize) != 0)
1489             goto errorexit;
1490 
1491         if (rsize > 0 && decoder_feed_buffer(
1492                         (MultibyteStatefulDecoderContext *)self, &buf))
1493             goto errorexit;
1494 
1495         if (endoffile || sizehint < 0) {
1496             if (buf.inbuf < buf.inbuf_end &&
1497                 multibytecodec_decerror(self->codec, &self->state,
1498                             &buf, self->errors, MBERR_TOOFEW))
1499                 goto errorexit;
1500         }
1501 
1502         if (buf.inbuf < buf.inbuf_end) { /* pending sequence exists */
1503             if (decoder_append_pending(STATEFUL_DCTX(self),
1504                                        &buf) != 0)
1505                 goto errorexit;
1506         }
1507 
1508         Py_SETREF(cres, NULL);
1509 
1510         if (sizehint < 0 || buf.writer.pos != 0 || rsize == 0)
1511             break;
1512 
1513         sizehint = 1; /* read 1 more byte and retry */
1514     }
1515 
1516     res = _PyUnicodeWriter_Finish(&buf.writer);
1517     if (res == NULL)
1518         goto errorexit;
1519 
1520     Py_XDECREF(cres);
1521     Py_XDECREF(buf.excobj);
1522     return res;
1523 
1524 errorexit:
1525     Py_XDECREF(cres);
1526     Py_XDECREF(buf.excobj);
1527     _PyUnicodeWriter_Dealloc(&buf.writer);
1528     return NULL;
1529 }
1530 
1531 /*[clinic input]
1532  _multibytecodec.MultibyteStreamReader.read
1533 
1534     sizeobj: object = None
1535     /
1536 [clinic start generated code]*/
1537 
1538 static PyObject *
_multibytecodec_MultibyteStreamReader_read_impl(MultibyteStreamReaderObject * self,PyObject * sizeobj)1539 _multibytecodec_MultibyteStreamReader_read_impl(MultibyteStreamReaderObject *self,
1540                                                 PyObject *sizeobj)
1541 /*[clinic end generated code: output=35621eb75355d5b8 input=015b0d3ff2fca485]*/
1542 {
1543     Py_ssize_t size;
1544 
1545     if (sizeobj == Py_None)
1546         size = -1;
1547     else if (PyLong_Check(sizeobj))
1548         size = PyLong_AsSsize_t(sizeobj);
1549     else {
1550         PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
1551         return NULL;
1552     }
1553 
1554     if (size == -1 && PyErr_Occurred())
1555         return NULL;
1556 
1557     return mbstreamreader_iread(self, "read", size);
1558 }
1559 
1560 /*[clinic input]
1561  _multibytecodec.MultibyteStreamReader.readline
1562 
1563     sizeobj: object = None
1564     /
1565 [clinic start generated code]*/
1566 
1567 static PyObject *
_multibytecodec_MultibyteStreamReader_readline_impl(MultibyteStreamReaderObject * self,PyObject * sizeobj)1568 _multibytecodec_MultibyteStreamReader_readline_impl(MultibyteStreamReaderObject *self,
1569                                                     PyObject *sizeobj)
1570 /*[clinic end generated code: output=4fbfaae1ed457a11 input=41ccc64f9bb0cec3]*/
1571 {
1572     Py_ssize_t size;
1573 
1574     if (sizeobj == Py_None)
1575         size = -1;
1576     else if (PyLong_Check(sizeobj))
1577         size = PyLong_AsSsize_t(sizeobj);
1578     else {
1579         PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
1580         return NULL;
1581     }
1582 
1583     if (size == -1 && PyErr_Occurred())
1584         return NULL;
1585 
1586     return mbstreamreader_iread(self, "readline", size);
1587 }
1588 
1589 /*[clinic input]
1590  _multibytecodec.MultibyteStreamReader.readlines
1591 
1592     sizehintobj: object = None
1593     /
1594 [clinic start generated code]*/
1595 
1596 static PyObject *
_multibytecodec_MultibyteStreamReader_readlines_impl(MultibyteStreamReaderObject * self,PyObject * sizehintobj)1597 _multibytecodec_MultibyteStreamReader_readlines_impl(MultibyteStreamReaderObject *self,
1598                                                      PyObject *sizehintobj)
1599 /*[clinic end generated code: output=e7c4310768ed2ad4 input=54932f5d4d88e880]*/
1600 {
1601     PyObject *r, *sr;
1602     Py_ssize_t sizehint;
1603 
1604     if (sizehintobj == Py_None)
1605         sizehint = -1;
1606     else if (PyLong_Check(sizehintobj))
1607         sizehint = PyLong_AsSsize_t(sizehintobj);
1608     else {
1609         PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
1610         return NULL;
1611     }
1612 
1613     if (sizehint == -1 && PyErr_Occurred())
1614         return NULL;
1615 
1616     r = mbstreamreader_iread(self, "read", sizehint);
1617     if (r == NULL)
1618         return NULL;
1619 
1620     sr = PyUnicode_Splitlines(r, 1);
1621     Py_DECREF(r);
1622     return sr;
1623 }
1624 
1625 /*[clinic input]
1626  _multibytecodec.MultibyteStreamReader.reset
1627 [clinic start generated code]*/
1628 
1629 static PyObject *
_multibytecodec_MultibyteStreamReader_reset_impl(MultibyteStreamReaderObject * self)1630 _multibytecodec_MultibyteStreamReader_reset_impl(MultibyteStreamReaderObject *self)
1631 /*[clinic end generated code: output=138490370a680abc input=5d4140db84b5e1e2]*/
1632 {
1633     if (self->codec->decreset != NULL &&
1634         self->codec->decreset(&self->state, self->codec) != 0)
1635         return NULL;
1636     self->pendingsize = 0;
1637 
1638     Py_RETURN_NONE;
1639 }
1640 
1641 static struct PyMethodDef mbstreamreader_methods[] = {
1642     _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READ_METHODDEF
1643     _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READLINE_METHODDEF
1644     _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READLINES_METHODDEF
1645     _MULTIBYTECODEC_MULTIBYTESTREAMREADER_RESET_METHODDEF
1646     {NULL,              NULL},
1647 };
1648 
1649 static PyMemberDef mbstreamreader_members[] = {
1650     {"stream",          _Py_T_OBJECT,
1651                     offsetof(MultibyteStreamReaderObject, stream),
1652                     Py_READONLY, NULL},
1653     {NULL,}
1654 };
1655 
1656 static PyObject *
mbstreamreader_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1657 mbstreamreader_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1658 {
1659     MultibyteStreamReaderObject *self;
1660     PyObject *stream, *codec = NULL;
1661     char *errors = NULL;
1662 
1663     if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamReader",
1664                             streamkwarglist, &stream, &errors))
1665         return NULL;
1666 
1667     self = (MultibyteStreamReaderObject *)type->tp_alloc(type, 0);
1668     if (self == NULL)
1669         return NULL;
1670 
1671     codec = PyObject_GetAttrString((PyObject *)type, "codec");
1672     if (codec == NULL)
1673         goto errorexit;
1674 
1675     module_state *state = find_state_by_def(type);
1676     if (!MultibyteCodec_Check(state, codec)) {
1677         PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1678         goto errorexit;
1679     }
1680 
1681     self->codec = ((MultibyteCodecObject *)codec)->codec;
1682     self->stream = Py_NewRef(stream);
1683     self->pendingsize = 0;
1684     self->errors = internal_error_callback(errors);
1685     if (self->errors == NULL)
1686         goto errorexit;
1687     if (self->codec->decinit != NULL &&
1688         self->codec->decinit(&self->state, self->codec) != 0)
1689         goto errorexit;
1690 
1691     Py_DECREF(codec);
1692     return (PyObject *)self;
1693 
1694 errorexit:
1695     Py_XDECREF(self);
1696     Py_XDECREF(codec);
1697     return NULL;
1698 }
1699 
1700 static int
mbstreamreader_init(PyObject * self,PyObject * args,PyObject * kwds)1701 mbstreamreader_init(PyObject *self, PyObject *args, PyObject *kwds)
1702 {
1703     return 0;
1704 }
1705 
1706 static int
mbstreamreader_traverse(MultibyteStreamReaderObject * self,visitproc visit,void * arg)1707 mbstreamreader_traverse(MultibyteStreamReaderObject *self,
1708                         visitproc visit, void *arg)
1709 {
1710     if (ERROR_ISCUSTOM(self->errors))
1711         Py_VISIT(self->errors);
1712     Py_VISIT(self->stream);
1713     return 0;
1714 }
1715 
1716 static void
mbstreamreader_dealloc(MultibyteStreamReaderObject * self)1717 mbstreamreader_dealloc(MultibyteStreamReaderObject *self)
1718 {
1719     PyTypeObject *tp = Py_TYPE(self);
1720     PyObject_GC_UnTrack(self);
1721     ERROR_DECREF(self->errors);
1722     Py_XDECREF(self->stream);
1723     tp->tp_free(self);
1724     Py_DECREF(tp);
1725 }
1726 
1727 static PyType_Slot reader_slots[] = {
1728     {Py_tp_dealloc, mbstreamreader_dealloc},
1729     {Py_tp_getattro, PyObject_GenericGetAttr},
1730     {Py_tp_traverse, mbstreamreader_traverse},
1731     {Py_tp_methods, mbstreamreader_methods},
1732     {Py_tp_members, mbstreamreader_members},
1733     {Py_tp_getset, codecctx_getsets},
1734     {Py_tp_init, mbstreamreader_init},
1735     {Py_tp_new, mbstreamreader_new},
1736     {0, NULL},
1737 };
1738 
1739 static PyType_Spec reader_spec = {
1740     .name = MODULE_NAME ".MultibyteStreamReader",
1741     .basicsize = sizeof(MultibyteStreamReaderObject),
1742     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE |
1743               Py_TPFLAGS_IMMUTABLETYPE),
1744     .slots = reader_slots,
1745 };
1746 
1747 static int
mbstreamwriter_iwrite(MultibyteStreamWriterObject * self,PyObject * unistr,PyObject * str_write)1748 mbstreamwriter_iwrite(MultibyteStreamWriterObject *self,
1749                       PyObject *unistr, PyObject *str_write)
1750 {
1751     PyObject *str, *wr;
1752 
1753     str = encoder_encode_stateful(STATEFUL_ECTX(self), unistr, 0);
1754     if (str == NULL)
1755         return -1;
1756 
1757     wr = PyObject_CallMethodOneArg(self->stream, str_write, str);
1758     Py_DECREF(str);
1759     if (wr == NULL)
1760         return -1;
1761 
1762     Py_DECREF(wr);
1763     return 0;
1764 }
1765 
1766 /*[clinic input]
1767  _multibytecodec.MultibyteStreamWriter.write
1768 
1769     cls: defining_class
1770     strobj: object
1771     /
1772 [clinic start generated code]*/
1773 
1774 static PyObject *
_multibytecodec_MultibyteStreamWriter_write_impl(MultibyteStreamWriterObject * self,PyTypeObject * cls,PyObject * strobj)1775 _multibytecodec_MultibyteStreamWriter_write_impl(MultibyteStreamWriterObject *self,
1776                                                  PyTypeObject *cls,
1777                                                  PyObject *strobj)
1778 /*[clinic end generated code: output=68ade3aea26410ac input=199f26f68bd8425a]*/
1779 {
1780     module_state *state = PyType_GetModuleState(cls);
1781     assert(state != NULL);
1782     if (mbstreamwriter_iwrite(self, strobj, state->str_write)) {
1783         return NULL;
1784     }
1785     Py_RETURN_NONE;
1786 }
1787 
1788 /*[clinic input]
1789  _multibytecodec.MultibyteStreamWriter.writelines
1790 
1791     cls: defining_class
1792     lines: object
1793     /
1794 [clinic start generated code]*/
1795 
1796 static PyObject *
_multibytecodec_MultibyteStreamWriter_writelines_impl(MultibyteStreamWriterObject * self,PyTypeObject * cls,PyObject * lines)1797 _multibytecodec_MultibyteStreamWriter_writelines_impl(MultibyteStreamWriterObject *self,
1798                                                       PyTypeObject *cls,
1799                                                       PyObject *lines)
1800 /*[clinic end generated code: output=b4c99d2cf23ffb88 input=a6d5fe7c74972a34]*/
1801 {
1802     PyObject *strobj;
1803     int i, r;
1804 
1805     if (!PySequence_Check(lines)) {
1806         PyErr_SetString(PyExc_TypeError,
1807                         "arg must be a sequence object");
1808         return NULL;
1809     }
1810 
1811     module_state *state = PyType_GetModuleState(cls);
1812     assert(state != NULL);
1813     for (i = 0; i < PySequence_Length(lines); i++) {
1814         /* length can be changed even within this loop */
1815         strobj = PySequence_GetItem(lines, i);
1816         if (strobj == NULL)
1817             return NULL;
1818 
1819         r = mbstreamwriter_iwrite(self, strobj, state->str_write);
1820         Py_DECREF(strobj);
1821         if (r == -1)
1822             return NULL;
1823     }
1824     /* PySequence_Length() can fail */
1825     if (PyErr_Occurred())
1826         return NULL;
1827 
1828     Py_RETURN_NONE;
1829 }
1830 
1831 /*[clinic input]
1832  _multibytecodec.MultibyteStreamWriter.reset
1833 
1834     cls: defining_class
1835     /
1836 
1837 [clinic start generated code]*/
1838 
1839 static PyObject *
_multibytecodec_MultibyteStreamWriter_reset_impl(MultibyteStreamWriterObject * self,PyTypeObject * cls)1840 _multibytecodec_MultibyteStreamWriter_reset_impl(MultibyteStreamWriterObject *self,
1841                                                  PyTypeObject *cls)
1842 /*[clinic end generated code: output=32ef224c2a38aa3d input=28af6a9cd38d1979]*/
1843 {
1844     PyObject *pwrt;
1845 
1846     if (!self->pending)
1847         Py_RETURN_NONE;
1848 
1849     pwrt = multibytecodec_encode(self->codec, &self->state,
1850                     self->pending, NULL, self->errors,
1851                     MBENC_FLUSH | MBENC_RESET);
1852     /* some pending buffer can be truncated when UnicodeEncodeError is
1853      * raised on 'strict' mode. but, 'reset' method is designed to
1854      * reset the pending buffer or states so failed string sequence
1855      * ought to be missed */
1856     Py_CLEAR(self->pending);
1857     if (pwrt == NULL)
1858         return NULL;
1859 
1860     assert(PyBytes_Check(pwrt));
1861 
1862     module_state *state = PyType_GetModuleState(cls);
1863     assert(state != NULL);
1864 
1865     if (PyBytes_Size(pwrt) > 0) {
1866         PyObject *wr;
1867 
1868         wr = PyObject_CallMethodOneArg(self->stream, state->str_write, pwrt);
1869         if (wr == NULL) {
1870             Py_DECREF(pwrt);
1871             return NULL;
1872         }
1873     }
1874     Py_DECREF(pwrt);
1875 
1876     Py_RETURN_NONE;
1877 }
1878 
1879 static PyObject *
mbstreamwriter_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1880 mbstreamwriter_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1881 {
1882     MultibyteStreamWriterObject *self;
1883     PyObject *stream, *codec = NULL;
1884     char *errors = NULL;
1885 
1886     if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamWriter",
1887                             streamkwarglist, &stream, &errors))
1888         return NULL;
1889 
1890     self = (MultibyteStreamWriterObject *)type->tp_alloc(type, 0);
1891     if (self == NULL)
1892         return NULL;
1893 
1894     codec = PyObject_GetAttrString((PyObject *)type, "codec");
1895     if (codec == NULL)
1896         goto errorexit;
1897 
1898     module_state *state = find_state_by_def(type);
1899     if (!MultibyteCodec_Check(state, codec)) {
1900         PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1901         goto errorexit;
1902     }
1903 
1904     self->codec = ((MultibyteCodecObject *)codec)->codec;
1905     self->stream = Py_NewRef(stream);
1906     self->pending = NULL;
1907     self->errors = internal_error_callback(errors);
1908     if (self->errors == NULL)
1909         goto errorexit;
1910     if (self->codec->encinit != NULL &&
1911         self->codec->encinit(&self->state, self->codec) != 0)
1912         goto errorexit;
1913 
1914     Py_DECREF(codec);
1915     return (PyObject *)self;
1916 
1917 errorexit:
1918     Py_XDECREF(self);
1919     Py_XDECREF(codec);
1920     return NULL;
1921 }
1922 
1923 static int
mbstreamwriter_init(PyObject * self,PyObject * args,PyObject * kwds)1924 mbstreamwriter_init(PyObject *self, PyObject *args, PyObject *kwds)
1925 {
1926     return 0;
1927 }
1928 
1929 static int
mbstreamwriter_traverse(MultibyteStreamWriterObject * self,visitproc visit,void * arg)1930 mbstreamwriter_traverse(MultibyteStreamWriterObject *self,
1931                         visitproc visit, void *arg)
1932 {
1933     if (ERROR_ISCUSTOM(self->errors))
1934         Py_VISIT(self->errors);
1935     Py_VISIT(self->stream);
1936     return 0;
1937 }
1938 
1939 static void
mbstreamwriter_dealloc(MultibyteStreamWriterObject * self)1940 mbstreamwriter_dealloc(MultibyteStreamWriterObject *self)
1941 {
1942     PyTypeObject *tp = Py_TYPE(self);
1943     PyObject_GC_UnTrack(self);
1944     ERROR_DECREF(self->errors);
1945     Py_XDECREF(self->stream);
1946     tp->tp_free(self);
1947     Py_DECREF(tp);
1948 }
1949 
1950 static struct PyMethodDef mbstreamwriter_methods[] = {
1951     _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_WRITE_METHODDEF
1952     _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_WRITELINES_METHODDEF
1953     _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_RESET_METHODDEF
1954     {NULL, NULL},
1955 };
1956 
1957 static PyMemberDef mbstreamwriter_members[] = {
1958     {"stream",          _Py_T_OBJECT,
1959                     offsetof(MultibyteStreamWriterObject, stream),
1960                     Py_READONLY, NULL},
1961     {NULL,}
1962 };
1963 
1964 static PyType_Slot writer_slots[] = {
1965     {Py_tp_dealloc, mbstreamwriter_dealloc},
1966     {Py_tp_getattro, PyObject_GenericGetAttr},
1967     {Py_tp_traverse, mbstreamwriter_traverse},
1968     {Py_tp_methods, mbstreamwriter_methods},
1969     {Py_tp_members, mbstreamwriter_members},
1970     {Py_tp_getset, codecctx_getsets},
1971     {Py_tp_init, mbstreamwriter_init},
1972     {Py_tp_new, mbstreamwriter_new},
1973     {0, NULL},
1974 };
1975 
1976 static PyType_Spec writer_spec = {
1977     .name = MODULE_NAME ".MultibyteStreamWriter",
1978     .basicsize = sizeof(MultibyteStreamWriterObject),
1979     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE |
1980               Py_TPFLAGS_IMMUTABLETYPE),
1981     .slots = writer_slots,
1982 };
1983 
1984 
1985 /*[clinic input]
1986 _multibytecodec.__create_codec
1987 
1988     arg: object
1989     /
1990 [clinic start generated code]*/
1991 
1992 static PyObject *
_multibytecodec___create_codec(PyObject * module,PyObject * arg)1993 _multibytecodec___create_codec(PyObject *module, PyObject *arg)
1994 /*[clinic end generated code: output=cfa3dce8260e809d input=6840b2a6b183fcfa]*/
1995 {
1996     MultibyteCodecObject *self;
1997 
1998     if (!PyCapsule_IsValid(arg, CODEC_CAPSULE)) {
1999         PyErr_SetString(PyExc_ValueError, "argument type invalid");
2000         return NULL;
2001     }
2002 
2003     codec_capsule *data = PyCapsule_GetPointer(arg, CODEC_CAPSULE);
2004     const MultibyteCodec *codec = data->codec;
2005     if (codec->codecinit != NULL && codec->codecinit(codec) != 0)
2006         return NULL;
2007 
2008     module_state *state = get_module_state(module);
2009     self = PyObject_GC_New(MultibyteCodecObject, state->multibytecodec_type);
2010     if (self == NULL)
2011         return NULL;
2012     self->codec = codec;
2013     self->cjk_module = Py_NewRef(data->cjk_module);
2014 
2015     PyObject_GC_Track(self);
2016     return (PyObject *)self;
2017 }
2018 
2019 static int
_multibytecodec_traverse(PyObject * mod,visitproc visit,void * arg)2020 _multibytecodec_traverse(PyObject *mod, visitproc visit, void *arg)
2021 {
2022     module_state *state = get_module_state(mod);
2023     Py_VISIT(state->multibytecodec_type);
2024     Py_VISIT(state->encoder_type);
2025     Py_VISIT(state->decoder_type);
2026     Py_VISIT(state->reader_type);
2027     Py_VISIT(state->writer_type);
2028     return 0;
2029 }
2030 
2031 static int
_multibytecodec_clear(PyObject * mod)2032 _multibytecodec_clear(PyObject *mod)
2033 {
2034     module_state *state = get_module_state(mod);
2035     Py_CLEAR(state->multibytecodec_type);
2036     Py_CLEAR(state->encoder_type);
2037     Py_CLEAR(state->decoder_type);
2038     Py_CLEAR(state->reader_type);
2039     Py_CLEAR(state->writer_type);
2040     Py_CLEAR(state->str_write);
2041     return 0;
2042 }
2043 
2044 static void
_multibytecodec_free(void * mod)2045 _multibytecodec_free(void *mod)
2046 {
2047     _multibytecodec_clear((PyObject *)mod);
2048 }
2049 
/* Create a type from `spec`, bound to `module`, and store it in the
 * lvalue `type`.  Makes the *enclosing function* return -1 on failure. */
#define CREATE_TYPE(module, type, spec)                                   \
    do {                                                                  \
        (type) = (PyTypeObject *)PyType_FromModuleAndSpec((module),       \
                                                          (spec), NULL);  \
        if ((type) == NULL) {                                             \
            return -1;                                                    \
        }                                                                 \
    } while (0)
2057 
/* Add `type` to `module` with PyModule_AddType().  Makes the *enclosing
 * function* return -1 on failure. */
#define ADD_TYPE(module, type)                        \
    do {                                              \
        if (PyModule_AddType((module), (type)) < 0) { \
            return -1;                                \
        }                                             \
    } while (0)
2064 
2065 static int
_multibytecodec_exec(PyObject * mod)2066 _multibytecodec_exec(PyObject *mod)
2067 {
2068     module_state *state = get_module_state(mod);
2069     state->str_write = PyUnicode_InternFromString("write");
2070     if (state->str_write == NULL) {
2071         return -1;
2072     }
2073     CREATE_TYPE(mod, state->multibytecodec_type, &multibytecodec_spec);
2074     CREATE_TYPE(mod, state->encoder_type, &encoder_spec);
2075     CREATE_TYPE(mod, state->decoder_type, &decoder_spec);
2076     CREATE_TYPE(mod, state->reader_type, &reader_spec);
2077     CREATE_TYPE(mod, state->writer_type, &writer_spec);
2078 
2079     ADD_TYPE(mod, state->encoder_type);
2080     ADD_TYPE(mod, state->decoder_type);
2081     ADD_TYPE(mod, state->reader_type);
2082     ADD_TYPE(mod, state->writer_type);
2083     return 0;
2084 }
2085 
2086 #undef CREATE_TYPE
2087 #undef ADD_TYPE
2088 
2089 static struct PyMethodDef _multibytecodec_methods[] = {
2090     _MULTIBYTECODEC___CREATE_CODEC_METHODDEF
2091     {NULL, NULL},
2092 };
2093 
2094 static PyModuleDef_Slot _multibytecodec_slots[] = {
2095     {Py_mod_exec, _multibytecodec_exec},
2096     {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
2097     {Py_mod_gil, Py_MOD_GIL_NOT_USED},
2098     {0, NULL}
2099 };
2100 
2101 static struct PyModuleDef _multibytecodecmodule = {
2102     .m_base = PyModuleDef_HEAD_INIT,
2103     .m_name = "_multibytecodec",
2104     .m_size = sizeof(module_state),
2105     .m_methods = _multibytecodec_methods,
2106     .m_slots = _multibytecodec_slots,
2107     .m_traverse = _multibytecodec_traverse,
2108     .m_clear = _multibytecodec_clear,
2109     .m_free = _multibytecodec_free,
2110 };
2111 
2112 PyMODINIT_FUNC
PyInit__multibytecodec(void)2113 PyInit__multibytecodec(void)
2114 {
2115     return PyModuleDef_Init(&_multibytecodecmodule);
2116 }
2117