• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * multibytecodec.c: Common Multibyte Codec Implementation
3  *
4  * Written by Hye-Shik Chang <perky@FreeBSD.org>
5  */
6 
7 #define PY_SSIZE_T_CLEAN
8 #include "Python.h"
9 #include "structmember.h"
10 #include "multibytecodec.h"
11 #include "clinic/multibytecodec.c.h"
12 
13 /*[clinic input]
14 module _multibytecodec
15 class _multibytecodec.MultibyteCodec "MultibyteCodecObject *" "&MultibyteCodec_Type"
16 [clinic start generated code]*/
17 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=6ad689546cbb5450]*/
18 
19 typedef struct {
20     PyObject            *inobj;
21     Py_ssize_t          inpos, inlen;
22     unsigned char       *outbuf, *outbuf_end;
23     PyObject            *excobj, *outobj;
24 } MultibyteEncodeBuffer;
25 
26 typedef struct {
27     const unsigned char *inbuf, *inbuf_top, *inbuf_end;
28     PyObject            *excobj;
29     _PyUnicodeWriter    writer;
30 } MultibyteDecodeBuffer;
31 
32 static char *incnewkwarglist[] = {"errors", NULL};
33 static char *streamkwarglist[] = {"stream", "errors", NULL};
34 
35 static PyObject *multibytecodec_encode(MultibyteCodec *,
36                 MultibyteCodec_State *, PyObject *, Py_ssize_t *,
37                 PyObject *, int);
38 
39 #define MBENC_RESET     MBENC_MAX<<1 /* reset after an encoding session */
40 
41 _Py_IDENTIFIER(write);
42 
43 static PyObject *
make_tuple(PyObject * object,Py_ssize_t len)44 make_tuple(PyObject *object, Py_ssize_t len)
45 {
46     PyObject *v, *w;
47 
48     if (object == NULL)
49         return NULL;
50 
51     v = PyTuple_New(2);
52     if (v == NULL) {
53         Py_DECREF(object);
54         return NULL;
55     }
56     PyTuple_SET_ITEM(v, 0, object);
57 
58     w = PyLong_FromSsize_t(len);
59     if (w == NULL) {
60         Py_DECREF(v);
61         return NULL;
62     }
63     PyTuple_SET_ITEM(v, 1, w);
64 
65     return v;
66 }
67 
68 static PyObject *
internal_error_callback(const char * errors)69 internal_error_callback(const char *errors)
70 {
71     if (errors == NULL || strcmp(errors, "strict") == 0)
72         return ERROR_STRICT;
73     else if (strcmp(errors, "ignore") == 0)
74         return ERROR_IGNORE;
75     else if (strcmp(errors, "replace") == 0)
76         return ERROR_REPLACE;
77     else
78         return PyUnicode_FromString(errors);
79 }
80 
81 static PyObject *
call_error_callback(PyObject * errors,PyObject * exc)82 call_error_callback(PyObject *errors, PyObject *exc)
83 {
84     PyObject *args, *cb, *r;
85     const char *str;
86 
87     assert(PyUnicode_Check(errors));
88     str = PyUnicode_AsUTF8(errors);
89     if (str == NULL)
90         return NULL;
91     cb = PyCodec_LookupError(str);
92     if (cb == NULL)
93         return NULL;
94 
95     args = PyTuple_New(1);
96     if (args == NULL) {
97         Py_DECREF(cb);
98         return NULL;
99     }
100 
101     PyTuple_SET_ITEM(args, 0, exc);
102     Py_INCREF(exc);
103 
104     r = PyObject_CallObject(cb, args);
105     Py_DECREF(args);
106     Py_DECREF(cb);
107     return r;
108 }
109 
110 static PyObject *
codecctx_errors_get(MultibyteStatefulCodecContext * self,void * Py_UNUSED (ignored))111 codecctx_errors_get(MultibyteStatefulCodecContext *self, void *Py_UNUSED(ignored))
112 {
113     const char *errors;
114 
115     if (self->errors == ERROR_STRICT)
116         errors = "strict";
117     else if (self->errors == ERROR_IGNORE)
118         errors = "ignore";
119     else if (self->errors == ERROR_REPLACE)
120         errors = "replace";
121     else {
122         Py_INCREF(self->errors);
123         return self->errors;
124     }
125 
126     return PyUnicode_FromString(errors);
127 }
128 
129 static int
codecctx_errors_set(MultibyteStatefulCodecContext * self,PyObject * value,void * closure)130 codecctx_errors_set(MultibyteStatefulCodecContext *self, PyObject *value,
131                     void *closure)
132 {
133     PyObject *cb;
134     const char *str;
135 
136     if (value == NULL) {
137         PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
138         return -1;
139     }
140     if (!PyUnicode_Check(value)) {
141         PyErr_SetString(PyExc_TypeError, "errors must be a string");
142         return -1;
143     }
144 
145     str = PyUnicode_AsUTF8(value);
146     if (str == NULL)
147         return -1;
148 
149     cb = internal_error_callback(str);
150     if (cb == NULL)
151         return -1;
152 
153     ERROR_DECREF(self->errors);
154     self->errors = cb;
155     return 0;
156 }
157 
158 /* This getset handlers list is used by all the stateful codec objects */
159 static PyGetSetDef codecctx_getsets[] = {
160     {"errors",          (getter)codecctx_errors_get,
161                     (setter)codecctx_errors_set,
162                     PyDoc_STR("how to treat errors")},
163     {NULL,}
164 };
165 
166 static int
expand_encodebuffer(MultibyteEncodeBuffer * buf,Py_ssize_t esize)167 expand_encodebuffer(MultibyteEncodeBuffer *buf, Py_ssize_t esize)
168 {
169     Py_ssize_t orgpos, orgsize, incsize;
170 
171     orgpos = (Py_ssize_t)((char *)buf->outbuf -
172                             PyBytes_AS_STRING(buf->outobj));
173     orgsize = PyBytes_GET_SIZE(buf->outobj);
174     incsize = (esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize);
175 
176     if (orgsize > PY_SSIZE_T_MAX - incsize) {
177         PyErr_NoMemory();
178         return -1;
179     }
180 
181     if (_PyBytes_Resize(&buf->outobj, orgsize + incsize) == -1)
182         return -1;
183 
184     buf->outbuf = (unsigned char *)PyBytes_AS_STRING(buf->outobj) +orgpos;
185     buf->outbuf_end = (unsigned char *)PyBytes_AS_STRING(buf->outobj)
186         + PyBytes_GET_SIZE(buf->outobj);
187 
188     return 0;
189 }
/* Make sure at least `s` bytes can be written at buf->outbuf; a negative `s`
 * always forces a growth step.  Jumps to the caller's `errorexit` label when
 * the buffer cannot be enlarged. */
#define REQUIRE_ENCODEBUFFER(buf, s) do {                               \
    if (((s) < 0 || (s) > (buf)->outbuf_end - (buf)->outbuf) &&         \
            expand_encodebuffer(buf, s) == -1)                          \
        goto errorexit;                                                 \
} while(0)
195 
196 
197 /**
198  * MultibyteCodec object
199  */
200 
201 static int
multibytecodec_encerror(MultibyteCodec * codec,MultibyteCodec_State * state,MultibyteEncodeBuffer * buf,PyObject * errors,Py_ssize_t e)202 multibytecodec_encerror(MultibyteCodec *codec,
203                         MultibyteCodec_State *state,
204                         MultibyteEncodeBuffer *buf,
205                         PyObject *errors, Py_ssize_t e)
206 {
207     PyObject *retobj = NULL, *retstr = NULL, *tobj;
208     Py_ssize_t retstrsize, newpos;
209     Py_ssize_t esize, start, end;
210     const char *reason;
211 
212     if (e > 0) {
213         reason = "illegal multibyte sequence";
214         esize = e;
215     }
216     else {
217         switch (e) {
218         case MBERR_TOOSMALL:
219             REQUIRE_ENCODEBUFFER(buf, -1);
220             return 0; /* retry it */
221         case MBERR_TOOFEW:
222             reason = "incomplete multibyte sequence";
223             esize = (Py_ssize_t)buf->inpos;
224             break;
225         case MBERR_INTERNAL:
226             PyErr_SetString(PyExc_RuntimeError,
227                             "internal codec error");
228             return -1;
229         default:
230             PyErr_SetString(PyExc_RuntimeError,
231                             "unknown runtime error");
232             return -1;
233         }
234     }
235 
236     if (errors == ERROR_REPLACE) {
237         PyObject *replchar;
238         Py_ssize_t r;
239         Py_ssize_t inpos;
240         int kind;
241         void *data;
242 
243         replchar = PyUnicode_FromOrdinal('?');
244         if (replchar == NULL)
245             goto errorexit;
246         kind = PyUnicode_KIND(replchar);
247         data = PyUnicode_DATA(replchar);
248 
249         inpos = 0;
250         for (;;) {
251             Py_ssize_t outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf);
252 
253             r = codec->encode(state, codec->config,
254                               kind, data, &inpos, 1,
255                               &buf->outbuf, outleft, 0);
256             if (r == MBERR_TOOSMALL) {
257                 REQUIRE_ENCODEBUFFER(buf, -1);
258                 continue;
259             }
260             else
261                 break;
262         }
263 
264         Py_DECREF(replchar);
265 
266         if (r != 0) {
267             REQUIRE_ENCODEBUFFER(buf, 1);
268             *buf->outbuf++ = '?';
269         }
270     }
271     if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
272         buf->inpos += esize;
273         return 0;
274     }
275 
276     start = (Py_ssize_t)buf->inpos;
277     end = start + esize;
278 
279     /* use cached exception object if available */
280     if (buf->excobj == NULL) {
281         buf->excobj =  PyObject_CallFunction(PyExc_UnicodeEncodeError,
282                                              "sOnns",
283                                              codec->encoding, buf->inobj,
284                                              start, end, reason);
285         if (buf->excobj == NULL)
286             goto errorexit;
287     }
288     else
289         if (PyUnicodeEncodeError_SetStart(buf->excobj, start) != 0 ||
290             PyUnicodeEncodeError_SetEnd(buf->excobj, end) != 0 ||
291             PyUnicodeEncodeError_SetReason(buf->excobj, reason) != 0)
292             goto errorexit;
293 
294     if (errors == ERROR_STRICT) {
295         PyCodec_StrictErrors(buf->excobj);
296         goto errorexit;
297     }
298 
299     retobj = call_error_callback(errors, buf->excobj);
300     if (retobj == NULL)
301         goto errorexit;
302 
303     if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
304         (!PyUnicode_Check((tobj = PyTuple_GET_ITEM(retobj, 0))) && !PyBytes_Check(tobj)) ||
305         !PyLong_Check(PyTuple_GET_ITEM(retobj, 1))) {
306         PyErr_SetString(PyExc_TypeError,
307                         "encoding error handler must return "
308                         "(str, int) tuple");
309         goto errorexit;
310     }
311 
312     if (PyUnicode_Check(tobj)) {
313         Py_ssize_t inpos;
314 
315         retstr = multibytecodec_encode(codec, state, tobj,
316                         &inpos, ERROR_STRICT,
317                         MBENC_FLUSH);
318         if (retstr == NULL)
319             goto errorexit;
320     }
321     else {
322         Py_INCREF(tobj);
323         retstr = tobj;
324     }
325 
326     assert(PyBytes_Check(retstr));
327     retstrsize = PyBytes_GET_SIZE(retstr);
328     if (retstrsize > 0) {
329         REQUIRE_ENCODEBUFFER(buf, retstrsize);
330         memcpy(buf->outbuf, PyBytes_AS_STRING(retstr), retstrsize);
331         buf->outbuf += retstrsize;
332     }
333 
334     newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
335     if (newpos < 0 && !PyErr_Occurred())
336         newpos += (Py_ssize_t)buf->inlen;
337     if (newpos < 0 || newpos > buf->inlen) {
338         PyErr_Clear();
339         PyErr_Format(PyExc_IndexError,
340                      "position %zd from error handler out of bounds",
341                      newpos);
342         goto errorexit;
343     }
344     buf->inpos = newpos;
345 
346     Py_DECREF(retobj);
347     Py_DECREF(retstr);
348     return 0;
349 
350 errorexit:
351     Py_XDECREF(retobj);
352     Py_XDECREF(retstr);
353     return -1;
354 }
355 
356 static int
multibytecodec_decerror(MultibyteCodec * codec,MultibyteCodec_State * state,MultibyteDecodeBuffer * buf,PyObject * errors,Py_ssize_t e)357 multibytecodec_decerror(MultibyteCodec *codec,
358                         MultibyteCodec_State *state,
359                         MultibyteDecodeBuffer *buf,
360                         PyObject *errors, Py_ssize_t e)
361 {
362     PyObject *retobj = NULL, *retuni = NULL;
363     Py_ssize_t newpos;
364     const char *reason;
365     Py_ssize_t esize, start, end;
366 
367     if (e > 0) {
368         reason = "illegal multibyte sequence";
369         esize = e;
370     }
371     else {
372         switch (e) {
373         case MBERR_TOOSMALL:
374             return 0; /* retry it */
375         case MBERR_TOOFEW:
376             reason = "incomplete multibyte sequence";
377             esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
378             break;
379         case MBERR_INTERNAL:
380             PyErr_SetString(PyExc_RuntimeError,
381                             "internal codec error");
382             return -1;
383         case MBERR_EXCEPTION:
384             return -1;
385         default:
386             PyErr_SetString(PyExc_RuntimeError,
387                             "unknown runtime error");
388             return -1;
389         }
390     }
391 
392     if (errors == ERROR_REPLACE) {
393         if (_PyUnicodeWriter_WriteChar(&buf->writer,
394                                        Py_UNICODE_REPLACEMENT_CHARACTER) < 0)
395             goto errorexit;
396     }
397     if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
398         buf->inbuf += esize;
399         return 0;
400     }
401 
402     start = (Py_ssize_t)(buf->inbuf - buf->inbuf_top);
403     end = start + esize;
404 
405     /* use cached exception object if available */
406     if (buf->excobj == NULL) {
407         buf->excobj = PyUnicodeDecodeError_Create(codec->encoding,
408                         (const char *)buf->inbuf_top,
409                         (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top),
410                         start, end, reason);
411         if (buf->excobj == NULL)
412             goto errorexit;
413     }
414     else
415         if (PyUnicodeDecodeError_SetStart(buf->excobj, start) ||
416             PyUnicodeDecodeError_SetEnd(buf->excobj, end) ||
417             PyUnicodeDecodeError_SetReason(buf->excobj, reason))
418             goto errorexit;
419 
420     if (errors == ERROR_STRICT) {
421         PyCodec_StrictErrors(buf->excobj);
422         goto errorexit;
423     }
424 
425     retobj = call_error_callback(errors, buf->excobj);
426     if (retobj == NULL)
427         goto errorexit;
428 
429     if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
430         !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) ||
431         !PyLong_Check(PyTuple_GET_ITEM(retobj, 1))) {
432         PyErr_SetString(PyExc_TypeError,
433                         "decoding error handler must return "
434                         "(str, int) tuple");
435         goto errorexit;
436     }
437 
438     if (_PyUnicodeWriter_WriteStr(&buf->writer, retuni) < 0)
439         goto errorexit;
440 
441     newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
442     if (newpos < 0 && !PyErr_Occurred())
443         newpos += (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top);
444     if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) {
445         PyErr_Clear();
446         PyErr_Format(PyExc_IndexError,
447                      "position %zd from error handler out of bounds",
448                      newpos);
449         goto errorexit;
450     }
451     buf->inbuf = buf->inbuf_top + newpos;
452     Py_DECREF(retobj);
453     return 0;
454 
455 errorexit:
456     Py_XDECREF(retobj);
457     return -1;
458 }
459 
460 static PyObject *
multibytecodec_encode(MultibyteCodec * codec,MultibyteCodec_State * state,PyObject * text,Py_ssize_t * inpos_t,PyObject * errors,int flags)461 multibytecodec_encode(MultibyteCodec *codec,
462                       MultibyteCodec_State *state,
463                       PyObject *text, Py_ssize_t *inpos_t,
464                       PyObject *errors, int flags)
465 {
466     MultibyteEncodeBuffer buf;
467     Py_ssize_t finalsize, r = 0;
468     Py_ssize_t datalen;
469     int kind;
470     void *data;
471 
472     if (PyUnicode_READY(text) < 0)
473         return NULL;
474     datalen = PyUnicode_GET_LENGTH(text);
475 
476     if (datalen == 0 && !(flags & MBENC_RESET))
477         return PyBytes_FromStringAndSize(NULL, 0);
478 
479     buf.excobj = NULL;
480     buf.outobj = NULL;
481     buf.inobj = text;   /* borrowed reference */
482     buf.inpos = 0;
483     buf.inlen = datalen;
484     kind = PyUnicode_KIND(buf.inobj);
485     data = PyUnicode_DATA(buf.inobj);
486 
487     if (datalen > (PY_SSIZE_T_MAX - 16) / 2) {
488         PyErr_NoMemory();
489         goto errorexit;
490     }
491 
492     buf.outobj = PyBytes_FromStringAndSize(NULL, datalen * 2 + 16);
493     if (buf.outobj == NULL)
494         goto errorexit;
495     buf.outbuf = (unsigned char *)PyBytes_AS_STRING(buf.outobj);
496     buf.outbuf_end = buf.outbuf + PyBytes_GET_SIZE(buf.outobj);
497 
498     while (buf.inpos < buf.inlen) {
499         /* we don't reuse inleft and outleft here.
500          * error callbacks can relocate the cursor anywhere on buffer*/
501         Py_ssize_t outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
502 
503         r = codec->encode(state, codec->config,
504                           kind, data,
505                           &buf.inpos, buf.inlen,
506                           &buf.outbuf, outleft, flags);
507         if ((r == 0) || (r == MBERR_TOOFEW && !(flags & MBENC_FLUSH)))
508             break;
509         else if (multibytecodec_encerror(codec, state, &buf, errors,r))
510             goto errorexit;
511         else if (r == MBERR_TOOFEW)
512             break;
513     }
514 
515     if (codec->encreset != NULL && (flags & MBENC_RESET))
516         for (;;) {
517             Py_ssize_t outleft;
518 
519             outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
520             r = codec->encreset(state, codec->config, &buf.outbuf,
521                                 outleft);
522             if (r == 0)
523                 break;
524             else if (multibytecodec_encerror(codec, state,
525                                              &buf, errors, r))
526                 goto errorexit;
527         }
528 
529     finalsize = (Py_ssize_t)((char *)buf.outbuf -
530                              PyBytes_AS_STRING(buf.outobj));
531 
532     if (finalsize != PyBytes_GET_SIZE(buf.outobj))
533         if (_PyBytes_Resize(&buf.outobj, finalsize) == -1)
534             goto errorexit;
535 
536     if (inpos_t)
537         *inpos_t = buf.inpos;
538     Py_XDECREF(buf.excobj);
539     return buf.outobj;
540 
541 errorexit:
542     Py_XDECREF(buf.excobj);
543     Py_XDECREF(buf.outobj);
544     return NULL;
545 }
546 
547 /*[clinic input]
548 _multibytecodec.MultibyteCodec.encode
549 
550   input: object
551   errors: str(accept={str, NoneType}) = None
552 
553 Return an encoded string version of `input'.
554 
555 'errors' may be given to set a different error handling scheme. Default is
556 'strict' meaning that encoding errors raise a UnicodeEncodeError. Other possible
557 values are 'ignore', 'replace' and 'xmlcharrefreplace' as well as any other name
558 registered with codecs.register_error that can handle UnicodeEncodeErrors.
559 [clinic start generated code]*/
560 
561 static PyObject *
_multibytecodec_MultibyteCodec_encode_impl(MultibyteCodecObject * self,PyObject * input,const char * errors)562 _multibytecodec_MultibyteCodec_encode_impl(MultibyteCodecObject *self,
563                                            PyObject *input,
564                                            const char *errors)
565 /*[clinic end generated code: output=7b26652045ba56a9 input=606d0e128a577bae]*/
566 {
567     MultibyteCodec_State state;
568     PyObject *errorcb, *r, *ucvt;
569     Py_ssize_t datalen;
570 
571     if (PyUnicode_Check(input))
572         ucvt = NULL;
573     else {
574         input = ucvt = PyObject_Str(input);
575         if (input == NULL)
576             return NULL;
577         else if (!PyUnicode_Check(input)) {
578             PyErr_SetString(PyExc_TypeError,
579                 "couldn't convert the object to unicode.");
580             Py_DECREF(ucvt);
581             return NULL;
582         }
583     }
584 
585     if (PyUnicode_READY(input) < 0) {
586         Py_XDECREF(ucvt);
587         return NULL;
588     }
589     datalen = PyUnicode_GET_LENGTH(input);
590 
591     errorcb = internal_error_callback(errors);
592     if (errorcb == NULL) {
593         Py_XDECREF(ucvt);
594         return NULL;
595     }
596 
597     if (self->codec->encinit != NULL &&
598         self->codec->encinit(&state, self->codec->config) != 0)
599         goto errorexit;
600     r = multibytecodec_encode(self->codec, &state,
601                     input, NULL, errorcb,
602                     MBENC_FLUSH | MBENC_RESET);
603     if (r == NULL)
604         goto errorexit;
605 
606     ERROR_DECREF(errorcb);
607     Py_XDECREF(ucvt);
608     return make_tuple(r, datalen);
609 
610 errorexit:
611     ERROR_DECREF(errorcb);
612     Py_XDECREF(ucvt);
613     return NULL;
614 }
615 
616 /*[clinic input]
617 _multibytecodec.MultibyteCodec.decode
618 
619   input: Py_buffer
620   errors: str(accept={str, NoneType}) = None
621 
622 Decodes 'input'.
623 
624 'errors' may be given to set a different error handling scheme. Default is
625 'strict' meaning that encoding errors raise a UnicodeDecodeError. Other possible
626 values are 'ignore' and 'replace' as well as any other name registered with
627 codecs.register_error that is able to handle UnicodeDecodeErrors."
628 [clinic start generated code]*/
629 
630 static PyObject *
_multibytecodec_MultibyteCodec_decode_impl(MultibyteCodecObject * self,Py_buffer * input,const char * errors)631 _multibytecodec_MultibyteCodec_decode_impl(MultibyteCodecObject *self,
632                                            Py_buffer *input,
633                                            const char *errors)
634 /*[clinic end generated code: output=ff419f65bad6cc77 input=e0c78fc7ab190def]*/
635 {
636     MultibyteCodec_State state;
637     MultibyteDecodeBuffer buf;
638     PyObject *errorcb, *res;
639     const char *data;
640     Py_ssize_t datalen;
641 
642     data = input->buf;
643     datalen = input->len;
644 
645     errorcb = internal_error_callback(errors);
646     if (errorcb == NULL) {
647         return NULL;
648     }
649 
650     if (datalen == 0) {
651         ERROR_DECREF(errorcb);
652         return make_tuple(PyUnicode_New(0, 0), 0);
653     }
654 
655     _PyUnicodeWriter_Init(&buf.writer);
656     buf.writer.min_length = datalen;
657     buf.excobj = NULL;
658     buf.inbuf = buf.inbuf_top = (unsigned char *)data;
659     buf.inbuf_end = buf.inbuf_top + datalen;
660 
661     if (self->codec->decinit != NULL &&
662         self->codec->decinit(&state, self->codec->config) != 0)
663         goto errorexit;
664 
665     while (buf.inbuf < buf.inbuf_end) {
666         Py_ssize_t inleft, r;
667 
668         inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf);
669 
670         r = self->codec->decode(&state, self->codec->config,
671                         &buf.inbuf, inleft, &buf.writer);
672         if (r == 0)
673             break;
674         else if (multibytecodec_decerror(self->codec, &state,
675                                          &buf, errorcb, r))
676             goto errorexit;
677     }
678 
679     res = _PyUnicodeWriter_Finish(&buf.writer);
680     if (res == NULL)
681         goto errorexit;
682 
683     Py_XDECREF(buf.excobj);
684     ERROR_DECREF(errorcb);
685     return make_tuple(res, datalen);
686 
687 errorexit:
688     ERROR_DECREF(errorcb);
689     Py_XDECREF(buf.excobj);
690     _PyUnicodeWriter_Dealloc(&buf.writer);
691 
692     return NULL;
693 }
694 
695 static struct PyMethodDef multibytecodec_methods[] = {
696     _MULTIBYTECODEC_MULTIBYTECODEC_ENCODE_METHODDEF
697     _MULTIBYTECODEC_MULTIBYTECODEC_DECODE_METHODDEF
698     {NULL, NULL},
699 };
700 
701 static void
multibytecodec_dealloc(MultibyteCodecObject * self)702 multibytecodec_dealloc(MultibyteCodecObject *self)
703 {
704     PyObject_Del(self);
705 }
706 
707 static PyTypeObject MultibyteCodec_Type = {
708     PyVarObject_HEAD_INIT(NULL, 0)
709     "MultibyteCodec",                   /* tp_name */
710     sizeof(MultibyteCodecObject),       /* tp_basicsize */
711     0,                                  /* tp_itemsize */
712     /* methods */
713     (destructor)multibytecodec_dealloc, /* tp_dealloc */
714     0,                                  /* tp_vectorcall_offset */
715     0,                                  /* tp_getattr */
716     0,                                  /* tp_setattr */
717     0,                                  /* tp_as_async */
718     0,                                  /* tp_repr */
719     0,                                  /* tp_as_number */
720     0,                                  /* tp_as_sequence */
721     0,                                  /* tp_as_mapping */
722     0,                                  /* tp_hash */
723     0,                                  /* tp_call */
724     0,                                  /* tp_str */
725     PyObject_GenericGetAttr,            /* tp_getattro */
726     0,                                  /* tp_setattro */
727     0,                                  /* tp_as_buffer */
728     Py_TPFLAGS_DEFAULT,                 /* tp_flags */
729     0,                                  /* tp_doc */
730     0,                                  /* tp_traverse */
731     0,                                  /* tp_clear */
732     0,                                  /* tp_richcompare */
733     0,                                  /* tp_weaklistoffset */
734     0,                                  /* tp_iter */
735     0,                                  /* tp_iterext */
736     multibytecodec_methods,             /* tp_methods */
737 };
738 
739 
740 /**
741  * Utility functions for stateful codec mechanism
742  */
743 
/* View an object as the decoder / encoder stateful context. */
#define STATEFUL_DCTX(o)    ((MultibyteStatefulDecoderContext *)(o))
#define STATEFUL_ECTX(o)    ((MultibyteStatefulEncoderContext *)(o))
746 
747 static PyObject *
encoder_encode_stateful(MultibyteStatefulEncoderContext * ctx,PyObject * unistr,int final)748 encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx,
749                         PyObject *unistr, int final)
750 {
751     PyObject *ucvt, *r = NULL;
752     PyObject *inbuf = NULL;
753     Py_ssize_t inpos, datalen;
754     PyObject *origpending = NULL;
755 
756     if (PyUnicode_Check(unistr))
757         ucvt = NULL;
758     else {
759         unistr = ucvt = PyObject_Str(unistr);
760         if (unistr == NULL)
761             return NULL;
762         else if (!PyUnicode_Check(unistr)) {
763             PyErr_SetString(PyExc_TypeError,
764                 "couldn't convert the object to str.");
765             Py_DECREF(ucvt);
766             return NULL;
767         }
768     }
769 
770     if (ctx->pending) {
771         PyObject *inbuf_tmp;
772 
773         Py_INCREF(ctx->pending);
774         origpending = ctx->pending;
775 
776         Py_INCREF(ctx->pending);
777         inbuf_tmp = ctx->pending;
778         PyUnicode_Append(&inbuf_tmp, unistr);
779         if (inbuf_tmp == NULL)
780             goto errorexit;
781         Py_CLEAR(ctx->pending);
782         inbuf = inbuf_tmp;
783     }
784     else {
785         origpending = NULL;
786 
787         Py_INCREF(unistr);
788         inbuf = unistr;
789     }
790     if (PyUnicode_READY(inbuf) < 0)
791         goto errorexit;
792     inpos = 0;
793     datalen = PyUnicode_GET_LENGTH(inbuf);
794 
795     r = multibytecodec_encode(ctx->codec, &ctx->state,
796                               inbuf, &inpos,
797                               ctx->errors, final ? MBENC_FLUSH | MBENC_RESET : 0);
798     if (r == NULL) {
799         /* recover the original pending buffer */
800         Py_XSETREF(ctx->pending, origpending);
801         origpending = NULL;
802         goto errorexit;
803     }
804     Py_XDECREF(origpending);
805 
806     if (inpos < datalen) {
807         if (datalen - inpos > MAXENCPENDING) {
808             /* normal codecs can't reach here */
809             PyErr_SetString(PyExc_UnicodeError,
810                             "pending buffer overflow");
811             goto errorexit;
812         }
813         ctx->pending = PyUnicode_Substring(inbuf, inpos, datalen);
814         if (ctx->pending == NULL) {
815             /* normal codecs can't reach here */
816             goto errorexit;
817         }
818     }
819 
820     Py_DECREF(inbuf);
821     Py_XDECREF(ucvt);
822     return r;
823 
824 errorexit:
825     Py_XDECREF(r);
826     Py_XDECREF(ucvt);
827     Py_XDECREF(origpending);
828     Py_XDECREF(inbuf);
829     return NULL;
830 }
831 
832 static int
decoder_append_pending(MultibyteStatefulDecoderContext * ctx,MultibyteDecodeBuffer * buf)833 decoder_append_pending(MultibyteStatefulDecoderContext *ctx,
834                        MultibyteDecodeBuffer *buf)
835 {
836     Py_ssize_t npendings;
837 
838     npendings = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
839     if (npendings + ctx->pendingsize > MAXDECPENDING ||
840         npendings > PY_SSIZE_T_MAX - ctx->pendingsize) {
841             PyErr_SetString(PyExc_UnicodeError, "pending buffer overflow");
842             return -1;
843     }
844     memcpy(ctx->pending + ctx->pendingsize, buf->inbuf, npendings);
845     ctx->pendingsize += npendings;
846     return 0;
847 }
848 
849 static int
decoder_prepare_buffer(MultibyteDecodeBuffer * buf,const char * data,Py_ssize_t size)850 decoder_prepare_buffer(MultibyteDecodeBuffer *buf, const char *data,
851                        Py_ssize_t size)
852 {
853     buf->inbuf = buf->inbuf_top = (const unsigned char *)data;
854     buf->inbuf_end = buf->inbuf_top + size;
855     buf->writer.min_length += size;
856     return 0;
857 }
858 
859 static int
decoder_feed_buffer(MultibyteStatefulDecoderContext * ctx,MultibyteDecodeBuffer * buf)860 decoder_feed_buffer(MultibyteStatefulDecoderContext *ctx,
861                     MultibyteDecodeBuffer *buf)
862 {
863     while (buf->inbuf < buf->inbuf_end) {
864         Py_ssize_t inleft;
865         Py_ssize_t r;
866 
867         inleft = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
868 
869         r = ctx->codec->decode(&ctx->state, ctx->codec->config,
870             &buf->inbuf, inleft, &buf->writer);
871         if (r == 0 || r == MBERR_TOOFEW)
872             break;
873         else if (multibytecodec_decerror(ctx->codec, &ctx->state,
874                                          buf, ctx->errors, r))
875             return -1;
876     }
877     return 0;
878 }
879 
880 
881 /*[clinic input]
882  class _multibytecodec.MultibyteIncrementalEncoder "MultibyteIncrementalEncoderObject *" "&MultibyteIncrementalEncoder_Type"
883 [clinic start generated code]*/
884 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=3be82909cd08924d]*/
885 
886 /*[clinic input]
887 _multibytecodec.MultibyteIncrementalEncoder.encode
888 
889     input: object
890     final: bool(accept={int}) = False
891 [clinic start generated code]*/
892 
893 static PyObject *
_multibytecodec_MultibyteIncrementalEncoder_encode_impl(MultibyteIncrementalEncoderObject * self,PyObject * input,int final)894 _multibytecodec_MultibyteIncrementalEncoder_encode_impl(MultibyteIncrementalEncoderObject *self,
895                                                         PyObject *input,
896                                                         int final)
897 /*[clinic end generated code: output=123361b6c505e2c1 input=093a1ddbb2fc6721]*/
898 {
899     return encoder_encode_stateful(STATEFUL_ECTX(self), input, final);
900 }
901 
902 /*[clinic input]
903 _multibytecodec.MultibyteIncrementalEncoder.getstate
904 [clinic start generated code]*/
905 
906 static PyObject *
_multibytecodec_MultibyteIncrementalEncoder_getstate_impl(MultibyteIncrementalEncoderObject * self)907 _multibytecodec_MultibyteIncrementalEncoder_getstate_impl(MultibyteIncrementalEncoderObject *self)
908 /*[clinic end generated code: output=9794a5ace70d7048 input=4a2a82874ffa40bb]*/
909 {
910     /* state made up of 1 byte for buffer size, up to MAXENCPENDING*4 bytes
911        for UTF-8 encoded buffer (each character can use up to 4
912        bytes), and required bytes for MultibyteCodec_State.c. A byte
913        array is used to avoid different compilers generating different
914        values for the same state, e.g. as a result of struct padding.
915     */
916     unsigned char statebytes[1 + MAXENCPENDING*4 + sizeof(self->state.c)];
917     Py_ssize_t statesize;
918     const char *pendingbuffer = NULL;
919     Py_ssize_t pendingsize;
920 
921     if (self->pending != NULL) {
922         pendingbuffer = PyUnicode_AsUTF8AndSize(self->pending, &pendingsize);
923         if (pendingbuffer == NULL) {
924             return NULL;
925         }
926         if (pendingsize > MAXENCPENDING*4) {
927             PyErr_SetString(PyExc_UnicodeError, "pending buffer too large");
928             return NULL;
929         }
930         statebytes[0] = (unsigned char)pendingsize;
931         memcpy(statebytes + 1, pendingbuffer, pendingsize);
932         statesize = 1 + pendingsize;
933     } else {
934         statebytes[0] = 0;
935         statesize = 1;
936     }
937     memcpy(statebytes+statesize, self->state.c,
938            sizeof(self->state.c));
939     statesize += sizeof(self->state.c);
940 
941     return (PyObject *)_PyLong_FromByteArray(statebytes, statesize,
942                                              1 /* little-endian */ ,
943                                              0 /* unsigned */ );
944 }
945 
946 /*[clinic input]
947 _multibytecodec.MultibyteIncrementalEncoder.setstate
948     state as statelong: object(type='PyLongObject *', subclass_of='&PyLong_Type')
949     /
950 [clinic start generated code]*/
951 
952 static PyObject *
_multibytecodec_MultibyteIncrementalEncoder_setstate_impl(MultibyteIncrementalEncoderObject * self,PyLongObject * statelong)953 _multibytecodec_MultibyteIncrementalEncoder_setstate_impl(MultibyteIncrementalEncoderObject *self,
954                                                           PyLongObject *statelong)
955 /*[clinic end generated code: output=4e5e98ac1f4039ca input=c80fb5830d4d2f76]*/
956 {
957     PyObject *pending = NULL;
958     unsigned char statebytes[1 + MAXENCPENDING*4 + sizeof(self->state.c)];
959 
960     if (_PyLong_AsByteArray(statelong, statebytes, sizeof(statebytes),
961                             1 /* little-endian */ ,
962                             0 /* unsigned */ ) < 0) {
963         goto errorexit;
964     }
965 
966     if (statebytes[0] > MAXENCPENDING*4) {
967         PyErr_SetString(PyExc_UnicodeError, "pending buffer too large");
968         return NULL;
969     }
970 
971     pending = PyUnicode_DecodeUTF8((const char *)statebytes+1,
972                                    statebytes[0], "strict");
973     if (pending == NULL) {
974         goto errorexit;
975     }
976 
977     Py_CLEAR(self->pending);
978     self->pending = pending;
979     memcpy(self->state.c, statebytes+1+statebytes[0],
980            sizeof(self->state.c));
981 
982     Py_RETURN_NONE;
983 
984 errorexit:
985     Py_XDECREF(pending);
986     return NULL;
987 }
988 
989 /*[clinic input]
990 _multibytecodec.MultibyteIncrementalEncoder.reset
991 [clinic start generated code]*/
992 
993 static PyObject *
_multibytecodec_MultibyteIncrementalEncoder_reset_impl(MultibyteIncrementalEncoderObject * self)994 _multibytecodec_MultibyteIncrementalEncoder_reset_impl(MultibyteIncrementalEncoderObject *self)
995 /*[clinic end generated code: output=b4125d8f537a253f input=930f06760707b6ea]*/
996 {
997     /* Longest output: 4 bytes (b'\x0F\x1F(B') with ISO 2022 */
998     unsigned char buffer[4], *outbuf;
999     Py_ssize_t r;
1000     if (self->codec->encreset != NULL) {
1001         outbuf = buffer;
1002         r = self->codec->encreset(&self->state, self->codec->config,
1003                                   &outbuf, sizeof(buffer));
1004         if (r != 0)
1005             return NULL;
1006     }
1007     Py_CLEAR(self->pending);
1008     Py_RETURN_NONE;
1009 }
1010 
1011 static struct PyMethodDef mbiencoder_methods[] = {
1012     _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_ENCODE_METHODDEF
1013     _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_GETSTATE_METHODDEF
1014     _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_SETSTATE_METHODDEF
1015     _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_RESET_METHODDEF
1016     {NULL, NULL},
1017 };
1018 
1019 static PyObject *
mbiencoder_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1020 mbiencoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1021 {
1022     MultibyteIncrementalEncoderObject *self;
1023     PyObject *codec = NULL;
1024     char *errors = NULL;
1025 
1026     if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalEncoder",
1027                                      incnewkwarglist, &errors))
1028         return NULL;
1029 
1030     self = (MultibyteIncrementalEncoderObject *)type->tp_alloc(type, 0);
1031     if (self == NULL)
1032         return NULL;
1033 
1034     codec = PyObject_GetAttrString((PyObject *)type, "codec");
1035     if (codec == NULL)
1036         goto errorexit;
1037     if (!MultibyteCodec_Check(codec)) {
1038         PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1039         goto errorexit;
1040     }
1041 
1042     self->codec = ((MultibyteCodecObject *)codec)->codec;
1043     self->pending = NULL;
1044     self->errors = internal_error_callback(errors);
1045     if (self->errors == NULL)
1046         goto errorexit;
1047     if (self->codec->encinit != NULL &&
1048         self->codec->encinit(&self->state, self->codec->config) != 0)
1049         goto errorexit;
1050 
1051     Py_DECREF(codec);
1052     return (PyObject *)self;
1053 
1054 errorexit:
1055     Py_XDECREF(self);
1056     Py_XDECREF(codec);
1057     return NULL;
1058 }
1059 
1060 static int
mbiencoder_init(PyObject * self,PyObject * args,PyObject * kwds)1061 mbiencoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1062 {
1063     return 0;
1064 }
1065 
1066 static int
mbiencoder_traverse(MultibyteIncrementalEncoderObject * self,visitproc visit,void * arg)1067 mbiencoder_traverse(MultibyteIncrementalEncoderObject *self,
1068                     visitproc visit, void *arg)
1069 {
1070     if (ERROR_ISCUSTOM(self->errors))
1071         Py_VISIT(self->errors);
1072     return 0;
1073 }
1074 
1075 static void
mbiencoder_dealloc(MultibyteIncrementalEncoderObject * self)1076 mbiencoder_dealloc(MultibyteIncrementalEncoderObject *self)
1077 {
1078     PyObject_GC_UnTrack(self);
1079     ERROR_DECREF(self->errors);
1080     Py_CLEAR(self->pending);
1081     Py_TYPE(self)->tp_free(self);
1082 }
1083 
1084 static PyTypeObject MultibyteIncrementalEncoder_Type = {
1085     PyVarObject_HEAD_INIT(NULL, 0)
1086     "MultibyteIncrementalEncoder",      /* tp_name */
1087     sizeof(MultibyteIncrementalEncoderObject), /* tp_basicsize */
1088     0,                                  /* tp_itemsize */
1089     /*  methods  */
1090     (destructor)mbiencoder_dealloc, /* tp_dealloc */
1091     0,                                  /* tp_vectorcall_offset */
1092     0,                                  /* tp_getattr */
1093     0,                                  /* tp_setattr */
1094     0,                                  /* tp_as_async */
1095     0,                                  /* tp_repr */
1096     0,                                  /* tp_as_number */
1097     0,                                  /* tp_as_sequence */
1098     0,                                  /* tp_as_mapping */
1099     0,                                  /* tp_hash */
1100     0,                                  /* tp_call */
1101     0,                                  /* tp_str */
1102     PyObject_GenericGetAttr,            /* tp_getattro */
1103     0,                                  /* tp_setattro */
1104     0,                                  /* tp_as_buffer */
1105     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC
1106         | Py_TPFLAGS_BASETYPE,          /* tp_flags */
1107     0,                                  /* tp_doc */
1108     (traverseproc)mbiencoder_traverse,          /* tp_traverse */
1109     0,                                  /* tp_clear */
1110     0,                                  /* tp_richcompare */
1111     0,                                  /* tp_weaklistoffset */
1112     0,                                  /* tp_iter */
1113     0,                                  /* tp_iterext */
1114     mbiencoder_methods,                 /* tp_methods */
1115     0,                                  /* tp_members */
1116     codecctx_getsets,                   /* tp_getset */
1117     0,                                  /* tp_base */
1118     0,                                  /* tp_dict */
1119     0,                                  /* tp_descr_get */
1120     0,                                  /* tp_descr_set */
1121     0,                                  /* tp_dictoffset */
1122     mbiencoder_init,                    /* tp_init */
1123     0,                                  /* tp_alloc */
1124     mbiencoder_new,                     /* tp_new */
1125 };
1126 
1127 
1128 /*[clinic input]
1129  class _multibytecodec.MultibyteIncrementalDecoder "MultibyteIncrementalDecoderObject *" "&MultibyteIncrementalDecoder_Type"
1130 [clinic start generated code]*/
1131 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=f6003faaf2cea692]*/
1132 
1133 /*[clinic input]
1134 _multibytecodec.MultibyteIncrementalDecoder.decode
1135 
1136     input: Py_buffer
1137     final: bool(accept={int}) = False
1138 [clinic start generated code]*/
1139 
1140 static PyObject *
_multibytecodec_MultibyteIncrementalDecoder_decode_impl(MultibyteIncrementalDecoderObject * self,Py_buffer * input,int final)1141 _multibytecodec_MultibyteIncrementalDecoder_decode_impl(MultibyteIncrementalDecoderObject *self,
1142                                                         Py_buffer *input,
1143                                                         int final)
1144 /*[clinic end generated code: output=b9b9090e8a9ce2ba input=c9132b24d503eb1d]*/
1145 {
1146     MultibyteDecodeBuffer buf;
1147     char *data, *wdata = NULL;
1148     Py_ssize_t wsize, size, origpending;
1149     PyObject *res;
1150 
1151     data = input->buf;
1152     size = input->len;
1153 
1154     _PyUnicodeWriter_Init(&buf.writer);
1155     buf.excobj = NULL;
1156     origpending = self->pendingsize;
1157 
1158     if (self->pendingsize == 0) {
1159         wsize = size;
1160         wdata = data;
1161     }
1162     else {
1163         if (size > PY_SSIZE_T_MAX - self->pendingsize) {
1164             PyErr_NoMemory();
1165             goto errorexit;
1166         }
1167         wsize = size + self->pendingsize;
1168         wdata = PyMem_Malloc(wsize);
1169         if (wdata == NULL) {
1170             PyErr_NoMemory();
1171             goto errorexit;
1172         }
1173         memcpy(wdata, self->pending, self->pendingsize);
1174         memcpy(wdata + self->pendingsize, data, size);
1175         self->pendingsize = 0;
1176     }
1177 
1178     if (decoder_prepare_buffer(&buf, wdata, wsize) != 0)
1179         goto errorexit;
1180 
1181     if (decoder_feed_buffer(STATEFUL_DCTX(self), &buf))
1182         goto errorexit;
1183 
1184     if (final && buf.inbuf < buf.inbuf_end) {
1185         if (multibytecodec_decerror(self->codec, &self->state,
1186                         &buf, self->errors, MBERR_TOOFEW)) {
1187             /* recover the original pending buffer */
1188             memcpy(self->pending, wdata, origpending);
1189             self->pendingsize = origpending;
1190             goto errorexit;
1191         }
1192     }
1193 
1194     if (buf.inbuf < buf.inbuf_end) { /* pending sequence still exists */
1195         if (decoder_append_pending(STATEFUL_DCTX(self), &buf) != 0)
1196             goto errorexit;
1197     }
1198 
1199     res = _PyUnicodeWriter_Finish(&buf.writer);
1200     if (res == NULL)
1201         goto errorexit;
1202 
1203     if (wdata != data)
1204         PyMem_Del(wdata);
1205     Py_XDECREF(buf.excobj);
1206     return res;
1207 
1208 errorexit:
1209     if (wdata != NULL && wdata != data)
1210         PyMem_Del(wdata);
1211     Py_XDECREF(buf.excobj);
1212     _PyUnicodeWriter_Dealloc(&buf.writer);
1213     return NULL;
1214 }
1215 
1216 /*[clinic input]
1217 _multibytecodec.MultibyteIncrementalDecoder.getstate
1218 [clinic start generated code]*/
1219 
1220 static PyObject *
_multibytecodec_MultibyteIncrementalDecoder_getstate_impl(MultibyteIncrementalDecoderObject * self)1221 _multibytecodec_MultibyteIncrementalDecoder_getstate_impl(MultibyteIncrementalDecoderObject *self)
1222 /*[clinic end generated code: output=255009c4713b7f82 input=4006aa49bddbaa75]*/
1223 {
1224     PyObject *buffer;
1225     PyObject *statelong;
1226 
1227     buffer = PyBytes_FromStringAndSize((const char *)self->pending,
1228                                        self->pendingsize);
1229     if (buffer == NULL) {
1230         return NULL;
1231     }
1232 
1233     statelong = (PyObject *)_PyLong_FromByteArray(self->state.c,
1234                                                   sizeof(self->state.c),
1235                                                   1 /* little-endian */ ,
1236                                                   0 /* unsigned */ );
1237     if (statelong == NULL) {
1238         Py_DECREF(buffer);
1239         return NULL;
1240     }
1241 
1242     return Py_BuildValue("NN", buffer, statelong);
1243 }
1244 
1245 /*[clinic input]
1246 _multibytecodec.MultibyteIncrementalDecoder.setstate
1247     state: object(subclass_of='&PyTuple_Type')
1248     /
1249 [clinic start generated code]*/
1250 
1251 static PyObject *
_multibytecodec_MultibyteIncrementalDecoder_setstate_impl(MultibyteIncrementalDecoderObject * self,PyObject * state)1252 _multibytecodec_MultibyteIncrementalDecoder_setstate_impl(MultibyteIncrementalDecoderObject *self,
1253                                                           PyObject *state)
1254 /*[clinic end generated code: output=106b2fbca3e2dcc2 input=e5d794e8baba1a47]*/
1255 {
1256     PyObject *buffer;
1257     PyLongObject *statelong;
1258     Py_ssize_t buffersize;
1259     char *bufferstr;
1260     unsigned char statebytes[8];
1261 
1262     if (!PyArg_ParseTuple(state, "SO!;setstate(): illegal state argument",
1263                           &buffer, &PyLong_Type, &statelong))
1264     {
1265         return NULL;
1266     }
1267 
1268     if (_PyLong_AsByteArray(statelong, statebytes, sizeof(statebytes),
1269                             1 /* little-endian */ ,
1270                             0 /* unsigned */ ) < 0) {
1271         return NULL;
1272     }
1273 
1274     buffersize = PyBytes_Size(buffer);
1275     if (buffersize == -1) {
1276         return NULL;
1277     }
1278 
1279     if (buffersize > MAXDECPENDING) {
1280         PyErr_SetString(PyExc_UnicodeError, "pending buffer too large");
1281         return NULL;
1282     }
1283 
1284     bufferstr = PyBytes_AsString(buffer);
1285     if (bufferstr == NULL) {
1286         return NULL;
1287     }
1288     self->pendingsize = buffersize;
1289     memcpy(self->pending, bufferstr, self->pendingsize);
1290     memcpy(self->state.c, statebytes, sizeof(statebytes));
1291 
1292     Py_RETURN_NONE;
1293 }
1294 
1295 /*[clinic input]
1296 _multibytecodec.MultibyteIncrementalDecoder.reset
1297 [clinic start generated code]*/
1298 
1299 static PyObject *
_multibytecodec_MultibyteIncrementalDecoder_reset_impl(MultibyteIncrementalDecoderObject * self)1300 _multibytecodec_MultibyteIncrementalDecoder_reset_impl(MultibyteIncrementalDecoderObject *self)
1301 /*[clinic end generated code: output=da423b1782c23ed1 input=3b63b3be85b2fb45]*/
1302 {
1303     if (self->codec->decreset != NULL &&
1304         self->codec->decreset(&self->state, self->codec->config) != 0)
1305         return NULL;
1306     self->pendingsize = 0;
1307 
1308     Py_RETURN_NONE;
1309 }
1310 
1311 static struct PyMethodDef mbidecoder_methods[] = {
1312     _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_DECODE_METHODDEF
1313     _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_GETSTATE_METHODDEF
1314     _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_SETSTATE_METHODDEF
1315     _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_RESET_METHODDEF
1316     {NULL, NULL},
1317 };
1318 
1319 static PyObject *
mbidecoder_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1320 mbidecoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1321 {
1322     MultibyteIncrementalDecoderObject *self;
1323     PyObject *codec = NULL;
1324     char *errors = NULL;
1325 
1326     if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalDecoder",
1327                                      incnewkwarglist, &errors))
1328         return NULL;
1329 
1330     self = (MultibyteIncrementalDecoderObject *)type->tp_alloc(type, 0);
1331     if (self == NULL)
1332         return NULL;
1333 
1334     codec = PyObject_GetAttrString((PyObject *)type, "codec");
1335     if (codec == NULL)
1336         goto errorexit;
1337     if (!MultibyteCodec_Check(codec)) {
1338         PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1339         goto errorexit;
1340     }
1341 
1342     self->codec = ((MultibyteCodecObject *)codec)->codec;
1343     self->pendingsize = 0;
1344     self->errors = internal_error_callback(errors);
1345     if (self->errors == NULL)
1346         goto errorexit;
1347     if (self->codec->decinit != NULL &&
1348         self->codec->decinit(&self->state, self->codec->config) != 0)
1349         goto errorexit;
1350 
1351     Py_DECREF(codec);
1352     return (PyObject *)self;
1353 
1354 errorexit:
1355     Py_XDECREF(self);
1356     Py_XDECREF(codec);
1357     return NULL;
1358 }
1359 
1360 static int
mbidecoder_init(PyObject * self,PyObject * args,PyObject * kwds)1361 mbidecoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1362 {
1363     return 0;
1364 }
1365 
1366 static int
mbidecoder_traverse(MultibyteIncrementalDecoderObject * self,visitproc visit,void * arg)1367 mbidecoder_traverse(MultibyteIncrementalDecoderObject *self,
1368                     visitproc visit, void *arg)
1369 {
1370     if (ERROR_ISCUSTOM(self->errors))
1371         Py_VISIT(self->errors);
1372     return 0;
1373 }
1374 
1375 static void
mbidecoder_dealloc(MultibyteIncrementalDecoderObject * self)1376 mbidecoder_dealloc(MultibyteIncrementalDecoderObject *self)
1377 {
1378     PyObject_GC_UnTrack(self);
1379     ERROR_DECREF(self->errors);
1380     Py_TYPE(self)->tp_free(self);
1381 }
1382 
1383 static PyTypeObject MultibyteIncrementalDecoder_Type = {
1384     PyVarObject_HEAD_INIT(NULL, 0)
1385     "MultibyteIncrementalDecoder",      /* tp_name */
1386     sizeof(MultibyteIncrementalDecoderObject), /* tp_basicsize */
1387     0,                                  /* tp_itemsize */
1388     /*  methods  */
1389     (destructor)mbidecoder_dealloc, /* tp_dealloc */
1390     0,                                  /* tp_vectorcall_offset */
1391     0,                                  /* tp_getattr */
1392     0,                                  /* tp_setattr */
1393     0,                                  /* tp_as_async */
1394     0,                                  /* tp_repr */
1395     0,                                  /* tp_as_number */
1396     0,                                  /* tp_as_sequence */
1397     0,                                  /* tp_as_mapping */
1398     0,                                  /* tp_hash */
1399     0,                                  /* tp_call */
1400     0,                                  /* tp_str */
1401     PyObject_GenericGetAttr,            /* tp_getattro */
1402     0,                                  /* tp_setattro */
1403     0,                                  /* tp_as_buffer */
1404     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC
1405         | Py_TPFLAGS_BASETYPE,          /* tp_flags */
1406     0,                                  /* tp_doc */
1407     (traverseproc)mbidecoder_traverse,          /* tp_traverse */
1408     0,                                  /* tp_clear */
1409     0,                                  /* tp_richcompare */
1410     0,                                  /* tp_weaklistoffset */
1411     0,                                  /* tp_iter */
1412     0,                                  /* tp_iterext */
1413     mbidecoder_methods,                 /* tp_methods */
1414     0,                                  /* tp_members */
1415     codecctx_getsets,                   /* tp_getset */
1416     0,                                  /* tp_base */
1417     0,                                  /* tp_dict */
1418     0,                                  /* tp_descr_get */
1419     0,                                  /* tp_descr_set */
1420     0,                                  /* tp_dictoffset */
1421     mbidecoder_init,                    /* tp_init */
1422     0,                                  /* tp_alloc */
1423     mbidecoder_new,                     /* tp_new */
1424 };
1425 
1426 
1427 /*[clinic input]
1428  class _multibytecodec.MultibyteStreamReader "MultibyteStreamReaderObject *" "MultibyteStreamReader_Type"
1429 [clinic start generated code]*/
1430 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=d323634b74976f09]*/
1431 
1432 static PyObject *
mbstreamreader_iread(MultibyteStreamReaderObject * self,const char * method,Py_ssize_t sizehint)1433 mbstreamreader_iread(MultibyteStreamReaderObject *self,
1434                      const char *method, Py_ssize_t sizehint)
1435 {
1436     MultibyteDecodeBuffer buf;
1437     PyObject *cres, *res;
1438     Py_ssize_t rsize;
1439 
1440     if (sizehint == 0)
1441         return PyUnicode_New(0, 0);
1442 
1443     _PyUnicodeWriter_Init(&buf.writer);
1444     buf.excobj = NULL;
1445     cres = NULL;
1446 
1447     for (;;) {
1448         int endoffile;
1449 
1450         if (sizehint < 0)
1451             cres = PyObject_CallMethod(self->stream,
1452                             method, NULL);
1453         else
1454             cres = PyObject_CallMethod(self->stream,
1455                             method, "i", sizehint);
1456         if (cres == NULL)
1457             goto errorexit;
1458 
1459         if (!PyBytes_Check(cres)) {
1460             PyErr_Format(PyExc_TypeError,
1461                          "stream function returned a "
1462                          "non-bytes object (%.100s)",
1463                          cres->ob_type->tp_name);
1464             goto errorexit;
1465         }
1466 
1467         endoffile = (PyBytes_GET_SIZE(cres) == 0);
1468 
1469         if (self->pendingsize > 0) {
1470             PyObject *ctr;
1471             char *ctrdata;
1472 
1473             if (PyBytes_GET_SIZE(cres) > PY_SSIZE_T_MAX - self->pendingsize) {
1474                 PyErr_NoMemory();
1475                 goto errorexit;
1476             }
1477             rsize = PyBytes_GET_SIZE(cres) + self->pendingsize;
1478             ctr = PyBytes_FromStringAndSize(NULL, rsize);
1479             if (ctr == NULL)
1480                 goto errorexit;
1481             ctrdata = PyBytes_AS_STRING(ctr);
1482             memcpy(ctrdata, self->pending, self->pendingsize);
1483             memcpy(ctrdata + self->pendingsize,
1484                     PyBytes_AS_STRING(cres),
1485                     PyBytes_GET_SIZE(cres));
1486             Py_DECREF(cres);
1487             cres = ctr;
1488             self->pendingsize = 0;
1489         }
1490 
1491         rsize = PyBytes_GET_SIZE(cres);
1492         if (decoder_prepare_buffer(&buf, PyBytes_AS_STRING(cres),
1493                                    rsize) != 0)
1494             goto errorexit;
1495 
1496         if (rsize > 0 && decoder_feed_buffer(
1497                         (MultibyteStatefulDecoderContext *)self, &buf))
1498             goto errorexit;
1499 
1500         if (endoffile || sizehint < 0) {
1501             if (buf.inbuf < buf.inbuf_end &&
1502                 multibytecodec_decerror(self->codec, &self->state,
1503                             &buf, self->errors, MBERR_TOOFEW))
1504                 goto errorexit;
1505         }
1506 
1507         if (buf.inbuf < buf.inbuf_end) { /* pending sequence exists */
1508             if (decoder_append_pending(STATEFUL_DCTX(self),
1509                                        &buf) != 0)
1510                 goto errorexit;
1511         }
1512 
1513         Py_DECREF(cres);
1514         cres = NULL;
1515 
1516         if (sizehint < 0 || buf.writer.pos != 0 || rsize == 0)
1517             break;
1518 
1519         sizehint = 1; /* read 1 more byte and retry */
1520     }
1521 
1522     res = _PyUnicodeWriter_Finish(&buf.writer);
1523     if (res == NULL)
1524         goto errorexit;
1525 
1526     Py_XDECREF(cres);
1527     Py_XDECREF(buf.excobj);
1528     return res;
1529 
1530 errorexit:
1531     Py_XDECREF(cres);
1532     Py_XDECREF(buf.excobj);
1533     _PyUnicodeWriter_Dealloc(&buf.writer);
1534     return NULL;
1535 }
1536 
1537 /*[clinic input]
1538  _multibytecodec.MultibyteStreamReader.read
1539 
1540     sizeobj: object = None
1541     /
1542 [clinic start generated code]*/
1543 
1544 static PyObject *
_multibytecodec_MultibyteStreamReader_read_impl(MultibyteStreamReaderObject * self,PyObject * sizeobj)1545 _multibytecodec_MultibyteStreamReader_read_impl(MultibyteStreamReaderObject *self,
1546                                                 PyObject *sizeobj)
1547 /*[clinic end generated code: output=35621eb75355d5b8 input=015b0d3ff2fca485]*/
1548 {
1549     Py_ssize_t size;
1550 
1551     if (sizeobj == Py_None)
1552         size = -1;
1553     else if (PyLong_Check(sizeobj))
1554         size = PyLong_AsSsize_t(sizeobj);
1555     else {
1556         PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
1557         return NULL;
1558     }
1559 
1560     if (size == -1 && PyErr_Occurred())
1561         return NULL;
1562 
1563     return mbstreamreader_iread(self, "read", size);
1564 }
1565 
1566 /*[clinic input]
1567  _multibytecodec.MultibyteStreamReader.readline
1568 
1569     sizeobj: object = None
1570     /
1571 [clinic start generated code]*/
1572 
1573 static PyObject *
_multibytecodec_MultibyteStreamReader_readline_impl(MultibyteStreamReaderObject * self,PyObject * sizeobj)1574 _multibytecodec_MultibyteStreamReader_readline_impl(MultibyteStreamReaderObject *self,
1575                                                     PyObject *sizeobj)
1576 /*[clinic end generated code: output=4fbfaae1ed457a11 input=41ccc64f9bb0cec3]*/
1577 {
1578     Py_ssize_t size;
1579 
1580     if (sizeobj == Py_None)
1581         size = -1;
1582     else if (PyLong_Check(sizeobj))
1583         size = PyLong_AsSsize_t(sizeobj);
1584     else {
1585         PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
1586         return NULL;
1587     }
1588 
1589     if (size == -1 && PyErr_Occurred())
1590         return NULL;
1591 
1592     return mbstreamreader_iread(self, "readline", size);
1593 }
1594 
1595 /*[clinic input]
1596  _multibytecodec.MultibyteStreamReader.readlines
1597 
1598     sizehintobj: object = None
1599     /
1600 [clinic start generated code]*/
1601 
1602 static PyObject *
_multibytecodec_MultibyteStreamReader_readlines_impl(MultibyteStreamReaderObject * self,PyObject * sizehintobj)1603 _multibytecodec_MultibyteStreamReader_readlines_impl(MultibyteStreamReaderObject *self,
1604                                                      PyObject *sizehintobj)
1605 /*[clinic end generated code: output=e7c4310768ed2ad4 input=54932f5d4d88e880]*/
1606 {
1607     PyObject *r, *sr;
1608     Py_ssize_t sizehint;
1609 
1610     if (sizehintobj == Py_None)
1611         sizehint = -1;
1612     else if (PyLong_Check(sizehintobj))
1613         sizehint = PyLong_AsSsize_t(sizehintobj);
1614     else {
1615         PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
1616         return NULL;
1617     }
1618 
1619     if (sizehint == -1 && PyErr_Occurred())
1620         return NULL;
1621 
1622     r = mbstreamreader_iread(self, "read", sizehint);
1623     if (r == NULL)
1624         return NULL;
1625 
1626     sr = PyUnicode_Splitlines(r, 1);
1627     Py_DECREF(r);
1628     return sr;
1629 }
1630 
1631 /*[clinic input]
1632  _multibytecodec.MultibyteStreamReader.reset
1633 [clinic start generated code]*/
1634 
1635 static PyObject *
_multibytecodec_MultibyteStreamReader_reset_impl(MultibyteStreamReaderObject * self)1636 _multibytecodec_MultibyteStreamReader_reset_impl(MultibyteStreamReaderObject *self)
1637 /*[clinic end generated code: output=138490370a680abc input=5d4140db84b5e1e2]*/
1638 {
1639     if (self->codec->decreset != NULL &&
1640         self->codec->decreset(&self->state, self->codec->config) != 0)
1641         return NULL;
1642     self->pendingsize = 0;
1643 
1644     Py_RETURN_NONE;
1645 }
1646 
1647 static struct PyMethodDef mbstreamreader_methods[] = {
1648     _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READ_METHODDEF
1649     _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READLINE_METHODDEF
1650     _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READLINES_METHODDEF
1651     _MULTIBYTECODEC_MULTIBYTESTREAMREADER_RESET_METHODDEF
1652     {NULL,              NULL},
1653 };
1654 
1655 static PyMemberDef mbstreamreader_members[] = {
1656     {"stream",          T_OBJECT,
1657                     offsetof(MultibyteStreamReaderObject, stream),
1658                     READONLY, NULL},
1659     {NULL,}
1660 };
1661 
1662 static PyObject *
mbstreamreader_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1663 mbstreamreader_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1664 {
1665     MultibyteStreamReaderObject *self;
1666     PyObject *stream, *codec = NULL;
1667     char *errors = NULL;
1668 
1669     if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamReader",
1670                             streamkwarglist, &stream, &errors))
1671         return NULL;
1672 
1673     self = (MultibyteStreamReaderObject *)type->tp_alloc(type, 0);
1674     if (self == NULL)
1675         return NULL;
1676 
1677     codec = PyObject_GetAttrString((PyObject *)type, "codec");
1678     if (codec == NULL)
1679         goto errorexit;
1680     if (!MultibyteCodec_Check(codec)) {
1681         PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1682         goto errorexit;
1683     }
1684 
1685     self->codec = ((MultibyteCodecObject *)codec)->codec;
1686     self->stream = stream;
1687     Py_INCREF(stream);
1688     self->pendingsize = 0;
1689     self->errors = internal_error_callback(errors);
1690     if (self->errors == NULL)
1691         goto errorexit;
1692     if (self->codec->decinit != NULL &&
1693         self->codec->decinit(&self->state, self->codec->config) != 0)
1694         goto errorexit;
1695 
1696     Py_DECREF(codec);
1697     return (PyObject *)self;
1698 
1699 errorexit:
1700     Py_XDECREF(self);
1701     Py_XDECREF(codec);
1702     return NULL;
1703 }
1704 
1705 static int
mbstreamreader_init(PyObject * self,PyObject * args,PyObject * kwds)1706 mbstreamreader_init(PyObject *self, PyObject *args, PyObject *kwds)
1707 {
1708     return 0;
1709 }
1710 
1711 static int
mbstreamreader_traverse(MultibyteStreamReaderObject * self,visitproc visit,void * arg)1712 mbstreamreader_traverse(MultibyteStreamReaderObject *self,
1713                         visitproc visit, void *arg)
1714 {
1715     if (ERROR_ISCUSTOM(self->errors))
1716         Py_VISIT(self->errors);
1717     Py_VISIT(self->stream);
1718     return 0;
1719 }
1720 
1721 static void
mbstreamreader_dealloc(MultibyteStreamReaderObject * self)1722 mbstreamreader_dealloc(MultibyteStreamReaderObject *self)
1723 {
1724     PyObject_GC_UnTrack(self);
1725     ERROR_DECREF(self->errors);
1726     Py_XDECREF(self->stream);
1727     Py_TYPE(self)->tp_free(self);
1728 }
1729 
1730 static PyTypeObject MultibyteStreamReader_Type = {
1731     PyVarObject_HEAD_INIT(NULL, 0)
1732     "MultibyteStreamReader",            /* tp_name */
1733     sizeof(MultibyteStreamReaderObject), /* tp_basicsize */
1734     0,                                  /* tp_itemsize */
1735     /*  methods  */
1736     (destructor)mbstreamreader_dealloc, /* tp_dealloc */
1737     0,                                  /* tp_vectorcall_offset */
1738     0,                                  /* tp_getattr */
1739     0,                                  /* tp_setattr */
1740     0,                                  /* tp_as_async */
1741     0,                                  /* tp_repr */
1742     0,                                  /* tp_as_number */
1743     0,                                  /* tp_as_sequence */
1744     0,                                  /* tp_as_mapping */
1745     0,                                  /* tp_hash */
1746     0,                                  /* tp_call */
1747     0,                                  /* tp_str */
1748     PyObject_GenericGetAttr,            /* tp_getattro */
1749     0,                                  /* tp_setattro */
1750     0,                                  /* tp_as_buffer */
1751     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC
1752         | Py_TPFLAGS_BASETYPE,          /* tp_flags */
1753     0,                                  /* tp_doc */
1754     (traverseproc)mbstreamreader_traverse,      /* tp_traverse */
1755     0,                                  /* tp_clear */
1756     0,                                  /* tp_richcompare */
1757     0,                                  /* tp_weaklistoffset */
1758     0,                                  /* tp_iter */
1759     0,                                  /* tp_iterext */
1760     mbstreamreader_methods,             /* tp_methods */
1761     mbstreamreader_members,             /* tp_members */
1762     codecctx_getsets,                   /* tp_getset */
1763     0,                                  /* tp_base */
1764     0,                                  /* tp_dict */
1765     0,                                  /* tp_descr_get */
1766     0,                                  /* tp_descr_set */
1767     0,                                  /* tp_dictoffset */
1768     mbstreamreader_init,                /* tp_init */
1769     0,                                  /* tp_alloc */
1770     mbstreamreader_new,                 /* tp_new */
1771 };
1772 
1773 
1774 /*[clinic input]
1775  class _multibytecodec.MultibyteStreamWriter "MultibyteStreamWriterObject *" "&MultibyteStreamWriter_Type"
1776 [clinic start generated code]*/
1777 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=cde22780a215d6ac]*/
1778 
1779 static int
mbstreamwriter_iwrite(MultibyteStreamWriterObject * self,PyObject * unistr)1780 mbstreamwriter_iwrite(MultibyteStreamWriterObject *self,
1781                       PyObject *unistr)
1782 {
1783     PyObject *str, *wr;
1784 
1785     str = encoder_encode_stateful(STATEFUL_ECTX(self), unistr, 0);
1786     if (str == NULL)
1787         return -1;
1788 
1789     wr = _PyObject_CallMethodIdObjArgs(self->stream, &PyId_write, str, NULL);
1790     Py_DECREF(str);
1791     if (wr == NULL)
1792         return -1;
1793 
1794     Py_DECREF(wr);
1795     return 0;
1796 }
1797 
1798 /*[clinic input]
1799  _multibytecodec.MultibyteStreamWriter.write
1800 
1801     strobj: object
1802     /
1803 [clinic start generated code]*/
1804 
1805 static PyObject *
_multibytecodec_MultibyteStreamWriter_write(MultibyteStreamWriterObject * self,PyObject * strobj)1806 _multibytecodec_MultibyteStreamWriter_write(MultibyteStreamWriterObject *self,
1807                                             PyObject *strobj)
1808 /*[clinic end generated code: output=e13ae841c895251e input=551dc4c018c10a2b]*/
1809 {
1810     if (mbstreamwriter_iwrite(self, strobj))
1811         return NULL;
1812     else
1813         Py_RETURN_NONE;
1814 }
1815 
1816 /*[clinic input]
1817  _multibytecodec.MultibyteStreamWriter.writelines
1818 
1819     lines: object
1820     /
1821 [clinic start generated code]*/
1822 
1823 static PyObject *
_multibytecodec_MultibyteStreamWriter_writelines(MultibyteStreamWriterObject * self,PyObject * lines)1824 _multibytecodec_MultibyteStreamWriter_writelines(MultibyteStreamWriterObject *self,
1825                                                  PyObject *lines)
1826 /*[clinic end generated code: output=e5c4285ac8e7d522 input=57797fe7008d4e96]*/
1827 {
1828     PyObject *strobj;
1829     int i, r;
1830 
1831     if (!PySequence_Check(lines)) {
1832         PyErr_SetString(PyExc_TypeError,
1833                         "arg must be a sequence object");
1834         return NULL;
1835     }
1836 
1837     for (i = 0; i < PySequence_Length(lines); i++) {
1838         /* length can be changed even within this loop */
1839         strobj = PySequence_GetItem(lines, i);
1840         if (strobj == NULL)
1841             return NULL;
1842 
1843         r = mbstreamwriter_iwrite(self, strobj);
1844         Py_DECREF(strobj);
1845         if (r == -1)
1846             return NULL;
1847     }
1848     /* PySequence_Length() can fail */
1849     if (PyErr_Occurred())
1850         return NULL;
1851 
1852     Py_RETURN_NONE;
1853 }
1854 
1855 /*[clinic input]
1856  _multibytecodec.MultibyteStreamWriter.reset
1857 [clinic start generated code]*/
1858 
1859 static PyObject *
_multibytecodec_MultibyteStreamWriter_reset_impl(MultibyteStreamWriterObject * self)1860 _multibytecodec_MultibyteStreamWriter_reset_impl(MultibyteStreamWriterObject *self)
1861 /*[clinic end generated code: output=8f54a4d9b03db5ff input=b56dbcbaf35cc10c]*/
1862 {
1863     PyObject *pwrt;
1864 
1865     if (!self->pending)
1866         Py_RETURN_NONE;
1867 
1868     pwrt = multibytecodec_encode(self->codec, &self->state,
1869                     self->pending, NULL, self->errors,
1870                     MBENC_FLUSH | MBENC_RESET);
1871     /* some pending buffer can be truncated when UnicodeEncodeError is
1872      * raised on 'strict' mode. but, 'reset' method is designed to
1873      * reset the pending buffer or states so failed string sequence
1874      * ought to be missed */
1875     Py_CLEAR(self->pending);
1876     if (pwrt == NULL)
1877         return NULL;
1878 
1879     assert(PyBytes_Check(pwrt));
1880     if (PyBytes_Size(pwrt) > 0) {
1881         PyObject *wr;
1882 
1883         wr = _PyObject_CallMethodIdObjArgs(self->stream, &PyId_write, pwrt);
1884         if (wr == NULL) {
1885             Py_DECREF(pwrt);
1886             return NULL;
1887         }
1888     }
1889     Py_DECREF(pwrt);
1890 
1891     Py_RETURN_NONE;
1892 }
1893 
1894 static PyObject *
mbstreamwriter_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1895 mbstreamwriter_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1896 {
1897     MultibyteStreamWriterObject *self;
1898     PyObject *stream, *codec = NULL;
1899     char *errors = NULL;
1900 
1901     if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamWriter",
1902                             streamkwarglist, &stream, &errors))
1903         return NULL;
1904 
1905     self = (MultibyteStreamWriterObject *)type->tp_alloc(type, 0);
1906     if (self == NULL)
1907         return NULL;
1908 
1909     codec = PyObject_GetAttrString((PyObject *)type, "codec");
1910     if (codec == NULL)
1911         goto errorexit;
1912     if (!MultibyteCodec_Check(codec)) {
1913         PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1914         goto errorexit;
1915     }
1916 
1917     self->codec = ((MultibyteCodecObject *)codec)->codec;
1918     self->stream = stream;
1919     Py_INCREF(stream);
1920     self->pending = NULL;
1921     self->errors = internal_error_callback(errors);
1922     if (self->errors == NULL)
1923         goto errorexit;
1924     if (self->codec->encinit != NULL &&
1925         self->codec->encinit(&self->state, self->codec->config) != 0)
1926         goto errorexit;
1927 
1928     Py_DECREF(codec);
1929     return (PyObject *)self;
1930 
1931 errorexit:
1932     Py_XDECREF(self);
1933     Py_XDECREF(codec);
1934     return NULL;
1935 }
1936 
1937 static int
mbstreamwriter_init(PyObject * self,PyObject * args,PyObject * kwds)1938 mbstreamwriter_init(PyObject *self, PyObject *args, PyObject *kwds)
1939 {
1940     return 0;
1941 }
1942 
1943 static int
mbstreamwriter_traverse(MultibyteStreamWriterObject * self,visitproc visit,void * arg)1944 mbstreamwriter_traverse(MultibyteStreamWriterObject *self,
1945                         visitproc visit, void *arg)
1946 {
1947     if (ERROR_ISCUSTOM(self->errors))
1948         Py_VISIT(self->errors);
1949     Py_VISIT(self->stream);
1950     return 0;
1951 }
1952 
1953 static void
mbstreamwriter_dealloc(MultibyteStreamWriterObject * self)1954 mbstreamwriter_dealloc(MultibyteStreamWriterObject *self)
1955 {
1956     PyObject_GC_UnTrack(self);
1957     ERROR_DECREF(self->errors);
1958     Py_XDECREF(self->stream);
1959     Py_TYPE(self)->tp_free(self);
1960 }
1961 
1962 static struct PyMethodDef mbstreamwriter_methods[] = {
1963     _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_WRITE_METHODDEF
1964     _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_WRITELINES_METHODDEF
1965     _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_RESET_METHODDEF
1966     {NULL, NULL},
1967 };
1968 
1969 static PyMemberDef mbstreamwriter_members[] = {
1970     {"stream",          T_OBJECT,
1971                     offsetof(MultibyteStreamWriterObject, stream),
1972                     READONLY, NULL},
1973     {NULL,}
1974 };
1975 
1976 static PyTypeObject MultibyteStreamWriter_Type = {
1977     PyVarObject_HEAD_INIT(NULL, 0)
1978     "MultibyteStreamWriter",            /* tp_name */
1979     sizeof(MultibyteStreamWriterObject), /* tp_basicsize */
1980     0,                                  /* tp_itemsize */
1981     /*  methods  */
1982     (destructor)mbstreamwriter_dealloc, /* tp_dealloc */
1983     0,                                  /* tp_vectorcall_offset */
1984     0,                                  /* tp_getattr */
1985     0,                                  /* tp_setattr */
1986     0,                                  /* tp_as_async */
1987     0,                                  /* tp_repr */
1988     0,                                  /* tp_as_number */
1989     0,                                  /* tp_as_sequence */
1990     0,                                  /* tp_as_mapping */
1991     0,                                  /* tp_hash */
1992     0,                                  /* tp_call */
1993     0,                                  /* tp_str */
1994     PyObject_GenericGetAttr,            /* tp_getattro */
1995     0,                                  /* tp_setattro */
1996     0,                                  /* tp_as_buffer */
1997     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC
1998         | Py_TPFLAGS_BASETYPE,          /* tp_flags */
1999     0,                                  /* tp_doc */
2000     (traverseproc)mbstreamwriter_traverse,      /* tp_traverse */
2001     0,                                  /* tp_clear */
2002     0,                                  /* tp_richcompare */
2003     0,                                  /* tp_weaklistoffset */
2004     0,                                  /* tp_iter */
2005     0,                                  /* tp_iterext */
2006     mbstreamwriter_methods,             /* tp_methods */
2007     mbstreamwriter_members,             /* tp_members */
2008     codecctx_getsets,                   /* tp_getset */
2009     0,                                  /* tp_base */
2010     0,                                  /* tp_dict */
2011     0,                                  /* tp_descr_get */
2012     0,                                  /* tp_descr_set */
2013     0,                                  /* tp_dictoffset */
2014     mbstreamwriter_init,                /* tp_init */
2015     0,                                  /* tp_alloc */
2016     mbstreamwriter_new,                 /* tp_new */
2017 };
2018 
2019 
2020 /*[clinic input]
2021 _multibytecodec.__create_codec
2022 
2023     arg: object
2024     /
2025 [clinic start generated code]*/
2026 
2027 static PyObject *
_multibytecodec___create_codec(PyObject * module,PyObject * arg)2028 _multibytecodec___create_codec(PyObject *module, PyObject *arg)
2029 /*[clinic end generated code: output=cfa3dce8260e809d input=6840b2a6b183fcfa]*/
2030 {
2031     MultibyteCodecObject *self;
2032     MultibyteCodec *codec;
2033 
2034     if (!PyCapsule_IsValid(arg, PyMultibyteCodec_CAPSULE_NAME)) {
2035         PyErr_SetString(PyExc_ValueError, "argument type invalid");
2036         return NULL;
2037     }
2038 
2039     codec = PyCapsule_GetPointer(arg, PyMultibyteCodec_CAPSULE_NAME);
2040     if (codec->codecinit != NULL && codec->codecinit(codec->config) != 0)
2041         return NULL;
2042 
2043     self = PyObject_New(MultibyteCodecObject, &MultibyteCodec_Type);
2044     if (self == NULL)
2045         return NULL;
2046     self->codec = codec;
2047 
2048     return (PyObject *)self;
2049 }
2050 
2051 static struct PyMethodDef __methods[] = {
2052     _MULTIBYTECODEC___CREATE_CODEC_METHODDEF
2053     {NULL, NULL},
2054 };
2055 
2056 
2057 static struct PyModuleDef _multibytecodecmodule = {
2058     PyModuleDef_HEAD_INIT,
2059     "_multibytecodec",
2060     NULL,
2061     -1,
2062     __methods,
2063     NULL,
2064     NULL,
2065     NULL,
2066     NULL
2067 };
2068 
2069 PyMODINIT_FUNC
PyInit__multibytecodec(void)2070 PyInit__multibytecodec(void)
2071 {
2072     int i;
2073     PyObject *m;
2074     PyTypeObject *typelist[] = {
2075         &MultibyteIncrementalEncoder_Type,
2076         &MultibyteIncrementalDecoder_Type,
2077         &MultibyteStreamReader_Type,
2078         &MultibyteStreamWriter_Type,
2079         NULL
2080     };
2081 
2082     if (PyType_Ready(&MultibyteCodec_Type) < 0)
2083         return NULL;
2084 
2085     m = PyModule_Create(&_multibytecodecmodule);
2086     if (m == NULL)
2087         return NULL;
2088 
2089     for (i = 0; typelist[i] != NULL; i++) {
2090         if (PyType_Ready(typelist[i]) < 0)
2091             return NULL;
2092         Py_INCREF(typelist[i]);
2093         PyModule_AddObject(m, typelist[i]->tp_name,
2094                            (PyObject *)typelist[i]);
2095     }
2096 
2097     if (PyErr_Occurred()) {
2098         Py_FatalError("can't initialize the _multibytecodec module");
2099         Py_DECREF(m);
2100         m = NULL;
2101     }
2102     return m;
2103 }
2104