1 /*
2 * multibytecodec.c: Common Multibyte Codec Implementation
3 *
4 * Written by Hye-Shik Chang <perky@FreeBSD.org>
5 */
6
7 #ifndef Py_BUILD_CORE_BUILTIN
8 # define Py_BUILD_CORE_MODULE 1
9 #endif
10
11 #include "Python.h"
12
13 #include "multibytecodec.h"
14 #include "clinic/multibytecodec.c.h"
15
16 #include <stddef.h> // offsetof()
17
18 #define MODULE_NAME "_multibytecodec"
19
/* Per-module state: the type objects this module exposes (one per clinic
 * class declared below) plus one cached object. */
typedef struct {
    PyTypeObject *encoder_type;         /* MultibyteIncrementalEncoder */
    PyTypeObject *decoder_type;         /* MultibyteIncrementalDecoder */
    PyTypeObject *reader_type;          /* MultibyteStreamReader */
    PyTypeObject *writer_type;          /* MultibyteStreamWriter */
    PyTypeObject *multibytecodec_type;  /* MultibyteCodec */
    PyObject *str_write;    /* presumably a cached "write" string -- TODO confirm */
} module_state;
28
29 static module_state *
get_module_state(PyObject * module)30 get_module_state(PyObject *module)
31 {
32 module_state *state = PyModule_GetState(module);
33 assert(state != NULL);
34 return state;
35 }
36
37 static struct PyModuleDef _multibytecodecmodule;
38
39 static module_state *
find_state_by_def(PyTypeObject * type)40 find_state_by_def(PyTypeObject *type)
41 {
42 PyObject *module = PyType_GetModuleByDef(type, &_multibytecodecmodule);
43 assert(module != NULL);
44 return get_module_state(module);
45 }
46
/* Temporary helper for the clinic class declarations below; it expects a
 * variable named `type` to be in scope at the point of use.  Undefined again
 * right after the clinic block. */
#define clinic_get_state() find_state_by_def(type)
48 /*[clinic input]
49 module _multibytecodec
50 class _multibytecodec.MultibyteCodec "MultibyteCodecObject *" "clinic_get_state()->multibytecodec_type"
51 class _multibytecodec.MultibyteIncrementalEncoder "MultibyteIncrementalEncoderObject *" "clinic_get_state()->encoder_type"
52 class _multibytecodec.MultibyteIncrementalDecoder "MultibyteIncrementalDecoderObject *" "clinic_get_state()->decoder_type"
53 class _multibytecodec.MultibyteStreamReader "MultibyteStreamReaderObject *" "clinic_get_state()->reader_type"
54 class _multibytecodec.MultibyteStreamWriter "MultibyteStreamWriterObject *" "clinic_get_state()->writer_type"
55 [clinic start generated code]*/
56 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=305a76dfdd24b99c]*/
57 #undef clinic_get_state
58
/* Working state for one encoding run (str -> bytes). */
typedef struct {
    PyObject *inobj;                    /* input str (borrowed reference) */
    Py_ssize_t inpos, inlen;            /* current index into / length of inobj */
    unsigned char *outbuf, *outbuf_end; /* write cursor and end of outobj's storage */
    PyObject *excobj, *outobj;          /* cached exception object; output bytes object */
} MultibyteEncodeBuffer;
65
/* Working state for one decoding run (bytes -> str). */
typedef struct {
    /* inbuf: read cursor; inbuf_top: start of the input; inbuf_end: one past
     * the last input byte. */
    const unsigned char *inbuf, *inbuf_top, *inbuf_end;
    PyObject *excobj;           /* cached exception object, created on demand */
    _PyUnicodeWriter writer;    /* accumulates the decoded text */
} MultibyteDecodeBuffer;
71
/* Keyword names for PyArg_ParseTupleAndKeywords: incnewkwarglist is used by
 * the incremental encoder constructor; streamkwarglist is presumably used by
 * the stream reader/writer constructors (not visible in this part of the
 * file -- TODO confirm). */
static char *incnewkwarglist[] = {"errors", NULL};
static char *streamkwarglist[] = {"stream", "errors", NULL};
74
75 static PyObject *multibytecodec_encode(const MultibyteCodec *,
76 MultibyteCodec_State *, PyObject *, Py_ssize_t *,
77 PyObject *, int);
78
/* Flag bit: reset after an encoding session.  The expansion is parenthesized
 * so the macro behaves as a single value inside any larger expression. */
#define MBENC_RESET     (MBENC_MAX << 1)
80
81 static PyObject *
make_tuple(PyObject * object,Py_ssize_t len)82 make_tuple(PyObject *object, Py_ssize_t len)
83 {
84 PyObject *v, *w;
85
86 if (object == NULL)
87 return NULL;
88
89 v = PyTuple_New(2);
90 if (v == NULL) {
91 Py_DECREF(object);
92 return NULL;
93 }
94 PyTuple_SET_ITEM(v, 0, object);
95
96 w = PyLong_FromSsize_t(len);
97 if (w == NULL) {
98 Py_DECREF(v);
99 return NULL;
100 }
101 PyTuple_SET_ITEM(v, 1, w);
102
103 return v;
104 }
105
106 static PyObject *
internal_error_callback(const char * errors)107 internal_error_callback(const char *errors)
108 {
109 if (errors == NULL || strcmp(errors, "strict") == 0)
110 return ERROR_STRICT;
111 else if (strcmp(errors, "ignore") == 0)
112 return ERROR_IGNORE;
113 else if (strcmp(errors, "replace") == 0)
114 return ERROR_REPLACE;
115 else
116 return PyUnicode_FromString(errors);
117 }
118
119 static PyObject *
call_error_callback(PyObject * errors,PyObject * exc)120 call_error_callback(PyObject *errors, PyObject *exc)
121 {
122 PyObject *cb, *r;
123 const char *str;
124
125 assert(PyUnicode_Check(errors));
126 str = PyUnicode_AsUTF8(errors);
127 if (str == NULL)
128 return NULL;
129 cb = PyCodec_LookupError(str);
130 if (cb == NULL)
131 return NULL;
132
133 r = PyObject_CallOneArg(cb, exc);
134 Py_DECREF(cb);
135 return r;
136 }
137
138 static PyObject *
codecctx_errors_get(MultibyteStatefulCodecContext * self,void * Py_UNUSED (ignored))139 codecctx_errors_get(MultibyteStatefulCodecContext *self, void *Py_UNUSED(ignored))
140 {
141 const char *errors;
142
143 if (self->errors == ERROR_STRICT)
144 errors = "strict";
145 else if (self->errors == ERROR_IGNORE)
146 errors = "ignore";
147 else if (self->errors == ERROR_REPLACE)
148 errors = "replace";
149 else {
150 return Py_NewRef(self->errors);
151 }
152
153 return PyUnicode_FromString(errors);
154 }
155
156 static int
codecctx_errors_set(MultibyteStatefulCodecContext * self,PyObject * value,void * closure)157 codecctx_errors_set(MultibyteStatefulCodecContext *self, PyObject *value,
158 void *closure)
159 {
160 PyObject *cb;
161 const char *str;
162
163 if (value == NULL) {
164 PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
165 return -1;
166 }
167 if (!PyUnicode_Check(value)) {
168 PyErr_SetString(PyExc_TypeError, "errors must be a string");
169 return -1;
170 }
171
172 str = PyUnicode_AsUTF8(value);
173 if (str == NULL)
174 return -1;
175
176 cb = internal_error_callback(str);
177 if (cb == NULL)
178 return -1;
179
180 ERROR_DECREF(self->errors);
181 self->errors = cb;
182 return 0;
183 }
184
/* This getset handlers list is used by all the stateful codec objects.
 * It exposes a single read/write attribute, "errors". */
static PyGetSetDef codecctx_getsets[] = {
    {"errors",          (getter)codecctx_errors_get,
                    (setter)codecctx_errors_set,
                    PyDoc_STR("how to treat errors")},
    {NULL,}     /* sentinel */
};
192
193 static int
expand_encodebuffer(MultibyteEncodeBuffer * buf,Py_ssize_t esize)194 expand_encodebuffer(MultibyteEncodeBuffer *buf, Py_ssize_t esize)
195 {
196 Py_ssize_t orgpos, orgsize, incsize;
197
198 orgpos = (Py_ssize_t)((char *)buf->outbuf -
199 PyBytes_AS_STRING(buf->outobj));
200 orgsize = PyBytes_GET_SIZE(buf->outobj);
201 incsize = (esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize);
202
203 if (orgsize > PY_SSIZE_T_MAX - incsize) {
204 PyErr_NoMemory();
205 return -1;
206 }
207
208 if (_PyBytes_Resize(&buf->outobj, orgsize + incsize) == -1)
209 return -1;
210
211 buf->outbuf = (unsigned char *)PyBytes_AS_STRING(buf->outobj) +orgpos;
212 buf->outbuf_end = (unsigned char *)PyBytes_AS_STRING(buf->outobj)
213 + PyBytes_GET_SIZE(buf->outobj);
214
215 return 0;
216 }
/* Ensure `s` more bytes fit in the output buffer; a negative `s` always
 * forces a grow.  Jumps to the enclosing function's `errorexit` label when
 * growing fails. */
#define REQUIRE_ENCODEBUFFER(buf, s) do {                                   \
    if (((s) < 0 || (s) > (buf)->outbuf_end - (buf)->outbuf) &&             \
        expand_encodebuffer(buf, s) == -1) {                                \
        goto errorexit;                                                     \
    }                                                                       \
} while(0)
222
223
224 /**
225 * MultibyteCodec object
226 */
227
228 static int
multibytecodec_encerror(const MultibyteCodec * codec,MultibyteCodec_State * state,MultibyteEncodeBuffer * buf,PyObject * errors,Py_ssize_t e)229 multibytecodec_encerror(const MultibyteCodec *codec,
230 MultibyteCodec_State *state,
231 MultibyteEncodeBuffer *buf,
232 PyObject *errors, Py_ssize_t e)
233 {
234 PyObject *retobj = NULL, *retstr = NULL, *tobj;
235 Py_ssize_t retstrsize, newpos;
236 Py_ssize_t esize, start, end;
237 const char *reason;
238
239 if (e > 0) {
240 reason = "illegal multibyte sequence";
241 esize = e;
242 }
243 else {
244 switch (e) {
245 case MBERR_TOOSMALL:
246 REQUIRE_ENCODEBUFFER(buf, -1);
247 return 0; /* retry it */
248 case MBERR_TOOFEW:
249 reason = "incomplete multibyte sequence";
250 esize = (Py_ssize_t)buf->inpos;
251 break;
252 case MBERR_INTERNAL:
253 PyErr_SetString(PyExc_RuntimeError,
254 "internal codec error");
255 return -1;
256 default:
257 PyErr_SetString(PyExc_RuntimeError,
258 "unknown runtime error");
259 return -1;
260 }
261 }
262
263 if (errors == ERROR_REPLACE) {
264 PyObject *replchar;
265 Py_ssize_t r;
266 Py_ssize_t inpos;
267 int kind;
268 const void *data;
269
270 replchar = PyUnicode_FromOrdinal('?');
271 if (replchar == NULL)
272 goto errorexit;
273 kind = PyUnicode_KIND(replchar);
274 data = PyUnicode_DATA(replchar);
275
276 inpos = 0;
277 for (;;) {
278 Py_ssize_t outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf);
279
280 r = codec->encode(state, codec,
281 kind, data, &inpos, 1,
282 &buf->outbuf, outleft, 0);
283 if (r == MBERR_TOOSMALL) {
284 REQUIRE_ENCODEBUFFER(buf, -1);
285 continue;
286 }
287 else
288 break;
289 }
290
291 Py_DECREF(replchar);
292
293 if (r != 0) {
294 REQUIRE_ENCODEBUFFER(buf, 1);
295 *buf->outbuf++ = '?';
296 }
297 }
298 if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
299 buf->inpos += esize;
300 return 0;
301 }
302
303 start = (Py_ssize_t)buf->inpos;
304 end = start + esize;
305
306 /* use cached exception object if available */
307 if (buf->excobj == NULL) {
308 buf->excobj = PyObject_CallFunction(PyExc_UnicodeEncodeError,
309 "sOnns",
310 codec->encoding, buf->inobj,
311 start, end, reason);
312 if (buf->excobj == NULL)
313 goto errorexit;
314 }
315 else
316 if (PyUnicodeEncodeError_SetStart(buf->excobj, start) != 0 ||
317 PyUnicodeEncodeError_SetEnd(buf->excobj, end) != 0 ||
318 PyUnicodeEncodeError_SetReason(buf->excobj, reason) != 0)
319 goto errorexit;
320
321 if (errors == ERROR_STRICT) {
322 PyCodec_StrictErrors(buf->excobj);
323 goto errorexit;
324 }
325
326 retobj = call_error_callback(errors, buf->excobj);
327 if (retobj == NULL)
328 goto errorexit;
329
330 if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
331 (!PyUnicode_Check((tobj = PyTuple_GET_ITEM(retobj, 0))) && !PyBytes_Check(tobj)) ||
332 !PyLong_Check(PyTuple_GET_ITEM(retobj, 1))) {
333 PyErr_SetString(PyExc_TypeError,
334 "encoding error handler must return "
335 "(str, int) tuple");
336 goto errorexit;
337 }
338
339 if (PyUnicode_Check(tobj)) {
340 Py_ssize_t inpos;
341
342 retstr = multibytecodec_encode(codec, state, tobj,
343 &inpos, ERROR_STRICT,
344 MBENC_FLUSH);
345 if (retstr == NULL)
346 goto errorexit;
347 }
348 else {
349 retstr = Py_NewRef(tobj);
350 }
351
352 assert(PyBytes_Check(retstr));
353 retstrsize = PyBytes_GET_SIZE(retstr);
354 if (retstrsize > 0) {
355 REQUIRE_ENCODEBUFFER(buf, retstrsize);
356 memcpy(buf->outbuf, PyBytes_AS_STRING(retstr), retstrsize);
357 buf->outbuf += retstrsize;
358 }
359
360 newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
361 if (newpos < 0 && !PyErr_Occurred())
362 newpos += (Py_ssize_t)buf->inlen;
363 if (newpos < 0 || newpos > buf->inlen) {
364 PyErr_Clear();
365 PyErr_Format(PyExc_IndexError,
366 "position %zd from error handler out of bounds",
367 newpos);
368 goto errorexit;
369 }
370 buf->inpos = newpos;
371
372 Py_DECREF(retobj);
373 Py_DECREF(retstr);
374 return 0;
375
376 errorexit:
377 Py_XDECREF(retobj);
378 Py_XDECREF(retstr);
379 return -1;
380 }
381
/*
 * Handle a non-zero result `e` from a codec's decode function.
 *
 *   e > 0            illegal sequence covering `e` input bytes
 *   MBERR_TOOSMALL   returns 0 so the caller retries
 *   MBERR_TOOFEW     incomplete sequence covering the rest of the input
 *   MBERR_INTERNAL   raises RuntimeError
 *   MBERR_EXCEPTION  returns -1 without raising here (presumably the codec
 *                    already set an exception -- TODO confirm)
 *   anything else    raises RuntimeError
 *
 * `errors` is ERROR_STRICT, ERROR_IGNORE, ERROR_REPLACE or a str naming a
 * registered error handler.
 *
 * Returns 0 when the caller should carry on (buf->inbuf and the writer have
 * been updated as needed), or -1 on error.
 */
static int
multibytecodec_decerror(const MultibyteCodec *codec,
                        MultibyteCodec_State *state,
                        MultibyteDecodeBuffer *buf,
                        PyObject *errors, Py_ssize_t e)
{
    PyObject *retobj = NULL, *retuni = NULL;
    Py_ssize_t newpos;
    const char *reason;
    Py_ssize_t esize, start, end;

    if (e > 0) {
        reason = "illegal multibyte sequence";
        esize = e;
    }
    else {
        switch (e) {
        case MBERR_TOOSMALL:
            return 0; /* retry it */
        case MBERR_TOOFEW:
            reason = "incomplete multibyte sequence";
            esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
            break;
        case MBERR_INTERNAL:
            PyErr_SetString(PyExc_RuntimeError,
                            "internal codec error");
            return -1;
        case MBERR_EXCEPTION:
            return -1;
        default:
            PyErr_SetString(PyExc_RuntimeError,
                            "unknown runtime error");
            return -1;
        }
    }

    /* Built-in "replace": emit the replacement character, then skip the
     * offending bytes exactly as "ignore" does. */
    if (errors == ERROR_REPLACE) {
        if (_PyUnicodeWriter_WriteChar(&buf->writer,
                                       Py_UNICODE_REPLACEMENT_CHARACTER) < 0)
            goto errorexit;
    }
    if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
        buf->inbuf += esize;
        return 0;
    }

    /* Offsets of the offending bytes, relative to the start of the input. */
    start = (Py_ssize_t)(buf->inbuf - buf->inbuf_top);
    end = start + esize;

    /* use cached exception object if available */
    if (buf->excobj == NULL) {
        buf->excobj = PyUnicodeDecodeError_Create(codec->encoding,
                        (const char *)buf->inbuf_top,
                        (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top),
                        start, end, reason);
        if (buf->excobj == NULL)
            goto errorexit;
    }
    else
        if (PyUnicodeDecodeError_SetStart(buf->excobj, start) ||
            PyUnicodeDecodeError_SetEnd(buf->excobj, end) ||
            PyUnicodeDecodeError_SetReason(buf->excobj, reason))
            goto errorexit;

    if (errors == ERROR_STRICT) {
        PyCodec_StrictErrors(buf->excobj);
        goto errorexit;
    }

    retobj = call_error_callback(errors, buf->excobj);
    if (retobj == NULL)
        goto errorexit;

    /* The handler must return (str, int); retuni is borrowed from retobj. */
    if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
        !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) ||
        !PyLong_Check(PyTuple_GET_ITEM(retobj, 1))) {
        PyErr_SetString(PyExc_TypeError,
                        "decoding error handler must return "
                        "(str, int) tuple");
        goto errorexit;
    }

    if (_PyUnicodeWriter_WriteStr(&buf->writer, retuni) < 0)
        goto errorexit;

    /* A negative position counts from the end of the input. */
    newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
    if (newpos < 0 && !PyErr_Occurred())
        newpos += (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top);
    if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) {
        PyErr_Clear();
        PyErr_Format(PyExc_IndexError,
                     "position %zd from error handler out of bounds",
                     newpos);
        goto errorexit;
    }
    buf->inbuf = buf->inbuf_top + newpos;
    Py_DECREF(retobj);
    return 0;

errorexit:
    Py_XDECREF(retobj);
    return -1;
}
485
/*
 * Encode the str `text` with `codec`, using and updating `state`.
 *
 * `errors` is ERROR_STRICT, ERROR_IGNORE, ERROR_REPLACE or a str naming a
 * registered error handler.  `flags` is a combination of MBENC_FLUSH and
 * MBENC_RESET.  If `inpos_t` is not NULL it receives the index of the first
 * input character that was not consumed.
 *
 * Returns a new bytes object, or NULL with an exception set.
 */
static PyObject *
multibytecodec_encode(const MultibyteCodec *codec,
                      MultibyteCodec_State *state,
                      PyObject *text, Py_ssize_t *inpos_t,
                      PyObject *errors, int flags)
{
    MultibyteEncodeBuffer buf;
    Py_ssize_t finalsize, r = 0;
    Py_ssize_t datalen;
    int kind;
    const void *data;

    datalen = PyUnicode_GET_LENGTH(text);

    /* Empty input produces output only when a reset was requested.  Note
     * that *inpos_t is not written on this path. */
    if (datalen == 0 && !(flags & MBENC_RESET))
        return PyBytes_FromStringAndSize(NULL, 0);

    buf.excobj = NULL;
    buf.outobj = NULL;
    buf.inobj = text;   /* borrowed reference */
    buf.inpos = 0;
    buf.inlen = datalen;
    kind = PyUnicode_KIND(buf.inobj);
    data = PyUnicode_DATA(buf.inobj);

    /* Guard the initial size computation below against overflow. */
    if (datalen > (PY_SSIZE_T_MAX - 16) / 2) {
        PyErr_NoMemory();
        goto errorexit;
    }

    /* Initial guess: 2 bytes per character plus 16 spare bytes; the buffer
     * is grown on demand and trimmed to the final size at the end. */
    buf.outobj = PyBytes_FromStringAndSize(NULL, datalen * 2 + 16);
    if (buf.outobj == NULL)
        goto errorexit;
    buf.outbuf = (unsigned char *)PyBytes_AS_STRING(buf.outobj);
    buf.outbuf_end = buf.outbuf + PyBytes_GET_SIZE(buf.outobj);

    while (buf.inpos < buf.inlen) {
        /* we don't reuse inleft and outleft here.
         * error callbacks can relocate the cursor anywhere on buffer*/
        Py_ssize_t outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);

        r = codec->encode(state, codec,
                          kind, data,
                          &buf.inpos, buf.inlen,
                          &buf.outbuf, outleft, flags);
        /* Without MBENC_FLUSH an incomplete sequence is not an error: stop
         * and leave the rest of the input unconsumed. */
        if ((r == 0) || (r == MBERR_TOOFEW && !(flags & MBENC_FLUSH)))
            break;
        else if (multibytecodec_encerror(codec, state, &buf, errors,r))
            goto errorexit;
        else if (r == MBERR_TOOFEW)
            break;
    }

    /* Let the codec emit its reset sequence, retrying after each handled
     * error (for example after the output buffer has been grown). */
    if (codec->encreset != NULL && (flags & MBENC_RESET))
        for (;;) {
            Py_ssize_t outleft;

            outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
            r = codec->encreset(state, codec, &buf.outbuf,
                                outleft);
            if (r == 0)
                break;
            else if (multibytecodec_encerror(codec, state,
                                             &buf, errors, r))
                goto errorexit;
        }

    /* Trim the output object to the bytes actually written. */
    finalsize = (Py_ssize_t)((char *)buf.outbuf -
                             PyBytes_AS_STRING(buf.outobj));

    if (finalsize != PyBytes_GET_SIZE(buf.outobj))
        if (_PyBytes_Resize(&buf.outobj, finalsize) == -1)
            goto errorexit;

    if (inpos_t)
        *inpos_t = buf.inpos;
    Py_XDECREF(buf.excobj);
    return buf.outobj;

errorexit:
    Py_XDECREF(buf.excobj);
    Py_XDECREF(buf.outobj);
    return NULL;
}
570
571 /*[clinic input]
572 _multibytecodec.MultibyteCodec.encode
573
574 input: object
575 errors: str(accept={str, NoneType}) = None
576
577 Return an encoded string version of `input'.
578
579 'errors' may be given to set a different error handling scheme. Default is
580 'strict' meaning that encoding errors raise a UnicodeEncodeError. Other possible
581 values are 'ignore', 'replace' and 'xmlcharrefreplace' as well as any other name
582 registered with codecs.register_error that can handle UnicodeEncodeErrors.
583 [clinic start generated code]*/
584
585 static PyObject *
_multibytecodec_MultibyteCodec_encode_impl(MultibyteCodecObject * self,PyObject * input,const char * errors)586 _multibytecodec_MultibyteCodec_encode_impl(MultibyteCodecObject *self,
587 PyObject *input,
588 const char *errors)
589 /*[clinic end generated code: output=7b26652045ba56a9 input=606d0e128a577bae]*/
590 {
591 MultibyteCodec_State state;
592 PyObject *errorcb, *r, *ucvt;
593 Py_ssize_t datalen;
594
595 if (PyUnicode_Check(input))
596 ucvt = NULL;
597 else {
598 input = ucvt = PyObject_Str(input);
599 if (input == NULL)
600 return NULL;
601 else if (!PyUnicode_Check(input)) {
602 PyErr_SetString(PyExc_TypeError,
603 "couldn't convert the object to unicode.");
604 Py_DECREF(ucvt);
605 return NULL;
606 }
607 }
608
609 datalen = PyUnicode_GET_LENGTH(input);
610
611 errorcb = internal_error_callback(errors);
612 if (errorcb == NULL) {
613 Py_XDECREF(ucvt);
614 return NULL;
615 }
616
617 if (self->codec->encinit != NULL &&
618 self->codec->encinit(&state, self->codec) != 0)
619 goto errorexit;
620 r = multibytecodec_encode(self->codec, &state,
621 input, NULL, errorcb,
622 MBENC_FLUSH | MBENC_RESET);
623 if (r == NULL)
624 goto errorexit;
625
626 ERROR_DECREF(errorcb);
627 Py_XDECREF(ucvt);
628 return make_tuple(r, datalen);
629
630 errorexit:
631 ERROR_DECREF(errorcb);
632 Py_XDECREF(ucvt);
633 return NULL;
634 }
635
636 /*[clinic input]
637 _multibytecodec.MultibyteCodec.decode
638
639 input: Py_buffer
640 errors: str(accept={str, NoneType}) = None
641
642 Decodes 'input'.
643
644 'errors' may be given to set a different error handling scheme. Default is
645 'strict' meaning that encoding errors raise a UnicodeDecodeError. Other possible
646 values are 'ignore' and 'replace' as well as any other name registered with
647 codecs.register_error that is able to handle UnicodeDecodeErrors."
648 [clinic start generated code]*/
649
static PyObject *
_multibytecodec_MultibyteCodec_decode_impl(MultibyteCodecObject *self,
                                           Py_buffer *input,
                                           const char *errors)
/*[clinic end generated code: output=ff419f65bad6cc77 input=e0c78fc7ab190def]*/
{
    /* Stateless decode: returns the tuple (decoded str, number of input
     * bytes), or NULL on error. */
    MultibyteCodec_State state;
    MultibyteDecodeBuffer buf;
    PyObject *errorcb, *res;
    const char *data;
    Py_ssize_t datalen;

    data = input->buf;
    datalen = input->len;

    errorcb = internal_error_callback(errors);
    if (errorcb == NULL) {
        return NULL;
    }

    /* Empty input: return ("", 0) without touching the codec. */
    if (datalen == 0) {
        ERROR_DECREF(errorcb);
        return make_tuple(PyUnicode_New(0, 0), 0);
    }

    _PyUnicodeWriter_Init(&buf.writer);
    buf.writer.min_length = datalen;
    buf.excobj = NULL;
    buf.inbuf = buf.inbuf_top = (unsigned char *)data;
    buf.inbuf_end = buf.inbuf_top + datalen;

    if (self->codec->decinit != NULL &&
        self->codec->decinit(&state, self->codec) != 0)
        goto errorexit;

    /* Decode until the input is exhausted; every non-zero result goes
     * through the error handling in multibytecodec_decerror(). */
    while (buf.inbuf < buf.inbuf_end) {
        Py_ssize_t inleft, r;

        inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf);

        r = self->codec->decode(&state, self->codec,
                        &buf.inbuf, inleft, &buf.writer);
        if (r == 0)
            break;
        else if (multibytecodec_decerror(self->codec, &state,
                                         &buf, errorcb, r))
            goto errorexit;
    }

    res = _PyUnicodeWriter_Finish(&buf.writer);
    if (res == NULL)
        goto errorexit;

    Py_XDECREF(buf.excobj);
    ERROR_DECREF(errorcb);
    return make_tuple(res, datalen);

errorexit:
    ERROR_DECREF(errorcb);
    Py_XDECREF(buf.excobj);
    _PyUnicodeWriter_Dealloc(&buf.writer);

    return NULL;
}
714
/* Method table of the MultibyteCodec type: encode() and decode(). */
static struct PyMethodDef multibytecodec_methods[] = {
    _MULTIBYTECODEC_MULTIBYTECODEC_ENCODE_METHODDEF
    _MULTIBYTECODEC_MULTIBYTECODEC_DECODE_METHODDEF
    {NULL, NULL},   /* sentinel */
};
720
/* tp_clear: drop the reference held in cjk_module. */
static int
multibytecodec_clear(MultibyteCodecObject *self)
{
    Py_CLEAR(self->cjk_module);
    return 0;
}
727
/* tp_traverse: report the object's type and cjk_module to the garbage
 * collector. */
static int
multibytecodec_traverse(MultibyteCodecObject *self, visitproc visit, void *arg)
{
    Py_VISIT(Py_TYPE(self));
    Py_VISIT(self->cjk_module);
    return 0;
}
735
/* tp_dealloc: untrack from the GC, clear owned references, free the object
 * and finally release the reference to its type. */
static void
multibytecodec_dealloc(MultibyteCodecObject *self)
{
    PyObject_GC_UnTrack(self);
    /* Fetch the type before the object's memory is freed. */
    PyTypeObject *tp = Py_TYPE(self);
    (void)multibytecodec_clear(self);
    tp->tp_free(self);
    Py_DECREF(tp);
}
745
/* Type slots of the MultibyteCodec type. */
static PyType_Slot multibytecodec_slots[] = {
    {Py_tp_dealloc, multibytecodec_dealloc},
    {Py_tp_getattro, PyObject_GenericGetAttr},
    {Py_tp_methods, multibytecodec_methods},
    {Py_tp_traverse, multibytecodec_traverse},
    {Py_tp_clear, multibytecodec_clear},
    {0, NULL},  /* sentinel */
};
754
/* Type specification of the MultibyteCodec type.  The flags make it
 * GC-aware, not instantiable from Python and immutable. */
static PyType_Spec multibytecodec_spec = {
    .name = MODULE_NAME ".MultibyteCodec",
    .basicsize = sizeof(MultibyteCodecObject),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
              Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_IMMUTABLETYPE),
    .slots = multibytecodec_slots,
};
762
763
764 /**
765 * Utility functions for stateful codec mechanism
766 */
767
/* Cast an object pointer to the stateful decoder / encoder context type. */
#define STATEFUL_DCTX(o)        ((MultibyteStatefulDecoderContext *)(o))
#define STATEFUL_ECTX(o)        ((MultibyteStatefulEncoderContext *)(o))
770
771 static PyObject *
encoder_encode_stateful(MultibyteStatefulEncoderContext * ctx,PyObject * unistr,int final)772 encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx,
773 PyObject *unistr, int final)
774 {
775 PyObject *ucvt, *r = NULL;
776 PyObject *inbuf = NULL;
777 Py_ssize_t inpos, datalen;
778 PyObject *origpending = NULL;
779
780 if (PyUnicode_Check(unistr))
781 ucvt = NULL;
782 else {
783 unistr = ucvt = PyObject_Str(unistr);
784 if (unistr == NULL)
785 return NULL;
786 else if (!PyUnicode_Check(unistr)) {
787 PyErr_SetString(PyExc_TypeError,
788 "couldn't convert the object to str.");
789 Py_DECREF(ucvt);
790 return NULL;
791 }
792 }
793
794 if (ctx->pending) {
795 PyObject *inbuf_tmp;
796
797 origpending = Py_NewRef(ctx->pending);
798
799 inbuf_tmp = Py_NewRef(ctx->pending);
800 PyUnicode_Append(&inbuf_tmp, unistr);
801 if (inbuf_tmp == NULL)
802 goto errorexit;
803 Py_CLEAR(ctx->pending);
804 inbuf = inbuf_tmp;
805 }
806 else {
807 origpending = NULL;
808
809 inbuf = Py_NewRef(unistr);
810 }
811 inpos = 0;
812 datalen = PyUnicode_GET_LENGTH(inbuf);
813
814 r = multibytecodec_encode(ctx->codec, &ctx->state,
815 inbuf, &inpos,
816 ctx->errors, final ? MBENC_FLUSH | MBENC_RESET : 0);
817 if (r == NULL) {
818 /* recover the original pending buffer */
819 Py_XSETREF(ctx->pending, origpending);
820 origpending = NULL;
821 goto errorexit;
822 }
823 Py_XDECREF(origpending);
824
825 if (inpos < datalen) {
826 if (datalen - inpos > MAXENCPENDING) {
827 /* normal codecs can't reach here */
828 PyObject *excobj = PyObject_CallFunction(PyExc_UnicodeEncodeError,
829 "sOnns",
830 ctx->codec->encoding,
831 inbuf,
832 inpos, datalen,
833 "pending buffer overflow");
834 if (excobj == NULL) goto errorexit;
835 PyErr_SetObject(PyExc_UnicodeEncodeError, excobj);
836 Py_DECREF(excobj);
837 goto errorexit;
838 }
839 ctx->pending = PyUnicode_Substring(inbuf, inpos, datalen);
840 if (ctx->pending == NULL) {
841 /* normal codecs can't reach here */
842 goto errorexit;
843 }
844 }
845
846 Py_DECREF(inbuf);
847 Py_XDECREF(ucvt);
848 return r;
849
850 errorexit:
851 Py_XDECREF(r);
852 Py_XDECREF(ucvt);
853 Py_XDECREF(origpending);
854 Py_XDECREF(inbuf);
855 return NULL;
856 }
857
858 static int
decoder_append_pending(MultibyteStatefulDecoderContext * ctx,MultibyteDecodeBuffer * buf)859 decoder_append_pending(MultibyteStatefulDecoderContext *ctx,
860 MultibyteDecodeBuffer *buf)
861 {
862 Py_ssize_t npendings;
863
864 npendings = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
865 if (npendings + ctx->pendingsize > MAXDECPENDING ||
866 npendings > PY_SSIZE_T_MAX - ctx->pendingsize) {
867 Py_ssize_t bufsize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top);
868 PyObject *excobj = PyUnicodeDecodeError_Create(ctx->codec->encoding,
869 (const char *)buf->inbuf_top,
870 bufsize,
871 0,
872 bufsize,
873 "pending buffer overflow");
874 if (excobj == NULL) return -1;
875 PyErr_SetObject(PyExc_UnicodeDecodeError, excobj);
876 Py_DECREF(excobj);
877 return -1;
878 }
879 memcpy(ctx->pending + ctx->pendingsize, buf->inbuf, npendings);
880 ctx->pendingsize += npendings;
881 return 0;
882 }
883
884 static int
decoder_prepare_buffer(MultibyteDecodeBuffer * buf,const char * data,Py_ssize_t size)885 decoder_prepare_buffer(MultibyteDecodeBuffer *buf, const char *data,
886 Py_ssize_t size)
887 {
888 buf->inbuf = buf->inbuf_top = (const unsigned char *)data;
889 buf->inbuf_end = buf->inbuf_top + size;
890 buf->writer.min_length += size;
891 return 0;
892 }
893
894 static int
decoder_feed_buffer(MultibyteStatefulDecoderContext * ctx,MultibyteDecodeBuffer * buf)895 decoder_feed_buffer(MultibyteStatefulDecoderContext *ctx,
896 MultibyteDecodeBuffer *buf)
897 {
898 while (buf->inbuf < buf->inbuf_end) {
899 Py_ssize_t inleft;
900 Py_ssize_t r;
901
902 inleft = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
903
904 r = ctx->codec->decode(&ctx->state, ctx->codec,
905 &buf->inbuf, inleft, &buf->writer);
906 if (r == 0 || r == MBERR_TOOFEW)
907 break;
908 else if (multibytecodec_decerror(ctx->codec, &ctx->state,
909 buf, ctx->errors, r))
910 return -1;
911 }
912 return 0;
913 }
914
915
916 /*[clinic input]
917 _multibytecodec.MultibyteIncrementalEncoder.encode
918
919 input: object
920 final: bool = False
921 [clinic start generated code]*/
922
static PyObject *
_multibytecodec_MultibyteIncrementalEncoder_encode_impl(MultibyteIncrementalEncoderObject *self,
                                                        PyObject *input,
                                                        int final)
/*[clinic end generated code: output=123361b6c505e2c1 input=bd5f7d40d43e99b0]*/
{
    /* Thin wrapper: all the work happens in encoder_encode_stateful(). */
    return encoder_encode_stateful(STATEFUL_ECTX(self), input, final);
}
931
932 /*[clinic input]
933 _multibytecodec.MultibyteIncrementalEncoder.getstate
934 [clinic start generated code]*/
935
936 static PyObject *
_multibytecodec_MultibyteIncrementalEncoder_getstate_impl(MultibyteIncrementalEncoderObject * self)937 _multibytecodec_MultibyteIncrementalEncoder_getstate_impl(MultibyteIncrementalEncoderObject *self)
938 /*[clinic end generated code: output=9794a5ace70d7048 input=4a2a82874ffa40bb]*/
939 {
940 /* state made up of 1 byte for buffer size, up to MAXENCPENDING*4 bytes
941 for UTF-8 encoded buffer (each character can use up to 4
942 bytes), and required bytes for MultibyteCodec_State.c. A byte
943 array is used to avoid different compilers generating different
944 values for the same state, e.g. as a result of struct padding.
945 */
946 unsigned char statebytes[1 + MAXENCPENDING*4 + sizeof(self->state.c)];
947 Py_ssize_t statesize;
948 const char *pendingbuffer = NULL;
949 Py_ssize_t pendingsize;
950
951 if (self->pending != NULL) {
952 pendingbuffer = PyUnicode_AsUTF8AndSize(self->pending, &pendingsize);
953 if (pendingbuffer == NULL) {
954 return NULL;
955 }
956 if (pendingsize > MAXENCPENDING*4) {
957 PyObject *excobj = PyObject_CallFunction(PyExc_UnicodeEncodeError,
958 "sOnns",
959 self->codec->encoding,
960 self->pending,
961 0, PyUnicode_GET_LENGTH(self->pending),
962 "pending buffer too large");
963 if (excobj == NULL) {
964 return NULL;
965 }
966 PyErr_SetObject(PyExc_UnicodeEncodeError, excobj);
967 Py_DECREF(excobj);
968 return NULL;
969 }
970 statebytes[0] = (unsigned char)pendingsize;
971 memcpy(statebytes + 1, pendingbuffer, pendingsize);
972 statesize = 1 + pendingsize;
973 } else {
974 statebytes[0] = 0;
975 statesize = 1;
976 }
977 memcpy(statebytes+statesize, self->state.c,
978 sizeof(self->state.c));
979 statesize += sizeof(self->state.c);
980
981 return (PyObject *)_PyLong_FromByteArray(statebytes, statesize,
982 1 /* little-endian */ ,
983 0 /* unsigned */ );
984 }
985
986 /*[clinic input]
987 _multibytecodec.MultibyteIncrementalEncoder.setstate
988 state as statelong: object(type='PyLongObject *', subclass_of='&PyLong_Type')
989 /
990 [clinic start generated code]*/
991
992 static PyObject *
_multibytecodec_MultibyteIncrementalEncoder_setstate_impl(MultibyteIncrementalEncoderObject * self,PyLongObject * statelong)993 _multibytecodec_MultibyteIncrementalEncoder_setstate_impl(MultibyteIncrementalEncoderObject *self,
994 PyLongObject *statelong)
995 /*[clinic end generated code: output=4e5e98ac1f4039ca input=c80fb5830d4d2f76]*/
996 {
997 PyObject *pending = NULL;
998 unsigned char statebytes[1 + MAXENCPENDING*4 + sizeof(self->state.c)];
999
1000 if (_PyLong_AsByteArray(statelong, statebytes, sizeof(statebytes),
1001 1 /* little-endian */ ,
1002 0 /* unsigned */ ,
1003 1 /* with_exceptions */) < 0) {
1004 goto errorexit;
1005 }
1006
1007 if (statebytes[0] > MAXENCPENDING*4) {
1008 PyErr_SetString(PyExc_UnicodeError, "pending buffer too large");
1009 return NULL;
1010 }
1011
1012 pending = PyUnicode_DecodeUTF8((const char *)statebytes+1,
1013 statebytes[0], "strict");
1014 if (pending == NULL) {
1015 goto errorexit;
1016 }
1017
1018 Py_XSETREF(self->pending, pending);
1019 memcpy(self->state.c, statebytes+1+statebytes[0],
1020 sizeof(self->state.c));
1021
1022 Py_RETURN_NONE;
1023
1024 errorexit:
1025 Py_XDECREF(pending);
1026 return NULL;
1027 }
1028
1029 /*[clinic input]
1030 _multibytecodec.MultibyteIncrementalEncoder.reset
1031 [clinic start generated code]*/
1032
1033 static PyObject *
_multibytecodec_MultibyteIncrementalEncoder_reset_impl(MultibyteIncrementalEncoderObject * self)1034 _multibytecodec_MultibyteIncrementalEncoder_reset_impl(MultibyteIncrementalEncoderObject *self)
1035 /*[clinic end generated code: output=b4125d8f537a253f input=930f06760707b6ea]*/
1036 {
1037 /* Longest output: 4 bytes (b'\x0F\x1F(B') with ISO 2022 */
1038 unsigned char buffer[4], *outbuf;
1039 Py_ssize_t r;
1040 if (self->codec->encreset != NULL) {
1041 outbuf = buffer;
1042 r = self->codec->encreset(&self->state, self->codec,
1043 &outbuf, sizeof(buffer));
1044 if (r != 0)
1045 return NULL;
1046 }
1047 Py_CLEAR(self->pending);
1048 Py_RETURN_NONE;
1049 }
1050
/* Method table of MultibyteIncrementalEncoder: encode(), getstate(),
 * setstate() and reset(). */
static struct PyMethodDef mbiencoder_methods[] = {
    _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_ENCODE_METHODDEF
    _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_GETSTATE_METHODDEF
    _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_SETSTATE_METHODDEF
    _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_RESET_METHODDEF
    {NULL, NULL},   /* sentinel */
};
1058
1059 static PyObject *
mbiencoder_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1060 mbiencoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1061 {
1062 MultibyteIncrementalEncoderObject *self;
1063 PyObject *codec = NULL;
1064 char *errors = NULL;
1065
1066 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalEncoder",
1067 incnewkwarglist, &errors))
1068 return NULL;
1069
1070 self = (MultibyteIncrementalEncoderObject *)type->tp_alloc(type, 0);
1071 if (self == NULL)
1072 return NULL;
1073
1074 codec = PyObject_GetAttrString((PyObject *)type, "codec");
1075 if (codec == NULL)
1076 goto errorexit;
1077
1078 module_state *state = find_state_by_def(type);
1079 if (!MultibyteCodec_Check(state, codec)) {
1080 PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1081 goto errorexit;
1082 }
1083
1084 self->codec = ((MultibyteCodecObject *)codec)->codec;
1085 self->pending = NULL;
1086 self->errors = internal_error_callback(errors);
1087 if (self->errors == NULL)
1088 goto errorexit;
1089 if (self->codec->encinit != NULL &&
1090 self->codec->encinit(&self->state, self->codec) != 0)
1091 goto errorexit;
1092
1093 Py_DECREF(codec);
1094 return (PyObject *)self;
1095
1096 errorexit:
1097 Py_XDECREF(self);
1098 Py_XDECREF(codec);
1099 return NULL;
1100 }
1101
1102 static int
mbiencoder_init(PyObject * self,PyObject * args,PyObject * kwds)1103 mbiencoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1104 {
1105 return 0;
1106 }
1107
1108 static int
mbiencoder_traverse(MultibyteIncrementalEncoderObject * self,visitproc visit,void * arg)1109 mbiencoder_traverse(MultibyteIncrementalEncoderObject *self,
1110 visitproc visit, void *arg)
1111 {
1112 if (ERROR_ISCUSTOM(self->errors))
1113 Py_VISIT(self->errors);
1114 return 0;
1115 }
1116
1117 static void
mbiencoder_dealloc(MultibyteIncrementalEncoderObject * self)1118 mbiencoder_dealloc(MultibyteIncrementalEncoderObject *self)
1119 {
1120 PyTypeObject *tp = Py_TYPE(self);
1121 PyObject_GC_UnTrack(self);
1122 ERROR_DECREF(self->errors);
1123 Py_CLEAR(self->pending);
1124 tp->tp_free(self);
1125 Py_DECREF(tp);
1126 }
1127
1128 static PyType_Slot encoder_slots[] = {
1129 {Py_tp_dealloc, mbiencoder_dealloc},
1130 {Py_tp_getattro, PyObject_GenericGetAttr},
1131 {Py_tp_traverse, mbiencoder_traverse},
1132 {Py_tp_methods, mbiencoder_methods},
1133 {Py_tp_getset, codecctx_getsets},
1134 {Py_tp_init, mbiencoder_init},
1135 {Py_tp_new, mbiencoder_new},
1136 {0, NULL},
1137 };
1138
1139 static PyType_Spec encoder_spec = {
1140 .name = MODULE_NAME ".MultibyteIncrementalEncoder",
1141 .basicsize = sizeof(MultibyteIncrementalEncoderObject),
1142 .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE |
1143 Py_TPFLAGS_IMMUTABLETYPE),
1144 .slots = encoder_slots,
1145 };
1146
1147
1148 /*[clinic input]
1149 _multibytecodec.MultibyteIncrementalDecoder.decode
1150
1151 input: Py_buffer
1152 final: bool = False
1153 [clinic start generated code]*/
1154
1155 static PyObject *
_multibytecodec_MultibyteIncrementalDecoder_decode_impl(MultibyteIncrementalDecoderObject * self,Py_buffer * input,int final)1156 _multibytecodec_MultibyteIncrementalDecoder_decode_impl(MultibyteIncrementalDecoderObject *self,
1157 Py_buffer *input,
1158 int final)
1159 /*[clinic end generated code: output=b9b9090e8a9ce2ba input=8795fbb20860027a]*/
1160 {
1161 MultibyteDecodeBuffer buf;
1162 char *data, *wdata = NULL;
1163 Py_ssize_t wsize, size, origpending;
1164 PyObject *res;
1165
1166 data = input->buf;
1167 size = input->len;
1168
1169 _PyUnicodeWriter_Init(&buf.writer);
1170 buf.excobj = NULL;
1171 origpending = self->pendingsize;
1172
1173 if (self->pendingsize == 0) {
1174 wsize = size;
1175 wdata = data;
1176 }
1177 else {
1178 if (size > PY_SSIZE_T_MAX - self->pendingsize) {
1179 PyErr_NoMemory();
1180 goto errorexit;
1181 }
1182 wsize = size + self->pendingsize;
1183 wdata = PyMem_Malloc(wsize);
1184 if (wdata == NULL) {
1185 PyErr_NoMemory();
1186 goto errorexit;
1187 }
1188 memcpy(wdata, self->pending, self->pendingsize);
1189 memcpy(wdata + self->pendingsize, data, size);
1190 self->pendingsize = 0;
1191 }
1192
1193 if (decoder_prepare_buffer(&buf, wdata, wsize) != 0)
1194 goto errorexit;
1195
1196 if (decoder_feed_buffer(STATEFUL_DCTX(self), &buf))
1197 goto errorexit;
1198
1199 if (final && buf.inbuf < buf.inbuf_end) {
1200 if (multibytecodec_decerror(self->codec, &self->state,
1201 &buf, self->errors, MBERR_TOOFEW)) {
1202 /* recover the original pending buffer */
1203 memcpy(self->pending, wdata, origpending);
1204 self->pendingsize = origpending;
1205 goto errorexit;
1206 }
1207 }
1208
1209 if (buf.inbuf < buf.inbuf_end) { /* pending sequence still exists */
1210 if (decoder_append_pending(STATEFUL_DCTX(self), &buf) != 0)
1211 goto errorexit;
1212 }
1213
1214 res = _PyUnicodeWriter_Finish(&buf.writer);
1215 if (res == NULL)
1216 goto errorexit;
1217
1218 if (wdata != data)
1219 PyMem_Free(wdata);
1220 Py_XDECREF(buf.excobj);
1221 return res;
1222
1223 errorexit:
1224 if (wdata != NULL && wdata != data)
1225 PyMem_Free(wdata);
1226 Py_XDECREF(buf.excobj);
1227 _PyUnicodeWriter_Dealloc(&buf.writer);
1228 return NULL;
1229 }
1230
1231 /*[clinic input]
1232 _multibytecodec.MultibyteIncrementalDecoder.getstate
1233 [clinic start generated code]*/
1234
1235 static PyObject *
_multibytecodec_MultibyteIncrementalDecoder_getstate_impl(MultibyteIncrementalDecoderObject * self)1236 _multibytecodec_MultibyteIncrementalDecoder_getstate_impl(MultibyteIncrementalDecoderObject *self)
1237 /*[clinic end generated code: output=255009c4713b7f82 input=4006aa49bddbaa75]*/
1238 {
1239 PyObject *buffer;
1240 PyObject *statelong;
1241
1242 buffer = PyBytes_FromStringAndSize((const char *)self->pending,
1243 self->pendingsize);
1244 if (buffer == NULL) {
1245 return NULL;
1246 }
1247
1248 statelong = (PyObject *)_PyLong_FromByteArray(self->state.c,
1249 sizeof(self->state.c),
1250 1 /* little-endian */ ,
1251 0 /* unsigned */ );
1252 if (statelong == NULL) {
1253 Py_DECREF(buffer);
1254 return NULL;
1255 }
1256
1257 return Py_BuildValue("NN", buffer, statelong);
1258 }
1259
1260 /*[clinic input]
1261 _multibytecodec.MultibyteIncrementalDecoder.setstate
1262 state: object(subclass_of='&PyTuple_Type')
1263 /
1264 [clinic start generated code]*/
1265
1266 static PyObject *
_multibytecodec_MultibyteIncrementalDecoder_setstate_impl(MultibyteIncrementalDecoderObject * self,PyObject * state)1267 _multibytecodec_MultibyteIncrementalDecoder_setstate_impl(MultibyteIncrementalDecoderObject *self,
1268 PyObject *state)
1269 /*[clinic end generated code: output=106b2fbca3e2dcc2 input=e5d794e8baba1a47]*/
1270 {
1271 PyObject *buffer;
1272 PyLongObject *statelong;
1273 Py_ssize_t buffersize;
1274 const char *bufferstr;
1275 unsigned char statebytes[8];
1276
1277 if (!PyArg_ParseTuple(state, "SO!;setstate(): illegal state argument",
1278 &buffer, &PyLong_Type, &statelong))
1279 {
1280 return NULL;
1281 }
1282
1283 if (_PyLong_AsByteArray(statelong, statebytes, sizeof(statebytes),
1284 1 /* little-endian */ ,
1285 0 /* unsigned */ ,
1286 1 /* with_exceptions */) < 0) {
1287 return NULL;
1288 }
1289
1290 buffersize = PyBytes_Size(buffer);
1291 if (buffersize == -1) {
1292 return NULL;
1293 }
1294
1295 if (buffersize > MAXDECPENDING) {
1296 PyObject *excobj = PyUnicodeDecodeError_Create(self->codec->encoding,
1297 PyBytes_AS_STRING(buffer), buffersize,
1298 0, buffersize,
1299 "pending buffer too large");
1300 if (excobj == NULL) return NULL;
1301 PyErr_SetObject(PyExc_UnicodeDecodeError, excobj);
1302 Py_DECREF(excobj);
1303 return NULL;
1304 }
1305
1306 bufferstr = PyBytes_AsString(buffer);
1307 if (bufferstr == NULL) {
1308 return NULL;
1309 }
1310 self->pendingsize = buffersize;
1311 memcpy(self->pending, bufferstr, self->pendingsize);
1312 memcpy(self->state.c, statebytes, sizeof(statebytes));
1313
1314 Py_RETURN_NONE;
1315 }
1316
1317 /*[clinic input]
1318 _multibytecodec.MultibyteIncrementalDecoder.reset
1319 [clinic start generated code]*/
1320
1321 static PyObject *
_multibytecodec_MultibyteIncrementalDecoder_reset_impl(MultibyteIncrementalDecoderObject * self)1322 _multibytecodec_MultibyteIncrementalDecoder_reset_impl(MultibyteIncrementalDecoderObject *self)
1323 /*[clinic end generated code: output=da423b1782c23ed1 input=3b63b3be85b2fb45]*/
1324 {
1325 if (self->codec->decreset != NULL &&
1326 self->codec->decreset(&self->state, self->codec) != 0)
1327 return NULL;
1328 self->pendingsize = 0;
1329
1330 Py_RETURN_NONE;
1331 }
1332
1333 static struct PyMethodDef mbidecoder_methods[] = {
1334 _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_DECODE_METHODDEF
1335 _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_GETSTATE_METHODDEF
1336 _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_SETSTATE_METHODDEF
1337 _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_RESET_METHODDEF
1338 {NULL, NULL},
1339 };
1340
1341 static PyObject *
mbidecoder_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1342 mbidecoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1343 {
1344 MultibyteIncrementalDecoderObject *self;
1345 PyObject *codec = NULL;
1346 char *errors = NULL;
1347
1348 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalDecoder",
1349 incnewkwarglist, &errors))
1350 return NULL;
1351
1352 self = (MultibyteIncrementalDecoderObject *)type->tp_alloc(type, 0);
1353 if (self == NULL)
1354 return NULL;
1355
1356 codec = PyObject_GetAttrString((PyObject *)type, "codec");
1357 if (codec == NULL)
1358 goto errorexit;
1359
1360 module_state *state = find_state_by_def(type);
1361 if (!MultibyteCodec_Check(state, codec)) {
1362 PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1363 goto errorexit;
1364 }
1365
1366 self->codec = ((MultibyteCodecObject *)codec)->codec;
1367 self->pendingsize = 0;
1368 self->errors = internal_error_callback(errors);
1369 if (self->errors == NULL)
1370 goto errorexit;
1371 if (self->codec->decinit != NULL &&
1372 self->codec->decinit(&self->state, self->codec) != 0)
1373 goto errorexit;
1374
1375 Py_DECREF(codec);
1376 return (PyObject *)self;
1377
1378 errorexit:
1379 Py_XDECREF(self);
1380 Py_XDECREF(codec);
1381 return NULL;
1382 }
1383
1384 static int
mbidecoder_init(PyObject * self,PyObject * args,PyObject * kwds)1385 mbidecoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1386 {
1387 return 0;
1388 }
1389
1390 static int
mbidecoder_traverse(MultibyteIncrementalDecoderObject * self,visitproc visit,void * arg)1391 mbidecoder_traverse(MultibyteIncrementalDecoderObject *self,
1392 visitproc visit, void *arg)
1393 {
1394 if (ERROR_ISCUSTOM(self->errors))
1395 Py_VISIT(self->errors);
1396 return 0;
1397 }
1398
1399 static void
mbidecoder_dealloc(MultibyteIncrementalDecoderObject * self)1400 mbidecoder_dealloc(MultibyteIncrementalDecoderObject *self)
1401 {
1402 PyTypeObject *tp = Py_TYPE(self);
1403 PyObject_GC_UnTrack(self);
1404 ERROR_DECREF(self->errors);
1405 tp->tp_free(self);
1406 Py_DECREF(tp);
1407 }
1408
1409 static PyType_Slot decoder_slots[] = {
1410 {Py_tp_dealloc, mbidecoder_dealloc},
1411 {Py_tp_getattro, PyObject_GenericGetAttr},
1412 {Py_tp_traverse, mbidecoder_traverse},
1413 {Py_tp_methods, mbidecoder_methods},
1414 {Py_tp_getset, codecctx_getsets},
1415 {Py_tp_init, mbidecoder_init},
1416 {Py_tp_new, mbidecoder_new},
1417 {0, NULL},
1418 };
1419
1420 static PyType_Spec decoder_spec = {
1421 .name = MODULE_NAME ".MultibyteIncrementalDecoder",
1422 .basicsize = sizeof(MultibyteIncrementalDecoderObject),
1423 .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE |
1424 Py_TPFLAGS_IMMUTABLETYPE),
1425 .slots = decoder_slots,
1426 };
1427
1428 static PyObject *
mbstreamreader_iread(MultibyteStreamReaderObject * self,const char * method,Py_ssize_t sizehint)1429 mbstreamreader_iread(MultibyteStreamReaderObject *self,
1430 const char *method, Py_ssize_t sizehint)
1431 {
1432 MultibyteDecodeBuffer buf;
1433 PyObject *cres, *res;
1434 Py_ssize_t rsize;
1435
1436 if (sizehint == 0)
1437 return PyUnicode_New(0, 0);
1438
1439 _PyUnicodeWriter_Init(&buf.writer);
1440 buf.excobj = NULL;
1441 cres = NULL;
1442
1443 for (;;) {
1444 int endoffile;
1445
1446 if (sizehint < 0)
1447 cres = PyObject_CallMethod(self->stream,
1448 method, NULL);
1449 else
1450 cres = PyObject_CallMethod(self->stream,
1451 method, "i", sizehint);
1452 if (cres == NULL)
1453 goto errorexit;
1454
1455 if (!PyBytes_Check(cres)) {
1456 PyErr_Format(PyExc_TypeError,
1457 "stream function returned a "
1458 "non-bytes object (%.100s)",
1459 Py_TYPE(cres)->tp_name);
1460 goto errorexit;
1461 }
1462
1463 endoffile = (PyBytes_GET_SIZE(cres) == 0);
1464
1465 if (self->pendingsize > 0) {
1466 PyObject *ctr;
1467 char *ctrdata;
1468
1469 if (PyBytes_GET_SIZE(cres) > PY_SSIZE_T_MAX - self->pendingsize) {
1470 PyErr_NoMemory();
1471 goto errorexit;
1472 }
1473 rsize = PyBytes_GET_SIZE(cres) + self->pendingsize;
1474 ctr = PyBytes_FromStringAndSize(NULL, rsize);
1475 if (ctr == NULL)
1476 goto errorexit;
1477 ctrdata = PyBytes_AS_STRING(ctr);
1478 memcpy(ctrdata, self->pending, self->pendingsize);
1479 memcpy(ctrdata + self->pendingsize,
1480 PyBytes_AS_STRING(cres),
1481 PyBytes_GET_SIZE(cres));
1482 Py_SETREF(cres, ctr);
1483 self->pendingsize = 0;
1484 }
1485
1486 rsize = PyBytes_GET_SIZE(cres);
1487 if (decoder_prepare_buffer(&buf, PyBytes_AS_STRING(cres),
1488 rsize) != 0)
1489 goto errorexit;
1490
1491 if (rsize > 0 && decoder_feed_buffer(
1492 (MultibyteStatefulDecoderContext *)self, &buf))
1493 goto errorexit;
1494
1495 if (endoffile || sizehint < 0) {
1496 if (buf.inbuf < buf.inbuf_end &&
1497 multibytecodec_decerror(self->codec, &self->state,
1498 &buf, self->errors, MBERR_TOOFEW))
1499 goto errorexit;
1500 }
1501
1502 if (buf.inbuf < buf.inbuf_end) { /* pending sequence exists */
1503 if (decoder_append_pending(STATEFUL_DCTX(self),
1504 &buf) != 0)
1505 goto errorexit;
1506 }
1507
1508 Py_SETREF(cres, NULL);
1509
1510 if (sizehint < 0 || buf.writer.pos != 0 || rsize == 0)
1511 break;
1512
1513 sizehint = 1; /* read 1 more byte and retry */
1514 }
1515
1516 res = _PyUnicodeWriter_Finish(&buf.writer);
1517 if (res == NULL)
1518 goto errorexit;
1519
1520 Py_XDECREF(cres);
1521 Py_XDECREF(buf.excobj);
1522 return res;
1523
1524 errorexit:
1525 Py_XDECREF(cres);
1526 Py_XDECREF(buf.excobj);
1527 _PyUnicodeWriter_Dealloc(&buf.writer);
1528 return NULL;
1529 }
1530
1531 /*[clinic input]
1532 _multibytecodec.MultibyteStreamReader.read
1533
1534 sizeobj: object = None
1535 /
1536 [clinic start generated code]*/
1537
1538 static PyObject *
_multibytecodec_MultibyteStreamReader_read_impl(MultibyteStreamReaderObject * self,PyObject * sizeobj)1539 _multibytecodec_MultibyteStreamReader_read_impl(MultibyteStreamReaderObject *self,
1540 PyObject *sizeobj)
1541 /*[clinic end generated code: output=35621eb75355d5b8 input=015b0d3ff2fca485]*/
1542 {
1543 Py_ssize_t size;
1544
1545 if (sizeobj == Py_None)
1546 size = -1;
1547 else if (PyLong_Check(sizeobj))
1548 size = PyLong_AsSsize_t(sizeobj);
1549 else {
1550 PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
1551 return NULL;
1552 }
1553
1554 if (size == -1 && PyErr_Occurred())
1555 return NULL;
1556
1557 return mbstreamreader_iread(self, "read", size);
1558 }
1559
1560 /*[clinic input]
1561 _multibytecodec.MultibyteStreamReader.readline
1562
1563 sizeobj: object = None
1564 /
1565 [clinic start generated code]*/
1566
1567 static PyObject *
_multibytecodec_MultibyteStreamReader_readline_impl(MultibyteStreamReaderObject * self,PyObject * sizeobj)1568 _multibytecodec_MultibyteStreamReader_readline_impl(MultibyteStreamReaderObject *self,
1569 PyObject *sizeobj)
1570 /*[clinic end generated code: output=4fbfaae1ed457a11 input=41ccc64f9bb0cec3]*/
1571 {
1572 Py_ssize_t size;
1573
1574 if (sizeobj == Py_None)
1575 size = -1;
1576 else if (PyLong_Check(sizeobj))
1577 size = PyLong_AsSsize_t(sizeobj);
1578 else {
1579 PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
1580 return NULL;
1581 }
1582
1583 if (size == -1 && PyErr_Occurred())
1584 return NULL;
1585
1586 return mbstreamreader_iread(self, "readline", size);
1587 }
1588
1589 /*[clinic input]
1590 _multibytecodec.MultibyteStreamReader.readlines
1591
1592 sizehintobj: object = None
1593 /
1594 [clinic start generated code]*/
1595
1596 static PyObject *
_multibytecodec_MultibyteStreamReader_readlines_impl(MultibyteStreamReaderObject * self,PyObject * sizehintobj)1597 _multibytecodec_MultibyteStreamReader_readlines_impl(MultibyteStreamReaderObject *self,
1598 PyObject *sizehintobj)
1599 /*[clinic end generated code: output=e7c4310768ed2ad4 input=54932f5d4d88e880]*/
1600 {
1601 PyObject *r, *sr;
1602 Py_ssize_t sizehint;
1603
1604 if (sizehintobj == Py_None)
1605 sizehint = -1;
1606 else if (PyLong_Check(sizehintobj))
1607 sizehint = PyLong_AsSsize_t(sizehintobj);
1608 else {
1609 PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
1610 return NULL;
1611 }
1612
1613 if (sizehint == -1 && PyErr_Occurred())
1614 return NULL;
1615
1616 r = mbstreamreader_iread(self, "read", sizehint);
1617 if (r == NULL)
1618 return NULL;
1619
1620 sr = PyUnicode_Splitlines(r, 1);
1621 Py_DECREF(r);
1622 return sr;
1623 }
1624
1625 /*[clinic input]
1626 _multibytecodec.MultibyteStreamReader.reset
1627 [clinic start generated code]*/
1628
1629 static PyObject *
_multibytecodec_MultibyteStreamReader_reset_impl(MultibyteStreamReaderObject * self)1630 _multibytecodec_MultibyteStreamReader_reset_impl(MultibyteStreamReaderObject *self)
1631 /*[clinic end generated code: output=138490370a680abc input=5d4140db84b5e1e2]*/
1632 {
1633 if (self->codec->decreset != NULL &&
1634 self->codec->decreset(&self->state, self->codec) != 0)
1635 return NULL;
1636 self->pendingsize = 0;
1637
1638 Py_RETURN_NONE;
1639 }
1640
1641 static struct PyMethodDef mbstreamreader_methods[] = {
1642 _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READ_METHODDEF
1643 _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READLINE_METHODDEF
1644 _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READLINES_METHODDEF
1645 _MULTIBYTECODEC_MULTIBYTESTREAMREADER_RESET_METHODDEF
1646 {NULL, NULL},
1647 };
1648
1649 static PyMemberDef mbstreamreader_members[] = {
1650 {"stream", _Py_T_OBJECT,
1651 offsetof(MultibyteStreamReaderObject, stream),
1652 Py_READONLY, NULL},
1653 {NULL,}
1654 };
1655
1656 static PyObject *
mbstreamreader_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1657 mbstreamreader_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1658 {
1659 MultibyteStreamReaderObject *self;
1660 PyObject *stream, *codec = NULL;
1661 char *errors = NULL;
1662
1663 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamReader",
1664 streamkwarglist, &stream, &errors))
1665 return NULL;
1666
1667 self = (MultibyteStreamReaderObject *)type->tp_alloc(type, 0);
1668 if (self == NULL)
1669 return NULL;
1670
1671 codec = PyObject_GetAttrString((PyObject *)type, "codec");
1672 if (codec == NULL)
1673 goto errorexit;
1674
1675 module_state *state = find_state_by_def(type);
1676 if (!MultibyteCodec_Check(state, codec)) {
1677 PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1678 goto errorexit;
1679 }
1680
1681 self->codec = ((MultibyteCodecObject *)codec)->codec;
1682 self->stream = Py_NewRef(stream);
1683 self->pendingsize = 0;
1684 self->errors = internal_error_callback(errors);
1685 if (self->errors == NULL)
1686 goto errorexit;
1687 if (self->codec->decinit != NULL &&
1688 self->codec->decinit(&self->state, self->codec) != 0)
1689 goto errorexit;
1690
1691 Py_DECREF(codec);
1692 return (PyObject *)self;
1693
1694 errorexit:
1695 Py_XDECREF(self);
1696 Py_XDECREF(codec);
1697 return NULL;
1698 }
1699
1700 static int
mbstreamreader_init(PyObject * self,PyObject * args,PyObject * kwds)1701 mbstreamreader_init(PyObject *self, PyObject *args, PyObject *kwds)
1702 {
1703 return 0;
1704 }
1705
1706 static int
mbstreamreader_traverse(MultibyteStreamReaderObject * self,visitproc visit,void * arg)1707 mbstreamreader_traverse(MultibyteStreamReaderObject *self,
1708 visitproc visit, void *arg)
1709 {
1710 if (ERROR_ISCUSTOM(self->errors))
1711 Py_VISIT(self->errors);
1712 Py_VISIT(self->stream);
1713 return 0;
1714 }
1715
1716 static void
mbstreamreader_dealloc(MultibyteStreamReaderObject * self)1717 mbstreamreader_dealloc(MultibyteStreamReaderObject *self)
1718 {
1719 PyTypeObject *tp = Py_TYPE(self);
1720 PyObject_GC_UnTrack(self);
1721 ERROR_DECREF(self->errors);
1722 Py_XDECREF(self->stream);
1723 tp->tp_free(self);
1724 Py_DECREF(tp);
1725 }
1726
1727 static PyType_Slot reader_slots[] = {
1728 {Py_tp_dealloc, mbstreamreader_dealloc},
1729 {Py_tp_getattro, PyObject_GenericGetAttr},
1730 {Py_tp_traverse, mbstreamreader_traverse},
1731 {Py_tp_methods, mbstreamreader_methods},
1732 {Py_tp_members, mbstreamreader_members},
1733 {Py_tp_getset, codecctx_getsets},
1734 {Py_tp_init, mbstreamreader_init},
1735 {Py_tp_new, mbstreamreader_new},
1736 {0, NULL},
1737 };
1738
1739 static PyType_Spec reader_spec = {
1740 .name = MODULE_NAME ".MultibyteStreamReader",
1741 .basicsize = sizeof(MultibyteStreamReaderObject),
1742 .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE |
1743 Py_TPFLAGS_IMMUTABLETYPE),
1744 .slots = reader_slots,
1745 };
1746
1747 static int
mbstreamwriter_iwrite(MultibyteStreamWriterObject * self,PyObject * unistr,PyObject * str_write)1748 mbstreamwriter_iwrite(MultibyteStreamWriterObject *self,
1749 PyObject *unistr, PyObject *str_write)
1750 {
1751 PyObject *str, *wr;
1752
1753 str = encoder_encode_stateful(STATEFUL_ECTX(self), unistr, 0);
1754 if (str == NULL)
1755 return -1;
1756
1757 wr = PyObject_CallMethodOneArg(self->stream, str_write, str);
1758 Py_DECREF(str);
1759 if (wr == NULL)
1760 return -1;
1761
1762 Py_DECREF(wr);
1763 return 0;
1764 }
1765
1766 /*[clinic input]
1767 _multibytecodec.MultibyteStreamWriter.write
1768
1769 cls: defining_class
1770 strobj: object
1771 /
1772 [clinic start generated code]*/
1773
1774 static PyObject *
_multibytecodec_MultibyteStreamWriter_write_impl(MultibyteStreamWriterObject * self,PyTypeObject * cls,PyObject * strobj)1775 _multibytecodec_MultibyteStreamWriter_write_impl(MultibyteStreamWriterObject *self,
1776 PyTypeObject *cls,
1777 PyObject *strobj)
1778 /*[clinic end generated code: output=68ade3aea26410ac input=199f26f68bd8425a]*/
1779 {
1780 module_state *state = PyType_GetModuleState(cls);
1781 assert(state != NULL);
1782 if (mbstreamwriter_iwrite(self, strobj, state->str_write)) {
1783 return NULL;
1784 }
1785 Py_RETURN_NONE;
1786 }
1787
1788 /*[clinic input]
1789 _multibytecodec.MultibyteStreamWriter.writelines
1790
1791 cls: defining_class
1792 lines: object
1793 /
1794 [clinic start generated code]*/
1795
1796 static PyObject *
_multibytecodec_MultibyteStreamWriter_writelines_impl(MultibyteStreamWriterObject * self,PyTypeObject * cls,PyObject * lines)1797 _multibytecodec_MultibyteStreamWriter_writelines_impl(MultibyteStreamWriterObject *self,
1798 PyTypeObject *cls,
1799 PyObject *lines)
1800 /*[clinic end generated code: output=b4c99d2cf23ffb88 input=a6d5fe7c74972a34]*/
1801 {
1802 PyObject *strobj;
1803 int i, r;
1804
1805 if (!PySequence_Check(lines)) {
1806 PyErr_SetString(PyExc_TypeError,
1807 "arg must be a sequence object");
1808 return NULL;
1809 }
1810
1811 module_state *state = PyType_GetModuleState(cls);
1812 assert(state != NULL);
1813 for (i = 0; i < PySequence_Length(lines); i++) {
1814 /* length can be changed even within this loop */
1815 strobj = PySequence_GetItem(lines, i);
1816 if (strobj == NULL)
1817 return NULL;
1818
1819 r = mbstreamwriter_iwrite(self, strobj, state->str_write);
1820 Py_DECREF(strobj);
1821 if (r == -1)
1822 return NULL;
1823 }
1824 /* PySequence_Length() can fail */
1825 if (PyErr_Occurred())
1826 return NULL;
1827
1828 Py_RETURN_NONE;
1829 }
1830
1831 /*[clinic input]
1832 _multibytecodec.MultibyteStreamWriter.reset
1833
1834 cls: defining_class
1835 /
1836
1837 [clinic start generated code]*/
1838
1839 static PyObject *
_multibytecodec_MultibyteStreamWriter_reset_impl(MultibyteStreamWriterObject * self,PyTypeObject * cls)1840 _multibytecodec_MultibyteStreamWriter_reset_impl(MultibyteStreamWriterObject *self,
1841 PyTypeObject *cls)
1842 /*[clinic end generated code: output=32ef224c2a38aa3d input=28af6a9cd38d1979]*/
1843 {
1844 PyObject *pwrt;
1845
1846 if (!self->pending)
1847 Py_RETURN_NONE;
1848
1849 pwrt = multibytecodec_encode(self->codec, &self->state,
1850 self->pending, NULL, self->errors,
1851 MBENC_FLUSH | MBENC_RESET);
1852 /* some pending buffer can be truncated when UnicodeEncodeError is
1853 * raised on 'strict' mode. but, 'reset' method is designed to
1854 * reset the pending buffer or states so failed string sequence
1855 * ought to be missed */
1856 Py_CLEAR(self->pending);
1857 if (pwrt == NULL)
1858 return NULL;
1859
1860 assert(PyBytes_Check(pwrt));
1861
1862 module_state *state = PyType_GetModuleState(cls);
1863 assert(state != NULL);
1864
1865 if (PyBytes_Size(pwrt) > 0) {
1866 PyObject *wr;
1867
1868 wr = PyObject_CallMethodOneArg(self->stream, state->str_write, pwrt);
1869 if (wr == NULL) {
1870 Py_DECREF(pwrt);
1871 return NULL;
1872 }
1873 }
1874 Py_DECREF(pwrt);
1875
1876 Py_RETURN_NONE;
1877 }
1878
1879 static PyObject *
mbstreamwriter_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1880 mbstreamwriter_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1881 {
1882 MultibyteStreamWriterObject *self;
1883 PyObject *stream, *codec = NULL;
1884 char *errors = NULL;
1885
1886 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamWriter",
1887 streamkwarglist, &stream, &errors))
1888 return NULL;
1889
1890 self = (MultibyteStreamWriterObject *)type->tp_alloc(type, 0);
1891 if (self == NULL)
1892 return NULL;
1893
1894 codec = PyObject_GetAttrString((PyObject *)type, "codec");
1895 if (codec == NULL)
1896 goto errorexit;
1897
1898 module_state *state = find_state_by_def(type);
1899 if (!MultibyteCodec_Check(state, codec)) {
1900 PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1901 goto errorexit;
1902 }
1903
1904 self->codec = ((MultibyteCodecObject *)codec)->codec;
1905 self->stream = Py_NewRef(stream);
1906 self->pending = NULL;
1907 self->errors = internal_error_callback(errors);
1908 if (self->errors == NULL)
1909 goto errorexit;
1910 if (self->codec->encinit != NULL &&
1911 self->codec->encinit(&self->state, self->codec) != 0)
1912 goto errorexit;
1913
1914 Py_DECREF(codec);
1915 return (PyObject *)self;
1916
1917 errorexit:
1918 Py_XDECREF(self);
1919 Py_XDECREF(codec);
1920 return NULL;
1921 }
1922
1923 static int
mbstreamwriter_init(PyObject * self,PyObject * args,PyObject * kwds)1924 mbstreamwriter_init(PyObject *self, PyObject *args, PyObject *kwds)
1925 {
1926 return 0;
1927 }
1928
1929 static int
mbstreamwriter_traverse(MultibyteStreamWriterObject * self,visitproc visit,void * arg)1930 mbstreamwriter_traverse(MultibyteStreamWriterObject *self,
1931 visitproc visit, void *arg)
1932 {
1933 if (ERROR_ISCUSTOM(self->errors))
1934 Py_VISIT(self->errors);
1935 Py_VISIT(self->stream);
1936 return 0;
1937 }
1938
1939 static void
mbstreamwriter_dealloc(MultibyteStreamWriterObject * self)1940 mbstreamwriter_dealloc(MultibyteStreamWriterObject *self)
1941 {
1942 PyTypeObject *tp = Py_TYPE(self);
1943 PyObject_GC_UnTrack(self);
1944 ERROR_DECREF(self->errors);
1945 Py_XDECREF(self->stream);
1946 tp->tp_free(self);
1947 Py_DECREF(tp);
1948 }
1949
1950 static struct PyMethodDef mbstreamwriter_methods[] = {
1951 _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_WRITE_METHODDEF
1952 _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_WRITELINES_METHODDEF
1953 _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_RESET_METHODDEF
1954 {NULL, NULL},
1955 };
1956
1957 static PyMemberDef mbstreamwriter_members[] = {
1958 {"stream", _Py_T_OBJECT,
1959 offsetof(MultibyteStreamWriterObject, stream),
1960 Py_READONLY, NULL},
1961 {NULL,}
1962 };
1963
1964 static PyType_Slot writer_slots[] = {
1965 {Py_tp_dealloc, mbstreamwriter_dealloc},
1966 {Py_tp_getattro, PyObject_GenericGetAttr},
1967 {Py_tp_traverse, mbstreamwriter_traverse},
1968 {Py_tp_methods, mbstreamwriter_methods},
1969 {Py_tp_members, mbstreamwriter_members},
1970 {Py_tp_getset, codecctx_getsets},
1971 {Py_tp_init, mbstreamwriter_init},
1972 {Py_tp_new, mbstreamwriter_new},
1973 {0, NULL},
1974 };
1975
1976 static PyType_Spec writer_spec = {
1977 .name = MODULE_NAME ".MultibyteStreamWriter",
1978 .basicsize = sizeof(MultibyteStreamWriterObject),
1979 .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE |
1980 Py_TPFLAGS_IMMUTABLETYPE),
1981 .slots = writer_slots,
1982 };
1983
1984
1985 /*[clinic input]
1986 _multibytecodec.__create_codec
1987
1988 arg: object
1989 /
1990 [clinic start generated code]*/
1991
1992 static PyObject *
_multibytecodec___create_codec(PyObject * module,PyObject * arg)1993 _multibytecodec___create_codec(PyObject *module, PyObject *arg)
1994 /*[clinic end generated code: output=cfa3dce8260e809d input=6840b2a6b183fcfa]*/
1995 {
1996 MultibyteCodecObject *self;
1997
1998 if (!PyCapsule_IsValid(arg, CODEC_CAPSULE)) {
1999 PyErr_SetString(PyExc_ValueError, "argument type invalid");
2000 return NULL;
2001 }
2002
2003 codec_capsule *data = PyCapsule_GetPointer(arg, CODEC_CAPSULE);
2004 const MultibyteCodec *codec = data->codec;
2005 if (codec->codecinit != NULL && codec->codecinit(codec) != 0)
2006 return NULL;
2007
2008 module_state *state = get_module_state(module);
2009 self = PyObject_GC_New(MultibyteCodecObject, state->multibytecodec_type);
2010 if (self == NULL)
2011 return NULL;
2012 self->codec = codec;
2013 self->cjk_module = Py_NewRef(data->cjk_module);
2014
2015 PyObject_GC_Track(self);
2016 return (PyObject *)self;
2017 }
2018
2019 static int
_multibytecodec_traverse(PyObject * mod,visitproc visit,void * arg)2020 _multibytecodec_traverse(PyObject *mod, visitproc visit, void *arg)
2021 {
2022 module_state *state = get_module_state(mod);
2023 Py_VISIT(state->multibytecodec_type);
2024 Py_VISIT(state->encoder_type);
2025 Py_VISIT(state->decoder_type);
2026 Py_VISIT(state->reader_type);
2027 Py_VISIT(state->writer_type);
2028 return 0;
2029 }
2030
2031 static int
_multibytecodec_clear(PyObject * mod)2032 _multibytecodec_clear(PyObject *mod)
2033 {
2034 module_state *state = get_module_state(mod);
2035 Py_CLEAR(state->multibytecodec_type);
2036 Py_CLEAR(state->encoder_type);
2037 Py_CLEAR(state->decoder_type);
2038 Py_CLEAR(state->reader_type);
2039 Py_CLEAR(state->writer_type);
2040 Py_CLEAR(state->str_write);
2041 return 0;
2042 }
2043
2044 static void
_multibytecodec_free(void * mod)2045 _multibytecodec_free(void *mod)
2046 {
2047 _multibytecodec_clear((PyObject *)mod);
2048 }
2049
/* Create a type from `spec` bound to `module` and store it in `type`.
 * On failure this expands to `return -1` in the enclosing function. */
#define CREATE_TYPE(module, type, spec)                                      \
    do {                                                                     \
        (type) = (PyTypeObject *)PyType_FromModuleAndSpec((module), (spec),  \
                                                          NULL);             \
        if ((type) == NULL) {                                                \
            return -1;                                                       \
        }                                                                    \
    } while (0)

/* Add `type` to `module` via PyModule_AddType().
 * On failure this expands to `return -1` in the enclosing function. */
#define ADD_TYPE(module, type)                                               \
    do {                                                                     \
        if (PyModule_AddType((module), (type)) < 0) {                        \
            return -1;                                                       \
        }                                                                    \
    } while (0)
2064
2065 static int
_multibytecodec_exec(PyObject * mod)2066 _multibytecodec_exec(PyObject *mod)
2067 {
2068 module_state *state = get_module_state(mod);
2069 state->str_write = PyUnicode_InternFromString("write");
2070 if (state->str_write == NULL) {
2071 return -1;
2072 }
2073 CREATE_TYPE(mod, state->multibytecodec_type, &multibytecodec_spec);
2074 CREATE_TYPE(mod, state->encoder_type, &encoder_spec);
2075 CREATE_TYPE(mod, state->decoder_type, &decoder_spec);
2076 CREATE_TYPE(mod, state->reader_type, &reader_spec);
2077 CREATE_TYPE(mod, state->writer_type, &writer_spec);
2078
2079 ADD_TYPE(mod, state->encoder_type);
2080 ADD_TYPE(mod, state->decoder_type);
2081 ADD_TYPE(mod, state->reader_type);
2082 ADD_TYPE(mod, state->writer_type);
2083 return 0;
2084 }
2085
2086 #undef CREATE_TYPE
2087 #undef ADD_TYPE
2088
2089 static struct PyMethodDef _multibytecodec_methods[] = {
2090 _MULTIBYTECODEC___CREATE_CODEC_METHODDEF
2091 {NULL, NULL},
2092 };
2093
2094 static PyModuleDef_Slot _multibytecodec_slots[] = {
2095 {Py_mod_exec, _multibytecodec_exec},
2096 {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
2097 {Py_mod_gil, Py_MOD_GIL_NOT_USED},
2098 {0, NULL}
2099 };
2100
2101 static struct PyModuleDef _multibytecodecmodule = {
2102 .m_base = PyModuleDef_HEAD_INIT,
2103 .m_name = "_multibytecodec",
2104 .m_size = sizeof(module_state),
2105 .m_methods = _multibytecodec_methods,
2106 .m_slots = _multibytecodec_slots,
2107 .m_traverse = _multibytecodec_traverse,
2108 .m_clear = _multibytecodec_clear,
2109 .m_free = _multibytecodec_free,
2110 };
2111
2112 PyMODINIT_FUNC
PyInit__multibytecodec(void)2113 PyInit__multibytecodec(void)
2114 {
2115 return PyModuleDef_Init(&_multibytecodecmodule);
2116 }
2117