/*
 * multibytecodec.c: Common Multibyte Codec Implementation
 *
 * Written by Hye-Shik Chang <perky@FreeBSD.org>
 */
6
7 #define PY_SSIZE_T_CLEAN
8 #include "Python.h"
9 #include "structmember.h" // PyMemberDef
10 #include "multibytecodec.h"
11 #include "clinic/multibytecodec.c.h"
12
13 #define MODULE_NAME "_multibytecodec"
14
15 typedef struct {
16 PyTypeObject *encoder_type;
17 PyTypeObject *decoder_type;
18 PyTypeObject *reader_type;
19 PyTypeObject *writer_type;
20 PyTypeObject *multibytecodec_type;
21 } _multibytecodec_state;
22
23 static _multibytecodec_state *
_multibytecodec_get_state(PyObject * module)24 _multibytecodec_get_state(PyObject *module)
25 {
26 _multibytecodec_state *state = PyModule_GetState(module);
27 assert(state != NULL);
28 return state;
29 }
30
31 static struct PyModuleDef _multibytecodecmodule;
32 static _multibytecodec_state *
_multibyte_codec_find_state_by_type(PyTypeObject * type)33 _multibyte_codec_find_state_by_type(PyTypeObject *type)
34 {
35 PyObject *module = _PyType_GetModuleByDef(type, &_multibytecodecmodule);
36 assert(module != NULL);
37 return _multibytecodec_get_state(module);
38 }
39
40 #define clinic_get_state() _multibyte_codec_find_state_by_type(type)
41 /*[clinic input]
42 module _multibytecodec
43 class _multibytecodec.MultibyteCodec "MultibyteCodecObject *" "clinic_get_state()->multibytecodec_type"
44 class _multibytecodec.MultibyteIncrementalEncoder "MultibyteIncrementalEncoderObject *" "clinic_get_state()->encoder_type"
45 class _multibytecodec.MultibyteIncrementalDecoder "MultibyteIncrementalDecoderObject *" "clinic_get_state()->decoder_type"
46 class _multibytecodec.MultibyteStreamReader "MultibyteStreamReaderObject *" "clinic_get_state()->reader_type"
47 class _multibytecodec.MultibyteStreamWriter "MultibyteStreamWriterObject *" "clinic_get_state()->writer_type"
48 [clinic start generated code]*/
49 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=305a76dfdd24b99c]*/
50 #undef clinic_get_state
51
52 typedef struct {
53 PyObject *inobj;
54 Py_ssize_t inpos, inlen;
55 unsigned char *outbuf, *outbuf_end;
56 PyObject *excobj, *outobj;
57 } MultibyteEncodeBuffer;
58
59 typedef struct {
60 const unsigned char *inbuf, *inbuf_top, *inbuf_end;
61 PyObject *excobj;
62 _PyUnicodeWriter writer;
63 } MultibyteDecodeBuffer;
64
65 static char *incnewkwarglist[] = {"errors", NULL};
66 static char *streamkwarglist[] = {"stream", "errors", NULL};
67
68 static PyObject *multibytecodec_encode(MultibyteCodec *,
69 MultibyteCodec_State *, PyObject *, Py_ssize_t *,
70 PyObject *, int);
71
/* Flag bit just above MBENC_MAX: reset after an encoding session.
   Parenthesized so the shift cannot re-associate with neighbouring
   operators at the point of use. */
#define MBENC_RESET     (MBENC_MAX<<1)
73
74 _Py_IDENTIFIER(write);
75
76 static PyObject *
make_tuple(PyObject * object,Py_ssize_t len)77 make_tuple(PyObject *object, Py_ssize_t len)
78 {
79 PyObject *v, *w;
80
81 if (object == NULL)
82 return NULL;
83
84 v = PyTuple_New(2);
85 if (v == NULL) {
86 Py_DECREF(object);
87 return NULL;
88 }
89 PyTuple_SET_ITEM(v, 0, object);
90
91 w = PyLong_FromSsize_t(len);
92 if (w == NULL) {
93 Py_DECREF(v);
94 return NULL;
95 }
96 PyTuple_SET_ITEM(v, 1, w);
97
98 return v;
99 }
100
101 static PyObject *
internal_error_callback(const char * errors)102 internal_error_callback(const char *errors)
103 {
104 if (errors == NULL || strcmp(errors, "strict") == 0)
105 return ERROR_STRICT;
106 else if (strcmp(errors, "ignore") == 0)
107 return ERROR_IGNORE;
108 else if (strcmp(errors, "replace") == 0)
109 return ERROR_REPLACE;
110 else
111 return PyUnicode_FromString(errors);
112 }
113
114 static PyObject *
call_error_callback(PyObject * errors,PyObject * exc)115 call_error_callback(PyObject *errors, PyObject *exc)
116 {
117 PyObject *cb, *r;
118 const char *str;
119
120 assert(PyUnicode_Check(errors));
121 str = PyUnicode_AsUTF8(errors);
122 if (str == NULL)
123 return NULL;
124 cb = PyCodec_LookupError(str);
125 if (cb == NULL)
126 return NULL;
127
128 r = PyObject_CallOneArg(cb, exc);
129 Py_DECREF(cb);
130 return r;
131 }
132
133 static PyObject *
codecctx_errors_get(MultibyteStatefulCodecContext * self,void * Py_UNUSED (ignored))134 codecctx_errors_get(MultibyteStatefulCodecContext *self, void *Py_UNUSED(ignored))
135 {
136 const char *errors;
137
138 if (self->errors == ERROR_STRICT)
139 errors = "strict";
140 else if (self->errors == ERROR_IGNORE)
141 errors = "ignore";
142 else if (self->errors == ERROR_REPLACE)
143 errors = "replace";
144 else {
145 Py_INCREF(self->errors);
146 return self->errors;
147 }
148
149 return PyUnicode_FromString(errors);
150 }
151
152 static int
codecctx_errors_set(MultibyteStatefulCodecContext * self,PyObject * value,void * closure)153 codecctx_errors_set(MultibyteStatefulCodecContext *self, PyObject *value,
154 void *closure)
155 {
156 PyObject *cb;
157 const char *str;
158
159 if (value == NULL) {
160 PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
161 return -1;
162 }
163 if (!PyUnicode_Check(value)) {
164 PyErr_SetString(PyExc_TypeError, "errors must be a string");
165 return -1;
166 }
167
168 str = PyUnicode_AsUTF8(value);
169 if (str == NULL)
170 return -1;
171
172 cb = internal_error_callback(str);
173 if (cb == NULL)
174 return -1;
175
176 ERROR_DECREF(self->errors);
177 self->errors = cb;
178 return 0;
179 }
180
181 /* This getset handlers list is used by all the stateful codec objects */
182 static PyGetSetDef codecctx_getsets[] = {
183 {"errors", (getter)codecctx_errors_get,
184 (setter)codecctx_errors_set,
185 PyDoc_STR("how to treat errors")},
186 {NULL,}
187 };
188
189 static int
expand_encodebuffer(MultibyteEncodeBuffer * buf,Py_ssize_t esize)190 expand_encodebuffer(MultibyteEncodeBuffer *buf, Py_ssize_t esize)
191 {
192 Py_ssize_t orgpos, orgsize, incsize;
193
194 orgpos = (Py_ssize_t)((char *)buf->outbuf -
195 PyBytes_AS_STRING(buf->outobj));
196 orgsize = PyBytes_GET_SIZE(buf->outobj);
197 incsize = (esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize);
198
199 if (orgsize > PY_SSIZE_T_MAX - incsize) {
200 PyErr_NoMemory();
201 return -1;
202 }
203
204 if (_PyBytes_Resize(&buf->outobj, orgsize + incsize) == -1)
205 return -1;
206
207 buf->outbuf = (unsigned char *)PyBytes_AS_STRING(buf->outobj) +orgpos;
208 buf->outbuf_end = (unsigned char *)PyBytes_AS_STRING(buf->outobj)
209 + PyBytes_GET_SIZE(buf->outobj);
210
211 return 0;
212 }
213 #define REQUIRE_ENCODEBUFFER(buf, s) do { \
214 if ((s) < 0 || (s) > (buf)->outbuf_end - (buf)->outbuf) \
215 if (expand_encodebuffer(buf, s) == -1) \
216 goto errorexit; \
217 } while(0)
218
219
/**
 * MultibyteCodec object
 */
223
224 static int
multibytecodec_encerror(MultibyteCodec * codec,MultibyteCodec_State * state,MultibyteEncodeBuffer * buf,PyObject * errors,Py_ssize_t e)225 multibytecodec_encerror(MultibyteCodec *codec,
226 MultibyteCodec_State *state,
227 MultibyteEncodeBuffer *buf,
228 PyObject *errors, Py_ssize_t e)
229 {
230 PyObject *retobj = NULL, *retstr = NULL, *tobj;
231 Py_ssize_t retstrsize, newpos;
232 Py_ssize_t esize, start, end;
233 const char *reason;
234
235 if (e > 0) {
236 reason = "illegal multibyte sequence";
237 esize = e;
238 }
239 else {
240 switch (e) {
241 case MBERR_TOOSMALL:
242 REQUIRE_ENCODEBUFFER(buf, -1);
243 return 0; /* retry it */
244 case MBERR_TOOFEW:
245 reason = "incomplete multibyte sequence";
246 esize = (Py_ssize_t)buf->inpos;
247 break;
248 case MBERR_INTERNAL:
249 PyErr_SetString(PyExc_RuntimeError,
250 "internal codec error");
251 return -1;
252 default:
253 PyErr_SetString(PyExc_RuntimeError,
254 "unknown runtime error");
255 return -1;
256 }
257 }
258
259 if (errors == ERROR_REPLACE) {
260 PyObject *replchar;
261 Py_ssize_t r;
262 Py_ssize_t inpos;
263 int kind;
264 const void *data;
265
266 replchar = PyUnicode_FromOrdinal('?');
267 if (replchar == NULL)
268 goto errorexit;
269 kind = PyUnicode_KIND(replchar);
270 data = PyUnicode_DATA(replchar);
271
272 inpos = 0;
273 for (;;) {
274 Py_ssize_t outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf);
275
276 r = codec->encode(state, codec->config,
277 kind, data, &inpos, 1,
278 &buf->outbuf, outleft, 0);
279 if (r == MBERR_TOOSMALL) {
280 REQUIRE_ENCODEBUFFER(buf, -1);
281 continue;
282 }
283 else
284 break;
285 }
286
287 Py_DECREF(replchar);
288
289 if (r != 0) {
290 REQUIRE_ENCODEBUFFER(buf, 1);
291 *buf->outbuf++ = '?';
292 }
293 }
294 if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
295 buf->inpos += esize;
296 return 0;
297 }
298
299 start = (Py_ssize_t)buf->inpos;
300 end = start + esize;
301
302 /* use cached exception object if available */
303 if (buf->excobj == NULL) {
304 buf->excobj = PyObject_CallFunction(PyExc_UnicodeEncodeError,
305 "sOnns",
306 codec->encoding, buf->inobj,
307 start, end, reason);
308 if (buf->excobj == NULL)
309 goto errorexit;
310 }
311 else
312 if (PyUnicodeEncodeError_SetStart(buf->excobj, start) != 0 ||
313 PyUnicodeEncodeError_SetEnd(buf->excobj, end) != 0 ||
314 PyUnicodeEncodeError_SetReason(buf->excobj, reason) != 0)
315 goto errorexit;
316
317 if (errors == ERROR_STRICT) {
318 PyCodec_StrictErrors(buf->excobj);
319 goto errorexit;
320 }
321
322 retobj = call_error_callback(errors, buf->excobj);
323 if (retobj == NULL)
324 goto errorexit;
325
326 if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
327 (!PyUnicode_Check((tobj = PyTuple_GET_ITEM(retobj, 0))) && !PyBytes_Check(tobj)) ||
328 !PyLong_Check(PyTuple_GET_ITEM(retobj, 1))) {
329 PyErr_SetString(PyExc_TypeError,
330 "encoding error handler must return "
331 "(str, int) tuple");
332 goto errorexit;
333 }
334
335 if (PyUnicode_Check(tobj)) {
336 Py_ssize_t inpos;
337
338 retstr = multibytecodec_encode(codec, state, tobj,
339 &inpos, ERROR_STRICT,
340 MBENC_FLUSH);
341 if (retstr == NULL)
342 goto errorexit;
343 }
344 else {
345 Py_INCREF(tobj);
346 retstr = tobj;
347 }
348
349 assert(PyBytes_Check(retstr));
350 retstrsize = PyBytes_GET_SIZE(retstr);
351 if (retstrsize > 0) {
352 REQUIRE_ENCODEBUFFER(buf, retstrsize);
353 memcpy(buf->outbuf, PyBytes_AS_STRING(retstr), retstrsize);
354 buf->outbuf += retstrsize;
355 }
356
357 newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
358 if (newpos < 0 && !PyErr_Occurred())
359 newpos += (Py_ssize_t)buf->inlen;
360 if (newpos < 0 || newpos > buf->inlen) {
361 PyErr_Clear();
362 PyErr_Format(PyExc_IndexError,
363 "position %zd from error handler out of bounds",
364 newpos);
365 goto errorexit;
366 }
367 buf->inpos = newpos;
368
369 Py_DECREF(retobj);
370 Py_DECREF(retstr);
371 return 0;
372
373 errorexit:
374 Py_XDECREF(retobj);
375 Py_XDECREF(retstr);
376 return -1;
377 }
378
379 static int
multibytecodec_decerror(MultibyteCodec * codec,MultibyteCodec_State * state,MultibyteDecodeBuffer * buf,PyObject * errors,Py_ssize_t e)380 multibytecodec_decerror(MultibyteCodec *codec,
381 MultibyteCodec_State *state,
382 MultibyteDecodeBuffer *buf,
383 PyObject *errors, Py_ssize_t e)
384 {
385 PyObject *retobj = NULL, *retuni = NULL;
386 Py_ssize_t newpos;
387 const char *reason;
388 Py_ssize_t esize, start, end;
389
390 if (e > 0) {
391 reason = "illegal multibyte sequence";
392 esize = e;
393 }
394 else {
395 switch (e) {
396 case MBERR_TOOSMALL:
397 return 0; /* retry it */
398 case MBERR_TOOFEW:
399 reason = "incomplete multibyte sequence";
400 esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
401 break;
402 case MBERR_INTERNAL:
403 PyErr_SetString(PyExc_RuntimeError,
404 "internal codec error");
405 return -1;
406 case MBERR_EXCEPTION:
407 return -1;
408 default:
409 PyErr_SetString(PyExc_RuntimeError,
410 "unknown runtime error");
411 return -1;
412 }
413 }
414
415 if (errors == ERROR_REPLACE) {
416 if (_PyUnicodeWriter_WriteChar(&buf->writer,
417 Py_UNICODE_REPLACEMENT_CHARACTER) < 0)
418 goto errorexit;
419 }
420 if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
421 buf->inbuf += esize;
422 return 0;
423 }
424
425 start = (Py_ssize_t)(buf->inbuf - buf->inbuf_top);
426 end = start + esize;
427
428 /* use cached exception object if available */
429 if (buf->excobj == NULL) {
430 buf->excobj = PyUnicodeDecodeError_Create(codec->encoding,
431 (const char *)buf->inbuf_top,
432 (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top),
433 start, end, reason);
434 if (buf->excobj == NULL)
435 goto errorexit;
436 }
437 else
438 if (PyUnicodeDecodeError_SetStart(buf->excobj, start) ||
439 PyUnicodeDecodeError_SetEnd(buf->excobj, end) ||
440 PyUnicodeDecodeError_SetReason(buf->excobj, reason))
441 goto errorexit;
442
443 if (errors == ERROR_STRICT) {
444 PyCodec_StrictErrors(buf->excobj);
445 goto errorexit;
446 }
447
448 retobj = call_error_callback(errors, buf->excobj);
449 if (retobj == NULL)
450 goto errorexit;
451
452 if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
453 !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) ||
454 !PyLong_Check(PyTuple_GET_ITEM(retobj, 1))) {
455 PyErr_SetString(PyExc_TypeError,
456 "decoding error handler must return "
457 "(str, int) tuple");
458 goto errorexit;
459 }
460
461 if (_PyUnicodeWriter_WriteStr(&buf->writer, retuni) < 0)
462 goto errorexit;
463
464 newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
465 if (newpos < 0 && !PyErr_Occurred())
466 newpos += (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top);
467 if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) {
468 PyErr_Clear();
469 PyErr_Format(PyExc_IndexError,
470 "position %zd from error handler out of bounds",
471 newpos);
472 goto errorexit;
473 }
474 buf->inbuf = buf->inbuf_top + newpos;
475 Py_DECREF(retobj);
476 return 0;
477
478 errorexit:
479 Py_XDECREF(retobj);
480 return -1;
481 }
482
483 static PyObject *
multibytecodec_encode(MultibyteCodec * codec,MultibyteCodec_State * state,PyObject * text,Py_ssize_t * inpos_t,PyObject * errors,int flags)484 multibytecodec_encode(MultibyteCodec *codec,
485 MultibyteCodec_State *state,
486 PyObject *text, Py_ssize_t *inpos_t,
487 PyObject *errors, int flags)
488 {
489 MultibyteEncodeBuffer buf;
490 Py_ssize_t finalsize, r = 0;
491 Py_ssize_t datalen;
492 int kind;
493 const void *data;
494
495 if (PyUnicode_READY(text) < 0)
496 return NULL;
497 datalen = PyUnicode_GET_LENGTH(text);
498
499 if (datalen == 0 && !(flags & MBENC_RESET))
500 return PyBytes_FromStringAndSize(NULL, 0);
501
502 buf.excobj = NULL;
503 buf.outobj = NULL;
504 buf.inobj = text; /* borrowed reference */
505 buf.inpos = 0;
506 buf.inlen = datalen;
507 kind = PyUnicode_KIND(buf.inobj);
508 data = PyUnicode_DATA(buf.inobj);
509
510 if (datalen > (PY_SSIZE_T_MAX - 16) / 2) {
511 PyErr_NoMemory();
512 goto errorexit;
513 }
514
515 buf.outobj = PyBytes_FromStringAndSize(NULL, datalen * 2 + 16);
516 if (buf.outobj == NULL)
517 goto errorexit;
518 buf.outbuf = (unsigned char *)PyBytes_AS_STRING(buf.outobj);
519 buf.outbuf_end = buf.outbuf + PyBytes_GET_SIZE(buf.outobj);
520
521 while (buf.inpos < buf.inlen) {
522 /* we don't reuse inleft and outleft here.
523 * error callbacks can relocate the cursor anywhere on buffer*/
524 Py_ssize_t outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
525
526 r = codec->encode(state, codec->config,
527 kind, data,
528 &buf.inpos, buf.inlen,
529 &buf.outbuf, outleft, flags);
530 if ((r == 0) || (r == MBERR_TOOFEW && !(flags & MBENC_FLUSH)))
531 break;
532 else if (multibytecodec_encerror(codec, state, &buf, errors,r))
533 goto errorexit;
534 else if (r == MBERR_TOOFEW)
535 break;
536 }
537
538 if (codec->encreset != NULL && (flags & MBENC_RESET))
539 for (;;) {
540 Py_ssize_t outleft;
541
542 outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
543 r = codec->encreset(state, codec->config, &buf.outbuf,
544 outleft);
545 if (r == 0)
546 break;
547 else if (multibytecodec_encerror(codec, state,
548 &buf, errors, r))
549 goto errorexit;
550 }
551
552 finalsize = (Py_ssize_t)((char *)buf.outbuf -
553 PyBytes_AS_STRING(buf.outobj));
554
555 if (finalsize != PyBytes_GET_SIZE(buf.outobj))
556 if (_PyBytes_Resize(&buf.outobj, finalsize) == -1)
557 goto errorexit;
558
559 if (inpos_t)
560 *inpos_t = buf.inpos;
561 Py_XDECREF(buf.excobj);
562 return buf.outobj;
563
564 errorexit:
565 Py_XDECREF(buf.excobj);
566 Py_XDECREF(buf.outobj);
567 return NULL;
568 }
569
570 /*[clinic input]
571 _multibytecodec.MultibyteCodec.encode
572
573 input: object
574 errors: str(accept={str, NoneType}) = None
575
576 Return an encoded string version of `input'.
577
578 'errors' may be given to set a different error handling scheme. Default is
579 'strict' meaning that encoding errors raise a UnicodeEncodeError. Other possible
580 values are 'ignore', 'replace' and 'xmlcharrefreplace' as well as any other name
581 registered with codecs.register_error that can handle UnicodeEncodeErrors.
582 [clinic start generated code]*/
583
584 static PyObject *
_multibytecodec_MultibyteCodec_encode_impl(MultibyteCodecObject * self,PyObject * input,const char * errors)585 _multibytecodec_MultibyteCodec_encode_impl(MultibyteCodecObject *self,
586 PyObject *input,
587 const char *errors)
588 /*[clinic end generated code: output=7b26652045ba56a9 input=606d0e128a577bae]*/
589 {
590 MultibyteCodec_State state;
591 PyObject *errorcb, *r, *ucvt;
592 Py_ssize_t datalen;
593
594 if (PyUnicode_Check(input))
595 ucvt = NULL;
596 else {
597 input = ucvt = PyObject_Str(input);
598 if (input == NULL)
599 return NULL;
600 else if (!PyUnicode_Check(input)) {
601 PyErr_SetString(PyExc_TypeError,
602 "couldn't convert the object to unicode.");
603 Py_DECREF(ucvt);
604 return NULL;
605 }
606 }
607
608 if (PyUnicode_READY(input) < 0) {
609 Py_XDECREF(ucvt);
610 return NULL;
611 }
612 datalen = PyUnicode_GET_LENGTH(input);
613
614 errorcb = internal_error_callback(errors);
615 if (errorcb == NULL) {
616 Py_XDECREF(ucvt);
617 return NULL;
618 }
619
620 if (self->codec->encinit != NULL &&
621 self->codec->encinit(&state, self->codec->config) != 0)
622 goto errorexit;
623 r = multibytecodec_encode(self->codec, &state,
624 input, NULL, errorcb,
625 MBENC_FLUSH | MBENC_RESET);
626 if (r == NULL)
627 goto errorexit;
628
629 ERROR_DECREF(errorcb);
630 Py_XDECREF(ucvt);
631 return make_tuple(r, datalen);
632
633 errorexit:
634 ERROR_DECREF(errorcb);
635 Py_XDECREF(ucvt);
636 return NULL;
637 }
638
639 /*[clinic input]
640 _multibytecodec.MultibyteCodec.decode
641
642 input: Py_buffer
643 errors: str(accept={str, NoneType}) = None
644
645 Decodes 'input'.
646
647 'errors' may be given to set a different error handling scheme. Default is
648 'strict' meaning that encoding errors raise a UnicodeDecodeError. Other possible
649 values are 'ignore' and 'replace' as well as any other name registered with
650 codecs.register_error that is able to handle UnicodeDecodeErrors."
651 [clinic start generated code]*/
652
653 static PyObject *
_multibytecodec_MultibyteCodec_decode_impl(MultibyteCodecObject * self,Py_buffer * input,const char * errors)654 _multibytecodec_MultibyteCodec_decode_impl(MultibyteCodecObject *self,
655 Py_buffer *input,
656 const char *errors)
657 /*[clinic end generated code: output=ff419f65bad6cc77 input=e0c78fc7ab190def]*/
658 {
659 MultibyteCodec_State state;
660 MultibyteDecodeBuffer buf;
661 PyObject *errorcb, *res;
662 const char *data;
663 Py_ssize_t datalen;
664
665 data = input->buf;
666 datalen = input->len;
667
668 errorcb = internal_error_callback(errors);
669 if (errorcb == NULL) {
670 return NULL;
671 }
672
673 if (datalen == 0) {
674 ERROR_DECREF(errorcb);
675 return make_tuple(PyUnicode_New(0, 0), 0);
676 }
677
678 _PyUnicodeWriter_Init(&buf.writer);
679 buf.writer.min_length = datalen;
680 buf.excobj = NULL;
681 buf.inbuf = buf.inbuf_top = (unsigned char *)data;
682 buf.inbuf_end = buf.inbuf_top + datalen;
683
684 if (self->codec->decinit != NULL &&
685 self->codec->decinit(&state, self->codec->config) != 0)
686 goto errorexit;
687
688 while (buf.inbuf < buf.inbuf_end) {
689 Py_ssize_t inleft, r;
690
691 inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf);
692
693 r = self->codec->decode(&state, self->codec->config,
694 &buf.inbuf, inleft, &buf.writer);
695 if (r == 0)
696 break;
697 else if (multibytecodec_decerror(self->codec, &state,
698 &buf, errorcb, r))
699 goto errorexit;
700 }
701
702 res = _PyUnicodeWriter_Finish(&buf.writer);
703 if (res == NULL)
704 goto errorexit;
705
706 Py_XDECREF(buf.excobj);
707 ERROR_DECREF(errorcb);
708 return make_tuple(res, datalen);
709
710 errorexit:
711 ERROR_DECREF(errorcb);
712 Py_XDECREF(buf.excobj);
713 _PyUnicodeWriter_Dealloc(&buf.writer);
714
715 return NULL;
716 }
717
718 static struct PyMethodDef multibytecodec_methods[] = {
719 _MULTIBYTECODEC_MULTIBYTECODEC_ENCODE_METHODDEF
720 _MULTIBYTECODEC_MULTIBYTECODEC_DECODE_METHODDEF
721 {NULL, NULL},
722 };
723
724 static int
multibytecodec_traverse(PyObject * self,visitproc visit,void * arg)725 multibytecodec_traverse(PyObject *self, visitproc visit, void *arg)
726 {
727 Py_VISIT(Py_TYPE(self));
728 return 0;
729 }
730
731 static void
multibytecodec_dealloc(MultibyteCodecObject * self)732 multibytecodec_dealloc(MultibyteCodecObject *self)
733 {
734 PyObject_GC_UnTrack(self);
735 PyTypeObject *tp = Py_TYPE(self);
736 tp->tp_free(self);
737 Py_DECREF(tp);
738 }
739
740 static PyType_Slot multibytecodec_slots[] = {
741 {Py_tp_dealloc, multibytecodec_dealloc},
742 {Py_tp_getattro, PyObject_GenericGetAttr},
743 {Py_tp_methods, multibytecodec_methods},
744 {Py_tp_traverse, multibytecodec_traverse},
745 {0, NULL},
746 };
747
748 static PyType_Spec multibytecodec_spec = {
749 .name = MODULE_NAME ".MultibyteCodec",
750 .basicsize = sizeof(MultibyteCodecObject),
751 .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
752 Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_IMMUTABLETYPE),
753 .slots = multibytecodec_slots,
754 };
755
756
/**
 * Utility functions for stateful codec mechanism
 */
760
/* Cast an object pointer to the stateful decoder / encoder context type. */
#define STATEFUL_DCTX(o)    ((MultibyteStatefulDecoderContext *)(o))
#define STATEFUL_ECTX(o)    ((MultibyteStatefulEncoderContext *)(o))
763
764 static PyObject *
encoder_encode_stateful(MultibyteStatefulEncoderContext * ctx,PyObject * unistr,int final)765 encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx,
766 PyObject *unistr, int final)
767 {
768 PyObject *ucvt, *r = NULL;
769 PyObject *inbuf = NULL;
770 Py_ssize_t inpos, datalen;
771 PyObject *origpending = NULL;
772
773 if (PyUnicode_Check(unistr))
774 ucvt = NULL;
775 else {
776 unistr = ucvt = PyObject_Str(unistr);
777 if (unistr == NULL)
778 return NULL;
779 else if (!PyUnicode_Check(unistr)) {
780 PyErr_SetString(PyExc_TypeError,
781 "couldn't convert the object to str.");
782 Py_DECREF(ucvt);
783 return NULL;
784 }
785 }
786
787 if (ctx->pending) {
788 PyObject *inbuf_tmp;
789
790 Py_INCREF(ctx->pending);
791 origpending = ctx->pending;
792
793 Py_INCREF(ctx->pending);
794 inbuf_tmp = ctx->pending;
795 PyUnicode_Append(&inbuf_tmp, unistr);
796 if (inbuf_tmp == NULL)
797 goto errorexit;
798 Py_CLEAR(ctx->pending);
799 inbuf = inbuf_tmp;
800 }
801 else {
802 origpending = NULL;
803
804 Py_INCREF(unistr);
805 inbuf = unistr;
806 }
807 if (PyUnicode_READY(inbuf) < 0)
808 goto errorexit;
809 inpos = 0;
810 datalen = PyUnicode_GET_LENGTH(inbuf);
811
812 r = multibytecodec_encode(ctx->codec, &ctx->state,
813 inbuf, &inpos,
814 ctx->errors, final ? MBENC_FLUSH | MBENC_RESET : 0);
815 if (r == NULL) {
816 /* recover the original pending buffer */
817 Py_XSETREF(ctx->pending, origpending);
818 origpending = NULL;
819 goto errorexit;
820 }
821 Py_XDECREF(origpending);
822
823 if (inpos < datalen) {
824 if (datalen - inpos > MAXENCPENDING) {
825 /* normal codecs can't reach here */
826 PyErr_SetString(PyExc_UnicodeError,
827 "pending buffer overflow");
828 goto errorexit;
829 }
830 ctx->pending = PyUnicode_Substring(inbuf, inpos, datalen);
831 if (ctx->pending == NULL) {
832 /* normal codecs can't reach here */
833 goto errorexit;
834 }
835 }
836
837 Py_DECREF(inbuf);
838 Py_XDECREF(ucvt);
839 return r;
840
841 errorexit:
842 Py_XDECREF(r);
843 Py_XDECREF(ucvt);
844 Py_XDECREF(origpending);
845 Py_XDECREF(inbuf);
846 return NULL;
847 }
848
849 static int
decoder_append_pending(MultibyteStatefulDecoderContext * ctx,MultibyteDecodeBuffer * buf)850 decoder_append_pending(MultibyteStatefulDecoderContext *ctx,
851 MultibyteDecodeBuffer *buf)
852 {
853 Py_ssize_t npendings;
854
855 npendings = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
856 if (npendings + ctx->pendingsize > MAXDECPENDING ||
857 npendings > PY_SSIZE_T_MAX - ctx->pendingsize) {
858 PyErr_SetString(PyExc_UnicodeError, "pending buffer overflow");
859 return -1;
860 }
861 memcpy(ctx->pending + ctx->pendingsize, buf->inbuf, npendings);
862 ctx->pendingsize += npendings;
863 return 0;
864 }
865
866 static int
decoder_prepare_buffer(MultibyteDecodeBuffer * buf,const char * data,Py_ssize_t size)867 decoder_prepare_buffer(MultibyteDecodeBuffer *buf, const char *data,
868 Py_ssize_t size)
869 {
870 buf->inbuf = buf->inbuf_top = (const unsigned char *)data;
871 buf->inbuf_end = buf->inbuf_top + size;
872 buf->writer.min_length += size;
873 return 0;
874 }
875
876 static int
decoder_feed_buffer(MultibyteStatefulDecoderContext * ctx,MultibyteDecodeBuffer * buf)877 decoder_feed_buffer(MultibyteStatefulDecoderContext *ctx,
878 MultibyteDecodeBuffer *buf)
879 {
880 while (buf->inbuf < buf->inbuf_end) {
881 Py_ssize_t inleft;
882 Py_ssize_t r;
883
884 inleft = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
885
886 r = ctx->codec->decode(&ctx->state, ctx->codec->config,
887 &buf->inbuf, inleft, &buf->writer);
888 if (r == 0 || r == MBERR_TOOFEW)
889 break;
890 else if (multibytecodec_decerror(ctx->codec, &ctx->state,
891 buf, ctx->errors, r))
892 return -1;
893 }
894 return 0;
895 }
896
897
898 /*[clinic input]
899 _multibytecodec.MultibyteIncrementalEncoder.encode
900
901 input: object
902 final: bool(accept={int}) = False
903 [clinic start generated code]*/
904
905 static PyObject *
_multibytecodec_MultibyteIncrementalEncoder_encode_impl(MultibyteIncrementalEncoderObject * self,PyObject * input,int final)906 _multibytecodec_MultibyteIncrementalEncoder_encode_impl(MultibyteIncrementalEncoderObject *self,
907 PyObject *input,
908 int final)
909 /*[clinic end generated code: output=123361b6c505e2c1 input=093a1ddbb2fc6721]*/
910 {
911 return encoder_encode_stateful(STATEFUL_ECTX(self), input, final);
912 }
913
914 /*[clinic input]
915 _multibytecodec.MultibyteIncrementalEncoder.getstate
916 [clinic start generated code]*/
917
918 static PyObject *
_multibytecodec_MultibyteIncrementalEncoder_getstate_impl(MultibyteIncrementalEncoderObject * self)919 _multibytecodec_MultibyteIncrementalEncoder_getstate_impl(MultibyteIncrementalEncoderObject *self)
920 /*[clinic end generated code: output=9794a5ace70d7048 input=4a2a82874ffa40bb]*/
921 {
922 /* state made up of 1 byte for buffer size, up to MAXENCPENDING*4 bytes
923 for UTF-8 encoded buffer (each character can use up to 4
924 bytes), and required bytes for MultibyteCodec_State.c. A byte
925 array is used to avoid different compilers generating different
926 values for the same state, e.g. as a result of struct padding.
927 */
928 unsigned char statebytes[1 + MAXENCPENDING*4 + sizeof(self->state.c)];
929 Py_ssize_t statesize;
930 const char *pendingbuffer = NULL;
931 Py_ssize_t pendingsize;
932
933 if (self->pending != NULL) {
934 pendingbuffer = PyUnicode_AsUTF8AndSize(self->pending, &pendingsize);
935 if (pendingbuffer == NULL) {
936 return NULL;
937 }
938 if (pendingsize > MAXENCPENDING*4) {
939 PyErr_SetString(PyExc_UnicodeError, "pending buffer too large");
940 return NULL;
941 }
942 statebytes[0] = (unsigned char)pendingsize;
943 memcpy(statebytes + 1, pendingbuffer, pendingsize);
944 statesize = 1 + pendingsize;
945 } else {
946 statebytes[0] = 0;
947 statesize = 1;
948 }
949 memcpy(statebytes+statesize, self->state.c,
950 sizeof(self->state.c));
951 statesize += sizeof(self->state.c);
952
953 return (PyObject *)_PyLong_FromByteArray(statebytes, statesize,
954 1 /* little-endian */ ,
955 0 /* unsigned */ );
956 }
957
958 /*[clinic input]
959 _multibytecodec.MultibyteIncrementalEncoder.setstate
960 state as statelong: object(type='PyLongObject *', subclass_of='&PyLong_Type')
961 /
962 [clinic start generated code]*/
963
964 static PyObject *
_multibytecodec_MultibyteIncrementalEncoder_setstate_impl(MultibyteIncrementalEncoderObject * self,PyLongObject * statelong)965 _multibytecodec_MultibyteIncrementalEncoder_setstate_impl(MultibyteIncrementalEncoderObject *self,
966 PyLongObject *statelong)
967 /*[clinic end generated code: output=4e5e98ac1f4039ca input=c80fb5830d4d2f76]*/
968 {
969 PyObject *pending = NULL;
970 unsigned char statebytes[1 + MAXENCPENDING*4 + sizeof(self->state.c)];
971
972 if (_PyLong_AsByteArray(statelong, statebytes, sizeof(statebytes),
973 1 /* little-endian */ ,
974 0 /* unsigned */ ) < 0) {
975 goto errorexit;
976 }
977
978 if (statebytes[0] > MAXENCPENDING*4) {
979 PyErr_SetString(PyExc_UnicodeError, "pending buffer too large");
980 return NULL;
981 }
982
983 pending = PyUnicode_DecodeUTF8((const char *)statebytes+1,
984 statebytes[0], "strict");
985 if (pending == NULL) {
986 goto errorexit;
987 }
988
989 Py_CLEAR(self->pending);
990 self->pending = pending;
991 memcpy(self->state.c, statebytes+1+statebytes[0],
992 sizeof(self->state.c));
993
994 Py_RETURN_NONE;
995
996 errorexit:
997 Py_XDECREF(pending);
998 return NULL;
999 }
1000
1001 /*[clinic input]
1002 _multibytecodec.MultibyteIncrementalEncoder.reset
1003 [clinic start generated code]*/
1004
1005 static PyObject *
_multibytecodec_MultibyteIncrementalEncoder_reset_impl(MultibyteIncrementalEncoderObject * self)1006 _multibytecodec_MultibyteIncrementalEncoder_reset_impl(MultibyteIncrementalEncoderObject *self)
1007 /*[clinic end generated code: output=b4125d8f537a253f input=930f06760707b6ea]*/
1008 {
1009 /* Longest output: 4 bytes (b'\x0F\x1F(B') with ISO 2022 */
1010 unsigned char buffer[4], *outbuf;
1011 Py_ssize_t r;
1012 if (self->codec->encreset != NULL) {
1013 outbuf = buffer;
1014 r = self->codec->encreset(&self->state, self->codec->config,
1015 &outbuf, sizeof(buffer));
1016 if (r != 0)
1017 return NULL;
1018 }
1019 Py_CLEAR(self->pending);
1020 Py_RETURN_NONE;
1021 }
1022
1023 static struct PyMethodDef mbiencoder_methods[] = {
1024 _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_ENCODE_METHODDEF
1025 _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_GETSTATE_METHODDEF
1026 _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_SETSTATE_METHODDEF
1027 _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_RESET_METHODDEF
1028 {NULL, NULL},
1029 };
1030
1031 static PyObject *
mbiencoder_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1032 mbiencoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1033 {
1034 MultibyteIncrementalEncoderObject *self;
1035 PyObject *codec = NULL;
1036 char *errors = NULL;
1037
1038 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalEncoder",
1039 incnewkwarglist, &errors))
1040 return NULL;
1041
1042 self = (MultibyteIncrementalEncoderObject *)type->tp_alloc(type, 0);
1043 if (self == NULL)
1044 return NULL;
1045
1046 codec = PyObject_GetAttrString((PyObject *)type, "codec");
1047 if (codec == NULL)
1048 goto errorexit;
1049
1050 _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type);
1051 if (!MultibyteCodec_Check(state, codec)) {
1052 PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1053 goto errorexit;
1054 }
1055
1056 self->codec = ((MultibyteCodecObject *)codec)->codec;
1057 self->pending = NULL;
1058 self->errors = internal_error_callback(errors);
1059 if (self->errors == NULL)
1060 goto errorexit;
1061 if (self->codec->encinit != NULL &&
1062 self->codec->encinit(&self->state, self->codec->config) != 0)
1063 goto errorexit;
1064
1065 Py_DECREF(codec);
1066 return (PyObject *)self;
1067
1068 errorexit:
1069 Py_XDECREF(self);
1070 Py_XDECREF(codec);
1071 return NULL;
1072 }
1073
1074 static int
mbiencoder_init(PyObject * self,PyObject * args,PyObject * kwds)1075 mbiencoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1076 {
1077 return 0;
1078 }
1079
1080 static int
mbiencoder_traverse(MultibyteIncrementalEncoderObject * self,visitproc visit,void * arg)1081 mbiencoder_traverse(MultibyteIncrementalEncoderObject *self,
1082 visitproc visit, void *arg)
1083 {
1084 if (ERROR_ISCUSTOM(self->errors))
1085 Py_VISIT(self->errors);
1086 return 0;
1087 }
1088
1089 static void
mbiencoder_dealloc(MultibyteIncrementalEncoderObject * self)1090 mbiencoder_dealloc(MultibyteIncrementalEncoderObject *self)
1091 {
1092 PyTypeObject *tp = Py_TYPE(self);
1093 PyObject_GC_UnTrack(self);
1094 ERROR_DECREF(self->errors);
1095 Py_CLEAR(self->pending);
1096 tp->tp_free(self);
1097 Py_DECREF(tp);
1098 }
1099
/* Slot table for the MultibyteIncrementalEncoder type, referenced by
 * encoder_spec.  Terminated by the {0, NULL} sentinel. */
static PyType_Slot encoder_slots[] = {
    {Py_tp_dealloc, mbiencoder_dealloc},
    {Py_tp_getattro, PyObject_GenericGetAttr},
    {Py_tp_traverse, mbiencoder_traverse},
    {Py_tp_methods, mbiencoder_methods},
    {Py_tp_getset, codecctx_getsets},
    {Py_tp_init, mbiencoder_init},
    {Py_tp_new, mbiencoder_new},
    {0, NULL},
};
1110
/* Type spec for MultibyteIncrementalEncoder: a GC-enabled, subclassable,
 * immutable type whose behaviour is defined by encoder_slots. */
static PyType_Spec encoder_spec = {
    .name = MODULE_NAME ".MultibyteIncrementalEncoder",
    .basicsize = sizeof(MultibyteIncrementalEncoderObject),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE |
              Py_TPFLAGS_IMMUTABLETYPE),
    .slots = encoder_slots,
};
1118
1119
/*[clinic input]
_multibytecodec.MultibyteIncrementalDecoder.decode

    input: Py_buffer
    final: bool(accept={int}) = False
[clinic start generated code]*/

static PyObject *
_multibytecodec_MultibyteIncrementalDecoder_decode_impl(MultibyteIncrementalDecoderObject *self,
                                                        Py_buffer *input,
                                                        int final)
/*[clinic end generated code: output=b9b9090e8a9ce2ba input=c9132b24d503eb1d]*/
{
    /*
     * Decode the bytes in `input`, prefixed by any bytes left pending from
     * a previous call, and return the decoded str (new reference) or NULL
     * on error.  When `final` is true, leftover undecoded bytes are
     * reported through multibytecodec_decerror() with MBERR_TOOFEW;
     * whatever is still undecoded afterwards is stored as the new pending
     * data.
     */
    MultibyteDecodeBuffer buf;
    char *data, *wdata = NULL;
    Py_ssize_t wsize, size, origpending;
    PyObject *res;

    data = input->buf;
    size = input->len;

    _PyUnicodeWriter_Init(&buf.writer);
    buf.excobj = NULL;
    /* Remembered so the pending bytes can be restored on a 'final' error. */
    origpending = self->pendingsize;

    if (self->pendingsize == 0) {
        /* Nothing pending: decode straight from the caller's buffer. */
        wsize = size;
        wdata = data;
    }
    else {
        /* Build a temporary buffer holding pending bytes + new input;
         * guard the size addition against Py_ssize_t overflow. */
        if (size > PY_SSIZE_T_MAX - self->pendingsize) {
            PyErr_NoMemory();
            goto errorexit;
        }
        wsize = size + self->pendingsize;
        wdata = PyMem_Malloc(wsize);
        if (wdata == NULL) {
            PyErr_NoMemory();
            goto errorexit;
        }
        memcpy(wdata, self->pending, self->pendingsize);
        memcpy(wdata + self->pendingsize, data, size);
        /* The pending bytes now live in wdata. */
        self->pendingsize = 0;
    }

    if (decoder_prepare_buffer(&buf, wdata, wsize) != 0)
        goto errorexit;

    if (decoder_feed_buffer(STATEFUL_DCTX(self), &buf))
        goto errorexit;

    if (final && buf.inbuf < buf.inbuf_end) {
        if (multibytecodec_decerror(self->codec, &self->state,
                                    &buf, self->errors, MBERR_TOOFEW)) {
            /* recover the original pending buffer */
            memcpy(self->pending, wdata, origpending);
            self->pendingsize = origpending;
            goto errorexit;
        }
    }

    if (buf.inbuf < buf.inbuf_end) { /* pending sequence still exists */
        if (decoder_append_pending(STATEFUL_DCTX(self), &buf) != 0)
            goto errorexit;
    }

    res = _PyUnicodeWriter_Finish(&buf.writer);
    if (res == NULL)
        goto errorexit;

    /* wdata is owned by this function only when it was allocated above. */
    if (wdata != data)
        PyMem_Free(wdata);
    Py_XDECREF(buf.excobj);
    return res;

errorexit:
    if (wdata != NULL && wdata != data)
        PyMem_Free(wdata);
    Py_XDECREF(buf.excobj);
    _PyUnicodeWriter_Dealloc(&buf.writer);
    return NULL;
}
1202
1203 /*[clinic input]
1204 _multibytecodec.MultibyteIncrementalDecoder.getstate
1205 [clinic start generated code]*/
1206
1207 static PyObject *
_multibytecodec_MultibyteIncrementalDecoder_getstate_impl(MultibyteIncrementalDecoderObject * self)1208 _multibytecodec_MultibyteIncrementalDecoder_getstate_impl(MultibyteIncrementalDecoderObject *self)
1209 /*[clinic end generated code: output=255009c4713b7f82 input=4006aa49bddbaa75]*/
1210 {
1211 PyObject *buffer;
1212 PyObject *statelong;
1213
1214 buffer = PyBytes_FromStringAndSize((const char *)self->pending,
1215 self->pendingsize);
1216 if (buffer == NULL) {
1217 return NULL;
1218 }
1219
1220 statelong = (PyObject *)_PyLong_FromByteArray(self->state.c,
1221 sizeof(self->state.c),
1222 1 /* little-endian */ ,
1223 0 /* unsigned */ );
1224 if (statelong == NULL) {
1225 Py_DECREF(buffer);
1226 return NULL;
1227 }
1228
1229 return Py_BuildValue("NN", buffer, statelong);
1230 }
1231
1232 /*[clinic input]
1233 _multibytecodec.MultibyteIncrementalDecoder.setstate
1234 state: object(subclass_of='&PyTuple_Type')
1235 /
1236 [clinic start generated code]*/
1237
1238 static PyObject *
_multibytecodec_MultibyteIncrementalDecoder_setstate_impl(MultibyteIncrementalDecoderObject * self,PyObject * state)1239 _multibytecodec_MultibyteIncrementalDecoder_setstate_impl(MultibyteIncrementalDecoderObject *self,
1240 PyObject *state)
1241 /*[clinic end generated code: output=106b2fbca3e2dcc2 input=e5d794e8baba1a47]*/
1242 {
1243 PyObject *buffer;
1244 PyLongObject *statelong;
1245 Py_ssize_t buffersize;
1246 const char *bufferstr;
1247 unsigned char statebytes[8];
1248
1249 if (!PyArg_ParseTuple(state, "SO!;setstate(): illegal state argument",
1250 &buffer, &PyLong_Type, &statelong))
1251 {
1252 return NULL;
1253 }
1254
1255 if (_PyLong_AsByteArray(statelong, statebytes, sizeof(statebytes),
1256 1 /* little-endian */ ,
1257 0 /* unsigned */ ) < 0) {
1258 return NULL;
1259 }
1260
1261 buffersize = PyBytes_Size(buffer);
1262 if (buffersize == -1) {
1263 return NULL;
1264 }
1265
1266 if (buffersize > MAXDECPENDING) {
1267 PyErr_SetString(PyExc_UnicodeError, "pending buffer too large");
1268 return NULL;
1269 }
1270
1271 bufferstr = PyBytes_AsString(buffer);
1272 if (bufferstr == NULL) {
1273 return NULL;
1274 }
1275 self->pendingsize = buffersize;
1276 memcpy(self->pending, bufferstr, self->pendingsize);
1277 memcpy(self->state.c, statebytes, sizeof(statebytes));
1278
1279 Py_RETURN_NONE;
1280 }
1281
1282 /*[clinic input]
1283 _multibytecodec.MultibyteIncrementalDecoder.reset
1284 [clinic start generated code]*/
1285
1286 static PyObject *
_multibytecodec_MultibyteIncrementalDecoder_reset_impl(MultibyteIncrementalDecoderObject * self)1287 _multibytecodec_MultibyteIncrementalDecoder_reset_impl(MultibyteIncrementalDecoderObject *self)
1288 /*[clinic end generated code: output=da423b1782c23ed1 input=3b63b3be85b2fb45]*/
1289 {
1290 if (self->codec->decreset != NULL &&
1291 self->codec->decreset(&self->state, self->codec->config) != 0)
1292 return NULL;
1293 self->pendingsize = 0;
1294
1295 Py_RETURN_NONE;
1296 }
1297
/* Method table for MultibyteIncrementalDecoder (referenced from
 * decoder_slots): decode, getstate, setstate and reset, followed by the
 * NULL sentinel. */
static struct PyMethodDef mbidecoder_methods[] = {
    _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_DECODE_METHODDEF
    _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_GETSTATE_METHODDEF
    _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_SETSTATE_METHODDEF
    _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_RESET_METHODDEF
    {NULL, NULL},
};
1305
1306 static PyObject *
mbidecoder_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1307 mbidecoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1308 {
1309 MultibyteIncrementalDecoderObject *self;
1310 PyObject *codec = NULL;
1311 char *errors = NULL;
1312
1313 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalDecoder",
1314 incnewkwarglist, &errors))
1315 return NULL;
1316
1317 self = (MultibyteIncrementalDecoderObject *)type->tp_alloc(type, 0);
1318 if (self == NULL)
1319 return NULL;
1320
1321 codec = PyObject_GetAttrString((PyObject *)type, "codec");
1322 if (codec == NULL)
1323 goto errorexit;
1324
1325 _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type);
1326 if (!MultibyteCodec_Check(state, codec)) {
1327 PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1328 goto errorexit;
1329 }
1330
1331 self->codec = ((MultibyteCodecObject *)codec)->codec;
1332 self->pendingsize = 0;
1333 self->errors = internal_error_callback(errors);
1334 if (self->errors == NULL)
1335 goto errorexit;
1336 if (self->codec->decinit != NULL &&
1337 self->codec->decinit(&self->state, self->codec->config) != 0)
1338 goto errorexit;
1339
1340 Py_DECREF(codec);
1341 return (PyObject *)self;
1342
1343 errorexit:
1344 Py_XDECREF(self);
1345 Py_XDECREF(codec);
1346 return NULL;
1347 }
1348
1349 static int
mbidecoder_init(PyObject * self,PyObject * args,PyObject * kwds)1350 mbidecoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1351 {
1352 return 0;
1353 }
1354
1355 static int
mbidecoder_traverse(MultibyteIncrementalDecoderObject * self,visitproc visit,void * arg)1356 mbidecoder_traverse(MultibyteIncrementalDecoderObject *self,
1357 visitproc visit, void *arg)
1358 {
1359 if (ERROR_ISCUSTOM(self->errors))
1360 Py_VISIT(self->errors);
1361 return 0;
1362 }
1363
1364 static void
mbidecoder_dealloc(MultibyteIncrementalDecoderObject * self)1365 mbidecoder_dealloc(MultibyteIncrementalDecoderObject *self)
1366 {
1367 PyTypeObject *tp = Py_TYPE(self);
1368 PyObject_GC_UnTrack(self);
1369 ERROR_DECREF(self->errors);
1370 tp->tp_free(self);
1371 Py_DECREF(tp);
1372 }
1373
/* Slot table for the MultibyteIncrementalDecoder type, referenced by
 * decoder_spec.  Terminated by the {0, NULL} sentinel. */
static PyType_Slot decoder_slots[] = {
    {Py_tp_dealloc, mbidecoder_dealloc},
    {Py_tp_getattro, PyObject_GenericGetAttr},
    {Py_tp_traverse, mbidecoder_traverse},
    {Py_tp_methods, mbidecoder_methods},
    {Py_tp_getset, codecctx_getsets},
    {Py_tp_init, mbidecoder_init},
    {Py_tp_new, mbidecoder_new},
    {0, NULL},
};
1384
/* Type spec for MultibyteIncrementalDecoder: a GC-enabled, subclassable,
 * immutable type whose behaviour is defined by decoder_slots. */
static PyType_Spec decoder_spec = {
    .name = MODULE_NAME ".MultibyteIncrementalDecoder",
    .basicsize = sizeof(MultibyteIncrementalDecoderObject),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE |
              Py_TPFLAGS_IMMUTABLETYPE),
    .slots = decoder_slots,
};
1392
1393 static PyObject *
mbstreamreader_iread(MultibyteStreamReaderObject * self,const char * method,Py_ssize_t sizehint)1394 mbstreamreader_iread(MultibyteStreamReaderObject *self,
1395 const char *method, Py_ssize_t sizehint)
1396 {
1397 MultibyteDecodeBuffer buf;
1398 PyObject *cres, *res;
1399 Py_ssize_t rsize;
1400
1401 if (sizehint == 0)
1402 return PyUnicode_New(0, 0);
1403
1404 _PyUnicodeWriter_Init(&buf.writer);
1405 buf.excobj = NULL;
1406 cres = NULL;
1407
1408 for (;;) {
1409 int endoffile;
1410
1411 if (sizehint < 0)
1412 cres = PyObject_CallMethod(self->stream,
1413 method, NULL);
1414 else
1415 cres = PyObject_CallMethod(self->stream,
1416 method, "i", sizehint);
1417 if (cres == NULL)
1418 goto errorexit;
1419
1420 if (!PyBytes_Check(cres)) {
1421 PyErr_Format(PyExc_TypeError,
1422 "stream function returned a "
1423 "non-bytes object (%.100s)",
1424 Py_TYPE(cres)->tp_name);
1425 goto errorexit;
1426 }
1427
1428 endoffile = (PyBytes_GET_SIZE(cres) == 0);
1429
1430 if (self->pendingsize > 0) {
1431 PyObject *ctr;
1432 char *ctrdata;
1433
1434 if (PyBytes_GET_SIZE(cres) > PY_SSIZE_T_MAX - self->pendingsize) {
1435 PyErr_NoMemory();
1436 goto errorexit;
1437 }
1438 rsize = PyBytes_GET_SIZE(cres) + self->pendingsize;
1439 ctr = PyBytes_FromStringAndSize(NULL, rsize);
1440 if (ctr == NULL)
1441 goto errorexit;
1442 ctrdata = PyBytes_AS_STRING(ctr);
1443 memcpy(ctrdata, self->pending, self->pendingsize);
1444 memcpy(ctrdata + self->pendingsize,
1445 PyBytes_AS_STRING(cres),
1446 PyBytes_GET_SIZE(cres));
1447 Py_DECREF(cres);
1448 cres = ctr;
1449 self->pendingsize = 0;
1450 }
1451
1452 rsize = PyBytes_GET_SIZE(cres);
1453 if (decoder_prepare_buffer(&buf, PyBytes_AS_STRING(cres),
1454 rsize) != 0)
1455 goto errorexit;
1456
1457 if (rsize > 0 && decoder_feed_buffer(
1458 (MultibyteStatefulDecoderContext *)self, &buf))
1459 goto errorexit;
1460
1461 if (endoffile || sizehint < 0) {
1462 if (buf.inbuf < buf.inbuf_end &&
1463 multibytecodec_decerror(self->codec, &self->state,
1464 &buf, self->errors, MBERR_TOOFEW))
1465 goto errorexit;
1466 }
1467
1468 if (buf.inbuf < buf.inbuf_end) { /* pending sequence exists */
1469 if (decoder_append_pending(STATEFUL_DCTX(self),
1470 &buf) != 0)
1471 goto errorexit;
1472 }
1473
1474 Py_DECREF(cres);
1475 cres = NULL;
1476
1477 if (sizehint < 0 || buf.writer.pos != 0 || rsize == 0)
1478 break;
1479
1480 sizehint = 1; /* read 1 more byte and retry */
1481 }
1482
1483 res = _PyUnicodeWriter_Finish(&buf.writer);
1484 if (res == NULL)
1485 goto errorexit;
1486
1487 Py_XDECREF(cres);
1488 Py_XDECREF(buf.excobj);
1489 return res;
1490
1491 errorexit:
1492 Py_XDECREF(cres);
1493 Py_XDECREF(buf.excobj);
1494 _PyUnicodeWriter_Dealloc(&buf.writer);
1495 return NULL;
1496 }
1497
1498 /*[clinic input]
1499 _multibytecodec.MultibyteStreamReader.read
1500
1501 sizeobj: object = None
1502 /
1503 [clinic start generated code]*/
1504
1505 static PyObject *
_multibytecodec_MultibyteStreamReader_read_impl(MultibyteStreamReaderObject * self,PyObject * sizeobj)1506 _multibytecodec_MultibyteStreamReader_read_impl(MultibyteStreamReaderObject *self,
1507 PyObject *sizeobj)
1508 /*[clinic end generated code: output=35621eb75355d5b8 input=015b0d3ff2fca485]*/
1509 {
1510 Py_ssize_t size;
1511
1512 if (sizeobj == Py_None)
1513 size = -1;
1514 else if (PyLong_Check(sizeobj))
1515 size = PyLong_AsSsize_t(sizeobj);
1516 else {
1517 PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
1518 return NULL;
1519 }
1520
1521 if (size == -1 && PyErr_Occurred())
1522 return NULL;
1523
1524 return mbstreamreader_iread(self, "read", size);
1525 }
1526
1527 /*[clinic input]
1528 _multibytecodec.MultibyteStreamReader.readline
1529
1530 sizeobj: object = None
1531 /
1532 [clinic start generated code]*/
1533
1534 static PyObject *
_multibytecodec_MultibyteStreamReader_readline_impl(MultibyteStreamReaderObject * self,PyObject * sizeobj)1535 _multibytecodec_MultibyteStreamReader_readline_impl(MultibyteStreamReaderObject *self,
1536 PyObject *sizeobj)
1537 /*[clinic end generated code: output=4fbfaae1ed457a11 input=41ccc64f9bb0cec3]*/
1538 {
1539 Py_ssize_t size;
1540
1541 if (sizeobj == Py_None)
1542 size = -1;
1543 else if (PyLong_Check(sizeobj))
1544 size = PyLong_AsSsize_t(sizeobj);
1545 else {
1546 PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
1547 return NULL;
1548 }
1549
1550 if (size == -1 && PyErr_Occurred())
1551 return NULL;
1552
1553 return mbstreamreader_iread(self, "readline", size);
1554 }
1555
1556 /*[clinic input]
1557 _multibytecodec.MultibyteStreamReader.readlines
1558
1559 sizehintobj: object = None
1560 /
1561 [clinic start generated code]*/
1562
1563 static PyObject *
_multibytecodec_MultibyteStreamReader_readlines_impl(MultibyteStreamReaderObject * self,PyObject * sizehintobj)1564 _multibytecodec_MultibyteStreamReader_readlines_impl(MultibyteStreamReaderObject *self,
1565 PyObject *sizehintobj)
1566 /*[clinic end generated code: output=e7c4310768ed2ad4 input=54932f5d4d88e880]*/
1567 {
1568 PyObject *r, *sr;
1569 Py_ssize_t sizehint;
1570
1571 if (sizehintobj == Py_None)
1572 sizehint = -1;
1573 else if (PyLong_Check(sizehintobj))
1574 sizehint = PyLong_AsSsize_t(sizehintobj);
1575 else {
1576 PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
1577 return NULL;
1578 }
1579
1580 if (sizehint == -1 && PyErr_Occurred())
1581 return NULL;
1582
1583 r = mbstreamreader_iread(self, "read", sizehint);
1584 if (r == NULL)
1585 return NULL;
1586
1587 sr = PyUnicode_Splitlines(r, 1);
1588 Py_DECREF(r);
1589 return sr;
1590 }
1591
1592 /*[clinic input]
1593 _multibytecodec.MultibyteStreamReader.reset
1594 [clinic start generated code]*/
1595
1596 static PyObject *
_multibytecodec_MultibyteStreamReader_reset_impl(MultibyteStreamReaderObject * self)1597 _multibytecodec_MultibyteStreamReader_reset_impl(MultibyteStreamReaderObject *self)
1598 /*[clinic end generated code: output=138490370a680abc input=5d4140db84b5e1e2]*/
1599 {
1600 if (self->codec->decreset != NULL &&
1601 self->codec->decreset(&self->state, self->codec->config) != 0)
1602 return NULL;
1603 self->pendingsize = 0;
1604
1605 Py_RETURN_NONE;
1606 }
1607
/* Method table for MultibyteStreamReader (referenced from reader_slots):
 * read, readline, readlines and reset, followed by the NULL sentinel. */
static struct PyMethodDef mbstreamreader_methods[] = {
    _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READ_METHODDEF
    _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READLINE_METHODDEF
    _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READLINES_METHODDEF
    _MULTIBYTECODEC_MULTIBYTESTREAMREADER_RESET_METHODDEF
    {NULL, NULL},
};
1615
/* Member table for MultibyteStreamReader: exposes the wrapped stream object
 * as the read-only attribute "stream". */
static PyMemberDef mbstreamreader_members[] = {
    {"stream",          T_OBJECT,
                    offsetof(MultibyteStreamReaderObject, stream),
                    READONLY,           NULL},
    {NULL,}
};
1622
1623 static PyObject *
mbstreamreader_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1624 mbstreamreader_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1625 {
1626 MultibyteStreamReaderObject *self;
1627 PyObject *stream, *codec = NULL;
1628 char *errors = NULL;
1629
1630 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamReader",
1631 streamkwarglist, &stream, &errors))
1632 return NULL;
1633
1634 self = (MultibyteStreamReaderObject *)type->tp_alloc(type, 0);
1635 if (self == NULL)
1636 return NULL;
1637
1638 codec = PyObject_GetAttrString((PyObject *)type, "codec");
1639 if (codec == NULL)
1640 goto errorexit;
1641
1642 _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type);
1643 if (!MultibyteCodec_Check(state, codec)) {
1644 PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1645 goto errorexit;
1646 }
1647
1648 self->codec = ((MultibyteCodecObject *)codec)->codec;
1649 self->stream = stream;
1650 Py_INCREF(stream);
1651 self->pendingsize = 0;
1652 self->errors = internal_error_callback(errors);
1653 if (self->errors == NULL)
1654 goto errorexit;
1655 if (self->codec->decinit != NULL &&
1656 self->codec->decinit(&self->state, self->codec->config) != 0)
1657 goto errorexit;
1658
1659 Py_DECREF(codec);
1660 return (PyObject *)self;
1661
1662 errorexit:
1663 Py_XDECREF(self);
1664 Py_XDECREF(codec);
1665 return NULL;
1666 }
1667
1668 static int
mbstreamreader_init(PyObject * self,PyObject * args,PyObject * kwds)1669 mbstreamreader_init(PyObject *self, PyObject *args, PyObject *kwds)
1670 {
1671 return 0;
1672 }
1673
1674 static int
mbstreamreader_traverse(MultibyteStreamReaderObject * self,visitproc visit,void * arg)1675 mbstreamreader_traverse(MultibyteStreamReaderObject *self,
1676 visitproc visit, void *arg)
1677 {
1678 if (ERROR_ISCUSTOM(self->errors))
1679 Py_VISIT(self->errors);
1680 Py_VISIT(self->stream);
1681 return 0;
1682 }
1683
1684 static void
mbstreamreader_dealloc(MultibyteStreamReaderObject * self)1685 mbstreamreader_dealloc(MultibyteStreamReaderObject *self)
1686 {
1687 PyTypeObject *tp = Py_TYPE(self);
1688 PyObject_GC_UnTrack(self);
1689 ERROR_DECREF(self->errors);
1690 Py_XDECREF(self->stream);
1691 tp->tp_free(self);
1692 Py_DECREF(tp);
1693 }
1694
/* Slot table for the MultibyteStreamReader type, referenced by reader_spec.
 * Terminated by the {0, NULL} sentinel. */
static PyType_Slot reader_slots[] = {
    {Py_tp_dealloc, mbstreamreader_dealloc},
    {Py_tp_getattro, PyObject_GenericGetAttr},
    {Py_tp_traverse, mbstreamreader_traverse},
    {Py_tp_methods, mbstreamreader_methods},
    {Py_tp_members, mbstreamreader_members},
    {Py_tp_getset, codecctx_getsets},
    {Py_tp_init, mbstreamreader_init},
    {Py_tp_new, mbstreamreader_new},
    {0, NULL},
};
1706
/* Type spec for MultibyteStreamReader: a GC-enabled, subclassable,
 * immutable type whose behaviour is defined by reader_slots. */
static PyType_Spec reader_spec = {
    .name = MODULE_NAME ".MultibyteStreamReader",
    .basicsize = sizeof(MultibyteStreamReaderObject),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE |
              Py_TPFLAGS_IMMUTABLETYPE),
    .slots = reader_slots,
};
1714
1715 static int
mbstreamwriter_iwrite(MultibyteStreamWriterObject * self,PyObject * unistr)1716 mbstreamwriter_iwrite(MultibyteStreamWriterObject *self,
1717 PyObject *unistr)
1718 {
1719 PyObject *str, *wr;
1720
1721 str = encoder_encode_stateful(STATEFUL_ECTX(self), unistr, 0);
1722 if (str == NULL)
1723 return -1;
1724
1725 wr = _PyObject_CallMethodIdOneArg(self->stream, &PyId_write, str);
1726 Py_DECREF(str);
1727 if (wr == NULL)
1728 return -1;
1729
1730 Py_DECREF(wr);
1731 return 0;
1732 }
1733
1734 /*[clinic input]
1735 _multibytecodec.MultibyteStreamWriter.write
1736
1737 strobj: object
1738 /
1739 [clinic start generated code]*/
1740
1741 static PyObject *
_multibytecodec_MultibyteStreamWriter_write(MultibyteStreamWriterObject * self,PyObject * strobj)1742 _multibytecodec_MultibyteStreamWriter_write(MultibyteStreamWriterObject *self,
1743 PyObject *strobj)
1744 /*[clinic end generated code: output=e13ae841c895251e input=551dc4c018c10a2b]*/
1745 {
1746 if (mbstreamwriter_iwrite(self, strobj))
1747 return NULL;
1748 else
1749 Py_RETURN_NONE;
1750 }
1751
1752 /*[clinic input]
1753 _multibytecodec.MultibyteStreamWriter.writelines
1754
1755 lines: object
1756 /
1757 [clinic start generated code]*/
1758
1759 static PyObject *
_multibytecodec_MultibyteStreamWriter_writelines(MultibyteStreamWriterObject * self,PyObject * lines)1760 _multibytecodec_MultibyteStreamWriter_writelines(MultibyteStreamWriterObject *self,
1761 PyObject *lines)
1762 /*[clinic end generated code: output=e5c4285ac8e7d522 input=57797fe7008d4e96]*/
1763 {
1764 PyObject *strobj;
1765 int i, r;
1766
1767 if (!PySequence_Check(lines)) {
1768 PyErr_SetString(PyExc_TypeError,
1769 "arg must be a sequence object");
1770 return NULL;
1771 }
1772
1773 for (i = 0; i < PySequence_Length(lines); i++) {
1774 /* length can be changed even within this loop */
1775 strobj = PySequence_GetItem(lines, i);
1776 if (strobj == NULL)
1777 return NULL;
1778
1779 r = mbstreamwriter_iwrite(self, strobj);
1780 Py_DECREF(strobj);
1781 if (r == -1)
1782 return NULL;
1783 }
1784 /* PySequence_Length() can fail */
1785 if (PyErr_Occurred())
1786 return NULL;
1787
1788 Py_RETURN_NONE;
1789 }
1790
1791 /*[clinic input]
1792 _multibytecodec.MultibyteStreamWriter.reset
1793 [clinic start generated code]*/
1794
1795 static PyObject *
_multibytecodec_MultibyteStreamWriter_reset_impl(MultibyteStreamWriterObject * self)1796 _multibytecodec_MultibyteStreamWriter_reset_impl(MultibyteStreamWriterObject *self)
1797 /*[clinic end generated code: output=8f54a4d9b03db5ff input=b56dbcbaf35cc10c]*/
1798 {
1799 PyObject *pwrt;
1800
1801 if (!self->pending)
1802 Py_RETURN_NONE;
1803
1804 pwrt = multibytecodec_encode(self->codec, &self->state,
1805 self->pending, NULL, self->errors,
1806 MBENC_FLUSH | MBENC_RESET);
1807 /* some pending buffer can be truncated when UnicodeEncodeError is
1808 * raised on 'strict' mode. but, 'reset' method is designed to
1809 * reset the pending buffer or states so failed string sequence
1810 * ought to be missed */
1811 Py_CLEAR(self->pending);
1812 if (pwrt == NULL)
1813 return NULL;
1814
1815 assert(PyBytes_Check(pwrt));
1816 if (PyBytes_Size(pwrt) > 0) {
1817 PyObject *wr;
1818
1819 wr = _PyObject_CallMethodIdOneArg(self->stream, &PyId_write, pwrt);
1820 if (wr == NULL) {
1821 Py_DECREF(pwrt);
1822 return NULL;
1823 }
1824 }
1825 Py_DECREF(pwrt);
1826
1827 Py_RETURN_NONE;
1828 }
1829
1830 static PyObject *
mbstreamwriter_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1831 mbstreamwriter_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1832 {
1833 MultibyteStreamWriterObject *self;
1834 PyObject *stream, *codec = NULL;
1835 char *errors = NULL;
1836
1837 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamWriter",
1838 streamkwarglist, &stream, &errors))
1839 return NULL;
1840
1841 self = (MultibyteStreamWriterObject *)type->tp_alloc(type, 0);
1842 if (self == NULL)
1843 return NULL;
1844
1845 codec = PyObject_GetAttrString((PyObject *)type, "codec");
1846 if (codec == NULL)
1847 goto errorexit;
1848
1849 _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type);
1850 if (!MultibyteCodec_Check(state, codec)) {
1851 PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1852 goto errorexit;
1853 }
1854
1855 self->codec = ((MultibyteCodecObject *)codec)->codec;
1856 self->stream = stream;
1857 Py_INCREF(stream);
1858 self->pending = NULL;
1859 self->errors = internal_error_callback(errors);
1860 if (self->errors == NULL)
1861 goto errorexit;
1862 if (self->codec->encinit != NULL &&
1863 self->codec->encinit(&self->state, self->codec->config) != 0)
1864 goto errorexit;
1865
1866 Py_DECREF(codec);
1867 return (PyObject *)self;
1868
1869 errorexit:
1870 Py_XDECREF(self);
1871 Py_XDECREF(codec);
1872 return NULL;
1873 }
1874
1875 static int
mbstreamwriter_init(PyObject * self,PyObject * args,PyObject * kwds)1876 mbstreamwriter_init(PyObject *self, PyObject *args, PyObject *kwds)
1877 {
1878 return 0;
1879 }
1880
1881 static int
mbstreamwriter_traverse(MultibyteStreamWriterObject * self,visitproc visit,void * arg)1882 mbstreamwriter_traverse(MultibyteStreamWriterObject *self,
1883 visitproc visit, void *arg)
1884 {
1885 if (ERROR_ISCUSTOM(self->errors))
1886 Py_VISIT(self->errors);
1887 Py_VISIT(self->stream);
1888 return 0;
1889 }
1890
1891 static void
mbstreamwriter_dealloc(MultibyteStreamWriterObject * self)1892 mbstreamwriter_dealloc(MultibyteStreamWriterObject *self)
1893 {
1894 PyTypeObject *tp = Py_TYPE(self);
1895 PyObject_GC_UnTrack(self);
1896 ERROR_DECREF(self->errors);
1897 Py_XDECREF(self->stream);
1898 tp->tp_free(self);
1899 Py_DECREF(tp);
1900 }
1901
/* Method table for MultibyteStreamWriter (referenced from writer_slots):
 * write, writelines and reset, followed by the NULL sentinel. */
static struct PyMethodDef mbstreamwriter_methods[] = {
    _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_WRITE_METHODDEF
    _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_WRITELINES_METHODDEF
    _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_RESET_METHODDEF
    {NULL, NULL},
};
1908
/* Member table for MultibyteStreamWriter: exposes the wrapped stream object
 * as the read-only attribute "stream". */
static PyMemberDef mbstreamwriter_members[] = {
    {"stream",          T_OBJECT,
                    offsetof(MultibyteStreamWriterObject, stream),
                    READONLY,           NULL},
    {NULL,}
};
1915
/* Slot table for the MultibyteStreamWriter type, referenced by writer_spec.
 * Terminated by the {0, NULL} sentinel. */
static PyType_Slot writer_slots[] = {
    {Py_tp_dealloc, mbstreamwriter_dealloc},
    {Py_tp_getattro, PyObject_GenericGetAttr},
    {Py_tp_traverse, mbstreamwriter_traverse},
    {Py_tp_methods, mbstreamwriter_methods},
    {Py_tp_members, mbstreamwriter_members},
    {Py_tp_getset, codecctx_getsets},
    {Py_tp_init, mbstreamwriter_init},
    {Py_tp_new, mbstreamwriter_new},
    {0, NULL},
};
1927
/* Type spec for MultibyteStreamWriter: a GC-enabled, subclassable,
 * immutable type whose behaviour is defined by writer_slots. */
static PyType_Spec writer_spec = {
    .name = MODULE_NAME ".MultibyteStreamWriter",
    .basicsize = sizeof(MultibyteStreamWriterObject),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE |
              Py_TPFLAGS_IMMUTABLETYPE),
    .slots = writer_slots,
};
1935
1936
1937 /*[clinic input]
1938 _multibytecodec.__create_codec
1939
1940 arg: object
1941 /
1942 [clinic start generated code]*/
1943
1944 static PyObject *
_multibytecodec___create_codec(PyObject * module,PyObject * arg)1945 _multibytecodec___create_codec(PyObject *module, PyObject *arg)
1946 /*[clinic end generated code: output=cfa3dce8260e809d input=6840b2a6b183fcfa]*/
1947 {
1948 MultibyteCodecObject *self;
1949 MultibyteCodec *codec;
1950
1951 if (!PyCapsule_IsValid(arg, PyMultibyteCodec_CAPSULE_NAME)) {
1952 PyErr_SetString(PyExc_ValueError, "argument type invalid");
1953 return NULL;
1954 }
1955
1956 codec = PyCapsule_GetPointer(arg, PyMultibyteCodec_CAPSULE_NAME);
1957 if (codec->codecinit != NULL && codec->codecinit(codec->config) != 0)
1958 return NULL;
1959
1960 _multibytecodec_state *state = _multibytecodec_get_state(module);
1961 self = PyObject_GC_New(MultibyteCodecObject, state->multibytecodec_type);
1962 if (self == NULL)
1963 return NULL;
1964 self->codec = codec;
1965
1966 PyObject_GC_Track(self);
1967 return (PyObject *)self;
1968 }
1969
1970 static int
_multibytecodec_traverse(PyObject * mod,visitproc visit,void * arg)1971 _multibytecodec_traverse(PyObject *mod, visitproc visit, void *arg)
1972 {
1973 _multibytecodec_state *state = _multibytecodec_get_state(mod);
1974 Py_VISIT(state->multibytecodec_type);
1975 Py_VISIT(state->encoder_type);
1976 Py_VISIT(state->decoder_type);
1977 Py_VISIT(state->reader_type);
1978 Py_VISIT(state->writer_type);
1979 return 0;
1980 }
1981
1982 static int
_multibytecodec_clear(PyObject * mod)1983 _multibytecodec_clear(PyObject *mod)
1984 {
1985 _multibytecodec_state *state = _multibytecodec_get_state(mod);
1986 Py_CLEAR(state->multibytecodec_type);
1987 Py_CLEAR(state->encoder_type);
1988 Py_CLEAR(state->decoder_type);
1989 Py_CLEAR(state->reader_type);
1990 Py_CLEAR(state->writer_type);
1991 return 0;
1992 }
1993
1994 static void
_multibytecodec_free(void * mod)1995 _multibytecodec_free(void *mod)
1996 {
1997 _multibytecodec_clear((PyObject *)mod);
1998 }
1999
2000 #define CREATE_TYPE(module, type, spec) \
2001 do { \
2002 type = (PyTypeObject *)PyType_FromModuleAndSpec(module, spec, NULL); \
2003 if (!type) { \
2004 return -1; \
2005 } \
2006 } while (0)
2007
2008 #define ADD_TYPE(module, type) \
2009 do { \
2010 if (PyModule_AddType(module, type) < 0) { \
2011 return -1; \
2012 } \
2013 } while (0)
2014
2015 static int
_multibytecodec_exec(PyObject * mod)2016 _multibytecodec_exec(PyObject *mod)
2017 {
2018 _multibytecodec_state *state = _multibytecodec_get_state(mod);
2019 CREATE_TYPE(mod, state->multibytecodec_type, &multibytecodec_spec);
2020 CREATE_TYPE(mod, state->encoder_type, &encoder_spec);
2021 CREATE_TYPE(mod, state->decoder_type, &decoder_spec);
2022 CREATE_TYPE(mod, state->reader_type, &reader_spec);
2023 CREATE_TYPE(mod, state->writer_type, &writer_spec);
2024
2025 ADD_TYPE(mod, state->encoder_type);
2026 ADD_TYPE(mod, state->decoder_type);
2027 ADD_TYPE(mod, state->reader_type);
2028 ADD_TYPE(mod, state->writer_type);
2029 return 0;
2030 }
2031
2032 #undef CREATE_TYPE
2033 #undef ADD_TYPE
2034
/* Module-level function table: only __create_codec, followed by the NULL
 * sentinel. */
static struct PyMethodDef _multibytecodec_methods[] = {
    _MULTIBYTECODEC___CREATE_CODEC_METHODDEF
    {NULL, NULL},
};
2039
/* Module slots: _multibytecodec_exec runs as the Py_mod_exec step. */
static PyModuleDef_Slot _multibytecodec_slots[] = {
    {Py_mod_exec, _multibytecodec_exec},
    {0, NULL}
};
2044
/* Module definition for _multibytecodec.  The per-module state is a
 * _multibytecodec_state; it is traversed, cleared and freed by the three
 * callbacks listed below. */
static struct PyModuleDef _multibytecodecmodule = {
    .m_base = PyModuleDef_HEAD_INIT,
    .m_name = "_multibytecodec",
    .m_size = sizeof(_multibytecodec_state),
    .m_methods = _multibytecodec_methods,
    .m_slots = _multibytecodec_slots,
    .m_traverse = _multibytecodec_traverse,
    .m_clear = _multibytecodec_clear,
    .m_free = _multibytecodec_free,
};
2055
2056 PyMODINIT_FUNC
PyInit__multibytecodec(void)2057 PyInit__multibytecodec(void)
2058 {
2059 return PyModuleDef_Init(&_multibytecodecmodule);
2060 }
2061