1 /* ------------------------------------------------------------------------
2
3 _codecs -- Provides access to the codec registry and the builtin
4 codecs.
5
6 This module should never be imported directly. The standard library
7 module "codecs" wraps this builtin module for use within Python.
8
9 The codec registry is accessible via:
10
11 register(search_function) -> None
12
13 lookup(encoding) -> CodecInfo object
14
15 The builtin Unicode codecs use the following interface:
16
   <encoding>_encode(Unicode_object[,errors='strict']) ->
        (bytes object, characters consumed)
19
20 <encoding>_decode(char_buffer_obj[,errors='strict']) ->
21 (Unicode object, bytes consumed)
22
   These <encoding>s are available: utf_7, utf_8, utf_16, utf_32,
   unicode_escape, raw_unicode_escape, latin_1, ascii (7-bit), charmap,
   and mbcs, oem, code_page (on win32).
25
26
27 Written by Marc-Andre Lemburg (mal@lemburg.com).
28
29 Copyright (c) Corporation for National Research Initiatives.
30
31 ------------------------------------------------------------------------ */
32
33 #define PY_SSIZE_T_CLEAN
34 #include "Python.h"
35
36 #ifdef MS_WINDOWS
37 #include <windows.h>
38 #endif
39
40 /*[clinic input]
41 module _codecs
42 [clinic start generated code]*/
43 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=e1390e3da3cb9deb]*/
44
45 #include "clinic/_codecsmodule.c.h"
46
47 /* --- Registry ----------------------------------------------------------- */
48
49 /*[clinic input]
50 _codecs.register
51 search_function: object
52 /
53
54 Register a codec search function.
55
56 Search functions are expected to take one argument, the encoding name in
57 all lower case letters, and either return None, or a tuple of functions
58 (encoder, decoder, stream_reader, stream_writer) (or a CodecInfo object).
59 [clinic start generated code]*/
60
61 static PyObject *
_codecs_register(PyObject * module,PyObject * search_function)62 _codecs_register(PyObject *module, PyObject *search_function)
63 /*[clinic end generated code: output=d1bf21e99db7d6d3 input=369578467955cae4]*/
64 {
65 if (PyCodec_Register(search_function))
66 return NULL;
67
68 Py_RETURN_NONE;
69 }
70
71 /*[clinic input]
72 _codecs.lookup
73 encoding: str
74 /
75
76 Looks up a codec tuple in the Python codec registry and returns a CodecInfo object.
77 [clinic start generated code]*/
78
79 static PyObject *
_codecs_lookup_impl(PyObject * module,const char * encoding)80 _codecs_lookup_impl(PyObject *module, const char *encoding)
81 /*[clinic end generated code: output=9f0afa572080c36d input=3c572c0db3febe9c]*/
82 {
83 return _PyCodec_Lookup(encoding);
84 }
85
86 /*[clinic input]
87 _codecs.encode
88 obj: object
89 encoding: str(c_default="NULL") = "utf-8"
90 errors: str(c_default="NULL") = "strict"
91
92 Encodes obj using the codec registered for encoding.
93
94 The default encoding is 'utf-8'. errors may be given to set a
95 different error handling scheme. Default is 'strict' meaning that encoding
96 errors raise a ValueError. Other possible values are 'ignore', 'replace'
97 and 'backslashreplace' as well as any other name registered with
98 codecs.register_error that can handle ValueErrors.
99 [clinic start generated code]*/
100
101 static PyObject *
_codecs_encode_impl(PyObject * module,PyObject * obj,const char * encoding,const char * errors)102 _codecs_encode_impl(PyObject *module, PyObject *obj, const char *encoding,
103 const char *errors)
104 /*[clinic end generated code: output=385148eb9a067c86 input=cd5b685040ff61f0]*/
105 {
106 if (encoding == NULL)
107 encoding = PyUnicode_GetDefaultEncoding();
108
109 /* Encode via the codec registry */
110 return PyCodec_Encode(obj, encoding, errors);
111 }
112
113 /*[clinic input]
114 _codecs.decode
115 obj: object
116 encoding: str(c_default="NULL") = "utf-8"
117 errors: str(c_default="NULL") = "strict"
118
119 Decodes obj using the codec registered for encoding.
120
121 Default encoding is 'utf-8'. errors may be given to set a
122 different error handling scheme. Default is 'strict' meaning that encoding
123 errors raise a ValueError. Other possible values are 'ignore', 'replace'
124 and 'backslashreplace' as well as any other name registered with
125 codecs.register_error that can handle ValueErrors.
126 [clinic start generated code]*/
127
128 static PyObject *
_codecs_decode_impl(PyObject * module,PyObject * obj,const char * encoding,const char * errors)129 _codecs_decode_impl(PyObject *module, PyObject *obj, const char *encoding,
130 const char *errors)
131 /*[clinic end generated code: output=679882417dc3a0bd input=7702c0cc2fa1add6]*/
132 {
133 if (encoding == NULL)
134 encoding = PyUnicode_GetDefaultEncoding();
135
136 /* Decode via the codec registry */
137 return PyCodec_Decode(obj, encoding, errors);
138 }
139
140 /* --- Helpers ------------------------------------------------------------ */
141
142 /*[clinic input]
143 _codecs._forget_codec
144
145 encoding: str
146 /
147
148 Purge the named codec from the internal codec lookup cache
149 [clinic start generated code]*/
150
151 static PyObject *
_codecs__forget_codec_impl(PyObject * module,const char * encoding)152 _codecs__forget_codec_impl(PyObject *module, const char *encoding)
153 /*[clinic end generated code: output=0bde9f0a5b084aa2 input=18d5d92d0e386c38]*/
154 {
155 if (_PyCodec_Forget(encoding) < 0) {
156 return NULL;
157 };
158 Py_RETURN_NONE;
159 }
160
161 static
codec_tuple(PyObject * decoded,Py_ssize_t len)162 PyObject *codec_tuple(PyObject *decoded,
163 Py_ssize_t len)
164 {
165 if (decoded == NULL)
166 return NULL;
167 return Py_BuildValue("Nn", decoded, len);
168 }
169
170 /* --- String codecs ------------------------------------------------------ */
171 /*[clinic input]
172 _codecs.escape_decode
173 data: Py_buffer(accept={str, buffer})
174 errors: str(accept={str, NoneType}) = None
175 /
176 [clinic start generated code]*/
177
178 static PyObject *
_codecs_escape_decode_impl(PyObject * module,Py_buffer * data,const char * errors)179 _codecs_escape_decode_impl(PyObject *module, Py_buffer *data,
180 const char *errors)
181 /*[clinic end generated code: output=505200ba8056979a input=77298a561c90bd82]*/
182 {
183 PyObject *decoded = PyBytes_DecodeEscape(data->buf, data->len,
184 errors, 0, NULL);
185 return codec_tuple(decoded, data->len);
186 }
187
188 /*[clinic input]
189 _codecs.escape_encode
190 data: object(subclass_of='&PyBytes_Type')
191 errors: str(accept={str, NoneType}) = None
192 /
193 [clinic start generated code]*/
194
195 static PyObject *
_codecs_escape_encode_impl(PyObject * module,PyObject * data,const char * errors)196 _codecs_escape_encode_impl(PyObject *module, PyObject *data,
197 const char *errors)
198 /*[clinic end generated code: output=4af1d477834bab34 input=8f4b144799a94245]*/
199 {
200 Py_ssize_t size;
201 Py_ssize_t newsize;
202 PyObject *v;
203
204 size = PyBytes_GET_SIZE(data);
205 if (size > PY_SSIZE_T_MAX / 4) {
206 PyErr_SetString(PyExc_OverflowError,
207 "string is too large to encode");
208 return NULL;
209 }
210 newsize = 4*size;
211 v = PyBytes_FromStringAndSize(NULL, newsize);
212
213 if (v == NULL) {
214 return NULL;
215 }
216 else {
217 Py_ssize_t i;
218 char c;
219 char *p = PyBytes_AS_STRING(v);
220
221 for (i = 0; i < size; i++) {
222 /* There's at least enough room for a hex escape */
223 assert(newsize - (p - PyBytes_AS_STRING(v)) >= 4);
224 c = PyBytes_AS_STRING(data)[i];
225 if (c == '\'' || c == '\\')
226 *p++ = '\\', *p++ = c;
227 else if (c == '\t')
228 *p++ = '\\', *p++ = 't';
229 else if (c == '\n')
230 *p++ = '\\', *p++ = 'n';
231 else if (c == '\r')
232 *p++ = '\\', *p++ = 'r';
233 else if (c < ' ' || c >= 0x7f) {
234 *p++ = '\\';
235 *p++ = 'x';
236 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
237 *p++ = Py_hexdigits[c & 0xf];
238 }
239 else
240 *p++ = c;
241 }
242 *p = '\0';
243 if (_PyBytes_Resize(&v, (p - PyBytes_AS_STRING(v)))) {
244 return NULL;
245 }
246 }
247
248 return codec_tuple(v, size);
249 }
250
251 /* --- Decoder ------------------------------------------------------------ */
252 /*[clinic input]
253 _codecs.utf_7_decode
254 data: Py_buffer
255 errors: str(accept={str, NoneType}) = None
256 final: bool(accept={int}) = False
257 /
258 [clinic start generated code]*/
259
260 static PyObject *
_codecs_utf_7_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)261 _codecs_utf_7_decode_impl(PyObject *module, Py_buffer *data,
262 const char *errors, int final)
263 /*[clinic end generated code: output=0cd3a944a32a4089 input=22c395d357815d26]*/
264 {
265 Py_ssize_t consumed = data->len;
266 PyObject *decoded = PyUnicode_DecodeUTF7Stateful(data->buf, data->len,
267 errors,
268 final ? NULL : &consumed);
269 return codec_tuple(decoded, consumed);
270 }
271
272 /*[clinic input]
273 _codecs.utf_8_decode
274 data: Py_buffer
275 errors: str(accept={str, NoneType}) = None
276 final: bool(accept={int}) = False
277 /
278 [clinic start generated code]*/
279
280 static PyObject *
_codecs_utf_8_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)281 _codecs_utf_8_decode_impl(PyObject *module, Py_buffer *data,
282 const char *errors, int final)
283 /*[clinic end generated code: output=10f74dec8d9bb8bf input=f611b3867352ba59]*/
284 {
285 Py_ssize_t consumed = data->len;
286 PyObject *decoded = PyUnicode_DecodeUTF8Stateful(data->buf, data->len,
287 errors,
288 final ? NULL : &consumed);
289 return codec_tuple(decoded, consumed);
290 }
291
292 /*[clinic input]
293 _codecs.utf_16_decode
294 data: Py_buffer
295 errors: str(accept={str, NoneType}) = None
296 final: bool(accept={int}) = False
297 /
298 [clinic start generated code]*/
299
300 static PyObject *
_codecs_utf_16_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)301 _codecs_utf_16_decode_impl(PyObject *module, Py_buffer *data,
302 const char *errors, int final)
303 /*[clinic end generated code: output=783b442abcbcc2d0 input=191d360bd7309180]*/
304 {
305 int byteorder = 0;
306 /* This is overwritten unless final is true. */
307 Py_ssize_t consumed = data->len;
308 PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
309 errors, &byteorder,
310 final ? NULL : &consumed);
311 return codec_tuple(decoded, consumed);
312 }
313
314 /*[clinic input]
315 _codecs.utf_16_le_decode
316 data: Py_buffer
317 errors: str(accept={str, NoneType}) = None
318 final: bool(accept={int}) = False
319 /
320 [clinic start generated code]*/
321
322 static PyObject *
_codecs_utf_16_le_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)323 _codecs_utf_16_le_decode_impl(PyObject *module, Py_buffer *data,
324 const char *errors, int final)
325 /*[clinic end generated code: output=899b9e6364379dcd input=c6904fdc27fb4724]*/
326 {
327 int byteorder = -1;
328 /* This is overwritten unless final is true. */
329 Py_ssize_t consumed = data->len;
330 PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
331 errors, &byteorder,
332 final ? NULL : &consumed);
333 return codec_tuple(decoded, consumed);
334 }
335
336 /*[clinic input]
337 _codecs.utf_16_be_decode
338 data: Py_buffer
339 errors: str(accept={str, NoneType}) = None
340 final: bool(accept={int}) = False
341 /
342 [clinic start generated code]*/
343
344 static PyObject *
_codecs_utf_16_be_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)345 _codecs_utf_16_be_decode_impl(PyObject *module, Py_buffer *data,
346 const char *errors, int final)
347 /*[clinic end generated code: output=49f6465ea07669c8 input=e49012400974649b]*/
348 {
349 int byteorder = 1;
350 /* This is overwritten unless final is true. */
351 Py_ssize_t consumed = data->len;
352 PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
353 errors, &byteorder,
354 final ? NULL : &consumed);
355 return codec_tuple(decoded, consumed);
356 }
357
358 /* This non-standard version also provides access to the byteorder
359 parameter of the builtin UTF-16 codec.
360
361 It returns a tuple (unicode, bytesread, byteorder) with byteorder
362 being the value in effect at the end of data.
363
364 */
365 /*[clinic input]
366 _codecs.utf_16_ex_decode
367 data: Py_buffer
368 errors: str(accept={str, NoneType}) = None
369 byteorder: int = 0
370 final: bool(accept={int}) = False
371 /
372 [clinic start generated code]*/
373
374 static PyObject *
_codecs_utf_16_ex_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int byteorder,int final)375 _codecs_utf_16_ex_decode_impl(PyObject *module, Py_buffer *data,
376 const char *errors, int byteorder, int final)
377 /*[clinic end generated code: output=0f385f251ecc1988 input=5a9c19f2e6b6cf0e]*/
378 {
379 /* This is overwritten unless final is true. */
380 Py_ssize_t consumed = data->len;
381
382 PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
383 errors, &byteorder,
384 final ? NULL : &consumed);
385 if (decoded == NULL)
386 return NULL;
387 return Py_BuildValue("Nni", decoded, consumed, byteorder);
388 }
389
390 /*[clinic input]
391 _codecs.utf_32_decode
392 data: Py_buffer
393 errors: str(accept={str, NoneType}) = None
394 final: bool(accept={int}) = False
395 /
396 [clinic start generated code]*/
397
398 static PyObject *
_codecs_utf_32_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)399 _codecs_utf_32_decode_impl(PyObject *module, Py_buffer *data,
400 const char *errors, int final)
401 /*[clinic end generated code: output=2fc961807f7b145f input=fd7193965627eb58]*/
402 {
403 int byteorder = 0;
404 /* This is overwritten unless final is true. */
405 Py_ssize_t consumed = data->len;
406 PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
407 errors, &byteorder,
408 final ? NULL : &consumed);
409 return codec_tuple(decoded, consumed);
410 }
411
412 /*[clinic input]
413 _codecs.utf_32_le_decode
414 data: Py_buffer
415 errors: str(accept={str, NoneType}) = None
416 final: bool(accept={int}) = False
417 /
418 [clinic start generated code]*/
419
420 static PyObject *
_codecs_utf_32_le_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)421 _codecs_utf_32_le_decode_impl(PyObject *module, Py_buffer *data,
422 const char *errors, int final)
423 /*[clinic end generated code: output=ec8f46b67a94f3e6 input=9078ec70acfe7613]*/
424 {
425 int byteorder = -1;
426 /* This is overwritten unless final is true. */
427 Py_ssize_t consumed = data->len;
428 PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
429 errors, &byteorder,
430 final ? NULL : &consumed);
431 return codec_tuple(decoded, consumed);
432 }
433
434 /*[clinic input]
435 _codecs.utf_32_be_decode
436 data: Py_buffer
437 errors: str(accept={str, NoneType}) = None
438 final: bool(accept={int}) = False
439 /
440 [clinic start generated code]*/
441
442 static PyObject *
_codecs_utf_32_be_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)443 _codecs_utf_32_be_decode_impl(PyObject *module, Py_buffer *data,
444 const char *errors, int final)
445 /*[clinic end generated code: output=ff82bae862c92c4e input=f1ae1bbbb86648ff]*/
446 {
447 int byteorder = 1;
448 /* This is overwritten unless final is true. */
449 Py_ssize_t consumed = data->len;
450 PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
451 errors, &byteorder,
452 final ? NULL : &consumed);
453 return codec_tuple(decoded, consumed);
454 }
455
456 /* This non-standard version also provides access to the byteorder
457 parameter of the builtin UTF-32 codec.
458
459 It returns a tuple (unicode, bytesread, byteorder) with byteorder
460 being the value in effect at the end of data.
461
462 */
463 /*[clinic input]
464 _codecs.utf_32_ex_decode
465 data: Py_buffer
466 errors: str(accept={str, NoneType}) = None
467 byteorder: int = 0
468 final: bool(accept={int}) = False
469 /
470 [clinic start generated code]*/
471
472 static PyObject *
_codecs_utf_32_ex_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int byteorder,int final)473 _codecs_utf_32_ex_decode_impl(PyObject *module, Py_buffer *data,
474 const char *errors, int byteorder, int final)
475 /*[clinic end generated code: output=6bfb177dceaf4848 input=e46a73bc859d0bd0]*/
476 {
477 Py_ssize_t consumed = data->len;
478 PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
479 errors, &byteorder,
480 final ? NULL : &consumed);
481 if (decoded == NULL)
482 return NULL;
483 return Py_BuildValue("Nni", decoded, consumed, byteorder);
484 }
485
486 /*[clinic input]
487 _codecs.unicode_escape_decode
488 data: Py_buffer(accept={str, buffer})
489 errors: str(accept={str, NoneType}) = None
490 /
491 [clinic start generated code]*/
492
493 static PyObject *
_codecs_unicode_escape_decode_impl(PyObject * module,Py_buffer * data,const char * errors)494 _codecs_unicode_escape_decode_impl(PyObject *module, Py_buffer *data,
495 const char *errors)
496 /*[clinic end generated code: output=3ca3c917176b82ab input=8328081a3a569bd6]*/
497 {
498 PyObject *decoded = PyUnicode_DecodeUnicodeEscape(data->buf, data->len,
499 errors);
500 return codec_tuple(decoded, data->len);
501 }
502
503 /*[clinic input]
504 _codecs.raw_unicode_escape_decode
505 data: Py_buffer(accept={str, buffer})
506 errors: str(accept={str, NoneType}) = None
507 /
508 [clinic start generated code]*/
509
510 static PyObject *
_codecs_raw_unicode_escape_decode_impl(PyObject * module,Py_buffer * data,const char * errors)511 _codecs_raw_unicode_escape_decode_impl(PyObject *module, Py_buffer *data,
512 const char *errors)
513 /*[clinic end generated code: output=c98eeb56028070a6 input=d2f5159ce3b3392f]*/
514 {
515 PyObject *decoded = PyUnicode_DecodeRawUnicodeEscape(data->buf, data->len,
516 errors);
517 return codec_tuple(decoded, data->len);
518 }
519
520 /*[clinic input]
521 _codecs.latin_1_decode
522 data: Py_buffer
523 errors: str(accept={str, NoneType}) = None
524 /
525 [clinic start generated code]*/
526
527 static PyObject *
_codecs_latin_1_decode_impl(PyObject * module,Py_buffer * data,const char * errors)528 _codecs_latin_1_decode_impl(PyObject *module, Py_buffer *data,
529 const char *errors)
530 /*[clinic end generated code: output=07f3dfa3f72c7d8f input=76ca58fd6dcd08c7]*/
531 {
532 PyObject *decoded = PyUnicode_DecodeLatin1(data->buf, data->len, errors);
533 return codec_tuple(decoded, data->len);
534 }
535
536 /*[clinic input]
537 _codecs.ascii_decode
538 data: Py_buffer
539 errors: str(accept={str, NoneType}) = None
540 /
541 [clinic start generated code]*/
542
543 static PyObject *
_codecs_ascii_decode_impl(PyObject * module,Py_buffer * data,const char * errors)544 _codecs_ascii_decode_impl(PyObject *module, Py_buffer *data,
545 const char *errors)
546 /*[clinic end generated code: output=2627d72058d42429 input=e428a267a04b4481]*/
547 {
548 PyObject *decoded = PyUnicode_DecodeASCII(data->buf, data->len, errors);
549 return codec_tuple(decoded, data->len);
550 }
551
552 /*[clinic input]
553 _codecs.charmap_decode
554 data: Py_buffer
555 errors: str(accept={str, NoneType}) = None
556 mapping: object = None
557 /
558 [clinic start generated code]*/
559
560 static PyObject *
_codecs_charmap_decode_impl(PyObject * module,Py_buffer * data,const char * errors,PyObject * mapping)561 _codecs_charmap_decode_impl(PyObject *module, Py_buffer *data,
562 const char *errors, PyObject *mapping)
563 /*[clinic end generated code: output=2c335b09778cf895 input=15b69df43458eb40]*/
564 {
565 PyObject *decoded;
566
567 if (mapping == Py_None)
568 mapping = NULL;
569
570 decoded = PyUnicode_DecodeCharmap(data->buf, data->len, mapping, errors);
571 return codec_tuple(decoded, data->len);
572 }
573
574 #ifdef MS_WINDOWS
575
576 /*[clinic input]
577 _codecs.mbcs_decode
578 data: Py_buffer
579 errors: str(accept={str, NoneType}) = None
580 final: bool(accept={int}) = False
581 /
582 [clinic start generated code]*/
583
584 static PyObject *
_codecs_mbcs_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)585 _codecs_mbcs_decode_impl(PyObject *module, Py_buffer *data,
586 const char *errors, int final)
587 /*[clinic end generated code: output=39b65b8598938c4b input=1c1d50f08fa53789]*/
588 {
589 Py_ssize_t consumed = data->len;
590 PyObject *decoded = PyUnicode_DecodeMBCSStateful(data->buf, data->len,
591 errors, final ? NULL : &consumed);
592 return codec_tuple(decoded, consumed);
593 }
594
595 /*[clinic input]
596 _codecs.oem_decode
597 data: Py_buffer
598 errors: str(accept={str, NoneType}) = None
599 final: bool(accept={int}) = False
600 /
601 [clinic start generated code]*/
602
603 static PyObject *
_codecs_oem_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)604 _codecs_oem_decode_impl(PyObject *module, Py_buffer *data,
605 const char *errors, int final)
606 /*[clinic end generated code: output=da1617612f3fcad8 input=81b67cba811022e5]*/
607 {
608 Py_ssize_t consumed = data->len;
609 PyObject *decoded = PyUnicode_DecodeCodePageStateful(CP_OEMCP,
610 data->buf, data->len, errors, final ? NULL : &consumed);
611 return codec_tuple(decoded, consumed);
612 }
613
614 /*[clinic input]
615 _codecs.code_page_decode
616 codepage: int
617 data: Py_buffer
618 errors: str(accept={str, NoneType}) = None
619 final: bool(accept={int}) = False
620 /
621 [clinic start generated code]*/
622
623 static PyObject *
_codecs_code_page_decode_impl(PyObject * module,int codepage,Py_buffer * data,const char * errors,int final)624 _codecs_code_page_decode_impl(PyObject *module, int codepage,
625 Py_buffer *data, const char *errors, int final)
626 /*[clinic end generated code: output=53008ea967da3fff input=c5f58d036cb63575]*/
627 {
628 Py_ssize_t consumed = data->len;
629 PyObject *decoded = PyUnicode_DecodeCodePageStateful(codepage,
630 data->buf, data->len,
631 errors,
632 final ? NULL : &consumed);
633 return codec_tuple(decoded, consumed);
634 }
635
636 #endif /* MS_WINDOWS */
637
638 /* --- Encoder ------------------------------------------------------------ */
639
640 /*[clinic input]
641 _codecs.readbuffer_encode
642 data: Py_buffer(accept={str, buffer})
643 errors: str(accept={str, NoneType}) = None
644 /
645 [clinic start generated code]*/
646
647 static PyObject *
_codecs_readbuffer_encode_impl(PyObject * module,Py_buffer * data,const char * errors)648 _codecs_readbuffer_encode_impl(PyObject *module, Py_buffer *data,
649 const char *errors)
650 /*[clinic end generated code: output=c645ea7cdb3d6e86 input=aa10cfdf252455c5]*/
651 {
652 PyObject *result = PyBytes_FromStringAndSize(data->buf, data->len);
653 return codec_tuple(result, data->len);
654 }
655
656 /*[clinic input]
657 _codecs.utf_7_encode
658 str: unicode
659 errors: str(accept={str, NoneType}) = None
660 /
661 [clinic start generated code]*/
662
663 static PyObject *
_codecs_utf_7_encode_impl(PyObject * module,PyObject * str,const char * errors)664 _codecs_utf_7_encode_impl(PyObject *module, PyObject *str,
665 const char *errors)
666 /*[clinic end generated code: output=0feda21ffc921bc8 input=2546dbbb3fa53114]*/
667 {
668 return codec_tuple(_PyUnicode_EncodeUTF7(str, 0, 0, errors),
669 PyUnicode_GET_LENGTH(str));
670 }
671
672 /*[clinic input]
673 _codecs.utf_8_encode
674 str: unicode
675 errors: str(accept={str, NoneType}) = None
676 /
677 [clinic start generated code]*/
678
679 static PyObject *
_codecs_utf_8_encode_impl(PyObject * module,PyObject * str,const char * errors)680 _codecs_utf_8_encode_impl(PyObject *module, PyObject *str,
681 const char *errors)
682 /*[clinic end generated code: output=02bf47332b9c796c input=a3e71ae01c3f93f3]*/
683 {
684 return codec_tuple(_PyUnicode_AsUTF8String(str, errors),
685 PyUnicode_GET_LENGTH(str));
686 }
687
/* This version provides access to the byteorder parameter of the
   builtin UTF-16 codecs as an optional third argument. It defaults to 0,
   which means: use the native byte order and prepend a BOM (byte order
   mark) to the data.
692
693 */
694
695 /*[clinic input]
696 _codecs.utf_16_encode
697 str: unicode
698 errors: str(accept={str, NoneType}) = None
699 byteorder: int = 0
700 /
701 [clinic start generated code]*/
702
703 static PyObject *
_codecs_utf_16_encode_impl(PyObject * module,PyObject * str,const char * errors,int byteorder)704 _codecs_utf_16_encode_impl(PyObject *module, PyObject *str,
705 const char *errors, int byteorder)
706 /*[clinic end generated code: output=c654e13efa2e64e4 input=68cdc2eb8338555d]*/
707 {
708 return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, byteorder),
709 PyUnicode_GET_LENGTH(str));
710 }
711
712 /*[clinic input]
713 _codecs.utf_16_le_encode
714 str: unicode
715 errors: str(accept={str, NoneType}) = None
716 /
717 [clinic start generated code]*/
718
719 static PyObject *
_codecs_utf_16_le_encode_impl(PyObject * module,PyObject * str,const char * errors)720 _codecs_utf_16_le_encode_impl(PyObject *module, PyObject *str,
721 const char *errors)
722 /*[clinic end generated code: output=431b01e55f2d4995 input=83d042706eed6798]*/
723 {
724 return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, -1),
725 PyUnicode_GET_LENGTH(str));
726 }
727
728 /*[clinic input]
729 _codecs.utf_16_be_encode
730 str: unicode
731 errors: str(accept={str, NoneType}) = None
732 /
733 [clinic start generated code]*/
734
735 static PyObject *
_codecs_utf_16_be_encode_impl(PyObject * module,PyObject * str,const char * errors)736 _codecs_utf_16_be_encode_impl(PyObject *module, PyObject *str,
737 const char *errors)
738 /*[clinic end generated code: output=96886a6fd54dcae3 input=6f1e9e623b03071b]*/
739 {
740 return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, +1),
741 PyUnicode_GET_LENGTH(str));
742 }
743
/* This version provides access to the byteorder parameter of the
   builtin UTF-32 codecs as an optional third argument. It defaults to 0,
   which means: use the native byte order and prepend a BOM (byte order
   mark) to the data.
748
749 */
750
751 /*[clinic input]
752 _codecs.utf_32_encode
753 str: unicode
754 errors: str(accept={str, NoneType}) = None
755 byteorder: int = 0
756 /
757 [clinic start generated code]*/
758
759 static PyObject *
_codecs_utf_32_encode_impl(PyObject * module,PyObject * str,const char * errors,int byteorder)760 _codecs_utf_32_encode_impl(PyObject *module, PyObject *str,
761 const char *errors, int byteorder)
762 /*[clinic end generated code: output=5c760da0c09a8b83 input=8ec4c64d983bc52b]*/
763 {
764 return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, byteorder),
765 PyUnicode_GET_LENGTH(str));
766 }
767
768 /*[clinic input]
769 _codecs.utf_32_le_encode
770 str: unicode
771 errors: str(accept={str, NoneType}) = None
772 /
773 [clinic start generated code]*/
774
775 static PyObject *
_codecs_utf_32_le_encode_impl(PyObject * module,PyObject * str,const char * errors)776 _codecs_utf_32_le_encode_impl(PyObject *module, PyObject *str,
777 const char *errors)
778 /*[clinic end generated code: output=b65cd176de8e36d6 input=f0918d41de3eb1b1]*/
779 {
780 return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, -1),
781 PyUnicode_GET_LENGTH(str));
782 }
783
784 /*[clinic input]
785 _codecs.utf_32_be_encode
786 str: unicode
787 errors: str(accept={str, NoneType}) = None
788 /
789 [clinic start generated code]*/
790
791 static PyObject *
_codecs_utf_32_be_encode_impl(PyObject * module,PyObject * str,const char * errors)792 _codecs_utf_32_be_encode_impl(PyObject *module, PyObject *str,
793 const char *errors)
794 /*[clinic end generated code: output=1d9e71a9358709e9 input=967a99a95748b557]*/
795 {
796 return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, +1),
797 PyUnicode_GET_LENGTH(str));
798 }
799
800 /*[clinic input]
801 _codecs.unicode_escape_encode
802 str: unicode
803 errors: str(accept={str, NoneType}) = None
804 /
805 [clinic start generated code]*/
806
807 static PyObject *
_codecs_unicode_escape_encode_impl(PyObject * module,PyObject * str,const char * errors)808 _codecs_unicode_escape_encode_impl(PyObject *module, PyObject *str,
809 const char *errors)
810 /*[clinic end generated code: output=66271b30bc4f7a3c input=8c4de07597054e33]*/
811 {
812 return codec_tuple(PyUnicode_AsUnicodeEscapeString(str),
813 PyUnicode_GET_LENGTH(str));
814 }
815
816 /*[clinic input]
817 _codecs.raw_unicode_escape_encode
818 str: unicode
819 errors: str(accept={str, NoneType}) = None
820 /
821 [clinic start generated code]*/
822
823 static PyObject *
_codecs_raw_unicode_escape_encode_impl(PyObject * module,PyObject * str,const char * errors)824 _codecs_raw_unicode_escape_encode_impl(PyObject *module, PyObject *str,
825 const char *errors)
826 /*[clinic end generated code: output=a66a806ed01c830a input=4aa6f280d78e4574]*/
827 {
828 return codec_tuple(PyUnicode_AsRawUnicodeEscapeString(str),
829 PyUnicode_GET_LENGTH(str));
830 }
831
832 /*[clinic input]
833 _codecs.latin_1_encode
834 str: unicode
835 errors: str(accept={str, NoneType}) = None
836 /
837 [clinic start generated code]*/
838
839 static PyObject *
_codecs_latin_1_encode_impl(PyObject * module,PyObject * str,const char * errors)840 _codecs_latin_1_encode_impl(PyObject *module, PyObject *str,
841 const char *errors)
842 /*[clinic end generated code: output=2c28c83a27884e08 input=ec3ef74bf85c5c5d]*/
843 {
844 return codec_tuple(_PyUnicode_AsLatin1String(str, errors),
845 PyUnicode_GET_LENGTH(str));
846 }
847
848 /*[clinic input]
849 _codecs.ascii_encode
850 str: unicode
851 errors: str(accept={str, NoneType}) = None
852 /
853 [clinic start generated code]*/
854
855 static PyObject *
_codecs_ascii_encode_impl(PyObject * module,PyObject * str,const char * errors)856 _codecs_ascii_encode_impl(PyObject *module, PyObject *str,
857 const char *errors)
858 /*[clinic end generated code: output=b5e035182d33befc input=93e6e602838bd3de]*/
859 {
860 return codec_tuple(_PyUnicode_AsASCIIString(str, errors),
861 PyUnicode_GET_LENGTH(str));
862 }
863
864 /*[clinic input]
865 _codecs.charmap_encode
866 str: unicode
867 errors: str(accept={str, NoneType}) = None
868 mapping: object = None
869 /
870 [clinic start generated code]*/
871
872 static PyObject *
_codecs_charmap_encode_impl(PyObject * module,PyObject * str,const char * errors,PyObject * mapping)873 _codecs_charmap_encode_impl(PyObject *module, PyObject *str,
874 const char *errors, PyObject *mapping)
875 /*[clinic end generated code: output=047476f48495a9e9 input=2a98feae73dadce8]*/
876 {
877 if (mapping == Py_None)
878 mapping = NULL;
879
880 return codec_tuple(_PyUnicode_EncodeCharmap(str, mapping, errors),
881 PyUnicode_GET_LENGTH(str));
882 }
883
884 /*[clinic input]
885 _codecs.charmap_build
886 map: unicode
887 /
888 [clinic start generated code]*/
889
890 static PyObject *
_codecs_charmap_build_impl(PyObject * module,PyObject * map)891 _codecs_charmap_build_impl(PyObject *module, PyObject *map)
892 /*[clinic end generated code: output=bb073c27031db9ac input=d91a91d1717dbc6d]*/
893 {
894 return PyUnicode_BuildEncodingMap(map);
895 }
896
897 #ifdef MS_WINDOWS
898
899 /*[clinic input]
900 _codecs.mbcs_encode
901 str: unicode
902 errors: str(accept={str, NoneType}) = None
903 /
904 [clinic start generated code]*/
905
906 static PyObject *
_codecs_mbcs_encode_impl(PyObject * module,PyObject * str,const char * errors)907 _codecs_mbcs_encode_impl(PyObject *module, PyObject *str, const char *errors)
908 /*[clinic end generated code: output=76e2e170c966c080 input=2e932fc289ea5a5b]*/
909 {
910 return codec_tuple(PyUnicode_EncodeCodePage(CP_ACP, str, errors),
911 PyUnicode_GET_LENGTH(str));
912 }
913
914 /*[clinic input]
915 _codecs.oem_encode
916 str: unicode
917 errors: str(accept={str, NoneType}) = None
918 /
919 [clinic start generated code]*/
920
921 static PyObject *
_codecs_oem_encode_impl(PyObject * module,PyObject * str,const char * errors)922 _codecs_oem_encode_impl(PyObject *module, PyObject *str, const char *errors)
923 /*[clinic end generated code: output=65d5982c737de649 input=9eac86dc21eb14f2]*/
924 {
925 return codec_tuple(PyUnicode_EncodeCodePage(CP_OEMCP, str, errors),
926 PyUnicode_GET_LENGTH(str));
927 }
928
929 /*[clinic input]
930 _codecs.code_page_encode
931 code_page: int
932 str: unicode
933 errors: str(accept={str, NoneType}) = None
934 /
935 [clinic start generated code]*/
936
937 static PyObject *
_codecs_code_page_encode_impl(PyObject * module,int code_page,PyObject * str,const char * errors)938 _codecs_code_page_encode_impl(PyObject *module, int code_page, PyObject *str,
939 const char *errors)
940 /*[clinic end generated code: output=45673f6085657a9e input=7d18a33bc8cd0f94]*/
941 {
942 return codec_tuple(PyUnicode_EncodeCodePage(code_page, str, errors),
943 PyUnicode_GET_LENGTH(str));
944 }
945
946 #endif /* MS_WINDOWS */
947
948 /* --- Error handler registry --------------------------------------------- */
949
950 /*[clinic input]
951 _codecs.register_error
952 errors: str
953 handler: object
954 /
955
956 Register the specified error handler under the name errors.
957
958 handler must be a callable object, that will be called with an exception
959 instance containing information about the location of the encoding/decoding
960 error and must return a (replacement, new position) tuple.
961 [clinic start generated code]*/
962
963 static PyObject *
_codecs_register_error_impl(PyObject * module,const char * errors,PyObject * handler)964 _codecs_register_error_impl(PyObject *module, const char *errors,
965 PyObject *handler)
966 /*[clinic end generated code: output=fa2f7d1879b3067d input=5e6709203c2e33fe]*/
967 {
968 if (PyCodec_RegisterError(errors, handler))
969 return NULL;
970 Py_RETURN_NONE;
971 }
972
973 /*[clinic input]
974 _codecs.lookup_error
975 name: str
976 /
977
978 lookup_error(errors) -> handler
979
980 Return the error handler for the specified error handling name or raise a
981 LookupError, if no handler exists under this name.
982 [clinic start generated code]*/
983
984 static PyObject *
_codecs_lookup_error_impl(PyObject * module,const char * name)985 _codecs_lookup_error_impl(PyObject *module, const char *name)
986 /*[clinic end generated code: output=087f05dc0c9a98cc input=4775dd65e6235aba]*/
987 {
988 return PyCodec_LookupError(name);
989 }
990
991 /* --- Module API --------------------------------------------------------- */
992
993 static PyMethodDef _codecs_functions[] = {
994 _CODECS_REGISTER_METHODDEF
995 _CODECS_LOOKUP_METHODDEF
996 _CODECS_ENCODE_METHODDEF
997 _CODECS_DECODE_METHODDEF
998 _CODECS_ESCAPE_ENCODE_METHODDEF
999 _CODECS_ESCAPE_DECODE_METHODDEF
1000 _CODECS_UTF_8_ENCODE_METHODDEF
1001 _CODECS_UTF_8_DECODE_METHODDEF
1002 _CODECS_UTF_7_ENCODE_METHODDEF
1003 _CODECS_UTF_7_DECODE_METHODDEF
1004 _CODECS_UTF_16_ENCODE_METHODDEF
1005 _CODECS_UTF_16_LE_ENCODE_METHODDEF
1006 _CODECS_UTF_16_BE_ENCODE_METHODDEF
1007 _CODECS_UTF_16_DECODE_METHODDEF
1008 _CODECS_UTF_16_LE_DECODE_METHODDEF
1009 _CODECS_UTF_16_BE_DECODE_METHODDEF
1010 _CODECS_UTF_16_EX_DECODE_METHODDEF
1011 _CODECS_UTF_32_ENCODE_METHODDEF
1012 _CODECS_UTF_32_LE_ENCODE_METHODDEF
1013 _CODECS_UTF_32_BE_ENCODE_METHODDEF
1014 _CODECS_UTF_32_DECODE_METHODDEF
1015 _CODECS_UTF_32_LE_DECODE_METHODDEF
1016 _CODECS_UTF_32_BE_DECODE_METHODDEF
1017 _CODECS_UTF_32_EX_DECODE_METHODDEF
1018 _CODECS_UNICODE_ESCAPE_ENCODE_METHODDEF
1019 _CODECS_UNICODE_ESCAPE_DECODE_METHODDEF
1020 _CODECS_RAW_UNICODE_ESCAPE_ENCODE_METHODDEF
1021 _CODECS_RAW_UNICODE_ESCAPE_DECODE_METHODDEF
1022 _CODECS_LATIN_1_ENCODE_METHODDEF
1023 _CODECS_LATIN_1_DECODE_METHODDEF
1024 _CODECS_ASCII_ENCODE_METHODDEF
1025 _CODECS_ASCII_DECODE_METHODDEF
1026 _CODECS_CHARMAP_ENCODE_METHODDEF
1027 _CODECS_CHARMAP_DECODE_METHODDEF
1028 _CODECS_CHARMAP_BUILD_METHODDEF
1029 _CODECS_READBUFFER_ENCODE_METHODDEF
1030 _CODECS_MBCS_ENCODE_METHODDEF
1031 _CODECS_MBCS_DECODE_METHODDEF
1032 _CODECS_OEM_ENCODE_METHODDEF
1033 _CODECS_OEM_DECODE_METHODDEF
1034 _CODECS_CODE_PAGE_ENCODE_METHODDEF
1035 _CODECS_CODE_PAGE_DECODE_METHODDEF
1036 _CODECS_REGISTER_ERROR_METHODDEF
1037 _CODECS_LOOKUP_ERROR_METHODDEF
1038 _CODECS__FORGET_CODEC_METHODDEF
1039 {NULL, NULL} /* sentinel */
1040 };
1041
1042 static struct PyModuleDef codecsmodule = {
1043 PyModuleDef_HEAD_INIT,
1044 "_codecs",
1045 NULL,
1046 -1,
1047 _codecs_functions,
1048 NULL,
1049 NULL,
1050 NULL,
1051 NULL
1052 };
1053
1054 PyMODINIT_FUNC
PyInit__codecs(void)1055 PyInit__codecs(void)
1056 {
1057 return PyModule_Create(&codecsmodule);
1058 }
1059