/* ------------------------------------------------------------------------

   _codecs -- Provides access to the codec registry and the builtin
              codecs.

   This module should never be imported directly. The standard library
   module "codecs" wraps this builtin module for use within Python.

   The codec registry is accessible via:

     register(search_function) -> None

     unregister(search_function) -> None

     lookup(encoding) -> CodecInfo object

   The builtin Unicode codecs use the following interface:

     <encoding>_encode(Unicode_object[,errors='strict']) ->
        (bytes object, characters consumed)

     <encoding>_decode(char_buffer_obj[,errors='strict']) ->
        (Unicode object, bytes consumed)

   These <encoding>s are available: utf_7, utf_8, utf_16, utf_32 (the
   latter two also as _le, _be and _ex variants), unicode_escape,
   raw_unicode_escape, latin_1, ascii (7-bit), charmap, and on win32
   mbcs, oem and code_page.


   Written by Marc-Andre Lemburg (mal@lemburg.com).

   Copyright (c) Corporation for National Research Initiatives.

   ------------------------------------------------------------------------ */
32
33 #define PY_SSIZE_T_CLEAN
34 #include "Python.h"
35
36 #ifdef MS_WINDOWS
37 #include <windows.h>
38 #endif
39
40 /*[clinic input]
41 module _codecs
42 [clinic start generated code]*/
43 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=e1390e3da3cb9deb]*/
44
45 #include "clinic/_codecsmodule.c.h"
46
47 /* --- Registry ----------------------------------------------------------- */
48
49 /*[clinic input]
50 _codecs.register
51 search_function: object
52 /
53
54 Register a codec search function.
55
56 Search functions are expected to take one argument, the encoding name in
57 all lower case letters, and either return None, or a tuple of functions
58 (encoder, decoder, stream_reader, stream_writer) (or a CodecInfo object).
59 [clinic start generated code]*/
60
61 static PyObject *
_codecs_register(PyObject * module,PyObject * search_function)62 _codecs_register(PyObject *module, PyObject *search_function)
63 /*[clinic end generated code: output=d1bf21e99db7d6d3 input=369578467955cae4]*/
64 {
65 if (PyCodec_Register(search_function))
66 return NULL;
67
68 Py_RETURN_NONE;
69 }
70
71 /*[clinic input]
72 _codecs.unregister
73 search_function: object
74 /
75
76 Unregister a codec search function and clear the registry's cache.
77
78 If the search function is not registered, do nothing.
79 [clinic start generated code]*/
80
81 static PyObject *
_codecs_unregister(PyObject * module,PyObject * search_function)82 _codecs_unregister(PyObject *module, PyObject *search_function)
83 /*[clinic end generated code: output=1f0edee9cf246399 input=dd7c004c652d345e]*/
84 {
85 if (PyCodec_Unregister(search_function) < 0) {
86 return NULL;
87 }
88
89 Py_RETURN_NONE;
90 }
91
92 /*[clinic input]
93 _codecs.lookup
94 encoding: str
95 /
96
97 Looks up a codec tuple in the Python codec registry and returns a CodecInfo object.
98 [clinic start generated code]*/
99
100 static PyObject *
_codecs_lookup_impl(PyObject * module,const char * encoding)101 _codecs_lookup_impl(PyObject *module, const char *encoding)
102 /*[clinic end generated code: output=9f0afa572080c36d input=3c572c0db3febe9c]*/
103 {
104 return _PyCodec_Lookup(encoding);
105 }
106
107 /*[clinic input]
108 _codecs.encode
109 obj: object
110 encoding: str(c_default="NULL") = "utf-8"
111 errors: str(c_default="NULL") = "strict"
112
113 Encodes obj using the codec registered for encoding.
114
115 The default encoding is 'utf-8'. errors may be given to set a
116 different error handling scheme. Default is 'strict' meaning that encoding
117 errors raise a ValueError. Other possible values are 'ignore', 'replace'
118 and 'backslashreplace' as well as any other name registered with
119 codecs.register_error that can handle ValueErrors.
120 [clinic start generated code]*/
121
122 static PyObject *
_codecs_encode_impl(PyObject * module,PyObject * obj,const char * encoding,const char * errors)123 _codecs_encode_impl(PyObject *module, PyObject *obj, const char *encoding,
124 const char *errors)
125 /*[clinic end generated code: output=385148eb9a067c86 input=cd5b685040ff61f0]*/
126 {
127 if (encoding == NULL)
128 encoding = PyUnicode_GetDefaultEncoding();
129
130 /* Encode via the codec registry */
131 return PyCodec_Encode(obj, encoding, errors);
132 }
133
134 /*[clinic input]
135 _codecs.decode
136 obj: object
137 encoding: str(c_default="NULL") = "utf-8"
138 errors: str(c_default="NULL") = "strict"
139
140 Decodes obj using the codec registered for encoding.
141
142 Default encoding is 'utf-8'. errors may be given to set a
143 different error handling scheme. Default is 'strict' meaning that encoding
144 errors raise a ValueError. Other possible values are 'ignore', 'replace'
145 and 'backslashreplace' as well as any other name registered with
146 codecs.register_error that can handle ValueErrors.
147 [clinic start generated code]*/
148
149 static PyObject *
_codecs_decode_impl(PyObject * module,PyObject * obj,const char * encoding,const char * errors)150 _codecs_decode_impl(PyObject *module, PyObject *obj, const char *encoding,
151 const char *errors)
152 /*[clinic end generated code: output=679882417dc3a0bd input=7702c0cc2fa1add6]*/
153 {
154 if (encoding == NULL)
155 encoding = PyUnicode_GetDefaultEncoding();
156
157 /* Decode via the codec registry */
158 return PyCodec_Decode(obj, encoding, errors);
159 }
160
161 /* --- Helpers ------------------------------------------------------------ */
162
163 static
codec_tuple(PyObject * decoded,Py_ssize_t len)164 PyObject *codec_tuple(PyObject *decoded,
165 Py_ssize_t len)
166 {
167 if (decoded == NULL)
168 return NULL;
169 return Py_BuildValue("Nn", decoded, len);
170 }
171
172 /* --- String codecs ------------------------------------------------------ */
173 /*[clinic input]
174 _codecs.escape_decode
175 data: Py_buffer(accept={str, buffer})
176 errors: str(accept={str, NoneType}) = None
177 /
178 [clinic start generated code]*/
179
180 static PyObject *
_codecs_escape_decode_impl(PyObject * module,Py_buffer * data,const char * errors)181 _codecs_escape_decode_impl(PyObject *module, Py_buffer *data,
182 const char *errors)
183 /*[clinic end generated code: output=505200ba8056979a input=77298a561c90bd82]*/
184 {
185 PyObject *decoded = PyBytes_DecodeEscape(data->buf, data->len,
186 errors, 0, NULL);
187 return codec_tuple(decoded, data->len);
188 }
189
190 /*[clinic input]
191 _codecs.escape_encode
192 data: object(subclass_of='&PyBytes_Type')
193 errors: str(accept={str, NoneType}) = None
194 /
195 [clinic start generated code]*/
196
197 static PyObject *
_codecs_escape_encode_impl(PyObject * module,PyObject * data,const char * errors)198 _codecs_escape_encode_impl(PyObject *module, PyObject *data,
199 const char *errors)
200 /*[clinic end generated code: output=4af1d477834bab34 input=8f4b144799a94245]*/
201 {
202 Py_ssize_t size;
203 Py_ssize_t newsize;
204 PyObject *v;
205
206 size = PyBytes_GET_SIZE(data);
207 if (size > PY_SSIZE_T_MAX / 4) {
208 PyErr_SetString(PyExc_OverflowError,
209 "string is too large to encode");
210 return NULL;
211 }
212 newsize = 4*size;
213 v = PyBytes_FromStringAndSize(NULL, newsize);
214
215 if (v == NULL) {
216 return NULL;
217 }
218 else {
219 Py_ssize_t i;
220 char c;
221 char *p = PyBytes_AS_STRING(v);
222
223 for (i = 0; i < size; i++) {
224 /* There's at least enough room for a hex escape */
225 assert(newsize - (p - PyBytes_AS_STRING(v)) >= 4);
226 c = PyBytes_AS_STRING(data)[i];
227 if (c == '\'' || c == '\\')
228 *p++ = '\\', *p++ = c;
229 else if (c == '\t')
230 *p++ = '\\', *p++ = 't';
231 else if (c == '\n')
232 *p++ = '\\', *p++ = 'n';
233 else if (c == '\r')
234 *p++ = '\\', *p++ = 'r';
235 else if (c < ' ' || c >= 0x7f) {
236 *p++ = '\\';
237 *p++ = 'x';
238 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
239 *p++ = Py_hexdigits[c & 0xf];
240 }
241 else
242 *p++ = c;
243 }
244 *p = '\0';
245 if (_PyBytes_Resize(&v, (p - PyBytes_AS_STRING(v)))) {
246 return NULL;
247 }
248 }
249
250 return codec_tuple(v, size);
251 }
252
253 /* --- Decoder ------------------------------------------------------------ */
254 /*[clinic input]
255 _codecs.utf_7_decode
256 data: Py_buffer
257 errors: str(accept={str, NoneType}) = None
258 final: bool(accept={int}) = False
259 /
260 [clinic start generated code]*/
261
262 static PyObject *
_codecs_utf_7_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)263 _codecs_utf_7_decode_impl(PyObject *module, Py_buffer *data,
264 const char *errors, int final)
265 /*[clinic end generated code: output=0cd3a944a32a4089 input=22c395d357815d26]*/
266 {
267 Py_ssize_t consumed = data->len;
268 PyObject *decoded = PyUnicode_DecodeUTF7Stateful(data->buf, data->len,
269 errors,
270 final ? NULL : &consumed);
271 return codec_tuple(decoded, consumed);
272 }
273
274 /*[clinic input]
275 _codecs.utf_8_decode
276 data: Py_buffer
277 errors: str(accept={str, NoneType}) = None
278 final: bool(accept={int}) = False
279 /
280 [clinic start generated code]*/
281
282 static PyObject *
_codecs_utf_8_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)283 _codecs_utf_8_decode_impl(PyObject *module, Py_buffer *data,
284 const char *errors, int final)
285 /*[clinic end generated code: output=10f74dec8d9bb8bf input=f611b3867352ba59]*/
286 {
287 Py_ssize_t consumed = data->len;
288 PyObject *decoded = PyUnicode_DecodeUTF8Stateful(data->buf, data->len,
289 errors,
290 final ? NULL : &consumed);
291 return codec_tuple(decoded, consumed);
292 }
293
294 /*[clinic input]
295 _codecs.utf_16_decode
296 data: Py_buffer
297 errors: str(accept={str, NoneType}) = None
298 final: bool(accept={int}) = False
299 /
300 [clinic start generated code]*/
301
302 static PyObject *
_codecs_utf_16_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)303 _codecs_utf_16_decode_impl(PyObject *module, Py_buffer *data,
304 const char *errors, int final)
305 /*[clinic end generated code: output=783b442abcbcc2d0 input=191d360bd7309180]*/
306 {
307 int byteorder = 0;
308 /* This is overwritten unless final is true. */
309 Py_ssize_t consumed = data->len;
310 PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
311 errors, &byteorder,
312 final ? NULL : &consumed);
313 return codec_tuple(decoded, consumed);
314 }
315
316 /*[clinic input]
317 _codecs.utf_16_le_decode
318 data: Py_buffer
319 errors: str(accept={str, NoneType}) = None
320 final: bool(accept={int}) = False
321 /
322 [clinic start generated code]*/
323
324 static PyObject *
_codecs_utf_16_le_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)325 _codecs_utf_16_le_decode_impl(PyObject *module, Py_buffer *data,
326 const char *errors, int final)
327 /*[clinic end generated code: output=899b9e6364379dcd input=c6904fdc27fb4724]*/
328 {
329 int byteorder = -1;
330 /* This is overwritten unless final is true. */
331 Py_ssize_t consumed = data->len;
332 PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
333 errors, &byteorder,
334 final ? NULL : &consumed);
335 return codec_tuple(decoded, consumed);
336 }
337
338 /*[clinic input]
339 _codecs.utf_16_be_decode
340 data: Py_buffer
341 errors: str(accept={str, NoneType}) = None
342 final: bool(accept={int}) = False
343 /
344 [clinic start generated code]*/
345
346 static PyObject *
_codecs_utf_16_be_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)347 _codecs_utf_16_be_decode_impl(PyObject *module, Py_buffer *data,
348 const char *errors, int final)
349 /*[clinic end generated code: output=49f6465ea07669c8 input=e49012400974649b]*/
350 {
351 int byteorder = 1;
352 /* This is overwritten unless final is true. */
353 Py_ssize_t consumed = data->len;
354 PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
355 errors, &byteorder,
356 final ? NULL : &consumed);
357 return codec_tuple(decoded, consumed);
358 }
359
360 /* This non-standard version also provides access to the byteorder
361 parameter of the builtin UTF-16 codec.
362
363 It returns a tuple (unicode, bytesread, byteorder) with byteorder
364 being the value in effect at the end of data.
365
366 */
367 /*[clinic input]
368 _codecs.utf_16_ex_decode
369 data: Py_buffer
370 errors: str(accept={str, NoneType}) = None
371 byteorder: int = 0
372 final: bool(accept={int}) = False
373 /
374 [clinic start generated code]*/
375
376 static PyObject *
_codecs_utf_16_ex_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int byteorder,int final)377 _codecs_utf_16_ex_decode_impl(PyObject *module, Py_buffer *data,
378 const char *errors, int byteorder, int final)
379 /*[clinic end generated code: output=0f385f251ecc1988 input=5a9c19f2e6b6cf0e]*/
380 {
381 /* This is overwritten unless final is true. */
382 Py_ssize_t consumed = data->len;
383
384 PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
385 errors, &byteorder,
386 final ? NULL : &consumed);
387 if (decoded == NULL)
388 return NULL;
389 return Py_BuildValue("Nni", decoded, consumed, byteorder);
390 }
391
392 /*[clinic input]
393 _codecs.utf_32_decode
394 data: Py_buffer
395 errors: str(accept={str, NoneType}) = None
396 final: bool(accept={int}) = False
397 /
398 [clinic start generated code]*/
399
400 static PyObject *
_codecs_utf_32_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)401 _codecs_utf_32_decode_impl(PyObject *module, Py_buffer *data,
402 const char *errors, int final)
403 /*[clinic end generated code: output=2fc961807f7b145f input=fd7193965627eb58]*/
404 {
405 int byteorder = 0;
406 /* This is overwritten unless final is true. */
407 Py_ssize_t consumed = data->len;
408 PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
409 errors, &byteorder,
410 final ? NULL : &consumed);
411 return codec_tuple(decoded, consumed);
412 }
413
414 /*[clinic input]
415 _codecs.utf_32_le_decode
416 data: Py_buffer
417 errors: str(accept={str, NoneType}) = None
418 final: bool(accept={int}) = False
419 /
420 [clinic start generated code]*/
421
422 static PyObject *
_codecs_utf_32_le_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)423 _codecs_utf_32_le_decode_impl(PyObject *module, Py_buffer *data,
424 const char *errors, int final)
425 /*[clinic end generated code: output=ec8f46b67a94f3e6 input=9078ec70acfe7613]*/
426 {
427 int byteorder = -1;
428 /* This is overwritten unless final is true. */
429 Py_ssize_t consumed = data->len;
430 PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
431 errors, &byteorder,
432 final ? NULL : &consumed);
433 return codec_tuple(decoded, consumed);
434 }
435
436 /*[clinic input]
437 _codecs.utf_32_be_decode
438 data: Py_buffer
439 errors: str(accept={str, NoneType}) = None
440 final: bool(accept={int}) = False
441 /
442 [clinic start generated code]*/
443
444 static PyObject *
_codecs_utf_32_be_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)445 _codecs_utf_32_be_decode_impl(PyObject *module, Py_buffer *data,
446 const char *errors, int final)
447 /*[clinic end generated code: output=ff82bae862c92c4e input=f1ae1bbbb86648ff]*/
448 {
449 int byteorder = 1;
450 /* This is overwritten unless final is true. */
451 Py_ssize_t consumed = data->len;
452 PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
453 errors, &byteorder,
454 final ? NULL : &consumed);
455 return codec_tuple(decoded, consumed);
456 }
457
458 /* This non-standard version also provides access to the byteorder
459 parameter of the builtin UTF-32 codec.
460
461 It returns a tuple (unicode, bytesread, byteorder) with byteorder
462 being the value in effect at the end of data.
463
464 */
465 /*[clinic input]
466 _codecs.utf_32_ex_decode
467 data: Py_buffer
468 errors: str(accept={str, NoneType}) = None
469 byteorder: int = 0
470 final: bool(accept={int}) = False
471 /
472 [clinic start generated code]*/
473
474 static PyObject *
_codecs_utf_32_ex_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int byteorder,int final)475 _codecs_utf_32_ex_decode_impl(PyObject *module, Py_buffer *data,
476 const char *errors, int byteorder, int final)
477 /*[clinic end generated code: output=6bfb177dceaf4848 input=e46a73bc859d0bd0]*/
478 {
479 Py_ssize_t consumed = data->len;
480 PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
481 errors, &byteorder,
482 final ? NULL : &consumed);
483 if (decoded == NULL)
484 return NULL;
485 return Py_BuildValue("Nni", decoded, consumed, byteorder);
486 }
487
488 /*[clinic input]
489 _codecs.unicode_escape_decode
490 data: Py_buffer(accept={str, buffer})
491 errors: str(accept={str, NoneType}) = None
492 final: bool(accept={int}) = True
493 /
494 [clinic start generated code]*/
495
496 static PyObject *
_codecs_unicode_escape_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)497 _codecs_unicode_escape_decode_impl(PyObject *module, Py_buffer *data,
498 const char *errors, int final)
499 /*[clinic end generated code: output=b284f97b12c635ee input=6154f039a9f7c639]*/
500 {
501 Py_ssize_t consumed = data->len;
502 PyObject *decoded = _PyUnicode_DecodeUnicodeEscapeStateful(data->buf, data->len,
503 errors,
504 final ? NULL : &consumed);
505 return codec_tuple(decoded, consumed);
506 }
507
508 /*[clinic input]
509 _codecs.raw_unicode_escape_decode
510 data: Py_buffer(accept={str, buffer})
511 errors: str(accept={str, NoneType}) = None
512 final: bool(accept={int}) = True
513 /
514 [clinic start generated code]*/
515
516 static PyObject *
_codecs_raw_unicode_escape_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)517 _codecs_raw_unicode_escape_decode_impl(PyObject *module, Py_buffer *data,
518 const char *errors, int final)
519 /*[clinic end generated code: output=11dbd96301e2879e input=2d166191beb3235a]*/
520 {
521 Py_ssize_t consumed = data->len;
522 PyObject *decoded = _PyUnicode_DecodeRawUnicodeEscapeStateful(data->buf, data->len,
523 errors,
524 final ? NULL : &consumed);
525 return codec_tuple(decoded, consumed);
526 }
527
528 /*[clinic input]
529 _codecs.latin_1_decode
530 data: Py_buffer
531 errors: str(accept={str, NoneType}) = None
532 /
533 [clinic start generated code]*/
534
535 static PyObject *
_codecs_latin_1_decode_impl(PyObject * module,Py_buffer * data,const char * errors)536 _codecs_latin_1_decode_impl(PyObject *module, Py_buffer *data,
537 const char *errors)
538 /*[clinic end generated code: output=07f3dfa3f72c7d8f input=76ca58fd6dcd08c7]*/
539 {
540 PyObject *decoded = PyUnicode_DecodeLatin1(data->buf, data->len, errors);
541 return codec_tuple(decoded, data->len);
542 }
543
544 /*[clinic input]
545 _codecs.ascii_decode
546 data: Py_buffer
547 errors: str(accept={str, NoneType}) = None
548 /
549 [clinic start generated code]*/
550
551 static PyObject *
_codecs_ascii_decode_impl(PyObject * module,Py_buffer * data,const char * errors)552 _codecs_ascii_decode_impl(PyObject *module, Py_buffer *data,
553 const char *errors)
554 /*[clinic end generated code: output=2627d72058d42429 input=e428a267a04b4481]*/
555 {
556 PyObject *decoded = PyUnicode_DecodeASCII(data->buf, data->len, errors);
557 return codec_tuple(decoded, data->len);
558 }
559
560 /*[clinic input]
561 _codecs.charmap_decode
562 data: Py_buffer
563 errors: str(accept={str, NoneType}) = None
564 mapping: object = None
565 /
566 [clinic start generated code]*/
567
568 static PyObject *
_codecs_charmap_decode_impl(PyObject * module,Py_buffer * data,const char * errors,PyObject * mapping)569 _codecs_charmap_decode_impl(PyObject *module, Py_buffer *data,
570 const char *errors, PyObject *mapping)
571 /*[clinic end generated code: output=2c335b09778cf895 input=15b69df43458eb40]*/
572 {
573 PyObject *decoded;
574
575 if (mapping == Py_None)
576 mapping = NULL;
577
578 decoded = PyUnicode_DecodeCharmap(data->buf, data->len, mapping, errors);
579 return codec_tuple(decoded, data->len);
580 }
581
582 #ifdef MS_WINDOWS
583
584 /*[clinic input]
585 _codecs.mbcs_decode
586 data: Py_buffer
587 errors: str(accept={str, NoneType}) = None
588 final: bool(accept={int}) = False
589 /
590 [clinic start generated code]*/
591
592 static PyObject *
_codecs_mbcs_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)593 _codecs_mbcs_decode_impl(PyObject *module, Py_buffer *data,
594 const char *errors, int final)
595 /*[clinic end generated code: output=39b65b8598938c4b input=1c1d50f08fa53789]*/
596 {
597 Py_ssize_t consumed = data->len;
598 PyObject *decoded = PyUnicode_DecodeMBCSStateful(data->buf, data->len,
599 errors, final ? NULL : &consumed);
600 return codec_tuple(decoded, consumed);
601 }
602
603 /*[clinic input]
604 _codecs.oem_decode
605 data: Py_buffer
606 errors: str(accept={str, NoneType}) = None
607 final: bool(accept={int}) = False
608 /
609 [clinic start generated code]*/
610
611 static PyObject *
_codecs_oem_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)612 _codecs_oem_decode_impl(PyObject *module, Py_buffer *data,
613 const char *errors, int final)
614 /*[clinic end generated code: output=da1617612f3fcad8 input=81b67cba811022e5]*/
615 {
616 Py_ssize_t consumed = data->len;
617 PyObject *decoded = PyUnicode_DecodeCodePageStateful(CP_OEMCP,
618 data->buf, data->len, errors, final ? NULL : &consumed);
619 return codec_tuple(decoded, consumed);
620 }
621
622 /*[clinic input]
623 _codecs.code_page_decode
624 codepage: int
625 data: Py_buffer
626 errors: str(accept={str, NoneType}) = None
627 final: bool(accept={int}) = False
628 /
629 [clinic start generated code]*/
630
631 static PyObject *
_codecs_code_page_decode_impl(PyObject * module,int codepage,Py_buffer * data,const char * errors,int final)632 _codecs_code_page_decode_impl(PyObject *module, int codepage,
633 Py_buffer *data, const char *errors, int final)
634 /*[clinic end generated code: output=53008ea967da3fff input=c5f58d036cb63575]*/
635 {
636 Py_ssize_t consumed = data->len;
637 PyObject *decoded = PyUnicode_DecodeCodePageStateful(codepage,
638 data->buf, data->len,
639 errors,
640 final ? NULL : &consumed);
641 return codec_tuple(decoded, consumed);
642 }
643
644 #endif /* MS_WINDOWS */
645
646 /* --- Encoder ------------------------------------------------------------ */
647
648 /*[clinic input]
649 _codecs.readbuffer_encode
650 data: Py_buffer(accept={str, buffer})
651 errors: str(accept={str, NoneType}) = None
652 /
653 [clinic start generated code]*/
654
655 static PyObject *
_codecs_readbuffer_encode_impl(PyObject * module,Py_buffer * data,const char * errors)656 _codecs_readbuffer_encode_impl(PyObject *module, Py_buffer *data,
657 const char *errors)
658 /*[clinic end generated code: output=c645ea7cdb3d6e86 input=aa10cfdf252455c5]*/
659 {
660 PyObject *result = PyBytes_FromStringAndSize(data->buf, data->len);
661 return codec_tuple(result, data->len);
662 }
663
664 /*[clinic input]
665 _codecs.utf_7_encode
666 str: unicode
667 errors: str(accept={str, NoneType}) = None
668 /
669 [clinic start generated code]*/
670
671 static PyObject *
_codecs_utf_7_encode_impl(PyObject * module,PyObject * str,const char * errors)672 _codecs_utf_7_encode_impl(PyObject *module, PyObject *str,
673 const char *errors)
674 /*[clinic end generated code: output=0feda21ffc921bc8 input=2546dbbb3fa53114]*/
675 {
676 return codec_tuple(_PyUnicode_EncodeUTF7(str, 0, 0, errors),
677 PyUnicode_GET_LENGTH(str));
678 }
679
680 /*[clinic input]
681 _codecs.utf_8_encode
682 str: unicode
683 errors: str(accept={str, NoneType}) = None
684 /
685 [clinic start generated code]*/
686
687 static PyObject *
_codecs_utf_8_encode_impl(PyObject * module,PyObject * str,const char * errors)688 _codecs_utf_8_encode_impl(PyObject *module, PyObject *str,
689 const char *errors)
690 /*[clinic end generated code: output=02bf47332b9c796c input=a3e71ae01c3f93f3]*/
691 {
692 return codec_tuple(_PyUnicode_AsUTF8String(str, errors),
693 PyUnicode_GET_LENGTH(str));
694 }
695
696 /* This version provides access to the byteorder parameter of the
697 builtin UTF-16 codecs as optional third argument. It defaults to 0
698 which means: use the native byte order and prepend the data with a
699 BOM mark.
700
701 */
702
703 /*[clinic input]
704 _codecs.utf_16_encode
705 str: unicode
706 errors: str(accept={str, NoneType}) = None
707 byteorder: int = 0
708 /
709 [clinic start generated code]*/
710
711 static PyObject *
_codecs_utf_16_encode_impl(PyObject * module,PyObject * str,const char * errors,int byteorder)712 _codecs_utf_16_encode_impl(PyObject *module, PyObject *str,
713 const char *errors, int byteorder)
714 /*[clinic end generated code: output=c654e13efa2e64e4 input=68cdc2eb8338555d]*/
715 {
716 return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, byteorder),
717 PyUnicode_GET_LENGTH(str));
718 }
719
720 /*[clinic input]
721 _codecs.utf_16_le_encode
722 str: unicode
723 errors: str(accept={str, NoneType}) = None
724 /
725 [clinic start generated code]*/
726
727 static PyObject *
_codecs_utf_16_le_encode_impl(PyObject * module,PyObject * str,const char * errors)728 _codecs_utf_16_le_encode_impl(PyObject *module, PyObject *str,
729 const char *errors)
730 /*[clinic end generated code: output=431b01e55f2d4995 input=83d042706eed6798]*/
731 {
732 return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, -1),
733 PyUnicode_GET_LENGTH(str));
734 }
735
736 /*[clinic input]
737 _codecs.utf_16_be_encode
738 str: unicode
739 errors: str(accept={str, NoneType}) = None
740 /
741 [clinic start generated code]*/
742
743 static PyObject *
_codecs_utf_16_be_encode_impl(PyObject * module,PyObject * str,const char * errors)744 _codecs_utf_16_be_encode_impl(PyObject *module, PyObject *str,
745 const char *errors)
746 /*[clinic end generated code: output=96886a6fd54dcae3 input=6f1e9e623b03071b]*/
747 {
748 return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, +1),
749 PyUnicode_GET_LENGTH(str));
750 }
751
752 /* This version provides access to the byteorder parameter of the
753 builtin UTF-32 codecs as optional third argument. It defaults to 0
754 which means: use the native byte order and prepend the data with a
755 BOM mark.
756
757 */
758
759 /*[clinic input]
760 _codecs.utf_32_encode
761 str: unicode
762 errors: str(accept={str, NoneType}) = None
763 byteorder: int = 0
764 /
765 [clinic start generated code]*/
766
767 static PyObject *
_codecs_utf_32_encode_impl(PyObject * module,PyObject * str,const char * errors,int byteorder)768 _codecs_utf_32_encode_impl(PyObject *module, PyObject *str,
769 const char *errors, int byteorder)
770 /*[clinic end generated code: output=5c760da0c09a8b83 input=8ec4c64d983bc52b]*/
771 {
772 return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, byteorder),
773 PyUnicode_GET_LENGTH(str));
774 }
775
776 /*[clinic input]
777 _codecs.utf_32_le_encode
778 str: unicode
779 errors: str(accept={str, NoneType}) = None
780 /
781 [clinic start generated code]*/
782
783 static PyObject *
_codecs_utf_32_le_encode_impl(PyObject * module,PyObject * str,const char * errors)784 _codecs_utf_32_le_encode_impl(PyObject *module, PyObject *str,
785 const char *errors)
786 /*[clinic end generated code: output=b65cd176de8e36d6 input=f0918d41de3eb1b1]*/
787 {
788 return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, -1),
789 PyUnicode_GET_LENGTH(str));
790 }
791
792 /*[clinic input]
793 _codecs.utf_32_be_encode
794 str: unicode
795 errors: str(accept={str, NoneType}) = None
796 /
797 [clinic start generated code]*/
798
799 static PyObject *
_codecs_utf_32_be_encode_impl(PyObject * module,PyObject * str,const char * errors)800 _codecs_utf_32_be_encode_impl(PyObject *module, PyObject *str,
801 const char *errors)
802 /*[clinic end generated code: output=1d9e71a9358709e9 input=967a99a95748b557]*/
803 {
804 return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, +1),
805 PyUnicode_GET_LENGTH(str));
806 }
807
808 /*[clinic input]
809 _codecs.unicode_escape_encode
810 str: unicode
811 errors: str(accept={str, NoneType}) = None
812 /
813 [clinic start generated code]*/
814
815 static PyObject *
_codecs_unicode_escape_encode_impl(PyObject * module,PyObject * str,const char * errors)816 _codecs_unicode_escape_encode_impl(PyObject *module, PyObject *str,
817 const char *errors)
818 /*[clinic end generated code: output=66271b30bc4f7a3c input=8c4de07597054e33]*/
819 {
820 return codec_tuple(PyUnicode_AsUnicodeEscapeString(str),
821 PyUnicode_GET_LENGTH(str));
822 }
823
824 /*[clinic input]
825 _codecs.raw_unicode_escape_encode
826 str: unicode
827 errors: str(accept={str, NoneType}) = None
828 /
829 [clinic start generated code]*/
830
831 static PyObject *
_codecs_raw_unicode_escape_encode_impl(PyObject * module,PyObject * str,const char * errors)832 _codecs_raw_unicode_escape_encode_impl(PyObject *module, PyObject *str,
833 const char *errors)
834 /*[clinic end generated code: output=a66a806ed01c830a input=4aa6f280d78e4574]*/
835 {
836 return codec_tuple(PyUnicode_AsRawUnicodeEscapeString(str),
837 PyUnicode_GET_LENGTH(str));
838 }
839
840 /*[clinic input]
841 _codecs.latin_1_encode
842 str: unicode
843 errors: str(accept={str, NoneType}) = None
844 /
845 [clinic start generated code]*/
846
847 static PyObject *
_codecs_latin_1_encode_impl(PyObject * module,PyObject * str,const char * errors)848 _codecs_latin_1_encode_impl(PyObject *module, PyObject *str,
849 const char *errors)
850 /*[clinic end generated code: output=2c28c83a27884e08 input=ec3ef74bf85c5c5d]*/
851 {
852 return codec_tuple(_PyUnicode_AsLatin1String(str, errors),
853 PyUnicode_GET_LENGTH(str));
854 }
855
856 /*[clinic input]
857 _codecs.ascii_encode
858 str: unicode
859 errors: str(accept={str, NoneType}) = None
860 /
861 [clinic start generated code]*/
862
863 static PyObject *
_codecs_ascii_encode_impl(PyObject * module,PyObject * str,const char * errors)864 _codecs_ascii_encode_impl(PyObject *module, PyObject *str,
865 const char *errors)
866 /*[clinic end generated code: output=b5e035182d33befc input=93e6e602838bd3de]*/
867 {
868 return codec_tuple(_PyUnicode_AsASCIIString(str, errors),
869 PyUnicode_GET_LENGTH(str));
870 }
871
872 /*[clinic input]
873 _codecs.charmap_encode
874 str: unicode
875 errors: str(accept={str, NoneType}) = None
876 mapping: object = None
877 /
878 [clinic start generated code]*/
879
880 static PyObject *
_codecs_charmap_encode_impl(PyObject * module,PyObject * str,const char * errors,PyObject * mapping)881 _codecs_charmap_encode_impl(PyObject *module, PyObject *str,
882 const char *errors, PyObject *mapping)
883 /*[clinic end generated code: output=047476f48495a9e9 input=2a98feae73dadce8]*/
884 {
885 if (mapping == Py_None)
886 mapping = NULL;
887
888 return codec_tuple(_PyUnicode_EncodeCharmap(str, mapping, errors),
889 PyUnicode_GET_LENGTH(str));
890 }
891
892 /*[clinic input]
893 _codecs.charmap_build
894 map: unicode
895 /
896 [clinic start generated code]*/
897
898 static PyObject *
_codecs_charmap_build_impl(PyObject * module,PyObject * map)899 _codecs_charmap_build_impl(PyObject *module, PyObject *map)
900 /*[clinic end generated code: output=bb073c27031db9ac input=d91a91d1717dbc6d]*/
901 {
902 return PyUnicode_BuildEncodingMap(map);
903 }
904
905 #ifdef MS_WINDOWS
906
907 /*[clinic input]
908 _codecs.mbcs_encode
909 str: unicode
910 errors: str(accept={str, NoneType}) = None
911 /
912 [clinic start generated code]*/
913
914 static PyObject *
_codecs_mbcs_encode_impl(PyObject * module,PyObject * str,const char * errors)915 _codecs_mbcs_encode_impl(PyObject *module, PyObject *str, const char *errors)
916 /*[clinic end generated code: output=76e2e170c966c080 input=2e932fc289ea5a5b]*/
917 {
918 return codec_tuple(PyUnicode_EncodeCodePage(CP_ACP, str, errors),
919 PyUnicode_GET_LENGTH(str));
920 }
921
922 /*[clinic input]
923 _codecs.oem_encode
924 str: unicode
925 errors: str(accept={str, NoneType}) = None
926 /
927 [clinic start generated code]*/
928
929 static PyObject *
_codecs_oem_encode_impl(PyObject * module,PyObject * str,const char * errors)930 _codecs_oem_encode_impl(PyObject *module, PyObject *str, const char *errors)
931 /*[clinic end generated code: output=65d5982c737de649 input=9eac86dc21eb14f2]*/
932 {
933 return codec_tuple(PyUnicode_EncodeCodePage(CP_OEMCP, str, errors),
934 PyUnicode_GET_LENGTH(str));
935 }
936
937 /*[clinic input]
938 _codecs.code_page_encode
939 code_page: int
940 str: unicode
941 errors: str(accept={str, NoneType}) = None
942 /
943 [clinic start generated code]*/
944
945 static PyObject *
_codecs_code_page_encode_impl(PyObject * module,int code_page,PyObject * str,const char * errors)946 _codecs_code_page_encode_impl(PyObject *module, int code_page, PyObject *str,
947 const char *errors)
948 /*[clinic end generated code: output=45673f6085657a9e input=7d18a33bc8cd0f94]*/
949 {
950 return codec_tuple(PyUnicode_EncodeCodePage(code_page, str, errors),
951 PyUnicode_GET_LENGTH(str));
952 }
953
954 #endif /* MS_WINDOWS */
955
956 /* --- Error handler registry --------------------------------------------- */
957
958 /*[clinic input]
959 _codecs.register_error
960 errors: str
961 handler: object
962 /
963
964 Register the specified error handler under the name errors.
965
966 handler must be a callable object, that will be called with an exception
967 instance containing information about the location of the encoding/decoding
968 error and must return a (replacement, new position) tuple.
969 [clinic start generated code]*/
970
971 static PyObject *
_codecs_register_error_impl(PyObject * module,const char * errors,PyObject * handler)972 _codecs_register_error_impl(PyObject *module, const char *errors,
973 PyObject *handler)
974 /*[clinic end generated code: output=fa2f7d1879b3067d input=5e6709203c2e33fe]*/
975 {
976 if (PyCodec_RegisterError(errors, handler))
977 return NULL;
978 Py_RETURN_NONE;
979 }
980
981 /*[clinic input]
982 _codecs.lookup_error
983 name: str
984 /
985
986 lookup_error(errors) -> handler
987
988 Return the error handler for the specified error handling name or raise a
989 LookupError, if no handler exists under this name.
990 [clinic start generated code]*/
991
992 static PyObject *
_codecs_lookup_error_impl(PyObject * module,const char * name)993 _codecs_lookup_error_impl(PyObject *module, const char *name)
994 /*[clinic end generated code: output=087f05dc0c9a98cc input=4775dd65e6235aba]*/
995 {
996 return PyCodec_LookupError(name);
997 }
998
999 /* --- Module API --------------------------------------------------------- */
1000
1001 static PyMethodDef _codecs_functions[] = {
1002 _CODECS_REGISTER_METHODDEF
1003 _CODECS_UNREGISTER_METHODDEF
1004 _CODECS_LOOKUP_METHODDEF
1005 _CODECS_ENCODE_METHODDEF
1006 _CODECS_DECODE_METHODDEF
1007 _CODECS_ESCAPE_ENCODE_METHODDEF
1008 _CODECS_ESCAPE_DECODE_METHODDEF
1009 _CODECS_UTF_8_ENCODE_METHODDEF
1010 _CODECS_UTF_8_DECODE_METHODDEF
1011 _CODECS_UTF_7_ENCODE_METHODDEF
1012 _CODECS_UTF_7_DECODE_METHODDEF
1013 _CODECS_UTF_16_ENCODE_METHODDEF
1014 _CODECS_UTF_16_LE_ENCODE_METHODDEF
1015 _CODECS_UTF_16_BE_ENCODE_METHODDEF
1016 _CODECS_UTF_16_DECODE_METHODDEF
1017 _CODECS_UTF_16_LE_DECODE_METHODDEF
1018 _CODECS_UTF_16_BE_DECODE_METHODDEF
1019 _CODECS_UTF_16_EX_DECODE_METHODDEF
1020 _CODECS_UTF_32_ENCODE_METHODDEF
1021 _CODECS_UTF_32_LE_ENCODE_METHODDEF
1022 _CODECS_UTF_32_BE_ENCODE_METHODDEF
1023 _CODECS_UTF_32_DECODE_METHODDEF
1024 _CODECS_UTF_32_LE_DECODE_METHODDEF
1025 _CODECS_UTF_32_BE_DECODE_METHODDEF
1026 _CODECS_UTF_32_EX_DECODE_METHODDEF
1027 _CODECS_UNICODE_ESCAPE_ENCODE_METHODDEF
1028 _CODECS_UNICODE_ESCAPE_DECODE_METHODDEF
1029 _CODECS_RAW_UNICODE_ESCAPE_ENCODE_METHODDEF
1030 _CODECS_RAW_UNICODE_ESCAPE_DECODE_METHODDEF
1031 _CODECS_LATIN_1_ENCODE_METHODDEF
1032 _CODECS_LATIN_1_DECODE_METHODDEF
1033 _CODECS_ASCII_ENCODE_METHODDEF
1034 _CODECS_ASCII_DECODE_METHODDEF
1035 _CODECS_CHARMAP_ENCODE_METHODDEF
1036 _CODECS_CHARMAP_DECODE_METHODDEF
1037 _CODECS_CHARMAP_BUILD_METHODDEF
1038 _CODECS_READBUFFER_ENCODE_METHODDEF
1039 _CODECS_MBCS_ENCODE_METHODDEF
1040 _CODECS_MBCS_DECODE_METHODDEF
1041 _CODECS_OEM_ENCODE_METHODDEF
1042 _CODECS_OEM_DECODE_METHODDEF
1043 _CODECS_CODE_PAGE_ENCODE_METHODDEF
1044 _CODECS_CODE_PAGE_DECODE_METHODDEF
1045 _CODECS_REGISTER_ERROR_METHODDEF
1046 _CODECS_LOOKUP_ERROR_METHODDEF
1047 {NULL, NULL} /* sentinel */
1048 };
1049
1050 static PyModuleDef_Slot _codecs_slots[] = {
1051 {0, NULL}
1052 };
1053
1054 static struct PyModuleDef codecsmodule = {
1055 PyModuleDef_HEAD_INIT,
1056 "_codecs",
1057 NULL,
1058 0,
1059 _codecs_functions,
1060 _codecs_slots,
1061 NULL,
1062 NULL,
1063 NULL
1064 };
1065
1066 PyMODINIT_FUNC
PyInit__codecs(void)1067 PyInit__codecs(void)
1068 {
1069 return PyModuleDef_Init(&codecsmodule);
1070 }
1071