• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* ------------------------------------------------------------------------
2 
3    _codecs -- Provides access to the codec registry and the builtin
4               codecs.
5 
6    This module should never be imported directly. The standard library
7    module "codecs" wraps this builtin module for use within Python.
8 
9    The codec registry is accessible via:
10 
11      register(search_function) -> None
12 
13      lookup(encoding) -> CodecInfo object
14 
15    The builtin Unicode codecs use the following interface:
16 
17      <encoding>_encode(Unicode_object[,errors='strict']) ->
18         (string object, bytes consumed)
19 
20      <encoding>_decode(char_buffer_obj[,errors='strict']) ->
21         (Unicode object, bytes consumed)
22 
23    These <encoding>s are available: utf_8, unicode_escape,
24    raw_unicode_escape, latin_1, ascii (7-bit), mbcs (on win32).
25 
26 
27 Written by Marc-Andre Lemburg (mal@lemburg.com).
28 
29 Copyright (c) Corporation for National Research Initiatives.
30 
31    ------------------------------------------------------------------------ */
32 
33 #include "Python.h"
34 #include "pycore_codecs.h"        // _PyCodec_Lookup()
35 
36 #ifdef MS_WINDOWS
37 #include <windows.h>
38 #endif
39 
40 /*[clinic input]
41 module _codecs
42 [clinic start generated code]*/
43 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=e1390e3da3cb9deb]*/
44 
45 #include "pycore_runtime.h"
46 #include "clinic/_codecsmodule.c.h"
47 
48 /* --- Registry ----------------------------------------------------------- */
49 
50 /*[clinic input]
51 _codecs.register
52     search_function: object
53     /
54 
55 Register a codec search function.
56 
57 Search functions are expected to take one argument, the encoding name in
58 all lower case letters, and either return None, or a tuple of functions
59 (encoder, decoder, stream_reader, stream_writer) (or a CodecInfo object).
60 [clinic start generated code]*/
61 
62 static PyObject *
_codecs_register(PyObject * module,PyObject * search_function)63 _codecs_register(PyObject *module, PyObject *search_function)
64 /*[clinic end generated code: output=d1bf21e99db7d6d3 input=369578467955cae4]*/
65 {
66     if (PyCodec_Register(search_function))
67         return NULL;
68 
69     Py_RETURN_NONE;
70 }
71 
72 /*[clinic input]
73 _codecs.unregister
74     search_function: object
75     /
76 
77 Unregister a codec search function and clear the registry's cache.
78 
79 If the search function is not registered, do nothing.
80 [clinic start generated code]*/
81 
82 static PyObject *
_codecs_unregister(PyObject * module,PyObject * search_function)83 _codecs_unregister(PyObject *module, PyObject *search_function)
84 /*[clinic end generated code: output=1f0edee9cf246399 input=dd7c004c652d345e]*/
85 {
86     if (PyCodec_Unregister(search_function) < 0) {
87         return NULL;
88     }
89 
90     Py_RETURN_NONE;
91 }
92 
93 /*[clinic input]
94 _codecs.lookup
95     encoding: str
96     /
97 
98 Looks up a codec tuple in the Python codec registry and returns a CodecInfo object.
99 [clinic start generated code]*/
100 
101 static PyObject *
_codecs_lookup_impl(PyObject * module,const char * encoding)102 _codecs_lookup_impl(PyObject *module, const char *encoding)
103 /*[clinic end generated code: output=9f0afa572080c36d input=3c572c0db3febe9c]*/
104 {
105     return _PyCodec_Lookup(encoding);
106 }
107 
108 /*[clinic input]
109 _codecs.encode
110     obj: object
111     encoding: str(c_default="NULL") = "utf-8"
112     errors: str(c_default="NULL") = "strict"
113 
114 Encodes obj using the codec registered for encoding.
115 
116 The default encoding is 'utf-8'.  errors may be given to set a
117 different error handling scheme.  Default is 'strict' meaning that encoding
118 errors raise a ValueError.  Other possible values are 'ignore', 'replace'
119 and 'backslashreplace' as well as any other name registered with
120 codecs.register_error that can handle ValueErrors.
121 [clinic start generated code]*/
122 
123 static PyObject *
_codecs_encode_impl(PyObject * module,PyObject * obj,const char * encoding,const char * errors)124 _codecs_encode_impl(PyObject *module, PyObject *obj, const char *encoding,
125                     const char *errors)
126 /*[clinic end generated code: output=385148eb9a067c86 input=cd5b685040ff61f0]*/
127 {
128     if (encoding == NULL)
129         encoding = PyUnicode_GetDefaultEncoding();
130 
131     /* Encode via the codec registry */
132     return PyCodec_Encode(obj, encoding, errors);
133 }
134 
135 /*[clinic input]
136 _codecs.decode
137     obj: object
138     encoding: str(c_default="NULL") = "utf-8"
139     errors: str(c_default="NULL") = "strict"
140 
141 Decodes obj using the codec registered for encoding.
142 
143 Default encoding is 'utf-8'.  errors may be given to set a
144 different error handling scheme.  Default is 'strict' meaning that encoding
145 errors raise a ValueError.  Other possible values are 'ignore', 'replace'
146 and 'backslashreplace' as well as any other name registered with
147 codecs.register_error that can handle ValueErrors.
148 [clinic start generated code]*/
149 
150 static PyObject *
_codecs_decode_impl(PyObject * module,PyObject * obj,const char * encoding,const char * errors)151 _codecs_decode_impl(PyObject *module, PyObject *obj, const char *encoding,
152                     const char *errors)
153 /*[clinic end generated code: output=679882417dc3a0bd input=7702c0cc2fa1add6]*/
154 {
155     if (encoding == NULL)
156         encoding = PyUnicode_GetDefaultEncoding();
157 
158     /* Decode via the codec registry */
159     return PyCodec_Decode(obj, encoding, errors);
160 }
161 
162 /* --- Helpers ------------------------------------------------------------ */
163 
164 static
codec_tuple(PyObject * decoded,Py_ssize_t len)165 PyObject *codec_tuple(PyObject *decoded,
166                       Py_ssize_t len)
167 {
168     if (decoded == NULL)
169         return NULL;
170     return Py_BuildValue("Nn", decoded, len);
171 }
172 
173 /* --- String codecs ------------------------------------------------------ */
174 /*[clinic input]
175 _codecs.escape_decode
176     data: Py_buffer(accept={str, buffer})
177     errors: str(accept={str, NoneType}) = None
178     /
179 [clinic start generated code]*/
180 
181 static PyObject *
_codecs_escape_decode_impl(PyObject * module,Py_buffer * data,const char * errors)182 _codecs_escape_decode_impl(PyObject *module, Py_buffer *data,
183                            const char *errors)
184 /*[clinic end generated code: output=505200ba8056979a input=77298a561c90bd82]*/
185 {
186     PyObject *decoded = PyBytes_DecodeEscape(data->buf, data->len,
187                                              errors, 0, NULL);
188     return codec_tuple(decoded, data->len);
189 }
190 
191 /*[clinic input]
192 _codecs.escape_encode
193     data: object(subclass_of='&PyBytes_Type')
194     errors: str(accept={str, NoneType}) = None
195     /
196 [clinic start generated code]*/
197 
198 static PyObject *
_codecs_escape_encode_impl(PyObject * module,PyObject * data,const char * errors)199 _codecs_escape_encode_impl(PyObject *module, PyObject *data,
200                            const char *errors)
201 /*[clinic end generated code: output=4af1d477834bab34 input=8f4b144799a94245]*/
202 {
203     Py_ssize_t size;
204     Py_ssize_t newsize;
205     PyObject *v;
206 
207     size = PyBytes_GET_SIZE(data);
208     if (size > PY_SSIZE_T_MAX / 4) {
209         PyErr_SetString(PyExc_OverflowError,
210             "string is too large to encode");
211             return NULL;
212     }
213     newsize = 4*size;
214     v = PyBytes_FromStringAndSize(NULL, newsize);
215 
216     if (v == NULL) {
217         return NULL;
218     }
219     else {
220         Py_ssize_t i;
221         char c;
222         char *p = PyBytes_AS_STRING(v);
223 
224         for (i = 0; i < size; i++) {
225             /* There's at least enough room for a hex escape */
226             assert(newsize - (p - PyBytes_AS_STRING(v)) >= 4);
227             c = PyBytes_AS_STRING(data)[i];
228             if (c == '\'' || c == '\\')
229                 *p++ = '\\', *p++ = c;
230             else if (c == '\t')
231                 *p++ = '\\', *p++ = 't';
232             else if (c == '\n')
233                 *p++ = '\\', *p++ = 'n';
234             else if (c == '\r')
235                 *p++ = '\\', *p++ = 'r';
236             else if (c < ' ' || c >= 0x7f) {
237                 *p++ = '\\';
238                 *p++ = 'x';
239                 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
240                 *p++ = Py_hexdigits[c & 0xf];
241             }
242             else
243                 *p++ = c;
244         }
245         *p = '\0';
246         if (_PyBytes_Resize(&v, (p - PyBytes_AS_STRING(v)))) {
247             return NULL;
248         }
249     }
250 
251     return codec_tuple(v, size);
252 }
253 
254 /* --- Decoder ------------------------------------------------------------ */
255 /*[clinic input]
256 _codecs.utf_7_decode
257     data: Py_buffer
258     errors: str(accept={str, NoneType}) = None
259     final: bool = False
260     /
261 [clinic start generated code]*/
262 
263 static PyObject *
_codecs_utf_7_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)264 _codecs_utf_7_decode_impl(PyObject *module, Py_buffer *data,
265                           const char *errors, int final)
266 /*[clinic end generated code: output=0cd3a944a32a4089 input=dbf8c8998102dc7d]*/
267 {
268     Py_ssize_t consumed = data->len;
269     PyObject *decoded = PyUnicode_DecodeUTF7Stateful(data->buf, data->len,
270                                                      errors,
271                                                      final ? NULL : &consumed);
272     return codec_tuple(decoded, consumed);
273 }
274 
275 /*[clinic input]
276 _codecs.utf_8_decode
277     data: Py_buffer
278     errors: str(accept={str, NoneType}) = None
279     final: bool = False
280     /
281 [clinic start generated code]*/
282 
283 static PyObject *
_codecs_utf_8_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)284 _codecs_utf_8_decode_impl(PyObject *module, Py_buffer *data,
285                           const char *errors, int final)
286 /*[clinic end generated code: output=10f74dec8d9bb8bf input=ca06bc8a9c970e25]*/
287 {
288     Py_ssize_t consumed = data->len;
289     PyObject *decoded = PyUnicode_DecodeUTF8Stateful(data->buf, data->len,
290                                                      errors,
291                                                      final ? NULL : &consumed);
292     return codec_tuple(decoded, consumed);
293 }
294 
295 /*[clinic input]
296 _codecs.utf_16_decode
297     data: Py_buffer
298     errors: str(accept={str, NoneType}) = None
299     final: bool = False
300     /
301 [clinic start generated code]*/
302 
303 static PyObject *
_codecs_utf_16_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)304 _codecs_utf_16_decode_impl(PyObject *module, Py_buffer *data,
305                            const char *errors, int final)
306 /*[clinic end generated code: output=783b442abcbcc2d0 input=5b0f52071ba6cadc]*/
307 {
308     int byteorder = 0;
309     /* This is overwritten unless final is true. */
310     Py_ssize_t consumed = data->len;
311     PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
312                                                       errors, &byteorder,
313                                                       final ? NULL : &consumed);
314     return codec_tuple(decoded, consumed);
315 }
316 
317 /*[clinic input]
318 _codecs.utf_16_le_decode
319     data: Py_buffer
320     errors: str(accept={str, NoneType}) = None
321     final: bool = False
322     /
323 [clinic start generated code]*/
324 
325 static PyObject *
_codecs_utf_16_le_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)326 _codecs_utf_16_le_decode_impl(PyObject *module, Py_buffer *data,
327                               const char *errors, int final)
328 /*[clinic end generated code: output=899b9e6364379dcd input=115bd8c7b783d0bf]*/
329 {
330     int byteorder = -1;
331     /* This is overwritten unless final is true. */
332     Py_ssize_t consumed = data->len;
333     PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
334                                                       errors, &byteorder,
335                                                       final ? NULL : &consumed);
336     return codec_tuple(decoded, consumed);
337 }
338 
339 /*[clinic input]
340 _codecs.utf_16_be_decode
341     data: Py_buffer
342     errors: str(accept={str, NoneType}) = None
343     final: bool = False
344     /
345 [clinic start generated code]*/
346 
347 static PyObject *
_codecs_utf_16_be_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)348 _codecs_utf_16_be_decode_impl(PyObject *module, Py_buffer *data,
349                               const char *errors, int final)
350 /*[clinic end generated code: output=49f6465ea07669c8 input=63131422b01f9cb4]*/
351 {
352     int byteorder = 1;
353     /* This is overwritten unless final is true. */
354     Py_ssize_t consumed = data->len;
355     PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
356                                                       errors, &byteorder,
357                                                       final ? NULL : &consumed);
358     return codec_tuple(decoded, consumed);
359 }
360 
361 /* This non-standard version also provides access to the byteorder
362    parameter of the builtin UTF-16 codec.
363 
364    It returns a tuple (unicode, bytesread, byteorder) with byteorder
365    being the value in effect at the end of data.
366 
367 */
368 /*[clinic input]
369 _codecs.utf_16_ex_decode
370     data: Py_buffer
371     errors: str(accept={str, NoneType}) = None
372     byteorder: int = 0
373     final: bool = False
374     /
375 [clinic start generated code]*/
376 
377 static PyObject *
_codecs_utf_16_ex_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int byteorder,int final)378 _codecs_utf_16_ex_decode_impl(PyObject *module, Py_buffer *data,
379                               const char *errors, int byteorder, int final)
380 /*[clinic end generated code: output=0f385f251ecc1988 input=f368a51cf384bf4c]*/
381 {
382     /* This is overwritten unless final is true. */
383     Py_ssize_t consumed = data->len;
384 
385     PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
386                                                       errors, &byteorder,
387                                                       final ? NULL : &consumed);
388     if (decoded == NULL)
389         return NULL;
390     return Py_BuildValue("Nni", decoded, consumed, byteorder);
391 }
392 
393 /*[clinic input]
394 _codecs.utf_32_decode
395     data: Py_buffer
396     errors: str(accept={str, NoneType}) = None
397     final: bool = False
398     /
399 [clinic start generated code]*/
400 
401 static PyObject *
_codecs_utf_32_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)402 _codecs_utf_32_decode_impl(PyObject *module, Py_buffer *data,
403                            const char *errors, int final)
404 /*[clinic end generated code: output=2fc961807f7b145f input=fcdf3658c5e9b5f3]*/
405 {
406     int byteorder = 0;
407     /* This is overwritten unless final is true. */
408     Py_ssize_t consumed = data->len;
409     PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
410                                                       errors, &byteorder,
411                                                       final ? NULL : &consumed);
412     return codec_tuple(decoded, consumed);
413 }
414 
415 /*[clinic input]
416 _codecs.utf_32_le_decode
417     data: Py_buffer
418     errors: str(accept={str, NoneType}) = None
419     final: bool = False
420     /
421 [clinic start generated code]*/
422 
423 static PyObject *
_codecs_utf_32_le_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)424 _codecs_utf_32_le_decode_impl(PyObject *module, Py_buffer *data,
425                               const char *errors, int final)
426 /*[clinic end generated code: output=ec8f46b67a94f3e6 input=12220556e885f817]*/
427 {
428     int byteorder = -1;
429     /* This is overwritten unless final is true. */
430     Py_ssize_t consumed = data->len;
431     PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
432                                                       errors, &byteorder,
433                                                       final ? NULL : &consumed);
434     return codec_tuple(decoded, consumed);
435 }
436 
437 /*[clinic input]
438 _codecs.utf_32_be_decode
439     data: Py_buffer
440     errors: str(accept={str, NoneType}) = None
441     final: bool = False
442     /
443 [clinic start generated code]*/
444 
445 static PyObject *
_codecs_utf_32_be_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)446 _codecs_utf_32_be_decode_impl(PyObject *module, Py_buffer *data,
447                               const char *errors, int final)
448 /*[clinic end generated code: output=ff82bae862c92c4e input=2bc669b4781598db]*/
449 {
450     int byteorder = 1;
451     /* This is overwritten unless final is true. */
452     Py_ssize_t consumed = data->len;
453     PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
454                                                       errors, &byteorder,
455                                                       final ? NULL : &consumed);
456     return codec_tuple(decoded, consumed);
457 }
458 
459 /* This non-standard version also provides access to the byteorder
460    parameter of the builtin UTF-32 codec.
461 
462    It returns a tuple (unicode, bytesread, byteorder) with byteorder
463    being the value in effect at the end of data.
464 
465 */
466 /*[clinic input]
467 _codecs.utf_32_ex_decode
468     data: Py_buffer
469     errors: str(accept={str, NoneType}) = None
470     byteorder: int = 0
471     final: bool = False
472     /
473 [clinic start generated code]*/
474 
475 static PyObject *
_codecs_utf_32_ex_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int byteorder,int final)476 _codecs_utf_32_ex_decode_impl(PyObject *module, Py_buffer *data,
477                               const char *errors, int byteorder, int final)
478 /*[clinic end generated code: output=6bfb177dceaf4848 input=4a2323d0013620df]*/
479 {
480     Py_ssize_t consumed = data->len;
481     PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
482                                                       errors, &byteorder,
483                                                       final ? NULL : &consumed);
484     if (decoded == NULL)
485         return NULL;
486     return Py_BuildValue("Nni", decoded, consumed, byteorder);
487 }
488 
489 /*[clinic input]
490 _codecs.unicode_escape_decode
491     data: Py_buffer(accept={str, buffer})
492     errors: str(accept={str, NoneType}) = None
493     final: bool = True
494     /
495 [clinic start generated code]*/
496 
497 static PyObject *
_codecs_unicode_escape_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)498 _codecs_unicode_escape_decode_impl(PyObject *module, Py_buffer *data,
499                                    const char *errors, int final)
500 /*[clinic end generated code: output=b284f97b12c635ee input=15019f081ffe272b]*/
501 {
502     Py_ssize_t consumed = data->len;
503     PyObject *decoded = _PyUnicode_DecodeUnicodeEscapeStateful(data->buf, data->len,
504                                                                errors,
505                                                                final ? NULL : &consumed);
506     return codec_tuple(decoded, consumed);
507 }
508 
509 /*[clinic input]
510 _codecs.raw_unicode_escape_decode
511     data: Py_buffer(accept={str, buffer})
512     errors: str(accept={str, NoneType}) = None
513     final: bool = True
514     /
515 [clinic start generated code]*/
516 
517 static PyObject *
_codecs_raw_unicode_escape_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)518 _codecs_raw_unicode_escape_decode_impl(PyObject *module, Py_buffer *data,
519                                        const char *errors, int final)
520 /*[clinic end generated code: output=11dbd96301e2879e input=b93f823aa8c343ad]*/
521 {
522     Py_ssize_t consumed = data->len;
523     PyObject *decoded = _PyUnicode_DecodeRawUnicodeEscapeStateful(data->buf, data->len,
524                                                                   errors,
525                                                                   final ? NULL : &consumed);
526     return codec_tuple(decoded, consumed);
527 }
528 
529 /*[clinic input]
530 _codecs.latin_1_decode
531     data: Py_buffer
532     errors: str(accept={str, NoneType}) = None
533     /
534 [clinic start generated code]*/
535 
536 static PyObject *
_codecs_latin_1_decode_impl(PyObject * module,Py_buffer * data,const char * errors)537 _codecs_latin_1_decode_impl(PyObject *module, Py_buffer *data,
538                             const char *errors)
539 /*[clinic end generated code: output=07f3dfa3f72c7d8f input=76ca58fd6dcd08c7]*/
540 {
541     PyObject *decoded = PyUnicode_DecodeLatin1(data->buf, data->len, errors);
542     return codec_tuple(decoded, data->len);
543 }
544 
545 /*[clinic input]
546 _codecs.ascii_decode
547     data: Py_buffer
548     errors: str(accept={str, NoneType}) = None
549     /
550 [clinic start generated code]*/
551 
552 static PyObject *
_codecs_ascii_decode_impl(PyObject * module,Py_buffer * data,const char * errors)553 _codecs_ascii_decode_impl(PyObject *module, Py_buffer *data,
554                           const char *errors)
555 /*[clinic end generated code: output=2627d72058d42429 input=e428a267a04b4481]*/
556 {
557     PyObject *decoded = PyUnicode_DecodeASCII(data->buf, data->len, errors);
558     return codec_tuple(decoded, data->len);
559 }
560 
561 /*[clinic input]
562 _codecs.charmap_decode
563     data: Py_buffer
564     errors: str(accept={str, NoneType}) = None
565     mapping: object = None
566     /
567 [clinic start generated code]*/
568 
569 static PyObject *
_codecs_charmap_decode_impl(PyObject * module,Py_buffer * data,const char * errors,PyObject * mapping)570 _codecs_charmap_decode_impl(PyObject *module, Py_buffer *data,
571                             const char *errors, PyObject *mapping)
572 /*[clinic end generated code: output=2c335b09778cf895 input=15b69df43458eb40]*/
573 {
574     PyObject *decoded;
575 
576     if (mapping == Py_None)
577         mapping = NULL;
578 
579     decoded = PyUnicode_DecodeCharmap(data->buf, data->len, mapping, errors);
580     return codec_tuple(decoded, data->len);
581 }
582 
583 #ifdef MS_WINDOWS
584 
585 /*[clinic input]
586 _codecs.mbcs_decode
587     data: Py_buffer
588     errors: str(accept={str, NoneType}) = None
589     final: bool = False
590     /
591 [clinic start generated code]*/
592 
593 static PyObject *
_codecs_mbcs_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)594 _codecs_mbcs_decode_impl(PyObject *module, Py_buffer *data,
595                          const char *errors, int final)
596 /*[clinic end generated code: output=39b65b8598938c4b input=f144ad1ed6d8f5a6]*/
597 {
598     Py_ssize_t consumed = data->len;
599     PyObject *decoded = PyUnicode_DecodeMBCSStateful(data->buf, data->len,
600             errors, final ? NULL : &consumed);
601     return codec_tuple(decoded, consumed);
602 }
603 
604 /*[clinic input]
605 _codecs.oem_decode
606     data: Py_buffer
607     errors: str(accept={str, NoneType}) = None
608     final: bool = False
609     /
610 [clinic start generated code]*/
611 
612 static PyObject *
_codecs_oem_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)613 _codecs_oem_decode_impl(PyObject *module, Py_buffer *data,
614                         const char *errors, int final)
615 /*[clinic end generated code: output=da1617612f3fcad8 input=629bf87376d211b4]*/
616 {
617     Py_ssize_t consumed = data->len;
618     PyObject *decoded = PyUnicode_DecodeCodePageStateful(CP_OEMCP,
619         data->buf, data->len, errors, final ? NULL : &consumed);
620     return codec_tuple(decoded, consumed);
621 }
622 
623 /*[clinic input]
624 _codecs.code_page_decode
625     codepage: int
626     data: Py_buffer
627     errors: str(accept={str, NoneType}) = None
628     final: bool = False
629     /
630 [clinic start generated code]*/
631 
632 static PyObject *
_codecs_code_page_decode_impl(PyObject * module,int codepage,Py_buffer * data,const char * errors,int final)633 _codecs_code_page_decode_impl(PyObject *module, int codepage,
634                               Py_buffer *data, const char *errors, int final)
635 /*[clinic end generated code: output=53008ea967da3fff input=6a32589b0658c277]*/
636 {
637     Py_ssize_t consumed = data->len;
638     PyObject *decoded = PyUnicode_DecodeCodePageStateful(codepage,
639                                                          data->buf, data->len,
640                                                          errors,
641                                                          final ? NULL : &consumed);
642     return codec_tuple(decoded, consumed);
643 }
644 
645 #endif /* MS_WINDOWS */
646 
647 /* --- Encoder ------------------------------------------------------------ */
648 
649 /*[clinic input]
650 _codecs.readbuffer_encode
651     data: Py_buffer(accept={str, buffer})
652     errors: str(accept={str, NoneType}) = None
653     /
654 [clinic start generated code]*/
655 
656 static PyObject *
_codecs_readbuffer_encode_impl(PyObject * module,Py_buffer * data,const char * errors)657 _codecs_readbuffer_encode_impl(PyObject *module, Py_buffer *data,
658                                const char *errors)
659 /*[clinic end generated code: output=c645ea7cdb3d6e86 input=aa10cfdf252455c5]*/
660 {
661     PyObject *result = PyBytes_FromStringAndSize(data->buf, data->len);
662     return codec_tuple(result, data->len);
663 }
664 
665 /*[clinic input]
666 _codecs.utf_7_encode
667     str: unicode
668     errors: str(accept={str, NoneType}) = None
669     /
670 [clinic start generated code]*/
671 
672 static PyObject *
_codecs_utf_7_encode_impl(PyObject * module,PyObject * str,const char * errors)673 _codecs_utf_7_encode_impl(PyObject *module, PyObject *str,
674                           const char *errors)
675 /*[clinic end generated code: output=0feda21ffc921bc8 input=2546dbbb3fa53114]*/
676 {
677     return codec_tuple(_PyUnicode_EncodeUTF7(str, 0, 0, errors),
678                        PyUnicode_GET_LENGTH(str));
679 }
680 
681 /*[clinic input]
682 _codecs.utf_8_encode
683     str: unicode
684     errors: str(accept={str, NoneType}) = None
685     /
686 [clinic start generated code]*/
687 
688 static PyObject *
_codecs_utf_8_encode_impl(PyObject * module,PyObject * str,const char * errors)689 _codecs_utf_8_encode_impl(PyObject *module, PyObject *str,
690                           const char *errors)
691 /*[clinic end generated code: output=02bf47332b9c796c input=a3e71ae01c3f93f3]*/
692 {
693     return codec_tuple(_PyUnicode_AsUTF8String(str, errors),
694                        PyUnicode_GET_LENGTH(str));
695 }
696 
/* This version provides access to the byteorder parameter of the
   builtin UTF-16 codecs as optional third argument. It defaults to 0
   which means: use the native byte order and prepend the data with a
   BOM.

*/
703 
704 /*[clinic input]
705 _codecs.utf_16_encode
706     str: unicode
707     errors: str(accept={str, NoneType}) = None
708     byteorder: int = 0
709     /
710 [clinic start generated code]*/
711 
712 static PyObject *
_codecs_utf_16_encode_impl(PyObject * module,PyObject * str,const char * errors,int byteorder)713 _codecs_utf_16_encode_impl(PyObject *module, PyObject *str,
714                            const char *errors, int byteorder)
715 /*[clinic end generated code: output=c654e13efa2e64e4 input=68cdc2eb8338555d]*/
716 {
717     return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, byteorder),
718                        PyUnicode_GET_LENGTH(str));
719 }
720 
721 /*[clinic input]
722 _codecs.utf_16_le_encode
723     str: unicode
724     errors: str(accept={str, NoneType}) = None
725     /
726 [clinic start generated code]*/
727 
728 static PyObject *
_codecs_utf_16_le_encode_impl(PyObject * module,PyObject * str,const char * errors)729 _codecs_utf_16_le_encode_impl(PyObject *module, PyObject *str,
730                               const char *errors)
731 /*[clinic end generated code: output=431b01e55f2d4995 input=83d042706eed6798]*/
732 {
733     return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, -1),
734                        PyUnicode_GET_LENGTH(str));
735 }
736 
737 /*[clinic input]
738 _codecs.utf_16_be_encode
739     str: unicode
740     errors: str(accept={str, NoneType}) = None
741     /
742 [clinic start generated code]*/
743 
744 static PyObject *
_codecs_utf_16_be_encode_impl(PyObject * module,PyObject * str,const char * errors)745 _codecs_utf_16_be_encode_impl(PyObject *module, PyObject *str,
746                               const char *errors)
747 /*[clinic end generated code: output=96886a6fd54dcae3 input=6f1e9e623b03071b]*/
748 {
749     return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, +1),
750                        PyUnicode_GET_LENGTH(str));
751 }
752 
/* This version provides access to the byteorder parameter of the
   builtin UTF-32 codecs as optional third argument. It defaults to 0
   which means: use the native byte order and prepend the data with a
   BOM.

*/
759 
760 /*[clinic input]
761 _codecs.utf_32_encode
762     str: unicode
763     errors: str(accept={str, NoneType}) = None
764     byteorder: int = 0
765     /
766 [clinic start generated code]*/
767 
768 static PyObject *
_codecs_utf_32_encode_impl(PyObject * module,PyObject * str,const char * errors,int byteorder)769 _codecs_utf_32_encode_impl(PyObject *module, PyObject *str,
770                            const char *errors, int byteorder)
771 /*[clinic end generated code: output=5c760da0c09a8b83 input=8ec4c64d983bc52b]*/
772 {
773     return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, byteorder),
774                        PyUnicode_GET_LENGTH(str));
775 }
776 
777 /*[clinic input]
778 _codecs.utf_32_le_encode
779     str: unicode
780     errors: str(accept={str, NoneType}) = None
781     /
782 [clinic start generated code]*/
783 
784 static PyObject *
_codecs_utf_32_le_encode_impl(PyObject * module,PyObject * str,const char * errors)785 _codecs_utf_32_le_encode_impl(PyObject *module, PyObject *str,
786                               const char *errors)
787 /*[clinic end generated code: output=b65cd176de8e36d6 input=f0918d41de3eb1b1]*/
788 {
789     return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, -1),
790                        PyUnicode_GET_LENGTH(str));
791 }
792 
793 /*[clinic input]
794 _codecs.utf_32_be_encode
795     str: unicode
796     errors: str(accept={str, NoneType}) = None
797     /
798 [clinic start generated code]*/
799 
800 static PyObject *
_codecs_utf_32_be_encode_impl(PyObject * module,PyObject * str,const char * errors)801 _codecs_utf_32_be_encode_impl(PyObject *module, PyObject *str,
802                               const char *errors)
803 /*[clinic end generated code: output=1d9e71a9358709e9 input=967a99a95748b557]*/
804 {
805     return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, +1),
806                        PyUnicode_GET_LENGTH(str));
807 }
808 
809 /*[clinic input]
810 _codecs.unicode_escape_encode
811     str: unicode
812     errors: str(accept={str, NoneType}) = None
813     /
814 [clinic start generated code]*/
815 
816 static PyObject *
_codecs_unicode_escape_encode_impl(PyObject * module,PyObject * str,const char * errors)817 _codecs_unicode_escape_encode_impl(PyObject *module, PyObject *str,
818                                    const char *errors)
819 /*[clinic end generated code: output=66271b30bc4f7a3c input=8c4de07597054e33]*/
820 {
821     return codec_tuple(PyUnicode_AsUnicodeEscapeString(str),
822                        PyUnicode_GET_LENGTH(str));
823 }
824 
825 /*[clinic input]
826 _codecs.raw_unicode_escape_encode
827     str: unicode
828     errors: str(accept={str, NoneType}) = None
829     /
830 [clinic start generated code]*/
831 
832 static PyObject *
_codecs_raw_unicode_escape_encode_impl(PyObject * module,PyObject * str,const char * errors)833 _codecs_raw_unicode_escape_encode_impl(PyObject *module, PyObject *str,
834                                        const char *errors)
835 /*[clinic end generated code: output=a66a806ed01c830a input=4aa6f280d78e4574]*/
836 {
837     return codec_tuple(PyUnicode_AsRawUnicodeEscapeString(str),
838                        PyUnicode_GET_LENGTH(str));
839 }
840 
841 /*[clinic input]
842 _codecs.latin_1_encode
843     str: unicode
844     errors: str(accept={str, NoneType}) = None
845     /
846 [clinic start generated code]*/
847 
848 static PyObject *
_codecs_latin_1_encode_impl(PyObject * module,PyObject * str,const char * errors)849 _codecs_latin_1_encode_impl(PyObject *module, PyObject *str,
850                             const char *errors)
851 /*[clinic end generated code: output=2c28c83a27884e08 input=ec3ef74bf85c5c5d]*/
852 {
853     return codec_tuple(_PyUnicode_AsLatin1String(str, errors),
854                        PyUnicode_GET_LENGTH(str));
855 }
856 
857 /*[clinic input]
858 _codecs.ascii_encode
859     str: unicode
860     errors: str(accept={str, NoneType}) = None
861     /
862 [clinic start generated code]*/
863 
864 static PyObject *
_codecs_ascii_encode_impl(PyObject * module,PyObject * str,const char * errors)865 _codecs_ascii_encode_impl(PyObject *module, PyObject *str,
866                           const char *errors)
867 /*[clinic end generated code: output=b5e035182d33befc input=93e6e602838bd3de]*/
868 {
869     return codec_tuple(_PyUnicode_AsASCIIString(str, errors),
870                        PyUnicode_GET_LENGTH(str));
871 }
872 
873 /*[clinic input]
874 _codecs.charmap_encode
875     str: unicode
876     errors: str(accept={str, NoneType}) = None
877     mapping: object = None
878     /
879 [clinic start generated code]*/
880 
881 static PyObject *
_codecs_charmap_encode_impl(PyObject * module,PyObject * str,const char * errors,PyObject * mapping)882 _codecs_charmap_encode_impl(PyObject *module, PyObject *str,
883                             const char *errors, PyObject *mapping)
884 /*[clinic end generated code: output=047476f48495a9e9 input=2a98feae73dadce8]*/
885 {
886     if (mapping == Py_None)
887         mapping = NULL;
888 
889     return codec_tuple(_PyUnicode_EncodeCharmap(str, mapping, errors),
890                        PyUnicode_GET_LENGTH(str));
891 }
892 
893 /*[clinic input]
894 _codecs.charmap_build
895     map: unicode
896     /
897 [clinic start generated code]*/
898 
899 static PyObject *
_codecs_charmap_build_impl(PyObject * module,PyObject * map)900 _codecs_charmap_build_impl(PyObject *module, PyObject *map)
901 /*[clinic end generated code: output=bb073c27031db9ac input=d91a91d1717dbc6d]*/
902 {
903     return PyUnicode_BuildEncodingMap(map);
904 }
905 
906 #ifdef MS_WINDOWS
907 
908 /*[clinic input]
909 _codecs.mbcs_encode
910     str: unicode
911     errors: str(accept={str, NoneType}) = None
912     /
913 [clinic start generated code]*/
914 
915 static PyObject *
_codecs_mbcs_encode_impl(PyObject * module,PyObject * str,const char * errors)916 _codecs_mbcs_encode_impl(PyObject *module, PyObject *str, const char *errors)
917 /*[clinic end generated code: output=76e2e170c966c080 input=2e932fc289ea5a5b]*/
918 {
919     return codec_tuple(PyUnicode_EncodeCodePage(CP_ACP, str, errors),
920                        PyUnicode_GET_LENGTH(str));
921 }
922 
923 /*[clinic input]
924 _codecs.oem_encode
925     str: unicode
926     errors: str(accept={str, NoneType}) = None
927     /
928 [clinic start generated code]*/
929 
930 static PyObject *
_codecs_oem_encode_impl(PyObject * module,PyObject * str,const char * errors)931 _codecs_oem_encode_impl(PyObject *module, PyObject *str, const char *errors)
932 /*[clinic end generated code: output=65d5982c737de649 input=9eac86dc21eb14f2]*/
933 {
934     return codec_tuple(PyUnicode_EncodeCodePage(CP_OEMCP, str, errors),
935         PyUnicode_GET_LENGTH(str));
936 }
937 
938 /*[clinic input]
939 _codecs.code_page_encode
940     code_page: int
941     str: unicode
942     errors: str(accept={str, NoneType}) = None
943     /
944 [clinic start generated code]*/
945 
946 static PyObject *
_codecs_code_page_encode_impl(PyObject * module,int code_page,PyObject * str,const char * errors)947 _codecs_code_page_encode_impl(PyObject *module, int code_page, PyObject *str,
948                               const char *errors)
949 /*[clinic end generated code: output=45673f6085657a9e input=7d18a33bc8cd0f94]*/
950 {
951     return codec_tuple(PyUnicode_EncodeCodePage(code_page, str, errors),
952                        PyUnicode_GET_LENGTH(str));
953 }
954 
955 #endif /* MS_WINDOWS */
956 
957 /* --- Error handler registry --------------------------------------------- */
958 
959 /*[clinic input]
960 _codecs.register_error
961     errors: str
962     handler: object
963     /
964 
965 Register the specified error handler under the name errors.
966 
967 handler must be a callable object, that will be called with an exception
968 instance containing information about the location of the encoding/decoding
969 error and must return a (replacement, new position) tuple.
970 [clinic start generated code]*/
971 
972 static PyObject *
_codecs_register_error_impl(PyObject * module,const char * errors,PyObject * handler)973 _codecs_register_error_impl(PyObject *module, const char *errors,
974                             PyObject *handler)
975 /*[clinic end generated code: output=fa2f7d1879b3067d input=5e6709203c2e33fe]*/
976 {
977     if (PyCodec_RegisterError(errors, handler))
978         return NULL;
979     Py_RETURN_NONE;
980 }
981 
982 /*[clinic input]
983 _codecs.lookup_error
984     name: str
985     /
986 
987 lookup_error(errors) -> handler
988 
989 Return the error handler for the specified error handling name or raise a
990 LookupError, if no handler exists under this name.
991 [clinic start generated code]*/
992 
993 static PyObject *
_codecs_lookup_error_impl(PyObject * module,const char * name)994 _codecs_lookup_error_impl(PyObject *module, const char *name)
995 /*[clinic end generated code: output=087f05dc0c9a98cc input=4775dd65e6235aba]*/
996 {
997     return PyCodec_LookupError(name);
998 }
999 
1000 /* --- Module API --------------------------------------------------------- */
1001 
/* Method table of the _codecs module, referenced from the module
   definition below.  Each entry is a *_METHODDEF macro rather than a
   literal initializer, and the table ends with a {NULL, NULL} sentinel.

   NOTE(review): the macros are presumably supplied by the generated
   header clinic/_codecsmodule.c.h included near the top of this file, and
   presumably each expands to a complete entry with its own trailing
   comma (none are written here) -- confirm against that header.

   NOTE(review): the MBCS, OEM and CODE_PAGE entries are listed
   unconditionally, while the encode implementations in this file are
   compiled only under MS_WINDOWS; presumably the generated header makes
   those macros expand to nothing on other platforms -- confirm. */
static PyMethodDef _codecs_functions[] = {
    _CODECS_REGISTER_METHODDEF
    _CODECS_UNREGISTER_METHODDEF
    _CODECS_LOOKUP_METHODDEF
    _CODECS_ENCODE_METHODDEF
    _CODECS_DECODE_METHODDEF
    _CODECS_ESCAPE_ENCODE_METHODDEF
    _CODECS_ESCAPE_DECODE_METHODDEF
    _CODECS_UTF_8_ENCODE_METHODDEF
    _CODECS_UTF_8_DECODE_METHODDEF
    _CODECS_UTF_7_ENCODE_METHODDEF
    _CODECS_UTF_7_DECODE_METHODDEF
    _CODECS_UTF_16_ENCODE_METHODDEF
    _CODECS_UTF_16_LE_ENCODE_METHODDEF
    _CODECS_UTF_16_BE_ENCODE_METHODDEF
    _CODECS_UTF_16_DECODE_METHODDEF
    _CODECS_UTF_16_LE_DECODE_METHODDEF
    _CODECS_UTF_16_BE_DECODE_METHODDEF
    _CODECS_UTF_16_EX_DECODE_METHODDEF
    _CODECS_UTF_32_ENCODE_METHODDEF
    _CODECS_UTF_32_LE_ENCODE_METHODDEF
    _CODECS_UTF_32_BE_ENCODE_METHODDEF
    _CODECS_UTF_32_DECODE_METHODDEF
    _CODECS_UTF_32_LE_DECODE_METHODDEF
    _CODECS_UTF_32_BE_DECODE_METHODDEF
    _CODECS_UTF_32_EX_DECODE_METHODDEF
    _CODECS_UNICODE_ESCAPE_ENCODE_METHODDEF
    _CODECS_UNICODE_ESCAPE_DECODE_METHODDEF
    _CODECS_RAW_UNICODE_ESCAPE_ENCODE_METHODDEF
    _CODECS_RAW_UNICODE_ESCAPE_DECODE_METHODDEF
    _CODECS_LATIN_1_ENCODE_METHODDEF
    _CODECS_LATIN_1_DECODE_METHODDEF
    _CODECS_ASCII_ENCODE_METHODDEF
    _CODECS_ASCII_DECODE_METHODDEF
    _CODECS_CHARMAP_ENCODE_METHODDEF
    _CODECS_CHARMAP_DECODE_METHODDEF
    _CODECS_CHARMAP_BUILD_METHODDEF
    _CODECS_READBUFFER_ENCODE_METHODDEF
    _CODECS_MBCS_ENCODE_METHODDEF
    _CODECS_MBCS_DECODE_METHODDEF
    _CODECS_OEM_ENCODE_METHODDEF
    _CODECS_OEM_DECODE_METHODDEF
    _CODECS_CODE_PAGE_ENCODE_METHODDEF
    _CODECS_CODE_PAGE_DECODE_METHODDEF
    _CODECS_REGISTER_ERROR_METHODDEF
    _CODECS_LOOKUP_ERROR_METHODDEF
    {NULL, NULL}                /* sentinel */
};
1050 
/* Slot table referenced from the module definition below.  It holds two
   declarative slots and no exec slot, followed by a zero sentinel.
   NOTE(review): per the documented slot values, the first entry marks the
   module as usable in interpreters that have their own GIL, and the
   second marks it as not relying on the GIL -- confirm against the C API
   module documentation for the targeted Python version. */
static PyModuleDef_Slot _codecs_slots[] = {
    {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
    {Py_mod_gil, Py_MOD_GIL_NOT_USED},
    {0, NULL}   /* sentinel */
};
1056 
1057 static struct PyModuleDef codecsmodule = {
1058         PyModuleDef_HEAD_INIT,
1059         "_codecs",
1060         NULL,
1061         0,
1062         _codecs_functions,
1063         _codecs_slots,
1064         NULL,
1065         NULL,
1066         NULL
1067 };
1068 
1069 PyMODINIT_FUNC
PyInit__codecs(void)1070 PyInit__codecs(void)
1071 {
1072     return PyModuleDef_Init(&codecsmodule);
1073 }
1074