• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #ifndef Py_UNICODEOBJECT_H
2 #define Py_UNICODEOBJECT_H
3 
4 #include <stdarg.h>
5 
6 /*
7 
8 Unicode implementation based on original code by Fredrik Lundh,
9 modified by Marc-Andre Lemburg (mal@lemburg.com) according to the
10 Unicode Integration Proposal. (See
11 http://www.egenix.com/files/python/unicode-proposal.txt).
12 
13 Copyright (c) Corporation for National Research Initiatives.
14 
15 
16  Original header:
17  --------------------------------------------------------------------
18 
19  * Yet another Unicode string type for Python.  This type supports the
20  * 16-bit Basic Multilingual Plane (BMP) only.
21  *
22  * Written by Fredrik Lundh, January 1999.
23  *
24  * Copyright (c) 1999 by Secret Labs AB.
25  * Copyright (c) 1999 by Fredrik Lundh.
26  *
27  * fredrik@pythonware.com
28  * http://www.pythonware.com
29  *
30  * --------------------------------------------------------------------
31  * This Unicode String Type is
32  *
33  * Copyright (c) 1999 by Secret Labs AB
34  * Copyright (c) 1999 by Fredrik Lundh
35  *
36  * By obtaining, using, and/or copying this software and/or its
37  * associated documentation, you agree that you have read, understood,
38  * and will comply with the following terms and conditions:
39  *
40  * Permission to use, copy, modify, and distribute this software and its
41  * associated documentation for any purpose and without fee is hereby
42  * granted, provided that the above copyright notice appears in all
43  * copies, and that both that copyright notice and this permission notice
44  * appear in supporting documentation, and that the name of Secret Labs
45  * AB or the author not be used in advertising or publicity pertaining to
46  * distribution of the software without specific, written prior
47  * permission.
48  *
49  * SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
50  * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
51  * FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
52  * ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
53  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
54  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
55  * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
56  * -------------------------------------------------------------------- */
57 
58 #include <ctype.h>
59 
60 /* === Internal API ======================================================= */
61 
62 /* --- Internal Unicode Format -------------------------------------------- */
63 
64 /* Python 3.x requires unicode */
65 #define Py_USING_UNICODE
66 
67 #ifndef SIZEOF_WCHAR_T
68 #error Must define SIZEOF_WCHAR_T
69 #endif
70 
71 #define Py_UNICODE_SIZE SIZEOF_WCHAR_T
72 
73 /* If wchar_t can be used for UCS-4 storage, set Py_UNICODE_WIDE.
74    Otherwise, Unicode strings are stored as UCS-2 (with limited support
75    for UTF-16) */
76 
77 #if Py_UNICODE_SIZE >= 4
78 #define Py_UNICODE_WIDE
79 #endif
80 
81 /* Set these flags if the platform has "wchar.h" and the
82    wchar_t type is a 16-bit unsigned type */
83 /* #define HAVE_WCHAR_H */
84 /* #define HAVE_USABLE_WCHAR_T */
85 
/* Py_UNICODE is the legacy code unit type.  It used to be the native
   Unicode storage format of Python, one value per element of a Unicode
   string.  With PEP 393 it is deprecated and is merely a typedef to
   wchar_t.  Neither name below is defined under the limited API. */
#if !defined(Py_LIMITED_API)
#  define PY_UNICODE_TYPE wchar_t
typedef PY_UNICODE_TYPE Py_UNICODE /* Py_DEPRECATED(3.3) */;
#endif
95 
/* When the compiler provides a wchar_t type, it is supported through the
   interface functions PyUnicode_FromWideChar(), PyUnicode_AsWideChar()
   and PyUnicode_AsWideCharString(). */

/* A usable wchar_t implies that <wchar.h> is available. */
#if defined(HAVE_USABLE_WCHAR_T) && !defined(HAVE_WCHAR_H)
#  define HAVE_WCHAR_H
#endif

#if defined(HAVE_WCHAR_H)
#  include <wchar.h>
#endif
109 
/* Py_UCS1, Py_UCS2 and Py_UCS4 are the fixed-width unsigned types used for
   the 8-bit, 16-bit and 32-bit unicode representations. */
typedef uint8_t  Py_UCS1;
typedef uint16_t Py_UCS2;
typedef uint32_t Py_UCS4;
115 
116 /* --- Internal Unicode Operations ---------------------------------------- */
117 
118 /* Since splitting on whitespace is an important use case, and
119    whitespace in most situations is solely ASCII whitespace, we
120    optimize for the common case by using a quick look-up table
121    _Py_ascii_whitespace (see below) with an inlined check.
122 
123  */
124 #ifndef Py_LIMITED_API
/* Values below 128 are looked up in _Py_ascii_whitespace; anything else is
   handed to _PyUnicode_IsWhitespace().  ch is evaluated more than once. */
#define Py_UNICODE_ISSPACE(ch)              \
    (((ch) < 128U)                          \
        ? _Py_ascii_whitespace[(ch)]        \
        : _PyUnicode_IsWhitespace(ch))
127 
/* Case and line-break predicates: aliases for the _PyUnicode_Is*() helpers. */
#define Py_UNICODE_ISLOWER(ch)      _PyUnicode_IsLowercase(ch)
#define Py_UNICODE_ISUPPER(ch)      _PyUnicode_IsUppercase(ch)
#define Py_UNICODE_ISTITLE(ch)      _PyUnicode_IsTitlecase(ch)
#define Py_UNICODE_ISLINEBREAK(ch)  _PyUnicode_IsLinebreak(ch)

/* Case conversions: aliases for the _PyUnicode_To*case() helpers. */
#define Py_UNICODE_TOLOWER(ch)      _PyUnicode_ToLowercase(ch)
#define Py_UNICODE_TOUPPER(ch)      _PyUnicode_ToUppercase(ch)
#define Py_UNICODE_TOTITLE(ch)      _PyUnicode_ToTitlecase(ch)

/* Numeric and printable predicates. */
#define Py_UNICODE_ISDECIMAL(ch)    _PyUnicode_IsDecimalDigit(ch)
#define Py_UNICODE_ISDIGIT(ch)      _PyUnicode_IsDigit(ch)
#define Py_UNICODE_ISNUMERIC(ch)    _PyUnicode_IsNumeric(ch)
#define Py_UNICODE_ISPRINTABLE(ch)  _PyUnicode_IsPrintable(ch)

/* Numeric conversions. */
#define Py_UNICODE_TODECIMAL(ch)    _PyUnicode_ToDecimalDigit(ch)
#define Py_UNICODE_TODIGIT(ch)      _PyUnicode_ToDigit(ch)
#define Py_UNICODE_TONUMERIC(ch)    _PyUnicode_ToNumeric(ch)

#define Py_UNICODE_ISALPHA(ch)      _PyUnicode_IsAlpha(ch)

/* True when ch is alphabetic, a decimal digit, a digit or numeric.  The
   tests run in that order and stop at the first hit, so ch may be
   evaluated up to four times. */
#define Py_UNICODE_ISALNUM(ch)          \
    (Py_UNICODE_ISALPHA(ch)   ||        \
     Py_UNICODE_ISDECIMAL(ch) ||        \
     Py_UNICODE_ISDIGIT(ch)   ||        \
     Py_UNICODE_ISNUMERIC(ch))
153 
/* Copy length Py_UNICODE code units from source to target.  The copy is
   done with memcpy(), so the two buffers must not overlap. */
#define Py_UNICODE_COPY(target, source, length)     \
    memcpy((target), (source), sizeof(Py_UNICODE) * (length))
156 
/* Store value into the first length Py_UNICODE slots of target.

   target, value and length are each captured in a local before the loop
   starts, so every argument is evaluated exactly once (in that order) and
   arguments with side effects are safe.  The macro expands to a single
   statement; a length of zero or less writes nothing. */
#define Py_UNICODE_FILL(target, value, length)                          \
    do {                                                                \
        Py_UNICODE *t_ = (target);                                      \
        Py_UNICODE v_ = (value);                                        \
        Py_ssize_t n_ = (length);                                       \
        Py_ssize_t i_;                                                  \
        for (i_ = 0; i_ < n_; i_++) {                                   \
            t_[i_] = v_;                                                \
        }                                                               \
    } while (0)
161 
/* Helpers for surrogates.  The range tests may evaluate ch twice. */
#define Py_UNICODE_IS_SURROGATE(ch)      ((ch) >= 0xD800 && (ch) <= 0xDFFF)
#define Py_UNICODE_IS_HIGH_SURROGATE(ch) ((ch) >= 0xD800 && (ch) <= 0xDBFF)
#define Py_UNICODE_IS_LOW_SURROGATE(ch)  ((ch) >= 0xDC00 && (ch) <= 0xDFFF)

/* Combine a high and a low surrogate into one Py_UCS4 value: the low 10
   bits of both halves are concatenated and 0x10000 is added. */
#define Py_UNICODE_JOIN_SURROGATES(high, low)       \
    (0x10000 +                                      \
     ((((Py_UCS4)(high) & 0x03FF) << 10) |          \
      ((Py_UCS4)(low) & 0x03FF)))

/* High surrogate of ch: its top 10 bits added to D800. */
#define Py_UNICODE_HIGH_SURROGATE(ch) \
    (((ch) >> 10) + (0xD800 - (0x10000 >> 10)))

/* Low surrogate of ch: its bottom 10 bits added to DC00. */
#define Py_UNICODE_LOW_SURROGATE(ch) \
    (((ch) & 0x3FF) + 0xDC00)
174 
/* Test whether substring occurs in string at the given offset.  The offset
   must be valid and the substring must not be empty.  The first and the
   last code unit are compared first; only when both agree is the whole
   substring compared with memcmp().

   NOTE(review): the macro reads ->wstr and ->wstr_length directly from its
   arguments, so both must point to a type that exposes these two members
   directly -- confirm against the callers. */
#define Py_UNICODE_MATCH(string, offset, substring)                         \
    ((*((string)->wstr + (offset)) == *((substring)->wstr))                 \
     && (*((string)->wstr + (offset) + (substring)->wstr_length-1)          \
         == *((substring)->wstr + (substring)->wstr_length-1))              \
     && memcmp((string)->wstr + (offset),                                   \
               (substring)->wstr,                                           \
               (substring)->wstr_length*sizeof(Py_UNICODE)) == 0)
182 
183 #endif /* Py_LIMITED_API */
184 
185 #ifdef __cplusplus
186 extern "C" {
187 #endif
188 
189 /* --- Unicode Type ------------------------------------------------------- */
190 
191 #ifndef Py_LIMITED_API
192 
193 /* ASCII-only strings created through PyUnicode_New use the PyASCIIObject
194    structure. state.ascii and state.compact are set, and the data
195    immediately follow the structure. utf8_length and wstr_length can be found
196    in the length field; the utf8 pointer is equal to the data pointer. */
197 typedef struct {
198     /* There are 4 forms of Unicode strings:
199 
200        - compact ascii:
201 
202          * structure = PyASCIIObject
203          * test: PyUnicode_IS_COMPACT_ASCII(op)
204          * kind = PyUnicode_1BYTE_KIND
205          * compact = 1
206          * ascii = 1
207          * ready = 1
208          * (length is the length of the utf8 and wstr strings)
209          * (data starts just after the structure)
210          * (since ASCII is decoded from UTF-8, the utf8 string are the data)
211 
212        - compact:
213 
214          * structure = PyCompactUnicodeObject
215          * test: PyUnicode_IS_COMPACT(op) && !PyUnicode_IS_ASCII(op)
216          * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
217            PyUnicode_4BYTE_KIND
218          * compact = 1
219          * ready = 1
220          * ascii = 0
221          * utf8 is not shared with data
222          * utf8_length = 0 if utf8 is NULL
223          * wstr is shared with data and wstr_length=length
224            if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
225            or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
226          * wstr_length = 0 if wstr is NULL
227          * (data starts just after the structure)
228 
229        - legacy string, not ready:
230 
231          * structure = PyUnicodeObject
232          * test: kind == PyUnicode_WCHAR_KIND
233          * length = 0 (use wstr_length)
234          * hash = -1
235          * kind = PyUnicode_WCHAR_KIND
236          * compact = 0
237          * ascii = 0
238          * ready = 0
239          * interned = SSTATE_NOT_INTERNED
240          * wstr is not NULL
241          * data.any is NULL
242          * utf8 is NULL
243          * utf8_length = 0
244 
245        - legacy string, ready:
246 
247          * structure = PyUnicodeObject structure
248          * test: !PyUnicode_IS_COMPACT(op) && kind != PyUnicode_WCHAR_KIND
249          * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
250            PyUnicode_4BYTE_KIND
251          * compact = 0
252          * ready = 1
253          * data.any is not NULL
254          * utf8 is shared and utf8_length = length with data.any if ascii = 1
255          * utf8_length = 0 if utf8 is NULL
256          * wstr is shared with data.any and wstr_length = length
257            if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
258            or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_4)=4
259          * wstr_length = 0 if wstr is NULL
260 
261        Compact strings use only one memory block (structure + characters),
262        whereas legacy strings use one block for the structure and one block
263        for characters.
264 
265        Legacy strings are created by PyUnicode_FromUnicode() and
266        PyUnicode_FromStringAndSize(NULL, size) functions. They become ready
267        when PyUnicode_READY() is called.
268 
269        See also _PyUnicode_CheckConsistency().
270     */
271     PyObject_HEAD
272     Py_ssize_t length;          /* Number of code points in the string */
273     Py_hash_t hash;             /* Hash value; -1 if not set */
274     struct {
275         /*
276            SSTATE_NOT_INTERNED (0)
277            SSTATE_INTERNED_MORTAL (1)
278            SSTATE_INTERNED_IMMORTAL (2)
279 
280            If interned != SSTATE_NOT_INTERNED, the two references from the
281            dictionary to this object are *not* counted in ob_refcnt.
282          */
283         unsigned int interned:2;
284         /* Character size:
285 
286            - PyUnicode_WCHAR_KIND (0):
287 
288              * character type = wchar_t (16 or 32 bits, depending on the
289                platform)
290 
291            - PyUnicode_1BYTE_KIND (1):
292 
293              * character type = Py_UCS1 (8 bits, unsigned)
294              * all characters are in the range U+0000-U+00FF (latin1)
295              * if ascii is set, all characters are in the range U+0000-U+007F
296                (ASCII), otherwise at least one character is in the range
297                U+0080-U+00FF
298 
299            - PyUnicode_2BYTE_KIND (2):
300 
301              * character type = Py_UCS2 (16 bits, unsigned)
302              * all characters are in the range U+0000-U+FFFF (BMP)
303              * at least one character is in the range U+0100-U+FFFF
304 
305            - PyUnicode_4BYTE_KIND (4):
306 
307              * character type = Py_UCS4 (32 bits, unsigned)
308              * all characters are in the range U+0000-U+10FFFF
309              * at least one character is in the range U+10000-U+10FFFF
310          */
311         unsigned int kind:3;
312         /* Compact is with respect to the allocation scheme. Compact unicode
313            objects only require one memory block while non-compact objects use
314            one block for the PyUnicodeObject struct and another for its data
315            buffer. */
316         unsigned int compact:1;
317         /* The string only contains characters in the range U+0000-U+007F (ASCII)
318            and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is
319            set, use the PyASCIIObject structure. */
320         unsigned int ascii:1;
321         /* The ready flag indicates whether the object layout is initialized
322            completely. This means that this is either a compact object, or
323            the data pointer is filled out. The bit is redundant, and helps
324            to minimize the test in PyUnicode_IS_READY(). */
325         unsigned int ready:1;
326         /* Padding to ensure that PyUnicode_DATA() is always aligned to
327            4 bytes (see issue #19537 on m68k). */
328         unsigned int :24;
329     } state;
330     wchar_t *wstr;              /* wchar_t representation (null-terminated) */
331 } PyASCIIObject;
332 
333 /* Non-ASCII strings allocated through PyUnicode_New use the
334    PyCompactUnicodeObject structure. state.compact is set, and the data
335    immediately follow the structure. */
336 typedef struct {
337     PyASCIIObject _base;
338     Py_ssize_t utf8_length;     /* Number of bytes in utf8, excluding the
339                                  * terminating \0. */
340     char *utf8;                 /* UTF-8 representation (null-terminated) */
341     Py_ssize_t wstr_length;     /* Number of code points in wstr, possible
342                                  * surrogates count as two code points. */
343 } PyCompactUnicodeObject;
344 
345 /* Strings allocated through PyUnicode_FromUnicode(NULL, len) use the
346    PyUnicodeObject structure. The actual string data is initially in the wstr
347    block, and copied into the data block using _PyUnicode_Ready. */
348 typedef struct {
349     PyCompactUnicodeObject _base;
350     union {
351         void *any;
352         Py_UCS1 *latin1;
353         Py_UCS2 *ucs2;
354         Py_UCS4 *ucs4;
355     } data;                     /* Canonical, smallest-form Unicode buffer */
356 } PyUnicodeObject;
357 #endif
358 
359 PyAPI_DATA(PyTypeObject) PyUnicode_Type;
360 PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
361 
362 #define PyUnicode_Check(op) \
363                  PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS)
364 #define PyUnicode_CheckExact(op) (Py_TYPE(op) == &PyUnicode_Type)
365 
366 /* Fast access macros */
367 #ifndef Py_LIMITED_API
368 
/* Length of the wstr representation of op.  Compact ASCII strings are
   PyASCIIObject instances and have no wstr_length member, so their length
   field is used; every other string reads wstr_length from
   PyCompactUnicodeObject.

   op is parenthesized before it is cast, so that an expression argument
   (for instance pointer arithmetic) is cast as a whole.  No type check is
   performed and op is evaluated more than once. */
#define PyUnicode_WSTR_LENGTH(op)                       \
    (PyUnicode_IS_COMPACT_ASCII(op) ?                   \
     ((PyASCIIObject *)(op))->length :                  \
     ((PyCompactUnicodeObject *)(op))->wstr_length)
373 
/* Size of the deprecated Py_UNICODE representation in code units (a
   surrogate pair counts as 2 units).  When that representation does not
   exist yet, PyUnicode_AsUnicode() is called to compute it on request.
   Use PyUnicode_GET_LENGTH() to get the length in code points. */
#define PyUnicode_GET_SIZE(op)                                  \
    (assert(PyUnicode_Check(op)),                               \
     ((((PyASCIIObject *)(op))->wstr)                           \
        ? PyUnicode_WSTR_LENGTH(op)                             \
        : ((void)PyUnicode_AsUnicode((PyObject *)(op)),         \
           assert(((PyASCIIObject *)(op))->wstr),               \
           PyUnicode_WSTR_LENGTH(op))))
    /* Py_DEPRECATED(3.3) */

/* PyUnicode_GET_SIZE(op) multiplied by Py_UNICODE_SIZE. */
#define PyUnicode_GET_DATA_SIZE(op)                             \
    (Py_UNICODE_SIZE * PyUnicode_GET_SIZE(op))
    /* Py_DEPRECATED(3.3) */
391 
/* Alias for PyUnicode_AsUnicode(): yields the wstr pointer when it already
   exists, and otherwise calls PyUnicode_AsUnicode(), which creates the
   wchar_t/Py_UNICODE representation on demand.  This macro is very
   inefficient now; port code to the PyUnicode_*BYTE_DATA() macros, or to
   PyUnicode_WRITE() and PyUnicode_READ(). */
#define PyUnicode_AS_UNICODE(op)                        \
    (assert(PyUnicode_Check(op)),                       \
     (((PyASCIIObject *)(op))->wstr)                    \
        ? (((PyASCIIObject *)(op))->wstr)               \
        : PyUnicode_AsUnicode((PyObject *)(op)))
    /* Py_DEPRECATED(3.3) */

/* The result of PyUnicode_AS_UNICODE(op), cast to const char *. */
#define PyUnicode_AS_DATA(op)                           \
    ((const char *)(PyUnicode_AS_UNICODE(op)))
    /* Py_DEPRECATED(3.3) */
406 
407 
408 /* --- Flexible String Representation Helper Macros (PEP 393) -------------- */
409 
/* Values of the interning state kept in PyASCIIObject.state. */
#define SSTATE_NOT_INTERNED         0
#define SSTATE_INTERNED_MORTAL      1
#define SSTATE_INTERNED_IMMORTAL    2
416 
/* Return true if the string contains only ASCII characters, or 0 if not.
   The string may be compact (PyUnicode_IS_COMPACT_ASCII) or not, but it
   must be ready; both the type and the ready state are asserted.

   op is parenthesized before it is cast, so that an expression argument
   (for instance pointer arithmetic) is cast as a whole. */
#define PyUnicode_IS_ASCII(op)                   \
    (assert(PyUnicode_Check(op)),                \
     assert(PyUnicode_IS_READY(op)),             \
     ((PyASCIIObject *)(op))->state.ascii)
424 
/* Non-zero when the string is compact, 0 otherwise.  Neither a type check
   nor a Ready call is performed. */
#define PyUnicode_IS_COMPACT(op)    \
    ((*(PyASCIIObject *)(op)).state.compact)
429 
/* Return true if the string is a compact ASCII string (one that uses the
   PyASCIIObject structure), or 0 if not.  No type checks or Ready calls
   are performed.

   op is parenthesized before it is cast, so that an expression argument
   (for instance pointer arithmetic) is cast as a whole. */
#define PyUnicode_IS_COMPACT_ASCII(op)                 \
    (((PyASCIIObject *)(op))->state.ascii && PyUnicode_IS_COMPACT(op))
434 
enum PyUnicode_Kind {
    /* The string only has its wstr (wchar_t) characters.  This is possible
       only for a string that was created with a legacy API and on which
       _PyUnicode_Ready() has not been called yet. */
    PyUnicode_WCHAR_KIND = 0,

    /* The values that the PyUnicode_KIND() macro returns: */
    PyUnicode_1BYTE_KIND = 1,
    PyUnicode_2BYTE_KIND = 2,
    PyUnicode_4BYTE_KIND = 4
};
445 
/* Typed views of the canonical representation for direct character
   access: the PyUnicode_DATA() pointer cast to Py_UCS1 *, Py_UCS2 * or
   Py_UCS4 *.  Nothing is checked here; consult PyUnicode_KIND() first to
   pick the view that matches the string. */
#define PyUnicode_1BYTE_DATA(op)    ((Py_UCS1 *)(PyUnicode_DATA(op)))
#define PyUnicode_2BYTE_DATA(op)    ((Py_UCS2 *)(PyUnicode_DATA(op)))
#define PyUnicode_4BYTE_DATA(op)    ((Py_UCS4 *)(PyUnicode_DATA(op)))
454 
/* The state.kind field of op: one of the PyUnicode_*_KIND values.  The
   type of op and its ready state are asserted. */
#define PyUnicode_KIND(op)                      \
    (assert(PyUnicode_Check(op)),               \
     assert(PyUnicode_IS_READY(op)),            \
     (*(PyASCIIObject *)(op)).state.kind)
460 
/* Data pointer of a compact string: the characters start right behind the
   PyASCIIObject structure for ASCII strings, and right behind the
   PyCompactUnicodeObject structure otherwise. */
#define _PyUnicode_COMPACT_DATA(op)                             \
    (PyUnicode_IS_ASCII(op)                                     \
        ? ((void *)((PyASCIIObject *)(op) + 1))                 \
        : ((void *)((PyCompactUnicodeObject *)(op) + 1)))

/* Data pointer of a non-compact string: its data.any member, which is
   asserted to be non-NULL. */
#define _PyUnicode_NONCOMPACT_DATA(op)                          \
    (assert(((PyUnicodeObject *)(op))->data.any),               \
     (((PyUnicodeObject *)(op))->data.any))

/* Return a void pointer to the raw unicode buffer of op. */
#define PyUnicode_DATA(op)                                      \
    (assert(PyUnicode_Check(op)),                               \
     PyUnicode_IS_COMPACT(op)                                   \
        ? _PyUnicode_COMPACT_DATA(op)                           \
        : _PyUnicode_NONCOMPACT_DATA(op))
475 
476 /* In the access macros below, "kind" may be evaluated more than once.
477    All other macro parameters are evaluated exactly once, so it is safe
478    to put side effects into them (such as increasing the index). */
479 
/* Store a code point in the canonical representation.  No sanity checks
   are made; the macro is meant for loops in which the caller has cached
   the kind and data pointers obtained from other macro calls.
   index is the zero-based position in the string and value is the new code
   point for that position.  value is narrowed to the character type that
   kind selects; any kind other than the 1-byte and 2-byte kinds is
   asserted to be PyUnicode_4BYTE_KIND.  kind may be evaluated more than
   once. */
#define PyUnicode_WRITE(kind, data, index, value)                       \
    do {                                                                \
        if ((kind) == PyUnicode_1BYTE_KIND) {                           \
            ((Py_UCS1 *)(data))[(index)] = (Py_UCS1)(value);            \
        }                                                               \
        else if ((kind) == PyUnicode_2BYTE_KIND) {                      \
            ((Py_UCS2 *)(data))[(index)] = (Py_UCS2)(value);            \
        }                                                               \
        else {                                                          \
            assert((kind) == PyUnicode_4BYTE_KIND);                     \
            ((Py_UCS4 *)(data))[(index)] = (Py_UCS4)(value);            \
        }                                                               \
    } while (0)
502 
/* Fetch the code point at index from the canonical representation of a
   string and yield it as a Py_UCS4.  data is read as Py_UCS1, Py_UCS2 or
   Py_UCS4 elements depending on kind.  No checks or ready calls are
   performed, and kind may be evaluated twice. */
#define PyUnicode_READ(kind, data, index)                               \
    ((Py_UCS4)                                                          \
     (((kind) == PyUnicode_1BYTE_KIND) ?                                \
          ((const Py_UCS1 *)(data))[(index)] :                          \
      ((kind) == PyUnicode_2BYTE_KIND) ?                                \
          ((const Py_UCS2 *)(data))[(index)] :                          \
          ((const Py_UCS4 *)(data))[(index)]))
514 
/* Read the code point at index directly from a unicode object.  This is
   less efficient than PyUnicode_READ(), because PyUnicode_KIND() is called
   here, possibly twice.  Use PyUnicode_READ_CHAR for a single read; for
   several consecutive reads, cache the kind and use PyUnicode_READ
   instead.  The type and the ready state of the object are asserted. */
#define PyUnicode_READ_CHAR(unicode, index)                                 \
    (assert(PyUnicode_Check(unicode)),                                      \
     assert(PyUnicode_IS_READY(unicode)),                                   \
     (Py_UCS4)                                                              \
     ((PyUnicode_KIND((unicode)) == PyUnicode_1BYTE_KIND) ?                 \
          ((const Py_UCS1 *)(PyUnicode_DATA((unicode))))[(index)] :         \
      (PyUnicode_KIND((unicode)) == PyUnicode_2BYTE_KIND) ?                 \
          ((const Py_UCS2 *)(PyUnicode_DATA((unicode))))[(index)] :         \
          ((const Py_UCS4 *)(PyUnicode_DATA((unicode))))[(index)]))
530 
/* The length field of the unicode string.  The caller must make sure that
   the string has its canonical representation set before using this
   macro (PyUnicode_READY() ensures that); the type and the ready state
   are asserted. */
#define PyUnicode_GET_LENGTH(op)                \
    (assert(PyUnicode_Check(op)),               \
     assert(PyUnicode_IS_READY(op)),            \
     (*(PyASCIIObject *)(op)).length)
538 
539 
/* Fast check to determine whether an object is ready: reads the
   state.ready bit.  Equivalent to
   (PyUnicode_IS_COMPACT(op) || ((PyUnicodeObject*)(op))->data.any).

   op is parenthesized before it is cast, so that an expression argument
   (for instance pointer arithmetic) is cast as a whole.  No type check is
   performed. */

#define PyUnicode_IS_READY(op) (((PyASCIIObject *)(op))->state.ready)
544 
/* Make sure that op is ready.  In the best case this is cheaper than
   _PyUnicode_Ready(): an object whose ready bit is already set yields 0
   without any call.  Otherwise the canonical representation is not set
   yet and _PyUnicode_Ready() is called.
   Returns 0 on success and -1 on errors. */
#define PyUnicode_READY(op)                                 \
    (assert(PyUnicode_Check(op)),                           \
     (PyUnicode_IS_READY(op)                                \
        ? 0                                                 \
        : _PyUnicode_Ready((PyObject *)(op))))
553 
/* Return a maximum character value that is suitable for creating another
   string based on op: 0x7f for ASCII strings, otherwise 0xff, 0xffff or
   0x10ffff depending on the kind.  The value is always an approximation,
   but getting it is more efficient than iterating over the string.  The
   ready state of op is asserted. */
#define PyUnicode_MAX_CHAR_VALUE(op)                                    \
    (assert(PyUnicode_IS_READY(op)),                                    \
     (PyUnicode_IS_ASCII(op) ? (0x7f) :                                 \
      ((PyUnicode_KIND(op) == PyUnicode_1BYTE_KIND) ? (0xffU) :         \
       ((PyUnicode_KIND(op) == PyUnicode_2BYTE_KIND) ? (0xffffU) :      \
        (0x10ffffU)))))
566 
567 #endif
568 
569 /* --- Constants ---------------------------------------------------------- */
570 
/* The character substituted during decoding when the errors argument is
   set to "replace".  Note: U+FFFD is the official REPLACEMENT CHARACTER
   in Unicode 3.0. */

#define Py_UNICODE_REPLACEMENT_CHARACTER    ((Py_UCS4)0xFFFD)
577 
578 /* === Public API ========================================================= */
579 
580 /* --- Plain Py_UNICODE --------------------------------------------------- */
581 
582 /* With PEP 393, this is the recommended way to allocate a new unicode object.
583    This function will allocate the object and its buffer in a single memory
584    block.  Objects created using this function are not resizable. */
585 #ifndef Py_LIMITED_API
586 PyAPI_FUNC(PyObject*) PyUnicode_New(
587     Py_ssize_t size,            /* Number of code points in the new string */
588     Py_UCS4 maxchar             /* maximum code point value in the string */
589     );
590 #endif
591 
592 /* Initializes the canonical string representation from the deprecated
593    wstr/Py_UNICODE representation. This function is used to convert Unicode
594    objects which were created using the old API to the new flexible format
595    introduced with PEP 393.
596 
597    Don't call this function directly, use the public PyUnicode_READY() macro
598    instead. */
599 #ifndef Py_LIMITED_API
600 PyAPI_FUNC(int) _PyUnicode_Ready(
601     PyObject *unicode           /* Unicode object */
602     );
603 #endif
604 
605 /* Get a copy of a Unicode string. */
606 #ifndef Py_LIMITED_API
607 PyAPI_FUNC(PyObject*) _PyUnicode_Copy(
608     PyObject *unicode
609     );
610 #endif
611 
612 /* Copy character from one unicode object into another, this function performs
613    character conversion when necessary and falls back to memcpy() if possible.
614 
615    Fail if to is too small (smaller than *how_many* or smaller than
616    len(from)-from_start), or if kind(from[from_start:from_start+how_many]) >
617    kind(to), or if *to* has more than 1 reference.
618 
619    Return the number of written character, or return -1 and raise an exception
620    on error.
621 
622    Pseudo-code:
623 
624        how_many = min(how_many, len(from) - from_start)
625        to[to_start:to_start+how_many] = from[from_start:from_start+how_many]
626        return how_many
627 
628    Note: The function doesn't write a terminating null character.
629    */
630 #ifndef Py_LIMITED_API
631 PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters(
632     PyObject *to,
633     Py_ssize_t to_start,
634     PyObject *from,
635     Py_ssize_t from_start,
636     Py_ssize_t how_many
637     );
638 
639 /* Unsafe version of PyUnicode_CopyCharacters(): don't check arguments and so
640    may crash if parameters are invalid (e.g. if the output string
641    is too short). */
642 PyAPI_FUNC(void) _PyUnicode_FastCopyCharacters(
643     PyObject *to,
644     Py_ssize_t to_start,
645     PyObject *from,
646     Py_ssize_t from_start,
647     Py_ssize_t how_many
648     );
649 #endif
650 
651 #ifndef Py_LIMITED_API
652 /* Fill a string with a character: write fill_char into
653    unicode[start:start+length].
654 
655    Fail if fill_char is bigger than the string maximum character, or if the
656    string has more than 1 reference.
657 
658    Return the number of written character, or return -1 and raise an exception
659    on error. */
660 PyAPI_FUNC(Py_ssize_t) PyUnicode_Fill(
661     PyObject *unicode,
662     Py_ssize_t start,
663     Py_ssize_t length,
664     Py_UCS4 fill_char
665     );
666 
667 /* Unsafe version of PyUnicode_Fill(): don't check arguments and so may crash
668    if parameters are invalid (e.g. if length is longer than the string). */
669 PyAPI_FUNC(void) _PyUnicode_FastFill(
670     PyObject *unicode,
671     Py_ssize_t start,
672     Py_ssize_t length,
673     Py_UCS4 fill_char
674     );
675 #endif
676 
677 /* Create a Unicode Object from the Py_UNICODE buffer u of the given
678    size.
679 
680    u may be NULL which causes the contents to be undefined. It is the
681    user's responsibility to fill in the needed data afterwards. Note
682    that modifying the Unicode object contents after construction is
683    only allowed if u was set to NULL.
684 
685    The buffer is copied into the new object. */
686 
687 #ifndef Py_LIMITED_API
688 PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
689     const Py_UNICODE *u,        /* Unicode buffer */
690     Py_ssize_t size             /* size of buffer */
691     ) /* Py_DEPRECATED(3.3) */;
692 #endif
693 
694 /* Similar to PyUnicode_FromUnicode(), but u points to UTF-8 encoded bytes */
695 PyAPI_FUNC(PyObject*) PyUnicode_FromStringAndSize(
696     const char *u,             /* UTF-8 encoded string */
697     Py_ssize_t size            /* size of buffer */
698     );
699 
700 /* Similar to PyUnicode_FromUnicode(), but u points to null-terminated
701    UTF-8 encoded bytes.  The size is determined with strlen(). */
702 PyAPI_FUNC(PyObject*) PyUnicode_FromString(
703     const char *u              /* UTF-8 encoded string */
704     );
705 
706 #ifndef Py_LIMITED_API
707 /* Create a new string from a buffer of Py_UCS1, Py_UCS2 or Py_UCS4 characters.
708    Scan the string to find the maximum character. */
709 PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData(
710     int kind,
711     const void *buffer,
712     Py_ssize_t size);
713 
714 /* Create a new string from a buffer of ASCII characters.
715    WARNING: Don't check if the string contains any non-ASCII character. */
716 PyAPI_FUNC(PyObject*) _PyUnicode_FromASCII(
717     const char *buffer,
718     Py_ssize_t size);
719 #endif
720 
721 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
722 PyAPI_FUNC(PyObject*) PyUnicode_Substring(
723     PyObject *str,
724     Py_ssize_t start,
725     Py_ssize_t end);
726 #endif
727 
728 #ifndef Py_LIMITED_API
729 /* Compute the maximum character of the substring unicode[start:end].
730    Return 127 for an empty string. */
731 PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar (
732     PyObject *unicode,
733     Py_ssize_t start,
734     Py_ssize_t end);
735 #endif
736 
737 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
738 /* Copy the string into a UCS4 buffer including the null character if copy_null
739    is set. Return NULL and raise an exception on error. Raise a SystemError if
740    the buffer is smaller than the string. Return buffer on success.
741 
742    buflen is the length of the buffer in (Py_UCS4) characters. */
743 PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4(
744     PyObject *unicode,
745     Py_UCS4* buffer,
746     Py_ssize_t buflen,
747     int copy_null);
748 
749 /* Copy the string into a UCS4 buffer. A new buffer is allocated using
750  * PyMem_Malloc; if this fails, NULL is returned with a memory error
751    exception set. */
752 PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4Copy(PyObject *unicode);
753 #endif
754 
755 #ifndef Py_LIMITED_API
756 /* Return a read-only pointer to the Unicode object's internal
757    Py_UNICODE buffer.
758    If the wchar_t/Py_UNICODE representation is not yet available, this
759    function will calculate it. */
760 
761 PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
762     PyObject *unicode           /* Unicode object */
763     ) /* Py_DEPRECATED(3.3) */;
764 
765 /* Similar to PyUnicode_AsUnicode(), but raises a ValueError if the string
766    contains null characters. */
767 PyAPI_FUNC(const Py_UNICODE *) _PyUnicode_AsUnicode(
768     PyObject *unicode           /* Unicode object */
769     );
770 
771 /* Return a read-only pointer to the Unicode object's internal
772    Py_UNICODE buffer and save the length at size.
773    If the wchar_t/Py_UNICODE representation is not yet available, this
774    function will calculate it. */
775 
776 PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicodeAndSize(
777     PyObject *unicode,          /* Unicode object */
778     Py_ssize_t *size            /* location where to save the length */
779     )  /* Py_DEPRECATED(3.3) */;
780 #endif
781 
782 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
783 /* Get the length of the Unicode object. */
784 
785 PyAPI_FUNC(Py_ssize_t) PyUnicode_GetLength(
786     PyObject *unicode
787 );
788 #endif
789 
790 /* Get the number of Py_UNICODE units in the
791    string representation. */
792 
793 PyAPI_FUNC(Py_ssize_t) PyUnicode_GetSize(
794     PyObject *unicode           /* Unicode object */
795     ) Py_DEPRECATED(3.3);
796 
797 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
798 /* Read a character from the string. */
799 
800 PyAPI_FUNC(Py_UCS4) PyUnicode_ReadChar(
801     PyObject *unicode,
802     Py_ssize_t index
803     );
804 
805 /* Write a character to the string. The string must have been created through
806    PyUnicode_New, must not be shared, and must not have been hashed yet.
807 
808    Return 0 on success, -1 on error. */
809 
810 PyAPI_FUNC(int) PyUnicode_WriteChar(
811     PyObject *unicode,
812     Py_ssize_t index,
813     Py_UCS4 character
814     );
815 #endif
816 
817 #ifndef Py_LIMITED_API
818 /* Get the maximum ordinal for a Unicode character. */
819 PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void) Py_DEPRECATED(3.3);
820 #endif
821 
822 /* Resize a Unicode object. The length is the number of characters, except
823    if the kind of the string is PyUnicode_WCHAR_KIND: in this case, the length
824    is the number of Py_UNICODE characters.
825 
826    *unicode is modified to point to the new (resized) object and 0
827    returned on success.
828 
829    Try to resize the string in place (which is usually faster than allocating
830    a new string and copying characters), or create a new string.
831 
832    Error handling is implemented as follows: an exception is set, -1
833    is returned and *unicode left untouched.
834 
835    WARNING: The function doesn't check string content, the result may not be a
836             string in canonical representation. */
837 
838 PyAPI_FUNC(int) PyUnicode_Resize(
839     PyObject **unicode,         /* Pointer to the Unicode object */
840     Py_ssize_t length           /* New length */
841     );
842 
843 /* Decode obj to a Unicode object.
844 
845    bytes, bytearray and other bytes-like objects are decoded according to the
846    given encoding and error handler. The encoding and error handler can be
847    NULL to have the interface use UTF-8 and "strict".
848 
849    All other objects (including Unicode objects) raise an exception.
850 
851    The API returns NULL in case of an error. The caller is responsible
852    for decref'ing the returned objects.
853 
854 */
855 
856 PyAPI_FUNC(PyObject*) PyUnicode_FromEncodedObject(
857     PyObject *obj,              /* Object */
858     const char *encoding,       /* encoding */
859     const char *errors          /* error handling */
860     );
861 
862 /* Copy an instance of a Unicode subtype to a new true Unicode object if
863    necessary. If obj is already a true Unicode object (not a subtype), return
864    the reference with *incremented* refcount.
865 
866    The API returns NULL in case of an error. The caller is responsible
867    for decref'ing the returned objects.
868 
869 */
870 
871 PyAPI_FUNC(PyObject*) PyUnicode_FromObject(
872     PyObject *obj      /* Object */
873     );
874 
875 PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV(
876     const char *format,   /* ASCII-encoded string  */
877     va_list vargs
878     );
879 PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(
880     const char *format,   /* ASCII-encoded string  */
881     ...
882     );
883 
884 #ifndef Py_LIMITED_API
885 typedef struct {                /* state shared by the _PyUnicodeWriter_*() functions and macros */
886     PyObject *buffer;           /* internal buffer object (see readonly below) */
887     void *data;                 /* NOTE(review): presumably the character data of buffer -- confirm */
888     enum PyUnicode_Kind kind;   /* kind compared against by _PyUnicodeWriter_PrepareKind() */
889     Py_UCS4 maxchar;            /* upper bound compared against by _PyUnicodeWriter_Prepare() */
890     Py_ssize_t size;            /* size - pos is the room tested by _PyUnicodeWriter_Prepare() */
891     Py_ssize_t pos;             /* NOTE(review): presumably the current write position -- confirm */
892
893     /* minimum number of allocated characters (default: 0) */
894     Py_ssize_t min_length;
895
896     /* minimum character (default: 127, ASCII) */
897     Py_UCS4 min_char;
898
899     /* If non-zero, overallocate the buffer (default: 0). */
900     unsigned char overallocate;
901
902     /* If readonly is 1, buffer is a shared string (cannot be modified)
903        and size is set to 0. */
904     unsigned char readonly;
905 } _PyUnicodeWriter ;
906 
907 /* Initialize a Unicode writer.
908  *
909  * By default, the minimum buffer size is 0 characters and overallocation is
910  * disabled. Set min_length, min_char and overallocate attributes to control
911  * the allocation of the buffer. */
912 PyAPI_FUNC(void)
913 _PyUnicodeWriter_Init(_PyUnicodeWriter *writer);
914 
915 /* Prepare the buffer to write LENGTH characters with the specified
916    maximum character MAXCHAR. The arguments may be evaluated more than once.
917
918    Return 0 on success, raise an exception and return -1 on error. */
919 #define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR)             \
920     (((MAXCHAR) <= (WRITER)->maxchar                                  \
921       && (LENGTH) <= (WRITER)->size - (WRITER)->pos)                  \
922      ? 0  /* the buffer already fits: nothing to do */                \
923      : (((LENGTH) == 0)                                               \
924         ? 0  /* nothing to write: nothing to do */                    \
925         : _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR))))
926 
927 /* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro
928    instead. */
929 PyAPI_FUNC(int)
930 _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
931                                  Py_ssize_t length, Py_UCS4 maxchar);
932 
933 /* Prepare the buffer to have at least the kind KIND, which must not be
934    PyUnicode_WCHAR_KIND (asserted). For example, kind=PyUnicode_2BYTE_KIND
935    ensures that the writer will support characters in range U+0000-U+FFFF.
936
937    Return 0 on success, raise an exception and return -1 on error. */
938 #define _PyUnicodeWriter_PrepareKind(WRITER, KIND)                    \
939     (assert((KIND) != PyUnicode_WCHAR_KIND),                          \
940      (KIND) <= (WRITER)->kind                                         \
941      ? 0  /* the writer's kind is already >= KIND */                  \
942      : _PyUnicodeWriter_PrepareKindInternal((WRITER), (KIND)))
943 
944 /* Don't call this function directly, use the _PyUnicodeWriter_PrepareKind()
945    macro instead. */
946 PyAPI_FUNC(int)
947 _PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer,
948                                      enum PyUnicode_Kind kind);
949 
950 /* Append a Unicode character.
951    Return 0 on success, raise an exception and return -1 on error. */
952 PyAPI_FUNC(int)
953 _PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer,
954     Py_UCS4 ch
955     );
956 
957 /* Append a Unicode string.
958    Return 0 on success, raise an exception and return -1 on error. */
959 PyAPI_FUNC(int)
960 _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer,
961     PyObject *str               /* Unicode string */
962     );
963 
964 /* Append a substring of a Unicode string.
965    Return 0 on success, raise an exception and return -1 on error. */
966 PyAPI_FUNC(int)
967 _PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer,
968     PyObject *str,              /* Unicode string */
969     Py_ssize_t start,
970     Py_ssize_t end
971     );
972 
973 /* Append an ASCII-encoded byte string.
974    Return 0 on success, raise an exception and return -1 on error. */
975 PyAPI_FUNC(int)
976 _PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
977     const char *str,           /* ASCII-encoded byte string */
978     Py_ssize_t len             /* number of bytes, or -1 if unknown */
979     );
980 
981 /* Append a latin1-encoded byte string.
982    Return 0 on success, raise an exception and return -1 on error. */
983 PyAPI_FUNC(int)
984 _PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
985     const char *str,           /* latin1-encoded byte string */
986     Py_ssize_t len             /* length in bytes */
987     );
988 
989 /* Get the value of the writer as a Unicode string. Clear the
990    buffer of the writer. Raise an exception and return NULL
991    on error. */
992 PyAPI_FUNC(PyObject *)
993 _PyUnicodeWriter_Finish(_PyUnicodeWriter *writer);
994 
995 /* Deallocate memory of a writer (clear its internal buffer). */
996 PyAPI_FUNC(void)
997 _PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer);
998 #endif
999 
1000 #ifndef Py_LIMITED_API
1001 /* Format the object based on the format_spec, as defined in PEP 3101
1002    (Advanced String Formatting). */
1003 PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter(
1004     _PyUnicodeWriter *writer,
1005     PyObject *obj,
1006     PyObject *format_spec,
1007     Py_ssize_t start,
1008     Py_ssize_t end);
1009 #endif
1010 
1011 PyAPI_FUNC(void) PyUnicode_InternInPlace(PyObject **);
1012 PyAPI_FUNC(void) PyUnicode_InternImmortal(PyObject **);
1013 PyAPI_FUNC(PyObject *) PyUnicode_InternFromString(
1014     const char *u              /* UTF-8 encoded string */
1015     );
1016 #ifndef Py_LIMITED_API
1017 PyAPI_FUNC(void) _Py_ReleaseInternedUnicodeStrings(void);
1018 #endif
1019 
1020 /* Read the state.interned field of op. op is cast without any check: use only if you know it's a string */
1021 #define PyUnicode_CHECK_INTERNED(op) \
1022     (((PyASCIIObject *)(op))->state.interned)
1023 
1024 /* --- wchar_t support for platforms which support it --------------------- */
1025 
1026 #ifdef HAVE_WCHAR_H
1027 
1028 /* Create a Unicode Object from the wchar_t buffer w of the given
1029    size.
1030 
1031    The buffer is copied into the new object. */
1032 
1033 PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar(
1034     const wchar_t *w,           /* wchar_t buffer */
1035     Py_ssize_t size             /* size of buffer */
1036     );
1037 
1038 /* Copies the Unicode Object contents into the wchar_t buffer w.  At
1039    most size wchar_t characters are copied.
1040 
1041    Note that the resulting wchar_t string may or may not be
1042    0-terminated.  It is the responsibility of the caller to make sure
1043    that the wchar_t string is 0-terminated in case this is required by
1044    the application.
1045 
1046    Returns the number of wchar_t characters copied (excluding a
1047    possibly trailing 0-termination character) or -1 in case of an
1048    error. */
1049 
1050 PyAPI_FUNC(Py_ssize_t) PyUnicode_AsWideChar(
1051     PyObject *unicode,          /* Unicode object */
1052     wchar_t *w,                 /* wchar_t buffer */
1053     Py_ssize_t size             /* size of buffer */
1054     );
1055 
1056 /* Convert the Unicode object to a wide character string. The output string
1057    always ends with a nul character. If size is not NULL, write the number of
1058    wide characters (excluding the null character) into *size.
1059 
1060    Returns a buffer allocated by PyMem_Malloc() (use PyMem_Free() to free it)
1061    on success. On error, raises a MemoryError and returns NULL; *size is
1062    undefined. */
1063 
1064 PyAPI_FUNC(wchar_t*) PyUnicode_AsWideCharString(
1065     PyObject *unicode,          /* Unicode object */
1066     Py_ssize_t *size            /* number of characters of the result */
1067     );
1068 
1069 #ifndef Py_LIMITED_API
1070 PyAPI_FUNC(void*) _PyUnicode_AsKind(PyObject *s, unsigned int kind);
1071 #endif
1072 
1073 #endif
1074 
1075 /* --- Unicode ordinals --------------------------------------------------- */
1076 
1077 /* Create a Unicode Object from the given Unicode code point ordinal.
1078 
1079    The ordinal must be in range(0x110000). A ValueError is
1080    raised in case it is not.
1081 
1082 */
1083 
1084 PyAPI_FUNC(PyObject*) PyUnicode_FromOrdinal(int ordinal);
1085 
1086 /* --- Free-list management ----------------------------------------------- */
1087 
1088 /* Clear the free list used by the Unicode implementation.
1089 
1090    This can be used to release memory used for objects on the free
1091    list back to the Python memory allocator.
1092 
1093 */
1094 
1095 PyAPI_FUNC(int) PyUnicode_ClearFreeList(void);
1096 
1097 /* === Builtin Codecs =====================================================
1098 
1099    Many of these APIs take two arguments encoding and errors. These
1100    parameters encoding and errors have the same semantics as the ones
1101    of the builtin str() API.
1102 
1103    Setting encoding to NULL causes the default encoding (UTF-8) to be used.
1104 
1105    Error handling is set by errors which may also be set to NULL
1106    meaning to use the default handling defined for the codec. Default
1107    error handling for all builtin codecs is "strict" (ValueErrors are
1108    raised).
1109 
1110    The codecs all use a similar interface. Only deviations from the
1111    generic ones are documented.
1112 
1113 */
1114 
1115 /* --- Manage the default encoding ---------------------------------------- */
1116 
1117 /* Returns a pointer to the default encoding (UTF-8) of the
1118    Unicode object unicode and the size of the encoded representation
1119    in bytes stored in *size.
1120 
1121    In case of an error, *size is not set.
1122 
1123    This function caches the UTF-8 encoded string in the unicodeobject
1124    and subsequent calls will return the same string.  The memory is released
1125    when the unicodeobject is deallocated.
1126 
1127    _PyUnicode_AsStringAndSize is a #define for PyUnicode_AsUTF8AndSize to
1128    support the previous internal function with the same behaviour.
1129 
1130    *** This API is for interpreter INTERNAL USE ONLY and will likely
1131    *** be removed or changed in the future.
1132 
1133    *** If you need to access the Unicode object as UTF-8 bytes string,
1134    *** please use PyUnicode_AsUTF8String() instead.
1135 */
1136 
1137 #ifndef Py_LIMITED_API
1138 PyAPI_FUNC(const char *) PyUnicode_AsUTF8AndSize(
1139     PyObject *unicode,
1140     Py_ssize_t *size);
1141 #define _PyUnicode_AsStringAndSize PyUnicode_AsUTF8AndSize
1142 #endif
1143 
1144 /* Returns a pointer to the default encoding (UTF-8) of the
1145    Unicode object unicode.
1146 
1147    Like PyUnicode_AsUTF8AndSize(), this also caches the UTF-8 representation
1148    in the unicodeobject.
1149 
1150    _PyUnicode_AsString is a #define for PyUnicode_AsUTF8 to
1151    support the previous internal function with the same behaviour.
1152 
1153    Use of this API is DEPRECATED since no size information can be
1154    extracted from the returned data.
1155 
1156    *** This API is for interpreter INTERNAL USE ONLY and will likely
1157    *** be removed or changed in the future.
1158 
1159    *** If you need to access the Unicode object as UTF-8 bytes string,
1160    *** please use PyUnicode_AsUTF8String() instead.
1161 
1162 */
1163 
1164 #ifndef Py_LIMITED_API
1165 PyAPI_FUNC(const char *) PyUnicode_AsUTF8(PyObject *unicode);
1166 #define _PyUnicode_AsString PyUnicode_AsUTF8
1167 #endif
1168 
1169 /* Returns "utf-8".  */
1170 
1171 PyAPI_FUNC(const char*) PyUnicode_GetDefaultEncoding(void);
1172 
1173 /* --- Generic Codecs ----------------------------------------------------- */
1174 
1175 /* Create a Unicode object by decoding the encoded string s of the
1176    given size. */
1177 
1178 PyAPI_FUNC(PyObject*) PyUnicode_Decode(
1179     const char *s,              /* encoded string */
1180     Py_ssize_t size,            /* size of buffer */
1181     const char *encoding,       /* encoding */
1182     const char *errors          /* error handling */
1183     );
1184 
1185 /* Decode a Unicode object unicode and return the result as Python
1186    object.
1187 
1188    This API is DEPRECATED. The only supported standard encoding is rot13.
1189    Use PyCodec_Decode() to decode with rot13 and non-standard codecs
1190    that decode from str. */
1191 
1192 PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedObject(
1193     PyObject *unicode,          /* Unicode object */
1194     const char *encoding,       /* encoding */
1195     const char *errors          /* error handling */
1196     ) Py_DEPRECATED(3.6);
1197 
1198 /* Decode a Unicode object unicode and return the result as Unicode
1199    object.
1200 
1201    This API is DEPRECATED. The only supported standard encoding is rot13.
1202    Use PyCodec_Decode() to decode with rot13 and non-standard codecs
1203    that decode from str to str. */
1204 
1205 PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedUnicode(
1206     PyObject *unicode,          /* Unicode object */
1207     const char *encoding,       /* encoding */
1208     const char *errors          /* error handling */
1209     ) Py_DEPRECATED(3.6);
1210 
1211 /* Encodes a Py_UNICODE buffer of the given size and returns a
1212    Python string object. */
1213 
1214 #ifndef Py_LIMITED_API
1215 PyAPI_FUNC(PyObject*) PyUnicode_Encode(
1216     const Py_UNICODE *s,        /* Unicode char buffer */
1217     Py_ssize_t size,            /* number of Py_UNICODE chars to encode */
1218     const char *encoding,       /* encoding */
1219     const char *errors          /* error handling */
1220     ) Py_DEPRECATED(3.3);
1221 #endif
1222 
1223 /* Encodes a Unicode object and returns the result as Python
1224    object.
1225 
1226    This API is DEPRECATED.  It is superseded by PyUnicode_AsEncodedString()
1227    since all standard encodings (except rot13) encode str to bytes.
1228    Use PyCodec_Encode() for encoding with rot13 and non-standard codecs
1229    that encode from str to non-bytes. */
1230 
1231 PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedObject(
1232     PyObject *unicode,          /* Unicode object */
1233     const char *encoding,       /* encoding */
1234     const char *errors          /* error handling */
1235     ) Py_DEPRECATED(3.6);
1236 
1237 /* Encodes a Unicode object and returns the result as Python string
1238    object. */
1239 
1240 PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString(
1241     PyObject *unicode,          /* Unicode object */
1242     const char *encoding,       /* encoding */
1243     const char *errors          /* error handling */
1244     );
1245 
1246 /* Encodes a Unicode object and returns the result as Unicode
1247    object.
1248 
1249    This API is DEPRECATED.  The only supported standard encoding is rot13.
1250    Use PyCodec_Encode() to encode with rot13 and non-standard codecs
1251    that encode from str to str. */
1252 
1253 PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedUnicode(
1254     PyObject *unicode,          /* Unicode object */
1255     const char *encoding,       /* encoding */
1256     const char *errors          /* error handling */
1257     ) Py_DEPRECATED(3.6);
1258 
1259 /* Build an encoding map. */
1260 
1261 PyAPI_FUNC(PyObject*) PyUnicode_BuildEncodingMap(
1262     PyObject* string            /* 256 character map */
1263    );
1264 
1265 /* --- UTF-7 Codecs ------------------------------------------------------- */
1266 
1267 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7(
1268     const char *string,         /* UTF-7 encoded string */
1269     Py_ssize_t length,          /* size of string */
1270     const char *errors          /* error handling */
1271     );
1272 
1273 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful(
1274     const char *string,         /* UTF-7 encoded string */
1275     Py_ssize_t length,          /* size of string */
1276     const char *errors,         /* error handling */
1277     Py_ssize_t *consumed        /* bytes consumed */
1278     );
1279 
1280 #ifndef Py_LIMITED_API
1281 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7(
1282     const Py_UNICODE *data,     /* Unicode char buffer */
1283     Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
1284     int base64SetO,             /* Encode RFC2152 Set O characters in base64 */
1285     int base64WhiteSpace,       /* Encode whitespace (sp, ht, nl, cr) in base64 */
1286     const char *errors          /* error handling */
1287     ) Py_DEPRECATED(3.3);
1288 PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF7(
1289     PyObject *unicode,          /* Unicode object */
1290     int base64SetO,             /* Encode RFC2152 Set O characters in base64 */
1291     int base64WhiteSpace,       /* Encode whitespace (sp, ht, nl, cr) in base64 */
1292     const char *errors          /* error handling */
1293     );
1294 #endif
1295 
1296 /* --- UTF-8 Codecs ------------------------------------------------------- */
1297 
1298 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8(
1299     const char *string,         /* UTF-8 encoded string */
1300     Py_ssize_t length,          /* size of string */
1301     const char *errors          /* error handling */
1302     );
1303 
1304 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8Stateful(
1305     const char *string,         /* UTF-8 encoded string */
1306     Py_ssize_t length,          /* size of string */
1307     const char *errors,         /* error handling */
1308     Py_ssize_t *consumed        /* bytes consumed */
1309     );
1310 
1311 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String(
1312     PyObject *unicode           /* Unicode object */
1313     );
1314 
1315 #ifndef Py_LIMITED_API
1316 PyAPI_FUNC(PyObject*) _PyUnicode_AsUTF8String(
1317     PyObject *unicode,
1318     const char *errors);
1319 
1320 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8(
1321     const Py_UNICODE *data,     /* Unicode char buffer */
1322     Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
1323     const char *errors          /* error handling */
1324     ) Py_DEPRECATED(3.3);
1325 #endif
1326 
1327 /* --- UTF-32 Codecs ------------------------------------------------------ */
1328 
1329 /* Decodes length bytes from a UTF-32 encoded buffer string and returns
1330    the corresponding Unicode object.
1331 
1332    errors (if non-NULL) defines the error handling. It defaults
1333    to "strict".
1334 
1335    If byteorder is non-NULL, the decoder starts decoding using the
1336    given byte order:
1337 
1338     *byteorder == -1: little endian
1339     *byteorder == 0:  native order
1340     *byteorder == 1:  big endian
1341 
1342    In native mode, the first four bytes of the stream are checked for a
1343    BOM mark. If found, the BOM mark is analysed, the byte order
1344    adjusted and the BOM skipped.  In the other modes, no BOM mark
1345    interpretation is done. After completion, *byteorder is set to the
1346    current byte order at the end of input data.
1347 
1348    If byteorder is NULL, the codec starts in native order mode.
1349 
1350 */
1351 
1352 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32(
1353     const char *string,         /* UTF-32 encoded string */
1354     Py_ssize_t length,          /* size of string */
1355     const char *errors,         /* error handling */
1356     int *byteorder              /* pointer to byteorder to use
1357                                    0=native;-1=LE,1=BE; updated on
1358                                    exit */
1359     );
1360 
1361 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32Stateful(
1362     const char *string,         /* UTF-32 encoded string */
1363     Py_ssize_t length,          /* size of string */
1364     const char *errors,         /* error handling */
1365     int *byteorder,             /* pointer to byteorder to use
1366                                    0=native;-1=LE,1=BE; updated on
1367                                    exit */
1368     Py_ssize_t *consumed        /* bytes consumed */
1369     );
1370 
1371 /* Returns a Python string using the UTF-32 encoding in native byte
1372    order. The string always starts with a BOM mark.  */
1373 
1374 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF32String(
1375     PyObject *unicode           /* Unicode object */
1376     );
1377 
1378 /* Returns a Python string object holding the UTF-32 encoded value of
1379    the Unicode data.
1380 
1381    If byteorder is not 0, output is written according to the following
1382    byte order:
1383 
1384    byteorder == -1: little endian
1385    byteorder == 0:  native byte order (writes a BOM mark)
1386    byteorder == 1:  big endian
1387 
1388    If byteorder is 0, the output string will always start with the
1389    Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
1390    prepended.
1391 
1392 */
1393 
1394 #ifndef Py_LIMITED_API
1395 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF32(
1396     const Py_UNICODE *data,     /* Unicode char buffer */
1397     Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
1398     const char *errors,         /* error handling */
1399     int byteorder               /* byteorder to use 0=BOM+native;-1=LE,1=BE */
1400     ) Py_DEPRECATED(3.3);
1401 PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF32(
1402     PyObject *object,           /* Unicode object */
1403     const char *errors,         /* error handling */
1404     int byteorder               /* byteorder to use 0=BOM+native;-1=LE,1=BE */
1405     );
1406 #endif
1407 
1408 /* --- UTF-16 Codecs ------------------------------------------------------ */
1409 
1410 /* Decodes length bytes from a UTF-16 encoded buffer string and returns
1411    the corresponding Unicode object.
1412 
1413    errors (if non-NULL) defines the error handling. It defaults
1414    to "strict".
1415 
1416    If byteorder is non-NULL, the decoder starts decoding using the
1417    given byte order:
1418 
1419     *byteorder == -1: little endian
1420     *byteorder == 0:  native order
1421     *byteorder == 1:  big endian
1422 
1423    In native mode, the first two bytes of the stream are checked for a
1424    BOM mark. If found, the BOM mark is analysed, the byte order
1425    adjusted and the BOM skipped.  In the other modes, no BOM mark
1426    interpretation is done. After completion, *byteorder is set to the
1427    current byte order at the end of input data.
1428 
1429    If byteorder is NULL, the codec starts in native order mode.
1430 
1431 */
1432 
1433 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16(
1434     const char *string,         /* UTF-16 encoded string */
1435     Py_ssize_t length,          /* size of string */
1436     const char *errors,         /* error handling */
1437     int *byteorder              /* pointer to byteorder to use
1438                                    0=native;-1=LE,1=BE; updated on
1439                                    exit */
1440     );
1441 
1442 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16Stateful(
1443     const char *string,         /* UTF-16 encoded string */
1444     Py_ssize_t length,          /* size of string */
1445     const char *errors,         /* error handling */
1446     int *byteorder,             /* pointer to byteorder to use
1447                                    0=native;-1=LE,1=BE; updated on
1448                                    exit */
1449     Py_ssize_t *consumed        /* bytes consumed */
1450     );
1451 
1452 /* Returns a Python string using the UTF-16 encoding in native byte
1453    order. The string always starts with a BOM mark.  */
1454 
1455 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF16String(
1456     PyObject *unicode           /* Unicode object */
1457     );
1458 
1459 /* Returns a Python string object holding the UTF-16 encoded value of
1460    the Unicode data.
1461 
1462    If byteorder is not 0, output is written according to the following
1463    byte order:
1464 
1465    byteorder == -1: little endian
1466    byteorder == 0:  native byte order (writes a BOM mark)
1467    byteorder == 1:  big endian
1468 
1469    If byteorder is 0, the output string will always start with the
1470    Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
1471    prepended.
1472 
1473    Note that Py_UNICODE data is being interpreted as UTF-16 reduced to
1474    UCS-2. This trick makes it possible to add full UTF-16 capabilities
1475    at a later point without compromising the APIs.
1476 
1477 */
1478 
1479 #ifndef Py_LIMITED_API
1480 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF16(
1481     const Py_UNICODE *data,     /* Unicode char buffer */
1482     Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
1483     const char *errors,         /* error handling */
1484     int byteorder               /* byteorder to use 0=BOM+native;-1=LE,1=BE */
1485     ) Py_DEPRECATED(3.3);
1486 PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF16(
1487     PyObject* unicode,          /* Unicode object */
1488     const char *errors,         /* error handling */
1489     int byteorder               /* byteorder to use 0=BOM+native;-1=LE,1=BE */
1490     );
1491 #endif
1492 
1493 /* --- Unicode-Escape Codecs ---------------------------------------------- */
1494 
1495 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape(
1496     const char *string,         /* Unicode-Escape encoded string */
1497     Py_ssize_t length,          /* size of string */
1498     const char *errors          /* error handling */
1499     );
1500 
1501 #ifndef Py_LIMITED_API
1502 /* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape
1503    chars. */
1504 PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscape(
1505         const char *string,     /* Unicode-Escape encoded string */
1506         Py_ssize_t length,      /* size of string */
1507         const char *errors,     /* error handling */
1508         const char **first_invalid_escape  /* on return, points to first
1509                                               invalid escaped char in
1510                                               string. */
1511 );
1512 #endif
1513 
1514 PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString(
1515     PyObject *unicode           /* Unicode object */
1516     );
1517 
1518 #ifndef Py_LIMITED_API
1519 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUnicodeEscape(
1520     const Py_UNICODE *data,     /* Unicode char buffer */
1521     Py_ssize_t length           /* Number of Py_UNICODE chars to encode */
1522     ) Py_DEPRECATED(3.3);
1523 #endif
1524 
1525 /* --- Raw-Unicode-Escape Codecs ------------------------------------------ */
1526 
1527 PyAPI_FUNC(PyObject*) PyUnicode_DecodeRawUnicodeEscape(
1528     const char *string,         /* Raw-Unicode-Escape encoded string */
1529     Py_ssize_t length,          /* size of string */
1530     const char *errors          /* error handling */
1531     );
1532 
1533 PyAPI_FUNC(PyObject*) PyUnicode_AsRawUnicodeEscapeString(
1534     PyObject *unicode           /* Unicode object */
1535     );
1536 
1537 #ifndef Py_LIMITED_API
1538 PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape(
1539     const Py_UNICODE *data,     /* Unicode char buffer */
1540     Py_ssize_t length           /* Number of Py_UNICODE chars to encode */
1541     ) Py_DEPRECATED(3.3);
1542 #endif
1543 
1544 /* --- Unicode Internal Codec ---------------------------------------------
1545 
1546     Only for internal use in _codecsmodule.c */
1547 
1548 #ifndef Py_LIMITED_API
1549 PyObject *_PyUnicode_DecodeUnicodeInternal(  /* NOTE(review): no PyAPI_FUNC here, unlike the other declarations in this header -- confirm this is intended */
1550     const char *string,
1551     Py_ssize_t length,
1552     const char *errors
1553     );
1554 #endif
1555 
1556 /* --- Latin-1 Codecs -----------------------------------------------------
1557 
1558    Note: Latin-1 corresponds to the first 256 Unicode ordinals.
1559 
1560 */
1561 
1562 PyAPI_FUNC(PyObject*) PyUnicode_DecodeLatin1(
1563     const char *string,         /* Latin-1 encoded string */
1564     Py_ssize_t length,          /* size of string */
1565     const char *errors          /* error handling */
1566     );
1567 
1568 PyAPI_FUNC(PyObject*) PyUnicode_AsLatin1String(
1569     PyObject *unicode           /* Unicode object */
1570     );
1571 
1572 #ifndef Py_LIMITED_API
1573 PyAPI_FUNC(PyObject*) _PyUnicode_AsLatin1String(
1574     PyObject* unicode,
1575     const char* errors);
1576 
1577 PyAPI_FUNC(PyObject*) PyUnicode_EncodeLatin1(
1578     const Py_UNICODE *data,     /* Unicode char buffer */
1579     Py_ssize_t length,          /* Number of Py_UNICODE chars to encode */
1580     const char *errors          /* error handling */
1581     ) Py_DEPRECATED(3.3);
1582 #endif
1583 
1584 /* --- ASCII Codecs -------------------------------------------------------
1585 
1586    Only 7-bit ASCII data is accepted. All other codes generate errors.
1587 
1588 */
1589 
1590 PyAPI_FUNC(PyObject*) PyUnicode_DecodeASCII(
1591     const char *string,         /* ASCII encoded string */
1592     Py_ssize_t length,          /* size of string */
1593     const char *errors          /* error handling */
1594     );
1595 
1596 PyAPI_FUNC(PyObject*) PyUnicode_AsASCIIString(
1597     PyObject *unicode           /* Unicode object */
1598     );
1599 
1600 #ifndef Py_LIMITED_API
1601 PyAPI_FUNC(PyObject*) _PyUnicode_AsASCIIString(
1602     PyObject* unicode,
1603     const char* errors);
1604 
1605 PyAPI_FUNC(PyObject*) PyUnicode_EncodeASCII(
1606     const Py_UNICODE *data,     /* Unicode char buffer */
1607     Py_ssize_t length,          /* Number of Py_UNICODE chars to encode */
1608     const char *errors          /* error handling */
1609     ) Py_DEPRECATED(3.3);
1610 #endif
1611 
1612 /* --- Character Map Codecs -----------------------------------------------
1613 
1614    This codec uses mappings to encode and decode characters.
1615 
1616    Decoding mappings must map byte ordinals (integers in the range from 0 to
1617    255) to Unicode strings, integers (which are then interpreted as Unicode
1618    ordinals) or None.  Unmapped data bytes (ones which cause a LookupError)
1619    as well as bytes mapped to None, 0xFFFE or '\ufffe' are treated as
1620    "undefined mapping" and cause an error.
1621 
1622    Encoding mappings must map Unicode ordinal integers to bytes objects,
1623    integers in the range from 0 to 255 or None.  Unmapped character
1624    ordinals (ones which cause a LookupError) as well as ordinals mapped to
1625    None are treated as "undefined mapping" and cause an error.
1626 
1627 */
1628 
1629 PyAPI_FUNC(PyObject*) PyUnicode_DecodeCharmap(
1630     const char *string,         /* Encoded string */
1631     Py_ssize_t length,          /* size of string */
1632     PyObject *mapping,          /* decoding mapping */
1633     const char *errors          /* error handling */
1634     );
1635 
1636 PyAPI_FUNC(PyObject*) PyUnicode_AsCharmapString(
1637     PyObject *unicode,          /* Unicode object */
1638     PyObject *mapping           /* encoding mapping */
1639     );
1640 
1641 #ifndef Py_LIMITED_API
1642 PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap(
1643     const Py_UNICODE *data,     /* Unicode char buffer */
1644     Py_ssize_t length,          /* Number of Py_UNICODE chars to encode */
1645     PyObject *mapping,          /* encoding mapping */
1646     const char *errors          /* error handling */
1647     ) Py_DEPRECATED(3.3);
1648 PyAPI_FUNC(PyObject*) _PyUnicode_EncodeCharmap(
1649     PyObject *unicode,          /* Unicode object */
1650     PyObject *mapping,          /* encoding mapping */
1651     const char *errors          /* error handling */
1652     );
1653 #endif
1654 
1655 /* Translate a Py_UNICODE buffer of the given length by applying a
1656    character mapping table to it and return the resulting Unicode
1657    object.
1658 
1659    The mapping table must map Unicode ordinal integers to Unicode strings,
1660    Unicode ordinal integers or None (causing deletion of the character).
1661 
1662    Mapping tables may be dictionaries or sequences. Unmapped character
1663    ordinals (ones which cause a LookupError) are left untouched and
1664    are copied as-is.
1665 
1666 */
1667 
1668 #ifndef Py_LIMITED_API
1669 PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap(
1670     const Py_UNICODE *data,     /* Unicode char buffer */
1671     Py_ssize_t length,          /* Number of Py_UNICODE chars to translate */
1672     PyObject *table,            /* Translate table */
1673     const char *errors          /* error handling */
1674     ) Py_DEPRECATED(3.3);
1675 #endif
1676 
1677 #ifdef MS_WINDOWS
1678 
1679 /* --- MBCS codecs for Windows -------------------------------------------- */
1680 
1681 PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS(
1682     const char *string,         /* MBCS encoded string */
1683     Py_ssize_t length,          /* size of string */
1684     const char *errors          /* error handling */
1685     );
1686 
1687 PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCSStateful(
1688     const char *string,         /* MBCS encoded string */
1689     Py_ssize_t length,          /* size of string */
1690     const char *errors,         /* error handling */
1691     Py_ssize_t *consumed        /* bytes consumed */
1692     );
1693 
1694 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
1695 PyAPI_FUNC(PyObject*) PyUnicode_DecodeCodePageStateful(
1696     int code_page,              /* code page number */
1697     const char *string,         /* encoded string */
1698     Py_ssize_t length,          /* size of string */
1699     const char *errors,         /* error handling */
1700     Py_ssize_t *consumed        /* bytes consumed */
1701     );
1702 #endif
1703 
1704 PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString(
1705     PyObject *unicode           /* Unicode object */
1706     );
1707 
1708 #ifndef Py_LIMITED_API
1709 PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS(
1710     const Py_UNICODE *data,     /* Unicode char buffer */
1711     Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
1712     const char *errors          /* error handling */
1713     ) Py_DEPRECATED(3.3);
1714 #endif
1715 
1716 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
1717 PyAPI_FUNC(PyObject*) PyUnicode_EncodeCodePage(
1718     int code_page,              /* code page number */
1719     PyObject *unicode,          /* Unicode object */
1720     const char *errors          /* error handling */
1721     );
1722 #endif
1723 
1724 #endif /* MS_WINDOWS */
1725 
1726 #ifndef Py_LIMITED_API
1727 /* --- Decimal Encoder ---------------------------------------------------- */
1728 
1729 /* Takes a Unicode string holding a decimal value and writes it into
1730    an output buffer using standard ASCII digit codes.
1731 
1732    The output buffer has to provide at least length+1 bytes of storage
1733    area. The output string is 0-terminated.
1734 
1735    The encoder converts whitespace to ' ', decimal characters to their
1736    corresponding ASCII digit and all other Latin-1 characters except
1737    \0 as-is. Characters outside this range (Unicode ordinals 1-255)
1738    are treated as errors. This includes embedded null characters.
1739 
1740    Error handling is defined by the errors argument:
1741 
1742       NULL or "strict": raise a ValueError
1743       "ignore": ignore the wrong characters (these are not copied to the
1744                 output buffer)
1745       "replace": replaces illegal characters with '?'
1746 
1747    Returns 0 on success, -1 on failure.
1748 
1749 */
1750 
1751 PyAPI_FUNC(int) PyUnicode_EncodeDecimal(
1752     Py_UNICODE *s,              /* Unicode buffer */
1753     Py_ssize_t length,          /* Number of Py_UNICODE chars to encode */
1754     char *output,               /* Output buffer; size >= length+1 (0-terminated) */
1755     const char *errors          /* error handling: NULL, "strict", "ignore", "replace" */
1756     ) /* Py_DEPRECATED(3.3) */;
1757 
1758 /* Transforms code points that have decimal digit property to the
1759    corresponding ASCII digit code points.
1760 
1761    Returns a new Unicode string on success, NULL on failure.
1762 */
1763 
1764 PyAPI_FUNC(PyObject*) PyUnicode_TransformDecimalToASCII(
1765     Py_UNICODE *s,              /* Unicode buffer */
1766     Py_ssize_t length           /* Number of Py_UNICODE chars to transform */
1767     ) /* Py_DEPRECATED(3.3) */;
1768 
1769 /* Converts a Unicode object holding a decimal value to an ASCII string
1770    for using in int, float and complex parsers.
1771    Transforms code points that have decimal digit property to the
1772    corresponding ASCII digit code points.  Transforms spaces to ASCII.
1773    Transforms code points starting from the first non-ASCII code point that
1774    is neither a decimal digit nor a space to the end into '?'. */
1775 
1776 PyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalAndSpaceToASCII(
1777     PyObject *unicode           /* Unicode object */
1778     );
1779 #endif
1780 
1781 /* --- Locale encoding --------------------------------------------------- */
1782 
1783 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
1784 /* Decode a string from the current locale encoding. The decoder is strict if
1785    *errors* is NULL or "strict"; with "surrogateescape" it uses that error
1786    handler (PEP 383) to escape undecodable bytes. If a byte sequence can
1787    be decoded as a surrogate character and *errors* is "surrogateescape",
1788    the byte sequence is escaped using the 'surrogateescape' error handler
1789    instead of being decoded. *str* must end with a null character but cannot
1790    contain embedded null characters. */
1791 
1792 PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocaleAndSize(
1793     const char *str,            /* encoded string, null-terminated */
1794     Py_ssize_t len,             /* length of str */
1795     const char *errors);        /* error handling */
1796 
1797 /* Similar to PyUnicode_DecodeLocaleAndSize(), but compute the string
1798    length using strlen(). */
1799 
1800 PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocale(
1801     const char *str,            /* encoded string; length taken via strlen() */
1802     const char *errors);        /* error handling */
1803 
1804 /* Encode a Unicode object to the current locale encoding. The encoder is
1805    strict if *errors* is NULL or "strict"; with "surrogateescape" the
1806    "surrogateescape" error handler is used. Return a bytes object. The string
1807    cannot contain embedded null characters. */
1808 
1809 PyAPI_FUNC(PyObject*) PyUnicode_EncodeLocale(
1810     PyObject *unicode,          /* Unicode object */
1811     const char *errors          /* error handling */
1812     );
1813 #endif
1814 
1815 /* --- File system encoding ---------------------------------------------- */
1816 
1817 /* ParseTuple converter: encode str objects to bytes using
1818    PyUnicode_EncodeFSDefault(); bytes objects are output as-is. */
1819 
1820 PyAPI_FUNC(int) PyUnicode_FSConverter(PyObject*, void*);
1821 
1822 /* ParseTuple converter: decode bytes objects to unicode using
1823    PyUnicode_DecodeFSDefaultAndSize(); str objects are output as-is. */
1824 
1825 PyAPI_FUNC(int) PyUnicode_FSDecoder(PyObject*, void*);
1826 
1827 /* Decode a null-terminated string using Py_FileSystemDefaultEncoding
1828    and the "surrogateescape" error handler.
1829 
1830    If Py_FileSystemDefaultEncoding is not set, fall back to the locale
1831    encoding.
1832 
1833    Use PyUnicode_DecodeFSDefaultAndSize() if the string length is known.
1834 */
1835 
1836 PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault(
1837     const char *s               /* encoded string */
1838     );
1839 
1840 /* Decode a string using Py_FileSystemDefaultEncoding
1841    and the "surrogateescape" error handler.
1842 
1843    If Py_FileSystemDefaultEncoding is not set, fall back to the locale
1844    encoding.
1845 */
1846 
1847 PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize(
1848     const char *s,               /* encoded string */
1849     Py_ssize_t size              /* size */
1850     );
1851 
1852 /* Encode a Unicode object to Py_FileSystemDefaultEncoding with the
1853    "surrogateescape" error handler, and return bytes.
1854 
1855    If Py_FileSystemDefaultEncoding is not set, fall back to the locale
1856    encoding.
1857 */
1858 
1859 PyAPI_FUNC(PyObject*) PyUnicode_EncodeFSDefault(
1860     PyObject *unicode
1861     );
1862 
1863 /* --- Methods & Slots ----------------------------------------------------
1864 
1865    These are capable of handling Unicode objects and strings on input
1866    (we refer to them as strings in the descriptions) and return
1867    Unicode objects or integers as appropriate. */
1868 
1869 /* Concat two strings giving a new Unicode string. */
1870 
1871 PyAPI_FUNC(PyObject*) PyUnicode_Concat(
1872     PyObject *left,             /* Left string */
1873     PyObject *right             /* Right string */
1874     );
1875 
1876 /* Concat two strings and put the result in *pleft
1877    (sets *pleft to NULL on error) */
1878 
1879 PyAPI_FUNC(void) PyUnicode_Append(
1880     PyObject **pleft,           /* Pointer to left string */
1881     PyObject *right             /* Right string */
1882     );
1883 
1884 /* Concat two strings, put the result in *pleft and drop the right object
1885    (sets *pleft to NULL on error) */
1886 
1887 PyAPI_FUNC(void) PyUnicode_AppendAndDel(
1888     PyObject **pleft,           /* Pointer to left string */
1889     PyObject *right             /* Right string */
1890     );
1891 
1892 /* Split a string giving a list of Unicode strings.
1893 
1894    If sep is NULL, splitting will be done at all whitespace
1895    substrings. Otherwise, splits occur at the given separator.
1896 
1897    At most maxsplit splits will be done. If negative, no limit is set.
1898 
1899    Separators are not included in the resulting list.
1900 
1901 */
1902 
1903 PyAPI_FUNC(PyObject*) PyUnicode_Split(
1904     PyObject *s,                /* String to split */
1905     PyObject *sep,              /* String separator */
1906     Py_ssize_t maxsplit         /* Maxsplit count */
1907     );
1908 
1909 /* Ditto, but split at line breaks.
1910 
1911    CRLF is considered to be one line break. Line breaks are not
1912    included in the resulting list unless keepends is true. */
1913 
1914 PyAPI_FUNC(PyObject*) PyUnicode_Splitlines(
1915     PyObject *s,                /* String to split */
1916     int keepends                /* If true, line end markers are included */
1917     );
1918 
1919 /* Partition a string using a given separator. */
1920 
1921 PyAPI_FUNC(PyObject*) PyUnicode_Partition(
1922     PyObject *s,                /* String to partition */
1923     PyObject *sep               /* String separator */
1924     );
1925 
1926 /* Partition a string using a given separator, searching from the end of the
1927    string. */
1928 
1929 PyAPI_FUNC(PyObject*) PyUnicode_RPartition(
1930     PyObject *s,                /* String to partition */
1931     PyObject *sep               /* String separator */
1932     );
1933 
1934 /* Split a string giving a list of Unicode strings.
1935 
1936    If sep is NULL, splitting will be done at all whitespace
1937    substrings. Otherwise, splits occur at the given separator.
1938 
1939    At most maxsplit splits will be done. But unlike PyUnicode_Split,
1940    PyUnicode_RSplit splits from the end of the string. If negative,
1941    no limit is set.
1942 
1943    Separators are not included in the resulting list.
1944 
1945 */
1946 
1947 PyAPI_FUNC(PyObject*) PyUnicode_RSplit(
1948     PyObject *s,                /* String to split */
1949     PyObject *sep,              /* String separator */
1950     Py_ssize_t maxsplit         /* Maxsplit count */
1951     );
1952 
1953 /* Translate a string by applying a character mapping table to it and
1954    return the resulting Unicode object.
1955 
1956    The mapping table must map Unicode ordinal integers to Unicode strings,
1957    Unicode ordinal integers or None (causing deletion of the character).
1958 
1959    Mapping tables may be dictionaries or sequences. Unmapped character
1960    ordinals (ones which cause a LookupError) are left untouched and
1961    are copied as-is.
1962 
1963 */
1964 
1965 PyAPI_FUNC(PyObject *) PyUnicode_Translate(
1966     PyObject *str,              /* String */
1967     PyObject *table,            /* Translate table */
1968     const char *errors          /* error handling */
1969     );
1970 
1971 /* Join a sequence of strings using the given separator and return
1972    the resulting Unicode string. */
1973 
1974 PyAPI_FUNC(PyObject*) PyUnicode_Join(
1975     PyObject *separator,        /* Separator string */
1976     PyObject *seq               /* Sequence object */
1977     );
1978 
1979 #ifndef Py_LIMITED_API
1980 PyAPI_FUNC(PyObject *) _PyUnicode_JoinArray(
1981     PyObject *separator,
1982     PyObject *const *items,
1983     Py_ssize_t seqlen
1984     );
1985 #endif /* Py_LIMITED_API */
1986 
1987 /* Return 1 if substr matches str[start:end] at the given tail end, 0
1988    otherwise. Return -1 if an error occurred. */
1989 
1990 PyAPI_FUNC(Py_ssize_t) PyUnicode_Tailmatch(
1991     PyObject *str,              /* String */
1992     PyObject *substr,           /* Prefix or Suffix string */
1993     Py_ssize_t start,           /* Start index */
1994     Py_ssize_t end,             /* Stop index */
1995     int direction               /* Tail end: -1 prefix, +1 suffix */
1996     );
1997 
1998 /* Return the first position of substr in str[start:end] using the
1999    given search direction or -1 if not found. -2 is returned in case
2000    an error occurred and an exception is set. */
2001 
2002 PyAPI_FUNC(Py_ssize_t) PyUnicode_Find(
2003     PyObject *str,              /* String */
2004     PyObject *substr,           /* Substring to find */
2005     Py_ssize_t start,           /* Start index */
2006     Py_ssize_t end,             /* Stop index */
2007     int direction               /* Find direction: +1 forward, -1 backward */
2008     );
2009 
2010 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
2011 /* Like PyUnicode_Find, but search for single character only. */
2012 PyAPI_FUNC(Py_ssize_t) PyUnicode_FindChar(
2013     PyObject *str,              /* String */
2014     Py_UCS4 ch,                 /* Character to find */
2015     Py_ssize_t start,           /* Start index */
2016     Py_ssize_t end,             /* Stop index */
2017     int direction               /* Find direction: +1 forward, -1 backward */
2018     );
2019 #endif
2020 
2021 /* Count non-overlapping occurrences of substr in str[start:end]; -1 on error. */
2022 
2023 PyAPI_FUNC(Py_ssize_t) PyUnicode_Count(
2024     PyObject *str,              /* String */
2025     PyObject *substr,           /* Substring to count */
2026     Py_ssize_t start,           /* Start index */
2027     Py_ssize_t end              /* Stop index */
2028     );
2029 
2030 /* Replace at most maxcount occurrences of substr in str with replstr
2031    and return the resulting Unicode object. */
2032 
2033 PyAPI_FUNC(PyObject *) PyUnicode_Replace(
2034     PyObject *str,              /* String */
2035     PyObject *substr,           /* Substring to find */
2036     PyObject *replstr,          /* Substring to replace */
2037     Py_ssize_t maxcount         /* Max. number of replacements to apply;
2038                                    -1 = all */
2039     );
2040 
2041 /* Compare two strings and return -1, 0, 1 for less than, equal,
2042    greater than resp.
2043    Raise an exception and return -1 on error. */
2044 
2045 PyAPI_FUNC(int) PyUnicode_Compare(
2046     PyObject *left,             /* Left string */
2047     PyObject *right             /* Right string */
2048     );
2049 
2050 #ifndef Py_LIMITED_API
2051 /* Test whether a unicode object equals an ASCII identifier.  Return 1 if
2052    true, 0 otherwise.  The right argument must be an ASCII identifier.
2053    Any error that occurs inside is cleared before returning. */
2054 
2055 PyAPI_FUNC(int) _PyUnicode_EqualToASCIIId(
2056     PyObject *left,             /* Left string */
2057     _Py_Identifier *right       /* Right identifier */
2058     );
2059 #endif
2060 
2061 /* Compare a Unicode object with C string and return -1, 0, 1 for less than,
2062    equal, and greater than, respectively.  It is best to pass only
2063    ASCII-encoded strings, but the function interprets the input string as
2064    ISO-8859-1 if it contains non-ASCII characters.
2065    This function does not raise exceptions. */
2066 
2067 PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString(
2068     PyObject *left,             /* Unicode object */
2069     const char *right           /* ASCII-encoded string */
2070     );
2071 
2072 #ifndef Py_LIMITED_API
2073 /* Test whether a unicode object equals an ASCII string.  Return 1 if true,
2074    0 otherwise.  The right argument must be an ASCII-encoded string.
2075    Any error that occurs inside is cleared before returning. */
2076 
2077 PyAPI_FUNC(int) _PyUnicode_EqualToASCIIString(
2078     PyObject *left,             /* Unicode object */
2079     const char *right           /* ASCII-encoded string */
2080     );
2081 #endif
2082 
2083 /* Rich compare two strings and return one of the following:
2084 
2085    - NULL in case an exception was raised
2086    - Py_True or Py_False for successful comparisons
2087    - Py_NotImplemented in case the type combination is unknown
2088 
2089    Possible values for op:
2090 
2091      Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE
2092 
2093 */
2094 
2095 PyAPI_FUNC(PyObject *) PyUnicode_RichCompare(
2096     PyObject *left,             /* Left string */
2097     PyObject *right,            /* Right string */
2098     int op                      /* Operation: Py_EQ, Py_NE, Py_GT, etc. */
2099     );
2100 
2101 /* Apply an argument tuple or dictionary to a format string and return
2102    the resulting Unicode string. */
2103 
2104 PyAPI_FUNC(PyObject *) PyUnicode_Format(
2105     PyObject *format,           /* Format string */
2106     PyObject *args              /* Argument tuple or dictionary */
2107     );
2108 
2109 /* Checks whether element is contained in container and return 1/0
2110    accordingly.
2111 
2112    element has to coerce to a one element Unicode string. -1 is
2113    returned in case of an error. */
2114 
2115 PyAPI_FUNC(int) PyUnicode_Contains(
2116     PyObject *container,        /* Container string */
2117     PyObject *element           /* Element string */
2118     );
2119 
2120 /* Checks whether argument is a valid identifier. */
2121 
2122 PyAPI_FUNC(int) PyUnicode_IsIdentifier(PyObject *s);
2123 
2124 #ifndef Py_LIMITED_API
2125 /* Externally visible for str.strip(unicode) */
2126 PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
2127     PyObject *self,
2128     int striptype,
2129     PyObject *sepobj
2130     );
2131 #endif
2132 
2133 /* Using explicit passed-in values, insert the thousands grouping
2134    into the string pointed to by buffer.  For the argument descriptions,
2135    see Objects/stringlib/localeutil.h */
2136 #ifndef Py_LIMITED_API
2137 PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(
2138     _PyUnicodeWriter *writer,
2139     Py_ssize_t n_buffer,
2140     PyObject *digits,
2141     Py_ssize_t d_pos,
2142     Py_ssize_t n_digits,
2143     Py_ssize_t min_width,
2144     const char *grouping,
2145     PyObject *thousands_sep,
2146     Py_UCS4 *maxchar);
2147 #endif
2148 /* === Characters Type APIs =============================================== */
2149 
2150 /* Helper array used by Py_UNICODE_ISSPACE(). */
2151 
2152 #ifndef Py_LIMITED_API
2153 PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[];
2154 
2155 /* These should not be used directly. Use the Py_UNICODE_IS* and
2156    Py_UNICODE_TO* macros instead.
2157 
2158    These APIs are implemented in Objects/unicodectype.c.
2159 
2160 */
2161 
2162 PyAPI_FUNC(int) _PyUnicode_IsLowercase(
2163     Py_UCS4 ch       /* Unicode character */
2164     );
2165 
2166 PyAPI_FUNC(int) _PyUnicode_IsUppercase(
2167     Py_UCS4 ch       /* Unicode character */
2168     );
2169 
2170 PyAPI_FUNC(int) _PyUnicode_IsTitlecase(
2171     Py_UCS4 ch       /* Unicode character */
2172     );
2173 
2174 PyAPI_FUNC(int) _PyUnicode_IsXidStart(
2175     Py_UCS4 ch       /* Unicode character */
2176     );
2177 
2178 PyAPI_FUNC(int) _PyUnicode_IsXidContinue(
2179     Py_UCS4 ch       /* Unicode character */
2180     );
2181 
2182 PyAPI_FUNC(int) _PyUnicode_IsWhitespace(
2183     const Py_UCS4 ch         /* Unicode character */
2184     );
2185 
2186 PyAPI_FUNC(int) _PyUnicode_IsLinebreak(
2187     const Py_UCS4 ch         /* Unicode character */
2188     );
2189 
2190 PyAPI_FUNC(Py_UCS4) _PyUnicode_ToLowercase(
2191     Py_UCS4 ch       /* Unicode character */
2192     ) /* Py_DEPRECATED(3.3) */;
2193 
2194 PyAPI_FUNC(Py_UCS4) _PyUnicode_ToUppercase(
2195     Py_UCS4 ch       /* Unicode character */
2196     ) /* Py_DEPRECATED(3.3) */;
2197 
2198 PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase(
2199     Py_UCS4 ch       /* Unicode character */
2200     ) Py_DEPRECATED(3.3);
2201 
2202 PyAPI_FUNC(int) _PyUnicode_ToLowerFull(
2203     Py_UCS4 ch,       /* Unicode character */
2204     Py_UCS4 *res
2205     );
2206 
2207 PyAPI_FUNC(int) _PyUnicode_ToTitleFull(
2208     Py_UCS4 ch,       /* Unicode character */
2209     Py_UCS4 *res
2210     );
2211 
2212 PyAPI_FUNC(int) _PyUnicode_ToUpperFull(
2213     Py_UCS4 ch,       /* Unicode character */
2214     Py_UCS4 *res
2215     );
2216 
2217 PyAPI_FUNC(int) _PyUnicode_ToFoldedFull(
2218     Py_UCS4 ch,       /* Unicode character */
2219     Py_UCS4 *res
2220     );
2221 
2222 PyAPI_FUNC(int) _PyUnicode_IsCaseIgnorable(
2223     Py_UCS4 ch         /* Unicode character */
2224     );
2225 
2226 PyAPI_FUNC(int) _PyUnicode_IsCased(
2227     Py_UCS4 ch         /* Unicode character */
2228     );
2229 
2230 PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit(
2231     Py_UCS4 ch       /* Unicode character */
2232     );
2233 
2234 PyAPI_FUNC(int) _PyUnicode_ToDigit(
2235     Py_UCS4 ch       /* Unicode character */
2236     );
2237 
2238 PyAPI_FUNC(double) _PyUnicode_ToNumeric(
2239     Py_UCS4 ch       /* Unicode character */
2240     );
2241 
2242 PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit(
2243     Py_UCS4 ch       /* Unicode character */
2244     );
2245 
2246 PyAPI_FUNC(int) _PyUnicode_IsDigit(
2247     Py_UCS4 ch       /* Unicode character */
2248     );
2249 
2250 PyAPI_FUNC(int) _PyUnicode_IsNumeric(
2251     Py_UCS4 ch       /* Unicode character */
2252     );
2253 
2254 PyAPI_FUNC(int) _PyUnicode_IsPrintable(
2255     Py_UCS4 ch       /* Unicode character */
2256     );
2257 
2258 PyAPI_FUNC(int) _PyUnicode_IsAlpha(
2259     Py_UCS4 ch       /* Unicode character */
2260     );
2261 
2262 PyAPI_FUNC(size_t) Py_UNICODE_strlen(
2263     const Py_UNICODE *u
2264     ) Py_DEPRECATED(3.3);
2265 
2266 PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcpy(
2267     Py_UNICODE *s1,
2268     const Py_UNICODE *s2) Py_DEPRECATED(3.3);
2269 
2270 PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcat(
2271     Py_UNICODE *s1, const Py_UNICODE *s2) Py_DEPRECATED(3.3);
2272 
2273 PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strncpy(
2274     Py_UNICODE *s1,
2275     const Py_UNICODE *s2,
2276     size_t n) Py_DEPRECATED(3.3);
2277 
2278 PyAPI_FUNC(int) Py_UNICODE_strcmp(
2279     const Py_UNICODE *s1,
2280     const Py_UNICODE *s2
2281     ) Py_DEPRECATED(3.3);
2282 
2283 PyAPI_FUNC(int) Py_UNICODE_strncmp(
2284     const Py_UNICODE *s1,
2285     const Py_UNICODE *s2,
2286     size_t n
2287     ) Py_DEPRECATED(3.3);
2288 
2289 PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strchr(
2290     const Py_UNICODE *s,
2291     Py_UNICODE c
2292     ) Py_DEPRECATED(3.3);
2293 
2294 PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strrchr(
2295     const Py_UNICODE *s,
2296     Py_UNICODE c
2297     ) Py_DEPRECATED(3.3);
2298 
2299 PyAPI_FUNC(PyObject*) _PyUnicode_FormatLong(PyObject *, int, int, int);
2300 
2301 /* Create a copy of a unicode string ending with a nul character. Return NULL
2302    and raise a MemoryError exception on memory allocation failure, otherwise
2303    return a new allocated buffer (use PyMem_Free() to free the buffer). */
2304 
2305 PyAPI_FUNC(Py_UNICODE*) PyUnicode_AsUnicodeCopy(
2306     PyObject *unicode
2307     ) Py_DEPRECATED(3.3);
2308 #endif /* Py_LIMITED_API */
2309 
2310 #if defined(Py_DEBUG) && !defined(Py_LIMITED_API)
2311 PyAPI_FUNC(int) _PyUnicode_CheckConsistency(
2312     PyObject *op,
2313     int check_content);
2314 #elif !defined(NDEBUG)
2315 /* For asserts that call _PyUnicode_CheckConsistency(), which would
2316  * otherwise be a problem when building with asserts but without Py_DEBUG. */
2317 #define _PyUnicode_CheckConsistency(op, check_content) PyUnicode_Check(op)
2318 #endif
2319 
2320 #ifndef Py_LIMITED_API
2321 /* Return an interned Unicode object for an Identifier; may fail if there is no memory. */
2322 PyAPI_FUNC(PyObject*) _PyUnicode_FromId(_Py_Identifier*);
2323 /* Clear all static strings. */
2324 PyAPI_FUNC(void) _PyUnicode_ClearStaticStrings(void);
2325 
2326 /* Fast equality check when the inputs are known to be exact unicode types
2327    and where the hash values are equal (i.e. a very probable match) */
2328 PyAPI_FUNC(int) _PyUnicode_EQ(PyObject *, PyObject *);
2329 #endif /* !Py_LIMITED_API */
2330 
2331 #ifdef __cplusplus
2332 }
2333 #endif
2334 #endif /* !Py_UNICODEOBJECT_H */
2335