• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #ifndef Py_CPYTHON_UNICODEOBJECT_H
2 #  error "this header file must not be included directly"
3 #endif
4 
5 /* Py_UNICODE was the native Unicode storage format (code unit) used by
6    Python and represents a single Unicode element in the Unicode type.
7    With PEP 393, Py_UNICODE is deprecated and replaced with a
8    typedef to wchar_t. */
9 #define PY_UNICODE_TYPE wchar_t
10 /* Py_DEPRECATED(3.3) */ typedef wchar_t Py_UNICODE;
11 
12 /* --- Internal Unicode Operations ---------------------------------------- */
13 
14 #ifndef USE_UNICODE_WCHAR_CACHE
15 #  define USE_UNICODE_WCHAR_CACHE 1
16 #endif /* USE_UNICODE_WCHAR_CACHE */
17 
18 /* Since splitting on whitespace is an important use case, and
19    whitespace in most situations is solely ASCII whitespace, we
20    optimize for the common case by using a quick look-up table
21    _Py_ascii_whitespace (see below) with an inlined check.
22 
23  */
24 #define Py_UNICODE_ISSPACE(ch) \
25     ((Py_UCS4)(ch) < 128U ? _Py_ascii_whitespace[(ch)] : _PyUnicode_IsWhitespace(ch))
26 
27 #define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch)
28 #define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch)
29 #define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
30 #define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)
31 
32 #define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch)
33 #define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch)
34 #define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)
35 
36 #define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
37 #define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
38 #define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
39 #define Py_UNICODE_ISPRINTABLE(ch) _PyUnicode_IsPrintable(ch)
40 
41 #define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
42 #define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
43 #define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)
44 
45 #define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch)
46 
47 #define Py_UNICODE_ISALNUM(ch) \
48        (Py_UNICODE_ISALPHA(ch) || \
49     Py_UNICODE_ISDECIMAL(ch) || \
50     Py_UNICODE_ISDIGIT(ch) || \
51     Py_UNICODE_ISNUMERIC(ch))
52 
53 Py_DEPRECATED(3.3) static inline void
Py_UNICODE_COPY(Py_UNICODE * target,const Py_UNICODE * source,Py_ssize_t length)54 Py_UNICODE_COPY(Py_UNICODE *target, const Py_UNICODE *source, Py_ssize_t length) {
55     memcpy(target, source, (size_t)(length) * sizeof(Py_UNICODE));
56 }
57 
58 Py_DEPRECATED(3.3) static inline void
Py_UNICODE_FILL(Py_UNICODE * target,Py_UNICODE value,Py_ssize_t length)59 Py_UNICODE_FILL(Py_UNICODE *target, Py_UNICODE value, Py_ssize_t length) {
60     Py_ssize_t i;
61     for (i = 0; i < length; i++) {
62         target[i] = value;
63     }
64 }
65 
/* macros to work with surrogates */

/* The three range tests compare the bits above the variable part of the
   range instead of using two relational comparisons, so `ch` is evaluated
   exactly once and may safely be an expression with side effects (such as
   `*p++`).  U+D800..U+DFFF are the values whose bits above the low 11 match
   those of 0xD800; the high half (U+D800..U+DBFF) and the low half
   (U+DC00..U+DFFF) are the values whose bits above the low 10 match those
   of 0xD800 and 0xDC00 respectively. */
#define Py_UNICODE_IS_SURROGATE(ch) (((ch) >> 11) == (0xD800 >> 11))
#define Py_UNICODE_IS_HIGH_SURROGATE(ch) (((ch) >> 10) == (0xD800 >> 10))
#define Py_UNICODE_IS_LOW_SURROGATE(ch) (((ch) >> 10) == (0xDC00 >> 10))
/* Join two surrogate characters and return a single Py_UCS4 value. */
#define Py_UNICODE_JOIN_SURROGATES(high, low)  \
    (((((Py_UCS4)(high) & 0x03FF) << 10) |      \
      ((Py_UCS4)(low) & 0x03FF)) + 0x10000)
/* high surrogate = top 10 bits added to D800 */
#define Py_UNICODE_HIGH_SURROGATE(ch) (0xD800 - (0x10000 >> 10) + ((ch) >> 10))
/* low surrogate = bottom 10 bits added to DC00 */
#define Py_UNICODE_LOW_SURROGATE(ch) (0xDC00 + ((ch) & 0x3FF))
78 
79 /* --- Unicode Type ------------------------------------------------------- */
80 
81 /* ASCII-only strings created through PyUnicode_New use the PyASCIIObject
82    structure. state.ascii and state.compact are set, and the data
83    immediately follow the structure. utf8_length and wstr_length can be found
84    in the length field; the utf8 pointer is equal to the data pointer. */
85 typedef struct {
86     /* There are 4 forms of Unicode strings:
87 
88        - compact ascii:
89 
90          * structure = PyASCIIObject
91          * test: PyUnicode_IS_COMPACT_ASCII(op)
92          * kind = PyUnicode_1BYTE_KIND
93          * compact = 1
94          * ascii = 1
95          * ready = 1
96          * (length is the length of the utf8 and wstr strings)
97          * (data starts just after the structure)
98          * (since ASCII is decoded from UTF-8, the utf8 string is the data)
99 
100        - compact:
101 
102          * structure = PyCompactUnicodeObject
103          * test: PyUnicode_IS_COMPACT(op) && !PyUnicode_IS_ASCII(op)
104          * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
105            PyUnicode_4BYTE_KIND
106          * compact = 1
107          * ready = 1
108          * ascii = 0
109          * utf8 is not shared with data
110          * utf8_length = 0 if utf8 is NULL
111          * wstr is shared with data and wstr_length=length
112            if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
113            or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
114          * wstr_length = 0 if wstr is NULL
115          * (data starts just after the structure)
116 
117        - legacy string, not ready:
118 
119          * structure = PyUnicodeObject
120          * test: kind == PyUnicode_WCHAR_KIND
121          * length = 0 (use wstr_length)
122          * hash = -1
123          * kind = PyUnicode_WCHAR_KIND
124          * compact = 0
125          * ascii = 0
126          * ready = 0
127          * interned = SSTATE_NOT_INTERNED
128          * wstr is not NULL
129          * data.any is NULL
130          * utf8 is NULL
131          * utf8_length = 0
132 
133        - legacy string, ready:
134 
135          * structure = PyUnicodeObject structure
136          * test: !PyUnicode_IS_COMPACT(op) && kind != PyUnicode_WCHAR_KIND
137          * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
138            PyUnicode_4BYTE_KIND
139          * compact = 0
140          * ready = 1
141          * data.any is not NULL
142          * utf8 is shared with data.any and utf8_length = length if ascii = 1
143          * utf8_length = 0 if utf8 is NULL
144          * wstr is shared with data.any and wstr_length = length
145            if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
146            or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
147          * wstr_length = 0 if wstr is NULL
148 
149        Compact strings use only one memory block (structure + characters),
150        whereas legacy strings use one block for the structure and one block
151        for characters.
152 
153        Legacy strings are created by PyUnicode_FromUnicode() and
154        PyUnicode_FromStringAndSize(NULL, size) functions. They become ready
155        when PyUnicode_READY() is called.
156 
157        See also _PyUnicode_CheckConsistency().
158     */
159     PyObject_HEAD
160     Py_ssize_t length;          /* Number of code points in the string */
161     Py_hash_t hash;             /* Hash value; -1 if not set */
162     struct {
163         /*
164            SSTATE_NOT_INTERNED (0)
165            SSTATE_INTERNED_MORTAL (1)
166            SSTATE_INTERNED_IMMORTAL (2)
167 
168            If interned != SSTATE_NOT_INTERNED, the two references from the
169            dictionary to this object are *not* counted in ob_refcnt.
170          */
171         unsigned int interned:2;
172         /* Character size:
173 
174            - PyUnicode_WCHAR_KIND (0):
175 
176              * character type = wchar_t (16 or 32 bits, depending on the
177                platform)
178 
179            - PyUnicode_1BYTE_KIND (1):
180 
181              * character type = Py_UCS1 (8 bits, unsigned)
182              * all characters are in the range U+0000-U+00FF (latin1)
183              * if ascii is set, all characters are in the range U+0000-U+007F
184                (ASCII), otherwise at least one character is in the range
185                U+0080-U+00FF
186 
187            - PyUnicode_2BYTE_KIND (2):
188 
189              * character type = Py_UCS2 (16 bits, unsigned)
190              * all characters are in the range U+0000-U+FFFF (BMP)
191              * at least one character is in the range U+0100-U+FFFF
192 
193            - PyUnicode_4BYTE_KIND (4):
194 
195              * character type = Py_UCS4 (32 bits, unsigned)
196              * all characters are in the range U+0000-U+10FFFF
197              * at least one character is in the range U+10000-U+10FFFF
198          */
199         unsigned int kind:3;
200         /* Compact is with respect to the allocation scheme. Compact unicode
201            objects only require one memory block while non-compact objects use
202            one block for the PyUnicodeObject struct and another for its data
203            buffer. */
204         unsigned int compact:1;
205         /* The string only contains characters in the range U+0000-U+007F (ASCII)
206            and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is
207            set, use the PyASCIIObject structure. */
208         unsigned int ascii:1;
209         /* The ready flag indicates whether the object layout is initialized
210            completely. This means that this is either a compact object, or
211            the data pointer is filled out. The bit is redundant, and helps
212            to minimize the test in PyUnicode_IS_READY(). */
213         unsigned int ready:1;
214         /* Padding to ensure that PyUnicode_DATA() is always aligned to
215            4 bytes (see issue #19537 on m68k). */
216         unsigned int :24;
217     } state;
218     wchar_t *wstr;              /* wchar_t representation (null-terminated) */
219 } PyASCIIObject;
220 
221 /* Non-ASCII strings allocated through PyUnicode_New use the
222    PyCompactUnicodeObject structure. state.compact is set, and the data
223    immediately follow the structure. */
224 typedef struct {
225     PyASCIIObject _base;
226     Py_ssize_t utf8_length;     /* Number of bytes in utf8, excluding the
227                                  * terminating \0. */
228     char *utf8;                 /* UTF-8 representation (null-terminated) */
229     Py_ssize_t wstr_length;     /* Number of code points in wstr, possible
230                                  * surrogates count as two code points. */
231 } PyCompactUnicodeObject;
232 
233 /* Strings allocated through PyUnicode_FromUnicode(NULL, len) use the
234    PyUnicodeObject structure. The actual string data is initially in the wstr
235    block, and copied into the data block using _PyUnicode_Ready. */
236 typedef struct {
237     PyCompactUnicodeObject _base;
238     union {
239         void *any;
240         Py_UCS1 *latin1;
241         Py_UCS2 *ucs2;
242         Py_UCS4 *ucs4;
243     } data;                     /* Canonical, smallest-form Unicode buffer */
244 } PyUnicodeObject;
245 
246 PyAPI_FUNC(int) _PyUnicode_CheckConsistency(
247     PyObject *op,
248     int check_content);
249 
250 /* Fast access macros */
251 
252 /* Returns the deprecated Py_UNICODE representation's size in code units
253    (this includes surrogate pairs as 2 units).
254    If the Py_UNICODE representation is not available, it will be computed
255    on request.  Use PyUnicode_GET_LENGTH() for the length in code points. */
256 
257 /* Py_DEPRECATED(3.3) */
258 #define PyUnicode_GET_SIZE(op)                       \
259     (assert(PyUnicode_Check(op)),                    \
260      (((PyASCIIObject *)(op))->wstr) ?               \
261       PyUnicode_WSTR_LENGTH(op) :                    \
262       ((void)PyUnicode_AsUnicode(_PyObject_CAST(op)),\
263        assert(((PyASCIIObject *)(op))->wstr),        \
264        PyUnicode_WSTR_LENGTH(op)))
265 
266 /* Py_DEPRECATED(3.3) */
267 #define PyUnicode_GET_DATA_SIZE(op) \
268     (PyUnicode_GET_SIZE(op) * Py_UNICODE_SIZE)
269 
270 /* Alias for PyUnicode_AsUnicode().  This will create a wchar_t/Py_UNICODE
271    representation on demand.  Using this macro is very inefficient now,
272    try to port your code to use the new PyUnicode_*BYTE_DATA() macros or
273    use PyUnicode_WRITE() and PyUnicode_READ(). */
274 
275 /* Py_DEPRECATED(3.3) */
276 #define PyUnicode_AS_UNICODE(op) \
277     (assert(PyUnicode_Check(op)), \
278      (((PyASCIIObject *)(op))->wstr) ? (((PyASCIIObject *)(op))->wstr) : \
279       PyUnicode_AsUnicode(_PyObject_CAST(op)))
280 
281 /* Py_DEPRECATED(3.3) */
282 #define PyUnicode_AS_DATA(op) \
283     ((const char *)(PyUnicode_AS_UNICODE(op)))
284 
285 
286 /* --- Flexible String Representation Helper Macros (PEP 393) -------------- */
287 
288 /* Values for PyASCIIObject.state: */
289 
290 /* Interning state. */
291 #define SSTATE_NOT_INTERNED 0
292 #define SSTATE_INTERNED_MORTAL 1
293 #define SSTATE_INTERNED_IMMORTAL 2
294 
/* Return true if the string contains only ASCII characters, or 0 if not. The
   string may be compact (PyUnicode_IS_COMPACT_ASCII) or not, but must be
   ready.  `op` is expanded once per assert and once for the field access,
   so it must not have side effects. */
#define PyUnicode_IS_ASCII(op)                   \
    (assert(PyUnicode_Check(op)),                \
     assert(PyUnicode_IS_READY(op)),             \
     ((PyASCIIObject*)(op))->state.ascii)
302 
/* Return true if the string is compact or 0 if not.
   No type checks or Ready calls are performed. */
#define PyUnicode_IS_COMPACT(op) \
    (((PyASCIIObject*)(op))->state.compact)

/* Return true if the string is a compact ASCII string (use PyASCIIObject
   structure), or 0 if not.  No type checks or Ready calls are performed.
   `op` is parenthesized before the cast so that an expression argument
   (for example `cond ? a : b`) is cast as a whole. */
#define PyUnicode_IS_COMPACT_ASCII(op)                 \
    (((PyASCIIObject*)(op))->state.ascii && PyUnicode_IS_COMPACT(op))
312 
313 enum PyUnicode_Kind {
314 /* String contains only wstr byte characters.  This is only possible
315    when the string was created with a legacy API and _PyUnicode_Ready()
316    has not been called yet.  */
317     PyUnicode_WCHAR_KIND = 0,
318 /* Return values of the PyUnicode_KIND() macro: */
319     PyUnicode_1BYTE_KIND = 1,
320     PyUnicode_2BYTE_KIND = 2,
321     PyUnicode_4BYTE_KIND = 4
322 };
323 
324 /* Return pointers to the canonical representation cast to unsigned char,
325    Py_UCS2, or Py_UCS4 for direct character access.
326    No checks are performed, use PyUnicode_KIND() before to ensure
327    these will work correctly. */
328 
329 #define PyUnicode_1BYTE_DATA(op) ((Py_UCS1*)PyUnicode_DATA(op))
330 #define PyUnicode_2BYTE_DATA(op) ((Py_UCS2*)PyUnicode_DATA(op))
331 #define PyUnicode_4BYTE_DATA(op) ((Py_UCS4*)PyUnicode_DATA(op))
332 
333 /* Return one of the PyUnicode_*_KIND values defined above. */
334 #define PyUnicode_KIND(op) \
335     (assert(PyUnicode_Check(op)), \
336      assert(PyUnicode_IS_READY(op)),            \
337      ((PyASCIIObject *)(op))->state.kind)
338 
339 /* Return a void pointer to the raw unicode buffer. */
340 #define _PyUnicode_COMPACT_DATA(op)                     \
341     (PyUnicode_IS_ASCII(op) ?                   \
342      ((void*)((PyASCIIObject*)(op) + 1)) :              \
343      ((void*)((PyCompactUnicodeObject*)(op) + 1)))
344 
345 #define _PyUnicode_NONCOMPACT_DATA(op)                  \
346     (assert(((PyUnicodeObject*)(op))->data.any),        \
347      ((((PyUnicodeObject *)(op))->data.any)))
348 
349 #define PyUnicode_DATA(op) \
350     (assert(PyUnicode_Check(op)), \
351      PyUnicode_IS_COMPACT(op) ? _PyUnicode_COMPACT_DATA(op) :   \
352      _PyUnicode_NONCOMPACT_DATA(op))
353 
354 /* In the access macros below, "kind" may be evaluated more than once.
355    All other macro parameters are evaluated exactly once, so it is safe
356    to put side effects into them (such as increasing the index). */
357 
358 /* Write into the canonical representation, this macro does not do any sanity
359    checks and is intended for usage in loops.  The caller should cache the
360    kind and data pointers obtained from other macro calls.
361    index is the index in the string (starts at 0) and value is the new
362    code point value which should be written to that location. */
363 #define PyUnicode_WRITE(kind, data, index, value) \
364     do { \
365         switch ((kind)) { \
366         case PyUnicode_1BYTE_KIND: { \
367             ((Py_UCS1 *)(data))[(index)] = (Py_UCS1)(value); \
368             break; \
369         } \
370         case PyUnicode_2BYTE_KIND: { \
371             ((Py_UCS2 *)(data))[(index)] = (Py_UCS2)(value); \
372             break; \
373         } \
374         default: { \
375             assert((kind) == PyUnicode_4BYTE_KIND); \
376             ((Py_UCS4 *)(data))[(index)] = (Py_UCS4)(value); \
377         } \
378         } \
379     } while (0)
380 
381 /* Read a code point from the string's canonical representation.  No checks
382    or ready calls are performed. */
383 #define PyUnicode_READ(kind, data, index) \
384     ((Py_UCS4) \
385     ((kind) == PyUnicode_1BYTE_KIND ? \
386         ((const Py_UCS1 *)(data))[(index)] : \
387         ((kind) == PyUnicode_2BYTE_KIND ? \
388             ((const Py_UCS2 *)(data))[(index)] : \
389             ((const Py_UCS4 *)(data))[(index)] \
390         ) \
391     ))
392 
393 /* PyUnicode_READ_CHAR() is less efficient than PyUnicode_READ() because it
394    calls PyUnicode_KIND() and might call it twice.  For single reads, use
395    PyUnicode_READ_CHAR, for multiple consecutive reads callers should
396    cache kind and use PyUnicode_READ instead. */
397 #define PyUnicode_READ_CHAR(unicode, index) \
398     (assert(PyUnicode_Check(unicode)),          \
399      assert(PyUnicode_IS_READY(unicode)),       \
400      (Py_UCS4)                                  \
401         (PyUnicode_KIND((unicode)) == PyUnicode_1BYTE_KIND ? \
402             ((const Py_UCS1 *)(PyUnicode_DATA((unicode))))[(index)] : \
403             (PyUnicode_KIND((unicode)) == PyUnicode_2BYTE_KIND ? \
404                 ((const Py_UCS2 *)(PyUnicode_DATA((unicode))))[(index)] : \
405                 ((const Py_UCS4 *)(PyUnicode_DATA((unicode))))[(index)] \
406             ) \
407         ))
408 
409 /* Returns the length of the unicode string. The caller has to make sure that
410    the string has its canonical representation set before calling
411    this macro.  Call PyUnicode_(FAST_)Ready to ensure that. */
412 #define PyUnicode_GET_LENGTH(op)                \
413     (assert(PyUnicode_Check(op)),               \
414      assert(PyUnicode_IS_READY(op)),            \
415      ((PyASCIIObject *)(op))->length)
416 
417 
/* Fast check to determine whether an object is ready. Equivalent to
   PyUnicode_IS_COMPACT(op) || ((PyUnicodeObject*)(op))->data.any.
   `op` is parenthesized before the cast so that an expression argument
   is cast as a whole. */

#define PyUnicode_IS_READY(op) (((PyASCIIObject*)(op))->state.ready)
422 
423 /* PyUnicode_READY() does less work than _PyUnicode_Ready() in the best
424    case.  If the canonical representation is not yet set, it will still call
425    _PyUnicode_Ready().
426    Returns 0 on success and -1 on errors. */
427 #define PyUnicode_READY(op)                        \
428     (assert(PyUnicode_Check(op)),                       \
429      (PyUnicode_IS_READY(op) ?                          \
430       0 : _PyUnicode_Ready(_PyObject_CAST(op))))
431 
432 /* Return a maximum character value which is suitable for creating another
433    string based on op.  This is always an approximation but more efficient
434    than iterating over the string. */
435 #define PyUnicode_MAX_CHAR_VALUE(op) \
436     (assert(PyUnicode_IS_READY(op)),                                    \
437      (PyUnicode_IS_ASCII(op) ?                                          \
438       (0x7f) :                                                          \
439       (PyUnicode_KIND(op) == PyUnicode_1BYTE_KIND ?                     \
440        (0xffU) :                                                        \
441        (PyUnicode_KIND(op) == PyUnicode_2BYTE_KIND ?                    \
442         (0xffffU) :                                                     \
443         (0x10ffffU)))))
444 
445 Py_DEPRECATED(3.3)
_PyUnicode_get_wstr_length(PyObject * op)446 static inline Py_ssize_t _PyUnicode_get_wstr_length(PyObject *op) {
447     return PyUnicode_IS_COMPACT_ASCII(op) ?
448             ((PyASCIIObject*)op)->length :
449             ((PyCompactUnicodeObject*)op)->wstr_length;
450 }
451 #define PyUnicode_WSTR_LENGTH(op) _PyUnicode_get_wstr_length((PyObject*)op)
452 
453 /* === Public API ========================================================= */
454 
455 /* --- Plain Py_UNICODE --------------------------------------------------- */
456 
457 /* With PEP 393, this is the recommended way to allocate a new unicode object.
458    This function will allocate the object and its buffer in a single memory
459    block.  Objects created using this function are not resizable. */
460 PyAPI_FUNC(PyObject*) PyUnicode_New(
461     Py_ssize_t size,            /* Number of code points in the new string */
462     Py_UCS4 maxchar             /* maximum code point value in the string */
463     );
464 
465 /* Initializes the canonical string representation from the deprecated
466    wstr/Py_UNICODE representation. This function is used to convert Unicode
467    objects which were created using the old API to the new flexible format
468    introduced with PEP 393.
469 
470    Don't call this function directly, use the public PyUnicode_READY() macro
471    instead. */
472 PyAPI_FUNC(int) _PyUnicode_Ready(
473     PyObject *unicode           /* Unicode object */
474     );
475 
476 /* Get a copy of a Unicode string. */
477 PyAPI_FUNC(PyObject*) _PyUnicode_Copy(
478     PyObject *unicode
479     );
480 
481 /* Copy characters from one unicode object into another; this function performs
482    character conversion when necessary and falls back to memcpy() if possible.
483 
484    Fail if to is too small (smaller than *how_many* or smaller than
485    len(from)-from_start), or if kind(from[from_start:from_start+how_many]) >
486    kind(to), or if *to* has more than 1 reference.
487 
488    Return the number of characters written, or return -1 and raise an exception
489    on error.
490 
491    Pseudo-code:
492 
493        how_many = min(how_many, len(from) - from_start)
494        to[to_start:to_start+how_many] = from[from_start:from_start+how_many]
495        return how_many
496 
497    Note: The function doesn't write a terminating null character.
498    */
499 PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters(
500     PyObject *to,
501     Py_ssize_t to_start,
502     PyObject *from,
503     Py_ssize_t from_start,
504     Py_ssize_t how_many
505     );
506 
507 /* Unsafe version of PyUnicode_CopyCharacters(): don't check arguments and so
508    may crash if parameters are invalid (e.g. if the output string
509    is too short). */
510 PyAPI_FUNC(void) _PyUnicode_FastCopyCharacters(
511     PyObject *to,
512     Py_ssize_t to_start,
513     PyObject *from,
514     Py_ssize_t from_start,
515     Py_ssize_t how_many
516     );
517 
518 /* Fill a string with a character: write fill_char into
519    unicode[start:start+length].
520 
521    Fail if fill_char is bigger than the string maximum character, or if the
522    string has more than 1 reference.
523 
524    Return the number of characters written, or return -1 and raise an exception
525    on error. */
526 PyAPI_FUNC(Py_ssize_t) PyUnicode_Fill(
527     PyObject *unicode,
528     Py_ssize_t start,
529     Py_ssize_t length,
530     Py_UCS4 fill_char
531     );
532 
533 /* Unsafe version of PyUnicode_Fill(): don't check arguments and so may crash
534    if parameters are invalid (e.g. if length is longer than the string). */
535 PyAPI_FUNC(void) _PyUnicode_FastFill(
536     PyObject *unicode,
537     Py_ssize_t start,
538     Py_ssize_t length,
539     Py_UCS4 fill_char
540     );
541 
542 /* Create a Unicode Object from the Py_UNICODE buffer u of the given
543    size.
544 
545    u may be NULL which causes the contents to be undefined. It is the
546    user's responsibility to fill in the needed data afterwards. Note
547    that modifying the Unicode object contents after construction is
548    only allowed if u was set to NULL.
549 
550    The buffer is copied into the new object. */
551 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
552     const Py_UNICODE *u,        /* Unicode buffer */
553     Py_ssize_t size             /* size of buffer */
554     );
555 
556 /* Create a new string from a buffer of Py_UCS1, Py_UCS2 or Py_UCS4 characters.
557    Scan the string to find the maximum character. */
558 PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData(
559     int kind,
560     const void *buffer,
561     Py_ssize_t size);
562 
563 /* Create a new string from a buffer of ASCII characters.
564    WARNING: Don't check if the string contains any non-ASCII character. */
565 PyAPI_FUNC(PyObject*) _PyUnicode_FromASCII(
566     const char *buffer,
567     Py_ssize_t size);
568 
569 /* Compute the maximum character of the substring unicode[start:end].
570    Return 127 for an empty string. */
571 PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar (
572     PyObject *unicode,
573     Py_ssize_t start,
574     Py_ssize_t end);
575 
576 /* Return a read-only pointer to the Unicode object's internal
577    Py_UNICODE buffer.
578    If the wchar_t/Py_UNICODE representation is not yet available, this
579    function will calculate it. */
580 Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
581     PyObject *unicode           /* Unicode object */
582     );
583 
584 /* Similar to PyUnicode_AsUnicode(), but raises a ValueError if the string
585    contains null characters. */
586 PyAPI_FUNC(const Py_UNICODE *) _PyUnicode_AsUnicode(
587     PyObject *unicode           /* Unicode object */
588     );
589 
590 /* Return a read-only pointer to the Unicode object's internal
591    Py_UNICODE buffer and save the length at size.
592    If the wchar_t/Py_UNICODE representation is not yet available, this
593    function will calculate it. */
594 
595 Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicodeAndSize(
596     PyObject *unicode,          /* Unicode object */
597     Py_ssize_t *size            /* location where to save the length */
598     );
599 
600 
601 /* --- _PyUnicodeWriter API ----------------------------------------------- */
602 
603 typedef struct {
604     PyObject *buffer;
605     void *data;
606     enum PyUnicode_Kind kind;
607     Py_UCS4 maxchar;
608     Py_ssize_t size;
609     Py_ssize_t pos;
610 
611     /* minimum number of allocated characters (default: 0) */
612     Py_ssize_t min_length;
613 
614     /* minimum character (default: 127, ASCII) */
615     Py_UCS4 min_char;
616 
617     /* If non-zero, overallocate the buffer (default: 0). */
618     unsigned char overallocate;
619 
620     /* If readonly is 1, buffer is a shared string (cannot be modified)
621        and size is set to 0. */
622     unsigned char readonly;
623 } _PyUnicodeWriter ;
624 
625 /* Initialize a Unicode writer.
626  *
627  * By default, the minimum buffer size is 0 characters and overallocation is
628  * disabled. Set min_length, min_char and overallocate attributes to control
629  * the allocation of the buffer. */
630 PyAPI_FUNC(void)
631 _PyUnicodeWriter_Init(_PyUnicodeWriter *writer);
632 
633 /* Prepare the buffer to write 'length' characters
634    with the specified maximum character.
635 
636    Return 0 on success, raise an exception and return -1 on error. */
637 #define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR)             \
638     (((MAXCHAR) <= (WRITER)->maxchar                                  \
639       && (LENGTH) <= (WRITER)->size - (WRITER)->pos)                  \
640      ? 0                                                              \
641      : (((LENGTH) == 0)                                               \
642         ? 0                                                           \
643         : _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR))))
644 
645 /* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro
646    instead. */
647 PyAPI_FUNC(int)
648 _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
649                                  Py_ssize_t length, Py_UCS4 maxchar);
650 
651 /* Prepare the buffer to have at least the kind KIND.
652    For example, kind=PyUnicode_2BYTE_KIND ensures that the writer will
653    support characters in range U+0000-U+FFFF.
654 
655    Return 0 on success, raise an exception and return -1 on error. */
656 #define _PyUnicodeWriter_PrepareKind(WRITER, KIND)                    \
657     (assert((KIND) != PyUnicode_WCHAR_KIND),                          \
658      (KIND) <= (WRITER)->kind                                         \
659      ? 0                                                              \
660      : _PyUnicodeWriter_PrepareKindInternal((WRITER), (KIND)))
661 
662 /* Don't call this function directly, use the _PyUnicodeWriter_PrepareKind()
663    macro instead. */
664 PyAPI_FUNC(int)
665 _PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer,
666                                      enum PyUnicode_Kind kind);
667 
668 /* Append a Unicode character.
669    Return 0 on success, raise an exception and return -1 on error. */
670 PyAPI_FUNC(int)
671 _PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer,
672     Py_UCS4 ch
673     );
674 
675 /* Append a Unicode string.
676    Return 0 on success, raise an exception and return -1 on error. */
677 PyAPI_FUNC(int)
678 _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer,
679     PyObject *str               /* Unicode string */
680     );
681 
682 /* Append a substring of a Unicode string.
683    Return 0 on success, raise an exception and return -1 on error. */
684 PyAPI_FUNC(int)
685 _PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer,
686     PyObject *str,              /* Unicode string */
687     Py_ssize_t start,
688     Py_ssize_t end
689     );
690 
691 /* Append an ASCII-encoded byte string.
692    Return 0 on success, raise an exception and return -1 on error. */
693 PyAPI_FUNC(int)
694 _PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
695     const char *str,           /* ASCII-encoded byte string */
696     Py_ssize_t len             /* number of bytes, or -1 if unknown */
697     );
698 
699 /* Append a latin1-encoded byte string.
700    Return 0 on success, raise an exception and return -1 on error. */
701 PyAPI_FUNC(int)
702 _PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
703     const char *str,           /* latin1-encoded byte string */
704     Py_ssize_t len             /* length in bytes */
705     );
706 
707 /* Get the value of the writer as a Unicode string. Clear the
708    buffer of the writer. Raise an exception and return NULL
709    on error. */
710 PyAPI_FUNC(PyObject *)
711 _PyUnicodeWriter_Finish(_PyUnicodeWriter *writer);
712 
713 /* Deallocate memory of a writer (clear its internal buffer). */
714 PyAPI_FUNC(void)
715 _PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer);
716 
717 
718 /* Format the object based on the format_spec, as defined in PEP 3101
719    (Advanced String Formatting). */
720 PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter(
721     _PyUnicodeWriter *writer,
722     PyObject *obj,
723     PyObject *format_spec,
724     Py_ssize_t start,
725     Py_ssize_t end);
726 
727 /* --- Manage the default encoding ---------------------------------------- */
728 
729 /* Returns a pointer to the default encoding (UTF-8) of the
730    Unicode object unicode.
731 
732    Like PyUnicode_AsUTF8AndSize(), this also caches the UTF-8 representation
733    in the unicodeobject.
734 
735    _PyUnicode_AsString is a #define for PyUnicode_AsUTF8 to
736    support the previous internal function with the same behaviour.
737 
738    Use of this API is DEPRECATED since no size information can be
739    extracted from the returned data.
740 */
741 
742 PyAPI_FUNC(const char *) PyUnicode_AsUTF8(PyObject *unicode);
743 
744 #define _PyUnicode_AsString PyUnicode_AsUTF8
745 
746 /* --- Generic Codecs ----------------------------------------------------- */
747 
748 /* Encodes a Py_UNICODE buffer of the given size and returns a
749    Python string object. */
750 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_Encode(
751     const Py_UNICODE *s,        /* Unicode char buffer */
752     Py_ssize_t size,            /* number of Py_UNICODE chars to encode */
753     const char *encoding,       /* encoding */
754     const char *errors          /* error handling */
755     );
756 
757 /* --- UTF-7 Codecs ------------------------------------------------------- */
758 
759 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7(
760     const Py_UNICODE *data,     /* Unicode char buffer */
761     Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
762     int base64SetO,             /* Encode RFC2152 Set O characters in base64 */
763     int base64WhiteSpace,       /* Encode whitespace (sp, ht, nl, cr) in base64 */
764     const char *errors          /* error handling */
765     );
766 
767 PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF7(
768     PyObject *unicode,          /* Unicode object */
769     int base64SetO,             /* Encode RFC2152 Set O characters in base64 */
770     int base64WhiteSpace,       /* Encode whitespace (sp, ht, nl, cr) in base64 */
771     const char *errors          /* error handling */
772     );
773 
774 /* --- UTF-8 Codecs ------------------------------------------------------- */
775 
776 PyAPI_FUNC(PyObject*) _PyUnicode_AsUTF8String(
777     PyObject *unicode,          /* Unicode object */
778     const char *errors);        /* error handling */
779 
780 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8(
781     const Py_UNICODE *data,     /* Unicode char buffer */
782     Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
783     const char *errors          /* error handling */
784     );
785 
786 /* --- UTF-32 Codecs ------------------------------------------------------ */
787 
788 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF32(
789     const Py_UNICODE *data,     /* Unicode char buffer */
790     Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
791     const char *errors,         /* error handling */
792     int byteorder               /* byteorder to use 0=BOM+native;-1=LE,1=BE */
793     );
794 
795 PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF32(
796     PyObject *object,           /* Unicode object */
797     const char *errors,         /* error handling */
798     int byteorder               /* byteorder to use 0=BOM+native;-1=LE,1=BE */
799     );
800 
801 /* --- UTF-16 Codecs ------------------------------------------------------ */
802 
803 /* Returns a Python string object holding the UTF-16 encoded value of
804    the Unicode data.
805 
806    The output is written according to the following byte order,
807    selected by the byteorder argument:
808 
809    byteorder == -1: little endian
810    byteorder == 0:  native byte order (writes a BOM)
811    byteorder == 1:  big endian
812 
813    If byteorder is 0, the output string will always start with the
814    Unicode BOM (U+FEFF). In the other two modes, no BOM is
815    prepended.
816 
817    Note that Py_UNICODE data is being interpreted as UTF-16 reduced to
818    UCS-2. This trick makes it possible to add full UTF-16 capabilities
819    at a later point without compromising the APIs.
820 
821 */
822 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF16(
823     const Py_UNICODE *data,     /* Unicode char buffer */
824     Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
825     const char *errors,         /* error handling */
826     int byteorder               /* byteorder to use 0=BOM+native;-1=LE,1=BE */
827     );
828 
829 PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF16(
830     PyObject* unicode,          /* Unicode object */
831     const char *errors,         /* error handling */
832     int byteorder               /* byteorder to use 0=BOM+native;-1=LE,1=BE */
833     );
834 
835 /* --- Unicode-Escape Codecs ---------------------------------------------- */
836 
837 /* Variant of PyUnicode_DecodeUnicodeEscape that supports partial decoding. */
838 PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeStateful(
839         const char *string,     /* Unicode-Escape encoded string */
840         Py_ssize_t length,      /* size of string */
841         const char *errors,     /* error handling */
842         Py_ssize_t *consumed    /* bytes consumed */
843 );
844 
845 /* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape
846    chars. */
847 PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal(
848         const char *string,     /* Unicode-Escape encoded string */
849         Py_ssize_t length,      /* size of string */
850         const char *errors,     /* error handling */
851         Py_ssize_t *consumed,   /* bytes consumed */
852         const char **first_invalid_escape  /* on return, points to first
853                                               invalid escaped char in
854                                               string. */
855 );
856 
857 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_EncodeUnicodeEscape(
858     const Py_UNICODE *data,     /* Unicode char buffer */
859     Py_ssize_t length           /* Number of Py_UNICODE chars to encode */
860     );
861 
862 /* --- Raw-Unicode-Escape Codecs ------------------------------------------ */
863 
864 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape(
865     const Py_UNICODE *data,     /* Unicode char buffer */
866     Py_ssize_t length           /* Number of Py_UNICODE chars to encode */
867     );
868 
869 /* Variant of PyUnicode_DecodeRawUnicodeEscape that supports partial decoding. */
870 PyAPI_FUNC(PyObject*) _PyUnicode_DecodeRawUnicodeEscapeStateful(
871         const char *string,     /* Raw-Unicode-Escape encoded string */
872         Py_ssize_t length,      /* size of string */
873         const char *errors,     /* error handling */
874         Py_ssize_t *consumed    /* bytes consumed */
875 );
876 
877 /* --- Latin-1 Codecs ----------------------------------------------------- */
878 
879 PyAPI_FUNC(PyObject*) _PyUnicode_AsLatin1String(
880     PyObject* unicode,          /* Unicode object */
881     const char* errors);        /* error handling */
882 
883 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_EncodeLatin1(
884     const Py_UNICODE *data,     /* Unicode char buffer */
885     Py_ssize_t length,          /* Number of Py_UNICODE chars to encode */
886     const char *errors          /* error handling */
887     );
888 
889 /* --- ASCII Codecs ------------------------------------------------------- */
890 
891 PyAPI_FUNC(PyObject*) _PyUnicode_AsASCIIString(
892     PyObject* unicode,          /* Unicode object */
893     const char* errors);        /* error handling */
894 
895 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_EncodeASCII(
896     const Py_UNICODE *data,     /* Unicode char buffer */
897     Py_ssize_t length,          /* Number of Py_UNICODE chars to encode */
898     const char *errors          /* error handling */
899     );
900 
901 /* --- Character Map Codecs ----------------------------------------------- */
902 
903 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap(
904     const Py_UNICODE *data,     /* Unicode char buffer */
905     Py_ssize_t length,          /* Number of Py_UNICODE chars to encode */
906     PyObject *mapping,          /* encoding mapping */
907     const char *errors          /* error handling */
908     );
909 
910 PyAPI_FUNC(PyObject*) _PyUnicode_EncodeCharmap(
911     PyObject *unicode,          /* Unicode object */
912     PyObject *mapping,          /* encoding mapping */
913     const char *errors          /* error handling */
914     );
915 
916 /* Translate a Py_UNICODE buffer of the given length by applying a
917    character mapping table to it and return the resulting Unicode
918    object.
919 
920    The mapping table must map Unicode ordinal integers to Unicode strings,
921    Unicode ordinal integers or None (causing deletion of the character).
922 
923    Mapping tables may be dictionaries or sequences. Unmapped character
924    ordinals (ones which cause a LookupError) are left untouched and
925    are copied as-is.
926 
927 */
928 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap(
929     const Py_UNICODE *data,     /* Unicode char buffer */
930     Py_ssize_t length,          /* Number of Py_UNICODE chars to translate */
931     PyObject *table,            /* Translation table */
932     const char *errors          /* error handling */
933     );
934 
935 /* --- MBCS codecs for Windows -------------------------------------------- */
936 
937 #ifdef MS_WINDOWS
938 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS(
939     const Py_UNICODE *data,     /* Unicode char buffer */
940     Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
941     const char *errors          /* error handling */
942     );
943 #endif
944 
945 /* --- Decimal Encoder ---------------------------------------------------- */
946 
947 /* Takes a Unicode string holding a decimal value and writes it into
948    an output buffer using standard ASCII digit codes.
949 
950    The output buffer has to provide at least length+1 bytes of storage
951    area. The output string is 0-terminated.
952 
953    The encoder converts whitespace to ' ', decimal characters to their
954    corresponding ASCII digit and all other Latin-1 characters except
955    \0 as-is. All remaining characters (outside Unicode ordinals 1-255)
956    are treated as errors. This includes embedded NUL characters.
957 
958    Error handling is defined by the errors argument:
959 
960       NULL or "strict": raise a ValueError
961       "ignore": ignore the wrong characters (these are not copied to the
962                 output buffer)
963       "replace": replaces illegal characters with '?'
964 
965    Returns 0 on success, -1 on failure.
966 
967 */
968 
969 Py_DEPRECATED(3.3) PyAPI_FUNC(int) PyUnicode_EncodeDecimal(
970     Py_UNICODE *s,              /* Unicode buffer */
971     Py_ssize_t length,          /* Number of Py_UNICODE chars to encode */
972     char *output,               /* Output buffer; must have size >= length+1 */
973     const char *errors          /* error handling */
974     );
975 
976 /* Transforms code points that have decimal digit property to the
977    corresponding ASCII digit code points.
978 
979    Returns a new Unicode string on success, NULL on failure.
980 */
981 
982 Py_DEPRECATED(3.3)
983 PyAPI_FUNC(PyObject*) PyUnicode_TransformDecimalToASCII(
984     Py_UNICODE *s,              /* Unicode buffer */
985     Py_ssize_t length           /* Number of Py_UNICODE chars to transform */
986     );
987 
988 /* Converts a Unicode object holding a decimal value to an ASCII string
989    for use in int, float and complex parsers.
990    Transforms code points that have decimal digit property to the
991    corresponding ASCII digit code points.  Transforms spaces to ASCII.
992    Transforms code points starting from the first non-ASCII code point that
993    is neither a decimal digit nor a space to the end into '?'. */
994 
995 PyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalAndSpaceToASCII(
996     PyObject *unicode           /* Unicode object */
997     );
998 
999 /* --- Methods & Slots ---------------------------------------------------- */
1000 
1001 PyAPI_FUNC(PyObject *) _PyUnicode_JoinArray(
1002     PyObject *separator,
1003     PyObject *const *items,
1004     Py_ssize_t seqlen
1005     );
1006 
1007 /* Test whether a unicode is equal to an ASCII identifier.  Return 1 if true,
1008    0 otherwise.  The right argument must be an ASCII identifier.
1009    Any error that occurs inside is cleared before returning. */
1010 PyAPI_FUNC(int) _PyUnicode_EqualToASCIIId(
1011     PyObject *left,             /* Left string */
1012     _Py_Identifier *right       /* Right identifier */
1013     );
1014 
1015 /* Test whether a unicode is equal to an ASCII string.  Return 1 if true,
1016    0 otherwise.  The right argument must be an ASCII-encoded string.
1017    Any error that occurs inside is cleared before returning. */
1018 PyAPI_FUNC(int) _PyUnicode_EqualToASCIIString(
1019     PyObject *left,
1020     const char *right           /* ASCII-encoded string */
1021     );
1022 
1023 /* Externally visible for str.strip(unicode) */
1024 PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
1025     PyObject *self,
1026     int striptype,
1027     PyObject *sepobj
1028     );
1029 
1030 /* Using explicit passed-in values, insert the thousands grouping
1031    into the string pointed to by buffer.  For the argument descriptions,
1032    see Objects/stringlib/localeutil.h */
1033 PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(
1034     _PyUnicodeWriter *writer,
1035     Py_ssize_t n_buffer,
1036     PyObject *digits,
1037     Py_ssize_t d_pos,
1038     Py_ssize_t n_digits,
1039     Py_ssize_t min_width,
1040     const char *grouping,
1041     PyObject *thousands_sep,
1042     Py_UCS4 *maxchar);
1043 
1044 /* === Characters Type APIs =============================================== */
1045 
1046 /* Helper array used by Py_UNICODE_ISSPACE(), which indexes it with characters below 128. */
1047 
1048 PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[];
1049 
1050 /* These should not be used directly. Use the Py_UNICODE_IS* and
1051    Py_UNICODE_TO* macros instead.
1052 
1053    These APIs are implemented in Objects/unicodectype.c.
1054 
1055 */
1056 
1057 PyAPI_FUNC(int) _PyUnicode_IsLowercase(
1058     Py_UCS4 ch       /* Unicode character; wrapped by Py_UNICODE_ISLOWER() */
1059     );
1060 
1061 PyAPI_FUNC(int) _PyUnicode_IsUppercase(
1062     Py_UCS4 ch       /* Unicode character; wrapped by Py_UNICODE_ISUPPER() */
1063     );
1064 
1065 PyAPI_FUNC(int) _PyUnicode_IsTitlecase(
1066     Py_UCS4 ch       /* Unicode character; wrapped by Py_UNICODE_ISTITLE() */
1067     );
1068 
1069 PyAPI_FUNC(int) _PyUnicode_IsXidStart(
1070     Py_UCS4 ch       /* Unicode character */
1071     );
1072 
1073 PyAPI_FUNC(int) _PyUnicode_IsXidContinue(
1074     Py_UCS4 ch       /* Unicode character */
1075     );
1076 
1077 PyAPI_FUNC(int) _PyUnicode_IsWhitespace(
1078     const Py_UCS4 ch         /* Unicode character; Py_UNICODE_ISSPACE() only calls this for ch >= 128 */
1079     );
1080 
1081 PyAPI_FUNC(int) _PyUnicode_IsLinebreak(
1082     const Py_UCS4 ch         /* Unicode character; wrapped by Py_UNICODE_ISLINEBREAK() */
1083     );
1084 
1085 /* Py_DEPRECATED(3.3) */ PyAPI_FUNC(Py_UCS4) _PyUnicode_ToLowercase(
1086     Py_UCS4 ch       /* Unicode character; wrapped by Py_UNICODE_TOLOWER() */
1087     );
1088 
1089 /* Py_DEPRECATED(3.3) */ PyAPI_FUNC(Py_UCS4) _PyUnicode_ToUppercase(
1090     Py_UCS4 ch       /* Unicode character; wrapped by Py_UNICODE_TOUPPER() */
1091     );
1092 
1093 Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase(
1094     Py_UCS4 ch       /* Unicode character; wrapped by Py_UNICODE_TOTITLE() */
1095     ); /* NOTE(review): deprecation marker is active here but commented out on _PyUnicode_ToLowercase/_PyUnicode_ToUppercase -- confirm intended */
1096 
1097 PyAPI_FUNC(int) _PyUnicode_ToLowerFull(
1098     Py_UCS4 ch,       /* Unicode character */
1099     Py_UCS4 *res
1100     );
1101 
1102 PyAPI_FUNC(int) _PyUnicode_ToTitleFull(
1103     Py_UCS4 ch,       /* Unicode character */
1104     Py_UCS4 *res
1105     );
1106 
1107 PyAPI_FUNC(int) _PyUnicode_ToUpperFull(
1108     Py_UCS4 ch,       /* Unicode character */
1109     Py_UCS4 *res
1110     );
1111 
1112 PyAPI_FUNC(int) _PyUnicode_ToFoldedFull(
1113     Py_UCS4 ch,       /* Unicode character */
1114     Py_UCS4 *res
1115     );
1116 
1117 PyAPI_FUNC(int) _PyUnicode_IsCaseIgnorable(
1118     Py_UCS4 ch         /* Unicode character */
1119     );
1120 
1121 PyAPI_FUNC(int) _PyUnicode_IsCased(
1122     Py_UCS4 ch         /* Unicode character */
1123     );
1124 
1125 PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit(
1126     Py_UCS4 ch       /* Unicode character; wrapped by Py_UNICODE_TODECIMAL() */
1127     );
1128 
1129 PyAPI_FUNC(int) _PyUnicode_ToDigit(
1130     Py_UCS4 ch       /* Unicode character; wrapped by Py_UNICODE_TODIGIT() */
1131     );
1132 
1133 PyAPI_FUNC(double) _PyUnicode_ToNumeric(
1134     Py_UCS4 ch       /* Unicode character; wrapped by Py_UNICODE_TONUMERIC() */
1135     );
1136 
1137 PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit(
1138     Py_UCS4 ch       /* Unicode character; wrapped by Py_UNICODE_ISDECIMAL() */
1139     );
1140 
1141 PyAPI_FUNC(int) _PyUnicode_IsDigit(
1142     Py_UCS4 ch       /* Unicode character; wrapped by Py_UNICODE_ISDIGIT() */
1143     );
1144 
1145 PyAPI_FUNC(int) _PyUnicode_IsNumeric(
1146     Py_UCS4 ch       /* Unicode character; wrapped by Py_UNICODE_ISNUMERIC() */
1147     );
1148 
1149 PyAPI_FUNC(int) _PyUnicode_IsPrintable(
1150     Py_UCS4 ch       /* Unicode character; wrapped by Py_UNICODE_ISPRINTABLE() */
1151     );
1152 
1153 PyAPI_FUNC(int) _PyUnicode_IsAlpha(
1154     Py_UCS4 ch       /* Unicode character; wrapped by Py_UNICODE_ISALPHA() */
1155     );
1156 
1157 PyAPI_FUNC(PyObject*) _PyUnicode_FormatLong(PyObject *, int, int, int);
1158 
1159 /* Return an interned Unicode object for an Identifier; may fail if there is no memory. */
1160 PyAPI_FUNC(PyObject*) _PyUnicode_FromId(_Py_Identifier*);
1161 
1162 /* Fast equality check when the inputs are known to be exact unicode types
1163    and where the hash values are equal (i.e. a very probable match) */
1164 PyAPI_FUNC(int) _PyUnicode_EQ(PyObject *, PyObject *);
1165 
1166 PyAPI_FUNC(int) _PyUnicode_WideCharString_Converter(PyObject *, void *);
1167 PyAPI_FUNC(int) _PyUnicode_WideCharString_Opt_Converter(PyObject *, void *);
1168 
1169 PyAPI_FUNC(Py_ssize_t) _PyUnicode_ScanIdentifier(PyObject *);
1170