• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #ifndef Py_CPYTHON_UNICODEOBJECT_H
2 #  error "this header file must not be included directly"
3 #endif
4 
5 #ifdef __cplusplus
6 extern "C" {
7 #endif
8 
/* Py_UNICODE was the native Unicode storage format (code unit) used by
   Python and represents a single Unicode element in the Unicode type.
   With PEP 393, Py_UNICODE is deprecated and is now simply a typedef
   for wchar_t.  PY_UNICODE_TYPE names that underlying type. */
#define PY_UNICODE_TYPE wchar_t
/* Py_DEPRECATED(3.3) */ typedef wchar_t Py_UNICODE;
15 
16 /* --- Internal Unicode Operations ---------------------------------------- */
17 
/* Py_UNICODE_ISSPACE(ch): true if ch is a whitespace character.

   Since splitting on whitespace is an important use case, and
   whitespace in most situations is solely ASCII whitespace, we
   optimize for the common case by using a quick look-up table
   _Py_ascii_whitespace with an inlined check: code points below 128
   are looked up in the table, everything else is passed to
   _PyUnicode_IsWhitespace().

   ch appears more than once in the expansion, so it must not have
   side effects. */
#define Py_UNICODE_ISSPACE(ch) \
    ((Py_UCS4)(ch) < 128U ? _Py_ascii_whitespace[(ch)] : _PyUnicode_IsWhitespace(ch))
26 
/* Character classification and conversion macros.  Each one simply
   forwards its argument to the _PyUnicode_* function of the matching
   name. */

/* Case and line-break predicates. */
#define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch)
#define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch)
#define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
#define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)

/* Case conversions. */
#define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch)
#define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch)
#define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)

/* Numeric and printable predicates. */
#define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
#define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
#define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
#define Py_UNICODE_ISPRINTABLE(ch) _PyUnicode_IsPrintable(ch)

/* Numeric value conversions. */
#define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
#define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
#define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)

/* Alphabetic predicate. */
#define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch)
46 
/* True if ch is alphabetic, a decimal digit, a digit or numeric, i.e. if
   any of the four underlying predicates holds.  The tests short-circuit,
   and ch is expanded once per test, so it must not have side effects. */
#define Py_UNICODE_ISALNUM(ch) \
    (Py_UNICODE_ISALPHA(ch) || \
     Py_UNICODE_ISDECIMAL(ch) || \
     Py_UNICODE_ISDIGIT(ch) || \
     Py_UNICODE_ISNUMERIC(ch))
52 
53 Py_DEPRECATED(3.3) static inline void
Py_UNICODE_COPY(Py_UNICODE * target,const Py_UNICODE * source,Py_ssize_t length)54 Py_UNICODE_COPY(Py_UNICODE *target, const Py_UNICODE *source, Py_ssize_t length) {
55     memcpy(target, source, (size_t)(length) * sizeof(Py_UNICODE));
56 }
57 
58 Py_DEPRECATED(3.3) static inline void
Py_UNICODE_FILL(Py_UNICODE * target,Py_UNICODE value,Py_ssize_t length)59 Py_UNICODE_FILL(Py_UNICODE *target, Py_UNICODE value, Py_ssize_t length) {
60     Py_ssize_t i;
61     for (i = 0; i < length; i++) {
62         target[i] = value;
63     }
64 }
65 
/* Macros to work with UTF-16 surrogates.  The range tests expand ch twice,
   so ch must not have side effects. */

/* True if ch lies in the surrogate range U+D800-U+DFFF. */
#define Py_UNICODE_IS_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDFFF)
/* True if ch is a high (leading) surrogate, U+D800-U+DBFF. */
#define Py_UNICODE_IS_HIGH_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDBFF)
/* True if ch is a low (trailing) surrogate, U+DC00-U+DFFF. */
#define Py_UNICODE_IS_LOW_SURROGATE(ch) (0xDC00 <= (ch) && (ch) <= 0xDFFF)
/* Join two surrogate characters and return a single Py_UCS4 value:
   the low 10 bits of each surrogate are combined and 0x10000 is added. */
#define Py_UNICODE_JOIN_SURROGATES(high, low)  \
    (((((Py_UCS4)(high) & 0x03FF) << 10) |      \
      ((Py_UCS4)(low) & 0x03FF)) + 0x10000)
/* high surrogate = top 10 bits added to D800 */
#define Py_UNICODE_HIGH_SURROGATE(ch) (0xD800 - (0x10000 >> 10) + ((ch) >> 10))
/* low surrogate = bottom 10 bits added to DC00 */
#define Py_UNICODE_LOW_SURROGATE(ch) (0xDC00 + ((ch) & 0x3FF))
78 
79 /* --- Unicode Type ------------------------------------------------------- */
80 
81 /* ASCII-only strings created through PyUnicode_New use the PyASCIIObject
82    structure. state.ascii and state.compact are set, and the data
83    immediately follow the structure. utf8_length and wstr_length can be found
84    in the length field; the utf8 pointer is equal to the data pointer. */
85 typedef struct {
86     /* There are 4 forms of Unicode strings:
87 
88        - compact ascii:
89 
90          * structure = PyASCIIObject
91          * test: PyUnicode_IS_COMPACT_ASCII(op)
92          * kind = PyUnicode_1BYTE_KIND
93          * compact = 1
94          * ascii = 1
95          * ready = 1
96          * (length is the length of the utf8 and wstr strings)
97          * (data starts just after the structure)
98          * (since ASCII is decoded from UTF-8, the utf8 string are the data)
99 
100        - compact:
101 
102          * structure = PyCompactUnicodeObject
103          * test: PyUnicode_IS_COMPACT(op) && !PyUnicode_IS_ASCII(op)
104          * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
105            PyUnicode_4BYTE_KIND
106          * compact = 1
107          * ready = 1
108          * ascii = 0
109          * utf8 is not shared with data
110          * utf8_length = 0 if utf8 is NULL
111          * wstr is shared with data and wstr_length=length
112            if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
113            or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
114          * wstr_length = 0 if wstr is NULL
115          * (data starts just after the structure)
116 
117        - legacy string, not ready:
118 
119          * structure = PyUnicodeObject
120          * test: kind == PyUnicode_WCHAR_KIND
121          * length = 0 (use wstr_length)
122          * hash = -1
123          * kind = PyUnicode_WCHAR_KIND
124          * compact = 0
125          * ascii = 0
126          * ready = 0
127          * interned = SSTATE_NOT_INTERNED
128          * wstr is not NULL
129          * data.any is NULL
130          * utf8 is NULL
131          * utf8_length = 0
132 
133        - legacy string, ready:
134 
135          * structure = PyUnicodeObject structure
136          * test: !PyUnicode_IS_COMPACT(op) && kind != PyUnicode_WCHAR_KIND
137          * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
138            PyUnicode_4BYTE_KIND
139          * compact = 0
140          * ready = 1
141          * data.any is not NULL
142          * utf8 is shared and utf8_length = length with data.any if ascii = 1
143          * utf8_length = 0 if utf8 is NULL
144          * wstr is shared with data.any and wstr_length = length
145            if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
146            or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_4)=4
147          * wstr_length = 0 if wstr is NULL
148 
149        Compact strings use only one memory block (structure + characters),
150        whereas legacy strings use one block for the structure and one block
151        for characters.
152 
153        Legacy strings are created by PyUnicode_FromUnicode() and
154        PyUnicode_FromStringAndSize(NULL, size) functions. They become ready
155        when PyUnicode_READY() is called.
156 
157        See also _PyUnicode_CheckConsistency().
158     */
159     PyObject_HEAD
160     Py_ssize_t length;          /* Number of code points in the string */
161     Py_hash_t hash;             /* Hash value; -1 if not set */
162     struct {
163         /*
164            SSTATE_NOT_INTERNED (0)
165            SSTATE_INTERNED_MORTAL (1)
166            SSTATE_INTERNED_IMMORTAL (2)
167 
168            If interned != SSTATE_NOT_INTERNED, the two references from the
169            dictionary to this object are *not* counted in ob_refcnt.
170          */
171         unsigned int interned:2;
172         /* Character size:
173 
174            - PyUnicode_WCHAR_KIND (0):
175 
176              * character type = wchar_t (16 or 32 bits, depending on the
177                platform)
178 
179            - PyUnicode_1BYTE_KIND (1):
180 
181              * character type = Py_UCS1 (8 bits, unsigned)
182              * all characters are in the range U+0000-U+00FF (latin1)
183              * if ascii is set, all characters are in the range U+0000-U+007F
184                (ASCII), otherwise at least one character is in the range
185                U+0080-U+00FF
186 
187            - PyUnicode_2BYTE_KIND (2):
188 
189              * character type = Py_UCS2 (16 bits, unsigned)
190              * all characters are in the range U+0000-U+FFFF (BMP)
191              * at least one character is in the range U+0100-U+FFFF
192 
193            - PyUnicode_4BYTE_KIND (4):
194 
195              * character type = Py_UCS4 (32 bits, unsigned)
196              * all characters are in the range U+0000-U+10FFFF
197              * at least one character is in the range U+10000-U+10FFFF
198          */
199         unsigned int kind:3;
200         /* Compact is with respect to the allocation scheme. Compact unicode
201            objects only require one memory block while non-compact objects use
202            one block for the PyUnicodeObject struct and another for its data
203            buffer. */
204         unsigned int compact:1;
205         /* The string only contains characters in the range U+0000-U+007F (ASCII)
206            and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is
207            set, use the PyASCIIObject structure. */
208         unsigned int ascii:1;
209         /* The ready flag indicates whether the object layout is initialized
210            completely. This means that this is either a compact object, or
211            the data pointer is filled out. The bit is redundant, and helps
212            to minimize the test in PyUnicode_IS_READY(). */
213         unsigned int ready:1;
214         /* Padding to ensure that PyUnicode_DATA() is always aligned to
215            4 bytes (see issue #19537 on m68k). */
216         unsigned int :24;
217     } state;
218     wchar_t *wstr;              /* wchar_t representation (null-terminated) */
219 } PyASCIIObject;
220 
221 /* Non-ASCII strings allocated through PyUnicode_New use the
222    PyCompactUnicodeObject structure. state.compact is set, and the data
223    immediately follow the structure. */
224 typedef struct {
225     PyASCIIObject _base;
226     Py_ssize_t utf8_length;     /* Number of bytes in utf8, excluding the
227                                  * terminating \0. */
228     char *utf8;                 /* UTF-8 representation (null-terminated) */
229     Py_ssize_t wstr_length;     /* Number of code points in wstr, possible
230                                  * surrogates count as two code points. */
231 } PyCompactUnicodeObject;
232 
233 /* Strings allocated through PyUnicode_FromUnicode(NULL, len) use the
234    PyUnicodeObject structure. The actual string data is initially in the wstr
235    block, and copied into the data block using _PyUnicode_Ready. */
236 typedef struct {
237     PyCompactUnicodeObject _base;
238     union {
239         void *any;
240         Py_UCS1 *latin1;
241         Py_UCS2 *ucs2;
242         Py_UCS4 *ucs4;
243     } data;                     /* Canonical, smallest-form Unicode buffer */
244 } PyUnicodeObject;
245 
246 PyAPI_FUNC(int) _PyUnicode_CheckConsistency(
247     PyObject *op,
248     int check_content);
249 
250 /* Fast access macros */
251 
/* Returns the deprecated Py_UNICODE representation's size in code units
   (this includes surrogate pairs as 2 units).
   If the Py_UNICODE representation is not available, it will be computed
   on request (through PyUnicode_AsUnicode()).  Use PyUnicode_GET_LENGTH()
   for the length in code points.

   op is expanded several times, so it must not have side effects. */

/* Py_DEPRECATED(3.3) */
#define PyUnicode_GET_SIZE(op)                       \
    (assert(PyUnicode_Check(op)),                    \
     (((PyASCIIObject *)(op))->wstr) ?               \
      PyUnicode_WSTR_LENGTH(op) :                    \
      ((void)PyUnicode_AsUnicode(_PyObject_CAST(op)),\
       assert(((PyASCIIObject *)(op))->wstr),        \
       PyUnicode_WSTR_LENGTH(op)))

/* Size of the Py_UNICODE representation in bytes:
   PyUnicode_GET_SIZE(op) multiplied by Py_UNICODE_SIZE. */
/* Py_DEPRECATED(3.3) */
#define PyUnicode_GET_DATA_SIZE(op) \
    (PyUnicode_GET_SIZE(op) * Py_UNICODE_SIZE)
269 
/* Alias for PyUnicode_AsUnicode().  This will create a wchar_t/Py_UNICODE
   representation on demand: the cached wstr pointer is returned when it is
   already set, otherwise PyUnicode_AsUnicode() is called.  Using this macro
   is very inefficient now, try to port your code to use the new
   PyUnicode_*BYTE_DATA() macros or use PyUnicode_WRITE() and
   PyUnicode_READ().

   op is expanded several times, so it must not have side effects. */

/* Py_DEPRECATED(3.3) */
#define PyUnicode_AS_UNICODE(op) \
    (assert(PyUnicode_Check(op)), \
     (((PyASCIIObject *)(op))->wstr) ? (((PyASCIIObject *)(op))->wstr) : \
      PyUnicode_AsUnicode(_PyObject_CAST(op)))

/* Same pointer as PyUnicode_AS_UNICODE(op), cast to const char *. */
/* Py_DEPRECATED(3.3) */
#define PyUnicode_AS_DATA(op) \
    ((const char *)(PyUnicode_AS_UNICODE(op)))
284 
285 
286 /* --- Flexible String Representation Helper Macros (PEP 393) -------------- */
287 
288 /* Values for PyASCIIObject.state: */
289 
/* Interning state, stored in the 2-bit PyASCIIObject.state.interned field. */
#define SSTATE_NOT_INTERNED 0
#define SSTATE_INTERNED_MORTAL 1
#define SSTATE_INTERNED_IMMORTAL 2
294 
/* Return true if the string contains only ASCII characters, or 0 if not. The
   string may be compact (PyUnicode_IS_COMPACT_ASCII) or not, but must be
   ready.  Both preconditions (unicode type, ready) are only checked through
   assert().  op is expanded several times. */
#define PyUnicode_IS_ASCII(op)                   \
    (assert(PyUnicode_Check(op)),                \
     assert(PyUnicode_IS_READY(op)),             \
     ((PyASCIIObject*)(op))->state.ascii)
302 
/* Return true if the string is compact or 0 if not, by reading the
   state.compact bit.  No type checks or Ready calls are performed. */
#define PyUnicode_IS_COMPACT(op) \
    (((PyASCIIObject*)(op))->state.compact)
307 
/* Return true if the string is a compact ASCII string (use PyASCIIObject
   structure), or 0 if not: both state.ascii and state.compact must be set.
   No type checks or Ready calls are performed.  op is expanded twice. */
#define PyUnicode_IS_COMPACT_ASCII(op)                 \
    (((PyASCIIObject*)(op))->state.ascii && PyUnicode_IS_COMPACT(op))
312 
/* Storage kind of a Unicode string.  Each of the 1/2/4-byte kinds has a
   value equal to its element size in bytes. */
enum PyUnicode_Kind {
/* String contains only wstr byte characters.  This is only possible
   when the string was created with a legacy API and _PyUnicode_Ready()
   has not been called yet.  */
    PyUnicode_WCHAR_KIND = 0,
/* Return values of the PyUnicode_KIND() macro: */
    PyUnicode_1BYTE_KIND = 1,
    PyUnicode_2BYTE_KIND = 2,
    PyUnicode_4BYTE_KIND = 4
};
323 
/* Return pointers to the canonical representation cast to Py_UCS1
   (unsigned char), Py_UCS2, or Py_UCS4 for direct character access.
   Each macro is PyUnicode_DATA(op) with a pointer cast.
   No checks are performed, use PyUnicode_KIND() before to ensure
   these will work correctly. */

#define PyUnicode_1BYTE_DATA(op) ((Py_UCS1*)PyUnicode_DATA(op))
#define PyUnicode_2BYTE_DATA(op) ((Py_UCS2*)PyUnicode_DATA(op))
#define PyUnicode_4BYTE_DATA(op) ((Py_UCS4*)PyUnicode_DATA(op))
332 
/* Return one of the PyUnicode_*_KIND values defined above, read from
   state.kind.  The object must be a ready unicode object; both conditions
   are only checked through assert().  op is expanded several times. */
#define PyUnicode_KIND(op) \
    (assert(PyUnicode_Check(op)), \
     assert(PyUnicode_IS_READY(op)),            \
     ((PyASCIIObject *)(op))->state.kind)
338 
/* Data pointer of a compact string: the characters start right after the
   PyASCIIObject header for ASCII strings, and right after the
   PyCompactUnicodeObject header otherwise. */
#define _PyUnicode_COMPACT_DATA(op)                     \
    (PyUnicode_IS_ASCII(op) ?                   \
     ((void*)((PyASCIIObject*)(op) + 1)) :              \
     ((void*)((PyCompactUnicodeObject*)(op) + 1)))

/* Data pointer of a non-compact string: the separately stored data.any
   pointer, which is asserted to be non-NULL. */
#define _PyUnicode_NONCOMPACT_DATA(op)                  \
    (assert(((PyUnicodeObject*)(op))->data.any),        \
     ((((PyUnicodeObject *)(op))->data.any)))

/* Return a void pointer to the raw unicode buffer, choosing between the
   compact and non-compact layouts.  op is expanded several times. */
#define PyUnicode_DATA(op) \
    (assert(PyUnicode_Check(op)), \
     PyUnicode_IS_COMPACT(op) ? _PyUnicode_COMPACT_DATA(op) :   \
     _PyUnicode_NONCOMPACT_DATA(op))
353 
/* In the access macros PyUnicode_WRITE() and PyUnicode_READ(), "kind" may
   be evaluated more than once.  All other macro parameters are evaluated
   exactly once, so it is safe to put side effects into them (such as
   increasing the index). */

/* Write into the canonical representation, this macro does not do any sanity
   checks and is intended for usage in loops.  The caller should cache the
   kind and data pointers obtained from other macro calls.
   index is the index in the string (starts at 0) and value is the new
   code point value which should be written to that location.

   value is cast to the element type selected by kind (Py_UCS1, Py_UCS2 or
   Py_UCS4).  Any kind other than the 1-byte and 2-byte kinds is treated as
   PyUnicode_4BYTE_KIND, which is only checked through assert(). */
#define PyUnicode_WRITE(kind, data, index, value) \
    do { \
        switch ((kind)) { \
        case PyUnicode_1BYTE_KIND: { \
            ((Py_UCS1 *)(data))[(index)] = (Py_UCS1)(value); \
            break; \
        } \
        case PyUnicode_2BYTE_KIND: { \
            ((Py_UCS2 *)(data))[(index)] = (Py_UCS2)(value); \
            break; \
        } \
        default: { \
            assert((kind) == PyUnicode_4BYTE_KIND); \
            ((Py_UCS4 *)(data))[(index)] = (Py_UCS4)(value); \
        } \
        } \
    } while (0)
380 
/* Read a code point from the string's canonical representation and return
   it as a Py_UCS4.  data is interpreted as an array of Py_UCS1, Py_UCS2 or
   (for any other kind) Py_UCS4 elements.  No checks or ready calls are
   performed.  kind may be evaluated twice; data and index are evaluated
   once. */
#define PyUnicode_READ(kind, data, index) \
    ((Py_UCS4) \
    ((kind) == PyUnicode_1BYTE_KIND ? \
        ((const Py_UCS1 *)(data))[(index)] : \
        ((kind) == PyUnicode_2BYTE_KIND ? \
            ((const Py_UCS2 *)(data))[(index)] : \
            ((const Py_UCS4 *)(data))[(index)] \
        ) \
    ))
392 
/* PyUnicode_READ_CHAR() is less efficient than PyUnicode_READ() because it
   calls PyUnicode_KIND() and might call it twice.  For single reads, use
   PyUnicode_READ_CHAR, for multiple consecutive reads callers should
   cache kind and use PyUnicode_READ instead.

   The object must be a ready unicode object (checked only through
   assert()).  `unicode` is expanded several times, so it must not have
   side effects; only one of the three index expressions is evaluated. */
#define PyUnicode_READ_CHAR(unicode, index) \
    (assert(PyUnicode_Check(unicode)),          \
     assert(PyUnicode_IS_READY(unicode)),       \
     (Py_UCS4)                                  \
        (PyUnicode_KIND((unicode)) == PyUnicode_1BYTE_KIND ? \
            ((const Py_UCS1 *)(PyUnicode_DATA((unicode))))[(index)] : \
            (PyUnicode_KIND((unicode)) == PyUnicode_2BYTE_KIND ? \
                ((const Py_UCS2 *)(PyUnicode_DATA((unicode))))[(index)] : \
                ((const Py_UCS4 *)(PyUnicode_DATA((unicode))))[(index)] \
            ) \
        ))
408 
/* Returns the length of the unicode string (the `length` field, in code
   points). The caller has to make sure that the string has its canonical
   representation set before calling this macro.  Call
   PyUnicode_(FAST_)Ready to ensure that.  The type and ready state are
   only checked through assert(). */
#define PyUnicode_GET_LENGTH(op)                \
    (assert(PyUnicode_Check(op)),               \
     assert(PyUnicode_IS_READY(op)),            \
     ((PyASCIIObject *)(op))->length)
416 
417 
/* Fast check to determine whether an object is ready, by reading the
   state.ready bit. Equivalent to
   PyUnicode_IS_COMPACT(op) || ((PyUnicodeObject*)(op))->data.any */

#define PyUnicode_IS_READY(op) (((PyASCIIObject*)(op))->state.ready)
422 
/* PyUnicode_READY() does less work than _PyUnicode_Ready() in the best
   case: an object that is already ready yields 0 without a function call.
   If the canonical representation is not yet set, it will still call
   _PyUnicode_Ready().
   Returns 0 on success and -1 on errors.  op is expanded several times. */
#define PyUnicode_READY(op)                        \
    (assert(PyUnicode_Check(op)),                       \
     (PyUnicode_IS_READY(op) ?                          \
      0 : _PyUnicode_Ready(_PyObject_CAST(op))))
431 
/* Return a maximum character value which is suitable for creating another
   string based on op.  This is always an approximation but more efficient
   than iterating over the string: the result is 0x7f for ASCII strings and
   otherwise the largest value representable by the string's kind (0xff,
   0xffff or 0x10ffff).  op must be ready and is expanded several times. */
#define PyUnicode_MAX_CHAR_VALUE(op) \
    (assert(PyUnicode_IS_READY(op)),                                    \
     (PyUnicode_IS_ASCII(op) ?                                          \
      (0x7fU) :                                                         \
      (PyUnicode_KIND(op) == PyUnicode_1BYTE_KIND ?                     \
       (0xffU) :                                                        \
       (PyUnicode_KIND(op) == PyUnicode_2BYTE_KIND ?                    \
        (0xffffU) :                                                     \
        (0x10ffffU)))))
444 
445 Py_DEPRECATED(3.3)
_PyUnicode_get_wstr_length(PyObject * op)446 static inline Py_ssize_t _PyUnicode_get_wstr_length(PyObject *op) {
447     return PyUnicode_IS_COMPACT_ASCII(op) ?
448             ((PyASCIIObject*)op)->length :
449             ((PyCompactUnicodeObject*)op)->wstr_length;
450 }
451 #define PyUnicode_WSTR_LENGTH(op) _PyUnicode_get_wstr_length((PyObject*)op)
452 
453 /* === Public API ========================================================= */
454 
455 /* --- Plain Py_UNICODE --------------------------------------------------- */
456 
457 /* With PEP 393, this is the recommended way to allocate a new unicode object.
458    This function will allocate the object and its buffer in a single memory
459    block.  Objects created using this function are not resizable. */
460 PyAPI_FUNC(PyObject*) PyUnicode_New(
461     Py_ssize_t size,            /* Number of code points in the new string */
462     Py_UCS4 maxchar             /* maximum code point value in the string */
463     );
464 
465 /* Initializes the canonical string representation from the deprecated
466    wstr/Py_UNICODE representation. This function is used to convert Unicode
467    objects which were created using the old API to the new flexible format
468    introduced with PEP 393.
469 
470    Don't call this function directly, use the public PyUnicode_READY() macro
471    instead. */
472 PyAPI_FUNC(int) _PyUnicode_Ready(
473     PyObject *unicode           /* Unicode object */
474     );
475 
476 /* Get a copy of a Unicode string. */
477 PyAPI_FUNC(PyObject*) _PyUnicode_Copy(
478     PyObject *unicode
479     );
480 
481 /* Copy character from one unicode object into another, this function performs
482    character conversion when necessary and falls back to memcpy() if possible.
483 
484    Fail if to is too small (smaller than *how_many* or smaller than
485    len(from)-from_start), or if kind(from[from_start:from_start+how_many]) >
486    kind(to), or if *to* has more than 1 reference.
487 
488    Return the number of written character, or return -1 and raise an exception
489    on error.
490 
491    Pseudo-code:
492 
493        how_many = min(how_many, len(from) - from_start)
494        to[to_start:to_start+how_many] = from[from_start:from_start+how_many]
495        return how_many
496 
497    Note: The function doesn't write a terminating null character.
498    */
499 PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters(
500     PyObject *to,
501     Py_ssize_t to_start,
502     PyObject *from,
503     Py_ssize_t from_start,
504     Py_ssize_t how_many
505     );
506 
507 /* Unsafe version of PyUnicode_CopyCharacters(): don't check arguments and so
508    may crash if parameters are invalid (e.g. if the output string
509    is too short). */
510 PyAPI_FUNC(void) _PyUnicode_FastCopyCharacters(
511     PyObject *to,
512     Py_ssize_t to_start,
513     PyObject *from,
514     Py_ssize_t from_start,
515     Py_ssize_t how_many
516     );
517 
518 /* Fill a string with a character: write fill_char into
519    unicode[start:start+length].
520 
521    Fail if fill_char is bigger than the string maximum character, or if the
522    string has more than 1 reference.
523 
524    Return the number of written character, or return -1 and raise an exception
525    on error. */
526 PyAPI_FUNC(Py_ssize_t) PyUnicode_Fill(
527     PyObject *unicode,
528     Py_ssize_t start,
529     Py_ssize_t length,
530     Py_UCS4 fill_char
531     );
532 
533 /* Unsafe version of PyUnicode_Fill(): don't check arguments and so may crash
534    if parameters are invalid (e.g. if length is longer than the string). */
535 PyAPI_FUNC(void) _PyUnicode_FastFill(
536     PyObject *unicode,
537     Py_ssize_t start,
538     Py_ssize_t length,
539     Py_UCS4 fill_char
540     );
541 
542 /* Create a Unicode Object from the Py_UNICODE buffer u of the given
543    size.
544 
545    u may be NULL which causes the contents to be undefined. It is the
546    user's responsibility to fill in the needed data afterwards. Note
547    that modifying the Unicode object contents after construction is
548    only allowed if u was set to NULL.
549 
550    The buffer is copied into the new object. */
551 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
552     const Py_UNICODE *u,        /* Unicode buffer */
553     Py_ssize_t size             /* size of buffer */
554     );
555 
556 /* Create a new string from a buffer of Py_UCS1, Py_UCS2 or Py_UCS4 characters.
557    Scan the string to find the maximum character. */
558 PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData(
559     int kind,
560     const void *buffer,
561     Py_ssize_t size);
562 
563 /* Create a new string from a buffer of ASCII characters.
564    WARNING: Don't check if the string contains any non-ASCII character. */
565 PyAPI_FUNC(PyObject*) _PyUnicode_FromASCII(
566     const char *buffer,
567     Py_ssize_t size);
568 
569 /* Compute the maximum character of the substring unicode[start:end].
570    Return 127 for an empty string. */
571 PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar (
572     PyObject *unicode,
573     Py_ssize_t start,
574     Py_ssize_t end);
575 
576 /* Return a read-only pointer to the Unicode object's internal
577    Py_UNICODE buffer.
578    If the wchar_t/Py_UNICODE representation is not yet available, this
579    function will calculate it. */
580 Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
581     PyObject *unicode           /* Unicode object */
582     );
583 
584 /* Similar to PyUnicode_AsUnicode(), but raises a ValueError if the string
585    contains null characters. */
586 Py_DEPRECATED(3.3) PyAPI_FUNC(const Py_UNICODE *) _PyUnicode_AsUnicode(
587     PyObject *unicode           /* Unicode object */
588     );
589 
590 /* Return a read-only pointer to the Unicode object's internal
591    Py_UNICODE buffer and save the length at size.
592    If the wchar_t/Py_UNICODE representation is not yet available, this
593    function will calculate it. */
594 
595 Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicodeAndSize(
596     PyObject *unicode,          /* Unicode object */
597     Py_ssize_t *size            /* location where to save the length */
598     );
599 
600 /* Get the maximum ordinal for a Unicode character. */
601 Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void);
602 
603 
604 /* --- _PyUnicodeWriter API ----------------------------------------------- */
605 
606 typedef struct {
607     PyObject *buffer;
608     void *data;
609     enum PyUnicode_Kind kind;
610     Py_UCS4 maxchar;
611     Py_ssize_t size;
612     Py_ssize_t pos;
613 
614     /* minimum number of allocated characters (default: 0) */
615     Py_ssize_t min_length;
616 
617     /* minimum character (default: 127, ASCII) */
618     Py_UCS4 min_char;
619 
620     /* If non-zero, overallocate the buffer (default: 0). */
621     unsigned char overallocate;
622 
623     /* If readonly is 1, buffer is a shared string (cannot be modified)
624        and size is set to 0. */
625     unsigned char readonly;
626 } _PyUnicodeWriter ;
627 
628 /* Initialize a Unicode writer.
629  *
630  * By default, the minimum buffer size is 0 character and overallocation is
631  * disabled. Set min_length, min_char and overallocate attributes to control
632  * the allocation of the buffer. */
633 PyAPI_FUNC(void)
634 _PyUnicodeWriter_Init(_PyUnicodeWriter *writer);
635 
/* Prepare the buffer to write 'length' characters
   with the specified maximum character.

   Nothing is done when the writer already supports MAXCHAR and has at
   least LENGTH characters of free space (size - pos), or when LENGTH is 0;
   otherwise _PyUnicodeWriter_PrepareInternal() is called.  The macro
   arguments are expanded more than once and must not have side effects.

   Return 0 on success, raise an exception and return -1 on error. */
#define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR)             \
    (((MAXCHAR) <= (WRITER)->maxchar                                  \
      && (LENGTH) <= (WRITER)->size - (WRITER)->pos)                  \
     ? 0                                                              \
     : (((LENGTH) == 0)                                               \
        ? 0                                                           \
        : _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR))))
647 
648 /* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro
649    instead. */
650 PyAPI_FUNC(int)
651 _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
652                                  Py_ssize_t length, Py_UCS4 maxchar);
653 
/* Prepare the buffer to have at least the kind KIND.
   For example, kind=PyUnicode_2BYTE_KIND ensures that the writer will
   support characters in range U+0000-U+FFFF.

   KIND must not be PyUnicode_WCHAR_KIND (checked only through assert()).
   Nothing is done when the writer's kind is already at least KIND;
   otherwise _PyUnicodeWriter_PrepareKindInternal() is called.  The macro
   arguments are expanded more than once and must not have side effects.

   Return 0 on success, raise an exception and return -1 on error. */
#define _PyUnicodeWriter_PrepareKind(WRITER, KIND)                    \
    (assert((KIND) != PyUnicode_WCHAR_KIND),                          \
     (KIND) <= (WRITER)->kind                                         \
     ? 0                                                              \
     : _PyUnicodeWriter_PrepareKindInternal((WRITER), (KIND)))
664 
665 /* Don't call this function directly, use the _PyUnicodeWriter_PrepareKind()
666    macro instead. */
667 PyAPI_FUNC(int)
668 _PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer,
669                                      enum PyUnicode_Kind kind);
670 
671 /* Append a Unicode character.
672    Return 0 on success, raise an exception and return -1 on error. */
673 PyAPI_FUNC(int)
674 _PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer,
675     Py_UCS4 ch
676     );
677 
678 /* Append a Unicode string.
679    Return 0 on success, raise an exception and return -1 on error. */
680 PyAPI_FUNC(int)
681 _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer,
682     PyObject *str               /* Unicode string */
683     );
684 
685 /* Append a substring of a Unicode string.
686    Return 0 on success, raise an exception and return -1 on error. */
687 PyAPI_FUNC(int)
688 _PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer,
689     PyObject *str,              /* Unicode string */
690     Py_ssize_t start,
691     Py_ssize_t end
692     );
693 
694 /* Append an ASCII-encoded byte string.
695    Return 0 on success, raise an exception and return -1 on error. */
696 PyAPI_FUNC(int)
697 _PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
698     const char *str,           /* ASCII-encoded byte string */
699     Py_ssize_t len             /* number of bytes, or -1 if unknown */
700     );
701 
702 /* Append a latin1-encoded byte string.
703    Return 0 on success, raise an exception and return -1 on error. */
704 PyAPI_FUNC(int)
705 _PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
706     const char *str,           /* latin1-encoded byte string */
707     Py_ssize_t len             /* length in bytes */
708     );
709 
710 /* Get the value of the writer as a Unicode string. Clear the
711    buffer of the writer. Raise an exception and return NULL
712    on error. */
713 PyAPI_FUNC(PyObject *)
714 _PyUnicodeWriter_Finish(_PyUnicodeWriter *writer);
715 
716 /* Deallocate memory of a writer (clear its internal buffer). */
717 PyAPI_FUNC(void)
718 _PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer);
719 
720 
721 /* Format the object based on the format_spec, as defined in PEP 3101
722    (Advanced String Formatting). */
723 PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter(
724     _PyUnicodeWriter *writer,
725     PyObject *obj,
726     PyObject *format_spec,
727     Py_ssize_t start,
728     Py_ssize_t end);
729 
730 /* --- Manage the default encoding ---------------------------------------- */
731 
732 /* Returns a pointer to the UTF-8 encoded representation (the default
733    encoding) of the Unicode object unicode and stores the size of that
734    representation, in bytes, in *size.
735 
736    In case of an error, *size is not set.
737 
738    This function caches the UTF-8 encoded string in the unicodeobject
739    and subsequent calls will return the same string.  The memory is released
740    when the unicodeobject is deallocated.
741 
742    _PyUnicode_AsStringAndSize is a #define for PyUnicode_AsUTF8AndSize to
743    support the previous internal function with the same behaviour.
744 */
745 
746 PyAPI_FUNC(const char *) PyUnicode_AsUTF8AndSize(
747     PyObject *unicode,
748     Py_ssize_t *size);
749 
750 #define _PyUnicode_AsStringAndSize PyUnicode_AsUTF8AndSize
751 
752 /* Returns a pointer to the default encoding (UTF-8) of the
753    Unicode object unicode.
754 
755    Like PyUnicode_AsUTF8AndSize(), this also caches the UTF-8 representation
756    in the unicodeobject.
757 
758    _PyUnicode_AsString is a #define for PyUnicode_AsUTF8 to
759    support the previous internal function with the same behaviour.
760 
761    Use of this API is DEPRECATED since no size information can be
762    extracted from the returned data.
763 */
764 
765 PyAPI_FUNC(const char *) PyUnicode_AsUTF8(PyObject *unicode);
766 
767 #define _PyUnicode_AsString PyUnicode_AsUTF8
768 
769 /* --- Generic Codecs ----------------------------------------------------- */
770 
771 /* Encodes a Py_UNICODE buffer of the given size and returns a
772    Python string object. */
773 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_Encode(
774     const Py_UNICODE *s,        /* Unicode char buffer */
775     Py_ssize_t size,            /* number of Py_UNICODE chars to encode */
776     const char *encoding,       /* encoding */
777     const char *errors          /* error handling */
778     );
779 
780 /* --- UTF-7 Codecs ------------------------------------------------------- */
781 
782 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7(
783     const Py_UNICODE *data,     /* Unicode char buffer */
784     Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
785     int base64SetO,             /* Encode RFC2152 Set O characters in base64 */
786     int base64WhiteSpace,       /* Encode whitespace (sp, ht, nl, cr) in base64 */
787     const char *errors          /* error handling */
788     );
789 
790 PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF7(
791     PyObject *unicode,          /* Unicode object */
792     int base64SetO,             /* Encode RFC2152 Set O characters in base64 */
793     int base64WhiteSpace,       /* Encode whitespace (sp, ht, nl, cr) in base64 */
794     const char *errors          /* error handling */
795     );
796 
797 /* --- UTF-8 Codecs ------------------------------------------------------- */
798 
799 PyAPI_FUNC(PyObject*) _PyUnicode_AsUTF8String(
800     PyObject *unicode,
801     const char *errors);
802 
803 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8(
804     const Py_UNICODE *data,     /* Unicode char buffer */
805     Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
806     const char *errors          /* error handling */
807     );
808 
809 /* --- UTF-32 Codecs ------------------------------------------------------ */
810 
811 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF32(
812     const Py_UNICODE *data,     /* Unicode char buffer */
813     Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
814     const char *errors,         /* error handling */
815     int byteorder               /* byteorder to use 0=BOM+native;-1=LE,1=BE */
816     );
817 
818 PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF32(
819     PyObject *object,           /* Unicode object */
820     const char *errors,         /* error handling */
821     int byteorder               /* byteorder to use 0=BOM+native;-1=LE,1=BE */
822     );
823 
824 /* --- UTF-16 Codecs ------------------------------------------------------ */
825 
826 /* Returns a Python string object holding the UTF-16 encoded value of
827    the Unicode data.
828 
829    The byte order of the output is selected by the byteorder
830    argument:
831 
832    byteorder == -1: little endian
833    byteorder == 0:  native byte order (writes a BOM)
834    byteorder == 1:  big endian
835 
836    If byteorder is 0, the output string will always start with the
837    Unicode byte order mark (BOM, U+FEFF). In the other two modes, no
838    BOM is prepended.
839 
840    Note that Py_UNICODE data is being interpreted as UTF-16 reduced to
841    UCS-2. This trick makes it possible to add full UTF-16 capabilities
842    at a later point without compromising the APIs.
843 
844 */
845 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF16(
846     const Py_UNICODE *data,     /* Unicode char buffer */
847     Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
848     const char *errors,         /* error handling */
849     int byteorder               /* byteorder to use 0=BOM+native;-1=LE,1=BE */
850     );
851 
852 PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF16(
853     PyObject* unicode,          /* Unicode object */
854     const char *errors,         /* error handling */
855     int byteorder               /* byteorder to use 0=BOM+native;-1=LE,1=BE */
856     );
857 
858 /* --- Unicode-Escape Codecs ---------------------------------------------- */
859 
860 /* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape
861    chars. */
862 PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscape(
863         const char *string,     /* Unicode-Escape encoded string */
864         Py_ssize_t length,      /* size of string */
865         const char *errors,     /* error handling */
866         const char **first_invalid_escape  /* on return, points to first
867                                               invalid escaped char in
868                                               string. */
869 );
870 
871 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_EncodeUnicodeEscape(
872     const Py_UNICODE *data,     /* Unicode char buffer */
873     Py_ssize_t length           /* Number of Py_UNICODE chars to encode */
874     );
875 
876 /* --- Raw-Unicode-Escape Codecs ------------------------------------------ */
877 
878 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape(
879     const Py_UNICODE *data,     /* Unicode char buffer */
880     Py_ssize_t length           /* Number of Py_UNICODE chars to encode */
881     );
882 
883 /* --- Latin-1 Codecs ----------------------------------------------------- */
884 
885 PyAPI_FUNC(PyObject*) _PyUnicode_AsLatin1String(
886     PyObject* unicode,
887     const char* errors);
888 
889 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_EncodeLatin1(
890     const Py_UNICODE *data,     /* Unicode char buffer */
891     Py_ssize_t length,          /* Number of Py_UNICODE chars to encode */
892     const char *errors          /* error handling */
893     );
894 
895 /* --- ASCII Codecs ------------------------------------------------------- */
896 
897 PyAPI_FUNC(PyObject*) _PyUnicode_AsASCIIString(
898     PyObject* unicode,
899     const char* errors);
900 
901 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_EncodeASCII(
902     const Py_UNICODE *data,     /* Unicode char buffer */
903     Py_ssize_t length,          /* Number of Py_UNICODE chars to encode */
904     const char *errors          /* error handling */
905     );
906 
907 /* --- Character Map Codecs ----------------------------------------------- */
908 
909 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap(
910     const Py_UNICODE *data,     /* Unicode char buffer */
911     Py_ssize_t length,          /* Number of Py_UNICODE chars to encode */
912     PyObject *mapping,          /* encoding mapping */
913     const char *errors          /* error handling */
914     );
915 
916 PyAPI_FUNC(PyObject*) _PyUnicode_EncodeCharmap(
917     PyObject *unicode,          /* Unicode object */
918     PyObject *mapping,          /* encoding mapping */
919     const char *errors          /* error handling */
920     );
921 
922 /* Translate a Py_UNICODE buffer of the given length by applying a
923    character mapping table to it and return the resulting Unicode
924    object.
925 
926    The mapping table must map Unicode ordinal integers to Unicode strings,
927    Unicode ordinal integers or None (causing deletion of the character).
928 
929    Mapping tables may be dictionaries or sequences. Unmapped character
930    ordinals (ones which cause a LookupError) are left untouched and
931    are copied as-is.
932 
933 */
934 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap(
935     const Py_UNICODE *data,     /* Unicode char buffer */
936     Py_ssize_t length,          /* Number of Py_UNICODE chars to translate */
937     PyObject *table,            /* Translation table */
938     const char *errors          /* error handling */
939     );
940 
941 /* --- MBCS codecs for Windows -------------------------------------------- */
942 
943 #ifdef MS_WINDOWS
944 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS(
945     const Py_UNICODE *data,     /* Unicode char buffer */
946     Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
947     const char *errors          /* error handling */
948     );
949 #endif
950 
951 /* --- Decimal Encoder ---------------------------------------------------- */
952 
953 /* Takes a Unicode string holding a decimal value and writes it into
954    an output buffer using standard ASCII digit codes.
955 
956    The output buffer has to provide at least length+1 bytes of storage
957    area. The output string is NUL-terminated.
958 
959    The encoder converts whitespace to ' ', decimal characters to their
960    corresponding ASCII digit and all other Latin-1 characters except
961    \0 as-is. Characters outside this range (Unicode ordinals 1-255)
962    are treated as errors. This includes embedded NUL characters.
963 
964    Error handling is defined by the errors argument:
965 
966       NULL or "strict": raise a ValueError
967       "ignore": ignore the wrong characters (these are not copied to the
968                 output buffer)
969       "replace": replaces illegal characters with '?'
970 
971    Returns 0 on success, -1 on failure.
972 
973 */
974 
975 Py_DEPRECATED(3.3) PyAPI_FUNC(int) PyUnicode_EncodeDecimal(
976     Py_UNICODE *s,              /* Unicode buffer */
977     Py_ssize_t length,          /* Number of Py_UNICODE chars to encode */
978     char *output,               /* Output buffer; must have size >= length+1 */
979     const char *errors          /* error handling */
980     );
981 
982 /* Transforms code points that have decimal digit property to the
983    corresponding ASCII digit code points.
984 
985    Returns a new Unicode string on success, NULL on failure.
986 */
987 
988 Py_DEPRECATED(3.3)
989 PyAPI_FUNC(PyObject*) PyUnicode_TransformDecimalToASCII(
990     Py_UNICODE *s,              /* Unicode buffer */
991     Py_ssize_t length           /* Number of Py_UNICODE chars to transform */
992     );
993 
994 /* Converts a Unicode object holding a decimal value to an ASCII string
995    for use in the int, float and complex parsers.
996    Transforms code points that have the decimal digit property to the
997    corresponding ASCII digit code points.  Transforms spaces to ASCII.
998    Starting at the first non-ASCII code point that is neither a decimal
999    digit nor a space, every code point up to the end becomes '?'. */
1000 
1001 PyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalAndSpaceToASCII(
1002     PyObject *unicode           /* Unicode object */
1003     );
1004 
1005 /* --- Methods & Slots ---------------------------------------------------- */
1006 
1007 PyAPI_FUNC(PyObject *) _PyUnicode_JoinArray(
1008     PyObject *separator,
1009     PyObject *const *items,
1010     Py_ssize_t seqlen
1011     );
1012 
1013 /* Test whether a Unicode object is equal to an ASCII identifier.  Return 1
1014    if they are equal, 0 otherwise.  The right argument must be an ASCII
1015    identifier.  Any error that occurs inside is cleared before returning. */
1016 PyAPI_FUNC(int) _PyUnicode_EqualToASCIIId(
1017     PyObject *left,             /* Left string */
1018     _Py_Identifier *right       /* Right identifier */
1019     );
1020 
1021 /* Test whether a Unicode object is equal to an ASCII string.  Return 1 if
1022    they are equal, 0 otherwise.  The right argument must be an ASCII-encoded
1023    string.  Any error that occurs inside is cleared before returning. */
1024 PyAPI_FUNC(int) _PyUnicode_EqualToASCIIString(
1025     PyObject *left,
1026     const char *right           /* ASCII-encoded string */
1027     );
1028 
1029 /* Externally visible for str.strip(unicode) */
1030 PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
1031     PyObject *self,
1032     int striptype,
1033     PyObject *sepobj
1034     );
1035 
1036 /* Using explicit passed-in values, insert the thousands grouping
1037    into the string pointed to by buffer.  For the argument descriptions,
1038    see Objects/stringlib/localeutil.h */
1039 PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(
1040     _PyUnicodeWriter *writer,
1041     Py_ssize_t n_buffer,
1042     PyObject *digits,
1043     Py_ssize_t d_pos,
1044     Py_ssize_t n_digits,
1045     Py_ssize_t min_width,
1046     const char *grouping,
1047     PyObject *thousands_sep,
1048     Py_UCS4 *maxchar);
1049 
1050 /* === Characters Type APIs =============================================== */
1051 
1052 /* Helper array used by Py_UNICODE_ISSPACE(). */
1053 
1054 PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[];
1055 
1056 /* These should not be used directly. Use the Py_UNICODE_IS* and
1057    Py_UNICODE_TO* macros instead.
1058 
1059    These APIs are implemented in Objects/unicodectype.c.
1060 
1061 */
1062 
1063 PyAPI_FUNC(int) _PyUnicode_IsLowercase(
1064     Py_UCS4 ch       /* Unicode character */
1065     );
1066 
1067 PyAPI_FUNC(int) _PyUnicode_IsUppercase(
1068     Py_UCS4 ch       /* Unicode character */
1069     );
1070 
1071 PyAPI_FUNC(int) _PyUnicode_IsTitlecase(
1072     Py_UCS4 ch       /* Unicode character */
1073     );
1074 
1075 PyAPI_FUNC(int) _PyUnicode_IsXidStart(
1076     Py_UCS4 ch       /* Unicode character */
1077     );
1078 
1079 PyAPI_FUNC(int) _PyUnicode_IsXidContinue(
1080     Py_UCS4 ch       /* Unicode character */
1081     );
1082 
1083 PyAPI_FUNC(int) _PyUnicode_IsWhitespace(
1084     const Py_UCS4 ch         /* Unicode character */
1085     );
1086 
1087 PyAPI_FUNC(int) _PyUnicode_IsLinebreak(
1088     const Py_UCS4 ch         /* Unicode character */
1089     );
1090 
1091 /* Py_DEPRECATED(3.3) */ PyAPI_FUNC(Py_UCS4) _PyUnicode_ToLowercase(
1092     Py_UCS4 ch       /* Unicode character */
1093     );
1094 
1095 /* Py_DEPRECATED(3.3) */ PyAPI_FUNC(Py_UCS4) _PyUnicode_ToUppercase(
1096     Py_UCS4 ch       /* Unicode character */
1097     );
1098 
1099 Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase(
1100     Py_UCS4 ch       /* Unicode character */
1101     );
1102 
1103 PyAPI_FUNC(int) _PyUnicode_ToLowerFull(
1104     Py_UCS4 ch,       /* Unicode character */
1105     Py_UCS4 *res
1106     );
1107 
1108 PyAPI_FUNC(int) _PyUnicode_ToTitleFull(
1109     Py_UCS4 ch,       /* Unicode character */
1110     Py_UCS4 *res
1111     );
1112 
1113 PyAPI_FUNC(int) _PyUnicode_ToUpperFull(
1114     Py_UCS4 ch,       /* Unicode character */
1115     Py_UCS4 *res
1116     );
1117 
1118 PyAPI_FUNC(int) _PyUnicode_ToFoldedFull(
1119     Py_UCS4 ch,       /* Unicode character */
1120     Py_UCS4 *res
1121     );
1122 
1123 PyAPI_FUNC(int) _PyUnicode_IsCaseIgnorable(
1124     Py_UCS4 ch         /* Unicode character */
1125     );
1126 
1127 PyAPI_FUNC(int) _PyUnicode_IsCased(
1128     Py_UCS4 ch         /* Unicode character */
1129     );
1130 
1131 PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit(
1132     Py_UCS4 ch       /* Unicode character */
1133     );
1134 
1135 PyAPI_FUNC(int) _PyUnicode_ToDigit(
1136     Py_UCS4 ch       /* Unicode character */
1137     );
1138 
1139 PyAPI_FUNC(double) _PyUnicode_ToNumeric(
1140     Py_UCS4 ch       /* Unicode character */
1141     );
1142 
1143 PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit(
1144     Py_UCS4 ch       /* Unicode character */
1145     );
1146 
1147 PyAPI_FUNC(int) _PyUnicode_IsDigit(
1148     Py_UCS4 ch       /* Unicode character */
1149     );
1150 
1151 PyAPI_FUNC(int) _PyUnicode_IsNumeric(
1152     Py_UCS4 ch       /* Unicode character */
1153     );
1154 
1155 PyAPI_FUNC(int) _PyUnicode_IsPrintable(
1156     Py_UCS4 ch       /* Unicode character */
1157     );
1158 
1159 PyAPI_FUNC(int) _PyUnicode_IsAlpha(
1160     Py_UCS4 ch       /* Unicode character */
1161     );
1162 
1163 Py_DEPRECATED(3.3) PyAPI_FUNC(size_t) Py_UNICODE_strlen(
1164     const Py_UNICODE *u
1165     );
1166 
1167 Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcpy(
1168     Py_UNICODE *s1,
1169     const Py_UNICODE *s2);
1170 
1171 Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcat(
1172     Py_UNICODE *s1, const Py_UNICODE *s2);
1173 
1174 Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strncpy(
1175     Py_UNICODE *s1,
1176     const Py_UNICODE *s2,
1177     size_t n);
1178 
1179 Py_DEPRECATED(3.3) PyAPI_FUNC(int) Py_UNICODE_strcmp(
1180     const Py_UNICODE *s1,
1181     const Py_UNICODE *s2
1182     );
1183 
1184 Py_DEPRECATED(3.3) PyAPI_FUNC(int) Py_UNICODE_strncmp(
1185     const Py_UNICODE *s1,
1186     const Py_UNICODE *s2,
1187     size_t n
1188     );
1189 
1190 Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strchr(
1191     const Py_UNICODE *s,
1192     Py_UNICODE c
1193     );
1194 
1195 Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strrchr(
1196     const Py_UNICODE *s,
1197     Py_UNICODE c
1198     );
1199 
1200 PyAPI_FUNC(PyObject*) _PyUnicode_FormatLong(PyObject *, int, int, int);
1201 
1202 /* Create a NUL-terminated copy of a Unicode string. Return NULL and raise
1203    a MemoryError exception on memory allocation failure; otherwise return
1204    a newly allocated buffer (use PyMem_Free() to free the buffer). */
1205 
1206 Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE*) PyUnicode_AsUnicodeCopy(
1207     PyObject *unicode
1208     );
1209 
1210 /* Return an interned Unicode object for an Identifier; may fail if there is no memory. */
1211 PyAPI_FUNC(PyObject*) _PyUnicode_FromId(_Py_Identifier*);
1212 
1213 /* Fast equality check when the inputs are known to be exact unicode types
1214    and where the hash values are equal (i.e. a very probable match) */
1215 PyAPI_FUNC(int) _PyUnicode_EQ(PyObject *, PyObject *);
1216 
1217 PyAPI_FUNC(Py_ssize_t) _PyUnicode_ScanIdentifier(PyObject *);
1218 
1219 #ifdef __cplusplus
1220 }
1221 #endif
1222