• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #ifndef Py_CPYTHON_UNICODEOBJECT_H
2 #  error "this header file must not be included directly"
3 #endif
4 
5 #ifdef __cplusplus
6 extern "C" {
7 #endif
8 
/* Py_UNICODE used to be the native Unicode storage format (one code unit)
   of the Unicode type.  With PEP 393 it is deprecated and is now nothing
   more than a typedef for wchar_t. */
#define PY_UNICODE_TYPE wchar_t
/* Py_DEPRECATED(3.3) */ typedef wchar_t Py_UNICODE;
15 
16 /* --- Internal Unicode Operations ---------------------------------------- */
17 
/* Splitting on whitespace is an important use case and the whitespace met
   in practice is almost always ASCII, so code points below 128 are answered
   by the _Py_ascii_whitespace look-up table with an inlined check; all
   other code points are forwarded to _PyUnicode_IsWhitespace().

   Note: `ch` is expanded more than once, so it must be free of side
   effects. */
#define Py_UNICODE_ISSPACE(ch) \
    ((ch) < 128U ? _Py_ascii_whitespace[(ch)] : _PyUnicode_IsWhitespace(ch))
26 
/* Case predicates: thin aliases for the _PyUnicode_Is*() helpers. */
#define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch)
#define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch)
#define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
#define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)

/* Case conversions. */
#define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch)
#define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch)
#define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)

/* Numeric and printable predicates. */
#define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
#define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
#define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
#define Py_UNICODE_ISPRINTABLE(ch) _PyUnicode_IsPrintable(ch)

/* Numeric value conversions. */
#define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
#define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
#define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)

#define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch)

/* True if `ch` is alphabetic, decimal, digit or numeric.
   Note: `ch` is expanded up to four times. */
#define Py_UNICODE_ISALNUM(ch) \
    (Py_UNICODE_ISALPHA(ch) || \
     Py_UNICODE_ISDECIMAL(ch) || \
     Py_UNICODE_ISDIGIT(ch) || \
     Py_UNICODE_ISNUMERIC(ch))
52 
53 Py_DEPRECATED(3.3) static inline void
Py_UNICODE_COPY(Py_UNICODE * target,const Py_UNICODE * source,Py_ssize_t length)54 Py_UNICODE_COPY(Py_UNICODE *target, const Py_UNICODE *source, Py_ssize_t length) {
55     memcpy(target, source, (size_t)(length) * sizeof(Py_UNICODE));
56 }
57 
58 Py_DEPRECATED(3.3) static inline void
Py_UNICODE_FILL(Py_UNICODE * target,Py_UNICODE value,Py_ssize_t length)59 Py_UNICODE_FILL(Py_UNICODE *target, Py_UNICODE value, Py_ssize_t length) {
60     Py_ssize_t i;
61     for (i = 0; i < length; i++) {
62         target[i] = value;
63     }
64 }
65 
/* Macros to work with surrogates.  `ch` is expanded twice by the range
   tests below, so it must be free of side effects. */
#define Py_UNICODE_IS_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDFFF)
#define Py_UNICODE_IS_HIGH_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDBFF)
#define Py_UNICODE_IS_LOW_SURROGATE(ch) (0xDC00 <= (ch) && (ch) <= 0xDFFF)
/* Join two surrogate characters and return a single Py_UCS4 value. */
#define Py_UNICODE_JOIN_SURROGATES(high, low)  \
    (((((Py_UCS4)(high) & 0x03FF) << 10) |      \
      ((Py_UCS4)(low) & 0x03FF)) + 0x10000)
/* High surrogate = top 10 bits added to D800. */
#define Py_UNICODE_HIGH_SURROGATE(ch) (0xD800 - (0x10000 >> 10) + ((ch) >> 10))
/* Low surrogate = bottom 10 bits added to DC00. */
#define Py_UNICODE_LOW_SURROGATE(ch) (0xDC00 + ((ch) & 0x3FF))
78 
79 /* --- Unicode Type ------------------------------------------------------- */
80 
81 /* ASCII-only strings created through PyUnicode_New use the PyASCIIObject
82    structure. state.ascii and state.compact are set, and the data
83    immediately follow the structure. utf8_length and wstr_length can be found
84    in the length field; the utf8 pointer is equal to the data pointer. */
85 typedef struct {
86     /* There are 4 forms of Unicode strings:
87 
88        - compact ascii:
89 
90          * structure = PyASCIIObject
91          * test: PyUnicode_IS_COMPACT_ASCII(op)
92          * kind = PyUnicode_1BYTE_KIND
93          * compact = 1
94          * ascii = 1
95          * ready = 1
96          * (length is the length of the utf8 and wstr strings)
97          * (data starts just after the structure)
98          * (since ASCII is decoded from UTF-8, the utf8 string are the data)
99 
100        - compact:
101 
102          * structure = PyCompactUnicodeObject
103          * test: PyUnicode_IS_COMPACT(op) && !PyUnicode_IS_ASCII(op)
104          * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
105            PyUnicode_4BYTE_KIND
106          * compact = 1
107          * ready = 1
108          * ascii = 0
109          * utf8 is not shared with data
110          * utf8_length = 0 if utf8 is NULL
111          * wstr is shared with data and wstr_length=length
112            if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
113            or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
114          * wstr_length = 0 if wstr is NULL
115          * (data starts just after the structure)
116 
117        - legacy string, not ready:
118 
119          * structure = PyUnicodeObject
120          * test: kind == PyUnicode_WCHAR_KIND
121          * length = 0 (use wstr_length)
122          * hash = -1
123          * kind = PyUnicode_WCHAR_KIND
124          * compact = 0
125          * ascii = 0
126          * ready = 0
127          * interned = SSTATE_NOT_INTERNED
128          * wstr is not NULL
129          * data.any is NULL
130          * utf8 is NULL
131          * utf8_length = 0
132 
133        - legacy string, ready:
134 
135          * structure = PyUnicodeObject structure
136          * test: !PyUnicode_IS_COMPACT(op) && kind != PyUnicode_WCHAR_KIND
137          * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
138            PyUnicode_4BYTE_KIND
139          * compact = 0
140          * ready = 1
141          * data.any is not NULL
142          * utf8 is shared and utf8_length = length with data.any if ascii = 1
143          * utf8_length = 0 if utf8 is NULL
144          * wstr is shared with data.any and wstr_length = length
145            if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
146            or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_4)=4
147          * wstr_length = 0 if wstr is NULL
148 
149        Compact strings use only one memory block (structure + characters),
150        whereas legacy strings use one block for the structure and one block
151        for characters.
152 
153        Legacy strings are created by PyUnicode_FromUnicode() and
154        PyUnicode_FromStringAndSize(NULL, size) functions. They become ready
155        when PyUnicode_READY() is called.
156 
157        See also _PyUnicode_CheckConsistency().
158     */
159     PyObject_HEAD
160     Py_ssize_t length;          /* Number of code points in the string */
161     Py_hash_t hash;             /* Hash value; -1 if not set */
162     struct {
163         /*
164            SSTATE_NOT_INTERNED (0)
165            SSTATE_INTERNED_MORTAL (1)
166            SSTATE_INTERNED_IMMORTAL (2)
167 
168            If interned != SSTATE_NOT_INTERNED, the two references from the
169            dictionary to this object are *not* counted in ob_refcnt.
170          */
171         unsigned int interned:2;
172         /* Character size:
173 
174            - PyUnicode_WCHAR_KIND (0):
175 
176              * character type = wchar_t (16 or 32 bits, depending on the
177                platform)
178 
179            - PyUnicode_1BYTE_KIND (1):
180 
181              * character type = Py_UCS1 (8 bits, unsigned)
182              * all characters are in the range U+0000-U+00FF (latin1)
183              * if ascii is set, all characters are in the range U+0000-U+007F
184                (ASCII), otherwise at least one character is in the range
185                U+0080-U+00FF
186 
187            - PyUnicode_2BYTE_KIND (2):
188 
189              * character type = Py_UCS2 (16 bits, unsigned)
190              * all characters are in the range U+0000-U+FFFF (BMP)
191              * at least one character is in the range U+0100-U+FFFF
192 
193            - PyUnicode_4BYTE_KIND (4):
194 
195              * character type = Py_UCS4 (32 bits, unsigned)
196              * all characters are in the range U+0000-U+10FFFF
197              * at least one character is in the range U+10000-U+10FFFF
198          */
199         unsigned int kind:3;
200         /* Compact is with respect to the allocation scheme. Compact unicode
201            objects only require one memory block while non-compact objects use
202            one block for the PyUnicodeObject struct and another for its data
203            buffer. */
204         unsigned int compact:1;
205         /* The string only contains characters in the range U+0000-U+007F (ASCII)
206            and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is
207            set, use the PyASCIIObject structure. */
208         unsigned int ascii:1;
209         /* The ready flag indicates whether the object layout is initialized
210            completely. This means that this is either a compact object, or
211            the data pointer is filled out. The bit is redundant, and helps
212            to minimize the test in PyUnicode_IS_READY(). */
213         unsigned int ready:1;
214         /* Padding to ensure that PyUnicode_DATA() is always aligned to
215            4 bytes (see issue #19537 on m68k). */
216         unsigned int :24;
217     } state;
218     wchar_t *wstr;              /* wchar_t representation (null-terminated) */
219 } PyASCIIObject;
220 
221 /* Non-ASCII strings allocated through PyUnicode_New use the
222    PyCompactUnicodeObject structure. state.compact is set, and the data
223    immediately follow the structure. */
224 typedef struct {
225     PyASCIIObject _base;
226     Py_ssize_t utf8_length;     /* Number of bytes in utf8, excluding the
227                                  * terminating \0. */
228     char *utf8;                 /* UTF-8 representation (null-terminated) */
229     Py_ssize_t wstr_length;     /* Number of code points in wstr, possible
230                                  * surrogates count as two code points. */
231 } PyCompactUnicodeObject;
232 
233 /* Strings allocated through PyUnicode_FromUnicode(NULL, len) use the
234    PyUnicodeObject structure. The actual string data is initially in the wstr
235    block, and copied into the data block using _PyUnicode_Ready. */
236 typedef struct {
237     PyCompactUnicodeObject _base;
238     union {
239         void *any;
240         Py_UCS1 *latin1;
241         Py_UCS2 *ucs2;
242         Py_UCS4 *ucs4;
243     } data;                     /* Canonical, smallest-form Unicode buffer */
244 } PyUnicodeObject;
245 
246 PyAPI_FUNC(int) _PyUnicode_CheckConsistency(
247     PyObject *op,
248     int check_content);
249 
250 /* Fast access macros */
251 
/* Returns the deprecated Py_UNICODE representation's size in code units
   (this includes surrogate pairs as 2 units).
   If the Py_UNICODE representation is not available, it will be computed
   on request.  Use PyUnicode_GET_LENGTH() for the length in code points.

   Note: `op` is expanded several times. */

/* Py_DEPRECATED(3.3) */
#define PyUnicode_GET_SIZE(op)                       \
    (assert(PyUnicode_Check(op)),                    \
     (((PyASCIIObject *)(op))->wstr) ?               \
      PyUnicode_WSTR_LENGTH(op) :                    \
      ((void)PyUnicode_AsUnicode(_PyObject_CAST(op)),\
       assert(((PyASCIIObject *)(op))->wstr),        \
       PyUnicode_WSTR_LENGTH(op)))
265 
/* Size of the Py_UNICODE representation: PyUnicode_GET_SIZE(op) multiplied
   by Py_UNICODE_SIZE. */
/* Py_DEPRECATED(3.3) */
#define PyUnicode_GET_DATA_SIZE(op) \
    (PyUnicode_GET_SIZE(op) * Py_UNICODE_SIZE)
269 
/* Alias for PyUnicode_AsUnicode().  This will create a wchar_t/Py_UNICODE
   representation on demand.  Using this macro is very inefficient now,
   try to port your code to use the new PyUnicode_*BYTE_DATA() macros or
   use PyUnicode_WRITE() and PyUnicode_READ().

   The cached wstr pointer is returned when it is already set;
   PyUnicode_AsUnicode() is called only otherwise. */

/* Py_DEPRECATED(3.3) */
#define PyUnicode_AS_UNICODE(op) \
    (assert(PyUnicode_Check(op)), \
     (((PyASCIIObject *)(op))->wstr) ? (((PyASCIIObject *)(op))->wstr) : \
      PyUnicode_AsUnicode(_PyObject_CAST(op)))
280 
/* The result of PyUnicode_AS_UNICODE(op) viewed as a const char pointer. */
/* Py_DEPRECATED(3.3) */
#define PyUnicode_AS_DATA(op) \
    ((const char *)(PyUnicode_AS_UNICODE(op)))
284 
285 
286 /* --- Flexible String Representation Helper Macros (PEP 393) -------------- */
287 
288 /* Values for PyASCIIObject.state: */
289 
/* Interning state (values of PyASCIIObject.state.interned). */
#define SSTATE_NOT_INTERNED 0
#define SSTATE_INTERNED_MORTAL 1
#define SSTATE_INTERNED_IMMORTAL 2
294 
/* Return true if the string contains only ASCII characters, or 0 if not. The
   string may be compact (PyUnicode_IS_COMPACT_ASCII) or not, but must be
   ready.

   `op` is parenthesized inside the cast so that an expression argument
   (e.g. pointer arithmetic) is cast as a whole. */
#define PyUnicode_IS_ASCII(op)                   \
    (assert(PyUnicode_Check(op)),                \
     assert(PyUnicode_IS_READY(op)),             \
     ((PyASCIIObject*)(op))->state.ascii)
302 
/* Return true if the string is compact or 0 if not.
   No type checks or Ready calls are performed. */
#define PyUnicode_IS_COMPACT(op) \
    (((PyASCIIObject*)(op))->state.compact)

/* Return true if the string is a compact ASCII string (use PyASCIIObject
   structure), or 0 if not.  No type checks or Ready calls are performed.

   `op` is parenthesized inside the cast so that an expression argument
   is cast as a whole; it is expanded twice. */
#define PyUnicode_IS_COMPACT_ASCII(op)                 \
    (((PyASCIIObject*)(op))->state.ascii && PyUnicode_IS_COMPACT(op))
312 
enum PyUnicode_Kind {
/* String contains only wstr byte characters.  This is only possible
   when the string was created with a legacy API and _PyUnicode_Ready()
   has not been called yet.  */
    PyUnicode_WCHAR_KIND = 0,
/* Return values of the PyUnicode_KIND() macro: */
    PyUnicode_1BYTE_KIND = 1,
    PyUnicode_2BYTE_KIND = 2,
    PyUnicode_4BYTE_KIND = 4
};
323 
/* Return pointers to the canonical representation cast to unsigned char,
   Py_UCS2, or Py_UCS4 for direct character access.
   No checks are performed, use PyUnicode_KIND() before to ensure
   these will work correctly. */

#define PyUnicode_1BYTE_DATA(op) ((Py_UCS1*)PyUnicode_DATA(op))
#define PyUnicode_2BYTE_DATA(op) ((Py_UCS2*)PyUnicode_DATA(op))
#define PyUnicode_4BYTE_DATA(op) ((Py_UCS4*)PyUnicode_DATA(op))
332 
/* Return one of the PyUnicode_*_KIND values defined above.
   Asserts that `op` is a Unicode object and that it is ready. */
#define PyUnicode_KIND(op) \
    (assert(PyUnicode_Check(op)), \
     assert(PyUnicode_IS_READY(op)),            \
     ((PyASCIIObject *)(op))->state.kind)
338 
/* Return a void pointer to the raw unicode buffer. */

/* Compact strings: the data starts right after the PyASCIIObject header for
   ASCII strings, and right after the PyCompactUnicodeObject header
   otherwise. */
#define _PyUnicode_COMPACT_DATA(op)                     \
    (PyUnicode_IS_ASCII(op) ?                   \
     ((void*)((PyASCIIObject*)(op) + 1)) :              \
     ((void*)((PyCompactUnicodeObject*)(op) + 1)))

/* Non-compact strings: the data lives in the separate data.any buffer,
   which is asserted to be non-NULL. */
#define _PyUnicode_NONCOMPACT_DATA(op)                  \
    (assert(((PyUnicodeObject*)(op))->data.any),        \
     ((((PyUnicodeObject *)(op))->data.any)))

/* Pick the compact or the non-compact accessor from state.compact. */
#define PyUnicode_DATA(op) \
    (assert(PyUnicode_Check(op)), \
     PyUnicode_IS_COMPACT(op) ? _PyUnicode_COMPACT_DATA(op) :   \
     _PyUnicode_NONCOMPACT_DATA(op))
353 
/* In the access macros below, "kind" may be evaluated more than once.
   All other macro parameters are evaluated exactly once, so it is safe
   to put side effects into them (such as increasing the index). */

/* Write into the canonical representation, this macro does not do any sanity
   checks and is intended for usage in loops.  The caller should cache the
   kind and data pointers obtained from other macro calls.
   index is the index in the string (starts at 0) and value is the new
   code point value which should be written to that location.

   Any kind other than 1BYTE/2BYTE takes the default branch, which asserts
   that it is PyUnicode_4BYTE_KIND. */
#define PyUnicode_WRITE(kind, data, index, value) \
    do { \
        switch ((kind)) { \
        case PyUnicode_1BYTE_KIND: { \
            ((Py_UCS1 *)(data))[(index)] = (Py_UCS1)(value); \
            break; \
        } \
        case PyUnicode_2BYTE_KIND: { \
            ((Py_UCS2 *)(data))[(index)] = (Py_UCS2)(value); \
            break; \
        } \
        default: { \
            assert((kind) == PyUnicode_4BYTE_KIND); \
            ((Py_UCS4 *)(data))[(index)] = (Py_UCS4)(value); \
        } \
        } \
    } while (0)
380 
/* Read a code point from the string's canonical representation.  No checks
   or ready calls are performed.  The result is converted to Py_UCS4; any
   kind other than 1BYTE/2BYTE is read as Py_UCS4 data. */
#define PyUnicode_READ(kind, data, index) \
    ((Py_UCS4) \
    ((kind) == PyUnicode_1BYTE_KIND ? \
        ((const Py_UCS1 *)(data))[(index)] : \
        ((kind) == PyUnicode_2BYTE_KIND ? \
            ((const Py_UCS2 *)(data))[(index)] : \
            ((const Py_UCS4 *)(data))[(index)] \
        ) \
    ))
392 
/* PyUnicode_READ_CHAR() is less efficient than PyUnicode_READ() because it
   calls PyUnicode_KIND() and might call it twice.  For single reads, use
   PyUnicode_READ_CHAR, for multiple consecutive reads callers should
   cache kind and use PyUnicode_READ instead.

   Note: `unicode` is expanded several times; only one of the `index`
   expansions is evaluated. */
#define PyUnicode_READ_CHAR(unicode, index) \
    (assert(PyUnicode_Check(unicode)),          \
     assert(PyUnicode_IS_READY(unicode)),       \
     (Py_UCS4)                                  \
        (PyUnicode_KIND((unicode)) == PyUnicode_1BYTE_KIND ? \
            ((const Py_UCS1 *)(PyUnicode_DATA((unicode))))[(index)] : \
            (PyUnicode_KIND((unicode)) == PyUnicode_2BYTE_KIND ? \
                ((const Py_UCS2 *)(PyUnicode_DATA((unicode))))[(index)] : \
                ((const Py_UCS4 *)(PyUnicode_DATA((unicode))))[(index)] \
            ) \
        ))
408 
/* Returns the length of the unicode string. The caller has to make sure that
   the string has its canonical representation set before calling
   this macro.  Call PyUnicode_(FAST_)Ready to ensure that. */
#define PyUnicode_GET_LENGTH(op)                \
    (assert(PyUnicode_Check(op)),               \
     assert(PyUnicode_IS_READY(op)),            \
     ((PyASCIIObject *)(op))->length)
416 
417 
/* Fast check to determine whether an object is ready. Equivalent to
   (PyUnicode_IS_COMPACT(op) || ((PyUnicodeObject*)(op))->data.any).

   `op` is parenthesized inside the cast so that an expression argument
   is cast as a whole. */

#define PyUnicode_IS_READY(op) (((PyASCIIObject*)(op))->state.ready)
422 
/* PyUnicode_READY() does less work than _PyUnicode_Ready() in the best
   case.  If the canonical representation is not yet set, it will still call
   _PyUnicode_Ready().
   Returns 0 on success and -1 on errors. */
#define PyUnicode_READY(op)                        \
    (assert(PyUnicode_Check(op)),                       \
     (PyUnicode_IS_READY(op) ?                          \
      0 : _PyUnicode_Ready(_PyObject_CAST(op))))
431 
/* Return a maximum character value which is suitable for creating another
   string based on op.  This is always an approximation but more efficient
   than iterating over the string.

   The value is derived from the ascii flag and the kind only:
   0x7f, 0xff, 0xffff or 0x10ffff. */
#define PyUnicode_MAX_CHAR_VALUE(op) \
    (assert(PyUnicode_IS_READY(op)),                                    \
     (PyUnicode_IS_ASCII(op) ?                                          \
      (0x7f) :                                                          \
      (PyUnicode_KIND(op) == PyUnicode_1BYTE_KIND ?                     \
       (0xffU) :                                                        \
       (PyUnicode_KIND(op) == PyUnicode_2BYTE_KIND ?                    \
        (0xffffU) :                                                     \
        (0x10ffffU)))))
444 
445 Py_DEPRECATED(3.3)
_PyUnicode_get_wstr_length(PyObject * op)446 static inline Py_ssize_t _PyUnicode_get_wstr_length(PyObject *op) {
447     return PyUnicode_IS_COMPACT_ASCII(op) ?
448             ((PyASCIIObject*)op)->length :
449             ((PyCompactUnicodeObject*)op)->wstr_length;
450 }
451 #define PyUnicode_WSTR_LENGTH(op) _PyUnicode_get_wstr_length((PyObject*)op)
452 
453 /* === Public API ========================================================= */
454 
455 /* --- Plain Py_UNICODE --------------------------------------------------- */
456 
457 /* With PEP 393, this is the recommended way to allocate a new unicode object.
458    This function will allocate the object and its buffer in a single memory
459    block.  Objects created using this function are not resizable. */
460 PyAPI_FUNC(PyObject*) PyUnicode_New(
461     Py_ssize_t size,            /* Number of code points in the new string */
462     Py_UCS4 maxchar             /* maximum code point value in the string */
463     );
464 
465 /* Initializes the canonical string representation from the deprecated
466    wstr/Py_UNICODE representation. This function is used to convert Unicode
467    objects which were created using the old API to the new flexible format
468    introduced with PEP 393.
469 
470    Don't call this function directly, use the public PyUnicode_READY() macro
471    instead. */
472 PyAPI_FUNC(int) _PyUnicode_Ready(
473     PyObject *unicode           /* Unicode object */
474     );
475 
476 /* Get a copy of a Unicode string. */
477 PyAPI_FUNC(PyObject*) _PyUnicode_Copy(
478     PyObject *unicode
479     );
480 
481 /* Copy character from one unicode object into another, this function performs
482    character conversion when necessary and falls back to memcpy() if possible.
483 
484    Fail if to is too small (smaller than *how_many* or smaller than
485    len(from)-from_start), or if kind(from[from_start:from_start+how_many]) >
486    kind(to), or if *to* has more than 1 reference.
487 
488    Return the number of written character, or return -1 and raise an exception
489    on error.
490 
491    Pseudo-code:
492 
493        how_many = min(how_many, len(from) - from_start)
494        to[to_start:to_start+how_many] = from[from_start:from_start+how_many]
495        return how_many
496 
497    Note: The function doesn't write a terminating null character.
498    */
499 PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters(
500     PyObject *to,
501     Py_ssize_t to_start,
502     PyObject *from,
503     Py_ssize_t from_start,
504     Py_ssize_t how_many
505     );
506 
507 /* Unsafe version of PyUnicode_CopyCharacters(): don't check arguments and so
508    may crash if parameters are invalid (e.g. if the output string
509    is too short). */
510 PyAPI_FUNC(void) _PyUnicode_FastCopyCharacters(
511     PyObject *to,
512     Py_ssize_t to_start,
513     PyObject *from,
514     Py_ssize_t from_start,
515     Py_ssize_t how_many
516     );
517 
518 /* Fill a string with a character: write fill_char into
519    unicode[start:start+length].
520 
521    Fail if fill_char is bigger than the string maximum character, or if the
522    string has more than 1 reference.
523 
524    Return the number of written character, or return -1 and raise an exception
525    on error. */
526 PyAPI_FUNC(Py_ssize_t) PyUnicode_Fill(
527     PyObject *unicode,
528     Py_ssize_t start,
529     Py_ssize_t length,
530     Py_UCS4 fill_char
531     );
532 
533 /* Unsafe version of PyUnicode_Fill(): don't check arguments and so may crash
534    if parameters are invalid (e.g. if length is longer than the string). */
535 PyAPI_FUNC(void) _PyUnicode_FastFill(
536     PyObject *unicode,
537     Py_ssize_t start,
538     Py_ssize_t length,
539     Py_UCS4 fill_char
540     );
541 
542 /* Create a Unicode Object from the Py_UNICODE buffer u of the given
543    size.
544 
545    u may be NULL which causes the contents to be undefined. It is the
546    user's responsibility to fill in the needed data afterwards. Note
547    that modifying the Unicode object contents after construction is
548    only allowed if u was set to NULL.
549 
550    The buffer is copied into the new object. */
551 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
552     const Py_UNICODE *u,        /* Unicode buffer */
553     Py_ssize_t size             /* size of buffer */
554     );
555 
556 /* Create a new string from a buffer of Py_UCS1, Py_UCS2 or Py_UCS4 characters.
557    Scan the string to find the maximum character. */
558 PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData(
559     int kind,
560     const void *buffer,
561     Py_ssize_t size);
562 
563 /* Create a new string from a buffer of ASCII characters.
564    WARNING: Don't check if the string contains any non-ASCII character. */
565 PyAPI_FUNC(PyObject*) _PyUnicode_FromASCII(
566     const char *buffer,
567     Py_ssize_t size);
568 
569 /* Compute the maximum character of the substring unicode[start:end].
570    Return 127 for an empty string. */
571 PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar (
572     PyObject *unicode,
573     Py_ssize_t start,
574     Py_ssize_t end);
575 
576 /* Return a read-only pointer to the Unicode object's internal
577    Py_UNICODE buffer.
578    If the wchar_t/Py_UNICODE representation is not yet available, this
579    function will calculate it. */
580 Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
581     PyObject *unicode           /* Unicode object */
582     );
583 
584 /* Similar to PyUnicode_AsUnicode(), but raises a ValueError if the string
585    contains null characters. */
586 Py_DEPRECATED(3.3) PyAPI_FUNC(const Py_UNICODE *) _PyUnicode_AsUnicode(
587     PyObject *unicode           /* Unicode object */
588     );
589 
590 /* Return a read-only pointer to the Unicode object's internal
591    Py_UNICODE buffer and save the length at size.
592    If the wchar_t/Py_UNICODE representation is not yet available, this
593    function will calculate it. */
594 
595 Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicodeAndSize(
596     PyObject *unicode,          /* Unicode object */
597     Py_ssize_t *size            /* location where to save the length */
598     );
599 
600 /* Get the maximum ordinal for a Unicode character. */
601 Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void);
602 
603 
604 /* --- _PyUnicodeWriter API ----------------------------------------------- */
605 
606 typedef struct {
607     PyObject *buffer;
608     void *data;
609     enum PyUnicode_Kind kind;
610     Py_UCS4 maxchar;
611     Py_ssize_t size;
612     Py_ssize_t pos;
613 
614     /* minimum number of allocated characters (default: 0) */
615     Py_ssize_t min_length;
616 
617     /* minimum character (default: 127, ASCII) */
618     Py_UCS4 min_char;
619 
620     /* If non-zero, overallocate the buffer (default: 0). */
621     unsigned char overallocate;
622 
623     /* If readonly is 1, buffer is a shared string (cannot be modified)
624        and size is set to 0. */
625     unsigned char readonly;
626 } _PyUnicodeWriter ;
627 
628 /* Initialize a Unicode writer.
629  *
630  * By default, the minimum buffer size is 0 character and overallocation is
631  * disabled. Set min_length, min_char and overallocate attributes to control
632  * the allocation of the buffer. */
633 PyAPI_FUNC(void)
634 _PyUnicodeWriter_Init(_PyUnicodeWriter *writer);
635 
636 /* Prepare the buffer to write 'length' characters
637    with the specified maximum character.
638 
639    Return 0 on success, raise an exception and return -1 on error. */
640 #define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR)             \
641     (((MAXCHAR) <= (WRITER)->maxchar                                  \
642       && (LENGTH) <= (WRITER)->size - (WRITER)->pos)                  \
643      ? 0                                                              \
644      : (((LENGTH) == 0)                                               \
645         ? 0                                                           \
646         : _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR))))
647 
648 /* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro
649    instead. */
650 PyAPI_FUNC(int)
651 _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
652                                  Py_ssize_t length, Py_UCS4 maxchar);
653 
654 /* Prepare the buffer to have at least the kind KIND.
655    For example, kind=PyUnicode_2BYTE_KIND ensures that the writer will
656    support characters in range U+000-U+FFFF.
657 
658    Return 0 on success, raise an exception and return -1 on error. */
659 #define _PyUnicodeWriter_PrepareKind(WRITER, KIND)                    \
660     (assert((KIND) != PyUnicode_WCHAR_KIND),                          \
661      (KIND) <= (WRITER)->kind                                         \
662      ? 0                                                              \
663      : _PyUnicodeWriter_PrepareKindInternal((WRITER), (KIND)))
664 
665 /* Don't call this function directly, use the _PyUnicodeWriter_PrepareKind()
666    macro instead. */
667 PyAPI_FUNC(int)
668 _PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer,
669                                      enum PyUnicode_Kind kind);
670 
671 /* Append a Unicode character.
672    Return 0 on success, raise an exception and return -1 on error. */
673 PyAPI_FUNC(int)
674 _PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer,
675     Py_UCS4 ch
676     );
677 
678 /* Append a Unicode string.
679    Return 0 on success, raise an exception and return -1 on error. */
680 PyAPI_FUNC(int)
681 _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer,
682     PyObject *str               /* Unicode string */
683     );
684 
685 /* Append a substring of a Unicode string.
686    Return 0 on success, raise an exception and return -1 on error. */
687 PyAPI_FUNC(int)
688 _PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer,
689     PyObject *str,              /* Unicode string */
690     Py_ssize_t start,
691     Py_ssize_t end
692     );
693 
694 /* Append an ASCII-encoded byte string.
695    Return 0 on success, raise an exception and return -1 on error. */
696 PyAPI_FUNC(int)
697 _PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
698     const char *str,           /* ASCII-encoded byte string */
699     Py_ssize_t len             /* number of bytes, or -1 if unknown */
700     );
701 
702 /* Append a latin1-encoded byte string.
703    Return 0 on success, raise an exception and return -1 on error. */
704 PyAPI_FUNC(int)
705 _PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
706     const char *str,           /* latin1-encoded byte string */
707     Py_ssize_t len             /* length in bytes */
708     );
709 
710 /* Get the value of the writer as a Unicode string. Clear the
711    buffer of the writer. Raise an exception and return NULL
712    on error. */
713 PyAPI_FUNC(PyObject *)
714 _PyUnicodeWriter_Finish(_PyUnicodeWriter *writer);
715 
716 /* Deallocate memory of a writer (clear its internal buffer). */
717 PyAPI_FUNC(void)
718 _PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer);
719 
720 
721 /* Format the object based on the format_spec, as defined in PEP 3101
722    (Advanced String Formatting). */
723 PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter(
724     _PyUnicodeWriter *writer,
725     PyObject *obj,
726     PyObject *format_spec,
727     Py_ssize_t start,
728     Py_ssize_t end);
729 
730 /* --- Manage the default encoding ---------------------------------------- */
731 
732 /* Returns a pointer to the UTF-8 (default encoding) representation of
733    the Unicode object unicode, and stores the size of that representation
734    in bytes in *size.
735 
736    In case of an error, *size is not set.
737 
738    This function caches the UTF-8 encoded string in the unicodeobject
739    and subsequent calls will return the same string.  The memory is released
740    when the unicodeobject is deallocated.
741 
742    _PyUnicode_AsStringAndSize is a #define for PyUnicode_AsUTF8AndSize to
743    support the previous internal function with the same behaviour.
744 */
745 
746 PyAPI_FUNC(const char *) PyUnicode_AsUTF8AndSize(
747     PyObject *unicode,          /* Unicode object */
748     Py_ssize_t *size);          /* out: size in bytes of the UTF-8 data */
749 
750 #define _PyUnicode_AsStringAndSize PyUnicode_AsUTF8AndSize
751 
752 /* Returns a pointer to the UTF-8 (default encoding) representation of
753    the Unicode object unicode.
754 
755    Like PyUnicode_AsUTF8AndSize(), this also caches the UTF-8 representation
756    in the unicodeobject.
757 
758    _PyUnicode_AsString is a #define for PyUnicode_AsUTF8 to
759    support the previous internal function with the same behaviour.
760 
761    Use of this API is DEPRECATED since no size information can be
762    extracted from the returned data.
763 
764    *** This API is for interpreter INTERNAL USE ONLY and will likely
765    *** be removed or changed for Python 3.1.
766 
767    *** If you need to access the Unicode object as a UTF-8 byte string,
768    *** please use PyUnicode_AsUTF8String() instead.
769    NOTE(review): the "Python 3.1" target above looks stale -- confirm.
770 */
771 
772 PyAPI_FUNC(const char *) PyUnicode_AsUTF8(PyObject *unicode);
773 
774 #define _PyUnicode_AsString PyUnicode_AsUTF8
775 
776 /* --- Generic Codecs ----------------------------------------------------- */
777 
778 /* Encodes a Py_UNICODE buffer of the given size with the given encoding
779    and returns a Python string object. */
780 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_Encode(
781     const Py_UNICODE *s,        /* Unicode char buffer */
782     Py_ssize_t size,            /* number of Py_UNICODE chars to encode */
783     const char *encoding,       /* encoding to use */
784     const char *errors          /* error handling */
785     );
786 
787 /* --- UTF-7 Codecs ------------------------------------------------------- */
788 
789 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7(
790     const Py_UNICODE *data,     /* Unicode char buffer */
791     Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
792     int base64SetO,             /* Encode RFC2152 Set O characters in base64 */
793     int base64WhiteSpace,       /* Encode whitespace (sp, ht, nl, cr) in base64 */
794     const char *errors          /* error handling */
795     );  /* input is a raw Py_UNICODE buffer; marked Py_DEPRECATED(3.3) */
796 
797 PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF7(
798     PyObject *unicode,          /* Unicode object to encode */
799     int base64SetO,             /* Encode RFC2152 Set O characters in base64 */
800     int base64WhiteSpace,       /* Encode whitespace (sp, ht, nl, cr) in base64 */
801     const char *errors          /* error handling */
802     );  /* same options as above, but the input is a Unicode object */
803 
804 /* --- UTF-8 Codecs ------------------------------------------------------- */
805 
806 PyAPI_FUNC(PyObject*) _PyUnicode_AsUTF8String(
807     PyObject *unicode,          /* Unicode object */
808     const char *errors);        /* error handling */
809 
810 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8(
811     const Py_UNICODE *data,     /* Unicode char buffer */
812     Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
813     const char *errors          /* error handling */
814     );
815 
816 /* --- UTF-32 Codecs ------------------------------------------------------ */
817 
818 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF32(
819     const Py_UNICODE *data,     /* Unicode char buffer */
820     Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
821     const char *errors,         /* error handling */
822     int byteorder               /* byte order: 0=BOM+native, -1=LE, 1=BE */
823     );
824 
825 PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF32(
826     PyObject *object,           /* Unicode object */
827     const char *errors,         /* error handling */
828     int byteorder               /* byte order: 0=BOM+native, -1=LE, 1=BE */
829     );
830 
831 /* --- UTF-16 Codecs ------------------------------------------------------ */
832 
833 /* Returns a Python string object holding the UTF-16 encoded value of
834    the Unicode data.
835 
836    The byte order of the output is selected by the byteorder
837    argument:
838 
839    byteorder == -1: little endian
840    byteorder == 0:  native byte order (writes a BOM)
841    byteorder == 1:  big endian
842 
843    If byteorder is 0, the output string will always start with the
844    Unicode BOM (U+FEFF). In the other two modes, no BOM is
845    prepended.
846 
847    Note that Py_UNICODE data is being interpreted as UTF-16 reduced to
848    UCS-2. This trick makes it possible to add full UTF-16 capabilities
849    at a later point without compromising the APIs.
850 
851 */
852 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF16(
853     const Py_UNICODE *data,     /* Unicode char buffer */
854     Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
855     const char *errors,         /* error handling */
856     int byteorder               /* byte order: 0=BOM+native, -1=LE, 1=BE */
857     );
858 
859 PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF16(
860     PyObject* unicode,          /* Unicode object */
861     const char *errors,         /* error handling */
862     int byteorder               /* byte order: 0=BOM+native, -1=LE, 1=BE */
863     );
864 
865 /* --- Unicode-Escape Codecs ---------------------------------------------- */
866 
867 /* Helper for PyUnicode_DecodeUnicodeEscape that also reports the first
868    invalid escape char through *first_invalid_escape. */
869 PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscape(
870         const char *string,     /* Unicode-Escape encoded string */
871         Py_ssize_t length,      /* size of string */
872         const char *errors,     /* error handling */
873         const char **first_invalid_escape  /* on return, points to first
874                                               invalid escaped char in
875                                               string. */
876 );
877 
878 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_EncodeUnicodeEscape(
879     const Py_UNICODE *data,     /* Unicode char buffer */
880     Py_ssize_t length           /* number of Py_UNICODE chars to encode */
881     );
882 
883 /* --- Raw-Unicode-Escape Codecs ------------------------------------------ */
884 
885 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape(
886     const Py_UNICODE *data,     /* Unicode char buffer */
887     Py_ssize_t length           /* number of Py_UNICODE chars to encode */
888     );
889 
890 /* --- Latin-1 Codecs ----------------------------------------------------- */
891 
892 PyAPI_FUNC(PyObject*) _PyUnicode_AsLatin1String(
893     PyObject* unicode,          /* Unicode object */
894     const char* errors);        /* error handling */
895 
896 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_EncodeLatin1(
897     const Py_UNICODE *data,     /* Unicode char buffer */
898     Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
899     const char *errors          /* error handling */
900     );
901 
902 /* --- ASCII Codecs ------------------------------------------------------- */
903 
904 PyAPI_FUNC(PyObject*) _PyUnicode_AsASCIIString(
905     PyObject* unicode,          /* Unicode object */
906     const char* errors);        /* error handling */
907 
908 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_EncodeASCII(
909     const Py_UNICODE *data,     /* Unicode char buffer */
910     Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
911     const char *errors          /* error handling */
912     );
913 
914 /* --- Character Map Codecs ----------------------------------------------- */
915 
916 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap(
917     const Py_UNICODE *data,     /* Unicode char buffer */
918     Py_ssize_t length,          /* Number of Py_UNICODE chars to encode */
919     PyObject *mapping,          /* encoding mapping */
920     const char *errors          /* error handling */
921     );
922 
923 PyAPI_FUNC(PyObject*) _PyUnicode_EncodeCharmap(
924     PyObject *unicode,          /* Unicode object */
925     PyObject *mapping,          /* encoding mapping */
926     const char *errors          /* error handling */
927     );
928 
929 /* Translate a Py_UNICODE buffer of the given length by applying a
930    character mapping table to it and return the resulting Unicode
931    object.
932 
933    The mapping table must map Unicode ordinal integers to Unicode strings,
934    Unicode ordinal integers or None (causing deletion of the character).
935 
936    Mapping tables may be dictionaries or sequences. Unmapped character
937    ordinals (ones which cause a LookupError) are left untouched and
938    are copied as-is.
939 
940 */
941 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap(
942     const Py_UNICODE *data,     /* Unicode char buffer */
943     Py_ssize_t length,          /* Number of Py_UNICODE chars to translate */
944     PyObject *table,            /* Translate table */
945     const char *errors          /* error handling */
946     );
947 
948 /* --- MBCS codecs for Windows -------------------------------------------- */
949 
950 #ifdef MS_WINDOWS
951 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS(
952     const Py_UNICODE *data,     /* Unicode char buffer */
953     Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
954     const char *errors          /* error handling */
955     );
956 #endif /* MS_WINDOWS */
957 
958 /* --- Decimal Encoder ---------------------------------------------------- */
959 
960 /* Takes a Unicode string holding a decimal value and writes it into
961    an output buffer using standard ASCII digit codes.
962 
963    The output buffer has to provide at least length+1 bytes of storage
964    area. The output string is 0-terminated.
965 
966    The encoder converts whitespace to ' ', decimal characters to their
967    corresponding ASCII digit and all other Latin-1 characters except
968    \0 as-is. Characters outside this range (Unicode ordinals 1-255)
969    are treated as errors. This includes embedded NUL characters.
970 
971    Error handling is defined by the errors argument:
972 
973       NULL or "strict": raise a ValueError
974       "ignore": ignore the wrong characters (these are not copied to the
975                 output buffer)
976       "replace": replaces illegal characters with '?'
977 
978    Returns 0 on success, -1 on failure.
979 
980 */
981 
982 Py_DEPRECATED(3.3) PyAPI_FUNC(int) PyUnicode_EncodeDecimal(
983     Py_UNICODE *s,              /* Unicode buffer */
984     Py_ssize_t length,          /* Number of Py_UNICODE chars to encode */
985     char *output,               /* Output buffer; must have size >= length+1 */
986     const char *errors          /* error handling */
987     );
988 
989 /* Transforms code points that have the decimal digit property to the
990    corresponding ASCII digit code points.
991 
992    Returns a new Unicode string on success, NULL on failure.
993 */
994 
995 Py_DEPRECATED(3.3)
996 PyAPI_FUNC(PyObject*) PyUnicode_TransformDecimalToASCII(
997     Py_UNICODE *s,              /* Unicode buffer */
998     Py_ssize_t length           /* Number of Py_UNICODE chars to transform */
999     );
1000 
1001 /* Converts a Unicode object holding a decimal value to an ASCII string
1002    for use in int, float and complex parsers.
1003    Transforms code points that have the decimal digit property to the
1004    corresponding ASCII digit code points.  Transforms spaces to ASCII.
1005    Transforms code points starting from the first non-ASCII code point that
1006    is neither a decimal digit nor a space to the end into '?'. */
1007 
1008 PyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalAndSpaceToASCII(
1009     PyObject *unicode           /* Unicode object */
1010     );
1011 
1012 /* --- Methods & Slots ---------------------------------------------------- */
1013 
1014 PyAPI_FUNC(PyObject *) _PyUnicode_JoinArray(
1015     PyObject *separator,        /* NOTE(review): presumably placed between items -- confirm */
1016     PyObject *const *items,     /* NOTE(review): presumably the objects to join -- confirm */
1017     Py_ssize_t seqlen           /* NOTE(review): presumably the length of items -- confirm */
1018     );
1019 
1020 /* Test whether a unicode is equal to an ASCII identifier.  Return 1 if true,
1021    0 otherwise.  The right argument must be an ASCII identifier.
1022    Any error that occurs inside will be cleared before returning. */
1023 PyAPI_FUNC(int) _PyUnicode_EqualToASCIIId(
1024     PyObject *left,             /* Left string */
1025     _Py_Identifier *right       /* Right identifier */
1026     );
1027 
1028 /* Test whether a unicode is equal to an ASCII string.  Return 1 if true,
1029    0 otherwise.  The right argument must be an ASCII-encoded string.
1030    Any error that occurs inside will be cleared before returning. */
1031 PyAPI_FUNC(int) _PyUnicode_EqualToASCIIString(
1032     PyObject *left,             /* Left string */
1033     const char *right           /* ASCII-encoded string */
1034     );
1035 
1036 /* Externally visible for str.strip(unicode) */
1037 PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
1038     PyObject *self,
1039     int striptype,              /* NOTE(review): presumably selects which end(s) to strip -- confirm */
1040     PyObject *sepobj            /* NOTE(review): presumably the characters to strip -- confirm */
1041     );
1042 
1043 /* Using explicitly passed-in values, insert the thousands grouping
1044    into the output (NOTE(review): this prototype has no "buffer" parameter;
1045    presumably writer is meant -- confirm).  For the arguments, see Objects/stringlib/localeutil.h */
1046 PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(
1047     _PyUnicodeWriter *writer,
1048     Py_ssize_t n_buffer,
1049     PyObject *digits,
1050     Py_ssize_t d_pos,
1051     Py_ssize_t n_digits,
1052     Py_ssize_t min_width,
1053     const char *grouping,
1054     PyObject *thousands_sep,
1055     Py_UCS4 *maxchar);
1056 
1057 /* === Characters Type APIs =============================================== */
1058 
1059 /* Helper array used by Py_UNICODE_ISSPACE(), which indexes it with ch when ch < 128. */
1060 
1061 PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[];
1062 
1063 /* These functions should not be called directly. Use the Py_UNICODE_IS*
1064    and Py_UNICODE_TO* macros instead.
1065 
1066    These APIs are implemented in Objects/unicodectype.c.
1067 
1068 */
1069 
1070 PyAPI_FUNC(int) _PyUnicode_IsLowercase(
1071     Py_UCS4 ch       /* Unicode character */
1072     );  /* Py_UNICODE_ISLOWER(ch) expands to a call to this */
1073 
1074 PyAPI_FUNC(int) _PyUnicode_IsUppercase(
1075     Py_UCS4 ch       /* Unicode character */
1076     );  /* Py_UNICODE_ISUPPER(ch) expands to a call to this */
1077 
1078 PyAPI_FUNC(int) _PyUnicode_IsTitlecase(
1079     Py_UCS4 ch       /* Unicode character */
1080     );  /* Py_UNICODE_ISTITLE(ch) expands to a call to this */
1081 
1082 PyAPI_FUNC(int) _PyUnicode_IsXidStart(
1083     Py_UCS4 ch       /* Unicode character */
1084     );  /* NOTE(review): presumably tests the Unicode XID_Start property -- confirm */
1085 
1086 PyAPI_FUNC(int) _PyUnicode_IsXidContinue(
1087     Py_UCS4 ch       /* Unicode character */
1088     );  /* NOTE(review): presumably tests the Unicode XID_Continue property -- confirm */
1089 
1090 PyAPI_FUNC(int) _PyUnicode_IsWhitespace(
1091     const Py_UCS4 ch         /* Unicode character */
1092     );  /* Py_UNICODE_ISSPACE(ch) calls this only when ch >= 128 */
1093 
1094 PyAPI_FUNC(int) _PyUnicode_IsLinebreak(
1095     const Py_UCS4 ch         /* Unicode character */
1096     );  /* Py_UNICODE_ISLINEBREAK(ch) expands to a call to this */
1097 
1098 /* Py_DEPRECATED(3.3) */ PyAPI_FUNC(Py_UCS4) _PyUnicode_ToLowercase(
1099     Py_UCS4 ch       /* Unicode character */
1100     );  /* Py_UNICODE_TOLOWER(ch) expands to a call to this */
1101 
1102 /* Py_DEPRECATED(3.3) */ PyAPI_FUNC(Py_UCS4) _PyUnicode_ToUppercase(
1103     Py_UCS4 ch       /* Unicode character */
1104     );  /* Py_UNICODE_TOUPPER(ch) expands to a call to this */
1105 
1106 Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase(
1107     Py_UCS4 ch       /* Unicode character */
1108     );  /* Py_UNICODE_TOTITLE(ch) calls this; unlike the two above, the deprecation marker is live */
1109 
1110 PyAPI_FUNC(int) _PyUnicode_ToLowerFull(
1111     Py_UCS4 ch,       /* Unicode character */
1112     Py_UCS4 *res      /* NOTE(review): presumably receives the mapped code point(s) -- confirm */
1113     );
1114 
1115 PyAPI_FUNC(int) _PyUnicode_ToTitleFull(
1116     Py_UCS4 ch,       /* Unicode character */
1117     Py_UCS4 *res      /* see note on _PyUnicode_ToLowerFull's res */
1118     );
1119 
1120 PyAPI_FUNC(int) _PyUnicode_ToUpperFull(
1121     Py_UCS4 ch,       /* Unicode character */
1122     Py_UCS4 *res      /* see note on _PyUnicode_ToLowerFull's res */
1123     );
1124 
1125 PyAPI_FUNC(int) _PyUnicode_ToFoldedFull(
1126     Py_UCS4 ch,       /* Unicode character */
1127     Py_UCS4 *res      /* see note on _PyUnicode_ToLowerFull's res */
1128     );
1129 
1130 PyAPI_FUNC(int) _PyUnicode_IsCaseIgnorable(
1131     Py_UCS4 ch         /* Unicode character */
1132     );
1133 
1134 PyAPI_FUNC(int) _PyUnicode_IsCased(
1135     Py_UCS4 ch         /* Unicode character */
1136     );
1137 
1138 PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit(
1139     Py_UCS4 ch       /* Unicode character */
1140     );  /* Py_UNICODE_TODECIMAL(ch) expands to a call to this */
1141 
1142 PyAPI_FUNC(int) _PyUnicode_ToDigit(
1143     Py_UCS4 ch       /* Unicode character */
1144     );  /* Py_UNICODE_TODIGIT(ch) expands to a call to this */
1145 
1146 PyAPI_FUNC(double) _PyUnicode_ToNumeric(
1147     Py_UCS4 ch       /* Unicode character */
1148     );  /* Py_UNICODE_TONUMERIC(ch) expands to a call to this */
1149 
1150 PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit(
1151     Py_UCS4 ch       /* Unicode character */
1152     );  /* Py_UNICODE_ISDECIMAL(ch) expands to a call to this */
1153 
1154 PyAPI_FUNC(int) _PyUnicode_IsDigit(
1155     Py_UCS4 ch       /* Unicode character */
1156     );  /* Py_UNICODE_ISDIGIT(ch) expands to a call to this */
1157 
1158 PyAPI_FUNC(int) _PyUnicode_IsNumeric(
1159     Py_UCS4 ch       /* Unicode character */
1160     );  /* Py_UNICODE_ISNUMERIC(ch) expands to a call to this */
1161 
1162 PyAPI_FUNC(int) _PyUnicode_IsPrintable(
1163     Py_UCS4 ch       /* Unicode character */
1164     );  /* Py_UNICODE_ISPRINTABLE(ch) expands to a call to this */
1165 
1166 PyAPI_FUNC(int) _PyUnicode_IsAlpha(
1167     Py_UCS4 ch       /* Unicode character */
1168     );  /* Py_UNICODE_ISALPHA(ch) expands to a call to this */
1169 
1170 Py_DEPRECATED(3.3) PyAPI_FUNC(size_t) Py_UNICODE_strlen(
1171     const Py_UNICODE *u
1172     );  /* NOTE(review): the Py_UNICODE_str* names suggest <string.h> analogues -- confirm */
1173 
1174 Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcpy(
1175     Py_UNICODE *s1,
1176     const Py_UNICODE *s2);      /* cf. strcpy(); semantics not shown here */
1177 
1178 Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcat(
1179     Py_UNICODE *s1, const Py_UNICODE *s2);  /* cf. strcat(); semantics not shown here */
1180 
1181 Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strncpy(
1182     Py_UNICODE *s1,
1183     const Py_UNICODE *s2,
1184     size_t n);                  /* cf. strncpy(); semantics not shown here */
1185 
1186 Py_DEPRECATED(3.3) PyAPI_FUNC(int) Py_UNICODE_strcmp(
1187     const Py_UNICODE *s1,
1188     const Py_UNICODE *s2
1189     );  /* cf. strcmp(); semantics not shown here */
1190 
1191 Py_DEPRECATED(3.3) PyAPI_FUNC(int) Py_UNICODE_strncmp(
1192     const Py_UNICODE *s1,
1193     const Py_UNICODE *s2,
1194     size_t n
1195     );  /* cf. strncmp(); semantics not shown here */
1196 
1197 Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strchr(
1198     const Py_UNICODE *s,
1199     Py_UNICODE c
1200     );  /* cf. strchr(); semantics not shown here */
1201 
1202 Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strrchr(
1203     const Py_UNICODE *s,
1204     Py_UNICODE c
1205     );  /* cf. strrchr(); semantics not shown here */
1206 
1207 PyAPI_FUNC(PyObject*) _PyUnicode_FormatLong(PyObject *, int, int, int);  /* NOTE(review): parameters are unnamed here; see the definition for their meaning */
1208 
1209 /* Create a nul-terminated copy of a unicode string. Return NULL and
1210    raise a MemoryError exception on memory allocation failure; otherwise
1211    return a newly allocated buffer (use PyMem_Free() to free the buffer). */
1212 
1213 Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE*) PyUnicode_AsUnicodeCopy(
1214     PyObject *unicode
1215     );
1216 
1217 /* Return an interned Unicode object for an Identifier; may fail if there is no memory. */
1218 PyAPI_FUNC(PyObject*) _PyUnicode_FromId(_Py_Identifier*);
1219 
1220 /* Fast equality check for when the inputs are known to be exact unicode types
1221    and their hash values are equal (i.e. a very probable match). */
1222 PyAPI_FUNC(int) _PyUnicode_EQ(PyObject *, PyObject *);
1223 
1224 PyAPI_FUNC(Py_ssize_t) _PyUnicode_ScanIdentifier(PyObject *);  /* NOTE(review): undocumented here; see the definition for the return value */
1225 
1226 #ifdef __cplusplus
1227 }  /* extern "C" */
1228 #endif
1229