1 #ifndef Py_UNICODEOBJECT_H 2 #define Py_UNICODEOBJECT_H 3 4 #include <stdarg.h> 5 6 /* 7 8 Unicode implementation based on original code by Fredrik Lundh, 9 modified by Marc-Andre Lemburg (mal@lemburg.com) according to the 10 Unicode Integration Proposal. (See 11 http://www.egenix.com/files/python/unicode-proposal.txt). 12 13 Copyright (c) Corporation for National Research Initiatives. 14 15 16 Original header: 17 -------------------------------------------------------------------- 18 19 * Yet another Unicode string type for Python. This type supports the 20 * 16-bit Basic Multilingual Plane (BMP) only. 21 * 22 * Written by Fredrik Lundh, January 1999. 23 * 24 * Copyright (c) 1999 by Secret Labs AB. 25 * Copyright (c) 1999 by Fredrik Lundh. 26 * 27 * fredrik@pythonware.com 28 * http://www.pythonware.com 29 * 30 * -------------------------------------------------------------------- 31 * This Unicode String Type is 32 * 33 * Copyright (c) 1999 by Secret Labs AB 34 * Copyright (c) 1999 by Fredrik Lundh 35 * 36 * By obtaining, using, and/or copying this software and/or its 37 * associated documentation, you agree that you have read, understood, 38 * and will comply with the following terms and conditions: 39 * 40 * Permission to use, copy, modify, and distribute this software and its 41 * associated documentation for any purpose and without fee is hereby 42 * granted, provided that the above copyright notice appears in all 43 * copies, and that both that copyright notice and this permission notice 44 * appear in supporting documentation, and that the name of Secret Labs 45 * AB or the author not be used in advertising or publicity pertaining to 46 * distribution of the software without specific, written prior 47 * permission. 48 * 49 * SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO 50 * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND 51 * FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR 52 * ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 53 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 54 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT 55 * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 56 * -------------------------------------------------------------------- */ 57 58 #include <ctype.h> 59 60 /* === Internal API ======================================================= */ 61 62 /* --- Internal Unicode Format -------------------------------------------- */ 63 64 /* Python 3.x requires unicode */ 65 #define Py_USING_UNICODE 66 67 #ifndef SIZEOF_WCHAR_T 68 #error Must define SIZEOF_WCHAR_T 69 #endif 70 71 #define Py_UNICODE_SIZE SIZEOF_WCHAR_T 72 73 /* If wchar_t can be used for UCS-4 storage, set Py_UNICODE_WIDE. 74 Otherwise, Unicode strings are stored as UCS-2 (with limited support 75 for UTF-16) */ 76 77 #if Py_UNICODE_SIZE >= 4 78 #define Py_UNICODE_WIDE 79 #endif 80 81 /* Set these flags if the platform has "wchar.h" and the 82 wchar_t type is a 16-bit unsigned type */ 83 /* #define HAVE_WCHAR_H */ 84 /* #define HAVE_USABLE_WCHAR_T */ 85 86 /* Py_UNICODE was the native Unicode storage format (code unit) used by 87 Python and represents a single Unicode element in the Unicode type. 88 With PEP 393, Py_UNICODE is deprecated and replaced with a 89 typedef to wchar_t. */ 90 91 #ifndef Py_LIMITED_API 92 #define PY_UNICODE_TYPE wchar_t 93 typedef wchar_t Py_UNICODE /* Py_DEPRECATED(3.3) */; 94 #endif 95 96 /* If the compiler provides a wchar_t type we try to support it 97 through the interface functions PyUnicode_FromWideChar(), 98 PyUnicode_AsWideChar() and PyUnicode_AsWideCharString(). */ 99 100 #ifdef HAVE_USABLE_WCHAR_T 101 # ifndef HAVE_WCHAR_H 102 # define HAVE_WCHAR_H 103 # endif 104 #endif 105 106 #ifdef HAVE_WCHAR_H 107 # include <wchar.h> 108 #endif 109 110 /* Py_UCS4 and Py_UCS2 are typedefs for the respective 111 unicode representations. */ 112 typedef uint32_t Py_UCS4; 113 typedef uint16_t Py_UCS2; 114 typedef uint8_t Py_UCS1; 115 116 /* --- Internal Unicode Operations ---------------------------------------- */ 117 118 /* Since splitting on whitespace is an important use case, and 119 whitespace in most situations is solely ASCII whitespace, we 120 optimize for the common case by using a quick look-up table 121 _Py_ascii_whitespace (see below) with an inlined check. 122 123 */ 124 #ifndef Py_LIMITED_API 125 #define Py_UNICODE_ISSPACE(ch) \ 126 ((ch) < 128U ? _Py_ascii_whitespace[(ch)] : _PyUnicode_IsWhitespace(ch)) 127 128 #define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch) 129 #define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch) 130 #define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch) 131 #define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch) 132 133 #define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch) 134 #define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch) 135 #define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch) 136 137 #define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch) 138 #define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch) 139 #define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch) 140 #define Py_UNICODE_ISPRINTABLE(ch) _PyUnicode_IsPrintable(ch) 141 142 #define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch) 143 #define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch) 144 #define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch) 145 146 #define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch) 147 148 #define Py_UNICODE_ISALNUM(ch) \ 149 (Py_UNICODE_ISALPHA(ch) || \ 150 Py_UNICODE_ISDECIMAL(ch) || \ 151 Py_UNICODE_ISDIGIT(ch) || \ 152 Py_UNICODE_ISNUMERIC(ch)) 153 154 #define Py_UNICODE_COPY(target, source, length) \ 155 memcpy((target), (source), (length)*sizeof(Py_UNICODE)) 156 157 #define Py_UNICODE_FILL(target, value, length) \ 158 do {Py_ssize_t i_; Py_UNICODE *t_ = (target); Py_UNICODE v_ = (value);\ 159 for (i_ = 0; i_ < (length); i_++) t_[i_] = v_;\ 160 } while (0) 161 162 /* macros to work with surrogates */ 163 #define Py_UNICODE_IS_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDFFF) 164 #define Py_UNICODE_IS_HIGH_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDBFF) 165 #define Py_UNICODE_IS_LOW_SURROGATE(ch) (0xDC00 <= (ch) && (ch) <= 0xDFFF) 166 /* Join two surrogate characters and return a single Py_UCS4 value. */ 167 #define Py_UNICODE_JOIN_SURROGATES(high, low) \ 168 (((((Py_UCS4)(high) & 0x03FF) << 10) | \ 169 ((Py_UCS4)(low) & 0x03FF)) + 0x10000) 170 /* high surrogate = top 10 bits added to D800 */ 171 #define Py_UNICODE_HIGH_SURROGATE(ch) (0xD800 - (0x10000 >> 10) + ((ch) >> 10)) 172 /* low surrogate = bottom 10 bits added to DC00 */ 173 #define Py_UNICODE_LOW_SURROGATE(ch) (0xDC00 + ((ch) & 0x3FF)) 174 175 /* Check if substring matches at given offset. The offset must be 176 valid, and the substring must not be empty. */ 177 178 #define Py_UNICODE_MATCH(string, offset, substring) \ 179 ((*((string)->wstr + (offset)) == *((substring)->wstr)) && \ 180 ((*((string)->wstr + (offset) + (substring)->wstr_length-1) == *((substring)->wstr + (substring)->wstr_length-1))) && \ 181 !memcmp((string)->wstr + (offset), (substring)->wstr, (substring)->wstr_length*sizeof(Py_UNICODE))) 182 183 #endif /* Py_LIMITED_API */ 184 185 #ifdef __cplusplus 186 extern "C" { 187 #endif 188 189 /* --- Unicode Type ------------------------------------------------------- */ 190 191 #ifndef Py_LIMITED_API 192 193 /* ASCII-only strings created through PyUnicode_New use the PyASCIIObject 194 structure. state.ascii and state.compact are set, and the data 195 immediately follow the structure. utf8_length and wstr_length can be found 196 in the length field; the utf8 pointer is equal to the data pointer. */ 197 typedef struct { 198 /* There are 4 forms of Unicode strings: 199 200 - compact ascii: 201 202 * structure = PyASCIIObject 203 * test: PyUnicode_IS_COMPACT_ASCII(op) 204 * kind = PyUnicode_1BYTE_KIND 205 * compact = 1 206 * ascii = 1 207 * ready = 1 208 * (length is the length of the utf8 and wstr strings) 209 * (data starts just after the structure) 210 * (since ASCII is decoded from UTF-8, the utf8 string are the data) 211 212 - compact: 213 214 * structure = PyCompactUnicodeObject 215 * test: PyUnicode_IS_COMPACT(op) && !PyUnicode_IS_ASCII(op) 216 * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or 217 PyUnicode_4BYTE_KIND 218 * compact = 1 219 * ready = 1 220 * ascii = 0 221 * utf8 is not shared with data 222 * utf8_length = 0 if utf8 is NULL 223 * wstr is shared with data and wstr_length=length 224 if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2 225 or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4 226 * wstr_length = 0 if wstr is NULL 227 * (data starts just after the structure) 228 229 - legacy string, not ready: 230 231 * structure = PyUnicodeObject 232 * test: kind == PyUnicode_WCHAR_KIND 233 * length = 0 (use wstr_length) 234 * hash = -1 235 * kind = PyUnicode_WCHAR_KIND 236 * compact = 0 237 * ascii = 0 238 * ready = 0 239 * interned = SSTATE_NOT_INTERNED 240 * wstr is not NULL 241 * data.any is NULL 242 * utf8 is NULL 243 * utf8_length = 0 244 245 - legacy string, ready: 246 247 * structure = PyUnicodeObject structure 248 * test: !PyUnicode_IS_COMPACT(op) && kind != PyUnicode_WCHAR_KIND 249 * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or 250 PyUnicode_4BYTE_KIND 251 * compact = 0 252 * ready = 1 253 * data.any is not NULL 254 * utf8 is shared and utf8_length = length with data.any if ascii = 1 255 * utf8_length = 0 if utf8 is NULL 256 * wstr is shared with data.any and wstr_length = length 257 if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2 258 or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_4)=4 259 * wstr_length = 0 if wstr is NULL 260 261 Compact strings use only one memory block (structure + characters), 262 whereas legacy strings use one block for the structure and one block 263 for characters. 264 265 Legacy strings are created by PyUnicode_FromUnicode() and 266 PyUnicode_FromStringAndSize(NULL, size) functions. They become ready 267 when PyUnicode_READY() is called. 268 269 See also _PyUnicode_CheckConsistency(). 270 */ 271 PyObject_HEAD 272 Py_ssize_t length; /* Number of code points in the string */ 273 Py_hash_t hash; /* Hash value; -1 if not set */ 274 struct { 275 /* 276 SSTATE_NOT_INTERNED (0) 277 SSTATE_INTERNED_MORTAL (1) 278 SSTATE_INTERNED_IMMORTAL (2) 279 280 If interned != SSTATE_NOT_INTERNED, the two references from the 281 dictionary to this object are *not* counted in ob_refcnt. 282 */ 283 unsigned int interned:2; 284 /* Character size: 285 286 - PyUnicode_WCHAR_KIND (0): 287 288 * character type = wchar_t (16 or 32 bits, depending on the 289 platform) 290 291 - PyUnicode_1BYTE_KIND (1): 292 293 * character type = Py_UCS1 (8 bits, unsigned) 294 * all characters are in the range U+0000-U+00FF (latin1) 295 * if ascii is set, all characters are in the range U+0000-U+007F 296 (ASCII), otherwise at least one character is in the range 297 U+0080-U+00FF 298 299 - PyUnicode_2BYTE_KIND (2): 300 301 * character type = Py_UCS2 (16 bits, unsigned) 302 * all characters are in the range U+0000-U+FFFF (BMP) 303 * at least one character is in the range U+0100-U+FFFF 304 305 - PyUnicode_4BYTE_KIND (4): 306 307 * character type = Py_UCS4 (32 bits, unsigned) 308 * all characters are in the range U+0000-U+10FFFF 309 * at least one character is in the range U+10000-U+10FFFF 310 */ 311 unsigned int kind:3; 312 /* Compact is with respect to the allocation scheme. Compact unicode 313 objects only require one memory block while non-compact objects use 314 one block for the PyUnicodeObject struct and another for its data 315 buffer. */ 316 unsigned int compact:1; 317 /* The string only contains characters in the range U+0000-U+007F (ASCII) 318 and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is 319 set, use the PyASCIIObject structure. */ 320 unsigned int ascii:1; 321 /* The ready flag indicates whether the object layout is initialized 322 completely. This means that this is either a compact object, or 323 the data pointer is filled out. The bit is redundant, and helps 324 to minimize the test in PyUnicode_IS_READY(). */ 325 unsigned int ready:1; 326 /* Padding to ensure that PyUnicode_DATA() is always aligned to 327 4 bytes (see issue #19537 on m68k). */ 328 unsigned int :24; 329 } state; 330 wchar_t *wstr; /* wchar_t representation (null-terminated) */ 331 } PyASCIIObject; 332 333 /* Non-ASCII strings allocated through PyUnicode_New use the 334 PyCompactUnicodeObject structure. state.compact is set, and the data 335 immediately follow the structure. */ 336 typedef struct { 337 PyASCIIObject _base; 338 Py_ssize_t utf8_length; /* Number of bytes in utf8, excluding the 339 * terminating \0. */ 340 char *utf8; /* UTF-8 representation (null-terminated) */ 341 Py_ssize_t wstr_length; /* Number of code points in wstr, possible 342 * surrogates count as two code points. */ 343 } PyCompactUnicodeObject; 344 345 /* Strings allocated through PyUnicode_FromUnicode(NULL, len) use the 346 PyUnicodeObject structure. The actual string data is initially in the wstr 347 block, and copied into the data block using _PyUnicode_Ready. */ 348 typedef struct { 349 PyCompactUnicodeObject _base; 350 union { 351 void *any; 352 Py_UCS1 *latin1; 353 Py_UCS2 *ucs2; 354 Py_UCS4 *ucs4; 355 } data; /* Canonical, smallest-form Unicode buffer */ 356 } PyUnicodeObject; 357 #endif 358 359 PyAPI_DATA(PyTypeObject) PyUnicode_Type; 360 PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type; 361 362 #define PyUnicode_Check(op) \ 363 PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS) 364 #define PyUnicode_CheckExact(op) (Py_TYPE(op) == &PyUnicode_Type) 365 366 /* Fast access macros */ 367 #ifndef Py_LIMITED_API 368 369 #define PyUnicode_WSTR_LENGTH(op) \ 370 (PyUnicode_IS_COMPACT_ASCII(op) ? \ 371 ((PyASCIIObject*)op)->length : \ 372 ((PyCompactUnicodeObject*)op)->wstr_length) 373 374 /* Returns the deprecated Py_UNICODE representation's size in code units 375 (this includes surrogate pairs as 2 units). 376 If the Py_UNICODE representation is not available, it will be computed 377 on request. Use PyUnicode_GET_LENGTH() for the length in code points. */ 378 379 #define PyUnicode_GET_SIZE(op) \ 380 (assert(PyUnicode_Check(op)), \ 381 (((PyASCIIObject *)(op))->wstr) ? \ 382 PyUnicode_WSTR_LENGTH(op) : \ 383 ((void)PyUnicode_AsUnicode((PyObject *)(op)), \ 384 assert(((PyASCIIObject *)(op))->wstr), \ 385 PyUnicode_WSTR_LENGTH(op))) 386 /* Py_DEPRECATED(3.3) */ 387 388 #define PyUnicode_GET_DATA_SIZE(op) \ 389 (PyUnicode_GET_SIZE(op) * Py_UNICODE_SIZE) 390 /* Py_DEPRECATED(3.3) */ 391 392 /* Alias for PyUnicode_AsUnicode(). This will create a wchar_t/Py_UNICODE 393 representation on demand. Using this macro is very inefficient now, 394 try to port your code to use the new PyUnicode_*BYTE_DATA() macros or 395 use PyUnicode_WRITE() and PyUnicode_READ(). */ 396 397 #define PyUnicode_AS_UNICODE(op) \ 398 (assert(PyUnicode_Check(op)), \ 399 (((PyASCIIObject *)(op))->wstr) ? (((PyASCIIObject *)(op))->wstr) : \ 400 PyUnicode_AsUnicode((PyObject *)(op))) 401 /* Py_DEPRECATED(3.3) */ 402 403 #define PyUnicode_AS_DATA(op) \ 404 ((const char *)(PyUnicode_AS_UNICODE(op))) 405 /* Py_DEPRECATED(3.3) */ 406 407 408 /* --- Flexible String Representation Helper Macros (PEP 393) -------------- */ 409 410 /* Values for PyASCIIObject.state: */ 411 412 /* Interning state. */ 413 #define SSTATE_NOT_INTERNED 0 414 #define SSTATE_INTERNED_MORTAL 1 415 #define SSTATE_INTERNED_IMMORTAL 2 416 417 /* Return true if the string contains only ASCII characters, or 0 if not. The 418 string may be compact (PyUnicode_IS_COMPACT_ASCII) or not, but must be 419 ready. */ 420 #define PyUnicode_IS_ASCII(op) \ 421 (assert(PyUnicode_Check(op)), \ 422 assert(PyUnicode_IS_READY(op)), \ 423 ((PyASCIIObject*)op)->state.ascii) 424 425 /* Return true if the string is compact or 0 if not. 426 No type checks or Ready calls are performed. */ 427 #define PyUnicode_IS_COMPACT(op) \ 428 (((PyASCIIObject*)(op))->state.compact) 429 430 /* Return true if the string is a compact ASCII string (use PyASCIIObject 431 structure), or 0 if not. No type checks or Ready calls are performed. */ 432 #define PyUnicode_IS_COMPACT_ASCII(op) \ 433 (((PyASCIIObject*)op)->state.ascii && PyUnicode_IS_COMPACT(op)) 434 435 enum PyUnicode_Kind { 436 /* String contains only wstr byte characters. This is only possible 437 when the string was created with a legacy API and _PyUnicode_Ready() 438 has not been called yet. */ 439 PyUnicode_WCHAR_KIND = 0, 440 /* Return values of the PyUnicode_KIND() macro: */ 441 PyUnicode_1BYTE_KIND = 1, 442 PyUnicode_2BYTE_KIND = 2, 443 PyUnicode_4BYTE_KIND = 4 444 }; 445 446 /* Return pointers to the canonical representation cast to unsigned char, 447 Py_UCS2, or Py_UCS4 for direct character access. 448 No checks are performed, use PyUnicode_KIND() before to ensure 449 these will work correctly. */ 450 451 #define PyUnicode_1BYTE_DATA(op) ((Py_UCS1*)PyUnicode_DATA(op)) 452 #define PyUnicode_2BYTE_DATA(op) ((Py_UCS2*)PyUnicode_DATA(op)) 453 #define PyUnicode_4BYTE_DATA(op) ((Py_UCS4*)PyUnicode_DATA(op)) 454 455 /* Return one of the PyUnicode_*_KIND values defined above. */ 456 #define PyUnicode_KIND(op) \ 457 (assert(PyUnicode_Check(op)), \ 458 assert(PyUnicode_IS_READY(op)), \ 459 ((PyASCIIObject *)(op))->state.kind) 460 461 /* Return a void pointer to the raw unicode buffer. */ 462 #define _PyUnicode_COMPACT_DATA(op) \ 463 (PyUnicode_IS_ASCII(op) ? \ 464 ((void*)((PyASCIIObject*)(op) + 1)) : \ 465 ((void*)((PyCompactUnicodeObject*)(op) + 1))) 466 467 #define _PyUnicode_NONCOMPACT_DATA(op) \ 468 (assert(((PyUnicodeObject*)(op))->data.any), \ 469 ((((PyUnicodeObject *)(op))->data.any))) 470 471 #define PyUnicode_DATA(op) \ 472 (assert(PyUnicode_Check(op)), \ 473 PyUnicode_IS_COMPACT(op) ? _PyUnicode_COMPACT_DATA(op) : \ 474 _PyUnicode_NONCOMPACT_DATA(op)) 475 476 /* In the access macros below, "kind" may be evaluated more than once. 477 All other macro parameters are evaluated exactly once, so it is safe 478 to put side effects into them (such as increasing the index). */ 479 480 /* Write into the canonical representation, this macro does not do any sanity 481 checks and is intended for usage in loops. The caller should cache the 482 kind and data pointers obtained from other macro calls. 483 index is the index in the string (starts at 0) and value is the new 484 code point value which should be written to that location. */ 485 #define PyUnicode_WRITE(kind, data, index, value) \ 486 do { \ 487 switch ((kind)) { \ 488 case PyUnicode_1BYTE_KIND: { \ 489 ((Py_UCS1 *)(data))[(index)] = (Py_UCS1)(value); \ 490 break; \ 491 } \ 492 case PyUnicode_2BYTE_KIND: { \ 493 ((Py_UCS2 *)(data))[(index)] = (Py_UCS2)(value); \ 494 break; \ 495 } \ 496 default: { \ 497 assert((kind) == PyUnicode_4BYTE_KIND); \ 498 ((Py_UCS4 *)(data))[(index)] = (Py_UCS4)(value); \ 499 } \ 500 } \ 501 } while (0) 502 503 /* Read a code point from the string's canonical representation. No checks 504 or ready calls are performed. */ 505 #define PyUnicode_READ(kind, data, index) \ 506 ((Py_UCS4) \ 507 ((kind) == PyUnicode_1BYTE_KIND ? \ 508 ((const Py_UCS1 *)(data))[(index)] : \ 509 ((kind) == PyUnicode_2BYTE_KIND ? \ 510 ((const Py_UCS2 *)(data))[(index)] : \ 511 ((const Py_UCS4 *)(data))[(index)] \ 512 ) \ 513 )) 514 515 /* PyUnicode_READ_CHAR() is less efficient than PyUnicode_READ() because it 516 calls PyUnicode_KIND() and might call it twice. For single reads, use 517 PyUnicode_READ_CHAR, for multiple consecutive reads callers should 518 cache kind and use PyUnicode_READ instead. */ 519 #define PyUnicode_READ_CHAR(unicode, index) \ 520 (assert(PyUnicode_Check(unicode)), \ 521 assert(PyUnicode_IS_READY(unicode)), \ 522 (Py_UCS4) \ 523 (PyUnicode_KIND((unicode)) == PyUnicode_1BYTE_KIND ? \ 524 ((const Py_UCS1 *)(PyUnicode_DATA((unicode))))[(index)] : \ 525 (PyUnicode_KIND((unicode)) == PyUnicode_2BYTE_KIND ? \ 526 ((const Py_UCS2 *)(PyUnicode_DATA((unicode))))[(index)] : \ 527 ((const Py_UCS4 *)(PyUnicode_DATA((unicode))))[(index)] \ 528 ) \ 529 )) 530 531 /* Returns the length of the unicode string. The caller has to make sure that 532 the string has it's canonical representation set before calling 533 this macro. Call PyUnicode_(FAST_)Ready to ensure that. */ 534 #define PyUnicode_GET_LENGTH(op) \ 535 (assert(PyUnicode_Check(op)), \ 536 assert(PyUnicode_IS_READY(op)), \ 537 ((PyASCIIObject *)(op))->length) 538 539 540 /* Fast check to determine whether an object is ready. Equivalent to 541 PyUnicode_IS_COMPACT(op) || ((PyUnicodeObject*)(op))->data.any) */ 542 543 #define PyUnicode_IS_READY(op) (((PyASCIIObject*)op)->state.ready) 544 545 /* PyUnicode_READY() does less work than _PyUnicode_Ready() in the best 546 case. If the canonical representation is not yet set, it will still call 547 _PyUnicode_Ready(). 548 Returns 0 on success and -1 on errors. */ 549 #define PyUnicode_READY(op) \ 550 (assert(PyUnicode_Check(op)), \ 551 (PyUnicode_IS_READY(op) ? \ 552 0 : _PyUnicode_Ready((PyObject *)(op)))) 553 554 /* Return a maximum character value which is suitable for creating another 555 string based on op. This is always an approximation but more efficient 556 than iterating over the string. */ 557 #define PyUnicode_MAX_CHAR_VALUE(op) \ 558 (assert(PyUnicode_IS_READY(op)), \ 559 (PyUnicode_IS_ASCII(op) ? \ 560 (0x7f) : \ 561 (PyUnicode_KIND(op) == PyUnicode_1BYTE_KIND ? \ 562 (0xffU) : \ 563 (PyUnicode_KIND(op) == PyUnicode_2BYTE_KIND ? \ 564 (0xffffU) : \ 565 (0x10ffffU))))) 566 567 #endif 568 569 /* --- Constants ---------------------------------------------------------- */ 570 571 /* This Unicode character will be used as replacement character during 572 decoding if the errors argument is set to "replace". Note: the 573 Unicode character U+FFFD is the official REPLACEMENT CHARACTER in 574 Unicode 3.0. */ 575 576 #define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UCS4) 0xFFFD) 577 578 /* === Public API ========================================================= */ 579 580 /* --- Plain Py_UNICODE --------------------------------------------------- */ 581 582 /* With PEP 393, this is the recommended way to allocate a new unicode object. 583 This function will allocate the object and its buffer in a single memory 584 block. Objects created using this function are not resizable. */ 585 #ifndef Py_LIMITED_API 586 PyAPI_FUNC(PyObject*) PyUnicode_New( 587 Py_ssize_t size, /* Number of code points in the new string */ 588 Py_UCS4 maxchar /* maximum code point value in the string */ 589 ); 590 #endif 591 592 /* Initializes the canonical string representation from the deprecated 593 wstr/Py_UNICODE representation. This function is used to convert Unicode 594 objects which were created using the old API to the new flexible format 595 introduced with PEP 393. 596 597 Don't call this function directly, use the public PyUnicode_READY() macro 598 instead. */ 599 #ifndef Py_LIMITED_API 600 PyAPI_FUNC(int) _PyUnicode_Ready( 601 PyObject *unicode /* Unicode object */ 602 ); 603 #endif 604 605 /* Get a copy of a Unicode string. */ 606 #ifndef Py_LIMITED_API 607 PyAPI_FUNC(PyObject*) _PyUnicode_Copy( 608 PyObject *unicode 609 ); 610 #endif 611 612 /* Copy character from one unicode object into another, this function performs 613 character conversion when necessary and falls back to memcpy() if possible. 614 615 Fail if to is too small (smaller than *how_many* or smaller than 616 len(from)-from_start), or if kind(from[from_start:from_start+how_many]) > 617 kind(to), or if *to* has more than 1 reference. 618 619 Return the number of written character, or return -1 and raise an exception 620 on error. 621 622 Pseudo-code: 623 624 how_many = min(how_many, len(from) - from_start) 625 to[to_start:to_start+how_many] = from[from_start:from_start+how_many] 626 return how_many 627 628 Note: The function doesn't write a terminating null character. 629 */ 630 #ifndef Py_LIMITED_API 631 PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters( 632 PyObject *to, 633 Py_ssize_t to_start, 634 PyObject *from, 635 Py_ssize_t from_start, 636 Py_ssize_t how_many 637 ); 638 639 /* Unsafe version of PyUnicode_CopyCharacters(): don't check arguments and so 640 may crash if parameters are invalid (e.g. if the output string 641 is too short). */ 642 PyAPI_FUNC(void) _PyUnicode_FastCopyCharacters( 643 PyObject *to, 644 Py_ssize_t to_start, 645 PyObject *from, 646 Py_ssize_t from_start, 647 Py_ssize_t how_many 648 ); 649 #endif 650 651 #ifndef Py_LIMITED_API 652 /* Fill a string with a character: write fill_char into 653 unicode[start:start+length]. 654 655 Fail if fill_char is bigger than the string maximum character, or if the 656 string has more than 1 reference. 657 658 Return the number of written character, or return -1 and raise an exception 659 on error. */ 660 PyAPI_FUNC(Py_ssize_t) PyUnicode_Fill( 661 PyObject *unicode, 662 Py_ssize_t start, 663 Py_ssize_t length, 664 Py_UCS4 fill_char 665 ); 666 667 /* Unsafe version of PyUnicode_Fill(): don't check arguments and so may crash 668 if parameters are invalid (e.g. if length is longer than the string). */ 669 PyAPI_FUNC(void) _PyUnicode_FastFill( 670 PyObject *unicode, 671 Py_ssize_t start, 672 Py_ssize_t length, 673 Py_UCS4 fill_char 674 ); 675 #endif 676 677 /* Create a Unicode Object from the Py_UNICODE buffer u of the given 678 size. 679 680 u may be NULL which causes the contents to be undefined. It is the 681 user's responsibility to fill in the needed data afterwards. Note 682 that modifying the Unicode object contents after construction is 683 only allowed if u was set to NULL. 684 685 The buffer is copied into the new object. */ 686 687 #ifndef Py_LIMITED_API 688 PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode( 689 const Py_UNICODE *u, /* Unicode buffer */ 690 Py_ssize_t size /* size of buffer */ 691 ) /* Py_DEPRECATED(3.3) */; 692 #endif 693 694 /* Similar to PyUnicode_FromUnicode(), but u points to UTF-8 encoded bytes */ 695 PyAPI_FUNC(PyObject*) PyUnicode_FromStringAndSize( 696 const char *u, /* UTF-8 encoded string */ 697 Py_ssize_t size /* size of buffer */ 698 ); 699 700 /* Similar to PyUnicode_FromUnicode(), but u points to null-terminated 701 UTF-8 encoded bytes. The size is determined with strlen(). */ 702 PyAPI_FUNC(PyObject*) PyUnicode_FromString( 703 const char *u /* UTF-8 encoded string */ 704 ); 705 706 #ifndef Py_LIMITED_API 707 /* Create a new string from a buffer of Py_UCS1, Py_UCS2 or Py_UCS4 characters. 708 Scan the string to find the maximum character. */ 709 PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData( 710 int kind, 711 const void *buffer, 712 Py_ssize_t size); 713 714 /* Create a new string from a buffer of ASCII characters. 715 WARNING: Don't check if the string contains any non-ASCII character. */ 716 PyAPI_FUNC(PyObject*) _PyUnicode_FromASCII( 717 const char *buffer, 718 Py_ssize_t size); 719 #endif 720 721 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 722 PyAPI_FUNC(PyObject*) PyUnicode_Substring( 723 PyObject *str, 724 Py_ssize_t start, 725 Py_ssize_t end); 726 #endif 727 728 #ifndef Py_LIMITED_API 729 /* Compute the maximum character of the substring unicode[start:end]. 730 Return 127 for an empty string. */ 731 PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar ( 732 PyObject *unicode, 733 Py_ssize_t start, 734 Py_ssize_t end); 735 #endif 736 737 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 738 /* Copy the string into a UCS4 buffer including the null character if copy_null 739 is set. Return NULL and raise an exception on error. Raise a SystemError if 740 the buffer is smaller than the string. Return buffer on success. 741 742 buflen is the length of the buffer in (Py_UCS4) characters. */ 743 PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4( 744 PyObject *unicode, 745 Py_UCS4* buffer, 746 Py_ssize_t buflen, 747 int copy_null); 748 749 /* Copy the string into a UCS4 buffer. A new buffer is allocated using 750 * PyMem_Malloc; if this fails, NULL is returned with a memory error 751 exception set. */ 752 PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4Copy(PyObject *unicode); 753 #endif 754 755 #ifndef Py_LIMITED_API 756 /* Return a read-only pointer to the Unicode object's internal 757 Py_UNICODE buffer. 758 If the wchar_t/Py_UNICODE representation is not yet available, this 759 function will calculate it. */ 760 761 PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode( 762 PyObject *unicode /* Unicode object */ 763 ) /* Py_DEPRECATED(3.3) */; 764 765 /* Similar to PyUnicode_AsUnicode(), but raises a ValueError if the string 766 contains null characters. */ 767 PyAPI_FUNC(const Py_UNICODE *) _PyUnicode_AsUnicode( 768 PyObject *unicode /* Unicode object */ 769 ); 770 771 /* Return a read-only pointer to the Unicode object's internal 772 Py_UNICODE buffer and save the length at size. 773 If the wchar_t/Py_UNICODE representation is not yet available, this 774 function will calculate it. */ 775 776 PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicodeAndSize( 777 PyObject *unicode, /* Unicode object */ 778 Py_ssize_t *size /* location where to save the length */ 779 ) /* Py_DEPRECATED(3.3) */; 780 #endif 781 782 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 783 /* Get the length of the Unicode object. */ 784 785 PyAPI_FUNC(Py_ssize_t) PyUnicode_GetLength( 786 PyObject *unicode 787 ); 788 #endif 789 790 /* Get the number of Py_UNICODE units in the 791 string representation. */ 792 793 PyAPI_FUNC(Py_ssize_t) PyUnicode_GetSize( 794 PyObject *unicode /* Unicode object */ 795 ) Py_DEPRECATED(3.3); 796 797 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 798 /* Read a character from the string. */ 799 800 PyAPI_FUNC(Py_UCS4) PyUnicode_ReadChar( 801 PyObject *unicode, 802 Py_ssize_t index 803 ); 804 805 /* Write a character to the string. The string must have been created through 806 PyUnicode_New, must not be shared, and must not have been hashed yet. 807 808 Return 0 on success, -1 on error. */ 809 810 PyAPI_FUNC(int) PyUnicode_WriteChar( 811 PyObject *unicode, 812 Py_ssize_t index, 813 Py_UCS4 character 814 ); 815 #endif 816 817 #ifndef Py_LIMITED_API 818 /* Get the maximum ordinal for a Unicode character. */ 819 PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void) Py_DEPRECATED(3.3); 820 #endif 821 822 /* Resize a Unicode object. The length is the number of characters, except 823 if the kind of the string is PyUnicode_WCHAR_KIND: in this case, the length 824 is the number of Py_UNICODE characters. 825 826 *unicode is modified to point to the new (resized) object and 0 827 returned on success. 828 829 Try to resize the string in place (which is usually faster than allocating 830 a new string and copy characters), or create a new string. 831 832 Error handling is implemented as follows: an exception is set, -1 833 is returned and *unicode left untouched. 834 835 WARNING: The function doesn't check string content, the result may not be a 836 string in canonical representation. */ 837 838 PyAPI_FUNC(int) PyUnicode_Resize( 839 PyObject **unicode, /* Pointer to the Unicode object */ 840 Py_ssize_t length /* New length */ 841 ); 842 843 /* Decode obj to a Unicode object. 844 845 bytes, bytearray and other bytes-like objects are decoded according to the 846 given encoding and error handler. The encoding and error handler can be 847 NULL to have the interface use UTF-8 and "strict". 848 849 All other objects (including Unicode objects) raise an exception. 850 851 The API returns NULL in case of an error. The caller is responsible 852 for decref'ing the returned objects. 853 854 */ 855 856 PyAPI_FUNC(PyObject*) PyUnicode_FromEncodedObject( 857 PyObject *obj, /* Object */ 858 const char *encoding, /* encoding */ 859 const char *errors /* error handling */ 860 ); 861 862 /* Copy an instance of a Unicode subtype to a new true Unicode object if 863 necessary. If obj is already a true Unicode object (not a subtype), return 864 the reference with *incremented* refcount. 865 866 The API returns NULL in case of an error. The caller is responsible 867 for decref'ing the returned objects. 868 869 */ 870 871 PyAPI_FUNC(PyObject*) PyUnicode_FromObject( 872 PyObject *obj /* Object */ 873 ); 874 875 PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV( 876 const char *format, /* ASCII-encoded string */ 877 va_list vargs 878 ); 879 PyAPI_FUNC(PyObject *) PyUnicode_FromFormat( 880 const char *format, /* ASCII-encoded string */ 881 ... 882 ); 883 884 #ifndef Py_LIMITED_API 885 typedef struct { 886 PyObject *buffer; 887 void *data; 888 enum PyUnicode_Kind kind; 889 Py_UCS4 maxchar; 890 Py_ssize_t size; 891 Py_ssize_t pos; 892 893 /* minimum number of allocated characters (default: 0) */ 894 Py_ssize_t min_length; 895 896 /* minimum character (default: 127, ASCII) */ 897 Py_UCS4 min_char; 898 899 /* If non-zero, overallocate the buffer (default: 0). */ 900 unsigned char overallocate; 901 902 /* If readonly is 1, buffer is a shared string (cannot be modified) 903 and size is set to 0. */ 904 unsigned char readonly; 905 } _PyUnicodeWriter ; 906 907 /* Initialize a Unicode writer. 908 * 909 * By default, the minimum buffer size is 0 character and overallocation is 910 * disabled. Set min_length, min_char and overallocate attributes to control 911 * the allocation of the buffer. */ 912 PyAPI_FUNC(void) 913 _PyUnicodeWriter_Init(_PyUnicodeWriter *writer); 914 915 /* Prepare the buffer to write 'length' characters 916 with the specified maximum character. 917 918 Return 0 on success, raise an exception and return -1 on error. */ 919 #define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR) \ 920 (((MAXCHAR) <= (WRITER)->maxchar \ 921 && (LENGTH) <= (WRITER)->size - (WRITER)->pos) \ 922 ? 0 \ 923 : (((LENGTH) == 0) \ 924 ? 0 \ 925 : _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR)))) 926 927 /* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro 928 instead. */ 929 PyAPI_FUNC(int) 930 _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer, 931 Py_ssize_t length, Py_UCS4 maxchar); 932 933 /* Prepare the buffer to have at least the kind KIND. 934 For example, kind=PyUnicode_2BYTE_KIND ensures that the writer will 935 support characters in range U+000-U+FFFF. 936 937 Return 0 on success, raise an exception and return -1 on error. */ 938 #define _PyUnicodeWriter_PrepareKind(WRITER, KIND) \ 939 (assert((KIND) != PyUnicode_WCHAR_KIND), \ 940 (KIND) <= (WRITER)->kind \ 941 ? 0 \ 942 : _PyUnicodeWriter_PrepareKindInternal((WRITER), (KIND))) 943 944 /* Don't call this function directly, use the _PyUnicodeWriter_PrepareKind() 945 macro instead. */ 946 PyAPI_FUNC(int) 947 _PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer, 948 enum PyUnicode_Kind kind); 949 950 /* Append a Unicode character. 951 Return 0 on success, raise an exception and return -1 on error. */ 952 PyAPI_FUNC(int) 953 _PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer, 954 Py_UCS4 ch 955 ); 956 957 /* Append a Unicode string. 958 Return 0 on success, raise an exception and return -1 on error. */ 959 PyAPI_FUNC(int) 960 _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, 961 PyObject *str /* Unicode string */ 962 ); 963 964 /* Append a substring of a Unicode string. 965 Return 0 on success, raise an exception and return -1 on error. */ 966 PyAPI_FUNC(int) 967 _PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer, 968 PyObject *str, /* Unicode string */ 969 Py_ssize_t start, 970 Py_ssize_t end 971 ); 972 973 /* Append an ASCII-encoded byte string. 974 Return 0 on success, raise an exception and return -1 on error. */ 975 PyAPI_FUNC(int) 976 _PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer, 977 const char *str, /* ASCII-encoded byte string */ 978 Py_ssize_t len /* number of bytes, or -1 if unknown */ 979 ); 980 981 /* Append a latin1-encoded byte string. 982 Return 0 on success, raise an exception and return -1 on error. */ 983 PyAPI_FUNC(int) 984 _PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer, 985 const char *str, /* latin1-encoded byte string */ 986 Py_ssize_t len /* length in bytes */ 987 ); 988 989 /* Get the value of the writer as a Unicode string. Clear the 990 buffer of the writer. Raise an exception and return NULL 991 on error. */ 992 PyAPI_FUNC(PyObject *) 993 _PyUnicodeWriter_Finish(_PyUnicodeWriter *writer); 994 995 /* Deallocate memory of a writer (clear its internal buffer). */ 996 PyAPI_FUNC(void) 997 _PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer); 998 #endif 999 1000 #ifndef Py_LIMITED_API 1001 /* Format the object based on the format_spec, as defined in PEP 3101 1002 (Advanced String Formatting). */ 1003 PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter( 1004 _PyUnicodeWriter *writer, 1005 PyObject *obj, 1006 PyObject *format_spec, 1007 Py_ssize_t start, 1008 Py_ssize_t end); 1009 #endif 1010 1011 PyAPI_FUNC(void) PyUnicode_InternInPlace(PyObject **); 1012 PyAPI_FUNC(void) PyUnicode_InternImmortal(PyObject **); 1013 PyAPI_FUNC(PyObject *) PyUnicode_InternFromString( 1014 const char *u /* UTF-8 encoded string */ 1015 ); 1016 #ifndef Py_LIMITED_API 1017 PyAPI_FUNC(void) _Py_ReleaseInternedUnicodeStrings(void); 1018 #endif 1019 1020 /* Use only if you know it's a string */ 1021 #define PyUnicode_CHECK_INTERNED(op) \ 1022 (((PyASCIIObject *)(op))->state.interned) 1023 1024 /* --- wchar_t support for platforms which support it --------------------- */ 1025 1026 #ifdef HAVE_WCHAR_H 1027 1028 /* Create a Unicode Object from the wchar_t buffer w of the given 1029 size. 1030 1031 The buffer is copied into the new object. */ 1032 1033 PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar( 1034 const wchar_t *w, /* wchar_t buffer */ 1035 Py_ssize_t size /* size of buffer */ 1036 ); 1037 1038 /* Copies the Unicode Object contents into the wchar_t buffer w. At 1039 most size wchar_t characters are copied. 1040 1041 Note that the resulting wchar_t string may or may not be 1042 0-terminated. It is the responsibility of the caller to make sure 1043 that the wchar_t string is 0-terminated in case this is required by 1044 the application. 1045 1046 Returns the number of wchar_t characters copied (excluding a 1047 possibly trailing 0-termination character) or -1 in case of an 1048 error. */ 1049 1050 PyAPI_FUNC(Py_ssize_t) PyUnicode_AsWideChar( 1051 PyObject *unicode, /* Unicode object */ 1052 wchar_t *w, /* wchar_t buffer */ 1053 Py_ssize_t size /* size of buffer */ 1054 ); 1055 1056 /* Convert the Unicode object to a wide character string. The output string 1057 always ends with a nul character. If size is not NULL, write the number of 1058 wide characters (excluding the null character) into *size. 1059 1060 Returns a buffer allocated by PyMem_Malloc() (use PyMem_Free() to free it) 1061 on success. On error, returns NULL, *size is undefined and raises a 1062 MemoryError. */ 1063 1064 PyAPI_FUNC(wchar_t*) PyUnicode_AsWideCharString( 1065 PyObject *unicode, /* Unicode object */ 1066 Py_ssize_t *size /* number of characters of the result */ 1067 ); 1068 1069 #ifndef Py_LIMITED_API 1070 PyAPI_FUNC(void*) _PyUnicode_AsKind(PyObject *s, unsigned int kind); 1071 #endif 1072 1073 #endif 1074 1075 /* --- Unicode ordinals --------------------------------------------------- */ 1076 1077 /* Create a Unicode Object from the given Unicode code point ordinal. 1078 1079 The ordinal must be in range(0x110000). A ValueError is 1080 raised in case it is not. 1081 1082 */ 1083 1084 PyAPI_FUNC(PyObject*) PyUnicode_FromOrdinal(int ordinal); 1085 1086 /* --- Free-list management ----------------------------------------------- */ 1087 1088 /* Clear the free list used by the Unicode implementation. 1089 1090 This can be used to release memory used for objects on the free 1091 list back to the Python memory allocator. 1092 1093 */ 1094 1095 PyAPI_FUNC(int) PyUnicode_ClearFreeList(void); 1096 1097 /* === Builtin Codecs ===================================================== 1098 1099 Many of these APIs take two arguments encoding and errors. These 1100 parameters encoding and errors have the same semantics as the ones 1101 of the builtin str() API. 1102 1103 Setting encoding to NULL causes the default encoding (UTF-8) to be used. 1104 1105 Error handling is set by errors which may also be set to NULL 1106 meaning to use the default handling defined for the codec. Default 1107 error handling for all builtin codecs is "strict" (ValueErrors are 1108 raised). 1109 1110 The codecs all use a similar interface. Only deviation from the 1111 generic ones are documented. 1112 1113 */ 1114 1115 /* --- Manage the default encoding ---------------------------------------- */ 1116 1117 /* Returns a pointer to the default encoding (UTF-8) of the 1118 Unicode object unicode and the size of the encoded representation 1119 in bytes stored in *size. 1120 1121 In case of an error, no *size is set. 1122 1123 This function caches the UTF-8 encoded string in the unicodeobject 1124 and subsequent calls will return the same string. The memory is released 1125 when the unicodeobject is deallocated. 1126 1127 _PyUnicode_AsStringAndSize is a #define for PyUnicode_AsUTF8AndSize to 1128 support the previous internal function with the same behaviour. 1129 1130 *** This API is for interpreter INTERNAL USE ONLY and will likely 1131 *** be removed or changed in the future. 1132 1133 *** If you need to access the Unicode object as UTF-8 bytes string, 1134 *** please use PyUnicode_AsUTF8String() instead. 1135 */ 1136 1137 #ifndef Py_LIMITED_API 1138 PyAPI_FUNC(const char *) PyUnicode_AsUTF8AndSize( 1139 PyObject *unicode, 1140 Py_ssize_t *size); 1141 #define _PyUnicode_AsStringAndSize PyUnicode_AsUTF8AndSize 1142 #endif 1143 1144 /* Returns a pointer to the default encoding (UTF-8) of the 1145 Unicode object unicode. 1146 1147 Like PyUnicode_AsUTF8AndSize(), this also caches the UTF-8 representation 1148 in the unicodeobject. 1149 1150 _PyUnicode_AsString is a #define for PyUnicode_AsUTF8 to 1151 support the previous internal function with the same behaviour. 1152 1153 Use of this API is DEPRECATED since no size information can be 1154 extracted from the returned data. 1155 1156 *** This API is for interpreter INTERNAL USE ONLY and will likely 1157 *** be removed or changed for Python 3.1. 1158 1159 *** If you need to access the Unicode object as UTF-8 bytes string, 1160 *** please use PyUnicode_AsUTF8String() instead. 1161 1162 */ 1163 1164 #ifndef Py_LIMITED_API 1165 PyAPI_FUNC(const char *) PyUnicode_AsUTF8(PyObject *unicode); 1166 #define _PyUnicode_AsString PyUnicode_AsUTF8 1167 #endif 1168 1169 /* Returns "utf-8". */ 1170 1171 PyAPI_FUNC(const char*) PyUnicode_GetDefaultEncoding(void); 1172 1173 /* --- Generic Codecs ----------------------------------------------------- */ 1174 1175 /* Create a Unicode object by decoding the encoded string s of the 1176 given size. */ 1177 1178 PyAPI_FUNC(PyObject*) PyUnicode_Decode( 1179 const char *s, /* encoded string */ 1180 Py_ssize_t size, /* size of buffer */ 1181 const char *encoding, /* encoding */ 1182 const char *errors /* error handling */ 1183 ); 1184 1185 /* Decode a Unicode object unicode and return the result as Python 1186 object. 1187 1188 This API is DEPRECATED. The only supported standard encoding is rot13. 1189 Use PyCodec_Decode() to decode with rot13 and non-standard codecs 1190 that decode from str. */ 1191 1192 PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedObject( 1193 PyObject *unicode, /* Unicode object */ 1194 const char *encoding, /* encoding */ 1195 const char *errors /* error handling */ 1196 ) Py_DEPRECATED(3.6); 1197 1198 /* Decode a Unicode object unicode and return the result as Unicode 1199 object. 1200 1201 This API is DEPRECATED. The only supported standard encoding is rot13. 1202 Use PyCodec_Decode() to decode with rot13 and non-standard codecs 1203 that decode from str to str. */ 1204 1205 PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedUnicode( 1206 PyObject *unicode, /* Unicode object */ 1207 const char *encoding, /* encoding */ 1208 const char *errors /* error handling */ 1209 ) Py_DEPRECATED(3.6); 1210 1211 /* Encodes a Py_UNICODE buffer of the given size and returns a 1212 Python string object. */ 1213 1214 #ifndef Py_LIMITED_API 1215 PyAPI_FUNC(PyObject*) PyUnicode_Encode( 1216 const Py_UNICODE *s, /* Unicode char buffer */ 1217 Py_ssize_t size, /* number of Py_UNICODE chars to encode */ 1218 const char *encoding, /* encoding */ 1219 const char *errors /* error handling */ 1220 ) Py_DEPRECATED(3.3); 1221 #endif 1222 1223 /* Encodes a Unicode object and returns the result as Python 1224 object. 1225 1226 This API is DEPRECATED. It is superseded by PyUnicode_AsEncodedString() 1227 since all standard encodings (except rot13) encode str to bytes. 1228 Use PyCodec_Encode() for encoding with rot13 and non-standard codecs 1229 that encode form str to non-bytes. */ 1230 1231 PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedObject( 1232 PyObject *unicode, /* Unicode object */ 1233 const char *encoding, /* encoding */ 1234 const char *errors /* error handling */ 1235 ) Py_DEPRECATED(3.6); 1236 1237 /* Encodes a Unicode object and returns the result as Python string 1238 object. */ 1239 1240 PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString( 1241 PyObject *unicode, /* Unicode object */ 1242 const char *encoding, /* encoding */ 1243 const char *errors /* error handling */ 1244 ); 1245 1246 /* Encodes a Unicode object and returns the result as Unicode 1247 object. 1248 1249 This API is DEPRECATED. The only supported standard encodings is rot13. 1250 Use PyCodec_Encode() to encode with rot13 and non-standard codecs 1251 that encode from str to str. */ 1252 1253 PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedUnicode( 1254 PyObject *unicode, /* Unicode object */ 1255 const char *encoding, /* encoding */ 1256 const char *errors /* error handling */ 1257 ) Py_DEPRECATED(3.6); 1258 1259 /* Build an encoding map. */ 1260 1261 PyAPI_FUNC(PyObject*) PyUnicode_BuildEncodingMap( 1262 PyObject* string /* 256 character map */ 1263 ); 1264 1265 /* --- UTF-7 Codecs ------------------------------------------------------- */ 1266 1267 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7( 1268 const char *string, /* UTF-7 encoded string */ 1269 Py_ssize_t length, /* size of string */ 1270 const char *errors /* error handling */ 1271 ); 1272 1273 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful( 1274 const char *string, /* UTF-7 encoded string */ 1275 Py_ssize_t length, /* size of string */ 1276 const char *errors, /* error handling */ 1277 Py_ssize_t *consumed /* bytes consumed */ 1278 ); 1279 1280 #ifndef Py_LIMITED_API 1281 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7( 1282 const Py_UNICODE *data, /* Unicode char buffer */ 1283 Py_ssize_t length, /* number of Py_UNICODE chars to encode */ 1284 int base64SetO, /* Encode RFC2152 Set O characters in base64 */ 1285 int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */ 1286 const char *errors /* error handling */ 1287 ) Py_DEPRECATED(3.3); 1288 PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF7( 1289 PyObject *unicode, /* Unicode object */ 1290 int base64SetO, /* Encode RFC2152 Set O characters in base64 */ 1291 int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */ 1292 const char *errors /* error handling */ 1293 ); 1294 #endif 1295 1296 /* --- UTF-8 Codecs ------------------------------------------------------- */ 1297 1298 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8( 1299 const char *string, /* UTF-8 encoded string */ 1300 Py_ssize_t length, /* size of string */ 1301 const char *errors /* error handling */ 1302 ); 1303 1304 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8Stateful( 1305 const char *string, /* UTF-8 encoded string */ 1306 Py_ssize_t length, /* size of string */ 1307 const char *errors, /* error handling */ 1308 Py_ssize_t *consumed /* bytes consumed */ 1309 ); 1310 1311 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String( 1312 PyObject *unicode /* Unicode object */ 1313 ); 1314 1315 #ifndef Py_LIMITED_API 1316 PyAPI_FUNC(PyObject*) _PyUnicode_AsUTF8String( 1317 PyObject *unicode, 1318 const char *errors); 1319 1320 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8( 1321 const Py_UNICODE *data, /* Unicode char buffer */ 1322 Py_ssize_t length, /* number of Py_UNICODE chars to encode */ 1323 const char *errors /* error handling */ 1324 ) Py_DEPRECATED(3.3); 1325 #endif 1326 1327 /* --- UTF-32 Codecs ------------------------------------------------------ */ 1328 1329 /* Decodes length bytes from a UTF-32 encoded buffer string and returns 1330 the corresponding Unicode object. 1331 1332 errors (if non-NULL) defines the error handling. It defaults 1333 to "strict". 1334 1335 If byteorder is non-NULL, the decoder starts decoding using the 1336 given byte order: 1337 1338 *byteorder == -1: little endian 1339 *byteorder == 0: native order 1340 *byteorder == 1: big endian 1341 1342 In native mode, the first four bytes of the stream are checked for a 1343 BOM mark. If found, the BOM mark is analysed, the byte order 1344 adjusted and the BOM skipped. In the other modes, no BOM mark 1345 interpretation is done. After completion, *byteorder is set to the 1346 current byte order at the end of input data. 1347 1348 If byteorder is NULL, the codec starts in native order mode. 1349 1350 */ 1351 1352 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32( 1353 const char *string, /* UTF-32 encoded string */ 1354 Py_ssize_t length, /* size of string */ 1355 const char *errors, /* error handling */ 1356 int *byteorder /* pointer to byteorder to use 1357 0=native;-1=LE,1=BE; updated on 1358 exit */ 1359 ); 1360 1361 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32Stateful( 1362 const char *string, /* UTF-32 encoded string */ 1363 Py_ssize_t length, /* size of string */ 1364 const char *errors, /* error handling */ 1365 int *byteorder, /* pointer to byteorder to use 1366 0=native;-1=LE,1=BE; updated on 1367 exit */ 1368 Py_ssize_t *consumed /* bytes consumed */ 1369 ); 1370 1371 /* Returns a Python string using the UTF-32 encoding in native byte 1372 order. The string always starts with a BOM mark. */ 1373 1374 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF32String( 1375 PyObject *unicode /* Unicode object */ 1376 ); 1377 1378 /* Returns a Python string object holding the UTF-32 encoded value of 1379 the Unicode data. 1380 1381 If byteorder is not 0, output is written according to the following 1382 byte order: 1383 1384 byteorder == -1: little endian 1385 byteorder == 0: native byte order (writes a BOM mark) 1386 byteorder == 1: big endian 1387 1388 If byteorder is 0, the output string will always start with the 1389 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is 1390 prepended. 1391 1392 */ 1393 1394 #ifndef Py_LIMITED_API 1395 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF32( 1396 const Py_UNICODE *data, /* Unicode char buffer */ 1397 Py_ssize_t length, /* number of Py_UNICODE chars to encode */ 1398 const char *errors, /* error handling */ 1399 int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */ 1400 ) Py_DEPRECATED(3.3); 1401 PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF32( 1402 PyObject *object, /* Unicode object */ 1403 const char *errors, /* error handling */ 1404 int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */ 1405 ); 1406 #endif 1407 1408 /* --- UTF-16 Codecs ------------------------------------------------------ */ 1409 1410 /* Decodes length bytes from a UTF-16 encoded buffer string and returns 1411 the corresponding Unicode object. 1412 1413 errors (if non-NULL) defines the error handling. It defaults 1414 to "strict". 1415 1416 If byteorder is non-NULL, the decoder starts decoding using the 1417 given byte order: 1418 1419 *byteorder == -1: little endian 1420 *byteorder == 0: native order 1421 *byteorder == 1: big endian 1422 1423 In native mode, the first two bytes of the stream are checked for a 1424 BOM mark. If found, the BOM mark is analysed, the byte order 1425 adjusted and the BOM skipped. In the other modes, no BOM mark 1426 interpretation is done. After completion, *byteorder is set to the 1427 current byte order at the end of input data. 1428 1429 If byteorder is NULL, the codec starts in native order mode. 1430 1431 */ 1432 1433 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16( 1434 const char *string, /* UTF-16 encoded string */ 1435 Py_ssize_t length, /* size of string */ 1436 const char *errors, /* error handling */ 1437 int *byteorder /* pointer to byteorder to use 1438 0=native;-1=LE,1=BE; updated on 1439 exit */ 1440 ); 1441 1442 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16Stateful( 1443 const char *string, /* UTF-16 encoded string */ 1444 Py_ssize_t length, /* size of string */ 1445 const char *errors, /* error handling */ 1446 int *byteorder, /* pointer to byteorder to use 1447 0=native;-1=LE,1=BE; updated on 1448 exit */ 1449 Py_ssize_t *consumed /* bytes consumed */ 1450 ); 1451 1452 /* Returns a Python string using the UTF-16 encoding in native byte 1453 order. The string always starts with a BOM mark. */ 1454 1455 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF16String( 1456 PyObject *unicode /* Unicode object */ 1457 ); 1458 1459 /* Returns a Python string object holding the UTF-16 encoded value of 1460 the Unicode data. 1461 1462 If byteorder is not 0, output is written according to the following 1463 byte order: 1464 1465 byteorder == -1: little endian 1466 byteorder == 0: native byte order (writes a BOM mark) 1467 byteorder == 1: big endian 1468 1469 If byteorder is 0, the output string will always start with the 1470 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is 1471 prepended. 1472 1473 Note that Py_UNICODE data is being interpreted as UTF-16 reduced to 1474 UCS-2. This trick makes it possible to add full UTF-16 capabilities 1475 at a later point without compromising the APIs. 1476 1477 */ 1478 1479 #ifndef Py_LIMITED_API 1480 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF16( 1481 const Py_UNICODE *data, /* Unicode char buffer */ 1482 Py_ssize_t length, /* number of Py_UNICODE chars to encode */ 1483 const char *errors, /* error handling */ 1484 int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */ 1485 ) Py_DEPRECATED(3.3); 1486 PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF16( 1487 PyObject* unicode, /* Unicode object */ 1488 const char *errors, /* error handling */ 1489 int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */ 1490 ); 1491 #endif 1492 1493 /* --- Unicode-Escape Codecs ---------------------------------------------- */ 1494 1495 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape( 1496 const char *string, /* Unicode-Escape encoded string */ 1497 Py_ssize_t length, /* size of string */ 1498 const char *errors /* error handling */ 1499 ); 1500 1501 #ifndef Py_LIMITED_API 1502 /* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape 1503 chars. */ 1504 PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscape( 1505 const char *string, /* Unicode-Escape encoded string */ 1506 Py_ssize_t length, /* size of string */ 1507 const char *errors, /* error handling */ 1508 const char **first_invalid_escape /* on return, points to first 1509 invalid escaped char in 1510 string. */ 1511 ); 1512 #endif 1513 1514 PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString( 1515 PyObject *unicode /* Unicode object */ 1516 ); 1517 1518 #ifndef Py_LIMITED_API 1519 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUnicodeEscape( 1520 const Py_UNICODE *data, /* Unicode char buffer */ 1521 Py_ssize_t length /* Number of Py_UNICODE chars to encode */ 1522 ) Py_DEPRECATED(3.3); 1523 #endif 1524 1525 /* --- Raw-Unicode-Escape Codecs ------------------------------------------ */ 1526 1527 PyAPI_FUNC(PyObject*) PyUnicode_DecodeRawUnicodeEscape( 1528 const char *string, /* Raw-Unicode-Escape encoded string */ 1529 Py_ssize_t length, /* size of string */ 1530 const char *errors /* error handling */ 1531 ); 1532 1533 PyAPI_FUNC(PyObject*) PyUnicode_AsRawUnicodeEscapeString( 1534 PyObject *unicode /* Unicode object */ 1535 ); 1536 1537 #ifndef Py_LIMITED_API 1538 PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape( 1539 const Py_UNICODE *data, /* Unicode char buffer */ 1540 Py_ssize_t length /* Number of Py_UNICODE chars to encode */ 1541 ) Py_DEPRECATED(3.3); 1542 #endif 1543 1544 /* --- Unicode Internal Codec --------------------------------------------- 1545 1546 Only for internal use in _codecsmodule.c */ 1547 1548 #ifndef Py_LIMITED_API 1549 PyObject *_PyUnicode_DecodeUnicodeInternal( 1550 const char *string, 1551 Py_ssize_t length, 1552 const char *errors 1553 ); 1554 #endif 1555 1556 /* --- Latin-1 Codecs ----------------------------------------------------- 1557 1558 Note: Latin-1 corresponds to the first 256 Unicode ordinals. 1559 1560 */ 1561 1562 PyAPI_FUNC(PyObject*) PyUnicode_DecodeLatin1( 1563 const char *string, /* Latin-1 encoded string */ 1564 Py_ssize_t length, /* size of string */ 1565 const char *errors /* error handling */ 1566 ); 1567 1568 PyAPI_FUNC(PyObject*) PyUnicode_AsLatin1String( 1569 PyObject *unicode /* Unicode object */ 1570 ); 1571 1572 #ifndef Py_LIMITED_API 1573 PyAPI_FUNC(PyObject*) _PyUnicode_AsLatin1String( 1574 PyObject* unicode, 1575 const char* errors); 1576 1577 PyAPI_FUNC(PyObject*) PyUnicode_EncodeLatin1( 1578 const Py_UNICODE *data, /* Unicode char buffer */ 1579 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ 1580 const char *errors /* error handling */ 1581 ) Py_DEPRECATED(3.3); 1582 #endif 1583 1584 /* --- ASCII Codecs ------------------------------------------------------- 1585 1586 Only 7-bit ASCII data is excepted. All other codes generate errors. 1587 1588 */ 1589 1590 PyAPI_FUNC(PyObject*) PyUnicode_DecodeASCII( 1591 const char *string, /* ASCII encoded string */ 1592 Py_ssize_t length, /* size of string */ 1593 const char *errors /* error handling */ 1594 ); 1595 1596 PyAPI_FUNC(PyObject*) PyUnicode_AsASCIIString( 1597 PyObject *unicode /* Unicode object */ 1598 ); 1599 1600 #ifndef Py_LIMITED_API 1601 PyAPI_FUNC(PyObject*) _PyUnicode_AsASCIIString( 1602 PyObject* unicode, 1603 const char* errors); 1604 1605 PyAPI_FUNC(PyObject*) PyUnicode_EncodeASCII( 1606 const Py_UNICODE *data, /* Unicode char buffer */ 1607 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ 1608 const char *errors /* error handling */ 1609 ) Py_DEPRECATED(3.3); 1610 #endif 1611 1612 /* --- Character Map Codecs ----------------------------------------------- 1613 1614 This codec uses mappings to encode and decode characters. 1615 1616 Decoding mappings must map byte ordinals (integers in the range from 0 to 1617 255) to Unicode strings, integers (which are then interpreted as Unicode 1618 ordinals) or None. Unmapped data bytes (ones which cause a LookupError) 1619 as well as mapped to None, 0xFFFE or '\ufffe' are treated as "undefined 1620 mapping" and cause an error. 1621 1622 Encoding mappings must map Unicode ordinal integers to bytes objects, 1623 integers in the range from 0 to 255 or None. Unmapped character 1624 ordinals (ones which cause a LookupError) as well as mapped to 1625 None are treated as "undefined mapping" and cause an error. 1626 1627 */ 1628 1629 PyAPI_FUNC(PyObject*) PyUnicode_DecodeCharmap( 1630 const char *string, /* Encoded string */ 1631 Py_ssize_t length, /* size of string */ 1632 PyObject *mapping, /* decoding mapping */ 1633 const char *errors /* error handling */ 1634 ); 1635 1636 PyAPI_FUNC(PyObject*) PyUnicode_AsCharmapString( 1637 PyObject *unicode, /* Unicode object */ 1638 PyObject *mapping /* encoding mapping */ 1639 ); 1640 1641 #ifndef Py_LIMITED_API 1642 PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap( 1643 const Py_UNICODE *data, /* Unicode char buffer */ 1644 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ 1645 PyObject *mapping, /* encoding mapping */ 1646 const char *errors /* error handling */ 1647 ) Py_DEPRECATED(3.3); 1648 PyAPI_FUNC(PyObject*) _PyUnicode_EncodeCharmap( 1649 PyObject *unicode, /* Unicode object */ 1650 PyObject *mapping, /* encoding mapping */ 1651 const char *errors /* error handling */ 1652 ); 1653 #endif 1654 1655 /* Translate a Py_UNICODE buffer of the given length by applying a 1656 character mapping table to it and return the resulting Unicode 1657 object. 1658 1659 The mapping table must map Unicode ordinal integers to Unicode strings, 1660 Unicode ordinal integers or None (causing deletion of the character). 1661 1662 Mapping tables may be dictionaries or sequences. Unmapped character 1663 ordinals (ones which cause a LookupError) are left untouched and 1664 are copied as-is. 1665 1666 */ 1667 1668 #ifndef Py_LIMITED_API 1669 PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap( 1670 const Py_UNICODE *data, /* Unicode char buffer */ 1671 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ 1672 PyObject *table, /* Translate table */ 1673 const char *errors /* error handling */ 1674 ) Py_DEPRECATED(3.3); 1675 #endif 1676 1677 #ifdef MS_WINDOWS 1678 1679 /* --- MBCS codecs for Windows -------------------------------------------- */ 1680 1681 PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS( 1682 const char *string, /* MBCS encoded string */ 1683 Py_ssize_t length, /* size of string */ 1684 const char *errors /* error handling */ 1685 ); 1686 1687 PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCSStateful( 1688 const char *string, /* MBCS encoded string */ 1689 Py_ssize_t length, /* size of string */ 1690 const char *errors, /* error handling */ 1691 Py_ssize_t *consumed /* bytes consumed */ 1692 ); 1693 1694 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 1695 PyAPI_FUNC(PyObject*) PyUnicode_DecodeCodePageStateful( 1696 int code_page, /* code page number */ 1697 const char *string, /* encoded string */ 1698 Py_ssize_t length, /* size of string */ 1699 const char *errors, /* error handling */ 1700 Py_ssize_t *consumed /* bytes consumed */ 1701 ); 1702 #endif 1703 1704 PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString( 1705 PyObject *unicode /* Unicode object */ 1706 ); 1707 1708 #ifndef Py_LIMITED_API 1709 PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS( 1710 const Py_UNICODE *data, /* Unicode char buffer */ 1711 Py_ssize_t length, /* number of Py_UNICODE chars to encode */ 1712 const char *errors /* error handling */ 1713 ) Py_DEPRECATED(3.3); 1714 #endif 1715 1716 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 1717 PyAPI_FUNC(PyObject*) PyUnicode_EncodeCodePage( 1718 int code_page, /* code page number */ 1719 PyObject *unicode, /* Unicode object */ 1720 const char *errors /* error handling */ 1721 ); 1722 #endif 1723 1724 #endif /* MS_WINDOWS */ 1725 1726 #ifndef Py_LIMITED_API 1727 /* --- Decimal Encoder ---------------------------------------------------- */ 1728 1729 /* Takes a Unicode string holding a decimal value and writes it into 1730 an output buffer using standard ASCII digit codes. 1731 1732 The output buffer has to provide at least length+1 bytes of storage 1733 area. The output string is 0-terminated. 1734 1735 The encoder converts whitespace to ' ', decimal characters to their 1736 corresponding ASCII digit and all other Latin-1 characters except 1737 \0 as-is. Characters outside this range (Unicode ordinals 1-256) 1738 are treated as errors. This includes embedded NULL bytes. 1739 1740 Error handling is defined by the errors argument: 1741 1742 NULL or "strict": raise a ValueError 1743 "ignore": ignore the wrong characters (these are not copied to the 1744 output buffer) 1745 "replace": replaces illegal characters with '?' 1746 1747 Returns 0 on success, -1 on failure. 1748 1749 */ 1750 1751 PyAPI_FUNC(int) PyUnicode_EncodeDecimal( 1752 Py_UNICODE *s, /* Unicode buffer */ 1753 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ 1754 char *output, /* Output buffer; must have size >= length */ 1755 const char *errors /* error handling */ 1756 ) /* Py_DEPRECATED(3.3) */; 1757 1758 /* Transforms code points that have decimal digit property to the 1759 corresponding ASCII digit code points. 1760 1761 Returns a new Unicode string on success, NULL on failure. 1762 */ 1763 1764 PyAPI_FUNC(PyObject*) PyUnicode_TransformDecimalToASCII( 1765 Py_UNICODE *s, /* Unicode buffer */ 1766 Py_ssize_t length /* Number of Py_UNICODE chars to transform */ 1767 ) /* Py_DEPRECATED(3.3) */; 1768 1769 /* Coverts a Unicode object holding a decimal value to an ASCII string 1770 for using in int, float and complex parsers. 1771 Transforms code points that have decimal digit property to the 1772 corresponding ASCII digit code points. Transforms spaces to ASCII. 1773 Transforms code points starting from the first non-ASCII code point that 1774 is neither a decimal digit nor a space to the end into '?'. */ 1775 1776 PyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalAndSpaceToASCII( 1777 PyObject *unicode /* Unicode object */ 1778 ); 1779 #endif 1780 1781 /* --- Locale encoding --------------------------------------------------- */ 1782 1783 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 1784 /* Decode a string from the current locale encoding. The decoder is strict if 1785 *surrogateescape* is equal to zero, otherwise it uses the 'surrogateescape' 1786 error handler (PEP 383) to escape undecodable bytes. If a byte sequence can 1787 be decoded as a surrogate character and *surrogateescape* is not equal to 1788 zero, the byte sequence is escaped using the 'surrogateescape' error handler 1789 instead of being decoded. *str* must end with a null character but cannot 1790 contain embedded null characters. */ 1791 1792 PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocaleAndSize( 1793 const char *str, 1794 Py_ssize_t len, 1795 const char *errors); 1796 1797 /* Similar to PyUnicode_DecodeLocaleAndSize(), but compute the string 1798 length using strlen(). */ 1799 1800 PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocale( 1801 const char *str, 1802 const char *errors); 1803 1804 /* Encode a Unicode object to the current locale encoding. The encoder is 1805 strict is *surrogateescape* is equal to zero, otherwise the 1806 "surrogateescape" error handler is used. Return a bytes object. The string 1807 cannot contain embedded null characters. */ 1808 1809 PyAPI_FUNC(PyObject*) PyUnicode_EncodeLocale( 1810 PyObject *unicode, 1811 const char *errors 1812 ); 1813 #endif 1814 1815 /* --- File system encoding ---------------------------------------------- */ 1816 1817 /* ParseTuple converter: encode str objects to bytes using 1818 PyUnicode_EncodeFSDefault(); bytes objects are output as-is. */ 1819 1820 PyAPI_FUNC(int) PyUnicode_FSConverter(PyObject*, void*); 1821 1822 /* ParseTuple converter: decode bytes objects to unicode using 1823 PyUnicode_DecodeFSDefaultAndSize(); str objects are output as-is. */ 1824 1825 PyAPI_FUNC(int) PyUnicode_FSDecoder(PyObject*, void*); 1826 1827 /* Decode a null-terminated string using Py_FileSystemDefaultEncoding 1828 and the "surrogateescape" error handler. 1829 1830 If Py_FileSystemDefaultEncoding is not set, fall back to the locale 1831 encoding. 1832 1833 Use PyUnicode_DecodeFSDefaultAndSize() if the string length is known. 1834 */ 1835 1836 PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault( 1837 const char *s /* encoded string */ 1838 ); 1839 1840 /* Decode a string using Py_FileSystemDefaultEncoding 1841 and the "surrogateescape" error handler. 1842 1843 If Py_FileSystemDefaultEncoding is not set, fall back to the locale 1844 encoding. 1845 */ 1846 1847 PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize( 1848 const char *s, /* encoded string */ 1849 Py_ssize_t size /* size */ 1850 ); 1851 1852 /* Encode a Unicode object to Py_FileSystemDefaultEncoding with the 1853 "surrogateescape" error handler, and return bytes. 1854 1855 If Py_FileSystemDefaultEncoding is not set, fall back to the locale 1856 encoding. 1857 */ 1858 1859 PyAPI_FUNC(PyObject*) PyUnicode_EncodeFSDefault( 1860 PyObject *unicode 1861 ); 1862 1863 /* --- Methods & Slots ---------------------------------------------------- 1864 1865 These are capable of handling Unicode objects and strings on input 1866 (we refer to them as strings in the descriptions) and return 1867 Unicode objects or integers as appropriate. */ 1868 1869 /* Concat two strings giving a new Unicode string. */ 1870 1871 PyAPI_FUNC(PyObject*) PyUnicode_Concat( 1872 PyObject *left, /* Left string */ 1873 PyObject *right /* Right string */ 1874 ); 1875 1876 /* Concat two strings and put the result in *pleft 1877 (sets *pleft to NULL on error) */ 1878 1879 PyAPI_FUNC(void) PyUnicode_Append( 1880 PyObject **pleft, /* Pointer to left string */ 1881 PyObject *right /* Right string */ 1882 ); 1883 1884 /* Concat two strings, put the result in *pleft and drop the right object 1885 (sets *pleft to NULL on error) */ 1886 1887 PyAPI_FUNC(void) PyUnicode_AppendAndDel( 1888 PyObject **pleft, /* Pointer to left string */ 1889 PyObject *right /* Right string */ 1890 ); 1891 1892 /* Split a string giving a list of Unicode strings. 1893 1894 If sep is NULL, splitting will be done at all whitespace 1895 substrings. Otherwise, splits occur at the given separator. 1896 1897 At most maxsplit splits will be done. If negative, no limit is set. 1898 1899 Separators are not included in the resulting list. 1900 1901 */ 1902 1903 PyAPI_FUNC(PyObject*) PyUnicode_Split( 1904 PyObject *s, /* String to split */ 1905 PyObject *sep, /* String separator */ 1906 Py_ssize_t maxsplit /* Maxsplit count */ 1907 ); 1908 1909 /* Dito, but split at line breaks. 1910 1911 CRLF is considered to be one line break. Line breaks are not 1912 included in the resulting list. */ 1913 1914 PyAPI_FUNC(PyObject*) PyUnicode_Splitlines( 1915 PyObject *s, /* String to split */ 1916 int keepends /* If true, line end markers are included */ 1917 ); 1918 1919 /* Partition a string using a given separator. */ 1920 1921 PyAPI_FUNC(PyObject*) PyUnicode_Partition( 1922 PyObject *s, /* String to partition */ 1923 PyObject *sep /* String separator */ 1924 ); 1925 1926 /* Partition a string using a given separator, searching from the end of the 1927 string. */ 1928 1929 PyAPI_FUNC(PyObject*) PyUnicode_RPartition( 1930 PyObject *s, /* String to partition */ 1931 PyObject *sep /* String separator */ 1932 ); 1933 1934 /* Split a string giving a list of Unicode strings. 1935 1936 If sep is NULL, splitting will be done at all whitespace 1937 substrings. Otherwise, splits occur at the given separator. 1938 1939 At most maxsplit splits will be done. But unlike PyUnicode_Split 1940 PyUnicode_RSplit splits from the end of the string. If negative, 1941 no limit is set. 1942 1943 Separators are not included in the resulting list. 1944 1945 */ 1946 1947 PyAPI_FUNC(PyObject*) PyUnicode_RSplit( 1948 PyObject *s, /* String to split */ 1949 PyObject *sep, /* String separator */ 1950 Py_ssize_t maxsplit /* Maxsplit count */ 1951 ); 1952 1953 /* Translate a string by applying a character mapping table to it and 1954 return the resulting Unicode object. 1955 1956 The mapping table must map Unicode ordinal integers to Unicode strings, 1957 Unicode ordinal integers or None (causing deletion of the character). 1958 1959 Mapping tables may be dictionaries or sequences. Unmapped character 1960 ordinals (ones which cause a LookupError) are left untouched and 1961 are copied as-is. 1962 1963 */ 1964 1965 PyAPI_FUNC(PyObject *) PyUnicode_Translate( 1966 PyObject *str, /* String */ 1967 PyObject *table, /* Translate table */ 1968 const char *errors /* error handling */ 1969 ); 1970 1971 /* Join a sequence of strings using the given separator and return 1972 the resulting Unicode string. */ 1973 1974 PyAPI_FUNC(PyObject*) PyUnicode_Join( 1975 PyObject *separator, /* Separator string */ 1976 PyObject *seq /* Sequence object */ 1977 ); 1978 1979 #ifndef Py_LIMITED_API 1980 PyAPI_FUNC(PyObject *) _PyUnicode_JoinArray( 1981 PyObject *separator, 1982 PyObject *const *items, 1983 Py_ssize_t seqlen 1984 ); 1985 #endif /* Py_LIMITED_API */ 1986 1987 /* Return 1 if substr matches str[start:end] at the given tail end, 0 1988 otherwise. */ 1989 1990 PyAPI_FUNC(Py_ssize_t) PyUnicode_Tailmatch( 1991 PyObject *str, /* String */ 1992 PyObject *substr, /* Prefix or Suffix string */ 1993 Py_ssize_t start, /* Start index */ 1994 Py_ssize_t end, /* Stop index */ 1995 int direction /* Tail end: -1 prefix, +1 suffix */ 1996 ); 1997 1998 /* Return the first position of substr in str[start:end] using the 1999 given search direction or -1 if not found. -2 is returned in case 2000 an error occurred and an exception is set. */ 2001 2002 PyAPI_FUNC(Py_ssize_t) PyUnicode_Find( 2003 PyObject *str, /* String */ 2004 PyObject *substr, /* Substring to find */ 2005 Py_ssize_t start, /* Start index */ 2006 Py_ssize_t end, /* Stop index */ 2007 int direction /* Find direction: +1 forward, -1 backward */ 2008 ); 2009 2010 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 2011 /* Like PyUnicode_Find, but search for single character only. */ 2012 PyAPI_FUNC(Py_ssize_t) PyUnicode_FindChar( 2013 PyObject *str, 2014 Py_UCS4 ch, 2015 Py_ssize_t start, 2016 Py_ssize_t end, 2017 int direction 2018 ); 2019 #endif 2020 2021 /* Count the number of occurrences of substr in str[start:end]. */ 2022 2023 PyAPI_FUNC(Py_ssize_t) PyUnicode_Count( 2024 PyObject *str, /* String */ 2025 PyObject *substr, /* Substring to count */ 2026 Py_ssize_t start, /* Start index */ 2027 Py_ssize_t end /* Stop index */ 2028 ); 2029 2030 /* Replace at most maxcount occurrences of substr in str with replstr 2031 and return the resulting Unicode object. */ 2032 2033 PyAPI_FUNC(PyObject *) PyUnicode_Replace( 2034 PyObject *str, /* String */ 2035 PyObject *substr, /* Substring to find */ 2036 PyObject *replstr, /* Substring to replace */ 2037 Py_ssize_t maxcount /* Max. number of replacements to apply; 2038 -1 = all */ 2039 ); 2040 2041 /* Compare two strings and return -1, 0, 1 for less than, equal, 2042 greater than resp. 2043 Raise an exception and return -1 on error. */ 2044 2045 PyAPI_FUNC(int) PyUnicode_Compare( 2046 PyObject *left, /* Left string */ 2047 PyObject *right /* Right string */ 2048 ); 2049 2050 #ifndef Py_LIMITED_API 2051 /* Test whether a unicode is equal to ASCII identifier. Return 1 if true, 2052 0 otherwise. The right argument must be ASCII identifier. 2053 Any error occurs inside will be cleared before return. */ 2054 2055 PyAPI_FUNC(int) _PyUnicode_EqualToASCIIId( 2056 PyObject *left, /* Left string */ 2057 _Py_Identifier *right /* Right identifier */ 2058 ); 2059 #endif 2060 2061 /* Compare a Unicode object with C string and return -1, 0, 1 for less than, 2062 equal, and greater than, respectively. It is best to pass only 2063 ASCII-encoded strings, but the function interprets the input string as 2064 ISO-8859-1 if it contains non-ASCII characters. 2065 This function does not raise exceptions. */ 2066 2067 PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString( 2068 PyObject *left, 2069 const char *right /* ASCII-encoded string */ 2070 ); 2071 2072 #ifndef Py_LIMITED_API 2073 /* Test whether a unicode is equal to ASCII string. Return 1 if true, 2074 0 otherwise. The right argument must be ASCII-encoded string. 2075 Any error occurs inside will be cleared before return. */ 2076 2077 PyAPI_FUNC(int) _PyUnicode_EqualToASCIIString( 2078 PyObject *left, 2079 const char *right /* ASCII-encoded string */ 2080 ); 2081 #endif 2082 2083 /* Rich compare two strings and return one of the following: 2084 2085 - NULL in case an exception was raised 2086 - Py_True or Py_False for successful comparisons 2087 - Py_NotImplemented in case the type combination is unknown 2088 2089 Possible values for op: 2090 2091 Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE 2092 2093 */ 2094 2095 PyAPI_FUNC(PyObject *) PyUnicode_RichCompare( 2096 PyObject *left, /* Left string */ 2097 PyObject *right, /* Right string */ 2098 int op /* Operation: Py_EQ, Py_NE, Py_GT, etc. */ 2099 ); 2100 2101 /* Apply an argument tuple or dictionary to a format string and return 2102 the resulting Unicode string. */ 2103 2104 PyAPI_FUNC(PyObject *) PyUnicode_Format( 2105 PyObject *format, /* Format string */ 2106 PyObject *args /* Argument tuple or dictionary */ 2107 ); 2108 2109 /* Checks whether element is contained in container and return 1/0 2110 accordingly. 2111 2112 element has to coerce to a one element Unicode string. -1 is 2113 returned in case of an error. */ 2114 2115 PyAPI_FUNC(int) PyUnicode_Contains( 2116 PyObject *container, /* Container string */ 2117 PyObject *element /* Element string */ 2118 ); 2119 2120 /* Checks whether argument is a valid identifier. */ 2121 2122 PyAPI_FUNC(int) PyUnicode_IsIdentifier(PyObject *s); 2123 2124 #ifndef Py_LIMITED_API 2125 /* Externally visible for str.strip(unicode) */ 2126 PyAPI_FUNC(PyObject *) _PyUnicode_XStrip( 2127 PyObject *self, 2128 int striptype, 2129 PyObject *sepobj 2130 ); 2131 #endif 2132 2133 /* Using explicit passed-in values, insert the thousands grouping 2134 into the string pointed to by buffer. For the argument descriptions, 2135 see Objects/stringlib/localeutil.h */ 2136 #ifndef Py_LIMITED_API 2137 PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping( 2138 _PyUnicodeWriter *writer, 2139 Py_ssize_t n_buffer, 2140 PyObject *digits, 2141 Py_ssize_t d_pos, 2142 Py_ssize_t n_digits, 2143 Py_ssize_t min_width, 2144 const char *grouping, 2145 PyObject *thousands_sep, 2146 Py_UCS4 *maxchar); 2147 #endif 2148 /* === Characters Type APIs =============================================== */ 2149 2150 /* Helper array used by Py_UNICODE_ISSPACE(). */ 2151 2152 #ifndef Py_LIMITED_API 2153 PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[]; 2154 2155 /* These should not be used directly. Use the Py_UNICODE_IS* and 2156 Py_UNICODE_TO* macros instead. 2157 2158 These APIs are implemented in Objects/unicodectype.c. 2159 2160 */ 2161 2162 PyAPI_FUNC(int) _PyUnicode_IsLowercase( 2163 Py_UCS4 ch /* Unicode character */ 2164 ); 2165 2166 PyAPI_FUNC(int) _PyUnicode_IsUppercase( 2167 Py_UCS4 ch /* Unicode character */ 2168 ); 2169 2170 PyAPI_FUNC(int) _PyUnicode_IsTitlecase( 2171 Py_UCS4 ch /* Unicode character */ 2172 ); 2173 2174 PyAPI_FUNC(int) _PyUnicode_IsXidStart( 2175 Py_UCS4 ch /* Unicode character */ 2176 ); 2177 2178 PyAPI_FUNC(int) _PyUnicode_IsXidContinue( 2179 Py_UCS4 ch /* Unicode character */ 2180 ); 2181 2182 PyAPI_FUNC(int) _PyUnicode_IsWhitespace( 2183 const Py_UCS4 ch /* Unicode character */ 2184 ); 2185 2186 PyAPI_FUNC(int) _PyUnicode_IsLinebreak( 2187 const Py_UCS4 ch /* Unicode character */ 2188 ); 2189 2190 PyAPI_FUNC(Py_UCS4) _PyUnicode_ToLowercase( 2191 Py_UCS4 ch /* Unicode character */ 2192 ) /* Py_DEPRECATED(3.3) */; 2193 2194 PyAPI_FUNC(Py_UCS4) _PyUnicode_ToUppercase( 2195 Py_UCS4 ch /* Unicode character */ 2196 ) /* Py_DEPRECATED(3.3) */; 2197 2198 PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase( 2199 Py_UCS4 ch /* Unicode character */ 2200 ) Py_DEPRECATED(3.3); 2201 2202 PyAPI_FUNC(int) _PyUnicode_ToLowerFull( 2203 Py_UCS4 ch, /* Unicode character */ 2204 Py_UCS4 *res 2205 ); 2206 2207 PyAPI_FUNC(int) _PyUnicode_ToTitleFull( 2208 Py_UCS4 ch, /* Unicode character */ 2209 Py_UCS4 *res 2210 ); 2211 2212 PyAPI_FUNC(int) _PyUnicode_ToUpperFull( 2213 Py_UCS4 ch, /* Unicode character */ 2214 Py_UCS4 *res 2215 ); 2216 2217 PyAPI_FUNC(int) _PyUnicode_ToFoldedFull( 2218 Py_UCS4 ch, /* Unicode character */ 2219 Py_UCS4 *res 2220 ); 2221 2222 PyAPI_FUNC(int) _PyUnicode_IsCaseIgnorable( 2223 Py_UCS4 ch /* Unicode character */ 2224 ); 2225 2226 PyAPI_FUNC(int) _PyUnicode_IsCased( 2227 Py_UCS4 ch /* Unicode character */ 2228 ); 2229 2230 PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit( 2231 Py_UCS4 ch /* Unicode character */ 2232 ); 2233 2234 PyAPI_FUNC(int) _PyUnicode_ToDigit( 2235 Py_UCS4 ch /* Unicode character */ 2236 ); 2237 2238 PyAPI_FUNC(double) _PyUnicode_ToNumeric( 2239 Py_UCS4 ch /* Unicode character */ 2240 ); 2241 2242 PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit( 2243 Py_UCS4 ch /* Unicode character */ 2244 ); 2245 2246 PyAPI_FUNC(int) _PyUnicode_IsDigit( 2247 Py_UCS4 ch /* Unicode character */ 2248 ); 2249 2250 PyAPI_FUNC(int) _PyUnicode_IsNumeric( 2251 Py_UCS4 ch /* Unicode character */ 2252 ); 2253 2254 PyAPI_FUNC(int) _PyUnicode_IsPrintable( 2255 Py_UCS4 ch /* Unicode character */ 2256 ); 2257 2258 PyAPI_FUNC(int) _PyUnicode_IsAlpha( 2259 Py_UCS4 ch /* Unicode character */ 2260 ); 2261 2262 PyAPI_FUNC(size_t) Py_UNICODE_strlen( 2263 const Py_UNICODE *u 2264 ) Py_DEPRECATED(3.3); 2265 2266 PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcpy( 2267 Py_UNICODE *s1, 2268 const Py_UNICODE *s2) Py_DEPRECATED(3.3); 2269 2270 PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcat( 2271 Py_UNICODE *s1, const Py_UNICODE *s2) Py_DEPRECATED(3.3); 2272 2273 PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strncpy( 2274 Py_UNICODE *s1, 2275 const Py_UNICODE *s2, 2276 size_t n) Py_DEPRECATED(3.3); 2277 2278 PyAPI_FUNC(int) Py_UNICODE_strcmp( 2279 const Py_UNICODE *s1, 2280 const Py_UNICODE *s2 2281 ) Py_DEPRECATED(3.3); 2282 2283 PyAPI_FUNC(int) Py_UNICODE_strncmp( 2284 const Py_UNICODE *s1, 2285 const Py_UNICODE *s2, 2286 size_t n 2287 ) Py_DEPRECATED(3.3); 2288 2289 PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strchr( 2290 const Py_UNICODE *s, 2291 Py_UNICODE c 2292 ) Py_DEPRECATED(3.3); 2293 2294 PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strrchr( 2295 const Py_UNICODE *s, 2296 Py_UNICODE c 2297 ) Py_DEPRECATED(3.3); 2298 2299 PyAPI_FUNC(PyObject*) _PyUnicode_FormatLong(PyObject *, int, int, int); 2300 2301 /* Create a copy of a unicode string ending with a nul character. Return NULL 2302 and raise a MemoryError exception on memory allocation failure, otherwise 2303 return a new allocated buffer (use PyMem_Free() to free the buffer). */ 2304 2305 PyAPI_FUNC(Py_UNICODE*) PyUnicode_AsUnicodeCopy( 2306 PyObject *unicode 2307 ) Py_DEPRECATED(3.3); 2308 #endif /* Py_LIMITED_API */ 2309 2310 #if defined(Py_DEBUG) && !defined(Py_LIMITED_API) 2311 PyAPI_FUNC(int) _PyUnicode_CheckConsistency( 2312 PyObject *op, 2313 int check_content); 2314 #elif !defined(NDEBUG) 2315 /* For asserts that call _PyUnicode_CheckConsistency(), which would 2316 * otherwise be a problem when building with asserts but without Py_DEBUG. */ 2317 #define _PyUnicode_CheckConsistency(op, check_content) PyUnicode_Check(op) 2318 #endif 2319 2320 #ifndef Py_LIMITED_API 2321 /* Return an interned Unicode object for an Identifier; may fail if there is no memory.*/ 2322 PyAPI_FUNC(PyObject*) _PyUnicode_FromId(_Py_Identifier*); 2323 /* Clear all static strings. */ 2324 PyAPI_FUNC(void) _PyUnicode_ClearStaticStrings(void); 2325 2326 /* Fast equality check when the inputs are known to be exact unicode types 2327 and where the hash values are equal (i.e. a very probable match) */ 2328 PyAPI_FUNC(int) _PyUnicode_EQ(PyObject *, PyObject *); 2329 #endif /* !Py_LIMITED_API */ 2330 2331 #ifdef __cplusplus 2332 } 2333 #endif 2334 #endif /* !Py_UNICODEOBJECT_H */ 2335