1 #define PY_SSIZE_T_CLEAN 2 #include "Python.h" 3 #include "pycore_abstract.h" // _PyIndex_Check() 4 #include "pycore_bytes_methods.h" 5 6 PyDoc_STRVAR_shared(_Py_isspace__doc__, 7 "B.isspace() -> bool\n\ 8 \n\ 9 Return True if all characters in B are whitespace\n\ 10 and there is at least one character in B, False otherwise."); 11 12 PyObject* _Py_bytes_isspace(const char * cptr,Py_ssize_t len)13 _Py_bytes_isspace(const char *cptr, Py_ssize_t len) 14 { 15 const unsigned char *p 16 = (const unsigned char *) cptr; 17 const unsigned char *e; 18 19 /* Shortcut for single character strings */ 20 if (len == 1 && Py_ISSPACE(*p)) 21 Py_RETURN_TRUE; 22 23 /* Special case for empty strings */ 24 if (len == 0) 25 Py_RETURN_FALSE; 26 27 e = p + len; 28 for (; p < e; p++) { 29 if (!Py_ISSPACE(*p)) 30 Py_RETURN_FALSE; 31 } 32 Py_RETURN_TRUE; 33 } 34 35 36 PyDoc_STRVAR_shared(_Py_isalpha__doc__, 37 "B.isalpha() -> bool\n\ 38 \n\ 39 Return True if all characters in B are alphabetic\n\ 40 and there is at least one character in B, False otherwise."); 41 42 PyObject* _Py_bytes_isalpha(const char * cptr,Py_ssize_t len)43 _Py_bytes_isalpha(const char *cptr, Py_ssize_t len) 44 { 45 const unsigned char *p 46 = (const unsigned char *) cptr; 47 const unsigned char *e; 48 49 /* Shortcut for single character strings */ 50 if (len == 1 && Py_ISALPHA(*p)) 51 Py_RETURN_TRUE; 52 53 /* Special case for empty strings */ 54 if (len == 0) 55 Py_RETURN_FALSE; 56 57 e = p + len; 58 for (; p < e; p++) { 59 if (!Py_ISALPHA(*p)) 60 Py_RETURN_FALSE; 61 } 62 Py_RETURN_TRUE; 63 } 64 65 66 PyDoc_STRVAR_shared(_Py_isalnum__doc__, 67 "B.isalnum() -> bool\n\ 68 \n\ 69 Return True if all characters in B are alphanumeric\n\ 70 and there is at least one character in B, False otherwise."); 71 72 PyObject* _Py_bytes_isalnum(const char * cptr,Py_ssize_t len)73 _Py_bytes_isalnum(const char *cptr, Py_ssize_t len) 74 { 75 const unsigned char *p 76 = (const unsigned char *) cptr; 77 const unsigned char *e; 78 79 /* Shortcut for single character strings */ 80 if (len == 1 && Py_ISALNUM(*p)) 81 Py_RETURN_TRUE; 82 83 /* Special case for empty strings */ 84 if (len == 0) 85 Py_RETURN_FALSE; 86 87 e = p + len; 88 for (; p < e; p++) { 89 if (!Py_ISALNUM(*p)) 90 Py_RETURN_FALSE; 91 } 92 Py_RETURN_TRUE; 93 } 94 95 96 PyDoc_STRVAR_shared(_Py_isascii__doc__, 97 "B.isascii() -> bool\n\ 98 \n\ 99 Return True if B is empty or all characters in B are ASCII,\n\ 100 False otherwise."); 101 102 // Optimization is copied from ascii_decode in unicodeobject.c 103 /* Mask to quickly check whether a C 'size_t' contains a 104 non-ASCII, UTF8-encoded char. */ 105 #if (SIZEOF_SIZE_T == 8) 106 # define ASCII_CHAR_MASK 0x8080808080808080ULL 107 #elif (SIZEOF_SIZE_T == 4) 108 # define ASCII_CHAR_MASK 0x80808080U 109 #else 110 # error C 'size_t' size should be either 4 or 8! 111 #endif 112 113 PyObject* _Py_bytes_isascii(const char * cptr,Py_ssize_t len)114 _Py_bytes_isascii(const char *cptr, Py_ssize_t len) 115 { 116 const char *p = cptr; 117 const char *end = p + len; 118 119 while (p < end) { 120 /* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h 121 for an explanation. */ 122 if (_Py_IS_ALIGNED(p, ALIGNOF_SIZE_T)) { 123 /* Help allocation */ 124 const char *_p = p; 125 while (_p + SIZEOF_SIZE_T <= end) { 126 size_t value = *(const size_t *) _p; 127 if (value & ASCII_CHAR_MASK) { 128 Py_RETURN_FALSE; 129 } 130 _p += SIZEOF_SIZE_T; 131 } 132 p = _p; 133 if (_p == end) 134 break; 135 } 136 if ((unsigned char)*p & 0x80) { 137 Py_RETURN_FALSE; 138 } 139 p++; 140 } 141 Py_RETURN_TRUE; 142 } 143 144 #undef ASCII_CHAR_MASK 145 146 147 PyDoc_STRVAR_shared(_Py_isdigit__doc__, 148 "B.isdigit() -> bool\n\ 149 \n\ 150 Return True if all characters in B are digits\n\ 151 and there is at least one character in B, False otherwise."); 152 153 PyObject* _Py_bytes_isdigit(const char * cptr,Py_ssize_t len)154 _Py_bytes_isdigit(const char *cptr, Py_ssize_t len) 155 { 156 const unsigned char *p 157 = (const unsigned char *) cptr; 158 const unsigned char *e; 159 160 /* Shortcut for single character strings */ 161 if (len == 1 && Py_ISDIGIT(*p)) 162 Py_RETURN_TRUE; 163 164 /* Special case for empty strings */ 165 if (len == 0) 166 Py_RETURN_FALSE; 167 168 e = p + len; 169 for (; p < e; p++) { 170 if (!Py_ISDIGIT(*p)) 171 Py_RETURN_FALSE; 172 } 173 Py_RETURN_TRUE; 174 } 175 176 177 PyDoc_STRVAR_shared(_Py_islower__doc__, 178 "B.islower() -> bool\n\ 179 \n\ 180 Return True if all cased characters in B are lowercase and there is\n\ 181 at least one cased character in B, False otherwise."); 182 183 PyObject* _Py_bytes_islower(const char * cptr,Py_ssize_t len)184 _Py_bytes_islower(const char *cptr, Py_ssize_t len) 185 { 186 const unsigned char *p 187 = (const unsigned char *) cptr; 188 const unsigned char *e; 189 int cased; 190 191 /* Shortcut for single character strings */ 192 if (len == 1) 193 return PyBool_FromLong(Py_ISLOWER(*p)); 194 195 /* Special case for empty strings */ 196 if (len == 0) 197 Py_RETURN_FALSE; 198 199 e = p + len; 200 cased = 0; 201 for (; p < e; p++) { 202 if (Py_ISUPPER(*p)) 203 Py_RETURN_FALSE; 204 else if (!cased && Py_ISLOWER(*p)) 205 cased = 1; 206 } 207 return PyBool_FromLong(cased); 208 } 209 210 211 PyDoc_STRVAR_shared(_Py_isupper__doc__, 212 "B.isupper() -> bool\n\ 213 \n\ 214 Return True if all cased characters in B are uppercase and there is\n\ 215 at least one cased character in B, False otherwise."); 216 217 PyObject* _Py_bytes_isupper(const char * cptr,Py_ssize_t len)218 _Py_bytes_isupper(const char *cptr, Py_ssize_t len) 219 { 220 const unsigned char *p 221 = (const unsigned char *) cptr; 222 const unsigned char *e; 223 int cased; 224 225 /* Shortcut for single character strings */ 226 if (len == 1) 227 return PyBool_FromLong(Py_ISUPPER(*p)); 228 229 /* Special case for empty strings */ 230 if (len == 0) 231 Py_RETURN_FALSE; 232 233 e = p + len; 234 cased = 0; 235 for (; p < e; p++) { 236 if (Py_ISLOWER(*p)) 237 Py_RETURN_FALSE; 238 else if (!cased && Py_ISUPPER(*p)) 239 cased = 1; 240 } 241 return PyBool_FromLong(cased); 242 } 243 244 245 PyDoc_STRVAR_shared(_Py_istitle__doc__, 246 "B.istitle() -> bool\n\ 247 \n\ 248 Return True if B is a titlecased string and there is at least one\n\ 249 character in B, i.e. uppercase characters may only follow uncased\n\ 250 characters and lowercase characters only cased ones. Return False\n\ 251 otherwise."); 252 253 PyObject* _Py_bytes_istitle(const char * cptr,Py_ssize_t len)254 _Py_bytes_istitle(const char *cptr, Py_ssize_t len) 255 { 256 const unsigned char *p 257 = (const unsigned char *) cptr; 258 const unsigned char *e; 259 int cased, previous_is_cased; 260 261 /* Shortcut for single character strings */ 262 if (len == 1) 263 return PyBool_FromLong(Py_ISUPPER(*p)); 264 265 /* Special case for empty strings */ 266 if (len == 0) 267 Py_RETURN_FALSE; 268 269 e = p + len; 270 cased = 0; 271 previous_is_cased = 0; 272 for (; p < e; p++) { 273 const unsigned char ch = *p; 274 275 if (Py_ISUPPER(ch)) { 276 if (previous_is_cased) 277 Py_RETURN_FALSE; 278 previous_is_cased = 1; 279 cased = 1; 280 } 281 else if (Py_ISLOWER(ch)) { 282 if (!previous_is_cased) 283 Py_RETURN_FALSE; 284 previous_is_cased = 1; 285 cased = 1; 286 } 287 else 288 previous_is_cased = 0; 289 } 290 return PyBool_FromLong(cased); 291 } 292 293 294 PyDoc_STRVAR_shared(_Py_lower__doc__, 295 "B.lower() -> copy of B\n\ 296 \n\ 297 Return a copy of B with all ASCII characters converted to lowercase."); 298 299 void _Py_bytes_lower(char * result,const char * cptr,Py_ssize_t len)300 _Py_bytes_lower(char *result, const char *cptr, Py_ssize_t len) 301 { 302 Py_ssize_t i; 303 304 for (i = 0; i < len; i++) { 305 result[i] = Py_TOLOWER((unsigned char) cptr[i]); 306 } 307 } 308 309 310 PyDoc_STRVAR_shared(_Py_upper__doc__, 311 "B.upper() -> copy of B\n\ 312 \n\ 313 Return a copy of B with all ASCII characters converted to uppercase."); 314 315 void _Py_bytes_upper(char * result,const char * cptr,Py_ssize_t len)316 _Py_bytes_upper(char *result, const char *cptr, Py_ssize_t len) 317 { 318 Py_ssize_t i; 319 320 for (i = 0; i < len; i++) { 321 result[i] = Py_TOUPPER((unsigned char) cptr[i]); 322 } 323 } 324 325 326 PyDoc_STRVAR_shared(_Py_title__doc__, 327 "B.title() -> copy of B\n\ 328 \n\ 329 Return a titlecased version of B, i.e. ASCII words start with uppercase\n\ 330 characters, all remaining cased characters have lowercase."); 331 332 void _Py_bytes_title(char * result,const char * s,Py_ssize_t len)333 _Py_bytes_title(char *result, const char *s, Py_ssize_t len) 334 { 335 Py_ssize_t i; 336 int previous_is_cased = 0; 337 338 for (i = 0; i < len; i++) { 339 int c = Py_CHARMASK(*s++); 340 if (Py_ISLOWER(c)) { 341 if (!previous_is_cased) 342 c = Py_TOUPPER(c); 343 previous_is_cased = 1; 344 } else if (Py_ISUPPER(c)) { 345 if (previous_is_cased) 346 c = Py_TOLOWER(c); 347 previous_is_cased = 1; 348 } else 349 previous_is_cased = 0; 350 *result++ = c; 351 } 352 } 353 354 355 PyDoc_STRVAR_shared(_Py_capitalize__doc__, 356 "B.capitalize() -> copy of B\n\ 357 \n\ 358 Return a copy of B with only its first character capitalized (ASCII)\n\ 359 and the rest lower-cased."); 360 361 void _Py_bytes_capitalize(char * result,const char * s,Py_ssize_t len)362 _Py_bytes_capitalize(char *result, const char *s, Py_ssize_t len) 363 { 364 if (len > 0) { 365 *result = Py_TOUPPER(*s); 366 _Py_bytes_lower(result + 1, s + 1, len - 1); 367 } 368 } 369 370 371 PyDoc_STRVAR_shared(_Py_swapcase__doc__, 372 "B.swapcase() -> copy of B\n\ 373 \n\ 374 Return a copy of B with uppercase ASCII characters converted\n\ 375 to lowercase ASCII and vice versa."); 376 377 void _Py_bytes_swapcase(char * result,const char * s,Py_ssize_t len)378 _Py_bytes_swapcase(char *result, const char *s, Py_ssize_t len) 379 { 380 Py_ssize_t i; 381 382 for (i = 0; i < len; i++) { 383 int c = Py_CHARMASK(*s++); 384 if (Py_ISLOWER(c)) { 385 *result = Py_TOUPPER(c); 386 } 387 else if (Py_ISUPPER(c)) { 388 *result = Py_TOLOWER(c); 389 } 390 else 391 *result = c; 392 result++; 393 } 394 } 395 396 397 PyDoc_STRVAR_shared(_Py_maketrans__doc__, 398 "B.maketrans(frm, to) -> translation table\n\ 399 \n\ 400 Return a translation table (a bytes object of length 256) suitable\n\ 401 for use in the bytes or bytearray translate method where each byte\n\ 402 in frm is mapped to the byte at the same position in to.\n\ 403 The bytes objects frm and to must be of the same length."); 404 405 PyObject * _Py_bytes_maketrans(Py_buffer * frm,Py_buffer * to)406 _Py_bytes_maketrans(Py_buffer *frm, Py_buffer *to) 407 { 408 PyObject *res = NULL; 409 Py_ssize_t i; 410 char *p; 411 412 if (frm->len != to->len) { 413 PyErr_Format(PyExc_ValueError, 414 "maketrans arguments must have same length"); 415 return NULL; 416 } 417 res = PyBytes_FromStringAndSize(NULL, 256); 418 if (!res) 419 return NULL; 420 p = PyBytes_AS_STRING(res); 421 for (i = 0; i < 256; i++) 422 p[i] = (char) i; 423 for (i = 0; i < frm->len; i++) { 424 p[((unsigned char *)frm->buf)[i]] = ((char *)to->buf)[i]; 425 } 426 427 return res; 428 } 429 430 #define FASTSEARCH fastsearch 431 #define STRINGLIB(F) stringlib_##F 432 #define STRINGLIB_CHAR char 433 #define STRINGLIB_SIZEOF_CHAR 1 434 435 #include "stringlib/fastsearch.h" 436 #include "stringlib/count.h" 437 #include "stringlib/find.h" 438 439 /* 440 Wraps stringlib_parse_args_finds() and additionally checks the first 441 argument type. 442 443 In case the first argument is a bytes-like object, sets it to subobj, 444 and doesn't touch the byte parameter. 445 In case it is an integer in range(0, 256), writes the integer value 446 to byte, and sets subobj to NULL. 447 448 The other parameters are similar to those of 449 stringlib_parse_args_finds(). 450 */ 451 452 Py_LOCAL_INLINE(int) parse_args_finds_byte(const char * function_name,PyObject * args,PyObject ** subobj,char * byte,Py_ssize_t * start,Py_ssize_t * end)453 parse_args_finds_byte(const char *function_name, PyObject *args, 454 PyObject **subobj, char *byte, 455 Py_ssize_t *start, Py_ssize_t *end) 456 { 457 PyObject *tmp_subobj; 458 Py_ssize_t ival; 459 460 if(!stringlib_parse_args_finds(function_name, args, &tmp_subobj, 461 start, end)) 462 return 0; 463 464 if (PyObject_CheckBuffer(tmp_subobj)) { 465 *subobj = tmp_subobj; 466 return 1; 467 } 468 469 if (!_PyIndex_Check(tmp_subobj)) { 470 PyErr_Format(PyExc_TypeError, 471 "argument should be integer or bytes-like object, " 472 "not '%.200s'", 473 Py_TYPE(tmp_subobj)->tp_name); 474 return 0; 475 } 476 477 ival = PyNumber_AsSsize_t(tmp_subobj, NULL); 478 if (ival == -1 && PyErr_Occurred()) { 479 return 0; 480 } 481 if (ival < 0 || ival > 255) { 482 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)"); 483 return 0; 484 } 485 486 *subobj = NULL; 487 *byte = (char)ival; 488 return 1; 489 } 490 491 /* helper macro to fixup start/end slice values */ 492 #define ADJUST_INDICES(start, end, len) \ 493 if (end > len) \ 494 end = len; \ 495 else if (end < 0) { \ 496 end += len; \ 497 if (end < 0) \ 498 end = 0; \ 499 } \ 500 if (start < 0) { \ 501 start += len; \ 502 if (start < 0) \ 503 start = 0; \ 504 } 505 506 Py_LOCAL_INLINE(Py_ssize_t) find_internal(const char * str,Py_ssize_t len,const char * function_name,PyObject * args,int dir)507 find_internal(const char *str, Py_ssize_t len, 508 const char *function_name, PyObject *args, int dir) 509 { 510 PyObject *subobj; 511 char byte; 512 Py_buffer subbuf; 513 const char *sub; 514 Py_ssize_t sub_len; 515 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX; 516 Py_ssize_t res; 517 518 if (!parse_args_finds_byte(function_name, args, 519 &subobj, &byte, &start, &end)) 520 return -2; 521 522 if (subobj) { 523 if (PyObject_GetBuffer(subobj, &subbuf, PyBUF_SIMPLE) != 0) 524 return -2; 525 526 sub = subbuf.buf; 527 sub_len = subbuf.len; 528 } 529 else { 530 sub = &byte; 531 sub_len = 1; 532 } 533 534 ADJUST_INDICES(start, end, len); 535 if (end - start < sub_len) 536 res = -1; 537 else if (sub_len == 1) { 538 if (dir > 0) 539 res = stringlib_find_char( 540 str + start, end - start, 541 *sub); 542 else 543 res = stringlib_rfind_char( 544 str + start, end - start, 545 *sub); 546 if (res >= 0) 547 res += start; 548 } 549 else { 550 if (dir > 0) 551 res = stringlib_find_slice( 552 str, len, 553 sub, sub_len, start, end); 554 else 555 res = stringlib_rfind_slice( 556 str, len, 557 sub, sub_len, start, end); 558 } 559 560 if (subobj) 561 PyBuffer_Release(&subbuf); 562 563 return res; 564 } 565 566 PyDoc_STRVAR_shared(_Py_find__doc__, 567 "B.find(sub[, start[, end]]) -> int\n\ 568 \n\ 569 Return the lowest index in B where subsection sub is found,\n\ 570 such that sub is contained within B[start,end]. Optional\n\ 571 arguments start and end are interpreted as in slice notation.\n\ 572 \n\ 573 Return -1 on failure."); 574 575 PyObject * _Py_bytes_find(const char * str,Py_ssize_t len,PyObject * args)576 _Py_bytes_find(const char *str, Py_ssize_t len, PyObject *args) 577 { 578 Py_ssize_t result = find_internal(str, len, "find", args, +1); 579 if (result == -2) 580 return NULL; 581 return PyLong_FromSsize_t(result); 582 } 583 584 PyDoc_STRVAR_shared(_Py_index__doc__, 585 "B.index(sub[, start[, end]]) -> int\n\ 586 \n\ 587 Return the lowest index in B where subsection sub is found,\n\ 588 such that sub is contained within B[start,end]. Optional\n\ 589 arguments start and end are interpreted as in slice notation.\n\ 590 \n\ 591 Raises ValueError when the subsection is not found."); 592 593 PyObject * _Py_bytes_index(const char * str,Py_ssize_t len,PyObject * args)594 _Py_bytes_index(const char *str, Py_ssize_t len, PyObject *args) 595 { 596 Py_ssize_t result = find_internal(str, len, "index", args, +1); 597 if (result == -2) 598 return NULL; 599 if (result == -1) { 600 PyErr_SetString(PyExc_ValueError, 601 "subsection not found"); 602 return NULL; 603 } 604 return PyLong_FromSsize_t(result); 605 } 606 607 PyDoc_STRVAR_shared(_Py_rfind__doc__, 608 "B.rfind(sub[, start[, end]]) -> int\n\ 609 \n\ 610 Return the highest index in B where subsection sub is found,\n\ 611 such that sub is contained within B[start,end]. Optional\n\ 612 arguments start and end are interpreted as in slice notation.\n\ 613 \n\ 614 Return -1 on failure."); 615 616 PyObject * _Py_bytes_rfind(const char * str,Py_ssize_t len,PyObject * args)617 _Py_bytes_rfind(const char *str, Py_ssize_t len, PyObject *args) 618 { 619 Py_ssize_t result = find_internal(str, len, "rfind", args, -1); 620 if (result == -2) 621 return NULL; 622 return PyLong_FromSsize_t(result); 623 } 624 625 PyDoc_STRVAR_shared(_Py_rindex__doc__, 626 "B.rindex(sub[, start[, end]]) -> int\n\ 627 \n\ 628 Return the highest index in B where subsection sub is found,\n\ 629 such that sub is contained within B[start,end]. Optional\n\ 630 arguments start and end are interpreted as in slice notation.\n\ 631 \n\ 632 Raise ValueError when the subsection is not found."); 633 634 PyObject * _Py_bytes_rindex(const char * str,Py_ssize_t len,PyObject * args)635 _Py_bytes_rindex(const char *str, Py_ssize_t len, PyObject *args) 636 { 637 Py_ssize_t result = find_internal(str, len, "rindex", args, -1); 638 if (result == -2) 639 return NULL; 640 if (result == -1) { 641 PyErr_SetString(PyExc_ValueError, 642 "subsection not found"); 643 return NULL; 644 } 645 return PyLong_FromSsize_t(result); 646 } 647 648 PyDoc_STRVAR_shared(_Py_count__doc__, 649 "B.count(sub[, start[, end]]) -> int\n\ 650 \n\ 651 Return the number of non-overlapping occurrences of subsection sub in\n\ 652 bytes B[start:end]. Optional arguments start and end are interpreted\n\ 653 as in slice notation."); 654 655 PyObject * _Py_bytes_count(const char * str,Py_ssize_t len,PyObject * args)656 _Py_bytes_count(const char *str, Py_ssize_t len, PyObject *args) 657 { 658 PyObject *sub_obj; 659 const char *sub; 660 Py_ssize_t sub_len; 661 char byte; 662 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX; 663 664 Py_buffer vsub; 665 PyObject *count_obj; 666 667 if (!parse_args_finds_byte("count", args, 668 &sub_obj, &byte, &start, &end)) 669 return NULL; 670 671 if (sub_obj) { 672 if (PyObject_GetBuffer(sub_obj, &vsub, PyBUF_SIMPLE) != 0) 673 return NULL; 674 675 sub = vsub.buf; 676 sub_len = vsub.len; 677 } 678 else { 679 sub = &byte; 680 sub_len = 1; 681 } 682 683 ADJUST_INDICES(start, end, len); 684 685 count_obj = PyLong_FromSsize_t( 686 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX) 687 ); 688 689 if (sub_obj) 690 PyBuffer_Release(&vsub); 691 692 return count_obj; 693 } 694 695 int _Py_bytes_contains(const char * str,Py_ssize_t len,PyObject * arg)696 _Py_bytes_contains(const char *str, Py_ssize_t len, PyObject *arg) 697 { 698 Py_ssize_t ival = PyNumber_AsSsize_t(arg, NULL); 699 if (ival == -1 && PyErr_Occurred()) { 700 Py_buffer varg; 701 Py_ssize_t pos; 702 PyErr_Clear(); 703 if (PyObject_GetBuffer(arg, &varg, PyBUF_SIMPLE) != 0) 704 return -1; 705 pos = stringlib_find(str, len, 706 varg.buf, varg.len, 0); 707 PyBuffer_Release(&varg); 708 return pos >= 0; 709 } 710 if (ival < 0 || ival >= 256) { 711 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)"); 712 return -1; 713 } 714 715 return memchr(str, (int) ival, len) != NULL; 716 } 717 718 719 /* Matches the end (direction >= 0) or start (direction < 0) of the buffer 720 * against substr, using the start and end arguments. Returns 721 * -1 on error, 0 if not found and 1 if found. 722 */ 723 static int tailmatch(const char * str,Py_ssize_t len,PyObject * substr,Py_ssize_t start,Py_ssize_t end,int direction)724 tailmatch(const char *str, Py_ssize_t len, PyObject *substr, 725 Py_ssize_t start, Py_ssize_t end, int direction) 726 { 727 Py_buffer sub_view = {NULL, NULL}; 728 const char *sub; 729 Py_ssize_t slen; 730 731 if (PyBytes_Check(substr)) { 732 sub = PyBytes_AS_STRING(substr); 733 slen = PyBytes_GET_SIZE(substr); 734 } 735 else { 736 if (PyObject_GetBuffer(substr, &sub_view, PyBUF_SIMPLE) != 0) 737 return -1; 738 sub = sub_view.buf; 739 slen = sub_view.len; 740 } 741 742 ADJUST_INDICES(start, end, len); 743 744 if (direction < 0) { 745 /* startswith */ 746 if (start > len - slen) 747 goto notfound; 748 } else { 749 /* endswith */ 750 if (end - start < slen || start > len) 751 goto notfound; 752 753 if (end - slen > start) 754 start = end - slen; 755 } 756 if (end - start < slen) 757 goto notfound; 758 if (memcmp(str + start, sub, slen) != 0) 759 goto notfound; 760 761 PyBuffer_Release(&sub_view); 762 return 1; 763 764 notfound: 765 PyBuffer_Release(&sub_view); 766 return 0; 767 } 768 769 static PyObject * _Py_bytes_tailmatch(const char * str,Py_ssize_t len,const char * function_name,PyObject * args,int direction)770 _Py_bytes_tailmatch(const char *str, Py_ssize_t len, 771 const char *function_name, PyObject *args, 772 int direction) 773 { 774 Py_ssize_t start = 0; 775 Py_ssize_t end = PY_SSIZE_T_MAX; 776 PyObject *subobj; 777 int result; 778 779 if (!stringlib_parse_args_finds(function_name, args, &subobj, &start, &end)) 780 return NULL; 781 if (PyTuple_Check(subobj)) { 782 Py_ssize_t i; 783 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) { 784 result = tailmatch(str, len, PyTuple_GET_ITEM(subobj, i), 785 start, end, direction); 786 if (result == -1) 787 return NULL; 788 else if (result) { 789 Py_RETURN_TRUE; 790 } 791 } 792 Py_RETURN_FALSE; 793 } 794 result = tailmatch(str, len, subobj, start, end, direction); 795 if (result == -1) { 796 if (PyErr_ExceptionMatches(PyExc_TypeError)) 797 PyErr_Format(PyExc_TypeError, 798 "%s first arg must be bytes or a tuple of bytes, " 799 "not %s", 800 function_name, Py_TYPE(subobj)->tp_name); 801 return NULL; 802 } 803 else 804 return PyBool_FromLong(result); 805 } 806 807 PyDoc_STRVAR_shared(_Py_startswith__doc__, 808 "B.startswith(prefix[, start[, end]]) -> bool\n\ 809 \n\ 810 Return True if B starts with the specified prefix, False otherwise.\n\ 811 With optional start, test B beginning at that position.\n\ 812 With optional end, stop comparing B at that position.\n\ 813 prefix can also be a tuple of bytes to try."); 814 815 PyObject * _Py_bytes_startswith(const char * str,Py_ssize_t len,PyObject * args)816 _Py_bytes_startswith(const char *str, Py_ssize_t len, PyObject *args) 817 { 818 return _Py_bytes_tailmatch(str, len, "startswith", args, -1); 819 } 820 821 PyDoc_STRVAR_shared(_Py_endswith__doc__, 822 "B.endswith(suffix[, start[, end]]) -> bool\n\ 823 \n\ 824 Return True if B ends with the specified suffix, False otherwise.\n\ 825 With optional start, test B beginning at that position.\n\ 826 With optional end, stop comparing B at that position.\n\ 827 suffix can also be a tuple of bytes to try."); 828 829 PyObject * _Py_bytes_endswith(const char * str,Py_ssize_t len,PyObject * args)830 _Py_bytes_endswith(const char *str, Py_ssize_t len, PyObject *args) 831 { 832 return _Py_bytes_tailmatch(str, len, "endswith", args, +1); 833 } 834