1 #define PY_SSIZE_T_CLEAN
2 #include "Python.h"
3 #include "pycore_abstract.h" // _PyIndex_Check()
4 #include "pycore_bytes_methods.h"
5
6 PyDoc_STRVAR_shared(_Py_isspace__doc__,
7 "B.isspace() -> bool\n\
8 \n\
9 Return True if all characters in B are whitespace\n\
10 and there is at least one character in B, False otherwise.");
11
12 PyObject*
_Py_bytes_isspace(const char * cptr,Py_ssize_t len)13 _Py_bytes_isspace(const char *cptr, Py_ssize_t len)
14 {
15 const unsigned char *p
16 = (const unsigned char *) cptr;
17 const unsigned char *e;
18
19 /* Shortcut for single character strings */
20 if (len == 1 && Py_ISSPACE(*p))
21 Py_RETURN_TRUE;
22
23 /* Special case for empty strings */
24 if (len == 0)
25 Py_RETURN_FALSE;
26
27 e = p + len;
28 for (; p < e; p++) {
29 if (!Py_ISSPACE(*p))
30 Py_RETURN_FALSE;
31 }
32 Py_RETURN_TRUE;
33 }
34
35
36 PyDoc_STRVAR_shared(_Py_isalpha__doc__,
37 "B.isalpha() -> bool\n\
38 \n\
39 Return True if all characters in B are alphabetic\n\
40 and there is at least one character in B, False otherwise.");
41
42 PyObject*
_Py_bytes_isalpha(const char * cptr,Py_ssize_t len)43 _Py_bytes_isalpha(const char *cptr, Py_ssize_t len)
44 {
45 const unsigned char *p
46 = (const unsigned char *) cptr;
47 const unsigned char *e;
48
49 /* Shortcut for single character strings */
50 if (len == 1 && Py_ISALPHA(*p))
51 Py_RETURN_TRUE;
52
53 /* Special case for empty strings */
54 if (len == 0)
55 Py_RETURN_FALSE;
56
57 e = p + len;
58 for (; p < e; p++) {
59 if (!Py_ISALPHA(*p))
60 Py_RETURN_FALSE;
61 }
62 Py_RETURN_TRUE;
63 }
64
65
66 PyDoc_STRVAR_shared(_Py_isalnum__doc__,
67 "B.isalnum() -> bool\n\
68 \n\
69 Return True if all characters in B are alphanumeric\n\
70 and there is at least one character in B, False otherwise.");
71
72 PyObject*
_Py_bytes_isalnum(const char * cptr,Py_ssize_t len)73 _Py_bytes_isalnum(const char *cptr, Py_ssize_t len)
74 {
75 const unsigned char *p
76 = (const unsigned char *) cptr;
77 const unsigned char *e;
78
79 /* Shortcut for single character strings */
80 if (len == 1 && Py_ISALNUM(*p))
81 Py_RETURN_TRUE;
82
83 /* Special case for empty strings */
84 if (len == 0)
85 Py_RETURN_FALSE;
86
87 e = p + len;
88 for (; p < e; p++) {
89 if (!Py_ISALNUM(*p))
90 Py_RETURN_FALSE;
91 }
92 Py_RETURN_TRUE;
93 }
94
95
96 PyDoc_STRVAR_shared(_Py_isascii__doc__,
97 "B.isascii() -> bool\n\
98 \n\
99 Return True if B is empty or all characters in B are ASCII,\n\
100 False otherwise.");
101
102 // Optimization is copied from ascii_decode in unicodeobject.c
103 /* Mask to quickly check whether a C 'size_t' contains a
104 non-ASCII, UTF8-encoded char. */
105 #if (SIZEOF_SIZE_T == 8)
106 # define ASCII_CHAR_MASK 0x8080808080808080ULL
107 #elif (SIZEOF_SIZE_T == 4)
108 # define ASCII_CHAR_MASK 0x80808080U
109 #else
110 # error C 'size_t' size should be either 4 or 8!
111 #endif
112
113 PyObject*
_Py_bytes_isascii(const char * cptr,Py_ssize_t len)114 _Py_bytes_isascii(const char *cptr, Py_ssize_t len)
115 {
116 const char *p = cptr;
117 const char *end = p + len;
118
119 while (p < end) {
120 /* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h
121 for an explanation. */
122 if (_Py_IS_ALIGNED(p, ALIGNOF_SIZE_T)) {
123 /* Help allocation */
124 const char *_p = p;
125 while (_p + SIZEOF_SIZE_T <= end) {
126 size_t value = *(const size_t *) _p;
127 if (value & ASCII_CHAR_MASK) {
128 Py_RETURN_FALSE;
129 }
130 _p += SIZEOF_SIZE_T;
131 }
132 p = _p;
133 if (_p == end)
134 break;
135 }
136 if ((unsigned char)*p & 0x80) {
137 Py_RETURN_FALSE;
138 }
139 p++;
140 }
141 Py_RETURN_TRUE;
142 }
143
144 #undef ASCII_CHAR_MASK
145
146
147 PyDoc_STRVAR_shared(_Py_isdigit__doc__,
148 "B.isdigit() -> bool\n\
149 \n\
150 Return True if all characters in B are digits\n\
151 and there is at least one character in B, False otherwise.");
152
153 PyObject*
_Py_bytes_isdigit(const char * cptr,Py_ssize_t len)154 _Py_bytes_isdigit(const char *cptr, Py_ssize_t len)
155 {
156 const unsigned char *p
157 = (const unsigned char *) cptr;
158 const unsigned char *e;
159
160 /* Shortcut for single character strings */
161 if (len == 1 && Py_ISDIGIT(*p))
162 Py_RETURN_TRUE;
163
164 /* Special case for empty strings */
165 if (len == 0)
166 Py_RETURN_FALSE;
167
168 e = p + len;
169 for (; p < e; p++) {
170 if (!Py_ISDIGIT(*p))
171 Py_RETURN_FALSE;
172 }
173 Py_RETURN_TRUE;
174 }
175
176
177 PyDoc_STRVAR_shared(_Py_islower__doc__,
178 "B.islower() -> bool\n\
179 \n\
180 Return True if all cased characters in B are lowercase and there is\n\
181 at least one cased character in B, False otherwise.");
182
183 PyObject*
_Py_bytes_islower(const char * cptr,Py_ssize_t len)184 _Py_bytes_islower(const char *cptr, Py_ssize_t len)
185 {
186 const unsigned char *p
187 = (const unsigned char *) cptr;
188 const unsigned char *e;
189 int cased;
190
191 /* Shortcut for single character strings */
192 if (len == 1)
193 return PyBool_FromLong(Py_ISLOWER(*p));
194
195 /* Special case for empty strings */
196 if (len == 0)
197 Py_RETURN_FALSE;
198
199 e = p + len;
200 cased = 0;
201 for (; p < e; p++) {
202 if (Py_ISUPPER(*p))
203 Py_RETURN_FALSE;
204 else if (!cased && Py_ISLOWER(*p))
205 cased = 1;
206 }
207 return PyBool_FromLong(cased);
208 }
209
210
211 PyDoc_STRVAR_shared(_Py_isupper__doc__,
212 "B.isupper() -> bool\n\
213 \n\
214 Return True if all cased characters in B are uppercase and there is\n\
215 at least one cased character in B, False otherwise.");
216
217 PyObject*
_Py_bytes_isupper(const char * cptr,Py_ssize_t len)218 _Py_bytes_isupper(const char *cptr, Py_ssize_t len)
219 {
220 const unsigned char *p
221 = (const unsigned char *) cptr;
222 const unsigned char *e;
223 int cased;
224
225 /* Shortcut for single character strings */
226 if (len == 1)
227 return PyBool_FromLong(Py_ISUPPER(*p));
228
229 /* Special case for empty strings */
230 if (len == 0)
231 Py_RETURN_FALSE;
232
233 e = p + len;
234 cased = 0;
235 for (; p < e; p++) {
236 if (Py_ISLOWER(*p))
237 Py_RETURN_FALSE;
238 else if (!cased && Py_ISUPPER(*p))
239 cased = 1;
240 }
241 return PyBool_FromLong(cased);
242 }
243
244
245 PyDoc_STRVAR_shared(_Py_istitle__doc__,
246 "B.istitle() -> bool\n\
247 \n\
248 Return True if B is a titlecased string and there is at least one\n\
249 character in B, i.e. uppercase characters may only follow uncased\n\
250 characters and lowercase characters only cased ones. Return False\n\
251 otherwise.");
252
253 PyObject*
_Py_bytes_istitle(const char * cptr,Py_ssize_t len)254 _Py_bytes_istitle(const char *cptr, Py_ssize_t len)
255 {
256 const unsigned char *p
257 = (const unsigned char *) cptr;
258 const unsigned char *e;
259 int cased, previous_is_cased;
260
261 /* Shortcut for single character strings */
262 if (len == 1)
263 return PyBool_FromLong(Py_ISUPPER(*p));
264
265 /* Special case for empty strings */
266 if (len == 0)
267 Py_RETURN_FALSE;
268
269 e = p + len;
270 cased = 0;
271 previous_is_cased = 0;
272 for (; p < e; p++) {
273 const unsigned char ch = *p;
274
275 if (Py_ISUPPER(ch)) {
276 if (previous_is_cased)
277 Py_RETURN_FALSE;
278 previous_is_cased = 1;
279 cased = 1;
280 }
281 else if (Py_ISLOWER(ch)) {
282 if (!previous_is_cased)
283 Py_RETURN_FALSE;
284 previous_is_cased = 1;
285 cased = 1;
286 }
287 else
288 previous_is_cased = 0;
289 }
290 return PyBool_FromLong(cased);
291 }
292
293
294 PyDoc_STRVAR_shared(_Py_lower__doc__,
295 "B.lower() -> copy of B\n\
296 \n\
297 Return a copy of B with all ASCII characters converted to lowercase.");
298
299 void
_Py_bytes_lower(char * result,const char * cptr,Py_ssize_t len)300 _Py_bytes_lower(char *result, const char *cptr, Py_ssize_t len)
301 {
302 Py_ssize_t i;
303
304 for (i = 0; i < len; i++) {
305 result[i] = Py_TOLOWER((unsigned char) cptr[i]);
306 }
307 }
308
309
310 PyDoc_STRVAR_shared(_Py_upper__doc__,
311 "B.upper() -> copy of B\n\
312 \n\
313 Return a copy of B with all ASCII characters converted to uppercase.");
314
315 void
_Py_bytes_upper(char * result,const char * cptr,Py_ssize_t len)316 _Py_bytes_upper(char *result, const char *cptr, Py_ssize_t len)
317 {
318 Py_ssize_t i;
319
320 for (i = 0; i < len; i++) {
321 result[i] = Py_TOUPPER((unsigned char) cptr[i]);
322 }
323 }
324
325
326 PyDoc_STRVAR_shared(_Py_title__doc__,
327 "B.title() -> copy of B\n\
328 \n\
329 Return a titlecased version of B, i.e. ASCII words start with uppercase\n\
330 characters, all remaining cased characters have lowercase.");
331
332 void
_Py_bytes_title(char * result,const char * s,Py_ssize_t len)333 _Py_bytes_title(char *result, const char *s, Py_ssize_t len)
334 {
335 Py_ssize_t i;
336 int previous_is_cased = 0;
337
338 for (i = 0; i < len; i++) {
339 int c = Py_CHARMASK(*s++);
340 if (Py_ISLOWER(c)) {
341 if (!previous_is_cased)
342 c = Py_TOUPPER(c);
343 previous_is_cased = 1;
344 } else if (Py_ISUPPER(c)) {
345 if (previous_is_cased)
346 c = Py_TOLOWER(c);
347 previous_is_cased = 1;
348 } else
349 previous_is_cased = 0;
350 *result++ = c;
351 }
352 }
353
354
355 PyDoc_STRVAR_shared(_Py_capitalize__doc__,
356 "B.capitalize() -> copy of B\n\
357 \n\
358 Return a copy of B with only its first character capitalized (ASCII)\n\
359 and the rest lower-cased.");
360
361 void
_Py_bytes_capitalize(char * result,const char * s,Py_ssize_t len)362 _Py_bytes_capitalize(char *result, const char *s, Py_ssize_t len)
363 {
364 if (len > 0) {
365 *result = Py_TOUPPER(*s);
366 _Py_bytes_lower(result + 1, s + 1, len - 1);
367 }
368 }
369
370
371 PyDoc_STRVAR_shared(_Py_swapcase__doc__,
372 "B.swapcase() -> copy of B\n\
373 \n\
374 Return a copy of B with uppercase ASCII characters converted\n\
375 to lowercase ASCII and vice versa.");
376
377 void
_Py_bytes_swapcase(char * result,const char * s,Py_ssize_t len)378 _Py_bytes_swapcase(char *result, const char *s, Py_ssize_t len)
379 {
380 Py_ssize_t i;
381
382 for (i = 0; i < len; i++) {
383 int c = Py_CHARMASK(*s++);
384 if (Py_ISLOWER(c)) {
385 *result = Py_TOUPPER(c);
386 }
387 else if (Py_ISUPPER(c)) {
388 *result = Py_TOLOWER(c);
389 }
390 else
391 *result = c;
392 result++;
393 }
394 }
395
396
397 PyDoc_STRVAR_shared(_Py_maketrans__doc__,
398 "B.maketrans(frm, to) -> translation table\n\
399 \n\
400 Return a translation table (a bytes object of length 256) suitable\n\
401 for use in the bytes or bytearray translate method where each byte\n\
402 in frm is mapped to the byte at the same position in to.\n\
403 The bytes objects frm and to must be of the same length.");
404
405 PyObject *
_Py_bytes_maketrans(Py_buffer * frm,Py_buffer * to)406 _Py_bytes_maketrans(Py_buffer *frm, Py_buffer *to)
407 {
408 PyObject *res = NULL;
409 Py_ssize_t i;
410 char *p;
411
412 if (frm->len != to->len) {
413 PyErr_Format(PyExc_ValueError,
414 "maketrans arguments must have same length");
415 return NULL;
416 }
417 res = PyBytes_FromStringAndSize(NULL, 256);
418 if (!res)
419 return NULL;
420 p = PyBytes_AS_STRING(res);
421 for (i = 0; i < 256; i++)
422 p[i] = (char) i;
423 for (i = 0; i < frm->len; i++) {
424 p[((unsigned char *)frm->buf)[i]] = ((char *)to->buf)[i];
425 }
426
427 return res;
428 }
429
430 #define FASTSEARCH fastsearch
431 #define STRINGLIB(F) stringlib_##F
432 #define STRINGLIB_CHAR char
433 #define STRINGLIB_SIZEOF_CHAR 1
434
435 #include "stringlib/fastsearch.h"
436 #include "stringlib/count.h"
437 #include "stringlib/find.h"
438
439 /*
440 Wraps stringlib_parse_args_finds() and additionally checks the first
441 argument type.
442
443 In case the first argument is a bytes-like object, sets it to subobj,
444 and doesn't touch the byte parameter.
445 In case it is an integer in range(0, 256), writes the integer value
446 to byte, and sets subobj to NULL.
447
448 The other parameters are similar to those of
449 stringlib_parse_args_finds().
450 */
451
452 Py_LOCAL_INLINE(int)
parse_args_finds_byte(const char * function_name,PyObject * args,PyObject ** subobj,char * byte,Py_ssize_t * start,Py_ssize_t * end)453 parse_args_finds_byte(const char *function_name, PyObject *args,
454 PyObject **subobj, char *byte,
455 Py_ssize_t *start, Py_ssize_t *end)
456 {
457 PyObject *tmp_subobj;
458 Py_ssize_t ival;
459
460 if(!stringlib_parse_args_finds(function_name, args, &tmp_subobj,
461 start, end))
462 return 0;
463
464 if (PyObject_CheckBuffer(tmp_subobj)) {
465 *subobj = tmp_subobj;
466 return 1;
467 }
468
469 if (!_PyIndex_Check(tmp_subobj)) {
470 PyErr_Format(PyExc_TypeError,
471 "argument should be integer or bytes-like object, "
472 "not '%.200s'",
473 Py_TYPE(tmp_subobj)->tp_name);
474 return 0;
475 }
476
477 ival = PyNumber_AsSsize_t(tmp_subobj, NULL);
478 if (ival == -1 && PyErr_Occurred()) {
479 return 0;
480 }
481 if (ival < 0 || ival > 255) {
482 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
483 return 0;
484 }
485
486 *subobj = NULL;
487 *byte = (char)ival;
488 return 1;
489 }
490
/* Helper macro to fix up start/end slice values in place.
 *
 * `end` is clamped to `len` when too large; a negative `end` or `start`
 * is taken relative to `len` and then clamped to 0.  `start` is NOT
 * clamped to `len`, so callers may be left with start > end.
 *
 * `start` and `end` must be modifiable lvalues; every argument may be
 * evaluated more than once.  The body is wrapped in do { } while (0) so
 * that `ADJUST_INDICES(...);` is a single statement and stays correct
 * under an unbraced if/else.
 */
#define ADJUST_INDICES(start, end, len)     \
    do {                                    \
        if ((end) > (len)) {                \
            (end) = (len);                  \
        }                                   \
        else if ((end) < 0) {               \
            (end) += (len);                 \
            if ((end) < 0) {                \
                (end) = 0;                  \
            }                               \
        }                                   \
        if ((start) < 0) {                  \
            (start) += (len);               \
            if ((start) < 0) {              \
                (start) = 0;                \
            }                               \
        }                                   \
    } while (0)
505
506 Py_LOCAL_INLINE(Py_ssize_t)
find_internal(const char * str,Py_ssize_t len,const char * function_name,PyObject * args,int dir)507 find_internal(const char *str, Py_ssize_t len,
508 const char *function_name, PyObject *args, int dir)
509 {
510 PyObject *subobj;
511 char byte;
512 Py_buffer subbuf;
513 const char *sub;
514 Py_ssize_t sub_len;
515 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
516 Py_ssize_t res;
517
518 if (!parse_args_finds_byte(function_name, args,
519 &subobj, &byte, &start, &end))
520 return -2;
521
522 if (subobj) {
523 if (PyObject_GetBuffer(subobj, &subbuf, PyBUF_SIMPLE) != 0)
524 return -2;
525
526 sub = subbuf.buf;
527 sub_len = subbuf.len;
528 }
529 else {
530 sub = &byte;
531 sub_len = 1;
532 }
533
534 ADJUST_INDICES(start, end, len);
535 if (end - start < sub_len)
536 res = -1;
537 else if (sub_len == 1) {
538 if (dir > 0)
539 res = stringlib_find_char(
540 str + start, end - start,
541 *sub);
542 else
543 res = stringlib_rfind_char(
544 str + start, end - start,
545 *sub);
546 if (res >= 0)
547 res += start;
548 }
549 else {
550 if (dir > 0)
551 res = stringlib_find_slice(
552 str, len,
553 sub, sub_len, start, end);
554 else
555 res = stringlib_rfind_slice(
556 str, len,
557 sub, sub_len, start, end);
558 }
559
560 if (subobj)
561 PyBuffer_Release(&subbuf);
562
563 return res;
564 }
565
566 PyDoc_STRVAR_shared(_Py_find__doc__,
567 "B.find(sub[, start[, end]]) -> int\n\
568 \n\
569 Return the lowest index in B where subsection sub is found,\n\
570 such that sub is contained within B[start,end]. Optional\n\
571 arguments start and end are interpreted as in slice notation.\n\
572 \n\
573 Return -1 on failure.");
574
575 PyObject *
_Py_bytes_find(const char * str,Py_ssize_t len,PyObject * args)576 _Py_bytes_find(const char *str, Py_ssize_t len, PyObject *args)
577 {
578 Py_ssize_t result = find_internal(str, len, "find", args, +1);
579 if (result == -2)
580 return NULL;
581 return PyLong_FromSsize_t(result);
582 }
583
584 PyDoc_STRVAR_shared(_Py_index__doc__,
585 "B.index(sub[, start[, end]]) -> int\n\
586 \n\
587 Return the lowest index in B where subsection sub is found,\n\
588 such that sub is contained within B[start,end]. Optional\n\
589 arguments start and end are interpreted as in slice notation.\n\
590 \n\
591 Raises ValueError when the subsection is not found.");
592
593 PyObject *
_Py_bytes_index(const char * str,Py_ssize_t len,PyObject * args)594 _Py_bytes_index(const char *str, Py_ssize_t len, PyObject *args)
595 {
596 Py_ssize_t result = find_internal(str, len, "index", args, +1);
597 if (result == -2)
598 return NULL;
599 if (result == -1) {
600 PyErr_SetString(PyExc_ValueError,
601 "subsection not found");
602 return NULL;
603 }
604 return PyLong_FromSsize_t(result);
605 }
606
607 PyDoc_STRVAR_shared(_Py_rfind__doc__,
608 "B.rfind(sub[, start[, end]]) -> int\n\
609 \n\
610 Return the highest index in B where subsection sub is found,\n\
611 such that sub is contained within B[start,end]. Optional\n\
612 arguments start and end are interpreted as in slice notation.\n\
613 \n\
614 Return -1 on failure.");
615
616 PyObject *
_Py_bytes_rfind(const char * str,Py_ssize_t len,PyObject * args)617 _Py_bytes_rfind(const char *str, Py_ssize_t len, PyObject *args)
618 {
619 Py_ssize_t result = find_internal(str, len, "rfind", args, -1);
620 if (result == -2)
621 return NULL;
622 return PyLong_FromSsize_t(result);
623 }
624
625 PyDoc_STRVAR_shared(_Py_rindex__doc__,
626 "B.rindex(sub[, start[, end]]) -> int\n\
627 \n\
628 Return the highest index in B where subsection sub is found,\n\
629 such that sub is contained within B[start,end]. Optional\n\
630 arguments start and end are interpreted as in slice notation.\n\
631 \n\
632 Raise ValueError when the subsection is not found.");
633
634 PyObject *
_Py_bytes_rindex(const char * str,Py_ssize_t len,PyObject * args)635 _Py_bytes_rindex(const char *str, Py_ssize_t len, PyObject *args)
636 {
637 Py_ssize_t result = find_internal(str, len, "rindex", args, -1);
638 if (result == -2)
639 return NULL;
640 if (result == -1) {
641 PyErr_SetString(PyExc_ValueError,
642 "subsection not found");
643 return NULL;
644 }
645 return PyLong_FromSsize_t(result);
646 }
647
648 PyDoc_STRVAR_shared(_Py_count__doc__,
649 "B.count(sub[, start[, end]]) -> int\n\
650 \n\
651 Return the number of non-overlapping occurrences of subsection sub in\n\
652 bytes B[start:end]. Optional arguments start and end are interpreted\n\
653 as in slice notation.");
654
655 PyObject *
_Py_bytes_count(const char * str,Py_ssize_t len,PyObject * args)656 _Py_bytes_count(const char *str, Py_ssize_t len, PyObject *args)
657 {
658 PyObject *sub_obj;
659 const char *sub;
660 Py_ssize_t sub_len;
661 char byte;
662 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
663
664 Py_buffer vsub;
665 PyObject *count_obj;
666
667 if (!parse_args_finds_byte("count", args,
668 &sub_obj, &byte, &start, &end))
669 return NULL;
670
671 if (sub_obj) {
672 if (PyObject_GetBuffer(sub_obj, &vsub, PyBUF_SIMPLE) != 0)
673 return NULL;
674
675 sub = vsub.buf;
676 sub_len = vsub.len;
677 }
678 else {
679 sub = &byte;
680 sub_len = 1;
681 }
682
683 ADJUST_INDICES(start, end, len);
684
685 count_obj = PyLong_FromSsize_t(
686 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
687 );
688
689 if (sub_obj)
690 PyBuffer_Release(&vsub);
691
692 return count_obj;
693 }
694
695 int
_Py_bytes_contains(const char * str,Py_ssize_t len,PyObject * arg)696 _Py_bytes_contains(const char *str, Py_ssize_t len, PyObject *arg)
697 {
698 Py_ssize_t ival = PyNumber_AsSsize_t(arg, NULL);
699 if (ival == -1 && PyErr_Occurred()) {
700 Py_buffer varg;
701 Py_ssize_t pos;
702 PyErr_Clear();
703 if (PyObject_GetBuffer(arg, &varg, PyBUF_SIMPLE) != 0)
704 return -1;
705 pos = stringlib_find(str, len,
706 varg.buf, varg.len, 0);
707 PyBuffer_Release(&varg);
708 return pos >= 0;
709 }
710 if (ival < 0 || ival >= 256) {
711 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
712 return -1;
713 }
714
715 return memchr(str, (int) ival, len) != NULL;
716 }
717
718
719 /* Matches the end (direction >= 0) or start (direction < 0) of the buffer
720 * against substr, using the start and end arguments. Returns
721 * -1 on error, 0 if not found and 1 if found.
722 */
723 static int
tailmatch(const char * str,Py_ssize_t len,PyObject * substr,Py_ssize_t start,Py_ssize_t end,int direction)724 tailmatch(const char *str, Py_ssize_t len, PyObject *substr,
725 Py_ssize_t start, Py_ssize_t end, int direction)
726 {
727 Py_buffer sub_view = {NULL, NULL};
728 const char *sub;
729 Py_ssize_t slen;
730
731 if (PyBytes_Check(substr)) {
732 sub = PyBytes_AS_STRING(substr);
733 slen = PyBytes_GET_SIZE(substr);
734 }
735 else {
736 if (PyObject_GetBuffer(substr, &sub_view, PyBUF_SIMPLE) != 0)
737 return -1;
738 sub = sub_view.buf;
739 slen = sub_view.len;
740 }
741
742 ADJUST_INDICES(start, end, len);
743
744 if (direction < 0) {
745 /* startswith */
746 if (start > len - slen)
747 goto notfound;
748 } else {
749 /* endswith */
750 if (end - start < slen || start > len)
751 goto notfound;
752
753 if (end - slen > start)
754 start = end - slen;
755 }
756 if (end - start < slen)
757 goto notfound;
758 if (memcmp(str + start, sub, slen) != 0)
759 goto notfound;
760
761 PyBuffer_Release(&sub_view);
762 return 1;
763
764 notfound:
765 PyBuffer_Release(&sub_view);
766 return 0;
767 }
768
769 static PyObject *
_Py_bytes_tailmatch(const char * str,Py_ssize_t len,const char * function_name,PyObject * args,int direction)770 _Py_bytes_tailmatch(const char *str, Py_ssize_t len,
771 const char *function_name, PyObject *args,
772 int direction)
773 {
774 Py_ssize_t start = 0;
775 Py_ssize_t end = PY_SSIZE_T_MAX;
776 PyObject *subobj;
777 int result;
778
779 if (!stringlib_parse_args_finds(function_name, args, &subobj, &start, &end))
780 return NULL;
781 if (PyTuple_Check(subobj)) {
782 Py_ssize_t i;
783 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
784 result = tailmatch(str, len, PyTuple_GET_ITEM(subobj, i),
785 start, end, direction);
786 if (result == -1)
787 return NULL;
788 else if (result) {
789 Py_RETURN_TRUE;
790 }
791 }
792 Py_RETURN_FALSE;
793 }
794 result = tailmatch(str, len, subobj, start, end, direction);
795 if (result == -1) {
796 if (PyErr_ExceptionMatches(PyExc_TypeError))
797 PyErr_Format(PyExc_TypeError,
798 "%s first arg must be bytes or a tuple of bytes, "
799 "not %s",
800 function_name, Py_TYPE(subobj)->tp_name);
801 return NULL;
802 }
803 else
804 return PyBool_FromLong(result);
805 }
806
807 PyDoc_STRVAR_shared(_Py_startswith__doc__,
808 "B.startswith(prefix[, start[, end]]) -> bool\n\
809 \n\
810 Return True if B starts with the specified prefix, False otherwise.\n\
811 With optional start, test B beginning at that position.\n\
812 With optional end, stop comparing B at that position.\n\
813 prefix can also be a tuple of bytes to try.");
814
815 PyObject *
_Py_bytes_startswith(const char * str,Py_ssize_t len,PyObject * args)816 _Py_bytes_startswith(const char *str, Py_ssize_t len, PyObject *args)
817 {
818 return _Py_bytes_tailmatch(str, len, "startswith", args, -1);
819 }
820
821 PyDoc_STRVAR_shared(_Py_endswith__doc__,
822 "B.endswith(suffix[, start[, end]]) -> bool\n\
823 \n\
824 Return True if B ends with the specified suffix, False otherwise.\n\
825 With optional start, test B beginning at that position.\n\
826 With optional end, stop comparing B at that position.\n\
827 suffix can also be a tuple of bytes to try.");
828
829 PyObject *
_Py_bytes_endswith(const char * str,Py_ssize_t len,PyObject * args)830 _Py_bytes_endswith(const char *str, Py_ssize_t len, PyObject *args)
831 {
832 return _Py_bytes_tailmatch(str, len, "endswith", args, +1);
833 }
834