1 #define PY_SSIZE_T_CLEAN
2 #include "Python.h"
3 #include "pycore_abstract.h" // _PyIndex_Check()
4 #include "pycore_bytes_methods.h"
5
6 PyDoc_STRVAR_shared(_Py_isspace__doc__,
7 "B.isspace() -> bool\n\
8 \n\
9 Return True if all characters in B are whitespace\n\
10 and there is at least one character in B, False otherwise.");
11
12 PyObject*
_Py_bytes_isspace(const char * cptr,Py_ssize_t len)13 _Py_bytes_isspace(const char *cptr, Py_ssize_t len)
14 {
15 const unsigned char *p
16 = (const unsigned char *) cptr;
17 const unsigned char *e;
18
19 /* Shortcut for single character strings */
20 if (len == 1 && Py_ISSPACE(*p))
21 Py_RETURN_TRUE;
22
23 /* Special case for empty strings */
24 if (len == 0)
25 Py_RETURN_FALSE;
26
27 e = p + len;
28 for (; p < e; p++) {
29 if (!Py_ISSPACE(*p))
30 Py_RETURN_FALSE;
31 }
32 Py_RETURN_TRUE;
33 }
34
35
36 PyDoc_STRVAR_shared(_Py_isalpha__doc__,
37 "B.isalpha() -> bool\n\
38 \n\
39 Return True if all characters in B are alphabetic\n\
40 and there is at least one character in B, False otherwise.");
41
42 PyObject*
_Py_bytes_isalpha(const char * cptr,Py_ssize_t len)43 _Py_bytes_isalpha(const char *cptr, Py_ssize_t len)
44 {
45 const unsigned char *p
46 = (const unsigned char *) cptr;
47 const unsigned char *e;
48
49 /* Shortcut for single character strings */
50 if (len == 1 && Py_ISALPHA(*p))
51 Py_RETURN_TRUE;
52
53 /* Special case for empty strings */
54 if (len == 0)
55 Py_RETURN_FALSE;
56
57 e = p + len;
58 for (; p < e; p++) {
59 if (!Py_ISALPHA(*p))
60 Py_RETURN_FALSE;
61 }
62 Py_RETURN_TRUE;
63 }
64
65
66 PyDoc_STRVAR_shared(_Py_isalnum__doc__,
67 "B.isalnum() -> bool\n\
68 \n\
69 Return True if all characters in B are alphanumeric\n\
70 and there is at least one character in B, False otherwise.");
71
72 PyObject*
_Py_bytes_isalnum(const char * cptr,Py_ssize_t len)73 _Py_bytes_isalnum(const char *cptr, Py_ssize_t len)
74 {
75 const unsigned char *p
76 = (const unsigned char *) cptr;
77 const unsigned char *e;
78
79 /* Shortcut for single character strings */
80 if (len == 1 && Py_ISALNUM(*p))
81 Py_RETURN_TRUE;
82
83 /* Special case for empty strings */
84 if (len == 0)
85 Py_RETURN_FALSE;
86
87 e = p + len;
88 for (; p < e; p++) {
89 if (!Py_ISALNUM(*p))
90 Py_RETURN_FALSE;
91 }
92 Py_RETURN_TRUE;
93 }
94
95
96 PyDoc_STRVAR_shared(_Py_isascii__doc__,
97 "B.isascii() -> bool\n\
98 \n\
99 Return True if B is empty or all characters in B are ASCII,\n\
100 False otherwise.");
101
102 // Optimization is copied from ascii_decode in unicodeobject.c
103 /* Mask to quickly check whether a C 'long' contains a
104 non-ASCII, UTF8-encoded char. */
105 #if (SIZEOF_LONG == 8)
106 # define ASCII_CHAR_MASK 0x8080808080808080UL
107 #elif (SIZEOF_LONG == 4)
108 # define ASCII_CHAR_MASK 0x80808080UL
109 #else
110 # error C 'long' size should be either 4 or 8!
111 #endif
112
113 PyObject*
_Py_bytes_isascii(const char * cptr,Py_ssize_t len)114 _Py_bytes_isascii(const char *cptr, Py_ssize_t len)
115 {
116 const char *p = cptr;
117 const char *end = p + len;
118 const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG);
119
120 while (p < end) {
121 /* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h
122 for an explanation. */
123 if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) {
124 /* Help allocation */
125 const char *_p = p;
126 while (_p < aligned_end) {
127 unsigned long value = *(const unsigned long *) _p;
128 if (value & ASCII_CHAR_MASK) {
129 Py_RETURN_FALSE;
130 }
131 _p += SIZEOF_LONG;
132 }
133 p = _p;
134 if (_p == end)
135 break;
136 }
137 if ((unsigned char)*p & 0x80) {
138 Py_RETURN_FALSE;
139 }
140 p++;
141 }
142 Py_RETURN_TRUE;
143 }
144
145 #undef ASCII_CHAR_MASK
146
147
148 PyDoc_STRVAR_shared(_Py_isdigit__doc__,
149 "B.isdigit() -> bool\n\
150 \n\
151 Return True if all characters in B are digits\n\
152 and there is at least one character in B, False otherwise.");
153
154 PyObject*
_Py_bytes_isdigit(const char * cptr,Py_ssize_t len)155 _Py_bytes_isdigit(const char *cptr, Py_ssize_t len)
156 {
157 const unsigned char *p
158 = (const unsigned char *) cptr;
159 const unsigned char *e;
160
161 /* Shortcut for single character strings */
162 if (len == 1 && Py_ISDIGIT(*p))
163 Py_RETURN_TRUE;
164
165 /* Special case for empty strings */
166 if (len == 0)
167 Py_RETURN_FALSE;
168
169 e = p + len;
170 for (; p < e; p++) {
171 if (!Py_ISDIGIT(*p))
172 Py_RETURN_FALSE;
173 }
174 Py_RETURN_TRUE;
175 }
176
177
178 PyDoc_STRVAR_shared(_Py_islower__doc__,
179 "B.islower() -> bool\n\
180 \n\
181 Return True if all cased characters in B are lowercase and there is\n\
182 at least one cased character in B, False otherwise.");
183
184 PyObject*
_Py_bytes_islower(const char * cptr,Py_ssize_t len)185 _Py_bytes_islower(const char *cptr, Py_ssize_t len)
186 {
187 const unsigned char *p
188 = (const unsigned char *) cptr;
189 const unsigned char *e;
190 int cased;
191
192 /* Shortcut for single character strings */
193 if (len == 1)
194 return PyBool_FromLong(Py_ISLOWER(*p));
195
196 /* Special case for empty strings */
197 if (len == 0)
198 Py_RETURN_FALSE;
199
200 e = p + len;
201 cased = 0;
202 for (; p < e; p++) {
203 if (Py_ISUPPER(*p))
204 Py_RETURN_FALSE;
205 else if (!cased && Py_ISLOWER(*p))
206 cased = 1;
207 }
208 return PyBool_FromLong(cased);
209 }
210
211
212 PyDoc_STRVAR_shared(_Py_isupper__doc__,
213 "B.isupper() -> bool\n\
214 \n\
215 Return True if all cased characters in B are uppercase and there is\n\
216 at least one cased character in B, False otherwise.");
217
218 PyObject*
_Py_bytes_isupper(const char * cptr,Py_ssize_t len)219 _Py_bytes_isupper(const char *cptr, Py_ssize_t len)
220 {
221 const unsigned char *p
222 = (const unsigned char *) cptr;
223 const unsigned char *e;
224 int cased;
225
226 /* Shortcut for single character strings */
227 if (len == 1)
228 return PyBool_FromLong(Py_ISUPPER(*p));
229
230 /* Special case for empty strings */
231 if (len == 0)
232 Py_RETURN_FALSE;
233
234 e = p + len;
235 cased = 0;
236 for (; p < e; p++) {
237 if (Py_ISLOWER(*p))
238 Py_RETURN_FALSE;
239 else if (!cased && Py_ISUPPER(*p))
240 cased = 1;
241 }
242 return PyBool_FromLong(cased);
243 }
244
245
246 PyDoc_STRVAR_shared(_Py_istitle__doc__,
247 "B.istitle() -> bool\n\
248 \n\
249 Return True if B is a titlecased string and there is at least one\n\
250 character in B, i.e. uppercase characters may only follow uncased\n\
251 characters and lowercase characters only cased ones. Return False\n\
252 otherwise.");
253
254 PyObject*
_Py_bytes_istitle(const char * cptr,Py_ssize_t len)255 _Py_bytes_istitle(const char *cptr, Py_ssize_t len)
256 {
257 const unsigned char *p
258 = (const unsigned char *) cptr;
259 const unsigned char *e;
260 int cased, previous_is_cased;
261
262 /* Shortcut for single character strings */
263 if (len == 1)
264 return PyBool_FromLong(Py_ISUPPER(*p));
265
266 /* Special case for empty strings */
267 if (len == 0)
268 Py_RETURN_FALSE;
269
270 e = p + len;
271 cased = 0;
272 previous_is_cased = 0;
273 for (; p < e; p++) {
274 const unsigned char ch = *p;
275
276 if (Py_ISUPPER(ch)) {
277 if (previous_is_cased)
278 Py_RETURN_FALSE;
279 previous_is_cased = 1;
280 cased = 1;
281 }
282 else if (Py_ISLOWER(ch)) {
283 if (!previous_is_cased)
284 Py_RETURN_FALSE;
285 previous_is_cased = 1;
286 cased = 1;
287 }
288 else
289 previous_is_cased = 0;
290 }
291 return PyBool_FromLong(cased);
292 }
293
294
295 PyDoc_STRVAR_shared(_Py_lower__doc__,
296 "B.lower() -> copy of B\n\
297 \n\
298 Return a copy of B with all ASCII characters converted to lowercase.");
299
300 void
_Py_bytes_lower(char * result,const char * cptr,Py_ssize_t len)301 _Py_bytes_lower(char *result, const char *cptr, Py_ssize_t len)
302 {
303 Py_ssize_t i;
304
305 for (i = 0; i < len; i++) {
306 result[i] = Py_TOLOWER((unsigned char) cptr[i]);
307 }
308 }
309
310
311 PyDoc_STRVAR_shared(_Py_upper__doc__,
312 "B.upper() -> copy of B\n\
313 \n\
314 Return a copy of B with all ASCII characters converted to uppercase.");
315
316 void
_Py_bytes_upper(char * result,const char * cptr,Py_ssize_t len)317 _Py_bytes_upper(char *result, const char *cptr, Py_ssize_t len)
318 {
319 Py_ssize_t i;
320
321 for (i = 0; i < len; i++) {
322 result[i] = Py_TOUPPER((unsigned char) cptr[i]);
323 }
324 }
325
326
327 PyDoc_STRVAR_shared(_Py_title__doc__,
328 "B.title() -> copy of B\n\
329 \n\
330 Return a titlecased version of B, i.e. ASCII words start with uppercase\n\
331 characters, all remaining cased characters have lowercase.");
332
333 void
_Py_bytes_title(char * result,const char * s,Py_ssize_t len)334 _Py_bytes_title(char *result, const char *s, Py_ssize_t len)
335 {
336 Py_ssize_t i;
337 int previous_is_cased = 0;
338
339 for (i = 0; i < len; i++) {
340 int c = Py_CHARMASK(*s++);
341 if (Py_ISLOWER(c)) {
342 if (!previous_is_cased)
343 c = Py_TOUPPER(c);
344 previous_is_cased = 1;
345 } else if (Py_ISUPPER(c)) {
346 if (previous_is_cased)
347 c = Py_TOLOWER(c);
348 previous_is_cased = 1;
349 } else
350 previous_is_cased = 0;
351 *result++ = c;
352 }
353 }
354
355
356 PyDoc_STRVAR_shared(_Py_capitalize__doc__,
357 "B.capitalize() -> copy of B\n\
358 \n\
359 Return a copy of B with only its first character capitalized (ASCII)\n\
360 and the rest lower-cased.");
361
362 void
_Py_bytes_capitalize(char * result,const char * s,Py_ssize_t len)363 _Py_bytes_capitalize(char *result, const char *s, Py_ssize_t len)
364 {
365 if (len > 0) {
366 *result = Py_TOUPPER(*s);
367 _Py_bytes_lower(result + 1, s + 1, len - 1);
368 }
369 }
370
371
372 PyDoc_STRVAR_shared(_Py_swapcase__doc__,
373 "B.swapcase() -> copy of B\n\
374 \n\
375 Return a copy of B with uppercase ASCII characters converted\n\
376 to lowercase ASCII and vice versa.");
377
378 void
_Py_bytes_swapcase(char * result,const char * s,Py_ssize_t len)379 _Py_bytes_swapcase(char *result, const char *s, Py_ssize_t len)
380 {
381 Py_ssize_t i;
382
383 for (i = 0; i < len; i++) {
384 int c = Py_CHARMASK(*s++);
385 if (Py_ISLOWER(c)) {
386 *result = Py_TOUPPER(c);
387 }
388 else if (Py_ISUPPER(c)) {
389 *result = Py_TOLOWER(c);
390 }
391 else
392 *result = c;
393 result++;
394 }
395 }
396
397
398 PyDoc_STRVAR_shared(_Py_maketrans__doc__,
399 "B.maketrans(frm, to) -> translation table\n\
400 \n\
401 Return a translation table (a bytes object of length 256) suitable\n\
402 for use in the bytes or bytearray translate method where each byte\n\
403 in frm is mapped to the byte at the same position in to.\n\
404 The bytes objects frm and to must be of the same length.");
405
406 PyObject *
_Py_bytes_maketrans(Py_buffer * frm,Py_buffer * to)407 _Py_bytes_maketrans(Py_buffer *frm, Py_buffer *to)
408 {
409 PyObject *res = NULL;
410 Py_ssize_t i;
411 char *p;
412
413 if (frm->len != to->len) {
414 PyErr_Format(PyExc_ValueError,
415 "maketrans arguments must have same length");
416 return NULL;
417 }
418 res = PyBytes_FromStringAndSize(NULL, 256);
419 if (!res)
420 return NULL;
421 p = PyBytes_AS_STRING(res);
422 for (i = 0; i < 256; i++)
423 p[i] = (char) i;
424 for (i = 0; i < frm->len; i++) {
425 p[((unsigned char *)frm->buf)[i]] = ((char *)to->buf)[i];
426 }
427
428 return res;
429 }
430
431 #define FASTSEARCH fastsearch
432 #define STRINGLIB(F) stringlib_##F
433 #define STRINGLIB_CHAR char
434 #define STRINGLIB_SIZEOF_CHAR 1
435
436 #include "stringlib/fastsearch.h"
437 #include "stringlib/count.h"
438 #include "stringlib/find.h"
439
440 /*
441 Wraps stringlib_parse_args_finds() and additionally checks the first
442 argument type.
443
444 In case the first argument is a bytes-like object, sets it to subobj,
445 and doesn't touch the byte parameter.
446 In case it is an integer in range(0, 256), writes the integer value
447 to byte, and sets subobj to NULL.
448
449 The other parameters are similar to those of
450 stringlib_parse_args_finds().
451 */
452
453 Py_LOCAL_INLINE(int)
parse_args_finds_byte(const char * function_name,PyObject * args,PyObject ** subobj,char * byte,Py_ssize_t * start,Py_ssize_t * end)454 parse_args_finds_byte(const char *function_name, PyObject *args,
455 PyObject **subobj, char *byte,
456 Py_ssize_t *start, Py_ssize_t *end)
457 {
458 PyObject *tmp_subobj;
459 Py_ssize_t ival;
460
461 if(!stringlib_parse_args_finds(function_name, args, &tmp_subobj,
462 start, end))
463 return 0;
464
465 if (PyObject_CheckBuffer(tmp_subobj)) {
466 *subobj = tmp_subobj;
467 return 1;
468 }
469
470 if (!_PyIndex_Check(tmp_subobj)) {
471 PyErr_Format(PyExc_TypeError,
472 "argument should be integer or bytes-like object, "
473 "not '%.200s'",
474 Py_TYPE(tmp_subobj)->tp_name);
475 return 0;
476 }
477
478 ival = PyNumber_AsSsize_t(tmp_subobj, NULL);
479 if (ival == -1 && PyErr_Occurred()) {
480 return 0;
481 }
482 if (ival < 0 || ival > 255) {
483 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
484 return 0;
485 }
486
487 *subobj = NULL;
488 *byte = (char)ival;
489 return 1;
490 }
491
/* Helper macro to fix up start/end slice values in place.

   end is clamped to len when it exceeds it; a negative end or start is
   offset by len and, if still negative, set to 0.  start and end must be
   modifiable lvalues and are evaluated more than once.

   The body is wrapped in do { ... } while (0) so that the macro expands to
   a single statement and is safe as the body of an unbraced if/else.  Use
   it with a trailing semicolon. */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len)) {                    \
            (end) = (len);                      \
        }                                       \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0) {                    \
                (end) = 0;                      \
            }                                   \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0) {                  \
                (start) = 0;                    \
            }                                   \
        }                                       \
    } while (0)
506
507 Py_LOCAL_INLINE(Py_ssize_t)
find_internal(const char * str,Py_ssize_t len,const char * function_name,PyObject * args,int dir)508 find_internal(const char *str, Py_ssize_t len,
509 const char *function_name, PyObject *args, int dir)
510 {
511 PyObject *subobj;
512 char byte;
513 Py_buffer subbuf;
514 const char *sub;
515 Py_ssize_t sub_len;
516 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
517 Py_ssize_t res;
518
519 if (!parse_args_finds_byte(function_name, args,
520 &subobj, &byte, &start, &end))
521 return -2;
522
523 if (subobj) {
524 if (PyObject_GetBuffer(subobj, &subbuf, PyBUF_SIMPLE) != 0)
525 return -2;
526
527 sub = subbuf.buf;
528 sub_len = subbuf.len;
529 }
530 else {
531 sub = &byte;
532 sub_len = 1;
533 }
534
535 ADJUST_INDICES(start, end, len);
536 if (end - start < sub_len)
537 res = -1;
538 else if (sub_len == 1) {
539 if (dir > 0)
540 res = stringlib_find_char(
541 str + start, end - start,
542 *sub);
543 else
544 res = stringlib_rfind_char(
545 str + start, end - start,
546 *sub);
547 if (res >= 0)
548 res += start;
549 }
550 else {
551 if (dir > 0)
552 res = stringlib_find_slice(
553 str, len,
554 sub, sub_len, start, end);
555 else
556 res = stringlib_rfind_slice(
557 str, len,
558 sub, sub_len, start, end);
559 }
560
561 if (subobj)
562 PyBuffer_Release(&subbuf);
563
564 return res;
565 }
566
567 PyDoc_STRVAR_shared(_Py_find__doc__,
568 "B.find(sub[, start[, end]]) -> int\n\
569 \n\
570 Return the lowest index in B where subsection sub is found,\n\
571 such that sub is contained within B[start,end]. Optional\n\
572 arguments start and end are interpreted as in slice notation.\n\
573 \n\
574 Return -1 on failure.");
575
576 PyObject *
_Py_bytes_find(const char * str,Py_ssize_t len,PyObject * args)577 _Py_bytes_find(const char *str, Py_ssize_t len, PyObject *args)
578 {
579 Py_ssize_t result = find_internal(str, len, "find", args, +1);
580 if (result == -2)
581 return NULL;
582 return PyLong_FromSsize_t(result);
583 }
584
585 PyDoc_STRVAR_shared(_Py_index__doc__,
586 "B.index(sub[, start[, end]]) -> int\n\
587 \n\
588 Return the lowest index in B where subsection sub is found,\n\
589 such that sub is contained within B[start,end]. Optional\n\
590 arguments start and end are interpreted as in slice notation.\n\
591 \n\
592 Raises ValueError when the subsection is not found.");
593
594 PyObject *
_Py_bytes_index(const char * str,Py_ssize_t len,PyObject * args)595 _Py_bytes_index(const char *str, Py_ssize_t len, PyObject *args)
596 {
597 Py_ssize_t result = find_internal(str, len, "index", args, +1);
598 if (result == -2)
599 return NULL;
600 if (result == -1) {
601 PyErr_SetString(PyExc_ValueError,
602 "subsection not found");
603 return NULL;
604 }
605 return PyLong_FromSsize_t(result);
606 }
607
608 PyDoc_STRVAR_shared(_Py_rfind__doc__,
609 "B.rfind(sub[, start[, end]]) -> int\n\
610 \n\
611 Return the highest index in B where subsection sub is found,\n\
612 such that sub is contained within B[start,end]. Optional\n\
613 arguments start and end are interpreted as in slice notation.\n\
614 \n\
615 Return -1 on failure.");
616
617 PyObject *
_Py_bytes_rfind(const char * str,Py_ssize_t len,PyObject * args)618 _Py_bytes_rfind(const char *str, Py_ssize_t len, PyObject *args)
619 {
620 Py_ssize_t result = find_internal(str, len, "rfind", args, -1);
621 if (result == -2)
622 return NULL;
623 return PyLong_FromSsize_t(result);
624 }
625
626 PyDoc_STRVAR_shared(_Py_rindex__doc__,
627 "B.rindex(sub[, start[, end]]) -> int\n\
628 \n\
629 Return the highest index in B where subsection sub is found,\n\
630 such that sub is contained within B[start,end]. Optional\n\
631 arguments start and end are interpreted as in slice notation.\n\
632 \n\
633 Raise ValueError when the subsection is not found.");
634
635 PyObject *
_Py_bytes_rindex(const char * str,Py_ssize_t len,PyObject * args)636 _Py_bytes_rindex(const char *str, Py_ssize_t len, PyObject *args)
637 {
638 Py_ssize_t result = find_internal(str, len, "rindex", args, -1);
639 if (result == -2)
640 return NULL;
641 if (result == -1) {
642 PyErr_SetString(PyExc_ValueError,
643 "subsection not found");
644 return NULL;
645 }
646 return PyLong_FromSsize_t(result);
647 }
648
649 PyDoc_STRVAR_shared(_Py_count__doc__,
650 "B.count(sub[, start[, end]]) -> int\n\
651 \n\
652 Return the number of non-overlapping occurrences of subsection sub in\n\
653 bytes B[start:end]. Optional arguments start and end are interpreted\n\
654 as in slice notation.");
655
656 PyObject *
_Py_bytes_count(const char * str,Py_ssize_t len,PyObject * args)657 _Py_bytes_count(const char *str, Py_ssize_t len, PyObject *args)
658 {
659 PyObject *sub_obj;
660 const char *sub;
661 Py_ssize_t sub_len;
662 char byte;
663 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
664
665 Py_buffer vsub;
666 PyObject *count_obj;
667
668 if (!parse_args_finds_byte("count", args,
669 &sub_obj, &byte, &start, &end))
670 return NULL;
671
672 if (sub_obj) {
673 if (PyObject_GetBuffer(sub_obj, &vsub, PyBUF_SIMPLE) != 0)
674 return NULL;
675
676 sub = vsub.buf;
677 sub_len = vsub.len;
678 }
679 else {
680 sub = &byte;
681 sub_len = 1;
682 }
683
684 ADJUST_INDICES(start, end, len);
685
686 count_obj = PyLong_FromSsize_t(
687 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
688 );
689
690 if (sub_obj)
691 PyBuffer_Release(&vsub);
692
693 return count_obj;
694 }
695
696 int
_Py_bytes_contains(const char * str,Py_ssize_t len,PyObject * arg)697 _Py_bytes_contains(const char *str, Py_ssize_t len, PyObject *arg)
698 {
699 Py_ssize_t ival = PyNumber_AsSsize_t(arg, NULL);
700 if (ival == -1 && PyErr_Occurred()) {
701 Py_buffer varg;
702 Py_ssize_t pos;
703 PyErr_Clear();
704 if (PyObject_GetBuffer(arg, &varg, PyBUF_SIMPLE) != 0)
705 return -1;
706 pos = stringlib_find(str, len,
707 varg.buf, varg.len, 0);
708 PyBuffer_Release(&varg);
709 return pos >= 0;
710 }
711 if (ival < 0 || ival >= 256) {
712 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
713 return -1;
714 }
715
716 return memchr(str, (int) ival, len) != NULL;
717 }
718
719
720 /* Matches the end (direction >= 0) or start (direction < 0) of the buffer
721 * against substr, using the start and end arguments. Returns
722 * -1 on error, 0 if not found and 1 if found.
723 */
724 static int
tailmatch(const char * str,Py_ssize_t len,PyObject * substr,Py_ssize_t start,Py_ssize_t end,int direction)725 tailmatch(const char *str, Py_ssize_t len, PyObject *substr,
726 Py_ssize_t start, Py_ssize_t end, int direction)
727 {
728 Py_buffer sub_view = {NULL, NULL};
729 const char *sub;
730 Py_ssize_t slen;
731
732 if (PyBytes_Check(substr)) {
733 sub = PyBytes_AS_STRING(substr);
734 slen = PyBytes_GET_SIZE(substr);
735 }
736 else {
737 if (PyObject_GetBuffer(substr, &sub_view, PyBUF_SIMPLE) != 0)
738 return -1;
739 sub = sub_view.buf;
740 slen = sub_view.len;
741 }
742
743 ADJUST_INDICES(start, end, len);
744
745 if (direction < 0) {
746 /* startswith */
747 if (start > len - slen)
748 goto notfound;
749 } else {
750 /* endswith */
751 if (end - start < slen || start > len)
752 goto notfound;
753
754 if (end - slen > start)
755 start = end - slen;
756 }
757 if (end - start < slen)
758 goto notfound;
759 if (memcmp(str + start, sub, slen) != 0)
760 goto notfound;
761
762 PyBuffer_Release(&sub_view);
763 return 1;
764
765 notfound:
766 PyBuffer_Release(&sub_view);
767 return 0;
768 }
769
770 static PyObject *
_Py_bytes_tailmatch(const char * str,Py_ssize_t len,const char * function_name,PyObject * args,int direction)771 _Py_bytes_tailmatch(const char *str, Py_ssize_t len,
772 const char *function_name, PyObject *args,
773 int direction)
774 {
775 Py_ssize_t start = 0;
776 Py_ssize_t end = PY_SSIZE_T_MAX;
777 PyObject *subobj;
778 int result;
779
780 if (!stringlib_parse_args_finds(function_name, args, &subobj, &start, &end))
781 return NULL;
782 if (PyTuple_Check(subobj)) {
783 Py_ssize_t i;
784 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
785 result = tailmatch(str, len, PyTuple_GET_ITEM(subobj, i),
786 start, end, direction);
787 if (result == -1)
788 return NULL;
789 else if (result) {
790 Py_RETURN_TRUE;
791 }
792 }
793 Py_RETURN_FALSE;
794 }
795 result = tailmatch(str, len, subobj, start, end, direction);
796 if (result == -1) {
797 if (PyErr_ExceptionMatches(PyExc_TypeError))
798 PyErr_Format(PyExc_TypeError,
799 "%s first arg must be bytes or a tuple of bytes, "
800 "not %s",
801 function_name, Py_TYPE(subobj)->tp_name);
802 return NULL;
803 }
804 else
805 return PyBool_FromLong(result);
806 }
807
808 PyDoc_STRVAR_shared(_Py_startswith__doc__,
809 "B.startswith(prefix[, start[, end]]) -> bool\n\
810 \n\
811 Return True if B starts with the specified prefix, False otherwise.\n\
812 With optional start, test B beginning at that position.\n\
813 With optional end, stop comparing B at that position.\n\
814 prefix can also be a tuple of bytes to try.");
815
816 PyObject *
_Py_bytes_startswith(const char * str,Py_ssize_t len,PyObject * args)817 _Py_bytes_startswith(const char *str, Py_ssize_t len, PyObject *args)
818 {
819 return _Py_bytes_tailmatch(str, len, "startswith", args, -1);
820 }
821
822 PyDoc_STRVAR_shared(_Py_endswith__doc__,
823 "B.endswith(suffix[, start[, end]]) -> bool\n\
824 \n\
825 Return True if B ends with the specified suffix, False otherwise.\n\
826 With optional start, test B beginning at that position.\n\
827 With optional end, stop comparing B at that position.\n\
828 suffix can also be a tuple of bytes to try.");
829
830 PyObject *
_Py_bytes_endswith(const char * str,Py_ssize_t len,PyObject * args)831 _Py_bytes_endswith(const char *str, Py_ssize_t len, PyObject *args)
832 {
833 return _Py_bytes_tailmatch(str, len, "endswith", args, +1);
834 }
835