• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #define PY_SSIZE_T_CLEAN
2 #include "Python.h"
3 #include "pycore_abstract.h"   // _PyIndex_Check()
4 #include "pycore_bytes_methods.h"
5 
6 PyDoc_STRVAR_shared(_Py_isspace__doc__,
7 "B.isspace() -> bool\n\
8 \n\
9 Return True if all characters in B are whitespace\n\
10 and there is at least one character in B, False otherwise.");
11 
12 PyObject*
_Py_bytes_isspace(const char * cptr,Py_ssize_t len)13 _Py_bytes_isspace(const char *cptr, Py_ssize_t len)
14 {
15     const unsigned char *p
16         = (const unsigned char *) cptr;
17     const unsigned char *e;
18 
19     /* Shortcut for single character strings */
20     if (len == 1 && Py_ISSPACE(*p))
21         Py_RETURN_TRUE;
22 
23     /* Special case for empty strings */
24     if (len == 0)
25         Py_RETURN_FALSE;
26 
27     e = p + len;
28     for (; p < e; p++) {
29         if (!Py_ISSPACE(*p))
30             Py_RETURN_FALSE;
31     }
32     Py_RETURN_TRUE;
33 }
34 
35 
36 PyDoc_STRVAR_shared(_Py_isalpha__doc__,
37 "B.isalpha() -> bool\n\
38 \n\
39 Return True if all characters in B are alphabetic\n\
40 and there is at least one character in B, False otherwise.");
41 
42 PyObject*
_Py_bytes_isalpha(const char * cptr,Py_ssize_t len)43 _Py_bytes_isalpha(const char *cptr, Py_ssize_t len)
44 {
45     const unsigned char *p
46         = (const unsigned char *) cptr;
47     const unsigned char *e;
48 
49     /* Shortcut for single character strings */
50     if (len == 1 && Py_ISALPHA(*p))
51         Py_RETURN_TRUE;
52 
53     /* Special case for empty strings */
54     if (len == 0)
55         Py_RETURN_FALSE;
56 
57     e = p + len;
58     for (; p < e; p++) {
59         if (!Py_ISALPHA(*p))
60             Py_RETURN_FALSE;
61     }
62     Py_RETURN_TRUE;
63 }
64 
65 
66 PyDoc_STRVAR_shared(_Py_isalnum__doc__,
67 "B.isalnum() -> bool\n\
68 \n\
69 Return True if all characters in B are alphanumeric\n\
70 and there is at least one character in B, False otherwise.");
71 
72 PyObject*
_Py_bytes_isalnum(const char * cptr,Py_ssize_t len)73 _Py_bytes_isalnum(const char *cptr, Py_ssize_t len)
74 {
75     const unsigned char *p
76         = (const unsigned char *) cptr;
77     const unsigned char *e;
78 
79     /* Shortcut for single character strings */
80     if (len == 1 && Py_ISALNUM(*p))
81         Py_RETURN_TRUE;
82 
83     /* Special case for empty strings */
84     if (len == 0)
85         Py_RETURN_FALSE;
86 
87     e = p + len;
88     for (; p < e; p++) {
89         if (!Py_ISALNUM(*p))
90             Py_RETURN_FALSE;
91     }
92     Py_RETURN_TRUE;
93 }
94 
95 
96 PyDoc_STRVAR_shared(_Py_isascii__doc__,
97 "B.isascii() -> bool\n\
98 \n\
99 Return True if B is empty or all characters in B are ASCII,\n\
100 False otherwise.");
101 
102 // Optimization is copied from ascii_decode in unicodeobject.c
103 /* Mask to quickly check whether a C 'long' contains a
104    non-ASCII, UTF8-encoded char. */
105 #if (SIZEOF_LONG == 8)
106 # define ASCII_CHAR_MASK 0x8080808080808080UL
107 #elif (SIZEOF_LONG == 4)
108 # define ASCII_CHAR_MASK 0x80808080UL
109 #else
110 # error C 'long' size should be either 4 or 8!
111 #endif
112 
113 PyObject*
_Py_bytes_isascii(const char * cptr,Py_ssize_t len)114 _Py_bytes_isascii(const char *cptr, Py_ssize_t len)
115 {
116     const char *p = cptr;
117     const char *end = p + len;
118     const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG);
119 
120     while (p < end) {
121         /* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h
122            for an explanation. */
123         if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) {
124             /* Help allocation */
125             const char *_p = p;
126             while (_p < aligned_end) {
127                 unsigned long value = *(const unsigned long *) _p;
128                 if (value & ASCII_CHAR_MASK) {
129                     Py_RETURN_FALSE;
130                 }
131                 _p += SIZEOF_LONG;
132             }
133             p = _p;
134             if (_p == end)
135                 break;
136         }
137         if ((unsigned char)*p & 0x80) {
138             Py_RETURN_FALSE;
139         }
140         p++;
141     }
142     Py_RETURN_TRUE;
143 }
144 
145 #undef ASCII_CHAR_MASK
146 
147 
148 PyDoc_STRVAR_shared(_Py_isdigit__doc__,
149 "B.isdigit() -> bool\n\
150 \n\
151 Return True if all characters in B are digits\n\
152 and there is at least one character in B, False otherwise.");
153 
154 PyObject*
_Py_bytes_isdigit(const char * cptr,Py_ssize_t len)155 _Py_bytes_isdigit(const char *cptr, Py_ssize_t len)
156 {
157     const unsigned char *p
158         = (const unsigned char *) cptr;
159     const unsigned char *e;
160 
161     /* Shortcut for single character strings */
162     if (len == 1 && Py_ISDIGIT(*p))
163         Py_RETURN_TRUE;
164 
165     /* Special case for empty strings */
166     if (len == 0)
167         Py_RETURN_FALSE;
168 
169     e = p + len;
170     for (; p < e; p++) {
171         if (!Py_ISDIGIT(*p))
172             Py_RETURN_FALSE;
173     }
174     Py_RETURN_TRUE;
175 }
176 
177 
178 PyDoc_STRVAR_shared(_Py_islower__doc__,
179 "B.islower() -> bool\n\
180 \n\
181 Return True if all cased characters in B are lowercase and there is\n\
182 at least one cased character in B, False otherwise.");
183 
184 PyObject*
_Py_bytes_islower(const char * cptr,Py_ssize_t len)185 _Py_bytes_islower(const char *cptr, Py_ssize_t len)
186 {
187     const unsigned char *p
188         = (const unsigned char *) cptr;
189     const unsigned char *e;
190     int cased;
191 
192     /* Shortcut for single character strings */
193     if (len == 1)
194         return PyBool_FromLong(Py_ISLOWER(*p));
195 
196     /* Special case for empty strings */
197     if (len == 0)
198         Py_RETURN_FALSE;
199 
200     e = p + len;
201     cased = 0;
202     for (; p < e; p++) {
203         if (Py_ISUPPER(*p))
204             Py_RETURN_FALSE;
205         else if (!cased && Py_ISLOWER(*p))
206             cased = 1;
207     }
208     return PyBool_FromLong(cased);
209 }
210 
211 
212 PyDoc_STRVAR_shared(_Py_isupper__doc__,
213 "B.isupper() -> bool\n\
214 \n\
215 Return True if all cased characters in B are uppercase and there is\n\
216 at least one cased character in B, False otherwise.");
217 
218 PyObject*
_Py_bytes_isupper(const char * cptr,Py_ssize_t len)219 _Py_bytes_isupper(const char *cptr, Py_ssize_t len)
220 {
221     const unsigned char *p
222         = (const unsigned char *) cptr;
223     const unsigned char *e;
224     int cased;
225 
226     /* Shortcut for single character strings */
227     if (len == 1)
228         return PyBool_FromLong(Py_ISUPPER(*p));
229 
230     /* Special case for empty strings */
231     if (len == 0)
232         Py_RETURN_FALSE;
233 
234     e = p + len;
235     cased = 0;
236     for (; p < e; p++) {
237         if (Py_ISLOWER(*p))
238             Py_RETURN_FALSE;
239         else if (!cased && Py_ISUPPER(*p))
240             cased = 1;
241     }
242     return PyBool_FromLong(cased);
243 }
244 
245 
246 PyDoc_STRVAR_shared(_Py_istitle__doc__,
247 "B.istitle() -> bool\n\
248 \n\
249 Return True if B is a titlecased string and there is at least one\n\
250 character in B, i.e. uppercase characters may only follow uncased\n\
251 characters and lowercase characters only cased ones. Return False\n\
252 otherwise.");
253 
254 PyObject*
_Py_bytes_istitle(const char * cptr,Py_ssize_t len)255 _Py_bytes_istitle(const char *cptr, Py_ssize_t len)
256 {
257     const unsigned char *p
258         = (const unsigned char *) cptr;
259     const unsigned char *e;
260     int cased, previous_is_cased;
261 
262     /* Shortcut for single character strings */
263     if (len == 1)
264         return PyBool_FromLong(Py_ISUPPER(*p));
265 
266     /* Special case for empty strings */
267     if (len == 0)
268         Py_RETURN_FALSE;
269 
270     e = p + len;
271     cased = 0;
272     previous_is_cased = 0;
273     for (; p < e; p++) {
274         const unsigned char ch = *p;
275 
276         if (Py_ISUPPER(ch)) {
277             if (previous_is_cased)
278                 Py_RETURN_FALSE;
279             previous_is_cased = 1;
280             cased = 1;
281         }
282         else if (Py_ISLOWER(ch)) {
283             if (!previous_is_cased)
284                 Py_RETURN_FALSE;
285             previous_is_cased = 1;
286             cased = 1;
287         }
288         else
289             previous_is_cased = 0;
290     }
291     return PyBool_FromLong(cased);
292 }
293 
294 
295 PyDoc_STRVAR_shared(_Py_lower__doc__,
296 "B.lower() -> copy of B\n\
297 \n\
298 Return a copy of B with all ASCII characters converted to lowercase.");
299 
300 void
_Py_bytes_lower(char * result,const char * cptr,Py_ssize_t len)301 _Py_bytes_lower(char *result, const char *cptr, Py_ssize_t len)
302 {
303     Py_ssize_t i;
304 
305     for (i = 0; i < len; i++) {
306         result[i] = Py_TOLOWER((unsigned char) cptr[i]);
307     }
308 }
309 
310 
311 PyDoc_STRVAR_shared(_Py_upper__doc__,
312 "B.upper() -> copy of B\n\
313 \n\
314 Return a copy of B with all ASCII characters converted to uppercase.");
315 
316 void
_Py_bytes_upper(char * result,const char * cptr,Py_ssize_t len)317 _Py_bytes_upper(char *result, const char *cptr, Py_ssize_t len)
318 {
319     Py_ssize_t i;
320 
321     for (i = 0; i < len; i++) {
322         result[i] = Py_TOUPPER((unsigned char) cptr[i]);
323     }
324 }
325 
326 
327 PyDoc_STRVAR_shared(_Py_title__doc__,
328 "B.title() -> copy of B\n\
329 \n\
330 Return a titlecased version of B, i.e. ASCII words start with uppercase\n\
331 characters, all remaining cased characters have lowercase.");
332 
333 void
_Py_bytes_title(char * result,const char * s,Py_ssize_t len)334 _Py_bytes_title(char *result, const char *s, Py_ssize_t len)
335 {
336     Py_ssize_t i;
337     int previous_is_cased = 0;
338 
339     for (i = 0; i < len; i++) {
340         int c = Py_CHARMASK(*s++);
341         if (Py_ISLOWER(c)) {
342             if (!previous_is_cased)
343                 c = Py_TOUPPER(c);
344             previous_is_cased = 1;
345         } else if (Py_ISUPPER(c)) {
346             if (previous_is_cased)
347                 c = Py_TOLOWER(c);
348             previous_is_cased = 1;
349         } else
350             previous_is_cased = 0;
351         *result++ = c;
352     }
353 }
354 
355 
356 PyDoc_STRVAR_shared(_Py_capitalize__doc__,
357 "B.capitalize() -> copy of B\n\
358 \n\
359 Return a copy of B with only its first character capitalized (ASCII)\n\
360 and the rest lower-cased.");
361 
362 void
_Py_bytes_capitalize(char * result,const char * s,Py_ssize_t len)363 _Py_bytes_capitalize(char *result, const char *s, Py_ssize_t len)
364 {
365     if (len > 0) {
366         *result = Py_TOUPPER(*s);
367         _Py_bytes_lower(result + 1, s + 1, len - 1);
368     }
369 }
370 
371 
372 PyDoc_STRVAR_shared(_Py_swapcase__doc__,
373 "B.swapcase() -> copy of B\n\
374 \n\
375 Return a copy of B with uppercase ASCII characters converted\n\
376 to lowercase ASCII and vice versa.");
377 
378 void
_Py_bytes_swapcase(char * result,const char * s,Py_ssize_t len)379 _Py_bytes_swapcase(char *result, const char *s, Py_ssize_t len)
380 {
381     Py_ssize_t i;
382 
383     for (i = 0; i < len; i++) {
384         int c = Py_CHARMASK(*s++);
385         if (Py_ISLOWER(c)) {
386             *result = Py_TOUPPER(c);
387         }
388         else if (Py_ISUPPER(c)) {
389             *result = Py_TOLOWER(c);
390         }
391         else
392             *result = c;
393         result++;
394     }
395 }
396 
397 
398 PyDoc_STRVAR_shared(_Py_maketrans__doc__,
399 "B.maketrans(frm, to) -> translation table\n\
400 \n\
401 Return a translation table (a bytes object of length 256) suitable\n\
402 for use in the bytes or bytearray translate method where each byte\n\
403 in frm is mapped to the byte at the same position in to.\n\
404 The bytes objects frm and to must be of the same length.");
405 
406 PyObject *
_Py_bytes_maketrans(Py_buffer * frm,Py_buffer * to)407 _Py_bytes_maketrans(Py_buffer *frm, Py_buffer *to)
408 {
409     PyObject *res = NULL;
410     Py_ssize_t i;
411     char *p;
412 
413     if (frm->len != to->len) {
414         PyErr_Format(PyExc_ValueError,
415                      "maketrans arguments must have same length");
416         return NULL;
417     }
418     res = PyBytes_FromStringAndSize(NULL, 256);
419     if (!res)
420         return NULL;
421     p = PyBytes_AS_STRING(res);
422     for (i = 0; i < 256; i++)
423         p[i] = (char) i;
424     for (i = 0; i < frm->len; i++) {
425         p[((unsigned char *)frm->buf)[i]] = ((char *)to->buf)[i];
426     }
427 
428     return res;
429 }
430 
431 #define FASTSEARCH fastsearch
432 #define STRINGLIB(F) stringlib_##F
433 #define STRINGLIB_CHAR char
434 #define STRINGLIB_SIZEOF_CHAR 1
435 
436 #include "stringlib/fastsearch.h"
437 #include "stringlib/count.h"
438 #include "stringlib/find.h"
439 
440 /*
441 Wraps stringlib_parse_args_finds() and additionally checks the first
442 argument type.
443 
444 In case the first argument is a bytes-like object, sets it to subobj,
445 and doesn't touch the byte parameter.
446 In case it is an integer in range(0, 256), writes the integer value
447 to byte, and sets subobj to NULL.
448 
449 The other parameters are similar to those of
450 stringlib_parse_args_finds().
451 */
452 
453 Py_LOCAL_INLINE(int)
parse_args_finds_byte(const char * function_name,PyObject * args,PyObject ** subobj,char * byte,Py_ssize_t * start,Py_ssize_t * end)454 parse_args_finds_byte(const char *function_name, PyObject *args,
455                       PyObject **subobj, char *byte,
456                       Py_ssize_t *start, Py_ssize_t *end)
457 {
458     PyObject *tmp_subobj;
459     Py_ssize_t ival;
460 
461     if(!stringlib_parse_args_finds(function_name, args, &tmp_subobj,
462                                    start, end))
463         return 0;
464 
465     if (PyObject_CheckBuffer(tmp_subobj)) {
466         *subobj = tmp_subobj;
467         return 1;
468     }
469 
470     if (!_PyIndex_Check(tmp_subobj)) {
471         PyErr_Format(PyExc_TypeError,
472                      "argument should be integer or bytes-like object, "
473                      "not '%.200s'",
474                      Py_TYPE(tmp_subobj)->tp_name);
475         return 0;
476     }
477 
478     ival = PyNumber_AsSsize_t(tmp_subobj, NULL);
479     if (ival == -1 && PyErr_Occurred()) {
480         return 0;
481     }
482     if (ival < 0 || ival > 255) {
483         PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
484         return 0;
485     }
486 
487     *subobj = NULL;
488     *byte = (char)ival;
489     return 1;
490 }
491 
/* Helper macro to fix up start/end slice values in place.

   `end` is capped at `len`; a negative `end` or `start` is made
   relative to `len` and then floored at 0.  `start` is NOT capped at
   `len`, so callers must still cope with start > end.

   `start` and `end` must be modifiable lvalues.  Each argument is
   evaluated more than once, so pass plain variables without side
   effects.  The body is wrapped in do { } while (0) so that the macro
   behaves as a single statement, e.g. inside an unbraced if/else. */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len)) {                    \
            (end) = (len);                      \
        }                                       \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0) {                    \
                (end) = 0;                      \
            }                                   \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0) {                  \
                (start) = 0;                    \
            }                                   \
        }                                       \
    } while (0)
506 
507 Py_LOCAL_INLINE(Py_ssize_t)
find_internal(const char * str,Py_ssize_t len,const char * function_name,PyObject * args,int dir)508 find_internal(const char *str, Py_ssize_t len,
509               const char *function_name, PyObject *args, int dir)
510 {
511     PyObject *subobj;
512     char byte;
513     Py_buffer subbuf;
514     const char *sub;
515     Py_ssize_t sub_len;
516     Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
517     Py_ssize_t res;
518 
519     if (!parse_args_finds_byte(function_name, args,
520                                &subobj, &byte, &start, &end))
521         return -2;
522 
523     if (subobj) {
524         if (PyObject_GetBuffer(subobj, &subbuf, PyBUF_SIMPLE) != 0)
525             return -2;
526 
527         sub = subbuf.buf;
528         sub_len = subbuf.len;
529     }
530     else {
531         sub = &byte;
532         sub_len = 1;
533     }
534 
535     ADJUST_INDICES(start, end, len);
536     if (end - start < sub_len)
537         res = -1;
538     else if (sub_len == 1) {
539         if (dir > 0)
540             res = stringlib_find_char(
541                 str + start, end - start,
542                 *sub);
543         else
544             res = stringlib_rfind_char(
545                 str + start, end - start,
546                 *sub);
547         if (res >= 0)
548             res += start;
549     }
550     else {
551         if (dir > 0)
552             res = stringlib_find_slice(
553                 str, len,
554                 sub, sub_len, start, end);
555         else
556             res = stringlib_rfind_slice(
557                 str, len,
558                 sub, sub_len, start, end);
559     }
560 
561     if (subobj)
562         PyBuffer_Release(&subbuf);
563 
564     return res;
565 }
566 
567 PyDoc_STRVAR_shared(_Py_find__doc__,
568 "B.find(sub[, start[, end]]) -> int\n\
569 \n\
570 Return the lowest index in B where subsection sub is found,\n\
571 such that sub is contained within B[start,end].  Optional\n\
572 arguments start and end are interpreted as in slice notation.\n\
573 \n\
574 Return -1 on failure.");
575 
576 PyObject *
_Py_bytes_find(const char * str,Py_ssize_t len,PyObject * args)577 _Py_bytes_find(const char *str, Py_ssize_t len, PyObject *args)
578 {
579     Py_ssize_t result = find_internal(str, len, "find", args, +1);
580     if (result == -2)
581         return NULL;
582     return PyLong_FromSsize_t(result);
583 }
584 
585 PyDoc_STRVAR_shared(_Py_index__doc__,
586 "B.index(sub[, start[, end]]) -> int\n\
587 \n\
588 Return the lowest index in B where subsection sub is found,\n\
589 such that sub is contained within B[start,end].  Optional\n\
590 arguments start and end are interpreted as in slice notation.\n\
591 \n\
592 Raises ValueError when the subsection is not found.");
593 
594 PyObject *
_Py_bytes_index(const char * str,Py_ssize_t len,PyObject * args)595 _Py_bytes_index(const char *str, Py_ssize_t len, PyObject *args)
596 {
597     Py_ssize_t result = find_internal(str, len, "index", args, +1);
598     if (result == -2)
599         return NULL;
600     if (result == -1) {
601         PyErr_SetString(PyExc_ValueError,
602                         "subsection not found");
603         return NULL;
604     }
605     return PyLong_FromSsize_t(result);
606 }
607 
608 PyDoc_STRVAR_shared(_Py_rfind__doc__,
609 "B.rfind(sub[, start[, end]]) -> int\n\
610 \n\
611 Return the highest index in B where subsection sub is found,\n\
612 such that sub is contained within B[start,end].  Optional\n\
613 arguments start and end are interpreted as in slice notation.\n\
614 \n\
615 Return -1 on failure.");
616 
617 PyObject *
_Py_bytes_rfind(const char * str,Py_ssize_t len,PyObject * args)618 _Py_bytes_rfind(const char *str, Py_ssize_t len, PyObject *args)
619 {
620     Py_ssize_t result = find_internal(str, len, "rfind", args, -1);
621     if (result == -2)
622         return NULL;
623     return PyLong_FromSsize_t(result);
624 }
625 
626 PyDoc_STRVAR_shared(_Py_rindex__doc__,
627 "B.rindex(sub[, start[, end]]) -> int\n\
628 \n\
629 Return the highest index in B where subsection sub is found,\n\
630 such that sub is contained within B[start,end].  Optional\n\
631 arguments start and end are interpreted as in slice notation.\n\
632 \n\
633 Raise ValueError when the subsection is not found.");
634 
635 PyObject *
_Py_bytes_rindex(const char * str,Py_ssize_t len,PyObject * args)636 _Py_bytes_rindex(const char *str, Py_ssize_t len, PyObject *args)
637 {
638     Py_ssize_t result = find_internal(str, len, "rindex", args, -1);
639     if (result == -2)
640         return NULL;
641     if (result == -1) {
642         PyErr_SetString(PyExc_ValueError,
643                         "subsection not found");
644         return NULL;
645     }
646     return PyLong_FromSsize_t(result);
647 }
648 
649 PyDoc_STRVAR_shared(_Py_count__doc__,
650 "B.count(sub[, start[, end]]) -> int\n\
651 \n\
652 Return the number of non-overlapping occurrences of subsection sub in\n\
653 bytes B[start:end].  Optional arguments start and end are interpreted\n\
654 as in slice notation.");
655 
656 PyObject *
_Py_bytes_count(const char * str,Py_ssize_t len,PyObject * args)657 _Py_bytes_count(const char *str, Py_ssize_t len, PyObject *args)
658 {
659     PyObject *sub_obj;
660     const char *sub;
661     Py_ssize_t sub_len;
662     char byte;
663     Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
664 
665     Py_buffer vsub;
666     PyObject *count_obj;
667 
668     if (!parse_args_finds_byte("count", args,
669                                &sub_obj, &byte, &start, &end))
670         return NULL;
671 
672     if (sub_obj) {
673         if (PyObject_GetBuffer(sub_obj, &vsub, PyBUF_SIMPLE) != 0)
674             return NULL;
675 
676         sub = vsub.buf;
677         sub_len = vsub.len;
678     }
679     else {
680         sub = &byte;
681         sub_len = 1;
682     }
683 
684     ADJUST_INDICES(start, end, len);
685 
686     count_obj = PyLong_FromSsize_t(
687         stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
688         );
689 
690     if (sub_obj)
691         PyBuffer_Release(&vsub);
692 
693     return count_obj;
694 }
695 
696 int
_Py_bytes_contains(const char * str,Py_ssize_t len,PyObject * arg)697 _Py_bytes_contains(const char *str, Py_ssize_t len, PyObject *arg)
698 {
699     Py_ssize_t ival = PyNumber_AsSsize_t(arg, NULL);
700     if (ival == -1 && PyErr_Occurred()) {
701         Py_buffer varg;
702         Py_ssize_t pos;
703         PyErr_Clear();
704         if (PyObject_GetBuffer(arg, &varg, PyBUF_SIMPLE) != 0)
705             return -1;
706         pos = stringlib_find(str, len,
707                              varg.buf, varg.len, 0);
708         PyBuffer_Release(&varg);
709         return pos >= 0;
710     }
711     if (ival < 0 || ival >= 256) {
712         PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
713         return -1;
714     }
715 
716     return memchr(str, (int) ival, len) != NULL;
717 }
718 
719 
720 /* Matches the end (direction >= 0) or start (direction < 0) of the buffer
721  * against substr, using the start and end arguments. Returns
722  * -1 on error, 0 if not found and 1 if found.
723  */
724 static int
tailmatch(const char * str,Py_ssize_t len,PyObject * substr,Py_ssize_t start,Py_ssize_t end,int direction)725 tailmatch(const char *str, Py_ssize_t len, PyObject *substr,
726           Py_ssize_t start, Py_ssize_t end, int direction)
727 {
728     Py_buffer sub_view = {NULL, NULL};
729     const char *sub;
730     Py_ssize_t slen;
731 
732     if (PyBytes_Check(substr)) {
733         sub = PyBytes_AS_STRING(substr);
734         slen = PyBytes_GET_SIZE(substr);
735     }
736     else {
737         if (PyObject_GetBuffer(substr, &sub_view, PyBUF_SIMPLE) != 0)
738             return -1;
739         sub = sub_view.buf;
740         slen = sub_view.len;
741     }
742 
743     ADJUST_INDICES(start, end, len);
744 
745     if (direction < 0) {
746         /* startswith */
747         if (start > len - slen)
748             goto notfound;
749     } else {
750         /* endswith */
751         if (end - start < slen || start > len)
752             goto notfound;
753 
754         if (end - slen > start)
755             start = end - slen;
756     }
757     if (end - start < slen)
758         goto notfound;
759     if (memcmp(str + start, sub, slen) != 0)
760         goto notfound;
761 
762     PyBuffer_Release(&sub_view);
763     return 1;
764 
765 notfound:
766     PyBuffer_Release(&sub_view);
767     return 0;
768 }
769 
770 static PyObject *
_Py_bytes_tailmatch(const char * str,Py_ssize_t len,const char * function_name,PyObject * args,int direction)771 _Py_bytes_tailmatch(const char *str, Py_ssize_t len,
772                     const char *function_name, PyObject *args,
773                     int direction)
774 {
775     Py_ssize_t start = 0;
776     Py_ssize_t end = PY_SSIZE_T_MAX;
777     PyObject *subobj;
778     int result;
779 
780     if (!stringlib_parse_args_finds(function_name, args, &subobj, &start, &end))
781         return NULL;
782     if (PyTuple_Check(subobj)) {
783         Py_ssize_t i;
784         for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
785             result = tailmatch(str, len, PyTuple_GET_ITEM(subobj, i),
786                                start, end, direction);
787             if (result == -1)
788                 return NULL;
789             else if (result) {
790                 Py_RETURN_TRUE;
791             }
792         }
793         Py_RETURN_FALSE;
794     }
795     result = tailmatch(str, len, subobj, start, end, direction);
796     if (result == -1) {
797         if (PyErr_ExceptionMatches(PyExc_TypeError))
798             PyErr_Format(PyExc_TypeError,
799                          "%s first arg must be bytes or a tuple of bytes, "
800                          "not %s",
801                          function_name, Py_TYPE(subobj)->tp_name);
802         return NULL;
803     }
804     else
805         return PyBool_FromLong(result);
806 }
807 
808 PyDoc_STRVAR_shared(_Py_startswith__doc__,
809 "B.startswith(prefix[, start[, end]]) -> bool\n\
810 \n\
811 Return True if B starts with the specified prefix, False otherwise.\n\
812 With optional start, test B beginning at that position.\n\
813 With optional end, stop comparing B at that position.\n\
814 prefix can also be a tuple of bytes to try.");
815 
816 PyObject *
_Py_bytes_startswith(const char * str,Py_ssize_t len,PyObject * args)817 _Py_bytes_startswith(const char *str, Py_ssize_t len, PyObject *args)
818 {
819     return _Py_bytes_tailmatch(str, len, "startswith", args, -1);
820 }
821 
822 PyDoc_STRVAR_shared(_Py_endswith__doc__,
823 "B.endswith(suffix[, start[, end]]) -> bool\n\
824 \n\
825 Return True if B ends with the specified suffix, False otherwise.\n\
826 With optional start, test B beginning at that position.\n\
827 With optional end, stop comparing B at that position.\n\
828 suffix can also be a tuple of bytes to try.");
829 
830 PyObject *
_Py_bytes_endswith(const char * str,Py_ssize_t len,PyObject * args)831 _Py_bytes_endswith(const char *str, Py_ssize_t len, PyObject *args)
832 {
833     return _Py_bytes_tailmatch(str, len, "endswith", args, +1);
834 }
835