1 #define PY_SSIZE_T_CLEAN
2 #include "Python.h"
3 #include "bytes_methods.h"
4
5 PyDoc_STRVAR_shared(_Py_isspace__doc__,
6 "B.isspace() -> bool\n\
7 \n\
8 Return True if all characters in B are whitespace\n\
9 and there is at least one character in B, False otherwise.");
10
11 PyObject*
_Py_bytes_isspace(const char * cptr,Py_ssize_t len)12 _Py_bytes_isspace(const char *cptr, Py_ssize_t len)
13 {
14 const unsigned char *p
15 = (unsigned char *) cptr;
16 const unsigned char *e;
17
18 /* Shortcut for single character strings */
19 if (len == 1 && Py_ISSPACE(*p))
20 Py_RETURN_TRUE;
21
22 /* Special case for empty strings */
23 if (len == 0)
24 Py_RETURN_FALSE;
25
26 e = p + len;
27 for (; p < e; p++) {
28 if (!Py_ISSPACE(*p))
29 Py_RETURN_FALSE;
30 }
31 Py_RETURN_TRUE;
32 }
33
34
35 PyDoc_STRVAR_shared(_Py_isalpha__doc__,
36 "B.isalpha() -> bool\n\
37 \n\
38 Return True if all characters in B are alphabetic\n\
39 and there is at least one character in B, False otherwise.");
40
41 PyObject*
_Py_bytes_isalpha(const char * cptr,Py_ssize_t len)42 _Py_bytes_isalpha(const char *cptr, Py_ssize_t len)
43 {
44 const unsigned char *p
45 = (unsigned char *) cptr;
46 const unsigned char *e;
47
48 /* Shortcut for single character strings */
49 if (len == 1 && Py_ISALPHA(*p))
50 Py_RETURN_TRUE;
51
52 /* Special case for empty strings */
53 if (len == 0)
54 Py_RETURN_FALSE;
55
56 e = p + len;
57 for (; p < e; p++) {
58 if (!Py_ISALPHA(*p))
59 Py_RETURN_FALSE;
60 }
61 Py_RETURN_TRUE;
62 }
63
64
65 PyDoc_STRVAR_shared(_Py_isalnum__doc__,
66 "B.isalnum() -> bool\n\
67 \n\
68 Return True if all characters in B are alphanumeric\n\
69 and there is at least one character in B, False otherwise.");
70
71 PyObject*
_Py_bytes_isalnum(const char * cptr,Py_ssize_t len)72 _Py_bytes_isalnum(const char *cptr, Py_ssize_t len)
73 {
74 const unsigned char *p
75 = (unsigned char *) cptr;
76 const unsigned char *e;
77
78 /* Shortcut for single character strings */
79 if (len == 1 && Py_ISALNUM(*p))
80 Py_RETURN_TRUE;
81
82 /* Special case for empty strings */
83 if (len == 0)
84 Py_RETURN_FALSE;
85
86 e = p + len;
87 for (; p < e; p++) {
88 if (!Py_ISALNUM(*p))
89 Py_RETURN_FALSE;
90 }
91 Py_RETURN_TRUE;
92 }
93
94
95 PyDoc_STRVAR_shared(_Py_isascii__doc__,
96 "B.isascii() -> bool\n\
97 \n\
98 Return True if B is empty or all characters in B are ASCII,\n\
99 False otherwise.");
100
101 // Optimization is copied from ascii_decode in unicodeobject.c
102 /* Mask to quickly check whether a C 'long' contains a
103 non-ASCII, UTF8-encoded char. */
104 #if (SIZEOF_LONG == 8)
105 # define ASCII_CHAR_MASK 0x8080808080808080UL
106 #elif (SIZEOF_LONG == 4)
107 # define ASCII_CHAR_MASK 0x80808080UL
108 #else
109 # error C 'long' size should be either 4 or 8!
110 #endif
111
112 PyObject*
_Py_bytes_isascii(const char * cptr,Py_ssize_t len)113 _Py_bytes_isascii(const char *cptr, Py_ssize_t len)
114 {
115 const char *p = cptr;
116 const char *end = p + len;
117 const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG);
118
119 while (p < end) {
120 /* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h
121 for an explanation. */
122 if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) {
123 /* Help allocation */
124 const char *_p = p;
125 while (_p < aligned_end) {
126 unsigned long value = *(unsigned long *) _p;
127 if (value & ASCII_CHAR_MASK) {
128 Py_RETURN_FALSE;
129 }
130 _p += SIZEOF_LONG;
131 }
132 p = _p;
133 if (_p == end)
134 break;
135 }
136 if ((unsigned char)*p & 0x80) {
137 Py_RETURN_FALSE;
138 }
139 p++;
140 }
141 Py_RETURN_TRUE;
142 }
143
144 #undef ASCII_CHAR_MASK
145
146
147 PyDoc_STRVAR_shared(_Py_isdigit__doc__,
148 "B.isdigit() -> bool\n\
149 \n\
150 Return True if all characters in B are digits\n\
151 and there is at least one character in B, False otherwise.");
152
153 PyObject*
_Py_bytes_isdigit(const char * cptr,Py_ssize_t len)154 _Py_bytes_isdigit(const char *cptr, Py_ssize_t len)
155 {
156 const unsigned char *p
157 = (unsigned char *) cptr;
158 const unsigned char *e;
159
160 /* Shortcut for single character strings */
161 if (len == 1 && Py_ISDIGIT(*p))
162 Py_RETURN_TRUE;
163
164 /* Special case for empty strings */
165 if (len == 0)
166 Py_RETURN_FALSE;
167
168 e = p + len;
169 for (; p < e; p++) {
170 if (!Py_ISDIGIT(*p))
171 Py_RETURN_FALSE;
172 }
173 Py_RETURN_TRUE;
174 }
175
176
177 PyDoc_STRVAR_shared(_Py_islower__doc__,
178 "B.islower() -> bool\n\
179 \n\
180 Return True if all cased characters in B are lowercase and there is\n\
181 at least one cased character in B, False otherwise.");
182
183 PyObject*
_Py_bytes_islower(const char * cptr,Py_ssize_t len)184 _Py_bytes_islower(const char *cptr, Py_ssize_t len)
185 {
186 const unsigned char *p
187 = (unsigned char *) cptr;
188 const unsigned char *e;
189 int cased;
190
191 /* Shortcut for single character strings */
192 if (len == 1)
193 return PyBool_FromLong(Py_ISLOWER(*p));
194
195 /* Special case for empty strings */
196 if (len == 0)
197 Py_RETURN_FALSE;
198
199 e = p + len;
200 cased = 0;
201 for (; p < e; p++) {
202 if (Py_ISUPPER(*p))
203 Py_RETURN_FALSE;
204 else if (!cased && Py_ISLOWER(*p))
205 cased = 1;
206 }
207 return PyBool_FromLong(cased);
208 }
209
210
211 PyDoc_STRVAR_shared(_Py_isupper__doc__,
212 "B.isupper() -> bool\n\
213 \n\
214 Return True if all cased characters in B are uppercase and there is\n\
215 at least one cased character in B, False otherwise.");
216
217 PyObject*
_Py_bytes_isupper(const char * cptr,Py_ssize_t len)218 _Py_bytes_isupper(const char *cptr, Py_ssize_t len)
219 {
220 const unsigned char *p
221 = (unsigned char *) cptr;
222 const unsigned char *e;
223 int cased;
224
225 /* Shortcut for single character strings */
226 if (len == 1)
227 return PyBool_FromLong(Py_ISUPPER(*p));
228
229 /* Special case for empty strings */
230 if (len == 0)
231 Py_RETURN_FALSE;
232
233 e = p + len;
234 cased = 0;
235 for (; p < e; p++) {
236 if (Py_ISLOWER(*p))
237 Py_RETURN_FALSE;
238 else if (!cased && Py_ISUPPER(*p))
239 cased = 1;
240 }
241 return PyBool_FromLong(cased);
242 }
243
244
245 PyDoc_STRVAR_shared(_Py_istitle__doc__,
246 "B.istitle() -> bool\n\
247 \n\
248 Return True if B is a titlecased string and there is at least one\n\
249 character in B, i.e. uppercase characters may only follow uncased\n\
250 characters and lowercase characters only cased ones. Return False\n\
251 otherwise.");
252
253 PyObject*
_Py_bytes_istitle(const char * cptr,Py_ssize_t len)254 _Py_bytes_istitle(const char *cptr, Py_ssize_t len)
255 {
256 const unsigned char *p
257 = (unsigned char *) cptr;
258 const unsigned char *e;
259 int cased, previous_is_cased;
260
261 /* Shortcut for single character strings */
262 if (len == 1)
263 return PyBool_FromLong(Py_ISUPPER(*p));
264
265 /* Special case for empty strings */
266 if (len == 0)
267 Py_RETURN_FALSE;
268
269 e = p + len;
270 cased = 0;
271 previous_is_cased = 0;
272 for (; p < e; p++) {
273 const unsigned char ch = *p;
274
275 if (Py_ISUPPER(ch)) {
276 if (previous_is_cased)
277 Py_RETURN_FALSE;
278 previous_is_cased = 1;
279 cased = 1;
280 }
281 else if (Py_ISLOWER(ch)) {
282 if (!previous_is_cased)
283 Py_RETURN_FALSE;
284 previous_is_cased = 1;
285 cased = 1;
286 }
287 else
288 previous_is_cased = 0;
289 }
290 return PyBool_FromLong(cased);
291 }
292
293
294 PyDoc_STRVAR_shared(_Py_lower__doc__,
295 "B.lower() -> copy of B\n\
296 \n\
297 Return a copy of B with all ASCII characters converted to lowercase.");
298
299 void
_Py_bytes_lower(char * result,const char * cptr,Py_ssize_t len)300 _Py_bytes_lower(char *result, const char *cptr, Py_ssize_t len)
301 {
302 Py_ssize_t i;
303
304 for (i = 0; i < len; i++) {
305 result[i] = Py_TOLOWER((unsigned char) cptr[i]);
306 }
307 }
308
309
310 PyDoc_STRVAR_shared(_Py_upper__doc__,
311 "B.upper() -> copy of B\n\
312 \n\
313 Return a copy of B with all ASCII characters converted to uppercase.");
314
315 void
_Py_bytes_upper(char * result,const char * cptr,Py_ssize_t len)316 _Py_bytes_upper(char *result, const char *cptr, Py_ssize_t len)
317 {
318 Py_ssize_t i;
319
320 for (i = 0; i < len; i++) {
321 result[i] = Py_TOUPPER((unsigned char) cptr[i]);
322 }
323 }
324
325
326 PyDoc_STRVAR_shared(_Py_title__doc__,
327 "B.title() -> copy of B\n\
328 \n\
329 Return a titlecased version of B, i.e. ASCII words start with uppercase\n\
330 characters, all remaining cased characters have lowercase.");
331
332 void
_Py_bytes_title(char * result,const char * s,Py_ssize_t len)333 _Py_bytes_title(char *result, const char *s, Py_ssize_t len)
334 {
335 Py_ssize_t i;
336 int previous_is_cased = 0;
337
338 for (i = 0; i < len; i++) {
339 int c = Py_CHARMASK(*s++);
340 if (Py_ISLOWER(c)) {
341 if (!previous_is_cased)
342 c = Py_TOUPPER(c);
343 previous_is_cased = 1;
344 } else if (Py_ISUPPER(c)) {
345 if (previous_is_cased)
346 c = Py_TOLOWER(c);
347 previous_is_cased = 1;
348 } else
349 previous_is_cased = 0;
350 *result++ = c;
351 }
352 }
353
354
355 PyDoc_STRVAR_shared(_Py_capitalize__doc__,
356 "B.capitalize() -> copy of B\n\
357 \n\
358 Return a copy of B with only its first character capitalized (ASCII)\n\
359 and the rest lower-cased.");
360
361 void
_Py_bytes_capitalize(char * result,const char * s,Py_ssize_t len)362 _Py_bytes_capitalize(char *result, const char *s, Py_ssize_t len)
363 {
364 Py_ssize_t i;
365
366 if (0 < len) {
367 int c = Py_CHARMASK(*s++);
368 if (Py_ISLOWER(c))
369 *result = Py_TOUPPER(c);
370 else
371 *result = c;
372 result++;
373 }
374 for (i = 1; i < len; i++) {
375 int c = Py_CHARMASK(*s++);
376 if (Py_ISUPPER(c))
377 *result = Py_TOLOWER(c);
378 else
379 *result = c;
380 result++;
381 }
382 }
383
384
385 PyDoc_STRVAR_shared(_Py_swapcase__doc__,
386 "B.swapcase() -> copy of B\n\
387 \n\
388 Return a copy of B with uppercase ASCII characters converted\n\
389 to lowercase ASCII and vice versa.");
390
391 void
_Py_bytes_swapcase(char * result,const char * s,Py_ssize_t len)392 _Py_bytes_swapcase(char *result, const char *s, Py_ssize_t len)
393 {
394 Py_ssize_t i;
395
396 for (i = 0; i < len; i++) {
397 int c = Py_CHARMASK(*s++);
398 if (Py_ISLOWER(c)) {
399 *result = Py_TOUPPER(c);
400 }
401 else if (Py_ISUPPER(c)) {
402 *result = Py_TOLOWER(c);
403 }
404 else
405 *result = c;
406 result++;
407 }
408 }
409
410
411 PyDoc_STRVAR_shared(_Py_maketrans__doc__,
412 "B.maketrans(frm, to) -> translation table\n\
413 \n\
414 Return a translation table (a bytes object of length 256) suitable\n\
415 for use in the bytes or bytearray translate method where each byte\n\
416 in frm is mapped to the byte at the same position in to.\n\
417 The bytes objects frm and to must be of the same length.");
418
419 PyObject *
_Py_bytes_maketrans(Py_buffer * frm,Py_buffer * to)420 _Py_bytes_maketrans(Py_buffer *frm, Py_buffer *to)
421 {
422 PyObject *res = NULL;
423 Py_ssize_t i;
424 char *p;
425
426 if (frm->len != to->len) {
427 PyErr_Format(PyExc_ValueError,
428 "maketrans arguments must have same length");
429 return NULL;
430 }
431 res = PyBytes_FromStringAndSize(NULL, 256);
432 if (!res)
433 return NULL;
434 p = PyBytes_AS_STRING(res);
435 for (i = 0; i < 256; i++)
436 p[i] = (char) i;
437 for (i = 0; i < frm->len; i++) {
438 p[((unsigned char *)frm->buf)[i]] = ((char *)to->buf)[i];
439 }
440
441 return res;
442 }
443
444 #define FASTSEARCH fastsearch
445 #define STRINGLIB(F) stringlib_##F
446 #define STRINGLIB_CHAR char
447 #define STRINGLIB_SIZEOF_CHAR 1
448
449 #include "stringlib/fastsearch.h"
450 #include "stringlib/count.h"
451 #include "stringlib/find.h"
452
453 /*
454 Wraps stringlib_parse_args_finds() and additionally checks the first
455 argument type.
456
457 In case the first argument is a bytes-like object, sets it to subobj,
458 and doesn't touch the byte parameter.
459 In case it is an integer in range(0, 256), writes the integer value
460 to byte, and sets subobj to NULL.
461
462 The other parameters are similar to those of
463 stringlib_parse_args_finds().
464 */
465
466 Py_LOCAL_INLINE(int)
parse_args_finds_byte(const char * function_name,PyObject * args,PyObject ** subobj,char * byte,Py_ssize_t * start,Py_ssize_t * end)467 parse_args_finds_byte(const char *function_name, PyObject *args,
468 PyObject **subobj, char *byte,
469 Py_ssize_t *start, Py_ssize_t *end)
470 {
471 PyObject *tmp_subobj;
472 Py_ssize_t ival;
473
474 if(!stringlib_parse_args_finds(function_name, args, &tmp_subobj,
475 start, end))
476 return 0;
477
478 if (PyObject_CheckBuffer(tmp_subobj)) {
479 *subobj = tmp_subobj;
480 return 1;
481 }
482
483 if (!PyIndex_Check(tmp_subobj)) {
484 PyErr_Format(PyExc_TypeError,
485 "argument should be integer or bytes-like object, "
486 "not '%.200s'",
487 Py_TYPE(tmp_subobj)->tp_name);
488 return 0;
489 }
490
491 ival = PyNumber_AsSsize_t(tmp_subobj, NULL);
492 if (ival == -1 && PyErr_Occurred()) {
493 return 0;
494 }
495 if (ival < 0 || ival > 255) {
496 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
497 return 0;
498 }
499
500 *subobj = NULL;
501 *byte = (char)ival;
502 return 1;
503 }
504
/* Helper macro to fix up start/end slice values in place.

   - end larger than len is clamped to len; a negative end is counted
     from the back (end += len) and clamped to 0 if still negative.
   - a negative start is counted from the back and clamped to 0 if still
     negative.  A start larger than len is left unchanged.

   start and end must be modifiable lvalues and may be evaluated more
   than once.  The body is wrapped in do { } while (0) so that the macro
   behaves as a single statement, including inside an unbraced if/else. */
#define ADJUST_INDICES(start, end, len)     \
    do {                                    \
        if ((end) > (len)) {                \
            (end) = (len);                  \
        }                                   \
        else if ((end) < 0) {               \
            (end) += (len);                 \
            if ((end) < 0) {                \
                (end) = 0;                  \
            }                               \
        }                                   \
        if ((start) < 0) {                  \
            (start) += (len);               \
            if ((start) < 0) {              \
                (start) = 0;                \
            }                               \
        }                                   \
    } while (0)
519
520 Py_LOCAL_INLINE(Py_ssize_t)
find_internal(const char * str,Py_ssize_t len,const char * function_name,PyObject * args,int dir)521 find_internal(const char *str, Py_ssize_t len,
522 const char *function_name, PyObject *args, int dir)
523 {
524 PyObject *subobj;
525 char byte;
526 Py_buffer subbuf;
527 const char *sub;
528 Py_ssize_t sub_len;
529 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
530 Py_ssize_t res;
531
532 if (!parse_args_finds_byte(function_name, args,
533 &subobj, &byte, &start, &end))
534 return -2;
535
536 if (subobj) {
537 if (PyObject_GetBuffer(subobj, &subbuf, PyBUF_SIMPLE) != 0)
538 return -2;
539
540 sub = subbuf.buf;
541 sub_len = subbuf.len;
542 }
543 else {
544 sub = &byte;
545 sub_len = 1;
546 }
547
548 ADJUST_INDICES(start, end, len);
549 if (end - start < sub_len)
550 res = -1;
551 else if (sub_len == 1) {
552 if (dir > 0)
553 res = stringlib_find_char(
554 str + start, end - start,
555 *sub);
556 else
557 res = stringlib_rfind_char(
558 str + start, end - start,
559 *sub);
560 if (res >= 0)
561 res += start;
562 }
563 else {
564 if (dir > 0)
565 res = stringlib_find_slice(
566 str, len,
567 sub, sub_len, start, end);
568 else
569 res = stringlib_rfind_slice(
570 str, len,
571 sub, sub_len, start, end);
572 }
573
574 if (subobj)
575 PyBuffer_Release(&subbuf);
576
577 return res;
578 }
579
580 PyDoc_STRVAR_shared(_Py_find__doc__,
581 "B.find(sub[, start[, end]]) -> int\n\
582 \n\
583 Return the lowest index in B where subsection sub is found,\n\
584 such that sub is contained within B[start,end]. Optional\n\
585 arguments start and end are interpreted as in slice notation.\n\
586 \n\
587 Return -1 on failure.");
588
589 PyObject *
_Py_bytes_find(const char * str,Py_ssize_t len,PyObject * args)590 _Py_bytes_find(const char *str, Py_ssize_t len, PyObject *args)
591 {
592 Py_ssize_t result = find_internal(str, len, "find", args, +1);
593 if (result == -2)
594 return NULL;
595 return PyLong_FromSsize_t(result);
596 }
597
598 PyDoc_STRVAR_shared(_Py_index__doc__,
599 "B.index(sub[, start[, end]]) -> int\n\
600 \n\
601 Return the lowest index in B where subsection sub is found,\n\
602 such that sub is contained within B[start,end]. Optional\n\
603 arguments start and end are interpreted as in slice notation.\n\
604 \n\
605 Raises ValueError when the subsection is not found.");
606
607 PyObject *
_Py_bytes_index(const char * str,Py_ssize_t len,PyObject * args)608 _Py_bytes_index(const char *str, Py_ssize_t len, PyObject *args)
609 {
610 Py_ssize_t result = find_internal(str, len, "index", args, +1);
611 if (result == -2)
612 return NULL;
613 if (result == -1) {
614 PyErr_SetString(PyExc_ValueError,
615 "subsection not found");
616 return NULL;
617 }
618 return PyLong_FromSsize_t(result);
619 }
620
621 PyDoc_STRVAR_shared(_Py_rfind__doc__,
622 "B.rfind(sub[, start[, end]]) -> int\n\
623 \n\
624 Return the highest index in B where subsection sub is found,\n\
625 such that sub is contained within B[start,end]. Optional\n\
626 arguments start and end are interpreted as in slice notation.\n\
627 \n\
628 Return -1 on failure.");
629
630 PyObject *
_Py_bytes_rfind(const char * str,Py_ssize_t len,PyObject * args)631 _Py_bytes_rfind(const char *str, Py_ssize_t len, PyObject *args)
632 {
633 Py_ssize_t result = find_internal(str, len, "rfind", args, -1);
634 if (result == -2)
635 return NULL;
636 return PyLong_FromSsize_t(result);
637 }
638
639 PyDoc_STRVAR_shared(_Py_rindex__doc__,
640 "B.rindex(sub[, start[, end]]) -> int\n\
641 \n\
642 Return the highest index in B where subsection sub is found,\n\
643 such that sub is contained within B[start,end]. Optional\n\
644 arguments start and end are interpreted as in slice notation.\n\
645 \n\
646 Raise ValueError when the subsection is not found.");
647
648 PyObject *
_Py_bytes_rindex(const char * str,Py_ssize_t len,PyObject * args)649 _Py_bytes_rindex(const char *str, Py_ssize_t len, PyObject *args)
650 {
651 Py_ssize_t result = find_internal(str, len, "rindex", args, -1);
652 if (result == -2)
653 return NULL;
654 if (result == -1) {
655 PyErr_SetString(PyExc_ValueError,
656 "subsection not found");
657 return NULL;
658 }
659 return PyLong_FromSsize_t(result);
660 }
661
662 PyDoc_STRVAR_shared(_Py_count__doc__,
663 "B.count(sub[, start[, end]]) -> int\n\
664 \n\
665 Return the number of non-overlapping occurrences of subsection sub in\n\
666 bytes B[start:end]. Optional arguments start and end are interpreted\n\
667 as in slice notation.");
668
669 PyObject *
_Py_bytes_count(const char * str,Py_ssize_t len,PyObject * args)670 _Py_bytes_count(const char *str, Py_ssize_t len, PyObject *args)
671 {
672 PyObject *sub_obj;
673 const char *sub;
674 Py_ssize_t sub_len;
675 char byte;
676 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
677
678 Py_buffer vsub;
679 PyObject *count_obj;
680
681 if (!parse_args_finds_byte("count", args,
682 &sub_obj, &byte, &start, &end))
683 return NULL;
684
685 if (sub_obj) {
686 if (PyObject_GetBuffer(sub_obj, &vsub, PyBUF_SIMPLE) != 0)
687 return NULL;
688
689 sub = vsub.buf;
690 sub_len = vsub.len;
691 }
692 else {
693 sub = &byte;
694 sub_len = 1;
695 }
696
697 ADJUST_INDICES(start, end, len);
698
699 count_obj = PyLong_FromSsize_t(
700 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
701 );
702
703 if (sub_obj)
704 PyBuffer_Release(&vsub);
705
706 return count_obj;
707 }
708
709 int
_Py_bytes_contains(const char * str,Py_ssize_t len,PyObject * arg)710 _Py_bytes_contains(const char *str, Py_ssize_t len, PyObject *arg)
711 {
712 Py_ssize_t ival = PyNumber_AsSsize_t(arg, NULL);
713 if (ival == -1 && PyErr_Occurred()) {
714 Py_buffer varg;
715 Py_ssize_t pos;
716 PyErr_Clear();
717 if (PyObject_GetBuffer(arg, &varg, PyBUF_SIMPLE) != 0)
718 return -1;
719 pos = stringlib_find(str, len,
720 varg.buf, varg.len, 0);
721 PyBuffer_Release(&varg);
722 return pos >= 0;
723 }
724 if (ival < 0 || ival >= 256) {
725 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
726 return -1;
727 }
728
729 return memchr(str, (int) ival, len) != NULL;
730 }
731
732
733 /* Matches the end (direction >= 0) or start (direction < 0) of the buffer
734 * against substr, using the start and end arguments. Returns
735 * -1 on error, 0 if not found and 1 if found.
736 */
737 static int
tailmatch(const char * str,Py_ssize_t len,PyObject * substr,Py_ssize_t start,Py_ssize_t end,int direction)738 tailmatch(const char *str, Py_ssize_t len, PyObject *substr,
739 Py_ssize_t start, Py_ssize_t end, int direction)
740 {
741 Py_buffer sub_view = {NULL, NULL};
742 const char *sub;
743 Py_ssize_t slen;
744
745 if (PyBytes_Check(substr)) {
746 sub = PyBytes_AS_STRING(substr);
747 slen = PyBytes_GET_SIZE(substr);
748 }
749 else {
750 if (PyObject_GetBuffer(substr, &sub_view, PyBUF_SIMPLE) != 0)
751 return -1;
752 sub = sub_view.buf;
753 slen = sub_view.len;
754 }
755
756 ADJUST_INDICES(start, end, len);
757
758 if (direction < 0) {
759 /* startswith */
760 if (start + slen > len)
761 goto notfound;
762 } else {
763 /* endswith */
764 if (end - start < slen || start > len)
765 goto notfound;
766
767 if (end - slen > start)
768 start = end - slen;
769 }
770 if (end - start < slen)
771 goto notfound;
772 if (memcmp(str + start, sub, slen) != 0)
773 goto notfound;
774
775 PyBuffer_Release(&sub_view);
776 return 1;
777
778 notfound:
779 PyBuffer_Release(&sub_view);
780 return 0;
781 }
782
783 static PyObject *
_Py_bytes_tailmatch(const char * str,Py_ssize_t len,const char * function_name,PyObject * args,int direction)784 _Py_bytes_tailmatch(const char *str, Py_ssize_t len,
785 const char *function_name, PyObject *args,
786 int direction)
787 {
788 Py_ssize_t start = 0;
789 Py_ssize_t end = PY_SSIZE_T_MAX;
790 PyObject *subobj;
791 int result;
792
793 if (!stringlib_parse_args_finds(function_name, args, &subobj, &start, &end))
794 return NULL;
795 if (PyTuple_Check(subobj)) {
796 Py_ssize_t i;
797 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
798 result = tailmatch(str, len, PyTuple_GET_ITEM(subobj, i),
799 start, end, direction);
800 if (result == -1)
801 return NULL;
802 else if (result) {
803 Py_RETURN_TRUE;
804 }
805 }
806 Py_RETURN_FALSE;
807 }
808 result = tailmatch(str, len, subobj, start, end, direction);
809 if (result == -1) {
810 if (PyErr_ExceptionMatches(PyExc_TypeError))
811 PyErr_Format(PyExc_TypeError,
812 "%s first arg must be bytes or a tuple of bytes, "
813 "not %s",
814 function_name, Py_TYPE(subobj)->tp_name);
815 return NULL;
816 }
817 else
818 return PyBool_FromLong(result);
819 }
820
821 PyDoc_STRVAR_shared(_Py_startswith__doc__,
822 "B.startswith(prefix[, start[, end]]) -> bool\n\
823 \n\
824 Return True if B starts with the specified prefix, False otherwise.\n\
825 With optional start, test B beginning at that position.\n\
826 With optional end, stop comparing B at that position.\n\
827 prefix can also be a tuple of bytes to try.");
828
829 PyObject *
_Py_bytes_startswith(const char * str,Py_ssize_t len,PyObject * args)830 _Py_bytes_startswith(const char *str, Py_ssize_t len, PyObject *args)
831 {
832 return _Py_bytes_tailmatch(str, len, "startswith", args, -1);
833 }
834
835 PyDoc_STRVAR_shared(_Py_endswith__doc__,
836 "B.endswith(suffix[, start[, end]]) -> bool\n\
837 \n\
838 Return True if B ends with the specified suffix, False otherwise.\n\
839 With optional start, test B beginning at that position.\n\
840 With optional end, stop comparing B at that position.\n\
841 suffix can also be a tuple of bytes to try.");
842
843 PyObject *
_Py_bytes_endswith(const char * str,Py_ssize_t len,PyObject * args)844 _Py_bytes_endswith(const char *str, Py_ssize_t len, PyObject *args)
845 {
846 return _Py_bytes_tailmatch(str, len, "endswith", args, +1);
847 }
848
849 PyDoc_STRVAR_shared(_Py_expandtabs__doc__,
850 "B.expandtabs(tabsize=8) -> copy of B\n\
851 \n\
852 Return a copy of B where all tab characters are expanded using spaces.\n\
853 If tabsize is not given, a tab size of 8 characters is assumed.");
854
/* Docstrings for the padding methods (ljust, rjust, center, zfill).
   Only the documentation strings are defined here; no matching
   implementation appears in this part of the file. */

/* ljust: pad on the right with the fill character. */
PyDoc_STRVAR_shared(_Py_ljust__doc__,
"B.ljust(width[, fillchar]) -> copy of B\n"
"\n"
"Return B left justified in a string of length width. Padding is\n"
"done using the specified fill character (default is a space).");

/* rjust: pad on the left with the fill character.
   NOTE(review): unlike its siblings, this text has no trailing period. */
PyDoc_STRVAR_shared(_Py_rjust__doc__,
"B.rjust(width[, fillchar]) -> copy of B\n"
"\n"
"Return B right justified in a string of length width. Padding is\n"
"done using the specified fill character (default is a space)");

/* center: pad on both sides with the fill character. */
PyDoc_STRVAR_shared(_Py_center__doc__,
"B.center(width[, fillchar]) -> copy of B\n"
"\n"
"Return B centered in a string of length width. Padding is\n"
"done using the specified fill character (default is a space).");

/* zfill: pad on the left with zeros. */
PyDoc_STRVAR_shared(_Py_zfill__doc__,
"B.zfill(width) -> copy of B\n"
"\n"
"Pad a numeric string B with zeros on the left, to fill a field\n"
"of the specified width. B is never truncated.");
878