1 /* PyBytes (bytearray) implementation */
2
3 #define PY_SSIZE_T_CLEAN
4 #include "Python.h"
5 #include "structmember.h"
6 #include "bytes_methods.h"
7
8 char _PyByteArray_empty_string[] = "";
9
/* Finalization hook for the bytearray type.  It currently does nothing. */
void
PyByteArray_Fini(void)
{
    /* Intentionally empty. */
    return;
}
14
/* Initialization hook for the bytearray type.
   Performs no work and always returns 1. */
int
PyByteArray_Init(void)
{
    const int status = 1;

    return status;
}
20
21 /* end nullbytes support */
22
23 /* Helpers */
24
25 static int
_getbytevalue(PyObject * arg,int * value)26 _getbytevalue(PyObject* arg, int *value)
27 {
28 long face_value;
29
30 if (PyBytes_CheckExact(arg)) {
31 if (Py_SIZE(arg) != 1) {
32 PyErr_SetString(PyExc_ValueError, "string must be of size 1");
33 return 0;
34 }
35 *value = Py_CHARMASK(((PyBytesObject*)arg)->ob_sval[0]);
36 return 1;
37 }
38 else if (_PyAnyInt_Check(arg)) {
39 face_value = PyLong_AsLong(arg);
40 }
41 else {
42 PyObject *index = PyNumber_Index(arg);
43 if (index == NULL) {
44 PyErr_Format(PyExc_TypeError,
45 "an integer or string of size 1 is required");
46 return 0;
47 }
48 face_value = PyLong_AsLong(index);
49 Py_DECREF(index);
50 }
51
52 if (face_value < 0 || face_value >= 256) {
53 /* this includes the OverflowError in case the long is too large */
54 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
55 return 0;
56 }
57
58 *value = face_value;
59 return 1;
60 }
61
62 static Py_ssize_t
bytearray_buffer_getreadbuf(PyByteArrayObject * self,Py_ssize_t index,const void ** ptr)63 bytearray_buffer_getreadbuf(PyByteArrayObject *self, Py_ssize_t index, const void **ptr)
64 {
65 if ( index != 0 ) {
66 PyErr_SetString(PyExc_SystemError,
67 "accessing non-existent bytes segment");
68 return -1;
69 }
70 *ptr = (void *)PyByteArray_AS_STRING(self);
71 return Py_SIZE(self);
72 }
73
74 static Py_ssize_t
bytearray_buffer_getwritebuf(PyByteArrayObject * self,Py_ssize_t index,const void ** ptr)75 bytearray_buffer_getwritebuf(PyByteArrayObject *self, Py_ssize_t index, const void **ptr)
76 {
77 if ( index != 0 ) {
78 PyErr_SetString(PyExc_SystemError,
79 "accessing non-existent bytes segment");
80 return -1;
81 }
82 *ptr = (void *)PyByteArray_AS_STRING(self);
83 return Py_SIZE(self);
84 }
85
86 static Py_ssize_t
bytearray_buffer_getsegcount(PyByteArrayObject * self,Py_ssize_t * lenp)87 bytearray_buffer_getsegcount(PyByteArrayObject *self, Py_ssize_t *lenp)
88 {
89 if ( lenp )
90 *lenp = Py_SIZE(self);
91 return 1;
92 }
93
94 static Py_ssize_t
bytearray_buffer_getcharbuf(PyByteArrayObject * self,Py_ssize_t index,const char ** ptr)95 bytearray_buffer_getcharbuf(PyByteArrayObject *self, Py_ssize_t index, const char **ptr)
96 {
97 if ( index != 0 ) {
98 PyErr_SetString(PyExc_SystemError,
99 "accessing non-existent bytes segment");
100 return -1;
101 }
102 *ptr = PyByteArray_AS_STRING(self);
103 return Py_SIZE(self);
104 }
105
106 static int
bytearray_getbuffer(PyByteArrayObject * obj,Py_buffer * view,int flags)107 bytearray_getbuffer(PyByteArrayObject *obj, Py_buffer *view, int flags)
108 {
109 int ret;
110 void *ptr;
111 if (view == NULL) {
112 obj->ob_exports++;
113 return 0;
114 }
115 ptr = (void *) PyByteArray_AS_STRING(obj);
116 ret = PyBuffer_FillInfo(view, (PyObject*)obj, ptr, Py_SIZE(obj), 0, flags);
117 if (ret >= 0) {
118 obj->ob_exports++;
119 }
120 return ret;
121 }
122
123 static void
bytearray_releasebuffer(PyByteArrayObject * obj,Py_buffer * view)124 bytearray_releasebuffer(PyByteArrayObject *obj, Py_buffer *view)
125 {
126 obj->ob_exports--;
127 }
128
129 static Py_ssize_t
_getbuffer(PyObject * obj,Py_buffer * view)130 _getbuffer(PyObject *obj, Py_buffer *view)
131 {
132 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
133
134 if (buffer == NULL || buffer->bf_getbuffer == NULL)
135 {
136 PyErr_Format(PyExc_TypeError,
137 "Type %.100s doesn't support the buffer API",
138 Py_TYPE(obj)->tp_name);
139 return -1;
140 }
141
142 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
143 return -1;
144 return view->len;
145 }
146
147 static int
_canresize(PyByteArrayObject * self)148 _canresize(PyByteArrayObject *self)
149 {
150 if (self->ob_exports > 0) {
151 PyErr_SetString(PyExc_BufferError,
152 "Existing exports of data: object cannot be re-sized");
153 return 0;
154 }
155 return 1;
156 }
157
158 /* Direct API functions */
159
160 PyObject *
PyByteArray_FromObject(PyObject * input)161 PyByteArray_FromObject(PyObject *input)
162 {
163 return PyObject_CallFunctionObjArgs((PyObject *)&PyByteArray_Type,
164 input, NULL);
165 }
166
167 static PyObject *
_PyByteArray_FromBufferObject(PyObject * obj)168 _PyByteArray_FromBufferObject(PyObject *obj)
169 {
170 PyObject *result;
171 Py_buffer view;
172
173 if (PyObject_GetBuffer(obj, &view, PyBUF_FULL_RO) < 0) {
174 return NULL;
175 }
176 result = PyByteArray_FromStringAndSize(NULL, view.len);
177 if (result != NULL &&
178 PyBuffer_ToContiguous(PyByteArray_AS_STRING(result),
179 &view, view.len, 'C') < 0)
180 {
181 Py_CLEAR(result);
182 }
183 PyBuffer_Release(&view);
184 return result;
185 }
186
187 PyObject *
PyByteArray_FromStringAndSize(const char * bytes,Py_ssize_t size)188 PyByteArray_FromStringAndSize(const char *bytes, Py_ssize_t size)
189 {
190 PyByteArrayObject *new;
191 Py_ssize_t alloc;
192
193 if (size < 0) {
194 PyErr_SetString(PyExc_SystemError,
195 "Negative size passed to PyByteArray_FromStringAndSize");
196 return NULL;
197 }
198
199 new = PyObject_New(PyByteArrayObject, &PyByteArray_Type);
200 if (new == NULL)
201 return NULL;
202
203 if (size == 0) {
204 new->ob_bytes = NULL;
205 alloc = 0;
206 }
207 else {
208 alloc = size + 1;
209 new->ob_bytes = PyMem_Malloc(alloc);
210 if (new->ob_bytes == NULL) {
211 Py_DECREF(new);
212 return PyErr_NoMemory();
213 }
214 if (bytes != NULL && size > 0)
215 memcpy(new->ob_bytes, bytes, size);
216 new->ob_bytes[size] = '\0'; /* Trailing null byte */
217 }
218 Py_SIZE(new) = size;
219 new->ob_alloc = alloc;
220 new->ob_exports = 0;
221
222 return (PyObject *)new;
223 }
224
225 Py_ssize_t
PyByteArray_Size(PyObject * self)226 PyByteArray_Size(PyObject *self)
227 {
228 assert(self != NULL);
229 assert(PyByteArray_Check(self));
230
231 return PyByteArray_GET_SIZE(self);
232 }
233
234 char *
PyByteArray_AsString(PyObject * self)235 PyByteArray_AsString(PyObject *self)
236 {
237 assert(self != NULL);
238 assert(PyByteArray_Check(self));
239
240 return PyByteArray_AS_STRING(self);
241 }
242
243 int
PyByteArray_Resize(PyObject * self,Py_ssize_t size)244 PyByteArray_Resize(PyObject *self, Py_ssize_t size)
245 {
246 void *sval;
247 Py_ssize_t alloc = ((PyByteArrayObject *)self)->ob_alloc;
248
249 assert(self != NULL);
250 assert(PyByteArray_Check(self));
251 assert(size >= 0);
252
253 if (size == Py_SIZE(self)) {
254 return 0;
255 }
256 if (!_canresize((PyByteArrayObject *)self)) {
257 return -1;
258 }
259
260 if (size < alloc / 2) {
261 /* Major downsize; resize down to exact size */
262 alloc = size + 1;
263 }
264 else if (size < alloc) {
265 /* Within allocated size; quick exit */
266 Py_SIZE(self) = size;
267 ((PyByteArrayObject *)self)->ob_bytes[size] = '\0'; /* Trailing null */
268 return 0;
269 }
270 else if (size <= alloc * 1.125) {
271 /* Moderate upsize; overallocate similar to list_resize() */
272 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
273 }
274 else {
275 /* Major upsize; resize up to exact size */
276 alloc = size + 1;
277 }
278
279 sval = PyMem_Realloc(((PyByteArrayObject *)self)->ob_bytes, alloc);
280 if (sval == NULL) {
281 PyErr_NoMemory();
282 return -1;
283 }
284
285 ((PyByteArrayObject *)self)->ob_bytes = sval;
286 Py_SIZE(self) = size;
287 ((PyByteArrayObject *)self)->ob_alloc = alloc;
288 ((PyByteArrayObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
289
290 return 0;
291 }
292
293 PyObject *
PyByteArray_Concat(PyObject * a,PyObject * b)294 PyByteArray_Concat(PyObject *a, PyObject *b)
295 {
296 Py_buffer va, vb;
297 PyByteArrayObject *result = NULL;
298
299 va.len = -1;
300 vb.len = -1;
301 if (_getbuffer(a, &va) < 0 ||
302 _getbuffer(b, &vb) < 0) {
303 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
304 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
305 goto done;
306 }
307
308 if (va.len > PY_SSIZE_T_MAX - vb.len) {
309 PyErr_NoMemory();
310 goto done;
311 }
312
313 result = (PyByteArrayObject *) \
314 PyByteArray_FromStringAndSize(NULL, va.len + vb.len);
315 if (result != NULL) {
316 memcpy(result->ob_bytes, va.buf, va.len);
317 memcpy(result->ob_bytes + va.len, vb.buf, vb.len);
318 }
319
320 done:
321 if (va.len != -1)
322 PyBuffer_Release(&va);
323 if (vb.len != -1)
324 PyBuffer_Release(&vb);
325 return (PyObject *)result;
326 }
327
328 /* Functions stuffed into the type object */
329
330 static Py_ssize_t
bytearray_length(PyByteArrayObject * self)331 bytearray_length(PyByteArrayObject *self)
332 {
333 return Py_SIZE(self);
334 }
335
336 static PyObject *
bytearray_iconcat(PyByteArrayObject * self,PyObject * other)337 bytearray_iconcat(PyByteArrayObject *self, PyObject *other)
338 {
339 Py_ssize_t mysize;
340 Py_ssize_t size;
341 Py_buffer vo;
342
343 if (_getbuffer(other, &vo) < 0) {
344 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
345 Py_TYPE(other)->tp_name, Py_TYPE(self)->tp_name);
346 return NULL;
347 }
348
349 mysize = Py_SIZE(self);
350 if (mysize > PY_SSIZE_T_MAX - vo.len) {
351 PyBuffer_Release(&vo);
352 return PyErr_NoMemory();
353 }
354 size = mysize + vo.len;
355 if (size < self->ob_alloc) {
356 Py_SIZE(self) = size;
357 self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */
358 }
359 else if (PyByteArray_Resize((PyObject *)self, size) < 0) {
360 PyBuffer_Release(&vo);
361 return NULL;
362 }
363 memcpy(self->ob_bytes + mysize, vo.buf, vo.len);
364 PyBuffer_Release(&vo);
365 Py_INCREF(self);
366 return (PyObject *)self;
367 }
368
369 static PyObject *
bytearray_repeat(PyByteArrayObject * self,Py_ssize_t count)370 bytearray_repeat(PyByteArrayObject *self, Py_ssize_t count)
371 {
372 PyByteArrayObject *result;
373 Py_ssize_t mysize;
374 Py_ssize_t size;
375
376 if (count < 0)
377 count = 0;
378 mysize = Py_SIZE(self);
379 if (count != 0 && mysize > PY_SSIZE_T_MAX / count)
380 return PyErr_NoMemory();
381 size = mysize * count;
382 result = (PyByteArrayObject *)PyByteArray_FromStringAndSize(NULL, size);
383 if (result != NULL && size != 0) {
384 if (mysize == 1)
385 memset(result->ob_bytes, self->ob_bytes[0], size);
386 else {
387 Py_ssize_t i;
388 for (i = 0; i < count; i++)
389 memcpy(result->ob_bytes + i*mysize, self->ob_bytes, mysize);
390 }
391 }
392 return (PyObject *)result;
393 }
394
395 static PyObject *
bytearray_irepeat(PyByteArrayObject * self,Py_ssize_t count)396 bytearray_irepeat(PyByteArrayObject *self, Py_ssize_t count)
397 {
398 Py_ssize_t mysize;
399 Py_ssize_t size;
400
401 if (count < 0)
402 count = 0;
403 mysize = Py_SIZE(self);
404 if (count != 0 && mysize > PY_SSIZE_T_MAX / count)
405 return PyErr_NoMemory();
406 size = mysize * count;
407 if (size < self->ob_alloc) {
408 Py_SIZE(self) = size;
409 self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */
410 }
411 else if (PyByteArray_Resize((PyObject *)self, size) < 0)
412 return NULL;
413
414 if (mysize == 1)
415 memset(self->ob_bytes, self->ob_bytes[0], size);
416 else {
417 Py_ssize_t i;
418 for (i = 1; i < count; i++)
419 memcpy(self->ob_bytes + i*mysize, self->ob_bytes, mysize);
420 }
421
422 Py_INCREF(self);
423 return (PyObject *)self;
424 }
425
426 static PyObject *
bytearray_getitem(PyByteArrayObject * self,Py_ssize_t i)427 bytearray_getitem(PyByteArrayObject *self, Py_ssize_t i)
428 {
429 if (i < 0)
430 i += Py_SIZE(self);
431 if (i < 0 || i >= Py_SIZE(self)) {
432 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
433 return NULL;
434 }
435 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
436 }
437
438 static PyObject *
bytearray_subscript(PyByteArrayObject * self,PyObject * index)439 bytearray_subscript(PyByteArrayObject *self, PyObject *index)
440 {
441 if (PyIndex_Check(index)) {
442 Py_ssize_t i = PyNumber_AsSsize_t(index, PyExc_IndexError);
443
444 if (i == -1 && PyErr_Occurred())
445 return NULL;
446
447 if (i < 0)
448 i += PyByteArray_GET_SIZE(self);
449
450 if (i < 0 || i >= Py_SIZE(self)) {
451 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
452 return NULL;
453 }
454 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
455 }
456 else if (PySlice_Check(index)) {
457 Py_ssize_t start, stop, step, slicelength, cur, i;
458 if (_PySlice_Unpack(index, &start, &stop, &step) < 0) {
459 return NULL;
460 }
461 slicelength = _PySlice_AdjustIndices(PyByteArray_GET_SIZE(self),
462 &start, &stop, step);
463
464 if (slicelength <= 0)
465 return PyByteArray_FromStringAndSize("", 0);
466 else if (step == 1) {
467 return PyByteArray_FromStringAndSize(self->ob_bytes + start,
468 slicelength);
469 }
470 else {
471 char *source_buf = PyByteArray_AS_STRING(self);
472 char *result_buf = (char *)PyMem_Malloc(slicelength);
473 PyObject *result;
474
475 if (result_buf == NULL)
476 return PyErr_NoMemory();
477
478 for (cur = start, i = 0; i < slicelength;
479 cur += step, i++) {
480 result_buf[i] = source_buf[cur];
481 }
482 result = PyByteArray_FromStringAndSize(result_buf, slicelength);
483 PyMem_Free(result_buf);
484 return result;
485 }
486 }
487 else {
488 PyErr_SetString(PyExc_TypeError, "bytearray indices must be integers");
489 return NULL;
490 }
491 }
492
493 static int
bytearray_setslice(PyByteArrayObject * self,Py_ssize_t lo,Py_ssize_t hi,PyObject * values)494 bytearray_setslice(PyByteArrayObject *self, Py_ssize_t lo, Py_ssize_t hi,
495 PyObject *values)
496 {
497 Py_ssize_t avail, needed;
498 void *bytes;
499 Py_buffer vbytes;
500 int res = 0;
501
502 vbytes.len = -1;
503 if (values == (PyObject *)self) {
504 /* Make a copy and call this function recursively */
505 int err;
506 values = PyByteArray_FromStringAndSize(PyByteArray_AS_STRING(values),
507 PyByteArray_GET_SIZE(values));
508 if (values == NULL)
509 return -1;
510 err = bytearray_setslice(self, lo, hi, values);
511 Py_DECREF(values);
512 return err;
513 }
514 if (values == NULL) {
515 /* del b[lo:hi] */
516 bytes = NULL;
517 needed = 0;
518 }
519 else {
520 if (_getbuffer(values, &vbytes) < 0) {
521 PyErr_Format(PyExc_TypeError,
522 "can't set bytearray slice from %.100s",
523 Py_TYPE(values)->tp_name);
524 return -1;
525 }
526 needed = vbytes.len;
527 bytes = vbytes.buf;
528 }
529
530 if (lo < 0)
531 lo = 0;
532 if (hi < lo)
533 hi = lo;
534 if (hi > Py_SIZE(self))
535 hi = Py_SIZE(self);
536
537 avail = hi - lo;
538 if (avail < 0)
539 lo = hi = avail = 0;
540
541 if (avail != needed) {
542 if (avail > needed) {
543 if (!_canresize(self)) {
544 res = -1;
545 goto finish;
546 }
547 /*
548 0 lo hi old_size
549 | |<----avail----->|<-----tomove------>|
550 | |<-needed->|<-----tomove------>|
551 0 lo new_hi new_size
552 */
553 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
554 Py_SIZE(self) - hi);
555 }
556 /* XXX(nnorwitz): need to verify this can't overflow! */
557 if (PyByteArray_Resize((PyObject *)self,
558 Py_SIZE(self) + needed - avail) < 0) {
559 res = -1;
560 goto finish;
561 }
562 if (avail < needed) {
563 /*
564 0 lo hi old_size
565 | |<-avail->|<-----tomove------>|
566 | |<----needed---->|<-----tomove------>|
567 0 lo new_hi new_size
568 */
569 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
570 Py_SIZE(self) - lo - needed);
571 }
572 }
573
574 if (needed > 0)
575 memcpy(self->ob_bytes + lo, bytes, needed);
576
577
578 finish:
579 if (vbytes.len != -1)
580 PyBuffer_Release(&vbytes);
581 return res;
582 }
583
584 static int
bytearray_setitem(PyByteArrayObject * self,Py_ssize_t i,PyObject * value)585 bytearray_setitem(PyByteArrayObject *self, Py_ssize_t i, PyObject *value)
586 {
587 int ival;
588
589 if (i < 0)
590 i += Py_SIZE(self);
591
592 if (i < 0 || i >= Py_SIZE(self)) {
593 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
594 return -1;
595 }
596
597 if (value == NULL)
598 return bytearray_setslice(self, i, i+1, NULL);
599
600 if (!_getbytevalue(value, &ival))
601 return -1;
602
603 self->ob_bytes[i] = ival;
604 return 0;
605 }
606
607 static int
bytearray_ass_subscript(PyByteArrayObject * self,PyObject * index,PyObject * values)608 bytearray_ass_subscript(PyByteArrayObject *self, PyObject *index, PyObject *values)
609 {
610 Py_ssize_t start, stop, step, slicelen, needed;
611 char *bytes;
612
613 if (PyIndex_Check(index)) {
614 Py_ssize_t i = PyNumber_AsSsize_t(index, PyExc_IndexError);
615
616 if (i == -1 && PyErr_Occurred())
617 return -1;
618
619 if (i < 0)
620 i += PyByteArray_GET_SIZE(self);
621
622 if (i < 0 || i >= Py_SIZE(self)) {
623 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
624 return -1;
625 }
626
627 if (values == NULL) {
628 /* Fall through to slice assignment */
629 start = i;
630 stop = i + 1;
631 step = 1;
632 slicelen = 1;
633 }
634 else {
635 int ival;
636 if (!_getbytevalue(values, &ival))
637 return -1;
638 self->ob_bytes[i] = (char)ival;
639 return 0;
640 }
641 }
642 else if (PySlice_Check(index)) {
643 if (_PySlice_Unpack(index, &start, &stop, &step) < 0) {
644 return -1;
645 }
646 slicelen = _PySlice_AdjustIndices(PyByteArray_GET_SIZE(self), &start,
647 &stop, step);
648 }
649 else {
650 PyErr_SetString(PyExc_TypeError, "bytearray indices must be integer");
651 return -1;
652 }
653
654 if (values == NULL) {
655 bytes = NULL;
656 needed = 0;
657 }
658 else if (values == (PyObject *)self || !PyByteArray_Check(values)) {
659 int err;
660 if (PyNumber_Check(values) || PyUnicode_Check(values)) {
661 PyErr_SetString(PyExc_TypeError,
662 "can assign only bytes, buffers, or iterables "
663 "of ints in range(0, 256)");
664 return -1;
665 }
666 /* Make a copy and call this function recursively */
667 values = PyByteArray_FromObject(values);
668 if (values == NULL)
669 return -1;
670 err = bytearray_ass_subscript(self, index, values);
671 Py_DECREF(values);
672 return err;
673 }
674 else {
675 assert(PyByteArray_Check(values));
676 bytes = ((PyByteArrayObject *)values)->ob_bytes;
677 needed = Py_SIZE(values);
678 }
679 /* Make sure b[5:2] = ... inserts before 5, not before 2. */
680 if ((step < 0 && start < stop) ||
681 (step > 0 && start > stop))
682 stop = start;
683 if (step == 1) {
684 if (slicelen != needed) {
685 if (!_canresize(self))
686 return -1;
687 if (slicelen > needed) {
688 /*
689 0 start stop old_size
690 | |<---slicelen--->|<-----tomove------>|
691 | |<-needed->|<-----tomove------>|
692 0 lo new_hi new_size
693 */
694 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
695 Py_SIZE(self) - stop);
696 }
697 if (PyByteArray_Resize((PyObject *)self,
698 Py_SIZE(self) + needed - slicelen) < 0)
699 return -1;
700 if (slicelen < needed) {
701 /*
702 0 lo hi old_size
703 | |<-avail->|<-----tomove------>|
704 | |<----needed---->|<-----tomove------>|
705 0 lo new_hi new_size
706 */
707 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
708 Py_SIZE(self) - start - needed);
709 }
710 }
711
712 if (needed > 0)
713 memcpy(self->ob_bytes + start, bytes, needed);
714
715 return 0;
716 }
717 else {
718 if (needed == 0) {
719 /* Delete slice */
720 size_t cur;
721 Py_ssize_t i;
722
723 if (!_canresize(self))
724 return -1;
725 if (step < 0) {
726 stop = start + 1;
727 start = stop + step * (slicelen - 1) - 1;
728 step = -step;
729 }
730 for (cur = start, i = 0;
731 i < slicelen; cur += step, i++) {
732 Py_ssize_t lim = step - 1;
733
734 if (cur + step >= (size_t)PyByteArray_GET_SIZE(self))
735 lim = PyByteArray_GET_SIZE(self) - cur - 1;
736
737 memmove(self->ob_bytes + cur - i,
738 self->ob_bytes + cur + 1, lim);
739 }
740 /* Move the tail of the bytes, in one chunk */
741 cur = start + slicelen*step;
742 if (cur < (size_t)PyByteArray_GET_SIZE(self)) {
743 memmove(self->ob_bytes + cur - slicelen,
744 self->ob_bytes + cur,
745 PyByteArray_GET_SIZE(self) - cur);
746 }
747 if (PyByteArray_Resize((PyObject *)self,
748 PyByteArray_GET_SIZE(self) - slicelen) < 0)
749 return -1;
750
751 return 0;
752 }
753 else {
754 /* Assign slice */
755 Py_ssize_t cur, i;
756
757 if (needed != slicelen) {
758 PyErr_Format(PyExc_ValueError,
759 "attempt to assign bytes of size %zd "
760 "to extended slice of size %zd",
761 needed, slicelen);
762 return -1;
763 }
764 for (cur = start, i = 0; i < slicelen; cur += step, i++)
765 self->ob_bytes[cur] = bytes[i];
766 return 0;
767 }
768 }
769 }
770
771 static int
bytearray_init(PyByteArrayObject * self,PyObject * args,PyObject * kwds)772 bytearray_init(PyByteArrayObject *self, PyObject *args, PyObject *kwds)
773 {
774 static char *kwlist[] = {"source", "encoding", "errors", 0};
775 PyObject *arg = NULL;
776 const char *encoding = NULL;
777 const char *errors = NULL;
778 Py_ssize_t count;
779 PyObject *it;
780 PyObject *(*iternext)(PyObject *);
781
782 if (Py_SIZE(self) != 0) {
783 /* Empty previous contents (yes, do this first of all!) */
784 if (PyByteArray_Resize((PyObject *)self, 0) < 0)
785 return -1;
786 }
787
788 /* Parse arguments */
789 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytearray", kwlist,
790 &arg, &encoding, &errors))
791 return -1;
792
793 /* Make a quick exit if no first argument */
794 if (arg == NULL) {
795 if (encoding != NULL || errors != NULL) {
796 PyErr_SetString(PyExc_TypeError,
797 "encoding or errors without sequence argument");
798 return -1;
799 }
800 return 0;
801 }
802
803 if (PyBytes_Check(arg)) {
804 PyObject *new, *encoded;
805 if (encoding != NULL) {
806 encoded = _PyCodec_EncodeText(arg, encoding, errors);
807 if (encoded == NULL)
808 return -1;
809 assert(PyBytes_Check(encoded));
810 }
811 else {
812 encoded = arg;
813 Py_INCREF(arg);
814 }
815 new = bytearray_iconcat(self, arg);
816 Py_DECREF(encoded);
817 if (new == NULL)
818 return -1;
819 Py_DECREF(new);
820 return 0;
821 }
822
823 #ifdef Py_USING_UNICODE
824 if (PyUnicode_Check(arg)) {
825 /* Encode via the codec registry */
826 PyObject *encoded, *new;
827 if (encoding == NULL) {
828 PyErr_SetString(PyExc_TypeError,
829 "unicode argument without an encoding");
830 return -1;
831 }
832 encoded = _PyCodec_EncodeText(arg, encoding, errors);
833 if (encoded == NULL)
834 return -1;
835 assert(PyBytes_Check(encoded));
836 new = bytearray_iconcat(self, encoded);
837 Py_DECREF(encoded);
838 if (new == NULL)
839 return -1;
840 Py_DECREF(new);
841 return 0;
842 }
843 #endif
844
845 /* If it's not unicode, there can't be encoding or errors */
846 if (encoding != NULL || errors != NULL) {
847 PyErr_SetString(PyExc_TypeError,
848 "encoding or errors without a string argument");
849 return -1;
850 }
851
852 /* Is it an int? */
853 count = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
854 if (count == -1 && PyErr_Occurred()) {
855 if (PyErr_ExceptionMatches(PyExc_OverflowError))
856 return -1;
857 PyErr_Clear();
858 }
859 else if (count < 0) {
860 PyErr_SetString(PyExc_ValueError, "negative count");
861 return -1;
862 }
863 else {
864 if (count > 0) {
865 if (PyByteArray_Resize((PyObject *)self, count))
866 return -1;
867 memset(self->ob_bytes, 0, count);
868 }
869 return 0;
870 }
871
872 /* Use the buffer API */
873 if (PyObject_CheckBuffer(arg)) {
874 Py_ssize_t size;
875 Py_buffer view;
876 if (PyObject_GetBuffer(arg, &view, PyBUF_FULL_RO) < 0)
877 return -1;
878 size = view.len;
879 if (PyByteArray_Resize((PyObject *)self, size) < 0) goto fail;
880 if (PyBuffer_ToContiguous(self->ob_bytes, &view, size, 'C') < 0)
881 goto fail;
882 PyBuffer_Release(&view);
883 return 0;
884 fail:
885 PyBuffer_Release(&view);
886 return -1;
887 }
888
889 /* XXX Optimize this if the arguments is a list, tuple */
890
891 /* Get the iterator */
892 it = PyObject_GetIter(arg);
893 if (it == NULL)
894 return -1;
895 iternext = *Py_TYPE(it)->tp_iternext;
896
897 /* Run the iterator to exhaustion */
898 for (;;) {
899 PyObject *item;
900 int rc, value;
901
902 /* Get the next item */
903 item = iternext(it);
904 if (item == NULL) {
905 if (PyErr_Occurred()) {
906 if (!PyErr_ExceptionMatches(PyExc_StopIteration))
907 goto error;
908 PyErr_Clear();
909 }
910 break;
911 }
912
913 /* Interpret it as an int (__index__) */
914 rc = _getbytevalue(item, &value);
915 Py_DECREF(item);
916 if (!rc)
917 goto error;
918
919 /* Append the byte */
920 if (Py_SIZE(self) + 1 < self->ob_alloc) {
921 Py_SIZE(self)++;
922 PyByteArray_AS_STRING(self)[Py_SIZE(self)] = '\0';
923 }
924 else if (PyByteArray_Resize((PyObject *)self, Py_SIZE(self)+1) < 0)
925 goto error;
926 self->ob_bytes[Py_SIZE(self)-1] = value;
927 }
928
929 /* Clean up and return success */
930 Py_DECREF(it);
931 return 0;
932
933 error:
934 /* Error handling when it != NULL */
935 Py_DECREF(it);
936 return -1;
937 }
938
939 /* Mostly copied from string_repr, but without the
940 "smart quote" functionality. */
941 static PyObject *
bytearray_repr(PyByteArrayObject * self)942 bytearray_repr(PyByteArrayObject *self)
943 {
944 static const char *hexdigits = "0123456789abcdef";
945 const char *quote_prefix = "bytearray(b";
946 const char *quote_postfix = ")";
947 Py_ssize_t length = Py_SIZE(self);
948 /* 14 == strlen(quote_prefix) + 2 + strlen(quote_postfix) */
949 size_t newsize;
950 PyObject *v;
951 if (length > (PY_SSIZE_T_MAX - 14) / 4) {
952 PyErr_SetString(PyExc_OverflowError,
953 "bytearray object is too large to make repr");
954 return NULL;
955 }
956 newsize = 14 + 4 * length;
957 v = PyString_FromStringAndSize(NULL, newsize);
958 if (v == NULL) {
959 return NULL;
960 }
961 else {
962 register Py_ssize_t i;
963 register char c;
964 register char *p;
965 int quote;
966
967 /* Figure out which quote to use; single is preferred */
968 quote = '\'';
969 {
970 char *test, *start;
971 start = PyByteArray_AS_STRING(self);
972 for (test = start; test < start+length; ++test) {
973 if (*test == '"') {
974 quote = '\''; /* back to single */
975 goto decided;
976 }
977 else if (*test == '\'')
978 quote = '"';
979 }
980 decided:
981 ;
982 }
983
984 p = PyString_AS_STRING(v);
985 while (*quote_prefix)
986 *p++ = *quote_prefix++;
987 *p++ = quote;
988
989 for (i = 0; i < length; i++) {
990 /* There's at least enough room for a hex escape
991 and a closing quote. */
992 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
993 c = self->ob_bytes[i];
994 if (c == '\'' || c == '\\')
995 *p++ = '\\', *p++ = c;
996 else if (c == '\t')
997 *p++ = '\\', *p++ = 't';
998 else if (c == '\n')
999 *p++ = '\\', *p++ = 'n';
1000 else if (c == '\r')
1001 *p++ = '\\', *p++ = 'r';
1002 else if (c == 0)
1003 *p++ = '\\', *p++ = 'x', *p++ = '0', *p++ = '0';
1004 else if (c < ' ' || c >= 0x7f) {
1005 *p++ = '\\';
1006 *p++ = 'x';
1007 *p++ = hexdigits[(c & 0xf0) >> 4];
1008 *p++ = hexdigits[c & 0xf];
1009 }
1010 else
1011 *p++ = c;
1012 }
1013 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
1014 *p++ = quote;
1015 while (*quote_postfix) {
1016 *p++ = *quote_postfix++;
1017 }
1018 *p = '\0';
1019 /* v is cleared on error */
1020 (void)_PyString_Resize(&v, (p - PyString_AS_STRING(v)));
1021 return v;
1022 }
1023 }
1024
1025 static PyObject *
bytearray_str(PyObject * op)1026 bytearray_str(PyObject *op)
1027 {
1028 #if 0
1029 if (Py_BytesWarningFlag) {
1030 if (PyErr_WarnEx(PyExc_BytesWarning,
1031 "str() on a bytearray instance", 1))
1032 return NULL;
1033 }
1034 return bytearray_repr((PyByteArrayObject*)op);
1035 #endif
1036 return PyBytes_FromStringAndSize(((PyByteArrayObject*)op)->ob_bytes, Py_SIZE(op));
1037 }
1038
1039 static PyObject *
bytearray_richcompare(PyObject * self,PyObject * other,int op)1040 bytearray_richcompare(PyObject *self, PyObject *other, int op)
1041 {
1042 Py_ssize_t self_size, other_size;
1043 Py_buffer self_bytes, other_bytes;
1044 PyObject *res;
1045 Py_ssize_t minsize;
1046 int cmp, rc;
1047
1048 /* Bytes can be compared to anything that supports the (binary)
1049 buffer API. Except that a comparison with Unicode is always an
1050 error, even if the comparison is for equality. */
1051 #ifdef Py_USING_UNICODE
1052 rc = PyObject_IsInstance(self, (PyObject*)&PyUnicode_Type);
1053 if (!rc)
1054 rc = PyObject_IsInstance(other, (PyObject*)&PyUnicode_Type);
1055 if (rc < 0)
1056 return NULL;
1057 if (rc) {
1058 if (Py_BytesWarningFlag && op == Py_EQ) {
1059 if (PyErr_WarnEx(PyExc_BytesWarning,
1060 "Comparison between bytearray and string", 1))
1061 return NULL;
1062 }
1063
1064 Py_INCREF(Py_NotImplemented);
1065 return Py_NotImplemented;
1066 }
1067 #endif
1068
1069 self_size = _getbuffer(self, &self_bytes);
1070 if (self_size < 0) {
1071 PyErr_Clear();
1072 Py_INCREF(Py_NotImplemented);
1073 return Py_NotImplemented;
1074 }
1075
1076 other_size = _getbuffer(other, &other_bytes);
1077 if (other_size < 0) {
1078 PyErr_Clear();
1079 PyBuffer_Release(&self_bytes);
1080 Py_INCREF(Py_NotImplemented);
1081 return Py_NotImplemented;
1082 }
1083
1084 if (self_size != other_size && (op == Py_EQ || op == Py_NE)) {
1085 /* Shortcut: if the lengths differ, the objects differ */
1086 cmp = (op == Py_NE);
1087 }
1088 else {
1089 minsize = self_size;
1090 if (other_size < minsize)
1091 minsize = other_size;
1092
1093 cmp = memcmp(self_bytes.buf, other_bytes.buf, minsize);
1094 /* In ISO C, memcmp() guarantees to use unsigned bytes! */
1095
1096 if (cmp == 0) {
1097 if (self_size < other_size)
1098 cmp = -1;
1099 else if (self_size > other_size)
1100 cmp = 1;
1101 }
1102
1103 switch (op) {
1104 case Py_LT: cmp = cmp < 0; break;
1105 case Py_LE: cmp = cmp <= 0; break;
1106 case Py_EQ: cmp = cmp == 0; break;
1107 case Py_NE: cmp = cmp != 0; break;
1108 case Py_GT: cmp = cmp > 0; break;
1109 case Py_GE: cmp = cmp >= 0; break;
1110 }
1111 }
1112
1113 res = cmp ? Py_True : Py_False;
1114 PyBuffer_Release(&self_bytes);
1115 PyBuffer_Release(&other_bytes);
1116 Py_INCREF(res);
1117 return res;
1118 }
1119
1120 static void
bytearray_dealloc(PyByteArrayObject * self)1121 bytearray_dealloc(PyByteArrayObject *self)
1122 {
1123 if (self->ob_exports > 0) {
1124 PyErr_SetString(PyExc_SystemError,
1125 "deallocated bytearray object has exported buffers");
1126 PyErr_Print();
1127 }
1128 if (self->ob_bytes != 0) {
1129 PyMem_Free(self->ob_bytes);
1130 }
1131 Py_TYPE(self)->tp_free((PyObject *)self);
1132 }
1133
1134
1135 /* -------------------------------------------------------------------- */
1136 /* Methods */
1137
1138 #define STRINGLIB_CHAR char
1139 #define STRINGLIB_LEN PyByteArray_GET_SIZE
1140 #define STRINGLIB_STR PyByteArray_AS_STRING
1141 #define STRINGLIB_NEW PyByteArray_FromStringAndSize
1142 #define STRINGLIB_ISSPACE Py_ISSPACE
1143 #define STRINGLIB_ISLINEBREAK(x) ((x == '\n') || (x == '\r'))
1144 #define STRINGLIB_CHECK_EXACT PyByteArray_CheckExact
1145 #define STRINGLIB_MUTABLE 1
1146
1147 #include "stringlib/fastsearch.h"
1148 #include "stringlib/count.h"
1149 #include "stringlib/find.h"
1150 #include "stringlib/partition.h"
1151 #include "stringlib/split.h"
1152 #include "stringlib/ctype.h"
1153 #include "stringlib/transmogrify.h"
1154
1155
1156 /* The following Py_LOCAL_INLINE and Py_LOCAL functions
1157 were copied from the old char* style string object. */
1158
/* Helper macro to fix up start/end slice values in place.

   `end` is clamped to `len` when too large; a negative `end` or `start`
   has `len` added and is then clamped to 0 if still negative.  `start`
   and `end` must be modifiable lvalues; `len` is evaluated more than once
   and so must be free of side effects.

   The body is wrapped in do { } while (0) so the macro behaves as a
   single statement (safe in an unbraced if/else); invoke it with a
   trailing semicolon. */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
1173
1174 Py_LOCAL_INLINE(Py_ssize_t)
bytearray_find_internal(PyByteArrayObject * self,PyObject * args,int dir)1175 bytearray_find_internal(PyByteArrayObject *self, PyObject *args, int dir)
1176 {
1177 PyObject *subobj;
1178 Py_buffer subbuf;
1179 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1180 Py_ssize_t res;
1181
1182 if (!stringlib_parse_args_finds("find/rfind/index/rindex",
1183 args, &subobj, &start, &end))
1184 return -2;
1185 if (_getbuffer(subobj, &subbuf) < 0)
1186 return -2;
1187 if (dir > 0)
1188 res = stringlib_find_slice(
1189 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
1190 subbuf.buf, subbuf.len, start, end);
1191 else
1192 res = stringlib_rfind_slice(
1193 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
1194 subbuf.buf, subbuf.len, start, end);
1195 PyBuffer_Release(&subbuf);
1196 return res;
1197 }
1198
1199 PyDoc_STRVAR(find__doc__,
1200 "B.find(sub [,start [,end]]) -> int\n\
1201 \n\
1202 Return the lowest index in B where subsection sub is found,\n\
1203 such that sub is contained within B[start,end]. Optional\n\
1204 arguments start and end are interpreted as in slice notation.\n\
1205 \n\
1206 Return -1 on failure.");
1207
1208 static PyObject *
bytearray_find(PyByteArrayObject * self,PyObject * args)1209 bytearray_find(PyByteArrayObject *self, PyObject *args)
1210 {
1211 Py_ssize_t result = bytearray_find_internal(self, args, +1);
1212 if (result == -2)
1213 return NULL;
1214 return PyInt_FromSsize_t(result);
1215 }
1216
1217 PyDoc_STRVAR(count__doc__,
1218 "B.count(sub [,start [,end]]) -> int\n\
1219 \n\
1220 Return the number of non-overlapping occurrences of subsection sub in\n\
1221 bytes B[start:end]. Optional arguments start and end are interpreted\n\
1222 as in slice notation.");
1223
1224 static PyObject *
bytearray_count(PyByteArrayObject * self,PyObject * args)1225 bytearray_count(PyByteArrayObject *self, PyObject *args)
1226 {
1227 PyObject *sub_obj;
1228 const char *str = PyByteArray_AS_STRING(self);
1229 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
1230 Py_buffer vsub;
1231 PyObject *count_obj;
1232
1233 if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end))
1234 return NULL;
1235
1236 if (_getbuffer(sub_obj, &vsub) < 0)
1237 return NULL;
1238
1239 ADJUST_INDICES(start, end, PyByteArray_GET_SIZE(self));
1240
1241 count_obj = PyInt_FromSsize_t(
1242 stringlib_count(str + start, end - start, vsub.buf, vsub.len, PY_SSIZE_T_MAX)
1243 );
1244 PyBuffer_Release(&vsub);
1245 return count_obj;
1246 }
1247
1248
1249 PyDoc_STRVAR(index__doc__,
1250 "B.index(sub [,start [,end]]) -> int\n\
1251 \n\
1252 Like B.find() but raise ValueError when the subsection is not found.");
1253
1254 static PyObject *
bytearray_index(PyByteArrayObject * self,PyObject * args)1255 bytearray_index(PyByteArrayObject *self, PyObject *args)
1256 {
1257 Py_ssize_t result = bytearray_find_internal(self, args, +1);
1258 if (result == -2)
1259 return NULL;
1260 if (result == -1) {
1261 PyErr_SetString(PyExc_ValueError,
1262 "subsection not found");
1263 return NULL;
1264 }
1265 return PyInt_FromSsize_t(result);
1266 }
1267
1268
1269 PyDoc_STRVAR(rfind__doc__,
1270 "B.rfind(sub [,start [,end]]) -> int\n\
1271 \n\
1272 Return the highest index in B where subsection sub is found,\n\
1273 such that sub is contained within B[start,end]. Optional\n\
1274 arguments start and end are interpreted as in slice notation.\n\
1275 \n\
1276 Return -1 on failure.");
1277
1278 static PyObject *
bytearray_rfind(PyByteArrayObject * self,PyObject * args)1279 bytearray_rfind(PyByteArrayObject *self, PyObject *args)
1280 {
1281 Py_ssize_t result = bytearray_find_internal(self, args, -1);
1282 if (result == -2)
1283 return NULL;
1284 return PyInt_FromSsize_t(result);
1285 }
1286
1287
1288 PyDoc_STRVAR(rindex__doc__,
1289 "B.rindex(sub [,start [,end]]) -> int\n\
1290 \n\
1291 Like B.rfind() but raise ValueError when the subsection is not found.");
1292
1293 static PyObject *
bytearray_rindex(PyByteArrayObject * self,PyObject * args)1294 bytearray_rindex(PyByteArrayObject *self, PyObject *args)
1295 {
1296 Py_ssize_t result = bytearray_find_internal(self, args, -1);
1297 if (result == -2)
1298 return NULL;
1299 if (result == -1) {
1300 PyErr_SetString(PyExc_ValueError,
1301 "subsection not found");
1302 return NULL;
1303 }
1304 return PyInt_FromSsize_t(result);
1305 }
1306
1307
1308 static int
bytearray_contains(PyObject * self,PyObject * arg)1309 bytearray_contains(PyObject *self, PyObject *arg)
1310 {
1311 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
1312 if (ival == -1 && PyErr_Occurred()) {
1313 Py_buffer varg;
1314 int pos;
1315 PyErr_Clear();
1316 if (_getbuffer(arg, &varg) < 0)
1317 return -1;
1318 pos = stringlib_find(PyByteArray_AS_STRING(self), Py_SIZE(self),
1319 varg.buf, varg.len, 0);
1320 PyBuffer_Release(&varg);
1321 return pos >= 0;
1322 }
1323 if (ival < 0 || ival >= 256) {
1324 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
1325 return -1;
1326 }
1327
1328 return memchr(PyByteArray_AS_STRING(self), ival, Py_SIZE(self)) != NULL;
1329 }
1330
1331
1332 /* Matches the end (direction >= 0) or start (direction < 0) of self
1333 * against substr, using the start and end arguments. Returns
1334 * -1 on error, 0 if not found and 1 if found.
1335 */
1336 Py_LOCAL(int)
_bytearray_tailmatch(PyByteArrayObject * self,PyObject * substr,Py_ssize_t start,Py_ssize_t end,int direction)1337 _bytearray_tailmatch(PyByteArrayObject *self, PyObject *substr, Py_ssize_t start,
1338 Py_ssize_t end, int direction)
1339 {
1340 Py_ssize_t len = PyByteArray_GET_SIZE(self);
1341 const char* str;
1342 Py_buffer vsubstr;
1343 int rv = 0;
1344
1345 str = PyByteArray_AS_STRING(self);
1346
1347 if (_getbuffer(substr, &vsubstr) < 0)
1348 return -1;
1349
1350 ADJUST_INDICES(start, end, len);
1351
1352 if (direction < 0) {
1353 /* startswith */
1354 if (start+vsubstr.len > len) {
1355 goto done;
1356 }
1357 } else {
1358 /* endswith */
1359 if (end-start < vsubstr.len || start > len) {
1360 goto done;
1361 }
1362
1363 if (end-vsubstr.len > start)
1364 start = end - vsubstr.len;
1365 }
1366 if (end-start >= vsubstr.len)
1367 rv = ! memcmp(str+start, vsubstr.buf, vsubstr.len);
1368
1369 done:
1370 PyBuffer_Release(&vsubstr);
1371 return rv;
1372 }
1373
1374
1375 PyDoc_STRVAR(startswith__doc__,
1376 "B.startswith(prefix [,start [,end]]) -> bool\n\
1377 \n\
1378 Return True if B starts with the specified prefix, False otherwise.\n\
1379 With optional start, test B beginning at that position.\n\
1380 With optional end, stop comparing B at that position.\n\
1381 prefix can also be a tuple of strings to try.");
1382
1383 static PyObject *
bytearray_startswith(PyByteArrayObject * self,PyObject * args)1384 bytearray_startswith(PyByteArrayObject *self, PyObject *args)
1385 {
1386 Py_ssize_t start = 0;
1387 Py_ssize_t end = PY_SSIZE_T_MAX;
1388 PyObject *subobj;
1389 int result;
1390
1391 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
1392 return NULL;
1393 if (PyTuple_Check(subobj)) {
1394 Py_ssize_t i;
1395 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1396 result = _bytearray_tailmatch(self,
1397 PyTuple_GET_ITEM(subobj, i),
1398 start, end, -1);
1399 if (result == -1)
1400 return NULL;
1401 else if (result) {
1402 Py_RETURN_TRUE;
1403 }
1404 }
1405 Py_RETURN_FALSE;
1406 }
1407 result = _bytearray_tailmatch(self, subobj, start, end, -1);
1408 if (result == -1)
1409 return NULL;
1410 else
1411 return PyBool_FromLong(result);
1412 }
1413
1414 PyDoc_STRVAR(endswith__doc__,
1415 "B.endswith(suffix [,start [,end]]) -> bool\n\
1416 \n\
1417 Return True if B ends with the specified suffix, False otherwise.\n\
1418 With optional start, test B beginning at that position.\n\
1419 With optional end, stop comparing B at that position.\n\
1420 suffix can also be a tuple of strings to try.");
1421
1422 static PyObject *
bytearray_endswith(PyByteArrayObject * self,PyObject * args)1423 bytearray_endswith(PyByteArrayObject *self, PyObject *args)
1424 {
1425 Py_ssize_t start = 0;
1426 Py_ssize_t end = PY_SSIZE_T_MAX;
1427 PyObject *subobj;
1428 int result;
1429
1430 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
1431 return NULL;
1432 if (PyTuple_Check(subobj)) {
1433 Py_ssize_t i;
1434 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1435 result = _bytearray_tailmatch(self,
1436 PyTuple_GET_ITEM(subobj, i),
1437 start, end, +1);
1438 if (result == -1)
1439 return NULL;
1440 else if (result) {
1441 Py_RETURN_TRUE;
1442 }
1443 }
1444 Py_RETURN_FALSE;
1445 }
1446 result = _bytearray_tailmatch(self, subobj, start, end, +1);
1447 if (result == -1)
1448 return NULL;
1449 else
1450 return PyBool_FromLong(result);
1451 }
1452
1453
1454 PyDoc_STRVAR(translate__doc__,
1455 "B.translate(table[, deletechars]) -> bytearray\n\
1456 \n\
1457 Return a copy of B, where all characters occurring in the\n\
1458 optional argument deletechars are removed, and the remaining\n\
1459 characters have been mapped through the given translation\n\
1460 table, which must be a bytes object of length 256.");
1461
1462 static PyObject *
bytearray_translate(PyByteArrayObject * self,PyObject * args)1463 bytearray_translate(PyByteArrayObject *self, PyObject *args)
1464 {
1465 register char *input, *output;
1466 register const char *table;
1467 register Py_ssize_t i, c;
1468 PyObject *input_obj = (PyObject*)self;
1469 const char *output_start;
1470 Py_ssize_t inlen;
1471 PyObject *result = NULL;
1472 int trans_table[256];
1473 PyObject *tableobj = NULL, *delobj = NULL;
1474 Py_buffer vtable, vdel;
1475
1476 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1477 &tableobj, &delobj))
1478 return NULL;
1479
1480 if (tableobj == Py_None) {
1481 table = NULL;
1482 tableobj = NULL;
1483 } else if (_getbuffer(tableobj, &vtable) < 0) {
1484 return NULL;
1485 } else {
1486 if (vtable.len != 256) {
1487 PyErr_SetString(PyExc_ValueError,
1488 "translation table must be 256 characters long");
1489 PyBuffer_Release(&vtable);
1490 return NULL;
1491 }
1492 table = (const char*)vtable.buf;
1493 }
1494
1495 if (delobj != NULL) {
1496 if (_getbuffer(delobj, &vdel) < 0) {
1497 if (tableobj != NULL)
1498 PyBuffer_Release(&vtable);
1499 return NULL;
1500 }
1501 }
1502 else {
1503 vdel.buf = NULL;
1504 vdel.len = 0;
1505 }
1506
1507 inlen = PyByteArray_GET_SIZE(input_obj);
1508 result = PyByteArray_FromStringAndSize((char *)NULL, inlen);
1509 if (result == NULL)
1510 goto done;
1511 output_start = output = PyByteArray_AsString(result);
1512 input = PyByteArray_AS_STRING(input_obj);
1513
1514 if (vdel.len == 0 && table != NULL) {
1515 /* If no deletions are required, use faster code */
1516 for (i = inlen; --i >= 0; ) {
1517 c = Py_CHARMASK(*input++);
1518 *output++ = table[c];
1519 }
1520 goto done;
1521 }
1522
1523 if (table == NULL) {
1524 for (i = 0; i < 256; i++)
1525 trans_table[i] = Py_CHARMASK(i);
1526 } else {
1527 for (i = 0; i < 256; i++)
1528 trans_table[i] = Py_CHARMASK(table[i]);
1529 }
1530
1531 for (i = 0; i < vdel.len; i++)
1532 trans_table[(int) Py_CHARMASK( ((unsigned char*)vdel.buf)[i] )] = -1;
1533
1534 for (i = inlen; --i >= 0; ) {
1535 c = Py_CHARMASK(*input++);
1536 if (trans_table[c] != -1)
1537 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1538 continue;
1539 }
1540 /* Fix the size of the resulting string */
1541 if (inlen > 0)
1542 PyByteArray_Resize(result, output - output_start);
1543
1544 done:
1545 if (tableobj != NULL)
1546 PyBuffer_Release(&vtable);
1547 if (delobj != NULL)
1548 PyBuffer_Release(&vdel);
1549 return result;
1550 }
1551
1552
1553 /* find and count characters and substrings */
1554
/* memchr() wrapper: pointer to the first byte equal to c among the first
   target_len bytes of target, or NULL when there is none. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
1557
1558
1559 /* Bytes ops must return a string, create a copy */
1560 Py_LOCAL(PyByteArrayObject *)
return_self(PyByteArrayObject * self)1561 return_self(PyByteArrayObject *self)
1562 {
1563 return (PyByteArrayObject *)PyByteArray_FromStringAndSize(
1564 PyByteArray_AS_STRING(self),
1565 PyByteArray_GET_SIZE(self));
1566 }
1567
1568 Py_LOCAL_INLINE(Py_ssize_t)
countchar(const char * target,Py_ssize_t target_len,char c,Py_ssize_t maxcount)1569 countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
1570 {
1571 Py_ssize_t count=0;
1572 const char *start=target;
1573 const char *end=target+target_len;
1574
1575 while ( (start=findchar(start, end-start, c)) != NULL ) {
1576 count++;
1577 if (count >= maxcount)
1578 break;
1579 start += 1;
1580 }
1581 return count;
1582 }
1583
1584
1585 /* Algorithms for different cases of string replacement */
1586
1587 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1588 Py_LOCAL(PyByteArrayObject *)
replace_interleave(PyByteArrayObject * self,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)1589 replace_interleave(PyByteArrayObject *self,
1590 const char *to_s, Py_ssize_t to_len,
1591 Py_ssize_t maxcount)
1592 {
1593 char *self_s, *result_s;
1594 Py_ssize_t self_len, result_len;
1595 Py_ssize_t count, i;
1596 PyByteArrayObject *result;
1597
1598 self_len = PyByteArray_GET_SIZE(self);
1599
1600 /* 1 at the end plus 1 after every character;
1601 count = min(maxcount, self_len + 1) */
1602 if (maxcount <= self_len) {
1603 count = maxcount;
1604 }
1605 else {
1606 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
1607 count = self_len + 1;
1608 }
1609
1610 /* Check for overflow */
1611 /* result_len = count * to_len + self_len; */
1612 assert(count > 0);
1613 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
1614 PyErr_SetString(PyExc_OverflowError,
1615 "replace bytes is too long");
1616 return NULL;
1617 }
1618 result_len = count * to_len + self_len;
1619 if (! (result = (PyByteArrayObject *)
1620 PyByteArray_FromStringAndSize(NULL, result_len)) )
1621 return NULL;
1622
1623 self_s = PyByteArray_AS_STRING(self);
1624 result_s = PyByteArray_AS_STRING(result);
1625
1626 /* TODO: special case single character, which doesn't need memcpy */
1627
1628 /* Lay the first one down (guaranteed this will occur) */
1629 Py_MEMCPY(result_s, to_s, to_len);
1630 result_s += to_len;
1631 count -= 1;
1632
1633 for (i=0; i<count; i++) {
1634 *result_s++ = *self_s++;
1635 Py_MEMCPY(result_s, to_s, to_len);
1636 result_s += to_len;
1637 }
1638
1639 /* Copy the rest of the original string */
1640 Py_MEMCPY(result_s, self_s, self_len-i);
1641
1642 return result;
1643 }
1644
1645 /* Special case for deleting a single character */
1646 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1647 Py_LOCAL(PyByteArrayObject *)
replace_delete_single_character(PyByteArrayObject * self,char from_c,Py_ssize_t maxcount)1648 replace_delete_single_character(PyByteArrayObject *self,
1649 char from_c, Py_ssize_t maxcount)
1650 {
1651 char *self_s, *result_s;
1652 char *start, *next, *end;
1653 Py_ssize_t self_len, result_len;
1654 Py_ssize_t count;
1655 PyByteArrayObject *result;
1656
1657 self_len = PyByteArray_GET_SIZE(self);
1658 self_s = PyByteArray_AS_STRING(self);
1659
1660 count = countchar(self_s, self_len, from_c, maxcount);
1661 if (count == 0) {
1662 return return_self(self);
1663 }
1664
1665 result_len = self_len - count; /* from_len == 1 */
1666 assert(result_len>=0);
1667
1668 if ( (result = (PyByteArrayObject *)
1669 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
1670 return NULL;
1671 result_s = PyByteArray_AS_STRING(result);
1672
1673 start = self_s;
1674 end = self_s + self_len;
1675 while (count-- > 0) {
1676 next = findchar(start, end-start, from_c);
1677 if (next == NULL)
1678 break;
1679 Py_MEMCPY(result_s, start, next-start);
1680 result_s += (next-start);
1681 start = next+1;
1682 }
1683 Py_MEMCPY(result_s, start, end-start);
1684
1685 return result;
1686 }
1687
1688 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1689
1690 Py_LOCAL(PyByteArrayObject *)
replace_delete_substring(PyByteArrayObject * self,const char * from_s,Py_ssize_t from_len,Py_ssize_t maxcount)1691 replace_delete_substring(PyByteArrayObject *self,
1692 const char *from_s, Py_ssize_t from_len,
1693 Py_ssize_t maxcount)
1694 {
1695 char *self_s, *result_s;
1696 char *start, *next, *end;
1697 Py_ssize_t self_len, result_len;
1698 Py_ssize_t count, offset;
1699 PyByteArrayObject *result;
1700
1701 self_len = PyByteArray_GET_SIZE(self);
1702 self_s = PyByteArray_AS_STRING(self);
1703
1704 count = stringlib_count(self_s, self_len,
1705 from_s, from_len,
1706 maxcount);
1707
1708 if (count == 0) {
1709 /* no matches */
1710 return return_self(self);
1711 }
1712
1713 result_len = self_len - (count * from_len);
1714 assert (result_len>=0);
1715
1716 if ( (result = (PyByteArrayObject *)
1717 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL )
1718 return NULL;
1719
1720 result_s = PyByteArray_AS_STRING(result);
1721
1722 start = self_s;
1723 end = self_s + self_len;
1724 while (count-- > 0) {
1725 offset = stringlib_find(start, end-start,
1726 from_s, from_len,
1727 0);
1728 if (offset == -1)
1729 break;
1730 next = start + offset;
1731
1732 Py_MEMCPY(result_s, start, next-start);
1733
1734 result_s += (next-start);
1735 start = next+from_len;
1736 }
1737 Py_MEMCPY(result_s, start, end-start);
1738 return result;
1739 }
1740
1741 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1742 Py_LOCAL(PyByteArrayObject *)
replace_single_character_in_place(PyByteArrayObject * self,char from_c,char to_c,Py_ssize_t maxcount)1743 replace_single_character_in_place(PyByteArrayObject *self,
1744 char from_c, char to_c,
1745 Py_ssize_t maxcount)
1746 {
1747 char *self_s, *result_s, *start, *end, *next;
1748 Py_ssize_t self_len;
1749 PyByteArrayObject *result;
1750
1751 /* The result string will be the same size */
1752 self_s = PyByteArray_AS_STRING(self);
1753 self_len = PyByteArray_GET_SIZE(self);
1754
1755 next = findchar(self_s, self_len, from_c);
1756
1757 if (next == NULL) {
1758 /* No matches; return the original bytes */
1759 return return_self(self);
1760 }
1761
1762 /* Need to make a new bytes */
1763 result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, self_len);
1764 if (result == NULL)
1765 return NULL;
1766 result_s = PyByteArray_AS_STRING(result);
1767 Py_MEMCPY(result_s, self_s, self_len);
1768
1769 /* change everything in-place, starting with this one */
1770 start = result_s + (next-self_s);
1771 *start = to_c;
1772 start++;
1773 end = result_s + self_len;
1774
1775 while (--maxcount > 0) {
1776 next = findchar(start, end-start, from_c);
1777 if (next == NULL)
1778 break;
1779 *next = to_c;
1780 start = next+1;
1781 }
1782
1783 return result;
1784 }
1785
1786 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1787 Py_LOCAL(PyByteArrayObject *)
replace_substring_in_place(PyByteArrayObject * self,const char * from_s,Py_ssize_t from_len,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)1788 replace_substring_in_place(PyByteArrayObject *self,
1789 const char *from_s, Py_ssize_t from_len,
1790 const char *to_s, Py_ssize_t to_len,
1791 Py_ssize_t maxcount)
1792 {
1793 char *result_s, *start, *end;
1794 char *self_s;
1795 Py_ssize_t self_len, offset;
1796 PyByteArrayObject *result;
1797
1798 /* The result bytes will be the same size */
1799
1800 self_s = PyByteArray_AS_STRING(self);
1801 self_len = PyByteArray_GET_SIZE(self);
1802
1803 offset = stringlib_find(self_s, self_len,
1804 from_s, from_len,
1805 0);
1806 if (offset == -1) {
1807 /* No matches; return the original bytes */
1808 return return_self(self);
1809 }
1810
1811 /* Need to make a new bytes */
1812 result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, self_len);
1813 if (result == NULL)
1814 return NULL;
1815 result_s = PyByteArray_AS_STRING(result);
1816 Py_MEMCPY(result_s, self_s, self_len);
1817
1818 /* change everything in-place, starting with this one */
1819 start = result_s + offset;
1820 Py_MEMCPY(start, to_s, from_len);
1821 start += from_len;
1822 end = result_s + self_len;
1823
1824 while ( --maxcount > 0) {
1825 offset = stringlib_find(start, end-start,
1826 from_s, from_len,
1827 0);
1828 if (offset==-1)
1829 break;
1830 Py_MEMCPY(start+offset, to_s, from_len);
1831 start += offset+from_len;
1832 }
1833
1834 return result;
1835 }
1836
1837 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1838 Py_LOCAL(PyByteArrayObject *)
replace_single_character(PyByteArrayObject * self,char from_c,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)1839 replace_single_character(PyByteArrayObject *self,
1840 char from_c,
1841 const char *to_s, Py_ssize_t to_len,
1842 Py_ssize_t maxcount)
1843 {
1844 char *self_s, *result_s;
1845 char *start, *next, *end;
1846 Py_ssize_t self_len, result_len;
1847 Py_ssize_t count;
1848 PyByteArrayObject *result;
1849
1850 self_s = PyByteArray_AS_STRING(self);
1851 self_len = PyByteArray_GET_SIZE(self);
1852
1853 count = countchar(self_s, self_len, from_c, maxcount);
1854 if (count == 0) {
1855 /* no matches, return unchanged */
1856 return return_self(self);
1857 }
1858
1859 /* use the difference between current and new, hence the "-1" */
1860 /* result_len = self_len + count * (to_len-1) */
1861 assert(count > 0);
1862 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
1863 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1864 return NULL;
1865 }
1866 result_len = self_len + count * (to_len - 1);
1867
1868 if ( (result = (PyByteArrayObject *)
1869 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
1870 return NULL;
1871 result_s = PyByteArray_AS_STRING(result);
1872
1873 start = self_s;
1874 end = self_s + self_len;
1875 while (count-- > 0) {
1876 next = findchar(start, end-start, from_c);
1877 if (next == NULL)
1878 break;
1879
1880 if (next == start) {
1881 /* replace with the 'to' */
1882 Py_MEMCPY(result_s, to_s, to_len);
1883 result_s += to_len;
1884 start += 1;
1885 } else {
1886 /* copy the unchanged old then the 'to' */
1887 Py_MEMCPY(result_s, start, next-start);
1888 result_s += (next-start);
1889 Py_MEMCPY(result_s, to_s, to_len);
1890 result_s += to_len;
1891 start = next+1;
1892 }
1893 }
1894 /* Copy the remainder of the remaining bytes */
1895 Py_MEMCPY(result_s, start, end-start);
1896
1897 return result;
1898 }
1899
1900 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1901 Py_LOCAL(PyByteArrayObject *)
replace_substring(PyByteArrayObject * self,const char * from_s,Py_ssize_t from_len,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)1902 replace_substring(PyByteArrayObject *self,
1903 const char *from_s, Py_ssize_t from_len,
1904 const char *to_s, Py_ssize_t to_len,
1905 Py_ssize_t maxcount)
1906 {
1907 char *self_s, *result_s;
1908 char *start, *next, *end;
1909 Py_ssize_t self_len, result_len;
1910 Py_ssize_t count, offset;
1911 PyByteArrayObject *result;
1912
1913 self_s = PyByteArray_AS_STRING(self);
1914 self_len = PyByteArray_GET_SIZE(self);
1915
1916 count = stringlib_count(self_s, self_len,
1917 from_s, from_len,
1918 maxcount);
1919
1920 if (count == 0) {
1921 /* no matches, return unchanged */
1922 return return_self(self);
1923 }
1924
1925 /* Check for overflow */
1926 /* result_len = self_len + count * (to_len-from_len) */
1927 assert(count > 0);
1928 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
1929 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1930 return NULL;
1931 }
1932 result_len = self_len + count * (to_len - from_len);
1933
1934 if ( (result = (PyByteArrayObject *)
1935 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
1936 return NULL;
1937 result_s = PyByteArray_AS_STRING(result);
1938
1939 start = self_s;
1940 end = self_s + self_len;
1941 while (count-- > 0) {
1942 offset = stringlib_find(start, end-start,
1943 from_s, from_len,
1944 0);
1945 if (offset == -1)
1946 break;
1947 next = start+offset;
1948 if (next == start) {
1949 /* replace with the 'to' */
1950 Py_MEMCPY(result_s, to_s, to_len);
1951 result_s += to_len;
1952 start += from_len;
1953 } else {
1954 /* copy the unchanged old then the 'to' */
1955 Py_MEMCPY(result_s, start, next-start);
1956 result_s += (next-start);
1957 Py_MEMCPY(result_s, to_s, to_len);
1958 result_s += to_len;
1959 start = next+from_len;
1960 }
1961 }
1962 /* Copy the remainder of the remaining bytes */
1963 Py_MEMCPY(result_s, start, end-start);
1964
1965 return result;
1966 }
1967
1968
1969 Py_LOCAL(PyByteArrayObject *)
replace(PyByteArrayObject * self,const char * from_s,Py_ssize_t from_len,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)1970 replace(PyByteArrayObject *self,
1971 const char *from_s, Py_ssize_t from_len,
1972 const char *to_s, Py_ssize_t to_len,
1973 Py_ssize_t maxcount)
1974 {
1975 if (maxcount < 0) {
1976 maxcount = PY_SSIZE_T_MAX;
1977 } else if (maxcount == 0 || PyByteArray_GET_SIZE(self) == 0) {
1978 /* nothing to do; return the original bytes */
1979 return return_self(self);
1980 }
1981
1982 if (maxcount == 0 ||
1983 (from_len == 0 && to_len == 0)) {
1984 /* nothing to do; return the original bytes */
1985 return return_self(self);
1986 }
1987
1988 /* Handle zero-length special cases */
1989
1990 if (from_len == 0) {
1991 /* insert the 'to' bytes everywhere. */
1992 /* >>> "Python".replace("", ".") */
1993 /* '.P.y.t.h.o.n.' */
1994 return replace_interleave(self, to_s, to_len, maxcount);
1995 }
1996
1997 /* Except for "".replace("", "A") == "A" there is no way beyond this */
1998 /* point for an empty self bytes to generate a non-empty bytes */
1999 /* Special case so the remaining code always gets a non-empty bytes */
2000 if (PyByteArray_GET_SIZE(self) == 0) {
2001 return return_self(self);
2002 }
2003
2004 if (to_len == 0) {
2005 /* delete all occurrences of 'from' bytes */
2006 if (from_len == 1) {
2007 return replace_delete_single_character(
2008 self, from_s[0], maxcount);
2009 } else {
2010 return replace_delete_substring(self, from_s, from_len, maxcount);
2011 }
2012 }
2013
2014 /* Handle special case where both bytes have the same length */
2015
2016 if (from_len == to_len) {
2017 if (from_len == 1) {
2018 return replace_single_character_in_place(
2019 self,
2020 from_s[0],
2021 to_s[0],
2022 maxcount);
2023 } else {
2024 return replace_substring_in_place(
2025 self, from_s, from_len, to_s, to_len, maxcount);
2026 }
2027 }
2028
2029 /* Otherwise use the more generic algorithms */
2030 if (from_len == 1) {
2031 return replace_single_character(self, from_s[0],
2032 to_s, to_len, maxcount);
2033 } else {
2034 /* len('from')>=2, len('to')>=1 */
2035 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2036 }
2037 }
2038
2039
2040 PyDoc_STRVAR(replace__doc__,
2041 "B.replace(old, new[, count]) -> bytes\n\
2042 \n\
2043 Return a copy of B with all occurrences of subsection\n\
2044 old replaced by new. If the optional argument count is\n\
2045 given, only the first count occurrences are replaced.");
2046
2047 static PyObject *
bytearray_replace(PyByteArrayObject * self,PyObject * args)2048 bytearray_replace(PyByteArrayObject *self, PyObject *args)
2049 {
2050 Py_ssize_t count = -1;
2051 PyObject *from, *to, *res;
2052 Py_buffer vfrom, vto;
2053
2054 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2055 return NULL;
2056
2057 if (_getbuffer(from, &vfrom) < 0)
2058 return NULL;
2059 if (_getbuffer(to, &vto) < 0) {
2060 PyBuffer_Release(&vfrom);
2061 return NULL;
2062 }
2063
2064 res = (PyObject *)replace((PyByteArrayObject *) self,
2065 vfrom.buf, vfrom.len,
2066 vto.buf, vto.len, count);
2067
2068 PyBuffer_Release(&vfrom);
2069 PyBuffer_Release(&vto);
2070 return res;
2071 }
2072
2073 PyDoc_STRVAR(split__doc__,
2074 "B.split([sep[, maxsplit]]) -> list of bytearray\n\
2075 \n\
2076 Return a list of the sections in B, using sep as the delimiter.\n\
2077 If sep is not given, B is split on ASCII whitespace characters\n\
2078 (space, tab, return, newline, formfeed, vertical tab).\n\
2079 If maxsplit is given, at most maxsplit splits are done.");
2080
2081 static PyObject *
bytearray_split(PyByteArrayObject * self,PyObject * args)2082 bytearray_split(PyByteArrayObject *self, PyObject *args)
2083 {
2084 Py_ssize_t len = PyByteArray_GET_SIZE(self), n;
2085 Py_ssize_t maxsplit = -1;
2086 const char *s = PyByteArray_AS_STRING(self), *sub;
2087 PyObject *list, *subobj = Py_None;
2088 Py_buffer vsub;
2089
2090 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
2091 return NULL;
2092 if (maxsplit < 0)
2093 maxsplit = PY_SSIZE_T_MAX;
2094
2095 if (subobj == Py_None)
2096 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
2097
2098 if (_getbuffer(subobj, &vsub) < 0)
2099 return NULL;
2100 sub = vsub.buf;
2101 n = vsub.len;
2102
2103 list = stringlib_split(
2104 (PyObject*) self, s, len, sub, n, maxsplit
2105 );
2106 PyBuffer_Release(&vsub);
2107 return list;
2108 }
2109
2110 PyDoc_STRVAR(partition__doc__,
2111 "B.partition(sep) -> (head, sep, tail)\n\
2112 \n\
2113 Searches for the separator sep in B, and returns the part before it,\n\
2114 the separator itself, and the part after it. If the separator is not\n\
2115 found, returns B and two empty bytearray objects.");
2116
2117 static PyObject *
bytearray_partition(PyByteArrayObject * self,PyObject * sep_obj)2118 bytearray_partition(PyByteArrayObject *self, PyObject *sep_obj)
2119 {
2120 PyObject *bytesep, *result;
2121
2122 bytesep = _PyByteArray_FromBufferObject(sep_obj);
2123 if (! bytesep)
2124 return NULL;
2125
2126 result = stringlib_partition(
2127 (PyObject*) self,
2128 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
2129 bytesep,
2130 PyByteArray_AS_STRING(bytesep), PyByteArray_GET_SIZE(bytesep)
2131 );
2132
2133 Py_DECREF(bytesep);
2134 return result;
2135 }
2136
2137 PyDoc_STRVAR(rpartition__doc__,
2138 "B.rpartition(sep) -> (head, sep, tail)\n\
2139 \n\
2140 Searches for the separator sep in B, starting at the end of B,\n\
2141 and returns the part before it, the separator itself, and the\n\
2142 part after it. If the separator is not found, returns two empty\n\
2143 bytearray objects and B.");
2144
2145 static PyObject *
bytearray_rpartition(PyByteArrayObject * self,PyObject * sep_obj)2146 bytearray_rpartition(PyByteArrayObject *self, PyObject *sep_obj)
2147 {
2148 PyObject *bytesep, *result;
2149
2150 bytesep = _PyByteArray_FromBufferObject(sep_obj);
2151 if (! bytesep)
2152 return NULL;
2153
2154 result = stringlib_rpartition(
2155 (PyObject*) self,
2156 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
2157 bytesep,
2158 PyByteArray_AS_STRING(bytesep), PyByteArray_GET_SIZE(bytesep)
2159 );
2160
2161 Py_DECREF(bytesep);
2162 return result;
2163 }
2164
2165 PyDoc_STRVAR(rsplit__doc__,
2166 "B.rsplit(sep[, maxsplit]) -> list of bytearray\n\
2167 \n\
2168 Return a list of the sections in B, using sep as the delimiter,\n\
2169 starting at the end of B and working to the front.\n\
2170 If sep is not given, B is split on ASCII whitespace characters\n\
2171 (space, tab, return, newline, formfeed, vertical tab).\n\
2172 If maxsplit is given, at most maxsplit splits are done.");
2173
2174 static PyObject *
bytearray_rsplit(PyByteArrayObject * self,PyObject * args)2175 bytearray_rsplit(PyByteArrayObject *self, PyObject *args)
2176 {
2177 Py_ssize_t len = PyByteArray_GET_SIZE(self), n;
2178 Py_ssize_t maxsplit = -1;
2179 const char *s = PyByteArray_AS_STRING(self), *sub;
2180 PyObject *list, *subobj = Py_None;
2181 Py_buffer vsub;
2182
2183 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
2184 return NULL;
2185 if (maxsplit < 0)
2186 maxsplit = PY_SSIZE_T_MAX;
2187
2188 if (subobj == Py_None)
2189 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
2190
2191 if (_getbuffer(subobj, &vsub) < 0)
2192 return NULL;
2193 sub = vsub.buf;
2194 n = vsub.len;
2195
2196 list = stringlib_rsplit(
2197 (PyObject*) self, s, len, sub, n, maxsplit
2198 );
2199 PyBuffer_Release(&vsub);
2200 return list;
2201 }
2202
2203 PyDoc_STRVAR(reverse__doc__,
2204 "B.reverse() -> None\n\
2205 \n\
2206 Reverse the order of the values in B in place.");
2207 static PyObject *
bytearray_reverse(PyByteArrayObject * self,PyObject * unused)2208 bytearray_reverse(PyByteArrayObject *self, PyObject *unused)
2209 {
2210 char swap, *head, *tail;
2211 Py_ssize_t i, j, n = Py_SIZE(self);
2212
2213 j = n / 2;
2214 head = self->ob_bytes;
2215 tail = head + n - 1;
2216 for (i = 0; i < j; i++) {
2217 swap = *head;
2218 *head++ = *tail;
2219 *tail-- = swap;
2220 }
2221
2222 Py_RETURN_NONE;
2223 }
2224
2225 PyDoc_STRVAR(insert__doc__,
2226 "B.insert(index, int) -> None\n\
2227 \n\
2228 Insert a single item into the bytearray before the given index.");
2229 static PyObject *
bytearray_insert(PyByteArrayObject * self,PyObject * args)2230 bytearray_insert(PyByteArrayObject *self, PyObject *args)
2231 {
2232 PyObject *value;
2233 int ival;
2234 Py_ssize_t where, n = Py_SIZE(self);
2235
2236 if (!PyArg_ParseTuple(args, "nO:insert", &where, &value))
2237 return NULL;
2238
2239 if (n == PY_SSIZE_T_MAX) {
2240 PyErr_SetString(PyExc_OverflowError,
2241 "cannot add more objects to bytearray");
2242 return NULL;
2243 }
2244 if (!_getbytevalue(value, &ival))
2245 return NULL;
2246 if (PyByteArray_Resize((PyObject *)self, n + 1) < 0)
2247 return NULL;
2248
2249 if (where < 0) {
2250 where += n;
2251 if (where < 0)
2252 where = 0;
2253 }
2254 if (where > n)
2255 where = n;
2256 memmove(self->ob_bytes + where + 1, self->ob_bytes + where, n - where);
2257 self->ob_bytes[where] = ival;
2258
2259 Py_RETURN_NONE;
2260 }
2261
2262 PyDoc_STRVAR(append__doc__,
2263 "B.append(int) -> None\n\
2264 \n\
2265 Append a single item to the end of B.");
2266 static PyObject *
bytearray_append(PyByteArrayObject * self,PyObject * arg)2267 bytearray_append(PyByteArrayObject *self, PyObject *arg)
2268 {
2269 int value;
2270 Py_ssize_t n = Py_SIZE(self);
2271
2272 if (! _getbytevalue(arg, &value))
2273 return NULL;
2274 if (n == PY_SSIZE_T_MAX) {
2275 PyErr_SetString(PyExc_OverflowError,
2276 "cannot add more objects to bytearray");
2277 return NULL;
2278 }
2279 if (PyByteArray_Resize((PyObject *)self, n + 1) < 0)
2280 return NULL;
2281
2282 self->ob_bytes[n] = value;
2283
2284 Py_RETURN_NONE;
2285 }
2286
2287 PyDoc_STRVAR(extend__doc__,
2288 "B.extend(iterable int) -> None\n\
2289 \n\
2290 Append all the elements from the iterator or sequence to the\n\
2291 end of B.");
2292 static PyObject *
bytearray_extend(PyByteArrayObject * self,PyObject * arg)2293 bytearray_extend(PyByteArrayObject *self, PyObject *arg)
2294 {
2295 PyObject *it, *item, *bytearray_obj;
2296 Py_ssize_t buf_size = 0, len = 0;
2297 int value;
2298 char *buf;
2299
2300 /* bytearray_setslice code only accepts something supporting PEP 3118. */
2301 if (PyObject_CheckBuffer(arg)) {
2302 if (bytearray_setslice(self, Py_SIZE(self), Py_SIZE(self), arg) == -1)
2303 return NULL;
2304
2305 Py_RETURN_NONE;
2306 }
2307
2308 it = PyObject_GetIter(arg);
2309 if (it == NULL)
2310 return NULL;
2311
2312 /* Try to determine the length of the argument. 32 is arbitrary. */
2313 buf_size = _PyObject_LengthHint(arg, 32);
2314 if (buf_size == -1) {
2315 Py_DECREF(it);
2316 return NULL;
2317 }
2318
2319 bytearray_obj = PyByteArray_FromStringAndSize(NULL, buf_size);
2320 if (bytearray_obj == NULL) {
2321 Py_DECREF(it);
2322 return NULL;
2323 }
2324 buf = PyByteArray_AS_STRING(bytearray_obj);
2325
2326 while ((item = PyIter_Next(it)) != NULL) {
2327 if (! _getbytevalue(item, &value)) {
2328 Py_DECREF(item);
2329 Py_DECREF(it);
2330 Py_DECREF(bytearray_obj);
2331 return NULL;
2332 }
2333 buf[len++] = value;
2334 Py_DECREF(item);
2335
2336 if (len >= buf_size) {
2337 Py_ssize_t addition;
2338 if (len == PY_SSIZE_T_MAX) {
2339 Py_DECREF(it);
2340 Py_DECREF(bytearray_obj);
2341 return PyErr_NoMemory();
2342 }
2343 addition = len >> 1;
2344 if (addition > PY_SSIZE_T_MAX - len - 1)
2345 buf_size = PY_SSIZE_T_MAX;
2346 else
2347 buf_size = len + addition + 1;
2348 if (PyByteArray_Resize((PyObject *)bytearray_obj, buf_size) < 0) {
2349 Py_DECREF(it);
2350 Py_DECREF(bytearray_obj);
2351 return NULL;
2352 }
2353 /* Recompute the `buf' pointer, since the resizing operation may
2354 have invalidated it. */
2355 buf = PyByteArray_AS_STRING(bytearray_obj);
2356 }
2357 }
2358 Py_DECREF(it);
2359
2360 /* Resize down to exact size. */
2361 if (PyByteArray_Resize((PyObject *)bytearray_obj, len) < 0) {
2362 Py_DECREF(bytearray_obj);
2363 return NULL;
2364 }
2365
2366 if (bytearray_setslice(self, Py_SIZE(self), Py_SIZE(self), bytearray_obj) == -1) {
2367 Py_DECREF(bytearray_obj);
2368 return NULL;
2369 }
2370 Py_DECREF(bytearray_obj);
2371
2372 Py_RETURN_NONE;
2373 }
2374
2375 PyDoc_STRVAR(pop__doc__,
2376 "B.pop([index]) -> int\n\
2377 \n\
2378 Remove and return a single item from B. If no index\n\
2379 argument is given, will pop the last value.");
2380 static PyObject *
bytearray_pop(PyByteArrayObject * self,PyObject * args)2381 bytearray_pop(PyByteArrayObject *self, PyObject *args)
2382 {
2383 int value;
2384 Py_ssize_t where = -1, n = Py_SIZE(self);
2385
2386 if (!PyArg_ParseTuple(args, "|n:pop", &where))
2387 return NULL;
2388
2389 if (n == 0) {
2390 PyErr_SetString(PyExc_IndexError,
2391 "pop from empty bytearray");
2392 return NULL;
2393 }
2394 if (where < 0)
2395 where += Py_SIZE(self);
2396 if (where < 0 || where >= Py_SIZE(self)) {
2397 PyErr_SetString(PyExc_IndexError, "pop index out of range");
2398 return NULL;
2399 }
2400 if (!_canresize(self))
2401 return NULL;
2402
2403 value = self->ob_bytes[where];
2404 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2405 if (PyByteArray_Resize((PyObject *)self, n - 1) < 0)
2406 return NULL;
2407
2408 return PyInt_FromLong((unsigned char)value);
2409 }
2410
2411 PyDoc_STRVAR(remove__doc__,
2412 "B.remove(int) -> None\n\
2413 \n\
2414 Remove the first occurrence of a value in B.");
2415 static PyObject *
bytearray_remove(PyByteArrayObject * self,PyObject * arg)2416 bytearray_remove(PyByteArrayObject *self, PyObject *arg)
2417 {
2418 int value;
2419 Py_ssize_t n = Py_SIZE(self);
2420 char *where;
2421
2422 if (! _getbytevalue(arg, &value))
2423 return NULL;
2424
2425 where = memchr(self->ob_bytes, value, n);
2426 if (!where) {
2427 PyErr_SetString(PyExc_ValueError, "value not found in bytearray");
2428 return NULL;
2429 }
2430 if (!_canresize(self))
2431 return NULL;
2432
2433 memmove(where, where + 1, self->ob_bytes + n - where);
2434 if (PyByteArray_Resize((PyObject *)self, n - 1) < 0)
2435 return NULL;
2436
2437 Py_RETURN_NONE;
2438 }
2439
2440 /* XXX These two helpers could be optimized if argsize == 1 */
2441
2442 static Py_ssize_t
lstrip_helper(unsigned char * myptr,Py_ssize_t mysize,void * argptr,Py_ssize_t argsize)2443 lstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2444 void *argptr, Py_ssize_t argsize)
2445 {
2446 Py_ssize_t i = 0;
2447 while (i < mysize && memchr(argptr, myptr[i], argsize))
2448 i++;
2449 return i;
2450 }
2451
2452 static Py_ssize_t
rstrip_helper(unsigned char * myptr,Py_ssize_t mysize,void * argptr,Py_ssize_t argsize)2453 rstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2454 void *argptr, Py_ssize_t argsize)
2455 {
2456 Py_ssize_t i = mysize - 1;
2457 while (i >= 0 && memchr(argptr, myptr[i], argsize))
2458 i--;
2459 return i + 1;
2460 }
2461
2462 PyDoc_STRVAR(strip__doc__,
2463 "B.strip([bytes]) -> bytearray\n\
2464 \n\
2465 Strip leading and trailing bytes contained in the argument.\n\
2466 If the argument is omitted, strip ASCII whitespace.");
2467 static PyObject *
bytearray_strip(PyByteArrayObject * self,PyObject * args)2468 bytearray_strip(PyByteArrayObject *self, PyObject *args)
2469 {
2470 Py_ssize_t left, right, mysize, argsize;
2471 void *myptr, *argptr;
2472 PyObject *arg = Py_None;
2473 Py_buffer varg;
2474 if (!PyArg_ParseTuple(args, "|O:strip", &arg))
2475 return NULL;
2476 if (arg == Py_None) {
2477 argptr = "\t\n\r\f\v ";
2478 argsize = 6;
2479 }
2480 else {
2481 if (_getbuffer(arg, &varg) < 0)
2482 return NULL;
2483 argptr = varg.buf;
2484 argsize = varg.len;
2485 }
2486 myptr = self->ob_bytes;
2487 mysize = Py_SIZE(self);
2488 left = lstrip_helper(myptr, mysize, argptr, argsize);
2489 if (left == mysize)
2490 right = left;
2491 else
2492 right = rstrip_helper(myptr, mysize, argptr, argsize);
2493 if (arg != Py_None)
2494 PyBuffer_Release(&varg);
2495 return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
2496 }
2497
2498 PyDoc_STRVAR(lstrip__doc__,
2499 "B.lstrip([bytes]) -> bytearray\n\
2500 \n\
2501 Strip leading bytes contained in the argument.\n\
2502 If the argument is omitted, strip leading ASCII whitespace.");
2503 static PyObject *
bytearray_lstrip(PyByteArrayObject * self,PyObject * args)2504 bytearray_lstrip(PyByteArrayObject *self, PyObject *args)
2505 {
2506 Py_ssize_t left, right, mysize, argsize;
2507 void *myptr, *argptr;
2508 PyObject *arg = Py_None;
2509 Py_buffer varg;
2510 if (!PyArg_ParseTuple(args, "|O:lstrip", &arg))
2511 return NULL;
2512 if (arg == Py_None) {
2513 argptr = "\t\n\r\f\v ";
2514 argsize = 6;
2515 }
2516 else {
2517 if (_getbuffer(arg, &varg) < 0)
2518 return NULL;
2519 argptr = varg.buf;
2520 argsize = varg.len;
2521 }
2522 myptr = self->ob_bytes;
2523 mysize = Py_SIZE(self);
2524 left = lstrip_helper(myptr, mysize, argptr, argsize);
2525 right = mysize;
2526 if (arg != Py_None)
2527 PyBuffer_Release(&varg);
2528 return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
2529 }
2530
2531 PyDoc_STRVAR(rstrip__doc__,
2532 "B.rstrip([bytes]) -> bytearray\n\
2533 \n\
2534 Strip trailing bytes contained in the argument.\n\
2535 If the argument is omitted, strip trailing ASCII whitespace.");
2536 static PyObject *
bytearray_rstrip(PyByteArrayObject * self,PyObject * args)2537 bytearray_rstrip(PyByteArrayObject *self, PyObject *args)
2538 {
2539 Py_ssize_t left, right, mysize, argsize;
2540 void *myptr, *argptr;
2541 PyObject *arg = Py_None;
2542 Py_buffer varg;
2543 if (!PyArg_ParseTuple(args, "|O:rstrip", &arg))
2544 return NULL;
2545 if (arg == Py_None) {
2546 argptr = "\t\n\r\f\v ";
2547 argsize = 6;
2548 }
2549 else {
2550 if (_getbuffer(arg, &varg) < 0)
2551 return NULL;
2552 argptr = varg.buf;
2553 argsize = varg.len;
2554 }
2555 myptr = self->ob_bytes;
2556 mysize = Py_SIZE(self);
2557 left = 0;
2558 right = rstrip_helper(myptr, mysize, argptr, argsize);
2559 if (arg != Py_None)
2560 PyBuffer_Release(&varg);
2561 return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
2562 }
2563
2564 PyDoc_STRVAR(decode_doc,
2565 "B.decode([encoding[, errors]]) -> unicode object.\n\
2566 \n\
2567 Decodes B using the codec registered for encoding. encoding defaults\n\
2568 to the default encoding. errors may be given to set a different error\n\
2569 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2570 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2571 as well as any other name registered with codecs.register_error that is\n\
2572 able to handle UnicodeDecodeErrors.");
2573
2574 static PyObject *
bytearray_decode(PyObject * self,PyObject * args,PyObject * kwargs)2575 bytearray_decode(PyObject *self, PyObject *args, PyObject *kwargs)
2576 {
2577 const char *encoding = NULL;
2578 const char *errors = NULL;
2579 static char *kwlist[] = {"encoding", "errors", 0};
2580
2581 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", kwlist, &encoding, &errors))
2582 return NULL;
2583 if (encoding == NULL) {
2584 #ifdef Py_USING_UNICODE
2585 encoding = PyUnicode_GetDefaultEncoding();
2586 #else
2587 PyErr_SetString(PyExc_ValueError, "no encoding specified");
2588 return NULL;
2589 #endif
2590 }
2591 return _PyCodec_DecodeText(self, encoding, errors);
2592 }
2593
2594 PyDoc_STRVAR(alloc_doc,
2595 "B.__alloc__() -> int\n\
2596 \n\
2597 Returns the number of bytes actually allocated.");
2598
2599 static PyObject *
bytearray_alloc(PyByteArrayObject * self)2600 bytearray_alloc(PyByteArrayObject *self)
2601 {
2602 return PyInt_FromSsize_t(self->ob_alloc);
2603 }
2604
2605 PyDoc_STRVAR(join_doc,
2606 "B.join(iterable_of_bytes) -> bytes\n\
2607 \n\
2608 Concatenates any number of bytearray objects, with B in between each pair.");
2609
2610 static PyObject *
bytearray_join(PyByteArrayObject * self,PyObject * it)2611 bytearray_join(PyByteArrayObject *self, PyObject *it)
2612 {
2613 PyObject *seq;
2614 Py_ssize_t mysize = Py_SIZE(self);
2615 Py_ssize_t i;
2616 Py_ssize_t n;
2617 PyObject **items;
2618 Py_ssize_t totalsize = 0;
2619 PyObject *result;
2620 char *dest;
2621
2622 seq = PySequence_Fast(it, "can only join an iterable");
2623 if (seq == NULL)
2624 return NULL;
2625 n = PySequence_Fast_GET_SIZE(seq);
2626 items = PySequence_Fast_ITEMS(seq);
2627
2628 /* Compute the total size, and check that they are all bytes */
2629 /* XXX Shouldn't we use _getbuffer() on these items instead? */
2630 for (i = 0; i < n; i++) {
2631 PyObject *obj = items[i];
2632 if (!PyByteArray_Check(obj) && !PyBytes_Check(obj)) {
2633 PyErr_Format(PyExc_TypeError,
2634 "can only join an iterable of bytes "
2635 "(item %ld has type '%.100s')",
2636 /* XXX %ld isn't right on Win64 */
2637 (long)i, Py_TYPE(obj)->tp_name);
2638 goto error;
2639 }
2640 if (i > 0)
2641 totalsize += mysize;
2642 totalsize += Py_SIZE(obj);
2643 if (totalsize < 0) {
2644 PyErr_NoMemory();
2645 goto error;
2646 }
2647 }
2648
2649 /* Allocate the result, and copy the bytes */
2650 result = PyByteArray_FromStringAndSize(NULL, totalsize);
2651 if (result == NULL)
2652 goto error;
2653 dest = PyByteArray_AS_STRING(result);
2654 for (i = 0; i < n; i++) {
2655 PyObject *obj = items[i];
2656 Py_ssize_t size = Py_SIZE(obj);
2657 char *buf;
2658 if (PyByteArray_Check(obj))
2659 buf = PyByteArray_AS_STRING(obj);
2660 else
2661 buf = PyBytes_AS_STRING(obj);
2662 if (i) {
2663 memcpy(dest, self->ob_bytes, mysize);
2664 dest += mysize;
2665 }
2666 memcpy(dest, buf, size);
2667 dest += size;
2668 }
2669
2670 /* Done */
2671 Py_DECREF(seq);
2672 return result;
2673
2674 /* Error handling */
2675 error:
2676 Py_DECREF(seq);
2677 return NULL;
2678 }
2679
2680 PyDoc_STRVAR(splitlines__doc__,
2681 "B.splitlines(keepends=False) -> list of lines\n\
2682 \n\
2683 Return a list of the lines in B, breaking at line boundaries.\n\
2684 Line breaks are not included in the resulting list unless keepends\n\
2685 is given and true.");
2686
2687 static PyObject*
bytearray_splitlines(PyObject * self,PyObject * args)2688 bytearray_splitlines(PyObject *self, PyObject *args)
2689 {
2690 int keepends = 0;
2691
2692 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
2693 return NULL;
2694
2695 return stringlib_splitlines(
2696 (PyObject*) self, PyByteArray_AS_STRING(self),
2697 PyByteArray_GET_SIZE(self), keepends
2698 );
2699 }
2700
/* Docstring for the bytearray.fromhex() class method. */
PyDoc_STRVAR(fromhex_doc,
"bytearray.fromhex(string) -> bytearray\n\
\n\
Create a bytearray object from a string of hexadecimal numbers.\n\
Spaces between two numbers are accepted.\n\
Example: bytearray.fromhex('B9 01EF') -> bytearray(b'\\xb9\\x01\\xef').");
2707
/* Returns the value (0-15) of the hexadecimal digit c, accepting both
   upper- and lower-case letters, or -1 if c is not a hex digit. */
static int
hex_digit_to_int(char c)
{
    if (Py_ISDIGIT(c))
        return c - '0';

    if (Py_ISUPPER(c))
        c = Py_TOLOWER(c);
    if ('a' <= c && c <= 'f')
        return 10 + (c - 'a');

    return -1;
}
2721
2722 static PyObject *
bytearray_fromhex(PyObject * cls,PyObject * args)2723 bytearray_fromhex(PyObject *cls, PyObject *args)
2724 {
2725 PyObject *newbytes;
2726 char *buf;
2727 char *hex;
2728 Py_ssize_t hexlen, byteslen, i, j;
2729 int top, bot;
2730
2731 if (!PyArg_ParseTuple(args, "s#:fromhex", &hex, &hexlen))
2732 return NULL;
2733 byteslen = hexlen/2; /* This overestimates if there are spaces */
2734 newbytes = PyByteArray_FromStringAndSize(NULL, byteslen);
2735 if (!newbytes)
2736 return NULL;
2737 buf = PyByteArray_AS_STRING(newbytes);
2738 for (i = j = 0; i < hexlen; i += 2) {
2739 /* skip over spaces in the input */
2740 while (hex[i] == ' ')
2741 i++;
2742 if (i >= hexlen)
2743 break;
2744 top = hex_digit_to_int(hex[i]);
2745 bot = hex_digit_to_int(hex[i+1]);
2746 if (top == -1 || bot == -1) {
2747 PyErr_Format(PyExc_ValueError,
2748 "non-hexadecimal number found in "
2749 "fromhex() arg at position %zd", i);
2750 goto error;
2751 }
2752 buf[j++] = (top << 4) + bot;
2753 }
2754 if (PyByteArray_Resize(newbytes, j) < 0)
2755 goto error;
2756 return newbytes;
2757
2758 error:
2759 Py_DECREF(newbytes);
2760 return NULL;
2761 }
2762
2763 PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
2764
2765 static PyObject *
bytearray_reduce(PyByteArrayObject * self)2766 bytearray_reduce(PyByteArrayObject *self)
2767 {
2768 PyObject *latin1, *dict;
2769 if (self->ob_bytes)
2770 #ifdef Py_USING_UNICODE
2771 latin1 = PyUnicode_DecodeLatin1(self->ob_bytes,
2772 Py_SIZE(self), NULL);
2773 #else
2774 latin1 = PyString_FromStringAndSize(self->ob_bytes, Py_SIZE(self));
2775 #endif
2776 else
2777 #ifdef Py_USING_UNICODE
2778 latin1 = PyUnicode_FromString("");
2779 #else
2780 latin1 = PyString_FromString("");
2781 #endif
2782
2783 dict = PyObject_GetAttrString((PyObject *)self, "__dict__");
2784 if (dict == NULL) {
2785 PyErr_Clear();
2786 dict = Py_None;
2787 Py_INCREF(dict);
2788 }
2789
2790 return Py_BuildValue("(O(Ns)N)", Py_TYPE(self), latin1, "latin-1", dict);
2791 }
2792
2793 PyDoc_STRVAR(sizeof_doc,
2794 "B.__sizeof__() -> int\n\
2795 \n\
2796 Returns the size of B in memory, in bytes");
2797 static PyObject *
bytearray_sizeof(PyByteArrayObject * self)2798 bytearray_sizeof(PyByteArrayObject *self)
2799 {
2800 Py_ssize_t res;
2801
2802 res = _PyObject_SIZE(Py_TYPE(self)) + self->ob_alloc * sizeof(char);
2803 return PyInt_FromSsize_t(res);
2804 }
2805
/* Sequence-protocol slots for bytearray.  The initializers are positional:
   their order must match the PySequenceMethods layout.  The two slice
   slots are left empty. */
static PySequenceMethods bytearray_as_sequence = {
    (lenfunc)bytearray_length,              /* sq_length */
    (binaryfunc)PyByteArray_Concat,         /* sq_concat */
    (ssizeargfunc)bytearray_repeat,         /* sq_repeat */
    (ssizeargfunc)bytearray_getitem,        /* sq_item */
    0,                                      /* sq_slice */
    (ssizeobjargproc)bytearray_setitem,     /* sq_ass_item */
    0,                                      /* sq_ass_slice */
    (objobjproc)bytearray_contains,         /* sq_contains */
    (binaryfunc)bytearray_iconcat,          /* sq_inplace_concat */
    (ssizeargfunc)bytearray_irepeat,        /* sq_inplace_repeat */
};
2818
/* Mapping-protocol slots for bytearray (positional initializers). */
static PyMappingMethods bytearray_as_mapping = {
    (lenfunc)bytearray_length,              /* mp_length */
    (binaryfunc)bytearray_subscript,        /* mp_subscript */
    (objobjargproc)bytearray_ass_subscript, /* mp_ass_subscript */
};
2824
/* Buffer-protocol slots for bytearray (positional initializers): the four
   old-style segment accessors followed by the new-style get/release pair. */
static PyBufferProcs bytearray_as_buffer = {
    (readbufferproc)bytearray_buffer_getreadbuf,    /* bf_getreadbuffer */
    (writebufferproc)bytearray_buffer_getwritebuf,  /* bf_getwritebuffer */
    (segcountproc)bytearray_buffer_getsegcount,     /* bf_getsegcount */
    (charbufferproc)bytearray_buffer_getcharbuf,    /* bf_getcharbuffer */
    (getbufferproc)bytearray_getbuffer,             /* bf_getbuffer */
    (releasebufferproc)bytearray_releasebuffer,     /* bf_releasebuffer */
};
2833
/* Method table for bytearray, in alphabetical order by method name.
   Each entry is {name, C function, calling-convention flags, docstring};
   the table ends with a {NULL} sentinel.  "fromhex" is the only entry
   flagged METH_CLASS and "decode" the only one accepting keywords. */
static PyMethodDef
bytearray_methods[] = {
    {"__alloc__", (PyCFunction)bytearray_alloc, METH_NOARGS, alloc_doc},
    {"__reduce__", (PyCFunction)bytearray_reduce, METH_NOARGS, reduce_doc},
    {"__sizeof__", (PyCFunction)bytearray_sizeof, METH_NOARGS, sizeof_doc},
    {"append", (PyCFunction)bytearray_append, METH_O, append__doc__},
    {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
     _Py_capitalize__doc__},
    {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
    {"count", (PyCFunction)bytearray_count, METH_VARARGS, count__doc__},
    {"decode", (PyCFunction)bytearray_decode, METH_VARARGS | METH_KEYWORDS, decode_doc},
    {"endswith", (PyCFunction)bytearray_endswith, METH_VARARGS, endswith__doc__},
    {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
     expandtabs__doc__},
    {"extend", (PyCFunction)bytearray_extend, METH_O, extend__doc__},
    {"find", (PyCFunction)bytearray_find, METH_VARARGS, find__doc__},
    {"fromhex", (PyCFunction)bytearray_fromhex, METH_VARARGS|METH_CLASS,
     fromhex_doc},
    {"index", (PyCFunction)bytearray_index, METH_VARARGS, index__doc__},
    {"insert", (PyCFunction)bytearray_insert, METH_VARARGS, insert__doc__},
    {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
     _Py_isalnum__doc__},
    {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
     _Py_isalpha__doc__},
    {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
     _Py_isdigit__doc__},
    {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
     _Py_islower__doc__},
    {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
     _Py_isspace__doc__},
    {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
     _Py_istitle__doc__},
    {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
     _Py_isupper__doc__},
    {"join", (PyCFunction)bytearray_join, METH_O, join_doc},
    {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
    {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
    {"lstrip", (PyCFunction)bytearray_lstrip, METH_VARARGS, lstrip__doc__},
    {"partition", (PyCFunction)bytearray_partition, METH_O, partition__doc__},
    {"pop", (PyCFunction)bytearray_pop, METH_VARARGS, pop__doc__},
    {"remove", (PyCFunction)bytearray_remove, METH_O, remove__doc__},
    {"replace", (PyCFunction)bytearray_replace, METH_VARARGS, replace__doc__},
    {"reverse", (PyCFunction)bytearray_reverse, METH_NOARGS, reverse__doc__},
    {"rfind", (PyCFunction)bytearray_rfind, METH_VARARGS, rfind__doc__},
    {"rindex", (PyCFunction)bytearray_rindex, METH_VARARGS, rindex__doc__},
    {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
    {"rpartition", (PyCFunction)bytearray_rpartition, METH_O, rpartition__doc__},
    {"rsplit", (PyCFunction)bytearray_rsplit, METH_VARARGS, rsplit__doc__},
    {"rstrip", (PyCFunction)bytearray_rstrip, METH_VARARGS, rstrip__doc__},
    {"split", (PyCFunction)bytearray_split, METH_VARARGS, split__doc__},
    {"splitlines", (PyCFunction)bytearray_splitlines, METH_VARARGS,
     splitlines__doc__},
    {"startswith", (PyCFunction)bytearray_startswith, METH_VARARGS ,
     startswith__doc__},
    {"strip", (PyCFunction)bytearray_strip, METH_VARARGS, strip__doc__},
    {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
     _Py_swapcase__doc__},
    {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
    {"translate", (PyCFunction)bytearray_translate, METH_VARARGS,
     translate__doc__},
    {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
    {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
    {NULL}
};
2898
/* Docstring of the bytearray type; installed as tp_doc of
   PyByteArray_Type. */
PyDoc_STRVAR(bytearray_doc,
"bytearray(iterable_of_ints) -> bytearray.\n\
bytearray(string, encoding[, errors]) -> bytearray.\n\
bytearray(bytes_or_bytearray) -> mutable copy of bytes_or_bytearray.\n\
bytearray(memory_view) -> bytearray.\n\
\n\
Construct a mutable bytearray object from:\n\
- an iterable yielding integers in range(256)\n\
- a text string encoded using the specified encoding\n\
- a bytes or a bytearray object\n\
- any object implementing the buffer API.\n\
\n\
bytearray(int) -> bytearray.\n\
\n\
Construct a zero-initialized bytearray of the given length.");
2914
2915
/* Defined after the iterator type near the end of the file; declared here
   so the type object below can reference it in tp_iter. */
static PyObject *bytearray_iter(PyObject *seq);

/* Type object for bytearray.  The initializers are positional, so their
   order must match the PyTypeObject layout.  The flags allow subclassing
   (Py_TPFLAGS_BASETYPE) and advertise the new-style buffer interface
   (Py_TPFLAGS_HAVE_NEWBUFFER).  tp_hash is left empty. */
PyTypeObject PyByteArray_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytearray",                        /* tp_name */
    sizeof(PyByteArrayObject),          /* tp_basicsize */
    0,                                  /* tp_itemsize */
    (destructor)bytearray_dealloc,      /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_compare */
    (reprfunc)bytearray_repr,           /* tp_repr */
    0,                                  /* tp_as_number */
    &bytearray_as_sequence,             /* tp_as_sequence */
    &bytearray_as_mapping,              /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    bytearray_str,                      /* tp_str */
    PyObject_GenericGetAttr,            /* tp_getattro */
    0,                                  /* tp_setattro */
    &bytearray_as_buffer,               /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
    Py_TPFLAGS_HAVE_NEWBUFFER,          /* tp_flags */
    bytearray_doc,                      /* tp_doc */
    0,                                  /* tp_traverse */
    0,                                  /* tp_clear */
    (richcmpfunc)bytearray_richcompare, /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    bytearray_iter,                     /* tp_iter */
    0,                                  /* tp_iternext */
    bytearray_methods,                  /* tp_methods */
    0,                                  /* tp_members */
    0,                                  /* tp_getset */
    0,                                  /* tp_base */
    0,                                  /* tp_dict */
    0,                                  /* tp_descr_get */
    0,                                  /* tp_descr_set */
    0,                                  /* tp_dictoffset */
    (initproc)bytearray_init,           /* tp_init */
    PyType_GenericAlloc,                /* tp_alloc */
    PyType_GenericNew,                  /* tp_new */
    PyObject_Del,                       /* tp_free */
};
2960
/*********************** Bytes Iterator ****************************/

/* State of an iterator over a bytearray. */
typedef struct {
    PyObject_HEAD
    Py_ssize_t it_index;        /* index of the next byte to return */
    PyByteArrayObject *it_seq; /* Set to NULL when iterator is exhausted */
} bytesiterobject;
2968
2969 static void
bytearrayiter_dealloc(bytesiterobject * it)2970 bytearrayiter_dealloc(bytesiterobject *it)
2971 {
2972 _PyObject_GC_UNTRACK(it);
2973 Py_XDECREF(it->it_seq);
2974 PyObject_GC_Del(it);
2975 }
2976
2977 static int
bytearrayiter_traverse(bytesiterobject * it,visitproc visit,void * arg)2978 bytearrayiter_traverse(bytesiterobject *it, visitproc visit, void *arg)
2979 {
2980 Py_VISIT(it->it_seq);
2981 return 0;
2982 }
2983
2984 static PyObject *
bytearrayiter_next(bytesiterobject * it)2985 bytearrayiter_next(bytesiterobject *it)
2986 {
2987 PyByteArrayObject *seq;
2988 PyObject *item;
2989
2990 assert(it != NULL);
2991 seq = it->it_seq;
2992 if (seq == NULL)
2993 return NULL;
2994 assert(PyByteArray_Check(seq));
2995
2996 if (it->it_index < PyByteArray_GET_SIZE(seq)) {
2997 item = PyInt_FromLong(
2998 (unsigned char)seq->ob_bytes[it->it_index]);
2999 if (item != NULL)
3000 ++it->it_index;
3001 return item;
3002 }
3003
3004 it->it_seq = NULL;
3005 Py_DECREF(seq);
3006 return NULL;
3007 }
3008
3009 static PyObject *
bytesarrayiter_length_hint(bytesiterobject * it)3010 bytesarrayiter_length_hint(bytesiterobject *it)
3011 {
3012 Py_ssize_t len = 0;
3013 if (it->it_seq)
3014 len = PyByteArray_GET_SIZE(it->it_seq) - it->it_index;
3015 return PyInt_FromSsize_t(len);
3016 }
3017
PyDoc_STRVAR(length_hint_doc,
"Private method returning an estimate of len(list(it)).");

/* Method table of the bytearray iterator: only __length_hint__. */
static PyMethodDef bytearrayiter_methods[] = {
    {"__length_hint__", (PyCFunction)bytesarrayiter_length_hint, METH_NOARGS,
     length_hint_doc},
    {NULL, NULL} /* sentinel */
};
3026
/* Type object for the bytearray iterator.  The initializers are
   positional.  Instances take part in cyclic GC (Py_TPFLAGS_HAVE_GC with a
   tp_traverse), and iterating the iterator returns itself
   (PyObject_SelfIter). */
PyTypeObject PyByteArrayIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytearray_iterator",              /* tp_name */
    sizeof(bytesiterobject),           /* tp_basicsize */
    0,                                 /* tp_itemsize */
    /* methods */
    (destructor)bytearrayiter_dealloc, /* tp_dealloc */
    0,                                 /* tp_print */
    0,                                 /* tp_getattr */
    0,                                 /* tp_setattr */
    0,                                 /* tp_compare */
    0,                                 /* tp_repr */
    0,                                 /* tp_as_number */
    0,                                 /* tp_as_sequence */
    0,                                 /* tp_as_mapping */
    0,                                 /* tp_hash */
    0,                                 /* tp_call */
    0,                                 /* tp_str */
    PyObject_GenericGetAttr,           /* tp_getattro */
    0,                                 /* tp_setattro */
    0,                                 /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
    0,                                 /* tp_doc */
    (traverseproc)bytearrayiter_traverse,  /* tp_traverse */
    0,                                 /* tp_clear */
    0,                                 /* tp_richcompare */
    0,                                 /* tp_weaklistoffset */
    PyObject_SelfIter,                 /* tp_iter */
    (iternextfunc)bytearrayiter_next,  /* tp_iternext */
    bytearrayiter_methods,             /* tp_methods */
    0,                                 /* tp_members */
};
3059
3060 static PyObject *
bytearray_iter(PyObject * seq)3061 bytearray_iter(PyObject *seq)
3062 {
3063 bytesiterobject *it;
3064
3065 if (!PyByteArray_Check(seq)) {
3066 PyErr_BadInternalCall();
3067 return NULL;
3068 }
3069 it = PyObject_GC_New(bytesiterobject, &PyByteArrayIter_Type);
3070 if (it == NULL)
3071 return NULL;
3072 it->it_index = 0;
3073 Py_INCREF(seq);
3074 it->it_seq = (PyByteArrayObject *)seq;
3075 _PyObject_GC_TRACK(it);
3076 return (PyObject *)it;
3077 }
3078