1 /* PyBytes (bytearray) implementation */
2
3 #define PY_SSIZE_T_CLEAN
4 #include "Python.h"
5 #include "structmember.h"
6 #include "bytes_methods.h"
7
8 char _PyByteArray_empty_string[] = "";
9
/* Finalization entry point for the bytearray type.
   The body is empty: no work is performed here. */
void
PyByteArray_Fini(void)
{
    /* intentionally empty */
}
14
/* Initialization entry point for the bytearray type.
   Performs no work and always returns 1. */
int
PyByteArray_Init(void)
{
    const int status = 1;

    return status;
}
20
21 /* end nullbytes support */
22
23 /* Helpers */
24
25 static int
_getbytevalue(PyObject * arg,int * value)26 _getbytevalue(PyObject* arg, int *value)
27 {
28 long face_value;
29
30 if (PyBytes_CheckExact(arg)) {
31 if (Py_SIZE(arg) != 1) {
32 PyErr_SetString(PyExc_ValueError, "string must be of size 1");
33 return 0;
34 }
35 *value = Py_CHARMASK(((PyBytesObject*)arg)->ob_sval[0]);
36 return 1;
37 }
38 else if (PyInt_Check(arg) || PyLong_Check(arg)) {
39 face_value = PyLong_AsLong(arg);
40 }
41 else {
42 PyObject *index = PyNumber_Index(arg);
43 if (index == NULL) {
44 PyErr_Format(PyExc_TypeError,
45 "an integer or string of size 1 is required");
46 return 0;
47 }
48 face_value = PyLong_AsLong(index);
49 Py_DECREF(index);
50 }
51
52 if (face_value < 0 || face_value >= 256) {
53 /* this includes the OverflowError in case the long is too large */
54 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
55 return 0;
56 }
57
58 *value = face_value;
59 return 1;
60 }
61
62 static Py_ssize_t
bytearray_buffer_getreadbuf(PyByteArrayObject * self,Py_ssize_t index,const void ** ptr)63 bytearray_buffer_getreadbuf(PyByteArrayObject *self, Py_ssize_t index, const void **ptr)
64 {
65 if ( index != 0 ) {
66 PyErr_SetString(PyExc_SystemError,
67 "accessing non-existent bytes segment");
68 return -1;
69 }
70 *ptr = (void *)PyByteArray_AS_STRING(self);
71 return Py_SIZE(self);
72 }
73
74 static Py_ssize_t
bytearray_buffer_getwritebuf(PyByteArrayObject * self,Py_ssize_t index,const void ** ptr)75 bytearray_buffer_getwritebuf(PyByteArrayObject *self, Py_ssize_t index, const void **ptr)
76 {
77 if ( index != 0 ) {
78 PyErr_SetString(PyExc_SystemError,
79 "accessing non-existent bytes segment");
80 return -1;
81 }
82 *ptr = (void *)PyByteArray_AS_STRING(self);
83 return Py_SIZE(self);
84 }
85
86 static Py_ssize_t
bytearray_buffer_getsegcount(PyByteArrayObject * self,Py_ssize_t * lenp)87 bytearray_buffer_getsegcount(PyByteArrayObject *self, Py_ssize_t *lenp)
88 {
89 if ( lenp )
90 *lenp = Py_SIZE(self);
91 return 1;
92 }
93
94 static Py_ssize_t
bytearray_buffer_getcharbuf(PyByteArrayObject * self,Py_ssize_t index,const char ** ptr)95 bytearray_buffer_getcharbuf(PyByteArrayObject *self, Py_ssize_t index, const char **ptr)
96 {
97 if ( index != 0 ) {
98 PyErr_SetString(PyExc_SystemError,
99 "accessing non-existent bytes segment");
100 return -1;
101 }
102 *ptr = PyByteArray_AS_STRING(self);
103 return Py_SIZE(self);
104 }
105
106 static int
bytearray_getbuffer(PyByteArrayObject * obj,Py_buffer * view,int flags)107 bytearray_getbuffer(PyByteArrayObject *obj, Py_buffer *view, int flags)
108 {
109 int ret;
110 void *ptr;
111 if (view == NULL) {
112 obj->ob_exports++;
113 return 0;
114 }
115 ptr = (void *) PyByteArray_AS_STRING(obj);
116 ret = PyBuffer_FillInfo(view, (PyObject*)obj, ptr, Py_SIZE(obj), 0, flags);
117 if (ret >= 0) {
118 obj->ob_exports++;
119 }
120 return ret;
121 }
122
123 static void
bytearray_releasebuffer(PyByteArrayObject * obj,Py_buffer * view)124 bytearray_releasebuffer(PyByteArrayObject *obj, Py_buffer *view)
125 {
126 obj->ob_exports--;
127 }
128
129 static Py_ssize_t
_getbuffer(PyObject * obj,Py_buffer * view)130 _getbuffer(PyObject *obj, Py_buffer *view)
131 {
132 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
133
134 if (buffer == NULL || buffer->bf_getbuffer == NULL)
135 {
136 PyErr_Format(PyExc_TypeError,
137 "Type %.100s doesn't support the buffer API",
138 Py_TYPE(obj)->tp_name);
139 return -1;
140 }
141
142 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
143 return -1;
144 return view->len;
145 }
146
147 static int
_canresize(PyByteArrayObject * self)148 _canresize(PyByteArrayObject *self)
149 {
150 if (self->ob_exports > 0) {
151 PyErr_SetString(PyExc_BufferError,
152 "Existing exports of data: object cannot be re-sized");
153 return 0;
154 }
155 return 1;
156 }
157
158 /* Direct API functions */
159
160 PyObject *
PyByteArray_FromObject(PyObject * input)161 PyByteArray_FromObject(PyObject *input)
162 {
163 return PyObject_CallFunctionObjArgs((PyObject *)&PyByteArray_Type,
164 input, NULL);
165 }
166
167 PyObject *
PyByteArray_FromStringAndSize(const char * bytes,Py_ssize_t size)168 PyByteArray_FromStringAndSize(const char *bytes, Py_ssize_t size)
169 {
170 PyByteArrayObject *new;
171 Py_ssize_t alloc;
172
173 if (size < 0) {
174 PyErr_SetString(PyExc_SystemError,
175 "Negative size passed to PyByteArray_FromStringAndSize");
176 return NULL;
177 }
178
179 new = PyObject_New(PyByteArrayObject, &PyByteArray_Type);
180 if (new == NULL)
181 return NULL;
182
183 if (size == 0) {
184 new->ob_bytes = NULL;
185 alloc = 0;
186 }
187 else {
188 alloc = size + 1;
189 new->ob_bytes = PyMem_Malloc(alloc);
190 if (new->ob_bytes == NULL) {
191 Py_DECREF(new);
192 return PyErr_NoMemory();
193 }
194 if (bytes != NULL && size > 0)
195 memcpy(new->ob_bytes, bytes, size);
196 new->ob_bytes[size] = '\0'; /* Trailing null byte */
197 }
198 Py_SIZE(new) = size;
199 new->ob_alloc = alloc;
200 new->ob_exports = 0;
201
202 return (PyObject *)new;
203 }
204
205 Py_ssize_t
PyByteArray_Size(PyObject * self)206 PyByteArray_Size(PyObject *self)
207 {
208 assert(self != NULL);
209 assert(PyByteArray_Check(self));
210
211 return PyByteArray_GET_SIZE(self);
212 }
213
214 char *
PyByteArray_AsString(PyObject * self)215 PyByteArray_AsString(PyObject *self)
216 {
217 assert(self != NULL);
218 assert(PyByteArray_Check(self));
219
220 return PyByteArray_AS_STRING(self);
221 }
222
223 int
PyByteArray_Resize(PyObject * self,Py_ssize_t size)224 PyByteArray_Resize(PyObject *self, Py_ssize_t size)
225 {
226 void *sval;
227 Py_ssize_t alloc = ((PyByteArrayObject *)self)->ob_alloc;
228
229 assert(self != NULL);
230 assert(PyByteArray_Check(self));
231 assert(size >= 0);
232
233 if (size == Py_SIZE(self)) {
234 return 0;
235 }
236 if (!_canresize((PyByteArrayObject *)self)) {
237 return -1;
238 }
239
240 if (size < alloc / 2) {
241 /* Major downsize; resize down to exact size */
242 alloc = size + 1;
243 }
244 else if (size < alloc) {
245 /* Within allocated size; quick exit */
246 Py_SIZE(self) = size;
247 ((PyByteArrayObject *)self)->ob_bytes[size] = '\0'; /* Trailing null */
248 return 0;
249 }
250 else if (size <= alloc * 1.125) {
251 /* Moderate upsize; overallocate similar to list_resize() */
252 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
253 }
254 else {
255 /* Major upsize; resize up to exact size */
256 alloc = size + 1;
257 }
258
259 sval = PyMem_Realloc(((PyByteArrayObject *)self)->ob_bytes, alloc);
260 if (sval == NULL) {
261 PyErr_NoMemory();
262 return -1;
263 }
264
265 ((PyByteArrayObject *)self)->ob_bytes = sval;
266 Py_SIZE(self) = size;
267 ((PyByteArrayObject *)self)->ob_alloc = alloc;
268 ((PyByteArrayObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
269
270 return 0;
271 }
272
273 PyObject *
PyByteArray_Concat(PyObject * a,PyObject * b)274 PyByteArray_Concat(PyObject *a, PyObject *b)
275 {
276 Py_buffer va, vb;
277 PyByteArrayObject *result = NULL;
278
279 va.len = -1;
280 vb.len = -1;
281 if (_getbuffer(a, &va) < 0 ||
282 _getbuffer(b, &vb) < 0) {
283 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
284 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
285 goto done;
286 }
287
288 if (va.len > PY_SSIZE_T_MAX - vb.len) {
289 PyErr_NoMemory();
290 goto done;
291 }
292
293 result = (PyByteArrayObject *) \
294 PyByteArray_FromStringAndSize(NULL, va.len + vb.len);
295 if (result != NULL) {
296 memcpy(result->ob_bytes, va.buf, va.len);
297 memcpy(result->ob_bytes + va.len, vb.buf, vb.len);
298 }
299
300 done:
301 if (va.len != -1)
302 PyBuffer_Release(&va);
303 if (vb.len != -1)
304 PyBuffer_Release(&vb);
305 return (PyObject *)result;
306 }
307
308 /* Functions stuffed into the type object */
309
310 static Py_ssize_t
bytearray_length(PyByteArrayObject * self)311 bytearray_length(PyByteArrayObject *self)
312 {
313 return Py_SIZE(self);
314 }
315
316 static PyObject *
bytearray_iconcat(PyByteArrayObject * self,PyObject * other)317 bytearray_iconcat(PyByteArrayObject *self, PyObject *other)
318 {
319 Py_ssize_t mysize;
320 Py_ssize_t size;
321 Py_buffer vo;
322
323 if (_getbuffer(other, &vo) < 0) {
324 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
325 Py_TYPE(other)->tp_name, Py_TYPE(self)->tp_name);
326 return NULL;
327 }
328
329 mysize = Py_SIZE(self);
330 if (mysize > PY_SSIZE_T_MAX - vo.len) {
331 PyBuffer_Release(&vo);
332 return PyErr_NoMemory();
333 }
334 size = mysize + vo.len;
335 if (size < self->ob_alloc) {
336 Py_SIZE(self) = size;
337 self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */
338 }
339 else if (PyByteArray_Resize((PyObject *)self, size) < 0) {
340 PyBuffer_Release(&vo);
341 return NULL;
342 }
343 memcpy(self->ob_bytes + mysize, vo.buf, vo.len);
344 PyBuffer_Release(&vo);
345 Py_INCREF(self);
346 return (PyObject *)self;
347 }
348
349 static PyObject *
bytearray_repeat(PyByteArrayObject * self,Py_ssize_t count)350 bytearray_repeat(PyByteArrayObject *self, Py_ssize_t count)
351 {
352 PyByteArrayObject *result;
353 Py_ssize_t mysize;
354 Py_ssize_t size;
355
356 if (count < 0)
357 count = 0;
358 mysize = Py_SIZE(self);
359 if (count != 0 && mysize > PY_SSIZE_T_MAX / count)
360 return PyErr_NoMemory();
361 size = mysize * count;
362 result = (PyByteArrayObject *)PyByteArray_FromStringAndSize(NULL, size);
363 if (result != NULL && size != 0) {
364 if (mysize == 1)
365 memset(result->ob_bytes, self->ob_bytes[0], size);
366 else {
367 Py_ssize_t i;
368 for (i = 0; i < count; i++)
369 memcpy(result->ob_bytes + i*mysize, self->ob_bytes, mysize);
370 }
371 }
372 return (PyObject *)result;
373 }
374
375 static PyObject *
bytearray_irepeat(PyByteArrayObject * self,Py_ssize_t count)376 bytearray_irepeat(PyByteArrayObject *self, Py_ssize_t count)
377 {
378 Py_ssize_t mysize;
379 Py_ssize_t size;
380
381 if (count < 0)
382 count = 0;
383 mysize = Py_SIZE(self);
384 if (count != 0 && mysize > PY_SSIZE_T_MAX / count)
385 return PyErr_NoMemory();
386 size = mysize * count;
387 if (size < self->ob_alloc) {
388 Py_SIZE(self) = size;
389 self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */
390 }
391 else if (PyByteArray_Resize((PyObject *)self, size) < 0)
392 return NULL;
393
394 if (mysize == 1)
395 memset(self->ob_bytes, self->ob_bytes[0], size);
396 else {
397 Py_ssize_t i;
398 for (i = 1; i < count; i++)
399 memcpy(self->ob_bytes + i*mysize, self->ob_bytes, mysize);
400 }
401
402 Py_INCREF(self);
403 return (PyObject *)self;
404 }
405
406 static PyObject *
bytearray_getitem(PyByteArrayObject * self,Py_ssize_t i)407 bytearray_getitem(PyByteArrayObject *self, Py_ssize_t i)
408 {
409 if (i < 0)
410 i += Py_SIZE(self);
411 if (i < 0 || i >= Py_SIZE(self)) {
412 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
413 return NULL;
414 }
415 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
416 }
417
418 static PyObject *
bytearray_subscript(PyByteArrayObject * self,PyObject * index)419 bytearray_subscript(PyByteArrayObject *self, PyObject *index)
420 {
421 if (PyIndex_Check(index)) {
422 Py_ssize_t i = PyNumber_AsSsize_t(index, PyExc_IndexError);
423
424 if (i == -1 && PyErr_Occurred())
425 return NULL;
426
427 if (i < 0)
428 i += PyByteArray_GET_SIZE(self);
429
430 if (i < 0 || i >= Py_SIZE(self)) {
431 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
432 return NULL;
433 }
434 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
435 }
436 else if (PySlice_Check(index)) {
437 Py_ssize_t start, stop, step, slicelength, cur, i;
438 if (PySlice_GetIndicesEx((PySliceObject *)index,
439 PyByteArray_GET_SIZE(self),
440 &start, &stop, &step, &slicelength) < 0) {
441 return NULL;
442 }
443
444 if (slicelength <= 0)
445 return PyByteArray_FromStringAndSize("", 0);
446 else if (step == 1) {
447 return PyByteArray_FromStringAndSize(self->ob_bytes + start,
448 slicelength);
449 }
450 else {
451 char *source_buf = PyByteArray_AS_STRING(self);
452 char *result_buf = (char *)PyMem_Malloc(slicelength);
453 PyObject *result;
454
455 if (result_buf == NULL)
456 return PyErr_NoMemory();
457
458 for (cur = start, i = 0; i < slicelength;
459 cur += step, i++) {
460 result_buf[i] = source_buf[cur];
461 }
462 result = PyByteArray_FromStringAndSize(result_buf, slicelength);
463 PyMem_Free(result_buf);
464 return result;
465 }
466 }
467 else {
468 PyErr_SetString(PyExc_TypeError, "bytearray indices must be integers");
469 return NULL;
470 }
471 }
472
473 static int
bytearray_setslice(PyByteArrayObject * self,Py_ssize_t lo,Py_ssize_t hi,PyObject * values)474 bytearray_setslice(PyByteArrayObject *self, Py_ssize_t lo, Py_ssize_t hi,
475 PyObject *values)
476 {
477 Py_ssize_t avail, needed;
478 void *bytes;
479 Py_buffer vbytes;
480 int res = 0;
481
482 vbytes.len = -1;
483 if (values == (PyObject *)self) {
484 /* Make a copy and call this function recursively */
485 int err;
486 values = PyByteArray_FromObject(values);
487 if (values == NULL)
488 return -1;
489 err = bytearray_setslice(self, lo, hi, values);
490 Py_DECREF(values);
491 return err;
492 }
493 if (values == NULL) {
494 /* del b[lo:hi] */
495 bytes = NULL;
496 needed = 0;
497 }
498 else {
499 if (_getbuffer(values, &vbytes) < 0) {
500 PyErr_Format(PyExc_TypeError,
501 "can't set bytearray slice from %.100s",
502 Py_TYPE(values)->tp_name);
503 return -1;
504 }
505 needed = vbytes.len;
506 bytes = vbytes.buf;
507 }
508
509 if (lo < 0)
510 lo = 0;
511 if (hi < lo)
512 hi = lo;
513 if (hi > Py_SIZE(self))
514 hi = Py_SIZE(self);
515
516 avail = hi - lo;
517 if (avail < 0)
518 lo = hi = avail = 0;
519
520 if (avail != needed) {
521 if (avail > needed) {
522 if (!_canresize(self)) {
523 res = -1;
524 goto finish;
525 }
526 /*
527 0 lo hi old_size
528 | |<----avail----->|<-----tomove------>|
529 | |<-needed->|<-----tomove------>|
530 0 lo new_hi new_size
531 */
532 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
533 Py_SIZE(self) - hi);
534 }
535 /* XXX(nnorwitz): need to verify this can't overflow! */
536 if (PyByteArray_Resize((PyObject *)self,
537 Py_SIZE(self) + needed - avail) < 0) {
538 res = -1;
539 goto finish;
540 }
541 if (avail < needed) {
542 /*
543 0 lo hi old_size
544 | |<-avail->|<-----tomove------>|
545 | |<----needed---->|<-----tomove------>|
546 0 lo new_hi new_size
547 */
548 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
549 Py_SIZE(self) - lo - needed);
550 }
551 }
552
553 if (needed > 0)
554 memcpy(self->ob_bytes + lo, bytes, needed);
555
556
557 finish:
558 if (vbytes.len != -1)
559 PyBuffer_Release(&vbytes);
560 return res;
561 }
562
563 static int
bytearray_setitem(PyByteArrayObject * self,Py_ssize_t i,PyObject * value)564 bytearray_setitem(PyByteArrayObject *self, Py_ssize_t i, PyObject *value)
565 {
566 int ival;
567
568 if (i < 0)
569 i += Py_SIZE(self);
570
571 if (i < 0 || i >= Py_SIZE(self)) {
572 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
573 return -1;
574 }
575
576 if (value == NULL)
577 return bytearray_setslice(self, i, i+1, NULL);
578
579 if (!_getbytevalue(value, &ival))
580 return -1;
581
582 self->ob_bytes[i] = ival;
583 return 0;
584 }
585
586 static int
bytearray_ass_subscript(PyByteArrayObject * self,PyObject * index,PyObject * values)587 bytearray_ass_subscript(PyByteArrayObject *self, PyObject *index, PyObject *values)
588 {
589 Py_ssize_t start, stop, step, slicelen, needed;
590 char *bytes;
591
592 if (PyIndex_Check(index)) {
593 Py_ssize_t i = PyNumber_AsSsize_t(index, PyExc_IndexError);
594
595 if (i == -1 && PyErr_Occurred())
596 return -1;
597
598 if (i < 0)
599 i += PyByteArray_GET_SIZE(self);
600
601 if (i < 0 || i >= Py_SIZE(self)) {
602 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
603 return -1;
604 }
605
606 if (values == NULL) {
607 /* Fall through to slice assignment */
608 start = i;
609 stop = i + 1;
610 step = 1;
611 slicelen = 1;
612 }
613 else {
614 int ival;
615 if (!_getbytevalue(values, &ival))
616 return -1;
617 self->ob_bytes[i] = (char)ival;
618 return 0;
619 }
620 }
621 else if (PySlice_Check(index)) {
622 if (PySlice_GetIndicesEx((PySliceObject *)index,
623 PyByteArray_GET_SIZE(self),
624 &start, &stop, &step, &slicelen) < 0) {
625 return -1;
626 }
627 }
628 else {
629 PyErr_SetString(PyExc_TypeError, "bytearray indices must be integer");
630 return -1;
631 }
632
633 if (values == NULL) {
634 bytes = NULL;
635 needed = 0;
636 }
637 else if (values == (PyObject *)self || !PyByteArray_Check(values)) {
638 int err;
639 if (PyNumber_Check(values) || PyUnicode_Check(values)) {
640 PyErr_SetString(PyExc_TypeError,
641 "can assign only bytes, buffers, or iterables "
642 "of ints in range(0, 256)");
643 return -1;
644 }
645 /* Make a copy and call this function recursively */
646 values = PyByteArray_FromObject(values);
647 if (values == NULL)
648 return -1;
649 err = bytearray_ass_subscript(self, index, values);
650 Py_DECREF(values);
651 return err;
652 }
653 else {
654 assert(PyByteArray_Check(values));
655 bytes = ((PyByteArrayObject *)values)->ob_bytes;
656 needed = Py_SIZE(values);
657 }
658 /* Make sure b[5:2] = ... inserts before 5, not before 2. */
659 if ((step < 0 && start < stop) ||
660 (step > 0 && start > stop))
661 stop = start;
662 if (step == 1) {
663 if (slicelen != needed) {
664 if (!_canresize(self))
665 return -1;
666 if (slicelen > needed) {
667 /*
668 0 start stop old_size
669 | |<---slicelen--->|<-----tomove------>|
670 | |<-needed->|<-----tomove------>|
671 0 lo new_hi new_size
672 */
673 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
674 Py_SIZE(self) - stop);
675 }
676 if (PyByteArray_Resize((PyObject *)self,
677 Py_SIZE(self) + needed - slicelen) < 0)
678 return -1;
679 if (slicelen < needed) {
680 /*
681 0 lo hi old_size
682 | |<-avail->|<-----tomove------>|
683 | |<----needed---->|<-----tomove------>|
684 0 lo new_hi new_size
685 */
686 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
687 Py_SIZE(self) - start - needed);
688 }
689 }
690
691 if (needed > 0)
692 memcpy(self->ob_bytes + start, bytes, needed);
693
694 return 0;
695 }
696 else {
697 if (needed == 0) {
698 /* Delete slice */
699 size_t cur;
700 Py_ssize_t i;
701
702 if (!_canresize(self))
703 return -1;
704 if (step < 0) {
705 stop = start + 1;
706 start = stop + step * (slicelen - 1) - 1;
707 step = -step;
708 }
709 for (cur = start, i = 0;
710 i < slicelen; cur += step, i++) {
711 Py_ssize_t lim = step - 1;
712
713 if (cur + step >= (size_t)PyByteArray_GET_SIZE(self))
714 lim = PyByteArray_GET_SIZE(self) - cur - 1;
715
716 memmove(self->ob_bytes + cur - i,
717 self->ob_bytes + cur + 1, lim);
718 }
719 /* Move the tail of the bytes, in one chunk */
720 cur = start + slicelen*step;
721 if (cur < (size_t)PyByteArray_GET_SIZE(self)) {
722 memmove(self->ob_bytes + cur - slicelen,
723 self->ob_bytes + cur,
724 PyByteArray_GET_SIZE(self) - cur);
725 }
726 if (PyByteArray_Resize((PyObject *)self,
727 PyByteArray_GET_SIZE(self) - slicelen) < 0)
728 return -1;
729
730 return 0;
731 }
732 else {
733 /* Assign slice */
734 Py_ssize_t cur, i;
735
736 if (needed != slicelen) {
737 PyErr_Format(PyExc_ValueError,
738 "attempt to assign bytes of size %zd "
739 "to extended slice of size %zd",
740 needed, slicelen);
741 return -1;
742 }
743 for (cur = start, i = 0; i < slicelen; cur += step, i++)
744 self->ob_bytes[cur] = bytes[i];
745 return 0;
746 }
747 }
748 }
749
750 static int
bytearray_init(PyByteArrayObject * self,PyObject * args,PyObject * kwds)751 bytearray_init(PyByteArrayObject *self, PyObject *args, PyObject *kwds)
752 {
753 static char *kwlist[] = {"source", "encoding", "errors", 0};
754 PyObject *arg = NULL;
755 const char *encoding = NULL;
756 const char *errors = NULL;
757 Py_ssize_t count;
758 PyObject *it;
759 PyObject *(*iternext)(PyObject *);
760
761 if (Py_SIZE(self) != 0) {
762 /* Empty previous contents (yes, do this first of all!) */
763 if (PyByteArray_Resize((PyObject *)self, 0) < 0)
764 return -1;
765 }
766
767 /* Parse arguments */
768 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytearray", kwlist,
769 &arg, &encoding, &errors))
770 return -1;
771
772 /* Make a quick exit if no first argument */
773 if (arg == NULL) {
774 if (encoding != NULL || errors != NULL) {
775 PyErr_SetString(PyExc_TypeError,
776 "encoding or errors without sequence argument");
777 return -1;
778 }
779 return 0;
780 }
781
782 if (PyBytes_Check(arg)) {
783 PyObject *new, *encoded;
784 if (encoding != NULL) {
785 encoded = _PyCodec_EncodeText(arg, encoding, errors);
786 if (encoded == NULL)
787 return -1;
788 assert(PyBytes_Check(encoded));
789 }
790 else {
791 encoded = arg;
792 Py_INCREF(arg);
793 }
794 new = bytearray_iconcat(self, arg);
795 Py_DECREF(encoded);
796 if (new == NULL)
797 return -1;
798 Py_DECREF(new);
799 return 0;
800 }
801
802 #ifdef Py_USING_UNICODE
803 if (PyUnicode_Check(arg)) {
804 /* Encode via the codec registry */
805 PyObject *encoded, *new;
806 if (encoding == NULL) {
807 PyErr_SetString(PyExc_TypeError,
808 "unicode argument without an encoding");
809 return -1;
810 }
811 encoded = _PyCodec_EncodeText(arg, encoding, errors);
812 if (encoded == NULL)
813 return -1;
814 assert(PyBytes_Check(encoded));
815 new = bytearray_iconcat(self, encoded);
816 Py_DECREF(encoded);
817 if (new == NULL)
818 return -1;
819 Py_DECREF(new);
820 return 0;
821 }
822 #endif
823
824 /* If it's not unicode, there can't be encoding or errors */
825 if (encoding != NULL || errors != NULL) {
826 PyErr_SetString(PyExc_TypeError,
827 "encoding or errors without a string argument");
828 return -1;
829 }
830
831 /* Is it an int? */
832 count = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
833 if (count == -1 && PyErr_Occurred()) {
834 if (PyErr_ExceptionMatches(PyExc_OverflowError))
835 return -1;
836 PyErr_Clear();
837 }
838 else if (count < 0) {
839 PyErr_SetString(PyExc_ValueError, "negative count");
840 return -1;
841 }
842 else {
843 if (count > 0) {
844 if (PyByteArray_Resize((PyObject *)self, count))
845 return -1;
846 memset(self->ob_bytes, 0, count);
847 }
848 return 0;
849 }
850
851 /* Use the buffer API */
852 if (PyObject_CheckBuffer(arg)) {
853 Py_ssize_t size;
854 Py_buffer view;
855 if (PyObject_GetBuffer(arg, &view, PyBUF_FULL_RO) < 0)
856 return -1;
857 size = view.len;
858 if (PyByteArray_Resize((PyObject *)self, size) < 0) goto fail;
859 if (PyBuffer_ToContiguous(self->ob_bytes, &view, size, 'C') < 0)
860 goto fail;
861 PyBuffer_Release(&view);
862 return 0;
863 fail:
864 PyBuffer_Release(&view);
865 return -1;
866 }
867
868 /* XXX Optimize this if the arguments is a list, tuple */
869
870 /* Get the iterator */
871 it = PyObject_GetIter(arg);
872 if (it == NULL)
873 return -1;
874 iternext = *Py_TYPE(it)->tp_iternext;
875
876 /* Run the iterator to exhaustion */
877 for (;;) {
878 PyObject *item;
879 int rc, value;
880
881 /* Get the next item */
882 item = iternext(it);
883 if (item == NULL) {
884 if (PyErr_Occurred()) {
885 if (!PyErr_ExceptionMatches(PyExc_StopIteration))
886 goto error;
887 PyErr_Clear();
888 }
889 break;
890 }
891
892 /* Interpret it as an int (__index__) */
893 rc = _getbytevalue(item, &value);
894 Py_DECREF(item);
895 if (!rc)
896 goto error;
897
898 /* Append the byte */
899 if (Py_SIZE(self) + 1 < self->ob_alloc) {
900 Py_SIZE(self)++;
901 PyByteArray_AS_STRING(self)[Py_SIZE(self)] = '\0';
902 }
903 else if (PyByteArray_Resize((PyObject *)self, Py_SIZE(self)+1) < 0)
904 goto error;
905 self->ob_bytes[Py_SIZE(self)-1] = value;
906 }
907
908 /* Clean up and return success */
909 Py_DECREF(it);
910 return 0;
911
912 error:
913 /* Error handling when it != NULL */
914 Py_DECREF(it);
915 return -1;
916 }
917
918 /* Mostly copied from string_repr, but without the
919 "smart quote" functionality. */
920 static PyObject *
bytearray_repr(PyByteArrayObject * self)921 bytearray_repr(PyByteArrayObject *self)
922 {
923 static const char *hexdigits = "0123456789abcdef";
924 const char *quote_prefix = "bytearray(b";
925 const char *quote_postfix = ")";
926 Py_ssize_t length = Py_SIZE(self);
927 /* 14 == strlen(quote_prefix) + 2 + strlen(quote_postfix) */
928 size_t newsize;
929 PyObject *v;
930 if (length > (PY_SSIZE_T_MAX - 14) / 4) {
931 PyErr_SetString(PyExc_OverflowError,
932 "bytearray object is too large to make repr");
933 return NULL;
934 }
935 newsize = 14 + 4 * length;
936 v = PyString_FromStringAndSize(NULL, newsize);
937 if (v == NULL) {
938 return NULL;
939 }
940 else {
941 register Py_ssize_t i;
942 register char c;
943 register char *p;
944 int quote;
945
946 /* Figure out which quote to use; single is preferred */
947 quote = '\'';
948 {
949 char *test, *start;
950 start = PyByteArray_AS_STRING(self);
951 for (test = start; test < start+length; ++test) {
952 if (*test == '"') {
953 quote = '\''; /* back to single */
954 goto decided;
955 }
956 else if (*test == '\'')
957 quote = '"';
958 }
959 decided:
960 ;
961 }
962
963 p = PyString_AS_STRING(v);
964 while (*quote_prefix)
965 *p++ = *quote_prefix++;
966 *p++ = quote;
967
968 for (i = 0; i < length; i++) {
969 /* There's at least enough room for a hex escape
970 and a closing quote. */
971 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
972 c = self->ob_bytes[i];
973 if (c == '\'' || c == '\\')
974 *p++ = '\\', *p++ = c;
975 else if (c == '\t')
976 *p++ = '\\', *p++ = 't';
977 else if (c == '\n')
978 *p++ = '\\', *p++ = 'n';
979 else if (c == '\r')
980 *p++ = '\\', *p++ = 'r';
981 else if (c == 0)
982 *p++ = '\\', *p++ = 'x', *p++ = '0', *p++ = '0';
983 else if (c < ' ' || c >= 0x7f) {
984 *p++ = '\\';
985 *p++ = 'x';
986 *p++ = hexdigits[(c & 0xf0) >> 4];
987 *p++ = hexdigits[c & 0xf];
988 }
989 else
990 *p++ = c;
991 }
992 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
993 *p++ = quote;
994 while (*quote_postfix) {
995 *p++ = *quote_postfix++;
996 }
997 *p = '\0';
998 /* v is cleared on error */
999 (void)_PyString_Resize(&v, (p - PyString_AS_STRING(v)));
1000 return v;
1001 }
1002 }
1003
1004 static PyObject *
bytearray_str(PyObject * op)1005 bytearray_str(PyObject *op)
1006 {
1007 #if 0
1008 if (Py_BytesWarningFlag) {
1009 if (PyErr_WarnEx(PyExc_BytesWarning,
1010 "str() on a bytearray instance", 1))
1011 return NULL;
1012 }
1013 return bytearray_repr((PyByteArrayObject*)op);
1014 #endif
1015 return PyBytes_FromStringAndSize(((PyByteArrayObject*)op)->ob_bytes, Py_SIZE(op));
1016 }
1017
1018 static PyObject *
bytearray_richcompare(PyObject * self,PyObject * other,int op)1019 bytearray_richcompare(PyObject *self, PyObject *other, int op)
1020 {
1021 Py_ssize_t self_size, other_size;
1022 Py_buffer self_bytes, other_bytes;
1023 PyObject *res;
1024 Py_ssize_t minsize;
1025 int cmp, rc;
1026
1027 /* Bytes can be compared to anything that supports the (binary)
1028 buffer API. Except that a comparison with Unicode is always an
1029 error, even if the comparison is for equality. */
1030 #ifdef Py_USING_UNICODE
1031 rc = PyObject_IsInstance(self, (PyObject*)&PyUnicode_Type);
1032 if (!rc)
1033 rc = PyObject_IsInstance(other, (PyObject*)&PyUnicode_Type);
1034 if (rc < 0)
1035 return NULL;
1036 if (rc) {
1037 if (Py_BytesWarningFlag && op == Py_EQ) {
1038 if (PyErr_WarnEx(PyExc_BytesWarning,
1039 "Comparison between bytearray and string", 1))
1040 return NULL;
1041 }
1042
1043 Py_INCREF(Py_NotImplemented);
1044 return Py_NotImplemented;
1045 }
1046 #endif
1047
1048 self_size = _getbuffer(self, &self_bytes);
1049 if (self_size < 0) {
1050 PyErr_Clear();
1051 Py_INCREF(Py_NotImplemented);
1052 return Py_NotImplemented;
1053 }
1054
1055 other_size = _getbuffer(other, &other_bytes);
1056 if (other_size < 0) {
1057 PyErr_Clear();
1058 PyBuffer_Release(&self_bytes);
1059 Py_INCREF(Py_NotImplemented);
1060 return Py_NotImplemented;
1061 }
1062
1063 if (self_size != other_size && (op == Py_EQ || op == Py_NE)) {
1064 /* Shortcut: if the lengths differ, the objects differ */
1065 cmp = (op == Py_NE);
1066 }
1067 else {
1068 minsize = self_size;
1069 if (other_size < minsize)
1070 minsize = other_size;
1071
1072 cmp = memcmp(self_bytes.buf, other_bytes.buf, minsize);
1073 /* In ISO C, memcmp() guarantees to use unsigned bytes! */
1074
1075 if (cmp == 0) {
1076 if (self_size < other_size)
1077 cmp = -1;
1078 else if (self_size > other_size)
1079 cmp = 1;
1080 }
1081
1082 switch (op) {
1083 case Py_LT: cmp = cmp < 0; break;
1084 case Py_LE: cmp = cmp <= 0; break;
1085 case Py_EQ: cmp = cmp == 0; break;
1086 case Py_NE: cmp = cmp != 0; break;
1087 case Py_GT: cmp = cmp > 0; break;
1088 case Py_GE: cmp = cmp >= 0; break;
1089 }
1090 }
1091
1092 res = cmp ? Py_True : Py_False;
1093 PyBuffer_Release(&self_bytes);
1094 PyBuffer_Release(&other_bytes);
1095 Py_INCREF(res);
1096 return res;
1097 }
1098
1099 static void
bytearray_dealloc(PyByteArrayObject * self)1100 bytearray_dealloc(PyByteArrayObject *self)
1101 {
1102 if (self->ob_exports > 0) {
1103 PyErr_SetString(PyExc_SystemError,
1104 "deallocated bytearray object has exported buffers");
1105 PyErr_Print();
1106 }
1107 if (self->ob_bytes != 0) {
1108 PyMem_Free(self->ob_bytes);
1109 }
1110 Py_TYPE(self)->tp_free((PyObject *)self);
1111 }
1112
1113
1114 /* -------------------------------------------------------------------- */
1115 /* Methods */
1116
1117 #define STRINGLIB_CHAR char
1118 #define STRINGLIB_LEN PyByteArray_GET_SIZE
1119 #define STRINGLIB_STR PyByteArray_AS_STRING
1120 #define STRINGLIB_NEW PyByteArray_FromStringAndSize
1121 #define STRINGLIB_ISSPACE Py_ISSPACE
1122 #define STRINGLIB_ISLINEBREAK(x) ((x == '\n') || (x == '\r'))
1123 #define STRINGLIB_CHECK_EXACT PyByteArray_CheckExact
1124 #define STRINGLIB_MUTABLE 1
1125
1126 #include "stringlib/fastsearch.h"
1127 #include "stringlib/count.h"
1128 #include "stringlib/find.h"
1129 #include "stringlib/partition.h"
1130 #include "stringlib/split.h"
1131 #include "stringlib/ctype.h"
1132 #include "stringlib/transmogrify.h"
1133
1134
1135 /* The following Py_LOCAL_INLINE and Py_LOCAL functions
1136 were copied from the old char* style string object. */
1137
/* Helper macro to fix up start/end slice values in place.

   `end` is clamped to `len`; a negative `end` or `start` is taken
   relative to `len` and clamped to 0 if still negative.  `start` and
   `end` must be modifiable lvalues; every argument may be evaluated
   more than once, so pass expressions without side effects.

   Wrapped in do { } while (0) so that the macro behaves as a single
   statement (safe as the body of an unbraced if/else); use it with a
   trailing semicolon. */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
1152
1153 Py_LOCAL_INLINE(Py_ssize_t)
bytearray_find_internal(PyByteArrayObject * self,PyObject * args,int dir)1154 bytearray_find_internal(PyByteArrayObject *self, PyObject *args, int dir)
1155 {
1156 PyObject *subobj;
1157 Py_buffer subbuf;
1158 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1159 Py_ssize_t res;
1160
1161 if (!stringlib_parse_args_finds("find/rfind/index/rindex",
1162 args, &subobj, &start, &end))
1163 return -2;
1164 if (_getbuffer(subobj, &subbuf) < 0)
1165 return -2;
1166 if (dir > 0)
1167 res = stringlib_find_slice(
1168 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
1169 subbuf.buf, subbuf.len, start, end);
1170 else
1171 res = stringlib_rfind_slice(
1172 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
1173 subbuf.buf, subbuf.len, start, end);
1174 PyBuffer_Release(&subbuf);
1175 return res;
1176 }
1177
1178 PyDoc_STRVAR(find__doc__,
1179 "B.find(sub [,start [,end]]) -> int\n\
1180 \n\
1181 Return the lowest index in B where subsection sub is found,\n\
1182 such that sub is contained within B[start,end]. Optional\n\
1183 arguments start and end are interpreted as in slice notation.\n\
1184 \n\
1185 Return -1 on failure.");
1186
1187 static PyObject *
bytearray_find(PyByteArrayObject * self,PyObject * args)1188 bytearray_find(PyByteArrayObject *self, PyObject *args)
1189 {
1190 Py_ssize_t result = bytearray_find_internal(self, args, +1);
1191 if (result == -2)
1192 return NULL;
1193 return PyInt_FromSsize_t(result);
1194 }
1195
1196 PyDoc_STRVAR(count__doc__,
1197 "B.count(sub [,start [,end]]) -> int\n\
1198 \n\
1199 Return the number of non-overlapping occurrences of subsection sub in\n\
1200 bytes B[start:end]. Optional arguments start and end are interpreted\n\
1201 as in slice notation.");
1202
1203 static PyObject *
bytearray_count(PyByteArrayObject * self,PyObject * args)1204 bytearray_count(PyByteArrayObject *self, PyObject *args)
1205 {
1206 PyObject *sub_obj;
1207 const char *str = PyByteArray_AS_STRING(self);
1208 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
1209 Py_buffer vsub;
1210 PyObject *count_obj;
1211
1212 if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end))
1213 return NULL;
1214
1215 if (_getbuffer(sub_obj, &vsub) < 0)
1216 return NULL;
1217
1218 ADJUST_INDICES(start, end, PyByteArray_GET_SIZE(self));
1219
1220 count_obj = PyInt_FromSsize_t(
1221 stringlib_count(str + start, end - start, vsub.buf, vsub.len, PY_SSIZE_T_MAX)
1222 );
1223 PyBuffer_Release(&vsub);
1224 return count_obj;
1225 }
1226
1227
1228 PyDoc_STRVAR(index__doc__,
1229 "B.index(sub [,start [,end]]) -> int\n\
1230 \n\
1231 Like B.find() but raise ValueError when the subsection is not found.");
1232
1233 static PyObject *
bytearray_index(PyByteArrayObject * self,PyObject * args)1234 bytearray_index(PyByteArrayObject *self, PyObject *args)
1235 {
1236 Py_ssize_t result = bytearray_find_internal(self, args, +1);
1237 if (result == -2)
1238 return NULL;
1239 if (result == -1) {
1240 PyErr_SetString(PyExc_ValueError,
1241 "subsection not found");
1242 return NULL;
1243 }
1244 return PyInt_FromSsize_t(result);
1245 }
1246
1247
1248 PyDoc_STRVAR(rfind__doc__,
1249 "B.rfind(sub [,start [,end]]) -> int\n\
1250 \n\
1251 Return the highest index in B where subsection sub is found,\n\
1252 such that sub is contained within B[start,end]. Optional\n\
1253 arguments start and end are interpreted as in slice notation.\n\
1254 \n\
1255 Return -1 on failure.");
1256
1257 static PyObject *
bytearray_rfind(PyByteArrayObject * self,PyObject * args)1258 bytearray_rfind(PyByteArrayObject *self, PyObject *args)
1259 {
1260 Py_ssize_t result = bytearray_find_internal(self, args, -1);
1261 if (result == -2)
1262 return NULL;
1263 return PyInt_FromSsize_t(result);
1264 }
1265
1266
1267 PyDoc_STRVAR(rindex__doc__,
1268 "B.rindex(sub [,start [,end]]) -> int\n\
1269 \n\
1270 Like B.rfind() but raise ValueError when the subsection is not found.");
1271
1272 static PyObject *
bytearray_rindex(PyByteArrayObject * self,PyObject * args)1273 bytearray_rindex(PyByteArrayObject *self, PyObject *args)
1274 {
1275 Py_ssize_t result = bytearray_find_internal(self, args, -1);
1276 if (result == -2)
1277 return NULL;
1278 if (result == -1) {
1279 PyErr_SetString(PyExc_ValueError,
1280 "subsection not found");
1281 return NULL;
1282 }
1283 return PyInt_FromSsize_t(result);
1284 }
1285
1286
1287 static int
bytearray_contains(PyObject * self,PyObject * arg)1288 bytearray_contains(PyObject *self, PyObject *arg)
1289 {
1290 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
1291 if (ival == -1 && PyErr_Occurred()) {
1292 Py_buffer varg;
1293 int pos;
1294 PyErr_Clear();
1295 if (_getbuffer(arg, &varg) < 0)
1296 return -1;
1297 pos = stringlib_find(PyByteArray_AS_STRING(self), Py_SIZE(self),
1298 varg.buf, varg.len, 0);
1299 PyBuffer_Release(&varg);
1300 return pos >= 0;
1301 }
1302 if (ival < 0 || ival >= 256) {
1303 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
1304 return -1;
1305 }
1306
1307 return memchr(PyByteArray_AS_STRING(self), ival, Py_SIZE(self)) != NULL;
1308 }
1309
1310
1311 /* Matches the end (direction >= 0) or start (direction < 0) of self
1312 * against substr, using the start and end arguments. Returns
1313 * -1 on error, 0 if not found and 1 if found.
1314 */
1315 Py_LOCAL(int)
_bytearray_tailmatch(PyByteArrayObject * self,PyObject * substr,Py_ssize_t start,Py_ssize_t end,int direction)1316 _bytearray_tailmatch(PyByteArrayObject *self, PyObject *substr, Py_ssize_t start,
1317 Py_ssize_t end, int direction)
1318 {
1319 Py_ssize_t len = PyByteArray_GET_SIZE(self);
1320 const char* str;
1321 Py_buffer vsubstr;
1322 int rv = 0;
1323
1324 str = PyByteArray_AS_STRING(self);
1325
1326 if (_getbuffer(substr, &vsubstr) < 0)
1327 return -1;
1328
1329 ADJUST_INDICES(start, end, len);
1330
1331 if (direction < 0) {
1332 /* startswith */
1333 if (start+vsubstr.len > len) {
1334 goto done;
1335 }
1336 } else {
1337 /* endswith */
1338 if (end-start < vsubstr.len || start > len) {
1339 goto done;
1340 }
1341
1342 if (end-vsubstr.len > start)
1343 start = end - vsubstr.len;
1344 }
1345 if (end-start >= vsubstr.len)
1346 rv = ! memcmp(str+start, vsubstr.buf, vsubstr.len);
1347
1348 done:
1349 PyBuffer_Release(&vsubstr);
1350 return rv;
1351 }
1352
1353
1354 PyDoc_STRVAR(startswith__doc__,
1355 "B.startswith(prefix [,start [,end]]) -> bool\n\
1356 \n\
1357 Return True if B starts with the specified prefix, False otherwise.\n\
1358 With optional start, test B beginning at that position.\n\
1359 With optional end, stop comparing B at that position.\n\
1360 prefix can also be a tuple of strings to try.");
1361
1362 static PyObject *
bytearray_startswith(PyByteArrayObject * self,PyObject * args)1363 bytearray_startswith(PyByteArrayObject *self, PyObject *args)
1364 {
1365 Py_ssize_t start = 0;
1366 Py_ssize_t end = PY_SSIZE_T_MAX;
1367 PyObject *subobj;
1368 int result;
1369
1370 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
1371 return NULL;
1372 if (PyTuple_Check(subobj)) {
1373 Py_ssize_t i;
1374 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1375 result = _bytearray_tailmatch(self,
1376 PyTuple_GET_ITEM(subobj, i),
1377 start, end, -1);
1378 if (result == -1)
1379 return NULL;
1380 else if (result) {
1381 Py_RETURN_TRUE;
1382 }
1383 }
1384 Py_RETURN_FALSE;
1385 }
1386 result = _bytearray_tailmatch(self, subobj, start, end, -1);
1387 if (result == -1)
1388 return NULL;
1389 else
1390 return PyBool_FromLong(result);
1391 }
1392
1393 PyDoc_STRVAR(endswith__doc__,
1394 "B.endswith(suffix [,start [,end]]) -> bool\n\
1395 \n\
1396 Return True if B ends with the specified suffix, False otherwise.\n\
1397 With optional start, test B beginning at that position.\n\
1398 With optional end, stop comparing B at that position.\n\
1399 suffix can also be a tuple of strings to try.");
1400
1401 static PyObject *
bytearray_endswith(PyByteArrayObject * self,PyObject * args)1402 bytearray_endswith(PyByteArrayObject *self, PyObject *args)
1403 {
1404 Py_ssize_t start = 0;
1405 Py_ssize_t end = PY_SSIZE_T_MAX;
1406 PyObject *subobj;
1407 int result;
1408
1409 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
1410 return NULL;
1411 if (PyTuple_Check(subobj)) {
1412 Py_ssize_t i;
1413 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1414 result = _bytearray_tailmatch(self,
1415 PyTuple_GET_ITEM(subobj, i),
1416 start, end, +1);
1417 if (result == -1)
1418 return NULL;
1419 else if (result) {
1420 Py_RETURN_TRUE;
1421 }
1422 }
1423 Py_RETURN_FALSE;
1424 }
1425 result = _bytearray_tailmatch(self, subobj, start, end, +1);
1426 if (result == -1)
1427 return NULL;
1428 else
1429 return PyBool_FromLong(result);
1430 }
1431
1432
1433 PyDoc_STRVAR(translate__doc__,
1434 "B.translate(table[, deletechars]) -> bytearray\n\
1435 \n\
1436 Return a copy of B, where all characters occurring in the\n\
1437 optional argument deletechars are removed, and the remaining\n\
1438 characters have been mapped through the given translation\n\
1439 table, which must be a bytes object of length 256.");
1440
1441 static PyObject *
bytearray_translate(PyByteArrayObject * self,PyObject * args)1442 bytearray_translate(PyByteArrayObject *self, PyObject *args)
1443 {
1444 register char *input, *output;
1445 register const char *table;
1446 register Py_ssize_t i, c;
1447 PyObject *input_obj = (PyObject*)self;
1448 const char *output_start;
1449 Py_ssize_t inlen;
1450 PyObject *result = NULL;
1451 int trans_table[256];
1452 PyObject *tableobj = NULL, *delobj = NULL;
1453 Py_buffer vtable, vdel;
1454
1455 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1456 &tableobj, &delobj))
1457 return NULL;
1458
1459 if (tableobj == Py_None) {
1460 table = NULL;
1461 tableobj = NULL;
1462 } else if (_getbuffer(tableobj, &vtable) < 0) {
1463 return NULL;
1464 } else {
1465 if (vtable.len != 256) {
1466 PyErr_SetString(PyExc_ValueError,
1467 "translation table must be 256 characters long");
1468 PyBuffer_Release(&vtable);
1469 return NULL;
1470 }
1471 table = (const char*)vtable.buf;
1472 }
1473
1474 if (delobj != NULL) {
1475 if (_getbuffer(delobj, &vdel) < 0) {
1476 if (tableobj != NULL)
1477 PyBuffer_Release(&vtable);
1478 return NULL;
1479 }
1480 }
1481 else {
1482 vdel.buf = NULL;
1483 vdel.len = 0;
1484 }
1485
1486 inlen = PyByteArray_GET_SIZE(input_obj);
1487 result = PyByteArray_FromStringAndSize((char *)NULL, inlen);
1488 if (result == NULL)
1489 goto done;
1490 output_start = output = PyByteArray_AsString(result);
1491 input = PyByteArray_AS_STRING(input_obj);
1492
1493 if (vdel.len == 0 && table != NULL) {
1494 /* If no deletions are required, use faster code */
1495 for (i = inlen; --i >= 0; ) {
1496 c = Py_CHARMASK(*input++);
1497 *output++ = table[c];
1498 }
1499 goto done;
1500 }
1501
1502 if (table == NULL) {
1503 for (i = 0; i < 256; i++)
1504 trans_table[i] = Py_CHARMASK(i);
1505 } else {
1506 for (i = 0; i < 256; i++)
1507 trans_table[i] = Py_CHARMASK(table[i]);
1508 }
1509
1510 for (i = 0; i < vdel.len; i++)
1511 trans_table[(int) Py_CHARMASK( ((unsigned char*)vdel.buf)[i] )] = -1;
1512
1513 for (i = inlen; --i >= 0; ) {
1514 c = Py_CHARMASK(*input++);
1515 if (trans_table[c] != -1)
1516 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1517 continue;
1518 }
1519 /* Fix the size of the resulting string */
1520 if (inlen > 0)
1521 PyByteArray_Resize(result, output - output_start);
1522
1523 done:
1524 if (tableobj != NULL)
1525 PyBuffer_Release(&vtable);
1526 if (delobj != NULL)
1527 PyBuffer_Release(&vdel);
1528 return result;
1529 }
1530
1531
1532 /* find and count characters and substrings */
1533
/* memchr() wrapper: pointer to the first byte equal to c within the first
   target_len bytes of target, or NULL when there is none. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
1536
1537
1538 /* Bytes ops must return a string, create a copy */
1539 Py_LOCAL(PyByteArrayObject *)
return_self(PyByteArrayObject * self)1540 return_self(PyByteArrayObject *self)
1541 {
1542 return (PyByteArrayObject *)PyByteArray_FromStringAndSize(
1543 PyByteArray_AS_STRING(self),
1544 PyByteArray_GET_SIZE(self));
1545 }
1546
1547 Py_LOCAL_INLINE(Py_ssize_t)
countchar(const char * target,Py_ssize_t target_len,char c,Py_ssize_t maxcount)1548 countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
1549 {
1550 Py_ssize_t count=0;
1551 const char *start=target;
1552 const char *end=target+target_len;
1553
1554 while ( (start=findchar(start, end-start, c)) != NULL ) {
1555 count++;
1556 if (count >= maxcount)
1557 break;
1558 start += 1;
1559 }
1560 return count;
1561 }
1562
1563
1564 /* Algorithms for different cases of string replacement */
1565
1566 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1567 Py_LOCAL(PyByteArrayObject *)
replace_interleave(PyByteArrayObject * self,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)1568 replace_interleave(PyByteArrayObject *self,
1569 const char *to_s, Py_ssize_t to_len,
1570 Py_ssize_t maxcount)
1571 {
1572 char *self_s, *result_s;
1573 Py_ssize_t self_len, result_len;
1574 Py_ssize_t count, i, product;
1575 PyByteArrayObject *result;
1576
1577 self_len = PyByteArray_GET_SIZE(self);
1578
1579 /* 1 at the end plus 1 after every character */
1580 count = self_len+1;
1581 if (maxcount < count)
1582 count = maxcount;
1583
1584 /* Check for overflow */
1585 /* result_len = count * to_len + self_len; */
1586 product = count * to_len;
1587 if (product / to_len != count) {
1588 PyErr_SetString(PyExc_OverflowError,
1589 "replace string is too long");
1590 return NULL;
1591 }
1592 result_len = product + self_len;
1593 if (result_len < 0) {
1594 PyErr_SetString(PyExc_OverflowError,
1595 "replace string is too long");
1596 return NULL;
1597 }
1598
1599 if (! (result = (PyByteArrayObject *)
1600 PyByteArray_FromStringAndSize(NULL, result_len)) )
1601 return NULL;
1602
1603 self_s = PyByteArray_AS_STRING(self);
1604 result_s = PyByteArray_AS_STRING(result);
1605
1606 /* TODO: special case single character, which doesn't need memcpy */
1607
1608 /* Lay the first one down (guaranteed this will occur) */
1609 Py_MEMCPY(result_s, to_s, to_len);
1610 result_s += to_len;
1611 count -= 1;
1612
1613 for (i=0; i<count; i++) {
1614 *result_s++ = *self_s++;
1615 Py_MEMCPY(result_s, to_s, to_len);
1616 result_s += to_len;
1617 }
1618
1619 /* Copy the rest of the original string */
1620 Py_MEMCPY(result_s, self_s, self_len-i);
1621
1622 return result;
1623 }
1624
1625 /* Special case for deleting a single character */
1626 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1627 Py_LOCAL(PyByteArrayObject *)
replace_delete_single_character(PyByteArrayObject * self,char from_c,Py_ssize_t maxcount)1628 replace_delete_single_character(PyByteArrayObject *self,
1629 char from_c, Py_ssize_t maxcount)
1630 {
1631 char *self_s, *result_s;
1632 char *start, *next, *end;
1633 Py_ssize_t self_len, result_len;
1634 Py_ssize_t count;
1635 PyByteArrayObject *result;
1636
1637 self_len = PyByteArray_GET_SIZE(self);
1638 self_s = PyByteArray_AS_STRING(self);
1639
1640 count = countchar(self_s, self_len, from_c, maxcount);
1641 if (count == 0) {
1642 return return_self(self);
1643 }
1644
1645 result_len = self_len - count; /* from_len == 1 */
1646 assert(result_len>=0);
1647
1648 if ( (result = (PyByteArrayObject *)
1649 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
1650 return NULL;
1651 result_s = PyByteArray_AS_STRING(result);
1652
1653 start = self_s;
1654 end = self_s + self_len;
1655 while (count-- > 0) {
1656 next = findchar(start, end-start, from_c);
1657 if (next == NULL)
1658 break;
1659 Py_MEMCPY(result_s, start, next-start);
1660 result_s += (next-start);
1661 start = next+1;
1662 }
1663 Py_MEMCPY(result_s, start, end-start);
1664
1665 return result;
1666 }
1667
1668 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1669
1670 Py_LOCAL(PyByteArrayObject *)
replace_delete_substring(PyByteArrayObject * self,const char * from_s,Py_ssize_t from_len,Py_ssize_t maxcount)1671 replace_delete_substring(PyByteArrayObject *self,
1672 const char *from_s, Py_ssize_t from_len,
1673 Py_ssize_t maxcount)
1674 {
1675 char *self_s, *result_s;
1676 char *start, *next, *end;
1677 Py_ssize_t self_len, result_len;
1678 Py_ssize_t count, offset;
1679 PyByteArrayObject *result;
1680
1681 self_len = PyByteArray_GET_SIZE(self);
1682 self_s = PyByteArray_AS_STRING(self);
1683
1684 count = stringlib_count(self_s, self_len,
1685 from_s, from_len,
1686 maxcount);
1687
1688 if (count == 0) {
1689 /* no matches */
1690 return return_self(self);
1691 }
1692
1693 result_len = self_len - (count * from_len);
1694 assert (result_len>=0);
1695
1696 if ( (result = (PyByteArrayObject *)
1697 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL )
1698 return NULL;
1699
1700 result_s = PyByteArray_AS_STRING(result);
1701
1702 start = self_s;
1703 end = self_s + self_len;
1704 while (count-- > 0) {
1705 offset = stringlib_find(start, end-start,
1706 from_s, from_len,
1707 0);
1708 if (offset == -1)
1709 break;
1710 next = start + offset;
1711
1712 Py_MEMCPY(result_s, start, next-start);
1713
1714 result_s += (next-start);
1715 start = next+from_len;
1716 }
1717 Py_MEMCPY(result_s, start, end-start);
1718 return result;
1719 }
1720
1721 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1722 Py_LOCAL(PyByteArrayObject *)
replace_single_character_in_place(PyByteArrayObject * self,char from_c,char to_c,Py_ssize_t maxcount)1723 replace_single_character_in_place(PyByteArrayObject *self,
1724 char from_c, char to_c,
1725 Py_ssize_t maxcount)
1726 {
1727 char *self_s, *result_s, *start, *end, *next;
1728 Py_ssize_t self_len;
1729 PyByteArrayObject *result;
1730
1731 /* The result string will be the same size */
1732 self_s = PyByteArray_AS_STRING(self);
1733 self_len = PyByteArray_GET_SIZE(self);
1734
1735 next = findchar(self_s, self_len, from_c);
1736
1737 if (next == NULL) {
1738 /* No matches; return the original bytes */
1739 return return_self(self);
1740 }
1741
1742 /* Need to make a new bytes */
1743 result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, self_len);
1744 if (result == NULL)
1745 return NULL;
1746 result_s = PyByteArray_AS_STRING(result);
1747 Py_MEMCPY(result_s, self_s, self_len);
1748
1749 /* change everything in-place, starting with this one */
1750 start = result_s + (next-self_s);
1751 *start = to_c;
1752 start++;
1753 end = result_s + self_len;
1754
1755 while (--maxcount > 0) {
1756 next = findchar(start, end-start, from_c);
1757 if (next == NULL)
1758 break;
1759 *next = to_c;
1760 start = next+1;
1761 }
1762
1763 return result;
1764 }
1765
1766 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1767 Py_LOCAL(PyByteArrayObject *)
replace_substring_in_place(PyByteArrayObject * self,const char * from_s,Py_ssize_t from_len,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)1768 replace_substring_in_place(PyByteArrayObject *self,
1769 const char *from_s, Py_ssize_t from_len,
1770 const char *to_s, Py_ssize_t to_len,
1771 Py_ssize_t maxcount)
1772 {
1773 char *result_s, *start, *end;
1774 char *self_s;
1775 Py_ssize_t self_len, offset;
1776 PyByteArrayObject *result;
1777
1778 /* The result bytes will be the same size */
1779
1780 self_s = PyByteArray_AS_STRING(self);
1781 self_len = PyByteArray_GET_SIZE(self);
1782
1783 offset = stringlib_find(self_s, self_len,
1784 from_s, from_len,
1785 0);
1786 if (offset == -1) {
1787 /* No matches; return the original bytes */
1788 return return_self(self);
1789 }
1790
1791 /* Need to make a new bytes */
1792 result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, self_len);
1793 if (result == NULL)
1794 return NULL;
1795 result_s = PyByteArray_AS_STRING(result);
1796 Py_MEMCPY(result_s, self_s, self_len);
1797
1798 /* change everything in-place, starting with this one */
1799 start = result_s + offset;
1800 Py_MEMCPY(start, to_s, from_len);
1801 start += from_len;
1802 end = result_s + self_len;
1803
1804 while ( --maxcount > 0) {
1805 offset = stringlib_find(start, end-start,
1806 from_s, from_len,
1807 0);
1808 if (offset==-1)
1809 break;
1810 Py_MEMCPY(start+offset, to_s, from_len);
1811 start += offset+from_len;
1812 }
1813
1814 return result;
1815 }
1816
1817 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1818 Py_LOCAL(PyByteArrayObject *)
replace_single_character(PyByteArrayObject * self,char from_c,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)1819 replace_single_character(PyByteArrayObject *self,
1820 char from_c,
1821 const char *to_s, Py_ssize_t to_len,
1822 Py_ssize_t maxcount)
1823 {
1824 char *self_s, *result_s;
1825 char *start, *next, *end;
1826 Py_ssize_t self_len, result_len;
1827 Py_ssize_t count, product;
1828 PyByteArrayObject *result;
1829
1830 self_s = PyByteArray_AS_STRING(self);
1831 self_len = PyByteArray_GET_SIZE(self);
1832
1833 count = countchar(self_s, self_len, from_c, maxcount);
1834 if (count == 0) {
1835 /* no matches, return unchanged */
1836 return return_self(self);
1837 }
1838
1839 /* use the difference between current and new, hence the "-1" */
1840 /* result_len = self_len + count * (to_len-1) */
1841 product = count * (to_len-1);
1842 if (product / (to_len-1) != count) {
1843 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1844 return NULL;
1845 }
1846 result_len = self_len + product;
1847 if (result_len < 0) {
1848 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1849 return NULL;
1850 }
1851
1852 if ( (result = (PyByteArrayObject *)
1853 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
1854 return NULL;
1855 result_s = PyByteArray_AS_STRING(result);
1856
1857 start = self_s;
1858 end = self_s + self_len;
1859 while (count-- > 0) {
1860 next = findchar(start, end-start, from_c);
1861 if (next == NULL)
1862 break;
1863
1864 if (next == start) {
1865 /* replace with the 'to' */
1866 Py_MEMCPY(result_s, to_s, to_len);
1867 result_s += to_len;
1868 start += 1;
1869 } else {
1870 /* copy the unchanged old then the 'to' */
1871 Py_MEMCPY(result_s, start, next-start);
1872 result_s += (next-start);
1873 Py_MEMCPY(result_s, to_s, to_len);
1874 result_s += to_len;
1875 start = next+1;
1876 }
1877 }
1878 /* Copy the remainder of the remaining bytes */
1879 Py_MEMCPY(result_s, start, end-start);
1880
1881 return result;
1882 }
1883
1884 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1885 Py_LOCAL(PyByteArrayObject *)
replace_substring(PyByteArrayObject * self,const char * from_s,Py_ssize_t from_len,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)1886 replace_substring(PyByteArrayObject *self,
1887 const char *from_s, Py_ssize_t from_len,
1888 const char *to_s, Py_ssize_t to_len,
1889 Py_ssize_t maxcount)
1890 {
1891 char *self_s, *result_s;
1892 char *start, *next, *end;
1893 Py_ssize_t self_len, result_len;
1894 Py_ssize_t count, offset, product;
1895 PyByteArrayObject *result;
1896
1897 self_s = PyByteArray_AS_STRING(self);
1898 self_len = PyByteArray_GET_SIZE(self);
1899
1900 count = stringlib_count(self_s, self_len,
1901 from_s, from_len,
1902 maxcount);
1903
1904 if (count == 0) {
1905 /* no matches, return unchanged */
1906 return return_self(self);
1907 }
1908
1909 /* Check for overflow */
1910 /* result_len = self_len + count * (to_len-from_len) */
1911 product = count * (to_len-from_len);
1912 if (product / (to_len-from_len) != count) {
1913 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1914 return NULL;
1915 }
1916 result_len = self_len + product;
1917 if (result_len < 0) {
1918 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1919 return NULL;
1920 }
1921
1922 if ( (result = (PyByteArrayObject *)
1923 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
1924 return NULL;
1925 result_s = PyByteArray_AS_STRING(result);
1926
1927 start = self_s;
1928 end = self_s + self_len;
1929 while (count-- > 0) {
1930 offset = stringlib_find(start, end-start,
1931 from_s, from_len,
1932 0);
1933 if (offset == -1)
1934 break;
1935 next = start+offset;
1936 if (next == start) {
1937 /* replace with the 'to' */
1938 Py_MEMCPY(result_s, to_s, to_len);
1939 result_s += to_len;
1940 start += from_len;
1941 } else {
1942 /* copy the unchanged old then the 'to' */
1943 Py_MEMCPY(result_s, start, next-start);
1944 result_s += (next-start);
1945 Py_MEMCPY(result_s, to_s, to_len);
1946 result_s += to_len;
1947 start = next+from_len;
1948 }
1949 }
1950 /* Copy the remainder of the remaining bytes */
1951 Py_MEMCPY(result_s, start, end-start);
1952
1953 return result;
1954 }
1955
1956
1957 Py_LOCAL(PyByteArrayObject *)
replace(PyByteArrayObject * self,const char * from_s,Py_ssize_t from_len,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)1958 replace(PyByteArrayObject *self,
1959 const char *from_s, Py_ssize_t from_len,
1960 const char *to_s, Py_ssize_t to_len,
1961 Py_ssize_t maxcount)
1962 {
1963 if (maxcount < 0) {
1964 maxcount = PY_SSIZE_T_MAX;
1965 } else if (maxcount == 0 || PyByteArray_GET_SIZE(self) == 0) {
1966 /* nothing to do; return the original bytes */
1967 return return_self(self);
1968 }
1969
1970 if (maxcount == 0 ||
1971 (from_len == 0 && to_len == 0)) {
1972 /* nothing to do; return the original bytes */
1973 return return_self(self);
1974 }
1975
1976 /* Handle zero-length special cases */
1977
1978 if (from_len == 0) {
1979 /* insert the 'to' bytes everywhere. */
1980 /* >>> "Python".replace("", ".") */
1981 /* '.P.y.t.h.o.n.' */
1982 return replace_interleave(self, to_s, to_len, maxcount);
1983 }
1984
1985 /* Except for "".replace("", "A") == "A" there is no way beyond this */
1986 /* point for an empty self bytes to generate a non-empty bytes */
1987 /* Special case so the remaining code always gets a non-empty bytes */
1988 if (PyByteArray_GET_SIZE(self) == 0) {
1989 return return_self(self);
1990 }
1991
1992 if (to_len == 0) {
1993 /* delete all occurrences of 'from' bytes */
1994 if (from_len == 1) {
1995 return replace_delete_single_character(
1996 self, from_s[0], maxcount);
1997 } else {
1998 return replace_delete_substring(self, from_s, from_len, maxcount);
1999 }
2000 }
2001
2002 /* Handle special case where both bytes have the same length */
2003
2004 if (from_len == to_len) {
2005 if (from_len == 1) {
2006 return replace_single_character_in_place(
2007 self,
2008 from_s[0],
2009 to_s[0],
2010 maxcount);
2011 } else {
2012 return replace_substring_in_place(
2013 self, from_s, from_len, to_s, to_len, maxcount);
2014 }
2015 }
2016
2017 /* Otherwise use the more generic algorithms */
2018 if (from_len == 1) {
2019 return replace_single_character(self, from_s[0],
2020 to_s, to_len, maxcount);
2021 } else {
2022 /* len('from')>=2, len('to')>=1 */
2023 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2024 }
2025 }
2026
2027
2028 PyDoc_STRVAR(replace__doc__,
2029 "B.replace(old, new[, count]) -> bytes\n\
2030 \n\
2031 Return a copy of B with all occurrences of subsection\n\
2032 old replaced by new. If the optional argument count is\n\
2033 given, only the first count occurrences are replaced.");
2034
2035 static PyObject *
bytearray_replace(PyByteArrayObject * self,PyObject * args)2036 bytearray_replace(PyByteArrayObject *self, PyObject *args)
2037 {
2038 Py_ssize_t count = -1;
2039 PyObject *from, *to, *res;
2040 Py_buffer vfrom, vto;
2041
2042 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2043 return NULL;
2044
2045 if (_getbuffer(from, &vfrom) < 0)
2046 return NULL;
2047 if (_getbuffer(to, &vto) < 0) {
2048 PyBuffer_Release(&vfrom);
2049 return NULL;
2050 }
2051
2052 res = (PyObject *)replace((PyByteArrayObject *) self,
2053 vfrom.buf, vfrom.len,
2054 vto.buf, vto.len, count);
2055
2056 PyBuffer_Release(&vfrom);
2057 PyBuffer_Release(&vto);
2058 return res;
2059 }
2060
2061 PyDoc_STRVAR(split__doc__,
2062 "B.split([sep[, maxsplit]]) -> list of bytearray\n\
2063 \n\
2064 Return a list of the sections in B, using sep as the delimiter.\n\
2065 If sep is not given, B is split on ASCII whitespace characters\n\
2066 (space, tab, return, newline, formfeed, vertical tab).\n\
2067 If maxsplit is given, at most maxsplit splits are done.");
2068
2069 static PyObject *
bytearray_split(PyByteArrayObject * self,PyObject * args)2070 bytearray_split(PyByteArrayObject *self, PyObject *args)
2071 {
2072 Py_ssize_t len = PyByteArray_GET_SIZE(self), n;
2073 Py_ssize_t maxsplit = -1;
2074 const char *s = PyByteArray_AS_STRING(self), *sub;
2075 PyObject *list, *subobj = Py_None;
2076 Py_buffer vsub;
2077
2078 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
2079 return NULL;
2080 if (maxsplit < 0)
2081 maxsplit = PY_SSIZE_T_MAX;
2082
2083 if (subobj == Py_None)
2084 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
2085
2086 if (_getbuffer(subobj, &vsub) < 0)
2087 return NULL;
2088 sub = vsub.buf;
2089 n = vsub.len;
2090
2091 list = stringlib_split(
2092 (PyObject*) self, s, len, sub, n, maxsplit
2093 );
2094 PyBuffer_Release(&vsub);
2095 return list;
2096 }
2097
2098 PyDoc_STRVAR(partition__doc__,
2099 "B.partition(sep) -> (head, sep, tail)\n\
2100 \n\
2101 Searches for the separator sep in B, and returns the part before it,\n\
2102 the separator itself, and the part after it. If the separator is not\n\
2103 found, returns B and two empty bytearray objects.");
2104
2105 static PyObject *
bytearray_partition(PyByteArrayObject * self,PyObject * sep_obj)2106 bytearray_partition(PyByteArrayObject *self, PyObject *sep_obj)
2107 {
2108 PyObject *bytesep, *result;
2109
2110 bytesep = PyByteArray_FromObject(sep_obj);
2111 if (! bytesep)
2112 return NULL;
2113
2114 result = stringlib_partition(
2115 (PyObject*) self,
2116 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
2117 bytesep,
2118 PyByteArray_AS_STRING(bytesep), PyByteArray_GET_SIZE(bytesep)
2119 );
2120
2121 Py_DECREF(bytesep);
2122 return result;
2123 }
2124
2125 PyDoc_STRVAR(rpartition__doc__,
2126 "B.rpartition(sep) -> (head, sep, tail)\n\
2127 \n\
2128 Searches for the separator sep in B, starting at the end of B,\n\
2129 and returns the part before it, the separator itself, and the\n\
2130 part after it. If the separator is not found, returns two empty\n\
2131 bytearray objects and B.");
2132
2133 static PyObject *
bytearray_rpartition(PyByteArrayObject * self,PyObject * sep_obj)2134 bytearray_rpartition(PyByteArrayObject *self, PyObject *sep_obj)
2135 {
2136 PyObject *bytesep, *result;
2137
2138 bytesep = PyByteArray_FromObject(sep_obj);
2139 if (! bytesep)
2140 return NULL;
2141
2142 result = stringlib_rpartition(
2143 (PyObject*) self,
2144 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
2145 bytesep,
2146 PyByteArray_AS_STRING(bytesep), PyByteArray_GET_SIZE(bytesep)
2147 );
2148
2149 Py_DECREF(bytesep);
2150 return result;
2151 }
2152
2153 PyDoc_STRVAR(rsplit__doc__,
2154 "B.rsplit(sep[, maxsplit]) -> list of bytearray\n\
2155 \n\
2156 Return a list of the sections in B, using sep as the delimiter,\n\
2157 starting at the end of B and working to the front.\n\
2158 If sep is not given, B is split on ASCII whitespace characters\n\
2159 (space, tab, return, newline, formfeed, vertical tab).\n\
2160 If maxsplit is given, at most maxsplit splits are done.");
2161
2162 static PyObject *
bytearray_rsplit(PyByteArrayObject * self,PyObject * args)2163 bytearray_rsplit(PyByteArrayObject *self, PyObject *args)
2164 {
2165 Py_ssize_t len = PyByteArray_GET_SIZE(self), n;
2166 Py_ssize_t maxsplit = -1;
2167 const char *s = PyByteArray_AS_STRING(self), *sub;
2168 PyObject *list, *subobj = Py_None;
2169 Py_buffer vsub;
2170
2171 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
2172 return NULL;
2173 if (maxsplit < 0)
2174 maxsplit = PY_SSIZE_T_MAX;
2175
2176 if (subobj == Py_None)
2177 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
2178
2179 if (_getbuffer(subobj, &vsub) < 0)
2180 return NULL;
2181 sub = vsub.buf;
2182 n = vsub.len;
2183
2184 list = stringlib_rsplit(
2185 (PyObject*) self, s, len, sub, n, maxsplit
2186 );
2187 PyBuffer_Release(&vsub);
2188 return list;
2189 }
2190
2191 PyDoc_STRVAR(reverse__doc__,
2192 "B.reverse() -> None\n\
2193 \n\
2194 Reverse the order of the values in B in place.");
2195 static PyObject *
bytearray_reverse(PyByteArrayObject * self,PyObject * unused)2196 bytearray_reverse(PyByteArrayObject *self, PyObject *unused)
2197 {
2198 char swap, *head, *tail;
2199 Py_ssize_t i, j, n = Py_SIZE(self);
2200
2201 j = n / 2;
2202 head = self->ob_bytes;
2203 tail = head + n - 1;
2204 for (i = 0; i < j; i++) {
2205 swap = *head;
2206 *head++ = *tail;
2207 *tail-- = swap;
2208 }
2209
2210 Py_RETURN_NONE;
2211 }
2212
2213 PyDoc_STRVAR(insert__doc__,
2214 "B.insert(index, int) -> None\n\
2215 \n\
2216 Insert a single item into the bytearray before the given index.");
2217 static PyObject *
bytearray_insert(PyByteArrayObject * self,PyObject * args)2218 bytearray_insert(PyByteArrayObject *self, PyObject *args)
2219 {
2220 PyObject *value;
2221 int ival;
2222 Py_ssize_t where, n = Py_SIZE(self);
2223
2224 if (!PyArg_ParseTuple(args, "nO:insert", &where, &value))
2225 return NULL;
2226
2227 if (n == PY_SSIZE_T_MAX) {
2228 PyErr_SetString(PyExc_OverflowError,
2229 "cannot add more objects to bytearray");
2230 return NULL;
2231 }
2232 if (!_getbytevalue(value, &ival))
2233 return NULL;
2234 if (PyByteArray_Resize((PyObject *)self, n + 1) < 0)
2235 return NULL;
2236
2237 if (where < 0) {
2238 where += n;
2239 if (where < 0)
2240 where = 0;
2241 }
2242 if (where > n)
2243 where = n;
2244 memmove(self->ob_bytes + where + 1, self->ob_bytes + where, n - where);
2245 self->ob_bytes[where] = ival;
2246
2247 Py_RETURN_NONE;
2248 }
2249
2250 PyDoc_STRVAR(append__doc__,
2251 "B.append(int) -> None\n\
2252 \n\
2253 Append a single item to the end of B.");
2254 static PyObject *
bytearray_append(PyByteArrayObject * self,PyObject * arg)2255 bytearray_append(PyByteArrayObject *self, PyObject *arg)
2256 {
2257 int value;
2258 Py_ssize_t n = Py_SIZE(self);
2259
2260 if (! _getbytevalue(arg, &value))
2261 return NULL;
2262 if (n == PY_SSIZE_T_MAX) {
2263 PyErr_SetString(PyExc_OverflowError,
2264 "cannot add more objects to bytearray");
2265 return NULL;
2266 }
2267 if (PyByteArray_Resize((PyObject *)self, n + 1) < 0)
2268 return NULL;
2269
2270 self->ob_bytes[n] = value;
2271
2272 Py_RETURN_NONE;
2273 }
2274
2275 PyDoc_STRVAR(extend__doc__,
2276 "B.extend(iterable int) -> None\n\
2277 \n\
2278 Append all the elements from the iterator or sequence to the\n\
2279 end of B.");
2280 static PyObject *
bytearray_extend(PyByteArrayObject * self,PyObject * arg)2281 bytearray_extend(PyByteArrayObject *self, PyObject *arg)
2282 {
2283 PyObject *it, *item, *bytearray_obj;
2284 Py_ssize_t buf_size = 0, len = 0;
2285 int value;
2286 char *buf;
2287
2288 /* bytearray_setslice code only accepts something supporting PEP 3118. */
2289 if (PyObject_CheckBuffer(arg)) {
2290 if (bytearray_setslice(self, Py_SIZE(self), Py_SIZE(self), arg) == -1)
2291 return NULL;
2292
2293 Py_RETURN_NONE;
2294 }
2295
2296 it = PyObject_GetIter(arg);
2297 if (it == NULL)
2298 return NULL;
2299
2300 /* Try to determine the length of the argument. 32 is arbitrary. */
2301 buf_size = _PyObject_LengthHint(arg, 32);
2302 if (buf_size == -1) {
2303 Py_DECREF(it);
2304 return NULL;
2305 }
2306
2307 bytearray_obj = PyByteArray_FromStringAndSize(NULL, buf_size);
2308 if (bytearray_obj == NULL) {
2309 Py_DECREF(it);
2310 return NULL;
2311 }
2312 buf = PyByteArray_AS_STRING(bytearray_obj);
2313
2314 while ((item = PyIter_Next(it)) != NULL) {
2315 if (! _getbytevalue(item, &value)) {
2316 Py_DECREF(item);
2317 Py_DECREF(it);
2318 Py_DECREF(bytearray_obj);
2319 return NULL;
2320 }
2321 buf[len++] = value;
2322 Py_DECREF(item);
2323
2324 if (len >= buf_size) {
2325 Py_ssize_t addition;
2326 if (len == PY_SSIZE_T_MAX) {
2327 Py_DECREF(it);
2328 Py_DECREF(bytearray_obj);
2329 return PyErr_NoMemory();
2330 }
2331 addition = len >> 1;
2332 if (addition > PY_SSIZE_T_MAX - len - 1)
2333 buf_size = PY_SSIZE_T_MAX;
2334 else
2335 buf_size = len + addition + 1;
2336 if (PyByteArray_Resize((PyObject *)bytearray_obj, buf_size) < 0) {
2337 Py_DECREF(it);
2338 Py_DECREF(bytearray_obj);
2339 return NULL;
2340 }
2341 /* Recompute the `buf' pointer, since the resizing operation may
2342 have invalidated it. */
2343 buf = PyByteArray_AS_STRING(bytearray_obj);
2344 }
2345 }
2346 Py_DECREF(it);
2347
2348 /* Resize down to exact size. */
2349 if (PyByteArray_Resize((PyObject *)bytearray_obj, len) < 0) {
2350 Py_DECREF(bytearray_obj);
2351 return NULL;
2352 }
2353
2354 if (bytearray_setslice(self, Py_SIZE(self), Py_SIZE(self), bytearray_obj) == -1) {
2355 Py_DECREF(bytearray_obj);
2356 return NULL;
2357 }
2358 Py_DECREF(bytearray_obj);
2359
2360 Py_RETURN_NONE;
2361 }
2362
2363 PyDoc_STRVAR(pop__doc__,
2364 "B.pop([index]) -> int\n\
2365 \n\
2366 Remove and return a single item from B. If no index\n\
2367 argument is given, will pop the last value.");
2368 static PyObject *
bytearray_pop(PyByteArrayObject * self,PyObject * args)2369 bytearray_pop(PyByteArrayObject *self, PyObject *args)
2370 {
2371 int value;
2372 Py_ssize_t where = -1, n = Py_SIZE(self);
2373
2374 if (!PyArg_ParseTuple(args, "|n:pop", &where))
2375 return NULL;
2376
2377 if (n == 0) {
2378 PyErr_SetString(PyExc_IndexError,
2379 "pop from empty bytearray");
2380 return NULL;
2381 }
2382 if (where < 0)
2383 where += Py_SIZE(self);
2384 if (where < 0 || where >= Py_SIZE(self)) {
2385 PyErr_SetString(PyExc_IndexError, "pop index out of range");
2386 return NULL;
2387 }
2388 if (!_canresize(self))
2389 return NULL;
2390
2391 value = self->ob_bytes[where];
2392 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2393 if (PyByteArray_Resize((PyObject *)self, n - 1) < 0)
2394 return NULL;
2395
2396 return PyInt_FromLong((unsigned char)value);
2397 }
2398
2399 PyDoc_STRVAR(remove__doc__,
2400 "B.remove(int) -> None\n\
2401 \n\
2402 Remove the first occurrence of a value in B.");
2403 static PyObject *
bytearray_remove(PyByteArrayObject * self,PyObject * arg)2404 bytearray_remove(PyByteArrayObject *self, PyObject *arg)
2405 {
2406 int value;
2407 Py_ssize_t n = Py_SIZE(self);
2408 char *where;
2409
2410 if (! _getbytevalue(arg, &value))
2411 return NULL;
2412
2413 where = memchr(self->ob_bytes, value, n);
2414 if (!where) {
2415 PyErr_SetString(PyExc_ValueError, "value not found in bytearray");
2416 return NULL;
2417 }
2418 if (!_canresize(self))
2419 return NULL;
2420
2421 memmove(where, where + 1, self->ob_bytes + n - where);
2422 if (PyByteArray_Resize((PyObject *)self, n - 1) < 0)
2423 return NULL;
2424
2425 Py_RETURN_NONE;
2426 }
2427
2428 /* XXX These two helpers could be optimized if argsize == 1 */
2429
2430 static Py_ssize_t
lstrip_helper(unsigned char * myptr,Py_ssize_t mysize,void * argptr,Py_ssize_t argsize)2431 lstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2432 void *argptr, Py_ssize_t argsize)
2433 {
2434 Py_ssize_t i = 0;
2435 while (i < mysize && memchr(argptr, myptr[i], argsize))
2436 i++;
2437 return i;
2438 }
2439
2440 static Py_ssize_t
rstrip_helper(unsigned char * myptr,Py_ssize_t mysize,void * argptr,Py_ssize_t argsize)2441 rstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2442 void *argptr, Py_ssize_t argsize)
2443 {
2444 Py_ssize_t i = mysize - 1;
2445 while (i >= 0 && memchr(argptr, myptr[i], argsize))
2446 i--;
2447 return i + 1;
2448 }
2449
2450 PyDoc_STRVAR(strip__doc__,
2451 "B.strip([bytes]) -> bytearray\n\
2452 \n\
2453 Strip leading and trailing bytes contained in the argument.\n\
2454 If the argument is omitted, strip ASCII whitespace.");
2455 static PyObject *
bytearray_strip(PyByteArrayObject * self,PyObject * args)2456 bytearray_strip(PyByteArrayObject *self, PyObject *args)
2457 {
2458 Py_ssize_t left, right, mysize, argsize;
2459 void *myptr, *argptr;
2460 PyObject *arg = Py_None;
2461 Py_buffer varg;
2462 if (!PyArg_ParseTuple(args, "|O:strip", &arg))
2463 return NULL;
2464 if (arg == Py_None) {
2465 argptr = "\t\n\r\f\v ";
2466 argsize = 6;
2467 }
2468 else {
2469 if (_getbuffer(arg, &varg) < 0)
2470 return NULL;
2471 argptr = varg.buf;
2472 argsize = varg.len;
2473 }
2474 myptr = self->ob_bytes;
2475 mysize = Py_SIZE(self);
2476 left = lstrip_helper(myptr, mysize, argptr, argsize);
2477 if (left == mysize)
2478 right = left;
2479 else
2480 right = rstrip_helper(myptr, mysize, argptr, argsize);
2481 if (arg != Py_None)
2482 PyBuffer_Release(&varg);
2483 return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
2484 }
2485
2486 PyDoc_STRVAR(lstrip__doc__,
2487 "B.lstrip([bytes]) -> bytearray\n\
2488 \n\
2489 Strip leading bytes contained in the argument.\n\
2490 If the argument is omitted, strip leading ASCII whitespace.");
2491 static PyObject *
bytearray_lstrip(PyByteArrayObject * self,PyObject * args)2492 bytearray_lstrip(PyByteArrayObject *self, PyObject *args)
2493 {
2494 Py_ssize_t left, right, mysize, argsize;
2495 void *myptr, *argptr;
2496 PyObject *arg = Py_None;
2497 Py_buffer varg;
2498 if (!PyArg_ParseTuple(args, "|O:lstrip", &arg))
2499 return NULL;
2500 if (arg == Py_None) {
2501 argptr = "\t\n\r\f\v ";
2502 argsize = 6;
2503 }
2504 else {
2505 if (_getbuffer(arg, &varg) < 0)
2506 return NULL;
2507 argptr = varg.buf;
2508 argsize = varg.len;
2509 }
2510 myptr = self->ob_bytes;
2511 mysize = Py_SIZE(self);
2512 left = lstrip_helper(myptr, mysize, argptr, argsize);
2513 right = mysize;
2514 if (arg != Py_None)
2515 PyBuffer_Release(&varg);
2516 return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
2517 }
2518
2519 PyDoc_STRVAR(rstrip__doc__,
2520 "B.rstrip([bytes]) -> bytearray\n\
2521 \n\
2522 Strip trailing bytes contained in the argument.\n\
2523 If the argument is omitted, strip trailing ASCII whitespace.");
2524 static PyObject *
bytearray_rstrip(PyByteArrayObject * self,PyObject * args)2525 bytearray_rstrip(PyByteArrayObject *self, PyObject *args)
2526 {
2527 Py_ssize_t left, right, mysize, argsize;
2528 void *myptr, *argptr;
2529 PyObject *arg = Py_None;
2530 Py_buffer varg;
2531 if (!PyArg_ParseTuple(args, "|O:rstrip", &arg))
2532 return NULL;
2533 if (arg == Py_None) {
2534 argptr = "\t\n\r\f\v ";
2535 argsize = 6;
2536 }
2537 else {
2538 if (_getbuffer(arg, &varg) < 0)
2539 return NULL;
2540 argptr = varg.buf;
2541 argsize = varg.len;
2542 }
2543 myptr = self->ob_bytes;
2544 mysize = Py_SIZE(self);
2545 left = 0;
2546 right = rstrip_helper(myptr, mysize, argptr, argsize);
2547 if (arg != Py_None)
2548 PyBuffer_Release(&varg);
2549 return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
2550 }
2551
2552 PyDoc_STRVAR(decode_doc,
2553 "B.decode([encoding[, errors]]) -> unicode object.\n\
2554 \n\
2555 Decodes B using the codec registered for encoding. encoding defaults\n\
2556 to the default encoding. errors may be given to set a different error\n\
2557 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2558 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2559 as well as any other name registered with codecs.register_error that is\n\
2560 able to handle UnicodeDecodeErrors.");
2561
2562 static PyObject *
bytearray_decode(PyObject * self,PyObject * args,PyObject * kwargs)2563 bytearray_decode(PyObject *self, PyObject *args, PyObject *kwargs)
2564 {
2565 const char *encoding = NULL;
2566 const char *errors = NULL;
2567 static char *kwlist[] = {"encoding", "errors", 0};
2568
2569 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", kwlist, &encoding, &errors))
2570 return NULL;
2571 if (encoding == NULL) {
2572 #ifdef Py_USING_UNICODE
2573 encoding = PyUnicode_GetDefaultEncoding();
2574 #else
2575 PyErr_SetString(PyExc_ValueError, "no encoding specified");
2576 return NULL;
2577 #endif
2578 }
2579 return _PyCodec_DecodeText(self, encoding, errors);
2580 }
2581
2582 PyDoc_STRVAR(alloc_doc,
2583 "B.__alloc__() -> int\n\
2584 \n\
2585 Returns the number of bytes actually allocated.");
2586
2587 static PyObject *
bytearray_alloc(PyByteArrayObject * self)2588 bytearray_alloc(PyByteArrayObject *self)
2589 {
2590 return PyInt_FromSsize_t(self->ob_alloc);
2591 }
2592
2593 PyDoc_STRVAR(join_doc,
2594 "B.join(iterable_of_bytes) -> bytes\n\
2595 \n\
2596 Concatenates any number of bytearray objects, with B in between each pair.");
2597
2598 static PyObject *
bytearray_join(PyByteArrayObject * self,PyObject * it)2599 bytearray_join(PyByteArrayObject *self, PyObject *it)
2600 {
2601 PyObject *seq;
2602 Py_ssize_t mysize = Py_SIZE(self);
2603 Py_ssize_t i;
2604 Py_ssize_t n;
2605 PyObject **items;
2606 Py_ssize_t totalsize = 0;
2607 PyObject *result;
2608 char *dest;
2609
2610 seq = PySequence_Fast(it, "can only join an iterable");
2611 if (seq == NULL)
2612 return NULL;
2613 n = PySequence_Fast_GET_SIZE(seq);
2614 items = PySequence_Fast_ITEMS(seq);
2615
2616 /* Compute the total size, and check that they are all bytes */
2617 /* XXX Shouldn't we use _getbuffer() on these items instead? */
2618 for (i = 0; i < n; i++) {
2619 PyObject *obj = items[i];
2620 if (!PyByteArray_Check(obj) && !PyBytes_Check(obj)) {
2621 PyErr_Format(PyExc_TypeError,
2622 "can only join an iterable of bytes "
2623 "(item %ld has type '%.100s')",
2624 /* XXX %ld isn't right on Win64 */
2625 (long)i, Py_TYPE(obj)->tp_name);
2626 goto error;
2627 }
2628 if (i > 0)
2629 totalsize += mysize;
2630 totalsize += Py_SIZE(obj);
2631 if (totalsize < 0) {
2632 PyErr_NoMemory();
2633 goto error;
2634 }
2635 }
2636
2637 /* Allocate the result, and copy the bytes */
2638 result = PyByteArray_FromStringAndSize(NULL, totalsize);
2639 if (result == NULL)
2640 goto error;
2641 dest = PyByteArray_AS_STRING(result);
2642 for (i = 0; i < n; i++) {
2643 PyObject *obj = items[i];
2644 Py_ssize_t size = Py_SIZE(obj);
2645 char *buf;
2646 if (PyByteArray_Check(obj))
2647 buf = PyByteArray_AS_STRING(obj);
2648 else
2649 buf = PyBytes_AS_STRING(obj);
2650 if (i) {
2651 memcpy(dest, self->ob_bytes, mysize);
2652 dest += mysize;
2653 }
2654 memcpy(dest, buf, size);
2655 dest += size;
2656 }
2657
2658 /* Done */
2659 Py_DECREF(seq);
2660 return result;
2661
2662 /* Error handling */
2663 error:
2664 Py_DECREF(seq);
2665 return NULL;
2666 }
2667
2668 PyDoc_STRVAR(splitlines__doc__,
2669 "B.splitlines(keepends=False) -> list of lines\n\
2670 \n\
2671 Return a list of the lines in B, breaking at line boundaries.\n\
2672 Line breaks are not included in the resulting list unless keepends\n\
2673 is given and true.");
2674
2675 static PyObject*
bytearray_splitlines(PyObject * self,PyObject * args)2676 bytearray_splitlines(PyObject *self, PyObject *args)
2677 {
2678 int keepends = 0;
2679
2680 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
2681 return NULL;
2682
2683 return stringlib_splitlines(
2684 (PyObject*) self, PyByteArray_AS_STRING(self),
2685 PyByteArray_GET_SIZE(self), keepends
2686 );
2687 }
2688
/* Docstring for the bytearray.fromhex() class method; attached to
   bytearray_fromhex() in the bytearray_methods table. */
PyDoc_STRVAR(fromhex_doc,
"bytearray.fromhex(string) -> bytearray\n\
\n\
Create a bytearray object from a string of hexadecimal numbers.\n\
Spaces between two numbers are accepted.\n\
Example: bytearray.fromhex('B9 01EF') -> bytearray(b'\\xb9\\x01\\xef').");
2695
/* Convert one hexadecimal digit character to its value (0..15).
   Upper-case letters are folded to lower case first.  Returns -1 when
   `c` is not a hexadecimal digit. */
static int
hex_digit_to_int(char c)
{
    if (Py_ISDIGIT(c))
        return c - '0';

    if (Py_ISUPPER(c))
        c = Py_TOLOWER(c);

    return (c >= 'a' && c <= 'f') ? c - 'a' + 10 : -1;
}
2709
2710 static PyObject *
bytearray_fromhex(PyObject * cls,PyObject * args)2711 bytearray_fromhex(PyObject *cls, PyObject *args)
2712 {
2713 PyObject *newbytes;
2714 char *buf;
2715 char *hex;
2716 Py_ssize_t hexlen, byteslen, i, j;
2717 int top, bot;
2718
2719 if (!PyArg_ParseTuple(args, "s#:fromhex", &hex, &hexlen))
2720 return NULL;
2721 byteslen = hexlen/2; /* This overestimates if there are spaces */
2722 newbytes = PyByteArray_FromStringAndSize(NULL, byteslen);
2723 if (!newbytes)
2724 return NULL;
2725 buf = PyByteArray_AS_STRING(newbytes);
2726 for (i = j = 0; i < hexlen; i += 2) {
2727 /* skip over spaces in the input */
2728 while (hex[i] == ' ')
2729 i++;
2730 if (i >= hexlen)
2731 break;
2732 top = hex_digit_to_int(hex[i]);
2733 bot = hex_digit_to_int(hex[i+1]);
2734 if (top == -1 || bot == -1) {
2735 PyErr_Format(PyExc_ValueError,
2736 "non-hexadecimal number found in "
2737 "fromhex() arg at position %zd", i);
2738 goto error;
2739 }
2740 buf[j++] = (top << 4) + bot;
2741 }
2742 if (PyByteArray_Resize(newbytes, j) < 0)
2743 goto error;
2744 return newbytes;
2745
2746 error:
2747 Py_DECREF(newbytes);
2748 return NULL;
2749 }
2750
2751 PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
2752
2753 static PyObject *
bytearray_reduce(PyByteArrayObject * self)2754 bytearray_reduce(PyByteArrayObject *self)
2755 {
2756 PyObject *latin1, *dict;
2757 if (self->ob_bytes)
2758 #ifdef Py_USING_UNICODE
2759 latin1 = PyUnicode_DecodeLatin1(self->ob_bytes,
2760 Py_SIZE(self), NULL);
2761 #else
2762 latin1 = PyString_FromStringAndSize(self->ob_bytes, Py_SIZE(self));
2763 #endif
2764 else
2765 #ifdef Py_USING_UNICODE
2766 latin1 = PyUnicode_FromString("");
2767 #else
2768 latin1 = PyString_FromString("");
2769 #endif
2770
2771 dict = PyObject_GetAttrString((PyObject *)self, "__dict__");
2772 if (dict == NULL) {
2773 PyErr_Clear();
2774 dict = Py_None;
2775 Py_INCREF(dict);
2776 }
2777
2778 return Py_BuildValue("(O(Ns)N)", Py_TYPE(self), latin1, "latin-1", dict);
2779 }
2780
2781 PyDoc_STRVAR(sizeof_doc,
2782 "B.__sizeof__() -> int\n\
2783 \n\
2784 Returns the size of B in memory, in bytes");
2785 static PyObject *
bytearray_sizeof(PyByteArrayObject * self)2786 bytearray_sizeof(PyByteArrayObject *self)
2787 {
2788 Py_ssize_t res;
2789
2790 res = _PyObject_SIZE(Py_TYPE(self)) + self->ob_alloc * sizeof(char);
2791 return PyInt_FromSsize_t(res);
2792 }
2793
/* Sequence protocol slots for bytearray; referenced from the
   tp_as_sequence field of PyByteArray_Type.  sq_slice and sq_ass_slice
   are left empty -- presumably slicing goes through the mapping slots
   (bytearray_subscript / bytearray_ass_subscript); confirm there. */
static PySequenceMethods bytearray_as_sequence = {
    (lenfunc)bytearray_length,              /* sq_length */
    (binaryfunc)PyByteArray_Concat,         /* sq_concat */
    (ssizeargfunc)bytearray_repeat,         /* sq_repeat */
    (ssizeargfunc)bytearray_getitem,        /* sq_item */
    0,                                      /* sq_slice */
    (ssizeobjargproc)bytearray_setitem,     /* sq_ass_item */
    0,                                      /* sq_ass_slice */
    (objobjproc)bytearray_contains,         /* sq_contains */
    (binaryfunc)bytearray_iconcat,          /* sq_inplace_concat */
    (ssizeargfunc)bytearray_irepeat,        /* sq_inplace_repeat */
};
2806
/* Mapping protocol slots for bytearray; referenced from the
   tp_as_mapping field of PyByteArray_Type. */
static PyMappingMethods bytearray_as_mapping = {
    (lenfunc)bytearray_length,              /* mp_length */
    (binaryfunc)bytearray_subscript,        /* mp_subscript */
    (objobjargproc)bytearray_ass_subscript, /* mp_ass_subscript */
};
2812
/* Buffer interface slots for bytearray; referenced from the tp_as_buffer
   field of PyByteArray_Type.  The first four entries are the old-style
   buffer functions, the last two the new-style (Py_buffer) ones. */
static PyBufferProcs bytearray_as_buffer = {
    (readbufferproc)bytearray_buffer_getreadbuf,    /* bf_getreadbuffer */
    (writebufferproc)bytearray_buffer_getwritebuf,  /* bf_getwritebuffer */
    (segcountproc)bytearray_buffer_getsegcount,     /* bf_getsegcount */
    (charbufferproc)bytearray_buffer_getcharbuf,    /* bf_getcharbuffer */
    (getbufferproc)bytearray_getbuffer,             /* bf_getbuffer */
    (releasebufferproc)bytearray_releasebuffer,     /* bf_releasebuffer */
};
2821
/* Method table for the bytearray type; referenced from the tp_methods
   field of PyByteArray_Type.  Each entry gives the Python-visible name,
   the C implementation, its calling convention flags and its docstring.
   Entries are kept in alphabetical order by method name; the {NULL}
   entry terminates the table. */
static PyMethodDef
bytearray_methods[] = {
    {"__alloc__", (PyCFunction)bytearray_alloc, METH_NOARGS, alloc_doc},
    {"__reduce__", (PyCFunction)bytearray_reduce, METH_NOARGS, reduce_doc},
    {"__sizeof__", (PyCFunction)bytearray_sizeof, METH_NOARGS, sizeof_doc},
    {"append", (PyCFunction)bytearray_append, METH_O, append__doc__},
    {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
     _Py_capitalize__doc__},
    {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
    {"count", (PyCFunction)bytearray_count, METH_VARARGS, count__doc__},
    {"decode", (PyCFunction)bytearray_decode, METH_VARARGS | METH_KEYWORDS, decode_doc},
    {"endswith", (PyCFunction)bytearray_endswith, METH_VARARGS, endswith__doc__},
    {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
     expandtabs__doc__},
    {"extend", (PyCFunction)bytearray_extend, METH_O, extend__doc__},
    {"find", (PyCFunction)bytearray_find, METH_VARARGS, find__doc__},
    /* fromhex is the only class method in the table (METH_CLASS). */
    {"fromhex", (PyCFunction)bytearray_fromhex, METH_VARARGS|METH_CLASS,
     fromhex_doc},
    {"index", (PyCFunction)bytearray_index, METH_VARARGS, index__doc__},
    {"insert", (PyCFunction)bytearray_insert, METH_VARARGS, insert__doc__},
    {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
     _Py_isalnum__doc__},
    {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
     _Py_isalpha__doc__},
    {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
     _Py_isdigit__doc__},
    {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
     _Py_islower__doc__},
    {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
     _Py_isspace__doc__},
    {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
     _Py_istitle__doc__},
    {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
     _Py_isupper__doc__},
    {"join", (PyCFunction)bytearray_join, METH_O, join_doc},
    {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
    {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
    {"lstrip", (PyCFunction)bytearray_lstrip, METH_VARARGS, lstrip__doc__},
    {"partition", (PyCFunction)bytearray_partition, METH_O, partition__doc__},
    {"pop", (PyCFunction)bytearray_pop, METH_VARARGS, pop__doc__},
    {"remove", (PyCFunction)bytearray_remove, METH_O, remove__doc__},
    {"replace", (PyCFunction)bytearray_replace, METH_VARARGS, replace__doc__},
    {"reverse", (PyCFunction)bytearray_reverse, METH_NOARGS, reverse__doc__},
    {"rfind", (PyCFunction)bytearray_rfind, METH_VARARGS, rfind__doc__},
    {"rindex", (PyCFunction)bytearray_rindex, METH_VARARGS, rindex__doc__},
    {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
    {"rpartition", (PyCFunction)bytearray_rpartition, METH_O, rpartition__doc__},
    {"rsplit", (PyCFunction)bytearray_rsplit, METH_VARARGS, rsplit__doc__},
    {"rstrip", (PyCFunction)bytearray_rstrip, METH_VARARGS, rstrip__doc__},
    {"split", (PyCFunction)bytearray_split, METH_VARARGS, split__doc__},
    {"splitlines", (PyCFunction)bytearray_splitlines, METH_VARARGS,
     splitlines__doc__},
    {"startswith", (PyCFunction)bytearray_startswith, METH_VARARGS ,
     startswith__doc__},
    {"strip", (PyCFunction)bytearray_strip, METH_VARARGS, strip__doc__},
    {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
     _Py_swapcase__doc__},
    {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
    {"translate", (PyCFunction)bytearray_translate, METH_VARARGS,
     translate__doc__},
    {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
    {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
    {NULL}
};
2886
/* Docstring of the bytearray type itself; installed as tp_doc in
   PyByteArray_Type. */
PyDoc_STRVAR(bytearray_doc,
"bytearray(iterable_of_ints) -> bytearray.\n\
bytearray(string, encoding[, errors]) -> bytearray.\n\
bytearray(bytes_or_bytearray) -> mutable copy of bytes_or_bytearray.\n\
bytearray(memory_view) -> bytearray.\n\
\n\
Construct a mutable bytearray object from:\n\
  - an iterable yielding integers in range(256)\n\
  - a text string encoded using the specified encoding\n\
  - a bytes or a bytearray object\n\
  - any object implementing the buffer API.\n\
\n\
bytearray(int) -> bytearray.\n\
\n\
Construct a zero-initialized bytearray of the given length.");
2902
2903
/* Forward declaration: bytearray_iter() is defined after the iterator
   type further down, but is needed for the tp_iter slot below. */
static PyObject *bytearray_iter(PyObject *seq);

/* Type object for bytearray.  It wires up the sequence, mapping and
   buffer tables, the method table and the docstring defined above.  The
   flags allow subclassing (Py_TPFLAGS_BASETYPE) and advertise the
   new-style buffer interface (Py_TPFLAGS_HAVE_NEWBUFFER). */
PyTypeObject PyByteArray_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytearray",                        /* tp_name */
    sizeof(PyByteArrayObject),          /* tp_basicsize */
    0,                                  /* tp_itemsize */
    (destructor)bytearray_dealloc,      /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_compare */
    (reprfunc)bytearray_repr,           /* tp_repr */
    0,                                  /* tp_as_number */
    &bytearray_as_sequence,             /* tp_as_sequence */
    &bytearray_as_mapping,              /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    bytearray_str,                      /* tp_str */
    PyObject_GenericGetAttr,            /* tp_getattro */
    0,                                  /* tp_setattro */
    &bytearray_as_buffer,               /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
    Py_TPFLAGS_HAVE_NEWBUFFER,          /* tp_flags */
    bytearray_doc,                      /* tp_doc */
    0,                                  /* tp_traverse */
    0,                                  /* tp_clear */
    (richcmpfunc)bytearray_richcompare, /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    bytearray_iter,                     /* tp_iter */
    0,                                  /* tp_iternext */
    bytearray_methods,                  /* tp_methods */
    0,                                  /* tp_members */
    0,                                  /* tp_getset */
    0,                                  /* tp_base */
    0,                                  /* tp_dict */
    0,                                  /* tp_descr_get */
    0,                                  /* tp_descr_set */
    0,                                  /* tp_dictoffset */
    (initproc)bytearray_init,           /* tp_init */
    PyType_GenericAlloc,                /* tp_alloc */
    PyType_GenericNew,                  /* tp_new */
    PyObject_Del,                       /* tp_free */
};
2948
2949 /*********************** Bytes Iterator ****************************/
2950
/* State of an iterator over a bytearray (instances of
   PyByteArrayIter_Type). */
typedef struct {
    PyObject_HEAD
    Py_ssize_t it_index;        /* Index of the next byte to return */
    PyByteArrayObject *it_seq; /* Set to NULL when iterator is exhausted */
} bytesiterobject;
2956
/* tp_dealloc of the bytearray iterator: remove the iterator from GC
   tracking, drop its reference to the underlying bytearray (it_seq may
   already be NULL once the iterator is exhausted, hence Py_XDECREF) and
   free the iterator object itself. */
static void
bytearrayiter_dealloc(bytesiterobject *it)
{
    _PyObject_GC_UNTRACK(it);
    Py_XDECREF(it->it_seq);
    PyObject_GC_Del(it);
}
2964
/* tp_traverse of the bytearray iterator: the only object reference held
   is it_seq, so that is the only one reported to the visitor.  Py_VISIT
   returns early from this function if the visitor reports a non-zero
   result; otherwise 0 is returned. */
static int
bytearrayiter_traverse(bytesiterobject *it, visitproc visit, void *arg)
{
    Py_VISIT(it->it_seq);
    return 0;
}
2971
2972 static PyObject *
bytearrayiter_next(bytesiterobject * it)2973 bytearrayiter_next(bytesiterobject *it)
2974 {
2975 PyByteArrayObject *seq;
2976 PyObject *item;
2977
2978 assert(it != NULL);
2979 seq = it->it_seq;
2980 if (seq == NULL)
2981 return NULL;
2982 assert(PyByteArray_Check(seq));
2983
2984 if (it->it_index < PyByteArray_GET_SIZE(seq)) {
2985 item = PyInt_FromLong(
2986 (unsigned char)seq->ob_bytes[it->it_index]);
2987 if (item != NULL)
2988 ++it->it_index;
2989 return item;
2990 }
2991
2992 it->it_seq = NULL;
2993 Py_DECREF(seq);
2994 return NULL;
2995 }
2996
2997 static PyObject *
bytesarrayiter_length_hint(bytesiterobject * it)2998 bytesarrayiter_length_hint(bytesiterobject *it)
2999 {
3000 Py_ssize_t len = 0;
3001 if (it->it_seq)
3002 len = PyByteArray_GET_SIZE(it->it_seq) - it->it_index;
3003 return PyInt_FromSsize_t(len);
3004 }
3005
PyDoc_STRVAR(length_hint_doc,
"Private method returning an estimate of len(list(it)).");

/* Method table of the bytearray iterator type (tp_methods of
   PyByteArrayIter_Type); it only provides __length_hint__. */
static PyMethodDef bytearrayiter_methods[] = {
    {"__length_hint__", (PyCFunction)bytesarrayiter_length_hint, METH_NOARGS,
     length_hint_doc},
    {NULL, NULL} /* sentinel */
};
3014
/* Type object of the iterator returned by bytearray_iter().  Instances
   take part in garbage collection (Py_TPFLAGS_HAVE_GC together with
   tp_traverse) because they hold a reference to the bytearray being
   iterated.  Slots after tp_members are left to their implicit zero
   initialization. */
PyTypeObject PyByteArrayIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytearray_iterator",              /* tp_name */
    sizeof(bytesiterobject),           /* tp_basicsize */
    0,                                 /* tp_itemsize */
    /* methods */
    (destructor)bytearrayiter_dealloc, /* tp_dealloc */
    0,                                 /* tp_print */
    0,                                 /* tp_getattr */
    0,                                 /* tp_setattr */
    0,                                 /* tp_compare */
    0,                                 /* tp_repr */
    0,                                 /* tp_as_number */
    0,                                 /* tp_as_sequence */
    0,                                 /* tp_as_mapping */
    0,                                 /* tp_hash */
    0,                                 /* tp_call */
    0,                                 /* tp_str */
    PyObject_GenericGetAttr,           /* tp_getattro */
    0,                                 /* tp_setattro */
    0,                                 /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
    0,                                 /* tp_doc */
    (traverseproc)bytearrayiter_traverse,  /* tp_traverse */
    0,                                 /* tp_clear */
    0,                                 /* tp_richcompare */
    0,                                 /* tp_weaklistoffset */
    PyObject_SelfIter,                 /* tp_iter */
    (iternextfunc)bytearrayiter_next,  /* tp_iternext */
    bytearrayiter_methods,             /* tp_methods */
    0,                                 /* tp_members */
};
3047
3048 static PyObject *
bytearray_iter(PyObject * seq)3049 bytearray_iter(PyObject *seq)
3050 {
3051 bytesiterobject *it;
3052
3053 if (!PyByteArray_Check(seq)) {
3054 PyErr_BadInternalCall();
3055 return NULL;
3056 }
3057 it = PyObject_GC_New(bytesiterobject, &PyByteArrayIter_Type);
3058 if (it == NULL)
3059 return NULL;
3060 it->it_index = 0;
3061 Py_INCREF(seq);
3062 it->it_seq = (PyByteArrayObject *)seq;
3063 _PyObject_GC_TRACK(it);
3064 return (PyObject *)it;
3065 }
3066