1 /* stringlib: bytes joining implementation */
2
3 #if STRINGLIB_IS_UNICODE
4 #error join.h only compatible with byte-wise strings
5 #endif
6
7 Py_LOCAL_INLINE(PyObject *)
STRINGLIB(bytes_join)8 STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable)
9 {
10 const char *sepstr = STRINGLIB_STR(sep);
11 Py_ssize_t seplen = STRINGLIB_LEN(sep);
12 PyObject *res = NULL;
13 char *p;
14 Py_ssize_t seqlen = 0;
15 Py_ssize_t sz = 0;
16 Py_ssize_t i, nbufs;
17 PyObject *seq, *item;
18 Py_buffer *buffers = NULL;
19 #define NB_STATIC_BUFFERS 10
20 Py_buffer static_buffers[NB_STATIC_BUFFERS];
21 #define GIL_THRESHOLD 1048576
22 int drop_gil = 1;
23 PyThreadState *save = NULL;
24
25 seq = PySequence_Fast(iterable, "can only join an iterable");
26 if (seq == NULL) {
27 return NULL;
28 }
29
30 seqlen = PySequence_Fast_GET_SIZE(seq);
31 if (seqlen == 0) {
32 Py_DECREF(seq);
33 return STRINGLIB_NEW(NULL, 0);
34 }
35 #ifndef STRINGLIB_MUTABLE
36 if (seqlen == 1) {
37 item = PySequence_Fast_GET_ITEM(seq, 0);
38 if (STRINGLIB_CHECK_EXACT(item)) {
39 Py_INCREF(item);
40 Py_DECREF(seq);
41 return item;
42 }
43 }
44 #endif
45 if (seqlen > NB_STATIC_BUFFERS) {
46 buffers = PyMem_NEW(Py_buffer, seqlen);
47 if (buffers == NULL) {
48 Py_DECREF(seq);
49 PyErr_NoMemory();
50 return NULL;
51 }
52 }
53 else {
54 buffers = static_buffers;
55 }
56
57 /* Here is the general case. Do a pre-pass to figure out the total
58 * amount of space we'll need (sz), and see whether all arguments are
59 * bytes-like.
60 */
61 for (i = 0, nbufs = 0; i < seqlen; i++) {
62 Py_ssize_t itemlen;
63 item = PySequence_Fast_GET_ITEM(seq, i);
64 if (PyBytes_CheckExact(item)) {
65 /* Fast path. */
66 Py_INCREF(item);
67 buffers[i].obj = item;
68 buffers[i].buf = PyBytes_AS_STRING(item);
69 buffers[i].len = PyBytes_GET_SIZE(item);
70 }
71 else {
72 if (PyObject_GetBuffer(item, &buffers[i], PyBUF_SIMPLE) != 0) {
73 PyErr_Format(PyExc_TypeError,
74 "sequence item %zd: expected a bytes-like object, "
75 "%.80s found",
76 i, Py_TYPE(item)->tp_name);
77 goto error;
78 }
79 /* If the backing objects are mutable, then dropping the GIL
80 * opens up race conditions where another thread tries to modify
81 * the object which we hold a buffer on it. Such code has data
82 * races anyway, but this is a conservative approach that avoids
83 * changing the behaviour of that data race.
84 */
85 drop_gil = 0;
86 }
87 nbufs = i + 1; /* for error cleanup */
88 itemlen = buffers[i].len;
89 if (itemlen > PY_SSIZE_T_MAX - sz) {
90 PyErr_SetString(PyExc_OverflowError,
91 "join() result is too long");
92 goto error;
93 }
94 sz += itemlen;
95 if (i != 0) {
96 if (seplen > PY_SSIZE_T_MAX - sz) {
97 PyErr_SetString(PyExc_OverflowError,
98 "join() result is too long");
99 goto error;
100 }
101 sz += seplen;
102 }
103 if (seqlen != PySequence_Fast_GET_SIZE(seq)) {
104 PyErr_SetString(PyExc_RuntimeError,
105 "sequence changed size during iteration");
106 goto error;
107 }
108 }
109
110 /* Allocate result space. */
111 res = STRINGLIB_NEW(NULL, sz);
112 if (res == NULL)
113 goto error;
114
115 /* Catenate everything. */
116 p = STRINGLIB_STR(res);
117 if (sz < GIL_THRESHOLD) {
118 drop_gil = 0; /* Benefits are likely outweighed by the overheads */
119 }
120 if (drop_gil) {
121 save = PyEval_SaveThread();
122 }
123 if (!seplen) {
124 /* fast path */
125 for (i = 0; i < nbufs; i++) {
126 Py_ssize_t n = buffers[i].len;
127 char *q = buffers[i].buf;
128 memcpy(p, q, n);
129 p += n;
130 }
131 }
132 else {
133 for (i = 0; i < nbufs; i++) {
134 Py_ssize_t n;
135 char *q;
136 if (i) {
137 memcpy(p, sepstr, seplen);
138 p += seplen;
139 }
140 n = buffers[i].len;
141 q = buffers[i].buf;
142 memcpy(p, q, n);
143 p += n;
144 }
145 }
146 if (drop_gil) {
147 PyEval_RestoreThread(save);
148 }
149 goto done;
150
151 error:
152 res = NULL;
153 done:
154 Py_DECREF(seq);
155 for (i = 0; i < nbufs; i++)
156 PyBuffer_Release(&buffers[i]);
157 if (buffers != static_buffers)
158 PyMem_Free(buffers);
159 return res;
160 }
161
162 #undef NB_STATIC_BUFFERS
163 #undef GIL_THRESHOLD
164