1 /* stringlib: bytes joining implementation */
2
/* This template only handles byte-wise strings: refuse to compile if it is
 * included while STRINGLIB_IS_UNICODE is set. */
#if STRINGLIB_IS_UNICODE
#error join.h only compatible with byte-wise strings
#endif
6
7 Py_LOCAL_INLINE(PyObject *)
STRINGLIB(bytes_join)8 STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable)
9 {
10 const char *sepstr = STRINGLIB_STR(sep);
11 Py_ssize_t seplen = STRINGLIB_LEN(sep);
12 PyObject *res = NULL;
13 char *p;
14 Py_ssize_t seqlen = 0;
15 Py_ssize_t sz = 0;
16 Py_ssize_t i, nbufs;
17 PyObject *seq, *item;
18 Py_buffer *buffers = NULL;
19 #define NB_STATIC_BUFFERS 10
20 Py_buffer static_buffers[NB_STATIC_BUFFERS];
21 #define GIL_THRESHOLD 1048576
22 int drop_gil = 1;
23 PyThreadState *save = NULL;
24
25 seq = PySequence_Fast(iterable, "can only join an iterable");
26 if (seq == NULL) {
27 return NULL;
28 }
29
30 seqlen = PySequence_Fast_GET_SIZE(seq);
31 if (seqlen == 0) {
32 Py_DECREF(seq);
33 return STRINGLIB_NEW(NULL, 0);
34 }
35 #if !STRINGLIB_MUTABLE
36 if (seqlen == 1) {
37 item = PySequence_Fast_GET_ITEM(seq, 0);
38 if (STRINGLIB_CHECK_EXACT(item)) {
39 Py_INCREF(item);
40 Py_DECREF(seq);
41 return item;
42 }
43 }
44 #endif
45 if (seqlen > NB_STATIC_BUFFERS) {
46 buffers = PyMem_NEW(Py_buffer, seqlen);
47 if (buffers == NULL) {
48 Py_DECREF(seq);
49 PyErr_NoMemory();
50 return NULL;
51 }
52 }
53 else {
54 buffers = static_buffers;
55 }
56
57 /* Here is the general case. Do a pre-pass to figure out the total
58 * amount of space we'll need (sz), and see whether all arguments are
59 * bytes-like.
60 */
61 for (i = 0, nbufs = 0; i < seqlen; i++) {
62 Py_ssize_t itemlen;
63 item = PySequence_Fast_GET_ITEM(seq, i);
64 if (PyBytes_CheckExact(item)) {
65 /* Fast path. */
66 buffers[i].obj = Py_NewRef(item);
67 buffers[i].buf = PyBytes_AS_STRING(item);
68 buffers[i].len = PyBytes_GET_SIZE(item);
69 }
70 else {
71 if (PyObject_GetBuffer(item, &buffers[i], PyBUF_SIMPLE) != 0) {
72 PyErr_Format(PyExc_TypeError,
73 "sequence item %zd: expected a bytes-like object, "
74 "%.80s found",
75 i, Py_TYPE(item)->tp_name);
76 goto error;
77 }
78 /* If the backing objects are mutable, then dropping the GIL
79 * opens up race conditions where another thread tries to modify
80 * the object which we hold a buffer on it. Such code has data
81 * races anyway, but this is a conservative approach that avoids
82 * changing the behaviour of that data race.
83 */
84 drop_gil = 0;
85 }
86 nbufs = i + 1; /* for error cleanup */
87 itemlen = buffers[i].len;
88 if (itemlen > PY_SSIZE_T_MAX - sz) {
89 PyErr_SetString(PyExc_OverflowError,
90 "join() result is too long");
91 goto error;
92 }
93 sz += itemlen;
94 if (i != 0) {
95 if (seplen > PY_SSIZE_T_MAX - sz) {
96 PyErr_SetString(PyExc_OverflowError,
97 "join() result is too long");
98 goto error;
99 }
100 sz += seplen;
101 }
102 if (seqlen != PySequence_Fast_GET_SIZE(seq)) {
103 PyErr_SetString(PyExc_RuntimeError,
104 "sequence changed size during iteration");
105 goto error;
106 }
107 }
108
109 /* Allocate result space. */
110 res = STRINGLIB_NEW(NULL, sz);
111 if (res == NULL)
112 goto error;
113
114 /* Catenate everything. */
115 p = STRINGLIB_STR(res);
116 if (sz < GIL_THRESHOLD) {
117 drop_gil = 0; /* Benefits are likely outweighed by the overheads */
118 }
119 if (drop_gil) {
120 save = PyEval_SaveThread();
121 }
122 if (!seplen) {
123 /* fast path */
124 for (i = 0; i < nbufs; i++) {
125 Py_ssize_t n = buffers[i].len;
126 char *q = buffers[i].buf;
127 memcpy(p, q, n);
128 p += n;
129 }
130 }
131 else {
132 for (i = 0; i < nbufs; i++) {
133 Py_ssize_t n;
134 char *q;
135 if (i) {
136 memcpy(p, sepstr, seplen);
137 p += seplen;
138 }
139 n = buffers[i].len;
140 q = buffers[i].buf;
141 memcpy(p, q, n);
142 p += n;
143 }
144 }
145 if (drop_gil) {
146 PyEval_RestoreThread(save);
147 }
148 goto done;
149
150 error:
151 res = NULL;
152 done:
153 Py_DECREF(seq);
154 for (i = 0; i < nbufs; i++)
155 PyBuffer_Release(&buffers[i]);
156 if (buffers != static_buffers)
157 PyMem_Free(buffers);
158 return res;
159 }
160
/* The helper macros are defined inside STRINGLIB(bytes_join); undefine them
 * so they do not leak into the including file. */
#undef NB_STATIC_BUFFERS
#undef GIL_THRESHOLD
163