/* stringlib: bytes joining implementation */

/* This header only implements joining for byte-wise string types; refuse to
 * compile when it is included for a unicode instantiation of stringlib.
 */
#if STRINGLIB_IS_UNICODE
#error join.h only compatible with byte-wise strings
#endif

7 Py_LOCAL_INLINE(PyObject *)
STRINGLIB(bytes_join)8 STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable)
9 {
10     const char *sepstr = STRINGLIB_STR(sep);
11     Py_ssize_t seplen = STRINGLIB_LEN(sep);
12     PyObject *res = NULL;
13     char *p;
14     Py_ssize_t seqlen = 0;
15     Py_ssize_t sz = 0;
16     Py_ssize_t i, nbufs;
17     PyObject *seq, *item;
18     Py_buffer *buffers = NULL;
19 #define NB_STATIC_BUFFERS 10
20     Py_buffer static_buffers[NB_STATIC_BUFFERS];
21 #define GIL_THRESHOLD 1048576
22     int drop_gil = 1;
23     PyThreadState *save = NULL;
24 
25     seq = PySequence_Fast(iterable, "can only join an iterable");
26     if (seq == NULL) {
27         return NULL;
28     }
29 
30     seqlen = PySequence_Fast_GET_SIZE(seq);
31     if (seqlen == 0) {
32         Py_DECREF(seq);
33         return STRINGLIB_NEW(NULL, 0);
34     }
35 #ifndef STRINGLIB_MUTABLE
36     if (seqlen == 1) {
37         item = PySequence_Fast_GET_ITEM(seq, 0);
38         if (STRINGLIB_CHECK_EXACT(item)) {
39             Py_INCREF(item);
40             Py_DECREF(seq);
41             return item;
42         }
43     }
44 #endif
45     if (seqlen > NB_STATIC_BUFFERS) {
46         buffers = PyMem_NEW(Py_buffer, seqlen);
47         if (buffers == NULL) {
48             Py_DECREF(seq);
49             PyErr_NoMemory();
50             return NULL;
51         }
52     }
53     else {
54         buffers = static_buffers;
55     }
56 
57     /* Here is the general case.  Do a pre-pass to figure out the total
58      * amount of space we'll need (sz), and see whether all arguments are
59      * bytes-like.
60      */
61     for (i = 0, nbufs = 0; i < seqlen; i++) {
62         Py_ssize_t itemlen;
63         item = PySequence_Fast_GET_ITEM(seq, i);
64         if (PyBytes_CheckExact(item)) {
65             /* Fast path. */
66             Py_INCREF(item);
67             buffers[i].obj = item;
68             buffers[i].buf = PyBytes_AS_STRING(item);
69             buffers[i].len = PyBytes_GET_SIZE(item);
70         }
71         else {
72             if (PyObject_GetBuffer(item, &buffers[i], PyBUF_SIMPLE) != 0) {
73                 PyErr_Format(PyExc_TypeError,
74                              "sequence item %zd: expected a bytes-like object, "
75                              "%.80s found",
76                              i, Py_TYPE(item)->tp_name);
77                 goto error;
78             }
79             /* If the backing objects are mutable, then dropping the GIL
80              * opens up race conditions where another thread tries to modify
81              * the object which we hold a buffer on it. Such code has data
82              * races anyway, but this is a conservative approach that avoids
83              * changing the behaviour of that data race.
84              */
85             drop_gil = 0;
86         }
87         nbufs = i + 1;  /* for error cleanup */
88         itemlen = buffers[i].len;
89         if (itemlen > PY_SSIZE_T_MAX - sz) {
90             PyErr_SetString(PyExc_OverflowError,
91                             "join() result is too long");
92             goto error;
93         }
94         sz += itemlen;
95         if (i != 0) {
96             if (seplen > PY_SSIZE_T_MAX - sz) {
97                 PyErr_SetString(PyExc_OverflowError,
98                                 "join() result is too long");
99                 goto error;
100             }
101             sz += seplen;
102         }
103         if (seqlen != PySequence_Fast_GET_SIZE(seq)) {
104             PyErr_SetString(PyExc_RuntimeError,
105                             "sequence changed size during iteration");
106             goto error;
107         }
108     }
109 
110     /* Allocate result space. */
111     res = STRINGLIB_NEW(NULL, sz);
112     if (res == NULL)
113         goto error;
114 
115     /* Catenate everything. */
116     p = STRINGLIB_STR(res);
117     if (sz < GIL_THRESHOLD) {
118         drop_gil = 0;   /* Benefits are likely outweighed by the overheads */
119     }
120     if (drop_gil) {
121         save = PyEval_SaveThread();
122     }
123     if (!seplen) {
124         /* fast path */
125         for (i = 0; i < nbufs; i++) {
126             Py_ssize_t n = buffers[i].len;
127             char *q = buffers[i].buf;
128             memcpy(p, q, n);
129             p += n;
130         }
131     }
132     else {
133         for (i = 0; i < nbufs; i++) {
134             Py_ssize_t n;
135             char *q;
136             if (i) {
137                 memcpy(p, sepstr, seplen);
138                 p += seplen;
139             }
140             n = buffers[i].len;
141             q = buffers[i].buf;
142             memcpy(p, q, n);
143             p += n;
144         }
145     }
146     if (drop_gil) {
147         PyEval_RestoreThread(save);
148     }
149     goto done;
150 
151 error:
152     res = NULL;
153 done:
154     Py_DECREF(seq);
155     for (i = 0; i < nbufs; i++)
156         PyBuffer_Release(&buffers[i]);
157     if (buffers != static_buffers)
158         PyMem_Free(buffers);
159     return res;
160 }
161 
162 #undef NB_STATIC_BUFFERS
163 #undef GIL_THRESHOLD
164