• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* stringlib: bytes joining implementation */
2 
3 #if STRINGLIB_IS_UNICODE
4 #error join.h only compatible with byte-wise strings
5 #endif
6 
7 Py_LOCAL_INLINE(PyObject *)
STRINGLIB(bytes_join)8 STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable)
9 {
10     const char *sepstr = STRINGLIB_STR(sep);
11     Py_ssize_t seplen = STRINGLIB_LEN(sep);
12     PyObject *res = NULL;
13     char *p;
14     Py_ssize_t seqlen = 0;
15     Py_ssize_t sz = 0;
16     Py_ssize_t i, nbufs;
17     PyObject *seq, *item;
18     Py_buffer *buffers = NULL;
19 #define NB_STATIC_BUFFERS 10
20     Py_buffer static_buffers[NB_STATIC_BUFFERS];
21 #define GIL_THRESHOLD 1048576
22     int drop_gil = 1;
23     PyThreadState *save = NULL;
24 
25     seq = PySequence_Fast(iterable, "can only join an iterable");
26     if (seq == NULL) {
27         return NULL;
28     }
29 
30     seqlen = PySequence_Fast_GET_SIZE(seq);
31     if (seqlen == 0) {
32         Py_DECREF(seq);
33         return STRINGLIB_NEW(NULL, 0);
34     }
35 #if !STRINGLIB_MUTABLE
36     if (seqlen == 1) {
37         item = PySequence_Fast_GET_ITEM(seq, 0);
38         if (STRINGLIB_CHECK_EXACT(item)) {
39             Py_INCREF(item);
40             Py_DECREF(seq);
41             return item;
42         }
43     }
44 #endif
45     if (seqlen > NB_STATIC_BUFFERS) {
46         buffers = PyMem_NEW(Py_buffer, seqlen);
47         if (buffers == NULL) {
48             Py_DECREF(seq);
49             PyErr_NoMemory();
50             return NULL;
51         }
52     }
53     else {
54         buffers = static_buffers;
55     }
56 
57     /* Here is the general case.  Do a pre-pass to figure out the total
58      * amount of space we'll need (sz), and see whether all arguments are
59      * bytes-like.
60      */
61     for (i = 0, nbufs = 0; i < seqlen; i++) {
62         Py_ssize_t itemlen;
63         item = PySequence_Fast_GET_ITEM(seq, i);
64         if (PyBytes_CheckExact(item)) {
65             /* Fast path. */
66             buffers[i].obj = Py_NewRef(item);
67             buffers[i].buf = PyBytes_AS_STRING(item);
68             buffers[i].len = PyBytes_GET_SIZE(item);
69         }
70         else {
71             if (PyObject_GetBuffer(item, &buffers[i], PyBUF_SIMPLE) != 0) {
72                 PyErr_Format(PyExc_TypeError,
73                              "sequence item %zd: expected a bytes-like object, "
74                              "%.80s found",
75                              i, Py_TYPE(item)->tp_name);
76                 goto error;
77             }
78             /* If the backing objects are mutable, then dropping the GIL
79              * opens up race conditions where another thread tries to modify
80              * the object which we hold a buffer on it. Such code has data
81              * races anyway, but this is a conservative approach that avoids
82              * changing the behaviour of that data race.
83              */
84             drop_gil = 0;
85         }
86         nbufs = i + 1;  /* for error cleanup */
87         itemlen = buffers[i].len;
88         if (itemlen > PY_SSIZE_T_MAX - sz) {
89             PyErr_SetString(PyExc_OverflowError,
90                             "join() result is too long");
91             goto error;
92         }
93         sz += itemlen;
94         if (i != 0) {
95             if (seplen > PY_SSIZE_T_MAX - sz) {
96                 PyErr_SetString(PyExc_OverflowError,
97                                 "join() result is too long");
98                 goto error;
99             }
100             sz += seplen;
101         }
102         if (seqlen != PySequence_Fast_GET_SIZE(seq)) {
103             PyErr_SetString(PyExc_RuntimeError,
104                             "sequence changed size during iteration");
105             goto error;
106         }
107     }
108 
109     /* Allocate result space. */
110     res = STRINGLIB_NEW(NULL, sz);
111     if (res == NULL)
112         goto error;
113 
114     /* Catenate everything. */
115     p = STRINGLIB_STR(res);
116     if (sz < GIL_THRESHOLD) {
117         drop_gil = 0;   /* Benefits are likely outweighed by the overheads */
118     }
119     if (drop_gil) {
120         save = PyEval_SaveThread();
121     }
122     if (!seplen) {
123         /* fast path */
124         for (i = 0; i < nbufs; i++) {
125             Py_ssize_t n = buffers[i].len;
126             char *q = buffers[i].buf;
127             memcpy(p, q, n);
128             p += n;
129         }
130     }
131     else {
132         for (i = 0; i < nbufs; i++) {
133             Py_ssize_t n;
134             char *q;
135             if (i) {
136                 memcpy(p, sepstr, seplen);
137                 p += seplen;
138             }
139             n = buffers[i].len;
140             q = buffers[i].buf;
141             memcpy(p, q, n);
142             p += n;
143         }
144     }
145     if (drop_gil) {
146         PyEval_RestoreThread(save);
147     }
148     goto done;
149 
150 error:
151     res = NULL;
152 done:
153     Py_DECREF(seq);
154     for (i = 0; i < nbufs; i++)
155         PyBuffer_Release(&buffers[i]);
156     if (buffers != static_buffers)
157         PyMem_Free(buffers);
158     return res;
159 }
160 
161 #undef NB_STATIC_BUFFERS
162 #undef GIL_THRESHOLD
163