1 /*
2 _BlocksOutputBuffer is used to maintain an output buffer
3 that has unpredictable size. Suitable for compression/decompression
4 API (bz2/lzma/zlib) that has stream->next_out and stream->avail_out:
5
6 stream->next_out: point to the next output position.
7 stream->avail_out: the number of available bytes left in the buffer.
8
9 It maintains a list of bytes object, so there is no overhead of resizing
10 the buffer.
11
12 Usage:
13
14 1, Initialize the struct instance like this:
15 _BlocksOutputBuffer buffer = {.list = NULL};
16 Set .list to NULL for _BlocksOutputBuffer_OnError()
17
18 2, Initialize the buffer use one of these functions:
19 _BlocksOutputBuffer_InitAndGrow()
20 _BlocksOutputBuffer_InitWithSize()
21
22 3, If (avail_out == 0), grow the buffer:
23 _BlocksOutputBuffer_Grow()
24
25 4, Get the current outputted data size:
26 _BlocksOutputBuffer_GetDataSize()
27
28 5, Finish the buffer, and return a bytes object:
29 _BlocksOutputBuffer_Finish()
30
31 6, Clean up the buffer when an error occurred:
32 _BlocksOutputBuffer_OnError()
33 */
34
35 #ifndef Py_INTERNAL_BLOCKS_OUTPUT_BUFFER_H
36 #define Py_INTERNAL_BLOCKS_OUTPUT_BUFFER_H
37 #ifdef __cplusplus
38 extern "C" {
39 #endif
40
41 #include "Python.h"
42
43 #ifndef Py_BUILD_CORE
44 # error "this header requires Py_BUILD_CORE define"
45 #endif
46
47 typedef struct {
48 // List of bytes objects
49 PyObject *list;
50 // Number of whole allocated size
51 Py_ssize_t allocated;
52 // Max length of the buffer, negative number means unlimited length.
53 Py_ssize_t max_length;
54 } _BlocksOutputBuffer;
55
56 static const char unable_allocate_msg[] = "Unable to allocate output buffer.";
57
58 /* In 32-bit build, the max block size should <= INT32_MAX. */
59 #define OUTPUT_BUFFER_MAX_BLOCK_SIZE (256*1024*1024)
60
61 /* Block size sequence */
62 #define KB (1024)
63 #define MB (1024*1024)
64 static const Py_ssize_t BUFFER_BLOCK_SIZE[] =
65 { 32*KB, 64*KB, 256*KB, 1*MB, 4*MB, 8*MB, 16*MB, 16*MB,
66 32*MB, 32*MB, 32*MB, 32*MB, 64*MB, 64*MB, 128*MB, 128*MB,
67 OUTPUT_BUFFER_MAX_BLOCK_SIZE };
68 #undef KB
69 #undef MB
70
71 /* According to the block sizes defined by BUFFER_BLOCK_SIZE, the whole
72 allocated size growth step is:
73 1 32 KB +32 KB
74 2 96 KB +64 KB
75 3 352 KB +256 KB
76 4 1.34 MB +1 MB
77 5 5.34 MB +4 MB
78 6 13.34 MB +8 MB
79 7 29.34 MB +16 MB
80 8 45.34 MB +16 MB
81 9 77.34 MB +32 MB
82 10 109.34 MB +32 MB
83 11 141.34 MB +32 MB
84 12 173.34 MB +32 MB
85 13 237.34 MB +64 MB
86 14 301.34 MB +64 MB
87 15 429.34 MB +128 MB
88 16 557.34 MB +128 MB
89 17 813.34 MB +256 MB
90 18 1069.34 MB +256 MB
91 19 1325.34 MB +256 MB
92 20 1581.34 MB +256 MB
93 21 1837.34 MB +256 MB
94 22 2093.34 MB +256 MB
95 ...
96 */
97
98 /* Initialize the buffer, and grow the buffer.
99
100 max_length: Max length of the buffer, -1 for unlimited length.
101
102 On success, return allocated size (>=0)
103 On failure, return -1
104 */
105 static inline Py_ssize_t
_BlocksOutputBuffer_InitAndGrow(_BlocksOutputBuffer * buffer,const Py_ssize_t max_length,void ** next_out)106 _BlocksOutputBuffer_InitAndGrow(_BlocksOutputBuffer *buffer,
107 const Py_ssize_t max_length,
108 void **next_out)
109 {
110 PyObject *b;
111 Py_ssize_t block_size;
112
113 // ensure .list was set to NULL
114 assert(buffer->list == NULL);
115
116 // get block size
117 if (0 <= max_length && max_length < BUFFER_BLOCK_SIZE[0]) {
118 block_size = max_length;
119 } else {
120 block_size = BUFFER_BLOCK_SIZE[0];
121 }
122
123 // the first block
124 b = PyBytes_FromStringAndSize(NULL, block_size);
125 if (b == NULL) {
126 return -1;
127 }
128
129 // create the list
130 buffer->list = PyList_New(1);
131 if (buffer->list == NULL) {
132 Py_DECREF(b);
133 return -1;
134 }
135 PyList_SET_ITEM(buffer->list, 0, b);
136
137 // set variables
138 buffer->allocated = block_size;
139 buffer->max_length = max_length;
140
141 *next_out = PyBytes_AS_STRING(b);
142 return block_size;
143 }
144
145 /* Initialize the buffer, with an initial size.
146
147 Check block size limit in the outer wrapper function. For example, some libs
148 accept UINT32_MAX as the maximum block size, then init_size should <= it.
149
150 On success, return allocated size (>=0)
151 On failure, return -1
152 */
153 static inline Py_ssize_t
_BlocksOutputBuffer_InitWithSize(_BlocksOutputBuffer * buffer,const Py_ssize_t init_size,void ** next_out)154 _BlocksOutputBuffer_InitWithSize(_BlocksOutputBuffer *buffer,
155 const Py_ssize_t init_size,
156 void **next_out)
157 {
158 PyObject *b;
159
160 // ensure .list was set to NULL
161 assert(buffer->list == NULL);
162
163 // the first block
164 b = PyBytes_FromStringAndSize(NULL, init_size);
165 if (b == NULL) {
166 PyErr_SetString(PyExc_MemoryError, unable_allocate_msg);
167 return -1;
168 }
169
170 // create the list
171 buffer->list = PyList_New(1);
172 if (buffer->list == NULL) {
173 Py_DECREF(b);
174 return -1;
175 }
176 PyList_SET_ITEM(buffer->list, 0, b);
177
178 // set variables
179 buffer->allocated = init_size;
180 buffer->max_length = -1;
181
182 *next_out = PyBytes_AS_STRING(b);
183 return init_size;
184 }
185
186 /* Grow the buffer. The avail_out must be 0, please check it before calling.
187
188 On success, return allocated size (>=0)
189 On failure, return -1
190 */
191 static inline Py_ssize_t
_BlocksOutputBuffer_Grow(_BlocksOutputBuffer * buffer,void ** next_out,const Py_ssize_t avail_out)192 _BlocksOutputBuffer_Grow(_BlocksOutputBuffer *buffer,
193 void **next_out,
194 const Py_ssize_t avail_out)
195 {
196 PyObject *b;
197 const Py_ssize_t list_len = Py_SIZE(buffer->list);
198 Py_ssize_t block_size;
199
200 // ensure no gaps in the data
201 if (avail_out != 0) {
202 PyErr_SetString(PyExc_SystemError,
203 "avail_out is non-zero in _BlocksOutputBuffer_Grow().");
204 return -1;
205 }
206
207 // get block size
208 if (list_len < (Py_ssize_t) Py_ARRAY_LENGTH(BUFFER_BLOCK_SIZE)) {
209 block_size = BUFFER_BLOCK_SIZE[list_len];
210 } else {
211 block_size = BUFFER_BLOCK_SIZE[Py_ARRAY_LENGTH(BUFFER_BLOCK_SIZE) - 1];
212 }
213
214 // check max_length
215 if (buffer->max_length >= 0) {
216 // if (rest == 0), should not grow the buffer.
217 Py_ssize_t rest = buffer->max_length - buffer->allocated;
218 assert(rest > 0);
219
220 // block_size of the last block
221 if (block_size > rest) {
222 block_size = rest;
223 }
224 }
225
226 // check buffer->allocated overflow
227 if (block_size > PY_SSIZE_T_MAX - buffer->allocated) {
228 PyErr_SetString(PyExc_MemoryError, unable_allocate_msg);
229 return -1;
230 }
231
232 // create the block
233 b = PyBytes_FromStringAndSize(NULL, block_size);
234 if (b == NULL) {
235 PyErr_SetString(PyExc_MemoryError, unable_allocate_msg);
236 return -1;
237 }
238 if (PyList_Append(buffer->list, b) < 0) {
239 Py_DECREF(b);
240 return -1;
241 }
242 Py_DECREF(b);
243
244 // set variables
245 buffer->allocated += block_size;
246
247 *next_out = PyBytes_AS_STRING(b);
248 return block_size;
249 }
250
251 /* Return the current outputted data size. */
252 static inline Py_ssize_t
_BlocksOutputBuffer_GetDataSize(_BlocksOutputBuffer * buffer,const Py_ssize_t avail_out)253 _BlocksOutputBuffer_GetDataSize(_BlocksOutputBuffer *buffer,
254 const Py_ssize_t avail_out)
255 {
256 return buffer->allocated - avail_out;
257 }
258
259 /* Finish the buffer.
260
261 Return a bytes object on success
262 Return NULL on failure
263 */
264 static inline PyObject *
_BlocksOutputBuffer_Finish(_BlocksOutputBuffer * buffer,const Py_ssize_t avail_out)265 _BlocksOutputBuffer_Finish(_BlocksOutputBuffer *buffer,
266 const Py_ssize_t avail_out)
267 {
268 PyObject *result, *block;
269 const Py_ssize_t list_len = Py_SIZE(buffer->list);
270
271 // fast path for single block
272 if ((list_len == 1 && avail_out == 0) ||
273 (list_len == 2 && Py_SIZE(PyList_GET_ITEM(buffer->list, 1)) == avail_out))
274 {
275 block = PyList_GET_ITEM(buffer->list, 0);
276 Py_INCREF(block);
277
278 Py_CLEAR(buffer->list);
279 return block;
280 }
281
282 // final bytes object
283 result = PyBytes_FromStringAndSize(NULL, buffer->allocated - avail_out);
284 if (result == NULL) {
285 PyErr_SetString(PyExc_MemoryError, unable_allocate_msg);
286 return NULL;
287 }
288
289 // memory copy
290 if (list_len > 0) {
291 char *posi = PyBytes_AS_STRING(result);
292
293 // blocks except the last one
294 Py_ssize_t i = 0;
295 for (; i < list_len-1; i++) {
296 block = PyList_GET_ITEM(buffer->list, i);
297 memcpy(posi, PyBytes_AS_STRING(block), Py_SIZE(block));
298 posi += Py_SIZE(block);
299 }
300 // the last block
301 block = PyList_GET_ITEM(buffer->list, i);
302 memcpy(posi, PyBytes_AS_STRING(block), Py_SIZE(block) - avail_out);
303 } else {
304 assert(Py_SIZE(result) == 0);
305 }
306
307 Py_CLEAR(buffer->list);
308 return result;
309 }
310
311 /* Clean up the buffer when an error occurred. */
312 static inline void
_BlocksOutputBuffer_OnError(_BlocksOutputBuffer * buffer)313 _BlocksOutputBuffer_OnError(_BlocksOutputBuffer *buffer)
314 {
315 Py_CLEAR(buffer->list);
316 }
317
318 #ifdef __cplusplus
319 }
320 #endif
321 #endif /* Py_INTERNAL_BLOCKS_OUTPUT_BUFFER_H */
322