• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2    _BlocksOutputBuffer is used to maintain an output buffer
3    that has unpredictable size. Suitable for compression/decompression
4    API (bz2/lzma/zlib) that has stream->next_out and stream->avail_out:
5 
6         stream->next_out:  point to the next output position.
7         stream->avail_out: the number of available bytes left in the buffer.
8 
9    It maintains a list of bytes object, so there is no overhead of resizing
10    the buffer.
11 
12    Usage:
13 
14    1, Initialize the struct instance like this:
15         _BlocksOutputBuffer buffer = {.list = NULL};
16       Set .list to NULL for _BlocksOutputBuffer_OnError()
17 
18    2, Initialize the buffer use one of these functions:
19         _BlocksOutputBuffer_InitAndGrow()
20         _BlocksOutputBuffer_InitWithSize()
21 
22    3, If (avail_out == 0), grow the buffer:
23         _BlocksOutputBuffer_Grow()
24 
25    4, Get the current outputted data size:
26         _BlocksOutputBuffer_GetDataSize()
27 
28    5, Finish the buffer, and return a bytes object:
29         _BlocksOutputBuffer_Finish()
30 
31    6, Clean up the buffer when an error occurred:
32         _BlocksOutputBuffer_OnError()
33 */
34 
35 #ifndef Py_INTERNAL_BLOCKS_OUTPUT_BUFFER_H
36 #define Py_INTERNAL_BLOCKS_OUTPUT_BUFFER_H
37 #ifdef __cplusplus
38 extern "C" {
39 #endif
40 
41 #include "Python.h"
42 
43 #ifndef Py_BUILD_CORE
44 #  error "this header requires Py_BUILD_CORE define"
45 #endif
46 
47 typedef struct {
48     // List of bytes objects
49     PyObject *list;
50     // Number of whole allocated size
51     Py_ssize_t allocated;
52     // Max length of the buffer, negative number means unlimited length.
53     Py_ssize_t max_length;
54 } _BlocksOutputBuffer;
55 
56 static const char unable_allocate_msg[] = "Unable to allocate output buffer.";
57 
58 /* In 32-bit build, the max block size should <= INT32_MAX. */
59 #define OUTPUT_BUFFER_MAX_BLOCK_SIZE (256*1024*1024)
60 
61 /* Block size sequence */
62 #define KB (1024)
63 #define MB (1024*1024)
64 static const Py_ssize_t BUFFER_BLOCK_SIZE[] =
65     { 32*KB, 64*KB, 256*KB, 1*MB, 4*MB, 8*MB, 16*MB, 16*MB,
66       32*MB, 32*MB, 32*MB, 32*MB, 64*MB, 64*MB, 128*MB, 128*MB,
67       OUTPUT_BUFFER_MAX_BLOCK_SIZE };
68 #undef KB
69 #undef MB
70 
71 /* According to the block sizes defined by BUFFER_BLOCK_SIZE, the whole
72    allocated size growth step is:
73     1   32 KB       +32 KB
74     2   96 KB       +64 KB
75     3   352 KB      +256 KB
76     4   1.34 MB     +1 MB
77     5   5.34 MB     +4 MB
78     6   13.34 MB    +8 MB
79     7   29.34 MB    +16 MB
80     8   45.34 MB    +16 MB
81     9   77.34 MB    +32 MB
82     10  109.34 MB   +32 MB
83     11  141.34 MB   +32 MB
84     12  173.34 MB   +32 MB
85     13  237.34 MB   +64 MB
86     14  301.34 MB   +64 MB
87     15  429.34 MB   +128 MB
88     16  557.34 MB   +128 MB
89     17  813.34 MB   +256 MB
90     18  1069.34 MB  +256 MB
91     19  1325.34 MB  +256 MB
92     20  1581.34 MB  +256 MB
93     21  1837.34 MB  +256 MB
94     22  2093.34 MB  +256 MB
95     ...
96 */
97 
98 /* Initialize the buffer, and grow the buffer.
99 
100    max_length: Max length of the buffer, -1 for unlimited length.
101 
102    On success, return allocated size (>=0)
103    On failure, return -1
104 */
105 static inline Py_ssize_t
_BlocksOutputBuffer_InitAndGrow(_BlocksOutputBuffer * buffer,const Py_ssize_t max_length,void ** next_out)106 _BlocksOutputBuffer_InitAndGrow(_BlocksOutputBuffer *buffer,
107                                 const Py_ssize_t max_length,
108                                 void **next_out)
109 {
110     PyObject *b;
111     Py_ssize_t block_size;
112 
113     // ensure .list was set to NULL
114     assert(buffer->list == NULL);
115 
116     // get block size
117     if (0 <= max_length && max_length < BUFFER_BLOCK_SIZE[0]) {
118         block_size = max_length;
119     } else {
120         block_size = BUFFER_BLOCK_SIZE[0];
121     }
122 
123     // the first block
124     b = PyBytes_FromStringAndSize(NULL, block_size);
125     if (b == NULL) {
126         return -1;
127     }
128 
129     // create the list
130     buffer->list = PyList_New(1);
131     if (buffer->list == NULL) {
132         Py_DECREF(b);
133         return -1;
134     }
135     PyList_SET_ITEM(buffer->list, 0, b);
136 
137     // set variables
138     buffer->allocated = block_size;
139     buffer->max_length = max_length;
140 
141     *next_out = PyBytes_AS_STRING(b);
142     return block_size;
143 }
144 
145 /* Initialize the buffer, with an initial size.
146 
147    Check block size limit in the outer wrapper function. For example, some libs
148    accept UINT32_MAX as the maximum block size, then init_size should <= it.
149 
150    On success, return allocated size (>=0)
151    On failure, return -1
152 */
153 static inline Py_ssize_t
_BlocksOutputBuffer_InitWithSize(_BlocksOutputBuffer * buffer,const Py_ssize_t init_size,void ** next_out)154 _BlocksOutputBuffer_InitWithSize(_BlocksOutputBuffer *buffer,
155                                  const Py_ssize_t init_size,
156                                  void **next_out)
157 {
158     PyObject *b;
159 
160     // ensure .list was set to NULL
161     assert(buffer->list == NULL);
162 
163     // the first block
164     b = PyBytes_FromStringAndSize(NULL, init_size);
165     if (b == NULL) {
166         PyErr_SetString(PyExc_MemoryError, unable_allocate_msg);
167         return -1;
168     }
169 
170     // create the list
171     buffer->list = PyList_New(1);
172     if (buffer->list == NULL) {
173         Py_DECREF(b);
174         return -1;
175     }
176     PyList_SET_ITEM(buffer->list, 0, b);
177 
178     // set variables
179     buffer->allocated = init_size;
180     buffer->max_length = -1;
181 
182     *next_out = PyBytes_AS_STRING(b);
183     return init_size;
184 }
185 
186 /* Grow the buffer. The avail_out must be 0, please check it before calling.
187 
188    On success, return allocated size (>=0)
189    On failure, return -1
190 */
191 static inline Py_ssize_t
_BlocksOutputBuffer_Grow(_BlocksOutputBuffer * buffer,void ** next_out,const Py_ssize_t avail_out)192 _BlocksOutputBuffer_Grow(_BlocksOutputBuffer *buffer,
193                          void **next_out,
194                          const Py_ssize_t avail_out)
195 {
196     PyObject *b;
197     const Py_ssize_t list_len = Py_SIZE(buffer->list);
198     Py_ssize_t block_size;
199 
200     // ensure no gaps in the data
201     if (avail_out != 0) {
202         PyErr_SetString(PyExc_SystemError,
203                         "avail_out is non-zero in _BlocksOutputBuffer_Grow().");
204         return -1;
205     }
206 
207     // get block size
208     if (list_len < (Py_ssize_t) Py_ARRAY_LENGTH(BUFFER_BLOCK_SIZE)) {
209         block_size = BUFFER_BLOCK_SIZE[list_len];
210     } else {
211         block_size = BUFFER_BLOCK_SIZE[Py_ARRAY_LENGTH(BUFFER_BLOCK_SIZE) - 1];
212     }
213 
214     // check max_length
215     if (buffer->max_length >= 0) {
216         // if (rest == 0), should not grow the buffer.
217         Py_ssize_t rest = buffer->max_length - buffer->allocated;
218         assert(rest > 0);
219 
220         // block_size of the last block
221         if (block_size > rest) {
222             block_size = rest;
223         }
224     }
225 
226     // check buffer->allocated overflow
227     if (block_size > PY_SSIZE_T_MAX - buffer->allocated) {
228         PyErr_SetString(PyExc_MemoryError, unable_allocate_msg);
229         return -1;
230     }
231 
232     // create the block
233     b = PyBytes_FromStringAndSize(NULL, block_size);
234     if (b == NULL) {
235         PyErr_SetString(PyExc_MemoryError, unable_allocate_msg);
236         return -1;
237     }
238     if (PyList_Append(buffer->list, b) < 0) {
239         Py_DECREF(b);
240         return -1;
241     }
242     Py_DECREF(b);
243 
244     // set variables
245     buffer->allocated += block_size;
246 
247     *next_out = PyBytes_AS_STRING(b);
248     return block_size;
249 }
250 
251 /* Return the current outputted data size. */
252 static inline Py_ssize_t
_BlocksOutputBuffer_GetDataSize(_BlocksOutputBuffer * buffer,const Py_ssize_t avail_out)253 _BlocksOutputBuffer_GetDataSize(_BlocksOutputBuffer *buffer,
254                                 const Py_ssize_t avail_out)
255 {
256     return buffer->allocated - avail_out;
257 }
258 
259 /* Finish the buffer.
260 
261    Return a bytes object on success
262    Return NULL on failure
263 */
264 static inline PyObject *
_BlocksOutputBuffer_Finish(_BlocksOutputBuffer * buffer,const Py_ssize_t avail_out)265 _BlocksOutputBuffer_Finish(_BlocksOutputBuffer *buffer,
266                            const Py_ssize_t avail_out)
267 {
268     PyObject *result, *block;
269     const Py_ssize_t list_len = Py_SIZE(buffer->list);
270 
271     // fast path for single block
272     if ((list_len == 1 && avail_out == 0) ||
273         (list_len == 2 && Py_SIZE(PyList_GET_ITEM(buffer->list, 1)) == avail_out))
274     {
275         block = PyList_GET_ITEM(buffer->list, 0);
276         Py_INCREF(block);
277 
278         Py_CLEAR(buffer->list);
279         return block;
280     }
281 
282     // final bytes object
283     result = PyBytes_FromStringAndSize(NULL, buffer->allocated - avail_out);
284     if (result == NULL) {
285         PyErr_SetString(PyExc_MemoryError, unable_allocate_msg);
286         return NULL;
287     }
288 
289     // memory copy
290     if (list_len > 0) {
291         char *posi = PyBytes_AS_STRING(result);
292 
293         // blocks except the last one
294         Py_ssize_t i = 0;
295         for (; i < list_len-1; i++) {
296             block = PyList_GET_ITEM(buffer->list, i);
297             memcpy(posi, PyBytes_AS_STRING(block), Py_SIZE(block));
298             posi += Py_SIZE(block);
299         }
300         // the last block
301         block = PyList_GET_ITEM(buffer->list, i);
302         memcpy(posi, PyBytes_AS_STRING(block), Py_SIZE(block) - avail_out);
303     } else {
304         assert(Py_SIZE(result) == 0);
305     }
306 
307     Py_CLEAR(buffer->list);
308     return result;
309 }
310 
311 /* Clean up the buffer when an error occurred. */
312 static inline void
_BlocksOutputBuffer_OnError(_BlocksOutputBuffer * buffer)313 _BlocksOutputBuffer_OnError(_BlocksOutputBuffer *buffer)
314 {
315     Py_CLEAR(buffer->list);
316 }
317 
318 #ifdef __cplusplus
319 }
320 #endif
321 #endif /* Py_INTERNAL_BLOCKS_OUTPUT_BUFFER_H */
322