• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2    _BlocksOutputBuffer is used to maintain an output buffer
3    that has unpredictable size. Suitable for compression/decompression
4    API (bz2/lzma/zlib) that has stream->next_out and stream->avail_out:
5 
6         stream->next_out:  point to the next output position.
7         stream->avail_out: the number of available bytes left in the buffer.
8 
   It maintains a list of bytes objects, so there is no overhead of resizing
   the buffer.
11 
12    Usage:
13 
14    1, Initialize the struct instance like this:
15         _BlocksOutputBuffer buffer = {.list = NULL};
16       Set .list to NULL for _BlocksOutputBuffer_OnError()
17 
   2, Initialize the buffer using one of these functions:
19         _BlocksOutputBuffer_InitAndGrow()
20         _BlocksOutputBuffer_InitWithSize()
21 
22    3, If (avail_out == 0), grow the buffer:
23         _BlocksOutputBuffer_Grow()
24 
25    4, Get the current outputted data size:
26         _BlocksOutputBuffer_GetDataSize()
27 
28    5, Finish the buffer, and return a bytes object:
29         _BlocksOutputBuffer_Finish()
30 
31    6, Clean up the buffer when an error occurred:
32         _BlocksOutputBuffer_OnError()
33 */
34 
35 #ifndef Py_INTERNAL_BLOCKS_OUTPUT_BUFFER_H
36 #define Py_INTERNAL_BLOCKS_OUTPUT_BUFFER_H
37 #ifdef __cplusplus
38 extern "C" {
39 #endif
40 
41 #include "Python.h"
42 
43 typedef struct {
44     // List of bytes objects
45     PyObject *list;
46     // Number of whole allocated size
47     Py_ssize_t allocated;
48     // Max length of the buffer, negative number means unlimited length.
49     Py_ssize_t max_length;
50 } _BlocksOutputBuffer;
51 
52 static const char unable_allocate_msg[] = "Unable to allocate output buffer.";
53 
54 /* In 32-bit build, the max block size should <= INT32_MAX. */
55 #define OUTPUT_BUFFER_MAX_BLOCK_SIZE (256*1024*1024)
56 
57 /* Block size sequence */
58 #define KB (1024)
59 #define MB (1024*1024)
60 static const Py_ssize_t BUFFER_BLOCK_SIZE[] =
61     { 32*KB, 64*KB, 256*KB, 1*MB, 4*MB, 8*MB, 16*MB, 16*MB,
62       32*MB, 32*MB, 32*MB, 32*MB, 64*MB, 64*MB, 128*MB, 128*MB,
63       OUTPUT_BUFFER_MAX_BLOCK_SIZE };
64 #undef KB
65 #undef MB
66 
67 /* According to the block sizes defined by BUFFER_BLOCK_SIZE, the whole
68    allocated size growth step is:
69     1   32 KB       +32 KB
70     2   96 KB       +64 KB
71     3   352 KB      +256 KB
72     4   1.34 MB     +1 MB
73     5   5.34 MB     +4 MB
74     6   13.34 MB    +8 MB
75     7   29.34 MB    +16 MB
76     8   45.34 MB    +16 MB
77     9   77.34 MB    +32 MB
78     10  109.34 MB   +32 MB
79     11  141.34 MB   +32 MB
80     12  173.34 MB   +32 MB
81     13  237.34 MB   +64 MB
82     14  301.34 MB   +64 MB
83     15  429.34 MB   +128 MB
84     16  557.34 MB   +128 MB
85     17  813.34 MB   +256 MB
86     18  1069.34 MB  +256 MB
87     19  1325.34 MB  +256 MB
88     20  1581.34 MB  +256 MB
89     21  1837.34 MB  +256 MB
90     22  2093.34 MB  +256 MB
91     ...
92 */
93 
94 /* Initialize the buffer, and grow the buffer.
95 
96    max_length: Max length of the buffer, -1 for unlimited length.
97 
98    On success, return allocated size (>=0)
99    On failure, return -1
100 */
101 static inline Py_ssize_t
_BlocksOutputBuffer_InitAndGrow(_BlocksOutputBuffer * buffer,const Py_ssize_t max_length,void ** next_out)102 _BlocksOutputBuffer_InitAndGrow(_BlocksOutputBuffer *buffer,
103                                 const Py_ssize_t max_length,
104                                 void **next_out)
105 {
106     PyObject *b;
107     Py_ssize_t block_size;
108 
109     // ensure .list was set to NULL
110     assert(buffer->list == NULL);
111 
112     // get block size
113     if (0 <= max_length && max_length < BUFFER_BLOCK_SIZE[0]) {
114         block_size = max_length;
115     } else {
116         block_size = BUFFER_BLOCK_SIZE[0];
117     }
118 
119     // the first block
120     b = PyBytes_FromStringAndSize(NULL, block_size);
121     if (b == NULL) {
122         return -1;
123     }
124 
125     // create the list
126     buffer->list = PyList_New(1);
127     if (buffer->list == NULL) {
128         Py_DECREF(b);
129         return -1;
130     }
131     PyList_SET_ITEM(buffer->list, 0, b);
132 
133     // set variables
134     buffer->allocated = block_size;
135     buffer->max_length = max_length;
136 
137     *next_out = PyBytes_AS_STRING(b);
138     return block_size;
139 }
140 
141 /* Initialize the buffer, with an initial size.
142 
143    Check block size limit in the outer wrapper function. For example, some libs
144    accept UINT32_MAX as the maximum block size, then init_size should <= it.
145 
146    On success, return allocated size (>=0)
147    On failure, return -1
148 */
149 static inline Py_ssize_t
_BlocksOutputBuffer_InitWithSize(_BlocksOutputBuffer * buffer,const Py_ssize_t init_size,void ** next_out)150 _BlocksOutputBuffer_InitWithSize(_BlocksOutputBuffer *buffer,
151                                  const Py_ssize_t init_size,
152                                  void **next_out)
153 {
154     PyObject *b;
155 
156     // ensure .list was set to NULL
157     assert(buffer->list == NULL);
158 
159     // the first block
160     b = PyBytes_FromStringAndSize(NULL, init_size);
161     if (b == NULL) {
162         PyErr_SetString(PyExc_MemoryError, unable_allocate_msg);
163         return -1;
164     }
165 
166     // create the list
167     buffer->list = PyList_New(1);
168     if (buffer->list == NULL) {
169         Py_DECREF(b);
170         return -1;
171     }
172     PyList_SET_ITEM(buffer->list, 0, b);
173 
174     // set variables
175     buffer->allocated = init_size;
176     buffer->max_length = -1;
177 
178     *next_out = PyBytes_AS_STRING(b);
179     return init_size;
180 }
181 
182 /* Grow the buffer. The avail_out must be 0, please check it before calling.
183 
184    On success, return allocated size (>=0)
185    On failure, return -1
186 */
187 static inline Py_ssize_t
_BlocksOutputBuffer_Grow(_BlocksOutputBuffer * buffer,void ** next_out,const Py_ssize_t avail_out)188 _BlocksOutputBuffer_Grow(_BlocksOutputBuffer *buffer,
189                          void **next_out,
190                          const Py_ssize_t avail_out)
191 {
192     PyObject *b;
193     const Py_ssize_t list_len = Py_SIZE(buffer->list);
194     Py_ssize_t block_size;
195 
196     // ensure no gaps in the data
197     if (avail_out != 0) {
198         PyErr_SetString(PyExc_SystemError,
199                         "avail_out is non-zero in _BlocksOutputBuffer_Grow().");
200         return -1;
201     }
202 
203     // get block size
204     if (list_len < (Py_ssize_t) Py_ARRAY_LENGTH(BUFFER_BLOCK_SIZE)) {
205         block_size = BUFFER_BLOCK_SIZE[list_len];
206     } else {
207         block_size = BUFFER_BLOCK_SIZE[Py_ARRAY_LENGTH(BUFFER_BLOCK_SIZE) - 1];
208     }
209 
210     // check max_length
211     if (buffer->max_length >= 0) {
212         // if (rest == 0), should not grow the buffer.
213         Py_ssize_t rest = buffer->max_length - buffer->allocated;
214         assert(rest > 0);
215 
216         // block_size of the last block
217         if (block_size > rest) {
218             block_size = rest;
219         }
220     }
221 
222     // check buffer->allocated overflow
223     if (block_size > PY_SSIZE_T_MAX - buffer->allocated) {
224         PyErr_SetString(PyExc_MemoryError, unable_allocate_msg);
225         return -1;
226     }
227 
228     // create the block
229     b = PyBytes_FromStringAndSize(NULL, block_size);
230     if (b == NULL) {
231         PyErr_SetString(PyExc_MemoryError, unable_allocate_msg);
232         return -1;
233     }
234     if (PyList_Append(buffer->list, b) < 0) {
235         Py_DECREF(b);
236         return -1;
237     }
238     Py_DECREF(b);
239 
240     // set variables
241     buffer->allocated += block_size;
242 
243     *next_out = PyBytes_AS_STRING(b);
244     return block_size;
245 }
246 
247 /* Return the current outputted data size. */
248 static inline Py_ssize_t
_BlocksOutputBuffer_GetDataSize(_BlocksOutputBuffer * buffer,const Py_ssize_t avail_out)249 _BlocksOutputBuffer_GetDataSize(_BlocksOutputBuffer *buffer,
250                                 const Py_ssize_t avail_out)
251 {
252     return buffer->allocated - avail_out;
253 }
254 
255 /* Finish the buffer.
256 
257    Return a bytes object on success
258    Return NULL on failure
259 */
260 static inline PyObject *
_BlocksOutputBuffer_Finish(_BlocksOutputBuffer * buffer,const Py_ssize_t avail_out)261 _BlocksOutputBuffer_Finish(_BlocksOutputBuffer *buffer,
262                            const Py_ssize_t avail_out)
263 {
264     PyObject *result, *block;
265     const Py_ssize_t list_len = Py_SIZE(buffer->list);
266 
267     // fast path for single block
268     if ((list_len == 1 && avail_out == 0) ||
269         (list_len == 2 && Py_SIZE(PyList_GET_ITEM(buffer->list, 1)) == avail_out))
270     {
271         block = PyList_GET_ITEM(buffer->list, 0);
272         Py_INCREF(block);
273 
274         Py_CLEAR(buffer->list);
275         return block;
276     }
277 
278     // final bytes object
279     result = PyBytes_FromStringAndSize(NULL, buffer->allocated - avail_out);
280     if (result == NULL) {
281         PyErr_SetString(PyExc_MemoryError, unable_allocate_msg);
282         return NULL;
283     }
284 
285     // memory copy
286     if (list_len > 0) {
287         char *posi = PyBytes_AS_STRING(result);
288 
289         // blocks except the last one
290         Py_ssize_t i = 0;
291         for (; i < list_len-1; i++) {
292             block = PyList_GET_ITEM(buffer->list, i);
293             memcpy(posi, PyBytes_AS_STRING(block), Py_SIZE(block));
294             posi += Py_SIZE(block);
295         }
296         // the last block
297         block = PyList_GET_ITEM(buffer->list, i);
298         memcpy(posi, PyBytes_AS_STRING(block), Py_SIZE(block) - avail_out);
299     } else {
300         assert(Py_SIZE(result) == 0);
301     }
302 
303     Py_CLEAR(buffer->list);
304     return result;
305 }
306 
307 /* Clean up the buffer when an error occurred. */
308 static inline void
_BlocksOutputBuffer_OnError(_BlocksOutputBuffer * buffer)309 _BlocksOutputBuffer_OnError(_BlocksOutputBuffer *buffer)
310 {
311     Py_CLEAR(buffer->list);
312 }
313 
314 #ifdef __cplusplus
315 }
316 #endif
317 #endif /* Py_INTERNAL_BLOCKS_OUTPUT_BUFFER_H */