• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
   _BlocksOutputBuffer is used to maintain an output buffer whose final
   size is unpredictable. It is suitable for compression/decompression
   APIs (bz2/lzma/zlib) that expose stream->next_out and stream->avail_out:

        stream->next_out:  points to the next output position.
        stream->avail_out: the number of available bytes left in the buffer.

   It maintains a list of bytes objects, so there is no overhead of resizing
   the buffer.

   Usage:

   1. Initialize the struct instance like this:
        _BlocksOutputBuffer buffer = {.list = NULL};
      Setting .list to NULL is required by _BlocksOutputBuffer_OnError().

   2. Initialize the buffer using one of these functions:
        _BlocksOutputBuffer_InitAndGrow()
        _BlocksOutputBuffer_InitWithSize()

   3. If (avail_out == 0), grow the buffer:
        _BlocksOutputBuffer_Grow()

   4. Get the size of the data output so far:
        _BlocksOutputBuffer_GetDataSize()

   5. Finish the buffer, and return a bytes object:
        _BlocksOutputBuffer_Finish()

   6. Clean up the buffer when an error has occurred:
        _BlocksOutputBuffer_OnError()
*/
34  
35  #ifndef Py_INTERNAL_BLOCKS_OUTPUT_BUFFER_H
36  #define Py_INTERNAL_BLOCKS_OUTPUT_BUFFER_H
37  #ifdef __cplusplus
38  extern "C" {
39  #endif
40  
41  #include "Python.h"
42  
43  typedef struct {
44      // List of bytes objects
45      PyObject *list;
46      // Number of whole allocated size
47      Py_ssize_t allocated;
48      // Max length of the buffer, negative number means unlimited length.
49      Py_ssize_t max_length;
50  } _BlocksOutputBuffer;
51  
52  static const char unable_allocate_msg[] = "Unable to allocate output buffer.";
53  
54  /* In 32-bit build, the max block size should <= INT32_MAX. */
55  #define OUTPUT_BUFFER_MAX_BLOCK_SIZE (256*1024*1024)
56  
57  /* Block size sequence */
58  #define KB (1024)
59  #define MB (1024*1024)
60  static const Py_ssize_t BUFFER_BLOCK_SIZE[] =
61      { 32*KB, 64*KB, 256*KB, 1*MB, 4*MB, 8*MB, 16*MB, 16*MB,
62        32*MB, 32*MB, 32*MB, 32*MB, 64*MB, 64*MB, 128*MB, 128*MB,
63        OUTPUT_BUFFER_MAX_BLOCK_SIZE };
64  #undef KB
65  #undef MB
66  
67  /* According to the block sizes defined by BUFFER_BLOCK_SIZE, the whole
68     allocated size growth step is:
69      1   32 KB       +32 KB
70      2   96 KB       +64 KB
71      3   352 KB      +256 KB
72      4   1.34 MB     +1 MB
73      5   5.34 MB     +4 MB
74      6   13.34 MB    +8 MB
75      7   29.34 MB    +16 MB
76      8   45.34 MB    +16 MB
77      9   77.34 MB    +32 MB
78      10  109.34 MB   +32 MB
79      11  141.34 MB   +32 MB
80      12  173.34 MB   +32 MB
81      13  237.34 MB   +64 MB
82      14  301.34 MB   +64 MB
83      15  429.34 MB   +128 MB
84      16  557.34 MB   +128 MB
85      17  813.34 MB   +256 MB
86      18  1069.34 MB  +256 MB
87      19  1325.34 MB  +256 MB
88      20  1581.34 MB  +256 MB
89      21  1837.34 MB  +256 MB
90      22  2093.34 MB  +256 MB
91      ...
92  */
93  
94  /* Initialize the buffer, and grow the buffer.
95  
96     max_length: Max length of the buffer, -1 for unlimited length.
97  
98     On success, return allocated size (>=0)
99     On failure, return -1
100  */
101  static inline Py_ssize_t
_BlocksOutputBuffer_InitAndGrow(_BlocksOutputBuffer * buffer,const Py_ssize_t max_length,void ** next_out)102  _BlocksOutputBuffer_InitAndGrow(_BlocksOutputBuffer *buffer,
103                                  const Py_ssize_t max_length,
104                                  void **next_out)
105  {
106      PyObject *b;
107      Py_ssize_t block_size;
108  
109      // ensure .list was set to NULL
110      assert(buffer->list == NULL);
111  
112      // get block size
113      if (0 <= max_length && max_length < BUFFER_BLOCK_SIZE[0]) {
114          block_size = max_length;
115      } else {
116          block_size = BUFFER_BLOCK_SIZE[0];
117      }
118  
119      // the first block
120      b = PyBytes_FromStringAndSize(NULL, block_size);
121      if (b == NULL) {
122          return -1;
123      }
124  
125      // create the list
126      buffer->list = PyList_New(1);
127      if (buffer->list == NULL) {
128          Py_DECREF(b);
129          return -1;
130      }
131      PyList_SET_ITEM(buffer->list, 0, b);
132  
133      // set variables
134      buffer->allocated = block_size;
135      buffer->max_length = max_length;
136  
137      *next_out = PyBytes_AS_STRING(b);
138      return block_size;
139  }
140  
141  /* Initialize the buffer, with an initial size.
142  
143     Check block size limit in the outer wrapper function. For example, some libs
144     accept UINT32_MAX as the maximum block size, then init_size should <= it.
145  
146     On success, return allocated size (>=0)
147     On failure, return -1
148  */
149  static inline Py_ssize_t
_BlocksOutputBuffer_InitWithSize(_BlocksOutputBuffer * buffer,const Py_ssize_t init_size,void ** next_out)150  _BlocksOutputBuffer_InitWithSize(_BlocksOutputBuffer *buffer,
151                                   const Py_ssize_t init_size,
152                                   void **next_out)
153  {
154      PyObject *b;
155  
156      // ensure .list was set to NULL
157      assert(buffer->list == NULL);
158  
159      // the first block
160      b = PyBytes_FromStringAndSize(NULL, init_size);
161      if (b == NULL) {
162          PyErr_SetString(PyExc_MemoryError, unable_allocate_msg);
163          return -1;
164      }
165  
166      // create the list
167      buffer->list = PyList_New(1);
168      if (buffer->list == NULL) {
169          Py_DECREF(b);
170          return -1;
171      }
172      PyList_SET_ITEM(buffer->list, 0, b);
173  
174      // set variables
175      buffer->allocated = init_size;
176      buffer->max_length = -1;
177  
178      *next_out = PyBytes_AS_STRING(b);
179      return init_size;
180  }
181  
182  /* Grow the buffer. The avail_out must be 0, please check it before calling.
183  
184     On success, return allocated size (>=0)
185     On failure, return -1
186  */
187  static inline Py_ssize_t
_BlocksOutputBuffer_Grow(_BlocksOutputBuffer * buffer,void ** next_out,const Py_ssize_t avail_out)188  _BlocksOutputBuffer_Grow(_BlocksOutputBuffer *buffer,
189                           void **next_out,
190                           const Py_ssize_t avail_out)
191  {
192      PyObject *b;
193      const Py_ssize_t list_len = Py_SIZE(buffer->list);
194      Py_ssize_t block_size;
195  
196      // ensure no gaps in the data
197      if (avail_out != 0) {
198          PyErr_SetString(PyExc_SystemError,
199                          "avail_out is non-zero in _BlocksOutputBuffer_Grow().");
200          return -1;
201      }
202  
203      // get block size
204      if (list_len < (Py_ssize_t) Py_ARRAY_LENGTH(BUFFER_BLOCK_SIZE)) {
205          block_size = BUFFER_BLOCK_SIZE[list_len];
206      } else {
207          block_size = BUFFER_BLOCK_SIZE[Py_ARRAY_LENGTH(BUFFER_BLOCK_SIZE) - 1];
208      }
209  
210      // check max_length
211      if (buffer->max_length >= 0) {
212          // if (rest == 0), should not grow the buffer.
213          Py_ssize_t rest = buffer->max_length - buffer->allocated;
214          assert(rest > 0);
215  
216          // block_size of the last block
217          if (block_size > rest) {
218              block_size = rest;
219          }
220      }
221  
222      // check buffer->allocated overflow
223      if (block_size > PY_SSIZE_T_MAX - buffer->allocated) {
224          PyErr_SetString(PyExc_MemoryError, unable_allocate_msg);
225          return -1;
226      }
227  
228      // create the block
229      b = PyBytes_FromStringAndSize(NULL, block_size);
230      if (b == NULL) {
231          PyErr_SetString(PyExc_MemoryError, unable_allocate_msg);
232          return -1;
233      }
234      if (PyList_Append(buffer->list, b) < 0) {
235          Py_DECREF(b);
236          return -1;
237      }
238      Py_DECREF(b);
239  
240      // set variables
241      buffer->allocated += block_size;
242  
243      *next_out = PyBytes_AS_STRING(b);
244      return block_size;
245  }
246  
247  /* Return the current outputted data size. */
248  static inline Py_ssize_t
_BlocksOutputBuffer_GetDataSize(_BlocksOutputBuffer * buffer,const Py_ssize_t avail_out)249  _BlocksOutputBuffer_GetDataSize(_BlocksOutputBuffer *buffer,
250                                  const Py_ssize_t avail_out)
251  {
252      return buffer->allocated - avail_out;
253  }
254  
255  /* Finish the buffer.
256  
257     Return a bytes object on success
258     Return NULL on failure
259  */
260  static inline PyObject *
_BlocksOutputBuffer_Finish(_BlocksOutputBuffer * buffer,const Py_ssize_t avail_out)261  _BlocksOutputBuffer_Finish(_BlocksOutputBuffer *buffer,
262                             const Py_ssize_t avail_out)
263  {
264      PyObject *result, *block;
265      const Py_ssize_t list_len = Py_SIZE(buffer->list);
266  
267      // fast path for single block
268      if ((list_len == 1 && avail_out == 0) ||
269          (list_len == 2 && Py_SIZE(PyList_GET_ITEM(buffer->list, 1)) == avail_out))
270      {
271          block = PyList_GET_ITEM(buffer->list, 0);
272          Py_INCREF(block);
273  
274          Py_CLEAR(buffer->list);
275          return block;
276      }
277  
278      // final bytes object
279      result = PyBytes_FromStringAndSize(NULL, buffer->allocated - avail_out);
280      if (result == NULL) {
281          PyErr_SetString(PyExc_MemoryError, unable_allocate_msg);
282          return NULL;
283      }
284  
285      // memory copy
286      if (list_len > 0) {
287          char *posi = PyBytes_AS_STRING(result);
288  
289          // blocks except the last one
290          Py_ssize_t i = 0;
291          for (; i < list_len-1; i++) {
292              block = PyList_GET_ITEM(buffer->list, i);
293              memcpy(posi, PyBytes_AS_STRING(block), Py_SIZE(block));
294              posi += Py_SIZE(block);
295          }
296          // the last block
297          block = PyList_GET_ITEM(buffer->list, i);
298          memcpy(posi, PyBytes_AS_STRING(block), Py_SIZE(block) - avail_out);
299      } else {
300          assert(Py_SIZE(result) == 0);
301      }
302  
303      Py_CLEAR(buffer->list);
304      return result;
305  }
306  
307  /* Clean up the buffer when an error occurred. */
308  static inline void
_BlocksOutputBuffer_OnError(_BlocksOutputBuffer * buffer)309  _BlocksOutputBuffer_OnError(_BlocksOutputBuffer *buffer)
310  {
311      Py_CLEAR(buffer->list);
312  }
313  
314  #ifdef __cplusplus
315  }
316  #endif
317  #endif /* Py_INTERNAL_BLOCKS_OUTPUT_BUFFER_H */