1 /* 2 _BlocksOutputBuffer is used to maintain an output buffer 3 that has unpredictable size. Suitable for compression/decompression 4 API (bz2/lzma/zlib) that has stream->next_out and stream->avail_out: 5 6 stream->next_out: point to the next output position. 7 stream->avail_out: the number of available bytes left in the buffer. 8 9 It maintains a list of bytes object, so there is no overhead of resizing 10 the buffer. 11 12 Usage: 13 14 1, Initialize the struct instance like this: 15 _BlocksOutputBuffer buffer = {.list = NULL}; 16 Set .list to NULL for _BlocksOutputBuffer_OnError() 17 18 2, Initialize the buffer use one of these functions: 19 _BlocksOutputBuffer_InitAndGrow() 20 _BlocksOutputBuffer_InitWithSize() 21 22 3, If (avail_out == 0), grow the buffer: 23 _BlocksOutputBuffer_Grow() 24 25 4, Get the current outputted data size: 26 _BlocksOutputBuffer_GetDataSize() 27 28 5, Finish the buffer, and return a bytes object: 29 _BlocksOutputBuffer_Finish() 30 31 6, Clean up the buffer when an error occurred: 32 _BlocksOutputBuffer_OnError() 33 */ 34 35 #ifndef Py_INTERNAL_BLOCKS_OUTPUT_BUFFER_H 36 #define Py_INTERNAL_BLOCKS_OUTPUT_BUFFER_H 37 #ifdef __cplusplus 38 extern "C" { 39 #endif 40 41 #include "Python.h" 42 43 typedef struct { 44 // List of bytes objects 45 PyObject *list; 46 // Number of whole allocated size 47 Py_ssize_t allocated; 48 // Max length of the buffer, negative number means unlimited length. 49 Py_ssize_t max_length; 50 } _BlocksOutputBuffer; 51 52 static const char unable_allocate_msg[] = "Unable to allocate output buffer."; 53 54 /* In 32-bit build, the max block size should <= INT32_MAX. */ 55 #define OUTPUT_BUFFER_MAX_BLOCK_SIZE (256*1024*1024) 56 57 /* Block size sequence */ 58 #define KB (1024) 59 #define MB (1024*1024) 60 static const Py_ssize_t BUFFER_BLOCK_SIZE[] = 61 { 32*KB, 64*KB, 256*KB, 1*MB, 4*MB, 8*MB, 16*MB, 16*MB, 62 32*MB, 32*MB, 32*MB, 32*MB, 64*MB, 64*MB, 128*MB, 128*MB, 63 OUTPUT_BUFFER_MAX_BLOCK_SIZE }; 64 #undef KB 65 #undef MB 66 67 /* According to the block sizes defined by BUFFER_BLOCK_SIZE, the whole 68 allocated size growth step is: 69 1 32 KB +32 KB 70 2 96 KB +64 KB 71 3 352 KB +256 KB 72 4 1.34 MB +1 MB 73 5 5.34 MB +4 MB 74 6 13.34 MB +8 MB 75 7 29.34 MB +16 MB 76 8 45.34 MB +16 MB 77 9 77.34 MB +32 MB 78 10 109.34 MB +32 MB 79 11 141.34 MB +32 MB 80 12 173.34 MB +32 MB 81 13 237.34 MB +64 MB 82 14 301.34 MB +64 MB 83 15 429.34 MB +128 MB 84 16 557.34 MB +128 MB 85 17 813.34 MB +256 MB 86 18 1069.34 MB +256 MB 87 19 1325.34 MB +256 MB 88 20 1581.34 MB +256 MB 89 21 1837.34 MB +256 MB 90 22 2093.34 MB +256 MB 91 ... 92 */ 93 94 /* Initialize the buffer, and grow the buffer. 95 96 max_length: Max length of the buffer, -1 for unlimited length. 97 98 On success, return allocated size (>=0) 99 On failure, return -1 100 */ 101 static inline Py_ssize_t _BlocksOutputBuffer_InitAndGrow(_BlocksOutputBuffer * buffer,const Py_ssize_t max_length,void ** next_out)102 _BlocksOutputBuffer_InitAndGrow(_BlocksOutputBuffer *buffer, 103 const Py_ssize_t max_length, 104 void **next_out) 105 { 106 PyObject *b; 107 Py_ssize_t block_size; 108 109 // ensure .list was set to NULL 110 assert(buffer->list == NULL); 111 112 // get block size 113 if (0 <= max_length && max_length < BUFFER_BLOCK_SIZE[0]) { 114 block_size = max_length; 115 } else { 116 block_size = BUFFER_BLOCK_SIZE[0]; 117 } 118 119 // the first block 120 b = PyBytes_FromStringAndSize(NULL, block_size); 121 if (b == NULL) { 122 return -1; 123 } 124 125 // create the list 126 buffer->list = PyList_New(1); 127 if (buffer->list == NULL) { 128 Py_DECREF(b); 129 return -1; 130 } 131 PyList_SET_ITEM(buffer->list, 0, b); 132 133 // set variables 134 buffer->allocated = block_size; 135 buffer->max_length = max_length; 136 137 *next_out = PyBytes_AS_STRING(b); 138 return block_size; 139 } 140 141 /* Initialize the buffer, with an initial size. 142 143 Check block size limit in the outer wrapper function. For example, some libs 144 accept UINT32_MAX as the maximum block size, then init_size should <= it. 145 146 On success, return allocated size (>=0) 147 On failure, return -1 148 */ 149 static inline Py_ssize_t _BlocksOutputBuffer_InitWithSize(_BlocksOutputBuffer * buffer,const Py_ssize_t init_size,void ** next_out)150 _BlocksOutputBuffer_InitWithSize(_BlocksOutputBuffer *buffer, 151 const Py_ssize_t init_size, 152 void **next_out) 153 { 154 PyObject *b; 155 156 // ensure .list was set to NULL 157 assert(buffer->list == NULL); 158 159 // the first block 160 b = PyBytes_FromStringAndSize(NULL, init_size); 161 if (b == NULL) { 162 PyErr_SetString(PyExc_MemoryError, unable_allocate_msg); 163 return -1; 164 } 165 166 // create the list 167 buffer->list = PyList_New(1); 168 if (buffer->list == NULL) { 169 Py_DECREF(b); 170 return -1; 171 } 172 PyList_SET_ITEM(buffer->list, 0, b); 173 174 // set variables 175 buffer->allocated = init_size; 176 buffer->max_length = -1; 177 178 *next_out = PyBytes_AS_STRING(b); 179 return init_size; 180 } 181 182 /* Grow the buffer. The avail_out must be 0, please check it before calling. 183 184 On success, return allocated size (>=0) 185 On failure, return -1 186 */ 187 static inline Py_ssize_t _BlocksOutputBuffer_Grow(_BlocksOutputBuffer * buffer,void ** next_out,const Py_ssize_t avail_out)188 _BlocksOutputBuffer_Grow(_BlocksOutputBuffer *buffer, 189 void **next_out, 190 const Py_ssize_t avail_out) 191 { 192 PyObject *b; 193 const Py_ssize_t list_len = Py_SIZE(buffer->list); 194 Py_ssize_t block_size; 195 196 // ensure no gaps in the data 197 if (avail_out != 0) { 198 PyErr_SetString(PyExc_SystemError, 199 "avail_out is non-zero in _BlocksOutputBuffer_Grow()."); 200 return -1; 201 } 202 203 // get block size 204 if (list_len < (Py_ssize_t) Py_ARRAY_LENGTH(BUFFER_BLOCK_SIZE)) { 205 block_size = BUFFER_BLOCK_SIZE[list_len]; 206 } else { 207 block_size = BUFFER_BLOCK_SIZE[Py_ARRAY_LENGTH(BUFFER_BLOCK_SIZE) - 1]; 208 } 209 210 // check max_length 211 if (buffer->max_length >= 0) { 212 // if (rest == 0), should not grow the buffer. 213 Py_ssize_t rest = buffer->max_length - buffer->allocated; 214 assert(rest > 0); 215 216 // block_size of the last block 217 if (block_size > rest) { 218 block_size = rest; 219 } 220 } 221 222 // check buffer->allocated overflow 223 if (block_size > PY_SSIZE_T_MAX - buffer->allocated) { 224 PyErr_SetString(PyExc_MemoryError, unable_allocate_msg); 225 return -1; 226 } 227 228 // create the block 229 b = PyBytes_FromStringAndSize(NULL, block_size); 230 if (b == NULL) { 231 PyErr_SetString(PyExc_MemoryError, unable_allocate_msg); 232 return -1; 233 } 234 if (PyList_Append(buffer->list, b) < 0) { 235 Py_DECREF(b); 236 return -1; 237 } 238 Py_DECREF(b); 239 240 // set variables 241 buffer->allocated += block_size; 242 243 *next_out = PyBytes_AS_STRING(b); 244 return block_size; 245 } 246 247 /* Return the current outputted data size. */ 248 static inline Py_ssize_t _BlocksOutputBuffer_GetDataSize(_BlocksOutputBuffer * buffer,const Py_ssize_t avail_out)249 _BlocksOutputBuffer_GetDataSize(_BlocksOutputBuffer *buffer, 250 const Py_ssize_t avail_out) 251 { 252 return buffer->allocated - avail_out; 253 } 254 255 /* Finish the buffer. 256 257 Return a bytes object on success 258 Return NULL on failure 259 */ 260 static inline PyObject * _BlocksOutputBuffer_Finish(_BlocksOutputBuffer * buffer,const Py_ssize_t avail_out)261 _BlocksOutputBuffer_Finish(_BlocksOutputBuffer *buffer, 262 const Py_ssize_t avail_out) 263 { 264 PyObject *result, *block; 265 const Py_ssize_t list_len = Py_SIZE(buffer->list); 266 267 // fast path for single block 268 if ((list_len == 1 && avail_out == 0) || 269 (list_len == 2 && Py_SIZE(PyList_GET_ITEM(buffer->list, 1)) == avail_out)) 270 { 271 block = PyList_GET_ITEM(buffer->list, 0); 272 Py_INCREF(block); 273 274 Py_CLEAR(buffer->list); 275 return block; 276 } 277 278 // final bytes object 279 result = PyBytes_FromStringAndSize(NULL, buffer->allocated - avail_out); 280 if (result == NULL) { 281 PyErr_SetString(PyExc_MemoryError, unable_allocate_msg); 282 return NULL; 283 } 284 285 // memory copy 286 if (list_len > 0) { 287 char *posi = PyBytes_AS_STRING(result); 288 289 // blocks except the last one 290 Py_ssize_t i = 0; 291 for (; i < list_len-1; i++) { 292 block = PyList_GET_ITEM(buffer->list, i); 293 memcpy(posi, PyBytes_AS_STRING(block), Py_SIZE(block)); 294 posi += Py_SIZE(block); 295 } 296 // the last block 297 block = PyList_GET_ITEM(buffer->list, i); 298 memcpy(posi, PyBytes_AS_STRING(block), Py_SIZE(block) - avail_out); 299 } else { 300 assert(Py_SIZE(result) == 0); 301 } 302 303 Py_CLEAR(buffer->list); 304 return result; 305 } 306 307 /* Clean up the buffer when an error occurred. */ 308 static inline void _BlocksOutputBuffer_OnError(_BlocksOutputBuffer * buffer)309 _BlocksOutputBuffer_OnError(_BlocksOutputBuffer *buffer) 310 { 311 Py_CLEAR(buffer->list); 312 } 313 314 #ifdef __cplusplus 315 } 316 #endif 317 #endif /* Py_INTERNAL_BLOCKS_OUTPUT_BUFFER_H */