1 /*
2
3 python-bz2 - python bz2 library interface
4
5 Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com>
6 Copyright (c) 2002 Python Software Foundation; All Rights Reserved
7
8 */
9
10 #include "Python.h"
11 #include <stdio.h>
12 #include <bzlib.h>
13 #include "structmember.h"
14
15 #ifdef WITH_THREAD
16 #include "pythread.h"
17 #endif
18
/* Author credit for the bz2 module, stored as a plain C string.
 * NOTE(review): where this string is exported is not visible in this part
 * of the file -- presumably it is attached to the module at init time. */
19 static char __author__[] =
20 "The bz2 python module was written by:\n\
21 \n\
22 Gustavo Niemeyer <niemeyer@conectiva.com>\n\
23 ";
24
/* Py_off_t: the offset type used for stream positions in this module.
 * Without large file support plain off_t is used.  With it, the first of
 * off_t / fpos_t that is at least 8 bytes wide is selected, and the build
 * is rejected when neither qualifies.
 * (Selection logic copied from Objects/fileobject.c.) */
#if !defined(HAVE_LARGEFILE_SUPPORT) || SIZEOF_OFF_T >= 8
typedef off_t Py_off_t;
#elif SIZEOF_FPOS_T >= 8
typedef fpos_t Py_off_t;
#else
#error "Large file support, but neither off_t nor fpos_t is large enough."
#endif
36
/* Character storage of a Python string object. */
#define BUF(v) PyString_AS_STRING((PyStringObject *)v)

/* Values stored in BZ2FileObject.mode. */
#define MODE_CLOSED     0   /* set by close(); I/O methods refuse to run */
#define MODE_READ       1   /* reading, end of stream not reached yet */
#define MODE_READ_EOF   2   /* reading, end of stream already reached */
#define MODE_WRITE      3   /* writing */
43
44
/* When bzlib.h does not define BZ_CONFIG_ERROR, map the BZ2_-prefixed
 * names used throughout this file onto the unprefixed function names.
 * NOTE(review): presumably this targets older bzlib releases that export
 * the API without the prefix -- confirm against the bzlib versions that
 * must be supported. */
#if !defined(BZ_CONFIG_ERROR)

/* FILE-based reading and writing */
#define BZ2_bzRead bzRead
#define BZ2_bzReadOpen bzReadOpen
#define BZ2_bzReadClose bzReadClose
#define BZ2_bzWrite bzWrite
#define BZ2_bzWriteOpen bzWriteOpen
#define BZ2_bzWriteClose bzWriteClose

/* In-memory compression */
#define BZ2_bzCompress bzCompress
#define BZ2_bzCompressInit bzCompressInit
#define BZ2_bzCompressEnd bzCompressEnd

/* In-memory decompression */
#define BZ2_bzDecompress bzDecompress
#define BZ2_bzDecompressInit bzDecompressInit
#define BZ2_bzDecompressEnd bzDecompressEnd

#endif /* !defined(BZ_CONFIG_ERROR) */
61
62
/* Per-object locking helpers; `obj` must have a `lock` member.
 *
 * Without thread support both macros expand to nothing.  With it,
 * ACQUIRE_LOCK first tries a non-blocking acquire and, only if that
 * fails, waits for the lock inside a Py_BEGIN/END_ALLOW_THREADS section
 * so that other Python threads can run while this one blocks. */
#ifndef WITH_THREAD
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#else
#define ACQUIRE_LOCK(obj) do { \
    if (!PyThread_acquire_lock(obj->lock, 0)) { \
        Py_BEGIN_ALLOW_THREADS \
        PyThread_acquire_lock(obj->lock, 1); \
        Py_END_ALLOW_THREADS \
    } } while(0)
#define RELEASE_LOCK(obj) PyThread_release_lock(obj->lock)
#endif
75
/* Smaller of two values; the selected argument is evaluated twice. */
#define MIN(X, Y) (((Y) > (X)) ? (X) : (Y))

/* Bit flags OR-ed into f_newlinetypes as line endings are encountered */
#define NEWLINE_UNKNOWN 0           /* No newline seen, yet */
#define NEWLINE_CR      (1 << 0)    /* \r newline seen */
#define NEWLINE_LF      (1 << 1)    /* \n newline seen */
#define NEWLINE_CRLF    (1 << 2)    /* \r\n newline seen */
83
84 /* ===================================================================== */
85 /* Structure definitions. */
86
87 typedef struct {
88 PyObject_HEAD
89 PyObject *file;
90
91 char* f_buf; /* Allocated readahead buffer */
92 char* f_bufend; /* Points after last occupied position */
93 char* f_bufptr; /* Current buffer position */
94
95 int f_softspace; /* Flag used by 'print' command */
96
97 int f_univ_newline; /* Handle any newline convention */
98 int f_newlinetypes; /* Types of newlines seen */
99 int f_skipnextlf; /* Skip next \n */
100
101 BZFILE *fp;
102 int mode;
103 Py_off_t pos;
104 Py_off_t size;
105 #ifdef WITH_THREAD
106 PyThread_type_lock lock;
107 #endif
108 } BZ2FileObject;
109
110 typedef struct {
111 PyObject_HEAD
112 bz_stream bzs;
113 int running;
114 #ifdef WITH_THREAD
115 PyThread_type_lock lock;
116 #endif
117 } BZ2CompObject;
118
119 typedef struct {
120 PyObject_HEAD
121 bz_stream bzs;
122 int running;
123 PyObject *unused_data;
124 #ifdef WITH_THREAD
125 PyThread_type_lock lock;
126 #endif
127 } BZ2DecompObject;
128
129 /* ===================================================================== */
130 /* Utility functions. */
131
132 /* Refuse regular I/O if there's data in the iteration-buffer.
133 * Mixing them would cause data to arrive out of order, as the read*
134 * methods don't use the iteration buffer. */
135 static int
check_iterbuffered(BZ2FileObject * f)136 check_iterbuffered(BZ2FileObject *f)
137 {
138 if (f->f_buf != NULL &&
139 (f->f_bufend - f->f_bufptr) > 0 &&
140 f->f_buf[0] != '\0') {
141 PyErr_SetString(PyExc_ValueError,
142 "Mixing iteration and read methods would lose data");
143 return -1;
144 }
145 return 0;
146 }
147
148 static int
Util_CatchBZ2Error(int bzerror)149 Util_CatchBZ2Error(int bzerror)
150 {
151 int ret = 0;
152 switch(bzerror) {
153 case BZ_OK:
154 case BZ_STREAM_END:
155 break;
156
157 #ifdef BZ_CONFIG_ERROR
158 case BZ_CONFIG_ERROR:
159 PyErr_SetString(PyExc_SystemError,
160 "the bz2 library was not compiled "
161 "correctly");
162 ret = 1;
163 break;
164 #endif
165
166 case BZ_PARAM_ERROR:
167 PyErr_SetString(PyExc_ValueError,
168 "the bz2 library has received wrong "
169 "parameters");
170 ret = 1;
171 break;
172
173 case BZ_MEM_ERROR:
174 PyErr_NoMemory();
175 ret = 1;
176 break;
177
178 case BZ_DATA_ERROR:
179 case BZ_DATA_ERROR_MAGIC:
180 PyErr_SetString(PyExc_IOError, "invalid data stream");
181 ret = 1;
182 break;
183
184 case BZ_IO_ERROR:
185 PyErr_SetString(PyExc_IOError, "unknown IO error");
186 ret = 1;
187 break;
188
189 case BZ_UNEXPECTED_EOF:
190 PyErr_SetString(PyExc_EOFError,
191 "compressed file ended before the "
192 "logical end-of-stream was detected");
193 ret = 1;
194 break;
195
196 case BZ_SEQUENCE_ERROR:
197 PyErr_SetString(PyExc_RuntimeError,
198 "wrong sequence of bz2 library "
199 "commands used");
200 ret = 1;
201 break;
202 }
203 return ret;
204 }
205
/* Size of the on-stack scratch buffers: BUFSIZ, but never below 8192. */
#if BUFSIZ >= 8192
#define SMALLCHUNK BUFSIZ
#else
#define SMALLCHUNK 8192
#endif
211
/* Next buffer size to try after `currentsize` turned out too small.
 * (Adapted from fileobject.c:new_buffersize().)
 *
 * The increment is one eighth of the current size plus a constant 6:
 * growth proportional to the size keeps repeated resizing amortized
 * linear, while staying well below doubling limits over-allocation.
 * The arithmetic is unsigned and may wrap for huge inputs; callers that
 * care must compare the result with the input. */
static size_t
Util_NewBufferSize(size_t currentsize)
{
    size_t increment = (currentsize >> 3) + 6;

    return currentsize + increment;
}
221
222 static int
Util_GrowBuffer(PyObject ** buf)223 Util_GrowBuffer(PyObject **buf)
224 {
225 size_t size = PyString_GET_SIZE(*buf);
226 size_t new_size = Util_NewBufferSize(size);
227 if (new_size > size) {
228 return _PyString_Resize(buf, new_size);
229 } else { /* overflow */
230 PyErr_SetString(PyExc_OverflowError,
231 "Unable to allocate buffer - output too large");
232 return -1;
233 }
234 }
235
236 /* This is a hacked version of Python's fileobject.c:get_line(). */
237 static PyObject *
Util_GetLine(BZ2FileObject * f,int n)238 Util_GetLine(BZ2FileObject *f, int n)
239 {
240 char c;
241 char *buf, *end;
242 size_t total_v_size; /* total # of slots in buffer */
243 size_t used_v_size; /* # used slots in buffer */
244 size_t increment; /* amount to increment the buffer */
245 PyObject *v;
246 int bzerror;
247 int bytes_read;
248 int newlinetypes = f->f_newlinetypes;
249 int skipnextlf = f->f_skipnextlf;
250 int univ_newline = f->f_univ_newline;
251
252 total_v_size = n > 0 ? n : 100;
253 v = PyString_FromStringAndSize((char *)NULL, total_v_size);
254 if (v == NULL)
255 return NULL;
256
257 buf = BUF(v);
258 end = buf + total_v_size;
259
260 for (;;) {
261 Py_BEGIN_ALLOW_THREADS
262 while (buf != end) {
263 bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
264 f->pos++;
265 if (bytes_read == 0) break;
266 if (univ_newline) {
267 if (skipnextlf) {
268 skipnextlf = 0;
269 if (c == '\n') {
270 /* Seeing a \n here with skipnextlf true means we
271 * saw a \r before.
272 */
273 newlinetypes |= NEWLINE_CRLF;
274 if (bzerror != BZ_OK) break;
275 bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
276 f->pos++;
277 if (bytes_read == 0) break;
278 } else {
279 newlinetypes |= NEWLINE_CR;
280 }
281 }
282 if (c == '\r') {
283 skipnextlf = 1;
284 c = '\n';
285 } else if (c == '\n')
286 newlinetypes |= NEWLINE_LF;
287 }
288 *buf++ = c;
289 if (bzerror != BZ_OK || c == '\n') break;
290 }
291 if (univ_newline && bzerror == BZ_STREAM_END && skipnextlf)
292 newlinetypes |= NEWLINE_CR;
293 Py_END_ALLOW_THREADS
294 f->f_newlinetypes = newlinetypes;
295 f->f_skipnextlf = skipnextlf;
296 if (bzerror == BZ_STREAM_END) {
297 f->size = f->pos;
298 f->mode = MODE_READ_EOF;
299 break;
300 } else if (bzerror != BZ_OK) {
301 Util_CatchBZ2Error(bzerror);
302 Py_DECREF(v);
303 return NULL;
304 }
305 if (c == '\n')
306 break;
307 /* Must be because buf == end */
308 if (n > 0)
309 break;
310 used_v_size = total_v_size;
311 increment = total_v_size >> 2; /* mild exponential growth */
312 total_v_size += increment;
313 if (total_v_size > INT_MAX) {
314 PyErr_SetString(PyExc_OverflowError,
315 "line is longer than a Python string can hold");
316 Py_DECREF(v);
317 return NULL;
318 }
319 if (_PyString_Resize(&v, total_v_size) < 0)
320 return NULL;
321 buf = BUF(v) + used_v_size;
322 end = BUF(v) + total_v_size;
323 }
324
325 used_v_size = buf - BUF(v);
326 if (used_v_size != total_v_size)
327 _PyString_Resize(&v, used_v_size);
328 return v;
329 }
330
331 /* This is a hacked version of Python's
332 * fileobject.c:Py_UniversalNewlineFread(). */
333 size_t
Util_UnivNewlineRead(int * bzerror,BZFILE * stream,char * buf,size_t n,BZ2FileObject * f)334 Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
335 char* buf, size_t n, BZ2FileObject *f)
336 {
337 char *dst = buf;
338 int newlinetypes, skipnextlf;
339
340 assert(buf != NULL);
341 assert(stream != NULL);
342
343 if (!f->f_univ_newline)
344 return BZ2_bzRead(bzerror, stream, buf, n);
345
346 newlinetypes = f->f_newlinetypes;
347 skipnextlf = f->f_skipnextlf;
348
349 /* Invariant: n is the number of bytes remaining to be filled
350 * in the buffer.
351 */
352 while (n) {
353 size_t nread;
354 int shortread;
355 char *src = dst;
356
357 nread = BZ2_bzRead(bzerror, stream, dst, n);
358 assert(nread <= n);
359 n -= nread; /* assuming 1 byte out for each in; will adjust */
360 shortread = n != 0; /* true iff EOF or error */
361 while (nread--) {
362 char c = *src++;
363 if (c == '\r') {
364 /* Save as LF and set flag to skip next LF. */
365 *dst++ = '\n';
366 skipnextlf = 1;
367 }
368 else if (skipnextlf && c == '\n') {
369 /* Skip LF, and remember we saw CR LF. */
370 skipnextlf = 0;
371 newlinetypes |= NEWLINE_CRLF;
372 ++n;
373 }
374 else {
375 /* Normal char to be stored in buffer. Also
376 * update the newlinetypes flag if either this
377 * is an LF or the previous char was a CR.
378 */
379 if (c == '\n')
380 newlinetypes |= NEWLINE_LF;
381 else if (skipnextlf)
382 newlinetypes |= NEWLINE_CR;
383 *dst++ = c;
384 skipnextlf = 0;
385 }
386 }
387 if (shortread) {
388 /* If this is EOF, update type flags. */
389 if (skipnextlf && *bzerror == BZ_STREAM_END)
390 newlinetypes |= NEWLINE_CR;
391 break;
392 }
393 }
394 f->f_newlinetypes = newlinetypes;
395 f->f_skipnextlf = skipnextlf;
396 return dst - buf;
397 }
398
399 /* This is a hacked version of Python's fileobject.c:drop_readahead(). */
400 static void
Util_DropReadAhead(BZ2FileObject * f)401 Util_DropReadAhead(BZ2FileObject *f)
402 {
403 if (f->f_buf != NULL) {
404 PyMem_Free(f->f_buf);
405 f->f_buf = NULL;
406 }
407 }
408
409 /* This is a hacked version of Python's fileobject.c:readahead(). */
410 static int
Util_ReadAhead(BZ2FileObject * f,int bufsize)411 Util_ReadAhead(BZ2FileObject *f, int bufsize)
412 {
413 int chunksize;
414 int bzerror;
415
416 if (f->f_buf != NULL) {
417 if((f->f_bufend - f->f_bufptr) >= 1)
418 return 0;
419 else
420 Util_DropReadAhead(f);
421 }
422 if (f->mode == MODE_READ_EOF) {
423 f->f_bufptr = f->f_buf;
424 f->f_bufend = f->f_buf;
425 return 0;
426 }
427 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
428 PyErr_NoMemory();
429 return -1;
430 }
431 Py_BEGIN_ALLOW_THREADS
432 chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf,
433 bufsize, f);
434 Py_END_ALLOW_THREADS
435 f->pos += chunksize;
436 if (bzerror == BZ_STREAM_END) {
437 f->size = f->pos;
438 f->mode = MODE_READ_EOF;
439 } else if (bzerror != BZ_OK) {
440 Util_CatchBZ2Error(bzerror);
441 Util_DropReadAhead(f);
442 return -1;
443 }
444 f->f_bufptr = f->f_buf;
445 f->f_bufend = f->f_buf + chunksize;
446 return 0;
447 }
448
449 /* This is a hacked version of Python's
450 * fileobject.c:readahead_get_line_skip(). */
451 static PyStringObject *
Util_ReadAheadGetLineSkip(BZ2FileObject * f,int skip,int bufsize)452 Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
453 {
454 PyStringObject* s;
455 char *bufptr;
456 char *buf;
457 int len;
458
459 if (f->f_buf == NULL)
460 if (Util_ReadAhead(f, bufsize) < 0)
461 return NULL;
462
463 len = f->f_bufend - f->f_bufptr;
464 if (len == 0)
465 return (PyStringObject *)
466 PyString_FromStringAndSize(NULL, skip);
467 bufptr = memchr(f->f_bufptr, '\n', len);
468 if (bufptr != NULL) {
469 bufptr++; /* Count the '\n' */
470 len = bufptr - f->f_bufptr;
471 s = (PyStringObject *)
472 PyString_FromStringAndSize(NULL, skip+len);
473 if (s == NULL)
474 return NULL;
475 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
476 f->f_bufptr = bufptr;
477 if (bufptr == f->f_bufend)
478 Util_DropReadAhead(f);
479 } else {
480 bufptr = f->f_bufptr;
481 buf = f->f_buf;
482 f->f_buf = NULL; /* Force new readahead buffer */
483 s = Util_ReadAheadGetLineSkip(f, skip+len,
484 bufsize + (bufsize>>2));
485 if (s == NULL) {
486 PyMem_Free(buf);
487 return NULL;
488 }
489 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
490 PyMem_Free(buf);
491 }
492 return s;
493 }
494
495 /* ===================================================================== */
496 /* Methods of BZ2File. */
497
/* Docstring referenced by the "read" entry of BZ2File_methods. */
498 PyDoc_STRVAR(BZ2File_read__doc__,
499 "read([size]) -> string\n\
500 \n\
501 Read at most size uncompressed bytes, returned as a string. If the size\n\
502 argument is negative or omitted, read until EOF is reached.\n\
503 ");
504
505 /* This is a hacked version of Python's fileobject.c:file_read(). */
506 static PyObject *
BZ2File_read(BZ2FileObject * self,PyObject * args)507 BZ2File_read(BZ2FileObject *self, PyObject *args)
508 {
509 long bytesrequested = -1;
510 size_t bytesread, buffersize, chunksize;
511 int bzerror;
512 PyObject *ret = NULL;
513
514 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
515 return NULL;
516
517 ACQUIRE_LOCK(self);
518 switch (self->mode) {
519 case MODE_READ:
520 break;
521 case MODE_READ_EOF:
522 ret = PyString_FromString("");
523 goto cleanup;
524 case MODE_CLOSED:
525 PyErr_SetString(PyExc_ValueError,
526 "I/O operation on closed file");
527 goto cleanup;
528 default:
529 PyErr_SetString(PyExc_IOError,
530 "file is not ready for reading");
531 goto cleanup;
532 }
533
534 /* refuse to mix with f.next() */
535 if (check_iterbuffered(self))
536 goto cleanup;
537
538 if (bytesrequested < 0)
539 buffersize = Util_NewBufferSize((size_t)0);
540 else
541 buffersize = bytesrequested;
542 if (buffersize > INT_MAX) {
543 PyErr_SetString(PyExc_OverflowError,
544 "requested number of bytes is "
545 "more than a Python string can hold");
546 goto cleanup;
547 }
548 ret = PyString_FromStringAndSize((char *)NULL, buffersize);
549 if (ret == NULL)
550 goto cleanup;
551 bytesread = 0;
552
553 for (;;) {
554 Py_BEGIN_ALLOW_THREADS
555 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
556 BUF(ret)+bytesread,
557 buffersize-bytesread,
558 self);
559 self->pos += chunksize;
560 Py_END_ALLOW_THREADS
561 bytesread += chunksize;
562 if (bzerror == BZ_STREAM_END) {
563 self->size = self->pos;
564 self->mode = MODE_READ_EOF;
565 break;
566 } else if (bzerror != BZ_OK) {
567 Util_CatchBZ2Error(bzerror);
568 Py_DECREF(ret);
569 ret = NULL;
570 goto cleanup;
571 }
572 if (bytesrequested < 0) {
573 buffersize = Util_NewBufferSize(buffersize);
574 if (_PyString_Resize(&ret, buffersize) < 0)
575 goto cleanup;
576 } else {
577 break;
578 }
579 }
580 if (bytesread != buffersize)
581 _PyString_Resize(&ret, bytesread);
582
583 cleanup:
584 RELEASE_LOCK(self);
585 return ret;
586 }
587
/* Docstring referenced by the "readline" entry of BZ2File_methods. */
588 PyDoc_STRVAR(BZ2File_readline__doc__,
589 "readline([size]) -> string\n\
590 \n\
591 Return the next line from the file, as a string, retaining newline.\n\
592 A non-negative size argument will limit the maximum number of bytes to\n\
593 return (an incomplete line may be returned then). Return an empty\n\
594 string at EOF.\n\
595 ");
596
597 static PyObject *
BZ2File_readline(BZ2FileObject * self,PyObject * args)598 BZ2File_readline(BZ2FileObject *self, PyObject *args)
599 {
600 PyObject *ret = NULL;
601 int sizehint = -1;
602
603 if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
604 return NULL;
605
606 ACQUIRE_LOCK(self);
607 switch (self->mode) {
608 case MODE_READ:
609 break;
610 case MODE_READ_EOF:
611 ret = PyString_FromString("");
612 goto cleanup;
613 case MODE_CLOSED:
614 PyErr_SetString(PyExc_ValueError,
615 "I/O operation on closed file");
616 goto cleanup;
617 default:
618 PyErr_SetString(PyExc_IOError,
619 "file is not ready for reading");
620 goto cleanup;
621 }
622
623 /* refuse to mix with f.next() */
624 if (check_iterbuffered(self))
625 goto cleanup;
626
627 if (sizehint == 0)
628 ret = PyString_FromString("");
629 else
630 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
631
632 cleanup:
633 RELEASE_LOCK(self);
634 return ret;
635 }
636
/* Docstring referenced by the "readlines" entry of BZ2File_methods. */
637 PyDoc_STRVAR(BZ2File_readlines__doc__,
638 "readlines([size]) -> list\n\
639 \n\
640 Call readline() repeatedly and return a list of lines read.\n\
641 The optional size argument, if given, is an approximate bound on the\n\
642 total number of bytes in the lines returned.\n\
643 ");
644
645 /* This is a hacked version of Python's fileobject.c:file_readlines(). */
646 static PyObject *
BZ2File_readlines(BZ2FileObject * self,PyObject * args)647 BZ2File_readlines(BZ2FileObject *self, PyObject *args)
648 {
649 long sizehint = 0;
650 PyObject *list = NULL;
651 PyObject *line;
652 char small_buffer[SMALLCHUNK];
653 char *buffer = small_buffer;
654 size_t buffersize = SMALLCHUNK;
655 PyObject *big_buffer = NULL;
656 size_t nfilled = 0;
657 size_t nread;
658 size_t totalread = 0;
659 char *p, *q, *end;
660 int err;
661 int shortread = 0;
662 int bzerror;
663
664 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
665 return NULL;
666
667 ACQUIRE_LOCK(self);
668 switch (self->mode) {
669 case MODE_READ:
670 break;
671 case MODE_READ_EOF:
672 list = PyList_New(0);
673 goto cleanup;
674 case MODE_CLOSED:
675 PyErr_SetString(PyExc_ValueError,
676 "I/O operation on closed file");
677 goto cleanup;
678 default:
679 PyErr_SetString(PyExc_IOError,
680 "file is not ready for reading");
681 goto cleanup;
682 }
683
684 /* refuse to mix with f.next() */
685 if (check_iterbuffered(self))
686 goto cleanup;
687
688 if ((list = PyList_New(0)) == NULL)
689 goto cleanup;
690
691 for (;;) {
692 Py_BEGIN_ALLOW_THREADS
693 nread = Util_UnivNewlineRead(&bzerror, self->fp,
694 buffer+nfilled,
695 buffersize-nfilled, self);
696 self->pos += nread;
697 Py_END_ALLOW_THREADS
698 if (bzerror == BZ_STREAM_END) {
699 self->size = self->pos;
700 self->mode = MODE_READ_EOF;
701 if (nread == 0) {
702 sizehint = 0;
703 break;
704 }
705 shortread = 1;
706 } else if (bzerror != BZ_OK) {
707 Util_CatchBZ2Error(bzerror);
708 error:
709 Py_DECREF(list);
710 list = NULL;
711 goto cleanup;
712 }
713 totalread += nread;
714 p = memchr(buffer+nfilled, '\n', nread);
715 if (!shortread && p == NULL) {
716 /* Need a larger buffer to fit this line */
717 nfilled += nread;
718 buffersize *= 2;
719 if (buffersize > INT_MAX) {
720 PyErr_SetString(PyExc_OverflowError,
721 "line is longer than a Python string can hold");
722 goto error;
723 }
724 if (big_buffer == NULL) {
725 /* Create the big buffer */
726 big_buffer = PyString_FromStringAndSize(
727 NULL, buffersize);
728 if (big_buffer == NULL)
729 goto error;
730 buffer = PyString_AS_STRING(big_buffer);
731 memcpy(buffer, small_buffer, nfilled);
732 }
733 else {
734 /* Grow the big buffer */
735 if (_PyString_Resize(&big_buffer, buffersize))
736 goto error;
737 buffer = PyString_AS_STRING(big_buffer);
738 }
739 continue;
740 }
741 end = buffer+nfilled+nread;
742 q = buffer;
743 while (p != NULL) {
744 /* Process complete lines */
745 p++;
746 line = PyString_FromStringAndSize(q, p-q);
747 if (line == NULL)
748 goto error;
749 err = PyList_Append(list, line);
750 Py_DECREF(line);
751 if (err != 0)
752 goto error;
753 q = p;
754 p = memchr(q, '\n', end-q);
755 }
756 /* Move the remaining incomplete line to the start */
757 nfilled = end-q;
758 memmove(buffer, q, nfilled);
759 if (sizehint > 0)
760 if (totalread >= (size_t)sizehint)
761 break;
762 if (shortread) {
763 sizehint = 0;
764 break;
765 }
766 }
767 if (nfilled != 0) {
768 /* Partial last line */
769 line = PyString_FromStringAndSize(buffer, nfilled);
770 if (line == NULL)
771 goto error;
772 if (sizehint > 0) {
773 /* Need to complete the last line */
774 PyObject *rest = Util_GetLine(self, 0);
775 if (rest == NULL) {
776 Py_DECREF(line);
777 goto error;
778 }
779 PyString_Concat(&line, rest);
780 Py_DECREF(rest);
781 if (line == NULL)
782 goto error;
783 }
784 err = PyList_Append(list, line);
785 Py_DECREF(line);
786 if (err != 0)
787 goto error;
788 }
789
790 cleanup:
791 RELEASE_LOCK(self);
792 if (big_buffer) {
793 Py_DECREF(big_buffer);
794 }
795 return list;
796 }
797
/* Docstring referenced by the "xreadlines" entry of BZ2File_methods
 * (that entry dispatches to BZ2File_getiter). */
798 PyDoc_STRVAR(BZ2File_xreadlines__doc__,
799 "xreadlines() -> self\n\
800 \n\
801 For backward compatibility. BZ2File objects now include the performance\n\
802 optimizations previously implemented in the xreadlines module.\n\
803 ");
804
/* Docstring referenced by the "write" entry of BZ2File_methods. */
805 PyDoc_STRVAR(BZ2File_write__doc__,
806 "write(data) -> None\n\
807 \n\
808 Write the 'data' string to file. Note that due to buffering, close() may\n\
809 be needed before the file on disk reflects the data written.\n\
810 ");
811
812 /* This is a hacked version of Python's fileobject.c:file_write(). */
813 static PyObject *
BZ2File_write(BZ2FileObject * self,PyObject * args)814 BZ2File_write(BZ2FileObject *self, PyObject *args)
815 {
816 PyObject *ret = NULL;
817 Py_buffer pbuf;
818 char *buf;
819 int len;
820 int bzerror;
821
822 if (!PyArg_ParseTuple(args, "s*:write", &pbuf))
823 return NULL;
824 buf = pbuf.buf;
825 len = pbuf.len;
826
827 ACQUIRE_LOCK(self);
828 switch (self->mode) {
829 case MODE_WRITE:
830 break;
831
832 case MODE_CLOSED:
833 PyErr_SetString(PyExc_ValueError,
834 "I/O operation on closed file");
835 goto cleanup;
836
837 default:
838 PyErr_SetString(PyExc_IOError,
839 "file is not ready for writing");
840 goto cleanup;
841 }
842
843 self->f_softspace = 0;
844
845 Py_BEGIN_ALLOW_THREADS
846 BZ2_bzWrite (&bzerror, self->fp, buf, len);
847 self->pos += len;
848 Py_END_ALLOW_THREADS
849
850 if (bzerror != BZ_OK) {
851 Util_CatchBZ2Error(bzerror);
852 goto cleanup;
853 }
854
855 Py_INCREF(Py_None);
856 ret = Py_None;
857
858 cleanup:
859 PyBuffer_Release(&pbuf);
860 RELEASE_LOCK(self);
861 return ret;
862 }
863
/* Docstring referenced by the "writelines" entry of BZ2File_methods. */
864 PyDoc_STRVAR(BZ2File_writelines__doc__,
865 "writelines(sequence_of_strings) -> None\n\
866 \n\
867 Write the sequence of strings to the file. Note that newlines are not\n\
868 added. The sequence can be any iterable object producing strings. This is\n\
869 equivalent to calling write() for each string.\n\
870 ");
871
872 /* This is a hacked version of Python's fileobject.c:file_writelines(). */
873 static PyObject *
BZ2File_writelines(BZ2FileObject * self,PyObject * seq)874 BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
875 {
876 #define CHUNKSIZE 1000
877 PyObject *list = NULL;
878 PyObject *iter = NULL;
879 PyObject *ret = NULL;
880 PyObject *line;
881 int i, j, index, len, islist;
882 int bzerror;
883
884 ACQUIRE_LOCK(self);
885 switch (self->mode) {
886 case MODE_WRITE:
887 break;
888
889 case MODE_CLOSED:
890 PyErr_SetString(PyExc_ValueError,
891 "I/O operation on closed file");
892 goto error;
893
894 default:
895 PyErr_SetString(PyExc_IOError,
896 "file is not ready for writing");
897 goto error;
898 }
899
900 islist = PyList_Check(seq);
901 if (!islist) {
902 iter = PyObject_GetIter(seq);
903 if (iter == NULL) {
904 PyErr_SetString(PyExc_TypeError,
905 "writelines() requires an iterable argument");
906 goto error;
907 }
908 list = PyList_New(CHUNKSIZE);
909 if (list == NULL)
910 goto error;
911 }
912
913 /* Strategy: slurp CHUNKSIZE lines into a private list,
914 checking that they are all strings, then write that list
915 without holding the interpreter lock, then come back for more. */
916 for (index = 0; ; index += CHUNKSIZE) {
917 if (islist) {
918 Py_XDECREF(list);
919 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
920 if (list == NULL)
921 goto error;
922 j = PyList_GET_SIZE(list);
923 }
924 else {
925 for (j = 0; j < CHUNKSIZE; j++) {
926 line = PyIter_Next(iter);
927 if (line == NULL) {
928 if (PyErr_Occurred())
929 goto error;
930 break;
931 }
932 PyList_SetItem(list, j, line);
933 }
934 }
935 if (j == 0)
936 break;
937
938 /* Check that all entries are indeed strings. If not,
939 apply the same rules as for file.write() and
940 convert the rets to strings. This is slow, but
941 seems to be the only way since all conversion APIs
942 could potentially execute Python code. */
943 for (i = 0; i < j; i++) {
944 PyObject *v = PyList_GET_ITEM(list, i);
945 if (!PyString_Check(v)) {
946 const char *buffer;
947 Py_ssize_t len;
948 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
949 PyErr_SetString(PyExc_TypeError,
950 "writelines() "
951 "argument must be "
952 "a sequence of "
953 "strings");
954 goto error;
955 }
956 line = PyString_FromStringAndSize(buffer,
957 len);
958 if (line == NULL)
959 goto error;
960 Py_DECREF(v);
961 PyList_SET_ITEM(list, i, line);
962 }
963 }
964
965 self->f_softspace = 0;
966
967 /* Since we are releasing the global lock, the
968 following code may *not* execute Python code. */
969 Py_BEGIN_ALLOW_THREADS
970 for (i = 0; i < j; i++) {
971 line = PyList_GET_ITEM(list, i);
972 len = PyString_GET_SIZE(line);
973 BZ2_bzWrite (&bzerror, self->fp,
974 PyString_AS_STRING(line), len);
975 if (bzerror != BZ_OK) {
976 Py_BLOCK_THREADS
977 Util_CatchBZ2Error(bzerror);
978 goto error;
979 }
980 }
981 Py_END_ALLOW_THREADS
982
983 if (j < CHUNKSIZE)
984 break;
985 }
986
987 Py_INCREF(Py_None);
988 ret = Py_None;
989
990 error:
991 RELEASE_LOCK(self);
992 Py_XDECREF(list);
993 Py_XDECREF(iter);
994 return ret;
995 #undef CHUNKSIZE
996 }
997
/* Docstring referenced by the "seek" entry of BZ2File_methods. */
998 PyDoc_STRVAR(BZ2File_seek__doc__,
999 "seek(offset [, whence]) -> None\n\
1000 \n\
1001 Move to new file position. Argument offset is a byte count. Optional\n\
1002 argument whence defaults to 0 (offset from start of file, offset\n\
1003 should be >= 0); other values are 1 (move relative to current position,\n\
1004 positive or negative), and 2 (move relative to end of file, usually\n\
1005 negative, although many platforms allow seeking beyond the end of a file).\n\
1006 \n\
1007 Note that seeking of bz2 files is emulated, and depending on the parameters\n\
1008 the operation may be extremely slow.\n\
1009 ");
1010
1011 static PyObject *
BZ2File_seek(BZ2FileObject * self,PyObject * args)1012 BZ2File_seek(BZ2FileObject *self, PyObject *args)
1013 {
1014 int where = 0;
1015 PyObject *offobj;
1016 Py_off_t offset;
1017 char small_buffer[SMALLCHUNK];
1018 char *buffer = small_buffer;
1019 size_t buffersize = SMALLCHUNK;
1020 Py_off_t bytesread = 0;
1021 size_t readsize;
1022 int chunksize;
1023 int bzerror;
1024 PyObject *ret = NULL;
1025
1026 if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where))
1027 return NULL;
1028 #if !defined(HAVE_LARGEFILE_SUPPORT)
1029 offset = PyInt_AsLong(offobj);
1030 #else
1031 offset = PyLong_Check(offobj) ?
1032 PyLong_AsLongLong(offobj) : PyInt_AsLong(offobj);
1033 #endif
1034 if (PyErr_Occurred())
1035 return NULL;
1036
1037 ACQUIRE_LOCK(self);
1038 Util_DropReadAhead(self);
1039 switch (self->mode) {
1040 case MODE_READ:
1041 case MODE_READ_EOF:
1042 break;
1043
1044 case MODE_CLOSED:
1045 PyErr_SetString(PyExc_ValueError,
1046 "I/O operation on closed file");
1047 goto cleanup;
1048
1049 default:
1050 PyErr_SetString(PyExc_IOError,
1051 "seek works only while reading");
1052 goto cleanup;
1053 }
1054
1055 if (where == 2) {
1056 if (self->size == -1) {
1057 assert(self->mode != MODE_READ_EOF);
1058 for (;;) {
1059 Py_BEGIN_ALLOW_THREADS
1060 chunksize = Util_UnivNewlineRead(
1061 &bzerror, self->fp,
1062 buffer, buffersize,
1063 self);
1064 self->pos += chunksize;
1065 Py_END_ALLOW_THREADS
1066
1067 bytesread += chunksize;
1068 if (bzerror == BZ_STREAM_END) {
1069 break;
1070 } else if (bzerror != BZ_OK) {
1071 Util_CatchBZ2Error(bzerror);
1072 goto cleanup;
1073 }
1074 }
1075 self->mode = MODE_READ_EOF;
1076 self->size = self->pos;
1077 bytesread = 0;
1078 }
1079 offset = self->size + offset;
1080 } else if (where == 1) {
1081 offset = self->pos + offset;
1082 }
1083
1084 /* Before getting here, offset must be the absolute position the file
1085 * pointer should be set to. */
1086
1087 if (offset >= self->pos) {
1088 /* we can move forward */
1089 offset -= self->pos;
1090 } else {
1091 /* we cannot move back, so rewind the stream */
1092 BZ2_bzReadClose(&bzerror, self->fp);
1093 if (self->fp) {
1094 PyFile_DecUseCount((PyFileObject *)self->file);
1095 self->fp = NULL;
1096 }
1097 if (bzerror != BZ_OK) {
1098 Util_CatchBZ2Error(bzerror);
1099 goto cleanup;
1100 }
1101 ret = PyObject_CallMethod(self->file, "seek", "(i)", 0);
1102 if (!ret)
1103 goto cleanup;
1104 Py_DECREF(ret);
1105 ret = NULL;
1106 self->pos = 0;
1107 self->fp = BZ2_bzReadOpen(&bzerror, PyFile_AsFile(self->file),
1108 0, 0, NULL, 0);
1109 if (self->fp)
1110 PyFile_IncUseCount((PyFileObject *)self->file);
1111 if (bzerror != BZ_OK) {
1112 Util_CatchBZ2Error(bzerror);
1113 goto cleanup;
1114 }
1115 self->mode = MODE_READ;
1116 }
1117
1118 if (offset <= 0 || self->mode == MODE_READ_EOF)
1119 goto exit;
1120
1121 /* Before getting here, offset must be set to the number of bytes
1122 * to walk forward. */
1123 for (;;) {
1124 if (offset-bytesread > buffersize)
1125 readsize = buffersize;
1126 else
1127 /* offset might be wider that readsize, but the result
1128 * of the subtraction is bound by buffersize (see the
1129 * condition above). buffersize is 8192. */
1130 readsize = (size_t)(offset-bytesread);
1131 Py_BEGIN_ALLOW_THREADS
1132 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
1133 buffer, readsize, self);
1134 self->pos += chunksize;
1135 Py_END_ALLOW_THREADS
1136 bytesread += chunksize;
1137 if (bzerror == BZ_STREAM_END) {
1138 self->size = self->pos;
1139 self->mode = MODE_READ_EOF;
1140 break;
1141 } else if (bzerror != BZ_OK) {
1142 Util_CatchBZ2Error(bzerror);
1143 goto cleanup;
1144 }
1145 if (bytesread == offset)
1146 break;
1147 }
1148
1149 exit:
1150 Py_INCREF(Py_None);
1151 ret = Py_None;
1152
1153 cleanup:
1154 RELEASE_LOCK(self);
1155 return ret;
1156 }
1157
/* Docstring referenced by the "tell" entry of BZ2File_methods. */
1158 PyDoc_STRVAR(BZ2File_tell__doc__,
1159 "tell() -> int\n\
1160 \n\
1161 Return the current file position, an integer (may be a long integer).\n\
1162 ");
1163
1164 static PyObject *
BZ2File_tell(BZ2FileObject * self,PyObject * args)1165 BZ2File_tell(BZ2FileObject *self, PyObject *args)
1166 {
1167 PyObject *ret = NULL;
1168
1169 if (self->mode == MODE_CLOSED) {
1170 PyErr_SetString(PyExc_ValueError,
1171 "I/O operation on closed file");
1172 goto cleanup;
1173 }
1174
1175 #if !defined(HAVE_LARGEFILE_SUPPORT)
1176 ret = PyInt_FromLong(self->pos);
1177 #else
1178 ret = PyLong_FromLongLong(self->pos);
1179 #endif
1180
1181 cleanup:
1182 return ret;
1183 }
1184
/* Docstring referenced by the "close" entry of BZ2File_methods. */
1185 PyDoc_STRVAR(BZ2File_close__doc__,
1186 "close() -> None or (perhaps) an integer\n\
1187 \n\
1188 Close the file. Sets data attribute .closed to true. A closed file\n\
1189 cannot be used for further I/O operations. close() may be called more\n\
1190 than once without error.\n\
1191 ");
1192
1193 static PyObject *
BZ2File_close(BZ2FileObject * self)1194 BZ2File_close(BZ2FileObject *self)
1195 {
1196 PyObject *ret = NULL;
1197 int bzerror = BZ_OK;
1198
1199 ACQUIRE_LOCK(self);
1200 switch (self->mode) {
1201 case MODE_READ:
1202 case MODE_READ_EOF:
1203 BZ2_bzReadClose(&bzerror, self->fp);
1204 break;
1205 case MODE_WRITE:
1206 BZ2_bzWriteClose(&bzerror, self->fp,
1207 0, NULL, NULL);
1208 break;
1209 }
1210 if (self->file) {
1211 if (self->fp)
1212 PyFile_DecUseCount((PyFileObject *)self->file);
1213 ret = PyObject_CallMethod(self->file, "close", NULL);
1214 } else {
1215 Py_INCREF(Py_None);
1216 ret = Py_None;
1217 }
1218 self->fp = NULL;
1219 self->mode = MODE_CLOSED;
1220 if (bzerror != BZ_OK) {
1221 Util_CatchBZ2Error(bzerror);
1222 Py_XDECREF(ret);
1223 ret = NULL;
1224 }
1225
1226 RELEASE_LOCK(self);
1227 return ret;
1228 }
1229
1230 PyDoc_STRVAR(BZ2File_enter_doc,
1231 "__enter__() -> self.");
1232
1233 static PyObject *
BZ2File_enter(BZ2FileObject * self)1234 BZ2File_enter(BZ2FileObject *self)
1235 {
1236 if (self->mode == MODE_CLOSED) {
1237 PyErr_SetString(PyExc_ValueError,
1238 "I/O operation on closed file");
1239 return NULL;
1240 }
1241 Py_INCREF(self);
1242 return (PyObject *) self;
1243 }
1244
1245 PyDoc_STRVAR(BZ2File_exit_doc,
1246 "__exit__(*excinfo) -> None. Closes the file.");
1247
1248 static PyObject *
BZ2File_exit(BZ2FileObject * self,PyObject * args)1249 BZ2File_exit(BZ2FileObject *self, PyObject *args)
1250 {
1251 PyObject *ret = PyObject_CallMethod((PyObject *) self, "close", NULL);
1252 if (!ret)
1253 /* If error occurred, pass through */
1254 return NULL;
1255 Py_DECREF(ret);
1256 Py_RETURN_NONE;
1257 }
1258
1259
1260 static PyObject *BZ2File_getiter(BZ2FileObject *self);
1261
1262 static PyMethodDef BZ2File_methods[] = {
1263 {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
1264 {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
1265 {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
1266 {"xreadlines", (PyCFunction)BZ2File_getiter, METH_VARARGS, BZ2File_xreadlines__doc__},
1267 {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
1268 {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
1269 {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
1270 {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
1271 {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
1272 {"__enter__", (PyCFunction)BZ2File_enter, METH_NOARGS, BZ2File_enter_doc},
1273 {"__exit__", (PyCFunction)BZ2File_exit, METH_VARARGS, BZ2File_exit_doc},
1274 {NULL, NULL} /* sentinel */
1275 };
1276
1277
1278 /* ===================================================================== */
1279 /* Getters and setters of BZ2File. */
1280
1281 /* This is a hacked version of Python's fileobject.c:get_newlines(). */
1282 static PyObject *
BZ2File_get_newlines(BZ2FileObject * self,void * closure)1283 BZ2File_get_newlines(BZ2FileObject *self, void *closure)
1284 {
1285 switch (self->f_newlinetypes) {
1286 case NEWLINE_UNKNOWN:
1287 Py_INCREF(Py_None);
1288 return Py_None;
1289 case NEWLINE_CR:
1290 return PyString_FromString("\r");
1291 case NEWLINE_LF:
1292 return PyString_FromString("\n");
1293 case NEWLINE_CR|NEWLINE_LF:
1294 return Py_BuildValue("(ss)", "\r", "\n");
1295 case NEWLINE_CRLF:
1296 return PyString_FromString("\r\n");
1297 case NEWLINE_CR|NEWLINE_CRLF:
1298 return Py_BuildValue("(ss)", "\r", "\r\n");
1299 case NEWLINE_LF|NEWLINE_CRLF:
1300 return Py_BuildValue("(ss)", "\n", "\r\n");
1301 case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
1302 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1303 default:
1304 PyErr_Format(PyExc_SystemError,
1305 "Unknown newlines value 0x%x\n",
1306 self->f_newlinetypes);
1307 return NULL;
1308 }
1309 }
1310
1311 static PyObject *
BZ2File_get_closed(BZ2FileObject * self,void * closure)1312 BZ2File_get_closed(BZ2FileObject *self, void *closure)
1313 {
1314 return PyInt_FromLong(self->mode == MODE_CLOSED);
1315 }
1316
1317 static PyObject *
BZ2File_get_mode(BZ2FileObject * self,void * closure)1318 BZ2File_get_mode(BZ2FileObject *self, void *closure)
1319 {
1320 return PyObject_GetAttrString(self->file, "mode");
1321 }
1322
1323 static PyObject *
BZ2File_get_name(BZ2FileObject * self,void * closure)1324 BZ2File_get_name(BZ2FileObject *self, void *closure)
1325 {
1326 return PyObject_GetAttrString(self->file, "name");
1327 }
1328
1329 static PyGetSetDef BZ2File_getset[] = {
1330 {"closed", (getter)BZ2File_get_closed, NULL,
1331 "True if the file is closed"},
1332 {"newlines", (getter)BZ2File_get_newlines, NULL,
1333 "end-of-line convention used in this file"},
1334 {"mode", (getter)BZ2File_get_mode, NULL,
1335 "file mode ('r', 'w', or 'U')"},
1336 {"name", (getter)BZ2File_get_name, NULL,
1337 "file name"},
1338 {NULL} /* Sentinel */
1339 };
1340
1341
1342 /* ===================================================================== */
1343 /* Members of BZ2File_Type. */
1344
1345 #undef OFF
1346 #define OFF(x) offsetof(BZ2FileObject, x)
1347
1348 static PyMemberDef BZ2File_members[] = {
1349 {"softspace", T_INT, OFF(f_softspace), 0,
1350 "flag indicating that a space needs to be printed; used by print"},
1351 {NULL} /* Sentinel */
1352 };
1353
1354 /* ===================================================================== */
1355 /* Slot definitions for BZ2File_Type. */
1356
1357 static int
BZ2File_init(BZ2FileObject * self,PyObject * args,PyObject * kwargs)1358 BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1359 {
1360 static char *kwlist[] = {"filename", "mode", "buffering",
1361 "compresslevel", 0};
1362 PyObject *name;
1363 char *mode = "r";
1364 int buffering = -1;
1365 int compresslevel = 9;
1366 int bzerror;
1367 int mode_char = 0;
1368
1369 self->size = -1;
1370
1371 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File",
1372 kwlist, &name, &mode, &buffering,
1373 &compresslevel))
1374 return -1;
1375
1376 if (compresslevel < 1 || compresslevel > 9) {
1377 PyErr_SetString(PyExc_ValueError,
1378 "compresslevel must be between 1 and 9");
1379 return -1;
1380 }
1381
1382 for (;;) {
1383 int error = 0;
1384 switch (*mode) {
1385 case 'r':
1386 case 'w':
1387 if (mode_char)
1388 error = 1;
1389 mode_char = *mode;
1390 break;
1391
1392 case 'b':
1393 break;
1394
1395 case 'U':
1396 #ifdef __VMS
1397 self->f_univ_newline = 0;
1398 #else
1399 self->f_univ_newline = 1;
1400 #endif
1401 break;
1402
1403 default:
1404 error = 1;
1405 break;
1406 }
1407 if (error) {
1408 PyErr_Format(PyExc_ValueError,
1409 "invalid mode char %c", *mode);
1410 return -1;
1411 }
1412 mode++;
1413 if (*mode == '\0')
1414 break;
1415 }
1416
1417 if (mode_char == 0) {
1418 mode_char = 'r';
1419 }
1420
1421 mode = (mode_char == 'r') ? "rb" : "wb";
1422
1423 self->file = PyObject_CallFunction((PyObject*)&PyFile_Type, "(Osi)",
1424 name, mode, buffering);
1425 if (self->file == NULL)
1426 return -1;
1427
1428 /* From now on, we have stuff to dealloc, so jump to error label
1429 * instead of returning */
1430
1431 #ifdef WITH_THREAD
1432 self->lock = PyThread_allocate_lock();
1433 if (!self->lock) {
1434 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1435 goto error;
1436 }
1437 #endif
1438
1439 if (mode_char == 'r')
1440 self->fp = BZ2_bzReadOpen(&bzerror,
1441 PyFile_AsFile(self->file),
1442 0, 0, NULL, 0);
1443 else
1444 self->fp = BZ2_bzWriteOpen(&bzerror,
1445 PyFile_AsFile(self->file),
1446 compresslevel, 0, 0);
1447
1448 if (bzerror != BZ_OK) {
1449 Util_CatchBZ2Error(bzerror);
1450 goto error;
1451 }
1452 PyFile_IncUseCount((PyFileObject *)self->file);
1453
1454 self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1455
1456 return 0;
1457
1458 error:
1459 Py_CLEAR(self->file);
1460 #ifdef WITH_THREAD
1461 if (self->lock) {
1462 PyThread_free_lock(self->lock);
1463 self->lock = NULL;
1464 }
1465 #endif
1466 return -1;
1467 }
1468
1469 static void
BZ2File_dealloc(BZ2FileObject * self)1470 BZ2File_dealloc(BZ2FileObject *self)
1471 {
1472 int bzerror;
1473 #ifdef WITH_THREAD
1474 if (self->lock)
1475 PyThread_free_lock(self->lock);
1476 #endif
1477 switch (self->mode) {
1478 case MODE_READ:
1479 case MODE_READ_EOF:
1480 BZ2_bzReadClose(&bzerror, self->fp);
1481 break;
1482 case MODE_WRITE:
1483 BZ2_bzWriteClose(&bzerror, self->fp,
1484 0, NULL, NULL);
1485 break;
1486 }
1487 if (self->fp != NULL && self->file != NULL)
1488 PyFile_DecUseCount((PyFileObject *)self->file);
1489 self->fp = NULL;
1490 Util_DropReadAhead(self);
1491 Py_XDECREF(self->file);
1492 Py_TYPE(self)->tp_free((PyObject *)self);
1493 }
1494
1495 /* This is a hacked version of Python's fileobject.c:file_getiter(). */
1496 static PyObject *
BZ2File_getiter(BZ2FileObject * self)1497 BZ2File_getiter(BZ2FileObject *self)
1498 {
1499 if (self->mode == MODE_CLOSED) {
1500 PyErr_SetString(PyExc_ValueError,
1501 "I/O operation on closed file");
1502 return NULL;
1503 }
1504 Py_INCREF((PyObject*)self);
1505 return (PyObject *)self;
1506 }
1507
1508 /* This is a hacked version of Python's fileobject.c:file_iternext(). */
1509 #define READAHEAD_BUFSIZE 8192
1510 static PyObject *
BZ2File_iternext(BZ2FileObject * self)1511 BZ2File_iternext(BZ2FileObject *self)
1512 {
1513 PyStringObject* ret;
1514 ACQUIRE_LOCK(self);
1515 if (self->mode == MODE_CLOSED) {
1516 RELEASE_LOCK(self);
1517 PyErr_SetString(PyExc_ValueError,
1518 "I/O operation on closed file");
1519 return NULL;
1520 }
1521 ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1522 RELEASE_LOCK(self);
1523 if (ret == NULL || PyString_GET_SIZE(ret) == 0) {
1524 Py_XDECREF(ret);
1525 return NULL;
1526 }
1527 return (PyObject *)ret;
1528 }
1529
1530 /* ===================================================================== */
1531 /* BZ2File_Type definition. */
1532
1533 PyDoc_VAR(BZ2File__doc__) =
1534 PyDoc_STR(
1535 "BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1536 \n\
1537 Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1538 writing. When opened for writing, the file will be created if it doesn't\n\
1539 exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1540 unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1541 is given, must be a number between 1 and 9.\n\
1542 ")
1543 PyDoc_STR(
1544 "\n\
1545 Add a 'U' to mode to open the file for input with universal newline\n\
1546 support. Any line ending in the input file will be seen as a '\\n' in\n\
1547 Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
1548 for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
1549 '\\r\\n' or a tuple containing all the newline types seen. Universal\n\
1550 newlines are available only when reading.\n\
1551 ")
1552 ;
1553
1554 static PyTypeObject BZ2File_Type = {
1555 PyVarObject_HEAD_INIT(NULL, 0)
1556 "bz2.BZ2File", /*tp_name*/
1557 sizeof(BZ2FileObject), /*tp_basicsize*/
1558 0, /*tp_itemsize*/
1559 (destructor)BZ2File_dealloc, /*tp_dealloc*/
1560 0, /*tp_print*/
1561 0, /*tp_getattr*/
1562 0, /*tp_setattr*/
1563 0, /*tp_compare*/
1564 0, /*tp_repr*/
1565 0, /*tp_as_number*/
1566 0, /*tp_as_sequence*/
1567 0, /*tp_as_mapping*/
1568 0, /*tp_hash*/
1569 0, /*tp_call*/
1570 0, /*tp_str*/
1571 PyObject_GenericGetAttr,/*tp_getattro*/
1572 PyObject_GenericSetAttr,/*tp_setattro*/
1573 0, /*tp_as_buffer*/
1574 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1575 BZ2File__doc__, /*tp_doc*/
1576 0, /*tp_traverse*/
1577 0, /*tp_clear*/
1578 0, /*tp_richcompare*/
1579 0, /*tp_weaklistoffset*/
1580 (getiterfunc)BZ2File_getiter, /*tp_iter*/
1581 (iternextfunc)BZ2File_iternext, /*tp_iternext*/
1582 BZ2File_methods, /*tp_methods*/
1583 BZ2File_members, /*tp_members*/
1584 BZ2File_getset, /*tp_getset*/
1585 0, /*tp_base*/
1586 0, /*tp_dict*/
1587 0, /*tp_descr_get*/
1588 0, /*tp_descr_set*/
1589 0, /*tp_dictoffset*/
1590 (initproc)BZ2File_init, /*tp_init*/
1591 PyType_GenericAlloc, /*tp_alloc*/
1592 PyType_GenericNew, /*tp_new*/
1593 _PyObject_Del, /*tp_free*/
1594 0, /*tp_is_gc*/
1595 };
1596
1597
1598 /* ===================================================================== */
1599 /* Methods of BZ2Comp. */
1600
1601 PyDoc_STRVAR(BZ2Comp_compress__doc__,
1602 "compress(data) -> string\n\
1603 \n\
1604 Provide more data to the compressor object. It will return chunks of\n\
1605 compressed data whenever possible. When you've finished providing data\n\
1606 to compress, call the flush() method to finish the compression process,\n\
1607 and return what is left in the internal buffers.\n\
1608 ");
1609
1610 static PyObject *
BZ2Comp_compress(BZ2CompObject * self,PyObject * args)1611 BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1612 {
1613 Py_buffer pdata;
1614 size_t input_left;
1615 size_t output_size = 0;
1616 PyObject *ret = NULL;
1617 bz_stream *bzs = &self->bzs;
1618 int bzerror;
1619
1620 if (!PyArg_ParseTuple(args, "s*:compress", &pdata))
1621 return NULL;
1622
1623 if (pdata.len == 0) {
1624 PyBuffer_Release(&pdata);
1625 return PyString_FromString("");
1626 }
1627
1628 ACQUIRE_LOCK(self);
1629 if (!self->running) {
1630 PyErr_SetString(PyExc_ValueError,
1631 "this object was already flushed");
1632 goto error;
1633 }
1634
1635 ret = PyString_FromStringAndSize(NULL, SMALLCHUNK);
1636 if (!ret)
1637 goto error;
1638
1639 bzs->next_in = pdata.buf;
1640 bzs->avail_in = MIN(pdata.len, UINT_MAX);
1641 input_left = pdata.len - bzs->avail_in;
1642
1643 bzs->next_out = BUF(ret);
1644 bzs->avail_out = PyString_GET_SIZE(ret);
1645
1646 for (;;) {
1647 char *saved_next_out;
1648
1649 Py_BEGIN_ALLOW_THREADS
1650 saved_next_out = bzs->next_out;
1651 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1652 output_size += bzs->next_out - saved_next_out;
1653 Py_END_ALLOW_THREADS
1654
1655 if (bzerror != BZ_RUN_OK) {
1656 Util_CatchBZ2Error(bzerror);
1657 goto error;
1658 }
1659 if (bzs->avail_in == 0) {
1660 if (input_left == 0)
1661 break; /* no more input data */
1662 bzs->avail_in = MIN(input_left, UINT_MAX);
1663 input_left -= bzs->avail_in;
1664 }
1665 if (bzs->avail_out == 0) {
1666 size_t buffer_left = PyString_GET_SIZE(ret) - output_size;
1667 if (buffer_left == 0) {
1668 if (Util_GrowBuffer(&ret) < 0) {
1669 BZ2_bzCompressEnd(bzs);
1670 goto error;
1671 }
1672 bzs->next_out = BUF(ret) + output_size;
1673 buffer_left = PyString_GET_SIZE(ret) - output_size;
1674 }
1675 bzs->avail_out = MIN(buffer_left, UINT_MAX);
1676 }
1677 }
1678
1679 if (_PyString_Resize(&ret, output_size) < 0)
1680 goto error;
1681
1682 RELEASE_LOCK(self);
1683 PyBuffer_Release(&pdata);
1684 return ret;
1685
1686 error:
1687 RELEASE_LOCK(self);
1688 PyBuffer_Release(&pdata);
1689 Py_XDECREF(ret);
1690 return NULL;
1691 }
1692
1693 PyDoc_STRVAR(BZ2Comp_flush__doc__,
1694 "flush() -> string\n\
1695 \n\
1696 Finish the compression process and return what is left in internal buffers.\n\
1697 You must not use the compressor object after calling this method.\n\
1698 ");
1699
1700 static PyObject *
BZ2Comp_flush(BZ2CompObject * self)1701 BZ2Comp_flush(BZ2CompObject *self)
1702 {
1703 size_t output_size = 0;
1704 PyObject *ret = NULL;
1705 bz_stream *bzs = &self->bzs;
1706 int bzerror;
1707
1708 ACQUIRE_LOCK(self);
1709 if (!self->running) {
1710 PyErr_SetString(PyExc_ValueError, "object was already flushed");
1711 goto error;
1712 }
1713 self->running = 0;
1714
1715 ret = PyString_FromStringAndSize(NULL, SMALLCHUNK);
1716 if (!ret)
1717 goto error;
1718
1719 bzs->next_out = BUF(ret);
1720 bzs->avail_out = PyString_GET_SIZE(ret);
1721
1722 for (;;) {
1723 char *saved_next_out;
1724
1725 Py_BEGIN_ALLOW_THREADS
1726 saved_next_out = bzs->next_out;
1727 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1728 output_size += bzs->next_out - saved_next_out;
1729 Py_END_ALLOW_THREADS
1730
1731 if (bzerror == BZ_STREAM_END) {
1732 break;
1733 } else if (bzerror != BZ_FINISH_OK) {
1734 Util_CatchBZ2Error(bzerror);
1735 goto error;
1736 }
1737 if (bzs->avail_out == 0) {
1738 size_t buffer_left = PyString_GET_SIZE(ret) - output_size;
1739 if (buffer_left == 0) {
1740 if (Util_GrowBuffer(&ret) < 0)
1741 goto error;
1742 bzs->next_out = BUF(ret) + output_size;
1743 buffer_left = PyString_GET_SIZE(ret) - output_size;
1744 }
1745 bzs->avail_out = MIN(buffer_left, UINT_MAX);
1746 }
1747 }
1748
1749 if (output_size != PyString_GET_SIZE(ret))
1750 if (_PyString_Resize(&ret, output_size) < 0)
1751 goto error;
1752
1753 RELEASE_LOCK(self);
1754 return ret;
1755
1756 error:
1757 RELEASE_LOCK(self);
1758 Py_XDECREF(ret);
1759 return NULL;
1760 }
1761
1762 static PyMethodDef BZ2Comp_methods[] = {
1763 {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1764 BZ2Comp_compress__doc__},
1765 {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1766 BZ2Comp_flush__doc__},
1767 {NULL, NULL} /* sentinel */
1768 };
1769
1770
1771 /* ===================================================================== */
1772 /* Slot definitions for BZ2Comp_Type. */
1773
1774 static int
BZ2Comp_init(BZ2CompObject * self,PyObject * args,PyObject * kwargs)1775 BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1776 {
1777 int compresslevel = 9;
1778 int bzerror;
1779 static char *kwlist[] = {"compresslevel", 0};
1780
1781 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1782 kwlist, &compresslevel))
1783 return -1;
1784
1785 if (compresslevel < 1 || compresslevel > 9) {
1786 PyErr_SetString(PyExc_ValueError,
1787 "compresslevel must be between 1 and 9");
1788 goto error;
1789 }
1790
1791 #ifdef WITH_THREAD
1792 self->lock = PyThread_allocate_lock();
1793 if (!self->lock) {
1794 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1795 goto error;
1796 }
1797 #endif
1798
1799 memset(&self->bzs, 0, sizeof(bz_stream));
1800 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1801 if (bzerror != BZ_OK) {
1802 Util_CatchBZ2Error(bzerror);
1803 goto error;
1804 }
1805
1806 self->running = 1;
1807
1808 return 0;
1809 error:
1810 #ifdef WITH_THREAD
1811 if (self->lock) {
1812 PyThread_free_lock(self->lock);
1813 self->lock = NULL;
1814 }
1815 #endif
1816 return -1;
1817 }
1818
1819 static void
BZ2Comp_dealloc(BZ2CompObject * self)1820 BZ2Comp_dealloc(BZ2CompObject *self)
1821 {
1822 #ifdef WITH_THREAD
1823 if (self->lock)
1824 PyThread_free_lock(self->lock);
1825 #endif
1826 BZ2_bzCompressEnd(&self->bzs);
1827 Py_TYPE(self)->tp_free((PyObject *)self);
1828 }
1829
1830
1831 /* ===================================================================== */
1832 /* BZ2Comp_Type definition. */
1833
1834 PyDoc_STRVAR(BZ2Comp__doc__,
1835 "BZ2Compressor([compresslevel=9]) -> compressor object\n\
1836 \n\
1837 Create a new compressor object. This object may be used to compress\n\
1838 data sequentially. If you want to compress data in one shot, use the\n\
1839 compress() function instead. The compresslevel parameter, if given,\n\
1840 must be a number between 1 and 9.\n\
1841 ");
1842
1843 static PyTypeObject BZ2Comp_Type = {
1844 PyVarObject_HEAD_INIT(NULL, 0)
1845 "bz2.BZ2Compressor", /*tp_name*/
1846 sizeof(BZ2CompObject), /*tp_basicsize*/
1847 0, /*tp_itemsize*/
1848 (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
1849 0, /*tp_print*/
1850 0, /*tp_getattr*/
1851 0, /*tp_setattr*/
1852 0, /*tp_compare*/
1853 0, /*tp_repr*/
1854 0, /*tp_as_number*/
1855 0, /*tp_as_sequence*/
1856 0, /*tp_as_mapping*/
1857 0, /*tp_hash*/
1858 0, /*tp_call*/
1859 0, /*tp_str*/
1860 PyObject_GenericGetAttr,/*tp_getattro*/
1861 PyObject_GenericSetAttr,/*tp_setattro*/
1862 0, /*tp_as_buffer*/
1863 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1864 BZ2Comp__doc__, /*tp_doc*/
1865 0, /*tp_traverse*/
1866 0, /*tp_clear*/
1867 0, /*tp_richcompare*/
1868 0, /*tp_weaklistoffset*/
1869 0, /*tp_iter*/
1870 0, /*tp_iternext*/
1871 BZ2Comp_methods, /*tp_methods*/
1872 0, /*tp_members*/
1873 0, /*tp_getset*/
1874 0, /*tp_base*/
1875 0, /*tp_dict*/
1876 0, /*tp_descr_get*/
1877 0, /*tp_descr_set*/
1878 0, /*tp_dictoffset*/
1879 (initproc)BZ2Comp_init, /*tp_init*/
1880 PyType_GenericAlloc, /*tp_alloc*/
1881 PyType_GenericNew, /*tp_new*/
1882 _PyObject_Del, /*tp_free*/
1883 0, /*tp_is_gc*/
1884 };
1885
1886
1887 /* ===================================================================== */
1888 /* Members of BZ2Decomp. */
1889
1890 #undef OFF
1891 #define OFF(x) offsetof(BZ2DecompObject, x)
1892
1893 static PyMemberDef BZ2Decomp_members[] = {
1894 {"unused_data", T_OBJECT, OFF(unused_data), RO},
1895 {NULL} /* Sentinel */
1896 };
1897
1898
1899 /* ===================================================================== */
1900 /* Methods of BZ2Decomp. */
1901
1902 PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1903 "decompress(data) -> string\n\
1904 \n\
1905 Provide more data to the decompressor object. It will return chunks\n\
1906 of decompressed data whenever possible. If you try to decompress data\n\
1907 after the end of stream is found, EOFError will be raised. If any data\n\
1908 was found after the end of stream, it'll be ignored and saved in\n\
1909 unused_data attribute.\n\
1910 ");
1911
1912 static PyObject *
BZ2Decomp_decompress(BZ2DecompObject * self,PyObject * args)1913 BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1914 {
1915 Py_buffer pdata;
1916 size_t input_left;
1917 size_t output_size = 0;
1918 PyObject *ret = NULL;
1919 bz_stream *bzs = &self->bzs;
1920 int bzerror;
1921
1922 if (!PyArg_ParseTuple(args, "s*:decompress", &pdata))
1923 return NULL;
1924
1925 ACQUIRE_LOCK(self);
1926 if (!self->running) {
1927 PyErr_SetString(PyExc_EOFError, "end of stream was "
1928 "already found");
1929 goto error;
1930 }
1931
1932 ret = PyString_FromStringAndSize(NULL, SMALLCHUNK);
1933 if (!ret)
1934 goto error;
1935
1936 bzs->next_in = pdata.buf;
1937 bzs->avail_in = MIN(pdata.len, UINT_MAX);
1938 input_left = pdata.len - bzs->avail_in;
1939
1940 bzs->next_out = BUF(ret);
1941 bzs->avail_out = PyString_GET_SIZE(ret);
1942
1943 for (;;) {
1944 char *saved_next_out;
1945
1946 Py_BEGIN_ALLOW_THREADS
1947 saved_next_out = bzs->next_out;
1948 bzerror = BZ2_bzDecompress(bzs);
1949 output_size += bzs->next_out - saved_next_out;
1950 Py_END_ALLOW_THREADS
1951
1952 if (bzerror == BZ_STREAM_END) {
1953 self->running = 0;
1954 input_left += bzs->avail_in;
1955 if (input_left != 0) {
1956 Py_SETREF(self->unused_data,
1957 PyString_FromStringAndSize(bzs->next_in, input_left));
1958 if (self->unused_data == NULL)
1959 goto error;
1960 }
1961 break;
1962 }
1963 if (bzerror != BZ_OK) {
1964 Util_CatchBZ2Error(bzerror);
1965 goto error;
1966 }
1967 if (bzs->avail_in == 0) {
1968 if (input_left == 0)
1969 break; /* no more input data */
1970 bzs->avail_in = MIN(input_left, UINT_MAX);
1971 input_left -= bzs->avail_in;
1972 }
1973 if (bzs->avail_out == 0) {
1974 size_t buffer_left = PyString_GET_SIZE(ret) - output_size;
1975 if (buffer_left == 0) {
1976 if (Util_GrowBuffer(&ret) < 0) {
1977 BZ2_bzDecompressEnd(bzs);
1978 goto error;
1979 }
1980 bzs->next_out = BUF(ret) + output_size;
1981 buffer_left = PyString_GET_SIZE(ret) - output_size;
1982 }
1983 bzs->avail_out = MIN(buffer_left, UINT_MAX);
1984 }
1985 }
1986
1987 if (output_size != PyString_GET_SIZE(ret))
1988 if (_PyString_Resize(&ret, output_size) < 0)
1989 goto error;
1990
1991 RELEASE_LOCK(self);
1992 PyBuffer_Release(&pdata);
1993 return ret;
1994
1995 error:
1996 RELEASE_LOCK(self);
1997 PyBuffer_Release(&pdata);
1998 Py_XDECREF(ret);
1999 return NULL;
2000 }
2001
2002 static PyMethodDef BZ2Decomp_methods[] = {
2003 {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
2004 {NULL, NULL} /* sentinel */
2005 };
2006
2007
2008 /* ===================================================================== */
2009 /* Slot definitions for BZ2Decomp_Type. */
2010
2011 static int
BZ2Decomp_init(BZ2DecompObject * self,PyObject * args,PyObject * kwargs)2012 BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
2013 {
2014 int bzerror;
2015
2016 if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
2017 return -1;
2018
2019 #ifdef WITH_THREAD
2020 self->lock = PyThread_allocate_lock();
2021 if (!self->lock) {
2022 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
2023 goto error;
2024 }
2025 #endif
2026
2027 self->unused_data = PyString_FromString("");
2028 if (!self->unused_data)
2029 goto error;
2030
2031 memset(&self->bzs, 0, sizeof(bz_stream));
2032 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
2033 if (bzerror != BZ_OK) {
2034 Util_CatchBZ2Error(bzerror);
2035 goto error;
2036 }
2037
2038 self->running = 1;
2039
2040 return 0;
2041
2042 error:
2043 #ifdef WITH_THREAD
2044 if (self->lock) {
2045 PyThread_free_lock(self->lock);
2046 self->lock = NULL;
2047 }
2048 #endif
2049 Py_CLEAR(self->unused_data);
2050 return -1;
2051 }
2052
2053 static void
BZ2Decomp_dealloc(BZ2DecompObject * self)2054 BZ2Decomp_dealloc(BZ2DecompObject *self)
2055 {
2056 #ifdef WITH_THREAD
2057 if (self->lock)
2058 PyThread_free_lock(self->lock);
2059 #endif
2060 Py_XDECREF(self->unused_data);
2061 BZ2_bzDecompressEnd(&self->bzs);
2062 Py_TYPE(self)->tp_free((PyObject *)self);
2063 }
2064
2065
2066 /* ===================================================================== */
2067 /* BZ2Decomp_Type definition. */
2068
2069 PyDoc_STRVAR(BZ2Decomp__doc__,
2070 "BZ2Decompressor() -> decompressor object\n\
2071 \n\
2072 Create a new decompressor object. This object may be used to decompress\n\
2073 data sequentially. If you want to decompress data in one shot, use the\n\
2074 decompress() function instead.\n\
2075 ");
2076
2077 static PyTypeObject BZ2Decomp_Type = {
2078 PyVarObject_HEAD_INIT(NULL, 0)
2079 "bz2.BZ2Decompressor", /*tp_name*/
2080 sizeof(BZ2DecompObject), /*tp_basicsize*/
2081 0, /*tp_itemsize*/
2082 (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
2083 0, /*tp_print*/
2084 0, /*tp_getattr*/
2085 0, /*tp_setattr*/
2086 0, /*tp_compare*/
2087 0, /*tp_repr*/
2088 0, /*tp_as_number*/
2089 0, /*tp_as_sequence*/
2090 0, /*tp_as_mapping*/
2091 0, /*tp_hash*/
2092 0, /*tp_call*/
2093 0, /*tp_str*/
2094 PyObject_GenericGetAttr,/*tp_getattro*/
2095 PyObject_GenericSetAttr,/*tp_setattro*/
2096 0, /*tp_as_buffer*/
2097 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
2098 BZ2Decomp__doc__, /*tp_doc*/
2099 0, /*tp_traverse*/
2100 0, /*tp_clear*/
2101 0, /*tp_richcompare*/
2102 0, /*tp_weaklistoffset*/
2103 0, /*tp_iter*/
2104 0, /*tp_iternext*/
2105 BZ2Decomp_methods, /*tp_methods*/
2106 BZ2Decomp_members, /*tp_members*/
2107 0, /*tp_getset*/
2108 0, /*tp_base*/
2109 0, /*tp_dict*/
2110 0, /*tp_descr_get*/
2111 0, /*tp_descr_set*/
2112 0, /*tp_dictoffset*/
2113 (initproc)BZ2Decomp_init, /*tp_init*/
2114 PyType_GenericAlloc, /*tp_alloc*/
2115 PyType_GenericNew, /*tp_new*/
2116 _PyObject_Del, /*tp_free*/
2117 0, /*tp_is_gc*/
2118 };
2119
2120
2121 /* ===================================================================== */
2122 /* Module functions. */
2123
2124 PyDoc_STRVAR(bz2_compress__doc__,
2125 "compress(data [, compresslevel=9]) -> string\n\
2126 \n\
2127 Compress data in one shot. If you want to compress data sequentially,\n\
2128 use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
2129 given, must be a number between 1 and 9.\n\
2130 ");
2131
2132 static PyObject *
bz2_compress(PyObject * self,PyObject * args,PyObject * kwargs)2133 bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
2134 {
2135 int compresslevel=9;
2136 int action;
2137 Py_buffer pdata;
2138 size_t input_left;
2139 size_t output_size = 0;
2140 PyObject *ret = NULL;
2141 bz_stream _bzs;
2142 bz_stream *bzs = &_bzs;
2143 int bzerror;
2144 static char *kwlist[] = {"data", "compresslevel", 0};
2145
2146 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|i",
2147 kwlist, &pdata,
2148 &compresslevel))
2149 return NULL;
2150
2151 if (compresslevel < 1 || compresslevel > 9) {
2152 PyErr_SetString(PyExc_ValueError,
2153 "compresslevel must be between 1 and 9");
2154 PyBuffer_Release(&pdata);
2155 return NULL;
2156 }
2157
2158 ret = PyString_FromStringAndSize(NULL, SMALLCHUNK);
2159 if (!ret) {
2160 PyBuffer_Release(&pdata);
2161 return NULL;
2162 }
2163
2164 memset(bzs, 0, sizeof(bz_stream));
2165
2166 bzs->next_in = pdata.buf;
2167 bzs->avail_in = MIN(pdata.len, UINT_MAX);
2168 input_left = pdata.len - bzs->avail_in;
2169
2170 bzs->next_out = BUF(ret);
2171 bzs->avail_out = PyString_GET_SIZE(ret);
2172
2173 bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
2174 if (bzerror != BZ_OK) {
2175 Util_CatchBZ2Error(bzerror);
2176 PyBuffer_Release(&pdata);
2177 Py_DECREF(ret);
2178 return NULL;
2179 }
2180
2181 action = input_left > 0 ? BZ_RUN : BZ_FINISH;
2182
2183 for (;;) {
2184 char *saved_next_out;
2185
2186 Py_BEGIN_ALLOW_THREADS
2187 saved_next_out = bzs->next_out;
2188 bzerror = BZ2_bzCompress(bzs, action);
2189 output_size += bzs->next_out - saved_next_out;
2190 Py_END_ALLOW_THREADS
2191
2192 if (bzerror == BZ_STREAM_END) {
2193 break;
2194 } else if (bzerror != BZ_RUN_OK && bzerror != BZ_FINISH_OK) {
2195 BZ2_bzCompressEnd(bzs);
2196 Util_CatchBZ2Error(bzerror);
2197 PyBuffer_Release(&pdata);
2198 Py_DECREF(ret);
2199 return NULL;
2200 }
2201 if (action == BZ_RUN && bzs->avail_in == 0) {
2202 if (input_left == 0) {
2203 action = BZ_FINISH;
2204 } else {
2205 bzs->avail_in = MIN(input_left, UINT_MAX);
2206 input_left -= bzs->avail_in;
2207 }
2208 }
2209 if (bzs->avail_out == 0) {
2210 size_t buffer_left = PyString_GET_SIZE(ret) - output_size;
2211 if (buffer_left == 0) {
2212 if (Util_GrowBuffer(&ret) < 0) {
2213 BZ2_bzCompressEnd(bzs);
2214 PyBuffer_Release(&pdata);
2215 return NULL;
2216 }
2217 bzs->next_out = BUF(ret) + output_size;
2218 buffer_left = PyString_GET_SIZE(ret) - output_size;
2219 }
2220 bzs->avail_out = MIN(buffer_left, UINT_MAX);
2221 }
2222 }
2223
2224 if (output_size != PyString_GET_SIZE(ret))
2225 _PyString_Resize(&ret, output_size); /* Sets ret to NULL on failure. */
2226
2227 BZ2_bzCompressEnd(bzs);
2228 PyBuffer_Release(&pdata);
2229 return ret;
2230 }
2231
2232 PyDoc_STRVAR(bz2_decompress__doc__,
2233 "decompress(data) -> decompressed data\n\
2234 \n\
2235 Decompress data in one shot. If you want to decompress data sequentially,\n\
2236 use an instance of BZ2Decompressor instead.\n\
2237 ");
2238
2239 static PyObject *
bz2_decompress(PyObject * self,PyObject * args)2240 bz2_decompress(PyObject *self, PyObject *args)
2241 {
2242 Py_buffer pdata;
2243 size_t input_left;
2244 size_t output_size = 0;
2245 PyObject *ret;
2246 bz_stream _bzs;
2247 bz_stream *bzs = &_bzs;
2248 int bzerror;
2249
2250 if (!PyArg_ParseTuple(args, "s*:decompress", &pdata))
2251 return NULL;
2252
2253 if (pdata.len == 0) {
2254 PyBuffer_Release(&pdata);
2255 return PyString_FromString("");
2256 }
2257
2258 ret = PyString_FromStringAndSize(NULL, SMALLCHUNK);
2259 if (!ret) {
2260 PyBuffer_Release(&pdata);
2261 return NULL;
2262 }
2263
2264 memset(bzs, 0, sizeof(bz_stream));
2265
2266 bzs->next_in = pdata.buf;
2267 bzs->avail_in = MIN(pdata.len, UINT_MAX);
2268 input_left = pdata.len - bzs->avail_in;
2269
2270 bzs->next_out = BUF(ret);
2271 bzs->avail_out = PyString_GET_SIZE(ret);
2272
2273 bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2274 if (bzerror != BZ_OK) {
2275 Util_CatchBZ2Error(bzerror);
2276 Py_DECREF(ret);
2277 PyBuffer_Release(&pdata);
2278 return NULL;
2279 }
2280
2281 for (;;) {
2282 char *saved_next_out;
2283
2284 Py_BEGIN_ALLOW_THREADS
2285 saved_next_out = bzs->next_out;
2286 bzerror = BZ2_bzDecompress(bzs);
2287 output_size += bzs->next_out - saved_next_out;
2288 Py_END_ALLOW_THREADS
2289
2290 if (bzerror == BZ_STREAM_END) {
2291 break;
2292 } else if (bzerror != BZ_OK) {
2293 BZ2_bzDecompressEnd(bzs);
2294 Util_CatchBZ2Error(bzerror);
2295 PyBuffer_Release(&pdata);
2296 Py_DECREF(ret);
2297 return NULL;
2298 }
2299 if (bzs->avail_in == 0) {
2300 if (input_left == 0) {
2301 BZ2_bzDecompressEnd(bzs);
2302 PyErr_SetString(PyExc_ValueError,
2303 "couldn't find end of stream");
2304 PyBuffer_Release(&pdata);
2305 Py_DECREF(ret);
2306 return NULL;
2307 }
2308 bzs->avail_in = MIN(input_left, UINT_MAX);
2309 input_left -= bzs->avail_in;
2310 }
2311 if (bzs->avail_out == 0) {
2312 size_t buffer_left = PyString_GET_SIZE(ret) - output_size;
2313 if (buffer_left == 0) {
2314 if (Util_GrowBuffer(&ret) < 0) {
2315 BZ2_bzDecompressEnd(bzs);
2316 PyBuffer_Release(&pdata);
2317 return NULL;
2318 }
2319 bzs->next_out = BUF(ret) + output_size;
2320 buffer_left = PyString_GET_SIZE(ret) - output_size;
2321 }
2322 bzs->avail_out = MIN(buffer_left, UINT_MAX);
2323 }
2324 }
2325
2326 if (output_size != PyString_GET_SIZE(ret))
2327 _PyString_Resize(&ret, output_size); /* Sets ret to NULL on failure. */
2328
2329 BZ2_bzDecompressEnd(bzs);
2330 PyBuffer_Release(&pdata);
2331 return ret;
2332 }
2333
2334 static PyMethodDef bz2_methods[] = {
2335 {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2336 bz2_compress__doc__},
2337 {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2338 bz2_decompress__doc__},
2339 {NULL, NULL} /* sentinel */
2340 };
2341
2342 /* ===================================================================== */
2343 /* Initialization function. */
2344
2345 PyDoc_STRVAR(bz2__doc__,
2346 "The python bz2 module provides a comprehensive interface for\n\
2347 the bz2 compression library. It implements a complete file\n\
2348 interface, one shot (de)compression functions, and types for\n\
2349 sequential (de)compression.\n\
2350 ");
2351
2352 PyMODINIT_FUNC
initbz2(void)2353 initbz2(void)
2354 {
2355 PyObject *m;
2356
2357 if (PyType_Ready(&BZ2File_Type) < 0)
2358 return;
2359 if (PyType_Ready(&BZ2Comp_Type) < 0)
2360 return;
2361 if (PyType_Ready(&BZ2Decomp_Type) < 0)
2362 return;
2363
2364 m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
2365 if (m == NULL)
2366 return;
2367
2368 PyModule_AddObject(m, "__author__", PyString_FromString(__author__));
2369
2370 Py_INCREF(&BZ2File_Type);
2371 PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2372
2373 Py_INCREF(&BZ2Comp_Type);
2374 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2375
2376 Py_INCREF(&BZ2Decomp_Type);
2377 PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
2378 }
2379