• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 
3 python-bz2 - python bz2 library interface
4 
5 Copyright (c) 2002  Gustavo Niemeyer <niemeyer@conectiva.com>
6 Copyright (c) 2002  Python Software Foundation; All Rights Reserved
7 
8 */
9 
10 #include "Python.h"
11 #include <stdio.h>
12 #include <bzlib.h>
13 #include "structmember.h"
14 
15 #ifdef WITH_THREAD
16 #include "pythread.h"
17 #endif
18 
19 static char __author__[] =
20 "The bz2 python module was written by:\n\
21 \n\
22     Gustavo Niemeyer <niemeyer@conectiva.com>\n\
23 ";
24 
25 /* Our very own off_t-like type, 64-bit if possible */
26 /* copied from Objects/fileobject.c */
27 #if !defined(HAVE_LARGEFILE_SUPPORT)
28 typedef off_t Py_off_t;
29 #elif SIZEOF_OFF_T >= 8
30 typedef off_t Py_off_t;
31 #elif SIZEOF_FPOS_T >= 8
32 typedef fpos_t Py_off_t;
33 #else
34 #error "Large file support, but neither off_t nor fpos_t is large enough."
35 #endif
36 
37 #define BUF(v) PyString_AS_STRING((PyStringObject *)v)
38 
39 #define MODE_CLOSED   0
40 #define MODE_READ     1
41 #define MODE_READ_EOF 2
42 #define MODE_WRITE    3
43 
44 
45 #ifndef BZ_CONFIG_ERROR
46 
47 #define BZ2_bzRead bzRead
48 #define BZ2_bzReadOpen bzReadOpen
49 #define BZ2_bzReadClose bzReadClose
50 #define BZ2_bzWrite bzWrite
51 #define BZ2_bzWriteOpen bzWriteOpen
52 #define BZ2_bzWriteClose bzWriteClose
53 #define BZ2_bzCompress bzCompress
54 #define BZ2_bzCompressInit bzCompressInit
55 #define BZ2_bzCompressEnd bzCompressEnd
56 #define BZ2_bzDecompress bzDecompress
57 #define BZ2_bzDecompressInit bzDecompressInit
58 #define BZ2_bzDecompressEnd bzDecompressEnd
59 
60 #endif /* ! BZ_CONFIG_ERROR */
61 
62 
/* Per-object locking helpers.
 *
 * ACQUIRE_LOCK first tries a non-blocking acquire of obj->lock; only if
 * that fails does it release the interpreter lock and wait for the
 * object lock.  Without thread support both macros expand to nothing.
 *
 * The macro argument is parenthesized so that any pointer expression
 * can be passed safely. */
#ifdef WITH_THREAD
#define ACQUIRE_LOCK(obj) do { \
    if (!PyThread_acquire_lock((obj)->lock, 0)) { \
        Py_BEGIN_ALLOW_THREADS \
        PyThread_acquire_lock((obj)->lock, 1); \
        Py_END_ALLOW_THREADS \
    } } while(0)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif
75 
76 #define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
77 
78 /* Bits in f_newlinetypes */
79 #define NEWLINE_UNKNOWN 0       /* No newline seen, yet */
80 #define NEWLINE_CR 1            /* \r newline seen */
81 #define NEWLINE_LF 2            /* \n newline seen */
82 #define NEWLINE_CRLF 4          /* \r\n newline seen */
83 
84 /* ===================================================================== */
85 /* Structure definitions. */
86 
87 typedef struct {
88     PyObject_HEAD
89     PyObject *file;
90 
91     char* f_buf;                /* Allocated readahead buffer */
92     char* f_bufend;             /* Points after last occupied position */
93     char* f_bufptr;             /* Current buffer position */
94 
95     int f_softspace;            /* Flag used by 'print' command */
96 
97     int f_univ_newline;         /* Handle any newline convention */
98     int f_newlinetypes;         /* Types of newlines seen */
99     int f_skipnextlf;           /* Skip next \n */
100 
101     BZFILE *fp;
102     int mode;
103     Py_off_t pos;
104     Py_off_t size;
105 #ifdef WITH_THREAD
106     PyThread_type_lock lock;
107 #endif
108 } BZ2FileObject;
109 
110 typedef struct {
111     PyObject_HEAD
112     bz_stream bzs;
113     int running;
114 #ifdef WITH_THREAD
115     PyThread_type_lock lock;
116 #endif
117 } BZ2CompObject;
118 
119 typedef struct {
120     PyObject_HEAD
121     bz_stream bzs;
122     int running;
123     PyObject *unused_data;
124 #ifdef WITH_THREAD
125     PyThread_type_lock lock;
126 #endif
127 } BZ2DecompObject;
128 
129 /* ===================================================================== */
130 /* Utility functions. */
131 
132 /* Refuse regular I/O if there's data in the iteration-buffer.
133  * Mixing them would cause data to arrive out of order, as the read*
134  * methods don't use the iteration buffer. */
135 static int
check_iterbuffered(BZ2FileObject * f)136 check_iterbuffered(BZ2FileObject *f)
137 {
138     if (f->f_buf != NULL &&
139         (f->f_bufend - f->f_bufptr) > 0 &&
140         f->f_buf[0] != '\0') {
141         PyErr_SetString(PyExc_ValueError,
142             "Mixing iteration and read methods would lose data");
143         return -1;
144     }
145     return 0;
146 }
147 
148 static int
Util_CatchBZ2Error(int bzerror)149 Util_CatchBZ2Error(int bzerror)
150 {
151     int ret = 0;
152     switch(bzerror) {
153         case BZ_OK:
154         case BZ_STREAM_END:
155             break;
156 
157 #ifdef BZ_CONFIG_ERROR
158         case BZ_CONFIG_ERROR:
159             PyErr_SetString(PyExc_SystemError,
160                             "the bz2 library was not compiled "
161                             "correctly");
162             ret = 1;
163             break;
164 #endif
165 
166         case BZ_PARAM_ERROR:
167             PyErr_SetString(PyExc_ValueError,
168                             "the bz2 library has received wrong "
169                             "parameters");
170             ret = 1;
171             break;
172 
173         case BZ_MEM_ERROR:
174             PyErr_NoMemory();
175             ret = 1;
176             break;
177 
178         case BZ_DATA_ERROR:
179         case BZ_DATA_ERROR_MAGIC:
180             PyErr_SetString(PyExc_IOError, "invalid data stream");
181             ret = 1;
182             break;
183 
184         case BZ_IO_ERROR:
185             PyErr_SetString(PyExc_IOError, "unknown IO error");
186             ret = 1;
187             break;
188 
189         case BZ_UNEXPECTED_EOF:
190             PyErr_SetString(PyExc_EOFError,
191                             "compressed file ended before the "
192                             "logical end-of-stream was detected");
193             ret = 1;
194             break;
195 
196         case BZ_SEQUENCE_ERROR:
197             PyErr_SetString(PyExc_RuntimeError,
198                             "wrong sequence of bz2 library "
199                             "commands used");
200             ret = 1;
201             break;
202     }
203     return ret;
204 }
205 
206 #if BUFSIZ < 8192
207 #define SMALLCHUNK 8192
208 #else
209 #define SMALLCHUNK BUFSIZ
210 #endif
211 
/* This is a hacked version of Python's fileobject.c:new_buffersize().
 *
 * Returns the next buffer size to try after 'currentsize': the current
 * size plus one eighth of it plus a small constant.  Growing by a
 * fraction of the current size keeps the total copying cost amortized
 * linear while allocating less than doubling would. */
static size_t
Util_NewBufferSize(size_t currentsize)
{
    const size_t slack = 6;
    size_t eighth = currentsize / 8;

    return currentsize + eighth + slack;
}
221 
222 static int
Util_GrowBuffer(PyObject ** buf)223 Util_GrowBuffer(PyObject **buf)
224 {
225     size_t size = PyString_GET_SIZE(*buf);
226     size_t new_size = Util_NewBufferSize(size);
227     if (new_size > size) {
228         return _PyString_Resize(buf, new_size);
229     } else {  /* overflow */
230         PyErr_SetString(PyExc_OverflowError,
231                         "Unable to allocate buffer - output too large");
232         return -1;
233     }
234 }
235 
236 /* This is a hacked version of Python's fileobject.c:get_line(). */
237 static PyObject *
Util_GetLine(BZ2FileObject * f,int n)238 Util_GetLine(BZ2FileObject *f, int n)
239 {
240     char c;
241     char *buf, *end;
242     size_t total_v_size;        /* total # of slots in buffer */
243     size_t used_v_size;         /* # used slots in buffer */
244     size_t increment;       /* amount to increment the buffer */
245     PyObject *v;
246     int bzerror;
247     int bytes_read;
248     int newlinetypes = f->f_newlinetypes;
249     int skipnextlf = f->f_skipnextlf;
250     int univ_newline = f->f_univ_newline;
251 
252     total_v_size = n > 0 ? n : 100;
253     v = PyString_FromStringAndSize((char *)NULL, total_v_size);
254     if (v == NULL)
255         return NULL;
256 
257     buf = BUF(v);
258     end = buf + total_v_size;
259 
260     for (;;) {
261         Py_BEGIN_ALLOW_THREADS
262         while (buf != end) {
263             bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
264             f->pos++;
265             if (bytes_read == 0) break;
266             if (univ_newline) {
267                 if (skipnextlf) {
268                     skipnextlf = 0;
269                     if (c == '\n') {
270                         /* Seeing a \n here with skipnextlf true means we
271                          * saw a \r before.
272                          */
273                         newlinetypes |= NEWLINE_CRLF;
274                         if (bzerror != BZ_OK) break;
275                         bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
276                         f->pos++;
277                         if (bytes_read == 0) break;
278                     } else {
279                         newlinetypes |= NEWLINE_CR;
280                     }
281                 }
282                 if (c == '\r') {
283                     skipnextlf = 1;
284                     c = '\n';
285                 } else if (c == '\n')
286                     newlinetypes |= NEWLINE_LF;
287             }
288             *buf++ = c;
289             if (bzerror != BZ_OK || c == '\n') break;
290         }
291         if (univ_newline && bzerror == BZ_STREAM_END && skipnextlf)
292             newlinetypes |= NEWLINE_CR;
293         Py_END_ALLOW_THREADS
294         f->f_newlinetypes = newlinetypes;
295         f->f_skipnextlf = skipnextlf;
296         if (bzerror == BZ_STREAM_END) {
297             f->size = f->pos;
298             f->mode = MODE_READ_EOF;
299             break;
300         } else if (bzerror != BZ_OK) {
301             Util_CatchBZ2Error(bzerror);
302             Py_DECREF(v);
303             return NULL;
304         }
305         if (c == '\n')
306             break;
307         /* Must be because buf == end */
308         if (n > 0)
309             break;
310         used_v_size = total_v_size;
311         increment = total_v_size >> 2; /* mild exponential growth */
312         total_v_size += increment;
313         if (total_v_size > INT_MAX) {
314             PyErr_SetString(PyExc_OverflowError,
315                 "line is longer than a Python string can hold");
316             Py_DECREF(v);
317             return NULL;
318         }
319         if (_PyString_Resize(&v, total_v_size) < 0)
320             return NULL;
321         buf = BUF(v) + used_v_size;
322         end = BUF(v) + total_v_size;
323     }
324 
325     used_v_size = buf - BUF(v);
326     if (used_v_size != total_v_size)
327         _PyString_Resize(&v, used_v_size);
328     return v;
329 }
330 
331 /* This is a hacked version of Python's
332  * fileobject.c:Py_UniversalNewlineFread(). */
333 size_t
Util_UnivNewlineRead(int * bzerror,BZFILE * stream,char * buf,size_t n,BZ2FileObject * f)334 Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
335                      char* buf, size_t n, BZ2FileObject *f)
336 {
337     char *dst = buf;
338     int newlinetypes, skipnextlf;
339 
340     assert(buf != NULL);
341     assert(stream != NULL);
342 
343     if (!f->f_univ_newline)
344         return BZ2_bzRead(bzerror, stream, buf, n);
345 
346     newlinetypes = f->f_newlinetypes;
347     skipnextlf = f->f_skipnextlf;
348 
349     /* Invariant:  n is the number of bytes remaining to be filled
350      * in the buffer.
351      */
352     while (n) {
353         size_t nread;
354         int shortread;
355         char *src = dst;
356 
357         nread = BZ2_bzRead(bzerror, stream, dst, n);
358         assert(nread <= n);
359         n -= nread; /* assuming 1 byte out for each in; will adjust */
360         shortread = n != 0;             /* true iff EOF or error */
361         while (nread--) {
362             char c = *src++;
363             if (c == '\r') {
364                 /* Save as LF and set flag to skip next LF. */
365                 *dst++ = '\n';
366                 skipnextlf = 1;
367             }
368             else if (skipnextlf && c == '\n') {
369                 /* Skip LF, and remember we saw CR LF. */
370                 skipnextlf = 0;
371                 newlinetypes |= NEWLINE_CRLF;
372                 ++n;
373             }
374             else {
375                 /* Normal char to be stored in buffer.  Also
376                  * update the newlinetypes flag if either this
377                  * is an LF or the previous char was a CR.
378                  */
379                 if (c == '\n')
380                     newlinetypes |= NEWLINE_LF;
381                 else if (skipnextlf)
382                     newlinetypes |= NEWLINE_CR;
383                 *dst++ = c;
384                 skipnextlf = 0;
385             }
386         }
387         if (shortread) {
388             /* If this is EOF, update type flags. */
389             if (skipnextlf && *bzerror == BZ_STREAM_END)
390                 newlinetypes |= NEWLINE_CR;
391             break;
392         }
393     }
394     f->f_newlinetypes = newlinetypes;
395     f->f_skipnextlf = skipnextlf;
396     return dst - buf;
397 }
398 
399 /* This is a hacked version of Python's fileobject.c:drop_readahead(). */
400 static void
Util_DropReadAhead(BZ2FileObject * f)401 Util_DropReadAhead(BZ2FileObject *f)
402 {
403     if (f->f_buf != NULL) {
404         PyMem_Free(f->f_buf);
405         f->f_buf = NULL;
406     }
407 }
408 
409 /* This is a hacked version of Python's fileobject.c:readahead(). */
410 static int
Util_ReadAhead(BZ2FileObject * f,int bufsize)411 Util_ReadAhead(BZ2FileObject *f, int bufsize)
412 {
413     int chunksize;
414     int bzerror;
415 
416     if (f->f_buf != NULL) {
417         if((f->f_bufend - f->f_bufptr) >= 1)
418             return 0;
419         else
420             Util_DropReadAhead(f);
421     }
422     if (f->mode == MODE_READ_EOF) {
423         f->f_bufptr = f->f_buf;
424         f->f_bufend = f->f_buf;
425         return 0;
426     }
427     if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
428         PyErr_NoMemory();
429         return -1;
430     }
431     Py_BEGIN_ALLOW_THREADS
432     chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf,
433                                      bufsize, f);
434     Py_END_ALLOW_THREADS
435     f->pos += chunksize;
436     if (bzerror == BZ_STREAM_END) {
437         f->size = f->pos;
438         f->mode = MODE_READ_EOF;
439     } else if (bzerror != BZ_OK) {
440         Util_CatchBZ2Error(bzerror);
441         Util_DropReadAhead(f);
442         return -1;
443     }
444     f->f_bufptr = f->f_buf;
445     f->f_bufend = f->f_buf + chunksize;
446     return 0;
447 }
448 
449 /* This is a hacked version of Python's
450  * fileobject.c:readahead_get_line_skip(). */
451 static PyStringObject *
Util_ReadAheadGetLineSkip(BZ2FileObject * f,int skip,int bufsize)452 Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
453 {
454     PyStringObject* s;
455     char *bufptr;
456     char *buf;
457     int len;
458 
459     if (f->f_buf == NULL)
460         if (Util_ReadAhead(f, bufsize) < 0)
461             return NULL;
462 
463     len = f->f_bufend - f->f_bufptr;
464     if (len == 0)
465         return (PyStringObject *)
466             PyString_FromStringAndSize(NULL, skip);
467     bufptr = memchr(f->f_bufptr, '\n', len);
468     if (bufptr != NULL) {
469         bufptr++;                               /* Count the '\n' */
470         len = bufptr - f->f_bufptr;
471         s = (PyStringObject *)
472             PyString_FromStringAndSize(NULL, skip+len);
473         if (s == NULL)
474             return NULL;
475         memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
476         f->f_bufptr = bufptr;
477         if (bufptr == f->f_bufend)
478             Util_DropReadAhead(f);
479     } else {
480         bufptr = f->f_bufptr;
481         buf = f->f_buf;
482         f->f_buf = NULL;                /* Force new readahead buffer */
483         s = Util_ReadAheadGetLineSkip(f, skip+len,
484                                       bufsize + (bufsize>>2));
485         if (s == NULL) {
486             PyMem_Free(buf);
487             return NULL;
488         }
489         memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
490         PyMem_Free(buf);
491     }
492     return s;
493 }
494 
495 /* ===================================================================== */
496 /* Methods of BZ2File. */
497 
498 PyDoc_STRVAR(BZ2File_read__doc__,
499 "read([size]) -> string\n\
500 \n\
501 Read at most size uncompressed bytes, returned as a string. If the size\n\
502 argument is negative or omitted, read until EOF is reached.\n\
503 ");
504 
505 /* This is a hacked version of Python's fileobject.c:file_read(). */
506 static PyObject *
BZ2File_read(BZ2FileObject * self,PyObject * args)507 BZ2File_read(BZ2FileObject *self, PyObject *args)
508 {
509     long bytesrequested = -1;
510     size_t bytesread, buffersize, chunksize;
511     int bzerror;
512     PyObject *ret = NULL;
513 
514     if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
515         return NULL;
516 
517     ACQUIRE_LOCK(self);
518     switch (self->mode) {
519         case MODE_READ:
520             break;
521         case MODE_READ_EOF:
522             ret = PyString_FromString("");
523             goto cleanup;
524         case MODE_CLOSED:
525             PyErr_SetString(PyExc_ValueError,
526                             "I/O operation on closed file");
527             goto cleanup;
528         default:
529             PyErr_SetString(PyExc_IOError,
530                             "file is not ready for reading");
531             goto cleanup;
532     }
533 
534     /* refuse to mix with f.next() */
535     if (check_iterbuffered(self))
536         goto cleanup;
537 
538     if (bytesrequested < 0)
539         buffersize = Util_NewBufferSize((size_t)0);
540     else
541         buffersize = bytesrequested;
542     if (buffersize > INT_MAX) {
543         PyErr_SetString(PyExc_OverflowError,
544                         "requested number of bytes is "
545                         "more than a Python string can hold");
546         goto cleanup;
547     }
548     ret = PyString_FromStringAndSize((char *)NULL, buffersize);
549     if (ret == NULL)
550         goto cleanup;
551     bytesread = 0;
552 
553     for (;;) {
554         Py_BEGIN_ALLOW_THREADS
555         chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
556                                          BUF(ret)+bytesread,
557                                          buffersize-bytesread,
558                                          self);
559         self->pos += chunksize;
560         Py_END_ALLOW_THREADS
561         bytesread += chunksize;
562         if (bzerror == BZ_STREAM_END) {
563             self->size = self->pos;
564             self->mode = MODE_READ_EOF;
565             break;
566         } else if (bzerror != BZ_OK) {
567             Util_CatchBZ2Error(bzerror);
568             Py_DECREF(ret);
569             ret = NULL;
570             goto cleanup;
571         }
572         if (bytesrequested < 0) {
573             buffersize = Util_NewBufferSize(buffersize);
574             if (_PyString_Resize(&ret, buffersize) < 0)
575                 goto cleanup;
576         } else {
577             break;
578         }
579     }
580     if (bytesread != buffersize)
581         _PyString_Resize(&ret, bytesread);
582 
583 cleanup:
584     RELEASE_LOCK(self);
585     return ret;
586 }
587 
588 PyDoc_STRVAR(BZ2File_readline__doc__,
589 "readline([size]) -> string\n\
590 \n\
591 Return the next line from the file, as a string, retaining newline.\n\
592 A non-negative size argument will limit the maximum number of bytes to\n\
593 return (an incomplete line may be returned then). Return an empty\n\
594 string at EOF.\n\
595 ");
596 
597 static PyObject *
BZ2File_readline(BZ2FileObject * self,PyObject * args)598 BZ2File_readline(BZ2FileObject *self, PyObject *args)
599 {
600     PyObject *ret = NULL;
601     int sizehint = -1;
602 
603     if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
604         return NULL;
605 
606     ACQUIRE_LOCK(self);
607     switch (self->mode) {
608         case MODE_READ:
609             break;
610         case MODE_READ_EOF:
611             ret = PyString_FromString("");
612             goto cleanup;
613         case MODE_CLOSED:
614             PyErr_SetString(PyExc_ValueError,
615                             "I/O operation on closed file");
616             goto cleanup;
617         default:
618             PyErr_SetString(PyExc_IOError,
619                             "file is not ready for reading");
620             goto cleanup;
621     }
622 
623     /* refuse to mix with f.next() */
624     if (check_iterbuffered(self))
625         goto cleanup;
626 
627     if (sizehint == 0)
628         ret = PyString_FromString("");
629     else
630         ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
631 
632 cleanup:
633     RELEASE_LOCK(self);
634     return ret;
635 }
636 
637 PyDoc_STRVAR(BZ2File_readlines__doc__,
638 "readlines([size]) -> list\n\
639 \n\
640 Call readline() repeatedly and return a list of lines read.\n\
641 The optional size argument, if given, is an approximate bound on the\n\
642 total number of bytes in the lines returned.\n\
643 ");
644 
645 /* This is a hacked version of Python's fileobject.c:file_readlines(). */
646 static PyObject *
BZ2File_readlines(BZ2FileObject * self,PyObject * args)647 BZ2File_readlines(BZ2FileObject *self, PyObject *args)
648 {
649     long sizehint = 0;
650     PyObject *list = NULL;
651     PyObject *line;
652     char small_buffer[SMALLCHUNK];
653     char *buffer = small_buffer;
654     size_t buffersize = SMALLCHUNK;
655     PyObject *big_buffer = NULL;
656     size_t nfilled = 0;
657     size_t nread;
658     size_t totalread = 0;
659     char *p, *q, *end;
660     int err;
661     int shortread = 0;
662     int bzerror;
663 
664     if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
665         return NULL;
666 
667     ACQUIRE_LOCK(self);
668     switch (self->mode) {
669         case MODE_READ:
670             break;
671         case MODE_READ_EOF:
672             list = PyList_New(0);
673             goto cleanup;
674         case MODE_CLOSED:
675             PyErr_SetString(PyExc_ValueError,
676                             "I/O operation on closed file");
677             goto cleanup;
678         default:
679             PyErr_SetString(PyExc_IOError,
680                             "file is not ready for reading");
681             goto cleanup;
682     }
683 
684     /* refuse to mix with f.next() */
685     if (check_iterbuffered(self))
686         goto cleanup;
687 
688     if ((list = PyList_New(0)) == NULL)
689         goto cleanup;
690 
691     for (;;) {
692         Py_BEGIN_ALLOW_THREADS
693         nread = Util_UnivNewlineRead(&bzerror, self->fp,
694                                      buffer+nfilled,
695                                      buffersize-nfilled, self);
696         self->pos += nread;
697         Py_END_ALLOW_THREADS
698         if (bzerror == BZ_STREAM_END) {
699             self->size = self->pos;
700             self->mode = MODE_READ_EOF;
701             if (nread == 0) {
702                 sizehint = 0;
703                 break;
704             }
705             shortread = 1;
706         } else if (bzerror != BZ_OK) {
707             Util_CatchBZ2Error(bzerror);
708           error:
709             Py_DECREF(list);
710             list = NULL;
711             goto cleanup;
712         }
713         totalread += nread;
714         p = memchr(buffer+nfilled, '\n', nread);
715         if (!shortread && p == NULL) {
716             /* Need a larger buffer to fit this line */
717             nfilled += nread;
718             buffersize *= 2;
719             if (buffersize > INT_MAX) {
720                 PyErr_SetString(PyExc_OverflowError,
721                 "line is longer than a Python string can hold");
722                 goto error;
723             }
724             if (big_buffer == NULL) {
725                 /* Create the big buffer */
726                 big_buffer = PyString_FromStringAndSize(
727                     NULL, buffersize);
728                 if (big_buffer == NULL)
729                     goto error;
730                 buffer = PyString_AS_STRING(big_buffer);
731                 memcpy(buffer, small_buffer, nfilled);
732             }
733             else {
734                 /* Grow the big buffer */
735                 if (_PyString_Resize(&big_buffer, buffersize))
736                     goto error;
737                 buffer = PyString_AS_STRING(big_buffer);
738             }
739             continue;
740         }
741         end = buffer+nfilled+nread;
742         q = buffer;
743         while (p != NULL) {
744             /* Process complete lines */
745             p++;
746             line = PyString_FromStringAndSize(q, p-q);
747             if (line == NULL)
748                 goto error;
749             err = PyList_Append(list, line);
750             Py_DECREF(line);
751             if (err != 0)
752                 goto error;
753             q = p;
754             p = memchr(q, '\n', end-q);
755         }
756         /* Move the remaining incomplete line to the start */
757         nfilled = end-q;
758         memmove(buffer, q, nfilled);
759         if (sizehint > 0)
760             if (totalread >= (size_t)sizehint)
761                 break;
762         if (shortread) {
763             sizehint = 0;
764             break;
765         }
766     }
767     if (nfilled != 0) {
768         /* Partial last line */
769         line = PyString_FromStringAndSize(buffer, nfilled);
770         if (line == NULL)
771             goto error;
772         if (sizehint > 0) {
773             /* Need to complete the last line */
774             PyObject *rest = Util_GetLine(self, 0);
775             if (rest == NULL) {
776                 Py_DECREF(line);
777                 goto error;
778             }
779             PyString_Concat(&line, rest);
780             Py_DECREF(rest);
781             if (line == NULL)
782                 goto error;
783         }
784         err = PyList_Append(list, line);
785         Py_DECREF(line);
786         if (err != 0)
787             goto error;
788     }
789 
790   cleanup:
791     RELEASE_LOCK(self);
792     if (big_buffer) {
793         Py_DECREF(big_buffer);
794     }
795     return list;
796 }
797 
798 PyDoc_STRVAR(BZ2File_xreadlines__doc__,
799 "xreadlines() -> self\n\
800 \n\
801 For backward compatibility. BZ2File objects now include the performance\n\
802 optimizations previously implemented in the xreadlines module.\n\
803 ");
804 
805 PyDoc_STRVAR(BZ2File_write__doc__,
806 "write(data) -> None\n\
807 \n\
808 Write the 'data' string to file. Note that due to buffering, close() may\n\
809 be needed before the file on disk reflects the data written.\n\
810 ");
811 
812 /* This is a hacked version of Python's fileobject.c:file_write(). */
813 static PyObject *
BZ2File_write(BZ2FileObject * self,PyObject * args)814 BZ2File_write(BZ2FileObject *self, PyObject *args)
815 {
816     PyObject *ret = NULL;
817     Py_buffer pbuf;
818     char *buf;
819     int len;
820     int bzerror;
821 
822     if (!PyArg_ParseTuple(args, "s*:write", &pbuf))
823         return NULL;
824     buf = pbuf.buf;
825     len = pbuf.len;
826 
827     ACQUIRE_LOCK(self);
828     switch (self->mode) {
829         case MODE_WRITE:
830             break;
831 
832         case MODE_CLOSED:
833             PyErr_SetString(PyExc_ValueError,
834                             "I/O operation on closed file");
835             goto cleanup;
836 
837         default:
838             PyErr_SetString(PyExc_IOError,
839                             "file is not ready for writing");
840             goto cleanup;
841     }
842 
843     self->f_softspace = 0;
844 
845     Py_BEGIN_ALLOW_THREADS
846     BZ2_bzWrite (&bzerror, self->fp, buf, len);
847     self->pos += len;
848     Py_END_ALLOW_THREADS
849 
850     if (bzerror != BZ_OK) {
851         Util_CatchBZ2Error(bzerror);
852         goto cleanup;
853     }
854 
855     Py_INCREF(Py_None);
856     ret = Py_None;
857 
858 cleanup:
859     PyBuffer_Release(&pbuf);
860     RELEASE_LOCK(self);
861     return ret;
862 }
863 
864 PyDoc_STRVAR(BZ2File_writelines__doc__,
865 "writelines(sequence_of_strings) -> None\n\
866 \n\
867 Write the sequence of strings to the file. Note that newlines are not\n\
868 added. The sequence can be any iterable object producing strings. This is\n\
869 equivalent to calling write() for each string.\n\
870 ");
871 
872 /* This is a hacked version of Python's fileobject.c:file_writelines(). */
873 static PyObject *
BZ2File_writelines(BZ2FileObject * self,PyObject * seq)874 BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
875 {
876 #define CHUNKSIZE 1000
877     PyObject *list = NULL;
878     PyObject *iter = NULL;
879     PyObject *ret = NULL;
880     PyObject *line;
881     int i, j, index, len, islist;
882     int bzerror;
883 
884     ACQUIRE_LOCK(self);
885     switch (self->mode) {
886         case MODE_WRITE:
887             break;
888 
889         case MODE_CLOSED:
890             PyErr_SetString(PyExc_ValueError,
891                             "I/O operation on closed file");
892             goto error;
893 
894         default:
895             PyErr_SetString(PyExc_IOError,
896                             "file is not ready for writing");
897             goto error;
898     }
899 
900     islist = PyList_Check(seq);
901     if  (!islist) {
902         iter = PyObject_GetIter(seq);
903         if (iter == NULL) {
904             PyErr_SetString(PyExc_TypeError,
905                 "writelines() requires an iterable argument");
906             goto error;
907         }
908         list = PyList_New(CHUNKSIZE);
909         if (list == NULL)
910             goto error;
911     }
912 
913     /* Strategy: slurp CHUNKSIZE lines into a private list,
914        checking that they are all strings, then write that list
915        without holding the interpreter lock, then come back for more. */
916     for (index = 0; ; index += CHUNKSIZE) {
917         if (islist) {
918             Py_XDECREF(list);
919             list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
920             if (list == NULL)
921                 goto error;
922             j = PyList_GET_SIZE(list);
923         }
924         else {
925             for (j = 0; j < CHUNKSIZE; j++) {
926                 line = PyIter_Next(iter);
927                 if (line == NULL) {
928                     if (PyErr_Occurred())
929                         goto error;
930                     break;
931                 }
932                 PyList_SetItem(list, j, line);
933             }
934         }
935         if (j == 0)
936             break;
937 
938         /* Check that all entries are indeed strings. If not,
939            apply the same rules as for file.write() and
940            convert the rets to strings. This is slow, but
941            seems to be the only way since all conversion APIs
942            could potentially execute Python code. */
943         for (i = 0; i < j; i++) {
944             PyObject *v = PyList_GET_ITEM(list, i);
945             if (!PyString_Check(v)) {
946                 const char *buffer;
947                 Py_ssize_t len;
948                 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
949                     PyErr_SetString(PyExc_TypeError,
950                                     "writelines() "
951                                     "argument must be "
952                                     "a sequence of "
953                                     "strings");
954                     goto error;
955                 }
956                 line = PyString_FromStringAndSize(buffer,
957                                                   len);
958                 if (line == NULL)
959                     goto error;
960                 Py_DECREF(v);
961                 PyList_SET_ITEM(list, i, line);
962             }
963         }
964 
965         self->f_softspace = 0;
966 
967         /* Since we are releasing the global lock, the
968            following code may *not* execute Python code. */
969         Py_BEGIN_ALLOW_THREADS
970         for (i = 0; i < j; i++) {
971             line = PyList_GET_ITEM(list, i);
972             len = PyString_GET_SIZE(line);
973             BZ2_bzWrite (&bzerror, self->fp,
974                          PyString_AS_STRING(line), len);
975             if (bzerror != BZ_OK) {
976                 Py_BLOCK_THREADS
977                 Util_CatchBZ2Error(bzerror);
978                 goto error;
979             }
980         }
981         Py_END_ALLOW_THREADS
982 
983         if (j < CHUNKSIZE)
984             break;
985     }
986 
987     Py_INCREF(Py_None);
988     ret = Py_None;
989 
990   error:
991     RELEASE_LOCK(self);
992     Py_XDECREF(list);
993     Py_XDECREF(iter);
994     return ret;
995 #undef CHUNKSIZE
996 }
997 
998 PyDoc_STRVAR(BZ2File_seek__doc__,
999 "seek(offset [, whence]) -> None\n\
1000 \n\
1001 Move to new file position. Argument offset is a byte count. Optional\n\
1002 argument whence defaults to 0 (offset from start of file, offset\n\
1003 should be >= 0); other values are 1 (move relative to current position,\n\
1004 positive or negative), and 2 (move relative to end of file, usually\n\
1005 negative, although many platforms allow seeking beyond the end of a file).\n\
1006 \n\
1007 Note that seeking of bz2 files is emulated, and depending on the parameters\n\
1008 the operation may be extremely slow.\n\
1009 ");
1010 
1011 static PyObject *
BZ2File_seek(BZ2FileObject * self,PyObject * args)1012 BZ2File_seek(BZ2FileObject *self, PyObject *args)
1013 {
1014     int where = 0;
1015     PyObject *offobj;
1016     Py_off_t offset;
1017     char small_buffer[SMALLCHUNK];
1018     char *buffer = small_buffer;
1019     size_t buffersize = SMALLCHUNK;
1020     Py_off_t bytesread = 0;
1021     size_t readsize;
1022     int chunksize;
1023     int bzerror;
1024     PyObject *ret = NULL;
1025 
1026     if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where))
1027         return NULL;
1028 #if !defined(HAVE_LARGEFILE_SUPPORT)
1029     offset = PyInt_AsLong(offobj);
1030 #else
1031     offset = PyLong_Check(offobj) ?
1032         PyLong_AsLongLong(offobj) : PyInt_AsLong(offobj);
1033 #endif
1034     if (PyErr_Occurred())
1035         return NULL;
1036 
1037     ACQUIRE_LOCK(self);
1038     Util_DropReadAhead(self);
1039     switch (self->mode) {
1040         case MODE_READ:
1041         case MODE_READ_EOF:
1042             break;
1043 
1044         case MODE_CLOSED:
1045             PyErr_SetString(PyExc_ValueError,
1046                             "I/O operation on closed file");
1047             goto cleanup;
1048 
1049         default:
1050             PyErr_SetString(PyExc_IOError,
1051                             "seek works only while reading");
1052             goto cleanup;
1053     }
1054 
1055     if (where == 2) {
1056         if (self->size == -1) {
1057             assert(self->mode != MODE_READ_EOF);
1058             for (;;) {
1059                 Py_BEGIN_ALLOW_THREADS
1060                 chunksize = Util_UnivNewlineRead(
1061                                 &bzerror, self->fp,
1062                                 buffer, buffersize,
1063                                 self);
1064                 self->pos += chunksize;
1065                 Py_END_ALLOW_THREADS
1066 
1067                 bytesread += chunksize;
1068                 if (bzerror == BZ_STREAM_END) {
1069                     break;
1070                 } else if (bzerror != BZ_OK) {
1071                     Util_CatchBZ2Error(bzerror);
1072                     goto cleanup;
1073                 }
1074             }
1075             self->mode = MODE_READ_EOF;
1076             self->size = self->pos;
1077             bytesread = 0;
1078         }
1079         offset = self->size + offset;
1080     } else if (where == 1) {
1081         offset = self->pos + offset;
1082     }
1083 
1084     /* Before getting here, offset must be the absolute position the file
1085      * pointer should be set to. */
1086 
1087     if (offset >= self->pos) {
1088         /* we can move forward */
1089         offset -= self->pos;
1090     } else {
1091         /* we cannot move back, so rewind the stream */
1092         BZ2_bzReadClose(&bzerror, self->fp);
1093         if (self->fp) {
1094             PyFile_DecUseCount((PyFileObject *)self->file);
1095             self->fp = NULL;
1096         }
1097         if (bzerror != BZ_OK) {
1098             Util_CatchBZ2Error(bzerror);
1099             goto cleanup;
1100         }
1101         ret = PyObject_CallMethod(self->file, "seek", "(i)", 0);
1102         if (!ret)
1103             goto cleanup;
1104         Py_DECREF(ret);
1105         ret = NULL;
1106         self->pos = 0;
1107         self->fp = BZ2_bzReadOpen(&bzerror, PyFile_AsFile(self->file),
1108                                   0, 0, NULL, 0);
1109         if (self->fp)
1110             PyFile_IncUseCount((PyFileObject *)self->file);
1111         if (bzerror != BZ_OK) {
1112             Util_CatchBZ2Error(bzerror);
1113             goto cleanup;
1114         }
1115         self->mode = MODE_READ;
1116     }
1117 
1118     if (offset <= 0 || self->mode == MODE_READ_EOF)
1119         goto exit;
1120 
1121     /* Before getting here, offset must be set to the number of bytes
1122      * to walk forward. */
1123     for (;;) {
1124         if (offset-bytesread > buffersize)
1125             readsize = buffersize;
1126         else
1127             /* offset might be wider that readsize, but the result
1128              * of the subtraction is bound by buffersize (see the
1129              * condition above). buffersize is 8192. */
1130             readsize = (size_t)(offset-bytesread);
1131         Py_BEGIN_ALLOW_THREADS
1132         chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
1133                                          buffer, readsize, self);
1134         self->pos += chunksize;
1135         Py_END_ALLOW_THREADS
1136         bytesread += chunksize;
1137         if (bzerror == BZ_STREAM_END) {
1138             self->size = self->pos;
1139             self->mode = MODE_READ_EOF;
1140             break;
1141         } else if (bzerror != BZ_OK) {
1142             Util_CatchBZ2Error(bzerror);
1143             goto cleanup;
1144         }
1145         if (bytesread == offset)
1146             break;
1147     }
1148 
1149 exit:
1150     Py_INCREF(Py_None);
1151     ret = Py_None;
1152 
1153 cleanup:
1154     RELEASE_LOCK(self);
1155     return ret;
1156 }
1157 
1158 PyDoc_STRVAR(BZ2File_tell__doc__,
1159 "tell() -> int\n\
1160 \n\
1161 Return the current file position, an integer (may be a long integer).\n\
1162 ");
1163 
1164 static PyObject *
BZ2File_tell(BZ2FileObject * self,PyObject * args)1165 BZ2File_tell(BZ2FileObject *self, PyObject *args)
1166 {
1167     PyObject *ret = NULL;
1168 
1169     if (self->mode == MODE_CLOSED) {
1170         PyErr_SetString(PyExc_ValueError,
1171                         "I/O operation on closed file");
1172         goto cleanup;
1173     }
1174 
1175 #if !defined(HAVE_LARGEFILE_SUPPORT)
1176     ret = PyInt_FromLong(self->pos);
1177 #else
1178     ret = PyLong_FromLongLong(self->pos);
1179 #endif
1180 
1181 cleanup:
1182     return ret;
1183 }
1184 
1185 PyDoc_STRVAR(BZ2File_close__doc__,
1186 "close() -> None or (perhaps) an integer\n\
1187 \n\
1188 Close the file. Sets data attribute .closed to true. A closed file\n\
1189 cannot be used for further I/O operations. close() may be called more\n\
1190 than once without error.\n\
1191 ");
1192 
1193 static PyObject *
BZ2File_close(BZ2FileObject * self)1194 BZ2File_close(BZ2FileObject *self)
1195 {
1196     PyObject *ret = NULL;
1197     int bzerror = BZ_OK;
1198 
1199     ACQUIRE_LOCK(self);
1200     switch (self->mode) {
1201         case MODE_READ:
1202         case MODE_READ_EOF:
1203             BZ2_bzReadClose(&bzerror, self->fp);
1204             break;
1205         case MODE_WRITE:
1206             BZ2_bzWriteClose(&bzerror, self->fp,
1207                              0, NULL, NULL);
1208             break;
1209     }
1210     if (self->file) {
1211         if (self->fp)
1212             PyFile_DecUseCount((PyFileObject *)self->file);
1213         ret = PyObject_CallMethod(self->file, "close", NULL);
1214     } else {
1215         Py_INCREF(Py_None);
1216         ret = Py_None;
1217     }
1218     self->fp = NULL;
1219     self->mode = MODE_CLOSED;
1220     if (bzerror != BZ_OK) {
1221         Util_CatchBZ2Error(bzerror);
1222         Py_XDECREF(ret);
1223         ret = NULL;
1224     }
1225 
1226     RELEASE_LOCK(self);
1227     return ret;
1228 }
1229 
1230 PyDoc_STRVAR(BZ2File_enter_doc,
1231 "__enter__() -> self.");
1232 
1233 static PyObject *
BZ2File_enter(BZ2FileObject * self)1234 BZ2File_enter(BZ2FileObject *self)
1235 {
1236     if (self->mode == MODE_CLOSED) {
1237         PyErr_SetString(PyExc_ValueError,
1238             "I/O operation on closed file");
1239         return NULL;
1240     }
1241     Py_INCREF(self);
1242     return (PyObject *) self;
1243 }
1244 
1245 PyDoc_STRVAR(BZ2File_exit_doc,
1246 "__exit__(*excinfo) -> None.  Closes the file.");
1247 
1248 static PyObject *
BZ2File_exit(BZ2FileObject * self,PyObject * args)1249 BZ2File_exit(BZ2FileObject *self, PyObject *args)
1250 {
1251     PyObject *ret = PyObject_CallMethod((PyObject *) self, "close", NULL);
1252     if (!ret)
1253         /* If error occurred, pass through */
1254         return NULL;
1255     Py_DECREF(ret);
1256     Py_RETURN_NONE;
1257 }
1258 
1259 
1260 static PyObject *BZ2File_getiter(BZ2FileObject *self);
1261 
1262 static PyMethodDef BZ2File_methods[] = {
1263     {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
1264     {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
1265     {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
1266     {"xreadlines", (PyCFunction)BZ2File_getiter, METH_VARARGS, BZ2File_xreadlines__doc__},
1267     {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
1268     {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
1269     {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
1270     {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
1271     {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
1272     {"__enter__", (PyCFunction)BZ2File_enter, METH_NOARGS, BZ2File_enter_doc},
1273     {"__exit__", (PyCFunction)BZ2File_exit, METH_VARARGS, BZ2File_exit_doc},
1274     {NULL,              NULL}           /* sentinel */
1275 };
1276 
1277 
1278 /* ===================================================================== */
1279 /* Getters and setters of BZ2File. */
1280 
1281 /* This is a hacked version of Python's fileobject.c:get_newlines(). */
1282 static PyObject *
BZ2File_get_newlines(BZ2FileObject * self,void * closure)1283 BZ2File_get_newlines(BZ2FileObject *self, void *closure)
1284 {
1285     switch (self->f_newlinetypes) {
1286     case NEWLINE_UNKNOWN:
1287         Py_INCREF(Py_None);
1288         return Py_None;
1289     case NEWLINE_CR:
1290         return PyString_FromString("\r");
1291     case NEWLINE_LF:
1292         return PyString_FromString("\n");
1293     case NEWLINE_CR|NEWLINE_LF:
1294         return Py_BuildValue("(ss)", "\r", "\n");
1295     case NEWLINE_CRLF:
1296         return PyString_FromString("\r\n");
1297     case NEWLINE_CR|NEWLINE_CRLF:
1298         return Py_BuildValue("(ss)", "\r", "\r\n");
1299     case NEWLINE_LF|NEWLINE_CRLF:
1300         return Py_BuildValue("(ss)", "\n", "\r\n");
1301     case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
1302         return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1303     default:
1304         PyErr_Format(PyExc_SystemError,
1305                      "Unknown newlines value 0x%x\n",
1306                      self->f_newlinetypes);
1307         return NULL;
1308     }
1309 }
1310 
1311 static PyObject *
BZ2File_get_closed(BZ2FileObject * self,void * closure)1312 BZ2File_get_closed(BZ2FileObject *self, void *closure)
1313 {
1314     return PyInt_FromLong(self->mode == MODE_CLOSED);
1315 }
1316 
1317 static PyObject *
BZ2File_get_mode(BZ2FileObject * self,void * closure)1318 BZ2File_get_mode(BZ2FileObject *self, void *closure)
1319 {
1320     return PyObject_GetAttrString(self->file, "mode");
1321 }
1322 
1323 static PyObject *
BZ2File_get_name(BZ2FileObject * self,void * closure)1324 BZ2File_get_name(BZ2FileObject *self, void *closure)
1325 {
1326     return PyObject_GetAttrString(self->file, "name");
1327 }
1328 
1329 static PyGetSetDef BZ2File_getset[] = {
1330     {"closed", (getter)BZ2File_get_closed, NULL,
1331                     "True if the file is closed"},
1332     {"newlines", (getter)BZ2File_get_newlines, NULL,
1333                     "end-of-line convention used in this file"},
1334     {"mode", (getter)BZ2File_get_mode, NULL,
1335                     "file mode ('r', 'w', or 'U')"},
1336     {"name", (getter)BZ2File_get_name, NULL,
1337                     "file name"},
1338     {NULL}      /* Sentinel */
1339 };
1340 
1341 
1342 /* ===================================================================== */
1343 /* Members of BZ2File_Type. */
1344 
1345 #undef OFF
1346 #define OFF(x) offsetof(BZ2FileObject, x)
1347 
1348 static PyMemberDef BZ2File_members[] = {
1349     {"softspace",       T_INT,          OFF(f_softspace), 0,
1350      "flag indicating that a space needs to be printed; used by print"},
1351     {NULL}      /* Sentinel */
1352 };
1353 
1354 /* ===================================================================== */
1355 /* Slot definitions for BZ2File_Type. */
1356 
1357 static int
BZ2File_init(BZ2FileObject * self,PyObject * args,PyObject * kwargs)1358 BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1359 {
1360     static char *kwlist[] = {"filename", "mode", "buffering",
1361                                    "compresslevel", 0};
1362     PyObject *name;
1363     char *mode = "r";
1364     int buffering = -1;
1365     int compresslevel = 9;
1366     int bzerror;
1367     int mode_char = 0;
1368 
1369     self->size = -1;
1370 
1371     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File",
1372                                      kwlist, &name, &mode, &buffering,
1373                                      &compresslevel))
1374         return -1;
1375 
1376     if (compresslevel < 1 || compresslevel > 9) {
1377         PyErr_SetString(PyExc_ValueError,
1378                         "compresslevel must be between 1 and 9");
1379         return -1;
1380     }
1381 
1382     for (;;) {
1383         int error = 0;
1384         switch (*mode) {
1385             case 'r':
1386             case 'w':
1387                 if (mode_char)
1388                     error = 1;
1389                 mode_char = *mode;
1390                 break;
1391 
1392             case 'b':
1393                 break;
1394 
1395             case 'U':
1396 #ifdef __VMS
1397                 self->f_univ_newline = 0;
1398 #else
1399                 self->f_univ_newline = 1;
1400 #endif
1401                 break;
1402 
1403             default:
1404                 error = 1;
1405                 break;
1406         }
1407         if (error) {
1408             PyErr_Format(PyExc_ValueError,
1409                          "invalid mode char %c", *mode);
1410             return -1;
1411         }
1412         mode++;
1413         if (*mode == '\0')
1414             break;
1415     }
1416 
1417     if (mode_char == 0) {
1418         mode_char = 'r';
1419     }
1420 
1421     mode = (mode_char == 'r') ? "rb" : "wb";
1422 
1423     self->file = PyObject_CallFunction((PyObject*)&PyFile_Type, "(Osi)",
1424                                        name, mode, buffering);
1425     if (self->file == NULL)
1426         return -1;
1427 
1428     /* From now on, we have stuff to dealloc, so jump to error label
1429      * instead of returning */
1430 
1431 #ifdef WITH_THREAD
1432     self->lock = PyThread_allocate_lock();
1433     if (!self->lock) {
1434         PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1435         goto error;
1436     }
1437 #endif
1438 
1439     if (mode_char == 'r')
1440         self->fp = BZ2_bzReadOpen(&bzerror,
1441                                   PyFile_AsFile(self->file),
1442                                   0, 0, NULL, 0);
1443     else
1444         self->fp = BZ2_bzWriteOpen(&bzerror,
1445                                    PyFile_AsFile(self->file),
1446                                    compresslevel, 0, 0);
1447 
1448     if (bzerror != BZ_OK) {
1449         Util_CatchBZ2Error(bzerror);
1450         goto error;
1451     }
1452     PyFile_IncUseCount((PyFileObject *)self->file);
1453 
1454     self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1455 
1456     return 0;
1457 
1458 error:
1459     Py_CLEAR(self->file);
1460 #ifdef WITH_THREAD
1461     if (self->lock) {
1462         PyThread_free_lock(self->lock);
1463         self->lock = NULL;
1464     }
1465 #endif
1466     return -1;
1467 }
1468 
1469 static void
BZ2File_dealloc(BZ2FileObject * self)1470 BZ2File_dealloc(BZ2FileObject *self)
1471 {
1472     int bzerror;
1473 #ifdef WITH_THREAD
1474     if (self->lock)
1475         PyThread_free_lock(self->lock);
1476 #endif
1477     switch (self->mode) {
1478         case MODE_READ:
1479         case MODE_READ_EOF:
1480             BZ2_bzReadClose(&bzerror, self->fp);
1481             break;
1482         case MODE_WRITE:
1483             BZ2_bzWriteClose(&bzerror, self->fp,
1484                              0, NULL, NULL);
1485             break;
1486     }
1487     if (self->fp != NULL && self->file != NULL)
1488         PyFile_DecUseCount((PyFileObject *)self->file);
1489     self->fp = NULL;
1490     Util_DropReadAhead(self);
1491     Py_XDECREF(self->file);
1492     Py_TYPE(self)->tp_free((PyObject *)self);
1493 }
1494 
1495 /* This is a hacked version of Python's fileobject.c:file_getiter(). */
1496 static PyObject *
BZ2File_getiter(BZ2FileObject * self)1497 BZ2File_getiter(BZ2FileObject *self)
1498 {
1499     if (self->mode == MODE_CLOSED) {
1500         PyErr_SetString(PyExc_ValueError,
1501                         "I/O operation on closed file");
1502         return NULL;
1503     }
1504     Py_INCREF((PyObject*)self);
1505     return (PyObject *)self;
1506 }
1507 
1508 /* This is a hacked version of Python's fileobject.c:file_iternext(). */
1509 #define READAHEAD_BUFSIZE 8192
1510 static PyObject *
BZ2File_iternext(BZ2FileObject * self)1511 BZ2File_iternext(BZ2FileObject *self)
1512 {
1513     PyStringObject* ret;
1514     ACQUIRE_LOCK(self);
1515     if (self->mode == MODE_CLOSED) {
1516         RELEASE_LOCK(self);
1517         PyErr_SetString(PyExc_ValueError,
1518                         "I/O operation on closed file");
1519         return NULL;
1520     }
1521     ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1522     RELEASE_LOCK(self);
1523     if (ret == NULL || PyString_GET_SIZE(ret) == 0) {
1524         Py_XDECREF(ret);
1525         return NULL;
1526     }
1527     return (PyObject *)ret;
1528 }
1529 
1530 /* ===================================================================== */
1531 /* BZ2File_Type definition. */
1532 
1533 PyDoc_VAR(BZ2File__doc__) =
1534 PyDoc_STR(
1535 "BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1536 \n\
1537 Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1538 writing. When opened for writing, the file will be created if it doesn't\n\
1539 exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1540 unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1541 is given, must be a number between 1 and 9.\n\
1542 ")
1543 PyDoc_STR(
1544 "\n\
1545 Add a 'U' to mode to open the file for input with universal newline\n\
1546 support. Any line ending in the input file will be seen as a '\\n' in\n\
1547 Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
1548 for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
1549 '\\r\\n' or a tuple containing all the newline types seen. Universal\n\
1550 newlines are available only when reading.\n\
1551 ")
1552 ;
1553 
1554 static PyTypeObject BZ2File_Type = {
1555     PyVarObject_HEAD_INIT(NULL, 0)
1556     "bz2.BZ2File",              /*tp_name*/
1557     sizeof(BZ2FileObject),      /*tp_basicsize*/
1558     0,                          /*tp_itemsize*/
1559     (destructor)BZ2File_dealloc, /*tp_dealloc*/
1560     0,                          /*tp_print*/
1561     0,                          /*tp_getattr*/
1562     0,                          /*tp_setattr*/
1563     0,                          /*tp_compare*/
1564     0,                          /*tp_repr*/
1565     0,                          /*tp_as_number*/
1566     0,                          /*tp_as_sequence*/
1567     0,                          /*tp_as_mapping*/
1568     0,                          /*tp_hash*/
1569     0,                      /*tp_call*/
1570     0,                      /*tp_str*/
1571     PyObject_GenericGetAttr,/*tp_getattro*/
1572     PyObject_GenericSetAttr,/*tp_setattro*/
1573     0,                      /*tp_as_buffer*/
1574     Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1575     BZ2File__doc__,         /*tp_doc*/
1576     0,                      /*tp_traverse*/
1577     0,                      /*tp_clear*/
1578     0,                      /*tp_richcompare*/
1579     0,                      /*tp_weaklistoffset*/
1580     (getiterfunc)BZ2File_getiter, /*tp_iter*/
1581     (iternextfunc)BZ2File_iternext, /*tp_iternext*/
1582     BZ2File_methods,        /*tp_methods*/
1583     BZ2File_members,        /*tp_members*/
1584     BZ2File_getset,         /*tp_getset*/
1585     0,                      /*tp_base*/
1586     0,                      /*tp_dict*/
1587     0,                      /*tp_descr_get*/
1588     0,                      /*tp_descr_set*/
1589     0,                      /*tp_dictoffset*/
1590     (initproc)BZ2File_init, /*tp_init*/
1591     PyType_GenericAlloc,    /*tp_alloc*/
1592     PyType_GenericNew,      /*tp_new*/
1593     _PyObject_Del,          /*tp_free*/
1594     0,                      /*tp_is_gc*/
1595 };
1596 
1597 
1598 /* ===================================================================== */
1599 /* Methods of BZ2Comp. */
1600 
1601 PyDoc_STRVAR(BZ2Comp_compress__doc__,
1602 "compress(data) -> string\n\
1603 \n\
1604 Provide more data to the compressor object. It will return chunks of\n\
1605 compressed data whenever possible. When you've finished providing data\n\
1606 to compress, call the flush() method to finish the compression process,\n\
1607 and return what is left in the internal buffers.\n\
1608 ");
1609 
1610 static PyObject *
BZ2Comp_compress(BZ2CompObject * self,PyObject * args)1611 BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1612 {
1613     Py_buffer pdata;
1614     size_t input_left;
1615     size_t output_size = 0;
1616     PyObject *ret = NULL;
1617     bz_stream *bzs = &self->bzs;
1618     int bzerror;
1619 
1620     if (!PyArg_ParseTuple(args, "s*:compress", &pdata))
1621         return NULL;
1622 
1623     if (pdata.len == 0) {
1624         PyBuffer_Release(&pdata);
1625         return PyString_FromString("");
1626     }
1627 
1628     ACQUIRE_LOCK(self);
1629     if (!self->running) {
1630         PyErr_SetString(PyExc_ValueError,
1631                         "this object was already flushed");
1632         goto error;
1633     }
1634 
1635     ret = PyString_FromStringAndSize(NULL, SMALLCHUNK);
1636     if (!ret)
1637         goto error;
1638 
1639     bzs->next_in = pdata.buf;
1640     bzs->avail_in = MIN(pdata.len, UINT_MAX);
1641     input_left = pdata.len - bzs->avail_in;
1642 
1643     bzs->next_out = BUF(ret);
1644     bzs->avail_out = PyString_GET_SIZE(ret);
1645 
1646     for (;;) {
1647         char *saved_next_out;
1648 
1649         Py_BEGIN_ALLOW_THREADS
1650         saved_next_out = bzs->next_out;
1651         bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1652         output_size += bzs->next_out - saved_next_out;
1653         Py_END_ALLOW_THREADS
1654 
1655         if (bzerror != BZ_RUN_OK) {
1656             Util_CatchBZ2Error(bzerror);
1657             goto error;
1658         }
1659         if (bzs->avail_in == 0) {
1660             if (input_left == 0)
1661                 break; /* no more input data */
1662             bzs->avail_in = MIN(input_left, UINT_MAX);
1663             input_left -= bzs->avail_in;
1664         }
1665         if (bzs->avail_out == 0) {
1666             size_t buffer_left = PyString_GET_SIZE(ret) - output_size;
1667             if (buffer_left == 0) {
1668                 if (Util_GrowBuffer(&ret) < 0) {
1669                     BZ2_bzCompressEnd(bzs);
1670                     goto error;
1671                 }
1672                 bzs->next_out = BUF(ret) + output_size;
1673                 buffer_left = PyString_GET_SIZE(ret) - output_size;
1674             }
1675             bzs->avail_out = MIN(buffer_left, UINT_MAX);
1676         }
1677     }
1678 
1679     if (_PyString_Resize(&ret, output_size) < 0)
1680         goto error;
1681 
1682     RELEASE_LOCK(self);
1683     PyBuffer_Release(&pdata);
1684     return ret;
1685 
1686 error:
1687     RELEASE_LOCK(self);
1688     PyBuffer_Release(&pdata);
1689     Py_XDECREF(ret);
1690     return NULL;
1691 }
1692 
1693 PyDoc_STRVAR(BZ2Comp_flush__doc__,
1694 "flush() -> string\n\
1695 \n\
1696 Finish the compression process and return what is left in internal buffers.\n\
1697 You must not use the compressor object after calling this method.\n\
1698 ");
1699 
1700 static PyObject *
BZ2Comp_flush(BZ2CompObject * self)1701 BZ2Comp_flush(BZ2CompObject *self)
1702 {
1703     size_t output_size = 0;
1704     PyObject *ret = NULL;
1705     bz_stream *bzs = &self->bzs;
1706     int bzerror;
1707 
1708     ACQUIRE_LOCK(self);
1709     if (!self->running) {
1710         PyErr_SetString(PyExc_ValueError, "object was already flushed");
1711         goto error;
1712     }
1713     self->running = 0;
1714 
1715     ret = PyString_FromStringAndSize(NULL, SMALLCHUNK);
1716     if (!ret)
1717         goto error;
1718 
1719     bzs->next_out = BUF(ret);
1720     bzs->avail_out = PyString_GET_SIZE(ret);
1721 
1722     for (;;) {
1723         char *saved_next_out;
1724 
1725         Py_BEGIN_ALLOW_THREADS
1726         saved_next_out = bzs->next_out;
1727         bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1728         output_size += bzs->next_out - saved_next_out;
1729         Py_END_ALLOW_THREADS
1730 
1731         if (bzerror == BZ_STREAM_END) {
1732             break;
1733         } else if (bzerror != BZ_FINISH_OK) {
1734             Util_CatchBZ2Error(bzerror);
1735             goto error;
1736         }
1737         if (bzs->avail_out == 0) {
1738             size_t buffer_left = PyString_GET_SIZE(ret) - output_size;
1739             if (buffer_left == 0) {
1740                 if (Util_GrowBuffer(&ret) < 0)
1741                     goto error;
1742                 bzs->next_out = BUF(ret) + output_size;
1743                 buffer_left = PyString_GET_SIZE(ret) - output_size;
1744             }
1745             bzs->avail_out = MIN(buffer_left, UINT_MAX);
1746         }
1747     }
1748 
1749     if (output_size != PyString_GET_SIZE(ret))
1750         if (_PyString_Resize(&ret, output_size) < 0)
1751             goto error;
1752 
1753     RELEASE_LOCK(self);
1754     return ret;
1755 
1756 error:
1757     RELEASE_LOCK(self);
1758     Py_XDECREF(ret);
1759     return NULL;
1760 }
1761 
1762 static PyMethodDef BZ2Comp_methods[] = {
1763     {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1764      BZ2Comp_compress__doc__},
1765     {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1766      BZ2Comp_flush__doc__},
1767     {NULL,              NULL}           /* sentinel */
1768 };
1769 
1770 
1771 /* ===================================================================== */
1772 /* Slot definitions for BZ2Comp_Type. */
1773 
1774 static int
BZ2Comp_init(BZ2CompObject * self,PyObject * args,PyObject * kwargs)1775 BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1776 {
1777     int compresslevel = 9;
1778     int bzerror;
1779     static char *kwlist[] = {"compresslevel", 0};
1780 
1781     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1782                                      kwlist, &compresslevel))
1783         return -1;
1784 
1785     if (compresslevel < 1 || compresslevel > 9) {
1786         PyErr_SetString(PyExc_ValueError,
1787                         "compresslevel must be between 1 and 9");
1788         goto error;
1789     }
1790 
1791 #ifdef WITH_THREAD
1792     self->lock = PyThread_allocate_lock();
1793     if (!self->lock) {
1794         PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1795         goto error;
1796     }
1797 #endif
1798 
1799     memset(&self->bzs, 0, sizeof(bz_stream));
1800     bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1801     if (bzerror != BZ_OK) {
1802         Util_CatchBZ2Error(bzerror);
1803         goto error;
1804     }
1805 
1806     self->running = 1;
1807 
1808     return 0;
1809 error:
1810 #ifdef WITH_THREAD
1811     if (self->lock) {
1812         PyThread_free_lock(self->lock);
1813         self->lock = NULL;
1814     }
1815 #endif
1816     return -1;
1817 }
1818 
1819 static void
BZ2Comp_dealloc(BZ2CompObject * self)1820 BZ2Comp_dealloc(BZ2CompObject *self)
1821 {
1822 #ifdef WITH_THREAD
1823     if (self->lock)
1824         PyThread_free_lock(self->lock);
1825 #endif
1826     BZ2_bzCompressEnd(&self->bzs);
1827     Py_TYPE(self)->tp_free((PyObject *)self);
1828 }
1829 
1830 
1831 /* ===================================================================== */
1832 /* BZ2Comp_Type definition. */
1833 
1834 PyDoc_STRVAR(BZ2Comp__doc__,
1835 "BZ2Compressor([compresslevel=9]) -> compressor object\n\
1836 \n\
1837 Create a new compressor object. This object may be used to compress\n\
1838 data sequentially. If you want to compress data in one shot, use the\n\
1839 compress() function instead. The compresslevel parameter, if given,\n\
1840 must be a number between 1 and 9.\n\
1841 ");
1842 
1843 static PyTypeObject BZ2Comp_Type = {
1844     PyVarObject_HEAD_INIT(NULL, 0)
1845     "bz2.BZ2Compressor",        /*tp_name*/
1846     sizeof(BZ2CompObject),      /*tp_basicsize*/
1847     0,                          /*tp_itemsize*/
1848     (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
1849     0,                          /*tp_print*/
1850     0,                          /*tp_getattr*/
1851     0,                          /*tp_setattr*/
1852     0,                          /*tp_compare*/
1853     0,                          /*tp_repr*/
1854     0,                          /*tp_as_number*/
1855     0,                          /*tp_as_sequence*/
1856     0,                          /*tp_as_mapping*/
1857     0,                          /*tp_hash*/
1858     0,                      /*tp_call*/
1859     0,                      /*tp_str*/
1860     PyObject_GenericGetAttr,/*tp_getattro*/
1861     PyObject_GenericSetAttr,/*tp_setattro*/
1862     0,                      /*tp_as_buffer*/
1863     Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1864     BZ2Comp__doc__,         /*tp_doc*/
1865     0,                      /*tp_traverse*/
1866     0,                      /*tp_clear*/
1867     0,                      /*tp_richcompare*/
1868     0,                      /*tp_weaklistoffset*/
1869     0,                      /*tp_iter*/
1870     0,                      /*tp_iternext*/
1871     BZ2Comp_methods,        /*tp_methods*/
1872     0,                      /*tp_members*/
1873     0,                      /*tp_getset*/
1874     0,                      /*tp_base*/
1875     0,                      /*tp_dict*/
1876     0,                      /*tp_descr_get*/
1877     0,                      /*tp_descr_set*/
1878     0,                      /*tp_dictoffset*/
1879     (initproc)BZ2Comp_init, /*tp_init*/
1880     PyType_GenericAlloc,    /*tp_alloc*/
1881     PyType_GenericNew,      /*tp_new*/
1882     _PyObject_Del,          /*tp_free*/
1883     0,                      /*tp_is_gc*/
1884 };
1885 
1886 
1887 /* ===================================================================== */
1888 /* Members of BZ2Decomp. */
1889 
1890 #undef OFF
1891 #define OFF(x) offsetof(BZ2DecompObject, x)
1892 
1893 static PyMemberDef BZ2Decomp_members[] = {
1894     {"unused_data", T_OBJECT, OFF(unused_data), RO},
1895     {NULL}      /* Sentinel */
1896 };
1897 
1898 
1899 /* ===================================================================== */
1900 /* Methods of BZ2Decomp. */
1901 
1902 PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1903 "decompress(data) -> string\n\
1904 \n\
1905 Provide more data to the decompressor object. It will return chunks\n\
1906 of decompressed data whenever possible. If you try to decompress data\n\
1907 after the end of stream is found, EOFError will be raised. If any data\n\
1908 was found after the end of stream, it'll be ignored and saved in\n\
1909 unused_data attribute.\n\
1910 ");
1911 
1912 static PyObject *
BZ2Decomp_decompress(BZ2DecompObject * self,PyObject * args)1913 BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1914 {
1915     Py_buffer pdata;
1916     size_t input_left;
1917     size_t output_size = 0;
1918     PyObject *ret = NULL;
1919     bz_stream *bzs = &self->bzs;
1920     int bzerror;
1921 
1922     if (!PyArg_ParseTuple(args, "s*:decompress", &pdata))
1923         return NULL;
1924 
1925     ACQUIRE_LOCK(self);
1926     if (!self->running) {
1927         PyErr_SetString(PyExc_EOFError, "end of stream was "
1928                                         "already found");
1929         goto error;
1930     }
1931 
1932     ret = PyString_FromStringAndSize(NULL, SMALLCHUNK);
1933     if (!ret)
1934         goto error;
1935 
1936     bzs->next_in = pdata.buf;
1937     bzs->avail_in = MIN(pdata.len, UINT_MAX);
1938     input_left = pdata.len - bzs->avail_in;
1939 
1940     bzs->next_out = BUF(ret);
1941     bzs->avail_out = PyString_GET_SIZE(ret);
1942 
1943     for (;;) {
1944         char *saved_next_out;
1945 
1946         Py_BEGIN_ALLOW_THREADS
1947         saved_next_out = bzs->next_out;
1948         bzerror = BZ2_bzDecompress(bzs);
1949         output_size += bzs->next_out - saved_next_out;
1950         Py_END_ALLOW_THREADS
1951 
1952         if (bzerror == BZ_STREAM_END) {
1953             self->running = 0;
1954             input_left += bzs->avail_in;
1955             if (input_left != 0) {
1956                 Py_SETREF(self->unused_data,
1957                           PyString_FromStringAndSize(bzs->next_in, input_left));
1958                 if (self->unused_data == NULL)
1959                     goto error;
1960             }
1961             break;
1962         }
1963         if (bzerror != BZ_OK) {
1964             Util_CatchBZ2Error(bzerror);
1965             goto error;
1966         }
1967         if (bzs->avail_in == 0) {
1968             if (input_left == 0)
1969                 break; /* no more input data */
1970             bzs->avail_in = MIN(input_left, UINT_MAX);
1971             input_left -= bzs->avail_in;
1972         }
1973         if (bzs->avail_out == 0) {
1974             size_t buffer_left = PyString_GET_SIZE(ret) - output_size;
1975             if (buffer_left == 0) {
1976                 if (Util_GrowBuffer(&ret) < 0) {
1977                     BZ2_bzDecompressEnd(bzs);
1978                     goto error;
1979                 }
1980                 bzs->next_out = BUF(ret) + output_size;
1981                 buffer_left = PyString_GET_SIZE(ret) - output_size;
1982             }
1983             bzs->avail_out = MIN(buffer_left, UINT_MAX);
1984         }
1985     }
1986 
1987     if (output_size != PyString_GET_SIZE(ret))
1988         if (_PyString_Resize(&ret, output_size) < 0)
1989             goto error;
1990 
1991     RELEASE_LOCK(self);
1992     PyBuffer_Release(&pdata);
1993     return ret;
1994 
1995 error:
1996     RELEASE_LOCK(self);
1997     PyBuffer_Release(&pdata);
1998     Py_XDECREF(ret);
1999     return NULL;
2000 }
2001 
2002 static PyMethodDef BZ2Decomp_methods[] = {
2003     {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
2004     {NULL,              NULL}           /* sentinel */
2005 };
2006 
2007 
2008 /* ===================================================================== */
2009 /* Slot definitions for BZ2Decomp_Type. */
2010 
2011 static int
BZ2Decomp_init(BZ2DecompObject * self,PyObject * args,PyObject * kwargs)2012 BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
2013 {
2014     int bzerror;
2015 
2016     if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
2017         return -1;
2018 
2019 #ifdef WITH_THREAD
2020     self->lock = PyThread_allocate_lock();
2021     if (!self->lock) {
2022         PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
2023         goto error;
2024     }
2025 #endif
2026 
2027     self->unused_data = PyString_FromString("");
2028     if (!self->unused_data)
2029         goto error;
2030 
2031     memset(&self->bzs, 0, sizeof(bz_stream));
2032     bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
2033     if (bzerror != BZ_OK) {
2034         Util_CatchBZ2Error(bzerror);
2035         goto error;
2036     }
2037 
2038     self->running = 1;
2039 
2040     return 0;
2041 
2042 error:
2043 #ifdef WITH_THREAD
2044     if (self->lock) {
2045         PyThread_free_lock(self->lock);
2046         self->lock = NULL;
2047     }
2048 #endif
2049     Py_CLEAR(self->unused_data);
2050     return -1;
2051 }
2052 
2053 static void
BZ2Decomp_dealloc(BZ2DecompObject * self)2054 BZ2Decomp_dealloc(BZ2DecompObject *self)
2055 {
2056 #ifdef WITH_THREAD
2057     if (self->lock)
2058         PyThread_free_lock(self->lock);
2059 #endif
2060     Py_XDECREF(self->unused_data);
2061     BZ2_bzDecompressEnd(&self->bzs);
2062     Py_TYPE(self)->tp_free((PyObject *)self);
2063 }
2064 
2065 
2066 /* ===================================================================== */
2067 /* BZ2Decomp_Type definition. */
2068 
2069 PyDoc_STRVAR(BZ2Decomp__doc__,
2070 "BZ2Decompressor() -> decompressor object\n\
2071 \n\
2072 Create a new decompressor object. This object may be used to decompress\n\
2073 data sequentially. If you want to decompress data in one shot, use the\n\
2074 decompress() function instead.\n\
2075 ");
2076 
2077 static PyTypeObject BZ2Decomp_Type = {
2078     PyVarObject_HEAD_INIT(NULL, 0)
2079     "bz2.BZ2Decompressor",      /*tp_name*/
2080     sizeof(BZ2DecompObject), /*tp_basicsize*/
2081     0,                          /*tp_itemsize*/
2082     (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
2083     0,                          /*tp_print*/
2084     0,                          /*tp_getattr*/
2085     0,                          /*tp_setattr*/
2086     0,                          /*tp_compare*/
2087     0,                          /*tp_repr*/
2088     0,                          /*tp_as_number*/
2089     0,                          /*tp_as_sequence*/
2090     0,                          /*tp_as_mapping*/
2091     0,                          /*tp_hash*/
2092     0,                      /*tp_call*/
2093     0,                      /*tp_str*/
2094     PyObject_GenericGetAttr,/*tp_getattro*/
2095     PyObject_GenericSetAttr,/*tp_setattro*/
2096     0,                      /*tp_as_buffer*/
2097     Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
2098     BZ2Decomp__doc__,       /*tp_doc*/
2099     0,                      /*tp_traverse*/
2100     0,                      /*tp_clear*/
2101     0,                      /*tp_richcompare*/
2102     0,                      /*tp_weaklistoffset*/
2103     0,                      /*tp_iter*/
2104     0,                      /*tp_iternext*/
2105     BZ2Decomp_methods,      /*tp_methods*/
2106     BZ2Decomp_members,      /*tp_members*/
2107     0,                      /*tp_getset*/
2108     0,                      /*tp_base*/
2109     0,                      /*tp_dict*/
2110     0,                      /*tp_descr_get*/
2111     0,                      /*tp_descr_set*/
2112     0,                      /*tp_dictoffset*/
2113     (initproc)BZ2Decomp_init, /*tp_init*/
2114     PyType_GenericAlloc,    /*tp_alloc*/
2115     PyType_GenericNew,      /*tp_new*/
2116     _PyObject_Del,          /*tp_free*/
2117     0,                      /*tp_is_gc*/
2118 };
2119 
2120 
2121 /* ===================================================================== */
2122 /* Module functions. */
2123 
2124 PyDoc_STRVAR(bz2_compress__doc__,
2125 "compress(data [, compresslevel=9]) -> string\n\
2126 \n\
2127 Compress data in one shot. If you want to compress data sequentially,\n\
2128 use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
2129 given, must be a number between 1 and 9.\n\
2130 ");
2131 
2132 static PyObject *
bz2_compress(PyObject * self,PyObject * args,PyObject * kwargs)2133 bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
2134 {
2135     int compresslevel=9;
2136     int action;
2137     Py_buffer pdata;
2138     size_t input_left;
2139     size_t output_size = 0;
2140     PyObject *ret = NULL;
2141     bz_stream _bzs;
2142     bz_stream *bzs = &_bzs;
2143     int bzerror;
2144     static char *kwlist[] = {"data", "compresslevel", 0};
2145 
2146     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|i",
2147                                      kwlist, &pdata,
2148                                      &compresslevel))
2149         return NULL;
2150 
2151     if (compresslevel < 1 || compresslevel > 9) {
2152         PyErr_SetString(PyExc_ValueError,
2153                         "compresslevel must be between 1 and 9");
2154         PyBuffer_Release(&pdata);
2155         return NULL;
2156     }
2157 
2158     ret = PyString_FromStringAndSize(NULL, SMALLCHUNK);
2159     if (!ret) {
2160         PyBuffer_Release(&pdata);
2161         return NULL;
2162     }
2163 
2164     memset(bzs, 0, sizeof(bz_stream));
2165 
2166     bzs->next_in = pdata.buf;
2167     bzs->avail_in = MIN(pdata.len, UINT_MAX);
2168     input_left = pdata.len - bzs->avail_in;
2169 
2170     bzs->next_out = BUF(ret);
2171     bzs->avail_out = PyString_GET_SIZE(ret);
2172 
2173     bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
2174     if (bzerror != BZ_OK) {
2175         Util_CatchBZ2Error(bzerror);
2176         PyBuffer_Release(&pdata);
2177         Py_DECREF(ret);
2178         return NULL;
2179     }
2180 
2181     action = input_left > 0 ? BZ_RUN : BZ_FINISH;
2182 
2183     for (;;) {
2184         char *saved_next_out;
2185 
2186         Py_BEGIN_ALLOW_THREADS
2187         saved_next_out = bzs->next_out;
2188         bzerror = BZ2_bzCompress(bzs, action);
2189         output_size += bzs->next_out - saved_next_out;
2190         Py_END_ALLOW_THREADS
2191 
2192         if (bzerror == BZ_STREAM_END) {
2193             break;
2194         } else if (bzerror != BZ_RUN_OK && bzerror != BZ_FINISH_OK) {
2195             BZ2_bzCompressEnd(bzs);
2196             Util_CatchBZ2Error(bzerror);
2197             PyBuffer_Release(&pdata);
2198             Py_DECREF(ret);
2199             return NULL;
2200         }
2201         if (action == BZ_RUN && bzs->avail_in == 0) {
2202             if (input_left == 0) {
2203                 action = BZ_FINISH;
2204             } else {
2205                 bzs->avail_in = MIN(input_left, UINT_MAX);
2206                 input_left -= bzs->avail_in;
2207             }
2208         }
2209         if (bzs->avail_out == 0) {
2210             size_t buffer_left = PyString_GET_SIZE(ret) - output_size;
2211             if (buffer_left == 0) {
2212                 if (Util_GrowBuffer(&ret) < 0) {
2213                     BZ2_bzCompressEnd(bzs);
2214                     PyBuffer_Release(&pdata);
2215                     return NULL;
2216                 }
2217                 bzs->next_out = BUF(ret) + output_size;
2218                 buffer_left = PyString_GET_SIZE(ret) - output_size;
2219             }
2220             bzs->avail_out = MIN(buffer_left, UINT_MAX);
2221         }
2222     }
2223 
2224     if (output_size != PyString_GET_SIZE(ret))
2225         _PyString_Resize(&ret, output_size);  /* Sets ret to NULL on failure. */
2226 
2227     BZ2_bzCompressEnd(bzs);
2228     PyBuffer_Release(&pdata);
2229     return ret;
2230 }
2231 
2232 PyDoc_STRVAR(bz2_decompress__doc__,
2233 "decompress(data) -> decompressed data\n\
2234 \n\
2235 Decompress data in one shot. If you want to decompress data sequentially,\n\
2236 use an instance of BZ2Decompressor instead.\n\
2237 ");
2238 
2239 static PyObject *
bz2_decompress(PyObject * self,PyObject * args)2240 bz2_decompress(PyObject *self, PyObject *args)
2241 {
2242     Py_buffer pdata;
2243     size_t input_left;
2244     size_t output_size = 0;
2245     PyObject *ret;
2246     bz_stream _bzs;
2247     bz_stream *bzs = &_bzs;
2248     int bzerror;
2249 
2250     if (!PyArg_ParseTuple(args, "s*:decompress", &pdata))
2251         return NULL;
2252 
2253     if (pdata.len == 0) {
2254         PyBuffer_Release(&pdata);
2255         return PyString_FromString("");
2256     }
2257 
2258     ret = PyString_FromStringAndSize(NULL, SMALLCHUNK);
2259     if (!ret) {
2260         PyBuffer_Release(&pdata);
2261         return NULL;
2262     }
2263 
2264     memset(bzs, 0, sizeof(bz_stream));
2265 
2266     bzs->next_in = pdata.buf;
2267     bzs->avail_in = MIN(pdata.len, UINT_MAX);
2268     input_left = pdata.len - bzs->avail_in;
2269 
2270     bzs->next_out = BUF(ret);
2271     bzs->avail_out = PyString_GET_SIZE(ret);
2272 
2273     bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2274     if (bzerror != BZ_OK) {
2275         Util_CatchBZ2Error(bzerror);
2276         Py_DECREF(ret);
2277         PyBuffer_Release(&pdata);
2278         return NULL;
2279     }
2280 
2281     for (;;) {
2282         char *saved_next_out;
2283 
2284         Py_BEGIN_ALLOW_THREADS
2285         saved_next_out = bzs->next_out;
2286         bzerror = BZ2_bzDecompress(bzs);
2287         output_size += bzs->next_out - saved_next_out;
2288         Py_END_ALLOW_THREADS
2289 
2290         if (bzerror == BZ_STREAM_END) {
2291             break;
2292         } else if (bzerror != BZ_OK) {
2293             BZ2_bzDecompressEnd(bzs);
2294             Util_CatchBZ2Error(bzerror);
2295             PyBuffer_Release(&pdata);
2296             Py_DECREF(ret);
2297             return NULL;
2298         }
2299         if (bzs->avail_in == 0) {
2300             if (input_left == 0) {
2301                 BZ2_bzDecompressEnd(bzs);
2302                 PyErr_SetString(PyExc_ValueError,
2303                                 "couldn't find end of stream");
2304                 PyBuffer_Release(&pdata);
2305                 Py_DECREF(ret);
2306                 return NULL;
2307             }
2308             bzs->avail_in = MIN(input_left, UINT_MAX);
2309             input_left -= bzs->avail_in;
2310         }
2311         if (bzs->avail_out == 0) {
2312             size_t buffer_left = PyString_GET_SIZE(ret) - output_size;
2313             if (buffer_left == 0) {
2314                 if (Util_GrowBuffer(&ret) < 0) {
2315                     BZ2_bzDecompressEnd(bzs);
2316                     PyBuffer_Release(&pdata);
2317                     return NULL;
2318                 }
2319                 bzs->next_out = BUF(ret) + output_size;
2320                 buffer_left = PyString_GET_SIZE(ret) - output_size;
2321             }
2322             bzs->avail_out = MIN(buffer_left, UINT_MAX);
2323         }
2324     }
2325 
2326     if (output_size != PyString_GET_SIZE(ret))
2327         _PyString_Resize(&ret, output_size);  /* Sets ret to NULL on failure. */
2328 
2329     BZ2_bzDecompressEnd(bzs);
2330     PyBuffer_Release(&pdata);
2331     return ret;
2332 }
2333 
2334 static PyMethodDef bz2_methods[] = {
2335     {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2336         bz2_compress__doc__},
2337     {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2338         bz2_decompress__doc__},
2339     {NULL,              NULL}           /* sentinel */
2340 };
2341 
2342 /* ===================================================================== */
2343 /* Initialization function. */
2344 
2345 PyDoc_STRVAR(bz2__doc__,
2346 "The python bz2 module provides a comprehensive interface for\n\
2347 the bz2 compression library. It implements a complete file\n\
2348 interface, one shot (de)compression functions, and types for\n\
2349 sequential (de)compression.\n\
2350 ");
2351 
2352 PyMODINIT_FUNC
initbz2(void)2353 initbz2(void)
2354 {
2355     PyObject *m;
2356 
2357     if (PyType_Ready(&BZ2File_Type) < 0)
2358         return;
2359     if (PyType_Ready(&BZ2Comp_Type) < 0)
2360         return;
2361     if (PyType_Ready(&BZ2Decomp_Type) < 0)
2362         return;
2363 
2364     m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
2365     if (m == NULL)
2366         return;
2367 
2368     PyModule_AddObject(m, "__author__", PyString_FromString(__author__));
2369 
2370     Py_INCREF(&BZ2File_Type);
2371     PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2372 
2373     Py_INCREF(&BZ2Comp_Type);
2374     PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2375 
2376     Py_INCREF(&BZ2Decomp_Type);
2377     PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
2378 }
2379