• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 ** Routines to represent binary data in ASCII and vice-versa
3 **
4 ** This module currently supports the following encodings:
5 ** uuencode:
6 **      each line encodes 45 bytes (except possibly the last)
7 **      First char encodes (binary) length, rest data
8 **      each char encodes 6 bits, as follows:
9 **      binary: 01234567 abcdefgh ijklmnop
10 **      ascii:  012345 67abcd efghij klmnop
11 **      ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
12 **      short binary data is zero-extended (so the bits are always in the
13 **      right place), this does *not* reflect in the length.
14 ** base64:
15 **      Line breaks are insignificant, but lines are at most 76 chars
16 **      each char encodes 6 bits, in similar order as uucode/hqx. Encoding
17 **      is done via a table.
18 **      Short binary data is filled (in ASCII) with '='.
19 ** hqx:
20 **      File starts with introductory text, real data starts and ends
21 **      with colons.
22 **      Data consists of three similar parts: info, datafork, resourcefork.
23 **      Each part is protected (at the end) with a 16-bit crc
24 **      The binary data is run-length encoded, and then ascii-fied:
25 **      binary: 01234567 abcdefgh ijklmnop
26 **      ascii:  012345 67abcd efghij klmnop
27 **      ASCII encoding is table-driven, see the code.
28 **      Short binary data results in the runt ascii-byte being output with
29 **      the bits in the right place.
30 **
31 ** While I was reading dozens of programs that encode or decode the formats
32 ** here (documentation? hihi:-) I have formulated Jansen's Observation:
33 **
34 **      Programs that encode binary data in ASCII are written in
35 **      such a style that they are as unreadable as possible. Devices used
36 **      include unnecessary global variables, burying important tables
37 **      in unrelated sourcefiles, putting functions in include files,
38 **      using seemingly-descriptive variable names for different purposes,
39 **      calls to empty subroutines and a host of others.
40 **
41 ** I have attempted to break with this tradition, but I guess that that
42 ** does make the performance sub-optimal. Oh well, too bad...
43 **
44 ** Jack Jansen, CWI, July 1995.
45 **
46 ** Added support for quoted-printable encoding, based on rfc 1521 et al
47 ** quoted-printable encoding specifies that non printable characters (anything
48 ** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
49 ** of the character.  It also specifies some other behavior to enable 8bit data
50 ** in a mail message with little difficulty (maximum line sizes, protecting
51 ** some cases of whitespace, etc).
52 **
53 ** Brandon Long, September 2001.
54 */
55 
56 #define PY_SSIZE_T_CLEAN
57 
58 #include "Python.h"
59 #ifdef USE_ZLIB_CRC32
60 #include "zlib.h"
61 #endif
62 
63 static PyObject *Error;
64 static PyObject *Incomplete;
65 
66 /*
67 ** hqx lookup table, ascii->binary.
68 */
69 
70 #define RUNCHAR 0x90
71 
72 #define DONE 0x7F
73 #define SKIP 0x7E
74 #define FAIL 0x7D
75 
76 static unsigned char table_a2b_hqx[256] = {
77 /*       ^@    ^A    ^B    ^C    ^D    ^E    ^F    ^G   */
78 /* 0*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
79 /*       \b    \t    \n    ^K    ^L    \r    ^N    ^O   */
80 /* 1*/  FAIL, FAIL, SKIP, FAIL, FAIL, SKIP, FAIL, FAIL,
81 /*       ^P    ^Q    ^R    ^S    ^T    ^U    ^V    ^W   */
82 /* 2*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
83 /*       ^X    ^Y    ^Z    ^[    ^\    ^]    ^^    ^_   */
84 /* 3*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
85 /*              !     "     #     $     %     &     '   */
86 /* 4*/  FAIL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
87 /*        (     )     *     +     ,     -     .     /   */
88 /* 5*/  0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, FAIL, FAIL,
89 /*        0     1     2     3     4     5     6     7   */
90 /* 6*/  0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, FAIL,
91 /*        8     9     :     ;     <     =     >     ?   */
92 /* 7*/  0x14, 0x15, DONE, FAIL, FAIL, FAIL, FAIL, FAIL,
93 /*        @     A     B     C     D     E     F     G   */
94 /* 8*/  0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D,
95 /*        H     I     J     K     L     M     N     O   */
96 /* 9*/  0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, FAIL,
97 /*        P     Q     R     S     T     U     V     W   */
98 /*10*/  0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, FAIL,
99 /*        X     Y     Z     [     \     ]     ^     _   */
100 /*11*/  0x2C, 0x2D, 0x2E, 0x2F, FAIL, FAIL, FAIL, FAIL,
101 /*        `     a     b     c     d     e     f     g   */
102 /*12*/  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, FAIL,
103 /*        h     i     j     k     l     m     n     o   */
104 /*13*/  0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, FAIL, FAIL,
105 /*        p     q     r     s     t     u     v     w   */
106 /*14*/  0x3D, 0x3E, 0x3F, FAIL, FAIL, FAIL, FAIL, FAIL,
107 /*        x     y     z     {     |     }     ~    ^?   */
108 /*15*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
109 /*16*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
110     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
111     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
112     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
113     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
114     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
115     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
116     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
117     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
118     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
119     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
120     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
121     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
122     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
123     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
124     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
125 };
126 
127 static unsigned char table_b2a_hqx[] =
128 "!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr";
129 
130 static char table_a2b_base64[] = {
131     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
132     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
133     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
134     52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */
135     -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
136     15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
137     -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
138     41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
139 };
140 
141 #define BASE64_PAD '='
142 
143 /* Max binary chunk size; limited only by available memory */
144 #define BASE64_MAXBIN (PY_SSIZE_T_MAX/2 - sizeof(PyStringObject) - 3)
145 
146 static unsigned char table_b2a_base64[] =
147 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
148 
149 
150 
151 static unsigned short crctab_hqx[256] = {
152     0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
153     0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
154     0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
155     0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
156     0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
157     0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
158     0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
159     0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
160     0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
161     0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
162     0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
163     0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
164     0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
165     0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
166     0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
167     0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
168     0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
169     0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
170     0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
171     0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
172     0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
173     0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
174     0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
175     0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
176     0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
177     0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
178     0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
179     0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
180     0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
181     0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
182     0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
183     0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
184 };
185 
186 PyDoc_STRVAR(doc_a2b_uu, "(ascii) -> bin. Decode a line of uuencoded data");
187 
188 static PyObject *
binascii_a2b_uu(PyObject * self,PyObject * args)189 binascii_a2b_uu(PyObject *self, PyObject *args)
190 {
191     Py_buffer pascii;
192     unsigned char *ascii_data, *bin_data;
193     int leftbits = 0;
194     unsigned char this_ch;
195     unsigned int leftchar = 0;
196     PyObject *rv;
197     Py_ssize_t ascii_len, bin_len;
198 
199     if ( !PyArg_ParseTuple(args, "s*:a2b_uu", &pascii) )
200         return NULL;
201     ascii_data = pascii.buf;
202     ascii_len = pascii.len;
203 
204     assert(ascii_len >= 0);
205 
206     /* First byte: binary data length (in bytes) */
207     bin_len = (*ascii_data++ - ' ') & 077;
208     ascii_len--;
209 
210     /* Allocate the buffer */
211     if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL ) {
212         PyBuffer_Release(&pascii);
213         return NULL;
214     }
215     bin_data = (unsigned char *)PyString_AS_STRING(rv);
216 
217     for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
218         /* XXX is it really best to add NULs if there's no more data */
219         this_ch = (ascii_len > 0) ? *ascii_data : 0;
220         if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
221             /*
222             ** Whitespace. Assume some spaces got eaten at
223             ** end-of-line. (We check this later)
224             */
225             this_ch = 0;
226         } else {
227             /* Check the character for legality
228             ** The 64 in stead of the expected 63 is because
229             ** there are a few uuencodes out there that use
230             ** '`' as zero instead of space.
231             */
232             if ( this_ch < ' ' || this_ch > (' ' + 64)) {
233                 PyErr_SetString(Error, "Illegal char");
234                 PyBuffer_Release(&pascii);
235                 Py_DECREF(rv);
236                 return NULL;
237             }
238             this_ch = (this_ch - ' ') & 077;
239         }
240         /*
241         ** Shift it in on the low end, and see if there's
242         ** a byte ready for output.
243         */
244         leftchar = (leftchar << 6) | (this_ch);
245         leftbits += 6;
246         if ( leftbits >= 8 ) {
247             leftbits -= 8;
248             *bin_data++ = (leftchar >> leftbits) & 0xff;
249             leftchar &= ((1 << leftbits) - 1);
250             bin_len--;
251         }
252     }
253     /*
254     ** Finally, check that if there's anything left on the line
255     ** that it's whitespace only.
256     */
257     while( ascii_len-- > 0 ) {
258         this_ch = *ascii_data++;
259         /* Extra '`' may be written as padding in some cases */
260         if ( this_ch != ' ' && this_ch != ' '+64 &&
261              this_ch != '\n' && this_ch != '\r' ) {
262             PyErr_SetString(Error, "Trailing garbage");
263             PyBuffer_Release(&pascii);
264             Py_DECREF(rv);
265             return NULL;
266         }
267     }
268     PyBuffer_Release(&pascii);
269     return rv;
270 }
271 
272 PyDoc_STRVAR(doc_b2a_uu, "(bin) -> ascii. Uuencode line of data");
273 
274 static PyObject *
binascii_b2a_uu(PyObject * self,PyObject * args)275 binascii_b2a_uu(PyObject *self, PyObject *args)
276 {
277     Py_buffer pbin;
278     unsigned char *ascii_data, *bin_data;
279     int leftbits = 0;
280     unsigned char this_ch;
281     unsigned int leftchar = 0;
282     PyObject *rv;
283     Py_ssize_t bin_len;
284 
285     if ( !PyArg_ParseTuple(args, "s*:b2a_uu", &pbin) )
286         return NULL;
287     bin_data = pbin.buf;
288     bin_len = pbin.len;
289     if ( bin_len > 45 ) {
290         /* The 45 is a limit that appears in all uuencode's */
291         PyErr_SetString(Error, "At most 45 bytes at once");
292         PyBuffer_Release(&pbin);
293         return NULL;
294     }
295 
296     /* We're lazy and allocate to much (fixed up later) */
297     if ( (rv=PyString_FromStringAndSize(NULL, 2 + (bin_len+2)/3*4)) == NULL ) {
298         PyBuffer_Release(&pbin);
299         return NULL;
300     }
301     ascii_data = (unsigned char *)PyString_AS_STRING(rv);
302 
303     /* Store the length */
304     *ascii_data++ = ' ' + (bin_len & 077);
305 
306     for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
307         /* Shift the data (or padding) into our buffer */
308         if ( bin_len > 0 )              /* Data */
309             leftchar = (leftchar << 8) | *bin_data;
310         else                            /* Padding */
311             leftchar <<= 8;
312         leftbits += 8;
313 
314         /* See if there are 6-bit groups ready */
315         while ( leftbits >= 6 ) {
316             this_ch = (leftchar >> (leftbits-6)) & 0x3f;
317             leftbits -= 6;
318             *ascii_data++ = this_ch + ' ';
319         }
320     }
321     *ascii_data++ = '\n';       /* Append a courtesy newline */
322 
323     /* rv is cleared on error */
324     (void)_PyString_Resize(&rv,
325                        (ascii_data -
326                         (unsigned char *)PyString_AS_STRING(rv)));
327     PyBuffer_Release(&pbin);
328     return rv;
329 }
330 
331 
332 static int
binascii_find_valid(unsigned char * s,Py_ssize_t slen,int num)333 binascii_find_valid(unsigned char *s, Py_ssize_t slen, int num)
334 {
335     /* Finds & returns the (num+1)th
336     ** valid character for base64, or -1 if none.
337     */
338 
339     int ret = -1;
340     unsigned char c, b64val;
341 
342     while ((slen > 0) && (ret == -1)) {
343         c = *s;
344         b64val = table_a2b_base64[c & 0x7f];
345         if ( ((c <= 0x7f) && (b64val != (unsigned char)-1)) ) {
346             if (num == 0)
347                 ret = *s;
348             num--;
349         }
350 
351         s++;
352         slen--;
353     }
354     return ret;
355 }
356 
357 PyDoc_STRVAR(doc_a2b_base64, "(ascii) -> bin. Decode a line of base64 data");
358 
359 static PyObject *
binascii_a2b_base64(PyObject * self,PyObject * args)360 binascii_a2b_base64(PyObject *self, PyObject *args)
361 {
362     Py_buffer pascii;
363     unsigned char *ascii_data, *bin_data;
364     int leftbits = 0;
365     unsigned char this_ch;
366     unsigned int leftchar = 0;
367     PyObject *rv;
368     Py_ssize_t ascii_len, bin_len;
369     int quad_pos = 0;
370 
371     if ( !PyArg_ParseTuple(args, "s*:a2b_base64", &pascii) )
372         return NULL;
373     ascii_data = pascii.buf;
374     ascii_len = pascii.len;
375 
376     assert(ascii_len >= 0);
377 
378     if (ascii_len > PY_SSIZE_T_MAX - 3) {
379         PyBuffer_Release(&pascii);
380         return PyErr_NoMemory();
381     }
382 
383     bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
384 
385     /* Allocate the buffer */
386     if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL ) {
387         PyBuffer_Release(&pascii);
388         return NULL;
389     }
390     bin_data = (unsigned char *)PyString_AS_STRING(rv);
391     bin_len = 0;
392 
393     for( ; ascii_len > 0; ascii_len--, ascii_data++) {
394         this_ch = *ascii_data;
395 
396         if (this_ch > 0x7f ||
397             this_ch == '\r' || this_ch == '\n' || this_ch == ' ')
398             continue;
399 
400         /* Check for pad sequences and ignore
401         ** the invalid ones.
402         */
403         if (this_ch == BASE64_PAD) {
404             if ( (quad_pos < 2) ||
405                  ((quad_pos == 2) &&
406                   (binascii_find_valid(ascii_data, ascii_len, 1)
407                    != BASE64_PAD)) )
408             {
409                 continue;
410             }
411             else {
412                 /* A pad sequence means no more input.
413                 ** We've already interpreted the data
414                 ** from the quad at this point.
415                 */
416                 leftbits = 0;
417                 break;
418             }
419         }
420 
421         this_ch = table_a2b_base64[*ascii_data];
422         if ( this_ch == (unsigned char) -1 )
423             continue;
424 
425         /*
426         ** Shift it in on the low end, and see if there's
427         ** a byte ready for output.
428         */
429         quad_pos = (quad_pos + 1) & 0x03;
430         leftchar = (leftchar << 6) | (this_ch);
431         leftbits += 6;
432 
433         if ( leftbits >= 8 ) {
434             leftbits -= 8;
435             *bin_data++ = (leftchar >> leftbits) & 0xff;
436             bin_len++;
437             leftchar &= ((1 << leftbits) - 1);
438         }
439     }
440 
441     if (leftbits != 0) {
442         PyBuffer_Release(&pascii);
443         PyErr_SetString(Error, "Incorrect padding");
444         Py_DECREF(rv);
445         return NULL;
446     }
447 
448     /* And set string size correctly. If the result string is empty
449     ** (because the input was all invalid) return the shared empty
450     ** string instead; _PyString_Resize() won't do this for us.
451     */
452     if (bin_len > 0) {
453         /* rv is cleared on error */
454         (void)_PyString_Resize(&rv, bin_len);
455     }
456     else {
457         Py_DECREF(rv);
458         rv = PyString_FromStringAndSize("", 0);
459     }
460     PyBuffer_Release(&pascii);
461     return rv;
462 }
463 
464 PyDoc_STRVAR(doc_b2a_base64, "(bin) -> ascii. Base64-code line of data");
465 
466 static PyObject *
binascii_b2a_base64(PyObject * self,PyObject * args)467 binascii_b2a_base64(PyObject *self, PyObject *args)
468 {
469     Py_buffer pbuf;
470     unsigned char *ascii_data, *bin_data;
471     int leftbits = 0;
472     unsigned char this_ch;
473     unsigned int leftchar = 0;
474     PyObject *rv;
475     Py_ssize_t bin_len;
476 
477     if ( !PyArg_ParseTuple(args, "s*:b2a_base64", &pbuf) )
478         return NULL;
479     bin_data = pbuf.buf;
480     bin_len = pbuf.len;
481 
482     assert(bin_len >= 0);
483 
484     if ( bin_len > BASE64_MAXBIN ) {
485         PyErr_SetString(Error, "Too much data for base64 line");
486         PyBuffer_Release(&pbuf);
487         return NULL;
488     }
489 
490     /* We're lazy and allocate too much (fixed up later).
491        "+3" leaves room for up to two pad characters and a trailing
492        newline.  Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
493     if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2 + 3)) == NULL ) {
494         PyBuffer_Release(&pbuf);
495         return NULL;
496     }
497     ascii_data = (unsigned char *)PyString_AS_STRING(rv);
498 
499     for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
500         /* Shift the data into our buffer */
501         leftchar = (leftchar << 8) | *bin_data;
502         leftbits += 8;
503 
504         /* See if there are 6-bit groups ready */
505         while ( leftbits >= 6 ) {
506             this_ch = (leftchar >> (leftbits-6)) & 0x3f;
507             leftbits -= 6;
508             *ascii_data++ = table_b2a_base64[this_ch];
509         }
510     }
511     if ( leftbits == 2 ) {
512         *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
513         *ascii_data++ = BASE64_PAD;
514         *ascii_data++ = BASE64_PAD;
515     } else if ( leftbits == 4 ) {
516         *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
517         *ascii_data++ = BASE64_PAD;
518     }
519     *ascii_data++ = '\n';       /* Append a courtesy newline */
520 
521     /* rv is cleared on error */
522     (void)_PyString_Resize(&rv,
523                        (ascii_data -
524                         (unsigned char *)PyString_AS_STRING(rv)));
525     PyBuffer_Release(&pbuf);
526     return rv;
527 }
528 
529 PyDoc_STRVAR(doc_a2b_hqx, "ascii -> bin, done. Decode .hqx coding");
530 
531 static PyObject *
binascii_a2b_hqx(PyObject * self,PyObject * args)532 binascii_a2b_hqx(PyObject *self, PyObject *args)
533 {
534     Py_buffer pascii;
535     unsigned char *ascii_data, *bin_data;
536     int leftbits = 0;
537     unsigned char this_ch;
538     unsigned int leftchar = 0;
539     PyObject *rv;
540     Py_ssize_t len;
541     int done = 0;
542 
543     if ( !PyArg_ParseTuple(args, "s*:a2b_hqx", &pascii) )
544         return NULL;
545     ascii_data = pascii.buf;
546     len = pascii.len;
547 
548     assert(len >= 0);
549 
550     if (len > PY_SSIZE_T_MAX - 2) {
551         PyBuffer_Release(&pascii);
552         return PyErr_NoMemory();
553     }
554 
555     /* Allocate a string that is too big (fixed later)
556        Add two to the initial length to prevent interning which
557        would preclude subsequent resizing.  */
558     if ( (rv=PyString_FromStringAndSize(NULL, len+2)) == NULL ) {
559         PyBuffer_Release(&pascii);
560         return NULL;
561     }
562     bin_data = (unsigned char *)PyString_AS_STRING(rv);
563 
564     for( ; len > 0 ; len--, ascii_data++ ) {
565         /* Get the byte and look it up */
566         this_ch = table_a2b_hqx[*ascii_data];
567         if ( this_ch == SKIP )
568             continue;
569         if ( this_ch == FAIL ) {
570             PyErr_SetString(Error, "Illegal char");
571             PyBuffer_Release(&pascii);
572             Py_DECREF(rv);
573             return NULL;
574         }
575         if ( this_ch == DONE ) {
576             /* The terminating colon */
577             done = 1;
578             break;
579         }
580 
581         /* Shift it into the buffer and see if any bytes are ready */
582         leftchar = (leftchar << 6) | (this_ch);
583         leftbits += 6;
584         if ( leftbits >= 8 ) {
585             leftbits -= 8;
586             *bin_data++ = (leftchar >> leftbits) & 0xff;
587             leftchar &= ((1 << leftbits) - 1);
588         }
589     }
590 
591     if ( leftbits && !done ) {
592         PyErr_SetString(Incomplete,
593                         "String has incomplete number of bytes");
594         PyBuffer_Release(&pascii);
595         Py_DECREF(rv);
596         return NULL;
597     }
598     /* rv is cleared on error */
599     if (_PyString_Resize(&rv,
600                        (bin_data -
601                         (unsigned char *)PyString_AS_STRING(rv))) == 0) {
602         PyObject *rrv = Py_BuildValue("Oi", rv, done);
603         PyBuffer_Release(&pascii);
604         Py_DECREF(rv);
605         return rrv;
606     }
607 
608     PyBuffer_Release(&pascii);
609     return NULL;
610 }
611 
612 PyDoc_STRVAR(doc_rlecode_hqx, "Binhex RLE-code binary data");
613 
614 static PyObject *
binascii_rlecode_hqx(PyObject * self,PyObject * args)615 binascii_rlecode_hqx(PyObject *self, PyObject *args)
616 {
617     Py_buffer pbuf;
618     unsigned char *in_data, *out_data;
619     PyObject *rv;
620     unsigned char ch;
621     Py_ssize_t in, inend, len;
622 
623     if ( !PyArg_ParseTuple(args, "s*:rlecode_hqx", &pbuf) )
624         return NULL;
625     in_data = pbuf.buf;
626     len = pbuf.len;
627 
628     assert(len >= 0);
629 
630     if (len > PY_SSIZE_T_MAX / 2 - 2) {
631         PyBuffer_Release(&pbuf);
632         return PyErr_NoMemory();
633     }
634 
635     /* Worst case: output is twice as big as input (fixed later) */
636     if ( (rv=PyString_FromStringAndSize(NULL, len*2+2)) == NULL ) {
637         PyBuffer_Release(&pbuf);
638         return NULL;
639     }
640     out_data = (unsigned char *)PyString_AS_STRING(rv);
641 
642     for( in=0; in<len; in++) {
643         ch = in_data[in];
644         if ( ch == RUNCHAR ) {
645             /* RUNCHAR. Escape it. */
646             *out_data++ = RUNCHAR;
647             *out_data++ = 0;
648         } else {
649             /* Check how many following are the same */
650             for(inend=in+1;
651                 inend<len && in_data[inend] == ch &&
652                     inend < in+255;
653                 inend++) ;
654             if ( inend - in > 3 ) {
655                 /* More than 3 in a row. Output RLE. */
656                 *out_data++ = ch;
657                 *out_data++ = RUNCHAR;
658                 *out_data++ = inend-in;
659                 in = inend-1;
660             } else {
661                 /* Less than 3. Output the byte itself */
662                 *out_data++ = ch;
663             }
664         }
665     }
666     /* rv is cleared on error */
667     (void)_PyString_Resize(&rv,
668                        (out_data -
669                         (unsigned char *)PyString_AS_STRING(rv)));
670     PyBuffer_Release(&pbuf);
671     return rv;
672 }
673 
674 PyDoc_STRVAR(doc_b2a_hqx, "Encode .hqx data");
675 
676 static PyObject *
binascii_b2a_hqx(PyObject * self,PyObject * args)677 binascii_b2a_hqx(PyObject *self, PyObject *args)
678 {
679     Py_buffer pbin;
680     unsigned char *ascii_data, *bin_data;
681     int leftbits = 0;
682     unsigned char this_ch;
683     unsigned int leftchar = 0;
684     PyObject *rv;
685     Py_ssize_t len;
686 
687     if ( !PyArg_ParseTuple(args, "s*:b2a_hqx", &pbin) )
688         return NULL;
689     bin_data = pbin.buf;
690     len = pbin.len;
691 
692     assert(len >= 0);
693 
694     if (len > PY_SSIZE_T_MAX / 2 - 2) {
695         PyBuffer_Release(&pbin);
696         return PyErr_NoMemory();
697     }
698 
699     /* Allocate a buffer that is at least large enough */
700     if ( (rv=PyString_FromStringAndSize(NULL, len*2+2)) == NULL ) {
701         PyBuffer_Release(&pbin);
702         return NULL;
703     }
704     ascii_data = (unsigned char *)PyString_AS_STRING(rv);
705 
706     for( ; len > 0 ; len--, bin_data++ ) {
707         /* Shift into our buffer, and output any 6bits ready */
708         leftchar = (leftchar << 8) | *bin_data;
709         leftbits += 8;
710         while ( leftbits >= 6 ) {
711             this_ch = (leftchar >> (leftbits-6)) & 0x3f;
712             leftbits -= 6;
713             *ascii_data++ = table_b2a_hqx[this_ch];
714         }
715     }
716     /* Output a possible runt byte */
717     if ( leftbits ) {
718         leftchar <<= (6-leftbits);
719         *ascii_data++ = table_b2a_hqx[leftchar & 0x3f];
720     }
721     /* rv is cleared on error */
722     (void)_PyString_Resize(&rv,
723                        (ascii_data -
724                         (unsigned char *)PyString_AS_STRING(rv)));
725     PyBuffer_Release(&pbin);
726     return rv;
727 }
728 
729 PyDoc_STRVAR(doc_rledecode_hqx, "Decode hexbin RLE-coded string");
730 
731 static PyObject *
binascii_rledecode_hqx(PyObject * self,PyObject * args)732 binascii_rledecode_hqx(PyObject *self, PyObject *args)
733 {
734     Py_buffer pin;
735     unsigned char *in_data, *out_data;
736     unsigned char in_byte, in_repeat;
737     PyObject *rv;
738     Py_ssize_t in_len, out_len, out_len_left;
739 
740     if ( !PyArg_ParseTuple(args, "s*:rledecode_hqx", &pin) )
741         return NULL;
742     in_data = pin.buf;
743     in_len = pin.len;
744 
745     assert(in_len >= 0);
746 
747     /* Empty string is a special case */
748     if ( in_len == 0 ) {
749         PyBuffer_Release(&pin);
750         return PyString_FromStringAndSize("", 0);
751     }
752     else if (in_len > PY_SSIZE_T_MAX / 2) {
753         PyBuffer_Release(&pin);
754         return PyErr_NoMemory();
755     }
756 
757     /* Allocate a buffer of reasonable size. Resized when needed */
758     out_len = in_len*2;
759     if ( (rv=PyString_FromStringAndSize(NULL, out_len)) == NULL ) {
760         PyBuffer_Release(&pin);
761         return NULL;
762     }
763     out_len_left = out_len;
764     out_data = (unsigned char *)PyString_AS_STRING(rv);
765 
766     /*
767     ** We need two macros here to get/put bytes and handle
768     ** end-of-buffer for input and output strings.
769     */
770 #define INBYTE(b) \
771     do { \
772              if ( --in_len < 0 ) { \
773                        PyErr_SetString(Incomplete, ""); \
774                        Py_DECREF(rv); \
775                        PyBuffer_Release(&pin); \
776                        return NULL; \
777              } \
778              b = *in_data++; \
779     } while(0)
780 
781 #define OUTBYTE(b) \
782     do { \
783              if ( --out_len_left < 0 ) { \
784                       if ( out_len > PY_SSIZE_T_MAX / 2) return PyErr_NoMemory(); \
785                       if (_PyString_Resize(&rv, 2*out_len) < 0) \
786                         { PyBuffer_Release(&pin); return NULL; } \
787                       out_data = (unsigned char *)PyString_AS_STRING(rv) \
788                                                              + out_len; \
789                       out_len_left = out_len-1; \
790                       out_len = out_len * 2; \
791              } \
792              *out_data++ = b; \
793     } while(0)
794 
795         /*
796         ** Handle first byte separately (since we have to get angry
797         ** in case of an orphaned RLE code).
798         */
799         INBYTE(in_byte);
800 
801     if (in_byte == RUNCHAR) {
802         INBYTE(in_repeat);
803         if (in_repeat != 0) {
804             /* Note Error, not Incomplete (which is at the end
805             ** of the string only). This is a programmer error.
806             */
807             PyErr_SetString(Error, "Orphaned RLE code at start");
808             PyBuffer_Release(&pin);
809             Py_DECREF(rv);
810             return NULL;
811         }
812         OUTBYTE(RUNCHAR);
813     } else {
814         OUTBYTE(in_byte);
815     }
816 
817     while( in_len > 0 ) {
818         INBYTE(in_byte);
819 
820         if (in_byte == RUNCHAR) {
821             INBYTE(in_repeat);
822             if ( in_repeat == 0 ) {
823                 /* Just an escaped RUNCHAR value */
824                 OUTBYTE(RUNCHAR);
825             } else {
826                 /* Pick up value and output a sequence of it */
827                 in_byte = out_data[-1];
828                 while ( --in_repeat > 0 )
829                     OUTBYTE(in_byte);
830             }
831         } else {
832             /* Normal byte */
833             OUTBYTE(in_byte);
834         }
835     }
836     /* rv is cleared on error */
837     (void)_PyString_Resize(&rv,
838                        (out_data -
839                         (unsigned char *)PyString_AS_STRING(rv)));
840     PyBuffer_Release(&pin);
841     return rv;
842 }
843 
844 PyDoc_STRVAR(doc_crc_hqx,
845 "(data, oldcrc) -> newcrc. Compute CRC-CCITT incrementally");
846 
847 static PyObject *
binascii_crc_hqx(PyObject * self,PyObject * args)848 binascii_crc_hqx(PyObject *self, PyObject *args)
849 {
850     Py_buffer pin;
851     unsigned char *bin_data;
852     unsigned int crc;
853     Py_ssize_t len;
854 
855     if ( !PyArg_ParseTuple(args, "s*i:crc_hqx", &pin, &crc) )
856         return NULL;
857     bin_data = pin.buf;
858     len = pin.len;
859 
860     while(len-- > 0) {
861         crc=((crc<<8)&0xff00)^crctab_hqx[((crc>>8)&0xff)^*bin_data++];
862     }
863 
864     PyBuffer_Release(&pin);
865     return Py_BuildValue("i", crc);
866 }
867 
868 PyDoc_STRVAR(doc_crc32,
869 "(data, oldcrc = 0) -> newcrc. Compute CRC-32 incrementally");
870 
871 #ifdef USE_ZLIB_CRC32
872 /* This was taken from zlibmodule.c PyZlib_crc32 (but is PY_SSIZE_T_CLEAN) */
873 static PyObject *
binascii_crc32(PyObject * self,PyObject * args)874 binascii_crc32(PyObject *self, PyObject *args)
875 {
876     unsigned int crc32val = 0;  /* crc32(0L, Z_NULL, 0) */
877     Py_buffer pbuf;
878     Byte *buf;
879     Py_ssize_t len;
880     int signed_val;
881 
882     if (!PyArg_ParseTuple(args, "s*|I:crc32", &pbuf, &crc32val))
883         return NULL;
884     /* In Python 2.x we return a signed integer regardless of native platform
885      * long size (the 32bit unsigned long is treated as 32-bit signed and sign
886      * extended into a 64-bit long inside the integer object).  3.0 does the
887      * right thing and returns unsigned. http://bugs.python.org/issue1202 */
888     buf = (Byte*)pbuf.buf;
889     len = pbuf.len;
890     signed_val = crc32(crc32val, buf, len);
891     PyBuffer_Release(&pbuf);
892     return PyInt_FromLong(signed_val);
893 }
894 #else  /* USE_ZLIB_CRC32 */
895 /*  Crc - 32 BIT ANSI X3.66 CRC checksum files
896     Also known as: ISO 3307
897 **********************************************************************|
898 *                                                                    *|
899 * Demonstration program to compute the 32-bit CRC used as the frame  *|
900 * check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71     *|
901 * and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level     *|
902 * protocol).  The 32-bit FCS was added via the Federal Register,     *|
903 * 1 June 1982, p.23798.  I presume but don't know for certain that   *|
904 * this polynomial is or will be included in CCITT V.41, which        *|
905 * defines the 16-bit CRC (often called CRC-CCITT) polynomial.  FIPS  *|
906 * PUB 78 says that the 32-bit FCS reduces otherwise undetected       *|
907 * errors by a factor of 10^-5 over 16-bit FCS.                       *|
908 *                                                                    *|
909 **********************************************************************|
910 
911  Copyright (C) 1986 Gary S. Brown.  You may use this program, or
912  code or tables extracted from it, as desired without restriction.
913 
914  First, the polynomial itself and its table of feedback terms.  The
915  polynomial is
916  X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
917  Note that we take it "backwards" and put the highest-order term in
918  the lowest-order bit.  The X^32 term is "implied"; the LSB is the
919  X^31 term, etc.  The X^0 term (usually shown as "+1") results in
920  the MSB being 1.
921 
922  Note that the usual hardware shift register implementation, which
923  is what we're using (we're merely optimizing it by doing eight-bit
924  chunks at a time) shifts bits into the lowest-order term.  In our
925  implementation, that means shifting towards the right.  Why do we
926  do it this way?  Because the calculated CRC must be transmitted in
927  order from highest-order term to lowest-order term.  UARTs transmit
928  characters in order from LSB to MSB.  By storing the CRC this way,
929  we hand it to the UART in the order low-byte to high-byte; the UART
 sends each low-bit to high-bit; and the result is transmission bit
931  by bit from highest- to lowest-order term without requiring any bit
932  shuffling on our part.  Reception works similarly.
933 
934  The feedback terms table consists of 256, 32-bit entries.  Notes:
935 
936   1. The table can be generated at runtime if desired; code to do so
937      is shown later.  It might not be obvious, but the feedback
938      terms simply represent the results of eight shift/xor opera-
939      tions for all combinations of data and CRC register values.
940 
941   2. The CRC accumulation logic is the same for all CRC polynomials,
942      be they sixteen or thirty-two bits wide.  You simply choose the
943      appropriate table.  Alternatively, because the table can be
944      generated at runtime, you can start by generating the table for
945      the polynomial in question and use exactly the same "updcrc",
946      if your application needn't simultaneously handle two CRC
947      polynomials.  (Note, however, that XMODEM is strange.)
948 
949   3. For 16-bit CRCs, the table entries need be only 16 bits wide;
950      of course, 32-bit entries work OK if the high 16 bits are zero.
951 
952   4. The values must be right-shifted by eight bits by the "updcrc"
953      logic; the shift must be unsigned (bring in zeroes).  On some
954      hardware you could probably optimize the shift in assembler by
955      using byte-swap instructions.
956 ********************************************************************/
957 
/* Byte-at-a-time CRC-32 feedback table: entry n is the result of running
 * eight shift/xor steps of the reflected polynomial 0xedb88320 on n.
 * The table is only ever read, so it is declared const.
 */
static const unsigned int crc_32_tab[256] = {
0x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU, 0x076dc419U,
0x706af48fU, 0xe963a535U, 0x9e6495a3U, 0x0edb8832U, 0x79dcb8a4U,
0xe0d5e91eU, 0x97d2d988U, 0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U,
0x90bf1d91U, 0x1db71064U, 0x6ab020f2U, 0xf3b97148U, 0x84be41deU,
0x1adad47dU, 0x6ddde4ebU, 0xf4d4b551U, 0x83d385c7U, 0x136c9856U,
0x646ba8c0U, 0xfd62f97aU, 0x8a65c9ecU, 0x14015c4fU, 0x63066cd9U,
0xfa0f3d63U, 0x8d080df5U, 0x3b6e20c8U, 0x4c69105eU, 0xd56041e4U,
0xa2677172U, 0x3c03e4d1U, 0x4b04d447U, 0xd20d85fdU, 0xa50ab56bU,
0x35b5a8faU, 0x42b2986cU, 0xdbbbc9d6U, 0xacbcf940U, 0x32d86ce3U,
0x45df5c75U, 0xdcd60dcfU, 0xabd13d59U, 0x26d930acU, 0x51de003aU,
0xc8d75180U, 0xbfd06116U, 0x21b4f4b5U, 0x56b3c423U, 0xcfba9599U,
0xb8bda50fU, 0x2802b89eU, 0x5f058808U, 0xc60cd9b2U, 0xb10be924U,
0x2f6f7c87U, 0x58684c11U, 0xc1611dabU, 0xb6662d3dU, 0x76dc4190U,
0x01db7106U, 0x98d220bcU, 0xefd5102aU, 0x71b18589U, 0x06b6b51fU,
0x9fbfe4a5U, 0xe8b8d433U, 0x7807c9a2U, 0x0f00f934U, 0x9609a88eU,
0xe10e9818U, 0x7f6a0dbbU, 0x086d3d2dU, 0x91646c97U, 0xe6635c01U,
0x6b6b51f4U, 0x1c6c6162U, 0x856530d8U, 0xf262004eU, 0x6c0695edU,
0x1b01a57bU, 0x8208f4c1U, 0xf50fc457U, 0x65b0d9c6U, 0x12b7e950U,
0x8bbeb8eaU, 0xfcb9887cU, 0x62dd1ddfU, 0x15da2d49U, 0x8cd37cf3U,
0xfbd44c65U, 0x4db26158U, 0x3ab551ceU, 0xa3bc0074U, 0xd4bb30e2U,
0x4adfa541U, 0x3dd895d7U, 0xa4d1c46dU, 0xd3d6f4fbU, 0x4369e96aU,
0x346ed9fcU, 0xad678846U, 0xda60b8d0U, 0x44042d73U, 0x33031de5U,
0xaa0a4c5fU, 0xdd0d7cc9U, 0x5005713cU, 0x270241aaU, 0xbe0b1010U,
0xc90c2086U, 0x5768b525U, 0x206f85b3U, 0xb966d409U, 0xce61e49fU,
0x5edef90eU, 0x29d9c998U, 0xb0d09822U, 0xc7d7a8b4U, 0x59b33d17U,
0x2eb40d81U, 0xb7bd5c3bU, 0xc0ba6cadU, 0xedb88320U, 0x9abfb3b6U,
0x03b6e20cU, 0x74b1d29aU, 0xead54739U, 0x9dd277afU, 0x04db2615U,
0x73dc1683U, 0xe3630b12U, 0x94643b84U, 0x0d6d6a3eU, 0x7a6a5aa8U,
0xe40ecf0bU, 0x9309ff9dU, 0x0a00ae27U, 0x7d079eb1U, 0xf00f9344U,
0x8708a3d2U, 0x1e01f268U, 0x6906c2feU, 0xf762575dU, 0x806567cbU,
0x196c3671U, 0x6e6b06e7U, 0xfed41b76U, 0x89d32be0U, 0x10da7a5aU,
0x67dd4accU, 0xf9b9df6fU, 0x8ebeeff9U, 0x17b7be43U, 0x60b08ed5U,
0xd6d6a3e8U, 0xa1d1937eU, 0x38d8c2c4U, 0x4fdff252U, 0xd1bb67f1U,
0xa6bc5767U, 0x3fb506ddU, 0x48b2364bU, 0xd80d2bdaU, 0xaf0a1b4cU,
0x36034af6U, 0x41047a60U, 0xdf60efc3U, 0xa867df55U, 0x316e8eefU,
0x4669be79U, 0xcb61b38cU, 0xbc66831aU, 0x256fd2a0U, 0x5268e236U,
0xcc0c7795U, 0xbb0b4703U, 0x220216b9U, 0x5505262fU, 0xc5ba3bbeU,
0xb2bd0b28U, 0x2bb45a92U, 0x5cb36a04U, 0xc2d7ffa7U, 0xb5d0cf31U,
0x2cd99e8bU, 0x5bdeae1dU, 0x9b64c2b0U, 0xec63f226U, 0x756aa39cU,
0x026d930aU, 0x9c0906a9U, 0xeb0e363fU, 0x72076785U, 0x05005713U,
0x95bf4a82U, 0xe2b87a14U, 0x7bb12baeU, 0x0cb61b38U, 0x92d28e9bU,
0xe5d5be0dU, 0x7cdcefb7U, 0x0bdbdf21U, 0x86d3d2d4U, 0xf1d4e242U,
0x68ddb3f8U, 0x1fda836eU, 0x81be16cdU, 0xf6b9265bU, 0x6fb077e1U,
0x18b74777U, 0x88085ae6U, 0xff0f6a70U, 0x66063bcaU, 0x11010b5cU,
0x8f659effU, 0xf862ae69U, 0x616bffd3U, 0x166ccf45U, 0xa00ae278U,
0xd70dd2eeU, 0x4e048354U, 0x3903b3c2U, 0xa7672661U, 0xd06016f7U,
0x4969474dU, 0x3e6e77dbU, 0xaed16a4aU, 0xd9d65adcU, 0x40df0b66U,
0x37d83bf0U, 0xa9bcae53U, 0xdebb9ec5U, 0x47b2cf7fU, 0x30b5ffe9U,
0xbdbdf21cU, 0xcabac28aU, 0x53b39330U, 0x24b4a3a6U, 0xbad03605U,
0xcdd70693U, 0x54de5729U, 0x23d967bfU, 0xb3667a2eU, 0xc4614ab8U,
0x5d681b02U, 0x2a6f2b94U, 0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU,
0x2d02ef8dU
};
1012 
1013 static PyObject *
binascii_crc32(PyObject * self,PyObject * args)1014 binascii_crc32(PyObject *self, PyObject *args)
1015 { /* By Jim Ahlstrom; All rights transferred to CNRI */
1016     Py_buffer pbin;
1017     unsigned char *bin_data;
1018     unsigned int crc = 0U;      /* initial value of CRC */
1019     Py_ssize_t len;
1020     int result;
1021 
1022     if ( !PyArg_ParseTuple(args, "s*|I:crc32", &pbin, &crc) )
1023         return NULL;
1024     bin_data = pbin.buf;
1025     len = pbin.len;
1026 
1027     crc = ~ crc;
1028     while (len-- > 0)
1029         crc = crc_32_tab[(crc ^ *bin_data++) & 0xffU] ^ (crc >> 8);
1030         /* Note:  (crc >> 8) MUST zero fill on left */
1031 
1032     result = (int)(crc ^ 0xFFFFFFFFU);
1033     PyBuffer_Release(&pbin);
1034     return PyInt_FromLong(result);
1035 }
1036 #endif  /* USE_ZLIB_CRC32 */
1037 
1038 
1039 static PyObject *
binascii_hexlify(PyObject * self,PyObject * args)1040 binascii_hexlify(PyObject *self, PyObject *args)
1041 {
1042     Py_buffer parg;
1043     char* argbuf;
1044     Py_ssize_t arglen;
1045     PyObject *retval;
1046     char* retbuf;
1047     Py_ssize_t i, j;
1048 
1049     if (!PyArg_ParseTuple(args, "s*:b2a_hex", &parg))
1050         return NULL;
1051     argbuf = parg.buf;
1052     arglen = parg.len;
1053 
1054     assert(arglen >= 0);
1055     if (arglen > PY_SSIZE_T_MAX / 2) {
1056         PyBuffer_Release(&parg);
1057         return PyErr_NoMemory();
1058     }
1059 
1060     retval = PyString_FromStringAndSize(NULL, arglen*2);
1061     if (!retval) {
1062         PyBuffer_Release(&parg);
1063         return NULL;
1064     }
1065     retbuf = PyString_AS_STRING(retval);
1066 
1067     /* make hex version of string, taken from shamodule.c */
1068     for (i=j=0; i < arglen; i++) {
1069         char c;
1070         c = (argbuf[i] >> 4) & 0xf;
1071         c = (c>9) ? c+'a'-10 : c + '0';
1072         retbuf[j++] = c;
1073         c = argbuf[i] & 0xf;
1074         c = (c>9) ? c+'a'-10 : c + '0';
1075         retbuf[j++] = c;
1076     }
1077     PyBuffer_Release(&parg);
1078     return retval;
1079 }
1080 
1081 PyDoc_STRVAR(doc_hexlify,
1082 "b2a_hex(data) -> s; Hexadecimal representation of binary data.\n\
1083 \n\
1084 This function is also available as \"hexlify()\".");
1085 
1086 
/* Convert one ASCII hexadecimal digit to its numeric value.
 *
 * c: character code of the digit ('0'-'9', 'a'-'f' or 'A'-'F').
 * Returns 0..15 for a valid digit and -1 for anything else.
 *
 * Only explicit ASCII ranges are tested.  The C library's isdigit() is
 * locale-dependent and may accept bytes outside '0'..'9', for which
 * `c - '0'` would not be a valid digit value.
 */
static int
to_int(int c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    return -1;
}
1100 
1101 
1102 static PyObject *
binascii_unhexlify(PyObject * self,PyObject * args)1103 binascii_unhexlify(PyObject *self, PyObject *args)
1104 {
1105     Py_buffer parg;
1106     char* argbuf;
1107     Py_ssize_t arglen;
1108     PyObject *retval;
1109     char* retbuf;
1110     Py_ssize_t i, j;
1111 
1112     if (!PyArg_ParseTuple(args, "s*:a2b_hex", &parg))
1113         return NULL;
1114     argbuf = parg.buf;
1115     arglen = parg.len;
1116 
1117     assert(arglen >= 0);
1118 
1119     /* XXX What should we do about strings with an odd length?  Should
1120      * we add an implicit leading zero, or a trailing zero?  For now,
1121      * raise an exception.
1122      */
1123     if (arglen % 2) {
1124         PyBuffer_Release(&parg);
1125         PyErr_SetString(PyExc_TypeError, "Odd-length string");
1126         return NULL;
1127     }
1128 
1129     retval = PyString_FromStringAndSize(NULL, (arglen/2));
1130     if (!retval) {
1131         PyBuffer_Release(&parg);
1132         return NULL;
1133     }
1134     retbuf = PyString_AS_STRING(retval);
1135 
1136     for (i=j=0; i < arglen; i += 2) {
1137         int top = to_int(Py_CHARMASK(argbuf[i]));
1138         int bot = to_int(Py_CHARMASK(argbuf[i+1]));
1139         if (top == -1 || bot == -1) {
1140             PyErr_SetString(PyExc_TypeError,
1141                             "Non-hexadecimal digit found");
1142             goto finally;
1143         }
1144         retbuf[j++] = (top << 4) + bot;
1145     }
1146     PyBuffer_Release(&parg);
1147     return retval;
1148 
1149   finally:
1150     PyBuffer_Release(&parg);
1151     Py_DECREF(retval);
1152     return NULL;
1153 }
1154 
1155 PyDoc_STRVAR(doc_unhexlify,
1156 "a2b_hex(hexstr) -> s; Binary data of hexadecimal representation.\n\
1157 \n\
1158 hexstr must contain an even number of hex digits (upper or lower case).\n\
1159 This function is also available as \"unhexlify()\"");
1160 
/* Value of each 7-bit ASCII character as a hexadecimal digit, or -1 when
 * the character is not one.  Rows hold 16 entries each; '0'-'9' map to 0-9
 * and both 'A'-'F' and 'a'-'f' map to 10-15.  The table is only ever read,
 * so it is declared const.
 */
static const int table_hex[128] = {
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
   0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};
1171 
1172 #define hexval(c) table_hex[(unsigned int)(c)]
1173 
1174 #define MAXLINESIZE 76
1175 
1176 PyDoc_STRVAR(doc_a2b_qp, "Decode a string of qp-encoded data");
1177 
1178 static PyObject*
binascii_a2b_qp(PyObject * self,PyObject * args,PyObject * kwargs)1179 binascii_a2b_qp(PyObject *self, PyObject *args, PyObject *kwargs)
1180 {
1181     Py_ssize_t in, out;
1182     char ch;
1183     Py_buffer pdata;
1184     unsigned char *data, *odata;
1185     Py_ssize_t datalen = 0;
1186     PyObject *rv;
1187     static char *kwlist[] = {"data", "header", NULL};
1188     int header = 0;
1189 
1190     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|i", kwlist, &pdata,
1191           &header))
1192         return NULL;
1193     data = pdata.buf;
1194     datalen = pdata.len;
1195 
1196     /* We allocate the output same size as input, this is overkill.
1197      * The previous implementation used calloc() so we'll zero out the
1198      * memory here too, since PyMem_Malloc() does not guarantee that.
1199      */
1200     odata = (unsigned char *) PyMem_Malloc(datalen);
1201     if (odata == NULL) {
1202         PyBuffer_Release(&pdata);
1203         PyErr_NoMemory();
1204         return NULL;
1205     }
1206     memset(odata, 0, datalen);
1207 
1208     in = out = 0;
1209     while (in < datalen) {
1210         if (data[in] == '=') {
1211             in++;
1212             if (in >= datalen) break;
1213             /* Soft line breaks */
1214             if ((data[in] == '\n') || (data[in] == '\r')) {
1215                 if (data[in] != '\n') {
1216                     while (in < datalen && data[in] != '\n') in++;
1217                 }
1218                 if (in < datalen) in++;
1219             }
1220             else if (data[in] == '=') {
1221                 /* broken case from broken python qp */
1222                 odata[out++] = '=';
1223                 in++;
1224             }
1225             else if ((in + 1 < datalen) &&
1226                      ((data[in] >= 'A' && data[in] <= 'F') ||
1227                       (data[in] >= 'a' && data[in] <= 'f') ||
1228                       (data[in] >= '0' && data[in] <= '9')) &&
1229                      ((data[in+1] >= 'A' && data[in+1] <= 'F') ||
1230                       (data[in+1] >= 'a' && data[in+1] <= 'f') ||
1231                       (data[in+1] >= '0' && data[in+1] <= '9'))) {
1232                 /* hexval */
1233                 ch = hexval(data[in]) << 4;
1234                 in++;
1235                 ch |= hexval(data[in]);
1236                 in++;
1237                 odata[out++] = ch;
1238             }
1239             else {
1240               odata[out++] = '=';
1241             }
1242         }
1243         else if (header && data[in] == '_') {
1244             odata[out++] = ' ';
1245             in++;
1246         }
1247         else {
1248             odata[out] = data[in];
1249             in++;
1250             out++;
1251         }
1252     }
1253     if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
1254         PyBuffer_Release(&pdata);
1255         PyMem_Free(odata);
1256         return NULL;
1257     }
1258     PyBuffer_Release(&pdata);
1259     PyMem_Free(odata);
1260     return rv;
1261 }
1262 
/* Write the two uppercase hexadecimal digits of `ch` to s[0] (high nibble)
 * and s[1] (low nibble).  No terminator is written.  Always returns 0.
 */
static int
to_hex (unsigned char ch, unsigned char *s)
{
    static const char digits[] = "0123456789ABCDEF";

    s[0] = digits[(ch >> 4) & 0x0f];
    s[1] = digits[ch & 0x0f];
    return 0;
}
1273 
1274 PyDoc_STRVAR(doc_b2a_qp,
1275 "b2a_qp(data, quotetabs=0, istext=1, header=0) -> s; \n\
1276  Encode a string using quoted-printable encoding. \n\
1277 \n\
1278 On encoding, when istext is set, newlines are not encoded, and white \n\
1279 space at end of lines is.  When istext is not set, \\r and \\n (CR/LF) are \n\
1280 both encoded.  When quotetabs is set, space and tabs are encoded.");
1281 
/* XXX: This is ridiculously complicated to be backward compatible
 * (mostly) with the quopri module.  It doesn't re-create the quopri
 * module bug where text ending in CRLF has the CR encoded */
/* b2a_qp(data, quotetabs=0, istext=1, header=0) -> str.
 *
 * Works in two passes over the input.  The first pass only computes
 * odatalen, the exact number of output bytes; the second pass writes the
 * encoded bytes into a buffer of that size.  Both passes apply the same
 * "must this byte be written as =XX" test and the same line-length
 * bookkeeping, so any change to one pass must be mirrored in the other.
 *
 * Returns NULL with an exception set on argument parsing failure, on
 * size overflow, or on allocation failure.
 */
static PyObject*
binascii_b2a_qp (PyObject *self, PyObject *args, PyObject *kwargs)
{
    Py_ssize_t in, out;
    Py_buffer pdata;
    unsigned char *data, *odata;
    Py_ssize_t datalen = 0, odatalen = 0;
    PyObject *rv;
    unsigned int linelen = 0;   /* characters emitted on the current output line */
    static char *kwlist[] = {"data", "quotetabs", "istext",
                                   "header", NULL};
    int istext = 1;
    int quotetabs = 0;
    int header = 0;
    unsigned char ch;
    int crlf = 0;               /* nonzero: emit "\r\n" line ends instead of "\n" */
    unsigned char *p;

    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|iii", kwlist, &pdata,
          &quotetabs, &istext, &header))
        return NULL;
    data = pdata.buf;
    datalen = pdata.len;

    /* See if this string is using CRLF line ends */
    /* XXX: this function has the side effect of converting all of
     * the end of lines to be the same depending on this detection
     * here */
    /* Only the first '\n' in the input is inspected. */
    p = (unsigned char *) memchr(data, '\n', datalen);
    if ((p != NULL) && (p > data) && (*(p-1) == '\r'))
        crlf = 1;

    /* First, scan to see how many characters need to be encoded */
    in = 0;
    while (in < datalen) {
        Py_ssize_t delta = 0;   /* output bytes contributed by this step */
        /* A byte is written as "=XX" when it is above 126, is '=', is '_'
         * in header mode, is a '.' alone on a line, is CR/LF in non-text
         * mode, is a tab or space at the very end of the data, or is
         * below 33 and not CR/LF (tab and space only when quotetabs is
         * set). */
        if ((data[in] > 126) ||
            (data[in] == '=') ||
            (header && data[in] == '_') ||
            ((data[in] == '.') && (linelen == 0) &&
             (in + 1 == datalen || data[in+1] == '\n' ||
              data[in+1] == '\r' || data[in+1] == 0)) ||
            (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
            ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
            ((data[in] < 33) &&
             (data[in] != '\r') && (data[in] != '\n') &&
             (quotetabs ||
            (!quotetabs && ((data[in] != '\t') && (data[in] != ' '))))))
        {
            /* The escape would not fit on this line: account for a soft
             * line break ("=" plus the line end) before it. */
            if ((linelen + 3) >= MAXLINESIZE) {
                linelen = 0;
                if (crlf)
                    delta += 3;
                else
                    delta += 2;
            }
            linelen += 3;
            delta += 3;
            in++;
        }
        else {
            /* Text mode: a '\n', or a "\r\n" pair, is a hard line break. */
            if (istext &&
                ((data[in] == '\n') ||
                 ((in+1 < datalen) && (data[in] == '\r') &&
                 (data[in+1] == '\n'))))
            {
                linelen = 0;
                /* Protect against whitespace on end of line */
                /* Reserve two extra bytes for turning that whitespace
                 * byte into a three-byte escape. */
                if (in && ((data[in-1] == ' ') || (data[in-1] == '\t')))
                    delta += 2;
                if (crlf)
                    delta += 2;
                else
                    delta += 1;
                if (data[in] == '\r')
                    in += 2;
                else
                    in++;
            }
            else {
                /* Ordinary byte copied as-is; insert a soft line break
                 * first when the line is full and the input line does
                 * not end right after this byte. */
                if ((in + 1 != datalen) &&
                    (data[in+1] != '\n') &&
                    (linelen + 1) >= MAXLINESIZE) {
                    linelen = 0;
                    if (crlf)
                        delta += 3;
                    else
                        delta += 2;
                }
                linelen++;
                delta++;
                in++;
            }
        }
        /* Refuse to let the running total overflow Py_ssize_t. */
        if (PY_SSIZE_T_MAX - delta < odatalen) {
            PyBuffer_Release(&pdata);
            PyErr_NoMemory();
            return NULL;
        }
        odatalen += delta;
    }

    /* Allocate the output using the size computed by the scan above.
     * The previous implementation used calloc() so we'll zero out the
     * memory here too, since PyMem_Malloc() does not guarantee that.
     */
    odata = (unsigned char *) PyMem_Malloc(odatalen);
    if (odata == NULL) {
        PyBuffer_Release(&pdata);
        PyErr_NoMemory();
        return NULL;
    }
    memset(odata, 0, odatalen);

    /* Second pass: same decisions as the scan, this time writing output. */
    in = out = linelen = 0;
    while (in < datalen) {
        if ((data[in] > 126) ||
            (data[in] == '=') ||
            (header && data[in] == '_') ||
            ((data[in] == '.') && (linelen == 0) &&
             (in + 1 == datalen || data[in+1] == '\n' ||
              data[in+1] == '\r' || data[in+1] == 0)) ||
            (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
            ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
            ((data[in] < 33) &&
             (data[in] != '\r') && (data[in] != '\n') &&
             (quotetabs || ((data[in] != '\t') && (data[in] != ' ')))))
        {
            /* Soft line break before an escape that would not fit. */
            if ((linelen + 3 )>= MAXLINESIZE) {
                odata[out++] = '=';
                if (crlf) odata[out++] = '\r';
                odata[out++] = '\n';
                linelen = 0;
            }
            /* Emit the byte as '=' followed by two hex digits. */
            odata[out++] = '=';
            to_hex(data[in], &odata[out]);
            out += 2;
            in++;
            linelen += 3;
        }
        else {
            if (istext &&
                ((data[in] == '\n') ||
                 ((in+1 < datalen) && (data[in] == '\r') &&
                 (data[in+1] == '\n'))))
            {
                linelen = 0;
                /* Protect against whitespace on end of line */
                /* The space or tab already written is replaced in place
                 * by its "=XX" escape. */
                if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
                    ch = odata[out-1];
                    odata[out-1] = '=';
                    to_hex(ch, &odata[out]);
                    out += 2;
                }

                /* Line ends are normalized to the detected convention. */
                if (crlf) odata[out++] = '\r';
                odata[out++] = '\n';
                if (data[in] == '\r')
                    in += 2;
                else
                    in++;
            }
            else {
                /* Soft line break before an ordinary byte when the line
                 * is full and the input line continues past this byte. */
                if ((in + 1 != datalen) &&
                    (data[in+1] != '\n') &&
                    (linelen + 1) >= MAXLINESIZE) {
                    odata[out++] = '=';
                    if (crlf) odata[out++] = '\r';
                    odata[out++] = '\n';
                    linelen = 0;
                }
                linelen++;
                /* In header mode a space is written as an underscore. */
                if (header && data[in] == ' ') {
                    odata[out++] = '_';
                    in++;
                }
                else {
                    odata[out++] = data[in++];
                }
            }
        }
    }
    /* Copy the bytes actually written into the result string; the
     * scratch buffer and the input view are released on both paths. */
    if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
        PyBuffer_Release(&pdata);
        PyMem_Free(odata);
        return NULL;
    }
    PyBuffer_Release(&pdata);
    PyMem_Free(odata);
    return rv;
}
1476 
1477 /* List of functions defined in the module */
1478 
/* Method table handed to Py_InitModule().  Each entry gives the Python-level
 * name, the C implementation, the calling convention and the docstring.
 * The hex converters are registered under two names each (b2a_hex/hexlify
 * and a2b_hex/unhexlify).  Only the quoted-printable functions accept
 * keyword arguments.
 */
static struct PyMethodDef binascii_module_methods[] = {
    {"a2b_uu",     binascii_a2b_uu,     METH_VARARGS, doc_a2b_uu},
    {"b2a_uu",     binascii_b2a_uu,     METH_VARARGS, doc_b2a_uu},
    {"a2b_base64", binascii_a2b_base64, METH_VARARGS, doc_a2b_base64},
    {"b2a_base64", binascii_b2a_base64, METH_VARARGS, doc_b2a_base64},
    {"a2b_hqx",    binascii_a2b_hqx,    METH_VARARGS, doc_a2b_hqx},
    {"b2a_hqx",    binascii_b2a_hqx,    METH_VARARGS, doc_b2a_hqx},
    {"b2a_hex",    binascii_hexlify,    METH_VARARGS, doc_hexlify},
    {"a2b_hex",    binascii_unhexlify,  METH_VARARGS, doc_unhexlify},
    {"hexlify",    binascii_hexlify,    METH_VARARGS, doc_hexlify},
    {"unhexlify",  binascii_unhexlify,  METH_VARARGS, doc_unhexlify},
    {"rlecode_hqx",   binascii_rlecode_hqx, METH_VARARGS, doc_rlecode_hqx},
    {"rledecode_hqx", binascii_rledecode_hqx, METH_VARARGS,
     doc_rledecode_hqx},
    {"crc_hqx",    binascii_crc_hqx,    METH_VARARGS, doc_crc_hqx},
    {"crc32",      binascii_crc32,      METH_VARARGS, doc_crc32},
    /* These take (self, args, kwargs), hence the PyCFunction casts. */
    {"a2b_qp", (PyCFunction)binascii_a2b_qp, METH_VARARGS | METH_KEYWORDS,
      doc_a2b_qp},
    {"b2a_qp", (PyCFunction)binascii_b2a_qp, METH_VARARGS | METH_KEYWORDS,
      doc_b2a_qp},
    {NULL, NULL}                             /* sentinel */
};
1501 
1502 
1503 /* Initialization function for the module (*must* be called initbinascii) */
1504 PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");
1505 
1506 PyMODINIT_FUNC
initbinascii(void)1507 initbinascii(void)
1508 {
1509     PyObject *m, *d, *x;
1510 
1511     /* Create the module and add the functions */
1512     m = Py_InitModule("binascii", binascii_module_methods);
1513     if (m == NULL)
1514         return;
1515 
1516     d = PyModule_GetDict(m);
1517     x = PyString_FromString(doc_binascii);
1518     PyDict_SetItemString(d, "__doc__", x);
1519     Py_XDECREF(x);
1520 
1521     Error = PyErr_NewException("binascii.Error", NULL, NULL);
1522     PyDict_SetItemString(d, "Error", Error);
1523     Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
1524     PyDict_SetItemString(d, "Incomplete", Incomplete);
1525 }
1526