1 /*
2 ** Routines to represent binary data in ASCII and vice-versa
3 **
4 ** This module currently supports the following encodings:
5 ** uuencode:
6 ** each line encodes 45 bytes (except possibly the last)
7 ** First char encodes (binary) length, rest data
8 ** each char encodes 6 bits, as follows:
9 ** binary: 01234567 abcdefgh ijklmnop
10 ** ascii: 012345 67abcd efghij klmnop
11 ** ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
12 ** short binary data is zero-extended (so the bits are always in the
13 ** right place), this does *not* reflect in the length.
14 ** base64:
15 ** Line breaks are insignificant, but lines are at most 76 chars
16 ** each char encodes 6 bits, in similar order as uucode/hqx. Encoding
17 ** is done via a table.
18 ** Short binary data is filled (in ASCII) with '='.
19 ** hqx:
20 ** File starts with introductory text, real data starts and ends
21 ** with colons.
22 ** Data consists of three similar parts: info, datafork, resourcefork.
23 ** Each part is protected (at the end) with a 16-bit crc
24 ** The binary data is run-length encoded, and then ascii-fied:
25 ** binary: 01234567 abcdefgh ijklmnop
26 ** ascii: 012345 67abcd efghij klmnop
27 ** ASCII encoding is table-driven, see the code.
28 ** Short binary data results in the runt ascii-byte being output with
29 ** the bits in the right place.
30 **
31 ** While I was reading dozens of programs that encode or decode the formats
32 ** here (documentation? hihi:-) I have formulated Jansen's Observation:
33 **
34 ** Programs that encode binary data in ASCII are written in
35 ** such a style that they are as unreadable as possible. Devices used
36 ** include unnecessary global variables, burying important tables
37 ** in unrelated sourcefiles, putting functions in include files,
38 ** using seemingly-descriptive variable names for different purposes,
39 ** calls to empty subroutines and a host of others.
40 **
41 ** I have attempted to break with this tradition, but I guess that that
42 ** does make the performance sub-optimal. Oh well, too bad...
43 **
44 ** Jack Jansen, CWI, July 1995.
45 **
46 ** Added support for quoted-printable encoding, based on rfc 1521 et al
47 ** quoted-printable encoding specifies that non printable characters (anything
48 ** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
49 ** of the character. It also specifies some other behavior to enable 8bit data
50 ** in a mail message with little difficulty (maximum line sizes, protecting
51 ** some cases of whitespace, etc).
52 **
53 ** Brandon Long, September 2001.
54 */
55
56 #define PY_SSIZE_T_CLEAN
57
58 #include "Python.h"
59 #ifdef USE_ZLIB_CRC32
60 #include "zlib.h"
61 #endif
62
63 static PyObject *Error;
64 static PyObject *Incomplete;
65
/*
** hqx lookup table, ascii->binary.
**
** Indexed by the input byte.  Each entry is either the 6-bit value the
** character stands for (0x00..0x3F) or one of the markers below:
**   DONE  the terminating colon
**   SKIP  a character that is silently ignored (\n and \r)
**   FAIL  a character that is not part of the hqx alphabet
** The table is only ever read, hence const.
*/

/* Marker byte used by the hqx run-length coder */
#define RUNCHAR 0x90

#define DONE 0x7F
#define SKIP 0x7E
#define FAIL 0x7D

static const unsigned char table_a2b_hqx[256] = {
/*       ^@    ^A    ^B    ^C    ^D    ^E    ^F    ^G   */
/* 0*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       \b    \t    \n    ^K    ^L    \r    ^N    ^O   */
/* 1*/  FAIL, FAIL, SKIP, FAIL, FAIL, SKIP, FAIL, FAIL,
/*       ^P    ^Q    ^R    ^S    ^T    ^U    ^V    ^W   */
/* 2*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       ^X    ^Y    ^Z    ^[    ^\    ^]    ^^    ^_   */
/* 3*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*              !     "     #     $     %     &     '   */
/* 4*/  FAIL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
/*        (     )     *     +     ,     -     .     /   */
/* 5*/  0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, FAIL, FAIL,
/*        0     1     2     3     4     5     6     7   */
/* 6*/  0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, FAIL,
/*        8     9     :     ;     <     =     >     ?   */
/* 7*/  0x14, 0x15, DONE, FAIL, FAIL, FAIL, FAIL, FAIL,
/*        @     A     B     C     D     E     F     G   */
/* 8*/  0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D,
/*        H     I     J     K     L     M     N     O   */
/* 9*/  0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, FAIL,
/*        P     Q     R     S     T     U     V     W   */
/*10*/  0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, FAIL,
/*        X     Y     Z     [     \     ]     ^     _   */
/*11*/  0x2C, 0x2D, 0x2E, 0x2F, FAIL, FAIL, FAIL, FAIL,
/*        `     a     b     c     d     e     f     g   */
/*12*/  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, FAIL,
/*        h     i     j     k     l     m     n     o   */
/*13*/  0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, FAIL, FAIL,
/*        p     q     r     s     t     u     v     w   */
/*14*/  0x3D, 0x3E, 0x3F, FAIL, FAIL, FAIL, FAIL, FAIL,
/*        x     y     z     {     |     }     ~    ^?   */
/*15*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*16*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
};
126
/*
** hqx lookup table, binary->ascii: the 64 characters of the hqx
** alphabet, indexed by 6-bit value.  Read-only, hence const.
*/
static const unsigned char table_b2a_hqx[] =
"!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr";
129
/*
** base64 lookup table, ascii->binary, for the 128 seven-bit characters.
** -1 marks a character outside the base64 alphabet; users of the table
** compare entries against (unsigned char)-1.  Read-only, hence const.
*/
static const char table_a2b_base64[] = {
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
    52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */
    -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
    15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
    -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
    41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
};

#define BASE64_PAD '='

/* Max binary chunk size; limited only by available memory */
#define BASE64_MAXBIN (PY_SSIZE_T_MAX/2 - sizeof(PyStringObject) - 3)

/* base64 lookup table, binary->ascii, indexed by 6-bit value */
static const unsigned char table_b2a_base64[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
148
149
150
/*
** Lookup table for the 16-bit CRC computed by binascii_crc_hqx(),
** indexed by (high byte of the running crc) XOR (next data byte).
** Read-only, hence const.
*/
static const unsigned short crctab_hqx[256] = {
    0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
    0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
    0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
    0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
    0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
    0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
    0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
    0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
    0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
    0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
    0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
    0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
    0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
    0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
    0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
    0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
    0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
    0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
    0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
    0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
    0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
    0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
    0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
    0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
    0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
    0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
    0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
    0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
    0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
    0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
    0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
    0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
};
185
186 PyDoc_STRVAR(doc_a2b_uu, "(ascii) -> bin. Decode a line of uuencoded data");
187
188 static PyObject *
binascii_a2b_uu(PyObject * self,PyObject * args)189 binascii_a2b_uu(PyObject *self, PyObject *args)
190 {
191 Py_buffer pascii;
192 unsigned char *ascii_data, *bin_data;
193 int leftbits = 0;
194 unsigned char this_ch;
195 unsigned int leftchar = 0;
196 PyObject *rv;
197 Py_ssize_t ascii_len, bin_len;
198
199 if ( !PyArg_ParseTuple(args, "s*:a2b_uu", &pascii) )
200 return NULL;
201 ascii_data = pascii.buf;
202 ascii_len = pascii.len;
203
204 assert(ascii_len >= 0);
205
206 /* First byte: binary data length (in bytes) */
207 bin_len = (*ascii_data++ - ' ') & 077;
208 ascii_len--;
209
210 /* Allocate the buffer */
211 if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL ) {
212 PyBuffer_Release(&pascii);
213 return NULL;
214 }
215 bin_data = (unsigned char *)PyString_AS_STRING(rv);
216
217 for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
218 /* XXX is it really best to add NULs if there's no more data */
219 this_ch = (ascii_len > 0) ? *ascii_data : 0;
220 if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
221 /*
222 ** Whitespace. Assume some spaces got eaten at
223 ** end-of-line. (We check this later)
224 */
225 this_ch = 0;
226 } else {
227 /* Check the character for legality
228 ** The 64 in stead of the expected 63 is because
229 ** there are a few uuencodes out there that use
230 ** '`' as zero instead of space.
231 */
232 if ( this_ch < ' ' || this_ch > (' ' + 64)) {
233 PyErr_SetString(Error, "Illegal char");
234 PyBuffer_Release(&pascii);
235 Py_DECREF(rv);
236 return NULL;
237 }
238 this_ch = (this_ch - ' ') & 077;
239 }
240 /*
241 ** Shift it in on the low end, and see if there's
242 ** a byte ready for output.
243 */
244 leftchar = (leftchar << 6) | (this_ch);
245 leftbits += 6;
246 if ( leftbits >= 8 ) {
247 leftbits -= 8;
248 *bin_data++ = (leftchar >> leftbits) & 0xff;
249 leftchar &= ((1 << leftbits) - 1);
250 bin_len--;
251 }
252 }
253 /*
254 ** Finally, check that if there's anything left on the line
255 ** that it's whitespace only.
256 */
257 while( ascii_len-- > 0 ) {
258 this_ch = *ascii_data++;
259 /* Extra '`' may be written as padding in some cases */
260 if ( this_ch != ' ' && this_ch != ' '+64 &&
261 this_ch != '\n' && this_ch != '\r' ) {
262 PyErr_SetString(Error, "Trailing garbage");
263 PyBuffer_Release(&pascii);
264 Py_DECREF(rv);
265 return NULL;
266 }
267 }
268 PyBuffer_Release(&pascii);
269 return rv;
270 }
271
272 PyDoc_STRVAR(doc_b2a_uu, "(bin) -> ascii. Uuencode line of data");
273
274 static PyObject *
binascii_b2a_uu(PyObject * self,PyObject * args)275 binascii_b2a_uu(PyObject *self, PyObject *args)
276 {
277 Py_buffer pbin;
278 unsigned char *ascii_data, *bin_data;
279 int leftbits = 0;
280 unsigned char this_ch;
281 unsigned int leftchar = 0;
282 PyObject *rv;
283 Py_ssize_t bin_len;
284
285 if ( !PyArg_ParseTuple(args, "s*:b2a_uu", &pbin) )
286 return NULL;
287 bin_data = pbin.buf;
288 bin_len = pbin.len;
289 if ( bin_len > 45 ) {
290 /* The 45 is a limit that appears in all uuencode's */
291 PyErr_SetString(Error, "At most 45 bytes at once");
292 PyBuffer_Release(&pbin);
293 return NULL;
294 }
295
296 /* We're lazy and allocate to much (fixed up later) */
297 if ( (rv=PyString_FromStringAndSize(NULL, 2 + (bin_len+2)/3*4)) == NULL ) {
298 PyBuffer_Release(&pbin);
299 return NULL;
300 }
301 ascii_data = (unsigned char *)PyString_AS_STRING(rv);
302
303 /* Store the length */
304 *ascii_data++ = ' ' + (bin_len & 077);
305
306 for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
307 /* Shift the data (or padding) into our buffer */
308 if ( bin_len > 0 ) /* Data */
309 leftchar = (leftchar << 8) | *bin_data;
310 else /* Padding */
311 leftchar <<= 8;
312 leftbits += 8;
313
314 /* See if there are 6-bit groups ready */
315 while ( leftbits >= 6 ) {
316 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
317 leftbits -= 6;
318 *ascii_data++ = this_ch + ' ';
319 }
320 }
321 *ascii_data++ = '\n'; /* Append a courtesy newline */
322
323 /* rv is cleared on error */
324 (void)_PyString_Resize(&rv,
325 (ascii_data -
326 (unsigned char *)PyString_AS_STRING(rv)));
327 PyBuffer_Release(&pbin);
328 return rv;
329 }
330
331
332 static int
binascii_find_valid(unsigned char * s,Py_ssize_t slen,int num)333 binascii_find_valid(unsigned char *s, Py_ssize_t slen, int num)
334 {
335 /* Finds & returns the (num+1)th
336 ** valid character for base64, or -1 if none.
337 */
338
339 int ret = -1;
340 unsigned char c, b64val;
341
342 while ((slen > 0) && (ret == -1)) {
343 c = *s;
344 b64val = table_a2b_base64[c & 0x7f];
345 if ( ((c <= 0x7f) && (b64val != (unsigned char)-1)) ) {
346 if (num == 0)
347 ret = *s;
348 num--;
349 }
350
351 s++;
352 slen--;
353 }
354 return ret;
355 }
356
357 PyDoc_STRVAR(doc_a2b_base64, "(ascii) -> bin. Decode a line of base64 data");
358
359 static PyObject *
binascii_a2b_base64(PyObject * self,PyObject * args)360 binascii_a2b_base64(PyObject *self, PyObject *args)
361 {
362 Py_buffer pascii;
363 unsigned char *ascii_data, *bin_data;
364 int leftbits = 0;
365 unsigned char this_ch;
366 unsigned int leftchar = 0;
367 PyObject *rv;
368 Py_ssize_t ascii_len, bin_len;
369 int quad_pos = 0;
370
371 if ( !PyArg_ParseTuple(args, "s*:a2b_base64", &pascii) )
372 return NULL;
373 ascii_data = pascii.buf;
374 ascii_len = pascii.len;
375
376 assert(ascii_len >= 0);
377
378 if (ascii_len > PY_SSIZE_T_MAX - 3) {
379 PyBuffer_Release(&pascii);
380 return PyErr_NoMemory();
381 }
382
383 bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
384
385 /* Allocate the buffer */
386 if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL ) {
387 PyBuffer_Release(&pascii);
388 return NULL;
389 }
390 bin_data = (unsigned char *)PyString_AS_STRING(rv);
391 bin_len = 0;
392
393 for( ; ascii_len > 0; ascii_len--, ascii_data++) {
394 this_ch = *ascii_data;
395
396 if (this_ch > 0x7f ||
397 this_ch == '\r' || this_ch == '\n' || this_ch == ' ')
398 continue;
399
400 /* Check for pad sequences and ignore
401 ** the invalid ones.
402 */
403 if (this_ch == BASE64_PAD) {
404 if ( (quad_pos < 2) ||
405 ((quad_pos == 2) &&
406 (binascii_find_valid(ascii_data, ascii_len, 1)
407 != BASE64_PAD)) )
408 {
409 continue;
410 }
411 else {
412 /* A pad sequence means no more input.
413 ** We've already interpreted the data
414 ** from the quad at this point.
415 */
416 leftbits = 0;
417 break;
418 }
419 }
420
421 this_ch = table_a2b_base64[*ascii_data];
422 if ( this_ch == (unsigned char) -1 )
423 continue;
424
425 /*
426 ** Shift it in on the low end, and see if there's
427 ** a byte ready for output.
428 */
429 quad_pos = (quad_pos + 1) & 0x03;
430 leftchar = (leftchar << 6) | (this_ch);
431 leftbits += 6;
432
433 if ( leftbits >= 8 ) {
434 leftbits -= 8;
435 *bin_data++ = (leftchar >> leftbits) & 0xff;
436 bin_len++;
437 leftchar &= ((1 << leftbits) - 1);
438 }
439 }
440
441 if (leftbits != 0) {
442 PyBuffer_Release(&pascii);
443 PyErr_SetString(Error, "Incorrect padding");
444 Py_DECREF(rv);
445 return NULL;
446 }
447
448 /* And set string size correctly. If the result string is empty
449 ** (because the input was all invalid) return the shared empty
450 ** string instead; _PyString_Resize() won't do this for us.
451 */
452 if (bin_len > 0) {
453 /* rv is cleared on error */
454 (void)_PyString_Resize(&rv, bin_len);
455 }
456 else {
457 Py_DECREF(rv);
458 rv = PyString_FromStringAndSize("", 0);
459 }
460 PyBuffer_Release(&pascii);
461 return rv;
462 }
463
464 PyDoc_STRVAR(doc_b2a_base64, "(bin) -> ascii. Base64-code line of data");
465
466 static PyObject *
binascii_b2a_base64(PyObject * self,PyObject * args)467 binascii_b2a_base64(PyObject *self, PyObject *args)
468 {
469 Py_buffer pbuf;
470 unsigned char *ascii_data, *bin_data;
471 int leftbits = 0;
472 unsigned char this_ch;
473 unsigned int leftchar = 0;
474 PyObject *rv;
475 Py_ssize_t bin_len;
476
477 if ( !PyArg_ParseTuple(args, "s*:b2a_base64", &pbuf) )
478 return NULL;
479 bin_data = pbuf.buf;
480 bin_len = pbuf.len;
481
482 assert(bin_len >= 0);
483
484 if ( bin_len > BASE64_MAXBIN ) {
485 PyErr_SetString(Error, "Too much data for base64 line");
486 PyBuffer_Release(&pbuf);
487 return NULL;
488 }
489
490 /* We're lazy and allocate too much (fixed up later).
491 "+3" leaves room for up to two pad characters and a trailing
492 newline. Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
493 if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2 + 3)) == NULL ) {
494 PyBuffer_Release(&pbuf);
495 return NULL;
496 }
497 ascii_data = (unsigned char *)PyString_AS_STRING(rv);
498
499 for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
500 /* Shift the data into our buffer */
501 leftchar = (leftchar << 8) | *bin_data;
502 leftbits += 8;
503
504 /* See if there are 6-bit groups ready */
505 while ( leftbits >= 6 ) {
506 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
507 leftbits -= 6;
508 *ascii_data++ = table_b2a_base64[this_ch];
509 }
510 }
511 if ( leftbits == 2 ) {
512 *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
513 *ascii_data++ = BASE64_PAD;
514 *ascii_data++ = BASE64_PAD;
515 } else if ( leftbits == 4 ) {
516 *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
517 *ascii_data++ = BASE64_PAD;
518 }
519 *ascii_data++ = '\n'; /* Append a courtesy newline */
520
521 /* rv is cleared on error */
522 (void)_PyString_Resize(&rv,
523 (ascii_data -
524 (unsigned char *)PyString_AS_STRING(rv)));
525 PyBuffer_Release(&pbuf);
526 return rv;
527 }
528
529 PyDoc_STRVAR(doc_a2b_hqx, "ascii -> bin, done. Decode .hqx coding");
530
531 static PyObject *
binascii_a2b_hqx(PyObject * self,PyObject * args)532 binascii_a2b_hqx(PyObject *self, PyObject *args)
533 {
534 Py_buffer pascii;
535 unsigned char *ascii_data, *bin_data;
536 int leftbits = 0;
537 unsigned char this_ch;
538 unsigned int leftchar = 0;
539 PyObject *rv;
540 Py_ssize_t len;
541 int done = 0;
542
543 if ( !PyArg_ParseTuple(args, "s*:a2b_hqx", &pascii) )
544 return NULL;
545 ascii_data = pascii.buf;
546 len = pascii.len;
547
548 assert(len >= 0);
549
550 if (len > PY_SSIZE_T_MAX - 2) {
551 PyBuffer_Release(&pascii);
552 return PyErr_NoMemory();
553 }
554
555 /* Allocate a string that is too big (fixed later)
556 Add two to the initial length to prevent interning which
557 would preclude subsequent resizing. */
558 if ( (rv=PyString_FromStringAndSize(NULL, len+2)) == NULL ) {
559 PyBuffer_Release(&pascii);
560 return NULL;
561 }
562 bin_data = (unsigned char *)PyString_AS_STRING(rv);
563
564 for( ; len > 0 ; len--, ascii_data++ ) {
565 /* Get the byte and look it up */
566 this_ch = table_a2b_hqx[*ascii_data];
567 if ( this_ch == SKIP )
568 continue;
569 if ( this_ch == FAIL ) {
570 PyErr_SetString(Error, "Illegal char");
571 PyBuffer_Release(&pascii);
572 Py_DECREF(rv);
573 return NULL;
574 }
575 if ( this_ch == DONE ) {
576 /* The terminating colon */
577 done = 1;
578 break;
579 }
580
581 /* Shift it into the buffer and see if any bytes are ready */
582 leftchar = (leftchar << 6) | (this_ch);
583 leftbits += 6;
584 if ( leftbits >= 8 ) {
585 leftbits -= 8;
586 *bin_data++ = (leftchar >> leftbits) & 0xff;
587 leftchar &= ((1 << leftbits) - 1);
588 }
589 }
590
591 if ( leftbits && !done ) {
592 PyErr_SetString(Incomplete,
593 "String has incomplete number of bytes");
594 PyBuffer_Release(&pascii);
595 Py_DECREF(rv);
596 return NULL;
597 }
598 /* rv is cleared on error */
599 if (_PyString_Resize(&rv,
600 (bin_data -
601 (unsigned char *)PyString_AS_STRING(rv))) == 0) {
602 PyObject *rrv = Py_BuildValue("Oi", rv, done);
603 PyBuffer_Release(&pascii);
604 Py_DECREF(rv);
605 return rrv;
606 }
607
608 PyBuffer_Release(&pascii);
609 return NULL;
610 }
611
612 PyDoc_STRVAR(doc_rlecode_hqx, "Binhex RLE-code binary data");
613
614 static PyObject *
binascii_rlecode_hqx(PyObject * self,PyObject * args)615 binascii_rlecode_hqx(PyObject *self, PyObject *args)
616 {
617 Py_buffer pbuf;
618 unsigned char *in_data, *out_data;
619 PyObject *rv;
620 unsigned char ch;
621 Py_ssize_t in, inend, len;
622
623 if ( !PyArg_ParseTuple(args, "s*:rlecode_hqx", &pbuf) )
624 return NULL;
625 in_data = pbuf.buf;
626 len = pbuf.len;
627
628 assert(len >= 0);
629
630 if (len > PY_SSIZE_T_MAX / 2 - 2) {
631 PyBuffer_Release(&pbuf);
632 return PyErr_NoMemory();
633 }
634
635 /* Worst case: output is twice as big as input (fixed later) */
636 if ( (rv=PyString_FromStringAndSize(NULL, len*2+2)) == NULL ) {
637 PyBuffer_Release(&pbuf);
638 return NULL;
639 }
640 out_data = (unsigned char *)PyString_AS_STRING(rv);
641
642 for( in=0; in<len; in++) {
643 ch = in_data[in];
644 if ( ch == RUNCHAR ) {
645 /* RUNCHAR. Escape it. */
646 *out_data++ = RUNCHAR;
647 *out_data++ = 0;
648 } else {
649 /* Check how many following are the same */
650 for(inend=in+1;
651 inend<len && in_data[inend] == ch &&
652 inend < in+255;
653 inend++) ;
654 if ( inend - in > 3 ) {
655 /* More than 3 in a row. Output RLE. */
656 *out_data++ = ch;
657 *out_data++ = RUNCHAR;
658 *out_data++ = inend-in;
659 in = inend-1;
660 } else {
661 /* Less than 3. Output the byte itself */
662 *out_data++ = ch;
663 }
664 }
665 }
666 /* rv is cleared on error */
667 (void)_PyString_Resize(&rv,
668 (out_data -
669 (unsigned char *)PyString_AS_STRING(rv)));
670 PyBuffer_Release(&pbuf);
671 return rv;
672 }
673
674 PyDoc_STRVAR(doc_b2a_hqx, "Encode .hqx data");
675
676 static PyObject *
binascii_b2a_hqx(PyObject * self,PyObject * args)677 binascii_b2a_hqx(PyObject *self, PyObject *args)
678 {
679 Py_buffer pbin;
680 unsigned char *ascii_data, *bin_data;
681 int leftbits = 0;
682 unsigned char this_ch;
683 unsigned int leftchar = 0;
684 PyObject *rv;
685 Py_ssize_t len;
686
687 if ( !PyArg_ParseTuple(args, "s*:b2a_hqx", &pbin) )
688 return NULL;
689 bin_data = pbin.buf;
690 len = pbin.len;
691
692 assert(len >= 0);
693
694 if (len > PY_SSIZE_T_MAX / 2 - 2) {
695 PyBuffer_Release(&pbin);
696 return PyErr_NoMemory();
697 }
698
699 /* Allocate a buffer that is at least large enough */
700 if ( (rv=PyString_FromStringAndSize(NULL, len*2+2)) == NULL ) {
701 PyBuffer_Release(&pbin);
702 return NULL;
703 }
704 ascii_data = (unsigned char *)PyString_AS_STRING(rv);
705
706 for( ; len > 0 ; len--, bin_data++ ) {
707 /* Shift into our buffer, and output any 6bits ready */
708 leftchar = (leftchar << 8) | *bin_data;
709 leftbits += 8;
710 while ( leftbits >= 6 ) {
711 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
712 leftbits -= 6;
713 *ascii_data++ = table_b2a_hqx[this_ch];
714 }
715 }
716 /* Output a possible runt byte */
717 if ( leftbits ) {
718 leftchar <<= (6-leftbits);
719 *ascii_data++ = table_b2a_hqx[leftchar & 0x3f];
720 }
721 /* rv is cleared on error */
722 (void)_PyString_Resize(&rv,
723 (ascii_data -
724 (unsigned char *)PyString_AS_STRING(rv)));
725 PyBuffer_Release(&pbin);
726 return rv;
727 }
728
729 PyDoc_STRVAR(doc_rledecode_hqx, "Decode hexbin RLE-coded string");
730
731 static PyObject *
binascii_rledecode_hqx(PyObject * self,PyObject * args)732 binascii_rledecode_hqx(PyObject *self, PyObject *args)
733 {
734 Py_buffer pin;
735 unsigned char *in_data, *out_data;
736 unsigned char in_byte, in_repeat;
737 PyObject *rv;
738 Py_ssize_t in_len, out_len, out_len_left;
739
740 if ( !PyArg_ParseTuple(args, "s*:rledecode_hqx", &pin) )
741 return NULL;
742 in_data = pin.buf;
743 in_len = pin.len;
744
745 assert(in_len >= 0);
746
747 /* Empty string is a special case */
748 if ( in_len == 0 ) {
749 PyBuffer_Release(&pin);
750 return PyString_FromStringAndSize("", 0);
751 }
752 else if (in_len > PY_SSIZE_T_MAX / 2) {
753 PyBuffer_Release(&pin);
754 return PyErr_NoMemory();
755 }
756
757 /* Allocate a buffer of reasonable size. Resized when needed */
758 out_len = in_len*2;
759 if ( (rv=PyString_FromStringAndSize(NULL, out_len)) == NULL ) {
760 PyBuffer_Release(&pin);
761 return NULL;
762 }
763 out_len_left = out_len;
764 out_data = (unsigned char *)PyString_AS_STRING(rv);
765
766 /*
767 ** We need two macros here to get/put bytes and handle
768 ** end-of-buffer for input and output strings.
769 */
770 #define INBYTE(b) \
771 do { \
772 if ( --in_len < 0 ) { \
773 PyErr_SetString(Incomplete, ""); \
774 Py_DECREF(rv); \
775 PyBuffer_Release(&pin); \
776 return NULL; \
777 } \
778 b = *in_data++; \
779 } while(0)
780
781 #define OUTBYTE(b) \
782 do { \
783 if ( --out_len_left < 0 ) { \
784 if ( out_len > PY_SSIZE_T_MAX / 2) return PyErr_NoMemory(); \
785 if (_PyString_Resize(&rv, 2*out_len) < 0) \
786 { PyBuffer_Release(&pin); return NULL; } \
787 out_data = (unsigned char *)PyString_AS_STRING(rv) \
788 + out_len; \
789 out_len_left = out_len-1; \
790 out_len = out_len * 2; \
791 } \
792 *out_data++ = b; \
793 } while(0)
794
795 /*
796 ** Handle first byte separately (since we have to get angry
797 ** in case of an orphaned RLE code).
798 */
799 INBYTE(in_byte);
800
801 if (in_byte == RUNCHAR) {
802 INBYTE(in_repeat);
803 if (in_repeat != 0) {
804 /* Note Error, not Incomplete (which is at the end
805 ** of the string only). This is a programmer error.
806 */
807 PyErr_SetString(Error, "Orphaned RLE code at start");
808 PyBuffer_Release(&pin);
809 Py_DECREF(rv);
810 return NULL;
811 }
812 OUTBYTE(RUNCHAR);
813 } else {
814 OUTBYTE(in_byte);
815 }
816
817 while( in_len > 0 ) {
818 INBYTE(in_byte);
819
820 if (in_byte == RUNCHAR) {
821 INBYTE(in_repeat);
822 if ( in_repeat == 0 ) {
823 /* Just an escaped RUNCHAR value */
824 OUTBYTE(RUNCHAR);
825 } else {
826 /* Pick up value and output a sequence of it */
827 in_byte = out_data[-1];
828 while ( --in_repeat > 0 )
829 OUTBYTE(in_byte);
830 }
831 } else {
832 /* Normal byte */
833 OUTBYTE(in_byte);
834 }
835 }
836 /* rv is cleared on error */
837 (void)_PyString_Resize(&rv,
838 (out_data -
839 (unsigned char *)PyString_AS_STRING(rv)));
840 PyBuffer_Release(&pin);
841 return rv;
842 }
843
844 PyDoc_STRVAR(doc_crc_hqx,
845 "(data, oldcrc) -> newcrc. Compute CRC-CCITT incrementally");
846
847 static PyObject *
binascii_crc_hqx(PyObject * self,PyObject * args)848 binascii_crc_hqx(PyObject *self, PyObject *args)
849 {
850 Py_buffer pin;
851 unsigned char *bin_data;
852 unsigned int crc;
853 Py_ssize_t len;
854
855 if ( !PyArg_ParseTuple(args, "s*i:crc_hqx", &pin, &crc) )
856 return NULL;
857 bin_data = pin.buf;
858 len = pin.len;
859
860 while(len-- > 0) {
861 crc=((crc<<8)&0xff00)^crctab_hqx[((crc>>8)&0xff)^*bin_data++];
862 }
863
864 PyBuffer_Release(&pin);
865 return Py_BuildValue("i", crc);
866 }
867
868 PyDoc_STRVAR(doc_crc32,
869 "(data, oldcrc = 0) -> newcrc. Compute CRC-32 incrementally");
870
871 #ifdef USE_ZLIB_CRC32
872 /* This was taken from zlibmodule.c PyZlib_crc32 (but is PY_SSIZE_T_CLEAN) */
873 static PyObject *
binascii_crc32(PyObject * self,PyObject * args)874 binascii_crc32(PyObject *self, PyObject *args)
875 {
876 unsigned int crc32val = 0; /* crc32(0L, Z_NULL, 0) */
877 Py_buffer pbuf;
878 Byte *buf;
879 Py_ssize_t len;
880 int signed_val;
881
882 if (!PyArg_ParseTuple(args, "s*|I:crc32", &pbuf, &crc32val))
883 return NULL;
884 /* In Python 2.x we return a signed integer regardless of native platform
885 * long size (the 32bit unsigned long is treated as 32-bit signed and sign
886 * extended into a 64-bit long inside the integer object). 3.0 does the
887 * right thing and returns unsigned. http://bugs.python.org/issue1202 */
888 buf = (Byte*)pbuf.buf;
889 len = pbuf.len;
890 signed_val = crc32(crc32val, buf, len);
891 PyBuffer_Release(&pbuf);
892 return PyInt_FromLong(signed_val);
893 }
894 #else /* USE_ZLIB_CRC32 */
895 /* Crc - 32 BIT ANSI X3.66 CRC checksum files
896 Also known as: ISO 3307
897 **********************************************************************|
898 * *|
899 * Demonstration program to compute the 32-bit CRC used as the frame *|
900 * check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71 *|
901 * and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level *|
902 * protocol). The 32-bit FCS was added via the Federal Register, *|
903 * 1 June 1982, p.23798. I presume but don't know for certain that *|
904 * this polynomial is or will be included in CCITT V.41, which *|
905 * defines the 16-bit CRC (often called CRC-CCITT) polynomial. FIPS *|
906 * PUB 78 says that the 32-bit FCS reduces otherwise undetected *|
907 * errors by a factor of 10^-5 over 16-bit FCS. *|
908 * *|
909 **********************************************************************|
910
911 Copyright (C) 1986 Gary S. Brown. You may use this program, or
912 code or tables extracted from it, as desired without restriction.
913
914 First, the polynomial itself and its table of feedback terms. The
915 polynomial is
916 X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
917 Note that we take it "backwards" and put the highest-order term in
918 the lowest-order bit. The X^32 term is "implied"; the LSB is the
919 X^31 term, etc. The X^0 term (usually shown as "+1") results in
920 the MSB being 1.
921
922 Note that the usual hardware shift register implementation, which
923 is what we're using (we're merely optimizing it by doing eight-bit
924 chunks at a time) shifts bits into the lowest-order term. In our
925 implementation, that means shifting towards the right. Why do we
926 do it this way? Because the calculated CRC must be transmitted in
927 order from highest-order term to lowest-order term. UARTs transmit
928 characters in order from LSB to MSB. By storing the CRC this way,
929 we hand it to the UART in the order low-byte to high-byte; the UART
930 sends each low-bit to high-bit; and the result is transmission bit
931 by bit from highest- to lowest-order term without requiring any bit
932 shuffling on our part. Reception works similarly.
933
934 The feedback terms table consists of 256, 32-bit entries. Notes:
935
936 1. The table can be generated at runtime if desired; code to do so
937 is shown later. It might not be obvious, but the feedback
938 terms simply represent the results of eight shift/xor opera-
939 tions for all combinations of data and CRC register values.
940
941 2. The CRC accumulation logic is the same for all CRC polynomials,
942 be they sixteen or thirty-two bits wide. You simply choose the
943 appropriate table. Alternatively, because the table can be
944 generated at runtime, you can start by generating the table for
945 the polynomial in question and use exactly the same "updcrc",
946 if your application needn't simultaneously handle two CRC
947 polynomials. (Note, however, that XMODEM is strange.)
948
949 3. For 16-bit CRCs, the table entries need be only 16 bits wide;
950 of course, 32-bit entries work OK if the high 16 bits are zero.
951
952 4. The values must be right-shifted by eight bits by the "updcrc"
953 logic; the shift must be unsigned (bring in zeroes). On some
954 hardware you could probably optimize the shift in assembler by
955 using byte-swap instructions.
956 ********************************************************************/
957
/*
 * Feedback-term table for the table-driven CRC-32 update loop: entry i is
 * the result of eight shift/xor steps applied to byte value i using the
 * reflected polynomial 0xedb88320 (the polynomial itself is entry 128).
 *
 * The table is only ever read, so it is declared const: accidental writes
 * become compile errors and the data can live in read-only storage.
 */
static const unsigned int crc_32_tab[256] = {
    0x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU, 0x076dc419U,
    0x706af48fU, 0xe963a535U, 0x9e6495a3U, 0x0edb8832U, 0x79dcb8a4U,
    0xe0d5e91eU, 0x97d2d988U, 0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U,
    0x90bf1d91U, 0x1db71064U, 0x6ab020f2U, 0xf3b97148U, 0x84be41deU,
    0x1adad47dU, 0x6ddde4ebU, 0xf4d4b551U, 0x83d385c7U, 0x136c9856U,
    0x646ba8c0U, 0xfd62f97aU, 0x8a65c9ecU, 0x14015c4fU, 0x63066cd9U,
    0xfa0f3d63U, 0x8d080df5U, 0x3b6e20c8U, 0x4c69105eU, 0xd56041e4U,
    0xa2677172U, 0x3c03e4d1U, 0x4b04d447U, 0xd20d85fdU, 0xa50ab56bU,
    0x35b5a8faU, 0x42b2986cU, 0xdbbbc9d6U, 0xacbcf940U, 0x32d86ce3U,
    0x45df5c75U, 0xdcd60dcfU, 0xabd13d59U, 0x26d930acU, 0x51de003aU,
    0xc8d75180U, 0xbfd06116U, 0x21b4f4b5U, 0x56b3c423U, 0xcfba9599U,
    0xb8bda50fU, 0x2802b89eU, 0x5f058808U, 0xc60cd9b2U, 0xb10be924U,
    0x2f6f7c87U, 0x58684c11U, 0xc1611dabU, 0xb6662d3dU, 0x76dc4190U,
    0x01db7106U, 0x98d220bcU, 0xefd5102aU, 0x71b18589U, 0x06b6b51fU,
    0x9fbfe4a5U, 0xe8b8d433U, 0x7807c9a2U, 0x0f00f934U, 0x9609a88eU,
    0xe10e9818U, 0x7f6a0dbbU, 0x086d3d2dU, 0x91646c97U, 0xe6635c01U,
    0x6b6b51f4U, 0x1c6c6162U, 0x856530d8U, 0xf262004eU, 0x6c0695edU,
    0x1b01a57bU, 0x8208f4c1U, 0xf50fc457U, 0x65b0d9c6U, 0x12b7e950U,
    0x8bbeb8eaU, 0xfcb9887cU, 0x62dd1ddfU, 0x15da2d49U, 0x8cd37cf3U,
    0xfbd44c65U, 0x4db26158U, 0x3ab551ceU, 0xa3bc0074U, 0xd4bb30e2U,
    0x4adfa541U, 0x3dd895d7U, 0xa4d1c46dU, 0xd3d6f4fbU, 0x4369e96aU,
    0x346ed9fcU, 0xad678846U, 0xda60b8d0U, 0x44042d73U, 0x33031de5U,
    0xaa0a4c5fU, 0xdd0d7cc9U, 0x5005713cU, 0x270241aaU, 0xbe0b1010U,
    0xc90c2086U, 0x5768b525U, 0x206f85b3U, 0xb966d409U, 0xce61e49fU,
    0x5edef90eU, 0x29d9c998U, 0xb0d09822U, 0xc7d7a8b4U, 0x59b33d17U,
    0x2eb40d81U, 0xb7bd5c3bU, 0xc0ba6cadU, 0xedb88320U, 0x9abfb3b6U,
    0x03b6e20cU, 0x74b1d29aU, 0xead54739U, 0x9dd277afU, 0x04db2615U,
    0x73dc1683U, 0xe3630b12U, 0x94643b84U, 0x0d6d6a3eU, 0x7a6a5aa8U,
    0xe40ecf0bU, 0x9309ff9dU, 0x0a00ae27U, 0x7d079eb1U, 0xf00f9344U,
    0x8708a3d2U, 0x1e01f268U, 0x6906c2feU, 0xf762575dU, 0x806567cbU,
    0x196c3671U, 0x6e6b06e7U, 0xfed41b76U, 0x89d32be0U, 0x10da7a5aU,
    0x67dd4accU, 0xf9b9df6fU, 0x8ebeeff9U, 0x17b7be43U, 0x60b08ed5U,
    0xd6d6a3e8U, 0xa1d1937eU, 0x38d8c2c4U, 0x4fdff252U, 0xd1bb67f1U,
    0xa6bc5767U, 0x3fb506ddU, 0x48b2364bU, 0xd80d2bdaU, 0xaf0a1b4cU,
    0x36034af6U, 0x41047a60U, 0xdf60efc3U, 0xa867df55U, 0x316e8eefU,
    0x4669be79U, 0xcb61b38cU, 0xbc66831aU, 0x256fd2a0U, 0x5268e236U,
    0xcc0c7795U, 0xbb0b4703U, 0x220216b9U, 0x5505262fU, 0xc5ba3bbeU,
    0xb2bd0b28U, 0x2bb45a92U, 0x5cb36a04U, 0xc2d7ffa7U, 0xb5d0cf31U,
    0x2cd99e8bU, 0x5bdeae1dU, 0x9b64c2b0U, 0xec63f226U, 0x756aa39cU,
    0x026d930aU, 0x9c0906a9U, 0xeb0e363fU, 0x72076785U, 0x05005713U,
    0x95bf4a82U, 0xe2b87a14U, 0x7bb12baeU, 0x0cb61b38U, 0x92d28e9bU,
    0xe5d5be0dU, 0x7cdcefb7U, 0x0bdbdf21U, 0x86d3d2d4U, 0xf1d4e242U,
    0x68ddb3f8U, 0x1fda836eU, 0x81be16cdU, 0xf6b9265bU, 0x6fb077e1U,
    0x18b74777U, 0x88085ae6U, 0xff0f6a70U, 0x66063bcaU, 0x11010b5cU,
    0x8f659effU, 0xf862ae69U, 0x616bffd3U, 0x166ccf45U, 0xa00ae278U,
    0xd70dd2eeU, 0x4e048354U, 0x3903b3c2U, 0xa7672661U, 0xd06016f7U,
    0x4969474dU, 0x3e6e77dbU, 0xaed16a4aU, 0xd9d65adcU, 0x40df0b66U,
    0x37d83bf0U, 0xa9bcae53U, 0xdebb9ec5U, 0x47b2cf7fU, 0x30b5ffe9U,
    0xbdbdf21cU, 0xcabac28aU, 0x53b39330U, 0x24b4a3a6U, 0xbad03605U,
    0xcdd70693U, 0x54de5729U, 0x23d967bfU, 0xb3667a2eU, 0xc4614ab8U,
    0x5d681b02U, 0x2a6f2b94U, 0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU,
    0x2d02ef8dU
};
1012
1013 static PyObject *
binascii_crc32(PyObject * self,PyObject * args)1014 binascii_crc32(PyObject *self, PyObject *args)
1015 { /* By Jim Ahlstrom; All rights transferred to CNRI */
1016 Py_buffer pbin;
1017 unsigned char *bin_data;
1018 unsigned int crc = 0U; /* initial value of CRC */
1019 Py_ssize_t len;
1020 int result;
1021
1022 if ( !PyArg_ParseTuple(args, "s*|I:crc32", &pbin, &crc) )
1023 return NULL;
1024 bin_data = pbin.buf;
1025 len = pbin.len;
1026
1027 crc = ~ crc;
1028 while (len-- > 0)
1029 crc = crc_32_tab[(crc ^ *bin_data++) & 0xffU] ^ (crc >> 8);
1030 /* Note: (crc >> 8) MUST zero fill on left */
1031
1032 result = (int)(crc ^ 0xFFFFFFFFU);
1033 PyBuffer_Release(&pbin);
1034 return PyInt_FromLong(result);
1035 }
1036 #endif /* USE_ZLIB_CRC32 */
1037
1038
1039 static PyObject *
binascii_hexlify(PyObject * self,PyObject * args)1040 binascii_hexlify(PyObject *self, PyObject *args)
1041 {
1042 Py_buffer parg;
1043 char* argbuf;
1044 Py_ssize_t arglen;
1045 PyObject *retval;
1046 char* retbuf;
1047 Py_ssize_t i, j;
1048
1049 if (!PyArg_ParseTuple(args, "s*:b2a_hex", &parg))
1050 return NULL;
1051 argbuf = parg.buf;
1052 arglen = parg.len;
1053
1054 assert(arglen >= 0);
1055 if (arglen > PY_SSIZE_T_MAX / 2) {
1056 PyBuffer_Release(&parg);
1057 return PyErr_NoMemory();
1058 }
1059
1060 retval = PyString_FromStringAndSize(NULL, arglen*2);
1061 if (!retval) {
1062 PyBuffer_Release(&parg);
1063 return NULL;
1064 }
1065 retbuf = PyString_AS_STRING(retval);
1066
1067 /* make hex version of string, taken from shamodule.c */
1068 for (i=j=0; i < arglen; i++) {
1069 char c;
1070 c = (argbuf[i] >> 4) & 0xf;
1071 c = (c>9) ? c+'a'-10 : c + '0';
1072 retbuf[j++] = c;
1073 c = argbuf[i] & 0xf;
1074 c = (c>9) ? c+'a'-10 : c + '0';
1075 retbuf[j++] = c;
1076 }
1077 PyBuffer_Release(&parg);
1078 return retval;
1079 }
1080
1081 PyDoc_STRVAR(doc_hexlify,
1082 "b2a_hex(data) -> s; Hexadecimal representation of binary data.\n\
1083 \n\
1084 This function is also available as \"hexlify()\".");
1085
1086
/*
 * Convert one hexadecimal digit to its numeric value.
 *
 * c is a character code; '0'-'9', 'a'-'f' and 'A'-'F' map to 0..15.
 * Any other value yields -1.
 *
 * Only explicit ASCII ranges are tested.  The C library's isdigit() is
 * deliberately avoided: some runtimes let it accept additional bytes
 * depending on the active locale, and "c - '0'" would then return a value
 * outside 0..9 instead of rejecting the input.
 */
static int
to_int(int c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    return -1;
}
1100
1101
1102 static PyObject *
binascii_unhexlify(PyObject * self,PyObject * args)1103 binascii_unhexlify(PyObject *self, PyObject *args)
1104 {
1105 Py_buffer parg;
1106 char* argbuf;
1107 Py_ssize_t arglen;
1108 PyObject *retval;
1109 char* retbuf;
1110 Py_ssize_t i, j;
1111
1112 if (!PyArg_ParseTuple(args, "s*:a2b_hex", &parg))
1113 return NULL;
1114 argbuf = parg.buf;
1115 arglen = parg.len;
1116
1117 assert(arglen >= 0);
1118
1119 /* XXX What should we do about strings with an odd length? Should
1120 * we add an implicit leading zero, or a trailing zero? For now,
1121 * raise an exception.
1122 */
1123 if (arglen % 2) {
1124 PyBuffer_Release(&parg);
1125 PyErr_SetString(PyExc_TypeError, "Odd-length string");
1126 return NULL;
1127 }
1128
1129 retval = PyString_FromStringAndSize(NULL, (arglen/2));
1130 if (!retval) {
1131 PyBuffer_Release(&parg);
1132 return NULL;
1133 }
1134 retbuf = PyString_AS_STRING(retval);
1135
1136 for (i=j=0; i < arglen; i += 2) {
1137 int top = to_int(Py_CHARMASK(argbuf[i]));
1138 int bot = to_int(Py_CHARMASK(argbuf[i+1]));
1139 if (top == -1 || bot == -1) {
1140 PyErr_SetString(PyExc_TypeError,
1141 "Non-hexadecimal digit found");
1142 goto finally;
1143 }
1144 retbuf[j++] = (top << 4) + bot;
1145 }
1146 PyBuffer_Release(&parg);
1147 return retval;
1148
1149 finally:
1150 PyBuffer_Release(&parg);
1151 Py_DECREF(retval);
1152 return NULL;
1153 }
1154
1155 PyDoc_STRVAR(doc_unhexlify,
1156 "a2b_hex(hexstr) -> s; Binary data of hexadecimal representation.\n\
1157 \n\
1158 hexstr must contain an even number of hex digits (upper or lower case).\n\
1159 This function is also available as \"unhexlify()\"");
1160
/*
 * Value of each 7-bit ASCII code when read as a hexadecimal digit, or -1
 * when the code is not a hex digit.  Rows hold 16 codes each: the fourth
 * row covers '0'-'9', the fifth 'A'-'F' and the seventh 'a'-'f'.
 *
 * The table is never written, so it is const.
 */
static const int table_hex[128] = {
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
   0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};

/*
 * Look up the hex value of character code c.  The table has only 128
 * entries and the macro does no range check, so c must be below 128;
 * callers are expected to have validated the character first.
 */
#define hexval(c) (table_hex[(unsigned int)(c)])

/* Line-length limit used by the quoted-printable encoder. */
#define MAXLINESIZE 76
1175
1176 PyDoc_STRVAR(doc_a2b_qp, "Decode a string of qp-encoded data");
1177
1178 static PyObject*
binascii_a2b_qp(PyObject * self,PyObject * args,PyObject * kwargs)1179 binascii_a2b_qp(PyObject *self, PyObject *args, PyObject *kwargs)
1180 {
1181 Py_ssize_t in, out;
1182 char ch;
1183 Py_buffer pdata;
1184 unsigned char *data, *odata;
1185 Py_ssize_t datalen = 0;
1186 PyObject *rv;
1187 static char *kwlist[] = {"data", "header", NULL};
1188 int header = 0;
1189
1190 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|i", kwlist, &pdata,
1191 &header))
1192 return NULL;
1193 data = pdata.buf;
1194 datalen = pdata.len;
1195
1196 /* We allocate the output same size as input, this is overkill.
1197 * The previous implementation used calloc() so we'll zero out the
1198 * memory here too, since PyMem_Malloc() does not guarantee that.
1199 */
1200 odata = (unsigned char *) PyMem_Malloc(datalen);
1201 if (odata == NULL) {
1202 PyBuffer_Release(&pdata);
1203 PyErr_NoMemory();
1204 return NULL;
1205 }
1206 memset(odata, 0, datalen);
1207
1208 in = out = 0;
1209 while (in < datalen) {
1210 if (data[in] == '=') {
1211 in++;
1212 if (in >= datalen) break;
1213 /* Soft line breaks */
1214 if ((data[in] == '\n') || (data[in] == '\r')) {
1215 if (data[in] != '\n') {
1216 while (in < datalen && data[in] != '\n') in++;
1217 }
1218 if (in < datalen) in++;
1219 }
1220 else if (data[in] == '=') {
1221 /* broken case from broken python qp */
1222 odata[out++] = '=';
1223 in++;
1224 }
1225 else if ((in + 1 < datalen) &&
1226 ((data[in] >= 'A' && data[in] <= 'F') ||
1227 (data[in] >= 'a' && data[in] <= 'f') ||
1228 (data[in] >= '0' && data[in] <= '9')) &&
1229 ((data[in+1] >= 'A' && data[in+1] <= 'F') ||
1230 (data[in+1] >= 'a' && data[in+1] <= 'f') ||
1231 (data[in+1] >= '0' && data[in+1] <= '9'))) {
1232 /* hexval */
1233 ch = hexval(data[in]) << 4;
1234 in++;
1235 ch |= hexval(data[in]);
1236 in++;
1237 odata[out++] = ch;
1238 }
1239 else {
1240 odata[out++] = '=';
1241 }
1242 }
1243 else if (header && data[in] == '_') {
1244 odata[out++] = ' ';
1245 in++;
1246 }
1247 else {
1248 odata[out] = data[in];
1249 in++;
1250 out++;
1251 }
1252 }
1253 if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
1254 PyBuffer_Release(&pdata);
1255 PyMem_Free(odata);
1256 return NULL;
1257 }
1258 PyBuffer_Release(&pdata);
1259 PyMem_Free(odata);
1260 return rv;
1261 }
1262
/*
 * Write the two uppercase hexadecimal digits of ch into s[0] (high
 * nibble) and s[1] (low nibble).  No terminator is written.
 * Always returns 0.
 */
static int
to_hex(unsigned char ch, unsigned char *s)
{
    static const char upper_hex[] = "0123456789ABCDEF";

    s[0] = upper_hex[(ch >> 4) & 0x0F];
    s[1] = upper_hex[ch & 0x0F];
    return 0;
}
1273
1274 PyDoc_STRVAR(doc_b2a_qp,
1275 "b2a_qp(data, quotetabs=0, istext=1, header=0) -> s; \n\
1276 Encode a string using quoted-printable encoding. \n\
1277 \n\
1278 On encoding, when istext is set, newlines are not encoded, and white \n\
1279 space at end of lines is. When istext is not set, \\r and \\n (CR/LF) are \n\
1280 both encoded. When quotetabs is set, space and tabs are encoded.");
1281
1282 /* XXX: This is ridiculously complicated to be backward compatible
1283 * (mostly) with the quopri module. It doesn't re-create the quopri
1284 * module bug where text ending in CRLF has the CR encoded */
1285 static PyObject*
binascii_b2a_qp(PyObject * self,PyObject * args,PyObject * kwargs)1286 binascii_b2a_qp (PyObject *self, PyObject *args, PyObject *kwargs)
1287 {
1288 Py_ssize_t in, out;
1289 Py_buffer pdata;
1290 unsigned char *data, *odata;
1291 Py_ssize_t datalen = 0, odatalen = 0;
1292 PyObject *rv;
1293 unsigned int linelen = 0;
1294 static char *kwlist[] = {"data", "quotetabs", "istext",
1295 "header", NULL};
1296 int istext = 1;
1297 int quotetabs = 0;
1298 int header = 0;
1299 unsigned char ch;
1300 int crlf = 0;
1301 unsigned char *p;
1302
1303 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|iii", kwlist, &pdata,
1304 "etabs, &istext, &header))
1305 return NULL;
1306 data = pdata.buf;
1307 datalen = pdata.len;
1308
1309 /* See if this string is using CRLF line ends */
1310 /* XXX: this function has the side effect of converting all of
1311 * the end of lines to be the same depending on this detection
1312 * here */
1313 p = (unsigned char *) memchr(data, '\n', datalen);
1314 if ((p != NULL) && (p > data) && (*(p-1) == '\r'))
1315 crlf = 1;
1316
1317 /* First, scan to see how many characters need to be encoded */
1318 in = 0;
1319 while (in < datalen) {
1320 Py_ssize_t delta = 0;
1321 if ((data[in] > 126) ||
1322 (data[in] == '=') ||
1323 (header && data[in] == '_') ||
1324 ((data[in] == '.') && (linelen == 0) &&
1325 (in + 1 == datalen || data[in+1] == '\n' ||
1326 data[in+1] == '\r' || data[in+1] == 0)) ||
1327 (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1328 ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
1329 ((data[in] < 33) &&
1330 (data[in] != '\r') && (data[in] != '\n') &&
1331 (quotetabs ||
1332 (!quotetabs && ((data[in] != '\t') && (data[in] != ' '))))))
1333 {
1334 if ((linelen + 3) >= MAXLINESIZE) {
1335 linelen = 0;
1336 if (crlf)
1337 delta += 3;
1338 else
1339 delta += 2;
1340 }
1341 linelen += 3;
1342 delta += 3;
1343 in++;
1344 }
1345 else {
1346 if (istext &&
1347 ((data[in] == '\n') ||
1348 ((in+1 < datalen) && (data[in] == '\r') &&
1349 (data[in+1] == '\n'))))
1350 {
1351 linelen = 0;
1352 /* Protect against whitespace on end of line */
1353 if (in && ((data[in-1] == ' ') || (data[in-1] == '\t')))
1354 delta += 2;
1355 if (crlf)
1356 delta += 2;
1357 else
1358 delta += 1;
1359 if (data[in] == '\r')
1360 in += 2;
1361 else
1362 in++;
1363 }
1364 else {
1365 if ((in + 1 != datalen) &&
1366 (data[in+1] != '\n') &&
1367 (linelen + 1) >= MAXLINESIZE) {
1368 linelen = 0;
1369 if (crlf)
1370 delta += 3;
1371 else
1372 delta += 2;
1373 }
1374 linelen++;
1375 delta++;
1376 in++;
1377 }
1378 }
1379 if (PY_SSIZE_T_MAX - delta < odatalen) {
1380 PyBuffer_Release(&pdata);
1381 PyErr_NoMemory();
1382 return NULL;
1383 }
1384 odatalen += delta;
1385 }
1386
1387 /* We allocate the output same size as input, this is overkill.
1388 * The previous implementation used calloc() so we'll zero out the
1389 * memory here too, since PyMem_Malloc() does not guarantee that.
1390 */
1391 odata = (unsigned char *) PyMem_Malloc(odatalen);
1392 if (odata == NULL) {
1393 PyBuffer_Release(&pdata);
1394 PyErr_NoMemory();
1395 return NULL;
1396 }
1397 memset(odata, 0, odatalen);
1398
1399 in = out = linelen = 0;
1400 while (in < datalen) {
1401 if ((data[in] > 126) ||
1402 (data[in] == '=') ||
1403 (header && data[in] == '_') ||
1404 ((data[in] == '.') && (linelen == 0) &&
1405 (in + 1 == datalen || data[in+1] == '\n' ||
1406 data[in+1] == '\r' || data[in+1] == 0)) ||
1407 (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1408 ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
1409 ((data[in] < 33) &&
1410 (data[in] != '\r') && (data[in] != '\n') &&
1411 (quotetabs || ((data[in] != '\t') && (data[in] != ' ')))))
1412 {
1413 if ((linelen + 3 )>= MAXLINESIZE) {
1414 odata[out++] = '=';
1415 if (crlf) odata[out++] = '\r';
1416 odata[out++] = '\n';
1417 linelen = 0;
1418 }
1419 odata[out++] = '=';
1420 to_hex(data[in], &odata[out]);
1421 out += 2;
1422 in++;
1423 linelen += 3;
1424 }
1425 else {
1426 if (istext &&
1427 ((data[in] == '\n') ||
1428 ((in+1 < datalen) && (data[in] == '\r') &&
1429 (data[in+1] == '\n'))))
1430 {
1431 linelen = 0;
1432 /* Protect against whitespace on end of line */
1433 if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
1434 ch = odata[out-1];
1435 odata[out-1] = '=';
1436 to_hex(ch, &odata[out]);
1437 out += 2;
1438 }
1439
1440 if (crlf) odata[out++] = '\r';
1441 odata[out++] = '\n';
1442 if (data[in] == '\r')
1443 in += 2;
1444 else
1445 in++;
1446 }
1447 else {
1448 if ((in + 1 != datalen) &&
1449 (data[in+1] != '\n') &&
1450 (linelen + 1) >= MAXLINESIZE) {
1451 odata[out++] = '=';
1452 if (crlf) odata[out++] = '\r';
1453 odata[out++] = '\n';
1454 linelen = 0;
1455 }
1456 linelen++;
1457 if (header && data[in] == ' ') {
1458 odata[out++] = '_';
1459 in++;
1460 }
1461 else {
1462 odata[out++] = data[in++];
1463 }
1464 }
1465 }
1466 }
1467 if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
1468 PyBuffer_Release(&pdata);
1469 PyMem_Free(odata);
1470 return NULL;
1471 }
1472 PyBuffer_Release(&pdata);
1473 PyMem_Free(odata);
1474 return rv;
1475 }
1476
1477 /* List of functions defined in the module */
1478
1479 static struct PyMethodDef binascii_module_methods[] = {
1480 {"a2b_uu", binascii_a2b_uu, METH_VARARGS, doc_a2b_uu},
1481 {"b2a_uu", binascii_b2a_uu, METH_VARARGS, doc_b2a_uu},
1482 {"a2b_base64", binascii_a2b_base64, METH_VARARGS, doc_a2b_base64},
1483 {"b2a_base64", binascii_b2a_base64, METH_VARARGS, doc_b2a_base64},
1484 {"a2b_hqx", binascii_a2b_hqx, METH_VARARGS, doc_a2b_hqx},
1485 {"b2a_hqx", binascii_b2a_hqx, METH_VARARGS, doc_b2a_hqx},
1486 {"b2a_hex", binascii_hexlify, METH_VARARGS, doc_hexlify},
1487 {"a2b_hex", binascii_unhexlify, METH_VARARGS, doc_unhexlify},
1488 {"hexlify", binascii_hexlify, METH_VARARGS, doc_hexlify},
1489 {"unhexlify", binascii_unhexlify, METH_VARARGS, doc_unhexlify},
1490 {"rlecode_hqx", binascii_rlecode_hqx, METH_VARARGS, doc_rlecode_hqx},
1491 {"rledecode_hqx", binascii_rledecode_hqx, METH_VARARGS,
1492 doc_rledecode_hqx},
1493 {"crc_hqx", binascii_crc_hqx, METH_VARARGS, doc_crc_hqx},
1494 {"crc32", binascii_crc32, METH_VARARGS, doc_crc32},
1495 {"a2b_qp", (PyCFunction)binascii_a2b_qp, METH_VARARGS | METH_KEYWORDS,
1496 doc_a2b_qp},
1497 {"b2a_qp", (PyCFunction)binascii_b2a_qp, METH_VARARGS | METH_KEYWORDS,
1498 doc_b2a_qp},
1499 {NULL, NULL} /* sentinel */
1500 };
1501
1502
1503 /* Initialization function for the module (*must* be called initbinascii) */
1504 PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");
1505
1506 PyMODINIT_FUNC
initbinascii(void)1507 initbinascii(void)
1508 {
1509 PyObject *m, *d, *x;
1510
1511 /* Create the module and add the functions */
1512 m = Py_InitModule("binascii", binascii_module_methods);
1513 if (m == NULL)
1514 return;
1515
1516 d = PyModule_GetDict(m);
1517 x = PyString_FromString(doc_binascii);
1518 PyDict_SetItemString(d, "__doc__", x);
1519 Py_XDECREF(x);
1520
1521 Error = PyErr_NewException("binascii.Error", NULL, NULL);
1522 PyDict_SetItemString(d, "Error", Error);
1523 Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
1524 PyDict_SetItemString(d, "Incomplete", Incomplete);
1525 }
1526