1 /*
2 ** Routines to represent binary data in ASCII and vice-versa
3 **
4 ** This module currently supports the following encodings:
5 ** uuencode:
6 ** each line encodes 45 bytes (except possibly the last)
7 ** First char encodes (binary) length, rest data
8 ** each char encodes 6 bits, as follows:
9 ** binary: 01234567 abcdefgh ijklmnop
10 ** ascii: 012345 67abcd efghij klmnop
11 ** ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
12 ** short binary data is zero-extended (so the bits are always in the
13 ** right place), this does *not* reflect in the length.
14 ** base64:
15 ** Line breaks are insignificant, but lines are at most 76 chars
16 ** each char encodes 6 bits, in similar order as uucode/hqx. Encoding
17 ** is done via a table.
18 ** Short binary data is filled (in ASCII) with '='.
19 ** hqx:
20 ** File starts with introductory text, real data starts and ends
21 ** with colons.
22 ** Data consists of three similar parts: info, datafork, resourcefork.
23 ** Each part is protected (at the end) with a 16-bit crc
24 ** The binary data is run-length encoded, and then ascii-fied:
25 ** binary: 01234567 abcdefgh ijklmnop
26 ** ascii: 012345 67abcd efghij klmnop
27 ** ASCII encoding is table-driven, see the code.
28 ** Short binary data results in the runt ascii-byte being output with
29 ** the bits in the right place.
30 **
31 ** While I was reading dozens of programs that encode or decode the formats
32 ** here (documentation? hihi:-) I have formulated Jansen's Observation:
33 **
34 ** Programs that encode binary data in ASCII are written in
35 ** such a style that they are as unreadable as possible. Devices used
36 ** include unnecessary global variables, burying important tables
37 ** in unrelated sourcefiles, putting functions in include files,
38 ** using seemingly-descriptive variable names for different purposes,
39 ** calls to empty subroutines and a host of others.
40 **
41 ** I have attempted to break with this tradition, but I guess that that
42 ** does make the performance sub-optimal. Oh well, too bad...
43 **
44 ** Jack Jansen, CWI, July 1995.
45 **
46 ** Added support for quoted-printable encoding, based on rfc 1521 et al
47 ** quoted-printable encoding specifies that non printable characters (anything
48 ** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
49 ** of the character. It also specifies some other behavior to enable 8bit data
50 ** in a mail message with little difficulty (maximum line sizes, protecting
51 ** some cases of whitespace, etc).
52 **
53 ** Brandon Long, September 2001.
54 */
55
56 #define PY_SSIZE_T_CLEAN
57
58 #include "Python.h"
59 #include "pystrhex.h"
60 #ifdef USE_ZLIB_CRC32
61 #include "zlib.h"
62 #endif
63
/* Exception objects passed to PyErr_SetString() by the codecs below.
** "Error" is used for malformed input (illegal characters, bad padding,
** oversized chunks); "Incomplete" is used when the input stops in the
** middle of an encoded unit.  Both pointers are assigned outside this
** section of the file. */
static PyObject *Error;
static PyObject *Incomplete;
66
67 /*
68 ** hqx lookup table, ascii->binary.
69 */
70
/* Marker byte used by the hqx run-length coder (rlecode_hqx/rledecode_hqx). */
#define RUNCHAR 0x90

/* Sentinel values stored in table_a2b_hqx; all lie above the 6-bit range:
**   DONE - the terminating colon, decoding stops
**   SKIP - character is ignored (newline and carriage return)
**   FAIL - character is not part of the hqx alphabet
*/
#define DONE 0x7F
#define SKIP 0x7E
#define FAIL 0x7D

/* Indexed by input byte value (0..255); yields the 6-bit value of that
** character or one of the sentinels above.  Each row covers 8 consecutive
** byte values; the comment above a row names the characters it covers. */
static const unsigned char table_a2b_hqx[256] = {
/*       ^@    ^A    ^B    ^C    ^D    ^E    ^F    ^G   */
/* 0*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       \b    \t    \n    ^K    ^L    \r    ^N    ^O   */
/* 1*/  FAIL, FAIL, SKIP, FAIL, FAIL, SKIP, FAIL, FAIL,
/*       ^P    ^Q    ^R    ^S    ^T    ^U    ^V    ^W   */
/* 2*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       ^X    ^Y    ^Z    ^[    ^\    ^]    ^^    ^_   */
/* 3*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*      (sp)   !     "     #     $     %     &     '    */
/* 4*/  FAIL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
/*       (     )     *     +     ,     -     .     /    */
/* 5*/  0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, FAIL, FAIL,
/*       0     1     2     3     4     5     6     7    */
/* 6*/  0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, FAIL,
/*       8     9     :     ;     <     =     >     ?    */
/* 7*/  0x14, 0x15, DONE, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       @     A     B     C     D     E     F     G    */
/* 8*/  0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D,
/*       H     I     J     K     L     M     N     O    */
/* 9*/  0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, FAIL,
/*       P     Q     R     S     T     U     V     W    */
/*10*/  0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, FAIL,
/*       X     Y     Z     [     \     ]     ^     _    */
/*11*/  0x2C, 0x2D, 0x2E, 0x2F, FAIL, FAIL, FAIL, FAIL,
/*       `     a     b     c     d     e     f     g    */
/*12*/  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, FAIL,
/*       h     i     j     k     l     m     n     o    */
/*13*/  0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, FAIL, FAIL,
/*       p     q     r     s     t     u     v     w    */
/*14*/  0x3D, 0x3E, 0x3F, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       x     y     z     {     |     }     ~    ^?    */
/*15*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/* Byte values 128..255 are never valid. */
/*16*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
};
127
/* hqx lookup table, binary->ascii: indexed by a 6-bit value (0..63),
** yields the character that encodes it. */
static const unsigned char table_b2a_hqx[] =
"!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr";
130
/* base64 lookup table, ascii->binary.  It has 128 entries, so it must only
** be indexed with 7-bit values.  -1 marks a character that is not part of
** the base64 alphabet; the pad character '=' deliberately maps to 0 rather
** than -1, so lookups treat it as a valid character. */
static const char table_a2b_base64[] = {
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
    52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */
    -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
    15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
    -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
    41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
};
141
/* Character used to pad the final base64 quad. */
#define BASE64_PAD '='

/* Max binary chunk size; limited only by available memory.
** The bound keeps the "bin_len*2 + 2" (+1 for the newline) output size
** computed by b2a_base64 from overflowing Py_ssize_t. */
#define BASE64_MAXBIN ((PY_SSIZE_T_MAX - 3) / 2)

/* base64 lookup table, binary->ascii: indexed by a 6-bit value (0..63). */
static const unsigned char table_b2a_base64[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
149
150
151
/* Byte-at-a-time lookup table for the 16-bit CRC computed by
** binascii_crc_hqx_impl().  That function indexes it with the high byte of
** the running CRC XORed with the next data byte. */
static const unsigned short crctab_hqx[256] = {
    0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
    0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
    0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
    0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
    0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
    0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
    0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
    0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
    0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
    0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
    0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
    0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
    0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
    0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
    0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
    0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
    0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
    0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
    0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
    0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
    0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
    0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
    0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
    0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
    0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
    0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
    0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
    0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
    0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
    0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
    0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
    0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
};
186
187 /*[clinic input]
188 module binascii
189 [clinic start generated code]*/
190 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=de89fb46bcaf3fec]*/
191
192 /*[python input]
193
194 class ascii_buffer_converter(CConverter):
195 type = 'Py_buffer'
196 converter = 'ascii_buffer_converter'
197 impl_by_reference = True
198 c_default = "{NULL, NULL}"
199
200 def cleanup(self):
201 name = self.name
202 return "".join(["if (", name, ".obj)\n PyBuffer_Release(&", name, ");\n"])
203
204 [python start generated code]*/
205 /*[python end generated code: output=da39a3ee5e6b4b0d input=3eb7b63610da92cd]*/
206
207 static int
ascii_buffer_converter(PyObject * arg,Py_buffer * buf)208 ascii_buffer_converter(PyObject *arg, Py_buffer *buf)
209 {
210 if (arg == NULL) {
211 PyBuffer_Release(buf);
212 return 1;
213 }
214 if (PyUnicode_Check(arg)) {
215 if (PyUnicode_READY(arg) < 0)
216 return 0;
217 if (!PyUnicode_IS_ASCII(arg)) {
218 PyErr_SetString(PyExc_ValueError,
219 "string argument should contain only ASCII characters");
220 return 0;
221 }
222 assert(PyUnicode_KIND(arg) == PyUnicode_1BYTE_KIND);
223 buf->buf = (void *) PyUnicode_1BYTE_DATA(arg);
224 buf->len = PyUnicode_GET_LENGTH(arg);
225 buf->obj = NULL;
226 return 1;
227 }
228 if (PyObject_GetBuffer(arg, buf, PyBUF_SIMPLE) != 0) {
229 PyErr_Format(PyExc_TypeError,
230 "argument should be bytes, buffer or ASCII string, "
231 "not '%.100s'", Py_TYPE(arg)->tp_name);
232 return 0;
233 }
234 if (!PyBuffer_IsContiguous(buf, 'C')) {
235 PyErr_Format(PyExc_TypeError,
236 "argument should be a contiguous buffer, "
237 "not '%.100s'", Py_TYPE(arg)->tp_name);
238 PyBuffer_Release(buf);
239 return 0;
240 }
241 return Py_CLEANUP_SUPPORTED;
242 }
243
244 #include "clinic/binascii.c.h"
245
246 /*[clinic input]
247 binascii.a2b_uu
248
249 data: ascii_buffer
250 /
251
252 Decode a line of uuencoded data.
253 [clinic start generated code]*/
254
255 static PyObject *
binascii_a2b_uu_impl(PyObject * module,Py_buffer * data)256 binascii_a2b_uu_impl(PyObject *module, Py_buffer *data)
257 /*[clinic end generated code: output=e027f8e0b0598742 input=7cafeaf73df63d1c]*/
258 {
259 const unsigned char *ascii_data;
260 unsigned char *bin_data;
261 int leftbits = 0;
262 unsigned char this_ch;
263 unsigned int leftchar = 0;
264 PyObject *rv;
265 Py_ssize_t ascii_len, bin_len;
266
267 ascii_data = data->buf;
268 ascii_len = data->len;
269
270 assert(ascii_len >= 0);
271
272 /* First byte: binary data length (in bytes) */
273 bin_len = (*ascii_data++ - ' ') & 077;
274 ascii_len--;
275
276 /* Allocate the buffer */
277 if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len)) == NULL )
278 return NULL;
279 bin_data = (unsigned char *)PyBytes_AS_STRING(rv);
280
281 for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
282 /* XXX is it really best to add NULs if there's no more data */
283 this_ch = (ascii_len > 0) ? *ascii_data : 0;
284 if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
285 /*
286 ** Whitespace. Assume some spaces got eaten at
287 ** end-of-line. (We check this later)
288 */
289 this_ch = 0;
290 } else {
291 /* Check the character for legality
292 ** The 64 in stead of the expected 63 is because
293 ** there are a few uuencodes out there that use
294 ** '`' as zero instead of space.
295 */
296 if ( this_ch < ' ' || this_ch > (' ' + 64)) {
297 PyErr_SetString(Error, "Illegal char");
298 Py_DECREF(rv);
299 return NULL;
300 }
301 this_ch = (this_ch - ' ') & 077;
302 }
303 /*
304 ** Shift it in on the low end, and see if there's
305 ** a byte ready for output.
306 */
307 leftchar = (leftchar << 6) | (this_ch);
308 leftbits += 6;
309 if ( leftbits >= 8 ) {
310 leftbits -= 8;
311 *bin_data++ = (leftchar >> leftbits) & 0xff;
312 leftchar &= ((1 << leftbits) - 1);
313 bin_len--;
314 }
315 }
316 /*
317 ** Finally, check that if there's anything left on the line
318 ** that it's whitespace only.
319 */
320 while( ascii_len-- > 0 ) {
321 this_ch = *ascii_data++;
322 /* Extra '`' may be written as padding in some cases */
323 if ( this_ch != ' ' && this_ch != ' '+64 &&
324 this_ch != '\n' && this_ch != '\r' ) {
325 PyErr_SetString(Error, "Trailing garbage");
326 Py_DECREF(rv);
327 return NULL;
328 }
329 }
330 return rv;
331 }
332
333 /*[clinic input]
334 binascii.b2a_uu
335
336 data: Py_buffer
337 /
338
339 Uuencode line of data.
340 [clinic start generated code]*/
341
342 static PyObject *
binascii_b2a_uu_impl(PyObject * module,Py_buffer * data)343 binascii_b2a_uu_impl(PyObject *module, Py_buffer *data)
344 /*[clinic end generated code: output=0070670e52e4aa6b input=00fdf458ce8b465b]*/
345 {
346 unsigned char *ascii_data;
347 const unsigned char *bin_data;
348 int leftbits = 0;
349 unsigned char this_ch;
350 unsigned int leftchar = 0;
351 Py_ssize_t bin_len, out_len;
352 _PyBytesWriter writer;
353
354 _PyBytesWriter_Init(&writer);
355 bin_data = data->buf;
356 bin_len = data->len;
357 if ( bin_len > 45 ) {
358 /* The 45 is a limit that appears in all uuencode's */
359 PyErr_SetString(Error, "At most 45 bytes at once");
360 return NULL;
361 }
362
363 /* We're lazy and allocate to much (fixed up later) */
364 out_len = 2 + (bin_len + 2) / 3 * 4;
365 ascii_data = _PyBytesWriter_Alloc(&writer, out_len);
366 if (ascii_data == NULL)
367 return NULL;
368
369 /* Store the length */
370 *ascii_data++ = ' ' + (bin_len & 077);
371
372 for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
373 /* Shift the data (or padding) into our buffer */
374 if ( bin_len > 0 ) /* Data */
375 leftchar = (leftchar << 8) | *bin_data;
376 else /* Padding */
377 leftchar <<= 8;
378 leftbits += 8;
379
380 /* See if there are 6-bit groups ready */
381 while ( leftbits >= 6 ) {
382 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
383 leftbits -= 6;
384 *ascii_data++ = this_ch + ' ';
385 }
386 }
387 *ascii_data++ = '\n'; /* Append a courtesy newline */
388
389 return _PyBytesWriter_Finish(&writer, ascii_data);
390 }
391
392
393 static int
binascii_find_valid(const unsigned char * s,Py_ssize_t slen,int num)394 binascii_find_valid(const unsigned char *s, Py_ssize_t slen, int num)
395 {
396 /* Finds & returns the (num+1)th
397 ** valid character for base64, or -1 if none.
398 */
399
400 int ret = -1;
401 unsigned char c, b64val;
402
403 while ((slen > 0) && (ret == -1)) {
404 c = *s;
405 b64val = table_a2b_base64[c & 0x7f];
406 if ( ((c <= 0x7f) && (b64val != (unsigned char)-1)) ) {
407 if (num == 0)
408 ret = *s;
409 num--;
410 }
411
412 s++;
413 slen--;
414 }
415 return ret;
416 }
417
418 /*[clinic input]
419 binascii.a2b_base64
420
421 data: ascii_buffer
422 /
423
424 Decode a line of base64 data.
425 [clinic start generated code]*/
426
427 static PyObject *
binascii_a2b_base64_impl(PyObject * module,Py_buffer * data)428 binascii_a2b_base64_impl(PyObject *module, Py_buffer *data)
429 /*[clinic end generated code: output=0628223f19fd3f9b input=5872acf6e1cac243]*/
430 {
431 const unsigned char *ascii_data;
432 unsigned char *bin_data;
433 int leftbits = 0;
434 unsigned char this_ch;
435 unsigned int leftchar = 0;
436 Py_ssize_t ascii_len, bin_len;
437 int quad_pos = 0;
438 _PyBytesWriter writer;
439
440 ascii_data = data->buf;
441 ascii_len = data->len;
442
443 assert(ascii_len >= 0);
444
445 if (ascii_len > PY_SSIZE_T_MAX - 3)
446 return PyErr_NoMemory();
447
448 bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
449
450 _PyBytesWriter_Init(&writer);
451
452 /* Allocate the buffer */
453 bin_data = _PyBytesWriter_Alloc(&writer, bin_len);
454 if (bin_data == NULL)
455 return NULL;
456
457 for( ; ascii_len > 0; ascii_len--, ascii_data++) {
458 this_ch = *ascii_data;
459
460 if (this_ch > 0x7f ||
461 this_ch == '\r' || this_ch == '\n' || this_ch == ' ')
462 continue;
463
464 /* Check for pad sequences and ignore
465 ** the invalid ones.
466 */
467 if (this_ch == BASE64_PAD) {
468 if ( (quad_pos < 2) ||
469 ((quad_pos == 2) &&
470 (binascii_find_valid(ascii_data, ascii_len, 1)
471 != BASE64_PAD)) )
472 {
473 continue;
474 }
475 else {
476 /* A pad sequence means no more input.
477 ** We've already interpreted the data
478 ** from the quad at this point.
479 */
480 leftbits = 0;
481 break;
482 }
483 }
484
485 this_ch = table_a2b_base64[*ascii_data];
486 if ( this_ch == (unsigned char) -1 )
487 continue;
488
489 /*
490 ** Shift it in on the low end, and see if there's
491 ** a byte ready for output.
492 */
493 quad_pos = (quad_pos + 1) & 0x03;
494 leftchar = (leftchar << 6) | (this_ch);
495 leftbits += 6;
496
497 if ( leftbits >= 8 ) {
498 leftbits -= 8;
499 *bin_data++ = (leftchar >> leftbits) & 0xff;
500 leftchar &= ((1 << leftbits) - 1);
501 }
502 }
503
504 if (leftbits != 0) {
505 PyErr_SetString(Error, "Incorrect padding");
506 _PyBytesWriter_Dealloc(&writer);
507 return NULL;
508 }
509
510 return _PyBytesWriter_Finish(&writer, bin_data);
511 }
512
513
514 /*[clinic input]
515 binascii.b2a_base64
516
517 data: Py_buffer
518 *
519 newline: int(c_default="1") = True
520
521 Base64-code line of data.
522 [clinic start generated code]*/
523
524 static PyObject *
binascii_b2a_base64_impl(PyObject * module,Py_buffer * data,int newline)525 binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int newline)
526 /*[clinic end generated code: output=4ad62c8e8485d3b3 input=7b2ea6fa38d8924c]*/
527 {
528 unsigned char *ascii_data;
529 const unsigned char *bin_data;
530 int leftbits = 0;
531 unsigned char this_ch;
532 unsigned int leftchar = 0;
533 Py_ssize_t bin_len, out_len;
534 _PyBytesWriter writer;
535
536 bin_data = data->buf;
537 bin_len = data->len;
538 _PyBytesWriter_Init(&writer);
539
540 assert(bin_len >= 0);
541
542 if ( bin_len > BASE64_MAXBIN ) {
543 PyErr_SetString(Error, "Too much data for base64 line");
544 return NULL;
545 }
546
547 /* We're lazy and allocate too much (fixed up later).
548 "+2" leaves room for up to two pad characters.
549 Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
550 out_len = bin_len*2 + 2;
551 if (newline)
552 out_len++;
553 ascii_data = _PyBytesWriter_Alloc(&writer, out_len);
554 if (ascii_data == NULL)
555 return NULL;
556
557 for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
558 /* Shift the data into our buffer */
559 leftchar = (leftchar << 8) | *bin_data;
560 leftbits += 8;
561
562 /* See if there are 6-bit groups ready */
563 while ( leftbits >= 6 ) {
564 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
565 leftbits -= 6;
566 *ascii_data++ = table_b2a_base64[this_ch];
567 }
568 }
569 if ( leftbits == 2 ) {
570 *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
571 *ascii_data++ = BASE64_PAD;
572 *ascii_data++ = BASE64_PAD;
573 } else if ( leftbits == 4 ) {
574 *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
575 *ascii_data++ = BASE64_PAD;
576 }
577 if (newline)
578 *ascii_data++ = '\n'; /* Append a courtesy newline */
579
580 return _PyBytesWriter_Finish(&writer, ascii_data);
581 }
582
583 /*[clinic input]
584 binascii.a2b_hqx
585
586 data: ascii_buffer
587 /
588
589 Decode .hqx coding.
590 [clinic start generated code]*/
591
592 static PyObject *
binascii_a2b_hqx_impl(PyObject * module,Py_buffer * data)593 binascii_a2b_hqx_impl(PyObject *module, Py_buffer *data)
594 /*[clinic end generated code: output=4d6d8c54d54ea1c1 input=0d914c680e0eed55]*/
595 {
596 const unsigned char *ascii_data;
597 unsigned char *bin_data;
598 int leftbits = 0;
599 unsigned char this_ch;
600 unsigned int leftchar = 0;
601 PyObject *res;
602 Py_ssize_t len;
603 int done = 0;
604 _PyBytesWriter writer;
605
606 ascii_data = data->buf;
607 len = data->len;
608 _PyBytesWriter_Init(&writer);
609
610 assert(len >= 0);
611
612 if (len > PY_SSIZE_T_MAX - 2)
613 return PyErr_NoMemory();
614
615 /* Allocate a string that is too big (fixed later)
616 Add two to the initial length to prevent interning which
617 would preclude subsequent resizing. */
618 bin_data = _PyBytesWriter_Alloc(&writer, len + 2);
619 if (bin_data == NULL)
620 return NULL;
621
622 for( ; len > 0 ; len--, ascii_data++ ) {
623 /* Get the byte and look it up */
624 this_ch = table_a2b_hqx[*ascii_data];
625 if ( this_ch == SKIP )
626 continue;
627 if ( this_ch == FAIL ) {
628 PyErr_SetString(Error, "Illegal char");
629 _PyBytesWriter_Dealloc(&writer);
630 return NULL;
631 }
632 if ( this_ch == DONE ) {
633 /* The terminating colon */
634 done = 1;
635 break;
636 }
637
638 /* Shift it into the buffer and see if any bytes are ready */
639 leftchar = (leftchar << 6) | (this_ch);
640 leftbits += 6;
641 if ( leftbits >= 8 ) {
642 leftbits -= 8;
643 *bin_data++ = (leftchar >> leftbits) & 0xff;
644 leftchar &= ((1 << leftbits) - 1);
645 }
646 }
647
648 if ( leftbits && !done ) {
649 PyErr_SetString(Incomplete,
650 "String has incomplete number of bytes");
651 _PyBytesWriter_Dealloc(&writer);
652 return NULL;
653 }
654
655 res = _PyBytesWriter_Finish(&writer, bin_data);
656 if (res == NULL)
657 return NULL;
658 return Py_BuildValue("Ni", res, done);
659 }
660
661
662 /*[clinic input]
663 binascii.rlecode_hqx
664
665 data: Py_buffer
666 /
667
668 Binhex RLE-code binary data.
669 [clinic start generated code]*/
670
671 static PyObject *
binascii_rlecode_hqx_impl(PyObject * module,Py_buffer * data)672 binascii_rlecode_hqx_impl(PyObject *module, Py_buffer *data)
673 /*[clinic end generated code: output=393d79338f5f5629 input=e1f1712447a82b09]*/
674 {
675 const unsigned char *in_data;
676 unsigned char *out_data;
677 unsigned char ch;
678 Py_ssize_t in, inend, len;
679 _PyBytesWriter writer;
680
681 _PyBytesWriter_Init(&writer);
682 in_data = data->buf;
683 len = data->len;
684
685 assert(len >= 0);
686
687 if (len > PY_SSIZE_T_MAX / 2 - 2)
688 return PyErr_NoMemory();
689
690 /* Worst case: output is twice as big as input (fixed later) */
691 out_data = _PyBytesWriter_Alloc(&writer, len * 2 + 2);
692 if (out_data == NULL)
693 return NULL;
694
695 for( in=0; in<len; in++) {
696 ch = in_data[in];
697 if ( ch == RUNCHAR ) {
698 /* RUNCHAR. Escape it. */
699 *out_data++ = RUNCHAR;
700 *out_data++ = 0;
701 } else {
702 /* Check how many following are the same */
703 for(inend=in+1;
704 inend<len && in_data[inend] == ch &&
705 inend < in+255;
706 inend++) ;
707 if ( inend - in > 3 ) {
708 /* More than 3 in a row. Output RLE. */
709 *out_data++ = ch;
710 *out_data++ = RUNCHAR;
711 *out_data++ = (unsigned char) (inend-in);
712 in = inend-1;
713 } else {
714 /* Less than 3. Output the byte itself */
715 *out_data++ = ch;
716 }
717 }
718 }
719
720 return _PyBytesWriter_Finish(&writer, out_data);
721 }
722
723
724 /*[clinic input]
725 binascii.b2a_hqx
726
727 data: Py_buffer
728 /
729
730 Encode .hqx data.
731 [clinic start generated code]*/
732
733 static PyObject *
binascii_b2a_hqx_impl(PyObject * module,Py_buffer * data)734 binascii_b2a_hqx_impl(PyObject *module, Py_buffer *data)
735 /*[clinic end generated code: output=d0aa5a704bc9f7de input=9596ebe019fe12ba]*/
736 {
737 unsigned char *ascii_data;
738 const unsigned char *bin_data;
739 int leftbits = 0;
740 unsigned char this_ch;
741 unsigned int leftchar = 0;
742 Py_ssize_t len;
743 _PyBytesWriter writer;
744
745 bin_data = data->buf;
746 len = data->len;
747 _PyBytesWriter_Init(&writer);
748
749 assert(len >= 0);
750
751 if (len > PY_SSIZE_T_MAX / 2 - 2)
752 return PyErr_NoMemory();
753
754 /* Allocate a buffer that is at least large enough */
755 ascii_data = _PyBytesWriter_Alloc(&writer, len * 2 + 2);
756 if (ascii_data == NULL)
757 return NULL;
758
759 for( ; len > 0 ; len--, bin_data++ ) {
760 /* Shift into our buffer, and output any 6bits ready */
761 leftchar = (leftchar << 8) | *bin_data;
762 leftbits += 8;
763 while ( leftbits >= 6 ) {
764 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
765 leftbits -= 6;
766 *ascii_data++ = table_b2a_hqx[this_ch];
767 }
768 }
769 /* Output a possible runt byte */
770 if ( leftbits ) {
771 leftchar <<= (6-leftbits);
772 *ascii_data++ = table_b2a_hqx[leftchar & 0x3f];
773 }
774
775 return _PyBytesWriter_Finish(&writer, ascii_data);
776 }
777
778
779 /*[clinic input]
780 binascii.rledecode_hqx
781
782 data: Py_buffer
783 /
784
785 Decode hexbin RLE-coded string.
786 [clinic start generated code]*/
787
static PyObject *
binascii_rledecode_hqx_impl(PyObject *module, Py_buffer *data)
/*[clinic end generated code: output=9826619565de1c6c input=54cdd49fc014402c]*/
{
    /* Expand binhex run-length coded data.
    **
    ** In the input, RUNCHAR followed by 0 stands for a literal RUNCHAR
    ** byte; RUNCHAR followed by a count n > 0 repeats the previously
    ** written byte so that it occurs n times in total (n - 1 additional
    ** copies are written).
    **
    ** Returns a new bytes object.  On failure returns NULL with Error set
    ** (a repeat code at the very start, which has nothing to repeat) or
    ** Incomplete set (input ends right after a RUNCHAR).
    */
    const unsigned char *in_data;
    unsigned char *out_data;
    unsigned char in_byte, in_repeat;
    Py_ssize_t in_len;
    _PyBytesWriter writer;

    in_data = data->buf;
    in_len = data->len;
    _PyBytesWriter_Init(&writer);

    assert(in_len >= 0);

    /* Empty string is a special case */
    if ( in_len == 0 )
        return PyBytes_FromStringAndSize("", 0);
    else if (in_len > PY_SSIZE_T_MAX / 2)
        return PyErr_NoMemory();

    /* Allocate a buffer of reasonable size. Resized when needed */
    out_data = _PyBytesWriter_Alloc(&writer, in_len);
    if (out_data == NULL)
        return NULL;

    /* Use overallocation */
    writer.overallocate = 1;

    /*
    ** Helper macro: fetch the next input byte into b, or set Incomplete
    ** and jump to the error label when the input is exhausted.
    */
#define INBYTE(b)                                                       \
    do {                                                                \
         if ( --in_len < 0 ) {                                          \
           PyErr_SetString(Incomplete, "");                             \
           goto error;                                                  \
         }                                                              \
         b = *in_data++;                                                \
    } while(0)

    /*
    ** Handle first byte separately (since we have to get angry
    ** in case of an orphaned RLE code).
    */
    INBYTE(in_byte);

    if (in_byte == RUNCHAR) {
        INBYTE(in_repeat);
        /* only 1 byte will be written, but 2 bytes were preallocated:
           subtract 1 byte to prevent overallocation */
        writer.min_size--;

        if (in_repeat != 0) {
            /* Note Error, not Incomplete (which is at the end
            ** of the string only). This is a programmer error.
            */
            PyErr_SetString(Error, "Orphaned RLE code at start");
            goto error;
        }
        *out_data++ = RUNCHAR;
    } else {
        *out_data++ = in_byte;
    }

    /* From here on out_data[-1] is always valid, because at least one
       byte has been written. */
    while( in_len > 0 ) {
        INBYTE(in_byte);

        if (in_byte == RUNCHAR) {
            INBYTE(in_repeat);
            /* only 1 byte will be written, but 2 bytes were preallocated:
               subtract 1 byte to prevent overallocation */
            writer.min_size--;

            if ( in_repeat == 0 ) {
                /* Just an escaped RUNCHAR value */
                *out_data++ = RUNCHAR;
            } else {
                /* Pick up value and output a sequence of it */
                in_byte = out_data[-1];

                /* enlarge the buffer if needed */
                if (in_repeat > 1) {
                    /* -1 because we already preallocated 1 byte */
                    out_data = _PyBytesWriter_Prepare(&writer, out_data,
                                                      in_repeat - 1);
                    if (out_data == NULL)
                        goto error;
                }

                /* The byte was already written once, so only
                   in_repeat - 1 further copies are emitted. */
                while ( --in_repeat > 0 )
                    *out_data++ = in_byte;
            }
        } else {
            /* Normal byte */
            *out_data++ = in_byte;
        }
    }
    return _PyBytesWriter_Finish(&writer, out_data);

error:
    _PyBytesWriter_Dealloc(&writer);
    return NULL;
}
894
895
896 /*[clinic input]
897 binascii.crc_hqx -> unsigned_int
898
899 data: Py_buffer
900 crc: unsigned_int(bitwise=True)
901 /
902
903 Compute CRC-CCITT incrementally.
904 [clinic start generated code]*/
905
906 static unsigned int
binascii_crc_hqx_impl(PyObject * module,Py_buffer * data,unsigned int crc)907 binascii_crc_hqx_impl(PyObject *module, Py_buffer *data, unsigned int crc)
908 /*[clinic end generated code: output=8ec2a78590d19170 input=f18240ff8c705b79]*/
909 {
910 const unsigned char *bin_data;
911 Py_ssize_t len;
912
913 crc &= 0xffff;
914 bin_data = data->buf;
915 len = data->len;
916
917 while(len-- > 0) {
918 crc = ((crc<<8)&0xff00) ^ crctab_hqx[(crc>>8)^*bin_data++];
919 }
920
921 return crc;
922 }
923
924 #ifndef USE_ZLIB_CRC32
925 /* Crc - 32 BIT ANSI X3.66 CRC checksum files
926 Also known as: ISO 3307
927 **********************************************************************|
928 * *|
929 * Demonstration program to compute the 32-bit CRC used as the frame *|
930 * check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71 *|
931 * and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level *|
932 * protocol). The 32-bit FCS was added via the Federal Register, *|
933 * 1 June 1982, p.23798. I presume but don't know for certain that *|
934 * this polynomial is or will be included in CCITT V.41, which *|
935 * defines the 16-bit CRC (often called CRC-CCITT) polynomial. FIPS *|
936 * PUB 78 says that the 32-bit FCS reduces otherwise undetected *|
937 * errors by a factor of 10^-5 over 16-bit FCS. *|
938 * *|
939 **********************************************************************|
940
941 Copyright (C) 1986 Gary S. Brown. You may use this program, or
942 code or tables extracted from it, as desired without restriction.
943
944 First, the polynomial itself and its table of feedback terms. The
945 polynomial is
946 X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
947 Note that we take it "backwards" and put the highest-order term in
948 the lowest-order bit. The X^32 term is "implied"; the LSB is the
949 X^31 term, etc. The X^0 term (usually shown as "+1") results in
950 the MSB being 1.
951
952 Note that the usual hardware shift register implementation, which
953 is what we're using (we're merely optimizing it by doing eight-bit
954 chunks at a time) shifts bits into the lowest-order term. In our
955 implementation, that means shifting towards the right. Why do we
956 do it this way? Because the calculated CRC must be transmitted in
957 order from highest-order term to lowest-order term. UARTs transmit
958 characters in order from LSB to MSB. By storing the CRC this way,
959 we hand it to the UART in the order low-byte to high-byte; the UART
  sends each low-bit to high-bit; and the result is transmission bit
961 by bit from highest- to lowest-order term without requiring any bit
962 shuffling on our part. Reception works similarly.
963
964 The feedback terms table consists of 256, 32-bit entries. Notes:
965
966 1. The table can be generated at runtime if desired; code to do so
967 is shown later. It might not be obvious, but the feedback
968 terms simply represent the results of eight shift/xor opera-
969 tions for all combinations of data and CRC register values.
970
971 2. The CRC accumulation logic is the same for all CRC polynomials,
972 be they sixteen or thirty-two bits wide. You simply choose the
973 appropriate table. Alternatively, because the table can be
974 generated at runtime, you can start by generating the table for
975 the polynomial in question and use exactly the same "updcrc",
976 if your application needn't simultaneously handle two CRC
977 polynomials. (Note, however, that XMODEM is strange.)
978
979 3. For 16-bit CRCs, the table entries need be only 16 bits wide;
980 of course, 32-bit entries work OK if the high 16 bits are zero.
981
982 4. The values must be right-shifted by eight bits by the "updcrc"
983 logic; the shift must be unsigned (bring in zeroes). On some
984 hardware you could probably optimize the shift in assembler by
985 using byte-swap instructions.
986 ********************************************************************/
987
/* CRC-32 feedback terms: 256 entries of 32 bits each, one per possible
 * value of the low byte of (crc ^ input byte).  Entry 128 (0xedb88320) is
 * the reflected CRC-32 polynomial itself.  The table is only referenced by
 * the table-driven (non-zlib) branch of binascii_crc32_impl().
 * The trailing comment on each row gives the index of its first entry.
 */
static const unsigned int crc_32_tab[256] = {
    0x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU,  /* 0x00 */
    0x076dc419U, 0x706af48fU, 0xe963a535U, 0x9e6495a3U,  /* 0x04 */
    0x0edb8832U, 0x79dcb8a4U, 0xe0d5e91eU, 0x97d2d988U,  /* 0x08 */
    0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U, 0x90bf1d91U,  /* 0x0c */
    0x1db71064U, 0x6ab020f2U, 0xf3b97148U, 0x84be41deU,  /* 0x10 */
    0x1adad47dU, 0x6ddde4ebU, 0xf4d4b551U, 0x83d385c7U,  /* 0x14 */
    0x136c9856U, 0x646ba8c0U, 0xfd62f97aU, 0x8a65c9ecU,  /* 0x18 */
    0x14015c4fU, 0x63066cd9U, 0xfa0f3d63U, 0x8d080df5U,  /* 0x1c */
    0x3b6e20c8U, 0x4c69105eU, 0xd56041e4U, 0xa2677172U,  /* 0x20 */
    0x3c03e4d1U, 0x4b04d447U, 0xd20d85fdU, 0xa50ab56bU,  /* 0x24 */
    0x35b5a8faU, 0x42b2986cU, 0xdbbbc9d6U, 0xacbcf940U,  /* 0x28 */
    0x32d86ce3U, 0x45df5c75U, 0xdcd60dcfU, 0xabd13d59U,  /* 0x2c */
    0x26d930acU, 0x51de003aU, 0xc8d75180U, 0xbfd06116U,  /* 0x30 */
    0x21b4f4b5U, 0x56b3c423U, 0xcfba9599U, 0xb8bda50fU,  /* 0x34 */
    0x2802b89eU, 0x5f058808U, 0xc60cd9b2U, 0xb10be924U,  /* 0x38 */
    0x2f6f7c87U, 0x58684c11U, 0xc1611dabU, 0xb6662d3dU,  /* 0x3c */
    0x76dc4190U, 0x01db7106U, 0x98d220bcU, 0xefd5102aU,  /* 0x40 */
    0x71b18589U, 0x06b6b51fU, 0x9fbfe4a5U, 0xe8b8d433U,  /* 0x44 */
    0x7807c9a2U, 0x0f00f934U, 0x9609a88eU, 0xe10e9818U,  /* 0x48 */
    0x7f6a0dbbU, 0x086d3d2dU, 0x91646c97U, 0xe6635c01U,  /* 0x4c */
    0x6b6b51f4U, 0x1c6c6162U, 0x856530d8U, 0xf262004eU,  /* 0x50 */
    0x6c0695edU, 0x1b01a57bU, 0x8208f4c1U, 0xf50fc457U,  /* 0x54 */
    0x65b0d9c6U, 0x12b7e950U, 0x8bbeb8eaU, 0xfcb9887cU,  /* 0x58 */
    0x62dd1ddfU, 0x15da2d49U, 0x8cd37cf3U, 0xfbd44c65U,  /* 0x5c */
    0x4db26158U, 0x3ab551ceU, 0xa3bc0074U, 0xd4bb30e2U,  /* 0x60 */
    0x4adfa541U, 0x3dd895d7U, 0xa4d1c46dU, 0xd3d6f4fbU,  /* 0x64 */
    0x4369e96aU, 0x346ed9fcU, 0xad678846U, 0xda60b8d0U,  /* 0x68 */
    0x44042d73U, 0x33031de5U, 0xaa0a4c5fU, 0xdd0d7cc9U,  /* 0x6c */
    0x5005713cU, 0x270241aaU, 0xbe0b1010U, 0xc90c2086U,  /* 0x70 */
    0x5768b525U, 0x206f85b3U, 0xb966d409U, 0xce61e49fU,  /* 0x74 */
    0x5edef90eU, 0x29d9c998U, 0xb0d09822U, 0xc7d7a8b4U,  /* 0x78 */
    0x59b33d17U, 0x2eb40d81U, 0xb7bd5c3bU, 0xc0ba6cadU,  /* 0x7c */
    0xedb88320U, 0x9abfb3b6U, 0x03b6e20cU, 0x74b1d29aU,  /* 0x80 */
    0xead54739U, 0x9dd277afU, 0x04db2615U, 0x73dc1683U,  /* 0x84 */
    0xe3630b12U, 0x94643b84U, 0x0d6d6a3eU, 0x7a6a5aa8U,  /* 0x88 */
    0xe40ecf0bU, 0x9309ff9dU, 0x0a00ae27U, 0x7d079eb1U,  /* 0x8c */
    0xf00f9344U, 0x8708a3d2U, 0x1e01f268U, 0x6906c2feU,  /* 0x90 */
    0xf762575dU, 0x806567cbU, 0x196c3671U, 0x6e6b06e7U,  /* 0x94 */
    0xfed41b76U, 0x89d32be0U, 0x10da7a5aU, 0x67dd4accU,  /* 0x98 */
    0xf9b9df6fU, 0x8ebeeff9U, 0x17b7be43U, 0x60b08ed5U,  /* 0x9c */
    0xd6d6a3e8U, 0xa1d1937eU, 0x38d8c2c4U, 0x4fdff252U,  /* 0xa0 */
    0xd1bb67f1U, 0xa6bc5767U, 0x3fb506ddU, 0x48b2364bU,  /* 0xa4 */
    0xd80d2bdaU, 0xaf0a1b4cU, 0x36034af6U, 0x41047a60U,  /* 0xa8 */
    0xdf60efc3U, 0xa867df55U, 0x316e8eefU, 0x4669be79U,  /* 0xac */
    0xcb61b38cU, 0xbc66831aU, 0x256fd2a0U, 0x5268e236U,  /* 0xb0 */
    0xcc0c7795U, 0xbb0b4703U, 0x220216b9U, 0x5505262fU,  /* 0xb4 */
    0xc5ba3bbeU, 0xb2bd0b28U, 0x2bb45a92U, 0x5cb36a04U,  /* 0xb8 */
    0xc2d7ffa7U, 0xb5d0cf31U, 0x2cd99e8bU, 0x5bdeae1dU,  /* 0xbc */
    0x9b64c2b0U, 0xec63f226U, 0x756aa39cU, 0x026d930aU,  /* 0xc0 */
    0x9c0906a9U, 0xeb0e363fU, 0x72076785U, 0x05005713U,  /* 0xc4 */
    0x95bf4a82U, 0xe2b87a14U, 0x7bb12baeU, 0x0cb61b38U,  /* 0xc8 */
    0x92d28e9bU, 0xe5d5be0dU, 0x7cdcefb7U, 0x0bdbdf21U,  /* 0xcc */
    0x86d3d2d4U, 0xf1d4e242U, 0x68ddb3f8U, 0x1fda836eU,  /* 0xd0 */
    0x81be16cdU, 0xf6b9265bU, 0x6fb077e1U, 0x18b74777U,  /* 0xd4 */
    0x88085ae6U, 0xff0f6a70U, 0x66063bcaU, 0x11010b5cU,  /* 0xd8 */
    0x8f659effU, 0xf862ae69U, 0x616bffd3U, 0x166ccf45U,  /* 0xdc */
    0xa00ae278U, 0xd70dd2eeU, 0x4e048354U, 0x3903b3c2U,  /* 0xe0 */
    0xa7672661U, 0xd06016f7U, 0x4969474dU, 0x3e6e77dbU,  /* 0xe4 */
    0xaed16a4aU, 0xd9d65adcU, 0x40df0b66U, 0x37d83bf0U,  /* 0xe8 */
    0xa9bcae53U, 0xdebb9ec5U, 0x47b2cf7fU, 0x30b5ffe9U,  /* 0xec */
    0xbdbdf21cU, 0xcabac28aU, 0x53b39330U, 0x24b4a3a6U,  /* 0xf0 */
    0xbad03605U, 0xcdd70693U, 0x54de5729U, 0x23d967bfU,  /* 0xf4 */
    0xb3667a2eU, 0xc4614ab8U, 0x5d681b02U, 0x2a6f2b94U,  /* 0xf8 */
    0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU, 0x2d02ef8dU   /* 0xfc */
};
1042 #endif /* USE_ZLIB_CRC32 */
1043
1044 /*[clinic input]
1045 binascii.crc32 -> unsigned_int
1046
1047 data: Py_buffer
1048 crc: unsigned_int(bitwise=True) = 0
1049 /
1050
1051 Compute CRC-32 incrementally.
1052 [clinic start generated code]*/
1053
1054 static unsigned int
binascii_crc32_impl(PyObject * module,Py_buffer * data,unsigned int crc)1055 binascii_crc32_impl(PyObject *module, Py_buffer *data, unsigned int crc)
1056 /*[clinic end generated code: output=52cf59056a78593b input=bbe340bc99d25aa8]*/
1057
1058 #ifdef USE_ZLIB_CRC32
1059 /* This was taken from zlibmodule.c PyZlib_crc32 (but is PY_SSIZE_T_CLEAN) */
1060 {
1061 const Byte *buf;
1062 Py_ssize_t len;
1063 int signed_val;
1064
1065 buf = (Byte*)data->buf;
1066 len = data->len;
1067 signed_val = crc32(crc, buf, len);
1068 return (unsigned int)signed_val & 0xffffffffU;
1069 }
1070 #else /* USE_ZLIB_CRC32 */
1071 { /* By Jim Ahlstrom; All rights transferred to CNRI */
1072 const unsigned char *bin_data;
1073 Py_ssize_t len;
1074 unsigned int result;
1075
1076 bin_data = data->buf;
1077 len = data->len;
1078
1079 crc = ~ crc;
1080 while (len-- > 0) {
1081 crc = crc_32_tab[(crc ^ *bin_data++) & 0xff] ^ (crc >> 8);
1082 /* Note: (crc >> 8) MUST zero fill on left */
1083 }
1084
1085 result = (crc ^ 0xFFFFFFFF);
1086 return result & 0xffffffff;
1087 }
1088 #endif /* USE_ZLIB_CRC32 */
1089
1090 /*[clinic input]
1091 binascii.b2a_hex
1092
1093 data: Py_buffer
1094 /
1095
1096 Hexadecimal representation of binary data.
1097
1098 The return value is a bytes object. This function is also
1099 available as "hexlify()".
1100 [clinic start generated code]*/
1101
1102 static PyObject *
binascii_b2a_hex_impl(PyObject * module,Py_buffer * data)1103 binascii_b2a_hex_impl(PyObject *module, Py_buffer *data)
1104 /*[clinic end generated code: output=92fec1a95c9897a0 input=96423cfa299ff3b1]*/
1105 {
1106 return _Py_strhex_bytes((const char *)data->buf, data->len);
1107 }
1108
1109 /*[clinic input]
1110 binascii.hexlify = binascii.b2a_hex
1111
1112 Hexadecimal representation of binary data.
1113
1114 The return value is a bytes object.
1115 [clinic start generated code]*/
1116
1117 static PyObject *
binascii_hexlify_impl(PyObject * module,Py_buffer * data)1118 binascii_hexlify_impl(PyObject *module, Py_buffer *data)
1119 /*[clinic end generated code: output=749e95e53c14880c input=2e3afae7f083f061]*/
1120 {
1121 return _Py_strhex_bytes((const char *)data->buf, data->len);
1122 }
1123
/* Return the value (0..15) of the hexadecimal digit `c`, accepting both
 * upper- and lower-case letters, or -1 if `c` is not a hex digit.
 *
 * Plain range comparisons are used on purpose: they give -1 for every int
 * outside '0'-'9', 'a'-'f' and 'A'-'F', including values outside 0..255,
 * so the result does not depend on the caller having masked the argument
 * to a single byte first.
 */
static int
to_int(int c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    return -1;
}
1137
1138
1139 /*[clinic input]
1140 binascii.a2b_hex
1141
1142 hexstr: ascii_buffer
1143 /
1144
1145 Binary data of hexadecimal representation.
1146
1147 hexstr must contain an even number of hex digits (upper or lower case).
1148 This function is also available as "unhexlify()".
1149 [clinic start generated code]*/
1150
1151 static PyObject *
binascii_a2b_hex_impl(PyObject * module,Py_buffer * hexstr)1152 binascii_a2b_hex_impl(PyObject *module, Py_buffer *hexstr)
1153 /*[clinic end generated code: output=0cc1a139af0eeecb input=9e1e7f2f94db24fd]*/
1154 {
1155 const char* argbuf;
1156 Py_ssize_t arglen;
1157 PyObject *retval;
1158 char* retbuf;
1159 Py_ssize_t i, j;
1160
1161 argbuf = hexstr->buf;
1162 arglen = hexstr->len;
1163
1164 assert(arglen >= 0);
1165
1166 /* XXX What should we do about strings with an odd length? Should
1167 * we add an implicit leading zero, or a trailing zero? For now,
1168 * raise an exception.
1169 */
1170 if (arglen % 2) {
1171 PyErr_SetString(Error, "Odd-length string");
1172 return NULL;
1173 }
1174
1175 retval = PyBytes_FromStringAndSize(NULL, (arglen/2));
1176 if (!retval)
1177 return NULL;
1178 retbuf = PyBytes_AS_STRING(retval);
1179
1180 for (i=j=0; i < arglen; i += 2) {
1181 int top = to_int(Py_CHARMASK(argbuf[i]));
1182 int bot = to_int(Py_CHARMASK(argbuf[i+1]));
1183 if (top == -1 || bot == -1) {
1184 PyErr_SetString(Error,
1185 "Non-hexadecimal digit found");
1186 goto finally;
1187 }
1188 retbuf[j++] = (top << 4) + bot;
1189 }
1190 return retval;
1191
1192 finally:
1193 Py_DECREF(retval);
1194 return NULL;
1195 }
1196
1197 /*[clinic input]
1198 binascii.unhexlify = binascii.a2b_hex
1199
1200 Binary data of hexadecimal representation.
1201
1202 hexstr must contain an even number of hex digits (upper or lower case).
1203 [clinic start generated code]*/
1204
1205 static PyObject *
binascii_unhexlify_impl(PyObject * module,Py_buffer * hexstr)1206 binascii_unhexlify_impl(PyObject *module, Py_buffer *hexstr)
1207 /*[clinic end generated code: output=51a64c06c79629e3 input=dd8c012725f462da]*/
1208 {
1209 return binascii_a2b_hex_impl(module, hexstr);
1210 }
1211
/* Value of each 7-bit character code as a hexadecimal digit, or -1 when the
 * character is not a hex digit.  The leading comment on each row gives the
 * code of its first entry.
 */
static const int table_hex[128] = {
    /* 0x00          */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x08          */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x10          */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x18          */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x20          */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x28          */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x30 '0'..'7' */  0,  1,  2,  3,  4,  5,  6,  7,
    /* 0x38 '8','9'  */  8,  9, -1, -1, -1, -1, -1, -1,
    /* 0x40 'A'..'F' */ -1, 10, 11, 12, 13, 14, 15, -1,
    /* 0x48          */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x50          */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x58          */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x60 'a'..'f' */ -1, 10, 11, 12, 13, 14, 15, -1,
    /* 0x68          */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x70          */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x78          */ -1, -1, -1, -1, -1, -1, -1, -1
};

/* Look up the hex value of character code `c`.  The table has only 128
 * entries and the macro does no range check, so `c` must be below 128. */
#define hexval(c) (table_hex[(unsigned int)(c)])

/* Line-length limit that b2a_qp() checks against when deciding whether to
 * insert a soft line break. */
#define MAXLINESIZE 76
1226
1227
1228 /*[clinic input]
1229 binascii.a2b_qp
1230
1231 data: ascii_buffer
1232 header: int(c_default="0") = False
1233
1234 Decode a string of qp-encoded data.
1235 [clinic start generated code]*/
1236
1237 static PyObject *
binascii_a2b_qp_impl(PyObject * module,Py_buffer * data,int header)1238 binascii_a2b_qp_impl(PyObject *module, Py_buffer *data, int header)
1239 /*[clinic end generated code: output=e99f7846cfb9bc53 input=5187a0d3d8e54f3b]*/
1240 {
1241 Py_ssize_t in, out;
1242 char ch;
1243 const unsigned char *ascii_data;
1244 unsigned char *odata;
1245 Py_ssize_t datalen = 0;
1246 PyObject *rv;
1247
1248 ascii_data = data->buf;
1249 datalen = data->len;
1250
1251 /* We allocate the output same size as input, this is overkill.
1252 * The previous implementation used calloc() so we'll zero out the
1253 * memory here too, since PyMem_Malloc() does not guarantee that.
1254 */
1255 odata = (unsigned char *) PyMem_Malloc(datalen);
1256 if (odata == NULL) {
1257 PyErr_NoMemory();
1258 return NULL;
1259 }
1260 memset(odata, 0, datalen);
1261
1262 in = out = 0;
1263 while (in < datalen) {
1264 if (ascii_data[in] == '=') {
1265 in++;
1266 if (in >= datalen) break;
1267 /* Soft line breaks */
1268 if ((ascii_data[in] == '\n') || (ascii_data[in] == '\r')) {
1269 if (ascii_data[in] != '\n') {
1270 while (in < datalen && ascii_data[in] != '\n') in++;
1271 }
1272 if (in < datalen) in++;
1273 }
1274 else if (ascii_data[in] == '=') {
1275 /* broken case from broken python qp */
1276 odata[out++] = '=';
1277 in++;
1278 }
1279 else if ((in + 1 < datalen) &&
1280 ((ascii_data[in] >= 'A' && ascii_data[in] <= 'F') ||
1281 (ascii_data[in] >= 'a' && ascii_data[in] <= 'f') ||
1282 (ascii_data[in] >= '0' && ascii_data[in] <= '9')) &&
1283 ((ascii_data[in+1] >= 'A' && ascii_data[in+1] <= 'F') ||
1284 (ascii_data[in+1] >= 'a' && ascii_data[in+1] <= 'f') ||
1285 (ascii_data[in+1] >= '0' && ascii_data[in+1] <= '9'))) {
1286 /* hexval */
1287 ch = hexval(ascii_data[in]) << 4;
1288 in++;
1289 ch |= hexval(ascii_data[in]);
1290 in++;
1291 odata[out++] = ch;
1292 }
1293 else {
1294 odata[out++] = '=';
1295 }
1296 }
1297 else if (header && ascii_data[in] == '_') {
1298 odata[out++] = ' ';
1299 in++;
1300 }
1301 else {
1302 odata[out] = ascii_data[in];
1303 in++;
1304 out++;
1305 }
1306 }
1307 if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
1308 PyMem_Free(odata);
1309 return NULL;
1310 }
1311 PyMem_Free(odata);
1312 return rv;
1313 }
1314
/* Write the two upper-case hexadecimal digits of `ch` to s[0] (high nibble)
 * and s[1] (low nibble).  No terminator is written.  Always returns 0.
 */
static int
to_hex(unsigned char ch, unsigned char *s)
{
    static const char digits[] = "0123456789ABCDEF";

    s[0] = digits[(ch >> 4) & 0x0F];
    s[1] = digits[ch & 0x0F];
    return 0;
}
1325
1326 /* XXX: This is ridiculously complicated to be backward compatible
1327 * (mostly) with the quopri module. It doesn't re-create the quopri
1328 * module bug where text ending in CRLF has the CR encoded */
1329
1330 /*[clinic input]
1331 binascii.b2a_qp
1332
1333 data: Py_buffer
1334 quotetabs: int(c_default="0") = False
1335 istext: int(c_default="1") = True
1336 header: int(c_default="0") = False
1337
1338 Encode a string using quoted-printable encoding.
1339
1340 On encoding, when istext is set, newlines are not encoded, and white
1341 space at end of lines is. When istext is not set, \r and \n (CR/LF)
1342 are both encoded. When quotetabs is set, space and tabs are encoded.
1343 [clinic start generated code]*/
1344
1345 static PyObject *
binascii_b2a_qp_impl(PyObject * module,Py_buffer * data,int quotetabs,int istext,int header)1346 binascii_b2a_qp_impl(PyObject *module, Py_buffer *data, int quotetabs,
1347 int istext, int header)
1348 /*[clinic end generated code: output=e9884472ebb1a94c input=7f2a9aaa008e92b2]*/
1349 {
1350 Py_ssize_t in, out;
1351 const unsigned char *databuf;
1352 unsigned char *odata;
1353 Py_ssize_t datalen = 0, odatalen = 0;
1354 PyObject *rv;
1355 unsigned int linelen = 0;
1356 unsigned char ch;
1357 int crlf = 0;
1358 const unsigned char *p;
1359
1360 databuf = data->buf;
1361 datalen = data->len;
1362
1363 /* See if this string is using CRLF line ends */
1364 /* XXX: this function has the side effect of converting all of
1365 * the end of lines to be the same depending on this detection
1366 * here */
1367 p = (const unsigned char *) memchr(databuf, '\n', datalen);
1368 if ((p != NULL) && (p > databuf) && (*(p-1) == '\r'))
1369 crlf = 1;
1370
1371 /* First, scan to see how many characters need to be encoded */
1372 in = 0;
1373 while (in < datalen) {
1374 Py_ssize_t delta = 0;
1375 if ((databuf[in] > 126) ||
1376 (databuf[in] == '=') ||
1377 (header && databuf[in] == '_') ||
1378 ((databuf[in] == '.') && (linelen == 0) &&
1379 (in + 1 == datalen || databuf[in+1] == '\n' ||
1380 databuf[in+1] == '\r' || databuf[in+1] == 0)) ||
1381 (!istext && ((databuf[in] == '\r') || (databuf[in] == '\n'))) ||
1382 ((databuf[in] == '\t' || databuf[in] == ' ') && (in + 1 == datalen)) ||
1383 ((databuf[in] < 33) &&
1384 (databuf[in] != '\r') && (databuf[in] != '\n') &&
1385 (quotetabs || ((databuf[in] != '\t') && (databuf[in] != ' ')))))
1386 {
1387 if ((linelen + 3) >= MAXLINESIZE) {
1388 linelen = 0;
1389 if (crlf)
1390 delta += 3;
1391 else
1392 delta += 2;
1393 }
1394 linelen += 3;
1395 delta += 3;
1396 in++;
1397 }
1398 else {
1399 if (istext &&
1400 ((databuf[in] == '\n') ||
1401 ((in+1 < datalen) && (databuf[in] == '\r') &&
1402 (databuf[in+1] == '\n'))))
1403 {
1404 linelen = 0;
1405 /* Protect against whitespace on end of line */
1406 if (in && ((databuf[in-1] == ' ') || (databuf[in-1] == '\t')))
1407 delta += 2;
1408 if (crlf)
1409 delta += 2;
1410 else
1411 delta += 1;
1412 if (databuf[in] == '\r')
1413 in += 2;
1414 else
1415 in++;
1416 }
1417 else {
1418 if ((in + 1 != datalen) &&
1419 (databuf[in+1] != '\n') &&
1420 (linelen + 1) >= MAXLINESIZE) {
1421 linelen = 0;
1422 if (crlf)
1423 delta += 3;
1424 else
1425 delta += 2;
1426 }
1427 linelen++;
1428 delta++;
1429 in++;
1430 }
1431 }
1432 if (PY_SSIZE_T_MAX - delta < odatalen) {
1433 PyErr_NoMemory();
1434 return NULL;
1435 }
1436 odatalen += delta;
1437 }
1438
1439 /* We allocate the output same size as input, this is overkill.
1440 * The previous implementation used calloc() so we'll zero out the
1441 * memory here too, since PyMem_Malloc() does not guarantee that.
1442 */
1443 odata = (unsigned char *) PyMem_Malloc(odatalen);
1444 if (odata == NULL) {
1445 PyErr_NoMemory();
1446 return NULL;
1447 }
1448 memset(odata, 0, odatalen);
1449
1450 in = out = linelen = 0;
1451 while (in < datalen) {
1452 if ((databuf[in] > 126) ||
1453 (databuf[in] == '=') ||
1454 (header && databuf[in] == '_') ||
1455 ((databuf[in] == '.') && (linelen == 0) &&
1456 (in + 1 == datalen || databuf[in+1] == '\n' ||
1457 databuf[in+1] == '\r' || databuf[in+1] == 0)) ||
1458 (!istext && ((databuf[in] == '\r') || (databuf[in] == '\n'))) ||
1459 ((databuf[in] == '\t' || databuf[in] == ' ') && (in + 1 == datalen)) ||
1460 ((databuf[in] < 33) &&
1461 (databuf[in] != '\r') && (databuf[in] != '\n') &&
1462 (quotetabs || ((databuf[in] != '\t') && (databuf[in] != ' ')))))
1463 {
1464 if ((linelen + 3 )>= MAXLINESIZE) {
1465 odata[out++] = '=';
1466 if (crlf) odata[out++] = '\r';
1467 odata[out++] = '\n';
1468 linelen = 0;
1469 }
1470 odata[out++] = '=';
1471 to_hex(databuf[in], &odata[out]);
1472 out += 2;
1473 in++;
1474 linelen += 3;
1475 }
1476 else {
1477 if (istext &&
1478 ((databuf[in] == '\n') ||
1479 ((in+1 < datalen) && (databuf[in] == '\r') &&
1480 (databuf[in+1] == '\n'))))
1481 {
1482 linelen = 0;
1483 /* Protect against whitespace on end of line */
1484 if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
1485 ch = odata[out-1];
1486 odata[out-1] = '=';
1487 to_hex(ch, &odata[out]);
1488 out += 2;
1489 }
1490
1491 if (crlf) odata[out++] = '\r';
1492 odata[out++] = '\n';
1493 if (databuf[in] == '\r')
1494 in += 2;
1495 else
1496 in++;
1497 }
1498 else {
1499 if ((in + 1 != datalen) &&
1500 (databuf[in+1] != '\n') &&
1501 (linelen + 1) >= MAXLINESIZE) {
1502 odata[out++] = '=';
1503 if (crlf) odata[out++] = '\r';
1504 odata[out++] = '\n';
1505 linelen = 0;
1506 }
1507 linelen++;
1508 if (header && databuf[in] == ' ') {
1509 odata[out++] = '_';
1510 in++;
1511 }
1512 else {
1513 odata[out++] = databuf[in++];
1514 }
1515 }
1516 }
1517 }
1518 if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
1519 PyMem_Free(odata);
1520 return NULL;
1521 }
1522 PyMem_Free(odata);
1523 return rv;
1524 }
1525
1526 /* List of functions defined in the module */
1527
/* Method table handed to the module definition below.  Each *_METHODDEF
 * name is a macro defined elsewhere (presumably in the Argument Clinic
 * generated header for this file -- confirm); the entries carry no
 * separating commas, so each macro must supply its own trailing comma.
 */
static struct PyMethodDef binascii_module_methods[] = {
    BINASCII_A2B_UU_METHODDEF
    BINASCII_B2A_UU_METHODDEF
    BINASCII_A2B_BASE64_METHODDEF
    BINASCII_B2A_BASE64_METHODDEF
    BINASCII_A2B_HQX_METHODDEF
    BINASCII_B2A_HQX_METHODDEF
    BINASCII_A2B_HEX_METHODDEF
    BINASCII_B2A_HEX_METHODDEF
    BINASCII_HEXLIFY_METHODDEF
    BINASCII_UNHEXLIFY_METHODDEF
    BINASCII_RLECODE_HQX_METHODDEF
    BINASCII_RLEDECODE_HQX_METHODDEF
    BINASCII_CRC_HQX_METHODDEF
    BINASCII_CRC32_METHODDEF
    BINASCII_A2B_QP_METHODDEF
    BINASCII_B2A_QP_METHODDEF
    {NULL, NULL} /* sentinel */
};
1547
1548
1549 /* Initialization function for the module (*must* be called PyInit_binascii) */
/* Module docstring, referenced from the module definition below. */
PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");


/* Module definition passed to PyModule_Create() in PyInit_binascii().
 * Fields are given positionally, in PyModuleDef declaration order.
 */
static struct PyModuleDef binasciimodule = {
    PyModuleDef_HEAD_INIT,
    "binascii",                 /* module name */
    doc_binascii,               /* module docstring */
    -1,                         /* m_size: -1, no per-module state block
                                   (see the PyModuleDef documentation) */
    binascii_module_methods,    /* method table */
    NULL,                       /* the remaining optional fields are unused */
    NULL,
    NULL,
    NULL
};
1564
1565 PyMODINIT_FUNC
PyInit_binascii(void)1566 PyInit_binascii(void)
1567 {
1568 PyObject *m, *d;
1569
1570 /* Create the module and add the functions */
1571 m = PyModule_Create(&binasciimodule);
1572 if (m == NULL)
1573 return NULL;
1574
1575 d = PyModule_GetDict(m);
1576
1577 Error = PyErr_NewException("binascii.Error", PyExc_ValueError, NULL);
1578 PyDict_SetItemString(d, "Error", Error);
1579 Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
1580 PyDict_SetItemString(d, "Incomplete", Incomplete);
1581 if (PyErr_Occurred()) {
1582 Py_DECREF(m);
1583 m = NULL;
1584 }
1585 return m;
1586 }
1587