1 /*
2 ** Routines to represent binary data in ASCII and vice-versa
3 **
4 ** This module currently supports the following encodings:
5 ** uuencode:
6 ** each line encodes 45 bytes (except possibly the last)
7 ** First char encodes (binary) length, rest data
8 ** each char encodes 6 bits, as follows:
9 ** binary: 01234567 abcdefgh ijklmnop
10 ** ascii: 012345 67abcd efghij klmnop
11 ** ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
12 ** short binary data is zero-extended (so the bits are always in the
13 ** right place), this does *not* reflect in the length.
14 ** base64:
15 ** Line breaks are insignificant, but lines are at most 76 chars
**      each char encodes 6 bits, in similar order as uuencode/hqx. Encoding
17 ** is done via a table.
18 ** Short binary data is filled (in ASCII) with '='.
19 ** hqx:
20 ** File starts with introductory text, real data starts and ends
21 ** with colons.
22 ** Data consists of three similar parts: info, datafork, resourcefork.
23 ** Each part is protected (at the end) with a 16-bit crc
24 ** The binary data is run-length encoded, and then ascii-fied:
25 ** binary: 01234567 abcdefgh ijklmnop
26 ** ascii: 012345 67abcd efghij klmnop
27 ** ASCII encoding is table-driven, see the code.
28 ** Short binary data results in the runt ascii-byte being output with
29 ** the bits in the right place.
30 **
31 ** While I was reading dozens of programs that encode or decode the formats
32 ** here (documentation? hihi:-) I have formulated Jansen's Observation:
33 **
34 ** Programs that encode binary data in ASCII are written in
35 ** such a style that they are as unreadable as possible. Devices used
36 ** include unnecessary global variables, burying important tables
37 ** in unrelated sourcefiles, putting functions in include files,
38 ** using seemingly-descriptive variable names for different purposes,
39 ** calls to empty subroutines and a host of others.
40 **
41 ** I have attempted to break with this tradition, but I guess that that
42 ** does make the performance sub-optimal. Oh well, too bad...
43 **
44 ** Jack Jansen, CWI, July 1995.
45 **
46 ** Added support for quoted-printable encoding, based on rfc 1521 et al
47 ** quoted-printable encoding specifies that non printable characters (anything
48 ** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
49 ** of the character. It also specifies some other behavior to enable 8bit data
50 ** in a mail message with little difficulty (maximum line sizes, protecting
51 ** some cases of whitespace, etc).
52 **
53 ** Brandon Long, September 2001.
54 */
55
56 #define PY_SSIZE_T_CLEAN
57
58 #include "Python.h"
59 #include "pystrhex.h"
60 #ifdef USE_ZLIB_CRC32
61 #include "zlib.h"
62 #endif
63
/* Module state: the two exception objects the functions in this file raise. */
typedef struct binascii_state {
    PyObject *Error;        /* set for malformed input, e.g. "Illegal char" */
    PyObject *Incomplete;   /* set by the hqx decoders when input ends early */
} binascii_state;
68
69 static binascii_state *
get_binascii_state(PyObject * module)70 get_binascii_state(PyObject *module)
71 {
72 return (binascii_state *)PyModule_GetState(module);
73 }
74
75 /*
76 ** hqx lookup table, ascii->binary.
77 */
78
79 #define RUNCHAR 0x90
80
81 #define DONE 0x7F
82 #define SKIP 0x7E
83 #define FAIL 0x7D
84
/*
** Indexed by input byte.  An entry is either the 6-bit value (0x00-0x3F) of
** a valid hqx character or one of the markers defined above:
**   SKIP - byte is ignored (only '\n' and '\r'),
**   DONE - the terminating colon,
**   FAIL - byte is not legal in hqx data.
** The comment line above each row names the eight characters it covers.
*/
static const unsigned char table_a2b_hqx[256] = {
/*       ^@    ^A    ^B    ^C    ^D    ^E    ^F    ^G   */
/* 0*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       \b    \t    \n    ^K    ^L    \r    ^N    ^O   */
/* 1*/  FAIL, FAIL, SKIP, FAIL, FAIL, SKIP, FAIL, FAIL,
/*       ^P    ^Q    ^R    ^S    ^T    ^U    ^V    ^W   */
/* 2*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       ^X    ^Y    ^Z    ^[    ^\    ^]    ^^    ^_   */
/* 3*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*              !     "     #     $     %     &     '   */
/* 4*/  FAIL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
/*        (     )     *     +     ,     -     .     /   */
/* 5*/  0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, FAIL, FAIL,
/*        0     1     2     3     4     5     6     7   */
/* 6*/  0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, FAIL,
/*        8     9     :     ;     <     =     >     ?   */
/* 7*/  0x14, 0x15, DONE, FAIL, FAIL, FAIL, FAIL, FAIL,
/*        @     A     B     C     D     E     F     G   */
/* 8*/  0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D,
/*        H     I     J     K     L     M     N     O   */
/* 9*/  0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, FAIL,
/*        P     Q     R     S     T     U     V     W   */
/*10*/  0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, FAIL,
/*        X     Y     Z     [     \     ]     ^     _   */
/*11*/  0x2C, 0x2D, 0x2E, 0x2F, FAIL, FAIL, FAIL, FAIL,
/*        `     a     b     c     d     e     f     g   */
/*12*/  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, FAIL,
/*        h     i     j     k     l     m     n     o   */
/*13*/  0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, FAIL, FAIL,
/*        p     q     r     s     t     u     v     w   */
/*14*/  0x3D, 0x3E, 0x3F, FAIL, FAIL, FAIL, FAIL, FAIL,
/*        x     y     z     {     |     }     ~    ^?   */
/*15*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/* Bytes 0x80-0xFF: none of them is a legal hqx character. */
/*16*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
};
135
/* hqx lookup table, binary->ascii: indexed by a 6-bit value (0-63), gives
   the character that b2a_hqx emits for it. */
static const unsigned char table_b2a_hqx[] =
"!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr";
138
/*
** base64 lookup table, ascii->binary, indexed by input byte.
** Alphabet characters map to their 6-bit value.  Every other entry is
** written as -1; because the element type is unsigned char this is stored
** as UCHAR_MAX, and a2b_base64 skips any byte whose entry is >= 64.
** The pad character '=' has entry 0, but a2b_base64 tests for BASE64_PAD
** before consulting this table.
*/
static const unsigned char table_a2b_base64[] = {
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
    52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */
    -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
    15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
    -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
    41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1,

    /* Bytes 0x80-0xFF: never part of the base64 alphabet. */
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
};
158
159 #define BASE64_PAD '='
160
161 /* Max binary chunk size; limited only by available memory */
162 #define BASE64_MAXBIN ((PY_SSIZE_T_MAX - 3) / 2)
163
/* base64 lookup table, binary->ascii: indexed by a 6-bit value (0-63),
   gives the character that b2a_base64 emits for it. */
static const unsigned char table_b2a_base64[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
166
167
168
/*
** Lookup table for binascii.crc_hqx (documented there as CRC-CCITT).
** crc_hqx indexes it with the high byte of the running 16-bit CRC XORed
** with the next data byte, so it has one 16-bit entry per byte value.
*/
static const unsigned short crctab_hqx[256] = {
    0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
    0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
    0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
    0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
    0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
    0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
    0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
    0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
    0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
    0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
    0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
    0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
    0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
    0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
    0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
    0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
    0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
    0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
    0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
    0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
    0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
    0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
    0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
    0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
    0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
    0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
    0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
    0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
    0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
    0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
    0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
    0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
};
203
204 /*[clinic input]
205 module binascii
206 [clinic start generated code]*/
207 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=de89fb46bcaf3fec]*/
208
209 /*[python input]
210
211 class ascii_buffer_converter(CConverter):
212 type = 'Py_buffer'
213 converter = 'ascii_buffer_converter'
214 impl_by_reference = True
215 c_default = "{NULL, NULL}"
216
217 def cleanup(self):
218 name = self.name
219 return "".join(["if (", name, ".obj)\n PyBuffer_Release(&", name, ");\n"])
220
221 [python start generated code]*/
222 /*[python end generated code: output=da39a3ee5e6b4b0d input=3eb7b63610da92cd]*/
223
224 static int
ascii_buffer_converter(PyObject * arg,Py_buffer * buf)225 ascii_buffer_converter(PyObject *arg, Py_buffer *buf)
226 {
227 if (arg == NULL) {
228 PyBuffer_Release(buf);
229 return 1;
230 }
231 if (PyUnicode_Check(arg)) {
232 if (PyUnicode_READY(arg) < 0)
233 return 0;
234 if (!PyUnicode_IS_ASCII(arg)) {
235 PyErr_SetString(PyExc_ValueError,
236 "string argument should contain only ASCII characters");
237 return 0;
238 }
239 assert(PyUnicode_KIND(arg) == PyUnicode_1BYTE_KIND);
240 buf->buf = (void *) PyUnicode_1BYTE_DATA(arg);
241 buf->len = PyUnicode_GET_LENGTH(arg);
242 buf->obj = NULL;
243 return 1;
244 }
245 if (PyObject_GetBuffer(arg, buf, PyBUF_SIMPLE) != 0) {
246 PyErr_Format(PyExc_TypeError,
247 "argument should be bytes, buffer or ASCII string, "
248 "not '%.100s'", Py_TYPE(arg)->tp_name);
249 return 0;
250 }
251 if (!PyBuffer_IsContiguous(buf, 'C')) {
252 PyErr_Format(PyExc_TypeError,
253 "argument should be a contiguous buffer, "
254 "not '%.100s'", Py_TYPE(arg)->tp_name);
255 PyBuffer_Release(buf);
256 return 0;
257 }
258 return Py_CLEANUP_SUPPORTED;
259 }
260
261 #include "clinic/binascii.c.h"
262
263 /*[clinic input]
264 binascii.a2b_uu
265
266 data: ascii_buffer
267 /
268
269 Decode a line of uuencoded data.
270 [clinic start generated code]*/
271
272 static PyObject *
binascii_a2b_uu_impl(PyObject * module,Py_buffer * data)273 binascii_a2b_uu_impl(PyObject *module, Py_buffer *data)
274 /*[clinic end generated code: output=e027f8e0b0598742 input=7cafeaf73df63d1c]*/
275 {
276 const unsigned char *ascii_data;
277 unsigned char *bin_data;
278 int leftbits = 0;
279 unsigned char this_ch;
280 unsigned int leftchar = 0;
281 PyObject *rv;
282 Py_ssize_t ascii_len, bin_len;
283 binascii_state *state;
284
285 ascii_data = data->buf;
286 ascii_len = data->len;
287
288 assert(ascii_len >= 0);
289
290 /* First byte: binary data length (in bytes) */
291 bin_len = (*ascii_data++ - ' ') & 077;
292 ascii_len--;
293
294 /* Allocate the buffer */
295 if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len)) == NULL )
296 return NULL;
297 bin_data = (unsigned char *)PyBytes_AS_STRING(rv);
298
299 for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
300 /* XXX is it really best to add NULs if there's no more data */
301 this_ch = (ascii_len > 0) ? *ascii_data : 0;
302 if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
303 /*
304 ** Whitespace. Assume some spaces got eaten at
305 ** end-of-line. (We check this later)
306 */
307 this_ch = 0;
308 } else {
309 /* Check the character for legality
310 ** The 64 in stead of the expected 63 is because
311 ** there are a few uuencodes out there that use
312 ** '`' as zero instead of space.
313 */
314 if ( this_ch < ' ' || this_ch > (' ' + 64)) {
315 state = PyModule_GetState(module);
316 if (state == NULL) {
317 return NULL;
318 }
319 PyErr_SetString(state->Error, "Illegal char");
320 Py_DECREF(rv);
321 return NULL;
322 }
323 this_ch = (this_ch - ' ') & 077;
324 }
325 /*
326 ** Shift it in on the low end, and see if there's
327 ** a byte ready for output.
328 */
329 leftchar = (leftchar << 6) | (this_ch);
330 leftbits += 6;
331 if ( leftbits >= 8 ) {
332 leftbits -= 8;
333 *bin_data++ = (leftchar >> leftbits) & 0xff;
334 leftchar &= ((1 << leftbits) - 1);
335 bin_len--;
336 }
337 }
338 /*
339 ** Finally, check that if there's anything left on the line
340 ** that it's whitespace only.
341 */
342 while( ascii_len-- > 0 ) {
343 this_ch = *ascii_data++;
344 /* Extra '`' may be written as padding in some cases */
345 if ( this_ch != ' ' && this_ch != ' '+64 &&
346 this_ch != '\n' && this_ch != '\r' ) {
347 state = PyModule_GetState(module);
348 if (state == NULL) {
349 return NULL;
350 }
351 PyErr_SetString(state->Error, "Trailing garbage");
352 Py_DECREF(rv);
353 return NULL;
354 }
355 }
356 return rv;
357 }
358
359 /*[clinic input]
360 binascii.b2a_uu
361
362 data: Py_buffer
363 /
364 *
365 backtick: bool(accept={int}) = False
366
367 Uuencode line of data.
368 [clinic start generated code]*/
369
370 static PyObject *
binascii_b2a_uu_impl(PyObject * module,Py_buffer * data,int backtick)371 binascii_b2a_uu_impl(PyObject *module, Py_buffer *data, int backtick)
372 /*[clinic end generated code: output=b1b99de62d9bbeb8 input=b26bc8d32b6ed2f6]*/
373 {
374 unsigned char *ascii_data;
375 const unsigned char *bin_data;
376 int leftbits = 0;
377 unsigned char this_ch;
378 unsigned int leftchar = 0;
379 binascii_state *state;
380 Py_ssize_t bin_len, out_len;
381 _PyBytesWriter writer;
382
383 _PyBytesWriter_Init(&writer);
384 bin_data = data->buf;
385 bin_len = data->len;
386 if ( bin_len > 45 ) {
387 /* The 45 is a limit that appears in all uuencode's */
388 state = PyModule_GetState(module);
389 if (state == NULL) {
390 return NULL;
391 }
392 PyErr_SetString(state->Error, "At most 45 bytes at once");
393 return NULL;
394 }
395
396 /* We're lazy and allocate to much (fixed up later) */
397 out_len = 2 + (bin_len + 2) / 3 * 4;
398 ascii_data = _PyBytesWriter_Alloc(&writer, out_len);
399 if (ascii_data == NULL)
400 return NULL;
401
402 /* Store the length */
403 if (backtick && !bin_len)
404 *ascii_data++ = '`';
405 else
406 *ascii_data++ = ' ' + (unsigned char)bin_len;
407
408 for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
409 /* Shift the data (or padding) into our buffer */
410 if ( bin_len > 0 ) /* Data */
411 leftchar = (leftchar << 8) | *bin_data;
412 else /* Padding */
413 leftchar <<= 8;
414 leftbits += 8;
415
416 /* See if there are 6-bit groups ready */
417 while ( leftbits >= 6 ) {
418 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
419 leftbits -= 6;
420 if (backtick && !this_ch)
421 *ascii_data++ = '`';
422 else
423 *ascii_data++ = this_ch + ' ';
424 }
425 }
426 *ascii_data++ = '\n'; /* Append a courtesy newline */
427
428 return _PyBytesWriter_Finish(&writer, ascii_data);
429 }
430
431 /*[clinic input]
432 binascii.a2b_base64
433
434 data: ascii_buffer
435 /
436
437 Decode a line of base64 data.
438 [clinic start generated code]*/
439
static PyObject *
binascii_a2b_base64_impl(PyObject *module, Py_buffer *data)
/*[clinic end generated code: output=0628223f19fd3f9b input=5872acf6e1cac243]*/
{
    /*
    ** Decode base64 text into a new bytes object.
    **
    ** Bytes that are not in the base64 alphabet are skipped.  Decoding stops
    ** at the first pad sequence that completes a group of four.  If the
    ** input ends in the middle of a group, binascii.Error is set and NULL
    ** is returned.
    */
    assert(data->len >= 0);

    const unsigned char *ascii_data = data->buf;
    size_t ascii_len = data->len;

    /* Allocate the buffer */
    Py_ssize_t bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
    _PyBytesWriter writer;
    _PyBytesWriter_Init(&writer);
    unsigned char *bin_data = _PyBytesWriter_Alloc(&writer, bin_len);
    if (bin_data == NULL)
        return NULL;
    /* Kept only to compute how much was decoded for the error message. */
    unsigned char *bin_data_start = bin_data;

    /* quad_pos: how many data characters of the current group of four have
       been consumed (0-3).
       leftchar: bits of the previous data character not yet written out.
       pads: '=' characters counted since the last data character; it is
       only incremented while quad_pos >= 2 (see the condition below). */
    int quad_pos = 0;
    unsigned char leftchar = 0;
    int pads = 0;
    for (size_t i = 0; i < ascii_len; i++) {
        unsigned char this_ch = ascii_data[i];

        /* Check for pad sequences and ignore
        ** the invalid ones.
        */
        if (this_ch == BASE64_PAD) {
            /* Note the side effect: ++pads is evaluated only when
               quad_pos >= 2, because of the short-circuiting &&. */
            if (quad_pos >= 2 && quad_pos + ++pads >= 4) {
                /* A pad sequence means no more input.
                ** We've already interpreted the data
                ** from the quad at this point.
                */
                goto done;
            }
            continue;
        }

        /* Translate to the 6-bit value; entries >= 64 mark bytes outside
           the alphabet, which are skipped without resetting `pads`. */
        this_ch = table_a2b_base64[this_ch];
        if (this_ch >= 64) {
            continue;
        }
        pads = 0;

        /* Four 6-bit values make three output bytes: the first character
           only primes leftchar, each later one completes a byte. */
        switch (quad_pos) {
        case 0:
            quad_pos = 1;
            leftchar = this_ch;
            break;
        case 1:
            quad_pos = 2;
            *bin_data++ = (leftchar << 2) | (this_ch >> 4);
            leftchar = this_ch & 0x0f;
            break;
        case 2:
            quad_pos = 3;
            *bin_data++ = (leftchar << 4) | (this_ch >> 2);
            leftchar = this_ch & 0x03;
            break;
        case 3:
            quad_pos = 0;
            *bin_data++ = (leftchar << 6) | (this_ch);
            leftchar = 0;
            break;
        }
    }

    /* Input ran out in the middle of a group without terminating padding. */
    if (quad_pos != 0) {
        binascii_state *state = PyModule_GetState(module);
        if (state == NULL) {
            /* error already set, from PyModule_GetState */
        } else if (quad_pos == 1) {
            /*
            ** There is exactly one extra valid, non-padding, base64 character.
            ** This is an invalid length, as there is no possible input that
            ** could encoded into such a base64 string.
            */
            PyErr_Format(state->Error,
                         "Invalid base64-encoded string: "
                         "number of data characters (%zd) cannot be 1 more "
                         "than a multiple of 4",
                         (bin_data - bin_data_start) / 3 * 4 + 1);
        } else {
            PyErr_SetString(state->Error, "Incorrect padding");
        }
        _PyBytesWriter_Dealloc(&writer);
        return NULL;
    }

done:
    /* Shrinks the result to the number of bytes actually written. */
    return _PyBytesWriter_Finish(&writer, bin_data);
}
532
533
534 /*[clinic input]
535 binascii.b2a_base64
536
537 data: Py_buffer
538 /
539 *
540 newline: bool(accept={int}) = True
541
542 Base64-code line of data.
543 [clinic start generated code]*/
544
545 static PyObject *
binascii_b2a_base64_impl(PyObject * module,Py_buffer * data,int newline)546 binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int newline)
547 /*[clinic end generated code: output=4ad62c8e8485d3b3 input=6083dac5777fa45d]*/
548 {
549 unsigned char *ascii_data;
550 const unsigned char *bin_data;
551 int leftbits = 0;
552 unsigned char this_ch;
553 unsigned int leftchar = 0;
554 Py_ssize_t bin_len, out_len;
555 _PyBytesWriter writer;
556 binascii_state *state;
557
558 bin_data = data->buf;
559 bin_len = data->len;
560 _PyBytesWriter_Init(&writer);
561
562 assert(bin_len >= 0);
563
564 if ( bin_len > BASE64_MAXBIN ) {
565 state = PyModule_GetState(module);
566 if (state == NULL) {
567 return NULL;
568 }
569 PyErr_SetString(state->Error, "Too much data for base64 line");
570 return NULL;
571 }
572
573 /* We're lazy and allocate too much (fixed up later).
574 "+2" leaves room for up to two pad characters.
575 Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
576 out_len = bin_len*2 + 2;
577 if (newline)
578 out_len++;
579 ascii_data = _PyBytesWriter_Alloc(&writer, out_len);
580 if (ascii_data == NULL)
581 return NULL;
582
583 for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
584 /* Shift the data into our buffer */
585 leftchar = (leftchar << 8) | *bin_data;
586 leftbits += 8;
587
588 /* See if there are 6-bit groups ready */
589 while ( leftbits >= 6 ) {
590 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
591 leftbits -= 6;
592 *ascii_data++ = table_b2a_base64[this_ch];
593 }
594 }
595 if ( leftbits == 2 ) {
596 *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
597 *ascii_data++ = BASE64_PAD;
598 *ascii_data++ = BASE64_PAD;
599 } else if ( leftbits == 4 ) {
600 *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
601 *ascii_data++ = BASE64_PAD;
602 }
603 if (newline)
604 *ascii_data++ = '\n'; /* Append a courtesy newline */
605
606 return _PyBytesWriter_Finish(&writer, ascii_data);
607 }
608
609 /*[clinic input]
610 binascii.a2b_hqx
611
612 data: ascii_buffer
613 /
614
615 Decode .hqx coding.
616 [clinic start generated code]*/
617
618 static PyObject *
binascii_a2b_hqx_impl(PyObject * module,Py_buffer * data)619 binascii_a2b_hqx_impl(PyObject *module, Py_buffer *data)
620 /*[clinic end generated code: output=4d6d8c54d54ea1c1 input=0d914c680e0eed55]*/
621 {
622 if (PyErr_WarnEx(PyExc_DeprecationWarning,
623 "binascii.a2b_hqx() is deprecated", 1) < 0) {
624 return NULL;
625 }
626
627 const unsigned char *ascii_data;
628 unsigned char *bin_data;
629 int leftbits = 0;
630 unsigned char this_ch;
631 unsigned int leftchar = 0;
632 PyObject *res;
633 Py_ssize_t len;
634 int done = 0;
635 _PyBytesWriter writer;
636 binascii_state *state;
637
638 ascii_data = data->buf;
639 len = data->len;
640 _PyBytesWriter_Init(&writer);
641
642 assert(len >= 0);
643
644 if (len > PY_SSIZE_T_MAX - 2)
645 return PyErr_NoMemory();
646
647 /* Allocate a string that is too big (fixed later)
648 Add two to the initial length to prevent interning which
649 would preclude subsequent resizing. */
650 bin_data = _PyBytesWriter_Alloc(&writer, len + 2);
651 if (bin_data == NULL)
652 return NULL;
653
654 for( ; len > 0 ; len--, ascii_data++ ) {
655 /* Get the byte and look it up */
656 this_ch = table_a2b_hqx[*ascii_data];
657 if ( this_ch == SKIP )
658 continue;
659 if ( this_ch == FAIL ) {
660 state = PyModule_GetState(module);
661 if (state == NULL) {
662 return NULL;
663 }
664 PyErr_SetString(state->Error, "Illegal char");
665 _PyBytesWriter_Dealloc(&writer);
666 return NULL;
667 }
668 if ( this_ch == DONE ) {
669 /* The terminating colon */
670 done = 1;
671 break;
672 }
673
674 /* Shift it into the buffer and see if any bytes are ready */
675 leftchar = (leftchar << 6) | (this_ch);
676 leftbits += 6;
677 if ( leftbits >= 8 ) {
678 leftbits -= 8;
679 *bin_data++ = (leftchar >> leftbits) & 0xff;
680 leftchar &= ((1 << leftbits) - 1);
681 }
682 }
683
684 if ( leftbits && !done ) {
685 state = PyModule_GetState(module);
686 if (state == NULL) {
687 return NULL;
688 }
689 PyErr_SetString(state->Incomplete,
690 "String has incomplete number of bytes");
691 _PyBytesWriter_Dealloc(&writer);
692 return NULL;
693 }
694
695 res = _PyBytesWriter_Finish(&writer, bin_data);
696 if (res == NULL)
697 return NULL;
698 return Py_BuildValue("Ni", res, done);
699 }
700
701
702 /*[clinic input]
703 binascii.rlecode_hqx
704
705 data: Py_buffer
706 /
707
708 Binhex RLE-code binary data.
709 [clinic start generated code]*/
710
711 static PyObject *
binascii_rlecode_hqx_impl(PyObject * module,Py_buffer * data)712 binascii_rlecode_hqx_impl(PyObject *module, Py_buffer *data)
713 /*[clinic end generated code: output=393d79338f5f5629 input=e1f1712447a82b09]*/
714 {
715 if (PyErr_WarnEx(PyExc_DeprecationWarning,
716 "binascii.rlecode_hqx() is deprecated", 1) < 0) {
717 return NULL;
718 }
719
720 const unsigned char *in_data;
721 unsigned char *out_data;
722 unsigned char ch;
723 Py_ssize_t in, inend, len;
724 _PyBytesWriter writer;
725
726 _PyBytesWriter_Init(&writer);
727 in_data = data->buf;
728 len = data->len;
729
730 assert(len >= 0);
731
732 if (len > PY_SSIZE_T_MAX / 2 - 2)
733 return PyErr_NoMemory();
734
735 /* Worst case: output is twice as big as input (fixed later) */
736 out_data = _PyBytesWriter_Alloc(&writer, len * 2 + 2);
737 if (out_data == NULL)
738 return NULL;
739
740 for( in=0; in<len; in++) {
741 ch = in_data[in];
742 if ( ch == RUNCHAR ) {
743 /* RUNCHAR. Escape it. */
744 *out_data++ = RUNCHAR;
745 *out_data++ = 0;
746 } else {
747 /* Check how many following are the same */
748 for(inend=in+1;
749 inend<len && in_data[inend] == ch &&
750 inend < in+255;
751 inend++) ;
752 if ( inend - in > 3 ) {
753 /* More than 3 in a row. Output RLE. */
754 *out_data++ = ch;
755 *out_data++ = RUNCHAR;
756 *out_data++ = (unsigned char) (inend-in);
757 in = inend-1;
758 } else {
759 /* Less than 3. Output the byte itself */
760 *out_data++ = ch;
761 }
762 }
763 }
764
765 return _PyBytesWriter_Finish(&writer, out_data);
766 }
767
768
769 /*[clinic input]
770 binascii.b2a_hqx
771
772 data: Py_buffer
773 /
774
775 Encode .hqx data.
776 [clinic start generated code]*/
777
778 static PyObject *
binascii_b2a_hqx_impl(PyObject * module,Py_buffer * data)779 binascii_b2a_hqx_impl(PyObject *module, Py_buffer *data)
780 /*[clinic end generated code: output=d0aa5a704bc9f7de input=9596ebe019fe12ba]*/
781 {
782 if (PyErr_WarnEx(PyExc_DeprecationWarning,
783 "binascii.b2a_hqx() is deprecated", 1) < 0) {
784 return NULL;
785 }
786
787 unsigned char *ascii_data;
788 const unsigned char *bin_data;
789 int leftbits = 0;
790 unsigned char this_ch;
791 unsigned int leftchar = 0;
792 Py_ssize_t len;
793 _PyBytesWriter writer;
794
795 bin_data = data->buf;
796 len = data->len;
797 _PyBytesWriter_Init(&writer);
798
799 assert(len >= 0);
800
801 if (len > PY_SSIZE_T_MAX / 2 - 2)
802 return PyErr_NoMemory();
803
804 /* Allocate a buffer that is at least large enough */
805 ascii_data = _PyBytesWriter_Alloc(&writer, len * 2 + 2);
806 if (ascii_data == NULL)
807 return NULL;
808
809 for( ; len > 0 ; len--, bin_data++ ) {
810 /* Shift into our buffer, and output any 6bits ready */
811 leftchar = (leftchar << 8) | *bin_data;
812 leftbits += 8;
813 while ( leftbits >= 6 ) {
814 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
815 leftbits -= 6;
816 *ascii_data++ = table_b2a_hqx[this_ch];
817 }
818 }
819 /* Output a possible runt byte */
820 if ( leftbits ) {
821 leftchar <<= (6-leftbits);
822 *ascii_data++ = table_b2a_hqx[leftchar & 0x3f];
823 }
824
825 return _PyBytesWriter_Finish(&writer, ascii_data);
826 }
827
828
829 /*[clinic input]
830 binascii.rledecode_hqx
831
832 data: Py_buffer
833 /
834
835 Decode hexbin RLE-coded string.
836 [clinic start generated code]*/
837
838 static PyObject *
binascii_rledecode_hqx_impl(PyObject * module,Py_buffer * data)839 binascii_rledecode_hqx_impl(PyObject *module, Py_buffer *data)
840 /*[clinic end generated code: output=9826619565de1c6c input=54cdd49fc014402c]*/
841 {
842 if (PyErr_WarnEx(PyExc_DeprecationWarning,
843 "binascii.rledecode_hqx() is deprecated", 1) < 0) {
844 return NULL;
845 }
846
847 const unsigned char *in_data;
848 unsigned char *out_data;
849 unsigned char in_byte, in_repeat;
850 Py_ssize_t in_len;
851 _PyBytesWriter writer;
852
853 in_data = data->buf;
854 in_len = data->len;
855 _PyBytesWriter_Init(&writer);
856 binascii_state *state;
857
858 assert(in_len >= 0);
859
860 /* Empty string is a special case */
861 if ( in_len == 0 )
862 return PyBytes_FromStringAndSize("", 0);
863 else if (in_len > PY_SSIZE_T_MAX / 2)
864 return PyErr_NoMemory();
865
866 /* Allocate a buffer of reasonable size. Resized when needed */
867 out_data = _PyBytesWriter_Alloc(&writer, in_len);
868 if (out_data == NULL)
869 return NULL;
870
871 /* Use overallocation */
872 writer.overallocate = 1;
873
874 /*
875 ** We need two macros here to get/put bytes and handle
876 ** end-of-buffer for input and output strings.
877 */
878 #define INBYTE(b) \
879 do { \
880 if ( --in_len < 0 ) { \
881 state = PyModule_GetState(module); \
882 if (state == NULL) { \
883 return NULL; \
884 } \
885 PyErr_SetString(state->Incomplete, ""); \
886 goto error; \
887 } \
888 b = *in_data++; \
889 } while(0)
890
891 /*
892 ** Handle first byte separately (since we have to get angry
893 ** in case of an orphaned RLE code).
894 */
895 INBYTE(in_byte);
896
897 if (in_byte == RUNCHAR) {
898 INBYTE(in_repeat);
899 /* only 1 byte will be written, but 2 bytes were preallocated:
900 subtract 1 byte to prevent overallocation */
901 writer.min_size--;
902
903 if (in_repeat != 0) {
904 /* Note Error, not Incomplete (which is at the end
905 ** of the string only). This is a programmer error.
906 */
907 state = PyModule_GetState(module);
908 if (state == NULL) {
909 return NULL;
910 }
911 PyErr_SetString(state->Error, "Orphaned RLE code at start");
912 goto error;
913 }
914 *out_data++ = RUNCHAR;
915 } else {
916 *out_data++ = in_byte;
917 }
918
919 while( in_len > 0 ) {
920 INBYTE(in_byte);
921
922 if (in_byte == RUNCHAR) {
923 INBYTE(in_repeat);
924 /* only 1 byte will be written, but 2 bytes were preallocated:
925 subtract 1 byte to prevent overallocation */
926 writer.min_size--;
927
928 if ( in_repeat == 0 ) {
929 /* Just an escaped RUNCHAR value */
930 *out_data++ = RUNCHAR;
931 } else {
932 /* Pick up value and output a sequence of it */
933 in_byte = out_data[-1];
934
935 /* enlarge the buffer if needed */
936 if (in_repeat > 1) {
937 /* -1 because we already preallocated 1 byte */
938 out_data = _PyBytesWriter_Prepare(&writer, out_data,
939 in_repeat - 1);
940 if (out_data == NULL)
941 goto error;
942 }
943
944 while ( --in_repeat > 0 )
945 *out_data++ = in_byte;
946 }
947 } else {
948 /* Normal byte */
949 *out_data++ = in_byte;
950 }
951 }
952 return _PyBytesWriter_Finish(&writer, out_data);
953
954 error:
955 _PyBytesWriter_Dealloc(&writer);
956 return NULL;
957 }
958
959
960 /*[clinic input]
961 binascii.crc_hqx
962
963 data: Py_buffer
964 crc: unsigned_int(bitwise=True)
965 /
966
967 Compute CRC-CCITT incrementally.
968 [clinic start generated code]*/
969
970 static PyObject *
binascii_crc_hqx_impl(PyObject * module,Py_buffer * data,unsigned int crc)971 binascii_crc_hqx_impl(PyObject *module, Py_buffer *data, unsigned int crc)
972 /*[clinic end generated code: output=2fde213d0f547a98 input=56237755370a951c]*/
973 {
974 const unsigned char *bin_data;
975 Py_ssize_t len;
976
977 crc &= 0xffff;
978 bin_data = data->buf;
979 len = data->len;
980
981 while(len-- > 0) {
982 crc = ((crc<<8)&0xff00) ^ crctab_hqx[(crc>>8)^*bin_data++];
983 }
984
985 return PyLong_FromUnsignedLong(crc);
986 }
987
988 #ifndef USE_ZLIB_CRC32
989 /* Crc - 32 BIT ANSI X3.66 CRC checksum files
990 Also known as: ISO 3307
991 **********************************************************************|
992 * *|
993 * Demonstration program to compute the 32-bit CRC used as the frame *|
994 * check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71 *|
995 * and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level *|
996 * protocol). The 32-bit FCS was added via the Federal Register, *|
997 * 1 June 1982, p.23798. I presume but don't know for certain that *|
998 * this polynomial is or will be included in CCITT V.41, which *|
999 * defines the 16-bit CRC (often called CRC-CCITT) polynomial. FIPS *|
1000 * PUB 78 says that the 32-bit FCS reduces otherwise undetected *|
1001 * errors by a factor of 10^-5 over 16-bit FCS. *|
1002 * *|
1003 **********************************************************************|
1004
1005 Copyright (C) 1986 Gary S. Brown. You may use this program, or
1006 code or tables extracted from it, as desired without restriction.
1007
1008 First, the polynomial itself and its table of feedback terms. The
1009 polynomial is
1010 X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
1011 Note that we take it "backwards" and put the highest-order term in
1012 the lowest-order bit. The X^32 term is "implied"; the LSB is the
1013 X^31 term, etc. The X^0 term (usually shown as "+1") results in
1014 the MSB being 1.
1015
1016 Note that the usual hardware shift register implementation, which
1017 is what we're using (we're merely optimizing it by doing eight-bit
1018 chunks at a time) shifts bits into the lowest-order term. In our
1019 implementation, that means shifting towards the right. Why do we
1020 do it this way? Because the calculated CRC must be transmitted in
1021 order from highest-order term to lowest-order term. UARTs transmit
1022 characters in order from LSB to MSB. By storing the CRC this way,
1023 we hand it to the UART in the order low-byte to high-byte; the UART
1024 sends each low-bit to high-bit; and the result is transmission bit
1025 by bit from highest- to lowest-order term without requiring any bit
1026 shuffling on our part. Reception works similarly.
1027
1028 The feedback terms table consists of 256, 32-bit entries. Notes:
1029
1030 1. The table can be generated at runtime if desired; code to do so
1031 is shown later. It might not be obvious, but the feedback
1032 terms simply represent the results of eight shift/xor opera-
1033 tions for all combinations of data and CRC register values.
1034
1035 2. The CRC accumulation logic is the same for all CRC polynomials,
1036 be they sixteen or thirty-two bits wide. You simply choose the
1037 appropriate table. Alternatively, because the table can be
1038 generated at runtime, you can start by generating the table for
1039 the polynomial in question and use exactly the same "updcrc",
1040 if your application needn't simultaneously handle two CRC
1041 polynomials. (Note, however, that XMODEM is strange.)
1042
1043 3. For 16-bit CRCs, the table entries need be only 16 bits wide;
1044 of course, 32-bit entries work OK if the high 16 bits are zero.
1045
1046 4. The values must be right-shifted by eight bits by the "updcrc"
1047 logic; the shift must be unsigned (bring in zeroes). On some
1048 hardware you could probably optimize the shift in assembler by
1049 using byte-swap instructions.
1050 ********************************************************************/
1051
/* Byte-indexed feedback table for the reflected CRC-32 polynomial
 * (0xedb88320, stored at index 0x80).  Entry i holds the result of
 * pushing the byte value i through eight shift/xor steps.
 * Four entries per row; the trailing comment is the index of the
 * first entry in that row. */
static const unsigned int crc_32_tab[256] = {
    0x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU, /* 0x00 */
    0x076dc419U, 0x706af48fU, 0xe963a535U, 0x9e6495a3U, /* 0x04 */
    0x0edb8832U, 0x79dcb8a4U, 0xe0d5e91eU, 0x97d2d988U, /* 0x08 */
    0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U, 0x90bf1d91U, /* 0x0c */
    0x1db71064U, 0x6ab020f2U, 0xf3b97148U, 0x84be41deU, /* 0x10 */
    0x1adad47dU, 0x6ddde4ebU, 0xf4d4b551U, 0x83d385c7U, /* 0x14 */
    0x136c9856U, 0x646ba8c0U, 0xfd62f97aU, 0x8a65c9ecU, /* 0x18 */
    0x14015c4fU, 0x63066cd9U, 0xfa0f3d63U, 0x8d080df5U, /* 0x1c */
    0x3b6e20c8U, 0x4c69105eU, 0xd56041e4U, 0xa2677172U, /* 0x20 */
    0x3c03e4d1U, 0x4b04d447U, 0xd20d85fdU, 0xa50ab56bU, /* 0x24 */
    0x35b5a8faU, 0x42b2986cU, 0xdbbbc9d6U, 0xacbcf940U, /* 0x28 */
    0x32d86ce3U, 0x45df5c75U, 0xdcd60dcfU, 0xabd13d59U, /* 0x2c */
    0x26d930acU, 0x51de003aU, 0xc8d75180U, 0xbfd06116U, /* 0x30 */
    0x21b4f4b5U, 0x56b3c423U, 0xcfba9599U, 0xb8bda50fU, /* 0x34 */
    0x2802b89eU, 0x5f058808U, 0xc60cd9b2U, 0xb10be924U, /* 0x38 */
    0x2f6f7c87U, 0x58684c11U, 0xc1611dabU, 0xb6662d3dU, /* 0x3c */
    0x76dc4190U, 0x01db7106U, 0x98d220bcU, 0xefd5102aU, /* 0x40 */
    0x71b18589U, 0x06b6b51fU, 0x9fbfe4a5U, 0xe8b8d433U, /* 0x44 */
    0x7807c9a2U, 0x0f00f934U, 0x9609a88eU, 0xe10e9818U, /* 0x48 */
    0x7f6a0dbbU, 0x086d3d2dU, 0x91646c97U, 0xe6635c01U, /* 0x4c */
    0x6b6b51f4U, 0x1c6c6162U, 0x856530d8U, 0xf262004eU, /* 0x50 */
    0x6c0695edU, 0x1b01a57bU, 0x8208f4c1U, 0xf50fc457U, /* 0x54 */
    0x65b0d9c6U, 0x12b7e950U, 0x8bbeb8eaU, 0xfcb9887cU, /* 0x58 */
    0x62dd1ddfU, 0x15da2d49U, 0x8cd37cf3U, 0xfbd44c65U, /* 0x5c */
    0x4db26158U, 0x3ab551ceU, 0xa3bc0074U, 0xd4bb30e2U, /* 0x60 */
    0x4adfa541U, 0x3dd895d7U, 0xa4d1c46dU, 0xd3d6f4fbU, /* 0x64 */
    0x4369e96aU, 0x346ed9fcU, 0xad678846U, 0xda60b8d0U, /* 0x68 */
    0x44042d73U, 0x33031de5U, 0xaa0a4c5fU, 0xdd0d7cc9U, /* 0x6c */
    0x5005713cU, 0x270241aaU, 0xbe0b1010U, 0xc90c2086U, /* 0x70 */
    0x5768b525U, 0x206f85b3U, 0xb966d409U, 0xce61e49fU, /* 0x74 */
    0x5edef90eU, 0x29d9c998U, 0xb0d09822U, 0xc7d7a8b4U, /* 0x78 */
    0x59b33d17U, 0x2eb40d81U, 0xb7bd5c3bU, 0xc0ba6cadU, /* 0x7c */
    0xedb88320U, 0x9abfb3b6U, 0x03b6e20cU, 0x74b1d29aU, /* 0x80 */
    0xead54739U, 0x9dd277afU, 0x04db2615U, 0x73dc1683U, /* 0x84 */
    0xe3630b12U, 0x94643b84U, 0x0d6d6a3eU, 0x7a6a5aa8U, /* 0x88 */
    0xe40ecf0bU, 0x9309ff9dU, 0x0a00ae27U, 0x7d079eb1U, /* 0x8c */
    0xf00f9344U, 0x8708a3d2U, 0x1e01f268U, 0x6906c2feU, /* 0x90 */
    0xf762575dU, 0x806567cbU, 0x196c3671U, 0x6e6b06e7U, /* 0x94 */
    0xfed41b76U, 0x89d32be0U, 0x10da7a5aU, 0x67dd4accU, /* 0x98 */
    0xf9b9df6fU, 0x8ebeeff9U, 0x17b7be43U, 0x60b08ed5U, /* 0x9c */
    0xd6d6a3e8U, 0xa1d1937eU, 0x38d8c2c4U, 0x4fdff252U, /* 0xa0 */
    0xd1bb67f1U, 0xa6bc5767U, 0x3fb506ddU, 0x48b2364bU, /* 0xa4 */
    0xd80d2bdaU, 0xaf0a1b4cU, 0x36034af6U, 0x41047a60U, /* 0xa8 */
    0xdf60efc3U, 0xa867df55U, 0x316e8eefU, 0x4669be79U, /* 0xac */
    0xcb61b38cU, 0xbc66831aU, 0x256fd2a0U, 0x5268e236U, /* 0xb0 */
    0xcc0c7795U, 0xbb0b4703U, 0x220216b9U, 0x5505262fU, /* 0xb4 */
    0xc5ba3bbeU, 0xb2bd0b28U, 0x2bb45a92U, 0x5cb36a04U, /* 0xb8 */
    0xc2d7ffa7U, 0xb5d0cf31U, 0x2cd99e8bU, 0x5bdeae1dU, /* 0xbc */
    0x9b64c2b0U, 0xec63f226U, 0x756aa39cU, 0x026d930aU, /* 0xc0 */
    0x9c0906a9U, 0xeb0e363fU, 0x72076785U, 0x05005713U, /* 0xc4 */
    0x95bf4a82U, 0xe2b87a14U, 0x7bb12baeU, 0x0cb61b38U, /* 0xc8 */
    0x92d28e9bU, 0xe5d5be0dU, 0x7cdcefb7U, 0x0bdbdf21U, /* 0xcc */
    0x86d3d2d4U, 0xf1d4e242U, 0x68ddb3f8U, 0x1fda836eU, /* 0xd0 */
    0x81be16cdU, 0xf6b9265bU, 0x6fb077e1U, 0x18b74777U, /* 0xd4 */
    0x88085ae6U, 0xff0f6a70U, 0x66063bcaU, 0x11010b5cU, /* 0xd8 */
    0x8f659effU, 0xf862ae69U, 0x616bffd3U, 0x166ccf45U, /* 0xdc */
    0xa00ae278U, 0xd70dd2eeU, 0x4e048354U, 0x3903b3c2U, /* 0xe0 */
    0xa7672661U, 0xd06016f7U, 0x4969474dU, 0x3e6e77dbU, /* 0xe4 */
    0xaed16a4aU, 0xd9d65adcU, 0x40df0b66U, 0x37d83bf0U, /* 0xe8 */
    0xa9bcae53U, 0xdebb9ec5U, 0x47b2cf7fU, 0x30b5ffe9U, /* 0xec */
    0xbdbdf21cU, 0xcabac28aU, 0x53b39330U, 0x24b4a3a6U, /* 0xf0 */
    0xbad03605U, 0xcdd70693U, 0x54de5729U, 0x23d967bfU, /* 0xf4 */
    0xb3667a2eU, 0xc4614ab8U, 0x5d681b02U, 0x2a6f2b94U, /* 0xf8 */
    0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU, 0x2d02ef8dU  /* 0xfc */
};
1106 #endif /* USE_ZLIB_CRC32 */
1107
1108 /*[clinic input]
1109 binascii.crc32 -> unsigned_int
1110
1111 data: Py_buffer
1112 crc: unsigned_int(bitwise=True) = 0
1113 /
1114
1115 Compute CRC-32 incrementally.
1116 [clinic start generated code]*/
1117
1118 static unsigned int
binascii_crc32_impl(PyObject * module,Py_buffer * data,unsigned int crc)1119 binascii_crc32_impl(PyObject *module, Py_buffer *data, unsigned int crc)
1120 /*[clinic end generated code: output=52cf59056a78593b input=bbe340bc99d25aa8]*/
1121
1122 #ifdef USE_ZLIB_CRC32
1123 /* This was taken from zlibmodule.c PyZlib_crc32 (but is PY_SSIZE_T_CLEAN) */
1124 {
1125 const Byte *buf;
1126 Py_ssize_t len;
1127 int signed_val;
1128
1129 buf = (Byte*)data->buf;
1130 len = data->len;
1131 signed_val = crc32(crc, buf, len);
1132 return (unsigned int)signed_val & 0xffffffffU;
1133 }
1134 #else /* USE_ZLIB_CRC32 */
1135 { /* By Jim Ahlstrom; All rights transferred to CNRI */
1136 const unsigned char *bin_data;
1137 Py_ssize_t len;
1138 unsigned int result;
1139
1140 bin_data = data->buf;
1141 len = data->len;
1142
1143 crc = ~ crc;
1144 while (len-- > 0) {
1145 crc = crc_32_tab[(crc ^ *bin_data++) & 0xff] ^ (crc >> 8);
1146 /* Note: (crc >> 8) MUST zero fill on left */
1147 }
1148
1149 result = (crc ^ 0xFFFFFFFF);
1150 return result & 0xffffffff;
1151 }
1152 #endif /* USE_ZLIB_CRC32 */
1153
1154 /*[clinic input]
1155 binascii.b2a_hex
1156
1157 data: Py_buffer
1158 sep: object = NULL
1159 An optional single character or byte to separate hex bytes.
1160 bytes_per_sep: int = 1
1161 How many bytes between separators. Positive values count from the
1162 right, negative values count from the left.
1163
1164 Hexadecimal representation of binary data.
1165
1166 The return value is a bytes object. This function is also
1167 available as "hexlify()".
1168
1169 Example:
1170 >>> binascii.b2a_hex(b'\xb9\x01\xef')
1171 b'b901ef'
1172 >>> binascii.hexlify(b'\xb9\x01\xef', ':')
1173 b'b9:01:ef'
1174 >>> binascii.b2a_hex(b'\xb9\x01\xef', b'_', 2)
1175 b'b9_01ef'
1176 [clinic start generated code]*/
1177
1178 static PyObject *
binascii_b2a_hex_impl(PyObject * module,Py_buffer * data,PyObject * sep,int bytes_per_sep)1179 binascii_b2a_hex_impl(PyObject *module, Py_buffer *data, PyObject *sep,
1180 int bytes_per_sep)
1181 /*[clinic end generated code: output=a26937946a81d2c7 input=ec0ade6ba2e43543]*/
1182 {
1183 return _Py_strhex_bytes_with_sep((const char *)data->buf, data->len,
1184 sep, bytes_per_sep);
1185 }
1186
1187 /*[clinic input]
1188 binascii.hexlify = binascii.b2a_hex
1189
1190 Hexadecimal representation of binary data.
1191
1192 The return value is a bytes object. This function is also
1193 available as "b2a_hex()".
1194 [clinic start generated code]*/
1195
1196 static PyObject *
binascii_hexlify_impl(PyObject * module,Py_buffer * data,PyObject * sep,int bytes_per_sep)1197 binascii_hexlify_impl(PyObject *module, Py_buffer *data, PyObject *sep,
1198 int bytes_per_sep)
1199 /*[clinic end generated code: output=d12aa1b001b15199 input=bc317bd4e241f76b]*/
1200 {
1201 return _Py_strhex_bytes_with_sep((const char *)data->buf, data->len,
1202 sep, bytes_per_sep);
1203 }
1204
1205 /*[clinic input]
1206 binascii.a2b_hex
1207
1208 hexstr: ascii_buffer
1209 /
1210
1211 Binary data of hexadecimal representation.
1212
1213 hexstr must contain an even number of hex digits (upper or lower case).
1214 This function is also available as "unhexlify()".
1215 [clinic start generated code]*/
1216
1217 static PyObject *
binascii_a2b_hex_impl(PyObject * module,Py_buffer * hexstr)1218 binascii_a2b_hex_impl(PyObject *module, Py_buffer *hexstr)
1219 /*[clinic end generated code: output=0cc1a139af0eeecb input=9e1e7f2f94db24fd]*/
1220 {
1221 const char* argbuf;
1222 Py_ssize_t arglen;
1223 PyObject *retval;
1224 char* retbuf;
1225 Py_ssize_t i, j;
1226 binascii_state *state;
1227
1228 argbuf = hexstr->buf;
1229 arglen = hexstr->len;
1230
1231 assert(arglen >= 0);
1232
1233 /* XXX What should we do about strings with an odd length? Should
1234 * we add an implicit leading zero, or a trailing zero? For now,
1235 * raise an exception.
1236 */
1237 if (arglen % 2) {
1238 state = PyModule_GetState(module);
1239 if (state == NULL) {
1240 return NULL;
1241 }
1242 PyErr_SetString(state->Error, "Odd-length string");
1243 return NULL;
1244 }
1245
1246 retval = PyBytes_FromStringAndSize(NULL, (arglen/2));
1247 if (!retval)
1248 return NULL;
1249 retbuf = PyBytes_AS_STRING(retval);
1250
1251 for (i=j=0; i < arglen; i += 2) {
1252 unsigned int top = _PyLong_DigitValue[Py_CHARMASK(argbuf[i])];
1253 unsigned int bot = _PyLong_DigitValue[Py_CHARMASK(argbuf[i+1])];
1254 if (top >= 16 || bot >= 16) {
1255 state = PyModule_GetState(module);
1256 if (state == NULL) {
1257 return NULL;
1258 }
1259 PyErr_SetString(state->Error,
1260 "Non-hexadecimal digit found");
1261 goto finally;
1262 }
1263 retbuf[j++] = (top << 4) + bot;
1264 }
1265 return retval;
1266
1267 finally:
1268 Py_DECREF(retval);
1269 return NULL;
1270 }
1271
1272 /*[clinic input]
1273 binascii.unhexlify = binascii.a2b_hex
1274
1275 Binary data of hexadecimal representation.
1276
1277 hexstr must contain an even number of hex digits (upper or lower case).
1278 [clinic start generated code]*/
1279
1280 static PyObject *
binascii_unhexlify_impl(PyObject * module,Py_buffer * hexstr)1281 binascii_unhexlify_impl(PyObject *module, Py_buffer *hexstr)
1282 /*[clinic end generated code: output=51a64c06c79629e3 input=dd8c012725f462da]*/
1283 {
1284 return binascii_a2b_hex_impl(module, hexstr);
1285 }
1286
/* Line-length threshold used by b2a_qp: when appending the next item
   would bring the current output line to this many characters, a soft
   line break is emitted first. */
1287 #define MAXLINESIZE 76
1288
1289
1290 /*[clinic input]
1291 binascii.a2b_qp
1292
1293 data: ascii_buffer
1294 header: bool(accept={int}) = False
1295
1296 Decode a string of qp-encoded data.
1297 [clinic start generated code]*/
1298
1299 static PyObject *
binascii_a2b_qp_impl(PyObject * module,Py_buffer * data,int header)1300 binascii_a2b_qp_impl(PyObject *module, Py_buffer *data, int header)
1301 /*[clinic end generated code: output=e99f7846cfb9bc53 input=bf6766fea76cce8f]*/
1302 {
1303 Py_ssize_t in, out;
1304 char ch;
1305 const unsigned char *ascii_data;
1306 unsigned char *odata;
1307 Py_ssize_t datalen = 0;
1308 PyObject *rv;
1309
1310 ascii_data = data->buf;
1311 datalen = data->len;
1312
1313 /* We allocate the output same size as input, this is overkill.
1314 */
1315 odata = (unsigned char *) PyMem_Calloc(1, datalen);
1316 if (odata == NULL) {
1317 PyErr_NoMemory();
1318 return NULL;
1319 }
1320
1321 in = out = 0;
1322 while (in < datalen) {
1323 if (ascii_data[in] == '=') {
1324 in++;
1325 if (in >= datalen) break;
1326 /* Soft line breaks */
1327 if ((ascii_data[in] == '\n') || (ascii_data[in] == '\r')) {
1328 if (ascii_data[in] != '\n') {
1329 while (in < datalen && ascii_data[in] != '\n') in++;
1330 }
1331 if (in < datalen) in++;
1332 }
1333 else if (ascii_data[in] == '=') {
1334 /* broken case from broken python qp */
1335 odata[out++] = '=';
1336 in++;
1337 }
1338 else if ((in + 1 < datalen) &&
1339 ((ascii_data[in] >= 'A' && ascii_data[in] <= 'F') ||
1340 (ascii_data[in] >= 'a' && ascii_data[in] <= 'f') ||
1341 (ascii_data[in] >= '0' && ascii_data[in] <= '9')) &&
1342 ((ascii_data[in+1] >= 'A' && ascii_data[in+1] <= 'F') ||
1343 (ascii_data[in+1] >= 'a' && ascii_data[in+1] <= 'f') ||
1344 (ascii_data[in+1] >= '0' && ascii_data[in+1] <= '9'))) {
1345 /* hexval */
1346 ch = _PyLong_DigitValue[ascii_data[in]] << 4;
1347 in++;
1348 ch |= _PyLong_DigitValue[ascii_data[in]];
1349 in++;
1350 odata[out++] = ch;
1351 }
1352 else {
1353 odata[out++] = '=';
1354 }
1355 }
1356 else if (header && ascii_data[in] == '_') {
1357 odata[out++] = ' ';
1358 in++;
1359 }
1360 else {
1361 odata[out] = ascii_data[in];
1362 in++;
1363 out++;
1364 }
1365 }
1366 if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
1367 PyMem_Free(odata);
1368 return NULL;
1369 }
1370 PyMem_Free(odata);
1371 return rv;
1372 }
1373
/* Write the two uppercase hexadecimal digits of `ch` to s[0] (high
   nibble) and s[1] (low nibble).  No terminator is written.
   Always returns 0. */
static int
to_hex(unsigned char ch, unsigned char *s)
{
    static const char hexdigits[] = "0123456789ABCDEF";

    s[0] = hexdigits[(ch >> 4) & 0x0F];
    s[1] = hexdigits[ch & 0x0F];
    return 0;
}
1384
1385 /* XXX: This is ridiculously complicated to be backward compatible
1386 * (mostly) with the quopri module. It doesn't re-create the quopri
1387 * module bug where text ending in CRLF has the CR encoded */
1388
1389 /*[clinic input]
1390 binascii.b2a_qp
1391
1392 data: Py_buffer
1393 quotetabs: bool(accept={int}) = False
1394 istext: bool(accept={int}) = True
1395 header: bool(accept={int}) = False
1396
1397 Encode a string using quoted-printable encoding.
1398
1399 On encoding, when istext is set, newlines are not encoded, and white
1400 space at end of lines is. When istext is not set, \r and \n (CR/LF)
1401 are both encoded. When quotetabs is set, space and tabs are encoded.
1402 [clinic start generated code]*/
1403
1404 static PyObject *
binascii_b2a_qp_impl(PyObject * module,Py_buffer * data,int quotetabs,int istext,int header)1405 binascii_b2a_qp_impl(PyObject *module, Py_buffer *data, int quotetabs,
1406 int istext, int header)
1407 /*[clinic end generated code: output=e9884472ebb1a94c input=21fb7eea4a184ba6]*/
1408 {
1409 Py_ssize_t in, out;
1410 const unsigned char *databuf;
1411 unsigned char *odata;
1412 Py_ssize_t datalen = 0, odatalen = 0;
1413 PyObject *rv;
1414 unsigned int linelen = 0;
1415 unsigned char ch;
1416 int crlf = 0;
1417 const unsigned char *p;
1418
1419 databuf = data->buf;
1420 datalen = data->len;
1421
1422 /* See if this string is using CRLF line ends */
1423 /* XXX: this function has the side effect of converting all of
1424 * the end of lines to be the same depending on this detection
1425 * here */
1426 p = (const unsigned char *) memchr(databuf, '\n', datalen);
1427 if ((p != NULL) && (p > databuf) && (*(p-1) == '\r'))
1428 crlf = 1;
1429
1430 /* First, scan to see how many characters need to be encoded */
1431 in = 0;
1432 while (in < datalen) {
1433 Py_ssize_t delta = 0;
1434 if ((databuf[in] > 126) ||
1435 (databuf[in] == '=') ||
1436 (header && databuf[in] == '_') ||
1437 ((databuf[in] == '.') && (linelen == 0) &&
1438 (in + 1 == datalen || databuf[in+1] == '\n' ||
1439 databuf[in+1] == '\r' || databuf[in+1] == 0)) ||
1440 (!istext && ((databuf[in] == '\r') || (databuf[in] == '\n'))) ||
1441 ((databuf[in] == '\t' || databuf[in] == ' ') && (in + 1 == datalen)) ||
1442 ((databuf[in] < 33) &&
1443 (databuf[in] != '\r') && (databuf[in] != '\n') &&
1444 (quotetabs || ((databuf[in] != '\t') && (databuf[in] != ' ')))))
1445 {
1446 if ((linelen + 3) >= MAXLINESIZE) {
1447 linelen = 0;
1448 if (crlf)
1449 delta += 3;
1450 else
1451 delta += 2;
1452 }
1453 linelen += 3;
1454 delta += 3;
1455 in++;
1456 }
1457 else {
1458 if (istext &&
1459 ((databuf[in] == '\n') ||
1460 ((in+1 < datalen) && (databuf[in] == '\r') &&
1461 (databuf[in+1] == '\n'))))
1462 {
1463 linelen = 0;
1464 /* Protect against whitespace on end of line */
1465 if (in && ((databuf[in-1] == ' ') || (databuf[in-1] == '\t')))
1466 delta += 2;
1467 if (crlf)
1468 delta += 2;
1469 else
1470 delta += 1;
1471 if (databuf[in] == '\r')
1472 in += 2;
1473 else
1474 in++;
1475 }
1476 else {
1477 if ((in + 1 != datalen) &&
1478 (databuf[in+1] != '\n') &&
1479 (linelen + 1) >= MAXLINESIZE) {
1480 linelen = 0;
1481 if (crlf)
1482 delta += 3;
1483 else
1484 delta += 2;
1485 }
1486 linelen++;
1487 delta++;
1488 in++;
1489 }
1490 }
1491 if (PY_SSIZE_T_MAX - delta < odatalen) {
1492 PyErr_NoMemory();
1493 return NULL;
1494 }
1495 odatalen += delta;
1496 }
1497
1498 /* We allocate the output same size as input, this is overkill.
1499 */
1500 odata = (unsigned char *) PyMem_Calloc(1, odatalen);
1501 if (odata == NULL) {
1502 PyErr_NoMemory();
1503 return NULL;
1504 }
1505
1506 in = out = linelen = 0;
1507 while (in < datalen) {
1508 if ((databuf[in] > 126) ||
1509 (databuf[in] == '=') ||
1510 (header && databuf[in] == '_') ||
1511 ((databuf[in] == '.') && (linelen == 0) &&
1512 (in + 1 == datalen || databuf[in+1] == '\n' ||
1513 databuf[in+1] == '\r' || databuf[in+1] == 0)) ||
1514 (!istext && ((databuf[in] == '\r') || (databuf[in] == '\n'))) ||
1515 ((databuf[in] == '\t' || databuf[in] == ' ') && (in + 1 == datalen)) ||
1516 ((databuf[in] < 33) &&
1517 (databuf[in] != '\r') && (databuf[in] != '\n') &&
1518 (quotetabs || ((databuf[in] != '\t') && (databuf[in] != ' ')))))
1519 {
1520 if ((linelen + 3 )>= MAXLINESIZE) {
1521 odata[out++] = '=';
1522 if (crlf) odata[out++] = '\r';
1523 odata[out++] = '\n';
1524 linelen = 0;
1525 }
1526 odata[out++] = '=';
1527 to_hex(databuf[in], &odata[out]);
1528 out += 2;
1529 in++;
1530 linelen += 3;
1531 }
1532 else {
1533 if (istext &&
1534 ((databuf[in] == '\n') ||
1535 ((in+1 < datalen) && (databuf[in] == '\r') &&
1536 (databuf[in+1] == '\n'))))
1537 {
1538 linelen = 0;
1539 /* Protect against whitespace on end of line */
1540 if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
1541 ch = odata[out-1];
1542 odata[out-1] = '=';
1543 to_hex(ch, &odata[out]);
1544 out += 2;
1545 }
1546
1547 if (crlf) odata[out++] = '\r';
1548 odata[out++] = '\n';
1549 if (databuf[in] == '\r')
1550 in += 2;
1551 else
1552 in++;
1553 }
1554 else {
1555 if ((in + 1 != datalen) &&
1556 (databuf[in+1] != '\n') &&
1557 (linelen + 1) >= MAXLINESIZE) {
1558 odata[out++] = '=';
1559 if (crlf) odata[out++] = '\r';
1560 odata[out++] = '\n';
1561 linelen = 0;
1562 }
1563 linelen++;
1564 if (header && databuf[in] == ' ') {
1565 odata[out++] = '_';
1566 in++;
1567 }
1568 else {
1569 odata[out++] = databuf[in++];
1570 }
1571 }
1572 }
1573 }
1574 if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
1575 PyMem_Free(odata);
1576 return NULL;
1577 }
1578 PyMem_Free(odata);
1579 return rv;
1580 }
1581
1582 /* List of functions defined in the module */
1583
1584 static struct PyMethodDef binascii_module_methods[] = {
1585 BINASCII_A2B_UU_METHODDEF
1586 BINASCII_B2A_UU_METHODDEF
1587 BINASCII_A2B_BASE64_METHODDEF
1588 BINASCII_B2A_BASE64_METHODDEF
1589 BINASCII_A2B_HQX_METHODDEF
1590 BINASCII_B2A_HQX_METHODDEF
1591 BINASCII_A2B_HEX_METHODDEF
1592 BINASCII_B2A_HEX_METHODDEF
1593 BINASCII_HEXLIFY_METHODDEF
1594 BINASCII_UNHEXLIFY_METHODDEF
1595 BINASCII_RLECODE_HQX_METHODDEF
1596 BINASCII_RLEDECODE_HQX_METHODDEF
1597 BINASCII_CRC_HQX_METHODDEF
1598 BINASCII_CRC32_METHODDEF
1599 BINASCII_A2B_QP_METHODDEF
1600 BINASCII_B2A_QP_METHODDEF
1601 {NULL, NULL} /* sentinel */
1602 };
1603
1604
1605 /* Initialization function for the module (*must* be called PyInit_binascii) */
/* Module docstring; referenced from the binasciimodule definition below. */
1606 PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");
1607
1608 static int
binascii_exec(PyObject * module)1609 binascii_exec(PyObject *module) {
1610 int result;
1611 binascii_state *state = PyModule_GetState(module);
1612 if (state == NULL) {
1613 return -1;
1614 }
1615
1616 state->Error = PyErr_NewException("binascii.Error", PyExc_ValueError, NULL);
1617 if (state->Error == NULL) {
1618 return -1;
1619 }
1620 Py_INCREF(state->Error);
1621 result = PyModule_AddObject(module, "Error", state->Error);
1622 if (result == -1) {
1623 Py_DECREF(state->Error);
1624 return -1;
1625 }
1626
1627 state->Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
1628 if (state->Incomplete == NULL) {
1629 return -1;
1630 }
1631 Py_INCREF(state->Incomplete);
1632 result = PyModule_AddObject(module, "Incomplete", state->Incomplete);
1633 if (result == -1) {
1634 Py_DECREF(state->Incomplete);
1635 return -1;
1636 }
1637
1638 return 0;
1639 }
1640
1641 static PyModuleDef_Slot binascii_slots[] = {
1642 {Py_mod_exec, binascii_exec},
1643 {0, NULL}
1644 };
1645
1646 static int
binascii_traverse(PyObject * module,visitproc visit,void * arg)1647 binascii_traverse(PyObject *module, visitproc visit, void *arg)
1648 {
1649 binascii_state *state = get_binascii_state(module);
1650 Py_VISIT(state->Error);
1651 Py_VISIT(state->Incomplete);
1652 return 0;
1653 }
1654
1655 static int
binascii_clear(PyObject * module)1656 binascii_clear(PyObject *module)
1657 {
1658 binascii_state *state = get_binascii_state(module);
1659 Py_CLEAR(state->Error);
1660 Py_CLEAR(state->Incomplete);
1661 return 0;
1662 }
1663
1664 static void
binascii_free(void * module)1665 binascii_free(void *module)
1666 {
1667 binascii_clear((PyObject *)module);
1668 }
1669
1670 static struct PyModuleDef binasciimodule = {
1671 PyModuleDef_HEAD_INIT,
1672 "binascii",
1673 doc_binascii,
1674 sizeof(binascii_state),
1675 binascii_module_methods,
1676 binascii_slots,
1677 binascii_traverse,
1678 binascii_clear,
1679 binascii_free
1680 };
1681
1682 PyMODINIT_FUNC
PyInit_binascii(void)1683 PyInit_binascii(void)
1684 {
1685 return PyModuleDef_Init(&binasciimodule);
1686 }
1687