• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 ** Routines to represent binary data in ASCII and vice-versa
3 **
4 ** This module currently supports the following encodings:
5 ** uuencode:
6 **      each line encodes 45 bytes (except possibly the last)
7 **      First char encodes (binary) length, rest data
8 **      each char encodes 6 bits, as follows:
9 **      binary: 01234567 abcdefgh ijklmnop
10 **      ascii:  012345 67abcd efghij klmnop
11 **      ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
12 **      short binary data is zero-extended (so the bits are always in the
13 **      right place), this does *not* reflect in the length.
14 ** base64:
15 **      Line breaks are insignificant, but lines are at most 76 chars
16 **      each char encodes 6 bits, in similar order as uuencode/hqx. Encoding
17 **      is done via a table.
18 **      Short binary data is filled (in ASCII) with '='.
19 ** hqx:
20 **      File starts with introductory text, real data starts and ends
21 **      with colons.
22 **      Data consists of three similar parts: info, datafork, resourcefork.
23 **      Each part is protected (at the end) with a 16-bit crc
24 **      The binary data is run-length encoded, and then ascii-fied:
25 **      binary: 01234567 abcdefgh ijklmnop
26 **      ascii:  012345 67abcd efghij klmnop
27 **      ASCII encoding is table-driven, see the code.
28 **      Short binary data results in the runt ascii-byte being output with
29 **      the bits in the right place.
30 **
31 ** While I was reading dozens of programs that encode or decode the formats
32 ** here (documentation? hihi:-) I have formulated Jansen's Observation:
33 **
34 **      Programs that encode binary data in ASCII are written in
35 **      such a style that they are as unreadable as possible. Devices used
36 **      include unnecessary global variables, burying important tables
37 **      in unrelated sourcefiles, putting functions in include files,
38 **      using seemingly-descriptive variable names for different purposes,
39 **      calls to empty subroutines and a host of others.
40 **
41 ** I have attempted to break with this tradition, but I guess that that
42 ** does make the performance sub-optimal. Oh well, too bad...
43 **
44 ** Jack Jansen, CWI, July 1995.
45 **
46 ** Added support for quoted-printable encoding, based on rfc 1521 et al
47 ** quoted-printable encoding specifies that non printable characters (anything
48 ** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
49 ** of the character.  It also specifies some other behavior to enable 8bit data
50 ** in a mail message with little difficulty (maximum line sizes, protecting
51 ** some cases of whitespace, etc).
52 **
53 ** Brandon Long, September 2001.
54 */
55 
56 #define PY_SSIZE_T_CLEAN
57 
58 #include "Python.h"
59 #include "pystrhex.h"
60 #ifdef USE_ZLIB_CRC32
61 #include "zlib.h"
62 #endif
63 
/*
** Exception objects passed to PyErr_SetString() by the routines below.
** "Error" is used for malformed input (illegal characters, trailing
** garbage, bad padding, ...); "Incomplete" is used by the hqx decoders
** when the input ends in the middle of an item.
** They are not assigned anywhere in this part of the file.
*/
64 static PyObject *Error;
65 static PyObject *Incomplete;
66 
67 /*
68 ** hqx lookup table, ascii->binary.
69 */
70 
/* Marker byte of the hqx run-length coding (see rlecode_hqx/rledecode_hqx) */
71 #define RUNCHAR 0x90
72 

/*
** Sentinel entries of table_a2b_hqx.  Real entries are 6-bit values
** (0x00..0x3F), so these cannot collide with decoded data:
**   DONE - the terminating ':'
**   SKIP - '\n' and '\r', silently ignored by the decoder
**   FAIL - any byte that is not a legal hqx character
*/
73 #define DONE 0x7F
74 #define SKIP 0x7E
75 #define FAIL 0x7D
76 

/* Indexed by the input byte (0..255); yields a 6-bit value or a sentinel */
77 static const unsigned char table_a2b_hqx[256] = {
78 /*       ^@    ^A    ^B    ^C    ^D    ^E    ^F    ^G   */
79 /* 0*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
80 /*       \b    \t    \n    ^K    ^L    \r    ^N    ^O   */
81 /* 1*/  FAIL, FAIL, SKIP, FAIL, FAIL, SKIP, FAIL, FAIL,
82 /*       ^P    ^Q    ^R    ^S    ^T    ^U    ^V    ^W   */
83 /* 2*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
84 /*       ^X    ^Y    ^Z    ^[    ^\    ^]    ^^    ^_   */
85 /* 3*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
86 /*              !     "     #     $     %     &     '   */
87 /* 4*/  FAIL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
88 /*        (     )     *     +     ,     -     .     /   */
89 /* 5*/  0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, FAIL, FAIL,
90 /*        0     1     2     3     4     5     6     7   */
91 /* 6*/  0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, FAIL,
92 /*        8     9     :     ;     <     =     >     ?   */
93 /* 7*/  0x14, 0x15, DONE, FAIL, FAIL, FAIL, FAIL, FAIL,
94 /*        @     A     B     C     D     E     F     G   */
95 /* 8*/  0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D,
96 /*        H     I     J     K     L     M     N     O   */
97 /* 9*/  0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, FAIL,
98 /*        P     Q     R     S     T     U     V     W   */
99 /*10*/  0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, FAIL,
100 /*        X     Y     Z     [     \     ]     ^     _   */
101 /*11*/  0x2C, 0x2D, 0x2E, 0x2F, FAIL, FAIL, FAIL, FAIL,
102 /*        `     a     b     c     d     e     f     g   */
103 /*12*/  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, FAIL,
104 /*        h     i     j     k     l     m     n     o   */
105 /*13*/  0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, FAIL, FAIL,
106 /*        p     q     r     s     t     u     v     w   */
107 /*14*/  0x3D, 0x3E, 0x3F, FAIL, FAIL, FAIL, FAIL, FAIL,
108 /*        x     y     z     {     |     }     ~    ^?   */
109 /*15*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/* Bytes 0x80..0xFF are never valid hqx characters */
110 /*16*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
111     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
112     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
113     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
114     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
115     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
116     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
117     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
118     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
119     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
120     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
121     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
122     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
123     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
124     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
125     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
126 };
127 
/*
** hqx lookup table, binary->ascii.
** b2a_hqx indexes it with a 6-bit value (masked with 0x3f).
*/
128 static const unsigned char table_b2a_hqx[] =
129 "!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr";
130 
/*
** base64 lookup table, ascii->binary, for the 7-bit codes 0..127 only;
** callers must not index it with bytes above 0x7f.
** -1 marks characters that are not base64 digits.  The pad character
** '=' maps to 0 rather than -1.
*/
131 static const char table_a2b_base64[] = {
132     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
133     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
134     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
135     52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */
136     -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
137     15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
138     -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
139     41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
140 };
141 
/* Character used to fill up an incomplete base64 quad */
142 #define BASE64_PAD '='
143 
/* b2a_base64 allocates bin_len*2 + 2 (+1 for the newline) output bytes;
   this bound keeps that expression from overflowing Py_ssize_t. */
144 /* Max binary chunk size; limited only by available memory */
145 #define BASE64_MAXBIN ((PY_SSIZE_T_MAX - 3) / 2)
146 
/* base64 lookup table, binary->ascii, indexed by a 6-bit value */
147 static const unsigned char table_b2a_base64[] =
148 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
149 
150 
151 
/*
** Lookup table for the 16-bit CRC computed by binascii_crc_hqx_impl.
** It is indexed with (high byte of the running crc) XOR (next data byte).
*/
152 static const unsigned short crctab_hqx[256] = {
153     0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
154     0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
155     0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
156     0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
157     0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
158     0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
159     0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
160     0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
161     0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
162     0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
163     0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
164     0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
165     0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
166     0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
167     0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
168     0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
169     0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
170     0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
171     0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
172     0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
173     0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
174     0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
175     0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
176     0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
177     0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
178     0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
179     0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
180     0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
181     0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
182     0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
183     0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
184     0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
185 };
186 
187 /*[clinic input]
188 module binascii
189 [clinic start generated code]*/
190 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=de89fb46bcaf3fec]*/
191 
192 /*[python input]
193 
194 class ascii_buffer_converter(CConverter):
195     type = 'Py_buffer'
196     converter = 'ascii_buffer_converter'
197     impl_by_reference = True
198     c_default = "{NULL, NULL}"
199 
200     def cleanup(self):
201         name = self.name
202         return "".join(["if (", name, ".obj)\n   PyBuffer_Release(&", name, ");\n"])
203 
204 [python start generated code]*/
205 /*[python end generated code: output=da39a3ee5e6b4b0d input=3eb7b63610da92cd]*/
206 
207 static int
ascii_buffer_converter(PyObject * arg,Py_buffer * buf)208 ascii_buffer_converter(PyObject *arg, Py_buffer *buf)
209 {
210     if (arg == NULL) {
211         PyBuffer_Release(buf);
212         return 1;
213     }
214     if (PyUnicode_Check(arg)) {
215         if (PyUnicode_READY(arg) < 0)
216             return 0;
217         if (!PyUnicode_IS_ASCII(arg)) {
218             PyErr_SetString(PyExc_ValueError,
219                             "string argument should contain only ASCII characters");
220             return 0;
221         }
222         assert(PyUnicode_KIND(arg) == PyUnicode_1BYTE_KIND);
223         buf->buf = (void *) PyUnicode_1BYTE_DATA(arg);
224         buf->len = PyUnicode_GET_LENGTH(arg);
225         buf->obj = NULL;
226         return 1;
227     }
228     if (PyObject_GetBuffer(arg, buf, PyBUF_SIMPLE) != 0) {
229         PyErr_Format(PyExc_TypeError,
230                      "argument should be bytes, buffer or ASCII string, "
231                      "not '%.100s'", Py_TYPE(arg)->tp_name);
232         return 0;
233     }
234     if (!PyBuffer_IsContiguous(buf, 'C')) {
235         PyErr_Format(PyExc_TypeError,
236                      "argument should be a contiguous buffer, "
237                      "not '%.100s'", Py_TYPE(arg)->tp_name);
238         PyBuffer_Release(buf);
239         return 0;
240     }
241     return Py_CLEANUP_SUPPORTED;
242 }
243 
244 #include "clinic/binascii.c.h"
245 
246 /*[clinic input]
247 binascii.a2b_uu
248 
249     data: ascii_buffer
250     /
251 
252 Decode a line of uuencoded data.
253 [clinic start generated code]*/
254 
255 static PyObject *
binascii_a2b_uu_impl(PyObject * module,Py_buffer * data)256 binascii_a2b_uu_impl(PyObject *module, Py_buffer *data)
257 /*[clinic end generated code: output=e027f8e0b0598742 input=7cafeaf73df63d1c]*/
258 {
259     const unsigned char *ascii_data;
260     unsigned char *bin_data;
261     int leftbits = 0;
262     unsigned char this_ch;
263     unsigned int leftchar = 0;
264     PyObject *rv;
265     Py_ssize_t ascii_len, bin_len;
266 
267     ascii_data = data->buf;
268     ascii_len = data->len;
269 
270     assert(ascii_len >= 0);
271 
272     /* First byte: binary data length (in bytes) */
273     bin_len = (*ascii_data++ - ' ') & 077;
274     ascii_len--;
275 
276     /* Allocate the buffer */
277     if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len)) == NULL )
278         return NULL;
279     bin_data = (unsigned char *)PyBytes_AS_STRING(rv);
280 
281     for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
282         /* XXX is it really best to add NULs if there's no more data */
283         this_ch = (ascii_len > 0) ? *ascii_data : 0;
284         if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
285             /*
286             ** Whitespace. Assume some spaces got eaten at
287             ** end-of-line. (We check this later)
288             */
289             this_ch = 0;
290         } else {
291             /* Check the character for legality
292             ** The 64 in stead of the expected 63 is because
293             ** there are a few uuencodes out there that use
294             ** '`' as zero instead of space.
295             */
296             if ( this_ch < ' ' || this_ch > (' ' + 64)) {
297                 PyErr_SetString(Error, "Illegal char");
298                 Py_DECREF(rv);
299                 return NULL;
300             }
301             this_ch = (this_ch - ' ') & 077;
302         }
303         /*
304         ** Shift it in on the low end, and see if there's
305         ** a byte ready for output.
306         */
307         leftchar = (leftchar << 6) | (this_ch);
308         leftbits += 6;
309         if ( leftbits >= 8 ) {
310             leftbits -= 8;
311             *bin_data++ = (leftchar >> leftbits) & 0xff;
312             leftchar &= ((1 << leftbits) - 1);
313             bin_len--;
314         }
315     }
316     /*
317     ** Finally, check that if there's anything left on the line
318     ** that it's whitespace only.
319     */
320     while( ascii_len-- > 0 ) {
321         this_ch = *ascii_data++;
322         /* Extra '`' may be written as padding in some cases */
323         if ( this_ch != ' ' && this_ch != ' '+64 &&
324              this_ch != '\n' && this_ch != '\r' ) {
325             PyErr_SetString(Error, "Trailing garbage");
326             Py_DECREF(rv);
327             return NULL;
328         }
329     }
330     return rv;
331 }
332 
333 /*[clinic input]
334 binascii.b2a_uu
335 
336     data: Py_buffer
337     /
338 
339 Uuencode line of data.
340 [clinic start generated code]*/
341 
342 static PyObject *
binascii_b2a_uu_impl(PyObject * module,Py_buffer * data)343 binascii_b2a_uu_impl(PyObject *module, Py_buffer *data)
344 /*[clinic end generated code: output=0070670e52e4aa6b input=00fdf458ce8b465b]*/
345 {
346     unsigned char *ascii_data;
347     const unsigned char *bin_data;
348     int leftbits = 0;
349     unsigned char this_ch;
350     unsigned int leftchar = 0;
351     Py_ssize_t bin_len, out_len;
352     _PyBytesWriter writer;
353 
354     _PyBytesWriter_Init(&writer);
355     bin_data = data->buf;
356     bin_len = data->len;
357     if ( bin_len > 45 ) {
358         /* The 45 is a limit that appears in all uuencode's */
359         PyErr_SetString(Error, "At most 45 bytes at once");
360         return NULL;
361     }
362 
363     /* We're lazy and allocate to much (fixed up later) */
364     out_len = 2 + (bin_len + 2) / 3 * 4;
365     ascii_data = _PyBytesWriter_Alloc(&writer, out_len);
366     if (ascii_data == NULL)
367         return NULL;
368 
369     /* Store the length */
370     *ascii_data++ = ' ' + (bin_len & 077);
371 
372     for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
373         /* Shift the data (or padding) into our buffer */
374         if ( bin_len > 0 )              /* Data */
375             leftchar = (leftchar << 8) | *bin_data;
376         else                            /* Padding */
377             leftchar <<= 8;
378         leftbits += 8;
379 
380         /* See if there are 6-bit groups ready */
381         while ( leftbits >= 6 ) {
382             this_ch = (leftchar >> (leftbits-6)) & 0x3f;
383             leftbits -= 6;
384             *ascii_data++ = this_ch + ' ';
385         }
386     }
387     *ascii_data++ = '\n';       /* Append a courtesy newline */
388 
389     return _PyBytesWriter_Finish(&writer, ascii_data);
390 }
391 
392 
393 static int
binascii_find_valid(const unsigned char * s,Py_ssize_t slen,int num)394 binascii_find_valid(const unsigned char *s, Py_ssize_t slen, int num)
395 {
396     /* Finds & returns the (num+1)th
397     ** valid character for base64, or -1 if none.
398     */
399 
400     int ret = -1;
401     unsigned char c, b64val;
402 
403     while ((slen > 0) && (ret == -1)) {
404         c = *s;
405         b64val = table_a2b_base64[c & 0x7f];
406         if ( ((c <= 0x7f) && (b64val != (unsigned char)-1)) ) {
407             if (num == 0)
408                 ret = *s;
409             num--;
410         }
411 
412         s++;
413         slen--;
414     }
415     return ret;
416 }
417 
418 /*[clinic input]
419 binascii.a2b_base64
420 
421     data: ascii_buffer
422     /
423 
424 Decode a line of base64 data.
425 [clinic start generated code]*/
426 
427 static PyObject *
binascii_a2b_base64_impl(PyObject * module,Py_buffer * data)428 binascii_a2b_base64_impl(PyObject *module, Py_buffer *data)
429 /*[clinic end generated code: output=0628223f19fd3f9b input=5872acf6e1cac243]*/
430 {
431     const unsigned char *ascii_data;
432     unsigned char *bin_data;
433     int leftbits = 0;
434     unsigned char this_ch;
435     unsigned int leftchar = 0;
436     Py_ssize_t ascii_len, bin_len;
437     int quad_pos = 0;
438     _PyBytesWriter writer;
439 
440     ascii_data = data->buf;
441     ascii_len = data->len;
442 
443     assert(ascii_len >= 0);
444 
445     if (ascii_len > PY_SSIZE_T_MAX - 3)
446         return PyErr_NoMemory();
447 
448     bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
449 
450     _PyBytesWriter_Init(&writer);
451 
452     /* Allocate the buffer */
453     bin_data = _PyBytesWriter_Alloc(&writer, bin_len);
454     if (bin_data == NULL)
455         return NULL;
456 
457     for( ; ascii_len > 0; ascii_len--, ascii_data++) {
458         this_ch = *ascii_data;
459 
460         if (this_ch > 0x7f ||
461             this_ch == '\r' || this_ch == '\n' || this_ch == ' ')
462             continue;
463 
464         /* Check for pad sequences and ignore
465         ** the invalid ones.
466         */
467         if (this_ch == BASE64_PAD) {
468             if ( (quad_pos < 2) ||
469                  ((quad_pos == 2) &&
470                   (binascii_find_valid(ascii_data, ascii_len, 1)
471                    != BASE64_PAD)) )
472             {
473                 continue;
474             }
475             else {
476                 /* A pad sequence means no more input.
477                 ** We've already interpreted the data
478                 ** from the quad at this point.
479                 */
480                 leftbits = 0;
481                 break;
482             }
483         }
484 
485         this_ch = table_a2b_base64[*ascii_data];
486         if ( this_ch == (unsigned char) -1 )
487             continue;
488 
489         /*
490         ** Shift it in on the low end, and see if there's
491         ** a byte ready for output.
492         */
493         quad_pos = (quad_pos + 1) & 0x03;
494         leftchar = (leftchar << 6) | (this_ch);
495         leftbits += 6;
496 
497         if ( leftbits >= 8 ) {
498             leftbits -= 8;
499             *bin_data++ = (leftchar >> leftbits) & 0xff;
500             leftchar &= ((1 << leftbits) - 1);
501         }
502     }
503 
504     if (leftbits != 0) {
505         PyErr_SetString(Error, "Incorrect padding");
506         _PyBytesWriter_Dealloc(&writer);
507         return NULL;
508     }
509 
510     return _PyBytesWriter_Finish(&writer, bin_data);
511 }
512 
513 
514 /*[clinic input]
515 binascii.b2a_base64
516 
517     data: Py_buffer
518     *
519     newline: int(c_default="1") = True
520 
521 Base64-code line of data.
522 [clinic start generated code]*/
523 
524 static PyObject *
binascii_b2a_base64_impl(PyObject * module,Py_buffer * data,int newline)525 binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int newline)
526 /*[clinic end generated code: output=4ad62c8e8485d3b3 input=7b2ea6fa38d8924c]*/
527 {
528     unsigned char *ascii_data;
529     const unsigned char *bin_data;
530     int leftbits = 0;
531     unsigned char this_ch;
532     unsigned int leftchar = 0;
533     Py_ssize_t bin_len, out_len;
534     _PyBytesWriter writer;
535 
536     bin_data = data->buf;
537     bin_len = data->len;
538     _PyBytesWriter_Init(&writer);
539 
540     assert(bin_len >= 0);
541 
542     if ( bin_len > BASE64_MAXBIN ) {
543         PyErr_SetString(Error, "Too much data for base64 line");
544         return NULL;
545     }
546 
547     /* We're lazy and allocate too much (fixed up later).
548        "+2" leaves room for up to two pad characters.
549        Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
550     out_len = bin_len*2 + 2;
551     if (newline)
552         out_len++;
553     ascii_data = _PyBytesWriter_Alloc(&writer, out_len);
554     if (ascii_data == NULL)
555         return NULL;
556 
557     for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
558         /* Shift the data into our buffer */
559         leftchar = (leftchar << 8) | *bin_data;
560         leftbits += 8;
561 
562         /* See if there are 6-bit groups ready */
563         while ( leftbits >= 6 ) {
564             this_ch = (leftchar >> (leftbits-6)) & 0x3f;
565             leftbits -= 6;
566             *ascii_data++ = table_b2a_base64[this_ch];
567         }
568     }
569     if ( leftbits == 2 ) {
570         *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
571         *ascii_data++ = BASE64_PAD;
572         *ascii_data++ = BASE64_PAD;
573     } else if ( leftbits == 4 ) {
574         *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
575         *ascii_data++ = BASE64_PAD;
576     }
577     if (newline)
578         *ascii_data++ = '\n';       /* Append a courtesy newline */
579 
580     return _PyBytesWriter_Finish(&writer, ascii_data);
581 }
582 
583 /*[clinic input]
584 binascii.a2b_hqx
585 
586     data: ascii_buffer
587     /
588 
589 Decode .hqx coding.
590 [clinic start generated code]*/
591 
592 static PyObject *
binascii_a2b_hqx_impl(PyObject * module,Py_buffer * data)593 binascii_a2b_hqx_impl(PyObject *module, Py_buffer *data)
594 /*[clinic end generated code: output=4d6d8c54d54ea1c1 input=0d914c680e0eed55]*/
595 {
596     const unsigned char *ascii_data;
597     unsigned char *bin_data;
598     int leftbits = 0;
599     unsigned char this_ch;
600     unsigned int leftchar = 0;
601     PyObject *res;
602     Py_ssize_t len;
603     int done = 0;
604     _PyBytesWriter writer;
605 
606     ascii_data = data->buf;
607     len = data->len;
608     _PyBytesWriter_Init(&writer);
609 
610     assert(len >= 0);
611 
612     if (len > PY_SSIZE_T_MAX - 2)
613         return PyErr_NoMemory();
614 
615     /* Allocate a string that is too big (fixed later)
616        Add two to the initial length to prevent interning which
617        would preclude subsequent resizing.  */
618     bin_data = _PyBytesWriter_Alloc(&writer, len + 2);
619     if (bin_data == NULL)
620         return NULL;
621 
622     for( ; len > 0 ; len--, ascii_data++ ) {
623         /* Get the byte and look it up */
624         this_ch = table_a2b_hqx[*ascii_data];
625         if ( this_ch == SKIP )
626             continue;
627         if ( this_ch == FAIL ) {
628             PyErr_SetString(Error, "Illegal char");
629             _PyBytesWriter_Dealloc(&writer);
630             return NULL;
631         }
632         if ( this_ch == DONE ) {
633             /* The terminating colon */
634             done = 1;
635             break;
636         }
637 
638         /* Shift it into the buffer and see if any bytes are ready */
639         leftchar = (leftchar << 6) | (this_ch);
640         leftbits += 6;
641         if ( leftbits >= 8 ) {
642             leftbits -= 8;
643             *bin_data++ = (leftchar >> leftbits) & 0xff;
644             leftchar &= ((1 << leftbits) - 1);
645         }
646     }
647 
648     if ( leftbits && !done ) {
649         PyErr_SetString(Incomplete,
650                         "String has incomplete number of bytes");
651         _PyBytesWriter_Dealloc(&writer);
652         return NULL;
653     }
654 
655     res = _PyBytesWriter_Finish(&writer, bin_data);
656     if (res == NULL)
657         return NULL;
658     return Py_BuildValue("Ni", res, done);
659 }
660 
661 
662 /*[clinic input]
663 binascii.rlecode_hqx
664 
665     data: Py_buffer
666     /
667 
668 Binhex RLE-code binary data.
669 [clinic start generated code]*/
670 
671 static PyObject *
binascii_rlecode_hqx_impl(PyObject * module,Py_buffer * data)672 binascii_rlecode_hqx_impl(PyObject *module, Py_buffer *data)
673 /*[clinic end generated code: output=393d79338f5f5629 input=e1f1712447a82b09]*/
674 {
675     const unsigned char *in_data;
676     unsigned char *out_data;
677     unsigned char ch;
678     Py_ssize_t in, inend, len;
679     _PyBytesWriter writer;
680 
681     _PyBytesWriter_Init(&writer);
682     in_data = data->buf;
683     len = data->len;
684 
685     assert(len >= 0);
686 
687     if (len > PY_SSIZE_T_MAX / 2 - 2)
688         return PyErr_NoMemory();
689 
690     /* Worst case: output is twice as big as input (fixed later) */
691     out_data = _PyBytesWriter_Alloc(&writer, len * 2 + 2);
692     if (out_data == NULL)
693         return NULL;
694 
695     for( in=0; in<len; in++) {
696         ch = in_data[in];
697         if ( ch == RUNCHAR ) {
698             /* RUNCHAR. Escape it. */
699             *out_data++ = RUNCHAR;
700             *out_data++ = 0;
701         } else {
702             /* Check how many following are the same */
703             for(inend=in+1;
704                 inend<len && in_data[inend] == ch &&
705                     inend < in+255;
706                 inend++) ;
707             if ( inend - in > 3 ) {
708                 /* More than 3 in a row. Output RLE. */
709                 *out_data++ = ch;
710                 *out_data++ = RUNCHAR;
711                 *out_data++ = (unsigned char) (inend-in);
712                 in = inend-1;
713             } else {
714                 /* Less than 3. Output the byte itself */
715                 *out_data++ = ch;
716             }
717         }
718     }
719 
720     return _PyBytesWriter_Finish(&writer, out_data);
721 }
722 
723 
724 /*[clinic input]
725 binascii.b2a_hqx
726 
727     data: Py_buffer
728     /
729 
730 Encode .hqx data.
731 [clinic start generated code]*/
732 
733 static PyObject *
binascii_b2a_hqx_impl(PyObject * module,Py_buffer * data)734 binascii_b2a_hqx_impl(PyObject *module, Py_buffer *data)
735 /*[clinic end generated code: output=d0aa5a704bc9f7de input=9596ebe019fe12ba]*/
736 {
737     unsigned char *ascii_data;
738     const unsigned char *bin_data;
739     int leftbits = 0;
740     unsigned char this_ch;
741     unsigned int leftchar = 0;
742     Py_ssize_t len;
743     _PyBytesWriter writer;
744 
745     bin_data = data->buf;
746     len = data->len;
747     _PyBytesWriter_Init(&writer);
748 
749     assert(len >= 0);
750 
751     if (len > PY_SSIZE_T_MAX / 2 - 2)
752         return PyErr_NoMemory();
753 
754     /* Allocate a buffer that is at least large enough */
755     ascii_data = _PyBytesWriter_Alloc(&writer, len * 2 + 2);
756     if (ascii_data == NULL)
757         return NULL;
758 
759     for( ; len > 0 ; len--, bin_data++ ) {
760         /* Shift into our buffer, and output any 6bits ready */
761         leftchar = (leftchar << 8) | *bin_data;
762         leftbits += 8;
763         while ( leftbits >= 6 ) {
764             this_ch = (leftchar >> (leftbits-6)) & 0x3f;
765             leftbits -= 6;
766             *ascii_data++ = table_b2a_hqx[this_ch];
767         }
768     }
769     /* Output a possible runt byte */
770     if ( leftbits ) {
771         leftchar <<= (6-leftbits);
772         *ascii_data++ = table_b2a_hqx[leftchar & 0x3f];
773     }
774 
775     return _PyBytesWriter_Finish(&writer, ascii_data);
776 }
777 
778 
779 /*[clinic input]
780 binascii.rledecode_hqx
781 
782     data: Py_buffer
783     /
784 
785 Decode hexbin RLE-coded string.
786 [clinic start generated code]*/
787 
788 static PyObject *
binascii_rledecode_hqx_impl(PyObject * module,Py_buffer * data)789 binascii_rledecode_hqx_impl(PyObject *module, Py_buffer *data)
790 /*[clinic end generated code: output=9826619565de1c6c input=54cdd49fc014402c]*/
791 {
792     const unsigned char *in_data;
793     unsigned char *out_data;
794     unsigned char in_byte, in_repeat;
795     Py_ssize_t in_len;
796     _PyBytesWriter writer;
797 
798     in_data = data->buf;
799     in_len = data->len;
800     _PyBytesWriter_Init(&writer);
801 
802     assert(in_len >= 0);
803 
804     /* Empty string is a special case */
805     if ( in_len == 0 )
806         return PyBytes_FromStringAndSize("", 0);
807     else if (in_len > PY_SSIZE_T_MAX / 2)
808         return PyErr_NoMemory();
809 
810     /* Allocate a buffer of reasonable size. Resized when needed */
811     out_data = _PyBytesWriter_Alloc(&writer, in_len);
812     if (out_data == NULL)
813         return NULL;
814 
815     /* Use overallocation */
816     writer.overallocate = 1;
817 
818     /*
819     ** We need two macros here to get/put bytes and handle
820     ** end-of-buffer for input and output strings.
821     */
822 #define INBYTE(b)                                                       \
823     do {                                                                \
824          if ( --in_len < 0 ) {                                          \
825            PyErr_SetString(Incomplete, "");                             \
826            goto error;                                                  \
827          }                                                              \
828          b = *in_data++;                                                \
829     } while(0)
830 
831     /*
832     ** Handle first byte separately (since we have to get angry
833     ** in case of an orphaned RLE code).
834     */
835     INBYTE(in_byte);
836 
837     if (in_byte == RUNCHAR) {
838         INBYTE(in_repeat);
839         /* only 1 byte will be written, but 2 bytes were preallocated:
840            subtract 1 byte to prevent overallocation */
841         writer.min_size--;
842 
843         if (in_repeat != 0) {
844             /* Note Error, not Incomplete (which is at the end
845             ** of the string only). This is a programmer error.
846             */
847             PyErr_SetString(Error, "Orphaned RLE code at start");
848             goto error;
849         }
850         *out_data++ = RUNCHAR;
851     } else {
852         *out_data++ = in_byte;
853     }
854 
855     while( in_len > 0 ) {
856         INBYTE(in_byte);
857 
858         if (in_byte == RUNCHAR) {
859             INBYTE(in_repeat);
860             /* only 1 byte will be written, but 2 bytes were preallocated:
861                subtract 1 byte to prevent overallocation */
862             writer.min_size--;
863 
864             if ( in_repeat == 0 ) {
865                 /* Just an escaped RUNCHAR value */
866                 *out_data++ = RUNCHAR;
867             } else {
868                 /* Pick up value and output a sequence of it */
869                 in_byte = out_data[-1];
870 
871                 /* enlarge the buffer if needed */
872                 if (in_repeat > 1) {
873                     /* -1 because we already preallocated 1 byte */
874                     out_data = _PyBytesWriter_Prepare(&writer, out_data,
875                                                       in_repeat - 1);
876                     if (out_data == NULL)
877                         goto error;
878                 }
879 
880                 while ( --in_repeat > 0 )
881                     *out_data++ = in_byte;
882             }
883         } else {
884             /* Normal byte */
885             *out_data++ = in_byte;
886         }
887     }
888     return _PyBytesWriter_Finish(&writer, out_data);
889 
890 error:
891     _PyBytesWriter_Dealloc(&writer);
892     return NULL;
893 }
894 
895 
896 /*[clinic input]
897 binascii.crc_hqx -> unsigned_int
898 
899     data: Py_buffer
900     crc: unsigned_int(bitwise=True)
901     /
902 
903 Compute CRC-CCITT incrementally.
904 [clinic start generated code]*/
905 
906 static unsigned int
binascii_crc_hqx_impl(PyObject * module,Py_buffer * data,unsigned int crc)907 binascii_crc_hqx_impl(PyObject *module, Py_buffer *data, unsigned int crc)
908 /*[clinic end generated code: output=8ec2a78590d19170 input=f18240ff8c705b79]*/
909 {
910     const unsigned char *bin_data;
911     Py_ssize_t len;
912 
913     crc &= 0xffff;
914     bin_data = data->buf;
915     len = data->len;
916 
917     while(len-- > 0) {
918         crc = ((crc<<8)&0xff00) ^ crctab_hqx[(crc>>8)^*bin_data++];
919     }
920 
921     return crc;
922 }
923 
924 #ifndef USE_ZLIB_CRC32
925 /*  Crc - 32 BIT ANSI X3.66 CRC checksum files
926     Also known as: ISO 3307
927 **********************************************************************|
928 *                                                                    *|
929 * Demonstration program to compute the 32-bit CRC used as the frame  *|
930 * check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71     *|
931 * and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level     *|
932 * protocol).  The 32-bit FCS was added via the Federal Register,     *|
933 * 1 June 1982, p.23798.  I presume but don't know for certain that   *|
934 * this polynomial is or will be included in CCITT V.41, which        *|
935 * defines the 16-bit CRC (often called CRC-CCITT) polynomial.  FIPS  *|
936 * PUB 78 says that the 32-bit FCS reduces otherwise undetected       *|
937 * errors by a factor of 10^-5 over 16-bit FCS.                       *|
938 *                                                                    *|
939 **********************************************************************|
940 
941  Copyright (C) 1986 Gary S. Brown.  You may use this program, or
942  code or tables extracted from it, as desired without restriction.
943 
944  First, the polynomial itself and its table of feedback terms.  The
945  polynomial is
946  X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
947  Note that we take it "backwards" and put the highest-order term in
948  the lowest-order bit.  The X^32 term is "implied"; the LSB is the
949  X^31 term, etc.  The X^0 term (usually shown as "+1") results in
950  the MSB being 1.
951 
952  Note that the usual hardware shift register implementation, which
953  is what we're using (we're merely optimizing it by doing eight-bit
954  chunks at a time) shifts bits into the lowest-order term.  In our
955  implementation, that means shifting towards the right.  Why do we
956  do it this way?  Because the calculated CRC must be transmitted in
957  order from highest-order term to lowest-order term.  UARTs transmit
958  characters in order from LSB to MSB.  By storing the CRC this way,
959  we hand it to the UART in the order low-byte to high-byte; the UART
 sends each low-bit to high-bit; and the result is transmission bit
961  by bit from highest- to lowest-order term without requiring any bit
962  shuffling on our part.  Reception works similarly.
963 
964  The feedback terms table consists of 256, 32-bit entries.  Notes:
965 
966   1. The table can be generated at runtime if desired; code to do so
967      is shown later.  It might not be obvious, but the feedback
968      terms simply represent the results of eight shift/xor opera-
969      tions for all combinations of data and CRC register values.
970 
971   2. The CRC accumulation logic is the same for all CRC polynomials,
972      be they sixteen or thirty-two bits wide.  You simply choose the
973      appropriate table.  Alternatively, because the table can be
974      generated at runtime, you can start by generating the table for
975      the polynomial in question and use exactly the same "updcrc",
976      if your application needn't simultaneously handle two CRC
977      polynomials.  (Note, however, that XMODEM is strange.)
978 
979   3. For 16-bit CRCs, the table entries need be only 16 bits wide;
980      of course, 32-bit entries work OK if the high 16 bits are zero.
981 
982   4. The values must be right-shifted by eight bits by the "updcrc"
983      logic; the shift must be unsigned (bring in zeroes).  On some
984      hardware you could probably optimize the shift in assembler by
985      using byte-swap instructions.
986 ********************************************************************/
987 
/* Precomputed CRC-32 feedback terms, one per possible byte value (256
 * entries).  Only compiled when USE_ZLIB_CRC32 is not defined; the fallback
 * implementation of binascii_crc32_impl indexes it with
 * (crc ^ byte) & 0xff. */
static const unsigned int crc_32_tab[256] = {
0x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU, 0x076dc419U,
0x706af48fU, 0xe963a535U, 0x9e6495a3U, 0x0edb8832U, 0x79dcb8a4U,
0xe0d5e91eU, 0x97d2d988U, 0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U,
0x90bf1d91U, 0x1db71064U, 0x6ab020f2U, 0xf3b97148U, 0x84be41deU,
0x1adad47dU, 0x6ddde4ebU, 0xf4d4b551U, 0x83d385c7U, 0x136c9856U,
0x646ba8c0U, 0xfd62f97aU, 0x8a65c9ecU, 0x14015c4fU, 0x63066cd9U,
0xfa0f3d63U, 0x8d080df5U, 0x3b6e20c8U, 0x4c69105eU, 0xd56041e4U,
0xa2677172U, 0x3c03e4d1U, 0x4b04d447U, 0xd20d85fdU, 0xa50ab56bU,
0x35b5a8faU, 0x42b2986cU, 0xdbbbc9d6U, 0xacbcf940U, 0x32d86ce3U,
0x45df5c75U, 0xdcd60dcfU, 0xabd13d59U, 0x26d930acU, 0x51de003aU,
0xc8d75180U, 0xbfd06116U, 0x21b4f4b5U, 0x56b3c423U, 0xcfba9599U,
0xb8bda50fU, 0x2802b89eU, 0x5f058808U, 0xc60cd9b2U, 0xb10be924U,
0x2f6f7c87U, 0x58684c11U, 0xc1611dabU, 0xb6662d3dU, 0x76dc4190U,
0x01db7106U, 0x98d220bcU, 0xefd5102aU, 0x71b18589U, 0x06b6b51fU,
0x9fbfe4a5U, 0xe8b8d433U, 0x7807c9a2U, 0x0f00f934U, 0x9609a88eU,
0xe10e9818U, 0x7f6a0dbbU, 0x086d3d2dU, 0x91646c97U, 0xe6635c01U,
0x6b6b51f4U, 0x1c6c6162U, 0x856530d8U, 0xf262004eU, 0x6c0695edU,
0x1b01a57bU, 0x8208f4c1U, 0xf50fc457U, 0x65b0d9c6U, 0x12b7e950U,
0x8bbeb8eaU, 0xfcb9887cU, 0x62dd1ddfU, 0x15da2d49U, 0x8cd37cf3U,
0xfbd44c65U, 0x4db26158U, 0x3ab551ceU, 0xa3bc0074U, 0xd4bb30e2U,
0x4adfa541U, 0x3dd895d7U, 0xa4d1c46dU, 0xd3d6f4fbU, 0x4369e96aU,
0x346ed9fcU, 0xad678846U, 0xda60b8d0U, 0x44042d73U, 0x33031de5U,
0xaa0a4c5fU, 0xdd0d7cc9U, 0x5005713cU, 0x270241aaU, 0xbe0b1010U,
0xc90c2086U, 0x5768b525U, 0x206f85b3U, 0xb966d409U, 0xce61e49fU,
0x5edef90eU, 0x29d9c998U, 0xb0d09822U, 0xc7d7a8b4U, 0x59b33d17U,
0x2eb40d81U, 0xb7bd5c3bU, 0xc0ba6cadU, 0xedb88320U, 0x9abfb3b6U,
0x03b6e20cU, 0x74b1d29aU, 0xead54739U, 0x9dd277afU, 0x04db2615U,
0x73dc1683U, 0xe3630b12U, 0x94643b84U, 0x0d6d6a3eU, 0x7a6a5aa8U,
0xe40ecf0bU, 0x9309ff9dU, 0x0a00ae27U, 0x7d079eb1U, 0xf00f9344U,
0x8708a3d2U, 0x1e01f268U, 0x6906c2feU, 0xf762575dU, 0x806567cbU,
0x196c3671U, 0x6e6b06e7U, 0xfed41b76U, 0x89d32be0U, 0x10da7a5aU,
0x67dd4accU, 0xf9b9df6fU, 0x8ebeeff9U, 0x17b7be43U, 0x60b08ed5U,
0xd6d6a3e8U, 0xa1d1937eU, 0x38d8c2c4U, 0x4fdff252U, 0xd1bb67f1U,
0xa6bc5767U, 0x3fb506ddU, 0x48b2364bU, 0xd80d2bdaU, 0xaf0a1b4cU,
0x36034af6U, 0x41047a60U, 0xdf60efc3U, 0xa867df55U, 0x316e8eefU,
0x4669be79U, 0xcb61b38cU, 0xbc66831aU, 0x256fd2a0U, 0x5268e236U,
0xcc0c7795U, 0xbb0b4703U, 0x220216b9U, 0x5505262fU, 0xc5ba3bbeU,
0xb2bd0b28U, 0x2bb45a92U, 0x5cb36a04U, 0xc2d7ffa7U, 0xb5d0cf31U,
0x2cd99e8bU, 0x5bdeae1dU, 0x9b64c2b0U, 0xec63f226U, 0x756aa39cU,
0x026d930aU, 0x9c0906a9U, 0xeb0e363fU, 0x72076785U, 0x05005713U,
0x95bf4a82U, 0xe2b87a14U, 0x7bb12baeU, 0x0cb61b38U, 0x92d28e9bU,
0xe5d5be0dU, 0x7cdcefb7U, 0x0bdbdf21U, 0x86d3d2d4U, 0xf1d4e242U,
0x68ddb3f8U, 0x1fda836eU, 0x81be16cdU, 0xf6b9265bU, 0x6fb077e1U,
0x18b74777U, 0x88085ae6U, 0xff0f6a70U, 0x66063bcaU, 0x11010b5cU,
0x8f659effU, 0xf862ae69U, 0x616bffd3U, 0x166ccf45U, 0xa00ae278U,
0xd70dd2eeU, 0x4e048354U, 0x3903b3c2U, 0xa7672661U, 0xd06016f7U,
0x4969474dU, 0x3e6e77dbU, 0xaed16a4aU, 0xd9d65adcU, 0x40df0b66U,
0x37d83bf0U, 0xa9bcae53U, 0xdebb9ec5U, 0x47b2cf7fU, 0x30b5ffe9U,
0xbdbdf21cU, 0xcabac28aU, 0x53b39330U, 0x24b4a3a6U, 0xbad03605U,
0xcdd70693U, 0x54de5729U, 0x23d967bfU, 0xb3667a2eU, 0xc4614ab8U,
0x5d681b02U, 0x2a6f2b94U, 0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU,
0x2d02ef8dU
};
1042 #endif  /* USE_ZLIB_CRC32 */
1043 
1044 /*[clinic input]
1045 binascii.crc32 -> unsigned_int
1046 
1047     data: Py_buffer
1048     crc: unsigned_int(bitwise=True) = 0
1049     /
1050 
1051 Compute CRC-32 incrementally.
1052 [clinic start generated code]*/
1053 
1054 static unsigned int
binascii_crc32_impl(PyObject * module,Py_buffer * data,unsigned int crc)1055 binascii_crc32_impl(PyObject *module, Py_buffer *data, unsigned int crc)
1056 /*[clinic end generated code: output=52cf59056a78593b input=bbe340bc99d25aa8]*/
1057 
1058 #ifdef USE_ZLIB_CRC32
1059 /* This was taken from zlibmodule.c PyZlib_crc32 (but is PY_SSIZE_T_CLEAN) */
1060 {
1061     const Byte *buf;
1062     Py_ssize_t len;
1063     int signed_val;
1064 
1065     buf = (Byte*)data->buf;
1066     len = data->len;
1067     signed_val = crc32(crc, buf, len);
1068     return (unsigned int)signed_val & 0xffffffffU;
1069 }
1070 #else  /* USE_ZLIB_CRC32 */
1071 { /* By Jim Ahlstrom; All rights transferred to CNRI */
1072     const unsigned char *bin_data;
1073     Py_ssize_t len;
1074     unsigned int result;
1075 
1076     bin_data = data->buf;
1077     len = data->len;
1078 
1079     crc = ~ crc;
1080     while (len-- > 0) {
1081         crc = crc_32_tab[(crc ^ *bin_data++) & 0xff] ^ (crc >> 8);
1082         /* Note:  (crc >> 8) MUST zero fill on left */
1083     }
1084 
1085     result = (crc ^ 0xFFFFFFFF);
1086     return result & 0xffffffff;
1087 }
1088 #endif  /* USE_ZLIB_CRC32 */
1089 
1090 /*[clinic input]
1091 binascii.b2a_hex
1092 
1093     data: Py_buffer
1094     /
1095 
1096 Hexadecimal representation of binary data.
1097 
1098 The return value is a bytes object.  This function is also
1099 available as "hexlify()".
1100 [clinic start generated code]*/
1101 
1102 static PyObject *
binascii_b2a_hex_impl(PyObject * module,Py_buffer * data)1103 binascii_b2a_hex_impl(PyObject *module, Py_buffer *data)
1104 /*[clinic end generated code: output=92fec1a95c9897a0 input=96423cfa299ff3b1]*/
1105 {
1106     return _Py_strhex_bytes((const char *)data->buf, data->len);
1107 }
1108 
1109 /*[clinic input]
1110 binascii.hexlify = binascii.b2a_hex
1111 
1112 Hexadecimal representation of binary data.
1113 
1114 The return value is a bytes object.
1115 [clinic start generated code]*/
1116 
1117 static PyObject *
binascii_hexlify_impl(PyObject * module,Py_buffer * data)1118 binascii_hexlify_impl(PyObject *module, Py_buffer *data)
1119 /*[clinic end generated code: output=749e95e53c14880c input=2e3afae7f083f061]*/
1120 {
1121     return _Py_strhex_bytes((const char *)data->buf, data->len);
1122 }
1123 
/* Convert one hexadecimal digit character to its numeric value.
 *
 *   c: character code of the digit ('0'-'9', 'a'-'f' or 'A'-'F').
 *
 * Returns 0..15 for a valid digit and -1 for anything else.  Plain range
 * comparisons are used, so every value outside the three digit ranges
 * (including negative values and values above 255) yields -1 and the result
 * can never fall outside 0..15.
 */
static int
to_int(int c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    return -1;
}
1137 
1138 
1139 /*[clinic input]
1140 binascii.a2b_hex
1141 
1142     hexstr: ascii_buffer
1143     /
1144 
1145 Binary data of hexadecimal representation.
1146 
1147 hexstr must contain an even number of hex digits (upper or lower case).
1148 This function is also available as "unhexlify()".
1149 [clinic start generated code]*/
1150 
1151 static PyObject *
binascii_a2b_hex_impl(PyObject * module,Py_buffer * hexstr)1152 binascii_a2b_hex_impl(PyObject *module, Py_buffer *hexstr)
1153 /*[clinic end generated code: output=0cc1a139af0eeecb input=9e1e7f2f94db24fd]*/
1154 {
1155     const char* argbuf;
1156     Py_ssize_t arglen;
1157     PyObject *retval;
1158     char* retbuf;
1159     Py_ssize_t i, j;
1160 
1161     argbuf = hexstr->buf;
1162     arglen = hexstr->len;
1163 
1164     assert(arglen >= 0);
1165 
1166     /* XXX What should we do about strings with an odd length?  Should
1167      * we add an implicit leading zero, or a trailing zero?  For now,
1168      * raise an exception.
1169      */
1170     if (arglen % 2) {
1171         PyErr_SetString(Error, "Odd-length string");
1172         return NULL;
1173     }
1174 
1175     retval = PyBytes_FromStringAndSize(NULL, (arglen/2));
1176     if (!retval)
1177         return NULL;
1178     retbuf = PyBytes_AS_STRING(retval);
1179 
1180     for (i=j=0; i < arglen; i += 2) {
1181         int top = to_int(Py_CHARMASK(argbuf[i]));
1182         int bot = to_int(Py_CHARMASK(argbuf[i+1]));
1183         if (top == -1 || bot == -1) {
1184             PyErr_SetString(Error,
1185                             "Non-hexadecimal digit found");
1186             goto finally;
1187         }
1188         retbuf[j++] = (top << 4) + bot;
1189     }
1190     return retval;
1191 
1192   finally:
1193     Py_DECREF(retval);
1194     return NULL;
1195 }
1196 
1197 /*[clinic input]
1198 binascii.unhexlify = binascii.a2b_hex
1199 
1200 Binary data of hexadecimal representation.
1201 
1202 hexstr must contain an even number of hex digits (upper or lower case).
1203 [clinic start generated code]*/
1204 
1205 static PyObject *
binascii_unhexlify_impl(PyObject * module,Py_buffer * hexstr)1206 binascii_unhexlify_impl(PyObject *module, Py_buffer *hexstr)
1207 /*[clinic end generated code: output=51a64c06c79629e3 input=dd8c012725f462da]*/
1208 {
1209     return binascii_a2b_hex_impl(module, hexstr);
1210 }
1211 
/* Value of each 7-bit character code when read as a hex digit: 0..9 for
 * '0'-'9', 10..15 for 'A'-'F' and 'a'-'f', and -1 for every other code.
 * The table covers codes 0..127 only. */
static const int table_hex[128] = {
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
   0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};

/* Look up the hex value of character code c.  No bounds check is done:
 * c must be below 128, otherwise the read is past the end of table_hex. */
#define hexval(c) table_hex[(unsigned int)(c)]

/* Line-length limit used by the quoted-printable encoder when deciding
 * where to insert soft line breaks. */
#define MAXLINESIZE 76
1226 
1227 
1228 /*[clinic input]
1229 binascii.a2b_qp
1230 
1231     data: ascii_buffer
1232     header: int(c_default="0") = False
1233 
1234 Decode a string of qp-encoded data.
1235 [clinic start generated code]*/
1236 
1237 static PyObject *
binascii_a2b_qp_impl(PyObject * module,Py_buffer * data,int header)1238 binascii_a2b_qp_impl(PyObject *module, Py_buffer *data, int header)
1239 /*[clinic end generated code: output=e99f7846cfb9bc53 input=5187a0d3d8e54f3b]*/
1240 {
1241     Py_ssize_t in, out;
1242     char ch;
1243     const unsigned char *ascii_data;
1244     unsigned char *odata;
1245     Py_ssize_t datalen = 0;
1246     PyObject *rv;
1247 
1248     ascii_data = data->buf;
1249     datalen = data->len;
1250 
1251     /* We allocate the output same size as input, this is overkill.
1252      * The previous implementation used calloc() so we'll zero out the
1253      * memory here too, since PyMem_Malloc() does not guarantee that.
1254      */
1255     odata = (unsigned char *) PyMem_Malloc(datalen);
1256     if (odata == NULL) {
1257         PyErr_NoMemory();
1258         return NULL;
1259     }
1260     memset(odata, 0, datalen);
1261 
1262     in = out = 0;
1263     while (in < datalen) {
1264         if (ascii_data[in] == '=') {
1265             in++;
1266             if (in >= datalen) break;
1267             /* Soft line breaks */
1268             if ((ascii_data[in] == '\n') || (ascii_data[in] == '\r')) {
1269                 if (ascii_data[in] != '\n') {
1270                     while (in < datalen && ascii_data[in] != '\n') in++;
1271                 }
1272                 if (in < datalen) in++;
1273             }
1274             else if (ascii_data[in] == '=') {
1275                 /* broken case from broken python qp */
1276                 odata[out++] = '=';
1277                 in++;
1278             }
1279             else if ((in + 1 < datalen) &&
1280                      ((ascii_data[in] >= 'A' && ascii_data[in] <= 'F') ||
1281                       (ascii_data[in] >= 'a' && ascii_data[in] <= 'f') ||
1282                       (ascii_data[in] >= '0' && ascii_data[in] <= '9')) &&
1283                      ((ascii_data[in+1] >= 'A' && ascii_data[in+1] <= 'F') ||
1284                       (ascii_data[in+1] >= 'a' && ascii_data[in+1] <= 'f') ||
1285                       (ascii_data[in+1] >= '0' && ascii_data[in+1] <= '9'))) {
1286                 /* hexval */
1287                 ch = hexval(ascii_data[in]) << 4;
1288                 in++;
1289                 ch |= hexval(ascii_data[in]);
1290                 in++;
1291                 odata[out++] = ch;
1292             }
1293             else {
1294               odata[out++] = '=';
1295             }
1296         }
1297         else if (header && ascii_data[in] == '_') {
1298             odata[out++] = ' ';
1299             in++;
1300         }
1301         else {
1302             odata[out] = ascii_data[in];
1303             in++;
1304             out++;
1305         }
1306     }
1307     if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
1308         PyMem_Free(odata);
1309         return NULL;
1310     }
1311     PyMem_Free(odata);
1312     return rv;
1313 }
1314 
/* Write the two uppercase hex digits of `ch` to s[0] (high nibble) and
 * s[1] (low nibble).  No terminator is written.  Always returns 0. */
static int
to_hex (unsigned char ch, unsigned char *s)
{
    static const char hexdigits[] = "0123456789ABCDEF";

    s[0] = hexdigits[(ch >> 4) & 0x0f];
    s[1] = hexdigits[ch & 0x0f];
    return 0;
}
1325 
1326 /* XXX: This is ridiculously complicated to be backward compatible
1327  * (mostly) with the quopri module.  It doesn't re-create the quopri
1328  * module bug where text ending in CRLF has the CR encoded */
1329 
1330 /*[clinic input]
1331 binascii.b2a_qp
1332 
1333     data: Py_buffer
1334     quotetabs: int(c_default="0") = False
1335     istext: int(c_default="1") = True
1336     header: int(c_default="0") = False
1337 
1338 Encode a string using quoted-printable encoding.
1339 
1340 On encoding, when istext is set, newlines are not encoded, and white
1341 space at end of lines is.  When istext is not set, \r and \n (CR/LF)
1342 are both encoded.  When quotetabs is set, space and tabs are encoded.
1343 [clinic start generated code]*/
1344 
static PyObject *
binascii_b2a_qp_impl(PyObject *module, Py_buffer *data, int quotetabs,
                     int istext, int header)
/*[clinic end generated code: output=e9884472ebb1a94c input=7f2a9aaa008e92b2]*/
{
    /* Encode `data` as quoted-printable and return a new bytes object.
     *
     *   quotetabs: when true, tabs and spaces are written as =XX.
     *   istext:    when true, line ends are passed through as line ends;
     *              when false, CR and LF are written as =XX.
     *   header:    when true, '_' is written as =XX and an unquoted space
     *              is written as '_'.
     *
     * The work is done in two passes over the input that apply the same
     * tests: the first only adds up the output size, the second writes the
     * output.  The two passes must stay in step, because the second writes
     * into a buffer of exactly the size the first computed.
     *
     * Returns NULL with MemoryError set if the size overflows or the buffer
     * cannot be allocated.
     */
    Py_ssize_t in, out;
    const unsigned char *databuf;
    unsigned char *odata;
    Py_ssize_t datalen = 0, odatalen = 0;
    PyObject *rv;
    unsigned int linelen = 0;
    unsigned char ch;
    int crlf = 0;
    const unsigned char *p;

    databuf = data->buf;
    datalen = data->len;

    /* See if this string is using CRLF line ends */
    /* XXX: this function has the side effect of converting all of
     * the end of lines to be the same depending on this detection
     * here */
    /* Only the first '\n' is examined: crlf is set when it is preceded
     * by '\r'. */
    p = (const unsigned char *) memchr(databuf, '\n', datalen);
    if ((p != NULL) && (p > databuf) && (*(p-1) == '\r'))
        crlf = 1;

    /* First, scan to see how many characters need to be encoded */
    in = 0;
    while (in < datalen) {
        /* Number of output bytes the current input position produces. */
        Py_ssize_t delta = 0;
        /* Bytes that must be written as =XX:
         *  - values above 126, and '='
         *  - '_' in header mode
         *  - a '.' at the start of a line that is followed by the end of
         *    the data, a line end, or a NUL byte
         *  - CR and LF when not in text mode
         *  - a tab or space that is the very last byte of the data
         *  - values below 33 other than CR and LF; tab and space are
         *    included only when quotetabs is set
         */
        if ((databuf[in] > 126) ||
            (databuf[in] == '=') ||
            (header && databuf[in] == '_') ||
            ((databuf[in] == '.') && (linelen == 0) &&
             (in + 1 == datalen || databuf[in+1] == '\n' ||
              databuf[in+1] == '\r' || databuf[in+1] == 0)) ||
            (!istext && ((databuf[in] == '\r') || (databuf[in] == '\n'))) ||
            ((databuf[in] == '\t' || databuf[in] == ' ') && (in + 1 == datalen)) ||
            ((databuf[in] < 33) &&
             (databuf[in] != '\r') && (databuf[in] != '\n') &&
             (quotetabs || ((databuf[in] != '\t') && (databuf[in] != ' ')))))
        {
            /* A soft line break ("=" plus the line end) goes first when
             * the three-character escape would reach the line limit. */
            if ((linelen + 3) >= MAXLINESIZE) {
                linelen = 0;
                if (crlf)
                    delta += 3;
                else
                    delta += 2;
            }
            linelen += 3;
            delta += 3;
            in++;
        }
        else {
            /* Text mode: a bare LF, or a CR immediately followed by LF,
             * is a line end. */
            if (istext &&
                ((databuf[in] == '\n') ||
                 ((in+1 < datalen) && (databuf[in] == '\r') &&
                 (databuf[in+1] == '\n'))))
            {
                linelen = 0;
                /* Protect against whitespace on end of line */
                /* The preceding space/tab grows from one byte to a
                 * three-byte =XX escape, hence two extra bytes.  This may
                 * reserve more than the second pass ends up using. */
                if (in && ((databuf[in-1] == ' ') || (databuf[in-1] == '\t')))
                    delta += 2;
                if (crlf)
                    delta += 2;
                else
                    delta += 1;
                if (databuf[in] == '\r')
                    in += 2;
                else
                    in++;
            }
            else {
                /* Literal byte.  A soft line break precedes it when the
                 * line is full, unless it is the last byte of the data or
                 * is directly followed by LF. */
                if ((in + 1 != datalen) &&
                    (databuf[in+1] != '\n') &&
                    (linelen + 1) >= MAXLINESIZE) {
                    linelen = 0;
                    if (crlf)
                        delta += 3;
                    else
                        delta += 2;
                }
                linelen++;
                delta++;
                in++;
            }
        }
        /* Refuse to let the running total overflow Py_ssize_t. */
        if (PY_SSIZE_T_MAX - delta < odatalen) {
            PyErr_NoMemory();
            return NULL;
        }
        odatalen += delta;
    }

    /* Allocate the output buffer with the size computed by the first pass.
     * The previous implementation used calloc() so we'll zero out the
     * memory here too, since PyMem_Malloc() does not guarantee that.
     */
    odata = (unsigned char *) PyMem_Malloc(odatalen);
    if (odata == NULL) {
        PyErr_NoMemory();
        return NULL;
    }
    memset(odata, 0, odatalen);

    /* Second pass: same tests as above, this time writing the output. */
    in = out = linelen = 0;
    while (in < datalen) {
        if ((databuf[in] > 126) ||
            (databuf[in] == '=') ||
            (header && databuf[in] == '_') ||
            ((databuf[in] == '.') && (linelen == 0) &&
             (in + 1 == datalen || databuf[in+1] == '\n' ||
              databuf[in+1] == '\r' || databuf[in+1] == 0)) ||
            (!istext && ((databuf[in] == '\r') || (databuf[in] == '\n'))) ||
            ((databuf[in] == '\t' || databuf[in] == ' ') && (in + 1 == datalen)) ||
            ((databuf[in] < 33) &&
             (databuf[in] != '\r') && (databuf[in] != '\n') &&
             (quotetabs || ((databuf[in] != '\t') && (databuf[in] != ' ')))))
        {
            /* Soft line break before the escape if the line is full. */
            if ((linelen + 3 )>= MAXLINESIZE) {
                odata[out++] = '=';
                if (crlf) odata[out++] = '\r';
                odata[out++] = '\n';
                linelen = 0;
            }
            /* Emit the byte as '=' followed by two uppercase hex digits. */
            odata[out++] = '=';
            to_hex(databuf[in], &odata[out]);
            out += 2;
            in++;
            linelen += 3;
        }
        else {
            if (istext &&
                ((databuf[in] == '\n') ||
                 ((in+1 < datalen) && (databuf[in] == '\r') &&
                 (databuf[in+1] == '\n'))))
            {
                linelen = 0;
                /* Protect against whitespace on end of line */
                /* Rewrite an already-written trailing space/tab in place
                 * as its =XX escape. */
                if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
                    ch = odata[out-1];
                    odata[out-1] = '=';
                    to_hex(ch, &odata[out]);
                    out += 2;
                }

                /* Write the line end in the detected convention, whatever
                 * form it had in the input. */
                if (crlf) odata[out++] = '\r';
                odata[out++] = '\n';
                if (databuf[in] == '\r')
                    in += 2;
                else
                    in++;
            }
            else {
                /* Literal byte, preceded by a soft line break when the
                 * line is full. */
                if ((in + 1 != datalen) &&
                    (databuf[in+1] != '\n') &&
                    (linelen + 1) >= MAXLINESIZE) {
                    odata[out++] = '=';
                    if (crlf) odata[out++] = '\r';
                    odata[out++] = '\n';
                    linelen = 0;
                }
                linelen++;
                /* In header mode an unquoted space is written as '_'. */
                if (header && databuf[in] == ' ') {
                    odata[out++] = '_';
                    in++;
                }
                else {
                    odata[out++] = databuf[in++];
                }
            }
        }
    }
    /* Copy the bytes actually written into a bytes object; the scratch
     * buffer is released on both the success and the failure path. */
    if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
        PyMem_Free(odata);
        return NULL;
    }
    PyMem_Free(odata);
    return rv;
}
1525 
1526 /* List of functions defined in the module */
1527 
/* Method table referenced by binasciimodule.  Each entry is a *_METHODDEF
 * macro defined outside this part of the file (NOTE(review): presumably
 * generated by Argument Clinic -- confirm).  The entries are written without
 * separating commas, so each macro must expand to a complete initializer
 * including its trailing comma.  The {NULL, NULL} entry terminates the
 * table. */
static struct PyMethodDef binascii_module_methods[] = {
    BINASCII_A2B_UU_METHODDEF
    BINASCII_B2A_UU_METHODDEF
    BINASCII_A2B_BASE64_METHODDEF
    BINASCII_B2A_BASE64_METHODDEF
    BINASCII_A2B_HQX_METHODDEF
    BINASCII_B2A_HQX_METHODDEF
    BINASCII_A2B_HEX_METHODDEF
    BINASCII_B2A_HEX_METHODDEF
    BINASCII_HEXLIFY_METHODDEF
    BINASCII_UNHEXLIFY_METHODDEF
    BINASCII_RLECODE_HQX_METHODDEF
    BINASCII_RLEDECODE_HQX_METHODDEF
    BINASCII_CRC_HQX_METHODDEF
    BINASCII_CRC32_METHODDEF
    BINASCII_A2B_QP_METHODDEF
    BINASCII_B2A_QP_METHODDEF
    {NULL, NULL}                             /* sentinel */
};
1547 
1548 
/* Module docstring and module definition.  The initialization function
 * further down (*must* be called PyInit_binascii) passes this definition to
 * PyModule_Create(). */
PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");


static struct PyModuleDef binasciimodule = {
    PyModuleDef_HEAD_INIT,
    /* module name */
    "binascii",
    /* module docstring */
    doc_binascii,
    /* NOTE(review): per the PyModuleDef documentation this is m_size, and
     * -1 means the module keeps its state in global variables -- which fits
     * the file-level Error/Incomplete objects set by PyInit_binascii. */
    -1,
    /* method table */
    binascii_module_methods,
    /* remaining optional members are unused */
    NULL,
    NULL,
    NULL,
    NULL
};
1564 
1565 PyMODINIT_FUNC
PyInit_binascii(void)1566 PyInit_binascii(void)
1567 {
1568     PyObject *m, *d;
1569 
1570     /* Create the module and add the functions */
1571     m = PyModule_Create(&binasciimodule);
1572     if (m == NULL)
1573         return NULL;
1574 
1575     d = PyModule_GetDict(m);
1576 
1577     Error = PyErr_NewException("binascii.Error", PyExc_ValueError, NULL);
1578     PyDict_SetItemString(d, "Error", Error);
1579     Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
1580     PyDict_SetItemString(d, "Incomplete", Incomplete);
1581     if (PyErr_Occurred()) {
1582         Py_DECREF(m);
1583         m = NULL;
1584     }
1585     return m;
1586 }
1587