#! /usr/bin/env python3

"""Base16, Base32, Base64 (RFC 3548), Base85 and Ascii85 data encodings"""

# Modified 04-Oct-1995 by Jack Jansen to use binascii module
# Modified 30-Dec-2003 by Barry Warsaw to add full RFC 3548 support
# Modified 22-May-2007 by Guido van Rossum to use bytes everywhere

import re
import struct
import binascii


__all__ = [
    # Legacy interface exports traditional RFC 2045 Base64 encodings
    'encode', 'decode', 'encodebytes', 'decodebytes',
    # Generalized interface for other encodings
    'b64encode', 'b64decode', 'b32encode', 'b32decode',
    'b32hexencode', 'b32hexdecode', 'b16encode', 'b16decode',
    # Base85 and Ascii85 encodings
    'b85encode', 'b85decode', 'a85encode', 'a85decode',
    # Standard Base64 encoding
    'standard_b64encode', 'standard_b64decode',
    # Some common Base64 alternatives.  As referenced by RFC 3548, see thread
    # starting at:
    #
    # http://zgp.org/pipermail/p2p-hackers/2001-September/000316.html
    'urlsafe_b64encode', 'urlsafe_b64decode',
    ]


bytes_types = (bytes, bytearray)  # Types acceptable as binary data


def _bytes_from_decode_data(s):
    """Return s as binary data suitable for the decoders.

    A str is encoded as ASCII (ValueError if it contains non-ASCII
    characters); bytes and bytearray are returned unchanged; any other
    object supporting the buffer protocol is copied into a bytes object.
    Anything else raises TypeError.
    """
    if isinstance(s, str):
        try:
            return s.encode('ascii')
        except UnicodeEncodeError:
            raise ValueError('string argument should contain only ASCII characters')
    if isinstance(s, bytes_types):
        return s
    try:
        return memoryview(s).tobytes()
    except TypeError:
        raise TypeError("argument should be a bytes-like object or ASCII "
                        "string, not %r" % s.__class__.__name__) from None


# Base64 encoding/decoding uses binascii

def b64encode(s, altchars=None):
    """Encode the bytes-like object s using Base64 and return a bytes object.

    Optional altchars should be a byte string of length 2 which specifies an
    alternative alphabet for the '+' and '/' characters.  This allows an
    application to e.g. generate url or filesystem safe Base64 strings.
    """
    encoded = binascii.b2a_base64(s, newline=False)
    if altchars is not None:
        assert len(altchars) == 2, repr(altchars)
        return encoded.translate(bytes.maketrans(b'+/', altchars))
    return encoded


def b64decode(s, altchars=None, validate=False):
    """Decode the Base64 encoded bytes-like object or ASCII string s.

    Optional altchars must be a bytes-like object or ASCII string of length 2
    which specifies the alternative alphabet used instead of the '+' and '/'
    characters.

    The result is returned as a bytes object.  A binascii.Error is raised if
    s is incorrectly padded.

    If validate is False (the default), characters that are neither in the
    normal base-64 alphabet nor the alternative alphabet are discarded prior
    to the padding check.  If validate is True, these non-alphabet characters
    in the input result in a binascii.Error.
    """
    s = _bytes_from_decode_data(s)
    if altchars is not None:
        altchars = _bytes_from_decode_data(altchars)
        assert len(altchars) == 2, repr(altchars)
        # Map the alternative characters back onto the standard alphabet.
        s = s.translate(bytes.maketrans(altchars, b'+/'))
    if validate and not re.fullmatch(b'[A-Za-z0-9+/]*={0,2}', s):
        raise binascii.Error('Non-base64 digit found')
    return binascii.a2b_base64(s)


def standard_b64encode(s):
    """Encode bytes-like object s using the standard Base64 alphabet.

    The result is returned as a bytes object.
    """
    return b64encode(s)


def standard_b64decode(s):
    """Decode bytes encoded with the standard Base64 alphabet.

    Argument s is a bytes-like object or ASCII string to decode.  The result
    is returned as a bytes object.  A binascii.Error is raised if the input
    is incorrectly padded.  Characters that are not in the standard alphabet
    are discarded prior to the padding check.
    """
    return b64decode(s)


_urlsafe_encode_translation = bytes.maketrans(b'+/', b'-_')
_urlsafe_decode_translation = bytes.maketrans(b'-_', b'+/')


def urlsafe_b64encode(s):
    """Encode bytes using the URL- and filesystem-safe Base64 alphabet.

    Argument s is a bytes-like object to encode.  The result is returned as a
    bytes object.  The alphabet uses '-' instead of '+' and '_' instead of
    '/'.
    """
    return b64encode(s).translate(_urlsafe_encode_translation)


def urlsafe_b64decode(s):
    """Decode bytes using the URL- and filesystem-safe Base64 alphabet.

    Argument s is a bytes-like object or ASCII string to decode.  The result
    is returned as a bytes object.  A binascii.Error is raised if the input
    is incorrectly padded.  Characters that are not in the URL-safe base-64
    alphabet, and are not a plus '+' or slash '/', are discarded prior to the
    padding check.

    The alphabet uses '-' instead of '+' and '_' instead of '/'.
    """
    s = _bytes_from_decode_data(s)
    s = s.translate(_urlsafe_decode_translation)
    return b64decode(s)



# Base32 encoding/decoding must be done in Python
_B32_ENCODE_DOCSTRING = '''
Encode the bytes-like objects using {encoding} and return a bytes object.
'''
_B32_DECODE_DOCSTRING = '''
Decode the {encoding} encoded bytes-like object or ASCII string s.

Optional casefold is a flag specifying whether a lowercase alphabet is
acceptable as input.  For security purposes, the default is False.
{extra_args}
The result is returned as a bytes object.  A binascii.Error is raised if
the input is incorrectly padded or if there are non-alphabet
characters present in the input.
'''
_B32_DECODE_MAP01_DOCSTRING = '''
RFC 3548 allows for optional mapping of the digit 0 (zero) to the
letter O (oh), and for optional mapping of the digit 1 (one) to
either the letter I (eye) or letter L (el).  The optional argument
map01 when not None, specifies which letter the digit 1 should be
mapped to (when map01 is not None, the digit 0 is always mapped to
the letter O).  For security purposes the default is None, so that
0 and 1 are not allowed in the input.
'''
_b32alphabet = b'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'
_b32hexalphabet = b'0123456789ABCDEFGHIJKLMNOPQRSTUV'
# Lazily filled caches, keyed by alphabet:
#   _b32tab2: list of all 1024 two-character combinations (10 bits each)
#   _b32rev:  dict mapping a byte value to its 5-bit digit value
_b32tab2 = {}
_b32rev = {}


def _b32encode(alphabet, s):
    """Encode the bytes-like object s with the given 32-character alphabet.

    The input is zero-padded to a multiple of 5 bytes, every 5-byte quantum
    is emitted as 8 characters, and the characters that correspond only to
    padding are replaced by '='.  Returns a bytes object.
    """
    # Delay the initialization of the table to not waste memory
    # if the function is never called
    if alphabet not in _b32tab2:
        b32tab = [bytes((i,)) for i in alphabet]
        _b32tab2[alphabet] = [a + b for a in b32tab for b in b32tab]

    if not isinstance(s, bytes_types):
        s = memoryview(s).tobytes()
    leftover = len(s) % 5
    # Pad the last quantum with zero bits if necessary
    if leftover:
        s = s + b'\0' * (5 - leftover)  # Don't use += !
    encoded = bytearray()
    from_bytes = int.from_bytes
    b32tab2 = _b32tab2[alphabet]
    for i in range(0, len(s), 5):
        c = from_bytes(s[i: i + 5], 'big')
        encoded += (b32tab2[c >> 30] +           # bits 1 - 10
                    b32tab2[(c >> 20) & 0x3ff] + # bits 11 - 20
                    b32tab2[(c >> 10) & 0x3ff] + # bits 21 - 30
                    b32tab2[c & 0x3ff]           # bits 31 - 40
                   )
    # Adjust for any leftover partial quanta
    if leftover == 1:
        encoded[-6:] = b'======'
    elif leftover == 2:
        encoded[-4:] = b'===='
    elif leftover == 3:
        encoded[-3:] = b'==='
    elif leftover == 4:
        encoded[-1:] = b'='
    return bytes(encoded)


def _b32decode(alphabet, s, casefold=False, map01=None):
    """Decode s, which was encoded with the given 32-character alphabet.

    s is a bytes-like object or ASCII string.  casefold upper-cases the
    input first; map01, when not None, is the letter the digit 1 is mapped
    to (the digit 0 is then mapped to the letter O).  Returns a bytes
    object.  Raises binascii.Error on incorrect padding or when a character
    outside the alphabet is found.
    """
    # Delay the initialization of the table to not waste memory
    # if the function is never called
    if alphabet not in _b32rev:
        _b32rev[alphabet] = {v: k for k, v in enumerate(alphabet)}
    s = _bytes_from_decode_data(s)
    if len(s) % 8:
        raise binascii.Error('Incorrect padding')
    # Handle section 2.4 zero and one mapping.  The flag map01 will be either
    # None, or the character to map the digit 1 (one) to.  It should be
    # either L (el) or I (eye).
    if map01 is not None:
        map01 = _bytes_from_decode_data(map01)
        assert len(map01) == 1, repr(map01)
        s = s.translate(bytes.maketrans(b'01', b'O' + map01))
    if casefold:
        s = s.upper()
    # Strip off pad characters from the right.  We need to count the pad
    # characters because this will tell us how many null bytes to remove from
    # the end of the decoded string.
    padded_len = len(s)
    s = s.rstrip(b'=')
    padchars = padded_len - len(s)
    # Now decode the full quanta
    decoded = bytearray()
    b32rev = _b32rev[alphabet]
    for i in range(0, len(s), 8):
        quanta = s[i: i + 8]
        acc = 0
        try:
            for c in quanta:
                acc = (acc << 5) + b32rev[c]
        except KeyError:
            raise binascii.Error('Non-base32 digit found') from None
        decoded += acc.to_bytes(5, 'big')
    # Process the last, partial quanta.  The total length was already
    # verified to be a multiple of 8 above, so only the number of pad
    # characters still needs checking here.
    if padchars not in {0, 1, 3, 4, 6}:
        raise binascii.Error('Incorrect padding')
    if padchars and decoded:
        # The final quantum was accumulated from fewer than 8 characters;
        # shift it into position and keep only the bytes that carry data.
        acc <<= 5 * padchars
        last = acc.to_bytes(5, 'big')
        leftover = (43 - 5 * padchars) // 8  # 1: 4, 3: 3, 4: 2, 6: 1
        decoded[-5:] = last[:leftover]
    return bytes(decoded)


def b32encode(s):
    return _b32encode(_b32alphabet, s)
b32encode.__doc__ = _B32_ENCODE_DOCSTRING.format(encoding='base32')


def b32decode(s, casefold=False, map01=None):
    return _b32decode(_b32alphabet, s, casefold, map01)
b32decode.__doc__ = _B32_DECODE_DOCSTRING.format(encoding='base32',
                                        extra_args=_B32_DECODE_MAP01_DOCSTRING)


def b32hexencode(s):
    return _b32encode(_b32hexalphabet, s)
b32hexencode.__doc__ = _B32_ENCODE_DOCSTRING.format(encoding='base32hex')


def b32hexdecode(s, casefold=False):
    # base32hex does not have the 01 mapping
    return _b32decode(_b32hexalphabet, s, casefold)
b32hexdecode.__doc__ = _B32_DECODE_DOCSTRING.format(encoding='base32hex',
                                                    extra_args='')


# RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns
# lowercase.  The RFC also recommends against accepting input case
# insensitively.
def b16encode(s):
    """Encode the bytes-like object s using Base16 and return a bytes object.
    """
    return binascii.hexlify(s).upper()


def b16decode(s, casefold=False):
    """Decode the Base16 encoded bytes-like object or ASCII string s.

    Optional casefold is a flag specifying whether a lowercase alphabet is
    acceptable as input.  For security purposes, the default is False.

    The result is returned as a bytes object.  A binascii.Error is raised if
    s is incorrectly padded or if there are non-alphabet characters present
    in the input.
    """
    s = _bytes_from_decode_data(s)
    if casefold:
        s = s.upper()
    if re.search(b'[^0-9A-F]', s):
        raise binascii.Error('Non-base16 digit found')
    return binascii.unhexlify(s)

#
# Ascii85 encoding/decoding
#

_a85chars = None
_a85chars2 = None
_A85START = b"<~"
_A85END = b"~>"


def _85encode(b, chars, chars2, pad=False, foldnuls=False, foldspaces=False):
    """Helper function for a85encode and b85encode.

    b is zero-padded to a multiple of 4 bytes and split into big-endian
    32-bit words; each word becomes five base-85 digits looked up in chars
    (single digits) and chars2 (digit pairs).  With foldnuls an all-zero
    word becomes b'z'; with foldspaces a word of four spaces becomes b'y'.
    Unless pad is true, the output characters that stem from the padding
    are removed again.  Returns a bytes object.
    """
    if not isinstance(b, bytes_types):
        b = memoryview(b).tobytes()

    padding = (-len(b)) % 4
    if padding:
        b = b + b'\0' * padding
    words = struct.Struct('!%dI' % (len(b) // 4)).unpack(b)

    # 614125 == 85**3 and 7225 == 85**2: the first lookup yields digits
    # 1-2, the second digits 3-4 and the last one digit 5.
    chunks = [b'z' if foldnuls and not word else
              b'y' if foldspaces and word == 0x20202020 else
              (chars2[word // 614125] +
               chars2[word // 85 % 7225] +
               chars[word % 85])
              for word in words]

    if padding and not pad:
        if chunks[-1] == b'z':
            # A folded final word must be expanded before it is truncated.
            chunks[-1] = chars[0] * 5
        chunks[-1] = chunks[-1][:-padding]

    return b''.join(chunks)


def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):
    """Encode bytes-like object b using Ascii85 and return a bytes object.

    foldspaces is an optional flag that uses the special short sequence 'y'
    instead of 4 consecutive spaces (ASCII 0x20) as supported by 'btoa'. This
    feature is not supported by the "standard" Adobe encoding.

    wrapcol controls whether the output should have newline (b'\\n') characters
    added to it. If this is non-zero, each output line will be at most this
    many characters long.

    pad controls whether the input is padded to a multiple of 4 before
    encoding. Note that the btoa implementation always pads.

    adobe controls whether the encoded byte sequence is framed with <~ and ~>,
    which is used by the Adobe implementation.
    """
    global _a85chars, _a85chars2
    # Delay the initialization of tables to not waste memory
    # if the function is never called
    if _a85chars2 is None:
        _a85chars = [bytes((i,)) for i in range(33, 118)]
        _a85chars2 = [(a + b) for a in _a85chars for b in _a85chars]

    result = _85encode(b, _a85chars, _a85chars2, pad, True, foldspaces)

    if adobe:
        result = _A85START + result
    if wrapcol:
        wrapcol = max(2 if adobe else 1, wrapcol)
        chunks = [result[i: i + wrapcol]
                  for i in range(0, len(result), wrapcol)]
        if adobe:
            # Put the end marker on a line of its own when it would not
            # fit on the last line.
            if len(chunks[-1]) + 2 > wrapcol:
                chunks.append(b'')
        result = b'\n'.join(chunks)
    if adobe:
        result += _A85END

    return result


def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'):
    """Decode the Ascii85 encoded bytes-like object or ASCII string b.

    foldspaces is a flag that specifies whether the 'y' short sequence should be
    accepted as shorthand for 4 consecutive spaces (ASCII 0x20). This feature is
    not supported by the "standard" Adobe encoding.

    adobe controls whether the input sequence is in Adobe Ascii85 format (i.e.
    is framed with <~ and ~>).

    ignorechars should be a byte string containing characters to ignore from the
    input. This should only contain whitespace characters, and by default
    contains all whitespace characters in ASCII.

    The result is returned as a bytes object.
    """
    b = _bytes_from_decode_data(b)
    if adobe:
        if not b.endswith(_A85END):
            raise ValueError(
                "Ascii85 encoded byte sequences must end "
                "with {!r}".format(_A85END)
                )
        if b.startswith(_A85START):
            b = b[2:-2]  # Strip off start/end markers
        else:
            b = b[:-2]
    #
    # We have to go through this stepwise, so as to ignore spaces and handle
    # special short sequences
    #
    packI = struct.Struct('!I').pack
    decoded = []
    decoded_append = decoded.append
    curr = []
    curr_append = curr.append
    curr_clear = curr.clear
    # Four trailing 'u' (the highest digit) complete a partial final group.
    for x in b + b'u' * 4:
        if b'!'[0] <= x <= b'u'[0]:
            curr_append(x)
            if len(curr) == 5:
                acc = 0
                for digit in curr:
                    acc = 85 * acc + (digit - 33)
                try:
                    decoded_append(packI(acc))
                except struct.error:
                    raise ValueError('Ascii85 overflow') from None
                curr_clear()
        elif x == b'z'[0]:
            if curr:
                raise ValueError('z inside Ascii85 5-tuple')
            decoded_append(b'\0\0\0\0')
        elif foldspaces and x == b'y'[0]:
            if curr:
                raise ValueError('y inside Ascii85 5-tuple')
            decoded_append(b'\x20\x20\x20\x20')
        elif x in ignorechars:
            # Skip whitespace
            continue
        else:
            raise ValueError('Non-Ascii85 digit found: %c' % x)

    result = b''.join(decoded)
    padding = 4 - len(curr)
    if padding:
        # Throw away the extra padding
        result = result[:-padding]
    return result

# The following code is originally taken (with permission) from Mercurial

_b85alphabet = (b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                b"abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~")
_b85chars = None
_b85chars2 = None
_b85dec = None


def b85encode(b, pad=False):
    """Encode bytes-like object b in base85 format and return a bytes object.

    If pad is true, the input is padded with b'\\0' so its length is a multiple of
    4 bytes before encoding.
    """
    global _b85chars, _b85chars2
    # Delay the initialization of tables to not waste memory
    # if the function is never called
    if _b85chars2 is None:
        _b85chars = [bytes((i,)) for i in _b85alphabet]
        _b85chars2 = [(a + b) for a in _b85chars for b in _b85chars]
    return _85encode(b, _b85chars, _b85chars2, pad)


def b85decode(b):
    """Decode the base85-encoded bytes-like object or ASCII string b

    The result is returned as a bytes object.
    """
    global _b85dec
    # Delay the initialization of tables to not waste memory
    # if the function is never called
    if _b85dec is None:
        _b85dec = [None] * 256
        for i, c in enumerate(_b85alphabet):
            _b85dec[c] = i

    b = _bytes_from_decode_data(b)
    # Complete a partial final group with '~', the highest digit.
    padding = (-len(b)) % 5
    b = b + b'~' * padding
    out = []
    packI = struct.Struct('!I').pack
    for i in range(0, len(b), 5):
        chunk = b[i:i + 5]
        acc = 0
        try:
            for c in chunk:
                acc = acc * 85 + _b85dec[c]
        except TypeError:
            # A None table entry marks a byte outside the alphabet; find it
            # so the error message can report its position.
            for j, c in enumerate(chunk):
                if _b85dec[c] is None:
                    raise ValueError('bad base85 character at position %d'
                                    % (i + j)) from None
            raise
        try:
            out.append(packI(acc))
        except struct.error:
            raise ValueError('base85 overflow in hunk starting at byte %d'
                             % i) from None

    result = b''.join(out)
    if padding:
        result = result[:-padding]
    return result

# Legacy interface.  This code could be cleaned up since I don't believe
# binascii has any line length limitations.  It just doesn't seem worth it
# though.  The files should be opened in binary mode.
MAXLINESIZE = 76  # Excluding the CRLF
MAXBINSIZE = (MAXLINESIZE//4)*3  # Raw bytes that encode to one full line


def encode(input, output):
    """Encode a file; input and output are binary files.

    Data is read from input in groups of MAXBINSIZE bytes and each group is
    written to output as one newline-terminated line of Base64.
    """
    while True:
        s = input.read(MAXBINSIZE)
        if not s:
            break
        # A read may return fewer bytes than requested; keep reading until
        # the group is full or the input is exhausted.
        while len(s) < MAXBINSIZE:
            ns = input.read(MAXBINSIZE-len(s))
            if not ns:
                break
            s += ns
        line = binascii.b2a_base64(s)
        output.write(line)


def decode(input, output):
    """Decode a file; input and output are binary files.

    Each line read from input is decoded on its own and the resulting bytes
    are written to output.
    """
    while True:
        line = input.readline()
        if not line:
            break
        s = binascii.a2b_base64(line)
        output.write(s)


def _input_type_check(s):
    """Raise TypeError unless s is a 1-D bytes-like object of single bytes."""
    try:
        m = memoryview(s)
    except TypeError as err:
        msg = "expected bytes-like object, not %s" % s.__class__.__name__
        raise TypeError(msg) from err
    if m.format not in ('c', 'b', 'B'):
        msg = ("expected single byte elements, not %r from %s" %
                                          (m.format, s.__class__.__name__))
        raise TypeError(msg)
    if m.ndim != 1:
        msg = ("expected 1-D data, not %d-D data from %s" %
                                          (m.ndim, s.__class__.__name__))
        raise TypeError(msg)


def encodebytes(s):
    """Encode a bytestring into a bytes object containing multiple lines
    of base-64 data."""
    _input_type_check(s)
    pieces = [binascii.b2a_base64(s[i : i + MAXBINSIZE])
              for i in range(0, len(s), MAXBINSIZE)]
    return b"".join(pieces)


def decodebytes(s):
    """Decode a bytestring of base-64 data into a bytes object."""
    _input_type_check(s)
    return binascii.a2b_base64(s)


# Usable as a script...
def main():
    """Small main program

    Options -d and -u select decoding, -e (the default) selects encoding,
    and -t runs the self-test and returns.  Input is the named file, or
    standard input when no file or '-' is given; the result is written to
    standard output.  An unknown option prints a usage message to standard
    error and exits with status 2.
    """
    import sys
    import getopt
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'deut')
    except getopt.error as msg:
        # Write to stderr directly rather than rebinding sys.stdout, which
        # would leak the redirection to any caller catching SystemExit.
        print(msg, file=sys.stderr)
        print("""usage: %s [-d|-e|-u|-t] [file|-]
        -d, -u: decode
        -e: encode (default)
        -t: encode and decode string 'Aladdin:open sesame'"""%sys.argv[0],
              file=sys.stderr)
        sys.exit(2)
    func = encode
    for o, a in opts:
        if o == '-e':
            func = encode
        if o == '-d':
            func = decode
        if o == '-u':
            func = decode
        if o == '-t':
            test()
            return
    if args and args[0] != '-':
        with open(args[0], 'rb') as f:
            func(f, sys.stdout.buffer)
    else:
        func(sys.stdin.buffer, sys.stdout.buffer)


def test():
    """Print a sample string, its encoding and the decoded round trip."""
    s0 = b"Aladdin:open sesame"
    print(repr(s0))
    s1 = encodebytes(s0)
    print(repr(s1))
    s2 = decodebytes(s1)
    print(repr(s2))
    assert s0 == s2


if __name__ == '__main__':
    main()