#! /usr/bin/env python3

"""Base16, Base32, Base64 (RFC 3548), Base85 and Ascii85 data encodings"""

# Modified 04-Oct-1995 by Jack Jansen to use binascii module
# Modified 30-Dec-2003 by Barry Warsaw to add full RFC 3548 support
# Modified 22-May-2007 by Guido van Rossum to use bytes everywhere

import re
import struct
import binascii


__all__ = [
    # Legacy interface exports traditional RFC 2045 Base64 encodings
    'encode', 'decode', 'encodebytes', 'decodebytes',
    # Generalized interface for other encodings
    'b64encode', 'b64decode', 'b32encode', 'b32decode',
    'b16encode', 'b16decode',
    # Base85 and Ascii85 encodings
    'b85encode', 'b85decode', 'a85encode', 'a85decode',
    # Standard Base64 encoding
    'standard_b64encode', 'standard_b64decode',
    # Some common Base64 alternatives.  As referenced by RFC 3548, see thread
    # starting at:
    #
    # http://zgp.org/pipermail/p2p-hackers/2001-September/000316.html
    'urlsafe_b64encode', 'urlsafe_b64decode',
    ]


bytes_types = (bytes, bytearray)  # Types acceptable as binary data

def _bytes_from_decode_data(s):
    """Normalize the input of a decode function to a bytes-like value.

    ASCII-only str objects are encoded to bytes, bytes and bytearray objects
    are returned unchanged, and any other object supporting the buffer
    protocol is copied into a bytes object.

    Raises ValueError for a str containing non-ASCII characters and
    TypeError for objects that are neither str nor bytes-like.
    """
    if isinstance(s, str):
        try:
            return s.encode('ascii')
        except UnicodeEncodeError:
            raise ValueError('string argument should contain only ASCII characters')
    if isinstance(s, bytes_types):
        return s
    try:
        return memoryview(s).tobytes()
    except TypeError:
        raise TypeError("argument should be a bytes-like object or ASCII "
                        "string, not %r" % s.__class__.__name__) from None


# Base64 encoding/decoding uses binascii

def b64encode(s, altchars=None):
    """Encode the bytes-like object s using Base64 and return a bytes object.

    Optional altchars should be a byte string of length 2 which specifies an
    alternative alphabet for the '+' and '/' characters.  This allows an
    application to e.g. generate url or filesystem safe Base64 strings.
    """
    encoded = binascii.b2a_base64(s, newline=False)
    if altchars is not None:
        assert len(altchars) == 2, repr(altchars)
        return encoded.translate(bytes.maketrans(b'+/', altchars))
    return encoded


def b64decode(s, altchars=None, validate=False):
    """Decode the Base64 encoded bytes-like object or ASCII string s.

    Optional altchars must be a bytes-like object or ASCII string of length 2
    which specifies the alternative alphabet used instead of the '+' and '/'
    characters.

    The result is returned as a bytes object.  A binascii.Error is raised if
    s is incorrectly padded.

    If validate is False (the default), characters that are neither in the
    normal base-64 alphabet nor the alternative alphabet are discarded prior
    to the padding check.  If validate is True, these non-alphabet characters
    in the input result in a binascii.Error.
    """
    s = _bytes_from_decode_data(s)
    if altchars is not None:
        altchars = _bytes_from_decode_data(altchars)
        assert len(altchars) == 2, repr(altchars)
        s = s.translate(bytes.maketrans(altchars, b'+/'))
    # fullmatch() is required here: with match() and a '$' anchor a trailing
    # newline would slip through validation, because '$' also matches just
    # before a final b'\n'.
    if validate and not re.fullmatch(b'[A-Za-z0-9+/]*={0,2}', s):
        raise binascii.Error('Non-base64 digit found')
    return binascii.a2b_base64(s)


def standard_b64encode(s):
    """Encode bytes-like object s using the standard Base64 alphabet.

    The result is returned as a bytes object.
    """
    return b64encode(s)

def standard_b64decode(s):
    """Decode bytes encoded with the standard Base64 alphabet.

    Argument s is a bytes-like object or ASCII string to decode.  The result
    is returned as a bytes object.  A binascii.Error is raised if the input
    is incorrectly padded.  Characters that are not in the standard alphabet
    are discarded prior to the padding check.
    """
    return b64decode(s)


_urlsafe_encode_translation = bytes.maketrans(b'+/', b'-_')
_urlsafe_decode_translation = bytes.maketrans(b'-_', b'+/')

def urlsafe_b64encode(s):
    """Encode bytes using the URL- and filesystem-safe Base64 alphabet.

    Argument s is a bytes-like object to encode.  The result is returned as a
    bytes object.  The alphabet uses '-' instead of '+' and '_' instead of
    '/'.
    """
    return b64encode(s).translate(_urlsafe_encode_translation)

def urlsafe_b64decode(s):
    """Decode bytes using the URL- and filesystem-safe Base64 alphabet.

    Argument s is a bytes-like object or ASCII string to decode.  The result
    is returned as a bytes object.  A binascii.Error is raised if the input
    is incorrectly padded.  Characters that are not in the URL-safe base-64
    alphabet, and are not a plus '+' or slash '/', are discarded prior to the
    padding check.

    The alphabet uses '-' instead of '+' and '_' instead of '/'.
    """
    s = _bytes_from_decode_data(s)
    s = s.translate(_urlsafe_decode_translation)
    return b64decode(s)



# Base32 encoding/decoding must be done in Python
_b32alphabet = b'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'
_b32tab2 = None
_b32rev = None

def b32encode(s):
    """Encode the bytes-like object s using Base32 and return a bytes object.
    """
    global _b32tab2
    # Delay the initialization of the table to not waste memory
    # if the function is never called
    if _b32tab2 is None:
        b32tab = [bytes((i,)) for i in _b32alphabet]
        # Two-character lookup table: one entry per 10-bit value
        _b32tab2 = [a + b for a in b32tab for b in b32tab]
        b32tab = None

    if not isinstance(s, bytes_types):
        s = memoryview(s).tobytes()
    leftover = len(s) % 5
    # Pad the last quantum with zero bits if necessary
    if leftover:
        s = s + b'\0' * (5 - leftover)  # Don't use += !
    encoded = bytearray()
    from_bytes = int.from_bytes
    b32tab2 = _b32tab2
    for i in range(0, len(s), 5):
        c = from_bytes(s[i: i + 5], 'big')
        encoded += (b32tab2[c >> 30] +           # bits 1 - 10
                    b32tab2[(c >> 20) & 0x3ff] + # bits 11 - 20
                    b32tab2[(c >> 10) & 0x3ff] + # bits 21 - 30
                    b32tab2[c & 0x3ff]           # bits 31 - 40
                   )
    # Adjust for any leftover partial quanta
    if leftover == 1:
        encoded[-6:] = b'======'
    elif leftover == 2:
        encoded[-4:] = b'===='
    elif leftover == 3:
        encoded[-3:] = b'==='
    elif leftover == 4:
        encoded[-1:] = b'='
    return bytes(encoded)

def b32decode(s, casefold=False, map01=None):
    """Decode the Base32 encoded bytes-like object or ASCII string s.

    Optional casefold is a flag specifying whether a lowercase alphabet is
    acceptable as input.  For security purposes, the default is False.

    RFC 3548 allows for optional mapping of the digit 0 (zero) to the
    letter O (oh), and for optional mapping of the digit 1 (one) to
    either the letter I (eye) or letter L (el).  The optional argument
    map01 when not None, specifies which letter the digit 1 should be
    mapped to (when map01 is not None, the digit 0 is always mapped to
    the letter O).  For security purposes the default is None, so that
    0 and 1 are not allowed in the input.

    The result is returned as a bytes object.  A binascii.Error is raised if
    the input is incorrectly padded or if there are non-alphabet
    characters present in the input.
    """
    global _b32rev
    # Delay the initialization of the table to not waste memory
    # if the function is never called
    if _b32rev is None:
        _b32rev = {v: k for k, v in enumerate(_b32alphabet)}
    s = _bytes_from_decode_data(s)
    if len(s) % 8:
        raise binascii.Error('Incorrect padding')
    # Handle section 2.4 zero and one mapping.  The flag map01 will be either
    # False, or the character to map the digit 1 (one) to.  It should be
    # either L (el) or I (eye).
    if map01 is not None:
        map01 = _bytes_from_decode_data(map01)
        assert len(map01) == 1, repr(map01)
        s = s.translate(bytes.maketrans(b'01', b'O' + map01))
    if casefold:
        s = s.upper()
    # Strip off pad characters from the right.  We need to count the pad
    # characters because this will tell us how many null bytes to remove from
    # the end of the decoded string.
    l = len(s)
    s = s.rstrip(b'=')
    padchars = l - len(s)
    # Now decode the full quanta
    decoded = bytearray()
    b32rev = _b32rev
    acc = 0
    for i in range(0, len(s), 8):
        quanta = s[i: i + 8]
        acc = 0
        try:
            for c in quanta:
                acc = (acc << 5) + b32rev[c]
        except KeyError:
            raise binascii.Error('Non-base32 digit found') from None
        decoded += acc.to_bytes(5, 'big')
    # Process the last, partial quanta
    if padchars:
        # Number of decoded bytes carried by the final quantum for each
        # legal pad count.  The pad count must be validated *before* the
        # accumulator is shifted: input consisting only of padding never
        # enters the loop above, and eight or more pad characters would make
        # the shifted value overflow the 5-byte conversion.
        keep = {1: 4, 3: 3, 4: 2, 6: 1}.get(padchars)
        if keep is None:
            raise binascii.Error('Incorrect padding')
        acc <<= 5 * padchars
        last = acc.to_bytes(5, 'big')
        decoded[-5:] = last[:keep]
    return bytes(decoded)



# RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns
# lowercase.  The RFC also recommends against accepting input case
# insensitively.
def b16encode(s):
    """Encode the bytes-like object s using Base16 and return a bytes object.
    """
    return binascii.hexlify(s).upper()


def b16decode(s, casefold=False):
    """Decode the Base16 encoded bytes-like object or ASCII string s.

    Optional casefold is a flag specifying whether a lowercase alphabet is
    acceptable as input.  For security purposes, the default is False.

    The result is returned as a bytes object.  A binascii.Error is raised if
    s is incorrectly padded or if there are non-alphabet characters present
    in the input.
    """
    s = _bytes_from_decode_data(s)
    if casefold:
        s = s.upper()
    if re.search(b'[^0-9A-F]', s):
        raise binascii.Error('Non-base16 digit found')
    return binascii.unhexlify(s)

#
# Ascii85 encoding/decoding
#

_a85chars = None
_a85chars2 = None
_A85START = b"<~"
_A85END = b"~>"

def _85encode(b, chars, chars2, pad=False, foldnuls=False, foldspaces=False):
    """Shared encoder behind a85encode() and b85encode().

    chars is the list of 85 single-byte digits and chars2 the list of all
    two-digit combinations built from it.  foldnuls and foldspaces enable
    the 'z' (four zero bytes) and 'y' (four spaces) short forms.  Unless pad
    is true, the output characters that only encode padding are removed.
    """
    if not isinstance(b, bytes_types):
        b = memoryview(b).tobytes()

    padding = (-len(b)) % 4
    if padding:
        b = b + b'\0' * padding
    words = struct.Struct('!%dI' % (len(b) // 4)).unpack(b)

    # 614125 == 85**3 and 7225 == 85**2: each 32-bit word becomes two
    # two-digit lookups followed by one single-digit lookup.
    chunks = [b'z' if foldnuls and not word else
              b'y' if foldspaces and word == 0x20202020 else
              (chars2[word // 614125] +
               chars2[word // 85 % 7225] +
               chars[word % 85])
              for word in words]

    if padding and not pad:
        if chunks[-1] == b'z':
            # Expand the folded group so that it can be truncated below
            chunks[-1] = chars[0] * 5
        chunks[-1] = chunks[-1][:-padding]

    return b''.join(chunks)

def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):
    """Encode bytes-like object b using Ascii85 and return a bytes object.

    foldspaces is an optional flag that uses the special short sequence 'y'
    instead of 4 consecutive spaces (ASCII 0x20) as supported by 'btoa'. This
    feature is not supported by the "standard" Adobe encoding.

    wrapcol controls whether the output should have newline (b'\\n') characters
    added to it. If this is non-zero, each output line will be at most this
    many characters long.

    pad controls whether the input is padded to a multiple of 4 before
    encoding. Note that the btoa implementation always pads.

    adobe controls whether the encoded byte sequence is framed with <~ and ~>,
    which is used by the Adobe implementation.
    """
    global _a85chars, _a85chars2
    # Delay the initialization of tables to not waste memory
    # if the function is never called
    if _a85chars is None:
        _a85chars = [bytes((i,)) for i in range(33, 118)]
        _a85chars2 = [(a + b) for a in _a85chars for b in _a85chars]

    result = _85encode(b, _a85chars, _a85chars2, pad, True, foldspaces)

    if adobe:
        result = _A85START + result
    if wrapcol:
        wrapcol = max(2 if adobe else 1, wrapcol)
        chunks = [result[i: i + wrapcol]
                  for i in range(0, len(result), wrapcol)]
        if adobe:
            # Keep the two-byte end marker from overflowing the last line
            if len(chunks[-1]) + 2 > wrapcol:
                chunks.append(b'')
        result = b'\n'.join(chunks)
    if adobe:
        result += _A85END

    return result

def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'):
    """Decode the Ascii85 encoded bytes-like object or ASCII string b.

    foldspaces is a flag that specifies whether the 'y' short sequence should be
    accepted as shorthand for 4 consecutive spaces (ASCII 0x20). This feature is
    not supported by the "standard" Adobe encoding.

    adobe controls whether the input sequence is in Adobe Ascii85 format (i.e.
    is framed with <~ and ~>).

    ignorechars should be a byte string containing characters to ignore from the
    input. This should only contain whitespace characters, and by default
    contains all whitespace characters in ASCII.

    The result is returned as a bytes object.  A ValueError is raised for
    malformed input.
    """
    b = _bytes_from_decode_data(b)
    if adobe:
        if not b.endswith(_A85END):
            raise ValueError(
                "Ascii85 encoded byte sequences must end "
                "with {!r}".format(_A85END)
                )
        if b.startswith(_A85START):
            b = b[2:-2]  # Strip off start/end markers
        else:
            b = b[:-2]
    #
    # We have to go through this stepwise, so as to ignore spaces and handle
    # special short sequences
    #
    packI = struct.Struct('!I').pack
    decoded = []
    decoded_append = decoded.append
    curr = []
    curr_append = curr.append
    curr_clear = curr.clear
    # Four trailing 'u' digits (the highest digit) flush a final partial
    # group; the bytes they produce are trimmed from the result afterwards.
    for x in b + b'u' * 4:
        if b'!'[0] <= x <= b'u'[0]:
            curr_append(x)
            if len(curr) == 5:
                acc = 0
                for digit in curr:
                    acc = 85 * acc + (digit - 33)
                try:
                    decoded_append(packI(acc))
                except struct.error:
                    raise ValueError('Ascii85 overflow') from None
                curr_clear()
        elif x == b'z'[0]:
            if curr:
                raise ValueError('z inside Ascii85 5-tuple')
            decoded_append(b'\0\0\0\0')
        elif foldspaces and x == b'y'[0]:
            if curr:
                raise ValueError('y inside Ascii85 5-tuple')
            decoded_append(b'\x20\x20\x20\x20')
        elif x in ignorechars:
            # Skip whitespace
            continue
        else:
            raise ValueError('Non-Ascii85 digit found: %c' % x)

    result = b''.join(decoded)
    padding = 4 - len(curr)
    if padding:
        # Throw away the extra padding
        result = result[:-padding]
    return result

# The following code is originally taken (with permission) from Mercurial

_b85alphabet = (b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                b"abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~")
_b85chars = None
_b85chars2 = None
_b85dec = None

def b85encode(b, pad=False):
    """Encode bytes-like object b in base85 format and return a bytes object.

    If pad is true, the input is padded with b'\\0' so its length is a multiple of
    4 bytes before encoding.
    """
    global _b85chars, _b85chars2
    # Delay the initialization of tables to not waste memory
    # if the function is never called
    if _b85chars is None:
        _b85chars = [bytes((i,)) for i in _b85alphabet]
        _b85chars2 = [(a + b) for a in _b85chars for b in _b85chars]
    return _85encode(b, _b85chars, _b85chars2, pad)

def b85decode(b):
    """Decode the base85-encoded bytes-like object or ASCII string b

    The result is returned as a bytes object.  A ValueError is raised if the
    input contains a character outside the base85 alphabet or a 5-character
    group that does not fit into 32 bits.
    """
    global _b85dec
    # Delay the initialization of tables to not waste memory
    # if the function is never called
    if _b85dec is None:
        _b85dec = [None] * 256
        for i, c in enumerate(_b85alphabet):
            _b85dec[c] = i

    b = _bytes_from_decode_data(b)
    padding = (-len(b)) % 5
    # Pad with the highest digit; the surplus bytes are trimmed at the end
    b = b + b'~' * padding
    out = []
    packI = struct.Struct('!I').pack
    for i in range(0, len(b), 5):
        chunk = b[i:i + 5]
        acc = 0
        try:
            for c in chunk:
                acc = acc * 85 + _b85dec[c]
        except TypeError:
            # A None table entry was hit: locate the offending character
            for j, c in enumerate(chunk):
                if _b85dec[c] is None:
                    raise ValueError('bad base85 character at position %d'
                                    % (i + j)) from None
            raise
        try:
            out.append(packI(acc))
        except struct.error:
            raise ValueError('base85 overflow in hunk starting at byte %d'
                             % i) from None

    result = b''.join(out)
    if padding:
        result = result[:-padding]
    return result

# Legacy interface.  This code could be cleaned up since I don't believe
# binascii has any line length limitations.  It just doesn't seem worth it
# though.  The files should be opened in binary mode.

MAXLINESIZE = 76  # Excluding the CRLF
MAXBINSIZE = (MAXLINESIZE//4)*3

def encode(input, output):
    """Encode a file; input and output are binary files."""
    while True:
        s = input.read(MAXBINSIZE)
        if not s:
            break
        # A short read is not necessarily end of file: keep reading until
        # the line is full or the input is exhausted.
        while len(s) < MAXBINSIZE:
            ns = input.read(MAXBINSIZE-len(s))
            if not ns:
                break
            s += ns
        line = binascii.b2a_base64(s)
        output.write(line)


def decode(input, output):
    """Decode a file; input and output are binary files."""
    while True:
        line = input.readline()
        if not line:
            break
        s = binascii.a2b_base64(line)
        output.write(s)

def _input_type_check(s):
    """Raise TypeError unless s is a one-dimensional buffer of single bytes."""
    try:
        m = memoryview(s)
    except TypeError as err:
        msg = "expected bytes-like object, not %s" % s.__class__.__name__
        raise TypeError(msg) from err
    if m.format not in ('c', 'b', 'B'):
        msg = ("expected single byte elements, not %r from %s" %
                                          (m.format, s.__class__.__name__))
        raise TypeError(msg)
    if m.ndim != 1:
        msg = ("expected 1-D data, not %d-D data from %s" %
                                          (m.ndim, s.__class__.__name__))
        raise TypeError(msg)


def encodebytes(s):
    """Encode a bytestring into a bytes object containing multiple lines
    of base-64 data."""
    _input_type_check(s)
    pieces = []
    for i in range(0, len(s), MAXBINSIZE):
        chunk = s[i : i + MAXBINSIZE]
        pieces.append(binascii.b2a_base64(chunk))
    return b"".join(pieces)

def encodestring(s):
    """Legacy alias of encodebytes()."""
    import warnings
    warnings.warn("encodestring() is a deprecated alias since 3.1, "
                  "use encodebytes()",
                  DeprecationWarning, 2)
    return encodebytes(s)


def decodebytes(s):
    """Decode a bytestring of base-64 data into a bytes object."""
    _input_type_check(s)
    return binascii.a2b_base64(s)

def decodestring(s):
    """Legacy alias of decodebytes()."""
    import warnings
    warnings.warn("decodestring() is a deprecated alias since Python 3.1, "
                  "use decodebytes()",
                  DeprecationWarning, 2)
    return decodebytes(s)


# Usable as a script...
def main():
    """Small main program"""
    import sys
    import getopt
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'deut')
    except getopt.error as msg:
        # Write diagnostics to stderr directly instead of rebinding
        # sys.stdout, which would leak into the caller if SystemExit is
        # caught.
        print(msg, file=sys.stderr)
        print("""usage: %s [-d|-e|-u|-t] [file|-]
        -d, -u: decode
        -e: encode (default)
        -t: encode and decode string 'Aladdin:open sesame'"""%sys.argv[0],
              file=sys.stderr)
        sys.exit(2)
    func = encode
    for o, _ in opts:
        if o == '-e': func = encode
        if o == '-d': func = decode
        if o == '-u': func = decode
        if o == '-t': test(); return
    if args and args[0] != '-':
        with open(args[0], 'rb') as f:
            func(f, sys.stdout.buffer)
    else:
        func(sys.stdin.buffer, sys.stdout.buffer)


def test():
    """Round-trip a sample string through encodebytes() and decodebytes()."""
    s0 = b"Aladdin:open sesame"
    print(repr(s0))
    s1 = encodebytes(s0)
    print(repr(s1))
    s2 = decodebytes(s1)
    print(repr(s2))
    assert s0 == s2


if __name__ == '__main__':
    main()