1#! /usr/bin/env python3 2 3"""Base16, Base32, Base64 (RFC 3548), Base85 and Ascii85 data encodings""" 4 5# Modified 04-Oct-1995 by Jack Jansen to use binascii module 6# Modified 30-Dec-2003 by Barry Warsaw to add full RFC 3548 support 7# Modified 22-May-2007 by Guido van Rossum to use bytes everywhere 8 9import re 10import struct 11import binascii 12 13 14__all__ = [ 15 # Legacy interface exports traditional RFC 2045 Base64 encodings 16 'encode', 'decode', 'encodebytes', 'decodebytes', 17 # Generalized interface for other encodings 18 'b64encode', 'b64decode', 'b32encode', 'b32decode', 19 'b32hexencode', 'b32hexdecode', 'b16encode', 'b16decode', 20 # Base85 and Ascii85 encodings 21 'b85encode', 'b85decode', 'a85encode', 'a85decode', 'z85encode', 'z85decode', 22 # Standard Base64 encoding 23 'standard_b64encode', 'standard_b64decode', 24 # Some common Base64 alternatives. As referenced by RFC 3458, see thread 25 # starting at: 26 # 27 # http://zgp.org/pipermail/p2p-hackers/2001-September/000316.html 28 'urlsafe_b64encode', 'urlsafe_b64decode', 29 ] 30 31 32bytes_types = (bytes, bytearray) # Types acceptable as binary data 33 34def _bytes_from_decode_data(s): 35 if isinstance(s, str): 36 try: 37 return s.encode('ascii') 38 except UnicodeEncodeError: 39 raise ValueError('string argument should contain only ASCII characters') 40 if isinstance(s, bytes_types): 41 return s 42 try: 43 return memoryview(s).tobytes() 44 except TypeError: 45 raise TypeError("argument should be a bytes-like object or ASCII " 46 "string, not %r" % s.__class__.__name__) from None 47 48 49# Base64 encoding/decoding uses binascii 50 51def b64encode(s, altchars=None): 52 """Encode the bytes-like object s using Base64 and return a bytes object. 53 54 Optional altchars should be a byte string of length 2 which specifies an 55 alternative alphabet for the '+' and '/' characters. This allows an 56 application to e.g. generate url or filesystem safe Base64 strings. 57 """ 58 encoded = binascii.b2a_base64(s, newline=False) 59 if altchars is not None: 60 assert len(altchars) == 2, repr(altchars) 61 return encoded.translate(bytes.maketrans(b'+/', altchars)) 62 return encoded 63 64 65def b64decode(s, altchars=None, validate=False): 66 """Decode the Base64 encoded bytes-like object or ASCII string s. 67 68 Optional altchars must be a bytes-like object or ASCII string of length 2 69 which specifies the alternative alphabet used instead of the '+' and '/' 70 characters. 71 72 The result is returned as a bytes object. A binascii.Error is raised if 73 s is incorrectly padded. 74 75 If validate is False (the default), characters that are neither in the 76 normal base-64 alphabet nor the alternative alphabet are discarded prior 77 to the padding check. If validate is True, these non-alphabet characters 78 in the input result in a binascii.Error. 79 For more information about the strict base64 check, see: 80 81 https://docs.python.org/3.11/library/binascii.html#binascii.a2b_base64 82 """ 83 s = _bytes_from_decode_data(s) 84 if altchars is not None: 85 altchars = _bytes_from_decode_data(altchars) 86 assert len(altchars) == 2, repr(altchars) 87 s = s.translate(bytes.maketrans(altchars, b'+/')) 88 return binascii.a2b_base64(s, strict_mode=validate) 89 90 91def standard_b64encode(s): 92 """Encode bytes-like object s using the standard Base64 alphabet. 93 94 The result is returned as a bytes object. 95 """ 96 return b64encode(s) 97 98def standard_b64decode(s): 99 """Decode bytes encoded with the standard Base64 alphabet. 100 101 Argument s is a bytes-like object or ASCII string to decode. The result 102 is returned as a bytes object. A binascii.Error is raised if the input 103 is incorrectly padded. Characters that are not in the standard alphabet 104 are discarded prior to the padding check. 105 """ 106 return b64decode(s) 107 108 109_urlsafe_encode_translation = bytes.maketrans(b'+/', b'-_') 110_urlsafe_decode_translation = bytes.maketrans(b'-_', b'+/') 111 112def urlsafe_b64encode(s): 113 """Encode bytes using the URL- and filesystem-safe Base64 alphabet. 114 115 Argument s is a bytes-like object to encode. The result is returned as a 116 bytes object. The alphabet uses '-' instead of '+' and '_' instead of 117 '/'. 118 """ 119 return b64encode(s).translate(_urlsafe_encode_translation) 120 121def urlsafe_b64decode(s): 122 """Decode bytes using the URL- and filesystem-safe Base64 alphabet. 123 124 Argument s is a bytes-like object or ASCII string to decode. The result 125 is returned as a bytes object. A binascii.Error is raised if the input 126 is incorrectly padded. Characters that are not in the URL-safe base-64 127 alphabet, and are not a plus '+' or slash '/', are discarded prior to the 128 padding check. 129 130 The alphabet uses '-' instead of '+' and '_' instead of '/'. 131 """ 132 s = _bytes_from_decode_data(s) 133 s = s.translate(_urlsafe_decode_translation) 134 return b64decode(s) 135 136 137 138# Base32 encoding/decoding must be done in Python 139_B32_ENCODE_DOCSTRING = ''' 140Encode the bytes-like objects using {encoding} and return a bytes object. 141''' 142_B32_DECODE_DOCSTRING = ''' 143Decode the {encoding} encoded bytes-like object or ASCII string s. 144 145Optional casefold is a flag specifying whether a lowercase alphabet is 146acceptable as input. For security purposes, the default is False. 147{extra_args} 148The result is returned as a bytes object. A binascii.Error is raised if 149the input is incorrectly padded or if there are non-alphabet 150characters present in the input. 151''' 152_B32_DECODE_MAP01_DOCSTRING = ''' 153RFC 3548 allows for optional mapping of the digit 0 (zero) to the 154letter O (oh), and for optional mapping of the digit 1 (one) to 155either the letter I (eye) or letter L (el). The optional argument 156map01 when not None, specifies which letter the digit 1 should be 157mapped to (when map01 is not None, the digit 0 is always mapped to 158the letter O). For security purposes the default is None, so that 1590 and 1 are not allowed in the input. 160''' 161_b32alphabet = b'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567' 162_b32hexalphabet = b'0123456789ABCDEFGHIJKLMNOPQRSTUV' 163_b32tab2 = {} 164_b32rev = {} 165 166def _b32encode(alphabet, s): 167 # Delay the initialization of the table to not waste memory 168 # if the function is never called 169 if alphabet not in _b32tab2: 170 b32tab = [bytes((i,)) for i in alphabet] 171 _b32tab2[alphabet] = [a + b for a in b32tab for b in b32tab] 172 b32tab = None 173 174 if not isinstance(s, bytes_types): 175 s = memoryview(s).tobytes() 176 leftover = len(s) % 5 177 # Pad the last quantum with zero bits if necessary 178 if leftover: 179 s = s + b'\0' * (5 - leftover) # Don't use += ! 180 encoded = bytearray() 181 from_bytes = int.from_bytes 182 b32tab2 = _b32tab2[alphabet] 183 for i in range(0, len(s), 5): 184 c = from_bytes(s[i: i + 5]) # big endian 185 encoded += (b32tab2[c >> 30] + # bits 1 - 10 186 b32tab2[(c >> 20) & 0x3ff] + # bits 11 - 20 187 b32tab2[(c >> 10) & 0x3ff] + # bits 21 - 30 188 b32tab2[c & 0x3ff] # bits 31 - 40 189 ) 190 # Adjust for any leftover partial quanta 191 if leftover == 1: 192 encoded[-6:] = b'======' 193 elif leftover == 2: 194 encoded[-4:] = b'====' 195 elif leftover == 3: 196 encoded[-3:] = b'===' 197 elif leftover == 4: 198 encoded[-1:] = b'=' 199 return bytes(encoded) 200 201def _b32decode(alphabet, s, casefold=False, map01=None): 202 # Delay the initialization of the table to not waste memory 203 # if the function is never called 204 if alphabet not in _b32rev: 205 _b32rev[alphabet] = {v: k for k, v in enumerate(alphabet)} 206 s = _bytes_from_decode_data(s) 207 if len(s) % 8: 208 raise binascii.Error('Incorrect padding') 209 # Handle section 2.4 zero and one mapping. The flag map01 will be either 210 # False, or the character to map the digit 1 (one) to. It should be 211 # either L (el) or I (eye). 212 if map01 is not None: 213 map01 = _bytes_from_decode_data(map01) 214 assert len(map01) == 1, repr(map01) 215 s = s.translate(bytes.maketrans(b'01', b'O' + map01)) 216 if casefold: 217 s = s.upper() 218 # Strip off pad characters from the right. We need to count the pad 219 # characters because this will tell us how many null bytes to remove from 220 # the end of the decoded string. 221 l = len(s) 222 s = s.rstrip(b'=') 223 padchars = l - len(s) 224 # Now decode the full quanta 225 decoded = bytearray() 226 b32rev = _b32rev[alphabet] 227 for i in range(0, len(s), 8): 228 quanta = s[i: i + 8] 229 acc = 0 230 try: 231 for c in quanta: 232 acc = (acc << 5) + b32rev[c] 233 except KeyError: 234 raise binascii.Error('Non-base32 digit found') from None 235 decoded += acc.to_bytes(5) # big endian 236 # Process the last, partial quanta 237 if l % 8 or padchars not in {0, 1, 3, 4, 6}: 238 raise binascii.Error('Incorrect padding') 239 if padchars and decoded: 240 acc <<= 5 * padchars 241 last = acc.to_bytes(5) # big endian 242 leftover = (43 - 5 * padchars) // 8 # 1: 4, 3: 3, 4: 2, 6: 1 243 decoded[-5:] = last[:leftover] 244 return bytes(decoded) 245 246 247def b32encode(s): 248 return _b32encode(_b32alphabet, s) 249b32encode.__doc__ = _B32_ENCODE_DOCSTRING.format(encoding='base32') 250 251def b32decode(s, casefold=False, map01=None): 252 return _b32decode(_b32alphabet, s, casefold, map01) 253b32decode.__doc__ = _B32_DECODE_DOCSTRING.format(encoding='base32', 254 extra_args=_B32_DECODE_MAP01_DOCSTRING) 255 256def b32hexencode(s): 257 return _b32encode(_b32hexalphabet, s) 258b32hexencode.__doc__ = _B32_ENCODE_DOCSTRING.format(encoding='base32hex') 259 260def b32hexdecode(s, casefold=False): 261 # base32hex does not have the 01 mapping 262 return _b32decode(_b32hexalphabet, s, casefold) 263b32hexdecode.__doc__ = _B32_DECODE_DOCSTRING.format(encoding='base32hex', 264 extra_args='') 265 266 267# RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns 268# lowercase. The RFC also recommends against accepting input case 269# insensitively. 270def b16encode(s): 271 """Encode the bytes-like object s using Base16 and return a bytes object. 272 """ 273 return binascii.hexlify(s).upper() 274 275 276def b16decode(s, casefold=False): 277 """Decode the Base16 encoded bytes-like object or ASCII string s. 278 279 Optional casefold is a flag specifying whether a lowercase alphabet is 280 acceptable as input. For security purposes, the default is False. 281 282 The result is returned as a bytes object. A binascii.Error is raised if 283 s is incorrectly padded or if there are non-alphabet characters present 284 in the input. 285 """ 286 s = _bytes_from_decode_data(s) 287 if casefold: 288 s = s.upper() 289 if re.search(b'[^0-9A-F]', s): 290 raise binascii.Error('Non-base16 digit found') 291 return binascii.unhexlify(s) 292 293# 294# Ascii85 encoding/decoding 295# 296 297_a85chars = None 298_a85chars2 = None 299_A85START = b"<~" 300_A85END = b"~>" 301 302def _85encode(b, chars, chars2, pad=False, foldnuls=False, foldspaces=False): 303 # Helper function for a85encode and b85encode 304 if not isinstance(b, bytes_types): 305 b = memoryview(b).tobytes() 306 307 padding = (-len(b)) % 4 308 if padding: 309 b = b + b'\0' * padding 310 words = struct.Struct('!%dI' % (len(b) // 4)).unpack(b) 311 312 chunks = [b'z' if foldnuls and not word else 313 b'y' if foldspaces and word == 0x20202020 else 314 (chars2[word // 614125] + 315 chars2[word // 85 % 7225] + 316 chars[word % 85]) 317 for word in words] 318 319 if padding and not pad: 320 if chunks[-1] == b'z': 321 chunks[-1] = chars[0] * 5 322 chunks[-1] = chunks[-1][:-padding] 323 324 return b''.join(chunks) 325 326def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False): 327 """Encode bytes-like object b using Ascii85 and return a bytes object. 328 329 foldspaces is an optional flag that uses the special short sequence 'y' 330 instead of 4 consecutive spaces (ASCII 0x20) as supported by 'btoa'. This 331 feature is not supported by the "standard" Adobe encoding. 332 333 wrapcol controls whether the output should have newline (b'\\n') characters 334 added to it. If this is non-zero, each output line will be at most this 335 many characters long, excluding the trailing newline. 336 337 pad controls whether the input is padded to a multiple of 4 before 338 encoding. Note that the btoa implementation always pads. 339 340 adobe controls whether the encoded byte sequence is framed with <~ and ~>, 341 which is used by the Adobe implementation. 342 """ 343 global _a85chars, _a85chars2 344 # Delay the initialization of tables to not waste memory 345 # if the function is never called 346 if _a85chars2 is None: 347 _a85chars = [bytes((i,)) for i in range(33, 118)] 348 _a85chars2 = [(a + b) for a in _a85chars for b in _a85chars] 349 350 result = _85encode(b, _a85chars, _a85chars2, pad, True, foldspaces) 351 352 if adobe: 353 result = _A85START + result 354 if wrapcol: 355 wrapcol = max(2 if adobe else 1, wrapcol) 356 chunks = [result[i: i + wrapcol] 357 for i in range(0, len(result), wrapcol)] 358 if adobe: 359 if len(chunks[-1]) + 2 > wrapcol: 360 chunks.append(b'') 361 result = b'\n'.join(chunks) 362 if adobe: 363 result += _A85END 364 365 return result 366 367def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'): 368 """Decode the Ascii85 encoded bytes-like object or ASCII string b. 369 370 foldspaces is a flag that specifies whether the 'y' short sequence should be 371 accepted as shorthand for 4 consecutive spaces (ASCII 0x20). This feature is 372 not supported by the "standard" Adobe encoding. 373 374 adobe controls whether the input sequence is in Adobe Ascii85 format (i.e. 375 is framed with <~ and ~>). 376 377 ignorechars should be a byte string containing characters to ignore from the 378 input. This should only contain whitespace characters, and by default 379 contains all whitespace characters in ASCII. 380 381 The result is returned as a bytes object. 382 """ 383 b = _bytes_from_decode_data(b) 384 if adobe: 385 if not b.endswith(_A85END): 386 raise ValueError( 387 "Ascii85 encoded byte sequences must end " 388 "with {!r}".format(_A85END) 389 ) 390 if b.startswith(_A85START): 391 b = b[2:-2] # Strip off start/end markers 392 else: 393 b = b[:-2] 394 # 395 # We have to go through this stepwise, so as to ignore spaces and handle 396 # special short sequences 397 # 398 packI = struct.Struct('!I').pack 399 decoded = [] 400 decoded_append = decoded.append 401 curr = [] 402 curr_append = curr.append 403 curr_clear = curr.clear 404 for x in b + b'u' * 4: 405 if b'!'[0] <= x <= b'u'[0]: 406 curr_append(x) 407 if len(curr) == 5: 408 acc = 0 409 for x in curr: 410 acc = 85 * acc + (x - 33) 411 try: 412 decoded_append(packI(acc)) 413 except struct.error: 414 raise ValueError('Ascii85 overflow') from None 415 curr_clear() 416 elif x == b'z'[0]: 417 if curr: 418 raise ValueError('z inside Ascii85 5-tuple') 419 decoded_append(b'\0\0\0\0') 420 elif foldspaces and x == b'y'[0]: 421 if curr: 422 raise ValueError('y inside Ascii85 5-tuple') 423 decoded_append(b'\x20\x20\x20\x20') 424 elif x in ignorechars: 425 # Skip whitespace 426 continue 427 else: 428 raise ValueError('Non-Ascii85 digit found: %c' % x) 429 430 result = b''.join(decoded) 431 padding = 4 - len(curr) 432 if padding: 433 # Throw away the extra padding 434 result = result[:-padding] 435 return result 436 437# The following code is originally taken (with permission) from Mercurial 438 439_b85alphabet = (b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" 440 b"abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~") 441_b85chars = None 442_b85chars2 = None 443_b85dec = None 444 445def b85encode(b, pad=False): 446 """Encode bytes-like object b in base85 format and return a bytes object. 447 448 If pad is true, the input is padded with b'\\0' so its length is a multiple of 449 4 bytes before encoding. 450 """ 451 global _b85chars, _b85chars2 452 # Delay the initialization of tables to not waste memory 453 # if the function is never called 454 if _b85chars2 is None: 455 _b85chars = [bytes((i,)) for i in _b85alphabet] 456 _b85chars2 = [(a + b) for a in _b85chars for b in _b85chars] 457 return _85encode(b, _b85chars, _b85chars2, pad) 458 459def b85decode(b): 460 """Decode the base85-encoded bytes-like object or ASCII string b 461 462 The result is returned as a bytes object. 463 """ 464 global _b85dec 465 # Delay the initialization of tables to not waste memory 466 # if the function is never called 467 if _b85dec is None: 468 _b85dec = [None] * 256 469 for i, c in enumerate(_b85alphabet): 470 _b85dec[c] = i 471 472 b = _bytes_from_decode_data(b) 473 padding = (-len(b)) % 5 474 b = b + b'~' * padding 475 out = [] 476 packI = struct.Struct('!I').pack 477 for i in range(0, len(b), 5): 478 chunk = b[i:i + 5] 479 acc = 0 480 try: 481 for c in chunk: 482 acc = acc * 85 + _b85dec[c] 483 except TypeError: 484 for j, c in enumerate(chunk): 485 if _b85dec[c] is None: 486 raise ValueError('bad base85 character at position %d' 487 % (i + j)) from None 488 raise 489 try: 490 out.append(packI(acc)) 491 except struct.error: 492 raise ValueError('base85 overflow in hunk starting at byte %d' 493 % i) from None 494 495 result = b''.join(out) 496 if padding: 497 result = result[:-padding] 498 return result 499 500_z85alphabet = (b'0123456789abcdefghijklmnopqrstuvwxyz' 501 b'ABCDEFGHIJKLMNOPQRSTUVWXYZ.-:+=^!/*?&<>()[]{}@%$#') 502# Translating b85 valid but z85 invalid chars to b'\x00' is required 503# to prevent them from being decoded as b85 valid chars. 504_z85_b85_decode_diff = b';_`|~' 505_z85_decode_translation = bytes.maketrans( 506 _z85alphabet + _z85_b85_decode_diff, 507 _b85alphabet + b'\x00' * len(_z85_b85_decode_diff) 508) 509_z85_encode_translation = bytes.maketrans(_b85alphabet, _z85alphabet) 510 511def z85encode(s): 512 """Encode bytes-like object b in z85 format and return a bytes object.""" 513 return b85encode(s).translate(_z85_encode_translation) 514 515def z85decode(s): 516 """Decode the z85-encoded bytes-like object or ASCII string b 517 518 The result is returned as a bytes object. 519 """ 520 s = _bytes_from_decode_data(s) 521 s = s.translate(_z85_decode_translation) 522 try: 523 return b85decode(s) 524 except ValueError as e: 525 raise ValueError(e.args[0].replace('base85', 'z85')) from None 526 527# Legacy interface. This code could be cleaned up since I don't believe 528# binascii has any line length limitations. It just doesn't seem worth it 529# though. The files should be opened in binary mode. 530 531MAXLINESIZE = 76 # Excluding the CRLF 532MAXBINSIZE = (MAXLINESIZE//4)*3 533 534def encode(input, output): 535 """Encode a file; input and output are binary files.""" 536 while s := input.read(MAXBINSIZE): 537 while len(s) < MAXBINSIZE and (ns := input.read(MAXBINSIZE-len(s))): 538 s += ns 539 line = binascii.b2a_base64(s) 540 output.write(line) 541 542 543def decode(input, output): 544 """Decode a file; input and output are binary files.""" 545 while line := input.readline(): 546 s = binascii.a2b_base64(line) 547 output.write(s) 548 549def _input_type_check(s): 550 try: 551 m = memoryview(s) 552 except TypeError as err: 553 msg = "expected bytes-like object, not %s" % s.__class__.__name__ 554 raise TypeError(msg) from err 555 if m.format not in ('c', 'b', 'B'): 556 msg = ("expected single byte elements, not %r from %s" % 557 (m.format, s.__class__.__name__)) 558 raise TypeError(msg) 559 if m.ndim != 1: 560 msg = ("expected 1-D data, not %d-D data from %s" % 561 (m.ndim, s.__class__.__name__)) 562 raise TypeError(msg) 563 564 565def encodebytes(s): 566 """Encode a bytestring into a bytes object containing multiple lines 567 of base-64 data.""" 568 _input_type_check(s) 569 pieces = [] 570 for i in range(0, len(s), MAXBINSIZE): 571 chunk = s[i : i + MAXBINSIZE] 572 pieces.append(binascii.b2a_base64(chunk)) 573 return b"".join(pieces) 574 575 576def decodebytes(s): 577 """Decode a bytestring of base-64 data into a bytes object.""" 578 _input_type_check(s) 579 return binascii.a2b_base64(s) 580 581 582# Usable as a script... 583def main(): 584 """Small main program""" 585 import sys, getopt 586 usage = f"""usage: {sys.argv[0]} [-h|-d|-e|-u] [file|-] 587 -h: print this help message and exit 588 -d, -u: decode 589 -e: encode (default)""" 590 try: 591 opts, args = getopt.getopt(sys.argv[1:], 'hdeu') 592 except getopt.error as msg: 593 sys.stdout = sys.stderr 594 print(msg) 595 print(usage) 596 sys.exit(2) 597 func = encode 598 for o, a in opts: 599 if o == '-e': func = encode 600 if o == '-d': func = decode 601 if o == '-u': func = decode 602 if o == '-h': print(usage); return 603 if args and args[0] != '-': 604 with open(args[0], 'rb') as f: 605 func(f, sys.stdout.buffer) 606 else: 607 func(sys.stdin.buffer, sys.stdout.buffer) 608 609 610if __name__ == '__main__': 611 main() 612