• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#! /usr/bin/env python3
2
3"""Base16, Base32, Base64 (RFC 3548), Base85 and Ascii85 data encodings"""
4
5# Modified 04-Oct-1995 by Jack Jansen to use binascii module
6# Modified 30-Dec-2003 by Barry Warsaw to add full RFC 3548 support
7# Modified 22-May-2007 by Guido van Rossum to use bytes everywhere
8
9import re
10import struct
11import binascii
12
13
# Public API of the module; names are grouped by encoding family.
__all__ = [
    # Legacy interface exports traditional RFC 2045 Base64 encodings
    'encode', 'decode', 'encodebytes', 'decodebytes',
    # Generalized interface for other encodings
    'b64encode', 'b64decode', 'b32encode', 'b32decode',
    'b32hexencode', 'b32hexdecode', 'b16encode', 'b16decode',
    # Base85, Ascii85 and Z85 encodings
    'b85encode', 'b85decode', 'a85encode', 'a85decode', 'z85encode', 'z85decode',
    # Standard Base64 encoding
    'standard_b64encode', 'standard_b64decode',
    # Some common Base64 alternatives.  As referenced by RFC 3548, see thread
    # starting at:
    #
    # http://zgp.org/pipermail/p2p-hackers/2001-September/000316.html
    'urlsafe_b64encode', 'urlsafe_b64decode',
    ]
30
31
bytes_types = (bytes, bytearray)  # Types acceptable as binary data


def _bytes_from_decode_data(s):
    """Normalize decoder input to a bytes-like value.

    bytes and bytearray objects are returned unchanged.  A str is encoded
    as ASCII (ValueError if it holds non-ASCII characters).  Anything else
    is copied out through the buffer protocol; objects that do not support
    it raise TypeError.
    """
    if isinstance(s, bytes_types):
        return s
    if isinstance(s, str):
        try:
            ascii_bytes = s.encode('ascii')
        except UnicodeEncodeError:
            raise ValueError('string argument should contain only ASCII characters')
        return ascii_bytes
    try:
        view = memoryview(s)
    except TypeError:
        raise TypeError("argument should be a bytes-like object or ASCII "
                        "string, not %r" % s.__class__.__name__) from None
    return view.tobytes()
47
48
49# Base64 encoding/decoding uses binascii
50
def b64encode(s, altchars=None):
    """Return the Base64 encoding of the bytes-like object s as bytes.

    altchars, when given, must be a byte string of length 2 whose two
    characters replace '+' and '/' in the output, e.g. to produce URL or
    filesystem safe Base64 strings.
    """
    result = binascii.b2a_base64(s, newline=False)
    if altchars is None:
        return result
    assert len(altchars) == 2, repr(altchars)
    substitution = bytes.maketrans(b'+/', altchars)
    return result.translate(substitution)
63
64
def b64decode(s, altchars=None, validate=False):
    """Decode Base64 data given as a bytes-like object or ASCII string.

    altchars, when given, must be a bytes-like object or ASCII string of
    length 2 naming the characters that were used in place of '+' and '/';
    they are mapped back to the standard alphabet before decoding.

    The decoded data is returned as a bytes object.  binascii.Error is
    raised when s is incorrectly padded.

    With validate false (the default), characters outside both the normal
    and the alternative alphabet are dropped before the padding check.
    With validate true, such characters cause a binascii.Error.
    Details of the strict check are documented at:

    https://docs.python.org/3.11/library/binascii.html#binascii.a2b_base64
    """
    data = _bytes_from_decode_data(s)
    if altchars is not None:
        alt = _bytes_from_decode_data(altchars)
        assert len(alt) == 2, repr(alt)
        to_standard = bytes.maketrans(alt, b'+/')
        data = data.translate(to_standard)
    return binascii.a2b_base64(data, strict_mode=validate)
89
90
def standard_b64encode(s):
    """Return bytes-like object s encoded with the standard Base64 alphabet.

    The encoded data is a bytes object.
    """
    # No altchars: plain '+' and '/' alphabet.
    encoded = b64encode(s)
    return encoded
97
def standard_b64decode(s):
    """Decode data that was encoded with the standard Base64 alphabet.

    s is a bytes-like object or ASCII string.  The decoded data is returned
    as a bytes object.  binascii.Error is raised for incorrectly padded
    input.  Characters outside the standard alphabet are dropped before
    the padding check.
    """
    # No altchars and non-strict validation, i.e. the b64decode defaults.
    decoded = b64decode(s)
    return decoded
107
108
# Translation tables, built once at import time, that swap the two
# characters differing between the standard and the URL-safe alphabets.
_urlsafe_encode_translation = bytes.maketrans(b'+/', b'-_')  # '+' -> '-', '/' -> '_'
_urlsafe_decode_translation = bytes.maketrans(b'-_', b'+/')  # '-' -> '+', '_' -> '/'
111
def urlsafe_b64encode(s):
    """Return s encoded with the URL- and filesystem-safe Base64 alphabet.

    s is a bytes-like object; the encoded data is a bytes object.  In this
    alphabet '-' takes the place of '+' and '_' takes the place of '/'.
    """
    standard = b64encode(s)
    return standard.translate(_urlsafe_encode_translation)
120
def urlsafe_b64decode(s):
    """Decode data encoded with the URL- and filesystem-safe Base64 alphabet.

    s is a bytes-like object or ASCII string.  The decoded data is returned
    as a bytes object.  binascii.Error is raised for incorrectly padded
    input.  Characters that belong neither to the URL-safe alphabet nor
    to the set '+', '/' are dropped before the padding check.

    In this alphabet '-' takes the place of '+' and '_' takes the place
    of '/'.
    """
    # Map '-' and '_' back to '+' and '/', then decode as standard Base64.
    standard = _bytes_from_decode_data(s).translate(_urlsafe_decode_translation)
    return b64decode(standard)
135
136
137
138# Base32 encoding/decoding must be done in Python
# Docstring templates shared by the base32 and base32hex front ends; each
# public function fills them in with str.format() right after its definition.
_B32_ENCODE_DOCSTRING = '''
Encode the bytes-like objects using {encoding} and return a bytes object.
'''
_B32_DECODE_DOCSTRING = '''
Decode the {encoding} encoded bytes-like object or ASCII string s.

Optional casefold is a flag specifying whether a lowercase alphabet is
acceptable as input.  For security purposes, the default is False.
{extra_args}
The result is returned as a bytes object.  A binascii.Error is raised if
the input is incorrectly padded or if there are non-alphabet
characters present in the input.
'''
# Extra paragraph substituted for {extra_args} by b32decode, which is the
# only decoder that accepts the map01 argument.
_B32_DECODE_MAP01_DOCSTRING = '''
RFC 3548 allows for optional mapping of the digit 0 (zero) to the
letter O (oh), and for optional mapping of the digit 1 (one) to
either the letter I (eye) or letter L (el).  The optional argument
map01 when not None, specifies which letter the digit 1 should be
mapped to (when map01 is not None, the digit 0 is always mapped to
the letter O).  For security purposes the default is None, so that
0 and 1 are not allowed in the input.
'''
# The 32 symbols of each alphabet; a symbol's index is its 5-bit value.
_b32alphabet = b'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'
_b32hexalphabet = b'0123456789ABCDEFGHIJKLMNOPQRSTUV'
# Lazily filled caches keyed by alphabet:
#   _b32tab2: list of all two-symbol byte strings (used by _b32encode)
#   _b32rev:  dict mapping symbol byte value -> 5-bit value (used by _b32decode)
_b32tab2 = {}
_b32rev = {}
165
def _b32encode(alphabet, s):
    """Encode bytes-like object s with the given 32-symbol alphabet.

    Input is consumed in 5-byte quanta, each producing 8 output symbols;
    a short final quantum is zero-padded and the surplus symbols are
    replaced by '=' characters.  Returns a bytes object.
    """
    # The two-symbol lookup table is built on first use per alphabet so
    # that no memory is spent if the function is never called.
    pair_table = _b32tab2.get(alphabet)
    if pair_table is None:
        singles = [bytes((code,)) for code in alphabet]
        pair_table = [first + second for first in singles for second in singles]
        _b32tab2[alphabet] = pair_table

    if not isinstance(s, bytes_types):
        s = memoryview(s).tobytes()
    leftover = len(s) % 5
    if leftover:
        # Zero-fill the final quantum.  Build a new object rather than
        # extending in place: s may be the caller's own bytearray.
        s = s + b'\0' * (5 - leftover)

    encoded = bytearray()
    for offset in range(0, len(s), 5):
        quantum = int.from_bytes(s[offset:offset + 5])  # big endian
        # Four 10-bit groups, each looked up as a pair of symbols.
        encoded += (pair_table[quantum >> 30] +
                    pair_table[(quantum >> 20) & 0x3ff] +
                    pair_table[(quantum >> 10) & 0x3ff] +
                    pair_table[quantum & 0x3ff])

    # Number of trailing symbols to overwrite with '=' for each possible
    # count of leftover input bytes.
    pad_length = {1: 6, 2: 4, 3: 3, 4: 1}.get(leftover, 0)
    if pad_length:
        encoded[-pad_length:] = b'=' * pad_length
    return bytes(encoded)
200
def _b32decode(alphabet, s, casefold=False, map01=None):
    """Decode s (bytes-like or ASCII str) using the given 32-symbol alphabet.

    casefold upper-cases the input first.  map01, when not None, is the
    single character the digit 1 is translated to (0 is then translated
    to the letter O).  Raises binascii.Error for a length that is not a
    multiple of 8, for symbols outside the alphabet, and for an invalid
    number of trailing '=' characters.  Returns a bytes object.
    """
    # The reverse table is built on first use per alphabet so that no
    # memory is spent if the function is never called.
    if alphabet not in _b32rev:
        _b32rev[alphabet] = {symbol: value
                             for value, symbol in enumerate(alphabet)}
    data = _bytes_from_decode_data(s)
    if len(data) % 8:
        raise binascii.Error('Incorrect padding')
    # Section 2.4 zero and one mapping.  map01 is either None or the
    # character the digit 1 (one) maps to; it should be L (el) or I (eye).
    if map01 is not None:
        map01 = _bytes_from_decode_data(map01)
        assert len(map01) == 1, repr(map01)
        data = data.translate(bytes.maketrans(b'01', b'O' + map01))
    if casefold:
        data = data.upper()
    # Count the trailing pad characters: they determine how many bytes of
    # the final quantum are real data.  The operations above keep the
    # length unchanged, so it is still a multiple of 8 here.
    symbols = data.rstrip(b'=')
    padchars = len(data) - len(symbols)

    reverse = _b32rev[alphabet]
    decoded = bytearray()
    for offset in range(0, len(symbols), 8):
        acc = 0
        try:
            for symbol in symbols[offset:offset + 8]:
                acc = (acc << 5) + reverse[symbol]
        except KeyError:
            raise binascii.Error('Non-base32 digit found') from None
        decoded += acc.to_bytes(5)  # big endian

    if padchars not in {0, 1, 3, 4, 6}:
        raise binascii.Error('Incorrect padding')
    if padchars and decoded:
        # The last quantum was short: re-align its bits as if the pad
        # characters were zero symbols and keep only the real bytes.
        acc <<= 5 * padchars
        kept = (43 - 5 * padchars) // 8  # 1: 4, 3: 3, 4: 2, 6: 1
        decoded[-5:] = acc.to_bytes(5)[:kept]
    return bytes(decoded)
245
246
def b32encode(s):
    # Docstring is attached below from the shared template.
    encoded = _b32encode(_b32alphabet, s)
    return encoded
b32encode.__doc__ = _B32_ENCODE_DOCSTRING.format(
    encoding='base32')
250
def b32decode(s, casefold=False, map01=None):
    # Docstring is attached below from the shared templates.
    return _b32decode(_b32alphabet, s, casefold=casefold, map01=map01)
b32decode.__doc__ = _B32_DECODE_DOCSTRING.format(
    encoding='base32',
    extra_args=_B32_DECODE_MAP01_DOCSTRING)
255
def b32hexencode(s):
    # Docstring is attached below from the shared template.
    encoded = _b32encode(_b32hexalphabet, s)
    return encoded
b32hexencode.__doc__ = _B32_ENCODE_DOCSTRING.format(
    encoding='base32hex')
259
def b32hexdecode(s, casefold=False):
    # base32hex has no 0/1 mapping, so map01 is left at its default.
    # Docstring is attached below from the shared template.
    return _b32decode(_b32hexalphabet, s, casefold=casefold)
b32hexdecode.__doc__ = _B32_DECODE_DOCSTRING.format(
    encoding='base32hex',
    extra_args='')
265
266
267# RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns
268# lowercase.  The RFC also recommends against accepting input case
269# insensitively.
def b16encode(s):
    """Return the Base16 encoding of the bytes-like object s as bytes."""
    # hexlify() yields lowercase digits; Base16 output is uppercase.
    lowercase_hex = binascii.hexlify(s)
    return lowercase_hex.upper()
274
275
def b16decode(s, casefold=False):
    """Decode Base16 data given as a bytes-like object or ASCII string.

    casefold tells whether lowercase hex digits are acceptable in the
    input.  For security purposes, the default is False.

    The decoded data is returned as a bytes object.  binascii.Error is
    raised when s is incorrectly padded or contains characters outside
    the Base16 alphabet.
    """
    data = _bytes_from_decode_data(s)
    if casefold:
        data = data.upper()
    # Anything other than 0-9 / A-F at this point is rejected outright.
    if re.search(b'[^0-9A-F]', data) is not None:
        raise binascii.Error('Non-base16 digit found')
    return binascii.unhexlify(data)
292
293#
294# Ascii85 encoding/decoding
295#
296
# Lookup tables for Ascii85 encoding; they stay None until a85encode()
# builds them on its first call.
_a85chars = None    # list of the 85 single-character byte strings
_a85chars2 = None   # list of all two-character combinations of the above
# Framing markers used by the Adobe variant of Ascii85.
_A85START = b"<~"
_A85END = b"~>"
301
def _85encode(b, chars, chars2, pad=False, foldnuls=False, foldspaces=False):
    """Shared encoder behind a85encode and b85encode.

    chars is the list of single-symbol byte strings and chars2 the list of
    all two-symbol combinations.  The input is zero-padded to a multiple
    of 4 bytes and each 32-bit big-endian word becomes 5 symbols.  With
    foldnuls an all-zero word becomes b'z'; with foldspaces a word of four
    spaces becomes b'y'.  Unless pad is true, the symbols that correspond
    to the padding bytes are removed from the end of the output.
    """
    if not isinstance(b, bytes_types):
        b = memoryview(b).tobytes()

    padding = (-len(b)) % 4
    if padding:
        b = b + b'\0' * padding
    word_count = len(b) // 4
    words = struct.Struct('!%dI' % word_count).unpack(b)

    chunks = []
    for word in words:
        if foldnuls and not word:
            chunks.append(b'z')
        elif foldspaces and word == 0x20202020:
            chunks.append(b'y')
        else:
            # 614125 == 85**3 and 7225 == 85**2: two symbol pairs for the
            # high digits plus one single symbol for the lowest digit.
            chunks.append(chars2[word // 614125] +
                          chars2[word // 85 % 7225] +
                          chars[word % 85])

    if padding and not pad:
        tail = chunks[-1]
        if tail == b'z':
            # Expand the folded zero word so it can be truncated.
            tail = chars[0] * 5
        chunks[-1] = tail[:-padding]

    return b''.join(chunks)
325
def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):
    """Return the Ascii85 encoding of the bytes-like object b as bytes.

    foldspaces enables the short sequence 'y' for 4 consecutive spaces
    (ASCII 0x20), as supported by 'btoa'.  The "standard" Adobe encoding
    does not support this feature.

    wrapcol, when non-zero, inserts newline (b'\\n') characters so that
    each output line is at most that many characters long, not counting
    the trailing newline.

    pad tells whether the input is padded to a multiple of 4 before
    encoding.  Note that the btoa implementation always pads.

    adobe tells whether the output is framed with <~ and ~>, as done by
    the Adobe implementation.
    """
    global _a85chars, _a85chars2
    # The tables are built on first use so that no memory is spent if the
    # function is never called.
    if _a85chars2 is None:
        _a85chars = [bytes((code,)) for code in range(33, 118)]
        _a85chars2 = [(first + second)
                      for first in _a85chars for second in _a85chars]

    encoded = _85encode(b, _a85chars, _a85chars2, pad=pad,
                        foldnuls=True, foldspaces=foldspaces)

    if adobe:
        encoded = _A85START + encoded
    if wrapcol:
        width = max(2 if adobe else 1, wrapcol)
        lines = [encoded[pos:pos + width]
                 for pos in range(0, len(encoded), width)]
        # Start a fresh line if the end marker would not fit on the last one.
        if adobe and len(lines[-1]) + 2 > width:
            lines.append(b'')
        encoded = b'\n'.join(lines)
    if adobe:
        encoded += _A85END

    return encoded
366
def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'):
    """Decode the Ascii85 encoded bytes-like object or ASCII string b.

    foldspaces is a flag that specifies whether the 'y' short sequence should be
    accepted as shorthand for 4 consecutive spaces (ASCII 0x20). This feature is
    not supported by the "standard" Adobe encoding.

    adobe controls whether the input sequence is in Adobe Ascii85 format (i.e.
    is framed with <~ and ~>).  The trailing ~> is required in that mode; the
    leading <~ is stripped when present.

    ignorechars should be a byte string containing characters to ignore from the
    input. This should only contain whitespace characters; the default is
    space, tab, newline, carriage return and vertical tab.

    The result is returned as a bytes object.

    ValueError is raised when the Adobe end marker is missing, when a 5-tuple
    decodes to a value that does not fit in 32 bits, when 'z' (or 'y' with
    foldspaces) appears inside a 5-tuple, or when any other character outside
    the Ascii85 alphabet and ignorechars is found.
    """
    b = _bytes_from_decode_data(b)
    if adobe:
        if not b.endswith(_A85END):
            raise ValueError(
                "Ascii85 encoded byte sequences must end "
                "with {!r}".format(_A85END)
                )
        if b.startswith(_A85START):
            b = b[2:-2]  # Strip off start/end markers
        else:
            b = b[:-2]
    #
    # We have to go through this stepwise, so as to ignore spaces and handle
    # special short sequences
    #
    packI = struct.Struct('!I').pack
    decoded = []
    decoded_append = decoded.append
    # curr collects the digits of the 5-tuple currently being read.
    curr = []
    curr_append = curr.append
    curr_clear = curr.clear
    # Four 'u' digits (the highest digit) are appended so that a trailing
    # partial 5-tuple is always completed; the surplus is trimmed below.
    for x in b + b'u' * 4:
        if b'!'[0] <= x <= b'u'[0]:
            curr_append(x)
            if len(curr) == 5:
                acc = 0
                # Note: this inner loop rebinds x; the outer loop fetches
                # its next value from the iterator, so that is harmless.
                for x in curr:
                    acc = 85 * acc + (x - 33)
                try:
                    decoded_append(packI(acc))
                except struct.error:
                    # acc does not fit in an unsigned 32-bit integer
                    raise ValueError('Ascii85 overflow') from None
                curr_clear()
        elif x == b'z'[0]:
            # 'z' is shorthand for four zero bytes
            if curr:
                raise ValueError('z inside Ascii85 5-tuple')
            decoded_append(b'\0\0\0\0')
        elif foldspaces and x == b'y'[0]:
            # 'y' is shorthand for four spaces
            if curr:
                raise ValueError('y inside Ascii85 5-tuple')
            decoded_append(b'\x20\x20\x20\x20')
        elif x in ignorechars:
            # Skip whitespace
            continue
        else:
            raise ValueError('Non-Ascii85 digit found: %c' % x)

    result = b''.join(decoded)
    # curr now holds the filler digits that were not consumed; the other
    # filler digits went into the last 5-tuple, one surplus byte each.
    padding = 4 - len(curr)
    if padding:
        # Throw away the extra padding
        result = result[:-padding]
    return result
436
437# The following code is originally taken (with permission) from Mercurial
438
# The 85 symbols of the base85 alphabet; a symbol's index is its digit value.
_b85alphabet = (b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                b"abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~")
# Lookup tables; they stay None until first use.
_b85chars = None    # single-symbol byte strings, built by b85encode()
_b85chars2 = None   # all two-symbol combinations, built by b85encode()
_b85dec = None      # 256-entry byte value -> digit table, built by b85decode()
444
def b85encode(b, pad=False):
    """Return the base85 encoding of the bytes-like object b as bytes.

    When pad is true, the input is padded with b'\\0' to a multiple of
    4 bytes before encoding.
    """
    global _b85chars, _b85chars2
    # The tables are built on first use so that no memory is spent if the
    # function is never called.
    if _b85chars2 is None:
        _b85chars = [bytes((code,)) for code in _b85alphabet]
        _b85chars2 = [(first + second)
                      for first in _b85chars for second in _b85chars]
    encoded = _85encode(b, _b85chars, _b85chars2, pad)
    return encoded
458
def b85decode(b):
    """Decode base85 data given as a bytes-like object or ASCII string.

    The decoded data is returned as a bytes object.  ValueError is raised
    for a character outside the alphabet and for a 5-character group
    whose value does not fit in 32 bits.
    """
    global _b85dec
    # The decode table is built on first use so that no memory is spent
    # if the function is never called.
    if _b85dec is None:
        _b85dec = [None] * 256
        for digit, code in enumerate(_b85alphabet):
            _b85dec[code] = digit

    b = _bytes_from_decode_data(b)
    # Complete the final group with '~', the highest digit.
    padding = (-len(b)) % 5
    b = b + b'~' * padding
    pack_word = struct.Struct('!I').pack
    words = []
    for start in range(0, len(b), 5):
        group = b[start:start + 5]
        value = 0
        try:
            for code in group:
                value = value * 85 + _b85dec[code]
        except TypeError:
            # A None table entry was hit; find the offending character.
            for index, code in enumerate(group):
                if _b85dec[code] is None:
                    raise ValueError('bad base85 character at position %d'
                                    % (start + index)) from None
            raise
        try:
            packed = pack_word(value)
        except struct.error:
            raise ValueError('base85 overflow in hunk starting at byte %d'
                             % start) from None
        words.append(packed)

    result = b''.join(words)
    if padding:
        result = result[:-padding]
    return result
499
# The 85 symbols of the Z85 alphabet, in digit order.
_z85alphabet = (b'0123456789abcdefghijklmnopqrstuvwxyz'
                b'ABCDEFGHIJKLMNOPQRSTUVWXYZ.-:+=^!/*?&<>()[]{}@%$#')
# Translating b85 valid but z85 invalid chars to b'\x00' is required
# to prevent them from being decoded as b85 valid chars.
_z85_b85_decode_diff = b';_`|~'
# Z85 symbol -> base85 symbol with the same digit value, so that Z85 input
# can be handed to b85decode().
_z85_decode_translation = bytes.maketrans(
    _z85alphabet + _z85_b85_decode_diff,
    _b85alphabet + b'\x00' * len(_z85_b85_decode_diff)
)
# base85 symbol -> Z85 symbol with the same digit value, applied to the
# output of b85encode().
_z85_encode_translation = bytes.maketrans(_b85alphabet, _z85alphabet)
510
def z85encode(s):
    """Return the z85 encoding of the bytes-like object s as bytes."""
    # Encode as base85, then swap each symbol for its z85 counterpart.
    base85 = b85encode(s)
    return base85.translate(_z85_encode_translation)
514
def z85decode(s):
    """Decode z85 data given as a bytes-like object or ASCII string s.

    The decoded data is returned as a bytes object.  ValueError is raised
    for invalid input.
    """
    # Swap each z85 symbol for its base85 counterpart, then decode.
    base85 = _bytes_from_decode_data(s).translate(_z85_decode_translation)
    try:
        return b85decode(base85)
    except ValueError as exc:
        # Re-word the message so that it names the encoding the caller used.
        message = exc.args[0].replace('base85', 'z85')
        raise ValueError(message) from None
526
527# Legacy interface.  This code could be cleaned up since I don't believe
528# binascii has any line length limitations.  It just doesn't seem worth it
529# though.  The files should be opened in binary mode.
530
MAXLINESIZE = 76  # Excluding the CRLF
MAXBINSIZE = (MAXLINESIZE // 4) * 3  # Raw bytes that encode to one full line


def encode(input, output):
    """Encode a file; input and output are binary files."""
    while True:
        s = input.read(MAXBINSIZE)
        if not s:
            break
        # A read may return fewer bytes than requested; keep reading until
        # the chunk is full or the input is exhausted.
        while len(s) < MAXBINSIZE:
            ns = input.read(MAXBINSIZE - len(s))
            if not ns:
                break
            s += ns
        output.write(binascii.b2a_base64(s))
541
542
def decode(input, output):
    """Decode a file; input and output are binary files."""
    # Each input line is decoded on its own and written out immediately.
    while True:
        line = input.readline()
        if not line:
            break
        output.write(binascii.a2b_base64(line))
548
def _input_type_check(s):
    """Raise TypeError unless s is a 1-D buffer of single-byte elements.

    Returns None when s is acceptable.
    """
    try:
        view = memoryview(s)
    except TypeError as err:
        raise TypeError("expected bytes-like object, not %s"
                        % s.__class__.__name__) from err
    if view.format not in ('c', 'b', 'B'):
        raise TypeError("expected single byte elements, not %r from %s"
                        % (view.format, s.__class__.__name__))
    if view.ndim != 1:
        raise TypeError("expected 1-D data, not %d-D data from %s"
                        % (view.ndim, s.__class__.__name__))
563
564
def encodebytes(s):
    """Encode a bytestring into a bytes object containing multiple lines
    of base-64 data."""
    _input_type_check(s)
    # Each MAXBINSIZE-byte slice becomes one newline-terminated line.
    lines = [binascii.b2a_base64(s[start:start + MAXBINSIZE])
             for start in range(0, len(s), MAXBINSIZE)]
    return b"".join(lines)
574
575
def decodebytes(s):
    """Decode a bytestring of base-64 data into a bytes object."""
    # Reject anything that is not a 1-D buffer of single bytes first.
    _input_type_check(s)
    decoded = binascii.a2b_base64(s)
    return decoded
580
581
582# Usable as a script...
def main():
    """Small main program

    Parses sys.argv: -e encodes (the default), -d and -u decode, -h prints
    the usage text and returns.  Data is read from the named file, or from
    standard input when no file or '-' is given, and the result is written
    to standard output (binary).  On an invalid option the error and the
    usage text are printed to standard error and the process exits with
    status 2.
    """
    import sys
    import getopt
    usage = f"""usage: {sys.argv[0]} [-h|-d|-e|-u] [file|-]
        -h: print this help message and exit
        -d, -u: decode
        -e: encode (default)"""
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hdeu')
    except getopt.error as msg:
        # Write directly to stderr rather than rebinding sys.stdout, which
        # would stay redirected if a caller catches the SystemExit.
        print(msg, file=sys.stderr)
        print(usage, file=sys.stderr)
        sys.exit(2)
    func = encode
    # Options are applied in command-line order; the last of -e/-d/-u wins
    # and -h stops processing as soon as it is reached.
    for opt, _ in opts:
        if opt == '-e':
            func = encode
        elif opt in ('-d', '-u'):
            func = decode
        elif opt == '-h':
            print(usage)
            return
    if args and args[0] != '-':
        with open(args[0], 'rb') as f:
            func(f, sys.stdout.buffer)
    else:
        func(sys.stdin.buffer, sys.stdout.buffer)
608
609
# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    main()
612