• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#! /usr/bin/env python3
2
3"""Base16, Base32, Base64 (RFC 3548), Base85 and Ascii85 data encodings"""
4
5# Modified 04-Oct-1995 by Jack Jansen to use binascii module
6# Modified 30-Dec-2003 by Barry Warsaw to add full RFC 3548 support
7# Modified 22-May-2007 by Guido van Rossum to use bytes everywhere
8
9import re
10import struct
11import binascii
12
13
# Public names exported by a star-import of this module.
__all__ = [
    # Legacy interface exports traditional RFC 2045 Base64 encodings
    'encode', 'decode', 'encodebytes', 'decodebytes',
    # Generalized interface for other encodings
    'b64encode', 'b64decode', 'b32encode', 'b32decode',
    'b16encode', 'b16decode',
    # Base85 and Ascii85 encodings
    'b85encode', 'b85decode', 'a85encode', 'a85decode',
    # Standard Base64 encoding
    'standard_b64encode', 'standard_b64decode',
    # Some common Base64 alternatives.  As referenced by RFC 3548, see thread
    # starting at:
    #
    # http://zgp.org/pipermail/p2p-hackers/2001-September/000316.html
    'urlsafe_b64encode', 'urlsafe_b64decode',
    ]
30
31
bytes_types = (bytes, bytearray)  # Types acceptable as binary data


def _bytes_from_decode_data(s):
    """Normalize decoder input to a bytes-like value.

    bytes and bytearray objects are returned unchanged.  A str is encoded
    as ASCII.  Any other object is copied to bytes through memoryview().

    Raises ValueError for a str holding non-ASCII characters and TypeError
    for an object memoryview() cannot wrap.
    """
    if isinstance(s, bytes_types):
        return s
    if isinstance(s, str):
        try:
            encoded = s.encode('ascii')
        except UnicodeEncodeError:
            raise ValueError('string argument should contain only ASCII characters')
        return encoded
    try:
        return memoryview(s).tobytes()
    except TypeError:
        message = ("argument should be a bytes-like object or ASCII "
                   "string, not %r" % s.__class__.__name__)
        raise TypeError(message) from None
47
48
49# Base64 encoding/decoding uses binascii
50
def b64encode(s, altchars=None):
    """Encode the bytes-like object s using Base64 and return a bytes object.

    altchars, when given, must be a byte string of length 2.  Its two bytes
    replace '+' and '/' in the output, which lets callers produce e.g. URL
    or filesystem safe Base64 strings.
    """
    # newline=False: no trailing line break is appended to the result.
    result = binascii.b2a_base64(s, newline=False)
    if altchars is None:
        return result
    assert len(altchars) == 2, repr(altchars)
    substitution = bytes.maketrans(b'+/', altchars)
    return result.translate(substitution)
63
64
def b64decode(s, altchars=None, validate=False):
    """Decode the Base64 encoded bytes-like object or ASCII string s.

    Optional altchars must be a bytes-like object or ASCII string of length 2
    which specifies the alternative alphabet used instead of the '+' and '/'
    characters.

    The result is returned as a bytes object.  A binascii.Error is raised if
    s is incorrectly padded.

    If validate is False (the default), characters that are neither in the
    normal base-64 alphabet nor the alternative alphabet are discarded prior
    to the padding check.  If validate is True, these non-alphabet characters
    in the input result in a binascii.Error.
    """
    s = _bytes_from_decode_data(s)
    if altchars is not None:
        altchars = _bytes_from_decode_data(altchars)
        assert len(altchars) == 2, repr(altchars)
        # Map the alternative alphabet back onto the standard '+' and '/'.
        s = s.translate(bytes.maketrans(altchars, b'+/'))
    # Use fullmatch rather than match() with a '$' anchor: '$' also matches
    # just before a trailing newline, which would let b'....\n' slip through
    # strict validation.
    if validate and not re.fullmatch(b'[A-Za-z0-9+/]*={0,2}', s):
        raise binascii.Error('Non-base64 digit found')
    return binascii.a2b_base64(s)
88
89
def standard_b64encode(s):
    """Encode bytes-like object s using the standard Base64 alphabet.

    Delegates to b64encode() without alternative characters and returns
    its bytes result.
    """
    encoded = b64encode(s)
    return encoded
96
def standard_b64decode(s):
    """Decode bytes encoded with the standard Base64 alphabet.

    s is a bytes-like object or ASCII string.  Decoding is delegated to
    b64decode() with its default arguments, so characters outside the
    standard alphabet are discarded before the padding check and a
    binascii.Error is raised for incorrectly padded input.  The result is
    a bytes object.
    """
    decoded = b64decode(s)
    return decoded
106
107
# Translation tables between the standard ('+', '/') and the URL-safe
# ('-', '_') Base64 alphabets.
_urlsafe_encode_translation = bytes.maketrans(b'+/', b'-_')
_urlsafe_decode_translation = bytes.maketrans(b'-_', b'+/')

def urlsafe_b64encode(s):
    """Encode bytes using the URL- and filesystem-safe Base64 alphabet.

    s is a bytes-like object.  It is encoded with b64encode() and then
    every '+' is replaced by '-' and every '/' by '_'.  The result is a
    bytes object.
    """
    standard = b64encode(s)
    return standard.translate(_urlsafe_encode_translation)
119
def urlsafe_b64decode(s):
    """Decode bytes using the URL- and filesystem-safe Base64 alphabet.

    s is a bytes-like object or ASCII string.  '-' is mapped back to '+'
    and '_' back to '/' before the data is handed to b64decode(), so both
    the URL-safe and the standard characters are accepted.  Other
    non-alphabet characters are discarded prior to the padding check, and
    a binascii.Error is raised if the input is incorrectly padded.  The
    result is a bytes object.
    """
    data = _bytes_from_decode_data(s)
    return b64decode(data.translate(_urlsafe_decode_translation))
134
135
136
137# Base32 encoding/decoding must be done in Python
_b32alphabet = b'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'
_b32tab2 = None   # lazily built by b32encode(): 10-bit value -> 2 symbols
_b32rev = None    # lazily built by b32decode(): symbol byte -> 5-bit value

def b32encode(s):
    """Encode the bytes-like object s using Base32 and return a bytes object.
    """
    global _b32tab2
    # Build the two-symbol lookup table on first use only, so that no
    # memory is spent on it when the function is never called.
    if _b32tab2 is None:
        singles = [bytes((code,)) for code in _b32alphabet]
        _b32tab2 = [first + second for first in singles for second in singles]

    if not isinstance(s, bytes_types):
        s = memoryview(s).tobytes()
    remainder = len(s) % 5
    if remainder:
        # Zero-fill the final quantum.  Build a new object instead of using
        # += so that a caller's bytearray is never modified in place.
        s = s + b'\0' * (5 - remainder)

    pairs = _b32tab2
    to_int = int.from_bytes
    out = bytearray()
    for start in range(0, len(s), 5):
        quantum = to_int(s[start: start + 5], 'big')
        # 40 bits are emitted as four 10-bit lookups (two symbols each).
        out += pairs[quantum >> 30]
        out += pairs[(quantum >> 20) & 0x3ff]
        out += pairs[(quantum >> 10) & 0x3ff]
        out += pairs[quantum & 0x3ff]

    # Overwrite the symbols that only encode fill bits with '=' padding.
    pad_count = {0: 0, 1: 6, 2: 4, 3: 3, 4: 1}[remainder]
    if pad_count:
        out[-pad_count:] = b'=' * pad_count
    return bytes(out)
179
def b32decode(s, casefold=False, map01=None):
    """Decode the Base32 encoded bytes-like object or ASCII string s.

    casefold, when true, makes a lowercase alphabet acceptable as input;
    the default is False.

    map01, when not None, is the letter that the digit 1 (one) is mapped
    to before decoding, and the digit 0 (zero) is then always mapped to
    the letter O (oh).  With the default of None the digits 0 and 1 are
    not accepted in the input.

    The result is returned as a bytes object.  A binascii.Error is raised
    if the input is incorrectly padded or contains non-alphabet characters.
    """
    global _b32rev
    # Build the reverse lookup table on first use only.
    if _b32rev is None:
        _b32rev = dict(zip(_b32alphabet, range(len(_b32alphabet))))

    s = _bytes_from_decode_data(s)
    if len(s) % 8:
        raise binascii.Error('Incorrect padding')

    # Optional zero/one mapping: map01 is None or the one-character
    # replacement for the digit 1.
    if map01 is not None:
        map01 = _bytes_from_decode_data(map01)
        assert len(map01) == 1, repr(map01)
        s = s.translate(bytes.maketrans(b'01', b'O' + map01))
    if casefold:
        s = s.upper()

    # The number of stripped '=' characters determines how many bytes of
    # the final quantum are real data.
    padded_len = len(s)
    s = s.rstrip(b'=')
    padchars = padded_len - len(s)

    reverse = _b32rev
    out = bytearray()
    for start in range(0, len(s), 8):
        value = 0
        try:
            for symbol in s[start: start + 8]:
                value = (value << 5) + reverse[symbol]
        except KeyError:
            raise binascii.Error('Non-base32 digit found') from None
        out += value.to_bytes(5, 'big')

    if padded_len % 8 or padchars not in {0, 1, 3, 4, 6}:
        raise binascii.Error('Incorrect padding')
    if padchars and out:
        # Re-align the short final quantum to 40 bits and keep only the
        # bytes that carry data.
        value <<= 5 * padchars
        tail = value.to_bytes(5, 'big')
        keep = (43 - 5 * padchars) // 8  # 1: 4, 3: 3, 4: 2, 6: 1
        out[-5:] = tail[:keep]
    return bytes(out)
242
243
244# RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns
245# lowercase.  The RFC also recommends against accepting input case
246# insensitively.
def b16encode(s):
    """Encode the bytes-like object s using Base16 and return a bytes object.
    """
    lowercase_hex = binascii.hexlify(s)
    return lowercase_hex.upper()
251
252
def b16decode(s, casefold=False):
    """Decode the Base16 encoded bytes-like object or ASCII string s.

    casefold, when true, makes a lowercase alphabet acceptable as input;
    the default is False.

    The result is returned as a bytes object.  A binascii.Error is raised
    if s is incorrectly padded or contains non-alphabet characters.
    """
    data = _bytes_from_decode_data(s)
    if casefold:
        data = data.upper()
    # Anything other than digits and uppercase A-F is rejected up front.
    if re.search(b'[^0-9A-F]', data) is not None:
        raise binascii.Error('Non-base16 digit found')
    return binascii.unhexlify(data)
269
270#
271# Ascii85 encoding/decoding
272#
273
_a85chars = None    # lazily built by a85encode(): one-symbol table
_a85chars2 = None   # lazily built by a85encode(): two-symbol table
_A85START = b"<~"
_A85END = b"~>"

def _85encode(b, chars, chars2, pad=False, foldnuls=False, foldspaces=False):
    """Shared encoder used by a85encode() and b85encode().

    chars maps a value below 85 to its one-byte symbol and chars2 maps a
    value below 85*85 to a two-byte symbol pair.  foldnuls emits b'z' for
    an all-zero 32-bit word and foldspaces emits b'y' for a word of four
    spaces.  Unless pad is true, the symbols that only encode fill bytes
    are dropped from the final group.
    """
    if not isinstance(b, bytes_types):
        b = memoryview(b).tobytes()

    padding = (-len(b)) % 4
    if padding:
        b = b + b'\0' * padding
    words = struct.unpack('!%dI' % (len(b) // 4), b)

    chunks = []
    for word in words:
        if foldnuls and not word:
            chunks.append(b'z')
        elif foldspaces and word == 0x20202020:
            chunks.append(b'y')
        else:
            # Five base-85 digits: two table lookups of two digits each
            # plus a single trailing digit.
            upper, low = divmod(word, 85)
            high, mid = divmod(upper, 7225)
            chunks.append(chars2[high] + chars2[mid] + chars[low])

    if padding and not pad:
        if chunks[-1] == b'z':
            # Expand the folded group so it can be truncated below.
            chunks[-1] = chars[0] * 5
        chunks[-1] = chunks[-1][:-padding]

    return b''.join(chunks)
302
def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):
    """Encode bytes-like object b using Ascii85 and return a bytes object.

    foldspaces enables the short sequence 'y' for 4 consecutive spaces
    (ASCII 0x20), as supported by 'btoa'.  The "standard" Adobe encoding
    does not support it.

    wrapcol, when non-zero, inserts newline characters so that each output
    line is at most this many characters long.

    pad controls whether the input is padded to a multiple of 4 before
    encoding.  Note that the btoa implementation always pads.

    adobe controls whether the encoded byte sequence is framed with <~ and
    ~>, which is used by the Adobe implementation.
    """
    global _a85chars, _a85chars2
    # Build the symbol tables on first use only.
    if _a85chars is None:
        _a85chars = [bytes((code,)) for code in range(33, 118)]
        _a85chars2 = [(first + second)
                      for first in _a85chars for second in _a85chars]

    encoded = _85encode(b, _a85chars, _a85chars2, pad, True, foldspaces)

    if adobe:
        encoded = _A85START + encoded
    if wrapcol:
        # A line must be able to hold at least the two-byte end marker.
        width = max(2 if adobe else 1, wrapcol)
        lines = [encoded[start: start + width]
                 for start in range(0, len(encoded), width)]
        # Put the end marker on its own line when the last one is too full.
        if adobe and len(lines[-1]) + 2 > width:
            lines.append(b'')
        encoded = b'\n'.join(lines)
    if adobe:
        encoded += _A85END

    return encoded
343
def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'):
    """Decode the Ascii85 encoded bytes-like object or ASCII string b.

    foldspaces specifies whether the 'y' short sequence is accepted as
    shorthand for 4 consecutive spaces (ASCII 0x20).  The "standard" Adobe
    encoding does not support it.

    adobe specifies whether the input is in Adobe Ascii85 format, i.e.
    terminated by ~> and optionally introduced by <~.

    ignorechars is a byte string of characters to skip in the input.  It
    should only contain whitespace characters.

    The result is returned as a bytes object.  ValueError is raised for
    malformed input.
    """
    b = _bytes_from_decode_data(b)
    if adobe:
        if not b.endswith(_A85END):
            raise ValueError(
                "Ascii85 encoded byte sequences must end with {!r}".format(_A85END)
                )
        # The start marker is optional; the end marker is always removed.
        skip = 2 if b.startswith(_A85START) else 0
        b = b[skip:-2]

    first_digit, last_digit = b'!'[0], b'u'[0]
    zero_fold, space_fold = b'z'[0], b'y'[0]
    pack_word = struct.Struct('!I').pack
    pieces = []
    group = []
    # Four trailing 'u' digits complete a short final group; the bytes they
    # produce are trimmed again after the loop.
    for byte in b + b'u' * 4:
        if first_digit <= byte <= last_digit:
            group.append(byte)
            if len(group) == 5:
                value = 0
                for digit in group:
                    value = 85 * value + (digit - 33)
                try:
                    pieces.append(pack_word(value))
                except struct.error:
                    raise ValueError('Ascii85 overflow') from None
                group.clear()
        elif byte == zero_fold:
            if group:
                raise ValueError('z inside Ascii85 5-tuple')
            pieces.append(b'\0\0\0\0')
        elif foldspaces and byte == space_fold:
            if group:
                raise ValueError('y inside Ascii85 5-tuple')
            pieces.append(b'\x20\x20\x20\x20')
        elif byte in ignorechars:
            # Skip whitespace
            continue
        else:
            raise ValueError('Non-Ascii85 digit found: %c' % byte)

    result = b''.join(pieces)
    padding = 4 - len(group)
    if padding:
        # Drop the bytes contributed by the artificial 'u' digits.
        result = result[:-padding]
    return result
413
414# The following code is originally taken (with permission) from Mercurial
415
_b85alphabet = (b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                b"abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~")
_b85chars = None    # lazily built by b85encode(): one-symbol table
_b85chars2 = None   # lazily built by b85encode(): two-symbol table
_b85dec = None      # lazily built by b85decode(): byte -> digit value

def b85encode(b, pad=False):
    """Encode bytes-like object b in base85 format and return a bytes object.

    If pad is true, the input is padded with null bytes so its length is a
    multiple of 4 bytes before encoding.
    """
    global _b85chars, _b85chars2
    # Build the symbol tables on first use only.
    if _b85chars is None:
        _b85chars = [bytes((code,)) for code in _b85alphabet]
        _b85chars2 = [(first + second)
                      for first in _b85chars for second in _b85chars]
    encoded = _85encode(b, _b85chars, _b85chars2, pad)
    return encoded
435
def b85decode(b):
    """Decode the base85-encoded bytes-like object or ASCII string b

    The result is returned as a bytes object.  ValueError is raised for a
    character outside the alphabet or for a group whose value does not fit
    in 32 bits.
    """
    global _b85dec
    # Build the byte -> digit table on first use only.
    if _b85dec is None:
        _b85dec = [None] * 256
        for value, code in enumerate(_b85alphabet):
            _b85dec[code] = value

    data = _bytes_from_decode_data(b)
    # Complete the final group with the highest digit; the bytes this adds
    # are trimmed from the result below.
    padding = (-len(data)) % 5
    data = data + b'~' * padding

    pack_word = struct.Struct('!I').pack
    words = []
    for offset in range(0, len(data), 5):
        group = data[offset:offset + 5]
        total = 0
        try:
            for code in group:
                total = total * 85 + _b85dec[code]
        except TypeError:
            # A None table entry marks a byte outside the alphabet; report
            # the position of the first such byte in this group.
            for index, code in enumerate(group):
                if _b85dec[code] is None:
                    raise ValueError('bad base85 character at position %d'
                                    % (offset + index)) from None
            raise
        try:
            words.append(pack_word(total))
        except struct.error:
            raise ValueError('base85 overflow in hunk starting at byte %d'
                             % offset) from None

    decoded = b''.join(words)
    if padding:
        decoded = decoded[:-padding]
    return decoded
476
477# Legacy interface.  This code could be cleaned up since I don't believe
478# binascii has any line length limitations.  It just doesn't seem worth it
479# though.  The files should be opened in binary mode.
480
MAXLINESIZE = 76 # Excluding the CRLF
MAXBINSIZE = (MAXLINESIZE//4)*3

def encode(input, output):
    """Encode a file; input and output are binary files.

    The input is consumed in pieces of MAXBINSIZE bytes and each piece is
    written to output as one line of Base64 text.
    """
    while True:
        chunk = input.read(MAXBINSIZE)
        if not chunk:
            return
        # A read may come back short; keep reading until the piece is full
        # or the input is exhausted.
        missing = MAXBINSIZE - len(chunk)
        while missing > 0:
            extra = input.read(missing)
            if not extra:
                break
            chunk += extra
            missing = MAXBINSIZE - len(chunk)
        output.write(binascii.b2a_base64(chunk))
497
498
def decode(input, output):
    """Decode a file; input and output are binary files.

    Each line read from input is Base64-decoded and the resulting bytes
    are written to output.
    """
    line = input.readline()
    while line:
        output.write(binascii.a2b_base64(line))
        line = input.readline()
507
def _input_type_check(s):
    """Raise TypeError unless s is a 1-D buffer of single-byte elements.

    s must be accepted by memoryview(), have format 'c', 'b' or 'B', and
    have exactly one dimension.  Returns None when all checks pass.
    """
    type_name = s.__class__.__name__
    try:
        view = memoryview(s)
    except TypeError as err:
        raise TypeError("expected bytes-like object, not %s" % type_name) from err
    if view.format not in ('c', 'b', 'B'):
        raise TypeError("expected single byte elements, not %r from %s" %
                        (view.format, type_name))
    if view.ndim != 1:
        raise TypeError("expected 1-D data, not %d-D data from %s" %
                        (view.ndim, type_name))
522
523
def encodebytes(s):
    """Encode a bytestring into a bytes object containing multiple lines
    of base-64 data.

    The input is validated with _input_type_check() and then encoded in
    slices of MAXBINSIZE bytes, one output line per slice.
    """
    _input_type_check(s)
    return b"".join(
        binascii.b2a_base64(s[start : start + MAXBINSIZE])
        for start in range(0, len(s), MAXBINSIZE)
    )
533
def encodestring(s):
    """Legacy alias of encodebytes().

    Emits a DeprecationWarning attributed to the caller, then delegates.
    """
    import warnings
    message = "encodestring() is a deprecated alias since 3.1, use encodebytes()"
    warnings.warn(message, category=DeprecationWarning, stacklevel=2)
    return encodebytes(s)
541
542
def decodebytes(s):
    """Decode a bytestring of base-64 data into a bytes object.

    The input is validated with _input_type_check() before decoding.
    """
    _input_type_check(s)
    decoded = binascii.a2b_base64(s)
    return decoded
547
def decodestring(s):
    """Legacy alias of decodebytes().

    Emits a DeprecationWarning attributed to the caller, then delegates.
    """
    import warnings
    message = ("decodestring() is a deprecated alias since Python 3.1, "
               "use decodebytes()")
    warnings.warn(message, category=DeprecationWarning, stacklevel=2)
    return decodebytes(s)
555
556
557# Usable as a script...
def main():
    """Command-line entry point.

    Parses the options -d, -e, -u and -t from sys.argv, then encodes (the
    default) or decodes the named file, or standard input when no file or
    '-' is given, writing to standard output.  -t runs test() instead.
    Exits with status 2 after printing usage on an option error.
    """
    import sys
    import getopt
    usage = """usage: %s [-d|-e|-u|-t] [file|-]
        -d, -u: decode
        -e: encode (default)
        -t: encode and decode string 'Aladdin:open sesame'"""
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'deut')
    except getopt.error as msg:
        # Send the error and the usage text to stderr.
        sys.stdout = sys.stderr
        print(msg)
        print(usage % sys.argv[0])
        sys.exit(2)

    func = encode
    for flag, _value in opts:
        if flag == '-t':
            test()
            return
        if flag == '-e':
            func = encode
        elif flag in ('-d', '-u'):
            func = decode

    read_stdin = not args or args[0] == '-'
    if read_stdin:
        func(sys.stdin.buffer, sys.stdout.buffer)
    else:
        with open(args[0], 'rb') as f:
            func(f, sys.stdout.buffer)
582
583
def test():
    """Round-trip a sample string through encodebytes() and decodebytes().

    Prints the repr of the original, encoded and decoded values and
    asserts that the decoded value equals the original.
    """
    original = b"Aladdin:open sesame"
    print(repr(original))
    encoded = encodebytes(original)
    print(repr(encoded))
    decoded = decodebytes(encoded)
    print(repr(decoded))
    assert original == decoded
592
593
# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    main()
596