#! /usr/bin/env python

"""RFC 3548: Base16, Base32, Base64 Data Encodings"""

# Modified 04-Oct-1995 by Jack Jansen to use binascii module
# Modified 30-Dec-2003 by Barry Warsaw to add full RFC 3548 support

import re
import struct
import string
import binascii


__all__ = [
    # Legacy interface exports traditional RFC 1521 Base64 encodings
    'encode', 'decode', 'encodestring', 'decodestring',
    # Generalized interface for other encodings
    'b64encode', 'b64decode', 'b32encode', 'b32decode',
    'b16encode', 'b16decode',
    # Standard Base64 encoding
    'standard_b64encode', 'standard_b64decode',
    # Some common Base64 alternatives.  As referenced by RFC 3548, see thread
    # starting at:
    #
    # http://zgp.org/pipermail/p2p-hackers/2001-September/000316.html
    'urlsafe_b64encode', 'urlsafe_b64decode',
    ]

# string.maketrans() only exists on Python 2; Python 3 offers the same
# operation as bytes.maketrans().  On Python 2, ``bytes`` is ``str`` and has
# no ``maketrans`` attribute, so the lookup falls back to string.maketrans.
_maketrans = getattr(bytes, 'maketrans', None) or string.maketrans

# Identity translation table: one single-byte string per byte value.
_translation = [struct.pack('B', _x) for _x in range(256)]
EMPTYSTRING = b''


def _translate(s, altchars):
    """Return s with each byte remapped according to altchars.

    altchars is a mapping from a single character to its replacement; every
    byte that is not a key of altchars is left unchanged.
    """
    translation = _translation[:]
    for k, v in altchars.items():
        translation[ord(k)] = v
    return s.translate(b''.join(translation))



# Base64 encoding/decoding uses binascii

def b64encode(s, altchars=None):
    """Encode a string using Base64.

    s is the string to encode.  Optional altchars must be a string of at least
    length 2 (additional characters are ignored) which specifies an
    alternative alphabet for the '+' and '/' characters.  This allows an
    application to e.g. generate url or filesystem safe Base64 strings.

    The encoded string is returned.
    """
    # Strip off the trailing newline
    encoded = binascii.b2a_base64(s)[:-1]
    if altchars is not None:
        return encoded.translate(_maketrans(b'+/', altchars[:2]))
    return encoded


def b64decode(s, altchars=None):
    """Decode a Base64 encoded string.

    s is the string to decode.  Optional altchars must be a string of at least
    length 2 (additional characters are ignored) which specifies the
    alternative alphabet used instead of the '+' and '/' characters.

    The decoded string is returned.  A TypeError is raised if s is
    incorrectly padded.  Characters that are neither in the normal base-64
    alphabet nor the alternative alphabet are discarded prior to the padding
    check.
    """
    if altchars is not None:
        s = s.translate(_maketrans(altchars[:2], b'+/'))
    try:
        return binascii.a2b_base64(s)
    except binascii.Error as msg:
        # Transform this exception for consistency
        raise TypeError(msg)


def standard_b64encode(s):
    """Encode a string using the standard Base64 alphabet.

    s is the string to encode.  The encoded string is returned.
    """
    return b64encode(s)

def standard_b64decode(s):
    """Decode a string encoded with the standard Base64 alphabet.

    Argument s is the string to decode.  The decoded string is returned.  A
    TypeError is raised if the string is incorrectly padded.  Characters that
    are not in the standard alphabet are discarded prior to the padding
    check.
    """
    return b64decode(s)

_urlsafe_encode_translation = _maketrans(b'+/', b'-_')
_urlsafe_decode_translation = _maketrans(b'-_', b'+/')

def urlsafe_b64encode(s):
    """Encode a string using the URL- and filesystem-safe Base64 alphabet.

    Argument s is the string to encode.  The encoded string is returned.  The
    alphabet uses '-' instead of '+' and '_' instead of '/'.
    """
    return b64encode(s).translate(_urlsafe_encode_translation)

def urlsafe_b64decode(s):
    """Decode a string using the URL- and filesystem-safe Base64 alphabet.

    Argument s is the string to decode.  The decoded string is returned.  A
    TypeError is raised if the string is incorrectly padded.  Characters that
    are not in the URL-safe base-64 alphabet, and are not a plus '+' or slash
    '/', are discarded prior to the padding check.

    The alphabet uses '-' instead of '+' and '_' instead of '/'.
    """
    return b64decode(s.translate(_urlsafe_decode_translation))



# Base32 encoding/decoding must be done in Python
_b32alphabet = {
    0: b'A',  9: b'J', 18: b'S', 27: b'3',
    1: b'B', 10: b'K', 19: b'T', 28: b'4',
    2: b'C', 11: b'L', 20: b'U', 29: b'5',
    3: b'D', 12: b'M', 21: b'V', 30: b'6',
    4: b'E', 13: b'N', 22: b'W', 31: b'7',
    5: b'F', 14: b'O', 23: b'X',
    6: b'G', 15: b'P', 24: b'Y',
    7: b'H', 16: b'Q', 25: b'Z',
    8: b'I', 17: b'R', 26: b'2',
    }

# Digit characters indexed by their 5-bit value, and the reverse mapping.
_b32tab = [v for k, v in sorted(_b32alphabet.items())]
_b32rev = dict([(v, k) for k, v in _b32alphabet.items()])


def b32encode(s):
    """Encode a string using Base32.

    s is the string to encode.  The encoded string is returned.
    """
    parts = []
    quanta, leftover = divmod(len(s), 5)
    # Pad the last quantum with zero bits if necessary
    if leftover:
        # Build a new object rather than using ``+=`` so that a mutable
        # argument (e.g. a bytearray) is not modified in place.
        s = s + (b'\0' * (5 - leftover))
        quanta += 1
    for i in range(quanta):
        # c1 and c2 are 16 bits wide, c3 is 8 bits wide.  The intent of this
        # code is to process the 40 bits in units of 5 bits.  So we take the 1
        # leftover bit of c1 and tack it onto c2.  Then we take the 2 leftover
        # bits of c2 and tack them onto c3.  The shifts and masks are intended
        # to give us values of exactly 5 bits in width.
        c1, c2, c3 = struct.unpack('!HHB', s[i*5:(i+1)*5])
        c2 += (c1 & 1) << 16 # 17 bits wide
        c3 += (c2 & 3) << 8  # 10 bits wide
        parts.extend([_b32tab[c1 >> 11],         # bits 1 - 5
                      _b32tab[(c1 >> 6) & 0x1f], # bits 6 - 10
                      _b32tab[(c1 >> 1) & 0x1f], # bits 11 - 15
                      _b32tab[c2 >> 12],         # bits 16 - 20 (1 - 5)
                      _b32tab[(c2 >> 7) & 0x1f], # bits 21 - 25 (6 - 10)
                      _b32tab[(c2 >> 2) & 0x1f], # bits 26 - 30 (11 - 15)
                      _b32tab[c3 >> 5],          # bits 31 - 35 (1 - 5)
                      _b32tab[c3 & 0x1f],        # bits 36 - 40 (6 - 10)
                      ])
    encoded = EMPTYSTRING.join(parts)
    # Adjust for any leftover partial quanta
    if leftover == 1:
        return encoded[:-6] + b'======'
    elif leftover == 2:
        return encoded[:-4] + b'===='
    elif leftover == 3:
        return encoded[:-3] + b'==='
    elif leftover == 4:
        return encoded[:-1] + b'='
    return encoded


def b32decode(s, casefold=False, map01=None):
    """Decode a Base32 encoded string.

    s is the string to decode.  Optional casefold is a flag specifying whether
    a lowercase alphabet is acceptable as input.  For security purposes, the
    default is False.

    RFC 3548 allows for optional mapping of the digit 0 (zero) to the letter O
    (oh), and for optional mapping of the digit 1 (one) to either the letter I
    (eye) or letter L (el).  The optional argument map01 when not None,
    specifies which letter the digit 1 should be mapped to (when map01 is not
    None, the digit 0 is always mapped to the letter O).  For security
    purposes the default is None, so that 0 and 1 are not allowed in the
    input.

    The decoded string is returned.  A TypeError is raised if s were
    incorrectly padded or if there are non-alphabet characters present in the
    string.
    """
    if len(s) % 8:
        raise TypeError('Incorrect padding')
    # Handle section 2.4 zero and one mapping.  The flag map01 will be either
    # None, or the character to map the digit 1 (one) to.  It should be
    # either L (el) or I (eye).
    if map01:
        s = s.translate(_maketrans(b'01', b'O' + map01))
    if casefold:
        s = s.upper()
    # Strip off pad characters from the right.  We need to count the pad
    # characters because this will tell us how many null bytes to remove from
    # the end of the decoded string.
    padchars = 0
    mo = re.search(b'(?P<pad>[=]*)$', s)
    if mo:
        padchars = len(mo.group('pad'))
        if padchars > 0:
            s = s[:-padchars]
    # Now decode the full quanta
    parts = []
    acc = 0
    shift = 35
    # Walk the input through one-character slices so the loop sees
    # single-character strings whether s is a str or a bytes object.
    for i in range(len(s)):
        val = _b32rev.get(s[i:i+1])
        if val is None:
            raise TypeError('Non-base32 digit found')
        acc += val << shift
        shift -= 5
        if shift < 0:
            # Eight digits (40 bits) accumulated: emit them as five bytes.
            parts.append(binascii.unhexlify('%010x' % acc))
            acc = 0
            shift = 35
    # Process the last, partial quanta
    last = binascii.unhexlify('%010x' % acc)
    if padchars == 0:
        last = b''                      # No characters
    elif padchars == 1:
        last = last[:-1]
    elif padchars == 3:
        last = last[:-2]
    elif padchars == 4:
        last = last[:-3]
    elif padchars == 6:
        last = last[:-4]
    else:
        raise TypeError('Incorrect padding')
    parts.append(last)
    return EMPTYSTRING.join(parts)



# RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns
# lowercase.  The RFC also recommends against accepting input case
# insensitively.
def b16encode(s):
    """Encode a string using Base16.

    s is the string to encode.  The encoded string is returned.
    """
    return binascii.hexlify(s).upper()


def b16decode(s, casefold=False):
    """Decode a Base16 encoded string.

    s is the string to decode.  Optional casefold is a flag specifying whether
    a lowercase alphabet is acceptable as input.  For security purposes, the
    default is False.

    The decoded string is returned.  A TypeError is raised if s is
    incorrectly padded or if there are non-alphabet characters present in the
    string.
    """
    if casefold:
        s = s.upper()
    if re.search(b'[^0-9A-F]', s):
        raise TypeError('Non-base16 digit found')
    try:
        return binascii.unhexlify(s)
    except binascii.Error as msg:
        # Transform this exception for consistency with b64decode(), so the
        # documented TypeError is what callers see.
        raise TypeError(msg)



# Legacy interface.  This code could be cleaned up since I don't believe
# binascii has any line length limitations.  It just doesn't seem worth it
# though.

MAXLINESIZE = 76 # Excluding the CRLF
MAXBINSIZE = (MAXLINESIZE//4)*3

def encode(input, output):
    """Encode a file.

    Data is read from input in chunks of MAXBINSIZE bytes and each chunk is
    written to output as one line of base-64 text.
    """
    while True:
        s = input.read(MAXBINSIZE)
        if not s:
            break
        # A read may return fewer bytes than requested; keep reading until
        # the chunk is full or the input is exhausted.
        while len(s) < MAXBINSIZE:
            ns = input.read(MAXBINSIZE-len(s))
            if not ns:
                break
            s += ns
        line = binascii.b2a_base64(s)
        output.write(line)


def decode(input, output):
    """Decode a file.

    Each line read from input is decoded independently and the result is
    written to output.
    """
    while True:
        line = input.readline()
        if not line:
            break
        s = binascii.a2b_base64(line)
        output.write(s)


def encodestring(s):
    """Encode a string into multiple lines of base-64 data."""
    pieces = []
    for i in range(0, len(s), MAXBINSIZE):
        chunk = s[i : i + MAXBINSIZE]
        pieces.append(binascii.b2a_base64(chunk))
    return b"".join(pieces)


def decodestring(s):
    """Decode a string."""
    return binascii.a2b_base64(s)



# Useable as a script...
def test():
    """Small test program

    Parses the command line and encodes (default, -e) or decodes (-d, -u)
    the named file, or standard input when no file or '-' is given, writing
    the result to standard output.  With -t a fixed string is round-tripped
    instead.  On an option error the usage text is written to standard
    error and the process exits with status 2.
    """
    import sys
    import getopt
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'deut')
    except getopt.error as msg:
        # Write straight to stderr instead of rebinding the global
        # sys.stdout just to redirect two messages.
        sys.stderr.write("%s\n" % msg)
        sys.stderr.write("""usage: %s [-d|-e|-u|-t] [file|-]
        -d, -u: decode
        -e: encode (default)
        -t: encode and decode string 'Aladdin:open sesame'\n""" % sys.argv[0])
        sys.exit(2)
    func = encode
    for o, a in opts:
        if o == '-e': func = encode
        if o == '-d': func = decode
        if o == '-u': func = decode
        if o == '-t': test1(); return
    # Use the underlying binary streams when the standard streams expose
    # them (text-mode streams on Python 3); otherwise use the streams as-is.
    stdin = getattr(sys.stdin, 'buffer', sys.stdin)
    stdout = getattr(sys.stdout, 'buffer', sys.stdout)
    if args and args[0] != '-':
        with open(args[0], 'rb') as f:
            func(f, stdout)
    else:
        func(stdin, stdout)


def test1():
    """Round-trip a fixed string through encodestring() and decodestring()
    and print the original, the repr of the encoded form, and the decoded
    result on one line.
    """
    import sys
    s0 = b"Aladdin:open sesame"
    s1 = encodestring(s0)
    s2 = decodestring(s1)
    sys.stdout.write("%s %s %s\n" % (s0, repr(s1), s2))


if __name__ == '__main__':
    test()