# Copyright (C) 2002-2006 Python Software Foundation
# Author: Ben Gertzfield
# Contact: email-sig@python.org

"""Base64 content transfer encoding per RFCs 2045-2047.

This module handles the content transfer encoding method defined in RFC 2045
to encode arbitrary 8-bit data using the three 8-bit bytes in four 7-bit
characters encoding known as Base64.

It is used in the MIME standards for email to attach images, audio, and text
using some 8-bit character sets to messages.

This module provides an interface to encode and decode both headers and bodies
with Base64 encoding.

RFC 2047 defines a method for including character set information in an
`encoded-word' in a header.  This method is commonly used for 8-bit real names
in To:, From:, Cc:, etc. fields, as well as Subject: lines.

This module does not do the line wrapping or end-of-line character conversion
necessary for proper internationalized headers; it only does dumb encoding and
decoding.  To deal with the various line wrapping issues, use the email.header
module.
"""

__all__ = [
    'base64_len',
    'body_decode',
    'body_encode',
    'decode',
    'decodestring',
    'encode',
    'encodestring',
    'header_encode',
    ]


import re
from binascii import b2a_base64, a2b_base64

CRLF = '\r\n'
NL = '\n'
EMPTYSTRING = ''

# See also Charset.py.  This is the length of the RFC 2047 chrome that
# surrounds the charset name and the payload: '=?' + '?b?' + '?='.
MISC_LEN = 7

# The text (unicode) string type of the running interpreter.
_TEXT_TYPE = type(u'')

try:
    from email.utils import fix_eols
except ImportError:
    # fix_eols() is not provided by every version of the email package, so
    # fall back to an equivalent local definition.
    def fix_eols(s):
        """Replace all line-ending characters in the text s with \\r\\n."""
        # Fix newlines with no preceding carriage return
        s = re.sub(r'(?<!\r)\n', CRLF, s)
        # Fix carriage returns with no following newline
        s = re.sub(r'\r(?!\n)', CRLF, s)
        return s


# Helpers
def _fix_eols(s):
    """Apply fix_eols() to s, which may be a native, text or byte string."""
    if isinstance(s, (str, _TEXT_TYPE)):
        return fix_eols(s)
    # Byte strings that are not native strings: latin-1 maps every byte to
    # exactly one code point and back, so the round trip is lossless.
    return fix_eols(s.decode('latin-1')).encode('latin-1')


def _b64(chunk):
    """Base64 encode chunk and return the result as a native string.

    The trailing newline appended by b2a_base64() is preserved.
    """
    encoded = b2a_base64(chunk)
    if not isinstance(encoded, str):
        # Base64 output is pure ASCII by construction.
        encoded = encoded.decode('ascii')
    return encoded


def base64_len(s):
    """Return the length of s when it is encoded with base64."""
    groups_of_3, leftover = divmod(len(s), 3)
    # 4 bytes out for each 3 bytes (or nonzero fraction thereof) in.
    # Thanks, Tim!
    n = groups_of_3 * 4
    if leftover:
        n += 4
    return n


def header_encode(header, charset='iso-8859-1', keep_eols=False,
                  maxlinelen=76, eol=NL):
    """Encode a single header line with Base64 encoding in a given charset.

    Defined in RFC 2047, this Base64 encoding is identical to normal Base64
    encoding, except that each line must be intelligently wrapped (respecting
    the Base64 encoding), and subsequent lines must start with a space.

    header is the value to encode.  A text (unicode) value is first converted
    to bytes using charset; a byte string is used as is.

    charset names the character set to use to encode the header.  It defaults
    to iso-8859-1.

    End-of-line characters (\\r, \\n, \\r\\n) will be automatically converted
    to the canonical email line separator \\r\\n unless the keep_eols
    parameter is True (the default is False).

    Each line of the header will be terminated in the value of eol, which
    defaults to "\\n".  Set this to "\\r\\n" if you are using the result of
    this function directly in email.

    The resulting string will be in the form:

    "=?charset?b?WW/5ciBtYXp66XLrIHf8eiBhIGhhbXBzdGHuciBBIFlv+XIgbWF6euly?=\\n
      =?charset?b?6yB3/HogYSBoYW1wc3Rh7nIgQkMgWW/5ciBtYXp66XLrIHf8eiBhIGhh?="

    with each line wrapped at, at most, maxlinelen characters (defaults to 76
    characters).  If maxlinelen is too small to hold the RFC chrome plus one
    group of four base64 characters, lines carry a single group each (and are
    therefore longer than maxlinelen) rather than dropping data.

    An empty header is returned unchanged.
    """
    # Return empty headers unchanged
    if not header:
        return header

    if not keep_eols:
        header = _fix_eols(header)

    if isinstance(header, _TEXT_TYPE):
        header = header.encode(charset)

    # Room left for base64 characters once the RFC chrome is accounted for.
    # One more character is reserved for the space that starts every
    # continuation line.
    max_encoded = maxlinelen - len(charset) - MISC_LEN - 1
    # Base64 turns each group of 3 bytes (or fraction thereof) into 4
    # characters, so only whole groups may be counted; always make progress
    # by at least one group.
    max_unencoded = max(3, (max_encoded // 4) * 3)

    # Base64 encode each chunk and add the RFC chrome to it
    lines = []
    for start in range(0, len(header), max_unencoded):
        encoded = _b64(header[start:start + max_unencoded])
        # Ignore the last character of each line if it is a newline
        if encoded.endswith(NL):
            encoded = encoded[:-1]
        lines.append('=?%s?b?%s?=' % (charset, encoded))
    # Glue the lines together and return it.  BAW: should we be able to
    # specify the leading whitespace in the joiner?
    joiner = eol + ' '
    return joiner.join(lines)


def encode(s, binary=True, maxlinelen=76, eol=NL):
    """Encode a string with base64.

    s is the data to encode.  A text (unicode) value must contain only ASCII
    characters; a byte string is used as is.  An empty s is returned
    unchanged.

    Each line will be wrapped at, at most, maxlinelen characters (defaults to
    76 characters).  A line never carries fewer than four characters, even if
    maxlinelen is smaller than that.

    If binary is False, end-of-line characters will be converted to the
    canonical email end-of-line sequence \\r\\n.  Otherwise they will be left
    verbatim (this is the default).

    Each line of encoded text will end with eol, which defaults to "\\n".  Set
    this to "\\r\\n" if you will be using the result of this function directly
    in an email.
    """
    if not s:
        return s

    if not binary:
        s = _fix_eols(s)

    if isinstance(s, _TEXT_TYPE):
        s = s.encode('ascii')

    # Only whole groups of 3 bytes per line: anything else would both
    # overflow maxlinelen and put '=' padding in the middle of the output,
    # which then no longer is a single valid base64 stream.
    max_unencoded = max(3, (maxlinelen // 4) * 3)

    encvec = []
    for start in range(0, len(s), max_unencoded):
        # BAW: should encode() inherit b2a_base64()'s dubious behavior in
        # adding a newline to the encoded string?
        enc = _b64(s[start:start + max_unencoded])
        if enc.endswith(NL) and eol != NL:
            enc = enc[:-1] + eol
        encvec.append(enc)
    return EMPTYSTRING.join(encvec)


# For convenience and backwards compatibility w/ standard base64 module
body_encode = encode
encodestring = encode


def decode(s, convert_eols=None):
    """Decode a raw base64 string.

    The result is whatever binascii.a2b_base64() returns for s; an empty s is
    returned unchanged.

    If convert_eols is set to a string value, all canonical email linefeeds,
    e.g. "\\r\\n", in the decoded text will be converted to the value of
    convert_eols.  os.linesep is a good choice for convert_eols if you are
    decoding a text attachment.

    This function does not parse a full MIME header value encoded with
    base64 (like =?iso-8859-1?b?bmloISBuaWgh?=) -- please use the high
    level email.header class for that functionality.
    """
    if not s:
        return s

    dec = a2b_base64(s)
    if not convert_eols:
        return dec
    if isinstance(dec, str):
        # Decoded data is a native string: replace directly.
        return dec.replace(CRLF, convert_eols)
    # Decoded data is a byte string distinct from the native string type, so
    # both the pattern and the replacement have to be bytes as well.
    if isinstance(convert_eols, _TEXT_TYPE):
        convert_eols = convert_eols.encode('ascii')
    return dec.replace(b'\r\n', convert_eols)


# For convenience and backwards compatibility w/ standard base64 module
body_decode = decode
decodestring = decode