• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright (C) 2002-2006 Python Software Foundation
2# Author: Ben Gertzfield
3# Contact: email-sig@python.org
4
5"""Base64 content transfer encoding per RFCs 2045-2047.
6
7This module handles the content transfer encoding method defined in RFC 2045
8to encode arbitrary 8-bit data using the three 8-bit bytes in four 7-bit
9characters encoding known as Base64.
10
11It is used in the MIME standards for email to attach images, audio, and text
12using some 8-bit character sets to messages.
13
14This module provides an interface to encode and decode both headers and bodies
15with Base64 encoding.
16
17RFC 2047 defines a method for including character set information in an
18`encoded-word' in a header.  This method is commonly used for 8-bit real names
19in To:, From:, Cc:, etc. fields, as well as Subject: lines.
20
21This module does not do the line wrapping or end-of-line character conversion
22necessary for proper internationalized headers; it only does dumb encoding and
23decoding.  To deal with the various line wrapping issues, use the email.header
24module.
25"""
26
# Public names exported by this module (what `from ... import *` provides).
__all__ = [
    'base64_len',
    'body_decode',
    'body_encode',
    'decode',
    'decodestring',
    'encode',
    'encodestring',
    'header_encode',
    ]
37
38
39from binascii import b2a_base64, a2b_base64
40from email.utils import fix_eols
41
# Line-ending and joiner constants shared by the functions in this module.
CRLF = '\r\n'
NL = '\n'
EMPTYSTRING = ''

# Number of fixed "chrome" characters header_encode() wraps around every
# encoded chunk in addition to the charset name: '=?' + '?b?' + '?=' == 7.
# See also Charset.py
MISC_LEN = 7
48
49
50
51# Helpers
def base64_len(s):
    """Return the number of characters s occupies once base64-encoded.

    Base64 emits one 4-character group for every 3 input items; a trailing
    partial group (1 or 2 items) is padded out to a full 4 characters, so the
    result is 4 times the input length divided by 3, rounded up.
    """
    # Adding 2 before the floor division rounds the group count up.
    group_count = (len(s) + 2) // 3
    return 4 * group_count
61
62
63
def header_encode(header, charset='iso-8859-1', keep_eols=False,
                  maxlinelen=76, eol=NL):
    """Encode a single header line with Base64 encoding in a given charset.

    Defined in RFC 2047, this Base64 encoding is identical to normal Base64
    encoding, except that each line must be intelligently wrapped (respecting
    the Base64 encoding), and subsequent lines must start with a space.

    charset names the character set to use to encode the header.  It defaults
    to iso-8859-1.

    End-of-line characters (\\r, \\n, \\r\\n) will be automatically converted
    to the canonical email line separator \\r\\n unless the keep_eols
    parameter is True (the default is False).

    Each line of the header will be terminated in the value of eol, which
    defaults to "\\n".  Set this to "\\r\\n" if you are using the result of
    this function directly in email.

    The resulting string will be in the form:

    "=?charset?b?WW/5ciBtYXp66XLrIHf8eiBhIGhhbXBzdGHuciBBIFlv+XIgbWF6euly?=\\n
      =?charset?b?6yB3/HogYSBoYW1wc3Rh7nIgQkMgWW/5ciBtYXp66XLrIHf8eiBhIGhh?="

    with each encoded word being, at most, maxlinelen characters long
    (defaults to 76 characters).  The header is split into chunks made of
    whole 3-byte groups so that the encoded text of every chunk fits in the
    space left after the "=?charset?b?...?=" chrome.

    If maxlinelen is too small to hold the chrome plus even one 4-character
    Base64 group, each encoded word carries a single 3-byte group and will
    therefore exceed maxlinelen; the header is never truncated.

    An empty (falsy) header is returned unchanged.
    """
    # Return empty headers unchanged
    if not header:
        return header

    if not keep_eols:
        header = fix_eols(header)

    # Room left for Base64 text in each encoded word once the RFC chrome
    # ('=?' + charset + '?b?' + '?=') has been accounted for.
    max_encoded = maxlinelen - len(charset) - MISC_LEN
    # Base64 produces 4 characters per (possibly partial) 3-byte group, so
    # only the whole 4-character groups that fit may be used; computing
    # max_encoded * 3 // 4 instead can select a chunk whose encoding is longer
    # than max_encoded.  Always take at least one group so the loop below
    # makes progress and no input is dropped.
    max_unencoded = max(3, (max_encoded // 4) * 3)

    # Encode each chunk and wrap it in the RFC chrome.
    lines = []
    for start in range(0, len(header), max_unencoded):
        encoded = b2a_base64(header[start:start + max_unencoded])
        # b2a_base64() appends a newline which is not part of the encoded word
        if encoded.endswith(NL):
            encoded = encoded[:-1]
        lines.append('=?%s?b?%s?=' % (charset, encoded))
    # Glue the lines together and return it.  BAW: should we be able to
    # specify the leading whitespace in the joiner?
    joiner = eol + ' '
    return joiner.join(lines)
119
120
121
def encode(s, binary=True, maxlinelen=76, eol=NL):
    """Encode a string with base64.

    Each line will be wrapped at, at most, maxlinelen characters (defaults to
    76 characters), not counting the line terminator.  The input is split
    into chunks made of whole 3-byte groups, so Base64 padding can only ever
    appear at the very end of the output.  If maxlinelen is smaller than 4,
    each line carries a single 4-character group.

    If binary is False, end-of-line characters will be converted to the
    canonical email end-of-line sequence \\r\\n.  Otherwise they will be left
    verbatim (this is the default).

    Each line of encoded text will end with eol, which defaults to "\\n".  Set
    this to "\\r\\n" if you will be using the result of this function directly
    in an email.

    An empty (falsy) s is returned unchanged.
    """
    if not s:
        return s

    if not binary:
        s = fix_eols(s)

    # Only whole 4-character groups fit on a line, and each one holds 3 input
    # bytes.  Using maxlinelen * 3 // 4 instead yields a chunk size that is
    # not a multiple of 3 whenever maxlinelen is not a multiple of 4, which
    # pads every line with '=' and makes it longer than maxlinelen.  Take at
    # least one group so the loop below always makes progress.
    max_unencoded = max(3, (maxlinelen // 4) * 3)

    encvec = []
    for start in range(0, len(s), max_unencoded):
        # BAW: should encode() inherit b2a_base64()'s dubious behavior in
        # adding a newline to the encoded string?
        enc = b2a_base64(s[start:start + max_unencoded])
        # Swap the newline added by b2a_base64() for the requested eol.
        if enc.endswith(NL) and eol != NL:
            enc = enc[:-1] + eol
        encvec.append(enc)
    return EMPTYSTRING.join(encvec)


# For convenience and backwards compatibility w/ standard base64 module
body_encode = encode
encodestring = encode
157
158
159
def decode(s, convert_eols=None):
    """Decode a raw base64 string.

    s is handed to binascii.a2b_base64() and the decoded data is returned.
    An empty (falsy) s is returned unchanged.

    If convert_eols is set to a string value, every canonical email linefeed
    ("\\r\\n") in the decoded text is replaced by that value.  os.linesep is
    a good choice for convert_eols if you are decoding a text attachment.

    This function does not parse a full MIME header value encoded with
    base64 (like =?iso-8859-1?b?bmloISBuaWgh?=) -- please use the high
    level email.header class for that functionality.
    """
    # Nothing to decode: hand the falsy input straight back.
    if not s:
        return s

    decoded = a2b_base64(s)
    # Line-ending translation is opt-in.
    return decoded.replace(CRLF, convert_eols) if convert_eols else decoded


# For convenience and backwards compatibility w/ standard base64 module
body_decode = decode
decodestring = decode
184