• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright (C) 2001-2010 Python Software Foundation
2# Author: Barry Warsaw
3# Contact: email-sig@python.org
4
5"""Miscellaneous utilities."""
6
# Names exported by a star-import of this module.  'quote' is imported from
# email._parseaddr below so that it can be re-exported, so it is listed here
# alongside 'unquote'.
__all__ = [
    'collapse_rfc2231_value',
    'decode_params',
    'decode_rfc2231',
    'encode_rfc2231',
    'formataddr',
    'formatdate',
    'getaddresses',
    'make_msgid',
    'mktime_tz',
    'parseaddr',
    'parsedate',
    'parsedate_tz',
    'quote',
    'unquote',
    ]
22
23import os
24import re
25import time
26import base64
27import random
28import socket
29import urllib
30import warnings
31
32from email._parseaddr import quote
33from email._parseaddr import AddressList as _AddressList
34from email._parseaddr import mktime_tz
35
# We need workarounds for bugs in these methods in older Pythons (see below)
37from email._parseaddr import parsedate as _parsedate
38from email._parseaddr import parsedate_tz as _parsedate_tz
39
40from quopri import decodestring as _qdecode
41
42# Intrapackage imports
43from email.encoders import _bencode, _qencode
44
45COMMASPACE = ', '
46EMPTYSTRING = ''
47UEMPTYSTRING = u''
48CRLF = '\r\n'
49TICK = "'"
50
51specialsre = re.compile(r'[][\\()<>@,:;".]')
52escapesre = re.compile(r'[][\\()"]')
53
54
55
56# Helpers
57
58def _identity(s):
59    return s
60
61
62def _bdecode(s):
63    """Decodes a base64 string.
64
65    This function is equivalent to base64.decodestring and it's retained only
66    for backward compatibility. It used to remove the last \\n of the decoded
67    string, if it had any (see issue 7143).
68    """
69    if not s:
70        return s
71    return base64.decodestring(s)
72
73
74
def fix_eols(s):
    """Return s with every line ending normalized to CRLF."""
    # First pass: a bare LF (one not already preceded by CR) becomes CRLF.
    lf_fixed = re.sub(r'(?<!\r)\n', CRLF, s)
    # Second pass: a bare CR (one not followed by LF) becomes CRLF.
    return re.sub(r'\r(?!\n)', CRLF, lf_fixed)
82
83
84
def formataddr(pair):
    """Build an address header value from a (realname, email_address) pair.

    This is the inverse of parseaddr(): the result is suitable for an
    RFC 2822 From, To or Cc header.  When the realname element is false,
    the email address is returned on its own, unmodified.
    """
    name, address = pair
    if not name:
        return address
    # Names containing RFC 2822 "specials" must be wrapped in double quotes.
    quotes = '"' if specialsre.search(name) else ''
    # Backslash-escape the characters matched by escapesre.
    escaped = escapesre.sub(r'\\\g<0>', name)
    return '%s%s%s <%s>' % (quotes, escaped, quotes, address)
101
102
103
def getaddresses(fieldvalues):
    """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
    # Join all header values into one comma-separated list and parse it in
    # a single pass.
    joined = COMMASPACE.join(fieldvalues)
    return _AddressList(joined).addresslist
109
110
111
# Matches one encoded word of the form =?charset?encoding?atom?= and exposes
# its three parts as the named groups 'charset', 'encoding' and 'atom'.
ecre = re.compile(r'''
  =\?                   # literal =?
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
  \?                    # literal ?
  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
  \?                    # literal ?
  (?P<atom>.*?)         # non-greedy up to the next ?= is the atom
  \?=                   # literal ?=
  ''', re.VERBOSE | re.IGNORECASE)
121
122
123
def formatdate(timeval=None, localtime=False, usegmt=False):
    """Return a date string in RFC 2822 format, e.g.:

    Fri, 09 Nov 2001 01:08:47 -0000

    timeval, when given, is a floating point time value as accepted by
    gmtime() and localtime(); when omitted the current time is used.

    localtime is a flag; when true, timeval is interpreted in the local
    timezone and the returned date carries the local offset (with daylight
    savings time taken into account) instead of UTC.

    usegmt selects the textual zone "GMT" in place of the numeric "-0000".
    This is what HTTP needs, and it only has an effect when localtime is
    false.
    """
    # strftime() is deliberately avoided: it honors the locale, whereas
    # RFC 2822 requires the English day and month abbreviations.
    day_names = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
    month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                   'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    if timeval is None:
        timeval = time.time()
    if not localtime:
        now = time.gmtime(timeval)
        # The UTC offset is always written as -0000 (or GMT on request).
        zone = 'GMT' if usegmt else '-0000'
    else:
        now = time.localtime(timeval)
        # Use the DST offset only when the local zone defines DST and it is
        # in effect for this timestamp.
        if time.daylight and now[-1]:
            offset = time.altzone
        else:
            offset = time.timezone
        hours, leftover = divmod(abs(offset), 3600)
        # offset counts seconds *west* of UTC while the header zone is
        # expressed *east* of UTC, hence the inverted sign.
        sign = '-' if offset > 0 else '+'
        zone = '%s%02d%02d' % (sign, hours, leftover // 60)
    return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
        day_names[now[6]],
        now[2],
        month_names[now[1] - 1],
        now[0], now[3], now[4], now[5],
        zone)
174
175
176
def make_msgid(idstring=None):
    """Return a string suitable for an RFC 2822 compliant Message-ID, e.g:

    <142480216486.20800.16526388040877946887@nightshade.la.mastaler.com>

    The id is built from the current time in hundredths of a second, the
    process id, 64 random bits and the fully qualified host name.  When
    idstring is given, it is appended (after a dot) to the local part to
    strengthen the uniqueness of the message id.
    """
    stamp = int(time.time() * 100)
    process_id = os.getpid()
    nonce = random.getrandbits(64)
    suffix = '' if idstring is None else '.' + idstring
    return '<%d.%d.%d%s@%s>' % (
        stamp, process_id, nonce, suffix, socket.getfqdn())
195
196
197
# These functions are in the standalone mimelib version only because they've
# subsequently been fixed in the latest Python versions.  We use this to work
# around broken older Pythons.
def parsedate(data):
    """Parse a date string with _parsedate(); return None for false input."""
    # Guard against empty/None input before delegating to the real parser.
    return _parsedate(data) if data else None
205
206
def parsedate_tz(data):
    """Parse a date string with _parsedate_tz(); return None for false input."""
    # Guard against empty/None input before delegating to the real parser.
    return _parsedate_tz(data) if data else None
211
212
def parseaddr(addr):
    """Parse addr and return its first address as a (realname, email) pair.

    A pair of empty strings is returned when no address can be extracted.
    """
    parsed = _AddressList(addr).addresslist
    if parsed:
        return parsed[0]
    return '', ''
218
219
220# rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.
def unquote(str):
    """Remove quotes from a string.

    A string wrapped in double quotes loses them and has its backslash
    escapes for backslash and double quote undone; a string wrapped in
    angle brackets just loses the brackets.  Anything else, including
    strings shorter than two characters, is returned unchanged.
    """
    if len(str) <= 1:
        return str
    if str.startswith('"') and str.endswith('"'):
        inner = str[1:-1]
        # Undo escaped backslashes first, then escaped double quotes.
        inner = inner.replace('\\\\', '\\')
        return inner.replace('\\"', '"')
    if str.startswith('<') and str.endswith('>'):
        return str[1:-1]
    return str
229
230
231
232# RFC2231-related functions - parameter encoding and decoding
def decode_rfc2231(s):
    """Decode string according to RFC 2231.

    s is split on its first two single quotes.  When both are present the
    three resulting pieces are returned as a list; otherwise the result is
    the tuple (None, None, s).
    """
    pieces = s.split(TICK, 2)
    if len(pieces) > 2:
        return pieces
    return None, None, s
239
240
def encode_rfc2231(s, charset=None, language=None):
    """Encode string according to RFC 2231.

    s is always %-quoted, with no characters treated as safe.  If neither
    charset nor language is given, the quoted string is returned with no
    charset/language prefix.  Otherwise the result has the form
    charset'language'quoted-value, where a charset or language that was
    not supplied is written as the empty string (RFC 2231 allows either
    field to be left blank).
    """
    import urllib
    s = urllib.quote(s, safe='')
    if charset is None and language is None:
        return s
    # Never let a missing field leak into the output as the text "None".
    if charset is None:
        charset = ''
    if language is None:
        language = ''
    return "%s'%s'%s" % (charset, language, s)
255
256
# Matches an RFC 2231 parameter name: NAME*, NAME*N or NAME*N*, capturing the
# base name and the optional continuation number.
rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$')

def decode_params(params):
    """Decode parameters list according to RFC 2231.

    params is a sequence of 2-tuples containing (param name, string value).

    The first parameter is passed through as-is.  Every later parameter is
    unquoted; ordinary ones are re-quoted and emitted in place, while
    RFC 2231 segments are collected per name, joined in continuation order
    and appended after all ordinary parameters.  A parameter with at least
    one %-encoded segment is returned as a (charset, language, value)
    3-tuple instead of a plain string.
    """
    # Work on a copy so the caller's list is left untouched.
    pending = params[:]
    first_name, first_value = pending.pop(0)
    decoded = [(first_name, first_value)]
    # Maps a parameter name to its list of (number, value, encoded) segments,
    # where the flag records whether that segment is %-encoded.
    segments_by_name = {}
    for name, value in pending:
        is_encoded = name.endswith('*')
        value = unquote(value)
        mo = rfc2231_continuation.match(name)
        if not mo:
            decoded.append((name, '"%s"' % quote(value)))
            continue
        name, num = mo.group('name', 'num')
        if num is not None:
            num = int(num)
        segments_by_name.setdefault(name, []).append((num, value, is_encoded))
    for name, segments in segments_by_name.items():
        # Put the segments in continuation-number order before joining.
        segments.sort()
        pieces = []
        extended = False
        # %-decode the encoded segments.  If any segment name ended in '*',
        # the joined string must start with the charset and language
        # specifiers, which are split off below.
        for _num, text, is_encoded in segments:
            if is_encoded:
                text = urllib.unquote(text)
                extended = True
            pieces.append(text)
        joined = quote(EMPTYSTRING.join(pieces))
        if extended:
            charset, language, joined = decode_rfc2231(joined)
            decoded.append((name, (charset, language, '"%s"' % joined)))
        else:
            decoded.append((name, '"%s"' % joined))
    return decoded
311
def collapse_rfc2231_value(value, errors='replace',
                           fallback_charset='us-ascii'):
    """Collapse an RFC 2231 parameter value into a single unquoted string.

    value is either a plain string, which is simply unquoted, or a tuple
    whose first item is the charset (possibly None or empty) and whose
    third item is the raw, possibly quoted, text.  For a tuple, the text
    is unquoted and decoded to unicode using the charset, with errors
    passed on as the decoder's error handling scheme.

    fallback_charset is used both when the tuple carries no charset and
    when the given charset is unknown to Python.
    """
    if not isinstance(value, tuple):
        return unquote(value)
    rawval = unquote(value[2])
    # Honor the caller's fallback when no charset was supplied, rather than
    # silently assuming 'us-ascii' regardless of fallback_charset.
    charset = value[0] or fallback_charset
    try:
        return unicode(rawval, charset, errors)
    except LookupError:
        # XXX charset is unknown to Python.
        return unicode(rawval, fallback_charset, errors)
324