• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""Representing and manipulating email headers via custom objects.
2
3This module provides an implementation of the HeaderRegistry API.
4The implementation is designed to flexibly follow RFC5322 rules.
5"""
6from types import MappingProxyType
7
8from email import utils
9from email import errors
10from email import _header_value_parser as parser
11
class Address:
    """A single email address: display name plus username@domain."""

    def __init__(self, display_name='', username='', domain='', addr_spec=None):
        """Create an object representing a full email address.

        An address is described by a 'display_name', a 'username' and a
        'domain'.  As an alternative to passing username and domain
        separately, the two may be given together through the addr_spec
        keyword; addr_spec must then be used *instead of* username and
        domain.  The addr_spec string is parsed with the header value parser
        and must be fully consumable by it.

        Raises ValueError if any supplied part contains CR or LF, or if
        addr_spec cannot be parsed completely; raises TypeError if addr_spec
        is combined with username and/or domain; re-raises the first defect
        recorded while parsing addr_spec.

        The resulting object exposes display_name, username, domain and
        addr_spec as read-only properties.
        """
        # Reject line breaks in any of the supplied parts before doing
        # anything else with them.
        supplied = (display_name, username, domain, addr_spec)
        combined = ''.join(part for part in supplied if part)
        if any(ch in combined for ch in '\r\n'):
            raise ValueError("invalid arguments; address parts cannot contain CR or LF")

        # Only application code is expected to pass addr_spec; the email
        # library itself always supplies username and domain, so the raises
        # below are reachable only from application code.
        if addr_spec is not None:
            if username or domain:
                raise TypeError("addrspec specified when username and/or "
                                "domain also specified")
            parsed, leftover = parser.get_addr_spec(addr_spec)
            if leftover:
                raise ValueError("Invalid addr_spec; only '{}' "
                                 "could be parsed from '{}'".format(
                                    parsed, addr_spec))
            defects = parsed.all_defects
            if defects:
                raise defects[0]
            username = parsed.local_part
            domain = parsed.domain

        self._display_name = display_name
        self._username = username
        self._domain = domain

    @property
    def display_name(self):
        """The display name given at construction time."""
        return self._display_name

    @property
    def username(self):
        """The local part of the address."""
        return self._username

    @property
    def domain(self):
        """The domain part of the address."""
        return self._domain

    @property
    def addr_spec(self):
        """The addr_spec (username@domain) portion of the address.

        The username is quoted when it contains any character from
        parser.DOT_ATOM_ENDS.  Without a domain only the (possibly quoted)
        username is returned, and '<>' is returned when both are empty.
        """
        local = self.username
        if not parser.DOT_ATOM_ENDS.isdisjoint(local):
            local = parser.quote_string(local)
        domain = self.domain
        if domain:
            return local + '@' + domain
        return local if local else '<>'

    def __repr__(self):
        template = "{}(display_name={!r}, username={!r}, domain={!r})"
        return template.format(self.__class__.__name__,
                               self.display_name,
                               self.username,
                               self.domain)

    def __str__(self):
        name = self.display_name
        # A display name containing any of the specials has to be quoted.
        if not parser.SPECIALS.isdisjoint(name):
            name = parser.quote_string(name)
        if not name:
            return self.addr_spec
        spec = self.addr_spec
        if spec == '<>':
            # Avoid rendering "name <<>>" for an empty address.
            spec = ''
        return "{} <{}>".format(name, spec)

    def __eq__(self, other):
        if not isinstance(other, Address):
            return NotImplemented
        compared = ('display_name', 'username', 'domain')
        return all(getattr(self, attr) == getattr(other, attr)
                   for attr in compared)
102
103
class Group:
    """A named group of Address objects (or a wrapper for a lone address)."""

    def __init__(self, display_name=None, addresses=None):
        """Create an object representing an address group.

        An address group consists of a display_name followed by colon and a
        list of addresses (see Address) terminated by a semi-colon.  The Group
        is created by specifying a display_name and a possibly empty list of
        Address objects.  A Group can also be used to represent a single
        address that is not in a group, which is convenient when manipulating
        lists that are a combination of Groups and individual Addresses.  In
        this case the display_name should be set to None.  In particular, the
        string representation of a Group whose display_name is None is the same
        as the Address object, if there is one and only one Address object in
        the addresses list.

        The addresses are stored as a tuple; a missing or empty 'addresses'
        argument results in an empty tuple.
        """
        self._display_name = display_name
        self._addresses = tuple(addresses) if addresses else tuple()

    @property
    def display_name(self):
        """The group's display name, or None."""
        return self._display_name

    @property
    def addresses(self):
        """Tuple of the addresses that belong to this group."""
        return self._addresses

    def __repr__(self):
        # The closing parenthesis was missing from the format string, which
        # produced an unbalanced repr such as "Group(display_name='x',
        # addresses=()".
        return "{}(display_name={!r}, addresses={!r})".format(
                 self.__class__.__name__,
                 self.display_name, self.addresses)

    def __str__(self):
        # A nameless group holding exactly one address renders as that
        # address alone.
        if self.display_name is None and len(self.addresses) == 1:
            return str(self.addresses[0])
        disp = self.display_name
        # Quote the display name when it contains any of the specials.
        if disp is not None and not parser.SPECIALS.isdisjoint(disp):
            disp = parser.quote_string(disp)
        adrstr = ", ".join(str(x) for x in self.addresses)
        # A non-empty address list is separated from the colon by a space.
        adrstr = ' ' + adrstr if adrstr else adrstr
        return "{}:{};".format(disp, adrstr)

    def __eq__(self, other):
        if not isinstance(other, Group):
            return NotImplemented
        return (self.display_name == other.display_name and
                self.addresses == other.addresses)
152
153
154# Header Classes #
155
class BaseHeader(str):

    """Base class for message headers.

    Implements generic behavior and provides tools for subclasses.

    A subclass must define a classmethod named 'parse' that takes an unfolded
    value string and a dictionary as its arguments.  The dictionary will
    contain one key, 'defects', initialized to an empty list.  After the call
    the dictionary must contain two additional keys: 'parse_tree', set to the
    parse tree obtained from parsing the header, and 'decoded', set to the
    string value of the idealized representation of the data from the value.
    (That is, encoded words are decoded, and values that have canonical
    representations are so represented.)

    The defects key is intended to collect parsing defects, which the message
    parser will subsequently dispose of as appropriate.  The parser should not,
    insofar as practical, raise any errors.  Defects should be added to the
    list instead.  The standard header parsers register defects for RFC
    compliance issues, for obsolete RFC syntax, and for unrecoverable parsing
    errors.

    The parse method may add additional keys to the dictionary.  In this case
    the subclass must define an 'init' method, which will be passed the
    dictionary as its keyword arguments.  The method should use (usually by
    setting them as the value of similarly named attributes) and remove all the
    extra keys added by its parse method, and then use super to call its parent
    class with the remaining arguments and keywords.

    The subclass should also make sure that a 'max_count' attribute is defined
    that is either None or 1. XXX: need to better define this API.

    """

    def __new__(cls, name, value):
        # Let the subclass parser fill in 'parse_tree', 'decoded' and any
        # extra keys; 'decoded' becomes the str value of the new object.
        parsed = {'defects': []}
        cls.parse(value, parsed)
        text = parsed.pop('decoded')
        if utils._has_surrogates(text):
            text = utils._sanitize(text)
        self = str.__new__(cls, text)
        # Everything left in the dictionary is handed to init as keywords.
        self.init(name, **parsed)
        return self

    def init(self, name, *, parse_tree, defects):
        """Store the header name, parse tree and defect list."""
        self._name = name
        self._parse_tree = parse_tree
        self._defects = defects

    @property
    def name(self):
        """The header name passed to the constructor."""
        return self._name

    @property
    def defects(self):
        """The collected defects, as a tuple."""
        return tuple(self._defects)

    def __reduce__(self):
        # Record the class name and bases so that _reconstruct_header can
        # build an equivalent class and restore the instance state.
        klass = self.__class__
        rebuild_args = (klass.__name__, klass.__bases__, str(self))
        return (_reconstruct_header, rebuild_args, self.__dict__)

    @classmethod
    def _reconstruct(cls, value):
        # Bypass __new__ (and therefore parsing) when rebuilding an instance.
        return str.__new__(cls, value)

    def fold(self, *, policy):
        """Fold header according to policy.

        The parsed representation of the header is folded according to
        RFC5322 rules, as modified by the policy.  If the parse tree
        contains surrogateescaped bytes, the bytes are CTE encoded using
        the charset 'unknown-8bit'.

        Any non-ASCII characters in the parse tree are CTE encoded using
        charset utf-8. XXX: make this a policy setting.

        The returned value is an ASCII-only string possibly containing linesep
        characters, and ending with a linesep character.  The string includes
        the header name and the ': ' separator.

        """
        # At some point we need to put fws here if it was in the source.
        label = parser.HeaderLabel([
            parser.ValueTerminal(self.name, 'header-name'),
            parser.ValueTerminal(':', 'header-sep')])
        whole = parser.Header([label])
        tree = self._parse_tree
        if tree:
            # Separate the label from a non-empty value with a single space.
            space = parser.WhiteSpaceTerminal(' ', 'fws')
            whole.append(parser.CFWSList([space]))
        whole.append(tree)
        return whole.fold(policy=policy)
254
255
def _reconstruct_header(cls_name, bases, value):
    """Build a class named cls_name from bases and rebuild a header from value.

    The instance is produced by the new class's _reconstruct classmethod.
    """
    header_cls = type(cls_name, bases, {})
    return header_cls._reconstruct(value)
258
259
class UnstructuredHeader:
    """Header mixin that parses its value with parser.get_unstructured."""

    max_count = None
    value_parser = staticmethod(parser.get_unstructured)

    @classmethod
    def parse(cls, value, kwds):
        """Store the parse tree of value and its string form in kwds."""
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
269
270
class UniqueUnstructuredHeader(UnstructuredHeader):
    """UnstructuredHeader variant whose max_count is 1."""

    max_count = 1
274
275
class DateHeader:

    """Header whose value consists of a single timestamp.

    Provides an additional attribute, datetime, which is either an aware
    datetime using a timezone, or a naive datetime if the timezone
    in the input string is -0000.  Also accepts a datetime as input.
    The 'value' attribute is the normalized form of the timestamp,
    which means it is the output of format_datetime on the datetime.
    """

    max_count = None

    # This is used only for folding, not for creating 'decoded'.
    value_parser = staticmethod(parser.get_unstructured)

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with 'datetime', 'decoded' and 'parse_tree' for value.

        An empty value or an unparseable date string registers a defect and
        leaves 'datetime' set to None with an empty parse tree.
        """
        if not value:
            kwds['defects'].append(errors.HeaderMissingRequiredValue())
            kwds.update(datetime=None, decoded='', parse_tree=parser.TokenList())
            return

        when = value
        if isinstance(value, str):
            # Keep the raw text as 'decoded' in case the date cannot be parsed.
            kwds['decoded'] = value
            try:
                when = utils.parsedate_to_datetime(value)
            except ValueError:
                kwds['defects'].append(errors.InvalidDateDefect('Invalid date value or format'))
                kwds['datetime'] = None
                kwds['parse_tree'] = parser.TokenList()
                return

        # Normalize: 'decoded' is always the formatted form of the datetime.
        normalized = utils.format_datetime(when)
        kwds['datetime'] = when
        kwds['decoded'] = normalized
        kwds['parse_tree'] = cls.value_parser(normalized)

    def init(self, *args, **kw):
        """Consume the 'datetime' keyword and pass the rest up the chain."""
        when = kw.pop('datetime')
        self._datetime = when
        super().init(*args, **kw)

    @property
    def datetime(self):
        """The parsed datetime, or None if there was no usable value."""
        return self._datetime
320
321
class UniqueDateHeader(DateHeader):
    """DateHeader variant whose max_count is 1."""

    max_count = 1
325
326
class AddressHeader:
    """Header mixin whose value is a list of addresses and/or groups."""

    max_count = None

    @staticmethod
    def value_parser(value):
        """Parse value as an address list and return the resulting tree."""
        address_list, remainder = parser.get_address_list(value)
        assert not remainder, 'this should not happen'
        return address_list

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with 'groups', 'defects', 'decoded' and 'parse_tree'.

        value may be a header string, a single Address/Group-like object, or
        an iterable of such objects.
        """
        if isinstance(value, str):
            # We are translating here from the RFC language (address/mailbox)
            # to our API language (group/address).
            tree = cls.value_parser(value)
            kwds['parse_tree'] = tree
            groups = [
                Group(entry.display_name,
                      [Address(mbox.display_name or '',
                               mbox.local_part or '',
                               mbox.domain or '')
                       for mbox in entry.all_mailboxes])
                for entry in tree.addresses]
            defects = list(tree.all_defects)
        else:
            # Assume it is Address/Group stuff
            items = value if hasattr(value, '__iter__') else [value]
            # Anything without an 'addresses' attribute is wrapped in a
            # nameless single-address Group.
            groups = [entry if hasattr(entry, 'addresses')
                      else Group(None, [entry])
                      for entry in items]
            defects = []
        kwds['groups'] = groups
        kwds['defects'] = defects
        kwds['decoded'] = ', '.join(str(group) for group in groups)
        if 'parse_tree' not in kwds:
            # Objects were supplied: derive the tree from their string form.
            kwds['parse_tree'] = cls.value_parser(kwds['decoded'])

    def init(self, *args, **kw):
        """Consume the 'groups' keyword and pass the rest up the chain."""
        self._groups = tuple(kw.pop('groups'))
        # Computed lazily by the 'addresses' property.
        self._addresses = None
        super().init(*args, **kw)

    @property
    def groups(self):
        """Tuple of the groups making up this header."""
        return self._groups

    @property
    def addresses(self):
        """Flat tuple of every address in every group (cached on first use)."""
        if self._addresses is None:
            flattened = []
            for group in self._groups:
                flattened.extend(group.addresses)
            self._addresses = tuple(flattened)
        return self._addresses
380
381
class UniqueAddressHeader(AddressHeader):
    """AddressHeader variant whose max_count is 1."""

    max_count = 1
385
386
class SingleAddressHeader(AddressHeader):
    """AddressHeader variant offering access to its one and only address."""

    @property
    def address(self):
        """The single address of this header.

        Raises ValueError unless the header holds exactly one address.
        """
        found = self.addresses
        if len(found) == 1:
            return found[0]
        raise ValueError(("value of single address header {} is not "
            "a single address").format(self.name))
395
396
class UniqueSingleAddressHeader(SingleAddressHeader):
    """SingleAddressHeader variant whose max_count is 1."""

    max_count = 1
400
401
class MIMEVersionHeader:
    """Header mixin exposing the major, minor and combined MIME version."""

    max_count = 1

    value_parser = staticmethod(parser.parse_mime_version)

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with the parse tree, defects and version components.

        When the parse tree has no minor number, 'major' and 'version' are
        reported as None as well.
        """
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)
        minor = tree.minor
        if minor is None:
            major = version = None
        else:
            major = tree.major
            version = '{}.{}'.format(major, minor)
        kwds['major'] = major
        kwds['minor'] = minor
        kwds['version'] = version

    def init(self, *args, **kw):
        """Consume the version keywords and pass the rest up the chain."""
        self._version = kw.pop('version')
        self._major = kw.pop('major')
        self._minor = kw.pop('minor')
        super().init(*args, **kw)

    @property
    def major(self):
        """Major version number, or None."""
        return self._major

    @property
    def minor(self):
        """Minor version number, or None."""
        return self._minor

    @property
    def version(self):
        """The 'major.minor' string, or None."""
        return self._version
437
438
class ParameterizedMIMEHeader:

    # Mixin that handles the params dict.  Must be subclassed and
    # a property value_parser for the specific header provided.

    max_count = 1

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with the parse tree, defects and a 'params' dict.

        Parameter names are sanitized and lower-cased; values are sanitized.
        """
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)
        raw_params = tree.params
        collected = {}
        if raw_params is not None:
            # The MIME RFCs specify that parameter ordering is arbitrary.
            for key, val in raw_params:
                collected[utils._sanitize(key).lower()] = utils._sanitize(val)
        kwds['params'] = collected

    def init(self, *args, **kw):
        """Consume the 'params' keyword and pass the rest up the chain."""
        self._params = kw.pop('params')
        super().init(*args, **kw)

    @property
    def params(self):
        """Read-only mapping view of the header parameters."""
        return MappingProxyType(self._params)
466
467
class ContentTypeHeader(ParameterizedMIMEHeader):
    """Parameterized header exposing maintype, subtype and content_type."""

    value_parser = staticmethod(parser.parse_content_type_header)

    def init(self, *args, **kw):
        """Run the parent init, then record the sanitized main/sub types."""
        super().init(*args, **kw)
        tree = self._parse_tree
        self._maintype = utils._sanitize(tree.maintype)
        self._subtype = utils._sanitize(tree.subtype)

    @property
    def maintype(self):
        """Sanitized maintype taken from the parse tree."""
        return self._maintype

    @property
    def subtype(self):
        """Sanitized subtype taken from the parse tree."""
        return self._subtype

    @property
    def content_type(self):
        """The 'maintype/subtype' string."""
        return '/'.join((self.maintype, self.subtype))
488
489
class ContentDispositionHeader(ParameterizedMIMEHeader):
    """Parameterized header exposing the content_disposition value."""

    value_parser = staticmethod(parser.parse_content_disposition_header)

    def init(self, *args, **kw):
        """Run the parent init, then record the disposition (may be None)."""
        super().init(*args, **kw)
        disposition = self._parse_tree.content_disposition
        if disposition is not None:
            disposition = utils._sanitize(disposition)
        self._content_disposition = disposition

    @property
    def content_disposition(self):
        """Sanitized disposition from the parse tree, or None."""
        return self._content_disposition
502
503
class ContentTransferEncodingHeader:
    """Header mixin exposing the content transfer encoding as 'cte'."""

    max_count = 1

    value_parser = staticmethod(parser.parse_content_transfer_encoding_header)

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with the parse tree, its string form and its defects."""
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)

    def init(self, *args, **kw):
        """Run the parent init, then record the sanitized cte value."""
        super().init(*args, **kw)
        tree = self._parse_tree
        self._cte = utils._sanitize(tree.cte)

    @property
    def cte(self):
        """Sanitized cte value taken from the parse tree."""
        return self._cte
523
524
class MessageIDHeader:
    """Header mixin that parses its value with parser.parse_message_id."""

    max_count = 1
    value_parser = staticmethod(parser.parse_message_id)

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with the parse tree, its string form and its defects."""
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)
535
536
537# The header factory #
538
# Lower-cased header name -> specialized header class.  HeaderRegistry copies
# this mapping into its registry when use_default_map is true.
_default_header_map = {
    'subject': UniqueUnstructuredHeader,
    'date': UniqueDateHeader,
    'resent-date': DateHeader,
    'orig-date': UniqueDateHeader,
    'sender': UniqueSingleAddressHeader,
    'resent-sender': SingleAddressHeader,
    'to': UniqueAddressHeader,
    'resent-to': AddressHeader,
    'cc': UniqueAddressHeader,
    'resent-cc': AddressHeader,
    'bcc': UniqueAddressHeader,
    'resent-bcc': AddressHeader,
    'from': UniqueAddressHeader,
    'resent-from': AddressHeader,
    'reply-to': UniqueAddressHeader,
    'mime-version': MIMEVersionHeader,
    'content-type': ContentTypeHeader,
    'content-disposition': ContentDispositionHeader,
    'content-transfer-encoding': ContentTransferEncodingHeader,
    'message-id': MessageIDHeader,
    }
561
class HeaderRegistry:

    """A header_factory and header registry."""

    def __init__(self, base_class=BaseHeader, default_class=UnstructuredHeader,
                       use_default_map=True):
        """Create a header_factory that works with the Policy API.

        base_class is the class that will be the last class in the created
        header class's __bases__ list.  default_class is the class that will be
        used if "name" (see __call__) does not appear in the registry.
        use_default_map controls whether or not the default mapping of names to
        specialized classes is copied in to the registry when the factory is
        created.  The default is True.

        """
        # Copy rather than share the default map so that map_to_type only
        # affects this registry.
        self.registry = dict(_default_header_map) if use_default_map else {}
        self.base_class = base_class
        self.default_class = default_class

    def map_to_type(self, name, cls):
        """Register cls as the specialized class for handling "name" headers.

        The name is stored lower-cased.

        """
        key = name.lower()
        self.registry[key] = cls

    def __getitem__(self, name):
        # Look the name up case-insensitively and combine the specialized
        # class with base_class into a freshly created class.
        specialized = self.registry.get(name.lower(), self.default_class)
        bases = (specialized, self.base_class)
        return type('_' + specialized.__name__, bases, {})

    def __call__(self, name, value):
        """Create a header instance for header 'name' from 'value'.

        Creates a header instance by creating a specialized class for parsing
        and representing the specified header by combining the factory
        base_class with a specialized class from the registry or the
        default_class, and passing the name and value to the constructed
        class's constructor.

        """
        header_cls = self[name]
        return header_cls(name, value)
605