• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""Representing and manipulating email headers via custom objects.
2
3This module provides an implementation of the HeaderRegistry API.
4The implementation is designed to flexibly follow RFC5322 rules.
5
6Eventually HeaderRegistry will be a public API, but it isn't yet,
7and will probably change some before that happens.
8
9"""
10from types import MappingProxyType
11
12from email import utils
13from email import errors
14from email import _header_value_parser as parser
15
class Address:
    """A single mailbox: an optional display name plus username@domain.

    Every attribute is exposed through a read-only property.
    """

    def __init__(self, display_name='', username='', domain='', addr_spec=None):
        """Build an address from separate parts or from a combined addr_spec.

        Either pass 'username' and 'domain' individually, or pass 'addr_spec'
        (a "username@domain" string quoted per RFC 5322) *instead of* them.
        'display_name' is optional in both cases.

        Raises ValueError if any supplied part contains CR or LF, or if
        addr_spec cannot be parsed in full; raises TypeError if addr_spec is
        combined with username and/or domain; re-raises the first defect
        reported by the parser when the parsed addr_spec has defects.

        The resulting object offers display_name, username, domain and
        addr_spec attributes.  addr_spec and str() of the object are quoted
        per RFC 5322 but carry no Content Transfer Encoding.

        """
        supplied = (display_name, username, domain, addr_spec)
        combined = ''.join(part for part in supplied if part)
        if any(eol in combined for eol in '\r\n'):
            raise ValueError("invalid arguments; address parts cannot contain CR or LF")

        # Only application code is expected to pass addr_spec (and so reach
        # the raises below); the email package itself always passes username
        # and domain.
        if addr_spec is not None:
            if username or domain:
                raise TypeError("addrspec specified when username and/or "
                                "domain also specified")
            parsed, leftover = parser.get_addr_spec(addr_spec)
            if leftover:
                raise ValueError("Invalid addr_spec; only '{}' "
                                 "could be parsed from '{}'".format(
                                    parsed, addr_spec))
            found_defects = parsed.all_defects
            if found_defects:
                raise found_defects[0]
            username, domain = parsed.local_part, parsed.domain
        self._display_name = display_name
        self._username = username
        self._domain = domain

    @property
    def display_name(self):
        return self._display_name

    @property
    def username(self):
        return self._username

    @property
    def domain(self):
        return self._domain

    @property
    def addr_spec(self):
        """The username@domain part, quoted per RFC 5322 and without any
        Content Transfer Encoding.  An entirely empty address yields '<>'.
        """
        local = self.username
        # Quote the local part when it holds any character that cannot
        # appear in a dot-atom.
        if any(ch in parser.DOT_ATOM_ENDS for ch in local):
            local = parser.quote_string(local)
        host = self.domain
        if host:
            return local + '@' + host
        return local if local else '<>'

    def __repr__(self):
        template = "{}(display_name={!r}, username={!r}, domain={!r})"
        return template.format(self.__class__.__name__,
                               self.display_name, self.username, self.domain)

    def __str__(self):
        shown = self.display_name
        if any(ch in parser.SPECIALS for ch in shown):
            shown = parser.quote_string(shown)
        spec = self.addr_spec
        if not shown:
            return spec
        # With a display name, an empty address is rendered as "name <>".
        if spec == '<>':
            spec = ''
        return "{} <{}>".format(shown, spec)

    def __eq__(self, other):
        if not isinstance(other, Address):
            return NotImplemented
        mine = (self.display_name, self.username, self.domain)
        theirs = (other.display_name, other.username, other.domain)
        return mine == theirs
106
107
class Group:
    """An RFC 5322 address group: a display name and a tuple of addresses."""

    def __init__(self, display_name=None, addresses=None):
        """Create an object representing an address group.

        An address group consists of a display_name followed by colon and a
        list of addresses (see Address) terminated by a semi-colon.  The Group
        is created by specifying a display_name and a possibly empty list of
        Address objects.  A Group can also be used to represent a single
        address that is not in a group, which is convenient when manipulating
        lists that are a combination of Groups and individual Addresses.  In
        this case the display_name should be set to None.  In particular, the
        string representation of a Group whose display_name is None is the same
        as the Address object, if there is one and only one Address object in
        the addresses list.

        """
        self._display_name = display_name
        # Stored as a tuple so the group's membership is read-only.
        self._addresses = tuple(addresses) if addresses else tuple()

    @property
    def display_name(self):
        return self._display_name

    @property
    def addresses(self):
        return self._addresses

    def __repr__(self):
        # The closing parenthesis was missing from this format string, which
        # produced an unbalanced repr such as "Group(display_name='x', ...".
        return "{}(display_name={!r}, addresses={!r})".format(
                 self.__class__.__name__,
                 self.display_name, self.addresses)

    def __str__(self):
        # A nameless group holding exactly one address is rendered as that
        # bare address.
        if self.display_name is None and len(self.addresses)==1:
            return str(self.addresses[0])
        disp = self.display_name
        if disp is not None and not parser.SPECIALS.isdisjoint(disp):
            disp = parser.quote_string(disp)
        adrstr = ", ".join(str(x) for x in self.addresses)
        adrstr = ' ' + adrstr if adrstr else adrstr
        return "{}:{};".format(disp, adrstr)

    def __eq__(self, other):
        if not isinstance(other, Group):
            return NotImplemented
        return (self.display_name == other.display_name and
                self.addresses == other.addresses)
156
157
158# Header Classes #
159
class BaseHeader(str):

    """Common str-based foundation for all header objects.

    The string value of an instance is the 'decoded' value produced by the
    subclass's parser.

    Subclass contract:

      * Provide a classmethod 'parse(value, kwds)'.  It receives the unfolded
        value and a dict that initially holds only 'defects' (an empty
        list).  On return the dict must also hold 'parse_tree' (the parse
        tree for the value) and 'decoded' (the idealized string form:
        encoded words decoded, canonical representations applied).

      * Parsers should avoid raising wherever practical and append to the
        'defects' list instead; the message parser decides later what to do
        with the collected defects.  The standard parsers record defects for
        RFC compliance problems, obsolete syntax and unrecoverable parsing
        errors.

      * If 'parse' stores extra keys in the dict, the subclass must also
        define 'init', which receives the dict as keyword arguments.  It
        should consume (typically by storing on same-named attributes) and
        remove its extra keys, then delegate to super().init with whatever
        is left.

      * Define a 'max_count' attribute that is None or 1.  XXX: this part of
        the API still needs a better definition.

    """

    def __new__(cls, name, value):
        parsed = {'defects': []}
        cls.parse(value, parsed)
        # 'decoded' becomes the str value; everything else goes to init().
        text = parsed.pop('decoded')
        if utils._has_surrogates(text):
            text = utils._sanitize(text)
        self = str.__new__(cls, text)
        self.init(name, **parsed)
        return self

    def init(self, name, *, parse_tree, defects):
        self._name = name
        self._parse_tree = parse_tree
        self._defects = defects

    @property
    def name(self):
        return self._name

    @property
    def defects(self):
        return tuple(self._defects)

    def __reduce__(self):
        # Pickle as (class name, bases, string value) plus the instance
        # dict, so the dynamically built class can be recreated on load.
        klass = self.__class__
        rebuild_args = (klass.__name__, klass.__bases__, str(self))
        return (_reconstruct_header, rebuild_args, self.__dict__)

    @classmethod
    def _reconstruct(cls, value):
        # Bypasses __new__ (and therefore parsing); used when unpickling.
        return str.__new__(cls, value)

    def fold(self, *, policy):
        """Return the folded header text for this header under 'policy'.

        Folding is performed on the parsed representation following RFC5322
        rules as adjusted by the policy.  Surrogateescaped bytes in the parse
        tree are CTE encoded with the charset 'unknown-8bit'; other non-ASCII
        characters are CTE encoded with charset utf-8 (XXX: this should be a
        policy setting).

        The result is ASCII only, starts with the header name and the ': '
        separator, may contain linesep characters, and ends with one.

        """
        # At some point we need to put fws here if it was in the source.
        label = parser.HeaderLabel([
            parser.ValueTerminal(self.name, 'header-name'),
            parser.ValueTerminal(':', 'header-sep')])
        folded = parser.Header([label])
        tree = self._parse_tree
        if tree:
            # Separate the label from a non-empty value with a single space.
            space = parser.WhiteSpaceTerminal(' ', 'fws')
            folded.append(parser.CFWSList([space]))
        folded.append(tree)
        return folded.fold(policy=policy)
258
259
def _reconstruct_header(cls_name, bases, value):
    """Rebuild a header object from the data emitted by __reduce__.

    A fresh class named 'cls_name' with the given 'bases' is created and its
    _reconstruct classmethod is asked to produce the instance for 'value'.
    """
    header_cls = type(cls_name, bases, {})
    return header_cls._reconstruct(value)
262
263
class UnstructuredHeader:
    """Parsing mixin for headers whose value is unstructured text."""

    max_count = None
    value_parser = staticmethod(parser.get_unstructured)

    @classmethod
    def parse(cls, value, kwds):
        """Store the parse tree for 'value' and its string form in 'kwds'."""
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
273
274
class UniqueUnstructuredHeader(UnstructuredHeader):
    """UnstructuredHeader variant that sets max_count to 1."""

    # NOTE(review): presumably max_count = 1 limits the header to a single
    # occurrence per message; the code that reads max_count is not in this
    # file -- confirm against the consumer.
    max_count = 1
278
279
class DateHeader:

    """Header whose value consists of a single timestamp.

    Provides an additional attribute, datetime, which is either an aware
    datetime using a timezone, or a naive datetime if the timezone
    in the input string is -0000.  Also accepts a datetime as input.
    The 'value' attribute is the normalized form of the timestamp,
    which means it is the output of format_datetime on the datetime.

    If the input is empty, or is a string that cannot be parsed as a date,
    the datetime attribute is None and a defect is recorded instead of an
    exception being raised.
    """

    max_count = None

    # This is used only for folding, not for creating 'decoded'.
    value_parser = staticmethod(parser.get_unstructured)

    @classmethod
    def parse(cls, value, kwds):
        """Fill 'kwds' with datetime, decoded and parse_tree for 'value'.

        'value' may be a date string or a datetime object.  Problems with
        the input are appended to kwds['defects'] rather than raised.
        """
        if not value:
            kwds['defects'].append(errors.HeaderMissingRequiredValue())
            kwds['datetime'] = None
            kwds['decoded'] = ''
            kwds['parse_tree'] = parser.TokenList()
            return
        if isinstance(value, str):
            try:
                value = utils.parsedate_to_datetime(value)
            except (TypeError, ValueError):
                # Header parsers are expected to record defects rather than
                # raise.  Depending on the Python version,
                # parsedate_to_datetime signals an unparsable date with
                # ValueError or TypeError.  InvalidDateDefect only exists in
                # newer versions of email.errors, hence the fallback.
                defect_class = getattr(errors, 'InvalidDateDefect',
                                       errors.InvalidHeaderDefect)
                kwds['defects'].append(
                    defect_class('Invalid date value or format'))
                kwds['datetime'] = None
                # 'value' is still the original string here because the
                # assignment above never completed.
                kwds['decoded'] = value
                kwds['parse_tree'] = parser.TokenList()
                return
        kwds['datetime'] = value
        kwds['decoded'] = utils.format_datetime(kwds['datetime'])
        kwds['parse_tree'] = cls.value_parser(kwds['decoded'])

    def init(self, *args, **kw):
        # Consume the extra key added by parse() before delegating.
        self._datetime = kw.pop('datetime')
        super().init(*args, **kw)

    @property
    def datetime(self):
        return self._datetime
317
318
class UniqueDateHeader(DateHeader):
    """DateHeader variant that sets max_count to 1."""

    # NOTE(review): presumably max_count = 1 limits the header to a single
    # occurrence per message; the code that reads max_count is not in this
    # file -- confirm against the consumer.
    max_count = 1
322
323
class AddressHeader:
    """Parsing mixin for headers whose value is a list of addresses.

    Adds a 'groups' attribute (tuple of Group objects) and an 'addresses'
    attribute (flat tuple of every address in every group).
    """

    max_count = None

    @staticmethod
    def value_parser(value):
        """Return the address-list parse tree for the string 'value'."""
        tree, remainder = parser.get_address_list(value)
        assert not remainder, 'this should not happen'
        return tree

    @classmethod
    def parse(cls, value, kwds):
        """Fill 'kwds' from a header string or from Address/Group objects.

        A str is parsed; anything else is taken to be a Group, an Address,
        or an iterable of them.
        """
        if isinstance(value, str):
            # We are translating here from the RFC language (address/mailbox)
            # to our API language (group/address).
            tree = cls.value_parser(value)
            kwds['parse_tree'] = tree
            groups = []
            for entry in tree.addresses:
                members = [Address(mailbox.display_name or '',
                                   mailbox.local_part or '',
                                   mailbox.domain or '')
                           for mailbox in entry.all_mailboxes]
                groups.append(Group(entry.display_name, members))
            defects = list(tree.all_defects)
        else:
            # Assume it is Address/Group stuff
            items = value if hasattr(value, '__iter__') else [value]
            # Anything without an 'addresses' attribute is treated as a
            # lone address and wrapped in a nameless Group.
            groups = [item if hasattr(item, 'addresses')
                           else Group(None, [item])
                      for item in items]
            defects = []
        kwds['groups'] = groups
        kwds['defects'] = defects
        kwds['decoded'] = ', '.join(str(group) for group in groups)
        # Object input has no parse tree yet; derive one from the text.
        if 'parse_tree' not in kwds:
            kwds['parse_tree'] = cls.value_parser(kwds['decoded'])

    def init(self, *args, **kw):
        self._groups = tuple(kw.pop('groups'))
        # Flattened address tuple, computed on first access.
        self._addresses = None
        super().init(*args, **kw)

    @property
    def groups(self):
        return self._groups

    @property
    def addresses(self):
        if self._addresses is None:
            flattened = []
            for group in self._groups:
                flattened.extend(group.addresses)
            self._addresses = tuple(flattened)
        return self._addresses
377
378
class UniqueAddressHeader(AddressHeader):
    """AddressHeader variant that sets max_count to 1."""

    # NOTE(review): presumably max_count = 1 limits the header to a single
    # occurrence per message; the code that reads max_count is not in this
    # file -- confirm against the consumer.
    max_count = 1
382
383
class SingleAddressHeader(AddressHeader):
    """Address header expected to carry exactly one address."""

    @property
    def address(self):
        """The header's only address.

        Raises ValueError when the header does not hold exactly one address.
        """
        found = self.addresses
        if len(found) == 1:
            return found[0]
        raise ValueError(("value of single address header {} is not "
            "a single address").format(self.name))
392
393
class UniqueSingleAddressHeader(SingleAddressHeader):
    """SingleAddressHeader variant that sets max_count to 1."""

    # NOTE(review): presumably max_count = 1 limits the header to a single
    # occurrence per message; the code that reads max_count is not in this
    # file -- confirm against the consumer.
    max_count = 1
397
398
class MIMEVersionHeader:
    """Parsing mixin for the MIME-Version header.

    Adds 'major', 'minor' and 'version' attributes; all three are None when
    the parser did not find a minor version number.
    """

    max_count = 1

    value_parser = staticmethod(parser.parse_mime_version)

    @classmethod
    def parse(cls, value, kwds):
        """Fill 'kwds' with the parse results for 'value'."""
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)
        minor = tree.minor
        if minor is None:
            # Without a minor number no version information is reported.
            major = version = None
        else:
            major = tree.major
            version = '{}.{}'.format(major, minor)
        kwds['major'] = major
        kwds['minor'] = minor
        kwds['version'] = version

    def init(self, *args, **kw):
        # Consume the extra keys added by parse() before delegating.
        self._version = kw.pop('version')
        self._major = kw.pop('major')
        self._minor = kw.pop('minor')
        super().init(*args, **kw)

    @property
    def major(self):
        return self._major

    @property
    def minor(self):
        return self._minor

    @property
    def version(self):
        return self._version
434
435
class ParameterizedMIMEHeader:
    """Mixin that manages the params mapping of a MIME header.

    Must be subclassed, and the subclass has to supply the 'value_parser'
    for its specific header.
    """

    max_count = 1

    @classmethod
    def parse(cls, value, kwds):
        """Fill 'kwds' with the parse results and a 'params' dict."""
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)
        collected = {}
        if tree.params is not None:
            # The MIME RFCs specify that parameter ordering is arbitrary.
            # Parameter names are stored lower-cased.
            for name, param_value in tree.params:
                collected[utils._sanitize(name).lower()] = (
                    utils._sanitize(param_value))
        kwds['params'] = collected

    def init(self, *args, **kw):
        # Consume the extra key added by parse() before delegating.
        self._params = kw.pop('params')
        super().init(*args, **kw)

    @property
    def params(self):
        # Hand out a read-only view of the parameter dict.
        return MappingProxyType(self._params)
463
464
class ContentTypeHeader(ParameterizedMIMEHeader):
    """Parsing mixin for Content-Type; adds maintype, subtype, content_type."""

    value_parser = staticmethod(parser.parse_content_type_header)

    def init(self, *args, **kw):
        super().init(*args, **kw)
        # Read the type information from the parse tree once the parent
        # init has run.
        tree = self._parse_tree
        self._maintype = utils._sanitize(tree.maintype)
        self._subtype = utils._sanitize(tree.subtype)

    @property
    def maintype(self):
        return self._maintype

    @property
    def subtype(self):
        return self._subtype

    @property
    def content_type(self):
        """The 'maintype/subtype' string."""
        return '/'.join((self.maintype, self.subtype))
485
486
class ContentDispositionHeader(ParameterizedMIMEHeader):
    """Parsing mixin for Content-Disposition; adds content_disposition."""

    value_parser = staticmethod(parser.parse_content_disposition_header)

    def init(self, *args, **kw):
        super().init(*args, **kw)
        disposition = self._parse_tree.content_disposition
        # A missing disposition stays None; anything else is sanitized.
        if disposition is not None:
            disposition = utils._sanitize(disposition)
        self._content_disposition = disposition

    @property
    def content_disposition(self):
        return self._content_disposition
499
500
class ContentTransferEncodingHeader:
    """Parsing mixin for Content-Transfer-Encoding; adds a 'cte' attribute."""

    max_count = 1

    value_parser = staticmethod(parser.parse_content_transfer_encoding_header)

    @classmethod
    def parse(cls, value, kwds):
        """Fill 'kwds' with the parse tree, decoded text and defects."""
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)

    def init(self, *args, **kw):
        super().init(*args, **kw)
        # The encoding name is read from the parse tree stored by the
        # parent init.
        tree = self._parse_tree
        self._cte = utils._sanitize(tree.cte)

    @property
    def cte(self):
        return self._cte
520
521
class MessageIDHeader:
    """Parsing mixin for the Message-ID header."""

    max_count = 1
    value_parser = staticmethod(parser.parse_message_id)

    @classmethod
    def parse(cls, value, kwds):
        """Fill 'kwds' with the parse tree, decoded text and defects."""
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)
532
533
534# The header factory #
535
# Lower-cased header name -> specialized parsing class.  HeaderRegistry
# copies this mapping into its registry unless told not to.
_default_header_map = {
    # Unstructured text.
    'subject': UniqueUnstructuredHeader,
    # Dates.
    'date': UniqueDateHeader,
    'resent-date': DateHeader,
    'orig-date': UniqueDateHeader,
    # Addresses.
    'sender': UniqueSingleAddressHeader,
    'resent-sender': SingleAddressHeader,
    'to': UniqueAddressHeader,
    'resent-to': AddressHeader,
    'cc': UniqueAddressHeader,
    'resent-cc': AddressHeader,
    'bcc': UniqueAddressHeader,
    'resent-bcc': AddressHeader,
    'from': UniqueAddressHeader,
    'resent-from': AddressHeader,
    'reply-to': UniqueAddressHeader,
    # MIME headers.
    'mime-version': MIMEVersionHeader,
    'content-type': ContentTypeHeader,
    'content-disposition': ContentDispositionHeader,
    'content-transfer-encoding': ContentTransferEncodingHeader,
    # Identification.
    'message-id': MessageIDHeader,
}
558
class HeaderRegistry:

    """Header factory backed by a name-to-class registry."""

    def __init__(self, base_class=BaseHeader, default_class=UnstructuredHeader,
                       use_default_map=True):
        """Set up a header_factory usable with the Policy API.

        base_class becomes the last entry in the __bases__ of every header
        class this factory builds.  default_class is the specialized class
        used for any name (see __call__) that is not in the registry.  When
        use_default_map is true (the default), the registry starts out as a
        copy of the default name-to-class mapping; otherwise it starts
        empty.

        """
        self.base_class = base_class
        self.default_class = default_class
        self.registry = dict(_default_header_map) if use_default_map else {}

    def map_to_type(self, name, cls):
        """Make cls the specialized class used for "name" headers.

        The name is stored lower-cased.

        """
        key = name.lower()
        self.registry[key] = cls

    def __getitem__(self, name):
        # Look the name up case-insensitively; a new combined class is
        # built on every lookup.
        specialized = self.registry.get(name.lower(), self.default_class)
        bases = (specialized, self.base_class)
        return type('_'+specialized.__name__, bases, {})

    def __call__(self, name, value):
        """Return a header instance for header 'name' built from 'value'.

        The header class is assembled from the specialized class registered
        for the name (or default_class when there is none) together with the
        factory's base_class; name and value are then handed to that class's
        constructor.

        """
        header_cls = self[name]
        return header_cls(name, value)
602