• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright (C) 2001-2007 Python Software Foundation
2# Author: Barry Warsaw, Thomas Wouters, Anthony Baxter
3# Contact: email-sig@python.org
4
5"""A parser of RFC 2822 and MIME email messages."""
6
7__all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser',
8           'FeedParser', 'BytesFeedParser']
9
10from io import StringIO, TextIOWrapper
11
12from email.feedparser import FeedParser, BytesFeedParser
13from email._policybase import compat32
14
15
16
class Parser:
    """Text-mode front end that builds a message tree through FeedParser."""

    def __init__(self, _class=None, *, policy=compat32):
        """Set up a parser for RFC 2822 and MIME email messages.

        The parser produces an in-memory object tree for a message.  That
        tree can be manipulated and later handed to a Generator to get the
        textual form of the message back.

        Input is expected to be a block of RFC 2822 headers and header
        continuation lines, optionally preceded by a `Unix-from' header.  The
        header block ends at the first blank line or at the end of the input.

        _class is the class instantiated whenever a new message object is
        needed; it must be callable with zero arguments.  It is stored as-is
        and handed to FeedParser, so with the default of None the choice of
        message class is left to FeedParser.

        policy is a keyword-only policy object controlling a number of
        aspects of the parser's operation.  The default, compat32, keeps
        backward-compatible behaviour.
        """
        self.policy = policy
        self._class = _class

    def parse(self, fp, headersonly=False):
        """Build a message structure from the contents of a text file.

        The file object fp is read until it is exhausted and the root of the
        resulting message structure is returned.  When headersonly is true
        the parser stops interpreting the input once the headers have been
        read; the default, False, parses the whole input.
        """
        builder = FeedParser(self._class, policy=self.policy)
        if headersonly:
            builder._set_headersonly()
        # Pull the input in 8192-character pieces; a falsy read result
        # signals that there is nothing left to feed.
        chunk = fp.read(8192)
        while chunk:
            builder.feed(chunk)
            chunk = fp.read(8192)
        return builder.close()

    def parsestr(self, text, headersonly=False):
        """Build a message structure from a string.

        The string is wrapped in a StringIO and passed to parse(); the root
        of the message structure is returned.  headersonly has the same
        meaning as for parse() and defaults to False.
        """
        stream = StringIO(text)
        return self.parse(stream, headersonly=headersonly)
69
70
71
class HeaderParser(Parser):
    """Parser variant that always runs in headers-only mode."""

    def parse(self, fp, headersonly=True):
        """Parse the file fp in headers-only mode.

        The headersonly argument is accepted for signature compatibility
        with Parser.parse() but is ignored: True is always passed on.
        """
        return Parser.parse(self, fp, headersonly=True)

    def parsestr(self, text, headersonly=True):
        """Parse the string text in headers-only mode.

        The headersonly argument is accepted for signature compatibility
        with Parser.parsestr() but is ignored: True is always passed on.
        """
        return Parser.parsestr(self, text, headersonly=True)
78
79
class BytesParser:
    """Binary-input counterpart of Parser, delegating to a wrapped Parser."""

    def __init__(self, *args, **kw):
        """Set up a parser for binary RFC 2822 and MIME email messages.

        The parser produces an in-memory object tree for a message.  That
        tree can be manipulated and later handed to a Generator to get the
        textual form of the message back.

        Input is expected to be a block of RFC 2822 headers and header
        continuation lines, optionally preceded by a `Unix-from' header.  The
        header block ends at the first blank line or at the end of the input.

        All positional and keyword arguments are forwarded unchanged to
        Parser, which is stored on the instance as self.parser.
        """
        self.parser = Parser(*args, **kw)

    def parse(self, fp, headersonly=False):
        """Build a message structure from the contents of a binary file.

        The file object fp is read until it is exhausted and the root of the
        resulting message structure is returned.  When headersonly is true
        the parser stops interpreting the input once the headers have been
        read; the default, False, parses the whole input.
        """
        # Present the binary stream as text: ASCII decoding with
        # surrogateescape as the error handler.
        wrapper = TextIOWrapper(fp, encoding='ascii', errors='surrogateescape')
        try:
            return self.parser.parse(wrapper, headersonly)
        finally:
            # Detach rather than close the wrapper so the caller's file
            # object is not closed along with it.
            wrapper.detach()

    def parsebytes(self, text, headersonly=False):
        """Build a message structure from a byte string.

        The bytes are decoded as ASCII with the surrogateescape error
        handler and passed to the wrapped parser's parsestr(); the root of
        the message structure is returned.  headersonly has the same meaning
        as for parse() and defaults to False.
        """
        decoded = text.decode('ASCII', errors='surrogateescape')
        return self.parser.parsestr(decoded, headersonly)
125
126
class BytesHeaderParser(BytesParser):
    """BytesParser variant that always runs in headers-only mode."""

    def parse(self, fp, headersonly=True):
        """Parse the binary file fp in headers-only mode.

        The headersonly argument is accepted for signature compatibility
        with BytesParser.parse() but is ignored: True is always passed on.
        """
        return BytesParser.parse(self, fp, True)

    def parsebytes(self, text, headersonly=True):
        """Parse the byte string text in headers-only mode.

        The headersonly argument is accepted for signature compatibility
        with BytesParser.parsebytes() but is ignored: True is always passed
        on.
        """
        return BytesParser.parsebytes(self, text, True)
133