# Copyright (C) 2001-2007 Python Software Foundation
# Author: Barry Warsaw, Thomas Wouters, Anthony Baxter
# Contact: email-sig@python.org

"""A parser of RFC 2822 and MIME email messages."""

__all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser',
           'FeedParser', 'BytesFeedParser']

from io import StringIO, TextIOWrapper

from email.feedparser import FeedParser, BytesFeedParser
from email._policybase import compat32


class Parser:
    """Parse text (str) input into a message object tree."""

    def __init__(self, _class=None, *, policy=compat32):
        """Parser of RFC 2822 and MIME email messages.

        Creates an in-memory object tree representing the email message, which
        can then be manipulated and turned over to a Generator to return the
        textual representation of the message.

        The string must be formatted as a block of RFC 2822 headers and header
        continuation lines, optionally preceded by a 'Unix-from' header.  The
        header block is terminated either by the end of the string or by a
        blank line.

        _class is the class to instantiate for new message objects when they
        must be created.  This class must have a constructor that can take
        zero arguments.  When it is None (the default) the choice is left to
        FeedParser, which, per the email.parser documentation, uses the
        policy's message_factory.

        The policy keyword specifies a policy object that controls a number of
        aspects of the parser's operation.  The default policy maintains
        backward compatibility.

        """
        self._class = _class
        self.policy = policy

    def parse(self, fp, headersonly=False):
        """Create a message structure from the data in a file.

        Reads all the data from the text file fp and returns the root of the
        message structure.  Optional headersonly is a flag specifying whether
        to stop parsing after reading the headers or not.  The default is
        False, meaning it parses the entire contents of the file.
        """
        feedparser = FeedParser(self._class, policy=self.policy)
        if headersonly:
            feedparser._set_headersonly()
        # Read in fixed-size chunks rather than with a single read() call;
        # an empty result from read() marks the end of the input.
        while data := fp.read(8192):
            feedparser.feed(data)
        return feedparser.close()

    def parsestr(self, text, headersonly=False):
        """Create a message structure from a string.

        Returns the root of the message structure.  Optional headersonly is a
        flag specifying whether to stop parsing after reading the headers or
        not.  The default is False, meaning it parses the entire contents of
        the string.
        """
        return self.parse(StringIO(text), headersonly=headersonly)


class HeaderParser(Parser):
    """A Parser that always stops after the headers.

    The headersonly parameter is accepted only so the signatures match
    Parser; its value is ignored and header-only parsing is always used.
    """

    def parse(self, fp, headersonly=True):
        """Parse only the headers of the text file fp (headersonly ignored)."""
        return Parser.parse(self, fp, headersonly=True)

    def parsestr(self, text, headersonly=True):
        """Parse only the headers of the string text (headersonly ignored)."""
        return Parser.parsestr(self, text, headersonly=True)


class BytesParser:
    """Parse binary (bytes) input by delegating to a wrapped Parser."""

    def __init__(self, *args, **kw):
        """Parser of binary RFC 2822 and MIME email messages.

        Creates an in-memory object tree representing the email message, which
        can then be manipulated and turned over to a Generator to return the
        textual representation of the message.

        The input must be formatted as a block of RFC 2822 headers and header
        continuation lines, optionally preceded by a 'Unix-from' header.  The
        header block is terminated either by the end of the input or by a
        blank line.

        All arguments are forwarded unchanged to Parser, so this accepts the
        same _class positional argument and policy keyword argument as
        Parser does, with the same defaults.
        """
        self.parser = Parser(*args, **kw)

    def parse(self, fp, headersonly=False):
        """Create a message structure from the data in a binary file.

        Reads all the data from the file and returns the root of the message
        structure.  Optional headersonly is a flag specifying whether to stop
        parsing after reading the headers or not.  The default is False,
        meaning it parses the entire contents of the file.
        """
        # Decode as ASCII; surrogateescape carries non-ASCII bytes through
        # as lone surrogates instead of raising UnicodeDecodeError.
        fp = TextIOWrapper(fp, encoding='ascii', errors='surrogateescape')
        try:
            return self.parser.parse(fp, headersonly)
        finally:
            # Detach so the caller's binary file is left open when the
            # temporary text wrapper is discarded.
            fp.detach()

    def parsebytes(self, text, headersonly=False):
        """Create a message structure from a byte string.

        Returns the root of the message structure.  Optional headersonly is a
        flag specifying whether to stop parsing after reading the headers or
        not.  The default is False, meaning it parses the entire contents of
        the byte string.
        """
        text = text.decode('ASCII', errors='surrogateescape')
        return self.parser.parsestr(text, headersonly)


class BytesHeaderParser(BytesParser):
    """A BytesParser that always stops after the headers.

    The headersonly parameter is accepted only so the signatures match
    BytesParser; its value is ignored and header-only parsing is always used.
    """

    def parse(self, fp, headersonly=True):
        """Parse only the headers of the binary file fp (headersonly ignored)."""
        return BytesParser.parse(self, fp, headersonly=True)

    def parsebytes(self, text, headersonly=True):
        """Parse only the headers of the bytes text (headersonly ignored)."""
        return BytesParser.parsebytes(self, text, headersonly=True)