1#!/usr/bin/env python3 2"""Classes to parse mailer-daemon messages.""" 3 4import calendar 5import email.message 6import re 7import os 8import sys 9 10 11class Unparseable(Exception): 12 pass 13 14 15class ErrorMessage(email.message.Message): 16 def __init__(self): 17 email.message.Message.__init__(self) 18 self.sub = '' 19 20 def is_warning(self): 21 sub = self.get('Subject') 22 if not sub: 23 return 0 24 sub = sub.lower() 25 if sub.startswith('waiting mail'): 26 return 1 27 if 'warning' in sub: 28 return 1 29 self.sub = sub 30 return 0 31 32 def get_errors(self): 33 for p in EMPARSERS: 34 self.rewindbody() 35 try: 36 return p(self.fp, self.sub) 37 except Unparseable: 38 pass 39 raise Unparseable 40 41# List of re's or tuples of re's. 42# If a re, it should contain at least a group (?P<email>...) which 43# should refer to the email address. The re can also contain a group 44# (?P<reason>...) which should refer to the reason (error message). 45# If no reason is present, the emparse_list_reason list is used to 46# find a reason. 47# If a tuple, the tuple should contain 2 re's. The first re finds a 48# location, the second re is repeated one or more times to find 49# multiple email addresses. The second re is matched (not searched) 50# where the previous match ended. 51# The re's are compiled using the re module. 52emparse_list_list = [ 53 'error: (?P<reason>unresolvable): (?P<email>.+)', 54 ('----- The following addresses had permanent fatal errors -----\n', 55 '(?P<email>[^ \n].*)\n( .*\n)?'), 56 'remote execution.*\n.*rmail (?P<email>.+)', 57 ('The following recipients did not receive your message:\n\n', 58 ' +(?P<email>.*)\n(The following recipients did not receive your message:\n\n)?'), 59 '------- Failure Reasons --------\n\n(?P<reason>.*)\n(?P<email>.*)', 60 '^<(?P<email>.*)>:\n(?P<reason>.*)', 61 '^(?P<reason>User mailbox exceeds allowed size): (?P<email>.+)', 62 '^5\\d{2} <(?P<email>[^\n>]+)>\\.\\.\\. (?P<reason>.+)', 63 '^Original-Recipient: rfc822;(?P<email>.*)', 64 '^did not reach the following recipient\\(s\\):\n\n(?P<email>.*) on .*\n +(?P<reason>.*)', 65 '^ <(?P<email>[^\n>]+)> \\.\\.\\. (?P<reason>.*)', 66 '^Report on your message to: (?P<email>.*)\nReason: (?P<reason>.*)', 67 '^Your message was not delivered to +(?P<email>.*)\n +for the following reason:\n +(?P<reason>.*)', 68 '^ was not +(?P<email>[^ \n].*?) *\n.*\n.*\n.*\n because:.*\n +(?P<reason>[^ \n].*?) *\n', 69 ] 70# compile the re's in the list and store them in-place. 71for i in range(len(emparse_list_list)): 72 x = emparse_list_list[i] 73 if type(x) is type(''): 74 x = re.compile(x, re.MULTILINE) 75 else: 76 xl = [] 77 for x in x: 78 xl.append(re.compile(x, re.MULTILINE)) 79 x = tuple(xl) 80 del xl 81 emparse_list_list[i] = x 82 del x 83del i 84 85# list of re's used to find reasons (error messages). 86# if a string, "<>" is replaced by a copy of the email address. 87# The expressions are searched for in order. After the first match, 88# no more expressions are searched for. So, order is important. 89emparse_list_reason = [ 90 r'^5\d{2} <>\.\.\. (?P<reason>.*)', 91 r'<>\.\.\. (?P<reason>.*)', 92 re.compile(r'^<<< 5\d{2} (?P<reason>.*)', re.MULTILINE), 93 re.compile('===== stderr was =====\nrmail: (?P<reason>.*)'), 94 re.compile('^Diagnostic-Code: (?P<reason>.*)', re.MULTILINE), 95 ] 96emparse_list_from = re.compile('^From:', re.IGNORECASE|re.MULTILINE) 97def emparse_list(fp, sub): 98 data = fp.read() 99 res = emparse_list_from.search(data) 100 if res is None: 101 from_index = len(data) 102 else: 103 from_index = res.start(0) 104 errors = [] 105 emails = [] 106 reason = None 107 for regexp in emparse_list_list: 108 if type(regexp) is type(()): 109 res = regexp[0].search(data, 0, from_index) 110 if res is not None: 111 try: 112 reason = res.group('reason') 113 except IndexError: 114 pass 115 while 1: 116 res = regexp[1].match(data, res.end(0), from_index) 117 if res is None: 118 break 119 emails.append(res.group('email')) 120 break 121 else: 122 res = regexp.search(data, 0, from_index) 123 if res is not None: 124 emails.append(res.group('email')) 125 try: 126 reason = res.group('reason') 127 except IndexError: 128 pass 129 break 130 if not emails: 131 raise Unparseable 132 if not reason: 133 reason = sub 134 if reason[:15] == 'returned mail: ': 135 reason = reason[15:] 136 for regexp in emparse_list_reason: 137 if type(regexp) is type(''): 138 for i in range(len(emails)-1,-1,-1): 139 email = emails[i] 140 exp = re.compile(re.escape(email).join(regexp.split('<>')), re.MULTILINE) 141 res = exp.search(data) 142 if res is not None: 143 errors.append(' '.join((email.strip()+': '+res.group('reason')).split())) 144 del emails[i] 145 continue 146 res = regexp.search(data) 147 if res is not None: 148 reason = res.group('reason') 149 break 150 for email in emails: 151 errors.append(' '.join((email.strip()+': '+reason).split())) 152 return errors 153 154EMPARSERS = [emparse_list] 155 156def sort_numeric(a, b): 157 a = int(a) 158 b = int(b) 159 if a < b: 160 return -1 161 elif a > b: 162 return 1 163 else: 164 return 0 165 166def parsedir(dir, modify): 167 os.chdir(dir) 168 pat = re.compile('^[0-9]*$') 169 errordict = {} 170 errorfirst = {} 171 errorlast = {} 172 nok = nwarn = nbad = 0 173 174 # find all numeric file names and sort them 175 files = list(filter(lambda fn, pat=pat: pat.match(fn) is not None, os.listdir('.'))) 176 files.sort(sort_numeric) 177 178 for fn in files: 179 # Lets try to parse the file. 180 fp = open(fn) 181 m = email.message_from_file(fp, _class=ErrorMessage) 182 sender = m.getaddr('From') 183 print('%s\t%-40s\t'%(fn, sender[1]), end=' ') 184 185 if m.is_warning(): 186 fp.close() 187 print('warning only') 188 nwarn = nwarn + 1 189 if modify: 190 os.rename(fn, ','+fn) 191## os.unlink(fn) 192 continue 193 194 try: 195 errors = m.get_errors() 196 except Unparseable: 197 print('** Not parseable') 198 nbad = nbad + 1 199 fp.close() 200 continue 201 print(len(errors), 'errors') 202 203 # Remember them 204 for e in errors: 205 try: 206 mm, dd = m.getdate('date')[1:1+2] 207 date = '%s %02d' % (calendar.month_abbr[mm], dd) 208 except: 209 date = '??????' 210 if e not in errordict: 211 errordict[e] = 1 212 errorfirst[e] = '%s (%s)' % (fn, date) 213 else: 214 errordict[e] = errordict[e] + 1 215 errorlast[e] = '%s (%s)' % (fn, date) 216 217 fp.close() 218 nok = nok + 1 219 if modify: 220 os.rename(fn, ','+fn) 221## os.unlink(fn) 222 223 print('--------------') 224 print(nok, 'files parsed,',nwarn,'files warning-only,', end=' ') 225 print(nbad,'files unparseable') 226 print('--------------') 227 list = [] 228 for e in errordict.keys(): 229 list.append((errordict[e], errorfirst[e], errorlast[e], e)) 230 list.sort() 231 for num, first, last, e in list: 232 print('%d %s - %s\t%s' % (num, first, last, e)) 233 234def main(): 235 modify = 0 236 if len(sys.argv) > 1 and sys.argv[1] == '-d': 237 modify = 1 238 del sys.argv[1] 239 if len(sys.argv) > 1: 240 for folder in sys.argv[1:]: 241 parsedir(folder, modify) 242 else: 243 parsedir('/ufs/jack/Mail/errorsinbox', modify) 244 245if __name__ == '__main__' or sys.argv[0] == __name__: 246 main() 247