1// Copyright 2017 syzkaller project authors. All rights reserved. 2// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 3 4package email 5 6import ( 7 "bytes" 8 "encoding/base64" 9 "fmt" 10 "io" 11 "io/ioutil" 12 "mime" 13 "mime/multipart" 14 "mime/quotedprintable" 15 "net/mail" 16 "regexp" 17 "sort" 18 "strings" 19) 20 21type Email struct { 22 BugID string 23 MessageID string 24 Link string 25 Subject string 26 From string 27 Cc []string 28 Body string // text/plain part 29 Patch string // attached patch, if any 30 Command string // command to bot (#syz is stripped) 31 CommandArgs string // arguments for the command 32} 33 34const commandPrefix = "#syz " 35 36var groupsLinkRe = regexp.MustCompile("\nTo view this discussion on the web visit" + 37 " (https://groups\\.google\\.com/.*?)\\.(?:\r)?\n") 38 39func Parse(r io.Reader, ownEmails []string) (*Email, error) { 40 msg, err := mail.ReadMessage(r) 41 if err != nil { 42 return nil, fmt.Errorf("failed to read email: %v", err) 43 } 44 from, err := msg.Header.AddressList("From") 45 if err != nil { 46 return nil, fmt.Errorf("failed to parse email header 'From': %v", err) 47 } 48 if len(from) == 0 { 49 return nil, fmt.Errorf("failed to parse email header 'To': no senders") 50 } 51 // Ignore errors since To: header may not be present (we've seen such case). 52 to, _ := msg.Header.AddressList("To") 53 // AddressList fails if the header is not present. 
54 cc, _ := msg.Header.AddressList("Cc") 55 bugID := "" 56 var ccList []string 57 ownAddrs := make(map[string]bool) 58 for _, email := range ownEmails { 59 ownAddrs[email] = true 60 if addr, err := mail.ParseAddress(email); err == nil { 61 ownAddrs[addr.Address] = true 62 } 63 } 64 fromMe := false 65 for _, addr := range from { 66 cleaned, _, _ := RemoveAddrContext(addr.Address) 67 if addr, err := mail.ParseAddress(cleaned); err == nil && ownAddrs[addr.Address] { 68 fromMe = true 69 } 70 } 71 for _, addr := range append(append(cc, to...), from...) { 72 cleaned, context, _ := RemoveAddrContext(addr.Address) 73 if addr, err := mail.ParseAddress(cleaned); err == nil { 74 cleaned = addr.Address 75 } 76 if ownAddrs[cleaned] { 77 if bugID == "" { 78 bugID = context 79 } 80 } else { 81 ccList = append(ccList, cleaned) 82 } 83 } 84 ccList = MergeEmailLists(ccList) 85 body, attachments, err := parseBody(msg.Body, msg.Header) 86 if err != nil { 87 return nil, err 88 } 89 bodyStr := string(body) 90 patch, cmd, cmdArgs := "", "", "" 91 if !fromMe { 92 for _, a := range attachments { 93 _, patch, _ = ParsePatch(string(a)) 94 if patch != "" { 95 break 96 } 97 } 98 if patch == "" { 99 _, patch, _ = ParsePatch(bodyStr) 100 } 101 cmd, cmdArgs = extractCommand(body) 102 } 103 link := "" 104 if match := groupsLinkRe.FindStringSubmatchIndex(bodyStr); match != nil { 105 link = bodyStr[match[2]:match[3]] 106 } 107 email := &Email{ 108 BugID: bugID, 109 MessageID: msg.Header.Get("Message-ID"), 110 Link: link, 111 Subject: msg.Header.Get("Subject"), 112 From: from[0].String(), 113 Cc: ccList, 114 Body: string(body), 115 Patch: patch, 116 Command: cmd, 117 CommandArgs: cmdArgs, 118 } 119 return email, nil 120} 121 122// AddAddrContext embeds context into local part of the provided email address using '+'. 123// Returns the resulting email address. 
func AddAddrContext(email, context string) (string, error) {
	parsed, err := mail.ParseAddress(email)
	if err != nil {
		return "", fmt.Errorf("failed to parse %q as email: %v", email, err)
	}
	sep := strings.Index(parsed.Address, "@")
	if sep < 0 {
		return "", fmt.Errorf("failed to parse %q as email: no @", email)
	}
	local, domain := parsed.Address[:sep], parsed.Address[sep:]
	tagged := local + "+" + context + domain
	if parsed.Name == "" {
		// No display name: hand back the bare address.
		return tagged, nil
	}
	// Keep the display name by formatting the full address again.
	parsed.Address = tagged
	return parsed.String(), nil
}

// RemoveAddrContext strips the context that follows the last '+' in the local
// part of email. It returns the address without the context, followed by the
// context itself. An address that has no '+' in its local part is returned
// exactly as it was passed in, with an empty context.
func RemoveAddrContext(email string) (string, string, error) {
	parsed, err := mail.ParseAddress(email)
	if err != nil {
		return "", "", fmt.Errorf("failed to parse %q as email: %v", email, err)
	}
	sep := strings.Index(parsed.Address, "@")
	if sep < 0 {
		return "", "", fmt.Errorf("failed to parse %q as email: no @", email)
	}
	local, domain := parsed.Address[:sep], parsed.Address[sep:]
	plus := strings.LastIndex(local, "+")
	if plus < 0 {
		return email, "", nil
	}
	parsed.Address = local[:plus] + domain
	return parsed.String(), local[plus+1:], nil
}

// CanonicalEmail reduces email to a lower-cased bare address with everything
// from the first '+' of the local part removed. Input that does not parse as
// an address is returned unchanged.
func CanonicalEmail(email string) string {
	parsed, err := mail.ParseAddress(email)
	if err != nil {
		return email
	}
	address := parsed.Address
	sep := strings.Index(address, "@")
	if sep < 0 {
		return email
	}
	local, domain := address[:sep], address[sep:]
	if plus := strings.Index(local, "+"); plus >= 0 {
		local = local[:plus]
	}
	return strings.ToLower(local + domain)
}

// extractCommand extracts command to syzbot from email body.
// Commands are of the following form:
// ^#syz cmd args...
179func extractCommand(body []byte) (cmd, args string) { 180 cmdPos := bytes.Index(append([]byte{'\n'}, body...), []byte("\n"+commandPrefix)) 181 if cmdPos == -1 { 182 return 183 } 184 cmdPos += len(commandPrefix) 185 for cmdPos < len(body) && body[cmdPos] == ' ' { 186 cmdPos++ 187 } 188 cmdEnd := bytes.IndexByte(body[cmdPos:], '\n') 189 if cmdEnd == -1 { 190 cmdEnd = len(body) - cmdPos 191 } 192 if cmdEnd1 := bytes.IndexByte(body[cmdPos:], '\r'); cmdEnd1 != -1 && cmdEnd1 < cmdEnd { 193 cmdEnd = cmdEnd1 194 } 195 if cmdEnd1 := bytes.IndexByte(body[cmdPos:], ' '); cmdEnd1 != -1 && cmdEnd1 < cmdEnd { 196 cmdEnd = cmdEnd1 197 } 198 cmd = string(body[cmdPos : cmdPos+cmdEnd]) 199 // Some email clients split text emails at 80 columns are the transformation is irrevesible. 200 // We try hard to restore what was there before. 201 // For "test:" command we know that there must be 2 tokens without spaces. 202 // For "fix:"/"dup:" we need a whole non-empty line of text. 203 switch cmd { 204 case "test:": 205 args = extractArgsTokens(body[cmdPos+cmdEnd:], 2) 206 case "test_5_arg_cmd": 207 args = extractArgsTokens(body[cmdPos+cmdEnd:], 5) 208 case "fix:", "dup:": 209 args = extractArgsLine(body[cmdPos+cmdEnd:]) 210 } 211 return 212} 213 214func extractArgsTokens(body []byte, num int) string { 215 var args []string 216 for pos := 0; len(args) < num && pos < len(body); { 217 lineEnd := bytes.IndexByte(body[pos:], '\n') 218 if lineEnd == -1 { 219 lineEnd = len(body) - pos 220 } 221 line := strings.TrimSpace(string(body[pos : pos+lineEnd])) 222 for { 223 line1 := strings.Replace(line, " ", " ", -1) 224 if line == line1 { 225 break 226 } 227 line = line1 228 } 229 if line != "" { 230 args = append(args, strings.Split(line, " ")...) 
231 } 232 pos += lineEnd + 1 233 } 234 return strings.TrimSpace(strings.Join(args, " ")) 235} 236 237func extractArgsLine(body []byte) string { 238 pos := 0 239 for pos < len(body) && (body[pos] == ' ' || body[pos] == '\t' || 240 body[pos] == '\n' || body[pos] == '\r') { 241 pos++ 242 } 243 lineEnd := bytes.IndexByte(body[pos:], '\n') 244 if lineEnd == -1 { 245 lineEnd = len(body) - pos 246 } 247 return strings.TrimSpace(string(body[pos : pos+lineEnd])) 248} 249 250func parseBody(r io.Reader, headers mail.Header) ([]byte, [][]byte, error) { 251 // git-send-email sends emails without Content-Type, let's assume it's text. 252 mediaType := "text/plain" 253 var params map[string]string 254 if contentType := headers.Get("Content-Type"); contentType != "" { 255 var err error 256 mediaType, params, err = mime.ParseMediaType(headers.Get("Content-Type")) 257 if err != nil { 258 return nil, nil, fmt.Errorf("failed to parse email header 'Content-Type': %v", err) 259 } 260 } 261 switch strings.ToLower(headers.Get("Content-Transfer-Encoding")) { 262 case "quoted-printable": 263 r = quotedprintable.NewReader(r) 264 case "base64": 265 r = base64.NewDecoder(base64.StdEncoding, r) 266 } 267 disp, _, _ := mime.ParseMediaType(headers.Get("Content-Disposition")) 268 if disp == "attachment" { 269 attachment, err := ioutil.ReadAll(r) 270 if err != nil { 271 return nil, nil, fmt.Errorf("failed to read email body: %v", err) 272 } 273 return nil, [][]byte{attachment}, nil 274 } 275 if mediaType == "text/plain" { 276 body, err := ioutil.ReadAll(r) 277 if err != nil { 278 return nil, nil, fmt.Errorf("failed to read email body: %v", err) 279 } 280 return body, nil, nil 281 } 282 if !strings.HasPrefix(mediaType, "multipart/") { 283 return nil, nil, nil 284 } 285 var body []byte 286 var attachments [][]byte 287 mr := multipart.NewReader(r, params["boundary"]) 288 for { 289 p, err := mr.NextPart() 290 if err == io.EOF { 291 return body, attachments, nil 292 } 293 if err != nil { 294 return nil, 
nil, fmt.Errorf("failed to parse MIME parts: %v", err) 295 } 296 body1, attachments1, err1 := parseBody(p, mail.Header(p.Header)) 297 if err1 != nil { 298 return nil, nil, err1 299 } 300 if body == nil { 301 body = body1 302 } 303 attachments = append(attachments, attachments1...) 304 } 305} 306 307// MergeEmailLists merges several email lists removing duplicates and invalid entries. 308func MergeEmailLists(lists ...[]string) []string { 309 const ( 310 maxEmailLen = 1000 311 maxEmails = 50 312 ) 313 merged := make(map[string]bool) 314 for _, list := range lists { 315 for _, email := range list { 316 addr, err := mail.ParseAddress(email) 317 if err != nil || len(addr.Address) > maxEmailLen { 318 continue 319 } 320 merged[addr.Address] = true 321 } 322 } 323 var result []string 324 for e := range merged { 325 result = append(result, e) 326 } 327 sort.Strings(result) 328 if len(result) > maxEmails { 329 result = result[:maxEmails] 330 } 331 return result 332} 333