• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// Copyright 2017 syzkaller project authors. All rights reserved.
2// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
3
4package email
5
6import (
7	"bytes"
8	"encoding/base64"
9	"fmt"
10	"io"
11	"io/ioutil"
12	"mime"
13	"mime/multipart"
14	"mime/quotedprintable"
15	"net/mail"
16	"regexp"
17	"sort"
18	"strings"
19)
20
// Email is the result of parsing a single incoming message with Parse.
type Email struct {
	// BugID is the '+' context taken from the first of our own addresses
	// found in the Cc, To and From headers; empty if there is none.
	BugID string
	// MessageID is the value of the Message-ID header.
	MessageID string
	// Link is the Google Groups discussion URL found in the body, if any.
	Link string
	// Subject is the value of the Subject header.
	Subject string
	// From is the first sender listed in the From header.
	From string
	// Cc is the merged list of Cc, To and From addresses, excluding our own.
	Cc []string
	// Body is the text/plain part of the message.
	Body string
	// Patch is the attached (or inline) patch, if any.
	Patch string
	// Command is the command addressed to the bot (the "#syz" prefix is stripped).
	Command string
	// CommandArgs holds the arguments for Command.
	CommandArgs string
}
33
// commandPrefix marks the beginning of a bot command line in an email body.
const commandPrefix = "#syz "

// groupsLinkRe matches the "To view this discussion on the web visit ..." footer line
// and captures the groups.google.com URL in it, without the trailing period.
// The line may end with either LF or CRLF.
var groupsLinkRe = regexp.MustCompile(
	"\nTo view this discussion on the web visit (https://groups\\.google\\.com/.*?)\\.(?:\r)?\n")
38
39func Parse(r io.Reader, ownEmails []string) (*Email, error) {
40	msg, err := mail.ReadMessage(r)
41	if err != nil {
42		return nil, fmt.Errorf("failed to read email: %v", err)
43	}
44	from, err := msg.Header.AddressList("From")
45	if err != nil {
46		return nil, fmt.Errorf("failed to parse email header 'From': %v", err)
47	}
48	if len(from) == 0 {
49		return nil, fmt.Errorf("failed to parse email header 'To': no senders")
50	}
51	// Ignore errors since To: header may not be present (we've seen such case).
52	to, _ := msg.Header.AddressList("To")
53	// AddressList fails if the header is not present.
54	cc, _ := msg.Header.AddressList("Cc")
55	bugID := ""
56	var ccList []string
57	ownAddrs := make(map[string]bool)
58	for _, email := range ownEmails {
59		ownAddrs[email] = true
60		if addr, err := mail.ParseAddress(email); err == nil {
61			ownAddrs[addr.Address] = true
62		}
63	}
64	fromMe := false
65	for _, addr := range from {
66		cleaned, _, _ := RemoveAddrContext(addr.Address)
67		if addr, err := mail.ParseAddress(cleaned); err == nil && ownAddrs[addr.Address] {
68			fromMe = true
69		}
70	}
71	for _, addr := range append(append(cc, to...), from...) {
72		cleaned, context, _ := RemoveAddrContext(addr.Address)
73		if addr, err := mail.ParseAddress(cleaned); err == nil {
74			cleaned = addr.Address
75		}
76		if ownAddrs[cleaned] {
77			if bugID == "" {
78				bugID = context
79			}
80		} else {
81			ccList = append(ccList, cleaned)
82		}
83	}
84	ccList = MergeEmailLists(ccList)
85	body, attachments, err := parseBody(msg.Body, msg.Header)
86	if err != nil {
87		return nil, err
88	}
89	bodyStr := string(body)
90	patch, cmd, cmdArgs := "", "", ""
91	if !fromMe {
92		for _, a := range attachments {
93			_, patch, _ = ParsePatch(string(a))
94			if patch != "" {
95				break
96			}
97		}
98		if patch == "" {
99			_, patch, _ = ParsePatch(bodyStr)
100		}
101		cmd, cmdArgs = extractCommand(body)
102	}
103	link := ""
104	if match := groupsLinkRe.FindStringSubmatchIndex(bodyStr); match != nil {
105		link = bodyStr[match[2]:match[3]]
106	}
107	email := &Email{
108		BugID:       bugID,
109		MessageID:   msg.Header.Get("Message-ID"),
110		Link:        link,
111		Subject:     msg.Header.Get("Subject"),
112		From:        from[0].String(),
113		Cc:          ccList,
114		Body:        string(body),
115		Patch:       patch,
116		Command:     cmd,
117		CommandArgs: cmdArgs,
118	}
119	return email, nil
120}
121
// AddAddrContext inserts context into the local part of email after a '+' sign,
// so that user@host becomes user+context@host.
// If email carries a display name, the result is formatted together with that name;
// otherwise the bare address is returned.
// An error is returned if email cannot be parsed as an address.
func AddAddrContext(email, context string) (string, error) {
	parsed, err := mail.ParseAddress(email)
	if err != nil {
		return "", fmt.Errorf("failed to parse %q as email: %v", email, err)
	}
	sep := strings.IndexByte(parsed.Address, '@')
	if sep < 0 {
		return "", fmt.Errorf("failed to parse %q as email: no @", email)
	}
	local, domain := parsed.Address[:sep], parsed.Address[sep:]
	withContext := local + "+" + context + domain
	if parsed.Name == "" {
		return withContext, nil
	}
	parsed.Address = withContext
	return parsed.String(), nil
}
140
// RemoveAddrContext strips the context that follows the last '+' in the local part of email.
// It returns the address without the context, followed by the context itself.
// When the address has no context, email is returned unchanged with an empty context;
// otherwise the stripped address is returned in the form produced by mail.Address.String.
// An error is returned if email cannot be parsed as an address.
func RemoveAddrContext(email string) (string, string, error) {
	parsed, err := mail.ParseAddress(email)
	if err != nil {
		return "", "", fmt.Errorf("failed to parse %q as email: %v", email, err)
	}
	sep := strings.IndexByte(parsed.Address, '@')
	if sep < 0 {
		return "", "", fmt.Errorf("failed to parse %q as email: no @", email)
	}
	local, domain := parsed.Address[:sep], parsed.Address[sep:]
	plus := strings.LastIndexByte(local, '+')
	if plus < 0 {
		return email, "", nil
	}
	parsed.Address = local[:plus] + domain
	return parsed.String(), local[plus+1:], nil
}
160
// CanonicalEmail returns the lower-cased bare address of email with everything
// from the first '+' of the local part up to '@' removed.
// If email cannot be parsed as an address, it is returned unchanged.
func CanonicalEmail(email string) string {
	parsed, err := mail.ParseAddress(email)
	if err != nil {
		return email
	}
	sep := strings.IndexByte(parsed.Address, '@')
	if sep < 0 {
		return email
	}
	local, domain := parsed.Address[:sep], parsed.Address[sep:]
	if plus := strings.IndexByte(local, '+'); plus >= 0 {
		local = local[:plus]
	}
	return strings.ToLower(local + domain)
}
175
176// extractCommand extracts command to syzbot from email body.
177// Commands are of the following form:
178// ^#syz cmd args...
179func extractCommand(body []byte) (cmd, args string) {
180	cmdPos := bytes.Index(append([]byte{'\n'}, body...), []byte("\n"+commandPrefix))
181	if cmdPos == -1 {
182		return
183	}
184	cmdPos += len(commandPrefix)
185	for cmdPos < len(body) && body[cmdPos] == ' ' {
186		cmdPos++
187	}
188	cmdEnd := bytes.IndexByte(body[cmdPos:], '\n')
189	if cmdEnd == -1 {
190		cmdEnd = len(body) - cmdPos
191	}
192	if cmdEnd1 := bytes.IndexByte(body[cmdPos:], '\r'); cmdEnd1 != -1 && cmdEnd1 < cmdEnd {
193		cmdEnd = cmdEnd1
194	}
195	if cmdEnd1 := bytes.IndexByte(body[cmdPos:], ' '); cmdEnd1 != -1 && cmdEnd1 < cmdEnd {
196		cmdEnd = cmdEnd1
197	}
198	cmd = string(body[cmdPos : cmdPos+cmdEnd])
199	// Some email clients split text emails at 80 columns are the transformation is irrevesible.
200	// We try hard to restore what was there before.
201	// For "test:" command we know that there must be 2 tokens without spaces.
202	// For "fix:"/"dup:" we need a whole non-empty line of text.
203	switch cmd {
204	case "test:":
205		args = extractArgsTokens(body[cmdPos+cmdEnd:], 2)
206	case "test_5_arg_cmd":
207		args = extractArgsTokens(body[cmdPos+cmdEnd:], 5)
208	case "fix:", "dup:":
209		args = extractArgsLine(body[cmdPos+cmdEnd:])
210	}
211	return
212}
213
// extractArgsTokens collects whitespace-separated tokens from body, line by line,
// until at least num tokens have been gathered or body is exhausted.
// Blank lines are skipped. Lines are always consumed whole, so the result may
// contain more than num tokens if the last consumed line has extra ones.
// The tokens are returned joined with single spaces.
//
// Tokens are split on any whitespace, including tabs: if a tab-separated pair
// were counted as one token, tokens would be pulled in from the following line.
func extractArgsTokens(body []byte, num int) string {
	var args []string
	for pos := 0; len(args) < num && pos < len(body); {
		lineEnd := bytes.IndexByte(body[pos:], '\n')
		if lineEnd == -1 {
			lineEnd = len(body) - pos
		}
		// Fields also drops the trailing '\r' of CRLF-terminated lines.
		args = append(args, strings.Fields(string(body[pos:pos+lineEnd]))...)
		pos += lineEnd + 1
	}
	return strings.Join(args, " ")
}
236
// extractArgsLine returns the first non-blank line of body with surrounding
// whitespace removed, or an empty string if body contains only whitespace.
func extractArgsLine(body []byte) string {
	// Drop leading blanks, including empty lines, to reach the first line with text.
	rest := bytes.TrimLeft(body, " \t\n\r")
	end := bytes.IndexByte(rest, '\n')
	if end == -1 {
		end = len(rest)
	}
	return strings.TrimSpace(string(rest[:end]))
}
249
// parseBody decodes a message (or a single MIME part) read from r according to headers.
// It returns the text/plain body and the list of attachments:
//   - a part with Content-Disposition "attachment" yields one attachment and no body;
//   - a text/plain part yields the body;
//   - a multipart/* part is parsed recursively: the first non-nil body among its
//     sub-parts becomes the body and all attachments are collected;
//   - any other media type yields nothing.
//
// Quoted-printable and base64 transfer encodings are decoded.
func parseBody(r io.Reader, headers mail.Header) ([]byte, [][]byte, error) {
	// git-send-email sends emails without Content-Type, let's assume it's text.
	mediaType := "text/plain"
	var params map[string]string
	if contentType := headers.Get("Content-Type"); contentType != "" {
		var err error
		if mediaType, params, err = mime.ParseMediaType(contentType); err != nil {
			return nil, nil, fmt.Errorf("failed to parse email header 'Content-Type': %v", err)
		}
	}
	encoding := strings.ToLower(headers.Get("Content-Transfer-Encoding"))
	switch encoding {
	case "quoted-printable":
		r = quotedprintable.NewReader(r)
	case "base64":
		r = base64.NewDecoder(base64.StdEncoding, r)
	}
	// A missing or malformed Content-Disposition simply means "not an attachment".
	disposition, _, _ := mime.ParseMediaType(headers.Get("Content-Disposition"))
	switch {
	case disposition == "attachment":
		data, err := ioutil.ReadAll(r)
		if err != nil {
			return nil, nil, fmt.Errorf("failed to read email body: %v", err)
		}
		return nil, [][]byte{data}, nil
	case mediaType == "text/plain":
		text, err := ioutil.ReadAll(r)
		if err != nil {
			return nil, nil, fmt.Errorf("failed to read email body: %v", err)
		}
		return text, nil, nil
	case !strings.HasPrefix(mediaType, "multipart/"):
		return nil, nil, nil
	}
	var (
		body        []byte
		attachments [][]byte
	)
	parts := multipart.NewReader(r, params["boundary"])
	for {
		part, err := parts.NextPart()
		if err == io.EOF {
			return body, attachments, nil
		}
		if err != nil {
			return nil, nil, fmt.Errorf("failed to parse MIME parts: %v", err)
		}
		partBody, partAttachments, err := parseBody(part, mail.Header(part.Header))
		if err != nil {
			return nil, nil, err
		}
		// Only the first body found is kept.
		if body == nil {
			body = partBody
		}
		attachments = append(attachments, partAttachments...)
	}
}
306
// MergeEmailLists merges several email lists into one sorted list of bare addresses.
// Duplicates, entries that do not parse as an address and overly long addresses are dropped.
// The result is truncated to a fixed maximum number of addresses.
func MergeEmailLists(lists ...[]string) []string {
	const (
		maxEmailLen = 1000
		maxEmails   = 50
	)
	seen := make(map[string]struct{})
	var unique []string
	for _, list := range lists {
		for _, entry := range list {
			parsed, err := mail.ParseAddress(entry)
			if err != nil || len(parsed.Address) > maxEmailLen {
				continue
			}
			if _, dup := seen[parsed.Address]; dup {
				continue
			}
			seen[parsed.Address] = struct{}{}
			unique = append(unique, parsed.Address)
		}
	}
	sort.Strings(unique)
	if len(unique) > maxEmails {
		return unique[:maxEmails]
	}
	return unique
}
333