• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2022 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "icing/tokenization/rfc822-tokenizer.h"
16 
17 #include <memory>
18 #include <string>
19 #include <string_view>
20 
21 #include "gmock/gmock.h"
22 #include "gtest/gtest.h"
23 #include "icing/testing/common-matchers.h"
24 
25 namespace icing {
26 namespace lib {
27 namespace {
28 using ::testing::ElementsAre;
29 using ::testing::IsEmpty;
30 
// Checks the iterator's state around the first Advance(): GetTokens() must be
// empty before advancing and non-empty once Advance() has returned true.
TEST(Rfc822TokenizerTest, StartingState) {
  Rfc822Tokenizer tokenizer{};
  const std::string input = "a@g.c";
  auto iterator = tokenizer.Tokenize(input).ValueOrDie();

  // Nothing has been consumed yet.
  ASSERT_THAT(iterator->GetTokens(), IsEmpty());
  ASSERT_TRUE(iterator->Advance());
  ASSERT_THAT(iterator->GetTokens(), ::testing::Not(IsEmpty()));
}
40 
// Two consecutive commas leave an empty entry between "<alex>" and "<tom>".
// The expected list contains only the tokens of the two bracketed addresses,
// i.e. the empty entry contributes nothing.
TEST(Rfc822TokenizerTest, EmptyMiddleToken) {
  Rfc822Tokenizer tokenizer{};
  const std::string input = "<alex>,,<tom>";

  EXPECT_THAT(
      tokenizer.TokenizeAll(input),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN, "<alex>"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "alex"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "alex"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "alex"),
          EqualsToken(Token::Type::RFC822_TOKEN, "<tom>"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "tom"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "tom"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "tom"))));
}
58 
// A single bracketed address whose local part starts with a non-ASCII
// character. Expects the whole-token, local/host address, full address and
// per-component tokens in that order.
TEST(Rfc822TokenizerTest, Simple) {
  Rfc822Tokenizer tokenizer{};
  const std::string_view input = "<你alex@google.com>";

  EXPECT_THAT(
      tokenizer.TokenizeAll(input),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN, "<你alex@google.com>"),
          EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "你alex"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "你alex@google.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "你alex"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"))));
}
75 
// Minimal one-character inputs: a single quoted word, two quoted words
// separated by a comma, and a lone parenthesised comment.
TEST(Rfc822TokenizerTest, Small) {
  Rfc822Tokenizer tokenizer{};

  // One quoted word: expected tokens carry the text without the quotes.
  const std::string single_quoted = "\"a\"";
  EXPECT_THAT(tokenizer.TokenizeAll(single_quoted),
              IsOkAndHolds(ElementsAre(
                  EqualsToken(Token::Type::RFC822_TOKEN, "a"),
                  EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "a"),
                  EqualsToken(Token::Type::RFC822_ADDRESS, "a"),
                  EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "a"))));

  // Two quoted words separated by ", ": each gets its own group of tokens.
  const std::string two_quoted = "\"a\", \"b\"";
  EXPECT_THAT(tokenizer.TokenizeAll(two_quoted),
              IsOkAndHolds(ElementsAre(
                  EqualsToken(Token::Type::RFC822_TOKEN, "a"),
                  EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "a"),
                  EqualsToken(Token::Type::RFC822_ADDRESS, "a"),
                  EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "a"),
                  EqualsToken(Token::Type::RFC822_TOKEN, "b"),
                  EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "b"),
                  EqualsToken(Token::Type::RFC822_ADDRESS, "b"),
                  EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "b"))));

  // A comment on its own: the whole token keeps the parentheses, the comment
  // token does not.
  const std::string lone_comment = "(a)";
  EXPECT_THAT(
      tokenizer.TokenizeAll(lone_comment),
      IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::RFC822_TOKEN, "(a)"),
                               EqualsToken(Token::Type::RFC822_COMMENT, "a"))));
}
108 
// Bare words around a comment, followed by a comma and a bracketed address.
// Expects "peanut", "comment" and "butter" to each form their own token group
// ahead of the tokens for "<alex@google.com>".
TEST(Rfc822TokenizerTest, PB) {
  Rfc822Tokenizer tokenizer{};
  const std::string_view input = "peanut (comment) butter, <alex@google.com>";

  EXPECT_THAT(
      tokenizer.TokenizeAll(input),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN, "peanut"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "peanut"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "peanut"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "peanut"),
          EqualsToken(Token::Type::RFC822_TOKEN, "comment"),
          EqualsToken(Token::Type::RFC822_COMMENT, "comment"),
          EqualsToken(Token::Type::RFC822_TOKEN, "butter"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "butter"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "butter"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "butter"),
          EqualsToken(Token::Type::RFC822_TOKEN, "<alex@google.com>"),
          EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "alex"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "alex@google.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "alex"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"))));
}
135 
// An address written without angle brackets. Note the expected order differs
// from the bracketed case: component tokens come first, then the full address,
// then the local and host address tokens.
TEST(Rfc822TokenizerTest, NoBrackets) {
  Rfc822Tokenizer tokenizer{};
  const std::string_view input = "alex@google.com";

  EXPECT_THAT(
      tokenizer.TokenizeAll(input),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN, "alex@google.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "alex"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "alex@google.com"),
          EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "alex"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"))));
}
152 
// Two bracketed addresses separated by "; ". Expects one complete token group
// per address, in input order.
TEST(Rfc822TokenizerTest, TwoAddresses) {
  Rfc822Tokenizer tokenizer{};
  const std::string_view input = "<你alex@google.com>; <alexsav@gmail.com>";

  EXPECT_THAT(
      tokenizer.TokenizeAll(input),
      IsOkAndHolds(ElementsAre(
          // First address.
          EqualsToken(Token::Type::RFC822_TOKEN, "<你alex@google.com>"),
          EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "你alex"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "你alex@google.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "你alex"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"),
          // Second address.
          EqualsToken(Token::Type::RFC822_TOKEN, "<alexsav@gmail.com>"),
          EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "alexsav"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "gmail.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "alexsav@gmail.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "alexsav"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "gmail"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"))));
}
176 
// A parenthesised comment preceding a bracketed address. Expects a single
// whole token spanning both, one COMMENT token per comment word, then the
// address tokens.
TEST(Rfc822TokenizerTest, Comment) {
  Rfc822Tokenizer tokenizer{};
  const std::string_view input = "(a comment) <alex@google.com>";

  EXPECT_THAT(
      tokenizer.TokenizeAll(input),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN,
                      "(a comment) <alex@google.com>"),
          EqualsToken(Token::Type::RFC822_COMMENT, "a"),
          EqualsToken(Token::Type::RFC822_COMMENT, "comment"),
          EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "alex"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "alex@google.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "alex"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"))));
}
195 
// A quoted name plus unquoted name words ahead of a bracketed address. Expects
// every word, quoted or not, to be reported as a NAME token before the address
// tokens.
TEST(Rfc822TokenizerTest, NameAndComment) {
  Rfc822Tokenizer tokenizer{};
  const std::string_view input = "\"a name\" also a name <alex@google.com>";

  EXPECT_THAT(
      tokenizer.TokenizeAll(input),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN,
                      "\"a name\" also a name <alex@google.com>"),
          EqualsToken(Token::Type::RFC822_NAME, "a"),
          EqualsToken(Token::Type::RFC822_NAME, "name"),
          EqualsToken(Token::Type::RFC822_NAME, "also"),
          EqualsToken(Token::Type::RFC822_NAME, "a"),
          EqualsToken(Token::Type::RFC822_NAME, "name"),
          EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "alex"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "alex@google.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "alex"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"))));
}
217 
218 // Test from tokenizer_test.cc.
TEST(Rfc822TokenizerTest,Rfc822SanityCheck)219 TEST(Rfc822TokenizerTest, Rfc822SanityCheck) {
220   Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
221 
222   std::string addr1("A name (A comment) <address@domain.com>");
223   std::string addr2(
224       "\"(Another name)\" (A different comment) "
225       "<bob-loblaw@foo.bar.com>");
226   std::string addr3("<no.at.sign.present>");
227   std::string addr4("<double@at@signs.present>");
228   std::string rfc822 = addr1 + ", " + addr2 + ", " + addr3 + ", " + addr4;
229   EXPECT_THAT(
230       rfc822_tokenizer.TokenizeAll(rfc822),
231       IsOkAndHolds(ElementsAre(
232 
233           EqualsToken(Token::Type::RFC822_TOKEN, addr1),
234           EqualsToken(Token::Type::RFC822_NAME, "A"),
235           EqualsToken(Token::Type::RFC822_NAME, "name"),
236           EqualsToken(Token::Type::RFC822_COMMENT, "A"),
237           EqualsToken(Token::Type::RFC822_COMMENT, "comment"),
238           EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "address"),
239           EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "domain.com"),
240           EqualsToken(Token::Type::RFC822_ADDRESS, "address@domain.com"),
241           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "address"),
242           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "domain"),
243           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"),
244 
245           EqualsToken(Token::Type::RFC822_TOKEN, addr2),
246           EqualsToken(Token::Type::RFC822_NAME, "Another"),
247           EqualsToken(Token::Type::RFC822_NAME, "name"),
248           EqualsToken(Token::Type::RFC822_COMMENT, "A"),
249           EqualsToken(Token::Type::RFC822_COMMENT, "different"),
250           EqualsToken(Token::Type::RFC822_COMMENT, "comment"),
251           EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "bob-loblaw"),
252           EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "foo.bar.com"),
253           EqualsToken(Token::Type::RFC822_ADDRESS, "bob-loblaw@foo.bar.com"),
254           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "bob"),
255           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "loblaw"),
256           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "foo"),
257           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "bar"),
258           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"),
259 
260           EqualsToken(Token::Type::RFC822_TOKEN, addr3),
261           EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "no.at.sign.present"),
262           EqualsToken(Token::Type::RFC822_ADDRESS, "no.at.sign.present"),
263           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "no"),
264           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "at"),
265           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "sign"),
266           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "present"),
267 
268           EqualsToken(Token::Type::RFC822_TOKEN, addr4),
269           EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "double@at"),
270           EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "signs.present"),
271           EqualsToken(Token::Type::RFC822_ADDRESS, "double@at@signs.present"),
272           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "double"),
273           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "at"),
274           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "signs"),
275           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "present"))));
276 }
277 
278 // Tests from rfc822 converter.
TEST(Rfc822TokenizerTest,SimpleRfcText)279 TEST(Rfc822TokenizerTest, SimpleRfcText) {
280   Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
281   std::string test_string =
282       "foo@google.com,bar@google.com,baz@google.com,foo+hello@google.com,baz@"
283       "corp.google.com";
284 
285   EXPECT_THAT(
286       rfc822_tokenizer.TokenizeAll(test_string),
287       IsOkAndHolds(ElementsAre(
288           EqualsToken(Token::Type::RFC822_TOKEN, "foo@google.com"),
289           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "foo"),
290           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
291           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"),
292           EqualsToken(Token::Type::RFC822_ADDRESS, "foo@google.com"),
293           EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "foo"),
294           EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),
295 
296           EqualsToken(Token::Type::RFC822_TOKEN, "bar@google.com"),
297           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "bar"),
298           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
299           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"),
300           EqualsToken(Token::Type::RFC822_ADDRESS, "bar@google.com"),
301           EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "bar"),
302           EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),
303 
304           EqualsToken(Token::Type::RFC822_TOKEN, "baz@google.com"),
305           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "baz"),
306           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
307           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"),
308           EqualsToken(Token::Type::RFC822_ADDRESS, "baz@google.com"),
309           EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "baz"),
310           EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),
311 
312           EqualsToken(Token::Type::RFC822_TOKEN, "foo+hello@google.com"),
313           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "foo"),
314           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "hello"),
315           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
316           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"),
317           EqualsToken(Token::Type::RFC822_ADDRESS, "foo+hello@google.com"),
318           EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "foo+hello"),
319           EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),
320 
321           EqualsToken(Token::Type::RFC822_TOKEN, "baz@corp.google.com"),
322           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "baz"),
323           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "corp"),
324           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
325           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"),
326           EqualsToken(Token::Type::RFC822_ADDRESS, "baz@corp.google.com"),
327           EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "baz"),
328           EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "corp.google.com"))));
329 }
330 
// A deliberately awkward entry (quoted name with punctuation and escapes, a
// comment containing quotes and escapes, an address with spaces, parentheses
// and a comma in its local part) followed, on a new line, by a plain bracketed
// address. Expects two whole tokens: the awkward entry and "<easy@google.com>".
TEST(Rfc822TokenizerTest, ComplicatedRfcText) {
  Rfc822Tokenizer tokenizer{};
  // The raw literal spans two source lines, so the input contains a newline
  // and the continuation line's leading spaces.
  const std::string input =
      R"raw("Weird, But&(Also)\\Valid" Name (!With, "an" \\odd\\ cmt too¡) <Foo B(a)r,Baz@g.co>
      <easy@google.com>)raw";

  EXPECT_THAT(
      tokenizer.TokenizeAll(input),
      IsOkAndHolds(ElementsAre(
          EqualsToken(
              Token::Type::RFC822_TOKEN,
              R"raw("Weird, But&(Also)\\Valid" Name (!With, "an" \\odd\\ cmt too¡) <Foo B(a)r,Baz@g.co>)raw"),
          // Name words.
          EqualsToken(Token::Type::RFC822_NAME, "Weird"),
          EqualsToken(Token::Type::RFC822_NAME, "But"),
          EqualsToken(Token::Type::RFC822_NAME, "Also"),
          EqualsToken(Token::Type::RFC822_NAME, "Valid"),
          EqualsToken(Token::Type::RFC822_NAME, "Name"),
          // Comment words.
          EqualsToken(Token::Type::RFC822_COMMENT, "With"),
          EqualsToken(Token::Type::RFC822_COMMENT, "an"),
          EqualsToken(Token::Type::RFC822_COMMENT, "odd"),
          EqualsToken(Token::Type::RFC822_COMMENT, "cmt"),
          EqualsToken(Token::Type::RFC822_COMMENT, "too"),
          // Address and its components.
          EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "Foo B(a)r,Baz"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "g.co"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "Foo B(a)r,Baz@g.co"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "Foo"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "B"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "a"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "r"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "Baz"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "g"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "co"),
          // Second entry.
          EqualsToken(Token::Type::RFC822_TOKEN, "<easy@google.com>"),
          EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "easy"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "easy@google.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "easy"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"))));
}
371 
// Regression input from http://b/8988210: the string "&\r" repeated 100 times
// is placed in both the quoted name and the comment of one entry. This used to
// trigger an HTML parsing exception; HTML parsing was dropped (b/8388100), so
// the test only checks that tokenization succeeds and that the whole-token
// text equals the input. No NAME or COMMENT tokens are expected.
TEST(Rfc822TokenizerTest, FromHtmlBugs) {
  Rfc822Tokenizer tokenizer{};

  // Build the repeated run once and reuse it for both fields.
  std::string crashing_run;
  for (int repeat = 0; repeat < 100; ++repeat) {
    crashing_run += "&\r";
  }
  const std::string input =
      "\"" + crashing_run + "\" (" + crashing_run + ") <foo@google.com>";

  EXPECT_THAT(
      tokenizer.TokenizeAll(input),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN, input),
          EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "foo"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "foo@google.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "foo"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"))));
}
403 
// Inputs in which one or more of name, comment and address are empty.
TEST(Rfc822TokenizerTest, EmptyComponentsTest) {
  Rfc822Tokenizer tokenizer{};

  // Empty input produces no tokens at all.
  EXPECT_THAT(tokenizer.TokenizeAll(""), IsOkAndHolds(IsEmpty()));

  // With an empty "<>" the name is reported with address token types.
  EXPECT_THAT(
      tokenizer.TokenizeAll("name<>"),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN, "name"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "name"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "name"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "name"))));

  // A comment with no name or address: the whole token and one COMMENT token
  // per word are still expected.
  EXPECT_THAT(
      tokenizer.TokenizeAll("(a long comment with nothing else)"),
      IsOkAndHolds(
          ElementsAre(EqualsToken(Token::Type::RFC822_TOKEN,
                                  "(a long comment with nothing else)"),
                      EqualsToken(Token::Type::RFC822_COMMENT, "a"),
                      EqualsToken(Token::Type::RFC822_COMMENT, "long"),
                      EqualsToken(Token::Type::RFC822_COMMENT, "comment"),
                      EqualsToken(Token::Type::RFC822_COMMENT, "with"),
                      EqualsToken(Token::Type::RFC822_COMMENT, "nothing"),
                      EqualsToken(Token::Type::RFC822_COMMENT, "else"))));

  // An empty comment after a name contributes no tokens.
  EXPECT_THAT(
      tokenizer.TokenizeAll("name ()"),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN, "name"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "name"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "name"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "name"))));

  // An empty quoted string after a comment stays in the whole token's text but
  // yields no token of its own.
  EXPECT_THAT(tokenizer.TokenizeAll(R"((comment) "")"),
              IsOkAndHolds(ElementsAre(
                  EqualsToken(Token::Type::RFC822_TOKEN, "(comment) \"\""),
                  EqualsToken(Token::Type::RFC822_COMMENT, "comment"))));
}
444 
TEST(Rfc822TokenizerTest,NameTest)445 TEST(Rfc822TokenizerTest, NameTest) {
446   Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
447 
448   // Name spread between address or comment.
449   EXPECT_THAT(
450       rfc822_tokenizer.TokenizeAll("peanut <address> butter"),
451       IsOkAndHolds(ElementsAre(
452           EqualsToken(Token::Type::RFC822_TOKEN, "peanut <address> butter"),
453           EqualsToken(Token::Type::RFC822_NAME, "peanut"),
454           EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "address"),
455           EqualsToken(Token::Type::RFC822_ADDRESS, "address"),
456           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "address"),
457           EqualsToken(Token::Type::RFC822_NAME, "butter"))));
458 
459   EXPECT_THAT(
460       rfc822_tokenizer.TokenizeAll("peanut (comment) butter"),
461       IsOkAndHolds(ElementsAre(
462           EqualsToken(Token::Type::RFC822_TOKEN, "peanut"),
463           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "peanut"),
464           EqualsToken(Token::Type::RFC822_ADDRESS, "peanut"),
465           EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "peanut"),
466           EqualsToken(Token::Type::RFC822_TOKEN, "comment"),
467           EqualsToken(Token::Type::RFC822_COMMENT, "comment"),
468           EqualsToken(Token::Type::RFC822_TOKEN, "butter"),
469           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "butter"),
470           EqualsToken(Token::Type::RFC822_ADDRESS, "butter"),
471           EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "butter"))));
472 
473   // Dropping quotes when they're not needed.
474   std::string s = R"(peanut <address> "butter")";
475   EXPECT_THAT(
476       rfc822_tokenizer.TokenizeAll(s),
477       IsOkAndHolds(ElementsAre(
478           EqualsToken(Token::Type::RFC822_TOKEN, s),
479           EqualsToken(Token::Type::RFC822_NAME, "peanut"),
480           EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "address"),
481           EqualsToken(Token::Type::RFC822_ADDRESS, "address"),
482           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "address"),
483           EqualsToken(Token::Type::RFC822_NAME, "butter"))));
484 
485   s = R"(peanut "butter")";
486   EXPECT_THAT(
487       rfc822_tokenizer.TokenizeAll(s),
488       IsOkAndHolds(ElementsAre(
489           EqualsToken(Token::Type::RFC822_TOKEN, "peanut"),
490           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "peanut"),
491           EqualsToken(Token::Type::RFC822_ADDRESS, "peanut"),
492           EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "peanut"),
493           EqualsToken(Token::Type::RFC822_TOKEN, "butter"),
494           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "butter"),
495           EqualsToken(Token::Type::RFC822_ADDRESS, "butter"),
496           EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "butter"))));
497   // Adding quotes when they are needed.
498   EXPECT_THAT(
499       rfc822_tokenizer.TokenizeAll("ple@se quote this <addr>"),
500       IsOkAndHolds(ElementsAre(
501           EqualsToken(Token::Type::RFC822_TOKEN, "ple@se quote this <addr>"),
502           EqualsToken(Token::Type::RFC822_NAME, "ple"),
503           EqualsToken(Token::Type::RFC822_NAME, "se"),
504           EqualsToken(Token::Type::RFC822_NAME, "quote"),
505           EqualsToken(Token::Type::RFC822_NAME, "this"),
506           EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "addr"),
507           EqualsToken(Token::Type::RFC822_ADDRESS, "addr"),
508           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "addr"))));
509 }
510 
// Backslash escapes and nested parentheses inside comments.
TEST(Rfc822TokenizerTest, CommentEscapeTest) {
  Rfc822Tokenizer tokenizer{};

  // Escaped ')', '\' and '(' do not terminate the comment; each one separates
  // the surrounding text into distinct COMMENT tokens.
  EXPECT_THAT(tokenizer.TokenizeAll(R"((co\)mm\\en\(t))"),
              IsOkAndHolds(ElementsAre(
                  EqualsToken(Token::Type::RFC822_TOKEN, R"((co\)mm\\en\(t))"),
                  EqualsToken(Token::Type::RFC822_COMMENT, "co"),
                  EqualsToken(Token::Type::RFC822_COMMENT, "mm"),
                  EqualsToken(Token::Type::RFC822_COMMENT, "en"),
                  EqualsToken(Token::Type::RFC822_COMMENT, "t"))));

  // Backslashes before ordinary letters are kept: the comment is expected as
  // a single COMMENT token that still contains them.
  EXPECT_THAT(
      tokenizer.TokenizeAll(R"((c\om\ment) name)"),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN, R"(c\om\ment)"),
          EqualsToken(Token::Type::RFC822_COMMENT, R"(c\om\ment)"),
          EqualsToken(Token::Type::RFC822_TOKEN, "name"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "name"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "name"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "name"))));

  // A nested parenthesis containing an escaped ')': the comment continues
  // until the outer ')' and is split into three COMMENT tokens.
  EXPECT_THAT(
      tokenizer.TokenizeAll(R"((co(m\))ment) name)"),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN, R"(co(m\))ment)"),
          EqualsToken(Token::Type::RFC822_COMMENT, "co"),
          EqualsToken(Token::Type::RFC822_COMMENT, "m"),
          EqualsToken(Token::Type::RFC822_COMMENT, "ment"),
          EqualsToken(Token::Type::RFC822_TOKEN, "name"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "name"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "name"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "name"))));
}
545 
// Backslash escapes inside unquoted and quoted names.
TEST(Rfc822TokenizerTest, QuoteEscapeTest) {
  Rfc822Tokenizer tokenizer{};

  // Unquoted name with backslashes, followed by an address: the backslashes
  // separate the name into "n", "a" and "me".
  EXPECT_THAT(
      tokenizer.TokenizeAll(R"(n\\a\me <addr>)"),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN, R"(n\\a\me <addr>)"),
          EqualsToken(Token::Type::RFC822_NAME, "n"),
          EqualsToken(Token::Type::RFC822_NAME, "a"),
          EqualsToken(Token::Type::RFC822_NAME, "me"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "addr"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "addr"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "addr"))));

  // Quoted name with no address: it is reported with address token types.
  // The whole/address tokens keep the escapes as written (minus the outer
  // quotes); note the middle component is expected to be "a\m".
  EXPECT_THAT(
      tokenizer.TokenizeAll(R"("n\\a\m\"e")"),
      // <n\am"e>
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN, R"(n\\a\m\"e)"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "n"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "a\\m"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "e"),
          EqualsToken(Token::Type::RFC822_ADDRESS, R"(n\\a\m\"e)"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, R"(n\\a\m\"e)"))));
}
574 
TEST(Rfc822TokenizerTest,UnterminatedComponentTest)575 TEST(Rfc822TokenizerTest, UnterminatedComponentTest) {
576   Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
577 
578   EXPECT_THAT(
579       rfc822_tokenizer.TokenizeAll("name (comment"),
580       IsOkAndHolds(ElementsAre(
581           EqualsToken(Token::Type::RFC822_TOKEN, "name"),
582           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "name"),
583           EqualsToken(Token::Type::RFC822_ADDRESS, "name"),
584           EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "name"),
585           EqualsToken(Token::Type::RFC822_TOKEN, "comment"),
586           EqualsToken(Token::Type::RFC822_COMMENT, "comment"))));
587 
588   EXPECT_THAT(
589       rfc822_tokenizer.TokenizeAll(R"(half of "the name)"),
590       IsOkAndHolds(ElementsAre(
591           EqualsToken(Token::Type::RFC822_TOKEN, "half"),
592           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "half"),
593           EqualsToken(Token::Type::RFC822_ADDRESS, "half"),
594           EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "half"),
595           EqualsToken(Token::Type::RFC822_TOKEN, "of"),
596           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "of"),
597           EqualsToken(Token::Type::RFC822_ADDRESS, "of"),
598           EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "of"),
599           EqualsToken(Token::Type::RFC822_TOKEN, "the name"),
600           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "the"),
601           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "name"),
602           EqualsToken(Token::Type::RFC822_ADDRESS, "the name"),
603           EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "the name"))));
604 
605   EXPECT_THAT(
606       rfc822_tokenizer.TokenizeAll(R"("name\)"),
607       IsOkAndHolds(ElementsAre(
608           EqualsToken(Token::Type::RFC822_TOKEN, "name"),
609           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "name"),
610           EqualsToken(Token::Type::RFC822_ADDRESS, "name"),
611           EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "name"))));
612 
613   EXPECT_THAT(
614       rfc822_tokenizer.TokenizeAll(R"(name (comment\)"),
615       IsOkAndHolds(ElementsAre(
616           EqualsToken(Token::Type::RFC822_TOKEN, "name"),
617           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "name"),
618           EqualsToken(Token::Type::RFC822_ADDRESS, "name"),
619           EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "name"),
620           EqualsToken(Token::Type::RFC822_TOKEN, "comment"),
621           EqualsToken(Token::Type::RFC822_COMMENT, "comment"))));
622 
623   EXPECT_THAT(
624       rfc822_tokenizer.TokenizeAll(R"(<addr> "name\)"),
625       IsOkAndHolds(ElementsAre(
626           EqualsToken(Token::Type::RFC822_TOKEN, "<addr> \"name\\"),
627           EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "addr"),
628           EqualsToken(Token::Type::RFC822_ADDRESS, "addr"),
629           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "addr"),
630           EqualsToken(Token::Type::RFC822_NAME, "name"))));
631 
632   EXPECT_THAT(
633       rfc822_tokenizer.TokenizeAll(R"(name (comment\))"),
634       IsOkAndHolds(ElementsAre(
635           EqualsToken(Token::Type::RFC822_TOKEN, "name"),
636           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "name"),
637           EqualsToken(Token::Type::RFC822_ADDRESS, "name"),
638           EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "name"),
639           EqualsToken(Token::Type::RFC822_TOKEN, "comment"),
640           EqualsToken(Token::Type::RFC822_COMMENT, "comment"))));
641 }
642 
TEST(Rfc822TokenizerTest,Tokenize)643 TEST(Rfc822TokenizerTest, Tokenize) {
644   Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
645 
646   std::string text =
647       R"raw("Berg" (home) <berg\@google.com>, tom\@google.com (work))raw";
648   EXPECT_THAT(
649       rfc822_tokenizer.TokenizeAll(text),
650       IsOkAndHolds(ElementsAre(
651           EqualsToken(Token::Type::RFC822_TOKEN,
652                       R"("Berg" (home) <berg\@google.com>)"),
653           EqualsToken(Token::Type::RFC822_NAME, "Berg"),
654           EqualsToken(Token::Type::RFC822_COMMENT, "home"),
655           EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "berg\\"),
656           EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),
657           EqualsToken(Token::Type::RFC822_ADDRESS, "berg\\@google.com"),
658           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "berg"),
659           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
660           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"),
661           EqualsToken(Token::Type::RFC822_TOKEN, "tom\\@google.com"),
662           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "tom"),
663           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
664           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"),
665           EqualsToken(Token::Type::RFC822_ADDRESS, "tom\\@google.com"),
666           EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "tom\\@google.com"),
667           EqualsToken(Token::Type::RFC822_TOKEN, "work"),
668           EqualsToken(Token::Type::RFC822_COMMENT, "work"))));
669 
670   text = R"raw(Foo Bar (something) <foo\@google.com>, )raw"
671          R"raw(blah\@google.com (something))raw";
672   EXPECT_THAT(
673       rfc822_tokenizer.TokenizeAll(text),
674       IsOkAndHolds(ElementsAre(
675           EqualsToken(Token::Type::RFC822_TOKEN,
676                       "Foo Bar (something) <foo\\@google.com>"),
677           EqualsToken(Token::Type::RFC822_NAME, "Foo"),
678           EqualsToken(Token::Type::RFC822_NAME, "Bar"),
679           EqualsToken(Token::Type::RFC822_COMMENT, "something"),
680           EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "foo\\"),
681           EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),
682           EqualsToken(Token::Type::RFC822_ADDRESS, "foo\\@google.com"),
683           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "foo"),
684           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
685           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"),
686           EqualsToken(Token::Type::RFC822_TOKEN, "blah\\@google.com"),
687           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "blah"),
688           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
689           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"),
690           EqualsToken(Token::Type::RFC822_ADDRESS, "blah\\@google.com"),
691           EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "blah\\@google.com"),
692           EqualsToken(Token::Type::RFC822_TOKEN, "something"),
693           EqualsToken(Token::Type::RFC822_COMMENT, "something"))));
694 }
695 
TEST(Rfc822TokenizerTest,EdgeCases)696 TEST(Rfc822TokenizerTest, EdgeCases) {
697   Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
698 
699   // Text to trigger the scenario where you have a non-alphabetic followed
700   // by a \ followed by non alphabetic to end an in-address token.
701   std::string text = R"raw(<be.\&rg@google.com>)raw";
702   EXPECT_THAT(
703       rfc822_tokenizer.TokenizeAll(text),
704       IsOkAndHolds(ElementsAre(
705           EqualsToken(Token::Type::RFC822_TOKEN,
706                       R"raw(<be.\&rg@google.com>)raw"),
707           EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "be.\\&rg"),
708           EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),
709           EqualsToken(Token::Type::RFC822_ADDRESS, "be.\\&rg@google.com"),
710           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "be"),
711           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "rg"),
712           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
713           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"))));
714 
715   // A \ followed by an alphabetic shouldn't end the token.
716   text = "<a\\lex@google.com>";
717   EXPECT_THAT(
718       rfc822_tokenizer.TokenizeAll(text),
719       IsOkAndHolds(ElementsAre(
720           EqualsToken(Token::Type::RFC822_TOKEN, "<a\\lex@google.com>"),
721           EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "a\\lex"),
722           EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),
723           EqualsToken(Token::Type::RFC822_ADDRESS, "a\\lex@google.com"),
724           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "a\\lex"),
725           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
726           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"))));
727 
728   // \\ or \" in a quoted section.
729   text = R"("al\\ex@goo\"<idk>gle.com")";
730   EXPECT_THAT(
731       rfc822_tokenizer.TokenizeAll(text),
732       IsOkAndHolds(ElementsAre(
733           EqualsToken(Token::Type::RFC822_TOKEN, R"(al\\ex@goo\"<idk>gle.com)"),
734           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "al"),
735           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "ex"),
736           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "goo"),
737           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "idk"),
738           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "gle"),
739           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"),
740           EqualsToken(Token::Type::RFC822_ADDRESS,
741                       R"(al\\ex@goo\"<idk>gle.com)"),
742           EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "al\\\\ex"),
743           EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "goo\\\"<idk>gle.com"))));
744 
745   text = "<alex@google.com";
746   EXPECT_THAT(
747       rfc822_tokenizer.TokenizeAll(text),
748       IsOkAndHolds(ElementsAre(
749           EqualsToken(Token::Type::RFC822_TOKEN, "<alex@google.com"),
750           EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "alex"),
751           EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),
752           EqualsToken(Token::Type::RFC822_ADDRESS, "alex@google.com"),
753           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "alex"),
754           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
755           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"))));
756 }
757 
// A local part that begins with a digit is kept as one local component.
TEST(Rfc822TokenizerTest, NumberInAddress) {
  auto tokenizer = Rfc822Tokenizer();
  const std::string input = "<3alex@google.com>";
  EXPECT_THAT(
      tokenizer.TokenizeAll(input),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN, "<3alex@google.com>"),
          EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "3alex"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "3alex@google.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "3alex"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"))));
}
772 
// Quoted sections that end in an escaped quote or contain a run of
// backslashes.
TEST(Rfc822TokenizerTest, DoubleQuoteDoubleSlash) {
  auto tokenizer = Rfc822Tokenizer();

  // Escaped double quote immediately before the closing quote.
  const std::string escaped_quote_input = R"("alex\"")";
  EXPECT_THAT(
      tokenizer.TokenizeAll(escaped_quote_input),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN, "alex"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "alex"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "alex"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "alex"))));

  // Three consecutive backslashes followed by a letter.
  const std::string triple_backslash_input = R"("alex\\\a")";
  EXPECT_THAT(
      tokenizer.TokenizeAll(triple_backslash_input),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN, R"(alex\\\a)"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "alex"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "a"),
          EqualsToken(Token::Type::RFC822_ADDRESS, R"(alex\\\a)"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, R"(alex\\\a)"))));
}
794 
795 TEST(Rfc822TokenizerTest, TwoEmails) {
796   Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
797   std::string text = "tjbarron@google.com alexsav@google.com";
798   EXPECT_THAT(
799       rfc822_tokenizer.TokenizeAll(text),
800       IsOkAndHolds(ElementsAre(
801           EqualsToken(Token::Type::RFC822_TOKEN, "tjbarron@google.com"),
802           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "tjbarron"),
803           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
804           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"),
805           EqualsToken(Token::Type::RFC822_ADDRESS, "tjbarron@google.com"),
806           EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "tjbarron"),
807           EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),
808           EqualsToken(Token::Type::RFC822_TOKEN, "alexsav@google.com"),
809           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "alexsav"),
810           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
811           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"),
812           EqualsToken(Token::Type::RFC822_ADDRESS, "alexsav@google.com"),
813           EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "alexsav"),
814           EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"))));
815 }
816 
817 TEST(Rfc822TokenizerTest, BackSlashes) {
818   Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
819   std::string text = R"("\name")";
820   EXPECT_THAT(
821       rfc822_tokenizer.TokenizeAll(text),
822       IsOkAndHolds(ElementsAre(
823           EqualsToken(Token::Type::RFC822_TOKEN, "name"),
824           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "name"),
825           EqualsToken(Token::Type::RFC822_ADDRESS, "name"),
826           EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "name"))));
827 
828   text = R"("name@foo\@gmail")";
829   EXPECT_THAT(
830       rfc822_tokenizer.TokenizeAll(text),
831       IsOkAndHolds(ElementsAre(
832           EqualsToken(Token::Type::RFC822_TOKEN, "name@foo\\@gmail"),
833           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "name"),
834           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "foo"),
835           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "gmail"),
836           EqualsToken(Token::Type::RFC822_ADDRESS, "name@foo\\@gmail"),
837           EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "name"),
838           EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "foo\\@gmail"))));
839 }
840 
841 TEST(Rfc822TokenizerTest, BigWhitespace) {
842   Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
843   std::string text = "\"quoted\"              <address>";
844   EXPECT_THAT(
845       rfc822_tokenizer.TokenizeAll(text),
846       IsOkAndHolds(ElementsAre(
847           EqualsToken(Token::Type::RFC822_TOKEN, text),
848           EqualsToken(Token::Type::RFC822_NAME, "quoted"),
849           EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "address"),
850           EqualsToken(Token::Type::RFC822_ADDRESS, "address"),
851           EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "address"))));
852 }
853 
// Quoted text that starts with '@': every expected token is just "foo".
TEST(Rfc822TokenizerTest, AtSignFirst) {
  auto tokenizer = Rfc822Tokenizer();
  const std::string input = "\"@foo\"";
  EXPECT_THAT(
      tokenizer.TokenizeAll(input),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN, "foo"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "foo"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "foo"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "foo"))));
}
865 
// A backslash followed by a multi-byte (CJK) character inside quotes: the
// quoted content is expected back as a single unsplit component.
TEST(Rfc822TokenizerTest, SlashThenUnicode) {
  auto tokenizer = Rfc822Tokenizer();
  const std::string input = R"("quoted\你cjk")";
  EXPECT_THAT(
      tokenizer.TokenizeAll(input),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN, "quoted\\你cjk"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST,
                      "quoted\\你cjk"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "quoted\\你cjk"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "quoted\\你cjk"))));
}
878 
// A real address followed by an empty "<>" and a trailing name: no tokens are
// expected for the empty brackets.
TEST(Rfc822TokenizerTest, AddressEmptyAddress) {
  auto tokenizer = Rfc822Tokenizer();
  const std::string input = "<address> <> Name";
  EXPECT_THAT(
      tokenizer.TokenizeAll(input),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN, input),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "address"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "address"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "address"),
          EqualsToken(Token::Type::RFC822_NAME, "Name"))));
}
891 
// A closed comment placed directly in front of a bare address, with no
// separating whitespace.
TEST(Rfc822TokenizerTest, ProperComment) {
  auto tokenizer = Rfc822Tokenizer();
  const std::string input = "(comment)alex@google.com";
  EXPECT_THAT(
      tokenizer.TokenizeAll(input),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN, "comment)alex@google.com"),
          EqualsToken(Token::Type::RFC822_COMMENT, "comment"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "alex"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "alex@google.com"),
          EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "alex"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"))));
}
907 
// Minimal one-character address parts, first as a comma-separated pair and
// then with two backslashes in front of the '@'.
TEST(Rfc822TokenizerTest, SmallNameToEmail) {
  auto tokenizer = Rfc822Tokenizer();

  const std::string two_addresses = "a@g.c,b@g.c";
  EXPECT_THAT(tokenizer.TokenizeAll(two_addresses),
              IsOkAndHolds(ElementsAre(
                  // Tokens for "a@g.c".
                  EqualsToken(Token::Type::RFC822_TOKEN, "a@g.c"),
                  EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "a"),
                  EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "g"),
                  EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "c"),
                  EqualsToken(Token::Type::RFC822_ADDRESS, "a@g.c"),
                  EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "a"),
                  EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "g.c"),
                  // Tokens for "b@g.c".
                  EqualsToken(Token::Type::RFC822_TOKEN, "b@g.c"),
                  EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "b"),
                  EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "g"),
                  EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "c"),
                  EqualsToken(Token::Type::RFC822_ADDRESS, "b@g.c"),
                  EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "b"),
                  EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "g.c"))));

  // Two literal backslashes between the local part and the '@'.
  const std::string double_backslash = "a\\\\@g.c";
  EXPECT_THAT(tokenizer.TokenizeAll(double_backslash),
              IsOkAndHolds(ElementsAre(
                  EqualsToken(Token::Type::RFC822_TOKEN, "a\\\\@g.c"),
                  EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "a"),
                  EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "g"),
                  EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "c"),
                  EqualsToken(Token::Type::RFC822_ADDRESS, "a\\\\@g.c"),
                  EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "a"),
                  EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "g.c"))));
}
939 
// Addresses that end right after the '@' (no host part), once inside angle
// brackets and once bare: only local-side tokens are expected.
TEST(Rfc822TokenizerTest, AtSignLast) {
  auto tokenizer = Rfc822Tokenizer();
  const std::string_view input("<alex@>, tim@");
  EXPECT_THAT(
      tokenizer.TokenizeAll(input),
      IsOkAndHolds(ElementsAre(
          // "<alex@>"
          EqualsToken(Token::Type::RFC822_TOKEN, "<alex@>"),
          EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "alex"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "alex@"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "alex"),
          // "tim@"
          EqualsToken(Token::Type::RFC822_TOKEN, "tim"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "tim"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "tim"),
          EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "tim"))));
}
955 
// Input consisting solely of separators (commas and a trailing semicolon)
// succeeds and produces no tokens.
TEST(Rfc822TokenizerTest, Commas) {
  auto tokenizer = Rfc822Tokenizer();
  const std::string separators_only = ",,,,,,,,,,,,,,,,,,,,,,,,,,;";
  EXPECT_THAT(tokenizer.TokenizeAll(separators_only),
              IsOkAndHolds(IsEmpty()));
}
961 
// Checks ResetToTokenStartingAfter() on "a@g.c,b@g.c", where offset 5 is the
// comma and offset 6 is the first character of the second address.
TEST(Rfc822TokenizerTest, ResetToTokenStartingAfter) {
  auto tokenizer = Rfc822Tokenizer();
  std::string input = "a@g.c,b@g.c";
  auto iterator = tokenizer.Tokenize(input).ValueOrDie();

  // Step through both addresses before issuing any reset.
  ASSERT_TRUE(iterator->Advance());
  ASSERT_TRUE(iterator->Advance());

  // An offset before the start of the input lands on the first address.
  ASSERT_TRUE(iterator->ResetToTokenStartingAfter(-1));
  EXPECT_THAT(iterator->GetTokens().at(0).text, "a@g.c");

  // An offset on the comma lands on the second address.
  ASSERT_TRUE(iterator->ResetToTokenStartingAfter(5));
  EXPECT_THAT(iterator->GetTokens().at(0).text, "b@g.c");

  // The reset is expected to fail for offset 6.
  ASSERT_FALSE(iterator->ResetToTokenStartingAfter(6));
}
977 
// Checks ResetToTokenEndingBefore() on "a@g.c,b@g.c", where offset 4 is the
// last character of the first address and offset 5 is the comma.
TEST(Rfc822TokenizerTest, ResetToTokenEndingBefore) {
  Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
  std::string text = "a@g.c,b@g.c";
  auto token_iterator = rfc822_tokenizer.Tokenize(text).ValueOrDie();
  // Assert the advance instead of discarding its result, so a tokenizer
  // failure is reported here rather than as a confusing reset failure below.
  ASSERT_TRUE(token_iterator->Advance());

  // An offset on the comma lands on the first address.
  ASSERT_TRUE(token_iterator->ResetToTokenEndingBefore(5));
  EXPECT_THAT(token_iterator->GetTokens().at(0).text, "a@g.c");

  // The reset is expected to fail for offset 4.
  ASSERT_FALSE(token_iterator->ResetToTokenEndingBefore(4));
}
989 
990 }  // namespace
991 }  // namespace lib
992 }  // namespace icing
993