1 // Copyright (C) 2022 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "icing/tokenization/rfc822-tokenizer.h"
16
17 #include <memory>
18 #include <string>
19 #include <string_view>
20
21 #include "gmock/gmock.h"
22 #include "gtest/gtest.h"
23 #include "icing/testing/common-matchers.h"
24
25 namespace icing {
26 namespace lib {
27 namespace {
28 using ::testing::ElementsAre;
29 using ::testing::IsEmpty;
30
// Checks the iterator's initial state: it exposes no tokens until the first
// successful Advance(), and exposes at least one afterwards.
TEST(Rfc822TokenizerTest, StartingState) {
  Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
  std::string text = "a@g.c";
  auto token_iterator = rfc822_tokenizer.Tokenize(text).ValueOrDie();

  // Nothing has been consumed yet, so there is nothing to report.
  ASSERT_THAT(token_iterator->GetTokens(), IsEmpty());
  ASSERT_TRUE(token_iterator->Advance());
  // Not is spelled with its namespace: this file only has using-declarations
  // for ElementsAre and IsEmpty, so an unqualified Not would resolve solely
  // through argument-dependent lookup.
  ASSERT_THAT(token_iterator->GetTokens(), ::testing::Not(IsEmpty()));
}
40
// The input holds two bracketed entries separated by two consecutive commas.
// Only tokens for "<alex>" and "<tom>" are expected; nothing is expected for
// the empty entry between the commas.
TEST(Rfc822TokenizerTest, EmptyMiddleToken) {
  Rfc822Tokenizer tokenizer{};
  const std::string input = "<alex>,,<tom>";

  EXPECT_THAT(
      tokenizer.TokenizeAll(input),
      IsOkAndHolds(ElementsAre(
          // First entry.
          EqualsToken(Token::Type::RFC822_TOKEN, "<alex>"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "alex"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "alex"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "alex"),
          // Second entry.
          EqualsToken(Token::Type::RFC822_TOKEN, "<tom>"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "tom"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "tom"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "tom"))));
}
58
// A single bracketed address whose local part begins with a non-ASCII
// character. The expected sequence is the whole token, the local and host
// addresses, the full address, and then the individual components.
TEST(Rfc822TokenizerTest, Simple) {
  Rfc822Tokenizer tokenizer{};
  const std::string_view input = "<你alex@google.com>";

  EXPECT_THAT(
      tokenizer.TokenizeAll(input),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN, "<你alex@google.com>"),
          EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "你alex"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "你alex@google.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "你alex"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"))));
}
75
// Exercises very short inputs: one quoted word, two comma-separated quoted
// words, and a lone parenthesised comment.
TEST(Rfc822TokenizerTest, Small) {
  Rfc822Tokenizer tokenizer{};

  // One quoted word: the expected token text excludes the quotes and the word
  // is reported through the address token types.
  const std::string one_quoted = "\"a\"";
  EXPECT_THAT(
      tokenizer.TokenizeAll(one_quoted),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN, "a"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "a"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "a"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "a"))));

  // Two quoted words separated by a comma: the same four tokens per word.
  const std::string two_quoted = "\"a\", \"b\"";
  EXPECT_THAT(
      tokenizer.TokenizeAll(two_quoted),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN, "a"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "a"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "a"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "a"),
          EqualsToken(Token::Type::RFC822_TOKEN, "b"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "b"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "b"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "b"))));

  // A lone comment: the whole-token text keeps the parentheses, while the
  // comment token holds only the word inside them.
  const std::string only_comment = "(a)";
  EXPECT_THAT(
      tokenizer.TokenizeAll(only_comment),
      IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::RFC822_TOKEN, "(a)"),
                               EqualsToken(Token::Type::RFC822_COMMENT, "a"))));
}
108
// Two bare words around a comment, then a comma and a bracketed address.
// The bare words and the comment are each expected as their own RFC822_TOKEN,
// followed by the tokens for the bracketed address.
TEST(Rfc822TokenizerTest, PB) {
  Rfc822Tokenizer tokenizer{};
  const std::string_view input = "peanut (comment) butter, <alex@google.com>";

  EXPECT_THAT(
      tokenizer.TokenizeAll(input),
      IsOkAndHolds(ElementsAre(
          // "peanut"
          EqualsToken(Token::Type::RFC822_TOKEN, "peanut"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "peanut"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "peanut"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "peanut"),
          // "(comment)"
          EqualsToken(Token::Type::RFC822_TOKEN, "comment"),
          EqualsToken(Token::Type::RFC822_COMMENT, "comment"),
          // "butter"
          EqualsToken(Token::Type::RFC822_TOKEN, "butter"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "butter"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "butter"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "butter"),
          // "<alex@google.com>"
          EqualsToken(Token::Type::RFC822_TOKEN, "<alex@google.com>"),
          EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "alex"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "alex@google.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "alex"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"))));
}
135
// An address written without angle brackets. The expected order differs from
// the bracketed case: the components come first, then the full address, then
// the local and host addresses.
TEST(Rfc822TokenizerTest, NoBrackets) {
  Rfc822Tokenizer tokenizer{};
  const std::string_view input = "alex@google.com";

  EXPECT_THAT(
      tokenizer.TokenizeAll(input),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN, "alex@google.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "alex"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "alex@google.com"),
          EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "alex"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"))));
}
152
// Two bracketed addresses separated by a semicolon. Each address is expected
// to yield its own complete run of tokens.
TEST(Rfc822TokenizerTest, TwoAddresses) {
  Rfc822Tokenizer tokenizer{};
  const std::string_view input = "<你alex@google.com>; <alexsav@gmail.com>";

  EXPECT_THAT(
      tokenizer.TokenizeAll(input),
      IsOkAndHolds(ElementsAre(
          // First address.
          EqualsToken(Token::Type::RFC822_TOKEN, "<你alex@google.com>"),
          EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "你alex"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "你alex@google.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "你alex"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"),
          // Second address.
          EqualsToken(Token::Type::RFC822_TOKEN, "<alexsav@gmail.com>"),
          EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "alexsav"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "gmail.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "alexsav@gmail.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "alexsav"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "gmail"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"))));
}
176
// A comment followed by a bracketed address. One RFC822_TOKEN is expected to
// span the whole input, with the comment reported word by word ahead of the
// address tokens.
TEST(Rfc822TokenizerTest, Comment) {
  Rfc822Tokenizer tokenizer{};
  const std::string_view input = "(a comment) <alex@google.com>";

  EXPECT_THAT(
      tokenizer.TokenizeAll(input),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN,
                      "(a comment) <alex@google.com>"),
          EqualsToken(Token::Type::RFC822_COMMENT, "a"),
          EqualsToken(Token::Type::RFC822_COMMENT, "comment"),
          EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "alex"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "alex@google.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "alex"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"))));
}
195
// A quoted name, then more unquoted name words, then a bracketed address.
// Every name word, quoted or not, is expected as an RFC822_NAME token inside a
// single RFC822_TOKEN that spans the whole input.
TEST(Rfc822TokenizerTest, NameAndComment) {
  Rfc822Tokenizer tokenizer{};
  const std::string_view input = "\"a name\" also a name <alex@google.com>";

  EXPECT_THAT(
      tokenizer.TokenizeAll(input),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN,
                      "\"a name\" also a name <alex@google.com>"),
          // Words from the quoted part.
          EqualsToken(Token::Type::RFC822_NAME, "a"),
          EqualsToken(Token::Type::RFC822_NAME, "name"),
          // Words from the unquoted part.
          EqualsToken(Token::Type::RFC822_NAME, "also"),
          EqualsToken(Token::Type::RFC822_NAME, "a"),
          EqualsToken(Token::Type::RFC822_NAME, "name"),
          EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "alex"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "alex@google.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "alex"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"))));
}
217
218 // Test from tokenizer_test.cc.
// Tokenizes four comma-separated entries in one pass: name + comment +
// address, quoted name + comment + address, a bracketed address without '@',
// and a bracketed address with two '@' signs.
TEST(Rfc822TokenizerTest, Rfc822SanityCheck) {
  Rfc822Tokenizer tokenizer{};

  const std::string plain_entry = "A name (A comment) <address@domain.com>";
  const std::string quoted_entry =
      "\"(Another name)\" (A different comment) "
      "<bob-loblaw@foo.bar.com>";
  const std::string no_at_entry = "<no.at.sign.present>";
  const std::string double_at_entry = "<double@at@signs.present>";

  // Join the entries with ", " to form the full input.
  std::string address_list = plain_entry;
  address_list += ", ";
  address_list += quoted_entry;
  address_list += ", ";
  address_list += no_at_entry;
  address_list += ", ";
  address_list += double_at_entry;

  EXPECT_THAT(
      tokenizer.TokenizeAll(address_list),
      IsOkAndHolds(ElementsAre(
          // Entry 1: name, comment and address.
          EqualsToken(Token::Type::RFC822_TOKEN, plain_entry),
          EqualsToken(Token::Type::RFC822_NAME, "A"),
          EqualsToken(Token::Type::RFC822_NAME, "name"),
          EqualsToken(Token::Type::RFC822_COMMENT, "A"),
          EqualsToken(Token::Type::RFC822_COMMENT, "comment"),
          EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "address"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "domain.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "address@domain.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "address"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "domain"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"),

          // Entry 2: the parentheses inside the quoted name are not part of
          // the expected name tokens; the hyphen splits the local components.
          EqualsToken(Token::Type::RFC822_TOKEN, quoted_entry),
          EqualsToken(Token::Type::RFC822_NAME, "Another"),
          EqualsToken(Token::Type::RFC822_NAME, "name"),
          EqualsToken(Token::Type::RFC822_COMMENT, "A"),
          EqualsToken(Token::Type::RFC822_COMMENT, "different"),
          EqualsToken(Token::Type::RFC822_COMMENT, "comment"),
          EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "bob-loblaw"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "foo.bar.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "bob-loblaw@foo.bar.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "bob"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "loblaw"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "foo"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "bar"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"),

          // Entry 3: with no '@', everything is expected as host.
          EqualsToken(Token::Type::RFC822_TOKEN, no_at_entry),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "no.at.sign.present"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "no.at.sign.present"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "no"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "at"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "sign"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "present"),

          // Entry 4: with two '@' signs, the expected local/host split falls
          // at the last one.
          EqualsToken(Token::Type::RFC822_TOKEN, double_at_entry),
          EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "double@at"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "signs.present"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "double@at@signs.present"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "double"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "at"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "signs"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "present"))));
}
277
278 // Tests from rfc822 converter.
// Five unbracketed addresses separated by commas with no spaces. Each address
// is expected to yield its own run of tokens, including one with a '+' in the
// local part and one with a three-label host.
TEST(Rfc822TokenizerTest, SimpleRfcText) {
  Rfc822Tokenizer tokenizer{};
  const std::string address_list =
      "foo@google.com,bar@google.com,baz@google.com,"
      "foo+hello@google.com,baz@corp.google.com";

  EXPECT_THAT(
      tokenizer.TokenizeAll(address_list),
      IsOkAndHolds(ElementsAre(
          // foo@google.com
          EqualsToken(Token::Type::RFC822_TOKEN, "foo@google.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "foo"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "foo@google.com"),
          EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "foo"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),

          // bar@google.com
          EqualsToken(Token::Type::RFC822_TOKEN, "bar@google.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "bar"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "bar@google.com"),
          EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "bar"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),

          // baz@google.com
          EqualsToken(Token::Type::RFC822_TOKEN, "baz@google.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "baz"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "baz@google.com"),
          EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "baz"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),

          // foo+hello@google.com: the '+' splits the local components but is
          // kept in the local address.
          EqualsToken(Token::Type::RFC822_TOKEN, "foo+hello@google.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "foo"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "hello"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "foo+hello@google.com"),
          EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "foo+hello"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),

          // baz@corp.google.com
          EqualsToken(Token::Type::RFC822_TOKEN, "baz@corp.google.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "baz"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "corp"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "baz@corp.google.com"),
          EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "baz"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "corp.google.com"))));
}
330
// Two entries separated by a line break. The first mixes a quoted name that
// contains punctuation, a comment that contains quotes and backslashes, and a
// bracketed address whose local part contains spaces, parentheses and a comma.
// The second is a plain bracketed address.
TEST(Rfc822TokenizerTest, ComplicatedRfcText) {
  Rfc822Tokenizer tokenizer{};
  // The raw literal spans two source lines, so the line break (and anything
  // at the start of its second line) is part of the input.
  const std::string input =
      R"raw("Weird, But&(Also)\\Valid" Name (!With, "an" \\odd\\ cmt too¡) <Foo B(a)r,Baz@g.co>
<easy@google.com>)raw";

  EXPECT_THAT(
      tokenizer.TokenizeAll(input),
      IsOkAndHolds(ElementsAre(
          // First entry: everything up to and including its closing '>'.
          EqualsToken(
              Token::Type::RFC822_TOKEN,
              R"raw("Weird, But&(Also)\\Valid" Name (!With, "an" \\odd\\ cmt too¡) <Foo B(a)r,Baz@g.co>)raw"),
          EqualsToken(Token::Type::RFC822_NAME, "Weird"),
          EqualsToken(Token::Type::RFC822_NAME, "But"),
          EqualsToken(Token::Type::RFC822_NAME, "Also"),
          EqualsToken(Token::Type::RFC822_NAME, "Valid"),
          EqualsToken(Token::Type::RFC822_NAME, "Name"),
          EqualsToken(Token::Type::RFC822_COMMENT, "With"),
          EqualsToken(Token::Type::RFC822_COMMENT, "an"),
          EqualsToken(Token::Type::RFC822_COMMENT, "odd"),
          EqualsToken(Token::Type::RFC822_COMMENT, "cmt"),
          EqualsToken(Token::Type::RFC822_COMMENT, "too"),
          EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "Foo B(a)r,Baz"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "g.co"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "Foo B(a)r,Baz@g.co"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "Foo"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "B"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "a"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "r"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "Baz"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "g"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "co"),
          // Second entry.
          EqualsToken(Token::Type::RFC822_TOKEN, "<easy@google.com>"),
          EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "easy"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "easy@google.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "easy"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"))));
}
371
// Regression input that used to trigger an HTML parsing exception. HTML
// parsing is no longer done (b/8388100), so this only checks that tokenizing
// does not crash and that the input is retained.
TEST(Rfc822TokenizerTest, FromHtmlBugs) {
  Rfc822Tokenizer tokenizer{};

  // http://b/8988210: the crashing string is "&\r" repeated 100 times, placed
  // in both the name and the comment of an rfc822 entry.
  std::string filler;
  for (int repeat = 0; repeat < 100; ++repeat) {
    filler += "&\r";
  }
  const std::string input =
      "\"" + filler + "\" (" + filler + ") <foo@google.com>";

  // The whole input is expected back unchanged as the RFC822_TOKEN, followed
  // only by the tokens for the address.
  EXPECT_THAT(
      tokenizer.TokenizeAll(input),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN, input),
          EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "foo"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "foo@google.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "foo"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"))));
}
403
// Covers inputs in which one or more of the name, comment and address parts
// are empty or missing.
TEST(Rfc822TokenizerTest, EmptyComponentsTest) {
  Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();

  // Empty input yields no tokens. IsEmpty is used unqualified to match the
  // using-declaration at the top of the file.
  EXPECT_THAT(rfc822_tokenizer.TokenizeAll(""), IsOkAndHolds(IsEmpty()));

  // Name is considered the address if address is empty.
  EXPECT_THAT(
      rfc822_tokenizer.TokenizeAll("name<>"),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN, "name"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "name"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "name"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "name"))));

  // A comment with neither name nor address still yields a token spanning the
  // whole input, plus one RFC822_COMMENT token per word; no address tokens
  // are expected.
  EXPECT_THAT(
      rfc822_tokenizer.TokenizeAll("(a long comment with nothing else)"),
      IsOkAndHolds(
          ElementsAre(EqualsToken(Token::Type::RFC822_TOKEN,
                                  "(a long comment with nothing else)"),
                      EqualsToken(Token::Type::RFC822_COMMENT, "a"),
                      EqualsToken(Token::Type::RFC822_COMMENT, "long"),
                      EqualsToken(Token::Type::RFC822_COMMENT, "comment"),
                      EqualsToken(Token::Type::RFC822_COMMENT, "with"),
                      EqualsToken(Token::Type::RFC822_COMMENT, "nothing"),
                      EqualsToken(Token::Type::RFC822_COMMENT, "else"))));

  // A name followed by an empty comment: only the tokens for the name are
  // expected.
  EXPECT_THAT(
      rfc822_tokenizer.TokenizeAll("name ()"),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN, "name"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "name"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "name"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "name"))));

  // A comment followed by an empty quoted string: the empty quotes are part
  // of the whole-token text but add no tokens of their own.
  EXPECT_THAT(rfc822_tokenizer.TokenizeAll(R"((comment) "")"),
              IsOkAndHolds(ElementsAre(
                  EqualsToken(Token::Type::RFC822_TOKEN, "(comment) \"\""),
                  EqualsToken(Token::Type::RFC822_COMMENT, "comment"))));
}
444
// Checks how name words are reported, with and without a bracketed address
// present in the same entry.
TEST(Rfc822TokenizerTest, NameTest) {
  Rfc822Tokenizer tokenizer{};

  // Name words on both sides of a bracketed address: one token spans the
  // whole input and both words are expected as RFC822_NAME.
  EXPECT_THAT(
      tokenizer.TokenizeAll("peanut <address> butter"),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN, "peanut <address> butter"),
          EqualsToken(Token::Type::RFC822_NAME, "peanut"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "address"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "address"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "address"),
          EqualsToken(Token::Type::RFC822_NAME, "butter"))));

  // The same words around a comment with no bracketed address: each piece is
  // expected as its own RFC822_TOKEN, with the words reported as addresses.
  EXPECT_THAT(
      tokenizer.TokenizeAll("peanut (comment) butter"),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN, "peanut"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "peanut"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "peanut"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "peanut"),
          EqualsToken(Token::Type::RFC822_TOKEN, "comment"),
          EqualsToken(Token::Type::RFC822_COMMENT, "comment"),
          EqualsToken(Token::Type::RFC822_TOKEN, "butter"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "butter"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "butter"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "butter"))));

  // A quoted word after the bracketed address: the RFC822_NAME token text
  // does not include the quotes.
  const std::string quoted_after_address = R"(peanut <address> "butter")";
  EXPECT_THAT(
      tokenizer.TokenizeAll(quoted_after_address),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN, quoted_after_address),
          EqualsToken(Token::Type::RFC822_NAME, "peanut"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "address"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "address"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "address"),
          EqualsToken(Token::Type::RFC822_NAME, "butter"))));

  // A bare word followed by a quoted word, with no bracketed address: two
  // separate tokens are expected, neither including the quotes.
  const std::string bare_then_quoted = R"(peanut "butter")";
  EXPECT_THAT(
      tokenizer.TokenizeAll(bare_then_quoted),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN, "peanut"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "peanut"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "peanut"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "peanut"),
          EqualsToken(Token::Type::RFC822_TOKEN, "butter"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "butter"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "butter"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "butter"))));

  // An '@' inside the name part, ahead of a bracketed address: the name is
  // expected to split at the '@' into separate RFC822_NAME tokens.
  EXPECT_THAT(
      tokenizer.TokenizeAll("ple@se quote this <addr>"),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN, "ple@se quote this <addr>"),
          EqualsToken(Token::Type::RFC822_NAME, "ple"),
          EqualsToken(Token::Type::RFC822_NAME, "se"),
          EqualsToken(Token::Type::RFC822_NAME, "quote"),
          EqualsToken(Token::Type::RFC822_NAME, "this"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "addr"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "addr"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "addr"))));
}
510
// Checks comments that contain backslash escapes and nested parentheses.
TEST(Rfc822TokenizerTest, CommentEscapeTest) {
  Rfc822Tokenizer tokenizer{};

  // Escaped ')', '\' and '(' inside a lone comment: one token spans the whole
  // input, and the comment text is expected to split at each escaped
  // character.
  EXPECT_THAT(
      tokenizer.TokenizeAll(R"((co\)mm\\en\(t))"),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN, R"((co\)mm\\en\(t))"),
          EqualsToken(Token::Type::RFC822_COMMENT, "co"),
          EqualsToken(Token::Type::RFC822_COMMENT, "mm"),
          EqualsToken(Token::Type::RFC822_COMMENT, "en"),
          EqualsToken(Token::Type::RFC822_COMMENT, "t"))));

  // Backslashes in front of letters, followed by a bare word: the comment is
  // expected as a single RFC822_COMMENT token that keeps the backslashes.
  EXPECT_THAT(
      tokenizer.TokenizeAll(R"((c\om\ment) name)"),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN, R"(c\om\ment)"),
          EqualsToken(Token::Type::RFC822_COMMENT, R"(c\om\ment)"),
          EqualsToken(Token::Type::RFC822_TOKEN, "name"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "name"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "name"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "name"))));

  // A nested parenthesis pair holding an escaped ')', followed by a bare
  // word: the comment token spans the text inside the outer parentheses.
  EXPECT_THAT(
      tokenizer.TokenizeAll(R"((co(m\))ment) name)"),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN, R"(co(m\))ment)"),
          EqualsToken(Token::Type::RFC822_COMMENT, "co"),
          EqualsToken(Token::Type::RFC822_COMMENT, "m"),
          EqualsToken(Token::Type::RFC822_COMMENT, "ment"),
          EqualsToken(Token::Type::RFC822_TOKEN, "name"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "name"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "name"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "name"))));
}
545
// Checks backslash escapes in names, both unquoted and inside quotes.
TEST(Rfc822TokenizerTest, QuoteEscapeTest) {
  Rfc822Tokenizer tokenizer{};

  // Unquoted name with backslashes ahead of a bracketed address: the name is
  // expected to split at the backslashes, and none of the RFC822_NAME tokens
  // contains one.
  EXPECT_THAT(
      tokenizer.TokenizeAll(R"(n\\a\me <addr>)"),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN, R"(n\\a\me <addr>)"),
          EqualsToken(Token::Type::RFC822_NAME, "n"),
          EqualsToken(Token::Type::RFC822_NAME, "a"),
          EqualsToken(Token::Type::RFC822_NAME, "me"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "addr"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "addr"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "addr"))));

  // Quoted name with escapes and no address: the name is reported through
  // the address token types. The whole-token and address texts drop the
  // surrounding quotes but keep the escape sequences as written.
  EXPECT_THAT(
      tokenizer.TokenizeAll(R"("n\\a\m\"e")"),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN, R"(n\\a\m\"e)"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "n"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "a\\m"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "e"),
          EqualsToken(Token::Type::RFC822_ADDRESS, R"(n\\a\m\"e)"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, R"(n\\a\m\"e)"))));
}
574
// Checks inputs in which a comment or quoted string is never closed, or ends
// on a dangling backslash.
TEST(Rfc822TokenizerTest, UnterminatedComponentTest) {
  Rfc822Tokenizer tokenizer{};

  // Comment opened but never closed.
  EXPECT_THAT(
      tokenizer.TokenizeAll("name (comment"),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN, "name"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "name"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "name"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "name"),
          EqualsToken(Token::Type::RFC822_TOKEN, "comment"),
          EqualsToken(Token::Type::RFC822_COMMENT, "comment"))));

  // Quote opened but never closed: the text after the quote is expected as
  // one token that runs to the end of the input, space included.
  EXPECT_THAT(
      tokenizer.TokenizeAll(R"(half of "the name)"),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN, "half"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "half"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "half"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "half"),
          EqualsToken(Token::Type::RFC822_TOKEN, "of"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "of"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "of"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "of"),
          EqualsToken(Token::Type::RFC822_TOKEN, "the name"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "the"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "name"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "the name"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "the name"))));

  // Unclosed quote ending on a backslash: the trailing backslash is not part
  // of any expected token.
  EXPECT_THAT(
      tokenizer.TokenizeAll(R"("name\)"),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN, "name"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "name"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "name"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "name"))));

  // Unclosed comment ending on a backslash.
  EXPECT_THAT(
      tokenizer.TokenizeAll(R"(name (comment\)"),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN, "name"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "name"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "name"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "name"),
          EqualsToken(Token::Type::RFC822_TOKEN, "comment"),
          EqualsToken(Token::Type::RFC822_COMMENT, "comment"))));

  // Bracketed address followed by an unclosed quote ending on a backslash:
  // here the whole-token text runs to the end of the input, backslash
  // included, while the name token holds only the word.
  EXPECT_THAT(
      tokenizer.TokenizeAll(R"(<addr> "name\)"),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN, "<addr> \"name\\"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "addr"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "addr"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "addr"),
          EqualsToken(Token::Type::RFC822_NAME, "name"))));

  // Comment whose only ')' is escaped by a backslash.
  EXPECT_THAT(
      tokenizer.TokenizeAll(R"(name (comment\))"),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN, "name"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "name"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "name"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "name"),
          EqualsToken(Token::Type::RFC822_TOKEN, "comment"),
          EqualsToken(Token::Type::RFC822_COMMENT, "comment"))));
}
642
// Checks addresses whose '@' is preceded by a backslash, both inside angle
// brackets and bare. Inside brackets the expected split still falls at the
// '@', leaving the backslash in the local address; bare, the whole text is
// expected as a host address.
TEST(Rfc822TokenizerTest, Tokenize) {
  Rfc822Tokenizer tokenizer{};

  // Quoted name and comment before the bracketed address; comment after the
  // bare address.
  const std::string quoted_name_list =
      R"raw("Berg" (home) <berg\@google.com>, tom\@google.com (work))raw";
  EXPECT_THAT(
      tokenizer.TokenizeAll(quoted_name_list),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN,
                      R"("Berg" (home) <berg\@google.com>)"),
          EqualsToken(Token::Type::RFC822_NAME, "Berg"),
          EqualsToken(Token::Type::RFC822_COMMENT, "home"),
          EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "berg\\"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "berg\\@google.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "berg"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"),
          EqualsToken(Token::Type::RFC822_TOKEN, "tom\\@google.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "tom"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "tom\\@google.com"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "tom\\@google.com"),
          EqualsToken(Token::Type::RFC822_TOKEN, "work"),
          EqualsToken(Token::Type::RFC822_COMMENT, "work"))));

  // Same shape with an unquoted two-word name.
  const std::string plain_name_list =
      R"raw(Foo Bar (something) <foo\@google.com>, )raw"
      R"raw(blah\@google.com (something))raw";
  EXPECT_THAT(
      tokenizer.TokenizeAll(plain_name_list),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN,
                      "Foo Bar (something) <foo\\@google.com>"),
          EqualsToken(Token::Type::RFC822_NAME, "Foo"),
          EqualsToken(Token::Type::RFC822_NAME, "Bar"),
          EqualsToken(Token::Type::RFC822_COMMENT, "something"),
          EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "foo\\"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "foo\\@google.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "foo"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"),
          EqualsToken(Token::Type::RFC822_TOKEN, "blah\\@google.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "blah"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "blah\\@google.com"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "blah\\@google.com"),
          EqualsToken(Token::Type::RFC822_TOKEN, "something"),
          EqualsToken(Token::Type::RFC822_COMMENT, "something"))));
}
695
// Exercises backslash handling inside addresses and quoted sections, plus an
// address whose opening '<' is never closed.
TEST(Rfc822TokenizerTest, EdgeCases) {
  Rfc822Tokenizer tokenizer{};

  // Inside an address: a non-alphabetic character, then a backslash, then
  // another non-alphabetic character. This sequence ends the in-address token.
  const std::string escaped_symbol = R"raw(<be.\&rg@google.com>)raw";
  EXPECT_THAT(
      tokenizer.TokenizeAll(escaped_symbol),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN,
                      R"raw(<be.\&rg@google.com>)raw"),
          EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "be.\\&rg"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "be.\\&rg@google.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "be"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "rg"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"))));

  // A backslash directly before an alphabetic character must not end the
  // token.
  const std::string escaped_letter = "<a\\lex@google.com>";
  EXPECT_THAT(
      tokenizer.TokenizeAll(escaped_letter),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN, "<a\\lex@google.com>"),
          EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "a\\lex"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "a\\lex@google.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "a\\lex"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"))));

  // An escaped backslash and an escaped double quote within a quoted section.
  const std::string quoted_escapes = R"("al\\ex@goo\"<idk>gle.com")";
  EXPECT_THAT(
      tokenizer.TokenizeAll(quoted_escapes),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN, R"(al\\ex@goo\"<idk>gle.com)"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "al"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "ex"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "goo"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "idk"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "gle"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"),
          EqualsToken(Token::Type::RFC822_ADDRESS,
                      R"(al\\ex@goo\"<idk>gle.com)"),
          EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "al\\\\ex"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "goo\\\"<idk>gle.com"))));

  // The opening '<' has no matching '>'.
  const std::string unclosed_bracket = "<alex@google.com";
  EXPECT_THAT(
      tokenizer.TokenizeAll(unclosed_bracket),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN, "<alex@google.com"),
          EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "alex"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "alex@google.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "alex"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"))));
}
757
// A digit at the start of the local part stays attached to that part.
TEST(Rfc822TokenizerTest, NumberInAddress) {
  Rfc822Tokenizer tokenizer{};
  const std::string input = "<3alex@google.com>";

  EXPECT_THAT(
      tokenizer.TokenizeAll(input),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN, "<3alex@google.com>"),
          EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "3alex"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "3alex@google.com"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "3alex"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"))));
}
772
// Quoted strings that contain an escaped double quote or a run of
// backslashes.
TEST(Rfc822TokenizerTest, DoubleQuoteDoubleSlash) {
  Rfc822Tokenizer tokenizer{};

  // An escaped double quote sits just before the closing quote.
  const std::string escaped_quote = R"("alex\"")";
  EXPECT_THAT(
      tokenizer.TokenizeAll(escaped_quote),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN, "alex"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "alex"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "alex"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "alex"))));

  // Three consecutive backslashes followed by a letter.
  const std::string triple_backslash = R"("alex\\\a")";
  EXPECT_THAT(
      tokenizer.TokenizeAll(triple_backslash),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN, R"(alex\\\a)"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "alex"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "a"),
          EqualsToken(Token::Type::RFC822_ADDRESS, R"(alex\\\a)"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, R"(alex\\\a)"))));
}
794
795 TEST(Rfc822TokenizerTest, TwoEmails) {
796 Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
797 std::string text = "tjbarron@google.com alexsav@google.com";
798 EXPECT_THAT(
799 rfc822_tokenizer.TokenizeAll(text),
800 IsOkAndHolds(ElementsAre(
801 EqualsToken(Token::Type::RFC822_TOKEN, "tjbarron@google.com"),
802 EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "tjbarron"),
803 EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
804 EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"),
805 EqualsToken(Token::Type::RFC822_ADDRESS, "tjbarron@google.com"),
806 EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "tjbarron"),
807 EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),
808 EqualsToken(Token::Type::RFC822_TOKEN, "alexsav@google.com"),
809 EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "alexsav"),
810 EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
811 EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"),
812 EqualsToken(Token::Type::RFC822_ADDRESS, "alexsav@google.com"),
813 EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "alexsav"),
814 EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"))));
815 }
816
817 TEST(Rfc822TokenizerTest, BackSlashes) {
818 Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
819 std::string text = R"("\name")";
820 EXPECT_THAT(
821 rfc822_tokenizer.TokenizeAll(text),
822 IsOkAndHolds(ElementsAre(
823 EqualsToken(Token::Type::RFC822_TOKEN, "name"),
824 EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "name"),
825 EqualsToken(Token::Type::RFC822_ADDRESS, "name"),
826 EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "name"))));
827
828 text = R"("name@foo\@gmail")";
829 EXPECT_THAT(
830 rfc822_tokenizer.TokenizeAll(text),
831 IsOkAndHolds(ElementsAre(
832 EqualsToken(Token::Type::RFC822_TOKEN, "name@foo\\@gmail"),
833 EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "name"),
834 EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "foo"),
835 EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "gmail"),
836 EqualsToken(Token::Type::RFC822_ADDRESS, "name@foo\\@gmail"),
837 EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "name"),
838 EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "foo\\@gmail"))));
839 }
840
841 TEST(Rfc822TokenizerTest, BigWhitespace) {
842 Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
843 std::string text = "\"quoted\" <address>";
844 EXPECT_THAT(
845 rfc822_tokenizer.TokenizeAll(text),
846 IsOkAndHolds(ElementsAre(
847 EqualsToken(Token::Type::RFC822_TOKEN, text),
848 EqualsToken(Token::Type::RFC822_NAME, "quoted"),
849 EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "address"),
850 EqualsToken(Token::Type::RFC822_ADDRESS, "address"),
851 EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "address"))));
852 }
853
// A quoted string that begins with '@': only the text after the '@' appears
// in the expected tokens.
TEST(Rfc822TokenizerTest, AtSignFirst) {
  Rfc822Tokenizer tokenizer{};
  const std::string input = "\"@foo\"";

  EXPECT_THAT(
      tokenizer.TokenizeAll(input),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN, "foo"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "foo"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "foo"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "foo"))));
}
865
// A backslash followed by a CJK character inside a quoted string: the whole
// quoted content is expected as a single host component.
TEST(Rfc822TokenizerTest, SlashThenUnicode) {
  Rfc822Tokenizer tokenizer{};
  const std::string input = R"("quoted\你cjk")";

  EXPECT_THAT(
      tokenizer.TokenizeAll(input),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN, "quoted\\你cjk"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST,
                      "quoted\\你cjk"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "quoted\\你cjk"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "quoted\\你cjk"))));
}
878
// A bracketed address, then an empty "<>", then a name: the empty brackets
// contribute no tokens and the top-level token covers the entire input.
TEST(Rfc822TokenizerTest, AddressEmptyAddress) {
  Rfc822Tokenizer tokenizer{};
  const std::string input = "<address> <> Name";

  EXPECT_THAT(
      tokenizer.TokenizeAll(input),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::Type::RFC822_TOKEN, input),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "address"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "address"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "address"),
          EqualsToken(Token::Type::RFC822_NAME, "Name"))));
}
891
// A parenthesised comment immediately followed by a bare address.
TEST(Rfc822TokenizerTest, ProperComment) {
  Rfc822Tokenizer tokenizer{};
  const std::string input = "(comment)alex@google.com";

  EXPECT_THAT(
      tokenizer.TokenizeAll(input),
      IsOkAndHolds(ElementsAre(
          // The expected top-level token omits the opening parenthesis.
          EqualsToken(Token::Type::RFC822_TOKEN, "comment)alex@google.com"),
          EqualsToken(Token::Type::RFC822_COMMENT, "comment"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "alex"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "alex@google.com"),
          EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "alex"),
          EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"))));
}
907
// Very short addresses: a comma-separated pair, and a single address whose
// local part is followed by two backslashes.
TEST(Rfc822TokenizerTest, SmallNameToEmail) {
  Rfc822Tokenizer tokenizer{};

  // Two one-letter addresses separated by a comma with no whitespace.
  const std::string address_pair = "a@g.c,b@g.c";
  EXPECT_THAT(tokenizer.TokenizeAll(address_pair),
              IsOkAndHolds(ElementsAre(
                  EqualsToken(Token::Type::RFC822_TOKEN, "a@g.c"),
                  EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "a"),
                  EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "g"),
                  EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "c"),
                  EqualsToken(Token::Type::RFC822_ADDRESS, "a@g.c"),
                  EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "a"),
                  EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "g.c"),
                  EqualsToken(Token::Type::RFC822_TOKEN, "b@g.c"),
                  EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "b"),
                  EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "g"),
                  EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "c"),
                  EqualsToken(Token::Type::RFC822_ADDRESS, "b@g.c"),
                  EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "b"),
                  EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "g.c"))));

  // Two backslashes in front of the '@': the expected local address is just
  // "a" and the host address is "g.c".
  const std::string double_backslash = "a\\\\@g.c";
  EXPECT_THAT(tokenizer.TokenizeAll(double_backslash),
              IsOkAndHolds(ElementsAre(
                  EqualsToken(Token::Type::RFC822_TOKEN, "a\\\\@g.c"),
                  EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "a"),
                  EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "g"),
                  EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "c"),
                  EqualsToken(Token::Type::RFC822_ADDRESS, "a\\\\@g.c"),
                  EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "a"),
                  EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "g.c"))));
}
939
// Addresses that end in '@' with nothing after it, once inside angle brackets
// and once bare. No host tokens are expected for either.
TEST(Rfc822TokenizerTest, AtSignLast) {
  Rfc822Tokenizer tokenizer{};
  const std::string_view input = "<alex@>, tim@";

  EXPECT_THAT(
      tokenizer.TokenizeAll(input),
      IsOkAndHolds(ElementsAre(
          // Bracketed form: the trailing '@' stays in the address token.
          EqualsToken(Token::Type::RFC822_TOKEN, "<alex@>"),
          EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "alex"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "alex@"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "alex"),
          // Bare form: the trailing '@' is absent from every token.
          EqualsToken(Token::Type::RFC822_TOKEN, "tim"),
          EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "tim"),
          EqualsToken(Token::Type::RFC822_ADDRESS, "tim"),
          EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "tim"))));
}
955
// Input made up solely of separators (commas and a final semicolon) must
// tokenize successfully and yield no tokens.
TEST(Rfc822TokenizerTest, Commas) {
  Rfc822Tokenizer tokenizer{};
  const std::string separators_only = ",,,,,,,,,,,,,,,,,,,,,,,,,,;";

  EXPECT_THAT(tokenizer.TokenizeAll(separators_only),
              IsOkAndHolds(IsEmpty()));
}
961
// Checks ResetToTokenStartingAfter() on an iterator over two comma-separated
// addresses, for offsets before, between and past the addresses.
TEST(Rfc822TokenizerTest, ResetToTokenStartingAfter) {
  Rfc822Tokenizer tokenizer{};
  const std::string input = "a@g.c,b@g.c";
  auto iterator = tokenizer.Tokenize(input).ValueOrDie();

  // Advance twice before performing any reset.
  ASSERT_TRUE(iterator->Advance());
  ASSERT_TRUE(iterator->Advance());

  // Offset -1: the first address is the current token group.
  ASSERT_TRUE(iterator->ResetToTokenStartingAfter(-1));
  EXPECT_THAT(iterator->GetTokens().at(0).text, "a@g.c");

  // Offset 5 (the comma): the second address is the current token group.
  ASSERT_TRUE(iterator->ResetToTokenStartingAfter(5));
  EXPECT_THAT(iterator->GetTokens().at(0).text, "b@g.c");

  // Offset 6: the reset is expected to fail.
  ASSERT_FALSE(iterator->ResetToTokenStartingAfter(6));
}
977
// Checks ResetToTokenEndingBefore() on an iterator over two comma-separated
// addresses.
TEST(Rfc822TokenizerTest, ResetToTokenEndingBefore) {
  Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
  std::string text = "a@g.c,b@g.c";
  auto token_iterator = rfc822_tokenizer.Tokenize(text).ValueOrDie();
  // Assert the advance succeeded rather than discarding the result, matching
  // ResetToTokenStartingAfter; a failed advance would otherwise surface as a
  // misleading failure in the reset checks below.
  ASSERT_TRUE(token_iterator->Advance());

  // Offset 5 (the comma): the first address is the current token group.
  ASSERT_TRUE(token_iterator->ResetToTokenEndingBefore(5));
  EXPECT_THAT(token_iterator->GetTokens().at(0).text, "a@g.c");

  // Offset 4: the reset is expected to fail.
  ASSERT_FALSE(token_iterator->ResetToTokenEndingBefore(4));
}
989
990 } // namespace
991 } // namespace lib
992 } // namespace icing
993