• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2006-2008 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/strings/string_tokenizer.h"
6 
7 #include "testing/gmock/include/gmock/gmock.h"
8 #include "testing/gtest/include/gtest/gtest.h"
9 
10 using std::string;
11 using testing::Eq;
12 using testing::Optional;
13 
14 namespace base {
15 
16 namespace {
17 
// Splits a space-separated sentence and checks each token in order, together
// with the delimiter state reported before the first token and after the last.
TEST(StringTokenizerTest, Simple) {
  string sentence = "this is a test";
  StringTokenizer tokenizer(sentence, " ");

  // Before any token has been returned, the start of the string is treated as
  // a delimiter.
  EXPECT_TRUE(tokenizer.token_is_delim());

  const char* const kWords[] = {"this", "is", "a", "test"};
  for (const char* word : kWords) {
    EXPECT_TRUE(tokenizer.GetNext());
    EXPECT_FALSE(tokenizer.token_is_delim());
    EXPECT_EQ(word, tokenizer.token());
  }

  // After the last token, the end of the string is treated as a delimiter.
  EXPECT_FALSE(tokenizer.GetNext());
  EXPECT_TRUE(tokenizer.token_is_delim());
}
45 
// Same scenario as a plain space-separated split, but tokens are pulled via
// GetNextTokenView(), which is expected to yield an engaged optional per token
// and std::nullopt once the input is exhausted.
TEST(StringTokenizerTest, SimpleUsingTokenView) {
  string sentence = "this is a test";
  StringTokenizer tokenizer(sentence, " ");

  // Before any token has been returned, the start of the string is treated as
  // a delimiter.
  EXPECT_TRUE(tokenizer.token_is_delim());

  const char* const kWords[] = {"this", "is", "a", "test"};
  for (const char* word : kWords) {
    EXPECT_THAT(tokenizer.GetNextTokenView(), Optional(Eq(word)));
    EXPECT_FALSE(tokenizer.token_is_delim());
  }

  // After the last token, the end of the string is treated as a delimiter.
  EXPECT_THAT(tokenizer.GetNextTokenView(), Eq(std::nullopt));
  EXPECT_TRUE(tokenizer.token_is_delim());
}
69 
// Walks the same input twice, calling Reset() after each pass; the second pass
// is expected to produce exactly the same tokens and delimiter states as the
// first.
TEST(StringTokenizerTest, Reset) {
  string sentence = "this is a test";
  StringTokenizer tokenizer(sentence, " ");
  const char* const kWords[] = {"this", "is", "a", "test"};

  for (int pass = 0; pass < 2; ++pass) {
    // Both a fresh tokenizer and a just-reset one report a delimiter.
    EXPECT_TRUE(tokenizer.token_is_delim());

    for (const char* word : kWords) {
      EXPECT_TRUE(tokenizer.GetNext());
      EXPECT_FALSE(tokenizer.token_is_delim());
      EXPECT_EQ(word, tokenizer.token());
    }

    EXPECT_FALSE(tokenizer.GetNext());
    EXPECT_TRUE(tokenizer.token_is_delim());

    tokenizer.Reset();
  }
}
99 
// With RETURN_DELIMS set, each space between words is expected to come back as
// its own token flagged as a delimiter.
TEST(StringTokenizerTest, RetDelims) {
  string sentence = "this is a test";
  StringTokenizer tokenizer(sentence, " ");
  tokenizer.set_options(StringTokenizer::RETURN_DELIMS);
  EXPECT_TRUE(tokenizer.token_is_delim());

  // Expected output, in order: the token text and whether it is a delimiter.
  struct Piece {
    const char* text;
    bool is_delim;
  };
  const Piece kPieces[] = {
      {"this", false}, {" ", true}, {"is", false},   {" ", true},
      {"a", false},    {" ", true}, {"test", false},
  };
  for (const Piece& piece : kPieces) {
    EXPECT_TRUE(tokenizer.GetNext());
    EXPECT_EQ(piece.is_delim, tokenizer.token_is_delim());
    EXPECT_EQ(piece.text, tokenizer.token());
  }

  EXPECT_FALSE(tokenizer.GetNext());
  EXPECT_TRUE(tokenizer.token_is_delim());
}
137 
// With RETURN_EMPTY_TOKENS set, back-to-back commas are expected to produce an
// empty token, while the comma inside the single-quoted section stays part of
// its token.
TEST(StringTokenizerTest, RetEmptyTokens) {
  string list = "foo='a, b',,bar,,baz,quux";
  StringTokenizer tokenizer(list, ",");
  tokenizer.set_options(StringTokenizer::RETURN_EMPTY_TOKENS);
  tokenizer.set_quote_chars("'");

  const char* const kTokens[] = {"foo='a, b'", "", "bar", "", "baz", "quux"};
  for (const char* token : kTokens) {
    // A missing token aborts the test rather than comparing stale state.
    ASSERT_TRUE(tokenizer.GetNext());
    EXPECT_EQ(token, tokenizer.token());
  }

  EXPECT_FALSE(tokenizer.GetNext());
}
164 
// A leading delimiter is expected to yield an empty first token when
// RETURN_EMPTY_TOKENS is set.
TEST(StringTokenizerTest, RetEmptyTokens_AtStart) {
  string list = ",bar";
  StringTokenizer tokenizer(list, ",");
  tokenizer.set_options(StringTokenizer::RETURN_EMPTY_TOKENS);
  tokenizer.set_quote_chars("'");

  const char* const kTokens[] = {"", "bar"};
  for (const char* token : kTokens) {
    ASSERT_TRUE(tokenizer.GetNext());
    EXPECT_EQ(token, tokenizer.token());
  }

  EXPECT_FALSE(tokenizer.GetNext());
}
179 
// A trailing delimiter is expected to yield an empty final token when
// RETURN_EMPTY_TOKENS is set.
TEST(StringTokenizerTest, RetEmptyTokens_AtEnd) {
  string list = "bar,";
  StringTokenizer tokenizer(list, ",");
  tokenizer.set_options(StringTokenizer::RETURN_EMPTY_TOKENS);
  tokenizer.set_quote_chars("'");

  const char* const kTokens[] = {"bar", ""};
  for (const char* token : kTokens) {
    ASSERT_TRUE(tokenizer.GetNext());
    EXPECT_EQ(token, tokenizer.token());
  }

  EXPECT_FALSE(tokenizer.GetNext());
}
194 
// An input consisting of a single delimiter is expected to yield two empty
// tokens (one on each side of it) when RETURN_EMPTY_TOKENS is set.
TEST(StringTokenizerTest, RetEmptyTokens_Both) {
  string list = ",";
  StringTokenizer tokenizer(list, ",");
  tokenizer.set_options(StringTokenizer::RETURN_EMPTY_TOKENS);
  tokenizer.set_quote_chars("'");

  const char* const kTokens[] = {"", ""};
  for (const char* token : kTokens) {
    ASSERT_TRUE(tokenizer.GetNext());
    EXPECT_EQ(token, tokenizer.token());
  }

  EXPECT_FALSE(tokenizer.GetNext());
}
209 
// An empty input is expected to yield exactly one empty token when
// RETURN_EMPTY_TOKENS is set.
TEST(StringTokenizerTest, RetEmptyTokens_Empty) {
  string nothing = "";
  StringTokenizer tokenizer(nothing, ",");
  tokenizer.set_options(StringTokenizer::RETURN_EMPTY_TOKENS);

  // The single empty token.
  ASSERT_TRUE(tokenizer.GetNext());
  EXPECT_EQ("", tokenizer.token());

  // Nothing further after it.
  EXPECT_FALSE(tokenizer.GetNext());
}
220 
// Combines RETURN_DELIMS and RETURN_EMPTY_TOKENS: every comma outside the
// quoted section is expected as its own token, with an empty token reported
// between adjacent commas.
TEST(StringTokenizerTest, RetDelimsAndEmptyTokens) {
  string list = "foo='a, b',,bar,,baz,quux";
  StringTokenizer tokenizer(list, ",");
  tokenizer.set_options(StringTokenizer::RETURN_DELIMS |
                        StringTokenizer::RETURN_EMPTY_TOKENS);
  tokenizer.set_quote_chars("'");

  const char* const kTokens[] = {
      "foo='a, b'", ",", "",    ",", "bar",  ",",
      "",           ",", "baz", ",", "quux",
  };
  for (const char* token : kTokens) {
    ASSERT_TRUE(tokenizer.GetNext());
    EXPECT_EQ(token, tokenizer.token());
  }

  EXPECT_FALSE(tokenizer.GetNext());
}
263 
// Uses a delimiter set of several characters (':', ' ', ',', '-'); runs of
// them between words are expected to be skipped as a whole.
TEST(StringTokenizerTest, ManyDelims) {
  string sentence = "this: is, a-test";
  StringTokenizer tokenizer(sentence, ": ,-");

  const char* const kWords[] = {"this", "is", "a", "test"};
  for (const char* word : kWords) {
    EXPECT_TRUE(tokenizer.GetNext());
    EXPECT_EQ(word, tokenizer.token());
  }

  EXPECT_FALSE(tokenizer.GetNext());
}
282 
// Tokenizes an HTTP-style header line with RETURN_DELIMS set, expecting every
// delimiter character (':', ' ', ';', '=') back as an individual token flagged
// as a delimiter.
TEST(StringTokenizerTest, ParseHeader) {
  string header = "Content-Type: text/html ; charset=UTF-8";
  StringTokenizer tokenizer(header, ": ;=");
  tokenizer.set_options(StringTokenizer::RETURN_DELIMS);
  EXPECT_TRUE(tokenizer.token_is_delim());

  // Expected output, in order: the token text and whether it is a delimiter.
  struct Piece {
    const char* text;
    bool is_delim;
  };
  const Piece kPieces[] = {
      {"Content-Type", false},
      {":", true},
      {" ", true},
      {"text/html", false},
      {" ", true},
      {";", true},
      {" ", true},
      {"charset", false},
      {"=", true},
      {"UTF-8", false},
  };
  for (const Piece& piece : kPieces) {
    EXPECT_TRUE(tokenizer.GetNext());
    EXPECT_EQ(piece.is_delim, tokenizer.token_is_delim());
    EXPECT_EQ(piece.text, tokenizer.token());
  }

  EXPECT_FALSE(tokenizer.GetNext());
  EXPECT_TRUE(tokenizer.token_is_delim());
}
332 
// With ' registered as a quote character, the space inside the quoted section
// is expected not to split it; the token keeps its surrounding quotes.
TEST(StringTokenizerTest, ParseQuotedString) {
  string sentence = "foo bar 'hello world' baz";
  StringTokenizer tokenizer(sentence, " ");
  tokenizer.set_quote_chars("'");

  const char* const kTokens[] = {"foo", "bar", "'hello world'", "baz"};
  for (const char* token : kTokens) {
    EXPECT_TRUE(tokenizer.GetNext());
    EXPECT_EQ(token, tokenizer.token());
  }

  EXPECT_FALSE(tokenizer.GetNext());
}
352 
// An opening quote with no closing partner: the remainder of the input,
// including the space after the quote, is expected to come back as one token.
TEST(StringTokenizerTest, ParseQuotedString_Malformed) {
  string sentence = "bar 'hello wo";
  StringTokenizer tokenizer(sentence, " ");
  tokenizer.set_quote_chars("'");

  const char* const kTokens[] = {"bar", "'hello wo"};
  for (const char* token : kTokens) {
    EXPECT_TRUE(tokenizer.GetNext());
    EXPECT_EQ(token, tokenizer.token());
  }

  EXPECT_FALSE(tokenizer.GetNext());
}
366 
// Registers both ' and " as quote characters. Double quotes nested inside a
// single-quoted section are expected to stay inside that token, and the
// trailing unmatched " is expected to remain attached to the last token.
TEST(StringTokenizerTest, ParseQuotedString_Multiple) {
  string sentence = "bar 'hel\"lo\" wo' baz\"";
  StringTokenizer tokenizer(sentence, " ");
  tokenizer.set_quote_chars("'\"");

  const char* const kTokens[] = {"bar", "'hel\"lo\" wo'", "baz\""};
  for (const char* token : kTokens) {
    EXPECT_TRUE(tokenizer.GetNext());
    EXPECT_EQ(token, tokenizer.token());
  }

  EXPECT_FALSE(tokenizer.GetNext());
}
383 
// A backslash-escaped quote inside a quoted section is expected not to close
// the section, so the spaces that follow it stay inside the same token.
TEST(StringTokenizerTest, ParseQuotedString_EscapedQuotes) {
  string sentence = "foo 'don\\'t do that'";
  StringTokenizer tokenizer(sentence, " ");
  tokenizer.set_quote_chars("'");

  const char* const kTokens[] = {"foo", "'don\\'t do that'"};
  for (const char* token : kTokens) {
    EXPECT_TRUE(tokenizer.GetNext());
    EXPECT_EQ(token, tokenizer.token());
  }

  EXPECT_FALSE(tokenizer.GetNext());
}
397 
// Uses both ',' and ' ' as delimiters with ' as the quote character: the
// ", " inside the quoted value is expected to stay in its token, while the
// ", " after the closing quote separates the two tokens.
TEST(StringTokenizerTest, ParseQuotedString_EscapedQuotes2) {
  string list = "foo='a, b', bar";
  StringTokenizer tokenizer(list, ", ");
  tokenizer.set_quote_chars("'");

  const char* const kTokens[] = {"foo='a, b'", "bar"};
  for (const char* token : kTokens) {
    EXPECT_TRUE(tokenizer.GetNext());
    EXPECT_EQ(token, tokenizer.token());
  }

  EXPECT_FALSE(tokenizer.GetNext());
}
411 
// Constructs the tokenizer with WhitespacePolicy::kSkipOver: the tabs, spaces
// and CR/LF pairs surrounding each comma-separated item are expected to be
// absent from the returned tokens.
TEST(StringTokenizerTest, ParseWithWhitespace_NoQuotes) {
  string padded = "\t\t\t     foo=a,\r\n b,\r\n\t\t\t      bar\t ";
  StringTokenizer tokenizer(padded, ",",
                            StringTokenizer::WhitespacePolicy::kSkipOver);

  const char* const kTokens[] = {"foo=a", "b", "bar"};
  for (const char* token : kTokens) {
    EXPECT_TRUE(tokenizer.GetNext());
    EXPECT_EQ(token, tokenizer.token());
  }

  EXPECT_FALSE(tokenizer.GetNext());
}
427 
// Combines WhitespacePolicy::kSkipOver with a quote character: surrounding
// whitespace is expected to be dropped, while the space (and comma) inside the
// quoted value is expected to be kept.
TEST(StringTokenizerTest, ParseWithWhitespace_Quotes) {
  string padded = "\t\t\t     foo='a, b',\t\t\t      bar\t ";
  StringTokenizer tokenizer(padded, ",",
                            StringTokenizer::WhitespacePolicy::kSkipOver);
  tokenizer.set_quote_chars("'");

  const char* const kTokens[] = {"foo='a, b'", "bar"};
  for (const char* token : kTokens) {
    EXPECT_TRUE(tokenizer.GetNext());
    EXPECT_EQ(token, tokenizer.token());
  }

  EXPECT_FALSE(tokenizer.GetNext());
}
441 
442 }  // namespace
443 
444 }  // namespace base
445