1 // Copyright 2017 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "absl/strings/str_split.h"
16
17 #include <deque>
18 #include <initializer_list>
19 #include <list>
20 #include <map>
21 #include <memory>
22 #include <string>
23 #include <type_traits>
24 #include <unordered_map>
25 #include <unordered_set>
26 #include <vector>
27
28 #include "gmock/gmock.h"
29 #include "gtest/gtest.h"
30 #include "absl/base/dynamic_annotations.h"
31 #include "absl/base/macros.h"
32 #include "absl/container/flat_hash_map.h"
33 #include "absl/container/node_hash_map.h"
34 #include "absl/strings/numbers.h"
35
36 namespace {
37
38 using ::testing::ElementsAre;
39 using ::testing::Pair;
40 using ::testing::UnorderedElementsAre;
41
// Compile-time verification of which destination types the splitter's
// conversion-operator trait accepts and which it rejects.
TEST(Split, TraitsTest) {
  using absl::strings_internal::SplitterIsConvertibleTo;

  // Non-container types are rejected.
  static_assert(!SplitterIsConvertibleTo<int>::value, "");
  static_assert(!SplitterIsConvertibleTo<std::string>::value, "");

  // Sequence containers are accepted for string-like elements only.
  static_assert(SplitterIsConvertibleTo<std::vector<std::string>>::value, "");
  static_assert(!SplitterIsConvertibleTo<std::vector<int>>::value, "");
  static_assert(
      SplitterIsConvertibleTo<std::vector<absl::string_view>>::value, "");

  // Maps are accepted when both key and mapped type are string-like.
  static_assert(
      SplitterIsConvertibleTo<std::map<std::string, std::string>>::value, "");
  static_assert(
      SplitterIsConvertibleTo<
          std::map<absl::string_view, absl::string_view>>::value,
      "");

  // Maps with a non-string key or a non-string mapped type are rejected.
  static_assert(!SplitterIsConvertibleTo<std::map<int, std::string>>::value,
                "");
  static_assert(!SplitterIsConvertibleTo<std::map<std::string, int>>::value,
                "");
}
69
// This tests the overall split API, which is made up of the absl::StrSplit()
// function and the Delimiter objects in the absl:: namespace.
// Although this TEST lives in this file's unnamed namespace, it deliberately
// spells out the absl:: qualification (or an explicit using-declaration) for
// everything it uses, just like callers will need to.
// Walks through the public split API by example: every delimiter kind, the
// predicates, and conversion of the result into the supported containers.
TEST(Split, APIExamples) {
  {
    // Passes string delimiter. Assumes the default of ByString.
    std::vector<std::string> v = absl::StrSplit("a,b,c", ",");  // NOLINT
    EXPECT_THAT(v, ElementsAre("a", "b", "c"));

    // Equivalent to...
    using absl::ByString;
    v = absl::StrSplit("a,b,c", ByString(","));
    EXPECT_THAT(v, ElementsAre("a", "b", "c"));

    // Equivalent to...
    EXPECT_THAT(absl::StrSplit("a,b,c", ByString(",")),
                ElementsAre("a", "b", "c"));
  }

  {
    // Same as above, but using a single character as the delimiter.
    std::vector<std::string> v = absl::StrSplit("a,b,c", ',');
    EXPECT_THAT(v, ElementsAre("a", "b", "c"));

    // Equivalent to...
    using absl::ByChar;
    v = absl::StrSplit("a,b,c", ByChar(','));
    EXPECT_THAT(v, ElementsAre("a", "b", "c"));
  }

  {
    // Uses the literal string "=>" as the delimiter.
    const std::vector<std::string> v = absl::StrSplit("a=>b=>c", "=>");
    EXPECT_THAT(v, ElementsAre("a", "b", "c"));
  }

  {
    // The substrings are returned as string_views, eliminating copying.
    std::vector<absl::string_view> v = absl::StrSplit("a,b,c", ',');
    EXPECT_THAT(v, ElementsAre("a", "b", "c"));
  }

  {
    // Leading and trailing empty substrings.
    std::vector<std::string> v = absl::StrSplit(",a,b,c,", ',');
    EXPECT_THAT(v, ElementsAre("", "a", "b", "c", ""));
  }

  {
    // Splits on a delimiter that is not found.
    std::vector<std::string> v = absl::StrSplit("abc", ',');
    EXPECT_THAT(v, ElementsAre("abc"));
  }

  {
    // Splits the input string into individual characters by using an empty
    // string as the delimiter.
    std::vector<std::string> v = absl::StrSplit("abc", "");
    EXPECT_THAT(v, ElementsAre("a", "b", "c"));
  }

  {
    // Splits string data with embedded NUL characters, using NUL as the
    // delimiter. A simple delimiter of "\0" doesn't work because strlen() will
    // say that's the empty string when constructing the absl::string_view
    // delimiter. Instead, a non-empty string containing NUL can be used as the
    // delimiter.
    std::string embedded_nulls("a\0b\0c", 5);
    std::string null_delim("\0", 1);
    std::vector<std::string> v = absl::StrSplit(embedded_nulls, null_delim);
    EXPECT_THAT(v, ElementsAre("a", "b", "c"));
  }

  {
    // Stores first two split strings as the members in a std::pair.
    std::pair<std::string, std::string> p = absl::StrSplit("a,b,c", ',');
    EXPECT_EQ("a", p.first);
    EXPECT_EQ("b", p.second);
    // "c" is omitted because std::pair can hold only two elements.
  }

  {
    // Results stored in std::set<std::string>
    std::set<std::string> v = absl::StrSplit("a,b,c,a,b,c,a,b,c", ',');
    EXPECT_THAT(v, ElementsAre("a", "b", "c"));
  }

  {
    // Uses a non-const char* delimiter.
    char a[] = ",";
    char* d = a + 0;
    std::vector<std::string> v = absl::StrSplit("a,b,c", d);
    EXPECT_THAT(v, ElementsAre("a", "b", "c"));
  }

  {
    // Results split using either of , or ;
    using absl::ByAnyChar;
    std::vector<std::string> v = absl::StrSplit("a,b;c", ByAnyChar(",;"));
    EXPECT_THAT(v, ElementsAre("a", "b", "c"));
  }

  {
    // Uses the SkipWhitespace predicate.
    using absl::SkipWhitespace;
    std::vector<std::string> v =
        absl::StrSplit(" a , ,,b,", ',', SkipWhitespace());
    EXPECT_THAT(v, ElementsAre(" a ", "b"));
  }

  {
    // Uses the ByLength delimiter.
    using absl::ByLength;
    std::vector<std::string> v = absl::StrSplit("abcdefg", ByLength(3));
    EXPECT_THAT(v, ElementsAre("abc", "def", "g"));
  }

  {
    // Different forms of initialization / conversion.
    std::vector<std::string> v1 = absl::StrSplit("a,b,c", ',');
    EXPECT_THAT(v1, ElementsAre("a", "b", "c"));
    std::vector<std::string> v2(absl::StrSplit("a,b,c", ','));
    EXPECT_THAT(v2, ElementsAre("a", "b", "c"));
    auto v3 = std::vector<std::string>(absl::StrSplit("a,b,c", ','));
    EXPECT_THAT(v3, ElementsAre("a", "b", "c"));
    v3 = absl::StrSplit("a,b,c", ',');
    EXPECT_THAT(v3, ElementsAre("a", "b", "c"));
  }

  {
    // Results stored in a std::map.
    std::map<std::string, std::string> m = absl::StrSplit("a,1,b,2,a,3", ',');
    EXPECT_EQ(2, m.size());
    EXPECT_EQ("3", m["a"]);
    EXPECT_EQ("2", m["b"]);
  }

  {
    // Results stored in a std::multimap.
    std::multimap<std::string, std::string> m =
        absl::StrSplit("a,1,b,2,a,3", ',');
    EXPECT_EQ(3, m.size());
    // std::multimap::find() is allowed to return *any* element whose key is
    // equivalent to "a". lower_bound() always yields the first element of the
    // equal-key run, so the "1" then "3" expectations below do not depend on
    // the standard library implementation.
    auto it = m.lower_bound("a");
    EXPECT_EQ("1", it->second);
    ++it;
    EXPECT_EQ("3", it->second);
    it = m.find("b");
    EXPECT_EQ("2", it->second);
  }

  {
    // Demonstrates use in a range-based for loop in C++11.
    std::string s = "x,x,x,x,x,x,x";
    for (absl::string_view sp : absl::StrSplit(s, ',')) {
      EXPECT_EQ("x", sp);
    }
  }

  {
    // Demonstrates use with a Predicate in a range-based for loop.
    using absl::SkipWhitespace;
    std::string s = " ,x,,x,,x,x,x,,";
    for (absl::string_view sp : absl::StrSplit(s, ',', SkipWhitespace())) {
      EXPECT_EQ("x", sp);
    }
  }

  {
    // Demonstrates a "smart" split to std::map using two separate calls to
    // absl::StrSplit. One call to split the records, and another call to split
    // the keys and values. This also uses the MaxSplits delimiter so that the
    // std::string "a=b=c" will split to "a" -> "b=c".
    std::map<std::string, std::string> m;
    for (absl::string_view sp : absl::StrSplit("a=b=c,d=e,f=,g", ',')) {
      m.insert(absl::StrSplit(sp, absl::MaxSplits('=', 1)));
    }
    EXPECT_EQ("b=c", m.find("a")->second);
    EXPECT_EQ("e", m.find("d")->second);
    EXPECT_EQ("", m.find("f")->second);
    EXPECT_EQ("", m.find("g")->second);
  }
}
253
254 //
255 // Tests for SplitIterator
256 //
257
// Exercises the basic iterator operations of the splitter's iterator:
// dereference, arrow access, pre-increment, post-increment and comparison
// against end().
TEST(SplitIterator, Basics) {
  auto splitter = absl::StrSplit("a,b", ',');
  auto it = splitter.begin();
  auto end = splitter.end();

  // Fatal on failure: the statements that follow dereference and advance `it`,
  // which is not meaningful once it already equals end().
  ASSERT_NE(it, end);
  EXPECT_EQ("a", *it);  // tests dereference
  ++it;                 // tests preincrement
  ASSERT_NE(it, end);
  EXPECT_EQ("b",
            std::string(it->data(), it->size()));  // tests dereference as ptr
  it++;                                            // tests postincrement
  EXPECT_EQ(it, end);
}
272
273 // Simple Predicate to skip a particular string.
274 class Skip {
275 public:
Skip(const std::string & s)276 explicit Skip(const std::string& s) : s_(s) {}
operator ()(absl::string_view sp)277 bool operator()(absl::string_view sp) { return sp != s_; }
278
279 private:
280 std::string s_;
281 };
282
// Same iterator walk as the Basics test, but with a predicate installed so
// that the middle piece ("b") is never yielded.
TEST(SplitIterator, Predicate) {
  auto splitter = absl::StrSplit("a,b,c", ',', Skip("b"));
  auto it = splitter.begin();
  auto end = splitter.end();

  // Fatal on failure: the statements that follow dereference and advance `it`,
  // which is not meaningful once it already equals end().
  ASSERT_NE(it, end);
  EXPECT_EQ("a", *it);  // tests dereference
  ++it;                 // tests preincrement -- "b" should be skipped here.
  ASSERT_NE(it, end);
  EXPECT_EQ("c",
            std::string(it->data(), it->size()));  // tests dereference as ptr
  it++;                                            // tests postincrement
  EXPECT_EQ(it, end);
}
297
// Table-driven check of iterator output for empty input and for delimiters at
// the start and/or end of the input.
TEST(SplitIterator, EdgeCases) {
  // Expected input and output, assuming a delimiter of ','
  struct {
    std::string in;
    std::vector<std::string> expect;
  } specs[] = {
      {"", {""}},
      {"foo", {"foo"}},
      {",", {"", ""}},
      {",foo", {"", "foo"}},
      {"foo,", {"foo", ""}},
      {",foo,", {"", "foo", ""}},
      {"foo,bar", {"foo", "bar"}},
  };

  for (const auto& spec : specs) {
    SCOPED_TRACE(spec.in);
    auto splitter = absl::StrSplit(spec.in, ',');
    auto it = splitter.begin();
    auto end = splitter.end();
    for (const auto& expected : spec.expect) {
      // Fatal on failure: the next line dereferences and advances `it`, which
      // must not happen once the iterator has already reached end().
      ASSERT_NE(it, end);
      EXPECT_EQ(expected, *it++);
    }
    // Every expected piece was consumed, so nothing may be left over.
    EXPECT_EQ(it, end);
  }
}
325
// A const-qualified splitter must still be iterable.
TEST(Splitter, Const) {
  const auto frozen_splitter = absl::StrSplit("a,b,c", ',');
  EXPECT_THAT(frozen_splitter, ElementsAre("a", "b", "c"));
}
330
// Pins the (intentionally preserved) difference between splitting an empty
// string_view and splitting a null string_view.
TEST(Split, EmptyAndNull) {
  // Attention: Splitting a null absl::string_view is different than splitting
  // an empty absl::string_view even though both string_views are considered
  // equal. This behavior is likely surprising and undesirable. However, to
  // maintain backward compatibility, there is a small "hack" in
  // str_split_internal.h that preserves this behavior. If that behavior is ever
  // changed/fixed, this test will need to be updated.
  const absl::string_view empty_view = absl::string_view("");
  const absl::string_view null_view = absl::string_view();

  // Empty input yields exactly one empty piece...
  EXPECT_THAT(absl::StrSplit(empty_view, '-'), ElementsAre(""));
  // ...whereas null input yields no pieces at all.
  EXPECT_THAT(absl::StrSplit(null_view, '-'), ElementsAre());
}
341
// Verifies that two non-end iterators compare correctly by using one of them
// as the stopping point for a walk with the other.
TEST(SplitIterator, EqualityAsEndCondition) {
  auto splitter = absl::StrSplit("a,b,c", ',');
  auto cursor = splitter.begin();
  auto stop = cursor;

  // Advances `stop` two positions so that it refers to "c" in the input text.
  for (int hop = 0; hop < 2; ++hop) {
    ++stop;
  }
  EXPECT_EQ("c", *stop);

  // `stop` is not the end iterator; the loop below only terminates correctly
  // if equality between non-end iterators works. Since `stop` refers to "c",
  // only "a" and "b" may be collected.
  std::vector<absl::string_view> collected;
  while (cursor != stop) {
    collected.push_back(*cursor);
    ++cursor;
  }
  EXPECT_THAT(collected, ElementsAre("a", "b"));
}
362
363 //
364 // Tests for Splitter
365 //
366
// A splitter held in a named variable can drive a range-based for loop.
TEST(Splitter, RangeIterators) {
  auto pieces = absl::StrSplit("a,b,c", ',');

  std::vector<absl::string_view> collected;
  for (const absl::string_view piece : pieces) {
    collected.push_back(piece);
  }

  EXPECT_THAT(collected, ElementsAre("a", "b", "c"));
}
375
376 // Some template functions for use in testing conversion operators
377 template <typename ContainerType, typename Splitter>
TestConversionOperator(const Splitter & splitter)378 void TestConversionOperator(const Splitter& splitter) {
379 ContainerType output = splitter;
380 EXPECT_THAT(output, UnorderedElementsAre("a", "b", "c", "d"));
381 }
382
383 template <typename MapType, typename Splitter>
TestMapConversionOperator(const Splitter & splitter)384 void TestMapConversionOperator(const Splitter& splitter) {
385 MapType m = splitter;
386 EXPECT_THAT(m, UnorderedElementsAre(Pair("a", "b"), Pair("c", "d")));
387 }
388
// Helper for the conversion-operator tests: implicitly converts `splitter`
// into a std::pair<FirstType, SecondType> and expects it to equal ("a", "b").
template <typename FirstType, typename SecondType, typename Splitter>
void TestPairConversionOperator(const Splitter& splitter) {
  using PairType = std::pair<FirstType, SecondType>;

  // Copy-initialization on purpose: this is the implicit conversion under test.
  const PairType converted = splitter;
  const PairType wanted("a", "b");
  EXPECT_EQ(converted, wanted);
}
394
// Converts one splitter into every supported sequence, set, map and pair type,
// for both absl::string_view and std::string elements.
TEST(Splitter, ConversionOperator) {
  using View = absl::string_view;
  using Str = std::string;

  auto abcd = absl::StrSplit("a,b,c,d", ',');

  // Sequence and set containers.
  TestConversionOperator<std::vector<View>>(abcd);
  TestConversionOperator<std::vector<Str>>(abcd);
  TestConversionOperator<std::list<View>>(abcd);
  TestConversionOperator<std::list<Str>>(abcd);
  TestConversionOperator<std::deque<View>>(abcd);
  TestConversionOperator<std::deque<Str>>(abcd);
  TestConversionOperator<std::set<View>>(abcd);
  TestConversionOperator<std::set<Str>>(abcd);
  TestConversionOperator<std::multiset<View>>(abcd);
  TestConversionOperator<std::multiset<Str>>(abcd);
  TestConversionOperator<std::unordered_set<Str>>(abcd);

  // Map-like containers from the standard library.
  TestMapConversionOperator<std::map<View, View>>(abcd);
  TestMapConversionOperator<std::map<View, Str>>(abcd);
  TestMapConversionOperator<std::map<Str, View>>(abcd);
  TestMapConversionOperator<std::map<Str, Str>>(abcd);
  TestMapConversionOperator<std::multimap<View, View>>(abcd);
  TestMapConversionOperator<std::multimap<View, Str>>(abcd);
  TestMapConversionOperator<std::multimap<Str, View>>(abcd);
  TestMapConversionOperator<std::multimap<Str, Str>>(abcd);
  TestMapConversionOperator<std::unordered_map<Str, Str>>(abcd);

  // Map-like containers from Abseil.
  TestMapConversionOperator<absl::node_hash_map<View, View>>(abcd);
  TestMapConversionOperator<absl::node_hash_map<View, Str>>(abcd);
  TestMapConversionOperator<absl::node_hash_map<Str, View>>(abcd);
  TestMapConversionOperator<absl::flat_hash_map<View, View>>(abcd);
  TestMapConversionOperator<absl::flat_hash_map<View, Str>>(abcd);
  TestMapConversionOperator<absl::flat_hash_map<Str, View>>(abcd);

  // std::pair.
  TestPairConversionOperator<View, View>(abcd);
  TestPairConversionOperator<View, Str>(abcd);
  TestPairConversionOperator<Str, View>(abcd);
  TestPairConversionOperator<Str, Str>(abcd);
}
446
447 // A few additional tests for conversion to std::pair. This conversion is
448 // different from others because a std::pair always has exactly two elements:
449 // .first and .second. The split has to work even when the split has
450 // less-than, equal-to, and more-than 2 strings.
// Conversion to std::pair when the split yields fewer than, exactly, and more
// than two pieces.
TEST(Splitter, ToPair) {
  using StringPair = std::pair<std::string, std::string>;

  {
    // Empty input: both halves are empty.
    StringPair halves = absl::StrSplit("", ',');
    EXPECT_THAT(halves, Pair("", ""));
  }

  {
    // A single piece fills only .first.
    StringPair halves = absl::StrSplit("a", ',');
    EXPECT_THAT(halves, Pair("a", ""));
  }

  {
    // A leading delimiter leaves .first empty and fills .second.
    StringPair halves = absl::StrSplit(",b", ',');
    EXPECT_THAT(halves, Pair("", "b"));
  }

  {
    // Exactly two pieces fill both halves.
    StringPair halves = absl::StrSplit("a,b", ',');
    EXPECT_THAT(halves, Pair("a", "b"));
  }

  {
    // Pieces after the second one ("c" here) are dropped.
    StringPair halves = absl::StrSplit("a,b,c", ',');
    EXPECT_THAT(halves, Pair("a", "b"));
  }
}
488
// Runs the same input through each built-in predicate and checks which pieces
// survive.
TEST(Splitter, Predicates) {
  using absl::AllowEmpty;
  using absl::SkipEmpty;
  using absl::SkipWhitespace;
  static const char kInput[] = ",a, ,b,";

  {
    // Without a predicate, empty pieces are kept.
    auto plain = absl::StrSplit(kInput, ',');
    std::vector<std::string> pieces = plain;
    EXPECT_THAT(pieces, ElementsAre("", "a", " ", "b", ""));
  }

  {
    // AllowEmpty keeps empty pieces too...
    auto permissive = absl::StrSplit(kInput, ',', AllowEmpty());
    std::vector<std::string> with_allow_empty = permissive;
    EXPECT_THAT(with_allow_empty, ElementsAre("", "a", " ", "b", ""));

    // ...and therefore matches the predicate-free result exactly.
    auto plain = absl::StrSplit(kInput, ',');
    std::vector<std::string> without_predicate = plain;
    EXPECT_EQ(with_allow_empty, without_predicate);
  }

  {
    // SkipEmpty drops empty pieces but keeps the whitespace-only one.
    auto non_empty = absl::StrSplit(kInput, ',', SkipEmpty());
    std::vector<std::string> pieces = non_empty;
    EXPECT_THAT(pieces, ElementsAre("a", " ", "b"));
  }

  {
    // SkipWhitespace drops empty and whitespace-only pieces.
    auto non_blank = absl::StrSplit(kInput, ',', SkipWhitespace());
    std::vector<std::string> pieces = non_blank;
    EXPECT_THAT(pieces, ElementsAre("a", "b"));
  }
}
528
529 //
530 // Tests for StrSplit()
531 //
532
// Smoke tests for absl::StrSplit(): discarding the result, initializing
// containers from it, and assigning it to already-constructed containers.
TEST(Split, Basics) {
  {
    // The result is deliberately discarded; the call merely has to compile
    // and run.
    absl::StrSplit("a,b,c", ',');
  }

  {
    std::vector<absl::string_view> views = absl::StrSplit("a,b,c", ',');
    EXPECT_THAT(views, ElementsAre("a", "b", "c"));
  }

  {
    std::vector<std::string> strings = absl::StrSplit("a,b,c", ',');
    EXPECT_THAT(strings, ElementsAre("a", "b", "c"));
  }

  {
    // Assignment (as opposed to initialization) must work as well. This
    // requires a little extra work with C++11 because of overloads with
    // initializer_list.
    std::vector<std::string> sequence;
    sequence = absl::StrSplit("a,b,c", ',');
    EXPECT_THAT(sequence, ElementsAre("a", "b", "c"));

    std::map<std::string, std::string> ordered;
    ordered = absl::StrSplit("a,b,c", ',');
    EXPECT_EQ(2, ordered.size());

    std::unordered_map<std::string, std::string> hashed;
    hashed = absl::StrSplit("a,b,c", ',');
    EXPECT_EQ(2, hashed.size());
  }
}
565
ReturnStringView()566 absl::string_view ReturnStringView() { return "Hello World"; }
// Produces "Hello World" as a `const char*` temporary.
const char* ReturnConstCharP() {
  const char* greeting = "Hello World";
  return greeting;
}
// Produces "Hello World" as a non-const `char*` temporary. The pointee is a
// string literal, so callers must not write through the returned pointer.
char* ReturnCharP() {
  const char* greeting = "Hello World";
  return const_cast<char*>(greeting);
}
569
// absl::StrSplit() must accept string_view, const char* and char* values that
// are returned directly from a function call.
TEST(Split, AcceptsCertainTemporaries) {
  std::vector<std::string> words;

  words = absl::StrSplit(ReturnStringView(), ' ');
  EXPECT_THAT(words, ElementsAre("Hello", "World"));

  words = absl::StrSplit(ReturnConstCharP(), ' ');
  EXPECT_THAT(words, ElementsAre("Hello", "World"));

  words = absl::StrSplit(ReturnCharP(), ' ');
  EXPECT_THAT(words, ElementsAre("Hello", "World"));
}
579
// Splitting a temporary std::string must remain valid after the temporary has
// been destroyed.
TEST(Split, Temporary) {
  // Use a std::string longer than the SSO length, so that when the temporary is
  // destroyed, if the splitter keeps a reference to the string's contents,
  // it'll reference freed memory instead of just dead on-stack memory.
  const char letters[] = "a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u";
  EXPECT_LT(sizeof(std::string), ABSL_ARRAYSIZE(letters))
      << "Input should be larger than fits on the stack.";

  // First pass. This happens more often in C++11 as part of a range-based for
  // loop.
  auto first_pass = absl::StrSplit(std::string(letters), ',');
  std::string next_letter = "a";
  for (absl::string_view piece : first_pass) {
    EXPECT_EQ(next_letter, piece);
    next_letter[0] += 1;
  }
  // One step past the final piece "u".
  EXPECT_EQ("v", next_letter);

  // Second pass over a fresh temporary, checked the same way.
  auto second_pass = absl::StrSplit(std::string(letters), ',');
  next_letter = "a";
  for (absl::string_view piece : second_pass) {
    EXPECT_EQ(next_letter, piece);
    next_letter[0] += 1;
  }
  EXPECT_EQ("v", next_letter);
}
606
// Copy-constructs a T from `value` on the heap and returns the owning pointer.
template <typename T>
static std::unique_ptr<T> CopyToHeap(const T& value) {
  std::unique_ptr<T> heap_copy(new T(value));
  return heap_copy;
}
611
// A splitter built from an lvalue string stays usable as a copy after the
// splitter it was copied from has been destroyed.
TEST(Split, LvalueCaptureIsCopyable) {
  std::string text = "a,b";
  auto on_heap = CopyToHeap(absl::StrSplit(text, ','));
  auto on_stack = *on_heap;
  on_heap.reset();  // Destroys the splitter the copy was made from.

  std::vector<std::string> pieces = on_stack;
  EXPECT_THAT(pieces, ElementsAre("a", "b"));
}
620
// A splitter built from a temporary string stays usable as a copy after the
// splitter it was copied from has been destroyed.
TEST(Split, TemporaryCaptureIsCopyable) {
  auto on_heap = CopyToHeap(absl::StrSplit(std::string("a,b"), ','));
  auto on_stack = *on_heap;
  on_heap.reset();  // Destroys the splitter the copy was made from.

  std::vector<std::string> pieces = on_stack;
  EXPECT_THAT(pieces, ElementsAre("a", "b"));
}
628
// All four copy/move special member operations of the splitter must compile,
// and the final object must still produce the right pieces.
TEST(Split, SplitterIsCopyableAndMoveable) {
  auto original = absl::StrSplit("foo", '-');

  // The mere fact that these statements compile is part of the test.
  auto duplicate = original;            // Copy construct
  auto relocated = std::move(original); // Move construct
  duplicate = relocated;                // Copy assign
  relocated = std::move(duplicate);     // Move assign

  EXPECT_THAT(relocated, ElementsAre("foo"));
}
640
// The delimiter may be given as a char, a std::string or an absl::string_view.
TEST(Split, StringDelimiter) {
  {
    // char delimiter.
    std::vector<absl::string_view> pieces = absl::StrSplit("a,b", ',');
    EXPECT_THAT(pieces, ElementsAre("a", "b"));
  }

  {
    // std::string delimiter.
    std::vector<absl::string_view> pieces =
        absl::StrSplit("a,b", std::string(","));
    EXPECT_THAT(pieces, ElementsAre("a", "b"));
  }

  {
    // absl::string_view delimiter.
    std::vector<absl::string_view> pieces =
        absl::StrSplit("a,b", absl::string_view(","));
    EXPECT_THAT(pieces, ElementsAre("a", "b"));
  }
}
658
659 #if !defined(__cpp_char8_t)
660 #if defined(__clang__)
661 #pragma clang diagnostic push
662 #pragma clang diagnostic ignored "-Wc++2a-compat"
663 #endif
// Splitting UTF-8 encoded text, with ASCII and with UTF-8 delimiters.
TEST(Split, UTF8) {
  std::string greek = u8"\u03BA\u1F79\u03C3\u03BC\u03B5";
  {
    // UTF-8 text, ASCII delimiter.
    std::string subject = "a," + greek;
    std::vector<absl::string_view> pieces = absl::StrSplit(subject, ',');
    EXPECT_THAT(pieces, ElementsAre("a", greek));
  }

  {
    // UTF-8 text, delimiter that itself contains UTF-8.
    std::string subject = "a," + greek + ",b";
    std::string wide_delimiter = "," + greek + ",";
    std::vector<absl::string_view> pieces =
        absl::StrSplit(subject, wide_delimiter);
    EXPECT_THAT(pieces, ElementsAre("a", "b"));
  }

  {
    // UTF-8 text, ByAnyChar over ASCII whitespace characters.
    std::vector<absl::string_view> pieces =
        absl::StrSplit(u8"Foo h\u00E4llo th\u4E1Ere", absl::ByAnyChar(" \t"));
    EXPECT_THAT(pieces, ElementsAre("Foo", u8"h\u00E4llo", u8"th\u4E1Ere"));
  }
}
690 #if defined(__clang__)
691 #pragma clang diagnostic pop
692 #endif
693 #endif // !defined(__cpp_char8_t)
694
// An empty-string delimiter splits the input into single characters.
TEST(Split, EmptyStringDelimiter) {
  {
    // Empty input still yields one (empty) piece.
    std::vector<std::string> chars = absl::StrSplit("", "");
    EXPECT_THAT(chars, ElementsAre(""));
  }

  {
    // One character in, one piece out.
    std::vector<std::string> chars = absl::StrSplit("a", "");
    EXPECT_THAT(chars, ElementsAre("a"));
  }

  {
    // Two characters become two pieces.
    std::vector<std::string> chars = absl::StrSplit("ab", "");
    EXPECT_THAT(chars, ElementsAre("a", "b"));
  }

  {
    // A space is a character like any other.
    std::vector<std::string> chars = absl::StrSplit("a b", "");
    EXPECT_THAT(chars, ElementsAre("a", " ", "b"));
  }
}
716
// Splitting on the two-character delimiter "//", including inputs that only
// contain part of the delimiter or overlapping occurrences of it.
TEST(Split, SubstrDelimiter) {
  absl::string_view double_slash("//");
  std::vector<absl::string_view> pieces;

  // Empty input.
  pieces = absl::StrSplit("", double_slash);
  EXPECT_THAT(pieces, ElementsAre(""));

  // Input consisting of nothing but the delimiter.
  pieces = absl::StrSplit("//", double_slash);
  EXPECT_THAT(pieces, ElementsAre("", ""));

  // No delimiter present.
  pieces = absl::StrSplit("ab", double_slash);
  EXPECT_THAT(pieces, ElementsAre("ab"));

  // Trailing delimiter.
  pieces = absl::StrSplit("ab//", double_slash);
  EXPECT_THAT(pieces, ElementsAre("ab", ""));

  // A single '/' is only part of the delimiter and does not split.
  pieces = absl::StrSplit("ab/", double_slash);
  EXPECT_THAT(pieces, ElementsAre("ab/"));

  pieces = absl::StrSplit("a/b", double_slash);
  EXPECT_THAT(pieces, ElementsAre("a/b"));

  // Exactly one delimiter in the middle.
  pieces = absl::StrSplit("a//b", double_slash);
  EXPECT_THAT(pieces, ElementsAre("a", "b"));

  // Three slashes: the first two form the delimiter, the third stays.
  pieces = absl::StrSplit("a///b", double_slash);
  EXPECT_THAT(pieces, ElementsAre("a", "/b"));

  // Four slashes: two adjacent delimiters with an empty piece between them.
  pieces = absl::StrSplit("a////b", double_slash);
  EXPECT_THAT(pieces, ElementsAre("a", "", "b"));
}
748
// Checks where empty pieces appear in the output when delimiters are leading,
// trailing or adjacent.
TEST(Split, EmptyResults) {
  std::vector<absl::string_view> pieces;

  // Empty input.
  pieces = absl::StrSplit("", '#');
  EXPECT_THAT(pieces, ElementsAre(""));

  // Input that is just the delimiter.
  pieces = absl::StrSplit("#", '#');
  EXPECT_THAT(pieces, ElementsAre("", ""));

  // Leading delimiter.
  pieces = absl::StrSplit("#cd", '#');
  EXPECT_THAT(pieces, ElementsAre("", "cd"));

  // Trailing delimiter.
  pieces = absl::StrSplit("ab#cd#", '#');
  EXPECT_THAT(pieces, ElementsAre("ab", "cd", ""));

  // Adjacent delimiters in the middle.
  pieces = absl::StrSplit("ab##cd", '#');
  EXPECT_THAT(pieces, ElementsAre("ab", "", "cd"));

  // Adjacent delimiters at the end.
  pieces = absl::StrSplit("ab##", '#');
  EXPECT_THAT(pieces, ElementsAre("ab", "", ""));

  // Repeated content with a trailing delimiter.
  pieces = absl::StrSplit("ab#ab#", '#');
  EXPECT_THAT(pieces, ElementsAre("ab", "ab", ""));

  // Input made up solely of delimiters.
  pieces = absl::StrSplit("aaaa", 'a');
  EXPECT_THAT(pieces, ElementsAre("", "", "", "", ""));

  // With SkipEmpty, empty input produces no pieces at all.
  pieces = absl::StrSplit("", '#', absl::SkipEmpty());
  EXPECT_THAT(pieces, ElementsAre());
}
779
780 template <typename Delimiter>
IsFoundAtStartingPos(absl::string_view text,Delimiter d,size_t starting_pos,int expected_pos)781 static bool IsFoundAtStartingPos(absl::string_view text, Delimiter d,
782 size_t starting_pos, int expected_pos) {
783 absl::string_view found = d.Find(text, starting_pos);
784 return found.data() != text.data() + text.size() &&
785 expected_pos == found.data() - text.data();
786 }
787
// Helper function for testing Delimiter objects. Returns true if the given
// Delimiter is found in the given string at the given position. This function
// tests two cases:
//   1. The actual text given, starting at position 0
//   2. The text given with leading padding that should be ignored
793 template <typename Delimiter>
IsFoundAt(absl::string_view text,Delimiter d,int expected_pos)794 static bool IsFoundAt(absl::string_view text, Delimiter d, int expected_pos) {
795 const std::string leading_text = ",x,y,z,";
796 return IsFoundAtStartingPos(text, d, 0, expected_pos) &&
797 IsFoundAtStartingPos(leading_text + std::string(text), d,
798 leading_text.length(),
799 expected_pos + leading_text.length());
800 }
801
802 //
803 // Tests for ByString
804 //
805
806 // Tests using any delimiter that represents a single comma.
// Shared checks for any delimiter `d` that stands for a single comma: it must
// be found at the first comma of each input that has one, and must not be
// found in inputs without a comma.
template <typename Delimiter>
void TestComma(Delimiter d) {
  struct Probe {
    const char* text;
    int pos;
  };

  // Inputs that contain a comma, with the offset of the first one.
  const Probe kHits[] = {
      {",", 0},   {"a,", 1},   {",b", 0},
      {"a,b", 1}, {"a,b,", 1}, {"a,b,c", 1},
  };
  for (const Probe& hit : kHits) {
    SCOPED_TRACE(hit.text);
    EXPECT_TRUE(IsFoundAt(hit.text, d, hit.pos));
  }

  // Inputs without any comma.
  const Probe kMisses[] = {
      {"", -1},      {" ", -1},     {"a", -1},
      {"a b c", -1}, {"a;b;c", -1}, {";", -1},
  };
  for (const Probe& miss : kMisses) {
    SCOPED_TRACE(miss.text);
    EXPECT_FALSE(IsFoundAt(miss.text, d, miss.pos));
  }
}
822
// ByString as a comma delimiter, plus its special handling of the empty
// string.
TEST(Delimiter, ByString) {
  using absl::ByString;

  // Used as a temporary.
  TestComma(ByString(","));

  // Used as a named variable.
  ByString named_comma(",");
  TestComma(named_comma);

  // The first occurrence of empty string ("") in a string is at position 0.
  // The check right below demonstrates this for absl::string_view::find().
  // If the ByString delimiter returned position 0 for this, there would
  // be an infinite loop in the SplitIterator code. To avoid this, empty string
  // is a special case in that it always returns the item at position 1.
  absl::string_view sample("abc");
  EXPECT_EQ(0, sample.find(""));  // "" is found at position 0

  ByString empty_delimiter("");
  // Inputs shorter than two characters have no position 1 to report.
  EXPECT_FALSE(IsFoundAt("", empty_delimiter, 0));
  EXPECT_FALSE(IsFoundAt("a", empty_delimiter, 0));
  // Longer inputs report position 1.
  EXPECT_TRUE(IsFoundAt("ab", empty_delimiter, 1));
  EXPECT_TRUE(IsFoundAt("abc", empty_delimiter, 1));
}
844
// ByChar as a comma delimiter, both as a temporary and as a named object.
TEST(Split, ByChar) {
  using absl::ByChar;

  // Used as a temporary.
  TestComma(ByChar(','));

  // Used as a named variable.
  ByChar named_comma(',');
  TestComma(named_comma);
}
853
854 //
855 // Tests for ByAnyChar
856 //
857
// ByAnyChar with one delimiter character, with two, and with none.
TEST(Delimiter, ByAnyChar) {
  using absl::ByAnyChar;

  // --- A single delimiter character: ',' ---
  ByAnyChar comma_only(",");
  // Found
  EXPECT_TRUE(IsFoundAt(",", comma_only, 0));
  EXPECT_TRUE(IsFoundAt("a,", comma_only, 1));
  EXPECT_TRUE(IsFoundAt("a,b", comma_only, 1));
  EXPECT_TRUE(IsFoundAt(",b", comma_only, 0));
  // Not found
  EXPECT_FALSE(IsFoundAt("", comma_only, -1));
  EXPECT_FALSE(IsFoundAt(" ", comma_only, -1));
  EXPECT_FALSE(IsFoundAt("a", comma_only, -1));
  EXPECT_FALSE(IsFoundAt("a;b;c", comma_only, -1));
  EXPECT_FALSE(IsFoundAt(";", comma_only, -1));

  // --- Two delimiter characters: ',' and ';' ---
  ByAnyChar comma_or_semicolon(",;");
  // Found
  EXPECT_TRUE(IsFoundAt(",", comma_or_semicolon, 0));
  EXPECT_TRUE(IsFoundAt(";", comma_or_semicolon, 0));
  EXPECT_TRUE(IsFoundAt(",;", comma_or_semicolon, 0));
  EXPECT_TRUE(IsFoundAt(";,", comma_or_semicolon, 0));
  EXPECT_TRUE(IsFoundAt(",;b", comma_or_semicolon, 0));
  EXPECT_TRUE(IsFoundAt(";,b", comma_or_semicolon, 0));
  EXPECT_TRUE(IsFoundAt("a;,", comma_or_semicolon, 1));
  EXPECT_TRUE(IsFoundAt("a,;", comma_or_semicolon, 1));
  EXPECT_TRUE(IsFoundAt("a;,b", comma_or_semicolon, 1));
  EXPECT_TRUE(IsFoundAt("a,;b", comma_or_semicolon, 1));
  // Not found
  EXPECT_FALSE(IsFoundAt("", comma_or_semicolon, -1));
  EXPECT_FALSE(IsFoundAt(" ", comma_or_semicolon, -1));
  EXPECT_FALSE(IsFoundAt("a", comma_or_semicolon, -1));
  EXPECT_FALSE(IsFoundAt("a=b=c", comma_or_semicolon, -1));
  EXPECT_FALSE(IsFoundAt("=", comma_or_semicolon, -1));

  // --- No delimiter characters ---
  // ByAnyChar behaves just like ByString when given a delimiter of empty
  // string. That is, it always returns a zero-length absl::string_view
  // referring to the item at position 1, not position 0.
  ByAnyChar no_chars("");
  EXPECT_FALSE(IsFoundAt("", no_chars, 0));
  EXPECT_FALSE(IsFoundAt("a", no_chars, 0));
  EXPECT_TRUE(IsFoundAt("ab", no_chars, 1));
  EXPECT_TRUE(IsFoundAt("abc", no_chars, 1));
}
901
902 //
903 // Tests for ByLength
904 //
905
// ByLength(4) reports a split point at offset 4 for inputs longer than four
// characters, and none for inputs of four characters or fewer.
TEST(Delimiter, ByLength) {
  using absl::ByLength;

  ByLength every_four(4);

  // Longer than four characters: found at offset 4, whatever the content.
  EXPECT_TRUE(IsFoundAt("abcde", every_four, 4));
  EXPECT_TRUE(IsFoundAt("abcdefghijklmnopqrstuvwxyz", every_four, 4));
  EXPECT_TRUE(IsFoundAt("a b,c\nd", every_four, 4));

  // Four characters or fewer: not found.
  EXPECT_FALSE(IsFoundAt("", every_four, 0));
  EXPECT_FALSE(IsFoundAt("a", every_four, 0));
  EXPECT_FALSE(IsFoundAt("ab", every_four, 0));
  EXPECT_FALSE(IsFoundAt("abc", every_four, 0));
  EXPECT_FALSE(IsFoundAt("abcd", every_four, 0));
}
922
// Splits a string slightly larger than 2 GiB. Does nothing when size_t is not
// wider than four bytes.
TEST(Split, WorksWithLargeStrings) {
  if (sizeof(size_t) <= 4) {
    return;
  }

  std::string huge((uint32_t{1} << 31) + 1, 'x');  // 2G + 1 byte
  huge.back() = '-';  // The only delimiter is the very last character.

  std::vector<absl::string_view> halves = absl::StrSplit(huge, '-');
  EXPECT_EQ(2, halves.size());

  // The first element will contain 2G of 'x's.
  // testing::StartsWith is too slow with a 2G string, so only a few individual
  // characters are spot-checked.
  EXPECT_EQ('x', halves[0][0]);
  EXPECT_EQ('x', halves[0][1]);
  EXPECT_EQ('x', halves[0][3]);

  // Nothing follows the trailing delimiter.
  EXPECT_EQ("", halves[1]);
}
937
// Checks each internal detection trait against one type that lacks the
// detected property (int) and one type that has it.
TEST(SplitInternalTest, TypeTraits) {
  using absl::strings_internal::HasConstIterator;
  using absl::strings_internal::HasMappedType;
  using absl::strings_internal::HasValueType;
  using absl::strings_internal::IsInitializerList;
  using IntMap = std::map<int, int>;

  // mapped_type
  EXPECT_FALSE(HasMappedType<int>::value);
  EXPECT_TRUE(HasMappedType<IntMap>::value);

  // value_type
  EXPECT_FALSE(HasValueType<int>::value);
  EXPECT_TRUE(HasValueType<IntMap>::value);

  // const_iterator
  EXPECT_FALSE(HasConstIterator<int>::value);
  EXPECT_TRUE(HasConstIterator<IntMap>::value);

  // std::initializer_list
  EXPECT_FALSE(IsInitializerList<int>::value);
  EXPECT_TRUE(IsInitializerList<std::initializer_list<int>>::value);
}
952
953 } // namespace
954