1 // Copyright 2017 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "absl/strings/str_split.h"
16
17 #include <cstddef>
18 #include <cstdint>
19 #include <deque>
20 #include <initializer_list>
21 #include <list>
22 #include <map>
23 #include <memory>
24 #include <set>
25 #include <string>
26 #include <unordered_map>
27 #include <unordered_set>
28 #include <utility>
29 #include <vector>
30
31 #include "gmock/gmock.h"
32 #include "gtest/gtest.h"
33 #include "absl/base/macros.h"
34 #include "absl/container/btree_map.h"
35 #include "absl/container/btree_set.h"
36 #include "absl/container/flat_hash_map.h"
37 #include "absl/container/node_hash_map.h"
38 #include "absl/strings/string_view.h"
39
40 namespace {
41
42 using ::testing::ElementsAre;
43 using ::testing::Pair;
44 using ::testing::UnorderedElementsAre;
45
// Compile-time checks of which destination types SplitterIsConvertibleTo
// accepts and which it rejects.
TEST(Split, TraitsTest) {
  using absl::strings_internal::SplitterIsConvertibleTo;
  using StringMap = std::map<std::string, std::string>;
  using ViewMap = std::map<absl::string_view, absl::string_view>;

  // A bare int and a single std::string are both rejected.
  static_assert(!SplitterIsConvertibleTo<int>::value, "");
  static_assert(!SplitterIsConvertibleTo<std::string>::value, "");

  // Vectors are accepted for std::string / absl::string_view elements, but
  // not for int elements.
  static_assert(SplitterIsConvertibleTo<std::vector<std::string>>::value, "");
  static_assert(!SplitterIsConvertibleTo<std::vector<int>>::value, "");
  static_assert(
      SplitterIsConvertibleTo<std::vector<absl::string_view>>::value, "");

  // Maps are accepted only when neither the key nor the mapped type is int.
  static_assert(SplitterIsConvertibleTo<StringMap>::value, "");
  static_assert(SplitterIsConvertibleTo<ViewMap>::value, "");
  static_assert(
      !SplitterIsConvertibleTo<std::map<int, std::string>>::value, "");
  static_assert(
      !SplitterIsConvertibleTo<std::map<std::string, int>>::value, "");
}
73
74 // This tests the overall split API, which is made up of the absl::StrSplit()
75 // function and the Delimiter objects in the absl:: namespace.
76 // This TEST macro is outside of any namespace to require full specification of
77 // namespaces just like callers will need to use.
TEST(Split,APIExamples)78 TEST(Split, APIExamples) {
79 {
80 // Passes string delimiter. Assumes the default of ByString.
81 std::vector<std::string> v = absl::StrSplit("a,b,c", ","); // NOLINT
82 EXPECT_THAT(v, ElementsAre("a", "b", "c"));
83
84 // Equivalent to...
85 using absl::ByString;
86 v = absl::StrSplit("a,b,c", ByString(","));
87 EXPECT_THAT(v, ElementsAre("a", "b", "c"));
88
89 // Equivalent to...
90 EXPECT_THAT(absl::StrSplit("a,b,c", ByString(",")),
91 ElementsAre("a", "b", "c"));
92 }
93
94 {
95 // Same as above, but using a single character as the delimiter.
96 std::vector<std::string> v = absl::StrSplit("a,b,c", ',');
97 EXPECT_THAT(v, ElementsAre("a", "b", "c"));
98
99 // Equivalent to...
100 using absl::ByChar;
101 v = absl::StrSplit("a,b,c", ByChar(','));
102 EXPECT_THAT(v, ElementsAre("a", "b", "c"));
103 }
104
105 {
106 // Uses the Literal string "=>" as the delimiter.
107 const std::vector<std::string> v = absl::StrSplit("a=>b=>c", "=>");
108 EXPECT_THAT(v, ElementsAre("a", "b", "c"));
109 }
110
111 {
112 // The substrings are returned as string_views, eliminating copying.
113 std::vector<absl::string_view> v = absl::StrSplit("a,b,c", ',');
114 EXPECT_THAT(v, ElementsAre("a", "b", "c"));
115 }
116
117 {
118 // Leading and trailing empty substrings.
119 std::vector<std::string> v = absl::StrSplit(",a,b,c,", ',');
120 EXPECT_THAT(v, ElementsAre("", "a", "b", "c", ""));
121 }
122
123 {
124 // Splits on a delimiter that is not found.
125 std::vector<std::string> v = absl::StrSplit("abc", ',');
126 EXPECT_THAT(v, ElementsAre("abc"));
127 }
128
129 {
130 // Splits the input string into individual characters by using an empty
131 // string as the delimiter.
132 std::vector<std::string> v = absl::StrSplit("abc", "");
133 EXPECT_THAT(v, ElementsAre("a", "b", "c"));
134 }
135
136 {
137 // Splits string data with embedded NUL characters, using NUL as the
138 // delimiter. A simple delimiter of "\0" doesn't work because strlen() will
139 // say that's the empty string when constructing the absl::string_view
140 // delimiter. Instead, a non-empty string containing NUL can be used as the
141 // delimiter.
142 std::string embedded_nulls("a\0b\0c", 5);
143 std::string null_delim("\0", 1);
144 std::vector<std::string> v = absl::StrSplit(embedded_nulls, null_delim);
145 EXPECT_THAT(v, ElementsAre("a", "b", "c"));
146 }
147
148 {
149 // Stores first two split strings as the members in a std::pair.
150 std::pair<std::string, std::string> p = absl::StrSplit("a,b,c", ',');
151 EXPECT_EQ("a", p.first);
152 EXPECT_EQ("b", p.second);
153 // "c" is omitted because std::pair can hold only two elements.
154 }
155
156 {
157 // Results stored in std::set<std::string>
158 std::set<std::string> v = absl::StrSplit("a,b,c,a,b,c,a,b,c", ',');
159 EXPECT_THAT(v, ElementsAre("a", "b", "c"));
160 }
161
162 {
163 // Uses a non-const char* delimiter.
164 char a[] = ",";
165 char* d = a + 0;
166 std::vector<std::string> v = absl::StrSplit("a,b,c", d);
167 EXPECT_THAT(v, ElementsAre("a", "b", "c"));
168 }
169
170 {
171 // Results split using either of , or ;
172 using absl::ByAnyChar;
173 std::vector<std::string> v = absl::StrSplit("a,b;c", ByAnyChar(",;"));
174 EXPECT_THAT(v, ElementsAre("a", "b", "c"));
175 }
176
177 {
178 // Uses the SkipWhitespace predicate.
179 using absl::SkipWhitespace;
180 std::vector<std::string> v =
181 absl::StrSplit(" a , ,,b,", ',', SkipWhitespace());
182 EXPECT_THAT(v, ElementsAre(" a ", "b"));
183 }
184
185 {
186 // Uses the ByLength delimiter.
187 using absl::ByLength;
188 std::vector<std::string> v = absl::StrSplit("abcdefg", ByLength(3));
189 EXPECT_THAT(v, ElementsAre("abc", "def", "g"));
190 }
191
192 {
193 // Different forms of initialization / conversion.
194 std::vector<std::string> v1 = absl::StrSplit("a,b,c", ',');
195 EXPECT_THAT(v1, ElementsAre("a", "b", "c"));
196 std::vector<std::string> v2(absl::StrSplit("a,b,c", ','));
197 EXPECT_THAT(v2, ElementsAre("a", "b", "c"));
198 auto v3 = std::vector<std::string>(absl::StrSplit("a,b,c", ','));
199 EXPECT_THAT(v3, ElementsAre("a", "b", "c"));
200 v3 = absl::StrSplit("a,b,c", ',');
201 EXPECT_THAT(v3, ElementsAre("a", "b", "c"));
202 }
203
204 {
205 // Results stored in a std::map.
206 std::map<std::string, std::string> m = absl::StrSplit("a,1,b,2,a,3", ',');
207 EXPECT_EQ(2, m.size());
208 EXPECT_EQ("3", m["a"]);
209 EXPECT_EQ("2", m["b"]);
210 }
211
212 {
213 // Results stored in a std::multimap.
214 std::multimap<std::string, std::string> m =
215 absl::StrSplit("a,1,b,2,a,3", ',');
216 EXPECT_EQ(3, m.size());
217 auto it = m.find("a");
218 EXPECT_EQ("1", it->second);
219 ++it;
220 EXPECT_EQ("3", it->second);
221 it = m.find("b");
222 EXPECT_EQ("2", it->second);
223 }
224
225 {
226 // Demonstrates use in a range-based for loop in C++11.
227 std::string s = "x,x,x,x,x,x,x";
228 for (absl::string_view sp : absl::StrSplit(s, ',')) {
229 EXPECT_EQ("x", sp);
230 }
231 }
232
233 {
234 // Demonstrates use with a Predicate in a range-based for loop.
235 using absl::SkipWhitespace;
236 std::string s = " ,x,,x,,x,x,x,,";
237 for (absl::string_view sp : absl::StrSplit(s, ',', SkipWhitespace())) {
238 EXPECT_EQ("x", sp);
239 }
240 }
241
242 {
243 // Demonstrates a "smart" split to std::map using two separate calls to
244 // absl::StrSplit. One call to split the records, and another call to split
245 // the keys and values. This also uses the Limit delimiter so that the
246 // std::string "a=b=c" will split to "a" -> "b=c".
247 std::map<std::string, std::string> m;
248 for (absl::string_view sp : absl::StrSplit("a=b=c,d=e,f=,g", ',')) {
249 m.insert(absl::StrSplit(sp, absl::MaxSplits('=', 1)));
250 }
251 EXPECT_EQ("b=c", m.find("a")->second);
252 EXPECT_EQ("e", m.find("d")->second);
253 EXPECT_EQ("", m.find("f")->second);
254 EXPECT_EQ("", m.find("g")->second);
255 }
256 }
257
258 //
259 // Tests for SplitIterator
260 //
261
// Walks a two-piece split by hand to cover the basic iterator operations:
// comparison against end(), both dereference forms, and both increments.
TEST(SplitIterator, Basics) {
  auto splitter = absl::StrSplit("a,b", ',');
  auto cursor = splitter.begin();
  auto last = splitter.end();

  // First piece, read through operator*.
  EXPECT_NE(cursor, last);
  EXPECT_EQ("a", *cursor);
  ++cursor;  // exercises pre-increment

  // Second piece, read through operator->.
  EXPECT_NE(cursor, last);
  EXPECT_EQ("b", std::string(cursor->data(), cursor->size()));
  cursor++;  // exercises post-increment

  // Both pieces have been consumed.
  EXPECT_EQ(cursor, last);
}
276
277 // Simple Predicate to skip a particular string.
278 class Skip {
279 public:
Skip(const std::string & s)280 explicit Skip(const std::string& s) : s_(s) {}
operator ()(absl::string_view sp)281 bool operator()(absl::string_view sp) { return sp != s_; }
282
283 private:
284 std::string s_;
285 };
286
// Same walk as the Basics test, but with a Skip("b") predicate attached so
// that the middle piece never shows up during iteration.
TEST(SplitIterator, Predicate) {
  auto splitter = absl::StrSplit("a,b,c", ',', Skip("b"));
  auto cursor = splitter.begin();
  auto last = splitter.end();

  // First piece, read through operator*.
  EXPECT_NE(cursor, last);
  EXPECT_EQ("a", *cursor);
  ++cursor;  // pre-increment; "b" should be skipped here.

  // The next visible piece is "c", read through operator->.
  EXPECT_NE(cursor, last);
  EXPECT_EQ("c", std::string(cursor->data(), cursor->size()));
  cursor++;  // exercises post-increment

  EXPECT_EQ(cursor, last);
}
301
// Iterates by hand over a table of small inputs (empty input, leading and
// trailing delimiters, ...) and checks every piece produced.
TEST(SplitIterator, EdgeCases) {
  // Expected input and output, assuming a delimiter of ','
  struct {
    std::string in;
    std::vector<std::string> expect;
  } specs[] = {
      {"", {""}},
      {"foo", {"foo"}},
      {",", {"", ""}},
      {",foo", {"", "foo"}},
      {"foo,", {"foo", ""}},
      {",foo,", {"", "foo", ""}},
      {"foo,bar", {"foo", "bar"}},
  };

  for (const auto& spec : specs) {
    SCOPED_TRACE(spec.in);
    auto splitter = absl::StrSplit(spec.in, ',');
    auto it = splitter.begin();
    auto end = splitter.end();
    for (const auto& expected : spec.expect) {
      // Fatal on purpose: if the splitter ran out of pieces early, the
      // dereference and increment below must not be applied to the end
      // iterator.
      ASSERT_NE(it, end);
      EXPECT_EQ(expected, *it++);
    }
    // No pieces may remain beyond the expected ones.
    EXPECT_EQ(it, end);
  }
}
329
// A const-qualified splitter can still be iterated by a matcher.
TEST(Splitter, Const) {
  const auto const_splitter = absl::StrSplit("a,b,c", ',');
  EXPECT_THAT(const_splitter, ElementsAre("a", "b", "c"));
}
334
// Pins down the different results for an empty view versus a null view.
TEST(Split, EmptyAndNull) {
  // Attention: a null absl::string_view and an empty absl::string_view compare
  // equal, yet splitting them gives different results. This is likely
  // surprising and undesirable, but it is kept for backward compatibility by a
  // small "hack" in str_split_internal.h. If that behavior is ever
  // changed/fixed, this test will need to be updated.

  // Empty (non-null) view: exactly one empty piece.
  EXPECT_THAT(absl::StrSplit(absl::string_view(""), '-'), ElementsAre(""));

  // Null (default-constructed) view: no pieces at all.
  EXPECT_THAT(absl::StrSplit(absl::string_view(), '-'), ElementsAre());
}
345
// Uses a non-end SplitIterator as the stopping point of a loop, which only
// works if equality between two non-end SplitIterators is correct.
TEST(SplitIterator, EqualityAsEndCondition) {
  auto splitter = absl::StrSplit("a,b,c", ',');
  auto cursor = splitter.begin();

  // Copy the iterator and step the copy forward twice so that it refers to
  // "c" in the input text.
  auto stop = cursor;
  ++stop;
  ++stop;
  EXPECT_EQ("c", *stop);

  // Collect every piece that comes before `stop`.
  std::vector<absl::string_view> seen;
  while (cursor != stop) {
    seen.push_back(*cursor);
    ++cursor;
  }
  EXPECT_THAT(seen, ElementsAre("a", "b"));
}
366
367 //
368 // Tests for Splitter
369 //
370
// A named splitter can be consumed by a range-based for loop.
TEST(Splitter, RangeIterators) {
  auto splitter = absl::StrSplit("a,b,c", ',');
  std::vector<absl::string_view> pieces;
  for (absl::string_view piece : splitter) {
    pieces.push_back(piece);
  }
  EXPECT_THAT(pieces, ElementsAre("a", "b", "c"));
}
379
380 // Some template functions for use in testing conversion operators
381 template <typename ContainerType, typename Splitter>
TestConversionOperator(const Splitter & splitter)382 void TestConversionOperator(const Splitter& splitter) {
383 ContainerType output = splitter;
384 EXPECT_THAT(output, UnorderedElementsAre("a", "b", "c", "d"));
385 }
386
387 template <typename MapType, typename Splitter>
TestMapConversionOperator(const Splitter & splitter)388 void TestMapConversionOperator(const Splitter& splitter) {
389 MapType m = splitter;
390 EXPECT_THAT(m, UnorderedElementsAre(Pair("a", "b"), Pair("c", "d")));
391 }
392
// Helper for testing conversion operators: converts `splitter` to a
// std::pair<FirstType, SecondType> by copy-initialization and expects the
// pair ("a", "b").
template <typename FirstType, typename SecondType, typename Splitter>
void TestPairConversionOperator(const Splitter& splitter) {
  using PairType = std::pair<FirstType, SecondType>;
  PairType converted = splitter;
  const PairType expected("a", "b");
  EXPECT_EQ(converted, expected);
}
398
// Converts one splitter over "a,b,c,d" into every supported kind of
// destination: sequences and sets, map-like containers, and std::pair.
TEST(Splitter, ConversionOperator) {
  // Short names for the two element types that every container is tried with.
  using View = absl::string_view;
  using Str = std::string;

  auto splitter = absl::StrSplit("a,b,c,d", ',');

  // Sequence and set containers.
  TestConversionOperator<std::vector<View>>(splitter);
  TestConversionOperator<std::vector<Str>>(splitter);
  TestConversionOperator<std::list<View>>(splitter);
  TestConversionOperator<std::list<Str>>(splitter);
  TestConversionOperator<std::deque<View>>(splitter);
  TestConversionOperator<std::deque<Str>>(splitter);
  TestConversionOperator<std::set<View>>(splitter);
  TestConversionOperator<std::set<Str>>(splitter);
  TestConversionOperator<std::multiset<View>>(splitter);
  TestConversionOperator<std::multiset<Str>>(splitter);
  TestConversionOperator<absl::btree_set<View>>(splitter);
  TestConversionOperator<absl::btree_set<Str>>(splitter);
  TestConversionOperator<absl::btree_multiset<View>>(splitter);
  TestConversionOperator<absl::btree_multiset<Str>>(splitter);
  TestConversionOperator<std::unordered_set<Str>>(splitter);

  // Map-like containers.
  TestMapConversionOperator<std::map<View, View>>(splitter);
  TestMapConversionOperator<std::map<View, Str>>(splitter);
  TestMapConversionOperator<std::map<Str, View>>(splitter);
  TestMapConversionOperator<std::map<Str, Str>>(splitter);
  TestMapConversionOperator<std::multimap<View, View>>(splitter);
  TestMapConversionOperator<std::multimap<View, Str>>(splitter);
  TestMapConversionOperator<std::multimap<Str, View>>(splitter);
  TestMapConversionOperator<std::multimap<Str, Str>>(splitter);
  TestMapConversionOperator<absl::btree_map<View, View>>(splitter);
  TestMapConversionOperator<absl::btree_map<View, Str>>(splitter);
  TestMapConversionOperator<absl::btree_map<Str, View>>(splitter);
  TestMapConversionOperator<absl::btree_map<Str, Str>>(splitter);
  TestMapConversionOperator<absl::btree_multimap<View, View>>(splitter);
  TestMapConversionOperator<absl::btree_multimap<View, Str>>(splitter);
  TestMapConversionOperator<absl::btree_multimap<Str, View>>(splitter);
  TestMapConversionOperator<absl::btree_multimap<Str, Str>>(splitter);
  TestMapConversionOperator<std::unordered_map<Str, Str>>(splitter);
  TestMapConversionOperator<absl::node_hash_map<View, View>>(splitter);
  TestMapConversionOperator<absl::node_hash_map<View, Str>>(splitter);
  TestMapConversionOperator<absl::node_hash_map<Str, View>>(splitter);
  TestMapConversionOperator<absl::flat_hash_map<View, View>>(splitter);
  TestMapConversionOperator<absl::flat_hash_map<View, Str>>(splitter);
  TestMapConversionOperator<absl::flat_hash_map<Str, View>>(splitter);

  // std::pair.
  TestPairConversionOperator<View, View>(splitter);
  TestPairConversionOperator<View, Str>(splitter);
  TestPairConversionOperator<Str, View>(splitter);
  TestPairConversionOperator<Str, Str>(splitter);
}
470
471 // A few additional tests for conversion to std::pair. This conversion is
472 // different from others because a std::pair always has exactly two elements:
473 // .first and .second. The split has to work even when the split has
474 // less-than, equal-to, and more-than 2 strings.
TEST(Splitter,ToPair)475 TEST(Splitter, ToPair) {
476 {
477 // Empty string
478 std::pair<std::string, std::string> p = absl::StrSplit("", ',');
479 EXPECT_EQ("", p.first);
480 EXPECT_EQ("", p.second);
481 }
482
483 {
484 // Only first
485 std::pair<std::string, std::string> p = absl::StrSplit("a", ',');
486 EXPECT_EQ("a", p.first);
487 EXPECT_EQ("", p.second);
488 }
489
490 {
491 // Only second
492 std::pair<std::string, std::string> p = absl::StrSplit(",b", ',');
493 EXPECT_EQ("", p.first);
494 EXPECT_EQ("b", p.second);
495 }
496
497 {
498 // First and second.
499 std::pair<std::string, std::string> p = absl::StrSplit("a,b", ',');
500 EXPECT_EQ("a", p.first);
501 EXPECT_EQ("b", p.second);
502 }
503
504 {
505 // First and second and then more stuff that will be ignored.
506 std::pair<std::string, std::string> p = absl::StrSplit("a,b,c", ',');
507 EXPECT_EQ("a", p.first);
508 EXPECT_EQ("b", p.second);
509 // "c" is omitted.
510 }
511 }
512
// Splits the same input with no predicate and with each of the AllowEmpty,
// SkipEmpty and SkipWhitespace predicates.
TEST(Splitter, Predicates) {
  using absl::AllowEmpty;
  using absl::SkipEmpty;
  using absl::SkipWhitespace;
  static const char kTestChars[] = ",a, ,b,";

  {
    // Without a predicate, empty pieces are kept.
    auto plain = absl::StrSplit(kTestChars, ',');
    std::vector<std::string> pieces = plain;
    EXPECT_THAT(pieces, ElementsAre("", "a", " ", "b", ""));
  }

  {
    // AllowEmpty keeps empty pieces too ...
    auto with_allow_empty = absl::StrSplit(kTestChars, ',', AllowEmpty());
    std::vector<std::string> allow_empty_pieces = with_allow_empty;
    EXPECT_THAT(allow_empty_pieces, ElementsAre("", "a", " ", "b", ""));

    // ... and gives the same result as using no predicate at all.
    auto without_predicate = absl::StrSplit(kTestChars, ',');
    std::vector<std::string> no_predicate_pieces = without_predicate;
    EXPECT_EQ(allow_empty_pieces, no_predicate_pieces);
  }

  {
    // SkipEmpty drops empty pieces but keeps the whitespace-only one.
    auto with_skip_empty = absl::StrSplit(kTestChars, ',', SkipEmpty());
    std::vector<std::string> pieces = with_skip_empty;
    EXPECT_THAT(pieces, ElementsAre("a", " ", "b"));
  }

  {
    // SkipWhitespace drops empty and all-whitespace pieces.
    auto with_skip_ws = absl::StrSplit(kTestChars, ',', SkipWhitespace());
    std::vector<std::string> pieces = with_skip_ws;
    EXPECT_THAT(pieces, ElementsAre("a", "b"));
  }
}
552
553 //
554 // Tests for StrSplit()
555 //
556
// Basic uses of absl::StrSplit(): a discarded result, initialization of
// vectors, and assignment to containers that already exist.
TEST(Split, Basics) {
  {
    // The return value is ignored, so this does nothing useful, but it should
    // still work.
    absl::StrSplit("a,b,c", ',');
  }

  {
    std::vector<absl::string_view> views = absl::StrSplit("a,b,c", ',');
    EXPECT_THAT(views, ElementsAre("a", "b", "c"));
  }

  {
    std::vector<std::string> strings = absl::StrSplit("a,b,c", ',');
    EXPECT_THAT(strings, ElementsAre("a", "b", "c"));
  }

  {
    // Ensures that assignment works. This requires a little extra work with
    // C++11 because of overloads with initializer_list.
    std::vector<std::string> strings;
    strings = absl::StrSplit("a,b,c", ',');
    EXPECT_THAT(strings, ElementsAre("a", "b", "c"));

    std::map<std::string, std::string> ordered;
    ordered = absl::StrSplit("a,b,c", ',');
    EXPECT_EQ(2, ordered.size());

    std::unordered_map<std::string, std::string> hashed;
    hashed = absl::StrSplit("a,b,c", ',');
    EXPECT_EQ(2, hashed.size());
  }
}
589
ReturnStringView()590 absl::string_view ReturnStringView() { return "Hello World"; }
// Returns "Hello World" as a pointer to a NUL-terminated constant string.
const char* ReturnConstCharP() {
  const char* const greeting = "Hello World";
  return greeting;
}
// Returns "Hello World" as a non-const char*. The text lives in a
// function-local mutable buffer, so no const_cast on a string literal is
// needed and the pointer really does refer to writable storage.
char* ReturnCharP() {
  static char greeting[] = "Hello World";
  return greeting;
}
593
// StrSplit() accepts text returned by value from a function as an
// absl::string_view, a const char* or a char*.
TEST(Split, AcceptsCertainTemporaries) {
  std::vector<std::string> words;

  words = absl::StrSplit(ReturnStringView(), ' ');
  EXPECT_THAT(words, ElementsAre("Hello", "World"));

  words = absl::StrSplit(ReturnConstCharP(), ' ');
  EXPECT_THAT(words, ElementsAre("Hello", "World"));

  words = absl::StrSplit(ReturnCharP(), ' ');
  EXPECT_THAT(words, ElementsAre("Hello", "World"));
}
603
// Splits a temporary std::string and iterates over the splitter after the
// statement that created the temporary has finished.
TEST(Split, Temporary) {
  // The input is longer than the SSO length, so if the splitter merely kept a
  // reference to the temporary's contents it would end up reading freed
  // memory rather than just dead on-stack memory.
  const char input[] = "a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u";
  EXPECT_LT(sizeof(std::string), ABSL_ARRAYSIZE(input))
      << "Input should be larger than fits on the stack.";

  // In C++11 this more commonly happens as part of a range-based for loop.
  auto first_splitter = absl::StrSplit(std::string(input), ',');
  std::string next_letter = "a";
  for (absl::string_view piece : first_splitter) {
    EXPECT_EQ(next_letter, piece);
    ++next_letter[0];  // advance 'a' -> 'b' -> ...
  }
  // The loop saw 'a' through 'u', leaving the counter one past 'u'.
  EXPECT_EQ("v", next_letter);

  // Same check again with a second splitter over another temporary.
  auto second_splitter = absl::StrSplit(std::string(input), ',');
  next_letter = "a";
  for (absl::string_view piece : second_splitter) {
    EXPECT_EQ(next_letter, piece);
    ++next_letter[0];
  }
  EXPECT_EQ("v", next_letter);
}
630
// Copy-constructs `value` into a new heap object and returns a unique_ptr
// that owns the copy.
template <typename T>
static std::unique_ptr<T> CopyToHeap(const T& value) {
  std::unique_ptr<T> heap_copy(new T(value));
  return heap_copy;
}
635
// A splitter built from an lvalue string can be copied, and the copy still
// works after the splitter it was copied from has been destroyed.
TEST(Split, LvalueCaptureIsCopyable) {
  std::string input = "a,b";
  auto on_heap = CopyToHeap(absl::StrSplit(input, ','));
  auto on_stack = *on_heap;
  on_heap.reset();  // destroy the splitter the copy was made from
  std::vector<std::string> pieces = on_stack;
  EXPECT_THAT(pieces, ElementsAre("a", "b"));
}
644
// A splitter built from a temporary string can be copied, and the copy still
// works after the splitter it was copied from has been destroyed.
TEST(Split, TemporaryCaptureIsCopyable) {
  auto on_heap = CopyToHeap(absl::StrSplit(std::string("a,b"), ','));
  auto on_stack = *on_heap;
  on_heap.reset();  // destroy the splitter the copy was made from
  std::vector<std::string> pieces = on_stack;
  EXPECT_THAT(pieces, ElementsAre("a", "b"));
}
652
// Mostly a compile-time check: splitters support copy/move construction and
// copy/move assignment.
TEST(Split, SplitterIsCopyableAndMoveable) {
  auto original = absl::StrSplit("foo", '-');

  auto copied = original;           // copy construction
  auto moved = std::move(original); // move construction
  copied = moved;                   // copy assignment
  moved = std::move(copied);        // move assignment

  // The splitter that was assigned last still yields the single piece.
  EXPECT_THAT(moved, ElementsAre("foo"));
}
664
// The same comma delimiter can be passed as a char, a std::string or an
// absl::string_view.
TEST(Split, StringDelimiter) {
  {
    // Delimiter given as a char.
    std::vector<absl::string_view> pieces = absl::StrSplit("a,b", ',');
    EXPECT_THAT(pieces, ElementsAre("a", "b"));
  }

  {
    // Delimiter given as a std::string.
    std::vector<absl::string_view> pieces =
        absl::StrSplit("a,b", std::string(","));
    EXPECT_THAT(pieces, ElementsAre("a", "b"));
  }

  {
    // Delimiter given as an absl::string_view.
    std::vector<absl::string_view> pieces =
        absl::StrSplit("a,b", absl::string_view(","));
    EXPECT_THAT(pieces, ElementsAre("a", "b"));
  }
}
682
683 #if !defined(__cpp_char8_t)
684 #if defined(__clang__)
685 #pragma clang diagnostic push
686 #pragma clang diagnostic ignored "-Wc++2a-compat"
687 #endif
// Splitting of utf8 input, with ascii delimiters and with a utf8 delimiter.
TEST(Split, UTF8) {
  std::string utf8_string = u8"\u03BA\u1F79\u03C3\u03BC\u03B5";

  {
    // utf8 text after an ascii delimiter stays in one piece.
    std::string input = "a," + utf8_string;
    std::vector<absl::string_view> pieces = absl::StrSplit(input, ',');
    EXPECT_THAT(pieces, ElementsAre("a", utf8_string));
  }

  {
    // The delimiter itself may contain utf8 text.
    std::string input = "a," + utf8_string + ",b";
    std::string utf8_delimiter = "," + utf8_string + ",";
    std::vector<absl::string_view> pieces =
        absl::StrSplit(input, utf8_delimiter);
    EXPECT_THAT(pieces, ElementsAre("a", "b"));
  }

  {
    // ByAnyChar with ascii chars applied to utf8 input.
    std::vector<absl::string_view> pieces =
        absl::StrSplit(u8"Foo h\u00E4llo th\u4E1Ere", absl::ByAnyChar(" \t"));
    EXPECT_THAT(pieces, ElementsAre("Foo", u8"h\u00E4llo", u8"th\u4E1Ere"));
  }
}
714 #if defined(__clang__)
715 #pragma clang diagnostic pop
716 #endif
717 #endif // !defined(__cpp_char8_t)
718
// With an empty string as the delimiter, the input is split into its
// individual characters; empty input still yields one empty piece.
TEST(Split, EmptyStringDelimiter) {
  const struct {
    const char* text;
    std::vector<std::string> expected;
  } kCases[] = {
      {"", {""}},
      {"a", {"a"}},
      {"ab", {"a", "b"}},
      {"a b", {"a", " ", "b"}},
  };

  for (const auto& c : kCases) {
    std::vector<std::string> pieces = absl::StrSplit(c.text, "");
    EXPECT_THAT(pieces, ::testing::ElementsAreArray(c.expected));
  }
}
740
// Splits on the two-character delimiter "//", including inputs that hold
// only part of the delimiter or several delimiters in a row.
TEST(Split, SubstrDelimiter) {
  const absl::string_view delim("//");

  const struct {
    const char* text;
    std::vector<absl::string_view> expected;
  } kCases[] = {
      {"", {""}},
      {"//", {"", ""}},
      {"ab", {"ab"}},
      {"ab//", {"ab", ""}},
      {"ab/", {"ab/"}},   // a single '/' is not the delimiter
      {"a/b", {"a/b"}},
      {"a//b", {"a", "b"}},
      {"a///b", {"a", "/b"}},
      {"a////b", {"a", "", "b"}},
  };

  // The same vector is reassigned for every case.
  std::vector<absl::string_view> results;
  for (const auto& c : kCases) {
    results = absl::StrSplit(c.text, delim);
    EXPECT_THAT(results, ::testing::ElementsAreArray(c.expected));
  }
}
772
// Inputs whose split contains empty pieces: empty input, leading, trailing
// and adjacent delimiters, and input made up of nothing but delimiters.
TEST(Split, EmptyResults) {
  const struct {
    const char* text;
    char delim;
    std::vector<absl::string_view> expected;
  } kCases[] = {
      {"", '#', {""}},
      {"#", '#', {"", ""}},
      {"#cd", '#', {"", "cd"}},
      {"ab#cd#", '#', {"ab", "cd", ""}},
      {"ab##cd", '#', {"ab", "", "cd"}},
      {"ab##", '#', {"ab", "", ""}},
      {"ab#ab#", '#', {"ab", "ab", ""}},
      {"aaaa", 'a', {"", "", "", "", ""}},
  };

  // The same vector is reassigned for every case.
  std::vector<absl::string_view> results;
  for (const auto& c : kCases) {
    results = absl::StrSplit(c.text, c.delim);
    EXPECT_THAT(results, ::testing::ElementsAreArray(c.expected));
  }

  // With SkipEmpty, empty input produces no pieces at all.
  results = absl::StrSplit("", '#', absl::SkipEmpty());
  EXPECT_THAT(results, ElementsAre());
}
803
804 template <typename Delimiter>
IsFoundAtStartingPos(absl::string_view text,Delimiter d,size_t starting_pos,int expected_pos)805 static bool IsFoundAtStartingPos(absl::string_view text, Delimiter d,
806 size_t starting_pos, int expected_pos) {
807 absl::string_view found = d.Find(text, starting_pos);
808 return found.data() != text.data() + text.size() &&
809 expected_pos == found.data() - text.data();
810 }
811
812 // Helper function for testing Delimiter objects. Returns true if the given
813 // Delimiter is found in the given string at the given position. This function
814 // tests two cases:
815 // 1. The actual text given, staring at position 0
816 // 2. The text given with leading padding that should be ignored
817 template <typename Delimiter>
IsFoundAt(absl::string_view text,Delimiter d,int expected_pos)818 static bool IsFoundAt(absl::string_view text, Delimiter d, int expected_pos) {
819 const std::string leading_text = ",x,y,z,";
820 return IsFoundAtStartingPos(text, d, 0, expected_pos) &&
821 IsFoundAtStartingPos(leading_text + std::string(text), d,
822 leading_text.length(),
823 expected_pos + leading_text.length());
824 }
825
826 //
827 // Tests for ByString
828 //
829
// Shared checks for any delimiter that represents a single comma: the first
// comma in the text must be reported, and text without a comma must report
// nothing.
template <typename Delimiter>
void TestComma(Delimiter d) {
  // Text containing a comma: the first one is found.
  EXPECT_TRUE(IsFoundAt(",", d, 0));
  EXPECT_TRUE(IsFoundAt("a,", d, 1));
  EXPECT_TRUE(IsFoundAt(",b", d, 0));
  EXPECT_TRUE(IsFoundAt("a,b", d, 1));
  EXPECT_TRUE(IsFoundAt("a,b,", d, 1));
  EXPECT_TRUE(IsFoundAt("a,b,c", d, 1));

  // Text without a comma: nothing is found.
  EXPECT_FALSE(IsFoundAt("", d, -1));
  EXPECT_FALSE(IsFoundAt(" ", d, -1));
  EXPECT_FALSE(IsFoundAt("a", d, -1));
  EXPECT_FALSE(IsFoundAt("a b c", d, -1));
  EXPECT_FALSE(IsFoundAt("a;b;c", d, -1));
  EXPECT_FALSE(IsFoundAt(";", d, -1));
}
846
// ByString as a comma delimiter (temporary and named), plus its special
// handling of the empty string.
TEST(Delimiter, ByString) {
  using absl::ByString;

  // Passed as a temporary.
  TestComma(ByString(","));

  // Passed as a named variable.
  ByString named_comma(",");
  TestComma(named_comma);

  // absl::string_view::find() reports the first occurrence of the empty
  // string ("") at position 0, as the check just below demonstrates. If the
  // ByString delimiter returned position 0 as well, there would be an infinite
  // loop in the SplitIterator code. To avoid this, the empty string is a
  // special case that always returns the item at position 1.
  absl::string_view abc("abc");
  EXPECT_EQ(0, abc.find(""));  // "" is found at position 0

  ByString empty_delim("");
  EXPECT_FALSE(IsFoundAt("", empty_delim, 0));
  EXPECT_FALSE(IsFoundAt("a", empty_delim, 0));
  EXPECT_TRUE(IsFoundAt("ab", empty_delim, 1));
  EXPECT_TRUE(IsFoundAt("abc", empty_delim, 1));
}
868
// ByChar as a comma delimiter, passed as a temporary and as a named variable.
TEST(Split, ByChar) {
  using absl::ByChar;

  // Passed as a temporary.
  TestComma(ByChar(','));

  // Passed as a named variable.
  ByChar named_comma(',');
  TestComma(named_comma);
}
877
878 //
879 // Tests for ByAnyChar
880 //
881
// ByAnyChar with one delimiter character, with two, and with none.
TEST(Delimiter, ByAnyChar) {
  using absl::ByAnyChar;

  // A single delimiter character.
  ByAnyChar comma_only(",");
  // Found
  EXPECT_TRUE(IsFoundAt(",", comma_only, 0));
  EXPECT_TRUE(IsFoundAt("a,", comma_only, 1));
  EXPECT_TRUE(IsFoundAt("a,b", comma_only, 1));
  EXPECT_TRUE(IsFoundAt(",b", comma_only, 0));
  // Not found
  EXPECT_FALSE(IsFoundAt("", comma_only, -1));
  EXPECT_FALSE(IsFoundAt(" ", comma_only, -1));
  EXPECT_FALSE(IsFoundAt("a", comma_only, -1));
  EXPECT_FALSE(IsFoundAt("a;b;c", comma_only, -1));
  EXPECT_FALSE(IsFoundAt(";", comma_only, -1));

  // Two delimiter characters: whichever appears first in the text is found.
  ByAnyChar comma_or_semicolon(",;");
  // Found
  EXPECT_TRUE(IsFoundAt(",", comma_or_semicolon, 0));
  EXPECT_TRUE(IsFoundAt(";", comma_or_semicolon, 0));
  EXPECT_TRUE(IsFoundAt(",;", comma_or_semicolon, 0));
  EXPECT_TRUE(IsFoundAt(";,", comma_or_semicolon, 0));
  EXPECT_TRUE(IsFoundAt(",;b", comma_or_semicolon, 0));
  EXPECT_TRUE(IsFoundAt(";,b", comma_or_semicolon, 0));
  EXPECT_TRUE(IsFoundAt("a;,", comma_or_semicolon, 1));
  EXPECT_TRUE(IsFoundAt("a,;", comma_or_semicolon, 1));
  EXPECT_TRUE(IsFoundAt("a;,b", comma_or_semicolon, 1));
  EXPECT_TRUE(IsFoundAt("a,;b", comma_or_semicolon, 1));
  // Not found
  EXPECT_FALSE(IsFoundAt("", comma_or_semicolon, -1));
  EXPECT_FALSE(IsFoundAt(" ", comma_or_semicolon, -1));
  EXPECT_FALSE(IsFoundAt("a", comma_or_semicolon, -1));
  EXPECT_FALSE(IsFoundAt("a=b=c", comma_or_semicolon, -1));
  EXPECT_FALSE(IsFoundAt("=", comma_or_semicolon, -1));

  // With an empty set of characters ByAnyChar behaves just like ByString does
  // with an empty string: it always returns a zero-length absl::string_view
  // referring to the item at position 1, not position 0.
  ByAnyChar no_chars("");
  EXPECT_FALSE(IsFoundAt("", no_chars, 0));
  EXPECT_FALSE(IsFoundAt("a", no_chars, 0));
  EXPECT_TRUE(IsFoundAt("ab", no_chars, 1));
  EXPECT_TRUE(IsFoundAt("abc", no_chars, 1));
}
925
926 //
927 // Tests for ByLength
928 //
929
// ByLength(4) reports a split point at offset 4 only when the text is longer
// than four characters.
TEST(Delimiter, ByLength) {
  using absl::ByLength;

  ByLength every_four(4);

  // Text longer than four characters: found at offset 4.
  EXPECT_TRUE(IsFoundAt("abcde", every_four, 4));
  EXPECT_TRUE(IsFoundAt("abcdefghijklmnopqrstuvwxyz", every_four, 4));
  EXPECT_TRUE(IsFoundAt("a b,c\nd", every_four, 4));

  // Text of four characters or fewer: not found.
  EXPECT_FALSE(IsFoundAt("", every_four, 0));
  EXPECT_FALSE(IsFoundAt("a", every_four, 0));
  EXPECT_FALSE(IsFoundAt("ab", every_four, 0));
  EXPECT_FALSE(IsFoundAt("abc", every_four, 0));
  EXPECT_FALSE(IsFoundAt("abcd", every_four, 0));
}
946
// Splits a very large string whose only delimiter is its final character.
TEST(Split, WorksWithLargeStrings) {
  // A smaller input is used when built with ASan, MSan or TSan.
  // NOTE(review): presumably to keep sanitizer runs within memory/time
  // limits -- confirm.
#if defined(ABSL_HAVE_ADDRESS_SANITIZER) || \
    defined(ABSL_HAVE_MEMORY_SANITIZER) || defined(ABSL_HAVE_THREAD_SANITIZER)
  constexpr size_t kSize = (uint32_t{1} << 26) + 1;  // 64M + 1 byte
#else
  constexpr size_t kSize = (uint32_t{1} << 31) + 1;  // 2G + 1 byte
#endif
  // Only run where size_t is wider than 32 bits.
  if (sizeof(size_t) > 4) {
    std::string s(kSize, 'x');
    s.back() = '-';
    std::vector<absl::string_view> v = absl::StrSplit(s, '-');
    // Fatal on purpose: the element accesses below are only valid when
    // exactly two pieces were produced.
    ASSERT_EQ(2, v.size());
    // The first element holds all kSize - 1 'x's, so only a few characters
    // are spot-checked; testing::StartsWith is too slow with a 2G string.
    EXPECT_EQ('x', v[0][0]);
    EXPECT_EQ('x', v[0][1]);
    EXPECT_EQ('x', v[0][3]);
    // The delimiter is the last character, so the second piece is empty.
    EXPECT_EQ("", v[1]);
  }
}
967
// Checks each internal type trait against int (expected false) and against a
// type that has the property in question (expected true).
TEST(SplitInternalTest, TypeTraits) {
  using absl::strings_internal::HasConstIterator;
  using absl::strings_internal::HasMappedType;
  using absl::strings_internal::HasValueType;
  using absl::strings_internal::IsInitializerList;
  using IntMap = std::map<int, int>;

  EXPECT_FALSE(HasMappedType<int>::value);
  EXPECT_TRUE(HasMappedType<IntMap>::value);

  EXPECT_FALSE(HasValueType<int>::value);
  EXPECT_TRUE(HasValueType<IntMap>::value);

  EXPECT_FALSE(HasConstIterator<int>::value);
  EXPECT_TRUE(HasConstIterator<IntMap>::value);

  EXPECT_FALSE(IsInitializerList<int>::value);
  EXPECT_TRUE(IsInitializerList<std::initializer_list<int>>::value);
}
982
983 } // namespace
984