1 // Copyright 2017 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "absl/strings/str_split.h"
16
17 #include <deque>
18 #include <initializer_list>
19 #include <list>
20 #include <map>
21 #include <memory>
22 #include <string>
23 #include <type_traits>
24 #include <unordered_map>
25 #include <unordered_set>
26 #include <vector>
27
28 #include "gmock/gmock.h"
29 #include "gtest/gtest.h"
30 #include "absl/base/dynamic_annotations.h" // for RunningOnValgrind
31 #include "absl/base/macros.h"
32 #include "absl/strings/numbers.h"
33
34 namespace {
35
36 using ::testing::ElementsAre;
37 using ::testing::Pair;
38 using ::testing::UnorderedElementsAre;
39
// Compile-time checks of SplitterIsConvertibleTo. The trait is expected to be
// true for containers and maps whose elements are std::string or
// absl::string_view, and false for int, a bare std::string, and containers
// that involve int.
TEST(Split, TraitsTest) {
  using absl::strings_internal::SplitterIsConvertibleTo;

  // Types that are not containers of strings at all.
  static_assert(!SplitterIsConvertibleTo<int>::value, "");
  static_assert(!SplitterIsConvertibleTo<std::string>::value, "");

  // Sequence containers.
  using StringVector = std::vector<std::string>;
  using ViewVector = std::vector<absl::string_view>;
  using IntVector = std::vector<int>;
  static_assert(SplitterIsConvertibleTo<StringVector>::value, "");
  static_assert(SplitterIsConvertibleTo<ViewVector>::value, "");
  static_assert(!SplitterIsConvertibleTo<IntVector>::value, "");

  // Maps: both the key and the mapped type have to be string-like.
  using StringMap = std::map<std::string, std::string>;
  using ViewMap = std::map<absl::string_view, absl::string_view>;
  using IntKeyMap = std::map<int, std::string>;
  using IntValueMap = std::map<std::string, int>;
  static_assert(SplitterIsConvertibleTo<StringMap>::value, "");
  static_assert(SplitterIsConvertibleTo<ViewMap>::value, "");
  static_assert(!SplitterIsConvertibleTo<IntKeyMap>::value, "");
  static_assert(!SplitterIsConvertibleTo<IntValueMap>::value, "");
}
67
68 // This tests the overall split API, which is made up of the absl::StrSplit()
69 // function and the Delimiter objects in the absl:: namespace.
70 // This TEST macro is outside of any namespace to require full specification of
71 // namespaces just like callers will need to use.
TEST(Split,APIExamples)72 TEST(Split, APIExamples) {
73 {
74 // Passes std::string delimiter. Assumes the default of ByString.
75 std::vector<std::string> v = absl::StrSplit("a,b,c", ","); // NOLINT
76 EXPECT_THAT(v, ElementsAre("a", "b", "c"));
77
78 // Equivalent to...
79 using absl::ByString;
80 v = absl::StrSplit("a,b,c", ByString(","));
81 EXPECT_THAT(v, ElementsAre("a", "b", "c"));
82
83 // Equivalent to...
84 EXPECT_THAT(absl::StrSplit("a,b,c", ByString(",")),
85 ElementsAre("a", "b", "c"));
86 }
87
88 {
89 // Same as above, but using a single character as the delimiter.
90 std::vector<std::string> v = absl::StrSplit("a,b,c", ',');
91 EXPECT_THAT(v, ElementsAre("a", "b", "c"));
92
93 // Equivalent to...
94 using absl::ByChar;
95 v = absl::StrSplit("a,b,c", ByChar(','));
96 EXPECT_THAT(v, ElementsAre("a", "b", "c"));
97 }
98
99 {
100 // Uses the Literal std::string "=>" as the delimiter.
101 const std::vector<std::string> v = absl::StrSplit("a=>b=>c", "=>");
102 EXPECT_THAT(v, ElementsAre("a", "b", "c"));
103 }
104
105 {
106 // The substrings are returned as string_views, eliminating copying.
107 std::vector<absl::string_view> v = absl::StrSplit("a,b,c", ',');
108 EXPECT_THAT(v, ElementsAre("a", "b", "c"));
109 }
110
111 {
112 // Leading and trailing empty substrings.
113 std::vector<std::string> v = absl::StrSplit(",a,b,c,", ',');
114 EXPECT_THAT(v, ElementsAre("", "a", "b", "c", ""));
115 }
116
117 {
118 // Splits on a delimiter that is not found.
119 std::vector<std::string> v = absl::StrSplit("abc", ',');
120 EXPECT_THAT(v, ElementsAre("abc"));
121 }
122
123 {
124 // Splits the input std::string into individual characters by using an empty
125 // std::string as the delimiter.
126 std::vector<std::string> v = absl::StrSplit("abc", "");
127 EXPECT_THAT(v, ElementsAre("a", "b", "c"));
128 }
129
130 {
131 // Splits std::string data with embedded NUL characters, using NUL as the
132 // delimiter. A simple delimiter of "\0" doesn't work because strlen() will
133 // say that's the empty std::string when constructing the absl::string_view
134 // delimiter. Instead, a non-empty std::string containing NUL can be used as the
135 // delimiter.
136 std::string embedded_nulls("a\0b\0c", 5);
137 std::string null_delim("\0", 1);
138 std::vector<std::string> v = absl::StrSplit(embedded_nulls, null_delim);
139 EXPECT_THAT(v, ElementsAre("a", "b", "c"));
140 }
141
142 {
143 // Stores first two split strings as the members in a std::pair.
144 std::pair<std::string, std::string> p = absl::StrSplit("a,b,c", ',');
145 EXPECT_EQ("a", p.first);
146 EXPECT_EQ("b", p.second);
147 // "c" is omitted because std::pair can hold only two elements.
148 }
149
150 {
151 // Results stored in std::set<std::string>
152 std::set<std::string> v = absl::StrSplit("a,b,c,a,b,c,a,b,c", ',');
153 EXPECT_THAT(v, ElementsAre("a", "b", "c"));
154 }
155
156 {
157 // Uses a non-const char* delimiter.
158 char a[] = ",";
159 char* d = a + 0;
160 std::vector<std::string> v = absl::StrSplit("a,b,c", d);
161 EXPECT_THAT(v, ElementsAre("a", "b", "c"));
162 }
163
164 {
165 // Results split using either of , or ;
166 using absl::ByAnyChar;
167 std::vector<std::string> v = absl::StrSplit("a,b;c", ByAnyChar(",;"));
168 EXPECT_THAT(v, ElementsAre("a", "b", "c"));
169 }
170
171 {
172 // Uses the SkipWhitespace predicate.
173 using absl::SkipWhitespace;
174 std::vector<std::string> v =
175 absl::StrSplit(" a , ,,b,", ',', SkipWhitespace());
176 EXPECT_THAT(v, ElementsAre(" a ", "b"));
177 }
178
179 {
180 // Uses the ByLength delimiter.
181 using absl::ByLength;
182 std::vector<std::string> v = absl::StrSplit("abcdefg", ByLength(3));
183 EXPECT_THAT(v, ElementsAre("abc", "def", "g"));
184 }
185
186 {
187 // Different forms of initialization / conversion.
188 std::vector<std::string> v1 = absl::StrSplit("a,b,c", ',');
189 EXPECT_THAT(v1, ElementsAre("a", "b", "c"));
190 std::vector<std::string> v2(absl::StrSplit("a,b,c", ','));
191 EXPECT_THAT(v2, ElementsAre("a", "b", "c"));
192 auto v3 = std::vector<std::string>(absl::StrSplit("a,b,c", ','));
193 EXPECT_THAT(v3, ElementsAre("a", "b", "c"));
194 v3 = absl::StrSplit("a,b,c", ',');
195 EXPECT_THAT(v3, ElementsAre("a", "b", "c"));
196 }
197
198 {
199 // Results stored in a std::map.
200 std::map<std::string, std::string> m = absl::StrSplit("a,1,b,2,a,3", ',');
201 EXPECT_EQ(2, m.size());
202 EXPECT_EQ("3", m["a"]);
203 EXPECT_EQ("2", m["b"]);
204 }
205
206 {
207 // Results stored in a std::multimap.
208 std::multimap<std::string, std::string> m =
209 absl::StrSplit("a,1,b,2,a,3", ',');
210 EXPECT_EQ(3, m.size());
211 auto it = m.find("a");
212 EXPECT_EQ("1", it->second);
213 ++it;
214 EXPECT_EQ("3", it->second);
215 it = m.find("b");
216 EXPECT_EQ("2", it->second);
217 }
218
219 {
220 // Demonstrates use in a range-based for loop in C++11.
221 std::string s = "x,x,x,x,x,x,x";
222 for (absl::string_view sp : absl::StrSplit(s, ',')) {
223 EXPECT_EQ("x", sp);
224 }
225 }
226
227 {
228 // Demonstrates use with a Predicate in a range-based for loop.
229 using absl::SkipWhitespace;
230 std::string s = " ,x,,x,,x,x,x,,";
231 for (absl::string_view sp : absl::StrSplit(s, ',', SkipWhitespace())) {
232 EXPECT_EQ("x", sp);
233 }
234 }
235
236 {
237 // Demonstrates a "smart" split to std::map using two separate calls to
238 // absl::StrSplit. One call to split the records, and another call to split
239 // the keys and values. This also uses the Limit delimiter so that the
240 // std::string "a=b=c" will split to "a" -> "b=c".
241 std::map<std::string, std::string> m;
242 for (absl::string_view sp : absl::StrSplit("a=b=c,d=e,f=,g", ',')) {
243 m.insert(absl::StrSplit(sp, absl::MaxSplits('=', 1)));
244 }
245 EXPECT_EQ("b=c", m.find("a")->second);
246 EXPECT_EQ("e", m.find("d")->second);
247 EXPECT_EQ("", m.find("f")->second);
248 EXPECT_EQ("", m.find("g")->second);
249 }
250 }
251
252 //
253 // Tests for SplitIterator
254 //
255
// Steps through a two-piece split by hand, covering comparison against end(),
// operator*, operator-> and both increment forms.
TEST(SplitIterator, Basics) {
  auto splitter = absl::StrSplit("a,b", ',');
  auto cursor = splitter.begin();
  auto last = splitter.end();

  // First piece, read through operator*.
  EXPECT_NE(cursor, last);
  EXPECT_EQ("a", *cursor);

  // Pre-increment moves on to the second piece, read through operator->.
  ++cursor;
  EXPECT_NE(cursor, last);
  const std::string second_piece(cursor->data(), cursor->size());
  EXPECT_EQ("b", second_piece);

  // Post-increment moves past the final piece.
  cursor++;
  EXPECT_EQ(cursor, last);
}
270
271 // Simple Predicate to skip a particular string.
272 class Skip {
273 public:
Skip(const std::string & s)274 explicit Skip(const std::string& s) : s_(s) {}
operator ()(absl::string_view sp)275 bool operator()(absl::string_view sp) { return sp != s_; }
276
277 private:
278 std::string s_;
279 };
280
// Same walk as the Basics test, but with a Skip("b") predicate installed, so
// the iterator is expected to go straight from "a" to "c".
TEST(SplitIterator, Predicate) {
  auto splitter = absl::StrSplit("a,b,c", ',', Skip("b"));
  auto cursor = splitter.begin();
  auto last = splitter.end();

  // First piece, read through operator*.
  EXPECT_NE(cursor, last);
  EXPECT_EQ("a", *cursor);

  // Pre-increment: "b" should be skipped here, leaving the iterator on "c".
  ++cursor;
  EXPECT_NE(cursor, last);
  const std::string piece_after_skip(cursor->data(), cursor->size());
  EXPECT_EQ("c", piece_after_skip);  // read through operator->

  // Post-increment moves past the final piece.
  cursor++;
  EXPECT_EQ(cursor, last);
}
295
// Table-driven check of empty inputs and of leading/trailing delimiters.
TEST(SplitIterator, EdgeCases) {
  // An input together with the pieces expected when it is split on ','.
  struct EdgeCase {
    std::string in;
    std::vector<std::string> expect;
  };
  EdgeCase edge_cases[] = {
      {"", {""}},
      {"foo", {"foo"}},
      {",", {"", ""}},
      {",foo", {"", "foo"}},
      {"foo,", {"foo", ""}},
      {",foo,", {"", "foo", ""}},
      {"foo,bar", {"foo", "bar"}},
  };

  for (const EdgeCase& edge_case : edge_cases) {
    SCOPED_TRACE(edge_case.in);
    auto splitter = absl::StrSplit(edge_case.in, ',');
    auto cursor = splitter.begin();
    auto last = splitter.end();
    // Every expected piece must be produced, in order...
    for (const std::string& piece : edge_case.expect) {
      EXPECT_NE(cursor, last);
      EXPECT_EQ(piece, *cursor++);
    }
    // ...and nothing may follow the last expected piece.
    EXPECT_EQ(cursor, last);
  }
}
323
// A const-qualified splitter must still be usable as a range.
TEST(Splitter, Const) {
  const auto const_splitter = absl::StrSplit("a,b,c", ',');
  EXPECT_THAT(const_splitter, ElementsAre("a", "b", "c"));
}
328
// Splitting an empty view yields one empty piece; splitting a null
// (default-constructed) view yields no pieces at all.
TEST(Split, EmptyAndNull) {
  // Attention: Splitting a null absl::string_view is different than splitting
  // an empty absl::string_view even though both string_views are considered
  // equal. This behavior is likely surprising and undesirable. However, to
  // maintain backward compatibility, there is a small "hack" in
  // str_split_internal.h that preserves this behavior. If that behavior is ever
  // changed/fixed, this test will need to be updated.
  const absl::string_view empty_text = absl::string_view("");
  const absl::string_view null_text = absl::string_view();

  EXPECT_THAT(absl::StrSplit(empty_text, '-'), ElementsAre(""));
  EXPECT_THAT(absl::StrSplit(null_text, '-'), ElementsAre());
}
339
// Uses a non-end SplitIterator as the stop position of a loop, which only
// works if equality between two non-end SplitIterators behaves correctly.
TEST(SplitIterator, EqualityAsEndCondition) {
  auto splitter = absl::StrSplit("a,b,c", ',');
  auto cursor = splitter.begin();

  // Advance a copy of the iterator twice so that it refers to "c".
  auto stop = cursor;
  ++stop;
  ++stop;
  EXPECT_EQ("c", *stop);

  // Collect pieces until `cursor` catches up with `stop`; "c" itself must not
  // be collected because `stop` plays the role of the end iterator here.
  std::vector<absl::string_view> collected;
  while (cursor != stop) {
    collected.push_back(*cursor);
    ++cursor;
  }
  EXPECT_THAT(collected, ElementsAre("a", "b"));
}
360
361 //
362 // Tests for Splitter
363 //
364
// A named splitter can be traversed with a range-based for loop.
TEST(Splitter, RangeIterators) {
  auto splitter = absl::StrSplit("a,b,c", ',');

  std::vector<absl::string_view> pieces;
  for (const absl::string_view piece : splitter) {
    pieces.push_back(piece);
  }

  EXPECT_THAT(pieces, ElementsAre("a", "b", "c"));
}
373
374 // Some template functions for use in testing conversion operators
375 template <typename ContainerType, typename Splitter>
TestConversionOperator(const Splitter & splitter)376 void TestConversionOperator(const Splitter& splitter) {
377 ContainerType output = splitter;
378 EXPECT_THAT(output, UnorderedElementsAre("a", "b", "c", "d"));
379 }
380
381 template <typename MapType, typename Splitter>
TestMapConversionOperator(const Splitter & splitter)382 void TestMapConversionOperator(const Splitter& splitter) {
383 MapType m = splitter;
384 EXPECT_THAT(m, UnorderedElementsAre(Pair("a", "b"), Pair("c", "d")));
385 }
386
// Converts `splitter` to std::pair<FirstType, SecondType> and expects the pair
// ("a", "b").
template <typename FirstType, typename SecondType, typename Splitter>
void TestPairConversionOperator(const Splitter& splitter) {
  using PairType = std::pair<FirstType, SecondType>;
  PairType converted = splitter;
  EXPECT_EQ(converted, PairType("a", "b"));
}
392
// Converts one and the same splitter into many destination types, using the
// TestConversionOperator / TestMapConversionOperator /
// TestPairConversionOperator helpers to check each result.
TEST(Splitter, ConversionOperator) {
  auto abcd = absl::StrSplit("a,b,c,d", ',');

  // Sequence containers.
  TestConversionOperator<std::vector<absl::string_view>>(abcd);
  TestConversionOperator<std::vector<std::string>>(abcd);
  TestConversionOperator<std::list<absl::string_view>>(abcd);
  TestConversionOperator<std::list<std::string>>(abcd);
  TestConversionOperator<std::deque<absl::string_view>>(abcd);
  TestConversionOperator<std::deque<std::string>>(abcd);

  // Set-like containers.
  TestConversionOperator<std::set<absl::string_view>>(abcd);
  TestConversionOperator<std::set<std::string>>(abcd);
  TestConversionOperator<std::multiset<absl::string_view>>(abcd);
  TestConversionOperator<std::multiset<std::string>>(abcd);
  TestConversionOperator<std::unordered_set<std::string>>(abcd);

  // Map-like containers: std::map with every key/value combination.
  TestMapConversionOperator<std::map<absl::string_view, absl::string_view>>(
      abcd);
  TestMapConversionOperator<std::map<absl::string_view, std::string>>(abcd);
  TestMapConversionOperator<std::map<std::string, absl::string_view>>(abcd);
  TestMapConversionOperator<std::map<std::string, std::string>>(abcd);

  // Map-like containers: std::multimap with every key/value combination.
  TestMapConversionOperator<
      std::multimap<absl::string_view, absl::string_view>>(abcd);
  TestMapConversionOperator<std::multimap<absl::string_view, std::string>>(
      abcd);
  TestMapConversionOperator<std::multimap<std::string, absl::string_view>>(
      abcd);
  TestMapConversionOperator<std::multimap<std::string, std::string>>(abcd);

  // Map-like containers: hashed.
  TestMapConversionOperator<std::unordered_map<std::string, std::string>>(
      abcd);

  // std::pair with every first/second combination.
  TestPairConversionOperator<absl::string_view, absl::string_view>(abcd);
  TestPairConversionOperator<absl::string_view, std::string>(abcd);
  TestPairConversionOperator<std::string, absl::string_view>(abcd);
  TestPairConversionOperator<std::string, std::string>(abcd);
}
432
433 // A few additional tests for conversion to std::pair. This conversion is
434 // different from others because a std::pair always has exactly two elements:
435 // .first and .second. The split has to work even when the split has
436 // less-than, equal-to, and more-than 2 strings.
TEST(Splitter,ToPair)437 TEST(Splitter, ToPair) {
438 {
439 // Empty std::string
440 std::pair<std::string, std::string> p = absl::StrSplit("", ',');
441 EXPECT_EQ("", p.first);
442 EXPECT_EQ("", p.second);
443 }
444
445 {
446 // Only first
447 std::pair<std::string, std::string> p = absl::StrSplit("a", ',');
448 EXPECT_EQ("a", p.first);
449 EXPECT_EQ("", p.second);
450 }
451
452 {
453 // Only second
454 std::pair<std::string, std::string> p = absl::StrSplit(",b", ',');
455 EXPECT_EQ("", p.first);
456 EXPECT_EQ("b", p.second);
457 }
458
459 {
460 // First and second.
461 std::pair<std::string, std::string> p = absl::StrSplit("a,b", ',');
462 EXPECT_EQ("a", p.first);
463 EXPECT_EQ("b", p.second);
464 }
465
466 {
467 // First and second and then more stuff that will be ignored.
468 std::pair<std::string, std::string> p = absl::StrSplit("a,b,c", ',');
469 EXPECT_EQ("a", p.first);
470 EXPECT_EQ("b", p.second);
471 // "c" is omitted.
472 }
473 }
474
// Splits one fixed input with no predicate and with each of the AllowEmpty,
// SkipEmpty and SkipWhitespace predicates.
TEST(Splitter, Predicates) {
  using absl::AllowEmpty;
  using absl::SkipEmpty;
  using absl::SkipWhitespace;
  static const char kTestChars[] = ",a, ,b,";

  {
    // Without a predicate, empty pieces are kept.
    auto plain = absl::StrSplit(kTestChars, ',');
    std::vector<std::string> pieces = plain;
    EXPECT_THAT(pieces, ElementsAre("", "a", " ", "b", ""));
  }

  {
    // AllowEmpty keeps empty pieces as well...
    auto allowing = absl::StrSplit(kTestChars, ',', AllowEmpty());
    std::vector<std::string> with_allow_empty = allowing;
    EXPECT_THAT(with_allow_empty, ElementsAre("", "a", " ", "b", ""));

    // ...and therefore has to agree with the predicate-free split.
    auto plain = absl::StrSplit(kTestChars, ',');
    std::vector<std::string> without_predicate = plain;
    EXPECT_EQ(with_allow_empty, without_predicate);
  }

  {
    // SkipEmpty drops the empty pieces but keeps the whitespace-only one.
    auto skipping_empty = absl::StrSplit(kTestChars, ',', SkipEmpty());
    std::vector<std::string> pieces = skipping_empty;
    EXPECT_THAT(pieces, ElementsAre("a", " ", "b"));
  }

  {
    // SkipWhitespace drops both empty and all-whitespace pieces.
    auto skipping_blank = absl::StrSplit(kTestChars, ',', SkipWhitespace());
    std::vector<std::string> pieces = skipping_blank;
    EXPECT_THAT(pieces, ElementsAre("a", "b"));
  }
}
514
515 //
516 // Tests for StrSplit()
517 //
518
// Basic uses of absl::StrSplit(): a discarded result, initialization of
// vectors, and assignment to already-constructed containers.
TEST(Split, Basics) {
  {
    // The return value is ignored, so this does nothing useful; it merely has
    // to work.
    absl::StrSplit("a,b,c", ',');
  }

  {
    // Initializes a vector of views.
    std::vector<absl::string_view> views = absl::StrSplit("a,b,c", ',');
    EXPECT_THAT(views, ElementsAre("a", "b", "c"));
  }

  {
    // Initializes a vector of owning strings.
    std::vector<std::string> strings = absl::StrSplit("a,b,c", ',');
    EXPECT_THAT(strings, ElementsAre("a", "b", "c"));
  }

  {
    // Ensures that assignment works. This requires a little extra work with
    // C++11 because of overloads with initializer_list.
    std::vector<std::string> strings;
    strings = absl::StrSplit("a,b,c", ',');
    EXPECT_THAT(strings, ElementsAre("a", "b", "c"));

    std::map<std::string, std::string> ordered;
    ordered = absl::StrSplit("a,b,c", ',');
    EXPECT_EQ(2, ordered.size());

    std::unordered_map<std::string, std::string> hashed;
    hashed = absl::StrSplit("a,b,c", ',');
    EXPECT_EQ(2, hashed.size());
  }
}
551
ReturnStringView()552 absl::string_view ReturnStringView() { return "Hello World"; }
// Returns the text "Hello World" as a pointer to const char.
const char* ReturnConstCharP() {
  const char* const greeting = "Hello World";
  return greeting;
}
// Returns the text "Hello World" as a pointer to non-const char.
// The text is kept in a function-local static array rather than obtained by
// const_cast-ing a string literal, so the returned pointer refers to storage
// that is actually writable and stays valid after the call returns.
char* ReturnCharP() {
  static char hello_world[] = "Hello World";
  return hello_world;
}
555
// absl::StrSplit() must accept its input directly from functions returning
// absl::string_view, const char* and char*.
TEST(Split, AcceptsCertainTemporaries) {
  std::vector<std::string> words;

  // Input returned as absl::string_view.
  words = absl::StrSplit(ReturnStringView(), ' ');
  EXPECT_THAT(words, ElementsAre("Hello", "World"));

  // Input returned as const char*.
  words = absl::StrSplit(ReturnConstCharP(), ' ');
  EXPECT_THAT(words, ElementsAre("Hello", "World"));

  // Input returned as char*.
  words = absl::StrSplit(ReturnCharP(), ' ');
  EXPECT_THAT(words, ElementsAre("Hello", "World"));
}
565
// Splits a temporary std::string and iterates over the stored splitter only
// after the statement that created the temporary has finished.
TEST(Split, Temporary) {
  // Use a std::string longer than the SSO length, so that when the temporary is
  // destroyed, if the splitter keeps a reference to the std::string's contents,
  // it'll reference freed memory instead of just dead on-stack memory.
  const char input[] = "a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u";
  EXPECT_LT(sizeof(std::string), ABSL_ARRAYSIZE(input))
      << "Input should be larger than fits on the stack.";

  // Builds a splitter from a temporary copy of `input`, then expects the
  // letters "a", "b", ... in order, one per piece. After the last piece ("u")
  // the running expectation has advanced to "v".
  // This happens more often in C++11 as part of a range-based for loop.
  const auto expect_letters_in_order = [&input] {
    auto splitter = absl::StrSplit(std::string(input), ',');
    std::string expected = "a";
    for (absl::string_view letter : splitter) {
      EXPECT_EQ(expected, letter);
      ++expected[0];
    }
    EXPECT_EQ("v", expected);
  };

  // The same check is performed twice, each time on a fresh temporary.
  expect_letters_in_order();
  expect_letters_in_order();
}
592
// Copy-constructs a new T from `value` on the heap and returns the owning
// pointer.
template <typename T>
static std::unique_ptr<T> CopyToHeap(const T& value) {
  std::unique_ptr<T> heap_copy(new T(value));
  return heap_copy;
}
597
// A splitter created from an lvalue std::string can be copied, and the copy
// remains usable after the splitter it was copied from has been destroyed.
TEST(Split, LvalueCaptureIsCopyable) {
  std::string input = "a,b";

  auto on_heap = CopyToHeap(absl::StrSplit(input, ','));
  auto on_stack = *on_heap;
  on_heap.reset();  // Destroys the splitter that `on_stack` was copied from.

  std::vector<std::string> pieces = on_stack;
  EXPECT_THAT(pieces, ElementsAre("a", "b"));
}
606
// A splitter created from a temporary std::string can be copied, and the copy
// remains usable after the splitter it was copied from has been destroyed.
TEST(Split, TemporaryCaptureIsCopyable) {
  auto on_heap = CopyToHeap(absl::StrSplit(std::string("a,b"), ','));
  auto on_stack = *on_heap;
  on_heap.reset();  // Destroys the splitter that `on_stack` was copied from.

  std::vector<std::string> pieces = on_stack;
  EXPECT_THAT(pieces, ElementsAre("a", "b"));
}
614
// Exercises all four copy/move special member functions of a splitter and
// checks that the final object still splits correctly.
TEST(Split, SplitterIsCopyableAndMoveable) {
  auto first = absl::StrSplit("foo", '-');

  // The statements below mainly have to compile.
  auto second = first;            // copy construction
  auto third = std::move(first);  // move construction
  second = third;                 // copy assignment
  third = std::move(second);      // move assignment

  EXPECT_THAT(third, ElementsAre("foo"));
}
626
// The same comma delimiter, supplied as a char, as a std::string and as an
// absl::string_view, must give the same result.
TEST(Split, StringDelimiter) {
  {
    // Delimiter given as a char.
    std::vector<absl::string_view> pieces = absl::StrSplit("a,b", ',');
    EXPECT_THAT(pieces, ElementsAre("a", "b"));
  }

  {
    // Delimiter given as a std::string.
    std::vector<absl::string_view> pieces =
        absl::StrSplit("a,b", std::string(","));
    EXPECT_THAT(pieces, ElementsAre("a", "b"));
  }

  {
    // Delimiter given as an absl::string_view.
    std::vector<absl::string_view> pieces =
        absl::StrSplit("a,b", absl::string_view(","));
    EXPECT_THAT(pieces, ElementsAre("a", "b"));
  }
}
644
645 #if !defined(__cpp_char8_t)
646 #if defined(__clang__)
647 #pragma clang diagnostic push
648 #pragma clang diagnostic ignored "-Wc++2a-compat"
649 #endif
// Tests splitting utf8 strings and utf8 delimiters.
TEST(Split, UTF8) {
  std::string utf8_string = u8"\u03BA\u1F79\u03C3\u03BC\u03B5";

  {
    // utf8 text, ascii delimiter.
    std::string text = "a," + utf8_string;
    std::vector<absl::string_view> pieces = absl::StrSplit(text, ',');
    EXPECT_THAT(pieces, ElementsAre("a", utf8_string));
  }

  {
    // utf8 text, and a delimiter that itself contains the utf8 string.
    std::string text = "a," + utf8_string + ",b";
    std::string utf8_delimiter = "," + utf8_string + ",";
    std::vector<absl::string_view> pieces =
        absl::StrSplit(text, utf8_delimiter);
    EXPECT_THAT(pieces, ElementsAre("a", "b"));
  }

  {
    // utf8 text, split with ByAnyChar on ascii whitespace characters.
    std::vector<absl::string_view> pieces =
        absl::StrSplit(u8"Foo h\u00E4llo th\u4E1Ere", absl::ByAnyChar(" \t"));
    EXPECT_THAT(pieces, ElementsAre("Foo", u8"h\u00E4llo", u8"th\u4E1Ere"));
  }
}
676 #if defined(__clang__)
677 #pragma clang diagnostic pop
678 #endif
679 #endif // !defined(__cpp_char8_t)
680
// Splitting on the empty string is expected to yield the individual
// characters of the input, and a single empty piece for an empty input.
TEST(Split, EmptyStringDelimiter) {
  // An input together with the pieces expected when splitting it on "".
  struct Case {
    const char* text;
    std::vector<std::string> pieces;
  };
  const Case cases[] = {
      {"", {""}},
      {"a", {"a"}},
      {"ab", {"a", "b"}},
      {"a b", {"a", " ", "b"}},
  };

  for (const Case& c : cases) {
    SCOPED_TRACE(c.text);
    std::vector<std::string> v = absl::StrSplit(c.text, "");
    EXPECT_EQ(c.pieces, v);
  }
}
702
// Splits on the two-character delimiter "//", including inputs that contain
// only part of the delimiter or several delimiters in a row.
TEST(Split, SubstrDelimiter) {
  // An input together with the pieces expected when splitting it on "//".
  struct Case {
    const char* text;
    std::vector<absl::string_view> pieces;
  };
  const Case cases[] = {
      {"", {""}},
      {"//", {"", ""}},
      {"ab", {"ab"}},
      {"ab//", {"ab", ""}},
      {"ab/", {"ab/"}},
      {"a/b", {"a/b"}},
      {"a//b", {"a", "b"}},
      {"a///b", {"a", "/b"}},
      {"a////b", {"a", "", "b"}},
  };

  absl::string_view delim("//");
  // One vector is reused so that every case goes through assignment.
  std::vector<absl::string_view> results;
  for (const Case& c : cases) {
    SCOPED_TRACE(c.text);
    results = absl::StrSplit(c.text, delim);
    EXPECT_EQ(c.pieces, results);
  }
}
734
// Inputs whose split contains empty pieces, plus one case where SkipEmpty
// leaves no pieces at all.
TEST(Split, EmptyResults) {
  // An input, the delimiter character, and the expected pieces.
  struct Case {
    const char* text;
    char delim;
    std::vector<absl::string_view> pieces;
  };
  const Case cases[] = {
      {"", '#', {""}},
      {"#", '#', {"", ""}},
      {"#cd", '#', {"", "cd"}},
      {"ab#cd#", '#', {"ab", "cd", ""}},
      {"ab##cd", '#', {"ab", "", "cd"}},
      {"ab##", '#', {"ab", "", ""}},
      {"ab#ab#", '#', {"ab", "ab", ""}},
      {"aaaa", 'a', {"", "", "", "", ""}},
  };

  // One vector is reused so that every case goes through assignment.
  std::vector<absl::string_view> results;
  for (const Case& c : cases) {
    SCOPED_TRACE(c.text);
    results = absl::StrSplit(c.text, c.delim);
    EXPECT_EQ(c.pieces, results);
  }

  // With SkipEmpty, the single empty piece of an empty input is dropped.
  results = absl::StrSplit("", '#', absl::SkipEmpty());
  EXPECT_THAT(results, ElementsAre());
}
765
766 template <typename Delimiter>
IsFoundAtStartingPos(absl::string_view text,Delimiter d,size_t starting_pos,int expected_pos)767 static bool IsFoundAtStartingPos(absl::string_view text, Delimiter d,
768 size_t starting_pos, int expected_pos) {
769 absl::string_view found = d.Find(text, starting_pos);
770 return found.data() != text.data() + text.size() &&
771 expected_pos == found.data() - text.data();
772 }
773
774 // Helper function for testing Delimiter objects. Returns true if the given
775 // Delimiter is found in the given string at the given position. This function
776 // tests two cases:
777 // 1. The actual text given, staring at position 0
778 // 2. The text given with leading padding that should be ignored
779 template <typename Delimiter>
IsFoundAt(absl::string_view text,Delimiter d,int expected_pos)780 static bool IsFoundAt(absl::string_view text, Delimiter d, int expected_pos) {
781 const std::string leading_text = ",x,y,z,";
782 return IsFoundAtStartingPos(text, d, 0, expected_pos) &&
783 IsFoundAtStartingPos(leading_text + std::string(text), d,
784 leading_text.length(),
785 expected_pos + leading_text.length());
786 }
787
788 //
789 // Tests for ByString
790 //
791
792 // Tests using any delimiter that represents a single comma.
793 template <typename Delimiter>
TestComma(Delimiter d)794 void TestComma(Delimiter d) {
795 EXPECT_TRUE(IsFoundAt(",", d, 0));
796 EXPECT_TRUE(IsFoundAt("a,", d, 1));
797 EXPECT_TRUE(IsFoundAt(",b", d, 0));
798 EXPECT_TRUE(IsFoundAt("a,b", d, 1));
799 EXPECT_TRUE(IsFoundAt("a,b,", d, 1));
800 EXPECT_TRUE(IsFoundAt("a,b,c", d, 1));
801 EXPECT_FALSE(IsFoundAt("", d, -1));
802 EXPECT_FALSE(IsFoundAt(" ", d, -1));
803 EXPECT_FALSE(IsFoundAt("a", d, -1));
804 EXPECT_FALSE(IsFoundAt("a b c", d, -1));
805 EXPECT_FALSE(IsFoundAt("a;b;c", d, -1));
806 EXPECT_FALSE(IsFoundAt(";", d, -1));
807 }
808
// Checks ByString with a comma, both as a temporary and as a named variable,
// and with the empty string as a special case.
TEST(Delimiter, ByString) {
  using absl::ByString;

  // A temporary delimiter.
  TestComma(ByString(","));

  // Works as named variable.
  ByString named_comma(",");
  TestComma(named_comma);

  // The first occurrence of the empty std::string ("") in a std::string is at
  // position 0, which the absl::string_view::find() call below demonstrates.
  // If the ByString delimiter returned position 0 for this, there would be an
  // infinite loop in the SplitIterator code. To avoid this, the empty
  // std::string is a special case in that it always returns the item at
  // position 1.
  absl::string_view abc("abc");
  EXPECT_EQ(0, abc.find(""));  // "" is found at position 0

  ByString empty_delimiter("");
  // Inputs shorter than two characters have no item at position 1.
  EXPECT_FALSE(IsFoundAt("", empty_delimiter, 0));
  EXPECT_FALSE(IsFoundAt("a", empty_delimiter, 0));
  EXPECT_TRUE(IsFoundAt("ab", empty_delimiter, 1));
  EXPECT_TRUE(IsFoundAt("abc", empty_delimiter, 1));
}
830
// Runs the shared comma checks against ByChar, both as a temporary and as a
// named variable.
TEST(Split, ByChar) {
  using absl::ByChar;

  // A temporary delimiter.
  TestComma(ByChar(','));

  // Works as named variable.
  ByChar named_comma(',');
  TestComma(named_comma);
}
839
840 //
841 // Tests for ByAnyChar
842 //
843
// Checks ByAnyChar with one delimiter character, with two delimiter
// characters, and with an empty set of delimiter characters.
TEST(Delimiter, ByAnyChar) {
  using absl::ByAnyChar;

  // An input containing a delimiter, with the position of the first one.
  struct Hit {
    const char* text;
    int pos;
  };

  // --- A single delimiter character: ',' ---
  ByAnyChar one_delim(",");
  const Hit one_delim_hits[] = {{",", 0}, {"a,", 1}, {"a,b", 1}, {",b", 0}};
  for (const Hit& hit : one_delim_hits) {
    SCOPED_TRACE(hit.text);
    EXPECT_TRUE(IsFoundAt(hit.text, one_delim, hit.pos));
  }
  const char* const one_delim_misses[] = {"", " ", "a", "a;b;c", ";"};
  for (const char* text : one_delim_misses) {
    SCOPED_TRACE(text);
    EXPECT_FALSE(IsFoundAt(text, one_delim, -1));
  }

  // --- Two delimiter characters: ',' and ';' ---
  ByAnyChar two_delims(",;");
  const Hit two_delims_hits[] = {
      {",", 0},   {";", 0},   {",;", 0},   {";,", 0},   {",;b", 0},
      {";,b", 0}, {"a;,", 1}, {"a,;", 1},  {"a;,b", 1}, {"a,;b", 1},
  };
  for (const Hit& hit : two_delims_hits) {
    SCOPED_TRACE(hit.text);
    EXPECT_TRUE(IsFoundAt(hit.text, two_delims, hit.pos));
  }
  const char* const two_delims_misses[] = {"", " ", "a", "a=b=c", "="};
  for (const char* text : two_delims_misses) {
    SCOPED_TRACE(text);
    EXPECT_FALSE(IsFoundAt(text, two_delims, -1));
  }

  // --- No delimiter characters ---
  // ByAnyChar behaves just like ByString when given a delimiter of empty
  // std::string. That is, it always returns a zero-length absl::string_view
  // referring to the item at position 1, not position 0.
  ByAnyChar empty("");
  EXPECT_FALSE(IsFoundAt("", empty, 0));
  EXPECT_FALSE(IsFoundAt("a", empty, 0));
  EXPECT_TRUE(IsFoundAt("ab", empty, 1));
  EXPECT_TRUE(IsFoundAt("abc", empty, 1));
}
887
888 //
889 // Tests for ByLength
890 //
891
// Checks ByLength(4): inputs longer than four characters report a split at
// position 4, while inputs of at most four characters report none.
TEST(Delimiter, ByLength) {
  using absl::ByLength;

  ByLength four_char_delim(4);

  // Found: every input here is longer than four characters.
  const char* const long_inputs[] = {
      "abcde",
      "abcdefghijklmnopqrstuvwxyz",
      "a b,c\nd",
  };
  for (const char* text : long_inputs) {
    SCOPED_TRACE(text);
    EXPECT_TRUE(IsFoundAt(text, four_char_delim, 4));
  }

  // Not found: every input here has at most four characters.
  const char* const short_inputs[] = {"", "a", "ab", "abc", "abcd"};
  for (const char* text : short_inputs) {
    SCOPED_TRACE(text);
    EXPECT_FALSE(IsFoundAt(text, four_char_delim, 0));
  }
}
908
// Splits a string of 2G + 1 bytes whose only delimiter is its last character.
TEST(Split, WorksWithLargeStrings) {
  // The check only runs where size_t is wider than four bytes.
  if (sizeof(size_t) <= 4) {
    return;
  }

  std::string huge((uint32_t{1} << 31) + 1, 'x');  // 2G + 1 byte
  huge.back() = '-';
  std::vector<absl::string_view> pieces = absl::StrSplit(huge, '-');
  EXPECT_EQ(2, pieces.size());

  // The first element will contain 2G of 'x's.
  // testing::StartsWith is too slow with a 2G std::string, so only a few
  // individual characters are sampled.
  const absl::string_view first_piece = pieces[0];
  EXPECT_EQ('x', first_piece[0]);
  EXPECT_EQ('x', first_piece[1]);
  EXPECT_EQ('x', first_piece[3]);

  // Nothing follows the trailing delimiter.
  EXPECT_EQ("", pieces[1]);
}
923
// Checks the internal type traits used by the splitter: each one is expected
// to be false for int and true for a type that matches it.
TEST(SplitInternalTest, TypeTraits) {
  using absl::strings_internal::HasConstIterator;
  using absl::strings_internal::HasMappedType;
  using absl::strings_internal::HasValueType;
  using absl::strings_internal::IsInitializerList;
  using IntMap = std::map<int, int>;

  // All four traits are false for int.
  EXPECT_FALSE(HasMappedType<int>::value);
  EXPECT_FALSE(HasValueType<int>::value);
  EXPECT_FALSE(HasConstIterator<int>::value);
  EXPECT_FALSE(IsInitializerList<int>::value);

  // The member-type traits are true for a std::map.
  EXPECT_TRUE(HasMappedType<IntMap>::value);
  EXPECT_TRUE(HasValueType<IntMap>::value);
  EXPECT_TRUE(HasConstIterator<IntMap>::value);

  // IsInitializerList is true for a std::initializer_list.
  EXPECT_TRUE(IsInitializerList<std::initializer_list<int>>::value);
}
938
939 } // namespace
940