• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2017 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "absl/strings/str_split.h"
16 
17 #include <array>
18 #include <cstddef>
19 #include <cstdint>
20 #include <deque>
21 #include <initializer_list>
22 #include <list>
23 #include <map>
24 #include <memory>
25 #include <set>
26 #include <string>
27 #include <unordered_map>
28 #include <unordered_set>
29 #include <utility>
30 #include <vector>
31 
32 #include "gmock/gmock.h"
33 #include "gtest/gtest.h"
34 #include "absl/base/macros.h"
35 #include "absl/container/btree_map.h"
36 #include "absl/container/btree_set.h"
37 #include "absl/container/flat_hash_map.h"
38 #include "absl/container/node_hash_map.h"
39 #include "absl/strings/string_view.h"
40 
41 namespace {
42 
43 using ::testing::ElementsAre;
44 using ::testing::IsEmpty;
45 using ::testing::Pair;
46 using ::testing::UnorderedElementsAre;
47 
// Compile-time checks of SplitterIsConvertibleTo: the trait must be false for
// scalars, plain strings and containers with non-string elements, and true for
// containers whose elements (or key/value pairs) are std::string or
// absl::string_view.
TEST(Split, TraitsTest) {
  using absl::strings_internal::SplitterIsConvertibleTo;
  using StringVector = std::vector<std::string>;
  using ViewVector = std::vector<absl::string_view>;
  using StringMap = std::map<std::string, std::string>;
  using ViewMap = std::map<absl::string_view, absl::string_view>;
  using IntKeyMap = std::map<int, std::string>;
  using IntValueMap = std::map<std::string, int>;

  // Non-container targets are rejected.
  static_assert(!SplitterIsConvertibleTo<int>::value, "");
  static_assert(!SplitterIsConvertibleTo<std::string>::value, "");

  // Sequence containers: accepted only when the element type is string-like.
  static_assert(SplitterIsConvertibleTo<StringVector>::value, "");
  static_assert(!SplitterIsConvertibleTo<std::vector<int>>::value, "");
  static_assert(SplitterIsConvertibleTo<ViewVector>::value, "");

  // Maps: both the key and the mapped type must be string-like.
  static_assert(SplitterIsConvertibleTo<StringMap>::value, "");
  static_assert(SplitterIsConvertibleTo<ViewMap>::value, "");
  static_assert(!SplitterIsConvertibleTo<IntKeyMap>::value, "");
  static_assert(!SplitterIsConvertibleTo<IntValueMap>::value, "");
}
75 
76 // This tests the overall split API, which is made up of the absl::StrSplit()
77 // function and the Delimiter objects in the absl:: namespace.
78 // This TEST macro is outside of any namespace to require full specification of
79 // namespaces just like callers will need to use.
TEST(Split,APIExamples)80 TEST(Split, APIExamples) {
81   {
82     // Passes string delimiter. Assumes the default of ByString.
83     std::vector<std::string> v = absl::StrSplit("a,b,c", ",");  // NOLINT
84     EXPECT_THAT(v, ElementsAre("a", "b", "c"));
85 
86     // Equivalent to...
87     using absl::ByString;
88     v = absl::StrSplit("a,b,c", ByString(","));
89     EXPECT_THAT(v, ElementsAre("a", "b", "c"));
90 
91     // Equivalent to...
92     EXPECT_THAT(absl::StrSplit("a,b,c", ByString(",")),
93                 ElementsAre("a", "b", "c"));
94   }
95 
96   {
97     // Same as above, but using a single character as the delimiter.
98     std::vector<std::string> v = absl::StrSplit("a,b,c", ',');
99     EXPECT_THAT(v, ElementsAre("a", "b", "c"));
100 
101     // Equivalent to...
102     using absl::ByChar;
103     v = absl::StrSplit("a,b,c", ByChar(','));
104     EXPECT_THAT(v, ElementsAre("a", "b", "c"));
105   }
106 
107   {
108     // Uses the Literal string "=>" as the delimiter.
109     const std::vector<std::string> v = absl::StrSplit("a=>b=>c", "=>");
110     EXPECT_THAT(v, ElementsAre("a", "b", "c"));
111   }
112 
113   {
114     // The substrings are returned as string_views, eliminating copying.
115     std::vector<absl::string_view> v = absl::StrSplit("a,b,c", ',');
116     EXPECT_THAT(v, ElementsAre("a", "b", "c"));
117   }
118 
119   {
120     // Leading and trailing empty substrings.
121     std::vector<std::string> v = absl::StrSplit(",a,b,c,", ',');
122     EXPECT_THAT(v, ElementsAre("", "a", "b", "c", ""));
123   }
124 
125   {
126     // Splits on a delimiter that is not found.
127     std::vector<std::string> v = absl::StrSplit("abc", ',');
128     EXPECT_THAT(v, ElementsAre("abc"));
129   }
130 
131   {
132     // Splits the input string into individual characters by using an empty
133     // string as the delimiter.
134     std::vector<std::string> v = absl::StrSplit("abc", "");
135     EXPECT_THAT(v, ElementsAre("a", "b", "c"));
136   }
137 
138   {
139     // Splits string data with embedded NUL characters, using NUL as the
140     // delimiter. A simple delimiter of "\0" doesn't work because strlen() will
141     // say that's the empty string when constructing the absl::string_view
142     // delimiter. Instead, a non-empty string containing NUL can be used as the
143     // delimiter.
144     std::string embedded_nulls("a\0b\0c", 5);
145     std::string null_delim("\0", 1);
146     std::vector<std::string> v = absl::StrSplit(embedded_nulls, null_delim);
147     EXPECT_THAT(v, ElementsAre("a", "b", "c"));
148   }
149 
150   {
151     // Stores first two split strings as the members in a std::pair.
152     std::pair<std::string, std::string> p = absl::StrSplit("a,b,c", ',');
153     EXPECT_EQ("a", p.first);
154     EXPECT_EQ("b", p.second);
155     // "c" is omitted because std::pair can hold only two elements.
156   }
157 
158   {
159     // Results stored in std::set<std::string>
160     std::set<std::string> v = absl::StrSplit("a,b,c,a,b,c,a,b,c", ',');
161     EXPECT_THAT(v, ElementsAre("a", "b", "c"));
162   }
163 
164   {
165     // Uses a non-const char* delimiter.
166     char a[] = ",";
167     char* d = a + 0;
168     std::vector<std::string> v = absl::StrSplit("a,b,c", d);
169     EXPECT_THAT(v, ElementsAre("a", "b", "c"));
170   }
171 
172   {
173     // Results split using either of , or ;
174     using absl::ByAnyChar;
175     std::vector<std::string> v = absl::StrSplit("a,b;c", ByAnyChar(",;"));
176     EXPECT_THAT(v, ElementsAre("a", "b", "c"));
177   }
178 
179   {
180     // Uses the SkipWhitespace predicate.
181     using absl::SkipWhitespace;
182     std::vector<std::string> v =
183         absl::StrSplit(" a , ,,b,", ',', SkipWhitespace());
184     EXPECT_THAT(v, ElementsAre(" a ", "b"));
185   }
186 
187   {
188     // Uses the ByLength delimiter.
189     using absl::ByLength;
190     std::vector<std::string> v = absl::StrSplit("abcdefg", ByLength(3));
191     EXPECT_THAT(v, ElementsAre("abc", "def", "g"));
192   }
193 
194   {
195     // Different forms of initialization / conversion.
196     std::vector<std::string> v1 = absl::StrSplit("a,b,c", ',');
197     EXPECT_THAT(v1, ElementsAre("a", "b", "c"));
198     std::vector<std::string> v2(absl::StrSplit("a,b,c", ','));
199     EXPECT_THAT(v2, ElementsAre("a", "b", "c"));
200     auto v3 = std::vector<std::string>(absl::StrSplit("a,b,c", ','));
201     EXPECT_THAT(v3, ElementsAre("a", "b", "c"));
202     v3 = absl::StrSplit("a,b,c", ',');
203     EXPECT_THAT(v3, ElementsAre("a", "b", "c"));
204   }
205 
206   {
207     // Results stored in a std::map.
208     std::map<std::string, std::string> m = absl::StrSplit("a,1,b,2,a,3", ',');
209     EXPECT_EQ(2, m.size());
210     EXPECT_EQ("3", m["a"]);
211     EXPECT_EQ("2", m["b"]);
212   }
213 
214   {
215     // Results stored in a std::multimap.
216     std::multimap<std::string, std::string> m =
217         absl::StrSplit("a,1,b,2,a,3", ',');
218     EXPECT_EQ(3, m.size());
219     auto it = m.find("a");
220     EXPECT_EQ("1", it->second);
221     ++it;
222     EXPECT_EQ("3", it->second);
223     it = m.find("b");
224     EXPECT_EQ("2", it->second);
225   }
226 
227   {
228     // Demonstrates use in a range-based for loop in C++11.
229     std::string s = "x,x,x,x,x,x,x";
230     for (absl::string_view sp : absl::StrSplit(s, ',')) {
231       EXPECT_EQ("x", sp);
232     }
233   }
234 
235   {
236     // Demonstrates use with a Predicate in a range-based for loop.
237     using absl::SkipWhitespace;
238     std::string s = " ,x,,x,,x,x,x,,";
239     for (absl::string_view sp : absl::StrSplit(s, ',', SkipWhitespace())) {
240       EXPECT_EQ("x", sp);
241     }
242   }
243 
244   {
245     // Demonstrates a "smart" split to std::map using two separate calls to
246     // absl::StrSplit. One call to split the records, and another call to split
247     // the keys and values. This also uses the Limit delimiter so that the
248     // std::string "a=b=c" will split to "a" -> "b=c".
249     std::map<std::string, std::string> m;
250     for (absl::string_view sp : absl::StrSplit("a=b=c,d=e,f=,g", ',')) {
251       m.insert(absl::StrSplit(sp, absl::MaxSplits('=', 1)));
252     }
253     EXPECT_EQ("b=c", m.find("a")->second);
254     EXPECT_EQ("e", m.find("d")->second);
255     EXPECT_EQ("", m.find("f")->second);
256     EXPECT_EQ("", m.find("g")->second);
257   }
258 }
259 
260 //
261 // Tests for SplitIterator
262 //
263 
// Walks a two-element split by hand, exercising operator*, operator->,
// pre-increment, post-increment and comparison against end().
TEST(SplitIterator, Basics) {
  auto splitter = absl::StrSplit("a,b", ',');
  auto it = splitter.begin();
  auto end = splitter.end();

  // ASSERT (not EXPECT) so that a premature end() stops the test before the
  // iterator is dereferenced.
  ASSERT_NE(it, end);
  EXPECT_EQ("a", *it);  // tests dereference
  ++it;                 // tests preincrement
  ASSERT_NE(it, end);
  EXPECT_EQ("b",
            std::string(it->data(), it->size()));  // tests dereference as ptr
  it++;                                            // tests postincrement
  EXPECT_EQ(it, end);
}
278 
279 // Simple Predicate to skip a particular string.
280 class Skip {
281  public:
Skip(const std::string & s)282   explicit Skip(const std::string& s) : s_(s) {}
operator ()(absl::string_view sp)283   bool operator()(absl::string_view sp) { return sp != s_; }
284 
285  private:
286   std::string s_;
287 };
288 
// Same walk as the basic iterator test, but with a predicate that filters out
// the middle piece, so iteration must go straight from "a" to "c".
TEST(SplitIterator, Predicate) {
  auto splitter = absl::StrSplit("a,b,c", ',', Skip("b"));
  auto it = splitter.begin();
  auto end = splitter.end();

  // ASSERT (not EXPECT) so that a premature end() stops the test before the
  // iterator is dereferenced.
  ASSERT_NE(it, end);
  EXPECT_EQ("a", *it);  // tests dereference
  ++it;                 // tests preincrement -- "b" should be skipped here.
  ASSERT_NE(it, end);
  EXPECT_EQ("c",
            std::string(it->data(), it->size()));  // tests dereference as ptr
  it++;                                            // tests postincrement
  EXPECT_EQ(it, end);
}
303 
// Table-driven check of boundary inputs (empty input, lone delimiter, leading
// and trailing delimiters) when iterating a split on ','.
TEST(SplitIterator, EdgeCases) {
  // Expected input and output, assuming a delimiter of ','
  struct {
    std::string in;
    std::vector<std::string> expect;
  } specs[] = {
      {"", {""}},
      {"foo", {"foo"}},
      {",", {"", ""}},
      {",foo", {"", "foo"}},
      {"foo,", {"foo", ""}},
      {",foo,", {"", "foo", ""}},
      {"foo,bar", {"foo", "bar"}},
  };

  for (const auto& spec : specs) {
    SCOPED_TRACE(spec.in);
    auto splitter = absl::StrSplit(spec.in, ',');
    auto it = splitter.begin();
    auto end = splitter.end();
    for (const auto& expected : spec.expect) {
      // Fatal on purpose: if fewer pieces were produced than expected, stop
      // here rather than dereferencing the end iterator on the next line.
      ASSERT_NE(it, end);
      EXPECT_EQ(expected, *it++);
    }
    // No pieces may remain beyond the expected ones.
    EXPECT_EQ(it, end);
  }
}
331 
// A const-qualified splitter must still be usable as a range.
TEST(Splitter, Const) {
  const auto const_splitter = absl::StrSplit("a,b,c", ',');
  EXPECT_THAT(const_splitter, ElementsAre("a", "b", "c"));
}
336 
// Pins down the differing results for an empty versus a default-constructed
// (null) absl::string_view input.
TEST(Split, EmptyAndNull) {
  // Attention: the two views below compare equal, yet splitting them gives
  // different results. That is likely surprising and undesirable, but it is
  // kept for backward compatibility by a small "hack" in
  // str_split_internal.h. If that behavior is ever changed/fixed, this test
  // will need to be updated.
  const absl::string_view empty_view("");
  const absl::string_view null_view;

  // Empty input: one empty piece.
  EXPECT_THAT(absl::StrSplit(empty_view, '-'), ElementsAre(""));
  // Null input: no pieces at all.
  EXPECT_THAT(absl::StrSplit(null_view, '-'), ElementsAre());
}
347 
// Uses a non-end iterator as a loop terminator, which only works if equality
// between two non-end SplitIterators is implemented correctly.
TEST(SplitIterator, EqualityAsEndCondition) {
  auto splitter = absl::StrSplit("a,b,c", ',');
  auto cursor = splitter.begin();

  // Advance a copy twice so that it designates "c" in the input text.
  auto stop = cursor;
  ++stop;
  ++stop;
  EXPECT_EQ("c", *stop);

  // Collect everything in front of `stop`; the loop must terminate exactly
  // when `cursor` reaches the same position.
  std::vector<absl::string_view> collected;
  while (cursor != stop) {
    collected.push_back(*cursor);
    ++cursor;
  }
  EXPECT_THAT(collected, ElementsAre("a", "b"));
}
368 
369 //
370 // Tests for Splitter
371 //
372 
// A splitter held in a named variable can drive a range-based for loop.
TEST(Splitter, RangeIterators) {
  auto splitter = absl::StrSplit("a,b,c", ',');

  std::vector<absl::string_view> pieces;
  for (const absl::string_view piece : splitter) {
    pieces.push_back(piece);
  }

  EXPECT_THAT(pieces, ElementsAre("a", "b", "c"));
}
381 
382 // Some template functions for use in testing conversion operators
383 template <typename ContainerType, typename Splitter>
TestConversionOperator(const Splitter & splitter)384 void TestConversionOperator(const Splitter& splitter) {
385   ContainerType output = splitter;
386   EXPECT_THAT(output, UnorderedElementsAre("a", "b", "c", "d"));
387 }
388 
389 template <typename MapType, typename Splitter>
TestMapConversionOperator(const Splitter & splitter)390 void TestMapConversionOperator(const Splitter& splitter) {
391   MapType m = splitter;
392   EXPECT_THAT(m, UnorderedElementsAre(Pair("a", "b"), Pair("c", "d")));
393 }
394 
// Implicitly converts `splitter` to std::pair<FirstType, SecondType> and
// checks that the pair equals ("a", "b").
template <typename FirstType, typename SecondType, typename Splitter>
void TestPairConversionOperator(const Splitter& splitter) {
  using PairType = std::pair<FirstType, SecondType>;
  // Copy-initialization on purpose: this goes through the implicit conversion.
  const PairType actual = splitter;
  const PairType expected("a", "b");
  EXPECT_EQ(actual, expected);
}
400 
401 template <typename StringType, typename Splitter>
TestArrayConversionOperator(const Splitter & splitter)402 void TestArrayConversionOperator(const Splitter& splitter) {
403   std::array<StringType, 2> a = splitter;
404   EXPECT_THAT(a, ElementsAre("a", "b"));
405 }
406 
TEST(Splitter,ConversionOperator)407 TEST(Splitter, ConversionOperator) {
408   auto splitter = absl::StrSplit("a,b,c,d", ',');
409 
410   TestConversionOperator<std::vector<absl::string_view>>(splitter);
411   TestConversionOperator<std::vector<std::string>>(splitter);
412   TestConversionOperator<std::list<absl::string_view>>(splitter);
413   TestConversionOperator<std::list<std::string>>(splitter);
414   TestConversionOperator<std::deque<absl::string_view>>(splitter);
415   TestConversionOperator<std::deque<std::string>>(splitter);
416   TestConversionOperator<std::set<absl::string_view>>(splitter);
417   TestConversionOperator<std::set<std::string>>(splitter);
418   TestConversionOperator<std::multiset<absl::string_view>>(splitter);
419   TestConversionOperator<std::multiset<std::string>>(splitter);
420   TestConversionOperator<absl::btree_set<absl::string_view>>(splitter);
421   TestConversionOperator<absl::btree_set<std::string>>(splitter);
422   TestConversionOperator<absl::btree_multiset<absl::string_view>>(splitter);
423   TestConversionOperator<absl::btree_multiset<std::string>>(splitter);
424   TestConversionOperator<std::unordered_set<std::string>>(splitter);
425 
426   // Tests conversion to map-like objects.
427 
428   TestMapConversionOperator<std::map<absl::string_view, absl::string_view>>(
429       splitter);
430   TestMapConversionOperator<std::map<absl::string_view, std::string>>(splitter);
431   TestMapConversionOperator<std::map<std::string, absl::string_view>>(splitter);
432   TestMapConversionOperator<std::map<std::string, std::string>>(splitter);
433   TestMapConversionOperator<
434       std::multimap<absl::string_view, absl::string_view>>(splitter);
435   TestMapConversionOperator<std::multimap<absl::string_view, std::string>>(
436       splitter);
437   TestMapConversionOperator<std::multimap<std::string, absl::string_view>>(
438       splitter);
439   TestMapConversionOperator<std::multimap<std::string, std::string>>(splitter);
440   TestMapConversionOperator<
441       absl::btree_map<absl::string_view, absl::string_view>>(splitter);
442   TestMapConversionOperator<absl::btree_map<absl::string_view, std::string>>(
443       splitter);
444   TestMapConversionOperator<absl::btree_map<std::string, absl::string_view>>(
445       splitter);
446   TestMapConversionOperator<absl::btree_map<std::string, std::string>>(
447       splitter);
448   TestMapConversionOperator<
449       absl::btree_multimap<absl::string_view, absl::string_view>>(splitter);
450   TestMapConversionOperator<
451       absl::btree_multimap<absl::string_view, std::string>>(splitter);
452   TestMapConversionOperator<
453       absl::btree_multimap<std::string, absl::string_view>>(splitter);
454   TestMapConversionOperator<absl::btree_multimap<std::string, std::string>>(
455       splitter);
456   TestMapConversionOperator<std::unordered_map<std::string, std::string>>(
457       splitter);
458   TestMapConversionOperator<
459       absl::node_hash_map<absl::string_view, absl::string_view>>(splitter);
460   TestMapConversionOperator<
461       absl::node_hash_map<absl::string_view, std::string>>(splitter);
462   TestMapConversionOperator<
463       absl::node_hash_map<std::string, absl::string_view>>(splitter);
464   TestMapConversionOperator<
465       absl::flat_hash_map<absl::string_view, absl::string_view>>(splitter);
466   TestMapConversionOperator<
467       absl::flat_hash_map<absl::string_view, std::string>>(splitter);
468   TestMapConversionOperator<
469       absl::flat_hash_map<std::string, absl::string_view>>(splitter);
470 
471   // Tests conversion to std::pair
472 
473   TestPairConversionOperator<absl::string_view, absl::string_view>(splitter);
474   TestPairConversionOperator<absl::string_view, std::string>(splitter);
475   TestPairConversionOperator<std::string, absl::string_view>(splitter);
476   TestPairConversionOperator<std::string, std::string>(splitter);
477 
478   // Tests conversion to std::array
479   TestArrayConversionOperator<std::string>(splitter);
480   TestArrayConversionOperator<absl::string_view>(splitter);
481 }
482 
483 // A few additional tests for conversion to std::pair. This conversion is
484 // different from others because a std::pair always has exactly two elements:
485 // .first and .second. The split has to work even when the split has
486 // less-than, equal-to, and more-than 2 strings.
TEST(Splitter,ToPair)487 TEST(Splitter, ToPair) {
488   {
489     // Empty string
490     std::pair<std::string, std::string> p = absl::StrSplit("", ',');
491     EXPECT_EQ("", p.first);
492     EXPECT_EQ("", p.second);
493   }
494 
495   {
496     // Only first
497     std::pair<std::string, std::string> p = absl::StrSplit("a", ',');
498     EXPECT_EQ("a", p.first);
499     EXPECT_EQ("", p.second);
500   }
501 
502   {
503     // Only second
504     std::pair<std::string, std::string> p = absl::StrSplit(",b", ',');
505     EXPECT_EQ("", p.first);
506     EXPECT_EQ("b", p.second);
507   }
508 
509   {
510     // First and second.
511     std::pair<std::string, std::string> p = absl::StrSplit("a,b", ',');
512     EXPECT_EQ("a", p.first);
513     EXPECT_EQ("b", p.second);
514   }
515 
516   {
517     // First and second and then more stuff that will be ignored.
518     std::pair<std::string, std::string> p = absl::StrSplit("a,b,c", ',');
519     EXPECT_EQ("a", p.first);
520     EXPECT_EQ("b", p.second);
521     // "c" is omitted.
522   }
523 }
524 
525 // std::array tests similar to std::pair tests above, testing fewer, exactly,
526 // or more elements than the array size.
TEST(Splitter,ToArray)527 TEST(Splitter, ToArray) {
528   {
529     // Empty string
530     std::array<std::string, 2> p = absl::StrSplit("", ',');
531     EXPECT_THAT(p, ElementsAre("", ""));
532   }
533 
534   {
535     // Only first
536     std::array<std::string, 2> p = absl::StrSplit("a", ',');
537     EXPECT_THAT(p, ElementsAre("a", ""));
538   }
539 
540   {
541     // Only second
542     std::array<std::string, 2> p = absl::StrSplit(",b", ',');
543     EXPECT_THAT(p, ElementsAre("", "b"));
544   }
545 
546   {
547     // First and second.
548     std::array<std::string, 2> p = absl::StrSplit("a,b", ',');
549     EXPECT_THAT(p, ElementsAre("a", "b"));
550   }
551 
552   {
553     // First and second and then more stuff that will be ignored.
554     std::array<std::string, 2> p = absl::StrSplit("a,b,c", ',');
555     EXPECT_THAT(p, ElementsAre("a", "b"));
556     // "c" is omitted.
557   }
558 }
559 
// Runs the same input through each built-in predicate and checks which pieces
// survive.
TEST(Splitter, Predicates) {
  static const char kTestChars[] = ",a, ,b,";

  {
    // Without a predicate nothing is filtered, empties included.
    auto plain_splitter = absl::StrSplit(kTestChars, ',');
    std::vector<std::string> pieces = plain_splitter;
    EXPECT_THAT(pieces, ElementsAre("", "a", " ", "b", ""));
  }

  {
    // AllowEmpty keeps empty strings...
    auto allow_empty_splitter =
        absl::StrSplit(kTestChars, ',', absl::AllowEmpty());
    std::vector<std::string> with_allow_empty = allow_empty_splitter;
    EXPECT_THAT(with_allow_empty, ElementsAre("", "a", " ", "b", ""));

    // ...which must match the result obtained with no predicate at all.
    auto plain_splitter = absl::StrSplit(kTestChars, ',');
    std::vector<std::string> without_predicate = plain_splitter;
    EXPECT_EQ(with_allow_empty, without_predicate);
  }

  {
    // SkipEmpty drops empty pieces but keeps the whitespace-only one.
    auto skip_empty_splitter =
        absl::StrSplit(kTestChars, ',', absl::SkipEmpty());
    std::vector<std::string> pieces = skip_empty_splitter;
    EXPECT_THAT(pieces, ElementsAre("a", " ", "b"));
  }

  {
    // SkipWhitespace drops empty and all-whitespace pieces.
    auto skip_whitespace_splitter =
        absl::StrSplit(kTestChars, ',', absl::SkipWhitespace());
    std::vector<std::string> pieces = skip_whitespace_splitter;
    EXPECT_THAT(pieces, ElementsAre("a", "b"));
  }
}
599 
600 //
601 // Tests for StrSplit()
602 //
603 
// Smoke tests for absl::StrSplit(): discarding the result, converting on
// initialization, and converting on assignment.
TEST(Split, Basics) {
  {
    // Ignoring the return value is pointless but has to compile and run.
    absl::StrSplit("a,b,c", ',');
  }

  {
    std::vector<absl::string_view> views = absl::StrSplit("a,b,c", ',');
    EXPECT_THAT(views, ElementsAre("a", "b", "c"));
  }

  {
    std::vector<std::string> strings = absl::StrSplit("a,b,c", ',');
    EXPECT_THAT(strings, ElementsAre("a", "b", "c"));
  }

  {
    // Assignment (as opposed to initialization) must work too. This requires a
    // little extra work with C++11 because of overloads with
    // initializer_list.
    std::vector<std::string> strings;
    strings = absl::StrSplit("a,b,c", ',');
    EXPECT_THAT(strings, ElementsAre("a", "b", "c"));

    std::map<std::string, std::string> ordered;
    ordered = absl::StrSplit("a,b,c", ',');
    EXPECT_EQ(2, ordered.size());

    std::unordered_map<std::string, std::string> hashed;
    hashed = absl::StrSplit("a,b,c", ',');
    EXPECT_EQ(2, hashed.size());
  }
}
636 
ReturnStringView()637 absl::string_view ReturnStringView() { return "Hello World"; }
// Returns "Hello World" as a pointer to const char.
const char* ReturnConstCharP() {
  const char* const greeting = "Hello World";
  return greeting;
}
// Returns "Hello World" as a pointer to non-const char. The pointee is a
// string literal, so it must only be read, never written through.
char* ReturnCharP() {
  const char* const greeting = "Hello World";
  return const_cast<char*>(greeting);
}
640 
// StrSplit() must accept text produced by a function call, whether it comes
// back as an absl::string_view, a const char* or a char*.
TEST(Split, AcceptsCertainTemporaries) {
  const auto is_hello_world = ElementsAre("Hello", "World");
  std::vector<std::string> words;

  words = absl::StrSplit(ReturnStringView(), ' ');
  EXPECT_THAT(words, is_hello_world);

  words = absl::StrSplit(ReturnConstCharP(), ' ');
  EXPECT_THAT(words, is_hello_world);

  words = absl::StrSplit(ReturnCharP(), ' ');
  EXPECT_THAT(words, is_hello_world);
}
650 
// A splitter built from a temporary std::string must remain usable after that
// temporary has been destroyed.
TEST(Split, Temporary) {
  // Use a std::string longer than the SSO length, so that when the temporary is
  // destroyed, if the splitter keeps a reference to the string's contents,
  // it'll reference freed memory instead of just dead on-stack memory.
  const char input[] = "a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u";
  EXPECT_LT(sizeof(std::string), ABSL_ARRAYSIZE(input))
      << "Input should be larger than fits on the stack.";

  // First pass: the pieces must come out as consecutive letters starting at
  // 'a'. This pattern arises most often via range-based for loops.
  auto first_splitter = absl::StrSplit(std::string(input), ',');
  char next_letter = 'a';
  for (const absl::string_view piece : first_splitter) {
    EXPECT_EQ(std::string(1, next_letter), piece);
    ++next_letter;
  }
  // The last piece is "u", so the counter ends one past it.
  EXPECT_EQ('v', next_letter);

  // Second pass with a fresh temporary and a fresh splitter.
  auto second_splitter = absl::StrSplit(std::string(input), ',');
  next_letter = 'a';
  for (const absl::string_view piece : second_splitter) {
    EXPECT_EQ(std::string(1, next_letter), piece);
    ++next_letter;
  }
  EXPECT_EQ('v', next_letter);
}
677 
// Copy-constructs `value` into a new heap object and returns it wrapped in a
// std::unique_ptr that owns the copy.
template <typename T>
static std::unique_ptr<T> CopyToHeap(const T& value) {
  std::unique_ptr<T> heap_copy(new T(value));
  return heap_copy;
}
682 
// A splitter over an lvalue string can be copied, and the copy keeps working
// after the splitter it was copied from has been destroyed.
TEST(Split, LvalueCaptureIsCopyable) {
  std::string input = "a,b";
  auto on_heap = CopyToHeap(absl::StrSplit(input, ','));
  auto on_stack = *on_heap;
  on_heap.reset();  // Destroy the source of the copy before using the copy.

  std::vector<std::string> pieces = on_stack;
  EXPECT_THAT(pieces, testing::ElementsAre("a", "b"));
}
691 
// A splitter over a temporary string can be copied, and the copy keeps working
// after the splitter it was copied from has been destroyed.
TEST(Split, TemporaryCaptureIsCopyable) {
  auto on_heap = CopyToHeap(absl::StrSplit(std::string("a,b"), ','));
  auto on_stack = *on_heap;
  on_heap.reset();  // Destroy the source of the copy before using the copy.

  std::vector<std::string> pieces = on_stack;
  EXPECT_THAT(pieces, testing::ElementsAre("a", "b"));
}
699 
// Exercises all four copy/move operations on a splitter and checks that the
// final object still yields the original piece.
TEST(Split, SplitterIsCopyableAndMoveable) {
  auto original = absl::StrSplit("foo", '-');

  // The statements below mainly have to compile.
  auto copied = original;            // Copy construct
  auto moved = std::move(original);  // Move construct
  copied = moved;                    // Copy assign
  moved = std::move(copied);         // Move assign

  EXPECT_THAT(moved, ElementsAre("foo"));
}
711 
// The delimiter may be given as a char, a std::string or an
// absl::string_view; all three must split identically.
TEST(Split, StringDelimiter) {
  {
    // char delimiter.
    std::vector<absl::string_view> parts = absl::StrSplit("a,b", ',');
    EXPECT_THAT(parts, ElementsAre("a", "b"));
  }

  {
    // std::string delimiter.
    std::vector<absl::string_view> parts =
        absl::StrSplit("a,b", std::string(","));
    EXPECT_THAT(parts, ElementsAre("a", "b"));
  }

  {
    // absl::string_view delimiter.
    std::vector<absl::string_view> parts =
        absl::StrSplit("a,b", absl::string_view(","));
    EXPECT_THAT(parts, ElementsAre("a", "b"));
  }
}
729 
730 #if !defined(__cpp_char8_t)
731 #if defined(__clang__)
732 #pragma clang diagnostic push
733 #pragma clang diagnostic ignored "-Wc++2a-compat"
734 #endif
// Splitting must work on UTF-8 encoded text, with either ASCII or UTF-8
// delimiters.
TEST(Split, UTF8) {
  const std::string utf8_string = u8"\u03BA\u1F79\u03C3\u03BC\u03B5";

  {
    // UTF-8 text split on an ASCII character.
    const std::string text = "a," + utf8_string;
    std::vector<absl::string_view> parts = absl::StrSplit(text, ',');
    EXPECT_THAT(parts, ElementsAre("a", utf8_string));
  }

  {
    // UTF-8 text split on a delimiter that itself contains UTF-8.
    const std::string text = "a," + utf8_string + ",b";
    const std::string utf8_delimiter = "," + utf8_string + ",";
    std::vector<absl::string_view> parts =
        absl::StrSplit(text, utf8_delimiter);
    EXPECT_THAT(parts, ElementsAre("a", "b"));
  }

  {
    // UTF-8 text split with ByAnyChar over ASCII characters.
    std::vector<absl::string_view> parts =
        absl::StrSplit(u8"Foo h\u00E4llo th\u4E1Ere", absl::ByAnyChar(" \t"));
    EXPECT_THAT(parts, ElementsAre("Foo", u8"h\u00E4llo", u8"th\u4E1Ere"));
  }
}
761 #if defined(__clang__)
762 #pragma clang diagnostic pop
763 #endif
764 #endif  // !defined(__cpp_char8_t)
765 
// An empty string delimiter splits the input into single characters; an empty
// input still yields one empty piece.
TEST(Split, EmptyStringDelimiter) {
  {
    // Empty input.
    std::vector<std::string> chars = absl::StrSplit("", "");
    EXPECT_THAT(chars, ElementsAre(""));
  }

  {
    // One character.
    std::vector<std::string> chars = absl::StrSplit("a", "");
    EXPECT_THAT(chars, ElementsAre("a"));
  }

  {
    // Two characters.
    std::vector<std::string> chars = absl::StrSplit("ab", "");
    EXPECT_THAT(chars, ElementsAre("a", "b"));
  }

  {
    // Whitespace is a character like any other.
    std::vector<std::string> chars = absl::StrSplit("a b", "");
    EXPECT_THAT(chars, ElementsAre("a", " ", "b"));
  }
}
787 
// Splitting on the two-character delimiter "//": a lone '/' is never a match,
// and matches are consumed left to right.
TEST(Split, SubstrDelimiter) {
  const absl::string_view double_slash("//");
  std::vector<absl::string_view> pieces;

  // Inputs without any complete delimiter come back as a single piece.
  pieces = absl::StrSplit("", double_slash);
  EXPECT_THAT(pieces, ElementsAre(""));

  pieces = absl::StrSplit("//", double_slash);
  EXPECT_THAT(pieces, ElementsAre("", ""));

  pieces = absl::StrSplit("ab", double_slash);
  EXPECT_THAT(pieces, ElementsAre("ab"));

  pieces = absl::StrSplit("ab//", double_slash);
  EXPECT_THAT(pieces, ElementsAre("ab", ""));

  pieces = absl::StrSplit("ab/", double_slash);
  EXPECT_THAT(pieces, ElementsAre("ab/"));

  pieces = absl::StrSplit("a/b", double_slash);
  EXPECT_THAT(pieces, ElementsAre("a/b"));

  pieces = absl::StrSplit("a//b", double_slash);
  EXPECT_THAT(pieces, ElementsAre("a", "b"));

  // Three slashes: the first two form the delimiter, the third is data.
  pieces = absl::StrSplit("a///b", double_slash);
  EXPECT_THAT(pieces, ElementsAre("a", "/b"));

  // Four slashes: two adjacent delimiters with an empty piece in between.
  pieces = absl::StrSplit("a////b", double_slash);
  EXPECT_THAT(pieces, ElementsAre("a", "", "b"));
}
819 
// Checks where empty pieces show up: for leading, trailing and adjacent
// delimiters, and for an input consisting only of delimiters.
TEST(Split, EmptyResults) {
  std::vector<absl::string_view> pieces;

  pieces = absl::StrSplit("", '#');
  EXPECT_THAT(pieces, ElementsAre(""));

  pieces = absl::StrSplit("#", '#');
  EXPECT_THAT(pieces, ElementsAre("", ""));

  pieces = absl::StrSplit("#cd", '#');
  EXPECT_THAT(pieces, ElementsAre("", "cd"));

  pieces = absl::StrSplit("ab#cd#", '#');
  EXPECT_THAT(pieces, ElementsAre("ab", "cd", ""));

  pieces = absl::StrSplit("ab##cd", '#');
  EXPECT_THAT(pieces, ElementsAre("ab", "", "cd"));

  pieces = absl::StrSplit("ab##", '#');
  EXPECT_THAT(pieces, ElementsAre("ab", "", ""));

  pieces = absl::StrSplit("ab#ab#", '#');
  EXPECT_THAT(pieces, ElementsAre("ab", "ab", ""));

  // Four delimiters and nothing else: five empty pieces.
  pieces = absl::StrSplit("aaaa", 'a');
  EXPECT_THAT(pieces, ElementsAre("", "", "", "", ""));

  // With SkipEmpty, an empty input produces no pieces at all.
  pieces = absl::StrSplit("", '#', absl::SkipEmpty());
  EXPECT_THAT(pieces, ElementsAre());
}
850 
851 template <typename Delimiter>
IsFoundAtStartingPos(absl::string_view text,Delimiter d,size_t starting_pos,int expected_pos)852 static bool IsFoundAtStartingPos(absl::string_view text, Delimiter d,
853                                  size_t starting_pos, int expected_pos) {
854   absl::string_view found = d.Find(text, starting_pos);
855   return found.data() != text.data() + text.size() &&
856          expected_pos == found.data() - text.data();
857 }
858 
859 // Helper function for testing Delimiter objects. Returns true if the given
860 // Delimiter is found in the given string at the given position. This function
861 // tests two cases:
862 //   1. The actual text given, staring at position 0
863 //   2. The text given with leading padding that should be ignored
864 template <typename Delimiter>
IsFoundAt(absl::string_view text,Delimiter d,int expected_pos)865 static bool IsFoundAt(absl::string_view text, Delimiter d, int expected_pos) {
866   const std::string leading_text = ",x,y,z,";
867   return IsFoundAtStartingPos(text, d, 0, expected_pos) &&
868          IsFoundAtStartingPos(leading_text + std::string(text), d,
869                               leading_text.length(),
870                               expected_pos + leading_text.length());
871 }
872 
873 //
874 // Tests for ByString
875 //
876 
// Shared checks for any delimiter that represents a single comma: the first
// comma must be reported at its offset, and text without a comma must report
// no match.
template <typename Delimiter>
void TestComma(Delimiter d) {
  struct Located {
    const char* text;
    int pos;  // Expected offset of the first comma in `text`.
  };
  const Located kFound[] = {
      {",", 0},   {"a,", 1},   {",b", 0},
      {"a,b", 1}, {"a,b,", 1}, {"a,b,c", 1},
  };
  for (const Located& c : kFound) {
    EXPECT_TRUE(IsFoundAt(c.text, d, c.pos)) << "text: \"" << c.text << "\"";
  }

  // None of these contain a comma, so no position can match.
  const char* const kNotFound[] = {"", " ", "a", "a b c", "a;b;c", ";"};
  for (const char* text : kNotFound) {
    EXPECT_FALSE(IsFoundAt(text, d, -1)) << "text: \"" << text << "\"";
  }
}
893 
// Covers absl::ByString with a one-comma delimiter and with the empty string.
TEST(Delimiter, ByString) {
  // Used as a temporary.
  TestComma(absl::ByString(","));

  // Used as a named object.
  const absl::ByString named_comma(",");
  TestComma(named_comma);

  // absl::string_view::find("") reports the empty string at position 0, as the
  // check right below demonstrates. If the ByString delimiter did the same,
  // the SplitIterator code would loop forever. To avoid that, an empty
  // ByString is special-cased to report the item at position 1 instead.
  EXPECT_EQ(0, absl::string_view("abc").find(""));  // "" is found at 0

  const absl::ByString empty_delim("");
  // Text shorter than two characters has no usable position 1.
  EXPECT_FALSE(IsFoundAt("", empty_delim, 0));
  EXPECT_FALSE(IsFoundAt("a", empty_delim, 0));
  EXPECT_TRUE(IsFoundAt("ab", empty_delim, 1));
  EXPECT_TRUE(IsFoundAt("abc", empty_delim, 1));
}
915 
// Runs the shared single-comma checks against absl::ByChar.
TEST(Split, ByChar) {
  // Used as a temporary.
  TestComma(absl::ByChar(','));

  // Used as a named object.
  const absl::ByChar named_comma(',');
  TestComma(named_comma);
}
924 
925 //
926 // Tests for ByAnyChar
927 //
928 
// Covers absl::ByAnyChar with one delimiter character, with two, and with an
// empty character set.
TEST(Delimiter, ByAnyChar) {
  struct Located {
    const char* text;
    int pos;  // Expected offset of the first delimiter character.
  };

  // --- A single delimiter character: ',' ---
  const absl::ByAnyChar comma_only(",");
  const Located kCommaFound[] = {{",", 0}, {"a,", 1}, {"a,b", 1}, {",b", 0}};
  for (const Located& c : kCommaFound) {
    EXPECT_TRUE(IsFoundAt(c.text, comma_only, c.pos))
        << "text: \"" << c.text << "\"";
  }
  const char* const kCommaNotFound[] = {"", " ", "a", "a;b;c", ";"};
  for (const char* text : kCommaNotFound) {
    EXPECT_FALSE(IsFoundAt(text, comma_only, -1))
        << "text: \"" << text << "\"";
  }

  // --- Two delimiter characters: ',' and ';' ---
  // Whichever of the two appears first in the text is the match.
  const absl::ByAnyChar comma_or_semi(",;");
  const Located kEitherFound[] = {
      {",", 0},   {";", 0},   {",;", 0},  {";,", 0},   {",;b", 0},
      {";,b", 0}, {"a;,", 1}, {"a,;", 1}, {"a;,b", 1}, {"a,;b", 1},
  };
  for (const Located& c : kEitherFound) {
    EXPECT_TRUE(IsFoundAt(c.text, comma_or_semi, c.pos))
        << "text: \"" << c.text << "\"";
  }
  const char* const kEitherNotFound[] = {"", " ", "a", "a=b=c", "="};
  for (const char* text : kEitherNotFound) {
    EXPECT_FALSE(IsFoundAt(text, comma_or_semi, -1))
        << "text: \"" << text << "\"";
  }

  // --- Empty character set ---
  // ByAnyChar given an empty string behaves just like ByString given an empty
  // string: it reports a zero-length absl::string_view at position 1, never at
  // position 0.
  const absl::ByAnyChar no_chars("");
  EXPECT_FALSE(IsFoundAt("", no_chars, 0));
  EXPECT_FALSE(IsFoundAt("a", no_chars, 0));
  EXPECT_TRUE(IsFoundAt("ab", no_chars, 1));
  EXPECT_TRUE(IsFoundAt("abc", no_chars, 1));
}
972 
973 //
974 // Tests for ByAsciiWhitespace
975 //
// Splits on absl::ByAsciiWhitespace, both keeping empty pieces and dropping
// them with absl::SkipEmpty().
TEST(Split, ByAsciiWhitespace) {
  using Pieces = std::vector<absl::string_view>;

  // Splits on ASCII whitespace with no predicate argument.
  const auto keep_empty = [](const char* text) -> Pieces {
    return absl::StrSplit(text, absl::ByAsciiWhitespace());
  };
  // Splits on ASCII whitespace and discards empty pieces.
  const auto skip_empty = [](const char* text) -> Pieces {
    return absl::StrSplit(text, absl::ByAsciiWhitespace(), absl::SkipEmpty());
  };

  // Trailing newline.
  EXPECT_THAT(keep_empty("aaaa\n"), ElementsAre("aaaa", ""));
  EXPECT_THAT(skip_empty("aaaa\n"), ElementsAre("aaaa"));

  // Whitespace only.
  EXPECT_THAT(keep_empty(" "), ElementsAre("", ""));
  EXPECT_THAT(skip_empty(" "), IsEmpty());

  // No whitespace at all.
  EXPECT_THAT(keep_empty("a"), ElementsAre("a"));

  // Empty input.
  EXPECT_THAT(keep_empty(""), ElementsAre(""));
  EXPECT_THAT(skip_empty(""), IsEmpty());

  // Mixed whitespace: every whitespace character is its own delimiter, so
  // runs of whitespace produce empty pieces unless they are skipped.
  EXPECT_THAT(keep_empty("a b\tc\n  d\n"),
              ElementsAre("a", "b", "c", "", "", "d", ""));
  EXPECT_THAT(skip_empty("a b\tc\n  d  \n"), ElementsAre("a", "b", "c", "d"));

  // Tab, newline, vertical tab, form feed, carriage return and space all
  // count as delimiters.
  EXPECT_THAT(skip_empty("a\t\n\v\f\r b"), ElementsAre("a", "b"));
}
1011 
1012 //
1013 // Tests for ByLength
1014 //
1015 
// Covers absl::ByLength with a chunk length of four characters.
TEST(Delimiter, ByLength) {
  const absl::ByLength every_four(4);

  // Text longer than four characters: the split point is at offset 4,
  // whatever the characters are.
  const char* const kFound[] = {
      "abcde",
      "abcdefghijklmnopqrstuvwxyz",
      "a b,c\nd",
  };
  for (const char* text : kFound) {
    EXPECT_TRUE(IsFoundAt(text, every_four, 4)) << "text: \"" << text << "\"";
  }

  // Text of at most four characters: no split point is reported.
  const char* const kNotFound[] = {"", "a", "ab", "abc", "abcd"};
  for (const char* text : kNotFound) {
    EXPECT_FALSE(IsFoundAt(text, every_four, 0))
        << "text: \"" << text << "\"";
  }
}
1032 
// Splits a very large string (2G + 1 bytes; 64M + 1 bytes under sanitizers)
// whose only delimiter is its last character, and checks that exactly two
// pieces come back: the long run of 'x' and a trailing empty piece.
TEST(Split, WorksWithLargeStrings) {
#if defined(ABSL_HAVE_ADDRESS_SANITIZER) || \
    defined(ABSL_HAVE_MEMORY_SANITIZER) || defined(ABSL_HAVE_THREAD_SANITIZER)
  constexpr size_t kSize = (uint32_t{1} << 26) + 1;  // 64M + 1 byte
#else
  constexpr size_t kSize = (uint32_t{1} << 31) + 1;  // 2G + 1 byte
#endif
  // Only exercised when size_t is wider than 4 bytes.
  // NOTE(review): presumably because a string this large cannot be allocated
  // with a 32-bit size_t -- confirm.
  if (sizeof(size_t) > 4) {
    std::string s(kSize, 'x');
    s.back() = '-';
    std::vector<absl::string_view> v = absl::StrSplit(s, '-');
    // Fatal on mismatch: the element accesses below would otherwise index out
    // of bounds.
    ASSERT_EQ(size_t{2}, v.size());
    // The first element holds every character before the trailing '-'.
    EXPECT_EQ(kSize - 1, v[0].size());
    // Spot-check a few characters; testing::StartsWith is too slow with a
    // string this large.
    EXPECT_EQ('x', v[0][0]);
    EXPECT_EQ('x', v[0][1]);
    EXPECT_EQ('x', v[0][3]);
    EXPECT_EQ("", v[1]);
  }
}
1053 
// Checks the internal detection traits: each must be false for `int` and true
// for a type that has the corresponding property.
TEST(SplitInternalTest, TypeTraits) {
  namespace internal = absl::strings_internal;
  using IntMap = std::map<int, int>;
  using IntList = std::initializer_list<int>;

  // Nested `mapped_type`.
  EXPECT_FALSE(internal::HasMappedType<int>::value);
  EXPECT_TRUE(internal::HasMappedType<IntMap>::value);

  // Nested `value_type`.
  EXPECT_FALSE(internal::HasValueType<int>::value);
  EXPECT_TRUE(internal::HasValueType<IntMap>::value);

  // Nested `const_iterator`.
  EXPECT_FALSE(internal::HasConstIterator<int>::value);
  EXPECT_TRUE(internal::HasConstIterator<IntMap>::value);

  // std::initializer_list detection.
  EXPECT_FALSE(internal::IsInitializerList<int>::value);
  EXPECT_TRUE(internal::IsInitializerList<IntList>::value);
}
1068 
1069 }  // namespace
1070