• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2017 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 
16 // This file declares INTERNAL parts of the Split API that are inline/templated
17 // or otherwise need to be available at compile time. The main abstractions
18 // defined in here are
19 //
20 //   - ConvertibleToStringView
21 //   - SplitIterator<>
22 //   - Splitter<>
23 //
24 // DO NOT INCLUDE THIS FILE DIRECTLY. Use this file by including
25 // absl/strings/str_split.h.
26 //
27 // IWYU pragma: private, include "absl/strings/str_split.h"
28 
29 #ifndef ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_
30 #define ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_
31 
32 #include <array>
33 #include <initializer_list>
34 #include <iterator>
35 #include <map>
36 #include <type_traits>
37 #include <utility>
38 #include <vector>
39 
40 #include "absl/base/macros.h"
41 #include "absl/base/port.h"
42 #include "absl/meta/type_traits.h"
43 #include "absl/strings/string_view.h"
44 
45 #ifdef _GLIBCXX_DEBUG
46 #include "absl/strings/internal/stl_type_traits.h"
47 #endif  // _GLIBCXX_DEBUG
48 
49 namespace absl {
50 ABSL_NAMESPACE_BEGIN
51 namespace strings_internal {
52 
53 // This class is implicitly constructible from everything that absl::string_view
54 // is implicitly constructible from. If it's constructed from a temporary
55 // string, the data is moved into a data member so its lifetime matches that of
56 // the ConvertibleToStringView instance.
57 class ConvertibleToStringView {
58  public:
ConvertibleToStringView(const char * s)59   ConvertibleToStringView(const char* s)  // NOLINT(runtime/explicit)
60       : value_(s) {}
ConvertibleToStringView(char * s)61   ConvertibleToStringView(char* s) : value_(s) {}  // NOLINT(runtime/explicit)
ConvertibleToStringView(absl::string_view s)62   ConvertibleToStringView(absl::string_view s)     // NOLINT(runtime/explicit)
63       : value_(s) {}
ConvertibleToStringView(const std::string & s)64   ConvertibleToStringView(const std::string& s)  // NOLINT(runtime/explicit)
65       : value_(s) {}
66 
67   // Matches rvalue strings and moves their data to a member.
ConvertibleToStringView(std::string && s)68   ConvertibleToStringView(std::string&& s)  // NOLINT(runtime/explicit)
69       : copy_(std::move(s)), value_(copy_) {}
70 
ConvertibleToStringView(const ConvertibleToStringView & other)71   ConvertibleToStringView(const ConvertibleToStringView& other)
72       : copy_(other.copy_),
73         value_(other.IsSelfReferential() ? copy_ : other.value_) {}
74 
ConvertibleToStringView(ConvertibleToStringView && other)75   ConvertibleToStringView(ConvertibleToStringView&& other) {
76     StealMembers(std::move(other));
77   }
78 
79   ConvertibleToStringView& operator=(ConvertibleToStringView other) {
80     StealMembers(std::move(other));
81     return *this;
82   }
83 
value()84   absl::string_view value() const { return value_; }
85 
86  private:
87   // Returns true if ctsp's value refers to its internal copy_ member.
IsSelfReferential()88   bool IsSelfReferential() const { return value_.data() == copy_.data(); }
89 
StealMembers(ConvertibleToStringView && other)90   void StealMembers(ConvertibleToStringView&& other) {
91     if (other.IsSelfReferential()) {
92       copy_ = std::move(other.copy_);
93       value_ = copy_;
94       other.value_ = other.copy_;
95     } else {
96       value_ = other.value_;
97     }
98   }
99 
100   // Holds the data moved from temporary std::string arguments. Declared first
101   // so that 'value' can refer to 'copy_'.
102   std::string copy_;
103   absl::string_view value_;
104 };
105 
106 // An iterator that enumerates the parts of a string from a Splitter. The text
107 // to be split, the Delimiter, and the Predicate are all taken from the given
108 // Splitter object. Iterators may only be compared if they refer to the same
109 // Splitter instance.
110 //
111 // This class is NOT part of the public splitting API.
112 template <typename Splitter>
113 class SplitIterator {
114  public:
115   using iterator_category = std::input_iterator_tag;
116   using value_type = absl::string_view;
117   using difference_type = ptrdiff_t;
118   using pointer = const value_type*;
119   using reference = const value_type&;
120 
121   enum State { kInitState, kLastState, kEndState };
SplitIterator(State state,const Splitter * splitter)122   SplitIterator(State state, const Splitter* splitter)
123       : pos_(0),
124         state_(state),
125         splitter_(splitter),
126         delimiter_(splitter->delimiter()),
127         predicate_(splitter->predicate()) {
128     // Hack to maintain backward compatibility. This one block makes it so an
129     // empty absl::string_view whose .data() happens to be nullptr behaves
130     // *differently* from an otherwise empty absl::string_view whose .data() is
131     // not nullptr. This is an undesirable difference in general, but this
132     // behavior is maintained to avoid breaking existing code that happens to
133     // depend on this old behavior/bug. Perhaps it will be fixed one day. The
134     // difference in behavior is as follows:
135     //   Split(absl::string_view(""), '-');  // {""}
136     //   Split(absl::string_view(), '-');    // {}
137     if (splitter_->text().data() == nullptr) {
138       state_ = kEndState;
139       pos_ = splitter_->text().size();
140       return;
141     }
142 
143     if (state_ == kEndState) {
144       pos_ = splitter_->text().size();
145     } else {
146       ++(*this);
147     }
148   }
149 
at_end()150   bool at_end() const { return state_ == kEndState; }
151 
152   reference operator*() const { return curr_; }
153   pointer operator->() const { return &curr_; }
154 
155   SplitIterator& operator++() {
156     do {
157       if (state_ == kLastState) {
158         state_ = kEndState;
159         return *this;
160       }
161       const absl::string_view text = splitter_->text();
162       const absl::string_view d = delimiter_.Find(text, pos_);
163       if (d.data() == text.data() + text.size()) state_ = kLastState;
164       curr_ = text.substr(pos_, d.data() - (text.data() + pos_));
165       pos_ += curr_.size() + d.size();
166     } while (!predicate_(curr_));
167     return *this;
168   }
169 
170   SplitIterator operator++(int) {
171     SplitIterator old(*this);
172     ++(*this);
173     return old;
174   }
175 
176   friend bool operator==(const SplitIterator& a, const SplitIterator& b) {
177     return a.state_ == b.state_ && a.pos_ == b.pos_;
178   }
179 
180   friend bool operator!=(const SplitIterator& a, const SplitIterator& b) {
181     return !(a == b);
182   }
183 
184  private:
185   size_t pos_;
186   State state_;
187   absl::string_view curr_;
188   const Splitter* splitter_;
189   typename Splitter::DelimiterType delimiter_;
190   typename Splitter::PredicateType predicate_;
191 };
192 
193 // HasMappedType<T>::value is true iff there exists a type T::mapped_type.
194 template <typename T, typename = void>
195 struct HasMappedType : std::false_type {};
196 template <typename T>
197 struct HasMappedType<T, absl::void_t<typename T::mapped_type>>
198     : std::true_type {};
199 
200 // HasValueType<T>::value is true iff there exists a type T::value_type.
201 template <typename T, typename = void>
202 struct HasValueType : std::false_type {};
203 template <typename T>
204 struct HasValueType<T, absl::void_t<typename T::value_type>> : std::true_type {
205 };
206 
207 // HasConstIterator<T>::value is true iff there exists a type T::const_iterator.
208 template <typename T, typename = void>
209 struct HasConstIterator : std::false_type {};
210 template <typename T>
211 struct HasConstIterator<T, absl::void_t<typename T::const_iterator>>
212     : std::true_type {};
213 
// IsInitializerList<T>::value is true iff T is an std::initializer_list. More
// details below in Splitter<> where this is used.
//
// Implemented by overload resolution on a T* argument: the template overload
// matches only pointers to std::initializer_list<U>, and the variadic overload
// catches everything else. The functions are declared but never defined; they
// are only used inside decltype.
std::false_type IsInitializerListDispatch(...);  // default: No
template <typename T>
std::true_type IsInitializerListDispatch(std::initializer_list<T>*);
template <typename T>
struct IsInitializerList
    : decltype(IsInitializerListDispatch(static_cast<T*>(nullptr))) {};
222 
223 // A SplitterIsConvertibleTo<C>::type alias exists iff the specified condition
224 // is true for type 'C'.
225 //
226 // Restricts conversion to container-like types (by testing for the presence of
227 // a const_iterator member type) and also to disable conversion to an
228 // std::initializer_list (which also has a const_iterator). Otherwise, code
229 // compiled in C++11 will get an error due to ambiguous conversion paths (in
230 // C++11 std::vector<T>::operator= is overloaded to take either a std::vector<T>
231 // or an std::initializer_list<T>).
232 
233 template <typename C, bool has_value_type, bool has_mapped_type>
234 struct SplitterIsConvertibleToImpl : std::false_type {};
235 
236 template <typename C>
237 struct SplitterIsConvertibleToImpl<C, true, false>
238     : std::is_constructible<typename C::value_type, absl::string_view> {};
239 
240 template <typename C>
241 struct SplitterIsConvertibleToImpl<C, true, true>
242     : absl::conjunction<
243           std::is_constructible<typename C::key_type, absl::string_view>,
244           std::is_constructible<typename C::mapped_type, absl::string_view>> {};
245 
246 template <typename C>
247 struct SplitterIsConvertibleTo
248     : SplitterIsConvertibleToImpl<
249           C,
250 #ifdef _GLIBCXX_DEBUG
251           !IsStrictlyBaseOfAndConvertibleToSTLContainer<C>::value &&
252 #endif  // _GLIBCXX_DEBUG
253               !IsInitializerList<
254                   typename std::remove_reference<C>::type>::value &&
255               HasValueType<C>::value && HasConstIterator<C>::value,
256           HasMappedType<C>::value> {
257 };
258 
259 // This class implements the range that is returned by absl::StrSplit(). This
260 // class has templated conversion operators that allow it to be implicitly
261 // converted to a variety of types that the caller may have specified on the
262 // left-hand side of an assignment.
263 //
264 // The main interface for interacting with this class is through its implicit
265 // conversion operators. However, this class may also be used like a container
266 // in that it has .begin() and .end() member functions. It may also be used
267 // within a range-for loop.
268 //
269 // Output containers can be collections of any type that is constructible from
270 // an absl::string_view.
271 //
272 // An Predicate functor may be supplied. This predicate will be used to filter
273 // the split strings: only strings for which the predicate returns true will be
274 // kept. A Predicate object is any unary functor that takes an absl::string_view
275 // and returns bool.
276 template <typename Delimiter, typename Predicate>
277 class Splitter {
278  public:
279   using DelimiterType = Delimiter;
280   using PredicateType = Predicate;
281   using const_iterator = strings_internal::SplitIterator<Splitter>;
282   using value_type = typename std::iterator_traits<const_iterator>::value_type;
283 
284   Splitter(ConvertibleToStringView input_text, Delimiter d, Predicate p)
285       : text_(std::move(input_text)),
286         delimiter_(std::move(d)),
287         predicate_(std::move(p)) {}
288 
289   absl::string_view text() const { return text_.value(); }
290   const Delimiter& delimiter() const { return delimiter_; }
291   const Predicate& predicate() const { return predicate_; }
292 
293   // Range functions that iterate the split substrings as absl::string_view
294   // objects. These methods enable a Splitter to be used in a range-based for
295   // loop.
296   const_iterator begin() const { return {const_iterator::kInitState, this}; }
297   const_iterator end() const { return {const_iterator::kEndState, this}; }
298 
299   // An implicit conversion operator that is restricted to only those containers
300   // that the splitter is convertible to.
301   template <typename Container,
302             typename = typename std::enable_if<
303                 SplitterIsConvertibleTo<Container>::value>::type>
304   operator Container() const {  // NOLINT(runtime/explicit)
305     return ConvertToContainer<Container, typename Container::value_type,
306                               HasMappedType<Container>::value>()(*this);
307   }
308 
309   // Returns a pair with its .first and .second members set to the first two
310   // strings returned by the begin() iterator. Either/both of .first and .second
311   // will be constructed with empty strings if the iterator doesn't have a
312   // corresponding value.
313   template <typename First, typename Second>
314   operator std::pair<First, Second>() const {  // NOLINT(runtime/explicit)
315     absl::string_view first, second;
316     auto it = begin();
317     if (it != end()) {
318       first = *it;
319       if (++it != end()) {
320         second = *it;
321       }
322     }
323     return {First(first), Second(second)};
324   }
325 
326  private:
327   // ConvertToContainer is a functor converting a Splitter to the requested
328   // Container of ValueType. It is specialized below to optimize splitting to
329   // certain combinations of Container and ValueType.
330   //
331   // This base template handles the generic case of storing the split results in
332   // the requested non-map-like container and converting the split substrings to
333   // the requested type.
334   template <typename Container, typename ValueType, bool is_map = false>
335   struct ConvertToContainer {
336     Container operator()(const Splitter& splitter) const {
337       Container c;
338       auto it = std::inserter(c, c.end());
339       for (const auto sp : splitter) {
340         *it++ = ValueType(sp);
341       }
342       return c;
343     }
344   };
345 
346   // Partial specialization for a std::vector<absl::string_view>.
347   //
348   // Optimized for the common case of splitting to a
349   // std::vector<absl::string_view>. In this case we first split the results to
350   // a small array of absl::string_view on the stack, to reduce reallocations.
351   template <typename A>
352   struct ConvertToContainer<std::vector<absl::string_view, A>,
353                             absl::string_view, false> {
354     std::vector<absl::string_view, A> operator()(
355         const Splitter& splitter) const {
356       struct raw_view {
357         const char* data;
358         size_t size;
359         operator absl::string_view() const {  // NOLINT(runtime/explicit)
360           return {data, size};
361         }
362       };
363       std::vector<absl::string_view, A> v;
364       std::array<raw_view, 16> ar;
365       for (auto it = splitter.begin(); !it.at_end();) {
366         size_t index = 0;
367         do {
368           ar[index].data = it->data();
369           ar[index].size = it->size();
370           ++it;
371         } while (++index != ar.size() && !it.at_end());
372         v.insert(v.end(), ar.begin(), ar.begin() + index);
373       }
374       return v;
375     }
376   };
377 
378   // Partial specialization for a std::vector<std::string>.
379   //
380   // Optimized for the common case of splitting to a std::vector<std::string>.
381   // In this case we first split the results to a std::vector<absl::string_view>
382   // so the returned std::vector<std::string> can have space reserved to avoid
383   // std::string moves.
384   template <typename A>
385   struct ConvertToContainer<std::vector<std::string, A>, std::string, false> {
386     std::vector<std::string, A> operator()(const Splitter& splitter) const {
387       const std::vector<absl::string_view> v = splitter;
388       return std::vector<std::string, A>(v.begin(), v.end());
389     }
390   };
391 
392   // Partial specialization for containers of pairs (e.g., maps).
393   //
394   // The algorithm is to insert a new pair into the map for each even-numbered
395   // item, with the even-numbered item as the key with a default-constructed
396   // value. Each odd-numbered item will then be assigned to the last pair's
397   // value.
398   template <typename Container, typename First, typename Second>
399   struct ConvertToContainer<Container, std::pair<const First, Second>, true> {
400     Container operator()(const Splitter& splitter) const {
401       Container m;
402       typename Container::iterator it;
403       bool insert = true;
404       for (const auto sp : splitter) {
405         if (insert) {
406           it = Inserter<Container>::Insert(&m, First(sp), Second());
407         } else {
408           it->second = Second(sp);
409         }
410         insert = !insert;
411       }
412       return m;
413     }
414 
415     // Inserts the key and value into the given map, returning an iterator to
416     // the inserted item. Specialized for std::map and std::multimap to use
417     // emplace() and adapt emplace()'s return value.
418     template <typename Map>
419     struct Inserter {
420       using M = Map;
421       template <typename... Args>
422       static typename M::iterator Insert(M* m, Args&&... args) {
423         return m->insert(std::make_pair(std::forward<Args>(args)...)).first;
424       }
425     };
426 
427     template <typename... Ts>
428     struct Inserter<std::map<Ts...>> {
429       using M = std::map<Ts...>;
430       template <typename... Args>
431       static typename M::iterator Insert(M* m, Args&&... args) {
432         return m->emplace(std::make_pair(std::forward<Args>(args)...)).first;
433       }
434     };
435 
436     template <typename... Ts>
437     struct Inserter<std::multimap<Ts...>> {
438       using M = std::multimap<Ts...>;
439       template <typename... Args>
440       static typename M::iterator Insert(M* m, Args&&... args) {
441         return m->emplace(std::make_pair(std::forward<Args>(args)...));
442       }
443     };
444   };
445 
446   ConvertibleToStringView text_;
447   Delimiter delimiter_;
448   Predicate predicate_;
449 };
450 
451 }  // namespace strings_internal
452 ABSL_NAMESPACE_END
453 }  // namespace absl
454 
455 #endif  // ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_
456