• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2017 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 
16 // This file declares INTERNAL parts of the Split API that are inline/templated
17 // or otherwise need to be available at compile time. The main abstractions
18 // defined in here are
19 //
20 //   - ConvertibleToStringView
21 //   - SplitIterator<>
22 //   - Splitter<>
23 //
24 // DO NOT INCLUDE THIS FILE DIRECTLY. Use this file by including
25 // absl/strings/str_split.h.
26 //
27 // IWYU pragma: private, include "absl/strings/str_split.h"
28 
29 #ifndef ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_
30 #define ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_
31 
32 #include <array>
33 #include <initializer_list>
34 #include <iterator>
35 #include <tuple>
36 #include <type_traits>
37 #include <utility>
38 #include <vector>
39 
40 #include "absl/base/macros.h"
41 #include "absl/base/port.h"
42 #include "absl/meta/type_traits.h"
43 #include "absl/strings/string_view.h"
44 
45 #ifdef _GLIBCXX_DEBUG
46 #include "absl/strings/internal/stl_type_traits.h"
47 #endif  // _GLIBCXX_DEBUG
48 
49 namespace absl {
50 ABSL_NAMESPACE_BEGIN
51 namespace strings_internal {
52 
53 // This class is implicitly constructible from everything that absl::string_view
54 // is implicitly constructible from, except for rvalue strings.  This means it
55 // can be used as a function parameter in places where passing a temporary
56 // string might cause memory lifetime issues.
57 class ConvertibleToStringView {
58  public:
ConvertibleToStringView(const char * s)59   ConvertibleToStringView(const char* s)  // NOLINT(runtime/explicit)
60       : value_(s) {}
ConvertibleToStringView(char * s)61   ConvertibleToStringView(char* s) : value_(s) {}  // NOLINT(runtime/explicit)
ConvertibleToStringView(absl::string_view s)62   ConvertibleToStringView(absl::string_view s)     // NOLINT(runtime/explicit)
63       : value_(s) {}
ConvertibleToStringView(const std::string & s)64   ConvertibleToStringView(const std::string& s)  // NOLINT(runtime/explicit)
65       : value_(s) {}
66 
67   // Disable conversion from rvalue strings.
68   ConvertibleToStringView(std::string&& s) = delete;
69   ConvertibleToStringView(const std::string&& s) = delete;
70 
value()71   absl::string_view value() const { return value_; }
72 
73  private:
74   absl::string_view value_;
75 };
76 
77 // An iterator that enumerates the parts of a string from a Splitter. The text
78 // to be split, the Delimiter, and the Predicate are all taken from the given
79 // Splitter object. Iterators may only be compared if they refer to the same
80 // Splitter instance.
81 //
82 // This class is NOT part of the public splitting API.
83 template <typename Splitter>
84 class SplitIterator {
85  public:
86   using iterator_category = std::input_iterator_tag;
87   using value_type = absl::string_view;
88   using difference_type = ptrdiff_t;
89   using pointer = const value_type*;
90   using reference = const value_type&;
91 
92   enum State { kInitState, kLastState, kEndState };
SplitIterator(State state,const Splitter * splitter)93   SplitIterator(State state, const Splitter* splitter)
94       : pos_(0),
95         state_(state),
96         splitter_(splitter),
97         delimiter_(splitter->delimiter()),
98         predicate_(splitter->predicate()) {
99     // Hack to maintain backward compatibility. This one block makes it so an
100     // empty absl::string_view whose .data() happens to be nullptr behaves
101     // *differently* from an otherwise empty absl::string_view whose .data() is
102     // not nullptr. This is an undesirable difference in general, but this
103     // behavior is maintained to avoid breaking existing code that happens to
104     // depend on this old behavior/bug. Perhaps it will be fixed one day. The
105     // difference in behavior is as follows:
106     //   Split(absl::string_view(""), '-');  // {""}
107     //   Split(absl::string_view(), '-');    // {}
108     if (splitter_->text().data() == nullptr) {
109       state_ = kEndState;
110       pos_ = splitter_->text().size();
111       return;
112     }
113 
114     if (state_ == kEndState) {
115       pos_ = splitter_->text().size();
116     } else {
117       ++(*this);
118     }
119   }
120 
at_end()121   bool at_end() const { return state_ == kEndState; }
122 
123   reference operator*() const { return curr_; }
124   pointer operator->() const { return &curr_; }
125 
126   SplitIterator& operator++() {
127     do {
128       if (state_ == kLastState) {
129         state_ = kEndState;
130         return *this;
131       }
132       const absl::string_view text = splitter_->text();
133       const absl::string_view d = delimiter_.Find(text, pos_);
134       if (d.data() == text.data() + text.size()) state_ = kLastState;
135       curr_ = text.substr(pos_,
136                           static_cast<size_t>(d.data() - (text.data() + pos_)));
137       pos_ += curr_.size() + d.size();
138     } while (!predicate_(curr_));
139     return *this;
140   }
141 
142   SplitIterator operator++(int) {
143     SplitIterator old(*this);
144     ++(*this);
145     return old;
146   }
147 
148   friend bool operator==(const SplitIterator& a, const SplitIterator& b) {
149     return a.state_ == b.state_ && a.pos_ == b.pos_;
150   }
151 
152   friend bool operator!=(const SplitIterator& a, const SplitIterator& b) {
153     return !(a == b);
154   }
155 
156  private:
157   size_t pos_;
158   State state_;
159   absl::string_view curr_;
160   const Splitter* splitter_;
161   typename Splitter::DelimiterType delimiter_;
162   typename Splitter::PredicateType predicate_;
163 };
164 
165 // HasMappedType<T>::value is true iff there exists a type T::mapped_type.
166 template <typename T, typename = void>
167 struct HasMappedType : std::false_type {};
168 template <typename T>
169 struct HasMappedType<T, absl::void_t<typename T::mapped_type>>
170     : std::true_type {};
171 
172 // HasValueType<T>::value is true iff there exists a type T::value_type.
173 template <typename T, typename = void>
174 struct HasValueType : std::false_type {};
175 template <typename T>
176 struct HasValueType<T, absl::void_t<typename T::value_type>> : std::true_type {
177 };
178 
179 // HasConstIterator<T>::value is true iff there exists a type T::const_iterator.
180 template <typename T, typename = void>
181 struct HasConstIterator : std::false_type {};
182 template <typename T>
183 struct HasConstIterator<T, absl::void_t<typename T::const_iterator>>
184     : std::true_type {};
185 
186 // HasEmplace<T>::value is true iff there exists a method T::emplace().
187 template <typename T, typename = void>
188 struct HasEmplace : std::false_type {};
189 template <typename T>
190 struct HasEmplace<T, absl::void_t<decltype(std::declval<T>().emplace())>>
191     : std::true_type {};
192 
193 // IsInitializerList<T>::value is true iff T is an std::initializer_list. More
194 // details below in Splitter<> where this is used.
195 std::false_type IsInitializerListDispatch(...);  // default: No
196 template <typename T>
197 std::true_type IsInitializerListDispatch(std::initializer_list<T>*);
198 template <typename T>
199 struct IsInitializerList
200     : decltype(IsInitializerListDispatch(static_cast<T*>(nullptr))) {};
201 
202 // A SplitterIsConvertibleTo<C>::type alias exists iff the specified condition
203 // is true for type 'C'.
204 //
205 // Restricts conversion to container-like types (by testing for the presence of
206 // a const_iterator member type) and also to disable conversion to an
207 // std::initializer_list (which also has a const_iterator). Otherwise, code
208 // compiled in C++11 will get an error due to ambiguous conversion paths (in
209 // C++11 std::vector<T>::operator= is overloaded to take either a std::vector<T>
210 // or an std::initializer_list<T>).
211 
212 template <typename C, bool has_value_type, bool has_mapped_type>
213 struct SplitterIsConvertibleToImpl : std::false_type {};
214 
215 template <typename C>
216 struct SplitterIsConvertibleToImpl<C, true, false>
217     : std::is_constructible<typename C::value_type, absl::string_view> {};
218 
219 template <typename C>
220 struct SplitterIsConvertibleToImpl<C, true, true>
221     : absl::conjunction<
222           std::is_constructible<typename C::key_type, absl::string_view>,
223           std::is_constructible<typename C::mapped_type, absl::string_view>> {};
224 
225 template <typename C>
226 struct SplitterIsConvertibleTo
227     : SplitterIsConvertibleToImpl<
228           C,
229 #ifdef _GLIBCXX_DEBUG
230           !IsStrictlyBaseOfAndConvertibleToSTLContainer<C>::value &&
231 #endif  // _GLIBCXX_DEBUG
232               !IsInitializerList<
233                   typename std::remove_reference<C>::type>::value &&
234               HasValueType<C>::value && HasConstIterator<C>::value,
235           HasMappedType<C>::value> {
236 };
237 
238 template <typename StringType, typename Container, typename = void>
239 struct ShouldUseLifetimeBound : std::false_type {};
240 
241 template <typename StringType, typename Container>
242 struct ShouldUseLifetimeBound<
243     StringType, Container,
244     std::enable_if_t<
245         std::is_same<StringType, std::string>::value &&
246         std::is_same<typename Container::value_type, absl::string_view>::value>>
247     : std::true_type {};
248 
249 template <typename StringType, typename First, typename Second>
250 using ShouldUseLifetimeBoundForPair = std::integral_constant<
251     bool, std::is_same<StringType, std::string>::value &&
252               (std::is_same<First, absl::string_view>::value ||
253                std::is_same<Second, absl::string_view>::value)>;
254 
255 
256 // This class implements the range that is returned by absl::StrSplit(). This
257 // class has templated conversion operators that allow it to be implicitly
258 // converted to a variety of types that the caller may have specified on the
259 // left-hand side of an assignment.
260 //
261 // The main interface for interacting with this class is through its implicit
262 // conversion operators. However, this class may also be used like a container
263 // in that it has .begin() and .end() member functions. It may also be used
264 // within a range-for loop.
265 //
266 // Output containers can be collections of any type that is constructible from
267 // an absl::string_view.
268 //
// A Predicate functor may be supplied. This predicate will be used to filter
270 // the split strings: only strings for which the predicate returns true will be
271 // kept. A Predicate object is any unary functor that takes an absl::string_view
272 // and returns bool.
273 //
274 // The StringType parameter can be either string_view or string, depending on
275 // whether the Splitter refers to a string stored elsewhere, or if the string
276 // resides inside the Splitter itself.
277 template <typename Delimiter, typename Predicate, typename StringType>
278 class Splitter {
279  public:
280   using DelimiterType = Delimiter;
281   using PredicateType = Predicate;
282   using const_iterator = strings_internal::SplitIterator<Splitter>;
283   using value_type = typename std::iterator_traits<const_iterator>::value_type;
284 
285   Splitter(StringType input_text, Delimiter d, Predicate p)
286       : text_(std::move(input_text)),
287         delimiter_(std::move(d)),
288         predicate_(std::move(p)) {}
289 
290   absl::string_view text() const { return text_; }
291   const Delimiter& delimiter() const { return delimiter_; }
292   const Predicate& predicate() const { return predicate_; }
293 
294   // Range functions that iterate the split substrings as absl::string_view
295   // objects. These methods enable a Splitter to be used in a range-based for
296   // loop.
297   const_iterator begin() const { return {const_iterator::kInitState, this}; }
298   const_iterator end() const { return {const_iterator::kEndState, this}; }
299 
300   // An implicit conversion operator that is restricted to only those containers
301   // that the splitter is convertible to.
302   template <
303       typename Container,
304       std::enable_if_t<ShouldUseLifetimeBound<StringType, Container>::value &&
305                            SplitterIsConvertibleTo<Container>::value,
306                        std::nullptr_t> = nullptr>
307   // NOLINTNEXTLINE(google-explicit-constructor)
308   operator Container() const ABSL_ATTRIBUTE_LIFETIME_BOUND {
309     return ConvertToContainer<Container, typename Container::value_type,
310                               HasMappedType<Container>::value>()(*this);
311   }
312 
313   template <
314       typename Container,
315       std::enable_if_t<!ShouldUseLifetimeBound<StringType, Container>::value &&
316                            SplitterIsConvertibleTo<Container>::value,
317                        std::nullptr_t> = nullptr>
318   // NOLINTNEXTLINE(google-explicit-constructor)
319   operator Container() const {
320     return ConvertToContainer<Container, typename Container::value_type,
321                               HasMappedType<Container>::value>()(*this);
322   }
323 
324   // Returns a pair with its .first and .second members set to the first two
325   // strings returned by the begin() iterator. Either/both of .first and .second
326   // will be constructed with empty strings if the iterator doesn't have a
327   // corresponding value.
328   template <typename First, typename Second,
329             std::enable_if_t<
330                 ShouldUseLifetimeBoundForPair<StringType, First, Second>::value,
331                 std::nullptr_t> = nullptr>
332   // NOLINTNEXTLINE(google-explicit-constructor)
333   operator std::pair<First, Second>() const ABSL_ATTRIBUTE_LIFETIME_BOUND {
334     return ConvertToPair<First, Second>();
335   }
336 
337   template <typename First, typename Second,
338             std::enable_if_t<!ShouldUseLifetimeBoundForPair<StringType, First,
339                                                             Second>::value,
340                              std::nullptr_t> = nullptr>
341   // NOLINTNEXTLINE(google-explicit-constructor)
342   operator std::pair<First, Second>() const {
343     return ConvertToPair<First, Second>();
344   }
345 
346  private:
347   template <typename First, typename Second>
348   std::pair<First, Second> ConvertToPair() const {
349     absl::string_view first, second;
350     auto it = begin();
351     if (it != end()) {
352       first = *it;
353       if (++it != end()) {
354         second = *it;
355       }
356     }
357     return {First(first), Second(second)};
358   }
359 
360   // ConvertToContainer is a functor converting a Splitter to the requested
361   // Container of ValueType. It is specialized below to optimize splitting to
362   // certain combinations of Container and ValueType.
363   //
364   // This base template handles the generic case of storing the split results in
365   // the requested non-map-like container and converting the split substrings to
366   // the requested type.
367   template <typename Container, typename ValueType, bool is_map = false>
368   struct ConvertToContainer {
369     Container operator()(const Splitter& splitter) const {
370       Container c;
371       auto it = std::inserter(c, c.end());
372       for (const auto& sp : splitter) {
373         *it++ = ValueType(sp);
374       }
375       return c;
376     }
377   };
378 
379   // Partial specialization for a std::vector<absl::string_view>.
380   //
381   // Optimized for the common case of splitting to a
382   // std::vector<absl::string_view>. In this case we first split the results to
383   // a small array of absl::string_view on the stack, to reduce reallocations.
384   template <typename A>
385   struct ConvertToContainer<std::vector<absl::string_view, A>,
386                             absl::string_view, false> {
387     std::vector<absl::string_view, A> operator()(
388         const Splitter& splitter) const {
389       struct raw_view {
390         const char* data;
391         size_t size;
392         operator absl::string_view() const {  // NOLINT(runtime/explicit)
393           return {data, size};
394         }
395       };
396       std::vector<absl::string_view, A> v;
397       std::array<raw_view, 16> ar;
398       for (auto it = splitter.begin(); !it.at_end();) {
399         size_t index = 0;
400         do {
401           ar[index].data = it->data();
402           ar[index].size = it->size();
403           ++it;
404         } while (++index != ar.size() && !it.at_end());
405         v.insert(v.end(), ar.begin(), ar.begin() + index);
406       }
407       return v;
408     }
409   };
410 
411   // Partial specialization for a std::vector<std::string>.
412   //
413   // Optimized for the common case of splitting to a std::vector<std::string>.
414   // In this case we first split the results to a std::vector<absl::string_view>
415   // so the returned std::vector<std::string> can have space reserved to avoid
416   // std::string moves.
417   template <typename A>
418   struct ConvertToContainer<std::vector<std::string, A>, std::string, false> {
419     std::vector<std::string, A> operator()(const Splitter& splitter) const {
420       const std::vector<absl::string_view> v = splitter;
421       return std::vector<std::string, A>(v.begin(), v.end());
422     }
423   };
424 
425   // Partial specialization for containers of pairs (e.g., maps).
426   //
427   // The algorithm is to insert a new pair into the map for each even-numbered
428   // item, with the even-numbered item as the key with a default-constructed
429   // value. Each odd-numbered item will then be assigned to the last pair's
430   // value.
431   template <typename Container, typename First, typename Second>
432   struct ConvertToContainer<Container, std::pair<const First, Second>, true> {
433     using iterator = typename Container::iterator;
434 
435     Container operator()(const Splitter& splitter) const {
436       Container m;
437       iterator it;
438       bool insert = true;
439       for (const absl::string_view sv : splitter) {
440         if (insert) {
441           it = InsertOrEmplace(&m, sv);
442         } else {
443           it->second = Second(sv);
444         }
445         insert = !insert;
446       }
447       return m;
448     }
449 
450     // Inserts the key and an empty value into the map, returning an iterator to
451     // the inserted item. We use emplace() if available, otherwise insert().
452     template <typename M>
453     static absl::enable_if_t<HasEmplace<M>::value, iterator> InsertOrEmplace(
454         M* m, absl::string_view key) {
455       // Use piecewise_construct to support old versions of gcc in which pair
456       // constructor can't otherwise construct string from string_view.
457       return ToIter(m->emplace(std::piecewise_construct, std::make_tuple(key),
458                                std::tuple<>()));
459     }
460     template <typename M>
461     static absl::enable_if_t<!HasEmplace<M>::value, iterator> InsertOrEmplace(
462         M* m, absl::string_view key) {
463       return ToIter(m->insert(std::make_pair(First(key), Second(""))));
464     }
465 
466     static iterator ToIter(std::pair<iterator, bool> pair) {
467       return pair.first;
468     }
469     static iterator ToIter(iterator iter) { return iter; }
470   };
471 
472   StringType text_;
473   Delimiter delimiter_;
474   Predicate predicate_;
475 };
476 
477 }  // namespace strings_internal
478 ABSL_NAMESPACE_END
479 }  // namespace absl
480 
481 #endif  // ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_
482