// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

// This file declares INTERNAL parts of the Split API that are inline/templated
// or otherwise need to be available at compile time. The main abstractions
// defined in here are
//
//   - ConvertibleToStringView
//   - SplitIterator<>
//   - Splitter<>
//
// DO NOT INCLUDE THIS FILE DIRECTLY. Use this file by including
// absl/strings/str_split.h.
//
// IWYU pragma: private, include "absl/strings/str_split.h"

#ifndef ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_
#define ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_

#include <array>
#include <initializer_list>
#include <iterator>
#include <map>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>

#include "absl/base/macros.h"
#include "absl/base/port.h"
#include "absl/meta/type_traits.h"
#include "absl/strings/string_view.h"

#ifdef _GLIBCXX_DEBUG
#include "absl/strings/internal/stl_type_traits.h"
#endif  // _GLIBCXX_DEBUG

namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {

// This class is implicitly constructible from everything that absl::string_view
// is implicitly constructible from. If it's constructed from a temporary
// string, the data is moved into a data member so its lifetime matches that of
// the ConvertibleToStringView instance.
57 class ConvertibleToStringView { 58 public: ConvertibleToStringView(const char * s)59 ConvertibleToStringView(const char* s) // NOLINT(runtime/explicit) 60 : value_(s) {} ConvertibleToStringView(char * s)61 ConvertibleToStringView(char* s) : value_(s) {} // NOLINT(runtime/explicit) ConvertibleToStringView(absl::string_view s)62 ConvertibleToStringView(absl::string_view s) // NOLINT(runtime/explicit) 63 : value_(s) {} ConvertibleToStringView(const std::string & s)64 ConvertibleToStringView(const std::string& s) // NOLINT(runtime/explicit) 65 : value_(s) {} 66 67 // Matches rvalue strings and moves their data to a member. ConvertibleToStringView(std::string && s)68 ConvertibleToStringView(std::string&& s) // NOLINT(runtime/explicit) 69 : copy_(std::move(s)), value_(copy_) {} 70 ConvertibleToStringView(const ConvertibleToStringView & other)71 ConvertibleToStringView(const ConvertibleToStringView& other) 72 : copy_(other.copy_), 73 value_(other.IsSelfReferential() ? copy_ : other.value_) {} 74 ConvertibleToStringView(ConvertibleToStringView && other)75 ConvertibleToStringView(ConvertibleToStringView&& other) { 76 StealMembers(std::move(other)); 77 } 78 79 ConvertibleToStringView& operator=(ConvertibleToStringView other) { 80 StealMembers(std::move(other)); 81 return *this; 82 } 83 value()84 absl::string_view value() const { return value_; } 85 86 private: 87 // Returns true if ctsp's value refers to its internal copy_ member. IsSelfReferential()88 bool IsSelfReferential() const { return value_.data() == copy_.data(); } 89 StealMembers(ConvertibleToStringView && other)90 void StealMembers(ConvertibleToStringView&& other) { 91 if (other.IsSelfReferential()) { 92 copy_ = std::move(other.copy_); 93 value_ = copy_; 94 other.value_ = other.copy_; 95 } else { 96 value_ = other.value_; 97 } 98 } 99 100 // Holds the data moved from temporary std::string arguments. Declared first 101 // so that 'value' can refer to 'copy_'. 
102 std::string copy_; 103 absl::string_view value_; 104 }; 105 106 // An iterator that enumerates the parts of a string from a Splitter. The text 107 // to be split, the Delimiter, and the Predicate are all taken from the given 108 // Splitter object. Iterators may only be compared if they refer to the same 109 // Splitter instance. 110 // 111 // This class is NOT part of the public splitting API. 112 template <typename Splitter> 113 class SplitIterator { 114 public: 115 using iterator_category = std::input_iterator_tag; 116 using value_type = absl::string_view; 117 using difference_type = ptrdiff_t; 118 using pointer = const value_type*; 119 using reference = const value_type&; 120 121 enum State { kInitState, kLastState, kEndState }; SplitIterator(State state,const Splitter * splitter)122 SplitIterator(State state, const Splitter* splitter) 123 : pos_(0), 124 state_(state), 125 splitter_(splitter), 126 delimiter_(splitter->delimiter()), 127 predicate_(splitter->predicate()) { 128 // Hack to maintain backward compatibility. This one block makes it so an 129 // empty absl::string_view whose .data() happens to be nullptr behaves 130 // *differently* from an otherwise empty absl::string_view whose .data() is 131 // not nullptr. This is an undesirable difference in general, but this 132 // behavior is maintained to avoid breaking existing code that happens to 133 // depend on this old behavior/bug. Perhaps it will be fixed one day. 
The 134 // difference in behavior is as follows: 135 // Split(absl::string_view(""), '-'); // {""} 136 // Split(absl::string_view(), '-'); // {} 137 if (splitter_->text().data() == nullptr) { 138 state_ = kEndState; 139 pos_ = splitter_->text().size(); 140 return; 141 } 142 143 if (state_ == kEndState) { 144 pos_ = splitter_->text().size(); 145 } else { 146 ++(*this); 147 } 148 } 149 at_end()150 bool at_end() const { return state_ == kEndState; } 151 152 reference operator*() const { return curr_; } 153 pointer operator->() const { return &curr_; } 154 155 SplitIterator& operator++() { 156 do { 157 if (state_ == kLastState) { 158 state_ = kEndState; 159 return *this; 160 } 161 const absl::string_view text = splitter_->text(); 162 const absl::string_view d = delimiter_.Find(text, pos_); 163 if (d.data() == text.data() + text.size()) state_ = kLastState; 164 curr_ = text.substr(pos_, d.data() - (text.data() + pos_)); 165 pos_ += curr_.size() + d.size(); 166 } while (!predicate_(curr_)); 167 return *this; 168 } 169 170 SplitIterator operator++(int) { 171 SplitIterator old(*this); 172 ++(*this); 173 return old; 174 } 175 176 friend bool operator==(const SplitIterator& a, const SplitIterator& b) { 177 return a.state_ == b.state_ && a.pos_ == b.pos_; 178 } 179 180 friend bool operator!=(const SplitIterator& a, const SplitIterator& b) { 181 return !(a == b); 182 } 183 184 private: 185 size_t pos_; 186 State state_; 187 absl::string_view curr_; 188 const Splitter* splitter_; 189 typename Splitter::DelimiterType delimiter_; 190 typename Splitter::PredicateType predicate_; 191 }; 192 193 // HasMappedType<T>::value is true iff there exists a type T::mapped_type. 194 template <typename T, typename = void> 195 struct HasMappedType : std::false_type {}; 196 template <typename T> 197 struct HasMappedType<T, absl::void_t<typename T::mapped_type>> 198 : std::true_type {}; 199 200 // HasValueType<T>::value is true iff there exists a type T::value_type. 
201 template <typename T, typename = void> 202 struct HasValueType : std::false_type {}; 203 template <typename T> 204 struct HasValueType<T, absl::void_t<typename T::value_type>> : std::true_type { 205 }; 206 207 // HasConstIterator<T>::value is true iff there exists a type T::const_iterator. 208 template <typename T, typename = void> 209 struct HasConstIterator : std::false_type {}; 210 template <typename T> 211 struct HasConstIterator<T, absl::void_t<typename T::const_iterator>> 212 : std::true_type {}; 213 214 // IsInitializerList<T>::value is true iff T is an std::initializer_list. More 215 // details below in Splitter<> where this is used. 216 std::false_type IsInitializerListDispatch(...); // default: No 217 template <typename T> 218 std::true_type IsInitializerListDispatch(std::initializer_list<T>*); 219 template <typename T> 220 struct IsInitializerList 221 : decltype(IsInitializerListDispatch(static_cast<T*>(nullptr))) {}; 222 223 // A SplitterIsConvertibleTo<C>::type alias exists iff the specified condition 224 // is true for type 'C'. 225 // 226 // Restricts conversion to container-like types (by testing for the presence of 227 // a const_iterator member type) and also to disable conversion to an 228 // std::initializer_list (which also has a const_iterator). Otherwise, code 229 // compiled in C++11 will get an error due to ambiguous conversion paths (in 230 // C++11 std::vector<T>::operator= is overloaded to take either a std::vector<T> 231 // or an std::initializer_list<T>). 
232 233 template <typename C, bool has_value_type, bool has_mapped_type> 234 struct SplitterIsConvertibleToImpl : std::false_type {}; 235 236 template <typename C> 237 struct SplitterIsConvertibleToImpl<C, true, false> 238 : std::is_constructible<typename C::value_type, absl::string_view> {}; 239 240 template <typename C> 241 struct SplitterIsConvertibleToImpl<C, true, true> 242 : absl::conjunction< 243 std::is_constructible<typename C::key_type, absl::string_view>, 244 std::is_constructible<typename C::mapped_type, absl::string_view>> {}; 245 246 template <typename C> 247 struct SplitterIsConvertibleTo 248 : SplitterIsConvertibleToImpl< 249 C, 250 #ifdef _GLIBCXX_DEBUG 251 !IsStrictlyBaseOfAndConvertibleToSTLContainer<C>::value && 252 #endif // _GLIBCXX_DEBUG 253 !IsInitializerList< 254 typename std::remove_reference<C>::type>::value && 255 HasValueType<C>::value && HasConstIterator<C>::value, 256 HasMappedType<C>::value> { 257 }; 258 259 // This class implements the range that is returned by absl::StrSplit(). This 260 // class has templated conversion operators that allow it to be implicitly 261 // converted to a variety of types that the caller may have specified on the 262 // left-hand side of an assignment. 263 // 264 // The main interface for interacting with this class is through its implicit 265 // conversion operators. However, this class may also be used like a container 266 // in that it has .begin() and .end() member functions. It may also be used 267 // within a range-for loop. 268 // 269 // Output containers can be collections of any type that is constructible from 270 // an absl::string_view. 271 // 272 // An Predicate functor may be supplied. This predicate will be used to filter 273 // the split strings: only strings for which the predicate returns true will be 274 // kept. A Predicate object is any unary functor that takes an absl::string_view 275 // and returns bool. 
276 template <typename Delimiter, typename Predicate> 277 class Splitter { 278 public: 279 using DelimiterType = Delimiter; 280 using PredicateType = Predicate; 281 using const_iterator = strings_internal::SplitIterator<Splitter>; 282 using value_type = typename std::iterator_traits<const_iterator>::value_type; 283 284 Splitter(ConvertibleToStringView input_text, Delimiter d, Predicate p) 285 : text_(std::move(input_text)), 286 delimiter_(std::move(d)), 287 predicate_(std::move(p)) {} 288 289 absl::string_view text() const { return text_.value(); } 290 const Delimiter& delimiter() const { return delimiter_; } 291 const Predicate& predicate() const { return predicate_; } 292 293 // Range functions that iterate the split substrings as absl::string_view 294 // objects. These methods enable a Splitter to be used in a range-based for 295 // loop. 296 const_iterator begin() const { return {const_iterator::kInitState, this}; } 297 const_iterator end() const { return {const_iterator::kEndState, this}; } 298 299 // An implicit conversion operator that is restricted to only those containers 300 // that the splitter is convertible to. 301 template <typename Container, 302 typename = typename std::enable_if< 303 SplitterIsConvertibleTo<Container>::value>::type> 304 operator Container() const { // NOLINT(runtime/explicit) 305 return ConvertToContainer<Container, typename Container::value_type, 306 HasMappedType<Container>::value>()(*this); 307 } 308 309 // Returns a pair with its .first and .second members set to the first two 310 // strings returned by the begin() iterator. Either/both of .first and .second 311 // will be constructed with empty strings if the iterator doesn't have a 312 // corresponding value. 
313 template <typename First, typename Second> 314 operator std::pair<First, Second>() const { // NOLINT(runtime/explicit) 315 absl::string_view first, second; 316 auto it = begin(); 317 if (it != end()) { 318 first = *it; 319 if (++it != end()) { 320 second = *it; 321 } 322 } 323 return {First(first), Second(second)}; 324 } 325 326 private: 327 // ConvertToContainer is a functor converting a Splitter to the requested 328 // Container of ValueType. It is specialized below to optimize splitting to 329 // certain combinations of Container and ValueType. 330 // 331 // This base template handles the generic case of storing the split results in 332 // the requested non-map-like container and converting the split substrings to 333 // the requested type. 334 template <typename Container, typename ValueType, bool is_map = false> 335 struct ConvertToContainer { 336 Container operator()(const Splitter& splitter) const { 337 Container c; 338 auto it = std::inserter(c, c.end()); 339 for (const auto sp : splitter) { 340 *it++ = ValueType(sp); 341 } 342 return c; 343 } 344 }; 345 346 // Partial specialization for a std::vector<absl::string_view>. 347 // 348 // Optimized for the common case of splitting to a 349 // std::vector<absl::string_view>. In this case we first split the results to 350 // a small array of absl::string_view on the stack, to reduce reallocations. 
351 template <typename A> 352 struct ConvertToContainer<std::vector<absl::string_view, A>, 353 absl::string_view, false> { 354 std::vector<absl::string_view, A> operator()( 355 const Splitter& splitter) const { 356 struct raw_view { 357 const char* data; 358 size_t size; 359 operator absl::string_view() const { // NOLINT(runtime/explicit) 360 return {data, size}; 361 } 362 }; 363 std::vector<absl::string_view, A> v; 364 std::array<raw_view, 16> ar; 365 for (auto it = splitter.begin(); !it.at_end();) { 366 size_t index = 0; 367 do { 368 ar[index].data = it->data(); 369 ar[index].size = it->size(); 370 ++it; 371 } while (++index != ar.size() && !it.at_end()); 372 v.insert(v.end(), ar.begin(), ar.begin() + index); 373 } 374 return v; 375 } 376 }; 377 378 // Partial specialization for a std::vector<std::string>. 379 // 380 // Optimized for the common case of splitting to a std::vector<std::string>. 381 // In this case we first split the results to a std::vector<absl::string_view> 382 // so the returned std::vector<std::string> can have space reserved to avoid 383 // std::string moves. 384 template <typename A> 385 struct ConvertToContainer<std::vector<std::string, A>, std::string, false> { 386 std::vector<std::string, A> operator()(const Splitter& splitter) const { 387 const std::vector<absl::string_view> v = splitter; 388 return std::vector<std::string, A>(v.begin(), v.end()); 389 } 390 }; 391 392 // Partial specialization for containers of pairs (e.g., maps). 393 // 394 // The algorithm is to insert a new pair into the map for each even-numbered 395 // item, with the even-numbered item as the key with a default-constructed 396 // value. Each odd-numbered item will then be assigned to the last pair's 397 // value. 
398 template <typename Container, typename First, typename Second> 399 struct ConvertToContainer<Container, std::pair<const First, Second>, true> { 400 Container operator()(const Splitter& splitter) const { 401 Container m; 402 typename Container::iterator it; 403 bool insert = true; 404 for (const auto sp : splitter) { 405 if (insert) { 406 it = Inserter<Container>::Insert(&m, First(sp), Second()); 407 } else { 408 it->second = Second(sp); 409 } 410 insert = !insert; 411 } 412 return m; 413 } 414 415 // Inserts the key and value into the given map, returning an iterator to 416 // the inserted item. Specialized for std::map and std::multimap to use 417 // emplace() and adapt emplace()'s return value. 418 template <typename Map> 419 struct Inserter { 420 using M = Map; 421 template <typename... Args> 422 static typename M::iterator Insert(M* m, Args&&... args) { 423 return m->insert(std::make_pair(std::forward<Args>(args)...)).first; 424 } 425 }; 426 427 template <typename... Ts> 428 struct Inserter<std::map<Ts...>> { 429 using M = std::map<Ts...>; 430 template <typename... Args> 431 static typename M::iterator Insert(M* m, Args&&... args) { 432 return m->emplace(std::make_pair(std::forward<Args>(args)...)).first; 433 } 434 }; 435 436 template <typename... Ts> 437 struct Inserter<std::multimap<Ts...>> { 438 using M = std::multimap<Ts...>; 439 template <typename... Args> 440 static typename M::iterator Insert(M* m, Args&&... args) { 441 return m->emplace(std::make_pair(std::forward<Args>(args)...)); 442 } 443 }; 444 }; 445 446 ConvertibleToStringView text_; 447 Delimiter delimiter_; 448 Predicate predicate_; 449 }; 450 451 } // namespace strings_internal 452 ABSL_NAMESPACE_END 453 } // namespace absl 454 455 #endif // ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_ 456