1 // Copyright 2017 The Abseil Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // https://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 16 // This file declares INTERNAL parts of the Split API that are inline/templated 17 // or otherwise need to be available at compile time. The main abstractions 18 // defined in here are 19 // 20 // - ConvertibleToStringView 21 // - SplitIterator<> 22 // - Splitter<> 23 // 24 // DO NOT INCLUDE THIS FILE DIRECTLY. Use this file by including 25 // absl/strings/str_split.h. 26 // 27 // IWYU pragma: private, include "absl/strings/str_split.h" 28 29 #ifndef ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_ 30 #define ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_ 31 32 #include <array> 33 #include <cstddef> 34 #include <initializer_list> 35 #include <iterator> 36 #include <tuple> 37 #include <type_traits> 38 #include <utility> 39 #include <vector> 40 41 #include "absl/base/macros.h" 42 #include "absl/base/port.h" 43 #include "absl/meta/type_traits.h" 44 #include "absl/strings/string_view.h" 45 46 #ifdef _GLIBCXX_DEBUG 47 #include "absl/strings/internal/stl_type_traits.h" 48 #endif // _GLIBCXX_DEBUG 49 50 namespace absl { 51 ABSL_NAMESPACE_BEGIN 52 namespace strings_internal { 53 54 // This class is implicitly constructible from everything that absl::string_view 55 // is implicitly constructible from, except for rvalue strings. This means it 56 // can be used as a function parameter in places where passing a temporary 57 // string might cause memory lifetime issues. 58 class ConvertibleToStringView { 59 public: ConvertibleToStringView(const char * s)60 ConvertibleToStringView(const char* s) // NOLINT(runtime/explicit) 61 : value_(s) {} ConvertibleToStringView(char * s)62 ConvertibleToStringView(char* s) : value_(s) {} // NOLINT(runtime/explicit) ConvertibleToStringView(absl::string_view s)63 ConvertibleToStringView(absl::string_view s) // NOLINT(runtime/explicit) 64 : value_(s) {} ConvertibleToStringView(const std::string & s)65 ConvertibleToStringView(const std::string& s) // NOLINT(runtime/explicit) 66 : value_(s) {} 67 68 // Disable conversion from rvalue strings. 69 ConvertibleToStringView(std::string&& s) = delete; 70 ConvertibleToStringView(const std::string&& s) = delete; 71 value()72 absl::string_view value() const { return value_; } 73 74 private: 75 absl::string_view value_; 76 }; 77 78 // An iterator that enumerates the parts of a string from a Splitter. The text 79 // to be split, the Delimiter, and the Predicate are all taken from the given 80 // Splitter object. Iterators may only be compared if they refer to the same 81 // Splitter instance. 82 // 83 // This class is NOT part of the public splitting API. 84 template <typename Splitter> 85 class SplitIterator { 86 public: 87 using iterator_category = std::input_iterator_tag; 88 using value_type = absl::string_view; 89 using difference_type = ptrdiff_t; 90 using pointer = const value_type*; 91 using reference = const value_type&; 92 93 enum State { kInitState, kLastState, kEndState }; SplitIterator(State state,const Splitter * splitter)94 SplitIterator(State state, const Splitter* splitter) 95 : pos_(0), 96 state_(state), 97 splitter_(splitter), 98 delimiter_(splitter->delimiter()), 99 predicate_(splitter->predicate()) { 100 // Hack to maintain backward compatibility. This one block makes it so an 101 // empty absl::string_view whose .data() happens to be nullptr behaves 102 // *differently* from an otherwise empty absl::string_view whose .data() is 103 // not nullptr. This is an undesirable difference in general, but this 104 // behavior is maintained to avoid breaking existing code that happens to 105 // depend on this old behavior/bug. Perhaps it will be fixed one day. The 106 // difference in behavior is as follows: 107 // Split(absl::string_view(""), '-'); // {""} 108 // Split(absl::string_view(), '-'); // {} 109 if (splitter_->text().data() == nullptr) { 110 state_ = kEndState; 111 pos_ = splitter_->text().size(); 112 return; 113 } 114 115 if (state_ == kEndState) { 116 pos_ = splitter_->text().size(); 117 } else { 118 ++(*this); 119 } 120 } 121 at_end()122 bool at_end() const { return state_ == kEndState; } 123 124 reference operator*() const { return curr_; } 125 pointer operator->() const { return &curr_; } 126 127 SplitIterator& operator++() { 128 do { 129 if (state_ == kLastState) { 130 state_ = kEndState; 131 return *this; 132 } 133 const absl::string_view text = splitter_->text(); 134 const absl::string_view d = delimiter_.Find(text, pos_); 135 if (d.data() == text.data() + text.size()) state_ = kLastState; 136 curr_ = text.substr(pos_, 137 static_cast<size_t>(d.data() - (text.data() + pos_))); 138 pos_ += curr_.size() + d.size(); 139 } while (!predicate_(curr_)); 140 return *this; 141 } 142 143 SplitIterator operator++(int) { 144 SplitIterator old(*this); 145 ++(*this); 146 return old; 147 } 148 149 friend bool operator==(const SplitIterator& a, const SplitIterator& b) { 150 return a.state_ == b.state_ && a.pos_ == b.pos_; 151 } 152 153 friend bool operator!=(const SplitIterator& a, const SplitIterator& b) { 154 return !(a == b); 155 } 156 157 private: 158 size_t pos_; 159 State state_; 160 absl::string_view curr_; 161 const Splitter* splitter_; 162 typename Splitter::DelimiterType delimiter_; 163 typename Splitter::PredicateType predicate_; 164 }; 165 166 // HasMappedType<T>::value is true iff there exists a type T::mapped_type. 167 template <typename T, typename = void> 168 struct HasMappedType : std::false_type {}; 169 template <typename T> 170 struct HasMappedType<T, absl::void_t<typename T::mapped_type>> 171 : std::true_type {}; 172 173 // HasValueType<T>::value is true iff there exists a type T::value_type. 174 template <typename T, typename = void> 175 struct HasValueType : std::false_type {}; 176 template <typename T> 177 struct HasValueType<T, absl::void_t<typename T::value_type>> : std::true_type { 178 }; 179 180 // HasConstIterator<T>::value is true iff there exists a type T::const_iterator. 181 template <typename T, typename = void> 182 struct HasConstIterator : std::false_type {}; 183 template <typename T> 184 struct HasConstIterator<T, absl::void_t<typename T::const_iterator>> 185 : std::true_type {}; 186 187 // HasEmplace<T>::value is true iff there exists a method T::emplace(). 188 template <typename T, typename = void> 189 struct HasEmplace : std::false_type {}; 190 template <typename T> 191 struct HasEmplace<T, absl::void_t<decltype(std::declval<T>().emplace())>> 192 : std::true_type {}; 193 194 // IsInitializerList<T>::value is true iff T is an std::initializer_list. More 195 // details below in Splitter<> where this is used. 196 std::false_type IsInitializerListDispatch(...); // default: No 197 template <typename T> 198 std::true_type IsInitializerListDispatch(std::initializer_list<T>*); 199 template <typename T> 200 struct IsInitializerList 201 : decltype(IsInitializerListDispatch(static_cast<T*>(nullptr))) {}; 202 203 // A SplitterIsConvertibleTo<C>::type alias exists iff the specified condition 204 // is true for type 'C'. 205 // 206 // Restricts conversion to container-like types (by testing for the presence of 207 // a const_iterator member type) and also to disable conversion to an 208 // std::initializer_list (which also has a const_iterator). Otherwise, code 209 // compiled in C++11 will get an error due to ambiguous conversion paths (in 210 // C++11 std::vector<T>::operator= is overloaded to take either a std::vector<T> 211 // or an std::initializer_list<T>). 212 213 template <typename C, bool has_value_type, bool has_mapped_type> 214 struct SplitterIsConvertibleToImpl : std::false_type {}; 215 216 template <typename C> 217 struct SplitterIsConvertibleToImpl<C, true, false> 218 : std::is_constructible<typename C::value_type, absl::string_view> {}; 219 220 template <typename C> 221 struct SplitterIsConvertibleToImpl<C, true, true> 222 : absl::conjunction< 223 std::is_constructible<typename C::key_type, absl::string_view>, 224 std::is_constructible<typename C::mapped_type, absl::string_view>> {}; 225 226 template <typename C> 227 struct SplitterIsConvertibleTo 228 : SplitterIsConvertibleToImpl< 229 C, 230 #ifdef _GLIBCXX_DEBUG 231 !IsStrictlyBaseOfAndConvertibleToSTLContainer<C>::value && 232 #endif // _GLIBCXX_DEBUG 233 !IsInitializerList< 234 typename std::remove_reference<C>::type>::value && 235 HasValueType<C>::value && HasConstIterator<C>::value, 236 HasMappedType<C>::value> { 237 }; 238 239 template <typename StringType, typename Container, typename = void> 240 struct ShouldUseLifetimeBound : std::false_type {}; 241 242 template <typename StringType, typename Container> 243 struct ShouldUseLifetimeBound< 244 StringType, Container, 245 std::enable_if_t< 246 std::is_same<StringType, std::string>::value && 247 std::is_same<typename Container::value_type, absl::string_view>::value>> 248 : std::true_type {}; 249 250 template <typename StringType, typename First, typename Second> 251 using ShouldUseLifetimeBoundForPair = std::integral_constant< 252 bool, std::is_same<StringType, std::string>::value && 253 (std::is_same<First, absl::string_view>::value || 254 std::is_same<Second, absl::string_view>::value)>; 255 256 template <typename StringType, typename ElementType, std::size_t Size> 257 using ShouldUseLifetimeBoundForArray = std::integral_constant< 258 bool, std::is_same<StringType, std::string>::value && 259 std::is_same<ElementType, absl::string_view>::value>; 260 261 // This class implements the range that is returned by absl::StrSplit(). This 262 // class has templated conversion operators that allow it to be implicitly 263 // converted to a variety of types that the caller may have specified on the 264 // left-hand side of an assignment. 265 // 266 // The main interface for interacting with this class is through its implicit 267 // conversion operators. However, this class may also be used like a container 268 // in that it has .begin() and .end() member functions. It may also be used 269 // within a range-for loop. 270 // 271 // Output containers can be collections of any type that is constructible from 272 // an absl::string_view. 273 // 274 // An Predicate functor may be supplied. This predicate will be used to filter 275 // the split strings: only strings for which the predicate returns true will be 276 // kept. A Predicate object is any unary functor that takes an absl::string_view 277 // and returns bool. 278 // 279 // The StringType parameter can be either string_view or string, depending on 280 // whether the Splitter refers to a string stored elsewhere, or if the string 281 // resides inside the Splitter itself. 282 template <typename Delimiter, typename Predicate, typename StringType> 283 class Splitter { 284 public: 285 using DelimiterType = Delimiter; 286 using PredicateType = Predicate; 287 using const_iterator = strings_internal::SplitIterator<Splitter>; 288 using value_type = typename std::iterator_traits<const_iterator>::value_type; 289 290 Splitter(StringType input_text, Delimiter d, Predicate p) 291 : text_(std::move(input_text)), 292 delimiter_(std::move(d)), 293 predicate_(std::move(p)) {} 294 295 absl::string_view text() const { return text_; } 296 const Delimiter& delimiter() const { return delimiter_; } 297 const Predicate& predicate() const { return predicate_; } 298 299 // Range functions that iterate the split substrings as absl::string_view 300 // objects. These methods enable a Splitter to be used in a range-based for 301 // loop. 302 const_iterator begin() const { return {const_iterator::kInitState, this}; } 303 const_iterator end() const { return {const_iterator::kEndState, this}; } 304 305 // An implicit conversion operator that is restricted to only those containers 306 // that the splitter is convertible to. 307 template < 308 typename Container, 309 std::enable_if_t<ShouldUseLifetimeBound<StringType, Container>::value && 310 SplitterIsConvertibleTo<Container>::value, 311 std::nullptr_t> = nullptr> 312 // NOLINTNEXTLINE(google-explicit-constructor) 313 operator Container() const ABSL_ATTRIBUTE_LIFETIME_BOUND { 314 return ConvertToContainer<Container, typename Container::value_type, 315 HasMappedType<Container>::value>()(*this); 316 } 317 318 template < 319 typename Container, 320 std::enable_if_t<!ShouldUseLifetimeBound<StringType, Container>::value && 321 SplitterIsConvertibleTo<Container>::value, 322 std::nullptr_t> = nullptr> 323 // NOLINTNEXTLINE(google-explicit-constructor) 324 operator Container() const { 325 return ConvertToContainer<Container, typename Container::value_type, 326 HasMappedType<Container>::value>()(*this); 327 } 328 329 // Returns a pair with its .first and .second members set to the first two 330 // strings returned by the begin() iterator. Either/both of .first and .second 331 // will be constructed with empty strings if the iterator doesn't have a 332 // corresponding value. 333 template <typename First, typename Second, 334 std::enable_if_t< 335 ShouldUseLifetimeBoundForPair<StringType, First, Second>::value, 336 std::nullptr_t> = nullptr> 337 // NOLINTNEXTLINE(google-explicit-constructor) 338 operator std::pair<First, Second>() const ABSL_ATTRIBUTE_LIFETIME_BOUND { 339 return ConvertToPair<First, Second>(); 340 } 341 342 template <typename First, typename Second, 343 std::enable_if_t<!ShouldUseLifetimeBoundForPair<StringType, First, 344 Second>::value, 345 std::nullptr_t> = nullptr> 346 // NOLINTNEXTLINE(google-explicit-constructor) 347 operator std::pair<First, Second>() const { 348 return ConvertToPair<First, Second>(); 349 } 350 351 // Returns an array with its elements set to the first few strings returned by 352 // the begin() iterator. If there is not a corresponding value the empty 353 // string is used. 354 template <typename ElementType, std::size_t Size, 355 std::enable_if_t<ShouldUseLifetimeBoundForArray< 356 StringType, ElementType, Size>::value, 357 std::nullptr_t> = nullptr> 358 // NOLINTNEXTLINE(google-explicit-constructor) 359 operator std::array<ElementType, Size>() const ABSL_ATTRIBUTE_LIFETIME_BOUND { 360 return ConvertToArray<ElementType, Size>(); 361 } 362 363 template <typename ElementType, std::size_t Size, 364 std::enable_if_t<!ShouldUseLifetimeBoundForArray< 365 StringType, ElementType, Size>::value, 366 std::nullptr_t> = nullptr> 367 // NOLINTNEXTLINE(google-explicit-constructor) 368 operator std::array<ElementType, Size>() const { 369 return ConvertToArray<ElementType, Size>(); 370 } 371 372 private: 373 template <typename ElementType, std::size_t Size> 374 std::array<ElementType, Size> ConvertToArray() const { 375 std::array<ElementType, Size> a; 376 auto it = begin(); 377 for (std::size_t i = 0; i < Size && it != end(); ++i, ++it) { 378 a[i] = ElementType(*it); 379 } 380 return a; 381 } 382 383 template <typename First, typename Second> 384 std::pair<First, Second> ConvertToPair() const { 385 absl::string_view first, second; 386 auto it = begin(); 387 if (it != end()) { 388 first = *it; 389 if (++it != end()) { 390 second = *it; 391 } 392 } 393 return {First(first), Second(second)}; 394 } 395 396 // ConvertToContainer is a functor converting a Splitter to the requested 397 // Container of ValueType. It is specialized below to optimize splitting to 398 // certain combinations of Container and ValueType. 399 // 400 // This base template handles the generic case of storing the split results in 401 // the requested non-map-like container and converting the split substrings to 402 // the requested type. 403 template <typename Container, typename ValueType, bool is_map = false> 404 struct ConvertToContainer { 405 Container operator()(const Splitter& splitter) const { 406 Container c; 407 auto it = std::inserter(c, c.end()); 408 for (const auto& sp : splitter) { 409 *it++ = ValueType(sp); 410 } 411 return c; 412 } 413 }; 414 415 // Partial specialization for a std::vector<absl::string_view>. 416 // 417 // Optimized for the common case of splitting to a 418 // std::vector<absl::string_view>. In this case we first split the results to 419 // a small array of absl::string_view on the stack, to reduce reallocations. 420 template <typename A> 421 struct ConvertToContainer<std::vector<absl::string_view, A>, 422 absl::string_view, false> { 423 std::vector<absl::string_view, A> operator()( 424 const Splitter& splitter) const { 425 struct raw_view { 426 const char* data; 427 size_t size; 428 operator absl::string_view() const { // NOLINT(runtime/explicit) 429 return {data, size}; 430 } 431 }; 432 std::vector<absl::string_view, A> v; 433 std::array<raw_view, 16> ar; 434 for (auto it = splitter.begin(); !it.at_end();) { 435 size_t index = 0; 436 do { 437 ar[index].data = it->data(); 438 ar[index].size = it->size(); 439 ++it; 440 } while (++index != ar.size() && !it.at_end()); 441 // We static_cast index to a signed type to work around overzealous 442 // compiler warnings about signedness. 443 v.insert(v.end(), ar.begin(), 444 ar.begin() + static_cast<ptrdiff_t>(index)); 445 } 446 return v; 447 } 448 }; 449 450 // Partial specialization for a std::vector<std::string>. 451 // 452 // Optimized for the common case of splitting to a std::vector<std::string>. 453 // In this case we first split the results to a std::vector<absl::string_view> 454 // so the returned std::vector<std::string> can have space reserved to avoid 455 // std::string moves. 456 template <typename A> 457 struct ConvertToContainer<std::vector<std::string, A>, std::string, false> { 458 std::vector<std::string, A> operator()(const Splitter& splitter) const { 459 const std::vector<absl::string_view> v = splitter; 460 return std::vector<std::string, A>(v.begin(), v.end()); 461 } 462 }; 463 464 // Partial specialization for containers of pairs (e.g., maps). 465 // 466 // The algorithm is to insert a new pair into the map for each even-numbered 467 // item, with the even-numbered item as the key with a default-constructed 468 // value. Each odd-numbered item will then be assigned to the last pair's 469 // value. 470 template <typename Container, typename First, typename Second> 471 struct ConvertToContainer<Container, std::pair<const First, Second>, true> { 472 using iterator = typename Container::iterator; 473 474 Container operator()(const Splitter& splitter) const { 475 Container m; 476 iterator it; 477 bool insert = true; 478 for (const absl::string_view sv : splitter) { 479 if (insert) { 480 it = InsertOrEmplace(&m, sv); 481 } else { 482 it->second = Second(sv); 483 } 484 insert = !insert; 485 } 486 return m; 487 } 488 489 // Inserts the key and an empty value into the map, returning an iterator to 490 // the inserted item. We use emplace() if available, otherwise insert(). 491 template <typename M> 492 static absl::enable_if_t<HasEmplace<M>::value, iterator> InsertOrEmplace( 493 M* m, absl::string_view key) { 494 // Use piecewise_construct to support old versions of gcc in which pair 495 // constructor can't otherwise construct string from string_view. 496 return ToIter(m->emplace(std::piecewise_construct, std::make_tuple(key), 497 std::tuple<>())); 498 } 499 template <typename M> 500 static absl::enable_if_t<!HasEmplace<M>::value, iterator> InsertOrEmplace( 501 M* m, absl::string_view key) { 502 return ToIter(m->insert(std::make_pair(First(key), Second("")))); 503 } 504 505 static iterator ToIter(std::pair<iterator, bool> pair) { 506 return pair.first; 507 } 508 static iterator ToIter(iterator iter) { return iter; } 509 }; 510 511 StringType text_; 512 Delimiter delimiter_; 513 Predicate predicate_; 514 }; 515 516 } // namespace strings_internal 517 ABSL_NAMESPACE_END 518 } // namespace absl 519 520 #endif // ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_ 521