//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// -----------------------------------------------------------------------------
// File: string_view.h
// -----------------------------------------------------------------------------
//
// This file contains the definition of the `absl::string_view` class. A
// `string_view` points to a contiguous span of characters, often part or all of
// another `std::string`, double-quoted string literal, character array, or even
// another `string_view`.
//
// This `absl::string_view` abstraction is designed to be a drop-in
// replacement for the C++17 `std::string_view` abstraction.
27 #ifndef ABSL_STRINGS_STRING_VIEW_H_
28 #define ABSL_STRINGS_STRING_VIEW_H_
29 
30 #include <algorithm>
31 #include <cassert>
32 #include <cstddef>
33 #include <cstring>
34 #include <iosfwd>
35 #include <iterator>
36 #include <limits>
37 #include <string>
38 
39 #include "absl/base/config.h"
40 #include "absl/base/internal/throw_delegate.h"
41 #include "absl/base/macros.h"
42 #include "absl/base/optimization.h"
43 #include "absl/base/port.h"
44 
45 #ifdef ABSL_USES_STD_STRING_VIEW
46 
47 #include <string_view>  // IWYU pragma: export
48 
49 namespace absl {
50 ABSL_NAMESPACE_BEGIN
51 using std::string_view;
52 ABSL_NAMESPACE_END
53 }  // namespace absl
54 
55 #else  // ABSL_USES_STD_STRING_VIEW
56 
57 #if ABSL_HAVE_BUILTIN(__builtin_memcmp) || \
58     (defined(__GNUC__) && !defined(__clang__))
59 #define ABSL_INTERNAL_STRING_VIEW_MEMCMP __builtin_memcmp
60 #else  // ABSL_HAVE_BUILTIN(__builtin_memcmp)
61 #define ABSL_INTERNAL_STRING_VIEW_MEMCMP memcmp
62 #endif  // ABSL_HAVE_BUILTIN(__builtin_memcmp)
63 
64 namespace absl {
65 ABSL_NAMESPACE_BEGIN
66 
// absl::string_view
//
// A `string_view` provides a lightweight view into the string data provided by
// a `std::string`, double-quoted string literal, character array, or even
// another `string_view`. A `string_view` does *not* own the string to which it
// points, and that data cannot be modified through the view.
//
// You can use `string_view` as a function or method parameter anywhere a
// parameter can receive a double-quoted string literal, `const char*`,
// `std::string`, or another `absl::string_view` argument with no need to copy
// the string data. Systematic use of `string_view` within function arguments
// reduces data copies and `strlen()` calls.
//
// Because of its small size, prefer passing `string_view` by value:
//
//   void MyFunction(absl::string_view arg);
//
// If circumstances require, you may also pass one by const reference:
//
//   void MyFunction(const absl::string_view& arg);  // not preferred
//
// Passing by value generates slightly smaller code for many architectures.
//
// In either case, the source data of the `string_view` must outlive the
// `string_view` itself.
//
// A `string_view` is also suitable for local variables if you know that the
// lifetime of the underlying object is longer than the lifetime of your
// `string_view` variable. However, beware of binding a `string_view` to a
// temporary value:
//
//   // BAD use of string_view: lifetime problem
//   absl::string_view sv = obj.ReturnAString();
//
//   // GOOD use of string_view: str outlives sv
//   std::string str = obj.ReturnAString();
//   absl::string_view sv = str;
//
// Due to lifetime issues, a `string_view` is sometimes a poor choice for a
// return value and usually a poor choice for a data member. If you do use a
// `string_view` this way, it is your responsibility to ensure that the object
// pointed to by the `string_view` outlives the `string_view`.
//
// A `string_view` may represent a whole string or just part of a string. For
// example, when splitting a string, `std::vector<absl::string_view>` is a
// natural data type for the output.
//
// When constructed from a source which is NUL-terminated, the `string_view`
// itself will not include the NUL-terminator unless a specific size (including
// the NUL) is passed to the constructor. As a result, common idioms that work
// on NUL-terminated strings do not work on `string_view` objects. If you write
// code that scans a `string_view`, you must check its length rather than test
// for nul, for example. Note, however, that nuls may still be embedded within
// a `string_view` explicitly.
//
// You may create a null `string_view` in two ways:
//
//   absl::string_view sv;
//   absl::string_view sv(nullptr, 0);
//
// For the above, `sv.data() == nullptr`, `sv.length() == 0`, and
// `sv.empty() == true`. Also, if you create a `string_view` with a non-null
// pointer then `sv.data() != nullptr`. Thus, you can use `string_view()` to
// signal an undefined value that is different from other `string_view` values
// in a similar fashion to how `const char* p1 = nullptr;` is different from
// `const char* p2 = "";`. However, in practice, it is not recommended to rely
// on this behavior.
//
// Be careful not to confuse a null `string_view` with an empty one. A null
// `string_view` is an empty `string_view`, but some empty `string_view`s are
// not null. Prefer checking for emptiness over checking for null.
//
// There are many ways to create an empty string_view:
//
//   const char* nullcp = nullptr;
//   // string_view.size() will return 0 in all cases.
//   absl::string_view();
//   absl::string_view(nullcp, 0);
//   absl::string_view("");
//   absl::string_view("", 0);
//   absl::string_view("abcdef", 0);
//   absl::string_view("abcdef" + 6, 0);
//
// All empty `string_view` objects, whether null or not, are equal:
//
//   absl::string_view() == absl::string_view("", 0)
//   absl::string_view(nullptr, 0) == absl::string_view("abcdef"+6, 0)
154 class string_view {
155  public:
156   using traits_type = std::char_traits<char>;
157   using value_type = char;
158   using pointer = char*;
159   using const_pointer = const char*;
160   using reference = char&;
161   using const_reference = const char&;
162   using const_iterator = const char*;
163   using iterator = const_iterator;
164   using const_reverse_iterator = std::reverse_iterator<const_iterator>;
165   using reverse_iterator = const_reverse_iterator;
166   using size_type = size_t;
167   using difference_type = std::ptrdiff_t;
168 
169   static constexpr size_type npos = static_cast<size_type>(-1);
170 
171   // Null `string_view` constructor
string_view()172   constexpr string_view() noexcept : ptr_(nullptr), length_(0) {}
173 
174   // Implicit constructors
175 
176   template <typename Allocator>
string_view(const std::basic_string<char,std::char_traits<char>,Allocator> & str)177   string_view(  // NOLINT(runtime/explicit)
178       const std::basic_string<char, std::char_traits<char>, Allocator>&
179           str) noexcept
180       // This is implemented in terms of `string_view(p, n)` so `str.size()`
181       // doesn't need to be reevaluated after `ptr_` is set.
182       : string_view(str.data(), str.size()) {}
183 
184   // Implicit constructor of a `string_view` from NUL-terminated `str`. When
185   // accepting possibly null strings, use `absl::NullSafeStringView(str)`
186   // instead (see below).
string_view(const char * str)187   constexpr string_view(const char* str)  // NOLINT(runtime/explicit)
188       : ptr_(str),
189         length_(str ? CheckLengthInternal(StrlenInternal(str)) : 0) {}
190 
191   // Implicit constructor of a `string_view` from a `const char*` and length.
string_view(const char * data,size_type len)192   constexpr string_view(const char* data, size_type len)
193       : ptr_(data), length_(CheckLengthInternal(len)) {}
194 
195   // NOTE: Harmlessly omitted to work around gdb bug.
196   //   constexpr string_view(const string_view&) noexcept = default;
197   //   string_view& operator=(const string_view&) noexcept = default;
198 
199   // Iterators
200 
201   // string_view::begin()
202   //
203   // Returns an iterator pointing to the first character at the beginning of the
204   // `string_view`, or `end()` if the `string_view` is empty.
begin()205   constexpr const_iterator begin() const noexcept { return ptr_; }
206 
207   // string_view::end()
208   //
209   // Returns an iterator pointing just beyond the last character at the end of
210   // the `string_view`. This iterator acts as a placeholder; attempting to
211   // access it results in undefined behavior.
end()212   constexpr const_iterator end() const noexcept { return ptr_ + length_; }
213 
214   // string_view::cbegin()
215   //
216   // Returns a const iterator pointing to the first character at the beginning
217   // of the `string_view`, or `end()` if the `string_view` is empty.
cbegin()218   constexpr const_iterator cbegin() const noexcept { return begin(); }
219 
220   // string_view::cend()
221   //
222   // Returns a const iterator pointing just beyond the last character at the end
223   // of the `string_view`. This pointer acts as a placeholder; attempting to
224   // access its element results in undefined behavior.
cend()225   constexpr const_iterator cend() const noexcept { return end(); }
226 
227   // string_view::rbegin()
228   //
229   // Returns a reverse iterator pointing to the last character at the end of the
230   // `string_view`, or `rend()` if the `string_view` is empty.
rbegin()231   const_reverse_iterator rbegin() const noexcept {
232     return const_reverse_iterator(end());
233   }
234 
235   // string_view::rend()
236   //
237   // Returns a reverse iterator pointing just before the first character at the
238   // beginning of the `string_view`. This pointer acts as a placeholder;
239   // attempting to access its element results in undefined behavior.
rend()240   const_reverse_iterator rend() const noexcept {
241     return const_reverse_iterator(begin());
242   }
243 
244   // string_view::crbegin()
245   //
246   // Returns a const reverse iterator pointing to the last character at the end
247   // of the `string_view`, or `crend()` if the `string_view` is empty.
crbegin()248   const_reverse_iterator crbegin() const noexcept { return rbegin(); }
249 
250   // string_view::crend()
251   //
252   // Returns a const reverse iterator pointing just before the first character
253   // at the beginning of the `string_view`. This pointer acts as a placeholder;
254   // attempting to access its element results in undefined behavior.
crend()255   const_reverse_iterator crend() const noexcept { return rend(); }
256 
257   // Capacity Utilities
258 
259   // string_view::size()
260   //
261   // Returns the number of characters in the `string_view`.
size()262   constexpr size_type size() const noexcept {
263     return length_;
264   }
265 
266   // string_view::length()
267   //
268   // Returns the number of characters in the `string_view`. Alias for `size()`.
length()269   constexpr size_type length() const noexcept { return size(); }
270 
271   // string_view::max_size()
272   //
273   // Returns the maximum number of characters the `string_view` can hold.
max_size()274   constexpr size_type max_size() const noexcept { return kMaxSize; }
275 
276   // string_view::empty()
277   //
278   // Checks if the `string_view` is empty (refers to no characters).
empty()279   constexpr bool empty() const noexcept { return length_ == 0; }
280 
281   // string_view::operator[]
282   //
283   // Returns the ith element of the `string_view` using the array operator.
284   // Note that this operator does not perform any bounds checking.
285   constexpr const_reference operator[](size_type i) const {
286     return ABSL_ASSERT(i < size()), ptr_[i];
287   }
288 
289   // string_view::at()
290   //
291   // Returns the ith element of the `string_view`. Bounds checking is performed,
292   // and an exception of type `std::out_of_range` will be thrown on invalid
293   // access.
at(size_type i)294   constexpr const_reference at(size_type i) const {
295     return ABSL_PREDICT_TRUE(i < size())
296                ? ptr_[i]
297                : ((void)base_internal::ThrowStdOutOfRange(
298                       "absl::string_view::at"),
299                   ptr_[i]);
300   }
301 
302   // string_view::front()
303   //
304   // Returns the first element of a `string_view`.
front()305   constexpr const_reference front() const {
306     return ABSL_ASSERT(!empty()), ptr_[0];
307   }
308 
309   // string_view::back()
310   //
311   // Returns the last element of a `string_view`.
back()312   constexpr const_reference back() const {
313     return ABSL_ASSERT(!empty()), ptr_[size() - 1];
314   }
315 
316   // string_view::data()
317   //
318   // Returns a pointer to the underlying character array (which is of course
319   // stored elsewhere). Note that `string_view::data()` may contain embedded nul
320   // characters, but the returned buffer may or may not be NUL-terminated;
321   // therefore, do not pass `data()` to a routine that expects a NUL-terminated
322   // std::string.
data()323   constexpr const_pointer data() const noexcept { return ptr_; }
324 
325   // Modifiers
326 
327   // string_view::remove_prefix()
328   //
329   // Removes the first `n` characters from the `string_view`. Note that the
330   // underlying std::string is not changed, only the view.
remove_prefix(size_type n)331   void remove_prefix(size_type n) {
332     assert(n <= length_);
333     ptr_ += n;
334     length_ -= n;
335   }
336 
337   // string_view::remove_suffix()
338   //
339   // Removes the last `n` characters from the `string_view`. Note that the
340   // underlying std::string is not changed, only the view.
remove_suffix(size_type n)341   void remove_suffix(size_type n) {
342     assert(n <= length_);
343     length_ -= n;
344   }
345 
346   // string_view::swap()
347   //
348   // Swaps this `string_view` with another `string_view`.
swap(string_view & s)349   void swap(string_view& s) noexcept {
350     auto t = *this;
351     *this = s;
352     s = t;
353   }
354 
355   // Explicit conversion operators
356 
357   // Converts to `std::basic_string`.
358   template <typename A>
359   explicit operator std::basic_string<char, traits_type, A>() const {
360     if (!data()) return {};
361     return std::basic_string<char, traits_type, A>(data(), size());
362   }
363 
364   // string_view::copy()
365   //
366   // Copies the contents of the `string_view` at offset `pos` and length `n`
367   // into `buf`.
368   size_type copy(char* buf, size_type n, size_type pos = 0) const {
369     if (ABSL_PREDICT_FALSE(pos > length_)) {
370       base_internal::ThrowStdOutOfRange("absl::string_view::copy");
371     }
372     size_type rlen = (std::min)(length_ - pos, n);
373     if (rlen > 0) {
374       const char* start = ptr_ + pos;
375       traits_type::copy(buf, start, rlen);
376     }
377     return rlen;
378   }
379 
380   // string_view::substr()
381   //
382   // Returns a "substring" of the `string_view` (at offset `pos` and length
383   // `n`) as another string_view. This function throws `std::out_of_bounds` if
384   // `pos > size`.
385   string_view substr(size_type pos, size_type n = npos) const {
386     if (ABSL_PREDICT_FALSE(pos > length_))
387       base_internal::ThrowStdOutOfRange("absl::string_view::substr");
388     n = (std::min)(n, length_ - pos);
389     return string_view(ptr_ + pos, n);
390   }
391 
392   // string_view::compare()
393   //
394   // Performs a lexicographical comparison between the `string_view` and
395   // another `absl::string_view`, returning -1 if `this` is less than, 0 if
396   // `this` is equal to, and 1 if `this` is greater than the passed std::string
397   // view. Note that in the case of data equality, a further comparison is made
398   // on the respective sizes of the two `string_view`s to determine which is
399   // smaller, equal, or greater.
compare(string_view x)400   constexpr int compare(string_view x) const noexcept {
401     return CompareImpl(length_, x.length_,
402                        Min(length_, x.length_) == 0
403                            ? 0
404                            : ABSL_INTERNAL_STRING_VIEW_MEMCMP(
405                                  ptr_, x.ptr_, Min(length_, x.length_)));
406   }
407 
408   // Overload of `string_view::compare()` for comparing a substring of the
409   // 'string_view` and another `absl::string_view`.
compare(size_type pos1,size_type count1,string_view v)410   int compare(size_type pos1, size_type count1, string_view v) const {
411     return substr(pos1, count1).compare(v);
412   }
413 
414   // Overload of `string_view::compare()` for comparing a substring of the
415   // `string_view` and a substring of another `absl::string_view`.
compare(size_type pos1,size_type count1,string_view v,size_type pos2,size_type count2)416   int compare(size_type pos1, size_type count1, string_view v, size_type pos2,
417               size_type count2) const {
418     return substr(pos1, count1).compare(v.substr(pos2, count2));
419   }
420 
421   // Overload of `string_view::compare()` for comparing a `string_view` and a
422   // a different  C-style std::string `s`.
compare(const char * s)423   int compare(const char* s) const { return compare(string_view(s)); }
424 
425   // Overload of `string_view::compare()` for comparing a substring of the
426   // `string_view` and a different std::string C-style std::string `s`.
compare(size_type pos1,size_type count1,const char * s)427   int compare(size_type pos1, size_type count1, const char* s) const {
428     return substr(pos1, count1).compare(string_view(s));
429   }
430 
431   // Overload of `string_view::compare()` for comparing a substring of the
432   // `string_view` and a substring of a different C-style std::string `s`.
compare(size_type pos1,size_type count1,const char * s,size_type count2)433   int compare(size_type pos1, size_type count1, const char* s,
434               size_type count2) const {
435     return substr(pos1, count1).compare(string_view(s, count2));
436   }
437 
438   // Find Utilities
439 
440   // string_view::find()
441   //
442   // Finds the first occurrence of the substring `s` within the `string_view`,
443   // returning the position of the first character's match, or `npos` if no
444   // match was found.
445   size_type find(string_view s, size_type pos = 0) const noexcept;
446 
447   // Overload of `string_view::find()` for finding the given character `c`
448   // within the `string_view`.
449   size_type find(char c, size_type pos = 0) const noexcept;
450 
451   // string_view::rfind()
452   //
453   // Finds the last occurrence of a substring `s` within the `string_view`,
454   // returning the position of the first character's match, or `npos` if no
455   // match was found.
456   size_type rfind(string_view s, size_type pos = npos) const
457       noexcept;
458 
459   // Overload of `string_view::rfind()` for finding the last given character `c`
460   // within the `string_view`.
461   size_type rfind(char c, size_type pos = npos) const noexcept;
462 
463   // string_view::find_first_of()
464   //
465   // Finds the first occurrence of any of the characters in `s` within the
466   // `string_view`, returning the start position of the match, or `npos` if no
467   // match was found.
468   size_type find_first_of(string_view s, size_type pos = 0) const
469       noexcept;
470 
471   // Overload of `string_view::find_first_of()` for finding a character `c`
472   // within the `string_view`.
473   size_type find_first_of(char c, size_type pos = 0) const
474       noexcept {
475     return find(c, pos);
476   }
477 
478   // string_view::find_last_of()
479   //
480   // Finds the last occurrence of any of the characters in `s` within the
481   // `string_view`, returning the start position of the match, or `npos` if no
482   // match was found.
483   size_type find_last_of(string_view s, size_type pos = npos) const
484       noexcept;
485 
486   // Overload of `string_view::find_last_of()` for finding a character `c`
487   // within the `string_view`.
488   size_type find_last_of(char c, size_type pos = npos) const
489       noexcept {
490     return rfind(c, pos);
491   }
492 
493   // string_view::find_first_not_of()
494   //
495   // Finds the first occurrence of any of the characters not in `s` within the
496   // `string_view`, returning the start position of the first non-match, or
497   // `npos` if no non-match was found.
498   size_type find_first_not_of(string_view s, size_type pos = 0) const noexcept;
499 
500   // Overload of `string_view::find_first_not_of()` for finding a character
501   // that is not `c` within the `string_view`.
502   size_type find_first_not_of(char c, size_type pos = 0) const noexcept;
503 
504   // string_view::find_last_not_of()
505   //
506   // Finds the last occurrence of any of the characters not in `s` within the
507   // `string_view`, returning the start position of the last non-match, or
508   // `npos` if no non-match was found.
509   size_type find_last_not_of(string_view s,
510                                           size_type pos = npos) const noexcept;
511 
512   // Overload of `string_view::find_last_not_of()` for finding a character
513   // that is not `c` within the `string_view`.
514   size_type find_last_not_of(char c, size_type pos = npos) const
515       noexcept;
516 
517  private:
518   static constexpr size_type kMaxSize =
519       (std::numeric_limits<difference_type>::max)();
520 
CheckLengthInternal(size_type len)521   static constexpr size_type CheckLengthInternal(size_type len) {
522     return (void)ABSL_ASSERT(len <= kMaxSize), len;
523   }
524 
StrlenInternal(const char * str)525   static constexpr size_type StrlenInternal(const char* str) {
526 #if defined(_MSC_VER) && _MSC_VER >= 1910 && !defined(__clang__)
527     // MSVC 2017+ can evaluate this at compile-time.
528     const char* begin = str;
529     while (*str != '\0') ++str;
530     return str - begin;
531 #elif ABSL_HAVE_BUILTIN(__builtin_strlen) || \
532     (defined(__GNUC__) && !defined(__clang__))
533     // GCC has __builtin_strlen according to
534     // https://gcc.gnu.org/onlinedocs/gcc-4.7.0/gcc/Other-Builtins.html, but
535     // ABSL_HAVE_BUILTIN doesn't detect that, so we use the extra checks above.
536     // __builtin_strlen is constexpr.
537     return __builtin_strlen(str);
538 #else
539     return str ? strlen(str) : 0;
540 #endif
541   }
542 
Min(size_type length_a,size_type length_b)543   static constexpr size_t Min(size_type length_a, size_type length_b) {
544     return length_a < length_b ? length_a : length_b;
545   }
546 
CompareImpl(size_type length_a,size_type length_b,int compare_result)547   static constexpr int CompareImpl(size_type length_a, size_type length_b,
548                                    int compare_result) {
549     return compare_result == 0 ? static_cast<int>(length_a > length_b) -
550                                      static_cast<int>(length_a < length_b)
551                                : (compare_result < 0 ? -1 : 1);
552   }
553 
554   const char* ptr_;
555   size_type length_;
556 };
557 
558 // This large function is defined inline so that in a fairly common case where
559 // one of the arguments is a literal, the compiler can elide a lot of the
560 // following comparisons.
561 constexpr bool operator==(string_view x, string_view y) noexcept {
562   return x.size() == y.size() &&
563          (x.empty() ||
564           ABSL_INTERNAL_STRING_VIEW_MEMCMP(x.data(), y.data(), x.size()) == 0);
565 }
566 
567 constexpr bool operator!=(string_view x, string_view y) noexcept {
568   return !(x == y);
569 }
570 
571 constexpr bool operator<(string_view x, string_view y) noexcept {
572   return x.compare(y) < 0;
573 }
574 
575 constexpr bool operator>(string_view x, string_view y) noexcept {
576   return y < x;
577 }
578 
579 constexpr bool operator<=(string_view x, string_view y) noexcept {
580   return !(y < x);
581 }
582 
583 constexpr bool operator>=(string_view x, string_view y) noexcept {
584   return !(x < y);
585 }
586 
587 // IO Insertion Operator
588 std::ostream& operator<<(std::ostream& o, string_view piece);
589 
590 ABSL_NAMESPACE_END
591 }  // namespace absl
592 
593 #undef ABSL_INTERNAL_STRING_VIEW_MEMCMP
594 
595 #endif  // ABSL_USES_STD_STRING_VIEW
596 
597 namespace absl {
598 ABSL_NAMESPACE_BEGIN
599 
600 // ClippedSubstr()
601 //
602 // Like `s.substr(pos, n)`, but clips `pos` to an upper bound of `s.size()`.
603 // Provided because std::string_view::substr throws if `pos > size()`
604 inline string_view ClippedSubstr(string_view s, size_t pos,
605                                  size_t n = string_view::npos) {
606   pos = (std::min)(pos, static_cast<size_t>(s.size()));
607   return s.substr(pos, n);
608 }
609 
610 // NullSafeStringView()
611 //
612 // Creates an `absl::string_view` from a pointer `p` even if it's null-valued.
613 // This function should be used where an `absl::string_view` can be created from
614 // a possibly-null pointer.
NullSafeStringView(const char * p)615 constexpr string_view NullSafeStringView(const char* p) {
616   return p ? string_view(p) : string_view();
617 }
618 
619 ABSL_NAMESPACE_END
620 }  // namespace absl
621 
622 #endif  // ABSL_STRINGS_STRING_VIEW_H_
623