1 //
2 // Copyright 2017 The Abseil Authors.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // https://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 // -----------------------------------------------------------------------------
17 // File: string_view.h
18 // -----------------------------------------------------------------------------
19 //
20 // This file contains the definition of the `absl::string_view` class. A
21 // `string_view` points to a contiguous span of characters, often part or all of
22 // another `std::string`, double-quoted string literal, character array, or even
23 // another `string_view`.
24 //
25 // This `absl::string_view` abstraction is designed to be a drop-in
26 // replacement for the C++17 `std::string_view` abstraction.
27 #ifndef ABSL_STRINGS_STRING_VIEW_H_
28 #define ABSL_STRINGS_STRING_VIEW_H_
29
30 #include <algorithm>
31 #include <cassert>
32 #include <cstddef>
33 #include <cstring>
34 #include <iosfwd>
35 #include <iterator>
36 #include <limits>
37 #include <string>
38
39 #include "absl/base/config.h"
40 #include "absl/base/internal/throw_delegate.h"
41 #include "absl/base/macros.h"
42 #include "absl/base/optimization.h"
43 #include "absl/base/port.h"
44
45 #ifdef ABSL_USES_STD_STRING_VIEW
46
47 #include <string_view> // IWYU pragma: export
48
49 namespace absl {
50 ABSL_NAMESPACE_BEGIN
51 using string_view = std::string_view;
52 ABSL_NAMESPACE_END
53 } // namespace absl
54
55 #else // ABSL_USES_STD_STRING_VIEW
56
57 #if ABSL_HAVE_BUILTIN(__builtin_memcmp) || \
58 (defined(__GNUC__) && !defined(__clang__))
59 #define ABSL_INTERNAL_STRING_VIEW_MEMCMP __builtin_memcmp
60 #else // ABSL_HAVE_BUILTIN(__builtin_memcmp)
61 #define ABSL_INTERNAL_STRING_VIEW_MEMCMP memcmp
62 #endif // ABSL_HAVE_BUILTIN(__builtin_memcmp)
63
64 namespace absl {
65 ABSL_NAMESPACE_BEGIN
66
67 // absl::string_view
68 //
69 // A `string_view` provides a lightweight view into the string data provided by
70 // a `std::string`, double-quoted string literal, character array, or even
71 // another `string_view`. A `string_view` does *not* own the string to which it
72 // points, and that data cannot be modified through the view.
73 //
74 // You can use `string_view` as a function or method parameter anywhere a
75 // parameter can receive a double-quoted string literal, `const char*`,
76 // `std::string`, or another `absl::string_view` argument with no need to copy
77 // the string data. Systematic use of `string_view` within function arguments
78 // reduces data copies and `strlen()` calls.
79 //
80 // Because of its small size, prefer passing `string_view` by value:
81 //
82 // void MyFunction(absl::string_view arg);
83 //
84 // If circumstances require, you may also pass one by const reference:
85 //
86 // void MyFunction(const absl::string_view& arg); // not preferred
87 //
88 // Passing by value generates slightly smaller code for many architectures.
89 //
90 // In either case, the source data of the `string_view` must outlive the
91 // `string_view` itself.
92 //
93 // A `string_view` is also suitable for local variables if you know that the
94 // lifetime of the underlying object is longer than the lifetime of your
95 // `string_view` variable. However, beware of binding a `string_view` to a
96 // temporary value:
97 //
98 // // BAD use of string_view: lifetime problem
99 // absl::string_view sv = obj.ReturnAString();
100 //
101 // // GOOD use of string_view: str outlives sv
102 // std::string str = obj.ReturnAString();
103 // absl::string_view sv = str;
104 //
105 // Due to lifetime issues, a `string_view` is sometimes a poor choice for a
106 // return value and usually a poor choice for a data member. If you do use a
107 // `string_view` this way, it is your responsibility to ensure that the object
108 // pointed to by the `string_view` outlives the `string_view`.
109 //
110 // A `string_view` may represent a whole string or just part of a string. For
111 // example, when splitting a string, `std::vector<absl::string_view>` is a
112 // natural data type for the output.
113 //
114 // For another example, a Cord is a non-contiguous, potentially very
115 // long string-like object. The Cord class has an interface that iteratively
116 // provides string_view objects that point to the successive pieces of a Cord
117 // object.
118 //
119 // When constructed from a source which is NUL-terminated, the `string_view`
120 // itself will not include the NUL-terminator unless a specific size (including
121 // the NUL) is passed to the constructor. As a result, common idioms that work
122 // on NUL-terminated strings do not work on `string_view` objects. If you write
123 // code that scans a `string_view`, you must check its length rather than test
124 // for nul, for example. Note, however, that nuls may still be embedded within
125 // a `string_view` explicitly.
126 //
127 // You may create a null `string_view` in two ways:
128 //
129 // absl::string_view sv;
130 // absl::string_view sv(nullptr, 0);
131 //
132 // For the above, `sv.data() == nullptr`, `sv.length() == 0`, and
133 // `sv.empty() == true`. Also, if you create a `string_view` with a non-null
134 // pointer then `sv.data() != nullptr`. Thus, you can use `string_view()` to
135 // signal an undefined value that is different from other `string_view` values
136 // in a similar fashion to how `const char* p1 = nullptr;` is different from
137 // `const char* p2 = "";`. However, in practice, it is not recommended to rely
138 // on this behavior.
139 //
140 // Be careful not to confuse a null `string_view` with an empty one. A null
141 // `string_view` is an empty `string_view`, but some empty `string_view`s are
142 // not null. Prefer checking for emptiness over checking for null.
143 //
144 // There are many ways to create an empty string_view:
145 //
146 // const char* nullcp = nullptr;
147 // // string_view.size() will return 0 in all cases.
148 // absl::string_view();
149 // absl::string_view(nullcp, 0);
150 // absl::string_view("");
151 // absl::string_view("", 0);
152 // absl::string_view("abcdef", 0);
153 // absl::string_view("abcdef" + 6, 0);
154 //
155 // All empty `string_view` objects whether null or not, are equal:
156 //
157 // absl::string_view() == absl::string_view("", 0)
158 // absl::string_view(nullptr, 0) == absl::string_view("abcdef"+6, 0)
159 class string_view {
160 public:
161 using traits_type = std::char_traits<char>;
162 using value_type = char;
163 using pointer = char*;
164 using const_pointer = const char*;
165 using reference = char&;
166 using const_reference = const char&;
167 using const_iterator = const char*;
168 using iterator = const_iterator;
169 using const_reverse_iterator = std::reverse_iterator<const_iterator>;
170 using reverse_iterator = const_reverse_iterator;
171 using size_type = size_t;
172 using difference_type = std::ptrdiff_t;
173
174 static constexpr size_type npos = static_cast<size_type>(-1);
175
176 // Null `string_view` constructor
string_view()177 constexpr string_view() noexcept : ptr_(nullptr), length_(0) {}
178
179 // Implicit constructors
180
181 template <typename Allocator>
string_view(const std::basic_string<char,std::char_traits<char>,Allocator> & str)182 string_view( // NOLINT(runtime/explicit)
183 const std::basic_string<char, std::char_traits<char>, Allocator>&
184 str) noexcept
185 // This is implemented in terms of `string_view(p, n)` so `str.size()`
186 // doesn't need to be reevaluated after `ptr_` is set.
187 : string_view(str.data(), str.size()) {}
188
189 // Implicit constructor of a `string_view` from NUL-terminated `str`. When
190 // accepting possibly null strings, use `absl::NullSafeStringView(str)`
191 // instead (see below).
string_view(const char * str)192 constexpr string_view(const char* str) // NOLINT(runtime/explicit)
193 : ptr_(str),
194 length_(str ? CheckLengthInternal(StrlenInternal(str)) : 0) {}
195
196 // Implicit constructor of a `string_view` from a `const char*` and length.
string_view(const char * data,size_type len)197 constexpr string_view(const char* data, size_type len)
198 : ptr_(data), length_(CheckLengthInternal(len)) {}
199
200 // NOTE: Harmlessly omitted to work around gdb bug.
201 // constexpr string_view(const string_view&) noexcept = default;
202 // string_view& operator=(const string_view&) noexcept = default;
203
204 // Iterators
205
206 // string_view::begin()
207 //
208 // Returns an iterator pointing to the first character at the beginning of the
209 // `string_view`, or `end()` if the `string_view` is empty.
begin()210 constexpr const_iterator begin() const noexcept { return ptr_; }
211
212 // string_view::end()
213 //
214 // Returns an iterator pointing just beyond the last character at the end of
215 // the `string_view`. This iterator acts as a placeholder; attempting to
216 // access it results in undefined behavior.
end()217 constexpr const_iterator end() const noexcept { return ptr_ + length_; }
218
219 // string_view::cbegin()
220 //
221 // Returns a const iterator pointing to the first character at the beginning
222 // of the `string_view`, or `end()` if the `string_view` is empty.
cbegin()223 constexpr const_iterator cbegin() const noexcept { return begin(); }
224
225 // string_view::cend()
226 //
227 // Returns a const iterator pointing just beyond the last character at the end
228 // of the `string_view`. This pointer acts as a placeholder; attempting to
229 // access its element results in undefined behavior.
cend()230 constexpr const_iterator cend() const noexcept { return end(); }
231
232 // string_view::rbegin()
233 //
234 // Returns a reverse iterator pointing to the last character at the end of the
235 // `string_view`, or `rend()` if the `string_view` is empty.
rbegin()236 const_reverse_iterator rbegin() const noexcept {
237 return const_reverse_iterator(end());
238 }
239
240 // string_view::rend()
241 //
242 // Returns a reverse iterator pointing just before the first character at the
243 // beginning of the `string_view`. This pointer acts as a placeholder;
244 // attempting to access its element results in undefined behavior.
rend()245 const_reverse_iterator rend() const noexcept {
246 return const_reverse_iterator(begin());
247 }
248
249 // string_view::crbegin()
250 //
251 // Returns a const reverse iterator pointing to the last character at the end
252 // of the `string_view`, or `crend()` if the `string_view` is empty.
crbegin()253 const_reverse_iterator crbegin() const noexcept { return rbegin(); }
254
255 // string_view::crend()
256 //
257 // Returns a const reverse iterator pointing just before the first character
258 // at the beginning of the `string_view`. This pointer acts as a placeholder;
259 // attempting to access its element results in undefined behavior.
crend()260 const_reverse_iterator crend() const noexcept { return rend(); }
261
262 // Capacity Utilities
263
264 // string_view::size()
265 //
266 // Returns the number of characters in the `string_view`.
size()267 constexpr size_type size() const noexcept {
268 return length_;
269 }
270
271 // string_view::length()
272 //
273 // Returns the number of characters in the `string_view`. Alias for `size()`.
length()274 constexpr size_type length() const noexcept { return size(); }
275
276 // string_view::max_size()
277 //
278 // Returns the maximum number of characters the `string_view` can hold.
max_size()279 constexpr size_type max_size() const noexcept { return kMaxSize; }
280
281 // string_view::empty()
282 //
283 // Checks if the `string_view` is empty (refers to no characters).
empty()284 constexpr bool empty() const noexcept { return length_ == 0; }
285
286 // string_view::operator[]
287 //
288 // Returns the ith element of the `string_view` using the array operator.
289 // Note that this operator does not perform any bounds checking.
290 constexpr const_reference operator[](size_type i) const {
291 return ABSL_HARDENING_ASSERT(i < size()), ptr_[i];
292 }
293
294 // string_view::at()
295 //
296 // Returns the ith element of the `string_view`. Bounds checking is performed,
297 // and an exception of type `std::out_of_range` will be thrown on invalid
298 // access.
at(size_type i)299 constexpr const_reference at(size_type i) const {
300 return ABSL_PREDICT_TRUE(i < size())
301 ? ptr_[i]
302 : ((void)base_internal::ThrowStdOutOfRange(
303 "absl::string_view::at"),
304 ptr_[i]);
305 }
306
307 // string_view::front()
308 //
309 // Returns the first element of a `string_view`.
front()310 constexpr const_reference front() const {
311 return ABSL_HARDENING_ASSERT(!empty()), ptr_[0];
312 }
313
314 // string_view::back()
315 //
316 // Returns the last element of a `string_view`.
back()317 constexpr const_reference back() const {
318 return ABSL_HARDENING_ASSERT(!empty()), ptr_[size() - 1];
319 }
320
321 // string_view::data()
322 //
323 // Returns a pointer to the underlying character array (which is of course
324 // stored elsewhere). Note that `string_view::data()` may contain embedded nul
325 // characters, but the returned buffer may or may not be NUL-terminated;
326 // therefore, do not pass `data()` to a routine that expects a NUL-terminated
327 // string.
data()328 constexpr const_pointer data() const noexcept { return ptr_; }
329
330 // Modifiers
331
332 // string_view::remove_prefix()
333 //
334 // Removes the first `n` characters from the `string_view`. Note that the
335 // underlying string is not changed, only the view.
remove_prefix(size_type n)336 void remove_prefix(size_type n) {
337 ABSL_HARDENING_ASSERT(n <= length_);
338 ptr_ += n;
339 length_ -= n;
340 }
341
342 // string_view::remove_suffix()
343 //
344 // Removes the last `n` characters from the `string_view`. Note that the
345 // underlying string is not changed, only the view.
remove_suffix(size_type n)346 void remove_suffix(size_type n) {
347 ABSL_HARDENING_ASSERT(n <= length_);
348 length_ -= n;
349 }
350
351 // string_view::swap()
352 //
353 // Swaps this `string_view` with another `string_view`.
swap(string_view & s)354 void swap(string_view& s) noexcept {
355 auto t = *this;
356 *this = s;
357 s = t;
358 }
359
360 // Explicit conversion operators
361
362 // Converts to `std::basic_string`.
363 template <typename A>
364 explicit operator std::basic_string<char, traits_type, A>() const {
365 if (!data()) return {};
366 return std::basic_string<char, traits_type, A>(data(), size());
367 }
368
369 // string_view::copy()
370 //
371 // Copies the contents of the `string_view` at offset `pos` and length `n`
372 // into `buf`.
373 size_type copy(char* buf, size_type n, size_type pos = 0) const {
374 if (ABSL_PREDICT_FALSE(pos > length_)) {
375 base_internal::ThrowStdOutOfRange("absl::string_view::copy");
376 }
377 size_type rlen = (std::min)(length_ - pos, n);
378 if (rlen > 0) {
379 const char* start = ptr_ + pos;
380 traits_type::copy(buf, start, rlen);
381 }
382 return rlen;
383 }
384
385 // string_view::substr()
386 //
387 // Returns a "substring" of the `string_view` (at offset `pos` and length
388 // `n`) as another string_view. This function throws `std::out_of_bounds` if
389 // `pos > size`.
390 // Use absl::ClippedSubstr if you need a truncating substr operation.
391 constexpr string_view substr(size_type pos, size_type n = npos) const {
392 return ABSL_PREDICT_FALSE(pos > length_)
393 ? (base_internal::ThrowStdOutOfRange(
394 "absl::string_view::substr"),
395 string_view())
396 : string_view(ptr_ + pos, Min(n, length_ - pos));
397 }
398
399 // string_view::compare()
400 //
401 // Performs a lexicographical comparison between the `string_view` and
402 // another `absl::string_view`, returning -1 if `this` is less than, 0 if
403 // `this` is equal to, and 1 if `this` is greater than the passed string
404 // view. Note that in the case of data equality, a further comparison is made
405 // on the respective sizes of the two `string_view`s to determine which is
406 // smaller, equal, or greater.
compare(string_view x)407 constexpr int compare(string_view x) const noexcept {
408 return CompareImpl(length_, x.length_,
409 Min(length_, x.length_) == 0
410 ? 0
411 : ABSL_INTERNAL_STRING_VIEW_MEMCMP(
412 ptr_, x.ptr_, Min(length_, x.length_)));
413 }
414
415 // Overload of `string_view::compare()` for comparing a substring of the
416 // 'string_view` and another `absl::string_view`.
compare(size_type pos1,size_type count1,string_view v)417 int compare(size_type pos1, size_type count1, string_view v) const {
418 return substr(pos1, count1).compare(v);
419 }
420
421 // Overload of `string_view::compare()` for comparing a substring of the
422 // `string_view` and a substring of another `absl::string_view`.
compare(size_type pos1,size_type count1,string_view v,size_type pos2,size_type count2)423 int compare(size_type pos1, size_type count1, string_view v, size_type pos2,
424 size_type count2) const {
425 return substr(pos1, count1).compare(v.substr(pos2, count2));
426 }
427
428 // Overload of `string_view::compare()` for comparing a `string_view` and a
429 // a different C-style string `s`.
compare(const char * s)430 int compare(const char* s) const { return compare(string_view(s)); }
431
432 // Overload of `string_view::compare()` for comparing a substring of the
433 // `string_view` and a different string C-style string `s`.
compare(size_type pos1,size_type count1,const char * s)434 int compare(size_type pos1, size_type count1, const char* s) const {
435 return substr(pos1, count1).compare(string_view(s));
436 }
437
438 // Overload of `string_view::compare()` for comparing a substring of the
439 // `string_view` and a substring of a different C-style string `s`.
compare(size_type pos1,size_type count1,const char * s,size_type count2)440 int compare(size_type pos1, size_type count1, const char* s,
441 size_type count2) const {
442 return substr(pos1, count1).compare(string_view(s, count2));
443 }
444
445 // Find Utilities
446
447 // string_view::find()
448 //
449 // Finds the first occurrence of the substring `s` within the `string_view`,
450 // returning the position of the first character's match, or `npos` if no
451 // match was found.
452 size_type find(string_view s, size_type pos = 0) const noexcept;
453
454 // Overload of `string_view::find()` for finding the given character `c`
455 // within the `string_view`.
456 size_type find(char c, size_type pos = 0) const noexcept;
457
458 // string_view::rfind()
459 //
460 // Finds the last occurrence of a substring `s` within the `string_view`,
461 // returning the position of the first character's match, or `npos` if no
462 // match was found.
463 size_type rfind(string_view s, size_type pos = npos) const
464 noexcept;
465
466 // Overload of `string_view::rfind()` for finding the last given character `c`
467 // within the `string_view`.
468 size_type rfind(char c, size_type pos = npos) const noexcept;
469
470 // string_view::find_first_of()
471 //
472 // Finds the first occurrence of any of the characters in `s` within the
473 // `string_view`, returning the start position of the match, or `npos` if no
474 // match was found.
475 size_type find_first_of(string_view s, size_type pos = 0) const
476 noexcept;
477
478 // Overload of `string_view::find_first_of()` for finding a character `c`
479 // within the `string_view`.
480 size_type find_first_of(char c, size_type pos = 0) const
481 noexcept {
482 return find(c, pos);
483 }
484
485 // string_view::find_last_of()
486 //
487 // Finds the last occurrence of any of the characters in `s` within the
488 // `string_view`, returning the start position of the match, or `npos` if no
489 // match was found.
490 size_type find_last_of(string_view s, size_type pos = npos) const
491 noexcept;
492
493 // Overload of `string_view::find_last_of()` for finding a character `c`
494 // within the `string_view`.
495 size_type find_last_of(char c, size_type pos = npos) const
496 noexcept {
497 return rfind(c, pos);
498 }
499
500 // string_view::find_first_not_of()
501 //
502 // Finds the first occurrence of any of the characters not in `s` within the
503 // `string_view`, returning the start position of the first non-match, or
504 // `npos` if no non-match was found.
505 size_type find_first_not_of(string_view s, size_type pos = 0) const noexcept;
506
507 // Overload of `string_view::find_first_not_of()` for finding a character
508 // that is not `c` within the `string_view`.
509 size_type find_first_not_of(char c, size_type pos = 0) const noexcept;
510
511 // string_view::find_last_not_of()
512 //
513 // Finds the last occurrence of any of the characters not in `s` within the
514 // `string_view`, returning the start position of the last non-match, or
515 // `npos` if no non-match was found.
516 size_type find_last_not_of(string_view s,
517 size_type pos = npos) const noexcept;
518
519 // Overload of `string_view::find_last_not_of()` for finding a character
520 // that is not `c` within the `string_view`.
521 size_type find_last_not_of(char c, size_type pos = npos) const
522 noexcept;
523
524 private:
525 static constexpr size_type kMaxSize =
526 (std::numeric_limits<difference_type>::max)();
527
CheckLengthInternal(size_type len)528 static constexpr size_type CheckLengthInternal(size_type len) {
529 return ABSL_HARDENING_ASSERT(len <= kMaxSize), len;
530 }
531
StrlenInternal(const char * str)532 static constexpr size_type StrlenInternal(const char* str) {
533 #if defined(_MSC_VER) && _MSC_VER >= 1910 && !defined(__clang__)
534 // MSVC 2017+ can evaluate this at compile-time.
535 const char* begin = str;
536 while (*str != '\0') ++str;
537 return str - begin;
538 #elif ABSL_HAVE_BUILTIN(__builtin_strlen) || \
539 (defined(__GNUC__) && !defined(__clang__))
540 // GCC has __builtin_strlen according to
541 // https://gcc.gnu.org/onlinedocs/gcc-4.7.0/gcc/Other-Builtins.html, but
542 // ABSL_HAVE_BUILTIN doesn't detect that, so we use the extra checks above.
543 // __builtin_strlen is constexpr.
544 return __builtin_strlen(str);
545 #else
546 return str ? strlen(str) : 0;
547 #endif
548 }
549
Min(size_type length_a,size_type length_b)550 static constexpr size_t Min(size_type length_a, size_type length_b) {
551 return length_a < length_b ? length_a : length_b;
552 }
553
CompareImpl(size_type length_a,size_type length_b,int compare_result)554 static constexpr int CompareImpl(size_type length_a, size_type length_b,
555 int compare_result) {
556 return compare_result == 0 ? static_cast<int>(length_a > length_b) -
557 static_cast<int>(length_a < length_b)
558 : (compare_result < 0 ? -1 : 1);
559 }
560
561 const char* ptr_;
562 size_type length_;
563 };
564
565 // This large function is defined inline so that in a fairly common case where
566 // one of the arguments is a literal, the compiler can elide a lot of the
567 // following comparisons.
568 constexpr bool operator==(string_view x, string_view y) noexcept {
569 return x.size() == y.size() &&
570 (x.empty() ||
571 ABSL_INTERNAL_STRING_VIEW_MEMCMP(x.data(), y.data(), x.size()) == 0);
572 }
573
// Inequality: the exact negation of `operator==` for the same arguments.
constexpr bool operator!=(string_view x, string_view y) noexcept {
  return !operator==(x, y);
}
577
// Lexicographical "less than": true when `x.compare(y)` is negative.
constexpr bool operator<(string_view x, string_view y) noexcept {
  return 0 > x.compare(y);
}
581
// Lexicographical "greater than": `x > y` holds exactly when `y` orders
// before `x`, i.e. when `y.compare(x)` is negative.
constexpr bool operator>(string_view x, string_view y) noexcept {
  return y.compare(x) < 0;
}
585
// Lexicographical "less than or equal": `x <= y` holds exactly when `y` does
// not order before `x`, i.e. when `y.compare(x)` is not negative.
constexpr bool operator<=(string_view x, string_view y) noexcept {
  return y.compare(x) >= 0;
}
589
// Lexicographical "greater than or equal": `x >= y` holds exactly when `x`
// does not order before `y`, i.e. when `x.compare(y)` is not negative.
constexpr bool operator>=(string_view x, string_view y) noexcept {
  return x.compare(y) >= 0;
}
593
594 // IO Insertion Operator
595 std::ostream& operator<<(std::ostream& o, string_view piece);
596
597 ABSL_NAMESPACE_END
598 } // namespace absl
599
600 #undef ABSL_INTERNAL_STRING_VIEW_MEMCMP
601
602 #endif // ABSL_USES_STD_STRING_VIEW
603
604 namespace absl {
605 ABSL_NAMESPACE_BEGIN
606
// ClippedSubstr()
//
// Behaves like `s.substr(pos, n)`, except that `pos` is first clamped to
// `s.size()`, so `substr` is never asked for a position past the end. This
// offers a truncating substring operation, whereas `string_view::substr`
// throws if `pos > size()`.
inline string_view ClippedSubstr(string_view s, size_t pos,
                                 size_t n = string_view::npos) {
  const size_t start = pos < s.size() ? pos : s.size();
  return s.substr(start, n);
}
616
// NullSafeStringView()
//
// Builds an `absl::string_view` from a C-string pointer `p` that is allowed to
// be null. A null `p` produces a default-constructed `string_view`; any other
// `p` is handed to the `string_view(const char*)` constructor. Use this
// wherever a view has to be created from a possibly-null pointer.
constexpr string_view NullSafeStringView(const char* p) {
  return p == nullptr ? string_view() : string_view(p);
}
625
626 ABSL_NAMESPACE_END
627 } // namespace absl
628
629 #endif // ABSL_STRINGS_STRING_VIEW_H_
630