• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // A StringPiece points to part or all of a string, Cord, double-quoted string
32 // literal, or other string-like object.  A StringPiece does *not* own the
33 // string to which it points.  A StringPiece is not null-terminated.
34 //
35 // You can use StringPiece as a function or method parameter.  A StringPiece
36 // parameter can receive a double-quoted string literal argument, a "const
37 // char*" argument, a string argument, or a StringPiece argument with no data
38 // copying.  Systematic use of StringPiece for arguments reduces data
39 // copies and strlen() calls.
40 //
41 // Prefer passing StringPieces by value:
42 //   void MyFunction(StringPiece arg);
43 // If circumstances require, you may also pass by const reference:
44 //   void MyFunction(const StringPiece& arg);  // not preferred
45 // Both of these have the same lifetime semantics.  Passing by value
46 // generates slightly smaller code.  For more discussion, see the thread
47 // go/stringpiecebyvalue on c-users.
48 //
49 // StringPiece is also suitable for local variables if you know that
50 // the lifetime of the underlying object is longer than the lifetime
51 // of your StringPiece variable.
52 //
53 // Beware of binding a StringPiece to a temporary:
54 //   StringPiece sp = obj.MethodReturningString();  // BAD: lifetime problem
55 //
56 // This code is okay:
57 //   string str = obj.MethodReturningString();  // str owns its contents
58 //   StringPiece sp(str);  // GOOD, because str outlives sp
59 //
60 // StringPiece is sometimes a poor choice for a return value and usually a poor
61 // choice for a data member.  If you do use a StringPiece this way, it is your
62 // responsibility to ensure that the object pointed to by the StringPiece
63 // outlives the StringPiece.
64 //
65 // A StringPiece may represent just part of a string; thus the name "Piece".
66 // For example, when splitting a string, vector<StringPiece> is a natural data
67 // type for the output.  For another example, a Cord is a non-contiguous,
68 // potentially very long string-like object.  The Cord class has an interface
69 // that iteratively provides StringPiece objects that point to the
70 // successive pieces of a Cord object.
71 //
72 // A StringPiece is not null-terminated.  If you write code that scans a
73 // StringPiece, you must check its length before reading any characters.
74 // Common idioms that work on null-terminated strings do not work on
75 // StringPiece objects.
76 //
77 // There are several ways to create a null StringPiece:
78 //   StringPiece()
79 //   StringPiece(nullptr)
80 //   StringPiece(nullptr, 0)
81 // For all of the above, sp.data() == nullptr, sp.length() == 0,
82 // and sp.empty() == true.  Also, if you create a StringPiece with
83 // a non-null pointer then sp.data() != nullptr.  Once created,
84 // sp.data() will stay either nullptr or not-nullptr, except if you call
85 // sp.clear() or sp.set().
86 //
87 // Thus, you can use StringPiece(nullptr) to signal an out-of-band value
88 // that is different from other StringPiece values.  This is similar
89 // to the way that const char* p1 = nullptr; is different from
90 // const char* p2 = "";.
91 //
92 // There are many ways to create an empty StringPiece:
93 //   StringPiece()
94 //   StringPiece(nullptr)
95 //   StringPiece(nullptr, 0)
96 //   StringPiece("")
97 //   StringPiece("", 0)
98 //   StringPiece("abcdef", 0)
99 //   StringPiece("abcdef"+6, 0)
100 // For all of the above, sp.length() will be 0 and sp.empty() will be true.
101 // For some empty StringPiece values, sp.data() will be nullptr.
102 // For some empty StringPiece values, sp.data() will not be nullptr.
103 //
// Be careful not to confuse: null StringPiece and empty StringPiece.
// The set of empty StringPieces properly includes the set of null StringPieces.
// That is, every null StringPiece is an empty StringPiece,
// but some non-null StringPieces are empty StringPieces too.
108 //
// All empty StringPiece values compare equal to each other.
// Even a null StringPiece compares equal to a non-null empty StringPiece:
//  StringPiece() == StringPiece("", 0)
//  StringPiece(nullptr) == StringPiece("abc", 0)
//  StringPiece(nullptr, 0) == StringPiece("abcdef"+6, 0)
114 //
115 // Look carefully at this example:
116 //   StringPiece("") == nullptr
117 // True or false?  TRUE, because StringPiece::operator== converts
118 // the right-hand side from nullptr to StringPiece(nullptr),
119 // and then compares two zero-length spans of characters.
120 // However, we are working to make this example produce a compile error.
121 //
// Suppose you want to write:
//   bool TestWhat?(StringPiece sp) { return sp == nullptr; }  // BAD
// Do not do that.  Write one of these instead:
//   bool TestNull(StringPiece sp) { return sp.data() == nullptr; }
//   bool TestEmpty(StringPiece sp) { return sp.empty(); }
// The intent of TestWhat? is unclear.  Did you mean TestNull or TestEmpty?
// Right now, TestWhat? behaves like TestEmpty.
// We are working to make TestWhat? produce a compile error.
// TestNull is good to test for an out-of-band signal.
// TestEmpty is good to test for an empty StringPiece.
132 //
133 // Caveats (again):
134 // (1) The lifetime of the pointed-to string (or piece of a string)
135 //     must be longer than the lifetime of the StringPiece.
136 // (2) There may or may not be a '\0' character after the end of
137 //     StringPiece data.
138 // (3) A null StringPiece is empty.
139 //     An empty StringPiece may or may not be a null StringPiece.
140 
141 #ifndef GOOGLE_PROTOBUF_STUBS_STRINGPIECE_H_
142 #define GOOGLE_PROTOBUF_STUBS_STRINGPIECE_H_
143 
144 #include <assert.h>
145 #include <stddef.h>
146 #include <string.h>
147 #include <iosfwd>
148 #include <limits>
149 #include <string>
150 
151 #include <google/protobuf/stubs/common.h>
152 #include <google/protobuf/stubs/hash.h>
153 
154 #include <google/protobuf/port_def.inc>
155 
156 namespace google {
157 namespace protobuf {
158 // StringPiece has *two* size types.
159 // StringPiece::size_type
160 //   is unsigned
161 //   is 32 bits in LP32, 64 bits in LP64, 64 bits in LLP64
162 //   no future changes intended
163 // stringpiece_ssize_type
164 //   is signed
165 //   is 32 bits in LP32, 64 bits in LP64, 64 bits in LLP64
166 //   future changes intended: http://go/64BitStringPiece
167 //
168 typedef string::difference_type stringpiece_ssize_type;
169 
// STRINGPIECE_CHECK_SIZE protects us from 32-bit overflows.
// It is 1 in debug builds (NDEBUG not defined) and in fortified builds
// (_FORTIFY_SOURCE > 0), and 0 otherwise.
// TODO(mec): delete this after stringpiece_ssize_type goes 64 bit.
#if !defined(NDEBUG)
#define STRINGPIECE_CHECK_SIZE 1
#elif defined(_FORTIFY_SOURCE) && _FORTIFY_SOURCE > 0
#define STRINGPIECE_CHECK_SIZE 1
#else
#define STRINGPIECE_CHECK_SIZE 0
#endif
179 
180 class PROTOBUF_EXPORT StringPiece {
181  private:
182   const char* ptr_;
183   stringpiece_ssize_type length_;
184 
185   // Prevent overflow in debug mode or fortified mode.
186   // sizeof(stringpiece_ssize_type) may be smaller than sizeof(size_t).
CheckedSsizeTFromSizeT(size_t size)187   static stringpiece_ssize_type CheckedSsizeTFromSizeT(size_t size) {
188 #if STRINGPIECE_CHECK_SIZE > 0
189 #ifdef max
190 #undef max
191 #endif
192     if (size > static_cast<size_t>(
193         std::numeric_limits<stringpiece_ssize_type>::max())) {
194       // Some people grep for this message in logs
195       // so take care if you ever change it.
196       LogFatalSizeTooBig(size, "size_t to int conversion");
197     }
198 #endif
199     return static_cast<stringpiece_ssize_type>(size);
200   }
201 
202   // Out-of-line error path.
203   static void LogFatalSizeTooBig(size_t size, const char* details);
204 
205  public:
206   // We provide non-explicit singleton constructors so users can pass
207   // in a "const char*" or a "string" wherever a "StringPiece" is
208   // expected.
209   //
210   // Style guide exception granted:
211   // http://goto/style-guide-exception-20978288
StringPiece()212   StringPiece() : ptr_(nullptr), length_(0) {}
213 
StringPiece(const char * str)214   StringPiece(const char* str)  // NOLINT(runtime/explicit)
215       : ptr_(str), length_(0) {
216     if (str != nullptr) {
217       length_ = CheckedSsizeTFromSizeT(strlen(str));
218     }
219   }
220 
221   template <class Allocator>
StringPiece(const std::basic_string<char,std::char_traits<char>,Allocator> & str)222   StringPiece(  // NOLINT(runtime/explicit)
223       const std::basic_string<char, std::char_traits<char>, Allocator>& str)
224       : ptr_(str.data()), length_(0) {
225     length_ = CheckedSsizeTFromSizeT(str.size());
226   }
227 
StringPiece(const char * offset,stringpiece_ssize_type len)228   StringPiece(const char* offset, stringpiece_ssize_type len)
229       : ptr_(offset), length_(len) {
230     assert(len >= 0);
231   }
232 
233   // Substring of another StringPiece.
234   // pos must be non-negative and <= x.length().
235   StringPiece(StringPiece x, stringpiece_ssize_type pos);
236   // Substring of another StringPiece.
237   // pos must be non-negative and <= x.length().
238   // len must be non-negative and will be pinned to at most x.length() - pos.
239   StringPiece(StringPiece x,
240               stringpiece_ssize_type pos,
241               stringpiece_ssize_type len);
242 
243   // data() may return a pointer to a buffer with embedded NULs, and the
244   // returned buffer may or may not be null terminated.  Therefore it is
245   // typically a mistake to pass data() to a routine that expects a NUL
246   // terminated string.
data()247   const char* data() const { return ptr_; }
size()248   stringpiece_ssize_type size() const { return length_; }
length()249   stringpiece_ssize_type length() const { return length_; }
empty()250   bool empty() const { return length_ == 0; }
251 
clear()252   void clear() {
253     ptr_ = nullptr;
254     length_ = 0;
255   }
256 
set(const char * data,stringpiece_ssize_type len)257   void set(const char* data, stringpiece_ssize_type len) {
258     assert(len >= 0);
259     ptr_ = data;
260     length_ = len;
261   }
262 
set(const char * str)263   void set(const char* str) {
264     ptr_ = str;
265     if (str != nullptr)
266       length_ = CheckedSsizeTFromSizeT(strlen(str));
267     else
268       length_ = 0;
269   }
270 
set(const void * data,stringpiece_ssize_type len)271   void set(const void* data, stringpiece_ssize_type len) {
272     ptr_ = reinterpret_cast<const char*>(data);
273     length_ = len;
274   }
275 
276   char operator[](stringpiece_ssize_type i) const {
277     assert(0 <= i);
278     assert(i < length_);
279     return ptr_[i];
280   }
281 
remove_prefix(stringpiece_ssize_type n)282   void remove_prefix(stringpiece_ssize_type n) {
283     assert(length_ >= n);
284     ptr_ += n;
285     length_ -= n;
286   }
287 
remove_suffix(stringpiece_ssize_type n)288   void remove_suffix(stringpiece_ssize_type n) {
289     assert(length_ >= n);
290     length_ -= n;
291   }
292 
293   // returns {-1, 0, 1}
compare(StringPiece x)294   int compare(StringPiece x) const {
295     const stringpiece_ssize_type min_size =
296         length_ < x.length_ ? length_ : x.length_;
297     int r = memcmp(ptr_, x.ptr_, static_cast<size_t>(min_size));
298     if (r < 0) return -1;
299     if (r > 0) return 1;
300     if (length_ < x.length_) return -1;
301     if (length_ > x.length_) return 1;
302     return 0;
303   }
304 
as_string()305   string as_string() const {
306     return ToString();
307   }
308   // We also define ToString() here, since many other string-like
309   // interfaces name the routine that converts to a C++ string
310   // "ToString", and it's confusing to have the method that does that
311   // for a StringPiece be called "as_string()".  We also leave the
312   // "as_string()" method defined here for existing code.
ToString()313   string ToString() const {
314     if (ptr_ == nullptr) return string();
315     return string(data(), static_cast<size_type>(size()));
316   }
317 
string()318   operator string() const {
319     return ToString();
320   }
321 
322   void CopyToString(string* target) const;
323   void AppendToString(string* target) const;
324 
starts_with(StringPiece x)325   bool starts_with(StringPiece x) const {
326     return (length_ >= x.length_) &&
327            (memcmp(ptr_, x.ptr_, static_cast<size_t>(x.length_)) == 0);
328   }
329 
ends_with(StringPiece x)330   bool ends_with(StringPiece x) const {
331     return ((length_ >= x.length_) &&
332             (memcmp(ptr_ + (length_-x.length_), x.ptr_,
333                  static_cast<size_t>(x.length_)) == 0));
334   }
335 
336   // Checks whether StringPiece starts with x and if so advances the beginning
337   // of it to past the match.  It's basically a shortcut for starts_with
338   // followed by remove_prefix.
339   bool Consume(StringPiece x);
340   // Like above but for the end of the string.
341   bool ConsumeFromEnd(StringPiece x);
342 
343   // standard STL container boilerplate
344   typedef char value_type;
345   typedef const char* pointer;
346   typedef const char& reference;
347   typedef const char& const_reference;
348   typedef size_t size_type;
349   typedef ptrdiff_t difference_type;
350   static const size_type npos;
351   typedef const char* const_iterator;
352   typedef const char* iterator;
353   typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
354   typedef std::reverse_iterator<iterator> reverse_iterator;
begin()355   iterator begin() const { return ptr_; }
end()356   iterator end() const { return ptr_ + length_; }
rbegin()357   const_reverse_iterator rbegin() const {
358     return const_reverse_iterator(ptr_ + length_);
359   }
rend()360   const_reverse_iterator rend() const {
361     return const_reverse_iterator(ptr_);
362   }
max_size()363   stringpiece_ssize_type max_size() const { return length_; }
capacity()364   stringpiece_ssize_type capacity() const { return length_; }
365 
366   // cpplint.py emits a false positive [build/include_what_you_use]
367   stringpiece_ssize_type copy(char* buf, size_type n, size_type pos = 0) const;  // NOLINT
368 
369   bool contains(StringPiece s) const;
370 
371   stringpiece_ssize_type find(StringPiece s, size_type pos = 0) const;
372   stringpiece_ssize_type find(char c, size_type pos = 0) const;
373   stringpiece_ssize_type rfind(StringPiece s, size_type pos = npos) const;
374   stringpiece_ssize_type rfind(char c, size_type pos = npos) const;
375 
376   stringpiece_ssize_type find_first_of(StringPiece s, size_type pos = 0) const;
377   stringpiece_ssize_type find_first_of(char c, size_type pos = 0) const {
378     return find(c, pos);
379   }
380   stringpiece_ssize_type find_first_not_of(StringPiece s,
381                                            size_type pos = 0) const;
382   stringpiece_ssize_type find_first_not_of(char c, size_type pos = 0) const;
383   stringpiece_ssize_type find_last_of(StringPiece s,
384                                       size_type pos = npos) const;
385   stringpiece_ssize_type find_last_of(char c, size_type pos = npos) const {
386     return rfind(c, pos);
387   }
388   stringpiece_ssize_type find_last_not_of(StringPiece s,
389                                           size_type pos = npos) const;
390   stringpiece_ssize_type find_last_not_of(char c, size_type pos = npos) const;
391 
392   StringPiece substr(size_type pos, size_type n = npos) const;
393 };
394 
395 // This large function is defined inline so that in a fairly common case where
396 // one of the arguments is a literal, the compiler can elide a lot of the
397 // following comparisons.
398 inline bool operator==(StringPiece x, StringPiece y) {
399   stringpiece_ssize_type len = x.size();
400   if (len != y.size()) {
401     return false;
402   }
403 
404   return x.data() == y.data() || len <= 0 ||
405       memcmp(x.data(), y.data(), static_cast<size_t>(len)) == 0;
406 }
407 
408 inline bool operator!=(StringPiece x, StringPiece y) {
409   return !(x == y);
410 }
411 
412 inline bool operator<(StringPiece x, StringPiece y) {
413   const stringpiece_ssize_type min_size =
414       x.size() < y.size() ? x.size() : y.size();
415   const int r = memcmp(x.data(), y.data(), static_cast<size_t>(min_size));
416   return (r < 0) || (r == 0 && x.size() < y.size());
417 }
418 
419 inline bool operator>(StringPiece x, StringPiece y) {
420   return y < x;
421 }
422 
423 inline bool operator<=(StringPiece x, StringPiece y) {
424   return !(x > y);
425 }
426 
427 inline bool operator>=(StringPiece x, StringPiece y) {
428   return !(x < y);
429 }
430 
431 // allow StringPiece to be logged
432 extern std::ostream& operator<<(std::ostream& o, StringPiece piece);
433 
434 namespace internal {
435 // StringPiece is not a POD and can not be used in an union (pre C++11). We
436 // need a POD version of it.
437 struct StringPiecePod {
438   // Create from a StringPiece.
CreateFromStringPieceStringPiecePod439   static StringPiecePod CreateFromStringPiece(StringPiece str) {
440     StringPiecePod pod;
441     pod.data_ = str.data();
442     pod.size_ = str.size();
443     return pod;
444   }
445 
446   // Cast to StringPiece.
StringPieceStringPiecePod447   operator StringPiece() const { return StringPiece(data_, size_); }
448 
449   bool operator==(const char* value) const {
450     return StringPiece(data_, size_) == StringPiece(value);
451   }
452 
453   char operator[](stringpiece_ssize_type i) const {
454     assert(0 <= i);
455     assert(i < size_);
456     return data_[i];
457   }
458 
dataStringPiecePod459   const char* data() const { return data_; }
460 
sizeStringPiecePod461   stringpiece_ssize_type size() const {
462     return size_;
463   }
464 
ToStringStringPiecePod465   std::string ToString() const {
466     return std::string(data_, static_cast<size_t>(size_));
467   }
468 
stringStringPiecePod469   operator string() const { return ToString(); }
470 
471  private:
472   const char* data_;
473   stringpiece_ssize_type size_;
474 };
475 
476 }  // namespace internal
477 }  // namespace protobuf
478 }  // namespace google
479 
480 GOOGLE_PROTOBUF_HASH_NAMESPACE_DECLARATION_START
481 template<> struct hash<StringPiece> {
482   size_t operator()(const StringPiece& s) const {
483     size_t result = 0;
484     for (const char *str = s.data(), *end = str + s.size(); str < end; str++) {
485       result = 5 * result + static_cast<size_t>(*str);
486     }
487     return result;
488   }
489 };
490 GOOGLE_PROTOBUF_HASH_NAMESPACE_DECLARATION_END
491 
492 #include <google/protobuf/port_undef.inc>
493 
#endif  // GOOGLE_PROTOBUF_STUBS_STRINGPIECE_H_
495