1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 // Copyright (C) 2009-2013, International Business Machines 4 // Corporation and others. All Rights Reserved. 5 // 6 // Copyright 2001 and onwards Google Inc. 7 // Author: Sanjay Ghemawat 8 9 // This code is a contribution of Google code, and the style used here is 10 // a compromise between the original Google code and the ICU coding guidelines. 11 // For example, data types are ICU-ified (size_t,int->int32_t), 12 // and API comments doxygen-ified, but function names and behavior are 13 // as in the original, if possible. 14 // Assertion-style error handling, not available in ICU, was changed to 15 // parameter "pinning" similar to UnicodeString. 16 // 17 // In addition, this is only a partial port of the original Google code, 18 // limited to what was needed so far. The (nearly) complete original code 19 // is in the ICU svn repository at icuhtml/trunk/design/strings/contrib 20 // (see ICU ticket 6765, r25517). 21 22 #ifndef __STRINGPIECE_H__ 23 #define __STRINGPIECE_H__ 24 25 /** 26 * \file 27 * \brief C++ API: StringPiece: Read-only byte string wrapper class. 28 */ 29 30 #include "unicode/utypes.h" 31 32 #if U_SHOW_CPLUSPLUS_API 33 34 #include <cstddef> 35 #include <type_traits> 36 37 #include "unicode/uobject.h" 38 #include "unicode/std_string.h" 39 40 // Arghh! I wish C++ literals were "string". 41 42 U_NAMESPACE_BEGIN 43 44 /** 45 * A string-like object that points to a sized piece of memory. 46 * 47 * We provide non-explicit singleton constructors so users can pass 48 * in a "const char*" or a "string" wherever a "StringPiece" is 49 * expected. 50 * 51 * Functions or methods may use StringPiece parameters to accept either a 52 * "const char*" or a "string" value that will be implicitly converted to a 53 * StringPiece. 54 * 55 * Systematic usage of StringPiece is encouraged as it will reduce unnecessary 56 * conversions from "const char*" to "string" and back again. 57 * 58 * @stable ICU 4.2 59 */ 60 class U_COMMON_API StringPiece : public UMemory { 61 private: 62 const char* ptr_; 63 int32_t length_; 64 65 public: 66 /** 67 * Default constructor, creates an empty StringPiece. 68 * @stable ICU 4.2 69 */ StringPiece()70 StringPiece() : ptr_(nullptr), length_(0) { } 71 72 /** 73 * Constructs from a NUL-terminated const char * pointer. 74 * @param str a NUL-terminated const char * pointer 75 * @stable ICU 4.2 76 */ 77 StringPiece(const char* str); 78 #if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN) 79 /** 80 * Constructs from a NUL-terminated const char8_t * pointer. 81 * @param str a NUL-terminated const char8_t * pointer 82 * @stable ICU 67 83 */ StringPiece(const char8_t * str)84 StringPiece(const char8_t* str) : StringPiece(reinterpret_cast<const char*>(str)) {} 85 #endif 86 /** 87 * Constructs an empty StringPiece. 88 * Needed for type disambiguation from multiple other overloads. 89 * @param p nullptr 90 * @stable ICU 67 91 */ StringPiece(std::nullptr_t p)92 StringPiece(std::nullptr_t p) : ptr_(p), length_(0) {} 93 94 /** 95 * Constructs from a std::string. 96 * @stable ICU 4.2 97 */ StringPiece(const std::string & str)98 StringPiece(const std::string& str) 99 : ptr_(str.data()), length_(static_cast<int32_t>(str.size())) { } 100 #if defined(__cpp_lib_char8_t) || defined(U_IN_DOXYGEN) 101 /** 102 * Constructs from a std::u8string. 103 * @stable ICU 67 104 */ StringPiece(const std::u8string & str)105 StringPiece(const std::u8string& str) 106 : ptr_(reinterpret_cast<const char*>(str.data())), 107 length_(static_cast<int32_t>(str.size())) { } 108 #endif 109 110 /** 111 * Constructs from some other implementation of a string piece class, from any 112 * C++ record type that has these two methods: 113 * 114 * \code{.cpp} 115 * 116 * struct OtherStringPieceClass { 117 * const char* data(); // or const char8_t* 118 * size_t size(); 119 * }; 120 * 121 * \endcode 122 * 123 * The other string piece class will typically be std::string_view from C++17 124 * or absl::string_view from Abseil. 125 * 126 * Starting with C++20, data() may also return a const char8_t* pointer, 127 * as from std::u8string_view. 128 * 129 * @param str the other string piece 130 * @stable ICU 65 131 */ 132 template <typename T, 133 typename = typename std::enable_if< 134 (std::is_same<decltype(T().data()), const char*>::value 135 #if defined(__cpp_char8_t) 136 || std::is_same<decltype(T().data()), const char8_t*>::value 137 #endif 138 ) && 139 std::is_same<decltype(T().size()), size_t>::value>::type> StringPiece(T str)140 StringPiece(T str) 141 : ptr_(reinterpret_cast<const char*>(str.data())), 142 length_(static_cast<int32_t>(str.size())) {} 143 144 /** 145 * Constructs from a const char * pointer and a specified length. 146 * @param offset a const char * pointer (need not be terminated) 147 * @param len the length of the string; must be non-negative 148 * @stable ICU 4.2 149 */ StringPiece(const char * offset,int32_t len)150 StringPiece(const char* offset, int32_t len) : ptr_(offset), length_(len) { } 151 #if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN) 152 /** 153 * Constructs from a const char8_t * pointer and a specified length. 154 * @param str a const char8_t * pointer (need not be terminated) 155 * @param len the length of the string; must be non-negative 156 * @stable ICU 67 157 */ StringPiece(const char8_t * str,int32_t len)158 StringPiece(const char8_t* str, int32_t len) : 159 StringPiece(reinterpret_cast<const char*>(str), len) {} 160 #endif 161 162 /** 163 * Substring of another StringPiece. 164 * @param x the other StringPiece 165 * @param pos start position in x; must be non-negative and <= x.length(). 166 * @stable ICU 4.2 167 */ 168 StringPiece(const StringPiece& x, int32_t pos); 169 /** 170 * Substring of another StringPiece. 171 * @param x the other StringPiece 172 * @param pos start position in x; must be non-negative and <= x.length(). 173 * @param len length of the substring; 174 * must be non-negative and will be pinned to at most x.length() - pos. 175 * @stable ICU 4.2 176 */ 177 StringPiece(const StringPiece& x, int32_t pos, int32_t len); 178 179 /** 180 * Returns the string pointer. May be nullptr if it is empty. 181 * 182 * data() may return a pointer to a buffer with embedded NULs, and the 183 * returned buffer may or may not be null terminated. Therefore it is 184 * typically a mistake to pass data() to a routine that expects a NUL 185 * terminated string. 186 * @return the string pointer 187 * @stable ICU 4.2 188 */ data()189 const char* data() const { return ptr_; } 190 /** 191 * Returns the string length. Same as length(). 192 * @return the string length 193 * @stable ICU 4.2 194 */ size()195 int32_t size() const { return length_; } 196 /** 197 * Returns the string length. Same as size(). 198 * @return the string length 199 * @stable ICU 4.2 200 */ length()201 int32_t length() const { return length_; } 202 /** 203 * Returns whether the string is empty. 204 * @return true if the string is empty 205 * @stable ICU 4.2 206 */ empty()207 UBool empty() const { return length_ == 0; } 208 209 /** 210 * Sets to an empty string. 211 * @stable ICU 4.2 212 */ clear()213 void clear() { ptr_ = nullptr; length_ = 0; } 214 215 /** 216 * Reset the stringpiece to refer to new data. 217 * @param xdata pointer the new string data. Need not be nul terminated. 218 * @param len the length of the new data 219 * @stable ICU 4.8 220 */ set(const char * xdata,int32_t len)221 void set(const char* xdata, int32_t len) { ptr_ = xdata; length_ = len; } 222 223 /** 224 * Reset the stringpiece to refer to new data. 225 * @param str a pointer to a NUL-terminated string. 226 * @stable ICU 4.8 227 */ 228 void set(const char* str); 229 230 #if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN) 231 /** 232 * Resets the stringpiece to refer to new data. 233 * @param xdata pointer the new string data. Need not be NUL-terminated. 234 * @param len the length of the new data 235 * @stable ICU 67 236 */ set(const char8_t * xdata,int32_t len)237 inline void set(const char8_t* xdata, int32_t len) { 238 set(reinterpret_cast<const char*>(xdata), len); 239 } 240 241 /** 242 * Resets the stringpiece to refer to new data. 243 * @param str a pointer to a NUL-terminated string. 244 * @stable ICU 67 245 */ set(const char8_t * str)246 inline void set(const char8_t* str) { 247 set(reinterpret_cast<const char*>(str)); 248 } 249 #endif 250 251 /** 252 * Removes the first n string units. 253 * @param n prefix length, must be non-negative and <=length() 254 * @stable ICU 4.2 255 */ remove_prefix(int32_t n)256 void remove_prefix(int32_t n) { 257 if (n >= 0) { 258 if (n > length_) { 259 n = length_; 260 } 261 ptr_ += n; 262 length_ -= n; 263 } 264 } 265 266 /** 267 * Removes the last n string units. 268 * @param n suffix length, must be non-negative and <=length() 269 * @stable ICU 4.2 270 */ remove_suffix(int32_t n)271 void remove_suffix(int32_t n) { 272 if (n >= 0) { 273 if (n <= length_) { 274 length_ -= n; 275 } else { 276 length_ = 0; 277 } 278 } 279 } 280 281 /** 282 * Searches the StringPiece for the given search string (needle); 283 * @param needle The string for which to search. 284 * @param offset Where to start searching within this string (haystack). 285 * @return The offset of needle in haystack, or -1 if not found. 286 * @stable ICU 67 287 */ 288 int32_t find(StringPiece needle, int32_t offset); 289 290 /** 291 * Compares this StringPiece with the other StringPiece, with semantics 292 * similar to std::string::compare(). 293 * @param other The string to compare to. 294 * @return below zero if this < other; above zero if this > other; 0 if this == other. 295 * @stable ICU 67 296 */ 297 int32_t compare(StringPiece other); 298 299 /** 300 * Maximum integer, used as a default value for substring methods. 301 * @stable ICU 4.2 302 */ 303 static const int32_t npos; // = 0x7fffffff; 304 305 /** 306 * Returns a substring of this StringPiece. 307 * @param pos start position; must be non-negative and <= length(). 308 * @param len length of the substring; 309 * must be non-negative and will be pinned to at most length() - pos. 310 * @return the substring StringPiece 311 * @stable ICU 4.2 312 */ 313 StringPiece substr(int32_t pos, int32_t len = npos) const { 314 return StringPiece(*this, pos, len); 315 } 316 }; 317 318 /** 319 * Global operator == for StringPiece 320 * @param x The first StringPiece to compare. 321 * @param y The second StringPiece to compare. 322 * @return true if the string data is equal 323 * @stable ICU 4.8 324 */ 325 U_EXPORT UBool U_EXPORT2 326 operator==(const StringPiece& x, const StringPiece& y); 327 328 /** 329 * Global operator != for StringPiece 330 * @param x The first StringPiece to compare. 331 * @param y The second StringPiece to compare. 332 * @return true if the string data is not equal 333 * @stable ICU 4.8 334 */ 335 inline UBool operator!=(const StringPiece& x, const StringPiece& y) { 336 return !(x == y); 337 } 338 339 U_NAMESPACE_END 340 341 #endif /* U_SHOW_CPLUSPLUS_API */ 342 343 #endif // __STRINGPIECE_H__ 344