1 //===--- StringRef.h - Constant String Reference Wrapper --------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #ifndef LLVM_ADT_STRINGREF_H 11 #define LLVM_ADT_STRINGREF_H 12 13 #include "llvm/ADT/STLExtras.h" 14 #include "llvm/ADT/iterator_range.h" 15 #include "llvm/Support/Compiler.h" 16 #include <algorithm> 17 #include <cassert> 18 #include <cstring> 19 #include <limits> 20 #include <string> 21 #include <utility> 22 23 namespace llvm { 24 template <typename T> 25 class SmallVectorImpl; 26 class APInt; 27 class hash_code; 28 class StringRef; 29 30 /// Helper functions for StringRef::getAsInteger. 31 bool getAsUnsignedInteger(StringRef Str, unsigned Radix, 32 unsigned long long &Result); 33 34 bool getAsSignedInteger(StringRef Str, unsigned Radix, long long &Result); 35 36 bool consumeUnsignedInteger(StringRef &Str, unsigned Radix, 37 unsigned long long &Result); 38 bool consumeSignedInteger(StringRef &Str, unsigned Radix, long long &Result); 39 40 /// StringRef - Represent a constant reference to a string, i.e. a character 41 /// array and a length, which need not be null terminated. 42 /// 43 /// This class does not own the string data, it is expected to be used in 44 /// situations where the character data resides in some other buffer, whose 45 /// lifetime extends past that of the StringRef. For this reason, it is not in 46 /// general safe to store a StringRef. 47 class StringRef { 48 public: 49 typedef const char *iterator; 50 typedef const char *const_iterator; 51 static const size_t npos = ~size_t(0); 52 typedef size_t size_type; 53 54 private: 55 /// The start of the string, in an external buffer. 56 const char *Data = nullptr; 57 58 /// The length of the string. 59 size_t Length = 0; 60 61 // Workaround memcmp issue with null pointers (undefined behavior) 62 // by providing a specialized version 63 LLVM_ATTRIBUTE_ALWAYS_INLINE compareMemory(const char * Lhs,const char * Rhs,size_t Length)64 static int compareMemory(const char *Lhs, const char *Rhs, size_t Length) { 65 if (Length == 0) { return 0; } 66 return ::memcmp(Lhs,Rhs,Length); 67 } 68 69 public: 70 /// @name Constructors 71 /// @{ 72 73 /// Construct an empty string ref. 74 /*implicit*/ StringRef() = default; 75 76 /// Disable conversion from nullptr. This prevents things like 77 /// if (S == nullptr) 78 StringRef(std::nullptr_t) = delete; 79 80 /// Construct a string ref from a cstring. 81 LLVM_ATTRIBUTE_ALWAYS_INLINE StringRef(const char * Str)82 /*implicit*/ StringRef(const char *Str) 83 : Data(Str), Length(Str ? ::strlen(Str) : 0) {} 84 85 /// Construct a string ref from a pointer and length. 86 LLVM_ATTRIBUTE_ALWAYS_INLINE StringRef(const char * data,size_t length)87 /*implicit*/ constexpr StringRef(const char *data, size_t length) 88 : Data(data), Length(length) {} 89 90 /// Construct a string ref from an std::string. 91 LLVM_ATTRIBUTE_ALWAYS_INLINE StringRef(const std::string & Str)92 /*implicit*/ StringRef(const std::string &Str) 93 : Data(Str.data()), Length(Str.length()) {} 94 withNullAsEmpty(const char * data)95 static StringRef withNullAsEmpty(const char *data) { 96 return StringRef(data ? data : ""); 97 } 98 99 /// @} 100 /// @name Iterators 101 /// @{ 102 begin()103 iterator begin() const { return Data; } 104 end()105 iterator end() const { return Data + Length; } 106 bytes_begin()107 const unsigned char *bytes_begin() const { 108 return reinterpret_cast<const unsigned char *>(begin()); 109 } bytes_end()110 const unsigned char *bytes_end() const { 111 return reinterpret_cast<const unsigned char *>(end()); 112 } bytes()113 iterator_range<const unsigned char *> bytes() const { 114 return make_range(bytes_begin(), bytes_end()); 115 } 116 117 /// @} 118 /// @name String Operations 119 /// @{ 120 121 /// data - Get a pointer to the start of the string (which may not be null 122 /// terminated). 123 LLVM_NODISCARD 124 LLVM_ATTRIBUTE_ALWAYS_INLINE data()125 const char *data() const { return Data; } 126 127 /// empty - Check if the string is empty. 128 LLVM_NODISCARD 129 LLVM_ATTRIBUTE_ALWAYS_INLINE empty()130 bool empty() const { return Length == 0; } 131 132 /// size - Get the string size. 133 LLVM_NODISCARD 134 LLVM_ATTRIBUTE_ALWAYS_INLINE size()135 size_t size() const { return Length; } 136 137 /// front - Get the first character in the string. 138 LLVM_NODISCARD front()139 char front() const { 140 assert(!empty()); 141 return Data[0]; 142 } 143 144 /// back - Get the last character in the string. 145 LLVM_NODISCARD back()146 char back() const { 147 assert(!empty()); 148 return Data[Length-1]; 149 } 150 151 // copy - Allocate copy in Allocator and return StringRef to it. 152 template <typename Allocator> copy(Allocator & A)153 LLVM_NODISCARD StringRef copy(Allocator &A) const { 154 // Don't request a length 0 copy from the allocator. 155 if (empty()) 156 return StringRef(); 157 char *S = A.template Allocate<char>(Length); 158 std::copy(begin(), end(), S); 159 return StringRef(S, Length); 160 } 161 162 /// equals - Check for string equality, this is more efficient than 163 /// compare() when the relative ordering of inequal strings isn't needed. 164 LLVM_NODISCARD 165 LLVM_ATTRIBUTE_ALWAYS_INLINE equals(StringRef RHS)166 bool equals(StringRef RHS) const { 167 return (Length == RHS.Length && 168 compareMemory(Data, RHS.Data, RHS.Length) == 0); 169 } 170 171 /// equals_lower - Check for string equality, ignoring case. 172 LLVM_NODISCARD equals_lower(StringRef RHS)173 bool equals_lower(StringRef RHS) const { 174 return Length == RHS.Length && compare_lower(RHS) == 0; 175 } 176 177 /// compare - Compare two strings; the result is -1, 0, or 1 if this string 178 /// is lexicographically less than, equal to, or greater than the \p RHS. 179 LLVM_NODISCARD 180 LLVM_ATTRIBUTE_ALWAYS_INLINE compare(StringRef RHS)181 int compare(StringRef RHS) const { 182 // Check the prefix for a mismatch. 183 if (int Res = compareMemory(Data, RHS.Data, std::min(Length, RHS.Length))) 184 return Res < 0 ? -1 : 1; 185 186 // Otherwise the prefixes match, so we only need to check the lengths. 187 if (Length == RHS.Length) 188 return 0; 189 return Length < RHS.Length ? -1 : 1; 190 } 191 192 /// compare_lower - Compare two strings, ignoring case. 193 LLVM_NODISCARD 194 int compare_lower(StringRef RHS) const; 195 196 /// compare_numeric - Compare two strings, treating sequences of digits as 197 /// numbers. 198 LLVM_NODISCARD 199 int compare_numeric(StringRef RHS) const; 200 201 /// \brief Determine the edit distance between this string and another 202 /// string. 203 /// 204 /// \param Other the string to compare this string against. 205 /// 206 /// \param AllowReplacements whether to allow character 207 /// replacements (change one character into another) as a single 208 /// operation, rather than as two operations (an insertion and a 209 /// removal). 210 /// 211 /// \param MaxEditDistance If non-zero, the maximum edit distance that 212 /// this routine is allowed to compute. If the edit distance will exceed 213 /// that maximum, returns \c MaxEditDistance+1. 214 /// 215 /// \returns the minimum number of character insertions, removals, 216 /// or (if \p AllowReplacements is \c true) replacements needed to 217 /// transform one of the given strings into the other. If zero, 218 /// the strings are identical. 219 LLVM_NODISCARD 220 unsigned edit_distance(StringRef Other, bool AllowReplacements = true, 221 unsigned MaxEditDistance = 0) const; 222 223 /// str - Get the contents as an std::string. 224 LLVM_NODISCARD str()225 std::string str() const { 226 if (!Data) return std::string(); 227 return std::string(Data, Length); 228 } 229 230 /// @} 231 /// @name Operator Overloads 232 /// @{ 233 234 LLVM_NODISCARD 235 char operator[](size_t Index) const { 236 assert(Index < Length && "Invalid index!"); 237 return Data[Index]; 238 } 239 240 /// Disallow accidental assignment from a temporary std::string. 241 /// 242 /// The declaration here is extra complicated so that `stringRef = {}` 243 /// and `stringRef = "abc"` continue to select the move assignment operator. 244 template <typename T> 245 typename std::enable_if<std::is_same<T, std::string>::value, 246 StringRef>::type & 247 operator=(T &&Str) = delete; 248 249 /// @} 250 /// @name Type Conversions 251 /// @{ 252 string()253 operator std::string() const { 254 return str(); 255 } 256 257 /// @} 258 /// @name String Predicates 259 /// @{ 260 261 /// Check if this string starts with the given \p Prefix. 262 LLVM_NODISCARD 263 LLVM_ATTRIBUTE_ALWAYS_INLINE startswith(StringRef Prefix)264 bool startswith(StringRef Prefix) const { 265 return Length >= Prefix.Length && 266 compareMemory(Data, Prefix.Data, Prefix.Length) == 0; 267 } 268 269 /// Check if this string starts with the given \p Prefix, ignoring case. 270 LLVM_NODISCARD 271 bool startswith_lower(StringRef Prefix) const; 272 273 /// Check if this string ends with the given \p Suffix. 274 LLVM_NODISCARD 275 LLVM_ATTRIBUTE_ALWAYS_INLINE endswith(StringRef Suffix)276 bool endswith(StringRef Suffix) const { 277 return Length >= Suffix.Length && 278 compareMemory(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 0; 279 } 280 281 /// Check if this string ends with the given \p Suffix, ignoring case. 282 LLVM_NODISCARD 283 bool endswith_lower(StringRef Suffix) const; 284 285 /// @} 286 /// @name String Searching 287 /// @{ 288 289 /// Search for the first character \p C in the string. 290 /// 291 /// \returns The index of the first occurrence of \p C, or npos if not 292 /// found. 293 LLVM_NODISCARD 294 LLVM_ATTRIBUTE_ALWAYS_INLINE 295 size_t find(char C, size_t From = 0) const { 296 size_t FindBegin = std::min(From, Length); 297 if (FindBegin < Length) { // Avoid calling memchr with nullptr. 298 // Just forward to memchr, which is faster than a hand-rolled loop. 299 if (const void *P = ::memchr(Data + FindBegin, C, Length - FindBegin)) 300 return static_cast<const char *>(P) - Data; 301 } 302 return npos; 303 } 304 305 /// Search for the first character \p C in the string, ignoring case. 306 /// 307 /// \returns The index of the first occurrence of \p C, or npos if not 308 /// found. 309 LLVM_NODISCARD 310 size_t find_lower(char C, size_t From = 0) const; 311 312 /// Search for the first character satisfying the predicate \p F 313 /// 314 /// \returns The index of the first character satisfying \p F starting from 315 /// \p From, or npos if not found. 316 LLVM_NODISCARD 317 LLVM_ATTRIBUTE_ALWAYS_INLINE 318 size_t find_if(function_ref<bool(char)> F, size_t From = 0) const { 319 StringRef S = drop_front(From); 320 while (!S.empty()) { 321 if (F(S.front())) 322 return size() - S.size(); 323 S = S.drop_front(); 324 } 325 return npos; 326 } 327 328 /// Search for the first character not satisfying the predicate \p F 329 /// 330 /// \returns The index of the first character not satisfying \p F starting 331 /// from \p From, or npos if not found. 332 LLVM_NODISCARD 333 LLVM_ATTRIBUTE_ALWAYS_INLINE 334 size_t find_if_not(function_ref<bool(char)> F, size_t From = 0) const { 335 return find_if([F](char c) { return !F(c); }, From); 336 } 337 338 /// Search for the first string \p Str in the string. 339 /// 340 /// \returns The index of the first occurrence of \p Str, or npos if not 341 /// found. 342 LLVM_NODISCARD 343 size_t find(StringRef Str, size_t From = 0) const; 344 345 /// Search for the first string \p Str in the string, ignoring case. 346 /// 347 /// \returns The index of the first occurrence of \p Str, or npos if not 348 /// found. 349 LLVM_NODISCARD 350 size_t find_lower(StringRef Str, size_t From = 0) const; 351 352 /// Search for the last character \p C in the string. 353 /// 354 /// \returns The index of the last occurrence of \p C, or npos if not 355 /// found. 356 LLVM_NODISCARD 357 size_t rfind(char C, size_t From = npos) const { 358 From = std::min(From, Length); 359 size_t i = From; 360 while (i != 0) { 361 --i; 362 if (Data[i] == C) 363 return i; 364 } 365 return npos; 366 } 367 368 /// Search for the last character \p C in the string, ignoring case. 369 /// 370 /// \returns The index of the last occurrence of \p C, or npos if not 371 /// found. 372 LLVM_NODISCARD 373 size_t rfind_lower(char C, size_t From = npos) const; 374 375 /// Search for the last string \p Str in the string. 376 /// 377 /// \returns The index of the last occurrence of \p Str, or npos if not 378 /// found. 379 LLVM_NODISCARD 380 size_t rfind(StringRef Str) const; 381 382 /// Search for the last string \p Str in the string, ignoring case. 383 /// 384 /// \returns The index of the last occurrence of \p Str, or npos if not 385 /// found. 386 LLVM_NODISCARD 387 size_t rfind_lower(StringRef Str) const; 388 389 /// Find the first character in the string that is \p C, or npos if not 390 /// found. Same as find. 391 LLVM_NODISCARD 392 size_t find_first_of(char C, size_t From = 0) const { 393 return find(C, From); 394 } 395 396 /// Find the first character in the string that is in \p Chars, or npos if 397 /// not found. 398 /// 399 /// Complexity: O(size() + Chars.size()) 400 LLVM_NODISCARD 401 size_t find_first_of(StringRef Chars, size_t From = 0) const; 402 403 /// Find the first character in the string that is not \p C or npos if not 404 /// found. 405 LLVM_NODISCARD 406 size_t find_first_not_of(char C, size_t From = 0) const; 407 408 /// Find the first character in the string that is not in the string 409 /// \p Chars, or npos if not found. 410 /// 411 /// Complexity: O(size() + Chars.size()) 412 LLVM_NODISCARD 413 size_t find_first_not_of(StringRef Chars, size_t From = 0) const; 414 415 /// Find the last character in the string that is \p C, or npos if not 416 /// found. 417 LLVM_NODISCARD 418 size_t find_last_of(char C, size_t From = npos) const { 419 return rfind(C, From); 420 } 421 422 /// Find the last character in the string that is in \p C, or npos if not 423 /// found. 424 /// 425 /// Complexity: O(size() + Chars.size()) 426 LLVM_NODISCARD 427 size_t find_last_of(StringRef Chars, size_t From = npos) const; 428 429 /// Find the last character in the string that is not \p C, or npos if not 430 /// found. 431 LLVM_NODISCARD 432 size_t find_last_not_of(char C, size_t From = npos) const; 433 434 /// Find the last character in the string that is not in \p Chars, or 435 /// npos if not found. 436 /// 437 /// Complexity: O(size() + Chars.size()) 438 LLVM_NODISCARD 439 size_t find_last_not_of(StringRef Chars, size_t From = npos) const; 440 441 /// Return true if the given string is a substring of *this, and false 442 /// otherwise. 443 LLVM_NODISCARD 444 LLVM_ATTRIBUTE_ALWAYS_INLINE contains(StringRef Other)445 bool contains(StringRef Other) const { return find(Other) != npos; } 446 447 /// Return true if the given character is contained in *this, and false 448 /// otherwise. 449 LLVM_NODISCARD 450 LLVM_ATTRIBUTE_ALWAYS_INLINE contains(char C)451 bool contains(char C) const { return find_first_of(C) != npos; } 452 453 /// Return true if the given string is a substring of *this, and false 454 /// otherwise. 455 LLVM_NODISCARD 456 LLVM_ATTRIBUTE_ALWAYS_INLINE contains_lower(StringRef Other)457 bool contains_lower(StringRef Other) const { 458 return find_lower(Other) != npos; 459 } 460 461 /// Return true if the given character is contained in *this, and false 462 /// otherwise. 463 LLVM_NODISCARD 464 LLVM_ATTRIBUTE_ALWAYS_INLINE contains_lower(char C)465 bool contains_lower(char C) const { return find_lower(C) != npos; } 466 467 /// @} 468 /// @name Helpful Algorithms 469 /// @{ 470 471 /// Return the number of occurrences of \p C in the string. 472 LLVM_NODISCARD count(char C)473 size_t count(char C) const { 474 size_t Count = 0; 475 for (size_t i = 0, e = Length; i != e; ++i) 476 if (Data[i] == C) 477 ++Count; 478 return Count; 479 } 480 481 /// Return the number of non-overlapped occurrences of \p Str in 482 /// the string. 483 size_t count(StringRef Str) const; 484 485 /// Parse the current string as an integer of the specified radix. If 486 /// \p Radix is specified as zero, this does radix autosensing using 487 /// extended C rules: 0 is octal, 0x is hex, 0b is binary. 488 /// 489 /// If the string is invalid or if only a subset of the string is valid, 490 /// this returns true to signify the error. The string is considered 491 /// erroneous if empty or if it overflows T. 492 template <typename T> 493 typename std::enable_if<std::numeric_limits<T>::is_signed, bool>::type getAsInteger(unsigned Radix,T & Result)494 getAsInteger(unsigned Radix, T &Result) const { 495 long long LLVal; 496 if (getAsSignedInteger(*this, Radix, LLVal) || 497 static_cast<T>(LLVal) != LLVal) 498 return true; 499 Result = LLVal; 500 return false; 501 } 502 503 template <typename T> 504 typename std::enable_if<!std::numeric_limits<T>::is_signed, bool>::type getAsInteger(unsigned Radix,T & Result)505 getAsInteger(unsigned Radix, T &Result) const { 506 unsigned long long ULLVal; 507 // The additional cast to unsigned long long is required to avoid the 508 // Visual C++ warning C4805: '!=' : unsafe mix of type 'bool' and type 509 // 'unsigned __int64' when instantiating getAsInteger with T = bool. 510 if (getAsUnsignedInteger(*this, Radix, ULLVal) || 511 static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal) 512 return true; 513 Result = ULLVal; 514 return false; 515 } 516 517 /// Parse the current string as an integer of the specified radix. If 518 /// \p Radix is specified as zero, this does radix autosensing using 519 /// extended C rules: 0 is octal, 0x is hex, 0b is binary. 520 /// 521 /// If the string does not begin with a number of the specified radix, 522 /// this returns true to signify the error. The string is considered 523 /// erroneous if empty or if it overflows T. 524 /// The portion of the string representing the discovered numeric value 525 /// is removed from the beginning of the string. 526 template <typename T> 527 typename std::enable_if<std::numeric_limits<T>::is_signed, bool>::type consumeInteger(unsigned Radix,T & Result)528 consumeInteger(unsigned Radix, T &Result) { 529 long long LLVal; 530 if (consumeSignedInteger(*this, Radix, LLVal) || 531 static_cast<long long>(static_cast<T>(LLVal)) != LLVal) 532 return true; 533 Result = LLVal; 534 return false; 535 } 536 537 template <typename T> 538 typename std::enable_if<!std::numeric_limits<T>::is_signed, bool>::type consumeInteger(unsigned Radix,T & Result)539 consumeInteger(unsigned Radix, T &Result) { 540 unsigned long long ULLVal; 541 if (consumeUnsignedInteger(*this, Radix, ULLVal) || 542 static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal) 543 return true; 544 Result = ULLVal; 545 return false; 546 } 547 548 /// Parse the current string as an integer of the specified \p Radix, or of 549 /// an autosensed radix if the \p Radix given is 0. The current value in 550 /// \p Result is discarded, and the storage is changed to be wide enough to 551 /// store the parsed integer. 552 /// 553 /// \returns true if the string does not solely consist of a valid 554 /// non-empty number in the appropriate base. 555 /// 556 /// APInt::fromString is superficially similar but assumes the 557 /// string is well-formed in the given radix. 558 bool getAsInteger(unsigned Radix, APInt &Result) const; 559 560 /// @} 561 /// @name String Operations 562 /// @{ 563 564 // Convert the given ASCII string to lowercase. 565 LLVM_NODISCARD 566 std::string lower() const; 567 568 /// Convert the given ASCII string to uppercase. 569 LLVM_NODISCARD 570 std::string upper() const; 571 572 /// @} 573 /// @name Substring Operations 574 /// @{ 575 576 /// Return a reference to the substring from [Start, Start + N). 577 /// 578 /// \param Start The index of the starting character in the substring; if 579 /// the index is npos or greater than the length of the string then the 580 /// empty substring will be returned. 581 /// 582 /// \param N The number of characters to included in the substring. If N 583 /// exceeds the number of characters remaining in the string, the string 584 /// suffix (starting with \p Start) will be returned. 585 LLVM_NODISCARD 586 LLVM_ATTRIBUTE_ALWAYS_INLINE 587 StringRef substr(size_t Start, size_t N = npos) const { 588 Start = std::min(Start, Length); 589 return StringRef(Data + Start, std::min(N, Length - Start)); 590 } 591 592 /// Return a StringRef equal to 'this' but with only the first \p N 593 /// elements remaining. If \p N is greater than the length of the 594 /// string, the entire string is returned. 595 LLVM_NODISCARD 596 LLVM_ATTRIBUTE_ALWAYS_INLINE 597 StringRef take_front(size_t N = 1) const { 598 if (N >= size()) 599 return *this; 600 return drop_back(size() - N); 601 } 602 603 /// Return a StringRef equal to 'this' but with only the first \p N 604 /// elements remaining. If \p N is greater than the length of the 605 /// string, the entire string is returned. 606 LLVM_NODISCARD 607 LLVM_ATTRIBUTE_ALWAYS_INLINE 608 StringRef take_back(size_t N = 1) const { 609 if (N >= size()) 610 return *this; 611 return drop_front(size() - N); 612 } 613 614 /// Return the longest prefix of 'this' such that every character 615 /// in the prefix satisfies the given predicate. 616 LLVM_NODISCARD 617 LLVM_ATTRIBUTE_ALWAYS_INLINE take_while(function_ref<bool (char)> F)618 StringRef take_while(function_ref<bool(char)> F) const { 619 return substr(0, find_if_not(F)); 620 } 621 622 /// Return the longest prefix of 'this' such that no character in 623 /// the prefix satisfies the given predicate. 624 LLVM_NODISCARD 625 LLVM_ATTRIBUTE_ALWAYS_INLINE take_until(function_ref<bool (char)> F)626 StringRef take_until(function_ref<bool(char)> F) const { 627 return substr(0, find_if(F)); 628 } 629 630 /// Return a StringRef equal to 'this' but with the first \p N elements 631 /// dropped. 632 LLVM_NODISCARD 633 LLVM_ATTRIBUTE_ALWAYS_INLINE 634 StringRef drop_front(size_t N = 1) const { 635 assert(size() >= N && "Dropping more elements than exist"); 636 return substr(N); 637 } 638 639 /// Return a StringRef equal to 'this' but with the last \p N elements 640 /// dropped. 641 LLVM_NODISCARD 642 LLVM_ATTRIBUTE_ALWAYS_INLINE 643 StringRef drop_back(size_t N = 1) const { 644 assert(size() >= N && "Dropping more elements than exist"); 645 return substr(0, size()-N); 646 } 647 648 /// Return a StringRef equal to 'this', but with all characters satisfying 649 /// the given predicate dropped from the beginning of the string. 650 LLVM_NODISCARD 651 LLVM_ATTRIBUTE_ALWAYS_INLINE drop_while(function_ref<bool (char)> F)652 StringRef drop_while(function_ref<bool(char)> F) const { 653 return substr(find_if_not(F)); 654 } 655 656 /// Return a StringRef equal to 'this', but with all characters not 657 /// satisfying the given predicate dropped from the beginning of the string. 658 LLVM_NODISCARD 659 LLVM_ATTRIBUTE_ALWAYS_INLINE drop_until(function_ref<bool (char)> F)660 StringRef drop_until(function_ref<bool(char)> F) const { 661 return substr(find_if(F)); 662 } 663 664 /// Returns true if this StringRef has the given prefix and removes that 665 /// prefix. 666 LLVM_ATTRIBUTE_ALWAYS_INLINE consume_front(StringRef Prefix)667 bool consume_front(StringRef Prefix) { 668 if (!startswith(Prefix)) 669 return false; 670 671 *this = drop_front(Prefix.size()); 672 return true; 673 } 674 675 /// Returns true if this StringRef has the given suffix and removes that 676 /// suffix. 677 LLVM_ATTRIBUTE_ALWAYS_INLINE consume_back(StringRef Suffix)678 bool consume_back(StringRef Suffix) { 679 if (!endswith(Suffix)) 680 return false; 681 682 *this = drop_back(Suffix.size()); 683 return true; 684 } 685 686 /// Return a reference to the substring from [Start, End). 687 /// 688 /// \param Start The index of the starting character in the substring; if 689 /// the index is npos or greater than the length of the string then the 690 /// empty substring will be returned. 691 /// 692 /// \param End The index following the last character to include in the 693 /// substring. If this is npos or exceeds the number of characters 694 /// remaining in the string, the string suffix (starting with \p Start) 695 /// will be returned. If this is less than \p Start, an empty string will 696 /// be returned. 697 LLVM_NODISCARD 698 LLVM_ATTRIBUTE_ALWAYS_INLINE slice(size_t Start,size_t End)699 StringRef slice(size_t Start, size_t End) const { 700 Start = std::min(Start, Length); 701 End = std::min(std::max(Start, End), Length); 702 return StringRef(Data + Start, End - Start); 703 } 704 705 /// Split into two substrings around the first occurrence of a separator 706 /// character. 707 /// 708 /// If \p Separator is in the string, then the result is a pair (LHS, RHS) 709 /// such that (*this == LHS + Separator + RHS) is true and RHS is 710 /// maximal. If \p Separator is not in the string, then the result is a 711 /// pair (LHS, RHS) where (*this == LHS) and (RHS == ""). 712 /// 713 /// \param Separator The character to split on. 714 /// \returns The split substrings. 715 LLVM_NODISCARD split(char Separator)716 std::pair<StringRef, StringRef> split(char Separator) const { 717 size_t Idx = find(Separator); 718 if (Idx == npos) 719 return std::make_pair(*this, StringRef()); 720 return std::make_pair(slice(0, Idx), slice(Idx+1, npos)); 721 } 722 723 /// Split into two substrings around the first occurrence of a separator 724 /// string. 725 /// 726 /// If \p Separator is in the string, then the result is a pair (LHS, RHS) 727 /// such that (*this == LHS + Separator + RHS) is true and RHS is 728 /// maximal. If \p Separator is not in the string, then the result is a 729 /// pair (LHS, RHS) where (*this == LHS) and (RHS == ""). 730 /// 731 /// \param Separator - The string to split on. 732 /// \return - The split substrings. 733 LLVM_NODISCARD split(StringRef Separator)734 std::pair<StringRef, StringRef> split(StringRef Separator) const { 735 size_t Idx = find(Separator); 736 if (Idx == npos) 737 return std::make_pair(*this, StringRef()); 738 return std::make_pair(slice(0, Idx), slice(Idx + Separator.size(), npos)); 739 } 740 741 /// Split into substrings around the occurrences of a separator string. 742 /// 743 /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most 744 /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1 745 /// elements are added to A. 746 /// If \p KeepEmpty is false, empty strings are not added to \p A. They 747 /// still count when considering \p MaxSplit 748 /// An useful invariant is that 749 /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true 750 /// 751 /// \param A - Where to put the substrings. 752 /// \param Separator - The string to split on. 753 /// \param MaxSplit - The maximum number of times the string is split. 754 /// \param KeepEmpty - True if empty substring should be added. 755 void split(SmallVectorImpl<StringRef> &A, 756 StringRef Separator, int MaxSplit = -1, 757 bool KeepEmpty = true) const; 758 759 /// Split into substrings around the occurrences of a separator character. 760 /// 761 /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most 762 /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1 763 /// elements are added to A. 764 /// If \p KeepEmpty is false, empty strings are not added to \p A. They 765 /// still count when considering \p MaxSplit 766 /// An useful invariant is that 767 /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true 768 /// 769 /// \param A - Where to put the substrings. 770 /// \param Separator - The string to split on. 771 /// \param MaxSplit - The maximum number of times the string is split. 772 /// \param KeepEmpty - True if empty substring should be added. 773 void split(SmallVectorImpl<StringRef> &A, char Separator, int MaxSplit = -1, 774 bool KeepEmpty = true) const; 775 776 /// Split into two substrings around the last occurrence of a separator 777 /// character. 778 /// 779 /// If \p Separator is in the string, then the result is a pair (LHS, RHS) 780 /// such that (*this == LHS + Separator + RHS) is true and RHS is 781 /// minimal. If \p Separator is not in the string, then the result is a 782 /// pair (LHS, RHS) where (*this == LHS) and (RHS == ""). 783 /// 784 /// \param Separator - The character to split on. 785 /// \return - The split substrings. 786 LLVM_NODISCARD rsplit(char Separator)787 std::pair<StringRef, StringRef> rsplit(char Separator) const { 788 size_t Idx = rfind(Separator); 789 if (Idx == npos) 790 return std::make_pair(*this, StringRef()); 791 return std::make_pair(slice(0, Idx), slice(Idx+1, npos)); 792 } 793 794 /// Return string with consecutive \p Char characters starting from the 795 /// the left removed. 796 LLVM_NODISCARD ltrim(char Char)797 StringRef ltrim(char Char) const { 798 return drop_front(std::min(Length, find_first_not_of(Char))); 799 } 800 801 /// Return string with consecutive characters in \p Chars starting from 802 /// the left removed. 803 LLVM_NODISCARD 804 StringRef ltrim(StringRef Chars = " \t\n\v\f\r") const { 805 return drop_front(std::min(Length, find_first_not_of(Chars))); 806 } 807 808 /// Return string with consecutive \p Char characters starting from the 809 /// right removed. 810 LLVM_NODISCARD rtrim(char Char)811 StringRef rtrim(char Char) const { 812 return drop_back(Length - std::min(Length, find_last_not_of(Char) + 1)); 813 } 814 815 /// Return string with consecutive characters in \p Chars starting from 816 /// the right removed. 817 LLVM_NODISCARD 818 StringRef rtrim(StringRef Chars = " \t\n\v\f\r") const { 819 return drop_back(Length - std::min(Length, find_last_not_of(Chars) + 1)); 820 } 821 822 /// Return string with consecutive \p Char characters starting from the 823 /// left and right removed. 824 LLVM_NODISCARD trim(char Char)825 StringRef trim(char Char) const { 826 return ltrim(Char).rtrim(Char); 827 } 828 829 /// Return string with consecutive characters in \p Chars starting from 830 /// the left and right removed. 831 LLVM_NODISCARD 832 StringRef trim(StringRef Chars = " \t\n\v\f\r") const { 833 return ltrim(Chars).rtrim(Chars); 834 } 835 836 /// @} 837 }; 838 839 /// A wrapper around a string literal that serves as a proxy for constructing 840 /// global tables of StringRefs with the length computed at compile time. 841 /// In order to avoid the invocation of a global constructor, StringLiteral 842 /// should *only* be used in a constexpr context, as such: 843 /// 844 /// constexpr StringLiteral S("test"); 845 /// 846 class StringLiteral : public StringRef { 847 public: 848 template <size_t N> StringLiteral(const char (& Str)[N])849 constexpr StringLiteral(const char (&Str)[N]) 850 #if defined(__clang__) && __has_attribute(enable_if) 851 #pragma clang diagnostic push 852 #pragma clang diagnostic ignored "-Wgcc-compat" 853 __attribute((enable_if(__builtin_strlen(Str) == N - 1, 854 "invalid string literal"))) 855 #pragma clang diagnostic pop 856 #endif 857 : StringRef(Str, N - 1) { 858 } 859 }; 860 861 /// @name StringRef Comparison Operators 862 /// @{ 863 864 LLVM_ATTRIBUTE_ALWAYS_INLINE 865 inline bool operator==(StringRef LHS, StringRef RHS) { 866 return LHS.equals(RHS); 867 } 868 869 LLVM_ATTRIBUTE_ALWAYS_INLINE 870 inline bool operator!=(StringRef LHS, StringRef RHS) { return !(LHS == RHS); } 871 872 inline bool operator<(StringRef LHS, StringRef RHS) { 873 return LHS.compare(RHS) == -1; 874 } 875 876 inline bool operator<=(StringRef LHS, StringRef RHS) { 877 return LHS.compare(RHS) != 1; 878 } 879 880 inline bool operator>(StringRef LHS, StringRef RHS) { 881 return LHS.compare(RHS) == 1; 882 } 883 884 inline bool operator>=(StringRef LHS, StringRef RHS) { 885 return LHS.compare(RHS) != -1; 886 } 887 888 inline std::string &operator+=(std::string &buffer, StringRef string) { 889 return buffer.append(string.data(), string.size()); 890 } 891 892 /// @} 893 894 /// \brief Compute a hash_code for a StringRef. 895 LLVM_NODISCARD 896 hash_code hash_value(StringRef S); 897 898 // StringRefs can be treated like a POD type. 899 template <typename T> struct isPodLike; 900 template <> struct isPodLike<StringRef> { static const bool value = true; }; 901 } 902 903 #endif 904