• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===--- StringRef.h - Constant String Reference Wrapper --------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #ifndef LLVM_ADT_STRINGREF_H
11 #define LLVM_ADT_STRINGREF_H
12 
13 #include "llvm/ADT/STLExtras.h"
14 #include "llvm/ADT/iterator_range.h"
15 #include "llvm/Support/Compiler.h"
16 #include <algorithm>
17 #include <cassert>
18 #include <cstring>
19 #include <limits>
20 #include <string>
21 #include <utility>
22 
23 namespace llvm {
24   template <typename T>
25   class SmallVectorImpl;
26   class APInt;
27   class hash_code;
28   class StringRef;
29 
30   /// Helper functions for StringRef::getAsInteger.
31   bool getAsUnsignedInteger(StringRef Str, unsigned Radix,
32                             unsigned long long &Result);
33 
34   bool getAsSignedInteger(StringRef Str, unsigned Radix, long long &Result);
35 
36   bool consumeUnsignedInteger(StringRef &Str, unsigned Radix,
37                               unsigned long long &Result);
38   bool consumeSignedInteger(StringRef &Str, unsigned Radix, long long &Result);
39 
40   /// StringRef - Represent a constant reference to a string, i.e. a character
41   /// array and a length, which need not be null terminated.
42   ///
43   /// This class does not own the string data, it is expected to be used in
44   /// situations where the character data resides in some other buffer, whose
45   /// lifetime extends past that of the StringRef. For this reason, it is not in
46   /// general safe to store a StringRef.
47   class StringRef {
48   public:
49     typedef const char *iterator;
50     typedef const char *const_iterator;
51     static const size_t npos = ~size_t(0);
52     typedef size_t size_type;
53 
54   private:
55     /// The start of the string, in an external buffer.
56     const char *Data = nullptr;
57 
58     /// The length of the string.
59     size_t Length = 0;
60 
61     // Workaround memcmp issue with null pointers (undefined behavior)
62     // by providing a specialized version
63     LLVM_ATTRIBUTE_ALWAYS_INLINE
static int compareMemory(const char *Lhs, const char *Rhs, size_t Length) {
  // A zero-length comparison is answered directly so that ::memcmp is never
  // handed (possibly null) pointers for an empty range.
  return Length == 0 ? 0 : ::memcmp(Lhs, Rhs, Length);
}
68 
69   public:
70     /// @name Constructors
71     /// @{
72 
73     /// Construct an empty string ref.
74     /*implicit*/ StringRef() = default;
75 
76     /// Disable conversion from nullptr.  This prevents things like
77     /// if (S == nullptr)
78     StringRef(std::nullptr_t) = delete;
79 
80     /// Construct a string ref from a cstring.
81     LLVM_ATTRIBUTE_ALWAYS_INLINE
StringRef(const char * Str)82     /*implicit*/ StringRef(const char *Str)
83         : Data(Str), Length(Str ? ::strlen(Str) : 0) {}
84 
85     /// Construct a string ref from a pointer and length.
86     LLVM_ATTRIBUTE_ALWAYS_INLINE
StringRef(const char * data,size_t length)87     /*implicit*/ constexpr StringRef(const char *data, size_t length)
88         : Data(data), Length(length) {}
89 
90     /// Construct a string ref from an std::string.
91     LLVM_ATTRIBUTE_ALWAYS_INLINE
StringRef(const std::string & Str)92     /*implicit*/ StringRef(const std::string &Str)
93       : Data(Str.data()), Length(Str.length()) {}
94 
withNullAsEmpty(const char * data)95     static StringRef withNullAsEmpty(const char *data) {
96       return StringRef(data ? data : "");
97     }
98 
99     /// @}
100     /// @name Iterators
101     /// @{
102 
begin()103     iterator begin() const { return Data; }
104 
end()105     iterator end() const { return Data + Length; }
106 
bytes_begin()107     const unsigned char *bytes_begin() const {
108       return reinterpret_cast<const unsigned char *>(begin());
109     }
bytes_end()110     const unsigned char *bytes_end() const {
111       return reinterpret_cast<const unsigned char *>(end());
112     }
bytes()113     iterator_range<const unsigned char *> bytes() const {
114       return make_range(bytes_begin(), bytes_end());
115     }
116 
117     /// @}
118     /// @name String Operations
119     /// @{
120 
121     /// data - Get a pointer to the start of the string (which may not be null
122     /// terminated).
123     LLVM_NODISCARD
124     LLVM_ATTRIBUTE_ALWAYS_INLINE
data()125     const char *data() const { return Data; }
126 
127     /// empty - Check if the string is empty.
128     LLVM_NODISCARD
129     LLVM_ATTRIBUTE_ALWAYS_INLINE
empty()130     bool empty() const { return Length == 0; }
131 
132     /// size - Get the string size.
133     LLVM_NODISCARD
134     LLVM_ATTRIBUTE_ALWAYS_INLINE
size()135     size_t size() const { return Length; }
136 
137     /// front - Get the first character in the string.
138     LLVM_NODISCARD
front()139     char front() const {
140       assert(!empty());
141       return Data[0];
142     }
143 
144     /// back - Get the last character in the string.
145     LLVM_NODISCARD
back()146     char back() const {
147       assert(!empty());
148       return Data[Length-1];
149     }
150 
151     /// copy - Allocate a copy in Allocator and return a StringRef to it.
152     template <typename Allocator>
copy(Allocator & A)153     LLVM_NODISCARD StringRef copy(Allocator &A) const {
154       // Don't request a length 0 copy from the allocator.
155       if (empty())
156         return StringRef();
157       char *S = A.template Allocate<char>(Length);
158       std::copy(begin(), end(), S);
159       return StringRef(S, Length);
160     }
161 
162     /// equals - Check for string equality, this is more efficient than
163     /// compare() when the relative ordering of inequal strings isn't needed.
164     LLVM_NODISCARD
165     LLVM_ATTRIBUTE_ALWAYS_INLINE
equals(StringRef RHS)166     bool equals(StringRef RHS) const {
167       return (Length == RHS.Length &&
168               compareMemory(Data, RHS.Data, RHS.Length) == 0);
169     }
170 
171     /// equals_lower - Check for string equality, ignoring case.
172     LLVM_NODISCARD
equals_lower(StringRef RHS)173     bool equals_lower(StringRef RHS) const {
174       return Length == RHS.Length && compare_lower(RHS) == 0;
175     }
176 
177     /// compare - Compare two strings; the result is -1, 0, or 1 if this string
178     /// is lexicographically less than, equal to, or greater than the \p RHS.
179     LLVM_NODISCARD
180     LLVM_ATTRIBUTE_ALWAYS_INLINE
compare(StringRef RHS)181     int compare(StringRef RHS) const {
182       // Check the prefix for a mismatch.
183       if (int Res = compareMemory(Data, RHS.Data, std::min(Length, RHS.Length)))
184         return Res < 0 ? -1 : 1;
185 
186       // Otherwise the prefixes match, so we only need to check the lengths.
187       if (Length == RHS.Length)
188         return 0;
189       return Length < RHS.Length ? -1 : 1;
190     }
191 
192     /// compare_lower - Compare two strings, ignoring case.
193     LLVM_NODISCARD
194     int compare_lower(StringRef RHS) const;
195 
196     /// compare_numeric - Compare two strings, treating sequences of digits as
197     /// numbers.
198     LLVM_NODISCARD
199     int compare_numeric(StringRef RHS) const;
200 
201     /// \brief Determine the edit distance between this string and another
202     /// string.
203     ///
204     /// \param Other the string to compare this string against.
205     ///
206     /// \param AllowReplacements whether to allow character
207     /// replacements (change one character into another) as a single
208     /// operation, rather than as two operations (an insertion and a
209     /// removal).
210     ///
211     /// \param MaxEditDistance If non-zero, the maximum edit distance that
212     /// this routine is allowed to compute. If the edit distance will exceed
213     /// that maximum, returns \c MaxEditDistance+1.
214     ///
215     /// \returns the minimum number of character insertions, removals,
216     /// or (if \p AllowReplacements is \c true) replacements needed to
217     /// transform one of the given strings into the other. If zero,
218     /// the strings are identical.
219     LLVM_NODISCARD
220     unsigned edit_distance(StringRef Other, bool AllowReplacements = true,
221                            unsigned MaxEditDistance = 0) const;
222 
223     /// str - Get the contents as an std::string.
224     LLVM_NODISCARD
str()225     std::string str() const {
226       if (!Data) return std::string();
227       return std::string(Data, Length);
228     }
229 
230     /// @}
231     /// @name Operator Overloads
232     /// @{
233 
234     LLVM_NODISCARD
235     char operator[](size_t Index) const {
236       assert(Index < Length && "Invalid index!");
237       return Data[Index];
238     }
239 
240     /// Disallow accidental assignment from a temporary std::string.
241     ///
242     /// The declaration here is extra complicated so that `stringRef = {}`
243     /// and `stringRef = "abc"` continue to select the move assignment operator.
244     template <typename T>
245     typename std::enable_if<std::is_same<T, std::string>::value,
246                             StringRef>::type &
247     operator=(T &&Str) = delete;
248 
249     /// @}
250     /// @name Type Conversions
251     /// @{
252 
string()253     operator std::string() const {
254       return str();
255     }
256 
257     /// @}
258     /// @name String Predicates
259     /// @{
260 
261     /// Check if this string starts with the given \p Prefix.
262     LLVM_NODISCARD
263     LLVM_ATTRIBUTE_ALWAYS_INLINE
startswith(StringRef Prefix)264     bool startswith(StringRef Prefix) const {
265       return Length >= Prefix.Length &&
266              compareMemory(Data, Prefix.Data, Prefix.Length) == 0;
267     }
268 
269     /// Check if this string starts with the given \p Prefix, ignoring case.
270     LLVM_NODISCARD
271     bool startswith_lower(StringRef Prefix) const;
272 
273     /// Check if this string ends with the given \p Suffix.
274     LLVM_NODISCARD
275     LLVM_ATTRIBUTE_ALWAYS_INLINE
endswith(StringRef Suffix)276     bool endswith(StringRef Suffix) const {
277       return Length >= Suffix.Length &&
278         compareMemory(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 0;
279     }
280 
281     /// Check if this string ends with the given \p Suffix, ignoring case.
282     LLVM_NODISCARD
283     bool endswith_lower(StringRef Suffix) const;
284 
285     /// @}
286     /// @name String Searching
287     /// @{
288 
289     /// Search for the first character \p C in the string.
290     ///
291     /// \returns The index of the first occurrence of \p C, or npos if not
292     /// found.
293     LLVM_NODISCARD
294     LLVM_ATTRIBUTE_ALWAYS_INLINE
295     size_t find(char C, size_t From = 0) const {
296       size_t FindBegin = std::min(From, Length);
297       if (FindBegin < Length) { // Avoid calling memchr with nullptr.
298         // Just forward to memchr, which is faster than a hand-rolled loop.
299         if (const void *P = ::memchr(Data + FindBegin, C, Length - FindBegin))
300           return static_cast<const char *>(P) - Data;
301       }
302       return npos;
303     }
304 
305     /// Search for the first character \p C in the string, ignoring case.
306     ///
307     /// \returns The index of the first occurrence of \p C, or npos if not
308     /// found.
309     LLVM_NODISCARD
310     size_t find_lower(char C, size_t From = 0) const;
311 
312     /// Search for the first character satisfying the predicate \p F
313     ///
314     /// \returns The index of the first character satisfying \p F starting from
315     /// \p From, or npos if not found.
316     LLVM_NODISCARD
317     LLVM_ATTRIBUTE_ALWAYS_INLINE
318     size_t find_if(function_ref<bool(char)> F, size_t From = 0) const {
319       StringRef S = drop_front(From);
320       while (!S.empty()) {
321         if (F(S.front()))
322           return size() - S.size();
323         S = S.drop_front();
324       }
325       return npos;
326     }
327 
328     /// Search for the first character not satisfying the predicate \p F
329     ///
330     /// \returns The index of the first character not satisfying \p F starting
331     /// from \p From, or npos if not found.
332     LLVM_NODISCARD
333     LLVM_ATTRIBUTE_ALWAYS_INLINE
334     size_t find_if_not(function_ref<bool(char)> F, size_t From = 0) const {
335       return find_if([F](char c) { return !F(c); }, From);
336     }
337 
338     /// Search for the first string \p Str in the string.
339     ///
340     /// \returns The index of the first occurrence of \p Str, or npos if not
341     /// found.
342     LLVM_NODISCARD
343     size_t find(StringRef Str, size_t From = 0) const;
344 
345     /// Search for the first string \p Str in the string, ignoring case.
346     ///
347     /// \returns The index of the first occurrence of \p Str, or npos if not
348     /// found.
349     LLVM_NODISCARD
350     size_t find_lower(StringRef Str, size_t From = 0) const;
351 
352     /// Search for the last character \p C in the string.
353     ///
354     /// \returns The index of the last occurrence of \p C, or npos if not
355     /// found.
356     LLVM_NODISCARD
357     size_t rfind(char C, size_t From = npos) const {
358       From = std::min(From, Length);
359       size_t i = From;
360       while (i != 0) {
361         --i;
362         if (Data[i] == C)
363           return i;
364       }
365       return npos;
366     }
367 
368     /// Search for the last character \p C in the string, ignoring case.
369     ///
370     /// \returns The index of the last occurrence of \p C, or npos if not
371     /// found.
372     LLVM_NODISCARD
373     size_t rfind_lower(char C, size_t From = npos) const;
374 
375     /// Search for the last string \p Str in the string.
376     ///
377     /// \returns The index of the last occurrence of \p Str, or npos if not
378     /// found.
379     LLVM_NODISCARD
380     size_t rfind(StringRef Str) const;
381 
382     /// Search for the last string \p Str in the string, ignoring case.
383     ///
384     /// \returns The index of the last occurrence of \p Str, or npos if not
385     /// found.
386     LLVM_NODISCARD
387     size_t rfind_lower(StringRef Str) const;
388 
389     /// Find the first character in the string that is \p C, or npos if not
390     /// found. Same as find.
391     LLVM_NODISCARD
392     size_t find_first_of(char C, size_t From = 0) const {
393       return find(C, From);
394     }
395 
396     /// Find the first character in the string that is in \p Chars, or npos if
397     /// not found.
398     ///
399     /// Complexity: O(size() + Chars.size())
400     LLVM_NODISCARD
401     size_t find_first_of(StringRef Chars, size_t From = 0) const;
402 
403     /// Find the first character in the string that is not \p C or npos if not
404     /// found.
405     LLVM_NODISCARD
406     size_t find_first_not_of(char C, size_t From = 0) const;
407 
408     /// Find the first character in the string that is not in the string
409     /// \p Chars, or npos if not found.
410     ///
411     /// Complexity: O(size() + Chars.size())
412     LLVM_NODISCARD
413     size_t find_first_not_of(StringRef Chars, size_t From = 0) const;
414 
415     /// Find the last character in the string that is \p C, or npos if not
416     /// found.
417     LLVM_NODISCARD
418     size_t find_last_of(char C, size_t From = npos) const {
419       return rfind(C, From);
420     }
421 
422     /// Find the last character in the string that is in \p Chars, or npos if not
423     /// found.
424     ///
425     /// Complexity: O(size() + Chars.size())
426     LLVM_NODISCARD
427     size_t find_last_of(StringRef Chars, size_t From = npos) const;
428 
429     /// Find the last character in the string that is not \p C, or npos if not
430     /// found.
431     LLVM_NODISCARD
432     size_t find_last_not_of(char C, size_t From = npos) const;
433 
434     /// Find the last character in the string that is not in \p Chars, or
435     /// npos if not found.
436     ///
437     /// Complexity: O(size() + Chars.size())
438     LLVM_NODISCARD
439     size_t find_last_not_of(StringRef Chars, size_t From = npos) const;
440 
441     /// Return true if the given string is a substring of *this, and false
442     /// otherwise.
443     LLVM_NODISCARD
444     LLVM_ATTRIBUTE_ALWAYS_INLINE
contains(StringRef Other)445     bool contains(StringRef Other) const { return find(Other) != npos; }
446 
447     /// Return true if the given character is contained in *this, and false
448     /// otherwise.
449     LLVM_NODISCARD
450     LLVM_ATTRIBUTE_ALWAYS_INLINE
contains(char C)451     bool contains(char C) const { return find_first_of(C) != npos; }
452 
453     /// Return true if the given string is a substring of *this, and false
454     /// otherwise.
455     LLVM_NODISCARD
456     LLVM_ATTRIBUTE_ALWAYS_INLINE
contains_lower(StringRef Other)457     bool contains_lower(StringRef Other) const {
458       return find_lower(Other) != npos;
459     }
460 
461     /// Return true if the given character is contained in *this, and false
462     /// otherwise.
463     LLVM_NODISCARD
464     LLVM_ATTRIBUTE_ALWAYS_INLINE
contains_lower(char C)465     bool contains_lower(char C) const { return find_lower(C) != npos; }
466 
467     /// @}
468     /// @name Helpful Algorithms
469     /// @{
470 
471     /// Return the number of occurrences of \p C in the string.
472     LLVM_NODISCARD
count(char C)473     size_t count(char C) const {
474       size_t Count = 0;
475       for (size_t i = 0, e = Length; i != e; ++i)
476         if (Data[i] == C)
477           ++Count;
478       return Count;
479     }
480 
481     /// Return the number of non-overlapped occurrences of \p Str in
482     /// the string.
483     size_t count(StringRef Str) const;
484 
485     /// Parse the current string as an integer of the specified radix.  If
486     /// \p Radix is specified as zero, this does radix autosensing using
487     /// extended C rules: 0 is octal, 0x is hex, 0b is binary.
488     ///
489     /// If the string is invalid or if only a subset of the string is valid,
490     /// this returns true to signify the error.  The string is considered
491     /// erroneous if empty or if it overflows T.
492     template <typename T>
493     typename std::enable_if<std::numeric_limits<T>::is_signed, bool>::type
getAsInteger(unsigned Radix,T & Result)494     getAsInteger(unsigned Radix, T &Result) const {
495       long long LLVal;
496       if (getAsSignedInteger(*this, Radix, LLVal) ||
497             static_cast<T>(LLVal) != LLVal)
498         return true;
499       Result = LLVal;
500       return false;
501     }
502 
503     template <typename T>
504     typename std::enable_if<!std::numeric_limits<T>::is_signed, bool>::type
getAsInteger(unsigned Radix,T & Result)505     getAsInteger(unsigned Radix, T &Result) const {
506       unsigned long long ULLVal;
507       // The additional cast to unsigned long long is required to avoid the
508       // Visual C++ warning C4805: '!=' : unsafe mix of type 'bool' and type
509       // 'unsigned __int64' when instantiating getAsInteger with T = bool.
510       if (getAsUnsignedInteger(*this, Radix, ULLVal) ||
511           static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal)
512         return true;
513       Result = ULLVal;
514       return false;
515     }
516 
517     /// Parse the current string as an integer of the specified radix.  If
518     /// \p Radix is specified as zero, this does radix autosensing using
519     /// extended C rules: 0 is octal, 0x is hex, 0b is binary.
520     ///
521     /// If the string does not begin with a number of the specified radix,
522     /// this returns true to signify the error. The string is considered
523     /// erroneous if empty or if it overflows T.
524     /// The portion of the string representing the discovered numeric value
525     /// is removed from the beginning of the string.
526     template <typename T>
527     typename std::enable_if<std::numeric_limits<T>::is_signed, bool>::type
consumeInteger(unsigned Radix,T & Result)528     consumeInteger(unsigned Radix, T &Result) {
529       long long LLVal;
530       if (consumeSignedInteger(*this, Radix, LLVal) ||
531           static_cast<long long>(static_cast<T>(LLVal)) != LLVal)
532         return true;
533       Result = LLVal;
534       return false;
535     }
536 
537     template <typename T>
538     typename std::enable_if<!std::numeric_limits<T>::is_signed, bool>::type
consumeInteger(unsigned Radix,T & Result)539     consumeInteger(unsigned Radix, T &Result) {
540       unsigned long long ULLVal;
541       if (consumeUnsignedInteger(*this, Radix, ULLVal) ||
542           static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal)
543         return true;
544       Result = ULLVal;
545       return false;
546     }
547 
548     /// Parse the current string as an integer of the specified \p Radix, or of
549     /// an autosensed radix if the \p Radix given is 0.  The current value in
550     /// \p Result is discarded, and the storage is changed to be wide enough to
551     /// store the parsed integer.
552     ///
553     /// \returns true if the string does not solely consist of a valid
554     /// non-empty number in the appropriate base.
555     ///
556     /// APInt::fromString is superficially similar but assumes the
557     /// string is well-formed in the given radix.
558     bool getAsInteger(unsigned Radix, APInt &Result) const;
559 
560     /// @}
561     /// @name String Operations
562     /// @{
563 
564     /// Convert the given ASCII string to lowercase.
565     LLVM_NODISCARD
566     std::string lower() const;
567 
568     /// Convert the given ASCII string to uppercase.
569     LLVM_NODISCARD
570     std::string upper() const;
571 
572     /// @}
573     /// @name Substring Operations
574     /// @{
575 
576     /// Return a reference to the substring from [Start, Start + N).
577     ///
578     /// \param Start The index of the starting character in the substring; if
579     /// the index is npos or greater than the length of the string then the
580     /// empty substring will be returned.
581     ///
582     /// \param N The number of characters to include in the substring. If N
583     /// exceeds the number of characters remaining in the string, the string
584     /// suffix (starting with \p Start) will be returned.
585     LLVM_NODISCARD
586     LLVM_ATTRIBUTE_ALWAYS_INLINE
587     StringRef substr(size_t Start, size_t N = npos) const {
588       Start = std::min(Start, Length);
589       return StringRef(Data + Start, std::min(N, Length - Start));
590     }
591 
592     /// Return a StringRef equal to 'this' but with only the first \p N
593     /// elements remaining.  If \p N is greater than the length of the
594     /// string, the entire string is returned.
595     LLVM_NODISCARD
596     LLVM_ATTRIBUTE_ALWAYS_INLINE
597     StringRef take_front(size_t N = 1) const {
598       if (N >= size())
599         return *this;
600       return drop_back(size() - N);
601     }
602 
603     /// Return a StringRef equal to 'this' but with only the last \p N
604     /// elements remaining.  If \p N is greater than the length of the
605     /// string, the entire string is returned.
606     LLVM_NODISCARD
607     LLVM_ATTRIBUTE_ALWAYS_INLINE
608     StringRef take_back(size_t N = 1) const {
609       if (N >= size())
610         return *this;
611       return drop_front(size() - N);
612     }
613 
614     /// Return the longest prefix of 'this' such that every character
615     /// in the prefix satisfies the given predicate.
616     LLVM_NODISCARD
617     LLVM_ATTRIBUTE_ALWAYS_INLINE
take_while(function_ref<bool (char)> F)618     StringRef take_while(function_ref<bool(char)> F) const {
619       return substr(0, find_if_not(F));
620     }
621 
622     /// Return the longest prefix of 'this' such that no character in
623     /// the prefix satisfies the given predicate.
624     LLVM_NODISCARD
625     LLVM_ATTRIBUTE_ALWAYS_INLINE
take_until(function_ref<bool (char)> F)626     StringRef take_until(function_ref<bool(char)> F) const {
627       return substr(0, find_if(F));
628     }
629 
630     /// Return a StringRef equal to 'this' but with the first \p N elements
631     /// dropped.
632     LLVM_NODISCARD
633     LLVM_ATTRIBUTE_ALWAYS_INLINE
634     StringRef drop_front(size_t N = 1) const {
635       assert(size() >= N && "Dropping more elements than exist");
636       return substr(N);
637     }
638 
639     /// Return a StringRef equal to 'this' but with the last \p N elements
640     /// dropped.
641     LLVM_NODISCARD
642     LLVM_ATTRIBUTE_ALWAYS_INLINE
643     StringRef drop_back(size_t N = 1) const {
644       assert(size() >= N && "Dropping more elements than exist");
645       return substr(0, size()-N);
646     }
647 
648     /// Return a StringRef equal to 'this', but with all characters satisfying
649     /// the given predicate dropped from the beginning of the string.
650     LLVM_NODISCARD
651     LLVM_ATTRIBUTE_ALWAYS_INLINE
drop_while(function_ref<bool (char)> F)652     StringRef drop_while(function_ref<bool(char)> F) const {
653       return substr(find_if_not(F));
654     }
655 
656     /// Return a StringRef equal to 'this', but with all characters not
657     /// satisfying the given predicate dropped from the beginning of the string.
658     LLVM_NODISCARD
659     LLVM_ATTRIBUTE_ALWAYS_INLINE
drop_until(function_ref<bool (char)> F)660     StringRef drop_until(function_ref<bool(char)> F) const {
661       return substr(find_if(F));
662     }
663 
664     /// Returns true if this StringRef has the given prefix and removes that
665     /// prefix.
666     LLVM_ATTRIBUTE_ALWAYS_INLINE
consume_front(StringRef Prefix)667     bool consume_front(StringRef Prefix) {
668       if (!startswith(Prefix))
669         return false;
670 
671       *this = drop_front(Prefix.size());
672       return true;
673     }
674 
675     /// Returns true if this StringRef has the given suffix and removes that
676     /// suffix.
677     LLVM_ATTRIBUTE_ALWAYS_INLINE
consume_back(StringRef Suffix)678     bool consume_back(StringRef Suffix) {
679       if (!endswith(Suffix))
680         return false;
681 
682       *this = drop_back(Suffix.size());
683       return true;
684     }
685 
686     /// Return a reference to the substring from [Start, End).
687     ///
688     /// \param Start The index of the starting character in the substring; if
689     /// the index is npos or greater than the length of the string then the
690     /// empty substring will be returned.
691     ///
692     /// \param End The index following the last character to include in the
693     /// substring. If this is npos or exceeds the number of characters
694     /// remaining in the string, the string suffix (starting with \p Start)
695     /// will be returned. If this is less than \p Start, an empty string will
696     /// be returned.
697     LLVM_NODISCARD
698     LLVM_ATTRIBUTE_ALWAYS_INLINE
slice(size_t Start,size_t End)699     StringRef slice(size_t Start, size_t End) const {
700       Start = std::min(Start, Length);
701       End = std::min(std::max(Start, End), Length);
702       return StringRef(Data + Start, End - Start);
703     }
704 
705     /// Split into two substrings around the first occurrence of a separator
706     /// character.
707     ///
708     /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
709     /// such that (*this == LHS + Separator + RHS) is true and RHS is
710     /// maximal. If \p Separator is not in the string, then the result is a
711     /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
712     ///
713     /// \param Separator The character to split on.
714     /// \returns The split substrings.
715     LLVM_NODISCARD
split(char Separator)716     std::pair<StringRef, StringRef> split(char Separator) const {
717       size_t Idx = find(Separator);
718       if (Idx == npos)
719         return std::make_pair(*this, StringRef());
720       return std::make_pair(slice(0, Idx), slice(Idx+1, npos));
721     }
722 
723     /// Split into two substrings around the first occurrence of a separator
724     /// string.
725     ///
726     /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
727     /// such that (*this == LHS + Separator + RHS) is true and RHS is
728     /// maximal. If \p Separator is not in the string, then the result is a
729     /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
730     ///
731     /// \param Separator - The string to split on.
732     /// \return - The split substrings.
733     LLVM_NODISCARD
split(StringRef Separator)734     std::pair<StringRef, StringRef> split(StringRef Separator) const {
735       size_t Idx = find(Separator);
736       if (Idx == npos)
737         return std::make_pair(*this, StringRef());
738       return std::make_pair(slice(0, Idx), slice(Idx + Separator.size(), npos));
739     }
740 
741     /// Split into substrings around the occurrences of a separator string.
742     ///
743     /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
744     /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1
745     /// elements are added to A.
746     /// If \p KeepEmpty is false, empty strings are not added to \p A. They
747     /// still count when considering \p MaxSplit
748     /// A useful invariant is that
749     /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true
750     ///
751     /// \param A - Where to put the substrings.
752     /// \param Separator - The string to split on.
753     /// \param MaxSplit - The maximum number of times the string is split.
754     /// \param KeepEmpty - True if empty substring should be added.
755     void split(SmallVectorImpl<StringRef> &A,
756                StringRef Separator, int MaxSplit = -1,
757                bool KeepEmpty = true) const;
758 
759     /// Split into substrings around the occurrences of a separator character.
760     ///
761     /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
762     /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1
763     /// elements are added to A.
764     /// If \p KeepEmpty is false, empty strings are not added to \p A. They
765     /// still count when considering \p MaxSplit
766     /// A useful invariant is that
767     /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true
768     ///
769     /// \param A - Where to put the substrings.
770     /// \param Separator - The string to split on.
771     /// \param MaxSplit - The maximum number of times the string is split.
772     /// \param KeepEmpty - True if empty substring should be added.
773     void split(SmallVectorImpl<StringRef> &A, char Separator, int MaxSplit = -1,
774                bool KeepEmpty = true) const;
775 
776     /// Split into two substrings around the last occurrence of a separator
777     /// character.
778     ///
779     /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
780     /// such that (*this == LHS + Separator + RHS) is true and RHS is
781     /// minimal. If \p Separator is not in the string, then the result is a
782     /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
783     ///
784     /// \param Separator - The character to split on.
785     /// \return - The split substrings.
786     LLVM_NODISCARD
rsplit(char Separator)787     std::pair<StringRef, StringRef> rsplit(char Separator) const {
788       size_t Idx = rfind(Separator);
789       if (Idx == npos)
790         return std::make_pair(*this, StringRef());
791       return std::make_pair(slice(0, Idx), slice(Idx+1, npos));
792     }
793 
794     /// Return string with consecutive \p Char characters starting from the
795     /// the left removed.
796     LLVM_NODISCARD
ltrim(char Char)797     StringRef ltrim(char Char) const {
798       return drop_front(std::min(Length, find_first_not_of(Char)));
799     }
800 
801     /// Return string with consecutive characters in \p Chars starting from
802     /// the left removed.
803     LLVM_NODISCARD
804     StringRef ltrim(StringRef Chars = " \t\n\v\f\r") const {
805       return drop_front(std::min(Length, find_first_not_of(Chars)));
806     }
807 
808     /// Return string with consecutive \p Char characters starting from the
809     /// right removed.
810     LLVM_NODISCARD
rtrim(char Char)811     StringRef rtrim(char Char) const {
812       return drop_back(Length - std::min(Length, find_last_not_of(Char) + 1));
813     }
814 
815     /// Return string with consecutive characters in \p Chars starting from
816     /// the right removed.
817     LLVM_NODISCARD
818     StringRef rtrim(StringRef Chars = " \t\n\v\f\r") const {
819       return drop_back(Length - std::min(Length, find_last_not_of(Chars) + 1));
820     }
821 
822     /// Return string with consecutive \p Char characters starting from the
823     /// left and right removed.
824     LLVM_NODISCARD
trim(char Char)825     StringRef trim(char Char) const {
826       return ltrim(Char).rtrim(Char);
827     }
828 
829     /// Return string with consecutive characters in \p Chars starting from
830     /// the left and right removed.
831     LLVM_NODISCARD
832     StringRef trim(StringRef Chars = " \t\n\v\f\r") const {
833       return ltrim(Chars).rtrim(Chars);
834     }
835 
836     /// @}
837   };
838 
839   /// A wrapper around a string literal that serves as a proxy for constructing
840   /// global tables of StringRefs with the length computed at compile time.
841   /// In order to avoid the invocation of a global constructor, StringLiteral
842   /// should *only* be used in a constexpr context, as such:
843   ///
844   /// constexpr StringLiteral S("test");
845   ///
846   class StringLiteral : public StringRef {
847   public:
848     template <size_t N>
StringLiteral(const char (& Str)[N])849     constexpr StringLiteral(const char (&Str)[N])
850 #if defined(__clang__) && __has_attribute(enable_if)
851 #pragma clang diagnostic push
852 #pragma clang diagnostic ignored "-Wgcc-compat"
853         __attribute((enable_if(__builtin_strlen(Str) == N - 1,
854                                "invalid string literal")))
855 #pragma clang diagnostic pop
856 #endif
857         : StringRef(Str, N - 1) {
858     }
859   };
860 
861   /// @name StringRef Comparison Operators
862   /// @{
863 
864   LLVM_ATTRIBUTE_ALWAYS_INLINE
865   inline bool operator==(StringRef LHS, StringRef RHS) {
866     return LHS.equals(RHS);
867   }
868 
869   LLVM_ATTRIBUTE_ALWAYS_INLINE
870   inline bool operator!=(StringRef LHS, StringRef RHS) { return !(LHS == RHS); }
871 
872   inline bool operator<(StringRef LHS, StringRef RHS) {
873     return LHS.compare(RHS) == -1;
874   }
875 
876   inline bool operator<=(StringRef LHS, StringRef RHS) {
877     return LHS.compare(RHS) != 1;
878   }
879 
880   inline bool operator>(StringRef LHS, StringRef RHS) {
881     return LHS.compare(RHS) == 1;
882   }
883 
884   inline bool operator>=(StringRef LHS, StringRef RHS) {
885     return LHS.compare(RHS) != -1;
886   }
887 
888   inline std::string &operator+=(std::string &buffer, StringRef string) {
889     return buffer.append(string.data(), string.size());
890   }
891 
892   /// @}
893 
894   /// \brief Compute a hash_code for a StringRef.
895   LLVM_NODISCARD
896   hash_code hash_value(StringRef S);
897 
898   // StringRefs can be treated like a POD type.
899   template <typename T> struct isPodLike;
900   template <> struct isPodLike<StringRef> { static const bool value = true; };
901 }
902 
903 #endif
904