1 // © 2018 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 4 #include "unicode/utypes.h" 5 6 #if !UCONFIG_NO_FORMATTING 7 #ifndef __NUMPARSE_STRINGSEGMENT_H__ 8 #define __NUMPARSE_STRINGSEGMENT_H__ 9 10 #include "unicode/unistr.h" 11 #include "unicode/uniset.h" 12 13 U_NAMESPACE_BEGIN 14 15 16 /** 17 * A mutable UnicodeString wrapper with a variable offset and length and 18 * support for case folding. The charAt, length, and subSequence methods all 19 * operate relative to the fixed offset into the UnicodeString. 20 * 21 * Intended to be useful for parsing. 22 * 23 * CAUTION: Since this class is mutable, it must not be used anywhere that an 24 * immutable object is required, like in a cache or as the key of a hash map. 25 * 26 * @author sffc (Shane Carr) 27 */ 28 // Exported as U_I18N_API for tests 29 class U_I18N_API StringSegment : public UMemory { 30 public: 31 StringSegment(const UnicodeString& str, bool ignoreCase); 32 33 int32_t getOffset() const; 34 35 void setOffset(int32_t start); 36 37 /** 38 * Equivalent to <code>setOffset(getOffset()+delta)</code>. 39 * 40 * <p> 41 * This method is usually called by a Matcher to register that a char was consumed. If the char is 42 * strong (it usually is, except for things like whitespace), follow this with a call to 43 * {@link ParsedNumber#setCharsConsumed}. For more information on strong chars, see that method. 44 */ 45 void adjustOffset(int32_t delta); 46 47 /** 48 * Adjusts the offset by the width of the current code point, either 1 or 2 chars. 49 */ 50 void adjustOffsetByCodePoint(); 51 52 void setLength(int32_t length); 53 54 void resetLength(); 55 56 int32_t length() const; 57 58 char16_t charAt(int32_t index) const; 59 60 UChar32 codePointAt(int32_t index) const; 61 62 UnicodeString toUnicodeString() const; 63 64 const UnicodeString toTempUnicodeString() const; 65 66 /** 67 * Returns the first code point in the string segment, or -1 if the string starts with an invalid 68 * code point. 69 * 70 * <p> 71 * <strong>Important:</strong> Most of the time, you should use {@link #startsWith}, which handles case 72 * folding logic, instead of this method. 73 */ 74 UChar32 getCodePoint() const; 75 76 /** 77 * Returns true if the first code point of this StringSegment equals the given code point. 78 * 79 * <p> 80 * This method will perform case folding if case folding is enabled for the parser. 81 */ 82 bool startsWith(UChar32 otherCp) const; 83 84 /** 85 * Returns true if the first code point of this StringSegment is in the given UnicodeSet. 86 */ 87 bool startsWith(const UnicodeSet& uniset) const; 88 89 /** 90 * Returns true if there is at least one code point of overlap between this StringSegment and the 91 * given UnicodeString. 92 */ 93 bool startsWith(const UnicodeString& other) const; 94 95 /** 96 * Returns the length of the prefix shared by this StringSegment and the given UnicodeString. For 97 * example, if this string segment is "aab", and the char sequence is "aac", this method returns 2, 98 * since the first 2 characters are the same. 99 * 100 * <p> 101 * This method only returns offsets along code point boundaries. 102 * 103 * <p> 104 * This method will perform case folding if case folding was enabled in the constructor. 105 * 106 * <p> 107 * IMPORTANT: The given UnicodeString must not be empty! It is the caller's responsibility to check. 108 */ 109 int32_t getCommonPrefixLength(const UnicodeString& other); 110 111 /** 112 * Like {@link #getCommonPrefixLength}, but never performs case folding, even if case folding is 113 * enabled for the parser. 114 */ 115 int32_t getCaseSensitivePrefixLength(const UnicodeString& other); 116 117 bool operator==(const UnicodeString& other) const; 118 119 private: 120 const UnicodeString& fStr; 121 int32_t fStart; 122 int32_t fEnd; 123 bool fFoldCase; 124 125 int32_t getPrefixLengthInternal(const UnicodeString& other, bool foldCase); 126 127 static bool codePointsEqual(UChar32 cp1, UChar32 cp2, bool foldCase); 128 }; 129 130 131 U_NAMESPACE_END 132 133 #endif //__NUMPARSE_STRINGSEGMENT_H__ 134 #endif /* #if !UCONFIG_NO_FORMATTING */ 135