1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // © 2017 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 package ohos.global.icu.impl; 5 6 import ohos.global.icu.lang.UCharacter; 7 import ohos.global.icu.text.UnicodeSet; 8 9 /** 10 * A mutable String wrapper with a variable offset and length and 11 * support for case folding. The charAt, length, and subSequence methods all 12 * operate relative to the fixed offset into the String. 13 * 14 * Intended to be useful for parsing. 15 * 16 * CAUTION: Since this class is mutable, it must not be used anywhere that an 17 * immutable object is required, like in a cache or as the key of a hash map. 18 * 19 * @author sffc (Shane Carr) 20 * @hide exposed on OHOS 21 */ 22 public class StringSegment implements CharSequence { 23 private final String str; 24 private int start; 25 private int end; 26 private boolean foldCase; 27 StringSegment(String str, boolean foldCase)28 public StringSegment(String str, boolean foldCase) { 29 this.str = str; 30 this.start = 0; 31 this.end = str.length(); 32 this.foldCase = foldCase; 33 } 34 getOffset()35 public int getOffset() { 36 return start; 37 } 38 setOffset(int start)39 public void setOffset(int start) { 40 assert start <= end; 41 this.start = start; 42 } 43 44 /** 45 * Equivalent to <code>setOffset(getOffset()+delta)</code>. 46 * 47 * <p> 48 * Number parsing note: This method is usually called by a Matcher to register that a char was 49 * consumed. If the char is strong (it usually is, except for things like whitespace), follow this 50 * with a call to ParsedNumber#setCharsConsumed(). For more information on strong chars, see that 51 * method. 52 */ adjustOffset(int delta)53 public void adjustOffset(int delta) { 54 assert start + delta >= 0; 55 assert start + delta <= end; 56 start += delta; 57 } 58 59 /** 60 * Adjusts the offset by the width of the current lead code point, either 1 or 2 chars. 61 */ adjustOffsetByCodePoint()62 public void adjustOffsetByCodePoint() { 63 start += Character.charCount(getCodePoint()); 64 } 65 setLength(int length)66 public void setLength(int length) { 67 assert length >= 0; 68 assert start + length <= str.length(); 69 end = start + length; 70 } 71 resetLength()72 public void resetLength() { 73 end = str.length(); 74 } 75 76 @Override length()77 public int length() { 78 return end - start; 79 } 80 81 @Override charAt(int index)82 public char charAt(int index) { 83 return str.charAt(index + start); 84 } 85 86 @Override subSequence(int start, int end)87 public CharSequence subSequence(int start, int end) { 88 return str.subSequence(start + this.start, end + this.start); 89 } 90 91 /** 92 * Returns the first code point in the string segment. 93 * 94 * <p> 95 * <strong>Important:</strong> Most of the time, you should use {@link #startsWith}, which handles 96 * case folding logic, instead of this method. 97 */ getCodePoint()98 public int getCodePoint() { 99 assert start < end; 100 char lead = str.charAt(start); 101 char trail; 102 if (Character.isHighSurrogate(lead) 103 && start + 1 < end 104 && Character.isLowSurrogate(trail = str.charAt(start + 1))) { 105 return Character.toCodePoint(lead, trail); 106 } 107 return lead; 108 } 109 110 /** 111 * Returns the code point at the given index relative to the current offset. 112 */ 113 public int codePointAt(int index) { 114 return str.codePointAt(start + index); 115 } 116 117 /** 118 * Returns true if the first code point of this StringSegment equals the given code point. 119 * 120 * <p> 121 * This method will perform case folding if case folding is enabled for the parser. 122 */ 123 public boolean startsWith(int otherCp) { 124 return codePointsEqual(getCodePoint(), otherCp, foldCase); 125 } 126 127 /** 128 * Returns true if the first code point of this StringSegment is in the given UnicodeSet. 129 */ 130 public boolean startsWith(UnicodeSet uniset) { 131 // TODO: Move UnicodeSet case-folding logic here. 132 // TODO: Handle string matches here instead of separately. 133 int cp = getCodePoint(); 134 if (cp == -1) { 135 return false; 136 } 137 return uniset.contains(cp); 138 } 139 140 /** 141 * Returns true if there is at least one code point of overlap between this StringSegment and the 142 * given CharSequence. Null-safe. 143 */ 144 public boolean startsWith(CharSequence other) { 145 if (other == null || other.length() == 0 || length() == 0) { 146 return false; 147 } 148 int cp1 = Character.codePointAt(this, 0); 149 int cp2 = Character.codePointAt(other, 0); 150 return codePointsEqual(cp1, cp2, foldCase); 151 } 152 153 /** 154 * Returns the length of the prefix shared by this StringSegment and the given CharSequence. For 155 * example, if this string segment is "aab", and the char sequence is "aac", this method returns 2, 156 * since the first 2 characters are the same. 157 * 158 * <p> 159 * This method only returns offsets along code point boundaries. 160 * 161 * <p> 162 * This method will perform case folding if case folding was enabled in the constructor. 163 * 164 * <p> 165 * IMPORTANT: The given CharSequence must not be empty! It is the caller's responsibility to check. 166 */ 167 public int getCommonPrefixLength(CharSequence other) { 168 return getPrefixLengthInternal(other, foldCase); 169 } 170 171 /** 172 * Like {@link #getCommonPrefixLength}, but never performs case folding, even if case folding was 173 * enabled in the constructor. 174 */ 175 public int getCaseSensitivePrefixLength(CharSequence other) { 176 return getPrefixLengthInternal(other, false); 177 } 178 179 private int getPrefixLengthInternal(CharSequence other, boolean foldCase) { 180 assert other.length() != 0; 181 int offset = 0; 182 for (; offset < Math.min(length(), other.length());) { 183 // TODO: case-fold code points, not chars 184 int cp1 = Character.codePointAt(this, offset); 185 int cp2 = Character.codePointAt(other, offset); 186 if (!codePointsEqual(cp1, cp2, foldCase)) { 187 break; 188 } 189 offset += Character.charCount(cp1); 190 } 191 return offset; 192 } 193 194 private static final boolean codePointsEqual(int cp1, int cp2, boolean foldCase) { 195 if (cp1 == cp2) { 196 return true; 197 } 198 if (!foldCase) { 199 return false; 200 } 201 cp1 = UCharacter.foldCase(cp1, true); 202 cp2 = UCharacter.foldCase(cp2, true); 203 return cp1 == cp2; 204 } 205 206 /** 207 * Equals any CharSequence with the same chars as this segment. 208 * 209 * <p> 210 * This method does not perform case folding; if you want case-insensitive equality, use 211 * {@link #getCommonPrefixLength}. 212 */ 213 @Override 214 public boolean equals(Object other) { 215 if (!(other instanceof CharSequence)) 216 return false; 217 return Utility.charSequenceEquals(this, (CharSequence) other); 218 } 219 220 /** Returns a hash code equivalent to calling .toString().hashCode() */ 221 @Override 222 public int hashCode() { 223 return Utility.charSequenceHashCode(this); 224 } 225 226 /** Returns a string representation useful for debugging. */ 227 @Override 228 public String toString() { 229 return str.substring(0, start) + "[" + str.substring(start, end) + "]" + str.substring(end); 230 } 231 232 /** Returns a String that is equivalent to the CharSequence representation. */ 233 public String asString() { 234 return str.substring(start, end); 235 } 236 } 237