• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2017 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 package ohos.global.icu.impl;
5 
6 import ohos.global.icu.lang.UCharacter;
7 import ohos.global.icu.text.UnicodeSet;
8 
9 /**
10  * A mutable String wrapper with a variable offset and length and
11  * support for case folding. The charAt, length, and subSequence methods all
12  * operate relative to the fixed offset into the String.
13  *
14  * Intended to be useful for parsing.
15  *
16  * CAUTION: Since this class is mutable, it must not be used anywhere that an
17  * immutable object is required, like in a cache or as the key of a hash map.
18  *
19  * @author sffc (Shane Carr)
20  * @hide exposed on OHOS
21  */
22 public class StringSegment implements CharSequence {
23     private final String str;
24     private int start;
25     private int end;
26     private boolean foldCase;
27 
StringSegment(String str, boolean foldCase)28     public StringSegment(String str, boolean foldCase) {
29         this.str = str;
30         this.start = 0;
31         this.end = str.length();
32         this.foldCase = foldCase;
33     }
34 
getOffset()35     public int getOffset() {
36         return start;
37     }
38 
setOffset(int start)39     public void setOffset(int start) {
40         assert start <= end;
41         this.start = start;
42     }
43 
44     /**
45      * Equivalent to <code>setOffset(getOffset()+delta)</code>.
46      *
47      * <p>
48      * Number parsing note: This method is usually called by a Matcher to register that a char was
49      * consumed. If the char is strong (it usually is, except for things like whitespace), follow this
50      * with a call to ParsedNumber#setCharsConsumed(). For more information on strong chars, see that
51      * method.
52      */
adjustOffset(int delta)53     public void adjustOffset(int delta) {
54         assert start + delta >= 0;
55         assert start + delta <= end;
56         start += delta;
57     }
58 
59     /**
60      * Adjusts the offset by the width of the current lead code point, either 1 or 2 chars.
61      */
adjustOffsetByCodePoint()62     public void adjustOffsetByCodePoint() {
63         start += Character.charCount(getCodePoint());
64     }
65 
setLength(int length)66     public void setLength(int length) {
67         assert length >= 0;
68         assert start + length <= str.length();
69         end = start + length;
70     }
71 
resetLength()72     public void resetLength() {
73         end = str.length();
74     }
75 
76     @Override
length()77     public int length() {
78         return end - start;
79     }
80 
81     @Override
charAt(int index)82     public char charAt(int index) {
83         return str.charAt(index + start);
84     }
85 
86     @Override
subSequence(int start, int end)87     public CharSequence subSequence(int start, int end) {
88         return str.subSequence(start + this.start, end + this.start);
89     }
90 
91     /**
92      * Returns the first code point in the string segment.
93      *
94      * <p>
95      * <strong>Important:</strong> Most of the time, you should use {@link #startsWith}, which handles
96      * case folding logic, instead of this method.
97      */
getCodePoint()98     public int getCodePoint() {
99         assert start < end;
100         char lead = str.charAt(start);
101         char trail;
102         if (Character.isHighSurrogate(lead)
103                 && start + 1 < end
104                 && Character.isLowSurrogate(trail = str.charAt(start + 1))) {
105             return Character.toCodePoint(lead, trail);
106         }
107         return lead;
108     }
109 
110     /**
111      * Returns the code point at the given index relative to the current offset.
112      */
113     public int codePointAt(int index) {
114         return str.codePointAt(start + index);
115     }
116 
117     /**
118      * Returns true if the first code point of this StringSegment equals the given code point.
119      *
120      * <p>
121      * This method will perform case folding if case folding is enabled for the parser.
122      */
123     public boolean startsWith(int otherCp) {
124         return codePointsEqual(getCodePoint(), otherCp, foldCase);
125     }
126 
127     /**
128      * Returns true if the first code point of this StringSegment is in the given UnicodeSet.
129      */
130     public boolean startsWith(UnicodeSet uniset) {
131         // TODO: Move UnicodeSet case-folding logic here.
132         // TODO: Handle string matches here instead of separately.
133         int cp = getCodePoint();
134         if (cp == -1) {
135             return false;
136         }
137         return uniset.contains(cp);
138     }
139 
140     /**
141      * Returns true if there is at least one code point of overlap between this StringSegment and the
142      * given CharSequence. Null-safe.
143      */
144     public boolean startsWith(CharSequence other) {
145         if (other == null || other.length() == 0 || length() == 0) {
146             return false;
147         }
148         int cp1 = Character.codePointAt(this, 0);
149         int cp2 = Character.codePointAt(other, 0);
150         return codePointsEqual(cp1, cp2, foldCase);
151     }
152 
153     /**
154      * Returns the length of the prefix shared by this StringSegment and the given CharSequence. For
155      * example, if this string segment is "aab", and the char sequence is "aac", this method returns 2,
156      * since the first 2 characters are the same.
157      *
158      * <p>
159      * This method only returns offsets along code point boundaries.
160      *
161      * <p>
162      * This method will perform case folding if case folding was enabled in the constructor.
163      *
164      * <p>
165      * IMPORTANT: The given CharSequence must not be empty! It is the caller's responsibility to check.
166      */
167     public int getCommonPrefixLength(CharSequence other) {
168         return getPrefixLengthInternal(other, foldCase);
169     }
170 
171     /**
172      * Like {@link #getCommonPrefixLength}, but never performs case folding, even if case folding was
173      * enabled in the constructor.
174      */
175     public int getCaseSensitivePrefixLength(CharSequence other) {
176         return getPrefixLengthInternal(other, false);
177     }
178 
179     private int getPrefixLengthInternal(CharSequence other, boolean foldCase) {
180         assert other.length() != 0;
181         int offset = 0;
182         for (; offset < Math.min(length(), other.length());) {
183             // TODO: case-fold code points, not chars
184             int cp1 = Character.codePointAt(this, offset);
185             int cp2 = Character.codePointAt(other, offset);
186             if (!codePointsEqual(cp1, cp2, foldCase)) {
187                 break;
188             }
189             offset += Character.charCount(cp1);
190         }
191         return offset;
192     }
193 
194     private static final boolean codePointsEqual(int cp1, int cp2, boolean foldCase) {
195         if (cp1 == cp2) {
196             return true;
197         }
198         if (!foldCase) {
199             return false;
200         }
201         cp1 = UCharacter.foldCase(cp1, true);
202         cp2 = UCharacter.foldCase(cp2, true);
203         return cp1 == cp2;
204     }
205 
206     /**
207      * Equals any CharSequence with the same chars as this segment.
208      *
209      * <p>
210      * This method does not perform case folding; if you want case-insensitive equality, use
211      * {@link #getCommonPrefixLength}.
212      */
213     @Override
214     public boolean equals(Object other) {
215         if (!(other instanceof CharSequence))
216             return false;
217         return Utility.charSequenceEquals(this, (CharSequence) other);
218     }
219 
220     /** Returns a hash code equivalent to calling .toString().hashCode() */
221     @Override
222     public int hashCode() {
223         return Utility.charSequenceHashCode(this);
224     }
225 
226     /** Returns a string representation useful for debugging. */
227     @Override
228     public String toString() {
229         return str.substring(0, start) + "[" + str.substring(start, end) + "]" + str.substring(end);
230     }
231 
232     /** Returns a String that is equivalent to the CharSequence representation. */
233     public String asString() {
234         return str.substring(start, end);
235     }
236 }
237