• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/* GENERATED SOURCE. DO NOT MODIFY. */
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
*******************************************************************************
* Copyright (C) 2010-2014, International Business Machines
* Corporation and others.  All Rights Reserved.
*******************************************************************************
* FCDUTF16CollationIterator.java, ported from utf16collationiterator.h/.cpp
*
* C++ version created on: 2010oct27
* created by: Markus W. Scherer
*/
14 
15 package ohos.global.icu.impl.coll;
16 
17 import ohos.global.icu.impl.Normalizer2Impl;
18 
19 /**
20  * Incrementally checks the input text for FCD and normalizes where necessary.
21  * @hide exposed on OHOS
22  */
23 public final class FCDUTF16CollationIterator extends UTF16CollationIterator {
24     /**
25      * Partial constructor, see {@link CollationIterator#CollationIterator(CollationData)}.
26      */
FCDUTF16CollationIterator(CollationData d)27     public FCDUTF16CollationIterator(CollationData d) {
28         super(d);
29         nfcImpl = d.nfcImpl;
30     }
31 
FCDUTF16CollationIterator(CollationData data, boolean numeric, CharSequence s, int p)32     public FCDUTF16CollationIterator(CollationData data, boolean numeric, CharSequence s, int p) {
33         super(data, numeric, s, p);
34         rawSeq = s;
35         segmentStart = p;
36         rawLimit = s.length();
37         nfcImpl = data.nfcImpl;
38         checkDir = 1;
39     }
40 
41     @Override
equals(Object other)42     public boolean equals(Object other) {
43         // Skip the UTF16CollationIterator and call its parent.
44         if (!(other instanceof CollationIterator)
45             || !((CollationIterator)this).equals(other)
46             || !(other instanceof FCDUTF16CollationIterator))
47         {
48             return false;
49         }
50         FCDUTF16CollationIterator o = (FCDUTF16CollationIterator)other;
51         // Compare the iterator state but not the text: Assume that the caller does that.
52         if (checkDir != o.checkDir) {
53             return false;
54         }
55         if (checkDir == 0 && (seq == rawSeq) != (o.seq == o.rawSeq)) {
56             return false;
57         }
58         if (checkDir != 0 || seq == rawSeq) {
59             return (pos - rawStart) == (o.pos - /*o.*/ rawStart);
60         }
61         else {
62             return (segmentStart - rawStart) == (o.segmentStart - /*o.*/ rawStart) &&
63                     (pos - start) == (o.pos - o.start);
64         }
65     }
66 
67     @Override
hashCode()68     public int hashCode() {
69         assert false : "hashCode not designed";
70         return 42; // any arbitrary constant will do
71     }
72 
73     @Override
resetToOffset(int newOffset)74     public void resetToOffset(int newOffset) {
75         reset();
76         seq = rawSeq;
77         start = segmentStart = pos = rawStart + newOffset;
78         limit = rawLimit;
79         checkDir = 1;
80     }
81 
82     @Override
getOffset()83     public int getOffset() {
84         if(checkDir != 0 || seq == rawSeq) {
85             return pos - rawStart;
86         } else if(pos == start) {
87             return segmentStart - rawStart;
88         } else {
89             return segmentLimit - rawStart;
90         }
91     }
92 
93     @Override
setText(boolean numeric, CharSequence s, int p)94     public void setText(boolean numeric, CharSequence s, int p) {
95         super.setText(numeric, s, p);
96         rawSeq = s;
97         segmentStart = p;
98         rawLimit = limit = s.length();
99         checkDir = 1;
100     }
101 
102     @Override
nextCodePoint()103     public int nextCodePoint() {
104         char c;
105         for(;;) {
106             if(checkDir > 0) {
107                 if(pos == limit) {
108                     return Collation.SENTINEL_CP;
109                 }
110                 c = seq.charAt(pos++);
111                 if(CollationFCD.hasTccc(c)) {
112                     if(CollationFCD.maybeTibetanCompositeVowel(c) ||
113                             (pos != limit && CollationFCD.hasLccc(seq.charAt(pos)))) {
114                         --pos;
115                         nextSegment();
116                         c = seq.charAt(pos++);
117                     }
118                 }
119                 break;
120             } else if(checkDir == 0 && pos != limit) {
121                 c = seq.charAt(pos++);
122                 break;
123             } else {
124                 switchToForward();
125             }
126         }
127         char trail;
128         if(Character.isHighSurrogate(c) && pos != limit &&
129                 Character.isLowSurrogate(trail = seq.charAt(pos))) {
130             ++pos;
131             return Character.toCodePoint(c, trail);
132         } else {
133             return c;
134         }
135     }
136 
137     @Override
previousCodePoint()138     public int previousCodePoint() {
139         char c;
140         for(;;) {
141             if(checkDir < 0) {
142                 if(pos == start) {
143                     return Collation.SENTINEL_CP;
144                 }
145                 c = seq.charAt(--pos);
146                 if(CollationFCD.hasLccc(c)) {
147                     if(CollationFCD.maybeTibetanCompositeVowel(c) ||
148                             (pos != start && CollationFCD.hasTccc(seq.charAt(pos - 1)))) {
149                         ++pos;
150                         previousSegment();
151                         c = seq.charAt(--pos);
152                     }
153                 }
154                 break;
155             } else if(checkDir == 0 && pos != start) {
156                 c = seq.charAt(--pos);
157                 break;
158             } else {
159                 switchToBackward();
160             }
161         }
162         char lead;
163         if(Character.isLowSurrogate(c) && pos != start &&
164                 Character.isHighSurrogate(lead = seq.charAt(pos - 1))) {
165             --pos;
166             return Character.toCodePoint(lead, c);
167         } else {
168             return c;
169         }
170     }
171 
172     @Override
handleNextCE32()173     protected long handleNextCE32() {
174         char c;
175         for(;;) {
176             if(checkDir > 0) {
177                 if(pos == limit) {
178                     return NO_CP_AND_CE32;
179                 }
180                 c = seq.charAt(pos++);
181                 if(CollationFCD.hasTccc(c)) {
182                     if(CollationFCD.maybeTibetanCompositeVowel(c) ||
183                             (pos != limit && CollationFCD.hasLccc(seq.charAt(pos)))) {
184                         --pos;
185                         nextSegment();
186                         c = seq.charAt(pos++);
187                     }
188                 }
189                 break;
190             } else if(checkDir == 0 && pos != limit) {
191                 c = seq.charAt(pos++);
192                 break;
193             } else {
194                 switchToForward();
195             }
196         }
197         return makeCodePointAndCE32Pair(c, trie.getFromU16SingleLead(c));
198     }
199 
200     /* boolean foundNULTerminator(); */
201 
202     @Override
forwardNumCodePoints(int num)203     protected void forwardNumCodePoints(int num) {
204         // Specify the class to avoid a virtual-function indirection.
205         // In Java, we would declare this class final.
206         while(num > 0 && nextCodePoint() >= 0) {
207             --num;
208         }
209     }
210 
211     @Override
backwardNumCodePoints(int num)212     protected void backwardNumCodePoints(int num) {
213         // Specify the class to avoid a virtual-function indirection.
214         // In Java, we would declare this class final.
215         while(num > 0 && previousCodePoint() >= 0) {
216             --num;
217         }
218     }
219 
220     /**
221      * Switches to forward checking if possible.
222      * To be called when checkDir < 0 || (checkDir == 0 && pos == limit).
223      * Returns with checkDir > 0 || (checkDir == 0 && pos != limit).
224      */
switchToForward()225     private void switchToForward() {
226         assert((checkDir < 0 && seq == rawSeq) || (checkDir == 0 && pos == limit));
227         if(checkDir < 0) {
228             // Turn around from backward checking.
229             start = segmentStart = pos;
230             if(pos == segmentLimit) {
231                 limit = rawLimit;
232                 checkDir = 1;  // Check forward.
233             } else {  // pos < segmentLimit
234                 checkDir = 0;  // Stay in FCD segment.
235             }
236         } else {
237             // Reached the end of the FCD segment.
238             if(seq == rawSeq) {
239                 // The input text segment is FCD, extend it forward.
240             } else {
241                 // The input text segment needed to be normalized.
242                 // Switch to checking forward from it.
243                 seq = rawSeq;
244                 pos = start = segmentStart = segmentLimit;
245                 // Note: If this segment is at the end of the input text,
246                 // then it might help to return false to indicate that, so that
247                 // we do not have to re-check and normalize when we turn around and go backwards.
248                 // However, that would complicate the call sites for an optimization of an unusual case.
249             }
250             limit = rawLimit;
251             checkDir = 1;
252         }
253     }
254 
255     /**
256      * Extend the FCD text segment forward or normalize around pos.
257      * To be called when checkDir > 0 && pos != limit.
258      * Returns with checkDir == 0 and pos != limit.
259      */
260     private void nextSegment() {
261         assert(checkDir > 0 && seq == rawSeq && pos != limit);
262         // The input text [segmentStart..pos[ passes the FCD check.
263         int p = pos;
264         int prevCC = 0;
265         for(;;) {
266             // Fetch the next character's fcd16 value.
267             int q = p;
268             int c = Character.codePointAt(seq, p);
269             p += Character.charCount(c);
270             int fcd16 = nfcImpl.getFCD16(c);
271             int leadCC = fcd16 >> 8;
272             if(leadCC == 0 && q != pos) {
273                 // FCD boundary before the [q, p[ character.
274                 limit = segmentLimit = q;
275                 break;
276             }
277             if(leadCC != 0 && (prevCC > leadCC || CollationFCD.isFCD16OfTibetanCompositeVowel(fcd16))) {
278                 // Fails FCD check. Find the next FCD boundary and normalize.
279                 do {
280                     q = p;
281                     if(p == rawLimit) { break; }
282                     c = Character.codePointAt(seq, p);
283                     p += Character.charCount(c);
284                 } while(nfcImpl.getFCD16(c) > 0xff);
285                 normalize(pos, q);
286                 pos = start;
287                 break;
288             }
289             prevCC = fcd16 & 0xff;
290             if(p == rawLimit || prevCC == 0) {
291                 // FCD boundary after the last character.
292                 limit = segmentLimit = p;
293                 break;
294             }
295         }
296         assert(pos != limit);
297         checkDir = 0;
298     }
299 
300     /**
301      * Switches to backward checking.
302      * To be called when checkDir > 0 || (checkDir == 0 && pos == start).
303      * Returns with checkDir < 0 || (checkDir == 0 && pos != start).
304      */
305     private void switchToBackward() {
306         assert((checkDir > 0 && seq == rawSeq) || (checkDir == 0 && pos == start));
307         if(checkDir > 0) {
308             // Turn around from forward checking.
309             limit = segmentLimit = pos;
310             if(pos == segmentStart) {
311                 start = rawStart;
312                 checkDir = -1;  // Check backward.
313             } else {  // pos > segmentStart
314                 checkDir = 0;  // Stay in FCD segment.
315             }
316         } else {
317             // Reached the start of the FCD segment.
318             if(seq == rawSeq) {
319                 // The input text segment is FCD, extend it backward.
320             } else {
321                 // The input text segment needed to be normalized.
322                 // Switch to checking backward from it.
323                 seq = rawSeq;
324                 pos = limit = segmentLimit = segmentStart;
325             }
326             start = rawStart;
327             checkDir = -1;
328         }
329     }
330 
331     /**
332      * Extend the FCD text segment backward or normalize around pos.
333      * To be called when checkDir < 0 && pos != start.
334      * Returns with checkDir == 0 and pos != start.
335      */
336     private void previousSegment() {
337         assert(checkDir < 0 && seq == rawSeq && pos != start);
338         // The input text [pos..segmentLimit[ passes the FCD check.
339         int p = pos;
340         int nextCC = 0;
341         for(;;) {
342             // Fetch the previous character's fcd16 value.
343             int q = p;
344             int c = Character.codePointBefore(seq, p);
345             p -= Character.charCount(c);
346             int fcd16 = nfcImpl.getFCD16(c);
347             int trailCC = fcd16 & 0xff;
348             if(trailCC == 0 && q != pos) {
349                 // FCD boundary after the [p, q[ character.
350                 start = segmentStart = q;
351                 break;
352             }
353             if(trailCC != 0 && ((nextCC != 0 && trailCC > nextCC) ||
354                                 CollationFCD.isFCD16OfTibetanCompositeVowel(fcd16))) {
355                 // Fails FCD check. Find the previous FCD boundary and normalize.
356                 do {
357                     q = p;
358                     if(fcd16 <= 0xff || p == rawStart) { break; }
359                     c = Character.codePointBefore(seq, p);
360                     p -= Character.charCount(c);
361                 } while((fcd16 = nfcImpl.getFCD16(c)) != 0);
362                 normalize(q, pos);
363                 pos = limit;
364                 break;
365             }
366             nextCC = fcd16 >> 8;
367             if(p == rawStart || nextCC == 0) {
368                 // FCD boundary before the following character.
369                 start = segmentStart = p;
370                 break;
371             }
372         }
373         assert(pos != start);
374         checkDir = 0;
375     }
376 
377     private void normalize(int from, int to) {
378         if(normalized == null) {
379             normalized = new StringBuilder();
380         }
381         // NFD without argument checking.
382         nfcImpl.decompose(rawSeq, from, to, normalized, to - from);
383         // Switch collation processing into the FCD buffer
384         // with the result of normalizing [segmentStart, segmentLimit[.
385         segmentStart = from;
386         segmentLimit = to;
387         seq = normalized;
388         start = 0;
389         limit = start + normalized.length();
390     }
391 
392     // Text pointers: The input text is rawSeq[rawStart, rawLimit[.
393     // (In C++, these are const UChar * pointers.
394     // In Java, we use CharSequence rawSeq and the parent class' seq
395     // together with int indexes.)
396     //
397     // checkDir > 0:
398     //
399     // The input text rawSeq[segmentStart..pos[ passes the FCD check.
400     // Moving forward checks incrementally.
401     // segmentLimit is undefined. seq == rawSeq. limit == rawLimit.
402     //
403     // checkDir < 0:
404     // The input text rawSeq[pos..segmentLimit[ passes the FCD check.
405     // Moving backward checks incrementally.
406     // segmentStart is undefined. seq == rawSeq. start == rawStart.
407     //
408     // checkDir == 0:
409     //
410     // The input text rawSeq[segmentStart..segmentLimit[ is being processed.
411     // These pointers are at FCD boundaries.
412     // Either this text segment already passes the FCD check
413     // and seq==rawSeq && segmentStart==start<=pos<=limit==segmentLimit,
414     // or the current segment had to be normalized so that
415     // rawSeq[segmentStart..segmentLimit[ turned into the normalized string,
416     // corresponding to seq==normalized && 0==start<=pos<=limit==start+normalized.length().
417     private CharSequence rawSeq;
418     private static final int rawStart = 0;
419     private int segmentStart;
420     private int segmentLimit;
421     private int rawLimit;
422 
423     private final Normalizer2Impl nfcImpl;
424     private StringBuilder normalized;
425     // Direction of incremental FCD check. See comments before rawStart.
426     private int checkDir;
427 }
428