• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/* GENERATED SOURCE. DO NOT MODIFY. */
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
*******************************************************************************
* Copyright (C) 2012-2014, International Business Machines
* Corporation and others.  All Rights Reserved.
*******************************************************************************
* FCDIterCollationIterator.java, ported from uitercollationiterator.h/.cpp
*
* C++ version created on: 2012sep23 (from utf16collationiterator.h)
* created by: Markus W. Scherer
*/
14 
15 package ohos.global.icu.impl.coll;
16 
17 import ohos.global.icu.impl.Normalizer2Impl;
18 import ohos.global.icu.text.UCharacterIterator;
19 
20 /**
21  * Incrementally checks the input text for FCD and normalizes where necessary.
22  * @hide exposed on OHOS
23  */
24 public final class FCDIterCollationIterator extends IterCollationIterator {
FCDIterCollationIterator(CollationData data, boolean numeric, UCharacterIterator ui, int startIndex)25     public FCDIterCollationIterator(CollationData data, boolean numeric,
26             UCharacterIterator ui, int startIndex) {
27         super(data, numeric, ui);
28         state = State.ITER_CHECK_FWD;
29         start = startIndex;
30         nfcImpl = data.nfcImpl;
31     }
32 
33     @Override
resetToOffset(int newOffset)34     public void resetToOffset(int newOffset) {
35         super.resetToOffset(newOffset);
36         start = newOffset;
37         state = State.ITER_CHECK_FWD;
38     }
39 
40     @Override
getOffset()41     public int getOffset() {
42         if(state.compareTo(State.ITER_CHECK_BWD) <= 0) {
43             return iter.getIndex();
44         } else if(state == State.ITER_IN_FCD_SEGMENT) {
45             return pos;
46         } else if(pos == 0) {
47             return start;
48         } else {
49             return limit;
50         }
51     }
52 
53     @Override
nextCodePoint()54     public int nextCodePoint() {
55         int c;
56         for(;;) {
57             if(state == State.ITER_CHECK_FWD) {
58                 c = iter.next();
59                 if(c < 0) {
60                     return c;
61                 }
62                 if(CollationFCD.hasTccc(c)) {
63                     if(CollationFCD.maybeTibetanCompositeVowel(c) ||
64                             CollationFCD.hasLccc(iter.current())) {
65                         iter.previous();
66                         if(!nextSegment()) {
67                             return Collation.SENTINEL_CP;
68                         }
69                         continue;
70                     }
71                 }
72                 if(isLeadSurrogate(c)) {
73                     int trail = iter.next();
74                     if(isTrailSurrogate(trail)) {
75                         return Character.toCodePoint((char)c, (char)trail);
76                     } else if(trail >= 0) {
77                         iter.previous();
78                     }
79                 }
80                 return c;
81             } else if(state == State.ITER_IN_FCD_SEGMENT && pos != limit) {
82                 c = iter.nextCodePoint();
83                 pos += Character.charCount(c);
84                 assert(c >= 0);
85                 return c;
86             } else if(state.compareTo(State.IN_NORM_ITER_AT_LIMIT) >= 0 &&
87                     pos != normalized.length()) {
88                 c = normalized.codePointAt(pos);
89                 pos += Character.charCount(c);
90                 return c;
91             } else {
92                 switchToForward();
93             }
94         }
95     }
96 
97     @Override
previousCodePoint()98     public int previousCodePoint() {
99         int c;
100         for(;;) {
101             if(state == State.ITER_CHECK_BWD) {
102                 c = iter.previous();
103                 if(c < 0) {
104                     start = pos = 0;
105                     state = State.ITER_IN_FCD_SEGMENT;
106                     return Collation.SENTINEL_CP;
107                 }
108                 if(CollationFCD.hasLccc(c)) {
109                     int prev = Collation.SENTINEL_CP;
110                     if(CollationFCD.maybeTibetanCompositeVowel(c) ||
111                             CollationFCD.hasTccc(prev = iter.previous())) {
112                         iter.next();
113                         if(prev >= 0) {
114                             iter.next();
115                         }
116                         if(!previousSegment()) {
117                             return Collation.SENTINEL_CP;
118                         }
119                         continue;
120                     }
121                     // hasLccc(trail)=true for all trail surrogates
122                     if(isTrailSurrogate(c)) {
123                         if(prev < 0) {
124                             prev = iter.previous();
125                         }
126                         if(isLeadSurrogate(prev)) {
127                             return Character.toCodePoint((char)prev, (char)c);
128                         }
129                     }
130                     if(prev >= 0) {
131                         iter.next();
132                     }
133                 }
134                 return c;
135             } else if(state == State.ITER_IN_FCD_SEGMENT && pos != start) {
136                 c = iter.previousCodePoint();
137                 pos -= Character.charCount(c);
138                 assert(c >= 0);
139                 return c;
140             } else if(state.compareTo(State.IN_NORM_ITER_AT_LIMIT) >= 0 && pos != 0) {
141                 c = normalized.codePointBefore(pos);
142                 pos -= Character.charCount(c);
143                 return c;
144             } else {
145                 switchToBackward();
146             }
147         }
148     }
149 
150     @Override
handleNextCE32()151     protected long handleNextCE32() {
152         int c;
153         for(;;) {
154             if(state == State.ITER_CHECK_FWD) {
155                 c = iter.next();
156                 if(c < 0) {
157                     return NO_CP_AND_CE32;
158                 }
159                 if(CollationFCD.hasTccc(c)) {
160                     if(CollationFCD.maybeTibetanCompositeVowel(c) ||
161                             CollationFCD.hasLccc(iter.current())) {
162                         iter.previous();
163                         if(!nextSegment()) {
164                             c = Collation.SENTINEL_CP;
165                             return Collation.FALLBACK_CE32;
166                         }
167                         continue;
168                     }
169                 }
170                 break;
171             } else if(state == State.ITER_IN_FCD_SEGMENT && pos != limit) {
172                 c = iter.next();
173                 ++pos;
174                 assert(c >= 0);
175                 break;
176             } else if(state.compareTo(State.IN_NORM_ITER_AT_LIMIT) >= 0 &&
177                     pos != normalized.length()) {
178                 c = normalized.charAt(pos++);
179                 break;
180             } else {
181                 switchToForward();
182             }
183         }
184         return makeCodePointAndCE32Pair(c, trie.getFromU16SingleLead((char)c));
185     }
186 
187     @Override
handleGetTrailSurrogate()188     protected char handleGetTrailSurrogate() {
189         if(state.compareTo(State.ITER_IN_FCD_SEGMENT) <= 0) {
190             int trail = iter.next();
191             if(isTrailSurrogate(trail)) {
192                 if(state == State.ITER_IN_FCD_SEGMENT) { ++pos; }
193             } else if(trail >= 0) {
194                 iter.previous();
195             }
196             return (char)trail;
197         } else {
198             assert(pos < normalized.length());
199             char trail;
200             if(Character.isLowSurrogate(trail = normalized.charAt(pos))) { ++pos; }
201             return trail;
202         }
203     }
204 
205     @Override
206     protected void forwardNumCodePoints(int num) {
207         // Specify the class to avoid a virtual-function indirection.
208         // In Java, we would declare this class final.
209         while(num > 0 && nextCodePoint() >= 0) {
210             --num;
211         }
212     }
213 
214     @Override
backwardNumCodePoints(int num)215     protected void backwardNumCodePoints(int num) {
216         // Specify the class to avoid a virtual-function indirection.
217         // In Java, we would declare this class final.
218         while(num > 0 && previousCodePoint() >= 0) {
219             --num;
220         }
221     }
222 
223     /**
224      * Switches to forward checking if possible.
225      */
switchToForward()226     private void switchToForward() {
227         assert(state == State.ITER_CHECK_BWD ||
228                 (state == State.ITER_IN_FCD_SEGMENT && pos == limit) ||
229                 (state.compareTo(State.IN_NORM_ITER_AT_LIMIT) >= 0 && pos == normalized.length()));
230         if(state == State.ITER_CHECK_BWD) {
231             // Turn around from backward checking.
232             start = pos = iter.getIndex();
233             if(pos == limit) {
234                 state = State.ITER_CHECK_FWD;  // Check forward.
235             } else {  // pos < limit
236                 state = State.ITER_IN_FCD_SEGMENT;  // Stay in FCD segment.
237             }
238         } else {
239             // Reached the end of the FCD segment.
240             if(state == State.ITER_IN_FCD_SEGMENT) {
241                 // The input text segment is FCD, extend it forward.
242             } else {
243                 // The input text segment needed to be normalized.
244                 // Switch to checking forward from it.
245                 if(state == State.IN_NORM_ITER_AT_START) {
246                     iter.moveIndex(limit - start);
247                 }
248                 start = limit;
249             }
250             state = State.ITER_CHECK_FWD;
251         }
252     }
253 
254     /**
255      * Extends the FCD text segment forward or normalizes around pos.
256      * @return true if success
257      */
nextSegment()258     private boolean nextSegment() {
259         assert(state == State.ITER_CHECK_FWD);
260         // The input text [start..(iter index)[ passes the FCD check.
261         pos = iter.getIndex();
262         // Collect the characters being checked, in case they need to be normalized.
263         if(s == null) {
264             s = new StringBuilder();
265         } else {
266             s.setLength(0);
267         }
268         int prevCC = 0;
269         for(;;) {
270             // Fetch the next character and its fcd16 value.
271             int c = iter.nextCodePoint();
272             if(c < 0) { break; }
273             int fcd16 = nfcImpl.getFCD16(c);
274             int leadCC = fcd16 >> 8;
275             if(leadCC == 0 && s.length() != 0) {
276                 // FCD boundary before this character.
277                 iter.previousCodePoint();
278                 break;
279             }
280             s.appendCodePoint(c);
281             if(leadCC != 0 && (prevCC > leadCC || CollationFCD.isFCD16OfTibetanCompositeVowel(fcd16))) {
282                 // Fails FCD check. Find the next FCD boundary and normalize.
283                 for(;;) {
284                     c = iter.nextCodePoint();
285                     if(c < 0) { break; }
286                     if(nfcImpl.getFCD16(c) <= 0xff) {
287                         iter.previousCodePoint();
288                         break;
289                     }
290                     s.appendCodePoint(c);
291                 }
292                 normalize(s);
293                 start = pos;
294                 limit = pos + s.length();
295                 state = State.IN_NORM_ITER_AT_LIMIT;
296                 pos = 0;
297                 return true;
298             }
299             prevCC = fcd16 & 0xff;
300             if(prevCC == 0) {
301                 // FCD boundary after the last character.
302                 break;
303             }
304         }
305         limit = pos + s.length();
306         assert(pos != limit);
307         iter.moveIndex(-s.length());
308         state = State.ITER_IN_FCD_SEGMENT;
309         return true;
310     }
311 
312     /**
313      * Switches to backward checking.
314      */
switchToBackward()315     private void switchToBackward() {
316         assert(state == State.ITER_CHECK_FWD ||
317                 (state == State.ITER_IN_FCD_SEGMENT && pos == start) ||
318                 (state.compareTo(State.IN_NORM_ITER_AT_LIMIT) >= 0 && pos == 0));
319         if(state == State.ITER_CHECK_FWD) {
320             // Turn around from forward checking.
321             limit = pos = iter.getIndex();
322             if(pos == start) {
323                 state = State.ITER_CHECK_BWD;  // Check backward.
324             } else {  // pos > start
325                 state = State.ITER_IN_FCD_SEGMENT;  // Stay in FCD segment.
326             }
327         } else {
328             // Reached the start of the FCD segment.
329             if(state == State.ITER_IN_FCD_SEGMENT) {
330                 // The input text segment is FCD, extend it backward.
331             } else {
332                 // The input text segment needed to be normalized.
333                 // Switch to checking backward from it.
334                 if(state == State.IN_NORM_ITER_AT_LIMIT) {
335                     iter.moveIndex(start - limit);
336                 }
337                 limit = start;
338             }
339             state = State.ITER_CHECK_BWD;
340         }
341     }
342 
343     /**
344      * Extends the FCD text segment backward or normalizes around pos.
345      * @return true if success
346      */
previousSegment()347     private boolean previousSegment() {
348         assert(state == State.ITER_CHECK_BWD);
349         // The input text [(iter index)..limit[ passes the FCD check.
350         pos = iter.getIndex();
351         // Collect the characters being checked, in case they need to be normalized.
352         if(s == null) {
353             s = new StringBuilder();
354         } else {
355             s.setLength(0);
356         }
357         int nextCC = 0;
358         for(;;) {
359             // Fetch the previous character and its fcd16 value.
360             int c = iter.previousCodePoint();
361             if(c < 0) { break; }
362             int fcd16 = nfcImpl.getFCD16(c);
363             int trailCC = fcd16 & 0xff;
364             if(trailCC == 0 && s.length() != 0) {
365                 // FCD boundary after this character.
366                 iter.nextCodePoint();
367                 break;
368             }
369             s.appendCodePoint(c);
370             if(trailCC != 0 && ((nextCC != 0 && trailCC > nextCC) ||
371                                 CollationFCD.isFCD16OfTibetanCompositeVowel(fcd16))) {
372                 // Fails FCD check. Find the previous FCD boundary and normalize.
373                 while(fcd16 > 0xff) {
374                     c = iter.previousCodePoint();
375                     if(c < 0) { break; }
376                     fcd16 = nfcImpl.getFCD16(c);
377                     if(fcd16 == 0) {
378                         iter.nextCodePoint();
379                         break;
380                     }
381                     s.appendCodePoint(c);
382                 }
383                 s.reverse();
384                 normalize(s);
385                 limit = pos;
386                 start = pos - s.length();
387                 state = State.IN_NORM_ITER_AT_START;
388                 pos = normalized.length();
389                 return true;
390             }
391             nextCC = fcd16 >> 8;
392             if(nextCC == 0) {
393                 // FCD boundary before the following character.
394                 break;
395             }
396         }
397         start = pos - s.length();
398         assert(pos != start);
399         iter.moveIndex(s.length());
400         state = State.ITER_IN_FCD_SEGMENT;
401         return true;
402     }
403 
normalize(CharSequence s)404     private void normalize(CharSequence s) {
405         if(normalized == null) {
406             normalized = new StringBuilder();
407         }
408         // NFD without argument checking.
409         nfcImpl.decompose(s, normalized);
410     }
411 
412     private enum State {
413         /**
414          * The input text [start..(iter index)[ passes the FCD check.
415          * Moving forward checks incrementally.
416          * pos & limit are undefined.
417          */
418         ITER_CHECK_FWD,
419         /**
420          * The input text [(iter index)..limit[ passes the FCD check.
421          * Moving backward checks incrementally.
422          * start & pos are undefined.
423          */
424         ITER_CHECK_BWD,
425         /**
426          * The input text [start..limit[ passes the FCD check.
427          * pos tracks the current text index.
428          */
429         ITER_IN_FCD_SEGMENT,
430         /**
431          * The input text [start..limit[ failed the FCD check and was normalized.
432          * pos tracks the current index in the normalized string.
433          * The text iterator is at the limit index.
434          */
435         IN_NORM_ITER_AT_LIMIT,
436         /**
437          * The input text [start..limit[ failed the FCD check and was normalized.
438          * pos tracks the current index in the normalized string.
439          * The text iterator is at the start index.
440          */
441         IN_NORM_ITER_AT_START
442     }
443 
444     private State state;
445 
446     private int start;
447     private int pos;
448     private int limit;
449 
450     private final Normalizer2Impl nfcImpl;
451     private StringBuilder s;
452     private StringBuilder normalized;
453 }
454