• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 /*
5 *******************************************************************************
6 * Copyright (C) 2013-2014, International Business Machines
7 * Corporation and others.  All Rights Reserved.
8 *******************************************************************************
9 * ContractionsAndExpansions.java, ported from collationsets.h/.cpp
10 *
11 * C++ version created on: 2013feb09
12 * created by: Markus W. Scherer
13 */
14 
15 package ohos.global.icu.impl.coll;
16 
17 import java.util.Iterator;
18 
19 import ohos.global.icu.impl.Trie2;
20 import ohos.global.icu.text.UnicodeSet;
21 import ohos.global.icu.util.CharsTrie;
22 import ohos.global.icu.util.CharsTrie.Entry;
23 
24 /**
25  * @hide exposed on OHOS
26  */
27 public final class ContractionsAndExpansions {
    // C++: The corresponding fields are @internal and public only for access by a callback.
    // In this Java port they are private.

    // Collation data currently being processed; assigned by forData() and forCodePoint().
    private CollationData data;
    // Receives contraction strings (prefix + code point + suffix); may be null.
    private UnicodeSet contractions;
    // Receives expanding code points and strings; may be null.
    private UnicodeSet expansions;
    // Optional callback that is handed the CEs that are found; may be null.
    private CESink sink;
    // Whether prefix (pre-context) mappings are enumerated by handlePrefixes().
    private boolean addPrefixes;
    // 0: no tailoring  -1: collect the tailored set  +1: exclude tailored code points
    private int checkTailored = 0;  // -1: collected tailored  +1: exclude tailored
    // Code points that have a (non-fallback) mapping in the tailoring data.
    private UnicodeSet tailored = new UnicodeSet();
    // Scratch set, allocated lazily by enumCnERange() for partially tailored ranges.
    private UnicodeSet ranges;
    // Current prefix in un-reversed order; empty while no prefix is being handled.
    private StringBuilder unreversedPrefix = new StringBuilder();
    // Current contraction suffix, or null while no suffix is being handled.
    private String suffix;
    // Scratch buffer for the CEs of an expansion that are passed to the sink.
    private long[] ces = new long[Collation.MAX_EXPANSION_LENGTH];
40 
41     /**
42      * @hide exposed on OHOS
43      */
44     public static interface CESink {
handleCE(long ce)45         void handleCE(long ce);
handleExpansion(long ces[], int start, int length)46         void handleExpansion(long ces[], int start, int length);
47     }
48 
ContractionsAndExpansions(UnicodeSet con, UnicodeSet exp, CESink s, boolean prefixes)49     public ContractionsAndExpansions(UnicodeSet con, UnicodeSet exp, CESink s, boolean prefixes) {
50         contractions = con;
51         expansions = exp;
52         sink = s;
53         addPrefixes = prefixes;
54     }
55 
forData(CollationData d)56     public void forData(CollationData d) {
57         // Add all from the data, can be tailoring or base.
58         if (d.base != null) {
59             checkTailored = -1;
60         }
61         data = d;
62         Iterator<Trie2.Range> trieIterator = data.trie.iterator();
63         Trie2.Range range;
64         while (trieIterator.hasNext() && !(range = trieIterator.next()).leadSurrogate) {
65             enumCnERange(range.startCodePoint, range.endCodePoint, range.value, this);
66         }
67         if (d.base == null) {
68             return;
69         }
70         // Add all from the base data but only for un-tailored code points.
71         tailored.freeze();
72         checkTailored = 1;
73         data = d.base;
74         trieIterator = data.trie.iterator();
75         while (trieIterator.hasNext() && !(range = trieIterator.next()).leadSurrogate) {
76             enumCnERange(range.startCodePoint, range.endCodePoint, range.value, this);
77         }
78     }
79 
enumCnERange(int start, int end, int ce32, ContractionsAndExpansions cne)80     private void enumCnERange(int start, int end, int ce32, ContractionsAndExpansions cne) {
81         if (cne.checkTailored == 0) {
82             // There is no tailoring.
83             // No need to collect nor check the tailored set.
84         } else if (cne.checkTailored < 0) {
85             // Collect the set of code points with mappings in the tailoring data.
86             if (ce32 == Collation.FALLBACK_CE32) {
87                 return; // fallback to base, not tailored
88             } else {
89                 cne.tailored.add(start, end);
90             }
91             // checkTailored > 0: Exclude tailored ranges from the base data enumeration.
92         } else if (start == end) {
93             if (cne.tailored.contains(start)) {
94                 return;
95             }
96         } else if (cne.tailored.containsSome(start, end)) {
97             if (cne.ranges == null) {
98                 cne.ranges = new UnicodeSet();
99             }
100             cne.ranges.set(start, end).removeAll(cne.tailored);
101             int count = cne.ranges.getRangeCount();
102             for (int i = 0; i < count; ++i) {
103                 cne.handleCE32(cne.ranges.getRangeStart(i), cne.ranges.getRangeEnd(i), ce32);
104             }
105         }
106         cne.handleCE32(start, end, ce32);
107     }
108 
forCodePoint(CollationData d, int c)109     public void forCodePoint(CollationData d, int c) {
110         int ce32 = d.getCE32(c);
111         if (ce32 == Collation.FALLBACK_CE32) {
112             d = d.base;
113             ce32 = d.getCE32(c);
114         }
115         data = d;
116         handleCE32(c, c, ce32);
117     }
118 
handleCE32(int start, int end, int ce32)119     private void handleCE32(int start, int end, int ce32) {
120         for (;;) {
121             if ((ce32 & 0xff) < Collation.SPECIAL_CE32_LOW_BYTE) {
122                 // !isSpecialCE32()
123                 if (sink != null) {
124                     sink.handleCE(Collation.ceFromSimpleCE32(ce32));
125                 }
126                 return;
127             }
128             switch (Collation.tagFromCE32(ce32)) {
129             case Collation.FALLBACK_TAG:
130                 return;
131             case Collation.RESERVED_TAG_3:
132             case Collation.BUILDER_DATA_TAG:
133             case Collation.LEAD_SURROGATE_TAG:
134                 // Java porting note: U_INTERNAL_PROGRAM_ERROR is set to errorCode in ICU4C.
135                 throw new AssertionError(
136                         String.format("Unexpected CE32 tag type %d for ce32=0x%08x",
137                                 Collation.tagFromCE32(ce32), ce32));
138             case Collation.LONG_PRIMARY_TAG:
139                 if (sink != null) {
140                     sink.handleCE(Collation.ceFromLongPrimaryCE32(ce32));
141                 }
142                 return;
143             case Collation.LONG_SECONDARY_TAG:
144                 if (sink != null) {
145                     sink.handleCE(Collation.ceFromLongSecondaryCE32(ce32));
146                 }
147                 return;
148             case Collation.LATIN_EXPANSION_TAG:
149                 if (sink != null) {
150                     ces[0] = Collation.latinCE0FromCE32(ce32);
151                     ces[1] = Collation.latinCE1FromCE32(ce32);
152                     sink.handleExpansion(ces, 0, 2);
153                 }
154                 // Optimization: If we have a prefix,
155                 // then the relevant strings have been added already.
156                 if (unreversedPrefix.length() == 0) {
157                     addExpansions(start, end);
158                 }
159                 return;
160             case Collation.EXPANSION32_TAG:
161                 if (sink != null) {
162                     int idx = Collation.indexFromCE32(ce32);
163                     int length = Collation.lengthFromCE32(ce32);
164                     for (int i = 0; i < length; ++i) {
165                         ces[i] = Collation.ceFromCE32(data.ce32s[idx + i]);
166                     }
167                     sink.handleExpansion(ces, 0, length);
168                 }
169                 // Optimization: If we have a prefix,
170                 // then the relevant strings have been added already.
171                 if (unreversedPrefix.length() == 0) {
172                     addExpansions(start, end);
173                 }
174                 return;
175             case Collation.EXPANSION_TAG:
176                 if (sink != null) {
177                     int idx = Collation.indexFromCE32(ce32);
178                     int length = Collation.lengthFromCE32(ce32);
179                     sink.handleExpansion(data.ces, idx, length);
180                 }
181                 // Optimization: If we have a prefix,
182                 // then the relevant strings have been added already.
183                 if (unreversedPrefix.length() == 0) {
184                     addExpansions(start, end);
185                 }
186                 return;
187             case Collation.PREFIX_TAG:
188                 handlePrefixes(start, end, ce32);
189                 return;
190             case Collation.CONTRACTION_TAG:
191                 handleContractions(start, end, ce32);
192                 return;
193             case Collation.DIGIT_TAG:
194                 // Fetch the non-numeric-collation CE32 and continue.
195                 ce32 = data.ce32s[Collation.indexFromCE32(ce32)];
196                 break;
197             case Collation.U0000_TAG:
198                 assert (start == 0 && end == 0);
199                 // Fetch the normal ce32 for U+0000 and continue.
200                 ce32 = data.ce32s[0];
201                 break;
202             case Collation.HANGUL_TAG:
203                 if (sink != null) {
204                     // TODO: This should be optimized,
205                     // especially if [start..end] is the complete Hangul range. (assert that)
206                     UTF16CollationIterator iter = new UTF16CollationIterator(data);
207                     StringBuilder hangul = new StringBuilder(1);
208                     for (int c = start; c <= end; ++c) {
209                         hangul.setLength(0);
210                         hangul.appendCodePoint(c);
211                         iter.setText(false, hangul, 0);
212                         int length = iter.fetchCEs();
213                         // Ignore the terminating non-CE.
214                         assert (length >= 2 && iter.getCE(length - 1) == Collation.NO_CE);
215                         sink.handleExpansion(iter.getCEs(), 0, length - 1);
216                     }
217                 }
218                 // Optimization: If we have a prefix,
219                 // then the relevant strings have been added already.
220                 if (unreversedPrefix.length() == 0) {
221                     addExpansions(start, end);
222                 }
223                 return;
224             case Collation.OFFSET_TAG:
225                 // Currently no need to send offset CEs to the sink.
226                 return;
227             case Collation.IMPLICIT_TAG:
228                 // Currently no need to send implicit CEs to the sink.
229                 return;
230             }
231         }
232     }
233 
handlePrefixes(int start, int end, int ce32)234     private void handlePrefixes(int start, int end, int ce32) {
235         int index = Collation.indexFromCE32(ce32);
236         ce32 = data.getCE32FromContexts(index); // Default if no prefix match.
237         handleCE32(start, end, ce32);
238         if (!addPrefixes) {
239             return;
240         }
241         CharsTrie.Iterator prefixes = new CharsTrie(data.contexts, index + 2).iterator();
242         while (prefixes.hasNext()) {
243             Entry e = prefixes.next();
244             setPrefix(e.chars);
245             // Prefix/pre-context mappings are special kinds of contractions
246             // that always yield expansions.
247             addStrings(start, end, contractions);
248             addStrings(start, end, expansions);
249             handleCE32(start, end, e.value);
250         }
251         resetPrefix();
252     }
253 
handleContractions(int start, int end, int ce32)254     void handleContractions(int start, int end, int ce32) {
255         int index = Collation.indexFromCE32(ce32);
256         if ((ce32 & Collation.CONTRACT_SINGLE_CP_NO_MATCH) != 0) {
257             // No match on the single code point.
258             // We are underneath a prefix, and the default mapping is just
259             // a fallback to the mappings for a shorter prefix.
260             assert (unreversedPrefix.length() != 0);
261         } else {
262             ce32 = data.getCE32FromContexts(index); // Default if no suffix match.
263             assert (!Collation.isContractionCE32(ce32));
264             handleCE32(start, end, ce32);
265         }
266         CharsTrie.Iterator suffixes = new CharsTrie(data.contexts, index + 2).iterator();
267         while (suffixes.hasNext()) {
268             Entry e = suffixes.next();
269             suffix = e.chars.toString();
270             addStrings(start, end, contractions);
271             if (unreversedPrefix.length() != 0) {
272                 addStrings(start, end, expansions);
273             }
274             handleCE32(start, end, e.value);
275         }
276         suffix = null;
277     }
278 
addExpansions(int start, int end)279     void addExpansions(int start, int end) {
280         if (unreversedPrefix.length() == 0 && suffix == null) {
281             if (expansions != null) {
282                 expansions.add(start, end);
283             }
284         } else {
285             addStrings(start, end, expansions);
286         }
287     }
288 
addStrings(int start, int end, UnicodeSet set)289     void addStrings(int start, int end, UnicodeSet set) {
290         if (set == null) {
291             return;
292         }
293         StringBuilder s = new StringBuilder(unreversedPrefix);
294         do {
295             s.appendCodePoint(start);
296             if (suffix != null) {
297                 s.append(suffix);
298             }
299             set.add(s);
300             s.setLength(unreversedPrefix.length());
301         } while (++start <= end);
302     }
303 
304     // Prefixes are reversed in the data structure.
setPrefix(CharSequence pfx)305     private void setPrefix(CharSequence pfx) {
306         unreversedPrefix.setLength(0);
307         unreversedPrefix.append(pfx).reverse();
308     }
309 
resetPrefix()310     private void resetPrefix() {
311         unreversedPrefix.setLength(0);
312     }
313 }