1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // © 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 /* 5 ******************************************************************************* 6 * Copyright (C) 2013-2014, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 ******************************************************************************* 9 * ContractionsAndExpansions.java, ported from collationsets.h/.cpp 10 * 11 * C++ version created on: 2013feb09 12 * created by: Markus W. Scherer 13 */ 14 15 package ohos.global.icu.impl.coll; 16 17 import java.util.Iterator; 18 19 import ohos.global.icu.impl.Trie2; 20 import ohos.global.icu.text.UnicodeSet; 21 import ohos.global.icu.util.CharsTrie; 22 import ohos.global.icu.util.CharsTrie.Entry; 23 24 /** 25 * @hide exposed on OHOS 26 */ 27 public final class ContractionsAndExpansions { 28 // C++: The following fields are @internal, only public for access by callback. 29 private CollationData data; 30 private UnicodeSet contractions; 31 private UnicodeSet expansions; 32 private CESink sink; 33 private boolean addPrefixes; 34 private int checkTailored = 0; // -1: collected tailored +1: exclude tailored 35 private UnicodeSet tailored = new UnicodeSet(); 36 private UnicodeSet ranges; 37 private StringBuilder unreversedPrefix = new StringBuilder(); 38 private String suffix; 39 private long[] ces = new long[Collation.MAX_EXPANSION_LENGTH]; 40 41 /** 42 * @hide exposed on OHOS 43 */ 44 public static interface CESink { handleCE(long ce)45 void handleCE(long ce); handleExpansion(long ces[], int start, int length)46 void handleExpansion(long ces[], int start, int length); 47 } 48 ContractionsAndExpansions(UnicodeSet con, UnicodeSet exp, CESink s, boolean prefixes)49 public ContractionsAndExpansions(UnicodeSet con, UnicodeSet exp, CESink s, boolean prefixes) { 50 contractions = con; 51 expansions = exp; 52 sink = s; 53 addPrefixes = prefixes; 54 } 55 forData(CollationData d)56 public void forData(CollationData d) { 57 // Add all from the data, can be tailoring or base. 58 if (d.base != null) { 59 checkTailored = -1; 60 } 61 data = d; 62 Iterator<Trie2.Range> trieIterator = data.trie.iterator(); 63 Trie2.Range range; 64 while (trieIterator.hasNext() && !(range = trieIterator.next()).leadSurrogate) { 65 enumCnERange(range.startCodePoint, range.endCodePoint, range.value, this); 66 } 67 if (d.base == null) { 68 return; 69 } 70 // Add all from the base data but only for un-tailored code points. 71 tailored.freeze(); 72 checkTailored = 1; 73 data = d.base; 74 trieIterator = data.trie.iterator(); 75 while (trieIterator.hasNext() && !(range = trieIterator.next()).leadSurrogate) { 76 enumCnERange(range.startCodePoint, range.endCodePoint, range.value, this); 77 } 78 } 79 enumCnERange(int start, int end, int ce32, ContractionsAndExpansions cne)80 private void enumCnERange(int start, int end, int ce32, ContractionsAndExpansions cne) { 81 if (cne.checkTailored == 0) { 82 // There is no tailoring. 83 // No need to collect nor check the tailored set. 84 } else if (cne.checkTailored < 0) { 85 // Collect the set of code points with mappings in the tailoring data. 86 if (ce32 == Collation.FALLBACK_CE32) { 87 return; // fallback to base, not tailored 88 } else { 89 cne.tailored.add(start, end); 90 } 91 // checkTailored > 0: Exclude tailored ranges from the base data enumeration. 92 } else if (start == end) { 93 if (cne.tailored.contains(start)) { 94 return; 95 } 96 } else if (cne.tailored.containsSome(start, end)) { 97 if (cne.ranges == null) { 98 cne.ranges = new UnicodeSet(); 99 } 100 cne.ranges.set(start, end).removeAll(cne.tailored); 101 int count = cne.ranges.getRangeCount(); 102 for (int i = 0; i < count; ++i) { 103 cne.handleCE32(cne.ranges.getRangeStart(i), cne.ranges.getRangeEnd(i), ce32); 104 } 105 } 106 cne.handleCE32(start, end, ce32); 107 } 108 forCodePoint(CollationData d, int c)109 public void forCodePoint(CollationData d, int c) { 110 int ce32 = d.getCE32(c); 111 if (ce32 == Collation.FALLBACK_CE32) { 112 d = d.base; 113 ce32 = d.getCE32(c); 114 } 115 data = d; 116 handleCE32(c, c, ce32); 117 } 118 handleCE32(int start, int end, int ce32)119 private void handleCE32(int start, int end, int ce32) { 120 for (;;) { 121 if ((ce32 & 0xff) < Collation.SPECIAL_CE32_LOW_BYTE) { 122 // !isSpecialCE32() 123 if (sink != null) { 124 sink.handleCE(Collation.ceFromSimpleCE32(ce32)); 125 } 126 return; 127 } 128 switch (Collation.tagFromCE32(ce32)) { 129 case Collation.FALLBACK_TAG: 130 return; 131 case Collation.RESERVED_TAG_3: 132 case Collation.BUILDER_DATA_TAG: 133 case Collation.LEAD_SURROGATE_TAG: 134 // Java porting note: U_INTERNAL_PROGRAM_ERROR is set to errorCode in ICU4C. 135 throw new AssertionError( 136 String.format("Unexpected CE32 tag type %d for ce32=0x%08x", 137 Collation.tagFromCE32(ce32), ce32)); 138 case Collation.LONG_PRIMARY_TAG: 139 if (sink != null) { 140 sink.handleCE(Collation.ceFromLongPrimaryCE32(ce32)); 141 } 142 return; 143 case Collation.LONG_SECONDARY_TAG: 144 if (sink != null) { 145 sink.handleCE(Collation.ceFromLongSecondaryCE32(ce32)); 146 } 147 return; 148 case Collation.LATIN_EXPANSION_TAG: 149 if (sink != null) { 150 ces[0] = Collation.latinCE0FromCE32(ce32); 151 ces[1] = Collation.latinCE1FromCE32(ce32); 152 sink.handleExpansion(ces, 0, 2); 153 } 154 // Optimization: If we have a prefix, 155 // then the relevant strings have been added already. 156 if (unreversedPrefix.length() == 0) { 157 addExpansions(start, end); 158 } 159 return; 160 case Collation.EXPANSION32_TAG: 161 if (sink != null) { 162 int idx = Collation.indexFromCE32(ce32); 163 int length = Collation.lengthFromCE32(ce32); 164 for (int i = 0; i < length; ++i) { 165 ces[i] = Collation.ceFromCE32(data.ce32s[idx + i]); 166 } 167 sink.handleExpansion(ces, 0, length); 168 } 169 // Optimization: If we have a prefix, 170 // then the relevant strings have been added already. 171 if (unreversedPrefix.length() == 0) { 172 addExpansions(start, end); 173 } 174 return; 175 case Collation.EXPANSION_TAG: 176 if (sink != null) { 177 int idx = Collation.indexFromCE32(ce32); 178 int length = Collation.lengthFromCE32(ce32); 179 sink.handleExpansion(data.ces, idx, length); 180 } 181 // Optimization: If we have a prefix, 182 // then the relevant strings have been added already. 183 if (unreversedPrefix.length() == 0) { 184 addExpansions(start, end); 185 } 186 return; 187 case Collation.PREFIX_TAG: 188 handlePrefixes(start, end, ce32); 189 return; 190 case Collation.CONTRACTION_TAG: 191 handleContractions(start, end, ce32); 192 return; 193 case Collation.DIGIT_TAG: 194 // Fetch the non-numeric-collation CE32 and continue. 195 ce32 = data.ce32s[Collation.indexFromCE32(ce32)]; 196 break; 197 case Collation.U0000_TAG: 198 assert (start == 0 && end == 0); 199 // Fetch the normal ce32 for U+0000 and continue. 200 ce32 = data.ce32s[0]; 201 break; 202 case Collation.HANGUL_TAG: 203 if (sink != null) { 204 // TODO: This should be optimized, 205 // especially if [start..end] is the complete Hangul range. (assert that) 206 UTF16CollationIterator iter = new UTF16CollationIterator(data); 207 StringBuilder hangul = new StringBuilder(1); 208 for (int c = start; c <= end; ++c) { 209 hangul.setLength(0); 210 hangul.appendCodePoint(c); 211 iter.setText(false, hangul, 0); 212 int length = iter.fetchCEs(); 213 // Ignore the terminating non-CE. 214 assert (length >= 2 && iter.getCE(length - 1) == Collation.NO_CE); 215 sink.handleExpansion(iter.getCEs(), 0, length - 1); 216 } 217 } 218 // Optimization: If we have a prefix, 219 // then the relevant strings have been added already. 220 if (unreversedPrefix.length() == 0) { 221 addExpansions(start, end); 222 } 223 return; 224 case Collation.OFFSET_TAG: 225 // Currently no need to send offset CEs to the sink. 226 return; 227 case Collation.IMPLICIT_TAG: 228 // Currently no need to send implicit CEs to the sink. 229 return; 230 } 231 } 232 } 233 handlePrefixes(int start, int end, int ce32)234 private void handlePrefixes(int start, int end, int ce32) { 235 int index = Collation.indexFromCE32(ce32); 236 ce32 = data.getCE32FromContexts(index); // Default if no prefix match. 237 handleCE32(start, end, ce32); 238 if (!addPrefixes) { 239 return; 240 } 241 CharsTrie.Iterator prefixes = new CharsTrie(data.contexts, index + 2).iterator(); 242 while (prefixes.hasNext()) { 243 Entry e = prefixes.next(); 244 setPrefix(e.chars); 245 // Prefix/pre-context mappings are special kinds of contractions 246 // that always yield expansions. 247 addStrings(start, end, contractions); 248 addStrings(start, end, expansions); 249 handleCE32(start, end, e.value); 250 } 251 resetPrefix(); 252 } 253 handleContractions(int start, int end, int ce32)254 void handleContractions(int start, int end, int ce32) { 255 int index = Collation.indexFromCE32(ce32); 256 if ((ce32 & Collation.CONTRACT_SINGLE_CP_NO_MATCH) != 0) { 257 // No match on the single code point. 258 // We are underneath a prefix, and the default mapping is just 259 // a fallback to the mappings for a shorter prefix. 260 assert (unreversedPrefix.length() != 0); 261 } else { 262 ce32 = data.getCE32FromContexts(index); // Default if no suffix match. 263 assert (!Collation.isContractionCE32(ce32)); 264 handleCE32(start, end, ce32); 265 } 266 CharsTrie.Iterator suffixes = new CharsTrie(data.contexts, index + 2).iterator(); 267 while (suffixes.hasNext()) { 268 Entry e = suffixes.next(); 269 suffix = e.chars.toString(); 270 addStrings(start, end, contractions); 271 if (unreversedPrefix.length() != 0) { 272 addStrings(start, end, expansions); 273 } 274 handleCE32(start, end, e.value); 275 } 276 suffix = null; 277 } 278 addExpansions(int start, int end)279 void addExpansions(int start, int end) { 280 if (unreversedPrefix.length() == 0 && suffix == null) { 281 if (expansions != null) { 282 expansions.add(start, end); 283 } 284 } else { 285 addStrings(start, end, expansions); 286 } 287 } 288 addStrings(int start, int end, UnicodeSet set)289 void addStrings(int start, int end, UnicodeSet set) { 290 if (set == null) { 291 return; 292 } 293 StringBuilder s = new StringBuilder(unreversedPrefix); 294 do { 295 s.appendCodePoint(start); 296 if (suffix != null) { 297 s.append(suffix); 298 } 299 set.add(s); 300 s.setLength(unreversedPrefix.length()); 301 } while (++start <= end); 302 } 303 304 // Prefixes are reversed in the data structure. setPrefix(CharSequence pfx)305 private void setPrefix(CharSequence pfx) { 306 unreversedPrefix.setLength(0); 307 unreversedPrefix.append(pfx).reverse(); 308 } 309 resetPrefix()310 private void resetPrefix() { 311 unreversedPrefix.setLength(0); 312 } 313 }