/* GENERATED SOURCE. DO NOT MODIFY. */
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
 *******************************************************************************
 * Copyright (C) 1996-2010, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
package ohos.global.icu.text;

import java.util.List;

import ohos.global.icu.impl.Utility;
import ohos.global.icu.impl.UtilityExtensions;

/**
 * A transliterator formed by chaining two or more other transliterators.
 * For example, a transliterator from script A to script B followed by one
 * from script B to script C yields a transliterator from A to C.
 *
 * <p>Chained transliterators may not behave as expected; in particular,
 * chaining inverses does not necessarily give the identity transliterator.
 * See the class documentation of {@link Transliterator} for details.
 *
 * <p>Copyright © IBM Corporation 1999. All rights reserved.
 *
 * @author Alan Liu
 */
class CompoundTransliterator extends Transliterator {

    /**
     * ID prefix carried by anonymous rule-based transliterators (inline
     * rules and ::BEGIN/::END blocks).
     */
    private static final String ANONYMOUS_ID_PREFIX = "%Pass";

    /** The chained transliterators, in the order in which they are applied. */
    private Transliterator[] trans;

    /** Number of anonymous rule-based transliterators; consulted by {@link #toRules}. */
    private int numAnonymousRBTs;

    /**
     * Package-private constructor building the chain from a list of
     * transliterators, with no anonymous rule-based transliterators recorded.
     * The caller is responsible for fixing up the ID.
     *
     * @param list the transliterators, in forward order
     */
    CompoundTransliterator(List<Transliterator> list) {
        this(list, 0);
    }

    /**
     * Package-private constructor building the chain from a list of
     * transliterators. The instance starts with an empty ID and no filter;
     * the caller is expected to fix up the ID afterwards.
     *
     * @param list the transliterators, in forward order
     * @param numAnonymousRBTs the number of anonymous rule-based
     *        transliterators to record for this chain
     */
    CompoundTransliterator(List<Transliterator> list, int numAnonymousRBTs) {
        super("", null);
        init(list, FORWARD, false);
        this.numAnonymousRBTs = numAnonymousRBTs;
    }

    /**
     * Internal constructor used by {@link #safeClone()}. The given array is
     * stored as-is, without copying.
     *
     * @param id the ID of the new instance
     * @param filter2 the filter of the new instance, or null
     * @param trans2 the component array to adopt
     * @param numAnonymousRBTs2 the number of anonymous rule-based transliterators
     */
    CompoundTransliterator(String id, UnicodeFilter filter2, Transliterator[] trans2,
                           int numAnonymousRBTs2) {
        super(id, filter2);
        trans = trans2;
        numAnonymousRBTs = numAnonymousRBTs2;
    }

    /**
     * Finishes construction; to be called only from constructors, while
     * {@code trans} is still unset.
     *
     * @param list the transliterators to adopt, given in declared (FORWARD)
     *        order; it should not be empty. When {@code direction} is
     *        REVERSE the stored order is the reverse of the list order.
     * @param direction either FORWARD or REVERSE
     * @param fixReverseID if true and {@code direction} is REVERSE, the ID
     *        is rebuilt from the IDs of the components in their stored
     *        order. Some constructors skip this because a facade ID is
     *        applied afterwards anyway.
     */
    private void init(List<Transliterator> list,
                      int direction,
                      boolean fixReverseID) {
        final int size = list.size();
        final boolean forward = (direction == FORWARD);

        // Copy the list into an array, flipping the order for REVERSE.
        final Transliterator[] ordered = new Transliterator[size];
        for (int dst = 0; dst < size; dst++) {
            ordered[dst] = list.get(forward ? dst : size - 1 - dst);
        }
        trans = ordered;

        // A reversed chain may need its ID regenerated from its components.
        if (fixReverseID && direction == REVERSE) {
            final StringBuilder joined = new StringBuilder();
            for (int k = 0; k < size; k++) {
                if (k != 0) {
                    joined.append(ID_DELIM);
                }
                joined.append(trans[k].getID());
            }
            setID(joined.toString());
        }

        computeMaximumContextLength();
    }

    /**
     * Returns the number of transliterators in this chain.
     *
     * @return number of transliterators in this chain
     */
    public int getCount() {
        return trans.length;
    }

    /**
     * Returns the transliterator at the given position in this chain.
     *
     * @param index index into the chain, from 0 to <code>getCount() - 1</code>
     * @return the transliterator at that index
     */
    public Transliterator getTransliterator(int index) {
        return trans[index];
    }

    /**
     * Appends {@code c} to {@code buf}, except when {@code buf} is empty or
     * already ends with {@code c}.
     */
    private static void smartAppend(StringBuilder buf, char c) {
        final int length = buf.length();
        if (length == 0) {
            return;
        }
        if (buf.charAt(length - 1) == c) {
            return;
        }
        buf.append(c);
    }

    /**
     * Override of Transliterator: builds a rule string that can be passed
     * to createFromRules() to recreate this transliterator.
     *
     * @param escapeUnprintable if true, unprintable characters are written
     *        as hex escapes, \\uxxxx or \\Uxxxxxxxx. Unprintable characters
     *        are those other than U+000A, U+0020..U+007E.
     * @return the rule string
     */
    @Override
    public String toRules(boolean escapeUnprintable) {
        // In general toRules() is NOT called on the components: for several
        // rule-based components that would just concatenate their rules,
        // which is not what is wanted. Anonymous rule-based components and
        // nested compounds are the exceptions handled below.
        final StringBuilder out = new StringBuilder();

        // A compound RBT with a global filter emits that filter first.
        if (numAnonymousRBTs >= 1) {
            final UnicodeFilter globalFilter = getFilter();
            if (globalFilter != null) {
                out.append("::")
                   .append(globalFilter.toPattern(escapeUnprintable))
                   .append(ID_DELIM);
            }
        }

        for (int i = 0; i < trans.length; i++) {
            final Transliterator component = trans[i];
            final String componentId = component.getID();
            final String rule;

            if (componentId.startsWith(ANONYMOUS_ID_PREFIX)) {
                // Anonymous rule-based transliterator: write out all of its
                // rules, separating two in a row with "::Null;".
                final String body = component.toRules(escapeUnprintable);
                final boolean followsAnonymous = numAnonymousRBTs > 1
                        && i > 0
                        && trans[i - 1].getID().startsWith(ANONYMOUS_ID_PREFIX);
                rule = followsAnonymous ? "::Null;" + body : body;
            } else if (componentId.indexOf(';') >= 0) {
                // A semicolon in the ID marks a nested compound; its own
                // toRules() lists its children in the right format.
                rule = component.toRules(escapeUnprintable);
            } else {
                // Everything else is written through baseToRules().
                rule = component.baseToRules(escapeUnprintable);
            }

            smartAppend(out, '\n');
            out.append(rule);
            smartAppend(out, ID_DELIM);
        }
        return out.toString();
    }

    /**
     * @hide draft / provisional / internal are hidden on OHOS
     */
    @Override
    public void addSourceTargetSet(UnicodeSet filter, UnicodeSet sourceSet, UnicodeSet targetSet) {
        final UnicodeSet runningFilter = new UnicodeSet(getFilterAsUnicodeSet(filter));
        final UnicodeSet produced = new UnicodeSet();
        for (Transliterator component : trans) {
            // Targets produced by one component can be consumed by the
            // following ones regardless of the filter, so collect just this
            // component's targets and widen the running filter with them.
            produced.clear();
            component.addSourceTargetSet(runningFilter, sourceSet, produced);
            targetSet.addAll(produced);
            runningFilter.addAll(produced);
        }
    }

    /**
     * Implements {@link Transliterator#handleTransliterate}.
     */
    @Override
    protected void handleTransliterate(Replaceable text,
                                       Position index, boolean incremental) {
        /* Every component is run with the same start value, while the limit
         * is whatever the preceding components left behind. The start is
         * reset before each component so that each gets a chance at the
         * text; this is valid because no component changes the text before
         * the initial start.
         *
         * IMPORTANT: in incremental mode, after the first component each
         * subsequent component only sees text committed by its
         * predecessors; that is, the start (output value) of component i
         * becomes the limit (input value) of component i+1. The overall
         * limit is fixed up before returning.
         *
         * Assumptions made here:
         * (1) contextStart <= start <= limit <= contextLimit <= text.length()
         * (2) start <= start' <= limit'      ; cursor doesn't move back
         * (3) start <= limit'                ; text before cursor unchanged
         * - start' is the value of start after a component has run
         * - limit' is the value of limit after a component has run
         */

        /*
         * Example: 3 transliterators. This example illustrates the
         * mechanics we need to implement. C, S, and L are the contextStart,
         * start, and limit. gl is the globalLimit. contextLimit is
         * equal to limit throughout.
         *
         * 1. h-u, changes hex to Unicode
         *
         *    4  7  a  d  0      4  7  a
         *    abc/u0061/u    =>  abca/u
         *    C  S       L       C   S L   gl=f->a
         *
         * 2. upup, changes "x" to "XX"
         *
         *    4  7  a       4  7  a
         *    abca/u    =>  abcAA/u
         *    C  SL         C    S
         *                       L    gl=a->b
         * 3. u-h, changes Unicode to hex
         *
         *    4  7  a        4  7  a  d  0  3
         *    abcAA/u    =>  abc/u0041/u0041/u
         *    C  S L         C              S
         *                                  L   gl=b->15
         * 4. return
         *
         *    4  7  a  d  0  3
         *    abc/u0041/u0041/u
         *    C              S L
         */

        final int passCount = trans.length;
        if (passCount == 0) {
            // Nothing to run: consume the whole range and stop.
            index.start = index.limit;
            return;
        }

        // Limit and start of the compound operation as a whole. The limit
        // is restored at the end, adjusted by the net length change.
        final int initialLimit = index.limit;
        final int initialStart = index.start;

        int netLengthChange = 0; // accumulated insertions minus deletions

        StringBuffer trace = null;
        ///CLOVER:OFF
        if (DEBUG) {
            trace = new StringBuffer("CompoundTransliterator{" + getID() +
                                     (incremental ? "}i: IN=" : "}: IN="));
            UtilityExtensions.formatInput(trace, text, index);
            System.out.println(Utility.escape(trace.toString()));
        }
        ///CLOVER:ON

        // Let each component have a go at the run of characters.
        for (int pass = 0; pass < passCount; pass++) {
            index.start = initialStart; // every component begins at the same place
            final int limitBeforePass = index.limit;

            if (initialStart == limitBeforePass) {
                // Empty range: no later component has anything to work on.
                ///CLOVER:OFF
                if (DEBUG) {
                    System.out.println("CompoundTransliterator[" + pass +
                                       ".." + (passCount - 1) +
                                       (incremental ? "]i: " : "]: ") +
                                       UtilityExtensions.formatInput(text, index) +
                                       " (NOTHING TO DO)");
                }
                ///CLOVER:ON
                break;
            }

            ///CLOVER:OFF
            if (DEBUG) {
                trace.setLength(0);
                trace.append("CompoundTransliterator[" + pass + "=" +
                             trans[pass].getID() +
                             (incremental ? "]i: " : "]: "));
                UtilityExtensions.formatInput(trace, text, index);
            }
            ///CLOVER:ON

            trans[pass].filteredTransliterate(text, index, incremental);

            // A well-behaved component leaves start == limit when run
            // non-incrementally. Anything else most likely means it did not
            // transliterate all the way to the end, so report it rather
            // than silently pinning start to limit.
            if (!incremental && index.start != index.limit) {
                throw new RuntimeException("ERROR: Incomplete non-incremental transliteration by " + trans[pass].getID());
            }

            ///CLOVER:OFF
            if (DEBUG) {
                trace.append(" => ");
                UtilityExtensions.formatInput(trace, text, index);
                System.out.println(Utility.escape(trace.toString()));
            }
            ///CLOVER:ON

            // Fold this component's insertions/deletions into the total.
            netLengthChange += index.limit - limitBeforePass;

            if (incremental) {
                // Incremental mode: later components may only touch what
                // earlier ones have completely processed. In
                // non-incremental mode every component sees the whole text.
                index.limit = index.start;
            }
        }

        // Start stays where the last component left it. Limit goes back to
        // its initial position, shifted by the net length change.
        index.limit = initialLimit + netLengthChange;

        ///CLOVER:OFF
        if (DEBUG) {
            trace.setLength(0);
            trace.append("CompoundTransliterator{" + getID() +
                         (incremental ? "}i: OUT=" : "}: OUT="));
            UtilityExtensions.formatInput(trace, text, index);
            System.out.println(Utility.escape(trace.toString()));
        }
        ///CLOVER:ON
    }

    /**
     * Computes and sets the length of the longest context required by this
     * transliterator, i.e. the largest value reported by any component.
     * This is <em>preceding</em> context.
     */
    private void computeMaximumContextLength() {
        int longest = 0;
        for (Transliterator component : trans) {
            longest = Math.max(longest, component.getMaximumContextLength());
        }
        setMaximumContextLength(longest);
    }

    /**
     * Temporary hack for a registry problem; needs to be replaced by a
     * better architecture. Returns a new instance with the same ID and
     * anonymous-RBT count. A filter that is a {@link UnicodeSet} is copied;
     * the component array is passed to the new instance without copying.
     */
    public Transliterator safeClone() {
        UnicodeFilter filter = getFilter();
        if (filter instanceof UnicodeSet) {
            filter = new UnicodeSet((UnicodeSet) filter);
        }
        return new CompoundTransliterator(getID(), filter, trans, numAnonymousRBTs);
    }
}