/* GENERATED SOURCE. DO NOT MODIFY. */
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
 *******************************************************************************
 * Copyright (C) 1996-2014, International Business Machines Corporation and
 * others. All Rights Reserved.
 *******************************************************************************
 */
package ohos.global.icu.text;

import java.text.CharacterIterator;

import ohos.global.icu.lang.UCharacter;
import ohos.global.icu.util.ICUCloneNotSupportedException;
import ohos.global.icu.util.ULocale;


/**
 * Inserts the specified characters at word breaks. To restrict it to particular characters, use a filter.
 * TODO: this is an internal class, and only temporary. Remove it once we have \b notation in Transliterator.
 */
final class BreakTransliterator extends Transliterator {
    /** Break iterator used to locate boundaries; created lazily by {@link #getBreakIterator()}. */
    private BreakIterator bi;
    /** Text inserted at every accepted boundary. */
    private String insertion;
    /** Scratch stack of accepted boundary offsets; doubled in size when it fills up. */
    private int[] boundaries = new int[50];
    /** Number of valid entries in {@link #boundaries}. */
    private int boundaryCount = 0;

    /**
     * Creates a transliterator that inserts {@code insertion} at the boundaries reported by {@code bi}.
     *
     * @param ID        transliterator ID, passed to the superclass
     * @param filter    filter passed to the superclass
     * @param bi        break iterator to use; if {@code null}, one is created on first use
     * @param insertion text to insert at each accepted boundary
     */
    public BreakTransliterator(String ID, UnicodeFilter filter, BreakIterator bi, String insertion) {
        super(ID, filter);
        this.bi = bi;
        this.insertion = insertion;
    }

    /**
     * Creates a transliterator that inserts a single space and builds its break iterator lazily.
     *
     * @param ID     transliterator ID, passed to the superclass
     * @param filter filter passed to the superclass
     */
    public BreakTransliterator(String ID, UnicodeFilter filter) {
        this(ID, filter, null, " ");
    }

    ///CLOVER:OFF
    // The following method is not called by anything and can't be reached
    /**
     * @return the text inserted at each accepted boundary
     */
    public String getInsertion() {
        return insertion;
    }
    ///CLOVER:ON

    ///CLOVER:OFF
    // The following method is not called by anything and can't be reached
    /**
     * @param insertion the text to insert at each accepted boundary
     */
    public void setInsertion(String insertion) {
        this.insertion = insertion;
    }
    ///CLOVER:ON

    /**
     * Returns the break iterator, creating a word-break iterator for the {@code th_TH} locale
     * the first time it is needed.
     *
     * @return the break iterator in use; never {@code null}
     */
    public BreakIterator getBreakIterator() {
        // Defer initialization of BreakIterator because it is slow,
        // typically over 2000 ms.
        if (bi == null) {
            bi = BreakIterator.getWordInstance(new ULocale("th_TH"));
        }
        return bi;
    }

    ///CLOVER:OFF
    // The following method is not called by anything and can't be reached
    /**
     * @param bi the break iterator to use from now on
     */
    public void setBreakIterator(BreakIterator bi) {
        this.bi = bi;
    }
    ///CLOVER:ON

    /** Bit mask, indexed by general-category value, selecting all letter and mark categories. */
    static final int LETTER_OR_MARK_MASK =
          (1<<Character.UPPERCASE_LETTER)
        | (1<<Character.LOWERCASE_LETTER)
        | (1<<Character.TITLECASE_LETTER)
        | (1<<Character.MODIFIER_LETTER)
        | (1<<Character.OTHER_LETTER)
        | (1<<Character.COMBINING_SPACING_MARK)
        | (1<<Character.NON_SPACING_MARK)
        | (1<<Character.ENCLOSING_MARK)
        ;

    /**
     * Inserts {@link #insertion} at every break-iterator boundary in
     * {@code [pos.start, pos.limit)} whose neighbouring code points on both sides are
     * letters or marks, then advances {@code pos} to account for the inserted text.
     *
     * @param text        the text to modify in place
     * @param pos         the range to process; {@code contextLimit}, {@code limit} and
     *                    {@code start} are updated on return
     * @param incremental if {@code true}, {@code pos.start} is left just after the last
     *                    insertion point instead of being moved to {@code pos.limit}
     */
    @Override
    protected synchronized void handleTransliterate(Replaceable text, Position pos, boolean incremental) {
        boundaryCount = 0;
        BreakIterator iter = getBreakIterator(); // Lazy-create it if necessary
        iter.setText(new ReplaceableCharacterIterator(text, pos.start, pos.limit, pos.start));
        // TODO: fix clumsy workaround used below.
        /*
        char[] tempBuffer = new char[text.length()];
        text.getChars(0, text.length(), tempBuffer, 0);
        bi.setText(new StringCharacterIterator(new String(tempBuffer), pos.start, pos.limit, pos.start));
        */
        // end debugging

        // To make things much easier, we will stack the boundaries, and then insert at the end.
        // generally, we won't need too many, since we will be filtered.

        for (int boundary = iter.first();
                boundary != BreakIterator.DONE && boundary < pos.limit;
                boundary = iter.next()) {
            if (boundary == 0) {
                continue; // there is no preceding character at offset 0
            }
            // HACK: Check to see that preceding item was a letter

            int cp = UTF16.charAt(text, boundary - 1);
            int type = UCharacter.getType(cp);
            //System.out.println(Integer.toString(cp,16) + " (before): " + type);
            if (((1 << type) & LETTER_OR_MARK_MASK) == 0) {
                continue;
            }

            cp = UTF16.charAt(text, boundary);
            type = UCharacter.getType(cp);
            //System.out.println(Integer.toString(cp,16) + " (after): " + type);
            if (((1 << type) & LETTER_OR_MARK_MASK) == 0) {
                continue;
            }

            if (boundaryCount >= boundaries.length) { // realloc if necessary
                int[] temp = new int[boundaries.length * 2];
                System.arraycopy(boundaries, 0, temp, 0, boundaries.length);
                boundaries = temp;
            }

            boundaries[boundaryCount++] = boundary;
            //System.out.println(boundary);
        }

        int delta = 0;
        // NOTE(review): when incremental is true and no boundary qualified, this stays 0 and
        // pos.start is set to 0 below even if it was larger on entry. This matches the upstream
        // implementation; confirm that it is intended.
        int lastBoundary = 0;

        if (boundaryCount != 0) { // if we found something, adjust
            delta = boundaryCount * insertion.length();
            lastBoundary = boundaries[boundaryCount - 1];

            // we do this from the end backwards, so that we don't have to keep updating.

            while (boundaryCount > 0) {
                int boundary = boundaries[--boundaryCount];
                text.replace(boundary, boundary, insertion);
            }
        }

        // Now fix up the return values
        pos.contextLimit += delta;
        pos.limit += delta;
        pos.start = incremental ? lastBoundary + delta : pos.limit;
    }


    /**
     * Registers standard variants with the system. Called by
     * Transliterator during initialization.
     */
    static void register() {
        // false means that it is invisible
        Transliterator trans = new BreakTransliterator("Any-BreakInternal", null);
        Transliterator.registerInstance(trans, false);
        /*
        Transliterator.registerFactory("Any-Break", new Transliterator.Factory() {
            public Transliterator getInstance(String ID) {
                return new BreakTransliterator("Any-Break", null);
            }
        });
        */
    }

    // Hack, just to get a real character iterator.
    /**
     * A {@link CharacterIterator} over the range {@code [begin, end)} of a {@link Replaceable}.
     */
    static final class ReplaceableCharacterIterator implements CharacterIterator
    {
        private Replaceable text;
        private int begin;
        private int end;
        // invariant: begin <= pos <= end
        private int pos;

        /**
        * Constructs an iterator with an initial index of 0.
        */
        /*public ReplaceableCharacterIterator(Replaceable text)
        {
            this(text, 0);
        }*/

        /**
        * Constructs an iterator with the specified initial index.
        *
        * @param  text   The String to be iterated over
        * @param  pos    Initial iterator position
        */
        /*public ReplaceableCharacterIterator(Replaceable text, int pos)
        {
            this(text, 0, text.length(), pos);
        }*/

        /**
         * Constructs an iterator over the given range of the given text, with the
         * index set at the specified position.
         *
         * @param  text   The text to be iterated over
         * @param  begin  Index of the first character
         * @param  end    Index of the character following the last character
         * @param  pos    Initial iterator position
         * @throws NullPointerException     if {@code text} is {@code null}
         * @throws IllegalArgumentException if the range is not within the text, or
         *                                  {@code pos} is not within the range
         */
        public ReplaceableCharacterIterator(Replaceable text, int begin, int end, int pos) {
            if (text == null) {
                throw new NullPointerException();
            }
            this.text = text;

            if (begin < 0 || begin > end || end > text.length()) {
                throw new IllegalArgumentException("Invalid substring range");
            }

            if (pos < begin || pos > end) {
                throw new IllegalArgumentException("Invalid position");
            }

            this.begin = begin;
            this.end = end;
            this.pos = pos;
        }

        /**
         * Resets this iterator to cover the whole of a new text, with the index at 0,
         * so that an existing iterator object can be reused.
         *
         * @param  text   The text to be iterated over
         * @throws NullPointerException if {@code text} is {@code null}
         */
        public void setText(Replaceable text) {
            if (text == null) {
                throw new NullPointerException();
            }
            this.text = text;
            this.begin = 0;
            this.end = text.length();
            this.pos = 0;
        }

        /**
         * Moves to the begin index and returns the character there.
         * @see CharacterIterator#first
         */
        @Override
        public char first()
        {
            pos = begin;
            return current();
        }

        /**
         * Moves to the last character of the range (or to the end index if the
         * range is empty) and returns the character there.
         * @see CharacterIterator#last
         */
        @Override
        public char last()
        {
            if (end != begin) {
                pos = end - 1;
            } else {
                pos = end;
            }
            return current();
        }

        /**
         * Moves to the given index and returns the character there.
         * @throws IllegalArgumentException if {@code p} is outside {@code [begin, end]}
         * @see CharacterIterator#setIndex
         */
        @Override
        public char setIndex(int p)
        {
            if (p < begin || p > end) {
                throw new IllegalArgumentException("Invalid index");
            }
            pos = p;
            return current();
        }

        /**
         * Returns the character at the current index, or {@code DONE} if the index
         * is not inside {@code [begin, end)}.
         * @see CharacterIterator#current
         */
        @Override
        public char current()
        {
            if (pos >= begin && pos < end) {
                return text.charAt(pos);
            }
            else {
                return DONE;
            }
        }

        /**
         * Advances by one and returns the character there; once the last character
         * has been passed, parks the index at the end and returns {@code DONE}.
         * @see CharacterIterator#next
         */
        @Override
        public char next()
        {
            if (pos < end - 1) {
                pos++;
                return text.charAt(pos);
            }
            else {
                pos = end;
                return DONE;
            }
        }

        /**
         * Steps back by one and returns the character there, or returns {@code DONE}
         * without moving if already at the begin index.
         * @see CharacterIterator#previous
         */
        @Override
        public char previous()
        {
            if (pos > begin) {
                pos--;
                return text.charAt(pos);
            }
            else {
                return DONE;
            }
        }

        /**
         * Returns the begin index of the range.
         * @see CharacterIterator#getBeginIndex
         */
        @Override
        public int getBeginIndex()
        {
            return begin;
        }

        /**
         * Returns the end index of the range.
         * @see CharacterIterator#getEndIndex
         */
        @Override
        public int getEndIndex()
        {
            return end;
        }

        /**
         * Returns the current index.
         * @see CharacterIterator#getIndex
         */
        @Override
        public int getIndex()
        {
            return pos;
        }

        /**
         * Compares the equality of two ReplaceableCharacterIterator objects.
         * @param obj the ReplaceableCharacterIterator object to be compared with.
         * @return true if the given obj is the same as this
         * ReplaceableCharacterIterator object; false otherwise.
         */
        @Override
        public boolean equals(Object obj)
        {
            if (this == obj) {
                return true;
            }
            if (!(obj instanceof ReplaceableCharacterIterator)) {
                return false;
            }

            ReplaceableCharacterIterator that = (ReplaceableCharacterIterator) obj;

            // Cheap rejection first: unequal hash codes imply unequal iterators.
            if (hashCode() != that.hashCode()) {
                return false;
            }
            if (!text.equals(that.text)) {
                return false;
            }
            return pos == that.pos && begin == that.begin && end == that.end;
        }

        /**
         * Computes a hashcode for this iterator.
         * @return A hash code
         */
        @Override
        public int hashCode()
        {
            return text.hashCode() ^ pos ^ begin ^ end;
        }

        /**
         * Creates a copy of this iterator.
         * @return A copy of this
         * @throws ICUCloneNotSupportedException if cloning fails; the original
         *         exception is attached as the cause
         */
        @Override
        public Object clone()
        {
            try {
                return super.clone();
            }
            catch (CloneNotSupportedException e) {
                // Keep the underlying failure visible to whoever diagnoses this.
                throw new ICUCloneNotSupportedException(e);
            }
        }

    }

    /* (non-Javadoc)
     * @see ohos.global.icu.text.Transliterator#addSourceTargetSet(ohos.global.icu.text.UnicodeSet, ohos.global.icu.text.UnicodeSet, ohos.global.icu.text.UnicodeSet)
     */
    @Override
    public void addSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet) {
        UnicodeSet myFilter = getFilterAsUnicodeSet(inputFilter);
        // Doesn't actually modify the source characters, so leave them alone.
        // add the characters inserted
        if (myFilter.size() != 0) {
            targetSet.addAll(insertion);
        }
    }

}