1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // © 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 /* 5 ******************************************************************************* 6 * Copyright (C) 2014-2016, International Business Machines Corporation and 7 * others. All Rights Reserved. 8 ******************************************************************************* 9 */ 10 package ohos.global.icu.impl; 11 12 import java.text.CharacterIterator; 13 import java.util.HashSet; 14 import java.util.Locale; 15 16 import ohos.global.icu.impl.ICUResourceBundle.OpenType; 17 import ohos.global.icu.text.BreakIterator; 18 import ohos.global.icu.text.FilteredBreakIteratorBuilder; 19 import ohos.global.icu.text.UCharacterIterator; 20 import ohos.global.icu.util.BytesTrie; 21 import ohos.global.icu.util.CharsTrie; 22 import ohos.global.icu.util.CharsTrieBuilder; 23 import ohos.global.icu.util.StringTrieBuilder; 24 import ohos.global.icu.util.ULocale; 25 26 /** 27 * @author tomzhang 28 * @hide exposed on OHOS 29 */ 30 public class SimpleFilteredSentenceBreakIterator extends BreakIterator { 31 32 private BreakIterator delegate; 33 private UCharacterIterator text; // TODO(Tom): suffice to move into the local scope in next() ? 34 private CharsTrie backwardsTrie; // i.e. ".srM" for Mrs. 35 private CharsTrie forwardsPartialTrie; // Has ".a" for "a.M." 36 37 /** 38 * @param adoptBreakIterator 39 * break iterator to adopt 40 * @param forwardsPartialTrie 41 * forward & partial char trie to adopt 42 * @param backwardsTrie 43 * backward trie to adopt 44 */ SimpleFilteredSentenceBreakIterator(BreakIterator adoptBreakIterator, CharsTrie forwardsPartialTrie, CharsTrie backwardsTrie)45 public SimpleFilteredSentenceBreakIterator(BreakIterator adoptBreakIterator, CharsTrie forwardsPartialTrie, 46 CharsTrie backwardsTrie) { 47 this.delegate = adoptBreakIterator; 48 this.forwardsPartialTrie = forwardsPartialTrie; 49 this.backwardsTrie = backwardsTrie; 50 } 51 52 53 /** 54 * Reset the filter from the delegate. 55 */ resetState()56 private final void resetState() { 57 text = UCharacterIterator.getInstance((CharacterIterator) delegate.getText().clone()); 58 } 59 60 /** 61 * Is there an exception at this point? 62 * 63 * @param n the location of the possible break 64 * @return 65 */ breakExceptionAt(int n)66 private final boolean breakExceptionAt(int n) { 67 // Note: the C++ version of this function is SimpleFilteredSentenceBreakIterator::breakExceptionAt() 68 69 int bestPosn = -1; 70 int bestValue = -1; 71 72 // loops while 'n' points to an exception 73 text.setIndex(n); 74 backwardsTrie.reset(); 75 int uch; 76 77 78 79 // Assume a space is following the '.' (so we handle the case: "Mr. /Brown") 80 if ((uch = text.previousCodePoint()) == ' ') { // TODO: skip a class of chars here?? 81 // TODO only do this the 1st time? 82 } else { 83 uch = text.nextCodePoint(); 84 } 85 86 BytesTrie.Result r = BytesTrie.Result.INTERMEDIATE_VALUE; 87 88 while ((uch = text.previousCodePoint()) != UCharacterIterator.DONE && // more to consume backwards and.. 89 ((r = backwardsTrie.nextForCodePoint(uch)).hasNext())) {// more in the trie 90 if (r.hasValue()) { // remember the best match so far 91 bestPosn = text.getIndex(); 92 bestValue = backwardsTrie.getValue(); 93 } 94 } 95 96 if (r.matches()) { // exact match? 97 bestValue = backwardsTrie.getValue(); 98 bestPosn = text.getIndex(); 99 } 100 101 if (bestPosn >= 0) { 102 if (bestValue == Builder.MATCH) { // exact match! 103 return true; // Exception here. 104 } else if (bestValue == Builder.PARTIAL && forwardsPartialTrie != null) { 105 // make sure there's a forward trie 106 // We matched the "Ph." in "Ph.D." - now we need to run everything through the forwards trie 107 // to see if it matches something going forward. 108 forwardsPartialTrie.reset(); 109 110 BytesTrie.Result rfwd = BytesTrie.Result.INTERMEDIATE_VALUE; 111 text.setIndex(bestPosn); // hope that's close .. 112 while ((uch = text.nextCodePoint()) != BreakIterator.DONE 113 && ((rfwd = forwardsPartialTrie.nextForCodePoint(uch)).hasNext())) { 114 } 115 if (rfwd.matches()) { 116 // Exception here 117 return true; 118 } // else fall through 119 } // else fall through 120 } // else fall through 121 return false; // No exception here. 122 } 123 124 /** 125 * Given that the delegate has already given its "initial" answer, 126 * find the NEXT actual (non-suppressed) break. 127 * @param n initial position from delegate 128 * @return new break position or BreakIterator.DONE 129 */ internalNext(int n)130 private final int internalNext(int n) { 131 if (n == BreakIterator.DONE || // at end or 132 backwardsTrie == null) { // .. no backwards table loaded == no exceptions 133 return n; 134 } 135 resetState(); 136 137 final int textLen = text.getLength(); 138 139 while (n != BreakIterator.DONE && n != textLen) { 140 // outer loop runs once per underlying break (from fDelegate). 141 // loops while 'n' points to an exception. 142 143 if (breakExceptionAt(n)) { 144 // n points to a break exception 145 n = delegate.next(); 146 } else { 147 // no exception at this spot 148 return n; 149 } 150 } 151 return n; //hit underlying DONE or break at end of text 152 } 153 154 /** 155 * Given that the delegate has already given its "initial" answer, 156 * find the PREV actual (non-suppressed) break. 157 * @param n initial position from delegate 158 * @return new break position or BreakIterator.DONE 159 */ internalPrev(int n)160 private final int internalPrev(int n) { 161 if (n == 0 || n == BreakIterator.DONE || // at end or 162 backwardsTrie == null) { // .. no backwards table loaded == no exceptions 163 return n; 164 } 165 resetState(); 166 167 while (n != BreakIterator.DONE && n != 0) { 168 // outer loop runs once per underlying break (from fDelegate). 169 // loops while 'n' points to an exception. 170 171 if (breakExceptionAt(n)) { 172 // n points to a break exception 173 n = delegate.previous(); 174 } else { 175 // no exception at this spot 176 return n; 177 } 178 } 179 return n; //hit underlying DONE or break at end of text 180 } 181 182 @Override equals(Object obj)183 public boolean equals(Object obj) { 184 if (obj == null) 185 return false; 186 if (this == obj) 187 return true; 188 if (getClass() != obj.getClass()) 189 return false; 190 SimpleFilteredSentenceBreakIterator other = (SimpleFilteredSentenceBreakIterator) obj; 191 return delegate.equals(other.delegate) && text.equals(other.text) && backwardsTrie.equals(other.backwardsTrie) 192 && forwardsPartialTrie.equals(other.forwardsPartialTrie); 193 } 194 195 @Override hashCode()196 public int hashCode() { 197 return (forwardsPartialTrie.hashCode() * 39) + (backwardsTrie.hashCode() * 11) + delegate.hashCode(); 198 } 199 200 @Override clone()201 public Object clone() { 202 SimpleFilteredSentenceBreakIterator other = (SimpleFilteredSentenceBreakIterator) super.clone(); 203 return other; 204 } 205 206 207 @Override first()208 public int first() { 209 // Don't suppress a break opportunity at the beginning of text. 210 return delegate.first(); 211 } 212 213 @Override preceding(int offset)214 public int preceding(int offset) { 215 return internalPrev(delegate.preceding(offset)); 216 } 217 218 @Override previous()219 public int previous() { 220 return internalPrev(delegate.previous()); 221 } 222 223 @Override current()224 public int current() { 225 return delegate.current(); 226 } 227 228 @Override isBoundary(int offset)229 public boolean isBoundary(int offset) { 230 if(!delegate.isBoundary(offset)) { 231 return false; // No underlying break to suppress? 232 } 233 234 // delegate thinks there's a break… 235 if(backwardsTrie == null) { 236 return true; // no data 237 } 238 239 resetState(); 240 return !breakExceptionAt(offset); // if there's an exception: no break. 241 } 242 243 @Override next()244 public int next() { 245 return internalNext(delegate.next()); 246 } 247 248 @Override next(int n)249 public int next(int n) { 250 return internalNext(delegate.next(n)); 251 } 252 253 @Override following(int offset)254 public int following(int offset) { 255 return internalNext(delegate.following(offset)); 256 } 257 258 @Override last()259 public int last() { 260 // Don't suppress a break opportunity at the end of text. 261 return delegate.last(); 262 } 263 264 @Override getText()265 public CharacterIterator getText() { 266 return delegate.getText(); 267 } 268 269 @Override setText(CharacterIterator newText)270 public void setText(CharacterIterator newText) { 271 delegate.setText(newText); 272 } 273 274 /** 275 * @hide exposed on OHOS 276 */ 277 public static class Builder extends FilteredBreakIteratorBuilder { 278 /** 279 * filter set to store all exceptions 280 */ 281 private HashSet<CharSequence> filterSet = new HashSet<CharSequence>(); 282 283 static final int PARTIAL = (1 << 0); // < partial - need to run through forward trie 284 static final int MATCH = (1 << 1); // < exact match - skip this one. 285 static final int SuppressInReverse = (1 << 0); 286 static final int AddToForward = (1 << 1); 287 Builder(Locale loc)288 public Builder(Locale loc) { 289 this(ULocale.forLocale(loc)); 290 } 291 /** 292 * Create SimpleFilteredBreakIteratorBuilder using given locale 293 * @param loc the locale to get filtered iterators 294 */ Builder(ULocale loc)295 public Builder(ULocale loc) { 296 ICUResourceBundle rb = ICUResourceBundle.getBundleInstance( 297 ICUData.ICU_BRKITR_BASE_NAME, loc, OpenType.LOCALE_ROOT); 298 299 ICUResourceBundle breaks = rb.findWithFallback("exceptions/SentenceBreak"); 300 301 if (breaks != null) { 302 for (int index = 0, size = breaks.getSize(); index < size; ++index) { 303 ICUResourceBundle b = (ICUResourceBundle) breaks.get(index); 304 String br = b.getString(); 305 filterSet.add(br); 306 } 307 } 308 } 309 310 /** 311 * Create SimpleFilteredBreakIteratorBuilder with no exception 312 */ Builder()313 public Builder() { 314 } 315 316 @Override suppressBreakAfter(CharSequence str)317 public boolean suppressBreakAfter(CharSequence str) { 318 return filterSet.add(str); 319 } 320 321 @Override unsuppressBreakAfter(CharSequence str)322 public boolean unsuppressBreakAfter(CharSequence str) { 323 return filterSet.remove(str); 324 } 325 326 @Override wrapIteratorWithFilter(BreakIterator adoptBreakIterator)327 public BreakIterator wrapIteratorWithFilter(BreakIterator adoptBreakIterator) { 328 if( filterSet.isEmpty() ) { 329 // Short circuit - nothing to except. 330 return adoptBreakIterator; 331 } 332 333 CharsTrieBuilder builder = new CharsTrieBuilder(); 334 CharsTrieBuilder builder2 = new CharsTrieBuilder(); 335 336 int revCount = 0; 337 int fwdCount = 0; 338 339 int subCount = filterSet.size(); 340 CharSequence[] ustrs = new CharSequence[subCount]; 341 int[] partials = new int[subCount]; 342 343 CharsTrie backwardsTrie = null; // i.e. ".srM" for Mrs. 344 CharsTrie forwardsPartialTrie = null; // Has ".a" for "a.M." 345 346 int i = 0; 347 for (CharSequence s : filterSet) { 348 ustrs[i] = s; // copy by value? 349 partials[i] = 0; // default: no partial 350 i++; 351 } 352 353 for (i = 0; i < subCount; i++) { 354 String thisStr = ustrs[i].toString(); // TODO: don't cast to String? 355 int nn = thisStr.indexOf('.'); // TODO: non-'.' abbreviations 356 if (nn > -1 && (nn + 1) != thisStr.length()) { 357 // is partial. 358 // is it unique? 359 int sameAs = -1; 360 for (int j = 0; j < subCount; j++) { 361 if (j == i) 362 continue; 363 if (thisStr.regionMatches(0, ustrs[j].toString() /* TODO */, 0, nn + 1)) { 364 if (partials[j] == 0) { // hasn't been processed yet 365 partials[j] = SuppressInReverse | AddToForward; 366 } else if ((partials[j] & SuppressInReverse) != 0) { 367 sameAs = j; // the other entry is already in the reverse table. 368 } 369 } 370 } 371 372 if ((sameAs == -1) && (partials[i] == 0)) { 373 StringBuilder prefix = new StringBuilder(thisStr.substring(0, nn + 1)); 374 // first one - add the prefix to the reverse table. 375 prefix.reverse(); 376 builder.add(prefix, PARTIAL); 377 revCount++; 378 partials[i] = SuppressInReverse | AddToForward; 379 } 380 } 381 } 382 383 for (i = 0; i < subCount; i++) { 384 final String thisStr = ustrs[i].toString(); // TODO 385 if (partials[i] == 0) { 386 StringBuilder reversed = new StringBuilder(thisStr).reverse(); 387 builder.add(reversed, MATCH); 388 revCount++; 389 } else { 390 // an optimization would be to only add the portion after the '.' 391 // for example, for "Ph.D." we store ".hP" in the reverse table. We could just store "D." in the 392 // forward, 393 // instead of "Ph.D." since we already know the "Ph." part is a match. 394 // would need the trie to be able to hold 0-length strings, though. 395 builder2.add(thisStr, MATCH); // forward 396 fwdCount++; 397 } 398 } 399 400 if (revCount > 0) { 401 backwardsTrie = builder.build(StringTrieBuilder.Option.FAST); 402 } 403 404 if (fwdCount > 0) { 405 forwardsPartialTrie = builder2.build(StringTrieBuilder.Option.FAST); 406 } 407 return new SimpleFilteredSentenceBreakIterator(adoptBreakIterator, forwardsPartialTrie, backwardsTrie); 408 } 409 } 410 } 411