• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 /*
5  *******************************************************************************
6  * Copyright (C) 2014-2016, International Business Machines Corporation and
7  * others. All Rights Reserved.
8  *******************************************************************************
9  */
10 package ohos.global.icu.impl;
11 
12 import java.text.CharacterIterator;
13 import java.util.HashSet;
14 import java.util.Locale;
15 
16 import ohos.global.icu.impl.ICUResourceBundle.OpenType;
17 import ohos.global.icu.text.BreakIterator;
18 import ohos.global.icu.text.FilteredBreakIteratorBuilder;
19 import ohos.global.icu.text.UCharacterIterator;
20 import ohos.global.icu.util.BytesTrie;
21 import ohos.global.icu.util.CharsTrie;
22 import ohos.global.icu.util.CharsTrieBuilder;
23 import ohos.global.icu.util.StringTrieBuilder;
24 import ohos.global.icu.util.ULocale;
25 
26 /**
27  * @author tomzhang
28  * @hide exposed on OHOS
29  */
30 public class SimpleFilteredSentenceBreakIterator extends BreakIterator {
31 
32     private BreakIterator delegate;
33     private UCharacterIterator text; // TODO(Tom): suffice to move into the local scope in next() ?
34     private CharsTrie backwardsTrie; // i.e. ".srM" for Mrs.
35     private CharsTrie forwardsPartialTrie; // Has ".a" for "a.M."
36 
/**
 * Wraps a break iterator together with the two tries that describe the
 * suppressed breaks. The arguments are stored as given; no copies are made.
 *
 * @param adoptBreakIterator
 *            break iterator to adopt as the delegate
 * @param forwardsPartialTrie
 *            forward &amp; partial char trie to adopt
 * @param backwardsTrie
 *            backward trie to adopt
 */
public SimpleFilteredSentenceBreakIterator(
        BreakIterator adoptBreakIterator,
        CharsTrie forwardsPartialTrie,
        CharsTrie backwardsTrie) {
    this.backwardsTrie = backwardsTrie;
    this.forwardsPartialTrie = forwardsPartialTrie;
    this.delegate = adoptBreakIterator;
}
51 
52 
53     /**
54      * Reset the filter from the delegate.
55      */
resetState()56     private final void resetState() {
57         text = UCharacterIterator.getInstance((CharacterIterator) delegate.getText().clone());
58     }
59 
60     /**
61      * Is there an exception at this point?
62      *
63      * @param n the location of the possible break
64      * @return
65      */
breakExceptionAt(int n)66     private final boolean breakExceptionAt(int n) {
67         // Note: the C++ version of this function is SimpleFilteredSentenceBreakIterator::breakExceptionAt()
68 
69         int bestPosn = -1;
70         int bestValue = -1;
71 
72         // loops while 'n' points to an exception
73         text.setIndex(n);
74         backwardsTrie.reset();
75         int uch;
76 
77 
78 
79         // Assume a space is following the '.' (so we handle the case: "Mr. /Brown")
80         if ((uch = text.previousCodePoint()) == ' ') { // TODO: skip a class of chars here??
81             // TODO only do this the 1st time?
82         } else {
83             uch = text.nextCodePoint();
84         }
85 
86         BytesTrie.Result r = BytesTrie.Result.INTERMEDIATE_VALUE;
87 
88         while ((uch = text.previousCodePoint()) != UCharacterIterator.DONE && // more to consume backwards and..
89                 ((r = backwardsTrie.nextForCodePoint(uch)).hasNext())) {// more in the trie
90             if (r.hasValue()) { // remember the best match so far
91                 bestPosn = text.getIndex();
92                 bestValue = backwardsTrie.getValue();
93             }
94         }
95 
96         if (r.matches()) { // exact match?
97             bestValue = backwardsTrie.getValue();
98             bestPosn = text.getIndex();
99         }
100 
101         if (bestPosn >= 0) {
102             if (bestValue == Builder.MATCH) { // exact match!
103                 return true; // Exception here.
104             } else if (bestValue == Builder.PARTIAL && forwardsPartialTrie != null) {
105                 // make sure there's a forward trie
106                 // We matched the "Ph." in "Ph.D." - now we need to run everything through the forwards trie
107                 // to see if it matches something going forward.
108                 forwardsPartialTrie.reset();
109 
110                 BytesTrie.Result rfwd = BytesTrie.Result.INTERMEDIATE_VALUE;
111                 text.setIndex(bestPosn); // hope that's close ..
112                 while ((uch = text.nextCodePoint()) != BreakIterator.DONE
113                         && ((rfwd = forwardsPartialTrie.nextForCodePoint(uch)).hasNext())) {
114                 }
115                 if (rfwd.matches()) {
116                     // Exception here
117                     return true;
118                 } // else fall through
119             } // else fall through
120         } // else fall through
121         return false; // No exception here.
122     }
123 
124     /**
125      * Given that the delegate has already given its "initial" answer,
126      * find the NEXT actual (non-suppressed) break.
127      * @param n initial position from delegate
128      * @return new break position or BreakIterator.DONE
129      */
internalNext(int n)130     private final int internalNext(int n) {
131         if (n == BreakIterator.DONE || // at end or
132                 backwardsTrie == null) { // .. no backwards table loaded == no exceptions
133             return n;
134         }
135         resetState();
136 
137         final int textLen = text.getLength();
138 
139         while (n != BreakIterator.DONE && n != textLen) {
140             // outer loop runs once per underlying break (from fDelegate).
141             // loops while 'n' points to an exception.
142 
143             if (breakExceptionAt(n)) {
144                 // n points to a break exception
145                 n = delegate.next();
146             } else {
147                 // no exception at this spot
148                 return n;
149             }
150         }
151         return n; //hit underlying DONE or break at end of text
152     }
153 
154     /**
155      * Given that the delegate has already given its "initial" answer,
156      * find the PREV actual (non-suppressed) break.
157      * @param n initial position from delegate
158      * @return new break position or BreakIterator.DONE
159      */
internalPrev(int n)160     private final int internalPrev(int n) {
161         if (n == 0 || n == BreakIterator.DONE || // at end or
162                 backwardsTrie == null) { // .. no backwards table loaded == no exceptions
163             return n;
164         }
165         resetState();
166 
167         while (n != BreakIterator.DONE && n != 0) {
168             // outer loop runs once per underlying break (from fDelegate).
169             // loops while 'n' points to an exception.
170 
171             if (breakExceptionAt(n)) {
172                 // n points to a break exception
173                 n = delegate.previous();
174             } else {
175                 // no exception at this spot
176                 return n;
177             }
178         }
179         return n; //hit underlying DONE or break at end of text
180     }
181 
182     @Override
equals(Object obj)183     public boolean equals(Object obj) {
184         if (obj == null)
185             return false;
186         if (this == obj)
187             return true;
188         if (getClass() != obj.getClass())
189             return false;
190         SimpleFilteredSentenceBreakIterator other = (SimpleFilteredSentenceBreakIterator) obj;
191         return delegate.equals(other.delegate) && text.equals(other.text) && backwardsTrie.equals(other.backwardsTrie)
192                 && forwardsPartialTrie.equals(other.forwardsPartialTrie);
193     }
194 
195     @Override
hashCode()196     public int hashCode() {
197         return (forwardsPartialTrie.hashCode() * 39) + (backwardsTrie.hashCode() * 11) + delegate.hashCode();
198     }
199 
200     @Override
clone()201     public Object clone() {
202         SimpleFilteredSentenceBreakIterator other = (SimpleFilteredSentenceBreakIterator) super.clone();
203         return other;
204     }
205 
206 
207     @Override
first()208     public int first() {
209         // Don't suppress a break opportunity at the beginning of text.
210         return delegate.first();
211     }
212 
213     @Override
preceding(int offset)214     public int preceding(int offset) {
215         return internalPrev(delegate.preceding(offset));
216     }
217 
218     @Override
previous()219     public int previous() {
220         return internalPrev(delegate.previous());
221     }
222 
223     @Override
current()224     public int current() {
225         return delegate.current();
226     }
227 
228     @Override
isBoundary(int offset)229     public boolean isBoundary(int offset) {
230         if(!delegate.isBoundary(offset)) {
231             return false; // No underlying break to suppress?
232         }
233 
234         // delegate thinks there's a break…
235         if(backwardsTrie == null) {
236             return true; // no data
237         }
238 
239         resetState();
240         return !breakExceptionAt(offset); // if there's an exception: no break.
241     }
242 
243     @Override
next()244     public int next() {
245         return internalNext(delegate.next());
246     }
247 
248     @Override
next(int n)249     public int next(int n) {
250         return internalNext(delegate.next(n));
251     }
252 
253     @Override
following(int offset)254     public int following(int offset) {
255         return internalNext(delegate.following(offset));
256     }
257 
258     @Override
last()259     public int last() {
260         // Don't suppress a break opportunity at the end of text.
261         return delegate.last();
262     }
263 
264     @Override
getText()265     public CharacterIterator getText() {
266         return delegate.getText();
267     }
268 
269     @Override
setText(CharacterIterator newText)270     public void setText(CharacterIterator newText) {
271         delegate.setText(newText);
272     }
273 
274     /**
275      * @hide exposed on OHOS
276      */
277     public static class Builder extends FilteredBreakIteratorBuilder {
278         /**
279          * filter set to store all exceptions
280          */
281         private HashSet<CharSequence> filterSet = new HashSet<CharSequence>();
282 
283         static final int PARTIAL = (1 << 0); // < partial - need to run through forward trie
284         static final int MATCH = (1 << 1); // < exact match - skip this one.
285         static final int SuppressInReverse = (1 << 0);
286         static final int AddToForward = (1 << 1);
287 
/**
 * Create SimpleFilteredBreakIteratorBuilder using given locale;
 * the locale is converted with {@code ULocale.forLocale} and passed on to
 * the {@code ULocale} constructor.
 * @param loc the locale to get filtered iterators
 */
public Builder(Locale loc) {
    this(ULocale.forLocale(loc));
}
291         /**
292          * Create SimpleFilteredBreakIteratorBuilder using given locale
293          * @param loc the locale to get filtered iterators
294          */
Builder(ULocale loc)295         public Builder(ULocale loc) {
296             ICUResourceBundle rb = ICUResourceBundle.getBundleInstance(
297                     ICUData.ICU_BRKITR_BASE_NAME, loc, OpenType.LOCALE_ROOT);
298 
299             ICUResourceBundle breaks = rb.findWithFallback("exceptions/SentenceBreak");
300 
301             if (breaks != null) {
302                 for (int index = 0, size = breaks.getSize(); index < size; ++index) {
303                     ICUResourceBundle b = (ICUResourceBundle) breaks.get(index);
304                     String br = b.getString();
305                     filterSet.add(br);
306                 }
307             }
308         }
309 
310         /**
311          * Create SimpleFilteredBreakIteratorBuilder with no exception
312          */
Builder()313         public Builder() {
314         }
315 
316         @Override
suppressBreakAfter(CharSequence str)317         public boolean suppressBreakAfter(CharSequence str) {
318             return filterSet.add(str);
319         }
320 
321         @Override
unsuppressBreakAfter(CharSequence str)322         public boolean unsuppressBreakAfter(CharSequence str) {
323             return filterSet.remove(str);
324         }
325 
326         @Override
wrapIteratorWithFilter(BreakIterator adoptBreakIterator)327         public BreakIterator wrapIteratorWithFilter(BreakIterator adoptBreakIterator) {
328             if( filterSet.isEmpty() ) {
329                 // Short circuit - nothing to except.
330                 return adoptBreakIterator;
331             }
332 
333             CharsTrieBuilder builder = new CharsTrieBuilder();
334             CharsTrieBuilder builder2 = new CharsTrieBuilder();
335 
336             int revCount = 0;
337             int fwdCount = 0;
338 
339             int subCount = filterSet.size();
340             CharSequence[] ustrs = new CharSequence[subCount];
341             int[] partials = new int[subCount];
342 
343             CharsTrie backwardsTrie = null; // i.e. ".srM" for Mrs.
344             CharsTrie forwardsPartialTrie = null; // Has ".a" for "a.M."
345 
346             int i = 0;
347             for (CharSequence s : filterSet) {
348                 ustrs[i] = s; // copy by value?
349                 partials[i] = 0; // default: no partial
350                 i++;
351             }
352 
353             for (i = 0; i < subCount; i++) {
354                 String thisStr = ustrs[i].toString(); // TODO: don't cast to String?
355                 int nn = thisStr.indexOf('.'); // TODO: non-'.' abbreviations
356                 if (nn > -1 && (nn + 1) != thisStr.length()) {
357                     // is partial.
358                     // is it unique?
359                     int sameAs = -1;
360                     for (int j = 0; j < subCount; j++) {
361                         if (j == i)
362                             continue;
363                         if (thisStr.regionMatches(0, ustrs[j].toString() /* TODO */, 0, nn + 1)) {
364                             if (partials[j] == 0) { // hasn't been processed yet
365                                 partials[j] = SuppressInReverse | AddToForward;
366                             } else if ((partials[j] & SuppressInReverse) != 0) {
367                                 sameAs = j; // the other entry is already in the reverse table.
368                             }
369                         }
370                     }
371 
372                     if ((sameAs == -1) && (partials[i] == 0)) {
373                         StringBuilder prefix = new StringBuilder(thisStr.substring(0, nn + 1));
374                         // first one - add the prefix to the reverse table.
375                         prefix.reverse();
376                         builder.add(prefix, PARTIAL);
377                         revCount++;
378                         partials[i] = SuppressInReverse | AddToForward;
379                     }
380                 }
381             }
382 
383             for (i = 0; i < subCount; i++) {
384                 final String thisStr = ustrs[i].toString(); // TODO
385                 if (partials[i] == 0) {
386                     StringBuilder reversed = new StringBuilder(thisStr).reverse();
387                     builder.add(reversed, MATCH);
388                     revCount++;
389                 } else {
390                     // an optimization would be to only add the portion after the '.'
391                     // for example, for "Ph.D." we store ".hP" in the reverse table. We could just store "D." in the
392                     // forward,
393                     // instead of "Ph.D." since we already know the "Ph." part is a match.
394                     // would need the trie to be able to hold 0-length strings, though.
395                     builder2.add(thisStr, MATCH); // forward
396                     fwdCount++;
397                 }
398             }
399 
400             if (revCount > 0) {
401                 backwardsTrie = builder.build(StringTrieBuilder.Option.FAST);
402             }
403 
404             if (fwdCount > 0) {
405                 forwardsPartialTrie = builder2.build(StringTrieBuilder.Option.FAST);
406             }
407             return new SimpleFilteredSentenceBreakIterator(adoptBreakIterator, forwardsPartialTrie, backwardsTrie);
408         }
409     }
410 }
411