• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html
4 /*
5  *******************************************************************************
6  * Copyright (C) 1996-2016, International Business Machines Corporation and    *
7  * others. All Rights Reserved.                                                *
8  *******************************************************************************
9  */
10 
11 package android.icu.text;
12 
13 import java.text.CharacterIterator;
14 import java.text.StringCharacterIterator;
15 import java.util.Locale;
16 import java.util.MissingResourceException;
17 
18 import android.icu.impl.CSCharacterIterator;
19 import android.icu.impl.CacheValue;
20 import android.icu.impl.ICUDebug;
21 import android.icu.util.ICUCloneNotSupportedException;
22 import android.icu.util.ULocale;
23 
24 /**
25  * <strong>[icu enhancement]</strong> ICU's replacement for {@link java.text.BreakIterator}.&nbsp;Methods, fields, and other functionality specific to ICU are labeled '<strong>[icu]</strong>'.
26  *
27  * <p>A class that locates boundaries in text.  This class defines a protocol for
28  * objects that break up a piece of natural-language text according to a set
29  * of criteria.  Instances or subclasses of BreakIterator can be provided, for
30  * example, to break a piece of text into words, sentences, or logical characters
31  * according to the conventions of some language or group of languages.
32  *
33  * We provide five built-in types of BreakIterator:
34  * <ul><li>getTitleInstance() returns a BreakIterator that locates boundaries
35  * between title breaks.
36  * <li>getSentenceInstance() returns a BreakIterator that locates boundaries
37  * between sentences.  This is useful for triple-click selection, for example.
38  * <li>getWordInstance() returns a BreakIterator that locates boundaries between
39  * words.  This is useful for double-click selection or "find whole words" searches.
40  * This type of BreakIterator makes sure there is a boundary position at the
41  * beginning and end of each legal word.  (Numbers count as words, too.)  Whitespace
42  * and punctuation are kept separate from real words.
43  * <li>getLineInstance() returns a BreakIterator that locates positions where it is
44  * legal for a text editor to wrap lines.  This is similar to word breaking, but
45  * not the same: punctuation and whitespace are generally kept with words (you don't
46  * want a line to start with whitespace, for example), and some special characters
47  * can force a position to be considered a line-break position or prevent a position
48  * from being a line-break position.
49  * <li>getCharacterInstance() returns a BreakIterator that locates boundaries between
50  * logical characters.  Because of the structure of the Unicode encoding, a logical
51  * character may be stored internally as more than one Unicode code point.  (A with an
52  * umlaut may be stored as an a followed by a separate combining umlaut character,
53  * for example, but the user still thinks of it as one character.)  This iterator allows
54  * various processes (especially text editors) to treat as characters the units of text
55  * that a user would think of as characters, rather than the units of text that the
56  * computer sees as "characters".</ul>
57  * The text boundary positions are found according to the rules
58  * described in Unicode Standard Annex #29, Text Boundaries, and
59  * Unicode Standard Annex #14, Line Breaking Properties.  These
60  * are available at http://www.unicode.org/reports/tr29/ and
61  * http://www.unicode.org/reports/tr14/, respectively.
62  * <p>
63  * BreakIterator's interface follows an "iterator" model (hence the name), meaning it
64  * has a concept of a "current position" and methods like first(), last(), next(),
65  * and previous() that update the current position.  All BreakIterators uphold the
66  * following invariants:
67  * <ul><li>The beginning and end of the text are always treated as boundary positions.
68  * <li>The current position of the iterator is always a boundary position (random-
69  * access methods move the iterator to the nearest boundary position before or
70  * after the specified position, not <i>to</i> the specified position).
71  * <li>DONE is used as a flag to indicate when iteration has stopped.  DONE is only
72  * returned when the current position is the end of the text and the user calls next(),
73  * or when the current position is the beginning of the text and the user calls
74  * previous().
75  * <li>Break positions are numbered by the positions of the characters that follow
76  * them.  Thus, under normal circumstances, the position before the first character
77  * is 0, the position after the first character is 1, and the position after the
78  * last character is equal to the length of the string.
79  * <li>The client can change the position of an iterator, or the text it analyzes,
80  * at will, but cannot change the behavior.  If the user wants different behavior, he
81  * must instantiate a new iterator.</ul>
82  *
83  * BreakIterator accesses the text it analyzes through a CharacterIterator, which makes
84  * it possible to use BreakIterator to analyze text in any text-storage vehicle that
85  * provides a CharacterIterator interface.
86  *
87  * <b>Note:</b>  Some types of BreakIterator can take a long time to create, and
88  * instances of BreakIterator are not currently cached by the system.  For
89  * optimal performance, keep instances of BreakIterator around as long as makes
90  * sense.  For example, when word-wrapping a document, don't create and destroy a
91  * new BreakIterator for each line.  Create one break iterator for the whole document
92  * (or whatever stretch of text you're wrapping) and use it to do the whole job of
93  * wrapping the text.
94  *
95   * <P>
96  * <strong>Examples</strong>:<P>
97  * Creating and using text boundaries
98  * <blockquote>
99  * <pre>
100  * public static void main(String args[]) {
101  *      if (args.length == 1) {
102  *          String stringToExamine = args[0];
103  *          //print each word in order
104  *          BreakIterator boundary = BreakIterator.getWordInstance();
105  *          boundary.setText(stringToExamine);
106  *          printEachForward(boundary, stringToExamine);
107  *          //print each sentence in reverse order
108  *          boundary = BreakIterator.getSentenceInstance(Locale.US);
109  *          boundary.setText(stringToExamine);
110  *          printEachBackward(boundary, stringToExamine);
111  *          printFirst(boundary, stringToExamine);
112  *          printLast(boundary, stringToExamine);
113  *      }
114  * }
115  * </pre>
116  * </blockquote>
117  *
118  * Print each element in order
119  * <blockquote>
120  * <pre>
121  * public static void printEachForward(BreakIterator boundary, String source) {
122  *     int start = boundary.first();
123  *     for (int end = boundary.next();
124  *          end != BreakIterator.DONE;
125  *          start = end, end = boundary.next()) {
126  *          System.out.println(source.substring(start,end));
127  *     }
128  * }
129  * </pre>
130  * </blockquote>
131  *
132  * Print each element in reverse order
133  * <blockquote>
134  * <pre>
135  * public static void printEachBackward(BreakIterator boundary, String source) {
136  *     int end = boundary.last();
137  *     for (int start = boundary.previous();
138  *          start != BreakIterator.DONE;
139  *          end = start, start = boundary.previous()) {
140  *         System.out.println(source.substring(start,end));
141  *     }
142  * }
143  * </pre>
144  * </blockquote>
145  *
146  * Print first element
147  * <blockquote>
148  * <pre>
149  * public static void printFirst(BreakIterator boundary, String source) {
150  *     int start = boundary.first();
151  *     int end = boundary.next();
152  *     System.out.println(source.substring(start,end));
153  * }
154  * </pre>
155  * </blockquote>
156  *
157  * Print last element
158  * <blockquote>
159  * <pre>
160  * public static void printLast(BreakIterator boundary, String source) {
161  *     int end = boundary.last();
162  *     int start = boundary.previous();
163  *     System.out.println(source.substring(start,end));
164  * }
165  * </pre>
166  * </blockquote>
167  *
168  * Print the element at a specified position
169  * <blockquote>
170  * <pre>
171  * public static void printAt(BreakIterator boundary, int pos, String source) {
172  *     int end = boundary.following(pos);
173  *     int start = boundary.previous();
174  *     System.out.println(source.substring(start,end));
175  * }
176  * </pre>
177  * </blockquote>
178  *
179  * Find the next word
180  * <blockquote>
181  * <pre>
182  * public static int nextWordStartAfter(int pos, String text) {
183  *     BreakIterator wb = BreakIterator.getWordInstance();
184  *     wb.setText(text);
185  *     int wordStart = wb.following(pos);
186  *     for (;;) {
187  *         int wordLimit = wb.next();
188  *         if (wordLimit == BreakIterator.DONE) {
189  *             return BreakIterator.DONE;
190  *         }
191  *         int wordStatus = wb.getRuleStatus();
192  *         if (wordStatus != BreakIterator.WORD_NONE) {
193  *             return wordStart;
194  *         }
195  *         wordStart = wordLimit;
196  *      }
197  * }
198  * </pre>
199  * The iterator returned by {@link #getWordInstance} is unique in that
200  * the break positions it returns don't represent both the start and end of the
201  * thing being iterated over.  That is, a sentence-break iterator returns breaks
202  * that each represent the end of one sentence and the beginning of the next.
203  * With the word-break iterator, the characters between two boundaries might be a
204  * word, or they might be the punctuation or whitespace between two words.  The
205  * above code uses {@link #getRuleStatus} to identify and ignore boundaries associated
206  * with punctuation or other non-word characters.
207  * </blockquote>
208  *
209  * @see CharacterIterator
210  *
211  */
212 
213 public abstract class BreakIterator implements Cloneable
214 {
215 
216     private static final boolean DEBUG = ICUDebug.enabled("breakiterator");
217 
218     /**
219      * Default constructor.  There is no state that is carried by this abstract
220      * base class.
221      */
BreakIterator()222     protected BreakIterator()
223     {
224     }
225 
226     /**
227      * Clone method.  Creates another BreakIterator with the same behavior and
228      * current state as this one.
229      * @return The clone.
230      */
231     @Override
clone()232     public Object clone()
233     {
234         try {
235             return super.clone();
236         }
237         catch (CloneNotSupportedException e) {
238             ///CLOVER:OFF
239             throw new ICUCloneNotSupportedException(e);
240             ///CLOVER:ON
241         }
242     }
243 
244     /**
245      * DONE is returned by previous() and next() after all valid
246      * boundaries have been returned.
247      */
248     public static final int DONE = -1;
249 
250     /**
251      * Set the iterator to the first boundary position.  This is always the beginning
252      * index of the text this iterator iterates over.  For example, if
253      * the iterator iterates over a whole string, this function will
254      * always return 0.
255      * @return The character offset of the beginning of the stretch of text
256      * being broken.
257      */
first()258     public abstract int first();
259 
260     /**
261      * Set the iterator to the last boundary position.  This is always the "past-the-end"
262      * index of the text this iterator iterates over.  For example, if the
263      * iterator iterates over a whole string (call it "text"), this function
264      * will always return text.length().
265      * @return The character offset of the end of the stretch of text
266      * being broken.
267      */
last()268     public abstract int last();
269 
270     /**
271      * Move the iterator by the specified number of steps in the text.
272      * A positive number moves the iterator forward; a negative number
273      * moves the iterator backwards. If this causes the iterator
274      * to move off either end of the text, this function returns DONE;
275      * otherwise, this function returns the position of the appropriate
276      * boundary.  Calling this function is equivalent to calling next() or
277      * previous() n times.
278      * @param n The number of boundaries to advance over (if positive, moves
279      * forward; if negative, moves backwards).
280      * @return The position of the boundary n boundaries from the current
281      * iteration position, or DONE if moving n boundaries causes the iterator
282      * to advance off either end of the text.
283      */
next(int n)284     public abstract int next(int n);
285 
286     /**
287      * Advances the iterator forward one boundary.  The current iteration
288      * position is updated to point to the next boundary position after the
289      * current position, and this is also the value that is returned.  If
290      * the current position is equal to the value returned by last(), or to
291      * DONE, this function returns DONE and sets the current position to
292      * DONE.
293      * @return The position of the first boundary position following the
294      * iteration position.
295      */
next()296     public abstract int next();
297 
298     /**
299      * Move the iterator backward one boundary.  The current iteration
300      * position is updated to point to the last boundary position before
301      * the current position, and this is also the value that is returned.  If
302      * the current position is equal to the value returned by first(), or to
303      * DONE, this function returns DONE and sets the current position to
304      * DONE.
305      * @return The position of the last boundary position preceding the
306      * iteration position.
307      */
previous()308     public abstract int previous();
309 
310     /**
311      * Sets the iterator's current iteration position to be the first
312      * boundary position following the specified position.  (Whether the
313      * specified position is itself a boundary position or not doesn't
314      * matter-- this function always moves the iteration position to the
315      * first boundary after the specified position.)  If the specified
316      * position is the past-the-end position, returns DONE.
317      * @param offset The character position to start searching from.
318      * @return The position of the first boundary position following
319      * "offset" (whether or not "offset" itself is a boundary position),
320      * or DONE if "offset" is the past-the-end offset.
321      */
following(int offset)322     public abstract int following(int offset);
323 
324     /**
325      * Sets the iterator's current iteration position to be the last
326      * boundary position preceding the specified position.  (Whether the
327      * specified position is itself a boundary position or not doesn't
328      * matter-- this function always moves the iteration position to the
329      * last boundary before the specified position.)  If the specified
330      * position is the starting position, returns DONE.
331      * @param offset The character position to start searching from.
332      * @return The position of the last boundary position preceding
333      * "offset" (whether of not "offset" itself is a boundary position),
334      * or DONE if "offset" is the starting offset of the iterator.
335      */
preceding(int offset)336     public int preceding(int offset) {
337         // NOTE:  This implementation is here solely because we can't add new
338         // abstract methods to an existing class.  There is almost ALWAYS a
339         // better, faster way to do this.
340         int pos = following(offset);
341         while (pos >= offset && pos != DONE)
342             pos = previous();
343         return pos;
344     }
345 
346     /**
347      * Return true if the specified position is a boundary position.  If the
348      * function returns true, the current iteration position is set to the
349      * specified position; if the function returns false, the current
350      * iteration position is set as though following() had been called.
351      * @param offset the offset to check.
352      * @return True if "offset" is a boundary position.
353      */
isBoundary(int offset)354     public boolean isBoundary(int offset) {
355         // Again, this is the default implementation, which is provided solely because
356         // we couldn't add a new abstract method to an existing class.  The real
357         // implementations will usually need to do a little more work.
358         if (offset == 0) {
359             return true;
360         }
361         else
362             return following(offset - 1) == offset;
363     }
364 
365     /**
366      * Return the iterator's current position.
367      * @return The iterator's current position.
368      */
current()369     public abstract int current();
370 
371 
372     /**
373      * Tag value for "words" that do not fit into any of other categories.
374      * Includes spaces and most punctuation.
375      */
376     public static final int WORD_NONE           = 0;
377 
378     /**
379      * Upper bound for tags for uncategorized words.
380      */
381     public static final int WORD_NONE_LIMIT     = 100;
382 
383     /**
384      * Tag value for words that appear to be numbers, lower limit.
385      */
386     public static final int WORD_NUMBER         = 100;
387 
388     /**
389      * Tag value for words that appear to be numbers, upper limit.
390      */
391     public static final int WORD_NUMBER_LIMIT   = 200;
392 
393     /**
394      * Tag value for words that contain letters, excluding
395      * hiragana, katakana or ideographic characters, lower limit.
396      */
397     public static final int WORD_LETTER         = 200;
398 
399     /**
400      * Tag value for words containing letters, upper limit
401      */
402     public static final int WORD_LETTER_LIMIT   = 300;
403 
404     /**
405      * Tag value for words containing kana characters, lower limit
406      */
407     public static final int WORD_KANA           = 300;
408 
409     /**
410      * Tag value for words containing kana characters, upper limit
411      */
412     public static final int WORD_KANA_LIMIT     = 400;
413 
414     /**
415      * Tag value for words containing ideographic characters, lower limit
416      */
417     public static final int WORD_IDEO           = 400;
418 
419     /**
420      * Tag value for words containing ideographic characters, upper limit
421      */
422     public static final int WORD_IDEO_LIMIT     = 500;
423 
424     /**
425      * For RuleBasedBreakIterators, return the status tag from the
426      * break rule that determined the boundary at the current iteration position.
427      * <p>
428      * For break iterator types that do not support a rule status,
429      * a default value of 0 is returned.
430      * <p>
431      * @return The status from the break rule that determined the boundary
432      * at the current iteration position.
433      */
434 
getRuleStatus()435     public int  getRuleStatus() {
436         return 0;
437     }
438 
439     /**
440      * For RuleBasedBreakIterators, get the status (tag) values from the break rule(s)
441      * that determined the the boundary at the current iteration position.
442      * <p>
443      * For break iterator types that do not support rule status,
444      * no values are returned.
445      * <p>
446      * If the size  of the output array is insufficient to hold the data,
447      *  the output will be truncated to the available length.  No exception
448      *  will be thrown.
449      *
450      * @param fillInArray an array to be filled in with the status values.
451      * @return          The number of rule status values from rules that determined
452      *                  the the boundary at the current iteration position.
453      *                  In the event that the array is too small, the return value
454      *                  is the total number of status values that were available,
455      *                  not the reduced number that were actually returned.
456      */
getRuleStatusVec(int[] fillInArray)457     public int getRuleStatusVec(int[] fillInArray) {
458         if (fillInArray != null && fillInArray.length > 0) {
459             fillInArray[0] = 0;
460         }
461         return 1;
462     }
463 
464     /**
465      * Returns a CharacterIterator over the text being analyzed.
466      * <p>
467      * <b><i>Caution:</i></b>The state of the returned CharacterIterator
468      * must not be modified in any way while the BreakIterator is still in use.
469      * Doing so will lead to undefined behavior of the BreakIterator.
470      * Clone the returned CharacterIterator first and work with that.
471      * <p>
472      * The returned CharacterIterator is a reference
473      * to the <b>actual iterator being used</b> by the BreakIterator.
474      * No guarantees are made about the current position
475      * of this iterator when it is returned; it may differ from the
476      * BreakIterator's current position.  If you need to move that
477      * position to examine the text, clone this function's return value first.
478      *
479      * @return A CharacterIterator over the text being analyzed.
480      */
getText()481     public abstract CharacterIterator getText();
482 
483     /**
484      * Sets the iterator to analyze a new piece of text.  The new
485      * piece of text is passed in as a String, and the current
486      * iteration position is reset to the beginning of the string.
487      * (The old text is dropped.)
488      * @param newText A String containing the text to analyze with
489      * this BreakIterator.
490      */
setText(String newText)491     public void setText(String newText)
492     {
493         setText(new StringCharacterIterator(newText));
494     }
495 
496     /**
497      * Sets the iterator to analyze a new piece of text.  The new
498      * piece of text is passed in as a CharSequence, and the current
499      * iteration position is reset to the beginning of the text.
500      * (The old text is dropped.)
501      * <p>
502      * The text underlying the CharSequence must not be be modified while
503      * the BreakIterator holds a references to it. (As could possibly occur
504      * with a StringBuilder, for example).
505      * @param newText A CharSequence containing the text to analyze with
506      * this BreakIterator.
507      */
setText(CharSequence newText)508     public void setText(CharSequence newText) {
509         setText(new CSCharacterIterator(newText));
510     }
511 
512     /**
513      * Sets the iterator to analyze a new piece of text. This function resets
514      * the current iteration position to the beginning of the text.
515      * (The old iterator is dropped.)
516      * <p>
517      * <b><i>Caution:</i></b> The supplied CharacterIterator is used
518      * directly by the BreakIterator, and must not be altered in any
519      * way by code outside of the BreakIterator.
520      * Doing so will lead to undefined behavior of the BreakIterator.
521      *
522      * @param newText A CharacterIterator referring to the text
523      * to analyze with this BreakIterator (the iterator's current
524      * position is ignored, but its other state is significant).
525      */
setText(CharacterIterator newText)526     public abstract void setText(CharacterIterator newText);
527 
528     /**
529      * <strong>[icu]</strong>
530      */
531     public static final int KIND_CHARACTER = 0;
532     /**
533      * <strong>[icu]</strong>
534      */
535     public static final int KIND_WORD = 1;
536     /**
537      * <strong>[icu]</strong>
538      */
539     public static final int KIND_LINE = 2;
540     /**
541      * <strong>[icu]</strong>
542      */
543     public static final int KIND_SENTENCE = 3;
544     /**
545      * <strong>[icu]</strong>
546      * @see #getTitleInstance
547      * @see #getWordInstance
548      * @deprecated ICU 64 Use {@link #getWordInstance} instead.
549      */
550     @Deprecated
551     public static final int KIND_TITLE = 4;
552 
553     /**
554      */
555     private static final int KIND_COUNT = 5;
556 
557     private static final CacheValue<?>[] iterCache = new CacheValue<?>[5];
558 
559     /**
560      * Returns a new instance of BreakIterator that locates word boundaries.
561      * This function assumes that the text being analyzed is in the default
562      * locale's language.
563      * @return An instance of BreakIterator that locates word boundaries.
564      */
getWordInstance()565     public static BreakIterator getWordInstance()
566     {
567         return getWordInstance(ULocale.getDefault());
568     }
569 
570     /**
571      * Returns a new instance of BreakIterator that locates word boundaries.
572      * @param where A locale specifying the language of the text to be
573      * analyzed.
574      * @return An instance of BreakIterator that locates word boundaries.
575      * @throws NullPointerException if <code>where</code> is null.
576      */
getWordInstance(Locale where)577     public static BreakIterator getWordInstance(Locale where)
578     {
579         return getBreakInstance(ULocale.forLocale(where), KIND_WORD);
580     }
581 
582     /**
583      * <strong>[icu]</strong> Returns a new instance of BreakIterator that locates word boundaries.
584      * @param where A locale specifying the language of the text to be
585      * analyzed.
586      * @return An instance of BreakIterator that locates word boundaries.
587      * @throws NullPointerException if <code>where</code> is null.
588      */
getWordInstance(ULocale where)589     public static BreakIterator getWordInstance(ULocale where)
590     {
591         return getBreakInstance(where, KIND_WORD);
592     }
593 
594     /**
595      * Returns a new instance of BreakIterator that locates legal line-
596      * wrapping positions.  This function assumes the text being broken
597      * is in the default locale's language.
598      * @return A new instance of BreakIterator that locates legal
599      * line-wrapping positions.
600      */
getLineInstance()601     public static BreakIterator getLineInstance()
602     {
603         return getLineInstance(ULocale.getDefault());
604     }
605 
606     /**
607      * Returns a new instance of BreakIterator that locates legal line-
608      * wrapping positions.
609      * @param where A Locale specifying the language of the text being broken.
610      * @return A new instance of BreakIterator that locates legal
611      * line-wrapping positions.
612      * @throws NullPointerException if <code>where</code> is null.
613      */
getLineInstance(Locale where)614     public static BreakIterator getLineInstance(Locale where)
615     {
616         return getBreakInstance(ULocale.forLocale(where), KIND_LINE);
617     }
618 
619     /**
620      * <strong>[icu]</strong> Returns a new instance of BreakIterator that locates legal line-
621      * wrapping positions.
622      * @param where A Locale specifying the language of the text being broken.
623      * @return A new instance of BreakIterator that locates legal
624      * line-wrapping positions.
625      * @throws NullPointerException if <code>where</code> is null.
626      */
getLineInstance(ULocale where)627     public static BreakIterator getLineInstance(ULocale where)
628     {
629         return getBreakInstance(where, KIND_LINE);
630     }
631 
632     /**
633      * Returns a new instance of BreakIterator that locates logical-character
634      * boundaries.  This function assumes that the text being analyzed is
635      * in the default locale's language.
636      * @return A new instance of BreakIterator that locates logical-character
637      * boundaries.
638      */
getCharacterInstance()639     public static BreakIterator getCharacterInstance()
640     {
641         return getCharacterInstance(ULocale.getDefault());
642     }
643 
644     /**
645      * Returns a new instance of BreakIterator that locates logical-character
646      * boundaries.
647      * @param where A Locale specifying the language of the text being analyzed.
648      * @return A new instance of BreakIterator that locates logical-character
649      * boundaries.
650      * @throws NullPointerException if <code>where</code> is null.
651      */
getCharacterInstance(Locale where)652     public static BreakIterator getCharacterInstance(Locale where)
653     {
654         return getBreakInstance(ULocale.forLocale(where), KIND_CHARACTER);
655     }
656 
657     /**
658      * <strong>[icu]</strong> Returns a new instance of BreakIterator that locates logical-character
659      * boundaries.
660      * @param where A Locale specifying the language of the text being analyzed.
661      * @return A new instance of BreakIterator that locates logical-character
662      * boundaries.
663      * @throws NullPointerException if <code>where</code> is null.
664      */
getCharacterInstance(ULocale where)665     public static BreakIterator getCharacterInstance(ULocale where)
666     {
667         return getBreakInstance(where, KIND_CHARACTER);
668     }
669 
670     /**
671      * Returns a new instance of BreakIterator that locates sentence boundaries.
672      * This function assumes the text being analyzed is in the default locale's
673      * language.
674      * @return A new instance of BreakIterator that locates sentence boundaries.
675      */
getSentenceInstance()676     public static BreakIterator getSentenceInstance()
677     {
678         return getSentenceInstance(ULocale.getDefault());
679     }
680 
681     /**
682      * Returns a new instance of BreakIterator that locates sentence boundaries.
683      * @param where A Locale specifying the language of the text being analyzed.
684      * @return A new instance of BreakIterator that locates sentence boundaries.
685      * @throws NullPointerException if <code>where</code> is null.
686      */
getSentenceInstance(Locale where)687     public static BreakIterator getSentenceInstance(Locale where)
688     {
689         return getBreakInstance(ULocale.forLocale(where), KIND_SENTENCE);
690     }
691 
692     /**
693      * <strong>[icu]</strong> Returns a new instance of BreakIterator that locates sentence boundaries.
694      * @param where A Locale specifying the language of the text being analyzed.
695      * @return A new instance of BreakIterator that locates sentence boundaries.
696      * @throws NullPointerException if <code>where</code> is null.
697      */
getSentenceInstance(ULocale where)698     public static BreakIterator getSentenceInstance(ULocale where)
699     {
700         return getBreakInstance(where, KIND_SENTENCE);
701     }
702 
703     /**
704      * <strong>[icu]</strong> Returns a new instance of BreakIterator that locates title boundaries.
705      * This function assumes the text being analyzed is in the default locale's
706      * language. The iterator returned locates title boundaries as described for
707      * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,
708      * please use a word boundary iterator. {@link #getWordInstance}
709      * @return A new instance of BreakIterator that locates title boundaries.
710      * @deprecated ICU 64 Use {@link #getWordInstance} instead.
711      */
712     @Deprecated
getTitleInstance()713     public static BreakIterator getTitleInstance()
714     {
715         return getTitleInstance(ULocale.getDefault());
716     }
717 
718     /**
719      * <strong>[icu]</strong> Returns a new instance of BreakIterator that locates title boundaries.
720      * The iterator returned locates title boundaries as described for
721      * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,
722      * please use Word Boundary iterator.{@link #getWordInstance}
723      * @param where A Locale specifying the language of the text being analyzed.
724      * @return A new instance of BreakIterator that locates title boundaries.
725      * @throws NullPointerException if <code>where</code> is null.
726      * @deprecated ICU 64 Use {@link #getWordInstance} instead.
727      */
728     @Deprecated
getTitleInstance(Locale where)729     public static BreakIterator getTitleInstance(Locale where)
730     {
731         return getBreakInstance(ULocale.forLocale(where), KIND_TITLE);
732     }
733 
734     /**
735      * <strong>[icu]</strong> Returns a new instance of BreakIterator that locates title boundaries.
736      * The iterator returned locates title boundaries as described for
737      * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,
738      * please use Word Boundary iterator.{@link #getWordInstance}
739      * @param where A Locale specifying the language of the text being analyzed.
740      * @return A new instance of BreakIterator that locates title boundaries.
741      * @throws NullPointerException if <code>where</code> is null.
742      * @deprecated ICU 64 Use {@link #getWordInstance} instead.
743      */
744     @Deprecated
getTitleInstance(ULocale where)745     public static BreakIterator getTitleInstance(ULocale where)
746     {
747         return getBreakInstance(where, KIND_TITLE);
748     }
749 
750     /**
751      * <strong>[icu]</strong> Registers a new break iterator of the indicated kind, to use in the given
752      * locale.  Clones of the iterator will be returned if a request for a break iterator
753      * of the given kind matches or falls back to this locale.
754      *
755      * <p>Because ICU may choose to cache BreakIterator objects internally, this must
756      * be called at application startup, prior to any calls to
757      * BreakIterator.getInstance to avoid undefined behavior.
758      *
759      * @param iter the BreakIterator instance to adopt.
760      * @param locale the Locale for which this instance is to be registered
761      * @param kind the type of iterator for which this instance is to be registered
762      * @return a registry key that can be used to unregister this instance
763      * @hide unsupported on Android
764      */
registerInstance(BreakIterator iter, Locale locale, int kind)765     public static Object registerInstance(BreakIterator iter, Locale locale, int kind) {
766         return registerInstance(iter, ULocale.forLocale(locale), kind);
767     }
768 
769     /**
770      * <strong>[icu]</strong> Registers a new break iterator of the indicated kind, to use in the given
771      * locale.  Clones of the iterator will be returned if a request for a break iterator
772      * of the given kind matches or falls back to this locale.
773      *
774      * <p>Because ICU may choose to cache BreakIterator objects internally, this must
775      * be called at application startup, prior to any calls to
776      * BreakIterator.getInstance to avoid undefined behavior.
777      *
778      * @param iter the BreakIterator instance to adopt.
779      * @param locale the Locale for which this instance is to be registered
780      * @param kind the type of iterator for which this instance is to be registered
781      * @return a registry key that can be used to unregister this instance
782      * @hide unsupported on Android
783      */
registerInstance(BreakIterator iter, ULocale locale, int kind)784     public static Object registerInstance(BreakIterator iter, ULocale locale, int kind) {
785         // If the registered object matches the one in the cache, then
786         // flush the cached object.
787         if (iterCache[kind] != null) {
788             BreakIteratorCache cache = (BreakIteratorCache) iterCache[kind].get();
789             if (cache != null) {
790                 if (cache.getLocale().equals(locale)) {
791                     iterCache[kind] = null;
792                 }
793             }
794         }
795         return getShim().registerInstance(iter, locale, kind);
796     }
797 
798     /**
799      * <strong>[icu]</strong> Unregisters a previously-registered BreakIterator using the key returned
800      * from the register call.  Key becomes invalid after this call and should not be used
801      * again.
802      * @param key the registry key returned by a previous call to registerInstance
803      * @return true if the iterator for the key was successfully unregistered
804      * @hide unsupported on Android
805      */
unregister(Object key)806     public static boolean unregister(Object key) {
807         if (key == null) {
808             throw new IllegalArgumentException("registry key must not be null");
809         }
810         // TODO: we don't do code coverage for the following lines
811         // because in getBreakInstance we always instantiate the shim,
812         // and test execution is such that we always instantiate a
813         // breakiterator before we get to the break iterator tests.
814         // this is for modularization, and we could remove the
815         // dependencies in getBreakInstance by rewriting part of the
816         // LocaleData code, or perhaps by accepting it into the
817         // module.
818         ///CLOVER:OFF
819         if (shim != null) {
820             // Unfortunately, we don't know what is being unregistered
821             // -- what `kind' and what locale -- so we flush all
822             // caches.  This is safe but inefficient if people are
823             // actively registering and unregistering.
824             for (int kind=0; kind<KIND_COUNT; ++kind) {
825                 iterCache[kind] = null;
826             }
827             return shim.unregister(key);
828         }
829         return false;
830         ///CLOVER:ON
831     }
832 
833     // end of registration
834 
835     /**
836      * Returns a particular kind of BreakIterator for a locale.
837      * Avoids writing a switch statement with getXYZInstance(where) calls.
838      * @deprecated This API is ICU internal only.
839      * @hide original deprecated declaration
840      * @hide draft / provisional / internal are hidden on Android
841      */
842     @Deprecated
getBreakInstance(ULocale where, int kind)843     public static BreakIterator getBreakInstance(ULocale where, int kind) {
844         if (where == null) {
845             throw new NullPointerException("Specified locale is null");
846         }
847         if (iterCache[kind] != null) {
848             BreakIteratorCache cache = (BreakIteratorCache)iterCache[kind].get();
849             if (cache != null) {
850                 if (cache.getLocale().equals(where)) {
851                     return cache.createBreakInstance();
852                 }
853             }
854         }
855 
856         // sigh, all to avoid linking in ICULocaleData...
857         BreakIterator result = getShim().createBreakIterator(where, kind);
858 
859         BreakIteratorCache cache = new BreakIteratorCache(where, result);
860         iterCache[kind] = CacheValue.getInstance(cache);
861 
862         return result;
863     }
864 
865 
866     /**
867      * Returns a list of locales for which BreakIterators can be used.
868      * @return An array of Locales.  All of the locales in the array can
869      * be used when creating a BreakIterator.
870      */
getAvailableLocales()871     public static synchronized Locale[] getAvailableLocales()
872     {
873         // to avoid linking ICULocaleData
874         return getShim().getAvailableLocales();
875     }
876 
877     /**
878      * <strong>[icu]</strong> Returns a list of locales for which BreakIterators can be used.
879      * @return An array of Locales.  All of the locales in the array can
880      * be used when creating a BreakIterator.
881      * @hide draft / provisional / internal are hidden on Android
882      */
getAvailableULocales()883     public static synchronized ULocale[] getAvailableULocales()
884     {
885         // to avoid linking ICULocaleData
886         return getShim().getAvailableULocales();
887     }
888 
889     private static final class BreakIteratorCache {
890 
891         private BreakIterator iter;
892         private ULocale where;
893 
BreakIteratorCache(ULocale where, BreakIterator iter)894         BreakIteratorCache(ULocale where, BreakIterator iter) {
895             this.where = where;
896             this.iter = (BreakIterator) iter.clone();
897         }
898 
getLocale()899         ULocale getLocale() {
900             return where;
901         }
902 
createBreakInstance()903         BreakIterator createBreakInstance() {
904             return (BreakIterator) iter.clone();
905         }
906     }
907 
908     static abstract class BreakIteratorServiceShim {
registerInstance(BreakIterator iter, ULocale l, int k)909         public abstract Object registerInstance(BreakIterator iter, ULocale l, int k);
unregister(Object key)910         public abstract boolean unregister(Object key);
getAvailableLocales()911         public abstract Locale[] getAvailableLocales();
getAvailableULocales()912         public abstract ULocale[] getAvailableULocales();
createBreakIterator(ULocale l, int k)913         public abstract BreakIterator createBreakIterator(ULocale l, int k);
914     }
915 
916     private static BreakIteratorServiceShim shim;
getShim()917     private static BreakIteratorServiceShim getShim() {
918         // Note: this instantiation is safe on loose-memory-model configurations
919         // despite lack of synchronization, since the shim instance has no state--
920         // it's all in the class init.  The worst problem is we might instantiate
921         // two shim instances, but they'll share the same state so that's ok.
922         if (shim == null) {
923             try {
924                 Class<?> cls = Class.forName("android.icu.text.BreakIteratorFactory");
925                 shim = (BreakIteratorServiceShim)cls.newInstance();
926             }
927             catch (MissingResourceException e)
928             {
929                 throw e;
930             }
931             catch (Exception e) {
932                 ///CLOVER:OFF
933                 if(DEBUG){
934                     e.printStackTrace();
935                 }
936                 throw new RuntimeException(e.getMessage());
937                 ///CLOVER:ON
938             }
939         }
940         return shim;
941     }
942 
943     // -------- BEGIN ULocale boilerplate --------
944 
945     /**
946      * <strong>[icu]</strong> Returns the locale that was used to create this object, or null.
947      * This may may differ from the locale requested at the time of
948      * this object's creation.  For example, if an object is created
949      * for locale <tt>en_US_CALIFORNIA</tt>, the actual data may be
950      * drawn from <tt>en</tt> (the <i>actual</i> locale), and
951      * <tt>en_US</tt> may be the most specific locale that exists (the
952      * <i>valid</i> locale).
953      *
954      * <p>Note: The <i>actual</i> locale is returned correctly, but the <i>valid</i>
955      * locale is not, in most cases.
956      * @param type type of information requested, either {@link
957      * android.icu.util.ULocale#VALID_LOCALE} or {@link
958      * android.icu.util.ULocale#ACTUAL_LOCALE}.
959      * @return the information specified by <i>type</i>, or null if
960      * this object was not constructed from locale data.
961      * @see android.icu.util.ULocale
962      * @see android.icu.util.ULocale#VALID_LOCALE
963      * @see android.icu.util.ULocale#ACTUAL_LOCALE
964      * @hide draft / provisional / internal are hidden on Android
965      */
getLocale(ULocale.Type type)966     public final ULocale getLocale(ULocale.Type type) {
967         return type == ULocale.ACTUAL_LOCALE ?
968             this.actualLocale : this.validLocale;
969     }
970 
971     /**
972      * Set information about the locales that were used to create this
973      * object.  If the object was not constructed from locale data,
974      * both arguments should be set to null.  Otherwise, neither
975      * should be null.  The actual locale must be at the same level or
976      * less specific than the valid locale.  This method is intended
977      * for use by factories or other entities that create objects of
978      * this class.
979      * @param valid the most specific locale containing any resource
980      * data, or null
981      * @param actual the locale containing data used to construct this
982      * object, or null
983      * @see android.icu.util.ULocale
984      * @see android.icu.util.ULocale#VALID_LOCALE
985      * @see android.icu.util.ULocale#ACTUAL_LOCALE
986      */
setLocale(ULocale valid, ULocale actual)987     final void setLocale(ULocale valid, ULocale actual) {
988         // Change the following to an assertion later
989         if ((valid == null) != (actual == null)) {
990             ///CLOVER:OFF
991             throw new IllegalArgumentException();
992             ///CLOVER:ON
993         }
994         // Another check we could do is that the actual locale is at
995         // the same level or less specific than the valid locale.
996         this.validLocale = valid;
997         this.actualLocale = actual;
998     }
999 
1000     /**
1001      * The most specific locale containing any resource data, or null.
1002      * @see android.icu.util.ULocale
1003      */
1004     private ULocale validLocale;
1005 
1006     /**
1007      * The locale containing data used to construct this object, or
1008      * null.
1009      * @see android.icu.util.ULocale
1010      */
1011     private ULocale actualLocale;
1012 
1013     // -------- END ULocale boilerplate --------
1014 }
1015