• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2022 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package android.text;
18 
19 import android.annotation.IntRange;
20 import android.annotation.NonNull;
21 import android.icu.text.BreakIterator;
22 import android.icu.util.ULocale;
23 import android.text.method.WordIterator;
24 
25 /**
26  * Implementation of {@link SegmentFinder} using words as the text segment. Word boundaries are
27  * found using {@code WordIterator}. Whitespace characters are excluded, so they are not included in
28  * any text segments.
29  *
30  * <p>For example, the text "Hello, World!" would be subdivided into four text segments: "Hello",
31  * ",", "World", "!". The space character does not belong to any text segments.
32  *
33  * @see <a href="https://unicode.org/reports/tr29/#Word_Boundaries">Unicode Text Segmentation - Word
34  *     Boundaries</a>
35  */
36 @android.ravenwood.annotation.RavenwoodKeepWholeClass
37 public class WordSegmentFinder extends SegmentFinder {
38     private final CharSequence mText;
39     private final WordIterator mWordIterator;
40 
41     /**
42      * Constructs a WordSegmentFinder instance for the specified text which uses the provided locale
43      * to determine word boundaries.
44      *
45      * @param text text to be segmented
46      * @param locale locale used for analyzing the text
47      */
WordSegmentFinder( @onNull CharSequence text, @NonNull ULocale locale)48     public WordSegmentFinder(
49             @NonNull CharSequence text, @NonNull ULocale locale) {
50         mText = text;
51         mWordIterator = new WordIterator(locale);
52         mWordIterator.setCharSequence(text, 0, text.length());
53     }
54 
55     /**
56      * Constructs a WordSegmentFinder instance for the specified text which uses the provided
57      * WordIterator to determine word boundaries.
58      *
59      * @param text text to be segmented
60      * @param wordIterator word iterator used to find word boundaries in the text
61      * @hide
62      */
WordSegmentFinder(@onNull CharSequence text, @NonNull WordIterator wordIterator)63     public WordSegmentFinder(@NonNull CharSequence text, @NonNull WordIterator wordIterator) {
64         mText = text;
65         mWordIterator = wordIterator;
66     }
67 
68     @Override
previousStartBoundary(@ntRangefrom = 0) int offset)69     public int previousStartBoundary(@IntRange(from = 0) int offset) {
70         int boundary = offset;
71         do {
72             boundary = mWordIterator.prevBoundary(boundary);
73             if (boundary == BreakIterator.DONE) {
74                 return DONE;
75             }
76         } while (Character.isWhitespace(mText.charAt(boundary)));
77         return boundary;
78     }
79 
80     @Override
previousEndBoundary(@ntRangefrom = 0) int offset)81     public int previousEndBoundary(@IntRange(from = 0) int offset) {
82         int boundary = offset;
83         do {
84             boundary = mWordIterator.prevBoundary(boundary);
85             if (boundary == BreakIterator.DONE || boundary == 0) {
86                 return DONE;
87             }
88         } while (Character.isWhitespace(mText.charAt(boundary - 1)));
89         return boundary;
90     }
91 
92     @Override
nextStartBoundary(@ntRangefrom = 0) int offset)93     public int nextStartBoundary(@IntRange(from = 0) int offset) {
94         int boundary = offset;
95         do {
96             boundary = mWordIterator.nextBoundary(boundary);
97             if (boundary == BreakIterator.DONE || boundary == mText.length()) {
98                 return DONE;
99             }
100         } while (Character.isWhitespace(mText.charAt(boundary)));
101         return boundary;
102     }
103 
104     @Override
nextEndBoundary(@ntRangefrom = 0) int offset)105     public int nextEndBoundary(@IntRange(from = 0) int offset) {
106         int boundary = offset;
107         do {
108             boundary = mWordIterator.nextBoundary(boundary);
109             if (boundary == BreakIterator.DONE) {
110                 return DONE;
111             }
112         } while (Character.isWhitespace(mText.charAt(boundary - 1)));
113         return boundary;
114     }
115 }
116