• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /*
3  * Copyright (C) 2011 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  *      http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 package android.text.method;
19 
20 import android.text.Selection;
21 import android.text.SpannableStringBuilder;
22 
23 import java.text.BreakIterator;
24 import java.util.Locale;
25 
26 /**
27  * Walks through cursor positions at word boundaries. Internally uses
28  * {@link BreakIterator#getWordInstance()}, and caches {@link CharSequence}
29  * for performance reasons.
30  *
31  * Also provides methods to determine word boundaries.
32  * {@hide}
33  */
34 public class WordIterator implements Selection.PositionIterator {
35     // Size of the window for the word iterator, should be greater than the longest word's length
36     private static final int WINDOW_WIDTH = 50;
37 
38     private String mString;
39     private int mOffsetShift;
40 
41     private BreakIterator mIterator;
42 
43     /**
44      * Constructs a WordIterator using the default locale.
45      */
WordIterator()46     public WordIterator() {
47         this(Locale.getDefault());
48     }
49 
50     /**
51      * Constructs a new WordIterator for the specified locale.
52      * @param locale The locale to be used when analysing the text.
53      */
WordIterator(Locale locale)54     public WordIterator(Locale locale) {
55         mIterator = BreakIterator.getWordInstance(locale);
56     }
57 
setCharSequence(CharSequence charSequence, int start, int end)58     public void setCharSequence(CharSequence charSequence, int start, int end) {
59         mOffsetShift = Math.max(0, start - WINDOW_WIDTH);
60         final int windowEnd = Math.min(charSequence.length(), end + WINDOW_WIDTH);
61 
62         if (charSequence instanceof SpannableStringBuilder) {
63             mString = ((SpannableStringBuilder) charSequence).substring(mOffsetShift, windowEnd);
64         } else {
65             mString = charSequence.subSequence(mOffsetShift, windowEnd).toString();
66         }
67         mIterator.setText(mString);
68     }
69 
70     /** {@inheritDoc} */
preceding(int offset)71     public int preceding(int offset) {
72         int shiftedOffset = offset - mOffsetShift;
73         do {
74             shiftedOffset = mIterator.preceding(shiftedOffset);
75             if (shiftedOffset == BreakIterator.DONE) {
76                 return BreakIterator.DONE;
77             }
78             if (isOnLetterOrDigit(shiftedOffset)) {
79                 return shiftedOffset + mOffsetShift;
80             }
81         } while (true);
82     }
83 
84     /** {@inheritDoc} */
following(int offset)85     public int following(int offset) {
86         int shiftedOffset = offset - mOffsetShift;
87         do {
88             shiftedOffset = mIterator.following(shiftedOffset);
89             if (shiftedOffset == BreakIterator.DONE) {
90                 return BreakIterator.DONE;
91             }
92             if (isAfterLetterOrDigit(shiftedOffset)) {
93                 return shiftedOffset + mOffsetShift;
94             }
95         } while (true);
96     }
97 
98     /** If <code>offset</code> is within a word, returns the index of the first character of that
99      * word, otherwise returns BreakIterator.DONE.
100      *
101      * The offsets that are considered to be part of a word are the indexes of its characters,
102      * <i>as well as</i> the index of its last character plus one.
103      * If offset is the index of a low surrogate character, BreakIterator.DONE will be returned.
104      *
105      * Valid range for offset is [0..textLength] (note the inclusive upper bound).
106      * The returned value is within [0..offset] or BreakIterator.DONE.
107      *
108      * @throws IllegalArgumentException is offset is not valid.
109      */
getBeginning(int offset)110     public int getBeginning(int offset) {
111         final int shiftedOffset = offset - mOffsetShift;
112         checkOffsetIsValid(shiftedOffset);
113 
114         if (isOnLetterOrDigit(shiftedOffset)) {
115             if (mIterator.isBoundary(shiftedOffset)) {
116                 return shiftedOffset + mOffsetShift;
117             } else {
118                 return mIterator.preceding(shiftedOffset) + mOffsetShift;
119             }
120         } else {
121             if (isAfterLetterOrDigit(shiftedOffset)) {
122                 return mIterator.preceding(shiftedOffset) + mOffsetShift;
123             }
124         }
125         return BreakIterator.DONE;
126     }
127 
128     /** If <code>offset</code> is within a word, returns the index of the last character of that
129      * word plus one, otherwise returns BreakIterator.DONE.
130      *
131      * The offsets that are considered to be part of a word are the indexes of its characters,
132      * <i>as well as</i> the index of its last character plus one.
133      * If offset is the index of a low surrogate character, BreakIterator.DONE will be returned.
134      *
135      * Valid range for offset is [0..textLength] (note the inclusive upper bound).
136      * The returned value is within [offset..textLength] or BreakIterator.DONE.
137      *
138      * @throws IllegalArgumentException is offset is not valid.
139      */
getEnd(int offset)140     public int getEnd(int offset) {
141         final int shiftedOffset = offset - mOffsetShift;
142         checkOffsetIsValid(shiftedOffset);
143 
144         if (isAfterLetterOrDigit(shiftedOffset)) {
145             if (mIterator.isBoundary(shiftedOffset)) {
146                 return shiftedOffset + mOffsetShift;
147             } else {
148                 return mIterator.following(shiftedOffset) + mOffsetShift;
149             }
150         } else {
151             if (isOnLetterOrDigit(shiftedOffset)) {
152                 return mIterator.following(shiftedOffset) + mOffsetShift;
153             }
154         }
155         return BreakIterator.DONE;
156     }
157 
isAfterLetterOrDigit(int shiftedOffset)158     private boolean isAfterLetterOrDigit(int shiftedOffset) {
159         if (shiftedOffset >= 1 && shiftedOffset <= mString.length()) {
160             final int codePoint = mString.codePointBefore(shiftedOffset);
161             if (Character.isLetterOrDigit(codePoint)) return true;
162         }
163         return false;
164     }
165 
isOnLetterOrDigit(int shiftedOffset)166     private boolean isOnLetterOrDigit(int shiftedOffset) {
167         if (shiftedOffset >= 0 && shiftedOffset < mString.length()) {
168             final int codePoint = mString.codePointAt(shiftedOffset);
169             if (Character.isLetterOrDigit(codePoint)) return true;
170         }
171         return false;
172     }
173 
checkOffsetIsValid(int shiftedOffset)174     private void checkOffsetIsValid(int shiftedOffset) {
175         if (shiftedOffset < 0 || shiftedOffset > mString.length()) {
176             throw new IllegalArgumentException("Invalid offset: " + (shiftedOffset + mOffsetShift) +
177                     ". Valid range is [" + mOffsetShift + ", " + (mString.length() + mOffsetShift) +
178                     "]");
179         }
180     }
181 }
182