/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
17 package com.android.inputmethod.latin;
18 
19 import android.text.TextUtils;
20 
21 import com.android.inputmethod.keyboard.Keyboard; // For character constants
22 
23 import java.util.ArrayList;
24 import java.util.Locale;
25 
26 public final class StringUtils {
StringUtils()27     private StringUtils() {
28         // This utility class is not publicly instantiable.
29     }
30 
codePointCount(String text)31     public static int codePointCount(String text) {
32         if (TextUtils.isEmpty(text)) return 0;
33         return text.codePointCount(0, text.length());
34     }
35 
containsInArray(String key, String[] array)36     public static boolean containsInArray(String key, String[] array) {
37         for (final String element : array) {
38             if (key.equals(element)) return true;
39         }
40         return false;
41     }
42 
containsInCsv(String key, String csv)43     public static boolean containsInCsv(String key, String csv) {
44         if (TextUtils.isEmpty(csv)) return false;
45         return containsInArray(key, csv.split(","));
46     }
47 
appendToCsvIfNotExists(String key, String csv)48     public static String appendToCsvIfNotExists(String key, String csv) {
49         if (TextUtils.isEmpty(csv)) return key;
50         if (containsInCsv(key, csv)) return csv;
51         return csv + "," + key;
52     }
53 
removeFromCsvIfExists(String key, String csv)54     public static String removeFromCsvIfExists(String key, String csv) {
55         if (TextUtils.isEmpty(csv)) return "";
56         final String[] elements = csv.split(",");
57         if (!containsInArray(key, elements)) return csv;
58         final ArrayList<String> result = CollectionUtils.newArrayList(elements.length - 1);
59         for (final String element : elements) {
60             if (!key.equals(element)) result.add(element);
61         }
62         return TextUtils.join(",", result);
63     }
64 
65     /**
66      * Returns true if a and b are equal ignoring the case of the character.
67      * @param a first character to check
68      * @param b second character to check
69      * @return {@code true} if a and b are equal, {@code false} otherwise.
70      */
equalsIgnoreCase(char a, char b)71     public static boolean equalsIgnoreCase(char a, char b) {
72         // Some language, such as Turkish, need testing both cases.
73         return a == b
74                 || Character.toLowerCase(a) == Character.toLowerCase(b)
75                 || Character.toUpperCase(a) == Character.toUpperCase(b);
76     }
77 
78     /**
79      * Returns true if a and b are equal ignoring the case of the characters, including if they are
80      * both null.
81      * @param a first CharSequence to check
82      * @param b second CharSequence to check
83      * @return {@code true} if a and b are equal, {@code false} otherwise.
84      */
equalsIgnoreCase(CharSequence a, CharSequence b)85     public static boolean equalsIgnoreCase(CharSequence a, CharSequence b) {
86         if (a == b)
87             return true;  // including both a and b are null.
88         if (a == null || b == null)
89             return false;
90         final int length = a.length();
91         if (length != b.length())
92             return false;
93         for (int i = 0; i < length; i++) {
94             if (!equalsIgnoreCase(a.charAt(i), b.charAt(i)))
95                 return false;
96         }
97         return true;
98     }
99 
100     /**
101      * Returns true if a and b are equal ignoring the case of the characters, including if a is null
102      * and b is zero length.
103      * @param a CharSequence to check
104      * @param b character array to check
105      * @param offset start offset of array b
106      * @param length length of characters in array b
107      * @return {@code true} if a and b are equal, {@code false} otherwise.
108      * @throws IndexOutOfBoundsException
109      *   if {@code offset < 0 || length < 0 || offset + length > data.length}.
110      * @throws NullPointerException if {@code b == null}.
111      */
equalsIgnoreCase(CharSequence a, char[] b, int offset, int length)112     public static boolean equalsIgnoreCase(CharSequence a, char[] b, int offset, int length) {
113         if (offset < 0 || length < 0 || length > b.length - offset)
114             throw new IndexOutOfBoundsException("array.length=" + b.length + " offset=" + offset
115                     + " length=" + length);
116         if (a == null)
117             return length == 0;  // including a is null and b is zero length.
118         if (a.length() != length)
119             return false;
120         for (int i = 0; i < length; i++) {
121             if (!equalsIgnoreCase(a.charAt(i), b[offset + i]))
122                 return false;
123         }
124         return true;
125     }
126 
127     /**
128      * Remove duplicates from an array of strings.
129      *
130      * This method will always keep the first occurrence of all strings at their position
131      * in the array, removing the subsequent ones.
132      */
removeDupes(final ArrayList<CharSequence> suggestions)133     public static void removeDupes(final ArrayList<CharSequence> suggestions) {
134         if (suggestions.size() < 2) return;
135         int i = 1;
136         // Don't cache suggestions.size(), since we may be removing items
137         while (i < suggestions.size()) {
138             final CharSequence cur = suggestions.get(i);
139             // Compare each suggestion with each previous suggestion
140             for (int j = 0; j < i; j++) {
141                 CharSequence previous = suggestions.get(j);
142                 if (TextUtils.equals(cur, previous)) {
143                     suggestions.remove(i);
144                     i--;
145                     break;
146                 }
147             }
148             i++;
149         }
150     }
151 
toTitleCase(String s, Locale locale)152     public static String toTitleCase(String s, Locale locale) {
153         if (s.length() <= 1) {
154             // TODO: is this really correct? Shouldn't this be s.toUpperCase()?
155             return s;
156         }
157         // TODO: fix the bugs below
158         // - This does not work for Greek, because it returns upper case instead of title case.
159         // - It does not work for Serbian, because it fails to account for the "lj" character,
160         // which should be "Lj" in title case and "LJ" in upper case.
161         // - It does not work for Dutch, because it fails to account for the "ij" digraph, which
162         // are two different characters but both should be capitalized as "IJ" as if they were
163         // a single letter.
164         // - It also does not work with unicode surrogate code points.
165         return s.toUpperCase(locale).charAt(0) + s.substring(1);
166     }
167 
toCodePointArray(final String string)168     public static int[] toCodePointArray(final String string) {
169         final char[] characters = string.toCharArray();
170         final int length = characters.length;
171         final int[] codePoints = new int[Character.codePointCount(characters, 0, length)];
172         if (length <= 0) {
173             return new int[0];
174         }
175         int codePoint = Character.codePointAt(characters, 0);
176         int dsti = 0;
177         for (int srci = Character.charCount(codePoint);
178                 srci < length; srci += Character.charCount(codePoint), ++dsti) {
179             codePoints[dsti] = codePoint;
180             codePoint = Character.codePointAt(characters, srci);
181         }
182         codePoints[dsti] = codePoint;
183         return codePoints;
184     }
185 
186     /**
187      * Determine what caps mode should be in effect at the current offset in
188      * the text. Only the mode bits set in <var>reqModes</var> will be
189      * checked. Note that the caps mode flags here are explicitly defined
190      * to match those in {@link InputType}.
191      *
192      * This code is a straight copy of TextUtils.getCapsMode (modulo namespace and formatting
193      * issues). This will change in the future as we simplify the code for our use and fix bugs.
194      *
195      * @param cs The text that should be checked for caps modes.
196      * @param reqModes The modes to be checked: may be any combination of
197      * {@link TextUtils#CAP_MODE_CHARACTERS}, {@link TextUtils#CAP_MODE_WORDS}, and
198      * {@link TextUtils#CAP_MODE_SENTENCES}.
199      * @param locale The locale to consider for capitalization rules
200      * @param hasSpaceBefore Whether we should consider there is a space inserted at the end of cs
201      *
202      * @return Returns the actual capitalization modes that can be in effect
203      * at the current position, which is any combination of
204      * {@link TextUtils#CAP_MODE_CHARACTERS}, {@link TextUtils#CAP_MODE_WORDS}, and
205      * {@link TextUtils#CAP_MODE_SENTENCES}.
206      */
getCapsMode(final CharSequence cs, final int reqModes, final Locale locale, final boolean hasSpaceBefore)207     public static int getCapsMode(final CharSequence cs, final int reqModes, final Locale locale,
208             final boolean hasSpaceBefore) {
209         // Quick description of what we want to do:
210         // CAP_MODE_CHARACTERS is always on.
211         // CAP_MODE_WORDS is on if there is some whitespace before the cursor.
212         // CAP_MODE_SENTENCES is on if there is some whitespace before the cursor, and the end
213         //   of a sentence just before that.
214         // We ignore opening parentheses and the like just before the cursor for purposes of
215         // finding whitespace for WORDS and SENTENCES modes.
216         // The end of a sentence ends with a period, question mark or exclamation mark. If it's
217         // a period, it also needs not to be an abbreviation, which means it also needs to either
218         // be immediately preceded by punctuation, or by a string of only letters with single
219         // periods interleaved.
220 
221         // Step 1 : check for cap MODE_CHARACTERS. If it's looked for, it's always on.
222         if ((reqModes & (TextUtils.CAP_MODE_WORDS | TextUtils.CAP_MODE_SENTENCES)) == 0) {
223             // Here we are not looking for MODE_WORDS or MODE_SENTENCES, so since we already
224             // evaluated MODE_CHARACTERS, we can return.
225             return TextUtils.CAP_MODE_CHARACTERS & reqModes;
226         }
227 
228         // Step 2 : Skip (ignore at the end of input) any opening punctuation. This includes
229         // opening parentheses, brackets, opening quotes, everything that *opens* a span of
230         // text in the linguistic sense. In RTL languages, this is still an opening sign, although
231         // it may look like a right parenthesis for example. We also include double quote and
232         // single quote since they aren't start punctuation in the unicode sense, but should still
233         // be skipped for English. TODO: does this depend on the language?
234         int i;
235         if (hasSpaceBefore) {
236             i = cs.length() + 1;
237         } else {
238             for (i = cs.length(); i > 0; i--) {
239                 final char c = cs.charAt(i - 1);
240                 if (c != Keyboard.CODE_DOUBLE_QUOTE && c != Keyboard.CODE_SINGLE_QUOTE
241                         && Character.getType(c) != Character.START_PUNCTUATION) {
242                     break;
243                 }
244             }
245         }
246 
247         // We are now on the character that precedes any starting punctuation, so in the most
248         // frequent case this will be whitespace or a letter, although it may occasionally be a
249         // start of line, or some symbol.
250 
251         // Step 3 : Search for the start of a paragraph. From the starting point computed in step 2,
252         // we go back over any space or tab char sitting there. We find the start of a paragraph
253         // if the first char that's not a space or tab is a start of line (as in \n, start of text,
254         // or some other similar characters).
255         int j = i;
256         char prevChar = Keyboard.CODE_SPACE;
257         if (hasSpaceBefore) --j;
258         while (j > 0) {
259             prevChar = cs.charAt(j - 1);
260             if (!Character.isSpaceChar(prevChar) && prevChar != Keyboard.CODE_TAB) break;
261             j--;
262         }
263         if (j <= 0 || Character.isWhitespace(prevChar)) {
264             // There are only spacing chars between the start of the paragraph and the cursor,
265             // defined as a isWhitespace() char that is neither a isSpaceChar() nor a tab. Both
266             // MODE_WORDS and MODE_SENTENCES should be active.
267             return (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_WORDS
268                     | TextUtils.CAP_MODE_SENTENCES) & reqModes;
269         }
270         if (i == j) {
271             // If we don't have whitespace before index i, it means neither MODE_WORDS
272             // nor mode sentences should be on so we can return right away.
273             return TextUtils.CAP_MODE_CHARACTERS & reqModes;
274         }
275         if ((reqModes & TextUtils.CAP_MODE_SENTENCES) == 0) {
276             // Here we know we have whitespace before the cursor (if not, we returned in the above
277             // if i == j clause), so we need MODE_WORDS to be on. And we don't need to evaluate
278             // MODE_SENTENCES so we can return right away.
279             return (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_WORDS) & reqModes;
280         }
281         // Please note that because of the reqModes & CAP_MODE_SENTENCES test a few lines above,
282         // we know that MODE_SENTENCES is being requested.
283 
284         // Step 4 : Search for MODE_SENTENCES.
285         // English is a special case in that "American typography" rules, which are the most common
286         // in English, state that a sentence terminator immediately following a quotation mark
287         // should be swapped with it and de-duplicated (included in the quotation mark),
288         // e.g. <<Did he say, "let's go home?">>
289         // No other language has such a rule as far as I know, instead putting inside the quotation
290         // mark as the exact thing quoted and handling the surrounding punctuation independently,
291         // e.g. <<Did he say, "let's go home"?>>
292         // Hence, specifically for English, we treat this special case here.
293         if (Locale.ENGLISH.getLanguage().equals(locale.getLanguage())) {
294             for (; j > 0; j--) {
295                 // Here we look to go over any closing punctuation. This is because in dominant
296                 // variants of English, the final period is placed within double quotes and maybe
297                 // other closing punctuation signs. This is generally not true in other languages.
298                 final char c = cs.charAt(j - 1);
299                 if (c != Keyboard.CODE_DOUBLE_QUOTE && c != Keyboard.CODE_SINGLE_QUOTE
300                         && Character.getType(c) != Character.END_PUNCTUATION) {
301                     break;
302                 }
303             }
304         }
305 
306         if (j <= 0) return TextUtils.CAP_MODE_CHARACTERS & reqModes;
307         char c = cs.charAt(--j);
308 
309         // We found the next interesting chunk of text ; next we need to determine if it's the
310         // end of a sentence. If we have a question mark or an exclamation mark, it's the end of
311         // a sentence. If it's neither, the only remaining case is the period so we get the opposite
312         // case out of the way.
313         if (c == Keyboard.CODE_QUESTION_MARK || c == Keyboard.CODE_EXCLAMATION_MARK) {
314             return (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_SENTENCES) & reqModes;
315         }
316         if (c != Keyboard.CODE_PERIOD || j <= 0) {
317             return (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_WORDS) & reqModes;
318         }
319 
320         // We found out that we have a period. We need to determine if this is a full stop or
321         // otherwise sentence-ending period, or an abbreviation like "e.g.". An abbreviation
322         // looks like (\w\.){2,}
323         // To find out, we will have a simple state machine with the following states :
324         // START, WORD, PERIOD, ABBREVIATION
325         // On START : (just before the first period)
326         //           letter => WORD
327         //           whitespace => end with no caps (it was a stand-alone period)
328         //           otherwise => end with caps (several periods/symbols in a row)
329         // On WORD : (within the word just before the first period)
330         //           letter => WORD
331         //           period => PERIOD
332         //           otherwise => end with caps (it was a word with a full stop at the end)
333         // On PERIOD : (period within a potential abbreviation)
334         //           letter => LETTER
335         //           otherwise => end with caps (it was not an abbreviation)
336         // On LETTER : (letter within a potential abbreviation)
337         //           letter => LETTER
338         //           period => PERIOD
339         //           otherwise => end with no caps (it was an abbreviation)
340         // "Not an abbreviation" in the above chart essentially covers cases like "...yes.". This
341         // should capitalize.
342 
343         final int START = 0;
344         final int WORD = 1;
345         final int PERIOD = 2;
346         final int LETTER = 3;
347         final int caps = (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_WORDS
348                 | TextUtils.CAP_MODE_SENTENCES) & reqModes;
349         final int noCaps = (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_WORDS) & reqModes;
350         int state = START;
351         while (j > 0) {
352             c = cs.charAt(--j);
353             switch (state) {
354             case START:
355                 if (Character.isLetter(c)) {
356                     state = WORD;
357                 } else if (Character.isWhitespace(c)) {
358                     return noCaps;
359                 } else {
360                     return caps;
361                 }
362                 break;
363             case WORD:
364                 if (Character.isLetter(c)) {
365                     state = WORD;
366                 } else if (c == Keyboard.CODE_PERIOD) {
367                     state = PERIOD;
368                 } else {
369                     return caps;
370                 }
371                 break;
372             case PERIOD:
373                 if (Character.isLetter(c)) {
374                     state = LETTER;
375                 } else {
376                     return caps;
377                 }
378                 break;
379             case LETTER:
380                 if (Character.isLetter(c)) {
381                     state = LETTER;
382                 } else if (c == Keyboard.CODE_PERIOD) {
383                     state = PERIOD;
384                 } else {
385                     return noCaps;
386                 }
387             }
388         }
389         // Here we arrived at the start of the line. This should behave exactly like whitespace.
390         return (START == state || LETTER == state) ? noCaps : caps;
391     }
392 }
393