• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2021 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.launcher3.search;
18 
19 import android.text.TextUtils;
20 
21 import com.android.launcher3.util.IntArray;
22 
23 import java.text.Collator;
24 import java.util.stream.IntStream;
25 
26 /**
27  * Utilities for matching query string to target string.
28  */
29 public class StringMatcherUtility {
30 
31     private static final Character SPACE = ' ';
32 
33     /**
34      * Returns {@code true} if {@code query} is a prefix of a substring in {@code target}. How to
35      * break target to valid substring is defined in the given {@code matcher}.
36      */
matches(String query, String target, StringMatcher matcher)37     public static boolean matches(String query, String target, StringMatcher matcher) {
38         int queryLength = query.length();
39 
40         int targetLength = target.length();
41 
42         if (targetLength < queryLength || queryLength <= 0) {
43             return false;
44         }
45 
46         if (requestSimpleFuzzySearch(query)) {
47             return target.toLowerCase().contains(query);
48         }
49 
50         int lastType;
51         int thisType = Character.UNASSIGNED;
52         int nextType = Character.getType(target.codePointAt(0));
53 
54         int end = targetLength - queryLength;
55         for (int i = 0; i <= end; i++) {
56             lastType = thisType;
57             thisType = nextType;
58             nextType = i < (targetLength - 1)
59                     ? Character.getType(target.codePointAt(i + 1)) : Character.UNASSIGNED;
60             if (matcher.isBreak(thisType, lastType, nextType)
61                     && matcher.matches(query, target.substring(i, i + queryLength))) {
62                 return true;
63             }
64         }
65         return false;
66     }
67 
68     /**
69      * Returns a list of breakpoints wherever the string contains a break. For example:
70      * "t-mobile" would have breakpoints at [0, 1]
71      * "Agar.io" would have breakpoints at [3, 4]
72      * "LEGO®Builder" would have a breakpoint at [4]
73      */
74     public static IntArray getListOfBreakpoints(CharSequence input, StringMatcher matcher) {
75         int inputLength = input.length();
76         if ((inputLength <= 2) || TextUtils.indexOf(input, SPACE) != -1) {
77             // when there is a space in the string, return a list where the elements are the
78             // position of the spaces - 1. This is to make the logic consistent where breakpoints
79             // are placed
80             return IntArray.wrap(IntStream.range(0, inputLength)
81                     .filter(i -> input.charAt(i) == SPACE)
82                     .map(i -> i - 1)
83                     .toArray());
84         }
85         IntArray listOfBreakPoints = new IntArray();
86         int prevType;
87         int thisType = Character.getType(Character.codePointAt(input, 0));
88         int nextType = Character.getType(Character.codePointAt(input, 1));
89         for (int i = 1; i < inputLength; i++) {
90             prevType = thisType;
91             thisType = nextType;
92             nextType = i < (inputLength - 1)
93                     ? Character.getType(Character.codePointAt(input, i + 1))
94                     : Character.UNASSIGNED;
95             if (matcher.isBreak(thisType, prevType, nextType)) {
96                 // breakpoint is at previous
97                 listOfBreakPoints.add(i-1);
98             }
99         }
100         return listOfBreakPoints;
101     }
102 
103     /**
104      * Performs locale sensitive string comparison using {@link Collator}.
105      */
106     public static class StringMatcher {
107 
108         private static final char MAX_UNICODE = '\uFFFF';
109 
110         private final Collator mCollator;
111 
112         StringMatcher() {
113             // On android N and above, Collator uses ICU implementation which has a much better
114             // support for non-latin locales.
115             mCollator = Collator.getInstance();
116             mCollator.setStrength(Collator.PRIMARY);
117             mCollator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
118         }
119 
120         /**
121          * Returns true if {@param query} is a prefix of {@param target}
122          */
123         public boolean matches(String query, String target) {
124             switch (mCollator.compare(query, target)) {
125                 case 0:
126                     return true;
127                 case -1:
128                     // The target string can contain a modifier which would make it larger than
129                     // the query string (even though the length is same). If the query becomes
130                     // larger after appending a unicode character, it was originally a prefix of
131                     // the target string and hence should match.
132                     return mCollator.compare(query + MAX_UNICODE, target) > -1;
133                 default:
134                     return false;
135             }
136         }
137 
getInstance()138         public static StringMatcher getInstance() {
139             return new StringMatcher();
140         }
141 
142         /**
143          * Returns true if the current point should be a break point.
144          *
145          * Following cases are considered as break points:
146          *     1) Any non space character after a space character
147          *     2) Any digit after a non-digit character
148          *     3) Any capital character after a digit or small character
149          *     4) Any capital character before a small character
150          *
151          * E.g., "YouTube" matches the input "you" and "tube", but not "out".
152          */
isBreak(int thisType, int prevType, int nextType)153         protected boolean isBreak(int thisType, int prevType, int nextType) {
154             switch (prevType) {
155                 case Character.UNASSIGNED:
156                 case Character.SPACE_SEPARATOR:
157                 case Character.LINE_SEPARATOR:
158                 case Character.PARAGRAPH_SEPARATOR:
159                     return true;
160             }
161             switch (thisType) {
162                 case Character.UPPERCASE_LETTER:
163                     // takes care of the case where there are consistent uppercase letters as well
164                     // as a special symbol following the capitalize letters for example: LEGO®
165                     if (nextType != Character.UPPERCASE_LETTER && nextType != Character.OTHER_SYMBOL
166                             && nextType != Character.DECIMAL_DIGIT_NUMBER
167                             && nextType != Character.UNASSIGNED) {
168                         return true;
169                     }
170                     // Follow through
171                 case Character.TITLECASE_LETTER:
172                     // Break point if previous was not a upper case
173                     return prevType != Character.UPPERCASE_LETTER;
174                 case Character.LOWERCASE_LETTER:
175                     // Break point if previous was not a letter.
176                     return prevType > Character.OTHER_LETTER || prevType <= Character.UNASSIGNED;
177                 case Character.DECIMAL_DIGIT_NUMBER:
178                 case Character.LETTER_NUMBER:
179                 case Character.OTHER_NUMBER:
180                     // Break point if previous was not a number
181                     return !(prevType == Character.DECIMAL_DIGIT_NUMBER
182                             || prevType == Character.LETTER_NUMBER
183                             || prevType == Character.OTHER_NUMBER);
184                 case Character.MATH_SYMBOL:
185                 case Character.CURRENCY_SYMBOL:
186                 case Character.OTHER_PUNCTUATION:
187                 case Character.DASH_PUNCTUATION:
188                     // Always a break point for a symbol
189                     return true;
190                 default:
191                     return  false;
192             }
193         }
194     }
195 
196     /**
197      * Subclass of {@code StringMatcher} using simple space break for prefix matching.
198      * E.g., "YouTube" matches the input "you". "Play Store" matches the input "play".
199      */
200     public static class StringMatcherSpace extends StringMatcher {
201 
getInstance()202         public static StringMatcherSpace getInstance() {
203             return new StringMatcherSpace();
204         }
205 
206         /**
207          * The first character or any character after a space is considered as a break point.
208          * Returns true if the current point should be a break point.
209          */
210         @Override
isBreak(int thisType, int prevType, int nextType)211         protected boolean isBreak(int thisType, int prevType, int nextType) {
212             return prevType == Character.UNASSIGNED || prevType == Character.SPACE_SEPARATOR;
213         }
214     }
215 
216     /**
217      * Matching optimization to search in Chinese.
218      */
requestSimpleFuzzySearch(String s)219     private static boolean requestSimpleFuzzySearch(String s) {
220         for (int i = 0; i < s.length(); ) {
221             int codepoint = s.codePointAt(i);
222             i += Character.charCount(codepoint);
223             switch (Character.UnicodeScript.of(codepoint)) {
224                 case HAN:
225                     //Character.UnicodeScript.HAN: use String.contains to match
226                     return true;
227             }
228         }
229         return false;
230     }
231 }
232