1 /* 2 * Copyright (C) 2021 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.launcher3.search; 18 19 import android.text.TextUtils; 20 21 import com.android.launcher3.util.IntArray; 22 23 import java.text.Collator; 24 import java.util.stream.IntStream; 25 26 /** 27 * Utilities for matching query string to target string. 28 */ 29 public class StringMatcherUtility { 30 31 private static final Character SPACE = ' '; 32 33 /** 34 * Returns {@code true} if {@code query} is a prefix of a substring in {@code target}. How to 35 * break target to valid substring is defined in the given {@code matcher}. 36 */ matches(String query, String target, StringMatcher matcher)37 public static boolean matches(String query, String target, StringMatcher matcher) { 38 int queryLength = query.length(); 39 40 int targetLength = target.length(); 41 42 if (targetLength < queryLength || queryLength <= 0) { 43 return false; 44 } 45 46 if (requestSimpleFuzzySearch(query)) { 47 return target.toLowerCase().contains(query); 48 } 49 50 int lastType; 51 int thisType = Character.UNASSIGNED; 52 int nextType = Character.getType(target.codePointAt(0)); 53 54 int end = targetLength - queryLength; 55 for (int i = 0; i <= end; i++) { 56 lastType = thisType; 57 thisType = nextType; 58 nextType = i < (targetLength - 1) 59 ? Character.getType(target.codePointAt(i + 1)) : Character.UNASSIGNED; 60 if (matcher.isBreak(thisType, lastType, nextType) 61 && matcher.matches(query, target.substring(i, i + queryLength))) { 62 return true; 63 } 64 } 65 return false; 66 } 67 68 /** 69 * Returns a list of breakpoints wherever the string contains a break. For example: 70 * "t-mobile" would have breakpoints at [0, 1] 71 * "Agar.io" would have breakpoints at [3, 4] 72 * "LEGO®Builder" would have a breakpoint at [4] 73 */ 74 public static IntArray getListOfBreakpoints(CharSequence input, StringMatcher matcher) { 75 int inputLength = input.length(); 76 if ((inputLength <= 2) || TextUtils.indexOf(input, SPACE) != -1) { 77 // when there is a space in the string, return a list where the elements are the 78 // position of the spaces - 1. This is to make the logic consistent where breakpoints 79 // are placed 80 return IntArray.wrap(IntStream.range(0, inputLength) 81 .filter(i -> input.charAt(i) == SPACE) 82 .map(i -> i - 1) 83 .toArray()); 84 } 85 IntArray listOfBreakPoints = new IntArray(); 86 int prevType; 87 int thisType = Character.getType(Character.codePointAt(input, 0)); 88 int nextType = Character.getType(Character.codePointAt(input, 1)); 89 for (int i = 1; i < inputLength; i++) { 90 prevType = thisType; 91 thisType = nextType; 92 nextType = i < (inputLength - 1) 93 ? Character.getType(Character.codePointAt(input, i + 1)) 94 : Character.UNASSIGNED; 95 if (matcher.isBreak(thisType, prevType, nextType)) { 96 // breakpoint is at previous 97 listOfBreakPoints.add(i-1); 98 } 99 } 100 return listOfBreakPoints; 101 } 102 103 /** 104 * Performs locale sensitive string comparison using {@link Collator}. 105 */ 106 public static class StringMatcher { 107 108 private static final char MAX_UNICODE = '\uFFFF'; 109 110 private final Collator mCollator; 111 112 StringMatcher() { 113 // On android N and above, Collator uses ICU implementation which has a much better 114 // support for non-latin locales. 115 mCollator = Collator.getInstance(); 116 mCollator.setStrength(Collator.PRIMARY); 117 mCollator.setDecomposition(Collator.CANONICAL_DECOMPOSITION); 118 } 119 120 /** 121 * Returns true if {@param query} is a prefix of {@param target} 122 */ 123 public boolean matches(String query, String target) { 124 switch (mCollator.compare(query, target)) { 125 case 0: 126 return true; 127 case -1: 128 // The target string can contain a modifier which would make it larger than 129 // the query string (even though the length is same). If the query becomes 130 // larger after appending a unicode character, it was originally a prefix of 131 // the target string and hence should match. 132 return mCollator.compare(query + MAX_UNICODE, target) > -1; 133 default: 134 return false; 135 } 136 } 137 getInstance()138 public static StringMatcher getInstance() { 139 return new StringMatcher(); 140 } 141 142 /** 143 * Returns true if the current point should be a break point. 144 * 145 * Following cases are considered as break points: 146 * 1) Any non space character after a space character 147 * 2) Any digit after a non-digit character 148 * 3) Any capital character after a digit or small character 149 * 4) Any capital character before a small character 150 * 151 * E.g., "YouTube" matches the input "you" and "tube", but not "out". 152 */ isBreak(int thisType, int prevType, int nextType)153 protected boolean isBreak(int thisType, int prevType, int nextType) { 154 switch (prevType) { 155 case Character.UNASSIGNED: 156 case Character.SPACE_SEPARATOR: 157 case Character.LINE_SEPARATOR: 158 case Character.PARAGRAPH_SEPARATOR: 159 return true; 160 } 161 switch (thisType) { 162 case Character.UPPERCASE_LETTER: 163 // takes care of the case where there are consistent uppercase letters as well 164 // as a special symbol following the capitalize letters for example: LEGO® 165 if (nextType != Character.UPPERCASE_LETTER && nextType != Character.OTHER_SYMBOL 166 && nextType != Character.DECIMAL_DIGIT_NUMBER 167 && nextType != Character.UNASSIGNED) { 168 return true; 169 } 170 // Follow through 171 case Character.TITLECASE_LETTER: 172 // Break point if previous was not a upper case 173 return prevType != Character.UPPERCASE_LETTER; 174 case Character.LOWERCASE_LETTER: 175 // Break point if previous was not a letter. 176 return prevType > Character.OTHER_LETTER || prevType <= Character.UNASSIGNED; 177 case Character.DECIMAL_DIGIT_NUMBER: 178 case Character.LETTER_NUMBER: 179 case Character.OTHER_NUMBER: 180 // Break point if previous was not a number 181 return !(prevType == Character.DECIMAL_DIGIT_NUMBER 182 || prevType == Character.LETTER_NUMBER 183 || prevType == Character.OTHER_NUMBER); 184 case Character.MATH_SYMBOL: 185 case Character.CURRENCY_SYMBOL: 186 case Character.OTHER_PUNCTUATION: 187 case Character.DASH_PUNCTUATION: 188 // Always a break point for a symbol 189 return true; 190 default: 191 return false; 192 } 193 } 194 } 195 196 /** 197 * Subclass of {@code StringMatcher} using simple space break for prefix matching. 198 * E.g., "YouTube" matches the input "you". "Play Store" matches the input "play". 199 */ 200 public static class StringMatcherSpace extends StringMatcher { 201 getInstance()202 public static StringMatcherSpace getInstance() { 203 return new StringMatcherSpace(); 204 } 205 206 /** 207 * The first character or any character after a space is considered as a break point. 208 * Returns true if the current point should be a break point. 209 */ 210 @Override isBreak(int thisType, int prevType, int nextType)211 protected boolean isBreak(int thisType, int prevType, int nextType) { 212 return prevType == Character.UNASSIGNED || prevType == Character.SPACE_SEPARATOR; 213 } 214 } 215 216 /** 217 * Matching optimization to search in Chinese. 218 */ requestSimpleFuzzySearch(String s)219 private static boolean requestSimpleFuzzySearch(String s) { 220 for (int i = 0; i < s.length(); ) { 221 int codepoint = s.codePointAt(i); 222 i += Character.charCount(codepoint); 223 switch (Character.UnicodeScript.of(codepoint)) { 224 case HAN: 225 //Character.UnicodeScript.HAN: use String.contains to match 226 return true; 227 } 228 } 229 return false; 230 } 231 } 232