1 /* 2 * Copyright (C) 2023 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package android.ext.services.notification; 18 19 import static android.app.Notification.CATEGORY_EMAIL; 20 import static android.app.Notification.CATEGORY_MESSAGE; 21 import static android.app.Notification.CATEGORY_SOCIAL; 22 import static android.app.Notification.EXTRA_BIG_TEXT; 23 import static android.app.Notification.EXTRA_MESSAGES; 24 import static android.app.Notification.EXTRA_SUB_TEXT; 25 import static android.app.Notification.EXTRA_SUMMARY_TEXT; 26 import static android.app.Notification.EXTRA_TEXT; 27 import static android.app.Notification.EXTRA_TEXT_LINES; 28 import static android.app.Notification.EXTRA_TITLE; 29 import static android.app.Notification.EXTRA_TITLE_BIG; 30 import static android.os.Build.VERSION.SDK_INT; 31 32 import static java.lang.String.format; 33 34 import android.annotation.SuppressLint; 35 import android.app.Notification; 36 import android.app.Notification.MessagingStyle; 37 import android.app.Notification.MessagingStyle.Message; 38 import android.icu.util.ULocale; 39 import android.os.Build; 40 import android.os.Bundle; 41 import android.os.Parcelable; 42 import android.util.ArrayMap; 43 import android.view.textclassifier.TextClassifier; 44 import android.view.textclassifier.TextLanguage; 45 46 import androidx.annotation.Nullable; 47 import androidx.annotation.RequiresApi; 48 import androidx.annotation.VisibleForTesting; 49 50 import java.util.ArrayList; 51 import java.util.Arrays; 52 import java.util.List; 53 import java.util.Objects; 54 import java.util.regex.Matcher; 55 import java.util.regex.Pattern; 56 57 /** 58 * Class with helper methods related to detecting OTP codes in notifications. 59 * This file needs to only use public android API methods, see b/361149088 60 */ 61 @SuppressLint("ObsoleteSdkInt") 62 @RequiresApi(Build.VERSION_CODES.VANILLA_ICE_CREAM) 63 public class NotificationOtpDetectionHelper { 64 65 // Use an ArrayList because a List.of list will throw NPE when calling "contains(null)" 66 private static final List<String> SENSITIVE_NOTIFICATION_CATEGORIES = 67 Arrays.asList(CATEGORY_MESSAGE, CATEGORY_EMAIL, CATEGORY_SOCIAL); 68 69 private static final List<String> SENSITIVE_STYLES = 70 Arrays.asList( 71 Notification.MessagingStyle.class.getName(), 72 Notification.InboxStyle.class.getName(), 73 Notification.BigTextStyle.class.getName() 74 ); 75 76 private static final List<String> EXCLUDED_STYLES = 77 Arrays.asList( 78 Notification.MediaStyle.class.getName(), 79 Notification.BigPictureStyle.class.getName(), 80 Notification.CallStyle.class.getName() 81 ); 82 83 private static final int PATTERN_FLAGS = 84 Pattern.DOTALL | Pattern.CASE_INSENSITIVE | Pattern.MULTILINE; 85 compileToRegex(String pattern)86 private static ThreadLocal<Matcher> compileToRegex(String pattern) { 87 return ThreadLocal.withInitial(() -> Pattern.compile(pattern, PATTERN_FLAGS).matcher("")); 88 } 89 90 private static final float TC_THRESHOLD = 0.6f; 91 92 private static final ArrayMap<String, ThreadLocal<Matcher>> EXTRA_LANG_OTP_REGEX = 93 new ArrayMap<>(); 94 95 private static final int MAX_SENSITIVE_TEXT_LEN = 600; 96 97 /** 98 * A regex matching a line start, open paren, arrow, colon (not proceeded by a digit), 99 * open square bracket, equals sign, double or single quote, ideographic char, or a space that 100 * is not preceded by a number. It will not consume the start char (meaning START won't be 101 * included in the matched string) 102 */ 103 private static final String START = 104 "(^|(?<=((^|[^0-9])\\s)|[>(\"'=\\[\\p{IsIdeographic}]|[^0-9]:))"; 105 106 107 /** 108 * One single OTP char. A number or alphabetical char (that isn't also ideographic) 109 */ 110 private static final String OTP_CHAR = "([0-9\\p{IsAlphabetic}&&[^\\p{IsIdeographic}]])"; 111 112 /** 113 * One OTP char, followed by an optional dash 114 */ 115 private static final String OTP_CHAR_WITH_DASH = format("(%s-?)", OTP_CHAR); 116 117 /** 118 * Performs a lookahead to find a digit after 0 to 7 OTP_CHARs. This ensures that our potential 119 * OTP code contains at least one number 120 */ 121 private static final String FIND_DIGIT = format("(?=%s{0,7}\\d)", OTP_CHAR_WITH_DASH); 122 123 /** 124 * Matches between 5 and 8 otp chars, with dashes in between. Here, we are assuming an OTP code 125 * is 5-8 characters long. The last char must not be followed by a dash 126 */ 127 private static final String OTP_CHARS = format("(%s{4,7}%s)", OTP_CHAR_WITH_DASH, OTP_CHAR); 128 129 /** 130 * A regex matching a line end, a space that is not followed by a number, an ideographic char, 131 * or a period, close paren, close square bracket, single or double quote, exclamation point, 132 * question mark, or comma. It will not consume the end char 133 */ 134 private static final String END = "(?=\\s[^0-9]|$|\\p{IsIdeographic}|[.?!,)'\\]\"])"; 135 136 /** 137 * A regex matching four digit numerical codes 138 */ 139 private static final String FOUR_DIGITS = "(\\d{4})"; 140 141 private static final String FIVE_TO_EIGHT_ALPHANUM_AT_LEAST_ONE_NUM = 142 format("(%s%s)", FIND_DIGIT, OTP_CHARS); 143 144 /** 145 * A regex matching two pairs of 3 digits (ex "123 456") 146 */ 147 private static final String SIX_DIGITS_WITH_SPACE = "(\\d{3}\\s\\d{3})"; 148 149 /** 150 * Combining the regular expressions above, we get an OTP regex: 151 * 1. search for START, THEN 152 * 2. match ONE of 153 * a. alphanumeric sequence, at least one number, length 5-8, with optional dashes 154 * b. 4 numbers in a row 155 * c. pair of 3 digit codes separated by a space 156 * THEN 157 * 3. search for END Ex: 158 * "6454", " 345 678.", "[YDT-456]" 159 */ 160 private static final String ALL_OTP = 161 format("%s(%s|%s|%s)%s", 162 START, FIVE_TO_EIGHT_ALPHANUM_AT_LEAST_ONE_NUM, FOUR_DIGITS, 163 SIX_DIGITS_WITH_SPACE, END); 164 165 166 167 private static final ThreadLocal<Matcher> OTP_REGEX = compileToRegex(ALL_OTP); 168 /** 169 * A Date regular expression. Looks for dates with the month, day, and year separated by dashes. 170 * Handles one and two digit months and days, and four or two-digit years. It makes the 171 * following assumptions: 172 * Dates and months will never be higher than 39 173 * If a four digit year is used, the leading digit will be 1 or 2 174 */ 175 private static final String DATE_WITH_DASHES = "([0-3]?\\d-[0-3]?\\d-([12]\\d)?\\d\\d)"; 176 177 /** 178 * matches a ten digit phone number, when the area code is separated by a space or dash. 179 * Supports optional parentheses around the area code, and an optional dash or space in between 180 * the rest of the numbers. 181 * This format registers as an otp match due to the space between the area code and the rest, 182 * but shouldn't. 183 */ 184 private static final String PHONE_WITH_SPACE = "(\\(?\\d{3}\\)?(-|\\s)?\\d{3}(-|\\s)?\\d{4})"; 185 186 /** 187 * A combination of common false positives. These matches are expected to be longer than (or 188 * equal in length to) otp matches, and are always run, even if we have a language specific 189 * regex 190 */ 191 private static final ThreadLocal<Matcher> FALSE_POSITIVE_LONGER_REGEX = 192 compileToRegex(format("%s(%s|%s)%s", START, DATE_WITH_DASHES, PHONE_WITH_SPACE, END)); 193 194 /** 195 * A regex matching the common years of 19xx and 20xx. Used for false positive reduction 196 */ 197 private static final String COMMON_YEARS = format("%s((19|20)\\d\\d)%s", START, END); 198 199 /** 200 * A regex matching three lower case letters. Used for false positive reduction, as no known 201 * OTPs have 3 lowercase letters in sequence. 202 */ 203 private static final String THREE_LOWERCASE = "(\\p{Ll}{3})"; 204 205 /** 206 * A combination of common false positives. Run in cases where we don't have a language specific 207 * regular expression. These matches are expect to be shorter than (or equal in length to) otp 208 * matches 209 */ 210 private static final ThreadLocal<Matcher> FALSE_POSITIVE_SHORTER_REGEX = 211 compileToRegex(format("%s|%s", COMMON_YEARS, THREE_LOWERCASE)); 212 213 /** 214 * A list of regular expressions representing words found in an OTP context (non case sensitive) 215 * Note: TAN is short for Transaction Authentication Number 216 */ 217 private static final String[] ENGLISH_CONTEXT_WORDS = new String[] { 218 "pin", "pass[-\\s]?(code|word)", "TAN", "otp", "2fa", "(two|2)[-\\s]?factor", 219 "log[-\\s]?in", "auth(enticat(e|ion))?", "code", "secret", "verif(y|ication)", 220 "one(\\s|-)?time", "access", "validat(e|ion)" 221 }; 222 223 /** 224 * Creates a regular expression to match any of a series of individual words, case insensitive. 225 * It also verifies the position of the word, relative to the OTP match 226 */ createDictionaryRegex(String[] words)227 private static ThreadLocal<Matcher> createDictionaryRegex(String[] words) { 228 StringBuilder regex = new StringBuilder("("); 229 for (int i = 0; i < words.length; i++) { 230 regex.append(findContextWordWithCode(words[i])); 231 if (i != words.length - 1) { 232 regex.append("|"); 233 } 234 } 235 regex.append(")"); 236 return compileToRegex(regex.toString()); 237 } 238 239 /** 240 * Creates a regular expression that will find a context word, if that word occurs in the 241 * sentence preceding an OTP, or in the same sentence as an OTP (before or after). In both 242 * cases, the context word must occur within 50 characters of the suspected OTP 243 * @param contextWord The context word we expect to find around the OTP match 244 * @return A string representing a regular expression that will determine if we found a context 245 * word occurring before an otp match, or after it, but in the same sentence. 246 */ findContextWordWithCode(String contextWord)247 private static String findContextWordWithCode(String contextWord) { 248 String boundedContext = "\\b" + contextWord + "\\b"; 249 // Asserts that we find the OTP code within 50 characters after the context word, with at 250 // most one sentence punctuation between the OTP code and the context word (i.e. they are 251 // in the same sentence, or the context word is in the previous sentence) 252 String contextWordBeforeOtpInSameOrPreviousSentence = 253 String.format("(%s(?=.{1,50}%s)[^.?!]*[.?!]?[^.?!]*%s)", 254 boundedContext, ALL_OTP, ALL_OTP); 255 // Asserts that we find the context word within 50 characters after the OTP code, with no 256 // sentence punctuation between the OTP code and the context word (i.e. they are in the same 257 // sentence) 258 String contextWordAfterOtpSameSentence = 259 String.format("(%s)[^.!?]{1,50}%s", ALL_OTP, boundedContext); 260 return String.format("(%s|%s)", contextWordBeforeOtpInSameOrPreviousSentence, 261 contextWordAfterOtpSameSentence); 262 } 263 264 static { ULocale.ENGLISH.toLanguageTag()265 EXTRA_LANG_OTP_REGEX.put(ULocale.ENGLISH.toLanguageTag(), 266 createDictionaryRegex(ENGLISH_CONTEXT_WORDS)); 267 } 268 isPreV()269 private static boolean isPreV() { 270 return SDK_INT < Build.VERSION_CODES.VANILLA_ICE_CREAM; 271 } 272 273 /** 274 * Checks if any text fields in a notification might contain an OTP, based on several 275 * regular expressions, and potentially using a textClassifier to eliminate false positives. 276 * Each text field will be examined individually. 277 * 278 * @param notification The notification whose content should be checked 279 * @param checkForFalsePositives If true, will ensure the content does not match the date regex. 280 * If a TextClassifier is provided, it will then try to find a 281 * language specific regex. If it is successful, it will use that 282 * regex to check for false positives. If it is not, it will use 283 * the TextClassifier (if provided), plus the year and three 284 * lowercase regexes to remove possible false positives. 285 * @param tc If non null, the provided TextClassifier will be used to find the language of the 286 * text, and look for a language-specific regex for it. If checkForFalsePositives is 287 * true will also use the classifier to find flight codes and addresses. 288 * @return True if we believe an OTP is in the message, false otherwise. 289 */ containsOtp(Notification notification, boolean checkForFalsePositives, @Nullable TextClassifier tc)290 public static boolean containsOtp(Notification notification, 291 boolean checkForFalsePositives, @Nullable TextClassifier tc) { 292 if (notification == null || notification.extras == null || isPreV()) { 293 return false; 294 } 295 296 // Get the language of the text once 297 ULocale textLocale = getLanguageWithRegex(getTextForDetection(notification), tc); 298 // Get all the individual fields 299 List<CharSequence> fields = getNotificationTextFields(notification); 300 for (CharSequence field : fields) { 301 if (field != null 302 && containsOtp(field.toString(), checkForFalsePositives, tc, textLocale)) { 303 return true; 304 } 305 } 306 307 return false; 308 } 309 310 /** 311 * Checks if a string of text might contain an OTP, based on several 312 * regular expressions, and potentially using a textClassifier to eliminate false positives 313 * 314 * @param sensitiveText The text whose content should be checked 315 * @param checkForFalsePositives If true, will ensure the content does not match the date regex. 316 * If a TextClassifier is provided, it will then try to find a 317 * language specific regex. If it is successful, it will use that 318 * regex to check for false positives. If it is not, it will use 319 * the TextClassifier (if provided), plus the year and three 320 * lowercase regexes to remove possible false positives. 321 * @param tc If non null, the provided TextClassifier will be used to find the language of the 322 * text, and look for a language-specific regex for it. If checkForFalsePositives is 323 * true will also use the classifier to find flight codes and addresses. 324 * @param language If non null, then the TextClassifier (if provided), will not perform language 325 * id, and the system will assume the text is in the specified language 326 * @return True if we believe an OTP is in the message, false otherwise. 327 */ containsOtp(String sensitiveText, boolean checkForFalsePositives, @Nullable TextClassifier tc, @Nullable ULocale language)328 public static boolean containsOtp(String sensitiveText, 329 boolean checkForFalsePositives, @Nullable TextClassifier tc, 330 @Nullable ULocale language) { 331 if (sensitiveText == null || isPreV()) { 332 return false; 333 } 334 335 Matcher otpMatcher = OTP_REGEX.get(); 336 otpMatcher.reset(sensitiveText); 337 boolean otpMatch = otpMatcher.find(); 338 if (!checkForFalsePositives || !otpMatch) { 339 return otpMatch; 340 } 341 342 if (allOtpMatchesAreFalsePositives( 343 sensitiveText, FALSE_POSITIVE_LONGER_REGEX.get(), true)) { 344 return false; 345 } 346 347 if (tc != null || language != null) { 348 if (language == null) { 349 language = getLanguageWithRegex(sensitiveText, tc); 350 } 351 Matcher languageSpecificMatcher = language != null 352 ? EXTRA_LANG_OTP_REGEX.get(language.toLanguageTag()).get() : null; 353 if (languageSpecificMatcher != null) { 354 languageSpecificMatcher.reset(sensitiveText); 355 // Only use the language-specific regex for false positives 356 return languageSpecificMatcher.find(); 357 } 358 // Only check for OTPs when there is a language specific matcher 359 return false; 360 } 361 362 return !allOtpMatchesAreFalsePositives(sensitiveText, FALSE_POSITIVE_SHORTER_REGEX.get(), 363 false); 364 } 365 366 /** 367 * Checks that a given text has at least one match for one regex, that doesn't match another 368 * @param text The full text to check 369 * @param falsePositiveRegex A regex that should not match the OTP regex (for at least one match 370 * found by the OTP regex). The false positive regex matches may be 371 * longer or shorter than the OTP matches. 372 * @param fpMatchesAreLongerThanOtp Whether the false positives are longer than the otp matches. 373 * If true, this method will search the whole text for false 374 * positives, and verify at least one OTP match is not 375 * contained by any of the false positives. If false, then this 376 * method will search individual OTP matches for false 377 * positives, and will verify at least one OTP match doesn't 378 * contain a false positive. 379 * @return true, if all matches found by OTP_REGEX are contained in, or themselves contain a 380 * match to falsePositiveRegex, or there are no OTP matches, false otherwise. 381 */ allOtpMatchesAreFalsePositives(String text, Matcher falsePositiveRegex, boolean fpMatchesAreLongerThanOtp)382 private static boolean allOtpMatchesAreFalsePositives(String text, Matcher falsePositiveRegex, 383 boolean fpMatchesAreLongerThanOtp) { 384 List<String> falsePositives = new ArrayList<>(); 385 if (fpMatchesAreLongerThanOtp) { 386 // if the false positives are longer than the otp, search for them in the whole text 387 falsePositives = getAllMatches(text, falsePositiveRegex); 388 } 389 List<String> otpMatches = getAllMatches(text, OTP_REGEX.get()); 390 for (String otpMatch: otpMatches) { 391 boolean otpMatchContainsNoFp = true; 392 boolean noFpContainsOtpMatch = true; 393 if (!fpMatchesAreLongerThanOtp) { 394 // if the false positives are shorter than the otp, search for them in the otp match 395 falsePositives = getAllMatches(otpMatch, falsePositiveRegex); 396 } 397 for (String falsePositive : falsePositives) { 398 otpMatchContainsNoFp = fpMatchesAreLongerThanOtp 399 || (otpMatchContainsNoFp && !otpMatch.contains(falsePositive)); 400 noFpContainsOtpMatch = !fpMatchesAreLongerThanOtp 401 || (noFpContainsOtpMatch && !falsePositive.contains(otpMatch)); 402 } 403 if (otpMatchContainsNoFp && noFpContainsOtpMatch) { 404 return false; 405 } 406 } 407 return true; 408 } 409 getAllMatches(String text, Matcher regex)410 private static List<String> getAllMatches(String text, Matcher regex) { 411 ArrayList<String> matches = new ArrayList<>(); 412 regex.reset(text); 413 while (regex.find()) { 414 matches.add(regex.group()); 415 } 416 return matches; 417 } 418 419 // Tries to determine the language of the given text. Will return the language with the highest 420 // confidence score that meets the minimum threshold, and has a language-specific regex, null 421 // otherwise 422 @Nullable getLanguageWithRegex(String text, @Nullable TextClassifier tc)423 private static ULocale getLanguageWithRegex(String text, 424 @Nullable TextClassifier tc) { 425 if (tc == null) { 426 return null; 427 } 428 429 float highestConfidence = 0; 430 ULocale highestConfidenceLocale = null; 431 TextLanguage.Request langRequest = new TextLanguage.Request.Builder(text).build(); 432 TextLanguage lang = tc.detectLanguage(langRequest); 433 for (int i = 0; i < lang.getLocaleHypothesisCount(); i++) { 434 ULocale locale = lang.getLocale(i); 435 float confidence = lang.getConfidenceScore(locale); 436 if (confidence >= TC_THRESHOLD && confidence >= highestConfidence 437 && EXTRA_LANG_OTP_REGEX.containsKey(locale.toLanguageTag())) { 438 highestConfidence = confidence; 439 highestConfidenceLocale = locale; 440 } 441 } 442 return highestConfidenceLocale; 443 } 444 445 /** 446 * Gets the sections of text in a notification that should be checked for sensitive content. 447 * This includes the text, title, subtext, messages, and extra text lines. 448 * @param notification The notification whose content should be filtered 449 * @return The extracted text fields 450 */ 451 @VisibleForTesting getTextForDetection(Notification notification)452 protected static String getTextForDetection(Notification notification) { 453 if (notification == null || notification.extras == null || isPreV()) { 454 return ""; 455 } 456 StringBuilder builder = new StringBuilder(); 457 for (CharSequence line : getNotificationTextFields(notification)) { 458 builder.append(line != null ? line : "").append(" "); 459 } 460 return builder.length() <= MAX_SENSITIVE_TEXT_LEN ? builder.toString() 461 : builder.substring(0, MAX_SENSITIVE_TEXT_LEN); 462 } 463 getNotificationTextFields(Notification notification)464 protected static List<CharSequence> getNotificationTextFields(Notification notification) { 465 if (notification == null || notification.extras == null || isPreV()) { 466 return new ArrayList<>(); 467 } 468 ArrayList<CharSequence> fields = new ArrayList<>(); 469 Bundle extras = notification.extras; 470 fields.add(extras.getCharSequence(EXTRA_TITLE)); 471 fields.add(extras.getCharSequence(EXTRA_TEXT)); 472 fields.add(extras.getCharSequence(EXTRA_SUB_TEXT)); 473 fields.add(extras.getCharSequence(EXTRA_BIG_TEXT)); 474 fields.add(extras.getCharSequence(EXTRA_TITLE_BIG)); 475 fields.add(extras.getCharSequence(EXTRA_SUMMARY_TEXT)); 476 CharSequence[] textLines = extras.getCharSequenceArray(EXTRA_TEXT_LINES); 477 if (textLines != null) { 478 fields.addAll(Arrays.asList(textLines)); 479 } 480 List<Message> messages = Message.getMessagesFromBundleArray( 481 extras.getParcelableArray(EXTRA_MESSAGES, Parcelable.class)); 482 // Sort the newest messages (largest timestamp) first 483 messages.sort((MessagingStyle.Message lhs, MessagingStyle.Message rhs) -> 484 Long.compare(rhs.getTimestamp(), lhs.getTimestamp())); 485 for (MessagingStyle.Message message : messages) { 486 fields.add(message.getText()); 487 } 488 return fields; 489 } 490 491 /** 492 * Determines if a notification should be checked for an OTP, based on category, style, and 493 * possible otp content (as determined by a regular expression). 494 * @param notification The notification whose content should be checked 495 * @return true, if further checks for OTP codes should be performed, false otherwise 496 */ shouldCheckForOtp(Notification notification)497 public static boolean shouldCheckForOtp(Notification notification) { 498 if (notification == null || isPreV() 499 || EXCLUDED_STYLES.stream().anyMatch(s -> isStyle(notification, s))) { 500 return false; 501 } 502 return SENSITIVE_NOTIFICATION_CATEGORIES.contains(notification.category) 503 || SENSITIVE_STYLES.stream().anyMatch(s -> isStyle(notification, s)) 504 || containsOtp(notification, false, null) 505 || shouldCheckForOtp(notification.publicVersion); 506 } 507 isStyle(Notification notification, String styleClassName)508 private static boolean isStyle(Notification notification, String styleClassName) { 509 if (notification.extras == null) { 510 return false; 511 } 512 String templateClass = notification.extras.getString(Notification.EXTRA_TEMPLATE); 513 return Objects.equals(templateClass, styleClassName); 514 } 515 NotificationOtpDetectionHelper()516 private NotificationOtpDetectionHelper() { } 517 } 518