• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2023 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package android.ext.services.notification;
18 
19 import static android.app.Notification.CATEGORY_EMAIL;
20 import static android.app.Notification.CATEGORY_MESSAGE;
21 import static android.app.Notification.CATEGORY_SOCIAL;
22 import static android.app.Notification.EXTRA_BIG_TEXT;
23 import static android.app.Notification.EXTRA_MESSAGES;
24 import static android.app.Notification.EXTRA_SUB_TEXT;
25 import static android.app.Notification.EXTRA_SUMMARY_TEXT;
26 import static android.app.Notification.EXTRA_TEXT;
27 import static android.app.Notification.EXTRA_TEXT_LINES;
28 import static android.app.Notification.EXTRA_TITLE;
29 import static android.app.Notification.EXTRA_TITLE_BIG;
30 import static android.os.Build.VERSION.SDK_INT;
31 
32 import static java.lang.String.format;
33 
34 import android.annotation.SuppressLint;
35 import android.app.Notification;
36 import android.app.Notification.MessagingStyle;
37 import android.app.Notification.MessagingStyle.Message;
38 import android.icu.util.ULocale;
39 import android.os.Build;
40 import android.os.Bundle;
41 import android.os.Parcelable;
42 import android.util.ArrayMap;
43 import android.view.textclassifier.TextClassifier;
44 import android.view.textclassifier.TextLanguage;
45 
46 import androidx.annotation.Nullable;
47 import androidx.annotation.RequiresApi;
48 import androidx.annotation.VisibleForTesting;
49 
50 import java.util.ArrayList;
51 import java.util.Arrays;
52 import java.util.List;
53 import java.util.Objects;
54 import java.util.regex.Matcher;
55 import java.util.regex.Pattern;
56 
57 /**
58  * Class with helper methods related to detecting OTP codes in notifications.
59  * This file needs to only use public android API methods, see b/361149088
60  */
61 @SuppressLint("ObsoleteSdkInt")
62 @RequiresApi(Build.VERSION_CODES.VANILLA_ICE_CREAM)
63 public class NotificationOtpDetectionHelper {
64 
// Notification categories that always make a notification eligible for OTP checking.
// Built with Arrays.asList (not List.of) because a List.of list will throw NPE when calling
// "contains(null)", and the category handed to contains() may be null.
private static final List<String> SENSITIVE_NOTIFICATION_CATEGORIES =
        Arrays.asList(CATEGORY_MESSAGE, CATEGORY_EMAIL, CATEGORY_SOCIAL);

// Class names of the notification styles that always make a notification eligible for OTP
// checking. Compared against the notification's EXTRA_TEMPLATE value.
private static final List<String> SENSITIVE_STYLES =
        Arrays.asList(
                Notification.MessagingStyle.class.getName(),
                Notification.InboxStyle.class.getName(),
                Notification.BigTextStyle.class.getName()
        );

// Class names of the notification styles that are never checked for OTPs. Compared against the
// notification's EXTRA_TEMPLATE value.
private static final List<String> EXCLUDED_STYLES =
        Arrays.asList(
                Notification.MediaStyle.class.getName(),
                Notification.BigPictureStyle.class.getName(),
                Notification.CallStyle.class.getName()
        );

// Flags applied to every regular expression compiled by this class: "." matches line
// terminators, matching ignores case, and "^"/"$" match at line boundaries.
private static final int PATTERN_FLAGS =
        Pattern.DOTALL | Pattern.CASE_INSENSITIVE | Pattern.MULTILINE;
85 
compileToRegex(String pattern)86     private static ThreadLocal<Matcher> compileToRegex(String pattern) {
87         return ThreadLocal.withInitial(() -> Pattern.compile(pattern, PATTERN_FLAGS).matcher(""));
88     }
89 
// Minimum TextClassifier confidence score a detected language must reach before it is used
private static final float TC_THRESHOLD = 0.6f;

// Language-specific context regexes, keyed by language tag (see ULocale#toLanguageTag).
// Populated once in the static initializer of this class.
private static final ArrayMap<String, ThreadLocal<Matcher>> EXTRA_LANG_OTP_REGEX =
        new ArrayMap<>();

// Upper bound on the length of the combined text returned by getTextForDetection
private static final int MAX_SENSITIVE_TEXT_LEN = 600;
96 
/**
 * A regex matching a line start, open paren, arrow, colon (not preceded by a digit),
 * open square bracket, equals sign, double or single quote, ideographic char, or a space that
 * is not preceded by a number. It will not consume the start char (meaning START won't be
 * included in the matched string)
 */
private static final String START =
        "(^|(?<=((^|[^0-9])\\s)|[>(\"'=\\[\\p{IsIdeographic}]|[^0-9]:))";


/**
 * One single OTP char. A number or alphabetical char (that isn't also ideographic)
 */
private static final String OTP_CHAR = "([0-9\\p{IsAlphabetic}&&[^\\p{IsIdeographic}]])";

/**
 * One OTP char, followed by an optional dash
 */
private static final String OTP_CHAR_WITH_DASH = format("(%s-?)", OTP_CHAR);

/**
 * Performs a lookahead to find a digit after 0 to 7 OTP_CHARs. This ensures that our potential
 * OTP code contains at least one number
 */
private static final String FIND_DIGIT = format("(?=%s{0,7}\\d)", OTP_CHAR_WITH_DASH);

/**
 * Matches between 5 and 8 otp chars, with optional dashes in between (4 to 7 chars that may
 * each be followed by a dash, then one final char). Here, we are assuming an OTP code is 5-8
 * characters long. The last char must not be followed by a dash
 */
private static final String OTP_CHARS = format("(%s{4,7}%s)", OTP_CHAR_WITH_DASH, OTP_CHAR);

/**
 * A regex matching a line end, a space that is not followed by a number, an ideographic char,
 * or a period, close paren, close square bracket, single or double quote, exclamation point,
 * question mark, or comma. It will not consume the end char
 */
private static final String END = "(?=\\s[^0-9]|$|\\p{IsIdeographic}|[.?!,)'\\]\"])";

/**
 * A regex matching four digit numerical codes
 */
private static final String FOUR_DIGITS = "(\\d{4})";

/**
 * A regex matching a 5-8 character alphanumeric code (optional dashes) that contains at least
 * one digit: the FIND_DIGIT lookahead followed by OTP_CHARS
 */
private static final String FIVE_TO_EIGHT_ALPHANUM_AT_LEAST_ONE_NUM =
        format("(%s%s)", FIND_DIGIT, OTP_CHARS);

/**
 * A regex matching two groups of 3 digits separated by one whitespace char (ex "123 456")
 */
private static final String SIX_DIGITS_WITH_SPACE = "(\\d{3}\\s\\d{3})";

/**
 * Combining the regular expressions above, we get an OTP regex:
 * 1. search for START, THEN
 * 2. match ONE of
 *   a. alphanumeric sequence, at least one number, length 5-8, with optional dashes
 *   b. 4 numbers in a row
 *   c. pair of 3 digit codes separated by a space
 * THEN
 * 3. search for END Ex:
 * "6454", " 345 678.", "[YDT-456]"
 */
private static final String ALL_OTP =
        format("%s(%s|%s|%s)%s",
                START, FIVE_TO_EIGHT_ALPHANUM_AT_LEAST_ONE_NUM, FOUR_DIGITS,
                SIX_DIGITS_WITH_SPACE, END);



// Per-thread matcher for ALL_OTP. It is shared by every caller on a thread, so each use must
// reset it with the text being searched.
private static final ThreadLocal<Matcher> OTP_REGEX = compileToRegex(ALL_OTP);
/**
 * A Date regular expression. Looks for dates with the month, day, and year separated by dashes.
 * Handles one and two digit months and days, and four or two-digit years. It makes the
 * following assumptions:
 * Dates and months will never be higher than 39
 * If a four digit year is used, the leading digit will be 1 or 2
 */
private static final String DATE_WITH_DASHES = "([0-3]?\\d-[0-3]?\\d-([12]\\d)?\\d\\d)";

/**
 * Matches a ten digit phone number, where the area code may be separated from the rest by a
 * space or dash. Supports optional parentheses around the area code, and an optional dash or
 * space in between the rest of the numbers.
 * This format registers as an otp match due to the space between the area code and the rest,
 * but shouldn't.
 */
private static final String PHONE_WITH_SPACE = "(\\(?\\d{3}\\)?(-|\\s)?\\d{3}(-|\\s)?\\d{4})";

/**
 * A combination of common false positives (dates and phone numbers, bounded by START and END).
 * These matches are expected to be longer than (or equal in length to) otp matches, and are
 * always run, even if we have a language specific regex
 */
private static final ThreadLocal<Matcher> FALSE_POSITIVE_LONGER_REGEX =
        compileToRegex(format("%s(%s|%s)%s", START, DATE_WITH_DASHES, PHONE_WITH_SPACE, END));

/**
 * A regex matching the common years of 19xx and 20xx. Used for false positive reduction
 */
private static final String COMMON_YEARS = format("%s((19|20)\\d\\d)%s", START, END);

/**
 * A regex matching three lower case letters. Used for false positive reduction, as no known
 * OTPs have 3 lowercase letters in sequence.
 * NOTE(review): every regex here is compiled with PATTERN_FLAGS, which includes
 * CASE_INSENSITIVE. Confirm that \p{Ll} still matches only lowercase letters under that flag.
 */
private static final String THREE_LOWERCASE = "(\\p{Ll}{3})";

/**
 * A combination of common false positives. Run in cases where we don't have a language specific
 * regular expression. These matches are expected to be shorter than (or equal in length to) otp
 * matches
 */
private static final ThreadLocal<Matcher> FALSE_POSITIVE_SHORTER_REGEX =
        compileToRegex(format("%s|%s", COMMON_YEARS, THREE_LOWERCASE));

/**
 * A list of regular expressions representing words found in an OTP context (non case sensitive)
 * Note: TAN is short for Transaction Authentication Number
 */
private static final String[] ENGLISH_CONTEXT_WORDS = new String[] {
        "pin", "pass[-\\s]?(code|word)", "TAN", "otp", "2fa", "(two|2)[-\\s]?factor",
        "log[-\\s]?in", "auth(enticat(e|ion))?", "code", "secret", "verif(y|ication)",
        "one(\\s|-)?time", "access", "validat(e|ion)"
};
222 
223     /**
224      * Creates a regular expression to match any of a series of individual words, case insensitive.
225      * It also verifies the position of the word, relative to the OTP match
226      */
createDictionaryRegex(String[] words)227     private static ThreadLocal<Matcher> createDictionaryRegex(String[] words) {
228         StringBuilder regex = new StringBuilder("(");
229         for (int i = 0; i < words.length; i++) {
230             regex.append(findContextWordWithCode(words[i]));
231             if (i != words.length - 1) {
232                 regex.append("|");
233             }
234         }
235         regex.append(")");
236         return compileToRegex(regex.toString());
237     }
238 
239     /**
240      * Creates a regular expression that will find a context word, if that word occurs in the
241      * sentence preceding an OTP, or in the same sentence as an OTP (before or after). In both
242      * cases, the context word must occur within 50 characters of the suspected OTP
243      * @param contextWord The context word we expect to find around the OTP match
244      * @return A string representing a regular expression that will determine if we found a context
245      * word occurring before an otp match, or after it, but in the same sentence.
246      */
findContextWordWithCode(String contextWord)247     private static String findContextWordWithCode(String contextWord) {
248         String boundedContext = "\\b" + contextWord + "\\b";
249         // Asserts that we find the OTP code within 50 characters after the context word, with at
250         // most one sentence punctuation between the OTP code and the context word (i.e. they are
251         // in the same sentence, or the context word is in the previous sentence)
252         String contextWordBeforeOtpInSameOrPreviousSentence =
253                 String.format("(%s(?=.{1,50}%s)[^.?!]*[.?!]?[^.?!]*%s)",
254                         boundedContext, ALL_OTP, ALL_OTP);
255         // Asserts that we find the context word within 50 characters after the OTP code, with no
256         // sentence punctuation between the OTP code and the context word (i.e. they are in the same
257         // sentence)
258         String contextWordAfterOtpSameSentence =
259                 String.format("(%s)[^.!?]{1,50}%s", ALL_OTP, boundedContext);
260         return String.format("(%s|%s)", contextWordBeforeOtpInSameOrPreviousSentence,
261                 contextWordAfterOtpSameSentence);
262     }
263 
264     static {
ULocale.ENGLISH.toLanguageTag()265         EXTRA_LANG_OTP_REGEX.put(ULocale.ENGLISH.toLanguageTag(),
266                 createDictionaryRegex(ENGLISH_CONTEXT_WORDS));
267     }
268 
isPreV()269     private static boolean isPreV() {
270         return SDK_INT < Build.VERSION_CODES.VANILLA_ICE_CREAM;
271     }
272 
273     /**
274      * Checks if any text fields in a notification might contain an OTP, based on several
275      * regular expressions, and potentially using a textClassifier to eliminate false positives.
276      * Each text field will be examined individually.
277      *
278      * @param notification The notification whose content should be checked
279      * @param checkForFalsePositives If true, will ensure the content does not match the date regex.
280      *                               If a TextClassifier is provided, it will then try to find a
281      *                               language specific regex. If it is successful, it will use that
282      *                               regex to check for false positives. If it is not, it will use
283      *                               the TextClassifier (if provided), plus the year and three
284      *                               lowercase regexes to remove possible false positives.
285      * @param tc If non null, the provided TextClassifier will be used to find the language of the
286      *           text, and look for a language-specific regex for it. If checkForFalsePositives is
287      *           true will also use the classifier to find flight codes and addresses.
288      * @return True if we believe an OTP is in the message, false otherwise.
289      */
containsOtp(Notification notification, boolean checkForFalsePositives, @Nullable TextClassifier tc)290     public static boolean containsOtp(Notification notification,
291             boolean checkForFalsePositives, @Nullable TextClassifier tc) {
292         if (notification == null || notification.extras == null || isPreV()) {
293             return false;
294         }
295 
296         // Get the language of the text once
297         ULocale textLocale = getLanguageWithRegex(getTextForDetection(notification), tc);
298         // Get all the individual fields
299         List<CharSequence> fields = getNotificationTextFields(notification);
300         for (CharSequence field : fields) {
301             if (field != null
302                     && containsOtp(field.toString(), checkForFalsePositives, tc, textLocale)) {
303                 return true;
304             }
305         }
306 
307         return false;
308     }
309 
310     /**
311      * Checks if a string of text might contain an OTP, based on several
312      * regular expressions, and potentially using a textClassifier to eliminate false positives
313      *
314      * @param sensitiveText The text whose content should be checked
315      * @param checkForFalsePositives If true, will ensure the content does not match the date regex.
316      *                               If a TextClassifier is provided, it will then try to find a
317      *                               language specific regex. If it is successful, it will use that
318      *                               regex to check for false positives. If it is not, it will use
319      *                               the TextClassifier (if provided), plus the year and three
320      *                               lowercase regexes to remove possible false positives.
321      * @param tc If non null, the provided TextClassifier will be used to find the language of the
322      *           text, and look for a language-specific regex for it. If checkForFalsePositives is
323      *           true will also use the classifier to find flight codes and addresses.
324      * @param language If non null, then the TextClassifier (if provided), will not perform language
325      *                 id, and the system will assume the text is in the specified language
326      * @return True if we believe an OTP is in the message, false otherwise.
327      */
containsOtp(String sensitiveText, boolean checkForFalsePositives, @Nullable TextClassifier tc, @Nullable ULocale language)328     public static boolean containsOtp(String sensitiveText,
329             boolean checkForFalsePositives, @Nullable TextClassifier tc,
330             @Nullable ULocale language) {
331         if (sensitiveText == null || isPreV()) {
332             return false;
333         }
334 
335         Matcher otpMatcher = OTP_REGEX.get();
336         otpMatcher.reset(sensitiveText);
337         boolean otpMatch = otpMatcher.find();
338         if (!checkForFalsePositives || !otpMatch) {
339             return otpMatch;
340         }
341 
342         if (allOtpMatchesAreFalsePositives(
343                 sensitiveText, FALSE_POSITIVE_LONGER_REGEX.get(), true)) {
344             return false;
345         }
346 
347         if (tc != null || language != null) {
348             if (language == null) {
349                 language = getLanguageWithRegex(sensitiveText, tc);
350             }
351             Matcher languageSpecificMatcher = language != null
352                     ? EXTRA_LANG_OTP_REGEX.get(language.toLanguageTag()).get() : null;
353             if (languageSpecificMatcher != null) {
354                 languageSpecificMatcher.reset(sensitiveText);
355                 // Only use the language-specific regex for false positives
356                 return languageSpecificMatcher.find();
357             }
358             // Only check for OTPs when there is a language specific matcher
359             return false;
360         }
361 
362         return !allOtpMatchesAreFalsePositives(sensitiveText, FALSE_POSITIVE_SHORTER_REGEX.get(),
363                 false);
364     }
365 
366     /**
367      * Checks that a given text has at least one match for one regex, that doesn't match another
368      * @param text The full text to check
369      * @param falsePositiveRegex A regex that should not match the OTP regex (for at least one match
370      *                           found by the OTP regex). The false positive regex matches may be
371      *                           longer or shorter than the OTP matches.
372      * @param fpMatchesAreLongerThanOtp Whether the false positives are longer than the otp matches.
373      *                                  If true, this method will search the whole text for false
374      *                                  positives, and verify at least one OTP match is not
375      *                                  contained by any of the false positives. If false, then this
376      *                                  method will search individual OTP matches for false
377      *                                  positives, and will verify at least one OTP match doesn't
378      *                                  contain a false positive.
379      * @return true, if all matches found by OTP_REGEX are contained in, or themselves contain a
380      *         match to falsePositiveRegex, or there are no OTP matches, false otherwise.
381      */
allOtpMatchesAreFalsePositives(String text, Matcher falsePositiveRegex, boolean fpMatchesAreLongerThanOtp)382     private static boolean allOtpMatchesAreFalsePositives(String text, Matcher falsePositiveRegex,
383             boolean fpMatchesAreLongerThanOtp) {
384         List<String> falsePositives = new ArrayList<>();
385         if (fpMatchesAreLongerThanOtp) {
386             // if the false positives are longer than the otp, search for them in the whole text
387             falsePositives = getAllMatches(text, falsePositiveRegex);
388         }
389         List<String> otpMatches = getAllMatches(text, OTP_REGEX.get());
390         for (String otpMatch: otpMatches) {
391             boolean otpMatchContainsNoFp = true;
392             boolean noFpContainsOtpMatch = true;
393             if (!fpMatchesAreLongerThanOtp) {
394                 // if the false positives are shorter than the otp, search for them in the otp match
395                 falsePositives = getAllMatches(otpMatch, falsePositiveRegex);
396             }
397             for (String falsePositive : falsePositives) {
398                 otpMatchContainsNoFp = fpMatchesAreLongerThanOtp
399                         || (otpMatchContainsNoFp && !otpMatch.contains(falsePositive));
400                 noFpContainsOtpMatch = !fpMatchesAreLongerThanOtp
401                         || (noFpContainsOtpMatch && !falsePositive.contains(otpMatch));
402             }
403             if (otpMatchContainsNoFp && noFpContainsOtpMatch) {
404                 return false;
405             }
406         }
407         return true;
408     }
409 
getAllMatches(String text, Matcher regex)410     private static List<String> getAllMatches(String text, Matcher regex) {
411         ArrayList<String> matches = new ArrayList<>();
412         regex.reset(text);
413         while (regex.find()) {
414             matches.add(regex.group());
415         }
416         return matches;
417     }
418 
419     // Tries to determine the language of the given text. Will return the language with the highest
420     // confidence score that meets the minimum threshold, and has a language-specific regex, null
421     // otherwise
422     @Nullable
getLanguageWithRegex(String text, @Nullable TextClassifier tc)423     private static ULocale getLanguageWithRegex(String text,
424             @Nullable TextClassifier tc) {
425         if (tc == null) {
426             return null;
427         }
428 
429         float highestConfidence = 0;
430         ULocale highestConfidenceLocale = null;
431         TextLanguage.Request langRequest = new TextLanguage.Request.Builder(text).build();
432         TextLanguage lang = tc.detectLanguage(langRequest);
433         for (int i = 0; i < lang.getLocaleHypothesisCount(); i++) {
434             ULocale locale = lang.getLocale(i);
435             float confidence = lang.getConfidenceScore(locale);
436             if (confidence >= TC_THRESHOLD && confidence >= highestConfidence
437                     && EXTRA_LANG_OTP_REGEX.containsKey(locale.toLanguageTag())) {
438                 highestConfidence = confidence;
439                 highestConfidenceLocale = locale;
440             }
441         }
442         return highestConfidenceLocale;
443     }
444 
445     /**
446      * Gets the sections of text in a notification that should be checked for sensitive content.
447      * This includes the text, title, subtext, messages, and extra text lines.
448      * @param notification The notification whose content should be filtered
449      * @return The extracted text fields
450      */
451     @VisibleForTesting
getTextForDetection(Notification notification)452     protected static String getTextForDetection(Notification notification) {
453         if (notification == null || notification.extras == null || isPreV()) {
454             return "";
455         }
456         StringBuilder builder = new StringBuilder();
457         for (CharSequence line : getNotificationTextFields(notification)) {
458             builder.append(line != null ? line : "").append(" ");
459         }
460         return builder.length() <= MAX_SENSITIVE_TEXT_LEN ? builder.toString()
461                 : builder.substring(0, MAX_SENSITIVE_TEXT_LEN);
462     }
463 
getNotificationTextFields(Notification notification)464     protected static List<CharSequence> getNotificationTextFields(Notification notification) {
465         if (notification == null || notification.extras == null || isPreV()) {
466             return new ArrayList<>();
467         }
468         ArrayList<CharSequence> fields = new ArrayList<>();
469         Bundle extras = notification.extras;
470         fields.add(extras.getCharSequence(EXTRA_TITLE));
471         fields.add(extras.getCharSequence(EXTRA_TEXT));
472         fields.add(extras.getCharSequence(EXTRA_SUB_TEXT));
473         fields.add(extras.getCharSequence(EXTRA_BIG_TEXT));
474         fields.add(extras.getCharSequence(EXTRA_TITLE_BIG));
475         fields.add(extras.getCharSequence(EXTRA_SUMMARY_TEXT));
476         CharSequence[] textLines = extras.getCharSequenceArray(EXTRA_TEXT_LINES);
477         if (textLines != null) {
478             fields.addAll(Arrays.asList(textLines));
479         }
480         List<Message> messages = Message.getMessagesFromBundleArray(
481                 extras.getParcelableArray(EXTRA_MESSAGES, Parcelable.class));
482         // Sort the newest messages (largest timestamp) first
483         messages.sort((MessagingStyle.Message lhs, MessagingStyle.Message rhs) ->
484                 Long.compare(rhs.getTimestamp(), lhs.getTimestamp()));
485         for (MessagingStyle.Message message : messages) {
486             fields.add(message.getText());
487         }
488         return fields;
489     }
490 
491     /**
492      * Determines if a notification should be checked for an OTP, based on category, style, and
493      * possible otp content (as determined by a regular expression).
494      * @param notification The notification whose content should be checked
495      * @return true, if further checks for OTP codes should be performed, false otherwise
496      */
shouldCheckForOtp(Notification notification)497     public static boolean shouldCheckForOtp(Notification notification) {
498         if (notification == null || isPreV()
499                 || EXCLUDED_STYLES.stream().anyMatch(s -> isStyle(notification, s))) {
500             return false;
501         }
502         return SENSITIVE_NOTIFICATION_CATEGORIES.contains(notification.category)
503                 || SENSITIVE_STYLES.stream().anyMatch(s -> isStyle(notification, s))
504                 || containsOtp(notification, false, null)
505                 || shouldCheckForOtp(notification.publicVersion);
506     }
507 
isStyle(Notification notification, String styleClassName)508     private static boolean isStyle(Notification notification, String styleClassName) {
509         if (notification.extras == null) {
510             return false;
511         }
512         String templateClass = notification.extras.getString(Notification.EXTRA_TEMPLATE);
513         return Objects.equals(templateClass, styleClassName);
514     }
515 
NotificationOtpDetectionHelper()516     private NotificationOtpDetectionHelper() { }
517 }
518