• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2018 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 package org.chromium.base;
6 
7 import android.text.TextUtils;
8 import android.util.Log;
9 import android.util.Patterns;
10 
11 import org.chromium.base.annotations.CalledByNative;
12 
13 import java.util.regex.Matcher;
14 import java.util.regex.Pattern;
15 
16 /**
17  * Provides public methods for detecting and eliding sensitive PII.
18  */
19 public class PiiElider {
20     private static final String EMAIL_ELISION = "XXX@EMAIL.ELIDED";
21 
22     private static final String URL_ELISION = "HTTP://WEBADDRESS.ELIDED";
23 
24     private static final String GOOD_IRI_CHAR = "a-zA-Z0-9\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF";
25 
26     private static final String IP_ADDRESS =
27             "((25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9])\\.(25[0-5]|2[0-4]"
28             + "[0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(25[0-5]|2[0-4][0-9]|[0-1]"
29             + "[0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}"
30             + "|[1-9][0-9]|[0-9]))";
31 
32     private static final String IRI =
33             "[" + GOOD_IRI_CHAR + "]([" + GOOD_IRI_CHAR + "-]{0,61}[" + GOOD_IRI_CHAR + "]){0,1}";
34 
35     private static final String GOOD_GTLD_CHAR = "a-zA-Z\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF";
36     private static final String GTLD = "[" + GOOD_GTLD_CHAR + "]{2,63}";
37     private static final String HOST_NAME = "(" + IRI + "\\.)+" + GTLD;
38 
39     private static final String URI_ENCODED_CHAR = "(%[a-fA-F0-9]{2})";
40 
41     private static final String URI_CHAR = "([a-zA-Z0-9$_.+!*'(),;?&=-]|" + URI_ENCODED_CHAR + ")";
42 
43     private static final String PATH_CHAR =
44             // Either a single valid path component character or a URI-encoded character.
45             "(([" + GOOD_IRI_CHAR + ";/?:@&=#~.+!*'(),_-])|" + URI_ENCODED_CHAR + ")";
46 
47     private static final String URI_SCHEME = "((http|https|Http|Https|rtsp|Rtsp)://"
48             + "(" + URI_CHAR + "{1,64}(:" + URI_CHAR + "{1,25})?@)?)";
49 
50     private static final String DOMAIN_NAME = "(" + HOST_NAME + "|" + IP_ADDRESS + ")";
51 
52     private static final String PORT = "(:\\d{1,5})";
53 
54     private static final String URL_WITH_OPTIONAL_SCHEME_AND_PORT =
55             "(" + URI_SCHEME + "?" + DOMAIN_NAME + PORT + "?)";
56 
57     private static final String PATH_COMPONENT = "(" + PATH_CHAR + "+)";
58 
59     // Based on: http://www.faqs.org/rfcs/rfc2396.html#:~:text=Scheme%20Component
60     private static final String INTENT_SCHEME = "[a-zA-Z][a-zA-Z0-9+.-]+://";
61 
62     private static final String INTENT = "(" + INTENT_SCHEME + PATH_COMPONENT + ")";
63 
64     private static final String URL_OR_INTENT =
65             "(" + URL_WITH_OPTIONAL_SCHEME_AND_PORT + "|" + INTENT + ")";
66 
67     private static final Pattern WEB_URL =
68             Pattern.compile("(\\b|^)" // Always start on a word boundary or start of string.
69                     + "(" + URL_OR_INTENT + ")" // Main URL or Intent scheme/domain/root path.
70                     + "(/" + PATH_CHAR + "*)?" // Rest of the URI path.
71                     + "(\\b|$)"); // Always end on a word boundary or end of string.
72 
73     // Example variant info chromium-TrichromeChromeGoogle6432.aab
74     private static final String CHROME_VARIANT_INFO = "chromium-[^\\.]+\\.aab";
75     private static final Pattern LIKELY_EXCEPTION_LOG = Pattern.compile("\\sat\\s"
76             // These are all package prefixes of classes that are likely to
77             // exist on a stacktrace and are very unlikely to be a PII url.
78             + "(org\\.chromium|com\\.google|java|android|com\\.android)\\.[^ ]+.|"
79             // if a line has what looks like line number info, it's probably an
80             // exception log.
81             + "\\(" + CHROME_VARIANT_INFO
82             + "[^:]+:\\d+\\)|"
83             // When a class is not found it can fail to satisfy our isClass
84             // check but is still worth noting what it was.
85             + "Caused by: java\\.lang\\."
86             + "(ClassNotFoundException|NoClassDefFoundError):");
87 
88     private static final String IP_ELISION = "1.2.3.4";
89     private static final String MAC_ELISION = "01:23:45:67:89:AB";
90     private static final String CONSOLE_ELISION = "[ELIDED:CONSOLE(0)] ELIDED CONSOLE MESSAGE";
91 
92     private static final Pattern MAC_ADDRESS =
93             Pattern.compile("([0-9a-fA-F]{2}[-:]+){5}[0-9a-fA-F]{2}");
94 
95     private static final Pattern CONSOLE_MSG = Pattern.compile("\\[\\w*:CONSOLE.*\\].*");
96 
97     private static final String[] APP_NAMESPACE =
98             new String[] {"org.chromium.", "com.google.", "com.chrome."};
99 
100     private static final String[] SYSTEM_NAMESPACE =
101             new String[] {"android.", "com.android.", "dalvik.", "java.", "javax.", "org.apache.",
102                     "org.json.", "org.w3c.dom.", "org.xml.", "org.xmlpull.", "System."};
103 
104     /**
105      * Elides any emails in the specified {@link String} with
106      * {@link #EMAIL_ELISION}.
107      *
108      * @param original String potentially containing emails.
109      * @return String with elided emails.
110      */
elideEmail(String original)111     public static String elideEmail(String original) {
112         return Patterns.EMAIL_ADDRESS.matcher(original).replaceAll(EMAIL_ELISION);
113     }
114 
115     /**
116      * Elides any URLs in the specified {@link String} with
117      * {@link #URL_ELISION}.
118      *
119      * @param original String potentially containing URLs.
120      * @return String with elided URLs.
121      */
elideUrl(String original)122     public static String elideUrl(String original) {
123         // Url-matching is fussy. If something looks like an exception message, just return.
124         if (LIKELY_EXCEPTION_LOG.matcher(original).find()) return original;
125         StringBuilder buffer = new StringBuilder(original);
126         Matcher matcher = WEB_URL.matcher(buffer);
127         int start = 0;
128         while (matcher.find(start)) {
129             start = matcher.start();
130             int end = matcher.end();
131             String url = buffer.substring(start, end);
132             if (!likelyToBeAppNamespace(url) && !likelyToBeSystemNamespace(url)
133                     && !likelyToBeClassOrMethodName(url)) {
134                 buffer.replace(start, end, URL_ELISION);
135                 end = start + URL_ELISION.length();
136                 matcher = WEB_URL.matcher(buffer);
137             }
138             start = end;
139         }
140         return buffer.toString();
141     }
142 
likelyToBeClassOrMethodName(String url)143     private static boolean likelyToBeClassOrMethodName(String url) {
144         if (isClassName(url)) return true;
145 
146         // Since the suspected URL could actually be a method name, check if the portion preceding
147         // the last subdomain is a class name.
148         int indexOfLastPeriod = url.lastIndexOf(".");
149         if (indexOfLastPeriod == -1) return false;
150         return isClassName(url.substring(0, indexOfLastPeriod));
151     }
152 
isClassName(String url)153     private static boolean isClassName(String url) {
154         try {
155             Class.forName(url, false, ContextUtils.getApplicationContext().getClassLoader());
156             return true;
157         } catch (Throwable e) {
158             // Some examples: ClassNotFoundException, NoClassDefFoundException, VerifyError.
159         }
160         return false;
161     }
162 
likelyToBeAppNamespace(String url)163     private static boolean likelyToBeAppNamespace(String url) {
164         for (String ns : APP_NAMESPACE) {
165             if (url.startsWith(ns)) {
166                 return true;
167             }
168         }
169         return false;
170     }
171 
likelyToBeSystemNamespace(String url)172     private static boolean likelyToBeSystemNamespace(String url) {
173         for (String ns : SYSTEM_NAMESPACE) {
174             if (url.startsWith(ns)) {
175                 return true;
176             }
177         }
178         return false;
179     }
180 
181     /**
182      * Elides any IP addresses in the specified {@link String} with
183      * {@link #IP_ELISION}.
184      *
185      * @param original String potentially containing IPs.
186      * @return String with elided IPs.
187      */
elideIp(String original)188     public static String elideIp(String original) {
189         return Patterns.IP_ADDRESS.matcher(original).replaceAll(IP_ELISION);
190     }
191 
192     /**
193      * Elides any MAC addresses in the specified {@link String} with
194      * {@link #MAC_ELISION}.
195      *
196      * @param original String potentially containing MACs.
197      * @return String with elided MACs.
198      */
elideMac(String original)199     public static String elideMac(String original) {
200         return MAC_ADDRESS.matcher(original).replaceAll(MAC_ELISION);
201     }
202 
203     /**
204      * Elides any console messages in the specified {@link String} with
205      * {@link #CONSOLE_ELISION}.
206      *
207      * @param original String potentially containing console messages.
208      * @return String with elided console messages.
209      */
elideConsole(String original)210     public static String elideConsole(String original) {
211         return CONSOLE_MSG.matcher(original).replaceAll(CONSOLE_ELISION);
212     }
213 
214     /**
215      * Elides any URL in the exception messages contained inside a stacktrace with
216      * {@link #URL_ELISION}.
217      *
218      * @param stacktrace Multiline stacktrace as a string.
219      * @return Stacktrace with elided URLs.
220      */
sanitizeStacktrace(String stacktrace)221     public static String sanitizeStacktrace(String stacktrace) {
222         if (TextUtils.isEmpty(stacktrace)) {
223             return "";
224         }
225         String[] frames = stacktrace.split("\\n");
226         // Sanitize first stacktrace line which contains the exception message.
227         frames[0] = elideUrl(frames[0]);
228         for (int i = 1; i < frames.length; i++) {
229             // Nested exceptions should also have their message sanitized.
230             if (frames[i].startsWith("Caused by:")) {
231                 frames[i] = elideUrl(frames[i]);
232             }
233         }
234         return TextUtils.join("\n", frames);
235     }
236 
237     /**
238      * Returns a sanitized stacktrace (per {@link #sanitizeStacktrace(String)}) for the given
239      * throwable.
240      */
241     @CalledByNative
getSanitizedStacktrace(Throwable throwable)242     public static String getSanitizedStacktrace(Throwable throwable) {
243         return sanitizeStacktrace(Log.getStackTraceString(throwable));
244     }
245 }
246