• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
// Copyright 2018 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4 
5 package org.chromium.base;
6 
7 import android.text.TextUtils;
8 import android.util.Patterns;
9 
10 import java.util.regex.Matcher;
11 import java.util.regex.Pattern;
12 
/** Provides public methods for detecting and eliding sensitive PII. */
14 public class PiiElider {
15     private static final String EMAIL_ELISION = "XXX@EMAIL.ELIDED";
16 
17     private static final String URL_ELISION = "HTTP://WEBADDRESS.ELIDED";
18 
19     private static final String GOOD_IRI_CHAR = "a-zA-Z0-9\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF";
20 
21     private static final String IP_ADDRESS =
22             "((25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9])\\.(25[0-5]|2[0-4]"
23                     + "[0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(25[0-5]|2[0-4][0-9]|[0-1]"
24                     + "[0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}"
25                     + "|[1-9][0-9]|[0-9]))";
26 
27     private static final String IRI =
28             "[" + GOOD_IRI_CHAR + "]([" + GOOD_IRI_CHAR + "-]{0,61}[" + GOOD_IRI_CHAR + "]){0,1}";
29 
30     private static final String GOOD_GTLD_CHAR = "a-zA-Z\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF";
31     private static final String GTLD = "[" + GOOD_GTLD_CHAR + "]{2,63}";
32     private static final String HOST_NAME = "(" + IRI + "\\.)+" + GTLD;
33 
34     private static final String URI_ENCODED_CHAR = "(%[a-fA-F0-9]{2})";
35 
36     private static final String URI_CHAR = "([a-zA-Z0-9$_.+!*'(),;?&=-]|" + URI_ENCODED_CHAR + ")";
37 
38     private static final String PATH_CHAR =
39             // Either a single valid path component character or a URI-encoded character.
40             "(([" + GOOD_IRI_CHAR + ";/?:@&=#~.+!*'(),_-])|" + URI_ENCODED_CHAR + ")";
41 
42     private static final String URI_SCHEME =
43             "((http|https|Http|Https|rtsp|Rtsp)://"
44                     + "("
45                     + URI_CHAR
46                     + "{1,64}(:"
47                     + URI_CHAR
48                     + "{1,25})?@)?)";
49 
50     private static final String DOMAIN_NAME = "(" + HOST_NAME + "|" + IP_ADDRESS + ")";
51 
52     private static final String PORT = "(:\\d{1,5})";
53 
54     private static final String URL_WITH_OPTIONAL_SCHEME_AND_PORT =
55             "(" + URI_SCHEME + "?" + DOMAIN_NAME + PORT + "?)";
56 
57     private static final String PATH_COMPONENT = "(" + PATH_CHAR + "+)";
58 
59     // Based on: http://www.faqs.org/rfcs/rfc2396.html#:~:text=Scheme%20Component
60     private static final String INTENT_SCHEME = "[a-zA-Z][a-zA-Z0-9+.-]+://";
61 
62     private static final String INTENT = "(" + INTENT_SCHEME + PATH_COMPONENT + ")";
63 
64     private static final String URL_OR_INTENT =
65             "(" + URL_WITH_OPTIONAL_SCHEME_AND_PORT + "|" + INTENT + ")";
66 
67     private static final Pattern WEB_URL =
68             Pattern.compile(
69                     "(\\b|^)" // Always start on a word boundary or start of string.
70                             + "("
71                             + URL_OR_INTENT
72                             + ")" // Main URL or Intent scheme/domain/root path.
73                             + "(/"
74                             + PATH_CHAR
75                             + "*)?" // Rest of the URI path.
76                             + "(\\b|$)"); // Always end on a word boundary or end of string.
77 
78     // Example variant info chromium-TrichromeChromeGoogle6432.aab
79     private static final String CHROME_VARIANT_INFO = "chromium-[^\\.]+\\.aab";
80     private static final Pattern LIKELY_EXCEPTION_LOG =
81             Pattern.compile(
82                     "\\sat\\s"
83                             // These are all package prefixes of classes that are likely to
84                             // exist on a stacktrace and are very unlikely to be a PII url.
85                             + "(org\\.chromium|com\\.google|java|android|com\\.android)\\.[^ ]+.|"
86                             // if a line has what looks like line number info, it's probably an
87                             // exception log.
88                             + "\\("
89                             + CHROME_VARIANT_INFO
90                             + "[^:]+:\\d+\\)|"
91                             // When a class is not found it can fail to satisfy our isClass
92                             // check but is still worth noting what it was.
93                             + "Caused by: java\\.lang\\."
94                             + "(ClassNotFoundException|NoClassDefFoundError):");
95 
96     private static final String IP_ELISION = "1.2.3.4";
97     private static final String MAC_ELISION = "01:23:45:67:89:AB";
98     private static final String CONSOLE_ELISION = "[ELIDED:CONSOLE(0)] ELIDED CONSOLE MESSAGE";
99 
100     private static final Pattern MAC_ADDRESS =
101             Pattern.compile("([0-9a-fA-F]{2}[-:]+){5}[0-9a-fA-F]{2}");
102 
103     private static final Pattern CONSOLE_MSG = Pattern.compile("\\[\\w*:CONSOLE.*\\].*");
104 
105     private static final String[] APP_NAMESPACE =
106             new String[] {"org.chromium.", "com.google.", "com.chrome."};
107 
108     private static final String[] SYSTEM_NAMESPACE =
109             new String[] {
110                 "android.",
111                 "com.android.",
112                 "dalvik.",
113                 "java.",
114                 "javax.",
115                 "org.apache.",
116                 "org.json.",
117                 "org.w3c.dom.",
118                 "org.xml.",
119                 "org.xmlpull.",
120                 "System."
121             };
122 
123     /**
124      * Elides any emails in the specified {@link String} with
125      * {@link #EMAIL_ELISION}.
126      *
127      * @param original String potentially containing emails.
128      * @return String with elided emails.
129      */
elideEmail(String original)130     public static String elideEmail(String original) {
131         return Patterns.EMAIL_ADDRESS.matcher(original).replaceAll(EMAIL_ELISION);
132     }
133 
134     /**
135      * Elides any URLs in the specified {@link String} with
136      * {@link #URL_ELISION}.
137      *
138      * @param original String potentially containing URLs.
139      * @return String with elided URLs.
140      */
elideUrl(String original)141     public static String elideUrl(String original) {
142         // Url-matching is fussy. If something looks like an exception message, just return.
143         if (LIKELY_EXCEPTION_LOG.matcher(original).find()) return original;
144         StringBuilder buffer = new StringBuilder(original);
145         Matcher matcher = WEB_URL.matcher(buffer);
146         int start = 0;
147         while (matcher.find(start)) {
148             start = matcher.start();
149             int end = matcher.end();
150             String url = buffer.substring(start, end);
151             if (!likelyToBeAppNamespace(url)
152                     && !likelyToBeSystemNamespace(url)
153                     && !likelyToBeClassOrMethodName(url)) {
154                 buffer.replace(start, end, URL_ELISION);
155                 end = start + URL_ELISION.length();
156                 matcher = WEB_URL.matcher(buffer);
157             }
158             start = end;
159         }
160         return buffer.toString();
161     }
162 
likelyToBeClassOrMethodName(String url)163     private static boolean likelyToBeClassOrMethodName(String url) {
164         if (isClassName(url)) return true;
165 
166         // Since the suspected URL could actually be a method name, check if the portion preceding
167         // the last subdomain is a class name.
168         int indexOfLastPeriod = url.lastIndexOf(".");
169         if (indexOfLastPeriod == -1) return false;
170         return isClassName(url.substring(0, indexOfLastPeriod));
171     }
172 
isClassName(String url)173     private static boolean isClassName(String url) {
174         try {
175             Class.forName(url, false, ContextUtils.getApplicationContext().getClassLoader());
176             return true;
177         } catch (Throwable e) {
178             // Some examples: ClassNotFoundException, NoClassDefFoundException, VerifyError.
179         }
180         return false;
181     }
182 
likelyToBeAppNamespace(String url)183     private static boolean likelyToBeAppNamespace(String url) {
184         for (String ns : APP_NAMESPACE) {
185             if (url.startsWith(ns)) {
186                 return true;
187             }
188         }
189         return false;
190     }
191 
likelyToBeSystemNamespace(String url)192     private static boolean likelyToBeSystemNamespace(String url) {
193         for (String ns : SYSTEM_NAMESPACE) {
194             if (url.startsWith(ns)) {
195                 return true;
196             }
197         }
198         return false;
199     }
200 
201     /**
202      * Elides any IP addresses in the specified {@link String} with
203      * {@link #IP_ELISION}.
204      *
205      * @param original String potentially containing IPs.
206      * @return String with elided IPs.
207      */
elideIp(String original)208     public static String elideIp(String original) {
209         return Patterns.IP_ADDRESS.matcher(original).replaceAll(IP_ELISION);
210     }
211 
212     /**
213      * Elides any MAC addresses in the specified {@link String} with
214      * {@link #MAC_ELISION}.
215      *
216      * @param original String potentially containing MACs.
217      * @return String with elided MACs.
218      */
elideMac(String original)219     public static String elideMac(String original) {
220         return MAC_ADDRESS.matcher(original).replaceAll(MAC_ELISION);
221     }
222 
223     /**
224      * Elides any console messages in the specified {@link String} with
225      * {@link #CONSOLE_ELISION}.
226      *
227      * @param original String potentially containing console messages.
228      * @return String with elided console messages.
229      */
elideConsole(String original)230     public static String elideConsole(String original) {
231         return CONSOLE_MSG.matcher(original).replaceAll(CONSOLE_ELISION);
232     }
233 
234     /**
235      * Elides any URL in the exception messages contained inside a stacktrace with
236      * {@link #URL_ELISION}.
237      *
238      * @param stacktrace Multiline stacktrace as a string.
239      * @return Stacktrace with elided URLs.
240      */
sanitizeStacktrace(String stacktrace)241     public static String sanitizeStacktrace(String stacktrace) {
242         if (TextUtils.isEmpty(stacktrace)) {
243             return "";
244         }
245         String[] frames = stacktrace.split("\\n");
246         // Sanitize first stacktrace line which contains the exception message.
247         frames[0] = elideUrl(frames[0]);
248         for (int i = 1; i < frames.length; i++) {
249             // Nested exceptions should also have their message sanitized.
250             if (frames[i].startsWith("Caused by:")) {
251                 frames[i] = elideUrl(frames[i]);
252             }
253         }
254         return TextUtils.join("\n", frames);
255     }
256 }
257