1 // Copyright 2018 The Chromium Authors 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 package org.chromium.base; 6 7 import android.text.TextUtils; 8 import android.util.Log; 9 import android.util.Patterns; 10 11 import org.chromium.base.annotations.CalledByNative; 12 13 import java.util.regex.Matcher; 14 import java.util.regex.Pattern; 15 16 /** 17 * Provides public methods for detecting and eliding sensitive PII. 18 */ 19 public class PiiElider { 20 private static final String EMAIL_ELISION = "XXX@EMAIL.ELIDED"; 21 22 private static final String URL_ELISION = "HTTP://WEBADDRESS.ELIDED"; 23 24 private static final String GOOD_IRI_CHAR = "a-zA-Z0-9\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF"; 25 26 private static final String IP_ADDRESS = 27 "((25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9])\\.(25[0-5]|2[0-4]" 28 + "[0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(25[0-5]|2[0-4][0-9]|[0-1]" 29 + "[0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}" 30 + "|[1-9][0-9]|[0-9]))"; 31 32 private static final String IRI = 33 "[" + GOOD_IRI_CHAR + "]([" + GOOD_IRI_CHAR + "-]{0,61}[" + GOOD_IRI_CHAR + "]){0,1}"; 34 35 private static final String GOOD_GTLD_CHAR = "a-zA-Z\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF"; 36 private static final String GTLD = "[" + GOOD_GTLD_CHAR + "]{2,63}"; 37 private static final String HOST_NAME = "(" + IRI + "\\.)+" + GTLD; 38 39 private static final String URI_ENCODED_CHAR = "(%[a-fA-F0-9]{2})"; 40 41 private static final String URI_CHAR = "([a-zA-Z0-9$_.+!*'(),;?&=-]|" + URI_ENCODED_CHAR + ")"; 42 43 private static final String PATH_CHAR = 44 // Either a single valid path component character or a URI-encoded character. 45 "(([" + GOOD_IRI_CHAR + ";/?:@&=#~.+!*'(),_-])|" + URI_ENCODED_CHAR + ")"; 46 47 private static final String URI_SCHEME = "((http|https|Http|Https|rtsp|Rtsp)://" 48 + "(" + URI_CHAR + "{1,64}(:" + URI_CHAR + "{1,25})?@)?)"; 49 50 private static final String DOMAIN_NAME = "(" + HOST_NAME + "|" + IP_ADDRESS + ")"; 51 52 private static final String PORT = "(:\\d{1,5})"; 53 54 private static final String URL_WITH_OPTIONAL_SCHEME_AND_PORT = 55 "(" + URI_SCHEME + "?" + DOMAIN_NAME + PORT + "?)"; 56 57 private static final String PATH_COMPONENT = "(" + PATH_CHAR + "+)"; 58 59 // Based on: http://www.faqs.org/rfcs/rfc2396.html#:~:text=Scheme%20Component 60 private static final String INTENT_SCHEME = "[a-zA-Z][a-zA-Z0-9+.-]+://"; 61 62 private static final String INTENT = "(" + INTENT_SCHEME + PATH_COMPONENT + ")"; 63 64 private static final String URL_OR_INTENT = 65 "(" + URL_WITH_OPTIONAL_SCHEME_AND_PORT + "|" + INTENT + ")"; 66 67 private static final Pattern WEB_URL = 68 Pattern.compile("(\\b|^)" // Always start on a word boundary or start of string. 69 + "(" + URL_OR_INTENT + ")" // Main URL or Intent scheme/domain/root path. 70 + "(/" + PATH_CHAR + "*)?" // Rest of the URI path. 71 + "(\\b|$)"); // Always end on a word boundary or end of string. 72 73 // Example variant info chromium-TrichromeChromeGoogle6432.aab 74 private static final String CHROME_VARIANT_INFO = "chromium-[^\\.]+\\.aab"; 75 private static final Pattern LIKELY_EXCEPTION_LOG = Pattern.compile("\\sat\\s" 76 // These are all package prefixes of classes that are likely to 77 // exist on a stacktrace and are very unlikely to be a PII url. 78 + "(org\\.chromium|com\\.google|java|android|com\\.android)\\.[^ ]+.|" 79 // if a line has what looks like line number info, it's probably an 80 // exception log. 81 + "\\(" + CHROME_VARIANT_INFO 82 + "[^:]+:\\d+\\)|" 83 // When a class is not found it can fail to satisfy our isClass 84 // check but is still worth noting what it was. 85 + "Caused by: java\\.lang\\." 86 + "(ClassNotFoundException|NoClassDefFoundError):"); 87 88 private static final String IP_ELISION = "1.2.3.4"; 89 private static final String MAC_ELISION = "01:23:45:67:89:AB"; 90 private static final String CONSOLE_ELISION = "[ELIDED:CONSOLE(0)] ELIDED CONSOLE MESSAGE"; 91 92 private static final Pattern MAC_ADDRESS = 93 Pattern.compile("([0-9a-fA-F]{2}[-:]+){5}[0-9a-fA-F]{2}"); 94 95 private static final Pattern CONSOLE_MSG = Pattern.compile("\\[\\w*:CONSOLE.*\\].*"); 96 97 private static final String[] APP_NAMESPACE = 98 new String[] {"org.chromium.", "com.google.", "com.chrome."}; 99 100 private static final String[] SYSTEM_NAMESPACE = 101 new String[] {"android.", "com.android.", "dalvik.", "java.", "javax.", "org.apache.", 102 "org.json.", "org.w3c.dom.", "org.xml.", "org.xmlpull.", "System."}; 103 104 /** 105 * Elides any emails in the specified {@link String} with 106 * {@link #EMAIL_ELISION}. 107 * 108 * @param original String potentially containing emails. 109 * @return String with elided emails. 110 */ elideEmail(String original)111 public static String elideEmail(String original) { 112 return Patterns.EMAIL_ADDRESS.matcher(original).replaceAll(EMAIL_ELISION); 113 } 114 115 /** 116 * Elides any URLs in the specified {@link String} with 117 * {@link #URL_ELISION}. 118 * 119 * @param original String potentially containing URLs. 120 * @return String with elided URLs. 121 */ elideUrl(String original)122 public static String elideUrl(String original) { 123 // Url-matching is fussy. If something looks like an exception message, just return. 124 if (LIKELY_EXCEPTION_LOG.matcher(original).find()) return original; 125 StringBuilder buffer = new StringBuilder(original); 126 Matcher matcher = WEB_URL.matcher(buffer); 127 int start = 0; 128 while (matcher.find(start)) { 129 start = matcher.start(); 130 int end = matcher.end(); 131 String url = buffer.substring(start, end); 132 if (!likelyToBeAppNamespace(url) && !likelyToBeSystemNamespace(url) 133 && !likelyToBeClassOrMethodName(url)) { 134 buffer.replace(start, end, URL_ELISION); 135 end = start + URL_ELISION.length(); 136 matcher = WEB_URL.matcher(buffer); 137 } 138 start = end; 139 } 140 return buffer.toString(); 141 } 142 likelyToBeClassOrMethodName(String url)143 private static boolean likelyToBeClassOrMethodName(String url) { 144 if (isClassName(url)) return true; 145 146 // Since the suspected URL could actually be a method name, check if the portion preceding 147 // the last subdomain is a class name. 148 int indexOfLastPeriod = url.lastIndexOf("."); 149 if (indexOfLastPeriod == -1) return false; 150 return isClassName(url.substring(0, indexOfLastPeriod)); 151 } 152 isClassName(String url)153 private static boolean isClassName(String url) { 154 try { 155 Class.forName(url, false, ContextUtils.getApplicationContext().getClassLoader()); 156 return true; 157 } catch (Throwable e) { 158 // Some examples: ClassNotFoundException, NoClassDefFoundException, VerifyError. 159 } 160 return false; 161 } 162 likelyToBeAppNamespace(String url)163 private static boolean likelyToBeAppNamespace(String url) { 164 for (String ns : APP_NAMESPACE) { 165 if (url.startsWith(ns)) { 166 return true; 167 } 168 } 169 return false; 170 } 171 likelyToBeSystemNamespace(String url)172 private static boolean likelyToBeSystemNamespace(String url) { 173 for (String ns : SYSTEM_NAMESPACE) { 174 if (url.startsWith(ns)) { 175 return true; 176 } 177 } 178 return false; 179 } 180 181 /** 182 * Elides any IP addresses in the specified {@link String} with 183 * {@link #IP_ELISION}. 184 * 185 * @param original String potentially containing IPs. 186 * @return String with elided IPs. 187 */ elideIp(String original)188 public static String elideIp(String original) { 189 return Patterns.IP_ADDRESS.matcher(original).replaceAll(IP_ELISION); 190 } 191 192 /** 193 * Elides any MAC addresses in the specified {@link String} with 194 * {@link #MAC_ELISION}. 195 * 196 * @param original String potentially containing MACs. 197 * @return String with elided MACs. 198 */ elideMac(String original)199 public static String elideMac(String original) { 200 return MAC_ADDRESS.matcher(original).replaceAll(MAC_ELISION); 201 } 202 203 /** 204 * Elides any console messages in the specified {@link String} with 205 * {@link #CONSOLE_ELISION}. 206 * 207 * @param original String potentially containing console messages. 208 * @return String with elided console messages. 209 */ elideConsole(String original)210 public static String elideConsole(String original) { 211 return CONSOLE_MSG.matcher(original).replaceAll(CONSOLE_ELISION); 212 } 213 214 /** 215 * Elides any URL in the exception messages contained inside a stacktrace with 216 * {@link #URL_ELISION}. 217 * 218 * @param stacktrace Multiline stacktrace as a string. 219 * @return Stacktrace with elided URLs. 220 */ sanitizeStacktrace(String stacktrace)221 public static String sanitizeStacktrace(String stacktrace) { 222 if (TextUtils.isEmpty(stacktrace)) { 223 return ""; 224 } 225 String[] frames = stacktrace.split("\\n"); 226 // Sanitize first stacktrace line which contains the exception message. 227 frames[0] = elideUrl(frames[0]); 228 for (int i = 1; i < frames.length; i++) { 229 // Nested exceptions should also have their message sanitized. 230 if (frames[i].startsWith("Caused by:")) { 231 frames[i] = elideUrl(frames[i]); 232 } 233 } 234 return TextUtils.join("\n", frames); 235 } 236 237 /** 238 * Returns a sanitized stacktrace (per {@link #sanitizeStacktrace(String)}) for the given 239 * throwable. 240 */ 241 @CalledByNative getSanitizedStacktrace(Throwable throwable)242 public static String getSanitizedStacktrace(Throwable throwable) { 243 return sanitizeStacktrace(Log.getStackTraceString(throwable)); 244 } 245 } 246