1 /* 2 * Copyright (C) 2006 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package android.webkit; 18 19 import java.io.UnsupportedEncodingException; 20 import java.util.regex.Matcher; 21 import java.util.regex.Pattern; 22 23 import android.net.Uri; 24 import android.net.ParseException; 25 import android.net.WebAddress; 26 import android.util.Log; 27 28 public final class URLUtil { 29 30 private static final String LOGTAG = "webkit"; 31 32 // to refer to bar.png under your package's asset/foo/ directory, use 33 // "file:///android_asset/foo/bar.png". 34 static final String ASSET_BASE = "file:///android_asset/"; 35 // to refer to bar.png under your package's res/drawable/ directory, use 36 // "file:///android_res/drawable/bar.png". Use "drawable" to refer to 37 // "drawable-hdpi" directory as well. 38 static final String RESOURCE_BASE = "file:///android_res/"; 39 static final String FILE_BASE = "file://"; 40 static final String PROXY_BASE = "file:///cookieless_proxy/"; 41 42 /** 43 * Cleans up (if possible) user-entered web addresses 44 */ guessUrl(String inUrl)45 public static String guessUrl(String inUrl) { 46 47 String retVal = inUrl; 48 WebAddress webAddress; 49 50 if (DebugFlags.URL_UTIL) Log.v(LOGTAG, "guessURL before queueRequest: " + inUrl); 51 52 if (inUrl.length() == 0) return inUrl; 53 if (inUrl.startsWith("about:")) return inUrl; 54 // Do not try to interpret data scheme URLs 55 if (inUrl.startsWith("data:")) return inUrl; 56 // Do not try to interpret file scheme URLs 57 if (inUrl.startsWith("file:")) return inUrl; 58 // Do not try to interpret javascript scheme URLs 59 if (inUrl.startsWith("javascript:")) return inUrl; 60 61 // bug 762454: strip period off end of url 62 if (inUrl.endsWith(".") == true) { 63 inUrl = inUrl.substring(0, inUrl.length() - 1); 64 } 65 66 try { 67 webAddress = new WebAddress(inUrl); 68 } catch (ParseException ex) { 69 70 if (DebugFlags.URL_UTIL) { 71 Log.v(LOGTAG, "smartUrlFilter: failed to parse url = " + inUrl); 72 } 73 return retVal; 74 } 75 76 // Check host 77 if (webAddress.getHost().indexOf('.') == -1) { 78 // no dot: user probably entered a bare domain. try .com 79 webAddress.setHost("www." + webAddress.getHost() + ".com"); 80 } 81 return webAddress.toString(); 82 } 83 composeSearchUrl(String inQuery, String template, String queryPlaceHolder)84 public static String composeSearchUrl(String inQuery, String template, 85 String queryPlaceHolder) { 86 int placeHolderIndex = template.indexOf(queryPlaceHolder); 87 if (placeHolderIndex < 0) { 88 return null; 89 } 90 91 String query; 92 StringBuilder buffer = new StringBuilder(); 93 buffer.append(template.substring(0, placeHolderIndex)); 94 95 try { 96 query = java.net.URLEncoder.encode(inQuery, "utf-8"); 97 buffer.append(query); 98 } catch (UnsupportedEncodingException ex) { 99 return null; 100 } 101 102 buffer.append(template.substring( 103 placeHolderIndex + queryPlaceHolder.length())); 104 105 return buffer.toString(); 106 } 107 decode(byte[] url)108 public static byte[] decode(byte[] url) throws IllegalArgumentException { 109 if (url.length == 0) { 110 return new byte[0]; 111 } 112 113 // Create a new byte array with the same length to ensure capacity 114 byte[] tempData = new byte[url.length]; 115 116 int tempCount = 0; 117 for (int i = 0; i < url.length; i++) { 118 byte b = url[i]; 119 if (b == '%') { 120 if (url.length - i > 2) { 121 b = (byte) (parseHex(url[i + 1]) * 16 122 + parseHex(url[i + 2])); 123 i += 2; 124 } else { 125 throw new IllegalArgumentException("Invalid format"); 126 } 127 } 128 tempData[tempCount++] = b; 129 } 130 byte[] retData = new byte[tempCount]; 131 System.arraycopy(tempData, 0, retData, 0, tempCount); 132 return retData; 133 } 134 135 /** 136 * @return True iff the url is correctly URL encoded 137 */ verifyURLEncoding(String url)138 static boolean verifyURLEncoding(String url) { 139 int count = url.length(); 140 if (count == 0) { 141 return false; 142 } 143 144 int index = url.indexOf('%'); 145 while (index >= 0 && index < count) { 146 if (index < count - 2) { 147 try { 148 parseHex((byte) url.charAt(++index)); 149 parseHex((byte) url.charAt(++index)); 150 } catch (IllegalArgumentException e) { 151 return false; 152 } 153 } else { 154 return false; 155 } 156 index = url.indexOf('%', index + 1); 157 } 158 return true; 159 } 160 parseHex(byte b)161 private static int parseHex(byte b) { 162 if (b >= '0' && b <= '9') return (b - '0'); 163 if (b >= 'A' && b <= 'F') return (b - 'A' + 10); 164 if (b >= 'a' && b <= 'f') return (b - 'a' + 10); 165 166 throw new IllegalArgumentException("Invalid hex char '" + b + "'"); 167 } 168 169 /** 170 * @return True iff the url is an asset file. 171 */ isAssetUrl(String url)172 public static boolean isAssetUrl(String url) { 173 return (null != url) && url.startsWith(ASSET_BASE); 174 } 175 176 /** 177 * @return True iff the url is a resource file. 178 * @hide 179 */ isResourceUrl(String url)180 public static boolean isResourceUrl(String url) { 181 return (null != url) && url.startsWith(RESOURCE_BASE); 182 } 183 184 /** 185 * @return True iff the url is an proxy url to allow cookieless network 186 * requests from a file url. 187 * @deprecated Cookieless proxy is no longer supported. 188 */ 189 @Deprecated isCookielessProxyUrl(String url)190 public static boolean isCookielessProxyUrl(String url) { 191 return (null != url) && url.startsWith(PROXY_BASE); 192 } 193 194 /** 195 * @return True iff the url is a local file. 196 */ isFileUrl(String url)197 public static boolean isFileUrl(String url) { 198 return (null != url) && (url.startsWith(FILE_BASE) && 199 !url.startsWith(ASSET_BASE) && 200 !url.startsWith(PROXY_BASE)); 201 } 202 203 /** 204 * @return True iff the url is an about: url. 205 */ isAboutUrl(String url)206 public static boolean isAboutUrl(String url) { 207 return (null != url) && url.startsWith("about:"); 208 } 209 210 /** 211 * @return True iff the url is a data: url. 212 */ isDataUrl(String url)213 public static boolean isDataUrl(String url) { 214 return (null != url) && url.startsWith("data:"); 215 } 216 217 /** 218 * @return True iff the url is a javascript: url. 219 */ isJavaScriptUrl(String url)220 public static boolean isJavaScriptUrl(String url) { 221 return (null != url) && url.startsWith("javascript:"); 222 } 223 224 /** 225 * @return True iff the url is an http: url. 226 */ isHttpUrl(String url)227 public static boolean isHttpUrl(String url) { 228 return (null != url) && 229 (url.length() > 6) && 230 url.substring(0, 7).equalsIgnoreCase("http://"); 231 } 232 233 /** 234 * @return True iff the url is an https: url. 235 */ isHttpsUrl(String url)236 public static boolean isHttpsUrl(String url) { 237 return (null != url) && 238 (url.length() > 7) && 239 url.substring(0, 8).equalsIgnoreCase("https://"); 240 } 241 242 /** 243 * @return True iff the url is a network url. 244 */ isNetworkUrl(String url)245 public static boolean isNetworkUrl(String url) { 246 if (url == null || url.length() == 0) { 247 return false; 248 } 249 return isHttpUrl(url) || isHttpsUrl(url); 250 } 251 252 /** 253 * @return True iff the url is a content: url. 254 */ isContentUrl(String url)255 public static boolean isContentUrl(String url) { 256 return (null != url) && url.startsWith("content:"); 257 } 258 259 /** 260 * @return True iff the url is valid. 261 */ isValidUrl(String url)262 public static boolean isValidUrl(String url) { 263 if (url == null || url.length() == 0) { 264 return false; 265 } 266 267 return (isAssetUrl(url) || 268 isResourceUrl(url) || 269 isFileUrl(url) || 270 isAboutUrl(url) || 271 isHttpUrl(url) || 272 isHttpsUrl(url) || 273 isJavaScriptUrl(url) || 274 isContentUrl(url)); 275 } 276 277 /** 278 * Strips the url of the anchor. 279 */ stripAnchor(String url)280 public static String stripAnchor(String url) { 281 int anchorIndex = url.indexOf('#'); 282 if (anchorIndex != -1) { 283 return url.substring(0, anchorIndex); 284 } 285 return url; 286 } 287 288 /** 289 * Guesses canonical filename that a download would have, using 290 * the URL and contentDisposition. File extension, if not defined, 291 * is added based on the mimetype 292 * @param url Url to the content 293 * @param contentDisposition Content-Disposition HTTP header or null 294 * @param mimeType Mime-type of the content or null 295 * 296 * @return suggested filename 297 */ guessFileName( String url, String contentDisposition, String mimeType)298 public static final String guessFileName( 299 String url, 300 String contentDisposition, 301 String mimeType) { 302 String filename = null; 303 String extension = null; 304 305 // If we couldn't do anything with the hint, move toward the content disposition 306 if (filename == null && contentDisposition != null) { 307 filename = parseContentDisposition(contentDisposition); 308 if (filename != null) { 309 int index = filename.lastIndexOf('/') + 1; 310 if (index > 0) { 311 filename = filename.substring(index); 312 } 313 } 314 } 315 316 // If all the other http-related approaches failed, use the plain uri 317 if (filename == null) { 318 String decodedUrl = Uri.decode(url); 319 if (decodedUrl != null) { 320 int queryIndex = decodedUrl.indexOf('?'); 321 // If there is a query string strip it, same as desktop browsers 322 if (queryIndex > 0) { 323 decodedUrl = decodedUrl.substring(0, queryIndex); 324 } 325 if (!decodedUrl.endsWith("/")) { 326 int index = decodedUrl.lastIndexOf('/') + 1; 327 if (index > 0) { 328 filename = decodedUrl.substring(index); 329 } 330 } 331 } 332 } 333 334 // Finally, if couldn't get filename from URI, get a generic filename 335 if (filename == null) { 336 filename = "downloadfile"; 337 } 338 339 // Split filename between base and extension 340 // Add an extension if filename does not have one 341 int dotIndex = filename.indexOf('.'); 342 if (dotIndex < 0) { 343 if (mimeType != null) { 344 extension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType); 345 if (extension != null) { 346 extension = "." + extension; 347 } 348 } 349 if (extension == null) { 350 if (mimeType != null && mimeType.toLowerCase().startsWith("text/")) { 351 if (mimeType.equalsIgnoreCase("text/html")) { 352 extension = ".html"; 353 } else { 354 extension = ".txt"; 355 } 356 } else { 357 extension = ".bin"; 358 } 359 } 360 } else { 361 if (mimeType != null) { 362 // Compare the last segment of the extension against the mime type. 363 // If there's a mismatch, discard the entire extension. 364 int lastDotIndex = filename.lastIndexOf('.'); 365 String typeFromExt = MimeTypeMap.getSingleton().getMimeTypeFromExtension( 366 filename.substring(lastDotIndex + 1)); 367 if (typeFromExt != null && !typeFromExt.equalsIgnoreCase(mimeType)) { 368 extension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType); 369 if (extension != null) { 370 extension = "." + extension; 371 } 372 } 373 } 374 if (extension == null) { 375 extension = filename.substring(dotIndex); 376 } 377 filename = filename.substring(0, dotIndex); 378 } 379 380 return filename + extension; 381 } 382 383 /** Regex used to parse content-disposition headers */ 384 private static final Pattern CONTENT_DISPOSITION_PATTERN = 385 Pattern.compile("attachment;\\s*filename\\s*=\\s*(\"?)([^\"]*)\\1\\s*$", 386 Pattern.CASE_INSENSITIVE); 387 388 /* 389 * Parse the Content-Disposition HTTP Header. The format of the header 390 * is defined here: http://www.w3.org/Protocols/rfc2616/rfc2616-sec19.html 391 * This header provides a filename for content that is going to be 392 * downloaded to the file system. We only support the attachment type. 393 * Note that RFC 2616 specifies the filename value must be double-quoted. 394 * Unfortunately some servers do not quote the value so to maintain 395 * consistent behaviour with other browsers, we allow unquoted values too. 396 */ parseContentDisposition(String contentDisposition)397 static String parseContentDisposition(String contentDisposition) { 398 try { 399 Matcher m = CONTENT_DISPOSITION_PATTERN.matcher(contentDisposition); 400 if (m.find()) { 401 return m.group(2); 402 } 403 } catch (IllegalStateException ex) { 404 // This function is defined as returning null when it can't parse the header 405 } 406 return null; 407 } 408 } 409