/*
 * Copyright (C) 2006 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package android.webkit;

import java.io.UnsupportedEncodingException;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import android.net.Uri;
import android.net.ParseException;
import android.net.WebAddress;
import android.util.Log;

/**
 * Static helpers for cleaning up, classifying and decoding URL strings, and
 * for guessing the file name of a download.
 */
public final class URLUtil {

    private static final String LOGTAG = "webkit";

    // to refer to bar.png under your package's asset/foo/ directory, use
    // "file:///android_asset/foo/bar.png".
    static final String ASSET_BASE = "file:///android_asset/";
    // to refer to bar.png under your package's res/drawable/ directory, use
    // "file:///android_res/drawable/bar.png". Use "drawable" to refer to
    // "drawable-hdpi" directory as well.
    static final String RESOURCE_BASE = "file:///android_res/";
    static final String FILE_BASE = "file://";
    static final String PROXY_BASE = "file:///cookieless_proxy/";
    static final String CONTENT_BASE = "content:";

    /**
     * Cleans up (if possible) user-entered web addresses.
     *
     * <p>Empty strings and strings starting with {@code about:},
     * {@code data:}, {@code file:} or {@code javascript:} are returned
     * untouched. Otherwise a single trailing period is removed, the text is
     * parsed as a {@link WebAddress}, and a host that contains no dot is
     * rewritten to {@code www.<host>.com}.
     *
     * @param inUrl the text entered by the user; must not be null
     * @return the cleaned-up address, or the original {@code inUrl}
     *         (including any trailing period) if it could not be parsed
     */
    public static String guessUrl(String inUrl) {

        // Remember the untouched input: it is what we hand back on parse failure.
        String retVal = inUrl;
        WebAddress webAddress;

        if (DebugFlags.URL_UTIL) Log.v(LOGTAG, "guessURL before queueRequest: " + inUrl);

        if (inUrl.length() == 0) return inUrl;
        if (inUrl.startsWith("about:")) return inUrl;
        // Do not try to interpret data scheme URLs
        if (inUrl.startsWith("data:")) return inUrl;
        // Do not try to interpret file scheme URLs
        if (inUrl.startsWith("file:")) return inUrl;
        // Do not try to interpret javascript scheme URLs
        if (inUrl.startsWith("javascript:")) return inUrl;

        // bug 762454: strip period off end of url
        if (inUrl.endsWith(".")) {
            inUrl = inUrl.substring(0, inUrl.length() - 1);
        }

        try {
            webAddress = new WebAddress(inUrl);
        } catch (ParseException ex) {

            if (DebugFlags.URL_UTIL) {
                Log.v(LOGTAG, "smartUrlFilter: failed to parse url = " + inUrl);
            }
            return retVal;
        }

        // Check host
        if (webAddress.getHost().indexOf('.') == -1) {
            // no dot: user probably entered a bare domain.  try .com
            webAddress.setHost("www." + webAddress.getHost() + ".com");
        }
        return webAddress.toString();
    }

    /**
     * Builds a search URL by substituting the URL-encoded query for the first
     * occurrence of a placeholder in a template.
     *
     * @param inQuery the raw query text; it is encoded with
     *        {@link java.net.URLEncoder} using UTF-8
     * @param template the URL template containing the placeholder
     * @param queryPlaceHolder the placeholder text to replace
     * @return the composed URL, or null if the template does not contain the
     *         placeholder or UTF-8 encoding is unavailable
     */
    public static String composeSearchUrl(String inQuery, String template,
                                          String queryPlaceHolder) {
        int placeHolderIndex = template.indexOf(queryPlaceHolder);
        if (placeHolderIndex < 0) {
            return null;
        }

        String query;
        StringBuilder buffer = new StringBuilder();
        buffer.append(template.substring(0, placeHolderIndex));

        try {
            query = java.net.URLEncoder.encode(inQuery, "utf-8");
            buffer.append(query);
        } catch (UnsupportedEncodingException ex) {
            return null;
        }

        buffer.append(template.substring(
                placeHolderIndex + queryPlaceHolder.length()));

        return buffer.toString();
    }

    /**
     * Decodes percent-escapes ({@code %XX}) in a byte sequence. All other
     * bytes are copied through unchanged.
     *
     * @param url the bytes to decode; must not be null
     * @return a new array holding the decoded bytes
     * @throws IllegalArgumentException if a {@code '%'} is not followed by
     *         two bytes, or if either of those bytes is not a hex digit
     */
    public static byte[] decode(byte[] url) throws IllegalArgumentException {
        if (url.length == 0) {
            return new byte[0];
        }

        // Create a new byte array with the same length to ensure capacity
        // (decoding can only shrink the data, never grow it).
        byte[] tempData = new byte[url.length];

        int tempCount = 0;
        for (int i = 0; i < url.length; i++) {
            byte b = url[i];
            if (b == '%') {
                if (url.length - i > 2) {
                    b = (byte) (parseHex(url[i + 1]) * 16
                            + parseHex(url[i + 2]));
                    i += 2;
                } else {
                    throw new IllegalArgumentException("Invalid format");
                }
            }
            tempData[tempCount++] = b;
        }
        byte[] retData = new byte[tempCount];
        System.arraycopy(tempData, 0, retData, 0, tempCount);
        return retData;
    }

    /**
     * Checks that every {@code '%'} in the string is followed by two
     * hexadecimal digits.
     *
     * @param url the string to check; must not be null
     * @return True iff the url is correctly URL encoded. An empty string is
     *         reported as not correctly encoded.
     */
    static boolean verifyURLEncoding(String url) {
        int count = url.length();
        if (count == 0) {
            return false;
        }

        int index = url.indexOf('%');
        while (index >= 0) {
            // The digits are tested as chars. Narrowing them to byte first
            // would let a non-ASCII char whose low byte happens to be a hex
            // digit (e.g. U+0141 -> 'A') slip through.
            if (index >= count - 2
                    || !isHexDigit(url.charAt(index + 1))
                    || !isHexDigit(url.charAt(index + 2))) {
                return false;
            }
            // Resume the search just past the two digits that were consumed.
            index = url.indexOf('%', index + 3);
        }
        return true;
    }

    /**
     * @return true iff {@code c} is one of {@code 0-9}, {@code A-F} or
     *         {@code a-f}
     */
    private static boolean isHexDigit(char c) {
        return (c >= '0' && c <= '9')
                || (c >= 'A' && c <= 'F')
                || (c >= 'a' && c <= 'f');
    }

    /**
     * Converts one ASCII hex digit to its numeric value.
     *
     * @param b the byte holding the digit
     * @return the value in the range 0-15
     * @throws IllegalArgumentException if {@code b} is not a hex digit
     */
    private static int parseHex(byte b) {
        if (b >= '0' && b <= '9') return (b - '0');
        if (b >= 'A' && b <= 'F') return (b - 'A' + 10);
        if (b >= 'a' && b <= 'f') return (b - 'a' + 10);

        // Show the offending character itself, not the numeric byte value.
        throw new IllegalArgumentException("Invalid hex char '" + (char) (b & 0xFF) + "'");
    }

    /**
     * @return True iff the url is an asset file, i.e. is non-null and starts
     *         with {@code file:///android_asset/}.
     */
    public static boolean isAssetUrl(String url) {
        return (null != url) && url.startsWith(ASSET_BASE);
    }

    /**
     * @return True iff the url is a resource file, i.e. is non-null and
     *         starts with {@code file:///android_res/}.
     * @hide
     */
    public static boolean isResourceUrl(String url) {
        return (null != url) && url.startsWith(RESOURCE_BASE);
    }

    /**
     * @return True iff the url is a proxy url to allow cookieless network
     * requests from a file url.
     * @deprecated Cookieless proxy is no longer supported.
     */
    @Deprecated
    public static boolean isCookielessProxyUrl(String url) {
        return (null != url) && url.startsWith(PROXY_BASE);
    }

    /**
     * @return True iff the url is a local file: it is non-null, starts with
     *         {@code file://}, and is neither an asset url nor a cookieless
     *         proxy url. Resource urls ({@code file:///android_res/}) are not
     *         excluded by this check.
     */
    public static boolean isFileUrl(String url) {
        return (null != url) && (url.startsWith(FILE_BASE) &&
                                 !url.startsWith(ASSET_BASE) &&
                                 !url.startsWith(PROXY_BASE));
    }

    /**
     * @return True iff the url is an about: url.
     */
    public static boolean isAboutUrl(String url) {
        return (null != url) && url.startsWith("about:");
    }

    /**
     * @return True iff the url is a data: url.
     */
    public static boolean isDataUrl(String url) {
        return (null != url) && url.startsWith("data:");
    }

    /**
     * @return True iff the url is a javascript: url.
     */
    public static boolean isJavaScriptUrl(String url) {
        return (null != url) && url.startsWith("javascript:");
    }

    /**
     * @return True iff the url is an http: url, i.e. is non-null and starts
     *         with {@code http://} in any letter case.
     */
    public static boolean isHttpUrl(String url) {
        // regionMatches returns false when url is shorter than the prefix,
        // so no separate length check or substring copy is needed.
        return (null != url) && url.regionMatches(true, 0, "http://", 0, 7);
    }

    /**
     * @return True iff the url is an https: url, i.e. is non-null and starts
     *         with {@code https://} in any letter case.
     */
    public static boolean isHttpsUrl(String url) {
        return (null != url) && url.regionMatches(true, 0, "https://", 0, 8);
    }

    /**
     * @return True iff the url is a network url (http or https).
     */
    public static boolean isNetworkUrl(String url) {
        if (url == null || url.length() == 0) {
            return false;
        }
        return isHttpUrl(url) || isHttpsUrl(url);
    }

    /**
     * @return True iff the url is a content: url.
     */
    public static boolean isContentUrl(String url) {
        return (null != url) && url.startsWith(CONTENT_BASE);
    }

    /**
     * @return True iff the url is valid, meaning it is non-empty and is an
     *         asset, resource, file, about:, http, https, javascript: or
     *         content: url. data: urls are not part of this list.
     */
    public static boolean isValidUrl(String url) {
        if (url == null || url.length() == 0) {
            return false;
        }

        return (isAssetUrl(url) ||
                isResourceUrl(url) ||
                isFileUrl(url) ||
                isAboutUrl(url) ||
                isHttpUrl(url) ||
                isHttpsUrl(url) ||
                isJavaScriptUrl(url) ||
                isContentUrl(url));
    }

    /**
     * Strips the url of the anchor.
     *
     * @param url the url to process; must not be null
     * @return the text before the first {@code '#'}, or {@code url} itself
     *         if it contains none
     */
    public static String stripAnchor(String url) {
        int anchorIndex = url.indexOf('#');
        if (anchorIndex != -1) {
            return url.substring(0, anchorIndex);
        }
        return url;
    }

    /**
     * Guesses canonical filename that a download would have, using
     * the URL and contentDisposition. File extension, if not defined,
     * is added based on the mimetype
     *
     * <p>The name is taken from the Content-Disposition header when it yields
     * a non-empty file name, otherwise from the last path segment of the
     * decoded URL (with any query string removed), otherwise it is
     * {@code "downloadfile"}.
     *
     * @param url Url to the content
     * @param contentDisposition Content-Disposition HTTP header or null
     * @param mimeType Mime-type of the content or null
     *
     * @return suggested filename
     */
    public static final String guessFileName(
            String url,
            String contentDisposition,
            String mimeType) {
        String filename = null;
        String extension = null;

        // First choice: the name suggested by the content disposition
        if (contentDisposition != null) {
            filename = parseContentDisposition(contentDisposition);
            if (filename != null) {
                // Drop any directory part the server supplied.
                int index = filename.lastIndexOf('/') + 1;
                if (index > 0) {
                    filename = filename.substring(index);
                }
                // An empty name (filename="" or filename="dir/") carries no
                // information; fall through to the URL instead of producing
                // an extension-only name such as ".bin".
                if (filename.length() == 0) {
                    filename = null;
                }
            }
        }

        // If all the other http-related approaches failed, use the plain uri
        if (filename == null) {
            String decodedUrl = Uri.decode(url);
            if (decodedUrl != null) {
                int queryIndex = decodedUrl.indexOf('?');
                // If there is a query string strip it, same as desktop browsers
                if (queryIndex > 0) {
                    decodedUrl = decodedUrl.substring(0, queryIndex);
                }
                if (!decodedUrl.endsWith("/")) {
                    int index = decodedUrl.lastIndexOf('/') + 1;
                    if (index > 0) {
                        filename = decodedUrl.substring(index);
                    }
                }
            }
        }

        // Finally, if couldn't get filename from URI, get a generic filename
        if (filename == null) {
            filename = "downloadfile";
        }

        // Split filename between base and extension
        // Add an extension if filename does not have one
        int dotIndex = filename.indexOf('.');
        if (dotIndex < 0) {
            if (mimeType != null) {
                extension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType);
                if (extension != null) {
                    extension = "." + extension;
                }
            }
            if (extension == null) {
                if (mimeType != null && mimeType.toLowerCase(Locale.ROOT).startsWith("text/")) {
                    if (mimeType.equalsIgnoreCase("text/html")) {
                        extension = ".html";
                    } else {
                        extension = ".txt";
                    }
                } else {
                    extension = ".bin";
                }
            }
        } else {
            if (mimeType != null) {
                // Compare the last segment of the extension against the mime type.
                // If there's a mismatch, discard the entire extension.
                int lastDotIndex = filename.lastIndexOf('.');
                String typeFromExt = MimeTypeMap.getSingleton().getMimeTypeFromExtension(
                        filename.substring(lastDotIndex + 1));
                if (typeFromExt != null && !typeFromExt.equalsIgnoreCase(mimeType)) {
                    extension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType);
                    if (extension != null) {
                        extension = "." + extension;
                    }
                }
            }
            if (extension == null) {
                // Keep everything from the first dot on (e.g. ".tar.gz").
                extension = filename.substring(dotIndex);
            }
            filename = filename.substring(0, dotIndex);
        }

        return filename + extension;
    }

    /** Regex used to parse content-disposition headers */
    private static final Pattern CONTENT_DISPOSITION_PATTERN =
            Pattern.compile("attachment;\\s*filename\\s*=\\s*(\"?)([^\"]*)\\1\\s*$",
            Pattern.CASE_INSENSITIVE);

    /**
     * Parse the Content-Disposition HTTP Header. The format of the header
     * is defined here: http://www.w3.org/Protocols/rfc2616/rfc2616-sec19.html
     * This header provides a filename for content that is going to be
     * downloaded to the file system. We only support the attachment type.
     * Note that RFC 2616 specifies the filename value must be double-quoted.
     * Unfortunately some servers do not quote the value so to maintain
     * consistent behaviour with other browsers, we allow unquoted values too.
     *
     * @param contentDisposition the header value; must not be null
     * @return the file name captured by the pattern (possibly empty), or
     *         null if the header does not match
     */
    static String parseContentDisposition(String contentDisposition) {
        try {
            Matcher m = CONTENT_DISPOSITION_PATTERN.matcher(contentDisposition);
            if (m.find()) {
                return m.group(2);
            }
        } catch (IllegalStateException ex) {
             // This function is defined as returning null when it can't parse the header
        }
        return null;
    }
}