1 /* 2 * Copyright (C) 2006 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package android.webkit; 18 19 import java.io.UnsupportedEncodingException; 20 import java.util.regex.Matcher; 21 import java.util.regex.Pattern; 22 23 import android.net.Uri; 24 import android.net.ParseException; 25 import android.net.WebAddress; 26 import android.util.Log; 27 28 public final class URLUtil { 29 30 private static final String LOGTAG = "webkit"; 31 32 static final String ASSET_BASE = "file:///android_asset/"; 33 static final String FILE_BASE = "file://"; 34 static final String PROXY_BASE = "file:///cookieless_proxy/"; 35 36 /** 37 * Cleans up (if possible) user-entered web addresses 38 */ guessUrl(String inUrl)39 public static String guessUrl(String inUrl) { 40 41 String retVal = inUrl; 42 WebAddress webAddress; 43 44 Log.v(LOGTAG, "guessURL before queueRequest: " + inUrl); 45 46 if (inUrl.length() == 0) return inUrl; 47 if (inUrl.startsWith("about:")) return inUrl; 48 // Do not try to interpret data scheme URLs 49 if (inUrl.startsWith("data:")) return inUrl; 50 // Do not try to interpret file scheme URLs 51 if (inUrl.startsWith("file:")) return inUrl; 52 // Do not try to interpret javascript scheme URLs 53 if (inUrl.startsWith("javascript:")) return inUrl; 54 55 // bug 762454: strip period off end of url 56 if (inUrl.endsWith(".") == true) { 57 inUrl = inUrl.substring(0, inUrl.length() - 1); 58 } 59 60 try { 61 webAddress = new WebAddress(inUrl); 62 } catch (ParseException ex) { 63 64 if (DebugFlags.URL_UTIL) { 65 Log.v(LOGTAG, "smartUrlFilter: failed to parse url = " + inUrl); 66 } 67 return retVal; 68 } 69 70 // Check host 71 if (webAddress.mHost.indexOf('.') == -1) { 72 // no dot: user probably entered a bare domain. try .com 73 webAddress.mHost = "www." + webAddress.mHost + ".com"; 74 } 75 return webAddress.toString(); 76 } 77 composeSearchUrl(String inQuery, String template, String queryPlaceHolder)78 public static String composeSearchUrl(String inQuery, String template, 79 String queryPlaceHolder) { 80 int placeHolderIndex = template.indexOf(queryPlaceHolder); 81 if (placeHolderIndex < 0) { 82 return null; 83 } 84 85 String query; 86 StringBuilder buffer = new StringBuilder(); 87 buffer.append(template.substring(0, placeHolderIndex)); 88 89 try { 90 query = java.net.URLEncoder.encode(inQuery, "utf-8"); 91 buffer.append(query); 92 } catch (UnsupportedEncodingException ex) { 93 return null; 94 } 95 96 buffer.append(template.substring( 97 placeHolderIndex + queryPlaceHolder.length())); 98 99 return buffer.toString(); 100 } 101 decode(byte[] url)102 public static byte[] decode(byte[] url) throws IllegalArgumentException { 103 if (url.length == 0) { 104 return new byte[0]; 105 } 106 107 // Create a new byte array with the same length to ensure capacity 108 byte[] tempData = new byte[url.length]; 109 110 int tempCount = 0; 111 for (int i = 0; i < url.length; i++) { 112 byte b = url[i]; 113 if (b == '%') { 114 if (url.length - i > 2) { 115 b = (byte) (parseHex(url[i + 1]) * 16 116 + parseHex(url[i + 2])); 117 i += 2; 118 } else { 119 throw new IllegalArgumentException("Invalid format"); 120 } 121 } 122 tempData[tempCount++] = b; 123 } 124 byte[] retData = new byte[tempCount]; 125 System.arraycopy(tempData, 0, retData, 0, tempCount); 126 return retData; 127 } 128 129 /** 130 * @return True iff the url is correctly URL encoded 131 */ verifyURLEncoding(String url)132 static boolean verifyURLEncoding(String url) { 133 int count = url.length(); 134 if (count == 0) { 135 return false; 136 } 137 138 int index = url.indexOf('%'); 139 while (index >= 0 && index < count) { 140 if (index < count - 2) { 141 try { 142 parseHex((byte) url.charAt(++index)); 143 parseHex((byte) url.charAt(++index)); 144 } catch (IllegalArgumentException e) { 145 return false; 146 } 147 } else { 148 return false; 149 } 150 index = url.indexOf('%', index + 1); 151 } 152 return true; 153 } 154 parseHex(byte b)155 private static int parseHex(byte b) { 156 if (b >= '0' && b <= '9') return (b - '0'); 157 if (b >= 'A' && b <= 'F') return (b - 'A' + 10); 158 if (b >= 'a' && b <= 'f') return (b - 'a' + 10); 159 160 throw new IllegalArgumentException("Invalid hex char '" + b + "'"); 161 } 162 163 /** 164 * @return True iff the url is an asset file. 165 */ isAssetUrl(String url)166 public static boolean isAssetUrl(String url) { 167 return (null != url) && url.startsWith(ASSET_BASE); 168 } 169 170 /** 171 * @return True iff the url is an proxy url to allow cookieless network 172 * requests from a file url. 173 * @deprecated Cookieless proxy is no longer supported. 174 */ 175 @Deprecated isCookielessProxyUrl(String url)176 public static boolean isCookielessProxyUrl(String url) { 177 return (null != url) && url.startsWith(PROXY_BASE); 178 } 179 180 /** 181 * @return True iff the url is a local file. 182 */ isFileUrl(String url)183 public static boolean isFileUrl(String url) { 184 return (null != url) && (url.startsWith(FILE_BASE) && 185 !url.startsWith(ASSET_BASE) && 186 !url.startsWith(PROXY_BASE)); 187 } 188 189 /** 190 * @return True iff the url is an about: url. 191 */ isAboutUrl(String url)192 public static boolean isAboutUrl(String url) { 193 return (null != url) && url.startsWith("about:"); 194 } 195 196 /** 197 * @return True iff the url is a data: url. 198 */ isDataUrl(String url)199 public static boolean isDataUrl(String url) { 200 return (null != url) && url.startsWith("data:"); 201 } 202 203 /** 204 * @return True iff the url is a javascript: url. 205 */ isJavaScriptUrl(String url)206 public static boolean isJavaScriptUrl(String url) { 207 return (null != url) && url.startsWith("javascript:"); 208 } 209 210 /** 211 * @return True iff the url is an http: url. 212 */ isHttpUrl(String url)213 public static boolean isHttpUrl(String url) { 214 return (null != url) && 215 (url.length() > 6) && 216 url.substring(0, 7).equalsIgnoreCase("http://"); 217 } 218 219 /** 220 * @return True iff the url is an https: url. 221 */ isHttpsUrl(String url)222 public static boolean isHttpsUrl(String url) { 223 return (null != url) && 224 (url.length() > 7) && 225 url.substring(0, 8).equalsIgnoreCase("https://"); 226 } 227 228 /** 229 * @return True iff the url is a network url. 230 */ isNetworkUrl(String url)231 public static boolean isNetworkUrl(String url) { 232 if (url == null || url.length() == 0) { 233 return false; 234 } 235 return isHttpUrl(url) || isHttpsUrl(url); 236 } 237 238 /** 239 * @return True iff the url is a content: url. 240 */ isContentUrl(String url)241 public static boolean isContentUrl(String url) { 242 return (null != url) && url.startsWith("content:"); 243 } 244 245 /** 246 * @return True iff the url is valid. 247 */ isValidUrl(String url)248 public static boolean isValidUrl(String url) { 249 if (url == null || url.length() == 0) { 250 return false; 251 } 252 253 return (isAssetUrl(url) || 254 isFileUrl(url) || 255 isAboutUrl(url) || 256 isHttpUrl(url) || 257 isHttpsUrl(url) || 258 isJavaScriptUrl(url) || 259 isContentUrl(url)); 260 } 261 262 /** 263 * Strips the url of the anchor. 264 */ stripAnchor(String url)265 public static String stripAnchor(String url) { 266 int anchorIndex = url.indexOf('#'); 267 if (anchorIndex != -1) { 268 return url.substring(0, anchorIndex); 269 } 270 return url; 271 } 272 273 /** 274 * Guesses canonical filename that a download would have, using 275 * the URL and contentDisposition. File extension, if not defined, 276 * is added based on the mimetype 277 * @param url Url to the content 278 * @param contentDisposition Content-Disposition HTTP header or null 279 * @param mimeType Mime-type of the content or null 280 * 281 * @return suggested filename 282 */ guessFileName( String url, String contentDisposition, String mimeType)283 public static final String guessFileName( 284 String url, 285 String contentDisposition, 286 String mimeType) { 287 String filename = null; 288 String extension = null; 289 290 // If we couldn't do anything with the hint, move toward the content disposition 291 if (filename == null && contentDisposition != null) { 292 filename = parseContentDisposition(contentDisposition); 293 if (filename != null) { 294 int index = filename.lastIndexOf('/') + 1; 295 if (index > 0) { 296 filename = filename.substring(index); 297 } 298 } 299 } 300 301 // If all the other http-related approaches failed, use the plain uri 302 if (filename == null) { 303 String decodedUrl = Uri.decode(url); 304 if (decodedUrl != null) { 305 int queryIndex = decodedUrl.indexOf('?'); 306 // If there is a query string strip it, same as desktop browsers 307 if (queryIndex > 0) { 308 decodedUrl = decodedUrl.substring(0, queryIndex); 309 } 310 if (!decodedUrl.endsWith("/")) { 311 int index = decodedUrl.lastIndexOf('/') + 1; 312 if (index > 0) { 313 filename = decodedUrl.substring(index); 314 } 315 } 316 } 317 } 318 319 // Finally, if couldn't get filename from URI, get a generic filename 320 if (filename == null) { 321 filename = "downloadfile"; 322 } 323 324 // Split filename between base and extension 325 // Add an extension if filename does not have one 326 int dotIndex = filename.indexOf('.'); 327 if (dotIndex < 0) { 328 if (mimeType != null) { 329 extension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType); 330 if (extension != null) { 331 extension = "." + extension; 332 } 333 } 334 if (extension == null) { 335 if (mimeType != null && mimeType.toLowerCase().startsWith("text/")) { 336 if (mimeType.equalsIgnoreCase("text/html")) { 337 extension = ".html"; 338 } else { 339 extension = ".txt"; 340 } 341 } else { 342 extension = ".bin"; 343 } 344 } 345 } else { 346 if (mimeType != null) { 347 // Compare the last segment of the extension against the mime type. 348 // If there's a mismatch, discard the entire extension. 349 int lastDotIndex = filename.lastIndexOf('.'); 350 String typeFromExt = MimeTypeMap.getSingleton().getMimeTypeFromExtension( 351 filename.substring(lastDotIndex + 1)); 352 if (typeFromExt != null && !typeFromExt.equalsIgnoreCase(mimeType)) { 353 extension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType); 354 if (extension != null) { 355 extension = "." + extension; 356 } 357 } 358 } 359 if (extension == null) { 360 extension = filename.substring(dotIndex); 361 } 362 filename = filename.substring(0, dotIndex); 363 } 364 365 return filename + extension; 366 } 367 368 /** Regex used to parse content-disposition headers */ 369 private static final Pattern CONTENT_DISPOSITION_PATTERN = 370 Pattern.compile("attachment;\\s*filename\\s*=\\s*\"([^\"]*)\""); 371 372 /* 373 * Parse the Content-Disposition HTTP Header. The format of the header 374 * is defined here: http://www.w3.org/Protocols/rfc2616/rfc2616-sec19.html 375 * This header provides a filename for content that is going to be 376 * downloaded to the file system. We only support the attachment type. 377 */ parseContentDisposition(String contentDisposition)378 static String parseContentDisposition(String contentDisposition) { 379 try { 380 Matcher m = CONTENT_DISPOSITION_PATTERN.matcher(contentDisposition); 381 if (m.find()) { 382 return m.group(1); 383 } 384 } catch (IllegalStateException ex) { 385 // This function is defined as returning null when it can't parse the header 386 } 387 return null; 388 } 389 } 390