//
//  ========================================================================
//  Copyright (c) 1995-2014 Mort Bay Consulting Pty. Ltd.
//  ------------------------------------------------------------------------
//  All rights reserved. This program and the accompanying materials
//  are made available under the terms of the Eclipse Public License v1.0
//  and Apache License v2.0 which accompanies this distribution.
//
//      The Eclipse Public License is available at
//      http://www.eclipse.org/legal/epl-v10.html
//
//      The Apache License v2.0 is available at
//      http://www.opensource.org/licenses/apache2.0.php
//
//  You may elect to redistribute this code under either of these licenses.
//  ========================================================================
//

package org.eclipse.jetty.util;

import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.net.URLEncoder;

import org.eclipse.jetty.util.log.Log;



/* ------------------------------------------------------------ */
/** URI Holder.
 * This class assists with the decoding and encoding of HTTP URIs.
 * It differs from the java.net.URL class as it does not provide
 * communications ability, but it does assist with query string
 * formatting.
 * <P>UTF-8 encoding is used by default for % encoded characters. This
 * may be overridden with the org.eclipse.jetty.util.URI.charset system property.
 * @see UrlEncoded
 *
 */
public class URIUtil
    implements Cloneable
{
    public static final String SLASH="/";
    public static final String HTTP="http";
    public static final String HTTP_COLON="http:";
    public static final String HTTPS="https";
    public static final String HTTPS_COLON="https:";

    // Use UTF-8 as per http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars
    public static final String __CHARSET=System.getProperty("org.eclipse.jetty.util.URI.charset",StringUtil.__UTF8);

    /** Static utility class: not instantiable. */
    private URIUtil()
    {}

    /* ------------------------------------------------------------ */
    /** Encode a URI path.
     * The characters <code>% ? ; # ' " &lt; &gt;</code> and space are
     * %-escaped, and any character above 127 is converted to bytes using
     * {@link #__CHARSET} with each byte %-escaped. The '/' character is
     * not encoded.
     * @param path The path to encode (may be null or empty)
     * @return The encoded path, or <code>path</code> itself if it is null,
     * empty or needs no encoding.
     */
    public static String encodePath(String path)
    {
        if (path==null || path.length()==0)
            return path;

        StringBuilder buf = encodePath(null,path);
        return buf==null?path:buf.toString();
    }

    /* ------------------------------------------------------------ */
    /** Encode a URI path.
     * <p>The whole path is inspected for characters above 127 before any
     * output is produced, so that such characters are always %-encoded as
     * their {@link #__CHARSET} bytes, regardless of whether a buffer was
     * supplied by the caller and regardless of where in the path they occur.
     * @param buf StringBuilder to encode path into (or null)
     * @param path The path to encode (must not be null)
     * @return The StringBuilder the encoded path was appended to, or null
     * if <code>buf</code> was null and no substitutions were required.
     * @throws IllegalStateException if {@link #__CHARSET} is not a supported encoding
     */
    public static StringBuilder encodePath(StringBuilder buf, String path)
    {
        // Single pre-scan: do we need to escape anything, and is there any
        // non-ASCII character forcing the byte-wise encoding below?
        boolean reserved=false;
        boolean nonAscii=false;
        for (int i=0;i<path.length() && !nonAscii;i++)
        {
            char c=path.charAt(i);
            if (c>127)
                nonAscii=true;
            else if (isPathReserved(c))
                reserved=true;
        }

        if (buf==null)
        {
            if (!reserved && !nonAscii)
                return null;
            buf=new StringBuilder(path.length()*2);
        }

        // NOTE(review): locking on the builder is kept from the original
        // implementation; it only protects callers that lock on the same buffer.
        synchronized(buf)
        {
            if (nonAscii)
            {
                byte[] bytes;
                try
                {
                    bytes=path.getBytes(URIUtil.__CHARSET);
                }
                catch (UnsupportedEncodingException e)
                {
                    throw new IllegalStateException(e);
                }

                for (int i=0;i<bytes.length;i++)
                {
                    byte b=bytes[i];
                    if (b<0)
                    {
                        // High-bit byte of an encoded non-ASCII character
                        buf.append('%');
                        TypeUtil.toHex(b,buf);
                    }
                    else
                        appendPathChar(buf,(char)b);
                }
            }
            else
            {
                for (int i=0;i<path.length();i++)
                    appendPathChar(buf,path.charAt(i));
            }
        }

        return buf;
    }

    /* ------------------------------------------------------------ */
    /** Test whether an ASCII character is one that encodePath escapes. */
    private static boolean isPathReserved(char c)
    {
        switch(c)
        {
            case '%':
            case '?':
            case ';':
            case '#':
            case '\'':
            case '"':
            case '<':
            case '>':
            case ' ':
                return true;
            default:
                return false;
        }
    }

    /* ------------------------------------------------------------ */
    /** Append one ASCII path character, substituting its %-escape if it is reserved. */
    private static void appendPathChar(StringBuilder buf, char c)
    {
        switch(c)
        {
            case '%':
                buf.append("%25");
                break;
            case '?':
                buf.append("%3F");
                break;
            case ';':
                buf.append("%3B");
                break;
            case '#':
                buf.append("%23");
                break;
            case '"':
                buf.append("%22");
                break;
            case '\'':
                buf.append("%27");
                break;
            case '<':
                buf.append("%3C");
                break;
            case '>':
                buf.append("%3E");
                break;
            case ' ':
                buf.append("%20");
                break;
            default:
                buf.append(c);
        }
    }

    /* ------------------------------------------------------------ */
    /** Encode selected characters of a string.
     * Each character to be encoded is written as '%' followed by the
     * base 16 form of its low 8 bits (via StringUtil.append); characters
     * above 0xFF listed in <code>encode</code> are therefore truncated.
     * @param buf StringBuilder to encode path into (or null)
     * @param path The path to encode (must not be null)
     * @param encode String of characters to encode (must not be null). % is always encoded.
     * @return The StringBuilder or null if <code>buf</code> was null and no substitutions were required.
     */
    public static StringBuilder encodeString(StringBuilder buf,
                                             String path,
                                             String encode)
    {
        if (buf==null)
        {
            // Only allocate a buffer if at least one character needs encoding
            for (int i=0;i<path.length();i++)
            {
                char c=path.charAt(i);
                if (c=='%' || encode.indexOf(c)>=0)
                {
                    buf=new StringBuilder(path.length()<<1);
                    break;
                }
            }
            if (buf==null)
                return null;
        }

        // NOTE(review): lock kept from the original implementation.
        synchronized(buf)
        {
            for (int i=0;i<path.length();i++)
            {
                char c=path.charAt(i);
                if (c=='%' || encode.indexOf(c)>=0)
                {
                    buf.append('%');
                    StringUtil.append(buf,(byte)(0xff&c),16);
                }
                else
                    buf.append(c);
            }
        }

        return buf;
    }

    /* ------------------------------------------------------------ */
    /** Decode a URI path and strip parameters.
     * Each <code>%xx</code> sequence is converted to a byte, and runs of
     * such bytes are decoded to characters using {@link #__CHARSET}
     * (falling back to the platform default charset if that encoding is
     * unsupported). A '%' with fewer than two characters after it is
     * copied as is. Decoding stops at the first ';' and everything from
     * that character on is discarded.
     * @param path The path to decode (may be null)
     * @return The decoded path, the same String instance if nothing needed
     * decoding or stripping, or null if <code>path</code> is null.
     */
    public static String decodePath(String path)
    {
        if (path==null)
            return null;
        // Array to hold all converted characters
        char[] chars=null;
        int n=0;
        // Array to hold a sequence of %encodings
        byte[] bytes=null;
        int b=0;

        int len=path.length();

        for (int i=0;i<len;i++)
        {
            char c = path.charAt(i);

            if (c=='%' && (i+2)<len)
            {
                if (chars==null)
                {
                    // First escape seen: start copying, n already equals i
                    chars=new char[len];
                    bytes=new byte[len];
                    path.getChars(0,i,chars,0);
                }
                bytes[b++]=(byte)(0xff&TypeUtil.parseInt(path,i+1,2,16));
                i+=2;
                continue;
            }
            else if (c==';')
            {
                if (chars==null)
                {
                    chars=new char[len];
                    path.getChars(0,i,chars,0);
                    n=i;
                }
                break;
            }
            else if (bytes==null)
            {
                // Nothing decoded so far: just count the untouched prefix
                n++;
                continue;
            }

            // Do we have some bytes to convert?
            if (b>0)
            {
                n=appendDecoded(bytes,b,chars,n);
                b=0;
            }

            chars[n++]=c;
        }

        if (chars==null)
            return path;

        // if we have a remaining sequence of bytes
        if (b>0)
            n=appendDecoded(bytes,b,chars,n);

        return new String(chars,0,n);
    }

    /* ------------------------------------------------------------ */
    /** Decode the first <code>count</code> bytes to characters, store them in
     * <code>chars</code> at <code>pos</code> and return the new write position.
     */
    private static int appendDecoded(byte[] bytes, int count, char[] chars, int pos)
    {
        String s;
        try
        {
            s=new String(bytes,0,count,__CHARSET);
        }
        catch (UnsupportedEncodingException e)
        {
            // Best effort: fall back to the platform default charset
            s=new String(bytes,0,count);
        }
        s.getChars(0,s.length(),chars,pos);
        return pos+s.length();
    }

    /* ------------------------------------------------------------ */
    /** Decode a URI path held in a byte array and strip parameters.
     * Each <code>%xx</code> sequence is replaced by the byte it denotes
     * and the result is converted to a String using {@link #__CHARSET}.
     * Decoding stops at the first ';' and everything from that byte on is
     * discarded.
     * @param buf The bytes holding the encoded path
     * @param offset The index in <code>buf</code> of the first byte of the path
     * @param length The number of bytes in the path
     * @return The decoded path
     */
    public static String decodePath(byte[] buf, int offset, int length)
    {
        byte[] bytes=null;
        int n=0;

        for (int i=0;i<length;i++)
        {
            byte b = buf[i + offset];

            if (b=='%' && (i+2)<length)
            {
                b=(byte)(0xff&TypeUtil.parseInt(buf,i+offset+1,2,16));
                i+=2;
            }
            else if (b==';')
            {
                length=i;
                break;
            }
            else if (bytes==null)
            {
                // Nothing decoded so far: just count the untouched prefix
                n++;
                continue;
            }

            if (bytes==null)
            {
                // First escape seen: copy the untouched prefix
                bytes=new byte[length];
                System.arraycopy(buf,offset,bytes,0,n);
            }

            bytes[n++]=b;
        }

        if (bytes==null)
            return StringUtil.toString(buf,offset,length,__CHARSET);
        return StringUtil.toString(bytes,0,n,__CHARSET);
    }


    /* ------------------------------------------------------------ */
    /** Add two URI path segments.
     * Handles null and empty paths, path and query params (eg ?a=b or
     * ;JSESSIONID=xxx) and avoids duplicate '/'. The second segment is
     * inserted before any ';' or '?' parameters of the first.
     * @param p1 URI path segment (should be encoded)
     * @param p2 URI path segment (should be encoded)
     * @return Legally combined path segments.
     */
    public static String addPaths(String p1, String p2)
    {
        if (p1==null || p1.length()==0)
        {
            if (p1!=null && p2==null)
                return p1;
            return p2;
        }
        if (p2==null || p2.length()==0)
            return p1;

        // Find where the parameters of p1 (if any) start
        int split=p1.indexOf(';');
        if (split<0)
            split=p1.indexOf('?');
        if (split==0)
            return p2+p1;
        if (split<0)
            split=p1.length();

        StringBuilder buf = new StringBuilder(p1.length()+p2.length()+2);
        buf.append(p1);

        if (buf.charAt(split-1)=='/')
        {
            if (p2.startsWith(URIUtil.SLASH))
            {
                // Both have a slash: drop the one from p1
                buf.deleteCharAt(split-1);
                buf.insert(split-1,p2);
            }
            else
                buf.insert(split,p2);
        }
        else
        {
            if (p2.startsWith(URIUtil.SLASH))
                buf.insert(split,p2);
            else
            {
                // Neither has a slash: add one
                buf.insert(split,'/');
                buf.insert(split+1,p2);
            }
        }

        return buf.toString();
    }

    /* ------------------------------------------------------------ */
    /** Return the parent Path.
     * Treat a URI like a directory path and return the parent directory.
     * @param p The path (may be null)
     * @return The path up to and including the last '/' that is not the
     * final character of <code>p</code>, or null if <code>p</code> is null,
     * is "/" or contains no such '/'.
     */
    public static String parentPath(String p)
    {
        if (p==null || URIUtil.SLASH.equals(p))
            return null;
        // Start the search before the last character to skip a trailing '/'
        int slash=p.lastIndexOf('/',p.length()-2);
        if (slash>=0)
            return p.substring(0,slash+1);
        return null;
    }

    /* ------------------------------------------------------------ */
    /** Convert a path to a canonical form.
     * All instances of "." and ".." are factored out.  Null is returned
     * if the path tries to .. above its root.
     * @param path The path to convert (may be null or empty)
     * @return path or null.
     */
    public static String canonicalPath(String path)
    {
        if (path==null || path.length()==0)
            return path;

        int end=path.length();
        int start = path.lastIndexOf('/', end);

        // First pass, right to left over the unmodified string: look for any
        // "." or ".." segment so the common case needs no copy.
    search:
        while (end>0)
        {
            switch(end-start)
            {
                case 2: // possible single dot
                    if (path.charAt(start+1)!='.')
                        break;
                    break search;
                case 3: // possible double dot
                    if (path.charAt(start+1)!='.' || path.charAt(start+2)!='.')
                        break;
                    break search;
            }

            end=start;
            start=path.lastIndexOf('/',end-1);
        }

        // If we have checked the entire string
        if (start>=end)
            return path;

        // Second pass: continue right to left from the segment found above,
        // deleting dot segments. skip counts ".." segments still waiting for
        // a preceding segment to remove; delStart/delEnd bound the pending deletion.
        StringBuilder buf = new StringBuilder(path);
        int delStart=-1;
        int delEnd=-1;
        int skip=0;

        while (end>0)
        {
            switch(end-start)
            {
                case 2: // possible single dot
                    if (buf.charAt(start+1)!='.')
                    {
                        if (skip>0 && --skip==0)
                        {
                            delStart=start>=0?start:0;
                            if(delStart>0 && delEnd==buf.length() && buf.charAt(delEnd-1)=='.')
                                delStart++;
                        }
                        break;
                    }

                    if(start<0 && buf.length()>2 && buf.charAt(1)=='/' && buf.charAt(2)=='/')
                        break;

                    if(delEnd<0)
                        delEnd=end;
                    delStart=start;
                    if (delStart<0 || delStart==0&&buf.charAt(delStart)=='/')
                    {
                        delStart++;
                        if (delEnd<buf.length() && buf.charAt(delEnd)=='/')
                            delEnd++;
                        break;
                    }
                    if (end==buf.length())
                        delStart++;

                    end=start--;
                    while (start>=0 && buf.charAt(start)!='/')
                        start--;
                    continue;

                case 3: // possible double dot
                    if (buf.charAt(start+1)!='.' || buf.charAt(start+2)!='.')
                    {
                        if (skip>0 && --skip==0)
                        {
                            delStart=start>=0?start:0;
                            if(delStart>0 && delEnd==buf.length() && buf.charAt(delEnd-1)=='.')
                                delStart++;
                        }
                        break;
                    }

                    delStart=start;
                    if (delEnd<0)
                        delEnd=end;

                    skip++;
                    end=start--;
                    while (start>=0 && buf.charAt(start)!='/')
                        start--;
                    continue;

                default:
                    if (skip>0 && --skip==0)
                    {
                        delStart=start>=0?start:0;
                        if(delEnd==buf.length() && buf.charAt(delEnd-1)=='.')
                            delStart++;
                    }
            }

            // Do the delete
            if (skip<=0 && delStart>=0 && delEnd>=delStart)
            {
                buf.delete(delStart,delEnd);
                delStart=delEnd=-1;
                if (skip>0)
                    delEnd=end;
            }

            // Step left to the previous segment
            end=start--;
            while (start>=0 && buf.charAt(start)!='/')
                start--;
        }

        // Too many ..
        if (skip>0)
            return null;

        // Do the delete
        if (delEnd>=0)
            buf.delete(delStart,delEnd);

        return buf.toString();
    }

    /* ------------------------------------------------------------ */
    /** Convert a path to a compact form.
     * All instances of "//" and "///" etc. are factored out to single "/".
     * Compaction stops at the first '?'; the rest of the string is copied
     * unchanged.
     * @param path The path to compact (may be null or empty)
     * @return The compacted path, or <code>path</code> itself if it
     * contains no repeated '/' before any '?'.
     */
    public static String compactPath(String path)
    {
        if (path==null || path.length()==0)
            return path;

        // state counts the consecutive '/' characters just seen
        int state=0;
        int end=path.length();
        int i=0;

        // Find the first "//"; i is left on its second slash
    loop:
        while (i<end)
        {
            char c=path.charAt(i);
            switch(c)
            {
                case '?':
                    return path;
                case '/':
                    state++;
                    if (state==2)
                        break loop;
                    break;
                default:
                    state=0;
            }
            i++;
        }

        if (state<2)
            return path;

        // Local buffer only, so the unsynchronized StringBuilder is sufficient
        StringBuilder buf = new StringBuilder(path.length());
        buf.append(path,0,i);

    loop2:
        while (i<end)
        {
            char c=path.charAt(i);
            switch(c)
            {
                case '?':
                    buf.append(path,i,end);
                    break loop2;
                case '/':
                    // Only the first slash of a run is kept
                    if (state++==0)
                        buf.append(c);
                    break;
                default:
                    state=0;
                    buf.append(c);
            }
            i++;
        }

        return buf.toString();
    }

    /* ------------------------------------------------------------ */
    /** Test whether a URI string starts with a scheme.
     * A scheme is a letter followed by any number of letters, digits,
     * '.', '+' or '-' and terminated by ':'. An empty scheme (a URI
     * starting with ':') is not accepted.
     * @param uri URI (may be null)
     * @return True if the uri has a scheme
     */
    public static boolean hasScheme(String uri)
    {
        if (uri==null)
            return false;

        for (int i=0;i<uri.length();i++)
        {
            char c=uri.charAt(i);
            if (c==':')
                return i>0; // need at least one scheme character before ':'
            if (!(c>='a'&&c<='z' ||
                  c>='A'&&c<='Z' ||
                  (i>0 &&(c>='0'&&c<='9' ||
                          c=='.' ||
                          c=='+' ||
                          c=='-'))
                  ))
                break;
        }
        return false;
    }

}