/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package java.lang;

import java.io.Serializable;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.Charsets;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Formatter;
import java.util.Locale;
import java.util.regex.Pattern;
import libcore.util.EmptyArray;

/**
 * An immutable sequence of characters/code units ({@code char}s). A
 * {@code String} is represented by an array of UTF-16 values, such that
 * Unicode supplementary characters (code points) are stored/encoded as
 * surrogate pairs via Unicode code units ({@code char}).
 *
 * <a name="backing_array"><h3>Backing Arrays</h3></a>
 * This class is implemented using a char[]. The length of the array may exceed
 * the length of the string. For example, the string "Hello" may be backed by
 * the array {@code ['H', 'e', 'l', 'l', 'o', 'W', 'o', 'r', 'l', 'd']} with
 * offset 0 and length 5.
 *
 * <p>Multiple strings can share the same char[] because strings are immutable.
* The {@link #substring} method <strong>always</strong> returns a string that
 * shares the backing array of its source string. Generally this is an
 * optimization: fewer character arrays need to be allocated, and less copying
 * is necessary. But this can also lead to unwanted heap retention. Taking a
 * short substring of a long string means that the long shared char[] won't be
 * garbage until both strings are garbage. This typically happens when parsing
 * small substrings out of a large input. To avoid this where necessary, call
 * {@code new String(longString.substring(...))}. The string copy constructor
 * always ensures that the backing array is no larger than necessary.
 *
 * @see StringBuffer
 * @see StringBuilder
 * @see Charset
 * @since 1.0
 */
public final class String implements Serializable, Comparable<String>, CharSequence {

    // Serialization version identifier for String.
    private static final long serialVersionUID = -6849794470754667710L;

    // U+FFFD, the character emitted by the inline UTF-8 decoder for malformed input.
    private static final char REPLACEMENT_CHAR = (char) 0xfffd;

    /**
     * CaseInsensitiveComparator compares Strings ignoring the case of the
     * characters. It is serializable and carries its own
     * {@code serialVersionUID}.
     */
    private static final class CaseInsensitiveComparator implements
            Comparator<String>, Serializable {
        private static final long serialVersionUID = 8575799808933029326L;

        /**
         * Compares the two strings to determine their relative ordering by
         * delegating to {@code o1.compareToIgnoreCase(o2)}.
         *
         * @param o1
         *            the first string to compare; it is dereferenced, so it
         *            must not be {@code null}
         * @param o2
         *            the second string to compare
         * @return an int < 0 if {@code o1} is less than {@code o2}, 0 if they
         *         are equal, and > 0 if {@code o1} is greater
         */
        public int compare(String o1, String o2) {
            return o1.compareToIgnoreCase(o2);
        }
    }

    /**
     * A comparator ignoring the case of the characters.
95 */ 96 public static final Comparator<String> CASE_INSENSITIVE_ORDER = new CaseInsensitiveComparator(); 97 98 private static final char[] ASCII; 99 static { 100 ASCII = new char[128]; 101 for (int i = 0; i < ASCII.length; ++i) { 102 ASCII[i] = (char) i; 103 } 104 } 105 106 private final char[] value; 107 108 private final int offset; 109 110 private final int count; 111 112 private int hashCode; 113 114 /** 115 * Creates an empty string. 116 */ String()117 public String() { 118 value = EmptyArray.CHAR; 119 offset = 0; 120 count = 0; 121 } 122 123 /* 124 * Private constructor used for JIT optimization. 125 */ 126 @SuppressWarnings("unused") String(String s, char c)127 private String(String s, char c) { 128 offset = 0; 129 value = new char[s.count + 1]; 130 count = s.count + 1; 131 System.arraycopy(s.value, s.offset, value, 0, s.count); 132 value[s.count] = c; 133 } 134 135 /** 136 * Converts the byte array to a string using the system's 137 * {@link java.nio.charset.Charset#defaultCharset default charset}. 138 */ 139 @FindBugsSuppressWarnings("DM_DEFAULT_ENCODING") String(byte[] data)140 public String(byte[] data) { 141 this(data, 0, data.length); 142 } 143 144 /** 145 * Converts the byte array to a string, setting the high byte of every 146 * character to the specified value. 147 * 148 * @param data 149 * the byte array to convert to a string. 150 * @param high 151 * the high byte to use. 152 * @throws NullPointerException 153 * if {@code data == null}. 154 * @deprecated Use {@link #String(byte[])} or {@link #String(byte[], String)} instead. 155 */ 156 @Deprecated String(byte[] data, int high)157 public String(byte[] data, int high) { 158 this(data, high, 0, data.length); 159 } 160 161 /** 162 * Converts a subsequence of the byte array to a string using the system's 163 * {@link java.nio.charset.Charset#defaultCharset default charset}. 164 * 165 * @throws NullPointerException 166 * if {@code data == null}. 
167 * @throws IndexOutOfBoundsException 168 * if {@code byteCount < 0 || offset < 0 || offset + byteCount > data.length}. 169 */ String(byte[] data, int offset, int byteCount)170 public String(byte[] data, int offset, int byteCount) { 171 if ((offset | byteCount) < 0 || byteCount > data.length - offset) { 172 throw failedBoundsCheck(data.length, offset, byteCount); 173 } 174 CharBuffer cb = Charset.defaultCharset().decode(ByteBuffer.wrap(data, offset, byteCount)); 175 this.count = cb.length(); 176 this.offset = 0; 177 if (count > 0) { 178 value = cb.array(); 179 } else { 180 value = EmptyArray.CHAR; 181 } 182 } 183 184 /** 185 * Converts the byte array to a string, setting the high byte of every 186 * character to {@code high}. 187 * 188 * @throws NullPointerException 189 * if {@code data == null}. 190 * @throws IndexOutOfBoundsException 191 * if {@code byteCount < 0 || offset < 0 || offset + byteCount > data.length} 192 * 193 * @deprecated Use {@link #String(byte[], int, int)} instead. 194 */ 195 @Deprecated String(byte[] data, int high, int offset, int byteCount)196 public String(byte[] data, int high, int offset, int byteCount) { 197 if ((offset | byteCount) < 0 || byteCount > data.length - offset) { 198 throw failedBoundsCheck(data.length, offset, byteCount); 199 } 200 this.offset = 0; 201 this.value = new char[byteCount]; 202 this.count = byteCount; 203 high <<= 8; 204 for (int i = 0; i < count; i++) { 205 value[i] = (char) (high + (data[offset++] & 0xff)); 206 } 207 } 208 209 /** 210 * Converts the byte array to a string using the named charset. 211 * 212 * <p>The behavior when the bytes cannot be decoded by the named charset 213 * is unspecified. Use {@link java.nio.charset.CharsetDecoder} for more control. 214 * 215 * @throws NullPointerException 216 * if {@code data == null}. 217 * @throws IndexOutOfBoundsException 218 * if {@code byteCount < 0 || offset < 0 || offset + byteCount > data.length}. 
219 * @throws UnsupportedEncodingException 220 * if the named charset is not supported. 221 */ String(byte[] data, int offset, int byteCount, String charsetName)222 public String(byte[] data, int offset, int byteCount, String charsetName) throws UnsupportedEncodingException { 223 this(data, offset, byteCount, Charset.forNameUEE(charsetName)); 224 } 225 226 /** 227 * Converts the byte array to a string using the named charset. 228 * 229 * <p>The behavior when the bytes cannot be decoded by the named charset 230 * is unspecified. Use {@link java.nio.charset.CharsetDecoder} for more control. 231 * 232 * @throws NullPointerException 233 * if {@code data == null}. 234 * @throws UnsupportedEncodingException 235 * if {@code charsetName} is not supported. 236 */ String(byte[] data, String charsetName)237 public String(byte[] data, String charsetName) throws UnsupportedEncodingException { 238 this(data, 0, data.length, Charset.forNameUEE(charsetName)); 239 } 240 241 /** 242 * Converts the byte array to a string using the given charset. 243 * 244 * <p>The behavior when the bytes cannot be decoded by the given charset 245 * is to replace malformed input and unmappable characters with the charset's default 246 * replacement string. Use {@link java.nio.charset.CharsetDecoder} for more control. 247 * 248 * @throws IndexOutOfBoundsException 249 * if {@code byteCount < 0 || offset < 0 || offset + byteCount > data.length} 250 * @throws NullPointerException 251 * if {@code data == null} 252 * 253 * @since 1.6 254 */ String(byte[] data, int offset, int byteCount, Charset charset)255 public String(byte[] data, int offset, int byteCount, Charset charset) { 256 if ((offset | byteCount) < 0 || byteCount > data.length - offset) { 257 throw failedBoundsCheck(data.length, offset, byteCount); 258 } 259 260 // We inline UTF-8, ISO-8859-1, and US-ASCII decoders for speed and because 'count' and 261 // 'value' are final. 
262 String canonicalCharsetName = charset.name(); 263 if (canonicalCharsetName.equals("UTF-8")) { 264 byte[] d = data; 265 char[] v = new char[byteCount]; 266 267 int idx = offset; 268 int last = offset + byteCount; 269 int s = 0; 270 outer: 271 while (idx < last) { 272 byte b0 = d[idx++]; 273 if ((b0 & 0x80) == 0) { 274 // 0xxxxxxx 275 // Range: U-00000000 - U-0000007F 276 int val = b0 & 0xff; 277 v[s++] = (char) val; 278 } else if (((b0 & 0xe0) == 0xc0) || ((b0 & 0xf0) == 0xe0) || 279 ((b0 & 0xf8) == 0xf0) || ((b0 & 0xfc) == 0xf8) || ((b0 & 0xfe) == 0xfc)) { 280 int utfCount = 1; 281 if ((b0 & 0xf0) == 0xe0) utfCount = 2; 282 else if ((b0 & 0xf8) == 0xf0) utfCount = 3; 283 else if ((b0 & 0xfc) == 0xf8) utfCount = 4; 284 else if ((b0 & 0xfe) == 0xfc) utfCount = 5; 285 286 // 110xxxxx (10xxxxxx)+ 287 // Range: U-00000080 - U-000007FF (count == 1) 288 // Range: U-00000800 - U-0000FFFF (count == 2) 289 // Range: U-00010000 - U-001FFFFF (count == 3) 290 // Range: U-00200000 - U-03FFFFFF (count == 4) 291 // Range: U-04000000 - U-7FFFFFFF (count == 5) 292 293 if (idx + utfCount > last) { 294 v[s++] = REPLACEMENT_CHAR; 295 break; 296 } 297 298 // Extract usable bits from b0 299 int val = b0 & (0x1f >> (utfCount - 1)); 300 for (int i = 0; i < utfCount; i++) { 301 byte b = d[idx++]; 302 if ((b & 0xC0) != 0x80) { 303 v[s++] = REPLACEMENT_CHAR; 304 idx--; // Put the input char back 305 continue outer; 306 } 307 // Push new bits in from the right side 308 val <<= 6; 309 val |= b & 0x3f; 310 } 311 312 // Note: Java allows overlong char 313 // specifications To disallow, check that val 314 // is greater than or equal to the minimum 315 // value for each count: 316 // 317 // count min value 318 // ----- ---------- 319 // 1 0x80 320 // 2 0x800 321 // 3 0x10000 322 // 4 0x200000 323 // 5 0x4000000 324 325 // Allow surrogate values (0xD800 - 0xDFFF) to 326 // be specified using 3-byte UTF values only 327 if ((utfCount != 2) && (val >= 0xD800) && (val <= 0xDFFF)) { 328 v[s++] = 
REPLACEMENT_CHAR; 329 continue; 330 } 331 332 // Reject chars greater than the Unicode maximum of U+10FFFF. 333 if (val > 0x10FFFF) { 334 v[s++] = REPLACEMENT_CHAR; 335 continue; 336 } 337 338 // Encode chars from U+10000 up as surrogate pairs 339 if (val < 0x10000) { 340 v[s++] = (char) val; 341 } else { 342 int x = val & 0xffff; 343 int u = (val >> 16) & 0x1f; 344 int w = (u - 1) & 0xffff; 345 int hi = 0xd800 | (w << 6) | (x >> 10); 346 int lo = 0xdc00 | (x & 0x3ff); 347 v[s++] = (char) hi; 348 v[s++] = (char) lo; 349 } 350 } else { 351 // Illegal values 0x8*, 0x9*, 0xa*, 0xb*, 0xfd-0xff 352 v[s++] = REPLACEMENT_CHAR; 353 } 354 } 355 356 if (s == byteCount) { 357 // We guessed right, so we can use our temporary array as-is. 358 this.offset = 0; 359 this.value = v; 360 this.count = s; 361 } else { 362 // Our temporary array was too big, so reallocate and copy. 363 this.offset = 0; 364 this.value = new char[s]; 365 this.count = s; 366 System.arraycopy(v, 0, value, 0, s); 367 } 368 } else if (canonicalCharsetName.equals("ISO-8859-1")) { 369 this.offset = 0; 370 this.value = new char[byteCount]; 371 this.count = byteCount; 372 Charsets.isoLatin1BytesToChars(data, offset, byteCount, value); 373 } else if (canonicalCharsetName.equals("US-ASCII")) { 374 this.offset = 0; 375 this.value = new char[byteCount]; 376 this.count = byteCount; 377 Charsets.asciiBytesToChars(data, offset, byteCount, value); 378 } else { 379 CharBuffer cb = charset.decode(ByteBuffer.wrap(data, offset, byteCount)); 380 this.offset = 0; 381 this.count = cb.length(); 382 if (count > 0) { 383 // We could use cb.array() directly, but that would mean we'd have to trust 384 // the CharsetDecoder doesn't hang on to the CharBuffer and mutate it later, 385 // which would break String's immutability guarantee. It would also tend to 386 // mean that we'd be wasting memory because CharsetDecoder doesn't trim the 387 // array. So we copy. 
388 this.value = new char[count]; 389 System.arraycopy(cb.array(), 0, value, 0, count); 390 } else { 391 this.value = EmptyArray.CHAR; 392 } 393 } 394 } 395 396 /** 397 * Converts the byte array to a String using the given charset. 398 * 399 * @throws NullPointerException if {@code data == null} 400 * @since 1.6 401 */ String(byte[] data, Charset charset)402 public String(byte[] data, Charset charset) { 403 this(data, 0, data.length, charset); 404 } 405 406 /** 407 * Initializes this string to contain the characters in the specified 408 * character array. Modifying the character array after creating the string 409 * has no effect on the string. 410 * 411 * @throws NullPointerException if {@code data == null} 412 */ String(char[] data)413 public String(char[] data) { 414 this(data, 0, data.length); 415 } 416 417 /** 418 * Initializes this string to contain the specified characters in the 419 * character array. Modifying the character array after creating the string 420 * has no effect on the string. 421 * 422 * @throws NullPointerException 423 * if {@code data == null}. 424 * @throws IndexOutOfBoundsException 425 * if {@code charCount < 0 || offset < 0 || offset + charCount > data.length} 426 */ String(char[] data, int offset, int charCount)427 public String(char[] data, int offset, int charCount) { 428 if ((offset | charCount) < 0 || charCount > data.length - offset) { 429 throw failedBoundsCheck(data.length, offset, charCount); 430 } 431 this.offset = 0; 432 this.value = new char[charCount]; 433 this.count = charCount; 434 System.arraycopy(data, offset, value, 0, count); 435 } 436 437 /* 438 * Internal version of the String(char[], int, int) constructor. 439 * Does not range check, null check, or copy the character array. 
440 */ String(int offset, int charCount, char[] chars)441 String(int offset, int charCount, char[] chars) { 442 this.value = chars; 443 this.offset = offset; 444 this.count = charCount; 445 } 446 447 /** 448 * Constructs a new string with the same sequence of characters as {@code 449 * toCopy}. The returned string's <a href="#backing_array">backing array</a> 450 * is no larger than necessary. 451 */ String(String toCopy)452 public String(String toCopy) { 453 value = (toCopy.value.length == toCopy.count) 454 ? toCopy.value 455 : Arrays.copyOfRange(toCopy.value, toCopy.offset, toCopy.offset + toCopy.length()); 456 offset = 0; 457 count = value.length; 458 } 459 460 /* 461 * Private constructor useful for JIT optimization. 462 */ 463 @SuppressWarnings( { "unused", "nls" }) String(String s1, String s2)464 private String(String s1, String s2) { 465 if (s1 == null) { 466 s1 = "null"; 467 } 468 if (s2 == null) { 469 s2 = "null"; 470 } 471 count = s1.count + s2.count; 472 value = new char[count]; 473 offset = 0; 474 System.arraycopy(s1.value, s1.offset, value, 0, s1.count); 475 System.arraycopy(s2.value, s2.offset, value, s1.count, s2.count); 476 } 477 478 /* 479 * Private constructor useful for JIT optimization. 480 */ 481 @SuppressWarnings( { "unused", "nls" }) String(String s1, String s2, String s3)482 private String(String s1, String s2, String s3) { 483 if (s1 == null) { 484 s1 = "null"; 485 } 486 if (s2 == null) { 487 s2 = "null"; 488 } 489 if (s3 == null) { 490 s3 = "null"; 491 } 492 count = s1.count + s2.count + s3.count; 493 value = new char[count]; 494 offset = 0; 495 System.arraycopy(s1.value, s1.offset, value, 0, s1.count); 496 System.arraycopy(s2.value, s2.offset, value, s1.count, s2.count); 497 System.arraycopy(s3.value, s3.offset, value, s1.count + s2.count, s3.count); 498 } 499 500 /** 501 * Creates a {@code String} from the contents of the specified 502 * {@code StringBuffer}. 
503 */ String(StringBuffer stringBuffer)504 public String(StringBuffer stringBuffer) { 505 offset = 0; 506 synchronized (stringBuffer) { 507 value = stringBuffer.shareValue(); 508 count = stringBuffer.length(); 509 } 510 } 511 512 /** 513 * Creates a {@code String} from the sub-array of Unicode code points. 514 * 515 * @throws NullPointerException 516 * if {@code codePoints == null}. 517 * @throws IllegalArgumentException 518 * if any of the elements of {@code codePoints} are not valid 519 * Unicode code points. 520 * @throws IndexOutOfBoundsException 521 * if {@code offset} or {@code count} are not within the bounds 522 * of {@code codePoints}. 523 * @since 1.5 524 */ String(int[] codePoints, int offset, int count)525 public String(int[] codePoints, int offset, int count) { 526 if (codePoints == null) { 527 throw new NullPointerException(); 528 } 529 if ((offset | count) < 0 || count > codePoints.length - offset) { 530 throw failedBoundsCheck(codePoints.length, offset, count); 531 } 532 this.offset = 0; 533 this.value = new char[count * 2]; 534 int end = offset + count; 535 int c = 0; 536 for (int i = offset; i < end; i++) { 537 c += Character.toChars(codePoints[i], this.value, c); 538 } 539 this.count = c; 540 } 541 542 /** 543 * Creates a {@code String} from the contents of the specified {@code 544 * StringBuilder}. 545 * 546 * @throws NullPointerException 547 * if {@code stringBuilder == null}. 548 * @since 1.5 549 */ String(StringBuilder stringBuilder)550 public String(StringBuilder stringBuilder) { 551 if (stringBuilder == null) { 552 throw new NullPointerException("stringBuilder == null"); 553 } 554 this.offset = 0; 555 this.count = stringBuilder.length(); 556 this.value = new char[this.count]; 557 stringBuilder.getChars(0, this.count, this.value, 0); 558 } 559 560 /* 561 * Creates a {@code String} that is s1 + v1. May be used by JIT code. 
562 */ 563 @SuppressWarnings("unused") String(String s1, int v1)564 private String(String s1, int v1) { 565 if (s1 == null) { 566 s1 = "null"; 567 } 568 String s2 = String.valueOf(v1); 569 int len = s1.count + s2.count; 570 value = new char[len]; 571 offset = 0; 572 System.arraycopy(s1.value, s1.offset, value, 0, s1.count); 573 System.arraycopy(s2.value, s2.offset, value, s1.count, s2.count); 574 count = len; 575 } 576 577 /** 578 * Returns the character at the specified offset in this string. 579 * 580 * @param index 581 * the zero-based index in this string. 582 * @return the character at the index. 583 * @throws IndexOutOfBoundsException 584 * if {@code index < 0} or {@code index >= length()}. 585 */ charAt(int index)586 public native char charAt(int index); 587 indexAndLength(int index)588 private StringIndexOutOfBoundsException indexAndLength(int index) { 589 throw new StringIndexOutOfBoundsException(this, index); 590 } 591 startEndAndLength(int start, int end)592 private StringIndexOutOfBoundsException startEndAndLength(int start, int end) { 593 throw new StringIndexOutOfBoundsException(this, start, end - start); 594 } 595 failedBoundsCheck(int arrayLength, int offset, int count)596 private StringIndexOutOfBoundsException failedBoundsCheck(int arrayLength, int offset, int count) { 597 throw new StringIndexOutOfBoundsException(arrayLength, offset, count); 598 } 599 600 /** 601 * This isn't equivalent to either of ICU's u_foldCase case folds, and thus any of the Unicode 602 * case folds, but it's what the RI uses. 603 */ foldCase(char ch)604 private char foldCase(char ch) { 605 if (ch < 128) { 606 if ('A' <= ch && ch <= 'Z') { 607 return (char) (ch + ('a' - 'A')); 608 } 609 return ch; 610 } 611 return Character.toLowerCase(Character.toUpperCase(ch)); 612 } 613 614 /** 615 * Compares the specified string to this string using the Unicode values of 616 * the characters. Returns 0 if the strings contain the same characters in 617 * the same order. 
* Returns a negative integer if the first non-equal
     * character in this string has a Unicode value which is less than the
     * Unicode value of the character at the same position in the specified
     * string, or if this string is a prefix of the specified string. Returns a
     * positive integer if the first non-equal character in this string has a
     * Unicode value which is greater than the Unicode value of the character at
     * the same position in the specified string, or if the specified string is
     * a prefix of this string.
     *
     * <p>This method is declared {@code native}; its implementation is not
     * part of this source file.
     *
     * @param string
     *            the string to compare.
     * @return 0 if the strings are equal, a negative integer if this string is
     *         before the specified string, or a positive integer if this string
     *         is after the specified string.
     * @throws NullPointerException
     *             if {@code string} is {@code null}.
     */
    public native int compareTo(String string);

    /**
     * Compares the specified string to this string using the Unicode values of
     * the characters, ignoring case differences. Returns 0 if the strings
     * contain the same characters in the same order. Returns a negative integer
     * if the first non-equal character in this string has a Unicode value which
     * is less than the Unicode value of the character at the same position in
     * the specified string, or if this string is a prefix of the specified
     * string. Returns a positive integer if the first non-equal character in
     * this string has a Unicode value which is greater than the Unicode value
     * of the character at the same position in the specified string, or if the
     * specified string is a prefix of this string.
     *
     * @param string
     *            the string to compare.
     * @return 0 if the strings are equal, a negative integer if this string is
     *         before the specified string, or a positive integer if this string
     *         is after the specified string.
653 * @throws NullPointerException 654 * if {@code string} is {@code null}. 655 */ compareToIgnoreCase(String string)656 public int compareToIgnoreCase(String string) { 657 int o1 = offset, o2 = string.offset, result; 658 int end = offset + (count < string.count ? count : string.count); 659 char c1, c2; 660 char[] target = string.value; 661 while (o1 < end) { 662 if ((c1 = value[o1++]) == (c2 = target[o2++])) { 663 continue; 664 } 665 c1 = foldCase(c1); 666 c2 = foldCase(c2); 667 if ((result = c1 - c2) != 0) { 668 return result; 669 } 670 } 671 return count - string.count; 672 } 673 674 /** 675 * Concatenates this string and the specified string. 676 * 677 * @param string 678 * the string to concatenate 679 * @return a new string which is the concatenation of this string and the 680 * specified string. 681 */ concat(String string)682 public String concat(String string) { 683 if (string.count > 0 && count > 0) { 684 char[] buffer = new char[count + string.count]; 685 System.arraycopy(value, offset, buffer, 0, count); 686 System.arraycopy(string.value, string.offset, buffer, count, string.count); 687 return new String(0, buffer.length, buffer); 688 } 689 return count == 0 ? string : this; 690 } 691 692 /** 693 * Creates a new string containing the characters in the specified character 694 * array. Modifying the character array after creating the string has no 695 * effect on the string. 696 * 697 * @param data 698 * the array of characters. 699 * @return the new string. 700 * @throws NullPointerException 701 * if {@code data} is {@code null}. 702 */ copyValueOf(char[] data)703 public static String copyValueOf(char[] data) { 704 return new String(data, 0, data.length); 705 } 706 707 /** 708 * Creates a new string containing the specified characters in the character 709 * array. Modifying the character array after creating the string has no 710 * effect on the string. 711 * 712 * @param data 713 * the array of characters. 
* @param start
     *            the starting offset in the character array.
     * @param length
     *            the number of characters to use.
     * @return the new string.
     * @throws NullPointerException
     *             if {@code data} is {@code null}.
     * @throws IndexOutOfBoundsException
     *             if {@code length < 0}, {@code start < 0} or
     *             {@code start + length > data.length}.
     */
    public static String copyValueOf(char[] data, int start, int length) {
        // Delegates to the copying String(char[], int, int) constructor.
        return new String(data, start, length);
    }

    /**
     * Compares the specified string to this string to determine if the
     * specified string is a suffix.
     *
     * @param suffix
     *            the suffix to look for.
     * @return {@code true} if the specified string is a suffix of this string,
     *         {@code false} otherwise.
     * @throws NullPointerException
     *             if {@code suffix} is {@code null}.
     */
    public boolean endsWith(String suffix) {
        // Compare the whole suffix against the region of this string that
        // starts suffix.count characters before its end. That start index is
        // negative when the suffix is longer than this string; handling of
        // that case is left to regionMatches.
        return regionMatches(count - suffix.count, suffix, 0, suffix.count);
    }

    /**
     * Compares the specified object to this string and returns true if they are
     * equal. The object must be an instance of string with the same characters
     * in the same order.
     *
     * <p>This method is declared {@code native}; its implementation is not
     * part of this source file.
     *
     * @param object
     *            the object to compare.
     * @return {@code true} if the specified object is equal to this string,
     *         {@code false} otherwise.
     * @see #hashCode
     */
    @Override public native boolean equals(Object object);

    /**
     * Compares the specified string to this string ignoring the case of the
     * characters and returns true if they are equal.
     *
     * @param string
     *            the string to compare.
     * @return {@code true} if the specified string is equal to this string,
     *         {@code false} otherwise.
765 */ 766 @FindBugsSuppressWarnings("ES_COMPARING_PARAMETER_STRING_WITH_EQ") equalsIgnoreCase(String string)767 public boolean equalsIgnoreCase(String string) { 768 if (string == this) { 769 return true; 770 } 771 if (string == null || count != string.count) { 772 return false; 773 } 774 int o1 = offset, o2 = string.offset; 775 int end = offset + count; 776 char[] target = string.value; 777 while (o1 < end) { 778 char c1 = value[o1++]; 779 char c2 = target[o2++]; 780 if (c1 != c2 && foldCase(c1) != foldCase(c2)) { 781 return false; 782 } 783 } 784 return true; 785 } 786 787 /** 788 * Mangles this string into a byte array by stripping the high order bits from 789 * each character. Use {@link #getBytes()} or {@link #getBytes(String)} instead. 790 * 791 * @param start 792 * the starting offset of characters to copy. 793 * @param end 794 * the ending offset of characters to copy. 795 * @param data 796 * the destination byte array. 797 * @param index 798 * the starting offset in the destination byte array. 799 * @throws NullPointerException 800 * if {@code data} is {@code null}. 801 * @throws IndexOutOfBoundsException 802 * if {@code start < 0}, {@code end > length()}, {@code index < 803 * 0} or {@code end - start > data.length - index}. 804 * @deprecated Use {@link #getBytes()} or {@link #getBytes(String)} 805 */ 806 @Deprecated getBytes(int start, int end, byte[] data, int index)807 public void getBytes(int start, int end, byte[] data, int index) { 808 // Note: last character not copied! 
809 if (start >= 0 && start <= end && end <= count) { 810 end += offset; 811 try { 812 for (int i = offset + start; i < end; i++) { 813 data[index++] = (byte) value[i]; 814 } 815 } catch (ArrayIndexOutOfBoundsException ignored) { 816 throw failedBoundsCheck(data.length, index, end - start); 817 } 818 } else { 819 throw startEndAndLength(start, end); 820 } 821 } 822 823 /** 824 * Returns a new byte array containing the characters of this string encoded using the 825 * system's {@link java.nio.charset.Charset#defaultCharset default charset}. 826 * 827 * <p>The behavior when this string cannot be represented in the system's default charset 828 * is unspecified. In practice, when the default charset is UTF-8 (as it is on Android), 829 * all strings can be encoded. 830 */ getBytes()831 public byte[] getBytes() { 832 return getBytes(Charset.defaultCharset()); 833 } 834 835 /** 836 * Returns a new byte array containing the characters of this string encoded using the 837 * named charset. 838 * 839 * <p>The behavior when this string cannot be represented in the named charset 840 * is unspecified. Use {@link java.nio.charset.CharsetEncoder} for more control. 841 * 842 * @throws UnsupportedEncodingException if the charset is not supported 843 */ getBytes(String charsetName)844 public byte[] getBytes(String charsetName) throws UnsupportedEncodingException { 845 return getBytes(Charset.forNameUEE(charsetName)); 846 } 847 848 /** 849 * Returns a new byte array containing the characters of this string encoded using the 850 * given charset. 851 * 852 * <p>The behavior when this string cannot be represented in the given charset 853 * is to replace malformed input and unmappable characters with the charset's default 854 * replacement byte array. Use {@link java.nio.charset.CharsetEncoder} for more control. 
855 * 856 * @since 1.6 857 */ getBytes(Charset charset)858 public byte[] getBytes(Charset charset) { 859 String canonicalCharsetName = charset.name(); 860 if (canonicalCharsetName.equals("UTF-8")) { 861 return Charsets.toUtf8Bytes(value, offset, count); 862 } else if (canonicalCharsetName.equals("ISO-8859-1")) { 863 return Charsets.toIsoLatin1Bytes(value, offset, count); 864 } else if (canonicalCharsetName.equals("US-ASCII")) { 865 return Charsets.toAsciiBytes(value, offset, count); 866 } else if (canonicalCharsetName.equals("UTF-16BE")) { 867 return Charsets.toBigEndianUtf16Bytes(value, offset, count); 868 } else { 869 CharBuffer chars = CharBuffer.wrap(this.value, this.offset, this.count); 870 ByteBuffer buffer = charset.encode(chars.asReadOnlyBuffer()); 871 byte[] bytes = new byte[buffer.limit()]; 872 buffer.get(bytes); 873 return bytes; 874 } 875 } 876 877 /** 878 * Copies the specified characters in this string to the character array 879 * starting at the specified offset in the character array. 880 * 881 * @param start 882 * the starting offset of characters to copy. 883 * @param end 884 * the ending offset of characters to copy. 885 * @param buffer 886 * the destination character array. 887 * @param index 888 * the starting offset in the character array. 889 * @throws NullPointerException 890 * if {@code buffer} is {@code null}. 891 * @throws IndexOutOfBoundsException 892 * if {@code start < 0}, {@code end > length()}, {@code start > 893 * end}, {@code index < 0}, {@code end - start > buffer.length - 894 * index} 895 */ getChars(int start, int end, char[] buffer, int index)896 public void getChars(int start, int end, char[] buffer, int index) { 897 // Note: last character not copied! 898 if (start >= 0 && start <= end && end <= count) { 899 System.arraycopy(value, start + offset, buffer, index, end - start); 900 } else { 901 // We throw StringIndexOutOfBoundsException rather than System.arraycopy's AIOOBE. 
902 throw startEndAndLength(start, end); 903 } 904 } 905 906 /** 907 * Version of getChars without bounds checks, for use by other classes 908 * within the java.lang package only. The caller is responsible for 909 * ensuring that start >= 0 && start <= end && end <= count. 910 */ _getChars(int start, int end, char[] buffer, int index)911 void _getChars(int start, int end, char[] buffer, int index) { 912 // NOTE last character not copied! 913 System.arraycopy(value, start + offset, buffer, index, end - start); 914 } 915 hashCode()916 @Override public int hashCode() { 917 int hash = hashCode; 918 if (hash == 0) { 919 if (count == 0) { 920 return 0; 921 } 922 final int end = count + offset; 923 final char[] chars = value; 924 for (int i = offset; i < end; ++i) { 925 hash = 31*hash + chars[i]; 926 } 927 hashCode = hash; 928 } 929 return hash; 930 } 931 932 /** 933 * Searches in this string for the first index of the specified character. 934 * The search for the character starts at the beginning and moves towards 935 * the end of this string. 936 * 937 * @param c 938 * the character to find. 939 * @return the index in this string of the specified character, -1 if the 940 * character isn't found. 941 */ indexOf(int c)942 public int indexOf(int c) { 943 // TODO: just "return indexOf(c, 0);" when the JIT can inline that deep. 944 if (c > 0xffff) { 945 return indexOfSupplementary(c, 0); 946 } 947 return fastIndexOf(c, 0); 948 } 949 950 /** 951 * Searches in this string for the index of the specified character. The 952 * search for the character starts at the specified offset and moves towards 953 * the end of this string. 954 * 955 * @param c 956 * the character to find. 957 * @param start 958 * the starting offset. 959 * @return the index in this string of the specified character, -1 if the 960 * character isn't found. 
961 */ indexOf(int c, int start)962 public int indexOf(int c, int start) { 963 if (c > 0xffff) { 964 return indexOfSupplementary(c, start); 965 } 966 return fastIndexOf(c, start); 967 } 968 fastIndexOf(int c, int start)969 private native int fastIndexOf(int c, int start); 970 indexOfSupplementary(int c, int start)971 private int indexOfSupplementary(int c, int start) { 972 if (!Character.isSupplementaryCodePoint(c)) { 973 return -1; 974 } 975 char[] chars = Character.toChars(c); 976 String needle = new String(0, chars.length, chars); 977 return indexOf(needle, start); 978 } 979 980 /** 981 * Searches in this string for the first index of the specified string. The 982 * search for the string starts at the beginning and moves towards the end 983 * of this string. 984 * 985 * @param string 986 * the string to find. 987 * @return the index of the first character of the specified string in this 988 * string, -1 if the specified string is not a substring. 989 * @throws NullPointerException 990 * if {@code string} is {@code null}. 991 */ indexOf(String string)992 public int indexOf(String string) { 993 int start = 0; 994 int subCount = string.count; 995 int _count = count; 996 if (subCount > 0) { 997 if (subCount > _count) { 998 return -1; 999 } 1000 char[] target = string.value; 1001 int subOffset = string.offset; 1002 char firstChar = target[subOffset]; 1003 int end = subOffset + subCount; 1004 while (true) { 1005 int i = indexOf(firstChar, start); 1006 if (i == -1 || subCount + i > _count) { 1007 return -1; // handles subCount > count || start >= count 1008 } 1009 int o1 = offset + i, o2 = subOffset; 1010 char[] _value = value; 1011 while (++o2 < end && _value[++o1] == target[o2]) { 1012 // Intentionally empty 1013 } 1014 if (o2 == end) { 1015 return i; 1016 } 1017 start = i + 1; 1018 } 1019 } 1020 return start < _count ? start : _count; 1021 } 1022 1023 /** 1024 * Searches in this string for the index of the specified string. 
The search 1025 * for the string starts at the specified offset and moves towards the end 1026 * of this string. 1027 * 1028 * @param subString 1029 * the string to find. 1030 * @param start 1031 * the starting offset. 1032 * @return the index of the first character of the specified string in this 1033 * string, -1 if the specified string is not a substring. 1034 * @throws NullPointerException 1035 * if {@code subString} is {@code null}. 1036 */ indexOf(String subString, int start)1037 public int indexOf(String subString, int start) { 1038 if (start < 0) { 1039 start = 0; 1040 } 1041 int subCount = subString.count; 1042 int _count = count; 1043 if (subCount > 0) { 1044 if (subCount + start > _count) { 1045 return -1; 1046 } 1047 char[] target = subString.value; 1048 int subOffset = subString.offset; 1049 char firstChar = target[subOffset]; 1050 int end = subOffset + subCount; 1051 while (true) { 1052 int i = indexOf(firstChar, start); 1053 if (i == -1 || subCount + i > _count) { 1054 return -1; // handles subCount > count || start >= count 1055 } 1056 int o1 = offset + i, o2 = subOffset; 1057 char[] _value = value; 1058 while (++o2 < end && _value[++o1] == target[o2]) { 1059 // Intentionally empty 1060 } 1061 if (o2 == end) { 1062 return i; 1063 } 1064 start = i + 1; 1065 } 1066 } 1067 return start < _count ? start : _count; 1068 } 1069 1070 /** 1071 * Returns an interned string equal to this string. The VM maintains an internal set of 1072 * unique strings. All string literals found in loaded classes' 1073 * constant pools are automatically interned. Manually-interned strings are only weakly 1074 * referenced, so calling {@code intern} won't lead to unwanted retention. 1075 * 1076 * <p>Interning is typically used because it guarantees that for interned strings 1077 * {@code a} and {@code b}, {@code a.equals(b)} can be simplified to 1078 * {@code a == b}. (This is not true of non-interned strings.) 
1079 * 1080 * <p>Many applications find it simpler and more convenient to use an explicit 1081 * {@link java.util.HashMap} to implement their own pools. 1082 */ intern()1083 public native String intern(); 1084 1085 /** 1086 * Returns true if the length of this string is 0. 1087 * 1088 * @since 1.6 1089 */ isEmpty()1090 public native boolean isEmpty(); 1091 1092 /** 1093 * Returns the last index of the code point {@code c}, or -1. 1094 * The search for the character starts at the end and moves towards the 1095 * beginning of this string. 1096 */ lastIndexOf(int c)1097 public int lastIndexOf(int c) { 1098 if (c > 0xffff) { 1099 return lastIndexOfSupplementary(c, Integer.MAX_VALUE); 1100 } 1101 int _count = count; 1102 int _offset = offset; 1103 char[] _value = value; 1104 for (int i = _offset + _count - 1; i >= _offset; --i) { 1105 if (_value[i] == c) { 1106 return i - _offset; 1107 } 1108 } 1109 return -1; 1110 } 1111 1112 /** 1113 * Returns the last index of the code point {@code c}, or -1. 1114 * The search for the character starts at offset {@code start} and moves towards 1115 * the beginning of this string. 
1116 */ lastIndexOf(int c, int start)1117 public int lastIndexOf(int c, int start) { 1118 if (c > 0xffff) { 1119 return lastIndexOfSupplementary(c, start); 1120 } 1121 int _count = count; 1122 int _offset = offset; 1123 char[] _value = value; 1124 if (start >= 0) { 1125 if (start >= _count) { 1126 start = _count - 1; 1127 } 1128 for (int i = _offset + start; i >= _offset; --i) { 1129 if (_value[i] == c) { 1130 return i - _offset; 1131 } 1132 } 1133 } 1134 return -1; 1135 } 1136 lastIndexOfSupplementary(int c, int start)1137 private int lastIndexOfSupplementary(int c, int start) { 1138 if (!Character.isSupplementaryCodePoint(c)) { 1139 return -1; 1140 } 1141 char[] chars = Character.toChars(c); 1142 String needle = new String(0, chars.length, chars); 1143 return lastIndexOf(needle, start); 1144 } 1145 1146 /** 1147 * Searches in this string for the last index of the specified string. The 1148 * search for the string starts at the end and moves towards the beginning 1149 * of this string. 1150 * 1151 * @param string 1152 * the string to find. 1153 * @return the index of the first character of the specified string in this 1154 * string, -1 if the specified string is not a substring. 1155 * @throws NullPointerException 1156 * if {@code string} is {@code null}. 1157 */ lastIndexOf(String string)1158 public int lastIndexOf(String string) { 1159 // Use count instead of count - 1 so lastIndexOf("") returns count 1160 return lastIndexOf(string, count); 1161 } 1162 1163 /** 1164 * Searches in this string for the index of the specified string. The search 1165 * for the string starts at the specified offset and moves towards the 1166 * beginning of this string. 1167 * 1168 * @param subString 1169 * the string to find. 1170 * @param start 1171 * the starting offset. 1172 * @return the index of the first character of the specified string in this 1173 * string , -1 if the specified string is not a substring. 
1174 * @throws NullPointerException 1175 * if {@code subString} is {@code null}. 1176 */ lastIndexOf(String subString, int start)1177 public int lastIndexOf(String subString, int start) { 1178 int subCount = subString.count; 1179 if (subCount <= count && start >= 0) { 1180 if (subCount > 0) { 1181 if (start > count - subCount) { 1182 start = count - subCount; 1183 } 1184 // count and subCount are both >= 1 1185 char[] target = subString.value; 1186 int subOffset = subString.offset; 1187 char firstChar = target[subOffset]; 1188 int end = subOffset + subCount; 1189 while (true) { 1190 int i = lastIndexOf(firstChar, start); 1191 if (i == -1) { 1192 return -1; 1193 } 1194 int o1 = offset + i, o2 = subOffset; 1195 while (++o2 < end && value[++o1] == target[o2]) { 1196 // Intentionally empty 1197 } 1198 if (o2 == end) { 1199 return i; 1200 } 1201 start = i - 1; 1202 } 1203 } 1204 return start < count ? start : count; 1205 } 1206 return -1; 1207 } 1208 1209 /** 1210 * Returns the size of this string. 1211 * 1212 * @return the number of characters in this string. 1213 */ length()1214 public native int length(); 1215 1216 /** 1217 * Compares the specified string to this string and compares the specified 1218 * range of characters to determine if they are the same. 1219 * 1220 * @param thisStart 1221 * the starting offset in this string. 1222 * @param string 1223 * the string to compare. 1224 * @param start 1225 * the starting offset in the specified string. 1226 * @param length 1227 * the number of characters to compare. 1228 * @return {@code true} if the ranges of characters are equal, {@code false} 1229 * otherwise 1230 * @throws NullPointerException 1231 * if {@code string} is {@code null}. 
1232 */ regionMatches(int thisStart, String string, int start, int length)1233 public boolean regionMatches(int thisStart, String string, int start, int length) { 1234 if (string == null) { 1235 throw new NullPointerException(); 1236 } 1237 if (start < 0 || string.count - start < length) { 1238 return false; 1239 } 1240 if (thisStart < 0 || count - thisStart < length) { 1241 return false; 1242 } 1243 if (length <= 0) { 1244 return true; 1245 } 1246 int o1 = offset + thisStart, o2 = string.offset + start; 1247 char[] value1 = value; 1248 char[] value2 = string.value; 1249 for (int i = 0; i < length; ++i) { 1250 if (value1[o1 + i] != value2[o2 + i]) { 1251 return false; 1252 } 1253 } 1254 return true; 1255 } 1256 1257 /** 1258 * Compares the specified string to this string and compares the specified 1259 * range of characters to determine if they are the same. When ignoreCase is 1260 * true, the case of the characters is ignored during the comparison. 1261 * 1262 * @param ignoreCase 1263 * specifies if case should be ignored. 1264 * @param thisStart 1265 * the starting offset in this string. 1266 * @param string 1267 * the string to compare. 1268 * @param start 1269 * the starting offset in the specified string. 1270 * @param length 1271 * the number of characters to compare. 1272 * @return {@code true} if the ranges of characters are equal, {@code false} 1273 * otherwise. 1274 * @throws NullPointerException 1275 * if {@code string} is {@code null}. 
1276 */ regionMatches(boolean ignoreCase, int thisStart, String string, int start, int length)1277 public boolean regionMatches(boolean ignoreCase, int thisStart, String string, int start, int length) { 1278 if (!ignoreCase) { 1279 return regionMatches(thisStart, string, start, length); 1280 } 1281 if (string == null) { 1282 throw new NullPointerException("string == null"); 1283 } 1284 if (thisStart < 0 || length > count - thisStart) { 1285 return false; 1286 } 1287 if (start < 0 || length > string.count - start) { 1288 return false; 1289 } 1290 thisStart += offset; 1291 start += string.offset; 1292 int end = thisStart + length; 1293 char[] target = string.value; 1294 while (thisStart < end) { 1295 char c1 = value[thisStart++]; 1296 char c2 = target[start++]; 1297 if (c1 != c2 && foldCase(c1) != foldCase(c2)) { 1298 return false; 1299 } 1300 } 1301 return true; 1302 } 1303 1304 /** 1305 * Copies this string replacing occurrences of the specified character with 1306 * another character. 1307 * 1308 * @param oldChar 1309 * the character to replace. 1310 * @param newChar 1311 * the replacement character. 1312 * @return a new string with occurrences of oldChar replaced by newChar. 1313 */ replace(char oldChar, char newChar)1314 public String replace(char oldChar, char newChar) { 1315 char[] buffer = value; 1316 int _offset = offset; 1317 int _count = count; 1318 1319 int idx = _offset; 1320 int last = _offset + _count; 1321 boolean copied = false; 1322 while (idx < last) { 1323 if (buffer[idx] == oldChar) { 1324 if (!copied) { 1325 char[] newBuffer = new char[_count]; 1326 System.arraycopy(buffer, _offset, newBuffer, 0, _count); 1327 buffer = newBuffer; 1328 idx -= _offset; 1329 last -= _offset; 1330 copied = true; 1331 } 1332 buffer[idx] = newChar; 1333 } 1334 idx++; 1335 } 1336 1337 return copied ? new String(0, count, buffer) : this; 1338 } 1339 1340 /** 1341 * Copies this string replacing occurrences of the specified target sequence 1342 * with another sequence. 
The string is processed from the beginning to the 1343 * end. 1344 * 1345 * @param target 1346 * the sequence to replace. 1347 * @param replacement 1348 * the replacement sequence. 1349 * @return the resulting string. 1350 * @throws NullPointerException 1351 * if {@code target} or {@code replacement} is {@code null}. 1352 */ replace(CharSequence target, CharSequence replacement)1353 public String replace(CharSequence target, CharSequence replacement) { 1354 if (target == null) { 1355 throw new NullPointerException("target == null"); 1356 } 1357 if (replacement == null) { 1358 throw new NullPointerException("replacement == null"); 1359 } 1360 1361 String targetString = target.toString(); 1362 int matchStart = indexOf(targetString, 0); 1363 if (matchStart == -1) { 1364 // If there's nothing to replace, return the original string untouched. 1365 return this; 1366 } 1367 1368 String replacementString = replacement.toString(); 1369 1370 // The empty target matches at the start and end and between each character. 1371 int targetLength = targetString.length(); 1372 if (targetLength == 0) { 1373 int resultLength = (count + 2) * replacementString.length(); 1374 StringBuilder result = new StringBuilder(resultLength); 1375 result.append(replacementString); 1376 for (int i = offset; i < count; ++i) { 1377 result.append(value[i]); 1378 result.append(replacementString); 1379 } 1380 return result.toString(); 1381 } 1382 1383 StringBuilder result = new StringBuilder(count); 1384 int searchStart = 0; 1385 do { 1386 // Copy characters before the match... 1387 result.append(value, offset + searchStart, matchStart - searchStart); 1388 // Insert the replacement... 1389 result.append(replacementString); 1390 // And skip over the match... 1391 searchStart = matchStart + targetLength; 1392 } while ((matchStart = indexOf(targetString, searchStart)) != -1); 1393 // Copy any trailing chars... 
1394 result.append(value, offset + searchStart, count - searchStart); 1395 return result.toString(); 1396 } 1397 1398 /** 1399 * Compares the specified string to this string to determine if the 1400 * specified string is a prefix. 1401 * 1402 * @param prefix 1403 * the string to look for. 1404 * @return {@code true} if the specified string is a prefix of this string, 1405 * {@code false} otherwise 1406 * @throws NullPointerException 1407 * if {@code prefix} is {@code null}. 1408 */ startsWith(String prefix)1409 public boolean startsWith(String prefix) { 1410 return startsWith(prefix, 0); 1411 } 1412 1413 /** 1414 * Compares the specified string to this string, starting at the specified 1415 * offset, to determine if the specified string is a prefix. 1416 * 1417 * @param prefix 1418 * the string to look for. 1419 * @param start 1420 * the starting offset. 1421 * @return {@code true} if the specified string occurs in this string at the 1422 * specified offset, {@code false} otherwise. 1423 * @throws NullPointerException 1424 * if {@code prefix} is {@code null}. 1425 */ startsWith(String prefix, int start)1426 public boolean startsWith(String prefix, int start) { 1427 return regionMatches(start, prefix, 0, prefix.count); 1428 } 1429 1430 /** 1431 * Returns a string containing a suffix of this string. The returned string 1432 * shares this string's <a href="#backing_array">backing array</a>. 1433 * 1434 * @param start 1435 * the offset of the first character. 1436 * @return a new string containing the characters from start to the end of 1437 * the string. 1438 * @throws IndexOutOfBoundsException 1439 * if {@code start < 0} or {@code start > length()}. 
1440 */ substring(int start)1441 public String substring(int start) { 1442 if (start == 0) { 1443 return this; 1444 } 1445 if (start >= 0 && start <= count) { 1446 return new String(offset + start, count - start, value); 1447 } 1448 throw indexAndLength(start); 1449 } 1450 1451 /** 1452 * Returns a string containing a subsequence of characters from this string. 1453 * The returned string shares this string's <a href="#backing_array">backing 1454 * array</a>. 1455 * 1456 * @param start 1457 * the offset of the first character. 1458 * @param end 1459 * the offset one past the last character. 1460 * @return a new string containing the characters from start to end - 1 1461 * @throws IndexOutOfBoundsException 1462 * if {@code start < 0}, {@code start > end} or {@code end > 1463 * length()}. 1464 */ substring(int start, int end)1465 public String substring(int start, int end) { 1466 if (start == 0 && end == count) { 1467 return this; 1468 } 1469 // NOTE last character not copied! 1470 // Fast range check. 1471 if (start >= 0 && start <= end && end <= count) { 1472 return new String(offset + start, end - start, value); 1473 } 1474 throw startEndAndLength(start, end); 1475 } 1476 1477 /** 1478 * Copies the characters in this string to a character array. 1479 * 1480 * @return a character array containing the characters of this string. 1481 */ toCharArray()1482 public char[] toCharArray() { 1483 char[] buffer = new char[count]; 1484 System.arraycopy(value, offset, buffer, 0, count); 1485 return buffer; 1486 } 1487 1488 /** 1489 * Converts this string to lower case, using the rules of the user's default locale. 1490 * See "<a href="../util/Locale.html#default_locale">Be wary of the default locale</a>". 1491 * 1492 * @return a new lower case string, or {@code this} if it's already all lower case. 
1493 */ toLowerCase()1494 public String toLowerCase() { 1495 return CaseMapper.toLowerCase(Locale.getDefault(), this, value, offset, count); 1496 } 1497 1498 /** 1499 * Converts this string to lower case, using the rules of {@code locale}. 1500 * 1501 * <p>Most case mappings are unaffected by the language of a {@code Locale}. Exceptions include 1502 * dotted and dotless I in Azeri and Turkish locales, and dotted and dotless I and J in 1503 * Lithuanian locales. On the other hand, it isn't necessary to provide a Greek locale to get 1504 * correct case mapping of Greek characters: any locale will do. 1505 * 1506 * <p>See <a href="http://www.unicode.org/Public/UNIDATA/SpecialCasing.txt">http://www.unicode.org/Public/UNIDATA/SpecialCasing.txt</a> 1507 * for full details of context- and language-specific special cases. 1508 * 1509 * @return a new lower case string, or {@code this} if it's already all lower case. 1510 */ toLowerCase(Locale locale)1511 public String toLowerCase(Locale locale) { 1512 return CaseMapper.toLowerCase(locale, this, value, offset, count); 1513 } 1514 1515 /** 1516 * Returns this string. 1517 */ 1518 @Override toString()1519 public String toString() { 1520 return this; 1521 } 1522 1523 /** 1524 * Converts this this string to upper case, using the rules of the user's default locale. 1525 * See "<a href="../util/Locale.html#default_locale">Be wary of the default locale</a>". 1526 * 1527 * @return a new upper case string, or {@code this} if it's already all upper case. 1528 */ toUpperCase()1529 public String toUpperCase() { 1530 return CaseMapper.toUpperCase(Locale.getDefault(), this, value, offset, count); 1531 } 1532 1533 /** 1534 * Converts this this string to upper case, using the rules of {@code locale}. 1535 * 1536 * <p>Most case mappings are unaffected by the language of a {@code Locale}. Exceptions include 1537 * dotted and dotless I in Azeri and Turkish locales, and dotted and dotless I and J in 1538 * Lithuanian locales. 
On the other hand, it isn't necessary to provide a Greek locale to get 1539 * correct case mapping of Greek characters: any locale will do. 1540 * 1541 * <p>See <a href="http://www.unicode.org/Public/UNIDATA/SpecialCasing.txt">http://www.unicode.org/Public/UNIDATA/SpecialCasing.txt</a> 1542 * for full details of context- and language-specific special cases. 1543 * 1544 * @return a new upper case string, or {@code this} if it's already all upper case. 1545 */ toUpperCase(Locale locale)1546 public String toUpperCase(Locale locale) { 1547 return CaseMapper.toUpperCase(locale, this, value, offset, count); 1548 } 1549 1550 /** 1551 * Copies this string removing white space characters from the beginning and 1552 * end of the string. 1553 * 1554 * @return a new string with characters <code><= \\u0020</code> removed from 1555 * the beginning and the end. 1556 */ trim()1557 public String trim() { 1558 int start = offset, last = offset + count - 1; 1559 int end = last; 1560 while ((start <= end) && (value[start] <= ' ')) { 1561 start++; 1562 } 1563 while ((end >= start) && (value[end] <= ' ')) { 1564 end--; 1565 } 1566 if (start == offset && end == last) { 1567 return this; 1568 } 1569 return new String(start, end - start + 1, value); 1570 } 1571 1572 /** 1573 * Creates a new string containing the characters in the specified character 1574 * array. Modifying the character array after creating the string has no 1575 * effect on the string. 1576 * 1577 * @param data 1578 * the array of characters. 1579 * @return the new string. 1580 * @throws NullPointerException 1581 * if {@code data} is {@code null}. 1582 */ valueOf(char[] data)1583 public static String valueOf(char[] data) { 1584 return new String(data, 0, data.length); 1585 } 1586 1587 /** 1588 * Creates a new string containing the specified characters in the character 1589 * array. Modifying the character array after creating the string has no 1590 * effect on the string. 
1591 * 1592 * @param data 1593 * the array of characters. 1594 * @param start 1595 * the starting offset in the character array. 1596 * @param length 1597 * the number of characters to use. 1598 * @return the new string. 1599 * @throws IndexOutOfBoundsException 1600 * if {@code length < 0}, {@code start < 0} or {@code start + 1601 * length > data.length} 1602 * @throws NullPointerException 1603 * if {@code data} is {@code null}. 1604 */ valueOf(char[] data, int start, int length)1605 public static String valueOf(char[] data, int start, int length) { 1606 return new String(data, start, length); 1607 } 1608 1609 /** 1610 * Converts the specified character to its string representation. 1611 * 1612 * @param value 1613 * the character. 1614 * @return the character converted to a string. 1615 */ valueOf(char value)1616 public static String valueOf(char value) { 1617 String s; 1618 if (value < 128) { 1619 s = new String(value, 1, ASCII); 1620 } else { 1621 s = new String(0, 1, new char[] { value }); 1622 } 1623 s.hashCode = value; 1624 return s; 1625 } 1626 1627 /** 1628 * Converts the specified double to its string representation. 1629 * 1630 * @param value 1631 * the double. 1632 * @return the double converted to a string. 1633 */ valueOf(double value)1634 public static String valueOf(double value) { 1635 return Double.toString(value); 1636 } 1637 1638 /** 1639 * Converts the specified float to its string representation. 1640 * 1641 * @param value 1642 * the float. 1643 * @return the float converted to a string. 1644 */ valueOf(float value)1645 public static String valueOf(float value) { 1646 return Float.toString(value); 1647 } 1648 1649 /** 1650 * Converts the specified integer to its string representation. 1651 * 1652 * @param value 1653 * the integer. 1654 * @return the integer converted to a string. 
1655 */ valueOf(int value)1656 public static String valueOf(int value) { 1657 return Integer.toString(value); 1658 } 1659 1660 /** 1661 * Converts the specified long to its string representation. 1662 * 1663 * @param value 1664 * the long. 1665 * @return the long converted to a string. 1666 */ valueOf(long value)1667 public static String valueOf(long value) { 1668 return Long.toString(value); 1669 } 1670 1671 /** 1672 * Converts the specified object to its string representation. If the object 1673 * is null return the string {@code "null"}, otherwise use {@code 1674 * toString()} to get the string representation. 1675 * 1676 * @param value 1677 * the object. 1678 * @return the object converted to a string, or the string {@code "null"}. 1679 */ valueOf(Object value)1680 public static String valueOf(Object value) { 1681 return value != null ? value.toString() : "null"; 1682 } 1683 1684 /** 1685 * Converts the specified boolean to its string representation. When the 1686 * boolean is {@code true} return {@code "true"}, otherwise return {@code 1687 * "false"}. 1688 * 1689 * @param value 1690 * the boolean. 1691 * @return the boolean converted to a string. 1692 */ valueOf(boolean value)1693 public static String valueOf(boolean value) { 1694 return value ? "true" : "false"; 1695 } 1696 1697 /** 1698 * Returns whether the characters in the StringBuffer {@code strbuf} are the 1699 * same as those in this string. 1700 * 1701 * @param strbuf 1702 * the StringBuffer to compare this string to. 1703 * @return {@code true} if the characters in {@code strbuf} are identical to 1704 * those in this string. If they are not, {@code false} will be 1705 * returned. 1706 * @throws NullPointerException 1707 * if {@code strbuf} is {@code null}. 
1708 * @since 1.4 1709 */ contentEquals(StringBuffer strbuf)1710 public boolean contentEquals(StringBuffer strbuf) { 1711 synchronized (strbuf) { 1712 int size = strbuf.length(); 1713 if (count != size) { 1714 return false; 1715 } 1716 return regionMatches(0, new String(0, size, strbuf.getValue()), 0, 1717 size); 1718 } 1719 } 1720 1721 /** 1722 * Compares a {@code CharSequence} to this {@code String} to determine if 1723 * their contents are equal. 1724 * 1725 * @param cs 1726 * the character sequence to compare to. 1727 * @return {@code true} if equal, otherwise {@code false} 1728 * @since 1.5 1729 */ contentEquals(CharSequence cs)1730 public boolean contentEquals(CharSequence cs) { 1731 if (cs == null) { 1732 throw new NullPointerException(); 1733 } 1734 1735 int len = cs.length(); 1736 1737 if (len != count) { 1738 return false; 1739 } 1740 1741 if (len == 0 && count == 0) { 1742 return true; // since both are empty strings 1743 } 1744 1745 return regionMatches(0, cs.toString(), 0, len); 1746 } 1747 1748 /** 1749 * Tests whether this string matches the given {@code regularExpression}. This method returns 1750 * true only if the regular expression matches the <i>entire</i> input string. A common mistake is 1751 * to assume that this method behaves like {@link #contains}; if you want to match anywhere 1752 * within the input string, you need to add {@code .*} to the beginning and end of your 1753 * regular expression. See {@link Pattern#matches}. 1754 * 1755 * <p>If the same regular expression is to be used for multiple operations, it may be more 1756 * efficient to reuse a compiled {@code Pattern}. 1757 * 1758 * @throws PatternSyntaxException 1759 * if the syntax of the supplied regular expression is not 1760 * valid. 
1761 * @throws NullPointerException if {@code regularExpression == null} 1762 * @since 1.4 1763 */ matches(String regularExpression)1764 public boolean matches(String regularExpression) { 1765 return Pattern.matches(regularExpression, this); 1766 } 1767 1768 /** 1769 * Replaces all matches for {@code regularExpression} within this string with the given 1770 * {@code replacement}. 1771 * See {@link Pattern} for regular expression syntax. 1772 * 1773 * <p>If the same regular expression is to be used for multiple operations, it may be more 1774 * efficient to reuse a compiled {@code Pattern}. 1775 * 1776 * @throws PatternSyntaxException 1777 * if the syntax of the supplied regular expression is not 1778 * valid. 1779 * @throws NullPointerException if {@code regularExpression == null} 1780 * @see Pattern 1781 * @since 1.4 1782 */ replaceAll(String regularExpression, String replacement)1783 public String replaceAll(String regularExpression, String replacement) { 1784 return Pattern.compile(regularExpression).matcher(this).replaceAll(replacement); 1785 } 1786 1787 /** 1788 * Replaces the first match for {@code regularExpression} within this string with the given 1789 * {@code replacement}. 1790 * See {@link Pattern} for regular expression syntax. 1791 * 1792 * <p>If the same regular expression is to be used for multiple operations, it may be more 1793 * efficient to reuse a compiled {@code Pattern}. 1794 * 1795 * @throws PatternSyntaxException 1796 * if the syntax of the supplied regular expression is not 1797 * valid. 1798 * @throws NullPointerException if {@code regularExpression == null} 1799 * @see Pattern 1800 * @since 1.4 1801 */ replaceFirst(String regularExpression, String replacement)1802 public String replaceFirst(String regularExpression, String replacement) { 1803 return Pattern.compile(regularExpression).matcher(this).replaceFirst(replacement); 1804 } 1805 1806 /** 1807 * Splits this string using the supplied {@code regularExpression}. 
1808 * Equivalent to {@code split(regularExpression, 0)}. 1809 * See {@link Pattern#split(CharSequence, int)} for an explanation of {@code limit}. 1810 * See {@link Pattern} for regular expression syntax. 1811 * 1812 * <p>If the same regular expression is to be used for multiple operations, it may be more 1813 * efficient to reuse a compiled {@code Pattern}. 1814 * 1815 * @throws NullPointerException if {@code regularExpression == null} 1816 * @throws PatternSyntaxException 1817 * if the syntax of the supplied regular expression is not 1818 * valid. 1819 * @see Pattern 1820 * @since 1.4 1821 */ split(String regularExpression)1822 public String[] split(String regularExpression) { 1823 return split(regularExpression, 0); 1824 } 1825 1826 /** 1827 * Splits this string using the supplied {@code regularExpression}. 1828 * See {@link Pattern#split(CharSequence, int)} for an explanation of {@code limit}. 1829 * See {@link Pattern} for regular expression syntax. 1830 * 1831 * <p>If the same regular expression is to be used for multiple operations, it may be more 1832 * efficient to reuse a compiled {@code Pattern}. 1833 * 1834 * @throws NullPointerException if {@code regularExpression == null} 1835 * @throws PatternSyntaxException 1836 * if the syntax of the supplied regular expression is not 1837 * valid. 1838 * @since 1.4 1839 */ split(String regularExpression, int limit)1840 public String[] split(String regularExpression, int limit) { 1841 String[] result = java.util.regex.Splitter.fastSplit(regularExpression, this, limit); 1842 return result != null ? result : Pattern.compile(regularExpression).split(this, limit); 1843 } 1844 1845 /** 1846 * Has the same result as the substring function, but is present so that 1847 * string may implement the CharSequence interface. 1848 * 1849 * @param start 1850 * the offset the first character. 1851 * @param end 1852 * the offset of one past the last character to include. 1853 * @return the subsequence requested. 
1854 * @throws IndexOutOfBoundsException 1855 * if {@code start < 0}, {@code end < 0}, {@code start > end} or 1856 * {@code end > length()}. 1857 * @see java.lang.CharSequence#subSequence(int, int) 1858 * @since 1.4 1859 */ subSequence(int start, int end)1860 public CharSequence subSequence(int start, int end) { 1861 return substring(start, end); 1862 } 1863 1864 /** 1865 * Returns the Unicode code point at the given {@code index}. 1866 * 1867 * @throws IndexOutOfBoundsException if {@code index < 0 || index >= length()} 1868 * @see Character#codePointAt(char[], int, int) 1869 * @since 1.5 1870 */ codePointAt(int index)1871 public int codePointAt(int index) { 1872 if (index < 0 || index >= count) { 1873 throw indexAndLength(index); 1874 } 1875 return Character.codePointAt(value, offset + index, offset + count); 1876 } 1877 1878 /** 1879 * Returns the Unicode code point that precedes the given {@code index}. 1880 * 1881 * @throws IndexOutOfBoundsException if {@code index < 1 || index > length()} 1882 * @see Character#codePointBefore(char[], int, int) 1883 * @since 1.5 1884 */ codePointBefore(int index)1885 public int codePointBefore(int index) { 1886 if (index < 1 || index > count) { 1887 throw indexAndLength(index); 1888 } 1889 return Character.codePointBefore(value, offset + index, offset); 1890 } 1891 1892 /** 1893 * Calculates the number of Unicode code points between {@code start} 1894 * and {@code end}. 1895 * 1896 * @param start 1897 * the inclusive beginning index of the subsequence. 1898 * @param end 1899 * the exclusive end index of the subsequence. 1900 * @return the number of Unicode code points in the subsequence. 
1901 * @throws IndexOutOfBoundsException 1902 * if {@code start < 0 || end > length() || start > end} 1903 * @see Character#codePointCount(CharSequence, int, int) 1904 * @since 1.5 1905 */ codePointCount(int start, int end)1906 public int codePointCount(int start, int end) { 1907 if (start < 0 || end > count || start > end) { 1908 throw startEndAndLength(start, end); 1909 } 1910 return Character.codePointCount(value, offset + start, end - start); 1911 } 1912 1913 /** 1914 * Determines if this {@code String} contains the sequence of characters in 1915 * the {@code CharSequence} passed. 1916 * 1917 * @param cs 1918 * the character sequence to search for. 1919 * @return {@code true} if the sequence of characters are contained in this 1920 * string, otherwise {@code false}. 1921 * @since 1.5 1922 */ contains(CharSequence cs)1923 public boolean contains(CharSequence cs) { 1924 if (cs == null) { 1925 throw new NullPointerException(); 1926 } 1927 return indexOf(cs.toString()) >= 0; 1928 } 1929 1930 /** 1931 * Returns the index within this object that is offset from {@code index} by 1932 * {@code codePointOffset} code points. 1933 * 1934 * @param index 1935 * the index within this object to calculate the offset from. 1936 * @param codePointOffset 1937 * the number of code points to count. 1938 * @return the index within this object that is the offset. 1939 * @throws IndexOutOfBoundsException 1940 * if {@code index} is negative or greater than {@code length()} 1941 * or if there aren't enough code points before or after {@code 1942 * index} to match {@code codePointOffset}. 
1943 * @since 1.5 1944 */ offsetByCodePoints(int index, int codePointOffset)1945 public int offsetByCodePoints(int index, int codePointOffset) { 1946 int s = index + offset; 1947 int r = Character.offsetByCodePoints(value, offset, count, s, codePointOffset); 1948 return r - offset; 1949 } 1950 1951 /** 1952 * Returns a localized formatted string, using the supplied format and arguments, 1953 * using the user's default locale. 1954 * 1955 * <p>If you're formatting a string other than for human 1956 * consumption, you should use the {@code format(Locale, String, Object...)} 1957 * overload and supply {@code Locale.US}. See 1958 * "<a href="../util/Locale.html#default_locale">Be wary of the default locale</a>". 1959 * 1960 * @param format the format string (see {@link java.util.Formatter#format}) 1961 * @param args 1962 * the list of arguments passed to the formatter. If there are 1963 * more arguments than required by {@code format}, 1964 * additional arguments are ignored. 1965 * @return the formatted string. 1966 * @throws NullPointerException if {@code format == null} 1967 * @throws java.util.IllegalFormatException 1968 * if the format is invalid. 1969 * @since 1.5 1970 */ format(String format, Object... args)1971 public static String format(String format, Object... args) { 1972 return format(Locale.getDefault(), format, args); 1973 } 1974 1975 /** 1976 * Returns a formatted string, using the supplied format and arguments, 1977 * localized to the given locale. 1978 * 1979 * @param locale 1980 * the locale to apply; {@code null} value means no localization. 1981 * @param format the format string (see {@link java.util.Formatter#format}) 1982 * @param args 1983 * the list of arguments passed to the formatter. If there are 1984 * more arguments than required by {@code format}, 1985 * additional arguments are ignored. 1986 * @return the formatted string. 
1987 * @throws NullPointerException if {@code format == null} 1988 * @throws java.util.IllegalFormatException 1989 * if the format is invalid. 1990 * @since 1.5 1991 */ format(Locale locale, String format, Object... args)1992 public static String format(Locale locale, String format, Object... args) { 1993 if (format == null) { 1994 throw new NullPointerException("null format argument"); 1995 } 1996 int bufferSize = format.length() + (args == null ? 0 : args.length * 10); 1997 Formatter f = new Formatter(new StringBuilder(bufferSize), locale); 1998 return f.format(format, args).toString(); 1999 } 2000 2001 /* 2002 * An implementation of a String.indexOf that is supposed to perform 2003 * substantially better than the default algorithm if the "needle" (the 2004 * subString being searched for) is a constant string. 2005 * 2006 * For example, a JIT, upon encountering a call to String.indexOf(String), 2007 * where the needle is a constant string, may compute the values cache, md2 2008 * and lastChar, and change the call to the following method. 
2009 */ 2010 @FindBugsSuppressWarnings("UPM_UNCALLED_PRIVATE_METHOD") 2011 @SuppressWarnings("unused") indexOf(String haystackString, String needleString, int cache, int md2, char lastChar)2012 private static int indexOf(String haystackString, String needleString, 2013 int cache, int md2, char lastChar) { 2014 char[] haystack = haystackString.value; 2015 int haystackOffset = haystackString.offset; 2016 int haystackLength = haystackString.count; 2017 char[] needle = needleString.value; 2018 int needleOffset = needleString.offset; 2019 int needleLength = needleString.count; 2020 int needleLengthMinus1 = needleLength - 1; 2021 int haystackEnd = haystackOffset + haystackLength; 2022 outer_loop: for (int i = haystackOffset + needleLengthMinus1; i < haystackEnd;) { 2023 if (lastChar == haystack[i]) { 2024 for (int j = 0; j < needleLengthMinus1; ++j) { 2025 if (needle[j + needleOffset] != haystack[i + j 2026 - needleLengthMinus1]) { 2027 int skip = 1; 2028 if ((cache & (1 << haystack[i])) == 0) { 2029 skip += j; 2030 } 2031 i += Math.max(md2, skip); 2032 continue outer_loop; 2033 } 2034 } 2035 return i - needleLengthMinus1 - haystackOffset; 2036 } 2037 2038 if ((cache & (1 << haystack[i])) == 0) { 2039 i += needleLengthMinus1; 2040 } 2041 i++; 2042 } 2043 return -1; 2044 } 2045 } 2046