1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // © 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 /* 5 ******************************************************************************* 6 * Copyright (C) 1996-2015, International Business Machines Corporation and * 7 * others. All Rights Reserved. * 8 ******************************************************************************* 9 */ 10 package ohos.global.icu.impl; 11 12 import java.io.IOException; 13 import java.util.ArrayList; 14 import java.util.Locale; 15 import java.util.regex.Pattern; 16 17 import ohos.global.icu.lang.UCharacter; 18 import ohos.global.icu.text.Replaceable; 19 import ohos.global.icu.text.UTF16; 20 import ohos.global.icu.text.UnicodeMatcher; 21 import ohos.global.icu.util.ICUUncheckedIOException; 22 23 /** 24 * @hide exposed on OHOS 25 */ 26 public final class Utility { 27 28 private static final char APOSTROPHE = '\''; 29 private static final char BACKSLASH = '\\'; 30 private static final int MAGIC_UNSIGNED = 0x80000000; 31 32 /** 33 * Convenience utility to compare two Object[]s. 34 * Ought to be in System 35 */ arrayEquals(Object[] source, Object target)36 public final static boolean arrayEquals(Object[] source, Object target) { 37 if (source == null) return (target == null); 38 if (!(target instanceof Object[])) return false; 39 Object[] targ = (Object[]) target; 40 return (source.length == targ.length 41 && arrayRegionMatches(source, 0, targ, 0, source.length)); 42 } 43 44 /** 45 * Convenience utility to compare two int[]s 46 * Ought to be in System 47 */ arrayEquals(int[] source, Object target)48 public final static boolean arrayEquals(int[] source, Object target) { 49 if (source == null) return (target == null); 50 if (!(target instanceof int[])) return false; 51 int[] targ = (int[]) target; 52 return (source.length == targ.length 53 && arrayRegionMatches(source, 0, targ, 0, source.length)); 54 } 55 56 /** 57 * Convenience utility to compare two double[]s 58 * Ought to be in System 59 */ arrayEquals(double[] source, Object target)60 public final static boolean arrayEquals(double[] source, Object target) { 61 if (source == null) return (target == null); 62 if (!(target instanceof double[])) return false; 63 double[] targ = (double[]) target; 64 return (source.length == targ.length 65 && arrayRegionMatches(source, 0, targ, 0, source.length)); 66 } arrayEquals(byte[] source, Object target)67 public final static boolean arrayEquals(byte[] source, Object target) { 68 if (source == null) return (target == null); 69 if (!(target instanceof byte[])) return false; 70 byte[] targ = (byte[]) target; 71 return (source.length == targ.length 72 && arrayRegionMatches(source, 0, targ, 0, source.length)); 73 } 74 75 /** 76 * Convenience utility to compare two Object[]s 77 * Ought to be in System 78 */ arrayEquals(Object source, Object target)79 public final static boolean arrayEquals(Object source, Object target) { 80 if (source == null) return (target == null); 81 // for some reason, the correct arrayEquals is not being called 82 // so do it by hand for now. 83 if (source instanceof Object[]) 84 return(arrayEquals((Object[]) source,target)); 85 if (source instanceof int[]) 86 return(arrayEquals((int[]) source,target)); 87 if (source instanceof double[]) 88 return(arrayEquals((double[]) source, target)); 89 if (source instanceof byte[]) 90 return(arrayEquals((byte[]) source,target)); 91 return source.equals(target); 92 } 93 94 /** 95 * Convenience utility to compare two Object[]s 96 * Ought to be in System. 97 * @param len the length to compare. 98 * The start indices and start+len must be valid. 99 */ arrayRegionMatches(Object[] source, int sourceStart, Object[] target, int targetStart, int len)100 public final static boolean arrayRegionMatches(Object[] source, int sourceStart, 101 Object[] target, int targetStart, 102 int len) 103 { 104 int sourceEnd = sourceStart + len; 105 int delta = targetStart - sourceStart; 106 for (int i = sourceStart; i < sourceEnd; i++) { 107 if (!arrayEquals(source[i],target[i + delta])) 108 return false; 109 } 110 return true; 111 } 112 113 /** 114 * Convenience utility to compare two Object[]s 115 * Ought to be in System. 116 * @param len the length to compare. 117 * The start indices and start+len must be valid. 118 */ arrayRegionMatches(char[] source, int sourceStart, char[] target, int targetStart, int len)119 public final static boolean arrayRegionMatches(char[] source, int sourceStart, 120 char[] target, int targetStart, 121 int len) 122 { 123 int sourceEnd = sourceStart + len; 124 int delta = targetStart - sourceStart; 125 for (int i = sourceStart; i < sourceEnd; i++) { 126 if (source[i]!=target[i + delta]) 127 return false; 128 } 129 return true; 130 } 131 132 /** 133 * Convenience utility to compare two int[]s. 134 * @param len the length to compare. 135 * The start indices and start+len must be valid. 136 * Ought to be in System 137 */ arrayRegionMatches(int[] source, int sourceStart, int[] target, int targetStart, int len)138 public final static boolean arrayRegionMatches(int[] source, int sourceStart, 139 int[] target, int targetStart, 140 int len) 141 { 142 int sourceEnd = sourceStart + len; 143 int delta = targetStart - sourceStart; 144 for (int i = sourceStart; i < sourceEnd; i++) { 145 if (source[i] != target[i + delta]) 146 return false; 147 } 148 return true; 149 } 150 151 /** 152 * Convenience utility to compare two arrays of doubles. 153 * @param len the length to compare. 154 * The start indices and start+len must be valid. 155 * Ought to be in System 156 */ arrayRegionMatches(double[] source, int sourceStart, double[] target, int targetStart, int len)157 public final static boolean arrayRegionMatches(double[] source, int sourceStart, 158 double[] target, int targetStart, 159 int len) 160 { 161 int sourceEnd = sourceStart + len; 162 int delta = targetStart - sourceStart; 163 for (int i = sourceStart; i < sourceEnd; i++) { 164 if (source[i] != target[i + delta]) 165 return false; 166 } 167 return true; 168 } arrayRegionMatches(byte[] source, int sourceStart, byte[] target, int targetStart, int len)169 public final static boolean arrayRegionMatches(byte[] source, int sourceStart, 170 byte[] target, int targetStart, int len){ 171 int sourceEnd = sourceStart + len; 172 int delta = targetStart - sourceStart; 173 for (int i = sourceStart; i < sourceEnd; i++) { 174 if (source[i] != target[i + delta]) 175 return false; 176 } 177 return true; 178 } 179 180 /** 181 * Trivial reference equality. 182 * This method should help document that we really want == not equals(), 183 * and to have a single place to suppress warnings from static analysis tools. 184 */ sameObjects(Object a, Object b)185 public static final boolean sameObjects(Object a, Object b) { 186 return a == b; 187 } 188 189 /** 190 * Convenience utility. Does null checks on objects, then calls compare. 191 */ checkCompare(T a, T b)192 public static <T extends Comparable<T>> int checkCompare(T a, T b) { 193 return a == null ? 194 b == null ? 0 : -1 : 195 b == null ? 1 : a.compareTo(b); 196 } 197 198 /** 199 * Convenience utility. Does null checks on object, then calls hashCode. 200 */ checkHash(Object a)201 public static int checkHash(Object a) { 202 return a == null ? 0 : a.hashCode(); 203 } 204 205 /** 206 * The ESCAPE character is used during run-length encoding. It signals 207 * a run of identical chars. 208 */ 209 private static final char ESCAPE = '\uA5A5'; 210 211 /** 212 * The ESCAPE_BYTE character is used during run-length encoding. It signals 213 * a run of identical bytes. 214 */ 215 static final byte ESCAPE_BYTE = (byte)0xA5; 216 217 /** 218 * Construct a string representing an int array. Use run-length encoding. 219 * A character represents itself, unless it is the ESCAPE character. Then 220 * the following notations are possible: 221 * ESCAPE ESCAPE ESCAPE literal 222 * ESCAPE n c n instances of character c 223 * Since an encoded run occupies 3 characters, we only encode runs of 4 or 224 * more characters. Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF. 225 * If we encounter a run where n == ESCAPE, we represent this as: 226 * c ESCAPE n-1 c 227 * The ESCAPE value is chosen so as not to collide with commonly 228 * seen values. 229 */ arrayToRLEString(int[] a)230 static public final String arrayToRLEString(int[] a) { 231 StringBuilder buffer = new StringBuilder(); 232 233 appendInt(buffer, a.length); 234 int runValue = a[0]; 235 int runLength = 1; 236 for (int i=1; i<a.length; ++i) { 237 int s = a[i]; 238 if (s == runValue && runLength < 0xFFFF) { 239 ++runLength; 240 } else { 241 encodeRun(buffer, runValue, runLength); 242 runValue = s; 243 runLength = 1; 244 } 245 } 246 encodeRun(buffer, runValue, runLength); 247 return buffer.toString(); 248 } 249 250 /** 251 * Construct a string representing a short array. Use run-length encoding. 252 * A character represents itself, unless it is the ESCAPE character. Then 253 * the following notations are possible: 254 * ESCAPE ESCAPE ESCAPE literal 255 * ESCAPE n c n instances of character c 256 * Since an encoded run occupies 3 characters, we only encode runs of 4 or 257 * more characters. Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF. 258 * If we encounter a run where n == ESCAPE, we represent this as: 259 * c ESCAPE n-1 c 260 * The ESCAPE value is chosen so as not to collide with commonly 261 * seen values. 262 */ arrayToRLEString(short[] a)263 static public final String arrayToRLEString(short[] a) { 264 StringBuilder buffer = new StringBuilder(); 265 // for (int i=0; i<a.length; ++i) buffer.append((char) a[i]); 266 buffer.append((char) (a.length >> 16)); 267 buffer.append((char) a.length); 268 short runValue = a[0]; 269 int runLength = 1; 270 for (int i=1; i<a.length; ++i) { 271 short s = a[i]; 272 if (s == runValue && runLength < 0xFFFF) ++runLength; 273 else { 274 encodeRun(buffer, runValue, runLength); 275 runValue = s; 276 runLength = 1; 277 } 278 } 279 encodeRun(buffer, runValue, runLength); 280 return buffer.toString(); 281 } 282 283 /** 284 * Construct a string representing a char array. Use run-length encoding. 285 * A character represents itself, unless it is the ESCAPE character. Then 286 * the following notations are possible: 287 * ESCAPE ESCAPE ESCAPE literal 288 * ESCAPE n c n instances of character c 289 * Since an encoded run occupies 3 characters, we only encode runs of 4 or 290 * more characters. Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF. 291 * If we encounter a run where n == ESCAPE, we represent this as: 292 * c ESCAPE n-1 c 293 * The ESCAPE value is chosen so as not to collide with commonly 294 * seen values. 295 */ arrayToRLEString(char[] a)296 static public final String arrayToRLEString(char[] a) { 297 StringBuilder buffer = new StringBuilder(); 298 buffer.append((char) (a.length >> 16)); 299 buffer.append((char) a.length); 300 char runValue = a[0]; 301 int runLength = 1; 302 for (int i=1; i<a.length; ++i) { 303 char s = a[i]; 304 if (s == runValue && runLength < 0xFFFF) ++runLength; 305 else { 306 encodeRun(buffer, (short)runValue, runLength); 307 runValue = s; 308 runLength = 1; 309 } 310 } 311 encodeRun(buffer, (short)runValue, runLength); 312 return buffer.toString(); 313 } 314 315 /** 316 * Construct a string representing a byte array. Use run-length encoding. 317 * Two bytes are packed into a single char, with a single extra zero byte at 318 * the end if needed. A byte represents itself, unless it is the 319 * ESCAPE_BYTE. Then the following notations are possible: 320 * ESCAPE_BYTE ESCAPE_BYTE ESCAPE_BYTE literal 321 * ESCAPE_BYTE n b n instances of byte b 322 * Since an encoded run occupies 3 bytes, we only encode runs of 4 or 323 * more bytes. Thus we have n > 0 and n != ESCAPE_BYTE and n <= 0xFF. 324 * If we encounter a run where n == ESCAPE_BYTE, we represent this as: 325 * b ESCAPE_BYTE n-1 b 326 * The ESCAPE_BYTE value is chosen so as not to collide with commonly 327 * seen values. 328 */ arrayToRLEString(byte[] a)329 static public final String arrayToRLEString(byte[] a) { 330 StringBuilder buffer = new StringBuilder(); 331 buffer.append((char) (a.length >> 16)); 332 buffer.append((char) a.length); 333 byte runValue = a[0]; 334 int runLength = 1; 335 byte[] state = new byte[2]; 336 for (int i=1; i<a.length; ++i) { 337 byte b = a[i]; 338 if (b == runValue && runLength < 0xFF) ++runLength; 339 else { 340 encodeRun(buffer, runValue, runLength, state); 341 runValue = b; 342 runLength = 1; 343 } 344 } 345 encodeRun(buffer, runValue, runLength, state); 346 347 // We must save the final byte, if there is one, by padding 348 // an extra zero. 349 if (state[0] != 0) appendEncodedByte(buffer, (byte)0, state); 350 351 return buffer.toString(); 352 } 353 354 /** 355 * Encode a run, possibly a degenerate run (of < 4 values). 356 * @param length The length of the run; must be > 0 && <= 0xFFFF. 357 */ encodeRun(T buffer, int value, int length)358 private static final <T extends Appendable> void encodeRun(T buffer, int value, int length) { 359 if (length < 4) { 360 for (int j=0; j<length; ++j) { 361 if (value == ESCAPE) { 362 appendInt(buffer, value); 363 } 364 appendInt(buffer, value); 365 } 366 } 367 else { 368 if (length == ESCAPE) { 369 if (value == ESCAPE) { 370 appendInt(buffer, ESCAPE); 371 } 372 appendInt(buffer, value); 373 --length; 374 } 375 appendInt(buffer, ESCAPE); 376 appendInt(buffer, length); 377 appendInt(buffer, value); // Don't need to escape this value 378 } 379 } 380 appendInt(T buffer, int value)381 private static final <T extends Appendable> void appendInt(T buffer, int value) { 382 try { 383 buffer.append((char)(value >>> 16)); 384 buffer.append((char)(value & 0xFFFF)); 385 } catch (IOException e) { 386 throw new IllegalIcuArgumentException(e); 387 } 388 } 389 390 /** 391 * Encode a run, possibly a degenerate run (of < 4 values). 392 * @param length The length of the run; must be > 0 && <= 0xFFFF. 393 */ encodeRun(T buffer, short value, int length)394 private static final <T extends Appendable> void encodeRun(T buffer, short value, int length) { 395 try { 396 char valueChar = (char) value; 397 if (length < 4) { 398 for (int j=0; j<length; ++j) { 399 if (valueChar == ESCAPE) { 400 buffer.append(ESCAPE); 401 } 402 buffer.append(valueChar); 403 } 404 } 405 else { 406 if (length == ESCAPE) { 407 if (valueChar == ESCAPE) { 408 buffer.append(ESCAPE); 409 } 410 buffer.append(valueChar); 411 --length; 412 } 413 buffer.append(ESCAPE); 414 buffer.append((char) length); 415 buffer.append(valueChar); // Don't need to escape this value 416 } 417 } catch (IOException e) { 418 throw new IllegalIcuArgumentException(e); 419 } 420 } 421 422 /** 423 * Encode a run, possibly a degenerate run (of < 4 values). 424 * @param length The length of the run; must be > 0 && <= 0xFF. 425 */ encodeRun(T buffer, byte value, int length, byte[] state)426 private static final <T extends Appendable> void encodeRun(T buffer, byte value, int length, 427 byte[] state) { 428 if (length < 4) { 429 for (int j=0; j<length; ++j) { 430 if (value == ESCAPE_BYTE) appendEncodedByte(buffer, ESCAPE_BYTE, state); 431 appendEncodedByte(buffer, value, state); 432 } 433 } 434 else { 435 if ((byte)length == ESCAPE_BYTE) { 436 if (value == ESCAPE_BYTE) appendEncodedByte(buffer, ESCAPE_BYTE, state); 437 appendEncodedByte(buffer, value, state); 438 --length; 439 } 440 appendEncodedByte(buffer, ESCAPE_BYTE, state); 441 appendEncodedByte(buffer, (byte)length, state); 442 appendEncodedByte(buffer, value, state); // Don't need to escape this value 443 } 444 } 445 446 /** 447 * Append a byte to the given Appendable, packing two bytes into each 448 * character. The state parameter maintains intermediary data between 449 * calls. 450 * @param state A two-element array, with state[0] == 0 if this is the 451 * first byte of a pair, or state[0] != 0 if this is the second byte 452 * of a pair, in which case state[1] is the first byte. 453 */ appendEncodedByte(T buffer, byte value, byte[] state)454 private static final <T extends Appendable> void appendEncodedByte(T buffer, byte value, 455 byte[] state) { 456 try { 457 if (state[0] != 0) { 458 char c = (char) ((state[1] << 8) | ((value) & 0xFF)); 459 buffer.append(c); 460 state[0] = 0; 461 } 462 else { 463 state[0] = 1; 464 state[1] = value; 465 } 466 } catch (IOException e) { 467 throw new IllegalIcuArgumentException(e); 468 } 469 } 470 471 /** 472 * Construct an array of ints from a run-length encoded string. 473 */ RLEStringToIntArray(String s)474 static public final int[] RLEStringToIntArray(String s) { 475 int length = getInt(s, 0); 476 int[] array = new int[length]; 477 int ai = 0, i = 1; 478 479 int maxI = s.length() / 2; 480 while (ai < length && i < maxI) { 481 int c = getInt(s, i++); 482 483 if (c == ESCAPE) { 484 c = getInt(s, i++); 485 if (c == ESCAPE) { 486 array[ai++] = c; 487 } else { 488 int runLength = c; 489 int runValue = getInt(s, i++); 490 for (int j=0; j<runLength; ++j) { 491 array[ai++] = runValue; 492 } 493 } 494 } 495 else { 496 array[ai++] = c; 497 } 498 } 499 500 if (ai != length || i != maxI) { 501 throw new IllegalStateException("Bad run-length encoded int array"); 502 } 503 504 return array; 505 } getInt(String s, int i)506 static final int getInt(String s, int i) { 507 return ((s.charAt(2*i)) << 16) | s.charAt(2*i+1); 508 } 509 510 /** 511 * Construct an array of shorts from a run-length encoded string. 512 */ RLEStringToShortArray(String s)513 static public final short[] RLEStringToShortArray(String s) { 514 int length = ((s.charAt(0)) << 16) | (s.charAt(1)); 515 short[] array = new short[length]; 516 int ai = 0; 517 for (int i=2; i<s.length(); ++i) { 518 char c = s.charAt(i); 519 if (c == ESCAPE) { 520 c = s.charAt(++i); 521 if (c == ESCAPE) { 522 array[ai++] = (short) c; 523 } else { 524 int runLength = c; 525 short runValue = (short) s.charAt(++i); 526 for (int j=0; j<runLength; ++j) array[ai++] = runValue; 527 } 528 } 529 else { 530 array[ai++] = (short) c; 531 } 532 } 533 534 if (ai != length) 535 throw new IllegalStateException("Bad run-length encoded short array"); 536 537 return array; 538 } 539 540 /** 541 * Construct an array of shorts from a run-length encoded string. 542 */ RLEStringToCharArray(String s)543 static public final char[] RLEStringToCharArray(String s) { 544 int length = ((s.charAt(0)) << 16) | (s.charAt(1)); 545 char[] array = new char[length]; 546 int ai = 0; 547 for (int i=2; i<s.length(); ++i) { 548 char c = s.charAt(i); 549 if (c == ESCAPE) { 550 c = s.charAt(++i); 551 if (c == ESCAPE) { 552 array[ai++] = c; 553 } else { 554 int runLength = c; 555 char runValue = s.charAt(++i); 556 for (int j=0; j<runLength; ++j) array[ai++] = runValue; 557 } 558 } 559 else { 560 array[ai++] = c; 561 } 562 } 563 564 if (ai != length) 565 throw new IllegalStateException("Bad run-length encoded short array"); 566 567 return array; 568 } 569 570 /** 571 * Construct an array of bytes from a run-length encoded string. 572 */ RLEStringToByteArray(String s)573 static public final byte[] RLEStringToByteArray(String s) { 574 int length = ((s.charAt(0)) << 16) | (s.charAt(1)); 575 byte[] array = new byte[length]; 576 boolean nextChar = true; 577 char c = 0; 578 int node = 0; 579 int runLength = 0; 580 int i = 2; 581 for (int ai=0; ai<length; ) { 582 // This part of the loop places the next byte into the local 583 // variable 'b' each time through the loop. It keeps the 584 // current character in 'c' and uses the boolean 'nextChar' 585 // to see if we've taken both bytes out of 'c' yet. 586 byte b; 587 if (nextChar) { 588 c = s.charAt(i++); 589 b = (byte) (c >> 8); 590 nextChar = false; 591 } 592 else { 593 b = (byte) (c & 0xFF); 594 nextChar = true; 595 } 596 597 // This part of the loop is a tiny state machine which handles 598 // the parsing of the run-length encoding. This would be simpler 599 // if we could look ahead, but we can't, so we use 'node' to 600 // move between three nodes in the state machine. 601 switch (node) { 602 case 0: 603 // Normal idle node 604 if (b == ESCAPE_BYTE) { 605 node = 1; 606 } 607 else { 608 array[ai++] = b; 609 } 610 break; 611 case 1: 612 // We have seen one ESCAPE_BYTE; we expect either a second 613 // one, or a run length and value. 614 if (b == ESCAPE_BYTE) { 615 array[ai++] = ESCAPE_BYTE; 616 node = 0; 617 } 618 else { 619 runLength = b; 620 // Interpret signed byte as unsigned 621 if (runLength < 0) runLength += 0x100; 622 node = 2; 623 } 624 break; 625 case 2: 626 // We have seen an ESCAPE_BYTE and length byte. We interpret 627 // the next byte as the value to be repeated. 628 for (int j=0; j<runLength; ++j) array[ai++] = b; 629 node = 0; 630 break; 631 } 632 } 633 634 if (node != 0) 635 throw new IllegalStateException("Bad run-length encoded byte array"); 636 637 if (i != s.length()) 638 throw new IllegalStateException("Excess data in RLE byte array string"); 639 640 return array; 641 } 642 643 static public String LINE_SEPARATOR = System.getProperty("line.separator"); 644 645 /** 646 * Format a String for representation in a source file. This includes 647 * breaking it into lines and escaping characters using octal notation 648 * when necessary (control characters and double quotes). 649 */ formatForSource(String s)650 static public final String formatForSource(String s) { 651 StringBuilder buffer = new StringBuilder(); 652 for (int i=0; i<s.length();) { 653 if (i > 0) buffer.append('+').append(LINE_SEPARATOR); 654 buffer.append(" \""); 655 int count = 11; 656 while (i<s.length() && count<80) { 657 char c = s.charAt(i++); 658 if (c < '\u0020' || c == '"' || c == '\\') { 659 if (c == '\n') { 660 buffer.append("\\n"); 661 count += 2; 662 } else if (c == '\t') { 663 buffer.append("\\t"); 664 count += 2; 665 } else if (c == '\r') { 666 buffer.append("\\r"); 667 count += 2; 668 } else { 669 // Represent control characters, backslash and double quote 670 // using octal notation; otherwise the string we form 671 // won't compile, since Unicode escape sequences are 672 // processed before tokenization. 673 buffer.append('\\'); 674 buffer.append(HEX_DIGIT[(c & 0700) >> 6]); // HEX_DIGIT works for octal 675 buffer.append(HEX_DIGIT[(c & 0070) >> 3]); 676 buffer.append(HEX_DIGIT[(c & 0007)]); 677 count += 4; 678 } 679 } 680 else if (c <= '\u007E') { 681 buffer.append(c); 682 count += 1; 683 } 684 else { 685 buffer.append("\\u"); 686 buffer.append(HEX_DIGIT[(c & 0xF000) >> 12]); 687 buffer.append(HEX_DIGIT[(c & 0x0F00) >> 8]); 688 buffer.append(HEX_DIGIT[(c & 0x00F0) >> 4]); 689 buffer.append(HEX_DIGIT[(c & 0x000F)]); 690 count += 6; 691 } 692 } 693 buffer.append('"'); 694 } 695 return buffer.toString(); 696 } 697 698 static final char[] HEX_DIGIT = {'0','1','2','3','4','5','6','7', 699 '8','9','A','B','C','D','E','F'}; 700 701 /** 702 * Format a String for representation in a source file. Like 703 * formatForSource but does not do line breaking. 704 */ format1ForSource(String s)705 static public final String format1ForSource(String s) { 706 StringBuilder buffer = new StringBuilder(); 707 buffer.append("\""); 708 for (int i=0; i<s.length();) { 709 char c = s.charAt(i++); 710 if (c < '\u0020' || c == '"' || c == '\\') { 711 if (c == '\n') { 712 buffer.append("\\n"); 713 } else if (c == '\t') { 714 buffer.append("\\t"); 715 } else if (c == '\r') { 716 buffer.append("\\r"); 717 } else { 718 // Represent control characters, backslash and double quote 719 // using octal notation; otherwise the string we form 720 // won't compile, since Unicode escape sequences are 721 // processed before tokenization. 722 buffer.append('\\'); 723 buffer.append(HEX_DIGIT[(c & 0700) >> 6]); // HEX_DIGIT works for octal 724 buffer.append(HEX_DIGIT[(c & 0070) >> 3]); 725 buffer.append(HEX_DIGIT[(c & 0007)]); 726 } 727 } 728 else if (c <= '\u007E') { 729 buffer.append(c); 730 } 731 else { 732 buffer.append("\\u"); 733 buffer.append(HEX_DIGIT[(c & 0xF000) >> 12]); 734 buffer.append(HEX_DIGIT[(c & 0x0F00) >> 8]); 735 buffer.append(HEX_DIGIT[(c & 0x00F0) >> 4]); 736 buffer.append(HEX_DIGIT[(c & 0x000F)]); 737 } 738 } 739 buffer.append('"'); 740 return buffer.toString(); 741 } 742 743 /** 744 * Convert characters outside the range U+0020 to U+007F to 745 * Unicode escapes, and convert backslash to a double backslash. 746 */ escape(String s)747 public static final String escape(String s) { 748 StringBuilder buf = new StringBuilder(); 749 for (int i=0; i<s.length(); ) { 750 int c = Character.codePointAt(s, i); 751 i += UTF16.getCharCount(c); 752 if (c >= ' ' && c <= 0x007F) { 753 if (c == '\\') { 754 buf.append("\\\\"); // That is, "\\" 755 } else { 756 buf.append((char)c); 757 } 758 } else { 759 boolean four = c <= 0xFFFF; 760 buf.append(four ? "\\u" : "\\U"); 761 buf.append(hex(c, four ? 4 : 8)); 762 } 763 } 764 return buf.toString(); 765 } 766 767 /* This map must be in ASCENDING ORDER OF THE ESCAPE CODE */ 768 static private final char[] UNESCAPE_MAP = { 769 /*" 0x22, 0x22 */ 770 /*' 0x27, 0x27 */ 771 /*? 0x3F, 0x3F */ 772 /*\ 0x5C, 0x5C */ 773 /*a*/ 0x61, 0x07, 774 /*b*/ 0x62, 0x08, 775 /*e*/ 0x65, 0x1b, 776 /*f*/ 0x66, 0x0c, 777 /*n*/ 0x6E, 0x0a, 778 /*r*/ 0x72, 0x0d, 779 /*t*/ 0x74, 0x09, 780 /*v*/ 0x76, 0x0b 781 }; 782 783 /** 784 * Convert an escape to a 32-bit code point value. We attempt 785 * to parallel the icu4c unescapeAt() function. 786 * @param offset16 an array containing offset to the character 787 * <em>after</em> the backslash. Upon return offset16[0] will 788 * be updated to point after the escape sequence. 789 * @return character value from 0 to 10FFFF, or -1 on error. 790 */ unescapeAt(String s, int[] offset16)791 public static int unescapeAt(String s, int[] offset16) { 792 int c; 793 int result = 0; 794 int n = 0; 795 int minDig = 0; 796 int maxDig = 0; 797 int bitsPerDigit = 4; 798 int dig; 799 int i; 800 boolean braces = false; 801 802 /* Check that offset is in range */ 803 int offset = offset16[0]; 804 int length = s.length(); 805 if (offset < 0 || offset >= length) { 806 return -1; 807 } 808 809 /* Fetch first UChar after '\\' */ 810 c = Character.codePointAt(s, offset); 811 offset += UTF16.getCharCount(c); 812 813 /* Convert hexadecimal and octal escapes */ 814 switch (c) { 815 case 'u': 816 minDig = maxDig = 4; 817 break; 818 case 'U': 819 minDig = maxDig = 8; 820 break; 821 case 'x': 822 minDig = 1; 823 if (offset < length && UTF16.charAt(s, offset) == 0x7B /*{*/) { 824 ++offset; 825 braces = true; 826 maxDig = 8; 827 } else { 828 maxDig = 2; 829 } 830 break; 831 default: 832 dig = UCharacter.digit(c, 8); 833 if (dig >= 0) { 834 minDig = 1; 835 maxDig = 3; 836 n = 1; /* Already have first octal digit */ 837 bitsPerDigit = 3; 838 result = dig; 839 } 840 break; 841 } 842 if (minDig != 0) { 843 while (offset < length && n < maxDig) { 844 c = UTF16.charAt(s, offset); 845 dig = UCharacter.digit(c, (bitsPerDigit == 3) ? 8 : 16); 846 if (dig < 0) { 847 break; 848 } 849 result = (result << bitsPerDigit) | dig; 850 offset += UTF16.getCharCount(c); 851 ++n; 852 } 853 if (n < minDig) { 854 return -1; 855 } 856 if (braces) { 857 if (c != 0x7D /*}*/) { 858 return -1; 859 } 860 ++offset; 861 } 862 if (result < 0 || result >= 0x110000) { 863 return -1; 864 } 865 // If an escape sequence specifies a lead surrogate, see 866 // if there is a trail surrogate after it, either as an 867 // escape or as a literal. If so, join them up into a 868 // supplementary. 869 if (offset < length && 870 UTF16.isLeadSurrogate((char) result)) { 871 int ahead = offset+1; 872 c = s.charAt(offset); // [sic] get 16-bit code unit 873 if (c == '\\' && ahead < length) { 874 int o[] = new int[] { ahead }; 875 c = unescapeAt(s, o); 876 ahead = o[0]; 877 } 878 if (UTF16.isTrailSurrogate((char) c)) { 879 offset = ahead; 880 result = Character.toCodePoint((char) result, (char) c); 881 } 882 } 883 offset16[0] = offset; 884 return result; 885 } 886 887 /* Convert C-style escapes in table */ 888 for (i=0; i<UNESCAPE_MAP.length; i+=2) { 889 if (c == UNESCAPE_MAP[i]) { 890 offset16[0] = offset; 891 return UNESCAPE_MAP[i+1]; 892 } else if (c < UNESCAPE_MAP[i]) { 893 break; 894 } 895 } 896 897 /* Map \cX to control-X: X & 0x1F */ 898 if (c == 'c' && offset < length) { 899 c = UTF16.charAt(s, offset); 900 offset16[0] = offset + UTF16.getCharCount(c); 901 return 0x1F & c; 902 } 903 904 /* If no special forms are recognized, then consider 905 * the backslash to generically escape the next character. */ 906 offset16[0] = offset; 907 return c; 908 } 909 910 /** 911 * Convert all escapes in a given string using unescapeAt(). 912 * @exception IllegalArgumentException if an invalid escape is 913 * seen. 914 */ unescape(String s)915 public static String unescape(String s) { 916 StringBuilder buf = new StringBuilder(); 917 int[] pos = new int[1]; 918 for (int i=0; i<s.length(); ) { 919 char c = s.charAt(i++); 920 if (c == '\\') { 921 pos[0] = i; 922 int e = unescapeAt(s, pos); 923 if (e < 0) { 924 throw new IllegalArgumentException("Invalid escape sequence " + 925 s.substring(i-1, Math.min(i+8, s.length()))); 926 } 927 buf.appendCodePoint(e); 928 i = pos[0]; 929 } else { 930 buf.append(c); 931 } 932 } 933 return buf.toString(); 934 } 935 936 /** 937 * Convert all escapes in a given string using unescapeAt(). 938 * Leave invalid escape sequences unchanged. 939 */ unescapeLeniently(String s)940 public static String unescapeLeniently(String s) { 941 StringBuilder buf = new StringBuilder(); 942 int[] pos = new int[1]; 943 for (int i=0; i<s.length(); ) { 944 char c = s.charAt(i++); 945 if (c == '\\') { 946 pos[0] = i; 947 int e = unescapeAt(s, pos); 948 if (e < 0) { 949 buf.append(c); 950 } else { 951 buf.appendCodePoint(e); 952 i = pos[0]; 953 } 954 } else { 955 buf.append(c); 956 } 957 } 958 return buf.toString(); 959 } 960 961 /** 962 * Convert a char to 4 hex uppercase digits. E.g., hex('a') => 963 * "0041". 964 */ hex(long ch)965 public static String hex(long ch) { 966 return hex(ch, 4); 967 } 968 969 /** 970 * Supplies a zero-padded hex representation of an integer (without 0x) 971 */ hex(long i, int places)972 static public String hex(long i, int places) { 973 if (i == Long.MIN_VALUE) return "-8000000000000000"; 974 boolean negative = i < 0; 975 if (negative) { 976 i = -i; 977 } 978 String result = Long.toString(i, 16).toUpperCase(Locale.ENGLISH); 979 if (result.length() < places) { 980 result = "0000000000000000".substring(result.length(),places) + result; 981 } 982 if (negative) { 983 return '-' + result; 984 } 985 return result; 986 } 987 988 /** 989 * Convert a string to comma-separated groups of 4 hex uppercase 990 * digits. E.g., hex('ab') => "0041,0042". 991 */ 992 public static String hex(CharSequence s) { 993 return hex(s, 4, ",", true, new StringBuilder()).toString(); 994 } 995 996 /** 997 * Convert a string to separated groups of hex uppercase 998 * digits. E.g., hex('ab'...) => "0041,0042". Append the output 999 * to the given Appendable. 1000 */ 1001 public static <S extends CharSequence, U extends CharSequence, T extends Appendable> T hex(S s, int width, U separator, boolean useCodePoints, T result) { 1002 try { 1003 if (useCodePoints) { 1004 int cp; 1005 for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) { 1006 cp = Character.codePointAt(s, i); 1007 if (i != 0) { 1008 result.append(separator); 1009 } 1010 result.append(hex(cp,width)); 1011 } 1012 } else { 1013 for (int i = 0; i < s.length(); ++i) { 1014 if (i != 0) { 1015 result.append(separator); 1016 } 1017 result.append(hex(s.charAt(i),width)); 1018 } 1019 } 1020 return result; 1021 } catch (IOException e) { 1022 throw new IllegalIcuArgumentException(e); 1023 } 1024 } 1025 1026 public static String hex(byte[] o, int start, int end, String separator) { 1027 StringBuilder result = new StringBuilder(); 1028 //int ch; 1029 for (int i = start; i < end; ++i) { 1030 if (i != 0) result.append(separator); 1031 result.append(hex(o[i])); 1032 } 1033 return result.toString(); 1034 } 1035 1036 /** 1037 * Convert a string to comma-separated groups of 4 hex uppercase 1038 * digits. E.g., hex('ab') => "0041,0042". 1039 */ 1040 public static <S extends CharSequence> String hex(S s, int width, S separator) { 1041 return hex(s, width, separator, true, new StringBuilder()).toString(); 1042 } 1043 1044 /** 1045 * Split a string into pieces based on the given divider character 1046 * @param s the string to split 1047 * @param divider the character on which to split. Occurrences of 1048 * this character are not included in the output 1049 * @param output an array to receive the substrings between 1050 * instances of divider. It must be large enough on entry to 1051 * accomodate all output. Adjacent instances of the divider 1052 * character will place empty strings into output. Before 1053 * returning, output is padded out with empty strings. 1054 */ 1055 public static void split(String s, char divider, String[] output) { 1056 int last = 0; 1057 int current = 0; 1058 int i; 1059 for (i = 0; i < s.length(); ++i) { 1060 if (s.charAt(i) == divider) { 1061 output[current++] = s.substring(last,i); 1062 last = i+1; 1063 } 1064 } 1065 output[current++] = s.substring(last,i); 1066 while (current < output.length) { 1067 output[current++] = ""; 1068 } 1069 } 1070 1071 /** 1072 * Split a string into pieces based on the given divider character 1073 * @param s the string to split 1074 * @param divider the character on which to split. Occurrences of 1075 * this character are not included in the output 1076 * @return output an array to receive the substrings between 1077 * instances of divider. Adjacent instances of the divider 1078 * character will place empty strings into output. 1079 */ 1080 public static String[] split(String s, char divider) { 1081 int last = 0; 1082 int i; 1083 ArrayList<String> output = new ArrayList<>(); 1084 for (i = 0; i < s.length(); ++i) { 1085 if (s.charAt(i) == divider) { 1086 output.add(s.substring(last,i)); 1087 last = i+1; 1088 } 1089 } 1090 output.add( s.substring(last,i)); 1091 return output.toArray(new String[output.size()]); 1092 } 1093 1094 /** 1095 * Look up a given string in a string array. Returns the index at 1096 * which the first occurrence of the string was found in the 1097 * array, or -1 if it was not found. 1098 * @param source the string to search for 1099 * @param target the array of zero or more strings in which to 1100 * look for source 1101 * @return the index of target at which source first occurs, or -1 1102 * if not found 1103 */ 1104 public static int lookup(String source, String[] target) { 1105 for (int i = 0; i < target.length; ++i) { 1106 if (source.equals(target[i])) return i; 1107 } 1108 return -1; 1109 } 1110 1111 /** 1112 * Parse a single non-whitespace character 'ch', optionally 1113 * preceded by whitespace. 1114 * @param id the string to be parsed 1115 * @param pos INPUT-OUTPUT parameter. On input, pos[0] is the 1116 * offset of the first character to be parsed. On output, pos[0] 1117 * is the index after the last parsed character. If the parse 1118 * fails, pos[0] will be unchanged. 1119 * @param ch the non-whitespace character to be parsed. 1120 * @return true if 'ch' is seen preceded by zero or more 1121 * whitespace characters. 1122 */ 1123 public static boolean parseChar(String id, int[] pos, char ch) { 1124 int start = pos[0]; 1125 pos[0] = PatternProps.skipWhiteSpace(id, pos[0]); 1126 if (pos[0] == id.length() || 1127 id.charAt(pos[0]) != ch) { 1128 pos[0] = start; 1129 return false; 1130 } 1131 ++pos[0]; 1132 return true; 1133 } 1134 1135 /** 1136 * Parse a pattern string starting at offset pos. Keywords are 1137 * matched case-insensitively. Spaces may be skipped and may be 1138 * optional or required. Integer values may be parsed, and if 1139 * they are, they will be returned in the given array. If 1140 * successful, the offset of the next non-space character is 1141 * returned. On failure, -1 is returned. 1142 * @param pattern must only contain lowercase characters, which 1143 * will match their uppercase equivalents as well. A space 1144 * character matches one or more required spaces. A '~' character 1145 * matches zero or more optional spaces. A '#' character matches 1146 * an integer and stores it in parsedInts, which the caller must 1147 * ensure has enough capacity. 1148 * @param parsedInts array to receive parsed integers. Caller 1149 * must ensure that parsedInts.length is >= the number of '#' 1150 * signs in 'pattern'. 1151 * @return the position after the last character parsed, or -1 if 1152 * the parse failed 1153 */ 1154 @SuppressWarnings("fallthrough") 1155 public static int parsePattern(String rule, int pos, int limit, 1156 String pattern, int[] parsedInts) { 1157 // TODO Update this to handle surrogates 1158 int[] p = new int[1]; 1159 int intCount = 0; // number of integers parsed 1160 for (int i=0; i<pattern.length(); ++i) { 1161 char cpat = pattern.charAt(i); 1162 char c; 1163 switch (cpat) { 1164 case ' ': 1165 if (pos >= limit) { 1166 return -1; 1167 } 1168 c = rule.charAt(pos++); 1169 if (!PatternProps.isWhiteSpace(c)) { 1170 return -1; 1171 } 1172 // FALL THROUGH to skipWhitespace 1173 case '~': 1174 pos = PatternProps.skipWhiteSpace(rule, pos); 1175 break; 1176 case '#': 1177 p[0] = pos; 1178 parsedInts[intCount++] = parseInteger(rule, p, limit); 1179 if (p[0] == pos) { 1180 // Syntax error; failed to parse integer 1181 return -1; 1182 } 1183 pos = p[0]; 1184 break; 1185 default: 1186 if (pos >= limit) { 1187 return -1; 1188 } 1189 c = (char) UCharacter.toLowerCase(rule.charAt(pos++)); 1190 if (c != cpat) { 1191 return -1; 1192 } 1193 break; 1194 } 1195 } 1196 return pos; 1197 } 1198 1199 /** 1200 * Parse a pattern string within the given Replaceable and a parsing 1201 * pattern. Characters are matched literally and case-sensitively 1202 * except for the following special characters: 1203 * 1204 * ~ zero or more Pattern_White_Space chars 1205 * 1206 * If end of pattern is reached with all matches along the way, 1207 * pos is advanced to the first unparsed index and returned. 1208 * Otherwise -1 is returned. 1209 * @param pat pattern that controls parsing 1210 * @param text text to be parsed, starting at index 1211 * @param index offset to first character to parse 1212 * @param limit offset after last character to parse 1213 * @return index after last parsed character, or -1 on parse failure. 1214 */ 1215 public static int parsePattern(String pat, 1216 Replaceable text, 1217 int index, 1218 int limit) { 1219 int ipat = 0; 1220 1221 // empty pattern matches immediately 1222 if (ipat == pat.length()) { 1223 return index; 1224 } 1225 1226 int cpat = Character.codePointAt(pat, ipat); 1227 1228 while (index < limit) { 1229 int c = text.char32At(index); 1230 1231 // parse \s* 1232 if (cpat == '~') { 1233 if (PatternProps.isWhiteSpace(c)) { 1234 index += UTF16.getCharCount(c); 1235 continue; 1236 } else { 1237 if (++ipat == pat.length()) { 1238 return index; // success; c unparsed 1239 } 1240 // fall thru; process c again with next cpat 1241 } 1242 } 1243 1244 // parse literal 1245 else if (c == cpat) { 1246 int n = UTF16.getCharCount(c); 1247 index += n; 1248 ipat += n; 1249 if (ipat == pat.length()) { 1250 return index; // success; c parsed 1251 } 1252 // fall thru; get next cpat 1253 } 1254 1255 // match failure of literal 1256 else { 1257 return -1; 1258 } 1259 1260 cpat = UTF16.charAt(pat, ipat); 1261 } 1262 1263 return -1; // text ended before end of pat 1264 } 1265 1266 /** 1267 * Parse an integer at pos, either of the form \d+ or of the form 1268 * 0x[0-9A-Fa-f]+ or 0[0-7]+, that is, in standard decimal, hex, 1269 * or octal format. 1270 * @param pos INPUT-OUTPUT parameter. On input, the first 1271 * character to parse. On output, the character after the last 1272 * parsed character. 1273 */ 1274 public static int parseInteger(String rule, int[] pos, int limit) { 1275 int count = 0; 1276 int value = 0; 1277 int p = pos[0]; 1278 int radix = 10; 1279 1280 if (rule.regionMatches(true, p, "0x", 0, 2)) { 1281 p += 2; 1282 radix = 16; 1283 } else if (p < limit && rule.charAt(p) == '0') { 1284 p++; 1285 count = 1; 1286 radix = 8; 1287 } 1288 1289 while (p < limit) { 1290 int d = UCharacter.digit(rule.charAt(p++), radix); 1291 if (d < 0) { 1292 --p; 1293 break; 1294 } 1295 ++count; 1296 int v = (value * radix) + d; 1297 if (v <= value) { 1298 // If there are too many input digits, at some point 1299 // the value will go negative, e.g., if we have seen 1300 // "0x8000000" already and there is another '0', when 1301 // we parse the next 0 the value will go negative. 1302 return 0; 1303 } 1304 value = v; 1305 } 1306 if (count > 0) { 1307 pos[0] = p; 1308 } 1309 return value; 1310 } 1311 1312 /** 1313 * Parse a Unicode identifier from the given string at the given 1314 * position. Return the identifier, or null if there is no 1315 * identifier. 1316 * @param str the string to parse 1317 * @param pos INPUT-OUPUT parameter. On INPUT, pos[0] is the 1318 * first character to examine. It must be less than str.length(), 1319 * and it must not point to a whitespace character. That is, must 1320 * have pos[0] < str.length(). On 1321 * OUTPUT, the position after the last parsed character. 1322 * @return the Unicode identifier, or null if there is no valid 1323 * identifier at pos[0]. 1324 */ 1325 public static String parseUnicodeIdentifier(String str, int[] pos) { 1326 // assert(pos[0] < str.length()); 1327 StringBuilder buf = new StringBuilder(); 1328 int p = pos[0]; 1329 while (p < str.length()) { 1330 int ch = Character.codePointAt(str, p); 1331 if (buf.length() == 0) { 1332 if (UCharacter.isUnicodeIdentifierStart(ch)) { 1333 buf.appendCodePoint(ch); 1334 } else { 1335 return null; 1336 } 1337 } else { 1338 if (UCharacter.isUnicodeIdentifierPart(ch)) { 1339 buf.appendCodePoint(ch); 1340 } else { 1341 break; 1342 } 1343 } 1344 p += UTF16.getCharCount(ch); 1345 } 1346 pos[0] = p; 1347 return buf.toString(); 1348 } 1349 1350 static final char DIGITS[] = { 1351 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 1352 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 1353 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 1354 'U', 'V', 'W', 'X', 'Y', 'Z' 1355 }; 1356 1357 /** 1358 * Append the digits of a positive integer to the given 1359 * <code>Appendable</code> in the given radix. This is 1360 * done recursively since it is easiest to generate the low- 1361 * order digit first, but it must be appended last. 1362 * 1363 * @param result is the <code>Appendable</code> to append to 1364 * @param n is the positive integer 1365 * @param radix is the radix, from 2 to 36 inclusive 1366 * @param minDigits is the minimum number of digits to append. 1367 */ 1368 private static <T extends Appendable> void recursiveAppendNumber(T result, int n, 1369 int radix, int minDigits) 1370 { 1371 try { 1372 int digit = n % radix; 1373 1374 if (n >= radix || minDigits > 1) { 1375 recursiveAppendNumber(result, n / radix, radix, minDigits - 1); 1376 } 1377 result.append(DIGITS[digit]); 1378 } catch (IOException e) { 1379 throw new IllegalIcuArgumentException(e); 1380 } 1381 } 1382 1383 /** 1384 * Append a number to the given Appendable in the given radix. 1385 * Standard digits '0'-'9' are used and letters 'A'-'Z' for 1386 * radices 11 through 36. 1387 * @param result the digits of the number are appended here 1388 * @param n the number to be converted to digits; may be negative. 1389 * If negative, a '-' is prepended to the digits. 1390 * @param radix a radix from 2 to 36 inclusive. 1391 * @param minDigits the minimum number of digits, not including 1392 * any '-', to produce. Values less than 2 have no effect. One 1393 * digit is always emitted regardless of this parameter. 1394 * @return a reference to result 1395 */ 1396 public static <T extends Appendable> T appendNumber(T result, int n, 1397 int radix, int minDigits) 1398 { 1399 try { 1400 if (radix < 2 || radix > 36) { 1401 throw new IllegalArgumentException("Illegal radix " + radix); 1402 } 1403 1404 1405 int abs = n; 1406 1407 if (n < 0) { 1408 abs = -n; 1409 result.append("-"); 1410 } 1411 1412 recursiveAppendNumber(result, abs, radix, minDigits); 1413 1414 return result; 1415 } catch (IOException e) { 1416 throw new IllegalIcuArgumentException(e); 1417 } 1418 1419 } 1420 1421 /** 1422 * Parse an unsigned 31-bit integer at the given offset. Use 1423 * UCharacter.digit() to parse individual characters into digits. 1424 * @param text the text to be parsed 1425 * @param pos INPUT-OUTPUT parameter. On entry, pos[0] is the 1426 * offset within text at which to start parsing; it should point 1427 * to a valid digit. On exit, pos[0] is the offset after the last 1428 * parsed character. If the parse failed, it will be unchanged on 1429 * exit. Must be >= 0 on entry. 1430 * @param radix the radix in which to parse; must be >= 2 and <= 1431 * 36. 1432 * @return a non-negative parsed number, or -1 upon parse failure. 1433 * Parse fails if there are no digits, that is, if pos[0] does not 1434 * point to a valid digit on entry, or if the number to be parsed 1435 * does not fit into a 31-bit unsigned integer. 1436 */ 1437 public static int parseNumber(String text, int[] pos, int radix) { 1438 // assert(pos[0] >= 0); 1439 // assert(radix >= 2); 1440 // assert(radix <= 36); 1441 int n = 0; 1442 int p = pos[0]; 1443 while (p < text.length()) { 1444 int ch = Character.codePointAt(text, p); 1445 int d = UCharacter.digit(ch, radix); 1446 if (d < 0) { 1447 break; 1448 } 1449 n = radix*n + d; 1450 // ASSUME that when a 32-bit integer overflows it becomes 1451 // negative. E.g., 214748364 * 10 + 8 => negative value. 1452 if (n < 0) { 1453 return -1; 1454 } 1455 ++p; 1456 } 1457 if (p == pos[0]) { 1458 return -1; 1459 } 1460 pos[0] = p; 1461 return n; 1462 } 1463 1464 /** 1465 * Return true if the character is NOT printable ASCII. The tab, 1466 * newline and linefeed characters are considered unprintable. 1467 */ 1468 public static boolean isUnprintable(int c) { 1469 //0x20 = 32 and 0x7E = 126 1470 return !(c >= 0x20 && c <= 0x7E); 1471 } 1472 1473 /** 1474 * Escape unprintable characters using <backslash>uxxxx notation 1475 * for U+0000 to U+FFFF and <backslash>Uxxxxxxxx for U+10000 and 1476 * above. If the character is printable ASCII, then do nothing 1477 * and return FALSE. Otherwise, append the escaped notation and 1478 * return TRUE. 1479 */ 1480 public static <T extends Appendable> boolean escapeUnprintable(T result, int c) { 1481 try { 1482 if (isUnprintable(c)) { 1483 result.append('\\'); 1484 if ((c & ~0xFFFF) != 0) { 1485 result.append('U'); 1486 result.append(DIGITS[0xF&(c>>28)]); 1487 result.append(DIGITS[0xF&(c>>24)]); 1488 result.append(DIGITS[0xF&(c>>20)]); 1489 result.append(DIGITS[0xF&(c>>16)]); 1490 } else { 1491 result.append('u'); 1492 } 1493 result.append(DIGITS[0xF&(c>>12)]); 1494 result.append(DIGITS[0xF&(c>>8)]); 1495 result.append(DIGITS[0xF&(c>>4)]); 1496 result.append(DIGITS[0xF&c]); 1497 return true; 1498 } 1499 return false; 1500 } catch (IOException e) { 1501 throw new IllegalIcuArgumentException(e); 1502 } 1503 } 1504 1505 /** 1506 * Returns the index of the first character in a set, ignoring quoted text. 1507 * For example, in the string "abc'hide'h", the 'h' in "hide" will not be 1508 * found by a search for "h". Unlike String.indexOf(), this method searches 1509 * not for a single character, but for any character of the string 1510 * <code>setOfChars</code>. 1511 * @param text text to be searched 1512 * @param start the beginning index, inclusive; <code>0 <= start 1513 * <= limit</code>. 1514 * @param limit the ending index, exclusive; <code>start <= limit 1515 * <= text.length()</code>. 1516 * @param setOfChars string with one or more distinct characters 1517 * @return Offset of the first character in <code>setOfChars</code> 1518 * found, or -1 if not found. 1519 * @see String#indexOf 1520 */ 1521 public static int quotedIndexOf(String text, int start, int limit, 1522 String setOfChars) { 1523 for (int i=start; i<limit; ++i) { 1524 char c = text.charAt(i); 1525 if (c == BACKSLASH) { 1526 ++i; 1527 } else if (c == APOSTROPHE) { 1528 while (++i < limit 1529 && text.charAt(i) != APOSTROPHE) {} 1530 } else if (setOfChars.indexOf(c) >= 0) { 1531 return i; 1532 } 1533 } 1534 return -1; 1535 } 1536 1537 /** 1538 * Append a character to a rule that is being built up. To flush 1539 * the quoteBuf to rule, make one final call with isLiteral == true. 1540 * If there is no final character, pass in (int)-1 as c. 1541 * @param rule the string to append the character to 1542 * @param c the character to append, or (int)-1 if none. 1543 * @param isLiteral if true, then the given character should not be 1544 * quoted or escaped. Usually this means it is a syntactic element 1545 * such as > or $ 1546 * @param escapeUnprintable if true, then unprintable characters 1547 * should be escaped using escapeUnprintable(). These escapes will 1548 * appear outside of quotes. 1549 * @param quoteBuf a buffer which is used to build up quoted 1550 * substrings. The caller should initially supply an empty buffer, 1551 * and thereafter should not modify the buffer. The buffer should be 1552 * cleared out by, at the end, calling this method with a literal 1553 * character (which may be -1). 1554 */ 1555 public static void appendToRule(StringBuffer rule, 1556 int c, 1557 boolean isLiteral, 1558 boolean escapeUnprintable, 1559 StringBuffer quoteBuf) { 1560 // If we are escaping unprintables, then escape them outside 1561 // quotes. \\u and \\U are not recognized within quotes. The same 1562 // logic applies to literals, but literals are never escaped. 1563 if (isLiteral || 1564 (escapeUnprintable && Utility.isUnprintable(c))) { 1565 if (quoteBuf.length() > 0) { 1566 // We prefer backslash APOSTROPHE to double APOSTROPHE 1567 // (more readable, less similar to ") so if there are 1568 // double APOSTROPHEs at the ends, we pull them outside 1569 // of the quote. 1570 1571 // If the first thing in the quoteBuf is APOSTROPHE 1572 // (doubled) then pull it out. 1573 while (quoteBuf.length() >= 2 && 1574 quoteBuf.charAt(0) == APOSTROPHE && 1575 quoteBuf.charAt(1) == APOSTROPHE) { 1576 rule.append(BACKSLASH).append(APOSTROPHE); 1577 quoteBuf.delete(0, 2); 1578 } 1579 // If the last thing in the quoteBuf is APOSTROPHE 1580 // (doubled) then remove and count it and add it after. 1581 int trailingCount = 0; 1582 while (quoteBuf.length() >= 2 && 1583 quoteBuf.charAt(quoteBuf.length()-2) == APOSTROPHE && 1584 quoteBuf.charAt(quoteBuf.length()-1) == APOSTROPHE) { 1585 quoteBuf.setLength(quoteBuf.length()-2); 1586 ++trailingCount; 1587 } 1588 if (quoteBuf.length() > 0) { 1589 rule.append(APOSTROPHE); 1590 rule.append(quoteBuf); 1591 rule.append(APOSTROPHE); 1592 quoteBuf.setLength(0); 1593 } 1594 while (trailingCount-- > 0) { 1595 rule.append(BACKSLASH).append(APOSTROPHE); 1596 } 1597 } 1598 if (c != -1) { 1599 /* Since spaces are ignored during parsing, they are 1600 * emitted only for readability. We emit one here 1601 * only if there isn't already one at the end of the 1602 * rule. 1603 */ 1604 if (c == ' ') { 1605 int len = rule.length(); 1606 if (len > 0 && rule.charAt(len-1) != ' ') { 1607 rule.append(' '); 1608 } 1609 } else if (!escapeUnprintable || !Utility.escapeUnprintable(rule, c)) { 1610 rule.appendCodePoint(c); 1611 } 1612 } 1613 } 1614 1615 // Escape ' and '\' and don't begin a quote just for them 1616 else if (quoteBuf.length() == 0 && 1617 (c == APOSTROPHE || c == BACKSLASH)) { 1618 rule.append(BACKSLASH).append((char)c); 1619 } 1620 1621 // Specials (printable ascii that isn't [0-9a-zA-Z]) and 1622 // whitespace need quoting. Also append stuff to quotes if we are 1623 // building up a quoted substring already. 1624 else if (quoteBuf.length() > 0 || 1625 (c >= 0x0021 && c <= 0x007E && 1626 !((c >= 0x0030/*'0'*/ && c <= 0x0039/*'9'*/) || 1627 (c >= 0x0041/*'A'*/ && c <= 0x005A/*'Z'*/) || 1628 (c >= 0x0061/*'a'*/ && c <= 0x007A/*'z'*/))) || 1629 PatternProps.isWhiteSpace(c)) { 1630 quoteBuf.appendCodePoint(c); 1631 // Double ' within a quote 1632 if (c == APOSTROPHE) { 1633 quoteBuf.append((char)c); 1634 } 1635 } 1636 1637 // Otherwise just append 1638 else { 1639 rule.appendCodePoint(c); 1640 } 1641 } 1642 1643 /** 1644 * Append the given string to the rule. Calls the single-character 1645 * version of appendToRule for each character. 1646 */ 1647 public static void appendToRule(StringBuffer rule, 1648 String text, 1649 boolean isLiteral, 1650 boolean escapeUnprintable, 1651 StringBuffer quoteBuf) { 1652 for (int i=0; i<text.length(); ++i) { 1653 // Okay to process in 16-bit code units here 1654 appendToRule(rule, text.charAt(i), isLiteral, escapeUnprintable, quoteBuf); 1655 } 1656 } 1657 1658 /** 1659 * Given a matcher reference, which may be null, append its 1660 * pattern as a literal to the given rule. 1661 */ 1662 public static void appendToRule(StringBuffer rule, 1663 UnicodeMatcher matcher, 1664 boolean escapeUnprintable, 1665 StringBuffer quoteBuf) { 1666 if (matcher != null) { 1667 appendToRule(rule, matcher.toPattern(escapeUnprintable), 1668 true, escapeUnprintable, quoteBuf); 1669 } 1670 } 1671 1672 /** 1673 * Compares 2 unsigned integers 1674 * @param source 32 bit unsigned integer 1675 * @param target 32 bit unsigned integer 1676 * @return 0 if equals, 1 if source is greater than target and -1 1677 * otherwise 1678 */ 1679 public static final int compareUnsigned(int source, int target) 1680 { 1681 source += MAGIC_UNSIGNED; 1682 target += MAGIC_UNSIGNED; 1683 if (source < target) { 1684 return -1; 1685 } 1686 else if (source > target) { 1687 return 1; 1688 } 1689 return 0; 1690 } 1691 1692 /** 1693 * Find the highest bit in a positive integer. This is done 1694 * by doing a binary search through the bits. 1695 * 1696 * @param n is the integer 1697 * 1698 * @return the bit number of the highest bit, with 0 being 1699 * the low order bit, or -1 if <code>n</code> is not positive 1700 */ 1701 public static final byte highBit(int n) 1702 { 1703 if (n <= 0) { 1704 return -1; 1705 } 1706 1707 byte bit = 0; 1708 1709 if (n >= 1 << 16) { 1710 n >>= 16; 1711 bit += 16; 1712 } 1713 1714 if (n >= 1 << 8) { 1715 n >>= 8; 1716 bit += 8; 1717 } 1718 1719 if (n >= 1 << 4) { 1720 n >>= 4; 1721 bit += 4; 1722 } 1723 1724 if (n >= 1 << 2) { 1725 n >>= 2; 1726 bit += 2; 1727 } 1728 1729 if (n >= 1 << 1) { 1730 n >>= 1; 1731 bit += 1; 1732 } 1733 1734 return bit; 1735 } 1736 /** 1737 * Utility method to take a int[] containing codepoints and return 1738 * a string representation with code units. 1739 */ 1740 public static String valueOf(int[]source){ 1741 // TODO: Investigate why this method is not on UTF16 class 1742 StringBuilder result = new StringBuilder(source.length); 1743 for(int i=0; i<source.length; i++){ 1744 result.appendCodePoint(source[i]); 1745 } 1746 return result.toString(); 1747 } 1748 1749 1750 /** 1751 * Utility to duplicate a string count times 1752 * @param s String to be duplicated. 1753 * @param count Number of times to duplicate a string. 1754 */ 1755 public static String repeat(String s, int count) { 1756 if (count <= 0) return ""; 1757 if (count == 1) return s; 1758 StringBuilder result = new StringBuilder(); 1759 for (int i = 0; i < count; ++i) { 1760 result.append(s); 1761 } 1762 return result.toString(); 1763 } 1764 1765 public static String[] splitString(String src, String target) { 1766 return src.split("\\Q" + target + "\\E"); 1767 } 1768 1769 /** 1770 * Split the string at runs of ascii whitespace characters. 1771 */ 1772 public static String[] splitWhitespace(String src) { 1773 return src.split("\\s+"); 1774 } 1775 1776 /** 1777 * Parse a list of hex numbers and return a string 1778 * @param string String of hex numbers. 1779 * @param minLength Minimal length. 1780 * @param separator Separator. 1781 * @return A string from hex numbers. 1782 */ 1783 public static String fromHex(String string, int minLength, String separator) { 1784 return fromHex(string, minLength, Pattern.compile(separator != null ? separator : "\\s+")); 1785 } 1786 1787 /** 1788 * Parse a list of hex numbers and return a string 1789 * @param string String of hex numbers. 1790 * @param minLength Minimal length. 1791 * @param separator Separator. 1792 * @return A string from hex numbers. 1793 */ 1794 public static String fromHex(String string, int minLength, Pattern separator) { 1795 StringBuilder buffer = new StringBuilder(); 1796 String[] parts = separator.split(string); 1797 for (String part : parts) { 1798 if (part.length() < minLength) { 1799 throw new IllegalArgumentException("code point too short: " + part); 1800 } 1801 int cp = Integer.parseInt(part, 16); 1802 buffer.appendCodePoint(cp); 1803 } 1804 return buffer.toString(); 1805 } 1806 1807 /** 1808 * This implementation is equivalent to Java 8+ Math#addExact(int, int) 1809 * @param x the first value 1810 * @param y the second value 1811 * @return the result 1812 */ 1813 public static int addExact(int x, int y) { 1814 int r = x + y; 1815 // HD 2-12 Overflow iff both arguments have the opposite sign of the result 1816 if (((x ^ r) & (y ^ r)) < 0) { 1817 throw new ArithmeticException("integer overflow"); 1818 } 1819 return r; 1820 } 1821 1822 /** 1823 * Returns whether the chars in the two CharSequences are equal. 1824 */ 1825 public static boolean charSequenceEquals(CharSequence a, CharSequence b) { 1826 if (a == b) { 1827 return true; 1828 } 1829 if (a == null || b == null) { 1830 return false; 1831 } 1832 if (a.length() != b.length()) { 1833 return false; 1834 } 1835 for (int i = 0; i < a.length(); i++) { 1836 if (a.charAt(i) != b.charAt(i)) 1837 return false; 1838 } 1839 return true; 1840 } 1841 1842 /** 1843 * Returns a hash code for a CharSequence that is equivalent to calling 1844 * charSequence.toString().hashCode() 1845 */ 1846 public static int charSequenceHashCode(CharSequence value) { 1847 int hash = 0; 1848 for (int i = 0; i < value.length(); i++) { 1849 hash = hash * 31 + value.charAt(i); 1850 } 1851 return hash; 1852 } 1853 1854 /** 1855 * Appends a CharSequence to an Appendable, converting IOException to ICUUncheckedIOException. 1856 */ 1857 public static <A extends Appendable> A appendTo(CharSequence string, A appendable) { 1858 try { 1859 appendable.append(string); 1860 return appendable; 1861 } catch (IOException e) { 1862 throw new ICUUncheckedIOException(e); 1863 } 1864 } 1865 } 1866