• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 /*
5  *******************************************************************************
6  * Copyright (C) 1996-2015, International Business Machines Corporation and    *
7  * others. All Rights Reserved.                                                *
8  *******************************************************************************
9  */
10 package ohos.global.icu.impl;
11 
12 import java.io.IOException;
13 import java.util.ArrayList;
14 import java.util.Locale;
15 import java.util.regex.Pattern;
16 
17 import ohos.global.icu.lang.UCharacter;
18 import ohos.global.icu.text.Replaceable;
19 import ohos.global.icu.text.UTF16;
20 import ohos.global.icu.text.UnicodeMatcher;
21 import ohos.global.icu.util.ICUUncheckedIOException;
22 
23 /**
24  * @hide exposed on OHOS
25  */
26 public final class Utility {
27 
28     private static final char APOSTROPHE = '\'';
29     private static final char BACKSLASH  = '\\';
30     private static final int MAGIC_UNSIGNED = 0x80000000;
31 
32     /**
33      * Convenience utility to compare two Object[]s.
34      * Ought to be in System
35      */
arrayEquals(Object[] source, Object target)36     public final static boolean arrayEquals(Object[] source, Object target) {
37         if (source == null) return (target == null);
38         if (!(target instanceof Object[])) return false;
39         Object[] targ = (Object[]) target;
40         return (source.length == targ.length
41                 && arrayRegionMatches(source, 0, targ, 0, source.length));
42     }
43 
44     /**
45      * Convenience utility to compare two int[]s
46      * Ought to be in System
47      */
arrayEquals(int[] source, Object target)48     public final static boolean arrayEquals(int[] source, Object target) {
49         if (source == null) return (target == null);
50         if (!(target instanceof int[])) return false;
51         int[] targ = (int[]) target;
52         return (source.length == targ.length
53                 && arrayRegionMatches(source, 0, targ, 0, source.length));
54     }
55 
56     /**
57      * Convenience utility to compare two double[]s
58      * Ought to be in System
59      */
arrayEquals(double[] source, Object target)60     public final static boolean arrayEquals(double[] source, Object target) {
61         if (source == null) return (target == null);
62         if (!(target instanceof double[])) return false;
63         double[] targ = (double[]) target;
64         return (source.length == targ.length
65                 && arrayRegionMatches(source, 0, targ, 0, source.length));
66     }
arrayEquals(byte[] source, Object target)67     public final static boolean arrayEquals(byte[] source, Object target) {
68         if (source == null) return (target == null);
69         if (!(target instanceof byte[])) return false;
70         byte[] targ = (byte[]) target;
71         return (source.length == targ.length
72                 && arrayRegionMatches(source, 0, targ, 0, source.length));
73     }
74 
75     /**
76      * Convenience utility to compare two Object[]s
77      * Ought to be in System
78      */
arrayEquals(Object source, Object target)79     public final static boolean arrayEquals(Object source, Object target) {
80         if (source == null) return (target == null);
81         // for some reason, the correct arrayEquals is not being called
82         // so do it by hand for now.
83         if (source instanceof Object[])
84             return(arrayEquals((Object[]) source,target));
85         if (source instanceof int[])
86             return(arrayEquals((int[]) source,target));
87         if (source instanceof double[])
88             return(arrayEquals((double[]) source, target));
89         if (source instanceof byte[])
90             return(arrayEquals((byte[]) source,target));
91         return source.equals(target);
92     }
93 
94     /**
95      * Convenience utility to compare two Object[]s
96      * Ought to be in System.
97      * @param len the length to compare.
98      * The start indices and start+len must be valid.
99      */
arrayRegionMatches(Object[] source, int sourceStart, Object[] target, int targetStart, int len)100     public final static boolean arrayRegionMatches(Object[] source, int sourceStart,
101             Object[] target, int targetStart,
102             int len)
103     {
104         int sourceEnd = sourceStart + len;
105         int delta = targetStart - sourceStart;
106         for (int i = sourceStart; i < sourceEnd; i++) {
107             if (!arrayEquals(source[i],target[i + delta]))
108                 return false;
109         }
110         return true;
111     }
112 
113     /**
114      * Convenience utility to compare two Object[]s
115      * Ought to be in System.
116      * @param len the length to compare.
117      * The start indices and start+len must be valid.
118      */
arrayRegionMatches(char[] source, int sourceStart, char[] target, int targetStart, int len)119     public final static boolean arrayRegionMatches(char[] source, int sourceStart,
120             char[] target, int targetStart,
121             int len)
122     {
123         int sourceEnd = sourceStart + len;
124         int delta = targetStart - sourceStart;
125         for (int i = sourceStart; i < sourceEnd; i++) {
126             if (source[i]!=target[i + delta])
127                 return false;
128         }
129         return true;
130     }
131 
132     /**
133      * Convenience utility to compare two int[]s.
134      * @param len the length to compare.
135      * The start indices and start+len must be valid.
136      * Ought to be in System
137      */
arrayRegionMatches(int[] source, int sourceStart, int[] target, int targetStart, int len)138     public final static boolean arrayRegionMatches(int[] source, int sourceStart,
139             int[] target, int targetStart,
140             int len)
141     {
142         int sourceEnd = sourceStart + len;
143         int delta = targetStart - sourceStart;
144         for (int i = sourceStart; i < sourceEnd; i++) {
145             if (source[i] != target[i + delta])
146                 return false;
147         }
148         return true;
149     }
150 
151     /**
152      * Convenience utility to compare two arrays of doubles.
153      * @param len the length to compare.
154      * The start indices and start+len must be valid.
155      * Ought to be in System
156      */
arrayRegionMatches(double[] source, int sourceStart, double[] target, int targetStart, int len)157     public final static boolean arrayRegionMatches(double[] source, int sourceStart,
158             double[] target, int targetStart,
159             int len)
160     {
161         int sourceEnd = sourceStart + len;
162         int delta = targetStart - sourceStart;
163         for (int i = sourceStart; i < sourceEnd; i++) {
164             if (source[i] != target[i + delta])
165                 return false;
166         }
167         return true;
168     }
arrayRegionMatches(byte[] source, int sourceStart, byte[] target, int targetStart, int len)169     public final static boolean arrayRegionMatches(byte[] source, int sourceStart,
170             byte[] target, int targetStart, int len){
171         int sourceEnd = sourceStart + len;
172         int delta = targetStart - sourceStart;
173         for (int i = sourceStart; i < sourceEnd; i++) {
174             if (source[i] != target[i + delta])
175                 return false;
176         }
177         return true;
178     }
179 
180     /**
181      * Trivial reference equality.
182      * This method should help document that we really want == not equals(),
183      * and to have a single place to suppress warnings from static analysis tools.
184      */
sameObjects(Object a, Object b)185     public static final boolean sameObjects(Object a, Object b) {
186         return a == b;
187     }
188 
189     /**
190      * Convenience utility. Does null checks on objects, then calls compare.
191      */
checkCompare(T a, T b)192     public static <T extends Comparable<T>> int checkCompare(T a, T b) {
193         return a == null ?
194                 b == null ? 0 : -1 :
195                     b == null ? 1 : a.compareTo(b);
196       }
197 
198     /**
199      * Convenience utility. Does null checks on object, then calls hashCode.
200      */
checkHash(Object a)201     public static int checkHash(Object a) {
202         return a == null ? 0 : a.hashCode();
203       }
204 
205     /**
206      * The ESCAPE character is used during run-length encoding.  It signals
207      * a run of identical chars.
208      */
209     private static final char ESCAPE = '\uA5A5';
210 
211     /**
212      * The ESCAPE_BYTE character is used during run-length encoding.  It signals
213      * a run of identical bytes.
214      */
215     static final byte ESCAPE_BYTE = (byte)0xA5;
216 
217     /**
218      * Construct a string representing an int array.  Use run-length encoding.
219      * A character represents itself, unless it is the ESCAPE character.  Then
220      * the following notations are possible:
221      *   ESCAPE ESCAPE   ESCAPE literal
222      *   ESCAPE n c      n instances of character c
223      * Since an encoded run occupies 3 characters, we only encode runs of 4 or
224      * more characters.  Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
225      * If we encounter a run where n == ESCAPE, we represent this as:
226      *   c ESCAPE n-1 c
227      * The ESCAPE value is chosen so as not to collide with commonly
228      * seen values.
229      */
arrayToRLEString(int[] a)230     static public final String arrayToRLEString(int[] a) {
231         StringBuilder buffer = new StringBuilder();
232 
233         appendInt(buffer, a.length);
234         int runValue = a[0];
235         int runLength = 1;
236         for (int i=1; i<a.length; ++i) {
237             int s = a[i];
238             if (s == runValue && runLength < 0xFFFF) {
239                 ++runLength;
240             } else {
241                 encodeRun(buffer, runValue, runLength);
242                 runValue = s;
243                 runLength = 1;
244             }
245         }
246         encodeRun(buffer, runValue, runLength);
247         return buffer.toString();
248     }
249 
250     /**
251      * Construct a string representing a short array.  Use run-length encoding.
252      * A character represents itself, unless it is the ESCAPE character.  Then
253      * the following notations are possible:
254      *   ESCAPE ESCAPE   ESCAPE literal
255      *   ESCAPE n c      n instances of character c
256      * Since an encoded run occupies 3 characters, we only encode runs of 4 or
257      * more characters.  Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
258      * If we encounter a run where n == ESCAPE, we represent this as:
259      *   c ESCAPE n-1 c
260      * The ESCAPE value is chosen so as not to collide with commonly
261      * seen values.
262      */
arrayToRLEString(short[] a)263     static public final String arrayToRLEString(short[] a) {
264         StringBuilder buffer = new StringBuilder();
265         // for (int i=0; i<a.length; ++i) buffer.append((char) a[i]);
266         buffer.append((char) (a.length >> 16));
267         buffer.append((char) a.length);
268         short runValue = a[0];
269         int runLength = 1;
270         for (int i=1; i<a.length; ++i) {
271             short s = a[i];
272             if (s == runValue && runLength < 0xFFFF) ++runLength;
273             else {
274                 encodeRun(buffer, runValue, runLength);
275                 runValue = s;
276                 runLength = 1;
277             }
278         }
279         encodeRun(buffer, runValue, runLength);
280         return buffer.toString();
281     }
282 
283     /**
284      * Construct a string representing a char array.  Use run-length encoding.
285      * A character represents itself, unless it is the ESCAPE character.  Then
286      * the following notations are possible:
287      *   ESCAPE ESCAPE   ESCAPE literal
288      *   ESCAPE n c      n instances of character c
289      * Since an encoded run occupies 3 characters, we only encode runs of 4 or
290      * more characters.  Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
291      * If we encounter a run where n == ESCAPE, we represent this as:
292      *   c ESCAPE n-1 c
293      * The ESCAPE value is chosen so as not to collide with commonly
294      * seen values.
295      */
arrayToRLEString(char[] a)296     static public final String arrayToRLEString(char[] a) {
297         StringBuilder buffer = new StringBuilder();
298         buffer.append((char) (a.length >> 16));
299         buffer.append((char) a.length);
300         char runValue = a[0];
301         int runLength = 1;
302         for (int i=1; i<a.length; ++i) {
303             char s = a[i];
304             if (s == runValue && runLength < 0xFFFF) ++runLength;
305             else {
306                 encodeRun(buffer, (short)runValue, runLength);
307                 runValue = s;
308                 runLength = 1;
309             }
310         }
311         encodeRun(buffer, (short)runValue, runLength);
312         return buffer.toString();
313     }
314 
315     /**
316      * Construct a string representing a byte array.  Use run-length encoding.
317      * Two bytes are packed into a single char, with a single extra zero byte at
318      * the end if needed.  A byte represents itself, unless it is the
319      * ESCAPE_BYTE.  Then the following notations are possible:
320      *   ESCAPE_BYTE ESCAPE_BYTE   ESCAPE_BYTE literal
321      *   ESCAPE_BYTE n b           n instances of byte b
322      * Since an encoded run occupies 3 bytes, we only encode runs of 4 or
323      * more bytes.  Thus we have n > 0 and n != ESCAPE_BYTE and n <= 0xFF.
324      * If we encounter a run where n == ESCAPE_BYTE, we represent this as:
325      *   b ESCAPE_BYTE n-1 b
326      * The ESCAPE_BYTE value is chosen so as not to collide with commonly
327      * seen values.
328      */
arrayToRLEString(byte[] a)329     static public final String arrayToRLEString(byte[] a) {
330         StringBuilder buffer = new StringBuilder();
331         buffer.append((char) (a.length >> 16));
332         buffer.append((char) a.length);
333         byte runValue = a[0];
334         int runLength = 1;
335         byte[] state = new byte[2];
336         for (int i=1; i<a.length; ++i) {
337             byte b = a[i];
338             if (b == runValue && runLength < 0xFF) ++runLength;
339             else {
340                 encodeRun(buffer, runValue, runLength, state);
341                 runValue = b;
342                 runLength = 1;
343             }
344         }
345         encodeRun(buffer, runValue, runLength, state);
346 
347         // We must save the final byte, if there is one, by padding
348         // an extra zero.
349         if (state[0] != 0) appendEncodedByte(buffer, (byte)0, state);
350 
351         return buffer.toString();
352     }
353 
354     /**
355      * Encode a run, possibly a degenerate run (of < 4 values).
356      * @param length The length of the run; must be > 0 && <= 0xFFFF.
357      */
encodeRun(T buffer, int value, int length)358     private static final <T extends Appendable> void encodeRun(T buffer, int value, int length) {
359         if (length < 4) {
360             for (int j=0; j<length; ++j) {
361                 if (value == ESCAPE) {
362                     appendInt(buffer, value);
363                 }
364                 appendInt(buffer, value);
365             }
366         }
367         else {
368             if (length == ESCAPE) {
369                 if (value == ESCAPE) {
370                     appendInt(buffer, ESCAPE);
371                 }
372                 appendInt(buffer, value);
373                 --length;
374             }
375             appendInt(buffer, ESCAPE);
376             appendInt(buffer, length);
377             appendInt(buffer, value); // Don't need to escape this value
378         }
379     }
380 
appendInt(T buffer, int value)381     private static final <T extends Appendable> void appendInt(T buffer, int value) {
382         try {
383             buffer.append((char)(value >>> 16));
384             buffer.append((char)(value & 0xFFFF));
385         } catch (IOException e) {
386             throw new IllegalIcuArgumentException(e);
387         }
388     }
389 
390     /**
391      * Encode a run, possibly a degenerate run (of < 4 values).
392      * @param length The length of the run; must be > 0 && <= 0xFFFF.
393      */
encodeRun(T buffer, short value, int length)394     private static final <T extends Appendable> void encodeRun(T buffer, short value, int length) {
395         try {
396             char valueChar = (char) value;
397             if (length < 4) {
398                 for (int j=0; j<length; ++j) {
399                     if (valueChar == ESCAPE) {
400                         buffer.append(ESCAPE);
401                     }
402                     buffer.append(valueChar);
403                 }
404             }
405             else {
406                 if (length == ESCAPE) {
407                     if (valueChar == ESCAPE) {
408                         buffer.append(ESCAPE);
409                     }
410                     buffer.append(valueChar);
411                     --length;
412                 }
413                 buffer.append(ESCAPE);
414                 buffer.append((char) length);
415                 buffer.append(valueChar); // Don't need to escape this value
416             }
417         } catch (IOException e) {
418             throw new IllegalIcuArgumentException(e);
419         }
420     }
421 
422     /**
423      * Encode a run, possibly a degenerate run (of < 4 values).
424      * @param length The length of the run; must be > 0 && <= 0xFF.
425      */
encodeRun(T buffer, byte value, int length, byte[] state)426     private static final <T extends Appendable> void encodeRun(T buffer, byte value, int length,
427             byte[] state) {
428         if (length < 4) {
429             for (int j=0; j<length; ++j) {
430                 if (value == ESCAPE_BYTE) appendEncodedByte(buffer, ESCAPE_BYTE, state);
431                 appendEncodedByte(buffer, value, state);
432             }
433         }
434         else {
435             if ((byte)length == ESCAPE_BYTE) {
436                 if (value == ESCAPE_BYTE) appendEncodedByte(buffer, ESCAPE_BYTE, state);
437                 appendEncodedByte(buffer, value, state);
438                 --length;
439             }
440             appendEncodedByte(buffer, ESCAPE_BYTE, state);
441             appendEncodedByte(buffer, (byte)length, state);
442             appendEncodedByte(buffer, value, state); // Don't need to escape this value
443         }
444     }
445 
446     /**
447      * Append a byte to the given Appendable, packing two bytes into each
448      * character.  The state parameter maintains intermediary data between
449      * calls.
450      * @param state A two-element array, with state[0] == 0 if this is the
451      * first byte of a pair, or state[0] != 0 if this is the second byte
452      * of a pair, in which case state[1] is the first byte.
453      */
appendEncodedByte(T buffer, byte value, byte[] state)454     private static final <T extends Appendable> void appendEncodedByte(T buffer, byte value,
455             byte[] state) {
456         try {
457             if (state[0] != 0) {
458                 char c = (char) ((state[1] << 8) | ((value) & 0xFF));
459                 buffer.append(c);
460                 state[0] = 0;
461             }
462             else {
463                 state[0] = 1;
464                 state[1] = value;
465             }
466         } catch (IOException e) {
467             throw new IllegalIcuArgumentException(e);
468         }
469     }
470 
471     /**
472      * Construct an array of ints from a run-length encoded string.
473      */
RLEStringToIntArray(String s)474     static public final int[] RLEStringToIntArray(String s) {
475         int length = getInt(s, 0);
476         int[] array = new int[length];
477         int ai = 0, i = 1;
478 
479         int maxI = s.length() / 2;
480         while (ai < length && i < maxI) {
481             int c = getInt(s, i++);
482 
483             if (c == ESCAPE) {
484                 c = getInt(s, i++);
485                 if (c == ESCAPE) {
486                     array[ai++] = c;
487                 } else {
488                     int runLength = c;
489                     int runValue = getInt(s, i++);
490                     for (int j=0; j<runLength; ++j) {
491                         array[ai++] = runValue;
492                     }
493                 }
494             }
495             else {
496                 array[ai++] = c;
497             }
498         }
499 
500         if (ai != length || i != maxI) {
501             throw new IllegalStateException("Bad run-length encoded int array");
502         }
503 
504         return array;
505     }
getInt(String s, int i)506     static final int getInt(String s, int i) {
507         return ((s.charAt(2*i)) << 16) | s.charAt(2*i+1);
508     }
509 
510     /**
511      * Construct an array of shorts from a run-length encoded string.
512      */
RLEStringToShortArray(String s)513     static public final short[] RLEStringToShortArray(String s) {
514         int length = ((s.charAt(0)) << 16) | (s.charAt(1));
515         short[] array = new short[length];
516         int ai = 0;
517         for (int i=2; i<s.length(); ++i) {
518             char c = s.charAt(i);
519             if (c == ESCAPE) {
520                 c = s.charAt(++i);
521                 if (c == ESCAPE) {
522                     array[ai++] = (short) c;
523                 } else {
524                     int runLength = c;
525                     short runValue = (short) s.charAt(++i);
526                     for (int j=0; j<runLength; ++j) array[ai++] = runValue;
527                 }
528             }
529             else {
530                 array[ai++] = (short) c;
531             }
532         }
533 
534         if (ai != length)
535             throw new IllegalStateException("Bad run-length encoded short array");
536 
537         return array;
538     }
539 
540     /**
541      * Construct an array of shorts from a run-length encoded string.
542      */
RLEStringToCharArray(String s)543     static public final char[] RLEStringToCharArray(String s) {
544         int length = ((s.charAt(0)) << 16) | (s.charAt(1));
545         char[] array = new char[length];
546         int ai = 0;
547         for (int i=2; i<s.length(); ++i) {
548             char c = s.charAt(i);
549             if (c == ESCAPE) {
550                 c = s.charAt(++i);
551                 if (c == ESCAPE) {
552                     array[ai++] = c;
553                 } else {
554                     int runLength = c;
555                     char runValue = s.charAt(++i);
556                     for (int j=0; j<runLength; ++j) array[ai++] = runValue;
557                 }
558             }
559             else {
560                 array[ai++] = c;
561             }
562         }
563 
564         if (ai != length)
565             throw new IllegalStateException("Bad run-length encoded short array");
566 
567         return array;
568     }
569 
570     /**
571      * Construct an array of bytes from a run-length encoded string.
572      */
RLEStringToByteArray(String s)573     static public final byte[] RLEStringToByteArray(String s) {
574         int length = ((s.charAt(0)) << 16) | (s.charAt(1));
575         byte[] array = new byte[length];
576         boolean nextChar = true;
577         char c = 0;
578         int node = 0;
579         int runLength = 0;
580         int i = 2;
581         for (int ai=0; ai<length; ) {
582             // This part of the loop places the next byte into the local
583             // variable 'b' each time through the loop.  It keeps the
584             // current character in 'c' and uses the boolean 'nextChar'
585             // to see if we've taken both bytes out of 'c' yet.
586             byte b;
587             if (nextChar) {
588                 c = s.charAt(i++);
589                 b = (byte) (c >> 8);
590                 nextChar = false;
591             }
592             else {
593                 b = (byte) (c & 0xFF);
594                 nextChar = true;
595             }
596 
597             // This part of the loop is a tiny state machine which handles
598             // the parsing of the run-length encoding.  This would be simpler
599             // if we could look ahead, but we can't, so we use 'node' to
600             // move between three nodes in the state machine.
601             switch (node) {
602             case 0:
603                 // Normal idle node
604                 if (b == ESCAPE_BYTE) {
605                     node = 1;
606                 }
607                 else {
608                     array[ai++] = b;
609                 }
610                 break;
611             case 1:
612                 // We have seen one ESCAPE_BYTE; we expect either a second
613                 // one, or a run length and value.
614                 if (b == ESCAPE_BYTE) {
615                     array[ai++] = ESCAPE_BYTE;
616                     node = 0;
617                 }
618                 else {
619                     runLength = b;
620                     // Interpret signed byte as unsigned
621                     if (runLength < 0) runLength += 0x100;
622                     node = 2;
623                 }
624                 break;
625             case 2:
626                 // We have seen an ESCAPE_BYTE and length byte.  We interpret
627                 // the next byte as the value to be repeated.
628                 for (int j=0; j<runLength; ++j) array[ai++] = b;
629                 node = 0;
630                 break;
631             }
632         }
633 
634         if (node != 0)
635             throw new IllegalStateException("Bad run-length encoded byte array");
636 
637         if (i != s.length())
638             throw new IllegalStateException("Excess data in RLE byte array string");
639 
640         return array;
641     }
642 
643     static public String LINE_SEPARATOR = System.getProperty("line.separator");
644 
645     /**
646      * Format a String for representation in a source file.  This includes
647      * breaking it into lines and escaping characters using octal notation
648      * when necessary (control characters and double quotes).
649      */
formatForSource(String s)650     static public final String formatForSource(String s) {
651         StringBuilder buffer = new StringBuilder();
652         for (int i=0; i<s.length();) {
653             if (i > 0) buffer.append('+').append(LINE_SEPARATOR);
654             buffer.append("        \"");
655             int count = 11;
656             while (i<s.length() && count<80) {
657                 char c = s.charAt(i++);
658                 if (c < '\u0020' || c == '"' || c == '\\') {
659                     if (c == '\n') {
660                         buffer.append("\\n");
661                         count += 2;
662                     } else if (c == '\t') {
663                         buffer.append("\\t");
664                         count += 2;
665                     } else if (c == '\r') {
666                         buffer.append("\\r");
667                         count += 2;
668                     } else {
669                         // Represent control characters, backslash and double quote
670                         // using octal notation; otherwise the string we form
671                         // won't compile, since Unicode escape sequences are
672                         // processed before tokenization.
673                         buffer.append('\\');
674                         buffer.append(HEX_DIGIT[(c & 0700) >> 6]); // HEX_DIGIT works for octal
675                         buffer.append(HEX_DIGIT[(c & 0070) >> 3]);
676                         buffer.append(HEX_DIGIT[(c & 0007)]);
677                         count += 4;
678                     }
679                 }
680                 else if (c <= '\u007E') {
681                     buffer.append(c);
682                     count += 1;
683                 }
684                 else {
685                     buffer.append("\\u");
686                     buffer.append(HEX_DIGIT[(c & 0xF000) >> 12]);
687                     buffer.append(HEX_DIGIT[(c & 0x0F00) >> 8]);
688                     buffer.append(HEX_DIGIT[(c & 0x00F0) >> 4]);
689                     buffer.append(HEX_DIGIT[(c & 0x000F)]);
690                     count += 6;
691                 }
692             }
693             buffer.append('"');
694         }
695         return buffer.toString();
696     }
697 
698     static final char[] HEX_DIGIT = {'0','1','2','3','4','5','6','7',
699         '8','9','A','B','C','D','E','F'};
700 
701     /**
702      * Format a String for representation in a source file.  Like
703      * formatForSource but does not do line breaking.
704      */
format1ForSource(String s)705     static public final String format1ForSource(String s) {
706         StringBuilder buffer = new StringBuilder();
707         buffer.append("\"");
708         for (int i=0; i<s.length();) {
709             char c = s.charAt(i++);
710             if (c < '\u0020' || c == '"' || c == '\\') {
711                 if (c == '\n') {
712                     buffer.append("\\n");
713                 } else if (c == '\t') {
714                     buffer.append("\\t");
715                 } else if (c == '\r') {
716                     buffer.append("\\r");
717                 } else {
718                     // Represent control characters, backslash and double quote
719                     // using octal notation; otherwise the string we form
720                     // won't compile, since Unicode escape sequences are
721                     // processed before tokenization.
722                     buffer.append('\\');
723                     buffer.append(HEX_DIGIT[(c & 0700) >> 6]); // HEX_DIGIT works for octal
724                     buffer.append(HEX_DIGIT[(c & 0070) >> 3]);
725                     buffer.append(HEX_DIGIT[(c & 0007)]);
726                 }
727             }
728             else if (c <= '\u007E') {
729                 buffer.append(c);
730             }
731             else {
732                 buffer.append("\\u");
733                 buffer.append(HEX_DIGIT[(c & 0xF000) >> 12]);
734                 buffer.append(HEX_DIGIT[(c & 0x0F00) >> 8]);
735                 buffer.append(HEX_DIGIT[(c & 0x00F0) >> 4]);
736                 buffer.append(HEX_DIGIT[(c & 0x000F)]);
737             }
738         }
739         buffer.append('"');
740         return buffer.toString();
741     }
742 
743     /**
744      * Convert characters outside the range U+0020 to U+007F to
745      * Unicode escapes, and convert backslash to a double backslash.
746      */
escape(String s)747     public static final String escape(String s) {
748         StringBuilder buf = new StringBuilder();
749         for (int i=0; i<s.length(); ) {
750             int c = Character.codePointAt(s, i);
751             i += UTF16.getCharCount(c);
752             if (c >= ' ' && c <= 0x007F) {
753                 if (c == '\\') {
754                     buf.append("\\\\"); // That is, "\\"
755                 } else {
756                     buf.append((char)c);
757                 }
758             } else {
759                 boolean four = c <= 0xFFFF;
760                 buf.append(four ? "\\u" : "\\U");
761                 buf.append(hex(c, four ? 4 : 8));
762             }
763         }
764         return buf.toString();
765     }
766 
767     /* This map must be in ASCENDING ORDER OF THE ESCAPE CODE */
768     static private final char[] UNESCAPE_MAP = {
769         /*"   0x22, 0x22 */
770         /*'   0x27, 0x27 */
771         /*?   0x3F, 0x3F */
772         /*\   0x5C, 0x5C */
773         /*a*/ 0x61, 0x07,
774         /*b*/ 0x62, 0x08,
775         /*e*/ 0x65, 0x1b,
776         /*f*/ 0x66, 0x0c,
777         /*n*/ 0x6E, 0x0a,
778         /*r*/ 0x72, 0x0d,
779         /*t*/ 0x74, 0x09,
780         /*v*/ 0x76, 0x0b
781     };
782 
783     /**
784      * Convert an escape to a 32-bit code point value.  We attempt
785      * to parallel the icu4c unescapeAt() function.
786      * @param offset16 an array containing offset to the character
787      * <em>after</em> the backslash.  Upon return offset16[0] will
788      * be updated to point after the escape sequence.
789      * @return character value from 0 to 10FFFF, or -1 on error.
790      */
unescapeAt(String s, int[] offset16)791     public static int unescapeAt(String s, int[] offset16) {
792         int c;
793         int result = 0;
794         int n = 0;
795         int minDig = 0;
796         int maxDig = 0;
797         int bitsPerDigit = 4;
798         int dig;
799         int i;
800         boolean braces = false;
801 
802         /* Check that offset is in range */
803         int offset = offset16[0];
804         int length = s.length();
805         if (offset < 0 || offset >= length) {
806             return -1;
807         }
808 
809         /* Fetch first UChar after '\\' */
810         c = Character.codePointAt(s, offset);
811         offset += UTF16.getCharCount(c);
812 
813         /* Convert hexadecimal and octal escapes */
814         switch (c) {
815         case 'u':
816             minDig = maxDig = 4;
817             break;
818         case 'U':
819             minDig = maxDig = 8;
820             break;
821         case 'x':
822             minDig = 1;
823             if (offset < length && UTF16.charAt(s, offset) == 0x7B /*{*/) {
824                 ++offset;
825                 braces = true;
826                 maxDig = 8;
827             } else {
828                 maxDig = 2;
829             }
830             break;
831         default:
832             dig = UCharacter.digit(c, 8);
833             if (dig >= 0) {
834                 minDig = 1;
835                 maxDig = 3;
836                 n = 1; /* Already have first octal digit */
837                 bitsPerDigit = 3;
838                 result = dig;
839             }
840             break;
841         }
842         if (minDig != 0) {
843             while (offset < length && n < maxDig) {
844                 c = UTF16.charAt(s, offset);
845                 dig = UCharacter.digit(c, (bitsPerDigit == 3) ? 8 : 16);
846                 if (dig < 0) {
847                     break;
848                 }
849                 result = (result << bitsPerDigit) | dig;
850                 offset += UTF16.getCharCount(c);
851                 ++n;
852             }
853             if (n < minDig) {
854                 return -1;
855             }
856             if (braces) {
857                 if (c != 0x7D /*}*/) {
858                     return -1;
859                 }
860                 ++offset;
861             }
862             if (result < 0 || result >= 0x110000) {
863                 return -1;
864             }
865             // If an escape sequence specifies a lead surrogate, see
866             // if there is a trail surrogate after it, either as an
867             // escape or as a literal.  If so, join them up into a
868             // supplementary.
869             if (offset < length &&
870                     UTF16.isLeadSurrogate((char) result)) {
871                 int ahead = offset+1;
872                 c = s.charAt(offset); // [sic] get 16-bit code unit
873                 if (c == '\\' && ahead < length) {
874                     int o[] = new int[] { ahead };
875                     c = unescapeAt(s, o);
876                     ahead = o[0];
877                 }
878                 if (UTF16.isTrailSurrogate((char) c)) {
879                     offset = ahead;
880                     result = Character.toCodePoint((char) result, (char) c);
881                 }
882             }
883             offset16[0] = offset;
884             return result;
885         }
886 
887         /* Convert C-style escapes in table */
888         for (i=0; i<UNESCAPE_MAP.length; i+=2) {
889             if (c == UNESCAPE_MAP[i]) {
890                 offset16[0] = offset;
891                 return UNESCAPE_MAP[i+1];
892             } else if (c < UNESCAPE_MAP[i]) {
893                 break;
894             }
895         }
896 
897         /* Map \cX to control-X: X & 0x1F */
898         if (c == 'c' && offset < length) {
899             c = UTF16.charAt(s, offset);
900             offset16[0] = offset + UTF16.getCharCount(c);
901             return 0x1F & c;
902         }
903 
904         /* If no special forms are recognized, then consider
905          * the backslash to generically escape the next character. */
906         offset16[0] = offset;
907         return c;
908     }
909 
910     /**
911      * Convert all escapes in a given string using unescapeAt().
912      * @exception IllegalArgumentException if an invalid escape is
913      * seen.
914      */
unescape(String s)915     public static String unescape(String s) {
916         StringBuilder buf = new StringBuilder();
917         int[] pos = new int[1];
918         for (int i=0; i<s.length(); ) {
919             char c = s.charAt(i++);
920             if (c == '\\') {
921                 pos[0] = i;
922                 int e = unescapeAt(s, pos);
923                 if (e < 0) {
924                     throw new IllegalArgumentException("Invalid escape sequence " +
925                             s.substring(i-1, Math.min(i+8, s.length())));
926                 }
927                 buf.appendCodePoint(e);
928                 i = pos[0];
929             } else {
930                 buf.append(c);
931             }
932         }
933         return buf.toString();
934     }
935 
936     /**
937      * Convert all escapes in a given string using unescapeAt().
938      * Leave invalid escape sequences unchanged.
939      */
unescapeLeniently(String s)940     public static String unescapeLeniently(String s) {
941         StringBuilder buf = new StringBuilder();
942         int[] pos = new int[1];
943         for (int i=0; i<s.length(); ) {
944             char c = s.charAt(i++);
945             if (c == '\\') {
946                 pos[0] = i;
947                 int e = unescapeAt(s, pos);
948                 if (e < 0) {
949                     buf.append(c);
950                 } else {
951                     buf.appendCodePoint(e);
952                     i = pos[0];
953                 }
954             } else {
955                 buf.append(c);
956             }
957         }
958         return buf.toString();
959     }
960 
961     /**
962      * Convert a char to 4 hex uppercase digits.  E.g., hex('a') =>
963      * "0041".
964      */
hex(long ch)965     public static String hex(long ch) {
966         return hex(ch, 4);
967     }
968 
969     /**
970      * Supplies a zero-padded hex representation of an integer (without 0x)
971      */
hex(long i, int places)972     static public String hex(long i, int places) {
973         if (i == Long.MIN_VALUE) return "-8000000000000000";
974         boolean negative = i < 0;
975         if (negative) {
976             i = -i;
977         }
978         String result = Long.toString(i, 16).toUpperCase(Locale.ENGLISH);
979         if (result.length() < places) {
980             result = "0000000000000000".substring(result.length(),places) + result;
981         }
982         if (negative) {
983             return '-' + result;
984         }
985         return result;
986     }
987 
988     /**
989      * Convert a string to comma-separated groups of 4 hex uppercase
990      * digits.  E.g., hex('ab') => "0041,0042".
991      */
992     public static String hex(CharSequence s) {
993         return hex(s, 4, ",", true, new StringBuilder()).toString();
994     }
995 
996     /**
997      * Convert a string to separated groups of hex uppercase
998      * digits.  E.g., hex('ab'...) => "0041,0042".  Append the output
999      * to the given Appendable.
1000      */
1001     public static <S extends CharSequence, U extends CharSequence, T extends Appendable> T hex(S s, int width, U separator, boolean useCodePoints, T result) {
1002         try {
1003             if (useCodePoints) {
1004                 int cp;
1005                 for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
1006                     cp = Character.codePointAt(s, i);
1007                     if (i != 0) {
1008                         result.append(separator);
1009                     }
1010                     result.append(hex(cp,width));
1011                 }
1012             } else {
1013                 for (int i = 0; i < s.length(); ++i) {
1014                     if (i != 0) {
1015                         result.append(separator);
1016                     }
1017                     result.append(hex(s.charAt(i),width));
1018                 }
1019             }
1020             return result;
1021         } catch (IOException e) {
1022             throw new IllegalIcuArgumentException(e);
1023         }
1024     }
1025 
1026     public static String hex(byte[] o, int start, int end, String separator) {
1027         StringBuilder result = new StringBuilder();
1028         //int ch;
1029         for (int i = start; i < end; ++i) {
1030           if (i != 0) result.append(separator);
1031           result.append(hex(o[i]));
1032         }
1033         return result.toString();
1034       }
1035 
1036     /**
1037      * Convert a string to comma-separated groups of 4 hex uppercase
1038      * digits.  E.g., hex('ab') => "0041,0042".
1039      */
1040     public static <S extends CharSequence> String hex(S s, int width, S separator) {
1041         return hex(s, width, separator, true, new StringBuilder()).toString();
1042     }
1043 
1044     /**
1045      * Split a string into pieces based on the given divider character
1046      * @param s the string to split
1047      * @param divider the character on which to split.  Occurrences of
1048      * this character are not included in the output
1049      * @param output an array to receive the substrings between
1050      * instances of divider.  It must be large enough on entry to
1051      * accomodate all output.  Adjacent instances of the divider
1052      * character will place empty strings into output.  Before
1053      * returning, output is padded out with empty strings.
1054      */
1055     public static void split(String s, char divider, String[] output) {
1056         int last = 0;
1057         int current = 0;
1058         int i;
1059         for (i = 0; i < s.length(); ++i) {
1060             if (s.charAt(i) == divider) {
1061                 output[current++] = s.substring(last,i);
1062                 last = i+1;
1063             }
1064         }
1065         output[current++] = s.substring(last,i);
1066         while (current < output.length) {
1067             output[current++] = "";
1068         }
1069     }
1070 
1071     /**
1072      * Split a string into pieces based on the given divider character
1073      * @param s the string to split
1074      * @param divider the character on which to split.  Occurrences of
1075      * this character are not included in the output
1076      * @return output an array to receive the substrings between
1077      * instances of divider. Adjacent instances of the divider
1078      * character will place empty strings into output.
1079      */
1080     public static String[] split(String s, char divider) {
1081         int last = 0;
1082         int i;
1083         ArrayList<String> output = new ArrayList<>();
1084         for (i = 0; i < s.length(); ++i) {
1085             if (s.charAt(i) == divider) {
1086                 output.add(s.substring(last,i));
1087                 last = i+1;
1088             }
1089         }
1090         output.add( s.substring(last,i));
1091         return output.toArray(new String[output.size()]);
1092     }
1093 
1094     /**
1095      * Look up a given string in a string array.  Returns the index at
1096      * which the first occurrence of the string was found in the
1097      * array, or -1 if it was not found.
1098      * @param source the string to search for
1099      * @param target the array of zero or more strings in which to
1100      * look for source
1101      * @return the index of target at which source first occurs, or -1
1102      * if not found
1103      */
1104     public static int lookup(String source, String[] target) {
1105         for (int i = 0; i < target.length; ++i) {
1106             if (source.equals(target[i])) return i;
1107         }
1108         return -1;
1109     }
1110 
1111     /**
1112      * Parse a single non-whitespace character 'ch', optionally
1113      * preceded by whitespace.
1114      * @param id the string to be parsed
1115      * @param pos INPUT-OUTPUT parameter.  On input, pos[0] is the
1116      * offset of the first character to be parsed.  On output, pos[0]
1117      * is the index after the last parsed character.  If the parse
1118      * fails, pos[0] will be unchanged.
1119      * @param ch the non-whitespace character to be parsed.
1120      * @return true if 'ch' is seen preceded by zero or more
1121      * whitespace characters.
1122      */
1123     public static boolean parseChar(String id, int[] pos, char ch) {
1124         int start = pos[0];
1125         pos[0] = PatternProps.skipWhiteSpace(id, pos[0]);
1126         if (pos[0] == id.length() ||
1127                 id.charAt(pos[0]) != ch) {
1128             pos[0] = start;
1129             return false;
1130         }
1131         ++pos[0];
1132         return true;
1133     }
1134 
1135     /**
1136      * Parse a pattern string starting at offset pos.  Keywords are
1137      * matched case-insensitively.  Spaces may be skipped and may be
1138      * optional or required.  Integer values may be parsed, and if
1139      * they are, they will be returned in the given array.  If
1140      * successful, the offset of the next non-space character is
1141      * returned.  On failure, -1 is returned.
1142      * @param pattern must only contain lowercase characters, which
1143      * will match their uppercase equivalents as well.  A space
1144      * character matches one or more required spaces.  A '~' character
1145      * matches zero or more optional spaces.  A '#' character matches
1146      * an integer and stores it in parsedInts, which the caller must
1147      * ensure has enough capacity.
1148      * @param parsedInts array to receive parsed integers.  Caller
1149      * must ensure that parsedInts.length is >= the number of '#'
1150      * signs in 'pattern'.
1151      * @return the position after the last character parsed, or -1 if
1152      * the parse failed
1153      */
1154     @SuppressWarnings("fallthrough")
1155     public static int parsePattern(String rule, int pos, int limit,
1156             String pattern, int[] parsedInts) {
1157         // TODO Update this to handle surrogates
1158         int[] p = new int[1];
1159         int intCount = 0; // number of integers parsed
1160         for (int i=0; i<pattern.length(); ++i) {
1161             char cpat = pattern.charAt(i);
1162             char c;
1163             switch (cpat) {
1164             case ' ':
1165                 if (pos >= limit) {
1166                     return -1;
1167                 }
1168                 c = rule.charAt(pos++);
1169                 if (!PatternProps.isWhiteSpace(c)) {
1170                     return -1;
1171                 }
1172                 // FALL THROUGH to skipWhitespace
1173             case '~':
1174                 pos = PatternProps.skipWhiteSpace(rule, pos);
1175                 break;
1176             case '#':
1177                 p[0] = pos;
1178                 parsedInts[intCount++] = parseInteger(rule, p, limit);
1179                 if (p[0] == pos) {
1180                     // Syntax error; failed to parse integer
1181                     return -1;
1182                 }
1183                 pos = p[0];
1184                 break;
1185             default:
1186                 if (pos >= limit) {
1187                     return -1;
1188                 }
1189                 c = (char) UCharacter.toLowerCase(rule.charAt(pos++));
1190                 if (c != cpat) {
1191                     return -1;
1192                 }
1193                 break;
1194             }
1195         }
1196         return pos;
1197     }
1198 
1199     /**
1200      * Parse a pattern string within the given Replaceable and a parsing
1201      * pattern.  Characters are matched literally and case-sensitively
1202      * except for the following special characters:
1203      *
1204      * ~  zero or more Pattern_White_Space chars
1205      *
1206      * If end of pattern is reached with all matches along the way,
1207      * pos is advanced to the first unparsed index and returned.
1208      * Otherwise -1 is returned.
1209      * @param pat pattern that controls parsing
1210      * @param text text to be parsed, starting at index
1211      * @param index offset to first character to parse
1212      * @param limit offset after last character to parse
1213      * @return index after last parsed character, or -1 on parse failure.
1214      */
1215     public static int parsePattern(String pat,
1216             Replaceable text,
1217             int index,
1218             int limit) {
1219         int ipat = 0;
1220 
1221         // empty pattern matches immediately
1222         if (ipat == pat.length()) {
1223             return index;
1224         }
1225 
1226         int cpat = Character.codePointAt(pat, ipat);
1227 
1228         while (index < limit) {
1229             int c = text.char32At(index);
1230 
1231             // parse \s*
1232             if (cpat == '~') {
1233                 if (PatternProps.isWhiteSpace(c)) {
1234                     index += UTF16.getCharCount(c);
1235                     continue;
1236                 } else {
1237                     if (++ipat == pat.length()) {
1238                         return index; // success; c unparsed
1239                     }
1240                     // fall thru; process c again with next cpat
1241                 }
1242             }
1243 
1244             // parse literal
1245             else if (c == cpat) {
1246                 int n = UTF16.getCharCount(c);
1247                 index += n;
1248                 ipat += n;
1249                 if (ipat == pat.length()) {
1250                     return index; // success; c parsed
1251                 }
1252                 // fall thru; get next cpat
1253             }
1254 
1255             // match failure of literal
1256             else {
1257                 return -1;
1258             }
1259 
1260             cpat = UTF16.charAt(pat, ipat);
1261         }
1262 
1263         return -1; // text ended before end of pat
1264     }
1265 
1266     /**
1267      * Parse an integer at pos, either of the form \d+ or of the form
1268      * 0x[0-9A-Fa-f]+ or 0[0-7]+, that is, in standard decimal, hex,
1269      * or octal format.
1270      * @param pos INPUT-OUTPUT parameter.  On input, the first
1271      * character to parse.  On output, the character after the last
1272      * parsed character.
1273      */
1274     public static int parseInteger(String rule, int[] pos, int limit) {
1275         int count = 0;
1276         int value = 0;
1277         int p = pos[0];
1278         int radix = 10;
1279 
1280         if (rule.regionMatches(true, p, "0x", 0, 2)) {
1281             p += 2;
1282             radix = 16;
1283         } else if (p < limit && rule.charAt(p) == '0') {
1284             p++;
1285             count = 1;
1286             radix = 8;
1287         }
1288 
1289         while (p < limit) {
1290             int d = UCharacter.digit(rule.charAt(p++), radix);
1291             if (d < 0) {
1292                 --p;
1293                 break;
1294             }
1295             ++count;
1296             int v = (value * radix) + d;
1297             if (v <= value) {
1298                 // If there are too many input digits, at some point
1299                 // the value will go negative, e.g., if we have seen
1300                 // "0x8000000" already and there is another '0', when
1301                 // we parse the next 0 the value will go negative.
1302                 return 0;
1303             }
1304             value = v;
1305         }
1306         if (count > 0) {
1307             pos[0] = p;
1308         }
1309         return value;
1310     }
1311 
1312     /**
1313      * Parse a Unicode identifier from the given string at the given
1314      * position.  Return the identifier, or null if there is no
1315      * identifier.
1316      * @param str the string to parse
1317      * @param pos INPUT-OUPUT parameter.  On INPUT, pos[0] is the
1318      * first character to examine.  It must be less than str.length(),
1319      * and it must not point to a whitespace character.  That is, must
1320      * have pos[0] < str.length().  On
1321      * OUTPUT, the position after the last parsed character.
1322      * @return the Unicode identifier, or null if there is no valid
1323      * identifier at pos[0].
1324      */
1325     public static String parseUnicodeIdentifier(String str, int[] pos) {
1326         // assert(pos[0] < str.length());
1327         StringBuilder buf = new StringBuilder();
1328         int p = pos[0];
1329         while (p < str.length()) {
1330             int ch = Character.codePointAt(str, p);
1331             if (buf.length() == 0) {
1332                 if (UCharacter.isUnicodeIdentifierStart(ch)) {
1333                     buf.appendCodePoint(ch);
1334                 } else {
1335                     return null;
1336                 }
1337             } else {
1338                 if (UCharacter.isUnicodeIdentifierPart(ch)) {
1339                     buf.appendCodePoint(ch);
1340                 } else {
1341                     break;
1342                 }
1343             }
1344             p += UTF16.getCharCount(ch);
1345         }
1346         pos[0] = p;
1347         return buf.toString();
1348     }
1349 
1350     static final char DIGITS[] = {
1351         '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
1352         'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
1353         'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
1354         'U', 'V', 'W', 'X', 'Y', 'Z'
1355     };
1356 
1357     /**
1358      * Append the digits of a positive integer to the given
1359      * <code>Appendable</code> in the given radix. This is
1360      * done recursively since it is easiest to generate the low-
1361      * order digit first, but it must be appended last.
1362      *
1363      * @param result is the <code>Appendable</code> to append to
1364      * @param n is the positive integer
1365      * @param radix is the radix, from 2 to 36 inclusive
1366      * @param minDigits is the minimum number of digits to append.
1367      */
1368     private static <T extends Appendable> void recursiveAppendNumber(T result, int n,
1369             int radix, int minDigits)
1370     {
1371         try {
1372             int digit = n % radix;
1373 
1374             if (n >= radix || minDigits > 1) {
1375                 recursiveAppendNumber(result, n / radix, radix, minDigits - 1);
1376             }
1377             result.append(DIGITS[digit]);
1378         } catch (IOException e) {
1379             throw new IllegalIcuArgumentException(e);
1380         }
1381     }
1382 
1383     /**
1384      * Append a number to the given Appendable in the given radix.
1385      * Standard digits '0'-'9' are used and letters 'A'-'Z' for
1386      * radices 11 through 36.
1387      * @param result the digits of the number are appended here
1388      * @param n the number to be converted to digits; may be negative.
1389      * If negative, a '-' is prepended to the digits.
1390      * @param radix a radix from 2 to 36 inclusive.
1391      * @param minDigits the minimum number of digits, not including
1392      * any '-', to produce.  Values less than 2 have no effect.  One
1393      * digit is always emitted regardless of this parameter.
1394      * @return a reference to result
1395      */
1396     public static <T extends Appendable> T appendNumber(T result, int n,
1397             int radix, int minDigits)
1398     {
1399         try {
1400             if (radix < 2 || radix > 36) {
1401                 throw new IllegalArgumentException("Illegal radix " + radix);
1402             }
1403 
1404 
1405             int abs = n;
1406 
1407             if (n < 0) {
1408                 abs = -n;
1409                 result.append("-");
1410             }
1411 
1412             recursiveAppendNumber(result, abs, radix, minDigits);
1413 
1414             return result;
1415         } catch (IOException e) {
1416             throw new IllegalIcuArgumentException(e);
1417         }
1418 
1419     }
1420 
1421     /**
1422      * Parse an unsigned 31-bit integer at the given offset.  Use
1423      * UCharacter.digit() to parse individual characters into digits.
1424      * @param text the text to be parsed
1425      * @param pos INPUT-OUTPUT parameter.  On entry, pos[0] is the
1426      * offset within text at which to start parsing; it should point
1427      * to a valid digit.  On exit, pos[0] is the offset after the last
1428      * parsed character.  If the parse failed, it will be unchanged on
1429      * exit.  Must be >= 0 on entry.
1430      * @param radix the radix in which to parse; must be >= 2 and <=
1431      * 36.
1432      * @return a non-negative parsed number, or -1 upon parse failure.
1433      * Parse fails if there are no digits, that is, if pos[0] does not
1434      * point to a valid digit on entry, or if the number to be parsed
1435      * does not fit into a 31-bit unsigned integer.
1436      */
1437     public static int parseNumber(String text, int[] pos, int radix) {
1438         // assert(pos[0] >= 0);
1439         // assert(radix >= 2);
1440         // assert(radix <= 36);
1441         int n = 0;
1442         int p = pos[0];
1443         while (p < text.length()) {
1444             int ch = Character.codePointAt(text, p);
1445             int d = UCharacter.digit(ch, radix);
1446             if (d < 0) {
1447                 break;
1448             }
1449             n = radix*n + d;
1450             // ASSUME that when a 32-bit integer overflows it becomes
1451             // negative.  E.g., 214748364 * 10 + 8 => negative value.
1452             if (n < 0) {
1453                 return -1;
1454             }
1455             ++p;
1456         }
1457         if (p == pos[0]) {
1458             return -1;
1459         }
1460         pos[0] = p;
1461         return n;
1462     }
1463 
1464     /**
1465      * Return true if the character is NOT printable ASCII.  The tab,
1466      * newline and linefeed characters are considered unprintable.
1467      */
1468     public static boolean isUnprintable(int c) {
1469         //0x20 = 32 and 0x7E = 126
1470         return !(c >= 0x20 && c <= 0x7E);
1471     }
1472 
1473     /**
1474      * Escape unprintable characters using <backslash>uxxxx notation
1475      * for U+0000 to U+FFFF and <backslash>Uxxxxxxxx for U+10000 and
1476      * above.  If the character is printable ASCII, then do nothing
1477      * and return FALSE.  Otherwise, append the escaped notation and
1478      * return TRUE.
1479      */
1480     public static <T extends Appendable> boolean escapeUnprintable(T result, int c) {
1481         try {
1482             if (isUnprintable(c)) {
1483                 result.append('\\');
1484                 if ((c & ~0xFFFF) != 0) {
1485                     result.append('U');
1486                     result.append(DIGITS[0xF&(c>>28)]);
1487                     result.append(DIGITS[0xF&(c>>24)]);
1488                     result.append(DIGITS[0xF&(c>>20)]);
1489                     result.append(DIGITS[0xF&(c>>16)]);
1490                 } else {
1491                     result.append('u');
1492                 }
1493                 result.append(DIGITS[0xF&(c>>12)]);
1494                 result.append(DIGITS[0xF&(c>>8)]);
1495                 result.append(DIGITS[0xF&(c>>4)]);
1496                 result.append(DIGITS[0xF&c]);
1497                 return true;
1498             }
1499             return false;
1500         } catch (IOException e) {
1501             throw new IllegalIcuArgumentException(e);
1502         }
1503     }
1504 
1505     /**
1506      * Returns the index of the first character in a set, ignoring quoted text.
1507      * For example, in the string "abc'hide'h", the 'h' in "hide" will not be
1508      * found by a search for "h".  Unlike String.indexOf(), this method searches
1509      * not for a single character, but for any character of the string
1510      * <code>setOfChars</code>.
1511      * @param text text to be searched
1512      * @param start the beginning index, inclusive; <code>0 <= start
1513      * <= limit</code>.
1514      * @param limit the ending index, exclusive; <code>start <= limit
1515      * <= text.length()</code>.
1516      * @param setOfChars string with one or more distinct characters
1517      * @return Offset of the first character in <code>setOfChars</code>
1518      * found, or -1 if not found.
1519      * @see String#indexOf
1520      */
1521     public static int quotedIndexOf(String text, int start, int limit,
1522             String setOfChars) {
1523         for (int i=start; i<limit; ++i) {
1524             char c = text.charAt(i);
1525             if (c == BACKSLASH) {
1526                 ++i;
1527             } else if (c == APOSTROPHE) {
1528                 while (++i < limit
1529                         && text.charAt(i) != APOSTROPHE) {}
1530             } else if (setOfChars.indexOf(c) >= 0) {
1531                 return i;
1532             }
1533         }
1534         return -1;
1535     }
1536 
1537     /**
1538      * Append a character to a rule that is being built up.  To flush
1539      * the quoteBuf to rule, make one final call with isLiteral == true.
1540      * If there is no final character, pass in (int)-1 as c.
1541      * @param rule the string to append the character to
1542      * @param c the character to append, or (int)-1 if none.
1543      * @param isLiteral if true, then the given character should not be
1544      * quoted or escaped.  Usually this means it is a syntactic element
1545      * such as > or $
1546      * @param escapeUnprintable if true, then unprintable characters
1547      * should be escaped using escapeUnprintable().  These escapes will
1548      * appear outside of quotes.
1549      * @param quoteBuf a buffer which is used to build up quoted
1550      * substrings.  The caller should initially supply an empty buffer,
1551      * and thereafter should not modify the buffer.  The buffer should be
1552      * cleared out by, at the end, calling this method with a literal
1553      * character (which may be -1).
1554      */
1555     public static void appendToRule(StringBuffer rule,
1556             int c,
1557             boolean isLiteral,
1558             boolean escapeUnprintable,
1559             StringBuffer quoteBuf) {
1560         // If we are escaping unprintables, then escape them outside
1561         // quotes.  \\u and \\U are not recognized within quotes.  The same
1562         // logic applies to literals, but literals are never escaped.
1563         if (isLiteral ||
1564                 (escapeUnprintable && Utility.isUnprintable(c))) {
1565             if (quoteBuf.length() > 0) {
1566                 // We prefer backslash APOSTROPHE to double APOSTROPHE
1567                 // (more readable, less similar to ") so if there are
1568                 // double APOSTROPHEs at the ends, we pull them outside
1569                 // of the quote.
1570 
1571                 // If the first thing in the quoteBuf is APOSTROPHE
1572                 // (doubled) then pull it out.
1573                 while (quoteBuf.length() >= 2 &&
1574                         quoteBuf.charAt(0) == APOSTROPHE &&
1575                         quoteBuf.charAt(1) == APOSTROPHE) {
1576                     rule.append(BACKSLASH).append(APOSTROPHE);
1577                     quoteBuf.delete(0, 2);
1578                 }
1579                 // If the last thing in the quoteBuf is APOSTROPHE
1580                 // (doubled) then remove and count it and add it after.
1581                 int trailingCount = 0;
1582                 while (quoteBuf.length() >= 2 &&
1583                         quoteBuf.charAt(quoteBuf.length()-2) == APOSTROPHE &&
1584                         quoteBuf.charAt(quoteBuf.length()-1) == APOSTROPHE) {
1585                     quoteBuf.setLength(quoteBuf.length()-2);
1586                     ++trailingCount;
1587                 }
1588                 if (quoteBuf.length() > 0) {
1589                     rule.append(APOSTROPHE);
1590                     rule.append(quoteBuf);
1591                     rule.append(APOSTROPHE);
1592                     quoteBuf.setLength(0);
1593                 }
1594                 while (trailingCount-- > 0) {
1595                     rule.append(BACKSLASH).append(APOSTROPHE);
1596                 }
1597             }
1598             if (c != -1) {
1599                 /* Since spaces are ignored during parsing, they are
1600                  * emitted only for readability.  We emit one here
1601                  * only if there isn't already one at the end of the
1602                  * rule.
1603                  */
1604                 if (c == ' ') {
1605                     int len = rule.length();
1606                     if (len > 0 && rule.charAt(len-1) != ' ') {
1607                         rule.append(' ');
1608                     }
1609                 } else if (!escapeUnprintable || !Utility.escapeUnprintable(rule, c)) {
1610                     rule.appendCodePoint(c);
1611                 }
1612             }
1613         }
1614 
1615         // Escape ' and '\' and don't begin a quote just for them
1616         else if (quoteBuf.length() == 0 &&
1617                 (c == APOSTROPHE || c == BACKSLASH)) {
1618             rule.append(BACKSLASH).append((char)c);
1619         }
1620 
1621         // Specials (printable ascii that isn't [0-9a-zA-Z]) and
1622         // whitespace need quoting.  Also append stuff to quotes if we are
1623         // building up a quoted substring already.
1624         else if (quoteBuf.length() > 0 ||
1625                 (c >= 0x0021 && c <= 0x007E &&
1626                         !((c >= 0x0030/*'0'*/ && c <= 0x0039/*'9'*/) ||
1627                                 (c >= 0x0041/*'A'*/ && c <= 0x005A/*'Z'*/) ||
1628                                 (c >= 0x0061/*'a'*/ && c <= 0x007A/*'z'*/))) ||
1629                                 PatternProps.isWhiteSpace(c)) {
1630             quoteBuf.appendCodePoint(c);
1631             // Double ' within a quote
1632             if (c == APOSTROPHE) {
1633                 quoteBuf.append((char)c);
1634             }
1635         }
1636 
1637         // Otherwise just append
1638         else {
1639             rule.appendCodePoint(c);
1640         }
1641     }
1642 
1643     /**
1644      * Append the given string to the rule.  Calls the single-character
1645      * version of appendToRule for each character.
1646      */
1647     public static void appendToRule(StringBuffer rule,
1648             String text,
1649             boolean isLiteral,
1650             boolean escapeUnprintable,
1651             StringBuffer quoteBuf) {
1652         for (int i=0; i<text.length(); ++i) {
1653             // Okay to process in 16-bit code units here
1654             appendToRule(rule, text.charAt(i), isLiteral, escapeUnprintable, quoteBuf);
1655         }
1656     }
1657 
1658     /**
1659      * Given a matcher reference, which may be null, append its
1660      * pattern as a literal to the given rule.
1661      */
1662     public static void appendToRule(StringBuffer rule,
1663             UnicodeMatcher matcher,
1664             boolean escapeUnprintable,
1665             StringBuffer quoteBuf) {
1666         if (matcher != null) {
1667             appendToRule(rule, matcher.toPattern(escapeUnprintable),
1668                     true, escapeUnprintable, quoteBuf);
1669         }
1670     }
1671 
1672     /**
1673      * Compares 2 unsigned integers
1674      * @param source 32 bit unsigned integer
1675      * @param target 32 bit unsigned integer
1676      * @return 0 if equals, 1 if source is greater than target and -1
1677      *         otherwise
1678      */
1679     public static final int compareUnsigned(int source, int target)
1680     {
1681         source += MAGIC_UNSIGNED;
1682         target += MAGIC_UNSIGNED;
1683         if (source < target) {
1684             return -1;
1685         }
1686         else if (source > target) {
1687             return 1;
1688         }
1689         return 0;
1690     }
1691 
1692     /**
1693      * Find the highest bit in a positive integer. This is done
1694      * by doing a binary search through the bits.
1695      *
1696      * @param n is the integer
1697      *
1698      * @return the bit number of the highest bit, with 0 being
1699      * the low order bit, or -1 if <code>n</code> is not positive
1700      */
1701     public static final byte highBit(int n)
1702     {
1703         if (n <= 0) {
1704             return -1;
1705         }
1706 
1707         byte bit = 0;
1708 
1709         if (n >= 1 << 16) {
1710             n >>= 16;
1711         bit += 16;
1712         }
1713 
1714         if (n >= 1 << 8) {
1715             n >>= 8;
1716         bit += 8;
1717         }
1718 
1719         if (n >= 1 << 4) {
1720             n >>= 4;
1721         bit += 4;
1722         }
1723 
1724         if (n >= 1 << 2) {
1725             n >>= 2;
1726         bit += 2;
1727         }
1728 
1729         if (n >= 1 << 1) {
1730             n >>= 1;
1731         bit += 1;
1732         }
1733 
1734         return bit;
1735     }
1736     /**
1737      * Utility method to take a int[] containing codepoints and return
1738      * a string representation with code units.
1739      */
1740     public static String valueOf(int[]source){
1741         // TODO: Investigate why this method is not on UTF16 class
1742         StringBuilder result = new StringBuilder(source.length);
1743         for(int i=0; i<source.length; i++){
1744             result.appendCodePoint(source[i]);
1745         }
1746         return result.toString();
1747     }
1748 
1749 
1750     /**
1751      * Utility to duplicate a string count times
1752      * @param s String to be duplicated.
1753      * @param count Number of times to duplicate a string.
1754      */
1755     public static String repeat(String s, int count) {
1756         if (count <= 0) return "";
1757         if (count == 1) return s;
1758         StringBuilder result = new StringBuilder();
1759         for (int i = 0; i < count; ++i) {
1760             result.append(s);
1761         }
1762         return result.toString();
1763     }
1764 
1765     public static String[] splitString(String src, String target) {
1766         return src.split("\\Q" + target + "\\E");
1767     }
1768 
1769     /**
1770      * Split the string at runs of ascii whitespace characters.
1771      */
1772     public static String[] splitWhitespace(String src) {
1773         return src.split("\\s+");
1774     }
1775 
1776     /**
1777      * Parse a list of hex numbers and return a string
1778      * @param string String of hex numbers.
1779      * @param minLength Minimal length.
1780      * @param separator Separator.
1781      * @return A string from hex numbers.
1782      */
1783     public static String fromHex(String string, int minLength, String separator) {
1784         return fromHex(string, minLength, Pattern.compile(separator != null ? separator : "\\s+"));
1785     }
1786 
1787     /**
1788      * Parse a list of hex numbers and return a string
1789      * @param string String of hex numbers.
1790      * @param minLength Minimal length.
1791      * @param separator Separator.
1792      * @return A string from hex numbers.
1793      */
1794     public static String fromHex(String string, int minLength, Pattern separator) {
1795         StringBuilder buffer = new StringBuilder();
1796         String[] parts = separator.split(string);
1797         for (String part : parts) {
1798             if (part.length() < minLength) {
1799                 throw new IllegalArgumentException("code point too short: " + part);
1800             }
1801             int cp = Integer.parseInt(part, 16);
1802             buffer.appendCodePoint(cp);
1803         }
1804         return buffer.toString();
1805     }
1806 
1807     /**
1808      * This implementation is equivalent to Java 8+ Math#addExact(int, int)
1809      * @param x the first value
1810      * @param y the second value
1811      * @return the result
1812      */
1813     public static int addExact(int x, int y) {
1814         int r = x + y;
1815         // HD 2-12 Overflow iff both arguments have the opposite sign of the result
1816         if (((x ^ r) & (y ^ r)) < 0) {
1817             throw new ArithmeticException("integer overflow");
1818         }
1819         return r;
1820     }
1821 
1822     /**
1823      * Returns whether the chars in the two CharSequences are equal.
1824      */
1825     public static boolean charSequenceEquals(CharSequence a, CharSequence b) {
1826         if (a == b) {
1827             return true;
1828         }
1829         if (a == null || b == null) {
1830             return false;
1831         }
1832         if (a.length() != b.length()) {
1833             return false;
1834         }
1835         for (int i = 0; i < a.length(); i++) {
1836             if (a.charAt(i) != b.charAt(i))
1837                 return false;
1838         }
1839         return true;
1840     }
1841 
1842     /**
1843      * Returns a hash code for a CharSequence that is equivalent to calling
1844      * charSequence.toString().hashCode()
1845      */
1846     public static int charSequenceHashCode(CharSequence value) {
1847         int hash = 0;
1848         for (int i = 0; i < value.length(); i++) {
1849             hash = hash * 31 + value.charAt(i);
1850         }
1851         return hash;
1852     }
1853 
1854     /**
1855      * Appends a CharSequence to an Appendable, converting IOException to ICUUncheckedIOException.
1856      */
1857     public static <A extends Appendable> A appendTo(CharSequence string, A appendable) {
1858         try {
1859             appendable.append(string);
1860             return appendable;
1861         } catch (IOException e) {
1862             throw new ICUUncheckedIOException(e);
1863         }
1864     }
1865 }
1866