• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 /**
5  *******************************************************************************
6  * Copyright (C) 2000-2010, International Business Machines Corporation and    *
7  * others. All Rights Reserved.                                                *
8  *******************************************************************************
9  */
10 package ohos.global.icu.dev.test.translit;
11 
12 import ohos.global.icu.text.UTF16;
13 import ohos.global.icu.text.UnicodeSet;
14 
15 
16 
17 public final class TestUtility {
18 
hex(char ch)19     public static String hex(char ch) {
20         String foo = Integer.toString(ch,16).toUpperCase();
21         return "0000".substring(0,4-foo.length()) + foo;
22     }
23 
hex(int ch)24     public static String hex(int ch) {
25         String foo = Integer.toString(ch,16).toUpperCase();
26         return "00000000".substring(0,4-foo.length()) + foo;
27     }
28 
hex(String s)29     public static String hex(String s) {
30       return hex(s,",");
31     }
32 
hex(String s, String sep)33     public static String hex(String s, String sep) {
34       if (s.length() == 0) return "";
35       String result = hex(s.charAt(0));
36       for (int i = 1; i < s.length(); ++i) {
37         result += sep;
38         result += hex(s.charAt(i));
39       }
40       return result;
41     }
42 
replace(String source, String toBeReplaced, String replacement)43     public static String replace(String source, String toBeReplaced, String replacement) {
44         StringBuffer results = new StringBuffer();
45         int len = toBeReplaced.length();
46         for (int i = 0; i < source.length(); ++i) {
47             if (source.regionMatches(false, i, toBeReplaced, 0, len)) {
48                 results.append(replacement);
49                 i += len - 1; // minus one, since we will increment
50             } else {
51                 results.append(source.charAt(i));
52             }
53         }
54         return results.toString();
55     }
56 
replaceAll(String source, UnicodeSet set, String replacement)57     public static String replaceAll(String source, UnicodeSet set, String replacement) {
58         StringBuffer results = new StringBuffer();
59         int cp;
60         for (int i = 0; i < source.length(); i += UTF16.getCharCount(cp)) {
61             cp = UTF16.charAt(source,i);
62             if (set.contains(cp)) {
63                 results.append(replacement);
64             } else {
65                 UTF16.append(results, cp);
66             }
67         }
68         return results.toString();
69     }
70 
71     // COMMENTED OUT ALL THE OLD SCRIPT STUFF
72     /*
73     public static byte getScript(char c) {
74       return getScript(getBlock(c));
75     }
76 
77     public static byte getScript(byte block) {
78       return blockToScript[block];
79     }
80 
81     public static byte getBlock(char c) {
82       int index = c >> 7;
83       byte block = charToBlock[index];
84       while (block < 0) { // take care of exceptions, blocks split across 128 boundaries
85           int[] tuple = split[-block-1];
86           if (c < tuple[0]) block = (byte)tuple[1];
87           else block = (byte)tuple[2];
88       }
89       return block;
90     }
91 
92     // returns next letter of script, or 0xFFFF if done
93 
94     public static char getNextLetter(char c, byte script) {
95         while (c < 0xFFFF) {
96             ++c;
97             if (getScript(c) == script && Character.isLetter(c)) {
98                 return c;
99             }
100         }
101         return c;
102     }
103 
104     // Supplements to Character methods; these methods go through
105     // UCharacter if possible.  If not, they fall back to Character.
106 
107     public static boolean isUnassigned(char c) {
108         try {
109             return UCharacter.getType(c) == UCharacterCategory.UNASSIGNED;
110         } catch (NullPointerException e) {
111             System.out.print("");
112         }
113         return Character.getType(c) == Character.UNASSIGNED;
114     }
115 
116     public static boolean isLetter(char c) {
117         try {
118             return UCharacter.isLetter(c);
119         } catch (NullPointerException e) {
120             System.out.print("");
121         }
122         return Character.isLetter(c);
123     }
124 
125   public static void main(String[] args) {
126     System.out.println("Blocks: ");
127     byte lastblock = -128;
128     for (char cc = 0; cc < 0xFFFF; ++cc) {
129       byte block = TestUtility.getBlock(cc);
130       if (block != lastblock) {
131         System.out.println(TestUtility.hex(cc) + "\t" + block);
132         lastblock = block;
133       }
134     }
135     System.out.println();
136     System.out.println("Scripts: ");
137     byte lastScript = -128;
138     for (char cc = 0; cc < 0xFFFF; ++cc) {
139       byte script = TestUtility.getScript(cc);
140       if (script != lastScript) {
141         System.out.println(TestUtility.hex(cc) + "\t" + script);
142         lastScript = script;
143       }
144     }
145   }
146 
147 
148 
149     public static final byte // SCRIPT CODE
150         COMMON_SCRIPT = 0,
151         LATIN_SCRIPT = 1,
152         GREEK_SCRIPT = 2,
153         CYRILLIC_SCRIPT = 3,
154         ARMENIAN_SCRIPT = 4,
155         HEBREW_SCRIPT = 5,
156         ARABIC_SCRIPT = 6,
157         SYRIAC_SCRIPT = 7,
158         THAANA_SCRIPT = 8,
159         DEVANAGARI_SCRIPT = 9,
160         BENGALI_SCRIPT = 10,
161         GURMUKHI_SCRIPT = 11,
162         GUJARATI_SCRIPT = 12,
163         ORIYA_SCRIPT = 13,
164         TAMIL_SCRIPT = 14,
165         TELUGU_SCRIPT = 15,
166         KANNADA_SCRIPT = 16,
167         MALAYALAM_SCRIPT = 17,
168         SINHALA_SCRIPT = 18,
169         THAI_SCRIPT = 19,
170         LAO_SCRIPT = 20,
171         TIBETAN_SCRIPT = 21,
172         MYANMAR_SCRIPT = 22,
173         GEORGIAN_SCRIPT = 23,
174         JAMO_SCRIPT = 24,
175         HANGUL_SCRIPT = 25,
176         ETHIOPIC_SCRIPT = 26,
177         CHEROKEE_SCRIPT = 27,
178         ABORIGINAL_SCRIPT = 28,
179         OGHAM_SCRIPT = 29,
180         RUNIC_SCRIPT = 30,
181         KHMER_SCRIPT = 31,
182         MONGOLIAN_SCRIPT = 32,
183         HIRAGANA_SCRIPT = 33,
184         KATAKANA_SCRIPT = 34,
185         BOPOMOFO_SCRIPT = 35,
186         HAN_SCRIPT = 36,
187         YI_SCRIPT = 37;
188 
189     public static final byte // block code
190         RESERVED_BLOCK = 0,
191         BASIC_LATIN = 1,
192         LATIN_1_SUPPLEMENT = 2,
193         LATIN_EXTENDED_A = 3,
194         LATIN_EXTENDED_B = 4,
195         IPA_EXTENSIONS = 5,
196         SPACING_MODIFIER_LETTERS = 6,
197         COMBINING_DIACRITICAL_MARKS = 7,
198         GREEK = 8,
199         CYRILLIC = 9,
200         ARMENIAN = 10,
201         HEBREW = 11,
202         ARABIC = 12,
203         SYRIAC = 13,
204         THAANA = 14,
205         DEVANAGARI = 15,
206         BENGALI = 16,
207         GURMUKHI = 17,
208         GUJARATI = 18,
209         ORIYA = 19,
210         TAMIL = 20,
211         TELUGU = 21,
212         KANNADA = 22,
213         MALAYALAM = 23,
214         SINHALA = 24,
215         THAI = 25,
216         LAO = 26,
217         TIBETAN = 27,
218         MYANMAR = 28,
219         GEORGIAN = 29,
220         HANGUL_JAMO = 30,
221         ETHIOPIC = 31,
222         CHEROKEE = 32,
223         UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = 33,
224         OGHAM = 34,
225         RUNIC = 35,
226         KHMER = 36,
227         MONGOLIAN = 37,
228         LATIN_EXTENDED_ADDITIONAL = 38,
229         GREEK_EXTENDED = 39,
230         GENERAL_PUNCTUATION = 40,
231         SUPERSCRIPTS_AND_SUBSCRIPTS = 41,
232         CURRENCY_SYMBOLS = 42,
233         COMBINING_MARKS_FOR_SYMBOLS = 43,
234         LETTERLIKE_SYMBOLS = 44,
235         NUMBER_FORMS = 45,
236         ARROWS = 46,
237         MATHEMATICAL_OPERATORS = 47,
238         MISCELLANEOUS_TECHNICAL = 48,
239         CONTROL_PICTURES = 49,
240         OPTICAL_CHARACTER_RECOGNITION = 50,
241         ENCLOSED_ALPHANUMERICS = 51,
242         BOX_DRAWING = 52,
243         BLOCK_ELEMENTS = 53,
244         GEOMETRIC_SHAPES = 54,
245         MISCELLANEOUS_SYMBOLS = 55,
246         DINGBATS = 56,
247         BRAILLE_PATTERNS = 57,
248         CJK_RADICALS_SUPPLEMENT = 58,
249         KANGXI_RADICALS = 59,
250         IDEOGRAPHIC_DESCRIPTION_CHARACTERS = 60,
251         CJK_SYMBOLS_AND_PUNCTUATION = 61,
252         HIRAGANA = 62,
253         KATAKANA = 63,
254         BOPOMOFO = 64,
255         HANGUL_COMPATIBILITY_JAMO = 65,
256         KANBUN = 66,
257         BOPOMOFO_EXTENDED = 67,
258         ENCLOSED_CJK_LETTERS_AND_MONTHS = 68,
259         CJK_COMPATIBILITY = 69,
260         CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = 70,
261         CJK_UNIFIED_IDEOGRAPHS = 71,
262         YI_SYLLABLES = 72,
263         YI_RADICALS = 73,
264         HANGUL_SYLLABLES = 74,
265         HIGH_SURROGATES = 75,
266         HIGH_PRIVATE_USE_SURROGATES = 76,
267         LOW_SURROGATES = 77,
268         PRIVATE_USE = 78,
269         CJK_COMPATIBILITY_IDEOGRAPHS = 79,
270         ALPHABETIC_PRESENTATION_FORMS = 80,
271         ARABIC_PRESENTATION_FORMS_A = 81,
272         COMBINING_HALF_MARKS = 82,
273         CJK_COMPATIBILITY_FORMS = 83,
274         SMALL_FORM_VARIANTS = 84,
275         ARABIC_PRESENTATION_FORMS_B = 85,
276         SPECIALS = 86,
277         HALFWIDTH_AND_FULLWIDTH_FORMS = 87;
278 
279     static final byte[] blockToScript = {
280         COMMON_SCRIPT, // 0, <RESERVED_BLOCK>
281         LATIN_SCRIPT, // 1, BASIC_LATIN
282         LATIN_SCRIPT, // 2, LATIN_1_SUPPLEMENT
283         LATIN_SCRIPT, // 3, LATIN_EXTENDED_A
284         LATIN_SCRIPT, // 4, LATIN_EXTENDED_B
285         LATIN_SCRIPT, // 5, IPA_EXTENSIONS
286         COMMON_SCRIPT, // 6, SPACING_MODIFIER_LETTERS
287         COMMON_SCRIPT, // 7, COMBINING_DIACRITICAL_MARKS
288         GREEK_SCRIPT, // 8, GREEK
289         CYRILLIC_SCRIPT, // 9, CYRILLIC
290         ARMENIAN_SCRIPT, // 10, ARMENIAN
291         HEBREW_SCRIPT, // 11, HEBREW
292         ARABIC_SCRIPT, // 12, ARABIC
293         SYRIAC_SCRIPT, // 13, SYRIAC
294         THAANA_SCRIPT, // 14, THAANA
295         DEVANAGARI_SCRIPT, // 15, DEVANAGARI
296         BENGALI_SCRIPT, // 16, BENGALI
297         GURMUKHI_SCRIPT, // 17, GURMUKHI
298         GUJARATI_SCRIPT, // 18, GUJARATI
299         ORIYA_SCRIPT, // 19, ORIYA
300         TAMIL_SCRIPT, // 20, TAMIL
301         TELUGU_SCRIPT, // 21, TELUGU
302         KANNADA_SCRIPT, // 22, KANNADA
303         MALAYALAM_SCRIPT, // 23, MALAYALAM
304         SINHALA_SCRIPT, // 24, SINHALA
305         THAI_SCRIPT, // 25, THAI
306         LAO_SCRIPT, // 26, LAO
307         TIBETAN_SCRIPT, // 27, TIBETAN
308         MYANMAR_SCRIPT, // 28, MYANMAR
309         GEORGIAN_SCRIPT, // 29, GEORGIAN
310         JAMO_SCRIPT, // 30, HANGUL_JAMO
311         ETHIOPIC_SCRIPT, // 31, ETHIOPIC
312         CHEROKEE_SCRIPT, // 32, CHEROKEE
313         ABORIGINAL_SCRIPT, // 33, UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
314         OGHAM_SCRIPT, // 34, OGHAM
315         RUNIC_SCRIPT, // 35, RUNIC
316         KHMER_SCRIPT, // 36, KHMER
317         MONGOLIAN_SCRIPT, // 37, MONGOLIAN
318         LATIN_SCRIPT, // 38, LATIN_EXTENDED_ADDITIONAL
319         GREEK_SCRIPT, // 39, GREEK_EXTENDED
320         COMMON_SCRIPT, // 40, GENERAL_PUNCTUATION
321         COMMON_SCRIPT, // 41, SUPERSCRIPTS_AND_SUBSCRIPTS
322         COMMON_SCRIPT, // 42, CURRENCY_SYMBOLS
323         COMMON_SCRIPT, // 43, COMBINING_MARKS_FOR_SYMBOLS
324         COMMON_SCRIPT, // 44, LETTERLIKE_SYMBOLS
325         COMMON_SCRIPT, // 45, NUMBER_FORMS
326         COMMON_SCRIPT, // 46, ARROWS
327         COMMON_SCRIPT, // 47, MATHEMATICAL_OPERATORS
328         COMMON_SCRIPT, // 48, MISCELLANEOUS_TECHNICAL
329         COMMON_SCRIPT, // 49, CONTROL_PICTURES
330         COMMON_SCRIPT, // 50, OPTICAL_CHARACTER_RECOGNITION
331         COMMON_SCRIPT, // 51, ENCLOSED_ALPHANUMERICS
332         COMMON_SCRIPT, // 52, BOX_DRAWING
333         COMMON_SCRIPT, // 53, BLOCK_ELEMENTS
334         COMMON_SCRIPT, // 54, GEOMETRIC_SHAPES
335         COMMON_SCRIPT, // 55, MISCELLANEOUS_SYMBOLS
336         COMMON_SCRIPT, // 56, DINGBATS
337         COMMON_SCRIPT, // 57, BRAILLE_PATTERNS
338         HAN_SCRIPT, // 58, CJK_RADICALS_SUPPLEMENT
339         HAN_SCRIPT, // 59, KANGXI_RADICALS
340         HAN_SCRIPT, // 60, IDEOGRAPHIC_DESCRIPTION_CHARACTERS
341         COMMON_SCRIPT, // 61, CJK_SYMBOLS_AND_PUNCTUATION
342         HIRAGANA_SCRIPT, // 62, HIRAGANA
343         KATAKANA_SCRIPT, // 63, KATAKANA
344         BOPOMOFO_SCRIPT, // 64, BOPOMOFO
345         JAMO_SCRIPT, // 65, HANGUL_COMPATIBILITY_JAMO
346         HAN_SCRIPT, // 66, KANBUN
347         BOPOMOFO_SCRIPT, // 67, BOPOMOFO_EXTENDED
348         COMMON_SCRIPT, // 68, ENCLOSED_CJK_LETTERS_AND_MONTHS
349         COMMON_SCRIPT, // 69, CJK_COMPATIBILITY
350         HAN_SCRIPT, // 70, CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
351         HAN_SCRIPT, // 71, CJK_UNIFIED_IDEOGRAPHS
352         YI_SCRIPT, // 72, YI_SYLLABLES
353         YI_SCRIPT, // 73, YI_RADICALS
354         HANGUL_SCRIPT, // 74, HANGUL_SYLLABLES
355         COMMON_SCRIPT, // 75, HIGH_SURROGATES
356         COMMON_SCRIPT, // 76, HIGH_PRIVATE_USE_SURROGATES
357         COMMON_SCRIPT, // 77, LOW_SURROGATES
358         COMMON_SCRIPT, // 78, PRIVATE_USE
359         HAN_SCRIPT, // 79, CJK_COMPATIBILITY_IDEOGRAPHS
360         COMMON_SCRIPT, // 80, ALPHABETIC_PRESENTATION_FORMS
361         ARABIC_SCRIPT, // 81, ARABIC_PRESENTATION_FORMS_A
362         COMMON_SCRIPT, // 82, COMBINING_HALF_MARKS
363         COMMON_SCRIPT, // 83, CJK_COMPATIBILITY_FORMS
364         COMMON_SCRIPT, // 84, SMALL_FORM_VARIANTS
365         ARABIC_SCRIPT, // 85, ARABIC_PRESENTATION_FORMS_B
366         COMMON_SCRIPT, // 86, SPECIALS
367         COMMON_SCRIPT, // 87, HALFWIDTH_AND_FULLWIDTH_FORMS
368         COMMON_SCRIPT, // 88, SPECIALS
369     };
370 
371     // could be further reduced to a byte array, but I didn't bother.
372     static final int[][] split = {
373         {0x0250, 4, 5}, // -1
374         {0x02B0, 5, 6}, // -2
375         {0x0370, 7, 8}, // -3
376         {0x0530, 0, 10}, // -4
377         {0x0590, 10, 11}, // -5
378         {0x0750, 13, 0}, // -6
379         {0x07C0, 14, 0}, // -7
380         {0x10A0, 28, 29}, // -8
381         {0x13A0, 0, 32}, // -9
382         {0x16A0, 34, 35}, // -10
383         {0x18B0, 37, 0}, // -11
384         {0x2070, 40, 41}, // -12
385         {0x20A0, 41, -31}, // -13
386         {0x2150, 44, 45}, // -14
387         {0x2190, 45, 46}, // -15
388         {0x2440, 49, -32}, // -16
389         {0x25A0, 53, 54}, // -17
390         {0x27C0, 56, 0}, // -18
391         {0x2FE0, 59, -33}, // -19
392         {0x3040, 61, 62}, // -20
393         {0x30A0, 62, 63}, // -21
394         {0x3130, 64, 65}, // -22
395         {0x3190, 65, -34}, // -23
396         {0x4DB6, 70, 0}, // -24
397         {0xA490, 72, -35}, // -25
398         {0xD7A4, 74, 0}, // -26
399         {0xFB50, 80, 81}, // -27
400         {0xFE20, 0, -36}, // -28
401         {0xFEFF, 85, 86}, // -29
402         {0xFFF0, 87, -37}, // -30
403         {0x20D0, 42, 43}, // -31
404         {0x2460, 50, 51}, // -32
405         {0x2FF0, 0, 60}, // -33
406         {0x31A0, 66, -38}, // -34
407         {0xA4D0, 73, 0}, //-35
408         {0xFE30, 82, -39}, //-36
409         {0xFFFE, 88, 0}, //-37
410         {0x31C0, 67, 0}, // -38
411         {0xFE50, 83, -40}, //-39
412         {0xFE70, 84, 85} // -40
413     };
414 
415     static final byte[] charToBlock = {
416       1, 2, 3, 4, -1, -2, -3, 8, 9, 9, -4, -5, 12, 12, -6, -7,
417       0, 0, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 27,
418       28, -8, 30, 30, 31, 31, 31, -9, 33, 33, 33, 33, 33, -10, 0, 36,
419       37, -11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 38, 38, 39, 39,
420       -12, -13, -14, -15, 47, 47, 48, 48, -16, 51, 52, -17, 55, 55, 56, -18,
421       57, 57, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 58, 59, -19,
422       -20, -21, -22, -23, 68, 68, 69, 69, 70, 70, 70, 70, 70, 70, 70, 70,
423       70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
424       70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
425       70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, -24, 71, 71, 71, 71,
426       71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71,
427       71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71,
428       71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71,
429       71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71,
430       71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71,
431       71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71,
432       71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71,
433       71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71,
434       71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71,
435       71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71,
436       72, 72, 72, 72, 72, 72, 72, 72, 72, -25, 0, 0, 0, 0, 0, 0,
437       0, 0, 0, 0, 0, 0, 0, 0, 74, 74, 74, 74, 74, 74, 74, 74,
438       74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74,
439       74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74,
440       74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74,
441       74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74,
442       74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, -26,
443       75, 75, 75, 75, 75, 75, 75, 76, 77, 77, 77, 77, 77, 77, 77, 77,
444       78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
445       78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
446       78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
447       78, 78, 79, 79, 79, 79, -27, 81, 81, 81, 81, 81, -28, -29, 87, -30
448     };
449     */
450 }
451