• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 /*
5 **********************************************************************
6 *   Copyright (c) 2001-2011, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 **********************************************************************
9 *   Date        Name        Description
10 *   11/19/2001  aliu        Creation.
11 **********************************************************************
12 */
13 package ohos.global.icu.text;
14 import ohos.global.icu.impl.Utility;
15 import ohos.global.icu.lang.UCharacter;
16 
17 /**
18  * A transliterator that converts Unicode escape forms to the
19  * characters they represent.  Escape forms have a prefix, a suffix, a
20  * radix, and minimum and maximum digit counts.
21  *
22  * <p>This class is package private.  It registers several standard
23  * variants with the system which are then accessed via their IDs.
24  *
25  * @author Alan Liu
26  */
27 class UnescapeTransliterator extends Transliterator {
28 
29     /**
30      * The encoded pattern specification.  The pattern consists of
31      * zero or more forms.  Each form consists of a prefix, suffix,
32      * radix, minimum digit count, and maximum digit count.  These
33      * values are stored as a five character header.  That is, their
34      * numeric values are cast to 16-bit characters and stored in the
35      * string.  Following these five characters, the prefix
36      * characters, then suffix characters are stored.  Each form thus
37      * takes n+5 characters, where n is the total length of the prefix
38      * and suffix.  The end is marked by a header of length one
39      * consisting of the character END.
40      */
41     private char spec[];
42 
43     /**
44      * Special character marking the end of the spec[] array.
45      */
46     private static final char END = 0xFFFF;
47 
48     /**
49      * Registers standard variants with the system.  Called by
50      * Transliterator during initialization.
51      */
register()52     static void register() {
53         // Unicode: "U+10FFFF" hex, min=4, max=6
54         Transliterator.registerFactory("Hex-Any/Unicode", new Transliterator.Factory() {
55             @Override
56             public Transliterator getInstance(String ID) {
57                 return new UnescapeTransliterator("Hex-Any/Unicode", new char[] {
58                     2, 0, 16, 4, 6, 'U', '+',
59                     END
60                 });
61             }
62         });
63 
64         // Java: "\\uFFFF" hex, min=4, max=4
65         Transliterator.registerFactory("Hex-Any/Java", new Transliterator.Factory() {
66             @Override
67             public Transliterator getInstance(String ID) {
68                 return new UnescapeTransliterator("Hex-Any/Java", new char[] {
69                     2, 0, 16, 4, 4, '\\', 'u',
70                     END
71                 });
72             }
73         });
74 
75         // C: "\\uFFFF" hex, min=4, max=4; \\U0010FFFF hex, min=8, max=8
76         Transliterator.registerFactory("Hex-Any/C", new Transliterator.Factory() {
77             @Override
78             public Transliterator getInstance(String ID) {
79                 return new UnescapeTransliterator("Hex-Any/C", new char[] {
80                     2, 0, 16, 4, 4, '\\', 'u',
81                     2, 0, 16, 8, 8, '\\', 'U',
82                     END
83                 });
84             }
85         });
86 
87         // XML: "&#x10FFFF;" hex, min=1, max=6
88         Transliterator.registerFactory("Hex-Any/XML", new Transliterator.Factory() {
89             @Override
90             public Transliterator getInstance(String ID) {
91                 return new UnescapeTransliterator("Hex-Any/XML", new char[] {
92                     3, 1, 16, 1, 6, '&', '#', 'x', ';',
93                     END
94                 });
95             }
96         });
97 
98         // XML10: "&1114111;" dec, min=1, max=7 (not really "Hex-Any")
99         Transliterator.registerFactory("Hex-Any/XML10", new Transliterator.Factory() {
100             @Override
101             public Transliterator getInstance(String ID) {
102                 return new UnescapeTransliterator("Hex-Any/XML10", new char[] {
103                     2, 1, 10, 1, 7, '&', '#', ';',
104                     END
105                 });
106             }
107         });
108 
109         // Perl: "\\x{263A}" hex, min=1, max=6
110         Transliterator.registerFactory("Hex-Any/Perl", new Transliterator.Factory() {
111             @Override
112             public Transliterator getInstance(String ID) {
113                 return new UnescapeTransliterator("Hex-Any/Perl", new char[] {
114                     3, 1, 16, 1, 6, '\\', 'x', '{', '}',
115                     END
116                 });
117             }
118         });
119 
120         // All: Java, C, Perl, XML, XML10, Unicode
121         Transliterator.registerFactory("Hex-Any", new Transliterator.Factory() {
122             @Override
123             public Transliterator getInstance(String ID) {
124                 return new UnescapeTransliterator("Hex-Any", new char[] {
125                     2, 0, 16, 4, 6, 'U', '+',            // Unicode
126                     2, 0, 16, 4, 4, '\\', 'u',           // Java
127                     2, 0, 16, 8, 8, '\\', 'U',           // C (surrogates)
128                     3, 1, 16, 1, 6, '&', '#', 'x', ';',  // XML
129                     2, 1, 10, 1, 7, '&', '#', ';',       // XML10
130                     3, 1, 16, 1, 6, '\\', 'x', '{', '}', // Perl
131                     END
132                 });
133             }
134         });
135     }
136 
137     /**
138      * Package private constructor.  Takes the encoded spec array.
139      */
UnescapeTransliterator(String ID, char spec[])140     UnescapeTransliterator(String ID, char spec[]) {
141         super(ID, null);
142         this.spec = spec;
143     }
144 
145     /**
146      * Implements {@link Transliterator#handleTransliterate}.
147      */
148     @Override
handleTransliterate(Replaceable text, Position pos, boolean isIncremental)149     protected void handleTransliterate(Replaceable text,
150                                        Position pos, boolean isIncremental) {
151         int start = pos.start;
152         int limit = pos.limit;
153         int i, ipat;
154 
155       loop:
156         while (start < limit) {
157             // Loop over the forms in spec[].  Exit this loop when we
158             // match one of the specs.  Exit the outer loop if a
159             // partial match is detected and isIncremental is true.
160             for (ipat = 0; spec[ipat] != END;) {
161 
162                 // Read the header
163                 int prefixLen = spec[ipat++];
164                 int suffixLen = spec[ipat++];
165                 int radix     = spec[ipat++];
166                 int minDigits = spec[ipat++];
167                 int maxDigits = spec[ipat++];
168 
169                 // s is a copy of start that is advanced over the
170                 // characters as we parse them.
171                 int s = start;
172                 boolean match = true;
173 
174                 for (i=0; i<prefixLen; ++i) {
175                     if (s >= limit) {
176                         if (i > 0) {
177                             // We've already matched a character.  This is
178                             // a partial match, so we return if in
179                             // incremental mode.  In non-incremental mode,
180                             // go to the next spec.
181                             if (isIncremental) {
182                                 break loop;
183                             }
184                             match = false;
185                             break;
186                         }
187                     }
188                     char c = text.charAt(s++);
189                     if (c != spec[ipat + i]) {
190                         match = false;
191                         break;
192                     }
193                 }
194 
195                 if (match) {
196                     int u = 0;
197                     int digitCount = 0;
198                     for (;;) {
199                         if (s >= limit) {
200                             // Check for partial match in incremental mode.
201                             if (s > start && isIncremental) {
202                                 break loop;
203                             }
204                             break;
205                         }
206                         int ch = text.char32At(s);
207                         int digit = UCharacter.digit(ch, radix);
208                         if (digit < 0) {
209                             break;
210                         }
211                         s += UTF16.getCharCount(ch);
212                         u = (u * radix) + digit;
213                         if (++digitCount == maxDigits) {
214                             break;
215                         }
216                     }
217 
218                     match = (digitCount >= minDigits);
219 
220                     if (match) {
221                         for (i=0; i<suffixLen; ++i) {
222                             if (s >= limit) {
223                                 // Check for partial match in incremental mode.
224                                 if (s > start && isIncremental) {
225                                     break loop;
226                                 }
227                                 match = false;
228                                 break;
229                             }
230                             char c = text.charAt(s++);
231                             if (c != spec[ipat + prefixLen + i]) {
232                                 match = false;
233                                 break;
234                             }
235                         }
236 
237                         if (match) {
238                             // At this point, we have a match
239                             String str = UTF16.valueOf(u);
240                             text.replace(start, s, str);
241                             limit -= s - start - str.length();
242                             // The following break statement leaves the
243                             // loop that is traversing the forms in
244                             // spec[].  We then parse the next input
245                             // character.
246                             break;
247                         }
248                     }
249                 }
250 
251                 ipat += prefixLen + suffixLen;
252             }
253 
254             if (start < limit) {
255                 start += UTF16.getCharCount(text.char32At(start));
256             }
257         }
258 
259         pos.contextLimit += limit - pos.limit;
260         pos.limit = limit;
261         pos.start = start;
262     }
263 
264     /* (non-Javadoc)
265      * @see ohos.global.icu.text.Transliterator#addSourceTargetSet(ohos.global.icu.text.UnicodeSet, ohos.global.icu.text.UnicodeSet, ohos.global.icu.text.UnicodeSet)
266      */
267     @Override
addSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet)268     public void addSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet) {
269         // Each form consists of a prefix, suffix,
270         // * radix, minimum digit count, and maximum digit count.  These
271         // * values are stored as a five character header. ...
272         UnicodeSet myFilter = getFilterAsUnicodeSet(inputFilter);
273         UnicodeSet items = new UnicodeSet();
274         StringBuilder buffer = new StringBuilder();
275         for (int i = 0; spec[i] != END;) {
276             // first 5 items are header
277             int end = i + spec[i] + spec[i+1] + 5;
278             int radix = spec[i+2];
279             for (int j = 0; j < radix; ++j) {
280                 Utility.appendNumber(buffer, j, radix, 0);
281             }
282             // then add the characters
283             for (int j = i + 5; j < end; ++j) {
284                 items.add(spec[j]);
285             }
286             // and go to next block
287             i = end;
288         }
289         items.addAll(buffer.toString());
290         items.retainAll(myFilter);
291 
292         if (items.size() > 0) {
293             sourceSet.addAll(items);
294             targetSet.addAll(0,0x10FFFF); // assume we can produce any character
295         }
296     }
297 }
298