• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 /*
5 **********************************************************************
6 *   Copyright (c) 2001-2011, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 **********************************************************************
9 *   Date        Name        Description
10 *   11/19/2001  aliu        Creation.
11 **********************************************************************
12 */
13 package ohos.global.icu.text;
14 import ohos.global.icu.impl.Utility;
15 
16 /**
17  * A transliterator that converts Unicode characters to an escape
18  * form.  Examples of escape forms are "U+4E01" and "".
19  * Escape forms have a prefix and suffix, either of which may be
20  * empty, a radix, typically 16 or 10, a minimum digit count,
21  * typically 1, 4, or 8, and a boolean that specifies whether
22  * supplemental characters are handled as 32-bit code points or as two
23  * 16-bit code units.  Most escape forms handle 32-bit code points,
24  * but some, such as the Java form, intentionally break them into two
25  * surrogate pairs, for backward compatibility.
26  *
27  * <p>Some escape forms actually have two different patterns, one for
28  * BMP characters (0..FFFF) and one for supplements (>FFFF).  To
29  * handle this, a second EscapeTransliterator may be defined that
30  * specifies the pattern to be produced for supplementals.  An example
31  * of a form that requires this is the C form, which uses "\\uFFFF"
32  * for BMP characters and "\\U0010FFFF" for supplementals.
33  *
34  * <p>This class is package private.  It registers several standard
35  * variants with the system which are then accessed via their IDs.
36  *
37  * @author Alan Liu
38  */
39 class EscapeTransliterator extends Transliterator {
40 
41     /**
42      * The prefix of the escape form; may be empty, but usually isn't.
43      * May not be null.
44      */
45     private String prefix;
46 
47     /**
48      * The prefix of the escape form; often empty.  May not be null.
49      */
50     private String suffix;
51 
52     /**
53      * The radix to display the number in.  Typically 16 or 10.  Must
54      * be in the range 2 to 36.
55      */
56     private int radix;
57 
58     /**
59      * The minimum number of digits.  Typically 1, 4, or 8.  Values
60      * less than 1 are equivalent to 1.
61      */
62     private int minDigits;
63 
64     /**
65      * If true, supplementals are handled as 32-bit code points.  If
66      * false, they are handled as two 16-bit code units.
67      */
68     private boolean grokSupplementals;
69 
70     /**
71      * The form to be used for supplementals.  If this is null then
72      * the same form is used for BMP characters and supplementals.  If
73      * this is not null and if grokSupplementals is true then the
74      * prefix, suffix, radix, and minDigits of this object are used
75      * for supplementals.
76      */
77     private EscapeTransliterator supplementalHandler;
78 
79     /**
80      * Registers standard variants with the system.  Called by
81      * Transliterator during initialization.
82      */
register()83     static void register() {
84         // Unicode: "U+10FFFF" hex, min=4, max=6
85         Transliterator.registerFactory("Any-Hex/Unicode", new Transliterator.Factory() {
86             @Override
87             public Transliterator getInstance(String ID) {
88                 return new EscapeTransliterator("Any-Hex/Unicode",
89                                                 "U+", "", 16, 4, true, null);
90             }
91         });
92 
93         // Java: "\\uFFFF" hex, min=4, max=4
94         Transliterator.registerFactory("Any-Hex/Java", new Transliterator.Factory() {
95             @Override
96             public Transliterator getInstance(String ID) {
97                 return new EscapeTransliterator("Any-Hex/Java",
98                                                 "\\u", "", 16, 4, false, null);
99             }
100         });
101 
102         // C: "\\uFFFF" hex, min=4, max=4; \\U0010FFFF hex, min=8, max=8
103         Transliterator.registerFactory("Any-Hex/C", new Transliterator.Factory() {
104             @Override
105             public Transliterator getInstance(String ID) {
106                 return new EscapeTransliterator("Any-Hex/C",
107                                                 "\\u", "", 16, 4, true,
108                        new EscapeTransliterator("", "\\U", "", 16, 8, true, null));
109             }
110         });
111 
112         // XML: "&#x10FFFF;" hex, min=1, max=6
113         Transliterator.registerFactory("Any-Hex/XML", new Transliterator.Factory() {
114             @Override
115             public Transliterator getInstance(String ID) {
116                 return new EscapeTransliterator("Any-Hex/XML",
117                                                 "&#x", ";", 16, 1, true, null);
118             }
119         });
120 
121         // XML10: "&1114111;" dec, min=1, max=7 (not really "Any-Hex")
122         Transliterator.registerFactory("Any-Hex/XML10", new Transliterator.Factory() {
123             @Override
124             public Transliterator getInstance(String ID) {
125                 return new EscapeTransliterator("Any-Hex/XML10",
126                                                 "&#", ";", 10, 1, true, null);
127             }
128         });
129 
130         // Perl: "\\x{263A}" hex, min=1, max=6
131         Transliterator.registerFactory("Any-Hex/Perl", new Transliterator.Factory() {
132             @Override
133             public Transliterator getInstance(String ID) {
134                 return new EscapeTransliterator("Any-Hex/Perl",
135                                                 "\\x{", "}", 16, 1, true, null);
136             }
137         });
138 
139         // Plain: "FFFF" hex, min=4, max=6
140         Transliterator.registerFactory("Any-Hex/Plain", new Transliterator.Factory() {
141             @Override
142             public Transliterator getInstance(String ID) {
143                 return new EscapeTransliterator("Any-Hex/Plain",
144                                                 "", "", 16, 4, true, null);
145             }
146         });
147 
148         // Generic
149         Transliterator.registerFactory("Any-Hex", new Transliterator.Factory() {
150             @Override
151             public Transliterator getInstance(String ID) {
152                 return new EscapeTransliterator("Any-Hex",
153                                                 "\\u", "", 16, 4, false, null);
154             }
155         });
156     }
157 
158     /**
159      * Constructs an escape transliterator with the given ID and
160      * parameters.  See the class member documentation for details.
161      */
EscapeTransliterator(String ID, String prefix, String suffix, int radix, int minDigits, boolean grokSupplementals, EscapeTransliterator supplementalHandler)162     EscapeTransliterator(String ID, String prefix, String suffix,
163                          int radix, int minDigits,
164                          boolean grokSupplementals,
165                          EscapeTransliterator supplementalHandler) {
166         super(ID, null);
167         this.prefix = prefix;
168         this.suffix = suffix;
169         this.radix = radix;
170         this.minDigits = minDigits;
171         this.grokSupplementals = grokSupplementals;
172         this.supplementalHandler = supplementalHandler;
173     }
174 
175     /**
176      * Implements {@link Transliterator#handleTransliterate}.
177      */
178     @Override
handleTransliterate(Replaceable text, Position pos, boolean incremental)179     protected void handleTransliterate(Replaceable text,
180                                        Position pos, boolean incremental) {
181         int start = pos.start;
182         int limit = pos.limit;
183 
184         StringBuilder buf = new StringBuilder(prefix);
185         int prefixLen = prefix.length();
186         boolean redoPrefix = false;
187 
188         while (start < limit) {
189             int c = grokSupplementals ? text.char32At(start) : text.charAt(start);
190             int charLen = grokSupplementals ? UTF16.getCharCount(c) : 1;
191 
192             if ((c & 0xFFFF0000) != 0 && supplementalHandler != null) {
193                 buf.setLength(0);
194                 buf.append(supplementalHandler.prefix);
195                 Utility.appendNumber(buf, c, supplementalHandler.radix,
196                                      supplementalHandler.minDigits);
197                 buf.append(supplementalHandler.suffix);
198                 redoPrefix = true;
199             } else {
200                 if (redoPrefix) {
201                     buf.setLength(0);
202                     buf.append(prefix);
203                     redoPrefix = false;
204                 } else {
205                     buf.setLength(prefixLen);
206                 }
207                 Utility.appendNumber(buf, c, radix, minDigits);
208                 buf.append(suffix);
209             }
210 
211             text.replace(start, start + charLen, buf.toString());
212             start += buf.length();
213             limit += buf.length() - charLen;
214         }
215 
216         pos.contextLimit += limit - pos.limit;
217         pos.limit = limit;
218         pos.start = start;
219     }
220 
221     /* (non-Javadoc)
222      * @see ohos.global.icu.text.Transliterator#addSourceTargetSet(ohos.global.icu.text.UnicodeSet, ohos.global.icu.text.UnicodeSet, ohos.global.icu.text.UnicodeSet)
223      */
224     @Override
addSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet)225     public void addSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet) {
226         sourceSet.addAll(getFilterAsUnicodeSet(inputFilter));
227         for (EscapeTransliterator it = this; it != null ; it = it.supplementalHandler) {
228             if (inputFilter.size() != 0) {
229                 targetSet.addAll(it.prefix);
230                 targetSet.addAll(it.suffix);
231                 StringBuilder buffer = new StringBuilder();
232                 for (int i = 0; i < it.radix; ++i) {
233                     Utility.appendNumber(buffer, i, it.radix, it.minDigits);
234                 }
235                 targetSet.addAll(buffer.toString()); // TODO drop once String is changed to CharSequence in UnicodeSet
236             }
237         }
238     }
239 }
240