/* GENERATED SOURCE. DO NOT MODIFY. */
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
*****************************************************************
* Copyright (c) 2002-2014, International Business Machines Corporation
* and others.  All Rights Reserved.
*****************************************************************
* Date        Name        Description
* 06/06/2002  aliu        Creation.
*****************************************************************
*/
13 package ohos.global.icu.text;
14 
15 import java.util.ArrayList;
16 import java.util.Enumeration;
17 import java.util.HashMap;
18 import java.util.HashSet;
19 import java.util.List;
20 import java.util.MissingResourceException;
21 import java.util.Set;
22 import java.util.concurrent.ConcurrentHashMap;
23 
24 import ohos.global.icu.lang.UScript;
25 /**
26  * A transliterator that translates multiple input scripts to a single
27  * output script.  It is named Any-T or Any-T/V, where T is the target
28  * and V is the optional variant.  The target T is a script.
29  *
30  * <p>An AnyTransliterator partitions text into runs of the same
31  * script, together with adjacent COMMON or INHERITED characters.
32  * After determining the script of each run, it transliterates from
33  * that script to the given target/variant.  It does so by
34  * instantiating a transliterator from the source script to the
35  * target/variant.  If a run consists only of the target script,
36  * COMMON, or INHERITED characters, then the run is not changed.
37  *
38  * <p>At startup, all possible AnyTransliterators are registered with
39  * the system, as determined by examining the registered script
40  * transliterators.
41  *
42  * @author Alan Liu
43  */
44 class AnyTransliterator extends Transliterator {
45 
46     //------------------------------------------------------------
47     // Constants
48 
49     static final char TARGET_SEP = '-';
50     static final char VARIANT_SEP = '/';
51     static final String ANY = "Any";
52     static final String NULL_ID = "Null";
53     static final String LATIN_PIVOT = "-Latin;Latin-";
54 
55     /**
56      * Cache mapping UScriptCode values to Transliterator*.
57      */
58     private ConcurrentHashMap<Integer, Transliterator> cache;
59 
60     /**
61      * The target or target/variant string.
62      */
63     private String target;
64 
65     /**
66      * The target script code.  Never USCRIPT_INVALID_CODE.
67      */
68     private int targetScript;
69 
70     /**
71      * Special code for handling width characters
72      */
73     private Transliterator widthFix = Transliterator.getInstance("[[:dt=Nar:][:dt=Wide:]] nfkd");
74 
75     /**
76      * Implements {@link Transliterator#handleTransliterate}.
77      */
78     @Override
handleTransliterate(Replaceable text, Position pos, boolean isIncremental)79     protected void handleTransliterate(Replaceable text,
80                                        Position pos, boolean isIncremental) {
81         int allStart = pos.start;
82         int allLimit = pos.limit;
83 
84         ScriptRunIterator it =
85             new ScriptRunIterator(text, pos.contextStart, pos.contextLimit);
86 
87         while (it.next()) {
88             // Ignore runs in the ante context
89             if (it.limit <= allStart) continue;
90 
91             // Try to instantiate transliterator from it.scriptCode to
92             // our target or target/variant
93             Transliterator t = getTransliterator(it.scriptCode);
94 
95             if (t == null) {
96                 // We have no transliterator.  Do nothing, but keep
97                 // pos.start up to date.
98                 pos.start = it.limit;
99                 continue;
100             }
101 
102             // If the run end is before the transliteration limit, do
103             // a non-incremental transliteration.  Otherwise do an
104             // incremental one.
105             boolean incremental = isIncremental && (it.limit >= allLimit);
106 
107             pos.start = Math.max(allStart, it.start);
108             pos.limit = Math.min(allLimit, it.limit);
109             int limit = pos.limit;
110             t.filteredTransliterate(text, pos, incremental);
111             int delta = pos.limit - limit;
112             allLimit += delta;
113             it.adjustLimit(delta);
114 
115             // We're done if we enter the post context
116             if (it.limit >= allLimit) break;
117         }
118 
119         // Restore limit.  pos.start is fine where the last transliterator
120         // left it, or at the end of the last run.
121         pos.limit = allLimit;
122     }
123 
124     /**
125      * Private constructor
126      * @param id the ID of the form S-T or S-T/V, where T is theTarget
127      * and V is theVariant.  Must not be empty.
128      * @param theTarget the target name.  Must not be empty, and must
129      * name a script corresponding to theTargetScript.
130      * @param theVariant the variant name, or the empty string if
131      * there is no variant
132      * @param theTargetScript the script code corresponding to
133      * theTarget.
134      */
AnyTransliterator(String id, String theTarget, String theVariant, int theTargetScript)135     private AnyTransliterator(String id,
136                               String theTarget,
137                               String theVariant,
138                               int theTargetScript) {
139         super(id, null);
140         targetScript = theTargetScript;
141         cache = new ConcurrentHashMap<Integer, Transliterator>();
142 
143         target = theTarget;
144         if (theVariant.length() > 0) {
145             target = theTarget + VARIANT_SEP + theVariant;
146         }
147     }
148 
149     /**
150      * @param id the ID of the form S-T or S-T/V, where T is theTarget
151      * and V is theVariant.  Must not be empty.
152      * @param filter The Unicode filter.
153      * @param target2 the target name.
154      * @param targetScript2 the script code corresponding to theTarget.
155      * @param widthFix2 The Transliterator width fix.
156      * @param cache2 The Map object for cache.
157      */
AnyTransliterator(String id, UnicodeFilter filter, String target2, int targetScript2, Transliterator widthFix2, ConcurrentHashMap<Integer, Transliterator> cache2)158     public AnyTransliterator(String id, UnicodeFilter filter, String target2,
159             int targetScript2, Transliterator widthFix2, ConcurrentHashMap<Integer, Transliterator> cache2) {
160         super(id, filter);
161         targetScript = targetScript2;
162         cache = cache2;
163         target = target2;
164     }
165 
166     /**
167      * Returns a transliterator from the given source to our target or
168      * target/variant.  Returns NULL if the source is the same as our
169      * target script, or if the source is USCRIPT_INVALID_CODE.
170      * Caches the result and returns the same transliterator the next
171      * time.  The caller does NOT own the result and must not delete
172      * it.
173      */
getTransliterator(int source)174     private Transliterator getTransliterator(int source) {
175         if (source == targetScript || source == UScript.INVALID_CODE) {
176             if (isWide(targetScript)) {
177                 return null;
178             } else {
179                 return widthFix;
180             }
181         }
182 
183         Integer key = Integer.valueOf(source);
184         Transliterator t = cache.get(key);
185         if (t == null) {
186             String sourceName = UScript.getName(source);
187             String id = sourceName + TARGET_SEP + target;
188 
189             try {
190                 t = Transliterator.getInstance(id, FORWARD);
191             } catch (RuntimeException e) { }
192             if (t == null) {
193 
194                 // Try to pivot around Latin, our most common script
195                 id = sourceName + LATIN_PIVOT + target;
196                 try {
197                     t = Transliterator.getInstance(id, FORWARD);
198                 } catch (RuntimeException e) { }
199             }
200 
201             if (t != null) {
202                 if (!isWide(targetScript)) {
203                     List<Transliterator> v = new ArrayList<Transliterator>();
204                     v.add(widthFix);
205                     v.add(t);
206                     t = new CompoundTransliterator(v);
207                 }
208                 Transliterator prevCachedT = cache.putIfAbsent(key, t);
209                 if (prevCachedT != null) {
210                     t = prevCachedT;
211                 }
212             } else if (!isWide(targetScript)) {
213                 return widthFix;
214             }
215         }
216 
217         return t;
218     }
219 
220     /**
221      * @param targetScript2
222      * @return
223      */
isWide(int script)224     private boolean isWide(int script) {
225         return script == UScript.BOPOMOFO || script == UScript.HAN || script == UScript.HANGUL || script == UScript.HIRAGANA || script == UScript.KATAKANA;
226     }
227 
228     /**
229      * Registers standard transliterators with the system.  Called by
230      * Transliterator during initialization.  Scan all current targets
231      * and register those that are scripts T as Any-T/V.
232      */
register()233     static void register() {
234 
235         HashMap<String, Set<String>> seen = new HashMap<String, Set<String>>(); // old code used set, but was dependent on order
236 
237         for (Enumeration<String> s = Transliterator.getAvailableSources(); s.hasMoreElements(); ) {
238             String source = s.nextElement();
239 
240             // Ignore the "Any" source
241             if (source.equalsIgnoreCase(ANY)) continue;
242 
243             for (Enumeration<String> t = Transliterator.getAvailableTargets(source);
244                  t.hasMoreElements(); ) {
245                 String target = t.nextElement();
246 
247                 // Get the script code for the target.  If not a script, ignore.
248                 int targetScript = scriptNameToCode(target);
249                 if (targetScript == UScript.INVALID_CODE) {
250                     continue;
251                 }
252 
253                 Set<String> seenVariants = seen.get(target);
254                 if (seenVariants == null) {
255                     seen.put(target, seenVariants = new HashSet<String>());
256                 }
257 
258                 for (Enumeration<String> v = Transliterator.getAvailableVariants(source, target);
259                      v.hasMoreElements(); ) {
260                     String variant = v.nextElement();
261 
262                     // Only process each target/variant pair once
263                     if (seenVariants.contains(variant)) {
264                         continue;
265                     }
266                     seenVariants.add(variant);
267 
268                     String id;
269                     id = TransliteratorIDParser.STVtoID(ANY, target, variant);
270                     AnyTransliterator trans = new AnyTransliterator(id, target, variant,
271                                                                     targetScript);
272                     Transliterator.registerInstance(trans);
273                     Transliterator.registerSpecialInverse(target, NULL_ID, false);
274                 }
275             }
276         }
277     }
278 
279     /**
280      * Return the script code for a given name, or
281      * UScript.INVALID_CODE if not found.
282      */
scriptNameToCode(String name)283     private static int scriptNameToCode(String name) {
284         try{
285             int[] codes = UScript.getCode(name);
286             return codes != null ? codes[0] : UScript.INVALID_CODE;
287         }catch( MissingResourceException e){
288             ///CLOVER:OFF
289             return UScript.INVALID_CODE;
290             ///CLOVER:ON
291         }
292     }
293 
294     //------------------------------------------------------------
295     // ScriptRunIterator
296 
297     /**
298      * Returns a series of ranges corresponding to scripts. They will be
299      * of the form:
300      *
301      * ccccSScSSccccTTcTcccc   - c = common, S = first script, T = second
302      * |            |          - first run (start, limit)
303      *          |           |  - second run (start, limit)
304      *
305      * That is, the runs will overlap. The reason for this is so that a
306      * transliterator can consider common characters both before and after
307      * the scripts.
308      */
309     private static class ScriptRunIterator {
310 
311         private Replaceable text;
312         private int textStart;
313         private int textLimit;
314 
315         /**
316          * The code of the current run, valid after next() returns.  May
317          * be UScript.INVALID_CODE if and only if the entire text is
318          * COMMON/INHERITED.
319          */
320         public int scriptCode;
321 
322         /**
323          * The start of the run, inclusive, valid after next() returns.
324          */
325         public int start;
326 
327         /**
328          * The end of the run, exclusive, valid after next() returns.
329          */
330         public int limit;
331 
332         /**
333          * Constructs a run iterator over the given text from start
334          * (inclusive) to limit (exclusive).
335          */
ScriptRunIterator(Replaceable text, int start, int limit)336         public ScriptRunIterator(Replaceable text, int start, int limit) {
337             this.text = text;
338             this.textStart = start;
339             this.textLimit = limit;
340             this.limit = start;
341         }
342 
343 
344         /**
345          * Returns TRUE if there are any more runs.  TRUE is always
346          * returned at least once.  Upon return, the caller should
347          * examine scriptCode, start, and limit.
348          */
next()349         public boolean next() {
350             int ch;
351             int s;
352 
353             scriptCode = UScript.INVALID_CODE; // don't know script yet
354             start = limit;
355 
356             // Are we done?
357             if (start == textLimit) {
358                 return false;
359             }
360 
361             // Move start back to include adjacent COMMON or INHERITED
362             // characters
363             while (start > textStart) {
364                 ch = text.char32At(start - 1); // look back
365                 s = UScript.getScript(ch);
366                 if (s == UScript.COMMON || s == UScript.INHERITED) {
367                     --start;
368                 } else {
369                     break;
370                 }
371             }
372 
373             // Move limit ahead to include COMMON, INHERITED, and characters
374             // of the current script.
375             while (limit < textLimit) {
376                 ch = text.char32At(limit); // look ahead
377                 s = UScript.getScript(ch);
378                 if (s != UScript.COMMON && s != UScript.INHERITED) {
379                     if (scriptCode == UScript.INVALID_CODE) {
380                         scriptCode = s;
381                     } else if (s != scriptCode) {
382                         break;
383                     }
384                 }
385                 ++limit;
386             }
387 
388             // Return TRUE even if the entire text is COMMON / INHERITED, in
389             // which case scriptCode will be UScript.INVALID_CODE.
390             return true;
391         }
392 
393         /**
394          * Adjusts internal indices for a change in the limit index of the
395          * given delta.  A positive delta means the limit has increased.
396          */
adjustLimit(int delta)397         public void adjustLimit(int delta) {
398             limit += delta;
399             textLimit += delta;
400         }
401     }
402 
403     /**
404      * Temporary hack for registry problem. Needs to be replaced by better architecture.
405      */
safeClone()406     public Transliterator safeClone() {
407         UnicodeFilter filter = getFilter();
408         if (filter != null && filter instanceof UnicodeSet) {
409             filter = new UnicodeSet((UnicodeSet)filter);
410         }
411         return new AnyTransliterator(getID(), filter, target, targetScript, widthFix, cache);
412     }
413 
414     /* (non-Javadoc)
415      * @see ohos.global.icu.text.Transliterator#addSourceTargetSet(ohos.global.icu.text.UnicodeSet, ohos.global.icu.text.UnicodeSet, ohos.global.icu.text.UnicodeSet)
416      */
417     @Override
addSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet)418     public void addSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet) {
419         UnicodeSet myFilter = getFilterAsUnicodeSet(inputFilter);
420         // Assume that it can modify any character to any other character
421         sourceSet.addAll(myFilter);
422         if (myFilter.size() != 0) {
423             targetSet.addAll(0, 0x10FFFF);
424         }
425     }
426 }
427 
428