• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/* GENERATED SOURCE. DO NOT MODIFY. */
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
 *******************************************************************************
 * Copyright (C) 1996-2010, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
10 package ohos.global.icu.text;
11 
12 import java.util.List;
13 
14 import ohos.global.icu.impl.Utility;
15 import ohos.global.icu.impl.UtilityExtensions;
16 
17 /**
18  * A transliterator that is composed of two or more other
19  * transliterator objects linked together.  For example, if one
20  * transliterator transliterates from script A to script B, and
21  * another transliterates from script B to script C, the two may be
22  * combined to form a new transliterator from A to C.
23  *
24  * <p>Composed transliterators may not behave as expected.  For
25  * example, inverses may not combine to form the identity
26  * transliterator.  See the class documentation for {@link
27  * Transliterator} for details.
28  *
29  * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
30  *
31  * @author Alan Liu
32  */
33 class CompoundTransliterator extends Transliterator {
34 
35     private Transliterator[] trans;
36 
37     private int numAnonymousRBTs = 0;
38 
39     /**
40      * Constructs a new compound transliterator given an array of
41      * transliterators.  The array of transliterators may be of any
42      * length, including zero or one, however, useful compound
43      * transliterators have at least two components.
44      * @param transliterators array of <code>Transliterator</code>
45      * objects
46      * @param filter the filter.  Any character for which
47      * <tt>filter.contains()</tt> returns <tt>false</tt> will not be
48      * altered by this transliterator.  If <tt>filter</tt> is
49      * <tt>null</tt> then no filtering is applied.
50      */
51     /*public CompoundTransliterator(Transliterator[] transliterators,
52                                   UnicodeFilter filter) {
53         super(joinIDs(transliterators), filter);
54         trans = new Transliterator[transliterators.length];
55         System.arraycopy(transliterators, 0, trans, 0, trans.length);
56         computeMaximumContextLength();
57     }*/
58 
59     /**
60      * Constructs a new compound transliterator given an array of
61      * transliterators.  The array of transliterators may be of any
62      * length, including zero or one, however, useful compound
63      * transliterators have at least two components.
64      * @param transliterators array of <code>Transliterator</code>
65      * objects
66      */
67     /*public CompoundTransliterator(Transliterator[] transliterators) {
68         this(transliterators, null);
69     }*/
70 
71     /**
72      * Constructs a new compound transliterator.
73      * @param ID compound ID
74      * @param direction either Transliterator.FORWARD or Transliterator.REVERSE
75      * @param filter a global filter for this compound transliterator
76      * or null
77      */
78     /*public CompoundTransliterator(String ID, int direction,
79                                   UnicodeFilter filter) {
80         super(ID, filter);
81         init(ID, direction, true);
82     }*/
83 
84     /**
85      * Constructs a new compound transliterator with no filter.
86      * @param ID compound ID
87      * @param direction either Transliterator.FORWARD or Transliterator.REVERSE
88      */
89     /*public CompoundTransliterator(String ID, int direction) {
90         this(ID, direction, null);
91     }*/
92 
93     /**
94      * Constructs a new forward compound transliterator with no filter.
95      * @param ID compound ID
96      */
97     /*public CompoundTransliterator(String ID) {
98         this(ID, FORWARD, null);
99     }*/
100 
101     /**
102      * Package private constructor for Transliterator from a vector of
103      * transliterators.  The caller is responsible for fixing up the
104      * ID.
105      */
CompoundTransliterator(List<Transliterator> list)106     CompoundTransliterator(List<Transliterator> list) {
107         this(list, 0);
108     }
109 
CompoundTransliterator(List<Transliterator> list, int numAnonymousRBTs)110     CompoundTransliterator(List<Transliterator> list, int numAnonymousRBTs) {
111         super("", null);
112         trans = null;
113         init(list, FORWARD, false);
114         this.numAnonymousRBTs = numAnonymousRBTs;
115         // assume caller will fixup ID
116     }
117 
118     /**
119      * Internal method for safeClone...
120      * @param id
121      * @param filter2
122      * @param trans2
123      * @param numAnonymousRBTs2
124      */
CompoundTransliterator(String id, UnicodeFilter filter2, Transliterator[] trans2, int numAnonymousRBTs2)125     CompoundTransliterator(String id, UnicodeFilter filter2, Transliterator[] trans2, int numAnonymousRBTs2) {
126         super(id, filter2);
127         trans = trans2;
128         numAnonymousRBTs = numAnonymousRBTs2;
129     }
130 
131     /**
132      * Finish constructing a transliterator: only to be called by
133      * constructors.  Before calling init(), set trans and filter to NULL.
134      * @param id the id containing ';'-separated entries
135      * @param direction either FORWARD or REVERSE
136      * @param idSplitPoint the index into id at which the
137      * splitTrans should be inserted, if there is one, or
138      * -1 if there is none.
139      * @param splitTrans a transliterator to be inserted
140      * before the entry at offset idSplitPoint in the id string.  May be
141      * NULL to insert no entry.
142      * @param fixReverseID if TRUE, then reconstruct the ID of reverse
143      * entries by calling getID() of component entries.  Some constructors
144      * do not require this because they apply a facade ID anyway.
145      */
146     /*private void init(String id,
147                       int direction,
148                       boolean fixReverseID) {
149         // assert(trans == 0);
150 
151         Vector list = new Vector();
152         UnicodeSet[] compoundFilter = new UnicodeSet[1];
153         StringBuffer regenID = new StringBuffer();
154         if (!TransliteratorIDParser.parseCompoundID(id, direction,
155                  regenID, list, compoundFilter)) {
156             throw new IllegalArgumentException("Invalid ID " + id);
157         }
158 
159         TransliteratorIDParser.instantiateList(list);
160 
161         init(list, direction, fixReverseID);
162 
163         if (compoundFilter[0] != null) {
164             setFilter(compoundFilter[0]);
165         }
166     }*/
167 
168 
169     /**
170      * Finish constructing a transliterator: only to be called by
171      * constructors.  Before calling init(), set trans and filter to NULL.
172      * @param list a vector of transliterator objects to be adopted.  It
173      * should NOT be empty.  The list should be in declared order.  That
174      * is, it should be in the FORWARD order; if direction is REVERSE then
175      * the list order will be reversed.
176      * @param direction either FORWARD or REVERSE
177      * @param fixReverseID if TRUE, then reconstruct the ID of reverse
178      * entries by calling getID() of component entries.  Some constructors
179      * do not require this because they apply a facade ID anyway.
180      */
init(List<Transliterator> list, int direction, boolean fixReverseID)181     private void init(List<Transliterator> list,
182                       int direction,
183                       boolean fixReverseID) {
184         // assert(trans == 0);
185 
186         // Allocate array
187         int count = list.size();
188         trans = new Transliterator[count];
189 
190         // Move the transliterators from the vector into an array.
191         // Reverse the order if necessary.
192         int i;
193         for (i=0; i<count; ++i) {
194             int j = (direction == FORWARD) ? i : count - 1 - i;
195             trans[i] = list.get(j);
196         }
197 
198         // If the direction is UTRANS_REVERSE then we may need to fix the
199         // ID.
200         if (direction == REVERSE && fixReverseID) {
201             StringBuilder newID = new StringBuilder();
202             for (i=0; i<count; ++i) {
203                 if (i > 0) {
204                     newID.append(ID_DELIM);
205                 }
206                 newID.append(trans[i].getID());
207             }
208             setID(newID.toString());
209         }
210 
211         computeMaximumContextLength();
212     }
213 
214     /**
215      * Return the IDs of the given list of transliterators, concatenated
216      * with ';' delimiting them.  Equivalent to the perlish expression
217      * join(';', map($_.getID(), transliterators).
218      */
219     /*private static String joinIDs(Transliterator[] transliterators) {
220         StringBuffer id = new StringBuffer();
221         for (int i=0; i<transliterators.length; ++i) {
222             if (i > 0) {
223                 id.append(';');
224             }
225             id.append(transliterators[i].getID());
226         }
227         return id.toString();
228     }*/
229 
230     /**
231      * Returns the number of transliterators in this chain.
232      * @return number of transliterators in this chain.
233      */
getCount()234     public int getCount() {
235         return trans.length;
236     }
237 
238     /**
239      * Returns the transliterator at the given index in this chain.
240      * @param index index into chain, from 0 to <code>getCount() - 1</code>
241      * @return transliterator at the given index
242      */
getTransliterator(int index)243     public Transliterator getTransliterator(int index) {
244         return trans[index];
245     }
246 
247     /**
248      * Append c to buf, unless buf is empty or buf already ends in c.
249      */
_smartAppend(StringBuilder buf, char c)250     private static void _smartAppend(StringBuilder buf, char c) {
251         if (buf.length() != 0 &&
252             buf.charAt(buf.length() - 1) != c) {
253             buf.append(c);
254         }
255     }
256 
257     /**
258      * Override Transliterator:
259      * Create a rule string that can be passed to createFromRules()
260      * to recreate this transliterator.
261      * @param escapeUnprintable if TRUE then convert unprintable
262      * character to their hex escape representations, \\uxxxx or
263      * \\Uxxxxxxxx.  Unprintable characters are those other than
264      * U+000A, U+0020..U+007E.
265      * @return the rule string
266      */
267     @Override
toRules(boolean escapeUnprintable)268     public String toRules(boolean escapeUnprintable) {
269         // We do NOT call toRules() on our component transliterators, in
270         // general.  If we have several rule-based transliterators, this
271         // yields a concatenation of the rules -- not what we want.  We do
272         // handle compound RBT transliterators specially -- those for which
273         // compoundRBTIndex >= 0.  For the transliterator at compoundRBTIndex,
274         // we do call toRules() recursively.
275         StringBuilder rulesSource = new StringBuilder();
276         if (numAnonymousRBTs >= 1 && getFilter() != null) {
277             // If we are a compound RBT and if we have a global
278             // filter, then emit it at the top.
279             rulesSource.append("::").append(getFilter().toPattern(escapeUnprintable)).append(ID_DELIM);
280         }
281         for (int i=0; i<trans.length; ++i) {
282             String rule;
283 
284             // Anonymous RuleBasedTransliterators (inline rules and
285             // ::BEGIN/::END blocks) are given IDs that begin with
286             // "%Pass": use toRules() to write all the rules to the output
287             // (and insert "::Null;" if we have two in a row)
288             if (trans[i].getID().startsWith("%Pass")) {
289                 rule = trans[i].toRules(escapeUnprintable);
290                 if (numAnonymousRBTs > 1 && i > 0 && trans[i - 1].getID().startsWith("%Pass"))
291                     rule = "::Null;" + rule;
292 
293             // we also use toRules() on CompoundTransliterators (which we
294             // check for by looking for a semicolon in the ID)-- this gets
295             // the list of their child transliterators output in the right
296             // format
297             } else if (trans[i].getID().indexOf(';') >= 0) {
298                 rule = trans[i].toRules(escapeUnprintable);
299 
300             // for everything else, use baseToRules()
301             } else {
302                 rule = trans[i].baseToRules(escapeUnprintable);
303             }
304             _smartAppend(rulesSource, '\n');
305             rulesSource.append(rule);
306             _smartAppend(rulesSource, ID_DELIM);
307         }
308         return rulesSource.toString();
309     }
310 
311     /**
312      * @hide draft / provisional / internal are hidden on OHOS
313      */
314     @Override
addSourceTargetSet(UnicodeSet filter, UnicodeSet sourceSet, UnicodeSet targetSet)315     public void addSourceTargetSet(UnicodeSet filter, UnicodeSet sourceSet, UnicodeSet targetSet) {
316         UnicodeSet myFilter = new UnicodeSet(getFilterAsUnicodeSet(filter));
317         UnicodeSet tempTargetSet = new UnicodeSet();
318         for (int i=0; i<trans.length; ++i) {
319             // each time we produce targets, those can be used by subsequent items, despite the filter.
320             // so we get just those items, and add them to the filter each time.
321             tempTargetSet.clear();
322             trans[i].addSourceTargetSet(myFilter, sourceSet, tempTargetSet);
323             targetSet.addAll(tempTargetSet);
324             myFilter.addAll(tempTargetSet);
325         }
326     }
327 
328 //    /**
329 //     * Returns the set of all characters that may be generated as
330 //     * replacement text by this transliterator.
331 //     */
332 //    public UnicodeSet getTargetSet() {
333 //        UnicodeSet set = new UnicodeSet();
334 //        for (int i=0; i<trans.length; ++i) {
335 //            // This is a heuristic, and not 100% reliable.
336 //            set.addAll(trans[i].getTargetSet());
337 //        }
338 //        return set;
339 //    }
340 
341     /**
342      * Implements {@link Transliterator#handleTransliterate}.
343      */
344     @Override
handleTransliterate(Replaceable text, Position index, boolean incremental)345     protected void handleTransliterate(Replaceable text,
346                                        Position index, boolean incremental) {
347         /* Call each transliterator with the same start value and
348          * initial cursor index, but with the limit index as modified
349          * by preceding transliterators.  The cursor index must be
350          * reset for each transliterator to give each a chance to
351          * transliterate the text.  The initial cursor index is known
352          * to still point to the same place after each transliterator
353          * is called because each transliterator will not change the
354          * text between start and the initial value of cursor.
355          *
356          * IMPORTANT: After the first transliterator, each subsequent
357          * transliterator only gets to transliterate text committed by
358          * preceding transliterators; that is, the cursor (output
359          * value) of transliterator i becomes the limit (input value)
360          * of transliterator i+1.  Finally, the overall limit is fixed
361          * up before we return.
362          *
363          * Assumptions we make here:
364          * (1) contextStart <= start <= limit <= contextLimit <= text.length()
365          * (2) start <= start' <= limit'  ;cursor doesn't move back
366          * (3) start <= limit'            ;text before cursor unchanged
367          * - start' is the value of start after calling handleKT
368          * - limit' is the value of limit after calling handleKT
369          */
370 
371         /**
372          * Example: 3 transliterators.  This example illustrates the
373          * mechanics we need to implement.  C, S, and L are the contextStart,
374          * start, and limit.  gl is the globalLimit.  contextLimit is
375          * equal to limit throughout.
376          *
377          * 1. h-u, changes hex to Unicode
378          *
379          *    4  7  a  d  0      4  7  a
380          *    abc/u0061/u    =>  abca/u
381          *    C  S       L       C   S L   gl=f->a
382          *
383          * 2. upup, changes "x" to "XX"
384          *
385          *    4  7  a       4  7  a
386          *    abca/u    =>  abcAA/u
387          *    C  SL         C    S
388          *                       L    gl=a->b
389          * 3. u-h, changes Unicode to hex
390          *
391          *    4  7  a        4  7  a  d  0  3
392          *    abcAA/u    =>  abc/u0041/u0041/u
393          *    C  S L         C              S
394          *                                  L   gl=b->15
395          * 4. return
396          *
397          *    4  7  a  d  0  3
398          *    abc/u0041/u0041/u
399          *    C S L
400          */
401 
402         if (trans.length < 1) {
403             index.start = index.limit;
404             return; // Short circuit for empty compound transliterators
405         }
406 
407         // compoundLimit is the limit value for the entire compound
408         // operation.  We overwrite index.limit with the previous
409         // index.start.  After each transliteration, we update
410         // compoundLimit for insertions or deletions that have happened.
411         int compoundLimit = index.limit;
412 
413         // compoundStart is the start for the entire compound
414         // operation.
415         int compoundStart = index.start;
416 
417         int delta = 0; // delta in length
418 
419         StringBuffer log = null;
420         ///CLOVER:OFF
421         if (DEBUG) {
422             log = new StringBuffer("CompoundTransliterator{" + getID() +
423                                    (incremental ? "}i: IN=" : "}: IN="));
424             UtilityExtensions.formatInput(log, text, index);
425             System.out.println(Utility.escape(log.toString()));
426         }
427         ///CLOVER:ON
428 
429         // Give each transliterator a crack at the run of characters.
430         // See comments at the top of the method for more detail.
431         for (int i=0; i<trans.length; ++i) {
432             index.start = compoundStart; // Reset start
433             int limit = index.limit;
434 
435             if (index.start == index.limit) {
436                 // Short circuit for empty range
437                 ///CLOVER:OFF
438                 if (DEBUG) {
439                     System.out.println("CompoundTransliterator[" + i +
440                                        ".." + (trans.length-1) +
441                                        (incremental ? "]i: " : "]: ") +
442                                        UtilityExtensions.formatInput(text, index) +
443                                        " (NOTHING TO DO)");
444                 }
445                 ///CLOVER:ON
446                 break;
447             }
448 
449             ///CLOVER:OFF
450             if (DEBUG) {
451                 log.setLength(0);
452                 log.append("CompoundTransliterator[" + i + "=" +
453                            trans[i].getID() +
454                            (incremental ? "]i: " : "]: "));
455                 UtilityExtensions.formatInput(log, text, index);
456             }
457             ///CLOVER:ON
458 
459             trans[i].filteredTransliterate(text, index, incremental);
460 
461             // In a properly written transliterator, start == limit after
462             // handleTransliterate() returns when incremental is false.
463             // Catch cases where the subclass doesn't do this, and throw
464             // an exception.  (Just pinning start to limit is a bad idea,
465             // because what's probably happening is that the subclass
466             // isn't transliterating all the way to the end, and it should
467             // in non-incremental mode.)
468             if (!incremental && index.start != index.limit) {
469                 throw new RuntimeException("ERROR: Incomplete non-incremental transliteration by " + trans[i].getID());
470             }
471 
472             ///CLOVER:OFF
473             if (DEBUG) {
474                 log.append(" => ");
475                 UtilityExtensions.formatInput(log, text, index);
476                 System.out.println(Utility.escape(log.toString()));
477             }
478             ///CLOVER:ON
479 
480             // Cumulative delta for insertions/deletions
481             delta += index.limit - limit;
482 
483             if (incremental) {
484                 // In the incremental case, only allow subsequent
485                 // transliterators to modify what has already been
486                 // completely processed by prior transliterators.  In the
487                 // non-incrmental case, allow each transliterator to
488                 // process the entire text.
489                 index.limit = index.start;
490             }
491         }
492 
493         compoundLimit += delta;
494 
495         // Start is good where it is -- where the last transliterator left
496         // it.  Limit needs to be put back where it was, modulo
497         // adjustments for deletions/insertions.
498         index.limit = compoundLimit;
499 
500         ///CLOVER:OFF
501         if (DEBUG) {
502             log.setLength(0);
503             log.append("CompoundTransliterator{" + getID() +
504                        (incremental ? "}i: OUT=" : "}: OUT="));
505             UtilityExtensions.formatInput(log, text, index);
506             System.out.println(Utility.escape(log.toString()));
507         }
508         ///CLOVER:ON
509     }
510 
511     /**
512      * Compute and set the length of the longest context required by this transliterator.
513      * This is <em>preceding</em> context.
514      */
computeMaximumContextLength()515     private void computeMaximumContextLength() {
516         int max = 0;
517         for (int i=0; i<trans.length; ++i) {
518             int len = trans[i].getMaximumContextLength();
519             if (len > max) {
520                 max = len;
521             }
522         }
523         setMaximumContextLength(max);
524     }
525 
526     /**
527      * Temporary hack for registry problem. Needs to be replaced by better architecture.
528      */
safeClone()529     public Transliterator safeClone() {
530         UnicodeFilter filter = getFilter();
531         if (filter != null && filter instanceof UnicodeSet) {
532             filter = new UnicodeSet((UnicodeSet)filter);
533         }
534         return new CompoundTransliterator(getID(), filter, trans, numAnonymousRBTs);
535     }
536 }
537