/* GENERATED SOURCE. DO NOT MODIFY. */
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
 *******************************************************************************
 * Copyright (C) 1996-2016, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
10 package ohos.global.icu.text;
11 
12 import java.util.HashMap;
13 import java.util.Map;
14 
15 /**
16  * <code>RuleBasedTransliterator</code> is a transliterator
17  * built from a set of rules as defined for
18  * {@link Transliterator#createFromRules(String, String, int)}.
19  * See the class {@link Transliterator} documentation for the rule syntax.
20  *
21  * @author Alan Liu
22  * @deprecated This API is ICU internal only.
23  * @hide exposed on OHOS
24  * @hide draft / provisional / internal are hidden on OHOS
25  */
26 @Deprecated
27 public class RuleBasedTransliterator extends Transliterator {
28 
29     private final Data data;
30 
31 //    /**
32 //     * Constructs a new transliterator from the given rules.
33 //     * @param rules rules, separated by ';'
34 //     * @param direction either FORWARD or REVERSE.
35 //     * @exception IllegalArgumentException if rules are malformed
36 //     * or direction is invalid.
37 //     */
38 //     public RuleBasedTransliterator(String ID, String rules, int direction,
39 //                                   UnicodeFilter filter) {
40 //        super(ID, filter);
41 //        if (direction != FORWARD && direction != REVERSE) {
42 //            throw new IllegalArgumentException("Invalid direction");
43 //        }
44 //
45 //        TransliteratorParser parser = new TransliteratorParser();
46 //        parser.parse(rules, direction);
47 //        if (parser.idBlockVector.size() != 0 ||
48 //            parser.compoundFilter != null) {
49 //            throw new IllegalArgumentException("::ID blocks illegal in RuleBasedTransliterator constructor");
50 //        }
51 //
52 //        data = (Data)parser.dataVector.get(0);
53 //        setMaximumContextLength(data.ruleSet.getMaximumContextLength());
54 //     }
55 
56 //    /**
57 //     * Constructs a new transliterator from the given rules in the
58 //     * <code>FORWARD</code> direction.
59 //     * @param rules rules, separated by ';'
60 //     * @exception IllegalArgumentException if rules are malformed
61 //     * or direction is invalid.
62 //     */
63 //    public RuleBasedTransliterator(String ID, String rules) {
64 //        this(ID, rules, FORWARD, null);
65 //    }
66 
RuleBasedTransliterator(String ID, Data data, UnicodeFilter filter)67     RuleBasedTransliterator(String ID, Data data, UnicodeFilter filter) {
68         super(ID, filter);
69         this.data = data;
70         setMaximumContextLength(data.ruleSet.getMaximumContextLength());
71     }
72 
73     /**
74      * Implements {@link Transliterator#handleTransliterate}.
75      * @deprecated This API is ICU internal only.
76      * @hide draft / provisional / internal are hidden on OHOS
77      */
78     @Override
79     @Deprecated
handleTransliterate(Replaceable text, Position index, boolean incremental)80     protected void handleTransliterate(Replaceable text,
81                                        Position index, boolean incremental) {
82         /* We keep start and limit fixed the entire time,
83          * relative to the text -- limit may move numerically if text is
84          * inserted or removed.  The cursor moves from start to limit, with
85          * replacements happening under it.
86          *
87          * Example: rules 1. ab>x|y
88          *                2. yc>z
89          *
90          * |eabcd   start - no match, advance cursor
91          * e|abcd   match rule 1 - change text & adjust cursor
92          * ex|ycd   match rule 2 - change text & adjust cursor
93          * exz|d    no match, advance cursor
94          * exzd|    done
95          */
96 
97         /* A rule like
98          *   a>b|a
99          * creates an infinite loop. To prevent that, we put an arbitrary
100          * limit on the number of iterations that we take, one that is
101          * high enough that any reasonable rules are ok, but low enough to
102          * prevent a server from hanging.  The limit is 16 times the
103          * number of characters n, unless n is so large that 16n exceeds a
104          * uint32_t.
105          */
106         synchronized(data)  {
107             int loopCount = 0;
108             int loopLimit = (index.limit - index.start) << 4;
109             if (loopLimit < 0) {
110                 loopLimit = 0x7FFFFFFF;
111             }
112 
113             while (index.start < index.limit &&
114                     loopCount <= loopLimit &&
115                     data.ruleSet.transliterate(text, index, incremental)) {
116                 ++loopCount;
117             }
118         }
119     }
120 
121 
122     static class Data {
Data()123         public Data() {
124             variableNames = new HashMap<>();
125             ruleSet = new TransliterationRuleSet();
126         }
127 
128         /**
129          * Rule table.  May be empty.
130          */
131         public TransliterationRuleSet ruleSet;
132 
133         /**
134          * Map variable name (String) to variable (char[]).  A variable name
135          * corresponds to zero or more characters, stored in a char[] array in
136          * this hash.  One or more of these chars may also correspond to a
137          * UnicodeSet, in which case the character in the char[] in this hash is
138          * a stand-in: it is an index for a secondary lookup in
139          * data.variables.  The stand-in also represents the UnicodeSet in
140          * the stored rules.
141          */
142         Map<String, char[]> variableNames;
143 
144         /**
145          * Map category variable (Character) to UnicodeMatcher or UnicodeReplacer.
146          * Variables that correspond to a set of characters are mapped
147          * from variable name to a stand-in character in data.variableNames.
148          * The stand-in then serves as a key in this hash to lookup the
149          * actual UnicodeSet object.  In addition, the stand-in is
150          * stored in the rule text to represent the set of characters.
151          * variables[i] represents character (variablesBase + i).
152          */
153         Object[] variables;
154 
155         /**
156          * The character that represents variables[0].  Characters
157          * variablesBase through variablesBase +
158          * variables.length - 1 represent UnicodeSet objects.
159          */
160         char variablesBase;
161 
162         /**
163          * Return the UnicodeMatcher represented by the given character, or
164          * null if none.
165          */
lookupMatcher(int standIn)166         public UnicodeMatcher lookupMatcher(int standIn) {
167             int i = standIn - variablesBase;
168             return (i >= 0 && i < variables.length)
169                 ? (UnicodeMatcher) variables[i] : null;
170         }
171 
172         /**
173          * Return the UnicodeReplacer represented by the given character, or
174          * null if none.
175          */
lookupReplacer(int standIn)176         public UnicodeReplacer lookupReplacer(int standIn) {
177             int i = standIn - variablesBase;
178             return (i >= 0 && i < variables.length)
179                 ? (UnicodeReplacer) variables[i] : null;
180         }
181     }
182 
183 
184     /**
185      * Return a representation of this transliterator as source rules.
186      * These rules will produce an equivalent transliterator if used
187      * to construct a new transliterator.
188      * @param escapeUnprintable if TRUE then convert unprintable
189      * character to their hex escape representations, \\uxxxx or
190      * \\Uxxxxxxxx.  Unprintable characters are those other than
191      * U+000A, U+0020..U+007E.
192      * @return rules string
193      * @deprecated This API is ICU internal only.
194      * @hide draft / provisional / internal are hidden on OHOS
195      */
196     @Override
197     @Deprecated
toRules(boolean escapeUnprintable)198     public String toRules(boolean escapeUnprintable) {
199         return data.ruleSet.toRules(escapeUnprintable);
200     }
201 
202 //    /**
203 //     * Return the set of all characters that may be modified by this
204 //     * Transliterator, ignoring the effect of our filter.
205 //     */
206 //    protected UnicodeSet handleGetSourceSet() {
207 //        return data.ruleSet.getSourceTargetSet(false, unicodeFilter);
208 //    }
209 //
210 //    /**
211 //     * Returns the set of all characters that may be generated as
212 //     * replacement text by this transliterator.
213 //     */
214 //    public UnicodeSet getTargetSet() {
215 //        return data.ruleSet.getSourceTargetSet(true, unicodeFilter);
216 //    }
217 
218     /**
219      * @deprecated This API is ICU internal only.
220      * @hide draft / provisional / internal are hidden on OHOS
221      */
222     @Deprecated
223     @Override
addSourceTargetSet(UnicodeSet filter, UnicodeSet sourceSet, UnicodeSet targetSet)224     public void addSourceTargetSet(UnicodeSet filter, UnicodeSet sourceSet, UnicodeSet targetSet) {
225         data.ruleSet.addSourceTargetSet(filter, sourceSet, targetSet);
226     }
227 
228     /**
229      * Temporary hack for registry problem. Needs to be replaced by better architecture.
230      * @deprecated This API is ICU internal only.
231      * @hide draft / provisional / internal are hidden on OHOS
232      */
233     @Deprecated
safeClone()234     public Transliterator safeClone() {
235         UnicodeFilter filter = getFilter();
236         if (filter != null && filter instanceof UnicodeSet) {
237             filter = new UnicodeSet((UnicodeSet)filter);
238         }
239         return new RuleBasedTransliterator(getID(), data, filter);
240     }
241 }
242