• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 /*
5  *******************************************************************************
6  * Copyright (C) 1996-2010, International Business Machines Corporation and    *
7  * others. All Rights Reserved.                                                *
8  *******************************************************************************
9  */
10 package ohos.global.icu.text;
11 
12 import java.util.ArrayList;
13 import java.util.List;
14 
15 import ohos.global.icu.impl.UtilityExtensions;
16 
17 /**
18  * A set of rules for a <code>RuleBasedTransliterator</code>.  This set encodes
19  * the transliteration in one direction from one set of characters or short
20  * strings to another.  A <code>RuleBasedTransliterator</code> consists of up to
21  * two such sets, one for the forward direction, and one for the reverse.
22  *
23  * <p>A <code>TransliterationRuleSet</code> has one important operation, that of
24  * finding a matching rule at a given point in the text.  This is accomplished
25  * by the <code>findMatch()</code> method.
26  *
27  * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
28  *
29  * @author Alan Liu
30  */
31 class TransliterationRuleSet {
32     /**
33      * Vector of rules, in the order added.
34      */
35     private List<TransliterationRule> ruleVector;
36 
37     /**
38      * Length of the longest preceding context
39      */
40     private int maxContextLength;
41 
42     /**
43      * Sorted and indexed table of rules.  This is created by freeze() from
44      * the rules in ruleVector.  rules.length >= ruleVector.size(), and the
45      * references in rules[] are aliases of the references in ruleVector.
46      * A single rule in ruleVector is listed one or more times in rules[].
47      */
48     private TransliterationRule[] rules;
49 
50     /**
51      * Index table.  For text having a first character c, compute x = c&0xFF.
52      * Now use rules[index[x]..index[x+1]-1].  This index table is created by
53      * freeze().
54      */
55     private int[] index;
56 
57     /**
58      * Construct a new empty rule set.
59      */
TransliterationRuleSet()60     public TransliterationRuleSet() {
61         ruleVector = new ArrayList<TransliterationRule>();
62         maxContextLength = 0;
63     }
64 
65     /**
66      * Return the maximum context length.
67      * @return the length of the longest preceding context.
68      */
getMaximumContextLength()69     public int getMaximumContextLength() {
70         return maxContextLength;
71     }
72 
73     /**
74      * Add a rule to this set.  Rules are added in order, and order is
75      * significant.
76      * @param rule the rule to add
77      */
addRule(TransliterationRule rule)78     public void addRule(TransliterationRule rule) {
79         ruleVector.add(rule);
80         int len;
81         if ((len = rule.getAnteContextLength()) > maxContextLength) {
82             maxContextLength = len;
83         }
84 
85         rules = null;
86     }
87 
88     /**
89      * Close this rule set to further additions, check it for masked rules,
90      * and index it to optimize performance.
91      * @exception IllegalArgumentException if some rules are masked
92      */
freeze()93     public void freeze() {
94         /* Construct the rule array and index table.  We reorder the
95          * rules by sorting them into 256 bins.  Each bin contains all
96          * rules matching the index value for that bin.  A rule
97          * matches an index value if string whose first key character
98          * has a low byte equal to the index value can match the rule.
99          *
100          * Each bin contains zero or more rules, in the same order
101          * they were found originally.  However, the total rules in
102          * the bins may exceed the number in the original vector,
103          * since rules that have a variable as their first key
104          * character will generally fall into more than one bin.
105          *
106          * That is, each bin contains all rules that either have that
107          * first index value as their first key character, or have
108          * a set containing the index value as their first character.
109          */
110         int n = ruleVector.size();
111         index = new int[257]; // [sic]
112         List<TransliterationRule> v = new ArrayList<TransliterationRule>(2*n); // heuristic; adjust as needed
113 
114         /* Precompute the index values.  This saves a LOT of time.
115          */
116         int[] indexValue = new int[n];
117         for (int j=0; j<n; ++j) {
118             TransliterationRule r = ruleVector.get(j);
119             indexValue[j] = r.getIndexValue();
120         }
121         for (int x=0; x<256; ++x) {
122             index[x] = v.size();
123             for (int j=0; j<n; ++j) {
124                 if (indexValue[j] >= 0) {
125                     if (indexValue[j] == x) {
126                         v.add(ruleVector.get(j));
127                     }
128                 } else {
129                     // If the indexValue is < 0, then the first key character is
130                     // a set, and we must use the more time-consuming
131                     // matchesIndexValue check.  In practice this happens
132                     // rarely, so we seldom tread this code path.
133                     TransliterationRule r = ruleVector.get(j);
134                     if (r.matchesIndexValue(x)) {
135                         v.add(r);
136                     }
137                 }
138             }
139         }
140         index[256] = v.size();
141 
142         /* Freeze things into an array.
143          */
144         rules = new TransliterationRule[v.size()];
145         v.toArray(rules);
146 
147         StringBuilder errors = null;
148 
149         /* Check for masking.  This is MUCH faster than our old check,
150          * which was each rule against each following rule, since we
151          * only have to check for masking within each bin now.  It's
152          * 256*O(n2^2) instead of O(n1^2), where n1 is the total rule
153          * count, and n2 is the per-bin rule count.  But n2<<n1, so
154          * it's a big win.
155          */
156         for (int x=0; x<256; ++x) {
157             for (int j=index[x]; j<index[x+1]-1; ++j) {
158                 TransliterationRule r1 = rules[j];
159                 for (int k=j+1; k<index[x+1]; ++k) {
160                     TransliterationRule r2 = rules[k];
161                     if (r1.masks(r2)) {
162                         if (errors == null) {
163                             errors = new StringBuilder();
164                         } else {
165                             errors.append("\n");
166                         }
167                         errors.append("Rule " + r1 + " masks " + r2);
168                     }
169                 }
170             }
171         }
172 
173         if (errors != null) {
174             throw new IllegalArgumentException(errors.toString());
175         }
176     }
177 
178     /**
179      * Transliterate the given text with the given UTransPosition
180      * indices.  Return TRUE if the transliteration should continue
181      * or FALSE if it should halt (because of a U_PARTIAL_MATCH match).
182      * Note that FALSE is only ever returned if isIncremental is TRUE.
183      * @param text the text to be transliterated
184      * @param pos the position indices, which will be updated
185      * @param incremental if TRUE, assume new text may be inserted
186      * at index.limit, and return FALSE if thre is a partial match.
187      * @return TRUE unless a U_PARTIAL_MATCH has been obtained,
188      * indicating that transliteration should stop until more text
189      * arrives.
190      */
transliterate(Replaceable text, Transliterator.Position pos, boolean incremental)191     public boolean transliterate(Replaceable text,
192                                  Transliterator.Position pos,
193                                  boolean incremental) {
194         int indexByte = text.char32At(pos.start) & 0xFF;
195         for (int i=index[indexByte]; i<index[indexByte+1]; ++i) {
196             int m = rules[i].matchAndReplace(text, pos, incremental);
197             switch (m) {
198             case UnicodeMatcher.U_MATCH:
199                 if (Transliterator.DEBUG) {
200                     System.out.println((incremental ? "Rule.i: match ":"Rule: match ") +
201                                        rules[i].toRule(true) + " => " +
202                                        UtilityExtensions.formatInput(text, pos));
203                 }
204                 return true;
205             case UnicodeMatcher.U_PARTIAL_MATCH:
206                 if (Transliterator.DEBUG) {
207                     System.out.println((incremental ? "Rule.i: partial match ":"Rule: partial match ") +
208                                        rules[i].toRule(true) + " => " +
209                                        UtilityExtensions.formatInput(text, pos));
210                 }
211                 return false;
212                 default:
213                     if (Transliterator.DEBUG) {
214                         System.out.println("Rule: no match " + rules[i]);
215                     }
216             }
217         }
218         // No match or partial match from any rule
219         pos.start += UTF16.getCharCount(text.char32At(pos.start));
220         if (Transliterator.DEBUG) {
221             System.out.println((incremental ? "Rule.i: no match => ":"Rule: no match => ") +
222                                UtilityExtensions.formatInput(text, pos));
223         }
224         return true;
225     }
226 
227     /**
228      * Create rule strings that represents this rule set.
229      */
toRules(boolean escapeUnprintable)230     String toRules(boolean escapeUnprintable) {
231         int i;
232         int count = ruleVector.size();
233         StringBuilder ruleSource = new StringBuilder();
234         for (i=0; i<count; ++i) {
235             if (i != 0) {
236                 ruleSource.append('\n');
237             }
238             TransliterationRule r = ruleVector.get(i);
239             ruleSource.append(r.toRule(escapeUnprintable));
240         }
241         return ruleSource.toString();
242     }
243 
244     // TODO Handle the case where we have :: [a] ; a > |b ; b > c ;
245     // TODO Merge into r.addSourceTargetSet, to avoid duplicate testing
addSourceTargetSet(UnicodeSet filter, UnicodeSet sourceSet, UnicodeSet targetSet)246     void addSourceTargetSet(UnicodeSet filter, UnicodeSet sourceSet, UnicodeSet targetSet) {
247         UnicodeSet currentFilter = new UnicodeSet(filter);
248         UnicodeSet revisiting = new UnicodeSet();
249         int count = ruleVector.size();
250         for (int i=0; i<count; ++i) {
251             TransliterationRule r = ruleVector.get(i);
252             r.addSourceTargetSet(currentFilter, sourceSet, targetSet, revisiting.clear());
253             currentFilter.addAll(revisiting);
254         }
255     }
256 
257 }
258