• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 /*
5  *******************************************************************************
6  * Copyright (C) 2001-2004, International Business Machines Corporation and    *
7  * others. All Rights Reserved.                                                *
8  *******************************************************************************
9  */
10 package android.icu.text;
11 import android.icu.impl.Utility;
12 
13 /**
14  * An object that matches a fixed input string, implementing the
15  * UnicodeMatcher API.  This object also implements the
16  * UnicodeReplacer API, allowing it to emit the matched text as
17  * output.  Since the match text may contain flexible match elements,
18  * such as UnicodeSets, the emitted text is not the match pattern, but
19  * instead a substring of the actual matched text.  Following
20  * convention, the output text is the leftmost match seen up to this
21  * point.
22  *
23  * A StringMatcher may represent a segment, in which case it has a
24  * positive segment number.  This affects how the matcher converts
25  * itself to a pattern but does not otherwise affect its function.
26  *
27  * A StringMatcher that is not a segment should not be used as a
28  * UnicodeReplacer.
29  */
30 class StringMatcher implements UnicodeMatcher, UnicodeReplacer {
31 
32     /**
33      * The text to be matched.
34      */
35     private String pattern;
36 
37     /**
38      * Start offset, in the match text, of the <em>rightmost</em>
39      * match.
40      */
41     private int matchStart;
42 
43     /**
44      * Limit offset, in the match text, of the <em>rightmost</em>
45      * match.
46      */
47     private int matchLimit;
48 
49     /**
50      * The segment number, 1-based, or 0 if not a segment.
51      */
52     private int segmentNumber;
53 
54     /**
55      * Context object that maps stand-ins to matcher and replacer
56      * objects.
57      */
58     private final RuleBasedTransliterator.Data data;
59 
60     /**
61      * Construct a matcher that matches the given pattern string.
62      * @param theString the pattern to be matched, possibly containing
63      * stand-ins that represent nested UnicodeMatcher objects.
64      * @param segmentNum the segment number from 1..n, or 0 if this is
65      * not a segment.
66      * @param theData context object mapping stand-ins to
67      * UnicodeMatcher objects.
68      */
StringMatcher(String theString, int segmentNum, RuleBasedTransliterator.Data theData)69     public StringMatcher(String theString,
70                          int segmentNum,
71                          RuleBasedTransliterator.Data theData) {
72         data = theData;
73         pattern = theString;
74         matchStart = matchLimit = -1;
75         segmentNumber = segmentNum;
76     }
77 
78     /**
79      * Construct a matcher that matches a substring of the given
80      * pattern string.
81      * @param theString the pattern to be matched, possibly containing
82      * stand-ins that represent nested UnicodeMatcher objects.
83      * @param start first character of theString to be matched
84      * @param limit index after the last character of theString to be
85      * matched.
86      * @param segmentNum the segment number from 1..n, or 0 if this is
87      * not a segment.
88      * @param theData context object mapping stand-ins to
89      * UnicodeMatcher objects.
90      */
StringMatcher(String theString, int start, int limit, int segmentNum, RuleBasedTransliterator.Data theData)91     public StringMatcher(String theString,
92                          int start,
93                          int limit,
94                          int segmentNum,
95                          RuleBasedTransliterator.Data theData) {
96         this(theString.substring(start, limit), segmentNum, theData);
97     }
98 
99     /**
100      * Implement UnicodeMatcher
101      */
102     @Override
matches(Replaceable text, int[] offset, int limit, boolean incremental)103     public int matches(Replaceable text,
104                        int[] offset,
105                        int limit,
106                        boolean incremental) {
107         // Note (1): We process text in 16-bit code units, rather than
108         // 32-bit code points.  This works because stand-ins are
109         // always in the BMP and because we are doing a literal match
110         // operation, which can be done 16-bits at a time.
111         int i;
112         int[] cursor = new int[] { offset[0] };
113         if (limit < cursor[0]) {
114             // Match in the reverse direction
115             for (i=pattern.length()-1; i>=0; --i) {
116                 char keyChar = pattern.charAt(i); // OK; see note (1) above
117                 UnicodeMatcher subm = data.lookupMatcher(keyChar);
118                 if (subm == null) {
119                     if (cursor[0] > limit &&
120                         keyChar == text.charAt(cursor[0])) { // OK; see note (1) above
121                         --cursor[0];
122                     } else {
123                         return U_MISMATCH;
124                     }
125                 } else {
126                     int m =
127                         subm.matches(text, cursor, limit, incremental);
128                     if (m != U_MATCH) {
129                         return m;
130                     }
131                 }
132             }
133             // Record the match position, but adjust for a normal
134             // forward start, limit, and only if a prior match does not
135             // exist -- we want the rightmost match.
136             if (matchStart < 0) {
137                 matchStart = cursor[0]+1;
138                 matchLimit = offset[0]+1;
139             }
140         } else {
141             for (i=0; i<pattern.length(); ++i) {
142                 if (incremental && cursor[0] == limit) {
143                     // We've reached the context limit without a mismatch and
144                     // without completing our match.
145                     return U_PARTIAL_MATCH;
146                 }
147                 char keyChar = pattern.charAt(i); // OK; see note (1) above
148                 UnicodeMatcher subm = data.lookupMatcher(keyChar);
149                 if (subm == null) {
150                     // Don't need the cursor < limit check if
151                     // incremental is true (because it's done above); do need
152                     // it otherwise.
153                     if (cursor[0] < limit &&
154                         keyChar == text.charAt(cursor[0])) { // OK; see note (1) above
155                         ++cursor[0];
156                     } else {
157                         return U_MISMATCH;
158                     }
159                 } else {
160                     int m =
161                         subm.matches(text, cursor, limit, incremental);
162                     if (m != U_MATCH) {
163                         return m;
164                     }
165                 }
166             }
167             // Record the match position
168             matchStart = offset[0];
169             matchLimit = cursor[0];
170         }
171 
172         offset[0] = cursor[0];
173         return U_MATCH;
174     }
175 
176     /**
177      * Implement UnicodeMatcher
178      */
179     @Override
toPattern(boolean escapeUnprintable)180     public String toPattern(boolean escapeUnprintable) {
181         StringBuffer result = new StringBuffer();
182         StringBuffer quoteBuf = new StringBuffer();
183         if (segmentNumber > 0) { // i.e., if this is a segment
184             result.append('(');
185         }
186         for (int i=0; i<pattern.length(); ++i) {
187             char keyChar = pattern.charAt(i); // OK; see note (1) above
188             UnicodeMatcher m = data.lookupMatcher(keyChar);
189             if (m == null) {
190                 Utility.appendToRule(result, keyChar, false, escapeUnprintable, quoteBuf);
191             } else {
192                 Utility.appendToRule(result, m.toPattern(escapeUnprintable),
193                                      true, escapeUnprintable, quoteBuf);
194             }
195         }
196         if (segmentNumber > 0) { // i.e., if this is a segment
197             result.append(')');
198         }
199         // Flush quoteBuf out to result
200         Utility.appendToRule(result, -1,
201                              true, escapeUnprintable, quoteBuf);
202         return result.toString();
203     }
204 
205     /**
206      * Implement UnicodeMatcher
207      */
208     @Override
matchesIndexValue(int v)209     public boolean matchesIndexValue(int v) {
210         if (pattern.length() == 0) {
211             return true;
212         }
213         int c = UTF16.charAt(pattern, 0);
214         UnicodeMatcher m = data.lookupMatcher(c);
215         return (m == null) ? ((c & 0xFF) == v) : m.matchesIndexValue(v);
216     }
217 
218     /**
219      * Implementation of UnicodeMatcher API.  Union the set of all
220      * characters that may be matched by this object into the given
221      * set.
222      * @param toUnionTo the set into which to union the source characters
223      */
224     @Override
addMatchSetTo(UnicodeSet toUnionTo)225     public void addMatchSetTo(UnicodeSet toUnionTo) {
226         int ch;
227         for (int i=0; i<pattern.length(); i+=UTF16.getCharCount(ch)) {
228             ch = UTF16.charAt(pattern, i);
229             UnicodeMatcher matcher = data.lookupMatcher(ch);
230             if (matcher == null) {
231                 toUnionTo.add(ch);
232             } else {
233                 matcher.addMatchSetTo(toUnionTo);
234             }
235         }
236     }
237 
238     /**
239      * UnicodeReplacer API
240      */
241     @Override
replace(Replaceable text, int start, int limit, int[] cursor)242     public int replace(Replaceable text,
243                        int start,
244                        int limit,
245                        int[] cursor) {
246 
247         int outLen = 0;
248 
249         // Copy segment with out-of-band data
250         int dest = limit;
251         // If there was no match, that means that a quantifier
252         // matched zero-length.  E.g., x (a)* y matched "xy".
253         if (matchStart >= 0) {
254             if (matchStart != matchLimit) {
255                 text.copy(matchStart, matchLimit, dest);
256                 outLen = matchLimit - matchStart;
257             }
258         }
259 
260         text.replace(start, limit, ""); // delete original text
261 
262         return outLen;
263     }
264 
265     /**
266      * UnicodeReplacer API
267      */
268     @Override
toReplacerPattern(boolean escapeUnprintable)269     public String toReplacerPattern(boolean escapeUnprintable) {
270         // assert(segmentNumber > 0);
271         StringBuffer rule = new StringBuffer("$");
272         Utility.appendNumber(rule, segmentNumber, 10, 1);
273         return rule.toString();
274     }
275 
276     /**
277      * Remove any match data.  This must be called before performing a
278      * set of matches with this segment.
279      */
resetMatch()280     public void resetMatch() {
281         matchStart = matchLimit = -1;
282     }
283 
284     /**
285      * Union the set of all characters that may output by this object
286      * into the given set.
287      * @param toUnionTo the set into which to union the output characters
288      */
289     @Override
addReplacementSetTo(UnicodeSet toUnionTo)290     public void addReplacementSetTo(UnicodeSet toUnionTo) {
291         // The output of this replacer varies; it is the source text between
292         // matchStart and matchLimit.  Since this varies depending on the
293         // input text, we can't compute it here.  We can either do nothing
294         // or we can add ALL characters to the set.  It's probably more useful
295         // to do nothing.
296     }
297 }
298 
299 //eof
300