1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // © 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 /* 5 ******************************************************************************* 6 * Copyright (C) 2001-2004, International Business Machines Corporation and * 7 * others. All Rights Reserved. * 8 ******************************************************************************* 9 */ 10 package ohos.global.icu.text; 11 import ohos.global.icu.impl.Utility; 12 13 /** 14 * An object that matches a fixed input string, implementing the 15 * UnicodeMatcher API. This object also implements the 16 * UnicodeReplacer API, allowing it to emit the matched text as 17 * output. Since the match text may contain flexible match elements, 18 * such as UnicodeSets, the emitted text is not the match pattern, but 19 * instead a substring of the actual matched text. Following 20 * convention, the output text is the leftmost match seen up to this 21 * point. 22 * 23 * A StringMatcher may represent a segment, in which case it has a 24 * positive segment number. This affects how the matcher converts 25 * itself to a pattern but does not otherwise affect its function. 26 * 27 * A StringMatcher that is not a segment should not be used as a 28 * UnicodeReplacer. 29 */ 30 class StringMatcher implements UnicodeMatcher, UnicodeReplacer { 31 32 /** 33 * The text to be matched. 34 */ 35 private String pattern; 36 37 /** 38 * Start offset, in the match text, of the <em>rightmost</em> 39 * match. 40 */ 41 private int matchStart; 42 43 /** 44 * Limit offset, in the match text, of the <em>rightmost</em> 45 * match. 46 */ 47 private int matchLimit; 48 49 /** 50 * The segment number, 1-based, or 0 if not a segment. 51 */ 52 private int segmentNumber; 53 54 /** 55 * Context object that maps stand-ins to matcher and replacer 56 * objects. 57 */ 58 private final RuleBasedTransliterator.Data data; 59 60 /** 61 * Construct a matcher that matches the given pattern string. 62 * @param theString the pattern to be matched, possibly containing 63 * stand-ins that represent nested UnicodeMatcher objects. 64 * @param segmentNum the segment number from 1..n, or 0 if this is 65 * not a segment. 66 * @param theData context object mapping stand-ins to 67 * UnicodeMatcher objects. 68 */ StringMatcher(String theString, int segmentNum, RuleBasedTransliterator.Data theData)69 public StringMatcher(String theString, 70 int segmentNum, 71 RuleBasedTransliterator.Data theData) { 72 data = theData; 73 pattern = theString; 74 matchStart = matchLimit = -1; 75 segmentNumber = segmentNum; 76 } 77 78 /** 79 * Construct a matcher that matches a substring of the given 80 * pattern string. 81 * @param theString the pattern to be matched, possibly containing 82 * stand-ins that represent nested UnicodeMatcher objects. 83 * @param start first character of theString to be matched 84 * @param limit index after the last character of theString to be 85 * matched. 86 * @param segmentNum the segment number from 1..n, or 0 if this is 87 * not a segment. 88 * @param theData context object mapping stand-ins to 89 * UnicodeMatcher objects. 90 */ StringMatcher(String theString, int start, int limit, int segmentNum, RuleBasedTransliterator.Data theData)91 public StringMatcher(String theString, 92 int start, 93 int limit, 94 int segmentNum, 95 RuleBasedTransliterator.Data theData) { 96 this(theString.substring(start, limit), segmentNum, theData); 97 } 98 99 /** 100 * Implement UnicodeMatcher 101 */ 102 @Override matches(Replaceable text, int[] offset, int limit, boolean incremental)103 public int matches(Replaceable text, 104 int[] offset, 105 int limit, 106 boolean incremental) { 107 // Note (1): We process text in 16-bit code units, rather than 108 // 32-bit code points. This works because stand-ins are 109 // always in the BMP and because we are doing a literal match 110 // operation, which can be done 16-bits at a time. 111 int i; 112 int[] cursor = new int[] { offset[0] }; 113 if (limit < cursor[0]) { 114 // Match in the reverse direction 115 for (i=pattern.length()-1; i>=0; --i) { 116 char keyChar = pattern.charAt(i); // OK; see note (1) above 117 UnicodeMatcher subm = data.lookupMatcher(keyChar); 118 if (subm == null) { 119 if (cursor[0] > limit && 120 keyChar == text.charAt(cursor[0])) { // OK; see note (1) above 121 --cursor[0]; 122 } else { 123 return U_MISMATCH; 124 } 125 } else { 126 int m = 127 subm.matches(text, cursor, limit, incremental); 128 if (m != U_MATCH) { 129 return m; 130 } 131 } 132 } 133 // Record the match position, but adjust for a normal 134 // forward start, limit, and only if a prior match does not 135 // exist -- we want the rightmost match. 136 if (matchStart < 0) { 137 matchStart = cursor[0]+1; 138 matchLimit = offset[0]+1; 139 } 140 } else { 141 for (i=0; i<pattern.length(); ++i) { 142 if (incremental && cursor[0] == limit) { 143 // We've reached the context limit without a mismatch and 144 // without completing our match. 145 return U_PARTIAL_MATCH; 146 } 147 char keyChar = pattern.charAt(i); // OK; see note (1) above 148 UnicodeMatcher subm = data.lookupMatcher(keyChar); 149 if (subm == null) { 150 // Don't need the cursor < limit check if 151 // incremental is true (because it's done above); do need 152 // it otherwise. 153 if (cursor[0] < limit && 154 keyChar == text.charAt(cursor[0])) { // OK; see note (1) above 155 ++cursor[0]; 156 } else { 157 return U_MISMATCH; 158 } 159 } else { 160 int m = 161 subm.matches(text, cursor, limit, incremental); 162 if (m != U_MATCH) { 163 return m; 164 } 165 } 166 } 167 // Record the match position 168 matchStart = offset[0]; 169 matchLimit = cursor[0]; 170 } 171 172 offset[0] = cursor[0]; 173 return U_MATCH; 174 } 175 176 /** 177 * Implement UnicodeMatcher 178 */ 179 @Override toPattern(boolean escapeUnprintable)180 public String toPattern(boolean escapeUnprintable) { 181 StringBuffer result = new StringBuffer(); 182 StringBuffer quoteBuf = new StringBuffer(); 183 if (segmentNumber > 0) { // i.e., if this is a segment 184 result.append('('); 185 } 186 for (int i=0; i<pattern.length(); ++i) { 187 char keyChar = pattern.charAt(i); // OK; see note (1) above 188 UnicodeMatcher m = data.lookupMatcher(keyChar); 189 if (m == null) { 190 Utility.appendToRule(result, keyChar, false, escapeUnprintable, quoteBuf); 191 } else { 192 Utility.appendToRule(result, m.toPattern(escapeUnprintable), 193 true, escapeUnprintable, quoteBuf); 194 } 195 } 196 if (segmentNumber > 0) { // i.e., if this is a segment 197 result.append(')'); 198 } 199 // Flush quoteBuf out to result 200 Utility.appendToRule(result, -1, 201 true, escapeUnprintable, quoteBuf); 202 return result.toString(); 203 } 204 205 /** 206 * Implement UnicodeMatcher 207 */ 208 @Override matchesIndexValue(int v)209 public boolean matchesIndexValue(int v) { 210 if (pattern.length() == 0) { 211 return true; 212 } 213 int c = UTF16.charAt(pattern, 0); 214 UnicodeMatcher m = data.lookupMatcher(c); 215 return (m == null) ? ((c & 0xFF) == v) : m.matchesIndexValue(v); 216 } 217 218 /** 219 * Implementation of UnicodeMatcher API. Union the set of all 220 * characters that may be matched by this object into the given 221 * set. 222 * @param toUnionTo the set into which to union the source characters 223 */ 224 @Override addMatchSetTo(UnicodeSet toUnionTo)225 public void addMatchSetTo(UnicodeSet toUnionTo) { 226 int ch; 227 for (int i=0; i<pattern.length(); i+=UTF16.getCharCount(ch)) { 228 ch = UTF16.charAt(pattern, i); 229 UnicodeMatcher matcher = data.lookupMatcher(ch); 230 if (matcher == null) { 231 toUnionTo.add(ch); 232 } else { 233 matcher.addMatchSetTo(toUnionTo); 234 } 235 } 236 } 237 238 /** 239 * UnicodeReplacer API 240 */ 241 @Override replace(Replaceable text, int start, int limit, int[] cursor)242 public int replace(Replaceable text, 243 int start, 244 int limit, 245 int[] cursor) { 246 247 int outLen = 0; 248 249 // Copy segment with out-of-band data 250 int dest = limit; 251 // If there was no match, that means that a quantifier 252 // matched zero-length. E.g., x (a)* y matched "xy". 253 if (matchStart >= 0) { 254 if (matchStart != matchLimit) { 255 text.copy(matchStart, matchLimit, dest); 256 outLen = matchLimit - matchStart; 257 } 258 } 259 260 text.replace(start, limit, ""); // delete original text 261 262 return outLen; 263 } 264 265 /** 266 * UnicodeReplacer API 267 */ 268 @Override toReplacerPattern(boolean escapeUnprintable)269 public String toReplacerPattern(boolean escapeUnprintable) { 270 // assert(segmentNumber > 0); 271 StringBuffer rule = new StringBuffer("$"); 272 Utility.appendNumber(rule, segmentNumber, 10, 1); 273 return rule.toString(); 274 } 275 276 /** 277 * Remove any match data. This must be called before performing a 278 * set of matches with this segment. 279 */ resetMatch()280 public void resetMatch() { 281 matchStart = matchLimit = -1; 282 } 283 284 /** 285 * Union the set of all characters that may output by this object 286 * into the given set. 287 * @param toUnionTo the set into which to union the output characters 288 */ 289 @Override addReplacementSetTo(UnicodeSet toUnionTo)290 public void addReplacementSetTo(UnicodeSet toUnionTo) { 291 // The output of this replacer varies; it is the source text between 292 // matchStart and matchLimit. Since this varies depending on the 293 // input text, we can't compute it here. We can either do nothing 294 // or we can add ALL characters to the set. It's probably more useful 295 // to do nothing. 296 } 297 } 298 299 //eof 300