/* GENERATED SOURCE. DO NOT MODIFY. */
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
 *******************************************************************************
 * Copyright (C) 2009-2014, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */

package ohos.global.icu.impl.text;

import java.util.HashMap;
import java.util.Map;

import ohos.global.icu.impl.ICUDebug;
import ohos.global.icu.text.CollationElementIterator;
import ohos.global.icu.text.Collator;
import ohos.global.icu.text.RbnfLenientScanner;
import ohos.global.icu.text.RbnfLenientScannerProvider;
import ohos.global.icu.text.RuleBasedCollator;
import ohos.global.icu.util.ULocale;

/**
 * Returns RbnfLenientScanners that use the old RuleBasedNumberFormat
 * implementation behind setLenientParseMode, which is based on Collator.
 *
 * <p>Scanners are cached per (locale, extras) pair; all access to the cache is
 * synchronized on the cache itself.
 *
 * @deprecated This API is ICU internal only.
 * @hide exposed on OHOS
 * @hide draft / provisional / internal are hidden on OHOS
 */
@Deprecated
public class RbnfScannerProviderImpl implements RbnfLenientScannerProvider {
    private static final boolean DEBUG = ICUDebug.enabled("rbnf");

    /** Scanners already created, keyed by {@code locale.toString() + "/" + extras}. */
    private final Map<String, RbnfLenientScanner> cache;

    /**
     * Creates a provider with an empty scanner cache.
     *
     * @deprecated This API is ICU internal only.
     * @hide draft / provisional / internal are hidden on OHOS
     */
    @Deprecated
    public RbnfScannerProviderImpl() {
        cache = new HashMap<String, RbnfLenientScanner>();
    }

    /**
     * Returns a collation-based scanner.
     *
     * Only primary differences are treated as significant.  This means that case
     * differences, accent differences, alternate spellings of the same letter
     * (e.g., ae and a-umlaut in German), ignorable characters, etc. are ignored in
     * matching the text.  In many cases, numerals will be accepted in place of words
     * or phrases as well.
     *
     * For example, all of the following will correctly parse as 255 in English in
     * lenient-parse mode:
     * <br>"two hundred fifty-five"
     * <br>"two hundred fifty five"
     * <br>"TWO HUNDRED FIFTY-FIVE"
     * <br>"twohundredfiftyfive"
     * <br>"2 hundred fifty-5"
     *
     * The Collator used is determined by the <code>locale</code> argument.  The
     * <code>extras</code> argument may supply additional collation rules that are
     * appended to the end of the default collator rules for the locale, enabling
     * additional equivalences (such as adding more ignorable characters or
     * permitting spelled-out version of symbols; see the demo program for examples).
     *
     * It's important to emphasize that even strict parsing is relatively lenient: it
     * will accept some text that it won't produce as output.  In English, for example,
     * it will correctly parse "two hundred zero" and "fifteen hundred".
     *
     * <p>The scanner is created outside the cache lock.  If another caller stored a
     * scanner for the same key in the meantime, that stored instance is returned
     * and the freshly created one is discarded, so callers asking for the same
     * key share one scanner.
     *
     * @param locale the locale whose collator is used; must not be null
     * @param extras additional collation rules, or null for none
     * @return the cached or newly created scanner for the given locale and extras
     * @deprecated This API is ICU internal only.
     * @hide draft / provisional / internal are hidden on OHOS
     */
    @Override
    @Deprecated
    public RbnfLenientScanner get(ULocale locale, String extras) {
        String key = locale.toString() + "/" + extras;
        synchronized (cache) {
            RbnfLenientScanner cached = cache.get(key);
            if (cached != null) {
                return cached;
            }
        }

        // Build the scanner without holding the lock: createScanner is
        // overridable and constructing a collator may be expensive.
        RbnfLenientScanner created = createScanner(locale, extras);

        synchronized (cache) {
            // Another thread may have populated the entry while we were
            // building ours; prefer the instance that is already cached.
            RbnfLenientScanner existing = cache.get(key);
            if (existing != null) {
                return existing;
            }
            cache.put(key, created);
        }
        return created;
    }

    /**
     * Creates a new scanner for the given locale and extra collation rules.
     *
     * <p>If building the collator throws, the exception is swallowed (and only
     * reported when the "rbnf" debug flag is enabled) and the returned scanner
     * wraps a null collator; its methods dereference the collator without a
     * null check.
     *
     * @param locale the locale whose default collator is the starting point
     * @param extras additional collation rules to append, or null for none
     * @return a new scanner; never null
     * @deprecated This API is ICU internal only.
     * @hide draft / provisional / internal are hidden on OHOS
     */
    @Deprecated
    protected RbnfLenientScanner createScanner(ULocale locale, String extras) {
        RuleBasedCollator collator = null;
        try {
            // create a default collator based on the locale,
            // then pull out that collator's rules, append any additional
            // rules specified in the description, and create a _new_
            // collator based on the combination of those rules
            collator = (RuleBasedCollator) Collator.getInstance(locale.toLocale());
            if (extras != null) {
                String rules = collator.getRules() + extras;
                collator = new RuleBasedCollator(rules);
            }
            collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
        } catch (Exception e) {
            // If we get here, it means we have a malformed set of
            // collation rules, which hopefully won't happen
            ///CLOVER:OFF
            if (DEBUG) { // debug hook
                e.printStackTrace();
                System.out.println("++++");
            }
            collator = null;
            ///CLOVER:ON
        }

        return new RbnfLenientScannerImpl(collator);
    }

    /**
     * Scanner that compares text by the primary order of collation elements
     * produced by a single RuleBasedCollator.
     */
    private static class RbnfLenientScannerImpl implements RbnfLenientScanner {
        private final RuleBasedCollator collator;

        private RbnfLenientScannerImpl(RuleBasedCollator rbc) {
            this.collator = rbc;
        }

        /**
         * Returns true if every collation element of <code>s</code> has a
         * primary order of 0 (including the case where there are no elements).
         *
         * @param s the text to examine
         * @return true if no element with a non-zero primary order was found
         */
        @Override
        public boolean allIgnorable(String s) {
            CollationElementIterator iter = collator.getCollationElementIterator(s);

            int o = iter.next();
            while (o != CollationElementIterator.NULLORDER
                    && CollationElementIterator.primaryOrder(o) == 0) {
                o = iter.next();
            }
            // We only reach NULLORDER if nothing non-ignorable stopped the loop.
            return o == CollationElementIterator.NULLORDER;
        }

        /**
         * Searches <code>str</code>, starting at index <code>startingAt</code>,
         * for the first position at which <code>key</code> matches as a prefix
         * according to {@link #prefixLength(String, String)}.
         *
         * @param str the text to search
         * @param key the text to look for
         * @param startingAt the index in <code>str</code> at which to start
         * @return a two-element array: the index of the match and the value
         *         prefixLength reported for it, or <code>{ -1, 0 }</code> if
         *         no position produced a non-zero prefix length
         */
        @Override
        public int[] findText(String str, String key, int startingAt) {
            // basically just isolate smaller and smaller substrings of
            // the target string (each running to the end of the string,
            // and with the first one running from startingAt to the end)
            // and then use prefixLength() to see if the search key is at
            // the beginning of each substring.  This is excruciatingly
            // slow, but it will locate the key and tell us how long the
            // matching text was.
            for (int p = startingAt; p < str.length(); ++p) {
                int keyLen = prefixLength(str.substring(p), key);
                if (keyLen != 0) {
                    return new int[] { p, keyLen };
                }
            }
            // if we make it to here, we didn't find it.  Return -1 for the
            // location.  The length should be ignored, but set it to 0,
            // which should be "safe"
            return new int[] { -1, 0 };
        }

        ///CLOVER:OFF
        // The following method has the same parameter list as findText
        // and has never been used by anything.
        //
        // NOTE(review): after keyIter.reset() the current key element (oKey)
        // is not re-read, and keyStart is overwritten on every matching
        // element rather than only the first one.  Left untouched because
        // the method is unused -- confirm before ever calling it.
        @SuppressWarnings("unused")
        public int[] findText2(String str, String key, int startingAt) {

            CollationElementIterator strIter = collator.getCollationElementIterator(str);
            CollationElementIterator keyIter = collator.getCollationElementIterator(key);

            int keyStart = -1;

            strIter.setOffset(startingAt);

            int oStr = strIter.next();
            int oKey = keyIter.next();
            while (oKey != CollationElementIterator.NULLORDER) {
                // skip elements with primary order 0 in the target string
                while (oStr != CollationElementIterator.NULLORDER &&
                        CollationElementIterator.primaryOrder(oStr) == 0) {
                    oStr = strIter.next();
                }

                // skip elements with primary order 0 in the key
                while (oKey != CollationElementIterator.NULLORDER &&
                        CollationElementIterator.primaryOrder(oKey) == 0) {
                    oKey = keyIter.next();
                }

                // target exhausted before the key was: no match
                if (oStr == CollationElementIterator.NULLORDER) {
                    return new int[] { -1, 0 };
                }

                // key exhausted: everything in it was matched
                if (oKey == CollationElementIterator.NULLORDER) {
                    break;
                }

                if (CollationElementIterator.primaryOrder(oStr) ==
                        CollationElementIterator.primaryOrder(oKey)) {
                    keyStart = strIter.getOffset();
                    oStr = strIter.next();
                    oKey = keyIter.next();
                } else {
                    if (keyStart != -1) {
                        // a partial match failed: restart the key
                        keyStart = -1;
                        keyIter.reset();
                    } else {
                        oStr = strIter.next();
                    }
                }
            }

            return new int[] { keyStart, strIter.getOffset() - keyStart };
        }
        ///CLOVER:ON

        /**
         * Checks whether <code>prefix</code> matches the beginning of
         * <code>str</code>, comparing only the primary order of collation
         * elements and skipping elements whose primary order is 0 in both
         * strings.
         *
         * @param str the target text
         * @param prefix the text expected at the start of <code>str</code>
         * @return 0 if the target ran out or a primary order differed before
         *         the prefix was exhausted; otherwise an offset derived from
         *         the target iterator's position once the prefix was consumed
         */
        @Override
        public int prefixLength(String str, String prefix) {
            // Create two collation element iterators, one over the target string
            // and another over the prefix.
            //
            // Previous code was matching "fifty-" against " fifty" and leaving
            // the number " fifty-7" to parse as 43 (50 - 7).
            // Also it seems that if we consume the entire prefix, that's ok even
            // if we've consumed the entire string, so I switched the logic to
            // reflect this.

            CollationElementIterator strIter = collator.getCollationElementIterator(str);
            CollationElementIterator prefixIter = collator.getCollationElementIterator(prefix);

            // match collation elements between the strings
            int oStr = strIter.next();
            int oPrefix = prefixIter.next();

            while (oPrefix != CollationElementIterator.NULLORDER) {
                // skip over ignorable characters in the target string
                while (oStr != CollationElementIterator.NULLORDER
                        && CollationElementIterator.primaryOrder(oStr) == 0) {
                    oStr = strIter.next();
                }

                // skip over ignorable characters in the prefix
                while (oPrefix != CollationElementIterator.NULLORDER
                        && CollationElementIterator.primaryOrder(oPrefix) == 0) {
                    oPrefix = prefixIter.next();
                }

                // if skipping over ignorables brought us to the end of
                // the prefix, we DID match: drop out of the loop
                if (oPrefix == CollationElementIterator.NULLORDER) {
                    break;
                }

                // if skipping over ignorables brought us to the end
                // of the target string, we didn't match and return 0
                if (oStr == CollationElementIterator.NULLORDER) {
                    return 0;
                }

                // match collation elements from the two strings
                // (considering only primary differences).  If we
                // get a mismatch, dump out and return 0
                if (CollationElementIterator.primaryOrder(oStr)
                        != CollationElementIterator.primaryOrder(oPrefix)) {
                    return 0;
                }

                // otherwise, advance to the next character in each string
                // and loop (we drop out of the loop when we exhaust
                // collation elements in the prefix)
                oStr = strIter.next();
                oPrefix = prefixIter.next();
            }

            int result = strIter.getOffset();
            // NOTE(review): presumably this backs up over the element that was
            // already fetched into oStr but not matched -- confirm against
            // CollationElementIterator.getOffset() semantics.
            if (oStr != CollationElementIterator.NULLORDER) {
                --result;
            }
            return result;
        }
    }
}