1 /* Copyright (C) 2007-2013 Google and others. All Rights Reserved. */ 2 /* Copyright (C) 2007-2013 IBM Corp. and others. All Rights Reserved. */ 3 4 package org.unicode.cldr.test; 5 6 import java.util.Arrays; 7 import java.util.HashMap; 8 import java.util.HashSet; 9 import java.util.List; 10 import java.util.Map; 11 import java.util.Set; 12 import java.util.TreeSet; 13 import java.util.regex.Matcher; 14 import java.util.regex.Pattern; 15 16 import org.unicode.cldr.test.CheckExemplars.ExemplarType; 17 import org.unicode.cldr.util.Builder; 18 import org.unicode.cldr.util.CLDRFile; 19 import org.unicode.cldr.util.CLDRLocale; 20 import org.unicode.cldr.util.CldrUtility; 21 import org.unicode.cldr.util.DateTimeCanonicalizer; 22 import org.unicode.cldr.util.DateTimeCanonicalizer.DateTimePatternType; 23 import org.unicode.cldr.util.Emoji; 24 import org.unicode.cldr.util.ICUServiceBuilder; 25 import org.unicode.cldr.util.PatternCache; 26 import org.unicode.cldr.util.UnicodeSetPrettyPrinter; 27 import org.unicode.cldr.util.With; 28 import org.unicode.cldr.util.XPathParts; 29 30 import com.google.common.base.Joiner; 31 import com.google.common.base.Splitter; 32 import com.google.myanmartools.ZawgyiDetector; 33 import com.ibm.icu.lang.UCharacter; 34 import com.ibm.icu.text.Collator; 35 import com.ibm.icu.text.DateIntervalInfo; 36 import com.ibm.icu.text.DateTimePatternGenerator; 37 import com.ibm.icu.text.DecimalFormat; 38 import com.ibm.icu.text.Normalizer; 39 import com.ibm.icu.text.RuleBasedCollator; 40 import com.ibm.icu.text.Transform; 41 import com.ibm.icu.text.Transliterator; 42 import com.ibm.icu.text.UnicodeSet; 43 import com.ibm.icu.text.UnicodeSetIterator; 44 import com.ibm.icu.util.ULocale; 45 46 /** 47 * Class for processing the input and output of CLDR data for use in the 48 * Survey Tool and other tools. 
49 */ 50 public class DisplayAndInputProcessor { 51 52 private static final boolean FIX_YEARS = true; 53 54 public static final boolean DEBUG_DAIP = CldrUtility.getProperty("DEBUG_DAIP", false); 55 56 public static final UnicodeSet RTL = new UnicodeSet("[[:Bidi_Class=Arabic_Letter:][:Bidi_Class=Right_To_Left:]]") 57 .freeze(); 58 59 public static final UnicodeSet TO_QUOTE = new UnicodeSet( 60 "[[:Cn:]" + 61 "[:Default_Ignorable_Code_Point:]" + 62 "[:patternwhitespace:]" + 63 "[:Me:][:Mn:]]" // add non-spacing marks 64 ).freeze(); 65 66 public static final Pattern NUMBER_FORMAT_XPATH = Pattern 67 .compile("//ldml/numbers/.*Format\\[@type=\"standard\"]/pattern.*"); 68 69 public static final Pattern NUMBER_SEPARATOR_PATTERN = Pattern 70 .compile("//ldml/numbers/symbols.*/(decimal|group)"); 71 72 private static final Pattern APOSTROPHE_SKIP_PATHS = PatternCache.get("//ldml/(" 73 + "localeDisplayNames/languages/language\\[@type=\"mic\"].*|" 74 + "characters/.*|" 75 + "delimiters/.*|" 76 + "dates/.+/(pattern|intervalFormatItem|dateFormatItem).*|" 77 + "units/.+/unitPattern.*|" 78 + "units/.+/durationUnitPattern.*|" 79 + "numbers/symbols.*|" 80 + "numbers/miscPatterns.*|" 81 + "numbers/(decimal|currency|percent|scientific)Formats.+/(decimal|currency|percent|scientific)Format.*)"); 82 private static final Pattern INTERVAL_FORMAT_PATHS = PatternCache.get("//ldml/dates/.+/intervalFormatItem.*"); 83 private static final Pattern NON_DECIMAL_PERIOD = PatternCache.get("(?<![0#'])\\.(?![0#'])"); 84 85 /** 86 * string of whitespace not including NBSP, i.e. [\t\n\r]+ 87 */ 88 private static final Pattern WHITESPACE_NO_NBSP_TO_NORMALIZE = PatternCache.get("\\s+"); // 89 90 /** 91 * string of whitespace including NBSP, i.e. 
[\u00A0\t\n\r]+ 92 */ 93 private static final Pattern WHITESPACE_AND_NBSP_TO_NORMALIZE = PatternCache.get("[\\s\\u00A0]+"); 94 95 /** 96 * one or more NBSP followed by one or more regular spaces 97 */ 98 private static final Pattern NBSP_PLUS_SPACE_TO_NORMALIZE = PatternCache.get("\\u00A0+\\u0020+"); 99 100 /** 101 * one or more regular spaces followed by one or more NBSP 102 */ 103 private static final Pattern SPACE_PLUS_NBSP_TO_NORMALIZE = PatternCache.get("\\u0020+\\u00A0+"); 104 105 private static final Pattern INITIAL_NBSP = PatternCache.get("^\\u00A0+"); 106 private static final Pattern FINAL_NBSP = PatternCache.get("\\u00A0+$"); 107 private static final Pattern MULTIPLE_NBSP = PatternCache.get("\\u00A0\\u00A0+"); 108 109 private static final UnicodeSet UNICODE_WHITESPACE = new UnicodeSet("[:whitespace:]").freeze(); 110 111 private static final CLDRLocale MALAYALAM = CLDRLocale.getInstance("ml"); 112 private static final CLDRLocale ROMANIAN = CLDRLocale.getInstance("ro"); 113 private static final CLDRLocale CATALAN = CLDRLocale.getInstance("ca"); 114 private static final CLDRLocale NGOMBA = CLDRLocale.getInstance("jgo"); 115 private static final CLDRLocale KWASIO = CLDRLocale.getInstance("nmg"); 116 private static final CLDRLocale HEBREW = CLDRLocale.getInstance("he"); 117 private static final CLDRLocale MYANMAR = CLDRLocale.getInstance("my"); 118 private static final CLDRLocale KYRGYZ = CLDRLocale.getInstance("ky"); 119 private static final CLDRLocale URDU = CLDRLocale.getInstance("ur"); 120 private static final CLDRLocale PASHTO = CLDRLocale.getInstance("ps"); 121 private static final CLDRLocale FARSI = CLDRLocale.getInstance("fa"); 122 private static final CLDRLocale GERMAN_SWITZERLAND = CLDRLocale.getInstance("de_CH"); 123 private static final CLDRLocale SWISS_GERMAN = CLDRLocale.getInstance("gsw"); 124 private static final CLDRLocale FF_ADLAM = CLDRLocale.getInstance("ff_Adlm"); 125 public static final Set<String> LANGUAGES_USING_MODIFIER_APOSTROPHE = 
new HashSet<>( 126 Arrays.asList("br", "bss", "cad", "cic", "cch", "gn", "ha", "ha_Latn", "lkt", "mgo", "moh", "mus", "nnh", "qu", "quc", "uk", "uz", "uz_Latn")); 127 128 // Ş ş Ţ ţ => Ș ș Ț ț 129 private static final char[][] ROMANIAN_CONVERSIONS = { 130 { '\u015E', '\u0218' }, { '\u015F', '\u0219' }, { '\u0162', '\u021A' }, 131 { '\u0163', '\u021B' } }; 132 133 private static final char[][] CATALAN_CONVERSIONS = { 134 { '\u013F', '\u004C', '\u00B7' }, // Ŀ -> L· 135 { '\u0140', '\u006C', '\u00B7' } }; // ŀ -> l· 136 137 private static final char[][] NGOMBA_CONVERSIONS = { 138 { '\u0251', '\u0061' }, { '\u0261', '\u0067' }, // ɑ -> a , ɡ -> g , See ticket #5691 139 { '\u2019', '\uA78C' }, { '\u02BC', '\uA78C' } }; // Saltillo, see ticket #6805 140 141 private static final char[][] KWASIO_CONVERSIONS = { 142 { '\u0306', '\u030C' }, // See ticket #6571, use caron instead of breve 143 { '\u0103', '\u01CE' }, { '\u0102', '\u01CD' }, // a-breve -> a-caron 144 { '\u0115', '\u011B' }, { '\u011A', '\u01CD' }, // e-breve -> e-caron 145 { '\u012D', '\u01D0' }, { '\u012C', '\u01CF' }, // i-breve -> i-caron 146 { '\u014F', '\u01D2' }, { '\u014E', '\u01D1' }, // o-breve -> o-caron 147 { '\u016D', '\u01D4' }, { '\u016C', '\u01D3' } // u-breve -> u-caron 148 }; 149 150 private static final char[][] HEBREW_CONVERSIONS = { 151 { '\'', '\u05F3' }, { '"', '\u05F4' } }; // ' -> geresh " -> gershayim 152 153 private static final char[][] KYRGYZ_CONVERSIONS = { 154 { 'ӊ', 'ң' }, { 'Ӊ', 'Ң' } }; // right modifier 155 156 private static final char[][] URDU_PLUS_CONVERSIONS = { 157 { '\u0643', '\u06A9' }}; // wrong char 158 159 private static final ZawgyiDetector detector = new ZawgyiDetector(); 160 private static final Transliterator zawgyiUnicodeTransliterator = 161 Transliterator.getInstance("Zawgyi-my"); 162 163 private Collator col; 164 165 private Collator spaceCol; 166 167 private UnicodeSetPrettyPrinter pp = null; 168 169 final private CLDRLocale locale; 170 private boolean 
isPosix; 171 172 /** 173 * Constructor, taking cldrFile. 174 * 175 * @param cldrFileToCheck 176 */ DisplayAndInputProcessor(CLDRFile cldrFileToCheck, boolean needsCollator)177 public DisplayAndInputProcessor(CLDRFile cldrFileToCheck, boolean needsCollator) { 178 init(this.locale = CLDRLocale.getInstance(cldrFileToCheck.getLocaleID()), needsCollator); 179 } 180 DisplayAndInputProcessor(CLDRFile cldrFileToCheck)181 public DisplayAndInputProcessor(CLDRFile cldrFileToCheck) { 182 init(this.locale = CLDRLocale.getInstance(cldrFileToCheck.getLocaleID()), true); 183 } 184 init(CLDRLocale locale, boolean needsCollator)185 void init(CLDRLocale locale, boolean needsCollator) { 186 isPosix = locale.toString().indexOf("POSIX") >= 0; 187 if (needsCollator) { 188 ICUServiceBuilder isb = null; 189 try { 190 isb = ICUServiceBuilder.forLocale(locale); 191 } catch (Exception e) { 192 } 193 194 if (isb != null) { 195 try { 196 col = isb.getRuleBasedCollator(); 197 } catch (Exception e) { 198 col = Collator.getInstance(ULocale.ROOT); 199 } 200 } else { 201 col = Collator.getInstance(ULocale.ROOT); 202 } 203 204 spaceCol = Collator.getInstance(locale.toULocale()); 205 if (spaceCol instanceof RuleBasedCollator) { 206 ((RuleBasedCollator) spaceCol).setAlternateHandlingShifted(false); 207 } 208 pp = new UnicodeSetPrettyPrinter().setOrdering(Collator.getInstance(ULocale.ROOT)) 209 .setSpaceComparator(Collator.getInstance(ULocale.ROOT).setStrength2(Collator.PRIMARY)) 210 .setCompressRanges(true) 211 .setToQuote(new UnicodeSet(TO_QUOTE)) 212 .setOrdering(col) 213 .setSpaceComparator(spaceCol); 214 } 215 } 216 getPrettyPrinter()217 public UnicodeSetPrettyPrinter getPrettyPrinter() { 218 return pp; 219 } 220 221 /** 222 * Constructor, taking ULocale and boolean. 
223 * 224 * @param locale the ULocale 225 * @param needsCollator true or false 226 * 227 * Called by getProcessor, with locale = SurveyMain.TRANS_HINT_LOCALE 228 */ DisplayAndInputProcessor(ULocale locale, boolean needsCollator)229 public DisplayAndInputProcessor(ULocale locale, boolean needsCollator) { 230 init(this.locale = CLDRLocale.getInstance(locale), needsCollator); 231 } 232 233 /** 234 * Constructor, taking ULocale. 235 * 236 * @param locale the ULocale 237 */ DisplayAndInputProcessor(ULocale locale)238 public DisplayAndInputProcessor(ULocale locale) { 239 init(this.locale = CLDRLocale.getInstance(locale), true /* needsCollator */); 240 } 241 242 /** 243 * Constructor, taking CLDRLocale and boolean. 244 * 245 * @param locale the CLDRLocale 246 * @param needsCollator true or false 247 */ DisplayAndInputProcessor(CLDRLocale locale, boolean needsCollator)248 public DisplayAndInputProcessor(CLDRLocale locale, boolean needsCollator) { 249 init(this.locale = locale, needsCollator); 250 } 251 252 /** 253 * Constructor, taking locale. 254 * 255 * @param locale 256 */ DisplayAndInputProcessor(CLDRLocale locale)257 public DisplayAndInputProcessor(CLDRLocale locale) { 258 init(this.locale = locale, true); 259 } 260 261 /** 262 * Process the value for display. The result is a string for display in the 263 * Survey tool or similar program. 264 * 265 * @param path 266 * @param value 267 * @param fullPath 268 * @return 269 */ processForDisplay(String path, String value)270 public synchronized String processForDisplay(String path, String value) { 271 value = Normalizer.compose(value, false); // Always normalize all text to NFC. 272 if (hasUnicodeSetValue(path)) { 273 value = displayUnicodeSet(value); 274 } else if (path.contains("stopword")) { 275 return value.trim().isEmpty() ? 
"NONE" : value; 276 } else { 277 NumericType numericType = NumericType.getNumericType(path); 278 if (numericType != NumericType.NOT_NUMERIC) { 279 // Canonicalize existing values that aren't canonicalized yet. 280 // New values will be canonicalized on input using processInput(). 281 try { 282 value = getCanonicalPattern(value, numericType, isPosix); 283 } catch (IllegalArgumentException e) { 284 if (DEBUG_DAIP) System.err.println("Illegal pattern: " + value); 285 } 286 if (numericType != NumericType.CURRENCY && numericType != NumericType.CURRENCY_ABBREVIATED) { 287 value = value.replace("'", ""); 288 } 289 } 290 } 291 // Fix up any apostrophes in number symbols 292 if (NUMBER_SEPARATOR_PATTERN.matcher(path).matches()) { 293 value = value.replace('\'', '\u2019'); 294 } 295 // Fix up any apostrophes as appropriate (Don't do so for things like date patterns... 296 if (!APOSTROPHE_SKIP_PATHS.matcher(path).matches()) { 297 value = normalizeApostrophes(value); 298 } 299 // Fix up hyphens, replacing with N-dash as appropriate 300 if (INTERVAL_FORMAT_PATHS.matcher(path).matches()) { 301 value = normalizeIntervalHyphens(value); 302 } else { 303 value = normalizeHyphens(value); 304 } 305 return value; 306 } 307 hasUnicodeSetValue(String path)308 private boolean hasUnicodeSetValue(String path) { 309 return path.startsWith("//ldml/characters/exemplarCharacters") || path.startsWith("//ldml/characters/parseLenients"); 310 } 311 312 static final UnicodeSet WHITESPACE = new UnicodeSet("[:whitespace:]").freeze(); 313 static final DateTimeCanonicalizer dtc = new DateTimeCanonicalizer(FIX_YEARS); 314 315 private static final String BAR_VL = "\\|"; // U+007C VERTICAL LINE (pipe, bar) literal 316 private static final String BAR_EL = "\\s+l\\s+"; // U+006C LATIN SMALL LETTER L with space 317 private static final String BAR_DANDA = "।"; // U+0964 DEVANAGARI DANDA 318 private static final String BAR_REGEX = "(" + BAR_VL + "|" + BAR_EL + "|" + BAR_DANDA + ")"; 319 public static final 
Splitter SPLIT_BAR = Splitter.on(Pattern.compile(BAR_REGEX)).trimResults().omitEmptyStrings(); 320 static final Splitter SPLIT_SPACE = Splitter.on(' ').trimResults().omitEmptyStrings(); 321 static final Joiner JOIN_BAR = Joiner.on(" | "); 322 323 /** 324 * Process the value for input. The result is a cleaned-up value. For example, 325 * an exemplar set is modified to be in the normal format, and any missing [ ] 326 * are added (a common omission on entry). If there are any failures then the 327 * original value is returned, so that the proper error message can be given. 328 * 329 * @param path 330 * @param value 331 * @param internalException 332 * TODO 333 * @param fullPath 334 * @return 335 */ processInput(String path, String value, Exception[] internalException)336 public synchronized String processInput(String path, String value, Exception[] internalException) { 337 String original = value; 338 value = stripProblematicControlCharacters(value); 339 value = Normalizer.compose(value, false); // Always normalize all input to NFC. 340 value = value.replace('\u00B5', '\u03BC'); // use the right Greek mu character 341 342 if (internalException != null) { 343 internalException[0] = null; 344 } 345 // skip processing for inheritance marker 346 if (CldrUtility.INHERITANCE_MARKER.equals(value)) { 347 return value; // Reference: https://unicode.org/cldr/trac/ticket/11261 348 } 349 // for root annotations 350 if (CLDRLocale.ROOT.equals(locale) && path.contains("/annotations")) { 351 return value; // Reference: https://unicode.org/cldr/trac/ticket/11261 352 } 353 354 try { 355 // Normalise Malayalam characters. 
356 boolean isUnicodeSet = hasUnicodeSetValue(path); 357 if (locale.childOf(MALAYALAM)) { 358 String newvalue = normalizeMalayalam(value); 359 if (DEBUG_DAIP) System.out.println("DAIP: Normalized Malayalam '" + value + "' to '" + newvalue + "'"); 360 value = newvalue; 361 } else if (locale.childOf(ROMANIAN) && !isUnicodeSet) { 362 value = standardizeRomanian(value); 363 } else if (locale.childOf(CATALAN) && !isUnicodeSet) { 364 value = standardizeCatalan(value); 365 } else if (locale.childOf(NGOMBA) && !isUnicodeSet) { 366 value = standardizeNgomba(value); 367 } else if (locale.childOf(KWASIO) && !isUnicodeSet) { 368 value = standardizeKwasio(value); 369 } else if (locale.childOf(HEBREW) && !APOSTROPHE_SKIP_PATHS.matcher(path).matches()) { 370 value = replaceChars(path, value, HEBREW_CONVERSIONS, false); 371 } else if ((locale.childOf(SWISS_GERMAN) || locale.childOf(GERMAN_SWITZERLAND)) && !isUnicodeSet) { 372 value = standardizeSwissGerman(value); 373 } else if (locale.childOf(MYANMAR) && !isUnicodeSet) { 374 value = standardizeMyanmar(value); 375 } else if (locale.childOf(KYRGYZ)) { 376 value = replaceChars(path, value, KYRGYZ_CONVERSIONS, false); 377 } else if (locale.childOf(URDU) || locale.childOf(PASHTO) || locale.childOf(FARSI)) { 378 value = replaceChars(path, value, URDU_PLUS_CONVERSIONS, true); 379 } else if (locale.childOf(FF_ADLAM) && !isUnicodeSet) { 380 value = fixAdlamNasalization(value); 381 } 382 383 if (UNICODE_WHITESPACE.containsSome(value)) { 384 value = normalizeWhitespace(path, value); 385 } 386 387 // all of our values should not have leading or trailing spaces, except insertBetween 388 if (!path.contains("/insertBetween") && !isUnicodeSet) { 389 value = value.trim(); 390 } 391 392 // fix grouping separator if space 393 if (path.startsWith("//ldml/numbers/symbols") && !path.contains("/alias")) { 394 if (value.isEmpty()) { 395 value = "\u00A0"; 396 } 397 value = value.replace(' ', '\u00A0'); 398 } 399 400 // fix date patterns 401 
DateTimePatternType datetimePatternType = DateTimePatternType.fromPath(path); 402 if (DateTimePatternType.STOCK_AVAILABLE_INTERVAL_PATTERNS.contains(datetimePatternType)) { 403 try { 404 value = dtc.getCanonicalDatePattern(path, value, datetimePatternType); 405 } catch (IllegalArgumentException ex) { 406 return value; 407 } 408 } 409 410 if (path.startsWith("//ldml/numbers/currencies/currency") && path.contains("displayName")) { 411 value = normalizeCurrencyDisplayName(value); 412 } 413 NumericType numericType = NumericType.getNumericType(path); 414 if (numericType != NumericType.NOT_NUMERIC) { 415 if (numericType == NumericType.CURRENCY) { 416 value = value.replaceAll(" ", "\u00A0"); 417 if (numericType == NumericType.CURRENCY_ABBREVIATED) { 418 value = value.replaceAll("0\\.0+", "0"); 419 } 420 } else { 421 value = value.replaceAll("([%\u00A4]) ", "$1\u00A0") 422 .replaceAll(" ([%\u00A4])", "\u00A0$1"); 423 value = replace(NON_DECIMAL_PERIOD, value, "'.'"); 424 if (numericType == NumericType.DECIMAL_ABBREVIATED) { 425 value = value.replaceAll("0\\.0+", "0"); 426 } 427 } 428 value = getCanonicalPattern(value, numericType, isPosix); 429 } 430 431 // fix [,] 432 if (path.startsWith("//ldml/localeDisplayNames/languages/language") 433 || path.startsWith("//ldml/localeDisplayNames/scripts/script") 434 || path.startsWith("//ldml/localeDisplayNames/territories/territory") 435 || path.startsWith("//ldml/localeDisplayNames/variants/variant") 436 || path.startsWith("//ldml/localeDisplayNames/keys/key") 437 || path.startsWith("//ldml/localeDisplayNames/types/type")) { 438 value = value.replace('[', '(').replace(']', ')').replace('[', '(').replace(']', ')'); 439 } 440 441 // Normalize two single quotes for the inches symbol. 
442 if (path.contains("/units")) { 443 value = value.replace("''", "″"); 444 } 445 446 // check specific cases 447 if (isUnicodeSet) { 448 value = inputUnicodeSet(path, value); 449 } else if (path.contains("stopword")) { 450 if (value.equals("NONE")) { 451 value = ""; 452 } 453 } 454 455 // Normalize ellipsis data. 456 if (path.startsWith("//ldml/characters/ellipsis")) { 457 value = value.replace("...", "…"); 458 } 459 460 // Replace Arabic presentation forms with their nominal counterparts 461 value = replaceArabicPresentationForms(value); 462 463 // Fix up any apostrophes as appropriate (Don't do so for things like date patterns... 464 if (!APOSTROPHE_SKIP_PATHS.matcher(path).matches()) { 465 value = normalizeApostrophes(value); 466 } 467 // Fix up any apostrophes in number symbols 468 if (NUMBER_SEPARATOR_PATTERN.matcher(path).matches()) { 469 value = value.replace('\'', '\u2019'); 470 } 471 // Fix up hyphens, replacing with N-dash as appropriate 472 if (INTERVAL_FORMAT_PATHS.matcher(path).matches()) { 473 value = normalizeIntervalHyphens(value); 474 } else if (!isUnicodeSet) { 475 value = normalizeHyphens(value); 476 } 477 478 if (path.startsWith("//ldml/annotations/annotation")) { 479 if (path.contains(Emoji.TYPE_TTS)) { 480 // The row has something like " -name" in the first column. Cf. namePath, getNamePaths. 481 // Normally the value is like "zebra" or "unicorn face", without "|". 482 // If the user enters a value with "|", discard anything after "|"; e.g., change "a | b | c" to "a". 483 value = SPLIT_BAR.split(value).iterator().next(); 484 } else { 485 // The row has something like " –keywords" in the first column. Cf. keywordPath, getKeywordPaths. 486 // Normally the value is like "stripe | zebra", with "|". 
487 value = annotationsForDisplay(value); 488 } 489 } 490 491 return value; 492 } catch (RuntimeException e) { 493 if (internalException != null) { 494 internalException[0] = e; 495 } 496 return original; 497 } 498 } 499 500 /** 501 * Strip out all code points less than U+0020 except for U+0009 tab, 502 * U+000A line feed, and U+000D carriage return. 503 * 504 * @param s the string 505 * @return the resulting string 506 */ stripProblematicControlCharacters(String s)507 private String stripProblematicControlCharacters(String s) { 508 if (s == null || s.isEmpty()) { 509 return s; 510 } 511 return s.codePoints() 512 .filter(c -> (c >= 0x20 || c == 9 || c == 0xA || c == 0xD)) 513 .collect(StringBuilder::new, StringBuilder::appendCodePoint, StringBuilder::append) 514 .toString(); 515 } 516 517 private static final boolean REMOVE_COVERED_KEYWORDS = true; 518 519 /** 520 * Produce a modification of the given annotation by sorting its components and filtering covered keywords. 521 * 522 * Examples: Given "b | a", return "a | b". Given "bear | panda | panda bear", return "bear | panda". 523 * 524 * @param value the string 525 * @return the possibly modified string 526 */ annotationsForDisplay(String value)527 private static String annotationsForDisplay(String value) { 528 TreeSet<String> sorted = new TreeSet<>(Collator.getInstance(ULocale.ROOT)); 529 sorted.addAll(SPLIT_BAR.splitToList(value)); 530 if (REMOVE_COVERED_KEYWORDS) { 531 filterCoveredKeywords(sorted); 532 } 533 value = JOIN_BAR.join(sorted); 534 return value; 535 } 536 537 /** 538 * Filter from the given set some keywords that include spaces, if they duplicate, 539 * or are "covered by", other keywords in the set. 540 * 541 * For example, if the set is {"bear", "panda", "panda bear"} (annotation was "bear | panda | panda bear"), 542 * then remove "panda bear", treating it as "covered" since the set already includes "panda" and "bear". 
543 * 544 * @param sorted the set from which items may be removed 545 */ filterCoveredKeywords(TreeSet<String> sorted)546 public static void filterCoveredKeywords(TreeSet<String> sorted) { 547 // for now, just do single items 548 HashSet<String> toRemove = new HashSet<>(); 549 550 for (String item : sorted) { 551 List<String> list = SPLIT_SPACE.splitToList(item); 552 if (list.size() < 2) { 553 continue; 554 } 555 if (sorted.containsAll(list)) { 556 toRemove.add(item); 557 } 558 } 559 sorted.removeAll(toRemove); 560 } 561 displayUnicodeSet(String value)562 private String displayUnicodeSet(String value) { 563 if (value.startsWith("[") && value.endsWith("]")) { 564 value = value.substring(1, value.length() - 1); 565 } 566 567 value = replace(NEEDS_QUOTE1, value, "$1\\\\$2$3"); 568 value = replace(NEEDS_QUOTE2, value, "$1\\\\$2$3"); 569 570 // if (RTL.containsSome(value) && value.startsWith("[") && value.endsWith("]")) { 571 // return "\u200E[\u200E" + value.substring(1,value.length()-2) + "\u200E]\u200E"; 572 // } 573 return value; 574 } 575 inputUnicodeSet(String path, String value)576 private String inputUnicodeSet(String path, String value) { 577 // clean up the user's input. 578 // first, fix up the '[' 579 value = value.trim(); 580 581 // remove brackets and trim again before regex 582 if (value.startsWith("[")) { 583 value = value.substring(1); 584 } 585 if (value.endsWith("]") && (!value.endsWith("\\]") || value.endsWith("\\\\]"))) { 586 value = value.substring(0, value.length() - 1); 587 } 588 value = value.trim(); 589 590 value = replace(NEEDS_QUOTE1, value, "$1\\\\$2$3"); 591 value = replace(NEEDS_QUOTE2, value, "$1\\\\$2$3"); 592 593 // re-add brackets. 
594 value = "[" + value + "]"; 595 596 UnicodeSet exemplar = new UnicodeSet(value); 597 XPathParts parts = XPathParts.getFrozenInstance(path); 598 if (parts.getElement(2).equals("parseLenients")) { 599 return exemplar.toPattern(false); 600 } 601 final String type = parts.getAttributeValue(-1, "type"); 602 ExemplarType exemplarType = type == null ? ExemplarType.main : ExemplarType.valueOf(type); 603 value = getCleanedUnicodeSet(exemplar, pp, exemplarType); 604 return value; 605 } 606 normalizeCurrencyDisplayName(String value)607 private String normalizeCurrencyDisplayName(String value) { 608 StringBuilder result = new StringBuilder(); 609 boolean inParentheses = false; 610 for (int i = 0; i < value.length(); i++) { 611 char c = value.charAt(i); 612 if (c == '(') { 613 inParentheses = true; 614 } else if (c == ')') { 615 inParentheses = false; 616 } 617 if (inParentheses && c == '-' && Character.isDigit(value.charAt(i - 1))) { 618 c = 0x2013; /* Replace hyphen-minus with dash for date ranges */ 619 } 620 result.append(c); 621 } 622 return result.toString(); 623 } 624 normalizeApostrophes(String value)625 private String normalizeApostrophes(String value) { 626 // If our DAIP always had a CLDRFile to work with, then we could just check the exemplar set in it to see. 627 // But since we don't, we just maintain the list internally and use it. 
628 if (LANGUAGES_USING_MODIFIER_APOSTROPHE.contains(locale.getLanguage())) { 629 return value.replace('\'', '\u02bc'); 630 } else { 631 char prev = 0; 632 StringBuilder builder = new StringBuilder(); 633 for (char c : value.toCharArray()) { 634 if (c == '\'') { 635 if (Character.isLetter(prev)) { 636 builder.append('\u2019'); 637 } else { 638 builder.append('\u2018'); 639 } 640 } else { 641 builder.append(c); 642 } 643 prev = c; 644 } 645 return builder.toString(); 646 } 647 } 648 normalizeIntervalHyphens(String value)649 private String normalizeIntervalHyphens(String value) { 650 DateTimePatternGenerator.FormatParser fp = new DateTimePatternGenerator.FormatParser(); 651 fp.set(DateIntervalInfo.genPatternInfo(value, false).getFirstPart()); 652 List<Object> items = fp.getItems(); 653 Object last = items.get(items.size() - 1); 654 if (last instanceof String) { 655 String separator = last.toString(); 656 if (separator.contains("-")) { 657 StringBuilder sb = new StringBuilder(); 658 sb.append(DateIntervalInfo.genPatternInfo(value, false).getFirstPart()); 659 if (sb.lastIndexOf(separator) >= 0) { 660 sb.delete(sb.lastIndexOf(separator), sb.length()); 661 sb.append(separator.replace("-", "\u2013")); 662 sb.append(DateIntervalInfo.genPatternInfo(value, false).getSecondPart()); 663 return sb.toString(); 664 } 665 } 666 } 667 return value; 668 } 669 normalizeHyphens(String value)670 private String normalizeHyphens(String value) { 671 int hyphenLocation = value.indexOf("-"); 672 if (hyphenLocation > 0 && 673 Character.isDigit(value.charAt(hyphenLocation - 1)) && 674 hyphenLocation < value.length() - 1 && 675 Character.isDigit(value.charAt(hyphenLocation + 1))) { 676 StringBuilder sb = new StringBuilder(); 677 sb.append(value.substring(0, hyphenLocation)); 678 sb.append("\u2013"); 679 sb.append(value.substring(hyphenLocation + 1)); 680 return sb.toString(); 681 } 682 return value; 683 } 684 standardizeRomanian(String value)685 private String standardizeRomanian(String value) 
{ 686 StringBuilder builder = new StringBuilder(); 687 for (char c : value.toCharArray()) { 688 for (char[] pair : ROMANIAN_CONVERSIONS) { 689 if (c == pair[0]) { 690 c = pair[1]; 691 break; 692 } 693 } 694 builder.append(c); 695 } 696 return builder.toString(); 697 } 698 standardizeKwasio(String value)699 private String standardizeKwasio(String value) { 700 StringBuilder builder = new StringBuilder(); 701 for (char c : value.toCharArray()) { 702 for (char[] pair : KWASIO_CONVERSIONS) { 703 if (c == pair[0]) { 704 c = pair[1]; 705 break; 706 } 707 } 708 builder.append(c); 709 } 710 return builder.toString(); 711 } 712 713 // Use the myanmar-tools detector. standardizeMyanmar(String value)714 private String standardizeMyanmar(String value) { 715 if (detector.getZawgyiProbability(value) > 0.90) { 716 return zawgyiUnicodeTransliterator.transform(value); 717 } 718 return value; 719 } 720 standardizeNgomba(String value)721 private String standardizeNgomba(String value) { 722 StringBuilder builder = new StringBuilder(); 723 char[] charArray = value.toCharArray(); 724 for (int i = 0; i < charArray.length; i++) { 725 char c = charArray[i]; 726 boolean convertedSaltillo = false; 727 for (char[] pair : NGOMBA_CONVERSIONS) { 728 if (c == pair[0]) { 729 c = pair[1]; 730 if (c == '\uA78C') { 731 convertedSaltillo = true; 732 } 733 break; 734 } 735 } 736 if (convertedSaltillo && 737 ((i > 0 && i < charArray.length - 1 && Character.isUpperCase(charArray[i - 1]) && Character.isUpperCase(charArray[i + 1])) || 738 (i > 1 && Character.isUpperCase(charArray[i - 1]) && Character.isUpperCase(charArray[i - 2])))) { 739 c = '\uA78B'; // UPPER CASE SALTILLO 740 } 741 builder.append(c); 742 } 743 return builder.toString(); 744 } 745 replaceChars(String path, String value, char[][] charsToReplace, boolean skipAuxExemplars)746 private String replaceChars(String path, String value, char[][] charsToReplace, boolean skipAuxExemplars) { 747 if (skipAuxExemplars && 
path.contains("/exemplarCharacters[@type=\"auxiliary\"]")) { 748 return value; 749 } 750 StringBuilder builder = new StringBuilder(); 751 for (char c : value.toCharArray()) { 752 for (char[] pair : charsToReplace) { 753 if (c == pair[0]) { 754 c = pair[1]; 755 break; 756 } 757 } 758 builder.append(c); 759 } 760 return builder.toString(); 761 } 762 standardizeSwissGerman(String value)763 private String standardizeSwissGerman(String value) { 764 return value.replaceAll("\u00DF", "ss"); 765 } 766 standardizeCatalan(String value)767 private String standardizeCatalan(String value) { 768 StringBuilder builder = new StringBuilder(); 769 for (char c : value.toCharArray()) { 770 boolean didSubstitute = false; 771 for (char[] triple : CATALAN_CONVERSIONS) { 772 if (c == triple[0]) { 773 builder.append(triple[1]); 774 builder.append(triple[2]); 775 didSubstitute = true; 776 break; 777 } 778 } 779 if (!didSubstitute) { 780 builder.append(c); 781 } 782 } 783 return builder.toString(); 784 } 785 replace(Pattern pattern, String value, String replacement)786 private String replace(Pattern pattern, String value, String replacement) { 787 String value2 = pattern.matcher(value).replaceAll(replacement); 788 if (DEBUG_DAIP && !value.equals(value2)) { 789 System.out.println("\n" + value + " => " + value2); 790 } 791 return value2; 792 } 793 794 private static Pattern UNNORMALIZED_MALAYALAM = PatternCache.get( 795 "(\u0D23|\u0D28|\u0D30|\u0D32|\u0D33|\u0D15)\u0D4D\u200D"); 796 797 private static Map<Character, Character> NORMALIZING_MAP = Builder.with(new HashMap<Character, Character>()) 798 .put('\u0D23', '\u0D7A').put('\u0D28', '\u0D7B') 799 .put('\u0D30', '\u0D7C').put('\u0D32', '\u0D7D') 800 .put('\u0D33', '\u0D7E').put('\u0D15', '\u0D7F').get(); 801 802 /** 803 * Normalizes the Malayalam characters in the specified input. 
804 * 805 * @param value 806 * the input to be normalized 807 * @return 808 */ normalizeMalayalam(String value)809 private String normalizeMalayalam(String value) { 810 // Normalize Malayalam characters. 811 Matcher matcher = UNNORMALIZED_MALAYALAM.matcher(value); 812 if (matcher.find()) { 813 StringBuffer buffer = new StringBuffer(); 814 int start = 0; 815 do { 816 buffer.append(value.substring(start, matcher.start(0))); 817 char codePoint = matcher.group(1).charAt(0); 818 buffer.append(NORMALIZING_MAP.get(codePoint)); 819 start = matcher.end(0); 820 } while (matcher.find()); 821 buffer.append(value.substring(start)); 822 value = buffer.toString(); 823 } 824 return value; 825 } 826 827 static final Transform<String, String> fixArabicPresentation = Transliterator.getInstance( 828 "[[:block=Arabic_Presentation_Forms_A:][:block=Arabic_Presentation_Forms_B:]] nfkc"); 829 830 /** 831 * Normalizes the Arabic presentation forms characters in the specified input. 832 * 833 * @param value 834 * the input to be normalized 835 * @return 836 */ replaceArabicPresentationForms(String value)837 private String replaceArabicPresentationForms(String value) { 838 value = fixArabicPresentation.transform(value); 839 return value; 840 } 841 842 static Pattern ADLAM_MISNASALIZED = PatternCache.get("([])['’‘]([])"); 843 public static String ADLAM_NASALIZATION = ""; // U+1E94B (Unicode 12.0) 844 fixAdlamNasalization(String fromString)845 public static String fixAdlamNasalization(String fromString) { 846 return ADLAM_MISNASALIZED.matcher(fromString) 847 .replaceAll("$1"+ADLAM_NASALIZATION+"$2"); // replace quote with 848 } 849 850 static Pattern REMOVE_QUOTE1 = PatternCache.get("(\\s)(\\\\[-\\}\\]\\&])()"); 851 static Pattern REMOVE_QUOTE2 = PatternCache.get("(\\\\[\\-\\{\\[\\&])(\\s)"); // ([^\\])([\\-\\{\\[])(\\s) 852 853 static Pattern NEEDS_QUOTE1 = PatternCache.get("(\\s|$)([-\\}\\]\\&])()"); 854 static Pattern NEEDS_QUOTE2 = PatternCache.get("([^\\\\])([\\-\\{\\[\\&])(\\s)"); // 
([^\\])([\\-\\{\\[])(\\s) 855 getCleanedUnicodeSet(UnicodeSet exemplar, UnicodeSetPrettyPrinter prettyPrinter, ExemplarType exemplarType)856 public static String getCleanedUnicodeSet(UnicodeSet exemplar, UnicodeSetPrettyPrinter prettyPrinter, 857 ExemplarType exemplarType) { 858 if (prettyPrinter == null) { 859 return exemplar.toPattern(false); 860 } 861 String value; 862 prettyPrinter.setCompressRanges(exemplar.size() > 300); 863 value = exemplar.toPattern(false); 864 UnicodeSet toAdd = new UnicodeSet(); 865 866 for (UnicodeSetIterator usi = new UnicodeSetIterator(exemplar); usi.next();) { 867 String string = usi.getString(); 868 if (string.equals("ß") || string.equals("İ")) { 869 toAdd.add(string); 870 continue; 871 } 872 switch (string) { 873 case "\u2011": toAdd.add("-"); break; // nobreak hyphen 874 case "-": toAdd.add("\u2011"); break; // nobreak hyphen 875 876 case " ": toAdd.add("\u00a0"); break; // nobreak space 877 case "\u00a0": toAdd.add(" "); break; // nobreak space 878 879 case "\u202F": toAdd.add("\u2009"); break; // nobreak narrow space 880 case "\u2009": toAdd.add("\u202F"); break; // nobreak narrow space 881 } 882 if (exemplarType.convertUppercase) { 883 string = UCharacter.toLowerCase(ULocale.ENGLISH, string); 884 } 885 toAdd.add(string); 886 String composed = Normalizer.compose(string, false); 887 if (!string.equals(composed)) { 888 toAdd.add(composed); 889 } 890 } 891 892 toAdd.removeAll(exemplarType.toRemove); 893 894 if (DEBUG_DAIP && !toAdd.equals(exemplar)) { 895 UnicodeSet oldOnly = new UnicodeSet(exemplar).removeAll(toAdd); 896 UnicodeSet newOnly = new UnicodeSet(toAdd).removeAll(exemplar); 897 System.out.println("Exemplar:\t" + exemplarType + ",\tremoved\t" + oldOnly + ",\tadded\t" + newOnly); 898 } 899 900 String fixedExemplar = prettyPrinter.format(toAdd); 901 UnicodeSet doubleCheck = new UnicodeSet(fixedExemplar); 902 if (!toAdd.equals(doubleCheck)) { 903 // something went wrong, leave as is 904 } else if 
(!value.equals(fixedExemplar)) { // put in this condition just for debugging 905 if (DEBUG_DAIP) { 906 System.out.println(TestMetadata.showDifference( 907 With.codePoints(value), 908 With.codePoints(fixedExemplar), 909 "\n")); 910 } 911 value = fixedExemplar; 912 } 913 return value; 914 } 915 916 /** 917 * @return a canonical numeric pattern, based on the type, and the isPOSIX flag. The latter is set for en_US_POSIX. 918 */ 919 static final Splitter SEMI_SPLITTER = Splitter.on(';').trimResults(); 920 getCanonicalPattern(String inpattern, NumericType type, boolean isPOSIX)921 public static String getCanonicalPattern(String inpattern, NumericType type, boolean isPOSIX) { 922 // TODO fix later to properly handle quoted ; 923 924 DecimalFormat df = new DecimalFormat(inpattern); 925 if (type == NumericType.DECIMAL_ABBREVIATED || type == NumericType.CURRENCY_ABBREVIATED 926 || CldrUtility.INHERITANCE_MARKER.equals(inpattern)) { 927 return inpattern; // TODO fix when ICU bug is fixed 928 // df.setMaximumFractionDigits(df.getMinimumFractionDigits()); 929 // df.setMaximumIntegerDigits(Math.max(1, df.getMinimumIntegerDigits())); 930 } else { 931 // int decimals = type == CURRENCY_TYPE ? 2 : 1; 932 int[] digits = isPOSIX ? type.posixDigitCount : type.digitCount; 933 df.setMinimumIntegerDigits(digits[0]); 934 df.setMinimumFractionDigits(digits[1]); 935 df.setMaximumFractionDigits(digits[2]); 936 } 937 String pattern = df.toPattern(); 938 List<String> parts = SEMI_SPLITTER.splitToList(pattern); 939 String pattern2 = parts.get(0); 940 if (parts.size() > 1) { 941 pattern2 += ";" + parts.get(1); 942 } 943 if (!pattern2.equals(pattern)) { 944 pattern = pattern2; 945 } 946 // int pos = pattern.indexOf(';'); 947 // if (pos < 0) return pattern + ";-" + pattern; 948 return pattern; 949 } 950 951 /* 952 * This tests what type a numeric pattern is. 
/**
 * Classifies what type a numeric pattern is, based on its xpath.
 * <p>
 * Each non-abbreviated numeric type carries two digit-count triples, one regular and
 * one for POSIX, which getCanonicalPattern applies as {minimum integer digits,
 * minimum fraction digits, maximum fraction digits}. The abbreviated types and
 * NOT_NUMERIC carry none (their arrays are null).
 */
public enum NumericType {
    CURRENCY(new int[] { 1, 2, 2 }, new int[] { 1, 2, 2 }),
    CURRENCY_ABBREVIATED(),
    DECIMAL(new int[] { 1, 0, 3 }, new int[] { 1, 0, 6 }),
    DECIMAL_ABBREVIATED(),
    PERCENT(new int[] { 1, 0, 0 }, new int[] { 1, 0, 0 }),
    SCIENTIFIC(new int[] { 0, 0, 0 }, new int[] { 1, 6, 6 }),
    NOT_NUMERIC;

    /** Group 1 is the whole element name; group 2 is the format kind when group 1 is a "...Formats" element. */
    private static final Pattern NUMBER_PATH = Pattern
        .compile("//ldml/numbers/((currency|decimal|percent|scientific)Formats|currencies/currency).*");

    private final int[] digitCount;
    private final int[] posixDigitCount;

    NumericType() {
        this(null, null);
    }

    NumericType(int[] digitCount, int[] posixDigitCount) {
        this.digitCount = digitCount;
        this.posixDigitCount = posixDigitCount;
    }

    /**
     * Determines the numeric type of an xpath.
     *
     * @param xpath
     *            the path to classify
     * @return NOT_NUMERIC when the path lacks "/pattern" or is not under a numbers
     *         format/currency element; CURRENCY for currencies/currency paths;
     *         otherwise the type named by the "...Formats" element, switched to the
     *         abbreviated variant when the path contains {@code ="1000}
     * @throws IllegalArgumentException
     *             if the path contains {@code ="1000} but the type is neither DECIMAL nor CURRENCY
     */
    public static NumericType getNumericType(String xpath) {
        if (xpath.indexOf("/pattern") < 0) {
            return NOT_NUMERIC;
        }
        Matcher matcher = NUMBER_PATH.matcher(xpath);
        if (!matcher.matches()) {
            return NOT_NUMERIC;
        }
        if ("currencies/currency".equals(matcher.group(1))) {
            return CURRENCY;
        }
        // Locale.ROOT: with a Turkish/Azerbaijani default locale, "decimal" would upper-case to
        // "DECİMAL" (dotted capital I) and valueOf would fail.
        NumericType type = valueOf(matcher.group(2).toUpperCase(java.util.Locale.ROOT));
        if (!xpath.contains("=\"1000")) {
            return type;
        }
        switch (type) {
        case DECIMAL:
            return DECIMAL_ABBREVIATED;
        case CURRENCY:
            return CURRENCY_ABBREVIATED;
        default:
            throw new IllegalArgumentException("Internal Error");
        }
    }

    /**
     * @return the regular digit counts, or null for types that have none; the
     *         array is the enum's own instance, not a copy
     */
    public int[] getDigitCount() {
        return digitCount;
    }

    /**
     * @return the POSIX digit counts, or null for types that have none; the
     *         array is the enum's own instance, not a copy
     */
    public int[] getPosixDigitCount() {
        return posixDigitCount;
    }
}
1012 * Also trim initial/final NBSP, unless the value is only the one character, "\u00A0" 1013 * 1014 * @param path 1015 * @param value 1016 * @return the normalized value 1017 */ normalizeWhitespace(String path, String value)1018 private String normalizeWhitespace(String path, String value) { 1019 PathSpaceType pst = PathSpaceType.get(path); 1020 if (pst == PathSpaceType.allowSp) { 1021 value = WHITESPACE_AND_NBSP_TO_NORMALIZE.matcher(value).replaceAll(" "); // replace with regular space 1022 } else if (pst == PathSpaceType.allowNbsp) { 1023 value = WHITESPACE_AND_NBSP_TO_NORMALIZE.matcher(value).replaceAll("\u00A0"); // replace with NBSP 1024 value = trimNBSP(value); 1025 } else if (pst == PathSpaceType.allowSpOrNbsp) { 1026 /* 1027 * in this case don't normalize away NBSP 1028 */ 1029 value = WHITESPACE_NO_NBSP_TO_NORMALIZE.matcher(value).replaceAll(" "); // replace with regular space 1030 /* 1031 * if any NBSP and regular space are adjacent, replace with NBSP 1032 */ 1033 value = NBSP_PLUS_SPACE_TO_NORMALIZE.matcher(value).replaceAll("\u00A0"); 1034 value = SPACE_PLUS_NBSP_TO_NORMALIZE.matcher(value).replaceAll("\u00A0"); 1035 value = MULTIPLE_NBSP.matcher(value).replaceAll("\u00A0"); 1036 value = trimNBSP(value); 1037 } else { 1038 throw new IllegalArgumentException("Unknown PathSpaceType " + pst); 1039 } 1040 return value; 1041 } 1042 1043 /** 1044 * Delete any initial or final NBSP, unless the value is just NBSP 1045 * 1046 * @param value 1047 * @return the trimmed value 1048 */ trimNBSP(String value)1049 private String trimNBSP(String value) { 1050 if (!"\u00A0".equals(value)) { 1051 value = INITIAL_NBSP.matcher(value).replaceAll(""); 1052 value = FINAL_NBSP.matcher(value).replaceAll(""); 1053 } 1054 return value; 1055 } 1056 1057 /** 1058 * Categorize xpaths according to whether they allow space, NBSP, or both 1059 */ 1060 public enum PathSpaceType { 1061 allowSp, allowNbsp, allowSpOrNbsp; 1062 get(String path)1063 public static PathSpaceType get(String 
path) { 1064 if (wantsRegularSpace(path)) { 1065 return allowSp; 1066 } else if (wantsNBSP(path)) { 1067 return allowNbsp; 1068 } else { 1069 return allowSpOrNbsp; 1070 } 1071 } 1072 wantsRegularSpace(String path)1073 private static boolean wantsRegularSpace(String path) { 1074 if ((path.contains("/dateFormatLength") && path.contains("/pattern")) || 1075 path.contains("/availableFormats/dateFormatItem") || 1076 (path.startsWith("//ldml/dates/timeZoneNames/metazone") && path.contains("/long")) || 1077 path.startsWith("//ldml/dates/timeZoneNames/regionFormat") || 1078 path.startsWith("//ldml/localeDisplayNames/codePatterns/codePattern") || 1079 path.startsWith("//ldml/localeDisplayNames/languages/language") || 1080 path.startsWith("//ldml/localeDisplayNames/territories/territory") || 1081 path.startsWith("//ldml/localeDisplayNames/types/type") || 1082 (path.startsWith("//ldml/numbers/currencies/currency") && path.contains("/displayName")) || 1083 (path.contains("/decimalFormatLength[@type=\"long\"]") && path.contains("/pattern")) || 1084 path.startsWith("//ldml/posix/messages") || 1085 (path.startsWith("//ldml/units/uni") && path.contains("/unitPattern "))) { 1086 return true; 1087 } 1088 return false; 1089 } 1090 wantsNBSP(String path)1091 private static boolean wantsNBSP(String path) { 1092 if ((path.contains("/currencies/currency") && (path.contains("/group") || path.contains("/pattern"))) || 1093 (path.contains("/currencyFormatLength") && path.contains("/pattern")) || 1094 (path.contains("/currencySpacing") && path.contains("/insertBetween")) || 1095 (path.contains("/decimalFormatLength") && path.contains("/pattern")) || // i.e. the non-long ones 1096 (path.contains("/percentFormatLength") && path.contains("/pattern")) || 1097 (path.startsWith("//ldml/numbers/symbols") && (path.contains("/group") || path.contains("/nan")))) { 1098 return true; 1099 } 1100 return false; 1101 } 1102 } 1103 } 1104