• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright (C) 2007-2013 Google and others.  All Rights Reserved. */
2 /* Copyright (C) 2007-2013 IBM Corp. and others. All Rights Reserved. */
3 
4 package org.unicode.cldr.test;
5 
6 import java.util.Arrays;
7 import java.util.HashMap;
8 import java.util.HashSet;
9 import java.util.List;
10 import java.util.Map;
11 import java.util.Set;
12 import java.util.TreeSet;
13 import java.util.regex.Matcher;
14 import java.util.regex.Pattern;
15 
16 import org.unicode.cldr.test.CheckExemplars.ExemplarType;
17 import org.unicode.cldr.util.Builder;
18 import org.unicode.cldr.util.CLDRFile;
19 import org.unicode.cldr.util.CLDRLocale;
20 import org.unicode.cldr.util.CldrUtility;
21 import org.unicode.cldr.util.DateTimeCanonicalizer;
22 import org.unicode.cldr.util.DateTimeCanonicalizer.DateTimePatternType;
23 import org.unicode.cldr.util.Emoji;
24 import org.unicode.cldr.util.ICUServiceBuilder;
25 import org.unicode.cldr.util.PatternCache;
26 import org.unicode.cldr.util.UnicodeSetPrettyPrinter;
27 import org.unicode.cldr.util.With;
28 import org.unicode.cldr.util.XPathParts;
29 
30 import com.google.common.base.Joiner;
31 import com.google.common.base.Splitter;
32 import com.google.myanmartools.ZawgyiDetector;
33 import com.ibm.icu.lang.UCharacter;
34 import com.ibm.icu.text.Collator;
35 import com.ibm.icu.text.DateIntervalInfo;
36 import com.ibm.icu.text.DateTimePatternGenerator;
37 import com.ibm.icu.text.DecimalFormat;
38 import com.ibm.icu.text.Normalizer;
39 import com.ibm.icu.text.RuleBasedCollator;
40 import com.ibm.icu.text.Transform;
41 import com.ibm.icu.text.Transliterator;
42 import com.ibm.icu.text.UnicodeSet;
43 import com.ibm.icu.text.UnicodeSetIterator;
44 import com.ibm.icu.util.ULocale;
45 
46 /**
47  * Class for processing the input and output of CLDR data for use in the
48  * Survey Tool and other tools.
49  */
50 public class DisplayAndInputProcessor {
51 
52     private static final boolean FIX_YEARS = true;
53 
54     public static final boolean DEBUG_DAIP = CldrUtility.getProperty("DEBUG_DAIP", false);
55 
56     public static final UnicodeSet RTL = new UnicodeSet("[[:Bidi_Class=Arabic_Letter:][:Bidi_Class=Right_To_Left:]]")
57         .freeze();
58 
59     public static final UnicodeSet TO_QUOTE = new UnicodeSet(
60         "[[:Cn:]" +
61             "[:Default_Ignorable_Code_Point:]" +
62             "[:patternwhitespace:]" +
63             "[:Me:][:Mn:]]" // add non-spacing marks
64     ).freeze();
65 
66     public static final Pattern NUMBER_FORMAT_XPATH = Pattern
67         .compile("//ldml/numbers/.*Format\\[@type=\"standard\"]/pattern.*");
68 
69     public static final Pattern NUMBER_SEPARATOR_PATTERN = Pattern
70         .compile("//ldml/numbers/symbols.*/(decimal|group)");
71 
72     private static final Pattern APOSTROPHE_SKIP_PATHS = PatternCache.get("//ldml/("
73         + "localeDisplayNames/languages/language\\[@type=\"mic\"].*|"
74         + "characters/.*|"
75         + "delimiters/.*|"
76         + "dates/.+/(pattern|intervalFormatItem|dateFormatItem).*|"
77         + "units/.+/unitPattern.*|"
78         + "units/.+/durationUnitPattern.*|"
79         + "numbers/symbols.*|"
80         + "numbers/miscPatterns.*|"
81         + "numbers/(decimal|currency|percent|scientific)Formats.+/(decimal|currency|percent|scientific)Format.*)");
82     private static final Pattern INTERVAL_FORMAT_PATHS = PatternCache.get("//ldml/dates/.+/intervalFormatItem.*");
83     private static final Pattern NON_DECIMAL_PERIOD = PatternCache.get("(?<![0#'])\\.(?![0#'])");
84 
    /**
     * A run of characters matched by the regex class \s, i.e. [ \t\n\x0B\f\r]+.
     * This includes the ordinary space but not NBSP (U+00A0).
     */
88     private static final Pattern WHITESPACE_NO_NBSP_TO_NORMALIZE = PatternCache.get("\\s+"); //
89 
    /**
     * A run of characters matched by the regex class \s or NBSP (U+00A0),
     * i.e. [ \t\n\x0B\f\r plus U+00A0]+.
     */
93     private static final Pattern WHITESPACE_AND_NBSP_TO_NORMALIZE = PatternCache.get("[\\s\\u00A0]+");
94 
95     /**
96      * one or more NBSP followed by one or more regular spaces
97      */
98     private static final Pattern NBSP_PLUS_SPACE_TO_NORMALIZE = PatternCache.get("\\u00A0+\\u0020+");
99 
100     /**
101      * one or more regular spaces followed by one or more NBSP
102      */
103     private static final Pattern SPACE_PLUS_NBSP_TO_NORMALIZE = PatternCache.get("\\u0020+\\u00A0+");
104 
105     private static final Pattern INITIAL_NBSP = PatternCache.get("^\\u00A0+");
106     private static final Pattern FINAL_NBSP = PatternCache.get("\\u00A0+$");
107     private static final Pattern MULTIPLE_NBSP = PatternCache.get("\\u00A0\\u00A0+");
108 
109     private static final UnicodeSet UNICODE_WHITESPACE = new UnicodeSet("[:whitespace:]").freeze();
110 
111     private static final CLDRLocale MALAYALAM = CLDRLocale.getInstance("ml");
112     private static final CLDRLocale ROMANIAN = CLDRLocale.getInstance("ro");
113     private static final CLDRLocale CATALAN = CLDRLocale.getInstance("ca");
114     private static final CLDRLocale NGOMBA = CLDRLocale.getInstance("jgo");
115     private static final CLDRLocale KWASIO = CLDRLocale.getInstance("nmg");
116     private static final CLDRLocale HEBREW = CLDRLocale.getInstance("he");
117     private static final CLDRLocale MYANMAR = CLDRLocale.getInstance("my");
118     private static final CLDRLocale KYRGYZ = CLDRLocale.getInstance("ky");
119     private static final CLDRLocale URDU = CLDRLocale.getInstance("ur");
120     private static final CLDRLocale PASHTO = CLDRLocale.getInstance("ps");
121     private static final CLDRLocale FARSI = CLDRLocale.getInstance("fa");
122     private static final CLDRLocale GERMAN_SWITZERLAND = CLDRLocale.getInstance("de_CH");
123     private static final CLDRLocale SWISS_GERMAN = CLDRLocale.getInstance("gsw");
124     private static final CLDRLocale FF_ADLAM = CLDRLocale.getInstance("ff_Adlm");
125     public static final Set<String> LANGUAGES_USING_MODIFIER_APOSTROPHE = new HashSet<>(
126         Arrays.asList("br", "bss", "cad", "cic", "cch", "gn", "ha", "ha_Latn", "lkt", "mgo", "moh", "mus", "nnh", "qu", "quc", "uk", "uz", "uz_Latn"));
127 
128     // Ş ş Ţ ţ  =>  Ș ș Ț ț
129     private static final char[][] ROMANIAN_CONVERSIONS = {
130         { '\u015E', '\u0218' }, { '\u015F', '\u0219' }, { '\u0162', '\u021A' },
131         { '\u0163', '\u021B' } };
132 
133     private static final char[][] CATALAN_CONVERSIONS = {
134         { '\u013F', '\u004C', '\u00B7' }, // Ŀ -> L·
135         { '\u0140', '\u006C', '\u00B7' } }; // ŀ -> l·
136 
137     private static final char[][] NGOMBA_CONVERSIONS = {
138         { '\u0251', '\u0061' }, { '\u0261', '\u0067' }, //  ɑ -> a , ɡ -> g , See ticket #5691
139         { '\u2019', '\uA78C' }, { '\u02BC', '\uA78C' } }; //  Saltillo, see ticket #6805
140 
141     private static final char[][] KWASIO_CONVERSIONS = {
142         { '\u0306', '\u030C' }, // See ticket #6571, use caron instead of breve
143         { '\u0103', '\u01CE' }, { '\u0102', '\u01CD' }, // a-breve -> a-caron
144         { '\u0115', '\u011B' }, { '\u011A', '\u01CD' }, // e-breve -> e-caron
145         { '\u012D', '\u01D0' }, { '\u012C', '\u01CF' }, // i-breve -> i-caron
146         { '\u014F', '\u01D2' }, { '\u014E', '\u01D1' }, // o-breve -> o-caron
147         { '\u016D', '\u01D4' }, { '\u016C', '\u01D3' } // u-breve -> u-caron
148     };
149 
150     private static final char[][] HEBREW_CONVERSIONS = {
151         { '\'', '\u05F3' }, { '"', '\u05F4' } }; //  ' -> geresh  " -> gershayim
152 
153     private static final char[][] KYRGYZ_CONVERSIONS = {
154         { 'ӊ', 'ң' }, { 'Ӊ', 'Ң' } }; //  right modifier
155 
156     private static final char[][] URDU_PLUS_CONVERSIONS = {
157         { '\u0643', '\u06A9' }}; //  wrong char
158 
159     private static final ZawgyiDetector detector = new ZawgyiDetector();
160     private static final Transliterator zawgyiUnicodeTransliterator =
161         Transliterator.getInstance("Zawgyi-my");
162 
163     private Collator col;
164 
165     private Collator spaceCol;
166 
167     private UnicodeSetPrettyPrinter pp = null;
168 
169     final private CLDRLocale locale;
170     private boolean isPosix;
171 
172     /**
173      * Constructor, taking cldrFile.
174      *
175      * @param cldrFileToCheck
176      */
DisplayAndInputProcessor(CLDRFile cldrFileToCheck, boolean needsCollator)177     public DisplayAndInputProcessor(CLDRFile cldrFileToCheck, boolean needsCollator) {
178         init(this.locale = CLDRLocale.getInstance(cldrFileToCheck.getLocaleID()), needsCollator);
179     }
180 
DisplayAndInputProcessor(CLDRFile cldrFileToCheck)181     public DisplayAndInputProcessor(CLDRFile cldrFileToCheck) {
182         init(this.locale = CLDRLocale.getInstance(cldrFileToCheck.getLocaleID()), true);
183     }
184 
init(CLDRLocale locale, boolean needsCollator)185     void init(CLDRLocale locale, boolean needsCollator) {
186         isPosix = locale.toString().indexOf("POSIX") >= 0;
187         if (needsCollator) {
188             ICUServiceBuilder isb = null;
189             try {
190                 isb = ICUServiceBuilder.forLocale(locale);
191             } catch (Exception e) {
192             }
193 
194             if (isb != null) {
195                 try {
196                     col = isb.getRuleBasedCollator();
197                 } catch (Exception e) {
198                     col = Collator.getInstance(ULocale.ROOT);
199                 }
200             } else {
201                 col = Collator.getInstance(ULocale.ROOT);
202             }
203 
204             spaceCol = Collator.getInstance(locale.toULocale());
205             if (spaceCol instanceof RuleBasedCollator) {
206                 ((RuleBasedCollator) spaceCol).setAlternateHandlingShifted(false);
207             }
208             pp = new UnicodeSetPrettyPrinter().setOrdering(Collator.getInstance(ULocale.ROOT))
209                 .setSpaceComparator(Collator.getInstance(ULocale.ROOT).setStrength2(Collator.PRIMARY))
210                 .setCompressRanges(true)
211                 .setToQuote(new UnicodeSet(TO_QUOTE))
212                 .setOrdering(col)
213                 .setSpaceComparator(spaceCol);
214         }
215     }
216 
getPrettyPrinter()217     public UnicodeSetPrettyPrinter getPrettyPrinter() {
218         return pp;
219     }
220 
221     /**
222      * Constructor, taking ULocale and boolean.
223      *
224      * @param locale the ULocale
225      * @param needsCollator true or false
226      *
227      * Called by getProcessor, with locale = SurveyMain.TRANS_HINT_LOCALE
228      */
DisplayAndInputProcessor(ULocale locale, boolean needsCollator)229     public DisplayAndInputProcessor(ULocale locale, boolean needsCollator) {
230         init(this.locale = CLDRLocale.getInstance(locale), needsCollator);
231     }
232 
233     /**
234      * Constructor, taking ULocale.
235      *
236      * @param locale the ULocale
237      */
DisplayAndInputProcessor(ULocale locale)238     public DisplayAndInputProcessor(ULocale locale) {
239         init(this.locale = CLDRLocale.getInstance(locale), true /* needsCollator */);
240     }
241 
242     /**
243      * Constructor, taking CLDRLocale and boolean.
244      *
245      * @param locale the CLDRLocale
246      * @param needsCollator true or false
247      */
DisplayAndInputProcessor(CLDRLocale locale, boolean needsCollator)248     public DisplayAndInputProcessor(CLDRLocale locale, boolean needsCollator) {
249         init(this.locale = locale, needsCollator);
250     }
251 
252     /**
253      * Constructor, taking locale.
254      *
255      * @param locale
256      */
DisplayAndInputProcessor(CLDRLocale locale)257     public DisplayAndInputProcessor(CLDRLocale locale) {
258         init(this.locale = locale, true);
259     }
260 
261     /**
262      * Process the value for display. The result is a string for display in the
263      * Survey tool or similar program.
264      *
265      * @param path
266      * @param value
267      * @param fullPath
268      * @return
269      */
processForDisplay(String path, String value)270     public synchronized String processForDisplay(String path, String value) {
271         value = Normalizer.compose(value, false); // Always normalize all text to NFC.
272         if (hasUnicodeSetValue(path)) {
273             value = displayUnicodeSet(value);
274         } else if (path.contains("stopword")) {
275             return value.trim().isEmpty() ? "NONE" : value;
276         } else {
277             NumericType numericType = NumericType.getNumericType(path);
278             if (numericType != NumericType.NOT_NUMERIC) {
279                 // Canonicalize existing values that aren't canonicalized yet.
280                 // New values will be canonicalized on input using processInput().
281                 try {
282                     value = getCanonicalPattern(value, numericType, isPosix);
283                 } catch (IllegalArgumentException e) {
284                     if (DEBUG_DAIP) System.err.println("Illegal pattern: " + value);
285                 }
286                 if (numericType != NumericType.CURRENCY && numericType != NumericType.CURRENCY_ABBREVIATED) {
287                     value = value.replace("'", "");
288                 }
289             }
290         }
291         // Fix up any apostrophes in number symbols
292         if (NUMBER_SEPARATOR_PATTERN.matcher(path).matches()) {
293             value = value.replace('\'', '\u2019');
294         }
295         // Fix up any apostrophes as appropriate (Don't do so for things like date patterns...
296         if (!APOSTROPHE_SKIP_PATHS.matcher(path).matches()) {
297             value = normalizeApostrophes(value);
298         }
299         // Fix up hyphens, replacing with N-dash as appropriate
300         if (INTERVAL_FORMAT_PATHS.matcher(path).matches()) {
301             value = normalizeIntervalHyphens(value);
302         } else {
303             value = normalizeHyphens(value);
304         }
305         return value;
306     }
307 
hasUnicodeSetValue(String path)308     private boolean hasUnicodeSetValue(String path) {
309         return path.startsWith("//ldml/characters/exemplarCharacters") || path.startsWith("//ldml/characters/parseLenients");
310     }
311 
312     static final UnicodeSet WHITESPACE = new UnicodeSet("[:whitespace:]").freeze();
313     static final DateTimeCanonicalizer dtc = new DateTimeCanonicalizer(FIX_YEARS);
314 
315     private static final String BAR_VL = "\\|"; // U+007C VERTICAL LINE (pipe, bar) literal
316     private static final String BAR_EL = "\\s+l\\s+"; // U+006C LATIN SMALL LETTER L with space
317     private static final String BAR_DANDA = "।"; // U+0964 DEVANAGARI DANDA
318     private static final String BAR_REGEX = "(" + BAR_VL + "|" + BAR_EL + "|" + BAR_DANDA + ")";
319     public static final Splitter SPLIT_BAR = Splitter.on(Pattern.compile(BAR_REGEX)).trimResults().omitEmptyStrings();
320     static final Splitter SPLIT_SPACE = Splitter.on(' ').trimResults().omitEmptyStrings();
321     static final Joiner JOIN_BAR = Joiner.on(" | ");
322 
323     /**
324      * Process the value for input. The result is a cleaned-up value. For example,
325      * an exemplar set is modified to be in the normal format, and any missing [ ]
326      * are added (a common omission on entry). If there are any failures then the
327      * original value is returned, so that the proper error message can be given.
328      *
329      * @param path
330      * @param value
331      * @param internalException
332      *            TODO
333      * @param fullPath
334      * @return
335      */
processInput(String path, String value, Exception[] internalException)336     public synchronized String processInput(String path, String value, Exception[] internalException) {
337         String original = value;
338         value = stripProblematicControlCharacters(value);
339         value = Normalizer.compose(value, false); // Always normalize all input to NFC.
340         value = value.replace('\u00B5', '\u03BC'); // use the right Greek mu character
341 
342         if (internalException != null) {
343             internalException[0] = null;
344         }
345         // skip processing for inheritance marker
346         if (CldrUtility.INHERITANCE_MARKER.equals(value)) {
347             return value; // Reference: https://unicode.org/cldr/trac/ticket/11261
348         }
349         // for root annotations
350         if (CLDRLocale.ROOT.equals(locale) && path.contains("/annotations")) {
351             return value; // Reference: https://unicode.org/cldr/trac/ticket/11261
352         }
353 
354         try {
355             // Normalise Malayalam characters.
356             boolean isUnicodeSet = hasUnicodeSetValue(path);
357             if (locale.childOf(MALAYALAM)) {
358                 String newvalue = normalizeMalayalam(value);
359                 if (DEBUG_DAIP) System.out.println("DAIP: Normalized Malayalam '" + value + "' to '" + newvalue + "'");
360                 value = newvalue;
361             } else if (locale.childOf(ROMANIAN) && !isUnicodeSet) {
362                 value = standardizeRomanian(value);
363             } else if (locale.childOf(CATALAN) && !isUnicodeSet) {
364                 value = standardizeCatalan(value);
365             } else if (locale.childOf(NGOMBA) && !isUnicodeSet) {
366                 value = standardizeNgomba(value);
367             } else if (locale.childOf(KWASIO) && !isUnicodeSet) {
368                 value = standardizeKwasio(value);
369             } else if (locale.childOf(HEBREW) && !APOSTROPHE_SKIP_PATHS.matcher(path).matches()) {
370                 value = replaceChars(path, value, HEBREW_CONVERSIONS, false);
371             } else if ((locale.childOf(SWISS_GERMAN) || locale.childOf(GERMAN_SWITZERLAND)) && !isUnicodeSet) {
372                 value = standardizeSwissGerman(value);
373             } else if (locale.childOf(MYANMAR) && !isUnicodeSet) {
374                 value = standardizeMyanmar(value);
375             } else if (locale.childOf(KYRGYZ)) {
376                 value = replaceChars(path, value, KYRGYZ_CONVERSIONS, false);
377             } else if (locale.childOf(URDU) || locale.childOf(PASHTO) || locale.childOf(FARSI)) {
378                 value = replaceChars(path, value, URDU_PLUS_CONVERSIONS, true);
379             } else if (locale.childOf(FF_ADLAM) && !isUnicodeSet) {
380                 value = fixAdlamNasalization(value);
381             }
382 
383             if (UNICODE_WHITESPACE.containsSome(value)) {
384                 value = normalizeWhitespace(path, value);
385             }
386 
387             // all of our values should not have leading or trailing spaces, except insertBetween
388             if (!path.contains("/insertBetween") && !isUnicodeSet) {
389                 value = value.trim();
390             }
391 
392             // fix grouping separator if space
393             if (path.startsWith("//ldml/numbers/symbols") && !path.contains("/alias")) {
394                 if (value.isEmpty()) {
395                     value = "\u00A0";
396                 }
397                 value = value.replace(' ', '\u00A0');
398             }
399 
400             // fix date patterns
401             DateTimePatternType datetimePatternType = DateTimePatternType.fromPath(path);
402             if (DateTimePatternType.STOCK_AVAILABLE_INTERVAL_PATTERNS.contains(datetimePatternType)) {
403                 try {
404                     value = dtc.getCanonicalDatePattern(path, value, datetimePatternType);
405                 } catch (IllegalArgumentException ex) {
406                     return value;
407                 }
408             }
409 
410             if (path.startsWith("//ldml/numbers/currencies/currency") && path.contains("displayName")) {
411                 value = normalizeCurrencyDisplayName(value);
412             }
413             NumericType numericType = NumericType.getNumericType(path);
414             if (numericType != NumericType.NOT_NUMERIC) {
415                 if (numericType == NumericType.CURRENCY) {
416                     value = value.replaceAll(" ", "\u00A0");
417                     if (numericType == NumericType.CURRENCY_ABBREVIATED) {
418                         value = value.replaceAll("0\\.0+", "0");
419                     }
420                 } else {
421                     value = value.replaceAll("([%\u00A4]) ", "$1\u00A0")
422                         .replaceAll(" ([%\u00A4])", "\u00A0$1");
423                     value = replace(NON_DECIMAL_PERIOD, value, "'.'");
424                     if (numericType == NumericType.DECIMAL_ABBREVIATED) {
425                         value = value.replaceAll("0\\.0+", "0");
426                     }
427                 }
428                 value = getCanonicalPattern(value, numericType, isPosix);
429             }
430 
431             // fix [,]
432             if (path.startsWith("//ldml/localeDisplayNames/languages/language")
433                 || path.startsWith("//ldml/localeDisplayNames/scripts/script")
434                 || path.startsWith("//ldml/localeDisplayNames/territories/territory")
435                 || path.startsWith("//ldml/localeDisplayNames/variants/variant")
436                 || path.startsWith("//ldml/localeDisplayNames/keys/key")
437                 || path.startsWith("//ldml/localeDisplayNames/types/type")) {
438                 value = value.replace('[', '(').replace(']', ')').replace('[', '(').replace(']', ')');
439             }
440 
441             // Normalize two single quotes for the inches symbol.
442             if (path.contains("/units")) {
443                 value = value.replace("''", "″");
444             }
445 
446             // check specific cases
447             if (isUnicodeSet) {
448                 value = inputUnicodeSet(path, value);
449             } else if (path.contains("stopword")) {
450                 if (value.equals("NONE")) {
451                     value = "";
452                 }
453             }
454 
455             // Normalize ellipsis data.
456             if (path.startsWith("//ldml/characters/ellipsis")) {
457                 value = value.replace("...", "…");
458             }
459 
460             // Replace Arabic presentation forms with their nominal counterparts
461             value = replaceArabicPresentationForms(value);
462 
463             // Fix up any apostrophes as appropriate (Don't do so for things like date patterns...
464             if (!APOSTROPHE_SKIP_PATHS.matcher(path).matches()) {
465                 value = normalizeApostrophes(value);
466             }
467             // Fix up any apostrophes in number symbols
468             if (NUMBER_SEPARATOR_PATTERN.matcher(path).matches()) {
469                 value = value.replace('\'', '\u2019');
470             }
471             // Fix up hyphens, replacing with N-dash as appropriate
472             if (INTERVAL_FORMAT_PATHS.matcher(path).matches()) {
473                 value = normalizeIntervalHyphens(value);
474             } else if (!isUnicodeSet) {
475                 value = normalizeHyphens(value);
476             }
477 
478             if (path.startsWith("//ldml/annotations/annotation")) {
479                 if (path.contains(Emoji.TYPE_TTS)) {
480                     // The row has something like "�� -name" in the first column. Cf. namePath, getNamePaths.
481                     // Normally the value is like "zebra" or "unicorn face", without "|".
482                     // If the user enters a value with "|",  discard anything after "|"; e.g., change "a | b | c" to "a".
483                     value = SPLIT_BAR.split(value).iterator().next();
484                 } else {
485                     // The row has something like "�� –keywords" in the first column. Cf. keywordPath, getKeywordPaths.
486                     // Normally the value is like "stripe | zebra", with "|".
487                     value = annotationsForDisplay(value);
488                 }
489             }
490 
491             return value;
492         } catch (RuntimeException e) {
493             if (internalException != null) {
494                 internalException[0] = e;
495             }
496             return original;
497         }
498     }
499 
500     /**
501      * Strip out all code points less than U+0020 except for U+0009 tab,
502      * U+000A line feed, and U+000D carriage return.
503      *
504      * @param s the string
505      * @return the resulting string
506      */
stripProblematicControlCharacters(String s)507     private String stripProblematicControlCharacters(String s) {
508         if (s == null || s.isEmpty()) {
509             return s;
510         }
511         return s.codePoints()
512             .filter(c -> (c >= 0x20 || c == 9 || c == 0xA || c == 0xD))
513             .collect(StringBuilder::new, StringBuilder::appendCodePoint, StringBuilder::append)
514             .toString();
515     }
516 
517     private static final boolean REMOVE_COVERED_KEYWORDS = true;
518 
519     /**
520      * Produce a modification of the given annotation by sorting its components and filtering covered keywords.
521      *
522      * Examples: Given "b | a", return "a | b". Given "bear | panda | panda bear", return "bear | panda".
523      *
524      * @param value the string
525      * @return the possibly modified string
526      */
annotationsForDisplay(String value)527     private static String annotationsForDisplay(String value) {
528         TreeSet<String> sorted = new TreeSet<>(Collator.getInstance(ULocale.ROOT));
529         sorted.addAll(SPLIT_BAR.splitToList(value));
530         if (REMOVE_COVERED_KEYWORDS) {
531             filterCoveredKeywords(sorted);
532         }
533         value = JOIN_BAR.join(sorted);
534         return value;
535     }
536 
537     /**
538      * Filter from the given set some keywords that include spaces, if they duplicate,
539      * or are "covered by", other keywords in the set.
540      *
541      * For example, if the set is {"bear", "panda", "panda bear"} (annotation was "bear | panda | panda bear"),
542      * then remove "panda bear", treating it as "covered" since the set already includes "panda" and "bear".
543      *
544      * @param sorted the set from which items may be removed
545      */
filterCoveredKeywords(TreeSet<String> sorted)546     public static void filterCoveredKeywords(TreeSet<String> sorted) {
547         // for now, just do single items
548         HashSet<String> toRemove = new HashSet<>();
549 
550         for (String item : sorted) {
551             List<String> list = SPLIT_SPACE.splitToList(item);
552             if (list.size() < 2) {
553                 continue;
554             }
555             if (sorted.containsAll(list)) {
556                 toRemove.add(item);
557             }
558         }
559         sorted.removeAll(toRemove);
560     }
561 
displayUnicodeSet(String value)562     private String displayUnicodeSet(String value) {
563         if (value.startsWith("[") && value.endsWith("]")) {
564             value = value.substring(1, value.length() - 1);
565         }
566 
567         value = replace(NEEDS_QUOTE1, value, "$1\\\\$2$3");
568         value = replace(NEEDS_QUOTE2, value, "$1\\\\$2$3");
569 
570         // if (RTL.containsSome(value) && value.startsWith("[") && value.endsWith("]")) {
571         // return "\u200E[\u200E" + value.substring(1,value.length()-2) + "\u200E]\u200E";
572         // }
573         return value;
574     }
575 
inputUnicodeSet(String path, String value)576     private String inputUnicodeSet(String path, String value) {
577         // clean up the user's input.
578         // first, fix up the '['
579         value = value.trim();
580 
581         // remove brackets and trim again before regex
582         if (value.startsWith("[")) {
583             value = value.substring(1);
584         }
585         if (value.endsWith("]") && (!value.endsWith("\\]") || value.endsWith("\\\\]"))) {
586             value = value.substring(0, value.length() - 1);
587         }
588         value = value.trim();
589 
590         value = replace(NEEDS_QUOTE1, value, "$1\\\\$2$3");
591         value = replace(NEEDS_QUOTE2, value, "$1\\\\$2$3");
592 
593         // re-add brackets.
594         value = "[" + value + "]";
595 
596         UnicodeSet exemplar = new UnicodeSet(value);
597         XPathParts parts = XPathParts.getFrozenInstance(path);
598         if (parts.getElement(2).equals("parseLenients")) {
599             return exemplar.toPattern(false);
600         }
601         final String type = parts.getAttributeValue(-1, "type");
602         ExemplarType exemplarType = type == null ? ExemplarType.main : ExemplarType.valueOf(type);
603         value = getCleanedUnicodeSet(exemplar, pp, exemplarType);
604         return value;
605     }
606 
normalizeCurrencyDisplayName(String value)607     private String normalizeCurrencyDisplayName(String value) {
608         StringBuilder result = new StringBuilder();
609         boolean inParentheses = false;
610         for (int i = 0; i < value.length(); i++) {
611             char c = value.charAt(i);
612             if (c == '(') {
613                 inParentheses = true;
614             } else if (c == ')') {
615                 inParentheses = false;
616             }
617             if (inParentheses && c == '-' && Character.isDigit(value.charAt(i - 1))) {
618                 c = 0x2013; /* Replace hyphen-minus with dash for date ranges */
619             }
620             result.append(c);
621         }
622         return result.toString();
623     }
624 
normalizeApostrophes(String value)625     private String normalizeApostrophes(String value) {
626         // If our DAIP always had a CLDRFile to work with, then we could just check the exemplar set in it to see.
627         // But since we don't, we just maintain the list internally and use it.
628         if (LANGUAGES_USING_MODIFIER_APOSTROPHE.contains(locale.getLanguage())) {
629             return value.replace('\'', '\u02bc');
630         } else {
631             char prev = 0;
632             StringBuilder builder = new StringBuilder();
633             for (char c : value.toCharArray()) {
634                 if (c == '\'') {
635                     if (Character.isLetter(prev)) {
636                         builder.append('\u2019');
637                     } else {
638                         builder.append('\u2018');
639                     }
640                 } else {
641                     builder.append(c);
642                 }
643                 prev = c;
644             }
645             return builder.toString();
646         }
647     }
648 
normalizeIntervalHyphens(String value)649     private String normalizeIntervalHyphens(String value) {
650         DateTimePatternGenerator.FormatParser fp = new DateTimePatternGenerator.FormatParser();
651         fp.set(DateIntervalInfo.genPatternInfo(value, false).getFirstPart());
652         List<Object> items = fp.getItems();
653         Object last = items.get(items.size() - 1);
654         if (last instanceof String) {
655             String separator = last.toString();
656             if (separator.contains("-")) {
657                 StringBuilder sb = new StringBuilder();
658                 sb.append(DateIntervalInfo.genPatternInfo(value, false).getFirstPart());
659                 if (sb.lastIndexOf(separator) >= 0) {
660                     sb.delete(sb.lastIndexOf(separator), sb.length());
661                     sb.append(separator.replace("-", "\u2013"));
662                     sb.append(DateIntervalInfo.genPatternInfo(value, false).getSecondPart());
663                     return sb.toString();
664                 }
665             }
666         }
667         return value;
668     }
669 
normalizeHyphens(String value)670     private String normalizeHyphens(String value) {
671         int hyphenLocation = value.indexOf("-");
672         if (hyphenLocation > 0 &&
673             Character.isDigit(value.charAt(hyphenLocation - 1)) &&
674             hyphenLocation < value.length() - 1 &&
675             Character.isDigit(value.charAt(hyphenLocation + 1))) {
676             StringBuilder sb = new StringBuilder();
677             sb.append(value.substring(0, hyphenLocation));
678             sb.append("\u2013");
679             sb.append(value.substring(hyphenLocation + 1));
680             return sb.toString();
681         }
682         return value;
683     }
684 
standardizeRomanian(String value)685     private String standardizeRomanian(String value) {
686         StringBuilder builder = new StringBuilder();
687         for (char c : value.toCharArray()) {
688             for (char[] pair : ROMANIAN_CONVERSIONS) {
689                 if (c == pair[0]) {
690                     c = pair[1];
691                     break;
692                 }
693             }
694             builder.append(c);
695         }
696         return builder.toString();
697     }
698 
standardizeKwasio(String value)699     private String standardizeKwasio(String value) {
700         StringBuilder builder = new StringBuilder();
701         for (char c : value.toCharArray()) {
702             for (char[] pair : KWASIO_CONVERSIONS) {
703                 if (c == pair[0]) {
704                     c = pair[1];
705                     break;
706                 }
707             }
708             builder.append(c);
709         }
710         return builder.toString();
711     }
712 
713     // Use the myanmar-tools detector.
standardizeMyanmar(String value)714     private String standardizeMyanmar(String value) {
715         if (detector.getZawgyiProbability(value) > 0.90) {
716             return zawgyiUnicodeTransliterator.transform(value);
717         }
718         return value;
719     }
720 
standardizeNgomba(String value)721     private String standardizeNgomba(String value) {
722         StringBuilder builder = new StringBuilder();
723         char[] charArray = value.toCharArray();
724         for (int i = 0; i < charArray.length; i++) {
725             char c = charArray[i];
726             boolean convertedSaltillo = false;
727             for (char[] pair : NGOMBA_CONVERSIONS) {
728                 if (c == pair[0]) {
729                     c = pair[1];
730                     if (c == '\uA78C') {
731                         convertedSaltillo = true;
732                     }
733                     break;
734                 }
735             }
736             if (convertedSaltillo &&
737                 ((i > 0 && i < charArray.length - 1 && Character.isUpperCase(charArray[i - 1]) && Character.isUpperCase(charArray[i + 1])) ||
738                     (i > 1 && Character.isUpperCase(charArray[i - 1]) && Character.isUpperCase(charArray[i - 2])))) {
739                 c = '\uA78B'; // UPPER CASE SALTILLO
740             }
741             builder.append(c);
742         }
743         return builder.toString();
744     }
745 
replaceChars(String path, String value, char[][] charsToReplace, boolean skipAuxExemplars)746     private String replaceChars(String path, String value, char[][] charsToReplace, boolean skipAuxExemplars) {
747         if (skipAuxExemplars && path.contains("/exemplarCharacters[@type=\"auxiliary\"]")) {
748             return value;
749         }
750         StringBuilder builder = new StringBuilder();
751         for (char c : value.toCharArray()) {
752             for (char[] pair : charsToReplace) {
753                 if (c == pair[0]) {
754                     c = pair[1];
755                     break;
756                 }
757             }
758             builder.append(c);
759         }
760         return builder.toString();
761     }
762 
standardizeSwissGerman(String value)763     private String standardizeSwissGerman(String value) {
764         return value.replaceAll("\u00DF", "ss");
765     }
766 
standardizeCatalan(String value)767     private String standardizeCatalan(String value) {
768         StringBuilder builder = new StringBuilder();
769         for (char c : value.toCharArray()) {
770             boolean didSubstitute = false;
771             for (char[] triple : CATALAN_CONVERSIONS) {
772                 if (c == triple[0]) {
773                     builder.append(triple[1]);
774                     builder.append(triple[2]);
775                     didSubstitute = true;
776                     break;
777                 }
778             }
779             if (!didSubstitute) {
780                 builder.append(c);
781             }
782         }
783         return builder.toString();
784     }
785 
replace(Pattern pattern, String value, String replacement)786     private String replace(Pattern pattern, String value, String replacement) {
787         String value2 = pattern.matcher(value).replaceAll(replacement);
788         if (DEBUG_DAIP && !value.equals(value2)) {
789             System.out.println("\n" + value + " => " + value2);
790         }
791         return value2;
792     }
793 
794     private static Pattern UNNORMALIZED_MALAYALAM = PatternCache.get(
795         "(\u0D23|\u0D28|\u0D30|\u0D32|\u0D33|\u0D15)\u0D4D\u200D");
796 
797     private static Map<Character, Character> NORMALIZING_MAP = Builder.with(new HashMap<Character, Character>())
798         .put('\u0D23', '\u0D7A').put('\u0D28', '\u0D7B')
799         .put('\u0D30', '\u0D7C').put('\u0D32', '\u0D7D')
800         .put('\u0D33', '\u0D7E').put('\u0D15', '\u0D7F').get();
801 
802     /**
803      * Normalizes the Malayalam characters in the specified input.
804      *
805      * @param value
806      *            the input to be normalized
807      * @return
808      */
normalizeMalayalam(String value)809     private String normalizeMalayalam(String value) {
810         // Normalize Malayalam characters.
811         Matcher matcher = UNNORMALIZED_MALAYALAM.matcher(value);
812         if (matcher.find()) {
813             StringBuffer buffer = new StringBuffer();
814             int start = 0;
815             do {
816                 buffer.append(value.substring(start, matcher.start(0)));
817                 char codePoint = matcher.group(1).charAt(0);
818                 buffer.append(NORMALIZING_MAP.get(codePoint));
819                 start = matcher.end(0);
820             } while (matcher.find());
821             buffer.append(value.substring(start));
822             value = buffer.toString();
823         }
824         return value;
825     }
826 
827     static final Transform<String, String> fixArabicPresentation = Transliterator.getInstance(
828         "[[:block=Arabic_Presentation_Forms_A:][:block=Arabic_Presentation_Forms_B:]] nfkc");
829 
830     /**
831      * Normalizes the Arabic presentation forms characters in the specified input.
832      *
833      * @param value
834      *            the input to be normalized
835      * @return
836      */
replaceArabicPresentationForms(String value)837     private String replaceArabicPresentationForms(String value) {
838         value = fixArabicPresentation.transform(value);
839         return value;
840     }
841 
842     static Pattern ADLAM_MISNASALIZED = PatternCache.get("([����])['’‘]([����������������])");
843     public static String ADLAM_NASALIZATION = "��"; // U+1E94B (Unicode 12.0)
844 
fixAdlamNasalization(String fromString)845     public static String fixAdlamNasalization(String fromString) {
846         return ADLAM_MISNASALIZED.matcher(fromString)
847         .replaceAll("$1"+ADLAM_NASALIZATION+"$2");  // replace quote with ��
848     }
849 
    // Whitespace (group 1) followed by a backslash-escaped '-', '}', ']' or '&' (group 2);
    // group 3 is always empty.
    static Pattern REMOVE_QUOTE1 = PatternCache.get("(\\s)(\\\\[-\\}\\]\\&])()");
    // A backslash-escaped '-', '{', '[' or '&' (group 1) followed by whitespace (group 2).
    static Pattern REMOVE_QUOTE2 = PatternCache.get("(\\\\[\\-\\{\\[\\&])(\\s)"); // ([^\\])([\\-\\{\\[])(\\s)

    // Whitespace or end of input (group 1) followed by an unescaped '-', '}', ']' or '&' (group 2);
    // group 3 is always empty.
    static Pattern NEEDS_QUOTE1 = PatternCache.get("(\\s|$)([-\\}\\]\\&])()");
    // A non-backslash character (group 1), then '-', '{', '[' or '&' (group 2), then whitespace (group 3).
    static Pattern NEEDS_QUOTE2 = PatternCache.get("([^\\\\])([\\-\\{\\[\\&])(\\s)"); // ([^\\])([\\-\\{\\[])(\\s)
855 
getCleanedUnicodeSet(UnicodeSet exemplar, UnicodeSetPrettyPrinter prettyPrinter, ExemplarType exemplarType)856     public static String getCleanedUnicodeSet(UnicodeSet exemplar, UnicodeSetPrettyPrinter prettyPrinter,
857         ExemplarType exemplarType) {
858         if (prettyPrinter == null) {
859             return exemplar.toPattern(false);
860         }
861         String value;
862         prettyPrinter.setCompressRanges(exemplar.size() > 300);
863         value = exemplar.toPattern(false);
864         UnicodeSet toAdd = new UnicodeSet();
865 
866         for (UnicodeSetIterator usi = new UnicodeSetIterator(exemplar); usi.next();) {
867             String string = usi.getString();
868             if (string.equals("ß") || string.equals("İ")) {
869                 toAdd.add(string);
870                 continue;
871             }
872             switch (string) {
873             case "\u2011": toAdd.add("-"); break; // nobreak hyphen
874             case "-": toAdd.add("\u2011"); break; // nobreak hyphen
875 
876             case " ": toAdd.add("\u00a0"); break; // nobreak space
877             case "\u00a0": toAdd.add(" "); break; // nobreak space
878 
879             case "\u202F": toAdd.add("\u2009"); break; // nobreak narrow space
880             case "\u2009": toAdd.add("\u202F"); break; // nobreak narrow space
881             }
882             if (exemplarType.convertUppercase) {
883                 string = UCharacter.toLowerCase(ULocale.ENGLISH, string);
884             }
885             toAdd.add(string);
886             String composed = Normalizer.compose(string, false);
887             if (!string.equals(composed)) {
888                 toAdd.add(composed);
889             }
890         }
891 
892         toAdd.removeAll(exemplarType.toRemove);
893 
894         if (DEBUG_DAIP && !toAdd.equals(exemplar)) {
895             UnicodeSet oldOnly = new UnicodeSet(exemplar).removeAll(toAdd);
896             UnicodeSet newOnly = new UnicodeSet(toAdd).removeAll(exemplar);
897             System.out.println("Exemplar:\t" + exemplarType + ",\tremoved\t" + oldOnly + ",\tadded\t" + newOnly);
898         }
899 
900         String fixedExemplar = prettyPrinter.format(toAdd);
901         UnicodeSet doubleCheck = new UnicodeSet(fixedExemplar);
902         if (!toAdd.equals(doubleCheck)) {
903             // something went wrong, leave as is
904         } else if (!value.equals(fixedExemplar)) { // put in this condition just for debugging
905             if (DEBUG_DAIP) {
906                 System.out.println(TestMetadata.showDifference(
907                     With.codePoints(value),
908                     With.codePoints(fixedExemplar),
909                     "\n"));
910             }
911             value = fixedExemplar;
912         }
913         return value;
914     }
915 
916     /**
917      * @return a canonical numeric pattern, based on the type, and the isPOSIX flag. The latter is set for en_US_POSIX.
918      */
919     static final Splitter SEMI_SPLITTER = Splitter.on(';').trimResults();
920 
getCanonicalPattern(String inpattern, NumericType type, boolean isPOSIX)921     public static String getCanonicalPattern(String inpattern, NumericType type, boolean isPOSIX) {
922         // TODO fix later to properly handle quoted ;
923 
924         DecimalFormat df = new DecimalFormat(inpattern);
925         if (type == NumericType.DECIMAL_ABBREVIATED || type == NumericType.CURRENCY_ABBREVIATED
926             || CldrUtility.INHERITANCE_MARKER.equals(inpattern)) {
927             return inpattern; // TODO fix when ICU bug is fixed
928             // df.setMaximumFractionDigits(df.getMinimumFractionDigits());
929             // df.setMaximumIntegerDigits(Math.max(1, df.getMinimumIntegerDigits()));
930         } else {
931             // int decimals = type == CURRENCY_TYPE ? 2 : 1;
932             int[] digits = isPOSIX ? type.posixDigitCount : type.digitCount;
933             df.setMinimumIntegerDigits(digits[0]);
934             df.setMinimumFractionDigits(digits[1]);
935             df.setMaximumFractionDigits(digits[2]);
936         }
937         String pattern = df.toPattern();
938         List<String> parts = SEMI_SPLITTER.splitToList(pattern);
939         String pattern2 = parts.get(0);
940         if (parts.size() > 1) {
941             pattern2 += ";" + parts.get(1);
942         }
943         if (!pattern2.equals(pattern)) {
944             pattern = pattern2;
945         }
946         // int pos = pattern.indexOf(';');
947         // if (pos < 0) return pattern + ";-" + pattern;
948         return pattern;
949     }
950 
951     /*
952      * This tests what type a numeric pattern is.
953      */
954     public enum NumericType {
955         CURRENCY(new int[] { 1, 2, 2 }, new int[] { 1, 2, 2 }), CURRENCY_ABBREVIATED(), DECIMAL(new int[] { 1, 0, 3 },
956             new int[] { 1, 0, 6 }), DECIMAL_ABBREVIATED(), PERCENT(new int[] { 1, 0, 0 },
957                 new int[] { 1, 0, 0 }), SCIENTIFIC(new int[] { 0, 0, 0 }, new int[] { 1, 6, 6 }), NOT_NUMERIC;
958 
959         private static final Pattern NUMBER_PATH = Pattern
960             .compile("//ldml/numbers/((currency|decimal|percent|scientific)Formats|currencies/currency).*");
961         private int[] digitCount;
962         private int[] posixDigitCount;
963 
NumericType()964         private NumericType() {
965         }
966 
NumericType(int[] digitCount, int[] posixDigitCount)967         private NumericType(int[] digitCount, int[] posixDigitCount) {
968             this.digitCount = digitCount;
969             this.posixDigitCount = posixDigitCount;
970         }
971 
972         /**
973          * @return the numeric type of the xpath
974          */
getNumericType(String xpath)975         public static NumericType getNumericType(String xpath) {
976             Matcher matcher = NUMBER_PATH.matcher(xpath);
977             if (xpath.indexOf("/pattern") < 0) {
978                 return NOT_NUMERIC;
979             } else if (matcher.matches()) {
980                 if (matcher.group(1).equals("currencies/currency")) {
981                     return CURRENCY;
982                 } else {
983                     NumericType type = NumericType.valueOf(matcher.group(2).toUpperCase());
984                     if (xpath.contains("=\"1000")) {
985                         if (type == DECIMAL) {
986                             type = DECIMAL_ABBREVIATED;
987                         } else if (type == CURRENCY) {
988                             type = CURRENCY_ABBREVIATED;
989                         } else {
990                             throw new IllegalArgumentException("Internal Error");
991                         }
992                     }
993                     return type;
994                 }
995             } else {
996                 return NOT_NUMERIC;
997             }
998         }
999 
getDigitCount()1000         public int[] getDigitCount() {
1001             return digitCount;
1002         }
1003 
getPosixDigitCount()1004         public int[] getPosixDigitCount() {
1005             return posixDigitCount;
1006         }
1007     }
1008 
1009     /**
1010      * Turn all whitespace sequences (including tab and newline, and NBSP for certain paths)
1011      * into a single space or a single NBSP depending on path.
1012      * Also trim initial/final NBSP, unless the value is only the one character, "\u00A0"
1013      *
1014      * @param path
1015      * @param value
1016      * @return the normalized value
1017      */
normalizeWhitespace(String path, String value)1018     private String normalizeWhitespace(String path, String value) {
1019         PathSpaceType pst = PathSpaceType.get(path);
1020         if (pst == PathSpaceType.allowSp) {
1021             value = WHITESPACE_AND_NBSP_TO_NORMALIZE.matcher(value).replaceAll(" "); // replace with regular space
1022         } else if (pst == PathSpaceType.allowNbsp) {
1023             value = WHITESPACE_AND_NBSP_TO_NORMALIZE.matcher(value).replaceAll("\u00A0"); // replace with NBSP
1024             value = trimNBSP(value);
1025         } else if (pst == PathSpaceType.allowSpOrNbsp) {
1026             /*
1027              * in this case don't normalize away NBSP
1028              */
1029             value = WHITESPACE_NO_NBSP_TO_NORMALIZE.matcher(value).replaceAll(" "); // replace with regular space
1030             /*
1031              * if any NBSP and regular space are adjacent, replace with NBSP
1032              */
1033             value = NBSP_PLUS_SPACE_TO_NORMALIZE.matcher(value).replaceAll("\u00A0");
1034             value = SPACE_PLUS_NBSP_TO_NORMALIZE.matcher(value).replaceAll("\u00A0");
1035             value = MULTIPLE_NBSP.matcher(value).replaceAll("\u00A0");
1036             value = trimNBSP(value);
1037         } else {
1038             throw new IllegalArgumentException("Unknown PathSpaceType " + pst);
1039         }
1040         return value;
1041     }
1042 
1043     /**
1044      * Delete any initial or final NBSP, unless the value is just NBSP
1045      *
1046      * @param value
1047      * @return the trimmed value
1048      */
trimNBSP(String value)1049     private String trimNBSP(String value) {
1050         if (!"\u00A0".equals(value)) {
1051             value = INITIAL_NBSP.matcher(value).replaceAll("");
1052             value = FINAL_NBSP.matcher(value).replaceAll("");
1053         }
1054         return value;
1055     }
1056 
1057     /**
1058      * Categorize xpaths according to whether they allow space, NBSP, or both
1059      */
1060     public enum PathSpaceType {
1061         allowSp, allowNbsp, allowSpOrNbsp;
1062 
get(String path)1063         public static PathSpaceType get(String path) {
1064             if (wantsRegularSpace(path)) {
1065                 return allowSp;
1066             } else if (wantsNBSP(path)) {
1067                 return allowNbsp;
1068             } else {
1069                 return allowSpOrNbsp;
1070             }
1071         }
1072 
wantsRegularSpace(String path)1073         private static boolean wantsRegularSpace(String path) {
1074             if ((path.contains("/dateFormatLength") && path.contains("/pattern")) ||
1075                 path.contains("/availableFormats/dateFormatItem") ||
1076                 (path.startsWith("//ldml/dates/timeZoneNames/metazone") && path.contains("/long")) ||
1077                 path.startsWith("//ldml/dates/timeZoneNames/regionFormat") ||
1078                 path.startsWith("//ldml/localeDisplayNames/codePatterns/codePattern") ||
1079                 path.startsWith("//ldml/localeDisplayNames/languages/language") ||
1080                 path.startsWith("//ldml/localeDisplayNames/territories/territory") ||
1081                 path.startsWith("//ldml/localeDisplayNames/types/type") ||
1082                 (path.startsWith("//ldml/numbers/currencies/currency") && path.contains("/displayName")) ||
1083                 (path.contains("/decimalFormatLength[@type=\"long\"]") && path.contains("/pattern")) ||
1084                 path.startsWith("//ldml/posix/messages") ||
1085                 (path.startsWith("//ldml/units/uni") && path.contains("/unitPattern "))) {
1086                 return true;
1087             }
1088             return false;
1089         }
1090 
wantsNBSP(String path)1091         private static boolean wantsNBSP(String path) {
1092             if ((path.contains("/currencies/currency") && (path.contains("/group") || path.contains("/pattern"))) ||
1093                 (path.contains("/currencyFormatLength") && path.contains("/pattern")) ||
1094                 (path.contains("/currencySpacing") && path.contains("/insertBetween")) ||
1095                 (path.contains("/decimalFormatLength") && path.contains("/pattern")) || // i.e. the non-long ones
1096                 (path.contains("/percentFormatLength") && path.contains("/pattern")) ||
1097                 (path.startsWith("//ldml/numbers/symbols") && (path.contains("/group") || path.contains("/nan")))) {
1098                 return true;
1099             }
1100             return false;
1101         }
1102     }
1103 }
1104