• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.util;
2 
3 import java.util.HashSet;
4 import java.util.List;
5 import java.util.Set;
6 import java.util.TreeSet;
7 import java.util.concurrent.ConcurrentHashMap;
8 
9 import org.unicode.cldr.util.DayPeriodInfo.DayPeriod;
10 import org.unicode.cldr.util.PluralRulesUtil.KeywordStatus;
11 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo;
12 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count;
13 import org.unicode.cldr.util.SupplementalDataInfo.PluralType;
14 
15 import com.google.common.collect.ArrayListMultimap;
16 import com.google.common.collect.ImmutableList;
17 import com.google.common.collect.ImmutableSet;
18 import com.google.common.collect.Multimap;
19 import com.ibm.icu.text.PluralRules;
20 
21 public class LogicalGrouping {
22 
    /**
     * Metazone names for which {@code PathType.METAZONE} builds a logical group of
     * "generic", "standard" and "daylight" paths; metazones not listed here get no group.
     * NOTE(review): the name suggests these are the metazones that observe(d) daylight
     * saving time -- confirm against the CLDR metazone data.
     */
    public static final ImmutableSet<String> metazonesDSTSet = ImmutableSet.of(
        "Acre", "Africa_Western", "Alaska", "Almaty", "Amazon",
        "America_Central", "America_Eastern", "America_Mountain", "America_Pacific", "Anadyr", "Apia",
        "Aqtau", "Aqtobe", "Arabian", "Argentina", "Argentina_Western", "Armenia",
        "Atlantic", "Australia_Central", "Australia_CentralWestern", "Australia_Eastern", "Australia_Western",
        "Azerbaijan", "Azores", "Bangladesh", "Brasilia", "Cape_Verde",
        "Chatham", "Chile", "China", "Choibalsan", "Colombia", "Cook", "Cuba", "Easter",
        "Europe_Central", "Europe_Eastern", "Europe_Western", "Falkland", "Fiji", "Georgia",
        "Greenland_Eastern", "Greenland_Western", "Hawaii_Aleutian", "Hong_Kong", "Hovd",
        "Iran", "Irkutsk", "Israel", "Japan", "Kamchatka", "Korea", "Krasnoyarsk",
        "Lord_Howe", "Macau", "Magadan", "Mauritius", "Mexico_Northwest", "Mexico_Pacific", "Mongolia", "Moscow", "New_Caledonia",
        "New_Zealand", "Newfoundland", "Noronha", "Novosibirsk", "Omsk", "Pakistan", "Paraguay", "Peru", "Philippines",
        "Pierre_Miquelon", "Qyzylorda", "Sakhalin", "Samara", "Samoa",
        "Taipei", "Tonga", "Turkmenistan", "Uruguay", "Uzbekistan",
        "Vanuatu", "Vladivostok", "Volgograd", "Yakutsk", "Yekaterinburg");

    /**
     * Day type codes; {@code PathType.DAYS} emits one path per entry.
     */
    public static final ImmutableList<String> days = ImmutableList.of("sun", "mon", "tue", "wed", "thu", "fri", "sat");

    /**
     * Calendar types for which {@code PathType.MONTHS} generates 13 month paths instead of 12.
     */
    public static final ImmutableSet<String> calendarsWith13Months = ImmutableSet.of("coptic", "ethiopic", "hebrew");

    /**
     * decimalFormatLength types that {@code PathType.DECIMAL_FORMAT_LENGTH} accepts; other types yield no group.
     */
    public static final ImmutableSet<String> compactDecimalFormatLengths = ImmutableSet.of("short", "long");

    /**
     * dayPeriod types that always form their own two-member group, without consulting
     * the locale's day period data.
     */
    private static final ImmutableSet<String> ampm = ImmutableSet.of("am", "pm");

    /**
     * Field types whose relative offset 0 ("now", "this hour", "this minute") is excluded
     * from grouping by {@code PathType.RELATIVE}.
     */
    private static final ImmutableSet<String> nowUnits = ImmutableSet.of("second", "second-short", "second-narrow",
        "minute", "minute-short", "minute-narrow", "hour", "hour-short", "hour-narrow");
46 
47     /**
48      * Cache from path (String) to logical group (Set<String>)
49      */
50     private static Multimap<String, String> cachePathToLogicalGroup = ArrayListMultimap.create();
51 
52     /**
53      * Cache from locale and path (<Pair<String, String>), to logical group (Set<String>)
54      */
55     private static ConcurrentHashMap<Pair<String, String>, Set<String>> cacheLocaleAndPathToLogicalGroup = new ConcurrentHashMap<Pair<String, String>, Set<String>>();
56 
57     /**
58      * Statistics on occurrences of types of logical groups, for performance testing, debugging.
59      * GET_TYPE_COUNTS should be false for production to maximize performance.
60      */
61     public static final boolean GET_TYPE_COUNTS = false;
62     public static final ConcurrentHashMap<String, Long> typeCount = GET_TYPE_COUNTS ? new ConcurrentHashMap<String, Long>() : null;
63 
64     /**
65      * GET_TYPE_FROM_PARTS is more elegant when true, but performance is a little faster when it's false.
66      * This might change if XPathParts.getInstance and/or XPathParts.set are made faster.
67      */
68     private static final boolean GET_TYPE_FROM_PARTS = false;
69 
70     /**
71      * Return a sorted set of paths that are in the same logical set as the given path
72      *
73      * @param path the distinguishing xpath
74      * @return the set of paths
75      *
76      * For example, given the path
77      *
78      * //ldml/dates/calendars/calendar[@type="gregorian"]/quarters/quarterContext[@type="format"]/quarterWidth[@type="abbreviated"]/quarter[@type="1"]
79      *
80      * return the set of four paths
81      *
82      * //ldml/dates/calendars/calendar[@type="gregorian"]/quarters/quarterContext[@type="format"]/quarterWidth[@type="abbreviated"]/quarter[@type="1"]
83      * //ldml/dates/calendars/calendar[@type="gregorian"]/quarters/quarterContext[@type="format"]/quarterWidth[@type="abbreviated"]/quarter[@type="2"]
84      * //ldml/dates/calendars/calendar[@type="gregorian"]/quarters/quarterContext[@type="format"]/quarterWidth[@type="abbreviated"]/quarter[@type="3"]
85      * //ldml/dates/calendars/calendar[@type="gregorian"]/quarters/quarterContext[@type="format"]/quarterWidth[@type="abbreviated"]/quarter[@type="4"]
86      *
87      * Caches: Most of the calculations are independent of the locale, and can be cached on a static basis.
88      * The paths that are locale-dependent are /dayPeriods and @count. Those can be computed on a per-locale basis;
89      * and cached (they are shared across a number of locales).
90      *
91      * Reference: https://unicode.org/cldr/trac/ticket/11854
92      */
getPaths(CLDRFile cldrFile, String path)93     public static Set<String> getPaths(CLDRFile cldrFile, String path) {
94         if (path == null) {
95             return new TreeSet<String>(); // return empty set for null path
96         }
97         XPathParts parts = null;
98         PathType pathType = null;
99         if (GET_TYPE_FROM_PARTS) {
100             parts = XPathParts.getInstance(path); // can't always be frozen, some addPath do setAttribute
101             pathType = PathType.getPathTypeFromParts(parts);
102         } else {
103             /*
104              * XPathParts.set is expensive, so avoid it (not needed for singletons) if !GET_TYPE_FROM_PARTS
105              */
106             pathType = PathType.getPathTypeFromPath(path);
107         }
108 
109         if (GET_TYPE_COUNTS) {
110             typeCount.compute(pathType.toString(), (k, v) -> (v == null) ? 1 : v + 1);
111         }
112 
113         if (pathType == PathType.SINGLETON) {
114             /*
115              * Skip cache for PathType.SINGLETON and simply return a set of one.
116              */
117             Set<String> set = new TreeSet<String>();
118             set.add(path);
119             return set;
120         }
121 
122         if (!GET_TYPE_FROM_PARTS) {
123             parts = XPathParts.getInstance(path);
124         }
125 
126         if (PathType.isLocaleDependent(pathType)) {
127             String locale = cldrFile.getLocaleID();
128             Pair<String, String> key = new Pair<String, String>(locale, path);
129             if (cacheLocaleAndPathToLogicalGroup.containsKey(key)) {
130                 return new TreeSet<String>(cacheLocaleAndPathToLogicalGroup.get(key));
131             }
132             Set<String> set = new TreeSet<String>();
133             pathType.addPaths(set, cldrFile, path, parts);
134             cacheLocaleAndPathToLogicalGroup.put(key, set);
135             return set;
136         } else {
137             /*
138              * All other paths are locale-independent.
139              */
140             if (cachePathToLogicalGroup.containsKey(path)) {
141                 return new TreeSet<String>(cachePathToLogicalGroup.get(path));
142             }
143             Set<String> set = new TreeSet<String>();
144             pathType.addPaths(set, cldrFile, path, parts);
145             cachePathToLogicalGroup.putAll(path, set);
146             return set;
147         }
148     }
149 
150     /**
151      * Returns the plural info for a given locale.
152      */
getPluralInfo(CLDRFile cldrFile)153     private static PluralInfo getPluralInfo(CLDRFile cldrFile) {
154         SupplementalDataInfo supplementalData = SupplementalDataInfo.getInstance(
155             cldrFile.getSupplementalDirectory());
156         return supplementalData.getPlurals(PluralType.cardinal,
157             cldrFile.getLocaleID());
158     }
159 
160     /**
161      * @param cldrFile
162      * @param path
163      * @return true if the specified path is optional in the logical grouping
164      *         that it belongs to.
165      */
isOptional(CLDRFile cldrFile, String path)166     public static boolean isOptional(CLDRFile cldrFile, String path) {
167         XPathParts parts = XPathParts.getInstance(path);
168 
169         if (parts.containsElement("relative")) {
170             String fieldType = parts.findAttributeValue("field", "type");
171             String relativeType = parts.findAttributeValue("relative", "type");
172             Integer relativeValue = relativeType == null ? 999 : Integer.valueOf(relativeType);
173             if (fieldType != null && fieldType.startsWith("day") && Math.abs(relativeValue.intValue()) >= 2) {
174                 return true; // relative days +2 +3 -2 -3 are optional in a logical group.
175             }
176         }
177         // Paths with count="(zero|one)" are optional if their usage is covered
178         // fully by paths with count="(0|1)", which are always optional themselves.
179         if (!path.contains("[@count=")) return false;
180         String pluralType = parts.getAttributeValue(-1, "count");
181         if (pluralType.equals("0") || pluralType.equals("1")) return true;
182         if (!pluralType.equals("zero") && !pluralType.equals("one")) return false;
183 
184         PluralRules pluralRules = getPluralInfo(cldrFile).getPluralRules();
185         parts.setAttribute(-1, "count", "0");
186         Set<Double> explicits = new HashSet<Double>();
187         if (cldrFile.isHere(parts.toString())) {
188             explicits.add(0.0);
189         }
190         parts.setAttribute(-1, "count", "1");
191         if (cldrFile.isHere(parts.toString())) {
192             explicits.add(1.0);
193         }
194         if (!explicits.isEmpty()) {
195             // HACK: The com.ibm.icu.text prefix is needed so that ST can find it
196             // (no idea why).
197             KeywordStatus status = org.unicode.cldr.util.PluralRulesUtil.getKeywordStatus(
198                 pluralRules, pluralType, 0, explicits, true);
199             if (status == KeywordStatus.SUPPRESSED) {
200                 return true;
201             }
202         }
203         return false;
204     }
205 
206     /**
207      * Path types for logical groupings
208      */
209     private enum PathType {
        /**
         * Type for paths that belong to no logical group. getPaths returns a one-element
         * set for these paths before any call to addPaths.
         */
        SINGLETON { // no logical groups for singleton paths
            /**
             * Adds nothing to the set.
             */
            @Override
            @SuppressWarnings("unused")
            void addPaths(Set<String> set, CLDRFile cldrFile, String path, XPathParts parts) {
                // Do nothing. This function won't be called.
            }
        },
217         METAZONE {
218             @Override
219             @SuppressWarnings("unused")
addPaths(Set<String> set, CLDRFile cldrFile, String path, XPathParts parts)220             void addPaths(Set<String> set, CLDRFile cldrFile, String path, XPathParts parts) {
221                 String metazoneName = parts.getAttributeValue(3, "type");
222                 if (metazonesDSTSet.contains(metazoneName)) {
223                     for (String str : ImmutableSet.of("generic", "standard", "daylight")) {
224                         set.add(path.substring(0, path.lastIndexOf('/') + 1) + str);
225                     }
226                 }
227             }
228         },
229         DAYS {
230             @Override
231             @SuppressWarnings("unused")
addPaths(Set<String> set, CLDRFile cldrFile, String path, XPathParts parts)232             void addPaths(Set<String> set, CLDRFile cldrFile, String path, XPathParts parts) {
233                String dayName = parts.size() > 7 ? parts.getAttributeValue(7, "type") : null;
234                 // This is just a quick check to make sure the path is good.
235                 if (dayName != null && days.contains(dayName)) {
236                     for (String str : days) {
237                         parts.setAttribute("day", "type", str);
238                         set.add(parts.toString());
239                     }
240                 }
241             }
242         },
243         DAY_PERIODS {
244             @Override
addPaths(Set<String> set, CLDRFile cldrFile, String path, XPathParts parts)245             void addPaths(Set<String> set, CLDRFile cldrFile, String path, XPathParts parts) {
246                 if (path.endsWith("alias")) {
247                     set.add(path);
248                 } else {
249                     String dayPeriodType = parts.findAttributeValue("dayPeriod", "type");
250                     if (ampm.contains(dayPeriodType)) {
251                         for (String s : ampm) {
252                             parts.setAttribute("dayPeriod", "type", s);
253                             set.add(parts.toString());
254                         }
255                     } else {
256                         SupplementalDataInfo supplementalData = SupplementalDataInfo.getInstance(cldrFile.getSupplementalDirectory());
257                         DayPeriodInfo.Type dayPeriodContext = DayPeriodInfo.Type.fromString(parts.findAttributeValue("dayPeriodContext", "type"));
258                         DayPeriodInfo dpi = supplementalData.getDayPeriods(dayPeriodContext, cldrFile.getLocaleID());
259                         List<DayPeriod> dayPeriods = dpi.getPeriods();
260                         DayPeriod thisDayPeriod = DayPeriod.fromString(dayPeriodType);
261                         if (dayPeriods.contains(thisDayPeriod)) {
262                             for (DayPeriod d : dayPeriods) {
263                                 parts.setAttribute("dayPeriod", "type", d.name());
264                                 set.add(parts.toString());
265                             }
266                         }
267                     }
268                 }
269             }
270         },
271         QUARTERS {
272             @Override
273             @SuppressWarnings("unused")
addPaths(Set<String> set, CLDRFile cldrFile, String path, XPathParts parts)274             void addPaths(Set<String> set, CLDRFile cldrFile, String path, XPathParts parts) {
275                 String quarterName = parts.size() > 7 ? parts.getAttributeValue(7, "type") : null;
276                 Integer quarter = quarterName == null ? 0 : Integer.valueOf(quarterName);
277                 if (quarter > 0 && quarter <= 4) { // This is just a quick check to make sure the path is good.
278                     for (Integer i = 1; i <= 4; i++) {
279                         parts.setAttribute("quarter", "type", i.toString());
280                         set.add(parts.toString());
281                     }
282                 }
283             }
284         },
285         MONTHS {
286             @Override
287             @SuppressWarnings("unused")
addPaths(Set<String> set, CLDRFile cldrFile, String path, XPathParts parts)288             void addPaths(Set<String> set, CLDRFile cldrFile, String path, XPathParts parts) {
289                 String calType = parts.size() > 3 ? parts.getAttributeValue(3, "type") : null;
290                 String monthName = parts.size() > 7 ? parts.getAttributeValue(7, "type") : null;
291                 Integer month = monthName == null ? 0 : Integer.valueOf(monthName);
292                 int calendarMonthMax = calendarsWith13Months.contains(calType) ? 13 : 12;
293                 if (month > 0 && month <= calendarMonthMax) { // This is just a quick check to make sure the path is good.
294                     for (Integer i = 1; i <= calendarMonthMax; i++) {
295                         parts.setAttribute("month", "type", i.toString());
296                         if ("hebrew".equals(calType)) {
297                             parts.removeAttribute("month", "yeartype");
298                         }
299                         set.add(parts.toString());
300                     }
301                     if ("hebrew".equals(calType)) { // Add extra hebrew calendar leap month
302                         parts.setAttribute("month", "type", Integer.toString(7));
303                         parts.setAttribute("month", "yeartype", "leap");
304                         set.add(parts.toString());
305                     }
306                 }
307             }
308         },
309         RELATIVE {
310             @Override
311             @SuppressWarnings("unused")
addPaths(Set<String> set, CLDRFile cldrFile, String path, XPathParts parts)312             void addPaths(Set<String> set, CLDRFile cldrFile, String path, XPathParts parts) {
313                 String fieldType = parts.findAttributeValue("field", "type");
314                 String relativeType = parts.findAttributeValue("relative", "type");
315                 Integer relativeValue = relativeType == null ? 999 : Integer.valueOf(relativeType);
316                 if (relativeValue >= -3 && relativeValue <= 3) { // This is just a quick check to make sure the path is good.
317                     if (!(nowUnits.contains(fieldType) && relativeValue == 0)) { // Workaround for "now", "this hour", "this minute"
318                         int limit = 1;
319                         if (fieldType != null && fieldType.startsWith("day")) {
320                             limit = 3;
321                         }
322                         for (Integer i = -1 * limit; i <= limit; i++) {
323                             parts.setAttribute("relative", "type", i.toString());
324                             set.add(parts.toString());
325                         }
326                     }
327                 }
328             }
329         },
330         DECIMAL_FORMAT_LENGTH {
331             @Override
332             @SuppressWarnings("unused")
addPaths(Set<String> set, CLDRFile cldrFile, String path, XPathParts parts)333             void addPaths(Set<String> set, CLDRFile cldrFile, String path, XPathParts parts) {
334                 PluralInfo pluralInfo = getPluralInfo(cldrFile);
335                 Set<Count> pluralTypes = pluralInfo.getCounts();
336                 String decimalFormatLengthType = parts.size() > 3 ? parts.getAttributeValue(3, "type") : null;
337                 String decimalFormatPatternType = parts.size() > 5 ? parts.getAttributeValue(5, "type") : null;
338                 if (decimalFormatLengthType != null && decimalFormatPatternType != null &&
339                         compactDecimalFormatLengths.contains(decimalFormatLengthType)) {
340                     int numZeroes = decimalFormatPatternType.length() - 1;
341                     int baseZeroes = (numZeroes / 3) * 3;
342                     for (int i = 0; i < 3; i++) {
343                         // This gives us "baseZeroes+i" zeroes at the end.
344                         String patType = "1" + String.format(String.format("%%0%dd", baseZeroes + i), 0);
345                         parts.setAttribute(5, "type", patType);
346                         for (Count count : pluralTypes) {
347                             parts.setAttribute(5, "count", count.toString());
348                             set.add(parts.toString());
349                         }
350                     }
351                 }
352             }
353         },
354         COUNT {
355              @Override
356              @SuppressWarnings("unused")
addPaths(Set<String> set, CLDRFile cldrFile, String path, XPathParts parts)357              void addPaths(Set<String> set, CLDRFile cldrFile, String path, XPathParts parts) {
358                  PluralInfo pluralInfo = getPluralInfo(cldrFile);
359                  Set<Count> pluralTypes = pluralInfo.getCounts();
360                  String lastElement = parts.getElement(-1);
361                  for (Count count : pluralTypes) {
362                      parts.setAttribute(lastElement, "count", count.toString());
363                      set.add(parts.toString());
364                  }
365              }
366         };
367 
        /**
         * Adds to the given set the paths that form the logical group of the given path,
         * according to this path type.
         *
         * @param set the set to add the group's paths to
         * @param cldrFile the file being processed; the implementations that use it read its
         *        locale ID and supplemental directory
         * @param path the distinguishing xpath, as a string
         * @param parts the parsed form of path; several implementations modify it in place
         *        (setAttribute / removeAttribute) while generating the group
         */
        abstract void addPaths(Set<String> set, CLDRFile cldrFile, String path, XPathParts parts);
369 
370         /**
371          * Is the given PathType locale-dependent (for caching)?
372          *
373          * @param pathType the PathType
374          * @return the boolean
375          */
isLocaleDependent(PathType pathType)376         private static boolean isLocaleDependent(PathType pathType) {
377             /*
378              * The paths that are locale-dependent are @count and /dayPeriods.
379              */
380             return (pathType == COUNT || pathType == DAY_PERIODS);
381         }
382 
383         /**
384          * Get the PathType from the given path
385          *
386          * @param path the path
387          * @return the PathType
388          *
389          * Note: it would be more elegant and cleaner, but slower, if we used XPathParts to
390          * determine the PathType. We avoid that since XPathParts.set is a performance hot spot.
391          */
getPathTypeFromPath(String path)392         private static PathType getPathTypeFromPath(String path) {
393             /*
394              * Would changing the order of these tests ever change the return value?
395              * Assume it could if in doubt.
396              */
397             if (path.indexOf("/metazone") > 0) {
398                 return PathType.METAZONE;
399             }
400             if (path.indexOf("/days") > 0) {
401                 return PathType.DAYS;
402             }
403             if (path.indexOf("/dayPeriods") > 0) {
404                 return PathType.DAY_PERIODS;
405             }
406             if (path.indexOf("/quarters") > 0) {
407                 return PathType.QUARTERS;
408             }
409             if (path.indexOf("/months") > 0) {
410                 return PathType.MONTHS;
411             }
412             if (path.indexOf("/relative[") > 0) {
413                 /*
414                  * include "[" in "/relative[" to avoid matching "/relativeTime" or "/relativeTimePattern".
415                  */
416                 return PathType.RELATIVE;
417             }
418             if (path.indexOf("/decimalFormatLength") > 0) {
419                 return PathType.DECIMAL_FORMAT_LENGTH;
420             }
421             if (path.indexOf("[@count=") > 0) {
422                 return PathType.COUNT;
423             }
424             return PathType.SINGLETON;
425         }
426 
427         /**
428          * Get the PathType from the given XPathParts
429          *
430          * @param parts the XPathParts
431          * @return the PathType
432          */
getPathTypeFromParts(XPathParts parts)433         private static PathType getPathTypeFromParts(XPathParts parts) {
434             /*
435              * Would changing the order of these tests ever change the return value?
436              * Assume it could if in doubt.
437              */
438             if (parts.containsElement("metazone")) {
439                 return PathType.METAZONE;
440             }
441             if (parts.containsElement("days")) {
442                 return PathType.DAYS;
443             }
444             if (parts.containsElement("dayPeriods")) {
445                 return PathType.DAY_PERIODS;
446             }
447             if (parts.containsElement("quarters")) {
448                 return PathType.QUARTERS;
449             }
450             if (parts.containsElement("months")) {
451                 return PathType.MONTHS;
452             }
453             if (parts.containsElement("relative")) {
454                 return PathType.RELATIVE;
455             }
456             if (parts.containsElement("decimalFormatLength")) {
457                 return PathType.DECIMAL_FORMAT_LENGTH;
458             }
459             if (parts.containsAttribute("count")) { // containsAttribute not containsElement
460                 return PathType.COUNT;
461             }
462             return PathType.SINGLETON;
463         }
464     }
465 }
466