1 package org.unicode.cldr.util; 2 3 import java.util.HashSet; 4 import java.util.List; 5 import java.util.Set; 6 import java.util.TreeSet; 7 import java.util.concurrent.ConcurrentHashMap; 8 9 import org.unicode.cldr.util.DayPeriodInfo.DayPeriod; 10 import org.unicode.cldr.util.PluralRulesUtil.KeywordStatus; 11 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo; 12 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count; 13 import org.unicode.cldr.util.SupplementalDataInfo.PluralType; 14 15 import com.google.common.collect.ArrayListMultimap; 16 import com.google.common.collect.ImmutableList; 17 import com.google.common.collect.ImmutableSet; 18 import com.google.common.collect.Multimap; 19 import com.ibm.icu.text.PluralRules; 20 21 public class LogicalGrouping { 22 23 public static final ImmutableSet<String> metazonesDSTSet = ImmutableSet.of( 24 "Acre", "Africa_Western", "Alaska", "Almaty", "Amazon", 25 "America_Central", "America_Eastern", "America_Mountain", "America_Pacific", "Anadyr", "Apia", 26 "Aqtau", "Aqtobe", "Arabian", "Argentina", "Argentina_Western", "Armenia", 27 "Atlantic", "Australia_Central", "Australia_CentralWestern", "Australia_Eastern", "Australia_Western", 28 "Azerbaijan", "Azores", "Bangladesh", "Brasilia", "Cape_Verde", 29 "Chatham", "Chile", "China", "Choibalsan", "Colombia", "Cook", "Cuba", "Easter", 30 "Europe_Central", "Europe_Eastern", "Europe_Western", "Falkland", "Fiji", "Georgia", 31 "Greenland_Eastern", "Greenland_Western", "Hawaii_Aleutian", "Hong_Kong", "Hovd", 32 "Iran", "Irkutsk", "Israel", "Japan", "Kamchatka", "Korea", "Krasnoyarsk", 33 "Lord_Howe", "Macau", "Magadan", "Mauritius", "Mexico_Northwest", "Mexico_Pacific", "Mongolia", "Moscow", "New_Caledonia", 34 "New_Zealand", "Newfoundland", "Noronha", "Novosibirsk", "Omsk", "Pakistan", "Paraguay", "Peru", "Philippines", 35 "Pierre_Miquelon", "Qyzylorda", "Sakhalin", "Samara", "Samoa", 36 "Taipei", "Tonga", "Turkmenistan", "Uruguay", "Uzbekistan", 37 "Vanuatu", "Vladivostok", "Volgograd", "Yakutsk", "Yekaterinburg"); 38 39 public static final ImmutableList<String> days = ImmutableList.of("sun", "mon", "tue", "wed", "thu", "fri", "sat"); 40 41 public static final ImmutableSet<String> calendarsWith13Months = ImmutableSet.of("coptic", "ethiopic", "hebrew"); 42 public static final ImmutableSet<String> compactDecimalFormatLengths = ImmutableSet.of("short", "long"); 43 private static final ImmutableSet<String> ampm = ImmutableSet.of("am", "pm"); 44 private static final ImmutableSet<String> nowUnits = ImmutableSet.of("second", "second-short", "second-narrow", 45 "minute", "minute-short", "minute-narrow", "hour", "hour-short", "hour-narrow"); 46 47 /** 48 * Cache from path (String) to logical group (Set<String>) 49 */ 50 private static Multimap<String, String> cachePathToLogicalGroup = ArrayListMultimap.create(); 51 52 /** 53 * Cache from locale and path (<Pair<String, String>), to logical group (Set<String>) 54 */ 55 private static ConcurrentHashMap<Pair<String, String>, Set<String>> cacheLocaleAndPathToLogicalGroup = new ConcurrentHashMap<Pair<String, String>, Set<String>>(); 56 57 /** 58 * Statistics on occurrences of types of logical groups, for performance testing, debugging. 59 * GET_TYPE_COUNTS should be false for production to maximize performance. 60 */ 61 public static final boolean GET_TYPE_COUNTS = false; 62 public static final ConcurrentHashMap<String, Long> typeCount = GET_TYPE_COUNTS ? new ConcurrentHashMap<String, Long>() : null; 63 64 /** 65 * GET_TYPE_FROM_PARTS is more elegant when true, but performance is a little faster when it's false. 66 * This might change if XPathParts.getInstance and/or XPathParts.set are made faster. 67 */ 68 private static final boolean GET_TYPE_FROM_PARTS = false; 69 70 /** 71 * Return a sorted set of paths that are in the same logical set as the given path 72 * 73 * @param path the distinguishing xpath 74 * @return the set of paths 75 * 76 * For example, given the path 77 * 78 * //ldml/dates/calendars/calendar[@type="gregorian"]/quarters/quarterContext[@type="format"]/quarterWidth[@type="abbreviated"]/quarter[@type="1"] 79 * 80 * return the set of four paths 81 * 82 * //ldml/dates/calendars/calendar[@type="gregorian"]/quarters/quarterContext[@type="format"]/quarterWidth[@type="abbreviated"]/quarter[@type="1"] 83 * //ldml/dates/calendars/calendar[@type="gregorian"]/quarters/quarterContext[@type="format"]/quarterWidth[@type="abbreviated"]/quarter[@type="2"] 84 * //ldml/dates/calendars/calendar[@type="gregorian"]/quarters/quarterContext[@type="format"]/quarterWidth[@type="abbreviated"]/quarter[@type="3"] 85 * //ldml/dates/calendars/calendar[@type="gregorian"]/quarters/quarterContext[@type="format"]/quarterWidth[@type="abbreviated"]/quarter[@type="4"] 86 * 87 * Caches: Most of the calculations are independent of the locale, and can be cached on a static basis. 88 * The paths that are locale-dependent are /dayPeriods and @count. Those can be computed on a per-locale basis; 89 * and cached (they are shared across a number of locales). 90 * 91 * Reference: https://unicode.org/cldr/trac/ticket/11854 92 */ getPaths(CLDRFile cldrFile, String path)93 public static Set<String> getPaths(CLDRFile cldrFile, String path) { 94 if (path == null) { 95 return new TreeSet<String>(); // return empty set for null path 96 } 97 XPathParts parts = null; 98 PathType pathType = null; 99 if (GET_TYPE_FROM_PARTS) { 100 parts = XPathParts.getInstance(path); // can't always be frozen, some addPath do setAttribute 101 pathType = PathType.getPathTypeFromParts(parts); 102 } else { 103 /* 104 * XPathParts.set is expensive, so avoid it (not needed for singletons) if !GET_TYPE_FROM_PARTS 105 */ 106 pathType = PathType.getPathTypeFromPath(path); 107 } 108 109 if (GET_TYPE_COUNTS) { 110 typeCount.compute(pathType.toString(), (k, v) -> (v == null) ? 1 : v + 1); 111 } 112 113 if (pathType == PathType.SINGLETON) { 114 /* 115 * Skip cache for PathType.SINGLETON and simply return a set of one. 116 */ 117 Set<String> set = new TreeSet<String>(); 118 set.add(path); 119 return set; 120 } 121 122 if (!GET_TYPE_FROM_PARTS) { 123 parts = XPathParts.getInstance(path); 124 } 125 126 if (PathType.isLocaleDependent(pathType)) { 127 String locale = cldrFile.getLocaleID(); 128 Pair<String, String> key = new Pair<String, String>(locale, path); 129 if (cacheLocaleAndPathToLogicalGroup.containsKey(key)) { 130 return new TreeSet<String>(cacheLocaleAndPathToLogicalGroup.get(key)); 131 } 132 Set<String> set = new TreeSet<String>(); 133 pathType.addPaths(set, cldrFile, path, parts); 134 cacheLocaleAndPathToLogicalGroup.put(key, set); 135 return set; 136 } else { 137 /* 138 * All other paths are locale-independent. 139 */ 140 if (cachePathToLogicalGroup.containsKey(path)) { 141 return new TreeSet<String>(cachePathToLogicalGroup.get(path)); 142 } 143 Set<String> set = new TreeSet<String>(); 144 pathType.addPaths(set, cldrFile, path, parts); 145 cachePathToLogicalGroup.putAll(path, set); 146 return set; 147 } 148 } 149 150 /** 151 * Returns the plural info for a given locale. 152 */ getPluralInfo(CLDRFile cldrFile)153 private static PluralInfo getPluralInfo(CLDRFile cldrFile) { 154 SupplementalDataInfo supplementalData = SupplementalDataInfo.getInstance( 155 cldrFile.getSupplementalDirectory()); 156 return supplementalData.getPlurals(PluralType.cardinal, 157 cldrFile.getLocaleID()); 158 } 159 160 /** 161 * @param cldrFile 162 * @param path 163 * @return true if the specified path is optional in the logical grouping 164 * that it belongs to. 165 */ isOptional(CLDRFile cldrFile, String path)166 public static boolean isOptional(CLDRFile cldrFile, String path) { 167 XPathParts parts = XPathParts.getInstance(path); 168 169 if (parts.containsElement("relative")) { 170 String fieldType = parts.findAttributeValue("field", "type"); 171 String relativeType = parts.findAttributeValue("relative", "type"); 172 Integer relativeValue = relativeType == null ? 999 : Integer.valueOf(relativeType); 173 if (fieldType != null && fieldType.startsWith("day") && Math.abs(relativeValue.intValue()) >= 2) { 174 return true; // relative days +2 +3 -2 -3 are optional in a logical group. 175 } 176 } 177 // Paths with count="(zero|one)" are optional if their usage is covered 178 // fully by paths with count="(0|1)", which are always optional themselves. 179 if (!path.contains("[@count=")) return false; 180 String pluralType = parts.getAttributeValue(-1, "count"); 181 if (pluralType.equals("0") || pluralType.equals("1")) return true; 182 if (!pluralType.equals("zero") && !pluralType.equals("one")) return false; 183 184 PluralRules pluralRules = getPluralInfo(cldrFile).getPluralRules(); 185 parts.setAttribute(-1, "count", "0"); 186 Set<Double> explicits = new HashSet<Double>(); 187 if (cldrFile.isHere(parts.toString())) { 188 explicits.add(0.0); 189 } 190 parts.setAttribute(-1, "count", "1"); 191 if (cldrFile.isHere(parts.toString())) { 192 explicits.add(1.0); 193 } 194 if (!explicits.isEmpty()) { 195 // HACK: The com.ibm.icu.text prefix is needed so that ST can find it 196 // (no idea why). 197 KeywordStatus status = org.unicode.cldr.util.PluralRulesUtil.getKeywordStatus( 198 pluralRules, pluralType, 0, explicits, true); 199 if (status == KeywordStatus.SUPPRESSED) { 200 return true; 201 } 202 } 203 return false; 204 } 205 206 /** 207 * Path types for logical groupings 208 */ 209 private enum PathType { 210 SINGLETON { // no logical groups for singleton paths 211 @Override 212 @SuppressWarnings("unused") addPaths(Set<String> set, CLDRFile cldrFile, String path, XPathParts parts)213 void addPaths(Set<String> set, CLDRFile cldrFile, String path, XPathParts parts) { 214 // Do nothing. This function won't be called. 215 } 216 }, 217 METAZONE { 218 @Override 219 @SuppressWarnings("unused") addPaths(Set<String> set, CLDRFile cldrFile, String path, XPathParts parts)220 void addPaths(Set<String> set, CLDRFile cldrFile, String path, XPathParts parts) { 221 String metazoneName = parts.getAttributeValue(3, "type"); 222 if (metazonesDSTSet.contains(metazoneName)) { 223 for (String str : ImmutableSet.of("generic", "standard", "daylight")) { 224 set.add(path.substring(0, path.lastIndexOf('/') + 1) + str); 225 } 226 } 227 } 228 }, 229 DAYS { 230 @Override 231 @SuppressWarnings("unused") addPaths(Set<String> set, CLDRFile cldrFile, String path, XPathParts parts)232 void addPaths(Set<String> set, CLDRFile cldrFile, String path, XPathParts parts) { 233 String dayName = parts.size() > 7 ? parts.getAttributeValue(7, "type") : null; 234 // This is just a quick check to make sure the path is good. 235 if (dayName != null && days.contains(dayName)) { 236 for (String str : days) { 237 parts.setAttribute("day", "type", str); 238 set.add(parts.toString()); 239 } 240 } 241 } 242 }, 243 DAY_PERIODS { 244 @Override addPaths(Set<String> set, CLDRFile cldrFile, String path, XPathParts parts)245 void addPaths(Set<String> set, CLDRFile cldrFile, String path, XPathParts parts) { 246 if (path.endsWith("alias")) { 247 set.add(path); 248 } else { 249 String dayPeriodType = parts.findAttributeValue("dayPeriod", "type"); 250 if (ampm.contains(dayPeriodType)) { 251 for (String s : ampm) { 252 parts.setAttribute("dayPeriod", "type", s); 253 set.add(parts.toString()); 254 } 255 } else { 256 SupplementalDataInfo supplementalData = SupplementalDataInfo.getInstance(cldrFile.getSupplementalDirectory()); 257 DayPeriodInfo.Type dayPeriodContext = DayPeriodInfo.Type.fromString(parts.findAttributeValue("dayPeriodContext", "type")); 258 DayPeriodInfo dpi = supplementalData.getDayPeriods(dayPeriodContext, cldrFile.getLocaleID()); 259 List<DayPeriod> dayPeriods = dpi.getPeriods(); 260 DayPeriod thisDayPeriod = DayPeriod.fromString(dayPeriodType); 261 if (dayPeriods.contains(thisDayPeriod)) { 262 for (DayPeriod d : dayPeriods) { 263 parts.setAttribute("dayPeriod", "type", d.name()); 264 set.add(parts.toString()); 265 } 266 } 267 } 268 } 269 } 270 }, 271 QUARTERS { 272 @Override 273 @SuppressWarnings("unused") addPaths(Set<String> set, CLDRFile cldrFile, String path, XPathParts parts)274 void addPaths(Set<String> set, CLDRFile cldrFile, String path, XPathParts parts) { 275 String quarterName = parts.size() > 7 ? parts.getAttributeValue(7, "type") : null; 276 Integer quarter = quarterName == null ? 0 : Integer.valueOf(quarterName); 277 if (quarter > 0 && quarter <= 4) { // This is just a quick check to make sure the path is good. 278 for (Integer i = 1; i <= 4; i++) { 279 parts.setAttribute("quarter", "type", i.toString()); 280 set.add(parts.toString()); 281 } 282 } 283 } 284 }, 285 MONTHS { 286 @Override 287 @SuppressWarnings("unused") addPaths(Set<String> set, CLDRFile cldrFile, String path, XPathParts parts)288 void addPaths(Set<String> set, CLDRFile cldrFile, String path, XPathParts parts) { 289 String calType = parts.size() > 3 ? parts.getAttributeValue(3, "type") : null; 290 String monthName = parts.size() > 7 ? parts.getAttributeValue(7, "type") : null; 291 Integer month = monthName == null ? 0 : Integer.valueOf(monthName); 292 int calendarMonthMax = calendarsWith13Months.contains(calType) ? 13 : 12; 293 if (month > 0 && month <= calendarMonthMax) { // This is just a quick check to make sure the path is good. 294 for (Integer i = 1; i <= calendarMonthMax; i++) { 295 parts.setAttribute("month", "type", i.toString()); 296 if ("hebrew".equals(calType)) { 297 parts.removeAttribute("month", "yeartype"); 298 } 299 set.add(parts.toString()); 300 } 301 if ("hebrew".equals(calType)) { // Add extra hebrew calendar leap month 302 parts.setAttribute("month", "type", Integer.toString(7)); 303 parts.setAttribute("month", "yeartype", "leap"); 304 set.add(parts.toString()); 305 } 306 } 307 } 308 }, 309 RELATIVE { 310 @Override 311 @SuppressWarnings("unused") addPaths(Set<String> set, CLDRFile cldrFile, String path, XPathParts parts)312 void addPaths(Set<String> set, CLDRFile cldrFile, String path, XPathParts parts) { 313 String fieldType = parts.findAttributeValue("field", "type"); 314 String relativeType = parts.findAttributeValue("relative", "type"); 315 Integer relativeValue = relativeType == null ? 999 : Integer.valueOf(relativeType); 316 if (relativeValue >= -3 && relativeValue <= 3) { // This is just a quick check to make sure the path is good. 317 if (!(nowUnits.contains(fieldType) && relativeValue == 0)) { // Workaround for "now", "this hour", "this minute" 318 int limit = 1; 319 if (fieldType != null && fieldType.startsWith("day")) { 320 limit = 3; 321 } 322 for (Integer i = -1 * limit; i <= limit; i++) { 323 parts.setAttribute("relative", "type", i.toString()); 324 set.add(parts.toString()); 325 } 326 } 327 } 328 } 329 }, 330 DECIMAL_FORMAT_LENGTH { 331 @Override 332 @SuppressWarnings("unused") addPaths(Set<String> set, CLDRFile cldrFile, String path, XPathParts parts)333 void addPaths(Set<String> set, CLDRFile cldrFile, String path, XPathParts parts) { 334 PluralInfo pluralInfo = getPluralInfo(cldrFile); 335 Set<Count> pluralTypes = pluralInfo.getCounts(); 336 String decimalFormatLengthType = parts.size() > 3 ? parts.getAttributeValue(3, "type") : null; 337 String decimalFormatPatternType = parts.size() > 5 ? parts.getAttributeValue(5, "type") : null; 338 if (decimalFormatLengthType != null && decimalFormatPatternType != null && 339 compactDecimalFormatLengths.contains(decimalFormatLengthType)) { 340 int numZeroes = decimalFormatPatternType.length() - 1; 341 int baseZeroes = (numZeroes / 3) * 3; 342 for (int i = 0; i < 3; i++) { 343 // This gives us "baseZeroes+i" zeroes at the end. 344 String patType = "1" + String.format(String.format("%%0%dd", baseZeroes + i), 0); 345 parts.setAttribute(5, "type", patType); 346 for (Count count : pluralTypes) { 347 parts.setAttribute(5, "count", count.toString()); 348 set.add(parts.toString()); 349 } 350 } 351 } 352 } 353 }, 354 COUNT { 355 @Override 356 @SuppressWarnings("unused") addPaths(Set<String> set, CLDRFile cldrFile, String path, XPathParts parts)357 void addPaths(Set<String> set, CLDRFile cldrFile, String path, XPathParts parts) { 358 PluralInfo pluralInfo = getPluralInfo(cldrFile); 359 Set<Count> pluralTypes = pluralInfo.getCounts(); 360 String lastElement = parts.getElement(-1); 361 for (Count count : pluralTypes) { 362 parts.setAttribute(lastElement, "count", count.toString()); 363 set.add(parts.toString()); 364 } 365 } 366 }; 367 addPaths(Set<String> set, CLDRFile cldrFile, String path, XPathParts parts)368 abstract void addPaths(Set<String> set, CLDRFile cldrFile, String path, XPathParts parts); 369 370 /** 371 * Is the given PathType locale-dependent (for caching)? 372 * 373 * @param pathType the PathType 374 * @return the boolean 375 */ isLocaleDependent(PathType pathType)376 private static boolean isLocaleDependent(PathType pathType) { 377 /* 378 * The paths that are locale-dependent are @count and /dayPeriods. 379 */ 380 return (pathType == COUNT || pathType == DAY_PERIODS); 381 } 382 383 /** 384 * Get the PathType from the given path 385 * 386 * @param path the path 387 * @return the PathType 388 * 389 * Note: it would be more elegant and cleaner, but slower, if we used XPathParts to 390 * determine the PathType. We avoid that since XPathParts.set is a performance hot spot. 391 */ getPathTypeFromPath(String path)392 private static PathType getPathTypeFromPath(String path) { 393 /* 394 * Would changing the order of these tests ever change the return value? 395 * Assume it could if in doubt. 396 */ 397 if (path.indexOf("/metazone") > 0) { 398 return PathType.METAZONE; 399 } 400 if (path.indexOf("/days") > 0) { 401 return PathType.DAYS; 402 } 403 if (path.indexOf("/dayPeriods") > 0) { 404 return PathType.DAY_PERIODS; 405 } 406 if (path.indexOf("/quarters") > 0) { 407 return PathType.QUARTERS; 408 } 409 if (path.indexOf("/months") > 0) { 410 return PathType.MONTHS; 411 } 412 if (path.indexOf("/relative[") > 0) { 413 /* 414 * include "[" in "/relative[" to avoid matching "/relativeTime" or "/relativeTimePattern". 415 */ 416 return PathType.RELATIVE; 417 } 418 if (path.indexOf("/decimalFormatLength") > 0) { 419 return PathType.DECIMAL_FORMAT_LENGTH; 420 } 421 if (path.indexOf("[@count=") > 0) { 422 return PathType.COUNT; 423 } 424 return PathType.SINGLETON; 425 } 426 427 /** 428 * Get the PathType from the given XPathParts 429 * 430 * @param parts the XPathParts 431 * @return the PathType 432 */ getPathTypeFromParts(XPathParts parts)433 private static PathType getPathTypeFromParts(XPathParts parts) { 434 /* 435 * Would changing the order of these tests ever change the return value? 436 * Assume it could if in doubt. 437 */ 438 if (parts.containsElement("metazone")) { 439 return PathType.METAZONE; 440 } 441 if (parts.containsElement("days")) { 442 return PathType.DAYS; 443 } 444 if (parts.containsElement("dayPeriods")) { 445 return PathType.DAY_PERIODS; 446 } 447 if (parts.containsElement("quarters")) { 448 return PathType.QUARTERS; 449 } 450 if (parts.containsElement("months")) { 451 return PathType.MONTHS; 452 } 453 if (parts.containsElement("relative")) { 454 return PathType.RELATIVE; 455 } 456 if (parts.containsElement("decimalFormatLength")) { 457 return PathType.DECIMAL_FORMAT_LENGTH; 458 } 459 if (parts.containsAttribute("count")) { // containsAttribute not containsElement 460 return PathType.COUNT; 461 } 462 return PathType.SINGLETON; 463 } 464 } 465 } 466