package org.unicode.cldr.util;

import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo;

import com.google.common.base.Joiner;
import com.google.common.collect.ImmutableListMultimap;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableMultimap;
import com.google.common.collect.ImmutableSet;
import com.ibm.icu.util.Freezable;
import com.ibm.icu.util.Output;

/**
 * Get the info from supplemental data, eg CLDRConfig.getInstance().getSupplementalDataInfo().getGrammarInfo("fr"); Use hasGrammarInfo() to see which locales have it.
 * <p>
 * Internally this is a nested map: target → feature → scope (usage) → set of values.
 * Instances are mutable until {@link #freeze()} is called.
 *
 * @author markdavis
 */
public class GrammarInfo implements Freezable<GrammarInfo> {

    public enum GrammaticalTarget {nominal}

    public enum GrammaticalFeature {
        grammaticalNumber("plural", "Ⓟ", "other"),
        grammaticalCase("case", "Ⓒ", "nominative"),
        grammaticalDefiniteness("definiteness", "Ⓓ", "indefinite"),
        grammaticalGender("gender", "Ⓖ", "neuter");

        private final String shortName;
        private final String symbol;
        private final String defaultValue;

        /** Matches the start of a path attribute for one of the grammatical features, eg {@code [@case=}. */
        public static final Pattern PATH_HAS_FEATURE = Pattern.compile("\\[@(count|case|gender|definiteness)=");

        GrammaticalFeature(String shortName, String symbol, String defaultValue) {
            this.shortName = shortName;
            this.symbol = symbol;
            this.defaultValue = defaultValue;
        }

        public String getShortName() {
            return shortName;
        }

        public CharSequence getSymbol() {
            return symbol;
        }

        /**
         * Returns the default value for this feature.
         *
         * @param values the values available for this feature; may be null
         * @return for {@code grammaticalGender}, "masculine" when {@code values} is non-null and
         *         does not contain "neuter"; in every other case the feature's fixed default value
         */
        public String getDefault(Collection<String> values) {
            final boolean genderWithoutNeuter = this == grammaticalGender
                && values != null
                && !values.contains("neuter");
            return genderWithoutNeuter ? "masculine" : defaultValue;
        }

        /**
         * Searches the path for {@link #PATH_HAS_FEATURE}.
         *
         * @return the matcher, positioned on the first match, or null if there is no match
         */
        public static Matcher pathHasFeature(String path) {
            Matcher result = PATH_HAS_FEATURE.matcher(path);
            return result.find() ? result : null;
        }

        /** Maps each short name (eg "plural", "case") to its enum constant. */
        static final Map<String, GrammaticalFeature> shortNameToEnum =
            ImmutableMap.copyOf(Arrays.stream(GrammaticalFeature.values())
                .collect(Collectors.toMap(e -> e.shortName, e -> e)));

        /**
         * Looks up a feature by its short name, falling back to the enum constant name.
         *
         * @throws IllegalArgumentException if {@code name} is neither a short name nor a constant name
         */
        public static GrammaticalFeature fromName(String name) {
            GrammaticalFeature result = shortNameToEnum.get(name);
            return result != null ? result : valueOf(name);
        }
    }

    public enum GrammaticalScope {general, units}

    private Map<GrammaticalTarget, Map<GrammaticalFeature, Map<GrammaticalScope, Set<String>>>> targetToFeatureToUsageToValues = new TreeMap<>();
    private boolean frozen = false;

    /**
     * Ensures the nested maps for target/feature/usage exist and returns the innermost value set.
     * An entry for {@code target} is always created; when {@code feature} is null nothing deeper
     * is created and null is returned.
     */
    private Set<String> getOrCreateValues(GrammaticalTarget target, GrammaticalFeature feature, GrammaticalScope usage) {
        // Explicit get-then-put (rather than computeIfAbsent) so that lookups of existing keys
        // never attempt to modify the map.
        Map<GrammaticalFeature, Map<GrammaticalScope, Set<String>>> featureToUsageToValues = targetToFeatureToUsageToValues.get(target);
        if (featureToUsageToValues == null) {
            featureToUsageToValues = new TreeMap<>();
            targetToFeatureToUsageToValues.put(target, featureToUsageToValues);
        }
        if (feature == null) {
            return null;
        }
        Map<GrammaticalScope, Set<String>> usageToValues = featureToUsageToValues.get(feature);
        if (usageToValues == null) {
            usageToValues = new TreeMap<>();
            featureToUsageToValues.put(feature, usageToValues);
        }
        Set<String> values = usageToValues.get(usage);
        if (values == null) {
            values = new TreeSet<>();
            usageToValues.put(usage, values);
        }
        return values;
    }

    /**
     * Adds a single value for the target/feature/usage combination.
     * With a null {@code feature} only the target is recorded. With a null {@code value} the
     * (possibly empty) value set is still created, but nothing is added to it.
     */
    public void add(GrammaticalTarget target, GrammaticalFeature feature, GrammaticalScope usage, String value) {
        Set<String> values = getOrCreateValues(target, feature, usage);
        if (values != null && value != null) {
            values.add(value);
        }
    }

    /**
     * Adds all of {@code valueSet} for the target/feature/usage combination.
     * With a null {@code feature} only the target is recorded and {@code valueSet} is ignored.
     */
    public void add(GrammaticalTarget target, GrammaticalFeature feature, GrammaticalScope usage, Collection<String> valueSet) {
        Set<String> values = getOrCreateValues(target, feature, usage);
        if (values != null) {
            values.addAll(valueSet);
        }
    }


    /**
     * Adds from raw, space-separated attribute strings.
     * A null {@code usagesRaw} is treated as the single scope {@code general}; a null
     * {@code valuesRaw} is treated as an empty list of values.
     * <p>
     * Note: when there is known to be no features, the featureRaw will be null
     */
    public void add(String targetsRaw, String featureRaw, String usagesRaw, String valuesRaw) {
        for (String targetString : SupplementalDataInfo.split_space.split(targetsRaw)) {
            GrammaticalTarget target = GrammaticalTarget.valueOf(targetString);
            if (featureRaw == null) {
                add(target, null, null, (String) null);
            } else {
                final GrammaticalFeature feature = GrammaticalFeature.valueOf(featureRaw);

                List<String> usages = usagesRaw == null
                    ? Collections.singletonList(GrammaticalScope.general.toString())
                    : SupplementalDataInfo.split_space.splitToList(usagesRaw);

                List<String> values = valuesRaw == null
                    ? Collections.emptyList()
                    : SupplementalDataInfo.split_space.splitToList(valuesRaw);
                for (String usageRaw : usages) {
                    GrammaticalScope usage = GrammaticalScope.valueOf(usageRaw);
                    add(target, feature, usage, values);
                }
            }
        }
    }

    @Override
    public boolean isFrozen() {
        return frozen;
    }

    /**
     * Replaces the internal map with the result of {@code CldrUtility.protectCollection}
     * (presumably an unmodifiable copy — confirm against CldrUtility) and marks this object frozen.
     *
     * @throws IllegalArgumentException if the protected map is not equal to the original
     */
    @Override
    public GrammarInfo freeze() {
        if (!frozen) {
            Map<GrammaticalTarget, Map<GrammaticalFeature, Map<GrammaticalScope, Set<String>>>> temp = CldrUtility.protectCollection(targetToFeatureToUsageToValues);
            if (!temp.equals(targetToFeatureToUsageToValues)) {
                throw new IllegalArgumentException();
            }
            targetToFeatureToUsageToValues = temp;
            frozen = true;
        }
        return this;
    }

    /** Returns a new, unfrozen GrammarInfo populated with the same entries as this one. */
    @Override
    public GrammarInfo cloneAsThawed() {
        GrammarInfo result = new GrammarInfo();
        this.forEach3((t, f, u, v) -> result.add(t, f, u, v));
        return result;
    }

    /** Callback taking target, feature, usage and a single value. */
    static interface Handler4<T, F, U, V> {
        void apply(T t, F f, U u, V v);
    }

    /**
     * Calls the handler once per (target, feature, usage, value).
     * A target with no features produces a single call with null feature, usage and value.
     */
    public void forEach(Handler4<GrammaticalTarget, GrammaticalFeature, GrammaticalScope, String> handler) {
        for (Entry<GrammaticalTarget, Map<GrammaticalFeature, Map<GrammaticalScope, Set<String>>>> entry1 : targetToFeatureToUsageToValues.entrySet()) {
            GrammaticalTarget target = entry1.getKey();
            final Map<GrammaticalFeature, Map<GrammaticalScope, Set<String>>> featureToUsageToValues = entry1.getValue();
            if (featureToUsageToValues.isEmpty()) {
                handler.apply(target, null, null, null);
            } else {
                for (Entry<GrammaticalFeature, Map<GrammaticalScope, Set<String>>> entry2 : featureToUsageToValues.entrySet()) {
                    GrammaticalFeature feature = entry2.getKey();
                    for (Entry<GrammaticalScope, Set<String>> entry3 : entry2.getValue().entrySet()) {
                        final GrammaticalScope usage = entry3.getKey();
                        for (String value : entry3.getValue()) {
                            handler.apply(target, feature, usage, value);
                        }
                    }
                }
            }
        }
    }

    /** Callback taking target, feature, usage and the whole collection of values. */
    static interface Handler3<T, F, U, V> {
        void apply(T t, F f, U u, V v);
    }

    /**
     * Calls the handler once per (target, feature, usage), passing the whole value collection.
     * A target with no features produces a single call with null feature, usage and values.
     */
    public void forEach3(Handler3<GrammaticalTarget, GrammaticalFeature, GrammaticalScope, Collection<String>> handler) {
        for (Entry<GrammaticalTarget, Map<GrammaticalFeature, Map<GrammaticalScope, Set<String>>>> entry1 : targetToFeatureToUsageToValues.entrySet()) {
            GrammaticalTarget target = entry1.getKey();
            final Map<GrammaticalFeature, Map<GrammaticalScope, Set<String>>> featureToUsageToValues = entry1.getValue();
            if (featureToUsageToValues.isEmpty()) {
                handler.apply(target, null, null, null);
            } else {
                for (Entry<GrammaticalFeature, Map<GrammaticalScope, Set<String>>> entry2 : featureToUsageToValues.entrySet()) {
                    GrammaticalFeature feature = entry2.getKey();
                    for (Entry<GrammaticalScope, Set<String>> entry3 : entry2.getValue().entrySet()) {
                        final GrammaticalScope usage = entry3.getKey();
                        final Collection<String> values = entry3.getValue();
                        handler.apply(target, feature, usage, values);
                    }
                }
            }
        }
    }

    /**
     * Returns the values recorded for the target/feature/usage combination.
     * <ul>
     * <li>If the target or the feature is unknown, returns an empty set.</li>
     * <li>Otherwise returns the values for {@code usage}; if there is no entry for that usage,
     * returns the entry for {@link GrammaticalScope#general}, which is null when that is
     * absent too.</li>
     * </ul>
     */
    public Collection<String> get(GrammaticalTarget target, GrammaticalFeature feature, GrammaticalScope usage) {
        Map<GrammaticalFeature, Map<GrammaticalScope, Set<String>>> featureToUsageToValues = targetToFeatureToUsageToValues.get(target);
        if (featureToUsageToValues == null) {
            return Collections.emptySet();
        }
        Map<GrammaticalScope, Set<String>> usageToValues = featureToUsageToValues.get(feature);
        if (usageToValues == null) {
            return Collections.emptySet();
        }
        Collection<String> result = usageToValues.get(usage);
        return result == null
            ? usageToValues.get(GrammaticalScope.general)
            : result;
    }

    /** Returns true if anything (even an empty feature map) has been recorded for the target. */
    public boolean hasInfo(GrammaticalTarget target) {
        return targetToFeatureToUsageToValues.containsKey(target);
    }

    @Override
    public String toString() {
        return toString("\n");
    }

    /**
     * Formats one tab-separated {@code {target} {feature} {usage} {values}} row per entry, each
     * row preceded by {@code lineSep}. Null components are shown as empty braces.
     */
    public String toString(String lineSep) {
        StringBuilder result = new StringBuilder();
        this.forEach3((t, f, u, v) -> {
            result.append(lineSep);
            result.append("{" + (t == null ? "" : t.toString()) + "}"
                + "\t{" + (f == null ? "" : f.toString()) + "}"
                + "\t{" + (u == null ? "" : u.toString()) + "}"
                + "\t{" + (v == null ? "" : Joiner.on(' ').join(v)) + "}");
        });
        return result.toString();
    }

    /**
     * Builds the path attribute string (eg {@code [@count="one"][@gender="feminine"][@case="dative"]})
     * for the given plural, gender and case.
     * <ul>
     * <li>count is emitted whenever the path type has the number feature; a null plural becomes "other".</li>
     * <li>gender and case are emitted only when {@code grammarInfo} and the value are non-null, the
     * path type has that feature, and the value differs from the feature's default.</li>
     * </ul>
     */
    public static String getGrammaticalInfoAttributes(GrammarInfo grammarInfo, UnitPathType pathType, String plural, String gender, String caseVariant) {
        StringBuilder grammaticalAttributes = new StringBuilder();
        if (pathType.features.contains(GrammaticalFeature.grammaticalNumber)) { // count is special
            grammaticalAttributes.append("[@count=\"").append(plural == null ? "other" : plural).append("\"]");
        }
        if (grammarInfo != null && gender != null
            && pathType.features.contains(GrammaticalFeature.grammaticalGender)) {
            // The default gender depends on which genders the locale has for units.
            Collection<String> genders = grammarInfo.get(GrammaticalTarget.nominal, GrammaticalFeature.grammaticalGender, GrammaticalScope.units);
            if (!gender.equals(GrammaticalFeature.grammaticalGender.getDefault(genders))) {
                grammaticalAttributes.append("[@gender=\"").append(gender).append("\"]");
            }
        }
        if (grammarInfo != null && caseVariant != null
            && pathType.features.contains(GrammaticalFeature.grammaticalCase)
            && !caseVariant.equals(GrammaticalFeature.grammaticalCase.getDefault(null))) {
            grammaticalAttributes.append("[@case=\"").append(caseVariant).append("\"]");
        }
        return grammaticalAttributes.toString();
    }

    public static final ImmutableMultimap<String, PluralInfo.Count> NON_COMPUTABLE_PLURALS = ImmutableListMultimap.of(
        "pl", PluralInfo.Count.one,
        "pl", PluralInfo.Count.other,
        "ru", PluralInfo.Count.one,
        "ru", PluralInfo.Count.other);

    /**
     * TODO: change this to be data-file driven
     */
    public static final Set<String> SEED_LOCALES = ImmutableSet.of("pl", "ru", "da", "de", "nb", "sv", "hi", "id", "es", "fr", "it", "nl", "pt", "en", "ja", "th", "vi", "zh", "zh_TW", "ko", "yue");

    /**
     * TODO: change this to be data-file driven
     */
    public static final Set<String> SPECIAL_TRANSLATION_UNITS = ImmutableSet.of(
        // new in v38
        "mass-grain",
        "volume-dessert-spoon",
        "volume-dessert-spoon-imperial",
        "volume-drop",
        "volume-dram",
        "volume-jigger",
        "volume-pinch",
        "volume-quart-imperial",
        // "volume-pint-imperial",

        "acceleration-meter-per-square-second", "area-acre", "area-hectare",
        "area-square-centimeter", "area-square-foot", "area-square-kilometer", "area-square-mile", "concentr-percent", "consumption-mile-per-gallon",
        "consumption-mile-per-gallon-imperial", "duration-day", "duration-hour", "duration-minute", "duration-month", "duration-second", "duration-week",
        "duration-year", "energy-foodcalorie", "energy-kilocalorie", "length-centimeter", "length-foot", "length-inch", "length-kilometer", "length-meter",
        "length-mile", "length-millimeter", "length-parsec", "length-picometer", "length-solar-radius", "length-yard", "light-solar-luminosity", "mass-dalton",
        "mass-earth-mass", "mass-milligram", "mass-solar-mass", "pressure-kilopascal", "speed-kilometer-per-hour", "speed-meter-per-second", "speed-mile-per-hour",
        "temperature-celsius", "temperature-fahrenheit", "temperature-generic", "temperature-kelvin", "acceleration-g-force", "consumption-liter-per-100-kilometer",
        "mass-gram", "mass-kilogram", "mass-ounce", "mass-pound", "volume-centiliter", "volume-cubic-centimeter", "volume-cubic-foot", "volume-cubic-mile",
        "volume-cup", "volume-deciliter", "volume-fluid-ounce", "volume-fluid-ounce-imperial", "volume-gallon", "volume-gallon-imperial",
        "volume-liter", "volume-milliliter", "volume-pint", "volume-quart", "volume-tablespoon", "volume-teaspoon");
    // compounds
    // "kilogram-per-cubic-meter", "kilometer-per-liter", "concentr-gram-per-mole", "speed-mile-per-second", "volumetricflow-cubic-foot-per-second",
    // "volumetricflow-cubic-meter-per-second", "gram-per-cubic-centimeter",

    /**
     * Sets {@code sourceCase} and {@code sourcePlural} to the case/plural combination that the
     * desired form is derived from, for the locales with rules here ("pl" and "ru").
     *
     * @throws UnsupportedOperationException for any other locale
     */
    public void getSourceCaseAndPlural(String locale, String gender, String value, String desiredCase, String desiredPlural,
        Output<String> sourceCase, Output<String> sourcePlural) {
        switch (locale) {
        case "pl":
            getSourceCaseAndPluralPolish(gender, value, desiredCase, desiredPlural, sourceCase, sourcePlural);
            break;
        case "ru":
            getSourceCaseAndPluralRussian(gender, value, desiredCase, desiredPlural, sourceCase, sourcePlural);
            break;
        default:
            throw new UnsupportedOperationException(locale);
        }
    }

    /** Russian rules for paucal (few) and fractional (other)
     * <pre>
     * plural = few
     * Nominative ⇒ genitive singular
     * Accusative + masculine ⇒ genitive singular
     * All other combinations of gender + case ⇒ same-case, plural
     *
     * plural = other
     * genitive singular
     *
     * Plurals:
     * one,
     * few (2~4),
     * many, = plural
     * other (where other is 0.0~1.5, 10.0, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0)
     * </pre>
     * For any other desired plural the outputs are left untouched.
     */
    private void getSourceCaseAndPluralRussian(String gender, String value,
        String desiredCase, String desiredPlural,
        Output<String> sourceCase, Output<String> sourcePlural) {
        switch (desiredPlural) {
        case "few":
            // default source
            sourceCase.value = desiredCase;
            sourcePlural.value = "many";
            // special cases
            switch (desiredCase) {
            case "nominative":
                sourceCase.value = "genitive";
                sourcePlural.value = "one";
                break;
            case "accusative":
                switch (gender) {
                case "masculine":
                    sourceCase.value = "genitive";
                    sourcePlural.value = "one";
                    break;
                }
                break;
            }
            // Must not fall through: the "other" branch would overwrite everything computed above.
            return;
        case "other":
            sourceCase.value = "genitive";
            sourcePlural.value = "one";
            return;
        }
    }

    /** Polish rules
     * <pre>
     * plural = few
     *
     * neuter + ending in -um + (nominative, accusative) ⇒ vocative plural
     * Feminine||neuter + (nominative, accusative) ⇒ genitive singular
     * Animate||inanimate + (nominative, accusative) ⇒ vocative plural
     * Personal + nominative ⇒ vocative plural
     * Personal + accusative ⇒ genitive plural
     * All other combinations of gender + case ⇒ same-case, plural
     *
     * plural = other
     * genitive singular
     *
     * Plurals:
     * one,
     * few (2~4),
     * many, = plural
     * other (where other is 0.0~1.5, 10.0, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0)
     * </pre>
     * NOTE(review): the code differs from the table above in two places — for feminine (and
     * neuter not ending in -um) it sets nominative + few rather than genitive singular, and a
     * desired case of "vocative" is handled the same way as "accusative". Confirm which is intended.
     */
    private void getSourceCaseAndPluralPolish(String gender, String value,
        String desiredCase, String desiredPlural,
        Output<String> sourceCase, Output<String> sourcePlural) {
        switch (desiredPlural) {
        case "few":
            // default
            sourceCase.value = desiredCase;
            sourcePlural.value = "many";
            // special cases
            boolean isNominative = false;
            switch (desiredCase) {
            case "nominative":
                isNominative = true;
                // deliberate fall through: nominative shares the gender handling below
            case "vocative":
            case "accusative":
                switch (gender) {
                case "neuter":
                    if (value.endsWith("um")) {
                        sourceCase.value = "vocative";
                        break;
                    }
                    // otherwise fall through to feminine
                case "feminine":
                    sourceCase.value = "nominative";
                    sourcePlural.value = "few";
                    break;
                case "animate":
                case "inanimate":
                    sourceCase.value = "vocative";
                    break;
                case "personal":
                    sourceCase.value = isNominative ? "vocative" : "genitive";
                    break;
                }
                break;
            }
            return;
        case "other":
            sourceCase.value = "genitive";
            sourcePlural.value = "one";
            return;
        }
    }
}