1 // © 2019 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 package org.unicode.icu.tool.cldrtoicu.mapper; 4 5 import static com.google.common.base.Preconditions.checkState; 6 import static org.unicode.cldr.api.AttributeKey.keyOf; 7 import static org.unicode.cldr.api.CldrData.PathOrder.NESTED_GROUPING; 8 import static org.unicode.cldr.api.CldrDataType.SUPPLEMENTAL; 9 10 import java.util.ArrayList; 11 import java.util.List; 12 13 import org.unicode.cldr.api.AttributeKey; 14 import org.unicode.cldr.api.CldrData; 15 import org.unicode.cldr.api.CldrDataSupplier; 16 import org.unicode.cldr.api.CldrDataType; 17 import org.unicode.cldr.api.CldrPath; 18 import org.unicode.cldr.api.CldrValue; 19 import org.unicode.cldr.api.FilteredData; 20 import org.unicode.cldr.api.PathMatcher; 21 import org.unicode.icu.tool.cldrtoicu.IcuData; 22 import org.unicode.icu.tool.cldrtoicu.RbPath; 23 import org.unicode.icu.tool.cldrtoicu.CldrDataProcessor; 24 25 import com.google.common.annotations.VisibleForTesting; 26 import com.google.common.collect.ImmutableList; 27 import com.google.common.collect.ImmutableMap; 28 29 /** 30 * A mapper to collect plural data from {@link CldrDataType#SUPPLEMENTAL SUPPLEMENTAL} data via 31 * the paths: 32 * <pre>{@code 33 * //supplementalData/plurals[@type=*]/pluralRules[@locales=*]/pluralRule[@count=*] 34 * }</pre> 35 */ 36 public final class PluralsMapper { 37 38 private static final AttributeKey PLURALS_TYPE = keyOf("plurals", "type"); 39 private static final AttributeKey RULES_LOCALES = keyOf("pluralRules", "locales"); 40 private static final AttributeKey RULE_COUNT = keyOf("pluralRule", "count"); 41 42 private static final CldrDataProcessor<PluralsMapper> CLDR_PROCESSOR; 43 static { 44 CldrDataProcessor.Builder<PluralsMapper> processor = CldrDataProcessor.builder(); 45 processor 46 .addAction("//supplementalData/plurals[@type=*]", (m, p) -> m.new Plurals(p)) 47 .addAction("pluralRules[@locales=*]", Rules::new, Plurals::addRules) 48 .addValueAction("pluralRule[@count=*]", Rules::addRule); 49 CLDR_PROCESSOR = processor.build(); 50 } 51 52 private static final ImmutableMap<String, RbPath> ICU_PREFIX_MAP = 53 ImmutableMap.of("cardinal", RbPath.of("locales"), "ordinal", RbPath.of("locales_ordinals")); 54 55 /** 56 * Processes data from the given supplier to generate plural ICU data. 57 * 58 * @param src the CLDR data supplier to process. 59 * @return the IcuData instance to be written to a file. 60 */ process(CldrDataSupplier src)61 public static IcuData process(CldrDataSupplier src) { 62 return process(src.getDataForType(SUPPLEMENTAL)); 63 } 64 65 @VisibleForTesting // It's easier to supply a fake data instance than a fake supplier. process(CldrData data)66 static IcuData process(CldrData data) { 67 PluralsMapper mapper = new PluralsMapper(); 68 // Note: We explicitly filter by type to mimic the order of the existing code, since this 69 // affects the set indices we generate during processing. Ideally this would all be immune 70 // to ordering (or just enforce DTD ordering) but right now it's very dependent on 71 // mimicking the order of the existing code to get identical output. Once DTD order is 72 // everywhere, this can just be a single pass over the original data. 73 CLDR_PROCESSOR.process(filterByType(data, "cardinal"), mapper, NESTED_GROUPING); 74 CLDR_PROCESSOR.process(filterByType(data, "ordinal"), mapper, NESTED_GROUPING); 75 return mapper.icuData; 76 } 77 78 // Mutable ICU data collected into during visitation. 79 // In a post XML-aware API, is recording the XML file names really a good idea? 80 private final IcuData icuData = new IcuData("plurals", false); 81 private final List<ImmutableMap<String, String>> previousRules = new ArrayList<>(); 82 83 private class Plurals { 84 private final RbPath icuPrefix; 85 Plurals(CldrPath prefix)86 Plurals(CldrPath prefix) { 87 // Note: "plurals:type" is an optional attribute but the CLDR DTD specifies a 88 // default via comments. It should probably be changed to just have a default in 89 // the DTD. 90 this.icuPrefix = ICU_PREFIX_MAP.get(PLURALS_TYPE.valueFrom(prefix, "cardinal")); 91 } 92 addRules(Rules r)93 private void addRules(Rules r) { 94 ImmutableMap<String, String> rules = r.getRules(); 95 // Note: The original mapper code "sort of" coped with empty rules, but it's not 96 // completely well behaved (or documented), so since this doesn't happen in the 97 // current CLDR data, I decided to just prohibit it in the new code. Support can 98 // easily be added in once the expected semantics are clear. 99 checkState(!rules.isEmpty(), "missing rule data for plurals"); 100 101 // Have we seen this set of rules before? If so, reuse the existing index. Note 102 // that an IDE might report this call as suspicious because the key is not yet an 103 // immutable map (saves creating immutable maps just to check for inclusion) but 104 // this is fine because collection equality is based only on contents, not 105 // collection type. 106 int idx = previousRules.indexOf(rules); 107 if (idx == -1) { 108 int newIdx = previousRules.size(); 109 rules.forEach((k, v) -> icuData.add(RbPath.of("rules", "set" + newIdx, k), v)); 110 // Since "rules" is mutable and reused, we must take an immutable copy here. 111 previousRules.add(rules); 112 idx = newIdx; 113 } 114 String setName = "set" + idx; 115 r.getLocales().forEach(locale -> icuData.add(icuPrefix.extendBy(locale), setName)); 116 } 117 } 118 119 private static class Rules { 120 private final ImmutableList<String> locales; 121 private final ImmutableMap.Builder<String, String> map = ImmutableMap.builder(); 122 Rules(CldrPath prefix)123 private Rules(CldrPath prefix) { 124 this.locales = ImmutableList.copyOf(RULES_LOCALES.listOfValuesFrom(prefix)); 125 checkState(!locales.isEmpty(), "missing locale data for plurals: %s", prefix); 126 } 127 addRule(CldrValue value)128 private void addRule(CldrValue value) { 129 map.put(RULE_COUNT.valueFrom(value), value.getValue()); 130 } 131 getLocales()132 private ImmutableList<String> getLocales() { 133 return locales; 134 } 135 getRules()136 private ImmutableMap<String, String> getRules() { 137 return map.build(); 138 } 139 } 140 141 // A hack to allow us to process "cardinal" data before "ordinal" data (even though DTD order 142 // is the other way round). Once DTD order is the only ordering used, this can be removed. filterByType(CldrData data, String pluralType)143 private static CldrData filterByType(CldrData data, String pluralType) { 144 PathMatcher matcher = 145 PathMatcher.of("//supplementalData/plurals[@type=\"" + pluralType + "\"]"); 146 return new FilteredData(data) { 147 @Override protected CldrValue filter(CldrValue value) { 148 return matcher.matchesPrefixOf(value.getPath()) ? value : null; 149 } 150 }; 151 } 152 } 153