• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2019 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 package org.unicode.icu.tool.cldrtoicu.mapper;
4 
5 import static com.google.common.base.Preconditions.checkState;
6 import static org.unicode.cldr.api.AttributeKey.keyOf;
7 import static org.unicode.cldr.api.CldrData.PathOrder.NESTED_GROUPING;
8 import static org.unicode.cldr.api.CldrDataType.SUPPLEMENTAL;
9 
10 import java.util.ArrayList;
11 import java.util.List;
12 
13 import org.unicode.cldr.api.AttributeKey;
14 import org.unicode.cldr.api.CldrData;
15 import org.unicode.cldr.api.CldrDataSupplier;
16 import org.unicode.cldr.api.CldrDataType;
17 import org.unicode.cldr.api.CldrPath;
18 import org.unicode.cldr.api.CldrValue;
19 import org.unicode.cldr.api.FilteredData;
20 import org.unicode.cldr.api.PathMatcher;
21 import org.unicode.icu.tool.cldrtoicu.IcuData;
22 import org.unicode.icu.tool.cldrtoicu.RbPath;
23 import org.unicode.icu.tool.cldrtoicu.CldrDataProcessor;
24 
25 import com.google.common.annotations.VisibleForTesting;
26 import com.google.common.collect.ImmutableList;
27 import com.google.common.collect.ImmutableMap;
28 
29 /**
30  * A mapper to collect plural data from {@link CldrDataType#SUPPLEMENTAL SUPPLEMENTAL} data via
31  * the paths:
32  * <pre>{@code
33  *   //supplementalData/plurals[@type=*]/pluralRules[@locales=*]/pluralRule[@count=*]
34  * }</pre>
35  */
36 public final class PluralsMapper {
37 
38     private static final AttributeKey PLURALS_TYPE = keyOf("plurals", "type");
39     private static final AttributeKey RULES_LOCALES = keyOf("pluralRules", "locales");
40     private static final AttributeKey RULE_COUNT = keyOf("pluralRule", "count");
41 
42     private static final CldrDataProcessor<PluralsMapper> CLDR_PROCESSOR;
43     static {
44         CldrDataProcessor.Builder<PluralsMapper> processor = CldrDataProcessor.builder();
45         processor
46             .addAction("//supplementalData/plurals[@type=*]", (m, p) -> m.new Plurals(p))
47             .addAction("pluralRules[@locales=*]", Rules::new, Plurals::addRules)
48             .addValueAction("pluralRule[@count=*]", Rules::addRule);
49         CLDR_PROCESSOR = processor.build();
50     }
51 
52     private static final ImmutableMap<String, RbPath> ICU_PREFIX_MAP =
53         ImmutableMap.of("cardinal", RbPath.of("locales"), "ordinal", RbPath.of("locales_ordinals"));
54 
55     /**
56      * Processes data from the given supplier to generate plural ICU data.
57      *
58      * @param src the CLDR data supplier to process.
59      * @return the IcuData instance to be written to a file.
60      */
process(CldrDataSupplier src)61     public static IcuData process(CldrDataSupplier src) {
62         return process(src.getDataForType(SUPPLEMENTAL));
63     }
64 
65     @VisibleForTesting // It's easier to supply a fake data instance than a fake supplier.
process(CldrData data)66     static IcuData process(CldrData data) {
67         PluralsMapper mapper = new PluralsMapper();
68         // Note: We explicitly filter by type to mimic the order of the existing code, since this
69         // affects the set indices we generate during processing. Ideally this would all be immune
70         // to ordering (or just enforce DTD ordering) but right now it's very dependent on
71         // mimicking the order of the existing code to get identical output. Once DTD order is
72         // everywhere, this can just be a single pass over the original data.
73         CLDR_PROCESSOR.process(filterByType(data, "cardinal"), mapper, NESTED_GROUPING);
74         CLDR_PROCESSOR.process(filterByType(data, "ordinal"), mapper, NESTED_GROUPING);
75         return mapper.icuData;
76     }
77 
78     // Mutable ICU data collected into during visitation.
79     // In a post XML-aware API, is recording the XML file names really a good idea?
80     private final IcuData icuData = new IcuData("plurals", false);
81     private final List<ImmutableMap<String, String>> previousRules = new ArrayList<>();
82 
83     private class Plurals {
84         private final RbPath icuPrefix;
85 
Plurals(CldrPath prefix)86         Plurals(CldrPath prefix) {
87             // Note: "plurals:type" is an optional attribute but the CLDR DTD specifies a
88             // default via comments. It should probably be changed to just have a default in
89             // the DTD.
90             this.icuPrefix = ICU_PREFIX_MAP.get(PLURALS_TYPE.valueFrom(prefix, "cardinal"));
91         }
92 
addRules(Rules r)93         private void addRules(Rules r) {
94             ImmutableMap<String, String> rules = r.getRules();
95             // Note: The original mapper code "sort of" coped with empty rules, but it's not
96             // completely well behaved (or documented), so since this doesn't happen in the
97             // current CLDR data, I decided to just prohibit it in the new code. Support can
98             // easily be added in once the expected semantics are clear.
99             checkState(!rules.isEmpty(), "missing rule data for plurals");
100 
101             // Have we seen this set of rules before? If so, reuse the existing index. Note
102             // that an IDE might report this call as suspicious because the key is not yet an
103             // immutable map (saves creating immutable maps just to check for inclusion) but
104             // this is fine because collection equality is based only on contents, not
105             // collection type.
106             int idx = previousRules.indexOf(rules);
107             if (idx == -1) {
108                 int newIdx = previousRules.size();
109                 rules.forEach((k, v) -> icuData.add(RbPath.of("rules", "set" + newIdx, k), v));
110                 // Since "rules" is mutable and reused, we must take an immutable copy here.
111                 previousRules.add(rules);
112                 idx = newIdx;
113             }
114             String setName = "set" + idx;
115             r.getLocales().forEach(locale -> icuData.add(icuPrefix.extendBy(locale), setName));
116         }
117     }
118 
119     private static class Rules {
120         private final ImmutableList<String> locales;
121         private final ImmutableMap.Builder<String, String> map = ImmutableMap.builder();
122 
Rules(CldrPath prefix)123         private Rules(CldrPath prefix) {
124             this.locales = ImmutableList.copyOf(RULES_LOCALES.listOfValuesFrom(prefix));
125             checkState(!locales.isEmpty(), "missing locale data for plurals: %s", prefix);
126         }
127 
addRule(CldrValue value)128         private void addRule(CldrValue value) {
129             map.put(RULE_COUNT.valueFrom(value), value.getValue());
130         }
131 
getLocales()132         private ImmutableList<String> getLocales() {
133             return locales;
134         }
135 
getRules()136         private ImmutableMap<String, String> getRules() {
137             return map.build();
138         }
139     }
140 
141     // A hack to allow us to process "cardinal" data before "ordinal" data (even though DTD order
142     // is the other way round). Once DTD order is the only ordering used, this can be removed.
filterByType(CldrData data, String pluralType)143     private static CldrData filterByType(CldrData data, String pluralType) {
144         PathMatcher matcher =
145             PathMatcher.of("//supplementalData/plurals[@type=\"" + pluralType + "\"]");
146         return new FilteredData(data) {
147             @Override protected CldrValue filter(CldrValue value) {
148                 return matcher.matchesPrefixOf(value.getPath()) ? value : null;
149             }
150         };
151     }
152 }
153