// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
6 package org.unicode.icu.tool.cldrtoicu.ant;
7 
8 import static com.google.common.base.Preconditions.checkNotNull;
9 import static com.google.common.collect.ImmutableSet.toImmutableSet;
10 
11 import java.util.Set;
12 import java.util.TreeSet;
13 import java.util.function.Predicate;
14 import java.util.regex.Pattern;
15 
16 import org.unicode.icu.tool.cldrtoicu.SupplementalData;
17 
18 import com.google.common.base.Ascii;
19 import com.google.common.collect.ImmutableListMultimap;
20 import com.google.common.collect.ImmutableSet;
21 import com.google.common.collect.Iterables;
22 import com.google.common.collect.Multimaps;
23 import com.google.common.collect.Sets;
24 
25 /** Helper class to reslove ID configuration. */
26 final class LocaleIdResolver {
27     /** Returns the expanded set of target locale IDs based on the given ID specifications. */
expandTargetIds( Set<String> idSpecs, SupplementalData supplementalData)28     public static ImmutableSet<String> expandTargetIds(
29         Set<String> idSpecs, SupplementalData supplementalData) {
30         return new LocaleIdResolver(supplementalData).resolve(idSpecs);
31     }
32 
33     private final SupplementalData supplementalData;
34 
LocaleIdResolver(SupplementalData supplementalData)35     private LocaleIdResolver(SupplementalData supplementalData) {
36         this.supplementalData = checkNotNull(supplementalData);
37     }
38 
39     // ---- Code below here is to expand the incoming set of locale IDs ----
40 
41     private static final Pattern WILDCARD_LOCALE = Pattern.compile("[a-z]{2,3}(?:_[A-Z][a-z]{3})?");
42 
resolve(Set<String> idSpecs)43     private ImmutableSet<String> resolve(Set<String> idSpecs) {
44         ImmutableSet<String> allAvailableIds = supplementalData.getAvailableLocaleIds();
45         // Get the minimized wildcard set, converting things like "en_Latn" --> "en".
46         ImmutableSet<String> wildcardIds = idSpecs.stream()
47             .filter(supplementalData.getAvailableLocaleIds()::contains)
48             .filter(id -> WILDCARD_LOCALE.matcher(id).matches())
49             .map(this::removeDefaultScript)
50             .collect(toImmutableSet());
51 
52         // Get the set of IDs which are implied by the wildcard IDs.
53         Set<String> targetIds = new TreeSet<>();
54         allAvailableIds.forEach(id -> addWildcardMatches(id, wildcardIds::contains, targetIds));
55 
56         // Get the IDs which don't need to be in the config (because they are implied).
57         Set<String> redundant = Sets.intersection(idSpecs, targetIds);
58         if (!redundant.isEmpty()) {
59             System.err.println("Configuration lists redundant locale IDs");
60             System.err.println("The following IDs should be removed from the configuration:");
61             Iterables.partition(redundant, 16)
62                 .forEach(ids -> System.err.println(String.join(", ", ids)));
63 
64             // Note that the minimal configuration includes aliases.
65             Set<String> minimalConfigIds = new TreeSet<>(Sets.difference(idSpecs, targetIds));
66             minimalConfigIds.remove("root");
67             ImmutableListMultimap<Character, String> idsByFirstChar =
68                 Multimaps.index(minimalConfigIds, s -> s.charAt(0));
69 
70             System.err.println("Canonical ID list is:");
71             for (char c: idsByFirstChar.keySet()) {
72                 System.err.println("    // " + Ascii.toUpperCase(c));
73                 Iterables.partition(idsByFirstChar.get(c), 16)
74                     .forEach(ids -> System.err.println("    " + String.join(", ", ids)));
75                 System.err.println();
76             }
77             System.err.flush();
78             throw new IllegalStateException("Non-canonical configuration");
79         }
80 
81         // We return the set of IDs made up of:
82         // 1: The original IDs specified by the configuration (and any parent IDs).
83         // 2: IDs expanded from wildcard IDs (e.g. "en_Latn_GB" & "en_Latn" from "en").
84         //    (this is what's already in targetIds).
85         // 3: The "root" ID.
86         idSpecs.forEach(id -> addRecursively(id, targetIds));
87         return ImmutableSet.<String>builder().add("root").addAll(targetIds).build();
88     }
89 
90     // E.g. "xx_Fooo" --> "xx" --> "xx_Baar_YY" ==> "xx_Fooo"
91     // E.g. "xx_Fooo" --> "xx" --> "xx_Fooo_YY" ==> "xx"
removeDefaultScript(String id)92     private String removeDefaultScript(String id) {
93         if (id.contains("_")) {
94             String lang = id.substring(0, id.indexOf("_"));
95             String maxId = supplementalData.maximize(lang)
96                 .orElseThrow(
97                     () -> new IllegalStateException("cannot maximize language subtag: " + lang));
98             if (maxId.startsWith(id)) {
99                 return lang;
100             }
101         }
102         return id;
103     }
104 
addRecursively(String id, Set<String> dst)105     private void addRecursively(String id, Set<String> dst) {
106         // One of the strings we get here is "no_NO_NY", need to make sure that
107         // supplementalData.getParent properly canonicalizes that before determining parent
108         while (!id.equals("root") && dst.add(id)) {
109             id = supplementalData.getParent(id);
110         }
111     }
112 
addWildcardMatches( String id, Predicate<String> isWildcard, Set<String> dst)113     private boolean addWildcardMatches(
114         String id, Predicate<String> isWildcard, Set<String> dst) {
115         if (id.equals("root")) {
116             return false;
117         }
118         String parentId = supplementalData.getParent(id);
119         int index = parentId.indexOf("_");
120         String parentIdLang = (index < 0)? parentId: parentId.substring(0, index);
121         index = id.indexOf("_");
122         String idLang = (index < 0)? id: id.substring(0, index);
123         if (parentIdLang.equals(idLang) && (isWildcard.test(parentId) || addWildcardMatches(parentId, isWildcard, dst))) {
124             // Only add child locales here if their language matches their parent; need this to handle nn (child of no)
125             dst.add(id);
126             return true;
127         }
128         return false;
129     }
130 }
131