// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu.ant;

import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.collect.ImmutableSet.toImmutableSet;

import java.util.Set;
import java.util.TreeSet;
import java.util.function.Predicate;
import java.util.regex.Pattern;

import org.unicode.icu.tool.cldrtoicu.SupplementalData;

import com.google.common.base.Ascii;
import com.google.common.collect.ImmutableListMultimap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import com.google.common.collect.Multimaps;
import com.google.common.collect.Sets;

/** Helper class to resolve ID configuration. */
final class LocaleIdResolver {
    /**
     * Returns the expanded set of target locale IDs based on the given ID specifications.
     *
     * @param idSpecs the locale IDs listed in the configuration.
     * @param supplementalData the data used to look up available locale IDs, parent IDs and
     *     maximized IDs; must not be null.
     * @return an immutable set whose first element is {@code "root"}, followed by the resolved
     *     IDs in sorted order.
     * @throws IllegalStateException if the configuration lists IDs that are already implied by
     *     other (wildcard) IDs, or if the language subtag of a wildcard ID cannot be maximized.
     */
    public static ImmutableSet<String> expandTargetIds(
            Set<String> idSpecs, SupplementalData supplementalData) {
        return new LocaleIdResolver(supplementalData).resolve(idSpecs);
    }

    private final SupplementalData supplementalData;

    private LocaleIdResolver(SupplementalData supplementalData) {
        this.supplementalData = checkNotNull(supplementalData);
    }

    // ---- Code below here is to expand the incoming set of locale IDs ----

    // Matches "wildcard" IDs: a 2-3 letter language, optionally followed by a 4 letter script.
    private static final Pattern WILDCARD_LOCALE = Pattern.compile("[a-z]{2,3}(?:_[A-Z][a-z]{3})?");

    /**
     * Expands the configured IDs into the full set of target IDs.
     *
     * @throws IllegalStateException if any configured ID is also implied by a wildcard ID (the
     *     details are written to {@code System.err} before throwing).
     */
    private ImmutableSet<String> resolve(Set<String> idSpecs) {
        ImmutableSet<String> allAvailableIds = supplementalData.getAvailableLocaleIds();
        // Get the minimized wildcard set, converting things like "en_Latn" --> "en".
        ImmutableSet<String> wildcardIds = idSpecs.stream()
            .filter(allAvailableIds::contains)
            .filter(id -> WILDCARD_LOCALE.matcher(id).matches())
            .map(this::removeDefaultScript)
            .collect(toImmutableSet());

        // Get the set of IDs which are implied by the wildcard IDs.
        Set<String> targetIds = new TreeSet<>();
        allAvailableIds.forEach(id -> addWildcardMatches(id, wildcardIds::contains, targetIds));

        // Get the IDs which don't need to be in the config (because they are implied).
        // This is a live view, so it must be consumed before targetIds is modified below.
        Set<String> redundant = Sets.intersection(idSpecs, targetIds);
        if (!redundant.isEmpty()) {
            reportNonCanonicalConfig(idSpecs, targetIds, redundant);
            throw new IllegalStateException("Non-canonical configuration");
        }

        // We return the set of IDs made up of:
        // 1: The original IDs specified by the configuration (and any parent IDs).
        // 2: IDs expanded from wildcard IDs (e.g. "en_Latn_GB" & "en_Latn" from "en").
        //    (this is what's already in targetIds).
        // 3: The "root" ID.
        idSpecs.forEach(id -> addRecursively(id, targetIds));
        return ImmutableSet.<String>builder().add("root").addAll(targetIds).build();
    }

    /**
     * Writes a description of the redundant IDs, and the canonical ID list that should replace
     * the current configuration, to {@code System.err}.
     */
    private static void reportNonCanonicalConfig(
            Set<String> idSpecs, Set<String> targetIds, Set<String> redundant) {
        System.err.println("Configuration lists redundant locale IDs");
        System.err.println("The following IDs should be removed from the configuration:");
        Iterables.partition(redundant, 16)
            .forEach(ids -> System.err.println(String.join(", ", ids)));

        // Note that the minimal configuration includes aliases.
        Set<String> minimalConfigIds = new TreeSet<>(Sets.difference(idSpecs, targetIds));
        minimalConfigIds.remove("root");
        ImmutableListMultimap<Character, String> idsByFirstChar =
            Multimaps.index(minimalConfigIds, s -> s.charAt(0));

        System.err.println("Canonical ID list is:");
        for (char c : idsByFirstChar.keySet()) {
            System.err.println(" // " + Ascii.toUpperCase(c));
            Iterables.partition(idsByFirstChar.get(c), 16)
                .forEach(ids -> System.err.println(" " + String.join(", ", ids)));
            System.err.println();
        }
        System.err.flush();
    }

    /**
     * Strips the script from a "language_Script" ID when the maximized form of the language
     * starts with that ID; any other ID is returned unchanged.
     *
     * @throws IllegalStateException if the language subtag cannot be maximized.
     */
    // E.g. "xx_Fooo" --> "xx" --> "xx_Baar_YY" ==> "xx_Fooo"
    // E.g. "xx_Fooo" --> "xx" --> "xx_Fooo_YY" ==> "xx"
    private String removeDefaultScript(String id) {
        int separator = id.indexOf('_');
        if (separator < 0) {
            return id;
        }
        String lang = id.substring(0, separator);
        String maxId = supplementalData.maximize(lang)
            .orElseThrow(
                () -> new IllegalStateException("cannot maximize language subtag: " + lang));
        return maxId.startsWith(id) ? lang : id;
    }

    /**
     * Adds the given ID and each of its ancestors to {@code dst}, stopping at "root" or at the
     * first ID which is already present in {@code dst}.
     */
    private void addRecursively(String id, Set<String> dst) {
        // One of the strings we get here is "no_NO_NY", need to make sure that
        // supplementalData.getParent properly canonicalizes that before determining parent
        while (!id.equals("root") && dst.add(id)) {
            id = supplementalData.getParent(id);
        }
    }

    /**
     * Adds {@code id} to {@code dst} if one of its ancestors is a wildcard ID and every ID on
     * the path to that ancestor shares the same language subtag. Intermediate ancestors on that
     * path are added as well.
     *
     * @return true if {@code id} was added to {@code dst}.
     */
    private boolean addWildcardMatches(
            String id, Predicate<String> isWildcard, Set<String> dst) {
        if (id.equals("root")) {
            return false;
        }
        String parentId = supplementalData.getParent(id);
        // Only add child locales here if their language matches their parent; need this to
        // handle nn (child of no)
        if (!languageOf(parentId).equals(languageOf(id))) {
            return false;
        }
        if (isWildcard.test(parentId) || addWildcardMatches(parentId, isWildcard, dst)) {
            dst.add(id);
            return true;
        }
        return false;
    }

    /** Returns the part of the ID before the first underscore, or the whole ID if none. */
    private static String languageOf(String id) {
        int separator = id.indexOf('_');
        return (separator < 0) ? id : id.substring(0, separator);
    }
}