1 package org.unicode.cldr.tool; 2 3 import java.io.IOException; 4 import java.io.PrintWriter; 5 import java.lang.invoke.MethodHandles; 6 import java.util.Collection; 7 import java.util.Date; 8 import java.util.EnumMap; 9 import java.util.LinkedHashMap; 10 import java.util.List; 11 import java.util.Locale; 12 import java.util.Map; 13 import java.util.Map.Entry; 14 import java.util.Set; 15 import java.util.TreeMap; 16 import java.util.TreeSet; 17 18 import org.unicode.cldr.draft.FileUtilities; 19 import org.unicode.cldr.draft.ScriptMetadata; 20 import org.unicode.cldr.util.CLDRPaths; 21 import org.unicode.cldr.util.CLDRTool; 22 import org.unicode.cldr.util.DtdType; 23 import org.unicode.cldr.util.StandardCodes; 24 import org.unicode.cldr.util.StandardCodes.LstrField; 25 import org.unicode.cldr.util.StandardCodes.LstrType; 26 import org.unicode.cldr.util.StringRange; 27 import org.unicode.cldr.util.StringRange.Adder; 28 import org.unicode.cldr.util.SupplementalDataInfo; 29 import org.unicode.cldr.util.SupplementalDataInfo.CurrencyDateInfo; 30 import org.unicode.cldr.util.Validity; 31 import org.unicode.cldr.util.Validity.Status; 32 33 import com.google.common.base.Joiner; 34 import com.google.common.base.Objects; 35 import com.google.common.collect.ImmutableSet; 36 import com.google.common.collect.ImmutableSetMultimap; 37 import com.google.common.collect.Multimap; 38 import com.google.common.collect.Multimaps; 39 import com.google.common.collect.SetMultimap; 40 import com.google.common.collect.TreeMultimap; 41 import com.ibm.icu.impl.Row.R2; 42 import com.ibm.icu.util.ICUUncheckedIOException; 43 44 @CLDRTool( 45 alias = "generate-validity-data", 46 url = "http://cldr.unicode.org/development/updating-codes/update-validity-xml") 47 public class GenerateValidityXml { 48 49 private static final Validity VALIDITY = Validity.getInstance(); 50 private static Validity OLD_VALIDITY = Validity.getInstance(CLDRPaths.LAST_RELEASE_DIRECTORY + "common/validity/"); 51 52 private static final Map<LstrType, Map<String, Map<LstrField, String>>> LSTREG = StandardCodes.getEnumLstreg(); 53 private static final SupplementalDataInfo SDI = SupplementalDataInfo.getInstance(); 54 55 private static class MyAdder implements Adder { 56 Appendable target; 57 boolean twoCodePoints = false; 58 long lastCodePoint = -1; 59 60 @Override add(String start, String end)61 public void add(String start, String end) { 62 try { 63 long firstCodePoint = start.codePointAt(0); 64 if (twoCodePoints) { 65 firstCodePoint <<= 22; 66 firstCodePoint |= start.codePointAt(1); 67 } 68 if (firstCodePoint == lastCodePoint) { 69 target.append(' '); 70 } else { 71 target.append("\n\t\t\t"); 72 } 73 target.append(start); 74 if (end != null) { 75 target.append('~').append(end); 76 } 77 lastCodePoint = firstCodePoint; 78 } catch (IOException e) { 79 throw new ICUUncheckedIOException(e); 80 } 81 } 82 reset(boolean b)83 public void reset(boolean b) { 84 lastCodePoint = -1; 85 twoCodePoints = b; 86 } 87 } 88 89 static Set<String> containment = SDI.getContainers(); 90 static Map<String, Map<LstrField, String>> codeToData = LSTREG.get(LstrType.region); 91 92 static class Info { 93 String mainComment; 94 //private Relation<Validity.Status, String> statusMap = Relation.of(new EnumMap<Validity.Status, Set<String>>(Validity.Status.class), TreeSet.class); 95 Map<String, Validity.Status> codeToStatus = new TreeMap<>(); 96 Map<Validity.Status, String> statusComment = new EnumMap<>(Status.class); 97 Set<String> newCodes = new TreeSet<>(); 98 99 static Map<String, Info> types = new LinkedHashMap<>(); 100 getInfo(String myType)101 static Info getInfo(String myType) { 102 Info info = types.get(myType); 103 if (info == null) { 104 types.put(myType, info = new Info()); 105 } 106 return info; 107 } getStatusMap()108 public SetMultimap<Status, String> getStatusMap() { 109 TreeMultimap<Status, String> result = TreeMultimap.create(); 110 Multimaps.invertFrom(Multimaps.forMap(codeToStatus), result); 111 return ImmutableSetMultimap.copyOf(result); 112 } put(String key, Status value)113 public void put(String key, Status value) { 114 codeToStatus.put(key, value); 115 } remove(String key, Status value)116 public void remove(String key, Status value) { 117 codeToStatus.remove(key, value); 118 } clear()119 public void clear() { 120 codeToStatus.clear(); 121 } entrySet()122 public Set<Entry<String, Status>> entrySet() { 123 return codeToStatus.entrySet(); 124 } get(String key)125 public Status get(String key) { 126 return codeToStatus.get(key); 127 } putBest(String currency, Status newStatus)128 public void putBest(String currency, Status newStatus) { 129 Status oldStatus = get(currency); 130 if (oldStatus == null || newStatus.compareTo(oldStatus) < 0) { 131 put(currency, newStatus); 132 } 133 } 134 } 135 136 static final Map<String, Info> types = Info.types; 137 main(String[] args)138 public static void main(String[] args) throws IOException { 139 140 doLstr(types); 141 doSubdivisions(types); 142 doCurrency(types); 143 // write file 144 MyAdder adder = new MyAdder(); 145 for (Entry<String, Info> entry : types.entrySet()) { 146 String type = entry.getKey(); 147 final Info info = entry.getValue(); 148 Multimap<Status, String> subtypeMap = info.getStatusMap(); 149 try (PrintWriter output = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "validity/" + type + ".xml")) { 150 adder.target = output; 151 output.append(DtdType.supplementalData.header(MethodHandles.lookup().lookupClass()) 152 + "\t<version number=\"$Revision" /*hack to stop SVN changing this*/ + "$\"/>\n" 153 + "\t<idValidity>\n"); 154 for (Entry<Status, Collection<String>> entry2 : subtypeMap.asMap().entrySet()) { 155 Validity.Status subtype = entry2.getKey(); 156 Set<String> set = (Set<String>) entry2.getValue(); 157 String comment = info.statusComment.get(entry2.getKey()); 158 if (comment != null) { 159 output.append("\t\t<!-- " + comment.replace("\n", "\n\t\t\t ") + " -->\n"); 160 } 161 output.append("\t\t<id type='" + type + "' idStatus='" + subtype + "'>"); 162 final int size = set.size(); 163 output.append("\t\t<!-- " + size + " item" + (size > 1 ? "s" : "") // we know it’s English ;-) 164 + " -->"); 165 adder.reset(size > 600); // || type.equals("subdivision") 166 StringRange.compact(set, adder, true); 167 output.append("\n\t\t</id>\n"); 168 } 169 // if (!info.newCodes.isEmpty()) { 170 // output.append("\t\t<!-- Codes added this release:\n\t\t\t" + showCodes(info.newCodes, "\n\t\t\t") + "\n\t\t-->\n"); 171 // } 172 output.append("\t</idValidity>\n</supplementalData>\n"); 173 } 174 } 175 // System.out.println("TODO: add Unknown subdivisions, add private_use currencies, ..."); 176 } 177 showCodes(Set<String> newCodes, String linePrefix)178 private static String showCodes(Set<String> newCodes, String linePrefix) { 179 StringBuilder result = new StringBuilder(); 180 String last = ""; 181 for (String s : newCodes) { 182 String newPrefix = s.substring(0, s.indexOf('-')); 183 if (last.equals(newPrefix)) { 184 result.append(" "); 185 } else { 186 if (!last.isEmpty()) { 187 result.append(linePrefix); 188 } 189 last = newPrefix; 190 } 191 result.append(s); 192 } 193 return result.toString(); 194 } 195 doCurrency(Map<String, Info> types)196 private static void doCurrency(Map<String, Info> types) { 197 Info info = Info.getInfo("currency"); 198 Date now = new Date(); 199 Date eoy = new Date(now.getYear() + 1, 0, 1); // Dec 200 for (String region : SDI.getCurrencyTerritories()) { 201 for (CurrencyDateInfo data : SDI.getCurrencyDateInfo(region)) { 202 String currency = data.getCurrency(); 203 Date end = data.getEnd(); 204 boolean legalTender = data.isLegalTender(); 205 Status newStatus = end.after(eoy) && legalTender ? Status.regular : Status.deprecated; 206 info.putBest(currency, newStatus); 207 } 208 } 209 info.put(LstrType.currency.unknown, Status.unknown); 210 // make sure we don't overlap. 211 // we want to keep any code that is valid in any territory, so 212 info.remove("XXX", Status.deprecated); 213 info.remove("XXX", Status.regular); 214 215 // just to make sure info never disappears 216 Map<String, Status> oldCodes = OLD_VALIDITY.getCodeToStatus(LstrType.currency); 217 for (Entry<String, Status> entry : oldCodes.entrySet()) { 218 String key = entry.getKey(); 219 Status oldStatus = entry.getValue(); 220 Status newStatus = info.get(key); 221 if (!Objects.equal(oldStatus, newStatus)) { 222 System.out.println("Status changed: " + key + ", " + oldStatus + " => " + newStatus); 223 } 224 } 225 226 info.statusComment.put(Status.deprecated, 227 "Deprecated values are those that are not legal tender in some country after " + (1900 + now.getYear()) + ".\n" 228 + "More detailed usage information needed for some implementations is in supplemental data."); 229 } 230 doSubdivisions(Map<String, Info> types)231 private static void doSubdivisions(Map<String, Info> types) { 232 Info info = Info.getInfo("subdivision"); 233 Map<String, R2<List<String>, String>> aliases = SDI.getLocaleAliasInfo().get("subdivision"); 234 for (String container : SDI.getContainersForSubdivisions()) { 235 for (String contained : SDI.getContainedSubdivisions(container)) { 236 Status status = aliases.containsKey(contained) ? Validity.Status.deprecated : Validity.Status.regular; 237 info.put(contained.toLowerCase(Locale.ROOT).replace("-", ""), status); 238 } 239 } 240 241 // find out which items were valid, but are no longer in the containment map 242 // add them as deprecated 243 Map<Status, Set<String>> oldSubdivisionData = OLD_VALIDITY.getStatusToCodes(LstrType.subdivision); 244 for (Entry<Status, Set<String>> entry : oldSubdivisionData.entrySet()) { 245 for (String oldSdId : entry.getValue()) { 246 if (info.get(oldSdId) == null) { 247 info.put(oldSdId, Status.deprecated); 248 } 249 } 250 } 251 252 info.statusComment.put(Status.deprecated, 253 "Deprecated values include those that are not formally deprecated in the country in question, but have their own region codes.\n" 254 + "It also include codes that were previously in CLDR, for compatibility."); 255 info.statusComment.put(Status.unknown, 256 "Unknown/Undetermined subdivision codes (ZZZZ) are defined for all regular region codes."); 257 } 258 doLstr(Map<String, Info> types)259 private static void doLstr(Map<String, Info> types) throws IOException { 260 Set<String> skippedScripts = new TreeSet<>(); 261 for (Entry<LstrType, Map<String, Map<LstrField, String>>> entry : LSTREG.entrySet()) { 262 LstrType type = entry.getKey(); 263 if (!type.isLstr || !type.isUnicode) { 264 continue; 265 } 266 Info info = Info.getInfo(type.toString()); 267 Map<String, R2<List<String>, String>> aliases = SDI.getLocaleAliasInfo().get(type == LstrType.region ? "territory" : type.toString()); 268 if (aliases == null) { 269 System.out.println("No aliases for: " + type); 270 } 271 // gather data 272 info.clear(); 273 for (Entry<String, Map<LstrField, String>> entry2 : entry.getValue().entrySet()) { 274 String code = entry2.getKey(); 275 if (type == LstrType.language && code.startsWith("bh")) { 276 int debug = 0; 277 } 278 Map<LstrField, String> data = entry2.getValue(); 279 Validity.Status subtype = Validity.Status.regular; 280 if (code.equals(type.unknown)) { 281 subtype = Validity.Status.unknown; 282 } else if (type.specials.contains(code)) { 283 subtype = Validity.Status.special; 284 } else if (aliases != null && aliases.containsKey(code)) { 285 subtype = Validity.Status.deprecated; 286 } else if (data.get(LstrField.Description).startsWith("Private use")) { 287 subtype = Validity.Status.private_use; 288 } 289 switch (type) { 290 case language: 291 if (code.equals("root")) { 292 continue; 293 } 294 break; 295 case region: 296 if (containment.contains(code)) { 297 subtype = Validity.Status.macroregion; 298 } else if (code.equals("XA") || code.equals("XB")) { 299 subtype = Validity.Status.special; 300 } 301 if (subtype == Status.regular) { 302 Info subInfo = Info.getInfo("subdivision"); 303 subInfo.put(code.toLowerCase(Locale.ROOT) + "zzzz", Status.unknown); 304 } 305 break; 306 case script: 307 switch (code) { 308 case "Qaag": 309 case "Zsye": 310 subtype = Status.special; 311 break; 312 default: 313 if (subtype == Validity.Status.regular) { 314 ScriptMetadata.Info scriptInfo = ScriptMetadata.getInfo(code); 315 if (scriptInfo == null && !code.equals("Hrkt")) { 316 skippedScripts.add(code); 317 continue; 318 } 319 } 320 break; 321 } 322 break; 323 case variant: 324 if (VARIANT_EXTRAS.contains(code)) { 325 continue; 326 } 327 default: 328 break; 329 } 330 info.put(code, subtype); 331 } 332 } 333 System.out.println("Skipping non-Unicode scripts: " + Joiner.on(' ').join(skippedScripts)); 334 } 335 336 static final Set<String> VARIANT_EXTRAS = ImmutableSet.of("POSIX", "REVISED", "SAAHO"); 337 } 338