package org.unicode.cldr.tool;

import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.time.LocalDate;
import java.time.ZoneId;
import java.util.Collection;
import java.util.Date;
import java.util.EnumMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;

import org.unicode.cldr.draft.ScriptMetadata;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.CLDRTool;
import org.unicode.cldr.util.DtdType;
import org.unicode.cldr.util.StandardCodes;
import org.unicode.cldr.util.StandardCodes.LstrField;
import org.unicode.cldr.util.StandardCodes.LstrType;
import org.unicode.cldr.util.StringRange;
import org.unicode.cldr.util.StringRange.Adder;
import org.unicode.cldr.util.SupplementalDataInfo;
import org.unicode.cldr.util.SupplementalDataInfo.CurrencyDateInfo;
import org.unicode.cldr.util.TempPrintWriter;
import org.unicode.cldr.util.Validity;
import org.unicode.cldr.util.Validity.Status;

import com.google.common.base.Joiner;
import com.google.common.base.Objects;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableSetMultimap;
import com.google.common.collect.Multimap;
import com.google.common.collect.Multimaps;
import com.google.common.collect.SetMultimap;
import com.google.common.collect.TreeMultimap;
import com.ibm.icu.impl.Row.R2;
import com.ibm.icu.util.ICUUncheckedIOException;

/**
 * Command-line tool that writes one {@code validity/<type>.xml} file per code type
 * (language subtag registry types, subdivisions and currencies) under
 * {@link CLDRPaths#COMMON_DIRECTORY}.
 *
 * <p>Each file lists the codes of that type grouped by {@link Validity.Status}, in the
 * compacted range notation produced by {@link StringRange#compact}.
 */
@CLDRTool(
    alias = "generate-validity-data",
    url = "http://cldr.unicode.org/development/updating-codes/update-validity-xml")
public class GenerateValidityXml {

    private static final Validity VALIDITY = Validity.getInstance();
    /** Validity data read from the last release directory; used to detect changed or vanished codes. */
    private static final Validity OLD_VALIDITY = Validity.getInstance(CLDRPaths.LAST_RELEASE_DIRECTORY + "common/validity/");

    private static final Map<LstrType, Map<String, Map<LstrField, String>>> LSTREG = StandardCodes.getEnumLstreg();
    private static final SupplementalDataInfo SDI = SupplementalDataInfo.getInstance();

    /**
     * {@link Adder} that appends each compacted range to {@link #target}, starting a new
     * output line whenever the leading code point(s) of the range start change.
     */
    private static class MyAdder implements Adder {
        /** Destination for the formatted ranges; must be set before {@link #add} is called. */
        Appendable target;
        /** When true, ranges are grouped by their first two code points instead of only the first. */
        boolean twoCodePoints = false;
        /** Grouping key of the previously added range, or -1 if nothing has been added since reset. */
        long lastCodePoint = -1;

        /**
         * Appends one range.
         *
         * @param start first code of the range
         * @param end last code of the range, or null for a single code
         * @throws ICUUncheckedIOException if the target fails with an {@link IOException}
         */
        @Override
        public void add(String start, String end) {
            try {
                long firstCodePoint = start.codePointAt(0);
                if (twoCodePoints) {
                    // Build a combined key from the first two code points. A code point needs
                    // at most 21 bits, so shifting by 22 keeps the two parts disjoint.
                    firstCodePoint <<= 22;
                    // Guard against one-character codes, which would otherwise throw
                    // StringIndexOutOfBoundsException from codePointAt(1).
                    if (start.length() > 1) {
                        firstCodePoint |= start.codePointAt(1);
                    }
                }
                if (firstCodePoint == lastCodePoint) {
                    target.append(' ');
                } else {
                    target.append("\n\t\t\t");
                }
                target.append(start);
                if (end != null) {
                    target.append('~').append(end);
                }
                lastCodePoint = firstCodePoint;
            } catch (IOException e) {
                throw new ICUUncheckedIOException(e);
            }
        }

        /**
         * Clears the grouping state before a new list of codes is emitted.
         *
         * @param b new value for {@link #twoCodePoints}
         */
        public void reset(boolean b) {
            lastCodePoint = -1;
            twoCodePoints = b;
        }
    }

    static Set<String> containment = SDI.getContainers();
    static Map<String, Map<LstrField, String>> codeToData = LSTREG.get(LstrType.region);

    /**
     * Collected data for one output file: the status of every code plus optional comments
     * that are written in front of a status group.
     */
    static class Info {
        String mainComment;
        //private Relation<Validity.Status, String> statusMap = Relation.of(new EnumMap<Validity.Status, Set<String>>(Validity.Status.class), TreeSet.class);
        Map<String, Validity.Status> codeToStatus = new TreeMap<>();
        Map<Validity.Status, String> statusComment = new EnumMap<>(Status.class);
        Set<String> newCodes = new TreeSet<>();

        /** Registry of all Info objects, keyed by type name, in creation order. */
        static Map<String, Info> types = new LinkedHashMap<>();

        /**
         * Returns the Info registered for {@code myType}, creating and registering it on first use.
         */
        static Info getInfo(String myType) {
            return types.computeIfAbsent(myType, unused -> new Info());
        }

        /**
         * Returns an immutable snapshot of the codes grouped by status, with statuses and
         * codes in their natural order.
         */
        public SetMultimap<Status, String> getStatusMap() {
            TreeMultimap<Status, String> result = TreeMultimap.create();
            Multimaps.invertFrom(Multimaps.forMap(codeToStatus), result);
            return ImmutableSetMultimap.copyOf(result);
        }

        /** Sets the status of {@code key}, replacing any previous status. */
        public void put(String key, Status value) {
            codeToStatus.put(key, value);
        }

        /** Removes {@code key} only if it is currently mapped to {@code value}. */
        public void remove(String key, Status value) {
            codeToStatus.remove(key, value);
        }

        /** Removes all codes. */
        public void clear() {
            codeToStatus.clear();
        }

        /** Returns the live code-to-status entries, sorted by code. */
        public Set<Entry<String, Status>> entrySet() {
            return codeToStatus.entrySet();
        }

        /** Returns the status of {@code key}, or null if the code is not present. */
        public Status get(String key) {
            return codeToStatus.get(key);
        }

        /**
         * Records {@code newStatus} for {@code currency} unless the code already has a status
         * that sorts at or before it in {@link Status} declaration order.
         */
        public void putBest(String currency, Status newStatus) {
            Status oldStatus = get(currency);
            if (oldStatus == null || newStatus.compareTo(oldStatus) < 0) {
                put(currency, newStatus);
            }
        }
    }

    static final Map<String, Info> types = Info.types;

    /**
     * Gathers the data for all types and writes one validity file per type.
     *
     * @param args ignored
     * @throws IOException declared by the data-gathering and file-writing steps
     */
    public static void main(String[] args) throws IOException {

        doLstr(types);
        doSubdivisions(types);
        doCurrency(types);
        // write file
        MyAdder adder = new MyAdder();
        for (Entry<String, Info> entry : types.entrySet()) {
            String type = entry.getKey();
            final Info info = entry.getValue();
            Multimap<Status, String> subtypeMap = info.getStatusMap();
            try (TempPrintWriter output = TempPrintWriter.openUTF8Writer(CLDRPaths.COMMON_DIRECTORY, "validity/" + type + ".xml")) {
                adder.target = output;
                output.append(DtdType.supplementalData.header(MethodHandles.lookup().lookupClass())
                    + "\t<version number=\"$Revision" + "$\"/>\n"
                    + "\t<idValidity>\n");
                for (Entry<Status, Collection<String>> entry2 : subtypeMap.asMap().entrySet()) {
                    Validity.Status subtype = entry2.getKey();
                    // getStatusMap() returns a SetMultimap, so every value collection is a Set.
                    Set<String> set = (Set<String>) entry2.getValue();
                    String comment = info.statusComment.get(entry2.getKey());
                    if (comment != null) {
                        output.append("\t\t<!-- " + comment.replace("\n", "\n\t\t\t ") + " -->\n");
                    }
                    output.append("\t\t<id type='" + type + "' idStatus='" + subtype + "'>");
                    final int size = set.size();
                    output.append("\t\t<!-- " + size + " item" + (size != 1 ? "s" : "") // we know it’s English ;-)
                        + " -->");
                    // Large lists are grouped by two leading code points to keep lines short.
                    adder.reset(size > 600); // || type.equals("subdivision")
                    StringRange.compact(set, adder, true);
                    output.append("\n\t\t</id>\n");
                }
                // if (!info.newCodes.isEmpty()) {
                //     output.append("\t\t<!-- Codes added this release:\n\t\t\t" + showCodes(info.newCodes, "\n\t\t\t") + "\n\t\t-->\n");
                // }
                output.append("\t</idValidity>\n</supplementalData>\n");
            }
        }
        // System.out.println("TODO: add Unknown subdivisions, add private_use currencies, ...");
    }

    /**
     * Formats codes so that consecutive codes sharing the same prefix (the text before the
     * first '-') are separated by a space, and a change of prefix starts with {@code linePrefix}.
     * A code without '-' is treated as being its own prefix.
     *
     * @param newCodes codes to format, in iteration order
     * @param linePrefix separator inserted whenever the prefix changes (not before the first code)
     * @return the formatted list; empty if {@code newCodes} is empty
     */
    private static String showCodes(Set<String> newCodes, String linePrefix) {
        StringBuilder result = new StringBuilder();
        String last = null; // null = nothing emitted yet
        for (String s : newCodes) {
            int dash = s.indexOf('-');
            String newPrefix = dash < 0 ? s : s.substring(0, dash);
            if (newPrefix.equals(last)) {
                result.append(" ");
            } else {
                if (last != null) {
                    result.append(linePrefix);
                }
                last = newPrefix;
            }
            result.append(s);
        }
        return result.toString();
    }

    /**
     * Fills the "currency" Info: a currency is regular if, in at least one territory, it is
     * legal tender with an end date after the start of next year; otherwise it is deprecated.
     * Also prints every code whose status differs from the last release.
     *
     * @param types unused; the Info is obtained through {@link Info#getInfo}
     */
    private static void doCurrency(Map<String, Info> types) {
        Info info = Info.getInfo("currency");
        int currentYear = LocalDate.now().getYear();
        // Midnight on January 1 of next year in the default time zone, i.e. the end of this year.
        Date eoy = Date.from(LocalDate.of(currentYear + 1, 1, 1).atStartOfDay(ZoneId.systemDefault()).toInstant());
        for (String region : SDI.getCurrencyTerritories()) {
            for (CurrencyDateInfo data : SDI.getCurrencyDateInfo(region)) {
                String currency = data.getCurrency();
                Date end = data.getEnd();
                boolean legalTender = data.isLegalTender();
                Status newStatus = end.after(eoy) && legalTender ? Status.regular : Status.deprecated;
                info.putBest(currency, newStatus);
            }
        }
        info.put(LstrType.currency.unknown, Status.unknown);
        // make sure we don't overlap.
        // we want to keep any code that is valid in any territory, so
        info.remove("XXX", Status.deprecated);
        info.remove("XXX", Status.regular);

        // just to make sure info never disappears
        Map<String, Status> oldCodes = OLD_VALIDITY.getCodeToStatus(LstrType.currency);
        for (Entry<String, Status> entry : oldCodes.entrySet()) {
            String key = entry.getKey();
            Status oldStatus = entry.getValue();
            Status newStatus = info.get(key);
            if (!Objects.equal(oldStatus, newStatus)) {
                System.out.println("Status changed: " + key + ", " + oldStatus + " => " + newStatus);
            }
        }

        info.statusComment.put(Status.deprecated,
            "Deprecated values are those that are not legal tender in some country after " + currentYear + ".\n"
                + "More detailed usage information needed for some implementations is in supplemental data.");
    }

    /**
     * Fills the "subdivision" Info from the subdivision containment data. Contained codes
     * that have an alias are deprecated, the others regular. Codes present in the last
     * release but no longer contained anywhere are kept as deprecated.
     *
     * @param types unused; the Info is obtained through {@link Info#getInfo}
     */
    private static void doSubdivisions(Map<String, Info> types) {
        Info info = Info.getInfo("subdivision");
        Map<String, R2<List<String>, String>> aliases = SDI.getLocaleAliasInfo().get("subdivision");
        for (String container : SDI.getContainersForSubdivisions()) {
            for (String contained : SDI.getContainedSubdivisions(container)) {
                // Null-safe, matching the handling of missing alias data in doLstr.
                boolean hasAlias = aliases != null && aliases.containsKey(contained);
                Status status = hasAlias ? Validity.Status.deprecated : Validity.Status.regular;
                info.put(contained.toLowerCase(Locale.ROOT).replace("-", ""), status);
            }
        }

        // find out which items were valid, but are no longer in the containment map
        // add them as deprecated
        Map<Status, Set<String>> oldSubdivisionData = OLD_VALIDITY.getStatusToCodes(LstrType.subdivision);
        for (Entry<Status, Set<String>> entry : oldSubdivisionData.entrySet()) {
            for (String oldSdId : entry.getValue()) {
                if (info.get(oldSdId) == null) {
                    info.put(oldSdId, Status.deprecated);
                }
            }
        }

        info.statusComment.put(Status.deprecated,
            "Deprecated values include those that are not formally deprecated in the country in question, but have their own region codes.\n"
                + "It also include codes that were previously in CLDR, for compatibility.");
        info.statusComment.put(Status.unknown,
            "Unknown/Undetermined subdivision codes (ZZZZ) are defined for all regular region codes.");
    }

    /**
     * Fills one Info per language-subtag-registry type that is both {@code isLstr} and
     * {@code isUnicode}, assigning each code a {@link Validity.Status}. For every regular
     * region it also registers the unknown subdivision code {@code <region>zzzz} in the
     * "subdivision" Info. Scripts without {@link ScriptMetadata} are skipped and reported.
     *
     * @param types unused; the Infos are obtained through {@link Info#getInfo}
     * @throws IOException declared for callers; not thrown directly by the visible code
     */
    private static void doLstr(Map<String, Info> types) throws IOException {
        Set<String> skippedScripts = new TreeSet<>();
        for (Entry<LstrType, Map<String, Map<LstrField, String>>> entry : LSTREG.entrySet()) {
            LstrType type = entry.getKey();
            if (!type.isLstr || !type.isUnicode) {
                continue;
            }
            Info info = Info.getInfo(type.toString());
            Map<String, R2<List<String>, String>> aliases = SDI.getLocaleAliasInfo().get(type == LstrType.region ? "territory" : type.toString());
            if (aliases == null) {
                System.out.println("No aliases for: " + type);
            }
            // gather data
            info.clear();
            for (Entry<String, Map<LstrField, String>> entry2 : entry.getValue().entrySet()) {
                String code = entry2.getKey();
                Map<LstrField, String> data = entry2.getValue();
                // A missing Description is treated as "not private use" instead of throwing.
                String description = data.get(LstrField.Description);
                Validity.Status subtype = Validity.Status.regular;
                if (code.equals(type.unknown)) {
                    subtype = Validity.Status.unknown;
                } else if (type.specials.contains(code)) {
                    subtype = Validity.Status.special;
                } else if (aliases != null && aliases.containsKey(code)
                    || data.containsKey(LstrField.Deprecated)) {
                    subtype = Validity.Status.deprecated;
                } else if (description != null && description.startsWith("Private use")) {
                    subtype = Validity.Status.private_use;
                }
                // Type-specific adjustments; "continue" drops the code from the output entirely.
                switch (type) {
                case language:
                    if (subtype == Status.private_use && code.compareTo("qfz") < 0) {
                        subtype = Status.reserved;
                    } else if (code.equals("root")) {
                        continue;
                    }
                    break;
                case region:
                    if (containment.contains(code)) {
                        subtype = Validity.Status.macroregion;
                    } else if (code.equals("XA") || code.equals("XB")) {
                        subtype = Validity.Status.special;
                    }
                    switch (subtype) {
                    case regular:
                        Info subInfo = Info.getInfo("subdivision");
                        subInfo.put(code.toLowerCase(Locale.ROOT) + "zzzz", Status.unknown);
                        break;
                    case private_use:
                        if (code.compareTo("X") < 0) {
                            subtype = Status.reserved;
                        }
                        break;
                    default:
                        break;
                    }
                    break;
                case script:
                    switch (code) {
                    case "Aran":
                    case "Qaag":
                    case "Zsye":
                    case "Zanb":
                    case "Zinh":
                    case "Zyyy":
                        subtype = Status.special;
                        break;
                    default:
                        switch (subtype) {
                        case private_use:
                            if (code.compareTo("Qaaq") < 0) {
                                subtype = Validity.Status.reserved;
                            }
                            break;
                        case regular:
                            ScriptMetadata.Info scriptInfo = ScriptMetadata.getInfo(code);
                            if (scriptInfo == null && !code.equals("Hrkt")) {
                                skippedScripts.add(code);
                                continue;
                            }
                            break;
                        default: // don't care about rest
                            break;
                        }
                        break;
                    }
                    break;
                case variant:
                    if (VARIANT_EXTRAS.contains(code)) {
                        continue;
                    }
                    break;
                default:
                    break;
                }
                info.put(code, subtype);
            }
        }
        System.out.println("Skipping non-Unicode scripts: " + Joiner.on(' ').join(skippedScripts));
    }

    /** Variant codes that are excluded from the generated variant validity data. */
    static final Set<String> VARIANT_EXTRAS = ImmutableSet.of("POSIX", "REVISED", "SAAHO");
}