1 package org.unicode.cldr.tool; 2 3 import java.io.IOException; 4 import java.io.PrintWriter; 5 import java.util.Arrays; 6 import java.util.HashMap; 7 import java.util.List; 8 import java.util.Map; 9 import java.util.Map.Entry; 10 import java.util.Set; 11 import java.util.TreeSet; 12 13 import org.unicode.cldr.draft.FileUtilities; 14 import org.unicode.cldr.tool.SubdivisionNode.SubDivisionExtractor; 15 import org.unicode.cldr.tool.SubdivisionNode.SubdivisionSet; 16 import org.unicode.cldr.util.CLDRConfig; 17 import org.unicode.cldr.util.CLDRPaths; 18 import org.unicode.cldr.util.StandardCodes.LstrType; 19 import org.unicode.cldr.util.SupplementalDataInfo; 20 import org.unicode.cldr.util.Validity; 21 import org.unicode.cldr.util.Validity.Status; 22 23 import com.google.common.collect.HashMultimap; 24 import com.google.common.collect.Multimap; 25 import com.ibm.icu.impl.Relation; 26 import com.ibm.icu.impl.Row.R2; 27 28 public class GenerateSubdivisions { 29 private static final String ISO_COUNTRY_CODES = CLDRPaths.CLDR_PRIVATE_DIRECTORY + "iso_country_codes/"; 30 static final String ISO_SUBDIVISION_CODES = ISO_COUNTRY_CODES + "iso_country_codes.xml"; 31 32 33 // TODO: consider whether to use the last archive directory to generate 34 // There are pros and cons. 35 // Pros are that we don't introduce "fake" deprecated elements that are introduced and deprecated during the 6 month CLDR cycle 36 // Cons are that we may have to repeat work 37 38 39 static final class SubdivisionInfo { 40 static final SupplementalDataInfo SDI_LAST = SupplementalDataInfo.getInstance(CLDRPaths.LAST_RELEASE_DIRECTORY + "common/supplemental/"); 41 42 static final Map<String, R2<List<String>, String>> SUBDIVISION_ALIASES_FORMER = SDI_LAST.getLocaleAliasInfo().get("subdivision"); 43 44 static final SubdivisionNames SUBDIVISION_NAMES_ENGLISH_FORMER = new SubdivisionNames("en", "main", "subdivisions"); 45 46 static final Validity VALIDITY_FORMER = Validity.getInstance(CLDRPaths.LAST_RELEASE_DIRECTORY + "common/validity/"); 47 48 static final Relation<String, String> formerRegionToSubdivisions = Relation.of(new HashMap<String, Set<String>>(), TreeSet.class, CLDRConfig.getInstance().getComparatorRoot()); 49 static { 50 Map<Status, Set<String>> oldSubdivisionData = VALIDITY_FORMER.getStatusToCodes(LstrType.subdivision); 51 for (Entry<Status, Set<String>> e : oldSubdivisionData.entrySet()) { 52 final Status status = e.getKey(); 53 if (status != Status.unknown) { // special is a hack 54 for (String sdCode : e.getValue()) { 55 final String region = SubdivisionNames.getRegionFromSubdivision(sdCode); formerRegionToSubdivisions.put(region, sdCode)56 formerRegionToSubdivisions.put(region, sdCode); 57 } 58 } 59 } formerRegionToSubdivisions.freeze()60 formerRegionToSubdivisions.freeze(); 61 } 62 63 static final Multimap<String, String> subdivisionIdToOld = HashMultimap.create(); 64 static { 65 for (Entry<String, R2<List<String>, String>> entry : SUBDIVISION_ALIASES_FORMER.entrySet()) { 66 String oldId = entry.getKey(); 67 for (String newId : entry.getValue().get0()) { subdivisionIdToOld.put(newId, oldId)68 subdivisionIdToOld.put(newId, oldId); 69 } 70 } 71 } 72 } 73 main(String[] args)74 public static void main(String[] args) throws IOException { 75 CLDRConfig.getInstance().getSupplementalDataInfo(); 76 // TODO Restructure so that this call is done first to process the iso data 77 // then the extraction uses that data. 78 // also restructure the SubdivisionInfo to not be static 79 boolean preprocess = args.length > 0; 80 if (preprocess) { 81 for (String source : Arrays.asList( 82 "2015-05-04_iso_country_code_ALL_xml", 83 "2016-01-13_iso_country_code_ALL_xml", 84 "2016-12-09_iso_country_code_ALL_xml", 85 "2017-02-12_iso_country_code_ALL_xml", 86 "2017-09-15_iso_country_code_ALL_xml", 87 "2018-02-20_iso_country_code_ALL_xml", 88 "2018-09-02_iso_country_code_ALL_xml", 89 "2019-02-26_iso_country_code_ALL_xml", 90 "2020-03-05_iso_country_code_ALL_xml" 91 )) { 92 SubdivisionSet sdset1 = new SubdivisionSet(CLDRPaths.CLDR_PRIVATE_DIRECTORY + source + "/iso_country_codes.xml"); 93 try (PrintWriter pw = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "subdivision/" + source + ".txt")) { 94 sdset1.print(pw); 95 } 96 } 97 return; 98 } 99 100 SubdivisionSet sdset1 = new SubdivisionSet(GenerateSubdivisions.ISO_SUBDIVISION_CODES); 101 SubDivisionExtractor sdset = new SubDivisionExtractor(sdset1, 102 SubdivisionInfo.VALIDITY_FORMER, 103 SubdivisionInfo.SUBDIVISION_ALIASES_FORMER, 104 SubdivisionInfo.formerRegionToSubdivisions); 105 106 try (PrintWriter pw = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "subdivision/subdivisions.xml")) { 107 sdset.printXml(pw); 108 } 109 try (PrintWriter pw = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "subdivision/subdivisionAliases.txt")) { 110 sdset.printAliases(pw); 111 } 112 try (PrintWriter pw = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "subdivision/en.xml")) { 113 sdset.printEnglish(pw); 114 } 115 try (PrintWriter pw = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "subdivision/categories.txt")) { 116 sdset.printSamples(pw); 117 } 118 try (PrintWriter pw = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "subdivision/en.txt")) { 119 sdset.printEnglishComp(pw); 120 } 121 try (PrintWriter pw = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "subdivision/en-full.txt")) { 122 sdset.printEnglishCompFull(pw); 123 } 124 try (PrintWriter pw = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "subdivision/missing-mid.txt")) { 125 sdset.printMissingMIDs(pw); 126 } 127 } 128 129 } 130