1 package org.unicode.cldr.tool; 2 3 import java.io.IOException; 4 import java.io.PrintWriter; 5 import java.util.Arrays; 6 import java.util.HashMap; 7 import java.util.List; 8 import java.util.Map; 9 import java.util.Map.Entry; 10 import java.util.Set; 11 import java.util.TreeSet; 12 13 import org.unicode.cldr.draft.FileUtilities; 14 import org.unicode.cldr.tool.SubdivisionNode.SubDivisionExtractor; 15 import org.unicode.cldr.tool.SubdivisionNode.SubdivisionSet; 16 import org.unicode.cldr.util.CLDRPaths; 17 import org.unicode.cldr.util.StandardCodes.LstrType; 18 import org.unicode.cldr.util.SupplementalDataInfo; 19 import org.unicode.cldr.util.Validity; 20 import org.unicode.cldr.util.Validity.Status; 21 22 import com.google.common.collect.HashMultimap; 23 import com.google.common.collect.Multimap; 24 import com.ibm.icu.impl.Relation; 25 import com.ibm.icu.impl.Row.R2; 26 27 public class GenerateSubdivisions { 28 private static final String ISO_COUNTRY_CODES = CLDRPaths.CLDR_PRIVATE_DIRECTORY + "iso_country_codes/"; 29 static final String ISO_SUBDIVISION_CODES = ISO_COUNTRY_CODES + "iso_country_codes.xml"; 30 31 32 // TODO: consider whether to use the last archive directory to generate 33 // There are pros and cons. 34 // Pros are that we don't introduce "fake" deprecated elements that are introduced and deprecated during the 6 month CLDR cycle 35 // Cons are that we may have to repeat work 36 37 38 static final class SubdivisionInfo { 39 static final SupplementalDataInfo SDI = SupplementalDataInfo.getInstance(CLDRPaths.LAST_RELEASE_DIRECTORY + "common/supplemental/"); 40 41 static final Map<String, R2<List<String>, String>> SUBDIVISION_ALIASES_FORMER = SDI.getLocaleAliasInfo().get("subdivision"); 42 43 static final SubdivisionNames SUBDIVISION_NAMES_ENGLISH_FORMER = new SubdivisionNames("en"); 44 45 static final Validity VALIDITY_FORMER = Validity.getInstance(CLDRPaths.LAST_RELEASE_DIRECTORY + "common/validity/"); 46 47 static final Relation<String, String> formerRegionToSubdivisions = Relation.of(new HashMap<String, Set<String>>(), TreeSet.class, SubdivisionNode.ROOT_COL); 48 static { 49 Map<Status, Set<String>> oldSubdivisionData = VALIDITY_FORMER.getStatusToCodes(LstrType.subdivision); 50 for (Entry<Status, Set<String>> e : oldSubdivisionData.entrySet()) { 51 final Status status = e.getKey(); 52 if (status != Status.unknown) { // special is a hack 53 for (String sdCode : e.getValue()) { 54 final String region = SubdivisionNames.getRegionFromSubdivision(sdCode); formerRegionToSubdivisions.put(region, sdCode)55 formerRegionToSubdivisions.put(region, sdCode); 56 } 57 } 58 } formerRegionToSubdivisions.freeze()59 formerRegionToSubdivisions.freeze(); 60 } 61 62 static final Multimap<String, String> subdivisionIdToOld = HashMultimap.create(); 63 static { 64 for (Entry<String, R2<List<String>, String>> entry : SUBDIVISION_ALIASES_FORMER.entrySet()) { 65 String oldId = entry.getKey(); 66 for (String newId : entry.getValue().get0()) { subdivisionIdToOld.put(newId, oldId)67 subdivisionIdToOld.put(newId, oldId); 68 } 69 } 70 } 71 } 72 main(String[] args)73 public static void main(String[] args) throws IOException { 74 // TODO Restructure so that this call is done first to process the iso data 75 // then the extraction uses that data. 76 // also restructure the SubdivisionInfo to not be static 77 boolean preprocess = args.length > 0; 78 if (preprocess) { 79 for (String source : Arrays.asList( 80 "2015-05-04_iso_country_code_ALL_xml", 81 "2016-01-13_iso_country_code_ALL_xml", 82 "2016-12-09_iso_country_code_ALL_xml", 83 "2017-02-12_iso_country_code_ALL_xml", 84 "2017-09-15_iso_country_code_ALL_xml", 85 "2018-02-20_iso_country_code_ALL_xml", 86 "2018-09-02_iso_country_code_ALL_xml")) { 87 SubdivisionSet sdset1 = new SubdivisionSet(CLDRPaths.CLDR_PRIVATE_DIRECTORY + source + "/iso_country_codes.xml"); 88 try (PrintWriter pw = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "subdivision/" + source + ".txt")) { 89 sdset1.print(pw); 90 } 91 } 92 return; 93 } 94 95 SubdivisionSet sdset1 = new SubdivisionSet(GenerateSubdivisions.ISO_SUBDIVISION_CODES); 96 SubDivisionExtractor sdset = new SubDivisionExtractor(sdset1, 97 SubdivisionInfo.VALIDITY_FORMER, 98 SubdivisionInfo.SUBDIVISION_ALIASES_FORMER, 99 SubdivisionInfo.formerRegionToSubdivisions); 100 101 try (PrintWriter pw = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "subdivision/subdivisions.xml")) { 102 sdset.printXml(pw); 103 } 104 try (PrintWriter pw = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "subdivision/subdivisionAliases.txt")) { 105 sdset.printAliases(pw); 106 } 107 try (PrintWriter pw = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "subdivision/en.xml")) { 108 sdset.printEnglish(pw); 109 } 110 try (PrintWriter pw = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "subdivision/categories.txt")) { 111 sdset.printSamples(pw); 112 } 113 try (PrintWriter pw = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "subdivision/en.txt")) { 114 sdset.printEnglishComp(pw); 115 } 116 try (PrintWriter pw = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "subdivision/en-full.txt")) { 117 sdset.printEnglishCompFull(pw); 118 } 119 try (PrintWriter pw = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "subdivision/missing-mid.txt")) { 120 sdset.printMissingMIDs(pw); 121 } 122 } 123 124 } 125