• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.tool;
2 
3 import java.io.IOException;
4 import java.io.PrintWriter;
5 import java.util.Arrays;
6 import java.util.HashMap;
7 import java.util.List;
8 import java.util.Map;
9 import java.util.Map.Entry;
10 import java.util.Set;
11 import java.util.TreeSet;
12 
13 import org.unicode.cldr.draft.FileUtilities;
14 import org.unicode.cldr.tool.SubdivisionNode.SubDivisionExtractor;
15 import org.unicode.cldr.tool.SubdivisionNode.SubdivisionSet;
16 import org.unicode.cldr.util.CLDRConfig;
17 import org.unicode.cldr.util.CLDRPaths;
18 import org.unicode.cldr.util.StandardCodes.LstrType;
19 import org.unicode.cldr.util.SupplementalDataInfo;
20 import org.unicode.cldr.util.Validity;
21 import org.unicode.cldr.util.Validity.Status;
22 
23 import com.google.common.collect.HashMultimap;
24 import com.google.common.collect.Multimap;
25 import com.ibm.icu.impl.Relation;
26 import com.ibm.icu.impl.Row.R2;
27 
28 public class GenerateSubdivisions {
29     private static final String ISO_COUNTRY_CODES = CLDRPaths.CLDR_PRIVATE_DIRECTORY + "iso_country_codes/";
30     static final String ISO_SUBDIVISION_CODES = ISO_COUNTRY_CODES + "iso_country_codes.xml";
31 
32 
33     // TODO: consider whether to use the last archive directory to generate
34     // There are pros and cons.
35     // Pros are that we don't introduce "fake" deprecated elements that are introduced and deprecated during the 6 month CLDR cycle
36     // Cons are that we may have to repeat work
37 
38 
39     static final class SubdivisionInfo {
40         static final SupplementalDataInfo SDI_LAST = SupplementalDataInfo.getInstance(CLDRPaths.LAST_RELEASE_DIRECTORY + "common/supplemental/");
41 
42         static final Map<String, R2<List<String>, String>> SUBDIVISION_ALIASES_FORMER = SDI_LAST.getLocaleAliasInfo().get("subdivision");
43 
44         static final SubdivisionNames SUBDIVISION_NAMES_ENGLISH_FORMER = new SubdivisionNames("en", "main", "subdivisions");
45 
46         static final Validity VALIDITY_FORMER = Validity.getInstance(CLDRPaths.LAST_RELEASE_DIRECTORY + "common/validity/");
47 
48         static final Relation<String, String> formerRegionToSubdivisions = Relation.of(new HashMap<String, Set<String>>(), TreeSet.class, CLDRConfig.getInstance().getComparatorRoot());
49         static {
50             Map<Status, Set<String>> oldSubdivisionData = VALIDITY_FORMER.getStatusToCodes(LstrType.subdivision);
51             for (Entry<Status, Set<String>> e : oldSubdivisionData.entrySet()) {
52                 final Status status = e.getKey();
53                 if (status != Status.unknown) { // special is a hack
54                     for (String sdCode : e.getValue()) {
55                         final String region = SubdivisionNames.getRegionFromSubdivision(sdCode);
formerRegionToSubdivisions.put(region, sdCode)56                         formerRegionToSubdivisions.put(region, sdCode);
57                     }
58                 }
59             }
formerRegionToSubdivisions.freeze()60             formerRegionToSubdivisions.freeze();
61         }
62 
63         static final Multimap<String, String> subdivisionIdToOld = HashMultimap.create();
64         static {
65             for (Entry<String, R2<List<String>, String>> entry : SUBDIVISION_ALIASES_FORMER.entrySet()) {
66                 String oldId = entry.getKey();
67                 for (String newId : entry.getValue().get0()) {
subdivisionIdToOld.put(newId, oldId)68                     subdivisionIdToOld.put(newId, oldId);
69                 }
70             }
71         }
72     }
73 
main(String[] args)74     public static void main(String[] args) throws IOException {
75         CLDRConfig.getInstance().getSupplementalDataInfo();
76         // TODO Restructure so that this call is done first to process the iso data
77         // then the extraction uses that data.
78         // also restructure the SubdivisionInfo to not be static
79         boolean preprocess = args.length > 0;
80         if (preprocess) {
81             for (String source : Arrays.asList(
82                 "2015-05-04_iso_country_code_ALL_xml",
83                 "2016-01-13_iso_country_code_ALL_xml",
84                 "2016-12-09_iso_country_code_ALL_xml",
85                 "2017-02-12_iso_country_code_ALL_xml",
86                 "2017-09-15_iso_country_code_ALL_xml",
87                 "2018-02-20_iso_country_code_ALL_xml",
88                 "2018-09-02_iso_country_code_ALL_xml",
89                 "2019-02-26_iso_country_code_ALL_xml",
90                 "2020-03-05_iso_country_code_ALL_xml"
91                 )) {
92                 SubdivisionSet sdset1 = new SubdivisionSet(CLDRPaths.CLDR_PRIVATE_DIRECTORY + source + "/iso_country_codes.xml");
93                 try (PrintWriter pw = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "subdivision/" + source + ".txt")) {
94                     sdset1.print(pw);
95                 }
96             }
97             return;
98         }
99 
100         SubdivisionSet sdset1 = new SubdivisionSet(GenerateSubdivisions.ISO_SUBDIVISION_CODES);
101         SubDivisionExtractor sdset = new SubDivisionExtractor(sdset1,
102             SubdivisionInfo.VALIDITY_FORMER,
103             SubdivisionInfo.SUBDIVISION_ALIASES_FORMER,
104             SubdivisionInfo.formerRegionToSubdivisions);
105 
106         try (PrintWriter pw = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "subdivision/subdivisions.xml")) {
107             sdset.printXml(pw);
108         }
109         try (PrintWriter pw = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "subdivision/subdivisionAliases.txt")) {
110             sdset.printAliases(pw);
111         }
112         try (PrintWriter pw = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "subdivision/en.xml")) {
113             sdset.printEnglish(pw);
114         }
115         try (PrintWriter pw = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "subdivision/categories.txt")) {
116             sdset.printSamples(pw);
117         }
118         try (PrintWriter pw = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "subdivision/en.txt")) {
119             sdset.printEnglishComp(pw);
120         }
121         try (PrintWriter pw = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "subdivision/en-full.txt")) {
122             sdset.printEnglishCompFull(pw);
123         }
124         try (PrintWriter pw = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "subdivision/missing-mid.txt")) {
125             sdset.printMissingMIDs(pw);
126         }
127     }
128 
129 }
130