1 package org.unicode.cldr.util; 2 3 import java.io.BufferedReader; 4 import java.io.IOException; 5 import java.util.Arrays; 6 import java.util.Collections; 7 import java.util.HashMap; 8 import java.util.HashSet; 9 import java.util.LinkedHashSet; 10 import java.util.Map; 11 import java.util.Set; 12 import java.util.TreeSet; 13 14 import com.google.common.base.Joiner; 15 import com.ibm.icu.text.UnicodeSet; 16 import com.ibm.icu.util.ICUUncheckedIOException; 17 18 public class IsoRegionData { 19 static Map<String, String> _numeric = new HashMap<>(); 20 static Map<String, String> _alpha3 = new HashMap<>(); 21 static Map<String, String> _fips10 = new HashMap<>(); 22 static Map<String, String> _internet = new HashMap<>(); 23 static Set<String> other_internet = new TreeSet<>(); 24 static Set<String> available = new HashSet<>(); 25 26 static final UnicodeSet NMTOKEN = new UnicodeSet( 27 "[\\-.0-\\:A-Z_a-z\\u00B7\\u00C0-\\u00D6\\u00D8-\\u00F6\\u00F8-\\u037D\\u037F-\\u1FFF\\u200C\\u200D\\u203F\\u2040\\u2070-\\u218F\\u2C00-\\u2FEF\\u3001-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFFD\\U00010000-\\U000EFFFF]") 28 .freeze(); 29 30 static { 31 /* 32 * # RFC3066; UN Numeric; ISO3166 Alpha-3, internet, FIPS-10 33 * # whitespace delimited: - for empty 34 * # See http://unstats.un.org/unsd/methods/m49/m49regin.htm 35 * # and http://www.iso.org/iso/en/prods-services/iso3166ma/01whats-new/index.html 36 * # See also http://www.cia.gov/cia/publications/factbook/appendix/appendix-d.html 37 * # and http://data.iana.org/TLD/tlds-alpha-by-domain.txt for the latest domains 38 * # and http://www.iana.org/cctld/cctld-whois.htm 39 * # and https://www.icmwg.org/ccwg/documents/ISO3166-FIPS10-A2-Mapping/3166-1-A2--to-FIPS10-A2-mapping.htm 40 * # for FIPS: http://earth-info.nga.mil/gns/html/fips_files.html 41 * RS 688 SRB rs RB 42 */ 43 try { 44 BufferedReader codes; 45 codes = CldrUtility.getUTF8Data("tlds-alpha-by-domain.txt"); 46 47 while (true) { 48 String line = codes.readLine(); 49 if (line == null) 50 break; 51 line = line.split("#")[0].trim(); 52 if (line.length() == 0) 53 continue; 54 // if (line.startsWith("XN--")) { 55 // try { 56 // line = Punycode.decode(line.substring(4), null).toString(); 57 // if (!NMTOKEN.containsAll(line)) { 58 // System.err.println("!NMTOKEN:" + line); 59 // continue; 60 // } 61 // } catch (StringPrepParseException e) { 62 // throw new IllegalArgumentException(e); 63 // } 64 // } 65 other_internet.add(line); 66 } codes.close()67 codes.close(); 68 69 Set<String> errors = new LinkedHashSet<>(); 70 codes = CldrUtility.getUTF8Data("territory_codes.txt"); 71 while (true) { 72 String line = codes.readLine(); 73 if (line == null) 74 break; 75 line = line.split("#")[0].trim(); 76 if (line.length() == 0) 77 continue; 78 String[] sourceValues = line.split("\\s+"); 79 String[] values = new String[5]; 80 for (int i = 0; i < values.length; ++i) { 81 if (i >= sourceValues.length || sourceValues[i].equals("-")) { 82 values[i] = null; 83 } else { 84 values[i] = sourceValues[i]; 85 } 86 } 87 String alpha2 = values[0]; 88 String numeric = values[1]; 89 String alpha3 = values[2]; 90 String internet = values[3]; 91 if (internet != null) { 92 internet = internet.toUpperCase(); 93 LinkedHashSet<String> internetStrings = new LinkedHashSet<>( 94 Arrays.asList(internet.split("/"))); 95 if (!other_internet.containsAll(internetStrings)) { 96 errors.addAll(internetStrings); 97 errors.removeAll(other_internet); 98 } 99 other_internet.removeAll(internetStrings); 100 internet = Joiner.on(" ").join(internetStrings); 101 } 102 String fips10 = values[4]; _numeric.put(alpha2, numeric)103 _numeric.put(alpha2, numeric); _alpha3.put(alpha2, alpha3)104 _alpha3.put(alpha2, alpha3); _fips10.put(alpha2, fips10)105 _fips10.put(alpha2, fips10); _internet.put(alpha2, internet)106 _internet.put(alpha2, internet); 107 } codes.close()108 codes.close(); 109 if (errors.size() != 0) { 110 throw new IllegalArgumentException("Internet values illegal: " + errors); 111 } 112 } catch (IOException e) { 113 throw new ICUUncheckedIOException(e); 114 } 115 _internet.put("ZZ", Joiner.on(" ").join(other_internet)); 116 117 other_internet = Collections.unmodifiableSet(other_internet); 118 _numeric.keySet()119 available.addAll(_numeric.keySet()); _alpha3.keySet()120 available.addAll(_alpha3.keySet()); _fips10.keySet()121 available.addAll(_fips10.keySet()); _internet.keySet()122 available.addAll(_internet.keySet()); 123 124 _numeric = Collections.unmodifiableMap(_numeric); 125 _alpha3 = Collections.unmodifiableMap(_alpha3); 126 _fips10 = Collections.unmodifiableMap(_fips10); 127 _internet = Collections.unmodifiableMap(_internet); 128 available = Collections.unmodifiableSet(available); 129 } 130 getNumeric(String countryCodeAlpha2)131 public static String getNumeric(String countryCodeAlpha2) { 132 return _numeric.get(countryCodeAlpha2); 133 } 134 get_alpha3(String countryCodeAlpha2)135 public static String get_alpha3(String countryCodeAlpha2) { 136 return _alpha3.get(countryCodeAlpha2); 137 } 138 get_fips10(String countryCodeAlpha2)139 public static String get_fips10(String countryCodeAlpha2) { 140 return _fips10.get(countryCodeAlpha2); 141 } 142 get_internet(String countryCodeAlpha2)143 public static String get_internet(String countryCodeAlpha2) { 144 return _internet.get(countryCodeAlpha2); 145 } 146 getOtherInternet()147 public static Set<String> getOtherInternet() { 148 return other_internet; 149 } 150 getAvailable()151 public static Set<String> getAvailable() { 152 return available; 153 } 154 } 155