package org.unicode.cldr.util;

import com.google.common.base.Joiner;
import com.ibm.icu.impl.Relation;
import com.ibm.icu.text.Transform;
import com.ibm.icu.text.Transliterator;
import com.ibm.icu.util.ICUUncheckedIOException;
import com.ibm.icu.util.Output;
import com.ibm.icu.util.ULocale;
import java.io.BufferedReader;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import org.unicode.cldr.tool.CountryCodeConverter;
import org.unicode.cldr.tool.ToolConfig;
import org.unicode.cldr.util.ChainedMap.M3;

/**
 * Static lookup tables built from the UN/LOCODE code lists and the ISO 3166-2 subdivision data
 * files under {@code data/external/}. All data is loaded once, in the static initializer; problems
 * found while loading are collected and exposed through {@link #getLoadErrors()}.
 */
public class Unlocode {

    /** Charset used to read the UN/LOCODE CSV files. */
    private static final Charset LATIN1 = StandardCharsets.ISO_8859_1;

    /** When true, {@link #main} additionally dumps every loaded locode starting with "GT". */
    private static final boolean DEBUG_SHOW_GT_LOCODES = false;

    /** A value that can be combined with another value of the same type. */
    public interface Mergeable<T> {
        /**
         * Returns the combination of this value and {@code a}.
         *
         * @param a the value to merge into this one
         * @return the merged value
         */
        T merge(T a);
    }

    /** The set of names recorded for one ISO 3166-2 subdivision code. Immutable. */
    public static class Iso3166_2Data implements Mergeable<Iso3166_2Data> {
        /** Names in insertion order; unmodifiable. */
        public final Set<String> names;

        public Iso3166_2Data(String... name) {
            this(Arrays.asList(name));
        }

        public Iso3166_2Data(Collection<String> names) {
            this.names = Collections.unmodifiableSet(new LinkedHashSet<>(names));
        }

        @Override
        public String toString() {
            return names.toString();
        }

        /** Two instances are equal when their name sets are equal. */
        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (!(obj instanceof Iso3166_2Data)) {
                return false;
            }
            // Compare names with names; comparing the set with the wrapper object itself
            // (as the code used to do) can never be true.
            return names.equals(((Iso3166_2Data) obj).names);
        }

        @Override
        public int hashCode() {
            return names.hashCode();
        }

        /** Returns a new instance holding this instance's names followed by {@code b}'s. */
        @Override
        public Iso3166_2Data merge(Iso3166_2Data b) {
            LinkedHashSet<String> set = new LinkedHashSet<>(names);
            set.addAll(b.names);
            return new Iso3166_2Data(set);
        }
    }

    /**
     * One UN/LOCODE record: the five-letter code, its names, its subdivision and its position.
     * Immutable.
     *
     * <p>Ordering, equality and hashing all use {@link #locode} only.
     */
    public static class LocodeData implements Mergeable<LocodeData>, Comparable<LocodeData> {
        public final String locode;
        /** Names in insertion order; unmodifiable. */
        public final Set<String> names;
        /** Either empty or {@code countryCode + "-" + subdivisionCode} (as built by {@code load}). */
        public final String subdivision;
        /** Latitude as produced by {@code parse}; negative for the southern hemisphere. */
        public final float north;
        /** Longitude as produced by {@code parse}; negative for the western hemisphere. */
        public final float east;

        public LocodeData(
                String locode, String name, String subdivision, float north, float east) {
            this(locode, Arrays.asList(name), subdivision, north, east);
        }

        public LocodeData(
                String locode,
                Collection<String> names,
                String subdivision,
                float north,
                float east) {
            this.locode = locode;
            this.names = Collections.unmodifiableSet(new LinkedHashSet<>(names));
            this.subdivision = subdivision;
            this.north = north;
            this.east = east;
        }

        @Override
        public String toString() {
            return names + ", " + locode + ", " + subdivision + ", " + north + ", " + east;
        }

        /** Warning, must never have locode datas with the same locode and different other data. */
        @Override
        public int compareTo(LocodeData o) {
            return locode.compareTo(o.locode);
        }

        /** Warning, must never have locode datas with the same locode and different other data. */
        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            // Guard the cast so that null and foreign types compare unequal instead of throwing.
            if (!(obj instanceof LocodeData)) {
                return false;
            }
            return locode.equals(((LocodeData) obj).locode);
        }

        @Override
        public int hashCode() {
            return locode.hashCode();
        }

        /**
         * Returns a record with the names of both records, provided they agree on locode,
         * subdivision and coordinates.
         *
         * @throws IllegalArgumentException if any of those fields differ
         */
        @Override
        public LocodeData merge(LocodeData other) {
            if (locode.equals(other.locode)
                    && subdivision.equals(other.subdivision)
                    && north == other.north
                    && east == other.east) {
                LinkedHashSet<String> set = new LinkedHashSet<>(names);
                set.addAll(other.names);
                return new LocodeData(locode, set, subdivision, north, east);
            }
            throw new IllegalArgumentException("Can't merge " + this + " with " + other);
        }
    }

    static Map<String, LocodeData> locodeToData = new HashMap<>();
    static Relation<String, LocodeData> nameToLocodeData =
            Relation.of(new HashMap<String, Set<LocodeData>>(), HashSet.class);
    static Map<String, Iso3166_2Data> iso3166_2Data = new HashMap<>();
    static Relation<String, String> ERRORS =
            Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class);

    static {
        // read the data
        try {
            loadIso();
            iso3166_2Data = Collections.unmodifiableMap(iso3166_2Data);
            load(1);
            load(2);
            load(3);
            // load exceptions
            loadAlternateNames();
            for (LocodeData s : locodeToData.values()) {
                for (String name : s.names) {
                    nameToLocodeData.put(name, s);
                }
            }
            nameToLocodeData.freeze();
            locodeToData = Collections.unmodifiableMap(locodeToData);
            ERRORS.freeze();
        } catch (IOException e) {
            // Only loadIso()/load() can get here, i.e. before ERRORS is frozen. Record the
            // failure instead of silently dropping it, so callers can see via getLoadErrors()
            // that the tables are incomplete. Class initialisation still succeeds, as before.
            ERRORS.put("load", "Can't read UN/LOCODE data: " + e);
        }
    }

    /**
     * Applies {@code data/external/alternate_locode_name.txt}: each non-comment line is {@code
     * locode ; name}. {@code #} starts a comment and a line reading {@code EOF} ends the file.
     *
     * @throws IllegalArgumentException if a line has no {@code ;} or its locode is not five
     *     characters long once spaces are removed
     * @throws ICUUncheckedIOException if the file cannot be read
     */
    private static void loadAlternateNames() {
        try (BufferedReader br =
                FileReaders.openFile(
                        CldrUtility.class, "data/external/alternate_locode_name.txt")) {
            String line;
            while ((line = br.readLine()) != null) {
                int hash = line.indexOf('#');
                if (hash >= 0) {
                    line = line.substring(0, hash);
                }
                line = line.trim();
                if (line.isEmpty()) {
                    continue;
                }
                if (line.equals("EOF")) {
                    break;
                }
                String[] parts = line.split("\\s*;\\s*");
                if (parts.length < 2) {
                    throw new IllegalArgumentException(line);
                }
                String locode = parts[0].replace(" ", "");
                if (locode.length() != 5) {
                    throw new IllegalArgumentException(line);
                }
                String alternateName = parts[1];
                LocodeData existing = locodeToData.get(locode);
                if (existing == null) {
                    // Nothing to copy subdivision/coordinates from; report and move on rather
                    // than failing class initialisation with a NullPointerException.
                    ERRORS.put(locode, "Alternate name for unknown locode on line " + line);
                    continue;
                }
                putCheckingDuplicate(
                        locodeToData,
                        locode,
                        new LocodeData(
                                locode,
                                alternateName,
                                existing.subdivision,
                                existing.north,
                                existing.east));
            }
        } catch (IOException e) {
            throw new ICUUncheckedIOException(e);
        }
    }

    /*
     * Layout of one UN/LOCODE CSV row, see
     * http://www.unece.org/fileadmin/DAM/cefact/locode/unlocode_manual.pdf
     *
     * Example row:
     *  0 ,
     *  1 "AD",
     *  2 "SJL",
     *  3 "Sant Julià de Lòria",
     *  4 "Sant Julia de Loria",
     *  5 ?,
     *  6 "--3-----",
     *  7 "RL",
     *  8 "1101",
     *  9 ,
     * 10 "4228N 00130E",""
     *
     * Columns:
     *  0 Change ("X" = marked for deletion in the next issue)
     *  1 Country code ("XZ" = no country)
     *  2 LOCODE
     *  3 Name
     *  4 Name without diacritics
     *  5 Subdivision
     *  6 Function
     *  7 Status
     *  8 Date
     *  9 IATA
     * 10 Latitude/Longitude, e.g. Torbay: 47°39′N 052°44′W is "4739N 05244W"
     */

    /** Columns of {@code subdivisionData.txt}, in file order (the ordinal is the column index). */
    enum SubdivisionFields {
        Subdivision_category,
        Code_3166_2,
        Subdivision_name,
        Language_code,
        Romanization_system,
        Parent_subdivision
    }

    /**
     * Reads {@code data/external/subdivisionData.txt} (tab-separated, UTF-8, {@code #} comments)
     * into the ISO 3166-2 table. A trailing {@code *} on the code is dropped.
     *
     * @throws IOException if the file cannot be read
     */
    public static void loadIso() throws IOException {
        try (BufferedReader br =
                FileReaders.openFile(
                        CldrUtility.class,
                        "data/external/subdivisionData.txt",
                        StandardCharsets.UTF_8)) {
            // Subdivision category TAB 3166-2 code TAB Subdivision name TAB Language code TAB
            // Romanization system TAB Parent subdivision
            String line;
            while ((line = br.readLine()) != null) {
                int hash = line.indexOf('#');
                if (hash >= 0) {
                    line = line.substring(0, hash);
                }
                if (line.trim().isEmpty()) {
                    continue;
                }
                String[] list = line.split("\t");
                String locode = list[SubdivisionFields.Code_3166_2.ordinal()].trim();
                if (locode.endsWith("*")) {
                    locode = locode.substring(0, locode.length() - 1);
                }
                String bestName = list[SubdivisionFields.Subdivision_name.ordinal()].trim();
                putCheckingDuplicate(iso3166_2Data, locode, new Iso3166_2Data(bestName));
            }
        }
    }

    /**
     * Reads one part of the UN/LOCODE code list ({@code
     * data/external/2013-1_UNLOCODE_CodeListPart<file>.csv}) into the locode table.
     *
     * <p>Rows whose change column is {@code X} are skipped. Rows whose change column is {@code =}
     * define a name pair for the current country and add no record themselves. Rows without a
     * LOCODE suffix are skipped. A subdivision that is not in the ISO 3166-2 table is reported in
     * the load errors.
     *
     * @param file the part number used in the file name
     * @throws IOException if the file cannot be read
     * @throws IllegalArgumentException if an {@code =} row does not hold exactly two names
     */
    public static void load(int file) throws IOException {
        try (BufferedReader br =
                FileReaders.openFile(
                        CldrUtility.class,
                        "data/external/2013-1_UNLOCODE_CodeListPart" + file + ".csv",
                        LATIN1)) {
            M3<String, String, Boolean> nameToAlternate =
                    ChainedMap.of(
                            new TreeMap<String, Object>(),
                            new TreeMap<String, Object>(),
                            Boolean.class);
            Output<String> tempOutput = new Output<>();

            String oldCountryCode = null;
            String line;
            while ((line = br.readLine()) != null) {
                line = line.trim();
                if (line.isEmpty()) {
                    continue;
                }
                String[] list = CldrUtility.splitCommaSeparated(line);
                String change = list[0];
                String locSuffix = list[2];
                if (change.equals("X")) {
                    continue;
                }
                String countryCode = list[1];
                if (!countryCode.equals(oldCountryCode)) {
                    // alternates only apply within one country
                    nameToAlternate.clear();
                    oldCountryCode = countryCode;
                }
                String name = list[3];
                String name2 = list[4];

                if (change.equals("=")) {
                    putNamePair(nameToAlternate, name, line);
                    if (!name.equals(name2)) {
                        putNamePair(nameToAlternate, name2, line);
                    }
                    continue;
                }
                if (locSuffix.isEmpty()) {
                    continue;
                }

                name = removeParens(name, tempOutput);
                String name3 = tempOutput.value;
                name2 = removeParens(name2, tempOutput);
                String name4 = tempOutput.value;

                String subdivision = list[5];
                if (!subdivision.isEmpty()) {
                    subdivision = countryCode + "-" + subdivision;
                    if (getIso3166_2Data(subdivision) == null) {
                        ERRORS.put(
                                subdivision,
                                "Missing subdivision " + subdivision + " on line " + line);
                    }
                }
                String latLong = list[10];
                float latN = 0;
                float longE = 0;
                if (!latLong.isEmpty()) {
                    String[] latLongParts = latLong.split(" ");
                    latN = parse(latLongParts[0]);
                    longE = parse(latLongParts[1]);
                }
                String locode = countryCode + locSuffix;

                // The order of these puts is significant: see putCheckingDuplicate.
                putLocodeName(locode, name, subdivision, latN, longE);
                putAlternateNames(nameToAlternate.get(name), locode, subdivision, latN, longE);
                if (!name2.equals(name)) {
                    putLocodeName(locode, name2, subdivision, latN, longE);
                    putAlternateNames(
                            nameToAlternate.get(name2), locode, subdivision, latN, longE);
                }
                if (name3 != null) {
                    putLocodeName(locode, name3, subdivision, latN, longE);
                }
                if (name4 != null && !name4.equals(name3)) {
                    putLocodeName(locode, name4, subdivision, latN, longE);
                }
            }
        }
    }

    /** Splits {@code pair} on {@code =} and records its first half as an alternate of its second. */
    private static void putNamePair(
            M3<String, String, Boolean> nameToAlternate, String pair, String line) {
        String[] names = pair.split("\\s*=\\s*");
        if (names.length != 2) {
            throw new IllegalArgumentException(
                    "Expected two '='-separated names on line: " + line);
        }
        nameToAlternate.put(names[1], names[0], Boolean.TRUE);
    }

    /** Stores a single-name record for {@code locode} through {@link #putCheckingDuplicate}. */
    private static void putLocodeName(
            String locode, String name, String subdivision, float north, float east) {
        putCheckingDuplicate(
                locodeToData, locode, new LocodeData(locode, name, subdivision, north, east));
    }

    /** Stores one record per key of {@code alternates}; does nothing when it is null. */
    private static void putAlternateNames(
            Map<String, Boolean> alternates,
            String locode,
            String subdivision,
            float north,
            float east) {
        if (alternates == null) {
            return;
        }
        for (String alt : alternates.keySet()) {
            putLocodeName(locode, alt, subdivision, north, east);
        }
    }

    /**
     * Strips the first parenthesised part from {@code name}.
     *
     * <p>If the parens start with "ex ", the text after it appears to be a safe alias and is
     * returned in {@code tempOutput}. If not, we don't know, since the UN format is ambiguous:
     * sometimes it is an alias («Ras Zubbaya (Ras Dubayyah)»), sometimes not («Challis Venture
     * (oil terminal)»). A missing closing paren is treated as closing at the end of the name. A
     * paren at index 0 is left alone.
     *
     * @param name the raw name
     * @param tempOutput receives the alias, or null when there is none; always overwritten
     * @return the name without the parenthesised part, trimmed; or {@code name} unchanged when
     *     there is nothing to strip
     */
    public static String removeParens(String name, Output<String> tempOutput) {
        int paren = name.indexOf("(");
        tempOutput.value = null;
        if (paren <= 0) {
            return name;
        }
        int paren2 = name.indexOf(")", paren);
        if (paren2 < 0) {
            paren2 = name.length();
        }
        String inside = name.substring(paren + 1, paren2);
        if (inside.startsWith("ex ")) {
            tempOutput.value = inside.substring(3);
        }
        if (paren2 == name.length()) {
            return name.substring(0, paren).trim();
        }
        // Cutting "(…)" out of the middle leaves the spaces on both sides next to each other;
        // collapse that pair into a single space.
        return (name.substring(0, paren) + name.substring(paren2 + 1)).replace("  ", " ").trim();
    }

    /**
     * Puts {@code value} under {@code key}. If the map already holds a value for the key that is
     * not equal to {@code value}, the two are merged; if merging throws, the old value is kept and
     * the problem is added to the load errors. Otherwise {@code value} is stored as is.
     *
     * <p>NOTE(review): {@link LocodeData} equality looks at the locode only, so two {@code
     * LocodeData} stored under the same locode always compare equal here and are never merged -
     * the newer record simply replaces the older one. Confirm whether that is intended before
     * changing it; it determines which name each locode ends up with.
     */
    public static <K, V extends Mergeable<V>> void putCheckingDuplicate(
            Map<K, V> map, K key, V value) {
        V old = map.get(key);
        if (old == null || old.equals(value)) {
            map.put(key, value);
            return;
        }
        try {
            map.put(key, old.merge(value));
        } catch (Exception e) {
            ERRORS.put(key.toString(), "Can't merge records: " + key + "\t" + e.getMessage());
        }
    }

    /** Returns the record for {@code unlocode}, or null if there is none. */
    public static LocodeData getLocodeData(String unlocode) {
        return locodeToData.get(unlocode);
    }

    /** Returns all (locode, record) pairs. */
    public static Set<Entry<String, LocodeData>> entrySet() {
        return locodeToData.entrySet();
    }

    /** Returns all known locodes. */
    public static Set<String> getAvailable() {
        return locodeToData.keySet();
    }

    /** Returns the ISO 3166-2 data for the given subdivision code, or null if there is none. */
    public static Iso3166_2Data getIso3166_2Data(String unlocode) {
        return iso3166_2Data.get(unlocode);
    }

    /** Returns all (subdivision code, data) pairs. */
    public static Set<Entry<String, Iso3166_2Data>> isoEntrySet() {
        return iso3166_2Data.entrySet();
    }

    /** Returns all known ISO 3166-2 subdivision codes. */
    public static Set<String> getAvailableIso3166_2() {
        return iso3166_2Data.keySet();
    }

    /** Returns the problems recorded while loading, keyed by the code they concern. */
    public static Relation<String, String> getLoadErrors() {
        return ERRORS;
    }

    /**
     * Converts a UN/LOCODE coordinate such as {@code "4228N"} or {@code "00130E"} to degrees. The
     * last character is the direction, the two digits before it are minutes and the digits before
     * those are degrees. Minutes become a fraction of a degree rounded to two decimals.
     *
     * @return the coordinate, positive for N and E and negative otherwise
     */
    private static float parse(String string) {
        int len = string.length();
        char dir = string.charAt(len - 1);
        int degreesAndMinutes = Integer.parseInt(string.substring(0, len - 1));
        float fract = (degreesAndMinutes % 100) / 60f;
        fract = ((int) (fract * 100 + 0.499999999f)) / 100f;
        float result = (degreesAndMinutes / 100) + fract;
        return dir == 'N' || dir == 'E' ? result : -result;
    }

    /**
     * Tool entry point: matches the capital and over-1M city lists against the locode data and
     * prints a {@code <location>} element for every matched city whose locode is not yet a BCP 47
     * "tz" key, followed by the matching errors, the locodes already present, the "tz" keys that
     * matched no city, and the load errors.
     */
    public static void main(String[] args) throws IOException {
        Relation<String, LocodeData> countryNameToCities =
                Relation.of(new TreeMap<String, Set<LocodeData>>(), TreeSet.class);
        Set<String> errors = new TreeSet<>();
        loadCitiesCapitals(countryNameToCities, errors);
        loadCitiesOver1M(countryNameToCities, errors);
        SupplementalDataInfo supp = ToolConfig.getToolInstance().getSupplementalDataInfo();
        Set<String> missing = new TreeSet<>(supp.getBcp47Keys().get("tz"));
        Set<String> already = new TreeSet<>();

        for (Entry<String, LocodeData> entry : countryNameToCities.keyValueSet()) {
            String countryName = entry.getKey();
            LocodeData item = entry.getValue();
            String lowerLocode = item.locode.toLowerCase(Locale.ENGLISH);
            if (missing.remove(lowerLocode)) {
                already.add(lowerLocode);
                continue;
            }
            String firstName = item.names.iterator().next();
            LinkedHashSet<String> remainingNames = new LinkedHashSet<>(item.names);
            remainingNames.remove(firstName);
            String info =
                    countryName
                            + "\t"
                            + (remainingNames.isEmpty() ? "" : remainingNames)
                            + "\t"
                            + (item.subdivision.isEmpty() ? "" : "(" + item.subdivision + ")");
            System.out.println(
                    "<location type=\""
                            + lowerLocode
                            + "\">"
                            + firstName
                            + "</location>\t<!--"
                            + info
                            + "-->");
        }
        System.out.println();
        System.out.println(Joiner.on("\n").join(errors));
        System.out.println();
        showLocodes("In exemplars already:", already);
        System.out.println();
        showLocodes("In exemplars but not new cities:", missing);
        System.out.println();
        for (Entry<String, Set<String>> errorEntry : ERRORS.keyValuesSet()) {
            System.out.println(errorEntry.getKey() + "\t" + errorEntry.getValue());
        }
        if (!DEBUG_SHOW_GT_LOCODES) {
            return;
        }

        int i = 0;
        for (String s : new TreeSet<>(Unlocode.getAvailable())) {
            if (!s.startsWith("GT")) {
                continue;
            }
            System.out.println((i++) + "\t" + s + "\t" + Unlocode.getLocodeData(s));
        }
    }

    /**
     * Prints the record of every given locode that has one, then lists the locodes without data,
     * split into those of length five and the rest.
     *
     * @param title prefix printed on every record line
     * @param already locodes to look up; upper-cased before the lookup
     */
    public static void showLocodes(String title, Set<String> already) {
        Set<String> noData = new TreeSet<>();
        Set<String> noData2 = new TreeSet<>();
        for (String locode : already) {
            String upperLocode = locode.toUpperCase(Locale.ENGLISH);
            LocodeData data = locodeToData.get(upperLocode);
            if (data != null) {
                // The country name is only needed for codes we can actually print.
                String countryName =
                        ULocale.getDisplayCountry(
                                "und-" + upperLocode.substring(0, 2), ULocale.ENGLISH);
                System.out.println(title + "\t" + countryName + "\t" + data);
            } else if (locode.length() == 5) {
                noData.add(locode);
            } else {
                noData2.add(locode);
            }
        }
        System.out.println("* No locode data, len 5:\t" + noData);
        System.out.println("* No locode data:\t" + noData2);
    }

    /**
     * Reads {@code data/external/Cities-Over1M.txt} (tab-separated; lines starting with {@code #}
     * are comments) and passes each city to {@link #add}. The city is column 2 and the country
     * column 3; a {@code [..]} suffix on the city is split off and passed as the subdivision.
     *
     * @param countryNameToCities receives the matched cities
     * @param errors2 receives a message for every city that could not be matched
     * @return always 1
     * @throws IOException if the file cannot be read
     * @throws IllegalArgumentException if a city has an opening bracket that is not closed
     */
    public static int loadCitiesOver1M(
            Relation<String, LocodeData> countryNameToCities, Set<String> errors2)
            throws IOException {
        int i = 1;

        try (BufferedReader br =
                FileReaders.openFile(CldrUtility.class, "data/external/Cities-Over1M.txt")) {
            String line;
            while ((line = br.readLine()) != null) {
                if (line.startsWith("#")) {
                    continue;
                }
                String[] parts = line.split("\t");
                String cityName = parts[2];
                String subdivision = null;
                int bracket = cityName.indexOf('[');
                if (bracket > 0) {
                    try {
                        subdivision = cityName.substring(bracket + 1, cityName.indexOf(']'));
                        cityName = cityName.substring(0, bracket);
                    } catch (Exception e) {
                        throw new IllegalArgumentException(cityName);
                    }
                }
                String countryName = parts[3];
                add(countryName, subdivision, cityName, countryNameToCities, errors2);
            }
        }
        return i;
    }

    /**
     * Reads {@code data/external/Cities-CountryCapitals.txt} (tab-separated; lines starting with
     * {@code #} are comments) and passes each city to {@link #add}. The city is column 0 and the
     * country column 1.
     *
     * @param countryNameToCities receives the matched cities
     * @param errors2 receives a message for every city that could not be matched
     * @return always 1
     * @throws IOException if the file cannot be read
     */
    public static int loadCitiesCapitals(
            Relation<String, LocodeData> countryNameToCities, Set<String> errors2)
            throws IOException {
        int i = 1;
        try (BufferedReader br =
                FileReaders.openFile(
                        CldrUtility.class, "data/external/Cities-CountryCapitals.txt")) {
            String line;
            while ((line = br.readLine()) != null) {
                if (line.startsWith("#")) {
                    continue;
                }
                String[] parts = line.split(" *\t *");
                String cityName = parts[0];
                String countryName = parts[1];
                add(countryName, null, cityName, countryNameToCities, errors2);
            }
        }
        return i;
    }

    /** Names in the city lists that are skipped without an error when they match no country. */
    static final Set<String> noncountries =
            new HashSet<>(
                    Arrays.asList(
                            "United States Virgin Islands",
                            "Akrotiri and Dhekelia",
                            "Easter Island",
                            "Somaliland",
                            "Northern Cyprus",
                            "Nagorno-Karabakh Republic",
                            "Abkhazia",
                            "Transnistria",
                            "South Ossetia"));

    static final Transform<String, String> REMOVE_ACCENTS =
            Transliterator.getInstance("nfd;[:mn:]remove");

    /**
     * Looks up {@code cityName} (retrying without accents) among the locode records of the given
     * country and, when exactly one candidate remains, adds it to {@code countryNameToCities}
     * under the country's English display name. A candidate whose subdivision equals {@code
     * subdivision} wins over all others. Every failure is reported in {@code errors2}.
     *
     * @param countryName country name as written in the city list
     * @param subdivision subdivision to prefer, or null
     * @param cityName city name as written in the city list
     * @param countryNameToCities receives the match
     * @param errors2 receives a message when the country or city cannot be matched uniquely
     */
    static void add(
            String countryName,
            String subdivision,
            String cityName,
            Relation<String, LocodeData> countryNameToCities,
            Set<String> errors2) {
        String countryCode = CountryCodeConverter.getCodeFromName(countryName, false);
        if (countryCode == null) {
            if (!noncountries.contains(countryName)) {
                errors2.add("**Couldn't find country " + countryName);
            }
            // Without a country code nothing below can match (and startsWith(null) would throw).
            return;
        }
        countryName = ULocale.getDisplayCountry("und-" + countryCode, ULocale.ENGLISH);
        Set<LocodeData> locodeDatas = nameToLocodeData.get(cityName);
        if (locodeDatas == null) {
            // try again without accents
            String cityName2 = REMOVE_ACCENTS.transform(cityName);
            if (!cityName.equals(cityName2)) {
                locodeDatas = nameToLocodeData.get(cityName2);
            }
        }
        if (locodeDatas == null) {
            errors2.add(
                    "** No matching record for\t"
                            + countryName
                            + "\t"
                            + countryCode
                            + "\t"
                            + cityName);
            return;
        }

        Set<LocodeData> candidates = new LinkedHashSet<>();
        for (LocodeData x : locodeDatas) {
            if (!x.locode.startsWith(countryCode)) {
                continue;
            }
            if (x.subdivision.equals(subdivision)) {
                // exact subdivision match beats everything collected so far
                candidates.clear();
                candidates.add(x);
                break;
            }
            candidates.add(x);
        }

        if (candidates.isEmpty()) {
            errors2.add(
                    "** No matching country record for\t"
                            + countryName
                            + "\t"
                            + countryCode
                            + "\t"
                            + cityName
                            + "\t"
                            + locodeDatas);
        } else if (candidates.size() != 1) {
            errors2.add(
                    "** Multiple matching country records for\t"
                            + countryName
                            + "\t"
                            + countryCode
                            + "\t"
                            + cityName
                            + "\t"
                            + candidates);
        } else {
            countryNameToCities.put(countryName, candidates.iterator().next());
        }
    }
}