1 package org.unicode.cldr.util; 2 3 import java.io.BufferedReader; 4 import java.io.IOException; 5 import java.nio.charset.Charset; 6 import java.nio.charset.StandardCharsets; 7 import java.util.Arrays; 8 import java.util.Collection; 9 import java.util.Collections; 10 import java.util.HashMap; 11 import java.util.HashSet; 12 import java.util.LinkedHashSet; 13 import java.util.Locale; 14 import java.util.Map; 15 import java.util.Map.Entry; 16 import java.util.Set; 17 import java.util.TreeMap; 18 import java.util.TreeSet; 19 20 import org.unicode.cldr.tool.CountryCodeConverter; 21 import org.unicode.cldr.tool.ToolConfig; 22 import org.unicode.cldr.util.ChainedMap.M3; 23 24 import com.google.common.base.Joiner; 25 import com.ibm.icu.impl.Relation; 26 import com.ibm.icu.text.Transform; 27 import com.ibm.icu.text.Transliterator; 28 import com.ibm.icu.util.ICUUncheckedIOException; 29 import com.ibm.icu.util.Output; 30 import com.ibm.icu.util.ULocale; 31 32 public class Unlocode { 33 34 private static final Charset LATIN1 = Charset.forName("ISO8859-1"); 35 36 public interface Mergeable<T> { merge(T a)37 T merge(T a); 38 } 39 40 public static class Iso3166_2Data implements Mergeable<Iso3166_2Data> { 41 public final Set<String> names; 42 Iso3166_2Data(String... name)43 public Iso3166_2Data(String... name) { 44 this(Arrays.asList(name)); 45 } 46 Iso3166_2Data(Collection<String> names)47 public Iso3166_2Data(Collection<String> names) { 48 this.names = Collections.unmodifiableSet(new LinkedHashSet<>(names)); 49 } 50 51 @Override toString()52 public String toString() { 53 return names.toString(); 54 } 55 56 @Override equals(Object obj)57 public boolean equals(Object obj) { 58 return names.equals(obj); 59 } 60 61 @Override hashCode()62 public int hashCode() { 63 return names.hashCode(); 64 } 65 66 @Override merge(Iso3166_2Data b)67 public Iso3166_2Data merge(Iso3166_2Data b) { 68 LinkedHashSet<String> set = new LinkedHashSet<>(names); 69 set.addAll(b.names); 70 return new Iso3166_2Data(set); 71 } 72 } 73 74 public static class LocodeData implements Mergeable<LocodeData>, Comparable<LocodeData> { 75 public final String locode; 76 public final Set<String> names; 77 public final String subdivision; 78 public final float north; 79 public final float east; 80 LocodeData(String locode, String name, String subdivision, float north, float east)81 public LocodeData(String locode, String name, String subdivision, float north, float east) { 82 this(locode, Arrays.asList(name), subdivision, north, east); 83 } 84 LocodeData(String locode, Collection<String> names, String subdivision, float north, float east)85 public LocodeData(String locode, Collection<String> names, String subdivision, float north, float east) { 86 this.locode = locode; 87 this.names = Collections.unmodifiableSet(new LinkedHashSet<>(names)); 88 this.subdivision = subdivision; 89 this.north = north; 90 this.east = east; 91 } 92 93 @Override toString()94 public String toString() { 95 return names + ", " + locode + ", " + subdivision + ", " + north + ", " + east; 96 } 97 98 /** 99 * Warning, must never have locode datas with the same locode and different other data. 100 */ 101 @Override compareTo(LocodeData o)102 public int compareTo(LocodeData o) { 103 // TODO Auto-generated method stub 104 return locode.compareTo(o.locode); 105 } 106 107 /** 108 * Warning, must never have locode datas with the same locode and different other data. 109 */ 110 @Override equals(Object obj)111 public boolean equals(Object obj) { 112 LocodeData other = (LocodeData) obj; 113 return locode.equals(other.locode); 114 } 115 116 @Override hashCode()117 public int hashCode() { 118 return locode.hashCode(); 119 } 120 121 @Override merge(LocodeData other)122 public LocodeData merge(LocodeData other) { 123 if (locode.equals(other.locode) 124 && subdivision.equals(other.subdivision) 125 && north == other.north 126 && east == other.east) { 127 LinkedHashSet<String> set = new LinkedHashSet<>(names); 128 set.addAll(other.names); 129 return new LocodeData(locode, set, subdivision, north, east); 130 } 131 throw new IllegalArgumentException("Can't merge " + this + " with " + other); 132 } 133 134 } 135 136 static Map<String, LocodeData> locodeToData = new HashMap<>(); 137 static Relation<String, LocodeData> nameToLocodeData = Relation.of(new HashMap<String, Set<LocodeData>>(), HashSet.class); 138 static Map<String, Iso3166_2Data> iso3166_2Data = new HashMap<>(); 139 static Relation<String, String> ERRORS = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class); 140 141 static { 142 // read the data 143 try { loadIso()144 loadIso(); 145 iso3166_2Data = Collections.unmodifiableMap(iso3166_2Data); 146 load(1); 147 load(2); 148 load(3); 149 // load exceptions 150 try { 151 BufferedReader br = FileReaders.openFile(CldrUtility.class, 152 "data/external/alternate_locode_name.txt"); 153 while (true) { 154 String line = br.readLine(); 155 if (line == null) { 156 break; 157 } 158 int hash = line.indexOf('#'); 159 if (hash >= 0) { 160 line = line.substring(0, hash); 161 } 162 line = line.trim(); 163 if (line.isEmpty()) { 164 continue; 165 } 166 if (line.equals("EOF")) { 167 break; 168 } 169 String[] parts = line.split("\\s*;\\s*"); 170 //System.out.println(Arrays.asList(parts)); 171 String locode = parts[0].replace(" ", ""); 172 if (locode.length() != 5) { 173 throw new IllegalArgumentException(line); 174 } 175 String alternateName = parts[1]; 176 LocodeData locodeData = locodeToData.get(locode); putCheckingDuplicate(locodeToData, locode, new LocodeData( locode, alternateName, locodeData.subdivision, locodeData.north, locodeData.east))177 putCheckingDuplicate(locodeToData, locode, new LocodeData( 178 locode, alternateName, locodeData.subdivision, locodeData.north, locodeData.east)); 179 } br.close()180 br.close(); 181 } catch (IOException e) { 182 throw new ICUUncheckedIOException(e); 183 } 184 for (LocodeData s : locodeToData.values()) { 185 for (String name : s.names) { nameToLocodeData.put(name, s)186 nameToLocodeData.put(name, s); 187 } 188 } nameToLocodeData.freeze()189 nameToLocodeData.freeze(); 190 locodeToData = Collections.unmodifiableMap(locodeToData); ERRORS.freeze()191 ERRORS.freeze(); 192 } catch (IOException e) { 193 } 194 } 195 196 /* http://www.unece.org/fileadmin/DAM/cefact/locode/unlocode_manual.pdf 197 // 198 * 0 , 199 * 1 "AD", 200 * 2 "SJL", 201 * 3 "Sant Julià de Lòria", 202 * 4 "Sant Julia de Loria", 203 * 5 ?, 204 * 6 "--3-----", 205 * 7 "RL", 206 * 8 "1101", 207 * 9 , 208 * 10 "4228N 00130E","" 209 0 Column Change 210 X Marked for deletion in the next issue 211 1 Country code 212 "XZ" - no country 213 2 Column LOCODE 214 3 Column Name 215 4 Column Name Without Diacritics 216 5 Column Subdivision 217 6 Column Function 218 7 Column Status 219 8 Column Date 220 9 Column IATA 221 10 Latitude/Longitude 222 Torbay: 47°39′N 052°44′W "4739N 05244W" 223 */ 224 225 // public static class FieldData<K extends Enum<K>> { 226 // private List<EnumMap<K,String>> data; 227 // public FieldData(Class<K> classInstance, BufferedReader r, String filename) { 228 // data = new ArrayList<EnumMap<K,String>>(); 229 // FileUtilities.FileProcessor myReader = new FileUtilities.FileProcessor() { 230 // @Override 231 // protected boolean handleLine(int lineCount, String line) { 232 // // TODO Auto-generated method stub 233 // return super.handleLine(lineCount, line); 234 // } 235 // }; 236 // myReader.process(r, filename); 237 // //new EnumMap<K, String>(classInstance); 238 // } 239 // } 240 241 enum SubdivisionFields { 242 Subdivision_category, Code_3166_2, Subdivision_name, Language_code, Romanization_system, Parent_subdivision 243 } 244 loadIso()245 public static void loadIso() throws IOException { 246 BufferedReader br = FileReaders.openFile(CldrUtility.class, 247 "data/external/subdivisionData.txt", StandardCharsets.UTF_8); 248 while (true) { 249 // Subdivision category TAB 3166-2 code TAB Subdivision name TAB Language code TAB Romanization system TAB Parent subdivision 250 251 String line = br.readLine(); 252 if (line == null) { 253 break; 254 } 255 int hash = line.indexOf('#'); 256 if (hash >= 0) { 257 line = line.substring(0, hash); 258 } 259 if (line.trim().isEmpty()) { 260 continue; 261 } 262 String[] list = line.split("\t"); 263 String locode = list[SubdivisionFields.Code_3166_2.ordinal()].trim(); 264 if (locode.endsWith("*")) { 265 locode = locode.substring(0, locode.length() - 1); 266 } 267 String bestName = list[SubdivisionFields.Subdivision_name.ordinal()].trim(); 268 // if (!locode.contains("-")) { 269 // //System.out.println("*skipping: " + locode); 270 // continue; 271 // } 272 // 273 // String names = list[5]; 274 // String[] name = names.split("\\+"); 275 // String bestName = null; 276 // for (String namePair : name) { 277 // if (bestName == null) { 278 // bestName = namePair.split("=")[1]; 279 // } else if (namePair.startsWith("en=")) { 280 // bestName = namePair.split("=")[1]; 281 // break; 282 // } 283 // } 284 // System.out.println("\t" + locode + "\t" + bestName + "\t\t\t"); 285 286 putCheckingDuplicate(iso3166_2Data, locode, new Iso3166_2Data(bestName)); 287 } 288 br.close(); 289 } 290 load(int file)291 public static void load(int file) throws IOException { 292 BufferedReader br = 293 //CldrUtility.getUTF8Data( 294 FileReaders.openFile(CldrUtility.class, 295 "data/external/2013-1_UNLOCODE_CodeListPart" + file + ".csv", 296 LATIN1); 297 M3<String, String, Boolean> nameToAlternate = ChainedMap.of(new TreeMap<String, Object>(), new TreeMap<String, Object>(), Boolean.class); 298 Output<String> tempOutput = new Output<>(); 299 300 String oldCountryCode = null; 301 while (true) { 302 String line = br.readLine(); 303 if (line == null) { 304 break; 305 } 306 line = line.trim(); 307 if (line.isEmpty()) { 308 continue; 309 } 310 String[] list = CldrUtility.splitCommaSeparated(line); 311 String change = list[0]; 312 String locSuffix = list[2]; 313 if (change.equals("X")) { 314 continue; 315 } 316 String countryCode = list[1]; 317 if (!countryCode.equals(oldCountryCode)) { 318 nameToAlternate.clear(); 319 oldCountryCode = countryCode; 320 } 321 String name = list[3]; 322 String name2 = list[4]; 323 324 if (change.equals("=")) { 325 String[] names = name.split("\\s*=\\s*"); 326 if (names.length != 2) { 327 throw new IllegalArgumentException(); 328 } 329 nameToAlternate.put(names[1], names[0], Boolean.TRUE); 330 if (!name.equals(name2)) { 331 names = name2.split("\\s*=\\s*"); 332 if (names.length != 2) { 333 throw new IllegalArgumentException(); 334 } 335 nameToAlternate.put(names[1], names[0], Boolean.TRUE); 336 } 337 continue; 338 } 339 if (locSuffix.isEmpty()) { 340 if (!name.startsWith(".")) { 341 // System.out.println("*** Skipping " + line); 342 } 343 continue; 344 } 345 346 name = removeParens(name, tempOutput); 347 String name3 = tempOutput.value; 348 name2 = removeParens(name2, tempOutput); 349 String name4 = tempOutput.value; 350 351 String subdivision = list[5]; 352 if (!subdivision.isEmpty()) { 353 subdivision = countryCode + "-" + subdivision; 354 if (getIso3166_2Data(subdivision) == null) { 355 ERRORS.put(subdivision, "Missing subdivision " + subdivision + " on line " + line); 356 } 357 } 358 String latLong = list[10]; 359 float latN = 0; 360 float longE = 0; 361 if (!latLong.isEmpty()) { 362 String[] latlong = latLong.split(" "); 363 latN = parse(latlong[0]); 364 longE = parse(latlong[1]); 365 } 366 String locode = countryCode + locSuffix; 367 LocodeData locodeData = new LocodeData(locode, name, subdivision, latN, longE); 368 putCheckingDuplicate(locodeToData, locode, locodeData); 369 Map<String, Boolean> alternates = nameToAlternate.get(name); 370 if (alternates != null) { 371 for (String alt : alternates.keySet()) { 372 putCheckingDuplicate(locodeToData, locode, new LocodeData(locode, alt, subdivision, latN, longE)); 373 } 374 } 375 if (!name2.equals(name)) { 376 putCheckingDuplicate(locodeToData, locode, new LocodeData(locode, name2, subdivision, latN, longE)); 377 alternates = nameToAlternate.get(name2); 378 if (alternates != null) { 379 for (String alt : alternates.keySet()) { 380 putCheckingDuplicate(locodeToData, locode, new LocodeData(locode, alt, subdivision, latN, longE)); 381 } 382 } 383 } 384 if (name3 != null) { 385 putCheckingDuplicate(locodeToData, locode, new LocodeData(locode, name3, subdivision, latN, longE)); 386 } 387 if (name4 != null && !name4.equals(name3)) { 388 putCheckingDuplicate(locodeToData, locode, new LocodeData(locode, name4, subdivision, latN, longE)); 389 } 390 } 391 br.close(); 392 } 393 removeParens(String name, Output<String> tempOutput)394 public static String removeParens(String name, Output<String> tempOutput) { 395 int paren = name.indexOf("("); 396 tempOutput.value = null; 397 if (paren > 0) { 398 int paren2 = name.indexOf(")", paren); 399 if (paren2 < 0) { 400 paren2 = name.length(); 401 } 402 // if the parens start with (ex, then it appears to be a safe alias. 403 // if not, we don't know, since the UN format is ambiguous 404 // sometimes yes: «Ras Zubbaya (Ras Dubayyah)» 405 // sometimes no: «Challis Venture (oil terminal)» 406 String temp = name.substring(paren + 1, paren2); 407 if (temp.startsWith("ex ")) { 408 tempOutput.value = temp.substring(3); 409 } 410 name = paren2 == name.length() 411 ? name.substring(0, paren).trim() 412 : (name.substring(0, paren) + name.substring(paren2 + 1)).replace(" ", " ").trim(); 413 //System.out.println("«" + orginal + "» => «" + name + "», «" + tempOutput.value + "»"); 414 } 415 return name; 416 } 417 putCheckingDuplicate(Map<K, V> map, K key, V value)418 public static <K, V extends Mergeable<V>> void putCheckingDuplicate(Map<K, V> map, K key, V value) { 419 V old = map.get(key); 420 if (old != null && !old.equals(value)) { 421 try { 422 map.put(key, old.merge(value)); 423 } catch (Exception e) { 424 ERRORS.put(key.toString(), "Can't merge records: " + key + "\t" + e.getMessage()); 425 } 426 } else { 427 map.put(key, value); 428 } 429 } 430 getLocodeData(String unlocode)431 public static LocodeData getLocodeData(String unlocode) { 432 return locodeToData.get(unlocode); 433 } 434 entrySet()435 public static Set<Entry<String, LocodeData>> entrySet() { 436 return locodeToData.entrySet(); 437 } 438 getAvailable()439 public static Set<String> getAvailable() { 440 return locodeToData.keySet(); 441 } 442 getIso3166_2Data(String unlocode)443 public static Iso3166_2Data getIso3166_2Data(String unlocode) { 444 return iso3166_2Data.get(unlocode); 445 } 446 isoEntrySet()447 public static Set<Entry<String, Iso3166_2Data>> isoEntrySet() { 448 return iso3166_2Data.entrySet(); 449 } 450 getAvailableIso3166_2()451 public static Set<String> getAvailableIso3166_2() { 452 return iso3166_2Data.keySet(); 453 } 454 getLoadErrors()455 public static Relation<String, String> getLoadErrors() { 456 return ERRORS; 457 } 458 parse(String string)459 private static float parse(String string) { 460 int len = string.length(); 461 char dir = string.charAt(len - 1); 462 int result0 = Integer.parseInt(string.substring(0, len - 1)); 463 float fract = (result0 % 100) / 60f; 464 fract = ((int) (fract * 100 + 0.499999999f)) / 100f; 465 float result = (result0 / 100) + fract; 466 return dir == 'N' || dir == 'E' ? result : -result; 467 } 468 main(String[] args)469 public static void main(String[] args) throws IOException { 470 Relation<String, LocodeData> countryNameToCities = Relation.of(new TreeMap<String, Set<LocodeData>>(), TreeSet.class); 471 Set<String> errors = new TreeSet<>(); 472 loadCitiesCapitals(countryNameToCities, errors); 473 loadCitiesOver1M(countryNameToCities, errors); 474 SupplementalDataInfo supp = ToolConfig.getToolInstance().getSupplementalDataInfo(); 475 Set<String> missing = new TreeSet<>( 476 supp.getBcp47Keys().get("tz")); 477 Set<String> already = new TreeSet<>(); 478 479 for (Entry<String, LocodeData> entry : countryNameToCities.keyValueSet()) { 480 String countryName = entry.getKey(); 481 LocodeData item = entry.getValue(); 482 String firstName = item.names.iterator().next(); 483 LinkedHashSet<String> remainingNames = new LinkedHashSet<>(item.names); 484 remainingNames.remove(firstName); 485 String lowerLocode = item.locode.toLowerCase(Locale.ENGLISH); 486 String info = countryName 487 + "\t" + (remainingNames.isEmpty() ? "" : remainingNames) 488 + "\t" + (item.subdivision.isEmpty() ? "" : "(" + item.subdivision + ")"); 489 490 if (missing.contains(lowerLocode)) { 491 missing.remove(lowerLocode); 492 already.add(lowerLocode); 493 continue; 494 } 495 System.out.println("<location type=\"" + lowerLocode 496 + "\">" + firstName 497 + "</location>\t<!--" + info 498 + "-->"); 499 } 500 System.out.println(); 501 System.out.println(Joiner.on("\n").join(errors)); 502 System.out.println(); 503 showLocodes("In exemplars already:", already); 504 System.out.println(); 505 showLocodes("In exemplars but not new cities:", missing); 506 System.out.println(); 507 for (Entry<String, Set<String>> errorEntry : ERRORS.keyValuesSet()) { 508 System.out.println(errorEntry.getKey() + "\t" + errorEntry.getValue()); 509 } 510 if (true) return; 511 512 int i = 0; 513 // for (String s : new TreeSet<String>(Unlocode.getAvailableIso3166_2())) { 514 // System.out.println((i++) + "\t" + s + "\t" + Unlocode.getIso3166_2Data(s)); 515 // //if (i > 1000) break; 516 // } 517 for (String s : new TreeSet<>(Unlocode.getAvailable())) { 518 if (!s.startsWith("GT")) { 519 continue; 520 } 521 System.out.println((i++) + "\t" + s + "\t" + Unlocode.getLocodeData(s)); 522 //if (i > 1000) break; 523 } 524 525 // Set<String> KNOWN_ERRORS = new HashSet<String>(Arrays.asList("AR-LA", "DE-BR")); 526 // 527 // for (Entry<String, Set<String>> s : getLoadErrors().keyValuesSet()) { 528 // String key = s.getKey(); 529 // Set<String> values = s.getValue(); 530 // if (KNOWN_ERRORS.contains(key)) { 531 // System.out.println("# Known error\t" + key); 532 // continue; 533 // } 534 // String s2 = values.toString(); 535 // System.out.println(key + "\t" + s2.substring(0,Math.min(256, s2.length())) + "…"); 536 // } 537 } 538 showLocodes(String title, Set<String> already)539 public static void showLocodes(String title, Set<String> already) { 540 Set<String> noData = new TreeSet<>(); 541 Set<String> noData2 = new TreeSet<>(); 542 for (String locode : already) { 543 String upperLocode = locode.toUpperCase(Locale.ENGLISH); 544 String countryName = ULocale.getDisplayCountry("und-" + upperLocode.substring(0, 2), ULocale.ENGLISH); 545 LocodeData data = locodeToData.get(upperLocode); 546 if (data == null) { 547 if (locode.length() == 5) { 548 noData.add(locode); 549 } else { 550 noData2.add(locode); 551 } 552 } else { 553 System.out.println(title + "\t" + countryName + "\t" + data); 554 } 555 } 556 System.out.println("* No locode data, len 5:\t" + noData); 557 System.out.println("* No locode data:\t" + noData2); 558 } 559 loadCitiesOver1M(Relation<String, LocodeData> countryNameToCities, Set<String> errors2)560 public static int loadCitiesOver1M(Relation<String, LocodeData> countryNameToCities, Set<String> errors2) throws IOException { 561 int i = 1; 562 563 BufferedReader br = FileReaders.openFile(CldrUtility.class, "data/external/Cities-Over1M.txt"); 564 main: while (true) { 565 String line = br.readLine(); 566 if (line == null) { 567 break; 568 } 569 if (line.startsWith("#")) { 570 continue; 571 } 572 String[] parts = line.split("\t"); 573 //System.out.println(Arrays.asList(parts)); 574 String cityName = parts[2]; 575 String subdivision = null; 576 int bracket = cityName.indexOf('['); 577 if (bracket > 0) { 578 try { 579 subdivision = cityName.substring(bracket + 1, cityName.indexOf(']')); 580 cityName = cityName.substring(0, bracket); 581 } catch (Exception e) { 582 throw new IllegalArgumentException(cityName); 583 } 584 } 585 String countryName = parts[3]; 586 add(countryName, subdivision, cityName, countryNameToCities, errors2); 587 588 // String countryCode = CountryCodeConverter.getCodeFromName(countryName); 589 // if (countryCode == null) { 590 // System.out.println("*** Couldn't find country " + countryName); 591 // continue; 592 // } 593 // Set<LocodeData> locodeDatas = nameToLocodeData.get(cityName); 594 // if (locodeDatas == null) { 595 // System.out.println((i++) + " Couldn't find city " + cityName + " in " + countryName); 596 // continue; 597 // } else if (locodeDatas.size() == 1) { 598 // add(countryNameToCities,locodeDatas.iterator().next()); 599 // } else { 600 // Set<LocodeData> rem = new LinkedHashSet(); 601 // for (LocodeData x : locodeDatas) { 602 // if (x.subdivision.equals(subdivision)) { 603 // add(countryNameToCities, x); 604 // continue main; 605 // } 606 // if (x.subdivision.startsWith(countryCode)) { 607 // rem.add(x); 608 // } 609 // } 610 // if (rem.size() != 1) { 611 // System.out.println((i++) + " No single record for " + cityName + "\t" + rem); 612 // } else { 613 // add(countryNameToCities, rem.iterator().next()); 614 // } 615 // } 616 } 617 br.close(); 618 return i; 619 } 620 loadCitiesCapitals(Relation<String, LocodeData> countryNameToCities, Set<String> errors2)621 public static int loadCitiesCapitals(Relation<String, LocodeData> countryNameToCities, Set<String> errors2) throws IOException { 622 int i = 1; 623 BufferedReader br = FileReaders.openFile(CldrUtility.class, "data/external/Cities-CountryCapitals.txt"); 624 while (true) { 625 String line = br.readLine(); 626 if (line == null) { 627 break; 628 } 629 if (line.startsWith("#")) { 630 continue; 631 } 632 String[] parts = line.split(" *\t *"); 633 //System.out.println(Arrays.asList(parts)); 634 String cityName = parts[0]; 635 String countryName = parts[1]; 636 add(countryName, null, cityName, countryNameToCities, errors2); 637 } 638 br.close(); 639 return i; 640 } 641 642 static final Set<String> noncountries = new HashSet<>(Arrays.asList( 643 "United States Virgin Islands", "Akrotiri and Dhekelia", "Easter Island", "Somaliland", "Northern Cyprus", "Nagorno-Karabakh Republic", "Abkhazia", 644 "Transnistria", "South Ossetia")); 645 646 static final Transform<String, String> REMOVE_ACCENTS = Transliterator.getInstance("nfd;[:mn:]remove"); 647 add(String countryName, String subdivision, String cityName, Relation<String, LocodeData> countryNameToCities, Set<String> errors2)648 static void add(String countryName, String subdivision, String cityName, Relation<String, LocodeData> countryNameToCities, Set<String> errors2) { 649 String countryCode = CountryCodeConverter.getCodeFromName(countryName, false); 650 if (countryCode == null) { 651 if (noncountries.contains(countryName)) { 652 return; // skip 653 } 654 errors2.add("**Couldn't find country " + countryName); 655 //continue; 656 } 657 countryName = ULocale.getDisplayCountry("und-" + countryCode, ULocale.ENGLISH); 658 Set<LocodeData> locodeDatas = nameToLocodeData.get(cityName); 659 if (locodeDatas == null) { 660 // try again without accents 661 String cityName2 = REMOVE_ACCENTS.transform(cityName); 662 if (!cityName.equals(cityName2)) { 663 locodeDatas = nameToLocodeData.get(cityName2); 664 } 665 } 666 if (locodeDatas == null) { 667 errors2.add("** No matching record for\t" + countryName + "\t" + countryCode + "\t" + cityName); 668 } else { 669 Set<LocodeData> rem = new LinkedHashSet<>(); 670 for (LocodeData x : locodeDatas) { 671 if (x.locode.startsWith(countryCode)) { 672 if (x.subdivision.equals(subdivision)) { 673 rem.clear(); 674 rem.add(x); 675 break; 676 } 677 rem.add(x); 678 } 679 } 680 if (rem.size() == 0) { 681 errors2.add("** No matching country record for\t" + countryName + "\t" + countryCode + "\t" + cityName + "\t" + locodeDatas); 682 } else if (rem.size() != 1) { 683 errors2.add("** Multiple matching country records for\t" + countryName + "\t" + countryCode + "\t" + cityName + "\t" + rem); 684 } else { 685 LocodeData locodeData = rem.iterator().next(); 686 countryNameToCities.put(countryName, locodeData); 687 } 688 } 689 } 690 }