package org.unicode.cldr.util;

import java.io.BufferedReader;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;

import org.unicode.cldr.tool.CountryCodeConverter;
import org.unicode.cldr.tool.ToolConfig;
import org.unicode.cldr.util.ChainedMap.M3;

import com.ibm.icu.dev.util.CollectionUtilities;
import com.ibm.icu.impl.Relation;
import com.ibm.icu.text.Transform;
import com.ibm.icu.text.Transliterator;
import com.ibm.icu.util.ICUUncheckedIOException;
import com.ibm.icu.util.Output;
import com.ibm.icu.util.ULocale;

/**
 * Loads the UN/LOCODE code lists and the ISO 3166-2 subdivision list from the
 * {@code data/external} resources and exposes them through static lookup methods.
 *
 * <p>All tables are populated once, in the static initializer, and are then made
 * unmodifiable / frozen. Problems found while loading that do not abort the load are
 * collected and available from {@link #getLoadErrors()}.
 */
public class Unlocode {

    private static final Charset LATIN1 = Charset.forName("ISO8859-1");

    /**
     * A value that can be combined with another value of the same type, producing a new value.
     */
    public interface Mergeable<T> {
        /**
         * Returns the combination of this value and {@code a}.
         */
        T merge(T a);
    }

    /**
     * The set of names known for one ISO 3166-2 subdivision code. Immutable.
     */
    public static class Iso3166_2Data implements Mergeable<Iso3166_2Data> {
        /** Names in insertion order; unmodifiable. */
        public final Set<String> names;

        public Iso3166_2Data(String... name) {
            this(Arrays.asList(name));
        }

        public Iso3166_2Data(Collection<String> names) {
            this.names = Collections.unmodifiableSet(new LinkedHashSet<String>(names));
        }

        @Override
        public String toString() {
            return names.toString();
        }

        /**
         * Two instances are equal when they hold the same set of names.
         */
        @Override
        public boolean equals(Object obj) {
            // Compare the name sets; comparing the set against the wrapper object
            // (as an earlier version did) can never succeed.
            return obj instanceof Iso3166_2Data
                && names.equals(((Iso3166_2Data) obj).names);
        }

        @Override
        public int hashCode() {
            return names.hashCode();
        }

        /**
         * Returns a new instance holding this instance's names followed by any new names from {@code b}.
         */
        @Override
        public Iso3166_2Data merge(Iso3166_2Data b) {
            LinkedHashSet<String> set = new LinkedHashSet<String>(names);
            set.addAll(b.names);
            return new Iso3166_2Data(set);
        }
    }

    /**
     * One UN/LOCODE location: its code, known names, subdivision and coordinates. Immutable.
     *
     * <p>Identity ({@link #equals}, {@link #hashCode}, {@link #compareTo}) is based on the
     * locode alone.
     */
    public static class LocodeData implements Mergeable<LocodeData>, Comparable<LocodeData> {
        public final String locode;
        /** Names in insertion order; unmodifiable. */
        public final Set<String> names;
        public final String subdivision;
        public final float north;
        public final float east;

        public LocodeData(String locode, String name, String subdivision, float north, float east) {
            this(locode, Arrays.asList(name), subdivision, north, east);
        }

        public LocodeData(String locode, Collection<String> names, String subdivision, float north, float east) {
            this.locode = locode;
            this.names = Collections.unmodifiableSet(new LinkedHashSet<String>(names));
            this.subdivision = subdivision;
            this.north = north;
            this.east = east;
        }

        @Override
        public String toString() {
            return names + ", " + locode + ", " + subdivision + ", " + north + ", " + east;
        }

        /**
         * Orders by locode only.
         * Warning, must never have locode datas with the same locode and different other data.
         */
        @Override
        public int compareTo(LocodeData o) {
            return locode.compareTo(o.locode);
        }

        /**
         * Compares by locode only.
         * Warning, must never have locode datas with the same locode and different other data.
         */
        @Override
        public boolean equals(Object obj) {
            if (!(obj instanceof LocodeData)) {
                return false;
            }
            return locode.equals(((LocodeData) obj).locode);
        }

        @Override
        public int hashCode() {
            return locode.hashCode();
        }

        /**
         * Returns a new record with the names of both records, provided that locode,
         * subdivision and coordinates all match.
         *
         * @throws IllegalArgumentException if any of those fields differ
         */
        @Override
        public LocodeData merge(LocodeData other) {
            if (locode.equals(other.locode)
                && subdivision.equals(other.subdivision)
                && north == other.north
                && east == other.east) {
                LinkedHashSet<String> set = new LinkedHashSet<String>(names);
                set.addAll(other.names);
                return new LocodeData(locode, set, subdivision, north, east);
            }
            throw new IllegalArgumentException("Can't merge " + this + " with " + other);
        }

    }

    static Map<String, LocodeData> locodeToData = new HashMap<String, LocodeData>();
    static Relation<String, LocodeData> nameToLocodeData = Relation.of(new HashMap<String, Set<LocodeData>>(), HashSet.class);
    static Map<String, Iso3166_2Data> iso3166_2Data = new HashMap<String, Iso3166_2Data>();
    static Relation<String, String> ERRORS = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class);

    static {
        // read the data
        try {
            loadIso();
            iso3166_2Data = Collections.unmodifiableMap(iso3166_2Data);
            load(1);
            load(2);
            load(3);
            loadAlternateNames();
        } catch (IOException e) {
            // A missing or unreadable data file would otherwise leave the tables
            // half-populated and unfrozen with no indication of why.
            throw new ICUUncheckedIOException(e);
        }
        for (LocodeData s : locodeToData.values()) {
            for (String name : s.names) {
                nameToLocodeData.put(name, s);
            }
        }
        nameToLocodeData.freeze();
        locodeToData = Collections.unmodifiableMap(locodeToData);
        ERRORS.freeze();
    }

    /**
     * Reads {@code data/external/alternate_locode_name.txt} and adds each listed alternate
     * name to the already-loaded record for its locode.
     *
     * <p>Each significant line has the form {@code locode ; alternate name}; text after
     * {@code #} is ignored, and a line consisting of {@code EOF} ends the input. A locode
     * with no loaded record is reported in {@link #ERRORS}.
     *
     * @throws IllegalArgumentException if a line has no name part or its locode is not 5 characters
     */
    private static void loadAlternateNames() throws IOException {
        BufferedReader br = FileReaders.openFile(CldrUtility.class,
            "data/external/alternate_locode_name.txt");
        try {
            while (true) {
                String line = br.readLine();
                if (line == null) {
                    break;
                }
                int hash = line.indexOf('#');
                if (hash >= 0) {
                    line = line.substring(0, hash);
                }
                line = line.trim();
                if (line.isEmpty()) {
                    continue;
                }
                if (line.equals("EOF")) {
                    break;
                }
                String[] parts = line.split("\\s*;\\s*");
                if (parts.length < 2) {
                    throw new IllegalArgumentException(line);
                }
                String locode = parts[0].replace(" ", "");
                if (locode.length() != 5) {
                    throw new IllegalArgumentException(line);
                }
                String alternateName = parts[1];
                LocodeData locodeData = locodeToData.get(locode);
                if (locodeData == null) {
                    ERRORS.put(locode, "Alternate name for unknown locode: " + line);
                    continue;
                }
                putCheckingDuplicate(locodeToData, locode, new LocodeData(
                    locode, alternateName, locodeData.subdivision, locodeData.north, locodeData.east));
            }
        } finally {
            br.close();
        }
    }

    /* http://www.unece.org/fileadmin/DAM/cefact/locode/unlocode_manual.pdf
     *
     * Sample record, by column index:
     * 0 ,
     * 1 "AD",
     * 2 "SJL",
     * 3 "Sant Julià de Lòria",
     * 4 "Sant Julia de Loria",
     * 5 ?,
     * 6 "--3-----",
     * 7 "RL",
     * 8 "1101",
     * 9 ,
     * 10 "4228N 00130E",""
     *
     * Column meanings:
     * 0 Column Change
     *     X Marked for deletion in the next issue
     * 1 Country code
     *     "XZ" - no country
     * 2 Column LOCODE
     * 3 Column Name
     * 4 Column Name Without Diacritics
     * 5 Column Subdivision
     * 6 Column Function
     * 7 Column Status
     * 8 Column Date
     * 9 Column IATA
     * 10 Latitude/Longitude
     *     Torbay: 47°39′N 052°44′W "4739N 05244W"
     */

    /** Columns of {@code subdivisionData.txt}, in file order (the ordinal is the column index). */
    enum SubdivisionFields {
        Subdivision_category, Code_3166_2, Subdivision_name, Language_code, Romanization_system, Parent_subdivision
    }

    /**
     * Reads {@code data/external/subdivisionData.txt} (tab-separated, UTF-8) into the
     * ISO 3166-2 table. Text after {@code #} and blank lines are ignored; a trailing
     * {@code *} on a code is dropped.
     *
     * <p>Invoked by the static initializer, which afterwards makes the table unmodifiable.
     */
    public static void loadIso() throws IOException {
        BufferedReader br = FileReaders.openFile(CldrUtility.class,
            "data/external/subdivisionData.txt", CldrUtility.UTF8);
        try {
            while (true) {
                // Subdivision category TAB 3166-2 code TAB Subdivision name TAB Language code TAB Romanization system TAB Parent subdivision
                String line = br.readLine();
                if (line == null) {
                    break;
                }
                int hash = line.indexOf('#');
                if (hash >= 0) {
                    line = line.substring(0, hash);
                }
                if (line.trim().isEmpty()) {
                    continue;
                }
                String[] list = line.split("\t");
                String locode = list[SubdivisionFields.Code_3166_2.ordinal()].trim();
                if (locode.endsWith("*")) {
                    locode = locode.substring(0, locode.length() - 1);
                }
                String bestName = list[SubdivisionFields.Subdivision_name.ordinal()].trim();
                putCheckingDuplicate(iso3166_2Data, locode, new Iso3166_2Data(bestName));
            }
        } finally {
            br.close();
        }
    }

    /**
     * Reads one part of the 2013-1 UN/LOCODE code list (Latin-1 CSV) into the locode table.
     *
     * <p>Rows marked {@code X} (deletion) are skipped. Rows marked {@code =} define
     * "alternate = primary" name pairs that apply to later rows of the same country.
     * Rows without a LOCODE suffix are skipped. For every remaining row the name, the
     * name without diacritics, any applicable alternates, and any "(ex ...)" former names
     * are registered under the row's locode. Subdivisions that are not in the ISO 3166-2
     * table are reported in {@link #ERRORS}.
     *
     * <p>Invoked by the static initializer, which afterwards makes the table unmodifiable.
     *
     * @param file the part number used in the resource file name
     * @throws IllegalArgumentException if an {@code =} row's name is not of the form {@code a = b}
     */
    public static void load(int file) throws IOException {
        BufferedReader br = FileReaders.openFile(CldrUtility.class,
            "data/external/2013-1_UNLOCODE_CodeListPart" + file + ".csv",
            LATIN1);
        try {
            M3<String, String, Boolean> nameToAlternate = ChainedMap.of(new TreeMap<String, Object>(), new TreeMap<String, Object>(), Boolean.class);
            Output<String> tempOutput = new Output<String>();

            String oldCountryCode = null;
            while (true) {
                String line = br.readLine();
                if (line == null) {
                    break;
                }
                line = line.trim();
                if (line.isEmpty()) {
                    continue;
                }
                String[] list = CldrUtility.splitCommaSeparated(line);
                String change = list[0];
                String locSuffix = list[2];
                if (change.equals("X")) {
                    continue;
                }
                String countryCode = list[1];
                if (!countryCode.equals(oldCountryCode)) {
                    // alternates only apply within a single country
                    nameToAlternate.clear();
                    oldCountryCode = countryCode;
                }
                String name = list[3];
                String name2 = list[4];

                if (change.equals("=")) {
                    String[] names = name.split("\\s*=\\s*");
                    if (names.length != 2) {
                        throw new IllegalArgumentException("Malformed reference entry: " + line);
                    }
                    nameToAlternate.put(names[1], names[0], Boolean.TRUE);
                    if (!name.equals(name2)) {
                        names = name2.split("\\s*=\\s*");
                        if (names.length != 2) {
                            throw new IllegalArgumentException("Malformed reference entry: " + line);
                        }
                        nameToAlternate.put(names[1], names[0], Boolean.TRUE);
                    }
                    continue;
                }
                if (locSuffix.isEmpty()) {
                    // no LOCODE on this row, so nothing to register
                    continue;
                }

                name = removeParens(name, tempOutput);
                String name3 = tempOutput.value;
                name2 = removeParens(name2, tempOutput);
                String name4 = tempOutput.value;

                String subdivision = list[5];
                if (!subdivision.isEmpty()) {
                    subdivision = countryCode + "-" + subdivision;
                    if (getIso3166_2Data(subdivision) == null) {
                        ERRORS.put(subdivision, "Missing subdivision " + subdivision + " on line " + line);
                    }
                }
                String latLong = list[10];
                float latN = 0;
                float longE = 0;
                if (!latLong.isEmpty()) {
                    String[] latlong = latLong.split(" ");
                    latN = parse(latlong[0]);
                    longE = parse(latlong[1]);
                }
                String locode = countryCode + locSuffix;
                LocodeData locodeData = new LocodeData(locode, name, subdivision, latN, longE);
                putCheckingDuplicate(locodeToData, locode, locodeData);
                Map<String, Boolean> alternates = nameToAlternate.get(name);
                if (alternates != null) {
                    for (String alt : alternates.keySet()) {
                        putCheckingDuplicate(locodeToData, locode, new LocodeData(locode, alt, subdivision, latN, longE));
                    }
                }
                if (!name2.equals(name)) {
                    putCheckingDuplicate(locodeToData, locode, new LocodeData(locode, name2, subdivision, latN, longE));
                    alternates = nameToAlternate.get(name2);
                    if (alternates != null) {
                        for (String alt : alternates.keySet()) {
                            putCheckingDuplicate(locodeToData, locode, new LocodeData(locode, alt, subdivision, latN, longE));
                        }
                    }
                }
                if (name3 != null) {
                    putCheckingDuplicate(locodeToData, locode, new LocodeData(locode, name3, subdivision, latN, longE));
                }
                if (name4 != null && !name4.equals(name3)) {
                    putCheckingDuplicate(locodeToData, locode, new LocodeData(locode, name4, subdivision, latN, longE));
                }
            }
        } finally {
            br.close();
        }
    }

    /**
     * Removes the first parenthesized section from {@code name}.
     *
     * <p>If the parenthesized text starts with {@code "ex "}, the remainder of that text is
     * returned through {@code tempOutput.value}; otherwise {@code tempOutput.value} is set to
     * {@code null}. A name with no {@code (}, or with {@code (} as its first character, is
     * returned unchanged.
     *
     * @param name the raw name
     * @param tempOutput receives the former name, or {@code null}
     * @return the name without the parenthesized section, trimmed
     */
    public static String removeParens(String name, Output<String> tempOutput) {
        int paren = name.indexOf("(");
        tempOutput.value = null;
        if (paren > 0) {
            int paren2 = name.indexOf(")", paren);
            if (paren2 < 0) {
                paren2 = name.length();
            }
            // if the parens start with (ex, then it appears to be a safe alias.
            // if not, we don't know, since the UN format is ambiguous
            // sometimes yes: «Ras Zubbaya (Ras Dubayyah)»
            // sometimes no: «Challis Venture (oil terminal)»
            String temp = name.substring(paren + 1, paren2);
            if (temp.startsWith("ex ")) {
                tempOutput.value = temp.substring(3);
            }
            // Cutting a parenthetical out of the middle leaves the spaces that surrounded
            // it side by side; collapse that doubled space to a single one.
            name = paren2 == name.length()
                ? name.substring(0, paren).trim()
                : (name.substring(0, paren) + name.substring(paren2 + 1)).replace("  ", " ").trim();
        }
        return name;
    }

    /**
     * Stores {@code value} under {@code key}, merging it into any value already present.
     *
     * <p>The merge is attempted whenever an entry exists, rather than only when the two
     * values are unequal: {@link LocodeData} equality is by locode alone, so an
     * equality test would treat every additional name for a locode as a duplicate and
     * overwrite the names collected so far. A failed merge leaves the existing entry in
     * place and is reported in {@link #ERRORS}.
     */
    public static <K, V extends Mergeable<V>> void putCheckingDuplicate(Map<K, V> map, K key, V value) {
        V old = map.get(key);
        if (old == null) {
            map.put(key, value);
            return;
        }
        try {
            map.put(key, old.merge(value));
        } catch (Exception e) {
            ERRORS.put(key.toString(), "Can't merge records: " + key + "\t" + e.getMessage());
        }
    }

    /** Returns the record for the given locode, or {@code null} if there is none. */
    public static LocodeData getLocodeData(String unlocode) {
        return locodeToData.get(unlocode);
    }

    /** Returns all (locode, record) entries. */
    public static Set<Entry<String, LocodeData>> entrySet() {
        return locodeToData.entrySet();
    }

    /** Returns all loaded locodes. */
    public static Set<String> getAvailable() {
        return locodeToData.keySet();
    }

    /** Returns the ISO 3166-2 data for the given subdivision code, or {@code null} if there is none. */
    public static Iso3166_2Data getIso3166_2Data(String unlocode) {
        return iso3166_2Data.get(unlocode);
    }

    /** Returns all (subdivision code, data) entries. */
    public static Set<Entry<String, Iso3166_2Data>> isoEntrySet() {
        return iso3166_2Data.entrySet();
    }

    /** Returns all loaded ISO 3166-2 subdivision codes. */
    public static Set<String> getAvailableIso3166_2() {
        return iso3166_2Data.keySet();
    }

    /** Returns the problems recorded while loading, keyed by the offending code. */
    public static Relation<String, String> getLoadErrors() {
        return ERRORS;
    }

    /**
     * Converts a coordinate such as {@code 4228N} or {@code 00130E} to decimal degrees.
     *
     * <p>The digits are read as degrees followed by two digits of minutes; the minutes are
     * converted to a fraction of a degree rounded to two decimal places. The result is
     * positive for a final {@code N} or {@code E} and negative for any other final character.
     */
    private static float parse(String string) {
        int len = string.length();
        char dir = string.charAt(len - 1);
        int result0 = Integer.parseInt(string.substring(0, len - 1));
        float fract = (result0 % 100) / 60f;
        fract = ((int) (fract * 100 + 0.499999999f)) / 100f;
        float result = (result0 / 100) + fract;
        return dir == 'N' || dir == 'E' ? result : -result;
    }

    /**
     * Tool entry point: matches the capital and over-one-million city lists against the
     * locode data and prints {@code <location>} elements for the cities that are not yet
     * among the BCP 47 {@code tz} keys, followed by the collected errors and summaries.
     */
    public static void main(String[] args) throws IOException {
        Relation<String, LocodeData> countryNameToCities = Relation.of(new TreeMap<String, Set<LocodeData>>(), TreeSet.class);
        Set<String> errors = new TreeSet<String>();
        loadCitiesCapitals(countryNameToCities, errors);
        loadCitiesOver1M(countryNameToCities, errors);
        SupplementalDataInfo supp = ToolConfig.getToolInstance().getSupplementalDataInfo();
        Set<String> missing = new TreeSet<String>(
            supp.getBcp47Keys().get("tz"));
        Set<String> already = new TreeSet<String>();

        for (Entry<String, LocodeData> entry : countryNameToCities.keyValueSet()) {
            String countryName = entry.getKey();
            LocodeData item = entry.getValue();
            String firstName = item.names.iterator().next();
            LinkedHashSet<String> remainingNames = new LinkedHashSet<String>(item.names);
            remainingNames.remove(firstName);
            String lowerLocode = item.locode.toLowerCase(Locale.ENGLISH);
            String info = countryName
                + "\t" + (remainingNames.isEmpty() ? "" : remainingNames)
                + "\t" + (item.subdivision.isEmpty() ? "" : "(" + item.subdivision + ")");

            if (missing.contains(lowerLocode)) {
                missing.remove(lowerLocode);
                already.add(lowerLocode);
                continue;
            }
            System.out.println("<location type=\"" + lowerLocode
                + "\">" + firstName
                + "</location>\t<!--" + info
                + "-->");
        }
        System.out.println();
        System.out.println(CollectionUtilities.join(errors, "\n"));
        System.out.println();
        showLocodes("In exemplars already:", already);
        System.out.println();
        showLocodes("In exemplars but not new cities:", missing);
        System.out.println();
        for (Entry<String, Set<String>> errorEntry : ERRORS.keyValuesSet()) {
            System.out.println(errorEntry.getKey() + "\t" + errorEntry.getValue());
        }
        // The dump below is switched off; it is kept so it can be re-enabled when debugging.
        if (true) return;

        int i = 0;
        for (String s : new TreeSet<String>(Unlocode.getAvailable())) {
            if (!s.startsWith("GT")) {
                continue;
            }
            System.out.println((i++) + "\t" + s + "\t" + Unlocode.getLocodeData(s));
        }
    }

    /**
     * Prints, for each given (lower-case) locode, the title, the English country name and
     * the locode record; locodes without a record are listed at the end, split into those
     * that are 5 characters long and the rest.
     */
    public static void showLocodes(String title, Set<String> already) {
        Set<String> noData = new TreeSet<String>();
        Set<String> noData2 = new TreeSet<String>();
        for (String locode : already) {
            String upperLocode = locode.toUpperCase(Locale.ENGLISH);
            String countryName = ULocale.getDisplayCountry("und-" + upperLocode.substring(0, 2), ULocale.ENGLISH);
            LocodeData data = locodeToData.get(upperLocode);
            if (data == null) {
                if (locode.length() == 5) {
                    noData.add(locode);
                } else {
                    noData2.add(locode);
                }
            } else {
                System.out.println(title + "\t" + countryName + "\t" + data);
            }
        }
        System.out.println("* No locode data, len 5:\t" + noData);
        System.out.println("* No locode data:\t" + noData2);
    }

    /**
     * Reads {@code data/external/Cities-Over1M.txt} (tab-separated; lines starting with
     * {@code #} are skipped) and registers each city via
     * {@link #add(String, String, String, Relation, Set)}. The city name is taken from the
     * third column, with an optional {@code [subdivision]} suffix, and the country name
     * from the fourth.
     *
     * @return always 1 (the counter is not advanced by the current implementation)
     * @throws IllegalArgumentException if a city name has {@code [} without a following {@code ]}
     */
    public static int loadCitiesOver1M(Relation<String, LocodeData> countryNameToCities, Set<String> errors2) throws IOException {
        int i = 1;

        BufferedReader br = FileReaders.openFile(CldrUtility.class, "data/external/Cities-Over1M.txt");
        try {
            while (true) {
                String line = br.readLine();
                if (line == null) {
                    break;
                }
                if (line.startsWith("#")) {
                    continue;
                }
                String[] parts = line.split("\t");
                String cityName = parts[2];
                String subdivision = null;
                int bracket = cityName.indexOf('[');
                if (bracket > 0) {
                    try {
                        subdivision = cityName.substring(bracket + 1, cityName.indexOf(']'));
                        cityName = cityName.substring(0, bracket);
                    } catch (Exception e) {
                        throw new IllegalArgumentException(cityName, e);
                    }
                }
                String countryName = parts[3];
                add(countryName, subdivision, cityName, countryNameToCities, errors2);
            }
        } finally {
            br.close();
        }
        return i;
    }

    /**
     * Reads {@code data/external/Cities-CountryCapitals.txt} (tab-separated city and
     * country name; lines starting with {@code #} are skipped) and registers each city via
     * {@link #add(String, String, String, Relation, Set)}.
     *
     * @return always 1 (the counter is not advanced by the current implementation)
     */
    public static int loadCitiesCapitals(Relation<String, LocodeData> countryNameToCities, Set<String> errors2) throws IOException {
        int i = 1;
        BufferedReader br = FileReaders.openFile(CldrUtility.class, "data/external/Cities-CountryCapitals.txt");
        try {
            while (true) {
                String line = br.readLine();
                if (line == null) {
                    break;
                }
                if (line.startsWith("#")) {
                    continue;
                }
                String[] parts = line.split(" *\t *");
                String cityName = parts[0];
                String countryName = parts[1];
                add(countryName, null, cityName, countryNameToCities, errors2);
            }
        } finally {
            br.close();
        }
        return i;
    }

    /** Names in the city lists that have no country code and are skipped without an error. */
    static final Set<String> noncountries = new HashSet<String>(Arrays.asList(
        "United States Virgin Islands", "Akrotiri and Dhekelia", "Easter Island", "Somaliland", "Northern Cyprus", "Nagorno-Karabakh Republic", "Abkhazia",
        "Transnistria", "South Ossetia"));

    static final Transform<String, String> REMOVE_ACCENTS = Transliterator.getInstance("nfd;[:mn:]remove");

    /**
     * Looks up the locode record for a city and, when exactly one candidate remains, adds it
     * to {@code countryNameToCities} under the English display name of the country.
     *
     * <p>The city is looked up by name and, failing that, by the name with accents removed.
     * Candidates are restricted to locodes starting with the country code; a candidate whose
     * subdivision equals {@code subdivision} wins outright. Every failure (unknown country,
     * no record, no record in the country, several records) is described in {@code errors2}.
     *
     * @param subdivision the expected subdivision, or {@code null} if unknown
     */
    static void add(String countryName, String subdivision, String cityName, Relation<String, LocodeData> countryNameToCities, Set<String> errors2) {
        String countryCode = CountryCodeConverter.getCodeFromName(countryName);
        if (countryCode == null) {
            if (!noncountries.contains(countryName)) {
                errors2.add("**Couldn't find country " + countryName);
            }
            // Without a country code there is nothing to match against; going on would
            // look up "und-null" and fail on startsWith(null).
            return;
        }
        countryName = ULocale.getDisplayCountry("und-" + countryCode, ULocale.ENGLISH);
        Set<LocodeData> locodeDatas = nameToLocodeData.get(cityName);
        if (locodeDatas == null) {
            // try again without accents
            String cityName2 = REMOVE_ACCENTS.transform(cityName);
            if (!cityName.equals(cityName2)) {
                locodeDatas = nameToLocodeData.get(cityName2);
            }
        }
        if (locodeDatas == null) {
            errors2.add("** No matching record for\t" + countryName + "\t" + countryCode + "\t" + cityName);
            return;
        }
        Set<LocodeData> rem = new LinkedHashSet<LocodeData>();
        for (LocodeData x : locodeDatas) {
            if (x.locode.startsWith(countryCode)) {
                if (x.subdivision.equals(subdivision)) {
                    // exact subdivision match overrides all other candidates
                    rem.clear();
                    rem.add(x);
                    break;
                }
                rem.add(x);
            }
        }
        if (rem.size() == 0) {
            errors2.add("** No matching country record for\t" + countryName + "\t" + countryCode + "\t" + cityName + "\t" + locodeDatas);
        } else if (rem.size() != 1) {
            errors2.add("** Multiple matching country records for\t" + countryName + "\t" + countryCode + "\t" + cityName + "\t" + rem);
        } else {
            LocodeData locodeData = rem.iterator().next();
            countryNameToCities.put(countryName, locodeData);
        }
    }
}