• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.util;
2 
3 import com.google.common.base.Joiner;
4 import com.ibm.icu.impl.Relation;
5 import com.ibm.icu.text.Transform;
6 import com.ibm.icu.text.Transliterator;
7 import com.ibm.icu.util.ICUUncheckedIOException;
8 import com.ibm.icu.util.Output;
9 import com.ibm.icu.util.ULocale;
10 import java.io.BufferedReader;
11 import java.io.IOException;
12 import java.nio.charset.Charset;
13 import java.nio.charset.StandardCharsets;
14 import java.util.Arrays;
15 import java.util.Collection;
16 import java.util.Collections;
17 import java.util.HashMap;
18 import java.util.HashSet;
19 import java.util.LinkedHashSet;
20 import java.util.Locale;
21 import java.util.Map;
22 import java.util.Map.Entry;
23 import java.util.Set;
24 import java.util.TreeMap;
25 import java.util.TreeSet;
26 import org.unicode.cldr.tool.CountryCodeConverter;
27 import org.unicode.cldr.tool.ToolConfig;
28 import org.unicode.cldr.util.ChainedMap.M3;
29 
30 public class Unlocode {
31 
32     private static final Charset LATIN1 = Charset.forName("ISO8859-1");
33 
    /**
     * A value that can be combined with another value of the same type.
     *
     * <p>{@link #putCheckingDuplicate} relies on this to fold a new record into the one already
     * stored under a key; it stores whatever {@code merge} returns.
     *
     * @param <T> the type being merged (normally the implementing class itself)
     */
    public interface Mergeable<T> {
        /**
         * Combines this value with {@code a}.
         *
         * @param a the value to fold into this one
         * @return the combined value
         */
        T merge(T a);
    }
37 
38     public static class Iso3166_2Data implements Mergeable<Iso3166_2Data> {
39         public final Set<String> names;
40 
Iso3166_2Data(String... name)41         public Iso3166_2Data(String... name) {
42             this(Arrays.asList(name));
43         }
44 
Iso3166_2Data(Collection<String> names)45         public Iso3166_2Data(Collection<String> names) {
46             this.names = Collections.unmodifiableSet(new LinkedHashSet<>(names));
47         }
48 
49         @Override
toString()50         public String toString() {
51             return names.toString();
52         }
53 
54         @Override
equals(Object obj)55         public boolean equals(Object obj) {
56             return names.equals(obj);
57         }
58 
59         @Override
hashCode()60         public int hashCode() {
61             return names.hashCode();
62         }
63 
64         @Override
merge(Iso3166_2Data b)65         public Iso3166_2Data merge(Iso3166_2Data b) {
66             LinkedHashSet<String> set = new LinkedHashSet<>(names);
67             set.addAll(b.names);
68             return new Iso3166_2Data(set);
69         }
70     }
71 
72     public static class LocodeData implements Mergeable<LocodeData>, Comparable<LocodeData> {
73         public final String locode;
74         public final Set<String> names;
75         public final String subdivision;
76         public final float north;
77         public final float east;
78 
LocodeData(String locode, String name, String subdivision, float north, float east)79         public LocodeData(String locode, String name, String subdivision, float north, float east) {
80             this(locode, Arrays.asList(name), subdivision, north, east);
81         }
82 
LocodeData( String locode, Collection<String> names, String subdivision, float north, float east)83         public LocodeData(
84                 String locode,
85                 Collection<String> names,
86                 String subdivision,
87                 float north,
88                 float east) {
89             this.locode = locode;
90             this.names = Collections.unmodifiableSet(new LinkedHashSet<>(names));
91             this.subdivision = subdivision;
92             this.north = north;
93             this.east = east;
94         }
95 
96         @Override
toString()97         public String toString() {
98             return names + ", " + locode + ", " + subdivision + ", " + north + ", " + east;
99         }
100 
101         /** Warning, must never have locode datas with the same locode and different other data. */
102         @Override
compareTo(LocodeData o)103         public int compareTo(LocodeData o) {
104             // TODO Auto-generated method stub
105             return locode.compareTo(o.locode);
106         }
107 
108         /** Warning, must never have locode datas with the same locode and different other data. */
109         @Override
equals(Object obj)110         public boolean equals(Object obj) {
111             LocodeData other = (LocodeData) obj;
112             return locode.equals(other.locode);
113         }
114 
115         @Override
hashCode()116         public int hashCode() {
117             return locode.hashCode();
118         }
119 
120         @Override
merge(LocodeData other)121         public LocodeData merge(LocodeData other) {
122             if (locode.equals(other.locode)
123                     && subdivision.equals(other.subdivision)
124                     && north == other.north
125                     && east == other.east) {
126                 LinkedHashSet<String> set = new LinkedHashSet<>(names);
127                 set.addAll(other.names);
128                 return new LocodeData(locode, set, subdivision, north, east);
129             }
130             throw new IllegalArgumentException("Can't merge " + this + " with " + other);
131         }
132     }
133 
    // Locode (country code + location suffix) -> record. Filled by load(int) and by the static
    // initializer, which finally replaces it with an unmodifiable view.
    static Map<String, LocodeData> locodeToData = new HashMap<>();
    // Name -> every record carrying that name. Populated and frozen by the static initializer.
    static Relation<String, LocodeData> nameToLocodeData =
            Relation.of(new HashMap<String, Set<LocodeData>>(), HashSet.class);
    // Subdivision code -> names. Filled by loadIso(), then replaced with an unmodifiable view.
    static Map<String, Iso3166_2Data> iso3166_2Data = new HashMap<>();
    // Problems noticed while loading, keyed by the offending code; exposed by getLoadErrors().
    static Relation<String, String> ERRORS =
            Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class);
140 
141     static {
142         // read the data
143         try {
loadIso()144             loadIso();
145             iso3166_2Data = Collections.unmodifiableMap(iso3166_2Data);
146             load(1);
147             load(2);
148             load(3);
149             // load exceptions
150             try {
151                 BufferedReader br =
152                         FileReaders.openFile(
153                                 CldrUtility.class, "data/external/alternate_locode_name.txt");
154                 while (true) {
155                     String line = br.readLine();
156                     if (line == null) {
157                         break;
158                     }
159                     int hash = line.indexOf('#');
160                     if (hash >= 0) {
161                         line = line.substring(0, hash);
162                     }
163                     line = line.trim();
164                     if (line.isEmpty()) {
165                         continue;
166                     }
167                     if (line.equals("EOF")) {
168                         break;
169                     }
170                     String[] parts = line.split("\\s*;\\s*");
171                     // System.out.println(Arrays.asList(parts));
172                     String locode = parts[0].replace(" ", "");
173                     if (locode.length() != 5) {
174                         throw new IllegalArgumentException(line);
175                     }
176                     String alternateName = parts[1];
177                     LocodeData locodeData = locodeToData.get(locode);
putCheckingDuplicate( locodeToData, locode, new LocodeData( locode, alternateName, locodeData.subdivision, locodeData.north, locodeData.east))178                     putCheckingDuplicate(
179                             locodeToData,
180                             locode,
181                             new LocodeData(
182                                     locode,
183                                     alternateName,
184                                     locodeData.subdivision,
185                                     locodeData.north,
186                                     locodeData.east));
187                 }
br.close()188                 br.close();
189             } catch (IOException e) {
190                 throw new ICUUncheckedIOException(e);
191             }
192             for (LocodeData s : locodeToData.values()) {
193                 for (String name : s.names) {
nameToLocodeData.put(name, s)194                     nameToLocodeData.put(name, s);
195                 }
196             }
nameToLocodeData.freeze()197             nameToLocodeData.freeze();
198             locodeToData = Collections.unmodifiableMap(locodeToData);
ERRORS.freeze()199             ERRORS.freeze();
200         } catch (IOException e) {
201         }
202     }
203 
204     /* http://www.unece.org/fileadmin/DAM/cefact/locode/unlocode_manual.pdf
205     //
206      * 0 ,
207      * 1 "AD",
208      * 2 "SJL",
209      * 3 "Sant Julià de Lòria",
210      * 4 "Sant Julia de Loria",
211      * 5 ?,
212      * 6 "--3-----",
213      * 7 "RL",
214      * 8 "1101",
215      * 9 ,
216      * 10 "4228N 00130E",""
217             0 Column Change
218             X Marked for deletion in the next issue
219             1 Country code
220                     "XZ" - no country
221             2 Column LOCODE
222             3 Column Name
223             4 Column Name Without Diacritics
224             5 Column Subdivision
225             6 Column Function
226             7 Column Status
227             8 Column Date
228             9 Column IATA
229             10 Latitude/Longitude
230             Torbay: 47°39′N 052°44′W "4739N 05244W"
231      */
232 
233     //    public static class FieldData<K extends Enum<K>> {
234     //        private List<EnumMap<K,String>> data;
235     //        public FieldData(Class<K> classInstance, BufferedReader r, String filename) {
236     //            data = new ArrayList<EnumMap<K,String>>();
237     //            FileUtilities.FileProcessor myReader = new FileUtilities.FileProcessor() {
238     //                @Override
239     //                protected boolean handleLine(int lineCount, String line) {
240     //                    // TODO Auto-generated method stub
241     //                    return super.handleLine(lineCount, line);
242     //                }
243     //            };
244     //            myReader.process(r, filename);
245     //            //new EnumMap<K, String>(classInstance);
246     //        }
247     //    }
248 
    /**
     * Columns of the tab-separated subdivision data file, in file order.
     *
     * <p>{@link #loadIso()} uses each constant's {@code ordinal()} as the column index, so the
     * declaration order must not be changed.
     */
    enum SubdivisionFields {
        Subdivision_category,
        Code_3166_2,
        Subdivision_name,
        Language_code,
        Romanization_system,
        Parent_subdivision
    }
257 
loadIso()258     public static void loadIso() throws IOException {
259         BufferedReader br =
260                 FileReaders.openFile(
261                         CldrUtility.class,
262                         "data/external/subdivisionData.txt",
263                         StandardCharsets.UTF_8);
264         while (true) {
265             // Subdivision category TAB 3166-2 code TAB Subdivision name TAB Language code TAB
266             // Romanization system TAB Parent subdivision
267 
268             String line = br.readLine();
269             if (line == null) {
270                 break;
271             }
272             int hash = line.indexOf('#');
273             if (hash >= 0) {
274                 line = line.substring(0, hash);
275             }
276             if (line.trim().isEmpty()) {
277                 continue;
278             }
279             String[] list = line.split("\t");
280             String locode = list[SubdivisionFields.Code_3166_2.ordinal()].trim();
281             if (locode.endsWith("*")) {
282                 locode = locode.substring(0, locode.length() - 1);
283             }
284             String bestName = list[SubdivisionFields.Subdivision_name.ordinal()].trim();
285             //            if (!locode.contains("-")) {
286             //                //System.out.println("*skipping: " + locode);
287             //                continue;
288             //            }
289             //
290             //            String names = list[5];
291             //            String[] name = names.split("\\+");
292             //            String bestName = null;
293             //            for (String namePair : name) {
294             //                if (bestName == null) {
295             //                    bestName = namePair.split("=")[1];
296             //                } else if (namePair.startsWith("en=")) {
297             //                    bestName = namePair.split("=")[1];
298             //                    break;
299             //                }
300             //            }
301             //            System.out.println("\t" + locode + "\t" + bestName + "\t\t\t");
302 
303             putCheckingDuplicate(iso3166_2Data, locode, new Iso3166_2Data(bestName));
304         }
305         br.close();
306     }
307 
load(int file)308     public static void load(int file) throws IOException {
309         BufferedReader br =
310                 // CldrUtility.getUTF8Data(
311                 FileReaders.openFile(
312                         CldrUtility.class,
313                         "data/external/2013-1_UNLOCODE_CodeListPart" + file + ".csv",
314                         LATIN1);
315         M3<String, String, Boolean> nameToAlternate =
316                 ChainedMap.of(
317                         new TreeMap<String, Object>(),
318                         new TreeMap<String, Object>(),
319                         Boolean.class);
320         Output<String> tempOutput = new Output<>();
321 
322         String oldCountryCode = null;
323         while (true) {
324             String line = br.readLine();
325             if (line == null) {
326                 break;
327             }
328             line = line.trim();
329             if (line.isEmpty()) {
330                 continue;
331             }
332             String[] list = CldrUtility.splitCommaSeparated(line);
333             String change = list[0];
334             String locSuffix = list[2];
335             if (change.equals("X")) {
336                 continue;
337             }
338             String countryCode = list[1];
339             if (!countryCode.equals(oldCountryCode)) {
340                 nameToAlternate.clear();
341                 oldCountryCode = countryCode;
342             }
343             String name = list[3];
344             String name2 = list[4];
345 
346             if (change.equals("=")) {
347                 String[] names = name.split("\\s*=\\s*");
348                 if (names.length != 2) {
349                     throw new IllegalArgumentException();
350                 }
351                 nameToAlternate.put(names[1], names[0], Boolean.TRUE);
352                 if (!name.equals(name2)) {
353                     names = name2.split("\\s*=\\s*");
354                     if (names.length != 2) {
355                         throw new IllegalArgumentException();
356                     }
357                     nameToAlternate.put(names[1], names[0], Boolean.TRUE);
358                 }
359                 continue;
360             }
361             if (locSuffix.isEmpty()) {
362                 if (!name.startsWith(".")) {
363                     // System.out.println("*** Skipping " + line);
364                 }
365                 continue;
366             }
367 
368             name = removeParens(name, tempOutput);
369             String name3 = tempOutput.value;
370             name2 = removeParens(name2, tempOutput);
371             String name4 = tempOutput.value;
372 
373             String subdivision = list[5];
374             if (!subdivision.isEmpty()) {
375                 subdivision = countryCode + "-" + subdivision;
376                 if (getIso3166_2Data(subdivision) == null) {
377                     ERRORS.put(
378                             subdivision, "Missing subdivision " + subdivision + " on line " + line);
379                 }
380             }
381             String latLong = list[10];
382             float latN = 0;
383             float longE = 0;
384             if (!latLong.isEmpty()) {
385                 String[] latlong = latLong.split(" ");
386                 latN = parse(latlong[0]);
387                 longE = parse(latlong[1]);
388             }
389             String locode = countryCode + locSuffix;
390             LocodeData locodeData = new LocodeData(locode, name, subdivision, latN, longE);
391             putCheckingDuplicate(locodeToData, locode, locodeData);
392             Map<String, Boolean> alternates = nameToAlternate.get(name);
393             if (alternates != null) {
394                 for (String alt : alternates.keySet()) {
395                     putCheckingDuplicate(
396                             locodeToData,
397                             locode,
398                             new LocodeData(locode, alt, subdivision, latN, longE));
399                 }
400             }
401             if (!name2.equals(name)) {
402                 putCheckingDuplicate(
403                         locodeToData,
404                         locode,
405                         new LocodeData(locode, name2, subdivision, latN, longE));
406                 alternates = nameToAlternate.get(name2);
407                 if (alternates != null) {
408                     for (String alt : alternates.keySet()) {
409                         putCheckingDuplicate(
410                                 locodeToData,
411                                 locode,
412                                 new LocodeData(locode, alt, subdivision, latN, longE));
413                     }
414                 }
415             }
416             if (name3 != null) {
417                 putCheckingDuplicate(
418                         locodeToData,
419                         locode,
420                         new LocodeData(locode, name3, subdivision, latN, longE));
421             }
422             if (name4 != null && !name4.equals(name3)) {
423                 putCheckingDuplicate(
424                         locodeToData,
425                         locode,
426                         new LocodeData(locode, name4, subdivision, latN, longE));
427             }
428         }
429         br.close();
430     }
431 
removeParens(String name, Output<String> tempOutput)432     public static String removeParens(String name, Output<String> tempOutput) {
433         int paren = name.indexOf("(");
434         tempOutput.value = null;
435         if (paren > 0) {
436             int paren2 = name.indexOf(")", paren);
437             if (paren2 < 0) {
438                 paren2 = name.length();
439             }
440             // if the parens start with (ex, then it appears to be a safe alias.
441             // if not, we don't know, since the UN format is ambiguous
442             // sometimes yes: «Ras Zubbaya (Ras Dubayyah)»
443             // sometimes no: «Challis Venture (oil terminal)»
444             String temp = name.substring(paren + 1, paren2);
445             if (temp.startsWith("ex ")) {
446                 tempOutput.value = temp.substring(3);
447             }
448             name =
449                     paren2 == name.length()
450                             ? name.substring(0, paren).trim()
451                             : (name.substring(0, paren) + name.substring(paren2 + 1))
452                                     .replace("  ", " ")
453                                     .trim();
454             // System.out.println("«" + orginal + "» => «" + name + "», «" + tempOutput.value +
455             // "»");
456         }
457         return name;
458     }
459 
putCheckingDuplicate( Map<K, V> map, K key, V value)460     public static <K, V extends Mergeable<V>> void putCheckingDuplicate(
461             Map<K, V> map, K key, V value) {
462         V old = map.get(key);
463         if (old != null && !old.equals(value)) {
464             try {
465                 map.put(key, old.merge(value));
466             } catch (Exception e) {
467                 ERRORS.put(key.toString(), "Can't merge records: " + key + "\t" + e.getMessage());
468             }
469         } else {
470             map.put(key, value);
471         }
472     }
473 
    /** Returns the record stored for {@code unlocode}, or {@code null} if there is none. */
    public static LocodeData getLocodeData(String unlocode) {
        return locodeToData.get(unlocode);
    }

    /** Returns the entry set of the locode table. */
    public static Set<Entry<String, LocodeData>> entrySet() {
        return locodeToData.entrySet();
    }

    /** Returns the key set of the locode table, i.e. every locode that has a record. */
    public static Set<String> getAvailable() {
        return locodeToData.keySet();
    }

    /**
     * Returns the subdivision data stored under the given code, or {@code null} if there is none.
     * Despite the parameter name, the key looked up is a subdivision code (load(int) calls this
     * with country code + "-" + subdivision).
     */
    public static Iso3166_2Data getIso3166_2Data(String unlocode) {
        return iso3166_2Data.get(unlocode);
    }

    /** Returns the entry set of the subdivision table. */
    public static Set<Entry<String, Iso3166_2Data>> isoEntrySet() {
        return iso3166_2Data.entrySet();
    }

    /** Returns the key set of the subdivision table. */
    public static Set<String> getAvailableIso3166_2() {
        return iso3166_2Data.keySet();
    }

    /** Returns the problems recorded while the tables were loaded, keyed by the offending code. */
    public static Relation<String, String> getLoadErrors() {
        return ERRORS;
    }
501 
parse(String string)502     private static float parse(String string) {
503         int len = string.length();
504         char dir = string.charAt(len - 1);
505         int result0 = Integer.parseInt(string.substring(0, len - 1));
506         float fract = (result0 % 100) / 60f;
507         fract = ((int) (fract * 100 + 0.499999999f)) / 100f;
508         float result = (result0 / 100) + fract;
509         return dir == 'N' || dir == 'E' ? result : -result;
510     }
511 
main(String[] args)512     public static void main(String[] args) throws IOException {
513         Relation<String, LocodeData> countryNameToCities =
514                 Relation.of(new TreeMap<String, Set<LocodeData>>(), TreeSet.class);
515         Set<String> errors = new TreeSet<>();
516         loadCitiesCapitals(countryNameToCities, errors);
517         loadCitiesOver1M(countryNameToCities, errors);
518         SupplementalDataInfo supp = ToolConfig.getToolInstance().getSupplementalDataInfo();
519         Set<String> missing = new TreeSet<>(supp.getBcp47Keys().get("tz"));
520         Set<String> already = new TreeSet<>();
521 
522         for (Entry<String, LocodeData> entry : countryNameToCities.keyValueSet()) {
523             String countryName = entry.getKey();
524             LocodeData item = entry.getValue();
525             String firstName = item.names.iterator().next();
526             LinkedHashSet<String> remainingNames = new LinkedHashSet<>(item.names);
527             remainingNames.remove(firstName);
528             String lowerLocode = item.locode.toLowerCase(Locale.ENGLISH);
529             String info =
530                     countryName
531                             + "\t"
532                             + (remainingNames.isEmpty() ? "" : remainingNames)
533                             + "\t"
534                             + (item.subdivision.isEmpty() ? "" : "(" + item.subdivision + ")");
535 
536             if (missing.contains(lowerLocode)) {
537                 missing.remove(lowerLocode);
538                 already.add(lowerLocode);
539                 continue;
540             }
541             System.out.println(
542                     "<location type=\""
543                             + lowerLocode
544                             + "\">"
545                             + firstName
546                             + "</location>\t<!--"
547                             + info
548                             + "-->");
549         }
550         System.out.println();
551         System.out.println(Joiner.on("\n").join(errors));
552         System.out.println();
553         showLocodes("In exemplars already:", already);
554         System.out.println();
555         showLocodes("In exemplars but not new cities:", missing);
556         System.out.println();
557         for (Entry<String, Set<String>> errorEntry : ERRORS.keyValuesSet()) {
558             System.out.println(errorEntry.getKey() + "\t" + errorEntry.getValue());
559         }
560         if (true) return;
561 
562         int i = 0;
563         //        for (String s : new TreeSet<String>(Unlocode.getAvailableIso3166_2())) {
564         //            System.out.println((i++) + "\t" + s + "\t" + Unlocode.getIso3166_2Data(s));
565         //            //if (i > 1000) break;
566         //        }
567         for (String s : new TreeSet<>(Unlocode.getAvailable())) {
568             if (!s.startsWith("GT")) {
569                 continue;
570             }
571             System.out.println((i++) + "\t" + s + "\t" + Unlocode.getLocodeData(s));
572             // if (i > 1000) break;
573         }
574 
575         //        Set<String> KNOWN_ERRORS = new HashSet<String>(Arrays.asList("AR-LA", "DE-BR"));
576         //
577         //        for (Entry<String, Set<String>> s : getLoadErrors().keyValuesSet()) {
578         //            String key = s.getKey();
579         //            Set<String> values = s.getValue();
580         //            if (KNOWN_ERRORS.contains(key)) {
581         //                System.out.println("# Known error\t" + key);
582         //                continue;
583         //            }
584         //            String s2 = values.toString();
585         //            System.out.println(key + "\t" + s2.substring(0,Math.min(256, s2.length())) +
586         // "…");
587         //        }
588     }
589 
showLocodes(String title, Set<String> already)590     public static void showLocodes(String title, Set<String> already) {
591         Set<String> noData = new TreeSet<>();
592         Set<String> noData2 = new TreeSet<>();
593         for (String locode : already) {
594             String upperLocode = locode.toUpperCase(Locale.ENGLISH);
595             String countryName =
596                     ULocale.getDisplayCountry(
597                             "und-" + upperLocode.substring(0, 2), ULocale.ENGLISH);
598             LocodeData data = locodeToData.get(upperLocode);
599             if (data == null) {
600                 if (locode.length() == 5) {
601                     noData.add(locode);
602                 } else {
603                     noData2.add(locode);
604                 }
605             } else {
606                 System.out.println(title + "\t" + countryName + "\t" + data);
607             }
608         }
609         System.out.println("* No locode data, len 5:\t" + noData);
610         System.out.println("* No locode data:\t" + noData2);
611     }
612 
loadCitiesOver1M( Relation<String, LocodeData> countryNameToCities, Set<String> errors2)613     public static int loadCitiesOver1M(
614             Relation<String, LocodeData> countryNameToCities, Set<String> errors2)
615             throws IOException {
616         int i = 1;
617 
618         BufferedReader br =
619                 FileReaders.openFile(CldrUtility.class, "data/external/Cities-Over1M.txt");
620         main:
621         while (true) {
622             String line = br.readLine();
623             if (line == null) {
624                 break;
625             }
626             if (line.startsWith("#")) {
627                 continue;
628             }
629             String[] parts = line.split("\t");
630             // System.out.println(Arrays.asList(parts));
631             String cityName = parts[2];
632             String subdivision = null;
633             int bracket = cityName.indexOf('[');
634             if (bracket > 0) {
635                 try {
636                     subdivision = cityName.substring(bracket + 1, cityName.indexOf(']'));
637                     cityName = cityName.substring(0, bracket);
638                 } catch (Exception e) {
639                     throw new IllegalArgumentException(cityName);
640                 }
641             }
642             String countryName = parts[3];
643             add(countryName, subdivision, cityName, countryNameToCities, errors2);
644 
645             //                String countryCode =
646             // CountryCodeConverter.getCodeFromName(countryName);
647             //                if (countryCode == null) {
648             //                    System.out.println("*** Couldn't find country " + countryName);
649             //                    continue;
650             //                }
651             //                Set<LocodeData> locodeDatas = nameToLocodeData.get(cityName);
652             //                if (locodeDatas == null) {
653             //                    System.out.println((i++) + " Couldn't find city " + cityName + "
654             // in " + countryName);
655             //                    continue;
656             //                } else if (locodeDatas.size() == 1) {
657             //                    add(countryNameToCities,locodeDatas.iterator().next());
658             //                } else  {
659             //                    Set<LocodeData> rem = new LinkedHashSet();
660             //                    for (LocodeData x : locodeDatas) {
661             //                        if (x.subdivision.equals(subdivision)) {
662             //                            add(countryNameToCities, x);
663             //                            continue main;
664             //                        }
665             //                        if (x.subdivision.startsWith(countryCode)) {
666             //                            rem.add(x);
667             //                        }
668             //                    }
669             //                    if (rem.size() != 1) {
670             //                        System.out.println((i++) + " No single record for " + cityName
671             // + "\t" + rem);
672             //                    } else {
673             //                        add(countryNameToCities, rem.iterator().next());
674             //                    }
675             //                }
676         }
677         br.close();
678         return i;
679     }
680 
loadCitiesCapitals( Relation<String, LocodeData> countryNameToCities, Set<String> errors2)681     public static int loadCitiesCapitals(
682             Relation<String, LocodeData> countryNameToCities, Set<String> errors2)
683             throws IOException {
684         int i = 1;
685         BufferedReader br =
686                 FileReaders.openFile(CldrUtility.class, "data/external/Cities-CountryCapitals.txt");
687         while (true) {
688             String line = br.readLine();
689             if (line == null) {
690                 break;
691             }
692             if (line.startsWith("#")) {
693                 continue;
694             }
695             String[] parts = line.split(" *\t *");
696             // System.out.println(Arrays.asList(parts));
697             String cityName = parts[0];
698             String countryName = parts[1];
699             add(countryName, null, cityName, countryNameToCities, errors2);
700         }
701         br.close();
702         return i;
703     }
704 
    // Names for which add() silently skips the city when no country code can be found, instead
    // of reporting an error.
    static final Set<String> noncountries =
            new HashSet<>(
                    Arrays.asList(
                            "United States Virgin Islands",
                            "Akrotiri and Dhekelia",
                            "Easter Island",
                            "Somaliland",
                            "Northern Cyprus",
                            "Nagorno-Karabakh Republic",
                            "Abkhazia",
                            "Transnistria",
                            "South Ossetia"));

    // Used by add() to retry a city lookup without accents when the exact name is not found.
    static final Transform<String, String> REMOVE_ACCENTS =
            Transliterator.getInstance("nfd;[:mn:]remove");
720 
add( String countryName, String subdivision, String cityName, Relation<String, LocodeData> countryNameToCities, Set<String> errors2)721     static void add(
722             String countryName,
723             String subdivision,
724             String cityName,
725             Relation<String, LocodeData> countryNameToCities,
726             Set<String> errors2) {
727         String countryCode = CountryCodeConverter.getCodeFromName(countryName, false);
728         if (countryCode == null) {
729             if (noncountries.contains(countryName)) {
730                 return; // skip
731             }
732             errors2.add("**Couldn't find country " + countryName);
733             // continue;
734         }
735         countryName = ULocale.getDisplayCountry("und-" + countryCode, ULocale.ENGLISH);
736         Set<LocodeData> locodeDatas = nameToLocodeData.get(cityName);
737         if (locodeDatas == null) {
738             // try again without accents
739             String cityName2 = REMOVE_ACCENTS.transform(cityName);
740             if (!cityName.equals(cityName2)) {
741                 locodeDatas = nameToLocodeData.get(cityName2);
742             }
743         }
744         if (locodeDatas == null) {
745             errors2.add(
746                     "** No matching record for\t"
747                             + countryName
748                             + "\t"
749                             + countryCode
750                             + "\t"
751                             + cityName);
752         } else {
753             Set<LocodeData> rem = new LinkedHashSet<>();
754             for (LocodeData x : locodeDatas) {
755                 if (x.locode.startsWith(countryCode)) {
756                     if (x.subdivision.equals(subdivision)) {
757                         rem.clear();
758                         rem.add(x);
759                         break;
760                     }
761                     rem.add(x);
762                 }
763             }
764             if (rem.size() == 0) {
765                 errors2.add(
766                         "** No matching country record for\t"
767                                 + countryName
768                                 + "\t"
769                                 + countryCode
770                                 + "\t"
771                                 + cityName
772                                 + "\t"
773                                 + locodeDatas);
774             } else if (rem.size() != 1) {
775                 errors2.add(
776                         "** Multiple matching country records for\t"
777                                 + countryName
778                                 + "\t"
779                                 + countryCode
780                                 + "\t"
781                                 + cityName
782                                 + "\t"
783                                 + rem);
784             } else {
785                 LocodeData locodeData = rem.iterator().next();
786                 countryNameToCities.put(countryName, locodeData);
787             }
788         }
789     }
790 }
791