• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.util;
2 
3 import java.io.BufferedReader;
4 import java.io.IOException;
5 import java.nio.charset.Charset;
6 import java.nio.charset.StandardCharsets;
7 import java.util.Arrays;
8 import java.util.Collection;
9 import java.util.Collections;
10 import java.util.HashMap;
11 import java.util.HashSet;
12 import java.util.LinkedHashSet;
13 import java.util.Locale;
14 import java.util.Map;
15 import java.util.Map.Entry;
16 import java.util.Set;
17 import java.util.TreeMap;
18 import java.util.TreeSet;
19 
20 import org.unicode.cldr.tool.CountryCodeConverter;
21 import org.unicode.cldr.tool.ToolConfig;
22 import org.unicode.cldr.util.ChainedMap.M3;
23 
24 import com.google.common.base.Joiner;
25 import com.ibm.icu.impl.Relation;
26 import com.ibm.icu.text.Transform;
27 import com.ibm.icu.text.Transliterator;
28 import com.ibm.icu.util.ICUUncheckedIOException;
29 import com.ibm.icu.util.Output;
30 import com.ibm.icu.util.ULocale;
31 
32 public class Unlocode {
33 
34     private static final Charset LATIN1 = Charset.forName("ISO8859-1");
35 
36     public interface Mergeable<T> {
merge(T a)37         T merge(T a);
38     }
39 
40     public static class Iso3166_2Data implements Mergeable<Iso3166_2Data> {
41         public final Set<String> names;
42 
Iso3166_2Data(String... name)43         public Iso3166_2Data(String... name) {
44             this(Arrays.asList(name));
45         }
46 
Iso3166_2Data(Collection<String> names)47         public Iso3166_2Data(Collection<String> names) {
48             this.names = Collections.unmodifiableSet(new LinkedHashSet<>(names));
49         }
50 
51         @Override
toString()52         public String toString() {
53             return names.toString();
54         }
55 
56         @Override
equals(Object obj)57         public boolean equals(Object obj) {
58             return names.equals(obj);
59         }
60 
61         @Override
hashCode()62         public int hashCode() {
63             return names.hashCode();
64         }
65 
66         @Override
merge(Iso3166_2Data b)67         public Iso3166_2Data merge(Iso3166_2Data b) {
68             LinkedHashSet<String> set = new LinkedHashSet<>(names);
69             set.addAll(b.names);
70             return new Iso3166_2Data(set);
71         }
72     }
73 
74     public static class LocodeData implements Mergeable<LocodeData>, Comparable<LocodeData> {
75         public final String locode;
76         public final Set<String> names;
77         public final String subdivision;
78         public final float north;
79         public final float east;
80 
LocodeData(String locode, String name, String subdivision, float north, float east)81         public LocodeData(String locode, String name, String subdivision, float north, float east) {
82             this(locode, Arrays.asList(name), subdivision, north, east);
83         }
84 
LocodeData(String locode, Collection<String> names, String subdivision, float north, float east)85         public LocodeData(String locode, Collection<String> names, String subdivision, float north, float east) {
86             this.locode = locode;
87             this.names = Collections.unmodifiableSet(new LinkedHashSet<>(names));
88             this.subdivision = subdivision;
89             this.north = north;
90             this.east = east;
91         }
92 
93         @Override
toString()94         public String toString() {
95             return names + ", " + locode + ", " + subdivision + ", " + north + ", " + east;
96         }
97 
98         /**
99          * Warning, must never have locode datas with the same locode and different other data.
100          */
101         @Override
compareTo(LocodeData o)102         public int compareTo(LocodeData o) {
103             // TODO Auto-generated method stub
104             return locode.compareTo(o.locode);
105         }
106 
107         /**
108          * Warning, must never have locode datas with the same locode and different other data.
109          */
110         @Override
equals(Object obj)111         public boolean equals(Object obj) {
112             LocodeData other = (LocodeData) obj;
113             return locode.equals(other.locode);
114         }
115 
116         @Override
hashCode()117         public int hashCode() {
118             return locode.hashCode();
119         }
120 
121         @Override
merge(LocodeData other)122         public LocodeData merge(LocodeData other) {
123             if (locode.equals(other.locode)
124                 && subdivision.equals(other.subdivision)
125                 && north == other.north
126                 && east == other.east) {
127                 LinkedHashSet<String> set = new LinkedHashSet<>(names);
128                 set.addAll(other.names);
129                 return new LocodeData(locode, set, subdivision, north, east);
130             }
131             throw new IllegalArgumentException("Can't merge " + this + " with " + other);
132         }
133 
134     }
135 
136     static Map<String, LocodeData> locodeToData = new HashMap<>();
137     static Relation<String, LocodeData> nameToLocodeData = Relation.of(new HashMap<String, Set<LocodeData>>(), HashSet.class);
138     static Map<String, Iso3166_2Data> iso3166_2Data = new HashMap<>();
139     static Relation<String, String> ERRORS = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class);
140 
141     static {
142         // read the data
143         try {
loadIso()144             loadIso();
145             iso3166_2Data = Collections.unmodifiableMap(iso3166_2Data);
146             load(1);
147             load(2);
148             load(3);
149             // load exceptions
150             try {
151                 BufferedReader br = FileReaders.openFile(CldrUtility.class,
152                     "data/external/alternate_locode_name.txt");
153                 while (true) {
154                     String line = br.readLine();
155                     if (line == null) {
156                         break;
157                     }
158                     int hash = line.indexOf('#');
159                     if (hash >= 0) {
160                         line = line.substring(0, hash);
161                     }
162                     line = line.trim();
163                     if (line.isEmpty()) {
164                         continue;
165                     }
166                     if (line.equals("EOF")) {
167                         break;
168                     }
169                     String[] parts = line.split("\\s*;\\s*");
170                     //System.out.println(Arrays.asList(parts));
171                     String locode = parts[0].replace(" ", "");
172                     if (locode.length() != 5) {
173                         throw new IllegalArgumentException(line);
174                     }
175                     String alternateName = parts[1];
176                     LocodeData locodeData = locodeToData.get(locode);
putCheckingDuplicate(locodeToData, locode, new LocodeData( locode, alternateName, locodeData.subdivision, locodeData.north, locodeData.east))177                     putCheckingDuplicate(locodeToData, locode, new LocodeData(
178                         locode, alternateName, locodeData.subdivision, locodeData.north, locodeData.east));
179                 }
br.close()180                 br.close();
181             } catch (IOException e) {
182                 throw new ICUUncheckedIOException(e);
183             }
184             for (LocodeData s : locodeToData.values()) {
185                 for (String name : s.names) {
nameToLocodeData.put(name, s)186                     nameToLocodeData.put(name, s);
187                 }
188             }
nameToLocodeData.freeze()189             nameToLocodeData.freeze();
190             locodeToData = Collections.unmodifiableMap(locodeToData);
ERRORS.freeze()191             ERRORS.freeze();
192         } catch (IOException e) {
193         }
194     }
195 
196     /* http://www.unece.org/fileadmin/DAM/cefact/locode/unlocode_manual.pdf
197     //
198      * 0 ,
199      * 1 "AD",
200      * 2 "SJL",
201      * 3 "Sant Julià de Lòria",
202      * 4 "Sant Julia de Loria",
203      * 5 ?,
204      * 6 "--3-----",
205      * 7 "RL",
206      * 8 "1101",
207      * 9 ,
208      * 10 "4228N 00130E",""
209             0 Column Change
210             X Marked for deletion in the next issue
211             1 Country code
212                     "XZ" - no country
213             2 Column LOCODE
214             3 Column Name
215             4 Column Name Without Diacritics
216             5 Column Subdivision
217             6 Column Function
218             7 Column Status
219             8 Column Date
220             9 Column IATA
221             10 Latitude/Longitude
222             Torbay: 47°39′N 052°44′W "4739N 05244W"
223      */
224 
225     //    public static class FieldData<K extends Enum<K>> {
226     //        private List<EnumMap<K,String>> data;
227     //        public FieldData(Class<K> classInstance, BufferedReader r, String filename) {
228     //            data = new ArrayList<EnumMap<K,String>>();
229     //            FileUtilities.FileProcessor myReader = new FileUtilities.FileProcessor() {
230     //                @Override
231     //                protected boolean handleLine(int lineCount, String line) {
232     //                    // TODO Auto-generated method stub
233     //                    return super.handleLine(lineCount, line);
234     //                }
235     //            };
236     //            myReader.process(r, filename);
237     //            //new EnumMap<K, String>(classInstance);
238     //        }
239     //    }
240 
241     enum SubdivisionFields {
242         Subdivision_category, Code_3166_2, Subdivision_name, Language_code, Romanization_system, Parent_subdivision
243     }
244 
loadIso()245     public static void loadIso() throws IOException {
246         BufferedReader br = FileReaders.openFile(CldrUtility.class,
247             "data/external/subdivisionData.txt", StandardCharsets.UTF_8);
248         while (true) {
249             // Subdivision category TAB 3166-2 code TAB Subdivision name TAB Language code TAB Romanization system TAB Parent subdivision
250 
251             String line = br.readLine();
252             if (line == null) {
253                 break;
254             }
255             int hash = line.indexOf('#');
256             if (hash >= 0) {
257                 line = line.substring(0, hash);
258             }
259             if (line.trim().isEmpty()) {
260                 continue;
261             }
262             String[] list = line.split("\t");
263             String locode = list[SubdivisionFields.Code_3166_2.ordinal()].trim();
264             if (locode.endsWith("*")) {
265                 locode = locode.substring(0, locode.length() - 1);
266             }
267             String bestName = list[SubdivisionFields.Subdivision_name.ordinal()].trim();
268             //            if (!locode.contains("-")) {
269             //                //System.out.println("*skipping: " + locode);
270             //                continue;
271             //            }
272             //
273             //            String names = list[5];
274             //            String[] name = names.split("\\+");
275             //            String bestName = null;
276             //            for (String namePair : name) {
277             //                if (bestName == null) {
278             //                    bestName = namePair.split("=")[1];
279             //                } else if (namePair.startsWith("en=")) {
280             //                    bestName = namePair.split("=")[1];
281             //                    break;
282             //                }
283             //            }
284 //            System.out.println("\t" + locode + "\t" + bestName + "\t\t\t");
285 
286             putCheckingDuplicate(iso3166_2Data, locode, new Iso3166_2Data(bestName));
287         }
288         br.close();
289     }
290 
load(int file)291     public static void load(int file) throws IOException {
292         BufferedReader br =
293             //CldrUtility.getUTF8Data(
294             FileReaders.openFile(CldrUtility.class,
295                 "data/external/2013-1_UNLOCODE_CodeListPart" + file + ".csv",
296                 LATIN1);
297         M3<String, String, Boolean> nameToAlternate = ChainedMap.of(new TreeMap<String, Object>(), new TreeMap<String, Object>(), Boolean.class);
298         Output<String> tempOutput = new Output<>();
299 
300         String oldCountryCode = null;
301         while (true) {
302             String line = br.readLine();
303             if (line == null) {
304                 break;
305             }
306             line = line.trim();
307             if (line.isEmpty()) {
308                 continue;
309             }
310             String[] list = CldrUtility.splitCommaSeparated(line);
311             String change = list[0];
312             String locSuffix = list[2];
313             if (change.equals("X")) {
314                 continue;
315             }
316             String countryCode = list[1];
317             if (!countryCode.equals(oldCountryCode)) {
318                 nameToAlternate.clear();
319                 oldCountryCode = countryCode;
320             }
321             String name = list[3];
322             String name2 = list[4];
323 
324             if (change.equals("=")) {
325                 String[] names = name.split("\\s*=\\s*");
326                 if (names.length != 2) {
327                     throw new IllegalArgumentException();
328                 }
329                 nameToAlternate.put(names[1], names[0], Boolean.TRUE);
330                 if (!name.equals(name2)) {
331                     names = name2.split("\\s*=\\s*");
332                     if (names.length != 2) {
333                         throw new IllegalArgumentException();
334                     }
335                     nameToAlternate.put(names[1], names[0], Boolean.TRUE);
336                 }
337                 continue;
338             }
339             if (locSuffix.isEmpty()) {
340                 if (!name.startsWith(".")) {
341                     // System.out.println("*** Skipping " + line);
342                 }
343                 continue;
344             }
345 
346             name = removeParens(name, tempOutput);
347             String name3 = tempOutput.value;
348             name2 = removeParens(name2, tempOutput);
349             String name4 = tempOutput.value;
350 
351             String subdivision = list[5];
352             if (!subdivision.isEmpty()) {
353                 subdivision = countryCode + "-" + subdivision;
354                 if (getIso3166_2Data(subdivision) == null) {
355                     ERRORS.put(subdivision, "Missing subdivision " + subdivision + " on line " + line);
356                 }
357             }
358             String latLong = list[10];
359             float latN = 0;
360             float longE = 0;
361             if (!latLong.isEmpty()) {
362                 String[] latlong = latLong.split(" ");
363                 latN = parse(latlong[0]);
364                 longE = parse(latlong[1]);
365             }
366             String locode = countryCode + locSuffix;
367             LocodeData locodeData = new LocodeData(locode, name, subdivision, latN, longE);
368             putCheckingDuplicate(locodeToData, locode, locodeData);
369             Map<String, Boolean> alternates = nameToAlternate.get(name);
370             if (alternates != null) {
371                 for (String alt : alternates.keySet()) {
372                     putCheckingDuplicate(locodeToData, locode, new LocodeData(locode, alt, subdivision, latN, longE));
373                 }
374             }
375             if (!name2.equals(name)) {
376                 putCheckingDuplicate(locodeToData, locode, new LocodeData(locode, name2, subdivision, latN, longE));
377                 alternates = nameToAlternate.get(name2);
378                 if (alternates != null) {
379                     for (String alt : alternates.keySet()) {
380                         putCheckingDuplicate(locodeToData, locode, new LocodeData(locode, alt, subdivision, latN, longE));
381                     }
382                 }
383             }
384             if (name3 != null) {
385                 putCheckingDuplicate(locodeToData, locode, new LocodeData(locode, name3, subdivision, latN, longE));
386             }
387             if (name4 != null && !name4.equals(name3)) {
388                 putCheckingDuplicate(locodeToData, locode, new LocodeData(locode, name4, subdivision, latN, longE));
389             }
390         }
391         br.close();
392     }
393 
removeParens(String name, Output<String> tempOutput)394     public static String removeParens(String name, Output<String> tempOutput) {
395         int paren = name.indexOf("(");
396         tempOutput.value = null;
397         if (paren > 0) {
398             int paren2 = name.indexOf(")", paren);
399             if (paren2 < 0) {
400                 paren2 = name.length();
401             }
402             // if the parens start with (ex, then it appears to be a safe alias.
403             // if not, we don't know, since the UN format is ambiguous
404             // sometimes yes: «Ras Zubbaya (Ras Dubayyah)»
405             // sometimes no: «Challis Venture (oil terminal)»
406             String temp = name.substring(paren + 1, paren2);
407             if (temp.startsWith("ex ")) {
408                 tempOutput.value = temp.substring(3);
409             }
410             name = paren2 == name.length()
411                 ? name.substring(0, paren).trim()
412                 : (name.substring(0, paren) + name.substring(paren2 + 1)).replace("  ", " ").trim();
413             //System.out.println("«" + orginal + "» => «" + name + "», «" + tempOutput.value + "»");
414         }
415         return name;
416     }
417 
putCheckingDuplicate(Map<K, V> map, K key, V value)418     public static <K, V extends Mergeable<V>> void putCheckingDuplicate(Map<K, V> map, K key, V value) {
419         V old = map.get(key);
420         if (old != null && !old.equals(value)) {
421             try {
422                 map.put(key, old.merge(value));
423             } catch (Exception e) {
424                 ERRORS.put(key.toString(), "Can't merge records: " + key + "\t" + e.getMessage());
425             }
426         } else {
427             map.put(key, value);
428         }
429     }
430 
getLocodeData(String unlocode)431     public static LocodeData getLocodeData(String unlocode) {
432         return locodeToData.get(unlocode);
433     }
434 
entrySet()435     public static Set<Entry<String, LocodeData>> entrySet() {
436         return locodeToData.entrySet();
437     }
438 
getAvailable()439     public static Set<String> getAvailable() {
440         return locodeToData.keySet();
441     }
442 
getIso3166_2Data(String unlocode)443     public static Iso3166_2Data getIso3166_2Data(String unlocode) {
444         return iso3166_2Data.get(unlocode);
445     }
446 
isoEntrySet()447     public static Set<Entry<String, Iso3166_2Data>> isoEntrySet() {
448         return iso3166_2Data.entrySet();
449     }
450 
getAvailableIso3166_2()451     public static Set<String> getAvailableIso3166_2() {
452         return iso3166_2Data.keySet();
453     }
454 
getLoadErrors()455     public static Relation<String, String> getLoadErrors() {
456         return ERRORS;
457     }
458 
parse(String string)459     private static float parse(String string) {
460         int len = string.length();
461         char dir = string.charAt(len - 1);
462         int result0 = Integer.parseInt(string.substring(0, len - 1));
463         float fract = (result0 % 100) / 60f;
464         fract = ((int) (fract * 100 + 0.499999999f)) / 100f;
465         float result = (result0 / 100) + fract;
466         return dir == 'N' || dir == 'E' ? result : -result;
467     }
468 
main(String[] args)469     public static void main(String[] args) throws IOException {
470         Relation<String, LocodeData> countryNameToCities = Relation.of(new TreeMap<String, Set<LocodeData>>(), TreeSet.class);
471         Set<String> errors = new TreeSet<>();
472         loadCitiesCapitals(countryNameToCities, errors);
473         loadCitiesOver1M(countryNameToCities, errors);
474         SupplementalDataInfo supp = ToolConfig.getToolInstance().getSupplementalDataInfo();
475         Set<String> missing = new TreeSet<>(
476             supp.getBcp47Keys().get("tz"));
477         Set<String> already = new TreeSet<>();
478 
479         for (Entry<String, LocodeData> entry : countryNameToCities.keyValueSet()) {
480             String countryName = entry.getKey();
481             LocodeData item = entry.getValue();
482             String firstName = item.names.iterator().next();
483             LinkedHashSet<String> remainingNames = new LinkedHashSet<>(item.names);
484             remainingNames.remove(firstName);
485             String lowerLocode = item.locode.toLowerCase(Locale.ENGLISH);
486             String info = countryName
487                 + "\t" + (remainingNames.isEmpty() ? "" : remainingNames)
488                 + "\t" + (item.subdivision.isEmpty() ? "" : "(" + item.subdivision + ")");
489 
490             if (missing.contains(lowerLocode)) {
491                 missing.remove(lowerLocode);
492                 already.add(lowerLocode);
493                 continue;
494             }
495             System.out.println("<location type=\"" + lowerLocode
496                 + "\">" + firstName
497                 + "</location>\t<!--" + info
498                 + "-->");
499         }
500         System.out.println();
501         System.out.println(Joiner.on("\n").join(errors));
502         System.out.println();
503         showLocodes("In exemplars already:", already);
504         System.out.println();
505         showLocodes("In exemplars but not new cities:", missing);
506         System.out.println();
507         for (Entry<String, Set<String>> errorEntry : ERRORS.keyValuesSet()) {
508             System.out.println(errorEntry.getKey() + "\t" + errorEntry.getValue());
509         }
510         if (true) return;
511 
512         int i = 0;
513         //        for (String s : new TreeSet<String>(Unlocode.getAvailableIso3166_2())) {
514         //            System.out.println((i++) + "\t" + s + "\t" + Unlocode.getIso3166_2Data(s));
515         //            //if (i > 1000) break;
516         //        }
517         for (String s : new TreeSet<>(Unlocode.getAvailable())) {
518             if (!s.startsWith("GT")) {
519                 continue;
520             }
521             System.out.println((i++) + "\t" + s + "\t" + Unlocode.getLocodeData(s));
522             //if (i > 1000) break;
523         }
524 
525         //        Set<String> KNOWN_ERRORS = new HashSet<String>(Arrays.asList("AR-LA", "DE-BR"));
526         //
527         //        for (Entry<String, Set<String>> s : getLoadErrors().keyValuesSet()) {
528         //            String key = s.getKey();
529         //            Set<String> values = s.getValue();
530         //            if (KNOWN_ERRORS.contains(key)) {
531         //                System.out.println("# Known error\t" + key);
532         //                continue;
533         //            }
534         //            String s2 = values.toString();
535         //            System.out.println(key + "\t" + s2.substring(0,Math.min(256, s2.length())) + "…");
536         //        }
537     }
538 
showLocodes(String title, Set<String> already)539     public static void showLocodes(String title, Set<String> already) {
540         Set<String> noData = new TreeSet<>();
541         Set<String> noData2 = new TreeSet<>();
542         for (String locode : already) {
543             String upperLocode = locode.toUpperCase(Locale.ENGLISH);
544             String countryName = ULocale.getDisplayCountry("und-" + upperLocode.substring(0, 2), ULocale.ENGLISH);
545             LocodeData data = locodeToData.get(upperLocode);
546             if (data == null) {
547                 if (locode.length() == 5) {
548                     noData.add(locode);
549                 } else {
550                     noData2.add(locode);
551                 }
552             } else {
553                 System.out.println(title + "\t" + countryName + "\t" + data);
554             }
555         }
556         System.out.println("* No locode data, len 5:\t" + noData);
557         System.out.println("* No locode data:\t" + noData2);
558     }
559 
loadCitiesOver1M(Relation<String, LocodeData> countryNameToCities, Set<String> errors2)560     public static int loadCitiesOver1M(Relation<String, LocodeData> countryNameToCities, Set<String> errors2) throws IOException {
561         int i = 1;
562 
563         BufferedReader br = FileReaders.openFile(CldrUtility.class, "data/external/Cities-Over1M.txt");
564         main: while (true) {
565             String line = br.readLine();
566             if (line == null) {
567                 break;
568             }
569             if (line.startsWith("#")) {
570                 continue;
571             }
572             String[] parts = line.split("\t");
573             //System.out.println(Arrays.asList(parts));
574             String cityName = parts[2];
575             String subdivision = null;
576             int bracket = cityName.indexOf('[');
577             if (bracket > 0) {
578                 try {
579                     subdivision = cityName.substring(bracket + 1, cityName.indexOf(']'));
580                     cityName = cityName.substring(0, bracket);
581                 } catch (Exception e) {
582                     throw new IllegalArgumentException(cityName);
583                 }
584             }
585             String countryName = parts[3];
586             add(countryName, subdivision, cityName, countryNameToCities, errors2);
587 
588             //                String countryCode = CountryCodeConverter.getCodeFromName(countryName);
589             //                if (countryCode == null) {
590             //                    System.out.println("*** Couldn't find country " + countryName);
591             //                    continue;
592             //                }
593             //                Set<LocodeData> locodeDatas = nameToLocodeData.get(cityName);
594             //                if (locodeDatas == null) {
595             //                    System.out.println((i++) + " Couldn't find city " + cityName + " in " + countryName);
596             //                    continue;
597             //                } else if (locodeDatas.size() == 1) {
598             //                    add(countryNameToCities,locodeDatas.iterator().next());
599             //                } else  {
600             //                    Set<LocodeData> rem = new LinkedHashSet();
601             //                    for (LocodeData x : locodeDatas) {
602             //                        if (x.subdivision.equals(subdivision)) {
603             //                            add(countryNameToCities, x);
604             //                            continue main;
605             //                        }
606             //                        if (x.subdivision.startsWith(countryCode)) {
607             //                            rem.add(x);
608             //                        }
609             //                    }
610             //                    if (rem.size() != 1) {
611             //                        System.out.println((i++) + " No single record for " + cityName + "\t" + rem);
612             //                    } else {
613             //                        add(countryNameToCities, rem.iterator().next());
614             //                    }
615             //                }
616         }
617         br.close();
618         return i;
619     }
620 
loadCitiesCapitals(Relation<String, LocodeData> countryNameToCities, Set<String> errors2)621     public static int loadCitiesCapitals(Relation<String, LocodeData> countryNameToCities, Set<String> errors2) throws IOException {
622         int i = 1;
623         BufferedReader br = FileReaders.openFile(CldrUtility.class, "data/external/Cities-CountryCapitals.txt");
624         while (true) {
625             String line = br.readLine();
626             if (line == null) {
627                 break;
628             }
629             if (line.startsWith("#")) {
630                 continue;
631             }
632             String[] parts = line.split(" *\t *");
633             //System.out.println(Arrays.asList(parts));
634             String cityName = parts[0];
635             String countryName = parts[1];
636             add(countryName, null, cityName, countryNameToCities, errors2);
637         }
638         br.close();
639         return i;
640     }
641 
642     static final Set<String> noncountries = new HashSet<>(Arrays.asList(
643         "United States Virgin Islands", "Akrotiri and Dhekelia", "Easter Island", "Somaliland", "Northern Cyprus", "Nagorno-Karabakh Republic", "Abkhazia",
644         "Transnistria", "South Ossetia"));
645 
646     static final Transform<String, String> REMOVE_ACCENTS = Transliterator.getInstance("nfd;[:mn:]remove");
647 
add(String countryName, String subdivision, String cityName, Relation<String, LocodeData> countryNameToCities, Set<String> errors2)648     static void add(String countryName, String subdivision, String cityName, Relation<String, LocodeData> countryNameToCities, Set<String> errors2) {
649         String countryCode = CountryCodeConverter.getCodeFromName(countryName, false);
650         if (countryCode == null) {
651             if (noncountries.contains(countryName)) {
652                 return; // skip
653             }
654             errors2.add("**Couldn't find country " + countryName);
655             //continue;
656         }
657         countryName = ULocale.getDisplayCountry("und-" + countryCode, ULocale.ENGLISH);
658         Set<LocodeData> locodeDatas = nameToLocodeData.get(cityName);
659         if (locodeDatas == null) {
660             // try again without accents
661             String cityName2 = REMOVE_ACCENTS.transform(cityName);
662             if (!cityName.equals(cityName2)) {
663                 locodeDatas = nameToLocodeData.get(cityName2);
664             }
665         }
666         if (locodeDatas == null) {
667             errors2.add("** No matching record for\t" + countryName + "\t" + countryCode + "\t" + cityName);
668         } else {
669             Set<LocodeData> rem = new LinkedHashSet<>();
670             for (LocodeData x : locodeDatas) {
671                 if (x.locode.startsWith(countryCode)) {
672                     if (x.subdivision.equals(subdivision)) {
673                         rem.clear();
674                         rem.add(x);
675                         break;
676                     }
677                     rem.add(x);
678                 }
679             }
680             if (rem.size() == 0) {
681                 errors2.add("** No matching country record for\t" + countryName + "\t" + countryCode + "\t" + cityName + "\t" + locodeDatas);
682             } else if (rem.size() != 1) {
683                 errors2.add("** Multiple matching country records for\t" + countryName + "\t" + countryCode + "\t" + cityName + "\t" + rem);
684             } else {
685                 LocodeData locodeData = rem.iterator().next();
686                 countryNameToCities.put(countryName, locodeData);
687             }
688         }
689     }
690 }