• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.util;
2 
3 import java.io.BufferedReader;
4 import java.io.IOException;
5 import java.nio.charset.Charset;
6 import java.util.Arrays;
7 import java.util.Collection;
8 import java.util.Collections;
9 import java.util.HashMap;
10 import java.util.HashSet;
11 import java.util.LinkedHashSet;
12 import java.util.Locale;
13 import java.util.Map;
14 import java.util.Map.Entry;
15 import java.util.Set;
16 import java.util.TreeMap;
17 import java.util.TreeSet;
18 
19 import org.unicode.cldr.tool.CountryCodeConverter;
20 import org.unicode.cldr.tool.ToolConfig;
21 import org.unicode.cldr.util.ChainedMap.M3;
22 
23 import com.ibm.icu.dev.util.CollectionUtilities;
24 import com.ibm.icu.impl.Relation;
25 import com.ibm.icu.text.Transform;
26 import com.ibm.icu.text.Transliterator;
27 import com.ibm.icu.util.ICUUncheckedIOException;
28 import com.ibm.icu.util.Output;
29 import com.ibm.icu.util.ULocale;
30 
31 public class Unlocode {
32 
33     private static final Charset LATIN1 = Charset.forName("ISO8859-1");
34 
35     public interface Mergeable<T> {
merge(T a)36         T merge(T a);
37     }
38 
39     public static class Iso3166_2Data implements Mergeable<Iso3166_2Data> {
40         public final Set<String> names;
41 
Iso3166_2Data(String... name)42         public Iso3166_2Data(String... name) {
43             this(Arrays.asList(name));
44         }
45 
Iso3166_2Data(Collection<String> names)46         public Iso3166_2Data(Collection<String> names) {
47             this.names = Collections.unmodifiableSet(new LinkedHashSet<String>(names));
48         }
49 
50         @Override
toString()51         public String toString() {
52             return names.toString();
53         }
54 
55         @Override
equals(Object obj)56         public boolean equals(Object obj) {
57             return names.equals((Iso3166_2Data) obj);
58         }
59 
60         @Override
hashCode()61         public int hashCode() {
62             return names.hashCode();
63         }
64 
65         @Override
merge(Iso3166_2Data b)66         public Iso3166_2Data merge(Iso3166_2Data b) {
67             LinkedHashSet<String> set = new LinkedHashSet<String>(names);
68             set.addAll(b.names);
69             return new Iso3166_2Data(set);
70         }
71     }
72 
73     public static class LocodeData implements Mergeable<LocodeData>, Comparable<LocodeData> {
74         public final String locode;
75         public final Set<String> names;
76         public final String subdivision;
77         public final float north;
78         public final float east;
79 
LocodeData(String locode, String name, String subdivision, float north, float east)80         public LocodeData(String locode, String name, String subdivision, float north, float east) {
81             this(locode, Arrays.asList(name), subdivision, north, east);
82         }
83 
LocodeData(String locode, Collection<String> names, String subdivision, float north, float east)84         public LocodeData(String locode, Collection<String> names, String subdivision, float north, float east) {
85             this.locode = locode;
86             this.names = Collections.unmodifiableSet(new LinkedHashSet<String>(names));
87             this.subdivision = subdivision;
88             this.north = north;
89             this.east = east;
90         }
91 
92         @Override
toString()93         public String toString() {
94             return names + ", " + locode + ", " + subdivision + ", " + north + ", " + east;
95         }
96 
97         /**
98          * Warning, must never have locode datas with the same locode and different other data.
99          */
100         @Override
compareTo(LocodeData o)101         public int compareTo(LocodeData o) {
102             // TODO Auto-generated method stub
103             return locode.compareTo(o.locode);
104         }
105 
106         /**
107          * Warning, must never have locode datas with the same locode and different other data.
108          */
109         @Override
equals(Object obj)110         public boolean equals(Object obj) {
111             LocodeData other = (LocodeData) obj;
112             return locode.equals(other.locode);
113         }
114 
115         @Override
hashCode()116         public int hashCode() {
117             return locode.hashCode();
118         }
119 
120         @Override
merge(LocodeData other)121         public LocodeData merge(LocodeData other) {
122             if (locode.equals(other.locode)
123                 && subdivision.equals(other.subdivision)
124                 && north == other.north
125                 && east == other.east) {
126                 LinkedHashSet<String> set = new LinkedHashSet<String>(names);
127                 set.addAll(other.names);
128                 return new LocodeData(locode, set, subdivision, north, east);
129             }
130             throw new IllegalArgumentException("Can't merge " + this + " with " + other);
131         }
132 
133     }
134 
135     static Map<String, LocodeData> locodeToData = new HashMap<String, LocodeData>();
136     static Relation<String, LocodeData> nameToLocodeData = Relation.of(new HashMap<String, Set<LocodeData>>(), HashSet.class);
137     static Map<String, Iso3166_2Data> iso3166_2Data = new HashMap<String, Iso3166_2Data>();
138     static Relation<String, String> ERRORS = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class);
139 
140     static {
141         // read the data
142         try {
loadIso()143             loadIso();
144             iso3166_2Data = Collections.unmodifiableMap(iso3166_2Data);
145             load(1);
146             load(2);
147             load(3);
148             // load exceptions
149             try {
150                 BufferedReader br = FileReaders.openFile(CldrUtility.class,
151                     "data/external/alternate_locode_name.txt");
152                 while (true) {
153                     String line = br.readLine();
154                     if (line == null) {
155                         break;
156                     }
157                     int hash = line.indexOf('#');
158                     if (hash >= 0) {
159                         line = line.substring(0, hash);
160                     }
161                     line = line.trim();
162                     if (line.isEmpty()) {
163                         continue;
164                     }
165                     if (line.equals("EOF")) {
166                         break;
167                     }
168                     String[] parts = line.split("\\s*;\\s*");
169                     //System.out.println(Arrays.asList(parts));
170                     String locode = parts[0].replace(" ", "");
171                     if (locode.length() != 5) {
172                         throw new IllegalArgumentException(line);
173                     }
174                     String alternateName = parts[1];
175                     LocodeData locodeData = locodeToData.get(locode);
putCheckingDuplicate(locodeToData, locode, new LocodeData( locode, alternateName, locodeData.subdivision, locodeData.north, locodeData.east))176                     putCheckingDuplicate(locodeToData, locode, new LocodeData(
177                         locode, alternateName, locodeData.subdivision, locodeData.north, locodeData.east));
178                 }
br.close()179                 br.close();
180             } catch (IOException e) {
181                 throw new ICUUncheckedIOException(e);
182             }
183             for (LocodeData s : locodeToData.values()) {
184                 for (String name : s.names) {
nameToLocodeData.put(name, s)185                     nameToLocodeData.put(name, s);
186                 }
187             }
nameToLocodeData.freeze()188             nameToLocodeData.freeze();
189             locodeToData = Collections.unmodifiableMap(locodeToData);
ERRORS.freeze()190             ERRORS.freeze();
191         } catch (IOException e) {
192         }
193     }
194 
    /*
     * Layout of a UN/LOCODE CSV row; see
     * http://www.unece.org/fileadmin/DAM/cefact/locode/unlocode_manual.pdf
     *
     * Sample row, one field per line, preceded by its field index:
     *  0 ,
     *  1 "AD",
     *  2 "SJL",
     *  3 "Sant Julià de Lòria",
     *  4 "Sant Julia de Loria",
     *  5 ?,
     *  6 "--3-----",
     *  7 "RL",
     *  8 "1101",
     *  9 ,
     * 10 "4228N 00130E",""
     *
     * Field meanings:
     *  0 Change ("X" = marked for deletion in the next issue)
     *  1 Country code ("XZ" = no country)
     *  2 LOCODE
     *  3 Name
     *  4 Name without diacritics
     *  5 Subdivision
     *  6 Function
     *  7 Status
     *  8 Date
     *  9 IATA
     * 10 Latitude/longitude, e.g. Torbay, 47°39′N 052°44′W, is written "4739N 05244W"
     */
223 
224     //    public static class FieldData<K extends Enum<K>> {
225     //        private List<EnumMap<K,String>> data;
226     //        public FieldData(Class<K> classInstance, BufferedReader r, String filename) {
227     //            data = new ArrayList<EnumMap<K,String>>();
228     //            FileUtilities.FileProcessor myReader = new FileUtilities.FileProcessor() {
229     //                @Override
230     //                protected boolean handleLine(int lineCount, String line) {
231     //                    // TODO Auto-generated method stub
232     //                    return super.handleLine(lineCount, line);
233     //                }
234     //            };
235     //            myReader.process(r, filename);
236     //            //new EnumMap<K, String>(classInstance);
237     //        }
238     //    }
239 
240     enum SubdivisionFields {
241         Subdivision_category, Code_3166_2, Subdivision_name, Language_code, Romanization_system, Parent_subdivision
242     }
243 
loadIso()244     public static void loadIso() throws IOException {
245         BufferedReader br = FileReaders.openFile(CldrUtility.class,
246             "data/external/subdivisionData.txt", CldrUtility.UTF8);
247         while (true) {
248             // Subdivision category TAB 3166-2 code TAB Subdivision name TAB Language code TAB Romanization system TAB Parent subdivision
249 
250             String line = br.readLine();
251             if (line == null) {
252                 break;
253             }
254             int hash = line.indexOf('#');
255             if (hash >= 0) {
256                 line = line.substring(0, hash);
257             }
258             if (line.trim().isEmpty()) {
259                 continue;
260             }
261             String[] list = line.split("\t");
262             String locode = list[SubdivisionFields.Code_3166_2.ordinal()].trim();
263             if (locode.endsWith("*")) {
264                 locode = locode.substring(0, locode.length() - 1);
265             }
266             String bestName = list[SubdivisionFields.Subdivision_name.ordinal()].trim();
267             //            if (!locode.contains("-")) {
268             //                //System.out.println("*skipping: " + locode);
269             //                continue;
270             //            }
271             //
272             //            String names = list[5];
273             //            String[] name = names.split("\\+");
274             //            String bestName = null;
275             //            for (String namePair : name) {
276             //                if (bestName == null) {
277             //                    bestName = namePair.split("=")[1];
278             //                } else if (namePair.startsWith("en=")) {
279             //                    bestName = namePair.split("=")[1];
280             //                    break;
281             //                }
282             //            }
283 //            System.out.println("\t" + locode + "\t" + bestName + "\t\t\t");
284 
285             putCheckingDuplicate(iso3166_2Data, locode, new Iso3166_2Data(bestName));
286         }
287         br.close();
288     }
289 
load(int file)290     public static void load(int file) throws IOException {
291         BufferedReader br =
292             //CldrUtility.getUTF8Data(
293             FileReaders.openFile(CldrUtility.class,
294                 "data/external/2013-1_UNLOCODE_CodeListPart" + file + ".csv",
295                 LATIN1);
296         M3<String, String, Boolean> nameToAlternate = ChainedMap.of(new TreeMap<String, Object>(), new TreeMap<String, Object>(), Boolean.class);
297         Output<String> tempOutput = new Output<String>();
298 
299         String oldCountryCode = null;
300         while (true) {
301             String line = br.readLine();
302             if (line == null) {
303                 break;
304             }
305             line = line.trim();
306             if (line.isEmpty()) {
307                 continue;
308             }
309             String[] list = CldrUtility.splitCommaSeparated(line);
310             String change = list[0];
311             String locSuffix = list[2];
312             if (change.equals("X")) {
313                 continue;
314             }
315             String countryCode = list[1];
316             if (!countryCode.equals(oldCountryCode)) {
317                 nameToAlternate.clear();
318                 oldCountryCode = countryCode;
319             }
320             String name = list[3];
321             String name2 = list[4];
322 
323             if (change.equals("=")) {
324                 String[] names = name.split("\\s*=\\s*");
325                 if (names.length != 2) {
326                     throw new IllegalArgumentException();
327                 }
328                 nameToAlternate.put(names[1], names[0], Boolean.TRUE);
329                 if (!name.equals(name2)) {
330                     names = name2.split("\\s*=\\s*");
331                     if (names.length != 2) {
332                         throw new IllegalArgumentException();
333                     }
334                     nameToAlternate.put(names[1], names[0], Boolean.TRUE);
335                 }
336                 continue;
337             }
338             if (locSuffix.isEmpty()) {
339                 if (!name.startsWith(".")) {
340                     // System.out.println("*** Skipping " + line);
341                 }
342                 continue;
343             }
344 
345             name = removeParens(name, tempOutput);
346             String name3 = tempOutput.value;
347             name2 = removeParens(name2, tempOutput);
348             String name4 = tempOutput.value;
349 
350             String subdivision = list[5];
351             if (!subdivision.isEmpty()) {
352                 subdivision = countryCode + "-" + subdivision;
353                 if (getIso3166_2Data(subdivision) == null) {
354                     ERRORS.put(subdivision, "Missing subdivision " + subdivision + " on line " + line);
355                 }
356             }
357             String latLong = list[10];
358             float latN = 0;
359             float longE = 0;
360             if (!latLong.isEmpty()) {
361                 String[] latlong = latLong.split(" ");
362                 latN = parse(latlong[0]);
363                 longE = parse(latlong[1]);
364             }
365             String locode = countryCode + locSuffix;
366             LocodeData locodeData = new LocodeData(locode, name, subdivision, latN, longE);
367             putCheckingDuplicate(locodeToData, locode, locodeData);
368             Map<String, Boolean> alternates = nameToAlternate.get(name);
369             if (alternates != null) {
370                 for (String alt : alternates.keySet()) {
371                     putCheckingDuplicate(locodeToData, locode, new LocodeData(locode, alt, subdivision, latN, longE));
372                 }
373             }
374             if (!name2.equals(name)) {
375                 putCheckingDuplicate(locodeToData, locode, new LocodeData(locode, name2, subdivision, latN, longE));
376                 alternates = nameToAlternate.get(name2);
377                 if (alternates != null) {
378                     for (String alt : alternates.keySet()) {
379                         putCheckingDuplicate(locodeToData, locode, new LocodeData(locode, alt, subdivision, latN, longE));
380                     }
381                 }
382             }
383             if (name3 != null) {
384                 putCheckingDuplicate(locodeToData, locode, new LocodeData(locode, name3, subdivision, latN, longE));
385             }
386             if (name4 != null && !name4.equals(name3)) {
387                 putCheckingDuplicate(locodeToData, locode, new LocodeData(locode, name4, subdivision, latN, longE));
388             }
389         }
390         br.close();
391     }
392 
removeParens(String name, Output<String> tempOutput)393     public static String removeParens(String name, Output<String> tempOutput) {
394         int paren = name.indexOf("(");
395         tempOutput.value = null;
396         if (paren > 0) {
397             int paren2 = name.indexOf(")", paren);
398             if (paren2 < 0) {
399                 paren2 = name.length();
400             }
401             // if the parens start with (ex, then it appears to be a safe alias.
402             // if not, we don't know, since the UN format is ambiguous
403             // sometimes yes: «Ras Zubbaya (Ras Dubayyah)»
404             // sometimes no: «Challis Venture (oil terminal)»
405             String temp = name.substring(paren + 1, paren2);
406             if (temp.startsWith("ex ")) {
407                 tempOutput.value = temp.substring(3);
408             }
409             name = paren2 == name.length()
410                 ? name.substring(0, paren).trim()
411                 : (name.substring(0, paren) + name.substring(paren2 + 1)).replace("  ", " ").trim();
412             //System.out.println("«" + orginal + "» => «" + name + "», «" + tempOutput.value + "»");
413         }
414         return name;
415     }
416 
putCheckingDuplicate(Map<K, V> map, K key, V value)417     public static <K, V extends Mergeable<V>> void putCheckingDuplicate(Map<K, V> map, K key, V value) {
418         V old = map.get(key);
419         if (old != null && !old.equals(value)) {
420             try {
421                 map.put(key, old.merge(value));
422             } catch (Exception e) {
423                 ERRORS.put(key.toString(), "Can't merge records: " + key + "\t" + e.getMessage());
424             }
425         } else {
426             map.put(key, value);
427         }
428     }
429 
getLocodeData(String unlocode)430     public static LocodeData getLocodeData(String unlocode) {
431         return locodeToData.get(unlocode);
432     }
433 
entrySet()434     public static Set<Entry<String, LocodeData>> entrySet() {
435         return locodeToData.entrySet();
436     }
437 
getAvailable()438     public static Set<String> getAvailable() {
439         return locodeToData.keySet();
440     }
441 
getIso3166_2Data(String unlocode)442     public static Iso3166_2Data getIso3166_2Data(String unlocode) {
443         return iso3166_2Data.get(unlocode);
444     }
445 
isoEntrySet()446     public static Set<Entry<String, Iso3166_2Data>> isoEntrySet() {
447         return iso3166_2Data.entrySet();
448     }
449 
getAvailableIso3166_2()450     public static Set<String> getAvailableIso3166_2() {
451         return iso3166_2Data.keySet();
452     }
453 
getLoadErrors()454     public static Relation<String, String> getLoadErrors() {
455         return ERRORS;
456     }
457 
parse(String string)458     private static float parse(String string) {
459         int len = string.length();
460         char dir = string.charAt(len - 1);
461         int result0 = Integer.parseInt(string.substring(0, len - 1));
462         float fract = (result0 % 100) / 60f;
463         fract = ((int) (fract * 100 + 0.499999999f)) / 100f;
464         float result = (result0 / 100) + fract;
465         return dir == 'N' || dir == 'E' ? result : -result;
466     }
467 
main(String[] args)468     public static void main(String[] args) throws IOException {
469         Relation<String, LocodeData> countryNameToCities = Relation.of(new TreeMap<String, Set<LocodeData>>(), TreeSet.class);
470         Set<String> errors = new TreeSet<String>();
471         loadCitiesCapitals(countryNameToCities, errors);
472         loadCitiesOver1M(countryNameToCities, errors);
473         SupplementalDataInfo supp = ToolConfig.getToolInstance().getSupplementalDataInfo();
474         Set<String> missing = new TreeSet<String>(
475             supp.getBcp47Keys().get("tz"));
476         Set<String> already = new TreeSet<String>();
477 
478         for (Entry<String, LocodeData> entry : countryNameToCities.keyValueSet()) {
479             String countryName = entry.getKey();
480             LocodeData item = entry.getValue();
481             String firstName = item.names.iterator().next();
482             LinkedHashSet<String> remainingNames = new LinkedHashSet<String>(item.names);
483             remainingNames.remove(firstName);
484             String lowerLocode = item.locode.toLowerCase(Locale.ENGLISH);
485             String info = countryName
486                 + "\t" + (remainingNames.isEmpty() ? "" : remainingNames)
487                 + "\t" + (item.subdivision.isEmpty() ? "" : "(" + item.subdivision + ")");
488 
489             if (missing.contains(lowerLocode)) {
490                 missing.remove(lowerLocode);
491                 already.add(lowerLocode);
492                 continue;
493             }
494             System.out.println("<location type=\"" + lowerLocode
495                 + "\">" + firstName
496                 + "</location>\t<!--" + info
497                 + "-->");
498         }
499         System.out.println();
500         System.out.println(CollectionUtilities.join(errors, "\n"));
501         System.out.println();
502         showLocodes("In exemplars already:", already);
503         System.out.println();
504         showLocodes("In exemplars but not new cities:", missing);
505         System.out.println();
506         for (Entry<String, Set<String>> errorEntry : ERRORS.keyValuesSet()) {
507             System.out.println(errorEntry.getKey() + "\t" + errorEntry.getValue());
508         }
509         if (true) return;
510 
511         int i = 0;
512         //        for (String s : new TreeSet<String>(Unlocode.getAvailableIso3166_2())) {
513         //            System.out.println((i++) + "\t" + s + "\t" + Unlocode.getIso3166_2Data(s));
514         //            //if (i > 1000) break;
515         //        }
516         for (String s : new TreeSet<String>(Unlocode.getAvailable())) {
517             if (!s.startsWith("GT")) {
518                 continue;
519             }
520             System.out.println((i++) + "\t" + s + "\t" + Unlocode.getLocodeData(s));
521             //if (i > 1000) break;
522         }
523 
524         //        Set<String> KNOWN_ERRORS = new HashSet<String>(Arrays.asList("AR-LA", "DE-BR"));
525         //
526         //        for (Entry<String, Set<String>> s : getLoadErrors().keyValuesSet()) {
527         //            String key = s.getKey();
528         //            Set<String> values = s.getValue();
529         //            if (KNOWN_ERRORS.contains(key)) {
530         //                System.out.println("# Known error\t" + key);
531         //                continue;
532         //            }
533         //            String s2 = values.toString();
534         //            System.out.println(key + "\t" + s2.substring(0,Math.min(256, s2.length())) + "…");
535         //        }
536     }
537 
showLocodes(String title, Set<String> already)538     public static void showLocodes(String title, Set<String> already) {
539         Set<String> noData = new TreeSet<String>();
540         Set<String> noData2 = new TreeSet<String>();
541         for (String locode : already) {
542             String upperLocode = locode.toUpperCase(Locale.ENGLISH);
543             String countryName = ULocale.getDisplayCountry("und-" + upperLocode.substring(0, 2), ULocale.ENGLISH);
544             LocodeData data = locodeToData.get(upperLocode);
545             if (data == null) {
546                 if (locode.length() == 5) {
547                     noData.add(locode);
548                 } else {
549                     noData2.add(locode);
550                 }
551             } else {
552                 System.out.println(title + "\t" + countryName + "\t" + data);
553             }
554         }
555         System.out.println("* No locode data, len 5:\t" + noData);
556         System.out.println("* No locode data:\t" + noData2);
557     }
558 
loadCitiesOver1M(Relation<String, LocodeData> countryNameToCities, Set<String> errors2)559     public static int loadCitiesOver1M(Relation<String, LocodeData> countryNameToCities, Set<String> errors2) throws IOException {
560         int i = 1;
561 
562         BufferedReader br = FileReaders.openFile(CldrUtility.class, "data/external/Cities-Over1M.txt");
563         main: while (true) {
564             String line = br.readLine();
565             if (line == null) {
566                 break;
567             }
568             if (line.startsWith("#")) {
569                 continue;
570             }
571             String[] parts = line.split("\t");
572             //System.out.println(Arrays.asList(parts));
573             String cityName = parts[2];
574             String subdivision = null;
575             int bracket = cityName.indexOf('[');
576             if (bracket > 0) {
577                 try {
578                     subdivision = cityName.substring(bracket + 1, cityName.indexOf(']'));
579                     cityName = cityName.substring(0, bracket);
580                 } catch (Exception e) {
581                     throw new IllegalArgumentException(cityName);
582                 }
583             }
584             String countryName = parts[3];
585             add(countryName, subdivision, cityName, countryNameToCities, errors2);
586 
587             //                String countryCode = CountryCodeConverter.getCodeFromName(countryName);
588             //                if (countryCode == null) {
589             //                    System.out.println("*** Couldn't find country " + countryName);
590             //                    continue;
591             //                }
592             //                Set<LocodeData> locodeDatas = nameToLocodeData.get(cityName);
593             //                if (locodeDatas == null) {
594             //                    System.out.println((i++) + " Couldn't find city " + cityName + " in " + countryName);
595             //                    continue;
596             //                } else if (locodeDatas.size() == 1) {
597             //                    add(countryNameToCities,locodeDatas.iterator().next());
598             //                } else  {
599             //                    Set<LocodeData> rem = new LinkedHashSet();
600             //                    for (LocodeData x : locodeDatas) {
601             //                        if (x.subdivision.equals(subdivision)) {
602             //                            add(countryNameToCities, x);
603             //                            continue main;
604             //                        }
605             //                        if (x.subdivision.startsWith(countryCode)) {
606             //                            rem.add(x);
607             //                        }
608             //                    }
609             //                    if (rem.size() != 1) {
610             //                        System.out.println((i++) + " No single record for " + cityName + "\t" + rem);
611             //                    } else {
612             //                        add(countryNameToCities, rem.iterator().next());
613             //                    }
614             //                }
615         }
616         br.close();
617         return i;
618     }
619 
loadCitiesCapitals(Relation<String, LocodeData> countryNameToCities, Set<String> errors2)620     public static int loadCitiesCapitals(Relation<String, LocodeData> countryNameToCities, Set<String> errors2) throws IOException {
621         int i = 1;
622         BufferedReader br = FileReaders.openFile(CldrUtility.class, "data/external/Cities-CountryCapitals.txt");
623         while (true) {
624             String line = br.readLine();
625             if (line == null) {
626                 break;
627             }
628             if (line.startsWith("#")) {
629                 continue;
630             }
631             String[] parts = line.split(" *\t *");
632             //System.out.println(Arrays.asList(parts));
633             String cityName = parts[0];
634             String countryName = parts[1];
635             add(countryName, null, cityName, countryNameToCities, errors2);
636         }
637         br.close();
638         return i;
639     }
640 
641     static final Set<String> noncountries = new HashSet<String>(Arrays.asList(
642         "United States Virgin Islands", "Akrotiri and Dhekelia", "Easter Island", "Somaliland", "Northern Cyprus", "Nagorno-Karabakh Republic", "Abkhazia",
643         "Transnistria", "South Ossetia"));
644 
645     static final Transform<String, String> REMOVE_ACCENTS = Transliterator.getInstance("nfd;[:mn:]remove");
646 
add(String countryName, String subdivision, String cityName, Relation<String, LocodeData> countryNameToCities, Set<String> errors2)647     static void add(String countryName, String subdivision, String cityName, Relation<String, LocodeData> countryNameToCities, Set<String> errors2) {
648         String countryCode = CountryCodeConverter.getCodeFromName(countryName);
649         if (countryCode == null) {
650             if (noncountries.contains(countryName)) {
651                 return; // skip
652             }
653             errors2.add("**Couldn't find country " + countryName);
654             //continue;
655         }
656         countryName = ULocale.getDisplayCountry("und-" + countryCode, ULocale.ENGLISH);
657         Set<LocodeData> locodeDatas = nameToLocodeData.get(cityName);
658         if (locodeDatas == null) {
659             // try again without accents
660             String cityName2 = REMOVE_ACCENTS.transform(cityName);
661             if (!cityName.equals(cityName2)) {
662                 locodeDatas = nameToLocodeData.get(cityName2);
663             }
664         }
665         if (locodeDatas == null) {
666             errors2.add("** No matching record for\t" + countryName + "\t" + countryCode + "\t" + cityName);
667         } else {
668             Set<LocodeData> rem = new LinkedHashSet<LocodeData>();
669             for (LocodeData x : locodeDatas) {
670                 if (x.locode.startsWith(countryCode)) {
671                     if (x.subdivision.equals(subdivision)) {
672                         rem.clear();
673                         rem.add(x);
674                         break;
675                     }
676                     rem.add(x);
677                 }
678             }
679             if (rem.size() == 0) {
680                 errors2.add("** No matching country record for\t" + countryName + "\t" + countryCode + "\t" + cityName + "\t" + locodeDatas);
681             } else if (rem.size() != 1) {
682                 errors2.add("** Multiple matching country records for\t" + countryName + "\t" + countryCode + "\t" + cityName + "\t" + rem);
683             } else {
684                 LocodeData locodeData = rem.iterator().next();
685                 countryNameToCities.put(countryName, locodeData);
686             }
687         }
688     }
689 }