• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.draft;
2 
3 import java.util.Collection;
4 import java.util.Collections;
5 import java.util.HashMap;
6 import java.util.List;
7 import java.util.Map;
8 import java.util.Map.Entry;
9 import java.util.Objects;
10 import java.util.Set;
11 import java.util.TreeMap;
12 import java.util.TreeSet;
13 
14 import org.unicode.cldr.util.CLDRConfig;
15 import org.unicode.cldr.util.CldrUtility;
16 import org.unicode.cldr.util.Counter;
17 import org.unicode.cldr.util.LanguageTagParser;
18 import org.unicode.cldr.util.SupplementalDataInfo;
19 
20 import com.google.common.base.Splitter;
21 import com.google.common.collect.HashMultimap;
22 import com.google.common.collect.Multimap;
23 import com.ibm.icu.impl.Row.R2;
24 import com.ibm.icu.util.ULocale;
25 import com.ibm.icu.util.ULocale.Minimize;
26 
27 public class XLikelySubtags {
28     private static final SupplementalDataInfo SDI = CLDRConfig.getInstance().getSupplementalDataInfo();
29     private static final Map<String, Map<String, R2<List<String>, String>>> aliasInfo = SDI.getLocaleAliasInfo();
30     private static final Map<String, R2<List<String>, String>> REGION_ALIASES = aliasInfo.get("territory");
31     private static final Map<String, R2<List<String>, String>> LANGUAGE_ALIASES = aliasInfo.get("language");
32     private static final XLikelySubtags DEFAULT = new XLikelySubtags(SDI.getLikelySubtags(), true);
33 
getDefault()34     public static final XLikelySubtags getDefault() {
35         return DEFAULT;
36     }
37 
38     private static final boolean SHORT = false;
39 
40     static abstract class Maker {
make()41         abstract <V> V make();
42 
43         @SuppressWarnings("unchecked")
getSubtable(Map<K, V> langTable, final K language)44         public <K, V> V getSubtable(Map<K, V> langTable, final K language) {
45             V scriptTable = langTable.get(language);
46             if (scriptTable == null) {
47                 langTable.put(language, scriptTable = (V) make());
48             }
49             return scriptTable;
50         }
51 
52         static final Maker HASHMAP = new Maker() {
53             @SuppressWarnings("unchecked")
54             public Map<Object, Object> make() {
55                 return new HashMap<>();
56             }
57         };
58 
59         static final Maker TREEMAP = new Maker() {
60             @SuppressWarnings("unchecked")
61             public Map<Object, Object> make() {
62                 return new TreeMap<>();
63             }
64         };
65     }
66 
67     public static class LSR {
68         public final String language;
69         public final String script;
70         public final String region;
71 
from(String language, String script, String region)72         public static LSR from(String language, String script, String region) {
73             return new LSR(language, script, region);
74         }
75 
from(ULocale locale)76         public static LSR from(ULocale locale) {
77             return new LSR(locale.getLanguage(), locale.getScript(), locale.getCountry());
78         }
79 
fromMaximalized(ULocale locale)80         public static LSR fromMaximalized(ULocale locale) {
81             return fromMaximalized(locale.getLanguage(), locale.getScript(), locale.getCountry());
82         }
83 
fromMaximalized(String language, String script, String region)84         public static LSR fromMaximalized(String language, String script, String region) {
85             String canonicalLanguage = getCanonical(LANGUAGE_ALIASES.get(language));
86             // hack
87             if (language.equals("mo")) {
88                 canonicalLanguage = "ro";
89             }
90             String canonicalRegion = getCanonical(REGION_ALIASES.get(region));
91 
92             return DEFAULT.maximize(
93                 canonicalLanguage == null ? language : canonicalLanguage,
94                 script,
95                 canonicalRegion == null ? region : canonicalRegion);
96         }
97 
LSR(String language, String script, String region)98         public LSR(String language, String script, String region) {
99             this.language = language;
100             this.script = script;
101             this.region = region;
102         }
103 
104         @Override
toString()105         public String toString() {
106             StringBuilder result = new StringBuilder(language);
107             if (!script.isEmpty()) {
108                 result.append('-').append(script);
109             }
110             if (!region.isEmpty()) {
111                 result.append('-').append(region);
112             }
113             return result.toString();
114         }
115 
replace(String language2, String script2, String region2)116         public LSR replace(String language2, String script2, String region2) {
117             if (language2 == null && script2 == null && region2 == null) return this;
118             return new LSR(
119                 language2 == null ? language : language2,
120                 script2 == null ? script : script2,
121                 region2 == null ? region : region2);
122         }
123 
124         @Override
equals(Object obj)125         public boolean equals(Object obj) {
126             LSR other = (LSR) obj;
127             return language.equals(other.language)
128                 && script.equals(other.script)
129                 && region.equals(other.region);
130         }
131 
132         @Override
hashCode()133         public int hashCode() {
134             return Objects.hash(language, script, region);
135         }
136     }
137 
138     final Map<String, Map<String, Map<String, LSR>>> langTable;
139 
XLikelySubtags(Map<String, String> rawData, boolean skipNoncanonical)140     public XLikelySubtags(Map<String, String> rawData, boolean skipNoncanonical) {
141         this.langTable = init(rawData, skipNoncanonical);
142     }
143 
init(final Map<String, String> rawData, boolean skipNoncanonical)144     private Map<String, Map<String, Map<String, LSR>>> init(final Map<String, String> rawData, boolean skipNoncanonical) {
145         // prepare alias info. We want a mapping from the canonical form to all aliases
146 
147         Multimap<String, String> canonicalToAliasLanguage = HashMultimap.create();
148         getAliasInfo(LANGUAGE_ALIASES, canonicalToAliasLanguage);
149 
150         // Don't bother with script; there are none
151 
152         Multimap<String, String> canonicalToAliasRegion = HashMultimap.create();
153         getAliasInfo(REGION_ALIASES, canonicalToAliasRegion);
154 
155         Maker maker = Maker.TREEMAP;
156         Map<String, Map<String, Map<String, LSR>>> result = maker.make();
157         LanguageTagParser ltp = new LanguageTagParser();
158         Splitter bar = Splitter.on('_');
159         int last = -1;
160         // set the base data
161         Map<LSR, LSR> internCache = new HashMap<>();
162         for (Entry<String, String> sourceTarget : rawData.entrySet()) {
163             ltp.set(sourceTarget.getKey());
164             final String language = ltp.getLanguage();
165             final String script = ltp.getScript();
166             final String region = ltp.getRegion();
167 
168             ltp.set(sourceTarget.getValue());
169             String languageTarget = ltp.getLanguage();
170             final String scriptTarget = ltp.getScript();
171             final String regionTarget = ltp.getRegion();
172 
173             set(result, language, script, region, languageTarget, scriptTarget, regionTarget, internCache);
174             // now add aliases
175             Collection<String> languageAliases = canonicalToAliasLanguage.get(language);
176             if (languageAliases.isEmpty()) {
177                 languageAliases = Collections.singleton(language);
178             }
179             Collection<String> regionAliases = canonicalToAliasRegion.get(region);
180             if (regionAliases.isEmpty()) {
181                 regionAliases = Collections.singleton(region);
182             }
183             for (String languageAlias : languageAliases) {
184                 for (String regionAlias : regionAliases) {
185                     if (languageAlias.equals(language) && regionAlias.equals(region)) {
186                         continue;
187                     }
188                     set(result, languageAlias, script, regionAlias, languageTarget, scriptTarget, regionTarget, internCache);
189                 }
190             }
191         }
192         // hack
193         set(result, "und", "Latn", "", "en", "Latn", "US", internCache);
194 
195         // hack, ensure that if und-YY => und-Xxxx-YY, then we add Xxxx=>YY to the table
196         // <likelySubtag from="und_GH" to="ak_Latn_GH"/>
197 
198         // so und-Latn-GH   =>  ak-Latn-GH
199         Map<String, Map<String, LSR>> undScriptMap = result.get("und");
200         Map<String, LSR> undEmptyRegionMap = undScriptMap.get("");
201         for (Entry<String, LSR> regionEntry : undEmptyRegionMap.entrySet()) {
202             final LSR value = regionEntry.getValue();
203             set(result, "und", value.script, value.region, value);
204         }
205         //
206         // check that every level has "" (or "und")
207         if (!result.containsKey("und")) {
208             throw new IllegalArgumentException("failure: base");
209         }
210         for (Entry<String, Map<String, Map<String, LSR>>> langEntry : result.entrySet()) {
211             String lang = langEntry.getKey();
212             final Map<String, Map<String, LSR>> scriptMap = langEntry.getValue();
213             if (!scriptMap.containsKey("")) {
214                 throw new IllegalArgumentException("failure: " + lang);
215             }
216             for (Entry<String, Map<String, LSR>> scriptEntry : scriptMap.entrySet()) {
217                 String script = scriptEntry.getKey();
218                 final Map<String, LSR> regionMap = scriptEntry.getValue();
219                 if (!regionMap.containsKey("")) {
220                     throw new IllegalArgumentException("failure: " + lang + "-" + script);
221                 }
222 //                for (Entry<String, LSR> regionEntry : regionMap.entrySet()) {
223 //                    String region = regionEntry.getKey();
224 //                    LSR value = regionEntry.getValue();
225 //                }
226             }
227         }
228         return result;
229     }
230 
getAliasInfo(Map<String, R2<List<String>, String>> aliasInfo, Multimap<String, String> canonicalToAlias)231     private void getAliasInfo(Map<String, R2<List<String>, String>> aliasInfo, Multimap<String, String> canonicalToAlias) {
232         for (Entry<String, R2<List<String>, String>> e : aliasInfo.entrySet()) {
233             final String alias = e.getKey();
234             if (alias.contains("_")) {
235                 continue; // only do simple aliasing
236             }
237             String canonical = getCanonical(e.getValue());
238             canonicalToAlias.put(canonical, alias);
239         }
240     }
241 
getCanonical(R2<List<String>, String> aliasAndReason)242     private static String getCanonical(R2<List<String>, String> aliasAndReason) {
243         if (aliasAndReason == null) {
244             return null;
245         }
246         if (aliasAndReason.get1().equals("overlong")) {
247             return null;
248         }
249         List<String> value = aliasAndReason.get0();
250         if (value.size() != 1) {
251             return null;
252         }
253         final String canonical = value.iterator().next();
254         if (canonical.contains("_")) {
255             return null; // only do simple aliasing
256         }
257         return canonical;
258     }
259 
set(Map<String, Map<String, Map<String, LSR>>> langTable, final String language, final String script, final String region, final String languageTarget, final String scriptTarget, final String regionTarget, Map<LSR, LSR> internCache)260     private void set(Map<String, Map<String, Map<String, LSR>>> langTable, final String language, final String script, final String region,
261         final String languageTarget, final String scriptTarget, final String regionTarget, Map<LSR, LSR> internCache) {
262         LSR newValue = new LSR(languageTarget, scriptTarget, regionTarget);
263         LSR oldValue = internCache.get(newValue);
264         if (oldValue == null) {
265             internCache.put(newValue, newValue);
266             oldValue = newValue;
267         }
268         set(langTable, language, script, region, oldValue);
269     }
270 
set(Map<String, Map<String, Map<String, LSR>>> langTable, final String language, final String script, final String region, LSR newValue)271     private void set(Map<String, Map<String, Map<String, LSR>>> langTable, final String language, final String script, final String region, LSR newValue) {
272         Map<String, Map<String, LSR>> scriptTable = Maker.TREEMAP.getSubtable(langTable, language);
273         Map<String, LSR> regionTable = Maker.TREEMAP.getSubtable(scriptTable, script);
274         LSR oldValue = regionTable.get(region);
275         if (oldValue != null) {
276             int debug = 0;
277         }
278         regionTable.put(region, newValue);
279     }
280 
281     /**
282      * Convenience methods
283      * @param source
284      * @return
285      */
maximize(String source)286     public LSR maximize(String source) {
287         return maximize(ULocale.forLanguageTag(source));
288     }
289 
maximize(ULocale source)290     public LSR maximize(ULocale source) {
291         return maximize(source.getLanguage(), source.getScript(), source.getCountry());
292     }
293 
maximize(LSR source)294     public LSR maximize(LSR source) {
295         return maximize(source.language, source.script, source.region);
296     }
297 
298 //    public static ULocale addLikelySubtags(ULocale loc) {
299 //
300 //    }
301 
302     /**
303      * Raw access to addLikelySubtags. Input must be in canonical format, eg "en", not "eng" or "EN".
304      */
maximize(String language, String script, String region)305     public LSR maximize(String language, String script, String region) {
306         int retainOldMask = 0;
307         Map<String, Map<String, LSR>> scriptTable = langTable.get(language);
308         if (scriptTable == null) { // cannot happen if language == "und"
309             retainOldMask |= 4;
310             scriptTable = langTable.get("und");
311         } else if (!language.equals("und")) {
312             retainOldMask |= 4;
313         }
314 
315         if (script.equals("Zzzz")) {
316             script = "";
317         }
318         Map<String, LSR> regionTable = scriptTable.get(script);
319         if (regionTable == null) { // cannot happen if script == ""
320             retainOldMask |= 2;
321             regionTable = scriptTable.get("");
322         } else if (!script.isEmpty()) {
323             retainOldMask |= 2;
324         }
325 
326         if (region.equals("ZZ")) {
327             region = "";
328         }
329         LSR result = regionTable.get(region);
330         if (result == null) { // cannot happen if region == ""
331             retainOldMask |= 1;
332             result = regionTable.get("");
333             if (result == null) {
334                 return null;
335             }
336         } else if (!region.isEmpty()) {
337             retainOldMask |= 1;
338         }
339 
340         switch (retainOldMask) {
341         default:
342         case 0:
343             return result;
344         case 1:
345             return result.replace(null, null, region);
346         case 2:
347             return result.replace(null, script, null);
348         case 3:
349             return result.replace(null, script, region);
350         case 4:
351             return result.replace(language, null, null);
352         case 5:
353             return result.replace(language, null, region);
354         case 6:
355             return result.replace(language, script, null);
356         case 7:
357             return result.replace(language, script, region);
358         }
359     }
360 
minimizeSubtags(String languageIn, String scriptIn, String regionIn, Minimize fieldToFavor)361     private LSR minimizeSubtags(String languageIn, String scriptIn, String regionIn, Minimize fieldToFavor) {
362         LSR result = maximize(languageIn, scriptIn, regionIn);
363 
364         // We could try just a series of checks, like:
365         // LSR result2 = addLikelySubtags(languageIn, "", "");
366         // if result.equals(result2) return result2;
367         // However, we can optimize 2 of the cases:
368         //   (languageIn, "", "")
369         //   (languageIn, "", regionIn)
370 
371         Map<String, Map<String, LSR>> scriptTable = langTable.get(result.language);
372 
373         Map<String, LSR> regionTable0 = scriptTable.get("");
374         LSR value00 = regionTable0.get("");
375         boolean favorRegionOk = false;
376         if (result.script.equals(value00.script)) { //script is default
377             if (result.region.equals(value00.region)) {
378                 return result.replace(null, "", "");
379             } else if (fieldToFavor == fieldToFavor.FAVOR_REGION) {
380                 return result.replace(null, "", null);
381             } else {
382                 favorRegionOk = true;
383             }
384         }
385 
386         // The last case is not as easy to optimize.
387         // Maybe do later, but for now use the straightforward code.
388         LSR result2 = maximize(languageIn, scriptIn, "");
389         if (result2.equals(result)) {
390             return result.replace(null, null, "");
391         } else if (favorRegionOk) {
392             return result.replace(null, "", null);
393         }
394         return result;
395     }
396 
show(Map<String, V> map, String indent, StringBuilder output)397     private static <V> StringBuilder show(Map<String, V> map, String indent, StringBuilder output) {
398         String first = indent.isEmpty() ? "" : "\t";
399         for (Entry<String, V> e : map.entrySet()) {
400             String key = e.getKey();
401             V value = e.getValue();
402             output.append(first + (key.isEmpty() ? "∅" : key));
403             if (value instanceof Map) {
404                 show((Map) value, indent + "\t", output);
405             } else {
406                 output.append("\t" + CldrUtility.toString(value)).append("\n");
407             }
408             first = indent;
409         }
410         return output;
411     }
412 
413     @Override
toString()414     public String toString() {
415         return show(langTable, "", new StringBuilder()).toString();
416     }
417 
main(String[] args)418     public static void main(String[] args) {
419         System.out.println(LSR.fromMaximalized(ULocale.ENGLISH));
420 
421         SupplementalDataInfo sdi = SDI;
422         final Map<String, String> rawData = sdi.getLikelySubtags();
423         XLikelySubtags ls = XLikelySubtags.getDefault();
424         System.out.println(ls);
425         ls.maximize(new ULocale("iw"));
426         if (true) return;
427 
428         LanguageTagParser ltp = new LanguageTagParser();
429 //        String[][] tests = {
430 //            {"und", "en-Latn-US"},
431 //            {"und-TW", "en-Latn-US"},
432 //            {"und-CN", "en-Latn-US"},
433 //            {"und-Hans", "en-Latn-US"},
434 //            {"und-Hans-CN", "en-Latn-US"},
435 //            {"und-Hans-TW", "en-Latn-US"},
436 //            {"und-Hant", "en-Latn-US"},
437 //            {"und-Hant-TW", "en-Latn-US"},
438 //            {"und-Hant-CN", "en-Latn-US"},
439 //            {"zh-TW", "en-Latn-US"},
440 //            {"zh-CN", "en-Latn-US"},
441 //            {"zh-Hans", "en-Latn-US"},
442 //            {"zh-Hans-CN", "en-Latn-US"},
443 //            {"zh-Hans-TW", "en-Latn-US"},
444 //            {"zh-Hant", "en-Latn-US"},
445 //            {"zh-Hant-TW", "en-Latn-US"},
446 //            {"zh-Hant-CN", "en-Latn-US"},
447 //        };
448 //        for (String[] sourceTarget : tests) {
449 //            ltp.set(sourceTarget[0]);
450 //            LSR result = ls.addLikelySubtags(ltp.getLanguage(), ltp.getScript(), ltp.getRegion());
451 //            ltp.set(sourceTarget[1]);
452 //            ULocale sourceLocale = ULocale.forLanguageTag(sourceTarget[0]);
453 //            ULocale max = ULocale.addLikelySubtags(sourceLocale);
454 //            boolean same = max.toLanguageTag().equals(result.toString());
455 //            System.out.println(sourceTarget[0] + "\t" + sourceTarget[1] + "\t" + result + (same ? "" : "\t≠" + max.toLanguageTag()));
456 //        }
457 
458         // get all the languages, scripts, and regions
459         Set<String> languages = new TreeSet<String>();
460         Set<String> scripts = new TreeSet<String>();
461         Set<String> regions = new TreeSet<String>();
462         Counter<String> languageCounter = new Counter<>();
463         Counter<String> scriptCounter = new Counter<>();
464         Counter<String> regionCounter = new Counter<>();
465 
466         for (Entry<String, String> sourceTarget : rawData.entrySet()) {
467             final String source = sourceTarget.getKey();
468             ltp.set(source);
469             languages.add(ltp.getLanguage());
470             scripts.add(ltp.getScript());
471             regions.add(ltp.getRegion());
472             final String target = sourceTarget.getValue();
473             ltp.set(target);
474             add(target, languageCounter, ltp.getLanguage(), 1);
475             add(target, scriptCounter, ltp.getScript(), 1);
476             add(target, regionCounter, ltp.getRegion(), 1);
477         }
478         ltp.set("und-Zzzz-ZZ");
479         languageCounter.add(ltp.getLanguage(), 1);
480         scriptCounter.add(ltp.getScript(), 1);
481         regionCounter.add(ltp.getRegion(), 1);
482 
483         if (SHORT) {
484             removeSingletons(languages, languageCounter);
485             removeSingletons(scripts, scriptCounter);
486             removeSingletons(regions, regionCounter);
487         }
488 
489         System.out.println("languages: " + languages.size() + "\n\t" + languages + "\n\t" + languageCounter);
490         System.out.println("scripts: " + scripts.size() + "\n\t" + scripts + "\n\t" + scriptCounter);
491         System.out.println("regions: " + regions.size() + "\n\t" + regions + "\n\t" + regionCounter);
492 
493         int maxCount = Integer.MAX_VALUE;
494 
495         int counter = maxCount;
496         long tempTime = System.nanoTime();
497         newMax: for (String language : languages) {
498             for (String script : scripts) {
499                 for (String region : regions) {
500                     if (--counter < 0) break newMax;
501                     LSR result = ls.maximize(language, script, region);
502                 }
503             }
504         }
505         long newMaxTime = System.nanoTime() - tempTime;
506         System.out.println("newMaxTime: " + newMaxTime);
507 
508         counter = maxCount;
509         tempTime = System.nanoTime();
510         newMin: for (String language : languages) {
511             for (String script : scripts) {
512                 for (String region : regions) {
513                     if (--counter < 0) break newMin;
514                     LSR minNewS = ls.minimizeSubtags(language, script, region, Minimize.FAVOR_SCRIPT);
515                 }
516             }
517         }
518         long newMinTime = System.nanoTime() - tempTime;
519         System.out.println("newMinTime: " + newMinTime);
520 
521         // *****
522 
523         tempTime = System.nanoTime();
524         counter = maxCount;
525         oldMax: for (String language : languages) {
526             for (String script : scripts) {
527                 for (String region : regions) {
528                     if (--counter < 0) break oldMax;
529                     ULocale tempLocale = new ULocale(language, script, region);
530                     ULocale max = ULocale.addLikelySubtags(tempLocale);
531                 }
532             }
533         }
534         long oldMaxTime = System.nanoTime() - tempTime;
535         System.out.println("oldMaxTime: " + oldMaxTime + "\t" + oldMaxTime / newMaxTime + "x");
536 
537         counter = maxCount;
538         tempTime = System.nanoTime();
539         oldMin: for (String language : languages) {
540             for (String script : scripts) {
541                 for (String region : regions) {
542                     if (--counter < 0) break oldMin;
543                     ULocale tempLocale = new ULocale(language, script, region);
544                     ULocale minOldS = ULocale.minimizeSubtags(tempLocale, Minimize.FAVOR_SCRIPT);
545                 }
546             }
547         }
548         long oldMinTime = System.nanoTime() - tempTime;
549         System.out.println("oldMinTime: " + oldMinTime + "\t" + oldMinTime / newMinTime + "x");
550 
551         counter = maxCount;
552         testMain: for (String language : languages) {
553             System.out.println(language);
554             int tests = 0;
555             for (String script : scripts) {
556                 for (String region : regions) {
557                     ++tests;
558                     if (--counter < 0) break testMain;
559                     LSR maxNew = ls.maximize(language, script, region);
560                     LSR minNewS = ls.minimizeSubtags(language, script, region, Minimize.FAVOR_SCRIPT);
561                     LSR minNewR = ls.minimizeSubtags(language, script, region, Minimize.FAVOR_REGION);
562 
563                     ULocale tempLocale = new ULocale(language, script, region);
564                     ULocale maxOld = ULocale.addLikelySubtags(tempLocale);
565                     ULocale minOldS = ULocale.minimizeSubtags(tempLocale, Minimize.FAVOR_SCRIPT);
566                     ULocale minOldR = ULocale.minimizeSubtags(tempLocale, Minimize.FAVOR_REGION);
567 
568                     // check values
569                     final String maxNewS = String.valueOf(maxNew);
570                     final String maxOldS = maxOld.toLanguageTag();
571                     boolean sameMax = maxOldS.equals(maxNewS);
572 
573                     final String minNewSS = String.valueOf(minNewS);
574                     final String minOldSS = minOldS.toLanguageTag();
575                     boolean sameMinS = minNewSS.equals(minOldSS);
576 
577                     final String minNewRS = String.valueOf(minNewR);
578                     final String minOldRS = minOldS.toLanguageTag();
579                     boolean sameMinR = minNewRS.equals(minOldRS);
580 
581                     if (sameMax && sameMinS && sameMinR) continue;
582                     System.out.println(new LSR(language, script, region)
583                         + "\tmax: " + maxNew
584                         + (sameMax ? "" : "≠" + maxOldS)
585                         + "\tminS: " + minNewS
586                         + (sameMinS ? "" : "≠" + minOldS)
587                         + "\tminR: " + minNewR
588                         + (sameMinR ? "" : "≠" + minOldR));
589                 }
590             }
591             System.out.println(language + ": " + tests);
592         }
593     }
594 
add(String target, Counter<String> languageCounter, String language, int count)595     private static void add(String target, Counter<String> languageCounter, String language, int count) {
596         if (language.equals("aa")) {
597             int debug = 0;
598         }
599         languageCounter.add(language, count);
600     }
601 
removeSingletons(Set<String> languages, Counter<String> languageCounter)602     private static void removeSingletons(Set<String> languages, Counter<String> languageCounter) {
603         for (String s : languageCounter) {
604             final long count = languageCounter.get(s);
605             if (count <= 1) {
606                 languages.remove(s);
607             }
608         }
609     }
610 }
611