1 package org.unicode.cldr.tool; 2 3 import java.util.Set; 4 5 import org.unicode.cldr.util.CLDRFile; 6 import org.unicode.cldr.util.CLDRPaths; 7 import org.unicode.cldr.util.Counter; 8 import org.unicode.cldr.util.Factory; 9 import org.unicode.cldr.util.Pair; 10 import org.unicode.cldr.util.SupplementalDataInfo; 11 import org.unicode.cldr.util.SupplementalDataInfo.OfficialStatus; 12 import org.unicode.cldr.util.SupplementalDataInfo.PopulationData; 13 14 public class GetLanguageData { 15 SupplementalDataInfo sdata = SupplementalDataInfo 16 .getInstance(CLDRPaths.SUPPLEMENTAL_DIRECTORY); 17 Factory cldrFactory = Factory 18 .make(CLDRPaths.MAIN_DIRECTORY, ".*"); 19 CLDRFile english = cldrFactory.make("en", true); 20 Set<String> euCountries = sdata.getContained("EU"); 21 Counter<String> languageToGdp = new Counter<String>(); 22 Counter<String> languageToPop = new Counter<String>(); 23 main(String[] args)24 public static void main(String[] args) { 25 new GetLanguageData().run(); 26 } 27 run()28 private void run() { 29 findSuspectData(); 30 System.out.println("Code\tLang\tLpop\tApprox. Gdp"); 31 for (String language : sdata.getLanguages()) { 32 final long pop = languageToPop.getCount(language); 33 System.out.print(language + "\t" + english.getName(language)); 34 if (pop > 0) { 35 Pair<OfficialStatus, String> status = isOfficialLanguageOfEUCountry(language); 36 System.out.print("\t" + pop // 37 + "\t" + languageToGdp.getCount(language) // 38 + "\t" + (status.getFirst().isOfficial() ? status.getFirst() : "") // 39 + "\t" + status.getSecond() // 40 ); 41 } 42 System.out.println(); 43 } 44 } 45 findSuspectData()46 private void findSuspectData() { 47 Set<String> territories = sdata.getTerritoriesWithPopulationData(); 48 for (String territory : territories) { 49 double scale = 1.0; 50 final PopulationData populationDataForTerritory = sdata 51 .getPopulationDataForTerritory(territory); 52 final double gdp = populationDataForTerritory.getGdp(); 53 double territoryPop = populationDataForTerritory.getPopulation(); 54 double langPop = 0; 55 double officialLangPop = 0; 56 Set<String> languages = sdata.getLanguagesForTerritoryWithPopulationData(territory); 57 for (String language : languages) { 58 if (language.equals("tl")) continue; 59 PopulationData pop2 = sdata.getLanguageAndTerritoryPopulationData(language, territory); 60 langPop += pop2.getPopulation(); 61 if (pop2.getOfficialStatus().isOfficial()) { 62 officialLangPop += pop2.getPopulation(); 63 } 64 } 65 final double missing = 0.75 * territoryPop - langPop; 66 if (missing > 0) { 67 System.out.println(territory // 68 + "\t" + english.getName("territory", territory) // 69 + "\t" + territoryPop // 70 + "\t" + langPop // 71 + "\t" + gdp // 72 ); 73 scale = 1 + missing / officialLangPop; 74 // scale up the official so that 75 // official + non-official = 70% of total 76 langPop = territoryPop * 0.75; 77 System.out.println("\tScaling " + territory + "\t" + scale * 100 + "%"); 78 } 79 long langUnknown = (long) territoryPop; 80 for (String language : languages) { 81 if (language.equals("tl")) continue; 82 PopulationData pop2 = sdata.getLanguageAndTerritoryPopulationData(language, territory); 83 double langPop2 = pop2.getPopulation(); 84 if (pop2.getOfficialStatus().isOfficial()) { 85 langPop2 *= scale; 86 } 87 languageToGdp.add(language, (long) (gdp * langPop2 / territoryPop)); 88 languageToPop.add(language, (long) (langPop2)); 89 langUnknown -= langPop2; 90 } 91 if (langUnknown > 0) { 92 languageToGdp.add("und", (long) (gdp * langUnknown / territoryPop)); 93 languageToPop.add("und", (long) (langUnknown)); 94 } 95 } 96 } 97 isOfficialLanguageOfEUCountry(String language)98 private Pair<OfficialStatus, String> isOfficialLanguageOfEUCountry(String language) { 99 OfficialStatus bestStatus = OfficialStatus.unknown; 100 String eu = ""; 101 double bestEuPop = 0; 102 Set<String> territories = sdata.getTerritoriesForPopulationData(language); 103 for (String territory : territories) { 104 PopulationData pop = sdata.getLanguageAndTerritoryPopulationData(language, territory); 105 OfficialStatus status = pop.getOfficialStatus(); 106 if (bestStatus.compareTo(status) < 0) { 107 bestStatus = status; 108 } 109 if (status.isMajor() && euCountries.contains(territory)) { 110 if (pop.getLiteratePopulation() > bestEuPop) { 111 bestEuPop = pop.getLiteratePopulation(); 112 eu = territory; 113 } 114 } 115 } 116 return Pair.of(bestStatus, eu); 117 } 118 } 119