1 /* 2 * Copyright (C) 2011 Google Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.i18n.phonenumbers.geocoding; 18 19 import java.io.Externalizable; 20 import java.io.IOException; 21 import java.io.ObjectInput; 22 import java.io.ObjectOutput; 23 import java.util.ArrayList; 24 import java.util.Arrays; 25 import java.util.Collections; 26 import java.util.HashMap; 27 import java.util.HashSet; 28 import java.util.List; 29 import java.util.Map; 30 import java.util.Set; 31 import java.util.SortedMap; 32 import java.util.SortedSet; 33 import java.util.TreeSet; 34 35 /** 36 * A utility which knows the data files that are available for the geocoder to use. The data files 37 * contain mappings from phone number prefixes to text descriptions, and are organized by country 38 * calling code and language that the text descriptions are in. 
/**
 * A utility which knows the data files that are available for the geocoder to use. The data files
 * contain mappings from phone number prefixes to text descriptions, and are organized by country
 * calling code and language that the text descriptions are in.
 *
 * <p>The data is held in two parallel structures: {@code countryCallingCodes[i]} is a country
 * calling code and {@code availableLanguages.get(i)} is the set of language codes for which a data
 * file exists for that calling code. Lookups use a binary search over the calling codes, so the
 * codes must be stored in ascending order.
 *
 * @author Shaopeng Jia
 */
public class MappingFileProvider implements Externalizable {
  /** Maps full locale strings to the language code under which their data file is stored. */
  private static final Map<String, String> LOCALE_NORMALIZATION_MAP;

  static {
    Map<String, String> normalizationMap = new HashMap<String, String>();
    normalizationMap.put("zh_TW", "zh_Hant");
    normalizationMap.put("zh_HK", "zh_Hant");
    normalizationMap.put("zh_MO", "zh_Hant");

    LOCALE_NORMALIZATION_MAP = Collections.unmodifiableMap(normalizationMap);
  }

  private int numOfEntries = 0;
  // Initialized to empty (rather than null) so that an unpopulated provider answers lookups with
  // "no such file" instead of throwing.
  private int[] countryCallingCodes = new int[0];
  private List<Set<String>> availableLanguages = new ArrayList<Set<String>>();

  /**
   * Creates an empty {@link MappingFileProvider}. The default constructor is necessary for
   * implementing {@link Externalizable}. The empty provider could later be populated by
   * {@link #readFileConfigs(java.util.SortedMap)} or {@link #readExternal(java.io.ObjectInput)}.
   */
  public MappingFileProvider() {
  }

  /**
   * Initializes an {@link MappingFileProvider} with {@code availableDataFiles}, replacing any data
   * this provider held before.
   *
   * @param availableDataFiles a map from country calling codes to sets of languages in which data
   *     files are available for the specific country calling code. The map is sorted in ascending
   *     order of the country calling codes as integers.
   */
  public void readFileConfigs(SortedMap<Integer, Set<String>> availableDataFiles) {
    int size = availableDataFiles.size();
    int[] codes = new int[size];
    List<Set<String>> languages = new ArrayList<Set<String>>(size);
    int index = 0;
    for (Map.Entry<Integer, Set<String>> entry : availableDataFiles.entrySet()) {
      codes[index++] = entry.getKey();
      // Defensive copy: later changes to the caller's sets must not affect this provider.
      languages.add(new HashSet<String>(entry.getValue()));
    }
    // Commit only after everything was copied, so a failure above leaves the old state intact.
    numOfEntries = size;
    countryCallingCodes = codes;
    availableLanguages = languages;
  }

  /**
   * Supports Java Serialization. Reads the format produced by
   * {@link #writeExternal(java.io.ObjectOutput)} and replaces any data this provider held before.
   *
   * @param objectInput the stream to read the entries from
   * @throws IOException if the stream cannot be read or declares a negative number of entries
   */
  @Override
  public void readExternal(ObjectInput objectInput) throws IOException {
    int entryCount = objectInput.readInt();
    if (entryCount < 0) {
      throw new IOException("Invalid number of entries: " + entryCount);
    }
    // Always start from fresh, exactly-sized containers. Reusing a longer array would leave stale
    // codes in its tail (which the binary search in getFileName would still see), and appending
    // to an existing list would shift the language sets out of step with the calling codes.
    int[] codes = new int[entryCount];
    List<Set<String>> languages = new ArrayList<Set<String>>(entryCount);
    for (int i = 0; i < entryCount; i++) {
      codes[i] = objectInput.readInt();
      int numOfLangs = objectInput.readInt();
      Set<String> setOfLangs = new HashSet<String>();
      for (int j = 0; j < numOfLangs; j++) {
        setOfLangs.add(objectInput.readUTF());
      }
      languages.add(setOfLangs);
    }
    // Commit only after the whole stream was read, so a failed read leaves the old state intact.
    numOfEntries = entryCount;
    countryCallingCodes = codes;
    availableLanguages = languages;
  }

  /**
   * Supports Java Serialization. Writes the number of entries, then for each entry the country
   * calling code, the number of languages and each language code as a UTF string.
   *
   * @param objectOutput the stream to write the entries to
   * @throws IOException if the stream cannot be written
   */
  @Override
  public void writeExternal(ObjectOutput objectOutput) throws IOException {
    objectOutput.writeInt(numOfEntries);
    for (int i = 0; i < numOfEntries; i++) {
      objectOutput.writeInt(countryCallingCodes[i]);
      Set<String> setOfLangs = availableLanguages.get(i);
      objectOutput.writeInt(setOfLangs.size());
      for (String lang : setOfLangs) {
        objectOutput.writeUTF(lang);
      }
    }
  }

  /**
   * Returns a string representing the data in this class. The string contains one line for each
   * country calling code. The country calling code is followed by a '|' and then the languages
   * sorted in ascending order, each language followed by a ','.
   */
  @Override
  public String toString() {
    StringBuilder output = new StringBuilder();
    for (int i = 0; i < numOfEntries; i++) {
      output.append(countryCallingCodes[i]);
      output.append('|');
      // Sort for a stable, human-readable order; the backing sets are unordered.
      SortedSet<String> sortedSetOfLangs = new TreeSet<String>(availableLanguages.get(i));
      for (String lang : sortedSetOfLangs) {
        output.append(lang);
        output.append(',');
      }
      output.append('\n');
    }
    return output.toString();
  }

  /**
   * Gets the name of the file that contains the mapping data for the {@code countryCallingCode} in
   * the language specified.
   *
   * @param countryCallingCode the country calling code of phone numbers which the data file
   *     contains
   * @param language two-letter lowercase ISO language codes as defined by ISO 639-1
   * @param script four-letter titlecase (the first letter is uppercase and the rest of the letters
   *     are lowercase) ISO script codes as defined in ISO 15924
   * @param region two-letter uppercase ISO country codes as defined by ISO 3166-1
   * @return the name of the file, or empty string if no such file can be found (including when
   *     this provider has not been populated yet)
   */
  String getFileName(int countryCallingCode, String language, String script, String region) {
    if (language.length() == 0) {
      return "";
    }
    int index = Arrays.binarySearch(countryCallingCodes, countryCallingCode);
    if (index < 0) {
      return "";
    }
    Set<String> setOfLangs = availableLanguages.get(index);
    if (setOfLangs.isEmpty()) {
      return "";
    }
    String languageCode = findBestMatchingLanguageCode(setOfLangs, language, script, region);
    if (languageCode.length() == 0) {
      return "";
    }
    return countryCallingCode + "_" + languageCode;
  }

  /**
   * Picks the most specific language code in {@code setOfLangs} that matches the requested locale.
   * Candidates are tried in this order: the normalized form of the full locale (if one is
   * registered in {@code LOCALE_NORMALIZATION_MAP}), the full locale itself, then, when both
   * script and region are given, language+script and language+region, and finally the bare
   * language.
   *
   * @return the matching language code, or empty string if none of the candidates is available
   */
  private static String findBestMatchingLanguageCode(
      Set<String> setOfLangs, String language, String script, String region) {
    String fullLocale = constructFullLocale(language, script, region);
    String normalizedLocale = LOCALE_NORMALIZATION_MAP.get(fullLocale);
    if (normalizedLocale != null && setOfLangs.contains(normalizedLocale)) {
      return normalizedLocale;
    }
    if (setOfLangs.contains(fullLocale)) {
      return fullLocale;
    }

    boolean hasScript = script.length() > 0;
    boolean hasRegion = region.length() > 0;
    if (hasScript && hasRegion) {
      String langWithScript = language + '_' + script;
      if (setOfLangs.contains(langWithScript)) {
        return langWithScript;
      }
      String langWithRegion = language + '_' + region;
      if (setOfLangs.contains(langWithRegion)) {
        return langWithRegion;
      }
    }
    // When neither script nor region is given, the full locale already was the bare language and
    // has been checked above, so the fallback is only needed if at least one of them is present.
    if ((hasScript || hasRegion) && setOfLangs.contains(language)) {
      return language;
    }
    return "";
  }

  /** Joins the language with the non-empty script and region parts, separated by '_'. */
  private static String constructFullLocale(String language, String script, String region) {
    StringBuilder fullLocale = new StringBuilder(language);
    appendSubsequentLocalePart(script, fullLocale);
    appendSubsequentLocalePart(region, fullLocale);
    return fullLocale.toString();
  }

  /** Appends '_' followed by {@code subsequentLocalePart} unless the part is empty. */
  private static void appendSubsequentLocalePart(
      String subsequentLocalePart, StringBuilder fullLocale) {
    if (subsequentLocalePart.length() > 0) {
      fullLocale.append('_').append(subsequentLocalePart);
    }
  }
}