• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * Copyright (C) 2011 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
17 package com.android.i18n.phonenumbers.geocoding;
18 
19 import java.io.Externalizable;
20 import java.io.IOException;
21 import java.io.ObjectInput;
22 import java.io.ObjectOutput;
23 import java.util.ArrayList;
24 import java.util.Arrays;
25 import java.util.Collections;
26 import java.util.HashMap;
27 import java.util.HashSet;
28 import java.util.List;
29 import java.util.Map;
30 import java.util.Set;
31 import java.util.SortedMap;
32 import java.util.SortedSet;
33 import java.util.TreeSet;
34 
/**
 * A utility which knows the data files that are available for the geocoder to use. The data files
 * contain mappings from phone number prefixes to text descriptions, and are organized by country
 * calling code and language that the text descriptions are in.
 *
 * @author Shaopeng Jia
 */
public class MappingFileProvider implements Externalizable {
  // Number of valid entries in countryCallingCodes and availableLanguages.
  private int numOfEntries = 0;
  // Country calling codes in ascending order; searched with a binary search in getFileName().
  private int[] countryCallingCodes;
  // availableLanguages.get(i) holds the languages available for countryCallingCodes[i].
  private List<Set<String>> availableLanguages;
  // Maps a full locale string to the language code that is looked up in its place.
  private static final Map<String, String> LOCALE_NORMALIZATION_MAP;

  static {
    Map<String, String> normalizationMap = new HashMap<String, String>();
    normalizationMap.put("zh_TW", "zh_Hant");
    normalizationMap.put("zh_HK", "zh_Hant");
    normalizationMap.put("zh_MO", "zh_Hant");

    LOCALE_NORMALIZATION_MAP = Collections.unmodifiableMap(normalizationMap);
  }

  /**
   * Creates an empty {@link MappingFileProvider}. The default constructor is necessary for
   * implementing {@link Externalizable}. The empty provider could later be populated by
   * {@link #readFileConfigs(java.util.SortedMap)} or {@link #readExternal(java.io.ObjectInput)}.
   */
  public MappingFileProvider() {
  }

  /**
   * Initializes an {@link MappingFileProvider} with {@code availableDataFiles}. Any data
   * previously held by this provider is replaced.
   *
   * @param availableDataFiles  a map from country calling codes to sets of languages in which data
   *     files are available for the specific country calling code. The map is sorted in ascending
   *     order of the country calling codes as integers.
   */
  public void readFileConfigs(SortedMap<Integer, Set<String>> availableDataFiles) {
    int size = availableDataFiles.size();
    int[] codes = new int[size];
    List<Set<String>> languages = new ArrayList<Set<String>>(size);
    int index = 0;
    // Walk the entries directly so that each language set is fetched without a second lookup.
    for (Map.Entry<Integer, Set<String>> entry : availableDataFiles.entrySet()) {
      codes[index++] = entry.getKey();
      // Copy the set so later changes to the caller's map do not leak into this provider.
      languages.add(new HashSet<String>(entry.getValue()));
    }
    numOfEntries = size;
    countryCallingCodes = codes;
    availableLanguages = languages;
  }

  /**
   * Supports Java Serialization. Replaces any data previously held by this provider with the
   * entries read from {@code objectInput}; the provider is only modified once the whole input has
   * been read successfully.
   *
   * @param objectInput  the input to read the entries from, in the format produced by
   *     {@link #writeExternal(java.io.ObjectOutput)}
   * @throws IOException  if reading fails or the stored number of entries is negative
   */
  @Override
  public void readExternal(ObjectInput objectInput) throws IOException {
    int entryCount = objectInput.readInt();
    if (entryCount < 0) {
      throw new IOException("Invalid number of entries: " + entryCount);
    }
    // Always build fresh, exactly-sized containers: reusing the old ones would keep stale codes
    // and language sets around and break the index correspondence between the two.
    int[] codes = new int[entryCount];
    List<Set<String>> languages = new ArrayList<Set<String>>(entryCount);
    for (int i = 0; i < entryCount; i++) {
      codes[i] = objectInput.readInt();
      int numOfLangs = objectInput.readInt();
      Set<String> setOfLangs = new HashSet<String>();
      for (int j = 0; j < numOfLangs; j++) {
        setOfLangs.add(objectInput.readUTF());
      }
      languages.add(setOfLangs);
    }
    numOfEntries = entryCount;
    countryCallingCodes = codes;
    availableLanguages = languages;
  }

  /**
   * Supports Java Serialization. Writes the number of entries, followed for each entry by the
   * country calling code, the number of languages and each language as a UTF string.
   *
   * @param objectOutput  the output to write the entries to
   * @throws IOException  if writing fails
   */
  @Override
  public void writeExternal(ObjectOutput objectOutput) throws IOException {
    objectOutput.writeInt(numOfEntries);
    for (int i = 0; i < numOfEntries; i++) {
      objectOutput.writeInt(countryCallingCodes[i]);
      Set<String> setOfLangs = availableLanguages.get(i);
      objectOutput.writeInt(setOfLangs.size());
      for (String lang : setOfLangs) {
        objectOutput.writeUTF(lang);
      }
    }
  }

  /**
   * Returns a string representing the data in this class. The string contains one line for each
   * country calling code. The country calling code is followed by a '|' and then the languages
   * sorted in ascending order, each one (including the last) followed by a ','.
   */
  @Override
  public String toString() {
    StringBuilder output = new StringBuilder();
    for (int i = 0; i < numOfEntries; i++) {
      output.append(countryCallingCodes[i]);
      output.append('|');
      // Sort a copy so the output is deterministic regardless of the hash set iteration order.
      SortedSet<String> sortedSetOfLangs = new TreeSet<String>(availableLanguages.get(i));
      for (String lang : sortedSetOfLangs) {
        output.append(lang);
        output.append(',');
      }
      output.append('\n');
    }
    return output.toString();
  }

  /**
   * Gets the name of the file that contains the mapping data for the {@code countryCallingCode} in
   * the language specified.
   *
   * @param countryCallingCode  the country calling code of phone numbers which the data file
   *     contains
   * @param language  two-letter lowercase ISO language codes as defined by ISO 639-1
   * @param script  four-letter titlecase (the first letter is uppercase and the rest of the letters
   *     are lowercase) ISO script codes as defined in ISO 15924
   * @param region  two-letter uppercase ISO country codes as defined by ISO 3166-1
   * @return  the name of the file, or empty string if no such file can be found
   */
  String getFileName(int countryCallingCode, String language, String script, String region) {
    // An unpopulated provider has no backing array yet; report "no such file" instead of failing.
    if (language.length() == 0 || numOfEntries == 0) {
      return "";
    }
    int index = Arrays.binarySearch(countryCallingCodes, 0, numOfEntries, countryCallingCode);
    if (index < 0) {
      return "";
    }
    Set<String> setOfLangs = availableLanguages.get(index);
    if (setOfLangs.isEmpty()) {
      return "";
    }
    String languageCode = findBestMatchingLanguageCode(setOfLangs, language, script, region);
    if (languageCode.length() == 0) {
      return "";
    }
    return countryCallingCode + "_" + languageCode;
  }

  /**
   * Picks the entry of {@code setOfLangs} that best matches the requested locale. Candidates are
   * tried in this order: the normalized form of the full locale (if one is defined), the full
   * locale itself, then progressively less specific forms (language with script, language with
   * region, language only).
   *
   * @return  the matching language code, or empty string if none of the candidates is available
   */
  private static String findBestMatchingLanguageCode(
      Set<String> setOfLangs, String language, String script, String region) {
    String fullLocale = constructFullLocale(language, script, region);
    String normalizedLocale = LOCALE_NORMALIZATION_MAP.get(fullLocale);
    if (normalizedLocale != null && setOfLangs.contains(normalizedLocale)) {
      return normalizedLocale;
    }
    if (setOfLangs.contains(fullLocale)) {
      return fullLocale;
    }

    boolean hasScript = script.length() > 0;
    boolean hasRegion = region.length() > 0;
    if (!hasScript && !hasRegion) {
      // The full locale was the bare language, which has already been looked up above.
      return "";
    }
    if (hasScript && hasRegion) {
      String langWithScript = language + '_' + script;
      if (setOfLangs.contains(langWithScript)) {
        return langWithScript;
      }
      String langWithRegion = language + '_' + region;
      if (setOfLangs.contains(langWithRegion)) {
        return langWithRegion;
      }
    }
    // Last resort when at least one of script or region was given: the language on its own.
    return setOfLangs.contains(language) ? language : "";
  }

  /**
   * Joins the language with the non-empty script and region parts, separated by '_'.
   */
  private static String constructFullLocale(String language, String script, String region) {
    StringBuilder fullLocale = new StringBuilder(language);
    appendSubsequentLocalePart(script, fullLocale);
    appendSubsequentLocalePart(region, fullLocale);
    return fullLocale.toString();
  }

  /**
   * Appends '_' followed by {@code subsequentLocalePart} to {@code fullLocale}, unless the part is
   * empty.
   */
  private static void appendSubsequentLocalePart(
      String subsequentLocalePart, StringBuilder fullLocale) {
    if (subsequentLocalePart.length() > 0) {
      fullLocale.append('_').append(subsequentLocalePart);
    }
  }
}
230