• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2011 The Libphonenumber Authors
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.google.i18n.phonenumbers;
18 
19 import java.io.BufferedOutputStream;
20 import java.io.BufferedReader;
21 import java.io.FileInputStream;
22 import java.io.FileOutputStream;
23 import java.io.IOException;
24 import java.io.InputStream;
25 import java.io.InputStreamReader;
26 import java.io.OutputStream;
27 import java.io.PrintWriter;
28 import java.util.ArrayList;
29 import java.util.HashMap;
30 import java.util.List;
31 import java.util.Map;
32 import java.util.SortedMap;
33 import java.util.TreeMap;
34 import java.util.logging.Logger;
35 
36 /**
37  * Utility class that makes the geocoding data as small as possible. This class assumes the
38  * geocoding data provided as input doesn't contain any gaps thus should not be used with incomplete
39  * data (missing prefixes).
40  * <pre>
41  * Example:        Can be combined as:
42  *   33131|Paris     331|Paris
43  *   33132|Paris     334|Marseille
44  *   3341|Marseille
45  * </pre>
46  *
47  * @author Philippe Liard
48  */
49 public class CombineGeoData {
50   private final InputStream inputStream;
51   private final OutputStream outputStream;
52   private final String outputLineSeparator;
53   private static final Logger LOGGER = Logger.getLogger(CombineGeoData.class.getName());
54 
CombineGeoData(InputStream inputStream, OutputStream outputStream, String lineSeparator)55   public CombineGeoData(InputStream inputStream, OutputStream outputStream, String lineSeparator) {
56     this.inputStream = inputStream;
57     this.outputStream = outputStream;
58     this.outputLineSeparator = lineSeparator;
59   }
60 
CombineGeoData(InputStream inputStream, OutputStream outputStream)61   public CombineGeoData(InputStream inputStream, OutputStream outputStream) {
62     this(inputStream, outputStream, System.getProperty("line.separator"));
63   }
64 
65   /**
66    * Utility class that contains two indexes (start and end).
67    */
68   static class Range {
69     public final int start;
70     public final int end;
71 
Range(int start, int end)72     public Range(int start, int end) {
73       this.start = start;
74       this.end = end;
75     }
76   }
77 
78   /**
79    * Parses the input text file expected to contain lines written as 'prefix|description'. Note that
80    * description can be empty.
81    *
82    * @return the map of phone prefix data parsed.
83    * @throws IOException
84    */
parseInput()85   private SortedMap<String, String> parseInput() throws IOException {
86     SortedMap<String, String> outputMap = new TreeMap<String, String>();
87     BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream));
88 
89     for (String line; (line = bufferedReader.readLine()) != null; ) {
90       int indexOfPipe = line.indexOf('|');
91       if (indexOfPipe == -1) {
92         continue;
93       }
94       outputMap.put(line.substring(0, indexOfPipe), line.substring(indexOfPipe + 1));
95     }
96     return outputMap;
97   }
98 
99   /**
100    * Creates a sorted array of phone number prefixes as strings from the provided phone number
101    * prefix map.
102    *
103    * @return the array of phone number prefixes sorted by string.
104    */
createSortedPrefixArray(SortedMap<String, String> phonePrefixMap)105   static String[] createSortedPrefixArray(SortedMap<String, String> phonePrefixMap) {
106     String[] sortedPrefixes = new String[phonePrefixMap.size()];
107     phonePrefixMap.keySet().toArray(sortedPrefixes);
108     return sortedPrefixes;
109   }
110 
111   /**
112    * Finds the end index of the range of phone number prefixes starting at the provided index.
113    * A range ends when a different description or prefix divided by 10 is encountered.
114    *
115    * @param prefixes  the array of phone number prefixes sorted by string
116    * @param phonePrefixMap  the map associating phone number prefixes and descriptions
117    * @param start  the start index of the prefixes array
118    * @return  the index of the end of the range starting at the provided index
119    */
findRangeEnd(String[] prefixes, Map<String, String> phonePrefixMap, int start)120   static int findRangeEnd(String[] prefixes, Map<String, String> phonePrefixMap, int start) {
121     String previousPrefix = prefixes[start];
122     int previousPrefixAsInt = Integer.parseInt(previousPrefix);
123     String previousLocation = phonePrefixMap.get(previousPrefix);
124 
125     for (int i = start; i < prefixes.length; i++) {
126       String currentPrefix = prefixes[i];
127       String currentLocation = phonePrefixMap.get(currentPrefix);
128       if (!currentLocation.equals(previousLocation) ||
129           (Integer.parseInt(currentPrefix) / 10 != previousPrefixAsInt / 10)) {
130         return i - 1;
131       }
132     }
133     return prefixes.length - 1;
134   }
135 
136   /**
137    * Splits the provided array of prefixes into an array of ranges. A range contains the start and
138    * end indexes of a set of mappings that share the same description and have the same prefix minus
139    * the last digit.
140    *
141    * @param prefixes  the array of phone number prefixes sorted by string
142    * @param phonePrefixMap  the map associating phone number prefixes and descriptions
143    * @return  the list of ranges
144    */
createRanges(String[] prefixes, Map<String, String> phonePrefixMap)145   static List<Range> createRanges(String[] prefixes, Map<String, String> phonePrefixMap) {
146     List<Range> ranges = new ArrayList<Range>();
147     int index = 0;
148     int phonePrefixMapSize = phonePrefixMap.size();
149 
150     while (index < phonePrefixMapSize) {
151       int rangeEnd = findRangeEnd(prefixes, phonePrefixMap, index);
152       ranges.add(new Range(index, rangeEnd));
153       index = rangeEnd + 1;
154     }
155     return ranges;
156   }
157 
158   /**
159    * Checks whether the provided candidate prefix conflicts with the prefixes contained in the
160    * provided range. A conflict occurs if the provided prefix covers (is a prefix of) one of the
161    * prefixes contained in the range.
162    *
163    * @param prefixes  the array of phone number prefixes sorted by string
164    * @param candidate  the candidate phone number prefix
165    * @param start  the start of the range
166    * @param end  the end of the range
167    * @return  whether the candidate prefix conflicts with the prefixes contained in the range
168    */
findConflict(String[] prefixes, int candidate, int start, int end)169   static boolean findConflict(String[] prefixes, int candidate, int start, int end) {
170     String candidateAsString = String.valueOf(candidate);
171     for (int i = start; i <= end; i++) {
172       String prefix = prefixes[i];
173       if (prefix.startsWith(candidateAsString)) {
174         return true;
175       }
176     }
177     return false;
178   }
179 
180   /**
181    * Checks whether the provided candidate prefix conflicts with the prefixes contained in the
182    * ranges adjacent (before and after the provided range) of the provided range.
183    *
184    * @param ranges  the list of ranges
185    * @param rangeIndex  the index of the range in which the conflict search occurs
186    * @param prefixes  the array of phone number prefixes sorted by string
187    * @param candidate  the candidate phone number prefix
188    * @return  whether a conflict was found in the provided range
189    */
hasConflict(List<Range> ranges, int rangeIndex, String[] prefixes, int candidate)190   static boolean hasConflict(List<Range> ranges, int rangeIndex, String[] prefixes, int candidate) {
191     if (rangeIndex > 0) {
192       Range previousRange = ranges.get(rangeIndex - 1);
193       if (findConflict(prefixes, candidate, previousRange.start, previousRange.end)) {
194         return true;
195       }
196     }
197     if (rangeIndex < ranges.size() - 1) {
198       Range nextRange = ranges.get(rangeIndex + 1);
199       if (findConflict(prefixes, candidate, nextRange.start, nextRange.end)) {
200         return true;
201       }
202     }
203     return false;
204   }
205 
206   /**
207    * Combines the mappings contained in the provided map. A new combined map is returned as a result
208    * in case any combination occurred. Otherwise (if no combination occurred) the same map (same
209    * identity) is returned. Note that this method performs a 'single step' (i.e performs only one
210    * combination iteration).
211    */
combine(SortedMap<String, String> phonePrefixMap)212   static SortedMap<String, String> combine(SortedMap<String, String> phonePrefixMap) {
213     String[] prefixes = createSortedPrefixArray(phonePrefixMap);
214     List<Range> ranges = createRanges(prefixes, phonePrefixMap);
215     Map<Integer /* range index */, Integer> combinedPrefixes = new HashMap<Integer, Integer>();
216     int rangeIndex = 0;
217 
218     for (Range range : ranges) {
219       int prefixCandidate = Integer.parseInt(prefixes[range.start]) / 10;
220       if (prefixCandidate != 0 && !hasConflict(ranges, rangeIndex, prefixes, prefixCandidate)) {
221         combinedPrefixes.put(rangeIndex, prefixCandidate);
222       }
223       ++rangeIndex;
224     }
225     if (combinedPrefixes.size() == 0) {
226       return phonePrefixMap;
227     }
228     SortedMap<String, String> combinedMap = new TreeMap<String, String>();
229     rangeIndex = 0;
230     for (Range range : ranges) {
231       Integer combinedRange = combinedPrefixes.get(rangeIndex++);
232       if (combinedRange != null) {
233         String firstPrefixOfRange = prefixes[range.start];
234         combinedMap.put(String.valueOf(combinedRange), phonePrefixMap.get(firstPrefixOfRange));
235       } else {
236         for (int i = range.start; i <= range.end; i++) {
237           String prefix = prefixes[i];
238           combinedMap.put(prefix, phonePrefixMap.get(prefix));
239         }
240       }
241     }
242     return combinedMap;
243   }
244 
245   /**
246    * Combines the provided map associating phone number prefixes and descriptions.
247    *
248    * @return  the combined map
249    */
combineMultipleTimes(SortedMap<String, String> phonePrefixMap)250   static SortedMap<String, String> combineMultipleTimes(SortedMap<String, String> phonePrefixMap) {
251     SortedMap<String, String> previousMap = null;
252     while (phonePrefixMap != previousMap) {
253       previousMap = phonePrefixMap;
254       phonePrefixMap = combine(phonePrefixMap);
255     }
256     return phonePrefixMap;
257   }
258 
259   /**
260    * Combines the geocoding data read from the provided input stream and writes it as a result to
261    * the provided output stream. Uses the provided string as the line separator.
262    */
run()263   public void run() throws IOException {
264     SortedMap<String, String> phonePrefixMap = parseInput();
265     phonePrefixMap = combineMultipleTimes(phonePrefixMap);
266     PrintWriter printWriter = new PrintWriter(new BufferedOutputStream(outputStream));
267     for (Map.Entry<String, String> mapping : phonePrefixMap.entrySet()) {
268       printWriter.printf("%s|%s%s", mapping.getKey(), mapping.getValue(), outputLineSeparator);
269     }
270     printWriter.flush();
271   }
272 
main(String[] args)273   public static void main(String[] args) {
274     if (args.length != 2) {
275       LOGGER.severe("usage: java -jar combine-geodata.jar /path/to/input /path/to/output");
276       System.exit(1);
277     }
278     try {
279       CombineGeoData combineGeoData =
280           new CombineGeoData(new FileInputStream(args[0]), new FileOutputStream(args[1]));
281       combineGeoData.run();
282     } catch (Exception e) {
283       LOGGER.severe(e.getMessage());
284       System.exit(1);
285     }
286   }
287 }
288