• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.icu;
2 
3 import java.util.ArrayList;
4 import java.util.Collection;
5 import java.util.Comparator;
6 import java.util.HashMap;
7 import java.util.HashSet;
8 import java.util.List;
9 import java.util.Map;
10 import java.util.Set;
11 import java.util.regex.Matcher;
12 import java.util.regex.Pattern;
13 
14 import org.unicode.cldr.icu.RegexManager.CldrArray;
15 import org.unicode.cldr.icu.RegexManager.PathValueInfo;
16 import org.unicode.cldr.icu.RegexManager.RegexResult;
17 import org.unicode.cldr.test.DisplayAndInputProcessor.NumericType;
18 import org.unicode.cldr.tool.FilterFactory;
19 import org.unicode.cldr.util.Builder;
20 import org.unicode.cldr.util.CLDRFile;
21 import org.unicode.cldr.util.DtdType;
22 import org.unicode.cldr.util.Factory;
23 import org.unicode.cldr.util.LanguageTagParser;
24 import org.unicode.cldr.util.LocaleIDParser;
25 import org.unicode.cldr.util.PatternCache;
26 import org.unicode.cldr.util.RegexLookup;
27 import org.unicode.cldr.util.RegexLookup.Finder;
28 import org.unicode.cldr.util.SupplementalDataInfo;
29 //import org.unicode.cldr.util.SupplementalDataInfo.MeasurementType;
30 
31 import com.ibm.icu.util.Output;
32 
33 /**
34  * A mapper that converts locale data from CLDR to the ICU data structure.
35  *
36  * @author jchye
37  */
38 public class LocaleMapper extends Mapper {
39     /**
40      * Map for converting enums to their integer values.
41      */
42     private static final Map<String, String> enumMap = Builder.with(new HashMap<String, String>())
43         .put("titlecase-firstword", "1")
44         .put("no-change", "0")
45         .freeze();
46 
47     private static final Pattern DRAFT_PATTERN = PatternCache.get("\\[@draft=\"\\w+\"]");
48     private static final Pattern TERRITORY_XPATH = PatternCache.get(
49         "//ldml/localeDisplayNames/territories/territory\\[@type=\"(\\w+)\"]");
50     private static final Pattern RB_DATETIMEPATTERN = PatternCache.get(
51         "/calendar/(\\w++)/DateTimePatterns");
52 
53     private SupplementalDataInfo supplementalDataInfo;
54     // We may use different factories for resolved or unresolved CLDRFiles depending
55     // on whether filtering is required.
56     private Factory unresolvedFactory;
57     private Factory resolvedFactory;
58     private Factory specialFactory;
59     private RegexManager manager;
60     private String debugXPath;
61 
62     private Set<String> deprecatedTerritories;
63 
64     /**
65      * Special hack comparator, so that RB strings come out in the right order.
66      * This is only important for the order of items in arrays.
67      */
68     private static Comparator<String> comparator = new Comparator<String>() {
69         private final Pattern CURRENCY_FORMAT = PatternCache.get(
70             "//ldml/numbers/currencies/currency\\[@type=\"\\w++\"]/(.++)");
71         private final Pattern DATE_OR_TIME_FORMAT = PatternCache.get(
72             "//ldml/dates/calendars/calendar\\[@type=\"\\w++\"]/(date|time)Formats/.*");
73         private final Pattern MONTH_PATTERN = PatternCache
74             .get(
75                 "//ldml/dates/calendars/calendar\\[@type=\"\\w++\"]/months/monthContext\\[@type=\"[\\w\\-]++\"]/monthWidth\\[@type=\"\\w++\"]/month\\[@type=\"\\d++\"](\\[@yeartype=\"leap\"])?");
76         private final Pattern CONTEXT_TRANSFORM = PatternCache.get(
77             "//ldml/contextTransforms/contextTransformUsage\\[@type=\"([^\"]++)\"]/contextTransform\\[@type=\"([^\"]++)\"]");
78 
79         private final String[] CURRENCY_ORDER = { "symbol", "displayName",
80             "pattern[@type=\"standard\"]", "decimal", "group" };
81 
82         /**
83          * Reverse the ordering of the following:
84          * //ldml/numbers/currencies/currency[@type="([^"]*)"]/displayName ; curr ; /Currencies/$1
85          * //ldml/numbers/currencies/currency[@type="([^"]*)"]/symbol ; curr ; /Currencies/$1
86          * and the following (time/date)
87          * //ldml/dates/calendars/calendar[@type="([^"]*)"]/(dateFormats|dateTimeFormats|timeFormats)/(?:[^/\[]*)[@type=
88          * "([^"]*)"]/(?:[^/\[]*)[@type="([^"]*)"]/.* ; locales ; /calendar/$1/DateTimePatterns
89          */
90         @Override
91         public int compare(String arg0, String arg1) {
92             Matcher[] matchers = new Matcher[2];
93             if (RegexManager.matches(CURRENCY_FORMAT, arg0, arg1, matchers)) {
94                 // Use ldml ordering except that symbol should be first.
95                 int index0 = getIndexOf(CURRENCY_ORDER, matchers[0].group(1));
96                 int index1 = getIndexOf(CURRENCY_ORDER, matchers[1].group(1));
97                 return index0 - index1;
98             } else if (RegexManager.matches(DATE_OR_TIME_FORMAT, arg0, arg1, matchers)) {
99                 int compareValue = matchers[0].group(1).compareTo(matchers[1].group(1));
100                 if (compareValue != 0) return -compareValue;
101             } else if (RegexManager.matches(CONTEXT_TRANSFORM, arg0, arg1, matchers)) {
102                 // Sort uiListOrMenu before stand-alone.
103                 if (matchers[0].group(1).equals(matchers[1].group(1))) {
104                     return -matchers[0].group(2).compareTo(matchers[1].group(2));
105                 }
106             } else if (RegexManager.matches(MONTH_PATTERN, arg0, arg1, matchers)) {
107                 // Sort leap year types after normal month types.
108                 String matchGroup0 = matchers[0].group(1);
109                 String matchGroup1 = matchers[1].group(1);
110                 if (matchGroup0 != matchGroup1) {
111                     return matchGroup0 == null && matchGroup1 != null ? -1 : 1;
112                 }
113             }
114 
115             return CLDRFile.getComparator(DtdType.ldml).compare(arg0, arg1);
116         }
117     };
118 
119     /**
120      * Looks for a string in an array
121      *
122      * @param order
123      *            the array to be searched
124      * @param key
125      *            the string to be searched for
126      * @return the index of the string if found, -1 if not found
127      */
getIndexOf(String[] order, String key)128     private static int getIndexOf(String[] order, String key) {
129         for (int i = 0; i < order.length; i++) {
130             if (order[i].equals(key)) return i;
131         }
132         return -1;
133     }
134 
135     /**
136      * LocaleMapper constructor.
137      *
138      * @param factory
139      *            the factory containing the CLDR data to be converted
140      * @param specialFactory
141      *            a factory containing any additional CLDR data
142      * @param supplementalDataInfo
143      *            SupplementalDataInfo object
144      * @param useAltValues
145      *            true if alt path filtering should be performed
146      * @param organization
147      *            the organization to filter the data by
148      *            (null if coverage filtering is not needed)
149      */
LocaleMapper(Factory factory, Factory specialFactory, SupplementalDataInfo supplementalDataInfo, boolean useAltValues, String organization)150     public LocaleMapper(Factory factory, Factory specialFactory,
151         SupplementalDataInfo supplementalDataInfo, boolean useAltValues,
152         String organization) {
153         manager = new RegexManager("ldml2icu_locale.txt");
154         unresolvedFactory = resolvedFactory = factory;
155         // If filtering is required, filter all unresolved CLDRFiles for use in
156         // fillFromCldr(). We don't filter the resolved CLDRFiles by organization
157         // coverage level because
158         // some rbPaths (e.g. /calendar/x/DateTimePatterns) have a fixed number
159         // of values that must always be present regardless of filtering.
160         if (useAltValues || organization != null) {
161             unresolvedFactory = FilterFactory.load(factory, organization, useAltValues);
162             resolvedFactory = FilterFactory.load(factory, null, useAltValues);
163         }
164         this.specialFactory = specialFactory;
165         this.supplementalDataInfo = supplementalDataInfo;
166     }
167 
168     /**
169      * @return the set of locales available for processing by this mapper
170      */
171     @Override
getAvailable()172     public Set<String> getAvailable() {
173         return unresolvedFactory.getAvailable();
174     }
175 
176     /**
177      * @param filename
178      * @return true if a special XML file with the specified filename is available.
179      */
hasSpecialFile(String filename)180     private boolean hasSpecialFile(String filename) {
181         return specialFactory != null && specialFactory.getAvailable().contains(filename);
182     }
183 
184     /**
185      * @return the set of deprecated territories to be ignored. Remove when no longer
186      *         present in CLDR data.
187      */
getDeprecatedTerritories()188     private Set<String> getDeprecatedTerritories() {
189         if (deprecatedTerritories == null) {
190             deprecatedTerritories = Builder.with(
191                 supplementalDataInfo.getLocaleAliasInfo().get("territory").keySet())
192                 .remove("062").remove("172").remove("200").remove("830")
193                 .remove("AN").remove("CS").remove("QU").get();
194         }
195         return deprecatedTerritories;
196     }
197 
198     /**
199      * Fills an IcuData object using the CLDR data for the specified locale.
200      *
201      * @param locale
202      * @return the filled IcuData object
203      */
204     @Override
fillFromCldr(String locale)205     public IcuData[] fillFromCldr(String locale) {
206         Set<String> deprecatedTerritories = getDeprecatedTerritories();
207         CLDRFile resolvedCldr = resolvedFactory.make(locale, true);
208         RegexLookup<RegexResult> pathConverter = manager.getPathConverter(resolvedCldr);
209 
210         // First pass through the unresolved CLDRFile to get all icu paths.
211         CLDRFile cldr = unresolvedFactory.make(locale, false);
212         Map<String, CldrArray> pathValueMap = new HashMap<String, CldrArray>();
213         Set<String> validRbPaths = new HashSet<String>();
214         for (String xpath : cldr) {
215             // Territory hacks to be removed once CLDR data is fixed.
216             Matcher matcher = TERRITORY_XPATH.matcher(xpath);
217             if (matcher.matches()) {
218                 String country = matcher.group(1);
219                 if (deprecatedTerritories.contains(country)) {
220                     continue;
221                 }
222             }
223 
224             // Add rb paths.
225             Output<Finder> matcherFound = new Output<Finder>();
226             Output<String[]> firstInfo = new Output<>();
227             RegexResult regexResult = matchXPath(pathConverter, cldr, xpath, matcherFound, firstInfo);
228             if (regexResult == null) continue;
229 //            String[] arguments = matcherFound.value.getInfo();
230             String[] arguments = firstInfo.value;
231             for (PathValueInfo info : regexResult) {
232                 String rbPath = info.processRbPath(arguments);
233                 validRbPaths.add(rbPath);
234                 // The immediate parent of every path should also exist.
235                 validRbPaths.add(rbPath.substring(0, rbPath.lastIndexOf('/')));
236             }
237         }
238 
239         // Get all values from the resolved CLDRFile.
240         for (String xpath : resolvedCldr) {
241             // Since the unresolved CLDRFile may have been modified, use it
242             // to add values instead of the resolved CLDRFile if possible.
243             CLDRFile fileToUse = cldr.getStringValue(xpath) == null ? resolvedCldr : cldr;
244             addMatchesForPath(xpath, fileToUse, validRbPaths, pathConverter, pathValueMap);
245         }
246 
247         // Add fallback paths if necessary.
248         manager.addFallbackValues(resolvedCldr, pathValueMap);
249 
250         // Add special values to file.
251         boolean hasSpecial = hasSpecialFile(locale);
252         if (hasSpecial) {
253             CLDRFile specialCldrFile = specialFactory.make(locale, false);
254             for (String xpath : specialCldrFile) {
255                 if (resolvedCldr.isHere(xpath)) continue;
256                 addMatchesForPath(xpath, specialCldrFile, null, pathConverter, pathValueMap);
257             }
258         }
259 
260         for (String rbPath : pathValueMap.keySet()) {
261             // HACK: DateTimePatterns needs a duplicate of the medium
262             // dateTimeFormat (formerly indicated using dateTimeFormats/default).
263             // This hack can be removed when ICU no longer requires it.
264             Matcher matcher = RB_DATETIMEPATTERN.matcher(rbPath);
265             if (matcher.matches()) {
266                 String calendar = matcher.group(1);
267                 CldrArray valueList = RegexManager.getCldrArray(rbPath, pathValueMap);
268                 // Create a dummy xpath to sort the value in front of the other date time formats.
269                 String basePath = "//ldml/dates/calendars/calendar[@type=\"" + calendar + "\"]/dateTimeFormats";
270                 String mediumFormatPath = basePath
271                     + "/dateTimeFormatLength[@type=\"medium\"]/dateTimeFormat[@type=\"standard\"]/pattern[@type=\"standard\"]";
272                 valueList.add(basePath,
273                     getStringValue(resolvedCldr, mediumFormatPath),
274                     null);
275             }
276         }
277 
278         // HACK: Fill missing narrow era values with their abbreviated versions.
279         CldrArray narrowEras = pathValueMap.get("/calendar/japanese/eras/narrow");
280         CldrArray abbreviatedEras = pathValueMap.get("/calendar/japanese/eras/abbreviated");
281         if (narrowEras != null && abbreviatedEras != null) {
282             narrowEras.addAll(abbreviatedEras);
283         }
284 
285         IcuData icuData = new IcuData("common/main/" + locale + ".xml", locale, true, enumMap);
286         if (hasSpecial) {
287             icuData.setFileComment("ICU <specials> source: <path>/common/main/" + locale + ".xml");
288         }
289         fillIcuData(pathValueMap, comparator, icuData);
290 
291         // More hacks
292         hackAddExtras(resolvedCldr, locale, icuData);
293         return new IcuData[] { icuData };
294     }
295 
fillIcuData(Map<String, CldrArray> pathValueMap, Comparator<String> comparator, IcuData icuData)296     private void fillIcuData(Map<String, CldrArray> pathValueMap,
297         Comparator<String> comparator, IcuData icuData) {
298         // Convert values to final data structure.
299         for (String rbPath : pathValueMap.keySet()) {
300             icuData.addAll(rbPath, pathValueMap.get(rbPath).sortValues(comparator));
301         }
302     }
303 
getFullXPath(String xpath, CLDRFile cldrFile)304     public static String getFullXPath(String xpath, CLDRFile cldrFile) {
305         String fullPath = cldrFile.getFullXPath(xpath);
306         return fullPath == null ? xpath : DRAFT_PATTERN.matcher(fullPath).replaceAll("");
307     }
308 
309     /**
310      * @param cldr
311      * @param path
312      * @param matcherFound
313      * @param firstInfo
314      * @return the result of converting an xpath into an ICU-style path
315      */
matchXPath(RegexLookup<RegexResult> lookup, CLDRFile cldr, String path, Output<Finder> matcherFound, Output<String[]> firstInfo)316     private RegexResult matchXPath(RegexLookup<RegexResult> lookup,
317         CLDRFile cldr, String path,
318         Output<Finder> matcherFound, Output<String[]> firstInfo) {
319         String fullPath = cldr.getFullXPath(path);
320         fullPath = fullPath == null ? path : DRAFT_PATTERN.matcher(fullPath).replaceAll("");
321         List<String> debugResults = isDebugXPath(fullPath) ? new ArrayList<String>() : null;
322         Output<String[]> info = new Output<>();
323         RegexResult result = lookup.get(fullPath, null, info, matcherFound, debugResults);
324         if (debugResults != null) {
325             if (result == null) {
326                 RegexManager.printLookupResults(fullPath, debugResults);
327             } else {
328                 System.out.println(fullPath + " successfully matched");
329             }
330         }
331         if (firstInfo != null && info.value != null) {
332             firstInfo.value = info.value;
333         }
334         return result;
335     }
336 
337     /**
338      * Attempts to match an xpath and adds the results of a successful match to
339      * the specified map
340      *
341      * @param xpath
342      *            the xpath to be matched
343      * @param cldrFile
344      *            the CLDR file to get locale data from
345      * @param validRbPaths
346      *            the set of valid rbPaths that the result must belong
347      *            to, null if such a requirement does not exist
348      * @param pathValueMap
349      *            the map that the results will be added to
350      */
addMatchesForPath(String xpath, CLDRFile cldrFile, Set<String> validRbPaths, RegexLookup<RegexResult> pathConverter, Map<String, CldrArray> pathValueMap)351     private void addMatchesForPath(String xpath, CLDRFile cldrFile,
352         Set<String> validRbPaths, RegexLookup<RegexResult> pathConverter,
353         Map<String, CldrArray> pathValueMap) {
354         Output<Finder> matcher = new Output<Finder>();
355         Output<String[]> firstInfo = new Output<>();
356         RegexResult regexResult = matchXPath(pathConverter,
357             cldrFile, xpath, matcher, firstInfo);
358         if (regexResult == null) return;
359 //        String[] arguments = matcher.value.getInfo();
360         String[] arguments = firstInfo.value;
361         String cldrValue = getStringValue(cldrFile, xpath);
362         for (PathValueInfo info : regexResult) {
363             String rbPath = info.processRbPath(arguments);
364             // Don't add additional paths at this stage.
365             if (validRbPaths != null && !validRbPaths.contains(rbPath)) continue;
366             CldrArray valueList = RegexManager.getCldrArray(rbPath, pathValueMap);
367             List<String> values = info.processValues(arguments, cldrValue);
368             String baseXPath = info.processXPath(arguments, xpath);
369             String groupKey = info.processGroupKey(arguments);
370             valueList.put(baseXPath, values, groupKey);
371         }
372     }
373 
374     /**
375      * @param cldrFile
376      * @param xpath
377      * @return the value of the specified xpath (fallback or otherwise)
378      */
getStringValue(CLDRFile cldrFile, String xpath)379     private String getStringValue(CLDRFile cldrFile, String xpath) {
380         String value = cldrFile.getStringValue(xpath);
381         // HACK: DAIP doesn't currently make spaces in currency formats non-breaking.
382         // Remove this when fixed.
383         if (NumericType.getNumericType(xpath) == NumericType.CURRENCY) {
384             value = value.replace(' ', '\u00A0');
385         }
386         return value;
387     }
388 
389     /**
390      * Adds all mappings that couldn't be represented in the ldml2icu.txt file.
391      *
392      * @param cldrResolved
393      * @param locale
394      */
hackAddExtras(CLDRFile cldrResolved, String locale, IcuData icuData)395     private void hackAddExtras(CLDRFile cldrResolved, String locale, IcuData icuData) {
396         // Specify parent of non-language locales.
397         String parent = supplementalDataInfo.getExplicitParentLocale(locale);
398         if (parent != null) {
399             icuData.add("/%%Parent", parent);
400         }
401 
402         // <version number="$Revision: 5806 $"/>
403         String version = cldrResolved.getFullXPath("//ldml/identity/version");
404         icuData.add("/Version", MapperUtils.formatVersion(version));
405 
406         // PaperSize:intvector{ 279, 216, } - now in supplemental
407         // MeasurementSystem:int{1} - now in supplemental
408 
409         // Default calendar.
410         String localeID = cldrResolved.getLocaleID();
411         String calendar = getCalendarIfDifferent(localeID);
412         if (calendar != null) {
413             icuData.add("/calendar/default", calendar);
414         }
415     }
416 
417     /**
418      * Returns the default calendar to be used for a locale. If the default
419      * calendar for the parent locale is the same, null is returned.
420      */
getCalendarIfDifferent(String localeID)421     private String getCalendarIfDifferent(String localeID) {
422         String calendar = getCalendar(localeID);
423         if (calendar == null) return null;
424         String parent = LocaleIDParser.getParent(localeID);
425         String parentCalendar = null;
426         while (parentCalendar == null && parent != null) {
427             parentCalendar = getCalendar(parent);
428             parent = LocaleIDParser.getParent(parent);
429         }
430         return calendar.equals(parentCalendar) ? null : calendar;
431     }
432 
433     /**
434      * Returns the default calendar to be used for a locale, if any.
435      */
getCalendar(String localeID)436     private String getCalendar(String localeID) {
437         LanguageTagParser parser = new LanguageTagParser().set(localeID);
438         String region = localeID.equals("root") ? "001" : parser.getRegion();
439         if (region.equals("")) {
440             localeID = supplementalDataInfo.getLikelySubtags().get(parser.getLanguage());
441             if (localeID == null) {
442                 throw new RuntimeException("Likely subtag not found for " + parser.getLanguage());
443             }
444             parser.set(localeID);
445             region = parser.getRegion();
446             if (region == null) region = "001";
447         }
448         List<String> calendars = supplementalDataInfo.getCalendars(region);
449         return calendars == null ? null : calendars.get(0);
450     }
451 
452     //private String getMeasurementToDisplay(String localeID, MeasurementType measurementType) {...} // deleted
453 
454     /**
455      * @param localeID
456      * @param measurementType
457      *            the type of measurement required
458      * @return the measurement of the specified locale
459      */
460 //    private String getMeasurement(String localeID, MeasurementType measurementType) {
461 //        String region = localeID.equals("root") ? "001" : new LanguageTagParser().set(localeID).getRegion();
462 //        Map<MeasurementType, Map<String, String>> regionMeasurementData = supplementalDataInfo
463 //            .getTerritoryMeasurementData();
464 //        Map<String, String> typeMap = regionMeasurementData.get(measurementType);
465 //        return typeMap.get(region);
466 //    }     //not used
467 
468     /**
469      * Sets xpath to monitor for debugging purposes.
470      * @param debugXPath
471      */
setDebugXPath(String debugXPath)472     public void setDebugXPath(String debugXPath) {
473         this.debugXPath = debugXPath;
474     }
475 
476     /**
477      * @param xpath
478      * @return true if the xpath is to be debugged
479      */
isDebugXPath(String xpath)480     boolean isDebugXPath(String xpath) {
481         return debugXPath == null ? false : xpath.startsWith(debugXPath);
482     }
483 
484     @Override
generateMakefile(Collection<String> aliases)485     public Makefile generateMakefile(Collection<String> aliases) {
486         Makefile makefile = new Makefile("GENRB");
487         makefile.addSyntheticAlias(aliases);
488         makefile.addAliasSource();
489         makefile.addSource(sources);
490         return makefile;
491     }
492 }
493