• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Created on May 19, 2005
3  * Copyright (C) 2004-2011, Unicode, Inc., International Business Machines Corporation, and others.
4  * For terms of use, see http://www.unicode.org/terms_of_use.html
5  */
6 package org.unicode.cldr.tool;
7 
8 import java.util.HashMap;
9 import java.util.Iterator;
10 import java.util.Map;
11 import java.util.Set;
12 import java.util.TreeMap;
13 import java.util.TreeSet;
14 
15 import org.unicode.cldr.icu.CollationMapper;
16 import org.unicode.cldr.icu.IcuData;
17 import org.unicode.cldr.util.LocaleIDParser;
18 import org.unicode.cldr.util.Log;
19 
20 import com.ibm.icu.text.Collator;
21 import com.ibm.icu.text.RuleBasedCollator;
22 import com.ibm.icu.text.Transliterator;
23 import com.ibm.icu.util.ULocale;
24 
25 public class GenerateCldrCollationTests {
26     String sourceDir;
27     Set<String> validLocales = new TreeSet<String>();
28     Map<String, Object> ulocale_rules = new TreeMap<String, Object>(GenerateCldrTests.ULocaleComparator);
29     Map<String, Map<String, RuleBasedCollator>> locale_types_rules = new TreeMap<String, Map<String, RuleBasedCollator>>();
30     Map<RuleBasedCollator, RuleBasedCollator> collation_collation = new HashMap<RuleBasedCollator, RuleBasedCollator>();
31     RuleBasedCollator emptyCollator = (RuleBasedCollator) Collator.getInstance(new ULocale(""));
32 
getAvailableSet()33     public Set<String> getAvailableSet() {
34         return ulocale_rules.keySet();
35     }
36 
getInstance(String locale)37     public RuleBasedCollator getInstance(String locale) {
38         return (RuleBasedCollator) ulocale_rules.get(locale);
39     }
40 
show()41     void show() {
42         Log.logln("Showing Locales");
43         Log.logln("Unique Collators: " + collation_collation.size());
44         for (Iterator it2 = ulocale_rules.keySet().iterator(); it2.hasNext();) {
45             ULocale locale = (ULocale) it2.next();
46             RuleBasedCollator col = (RuleBasedCollator) ulocale_rules.get(locale);
47             Log.logln("\t" + locale + ", " + col.getRules());
48         }
49     }
50 
GenerateCldrCollationTests(String sourceDir, String localeRegex, Set<String> locales)51     GenerateCldrCollationTests(String sourceDir, String localeRegex, Set<String> locales) throws Exception {
52         this.sourceDir = sourceDir;
53         Set<String> s = GenerateCldrTests.getMatchingXMLFiles(sourceDir, localeRegex);
54         for (Iterator<String> it = s.iterator(); it.hasNext();) {
55             getCollationRules(it.next());
56         }
57 
58         // now fixup the validLocales, adding in what they inherit
59         // TODO, add check: validSubLocales are masked by intervening locales.
60         for (Iterator<String> it = validLocales.iterator(); it.hasNext();) {
61             String locale = it.next();
62             Map<String, RuleBasedCollator> types_rules = locale_types_rules.get(locale);
63             if (types_rules != null)
64                 Log.logln("Weird: overlap in validLocales: " + locale);
65             else {
66                 for (String parentlocale = LocaleIDParser.getSimpleParent(locale); parentlocale != null; parentlocale = LocaleIDParser
67                     .getSimpleParent(parentlocale)) {
68                     types_rules = locale_types_rules.get(parentlocale);
69                     if (types_rules != null) {
70                         locale_types_rules.put(locale, types_rules);
71                         break;
72                     }
73                 }
74             }
75         }
76         // now generate the @-style locales
77         ulocale_rules.put("root", Collator.getInstance(ULocale.ROOT));
78 
79         for (Iterator<String> it = locale_types_rules.keySet().iterator(); it.hasNext();) {
80             String locale = it.next();
81             Map<String, RuleBasedCollator> types_rules = locale_types_rules.get(locale);
82             for (Iterator<String> it2 = types_rules.keySet().iterator(); it2.hasNext();) {
83                 String type = it2.next();
84                 // TODO fix HACK
85                 if (type.equals("unihan")) {
86                     if (!locale.startsWith("zh")) continue;
87                 }
88                 RuleBasedCollator col = (RuleBasedCollator) types_rules.get(type);
89                 String name = type.equals("standard") ? locale : locale + "@collation=" + type;
90                 ulocale_rules.put(name, col);
91             }
92         }
93         // now flesh out
94         // Collator root = Collator.getInstance(ULocale.ROOT);
95         for (Iterator<String> it = locales.iterator(); it.hasNext();) {
96             String locale = it.next();
97             if (ulocale_rules.get(locale) != null) continue;
98             String parent = LocaleIDParser.getSimpleParent(locale); // GenerateCldrTests.getParent(locale);
99             if (parent == null) continue;
100             try {
101                 ulocale_rules.put(locale, ulocale_rules.get(parent));
102             } catch (RuntimeException e) {
103                 throw e;
104             }
105         }
106     }
107 
108     static Transliterator fromHex = Transliterator.getInstance("hex-any");
109 
getCollationRules(String locale)110     private void getCollationRules(String locale) throws Exception {
111         System.out.println("Loading collation:\t" + locale);
112         CollationMapper mapper = new CollationMapper(sourceDir, null);
113         StringBuilder stringBuilder = new StringBuilder();
114         TreeMap<String, RuleBasedCollator> types_rules = new TreeMap<String, RuleBasedCollator>();
115         IcuData[] dataList = mapper.fillFromCldr(locale);
116         IcuData icuData = dataList[0];
117         for (String rbPath : icuData.keySet()) {
118             if (!rbPath.endsWith("/Sequence")) continue;
119             // remove the \ u's, because they blow up
120             stringBuilder.setLength(0);
121             for (String line : icuData.get(rbPath).get(0)) {
122                 stringBuilder.append(line);
123             }
124             String originalRules = stringBuilder.toString();
125             String rules = fromHex.transliterate(originalRules);
126             String name = rbPath.split("/")[2];
127             RuleBasedCollator fixed = generateCollator(locale, name, rules);
128             if (fixed != null) {
129                 Log.logln("Rules for: " + locale + ", " + name);
130                 Log.logln(rules);
131                 if (!rules.equals(originalRules)) {
132                     Log.logln("Original Rules from Ram: ");
133                     Log.logln(originalRules);
134                 }
135                 types_rules.put(name, fixed);
136             }
137             locale_types_rules.put(locale, types_rules);
138         }
139         // now get the valid sublocales
140         for (int i = 1; i < dataList.length; i++) {
141             IcuData subLocale = dataList[i];
142             Log.logln("Valid Sub Locale: " + subLocale.getName());
143             validLocales.add(subLocale.getName());
144         }
145     }
146 
147     /**
148      * @param locale
149      * @param current
150      * @param foo
151      * @param rules
152      */
generateCollator(String locale, String current, String rules)153     private RuleBasedCollator generateCollator(String locale, String current, String rules) {
154         RuleBasedCollator fixed = null;
155         try {
156             if (rules.equals(""))
157                 fixed = emptyCollator;
158             else {
159                 rules = GenerateCldrTests.replace(rules, "[optimize[", "[optimize [");
160                 rules = GenerateCldrTests.replace(rules, "[suppressContractions[", "[suppressContractions [");
161                 RuleBasedCollator col = new RuleBasedCollator(rules);
162                 fixed = (RuleBasedCollator) collation_collation.get(col);
163                 if (fixed == null) {
164                     collation_collation.put(col, col);
165                     fixed = col;
166                 }
167             }
168         } catch (Exception e) {
169             Log.logln("***Cannot create collator from: " + locale + ", " + current + ", " + rules);
170             e.printStackTrace(Log.getLog());
171             RuleBasedCollator coll = (RuleBasedCollator) Collator.getInstance(new ULocale(locale));
172             String oldrules = coll.getRules();
173             Log.logln("Old ICU4J: " + oldrules);
174             Log.logln("Equal?: " + oldrules.equals(rules));
175         }
176         return fixed;
177     }
178 }
179