1 /* 2 * Created on May 19, 2005 3 * Copyright (C) 2004-2011, Unicode, Inc., International Business Machines Corporation, and others. 4 * For terms of use, see http://www.unicode.org/terms_of_use.html 5 */ 6 package org.unicode.cldr.tool; 7 8 import java.util.HashMap; 9 import java.util.Iterator; 10 import java.util.Map; 11 import java.util.Set; 12 import java.util.TreeMap; 13 import java.util.TreeSet; 14 15 import org.unicode.cldr.icu.CollationMapper; 16 import org.unicode.cldr.icu.IcuData; 17 import org.unicode.cldr.util.LocaleIDParser; 18 import org.unicode.cldr.util.Log; 19 20 import com.ibm.icu.text.Collator; 21 import com.ibm.icu.text.RuleBasedCollator; 22 import com.ibm.icu.text.Transliterator; 23 import com.ibm.icu.util.ULocale; 24 25 public class GenerateCldrCollationTests { 26 String sourceDir; 27 Set<String> validLocales = new TreeSet<String>(); 28 Map<String, Object> ulocale_rules = new TreeMap<String, Object>(GenerateCldrTests.ULocaleComparator); 29 Map<String, Map<String, RuleBasedCollator>> locale_types_rules = new TreeMap<String, Map<String, RuleBasedCollator>>(); 30 Map<RuleBasedCollator, RuleBasedCollator> collation_collation = new HashMap<RuleBasedCollator, RuleBasedCollator>(); 31 RuleBasedCollator emptyCollator = (RuleBasedCollator) Collator.getInstance(new ULocale("")); 32 getAvailableSet()33 public Set<String> getAvailableSet() { 34 return ulocale_rules.keySet(); 35 } 36 getInstance(String locale)37 public RuleBasedCollator getInstance(String locale) { 38 return (RuleBasedCollator) ulocale_rules.get(locale); 39 } 40 show()41 void show() { 42 Log.logln("Showing Locales"); 43 Log.logln("Unique Collators: " + collation_collation.size()); 44 for (Iterator it2 = ulocale_rules.keySet().iterator(); it2.hasNext();) { 45 ULocale locale = (ULocale) it2.next(); 46 RuleBasedCollator col = (RuleBasedCollator) ulocale_rules.get(locale); 47 Log.logln("\t" + locale + ", " + col.getRules()); 48 } 49 } 50 GenerateCldrCollationTests(String sourceDir, String localeRegex, Set<String> locales)51 GenerateCldrCollationTests(String sourceDir, String localeRegex, Set<String> locales) throws Exception { 52 this.sourceDir = sourceDir; 53 Set<String> s = GenerateCldrTests.getMatchingXMLFiles(sourceDir, localeRegex); 54 for (Iterator<String> it = s.iterator(); it.hasNext();) { 55 getCollationRules(it.next()); 56 } 57 58 // now fixup the validLocales, adding in what they inherit 59 // TODO, add check: validSubLocales are masked by intervening locales. 60 for (Iterator<String> it = validLocales.iterator(); it.hasNext();) { 61 String locale = it.next(); 62 Map<String, RuleBasedCollator> types_rules = locale_types_rules.get(locale); 63 if (types_rules != null) 64 Log.logln("Weird: overlap in validLocales: " + locale); 65 else { 66 for (String parentlocale = LocaleIDParser.getSimpleParent(locale); parentlocale != null; parentlocale = LocaleIDParser 67 .getSimpleParent(parentlocale)) { 68 types_rules = locale_types_rules.get(parentlocale); 69 if (types_rules != null) { 70 locale_types_rules.put(locale, types_rules); 71 break; 72 } 73 } 74 } 75 } 76 // now generate the @-style locales 77 ulocale_rules.put("root", Collator.getInstance(ULocale.ROOT)); 78 79 for (Iterator<String> it = locale_types_rules.keySet().iterator(); it.hasNext();) { 80 String locale = it.next(); 81 Map<String, RuleBasedCollator> types_rules = locale_types_rules.get(locale); 82 for (Iterator<String> it2 = types_rules.keySet().iterator(); it2.hasNext();) { 83 String type = it2.next(); 84 // TODO fix HACK 85 if (type.equals("unihan")) { 86 if (!locale.startsWith("zh")) continue; 87 } 88 RuleBasedCollator col = (RuleBasedCollator) types_rules.get(type); 89 String name = type.equals("standard") ? locale : locale + "@collation=" + type; 90 ulocale_rules.put(name, col); 91 } 92 } 93 // now flesh out 94 // Collator root = Collator.getInstance(ULocale.ROOT); 95 for (Iterator<String> it = locales.iterator(); it.hasNext();) { 96 String locale = it.next(); 97 if (ulocale_rules.get(locale) != null) continue; 98 String parent = LocaleIDParser.getSimpleParent(locale); // GenerateCldrTests.getParent(locale); 99 if (parent == null) continue; 100 try { 101 ulocale_rules.put(locale, ulocale_rules.get(parent)); 102 } catch (RuntimeException e) { 103 throw e; 104 } 105 } 106 } 107 108 static Transliterator fromHex = Transliterator.getInstance("hex-any"); 109 getCollationRules(String locale)110 private void getCollationRules(String locale) throws Exception { 111 System.out.println("Loading collation:\t" + locale); 112 CollationMapper mapper = new CollationMapper(sourceDir, null); 113 StringBuilder stringBuilder = new StringBuilder(); 114 TreeMap<String, RuleBasedCollator> types_rules = new TreeMap<String, RuleBasedCollator>(); 115 IcuData[] dataList = mapper.fillFromCldr(locale); 116 IcuData icuData = dataList[0]; 117 for (String rbPath : icuData.keySet()) { 118 if (!rbPath.endsWith("/Sequence")) continue; 119 // remove the \ u's, because they blow up 120 stringBuilder.setLength(0); 121 for (String line : icuData.get(rbPath).get(0)) { 122 stringBuilder.append(line); 123 } 124 String originalRules = stringBuilder.toString(); 125 String rules = fromHex.transliterate(originalRules); 126 String name = rbPath.split("/")[2]; 127 RuleBasedCollator fixed = generateCollator(locale, name, rules); 128 if (fixed != null) { 129 Log.logln("Rules for: " + locale + ", " + name); 130 Log.logln(rules); 131 if (!rules.equals(originalRules)) { 132 Log.logln("Original Rules from Ram: "); 133 Log.logln(originalRules); 134 } 135 types_rules.put(name, fixed); 136 } 137 locale_types_rules.put(locale, types_rules); 138 } 139 // now get the valid sublocales 140 for (int i = 1; i < dataList.length; i++) { 141 IcuData subLocale = dataList[i]; 142 Log.logln("Valid Sub Locale: " + subLocale.getName()); 143 validLocales.add(subLocale.getName()); 144 } 145 } 146 147 /** 148 * @param locale 149 * @param current 150 * @param foo 151 * @param rules 152 */ generateCollator(String locale, String current, String rules)153 private RuleBasedCollator generateCollator(String locale, String current, String rules) { 154 RuleBasedCollator fixed = null; 155 try { 156 if (rules.equals("")) 157 fixed = emptyCollator; 158 else { 159 rules = GenerateCldrTests.replace(rules, "[optimize[", "[optimize ["); 160 rules = GenerateCldrTests.replace(rules, "[suppressContractions[", "[suppressContractions ["); 161 RuleBasedCollator col = new RuleBasedCollator(rules); 162 fixed = (RuleBasedCollator) collation_collation.get(col); 163 if (fixed == null) { 164 collation_collation.put(col, col); 165 fixed = col; 166 } 167 } 168 } catch (Exception e) { 169 Log.logln("***Cannot create collator from: " + locale + ", " + current + ", " + rules); 170 e.printStackTrace(Log.getLog()); 171 RuleBasedCollator coll = (RuleBasedCollator) Collator.getInstance(new ULocale(locale)); 172 String oldrules = coll.getRules(); 173 Log.logln("Old ICU4J: " + oldrules); 174 Log.logln("Equal?: " + oldrules.equals(rules)); 175 } 176 return fixed; 177 } 178 } 179