1 package org.unicode.cldr.unittest; 2 3 import java.io.IOException; 4 import java.io.PrintWriter; 5 import java.util.EnumMap; 6 import java.util.HashMap; 7 import java.util.HashSet; 8 import java.util.LinkedHashMap; 9 import java.util.Map; 10 import java.util.Map.Entry; 11 import java.util.Set; 12 import java.util.TreeMap; 13 import java.util.TreeSet; 14 15 import org.unicode.cldr.draft.FileUtilities; 16 import org.unicode.cldr.util.CLDRConfig; 17 import org.unicode.cldr.util.CLDRFile; 18 import org.unicode.cldr.util.CLDRPaths; 19 import org.unicode.cldr.util.Factory; 20 import org.unicode.cldr.util.LanguageTagParser; 21 import org.unicode.cldr.util.Level; 22 import org.unicode.cldr.util.Organization; 23 import org.unicode.cldr.util.StandardCodes; 24 import org.unicode.cldr.util.SupplementalDataInfo; 25 import org.unicode.cldr.util.With; 26 import org.unicode.cldr.util.XPathParts; 27 28 import com.ibm.icu.dev.util.CollectionUtilities; 29 import com.ibm.icu.impl.Relation; 30 import com.ibm.icu.impl.Row; 31 import com.ibm.icu.impl.Row.R2; 32 import com.ibm.icu.text.DateTimePatternGenerator; 33 import com.ibm.icu.text.DateTimePatternGenerator.VariableField; 34 35 public class CheckYear { 36 static CLDRConfig testInfo = CLDRConfig.getInstance(); 37 private static final StandardCodes STANDARD_CODES = StandardCodes.make(); 38 private static final String LOCALES = ".*"; 39 private static final String[] STOCK = { "short", "medium", "long", "full" }; 40 41 enum Category { 42 Year2_MonthNumeric, Year2_Other, Year4_MonthNumeric, Year4_Other 43 } 44 45 static DateTimePatternGenerator dtp = DateTimePatternGenerator 46 .getEmptyInstance(); 47 static DateTimePatternGenerator.FormatParser formatParser = new DateTimePatternGenerator.FormatParser(); 48 49 // mismatches between stocks 50 static Map<String, Relation<String, String>> stock2skeleton2locales = new LinkedHashMap<String, Relation<String, String>>(); 51 static { 52 for (String stock : STOCK) { 53 stock2skeleton2locales.put("date-" + stock, Relation.of( 54 new TreeMap<String, Set<String>>(), TreeSet.class)); 55 } 56 for (String stock : STOCK) { 57 stock2skeleton2locales.put("time-" + stock, Relation.of( 58 new TreeMap<String, Set<String>>(), TreeSet.class)); 59 } 60 } 61 62 static class LocaleInfo { 63 private static final boolean DEBUG = false; 64 // information on the type of years 65 Relation<Category, String> category2base = Relation.of( 66 new EnumMap<Category, Set<String>>(Category.class), 67 TreeSet.class); 68 // collisions between baseSkeletons 69 Map<String, Relation<String, Row.R2<String, String>>> base2BasePatterns2Info = new TreeMap<String, Relation<String, Row.R2<String, String>>>(); 70 71 Map<String, String> skeleton2pattern = new HashMap<String, String>(); 72 recordStockTime(String localeId, String stock, String dateTimePattern)73 public void recordStockTime(String localeId, String stock, 74 String dateTimePattern) { 75 String skeleton = dtp.getSkeleton(dateTimePattern); 76 String base = getBaseSkeleton(skeleton); 77 stock2skeleton2locales.get("time-" + stock).put(skeleton, localeId); 78 recordBase(base, skeleton, dateTimePattern); 79 } 80 recordStock(String localeId, String stock, String dateTimePattern)81 public void recordStock(String localeId, String stock, 82 String dateTimePattern) { 83 String skeleton = dtp.getSkeleton(dateTimePattern); 84 String base = getBaseSkeleton(skeleton); 85 stock2skeleton2locales.get("date-" + stock).put( 86 skeleton.replace("yyyy", "y"), localeId); 87 String key = skeleton + "*" + stock.charAt(0); 88 recordBase(base, skeleton, dateTimePattern); 89 recordYearStuff(base, key, dateTimePattern); 90 } 91 record(String skeleton, String dateTimePattern)92 public void record(String skeleton, String dateTimePattern) { 93 String base = getBaseSkeleton(skeleton); 94 recordBase(base, skeleton, dateTimePattern); 95 recordYearStuff(base, skeleton, dateTimePattern); 96 } 97 recordBase(String base, String skeleton, String dateTimePattern)98 public void recordBase(String base, String skeleton, 99 String dateTimePattern) { 100 String coreBase = getCoreSkeleton(base); 101 Relation<String, Row.R2<String, String>> basePatterns2Info = base2BasePatterns2Info 102 .get(coreBase); 103 if (basePatterns2Info == null) { 104 base2BasePatterns2Info 105 .put(coreBase, 106 basePatterns2Info = Relation 107 .of(new TreeMap<String, Set<Row.R2<String, String>>>(), 108 TreeSet.class)); 109 } 110 // adjust the pattern to correspond to the base fields 111 // String coreSkeleton = getCoreSkeleton(skeleton); 112 String minimizedPattern = replaceFieldTypes(dateTimePattern, 113 coreBase, !coreBase.equals(base)); 114 basePatterns2Info.put(minimizedPattern, 115 Row.of(skeleton, dateTimePattern)); 116 // if (skeleton2pattern.put(skeleton, basePattern) != null) { 117 // throw new IllegalArgumentException(); 118 // } 119 } 120 getCoreSkeleton(String skeleton)121 public String getCoreSkeleton(String skeleton) { 122 int slashPos = skeleton.indexOf('/'); 123 String s = slashPos < 0 ? skeleton : skeleton 124 .substring(0, slashPos); 125 return s; 126 } 127 128 private void recordYearStuff(String base, String skeleton, 129 String dateTimePattern) { 130 // do the year stuff 131 if (!dateTimePattern.contains("y")) { 132 return; 133 } 134 boolean isDigit4 = true; 135 if (dateTimePattern.contains("yyyy")) { 136 // nothing 137 } else if (dateTimePattern.contains("yy")) { 138 isDigit4 = false; 139 } 140 boolean monthNumeric = false; 141 if (dateTimePattern.contains("MMM") 142 || dateTimePattern.contains("LLL")) { 143 // nothing 144 } else if (dateTimePattern.contains("M") 145 || dateTimePattern.contains("L")) { 146 monthNumeric = true; 147 } 148 if (isDigit4) { 149 if (monthNumeric) { 150 category2base.put(Category.Year4_MonthNumeric, skeleton); 151 } else { 152 category2base.put(Category.Year4_Other, skeleton); 153 } 154 } else { 155 if (monthNumeric) { 156 category2base.put(Category.Year2_MonthNumeric, skeleton); 157 } else { 158 category2base.put(Category.Year2_Other, skeleton); 159 } 160 } 161 } 162 163 public String replaceFieldTypes(String dateTimePattern, 164 String skeleton, boolean isInterval) { 165 if (!isInterval) { 166 return replaceFieldPartsCompletely(dateTimePattern, skeleton); 167 } else { 168 String part = getCorePattern(dateTimePattern); 169 return replaceFieldPartsCompletely(part, skeleton); 170 } 171 } 172 173 public String replaceFieldPartsCompletely(String dateTimePattern, 174 String skeleton) { 175 String minimizedPattern = dtp.replaceFieldTypes(dateTimePattern, 176 skeleton); 177 178 // fix numerics 179 StringBuilder result = new StringBuilder(); 180 for (Object item : formatParser.set(minimizedPattern).getItems()) { 181 if (item instanceof String) { 182 Object quoteLiteral = formatParser.quoteLiteral(item 183 .toString()); 184 result.append(quoteLiteral); 185 } else { 186 VariableField item2 = (DateTimePatternGenerator.VariableField) item; 187 if (item2.isNumeric()) { 188 result.append(item.toString().charAt(0)); 189 } else { 190 result.append(item); 191 } 192 } 193 } 194 String resultString = result.toString(); 195 return resultString; 196 } 197 198 private String getCorePattern(String intervalPattern) { 199 // get up to the first duplicate field. Then compare the result on 200 // both sides 201 StringBuilder b = new StringBuilder(); 202 StringBuilder result = new StringBuilder(); 203 boolean firstPart = true; 204 int endFirstPart = -1; 205 int startSecondPart = -1; 206 int goodSoFar = -1; 207 Set<Integer> firstComponents = new HashSet<Integer>(); 208 Set<Integer> secondComponents = new HashSet<Integer>(); 209 for (Object item : formatParser.set(intervalPattern).getItems()) { 210 if (item instanceof String) { 211 Object quoteLiteral = formatParser.quoteLiteral(item 212 .toString()); 213 b.append(quoteLiteral); 214 goodSoFar = result.length(); 215 result.append(quoteLiteral); 216 } else { 217 VariableField item2 = (DateTimePatternGenerator.VariableField) item; 218 int type = item2.getType(); 219 if (firstPart && firstComponents.contains(type)) { 220 firstPart = false; 221 startSecondPart = b.length(); 222 } 223 b.append(item); 224 if (firstPart) { 225 endFirstPart = b.length(); 226 firstComponents.add(type); 227 result.append(item); 228 } else { 229 secondComponents.add(type); 230 if (firstComponents.contains(type)) { 231 result.setLength(goodSoFar); 232 } else { 233 result.append(item); 234 } 235 } 236 } 237 } 238 String normalized = b.toString(); 239 if (!normalized.equals(intervalPattern)) { 240 System.out.println("Not normalized: " + intervalPattern + "\t" 241 + normalized); 242 } 243 if (endFirstPart < 0 || startSecondPart < 0) { 244 throw new IllegalArgumentException("Illegal interval pattern: " 245 + intervalPattern); 246 } else { 247 if (DEBUG) 248 System.out.println(normalized.substring(0, endFirstPart) 249 + "$$" 250 + normalized.substring(endFirstPart, 251 startSecondPart) 252 + "$$" 253 + normalized.substring(startSecondPart) + "\t=>\t" 254 + result); 255 } 256 return result.toString(); 257 } 258 259 private String getBaseSkeleton(String skeleton) { 260 int slashPos = skeleton.indexOf('/'); 261 String core = skeleton; 262 String diff = ""; 263 if (slashPos >= 0) { 264 core = skeleton.substring(0, slashPos); 265 diff = skeleton.substring(slashPos); 266 } 267 core = dtp.getBaseSkeleton(core); 268 return core + diff; 269 } 270 271 } 272 273 static Map<String, LocaleInfo> data = new TreeMap<String, LocaleInfo>(); 274 275 // private static final Relation<String,String> digit4 = Relation.of(new 276 // TreeMap<String,Set<String>>(), 277 // TreeSet.class); 278 // private static final Relation<String,String> digit2 = Relation.of(new 279 // TreeMap<String,Set<String>>(), 280 // TreeSet.class); 281 282 public static void main(String[] args) throws IOException { 283 CLDRFile englishFile = testInfo.getEnglish(); 284 285 Factory factory = Factory.make(CLDRPaths.TMP2_DIRECTORY 286 + "vxml/common/main/", LOCALES); 287 String calendarID = "gregorian"; 288 System.out.println("Total locales: " 289 + factory.getAvailableLanguages().size()); 290 Map<String, String> sorted = new TreeMap<String, String>(); 291 SupplementalDataInfo sdi = SupplementalDataInfo.getInstance(); 292 Set<String> defaultContent = sdi.getDefaultContentLocales(); 293 LanguageTagParser ltp = new LanguageTagParser(); 294 295 for (String localeID : factory.getAvailableLanguages()) { 296 if (!ltp.set(localeID).getRegion().isEmpty()) { 297 continue; 298 } 299 if (defaultContent.contains(localeID)) { 300 System.out.println("Skipping default content: " + localeID); 301 continue; 302 } 303 sorted.put(englishFile.getName(localeID, true), localeID); 304 data.put(localeID, new LocaleInfo()); 305 } 306 307 gatherInfo(factory, calendarID, sorted); 308 309 writeYearWidths(sorted, true, "year-width-diff.txt"); 310 writeYearWidths(sorted, false, "year-width-diff-other.txt"); 311 312 writeConflictingStockItems(true, "conflicting-stock.txt"); 313 writeConflictingStockItems(false, "conflicting-stock-other.txt"); 314 315 writeConflictingPatterns(sorted, true, "conflicting-patterns.txt"); 316 writeConflictingPatterns(sorted, false, 317 "conflicting-patterns-other.txt"); 318 } 319 320 public static void gatherInfo(Factory factory, String calendarID, 321 Map<String, String> sorted) throws IOException { 322 XPathParts parts = new XPathParts(); 323 324 for (Entry<String, String> entry : sorted.entrySet()) { 325 String localeId = entry.getValue(); 326 CLDRFile file = factory.make(localeId, true); 327 LocaleInfo localeInfo = data.get(localeId); 328 for (String stock : STOCK) { 329 String path = "//ldml/dates/calendars/calendar[@type=\"" 330 + calendarID 331 + "\"]/dateFormats/dateFormatLength[@type=\"" 332 + stock 333 + "\"]/dateFormat[@type=\"standard\"]/pattern[@type=\"standard\"]"; 334 String dateTimePattern = file.getStringValue(path); 335 localeInfo.recordStock(localeId, stock, dateTimePattern); 336 path = "//ldml/dates/calendars/calendar[@type=\"" 337 + calendarID 338 + "\"]/timeFormats/timeFormatLength[@type=\"" 339 + stock 340 + "\"]/timeFormat[@type=\"standard\"]/pattern[@type=\"standard\"]"; 341 dateTimePattern = file.getStringValue(path); 342 localeInfo.recordStockTime(localeId, stock, dateTimePattern); 343 } 344 for (String path : With 345 .in(file.iterator("//ldml/dates/calendars/calendar[@type=\"" 346 + calendarID 347 + "\"]/dateTimeFormats/availableFormats/dateFormatItem"))) { 348 String key = parts.set(path).getAttributeValue(-1, "id"); 349 String value = file.getStringValue(path); 350 localeInfo.record(key, value); 351 } 352 for (String path : With 353 .in(file.iterator("//ldml/dates/calendars/calendar[@type=\"" 354 + calendarID 355 + "\"]/dateTimeFormats/intervalFormats/intervalFormatItem"))) { 356 String skeleton = parts.set(path).getAttributeValue(-2, "id"); 357 String diff = parts.set(path).getAttributeValue(-1, "id"); 358 String value = file.getStringValue(path); 359 localeInfo.record(skeleton + "/" + diff, value); 360 } 361 } 362 } 363 364 public static void writeYearWidths(Map<String, String> sorted, 365 boolean modern, String filename) throws IOException { 366 PrintWriter out = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY 367 + "datecheck/", filename); 368 out.println("Name\tid\t" 369 + CollectionUtilities.join(Category.values(), "\t")); 370 for (Entry<String, String> entry : sorted.entrySet()) { 371 String localeId = entry.getValue(); 372 boolean priority = getPriority(localeId); 373 if (modern != priority) { 374 continue; 375 } 376 String name = entry.getKey(); 377 LocaleInfo localeInfo = data.get(localeId); 378 out.print(name + "\t" + localeId); 379 for (Category item : Category.values()) { 380 Set<String> items = localeInfo.category2base.get(item); 381 if (items != null) { 382 out.print("\t" + CollectionUtilities.join(items, " ")); 383 } else { 384 out.print("\t"); 385 } 386 } 387 out.println(); 388 } 389 out.close(); 390 } 391 392 public static void writeConflictingStockItems(boolean modern, 393 String filename) throws IOException { 394 PrintWriter out; 395 System.out.println("\nMismatched Stock items\n"); 396 out = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY 397 + "datecheck/", filename); 398 out.println("Stock\tSkeleton\tLocales"); 399 for (Entry<String, Relation<String, String>> stockAndSkeleton2locales : stock2skeleton2locales 400 .entrySet()) { 401 String stock = stockAndSkeleton2locales.getKey(); 402 for (Entry<String, Set<String>> entry2 : stockAndSkeleton2locales 403 .getValue().keyValuesSet()) { 404 String filtered = filter(entry2.getValue(), modern); 405 if (filtered.isEmpty()) { 406 continue; 407 } 408 out.println(stock + "\t" + entry2.getKey() + "\t" + filtered); 409 } 410 } 411 out.close(); 412 } 413 414 private static String filter(Set<String> value, boolean modern) { 415 StringBuilder b = new StringBuilder(); 416 for (String localeId : value) { 417 if (modern != getPriority(localeId)) { 418 continue; 419 } 420 if (b.length() != 0) { 421 b.append(" "); 422 } 423 b.append(localeId); 424 } 425 return b.toString(); 426 } 427 428 public static void writeConflictingPatterns(Map<String, String> sorted, 429 boolean modern, String filename) throws IOException { 430 PrintWriter out; 431 out = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY 432 + "datecheck/", filename); 433 out.println("Language\tId\tMin. Skeleton\tMin Pat1\tskeleton → pattern\tMin Pat2\tskeleton → pattern\tMin Pat3\tskeleton → pattern"); 434 for (Entry<String, String> entry : sorted.entrySet()) { 435 String localeId = entry.getValue(); 436 if (modern != getPriority(localeId)) { 437 continue; 438 } 439 String name = entry.getKey(); 440 LocaleInfo localeInfo = data.get(localeId); 441 442 for (Entry<String, Relation<String, R2<String, String>>> baseAndBasePatterns2Info : localeInfo.base2BasePatterns2Info 443 .entrySet()) { 444 String base = baseAndBasePatterns2Info.getKey(); 445 Relation<String, R2<String, String>> basePatterns2Info = baseAndBasePatterns2Info 446 .getValue(); 447 if (basePatterns2Info.size() == 1) { 448 continue; 449 } 450 // Ewe ee MMM LLL → ‹[MMM, LLL]› 451 // Ewe ee MMM MMM → ‹[MMM/M, MMM–MMM]› 452 // => Ewe ee MMM ‹LLL›: tab MMM → ‹LLL› tab ‹MMM›: tab MMM/M → 453 // ‹MMM–MMM› 454 StringBuilder s = new StringBuilder(name + "\t" + localeId 455 + "\t" + base); 456 457 for (Entry<String, Set<R2<String, String>>> basePatternsAndInfo : basePatterns2Info 458 .keyValuesSet()) { 459 String basePattern = basePatternsAndInfo.getKey(); 460 s.append("\t‹" + basePattern + "›:\t\""); 461 boolean first = true; 462 for (R2<String, String> info : basePatternsAndInfo 463 .getValue()) { 464 if (first) { 465 first = false; 466 } else { 467 s.append(";\n"); 468 } 469 s.append(info.get0() + " → ‹" + info.get1() + "›"); 470 } 471 s.append("\""); 472 } 473 out.println(s); 474 } 475 } 476 out.close(); 477 } 478 479 public static boolean getPriority(String localeId) { 480 return STANDARD_CODES.getLocaleCoverageLevel( 481 Organization.google.toString(), localeId) == Level.MODERN 482 || STANDARD_CODES.getLocaleCoverageLevel( 483 Organization.apple.toString(), localeId) == Level.MODERN 484 || STANDARD_CODES.getLocaleCoverageLevel( 485 Organization.ibm.toString(), localeId) == Level.MODERN; 486 } 487 } 488