1 package org.unicode.cldr.unittest; 2 3 import java.io.IOException; 4 import java.io.PrintWriter; 5 import java.util.EnumMap; 6 import java.util.HashMap; 7 import java.util.HashSet; 8 import java.util.LinkedHashMap; 9 import java.util.Map; 10 import java.util.Map.Entry; 11 import java.util.Set; 12 import java.util.TreeMap; 13 import java.util.TreeSet; 14 15 import org.unicode.cldr.draft.FileUtilities; 16 import org.unicode.cldr.util.CLDRConfig; 17 import org.unicode.cldr.util.CLDRFile; 18 import org.unicode.cldr.util.CLDRPaths; 19 import org.unicode.cldr.util.Factory; 20 import org.unicode.cldr.util.LanguageTagParser; 21 import org.unicode.cldr.util.Level; 22 import org.unicode.cldr.util.Organization; 23 import org.unicode.cldr.util.StandardCodes; 24 import org.unicode.cldr.util.SupplementalDataInfo; 25 import org.unicode.cldr.util.With; 26 import org.unicode.cldr.util.XPathParts; 27 28 import com.google.common.base.Joiner; 29 import com.ibm.icu.impl.Relation; 30 import com.ibm.icu.impl.Row; 31 import com.ibm.icu.impl.Row.R2; 32 import com.ibm.icu.text.DateTimePatternGenerator; 33 import com.ibm.icu.text.DateTimePatternGenerator.VariableField; 34 35 public class CheckYear { 36 static CLDRConfig testInfo = CLDRConfig.getInstance(); 37 private static final StandardCodes STANDARD_CODES = StandardCodes.make(); 38 private static final String LOCALES = ".*"; 39 private static final String[] STOCK = { "short", "medium", "long", "full" }; 40 41 enum Category { 42 Year2_MonthNumeric, Year2_Other, Year4_MonthNumeric, Year4_Other 43 } 44 45 static DateTimePatternGenerator dtp = DateTimePatternGenerator 46 .getEmptyInstance(); 47 static DateTimePatternGenerator.FormatParser formatParser = new DateTimePatternGenerator.FormatParser(); 48 49 // mismatches between stocks 50 static Map<String, Relation<String, String>> stock2skeleton2locales = new LinkedHashMap<String, Relation<String, String>>(); 51 static { 52 for (String stock : STOCK) { 53 stock2skeleton2locales.put("date-" + stock, Relation.of( 54 new TreeMap<String, Set<String>>(), TreeSet.class)); 55 } 56 for (String stock : STOCK) { 57 stock2skeleton2locales.put("time-" + stock, Relation.of( 58 new TreeMap<String, Set<String>>(), TreeSet.class)); 59 } 60 } 61 62 static class LocaleInfo { 63 private static final boolean DEBUG = false; 64 // information on the type of years 65 Relation<Category, String> category2base = Relation.of( 66 new EnumMap<Category, Set<String>>(Category.class), 67 TreeSet.class); 68 // collisions between baseSkeletons 69 Map<String, Relation<String, Row.R2<String, String>>> base2BasePatterns2Info = new TreeMap<String, Relation<String, Row.R2<String, String>>>(); 70 71 Map<String, String> skeleton2pattern = new HashMap<String, String>(); 72 recordStockTime(String localeId, String stock, String dateTimePattern)73 public void recordStockTime(String localeId, String stock, 74 String dateTimePattern) { 75 String skeleton = dtp.getSkeleton(dateTimePattern); 76 String base = getBaseSkeleton(skeleton); 77 stock2skeleton2locales.get("time-" + stock).put(skeleton, localeId); 78 recordBase(base, skeleton, dateTimePattern); 79 } 80 recordStock(String localeId, String stock, String dateTimePattern)81 public void recordStock(String localeId, String stock, 82 String dateTimePattern) { 83 String skeleton = dtp.getSkeleton(dateTimePattern); 84 String base = getBaseSkeleton(skeleton); 85 stock2skeleton2locales.get("date-" + stock).put( 86 skeleton.replace("yyyy", "y"), localeId); 87 String key = skeleton + "*" + stock.charAt(0); 88 recordBase(base, skeleton, dateTimePattern); 89 recordYearStuff(key, dateTimePattern); 90 } 91 record(String skeleton, String dateTimePattern)92 public void record(String skeleton, String dateTimePattern) { 93 String base = getBaseSkeleton(skeleton); 94 recordBase(base, skeleton, dateTimePattern); 95 recordYearStuff(skeleton, dateTimePattern); 96 } 97 recordBase(String base, String skeleton, String dateTimePattern)98 public void recordBase(String base, String skeleton, 99 String dateTimePattern) { 100 String coreBase = getCoreSkeleton(base); 101 Relation<String, Row.R2<String, String>> basePatterns2Info = base2BasePatterns2Info 102 .get(coreBase); 103 if (basePatterns2Info == null) { 104 base2BasePatterns2Info 105 .put(coreBase, 106 basePatterns2Info = Relation 107 .of(new TreeMap<String, Set<Row.R2<String, String>>>(), 108 TreeSet.class)); 109 } 110 // adjust the pattern to correspond to the base fields 111 // String coreSkeleton = getCoreSkeleton(skeleton); 112 String minimizedPattern = replaceFieldTypes(dateTimePattern, 113 coreBase, !coreBase.equals(base)); 114 basePatterns2Info.put(minimizedPattern, 115 Row.of(skeleton, dateTimePattern)); 116 // if (skeleton2pattern.put(skeleton, basePattern) != null) { 117 // throw new IllegalArgumentException(); 118 // } 119 } 120 getCoreSkeleton(String skeleton)121 public String getCoreSkeleton(String skeleton) { 122 int slashPos = skeleton.indexOf('/'); 123 String s = slashPos < 0 ? skeleton : skeleton 124 .substring(0, slashPos); 125 return s; 126 } 127 128 private void recordYearStuff(String skeleton, String dateTimePattern) { 129 // do the year stuff 130 if (!dateTimePattern.contains("y")) { 131 return; 132 } 133 boolean isDigit4 = true; 134 if (dateTimePattern.contains("yyyy")) { 135 // nothing 136 } else if (dateTimePattern.contains("yy")) { 137 isDigit4 = false; 138 } 139 boolean monthNumeric = false; 140 if (dateTimePattern.contains("MMM") 141 || dateTimePattern.contains("LLL")) { 142 // nothing 143 } else if (dateTimePattern.contains("M") 144 || dateTimePattern.contains("L")) { 145 monthNumeric = true; 146 } 147 if (isDigit4) { 148 if (monthNumeric) { 149 category2base.put(Category.Year4_MonthNumeric, skeleton); 150 } else { 151 category2base.put(Category.Year4_Other, skeleton); 152 } 153 } else { 154 if (monthNumeric) { 155 category2base.put(Category.Year2_MonthNumeric, skeleton); 156 } else { 157 category2base.put(Category.Year2_Other, skeleton); 158 } 159 } 160 } 161 162 public String replaceFieldTypes(String dateTimePattern, 163 String skeleton, boolean isInterval) { 164 if (!isInterval) { 165 return replaceFieldPartsCompletely(dateTimePattern, skeleton); 166 } else { 167 String part = getCorePattern(dateTimePattern); 168 return replaceFieldPartsCompletely(part, skeleton); 169 } 170 } 171 172 public String replaceFieldPartsCompletely(String dateTimePattern, 173 String skeleton) { 174 String minimizedPattern = dtp.replaceFieldTypes(dateTimePattern, 175 skeleton); 176 177 // fix numerics 178 StringBuilder result = new StringBuilder(); 179 for (Object item : formatParser.set(minimizedPattern).getItems()) { 180 if (item instanceof String) { 181 Object quoteLiteral = formatParser.quoteLiteral(item 182 .toString()); 183 result.append(quoteLiteral); 184 } else { 185 VariableField item2 = (DateTimePatternGenerator.VariableField) item; 186 if (item2.isNumeric()) { 187 result.append(item.toString().charAt(0)); 188 } else { 189 result.append(item); 190 } 191 } 192 } 193 String resultString = result.toString(); 194 return resultString; 195 } 196 197 private String getCorePattern(String intervalPattern) { 198 // get up to the first duplicate field. Then compare the result on 199 // both sides 200 StringBuilder b = new StringBuilder(); 201 StringBuilder result = new StringBuilder(); 202 boolean firstPart = true; 203 int endFirstPart = -1; 204 int startSecondPart = -1; 205 int goodSoFar = -1; 206 Set<Integer> firstComponents = new HashSet<Integer>(); 207 Set<Integer> secondComponents = new HashSet<Integer>(); 208 for (Object item : formatParser.set(intervalPattern).getItems()) { 209 if (item instanceof String) { 210 Object quoteLiteral = formatParser.quoteLiteral(item.toString()); 211 b.append(quoteLiteral); 212 goodSoFar = result.length(); 213 result.append(quoteLiteral); 214 } else { 215 VariableField item2 = (DateTimePatternGenerator.VariableField) item; 216 int type = item2.getType(); 217 if (firstPart && firstComponents.contains(type)) { 218 firstPart = false; 219 startSecondPart = b.length(); 220 } 221 b.append(item); 222 if (firstPart) { 223 endFirstPart = b.length(); 224 firstComponents.add(type); 225 result.append(item); 226 } else { 227 secondComponents.add(type); 228 if (firstComponents.contains(type)) { 229 result.setLength(goodSoFar); 230 } else { 231 result.append(item); 232 } 233 } 234 } 235 } 236 String normalized = b.toString(); 237 if (!normalized.equals(intervalPattern)) { 238 System.out.println("Not normalized: " + intervalPattern + "\t" 239 + normalized); 240 } 241 if (endFirstPart < 0 || startSecondPart < 0) { 242 throw new IllegalArgumentException("Illegal interval pattern: " 243 + intervalPattern); 244 } else { 245 if (DEBUG) 246 System.out.println(normalized.substring(0, endFirstPart) 247 + "$$" 248 + normalized.substring(endFirstPart, 249 startSecondPart) 250 + "$$" 251 + normalized.substring(startSecondPart) + "\t=>\t" 252 + result); 253 } 254 return result.toString(); 255 } 256 257 private String getBaseSkeleton(String skeleton) { 258 int slashPos = skeleton.indexOf('/'); 259 String core = skeleton; 260 String diff = ""; 261 if (slashPos >= 0) { 262 core = skeleton.substring(0, slashPos); 263 diff = skeleton.substring(slashPos); 264 } 265 core = dtp.getBaseSkeleton(core); 266 return core + diff; 267 } 268 269 } 270 271 static Map<String, LocaleInfo> data = new TreeMap<String, LocaleInfo>(); 272 273 // private static final Relation<String,String> digit4 = Relation.of(new 274 // TreeMap<String,Set<String>>(), 275 // TreeSet.class); 276 // private static final Relation<String,String> digit2 = Relation.of(new 277 // TreeMap<String,Set<String>>(), 278 // TreeSet.class); 279 280 public static void main(String[] args) throws IOException { 281 CLDRFile englishFile = testInfo.getEnglish(); 282 283 Factory factory = Factory.make(CLDRPaths.TMP2_DIRECTORY 284 + "vxml/common/main/", LOCALES); 285 String calendarID = "gregorian"; 286 System.out.println("Total locales: " 287 + factory.getAvailableLanguages().size()); 288 Map<String, String> sorted = new TreeMap<String, String>(); 289 SupplementalDataInfo sdi = SupplementalDataInfo.getInstance(); 290 Set<String> defaultContent = sdi.getDefaultContentLocales(); 291 LanguageTagParser ltp = new LanguageTagParser(); 292 293 for (String localeID : factory.getAvailableLanguages()) { 294 if (!ltp.set(localeID).getRegion().isEmpty()) { 295 continue; 296 } 297 if (defaultContent.contains(localeID)) { 298 System.out.println("Skipping default content: " + localeID); 299 continue; 300 } 301 sorted.put(englishFile.getName(localeID, true), localeID); 302 data.put(localeID, new LocaleInfo()); 303 } 304 305 gatherInfo(factory, calendarID, sorted); 306 307 writeYearWidths(sorted, true, "year-width-diff.txt"); 308 writeYearWidths(sorted, false, "year-width-diff-other.txt"); 309 310 writeConflictingStockItems(true, "conflicting-stock.txt"); 311 writeConflictingStockItems(false, "conflicting-stock-other.txt"); 312 313 writeConflictingPatterns(sorted, true, "conflicting-patterns.txt"); 314 writeConflictingPatterns(sorted, false, 315 "conflicting-patterns-other.txt"); 316 } 317 318 public static void gatherInfo(Factory factory, String calendarID, 319 Map<String, String> sorted) throws IOException { 320 321 for (Entry<String, String> entry : sorted.entrySet()) { 322 String localeId = entry.getValue(); 323 CLDRFile file = factory.make(localeId, true); 324 LocaleInfo localeInfo = data.get(localeId); 325 for (String stock : STOCK) { 326 String path = "//ldml/dates/calendars/calendar[@type=\"" 327 + calendarID 328 + "\"]/dateFormats/dateFormatLength[@type=\"" 329 + stock 330 + "\"]/dateFormat[@type=\"standard\"]/pattern[@type=\"standard\"]"; 331 String dateTimePattern = file.getStringValue(path); 332 localeInfo.recordStock(localeId, stock, dateTimePattern); 333 path = "//ldml/dates/calendars/calendar[@type=\"" 334 + calendarID 335 + "\"]/timeFormats/timeFormatLength[@type=\"" 336 + stock 337 + "\"]/timeFormat[@type=\"standard\"]/pattern[@type=\"standard\"]"; 338 dateTimePattern = file.getStringValue(path); 339 localeInfo.recordStockTime(localeId, stock, dateTimePattern); 340 } 341 for (String path : With 342 .in(file.iterator("//ldml/dates/calendars/calendar[@type=\"" 343 + calendarID 344 + "\"]/dateTimeFormats/availableFormats/dateFormatItem"))) { 345 XPathParts parts = XPathParts.getFrozenInstance(path); 346 String key = parts.getAttributeValue(-1, "id"); 347 String value = file.getStringValue(path); 348 localeInfo.record(key, value); 349 } 350 for (String path : With 351 .in(file.iterator("//ldml/dates/calendars/calendar[@type=\"" 352 + calendarID 353 + "\"]/dateTimeFormats/intervalFormats/intervalFormatItem"))) { 354 XPathParts parts = XPathParts.getFrozenInstance(path); 355 String skeleton = parts.getAttributeValue(-2, "id"); 356 String diff = parts.getAttributeValue(-1, "id"); 357 String value = file.getStringValue(path); 358 localeInfo.record(skeleton + "/" + diff, value); 359 } 360 } 361 } 362 363 public static void writeYearWidths(Map<String, String> sorted, 364 boolean modern, String filename) throws IOException { 365 PrintWriter out = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY 366 + "datecheck/", filename); 367 out.println("Name\tid\t" 368 + Joiner.on("\t").join(Category.values())); 369 for (Entry<String, String> entry : sorted.entrySet()) { 370 String localeId = entry.getValue(); 371 boolean priority = getPriority(localeId); 372 if (modern != priority) { 373 continue; 374 } 375 String name = entry.getKey(); 376 LocaleInfo localeInfo = data.get(localeId); 377 out.print(name + "\t" + localeId); 378 for (Category item : Category.values()) { 379 Set<String> items = localeInfo.category2base.get(item); 380 if (items != null) { 381 out.print("\t" + Joiner.on(" ").join(items)); 382 } else { 383 out.print("\t"); 384 } 385 } 386 out.println(); 387 } 388 out.close(); 389 } 390 391 public static void writeConflictingStockItems(boolean modern, 392 String filename) throws IOException { 393 PrintWriter out; 394 System.out.println("\nMismatched Stock items\n"); 395 out = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY 396 + "datecheck/", filename); 397 out.println("Stock\tSkeleton\tLocales"); 398 for (Entry<String, Relation<String, String>> stockAndSkeleton2locales : stock2skeleton2locales 399 .entrySet()) { 400 String stock = stockAndSkeleton2locales.getKey(); 401 for (Entry<String, Set<String>> entry2 : stockAndSkeleton2locales 402 .getValue().keyValuesSet()) { 403 String filtered = filter(entry2.getValue(), modern); 404 if (filtered.isEmpty()) { 405 continue; 406 } 407 out.println(stock + "\t" + entry2.getKey() + "\t" + filtered); 408 } 409 } 410 out.close(); 411 } 412 413 private static String filter(Set<String> value, boolean modern) { 414 StringBuilder b = new StringBuilder(); 415 for (String localeId : value) { 416 if (modern != getPriority(localeId)) { 417 continue; 418 } 419 if (b.length() != 0) { 420 b.append(" "); 421 } 422 b.append(localeId); 423 } 424 return b.toString(); 425 } 426 427 public static void writeConflictingPatterns(Map<String, String> sorted, 428 boolean modern, String filename) throws IOException { 429 PrintWriter out; 430 out = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY 431 + "datecheck/", filename); 432 out.println("Language\tId\tMin. Skeleton\tMin Pat1\tskeleton → pattern\tMin Pat2\tskeleton → pattern\tMin Pat3\tskeleton → pattern"); 433 for (Entry<String, String> entry : sorted.entrySet()) { 434 String localeId = entry.getValue(); 435 if (modern != getPriority(localeId)) { 436 continue; 437 } 438 String name = entry.getKey(); 439 LocaleInfo localeInfo = data.get(localeId); 440 441 for (Entry<String, Relation<String, R2<String, String>>> baseAndBasePatterns2Info : localeInfo.base2BasePatterns2Info 442 .entrySet()) { 443 String base = baseAndBasePatterns2Info.getKey(); 444 Relation<String, R2<String, String>> basePatterns2Info = baseAndBasePatterns2Info 445 .getValue(); 446 if (basePatterns2Info.size() == 1) { 447 continue; 448 } 449 // Ewe ee MMM LLL → ‹[MMM, LLL]› 450 // Ewe ee MMM MMM → ‹[MMM/M, MMM–MMM]› 451 // => Ewe ee MMM ‹LLL›: tab MMM → ‹LLL› tab ‹MMM›: tab MMM/M → 452 // ‹MMM–MMM› 453 StringBuilder s = new StringBuilder(name + "\t" + localeId 454 + "\t" + base); 455 456 for (Entry<String, Set<R2<String, String>>> basePatternsAndInfo : basePatterns2Info 457 .keyValuesSet()) { 458 String basePattern = basePatternsAndInfo.getKey(); 459 s.append("\t‹" + basePattern + "›:\t\""); 460 boolean first = true; 461 for (R2<String, String> info : basePatternsAndInfo 462 .getValue()) { 463 if (first) { 464 first = false; 465 } else { 466 s.append(";\n"); 467 } 468 s.append(info.get0() + " → ‹" + info.get1() + "›"); 469 } 470 s.append("\""); 471 } 472 out.println(s); 473 } 474 } 475 out.close(); 476 } 477 478 public static boolean getPriority(String localeId) { 479 return STANDARD_CODES.getLocaleCoverageLevel( 480 Organization.google.toString(), localeId) == Level.MODERN 481 || STANDARD_CODES.getLocaleCoverageLevel( 482 Organization.apple.toString(), localeId) == Level.MODERN 483 || STANDARD_CODES.getLocaleCoverageLevel( 484 Organization.ibm.toString(), localeId) == Level.MODERN; 485 } 486 } 487