• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.unittest;
2 
3 import java.io.IOException;
4 import java.io.PrintWriter;
5 import java.util.EnumMap;
6 import java.util.HashMap;
7 import java.util.HashSet;
8 import java.util.LinkedHashMap;
9 import java.util.Map;
10 import java.util.Map.Entry;
11 import java.util.Set;
12 import java.util.TreeMap;
13 import java.util.TreeSet;
14 
15 import org.unicode.cldr.draft.FileUtilities;
16 import org.unicode.cldr.util.CLDRConfig;
17 import org.unicode.cldr.util.CLDRFile;
18 import org.unicode.cldr.util.CLDRPaths;
19 import org.unicode.cldr.util.Factory;
20 import org.unicode.cldr.util.LanguageTagParser;
21 import org.unicode.cldr.util.Level;
22 import org.unicode.cldr.util.Organization;
23 import org.unicode.cldr.util.StandardCodes;
24 import org.unicode.cldr.util.SupplementalDataInfo;
25 import org.unicode.cldr.util.With;
26 import org.unicode.cldr.util.XPathParts;
27 
28 import com.ibm.icu.dev.util.CollectionUtilities;
29 import com.ibm.icu.impl.Relation;
30 import com.ibm.icu.impl.Row;
31 import com.ibm.icu.impl.Row.R2;
32 import com.ibm.icu.text.DateTimePatternGenerator;
33 import com.ibm.icu.text.DateTimePatternGenerator.VariableField;
34 
35 public class CheckYear {
    /** Shared CLDR configuration; {@code main} obtains the English file from it. */
    static CLDRConfig testInfo = CLDRConfig.getInstance();
    /** Queried by {@code getPriority} for per-organization locale coverage levels. */
    private static final StandardCodes STANDARD_CODES = StandardCodes.make();
    /** Locale filter handed to {@code Factory.make} in {@code main} (presumably a regex matching all locales). */
    private static final String LOCALES = ".*";
    /** Stock format length names, in the order they are processed and used to build report keys. */
    private static final String[] STOCK = { "short", "medium", "long", "full" };

    /**
     * Buckets used by the year-width report: year width (2 vs. 4 digits)
     * crossed with whether the month is numeric or something else.
     */
    enum Category {
        Year2_MonthNumeric, Year2_Other, Year4_MonthNumeric, Year4_Other
    }

    /** Pattern generator created empty; used in this class for skeleton derivation and field-type replacement. */
    static DateTimePatternGenerator dtp = DateTimePatternGenerator
        .getEmptyInstance();
    /** Shared parser that splits a pattern into literal ({@code String}) and field ({@code VariableField}) items. */
    static DateTimePatternGenerator.FormatParser formatParser = new DateTimePatternGenerator.FormatParser();
48 
49     // mismatches between stocks
50     static Map<String, Relation<String, String>> stock2skeleton2locales = new LinkedHashMap<String, Relation<String, String>>();
51     static {
52         for (String stock : STOCK) {
53             stock2skeleton2locales.put("date-" + stock, Relation.of(
54                 new TreeMap<String, Set<String>>(), TreeSet.class));
55         }
56         for (String stock : STOCK) {
57             stock2skeleton2locales.put("time-" + stock, Relation.of(
58                 new TreeMap<String, Set<String>>(), TreeSet.class));
59         }
60     }
61 
62     static class LocaleInfo {
        /** When true, {@code getCorePattern} prints how it split each interval pattern. */
        private static final boolean DEBUG = false;
        // information on the type of years:
        // category -> keys (skeletons, or skeleton + "*" + stock initial) of the
        // year-bearing patterns that fall into that category
        Relation<Category, String> category2base = Relation.of(
            new EnumMap<Category, Set<String>>(Category.class),
            TreeSet.class);
        // collisions between baseSkeletons:
        // core base skeleton -> minimized pattern -> (skeleton, original pattern) pairs.
        // More than one minimized pattern under the same core base is reported as a conflict.
        Map<String, Relation<String, Row.R2<String, String>>> base2BasePatterns2Info = new TreeMap<String, Relation<String, Row.R2<String, String>>>();

        // NOTE(review): no active code visible in this class reads or writes this map.
        Map<String, String> skeleton2pattern = new HashMap<String, String>();
72 
recordStockTime(String localeId, String stock, String dateTimePattern)73         public void recordStockTime(String localeId, String stock,
74             String dateTimePattern) {
75             String skeleton = dtp.getSkeleton(dateTimePattern);
76             String base = getBaseSkeleton(skeleton);
77             stock2skeleton2locales.get("time-" + stock).put(skeleton, localeId);
78             recordBase(base, skeleton, dateTimePattern);
79         }
80 
recordStock(String localeId, String stock, String dateTimePattern)81         public void recordStock(String localeId, String stock,
82             String dateTimePattern) {
83             String skeleton = dtp.getSkeleton(dateTimePattern);
84             String base = getBaseSkeleton(skeleton);
85             stock2skeleton2locales.get("date-" + stock).put(
86                 skeleton.replace("yyyy", "y"), localeId);
87             String key = skeleton + "*" + stock.charAt(0);
88             recordBase(base, skeleton, dateTimePattern);
89             recordYearStuff(base, key, dateTimePattern);
90         }
91 
record(String skeleton, String dateTimePattern)92         public void record(String skeleton, String dateTimePattern) {
93             String base = getBaseSkeleton(skeleton);
94             recordBase(base, skeleton, dateTimePattern);
95             recordYearStuff(base, skeleton, dateTimePattern);
96         }
97 
recordBase(String base, String skeleton, String dateTimePattern)98         public void recordBase(String base, String skeleton,
99             String dateTimePattern) {
100             String coreBase = getCoreSkeleton(base);
101             Relation<String, Row.R2<String, String>> basePatterns2Info = base2BasePatterns2Info
102                 .get(coreBase);
103             if (basePatterns2Info == null) {
104                 base2BasePatterns2Info
105                     .put(coreBase,
106                         basePatterns2Info = Relation
107                             .of(new TreeMap<String, Set<Row.R2<String, String>>>(),
108                                 TreeSet.class));
109             }
110             // adjust the pattern to correspond to the base fields
111             // String coreSkeleton = getCoreSkeleton(skeleton);
112             String minimizedPattern = replaceFieldTypes(dateTimePattern,
113                 coreBase, !coreBase.equals(base));
114             basePatterns2Info.put(minimizedPattern,
115                 Row.of(skeleton, dateTimePattern));
116             // if (skeleton2pattern.put(skeleton, basePattern) != null) {
117             // throw new IllegalArgumentException();
118             // }
119         }
120 
getCoreSkeleton(String skeleton)121         public String getCoreSkeleton(String skeleton) {
122             int slashPos = skeleton.indexOf('/');
123             String s = slashPos < 0 ? skeleton : skeleton
124                 .substring(0, slashPos);
125             return s;
126         }
127 
128         private void recordYearStuff(String base, String skeleton,
129             String dateTimePattern) {
130             // do the year stuff
131             if (!dateTimePattern.contains("y")) {
132                 return;
133             }
134             boolean isDigit4 = true;
135             if (dateTimePattern.contains("yyyy")) {
136                 // nothing
137             } else if (dateTimePattern.contains("yy")) {
138                 isDigit4 = false;
139             }
140             boolean monthNumeric = false;
141             if (dateTimePattern.contains("MMM")
142                 || dateTimePattern.contains("LLL")) {
143                 // nothing
144             } else if (dateTimePattern.contains("M")
145                 || dateTimePattern.contains("L")) {
146                 monthNumeric = true;
147             }
148             if (isDigit4) {
149                 if (monthNumeric) {
150                     category2base.put(Category.Year4_MonthNumeric, skeleton);
151                 } else {
152                     category2base.put(Category.Year4_Other, skeleton);
153                 }
154             } else {
155                 if (monthNumeric) {
156                     category2base.put(Category.Year2_MonthNumeric, skeleton);
157                 } else {
158                     category2base.put(Category.Year2_Other, skeleton);
159                 }
160             }
161         }
162 
163         public String replaceFieldTypes(String dateTimePattern,
164             String skeleton, boolean isInterval) {
165             if (!isInterval) {
166                 return replaceFieldPartsCompletely(dateTimePattern, skeleton);
167             } else {
168                 String part = getCorePattern(dateTimePattern);
169                 return replaceFieldPartsCompletely(part, skeleton);
170             }
171         }
172 
173         public String replaceFieldPartsCompletely(String dateTimePattern,
174             String skeleton) {
175             String minimizedPattern = dtp.replaceFieldTypes(dateTimePattern,
176                 skeleton);
177 
178             // fix numerics
179             StringBuilder result = new StringBuilder();
180             for (Object item : formatParser.set(minimizedPattern).getItems()) {
181                 if (item instanceof String) {
182                     Object quoteLiteral = formatParser.quoteLiteral(item
183                         .toString());
184                     result.append(quoteLiteral);
185                 } else {
186                     VariableField item2 = (DateTimePatternGenerator.VariableField) item;
187                     if (item2.isNumeric()) {
188                         result.append(item.toString().charAt(0));
189                     } else {
190                         result.append(item);
191                     }
192                 }
193             }
194             String resultString = result.toString();
195             return resultString;
196         }
197 
198         private String getCorePattern(String intervalPattern) {
199             // get up to the first duplicate field. Then compare the result on
200             // both sides
201             StringBuilder b = new StringBuilder();
202             StringBuilder result = new StringBuilder();
203             boolean firstPart = true;
204             int endFirstPart = -1;
205             int startSecondPart = -1;
206             int goodSoFar = -1;
207             Set<Integer> firstComponents = new HashSet<Integer>();
208             Set<Integer> secondComponents = new HashSet<Integer>();
209             for (Object item : formatParser.set(intervalPattern).getItems()) {
210                 if (item instanceof String) {
211                     Object quoteLiteral = formatParser.quoteLiteral(item
212                         .toString());
213                     b.append(quoteLiteral);
214                     goodSoFar = result.length();
215                     result.append(quoteLiteral);
216                 } else {
217                     VariableField item2 = (DateTimePatternGenerator.VariableField) item;
218                     int type = item2.getType();
219                     if (firstPart && firstComponents.contains(type)) {
220                         firstPart = false;
221                         startSecondPart = b.length();
222                     }
223                     b.append(item);
224                     if (firstPart) {
225                         endFirstPart = b.length();
226                         firstComponents.add(type);
227                         result.append(item);
228                     } else {
229                         secondComponents.add(type);
230                         if (firstComponents.contains(type)) {
231                             result.setLength(goodSoFar);
232                         } else {
233                             result.append(item);
234                         }
235                     }
236                 }
237             }
238             String normalized = b.toString();
239             if (!normalized.equals(intervalPattern)) {
240                 System.out.println("Not normalized: " + intervalPattern + "\t"
241                     + normalized);
242             }
243             if (endFirstPart < 0 || startSecondPart < 0) {
244                 throw new IllegalArgumentException("Illegal interval pattern: "
245                     + intervalPattern);
246             } else {
247                 if (DEBUG)
248                     System.out.println(normalized.substring(0, endFirstPart)
249                         + "$$"
250                         + normalized.substring(endFirstPart,
251                             startSecondPart)
252                         + "$$"
253                         + normalized.substring(startSecondPart) + "\t=>\t"
254                         + result);
255             }
256             return result.toString();
257         }
258 
259         private String getBaseSkeleton(String skeleton) {
260             int slashPos = skeleton.indexOf('/');
261             String core = skeleton;
262             String diff = "";
263             if (slashPos >= 0) {
264                 core = skeleton.substring(0, slashPos);
265                 diff = skeleton.substring(slashPos);
266             }
267             core = dtp.getBaseSkeleton(core);
268             return core + diff;
269         }
270 
271     }
272 
    /**
     * Collected results per locale id. {@code main} adds an empty entry for
     * every locale it keeps, {@code gatherInfo} fills the entries, and the
     * {@code write*} methods read them.
     */
    static Map<String, LocaleInfo> data = new TreeMap<String, LocaleInfo>();
274 
275     // private static final Relation<String,String> digit4 = Relation.of(new
276     // TreeMap<String,Set<String>>(),
277     // TreeSet.class);
278     // private static final Relation<String,String> digit2 = Relation.of(new
279     // TreeMap<String,Set<String>>(),
280     // TreeSet.class);
281 
282     public static void main(String[] args) throws IOException {
283         CLDRFile englishFile = testInfo.getEnglish();
284 
285         Factory factory = Factory.make(CLDRPaths.TMP2_DIRECTORY
286             + "vxml/common/main/", LOCALES);
287         String calendarID = "gregorian";
288         System.out.println("Total locales: "
289             + factory.getAvailableLanguages().size());
290         Map<String, String> sorted = new TreeMap<String, String>();
291         SupplementalDataInfo sdi = SupplementalDataInfo.getInstance();
292         Set<String> defaultContent = sdi.getDefaultContentLocales();
293         LanguageTagParser ltp = new LanguageTagParser();
294 
295         for (String localeID : factory.getAvailableLanguages()) {
296             if (!ltp.set(localeID).getRegion().isEmpty()) {
297                 continue;
298             }
299             if (defaultContent.contains(localeID)) {
300                 System.out.println("Skipping default content: " + localeID);
301                 continue;
302             }
303             sorted.put(englishFile.getName(localeID, true), localeID);
304             data.put(localeID, new LocaleInfo());
305         }
306 
307         gatherInfo(factory, calendarID, sorted);
308 
309         writeYearWidths(sorted, true, "year-width-diff.txt");
310         writeYearWidths(sorted, false, "year-width-diff-other.txt");
311 
312         writeConflictingStockItems(true, "conflicting-stock.txt");
313         writeConflictingStockItems(false, "conflicting-stock-other.txt");
314 
315         writeConflictingPatterns(sorted, true, "conflicting-patterns.txt");
316         writeConflictingPatterns(sorted, false,
317             "conflicting-patterns-other.txt");
318     }
319 
320     public static void gatherInfo(Factory factory, String calendarID,
321         Map<String, String> sorted) throws IOException {
322         XPathParts parts = new XPathParts();
323 
324         for (Entry<String, String> entry : sorted.entrySet()) {
325             String localeId = entry.getValue();
326             CLDRFile file = factory.make(localeId, true);
327             LocaleInfo localeInfo = data.get(localeId);
328             for (String stock : STOCK) {
329                 String path = "//ldml/dates/calendars/calendar[@type=\""
330                     + calendarID
331                     + "\"]/dateFormats/dateFormatLength[@type=\""
332                     + stock
333                     + "\"]/dateFormat[@type=\"standard\"]/pattern[@type=\"standard\"]";
334                 String dateTimePattern = file.getStringValue(path);
335                 localeInfo.recordStock(localeId, stock, dateTimePattern);
336                 path = "//ldml/dates/calendars/calendar[@type=\""
337                     + calendarID
338                     + "\"]/timeFormats/timeFormatLength[@type=\""
339                     + stock
340                     + "\"]/timeFormat[@type=\"standard\"]/pattern[@type=\"standard\"]";
341                 dateTimePattern = file.getStringValue(path);
342                 localeInfo.recordStockTime(localeId, stock, dateTimePattern);
343             }
344             for (String path : With
345                 .in(file.iterator("//ldml/dates/calendars/calendar[@type=\""
346                     + calendarID
347                     + "\"]/dateTimeFormats/availableFormats/dateFormatItem"))) {
348                 String key = parts.set(path).getAttributeValue(-1, "id");
349                 String value = file.getStringValue(path);
350                 localeInfo.record(key, value);
351             }
352             for (String path : With
353                 .in(file.iterator("//ldml/dates/calendars/calendar[@type=\""
354                     + calendarID
355                     + "\"]/dateTimeFormats/intervalFormats/intervalFormatItem"))) {
356                 String skeleton = parts.set(path).getAttributeValue(-2, "id");
357                 String diff = parts.set(path).getAttributeValue(-1, "id");
358                 String value = file.getStringValue(path);
359                 localeInfo.record(skeleton + "/" + diff, value);
360             }
361         }
362     }
363 
364     public static void writeYearWidths(Map<String, String> sorted,
365         boolean modern, String filename) throws IOException {
366         PrintWriter out = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY
367             + "datecheck/", filename);
368         out.println("Name\tid\t"
369             + CollectionUtilities.join(Category.values(), "\t"));
370         for (Entry<String, String> entry : sorted.entrySet()) {
371             String localeId = entry.getValue();
372             boolean priority = getPriority(localeId);
373             if (modern != priority) {
374                 continue;
375             }
376             String name = entry.getKey();
377             LocaleInfo localeInfo = data.get(localeId);
378             out.print(name + "\t" + localeId);
379             for (Category item : Category.values()) {
380                 Set<String> items = localeInfo.category2base.get(item);
381                 if (items != null) {
382                     out.print("\t" + CollectionUtilities.join(items, " "));
383                 } else {
384                     out.print("\t");
385                 }
386             }
387             out.println();
388         }
389         out.close();
390     }
391 
392     public static void writeConflictingStockItems(boolean modern,
393         String filename) throws IOException {
394         PrintWriter out;
395         System.out.println("\nMismatched Stock items\n");
396         out = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY
397             + "datecheck/", filename);
398         out.println("Stock\tSkeleton\tLocales");
399         for (Entry<String, Relation<String, String>> stockAndSkeleton2locales : stock2skeleton2locales
400             .entrySet()) {
401             String stock = stockAndSkeleton2locales.getKey();
402             for (Entry<String, Set<String>> entry2 : stockAndSkeleton2locales
403                 .getValue().keyValuesSet()) {
404                 String filtered = filter(entry2.getValue(), modern);
405                 if (filtered.isEmpty()) {
406                     continue;
407                 }
408                 out.println(stock + "\t" + entry2.getKey() + "\t" + filtered);
409             }
410         }
411         out.close();
412     }
413 
414     private static String filter(Set<String> value, boolean modern) {
415         StringBuilder b = new StringBuilder();
416         for (String localeId : value) {
417             if (modern != getPriority(localeId)) {
418                 continue;
419             }
420             if (b.length() != 0) {
421                 b.append(" ");
422             }
423             b.append(localeId);
424         }
425         return b.toString();
426     }
427 
428     public static void writeConflictingPatterns(Map<String, String> sorted,
429         boolean modern, String filename) throws IOException {
430         PrintWriter out;
431         out = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY
432             + "datecheck/", filename);
433         out.println("Language\tId\tMin. Skeleton\tMin Pat1\tskeleton → pattern\tMin Pat2\tskeleton → pattern\tMin Pat3\tskeleton → pattern");
434         for (Entry<String, String> entry : sorted.entrySet()) {
435             String localeId = entry.getValue();
436             if (modern != getPriority(localeId)) {
437                 continue;
438             }
439             String name = entry.getKey();
440             LocaleInfo localeInfo = data.get(localeId);
441 
442             for (Entry<String, Relation<String, R2<String, String>>> baseAndBasePatterns2Info : localeInfo.base2BasePatterns2Info
443                 .entrySet()) {
444                 String base = baseAndBasePatterns2Info.getKey();
445                 Relation<String, R2<String, String>> basePatterns2Info = baseAndBasePatterns2Info
446                     .getValue();
447                 if (basePatterns2Info.size() == 1) {
448                     continue;
449                 }
450                 // Ewe ee MMM LLL → ‹[MMM, LLL]›
451                 // Ewe ee MMM MMM → ‹[MMM/M, MMM–MMM]›
452                 // => Ewe ee MMM ‹LLL›: tab MMM → ‹LLL› tab ‹MMM›: tab MMM/M →
453                 // ‹MMM–MMM›
454                 StringBuilder s = new StringBuilder(name + "\t" + localeId
455                     + "\t" + base);
456 
457                 for (Entry<String, Set<R2<String, String>>> basePatternsAndInfo : basePatterns2Info
458                     .keyValuesSet()) {
459                     String basePattern = basePatternsAndInfo.getKey();
460                     s.append("\t‹" + basePattern + "›:\t\"");
461                     boolean first = true;
462                     for (R2<String, String> info : basePatternsAndInfo
463                         .getValue()) {
464                         if (first) {
465                             first = false;
466                         } else {
467                             s.append(";\n");
468                         }
469                         s.append(info.get0() + " → ‹" + info.get1() + "›");
470                     }
471                     s.append("\"");
472                 }
473                 out.println(s);
474             }
475         }
476         out.close();
477     }
478 
479     public static boolean getPriority(String localeId) {
480         return STANDARD_CODES.getLocaleCoverageLevel(
481             Organization.google.toString(), localeId) == Level.MODERN
482             || STANDARD_CODES.getLocaleCoverageLevel(
483                 Organization.apple.toString(), localeId) == Level.MODERN
484             || STANDARD_CODES.getLocaleCoverageLevel(
485                 Organization.ibm.toString(), localeId) == Level.MODERN;
486     }
487 }
488