• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.unittest;
2 
3 import java.io.IOException;
4 import java.io.PrintWriter;
5 import java.util.EnumMap;
6 import java.util.HashMap;
7 import java.util.HashSet;
8 import java.util.LinkedHashMap;
9 import java.util.Map;
10 import java.util.Map.Entry;
11 import java.util.Set;
12 import java.util.TreeMap;
13 import java.util.TreeSet;
14 
15 import org.unicode.cldr.draft.FileUtilities;
16 import org.unicode.cldr.util.CLDRConfig;
17 import org.unicode.cldr.util.CLDRFile;
18 import org.unicode.cldr.util.CLDRPaths;
19 import org.unicode.cldr.util.Factory;
20 import org.unicode.cldr.util.LanguageTagParser;
21 import org.unicode.cldr.util.Level;
22 import org.unicode.cldr.util.Organization;
23 import org.unicode.cldr.util.StandardCodes;
24 import org.unicode.cldr.util.SupplementalDataInfo;
25 import org.unicode.cldr.util.With;
26 import org.unicode.cldr.util.XPathParts;
27 
28 import com.google.common.base.Joiner;
29 import com.ibm.icu.impl.Relation;
30 import com.ibm.icu.impl.Row;
31 import com.ibm.icu.impl.Row.R2;
32 import com.ibm.icu.text.DateTimePatternGenerator;
33 import com.ibm.icu.text.DateTimePatternGenerator.VariableField;
34 
35 public class CheckYear {
36     static CLDRConfig testInfo = CLDRConfig.getInstance();
37     private static final StandardCodes STANDARD_CODES = StandardCodes.make();
38     private static final String LOCALES = ".*";
39     private static final String[] STOCK = { "short", "medium", "long", "full" };
40 
41     enum Category {
42         Year2_MonthNumeric, Year2_Other, Year4_MonthNumeric, Year4_Other
43     }
44 
45     static DateTimePatternGenerator dtp = DateTimePatternGenerator
46         .getEmptyInstance();
47     static DateTimePatternGenerator.FormatParser formatParser = new DateTimePatternGenerator.FormatParser();
48 
49     // mismatches between stocks
50     static Map<String, Relation<String, String>> stock2skeleton2locales = new LinkedHashMap<String, Relation<String, String>>();
51     static {
52         for (String stock : STOCK) {
53             stock2skeleton2locales.put("date-" + stock, Relation.of(
54                 new TreeMap<String, Set<String>>(), TreeSet.class));
55         }
56         for (String stock : STOCK) {
57             stock2skeleton2locales.put("time-" + stock, Relation.of(
58                 new TreeMap<String, Set<String>>(), TreeSet.class));
59         }
60     }
61 
62     static class LocaleInfo {
63         private static final boolean DEBUG = false;
64         // information on the type of years
65         Relation<Category, String> category2base = Relation.of(
66             new EnumMap<Category, Set<String>>(Category.class),
67             TreeSet.class);
68         // collisions between baseSkeletons
69         Map<String, Relation<String, Row.R2<String, String>>> base2BasePatterns2Info = new TreeMap<String, Relation<String, Row.R2<String, String>>>();
70 
71         Map<String, String> skeleton2pattern = new HashMap<String, String>();
72 
recordStockTime(String localeId, String stock, String dateTimePattern)73         public void recordStockTime(String localeId, String stock,
74             String dateTimePattern) {
75             String skeleton = dtp.getSkeleton(dateTimePattern);
76             String base = getBaseSkeleton(skeleton);
77             stock2skeleton2locales.get("time-" + stock).put(skeleton, localeId);
78             recordBase(base, skeleton, dateTimePattern);
79         }
80 
recordStock(String localeId, String stock, String dateTimePattern)81         public void recordStock(String localeId, String stock,
82             String dateTimePattern) {
83             String skeleton = dtp.getSkeleton(dateTimePattern);
84             String base = getBaseSkeleton(skeleton);
85             stock2skeleton2locales.get("date-" + stock).put(
86                 skeleton.replace("yyyy", "y"), localeId);
87             String key = skeleton + "*" + stock.charAt(0);
88             recordBase(base, skeleton, dateTimePattern);
89             recordYearStuff(key, dateTimePattern);
90         }
91 
record(String skeleton, String dateTimePattern)92         public void record(String skeleton, String dateTimePattern) {
93             String base = getBaseSkeleton(skeleton);
94             recordBase(base, skeleton, dateTimePattern);
95             recordYearStuff(skeleton, dateTimePattern);
96         }
97 
recordBase(String base, String skeleton, String dateTimePattern)98         public void recordBase(String base, String skeleton,
99             String dateTimePattern) {
100             String coreBase = getCoreSkeleton(base);
101             Relation<String, Row.R2<String, String>> basePatterns2Info = base2BasePatterns2Info
102                 .get(coreBase);
103             if (basePatterns2Info == null) {
104                 base2BasePatterns2Info
105                     .put(coreBase,
106                         basePatterns2Info = Relation
107                             .of(new TreeMap<String, Set<Row.R2<String, String>>>(),
108                                 TreeSet.class));
109             }
110             // adjust the pattern to correspond to the base fields
111             // String coreSkeleton = getCoreSkeleton(skeleton);
112             String minimizedPattern = replaceFieldTypes(dateTimePattern,
113                 coreBase, !coreBase.equals(base));
114             basePatterns2Info.put(minimizedPattern,
115                 Row.of(skeleton, dateTimePattern));
116             // if (skeleton2pattern.put(skeleton, basePattern) != null) {
117             // throw new IllegalArgumentException();
118             // }
119         }
120 
getCoreSkeleton(String skeleton)121         public String getCoreSkeleton(String skeleton) {
122             int slashPos = skeleton.indexOf('/');
123             String s = slashPos < 0 ? skeleton : skeleton
124                 .substring(0, slashPos);
125             return s;
126         }
127 
128         private void recordYearStuff(String skeleton, String dateTimePattern) {
129             // do the year stuff
130             if (!dateTimePattern.contains("y")) {
131                 return;
132             }
133             boolean isDigit4 = true;
134             if (dateTimePattern.contains("yyyy")) {
135                 // nothing
136             } else if (dateTimePattern.contains("yy")) {
137                 isDigit4 = false;
138             }
139             boolean monthNumeric = false;
140             if (dateTimePattern.contains("MMM")
141                 || dateTimePattern.contains("LLL")) {
142                 // nothing
143             } else if (dateTimePattern.contains("M")
144                 || dateTimePattern.contains("L")) {
145                 monthNumeric = true;
146             }
147             if (isDigit4) {
148                 if (monthNumeric) {
149                     category2base.put(Category.Year4_MonthNumeric, skeleton);
150                 } else {
151                     category2base.put(Category.Year4_Other, skeleton);
152                 }
153             } else {
154                 if (monthNumeric) {
155                     category2base.put(Category.Year2_MonthNumeric, skeleton);
156                 } else {
157                     category2base.put(Category.Year2_Other, skeleton);
158                 }
159             }
160         }
161 
162         public String replaceFieldTypes(String dateTimePattern,
163             String skeleton, boolean isInterval) {
164             if (!isInterval) {
165                 return replaceFieldPartsCompletely(dateTimePattern, skeleton);
166             } else {
167                 String part = getCorePattern(dateTimePattern);
168                 return replaceFieldPartsCompletely(part, skeleton);
169             }
170         }
171 
172         public String replaceFieldPartsCompletely(String dateTimePattern,
173             String skeleton) {
174             String minimizedPattern = dtp.replaceFieldTypes(dateTimePattern,
175                 skeleton);
176 
177             // fix numerics
178             StringBuilder result = new StringBuilder();
179             for (Object item : formatParser.set(minimizedPattern).getItems()) {
180                 if (item instanceof String) {
181                     Object quoteLiteral = formatParser.quoteLiteral(item
182                         .toString());
183                     result.append(quoteLiteral);
184                 } else {
185                     VariableField item2 = (DateTimePatternGenerator.VariableField) item;
186                     if (item2.isNumeric()) {
187                         result.append(item.toString().charAt(0));
188                     } else {
189                         result.append(item);
190                     }
191                 }
192             }
193             String resultString = result.toString();
194             return resultString;
195         }
196 
197         private String getCorePattern(String intervalPattern) {
198             // get up to the first duplicate field. Then compare the result on
199             // both sides
200             StringBuilder b = new StringBuilder();
201             StringBuilder result = new StringBuilder();
202             boolean firstPart = true;
203             int endFirstPart = -1;
204             int startSecondPart = -1;
205             int goodSoFar = -1;
206             Set<Integer> firstComponents = new HashSet<Integer>();
207             Set<Integer> secondComponents = new HashSet<Integer>();
208             for (Object item : formatParser.set(intervalPattern).getItems()) {
209                 if (item instanceof String) {
210                     Object quoteLiteral = formatParser.quoteLiteral(item.toString());
211                     b.append(quoteLiteral);
212                     goodSoFar = result.length();
213                     result.append(quoteLiteral);
214                 } else {
215                     VariableField item2 = (DateTimePatternGenerator.VariableField) item;
216                     int type = item2.getType();
217                     if (firstPart && firstComponents.contains(type)) {
218                         firstPart = false;
219                         startSecondPart = b.length();
220                     }
221                     b.append(item);
222                     if (firstPart) {
223                         endFirstPart = b.length();
224                         firstComponents.add(type);
225                         result.append(item);
226                     } else {
227                         secondComponents.add(type);
228                         if (firstComponents.contains(type)) {
229                             result.setLength(goodSoFar);
230                         } else {
231                             result.append(item);
232                         }
233                     }
234                 }
235             }
236             String normalized = b.toString();
237             if (!normalized.equals(intervalPattern)) {
238                 System.out.println("Not normalized: " + intervalPattern + "\t"
239                     + normalized);
240             }
241             if (endFirstPart < 0 || startSecondPart < 0) {
242                 throw new IllegalArgumentException("Illegal interval pattern: "
243                     + intervalPattern);
244             } else {
245                 if (DEBUG)
246                     System.out.println(normalized.substring(0, endFirstPart)
247                         + "$$"
248                         + normalized.substring(endFirstPart,
249                             startSecondPart)
250                         + "$$"
251                         + normalized.substring(startSecondPart) + "\t=>\t"
252                         + result);
253             }
254             return result.toString();
255         }
256 
257         private String getBaseSkeleton(String skeleton) {
258             int slashPos = skeleton.indexOf('/');
259             String core = skeleton;
260             String diff = "";
261             if (slashPos >= 0) {
262                 core = skeleton.substring(0, slashPos);
263                 diff = skeleton.substring(slashPos);
264             }
265             core = dtp.getBaseSkeleton(core);
266             return core + diff;
267         }
268 
269     }
270 
271     static Map<String, LocaleInfo> data = new TreeMap<String, LocaleInfo>();
272 
273     // private static final Relation<String,String> digit4 = Relation.of(new
274     // TreeMap<String,Set<String>>(),
275     // TreeSet.class);
276     // private static final Relation<String,String> digit2 = Relation.of(new
277     // TreeMap<String,Set<String>>(),
278     // TreeSet.class);
279 
280     public static void main(String[] args) throws IOException {
281         CLDRFile englishFile = testInfo.getEnglish();
282 
283         Factory factory = Factory.make(CLDRPaths.TMP2_DIRECTORY
284             + "vxml/common/main/", LOCALES);
285         String calendarID = "gregorian";
286         System.out.println("Total locales: "
287             + factory.getAvailableLanguages().size());
288         Map<String, String> sorted = new TreeMap<String, String>();
289         SupplementalDataInfo sdi = SupplementalDataInfo.getInstance();
290         Set<String> defaultContent = sdi.getDefaultContentLocales();
291         LanguageTagParser ltp = new LanguageTagParser();
292 
293         for (String localeID : factory.getAvailableLanguages()) {
294             if (!ltp.set(localeID).getRegion().isEmpty()) {
295                 continue;
296             }
297             if (defaultContent.contains(localeID)) {
298                 System.out.println("Skipping default content: " + localeID);
299                 continue;
300             }
301             sorted.put(englishFile.getName(localeID, true), localeID);
302             data.put(localeID, new LocaleInfo());
303         }
304 
305         gatherInfo(factory, calendarID, sorted);
306 
307         writeYearWidths(sorted, true, "year-width-diff.txt");
308         writeYearWidths(sorted, false, "year-width-diff-other.txt");
309 
310         writeConflictingStockItems(true, "conflicting-stock.txt");
311         writeConflictingStockItems(false, "conflicting-stock-other.txt");
312 
313         writeConflictingPatterns(sorted, true, "conflicting-patterns.txt");
314         writeConflictingPatterns(sorted, false,
315             "conflicting-patterns-other.txt");
316     }
317 
318     public static void gatherInfo(Factory factory, String calendarID,
319         Map<String, String> sorted) throws IOException {
320 
321         for (Entry<String, String> entry : sorted.entrySet()) {
322             String localeId = entry.getValue();
323             CLDRFile file = factory.make(localeId, true);
324             LocaleInfo localeInfo = data.get(localeId);
325             for (String stock : STOCK) {
326                 String path = "//ldml/dates/calendars/calendar[@type=\""
327                     + calendarID
328                     + "\"]/dateFormats/dateFormatLength[@type=\""
329                     + stock
330                     + "\"]/dateFormat[@type=\"standard\"]/pattern[@type=\"standard\"]";
331                 String dateTimePattern = file.getStringValue(path);
332                 localeInfo.recordStock(localeId, stock, dateTimePattern);
333                 path = "//ldml/dates/calendars/calendar[@type=\""
334                     + calendarID
335                     + "\"]/timeFormats/timeFormatLength[@type=\""
336                     + stock
337                     + "\"]/timeFormat[@type=\"standard\"]/pattern[@type=\"standard\"]";
338                 dateTimePattern = file.getStringValue(path);
339                 localeInfo.recordStockTime(localeId, stock, dateTimePattern);
340             }
341             for (String path : With
342                 .in(file.iterator("//ldml/dates/calendars/calendar[@type=\""
343                     + calendarID
344                     + "\"]/dateTimeFormats/availableFormats/dateFormatItem"))) {
345                 XPathParts parts = XPathParts.getFrozenInstance(path);
346                 String key = parts.getAttributeValue(-1, "id");
347                 String value = file.getStringValue(path);
348                 localeInfo.record(key, value);
349             }
350             for (String path : With
351                 .in(file.iterator("//ldml/dates/calendars/calendar[@type=\""
352                     + calendarID
353                     + "\"]/dateTimeFormats/intervalFormats/intervalFormatItem"))) {
354                 XPathParts parts = XPathParts.getFrozenInstance(path);
355                 String skeleton = parts.getAttributeValue(-2, "id");
356                 String diff = parts.getAttributeValue(-1, "id");
357                 String value = file.getStringValue(path);
358                 localeInfo.record(skeleton + "/" + diff, value);
359             }
360         }
361     }
362 
363     public static void writeYearWidths(Map<String, String> sorted,
364         boolean modern, String filename) throws IOException {
365         PrintWriter out = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY
366             + "datecheck/", filename);
367         out.println("Name\tid\t"
368             + Joiner.on("\t").join(Category.values()));
369         for (Entry<String, String> entry : sorted.entrySet()) {
370             String localeId = entry.getValue();
371             boolean priority = getPriority(localeId);
372             if (modern != priority) {
373                 continue;
374             }
375             String name = entry.getKey();
376             LocaleInfo localeInfo = data.get(localeId);
377             out.print(name + "\t" + localeId);
378             for (Category item : Category.values()) {
379                 Set<String> items = localeInfo.category2base.get(item);
380                 if (items != null) {
381                     out.print("\t" + Joiner.on(" ").join(items));
382                 } else {
383                     out.print("\t");
384                 }
385             }
386             out.println();
387         }
388         out.close();
389     }
390 
391     public static void writeConflictingStockItems(boolean modern,
392         String filename) throws IOException {
393         PrintWriter out;
394         System.out.println("\nMismatched Stock items\n");
395         out = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY
396             + "datecheck/", filename);
397         out.println("Stock\tSkeleton\tLocales");
398         for (Entry<String, Relation<String, String>> stockAndSkeleton2locales : stock2skeleton2locales
399             .entrySet()) {
400             String stock = stockAndSkeleton2locales.getKey();
401             for (Entry<String, Set<String>> entry2 : stockAndSkeleton2locales
402                 .getValue().keyValuesSet()) {
403                 String filtered = filter(entry2.getValue(), modern);
404                 if (filtered.isEmpty()) {
405                     continue;
406                 }
407                 out.println(stock + "\t" + entry2.getKey() + "\t" + filtered);
408             }
409         }
410         out.close();
411     }
412 
413     private static String filter(Set<String> value, boolean modern) {
414         StringBuilder b = new StringBuilder();
415         for (String localeId : value) {
416             if (modern != getPriority(localeId)) {
417                 continue;
418             }
419             if (b.length() != 0) {
420                 b.append(" ");
421             }
422             b.append(localeId);
423         }
424         return b.toString();
425     }
426 
427     public static void writeConflictingPatterns(Map<String, String> sorted,
428         boolean modern, String filename) throws IOException {
429         PrintWriter out;
430         out = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY
431             + "datecheck/", filename);
432         out.println("Language\tId\tMin. Skeleton\tMin Pat1\tskeleton → pattern\tMin Pat2\tskeleton → pattern\tMin Pat3\tskeleton → pattern");
433         for (Entry<String, String> entry : sorted.entrySet()) {
434             String localeId = entry.getValue();
435             if (modern != getPriority(localeId)) {
436                 continue;
437             }
438             String name = entry.getKey();
439             LocaleInfo localeInfo = data.get(localeId);
440 
441             for (Entry<String, Relation<String, R2<String, String>>> baseAndBasePatterns2Info : localeInfo.base2BasePatterns2Info
442                 .entrySet()) {
443                 String base = baseAndBasePatterns2Info.getKey();
444                 Relation<String, R2<String, String>> basePatterns2Info = baseAndBasePatterns2Info
445                     .getValue();
446                 if (basePatterns2Info.size() == 1) {
447                     continue;
448                 }
449                 // Ewe ee MMM LLL → ‹[MMM, LLL]›
450                 // Ewe ee MMM MMM → ‹[MMM/M, MMM–MMM]›
451                 // => Ewe ee MMM ‹LLL›: tab MMM → ‹LLL› tab ‹MMM›: tab MMM/M →
452                 // ‹MMM–MMM›
453                 StringBuilder s = new StringBuilder(name + "\t" + localeId
454                     + "\t" + base);
455 
456                 for (Entry<String, Set<R2<String, String>>> basePatternsAndInfo : basePatterns2Info
457                     .keyValuesSet()) {
458                     String basePattern = basePatternsAndInfo.getKey();
459                     s.append("\t‹" + basePattern + "›:\t\"");
460                     boolean first = true;
461                     for (R2<String, String> info : basePatternsAndInfo
462                         .getValue()) {
463                         if (first) {
464                             first = false;
465                         } else {
466                             s.append(";\n");
467                         }
468                         s.append(info.get0() + " → ‹" + info.get1() + "›");
469                     }
470                     s.append("\"");
471                 }
472                 out.println(s);
473             }
474         }
475         out.close();
476     }
477 
478     public static boolean getPriority(String localeId) {
479         return STANDARD_CODES.getLocaleCoverageLevel(
480             Organization.google.toString(), localeId) == Level.MODERN
481             || STANDARD_CODES.getLocaleCoverageLevel(
482                 Organization.apple.toString(), localeId) == Level.MODERN
483             || STANDARD_CODES.getLocaleCoverageLevel(
484                 Organization.ibm.toString(), localeId) == Level.MODERN;
485     }
486 }
487