• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.tool;
2 
3 import com.google.common.base.Splitter;
4 import com.google.common.collect.ImmutableMap;
5 import com.google.common.collect.ImmutableMultimap;
6 import com.google.common.collect.ImmutableSet;
7 import com.google.common.collect.Multimap;
8 import com.google.common.collect.TreeMultimap;
9 import com.ibm.icu.util.ICUUncheckedIOException;
10 import com.ibm.icu.util.Output;
11 import com.ibm.icu.util.ULocale;
12 import java.io.IOException;
13 import java.math.BigInteger;
14 import java.math.MathContext;
15 import java.nio.file.Files;
16 import java.nio.file.Path;
17 import java.util.Collection;
18 import java.util.Comparator;
19 import java.util.HashSet;
20 import java.util.LinkedHashSet;
21 import java.util.List;
22 import java.util.Map;
23 import java.util.Map.Entry;
24 import java.util.Set;
25 import java.util.TreeMap;
26 import java.util.TreeSet;
27 import java.util.regex.Pattern;
28 import org.unicode.cldr.util.CLDRPaths;
29 import org.unicode.cldr.util.CldrUtility;
30 import org.unicode.cldr.util.Pair;
31 import org.unicode.cldr.util.Rational;
32 import org.unicode.cldr.util.Rational.FormatStyle;
33 import org.unicode.cldr.util.StandardCodes.LstrType;
34 import org.unicode.cldr.util.SupplementalDataInfo;
35 import org.unicode.cldr.util.TempPrintWriter;
36 import org.unicode.cldr.util.UnitConverter;
37 import org.unicode.cldr.util.UnitConverter.ConversionInfo;
38 import org.unicode.cldr.util.UnitPreferences;
39 import org.unicode.cldr.util.UnitPreferences.UnitPreference;
40 import org.unicode.cldr.util.Validity;
41 import org.unicode.cldr.util.Validity.Status;
42 
43 /** Quick extraction from TestUnits; TODO pretty it up */
44 public class GenerateUnitTestData {
45 
46     private static final SupplementalDataInfo SDI = SupplementalDataInfo.getInstance();
47     private static final UnitConverter converter = SDI.getUnitConverter();
48     private static final String TEST_SEP = ";\t";
49     private static final Set<String> NOT_CONVERTABLE = ImmutableSet.of("generic");
50     private static final Rational R1000 = Rational.of(1000);
51 
52     private static final Map<String, String> CORE_TO_TYPE;
53     private static final Multimap<String, String> TYPE_TO_CORE;
54 
main(String[] args)55     public static void main(String[] args) {
56         GenerateUnitTestData item = new GenerateUnitTestData();
57         item.TestParseUnit();
58         item.TestUnitPreferences();
59         item.generateUnitLocalePreferences();
60     }
61 
62     static {
63         Set<String> VALID_UNITS =
64                 Validity.getInstance().getStatusToCodes(LstrType.unit).get(Status.regular);
65 
66         Map<String, String> coreToType = new TreeMap<>();
67         TreeMultimap<String, String> typeToCore = TreeMultimap.create();
68         for (String s : VALID_UNITS) {
69             int dashPos = s.indexOf('-');
70             String unitType = s.substring(0, dashPos);
71             String coreUnit = s.substring(dashPos + 1);
72             coreUnit = converter.fixDenormalized(coreUnit);
coreToType.put(coreUnit, unitType)73             coreToType.put(coreUnit, unitType);
typeToCore.put(unitType, coreUnit)74             typeToCore.put(unitType, coreUnit);
75         }
76         CORE_TO_TYPE = ImmutableMap.copyOf(coreToType);
77         TYPE_TO_CORE = ImmutableMultimap.copyOf(typeToCore);
78     }
79 
TestParseUnit()80     public void TestParseUnit() {
81         Output<String> compoundBaseUnit = new Output<>();
82         String[][] tests = {
83             {"kilometer-pound-per-hour", "kilogram-meter-per-second", "45359237/360000000"},
84             {"kilometer-per-hour", "meter-per-second", "5/18"},
85         };
86         //        for (String[] test : tests) {
87         //            String source = test[0];
88         //            String expectedUnit = test[1];
89         //            Rational expectedRational = new Rational.RationalParser().parse(test[2]);
90         //            ConversionInfo unitInfo = converter.parseUnitId(source, compoundBaseUnit,
91         // false);
92         //            assertEquals(source, expectedUnit, compoundBaseUnit.value);
93         //            assertEquals(source, expectedRational, unitInfo.factor);
94         //        }
95 
96         // check all
97         Set<String> badUnits = new LinkedHashSet<>();
98         Set<String> noQuantity = new LinkedHashSet<>();
99         Multimap<Pair<String, Double>, String> testPrintout = TreeMultimap.create();
100 
101         // checkUnitConvertability(converter, compoundBaseUnit, badUnits, "pint-metric-per-second");
102 
103         for (Entry<String, String> entry : TYPE_TO_CORE.entries()) {
104             String type = entry.getKey();
105             String unit = entry.getValue();
106             if (NOT_CONVERTABLE.contains(unit)) {
107                 continue;
108             }
109             checkUnitConvertability(
110                     converter, compoundBaseUnit, badUnits, noQuantity, type, unit, testPrintout);
111         }
112         if (true) { // test data
113             try (TempPrintWriter pw =
114                     TempPrintWriter.openUTF8Writer(
115                             CLDRPaths.TEST_DATA + "units", "unitsTest.txt")) {
116 
117                 pw.println(
118                         "# Test data for unit conversions\n"
119                                 + CldrUtility.getCopyrightString("#  ")
120                                 + "\n"
121                                 + "#\n"
122                                 + "# Format:\n"
123                                 + "#\tQuantity\t;\tx\t;\ty\t;\tconversion to y (rational)\t;\ttest: 1000 x ⟹ y\n"
124                                 + "#\n"
125                                 + "# Use: convert 1000 x units to the y unit; the result should match the final column,\n"
126                                 + "#   at the given precision. For example, when the last column is 159.1549,\n"
127                                 + "#   round to 4 decimal digits before comparing.\n"
128                                 + "# Note that certain conversions are approximate, such as degrees to radians\n"
129                                 + "#\n"
130                                 + "# Generation: Use GenerateUnitTestData.java to regenerate unitsTest.txt.\n");
131                 for (Entry<Pair<String, Double>, String> entry : testPrintout.entries()) {
132                     pw.println(entry.getValue());
133                 }
134             }
135         }
136     }
137 
TestUnitPreferences()138     public void TestUnitPreferences() {
139         UnitPreferences prefs = SDI.getUnitPreferences();
140         try (TempPrintWriter pw =
141                 TempPrintWriter.openUTF8Writer(
142                         CLDRPaths.TEST_DATA + "units", "unitPreferencesTest.txt")) {
143             pw.println(getHeader("Region"));
144             Rational ONE_TENTH = Rational.of(1, 10);
145 
146             // Note that for production usage, precomputed data like the
147             // prefs.getFastMap(converter) would be used instead of the raw data.
148 
149             for (Entry<String, Map<String, Multimap<Set<String>, UnitPreference>>> entry :
150                     prefs.getData().entrySet()) {
151                 String quantity = entry.getKey();
152                 String baseUnit = converter.getBaseUnitFromQuantity(quantity);
153                 for (Entry<String, Multimap<Set<String>, UnitPreference>> entry2 :
154                         entry.getValue().entrySet()) {
155                     String usage = entry2.getKey();
156 
157                     // collect samples of base units
158                     for (Entry<Set<String>, Collection<UnitPreference>> entry3 :
159                             entry2.getValue().asMap().entrySet()) {
160                         boolean first = true;
161                         Set<Rational> samples = new TreeSet<>(Comparator.reverseOrder());
162                         for (UnitPreference pref : entry3.getValue()) {
163                             final String topUnit =
164                                     UnitPreferences.SPLIT_AND.split(pref.unit).iterator().next();
165                             if (first) {
166                                 samples.add(
167                                         converter.convert(
168                                                 pref.geq.add(ONE_TENTH), topUnit, baseUnit, false));
169                                 first = false;
170                             }
171                             samples.add(converter.convert(pref.geq, topUnit, baseUnit, false));
172                             samples.add(
173                                     converter.convert(
174                                             pref.geq.subtract(ONE_TENTH),
175                                             topUnit,
176                                             baseUnit,
177                                             false));
178                         }
179                         // show samples
180                         Set<String> regions = entry3.getKey();
181                         String sampleRegion = regions.iterator().next();
182                         Collection<UnitPreference> uprefs = entry3.getValue();
183                         for (Rational sample : samples) {
184                             showSample(quantity, usage, sampleRegion, sample, baseUnit, uprefs, pw);
185                         }
186                         pw.println();
187                     }
188                 }
189             }
190         }
191     }
192 
generateUnitLocalePreferences()193     public void generateUnitLocalePreferences() {
194         try (TempPrintWriter pwLocale =
195                 TempPrintWriter.openUTF8Writer(
196                         CLDRPaths.TEST_DATA + "units", "unitLocalePreferencesTest.txt")) {
197 
198             try {
199                 Set<List<Object>> seen = new HashSet<>();
200                 // first copy existing lines
201                 // This includes the header, so modify the old header if changes are needed!
202                 Files.lines(Path.of(CLDRPaths.TEST_DATA + "units/unitLocalePreferencesTest.txt"))
203                         .forEach(line -> formatPwLocale(pwLocale, line, seen));
204                 // TODO: add more lines
205                 formatLocaleLine(
206                         "byte-per-millisecond", Rational.of(123), "default", "en", "", seen);
207             } catch (IOException e) {
208                 throw new ICUUncheckedIOException(e);
209             }
210         }
211     }
212 
213     static final Splitter SPLIT_SEMI = Splitter.on(Pattern.compile("\\s*;\\s*")).trimResults();
214 
formatPwLocale(TempPrintWriter pwLocale, String rawLine, Set<List<Object>> seen)215     private void formatPwLocale(TempPrintWriter pwLocale, String rawLine, Set<List<Object>> seen) {
216         int hashPos = rawLine.indexOf('#');
217         String line = hashPos < 0 ? rawLine : rawLine.substring(0, hashPos);
218         String comment = hashPos < 0 ? "" : "#" + rawLine.substring(hashPos + 1);
219         if (line.isBlank()) {
220             if (!comment.isBlank()) {
221                 pwLocale.println(comment);
222             }
223             return;
224         }
225         List<String> parts = SPLIT_SEMI.splitToList(line);
226 
227         String sourceUnit = parts.get(0);
228         Rational sourceAmount = Rational.of(parts.get(1));
229         String usage = parts.get(2);
230         String languageTag = parts.get(3);
231         String newLine =
232                 formatLocaleLine(sourceUnit, sourceAmount, usage, languageTag, comment, seen);
233         if (newLine != null) {
234             pwLocale.println(newLine);
235         }
236     }
237 
238     public String formatLocaleLine(
239             String sourceUnit,
240             Rational sourceAmount,
241             String usage,
242             String languageTag,
243             String comment,
244             Set<List<Object>> seen) {
245         List<Object> bundle = List.of(sourceUnit, sourceAmount, usage, languageTag);
246         if (bundle.contains(seen)) {
247             return null;
248         }
249         seen.add(bundle);
250 
251         UnitPreferences prefs = SDI.getUnitPreferences();
252         final ULocale uLocale = ULocale.forLanguageTag(languageTag);
253         UnitPreference unitPreference =
254                 prefs.getUnitPreference(sourceAmount, sourceUnit, usage, uLocale);
255         if (unitPreference == null) { // if the quantity isn't found
256             throw new IllegalArgumentException(
257                     String.format(
258                             "No unit preferences found for unit: %s, usage: %s, locale:%s",
259                             sourceUnit, usage, languageTag));
260         }
261         String actualUnit = unitPreference.unit;
262         Rational actualValue =
263                 converter.convert(sourceAmount, sourceUnit, unitPreference.unit, false);
264         // #    input-unit; amount; usage;  languageTag; expected-unit; expected-amount # comment
265         final String newFileLine =
266                 String.format(
267                         "%s;\t%s;\t%s;\t%s;\t%s;\t%s%s",
268                         sourceUnit,
269                         sourceAmount.toString(FormatStyle.formatted),
270                         usage,
271                         languageTag,
272                         actualUnit,
273                         actualValue.toString(FormatStyle.formatted),
274                         comment.isBlank() ? "" : "\t" + comment);
275         return newFileLine;
276     }
277 
278     static LikelySubtags likely = new LikelySubtags();
279 
280     public String getHeader(String regionOrLocale) {
281         return "\n# Test data for unit region preferences\n"
282                 + CldrUtility.getCopyrightString("#  ")
283                 + "\n"
284                 + "#\n"
285                 + "# Format:\n"
286                 + "#\tQuantity;\tUsage;\t"
287                 + regionOrLocale
288                 + ";\tInput (r);\tInput (d);\tInput Unit;\tOutput (r);\tOutput (d);\tOutput Unit\n"
289                 + "#\n"
290                 + "# Use: Convert the Input amount & unit according to the Usage and "
291                 + regionOrLocale
292                 + ".\n"
293                 + "#\t The result should match the Output amount and unit.\n"
294                 + "#\t Both rational (r) and double64 (d) forms of the input and output amounts are supplied so that implementations\n"
295                 + "#\t have two options for testing based on the precision in their implementations. For example:\n"
296                 + "#\t   3429 / 12500; 0.27432; meter;\n"
297                 + "#\t The Output amount and Unit are repeated for mixed units. In such a case, only the smallest unit will have\n"
298                 + "#\t both a rational and decimal amount; the others will have a single integer value, such as:\n"
299                 + "#\t   length; person-height; CA; 3429 / 12500; 0.27432; meter; 2; foot; 54 / 5; 10.8; inch\n"
300                 + "#\t The input and output units are unit identifers; in particular, the output does not have further processing:\n"
301                 + "#\t\t • no localization\n"
302                 + "#\t\t • no adjustment for pluralization\n"
303                 + "#\t\t • no formatted with the skeleton\n"
304                 + "#\t\t • no suppression of zero values (for secondary -and- units such as pound in stone-and-pound)\n"
305                 + "#\n"
306                 + "# Generation: Use GenerateUnitTestData.java to regenerate unitPreferencesTest.txt.\n";
307     }
308 
309     private void showSample(
310             String quantity,
311             String usage,
312             String sampleRegionOrLocale,
313             Rational sampleBaseValue,
314             String baseUnit,
315             Collection<UnitPreference> prefs,
316             TempPrintWriter pw) {
317         String lastUnit = null;
318         boolean gotOne = false;
319         for (UnitPreference pref : prefs) {
320             final String topUnit = UnitPreferences.SPLIT_AND.split(pref.unit).iterator().next();
321             Rational baseGeq = converter.convert(pref.geq, topUnit, baseUnit, false);
322             if (sampleBaseValue.compareTo(baseGeq) >= 0) {
323                 showSample2(
324                         quantity,
325                         usage,
326                         sampleRegionOrLocale,
327                         sampleBaseValue,
328                         baseUnit,
329                         pref.unit,
330                         pw);
331                 gotOne = true;
332                 break;
333             }
334             lastUnit = pref.unit;
335         }
336         if (!gotOne) {
337             showSample2(
338                     quantity, usage, sampleRegionOrLocale, sampleBaseValue, baseUnit, lastUnit, pw);
339         }
340     }
341 
342     private void showSample2(
343             String quantity,
344             String usage,
345             String sampleRegionOrLocale,
346             Rational sampleBaseValue,
347             String baseUnit,
348             String lastUnit,
349             TempPrintWriter pw) {
350         Rational originalSampleBaseValue = sampleBaseValue;
351         // Known slow algorithm for mixed values, but for generating tests we don't care.
352         final List<String> units = UnitPreferences.SPLIT_AND.splitToList(lastUnit);
353         StringBuilder formattedUnit = new StringBuilder();
354         int remaining = units.size();
355         for (String unit : units) {
356             --remaining;
357             Rational sample = converter.convert(sampleBaseValue, baseUnit, unit, false);
358             if (formattedUnit.length() != 0) {
359                 formattedUnit.append(TEST_SEP);
360             }
361             if (remaining != 0) {
362                 BigInteger floor = sample.floor();
363                 formattedUnit.append(floor + TEST_SEP + unit);
364                 // convert back to base unit
365                 sampleBaseValue =
366                         converter.convert(
367                                 sample.subtract(Rational.of(floor)), unit, baseUnit, false);
368             } else {
369                 formattedUnit.append(sample + TEST_SEP + sample.doubleValue() + TEST_SEP + unit);
370             }
371         }
372         pw.println(
373                 quantity
374                         + TEST_SEP
375                         + usage
376                         + TEST_SEP
377                         + sampleRegionOrLocale
378                         + TEST_SEP
379                         + originalSampleBaseValue
380                         + TEST_SEP
381                         + originalSampleBaseValue.doubleValue()
382                         + TEST_SEP
383                         + baseUnit
384                         + TEST_SEP
385                         + formattedUnit);
386     }
387 
388     private void checkUnitConvertability(
389             UnitConverter converter,
390             Output<String> compoundBaseUnit,
391             Set<String> badUnits,
392             Set<String> noQuantity,
393             String type,
394             String unit,
395             Multimap<Pair<String, Double>, String> testPrintout) {
396 
397         if (converter.isBaseUnit(unit)) {
398             String quantity = converter.getQuantityFromBaseUnit(unit);
399             if (quantity == null) {
400                 noQuantity.add(unit);
401             }
402             if (true) {
403                 testPrintout.put(
404                         new Pair<>(quantity, 1000d),
405                         quantity + "\t;\t" + unit + "\t;\t" + unit + "\t;\t1 * x\t;\t1,000.00");
406             }
407         } else {
408             ConversionInfo unitInfo = converter.getUnitInfo(unit, compoundBaseUnit);
409             if (unitInfo == null) {
410                 unitInfo = converter.parseUnitId(unit, compoundBaseUnit, false);
411             }
412             if (unitInfo == null) {
413                 badUnits.add(unit);
414             } else if (true) {
415                 String quantity = converter.getQuantityFromBaseUnit(compoundBaseUnit.value);
416                 if (quantity == null) {
417                     noQuantity.add(compoundBaseUnit.value);
418                 }
419                 final double testValue =
420                         unitInfo.convert(R1000).toBigDecimal(MathContext.DECIMAL32).doubleValue();
421                 testPrintout.put(
422                         new Pair<>(quantity, testValue),
423                         quantity
424                                 + "\t;\t"
425                                 + unit
426                                 + "\t;\t"
427                                 + compoundBaseUnit
428                                 + "\t;\t"
429                                 + unitInfo
430                                 + "\t;\t"
431                                 + testValue
432                         //                    + "\t" +
433                         // unitInfo.factor.toBigDecimal(MathContext.DECIMAL32)
434                         //                    + "\t" +
435                         // unitInfo.factor.reciprocal().toBigDecimal(MathContext.DECIMAL32)
436                         );
437             }
438         }
439     }
440 }
441