package org.unicode.cldr.tool;

import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableMultimap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Multimap;
import com.google.common.collect.TreeMultimap;
import com.ibm.icu.util.ICUUncheckedIOException;
import com.ibm.icu.util.Output;
import com.ibm.icu.util.ULocale;
import java.io.IOException;
import java.math.BigInteger;
import java.math.MathContext;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.regex.Pattern;
import java.util.stream.Stream;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.CldrUtility;
import org.unicode.cldr.util.Pair;
import org.unicode.cldr.util.Rational;
import org.unicode.cldr.util.Rational.FormatStyle;
import org.unicode.cldr.util.StandardCodes.LstrType;
import org.unicode.cldr.util.SupplementalDataInfo;
import org.unicode.cldr.util.TempPrintWriter;
import org.unicode.cldr.util.UnitConverter;
import org.unicode.cldr.util.UnitConverter.ConversionInfo;
import org.unicode.cldr.util.UnitPreferences;
import org.unicode.cldr.util.UnitPreferences.UnitPreference;
import org.unicode.cldr.util.Validity;
import org.unicode.cldr.util.Validity.Status;

/**
 * Generates the unit test-data files {@code unitsTest.txt}, {@code unitPreferencesTest.txt} and
 * {@code unitLocalePreferencesTest.txt} under {@code CLDRPaths.TEST_DATA + "units"}.
 *
 * <p>Quick extraction from TestUnits; TODO pretty it up
 */
public class GenerateUnitTestData {

    private static final SupplementalDataInfo SDI = SupplementalDataInfo.getInstance();
    private static final UnitConverter converter = SDI.getUnitConverter();

    /** Field separator used in the unit preference test files. */
    private static final String TEST_SEP = ";\t";

    /** Core units that are skipped when generating conversion test data. */
    private static final Set<String> NOT_CONVERTABLE = ImmutableSet.of("generic");

    /** Amount of the source unit that every conversion test line converts. */
    private static final Rational R1000 = Rational.of(1000);

    /** Offset added to / subtracted from each threshold when sampling unit preferences. */
    private static final Rational ONE_TENTH = Rational.of(1, 10);

    /** Core unit (the part after the first '-' of a valid unit code) to its unit type. */
    private static final Map<String, String> CORE_TO_TYPE;

    /** Unit type (the part before the first '-' of a valid unit code) to its core units. */
    private static final Multimap<String, String> TYPE_TO_CORE;

    /**
     * Regenerates all three test-data files.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        GenerateUnitTestData item = new GenerateUnitTestData();
        item.TestParseUnit();
        item.TestUnitPreferences();
        item.generateUnitLocalePreferences();
    }

    static {
        Set<String> validUnits =
                Validity.getInstance().getStatusToCodes(LstrType.unit).get(Status.regular);

        Map<String, String> coreToType = new TreeMap<>();
        TreeMultimap<String, String> typeToCore = TreeMultimap.create();
        for (String longUnitId : validUnits) {
            // Split "<type>-<core unit>" at the first dash.
            int dashPos = longUnitId.indexOf('-');
            String unitType = longUnitId.substring(0, dashPos);
            String coreUnit = converter.fixDenormalized(longUnitId.substring(dashPos + 1));
            coreToType.put(coreUnit, unitType);
            typeToCore.put(unitType, coreUnit);
        }
        CORE_TO_TYPE = ImmutableMap.copyOf(coreToType);
        TYPE_TO_CORE = ImmutableMultimap.copyOf(typeToCore);
    }

    /**
     * Writes {@code unitsTest.txt}: one line per convertible core unit, giving its quantity, its
     * base unit, the conversion, and the result of converting 1000 of the unit.
     *
     * <p>Units that cannot be parsed and base units without a quantity are collected locally but
     * are not reported by this method.
     */
    public void TestParseUnit() {
        Output<String> compoundBaseUnit = new Output<>();
        // Sample expectations from the disabled explicit checks (source, base unit, factor):
        //   kilometer-pound-per-hour -> kilogram-meter-per-second, 45359237/360000000
        //   kilometer-per-hour -> meter-per-second, 5/18

        // check all
        Set<String> badUnits = new LinkedHashSet<>();
        Set<String> noQuantity = new LinkedHashSet<>();
        // Sorted by (quantity, test value) so the output file has a stable order.
        Multimap<Pair<String, Double>, String> testPrintout = TreeMultimap.create();

        for (Entry<String, String> entry : TYPE_TO_CORE.entries()) {
            String type = entry.getKey();
            String unit = entry.getValue();
            if (NOT_CONVERTABLE.contains(unit)) {
                continue;
            }
            checkUnitConvertability(
                    converter, compoundBaseUnit, badUnits, noQuantity, type, unit, testPrintout);
        }

        // test data
        try (TempPrintWriter pw =
                TempPrintWriter.openUTF8Writer(CLDRPaths.TEST_DATA + "units", "unitsTest.txt")) {

            pw.println(
                    "# Test data for unit conversions\n"
                            + CldrUtility.getCopyrightString("# ")
                            + "\n"
                            + "#\n"
                            + "# Format:\n"
                            + "#\tQuantity\t;\tx\t;\ty\t;\tconversion to y (rational)\t;\ttest: 1000 x ⟹ y\n"
                            + "#\n"
                            + "# Use: convert 1000 x units to the y unit; the result should match the final column,\n"
                            + "# at the given precision. For example, when the last column is 159.1549,\n"
                            + "# round to 4 decimal digits before comparing.\n"
                            + "# Note that certain conversions are approximate, such as degrees to radians\n"
                            + "#\n"
                            + "# Generation: Use GenerateUnitTestData.java to regenerate unitsTest.txt.\n");
            for (Entry<Pair<String, Double>, String> entry : testPrintout.entries()) {
                pw.println(entry.getValue());
            }
        }
    }

    /**
     * Writes {@code unitPreferencesTest.txt}: for every quantity, usage and region set in the raw
     * unit preference data, emits sample amounts around each preference threshold together with
     * the unit(s) the preferences select for them.
     */
    public void TestUnitPreferences() {
        UnitPreferences prefs = SDI.getUnitPreferences();
        try (TempPrintWriter pw =
                TempPrintWriter.openUTF8Writer(
                        CLDRPaths.TEST_DATA + "units", "unitPreferencesTest.txt")) {
            pw.println(getHeader("Region"));

            // Note that for production usage, precomputed data like the
            // prefs.getFastMap(converter) would be used instead of the raw data.

            for (Entry<String, Map<String, Multimap<Set<String>, UnitPreference>>> byQuantity :
                    prefs.getData().entrySet()) {
                String quantity = byQuantity.getKey();
                String baseUnit = converter.getBaseUnitFromQuantity(quantity);
                for (Entry<String, Multimap<Set<String>, UnitPreference>> byUsage :
                        byQuantity.getValue().entrySet()) {
                    String usage = byUsage.getKey();

                    for (Entry<Set<String>, Collection<UnitPreference>> byRegions :
                            byUsage.getValue().asMap().entrySet()) {
                        Collection<UnitPreference> uprefs = byRegions.getValue();
                        // collect samples of base units
                        Set<Rational> samples = collectSamples(uprefs, baseUnit);
                        // show samples, using the first region of the set as representative
                        String sampleRegion = byRegions.getKey().iterator().next();
                        for (Rational sample : samples) {
                            showSample(quantity, usage, sampleRegion, sample, baseUnit, uprefs, pw);
                        }
                        pw.println();
                    }
                }
            }
        }
    }

    /**
     * Collects sample amounts, expressed in {@code baseUnit} and sorted in descending order, that
     * sit at and one tenth below each preference threshold, plus one tenth above the first
     * threshold.
     */
    private Set<Rational> collectSamples(Collection<UnitPreference> uprefs, String baseUnit) {
        Set<Rational> samples = new TreeSet<>(Comparator.reverseOrder());
        boolean first = true;
        for (UnitPreference pref : uprefs) {
            // For mixed units (joined by SPLIT_AND) the threshold applies to the first unit.
            final String topUnit = UnitPreferences.SPLIT_AND.split(pref.unit).iterator().next();
            if (first) {
                samples.add(converter.convert(pref.geq.add(ONE_TENTH), topUnit, baseUnit, false));
                first = false;
            }
            samples.add(converter.convert(pref.geq, topUnit, baseUnit, false));
            samples.add(converter.convert(pref.geq.subtract(ONE_TENTH), topUnit, baseUnit, false));
        }
        return samples;
    }

    /**
     * Rewrites {@code unitLocalePreferencesTest.txt}: every existing line is re-read and its
     * expected unit and amount are recomputed; comments (including the header) are copied through
     * and repeated data lines are dropped.
     *
     * @throws ICUUncheckedIOException if the existing file cannot be read
     */
    public void generateUnitLocalePreferences() {
        try (TempPrintWriter pwLocale =
                TempPrintWriter.openUTF8Writer(
                        CLDRPaths.TEST_DATA + "units", "unitLocalePreferencesTest.txt")) {

            Set<List<Object>> seen = new HashSet<>();
            // first copy existing lines
            // This includes the header, so modify the old header if changes are needed!
            // Files.lines keeps the file open until the stream is closed, so close it explicitly.
            try (Stream<String> lines =
                    Files.lines(
                            Path.of(CLDRPaths.TEST_DATA + "units/unitLocalePreferencesTest.txt"))) {
                lines.forEach(line -> formatPwLocale(pwLocale, line, seen));
            } catch (IOException e) {
                throw new ICUUncheckedIOException(e);
            }
            // TODO: add more lines
            // NOTE(review): the line returned here is not written to pwLocale; confirm whether
            // it is meant to be appended to the file.
            formatLocaleLine("byte-per-millisecond", Rational.of(123), "default", "en", "", seen);
        }
    }

    static final Splitter SPLIT_SEMI = Splitter.on(Pattern.compile("\\s*;\\s*")).trimResults();

    /**
     * Processes one line of the existing locale preferences file. Comment-only lines are copied
     * to {@code pwLocale}, blank lines are dropped, and data lines are regenerated from their
     * first four fields (unit, amount, usage, language tag).
     *
     * @param pwLocale destination writer
     * @param rawLine line as read from the existing file
     * @param seen bundles already emitted; updated by this call
     * @throws IllegalArgumentException if a data line has fewer than four fields
     */
    private void formatPwLocale(TempPrintWriter pwLocale, String rawLine, Set<List<Object>> seen) {
        int hashPos = rawLine.indexOf('#');
        String line = hashPos < 0 ? rawLine : rawLine.substring(0, hashPos);
        String comment = hashPos < 0 ? "" : "#" + rawLine.substring(hashPos + 1);
        if (line.isBlank()) {
            if (!comment.isBlank()) {
                pwLocale.println(comment);
            }
            return;
        }
        List<String> parts = SPLIT_SEMI.splitToList(line);
        if (parts.size() < 4) {
            throw new IllegalArgumentException(
                    "Malformed line (expected at least 4 fields): " + rawLine);
        }

        String sourceUnit = parts.get(0);
        Rational sourceAmount = Rational.of(parts.get(1));
        String usage = parts.get(2);
        String languageTag = parts.get(3);
        String newLine =
                formatLocaleLine(sourceUnit, sourceAmount, usage, languageTag, comment, seen);
        if (newLine != null) {
            pwLocale.println(newLine);
        }
    }

    /**
     * Builds one data line of the locale preferences test file.
     *
     * @param sourceUnit input unit identifier
     * @param sourceAmount input amount
     * @param usage usage to look up in the unit preferences
     * @param languageTag language tag of the locale to look up
     * @param comment trailing comment to append, or blank for none
     * @param seen (unit, amount, usage, languageTag) bundles already emitted; updated by this call
     * @return the formatted line, or {@code null} if the same bundle was already emitted
     * @throws IllegalArgumentException if no unit preference is found
     */
    public String formatLocaleLine(
            String sourceUnit,
            Rational sourceAmount,
            String usage,
            String languageTag,
            String comment,
            Set<List<Object>> seen) {
        List<Object> bundle = List.of(sourceUnit, sourceAmount, usage, languageTag);
        // Set.add returns false when the bundle is already present, i.e. a duplicate line.
        if (!seen.add(bundle)) {
            return null;
        }

        UnitPreferences prefs = SDI.getUnitPreferences();
        final ULocale uLocale = ULocale.forLanguageTag(languageTag);
        UnitPreference unitPreference =
                prefs.getUnitPreference(sourceAmount, sourceUnit, usage, uLocale);
        if (unitPreference == null) { // if the quantity isn't found
            throw new IllegalArgumentException(
                    String.format(
                            "No unit preferences found for unit: %s, usage: %s, locale:%s",
                            sourceUnit, usage, languageTag));
        }
        String actualUnit = unitPreference.unit;
        Rational actualValue = converter.convert(sourceAmount, sourceUnit, actualUnit, false);
        // # input-unit; amount; usage; languageTag; expected-unit; expected-amount # comment
        return String.format(
                "%s;\t%s;\t%s;\t%s;\t%s;\t%s%s",
                sourceUnit,
                sourceAmount.toString(FormatStyle.formatted),
                usage,
                languageTag,
                actualUnit,
                actualValue.toString(FormatStyle.formatted),
                comment.isBlank() ? "" : "\t" + comment);
    }

    static LikelySubtags likely = new LikelySubtags();

    /**
     * Returns the explanatory header written at the top of the unit preferences test file.
     *
     * @param regionOrLocale label for the third column, e.g. {@code "Region"}
     * @return the header text, beginning and ending with a newline
     */
    public String getHeader(String regionOrLocale) {
        return "\n# Test data for unit region preferences\n"
                + CldrUtility.getCopyrightString("# ")
                + "\n"
                + "#\n"
                + "# Format:\n"
                + "#\tQuantity;\tUsage;\t"
                + regionOrLocale
                + ";\tInput (r);\tInput (d);\tInput Unit;\tOutput (r);\tOutput (d);\tOutput Unit\n"
                + "#\n"
                + "# Use: Convert the Input amount & unit according to the Usage and "
                + regionOrLocale
                + ".\n"
                + "#\t The result should match the Output amount and unit.\n"
                + "#\t Both rational (r) and double64 (d) forms of the input and output amounts are supplied so that implementations\n"
                + "#\t have two options for testing based on the precision in their implementations. For example:\n"
                + "#\t 3429 / 12500; 0.27432; meter;\n"
                + "#\t The Output amount and Unit are repeated for mixed units. In such a case, only the smallest unit will have\n"
                + "#\t both a rational and decimal amount; the others will have a single integer value, such as:\n"
                + "#\t length; person-height; CA; 3429 / 12500; 0.27432; meter; 2; foot; 54 / 5; 10.8; inch\n"
                + "#\t The input and output units are unit identifers; in particular, the output does not have further processing:\n"
                + "#\t\t • no localization\n"
                + "#\t\t • no adjustment for pluralization\n"
                + "#\t\t • no formatted with the skeleton\n"
                + "#\t\t • no suppression of zero values (for secondary -and- units such as pound in stone-and-pound)\n"
                + "#\n"
                + "# Generation: Use GenerateUnitTestData.java to regenerate unitPreferencesTest.txt.\n";
    }

    /**
     * Writes one sample line, choosing the output unit from {@code prefs}: the first preference
     * whose threshold (converted to {@code baseUnit}) is at or below the sample wins; if none
     * matches, the unit of the last preference is used.
     */
    private void showSample(
            String quantity,
            String usage,
            String sampleRegionOrLocale,
            Rational sampleBaseValue,
            String baseUnit,
            Collection<UnitPreference> prefs,
            TempPrintWriter pw) {
        String lastUnit = null;
        for (UnitPreference pref : prefs) {
            final String topUnit = UnitPreferences.SPLIT_AND.split(pref.unit).iterator().next();
            Rational baseGeq = converter.convert(pref.geq, topUnit, baseUnit, false);
            if (sampleBaseValue.compareTo(baseGeq) >= 0) {
                showSample2(
                        quantity,
                        usage,
                        sampleRegionOrLocale,
                        sampleBaseValue,
                        baseUnit,
                        pref.unit,
                        pw);
                return;
            }
            lastUnit = pref.unit;
        }
        // No threshold was reached: fall back to the last preference's unit.
        showSample2(quantity, usage, sampleRegionOrLocale, sampleBaseValue, baseUnit, lastUnit, pw);
    }

    /**
     * Writes one sample line for an already chosen output unit. For a mixed unit, every part but
     * the last gets the integer floor of the remaining amount; the last part gets the remainder
     * as both a rational and a double.
     */
    private void showSample2(
            String quantity,
            String usage,
            String sampleRegionOrLocale,
            Rational sampleBaseValue,
            String baseUnit,
            String lastUnit,
            TempPrintWriter pw) {
        Rational originalSampleBaseValue = sampleBaseValue;
        // Known slow algorithm for mixed values, but for generating tests we don't care.
        final List<String> units = UnitPreferences.SPLIT_AND.splitToList(lastUnit);
        StringBuilder formattedUnit = new StringBuilder();
        int remaining = units.size();
        for (String unit : units) {
            --remaining;
            Rational sample = converter.convert(sampleBaseValue, baseUnit, unit, false);
            if (formattedUnit.length() != 0) {
                formattedUnit.append(TEST_SEP);
            }
            if (remaining != 0) {
                BigInteger floor = sample.floor();
                formattedUnit.append(floor).append(TEST_SEP).append(unit);
                // convert back to base unit
                sampleBaseValue =
                        converter.convert(
                                sample.subtract(Rational.of(floor)), unit, baseUnit, false);
            } else {
                formattedUnit
                        .append(sample)
                        .append(TEST_SEP)
                        .append(sample.doubleValue())
                        .append(TEST_SEP)
                        .append(unit);
            }
        }
        pw.println(
                quantity
                        + TEST_SEP
                        + usage
                        + TEST_SEP
                        + sampleRegionOrLocale
                        + TEST_SEP
                        + originalSampleBaseValue
                        + TEST_SEP
                        + originalSampleBaseValue.doubleValue()
                        + TEST_SEP
                        + baseUnit
                        + TEST_SEP
                        + formattedUnit);
    }

    /**
     * Adds the conversion test line for {@code unit} to {@code testPrintout}, keyed by quantity
     * and test value.
     *
     * @param converter converter used for the lookups
     * @param compoundBaseUnit output holder that receives the base unit of {@code unit}
     * @param badUnits receives {@code unit} when no conversion info can be found or parsed
     * @param noQuantity receives base units for which no quantity is known
     * @param type unit type of {@code unit}; currently unused
     * @param unit core unit to check
     * @param testPrintout receives the generated line
     */
    private void checkUnitConvertability(
            UnitConverter converter,
            Output<String> compoundBaseUnit,
            Set<String> badUnits,
            Set<String> noQuantity,
            String type,
            String unit,
            Multimap<Pair<String, Double>, String> testPrintout) {

        if (converter.isBaseUnit(unit)) {
            String quantity = converter.getQuantityFromBaseUnit(unit);
            if (quantity == null) {
                noQuantity.add(unit);
            }
            // A base unit converts to itself: identity conversion, 1000 stays 1000.
            testPrintout.put(
                    new Pair<>(quantity, 1000d),
                    quantity + "\t;\t" + unit + "\t;\t" + unit + "\t;\t1 * x\t;\t1,000.00");
            return;
        }

        ConversionInfo unitInfo = converter.getUnitInfo(unit, compoundBaseUnit);
        if (unitInfo == null) {
            // Not a directly known unit; try to parse it as a compound unit.
            unitInfo = converter.parseUnitId(unit, compoundBaseUnit, false);
        }
        if (unitInfo == null) {
            badUnits.add(unit);
            return;
        }

        String quantity = converter.getQuantityFromBaseUnit(compoundBaseUnit.value);
        if (quantity == null) {
            noQuantity.add(compoundBaseUnit.value);
        }
        final double testValue =
                unitInfo.convert(R1000).toBigDecimal(MathContext.DECIMAL32).doubleValue();
        testPrintout.put(
                new Pair<>(quantity, testValue),
                quantity
                        + "\t;\t"
                        + unit
                        + "\t;\t"
                        + compoundBaseUnit
                        + "\t;\t"
                        + unitInfo
                        + "\t;\t"
                        + testValue);
    }
}