1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 #include "unicode/utypes.h"
5
6 #if !UCONFIG_NO_FORMATTING
7
8 #include "unicode/dcfmtsym.h"
9
10 #include "cstr.h"
11 #include "numbertest.h"
12 #include "number_utils.h"
13 #include "number_skeletons.h"
14 #include "putilimp.h"
15
16 using namespace icu::number::impl;
17
18
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)19 void NumberSkeletonTest::runIndexedTest(int32_t index, UBool exec, const char*& name, char*) {
20 if (exec) {
21 logln("TestSuite AffixUtilsTest: ");
22 }
23 TESTCASE_AUTO_BEGIN;
24 TESTCASE_AUTO(validTokens);
25 TESTCASE_AUTO(invalidTokens);
26 TESTCASE_AUTO(unknownTokens);
27 TESTCASE_AUTO(unexpectedTokens);
28 TESTCASE_AUTO(duplicateValues);
29 TESTCASE_AUTO(stemsRequiringOption);
30 TESTCASE_AUTO(defaultTokens);
31 TESTCASE_AUTO(flexibleSeparators);
32 TESTCASE_AUTO(wildcardCharacters);
33 TESTCASE_AUTO(perUnitInArabic);
34 TESTCASE_AUTO(perUnitToSkeleton);
35 TESTCASE_AUTO(measurementSystemOverride);
36 TESTCASE_AUTO_END;
37 }
38
validTokens()39 void NumberSkeletonTest::validTokens() {
40 IcuTestErrorCode status(*this, "validTokens");
41
42 // This tests only if the tokens are valid, not their behavior.
43 // Most of these are from the design doc.
44 static const char16_t* cases[] = {
45 u"precision-integer",
46 u"precision-unlimited",
47 u"@@@##",
48 u"@@*",
49 u"@@+",
50 u"@@+/w",
51 u".000##",
52 u".00*",
53 u".00+",
54 u".",
55 u"./w",
56 u".*",
57 u".+",
58 u".+/w",
59 u".######",
60 u".00/@@*",
61 u".00/@@+",
62 u".00/@##",
63 u".00/@##/w",
64 u".00/@",
65 u".00/@r",
66 u".00/@@s",
67 u".00/@@#r",
68 u"precision-increment/3.14",
69 u"precision-increment/3.14/w",
70 u"precision-currency-standard",
71 u"precision-currency-standard/w",
72 u"precision-integer rounding-mode-half-up",
73 u".00# rounding-mode-ceiling",
74 u".00/@@* rounding-mode-floor",
75 u".00/@@+ rounding-mode-floor",
76 u"scientific",
77 u"scientific/*ee",
78 u"scientific/+ee",
79 u"scientific/sign-always",
80 u"scientific/*ee/sign-always",
81 u"scientific/+ee/sign-always",
82 u"scientific/sign-always/*ee",
83 u"scientific/sign-always/+ee",
84 u"scientific/sign-except-zero",
85 u"engineering",
86 u"engineering/*eee",
87 u"engineering/+eee",
88 u"compact-short",
89 u"compact-long",
90 u"notation-simple",
91 u"percent",
92 u"permille",
93 u"measure-unit/length-meter",
94 u"measure-unit/area-square-meter",
95 u"measure-unit/energy-joule per-measure-unit/length-meter",
96 u"unit/square-meter-per-square-meter",
97 u"currency/XXX",
98 u"currency/ZZZ",
99 u"currency/usd",
100 u"group-off",
101 u"group-min2",
102 u"group-auto",
103 u"group-on-aligned",
104 u"group-thousands",
105 u"integer-width/00",
106 u"integer-width/#0",
107 u"integer-width/*00",
108 u"integer-width/+00",
109 u"sign-always",
110 u"sign-auto",
111 u"sign-never",
112 u"sign-accounting",
113 u"sign-accounting-always",
114 u"sign-except-zero",
115 u"sign-accounting-except-zero",
116 u"unit-width-narrow",
117 u"unit-width-short",
118 u"unit-width-iso-code",
119 u"unit-width-full-name",
120 u"unit-width-hidden",
121 u"decimal-auto",
122 u"decimal-always",
123 u"scale/5.2",
124 u"scale/-5.2",
125 u"scale/100",
126 u"scale/1E2",
127 u"scale/1",
128 u"latin",
129 u"numbering-system/arab",
130 u"numbering-system/latn",
131 u"precision-integer/@##",
132 u"precision-integer rounding-mode-ceiling",
133 u"precision-currency-cash rounding-mode-ceiling",
134 u"0",
135 u"00",
136 u"000",
137 u"E0",
138 u"E00",
139 u"E000",
140 u"EE0",
141 u"EE00",
142 u"EE+?0",
143 u"EE+?00",
144 u"EE+!0",
145 u"EE+!00",
146 };
147
148 for (auto& cas : cases) {
149 UnicodeString skeletonString(cas);
150 status.setScope(skeletonString);
151 UParseError perror;
152 NumberFormatter::forSkeleton(skeletonString, perror, status);
153 assertSuccess(CStr(skeletonString)(), status, true);
154 assertEquals(skeletonString, -1, perror.offset);
155 status.errIfFailureAndReset();
156 }
157 }
158
invalidTokens()159 void NumberSkeletonTest::invalidTokens() {
160 static const char16_t* cases[] = {
161 u".00x",
162 u".00i",
163 u".00/x",
164 u".00/ww",
165 u".00##0",
166 u".##*",
167 u".00##*",
168 u".0#*",
169 u"@#*",
170 u".##+",
171 u".00##+",
172 u".0#+",
173 u"@#+",
174 u"@@x",
175 u"@@##0",
176 u".00/@@",
177 u".00/@@x",
178 u".00/@@#",
179 u".00/@@#*",
180 u".00/floor/@@*", // wrong order
181 u".00/@@#+",
182 u".00/@@@+r",
183 u".00/floor/@@+", // wrong order
184 u"precision-increment/français", // non-invariant characters for C++
185 u"scientific/ee",
186 u"precision-increment/xxx",
187 u"precision-increment/NaN",
188 u"precision-increment/Infinity",
189 u"precision-increment/0.1.2",
190 u"scale/xxx",
191 u"scale/NaN",
192 u"scale/Infinity",
193 u"scale/0.1.2",
194 u"scale/français", // non-invariant characters for C++
195 u"currency/dummy",
196 u"currency/ççç", // three characters but not ASCII
197 u"measure-unit/foo",
198 u"integer-width/xxx",
199 u"integer-width/0*",
200 u"integer-width/*0#",
201 u"integer-width/*#",
202 u"integer-width/*#0",
203 u"integer-width/0+",
204 u"integer-width/+0#",
205 u"integer-width/+#",
206 u"integer-width/+#0",
207 u"scientific/foo",
208 u"E",
209 u"E1",
210 u"E+",
211 u"E+?",
212 u"E+!",
213 u"E+0",
214 u"EE",
215 u"EE+",
216 u"EEE",
217 u"EEE0",
218 u"001",
219 u"00*",
220 u"00+",
221 };
222
223 expectedErrorSkeleton(cases, UPRV_LENGTHOF(cases));
224 }
225
unknownTokens()226 void NumberSkeletonTest::unknownTokens() {
227 static const char16_t* cases[] = {
228 u"maesure-unit",
229 u"measure-unit/foo-bar",
230 u"numbering-system/dummy",
231 u"français",
232 u"measure-unit/français-français", // non-invariant characters for C++
233 u"numbering-system/français", // non-invariant characters for C++
234 u"currency-USD"};
235
236 expectedErrorSkeleton(cases, UPRV_LENGTHOF(cases));
237 }
238
unexpectedTokens()239 void NumberSkeletonTest::unexpectedTokens() {
240 static const char16_t* cases[] = {
241 u".00/w/w",
242 u"group-thousands/foo",
243 u"precision-integer//@## group-off",
244 u"precision-integer//@## group-off",
245 u"precision-integer/ group-off",
246 u"precision-integer// group-off"};
247
248 expectedErrorSkeleton(cases, UPRV_LENGTHOF(cases));
249 }
250
duplicateValues()251 void NumberSkeletonTest::duplicateValues() {
252 static const char16_t* cases[] = {
253 u"precision-integer precision-integer",
254 u"precision-integer .00+",
255 u"precision-integer precision-unlimited",
256 u"precision-integer @@@",
257 u"scientific engineering",
258 u"engineering compact-long",
259 u"sign-auto sign-always"};
260
261 expectedErrorSkeleton(cases, UPRV_LENGTHOF(cases));
262 }
263
stemsRequiringOption()264 void NumberSkeletonTest::stemsRequiringOption() {
265 static const char16_t* stems[] = {
266 u"precision-increment",
267 u"measure-unit",
268 u"per-measure-unit",
269 u"currency",
270 u"integer-width",
271 u"numbering-system",
272 u"scale"};
273 static const char16_t* suffixes[] = {u"", u"/@##", u" scientific", u"/@## scientific"};
274
275 for (auto& stem : stems) {
276 for (auto& suffix : suffixes) {
277 UnicodeString skeletonString = UnicodeString(stem) + suffix;
278 UErrorCode status = U_ZERO_ERROR;
279 UParseError perror;
280 NumberFormatter::forSkeleton(skeletonString, perror, status);
281 assertEquals(skeletonString, U_NUMBER_SKELETON_SYNTAX_ERROR, status);
282
283 // Check the UParseError for integrity.
284 // If an option is present, the option is wrong; error offset is at the start of the option
285 // If an option is not present, the error offset is at the token separator (end of stem)
286 int32_t expectedOffset = u_strlen(stem) + ((suffix[0] == u'/') ? 1 : 0);
287 assertEquals(skeletonString, expectedOffset, perror.offset);
288 UnicodeString expectedPreContext = skeletonString.tempSubString(0, expectedOffset);
289 if (expectedPreContext.length() >= U_PARSE_CONTEXT_LEN - 1) {
290 expectedPreContext = expectedPreContext.tempSubString(expectedOffset - U_PARSE_CONTEXT_LEN + 1);
291 }
292 assertEquals(skeletonString, expectedPreContext, perror.preContext);
293 UnicodeString expectedPostContext = skeletonString.tempSubString(expectedOffset);
294 // None of the postContext strings in this test exceed U_PARSE_CONTEXT_LEN
295 assertEquals(skeletonString, expectedPostContext, perror.postContext);
296 }
297 }
298 }
299
defaultTokens()300 void NumberSkeletonTest::defaultTokens() {
301 IcuTestErrorCode status(*this, "defaultTokens");
302
303 static const char16_t* cases[] = {
304 u"notation-simple",
305 u"base-unit",
306 u"group-auto",
307 u"integer-width/+0",
308 u"sign-auto",
309 u"unit-width-short",
310 u"decimal-auto"};
311
312 for (auto& cas : cases) {
313 UnicodeString skeletonString(cas);
314 status.setScope(skeletonString);
315 UnicodeString normalized = NumberFormatter::forSkeleton(
316 skeletonString, status).toSkeleton(status);
317 // Skeleton should become empty when normalized
318 assertEquals(skeletonString, u"", normalized);
319 status.errIfFailureAndReset();
320 }
321 }
322
flexibleSeparators()323 void NumberSkeletonTest::flexibleSeparators() {
324 IcuTestErrorCode status(*this, "flexibleSeparators");
325
326 static struct TestCase {
327 const char16_t* skeleton;
328 const char16_t* expected;
329 } cases[] = {{u"precision-integer group-off", u"5142"},
330 {u"precision-integer group-off", u"5142"},
331 {u"precision-integer/@## group-off", u"5140"},
332 {u"precision-integer/@## group-off", u"5140"}};
333
334 for (auto& cas : cases) {
335 UnicodeString skeletonString(cas.skeleton);
336 UnicodeString expected(cas.expected);
337 status.setScope(skeletonString);
338 UnicodeString actual = NumberFormatter::forSkeleton(skeletonString, status).locale("en")
339 .formatDouble(5142.3, status)
340 .toString(status);
341 if (!status.errDataIfFailureAndReset()) {
342 assertEquals(skeletonString, expected, actual);
343 }
344 status.errIfFailureAndReset();
345 }
346 }
347
wildcardCharacters()348 void NumberSkeletonTest::wildcardCharacters() {
349 IcuTestErrorCode status(*this, "wildcardCharacters");
350
351 struct TestCase {
352 const char16_t* star;
353 const char16_t* plus;
354 } cases[] = {
355 { u".00*", u".00+" },
356 { u"@@*", u"@@+" },
357 { u"scientific/*ee", u"scientific/+ee" },
358 { u"integer-width/*00", u"integer-width/+00" },
359 };
360
361 for (const auto& cas : cases) {
362 UnicodeString star(cas.star);
363 UnicodeString plus(cas.plus);
364 status.setScope(star);
365
366 UnicodeString normalized = NumberFormatter::forSkeleton(plus, status)
367 .toSkeleton(status);
368 assertEquals("Plus should normalize to star", star, normalized);
369 status.errIfFailureAndReset();
370 }
371 }
372
373 // In C++, there is no distinguishing between "invalid", "unknown", and "unexpected" tokens.
expectedErrorSkeleton(const char16_t ** cases,int32_t casesLen)374 void NumberSkeletonTest::expectedErrorSkeleton(const char16_t** cases, int32_t casesLen) {
375 for (int32_t i = 0; i < casesLen; i++) {
376 UnicodeString skeletonString(cases[i]);
377 UErrorCode status = U_ZERO_ERROR;
378 NumberFormatter::forSkeleton(skeletonString, status);
379 assertEquals(skeletonString, U_NUMBER_SKELETON_SYNTAX_ERROR, status);
380 }
381 }
382
perUnitInArabic()383 void NumberSkeletonTest::perUnitInArabic() {
384 IcuTestErrorCode status(*this, "perUnitInArabic");
385
386 struct TestCase {
387 const char16_t* type;
388 const char16_t* subtype;
389 } cases[] = {
390 {u"area", u"acre"},
391 {u"digital", u"bit"},
392 {u"digital", u"byte"},
393 {u"temperature", u"celsius"},
394 {u"length", u"centimeter"},
395 {u"duration", u"day"},
396 {u"angle", u"degree"},
397 {u"temperature", u"fahrenheit"},
398 {u"volume", u"fluid-ounce"},
399 {u"length", u"foot"},
400 {u"volume", u"gallon"},
401 {u"digital", u"gigabit"},
402 {u"digital", u"gigabyte"},
403 {u"mass", u"gram"},
404 {u"area", u"hectare"},
405 {u"duration", u"hour"},
406 {u"length", u"inch"},
407 {u"digital", u"kilobit"},
408 {u"digital", u"kilobyte"},
409 {u"mass", u"kilogram"},
410 {u"length", u"kilometer"},
411 {u"volume", u"liter"},
412 {u"digital", u"megabit"},
413 {u"digital", u"megabyte"},
414 {u"length", u"meter"},
415 {u"length", u"mile"},
416 {u"length", u"mile-scandinavian"},
417 {u"volume", u"milliliter"},
418 {u"length", u"millimeter"},
419 {u"duration", u"millisecond"},
420 {u"duration", u"minute"},
421 {u"duration", u"month"},
422 {u"mass", u"ounce"},
423 {u"concentr", u"percent"},
424 {u"digital", u"petabyte"},
425 {u"mass", u"pound"},
426 {u"duration", u"second"},
427 {u"mass", u"stone"},
428 {u"digital", u"terabit"},
429 {u"digital", u"terabyte"},
430 {u"duration", u"week"},
431 {u"length", u"yard"},
432 {u"duration", u"year"},
433 };
434
435 for (const auto& cas1 : cases) {
436 for (const auto& cas2 : cases) {
437 UnicodeString skeleton(u"measure-unit/");
438 skeleton += cas1.type;
439 skeleton += u"-";
440 skeleton += cas1.subtype;
441 skeleton += u" ";
442 skeleton += u"per-measure-unit/";
443 skeleton += cas2.type;
444 skeleton += u"-";
445 skeleton += cas2.subtype;
446
447 status.setScope(skeleton);
448 UnicodeString actual = NumberFormatter::forSkeleton(skeleton, status).locale("ar")
449 .formatDouble(5142.3, status)
450 .toString(status);
451 status.errIfFailureAndReset();
452 }
453 }
454 }
455
perUnitToSkeleton()456 void NumberSkeletonTest::perUnitToSkeleton() {
457 IcuTestErrorCode status(*this, "perUnitToSkeleton");
458 struct TestCase {
459 const char16_t* type;
460 const char16_t* subtype;
461 } cases[] = {
462 {u"area", u"acre"},
463 {u"concentr", u"percent"},
464 {u"concentr", u"permille"},
465 {u"concentr", u"permillion"},
466 {u"concentr", u"permyriad"},
467 {u"digital", u"bit"},
468 {u"length", u"yard"},
469 };
470
471 for (const auto& cas1 : cases) {
472 for (const auto& cas2 : cases) {
473 UnicodeString skeleton(u"measure-unit/");
474 skeleton += cas1.type;
475 skeleton += u"-";
476 skeleton += cas1.subtype;
477 skeleton += u" ";
478 skeleton += u"per-measure-unit/";
479 skeleton += cas2.type;
480 skeleton += u"-";
481 skeleton += cas2.subtype;
482
483 status.setScope(skeleton);
484 if (cas1.type != cas2.type && cas1.subtype != cas2.subtype) {
485 UnicodeString toSkeleton = NumberFormatter::forSkeleton(
486 skeleton, status).toSkeleton(status);
487 if (status.errIfFailureAndReset()) {
488 continue;
489 }
490 // Ensure both subtype are in the toSkeleton.
491 UnicodeString msg;
492 msg.append(toSkeleton)
493 .append(" should contain '")
494 .append(UnicodeString(cas1.subtype))
495 .append("' when constructed from ")
496 .append(skeleton);
497 assertTrue(msg, toSkeleton.indexOf(cas1.subtype) >= 0);
498
499 msg.remove();
500 msg.append(toSkeleton)
501 .append(" should contain '")
502 .append(UnicodeString(cas2.subtype))
503 .append("' when constructed from ")
504 .append(skeleton);
505 assertTrue(msg, toSkeleton.indexOf(cas2.subtype) >= 0);
506 }
507 }
508 }
509 }
510
measurementSystemOverride()511 void NumberSkeletonTest::measurementSystemOverride() {
512 // NOTE TO REVIEWERS: When the appropriate changes are made on the CLDR side, do we want to keep this
513 // test or rely on additions the CLDR project makes to unitPreferencesTest.txt? --rtg 8/29/23
514 IcuTestErrorCode status(*this, "measurementSystemOverride");
515 struct TestCase {
516 const char* locale;
517 const char16_t* skeleton;
518 const char16_t* expectedResult;
519 } testCases[] = {
520 // Norway uses m/s for wind speed and should with or without the "ms-metric" subtag in the locale,
521 // but it uses km/h for other speeds. France uses km/h for all speeds. And in both places, if
522 // you say "ms-ussystem", you should get mph. In the US, we use mph for all speeds, but should
523 // use km/h if the locale has "ms-metric" in it.
524 { "nn_NO", u"unit/kilometer-per-hour usage/wind", u"0,34 m/s" },
525 { "nn_NO@measure=metric", u"unit/kilometer-per-hour usage/wind", u"0,34 m/s" },
526 { "nn_NO@measure=ussystem", u"unit/kilometer-per-hour usage/wind", u"0,76 mile/t" },
527 { "fr_FR", u"unit/kilometer-per-hour usage/wind", u"1,2\u202Fkm/h" },
528 { "fr_FR@measure=metric", u"unit/kilometer-per-hour usage/wind", u"1,2\u202Fkm/h" },
529 { "fr_FR@measure=ussystem", u"unit/kilometer-per-hour usage/wind", u"0,76\u202Fmi/h" },
530 { "en_US", u"unit/kilometer-per-hour usage/wind", u"0.76 mph" },
531 { "en_US@measure=metric", u"unit/kilometer-per-hour usage/wind", u"1.2 km/h" },
532 { "en_US@measure=ussystem", u"unit/kilometer-per-hour usage/wind", u"0.76 mph" },
533
534 { "nn_NO", u"unit/kilometer-per-hour usage/default", u"1,2 km/t" },
535 { "nn_NO@measure=metric", u"unit/kilometer-per-hour usage/default", u"1,2 km/t" },
536 { "nn_NO@measure=ussystem", u"unit/kilometer-per-hour usage/default", u"0,76 mile/t" },
537 { "fr_FR", u"unit/kilometer-per-hour usage/default", u"1,2\u202Fkm/h" },
538 { "fr_FR@measure=metric", u"unit/kilometer-per-hour usage/default", u"1,2\u202Fkm/h" },
539 { "fr_FR@measure=ussystem", u"unit/kilometer-per-hour usage/default", u"0,76\u202Fmi/h" },
540 { "en_US", u"unit/kilometer-per-hour usage/default", u"0.76 mph" },
541 { "en_US@measure=metric", u"unit/kilometer-per-hour usage/default", u"1.2 km/h" },
542 { "en_US@measure=ussystem", u"unit/kilometer-per-hour usage/default", u"0.76 mph" },
543 };
544
545 for (const auto& testCase : testCases) {
546 UErrorCode err = U_ZERO_ERROR;
547 LocalizedNumberFormatter nf = NumberFormatter::forSkeleton(testCase.skeleton, err).locale(testCase.locale);
548 UnicodeString actualResult = nf.formatDouble(1.23, err).toString(err);
549
550 UnicodeString errorMessage = ": ";
551 errorMessage += testCase.locale;
552 errorMessage += "/";
553 errorMessage += testCase.skeleton;
554 if (assertSuccess(u"Formatting error" + errorMessage, err)) {
555 assertEquals(u"Wrong result" + errorMessage, testCase.expectedResult, actualResult);
556 }
557 }
558 }
559
560 #endif /* #if !UCONFIG_NO_FORMATTING */
561