1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 #include "unicode/utypes.h"
5
6 #if !UCONFIG_NO_FORMATTING
7
8 #include "unicode/dcfmtsym.h"
9
10 #include "cstr.h"
11 #include "numbertest.h"
12 #include "number_utils.h"
13 #include "number_skeletons.h"
14 #include "putilimp.h"
15
16 using namespace icu::number::impl;
17
18
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)19 void NumberSkeletonTest::runIndexedTest(int32_t index, UBool exec, const char*& name, char*) {
20 if (exec) {
21 logln("TestSuite AffixUtilsTest: ");
22 }
23 TESTCASE_AUTO_BEGIN;
24 TESTCASE_AUTO(validTokens);
25 TESTCASE_AUTO(invalidTokens);
26 TESTCASE_AUTO(unknownTokens);
27 TESTCASE_AUTO(unexpectedTokens);
28 TESTCASE_AUTO(duplicateValues);
29 TESTCASE_AUTO(stemsRequiringOption);
30 TESTCASE_AUTO(defaultTokens);
31 TESTCASE_AUTO(flexibleSeparators);
32 TESTCASE_AUTO(wildcardCharacters);
33 TESTCASE_AUTO(perUnitInArabic);
34 TESTCASE_AUTO(perUnitToSkeleton);
35 TESTCASE_AUTO_END;
36 }
37
validTokens()38 void NumberSkeletonTest::validTokens() {
39 IcuTestErrorCode status(*this, "validTokens");
40
41 // This tests only if the tokens are valid, not their behavior.
42 // Most of these are from the design doc.
43 static const char16_t* cases[] = {
44 u"precision-integer",
45 u"precision-unlimited",
46 u"@@@##",
47 u"@@*",
48 u"@@+",
49 u"@@+/w",
50 u".000##",
51 u".00*",
52 u".00+",
53 u".",
54 u"./w",
55 u".*",
56 u".+",
57 u".+/w",
58 u".######",
59 u".00/@@*",
60 u".00/@@+",
61 u".00/@##",
62 u".00/@##/w",
63 u".00/@",
64 u".00/@r",
65 u".00/@@s",
66 u".00/@@#r",
67 u"precision-increment/3.14",
68 u"precision-increment/3.14/w",
69 u"precision-currency-standard",
70 u"precision-currency-standard/w",
71 u"precision-integer rounding-mode-half-up",
72 u".00# rounding-mode-ceiling",
73 u".00/@@* rounding-mode-floor",
74 u".00/@@+ rounding-mode-floor",
75 u"scientific",
76 u"scientific/*ee",
77 u"scientific/+ee",
78 u"scientific/sign-always",
79 u"scientific/*ee/sign-always",
80 u"scientific/+ee/sign-always",
81 u"scientific/sign-always/*ee",
82 u"scientific/sign-always/+ee",
83 u"scientific/sign-except-zero",
84 u"engineering",
85 u"engineering/*eee",
86 u"engineering/+eee",
87 u"compact-short",
88 u"compact-long",
89 u"notation-simple",
90 u"percent",
91 u"permille",
92 u"measure-unit/length-meter",
93 u"measure-unit/area-square-meter",
94 u"measure-unit/energy-joule per-measure-unit/length-meter",
95 u"unit/square-meter-per-square-meter",
96 u"currency/XXX",
97 u"currency/ZZZ",
98 u"currency/usd",
99 u"group-off",
100 u"group-min2",
101 u"group-auto",
102 u"group-on-aligned",
103 u"group-thousands",
104 u"integer-width/00",
105 u"integer-width/#0",
106 u"integer-width/*00",
107 u"integer-width/+00",
108 u"sign-always",
109 u"sign-auto",
110 u"sign-never",
111 u"sign-accounting",
112 u"sign-accounting-always",
113 u"sign-except-zero",
114 u"sign-accounting-except-zero",
115 u"unit-width-narrow",
116 u"unit-width-short",
117 u"unit-width-iso-code",
118 u"unit-width-full-name",
119 u"unit-width-hidden",
120 u"decimal-auto",
121 u"decimal-always",
122 u"scale/5.2",
123 u"scale/-5.2",
124 u"scale/100",
125 u"scale/1E2",
126 u"scale/1",
127 u"latin",
128 u"numbering-system/arab",
129 u"numbering-system/latn",
130 u"precision-integer/@##",
131 u"precision-integer rounding-mode-ceiling",
132 u"precision-currency-cash rounding-mode-ceiling",
133 u"0",
134 u"00",
135 u"000",
136 u"E0",
137 u"E00",
138 u"E000",
139 u"EE0",
140 u"EE00",
141 u"EE+?0",
142 u"EE+?00",
143 u"EE+!0",
144 u"EE+!00",
145 };
146
147 for (auto& cas : cases) {
148 UnicodeString skeletonString(cas);
149 status.setScope(skeletonString);
150 UParseError perror;
151 NumberFormatter::forSkeleton(skeletonString, perror, status);
152 assertSuccess(CStr(skeletonString)(), status, true);
153 assertEquals(skeletonString, -1, perror.offset);
154 status.errIfFailureAndReset();
155 }
156 }
157
invalidTokens()158 void NumberSkeletonTest::invalidTokens() {
159 static const char16_t* cases[] = {
160 u".00x",
161 u".00i",
162 u".00/x",
163 u".00/ww",
164 u".00##0",
165 u".##*",
166 u".00##*",
167 u".0#*",
168 u"@#*",
169 u".##+",
170 u".00##+",
171 u".0#+",
172 u"@#+",
173 u"@@x",
174 u"@@##0",
175 u".00/@@",
176 u".00/@@x",
177 u".00/@@#",
178 u".00/@@#*",
179 u".00/floor/@@*", // wrong order
180 u".00/@@#+",
181 u".00/@@@+r",
182 u".00/floor/@@+", // wrong order
183 u"precision-increment/français", // non-invariant characters for C++
184 u"scientific/ee",
185 u"precision-increment/xxx",
186 u"precision-increment/NaN",
187 u"precision-increment/Infinity",
188 u"precision-increment/0.1.2",
189 u"scale/xxx",
190 u"scale/NaN",
191 u"scale/Infinity",
192 u"scale/0.1.2",
193 u"scale/français", // non-invariant characters for C++
194 u"currency/dummy",
195 u"currency/ççç", // three characters but not ASCII
196 u"measure-unit/foo",
197 u"integer-width/xxx",
198 u"integer-width/0*",
199 u"integer-width/*0#",
200 u"integer-width/*#",
201 u"integer-width/*#0",
202 u"integer-width/0+",
203 u"integer-width/+0#",
204 u"integer-width/+#",
205 u"integer-width/+#0",
206 u"scientific/foo",
207 u"E",
208 u"E1",
209 u"E+",
210 u"E+?",
211 u"E+!",
212 u"E+0",
213 u"EE",
214 u"EE+",
215 u"EEE",
216 u"EEE0",
217 u"001",
218 u"00*",
219 u"00+",
220 };
221
222 expectedErrorSkeleton(cases, UPRV_LENGTHOF(cases));
223 }
224
unknownTokens()225 void NumberSkeletonTest::unknownTokens() {
226 static const char16_t* cases[] = {
227 u"maesure-unit",
228 u"measure-unit/foo-bar",
229 u"numbering-system/dummy",
230 u"français",
231 u"measure-unit/français-français", // non-invariant characters for C++
232 u"numbering-system/français", // non-invariant characters for C++
233 u"currency-USD"};
234
235 expectedErrorSkeleton(cases, UPRV_LENGTHOF(cases));
236 }
237
unexpectedTokens()238 void NumberSkeletonTest::unexpectedTokens() {
239 static const char16_t* cases[] = {
240 u".00/w/w",
241 u"group-thousands/foo",
242 u"precision-integer//@## group-off",
243 u"precision-integer//@## group-off",
244 u"precision-integer/ group-off",
245 u"precision-integer// group-off"};
246
247 expectedErrorSkeleton(cases, UPRV_LENGTHOF(cases));
248 }
249
duplicateValues()250 void NumberSkeletonTest::duplicateValues() {
251 static const char16_t* cases[] = {
252 u"precision-integer precision-integer",
253 u"precision-integer .00+",
254 u"precision-integer precision-unlimited",
255 u"precision-integer @@@",
256 u"scientific engineering",
257 u"engineering compact-long",
258 u"sign-auto sign-always"};
259
260 expectedErrorSkeleton(cases, UPRV_LENGTHOF(cases));
261 }
262
stemsRequiringOption()263 void NumberSkeletonTest::stemsRequiringOption() {
264 static const char16_t* stems[] = {
265 u"precision-increment",
266 u"measure-unit",
267 u"per-measure-unit",
268 u"currency",
269 u"integer-width",
270 u"numbering-system",
271 u"scale"};
272 static const char16_t* suffixes[] = {u"", u"/@##", u" scientific", u"/@## scientific"};
273
274 for (auto& stem : stems) {
275 for (auto& suffix : suffixes) {
276 UnicodeString skeletonString = UnicodeString(stem) + suffix;
277 UErrorCode status = U_ZERO_ERROR;
278 UParseError perror;
279 NumberFormatter::forSkeleton(skeletonString, perror, status);
280 assertEquals(skeletonString, U_NUMBER_SKELETON_SYNTAX_ERROR, status);
281
282 // Check the UParseError for integrity.
283 // If an option is present, the option is wrong; error offset is at the start of the option
284 // If an option is not present, the error offset is at the token separator (end of stem)
285 int32_t expectedOffset = u_strlen(stem) + ((suffix[0] == u'/') ? 1 : 0);
286 assertEquals(skeletonString, expectedOffset, perror.offset);
287 UnicodeString expectedPreContext = skeletonString.tempSubString(0, expectedOffset);
288 if (expectedPreContext.length() >= U_PARSE_CONTEXT_LEN - 1) {
289 expectedPreContext = expectedPreContext.tempSubString(expectedOffset - U_PARSE_CONTEXT_LEN + 1);
290 }
291 assertEquals(skeletonString, expectedPreContext, perror.preContext);
292 UnicodeString expectedPostContext = skeletonString.tempSubString(expectedOffset);
293 // None of the postContext strings in this test exceed U_PARSE_CONTEXT_LEN
294 assertEquals(skeletonString, expectedPostContext, perror.postContext);
295 }
296 }
297 }
298
defaultTokens()299 void NumberSkeletonTest::defaultTokens() {
300 IcuTestErrorCode status(*this, "defaultTokens");
301
302 static const char16_t* cases[] = {
303 u"notation-simple",
304 u"base-unit",
305 u"group-auto",
306 u"integer-width/+0",
307 u"sign-auto",
308 u"unit-width-short",
309 u"decimal-auto"};
310
311 for (auto& cas : cases) {
312 UnicodeString skeletonString(cas);
313 status.setScope(skeletonString);
314 UnicodeString normalized = NumberFormatter::forSkeleton(
315 skeletonString, status).toSkeleton(status);
316 // Skeleton should become empty when normalized
317 assertEquals(skeletonString, u"", normalized);
318 status.errIfFailureAndReset();
319 }
320 }
321
flexibleSeparators()322 void NumberSkeletonTest::flexibleSeparators() {
323 IcuTestErrorCode status(*this, "flexibleSeparators");
324
325 static struct TestCase {
326 const char16_t* skeleton;
327 const char16_t* expected;
328 } cases[] = {{u"precision-integer group-off", u"5142"},
329 {u"precision-integer group-off", u"5142"},
330 {u"precision-integer/@## group-off", u"5140"},
331 {u"precision-integer/@## group-off", u"5140"}};
332
333 for (auto& cas : cases) {
334 UnicodeString skeletonString(cas.skeleton);
335 UnicodeString expected(cas.expected);
336 status.setScope(skeletonString);
337 UnicodeString actual = NumberFormatter::forSkeleton(skeletonString, status).locale("en")
338 .formatDouble(5142.3, status)
339 .toString(status);
340 if (!status.errDataIfFailureAndReset()) {
341 assertEquals(skeletonString, expected, actual);
342 }
343 status.errIfFailureAndReset();
344 }
345 }
346
wildcardCharacters()347 void NumberSkeletonTest::wildcardCharacters() {
348 IcuTestErrorCode status(*this, "wildcardCharacters");
349
350 struct TestCase {
351 const char16_t* star;
352 const char16_t* plus;
353 } cases[] = {
354 { u".00*", u".00+" },
355 { u"@@*", u"@@+" },
356 { u"scientific/*ee", u"scientific/+ee" },
357 { u"integer-width/*00", u"integer-width/+00" },
358 };
359
360 for (const auto& cas : cases) {
361 UnicodeString star(cas.star);
362 UnicodeString plus(cas.plus);
363 status.setScope(star);
364
365 UnicodeString normalized = NumberFormatter::forSkeleton(plus, status)
366 .toSkeleton(status);
367 assertEquals("Plus should normalize to star", star, normalized);
368 status.errIfFailureAndReset();
369 }
370 }
371
372 // In C++, there is no distinguishing between "invalid", "unknown", and "unexpected" tokens.
expectedErrorSkeleton(const char16_t ** cases,int32_t casesLen)373 void NumberSkeletonTest::expectedErrorSkeleton(const char16_t** cases, int32_t casesLen) {
374 for (int32_t i = 0; i < casesLen; i++) {
375 UnicodeString skeletonString(cases[i]);
376 UErrorCode status = U_ZERO_ERROR;
377 NumberFormatter::forSkeleton(skeletonString, status);
378 assertEquals(skeletonString, U_NUMBER_SKELETON_SYNTAX_ERROR, status);
379 }
380 }
381
perUnitInArabic()382 void NumberSkeletonTest::perUnitInArabic() {
383 IcuTestErrorCode status(*this, "perUnitInArabic");
384
385 struct TestCase {
386 const char16_t* type;
387 const char16_t* subtype;
388 } cases[] = {
389 {u"area", u"acre"},
390 {u"digital", u"bit"},
391 {u"digital", u"byte"},
392 {u"temperature", u"celsius"},
393 {u"length", u"centimeter"},
394 {u"duration", u"day"},
395 {u"angle", u"degree"},
396 {u"temperature", u"fahrenheit"},
397 {u"volume", u"fluid-ounce"},
398 {u"length", u"foot"},
399 {u"volume", u"gallon"},
400 {u"digital", u"gigabit"},
401 {u"digital", u"gigabyte"},
402 {u"mass", u"gram"},
403 {u"area", u"hectare"},
404 {u"duration", u"hour"},
405 {u"length", u"inch"},
406 {u"digital", u"kilobit"},
407 {u"digital", u"kilobyte"},
408 {u"mass", u"kilogram"},
409 {u"length", u"kilometer"},
410 {u"volume", u"liter"},
411 {u"digital", u"megabit"},
412 {u"digital", u"megabyte"},
413 {u"length", u"meter"},
414 {u"length", u"mile"},
415 {u"length", u"mile-scandinavian"},
416 {u"volume", u"milliliter"},
417 {u"length", u"millimeter"},
418 {u"duration", u"millisecond"},
419 {u"duration", u"minute"},
420 {u"duration", u"month"},
421 {u"mass", u"ounce"},
422 {u"concentr", u"percent"},
423 {u"digital", u"petabyte"},
424 {u"mass", u"pound"},
425 {u"duration", u"second"},
426 {u"mass", u"stone"},
427 {u"digital", u"terabit"},
428 {u"digital", u"terabyte"},
429 {u"duration", u"week"},
430 {u"length", u"yard"},
431 {u"duration", u"year"},
432 };
433
434 for (const auto& cas1 : cases) {
435 for (const auto& cas2 : cases) {
436 UnicodeString skeleton(u"measure-unit/");
437 skeleton += cas1.type;
438 skeleton += u"-";
439 skeleton += cas1.subtype;
440 skeleton += u" ";
441 skeleton += u"per-measure-unit/";
442 skeleton += cas2.type;
443 skeleton += u"-";
444 skeleton += cas2.subtype;
445
446 status.setScope(skeleton);
447 UnicodeString actual = NumberFormatter::forSkeleton(skeleton, status).locale("ar")
448 .formatDouble(5142.3, status)
449 .toString(status);
450 status.errIfFailureAndReset();
451 }
452 }
453 }
454
perUnitToSkeleton()455 void NumberSkeletonTest::perUnitToSkeleton() {
456 IcuTestErrorCode status(*this, "perUnitToSkeleton");
457 struct TestCase {
458 const char16_t* type;
459 const char16_t* subtype;
460 } cases[] = {
461 {u"area", u"acre"},
462 {u"concentr", u"percent"},
463 {u"concentr", u"permille"},
464 {u"concentr", u"permillion"},
465 {u"concentr", u"permyriad"},
466 {u"digital", u"bit"},
467 {u"length", u"yard"},
468 };
469
470 for (const auto& cas1 : cases) {
471 for (const auto& cas2 : cases) {
472 UnicodeString skeleton(u"measure-unit/");
473 skeleton += cas1.type;
474 skeleton += u"-";
475 skeleton += cas1.subtype;
476 skeleton += u" ";
477 skeleton += u"per-measure-unit/";
478 skeleton += cas2.type;
479 skeleton += u"-";
480 skeleton += cas2.subtype;
481
482 status.setScope(skeleton);
483 if (cas1.type != cas2.type && cas1.subtype != cas2.subtype) {
484 UnicodeString toSkeleton = NumberFormatter::forSkeleton(
485 skeleton, status).toSkeleton(status);
486 if (status.errIfFailureAndReset()) {
487 continue;
488 }
489 // Ensure both subtype are in the toSkeleton.
490 UnicodeString msg;
491 msg.append(toSkeleton)
492 .append(" should contain '")
493 .append(UnicodeString(cas1.subtype))
494 .append("' when constructed from ")
495 .append(skeleton);
496 assertTrue(msg, toSkeleton.indexOf(cas1.subtype) >= 0);
497
498 msg.remove();
499 msg.append(toSkeleton)
500 .append(" should contain '")
501 .append(UnicodeString(cas2.subtype))
502 .append("' when constructed from ")
503 .append(skeleton);
504 assertTrue(msg, toSkeleton.indexOf(cas2.subtype) >= 0);
505 }
506 }
507 }
508 }
509
510 #endif /* #if !UCONFIG_NO_FORMATTING */
511