1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 #include "unicode/utypes.h"
5
6 #if !UCONFIG_NO_FORMATTING
7
8 // Allow implicit conversion from char16_t* to UnicodeString for this file:
9 // Helpful in toString methods and elsewhere.
10 #define UNISTR_FROM_STRING_EXPLICIT
11 #define UNISTR_FROM_CHAR_EXPLICIT
12
13 #include "uassert.h"
14 #include "number_patternstring.h"
15 #include "unicode/utf16.h"
16 #include "number_utils.h"
17 #include "number_roundingutils.h"
18 #include "number_mapper.h"
19
20 using namespace icu;
21 using namespace icu::number;
22 using namespace icu::number::impl;
23
24
parseToPatternInfo(const UnicodeString & patternString,ParsedPatternInfo & patternInfo,UErrorCode & status)25 void PatternParser::parseToPatternInfo(const UnicodeString& patternString, ParsedPatternInfo& patternInfo,
26 UErrorCode& status) {
27 patternInfo.consumePattern(patternString, status);
28 }
29
30 DecimalFormatProperties
parseToProperties(const UnicodeString & pattern,IgnoreRounding ignoreRounding,UErrorCode & status)31 PatternParser::parseToProperties(const UnicodeString& pattern, IgnoreRounding ignoreRounding,
32 UErrorCode& status) {
33 DecimalFormatProperties properties;
34 parseToExistingPropertiesImpl(pattern, properties, ignoreRounding, status);
35 return properties;
36 }
37
parseToProperties(const UnicodeString & pattern,UErrorCode & status)38 DecimalFormatProperties PatternParser::parseToProperties(const UnicodeString& pattern,
39 UErrorCode& status) {
40 return parseToProperties(pattern, IGNORE_ROUNDING_NEVER, status);
41 }
42
43 void
parseToExistingProperties(const UnicodeString & pattern,DecimalFormatProperties & properties,IgnoreRounding ignoreRounding,UErrorCode & status)44 PatternParser::parseToExistingProperties(const UnicodeString& pattern, DecimalFormatProperties& properties,
45 IgnoreRounding ignoreRounding, UErrorCode& status) {
46 parseToExistingPropertiesImpl(pattern, properties, ignoreRounding, status);
47 }
48
49
charAt(int32_t flags,int32_t index) const50 char16_t ParsedPatternInfo::charAt(int32_t flags, int32_t index) const {
51 const Endpoints& endpoints = getEndpoints(flags);
52 if (index < 0 || index >= endpoints.end - endpoints.start) {
53 UPRV_UNREACHABLE;
54 }
55 return pattern.charAt(endpoints.start + index);
56 }
57
length(int32_t flags) const58 int32_t ParsedPatternInfo::length(int32_t flags) const {
59 return getLengthFromEndpoints(getEndpoints(flags));
60 }
61
getLengthFromEndpoints(const Endpoints & endpoints)62 int32_t ParsedPatternInfo::getLengthFromEndpoints(const Endpoints& endpoints) {
63 return endpoints.end - endpoints.start;
64 }
65
getString(int32_t flags) const66 UnicodeString ParsedPatternInfo::getString(int32_t flags) const {
67 const Endpoints& endpoints = getEndpoints(flags);
68 if (endpoints.start == endpoints.end) {
69 return UnicodeString();
70 }
71 // Create a new UnicodeString
72 return UnicodeString(pattern, endpoints.start, endpoints.end - endpoints.start);
73 }
74
getEndpoints(int32_t flags) const75 const Endpoints& ParsedPatternInfo::getEndpoints(int32_t flags) const {
76 bool prefix = (flags & AFFIX_PREFIX) != 0;
77 bool isNegative = (flags & AFFIX_NEGATIVE_SUBPATTERN) != 0;
78 bool padding = (flags & AFFIX_PADDING) != 0;
79 if (isNegative && padding) {
80 return negative.paddingEndpoints;
81 } else if (padding) {
82 return positive.paddingEndpoints;
83 } else if (prefix && isNegative) {
84 return negative.prefixEndpoints;
85 } else if (prefix) {
86 return positive.prefixEndpoints;
87 } else if (isNegative) {
88 return negative.suffixEndpoints;
89 } else {
90 return positive.suffixEndpoints;
91 }
92 }
93
positiveHasPlusSign() const94 bool ParsedPatternInfo::positiveHasPlusSign() const {
95 return positive.hasPlusSign;
96 }
97
hasNegativeSubpattern() const98 bool ParsedPatternInfo::hasNegativeSubpattern() const {
99 return fHasNegativeSubpattern;
100 }
101
negativeHasMinusSign() const102 bool ParsedPatternInfo::negativeHasMinusSign() const {
103 return negative.hasMinusSign;
104 }
105
hasCurrencySign() const106 bool ParsedPatternInfo::hasCurrencySign() const {
107 return positive.hasCurrencySign || (fHasNegativeSubpattern && negative.hasCurrencySign);
108 }
109
containsSymbolType(AffixPatternType type,UErrorCode & status) const110 bool ParsedPatternInfo::containsSymbolType(AffixPatternType type, UErrorCode& status) const {
111 return AffixUtils::containsType(pattern, type, status);
112 }
113
hasBody() const114 bool ParsedPatternInfo::hasBody() const {
115 return positive.integerTotal > 0;
116 }
117
118 /////////////////////////////////////////////////////
119 /// BEGIN RECURSIVE DESCENT PARSER IMPLEMENTATION ///
120 /////////////////////////////////////////////////////
121
peek()122 UChar32 ParsedPatternInfo::ParserState::peek() {
123 if (offset == pattern.length()) {
124 return -1;
125 } else {
126 return pattern.char32At(offset);
127 }
128 }
129
next()130 UChar32 ParsedPatternInfo::ParserState::next() {
131 int codePoint = peek();
132 offset += U16_LENGTH(codePoint);
133 return codePoint;
134 }
135
consumePattern(const UnicodeString & patternString,UErrorCode & status)136 void ParsedPatternInfo::consumePattern(const UnicodeString& patternString, UErrorCode& status) {
137 if (U_FAILURE(status)) { return; }
138 this->pattern = patternString;
139
140 // This class is not intended for writing twice!
141 // Use move assignment to overwrite instead.
142 U_ASSERT(state.offset == 0);
143
144 // pattern := subpattern (';' subpattern)?
145 currentSubpattern = &positive;
146 consumeSubpattern(status);
147 if (U_FAILURE(status)) { return; }
148 if (state.peek() == u';') {
149 state.next(); // consume the ';'
150 // Don't consume the negative subpattern if it is empty (trailing ';')
151 if (state.peek() != -1) {
152 fHasNegativeSubpattern = true;
153 currentSubpattern = &negative;
154 consumeSubpattern(status);
155 if (U_FAILURE(status)) { return; }
156 }
157 }
158 if (state.peek() != -1) {
159 state.toParseException(u"Found unquoted special character");
160 status = U_UNQUOTED_SPECIAL;
161 }
162 }
163
consumeSubpattern(UErrorCode & status)164 void ParsedPatternInfo::consumeSubpattern(UErrorCode& status) {
165 // subpattern := literals? number exponent? literals?
166 consumePadding(PadPosition::UNUM_PAD_BEFORE_PREFIX, status);
167 if (U_FAILURE(status)) { return; }
168 consumeAffix(currentSubpattern->prefixEndpoints, status);
169 if (U_FAILURE(status)) { return; }
170 consumePadding(PadPosition::UNUM_PAD_AFTER_PREFIX, status);
171 if (U_FAILURE(status)) { return; }
172 consumeFormat(status);
173 if (U_FAILURE(status)) { return; }
174 consumeExponent(status);
175 if (U_FAILURE(status)) { return; }
176 consumePadding(PadPosition::UNUM_PAD_BEFORE_SUFFIX, status);
177 if (U_FAILURE(status)) { return; }
178 consumeAffix(currentSubpattern->suffixEndpoints, status);
179 if (U_FAILURE(status)) { return; }
180 consumePadding(PadPosition::UNUM_PAD_AFTER_SUFFIX, status);
181 if (U_FAILURE(status)) { return; }
182 }
183
consumePadding(PadPosition paddingLocation,UErrorCode & status)184 void ParsedPatternInfo::consumePadding(PadPosition paddingLocation, UErrorCode& status) {
185 if (state.peek() != u'*') {
186 return;
187 }
188 if (currentSubpattern->hasPadding) {
189 state.toParseException(u"Cannot have multiple pad specifiers");
190 status = U_MULTIPLE_PAD_SPECIFIERS;
191 return;
192 }
193 currentSubpattern->paddingLocation = paddingLocation;
194 currentSubpattern->hasPadding = true;
195 state.next(); // consume the '*'
196 currentSubpattern->paddingEndpoints.start = state.offset;
197 consumeLiteral(status);
198 currentSubpattern->paddingEndpoints.end = state.offset;
199 }
200
consumeAffix(Endpoints & endpoints,UErrorCode & status)201 void ParsedPatternInfo::consumeAffix(Endpoints& endpoints, UErrorCode& status) {
202 // literals := { literal }
203 endpoints.start = state.offset;
204 while (true) {
205 switch (state.peek()) {
206 case u'#':
207 case u'@':
208 case u';':
209 case u'*':
210 case u'.':
211 case u',':
212 case u'0':
213 case u'1':
214 case u'2':
215 case u'3':
216 case u'4':
217 case u'5':
218 case u'6':
219 case u'7':
220 case u'8':
221 case u'9':
222 case -1:
223 // Characters that cannot appear unquoted in a literal
224 // break outer;
225 goto after_outer;
226
227 case u'%':
228 currentSubpattern->hasPercentSign = true;
229 break;
230
231 case u'‰':
232 currentSubpattern->hasPerMilleSign = true;
233 break;
234
235 case u'¤':
236 currentSubpattern->hasCurrencySign = true;
237 break;
238
239 case u'-':
240 currentSubpattern->hasMinusSign = true;
241 break;
242
243 case u'+':
244 currentSubpattern->hasPlusSign = true;
245 break;
246
247 default:
248 break;
249 }
250 consumeLiteral(status);
251 if (U_FAILURE(status)) { return; }
252 }
253 after_outer:
254 endpoints.end = state.offset;
255 }
256
consumeLiteral(UErrorCode & status)257 void ParsedPatternInfo::consumeLiteral(UErrorCode& status) {
258 if (state.peek() == -1) {
259 state.toParseException(u"Expected unquoted literal but found EOL");
260 status = U_PATTERN_SYNTAX_ERROR;
261 return;
262 } else if (state.peek() == u'\'') {
263 state.next(); // consume the starting quote
264 while (state.peek() != u'\'') {
265 if (state.peek() == -1) {
266 state.toParseException(u"Expected quoted literal but found EOL");
267 status = U_PATTERN_SYNTAX_ERROR;
268 return;
269 } else {
270 state.next(); // consume a quoted character
271 }
272 }
273 state.next(); // consume the ending quote
274 } else {
275 // consume a non-quoted literal character
276 state.next();
277 }
278 }
279
consumeFormat(UErrorCode & status)280 void ParsedPatternInfo::consumeFormat(UErrorCode& status) {
281 consumeIntegerFormat(status);
282 if (U_FAILURE(status)) { return; }
283 if (state.peek() == u'.') {
284 state.next(); // consume the decimal point
285 currentSubpattern->hasDecimal = true;
286 currentSubpattern->widthExceptAffixes += 1;
287 consumeFractionFormat(status);
288 if (U_FAILURE(status)) { return; }
289 }
290 }
291
consumeIntegerFormat(UErrorCode & status)292 void ParsedPatternInfo::consumeIntegerFormat(UErrorCode& status) {
293 // Convenience reference:
294 ParsedSubpatternInfo& result = *currentSubpattern;
295
296 while (true) {
297 switch (state.peek()) {
298 case u',':
299 result.widthExceptAffixes += 1;
300 result.groupingSizes <<= 16;
301 break;
302
303 case u'#':
304 if (result.integerNumerals > 0) {
305 state.toParseException(u"# cannot follow 0 before decimal point");
306 status = U_UNEXPECTED_TOKEN;
307 return;
308 }
309 result.widthExceptAffixes += 1;
310 result.groupingSizes += 1;
311 if (result.integerAtSigns > 0) {
312 result.integerTrailingHashSigns += 1;
313 } else {
314 result.integerLeadingHashSigns += 1;
315 }
316 result.integerTotal += 1;
317 break;
318
319 case u'@':
320 if (result.integerNumerals > 0) {
321 state.toParseException(u"Cannot mix 0 and @");
322 status = U_UNEXPECTED_TOKEN;
323 return;
324 }
325 if (result.integerTrailingHashSigns > 0) {
326 state.toParseException(u"Cannot nest # inside of a run of @");
327 status = U_UNEXPECTED_TOKEN;
328 return;
329 }
330 result.widthExceptAffixes += 1;
331 result.groupingSizes += 1;
332 result.integerAtSigns += 1;
333 result.integerTotal += 1;
334 break;
335
336 case u'0':
337 case u'1':
338 case u'2':
339 case u'3':
340 case u'4':
341 case u'5':
342 case u'6':
343 case u'7':
344 case u'8':
345 case u'9':
346 if (result.integerAtSigns > 0) {
347 state.toParseException(u"Cannot mix @ and 0");
348 status = U_UNEXPECTED_TOKEN;
349 return;
350 }
351 result.widthExceptAffixes += 1;
352 result.groupingSizes += 1;
353 result.integerNumerals += 1;
354 result.integerTotal += 1;
355 if (!result.rounding.isZeroish() || state.peek() != u'0') {
356 result.rounding.appendDigit(static_cast<int8_t>(state.peek() - u'0'), 0, true);
357 }
358 break;
359
360 default:
361 goto after_outer;
362 }
363 state.next(); // consume the symbol
364 }
365
366 after_outer:
367 // Disallow patterns with a trailing ',' or with two ',' next to each other
368 auto grouping1 = static_cast<int16_t> (result.groupingSizes & 0xffff);
369 auto grouping2 = static_cast<int16_t> ((result.groupingSizes >> 16) & 0xffff);
370 auto grouping3 = static_cast<int16_t> ((result.groupingSizes >> 32) & 0xffff);
371 if (grouping1 == 0 && grouping2 != -1) {
372 state.toParseException(u"Trailing grouping separator is invalid");
373 status = U_UNEXPECTED_TOKEN;
374 return;
375 }
376 if (grouping2 == 0 && grouping3 != -1) {
377 state.toParseException(u"Grouping width of zero is invalid");
378 status = U_PATTERN_SYNTAX_ERROR;
379 return;
380 }
381 }
382
consumeFractionFormat(UErrorCode & status)383 void ParsedPatternInfo::consumeFractionFormat(UErrorCode& status) {
384 // Convenience reference:
385 ParsedSubpatternInfo& result = *currentSubpattern;
386
387 int32_t zeroCounter = 0;
388 while (true) {
389 switch (state.peek()) {
390 case u'#':
391 result.widthExceptAffixes += 1;
392 result.fractionHashSigns += 1;
393 result.fractionTotal += 1;
394 zeroCounter++;
395 break;
396
397 case u'0':
398 case u'1':
399 case u'2':
400 case u'3':
401 case u'4':
402 case u'5':
403 case u'6':
404 case u'7':
405 case u'8':
406 case u'9':
407 if (result.fractionHashSigns > 0) {
408 state.toParseException(u"0 cannot follow # after decimal point");
409 status = U_UNEXPECTED_TOKEN;
410 return;
411 }
412 result.widthExceptAffixes += 1;
413 result.fractionNumerals += 1;
414 result.fractionTotal += 1;
415 if (state.peek() == u'0') {
416 zeroCounter++;
417 } else {
418 result.rounding
419 .appendDigit(static_cast<int8_t>(state.peek() - u'0'), zeroCounter, false);
420 zeroCounter = 0;
421 }
422 break;
423
424 default:
425 return;
426 }
427 state.next(); // consume the symbol
428 }
429 }
430
consumeExponent(UErrorCode & status)431 void ParsedPatternInfo::consumeExponent(UErrorCode& status) {
432 // Convenience reference:
433 ParsedSubpatternInfo& result = *currentSubpattern;
434
435 if (state.peek() != u'E') {
436 return;
437 }
438 if ((result.groupingSizes & 0xffff0000L) != 0xffff0000L) {
439 state.toParseException(u"Cannot have grouping separator in scientific notation");
440 status = U_MALFORMED_EXPONENTIAL_PATTERN;
441 return;
442 }
443 state.next(); // consume the E
444 result.widthExceptAffixes++;
445 if (state.peek() == u'+') {
446 state.next(); // consume the +
447 result.exponentHasPlusSign = true;
448 result.widthExceptAffixes++;
449 }
450 while (state.peek() == u'0') {
451 state.next(); // consume the 0
452 result.exponentZeros += 1;
453 result.widthExceptAffixes++;
454 }
455 }
456
457 ///////////////////////////////////////////////////
458 /// END RECURSIVE DESCENT PARSER IMPLEMENTATION ///
459 ///////////////////////////////////////////////////
460
parseToExistingPropertiesImpl(const UnicodeString & pattern,DecimalFormatProperties & properties,IgnoreRounding ignoreRounding,UErrorCode & status)461 void PatternParser::parseToExistingPropertiesImpl(const UnicodeString& pattern,
462 DecimalFormatProperties& properties,
463 IgnoreRounding ignoreRounding, UErrorCode& status) {
464 if (pattern.length() == 0) {
465 // Backwards compatibility requires that we reset to the default values.
466 // TODO: Only overwrite the properties that "saveToProperties" normally touches?
467 properties.clear();
468 return;
469 }
470
471 ParsedPatternInfo patternInfo;
472 parseToPatternInfo(pattern, patternInfo, status);
473 if (U_FAILURE(status)) { return; }
474 patternInfoToProperties(properties, patternInfo, ignoreRounding, status);
475 }
476
477 void
patternInfoToProperties(DecimalFormatProperties & properties,ParsedPatternInfo & patternInfo,IgnoreRounding _ignoreRounding,UErrorCode & status)478 PatternParser::patternInfoToProperties(DecimalFormatProperties& properties, ParsedPatternInfo& patternInfo,
479 IgnoreRounding _ignoreRounding, UErrorCode& status) {
480 // Translate from PatternParseResult to Properties.
481 // Note that most data from "negative" is ignored per the specification of DecimalFormat.
482
483 const ParsedSubpatternInfo& positive = patternInfo.positive;
484
485 bool ignoreRounding;
486 if (_ignoreRounding == IGNORE_ROUNDING_NEVER) {
487 ignoreRounding = false;
488 } else if (_ignoreRounding == IGNORE_ROUNDING_IF_CURRENCY) {
489 ignoreRounding = positive.hasCurrencySign;
490 } else {
491 U_ASSERT(_ignoreRounding == IGNORE_ROUNDING_ALWAYS);
492 ignoreRounding = true;
493 }
494
495 // Grouping settings
496 auto grouping1 = static_cast<int16_t> (positive.groupingSizes & 0xffff);
497 auto grouping2 = static_cast<int16_t> ((positive.groupingSizes >> 16) & 0xffff);
498 auto grouping3 = static_cast<int16_t> ((positive.groupingSizes >> 32) & 0xffff);
499 if (grouping2 != -1) {
500 properties.groupingSize = grouping1;
501 properties.groupingUsed = true;
502 } else {
503 properties.groupingSize = -1;
504 properties.groupingUsed = false;
505 }
506 if (grouping3 != -1) {
507 properties.secondaryGroupingSize = grouping2;
508 } else {
509 properties.secondaryGroupingSize = -1;
510 }
511
512 // For backwards compatibility, require that the pattern emit at least one min digit.
513 int minInt, minFrac;
514 if (positive.integerTotal == 0 && positive.fractionTotal > 0) {
515 // patterns like ".##"
516 minInt = 0;
517 minFrac = uprv_max(1, positive.fractionNumerals);
518 } else if (positive.integerNumerals == 0 && positive.fractionNumerals == 0) {
519 // patterns like "#.##"
520 minInt = 1;
521 minFrac = 0;
522 } else {
523 minInt = positive.integerNumerals;
524 minFrac = positive.fractionNumerals;
525 }
526
527 // Rounding settings
528 // Don't set basic rounding when there is a currency sign; defer to CurrencyUsage
529 if (positive.integerAtSigns > 0) {
530 properties.minimumFractionDigits = -1;
531 properties.maximumFractionDigits = -1;
532 properties.roundingIncrement = 0.0;
533 properties.minimumSignificantDigits = positive.integerAtSigns;
534 properties.maximumSignificantDigits = positive.integerAtSigns + positive.integerTrailingHashSigns;
535 } else if (!positive.rounding.isZeroish()) {
536 if (!ignoreRounding) {
537 properties.minimumFractionDigits = minFrac;
538 properties.maximumFractionDigits = positive.fractionTotal;
539 properties.roundingIncrement = positive.rounding.toDouble();
540 } else {
541 properties.minimumFractionDigits = -1;
542 properties.maximumFractionDigits = -1;
543 properties.roundingIncrement = 0.0;
544 }
545 properties.minimumSignificantDigits = -1;
546 properties.maximumSignificantDigits = -1;
547 } else {
548 if (!ignoreRounding) {
549 properties.minimumFractionDigits = minFrac;
550 properties.maximumFractionDigits = positive.fractionTotal;
551 properties.roundingIncrement = 0.0;
552 } else {
553 properties.minimumFractionDigits = -1;
554 properties.maximumFractionDigits = -1;
555 properties.roundingIncrement = 0.0;
556 }
557 properties.minimumSignificantDigits = -1;
558 properties.maximumSignificantDigits = -1;
559 }
560
561 // If the pattern ends with a '.' then force the decimal point.
562 if (positive.hasDecimal && positive.fractionTotal == 0) {
563 properties.decimalSeparatorAlwaysShown = true;
564 } else {
565 properties.decimalSeparatorAlwaysShown = false;
566 }
567
568 // Scientific notation settings
569 if (positive.exponentZeros > 0) {
570 properties.exponentSignAlwaysShown = positive.exponentHasPlusSign;
571 properties.minimumExponentDigits = positive.exponentZeros;
572 if (positive.integerAtSigns == 0) {
573 // patterns without '@' can define max integer digits, used for engineering notation
574 properties.minimumIntegerDigits = positive.integerNumerals;
575 properties.maximumIntegerDigits = positive.integerTotal;
576 } else {
577 // patterns with '@' cannot define max integer digits
578 properties.minimumIntegerDigits = 1;
579 properties.maximumIntegerDigits = -1;
580 }
581 } else {
582 properties.exponentSignAlwaysShown = false;
583 properties.minimumExponentDigits = -1;
584 properties.minimumIntegerDigits = minInt;
585 properties.maximumIntegerDigits = -1;
586 }
587
588 // Compute the affix patterns (required for both padding and affixes)
589 UnicodeString posPrefix = patternInfo.getString(AffixPatternProvider::AFFIX_PREFIX);
590 UnicodeString posSuffix = patternInfo.getString(0);
591
592 // Padding settings
593 if (positive.hasPadding) {
594 // The width of the positive prefix and suffix templates are included in the padding
595 int paddingWidth = positive.widthExceptAffixes +
596 AffixUtils::estimateLength(posPrefix, status) +
597 AffixUtils::estimateLength(posSuffix, status);
598 properties.formatWidth = paddingWidth;
599 UnicodeString rawPaddingString = patternInfo.getString(AffixPatternProvider::AFFIX_PADDING);
600 if (rawPaddingString.length() == 1) {
601 properties.padString = rawPaddingString;
602 } else if (rawPaddingString.length() == 2) {
603 if (rawPaddingString.charAt(0) == u'\'') {
604 properties.padString.setTo(u"'", -1);
605 } else {
606 properties.padString = rawPaddingString;
607 }
608 } else {
609 properties.padString = UnicodeString(rawPaddingString, 1, rawPaddingString.length() - 2);
610 }
611 properties.padPosition = positive.paddingLocation;
612 } else {
613 properties.formatWidth = -1;
614 properties.padString.setToBogus();
615 properties.padPosition.nullify();
616 }
617
618 // Set the affixes
619 // Always call the setter, even if the prefixes are empty, especially in the case of the
620 // negative prefix pattern, to prevent default values from overriding the pattern.
621 properties.positivePrefixPattern = posPrefix;
622 properties.positiveSuffixPattern = posSuffix;
623 if (patternInfo.fHasNegativeSubpattern) {
624 properties.negativePrefixPattern = patternInfo.getString(
625 AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN | AffixPatternProvider::AFFIX_PREFIX);
626 properties.negativeSuffixPattern = patternInfo.getString(
627 AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN);
628 } else {
629 properties.negativePrefixPattern.setToBogus();
630 properties.negativeSuffixPattern.setToBogus();
631 }
632
633 // Set the magnitude multiplier
634 if (positive.hasPercentSign) {
635 properties.magnitudeMultiplier = 2;
636 } else if (positive.hasPerMilleSign) {
637 properties.magnitudeMultiplier = 3;
638 } else {
639 properties.magnitudeMultiplier = 0;
640 }
641 }
642
643 ///////////////////////////////////////////////////////////////////
644 /// End PatternStringParser.java; begin PatternStringUtils.java ///
645 ///////////////////////////////////////////////////////////////////
646
647 // Determine whether a given roundingIncrement should be ignored for formatting
648 // based on the current maxFrac value (maximum fraction digits). For example a
649 // roundingIncrement of 0.01 should be ignored if maxFrac is 1, but not if maxFrac
650 // is 2 or more. Note that roundingIncrements are rounded in significance, so
651 // a roundingIncrement of 0.006 is treated like 0.01 for this determination, i.e.
652 // it should not be ignored if maxFrac is 2 or more (but a roundingIncrement of
653 // 0.005 is treated like 0.001 for significance). This is the reason for the
654 // initial doubling below.
655 // roundIncr must be non-zero.
ignoreRoundingIncrement(double roundIncr,int32_t maxFrac)656 bool PatternStringUtils::ignoreRoundingIncrement(double roundIncr, int32_t maxFrac) {
657 if (maxFrac < 0) {
658 return false;
659 }
660 int32_t frac = 0;
661 roundIncr *= 2.0;
662 for (frac = 0; frac <= maxFrac && roundIncr <= 1.0; frac++, roundIncr *= 10.0);
663 return (frac > maxFrac);
664 }
665
propertiesToPatternString(const DecimalFormatProperties & properties,UErrorCode & status)666 UnicodeString PatternStringUtils::propertiesToPatternString(const DecimalFormatProperties& properties,
667 UErrorCode& status) {
668 UnicodeString sb;
669
670 // Convenience references
671 // The uprv_min() calls prevent DoS
672 int32_t dosMax = 100;
673 int32_t grouping1 = uprv_max(0, uprv_min(properties.groupingSize, dosMax));
674 int32_t grouping2 = uprv_max(0, uprv_min(properties.secondaryGroupingSize, dosMax));
675 bool useGrouping = properties.groupingUsed;
676 int32_t paddingWidth = uprv_min(properties.formatWidth, dosMax);
677 NullableValue<PadPosition> paddingLocation = properties.padPosition;
678 UnicodeString paddingString = properties.padString;
679 int32_t minInt = uprv_max(0, uprv_min(properties.minimumIntegerDigits, dosMax));
680 int32_t maxInt = uprv_min(properties.maximumIntegerDigits, dosMax);
681 int32_t minFrac = uprv_max(0, uprv_min(properties.minimumFractionDigits, dosMax));
682 int32_t maxFrac = uprv_min(properties.maximumFractionDigits, dosMax);
683 int32_t minSig = uprv_min(properties.minimumSignificantDigits, dosMax);
684 int32_t maxSig = uprv_min(properties.maximumSignificantDigits, dosMax);
685 bool alwaysShowDecimal = properties.decimalSeparatorAlwaysShown;
686 int32_t exponentDigits = uprv_min(properties.minimumExponentDigits, dosMax);
687 bool exponentShowPlusSign = properties.exponentSignAlwaysShown;
688
689 AutoAffixPatternProvider affixProvider(properties, status);
690
691 // Prefixes
692 sb.append(affixProvider.get().getString(AffixPatternProvider::AFFIX_POS_PREFIX));
693 int32_t afterPrefixPos = sb.length();
694
695 // Figure out the grouping sizes.
696 if (!useGrouping) {
697 grouping1 = 0;
698 grouping2 = 0;
699 } else if (grouping1 == grouping2) {
700 grouping1 = 0;
701 }
702 int32_t groupingLength = grouping1 + grouping2 + 1;
703
704 // Figure out the digits we need to put in the pattern.
705 double roundingInterval = properties.roundingIncrement;
706 UnicodeString digitsString;
707 int32_t digitsStringScale = 0;
708 if (maxSig != uprv_min(dosMax, -1)) {
709 // Significant Digits.
710 while (digitsString.length() < minSig) {
711 digitsString.append(u'@');
712 }
713 while (digitsString.length() < maxSig) {
714 digitsString.append(u'#');
715 }
716 } else if (roundingInterval != 0.0 && !ignoreRoundingIncrement(roundingInterval,maxFrac)) {
717 // Rounding Interval.
718 digitsStringScale = -roundingutils::doubleFractionLength(roundingInterval, nullptr);
719 // TODO: Check for DoS here?
720 DecimalQuantity incrementQuantity;
721 incrementQuantity.setToDouble(roundingInterval);
722 incrementQuantity.adjustMagnitude(-digitsStringScale);
723 incrementQuantity.roundToMagnitude(0, kDefaultMode, status);
724 UnicodeString str = incrementQuantity.toPlainString();
725 if (str.charAt(0) == u'-') {
726 // TODO: Unsupported operation exception or fail silently?
727 digitsString.append(str, 1, str.length() - 1);
728 } else {
729 digitsString.append(str);
730 }
731 }
732 while (digitsString.length() + digitsStringScale < minInt) {
733 digitsString.insert(0, u'0');
734 }
735 while (-digitsStringScale < minFrac) {
736 digitsString.append(u'0');
737 digitsStringScale--;
738 }
739
740 // Write the digits to the string builder
741 int32_t m0 = uprv_max(groupingLength, digitsString.length() + digitsStringScale);
742 m0 = (maxInt != dosMax) ? uprv_max(maxInt, m0) - 1 : m0 - 1;
743 int32_t mN = (maxFrac != dosMax) ? uprv_min(-maxFrac, digitsStringScale) : digitsStringScale;
744 for (int32_t magnitude = m0; magnitude >= mN; magnitude--) {
745 int32_t di = digitsString.length() + digitsStringScale - magnitude - 1;
746 if (di < 0 || di >= digitsString.length()) {
747 sb.append(u'#');
748 } else {
749 sb.append(digitsString.charAt(di));
750 }
751 // Decimal separator
752 if (magnitude == 0 && (alwaysShowDecimal || mN < 0)) {
753 sb.append(u'.');
754 }
755 if (!useGrouping) {
756 continue;
757 }
758 // Least-significant grouping separator
759 if (magnitude > 0 && magnitude == grouping1) {
760 sb.append(u',');
761 }
762 // All other grouping separators
763 if (magnitude > grouping1 && grouping2 > 0 && (magnitude - grouping1) % grouping2 == 0) {
764 sb.append(u',');
765 }
766 }
767
768 // Exponential notation
769 if (exponentDigits != uprv_min(dosMax, -1)) {
770 sb.append(u'E');
771 if (exponentShowPlusSign) {
772 sb.append(u'+');
773 }
774 for (int32_t i = 0; i < exponentDigits; i++) {
775 sb.append(u'0');
776 }
777 }
778
779 // Suffixes
780 int32_t beforeSuffixPos = sb.length();
781 sb.append(affixProvider.get().getString(AffixPatternProvider::AFFIX_POS_SUFFIX));
782
783 // Resolve Padding
784 if (paddingWidth > 0 && !paddingLocation.isNull()) {
785 while (paddingWidth - sb.length() > 0) {
786 sb.insert(afterPrefixPos, u'#');
787 beforeSuffixPos++;
788 }
789 int32_t addedLength;
790 switch (paddingLocation.get(status)) {
791 case PadPosition::UNUM_PAD_BEFORE_PREFIX:
792 addedLength = escapePaddingString(paddingString, sb, 0, status);
793 sb.insert(0, u'*');
794 afterPrefixPos += addedLength + 1;
795 beforeSuffixPos += addedLength + 1;
796 break;
797 case PadPosition::UNUM_PAD_AFTER_PREFIX:
798 addedLength = escapePaddingString(paddingString, sb, afterPrefixPos, status);
799 sb.insert(afterPrefixPos, u'*');
800 afterPrefixPos += addedLength + 1;
801 beforeSuffixPos += addedLength + 1;
802 break;
803 case PadPosition::UNUM_PAD_BEFORE_SUFFIX:
804 escapePaddingString(paddingString, sb, beforeSuffixPos, status);
805 sb.insert(beforeSuffixPos, u'*');
806 break;
807 case PadPosition::UNUM_PAD_AFTER_SUFFIX:
808 sb.append(u'*');
809 escapePaddingString(paddingString, sb, sb.length(), status);
810 break;
811 }
812 if (U_FAILURE(status)) { return sb; }
813 }
814
815 // Negative affixes
816 // Ignore if the negative prefix pattern is "-" and the negative suffix is empty
817 if (affixProvider.get().hasNegativeSubpattern()) {
818 sb.append(u';');
819 sb.append(affixProvider.get().getString(AffixPatternProvider::AFFIX_NEG_PREFIX));
820 // Copy the positive digit format into the negative.
821 // This is optional; the pattern is the same as if '#' were appended here instead.
822 // NOTE: It is not safe to append the UnicodeString to itself, so we need to copy.
823 // See http://bugs.icu-project.org/trac/ticket/13707
824 UnicodeString copy(sb);
825 sb.append(copy, afterPrefixPos, beforeSuffixPos - afterPrefixPos);
826 sb.append(affixProvider.get().getString(AffixPatternProvider::AFFIX_NEG_SUFFIX));
827 }
828
829 return sb;
830 }
831
escapePaddingString(UnicodeString input,UnicodeString & output,int startIndex,UErrorCode & status)832 int PatternStringUtils::escapePaddingString(UnicodeString input, UnicodeString& output, int startIndex,
833 UErrorCode& status) {
834 (void) status;
835 if (input.length() == 0) {
836 input.setTo(kFallbackPaddingString, -1);
837 }
838 int startLength = output.length();
839 if (input.length() == 1) {
840 if (input.compare(u"'", -1) == 0) {
841 output.insert(startIndex, u"''", -1);
842 } else {
843 output.insert(startIndex, input);
844 }
845 } else {
846 output.insert(startIndex, u'\'');
847 int offset = 1;
848 for (int i = 0; i < input.length(); i++) {
849 // it's okay to deal in chars here because the quote mark is the only interesting thing.
850 char16_t ch = input.charAt(i);
851 if (ch == u'\'') {
852 output.insert(startIndex + offset, u"''", -1);
853 offset += 2;
854 } else {
855 output.insert(startIndex + offset, ch);
856 offset += 1;
857 }
858 }
859 output.insert(startIndex + offset, u'\'');
860 }
861 return output.length() - startLength;
862 }
863
864 UnicodeString
convertLocalized(const UnicodeString & input,const DecimalFormatSymbols & symbols,bool toLocalized,UErrorCode & status)865 PatternStringUtils::convertLocalized(const UnicodeString& input, const DecimalFormatSymbols& symbols,
866 bool toLocalized, UErrorCode& status) {
867 // Construct a table of strings to be converted between localized and standard.
868 static constexpr int32_t LEN = 21;
869 UnicodeString table[LEN][2];
870 int standIdx = toLocalized ? 0 : 1;
871 int localIdx = toLocalized ? 1 : 0;
872 table[0][standIdx] = u"%";
873 table[0][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPercentSymbol);
874 table[1][standIdx] = u"‰";
875 table[1][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPerMillSymbol);
876 table[2][standIdx] = u".";
877 table[2][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol);
878 table[3][standIdx] = u",";
879 table[3][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kGroupingSeparatorSymbol);
880 table[4][standIdx] = u"-";
881 table[4][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol);
882 table[5][standIdx] = u"+";
883 table[5][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol);
884 table[6][standIdx] = u";";
885 table[6][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPatternSeparatorSymbol);
886 table[7][standIdx] = u"@";
887 table[7][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kSignificantDigitSymbol);
888 table[8][standIdx] = u"E";
889 table[8][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kExponentialSymbol);
890 table[9][standIdx] = u"*";
891 table[9][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPadEscapeSymbol);
892 table[10][standIdx] = u"#";
893 table[10][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kDigitSymbol);
894 for (int i = 0; i < 10; i++) {
895 table[11 + i][standIdx] = u'0' + i;
896 table[11 + i][localIdx] = symbols.getConstDigitSymbol(i);
897 }
898
899 // Special case: quotes are NOT allowed to be in any localIdx strings.
900 // Substitute them with '’' instead.
901 for (int32_t i = 0; i < LEN; i++) {
902 table[i][localIdx].findAndReplace(u'\'', u'’');
903 }
904
905 // Iterate through the string and convert.
906 // State table:
907 // 0 => base state
908 // 1 => first char inside a quoted sequence in input and output string
909 // 2 => inside a quoted sequence in input and output string
910 // 3 => first char after a close quote in input string;
911 // close quote still needs to be written to output string
912 // 4 => base state in input string; inside quoted sequence in output string
913 // 5 => first char inside a quoted sequence in input string;
914 // inside quoted sequence in output string
915 UnicodeString result;
916 int state = 0;
917 for (int offset = 0; offset < input.length(); offset++) {
918 UChar ch = input.charAt(offset);
919
920 // Handle a quote character (state shift)
921 if (ch == u'\'') {
922 if (state == 0) {
923 result.append(u'\'');
924 state = 1;
925 continue;
926 } else if (state == 1) {
927 result.append(u'\'');
928 state = 0;
929 continue;
930 } else if (state == 2) {
931 state = 3;
932 continue;
933 } else if (state == 3) {
934 result.append(u'\'');
935 result.append(u'\'');
936 state = 1;
937 continue;
938 } else if (state == 4) {
939 state = 5;
940 continue;
941 } else {
942 U_ASSERT(state == 5);
943 result.append(u'\'');
944 result.append(u'\'');
945 state = 4;
946 continue;
947 }
948 }
949
950 if (state == 0 || state == 3 || state == 4) {
951 for (auto& pair : table) {
952 // Perform a greedy match on this symbol string
953 UnicodeString temp = input.tempSubString(offset, pair[0].length());
954 if (temp == pair[0]) {
955 // Skip ahead past this region for the next iteration
956 offset += pair[0].length() - 1;
957 if (state == 3 || state == 4) {
958 result.append(u'\'');
959 state = 0;
960 }
961 result.append(pair[1]);
962 goto continue_outer;
963 }
964 }
965 // No replacement found. Check if a special quote is necessary
966 for (auto& pair : table) {
967 UnicodeString temp = input.tempSubString(offset, pair[1].length());
968 if (temp == pair[1]) {
969 if (state == 0) {
970 result.append(u'\'');
971 state = 4;
972 }
973 result.append(ch);
974 goto continue_outer;
975 }
976 }
977 // Still nothing. Copy the char verbatim. (Add a close quote if necessary)
978 if (state == 3 || state == 4) {
979 result.append(u'\'');
980 state = 0;
981 }
982 result.append(ch);
983 } else {
984 U_ASSERT(state == 1 || state == 2 || state == 5);
985 result.append(ch);
986 state = 2;
987 }
988 continue_outer:;
989 }
990 // Resolve final quotes
991 if (state == 3 || state == 4) {
992 result.append(u'\'');
993 state = 0;
994 }
995 if (state != 0) {
996 // Malformed localized pattern: unterminated quote
997 status = U_PATTERN_SYNTAX_ERROR;
998 }
999 return result;
1000 }
1001
patternInfoToStringBuilder(const AffixPatternProvider & patternInfo,bool isPrefix,PatternSignType patternSignType,StandardPlural::Form plural,bool perMilleReplacesPercent,UnicodeString & output)1002 void PatternStringUtils::patternInfoToStringBuilder(const AffixPatternProvider& patternInfo, bool isPrefix,
1003 PatternSignType patternSignType,
1004 StandardPlural::Form plural,
1005 bool perMilleReplacesPercent, UnicodeString& output) {
1006
1007 // Should the output render '+' where '-' would normally appear in the pattern?
1008 bool plusReplacesMinusSign = (patternSignType == PATTERN_SIGN_TYPE_POS_SIGN)
1009 && !patternInfo.positiveHasPlusSign();
1010
1011 // Should we use the affix from the negative subpattern?
1012 // (If not, we will use the positive subpattern.)
1013 bool useNegativeAffixPattern = patternInfo.hasNegativeSubpattern()
1014 && (patternSignType == PATTERN_SIGN_TYPE_NEG
1015 || (patternInfo.negativeHasMinusSign() && plusReplacesMinusSign));
1016
1017 // Resolve the flags for the affix pattern.
1018 int flags = 0;
1019 if (useNegativeAffixPattern) {
1020 flags |= AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN;
1021 }
1022 if (isPrefix) {
1023 flags |= AffixPatternProvider::AFFIX_PREFIX;
1024 }
1025 if (plural != StandardPlural::Form::COUNT) {
1026 U_ASSERT(plural == (AffixPatternProvider::AFFIX_PLURAL_MASK & plural));
1027 flags |= plural;
1028 }
1029
1030 // Should we prepend a sign to the pattern?
1031 bool prependSign;
1032 if (!isPrefix || useNegativeAffixPattern) {
1033 prependSign = false;
1034 } else if (patternSignType == PATTERN_SIGN_TYPE_NEG) {
1035 prependSign = true;
1036 } else {
1037 prependSign = plusReplacesMinusSign;
1038 }
1039
1040 // Compute the length of the affix pattern.
1041 int length = patternInfo.length(flags) + (prependSign ? 1 : 0);
1042
1043 // Finally, set the result into the StringBuilder.
1044 output.remove();
1045 for (int index = 0; index < length; index++) {
1046 char16_t candidate;
1047 if (prependSign && index == 0) {
1048 candidate = u'-';
1049 } else if (prependSign) {
1050 candidate = patternInfo.charAt(flags, index - 1);
1051 } else {
1052 candidate = patternInfo.charAt(flags, index);
1053 }
1054 if (plusReplacesMinusSign && candidate == u'-') {
1055 candidate = u'+';
1056 }
1057 if (perMilleReplacesPercent && candidate == u'%') {
1058 candidate = u'‰';
1059 }
1060 output.append(candidate);
1061 }
1062 }
1063
resolveSignDisplay(UNumberSignDisplay signDisplay,Signum signum)1064 PatternSignType PatternStringUtils::resolveSignDisplay(UNumberSignDisplay signDisplay, Signum signum) {
1065 switch (signDisplay) {
1066 case UNUM_SIGN_AUTO:
1067 case UNUM_SIGN_ACCOUNTING:
1068 switch (signum) {
1069 case SIGNUM_NEG:
1070 case SIGNUM_NEG_ZERO:
1071 return PATTERN_SIGN_TYPE_NEG;
1072 case SIGNUM_POS_ZERO:
1073 case SIGNUM_POS:
1074 return PATTERN_SIGN_TYPE_POS;
1075 default:
1076 break;
1077 }
1078 break;
1079
1080 case UNUM_SIGN_ALWAYS:
1081 case UNUM_SIGN_ACCOUNTING_ALWAYS:
1082 switch (signum) {
1083 case SIGNUM_NEG:
1084 case SIGNUM_NEG_ZERO:
1085 return PATTERN_SIGN_TYPE_NEG;
1086 case SIGNUM_POS_ZERO:
1087 case SIGNUM_POS:
1088 return PATTERN_SIGN_TYPE_POS_SIGN;
1089 default:
1090 break;
1091 }
1092 break;
1093
1094 case UNUM_SIGN_EXCEPT_ZERO:
1095 case UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO:
1096 switch (signum) {
1097 case SIGNUM_NEG:
1098 return PATTERN_SIGN_TYPE_NEG;
1099 case SIGNUM_NEG_ZERO:
1100 case SIGNUM_POS_ZERO:
1101 return PATTERN_SIGN_TYPE_POS;
1102 case SIGNUM_POS:
1103 return PATTERN_SIGN_TYPE_POS_SIGN;
1104 default:
1105 break;
1106 }
1107 break;
1108
1109 case UNUM_SIGN_NEGATIVE:
1110 case UNUM_SIGN_ACCOUNTING_NEGATIVE:
1111 switch (signum) {
1112 case SIGNUM_NEG:
1113 return PATTERN_SIGN_TYPE_NEG;
1114 case SIGNUM_NEG_ZERO:
1115 case SIGNUM_POS_ZERO:
1116 case SIGNUM_POS:
1117 return PATTERN_SIGN_TYPE_POS;
1118 default:
1119 break;
1120 }
1121 break;
1122
1123 case UNUM_SIGN_NEVER:
1124 return PATTERN_SIGN_TYPE_POS;
1125
1126 default:
1127 break;
1128 }
1129
1130 UPRV_UNREACHABLE;
1131 return PATTERN_SIGN_TYPE_POS;
1132 }
1133
1134 #endif /* #if !UCONFIG_NO_FORMATTING */
1135