1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 #include "unicode/utypes.h"
5
6 #if !UCONFIG_NO_FORMATTING
7
8 // Allow implicit conversion from char16_t* to UnicodeString for this file:
9 // Helpful in toString methods and elsewhere.
10 #define UNISTR_FROM_STRING_EXPLICIT
11 #define UNISTR_FROM_CHAR_EXPLICIT
12
13 #include "uassert.h"
14 #include "number_patternstring.h"
15 #include "unicode/utf16.h"
16 #include "number_utils.h"
17 #include "number_roundingutils.h"
18
19 using namespace icu;
20 using namespace icu::number;
21 using namespace icu::number::impl;
22
23
parseToPatternInfo(const UnicodeString & patternString,ParsedPatternInfo & patternInfo,UErrorCode & status)24 void PatternParser::parseToPatternInfo(const UnicodeString& patternString, ParsedPatternInfo& patternInfo,
25 UErrorCode& status) {
26 patternInfo.consumePattern(patternString, status);
27 }
28
29 DecimalFormatProperties
parseToProperties(const UnicodeString & pattern,IgnoreRounding ignoreRounding,UErrorCode & status)30 PatternParser::parseToProperties(const UnicodeString& pattern, IgnoreRounding ignoreRounding,
31 UErrorCode& status) {
32 DecimalFormatProperties properties;
33 parseToExistingPropertiesImpl(pattern, properties, ignoreRounding, status);
34 return properties;
35 }
36
parseToProperties(const UnicodeString & pattern,UErrorCode & status)37 DecimalFormatProperties PatternParser::parseToProperties(const UnicodeString& pattern,
38 UErrorCode& status) {
39 return parseToProperties(pattern, IGNORE_ROUNDING_NEVER, status);
40 }
41
42 void
parseToExistingProperties(const UnicodeString & pattern,DecimalFormatProperties & properties,IgnoreRounding ignoreRounding,UErrorCode & status)43 PatternParser::parseToExistingProperties(const UnicodeString& pattern, DecimalFormatProperties& properties,
44 IgnoreRounding ignoreRounding, UErrorCode& status) {
45 parseToExistingPropertiesImpl(pattern, properties, ignoreRounding, status);
46 }
47
48
charAt(int32_t flags,int32_t index) const49 char16_t ParsedPatternInfo::charAt(int32_t flags, int32_t index) const {
50 const Endpoints& endpoints = getEndpoints(flags);
51 if (index < 0 || index >= endpoints.end - endpoints.start) {
52 U_ASSERT(false);
53 }
54 return pattern.charAt(endpoints.start + index);
55 }
56
length(int32_t flags) const57 int32_t ParsedPatternInfo::length(int32_t flags) const {
58 return getLengthFromEndpoints(getEndpoints(flags));
59 }
60
getLengthFromEndpoints(const Endpoints & endpoints)61 int32_t ParsedPatternInfo::getLengthFromEndpoints(const Endpoints& endpoints) {
62 return endpoints.end - endpoints.start;
63 }
64
getString(int32_t flags) const65 UnicodeString ParsedPatternInfo::getString(int32_t flags) const {
66 const Endpoints& endpoints = getEndpoints(flags);
67 if (endpoints.start == endpoints.end) {
68 return UnicodeString();
69 }
70 // Create a new UnicodeString
71 return UnicodeString(pattern, endpoints.start, endpoints.end - endpoints.start);
72 }
73
getEndpoints(int32_t flags) const74 const Endpoints& ParsedPatternInfo::getEndpoints(int32_t flags) const {
75 bool prefix = (flags & AFFIX_PREFIX) != 0;
76 bool isNegative = (flags & AFFIX_NEGATIVE_SUBPATTERN) != 0;
77 bool padding = (flags & AFFIX_PADDING) != 0;
78 if (isNegative && padding) {
79 return negative.paddingEndpoints;
80 } else if (padding) {
81 return positive.paddingEndpoints;
82 } else if (prefix && isNegative) {
83 return negative.prefixEndpoints;
84 } else if (prefix) {
85 return positive.prefixEndpoints;
86 } else if (isNegative) {
87 return negative.suffixEndpoints;
88 } else {
89 return positive.suffixEndpoints;
90 }
91 }
92
positiveHasPlusSign() const93 bool ParsedPatternInfo::positiveHasPlusSign() const {
94 return positive.hasPlusSign;
95 }
96
hasNegativeSubpattern() const97 bool ParsedPatternInfo::hasNegativeSubpattern() const {
98 return fHasNegativeSubpattern;
99 }
100
negativeHasMinusSign() const101 bool ParsedPatternInfo::negativeHasMinusSign() const {
102 return negative.hasMinusSign;
103 }
104
hasCurrencySign() const105 bool ParsedPatternInfo::hasCurrencySign() const {
106 return positive.hasCurrencySign || (fHasNegativeSubpattern && negative.hasCurrencySign);
107 }
108
containsSymbolType(AffixPatternType type,UErrorCode & status) const109 bool ParsedPatternInfo::containsSymbolType(AffixPatternType type, UErrorCode& status) const {
110 return AffixUtils::containsType(pattern, type, status);
111 }
112
hasBody() const113 bool ParsedPatternInfo::hasBody() const {
114 return positive.integerTotal > 0;
115 }
116
117 /////////////////////////////////////////////////////
118 /// BEGIN RECURSIVE DESCENT PARSER IMPLEMENTATION ///
119 /////////////////////////////////////////////////////
120
peek()121 UChar32 ParsedPatternInfo::ParserState::peek() {
122 if (offset == pattern.length()) {
123 return -1;
124 } else {
125 return pattern.char32At(offset);
126 }
127 }
128
next()129 UChar32 ParsedPatternInfo::ParserState::next() {
130 int codePoint = peek();
131 offset += U16_LENGTH(codePoint);
132 return codePoint;
133 }
134
consumePattern(const UnicodeString & patternString,UErrorCode & status)135 void ParsedPatternInfo::consumePattern(const UnicodeString& patternString, UErrorCode& status) {
136 if (U_FAILURE(status)) { return; }
137 this->pattern = patternString;
138
139 // This class is not intended for writing twice!
140 // Use move assignment to overwrite instead.
141 U_ASSERT(state.offset == 0);
142
143 // pattern := subpattern (';' subpattern)?
144 currentSubpattern = &positive;
145 consumeSubpattern(status);
146 if (U_FAILURE(status)) { return; }
147 if (state.peek() == u';') {
148 state.next(); // consume the ';'
149 // Don't consume the negative subpattern if it is empty (trailing ';')
150 if (state.peek() != -1) {
151 fHasNegativeSubpattern = true;
152 currentSubpattern = &negative;
153 consumeSubpattern(status);
154 if (U_FAILURE(status)) { return; }
155 }
156 }
157 if (state.peek() != -1) {
158 state.toParseException(u"Found unquoted special character");
159 status = U_UNQUOTED_SPECIAL;
160 }
161 }
162
consumeSubpattern(UErrorCode & status)163 void ParsedPatternInfo::consumeSubpattern(UErrorCode& status) {
164 // subpattern := literals? number exponent? literals?
165 consumePadding(PadPosition::UNUM_PAD_BEFORE_PREFIX, status);
166 if (U_FAILURE(status)) { return; }
167 consumeAffix(currentSubpattern->prefixEndpoints, status);
168 if (U_FAILURE(status)) { return; }
169 consumePadding(PadPosition::UNUM_PAD_AFTER_PREFIX, status);
170 if (U_FAILURE(status)) { return; }
171 consumeFormat(status);
172 if (U_FAILURE(status)) { return; }
173 consumeExponent(status);
174 if (U_FAILURE(status)) { return; }
175 consumePadding(PadPosition::UNUM_PAD_BEFORE_SUFFIX, status);
176 if (U_FAILURE(status)) { return; }
177 consumeAffix(currentSubpattern->suffixEndpoints, status);
178 if (U_FAILURE(status)) { return; }
179 consumePadding(PadPosition::UNUM_PAD_AFTER_SUFFIX, status);
180 if (U_FAILURE(status)) { return; }
181 }
182
consumePadding(PadPosition paddingLocation,UErrorCode & status)183 void ParsedPatternInfo::consumePadding(PadPosition paddingLocation, UErrorCode& status) {
184 if (state.peek() != u'*') {
185 return;
186 }
187 if (currentSubpattern->hasPadding) {
188 state.toParseException(u"Cannot have multiple pad specifiers");
189 status = U_MULTIPLE_PAD_SPECIFIERS;
190 return;
191 }
192 currentSubpattern->paddingLocation = paddingLocation;
193 currentSubpattern->hasPadding = true;
194 state.next(); // consume the '*'
195 currentSubpattern->paddingEndpoints.start = state.offset;
196 consumeLiteral(status);
197 currentSubpattern->paddingEndpoints.end = state.offset;
198 }
199
consumeAffix(Endpoints & endpoints,UErrorCode & status)200 void ParsedPatternInfo::consumeAffix(Endpoints& endpoints, UErrorCode& status) {
201 // literals := { literal }
202 endpoints.start = state.offset;
203 while (true) {
204 switch (state.peek()) {
205 case u'#':
206 case u'@':
207 case u';':
208 case u'*':
209 case u'.':
210 case u',':
211 case u'0':
212 case u'1':
213 case u'2':
214 case u'3':
215 case u'4':
216 case u'5':
217 case u'6':
218 case u'7':
219 case u'8':
220 case u'9':
221 case -1:
222 // Characters that cannot appear unquoted in a literal
223 // break outer;
224 goto after_outer;
225
226 case u'%':
227 currentSubpattern->hasPercentSign = true;
228 break;
229
230 case u'‰':
231 currentSubpattern->hasPerMilleSign = true;
232 break;
233
234 case u'¤':
235 currentSubpattern->hasCurrencySign = true;
236 break;
237
238 case u'-':
239 currentSubpattern->hasMinusSign = true;
240 break;
241
242 case u'+':
243 currentSubpattern->hasPlusSign = true;
244 break;
245
246 default:
247 break;
248 }
249 consumeLiteral(status);
250 if (U_FAILURE(status)) { return; }
251 }
252 after_outer:
253 endpoints.end = state.offset;
254 }
255
consumeLiteral(UErrorCode & status)256 void ParsedPatternInfo::consumeLiteral(UErrorCode& status) {
257 if (state.peek() == -1) {
258 state.toParseException(u"Expected unquoted literal but found EOL");
259 status = U_PATTERN_SYNTAX_ERROR;
260 return;
261 } else if (state.peek() == u'\'') {
262 state.next(); // consume the starting quote
263 while (state.peek() != u'\'') {
264 if (state.peek() == -1) {
265 state.toParseException(u"Expected quoted literal but found EOL");
266 status = U_PATTERN_SYNTAX_ERROR;
267 return;
268 } else {
269 state.next(); // consume a quoted character
270 }
271 }
272 state.next(); // consume the ending quote
273 } else {
274 // consume a non-quoted literal character
275 state.next();
276 }
277 }
278
consumeFormat(UErrorCode & status)279 void ParsedPatternInfo::consumeFormat(UErrorCode& status) {
280 consumeIntegerFormat(status);
281 if (U_FAILURE(status)) { return; }
282 if (state.peek() == u'.') {
283 state.next(); // consume the decimal point
284 currentSubpattern->hasDecimal = true;
285 currentSubpattern->widthExceptAffixes += 1;
286 consumeFractionFormat(status);
287 if (U_FAILURE(status)) { return; }
288 }
289 }
290
consumeIntegerFormat(UErrorCode & status)291 void ParsedPatternInfo::consumeIntegerFormat(UErrorCode& status) {
292 // Convenience reference:
293 ParsedSubpatternInfo& result = *currentSubpattern;
294
295 while (true) {
296 switch (state.peek()) {
297 case u',':
298 result.widthExceptAffixes += 1;
299 result.groupingSizes <<= 16;
300 break;
301
302 case u'#':
303 if (result.integerNumerals > 0) {
304 state.toParseException(u"# cannot follow 0 before decimal point");
305 status = U_UNEXPECTED_TOKEN;
306 return;
307 }
308 result.widthExceptAffixes += 1;
309 result.groupingSizes += 1;
310 if (result.integerAtSigns > 0) {
311 result.integerTrailingHashSigns += 1;
312 } else {
313 result.integerLeadingHashSigns += 1;
314 }
315 result.integerTotal += 1;
316 break;
317
318 case u'@':
319 if (result.integerNumerals > 0) {
320 state.toParseException(u"Cannot mix 0 and @");
321 status = U_UNEXPECTED_TOKEN;
322 return;
323 }
324 if (result.integerTrailingHashSigns > 0) {
325 state.toParseException(u"Cannot nest # inside of a run of @");
326 status = U_UNEXPECTED_TOKEN;
327 return;
328 }
329 result.widthExceptAffixes += 1;
330 result.groupingSizes += 1;
331 result.integerAtSigns += 1;
332 result.integerTotal += 1;
333 break;
334
335 case u'0':
336 case u'1':
337 case u'2':
338 case u'3':
339 case u'4':
340 case u'5':
341 case u'6':
342 case u'7':
343 case u'8':
344 case u'9':
345 if (result.integerAtSigns > 0) {
346 state.toParseException(u"Cannot mix @ and 0");
347 status = U_UNEXPECTED_TOKEN;
348 return;
349 }
350 result.widthExceptAffixes += 1;
351 result.groupingSizes += 1;
352 result.integerNumerals += 1;
353 result.integerTotal += 1;
354 if (!result.rounding.isZero() || state.peek() != u'0') {
355 result.rounding.appendDigit(static_cast<int8_t>(state.peek() - u'0'), 0, true);
356 }
357 break;
358
359 default:
360 goto after_outer;
361 }
362 state.next(); // consume the symbol
363 }
364
365 after_outer:
366 // Disallow patterns with a trailing ',' or with two ',' next to each other
367 auto grouping1 = static_cast<int16_t> (result.groupingSizes & 0xffff);
368 auto grouping2 = static_cast<int16_t> ((result.groupingSizes >> 16) & 0xffff);
369 auto grouping3 = static_cast<int16_t> ((result.groupingSizes >> 32) & 0xffff);
370 if (grouping1 == 0 && grouping2 != -1) {
371 state.toParseException(u"Trailing grouping separator is invalid");
372 status = U_UNEXPECTED_TOKEN;
373 return;
374 }
375 if (grouping2 == 0 && grouping3 != -1) {
376 state.toParseException(u"Grouping width of zero is invalid");
377 status = U_PATTERN_SYNTAX_ERROR;
378 return;
379 }
380 }
381
consumeFractionFormat(UErrorCode & status)382 void ParsedPatternInfo::consumeFractionFormat(UErrorCode& status) {
383 // Convenience reference:
384 ParsedSubpatternInfo& result = *currentSubpattern;
385
386 int32_t zeroCounter = 0;
387 while (true) {
388 switch (state.peek()) {
389 case u'#':
390 result.widthExceptAffixes += 1;
391 result.fractionHashSigns += 1;
392 result.fractionTotal += 1;
393 zeroCounter++;
394 break;
395
396 case u'0':
397 case u'1':
398 case u'2':
399 case u'3':
400 case u'4':
401 case u'5':
402 case u'6':
403 case u'7':
404 case u'8':
405 case u'9':
406 if (result.fractionHashSigns > 0) {
407 state.toParseException(u"0 cannot follow # after decimal point");
408 status = U_UNEXPECTED_TOKEN;
409 return;
410 }
411 result.widthExceptAffixes += 1;
412 result.fractionNumerals += 1;
413 result.fractionTotal += 1;
414 if (state.peek() == u'0') {
415 zeroCounter++;
416 } else {
417 result.rounding
418 .appendDigit(static_cast<int8_t>(state.peek() - u'0'), zeroCounter, false);
419 zeroCounter = 0;
420 }
421 break;
422
423 default:
424 return;
425 }
426 state.next(); // consume the symbol
427 }
428 }
429
consumeExponent(UErrorCode & status)430 void ParsedPatternInfo::consumeExponent(UErrorCode& status) {
431 // Convenience reference:
432 ParsedSubpatternInfo& result = *currentSubpattern;
433
434 if (state.peek() != u'E') {
435 return;
436 }
437 if ((result.groupingSizes & 0xffff0000L) != 0xffff0000L) {
438 state.toParseException(u"Cannot have grouping separator in scientific notation");
439 status = U_MALFORMED_EXPONENTIAL_PATTERN;
440 return;
441 }
442 state.next(); // consume the E
443 result.widthExceptAffixes++;
444 if (state.peek() == u'+') {
445 state.next(); // consume the +
446 result.exponentHasPlusSign = true;
447 result.widthExceptAffixes++;
448 }
449 while (state.peek() == u'0') {
450 state.next(); // consume the 0
451 result.exponentZeros += 1;
452 result.widthExceptAffixes++;
453 }
454 }
455
456 ///////////////////////////////////////////////////
457 /// END RECURSIVE DESCENT PARSER IMPLEMENTATION ///
458 ///////////////////////////////////////////////////
459
parseToExistingPropertiesImpl(const UnicodeString & pattern,DecimalFormatProperties & properties,IgnoreRounding ignoreRounding,UErrorCode & status)460 void PatternParser::parseToExistingPropertiesImpl(const UnicodeString& pattern,
461 DecimalFormatProperties& properties,
462 IgnoreRounding ignoreRounding, UErrorCode& status) {
463 if (pattern.length() == 0) {
464 // Backwards compatibility requires that we reset to the default values.
465 // TODO: Only overwrite the properties that "saveToProperties" normally touches?
466 properties.clear();
467 return;
468 }
469
470 ParsedPatternInfo patternInfo;
471 parseToPatternInfo(pattern, patternInfo, status);
472 if (U_FAILURE(status)) { return; }
473 patternInfoToProperties(properties, patternInfo, ignoreRounding, status);
474 }
475
476 void
patternInfoToProperties(DecimalFormatProperties & properties,ParsedPatternInfo & patternInfo,IgnoreRounding _ignoreRounding,UErrorCode & status)477 PatternParser::patternInfoToProperties(DecimalFormatProperties& properties, ParsedPatternInfo& patternInfo,
478 IgnoreRounding _ignoreRounding, UErrorCode& status) {
479 // Translate from PatternParseResult to Properties.
480 // Note that most data from "negative" is ignored per the specification of DecimalFormat.
481
482 const ParsedSubpatternInfo& positive = patternInfo.positive;
483
484 bool ignoreRounding;
485 if (_ignoreRounding == IGNORE_ROUNDING_NEVER) {
486 ignoreRounding = false;
487 } else if (_ignoreRounding == IGNORE_ROUNDING_IF_CURRENCY) {
488 ignoreRounding = positive.hasCurrencySign;
489 } else {
490 U_ASSERT(_ignoreRounding == IGNORE_ROUNDING_ALWAYS);
491 ignoreRounding = true;
492 }
493
494 // Grouping settings
495 auto grouping1 = static_cast<int16_t> (positive.groupingSizes & 0xffff);
496 auto grouping2 = static_cast<int16_t> ((positive.groupingSizes >> 16) & 0xffff);
497 auto grouping3 = static_cast<int16_t> ((positive.groupingSizes >> 32) & 0xffff);
498 if (grouping2 != -1) {
499 properties.groupingSize = grouping1;
500 properties.groupingUsed = true;
501 } else {
502 properties.groupingSize = -1;
503 properties.groupingUsed = false;
504 }
505 if (grouping3 != -1) {
506 properties.secondaryGroupingSize = grouping2;
507 } else {
508 properties.secondaryGroupingSize = -1;
509 }
510
511 // For backwards compatibility, require that the pattern emit at least one min digit.
512 int minInt, minFrac;
513 if (positive.integerTotal == 0 && positive.fractionTotal > 0) {
514 // patterns like ".##"
515 minInt = 0;
516 minFrac = uprv_max(1, positive.fractionNumerals);
517 } else if (positive.integerNumerals == 0 && positive.fractionNumerals == 0) {
518 // patterns like "#.##"
519 minInt = 1;
520 minFrac = 0;
521 } else {
522 minInt = positive.integerNumerals;
523 minFrac = positive.fractionNumerals;
524 }
525
526 // Rounding settings
527 // Don't set basic rounding when there is a currency sign; defer to CurrencyUsage
528 if (positive.integerAtSigns > 0) {
529 properties.minimumFractionDigits = -1;
530 properties.maximumFractionDigits = -1;
531 properties.roundingIncrement = 0.0;
532 properties.minimumSignificantDigits = positive.integerAtSigns;
533 properties.maximumSignificantDigits = positive.integerAtSigns + positive.integerTrailingHashSigns;
534 } else if (!positive.rounding.isZero()) {
535 if (!ignoreRounding) {
536 properties.minimumFractionDigits = minFrac;
537 properties.maximumFractionDigits = positive.fractionTotal;
538 properties.roundingIncrement = positive.rounding.toDouble();
539 } else {
540 properties.minimumFractionDigits = -1;
541 properties.maximumFractionDigits = -1;
542 properties.roundingIncrement = 0.0;
543 }
544 properties.minimumSignificantDigits = -1;
545 properties.maximumSignificantDigits = -1;
546 } else {
547 if (!ignoreRounding) {
548 properties.minimumFractionDigits = minFrac;
549 properties.maximumFractionDigits = positive.fractionTotal;
550 properties.roundingIncrement = 0.0;
551 } else {
552 properties.minimumFractionDigits = -1;
553 properties.maximumFractionDigits = -1;
554 properties.roundingIncrement = 0.0;
555 }
556 properties.minimumSignificantDigits = -1;
557 properties.maximumSignificantDigits = -1;
558 }
559
560 // If the pattern ends with a '.' then force the decimal point.
561 if (positive.hasDecimal && positive.fractionTotal == 0) {
562 properties.decimalSeparatorAlwaysShown = true;
563 } else {
564 properties.decimalSeparatorAlwaysShown = false;
565 }
566
567 // Scientific notation settings
568 if (positive.exponentZeros > 0) {
569 properties.exponentSignAlwaysShown = positive.exponentHasPlusSign;
570 properties.minimumExponentDigits = positive.exponentZeros;
571 if (positive.integerAtSigns == 0) {
572 // patterns without '@' can define max integer digits, used for engineering notation
573 properties.minimumIntegerDigits = positive.integerNumerals;
574 properties.maximumIntegerDigits = positive.integerTotal;
575 } else {
576 // patterns with '@' cannot define max integer digits
577 properties.minimumIntegerDigits = 1;
578 properties.maximumIntegerDigits = -1;
579 }
580 } else {
581 properties.exponentSignAlwaysShown = false;
582 properties.minimumExponentDigits = -1;
583 properties.minimumIntegerDigits = minInt;
584 properties.maximumIntegerDigits = -1;
585 }
586
587 // Compute the affix patterns (required for both padding and affixes)
588 UnicodeString posPrefix = patternInfo.getString(AffixPatternProvider::AFFIX_PREFIX);
589 UnicodeString posSuffix = patternInfo.getString(0);
590
591 // Padding settings
592 if (positive.hasPadding) {
593 // The width of the positive prefix and suffix templates are included in the padding
594 int paddingWidth = positive.widthExceptAffixes +
595 AffixUtils::estimateLength(posPrefix, status) +
596 AffixUtils::estimateLength(posSuffix, status);
597 properties.formatWidth = paddingWidth;
598 UnicodeString rawPaddingString = patternInfo.getString(AffixPatternProvider::AFFIX_PADDING);
599 if (rawPaddingString.length() == 1) {
600 properties.padString = rawPaddingString;
601 } else if (rawPaddingString.length() == 2) {
602 if (rawPaddingString.charAt(0) == u'\'') {
603 properties.padString.setTo(u"'", -1);
604 } else {
605 properties.padString = rawPaddingString;
606 }
607 } else {
608 properties.padString = UnicodeString(rawPaddingString, 1, rawPaddingString.length() - 2);
609 }
610 properties.padPosition = positive.paddingLocation;
611 } else {
612 properties.formatWidth = -1;
613 properties.padString.setToBogus();
614 properties.padPosition.nullify();
615 }
616
617 // Set the affixes
618 // Always call the setter, even if the prefixes are empty, especially in the case of the
619 // negative prefix pattern, to prevent default values from overriding the pattern.
620 properties.positivePrefixPattern = posPrefix;
621 properties.positiveSuffixPattern = posSuffix;
622 if (patternInfo.fHasNegativeSubpattern) {
623 properties.negativePrefixPattern = patternInfo.getString(
624 AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN | AffixPatternProvider::AFFIX_PREFIX);
625 properties.negativeSuffixPattern = patternInfo.getString(
626 AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN);
627 } else {
628 properties.negativePrefixPattern.setToBogus();
629 properties.negativeSuffixPattern.setToBogus();
630 }
631
632 // Set the magnitude multiplier
633 if (positive.hasPercentSign) {
634 properties.magnitudeMultiplier = 2;
635 } else if (positive.hasPerMilleSign) {
636 properties.magnitudeMultiplier = 3;
637 } else {
638 properties.magnitudeMultiplier = 0;
639 }
640 }
641
642 ///////////////////////////////////////////////////////////////////
643 /// End PatternStringParser.java; begin PatternStringUtils.java ///
644 ///////////////////////////////////////////////////////////////////
645
propertiesToPatternString(const DecimalFormatProperties & properties,UErrorCode & status)646 UnicodeString PatternStringUtils::propertiesToPatternString(const DecimalFormatProperties& properties,
647 UErrorCode& status) {
648 UnicodeString sb;
649
650 // Convenience references
651 // The uprv_min() calls prevent DoS
652 int dosMax = 100;
653 int groupingSize = uprv_min(properties.secondaryGroupingSize, dosMax);
654 int firstGroupingSize = uprv_min(properties.groupingSize, dosMax);
655 int paddingWidth = uprv_min(properties.formatWidth, dosMax);
656 NullableValue<PadPosition> paddingLocation = properties.padPosition;
657 UnicodeString paddingString = properties.padString;
658 int minInt = uprv_max(uprv_min(properties.minimumIntegerDigits, dosMax), 0);
659 int maxInt = uprv_min(properties.maximumIntegerDigits, dosMax);
660 int minFrac = uprv_max(uprv_min(properties.minimumFractionDigits, dosMax), 0);
661 int maxFrac = uprv_min(properties.maximumFractionDigits, dosMax);
662 int minSig = uprv_min(properties.minimumSignificantDigits, dosMax);
663 int maxSig = uprv_min(properties.maximumSignificantDigits, dosMax);
664 bool alwaysShowDecimal = properties.decimalSeparatorAlwaysShown;
665 int exponentDigits = uprv_min(properties.minimumExponentDigits, dosMax);
666 bool exponentShowPlusSign = properties.exponentSignAlwaysShown;
667 UnicodeString pp = properties.positivePrefix;
668 UnicodeString ppp = properties.positivePrefixPattern;
669 UnicodeString ps = properties.positiveSuffix;
670 UnicodeString psp = properties.positiveSuffixPattern;
671 UnicodeString np = properties.negativePrefix;
672 UnicodeString npp = properties.negativePrefixPattern;
673 UnicodeString ns = properties.negativeSuffix;
674 UnicodeString nsp = properties.negativeSuffixPattern;
675
676 // Prefixes
677 if (!ppp.isBogus()) {
678 sb.append(ppp);
679 }
680 sb.append(AffixUtils::escape(pp));
681 int afterPrefixPos = sb.length();
682
683 // Figure out the grouping sizes.
684 int grouping1, grouping2, grouping;
685 if (groupingSize != uprv_min(dosMax, -1) && firstGroupingSize != uprv_min(dosMax, -1) &&
686 groupingSize != firstGroupingSize) {
687 grouping = groupingSize;
688 grouping1 = groupingSize;
689 grouping2 = firstGroupingSize;
690 } else if (groupingSize != uprv_min(dosMax, -1)) {
691 grouping = groupingSize;
692 grouping1 = 0;
693 grouping2 = groupingSize;
694 } else if (firstGroupingSize != uprv_min(dosMax, -1)) {
695 grouping = groupingSize;
696 grouping1 = 0;
697 grouping2 = firstGroupingSize;
698 } else {
699 grouping = 0;
700 grouping1 = 0;
701 grouping2 = 0;
702 }
703 int groupingLength = grouping1 + grouping2 + 1;
704
705 // Figure out the digits we need to put in the pattern.
706 double roundingInterval = properties.roundingIncrement;
707 UnicodeString digitsString;
708 int digitsStringScale = 0;
709 if (maxSig != uprv_min(dosMax, -1)) {
710 // Significant Digits.
711 while (digitsString.length() < minSig) {
712 digitsString.append(u'@');
713 }
714 while (digitsString.length() < maxSig) {
715 digitsString.append(u'#');
716 }
717 } else if (roundingInterval != 0.0) {
718 // Rounding Interval.
719 digitsStringScale = -roundingutils::doubleFractionLength(roundingInterval);
720 // TODO: Check for DoS here?
721 DecimalQuantity incrementQuantity;
722 incrementQuantity.setToDouble(roundingInterval);
723 incrementQuantity.adjustMagnitude(-digitsStringScale);
724 incrementQuantity.roundToMagnitude(0, kDefaultMode, status);
725 UnicodeString str = incrementQuantity.toPlainString();
726 if (str.charAt(0) == u'-') {
727 // TODO: Unsupported operation exception or fail silently?
728 digitsString.append(str, 1, str.length() - 1);
729 } else {
730 digitsString.append(str);
731 }
732 }
733 while (digitsString.length() + digitsStringScale < minInt) {
734 digitsString.insert(0, u'0');
735 }
736 while (-digitsStringScale < minFrac) {
737 digitsString.append(u'0');
738 digitsStringScale--;
739 }
740
741 // Write the digits to the string builder
742 int m0 = uprv_max(groupingLength, digitsString.length() + digitsStringScale);
743 m0 = (maxInt != dosMax) ? uprv_max(maxInt, m0) - 1 : m0 - 1;
744 int mN = (maxFrac != dosMax) ? uprv_min(-maxFrac, digitsStringScale) : digitsStringScale;
745 for (int magnitude = m0; magnitude >= mN; magnitude--) {
746 int di = digitsString.length() + digitsStringScale - magnitude - 1;
747 if (di < 0 || di >= digitsString.length()) {
748 sb.append(u'#');
749 } else {
750 sb.append(digitsString.charAt(di));
751 }
752 if (magnitude > grouping2 && grouping > 0 && (magnitude - grouping2) % grouping == 0) {
753 sb.append(u',');
754 } else if (magnitude > 0 && magnitude == grouping2) {
755 sb.append(u',');
756 } else if (magnitude == 0 && (alwaysShowDecimal || mN < 0)) {
757 sb.append(u'.');
758 }
759 }
760
761 // Exponential notation
762 if (exponentDigits != uprv_min(dosMax, -1)) {
763 sb.append(u'E');
764 if (exponentShowPlusSign) {
765 sb.append(u'+');
766 }
767 for (int i = 0; i < exponentDigits; i++) {
768 sb.append(u'0');
769 }
770 }
771
772 // Suffixes
773 int beforeSuffixPos = sb.length();
774 if (!psp.isBogus()) {
775 sb.append(psp);
776 }
777 sb.append(AffixUtils::escape(ps));
778
779 // Resolve Padding
780 if (paddingWidth != -1 && !paddingLocation.isNull()) {
781 while (paddingWidth - sb.length() > 0) {
782 sb.insert(afterPrefixPos, u'#');
783 beforeSuffixPos++;
784 }
785 int addedLength;
786 switch (paddingLocation.get(status)) {
787 case PadPosition::UNUM_PAD_BEFORE_PREFIX:
788 addedLength = escapePaddingString(paddingString, sb, 0, status);
789 sb.insert(0, u'*');
790 afterPrefixPos += addedLength + 1;
791 beforeSuffixPos += addedLength + 1;
792 break;
793 case PadPosition::UNUM_PAD_AFTER_PREFIX:
794 addedLength = escapePaddingString(paddingString, sb, afterPrefixPos, status);
795 sb.insert(afterPrefixPos, u'*');
796 afterPrefixPos += addedLength + 1;
797 beforeSuffixPos += addedLength + 1;
798 break;
799 case PadPosition::UNUM_PAD_BEFORE_SUFFIX:
800 escapePaddingString(paddingString, sb, beforeSuffixPos, status);
801 sb.insert(beforeSuffixPos, u'*');
802 break;
803 case PadPosition::UNUM_PAD_AFTER_SUFFIX:
804 sb.append(u'*');
805 escapePaddingString(paddingString, sb, sb.length(), status);
806 break;
807 }
808 if (U_FAILURE(status)) { return sb; }
809 }
810
811 // Negative affixes
812 // Ignore if the negative prefix pattern is "-" and the negative suffix is empty
813 if (!np.isBogus() || !ns.isBogus() || (npp.isBogus() && !nsp.isBogus()) ||
814 (!npp.isBogus() && (npp.length() != 1 || npp.charAt(0) != u'-' || nsp.length() != 0))) {
815 sb.append(u';');
816 if (!npp.isBogus()) {
817 sb.append(npp);
818 }
819 sb.append(AffixUtils::escape(np));
820 // Copy the positive digit format into the negative.
821 // This is optional; the pattern is the same as if '#' were appended here instead.
822 // NOTE: It is not safe to append the UnicodeString to itself, so we need to copy.
823 // See http://bugs.icu-project.org/trac/ticket/13707
824 UnicodeString copy(sb);
825 sb.append(copy, afterPrefixPos, beforeSuffixPos - afterPrefixPos);
826 if (!nsp.isBogus()) {
827 sb.append(nsp);
828 }
829 sb.append(AffixUtils::escape(ns));
830 }
831
832 return sb;
833 }
834
escapePaddingString(UnicodeString input,UnicodeString & output,int startIndex,UErrorCode & status)835 int PatternStringUtils::escapePaddingString(UnicodeString input, UnicodeString& output, int startIndex,
836 UErrorCode& status) {
837 (void) status;
838 if (input.length() == 0) {
839 input.setTo(kFallbackPaddingString, -1);
840 }
841 int startLength = output.length();
842 if (input.length() == 1) {
843 if (input.compare(u"'", -1) == 0) {
844 output.insert(startIndex, u"''", -1);
845 } else {
846 output.insert(startIndex, input);
847 }
848 } else {
849 output.insert(startIndex, u'\'');
850 int offset = 1;
851 for (int i = 0; i < input.length(); i++) {
852 // it's okay to deal in chars here because the quote mark is the only interesting thing.
853 char16_t ch = input.charAt(i);
854 if (ch == u'\'') {
855 output.insert(startIndex + offset, u"''", -1);
856 offset += 2;
857 } else {
858 output.insert(startIndex + offset, ch);
859 offset += 1;
860 }
861 }
862 output.insert(startIndex + offset, u'\'');
863 }
864 return output.length() - startLength;
865 }
866
867 UnicodeString
convertLocalized(const UnicodeString & input,const DecimalFormatSymbols & symbols,bool toLocalized,UErrorCode & status)868 PatternStringUtils::convertLocalized(const UnicodeString& input, const DecimalFormatSymbols& symbols,
869 bool toLocalized, UErrorCode& status) {
870 // Construct a table of strings to be converted between localized and standard.
871 static constexpr int32_t LEN = 21;
872 UnicodeString table[LEN][2];
873 int standIdx = toLocalized ? 0 : 1;
874 int localIdx = toLocalized ? 1 : 0;
875 table[0][standIdx] = u"%";
876 table[0][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPercentSymbol);
877 table[1][standIdx] = u"‰";
878 table[1][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPerMillSymbol);
879 table[2][standIdx] = u".";
880 table[2][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol);
881 table[3][standIdx] = u",";
882 table[3][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kGroupingSeparatorSymbol);
883 table[4][standIdx] = u"-";
884 table[4][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol);
885 table[5][standIdx] = u"+";
886 table[5][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol);
887 table[6][standIdx] = u";";
888 table[6][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPatternSeparatorSymbol);
889 table[7][standIdx] = u"@";
890 table[7][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kSignificantDigitSymbol);
891 table[8][standIdx] = u"E";
892 table[8][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kExponentialSymbol);
893 table[9][standIdx] = u"*";
894 table[9][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPadEscapeSymbol);
895 table[10][standIdx] = u"#";
896 table[10][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kDigitSymbol);
897 for (int i = 0; i < 10; i++) {
898 table[11 + i][standIdx] = u'0' + i;
899 table[11 + i][localIdx] = symbols.getConstDigitSymbol(i);
900 }
901
902 // Special case: quotes are NOT allowed to be in any localIdx strings.
903 // Substitute them with '’' instead.
904 for (int32_t i = 0; i < LEN; i++) {
905 table[i][localIdx].findAndReplace(u'\'', u'’');
906 }
907
908 // Iterate through the string and convert.
909 // State table:
910 // 0 => base state
911 // 1 => first char inside a quoted sequence in input and output string
912 // 2 => inside a quoted sequence in input and output string
913 // 3 => first char after a close quote in input string;
914 // close quote still needs to be written to output string
915 // 4 => base state in input string; inside quoted sequence in output string
916 // 5 => first char inside a quoted sequence in input string;
917 // inside quoted sequence in output string
918 UnicodeString result;
919 int state = 0;
920 for (int offset = 0; offset < input.length(); offset++) {
921 UChar ch = input.charAt(offset);
922
923 // Handle a quote character (state shift)
924 if (ch == u'\'') {
925 if (state == 0) {
926 result.append(u'\'');
927 state = 1;
928 continue;
929 } else if (state == 1) {
930 result.append(u'\'');
931 state = 0;
932 continue;
933 } else if (state == 2) {
934 state = 3;
935 continue;
936 } else if (state == 3) {
937 result.append(u'\'');
938 result.append(u'\'');
939 state = 1;
940 continue;
941 } else if (state == 4) {
942 state = 5;
943 continue;
944 } else {
945 U_ASSERT(state == 5);
946 result.append(u'\'');
947 result.append(u'\'');
948 state = 4;
949 continue;
950 }
951 }
952
953 if (state == 0 || state == 3 || state == 4) {
954 for (auto& pair : table) {
955 // Perform a greedy match on this symbol string
956 UnicodeString temp = input.tempSubString(offset, pair[0].length());
957 if (temp == pair[0]) {
958 // Skip ahead past this region for the next iteration
959 offset += pair[0].length() - 1;
960 if (state == 3 || state == 4) {
961 result.append(u'\'');
962 state = 0;
963 }
964 result.append(pair[1]);
965 goto continue_outer;
966 }
967 }
968 // No replacement found. Check if a special quote is necessary
969 for (auto& pair : table) {
970 UnicodeString temp = input.tempSubString(offset, pair[1].length());
971 if (temp == pair[1]) {
972 if (state == 0) {
973 result.append(u'\'');
974 state = 4;
975 }
976 result.append(ch);
977 goto continue_outer;
978 }
979 }
980 // Still nothing. Copy the char verbatim. (Add a close quote if necessary)
981 if (state == 3 || state == 4) {
982 result.append(u'\'');
983 state = 0;
984 }
985 result.append(ch);
986 } else {
987 U_ASSERT(state == 1 || state == 2 || state == 5);
988 result.append(ch);
989 state = 2;
990 }
991 continue_outer:;
992 }
993 // Resolve final quotes
994 if (state == 3 || state == 4) {
995 result.append(u'\'');
996 state = 0;
997 }
998 if (state != 0) {
999 // Malformed localized pattern: unterminated quote
1000 status = U_PATTERN_SYNTAX_ERROR;
1001 }
1002 return result;
1003 }
1004
patternInfoToStringBuilder(const AffixPatternProvider & patternInfo,bool isPrefix,int8_t signum,UNumberSignDisplay signDisplay,StandardPlural::Form plural,bool perMilleReplacesPercent,UnicodeString & output)1005 void PatternStringUtils::patternInfoToStringBuilder(const AffixPatternProvider& patternInfo, bool isPrefix,
1006 int8_t signum, UNumberSignDisplay signDisplay,
1007 StandardPlural::Form plural,
1008 bool perMilleReplacesPercent, UnicodeString& output) {
1009
1010 // Should the output render '+' where '-' would normally appear in the pattern?
1011 bool plusReplacesMinusSign = signum != -1 && (
1012 signDisplay == UNUM_SIGN_ALWAYS || signDisplay == UNUM_SIGN_ACCOUNTING_ALWAYS || (
1013 signum == 1 && (
1014 signDisplay == UNUM_SIGN_EXCEPT_ZERO ||
1015 signDisplay == UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO))) &&
1016 patternInfo.positiveHasPlusSign() == false;
1017
1018 // Should we use the affix from the negative subpattern? (If not, we will use the positive
1019 // subpattern.)
1020 bool useNegativeAffixPattern = patternInfo.hasNegativeSubpattern() && (
1021 signum == -1 || (patternInfo.negativeHasMinusSign() && plusReplacesMinusSign));
1022
1023 // Resolve the flags for the affix pattern.
1024 int flags = 0;
1025 if (useNegativeAffixPattern) {
1026 flags |= AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN;
1027 }
1028 if (isPrefix) {
1029 flags |= AffixPatternProvider::AFFIX_PREFIX;
1030 }
1031 if (plural != StandardPlural::Form::COUNT) {
1032 U_ASSERT(plural == (AffixPatternProvider::AFFIX_PLURAL_MASK & plural));
1033 flags |= plural;
1034 }
1035
1036 // Should we prepend a sign to the pattern?
1037 bool prependSign;
1038 if (!isPrefix || useNegativeAffixPattern) {
1039 prependSign = false;
1040 } else if (signum == -1) {
1041 prependSign = signDisplay != UNUM_SIGN_NEVER;
1042 } else {
1043 prependSign = plusReplacesMinusSign;
1044 }
1045
1046 // Compute the length of the affix pattern.
1047 int length = patternInfo.length(flags) + (prependSign ? 1 : 0);
1048
1049 // Finally, set the result into the StringBuilder.
1050 output.remove();
1051 for (int index = 0; index < length; index++) {
1052 char16_t candidate;
1053 if (prependSign && index == 0) {
1054 candidate = u'-';
1055 } else if (prependSign) {
1056 candidate = patternInfo.charAt(flags, index - 1);
1057 } else {
1058 candidate = patternInfo.charAt(flags, index);
1059 }
1060 if (plusReplacesMinusSign && candidate == u'-') {
1061 candidate = u'+';
1062 }
1063 if (perMilleReplacesPercent && candidate == u'%') {
1064 candidate = u'‰';
1065 }
1066 output.append(candidate);
1067 }
1068 }
1069
1070 #endif /* #if !UCONFIG_NO_FORMATTING */
1071