1 /*
2 *******************************************************************************
3 * Copyright (C) 1997-2012, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 *******************************************************************************
6 *
7 * File CHOICFMT.CPP
8 *
9 * Modification History:
10 *
11 * Date Name Description
12 * 02/19/97 aliu Converted from java.
13 * 03/20/97 helena Finished first cut of implementation and got rid
14 * of nextDouble/previousDouble and replaced with
15 * boolean array.
16 * 4/10/97 aliu Clean up. Modified to work on AIX.
17 * 06/04/97 helena Fixed applyPattern(), toPattern() and not to include
18 * wchar.h.
19 * 07/09/97 helena Made ParsePosition into a class.
20 * 08/06/97 nos removed overloaded constructor, fixed 'format(array)'
21 * 07/22/98 stephen JDK 1.2 Sync - removed UBool array (doubleFlags)
22 * 02/22/99 stephen Removed character literals for EBCDIC safety
23 ********************************************************************************
24 */
25
26 #include "unicode/utypes.h"
27
28 #if !UCONFIG_NO_FORMATTING
29
30 #include "unicode/choicfmt.h"
31 #include "unicode/numfmt.h"
32 #include "unicode/locid.h"
33 #include "cpputils.h"
34 #include "cstring.h"
35 #include "messageimpl.h"
36 #include "putilimp.h"
37 #include "uassert.h"
38 #include <stdio.h>
39 #include <float.h>
40
41 // *****************************************************************************
42 // class ChoiceFormat
43 // *****************************************************************************
44
45 U_NAMESPACE_BEGIN
46
47 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ChoiceFormat)
48
// Special characters used by ChoiceFormat patterns.
// LESS_EQUAL and LESS_EQUAL2 both denote "<=". When setChoices() rebuilds a
// pattern string it only ever emits LESS_EQUAL; toPattern() simply returns the
// pattern string as it was applied. findSubMessage() treats every selector
// other than LESS_THAN as "<=".
#define SINGLE_QUOTE ((UChar)0x0027) /*'*/
#define LESS_THAN ((UChar)0x003C) /*<*/
#define LESS_EQUAL ((UChar)0x0023) /*#*/
#define LESS_EQUAL2 ((UChar)0x2264)
#define VERTICAL_BAR ((UChar)0x007C) /*|*/
#define MINUS ((UChar)0x002D) /*-*/

// Braces delimit nested text; setChoices() tracks them so that it only quotes
// characters at nesting level 0.
static const UChar LEFT_CURLY_BRACE = 0x7B; /*{*/
static const UChar RIGHT_CURLY_BRACE = 0x7D; /*}*/

// A macro named INFINITY may already be defined at this point (presumably by
// a system header); drop it so the name can stand for U+221E below.
#ifdef INFINITY
#undef INFINITY
#endif
#define INFINITY ((UChar)0x221E)

//static const UChar gPositiveInfinity[] = {INFINITY, 0};
//static const UChar gNegativeInfinity[] = {MINUS, INFINITY, 0};
// Lengths of the two (commented-out) infinity strings above.
// Neither macro is referenced elsewhere in this file.
#define POSITIVE_INF_STRLEN 1
#define NEGATIVE_INF_STRLEN 2
71
72 // -------------------------------------
73 // Creates a ChoiceFormat instance based on the pattern.
74
ChoiceFormat(const UnicodeString & newPattern,UErrorCode & status)75 ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern,
76 UErrorCode& status)
77 : constructorErrorCode(status),
78 msgPattern(status)
79 {
80 applyPattern(newPattern, status);
81 }
82
83 // -------------------------------------
84 // Creates a ChoiceFormat instance with the limit array and
85 // format strings for each limit.
86
ChoiceFormat(const double * limits,const UnicodeString * formats,int32_t cnt)87 ChoiceFormat::ChoiceFormat(const double* limits,
88 const UnicodeString* formats,
89 int32_t cnt )
90 : constructorErrorCode(U_ZERO_ERROR),
91 msgPattern(constructorErrorCode)
92 {
93 setChoices(limits, NULL, formats, cnt, constructorErrorCode);
94 }
95
96 // -------------------------------------
97
ChoiceFormat(const double * limits,const UBool * closures,const UnicodeString * formats,int32_t cnt)98 ChoiceFormat::ChoiceFormat(const double* limits,
99 const UBool* closures,
100 const UnicodeString* formats,
101 int32_t cnt )
102 : constructorErrorCode(U_ZERO_ERROR),
103 msgPattern(constructorErrorCode)
104 {
105 setChoices(limits, closures, formats, cnt, constructorErrorCode);
106 }
107
108 // -------------------------------------
109 // copy constructor
110
ChoiceFormat(const ChoiceFormat & that)111 ChoiceFormat::ChoiceFormat(const ChoiceFormat& that)
112 : NumberFormat(that),
113 constructorErrorCode(that.constructorErrorCode),
114 msgPattern(that.msgPattern)
115 {
116 }
117
118 // -------------------------------------
119 // Private constructor that creates a
120 // ChoiceFormat instance based on the
121 // pattern and populates UParseError
122
ChoiceFormat(const UnicodeString & newPattern,UParseError & parseError,UErrorCode & status)123 ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern,
124 UParseError& parseError,
125 UErrorCode& status)
126 : constructorErrorCode(status),
127 msgPattern(status)
128 {
129 applyPattern(newPattern,parseError, status);
130 }
131 // -------------------------------------
132
133 UBool
operator ==(const Format & that) const134 ChoiceFormat::operator==(const Format& that) const
135 {
136 if (this == &that) return TRUE;
137 if (!NumberFormat::operator==(that)) return FALSE;
138 ChoiceFormat& thatAlias = (ChoiceFormat&)that;
139 return msgPattern == thatAlias.msgPattern;
140 }
141
142 // -------------------------------------
143 // copy constructor
144
145 const ChoiceFormat&
operator =(const ChoiceFormat & that)146 ChoiceFormat::operator=(const ChoiceFormat& that)
147 {
148 if (this != &that) {
149 NumberFormat::operator=(that);
150 constructorErrorCode = that.constructorErrorCode;
151 msgPattern = that.msgPattern;
152 }
153 return *this;
154 }
155
156 // -------------------------------------
157
~ChoiceFormat()158 ChoiceFormat::~ChoiceFormat()
159 {
160 }
161
162 // -------------------------------------
163
164 /**
165 * Convert a double value to a string without the overhead of NumberFormat.
166 */
167 UnicodeString&
dtos(double value,UnicodeString & string)168 ChoiceFormat::dtos(double value,
169 UnicodeString& string)
170 {
171 /* Buffer to contain the digits and any extra formatting stuff. */
172 char temp[DBL_DIG + 16];
173 char *itrPtr = temp;
174 char *expPtr;
175
176 sprintf(temp, "%.*g", DBL_DIG, value);
177
178 /* Find and convert the decimal point.
179 Using setlocale on some machines will cause sprintf to use a comma for certain locales.
180 */
181 while (*itrPtr && (*itrPtr == '-' || isdigit(*itrPtr))) {
182 itrPtr++;
183 }
184 if (*itrPtr != 0 && *itrPtr != 'e') {
185 /* We reached something that looks like a decimal point.
186 In case someone used setlocale(), which changes the decimal point. */
187 *itrPtr = '.';
188 itrPtr++;
189 }
190 /* Search for the exponent */
191 while (*itrPtr && *itrPtr != 'e') {
192 itrPtr++;
193 }
194 if (*itrPtr == 'e') {
195 itrPtr++;
196 /* Verify the exponent sign */
197 if (*itrPtr == '+' || *itrPtr == '-') {
198 itrPtr++;
199 }
200 /* Remove leading zeros. You will see this on Windows machines. */
201 expPtr = itrPtr;
202 while (*itrPtr == '0') {
203 itrPtr++;
204 }
205 if (*itrPtr && expPtr != itrPtr) {
206 /* Shift the exponent without zeros. */
207 while (*itrPtr) {
208 *(expPtr++) = *(itrPtr++);
209 }
210 // NULL terminate
211 *expPtr = 0;
212 }
213 }
214
215 string = UnicodeString(temp, -1, US_INV); /* invariant codepage */
216 return string;
217 }
218
219 // -------------------------------------
220 // calls the overloaded applyPattern method.
221
222 void
applyPattern(const UnicodeString & pattern,UErrorCode & status)223 ChoiceFormat::applyPattern(const UnicodeString& pattern,
224 UErrorCode& status)
225 {
226 msgPattern.parseChoiceStyle(pattern, NULL, status);
227 constructorErrorCode = status;
228 }
229
230 // -------------------------------------
231 // Applies the pattern to this ChoiceFormat instance.
232
233 void
applyPattern(const UnicodeString & pattern,UParseError & parseError,UErrorCode & status)234 ChoiceFormat::applyPattern(const UnicodeString& pattern,
235 UParseError& parseError,
236 UErrorCode& status)
237 {
238 msgPattern.parseChoiceStyle(pattern, &parseError, status);
239 constructorErrorCode = status;
240 }
241 // -------------------------------------
242 // Returns the input pattern string.
243
244 UnicodeString&
toPattern(UnicodeString & result) const245 ChoiceFormat::toPattern(UnicodeString& result) const
246 {
247 return result = msgPattern.getPatternString();
248 }
249
250 // -------------------------------------
251 // Sets the limit and format arrays.
252 void
setChoices(const double * limits,const UnicodeString * formats,int32_t cnt)253 ChoiceFormat::setChoices( const double* limits,
254 const UnicodeString* formats,
255 int32_t cnt )
256 {
257 UErrorCode errorCode = U_ZERO_ERROR;
258 setChoices(limits, NULL, formats, cnt, errorCode);
259 }
260
261 // -------------------------------------
262 // Sets the limit and format arrays.
263 void
setChoices(const double * limits,const UBool * closures,const UnicodeString * formats,int32_t cnt)264 ChoiceFormat::setChoices( const double* limits,
265 const UBool* closures,
266 const UnicodeString* formats,
267 int32_t cnt )
268 {
269 UErrorCode errorCode = U_ZERO_ERROR;
270 setChoices(limits, closures, formats, cnt, errorCode);
271 }
272
273 void
setChoices(const double * limits,const UBool * closures,const UnicodeString * formats,int32_t count,UErrorCode & errorCode)274 ChoiceFormat::setChoices(const double* limits,
275 const UBool* closures,
276 const UnicodeString* formats,
277 int32_t count,
278 UErrorCode &errorCode) {
279 if (U_FAILURE(errorCode)) {
280 return;
281 }
282 if (limits == NULL || formats == NULL) {
283 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
284 return;
285 }
286 // Reconstruct the original input pattern.
287 // Modified version of the pre-ICU 4.8 toPattern() implementation.
288 UnicodeString result;
289 for (int32_t i = 0; i < count; ++i) {
290 if (i != 0) {
291 result += VERTICAL_BAR;
292 }
293 UnicodeString buf;
294 if (uprv_isPositiveInfinity(limits[i])) {
295 result += INFINITY;
296 } else if (uprv_isNegativeInfinity(limits[i])) {
297 result += MINUS;
298 result += INFINITY;
299 } else {
300 result += dtos(limits[i], buf);
301 }
302 if (closures != NULL && closures[i]) {
303 result += LESS_THAN;
304 } else {
305 result += LESS_EQUAL;
306 }
307 // Append formats[i], using quotes if there are special
308 // characters. Single quotes themselves must be escaped in
309 // either case.
310 const UnicodeString& text = formats[i];
311 int32_t textLength = text.length();
312 int32_t nestingLevel = 0;
313 for (int32_t j = 0; j < textLength; ++j) {
314 UChar c = text[j];
315 if (c == SINGLE_QUOTE && nestingLevel == 0) {
316 // Double each top-level apostrophe.
317 result.append(c);
318 } else if (c == VERTICAL_BAR && nestingLevel == 0) {
319 // Surround each pipe symbol with apostrophes for quoting.
320 // If the next character is an apostrophe, then that will be doubled,
321 // and although the parser will see the apostrophe pairs beginning
322 // and ending one character earlier than our doubling, the result
323 // is as desired.
324 // | -> '|'
325 // |' -> '|'''
326 // |'' -> '|''''' etc.
327 result.append(SINGLE_QUOTE).append(c).append(SINGLE_QUOTE);
328 continue; // Skip the append(c) at the end of the loop body.
329 } else if (c == LEFT_CURLY_BRACE) {
330 ++nestingLevel;
331 } else if (c == RIGHT_CURLY_BRACE && nestingLevel > 0) {
332 --nestingLevel;
333 }
334 result.append(c);
335 }
336 }
337 // Apply the reconstructed pattern.
338 applyPattern(result, errorCode);
339 }
340
341 // -------------------------------------
342 // Gets the limit array.
343
344 const double*
getLimits(int32_t & cnt) const345 ChoiceFormat::getLimits(int32_t& cnt) const
346 {
347 cnt = 0;
348 return NULL;
349 }
350
351 // -------------------------------------
352 // Gets the closures array.
353
354 const UBool*
getClosures(int32_t & cnt) const355 ChoiceFormat::getClosures(int32_t& cnt) const
356 {
357 cnt = 0;
358 return NULL;
359 }
360
361 // -------------------------------------
362 // Gets the format array.
363
364 const UnicodeString*
getFormats(int32_t & cnt) const365 ChoiceFormat::getFormats(int32_t& cnt) const
366 {
367 cnt = 0;
368 return NULL;
369 }
370
371 // -------------------------------------
372 // Formats an int64 number, it's actually formatted as
373 // a double. The returned format string may differ
374 // from the input number because of this.
375
376 UnicodeString&
format(int64_t number,UnicodeString & appendTo,FieldPosition & status) const377 ChoiceFormat::format(int64_t number,
378 UnicodeString& appendTo,
379 FieldPosition& status) const
380 {
381 return format((double) number, appendTo, status);
382 }
383
384 // -------------------------------------
385 // Formats an int32_t number, it's actually formatted as
386 // a double.
387
388 UnicodeString&
format(int32_t number,UnicodeString & appendTo,FieldPosition & status) const389 ChoiceFormat::format(int32_t number,
390 UnicodeString& appendTo,
391 FieldPosition& status) const
392 {
393 return format((double) number, appendTo, status);
394 }
395
396 // -------------------------------------
397 // Formats a double number.
398
399 UnicodeString&
format(double number,UnicodeString & appendTo,FieldPosition &) const400 ChoiceFormat::format(double number,
401 UnicodeString& appendTo,
402 FieldPosition& /*pos*/) const
403 {
404 if (msgPattern.countParts() == 0) {
405 // No pattern was applied, or it failed.
406 return appendTo;
407 }
408 // Get the appropriate sub-message.
409 int32_t msgStart = findSubMessage(msgPattern, 0, number);
410 if (!MessageImpl::jdkAposMode(msgPattern)) {
411 int32_t patternStart = msgPattern.getPart(msgStart).getLimit();
412 int32_t msgLimit = msgPattern.getLimitPartIndex(msgStart);
413 appendTo.append(msgPattern.getPatternString(),
414 patternStart,
415 msgPattern.getPatternIndex(msgLimit) - patternStart);
416 return appendTo;
417 }
418 // JDK compatibility mode: Remove SKIP_SYNTAX.
419 return MessageImpl::appendSubMessageWithoutSkipSyntax(msgPattern, msgStart, appendTo);
420 }
421
422 int32_t
findSubMessage(const MessagePattern & pattern,int32_t partIndex,double number)423 ChoiceFormat::findSubMessage(const MessagePattern &pattern, int32_t partIndex, double number) {
424 int32_t count = pattern.countParts();
425 int32_t msgStart;
426 // Iterate over (ARG_INT|DOUBLE, ARG_SELECTOR, message) tuples
427 // until ARG_LIMIT or end of choice-only pattern.
428 // Ignore the first number and selector and start the loop on the first message.
429 partIndex += 2;
430 for (;;) {
431 // Skip but remember the current sub-message.
432 msgStart = partIndex;
433 partIndex = pattern.getLimitPartIndex(partIndex);
434 if (++partIndex >= count) {
435 // Reached the end of the choice-only pattern.
436 // Return with the last sub-message.
437 break;
438 }
439 const MessagePattern::Part &part = pattern.getPart(partIndex++);
440 UMessagePatternPartType type = part.getType();
441 if (type == UMSGPAT_PART_TYPE_ARG_LIMIT) {
442 // Reached the end of the ChoiceFormat style.
443 // Return with the last sub-message.
444 break;
445 }
446 // part is an ARG_INT or ARG_DOUBLE
447 U_ASSERT(MessagePattern::Part::hasNumericValue(type));
448 double boundary = pattern.getNumericValue(part);
449 // Fetch the ARG_SELECTOR character.
450 int32_t selectorIndex = pattern.getPatternIndex(partIndex++);
451 UChar boundaryChar = pattern.getPatternString().charAt(selectorIndex);
452 if (boundaryChar == LESS_THAN ? !(number > boundary) : !(number >= boundary)) {
453 // The number is in the interval between the previous boundary and the current one.
454 // Return with the sub-message between them.
455 // The !(a>b) and !(a>=b) comparisons are equivalent to
456 // (a<=b) and (a<b) except they "catch" NaN.
457 break;
458 }
459 }
460 return msgStart;
461 }
462
463 // -------------------------------------
464 // Formats an array of objects. Checks if the data type of the objects
465 // to get the right value for formatting.
466
467 UnicodeString&
format(const Formattable * objs,int32_t cnt,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const468 ChoiceFormat::format(const Formattable* objs,
469 int32_t cnt,
470 UnicodeString& appendTo,
471 FieldPosition& pos,
472 UErrorCode& status) const
473 {
474 if(cnt < 0) {
475 status = U_ILLEGAL_ARGUMENT_ERROR;
476 return appendTo;
477 }
478 if (msgPattern.countParts() == 0) {
479 status = U_INVALID_STATE_ERROR;
480 return appendTo;
481 }
482
483 for (int32_t i = 0; i < cnt; i++) {
484 double objDouble = objs[i].getDouble(status);
485 if (U_SUCCESS(status)) {
486 format(objDouble, appendTo, pos);
487 }
488 }
489
490 return appendTo;
491 }
492
493 // -------------------------------------
494 // Formats an array of objects. Checks if the data type of the objects
495 // to get the right value for formatting.
496
497 UnicodeString&
format(const Formattable & obj,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const498 ChoiceFormat::format(const Formattable& obj,
499 UnicodeString& appendTo,
500 FieldPosition& pos,
501 UErrorCode& status) const
502 {
503 return NumberFormat::format(obj, appendTo, pos, status);
504 }
505 // -------------------------------------
506
507 void
parse(const UnicodeString & text,Formattable & result,ParsePosition & pos) const508 ChoiceFormat::parse(const UnicodeString& text,
509 Formattable& result,
510 ParsePosition& pos) const
511 {
512 result.setDouble(parseArgument(msgPattern, 0, text, pos));
513 }
514
515 double
parseArgument(const MessagePattern & pattern,int32_t partIndex,const UnicodeString & source,ParsePosition & pos)516 ChoiceFormat::parseArgument(
517 const MessagePattern &pattern, int32_t partIndex,
518 const UnicodeString &source, ParsePosition &pos) {
519 // find the best number (defined as the one with the longest parse)
520 int32_t start = pos.getIndex();
521 int32_t furthest = start;
522 double bestNumber = uprv_getNaN();
523 double tempNumber = 0.0;
524 int32_t count = pattern.countParts();
525 while (partIndex < count && pattern.getPartType(partIndex) != UMSGPAT_PART_TYPE_ARG_LIMIT) {
526 tempNumber = pattern.getNumericValue(pattern.getPart(partIndex));
527 partIndex += 2; // skip the numeric part and ignore the ARG_SELECTOR
528 int32_t msgLimit = pattern.getLimitPartIndex(partIndex);
529 int32_t len = matchStringUntilLimitPart(pattern, partIndex, msgLimit, source, start);
530 if (len >= 0) {
531 int32_t newIndex = start + len;
532 if (newIndex > furthest) {
533 furthest = newIndex;
534 bestNumber = tempNumber;
535 if (furthest == source.length()) {
536 break;
537 }
538 }
539 }
540 partIndex = msgLimit + 1;
541 }
542 if (furthest == start) {
543 pos.setErrorIndex(start);
544 } else {
545 pos.setIndex(furthest);
546 }
547 return bestNumber;
548 }
549
550 int32_t
matchStringUntilLimitPart(const MessagePattern & pattern,int32_t partIndex,int32_t limitPartIndex,const UnicodeString & source,int32_t sourceOffset)551 ChoiceFormat::matchStringUntilLimitPart(
552 const MessagePattern &pattern, int32_t partIndex, int32_t limitPartIndex,
553 const UnicodeString &source, int32_t sourceOffset) {
554 int32_t matchingSourceLength = 0;
555 const UnicodeString &msgString = pattern.getPatternString();
556 int32_t prevIndex = pattern.getPart(partIndex).getLimit();
557 for (;;) {
558 const MessagePattern::Part &part = pattern.getPart(++partIndex);
559 if (partIndex == limitPartIndex || part.getType() == UMSGPAT_PART_TYPE_SKIP_SYNTAX) {
560 int32_t index = part.getIndex();
561 int32_t length = index - prevIndex;
562 if (length != 0 && 0 != source.compare(sourceOffset, length, msgString, prevIndex, length)) {
563 return -1; // mismatch
564 }
565 matchingSourceLength += length;
566 if (partIndex == limitPartIndex) {
567 return matchingSourceLength;
568 }
569 prevIndex = part.getLimit(); // SKIP_SYNTAX
570 }
571 }
572 }
573
574 // -------------------------------------
575 // Parses the text and return the Formattable object.
576
577 void
parse(const UnicodeString & text,Formattable & result,UErrorCode & status) const578 ChoiceFormat::parse(const UnicodeString& text,
579 Formattable& result,
580 UErrorCode& status) const
581 {
582 NumberFormat::parse(text, result, status);
583 }
584
585 // -------------------------------------
586
587 Format*
clone() const588 ChoiceFormat::clone() const
589 {
590 ChoiceFormat *aCopy = new ChoiceFormat(*this);
591 return aCopy;
592 }
593
594 U_NAMESPACE_END
595
596 #endif /* #if !UCONFIG_NO_FORMATTING */
597
598 //eof
599