1 /********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2010, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************
6 *
7 * File MSGFMT.CPP
8 *
9 * Modification History:
10 *
11 * Date Name Description
12 * 02/19/97 aliu Converted from java.
13 * 03/20/97 helena Finished first cut of implementation.
14 * 04/10/97 aliu Made to work on AIX. Added stoi to replace wtoi.
15 * 06/11/97 helena Fixed addPattern to take the pattern correctly.
16 * 06/17/97 helena Fixed the getPattern to return the correct pattern.
17 * 07/09/97 helena Made ParsePosition into a class.
18 * 02/22/99 stephen Removed character literals for EBCDIC safety
19 * 11/01/09 kirtig Added SelectFormat
20 ********************************************************************/
21
22 #include "unicode/utypes.h"
23
24 #if !UCONFIG_NO_FORMATTING
25
26 #include "unicode/msgfmt.h"
27 #include "unicode/decimfmt.h"
28 #include "unicode/datefmt.h"
29 #include "unicode/smpdtfmt.h"
30 #include "unicode/choicfmt.h"
31 #include "unicode/plurfmt.h"
32 #include "unicode/selfmt.h"
33 #include "unicode/ustring.h"
34 #include "unicode/ucnv_err.h"
35 #include "unicode/uchar.h"
36 #include "unicode/umsg.h"
37 #include "unicode/rbnf.h"
38 #include "cmemory.h"
39 #include "msgfmt_impl.h"
40 #include "util.h"
41 #include "uassert.h"
42 #include "ustrfmt.h"
43 #include "uvector.h"
44
45 // *****************************************************************************
46 // class MessageFormat
47 // *****************************************************************************
48
49 #define COMMA ((UChar)0x002C)
50 #define SINGLE_QUOTE ((UChar)0x0027)
51 #define LEFT_CURLY_BRACE ((UChar)0x007B)
52 #define RIGHT_CURLY_BRACE ((UChar)0x007D)
53
54 //---------------------------------------
55 // static data
56
57 static const UChar ID_EMPTY[] = {
58 0 /* empty string, used for default so that null can mark end of list */
59 };
60
61 static const UChar ID_NUMBER[] = {
62 0x6E, 0x75, 0x6D, 0x62, 0x65, 0x72, 0 /* "number" */
63 };
64 static const UChar ID_DATE[] = {
65 0x64, 0x61, 0x74, 0x65, 0 /* "date" */
66 };
67 static const UChar ID_TIME[] = {
68 0x74, 0x69, 0x6D, 0x65, 0 /* "time" */
69 };
70 static const UChar ID_CHOICE[] = {
71 0x63, 0x68, 0x6F, 0x69, 0x63, 0x65, 0 /* "choice" */
72 };
73 static const UChar ID_SPELLOUT[] = {
74 0x73, 0x70, 0x65, 0x6c, 0x6c, 0x6f, 0x75, 0x74, 0 /* "spellout" */
75 };
76 static const UChar ID_ORDINAL[] = {
77 0x6f, 0x72, 0x64, 0x69, 0x6e, 0x61, 0x6c, 0 /* "ordinal" */
78 };
79 static const UChar ID_DURATION[] = {
80 0x64, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0 /* "duration" */
81 };
82 static const UChar ID_PLURAL[] = {
83 0x70, 0x6c, 0x75, 0x72, 0x61, 0x6c, 0 /* "plural" */
84 };
85 static const UChar ID_SELECT[] = {
86 0x73, 0x65, 0x6C, 0x65, 0x63, 0x74, 0 /* "select" */
87 };
88
89 // MessageFormat Type List Number, Date, Time or Choice
90 static const UChar * const TYPE_IDS[] = {
91 ID_EMPTY,
92 ID_NUMBER,
93 ID_DATE,
94 ID_TIME,
95 ID_CHOICE,
96 ID_SPELLOUT,
97 ID_ORDINAL,
98 ID_DURATION,
99 ID_PLURAL,
100 ID_SELECT,
101 NULL,
102 };
103
104 static const UChar ID_CURRENCY[] = {
105 0x63, 0x75, 0x72, 0x72, 0x65, 0x6E, 0x63, 0x79, 0 /* "currency" */
106 };
107 static const UChar ID_PERCENT[] = {
108 0x70, 0x65, 0x72, 0x63, 0x65, 0x6E, 0x74, 0 /* "percent" */
109 };
110 static const UChar ID_INTEGER[] = {
111 0x69, 0x6E, 0x74, 0x65, 0x67, 0x65, 0x72, 0 /* "integer" */
112 };
113
114 // NumberFormat modifier list, default, currency, percent or integer
115 static const UChar * const NUMBER_STYLE_IDS[] = {
116 ID_EMPTY,
117 ID_CURRENCY,
118 ID_PERCENT,
119 ID_INTEGER,
120 NULL,
121 };
122
123 static const UChar ID_SHORT[] = {
124 0x73, 0x68, 0x6F, 0x72, 0x74, 0 /* "short" */
125 };
126 static const UChar ID_MEDIUM[] = {
127 0x6D, 0x65, 0x64, 0x69, 0x75, 0x6D, 0 /* "medium" */
128 };
129 static const UChar ID_LONG[] = {
130 0x6C, 0x6F, 0x6E, 0x67, 0 /* "long" */
131 };
132 static const UChar ID_FULL[] = {
133 0x66, 0x75, 0x6C, 0x6C, 0 /* "full" */
134 };
135
136 // DateFormat modifier list, default, short, medium, long or full
137 static const UChar * const DATE_STYLE_IDS[] = {
138 ID_EMPTY,
139 ID_SHORT,
140 ID_MEDIUM,
141 ID_LONG,
142 ID_FULL,
143 NULL,
144 };
145
146 static const U_NAMESPACE_QUALIFIER DateFormat::EStyle DATE_STYLES[] = {
147 U_NAMESPACE_QUALIFIER DateFormat::kDefault,
148 U_NAMESPACE_QUALIFIER DateFormat::kShort,
149 U_NAMESPACE_QUALIFIER DateFormat::kMedium,
150 U_NAMESPACE_QUALIFIER DateFormat::kLong,
151 U_NAMESPACE_QUALIFIER DateFormat::kFull,
152 };
153
154 static const int32_t DEFAULT_INITIAL_CAPACITY = 10;
155
156 U_NAMESPACE_BEGIN
157
158 // -------------------------------------
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(MessageFormat)159 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(MessageFormat)
160 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(FormatNameEnumeration)
161
162 //--------------------------------------------------------------------
163
164 /**
165 * Convert a string to an unsigned decimal, ignoring rule whitespace.
166 * @return a non-negative number if successful, or a negative number
167 * upon failure.
168 */
169 static int32_t stou(const UnicodeString& string) {
170 int32_t n = 0;
171 int32_t count = 0;
172 UChar32 c;
173 for (int32_t i=0; i<string.length(); i+=U16_LENGTH(c)) {
174 c = string.char32At(i);
175 if (uprv_isRuleWhiteSpace(c)) {
176 continue;
177 }
178 int32_t d = u_digit(c, 10);
179 if (d < 0 || ++count > 10) {
180 return -1;
181 }
182 n = 10*n + d;
183 }
184 return n;
185 }
186
187 /**
188 * Convert an integer value to a string and append the result to
189 * the given UnicodeString.
190 */
itos(int32_t i,UnicodeString & appendTo)191 static UnicodeString& itos(int32_t i, UnicodeString& appendTo) {
192 UChar temp[16];
193 uprv_itou(temp,16,i,10,0); // 10 == radix
194 appendTo.append(temp);
195 return appendTo;
196 }
197
198 /*
199 * A structure representing one subformat of this MessageFormat.
200 * Each subformat has a Format object, an offset into the plain
201 * pattern text fPattern, and an argument number. The argument
202 * number corresponds to the array of arguments to be formatted.
203 * @internal
204 */
205 class MessageFormat::Subformat : public UMemory {
206 public:
207 /**
208 * @internal
209 */
210 Format* format; // formatter
211 /**
212 * @internal
213 */
214 int32_t offset; // offset into fPattern
215 /**
216 * @internal
217 */
218 // TODO (claireho) or save the number to argName and use itos to convert to number.=> we need this number
219 int32_t argNum; // 0-based argument number
220 /**
221 * @internal
222 */
223 UnicodeString* argName; // argument name or number
224
225 /**
226 * Clone that.format and assign it to this.format
227 * Do NOT delete this.format
228 * @internal
229 */
operator =(const Subformat & that)230 Subformat& operator=(const Subformat& that) {
231 if (this != &that) {
232 format = that.format ? that.format->clone() : NULL;
233 offset = that.offset;
234 argNum = that.argNum;
235 argName = (that.argNum==-1) ? new UnicodeString(*that.argName): NULL;
236 }
237 return *this;
238 }
239
240 /**
241 * @internal
242 */
operator ==(const Subformat & that) const243 UBool operator==(const Subformat& that) const {
244 // Do cheap comparisons first
245 return offset == that.offset &&
246 argNum == that.argNum &&
247 ((argName == that.argName) ||
248 (*argName == *that.argName)) &&
249 ((format == that.format) || // handles NULL
250 (*format == *that.format));
251 }
252
253 /**
254 * @internal
255 */
operator !=(const Subformat & that) const256 UBool operator!=(const Subformat& that) const {
257 return !operator==(that);
258 }
259 };
260
261 // -------------------------------------
262 // Creates a MessageFormat instance based on the pattern.
263
MessageFormat(const UnicodeString & pattern,UErrorCode & success)264 MessageFormat::MessageFormat(const UnicodeString& pattern,
265 UErrorCode& success)
266 : fLocale(Locale::getDefault()), // Uses the default locale
267 formatAliases(NULL),
268 formatAliasesCapacity(0),
269 idStart(UCHAR_ID_START),
270 idContinue(UCHAR_ID_CONTINUE),
271 subformats(NULL),
272 subformatCount(0),
273 subformatCapacity(0),
274 argTypes(NULL),
275 argTypeCount(0),
276 argTypeCapacity(0),
277 isArgNumeric(TRUE),
278 defaultNumberFormat(NULL),
279 defaultDateFormat(NULL)
280 {
281 if (!allocateSubformats(DEFAULT_INITIAL_CAPACITY) ||
282 !allocateArgTypes(DEFAULT_INITIAL_CAPACITY)) {
283 success = U_MEMORY_ALLOCATION_ERROR;
284 return;
285 }
286 applyPattern(pattern, success);
287 setLocaleIDs(fLocale.getName(), fLocale.getName());
288 }
289
MessageFormat(const UnicodeString & pattern,const Locale & newLocale,UErrorCode & success)290 MessageFormat::MessageFormat(const UnicodeString& pattern,
291 const Locale& newLocale,
292 UErrorCode& success)
293 : fLocale(newLocale),
294 formatAliases(NULL),
295 formatAliasesCapacity(0),
296 idStart(UCHAR_ID_START),
297 idContinue(UCHAR_ID_CONTINUE),
298 subformats(NULL),
299 subformatCount(0),
300 subformatCapacity(0),
301 argTypes(NULL),
302 argTypeCount(0),
303 argTypeCapacity(0),
304 isArgNumeric(TRUE),
305 defaultNumberFormat(NULL),
306 defaultDateFormat(NULL)
307 {
308 if (!allocateSubformats(DEFAULT_INITIAL_CAPACITY) ||
309 !allocateArgTypes(DEFAULT_INITIAL_CAPACITY)) {
310 success = U_MEMORY_ALLOCATION_ERROR;
311 return;
312 }
313 applyPattern(pattern, success);
314 setLocaleIDs(fLocale.getName(), fLocale.getName());
315 }
316
MessageFormat(const UnicodeString & pattern,const Locale & newLocale,UParseError & parseError,UErrorCode & success)317 MessageFormat::MessageFormat(const UnicodeString& pattern,
318 const Locale& newLocale,
319 UParseError& parseError,
320 UErrorCode& success)
321 : fLocale(newLocale),
322 formatAliases(NULL),
323 formatAliasesCapacity(0),
324 idStart(UCHAR_ID_START),
325 idContinue(UCHAR_ID_CONTINUE),
326 subformats(NULL),
327 subformatCount(0),
328 subformatCapacity(0),
329 argTypes(NULL),
330 argTypeCount(0),
331 argTypeCapacity(0),
332 isArgNumeric(TRUE),
333 defaultNumberFormat(NULL),
334 defaultDateFormat(NULL)
335 {
336 if (!allocateSubformats(DEFAULT_INITIAL_CAPACITY) ||
337 !allocateArgTypes(DEFAULT_INITIAL_CAPACITY)) {
338 success = U_MEMORY_ALLOCATION_ERROR;
339 return;
340 }
341 applyPattern(pattern, parseError, success);
342 setLocaleIDs(fLocale.getName(), fLocale.getName());
343 }
344
// Copy constructor: starts from an empty state and then delegates all of
// the copying to the assignment operator.
MessageFormat::MessageFormat(const MessageFormat& that)
: Format(that),
  formatAliases(NULL),
  formatAliasesCapacity(0),
  idStart(UCHAR_ID_START),
  idContinue(UCHAR_ID_CONTINUE),
  subformats(NULL),
  subformatCount(0),
  subformatCapacity(0),
  argTypes(NULL),
  argTypeCount(0),
  argTypeCapacity(0),
  isArgNumeric(TRUE),
  defaultNumberFormat(NULL),
  defaultDateFormat(NULL)
{
    operator=(that);
}
363
~MessageFormat()364 MessageFormat::~MessageFormat()
365 {
366 int32_t idx;
367 for (idx = 0; idx < subformatCount; idx++) {
368 delete subformats[idx].format;
369 delete subformats[idx].argName;
370 }
371 uprv_free(subformats);
372 subformats = NULL;
373 subformatCount = subformatCapacity = 0;
374
375 uprv_free(argTypes);
376 argTypes = NULL;
377 argTypeCount = argTypeCapacity = 0;
378
379 uprv_free(formatAliases);
380
381 delete defaultNumberFormat;
382 delete defaultDateFormat;
383 }
384
385 //--------------------------------------------------------------------
386 // Variable-size array management
387
388 /**
389 * Allocate subformats[] to at least the given capacity and return
390 * TRUE if successful. If not, leave subformats[] unchanged.
391 *
392 * If subformats is NULL, allocate it. If it is not NULL, enlarge it
393 * if necessary to be at least as large as specified.
394 */
allocateSubformats(int32_t capacity)395 UBool MessageFormat::allocateSubformats(int32_t capacity) {
396 if (subformats == NULL) {
397 subformats = (Subformat*) uprv_malloc(sizeof(*subformats) * capacity);
398 subformatCapacity = capacity;
399 subformatCount = 0;
400 if (subformats == NULL) {
401 subformatCapacity = 0;
402 return FALSE;
403 }
404 } else if (subformatCapacity < capacity) {
405 if (capacity < 2*subformatCapacity) {
406 capacity = 2*subformatCapacity;
407 }
408 Subformat* a = (Subformat*)
409 uprv_realloc(subformats, sizeof(*subformats) * capacity);
410 if (a == NULL) {
411 return FALSE; // request failed
412 }
413 subformats = a;
414 subformatCapacity = capacity;
415 }
416 return TRUE;
417 }
418
419 /**
420 * Allocate argTypes[] to at least the given capacity and return
421 * TRUE if successful. If not, leave argTypes[] unchanged.
422 *
423 * If argTypes is NULL, allocate it. If it is not NULL, enlarge it
424 * if necessary to be at least as large as specified.
425 */
allocateArgTypes(int32_t capacity)426 UBool MessageFormat::allocateArgTypes(int32_t capacity) {
427 if (argTypes == NULL) {
428 argTypes = (Formattable::Type*) uprv_malloc(sizeof(*argTypes) * capacity);
429 argTypeCount = 0;
430 argTypeCapacity = capacity;
431 if (argTypes == NULL) {
432 argTypeCapacity = 0;
433 return FALSE;
434 }
435 for (int32_t i=0; i<capacity; ++i) {
436 argTypes[i] = Formattable::kString;
437 }
438 } else if (argTypeCapacity < capacity) {
439 if (capacity < 2*argTypeCapacity) {
440 capacity = 2*argTypeCapacity;
441 }
442 Formattable::Type* a = (Formattable::Type*)
443 uprv_realloc(argTypes, sizeof(*argTypes) * capacity);
444 if (a == NULL) {
445 return FALSE; // request failed
446 }
447 for (int32_t i=argTypeCapacity; i<capacity; ++i) {
448 a[i] = Formattable::kString;
449 }
450 argTypes = a;
451 argTypeCapacity = capacity;
452 }
453 return TRUE;
454 }
455
456 // -------------------------------------
457 // assignment operator
458
459 const MessageFormat&
operator =(const MessageFormat & that)460 MessageFormat::operator=(const MessageFormat& that)
461 {
462 // Reallocate the arrays BEFORE changing this object
463 if (this != &that &&
464 allocateSubformats(that.subformatCount) &&
465 allocateArgTypes(that.argTypeCount)) {
466
467 // Calls the super class for assignment first.
468 Format::operator=(that);
469
470 fPattern = that.fPattern;
471 setLocale(that.fLocale);
472 isArgNumeric = that.isArgNumeric;
473 int32_t j;
474 for (j=0; j<subformatCount; ++j) {
475 delete subformats[j].format;
476 }
477 subformatCount = 0;
478
479 for (j=0; j<that.subformatCount; ++j) {
480 // Subformat::operator= does NOT delete this.format
481 subformats[j] = that.subformats[j];
482 }
483 subformatCount = that.subformatCount;
484
485 for (j=0; j<that.argTypeCount; ++j) {
486 argTypes[j] = that.argTypes[j];
487 }
488 argTypeCount = that.argTypeCount;
489 }
490 return *this;
491 }
492
493 UBool
operator ==(const Format & rhs) const494 MessageFormat::operator==(const Format& rhs) const
495 {
496 if (this == &rhs) return TRUE;
497
498 MessageFormat& that = (MessageFormat&)rhs;
499
500 // Check class ID before checking MessageFormat members
501 if (!Format::operator==(rhs) ||
502 fPattern != that.fPattern ||
503 fLocale != that.fLocale ||
504 isArgNumeric != that.isArgNumeric) {
505 return FALSE;
506 }
507
508 int32_t j;
509 for (j=0; j<subformatCount; ++j) {
510 if (subformats[j] != that.subformats[j]) {
511 return FALSE;
512 }
513 }
514
515 return TRUE;
516 }
517
518 // -------------------------------------
519 // Creates a copy of this MessageFormat, the caller owns the copy.
520
521 Format*
clone() const522 MessageFormat::clone() const
523 {
524 return new MessageFormat(*this);
525 }
526
527 // -------------------------------------
528 // Sets the locale of this MessageFormat object to theLocale.
529
530 void
setLocale(const Locale & theLocale)531 MessageFormat::setLocale(const Locale& theLocale)
532 {
533 if (fLocale != theLocale) {
534 delete defaultNumberFormat;
535 defaultNumberFormat = NULL;
536 delete defaultDateFormat;
537 defaultDateFormat = NULL;
538 }
539 fLocale = theLocale;
540 setLocaleIDs(fLocale.getName(), fLocale.getName());
541 }
542
543 // -------------------------------------
544 // Gets the locale of this MessageFormat object.
545
546 const Locale&
getLocale() const547 MessageFormat::getLocale() const
548 {
549 return fLocale;
550 }
551
552
553
554
555 void
applyPattern(const UnicodeString & newPattern,UErrorCode & status)556 MessageFormat::applyPattern(const UnicodeString& newPattern,
557 UErrorCode& status)
558 {
559 UParseError parseError;
560 applyPattern(newPattern,parseError,status);
561 }
562
563
564 // -------------------------------------
565 // Applies the new pattern and returns an error if the pattern
566 // is not correct.
567 void
applyPattern(const UnicodeString & pattern,UParseError & parseError,UErrorCode & ec)568 MessageFormat::applyPattern(const UnicodeString& pattern,
569 UParseError& parseError,
570 UErrorCode& ec)
571 {
572 if(U_FAILURE(ec)) {
573 return;
574 }
575 // The pattern is broken up into segments. Each time a subformat
576 // is encountered, 4 segments are recorded. For example, consider
577 // the pattern:
578 // "There {0,choice,0.0#are no files|1.0#is one file|1.0<are {0, number} files} on disk {1}."
579 // The first set of segments is:
580 // segments[0] = "There "
581 // segments[1] = "0"
582 // segments[2] = "choice"
583 // segments[3] = "0.0#are no files|1.0#is one file|1.0<are {0, number} files"
584
585 // During parsing, the plain text is accumulated into segments[0].
586 // Segments 1..3 are used to parse each subpattern. Each time a
587 // subpattern is parsed, it creates a format object that is stored
588 // in the subformats array, together with an offset and argument
589 // number. The offset into the plain text stored in
590 // segments[0].
591
592 // Quotes in segment 0 are handled normally. They are removed.
593 // Quotes may not occur in segments 1 or 2.
594 // Quotes in segment 3 are parsed and _copied_. This makes
595 // subformat patterns work, e.g., {1,number,'#'.##} passes
596 // the pattern "'#'.##" to DecimalFormat.
597
598 UnicodeString segments[4];
599 int32_t part = 0; // segment we are in, 0..3
600 // Record the highest argument number in the pattern. (In the
601 // subpattern {3,number} the argument number is 3.)
602 int32_t formatNumber = 0;
603 UBool inQuote = FALSE;
604 int32_t braceStack = 0;
605 // Clear error struct
606 parseError.offset = -1;
607 parseError.preContext[0] = parseError.postContext[0] = (UChar)0;
608 int32_t patLen = pattern.length();
609 int32_t i;
610
611 for (i=0; i<subformatCount; ++i) {
612 delete subformats[i].format;
613 }
614 subformatCount = 0;
615 argTypeCount = 0;
616
617 for (i=0; i<patLen; ++i) {
618 UChar ch = pattern[i];
619 if (part == 0) {
620 // In segment 0, recognize and remove quotes
621 if (ch == SINGLE_QUOTE) {
622 if (i+1 < patLen && pattern[i+1] == SINGLE_QUOTE) {
623 segments[0] += ch;
624 ++i;
625 } else {
626 inQuote = !inQuote;
627 }
628 } else if (ch == LEFT_CURLY_BRACE && !inQuote) {
629 // The only way we get from segment 0 to 1 is via an
630 // unquoted '{'.
631 part = 1;
632 } else {
633 segments[0] += ch;
634 }
635 } else if (inQuote) {
636 // In segments 1..3, recognize quoted matter, and copy it
637 // into the segment, together with the quotes. This takes
638 // care of '' as well.
639 segments[part] += ch;
640 if (ch == SINGLE_QUOTE) {
641 inQuote = FALSE;
642 }
643 } else {
644 // We have an unquoted character in segment 1..3
645 switch (ch) {
646 case COMMA:
647 // Commas bump us to the next segment, except for segment 3,
648 // which can contain commas. See example above.
649 if (part < 3)
650 part += 1;
651 else
652 segments[3] += ch;
653 break;
654 case LEFT_CURLY_BRACE:
655 // Handle '{' within segment 3. The initial '{'
656 // before segment 1 is handled above.
657 if (part != 3) {
658 ec = U_PATTERN_SYNTAX_ERROR;
659 goto SYNTAX_ERROR;
660 }
661 ++braceStack;
662 segments[part] += ch;
663 break;
664 case RIGHT_CURLY_BRACE:
665 if (braceStack == 0) {
666 makeFormat(formatNumber, segments, parseError,ec);
667 if (U_FAILURE(ec)){
668 goto SYNTAX_ERROR;
669 }
670 formatNumber++;
671
672 segments[1].remove();
673 segments[2].remove();
674 segments[3].remove();
675 part = 0;
676 } else {
677 --braceStack;
678 segments[part] += ch;
679 }
680 break;
681 case SINGLE_QUOTE:
682 inQuote = TRUE;
683 // fall through (copy quote chars in segments 1..3)
684 default:
685 segments[part] += ch;
686 break;
687 }
688 }
689 }
690 if (braceStack != 0 || part != 0) {
691 // Unmatched braces in the pattern
692 ec = U_UNMATCHED_BRACES;
693 goto SYNTAX_ERROR;
694 }
695 fPattern = segments[0];
696 return;
697
698 SYNTAX_ERROR:
699 syntaxError(pattern, i, parseError);
700 for (i=0; i<subformatCount; ++i) {
701 delete subformats[i].format;
702 }
703 argTypeCount = subformatCount = 0;
704 }
705 // -------------------------------------
706 // Converts this MessageFormat instance to a pattern.
707
708 UnicodeString&
toPattern(UnicodeString & appendTo) const709 MessageFormat::toPattern(UnicodeString& appendTo) const {
710 // later, make this more extensible
711 int32_t lastOffset = 0;
712 int32_t i;
713 for (i=0; i<subformatCount; ++i) {
714 copyAndFixQuotes(fPattern, lastOffset, subformats[i].offset, appendTo);
715 lastOffset = subformats[i].offset;
716 appendTo += LEFT_CURLY_BRACE;
717 if (isArgNumeric) {
718 itos(subformats[i].argNum, appendTo);
719 }
720 else {
721 appendTo += *subformats[i].argName;
722 }
723 Format* fmt = subformats[i].format;
724 DecimalFormat* decfmt;
725 SimpleDateFormat* sdtfmt;
726 ChoiceFormat* chcfmt;
727 PluralFormat* plfmt;
728 SelectFormat* selfmt;
729 if (fmt == NULL) {
730 // do nothing, string format
731 }
732 else if ((decfmt = dynamic_cast<DecimalFormat*>(fmt)) != NULL) {
733 UErrorCode ec = U_ZERO_ERROR;
734 NumberFormat& formatAlias = *decfmt;
735 NumberFormat *defaultTemplate = NumberFormat::createInstance(fLocale, ec);
736 NumberFormat *currencyTemplate = NumberFormat::createCurrencyInstance(fLocale, ec);
737 NumberFormat *percentTemplate = NumberFormat::createPercentInstance(fLocale, ec);
738 NumberFormat *integerTemplate = createIntegerFormat(fLocale, ec);
739
740 appendTo += COMMA;
741 appendTo += ID_NUMBER;
742 if (formatAlias != *defaultTemplate) {
743 appendTo += COMMA;
744 if (formatAlias == *currencyTemplate) {
745 appendTo += ID_CURRENCY;
746 }
747 else if (formatAlias == *percentTemplate) {
748 appendTo += ID_PERCENT;
749 }
750 else if (formatAlias == *integerTemplate) {
751 appendTo += ID_INTEGER;
752 }
753 else {
754 UnicodeString buffer;
755 appendTo += decfmt->toPattern(buffer);
756 }
757 }
758
759 delete defaultTemplate;
760 delete currencyTemplate;
761 delete percentTemplate;
762 delete integerTemplate;
763 }
764 else if ((sdtfmt = dynamic_cast<SimpleDateFormat*>(fmt)) != NULL) {
765 DateFormat& formatAlias = *sdtfmt;
766 DateFormat *defaultDateTemplate = DateFormat::createDateInstance(DateFormat::kDefault, fLocale);
767 DateFormat *shortDateTemplate = DateFormat::createDateInstance(DateFormat::kShort, fLocale);
768 DateFormat *longDateTemplate = DateFormat::createDateInstance(DateFormat::kLong, fLocale);
769 DateFormat *fullDateTemplate = DateFormat::createDateInstance(DateFormat::kFull, fLocale);
770 DateFormat *defaultTimeTemplate = DateFormat::createTimeInstance(DateFormat::kDefault, fLocale);
771 DateFormat *shortTimeTemplate = DateFormat::createTimeInstance(DateFormat::kShort, fLocale);
772 DateFormat *longTimeTemplate = DateFormat::createTimeInstance(DateFormat::kLong, fLocale);
773 DateFormat *fullTimeTemplate = DateFormat::createTimeInstance(DateFormat::kFull, fLocale);
774
775
776 appendTo += COMMA;
777 if (formatAlias == *defaultDateTemplate) {
778 // default is medium. no need to handle medium separately.
779 appendTo += ID_DATE;
780 }
781 else if (formatAlias == *shortDateTemplate) {
782 appendTo += ID_DATE;
783 appendTo += COMMA;
784 appendTo += ID_SHORT;
785 }
786 else if (formatAlias == *longDateTemplate) {
787 appendTo += ID_DATE;
788 appendTo += COMMA;
789 appendTo += ID_LONG;
790 }
791 else if (formatAlias == *fullDateTemplate) {
792 appendTo += ID_DATE;
793 appendTo += COMMA;
794 appendTo += ID_FULL;
795 }
796 else if (formatAlias == *defaultTimeTemplate) {
797 // default is medium. no need to handle medium separately.
798 appendTo += ID_TIME;
799 }
800 else if (formatAlias == *shortTimeTemplate) {
801 appendTo += ID_TIME;
802 appendTo += COMMA;
803 appendTo += ID_SHORT;
804 }
805 else if (formatAlias == *longTimeTemplate) {
806 appendTo += ID_TIME;
807 appendTo += COMMA;
808 appendTo += ID_LONG;
809 }
810 else if (formatAlias == *fullTimeTemplate) {
811 appendTo += ID_TIME;
812 appendTo += COMMA;
813 appendTo += ID_FULL;
814 }
815 else {
816 UnicodeString buffer;
817 appendTo += ID_DATE;
818 appendTo += COMMA;
819 appendTo += sdtfmt->toPattern(buffer);
820 }
821
822 delete defaultDateTemplate;
823 delete shortDateTemplate;
824 delete longDateTemplate;
825 delete fullDateTemplate;
826 delete defaultTimeTemplate;
827 delete shortTimeTemplate;
828 delete longTimeTemplate;
829 delete fullTimeTemplate;
830 // {sfb} there should be a more efficient way to do this!
831 }
832 else if ((chcfmt = dynamic_cast<ChoiceFormat*>(fmt)) != NULL) {
833 UnicodeString buffer;
834 appendTo += COMMA;
835 appendTo += ID_CHOICE;
836 appendTo += COMMA;
837 appendTo += ((ChoiceFormat*)fmt)->toPattern(buffer);
838 }
839 else if ((plfmt = dynamic_cast<PluralFormat*>(fmt)) != NULL) {
840 UnicodeString buffer;
841 appendTo += plfmt->toPattern(buffer);
842 }
843 else if ((selfmt = dynamic_cast<SelectFormat*>(fmt)) != NULL) {
844 UnicodeString buffer;
845 appendTo += ((SelectFormat*)fmt)->toPattern(buffer);
846 }
847 else {
848 //appendTo += ", unknown";
849 }
850 appendTo += RIGHT_CURLY_BRACE;
851 }
852 copyAndFixQuotes(fPattern, lastOffset, fPattern.length(), appendTo);
853 return appendTo;
854 }
855
856 // -------------------------------------
857 // Adopts the new formats array and updates the array count.
858 // This MessageFormat instance owns the new formats.
859
860 void
adoptFormats(Format ** newFormats,int32_t count)861 MessageFormat::adoptFormats(Format** newFormats,
862 int32_t count) {
863 if (newFormats == NULL || count < 0) {
864 return;
865 }
866
867 int32_t i;
868 if (allocateSubformats(count)) {
869 for (i=0; i<subformatCount; ++i) {
870 delete subformats[i].format;
871 }
872 for (i=0; i<count; ++i) {
873 subformats[i].format = newFormats[i];
874 }
875 subformatCount = count;
876 } else {
877 // An adopt method must always take ownership. Delete
878 // the incoming format objects and return unchanged.
879 for (i=0; i<count; ++i) {
880 delete newFormats[i];
881 }
882 }
883
884 // TODO: What about the .offset and .argNum fields?
885 }
886
887 // -------------------------------------
888 // Sets the new formats array and updates the array count.
889 // This MessageFormat instance maks a copy of the new formats.
890
891 void
setFormats(const Format ** newFormats,int32_t count)892 MessageFormat::setFormats(const Format** newFormats,
893 int32_t count) {
894 if (newFormats == NULL || count < 0) {
895 return;
896 }
897
898 if (allocateSubformats(count)) {
899 int32_t i;
900 for (i=0; i<subformatCount; ++i) {
901 delete subformats[i].format;
902 }
903 subformatCount = 0;
904
905 for (i=0; i<count; ++i) {
906 subformats[i].format = newFormats[i] ? newFormats[i]->clone() : NULL;
907 }
908 subformatCount = count;
909 }
910
911 // TODO: What about the .offset and .arg fields?
912 }
913
914 // -------------------------------------
915 // Adopt a single format by format number.
916 // Do nothing if the format number is not less than the array count.
917
918 void
adoptFormat(int32_t n,Format * newFormat)919 MessageFormat::adoptFormat(int32_t n, Format *newFormat) {
920 if (n < 0 || n >= subformatCount) {
921 delete newFormat;
922 } else {
923 delete subformats[n].format;
924 subformats[n].format = newFormat;
925 }
926 }
927
928 // -------------------------------------
929 // Adopt a single format by format name.
930 // Do nothing if there is no match of formatName.
931 void
adoptFormat(const UnicodeString & formatName,Format * formatToAdopt,UErrorCode & status)932 MessageFormat::adoptFormat(const UnicodeString& formatName,
933 Format* formatToAdopt,
934 UErrorCode& status) {
935 if (isArgNumeric ) {
936 int32_t argumentNumber = stou(formatName);
937 if (argumentNumber<0) {
938 status = U_ARGUMENT_TYPE_MISMATCH;
939 return;
940 }
941 adoptFormat(argumentNumber, formatToAdopt);
942 return;
943 }
944 for (int32_t i=0; i<subformatCount; ++i) {
945 if (formatName==*subformats[i].argName) {
946 delete subformats[i].format;
947 if ( formatToAdopt== NULL) {
948 // This should never happen -- but we'll be nice if it does
949 subformats[i].format = NULL;
950 } else {
951 subformats[i].format = formatToAdopt;
952 }
953 }
954 }
955 }
956
957 // -------------------------------------
958 // Set a single format.
959 // Do nothing if the variable is not less than the array count.
960
961 void
setFormat(int32_t n,const Format & newFormat)962 MessageFormat::setFormat(int32_t n, const Format& newFormat) {
963 if (n >= 0 && n < subformatCount) {
964 delete subformats[n].format;
965 if (&newFormat == NULL) {
966 // This should never happen -- but we'll be nice if it does
967 subformats[n].format = NULL;
968 } else {
969 subformats[n].format = newFormat.clone();
970 }
971 }
972 }
973
974 // -------------------------------------
975 // Get a single format by format name.
976 // Do nothing if the variable is not less than the array count.
977 Format *
getFormat(const UnicodeString & formatName,UErrorCode & status)978 MessageFormat::getFormat(const UnicodeString& formatName, UErrorCode& status) {
979
980 if (U_FAILURE(status)) return NULL;
981
982 if (isArgNumeric ) {
983 int32_t argumentNumber = stou(formatName);
984 if (argumentNumber<0) {
985 status = U_ARGUMENT_TYPE_MISMATCH;
986 return NULL;
987 }
988 if (argumentNumber < 0 || argumentNumber >= subformatCount) {
989 return subformats[argumentNumber].format;
990 }
991 else {
992 return NULL;
993 }
994 }
995
996 for (int32_t i=0; i<subformatCount; ++i) {
997 if (formatName==*subformats[i].argName)
998 {
999 return subformats[i].format;
1000 }
1001 }
1002 return NULL;
1003 }
1004
1005 // -------------------------------------
1006 // Set a single format by format name
1007 // Do nothing if the variable is not less than the array count.
1008 void
setFormat(const UnicodeString & formatName,const Format & newFormat,UErrorCode & status)1009 MessageFormat::setFormat(const UnicodeString& formatName,
1010 const Format& newFormat,
1011 UErrorCode& status) {
1012 if (isArgNumeric) {
1013 status = U_ARGUMENT_TYPE_MISMATCH;
1014 return;
1015 }
1016 for (int32_t i=0; i<subformatCount; ++i) {
1017 if (formatName==*subformats[i].argName)
1018 {
1019 delete subformats[i].format;
1020 if (&newFormat == NULL) {
1021 // This should never happen -- but we'll be nice if it does
1022 subformats[i].format = NULL;
1023 } else {
1024 subformats[i].format = newFormat.clone();
1025 }
1026 break;
1027 }
1028 }
1029 }
1030
1031 // -------------------------------------
1032 // Gets the format array.
1033
1034 const Format**
getFormats(int32_t & cnt) const1035 MessageFormat::getFormats(int32_t& cnt) const
1036 {
1037 // This old API returns an array (which we hold) of Format*
1038 // pointers. The array is valid up to the next call to any
1039 // method on this object. We construct and resize an array
1040 // on demand that contains aliases to the subformats[i].format
1041 // pointers.
1042 MessageFormat* t = (MessageFormat*) this;
1043 cnt = 0;
1044 if (formatAliases == NULL) {
1045 t->formatAliasesCapacity = (subformatCount<10) ? 10 : subformatCount;
1046 Format** a = (Format**)
1047 uprv_malloc(sizeof(Format*) * formatAliasesCapacity);
1048 if (a == NULL) {
1049 return NULL;
1050 }
1051 t->formatAliases = a;
1052 } else if (subformatCount > formatAliasesCapacity) {
1053 Format** a = (Format**)
1054 uprv_realloc(formatAliases, sizeof(Format*) * subformatCount);
1055 if (a == NULL) {
1056 return NULL;
1057 }
1058 t->formatAliases = a;
1059 t->formatAliasesCapacity = subformatCount;
1060 }
1061 for (int32_t i=0; i<subformatCount; ++i) {
1062 t->formatAliases[i] = subformats[i].format;
1063 }
1064 cnt = subformatCount;
1065 return (const Format**)formatAliases;
1066 }
1067
1068
1069 StringEnumeration*
getFormatNames(UErrorCode & status)1070 MessageFormat::getFormatNames(UErrorCode& status) {
1071 if (U_FAILURE(status)) return NULL;
1072
1073 if (isArgNumeric) {
1074 status = U_ARGUMENT_TYPE_MISMATCH;
1075 return NULL;
1076 }
1077 UVector *fFormatNames = new UVector(status);
1078 if (U_FAILURE(status)) {
1079 status = U_MEMORY_ALLOCATION_ERROR;
1080 return NULL;
1081 }
1082 for (int32_t i=0; i<subformatCount; ++i) {
1083 fFormatNames->addElement(new UnicodeString(*subformats[i].argName), status);
1084 }
1085
1086 StringEnumeration* nameEnumerator = new FormatNameEnumeration(fFormatNames, status);
1087 return nameEnumerator;
1088 }
1089
1090 // -------------------------------------
1091 // Formats the source Formattable array and copy into the result buffer.
1092 // Ignore the FieldPosition result for error checking.
1093
1094 UnicodeString&
format(const Formattable * source,int32_t cnt,UnicodeString & appendTo,FieldPosition & ignore,UErrorCode & success) const1095 MessageFormat::format(const Formattable* source,
1096 int32_t cnt,
1097 UnicodeString& appendTo,
1098 FieldPosition& ignore,
1099 UErrorCode& success) const
1100 {
1101 if (U_FAILURE(success))
1102 return appendTo;
1103
1104 return format(source, cnt, appendTo, ignore, 0, success);
1105 }
1106
1107 // -------------------------------------
1108 // Internally creates a MessageFormat instance based on the
1109 // pattern and formats the arguments Formattable array and
1110 // copy into the appendTo buffer.
1111
1112 UnicodeString&
format(const UnicodeString & pattern,const Formattable * arguments,int32_t cnt,UnicodeString & appendTo,UErrorCode & success)1113 MessageFormat::format( const UnicodeString& pattern,
1114 const Formattable* arguments,
1115 int32_t cnt,
1116 UnicodeString& appendTo,
1117 UErrorCode& success)
1118 {
1119 MessageFormat temp(pattern, success);
1120 FieldPosition ignore(0);
1121 temp.format(arguments, cnt, appendTo, ignore, success);
1122 return appendTo;
1123 }
1124
1125 // -------------------------------------
1126 // Formats the source Formattable object and copy into the
1127 // appendTo buffer. The Formattable object must be an array
1128 // of Formattable instances, returns error otherwise.
1129
1130 UnicodeString&
format(const Formattable & source,UnicodeString & appendTo,FieldPosition & ignore,UErrorCode & success) const1131 MessageFormat::format(const Formattable& source,
1132 UnicodeString& appendTo,
1133 FieldPosition& ignore,
1134 UErrorCode& success) const
1135 {
1136 int32_t cnt;
1137
1138 if (U_FAILURE(success))
1139 return appendTo;
1140 if (source.getType() != Formattable::kArray) {
1141 success = U_ILLEGAL_ARGUMENT_ERROR;
1142 return appendTo;
1143 }
1144 const Formattable* tmpPtr = source.getArray(cnt);
1145
1146 return format(tmpPtr, cnt, appendTo, ignore, 0, success);
1147 }
1148
1149
1150 UnicodeString&
format(const UnicodeString * argumentNames,const Formattable * arguments,int32_t count,UnicodeString & appendTo,UErrorCode & success) const1151 MessageFormat::format(const UnicodeString* argumentNames,
1152 const Formattable* arguments,
1153 int32_t count,
1154 UnicodeString& appendTo,
1155 UErrorCode& success) const {
1156 FieldPosition ignore(0);
1157 return format(arguments, argumentNames, count, appendTo, ignore, 0, success);
1158 }
1159
1160 UnicodeString&
format(const Formattable * arguments,int32_t cnt,UnicodeString & appendTo,FieldPosition & status,int32_t recursionProtection,UErrorCode & success) const1161 MessageFormat::format(const Formattable* arguments,
1162 int32_t cnt,
1163 UnicodeString& appendTo,
1164 FieldPosition& status,
1165 int32_t recursionProtection,
1166 UErrorCode& success) const
1167 {
1168 return format(arguments, NULL, cnt, appendTo, status, recursionProtection, success);
1169 }
1170
1171 // -------------------------------------
1172 // Formats the arguments Formattable array and copy into the appendTo buffer.
1173 // Ignore the FieldPosition result for error checking.
1174
1175 UnicodeString&
format(const Formattable * arguments,const UnicodeString * argumentNames,int32_t cnt,UnicodeString & appendTo,FieldPosition & status,int32_t recursionProtection,UErrorCode & success) const1176 MessageFormat::format(const Formattable* arguments,
1177 const UnicodeString *argumentNames,
1178 int32_t cnt,
1179 UnicodeString& appendTo,
1180 FieldPosition& status,
1181 int32_t recursionProtection,
1182 UErrorCode& success) const
1183 {
1184 int32_t lastOffset = 0;
1185 int32_t argumentNumber=0;
1186 if (cnt < 0 || (cnt && arguments == NULL)) {
1187 success = U_ILLEGAL_ARGUMENT_ERROR;
1188 return appendTo;
1189 }
1190
1191 if ( !isArgNumeric && argumentNames== NULL ) {
1192 success = U_ILLEGAL_ARGUMENT_ERROR;
1193 return appendTo;
1194 }
1195
1196 const Formattable *obj=NULL;
1197 for (int32_t i=0; i<subformatCount; ++i) {
1198 // Append the prefix of current format element.
1199 appendTo.append(fPattern, lastOffset, subformats[i].offset - lastOffset);
1200 lastOffset = subformats[i].offset;
1201 obj = NULL;
1202 if (isArgNumeric) {
1203 argumentNumber = subformats[i].argNum;
1204
1205 // Checks the scope of the argument number.
1206 if (argumentNumber >= cnt) {
1207 appendTo += LEFT_CURLY_BRACE;
1208 itos(argumentNumber, appendTo);
1209 appendTo += RIGHT_CURLY_BRACE;
1210 continue;
1211 }
1212 obj = arguments+argumentNumber;
1213 }
1214 else {
1215 for (int32_t j=0; j<cnt; ++j) {
1216 if (argumentNames[j]== *subformats[i].argName ) {
1217 obj = arguments+j;
1218 break;
1219 }
1220 }
1221 if (obj == NULL ) {
1222 appendTo += LEFT_CURLY_BRACE;
1223 appendTo += *subformats[i].argName;
1224 appendTo += RIGHT_CURLY_BRACE;
1225 continue;
1226
1227 }
1228 }
1229 Formattable::Type type = obj->getType();
1230
1231 // Recursively calling the format process only if the current
1232 // format argument refers to either of the following:
1233 // a ChoiceFormat object, a PluralFormat object, a SelectFormat object.
1234 Format* fmt = subformats[i].format;
1235 if (fmt != NULL) {
1236 UnicodeString argNum;
1237 fmt->format(*obj, argNum, success);
1238
1239 // Needs to reprocess the ChoiceFormat and PluralFormat and SelectFormat option by using the
1240 // MessageFormat pattern application.
1241 if ((dynamic_cast<ChoiceFormat*>(fmt) != NULL ||
1242 dynamic_cast<PluralFormat*>(fmt) != NULL ||
1243 dynamic_cast<SelectFormat*>(fmt) != NULL) &&
1244 argNum.indexOf(LEFT_CURLY_BRACE) >= 0
1245 ) {
1246 MessageFormat temp(argNum, fLocale, success);
1247 // TODO: Implement recursion protection
1248 if ( isArgNumeric ) {
1249 temp.format(arguments, NULL, cnt, appendTo, status, recursionProtection, success);
1250 }
1251 else {
1252 temp.format(arguments, argumentNames, cnt, appendTo, status, recursionProtection, success);
1253 }
1254 if (U_FAILURE(success)) {
1255 return appendTo;
1256 }
1257 }
1258 else {
1259 appendTo += argNum;
1260 }
1261 }
1262 // If the obj data type is a number, use a NumberFormat instance.
1263 else if ((type == Formattable::kDouble) ||
1264 (type == Formattable::kLong) ||
1265 (type == Formattable::kInt64)) {
1266
1267 const NumberFormat* nf = getDefaultNumberFormat(success);
1268 if (nf == NULL) {
1269 return appendTo;
1270 }
1271 if (type == Formattable::kDouble) {
1272 nf->format(obj->getDouble(), appendTo);
1273 } else if (type == Formattable::kLong) {
1274 nf->format(obj->getLong(), appendTo);
1275 } else {
1276 nf->format(obj->getInt64(), appendTo);
1277 }
1278 }
1279 // If the obj data type is a Date instance, use a DateFormat instance.
1280 else if (type == Formattable::kDate) {
1281 const DateFormat* df = getDefaultDateFormat(success);
1282 if (df == NULL) {
1283 return appendTo;
1284 }
1285 df->format(obj->getDate(), appendTo);
1286 }
1287 else if (type == Formattable::kString) {
1288 appendTo += obj->getString();
1289 }
1290 else {
1291 success = U_ILLEGAL_ARGUMENT_ERROR;
1292 return appendTo;
1293 }
1294 }
1295 // Appends the rest of the pattern characters after the real last offset.
1296 appendTo.append(fPattern, lastOffset, 0x7fffffff);
1297 return appendTo;
1298 }
1299
1300
1301 // -------------------------------------
1302 // Parses the source pattern and returns the Formattable objects array,
1303 // the array count and the ending parse position. The caller of this method
1304 // owns the array.
1305
1306 Formattable*
parse(const UnicodeString & source,ParsePosition & pos,int32_t & count) const1307 MessageFormat::parse(const UnicodeString& source,
1308 ParsePosition& pos,
1309 int32_t& count) const
1310 {
1311 // Allocate at least one element. Allocating an array of length
1312 // zero causes problems on some platforms (e.g. Win32).
1313 Formattable *resultArray = new Formattable[argTypeCount ? argTypeCount : 1];
1314 int32_t patternOffset = 0;
1315 int32_t sourceOffset = pos.getIndex();
1316 ParsePosition tempPos(0);
1317 count = 0; // {sfb} reset to zero
1318 int32_t len;
1319 // If resultArray could not be created, exit out.
1320 // Avoid crossing initialization of variables above.
1321 if (resultArray == NULL) {
1322 goto PARSE_ERROR;
1323 }
1324 for (int32_t i = 0; i < subformatCount; ++i) {
1325 // match up to format
1326 len = subformats[i].offset - patternOffset;
1327 if (len == 0 ||
1328 fPattern.compare(patternOffset, len, source, sourceOffset, len) == 0) {
1329 sourceOffset += len;
1330 patternOffset += len;
1331 }
1332 else {
1333 goto PARSE_ERROR;
1334 }
1335
1336 // now use format
1337 Format* fmt = subformats[i].format;
1338 int32_t argNum = subformats[i].argNum;
1339 if (fmt == NULL) { // string format
1340 // if at end, use longest possible match
1341 // otherwise uses first match to intervening string
1342 // does NOT recursively try all possibilities
1343 int32_t tempLength = (i+1<subformatCount) ?
1344 subformats[i+1].offset : fPattern.length();
1345
1346 int32_t next;
1347 if (patternOffset >= tempLength) {
1348 next = source.length();
1349 }
1350 else {
1351 UnicodeString buffer;
1352 fPattern.extract(patternOffset,tempLength - patternOffset, buffer);
1353 next = source.indexOf(buffer, sourceOffset);
1354 }
1355
1356 if (next < 0) {
1357 goto PARSE_ERROR;
1358 }
1359 else {
1360 UnicodeString buffer;
1361 source.extract(sourceOffset,next - sourceOffset, buffer);
1362 UnicodeString strValue = buffer;
1363 UnicodeString temp(LEFT_CURLY_BRACE);
1364 // {sfb} check this later
1365 if (isArgNumeric) {
1366 itos(argNum, temp);
1367 }
1368 else {
1369 temp+=(*subformats[i].argName);
1370 }
1371 temp += RIGHT_CURLY_BRACE;
1372 if (strValue != temp) {
1373 source.extract(sourceOffset,next - sourceOffset, buffer);
1374 resultArray[argNum].setString(buffer);
1375 // {sfb} not sure about this
1376 if ((argNum + 1) > count) {
1377 count = argNum + 1;
1378 }
1379 }
1380 sourceOffset = next;
1381 }
1382 }
1383 else {
1384 tempPos.setIndex(sourceOffset);
1385 fmt->parseObject(source, resultArray[argNum], tempPos);
1386 if (tempPos.getIndex() == sourceOffset) {
1387 goto PARSE_ERROR;
1388 }
1389
1390 if ((argNum + 1) > count) {
1391 count = argNum + 1;
1392 }
1393 sourceOffset = tempPos.getIndex(); // update
1394 }
1395 }
1396 len = fPattern.length() - patternOffset;
1397 if (len == 0 ||
1398 fPattern.compare(patternOffset, len, source, sourceOffset, len) == 0) {
1399 pos.setIndex(sourceOffset + len);
1400 return resultArray;
1401 }
1402 // else fall through...
1403
1404 PARSE_ERROR:
1405 pos.setErrorIndex(sourceOffset);
1406 delete [] resultArray;
1407 count = 0;
1408 return NULL; // leave index as is to signal error
1409 }
1410
1411 // -------------------------------------
1412 // Parses the source string and returns the array of
1413 // Formattable objects and the array count. The caller
1414 // owns the returned array.
1415
1416 Formattable*
parse(const UnicodeString & source,int32_t & cnt,UErrorCode & success) const1417 MessageFormat::parse(const UnicodeString& source,
1418 int32_t& cnt,
1419 UErrorCode& success) const
1420 {
1421 if (!isArgNumeric ) {
1422 success = U_ARGUMENT_TYPE_MISMATCH;
1423 return NULL;
1424 }
1425 ParsePosition status(0);
1426 // Calls the actual implementation method and starts
1427 // from zero offset of the source text.
1428 Formattable* result = parse(source, status, cnt);
1429 if (status.getIndex() == 0) {
1430 success = U_MESSAGE_PARSE_ERROR;
1431 delete[] result;
1432 return NULL;
1433 }
1434 return result;
1435 }
1436
1437 // -------------------------------------
1438 // Parses the source text and copy into the result buffer.
1439
1440 void
parseObject(const UnicodeString & source,Formattable & result,ParsePosition & status) const1441 MessageFormat::parseObject( const UnicodeString& source,
1442 Formattable& result,
1443 ParsePosition& status) const
1444 {
1445 int32_t cnt = 0;
1446 Formattable* tmpResult = parse(source, status, cnt);
1447 if (tmpResult != NULL)
1448 result.adoptArray(tmpResult, cnt);
1449 }
1450
1451 UnicodeString
autoQuoteApostrophe(const UnicodeString & pattern,UErrorCode & status)1452 MessageFormat::autoQuoteApostrophe(const UnicodeString& pattern, UErrorCode& status) {
1453 UnicodeString result;
1454 if (U_SUCCESS(status)) {
1455 int32_t plen = pattern.length();
1456 const UChar* pat = pattern.getBuffer();
1457 int32_t blen = plen * 2 + 1; // space for null termination, convenience
1458 UChar* buf = result.getBuffer(blen);
1459 if (buf == NULL) {
1460 status = U_MEMORY_ALLOCATION_ERROR;
1461 } else {
1462 int32_t len = umsg_autoQuoteApostrophe(pat, plen, buf, blen, &status);
1463 result.releaseBuffer(U_SUCCESS(status) ? len : 0);
1464 }
1465 }
1466 if (U_FAILURE(status)) {
1467 result.setToBogus();
1468 }
1469 return result;
1470 }
1471
1472 // -------------------------------------
1473
makeRBNF(URBNFRuleSetTag tag,const Locale & locale,const UnicodeString & defaultRuleSet,UErrorCode & ec)1474 static Format* makeRBNF(URBNFRuleSetTag tag, const Locale& locale, const UnicodeString& defaultRuleSet, UErrorCode& ec) {
1475 RuleBasedNumberFormat* fmt = new RuleBasedNumberFormat(tag, locale, ec);
1476 if (fmt == NULL) {
1477 ec = U_MEMORY_ALLOCATION_ERROR;
1478 } else if (U_SUCCESS(ec) && defaultRuleSet.length() > 0) {
1479 UErrorCode localStatus = U_ZERO_ERROR; // ignore unrecognized default rule set
1480 fmt->setDefaultRuleSet(defaultRuleSet, localStatus);
1481 }
1482 return fmt;
1483 }
1484
1485 /**
1486 * Reads the segments[] array (see applyPattern()) and parses the
1487 * segments[1..3] into a Format* object. Stores the format object in
1488 * the subformats[] array. Updates the argTypes[] array type
1489 * information for the corresponding argument.
1490 *
1491 * @param formatNumber index into subformats[] for this format
1492 * @param segments array of strings with the parsed pattern segments
1493 * @param parseError parse error data (output param)
1494 * @param ec error code
1495 */
1496 void
makeFormat(int32_t formatNumber,UnicodeString * segments,UParseError & parseError,UErrorCode & ec)1497 MessageFormat::makeFormat(int32_t formatNumber,
1498 UnicodeString* segments,
1499 UParseError& parseError,
1500 UErrorCode& ec) {
1501 if (U_FAILURE(ec)) {
1502 return;
1503 }
1504
1505 // Parse the argument number
1506 int32_t argumentNumber = stou(segments[1]); // always unlocalized!
1507 UnicodeString argumentName;
1508 if (argumentNumber < 0) {
1509 if ( (isArgNumeric==TRUE) && (formatNumber !=0) ) {
1510 ec = U_INVALID_FORMAT_ERROR;
1511 return;
1512 }
1513 isArgNumeric = FALSE;
1514 argumentNumber=formatNumber;
1515 }
1516 if (!isArgNumeric) {
1517 if ( !isLegalArgName(segments[1]) ) {
1518 ec = U_INVALID_FORMAT_ERROR;
1519 return;
1520 }
1521 argumentName = segments[1];
1522 }
1523
1524 // Parse the format, recording the argument type and creating a
1525 // new Format object (except for string arguments).
1526 Formattable::Type argType;
1527 Format *fmt = NULL;
1528 int32_t typeID, styleID;
1529 DateFormat::EStyle style;
1530 UnicodeString unquotedPattern, quotedPattern;
1531 UBool inQuote = FALSE;
1532
1533 switch (typeID = findKeyword(segments[2], TYPE_IDS)) {
1534
1535 case 0: // string
1536 argType = Formattable::kString;
1537 break;
1538
1539 case 1: // number
1540 argType = Formattable::kDouble;
1541
1542 switch (findKeyword(segments[3], NUMBER_STYLE_IDS)) {
1543 case 0: // default
1544 fmt = NumberFormat::createInstance(fLocale, ec);
1545 break;
1546 case 1: // currency
1547 fmt = NumberFormat::createCurrencyInstance(fLocale, ec);
1548 break;
1549 case 2: // percent
1550 fmt = NumberFormat::createPercentInstance(fLocale, ec);
1551 break;
1552 case 3: // integer
1553 argType = Formattable::kLong;
1554 fmt = createIntegerFormat(fLocale, ec);
1555 break;
1556 default: // pattern
1557 fmt = NumberFormat::createInstance(fLocale, ec);
1558 if (fmt) {
1559 DecimalFormat* decfmt = dynamic_cast<DecimalFormat*>(fmt);
1560 if (decfmt != NULL) {
1561 decfmt->applyPattern(segments[3],parseError,ec);
1562 }
1563 }
1564 break;
1565 }
1566 break;
1567
1568 case 2: // date
1569 case 3: // time
1570 argType = Formattable::kDate;
1571 styleID = findKeyword(segments[3], DATE_STYLE_IDS);
1572 style = (styleID >= 0) ? DATE_STYLES[styleID] : DateFormat::kDefault;
1573
1574 if (typeID == 2) {
1575 fmt = DateFormat::createDateInstance(style, fLocale);
1576 } else {
1577 fmt = DateFormat::createTimeInstance(style, fLocale);
1578 }
1579
1580 if (styleID < 0 && fmt != NULL) {
1581 SimpleDateFormat* sdtfmt = dynamic_cast<SimpleDateFormat*>(fmt);
1582 if (sdtfmt != NULL) {
1583 sdtfmt->applyPattern(segments[3]);
1584 }
1585 }
1586 break;
1587
1588 case 4: // choice
1589 argType = Formattable::kDouble;
1590
1591 fmt = new ChoiceFormat(segments[3], parseError, ec);
1592 break;
1593
1594 case 5: // spellout
1595 argType = Formattable::kDouble;
1596 fmt = makeRBNF(URBNF_SPELLOUT, fLocale, segments[3], ec);
1597 break;
1598 case 6: // ordinal
1599 argType = Formattable::kDouble;
1600 fmt = makeRBNF(URBNF_ORDINAL, fLocale, segments[3], ec);
1601 break;
1602 case 7: // duration
1603 argType = Formattable::kDouble;
1604 fmt = makeRBNF(URBNF_DURATION, fLocale, segments[3], ec);
1605 break;
1606 case 8: // plural
1607 case 9: // Select
1608 if(typeID == 8)
1609 argType = Formattable::kDouble;
1610 else
1611 argType = Formattable::kString;
1612 quotedPattern = segments[3];
1613 for (int32_t i = 0; i < quotedPattern.length(); ++i) {
1614 UChar ch = quotedPattern.charAt(i);
1615 if (ch == SINGLE_QUOTE) {
1616 if (i+1 < quotedPattern.length() && quotedPattern.charAt(i+1)==SINGLE_QUOTE) {
1617 unquotedPattern+=ch;
1618 ++i;
1619 }
1620 else {
1621 inQuote = !inQuote;
1622 }
1623 }
1624 else {
1625 unquotedPattern += ch;
1626 }
1627 }
1628 if(typeID == 8)
1629 fmt = new PluralFormat(fLocale, unquotedPattern, ec);
1630 else
1631 fmt = new SelectFormat(unquotedPattern, ec);
1632 break;
1633 default:
1634 argType = Formattable::kString;
1635 ec = U_ILLEGAL_ARGUMENT_ERROR;
1636 break;
1637 }
1638
1639 if (fmt==NULL && argType!=Formattable::kString && U_SUCCESS(ec)) {
1640 ec = U_MEMORY_ALLOCATION_ERROR;
1641 }
1642
1643 if (!allocateSubformats(formatNumber+1) ||
1644 !allocateArgTypes(argumentNumber+1)) {
1645 ec = U_MEMORY_ALLOCATION_ERROR;
1646 }
1647
1648 if (U_FAILURE(ec)) {
1649 delete fmt;
1650 return;
1651 }
1652
1653 // Parse succeeded; record results in our arrays
1654 subformats[formatNumber].format = fmt;
1655 subformats[formatNumber].offset = segments[0].length();
1656 if (isArgNumeric) {
1657 subformats[formatNumber].argName = NULL;
1658 subformats[formatNumber].argNum = argumentNumber;
1659 }
1660 else {
1661 subformats[formatNumber].argName = new UnicodeString(argumentName);
1662 subformats[formatNumber].argNum = -1;
1663 }
1664 subformatCount = formatNumber+1;
1665
1666 // Careful here: argumentNumber may in general arrive out of
1667 // sequence, e.g., "There was {2} on {0,date} (see {1,number})."
1668 argTypes[argumentNumber] = argType;
1669 if (argumentNumber+1 > argTypeCount) {
1670 argTypeCount = argumentNumber+1;
1671 }
1672 }
1673
1674 // -------------------------------------
1675 // Finds the string, s, in the string array, list.
findKeyword(const UnicodeString & s,const UChar * const * list)1676 int32_t MessageFormat::findKeyword(const UnicodeString& s,
1677 const UChar * const *list)
1678 {
1679 if (s.length() == 0)
1680 return 0; // default
1681
1682 UnicodeString buffer = s;
1683 // Trims the space characters and turns all characters
1684 // in s to lower case.
1685 buffer.trim().toLower("");
1686 for (int32_t i = 0; list[i]; ++i) {
1687 if (!buffer.compare(list[i], u_strlen(list[i]))) {
1688 return i;
1689 }
1690 }
1691 return -1;
1692 }
1693
1694 // -------------------------------------
1695 // Checks the range of the source text to quote the special
1696 // characters, { and ' and copy to target buffer.
1697
1698 void
copyAndFixQuotes(const UnicodeString & source,int32_t start,int32_t end,UnicodeString & appendTo)1699 MessageFormat::copyAndFixQuotes(const UnicodeString& source,
1700 int32_t start,
1701 int32_t end,
1702 UnicodeString& appendTo)
1703 {
1704 UBool gotLB = FALSE;
1705
1706 for (int32_t i = start; i < end; ++i) {
1707 UChar ch = source[i];
1708 if (ch == LEFT_CURLY_BRACE) {
1709 appendTo += SINGLE_QUOTE;
1710 appendTo += LEFT_CURLY_BRACE;
1711 appendTo += SINGLE_QUOTE;
1712 gotLB = TRUE;
1713 }
1714 else if (ch == RIGHT_CURLY_BRACE) {
1715 if(gotLB) {
1716 appendTo += RIGHT_CURLY_BRACE;
1717 gotLB = FALSE;
1718 }
1719 else {
1720 // orig code.
1721 appendTo += SINGLE_QUOTE;
1722 appendTo += RIGHT_CURLY_BRACE;
1723 appendTo += SINGLE_QUOTE;
1724 }
1725 }
1726 else if (ch == SINGLE_QUOTE) {
1727 appendTo += SINGLE_QUOTE;
1728 appendTo += SINGLE_QUOTE;
1729 }
1730 else {
1731 appendTo += ch;
1732 }
1733 }
1734 }
1735
1736 /**
1737 * Convenience method that ought to be in NumberFormat
1738 */
1739 NumberFormat*
createIntegerFormat(const Locale & locale,UErrorCode & status) const1740 MessageFormat::createIntegerFormat(const Locale& locale, UErrorCode& status) const {
1741 NumberFormat *temp = NumberFormat::createInstance(locale, status);
1742 DecimalFormat *temp2;
1743 if (temp != NULL && (temp2 = dynamic_cast<DecimalFormat*>(temp)) != NULL) {
1744 temp2->setMaximumFractionDigits(0);
1745 temp2->setDecimalSeparatorAlwaysShown(FALSE);
1746 temp2->setParseIntegerOnly(TRUE);
1747 }
1748
1749 return temp;
1750 }
1751
1752 /**
1753 * Return the default number format. Used to format a numeric
1754 * argument when subformats[i].format is NULL. Returns NULL
1755 * on failure.
1756 *
1757 * Semantically const but may modify *this.
1758 */
getDefaultNumberFormat(UErrorCode & ec) const1759 const NumberFormat* MessageFormat::getDefaultNumberFormat(UErrorCode& ec) const {
1760 if (defaultNumberFormat == NULL) {
1761 MessageFormat* t = (MessageFormat*) this;
1762 t->defaultNumberFormat = NumberFormat::createInstance(fLocale, ec);
1763 if (U_FAILURE(ec)) {
1764 delete t->defaultNumberFormat;
1765 t->defaultNumberFormat = NULL;
1766 } else if (t->defaultNumberFormat == NULL) {
1767 ec = U_MEMORY_ALLOCATION_ERROR;
1768 }
1769 }
1770 return defaultNumberFormat;
1771 }
1772
1773 /**
1774 * Return the default date format. Used to format a date
1775 * argument when subformats[i].format is NULL. Returns NULL
1776 * on failure.
1777 *
1778 * Semantically const but may modify *this.
1779 */
getDefaultDateFormat(UErrorCode & ec) const1780 const DateFormat* MessageFormat::getDefaultDateFormat(UErrorCode& ec) const {
1781 if (defaultDateFormat == NULL) {
1782 MessageFormat* t = (MessageFormat*) this;
1783 t->defaultDateFormat = DateFormat::createDateTimeInstance(DateFormat::kShort, DateFormat::kShort, fLocale);
1784 if (t->defaultDateFormat == NULL) {
1785 ec = U_MEMORY_ALLOCATION_ERROR;
1786 }
1787 }
1788 return defaultDateFormat;
1789 }
1790
1791 UBool
usesNamedArguments() const1792 MessageFormat::usesNamedArguments() const {
1793 return !isArgNumeric;
1794 }
1795
1796 UBool
isLegalArgName(const UnicodeString & argName) const1797 MessageFormat::isLegalArgName(const UnicodeString& argName) const {
1798 if(!u_hasBinaryProperty(argName.charAt(0), idStart)) {
1799 return FALSE;
1800 }
1801 for (int32_t i=1; i<argName.length(); ++i) {
1802 if(!u_hasBinaryProperty(argName.charAt(i), idContinue)) {
1803 return FALSE;
1804 }
1805 }
1806 return TRUE;
1807 }
1808
1809 int32_t
getArgTypeCount() const1810 MessageFormat::getArgTypeCount() const {
1811 return argTypeCount;
1812 }
1813
FormatNameEnumeration(UVector * fNameList,UErrorCode &)1814 FormatNameEnumeration::FormatNameEnumeration(UVector *fNameList, UErrorCode& /*status*/) {
1815 pos=0;
1816 fFormatNames = fNameList;
1817 }
1818
1819 const UnicodeString*
snext(UErrorCode & status)1820 FormatNameEnumeration::snext(UErrorCode& status) {
1821 if (U_SUCCESS(status) && pos < fFormatNames->size()) {
1822 return (const UnicodeString*)fFormatNames->elementAt(pos++);
1823 }
1824 return NULL;
1825 }
1826
1827 void
reset(UErrorCode &)1828 FormatNameEnumeration::reset(UErrorCode& /*status*/) {
1829 pos=0;
1830 }
1831
1832 int32_t
count(UErrorCode &) const1833 FormatNameEnumeration::count(UErrorCode& /*status*/) const {
1834 return (fFormatNames==NULL) ? 0 : fFormatNames->size();
1835 }
1836
~FormatNameEnumeration()1837 FormatNameEnumeration::~FormatNameEnumeration() {
1838 UnicodeString *s;
1839 for (int32_t i=0; i<fFormatNames->size(); ++i) {
1840 if ((s=(UnicodeString *)fFormatNames->elementAt(i))!=NULL) {
1841 delete s;
1842 }
1843 }
1844 delete fFormatNames;
1845 }
1846 U_NAMESPACE_END
1847
1848 #endif /* #if !UCONFIG_NO_FORMATTING */
1849
1850 //eof
1851