• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /********************************************************************
2  * COPYRIGHT:
3  * Copyright (c) 1997-2010, International Business Machines Corporation and
4  * others. All Rights Reserved.
5  ********************************************************************
6  *
7  * File MSGFMT.CPP
8  *
9  * Modification History:
10  *
11  *   Date        Name        Description
12  *   02/19/97    aliu        Converted from java.
13  *   03/20/97    helena      Finished first cut of implementation.
14  *   04/10/97    aliu        Made to work on AIX.  Added stoi to replace wtoi.
15  *   06/11/97    helena      Fixed addPattern to take the pattern correctly.
16  *   06/17/97    helena      Fixed the getPattern to return the correct pattern.
17  *   07/09/97    helena      Made ParsePosition into a class.
18  *   02/22/99    stephen     Removed character literals for EBCDIC safety
19  *   11/01/09    kirtig      Added SelectFormat
20  ********************************************************************/
21 
22 #include "unicode/utypes.h"
23 
24 #if !UCONFIG_NO_FORMATTING
25 
26 #include "unicode/msgfmt.h"
27 #include "unicode/decimfmt.h"
28 #include "unicode/datefmt.h"
29 #include "unicode/smpdtfmt.h"
30 #include "unicode/choicfmt.h"
31 #include "unicode/plurfmt.h"
32 #include "unicode/selfmt.h"
33 #include "unicode/ustring.h"
34 #include "unicode/ucnv_err.h"
35 #include "unicode/uchar.h"
36 #include "unicode/umsg.h"
37 #include "unicode/rbnf.h"
38 #include "cmemory.h"
39 #include "msgfmt_impl.h"
40 #include "util.h"
41 #include "uassert.h"
42 #include "ustrfmt.h"
43 #include "uvector.h"
44 
45 // *****************************************************************************
46 // class MessageFormat
47 // *****************************************************************************
48 
49 #define COMMA             ((UChar)0x002C)
50 #define SINGLE_QUOTE      ((UChar)0x0027)
51 #define LEFT_CURLY_BRACE  ((UChar)0x007B)
52 #define RIGHT_CURLY_BRACE ((UChar)0x007D)
53 
54 //---------------------------------------
55 // static data
56 
57 static const UChar ID_EMPTY[]     = {
58     0 /* empty string, used for default so that null can mark end of list */
59 };
60 
61 static const UChar ID_NUMBER[]    = {
62     0x6E, 0x75, 0x6D, 0x62, 0x65, 0x72, 0  /* "number" */
63 };
64 static const UChar ID_DATE[]      = {
65     0x64, 0x61, 0x74, 0x65, 0              /* "date" */
66 };
67 static const UChar ID_TIME[]      = {
68     0x74, 0x69, 0x6D, 0x65, 0              /* "time" */
69 };
70 static const UChar ID_CHOICE[]    = {
71     0x63, 0x68, 0x6F, 0x69, 0x63, 0x65, 0  /* "choice" */
72 };
73 static const UChar ID_SPELLOUT[]  = {
74     0x73, 0x70, 0x65, 0x6c, 0x6c, 0x6f, 0x75, 0x74, 0 /* "spellout" */
75 };
76 static const UChar ID_ORDINAL[]   = {
77     0x6f, 0x72, 0x64, 0x69, 0x6e, 0x61, 0x6c, 0 /* "ordinal" */
78 };
79 static const UChar ID_DURATION[]  = {
80     0x64, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0 /* "duration" */
81 };
82 static const UChar ID_PLURAL[]  = {
83     0x70, 0x6c, 0x75, 0x72, 0x61, 0x6c, 0  /* "plural" */
84 };
85 static const UChar ID_SELECT[]  = {
86     0x73, 0x65, 0x6C, 0x65, 0x63, 0x74, 0  /* "select" */
87 };
88 
89 // MessageFormat Type List  Number, Date, Time or Choice
90 static const UChar * const TYPE_IDS[] = {
91     ID_EMPTY,
92     ID_NUMBER,
93     ID_DATE,
94     ID_TIME,
95     ID_CHOICE,
96     ID_SPELLOUT,
97     ID_ORDINAL,
98     ID_DURATION,
99     ID_PLURAL,
100     ID_SELECT,
101     NULL,
102 };
103 
104 static const UChar ID_CURRENCY[]  = {
105     0x63, 0x75, 0x72, 0x72, 0x65, 0x6E, 0x63, 0x79, 0  /* "currency" */
106 };
107 static const UChar ID_PERCENT[]   = {
108     0x70, 0x65, 0x72, 0x63, 0x65, 0x6E, 0x74, 0        /* "percent" */
109 };
110 static const UChar ID_INTEGER[]   = {
111     0x69, 0x6E, 0x74, 0x65, 0x67, 0x65, 0x72, 0        /* "integer" */
112 };
113 
114 // NumberFormat modifier list, default, currency, percent or integer
115 static const UChar * const NUMBER_STYLE_IDS[] = {
116     ID_EMPTY,
117     ID_CURRENCY,
118     ID_PERCENT,
119     ID_INTEGER,
120     NULL,
121 };
122 
123 static const UChar ID_SHORT[]     = {
124     0x73, 0x68, 0x6F, 0x72, 0x74, 0        /* "short" */
125 };
126 static const UChar ID_MEDIUM[]    = {
127     0x6D, 0x65, 0x64, 0x69, 0x75, 0x6D, 0  /* "medium" */
128 };
129 static const UChar ID_LONG[]      = {
130     0x6C, 0x6F, 0x6E, 0x67, 0              /* "long" */
131 };
132 static const UChar ID_FULL[]      = {
133     0x66, 0x75, 0x6C, 0x6C, 0              /* "full" */
134 };
135 
136 // DateFormat modifier list, default, short, medium, long or full
137 static const UChar * const DATE_STYLE_IDS[] = {
138     ID_EMPTY,
139     ID_SHORT,
140     ID_MEDIUM,
141     ID_LONG,
142     ID_FULL,
143     NULL,
144 };
145 
146 static const U_NAMESPACE_QUALIFIER DateFormat::EStyle DATE_STYLES[] = {
147     U_NAMESPACE_QUALIFIER DateFormat::kDefault,
148     U_NAMESPACE_QUALIFIER DateFormat::kShort,
149     U_NAMESPACE_QUALIFIER DateFormat::kMedium,
150     U_NAMESPACE_QUALIFIER DateFormat::kLong,
151     U_NAMESPACE_QUALIFIER DateFormat::kFull,
152 };
153 
154 static const int32_t DEFAULT_INITIAL_CAPACITY = 10;
155 
156 U_NAMESPACE_BEGIN
157 
158 // -------------------------------------
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(MessageFormat)159 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(MessageFormat)
160 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(FormatNameEnumeration)
161 
162 //--------------------------------------------------------------------
163 
164 /**
165  * Convert a string to an unsigned decimal, ignoring rule whitespace.
166  * @return a non-negative number if successful, or a negative number
167  *         upon failure.
168  */
169 static int32_t stou(const UnicodeString& string) {
170     int32_t n = 0;
171     int32_t count = 0;
172     UChar32 c;
173     for (int32_t i=0; i<string.length(); i+=U16_LENGTH(c)) {
174         c = string.char32At(i);
175         if (uprv_isRuleWhiteSpace(c)) {
176             continue;
177         }
178         int32_t d = u_digit(c, 10);
179         if (d < 0 || ++count > 10) {
180             return -1;
181         }
182         n = 10*n + d;
183     }
184     return n;
185 }
186 
187 /**
188  * Convert an integer value to a string and append the result to
189  * the given UnicodeString.
190  */
itos(int32_t i,UnicodeString & appendTo)191 static UnicodeString& itos(int32_t i, UnicodeString& appendTo) {
192     UChar temp[16];
193     uprv_itou(temp,16,i,10,0); // 10 == radix
194     appendTo.append(temp);
195     return appendTo;
196 }
197 
198 /*
199  * A structure representing one subformat of this MessageFormat.
200  * Each subformat has a Format object, an offset into the plain
201  * pattern text fPattern, and an argument number.  The argument
202  * number corresponds to the array of arguments to be formatted.
203  * @internal
204  */
205 class MessageFormat::Subformat : public UMemory {
206 public:
207     /**
208      * @internal
209      */
210     Format* format; // formatter
211     /**
212      * @internal
213      */
214     int32_t offset; // offset into fPattern
215     /**
216      * @internal
217      */
218     // TODO (claireho) or save the number to argName and use itos to convert to number.=> we need this number
219     int32_t argNum;    // 0-based argument number
220     /**
221      * @internal
222      */
223     UnicodeString* argName; // argument name or number
224 
225     /**
226      * Clone that.format and assign it to this.format
227      * Do NOT delete this.format
228      * @internal
229      */
operator =(const Subformat & that)230     Subformat& operator=(const Subformat& that) {
231         if (this != &that) {
232             format = that.format ? that.format->clone() : NULL;
233             offset = that.offset;
234             argNum = that.argNum;
235             argName = (that.argNum==-1) ? new UnicodeString(*that.argName): NULL;
236         }
237         return *this;
238     }
239 
240     /**
241      * @internal
242      */
operator ==(const Subformat & that) const243     UBool operator==(const Subformat& that) const {
244         // Do cheap comparisons first
245         return offset == that.offset &&
246                argNum == that.argNum &&
247                ((argName == that.argName) ||
248                 (*argName == *that.argName)) &&
249                ((format == that.format) || // handles NULL
250                 (*format == *that.format));
251     }
252 
253     /**
254      * @internal
255      */
operator !=(const Subformat & that) const256     UBool operator!=(const Subformat& that) const {
257         return !operator==(that);
258     }
259 };
260 
261 // -------------------------------------
262 // Creates a MessageFormat instance based on the pattern.
263 
MessageFormat(const UnicodeString & pattern,UErrorCode & success)264 MessageFormat::MessageFormat(const UnicodeString& pattern,
265                              UErrorCode& success)
266 : fLocale(Locale::getDefault()),  // Uses the default locale
267   formatAliases(NULL),
268   formatAliasesCapacity(0),
269   idStart(UCHAR_ID_START),
270   idContinue(UCHAR_ID_CONTINUE),
271   subformats(NULL),
272   subformatCount(0),
273   subformatCapacity(0),
274   argTypes(NULL),
275   argTypeCount(0),
276   argTypeCapacity(0),
277   isArgNumeric(TRUE),
278   defaultNumberFormat(NULL),
279   defaultDateFormat(NULL)
280 {
281     if (!allocateSubformats(DEFAULT_INITIAL_CAPACITY) ||
282         !allocateArgTypes(DEFAULT_INITIAL_CAPACITY)) {
283         success = U_MEMORY_ALLOCATION_ERROR;
284         return;
285     }
286     applyPattern(pattern, success);
287     setLocaleIDs(fLocale.getName(), fLocale.getName());
288 }
289 
MessageFormat(const UnicodeString & pattern,const Locale & newLocale,UErrorCode & success)290 MessageFormat::MessageFormat(const UnicodeString& pattern,
291                              const Locale& newLocale,
292                              UErrorCode& success)
293 : fLocale(newLocale),
294   formatAliases(NULL),
295   formatAliasesCapacity(0),
296   idStart(UCHAR_ID_START),
297   idContinue(UCHAR_ID_CONTINUE),
298   subformats(NULL),
299   subformatCount(0),
300   subformatCapacity(0),
301   argTypes(NULL),
302   argTypeCount(0),
303   argTypeCapacity(0),
304   isArgNumeric(TRUE),
305   defaultNumberFormat(NULL),
306   defaultDateFormat(NULL)
307 {
308     if (!allocateSubformats(DEFAULT_INITIAL_CAPACITY) ||
309         !allocateArgTypes(DEFAULT_INITIAL_CAPACITY)) {
310         success = U_MEMORY_ALLOCATION_ERROR;
311         return;
312     }
313     applyPattern(pattern, success);
314     setLocaleIDs(fLocale.getName(), fLocale.getName());
315 }
316 
MessageFormat(const UnicodeString & pattern,const Locale & newLocale,UParseError & parseError,UErrorCode & success)317 MessageFormat::MessageFormat(const UnicodeString& pattern,
318                              const Locale& newLocale,
319                              UParseError& parseError,
320                              UErrorCode& success)
321 : fLocale(newLocale),
322   formatAliases(NULL),
323   formatAliasesCapacity(0),
324   idStart(UCHAR_ID_START),
325   idContinue(UCHAR_ID_CONTINUE),
326   subformats(NULL),
327   subformatCount(0),
328   subformatCapacity(0),
329   argTypes(NULL),
330   argTypeCount(0),
331   argTypeCapacity(0),
332   isArgNumeric(TRUE),
333   defaultNumberFormat(NULL),
334   defaultDateFormat(NULL)
335 {
336     if (!allocateSubformats(DEFAULT_INITIAL_CAPACITY) ||
337         !allocateArgTypes(DEFAULT_INITIAL_CAPACITY)) {
338         success = U_MEMORY_ALLOCATION_ERROR;
339         return;
340     }
341     applyPattern(pattern, parseError, success);
342     setLocaleIDs(fLocale.getName(), fLocale.getName());
343 }
344 
// Copy constructor.  Every member starts out empty and the actual copying
// is delegated to the assignment operator, which allocates the arrays.
MessageFormat::MessageFormat(const MessageFormat& that)
: Format(that),
  formatAliases(NULL),
  formatAliasesCapacity(0),
  idStart(UCHAR_ID_START),
  idContinue(UCHAR_ID_CONTINUE),
  subformats(NULL),
  subformatCount(0),
  subformatCapacity(0),
  argTypes(NULL),
  argTypeCount(0),
  argTypeCapacity(0),
  isArgNumeric(TRUE),
  defaultNumberFormat(NULL),
  defaultDateFormat(NULL)
{
    operator=(that);
}
363 
~MessageFormat()364 MessageFormat::~MessageFormat()
365 {
366     int32_t idx;
367     for (idx = 0; idx < subformatCount; idx++) {
368         delete subformats[idx].format;
369         delete subformats[idx].argName;
370     }
371     uprv_free(subformats);
372     subformats = NULL;
373     subformatCount = subformatCapacity = 0;
374 
375     uprv_free(argTypes);
376     argTypes = NULL;
377     argTypeCount = argTypeCapacity = 0;
378 
379     uprv_free(formatAliases);
380 
381     delete defaultNumberFormat;
382     delete defaultDateFormat;
383 }
384 
385 //--------------------------------------------------------------------
386 // Variable-size array management
387 
388 /**
389  * Allocate subformats[] to at least the given capacity and return
390  * TRUE if successful.  If not, leave subformats[] unchanged.
391  *
392  * If subformats is NULL, allocate it.  If it is not NULL, enlarge it
393  * if necessary to be at least as large as specified.
394  */
allocateSubformats(int32_t capacity)395 UBool MessageFormat::allocateSubformats(int32_t capacity) {
396     if (subformats == NULL) {
397         subformats = (Subformat*) uprv_malloc(sizeof(*subformats) * capacity);
398         subformatCapacity = capacity;
399         subformatCount = 0;
400         if (subformats == NULL) {
401             subformatCapacity = 0;
402             return FALSE;
403         }
404     } else if (subformatCapacity < capacity) {
405         if (capacity < 2*subformatCapacity) {
406             capacity = 2*subformatCapacity;
407         }
408         Subformat* a = (Subformat*)
409             uprv_realloc(subformats, sizeof(*subformats) * capacity);
410         if (a == NULL) {
411             return FALSE; // request failed
412         }
413         subformats = a;
414         subformatCapacity = capacity;
415     }
416     return TRUE;
417 }
418 
419 /**
420  * Allocate argTypes[] to at least the given capacity and return
421  * TRUE if successful.  If not, leave argTypes[] unchanged.
422  *
423  * If argTypes is NULL, allocate it.  If it is not NULL, enlarge it
424  * if necessary to be at least as large as specified.
425  */
allocateArgTypes(int32_t capacity)426 UBool MessageFormat::allocateArgTypes(int32_t capacity) {
427     if (argTypes == NULL) {
428         argTypes = (Formattable::Type*) uprv_malloc(sizeof(*argTypes) * capacity);
429         argTypeCount = 0;
430         argTypeCapacity = capacity;
431         if (argTypes == NULL) {
432             argTypeCapacity = 0;
433             return FALSE;
434         }
435         for (int32_t i=0; i<capacity; ++i) {
436             argTypes[i] = Formattable::kString;
437         }
438     } else if (argTypeCapacity < capacity) {
439         if (capacity < 2*argTypeCapacity) {
440             capacity = 2*argTypeCapacity;
441         }
442         Formattable::Type* a = (Formattable::Type*)
443             uprv_realloc(argTypes, sizeof(*argTypes) * capacity);
444         if (a == NULL) {
445             return FALSE; // request failed
446         }
447         for (int32_t i=argTypeCapacity; i<capacity; ++i) {
448             a[i] = Formattable::kString;
449         }
450         argTypes = a;
451         argTypeCapacity = capacity;
452     }
453     return TRUE;
454 }
455 
456 // -------------------------------------
457 // assignment operator
458 
459 const MessageFormat&
operator =(const MessageFormat & that)460 MessageFormat::operator=(const MessageFormat& that)
461 {
462     // Reallocate the arrays BEFORE changing this object
463     if (this != &that &&
464         allocateSubformats(that.subformatCount) &&
465         allocateArgTypes(that.argTypeCount)) {
466 
467         // Calls the super class for assignment first.
468         Format::operator=(that);
469 
470         fPattern = that.fPattern;
471         setLocale(that.fLocale);
472         isArgNumeric = that.isArgNumeric;
473         int32_t j;
474         for (j=0; j<subformatCount; ++j) {
475             delete subformats[j].format;
476         }
477         subformatCount = 0;
478 
479         for (j=0; j<that.subformatCount; ++j) {
480             // Subformat::operator= does NOT delete this.format
481             subformats[j] = that.subformats[j];
482         }
483         subformatCount = that.subformatCount;
484 
485         for (j=0; j<that.argTypeCount; ++j) {
486             argTypes[j] = that.argTypes[j];
487         }
488         argTypeCount = that.argTypeCount;
489     }
490     return *this;
491 }
492 
493 UBool
operator ==(const Format & rhs) const494 MessageFormat::operator==(const Format& rhs) const
495 {
496     if (this == &rhs) return TRUE;
497 
498     MessageFormat& that = (MessageFormat&)rhs;
499 
500     // Check class ID before checking MessageFormat members
501     if (!Format::operator==(rhs) ||
502         fPattern != that.fPattern ||
503         fLocale != that.fLocale ||
504         isArgNumeric != that.isArgNumeric) {
505         return FALSE;
506     }
507 
508     int32_t j;
509     for (j=0; j<subformatCount; ++j) {
510         if (subformats[j] != that.subformats[j]) {
511             return FALSE;
512         }
513     }
514 
515     return TRUE;
516 }
517 
518 // -------------------------------------
519 // Creates a copy of this MessageFormat, the caller owns the copy.
520 
521 Format*
clone() const522 MessageFormat::clone() const
523 {
524     return new MessageFormat(*this);
525 }
526 
527 // -------------------------------------
528 // Sets the locale of this MessageFormat object to theLocale.
529 
530 void
setLocale(const Locale & theLocale)531 MessageFormat::setLocale(const Locale& theLocale)
532 {
533     if (fLocale != theLocale) {
534         delete defaultNumberFormat;
535         defaultNumberFormat = NULL;
536         delete defaultDateFormat;
537         defaultDateFormat = NULL;
538     }
539     fLocale = theLocale;
540     setLocaleIDs(fLocale.getName(), fLocale.getName());
541 }
542 
543 // -------------------------------------
544 // Gets the locale of this MessageFormat object.
545 
546 const Locale&
getLocale() const547 MessageFormat::getLocale() const
548 {
549     return fLocale;
550 }
551 
552 
553 
554 
555 void
applyPattern(const UnicodeString & newPattern,UErrorCode & status)556 MessageFormat::applyPattern(const UnicodeString& newPattern,
557                             UErrorCode& status)
558 {
559     UParseError parseError;
560     applyPattern(newPattern,parseError,status);
561 }
562 
563 
564 // -------------------------------------
565 // Applies the new pattern and returns an error if the pattern
566 // is not correct.
567 void
applyPattern(const UnicodeString & pattern,UParseError & parseError,UErrorCode & ec)568 MessageFormat::applyPattern(const UnicodeString& pattern,
569                             UParseError& parseError,
570                             UErrorCode& ec)
571 {
572     if(U_FAILURE(ec)) {
573         return;
574     }
575     // The pattern is broken up into segments.  Each time a subformat
576     // is encountered, 4 segments are recorded.  For example, consider
577     // the pattern:
578     //  "There {0,choice,0.0#are no files|1.0#is one file|1.0<are {0, number} files} on disk {1}."
579     // The first set of segments is:
580     //  segments[0] = "There "
581     //  segments[1] = "0"
582     //  segments[2] = "choice"
583     //  segments[3] = "0.0#are no files|1.0#is one file|1.0<are {0, number} files"
584 
585     // During parsing, the plain text is accumulated into segments[0].
586     // Segments 1..3 are used to parse each subpattern.  Each time a
587     // subpattern is parsed, it creates a format object that is stored
588     // in the subformats array, together with an offset and argument
589     // number.  The offset into the plain text stored in
590     // segments[0].
591 
592     // Quotes in segment 0 are handled normally.  They are removed.
593     // Quotes may not occur in segments 1 or 2.
594     // Quotes in segment 3 are parsed and _copied_.  This makes
595     //  subformat patterns work, e.g., {1,number,'#'.##} passes
596     //  the pattern "'#'.##" to DecimalFormat.
597 
598     UnicodeString segments[4];
599     int32_t part = 0; // segment we are in, 0..3
600     // Record the highest argument number in the pattern.  (In the
601     // subpattern {3,number} the argument number is 3.)
602     int32_t formatNumber = 0;
603     UBool inQuote = FALSE;
604     int32_t braceStack = 0;
605     // Clear error struct
606     parseError.offset = -1;
607     parseError.preContext[0] = parseError.postContext[0] = (UChar)0;
608     int32_t patLen = pattern.length();
609     int32_t i;
610 
611     for (i=0; i<subformatCount; ++i) {
612         delete subformats[i].format;
613     }
614     subformatCount = 0;
615     argTypeCount = 0;
616 
617     for (i=0; i<patLen; ++i) {
618         UChar ch = pattern[i];
619         if (part == 0) {
620             // In segment 0, recognize and remove quotes
621             if (ch == SINGLE_QUOTE) {
622                 if (i+1 < patLen && pattern[i+1] == SINGLE_QUOTE) {
623                     segments[0] += ch;
624                     ++i;
625                 } else {
626                     inQuote = !inQuote;
627                 }
628             } else if (ch == LEFT_CURLY_BRACE && !inQuote) {
629                 // The only way we get from segment 0 to 1 is via an
630                 // unquoted '{'.
631                 part = 1;
632             } else {
633                 segments[0] += ch;
634             }
635         } else if (inQuote) {
636             // In segments 1..3, recognize quoted matter, and copy it
637             // into the segment, together with the quotes.  This takes
638             // care of '' as well.
639             segments[part] += ch;
640             if (ch == SINGLE_QUOTE) {
641                 inQuote = FALSE;
642             }
643         } else {
644             // We have an unquoted character in segment 1..3
645             switch (ch) {
646             case COMMA:
647                 // Commas bump us to the next segment, except for segment 3,
648                 // which can contain commas.  See example above.
649                 if (part < 3)
650                     part += 1;
651                 else
652                     segments[3] += ch;
653                 break;
654             case LEFT_CURLY_BRACE:
655                 // Handle '{' within segment 3.  The initial '{'
656                 // before segment 1 is handled above.
657                 if (part != 3) {
658                     ec = U_PATTERN_SYNTAX_ERROR;
659                     goto SYNTAX_ERROR;
660                 }
661                 ++braceStack;
662                 segments[part] += ch;
663                 break;
664             case RIGHT_CURLY_BRACE:
665                 if (braceStack == 0) {
666                     makeFormat(formatNumber, segments, parseError,ec);
667                     if (U_FAILURE(ec)){
668                         goto SYNTAX_ERROR;
669                     }
670                     formatNumber++;
671 
672                     segments[1].remove();
673                     segments[2].remove();
674                     segments[3].remove();
675                     part = 0;
676                 } else {
677                     --braceStack;
678                     segments[part] += ch;
679                 }
680                 break;
681             case SINGLE_QUOTE:
682                 inQuote = TRUE;
683                 // fall through (copy quote chars in segments 1..3)
684             default:
685                 segments[part] += ch;
686                 break;
687             }
688         }
689     }
690     if (braceStack != 0 || part != 0) {
691         // Unmatched braces in the pattern
692         ec = U_UNMATCHED_BRACES;
693         goto SYNTAX_ERROR;
694     }
695     fPattern = segments[0];
696     return;
697 
698  SYNTAX_ERROR:
699     syntaxError(pattern, i, parseError);
700     for (i=0; i<subformatCount; ++i) {
701         delete subformats[i].format;
702     }
703     argTypeCount = subformatCount = 0;
704 }
705 // -------------------------------------
706 // Converts this MessageFormat instance to a pattern.
707 
708 UnicodeString&
toPattern(UnicodeString & appendTo) const709 MessageFormat::toPattern(UnicodeString& appendTo) const {
710     // later, make this more extensible
711     int32_t lastOffset = 0;
712     int32_t i;
713     for (i=0; i<subformatCount; ++i) {
714         copyAndFixQuotes(fPattern, lastOffset, subformats[i].offset, appendTo);
715         lastOffset = subformats[i].offset;
716         appendTo += LEFT_CURLY_BRACE;
717         if (isArgNumeric) {
718             itos(subformats[i].argNum, appendTo);
719         }
720         else {
721             appendTo += *subformats[i].argName;
722         }
723         Format* fmt = subformats[i].format;
724         DecimalFormat* decfmt;
725         SimpleDateFormat* sdtfmt;
726         ChoiceFormat* chcfmt;
727         PluralFormat* plfmt;
728         SelectFormat* selfmt;
729         if (fmt == NULL) {
730             // do nothing, string format
731         }
732         else if ((decfmt = dynamic_cast<DecimalFormat*>(fmt)) != NULL) {
733             UErrorCode ec = U_ZERO_ERROR;
734             NumberFormat& formatAlias = *decfmt;
735             NumberFormat *defaultTemplate = NumberFormat::createInstance(fLocale, ec);
736             NumberFormat *currencyTemplate = NumberFormat::createCurrencyInstance(fLocale, ec);
737             NumberFormat *percentTemplate = NumberFormat::createPercentInstance(fLocale, ec);
738             NumberFormat *integerTemplate = createIntegerFormat(fLocale, ec);
739 
740             appendTo += COMMA;
741             appendTo += ID_NUMBER;
742             if (formatAlias != *defaultTemplate) {
743                 appendTo += COMMA;
744                 if (formatAlias == *currencyTemplate) {
745                     appendTo += ID_CURRENCY;
746                 }
747                 else if (formatAlias == *percentTemplate) {
748                     appendTo += ID_PERCENT;
749                 }
750                 else if (formatAlias == *integerTemplate) {
751                     appendTo += ID_INTEGER;
752                 }
753                 else {
754                     UnicodeString buffer;
755                     appendTo += decfmt->toPattern(buffer);
756                 }
757             }
758 
759             delete defaultTemplate;
760             delete currencyTemplate;
761             delete percentTemplate;
762             delete integerTemplate;
763         }
764         else if ((sdtfmt = dynamic_cast<SimpleDateFormat*>(fmt)) != NULL) {
765             DateFormat& formatAlias = *sdtfmt;
766             DateFormat *defaultDateTemplate = DateFormat::createDateInstance(DateFormat::kDefault, fLocale);
767             DateFormat *shortDateTemplate = DateFormat::createDateInstance(DateFormat::kShort, fLocale);
768             DateFormat *longDateTemplate = DateFormat::createDateInstance(DateFormat::kLong, fLocale);
769             DateFormat *fullDateTemplate = DateFormat::createDateInstance(DateFormat::kFull, fLocale);
770             DateFormat *defaultTimeTemplate = DateFormat::createTimeInstance(DateFormat::kDefault, fLocale);
771             DateFormat *shortTimeTemplate = DateFormat::createTimeInstance(DateFormat::kShort, fLocale);
772             DateFormat *longTimeTemplate = DateFormat::createTimeInstance(DateFormat::kLong, fLocale);
773             DateFormat *fullTimeTemplate = DateFormat::createTimeInstance(DateFormat::kFull, fLocale);
774 
775 
776             appendTo += COMMA;
777             if (formatAlias == *defaultDateTemplate) {
778                 // default is medium. no need to handle medium separately.
779                 appendTo += ID_DATE;
780             }
781             else if (formatAlias == *shortDateTemplate) {
782                 appendTo += ID_DATE;
783                 appendTo += COMMA;
784                 appendTo += ID_SHORT;
785             }
786             else if (formatAlias == *longDateTemplate) {
787                 appendTo += ID_DATE;
788                 appendTo += COMMA;
789                 appendTo += ID_LONG;
790             }
791             else if (formatAlias == *fullDateTemplate) {
792                 appendTo += ID_DATE;
793                 appendTo += COMMA;
794                 appendTo += ID_FULL;
795             }
796             else if (formatAlias == *defaultTimeTemplate) {
797                 // default is medium. no need to handle medium separately.
798                 appendTo += ID_TIME;
799             }
800             else if (formatAlias == *shortTimeTemplate) {
801                 appendTo += ID_TIME;
802                 appendTo += COMMA;
803                 appendTo += ID_SHORT;
804             }
805             else if (formatAlias == *longTimeTemplate) {
806                 appendTo += ID_TIME;
807                 appendTo += COMMA;
808                 appendTo += ID_LONG;
809             }
810             else if (formatAlias == *fullTimeTemplate) {
811                 appendTo += ID_TIME;
812                 appendTo += COMMA;
813                 appendTo += ID_FULL;
814             }
815             else {
816                 UnicodeString buffer;
817                 appendTo += ID_DATE;
818                 appendTo += COMMA;
819                 appendTo += sdtfmt->toPattern(buffer);
820             }
821 
822             delete defaultDateTemplate;
823             delete shortDateTemplate;
824             delete longDateTemplate;
825             delete fullDateTemplate;
826             delete defaultTimeTemplate;
827             delete shortTimeTemplate;
828             delete longTimeTemplate;
829             delete fullTimeTemplate;
830             // {sfb} there should be a more efficient way to do this!
831         }
832         else if ((chcfmt = dynamic_cast<ChoiceFormat*>(fmt)) != NULL) {
833             UnicodeString buffer;
834             appendTo += COMMA;
835             appendTo += ID_CHOICE;
836             appendTo += COMMA;
837             appendTo += ((ChoiceFormat*)fmt)->toPattern(buffer);
838         }
839         else if ((plfmt = dynamic_cast<PluralFormat*>(fmt)) != NULL) {
840             UnicodeString buffer;
841             appendTo += plfmt->toPattern(buffer);
842         }
843         else if ((selfmt = dynamic_cast<SelectFormat*>(fmt)) != NULL) {
844             UnicodeString buffer;
845             appendTo += ((SelectFormat*)fmt)->toPattern(buffer);
846         }
847         else {
848             //appendTo += ", unknown";
849         }
850         appendTo += RIGHT_CURLY_BRACE;
851     }
852     copyAndFixQuotes(fPattern, lastOffset, fPattern.length(), appendTo);
853     return appendTo;
854 }
855 
856 // -------------------------------------
857 // Adopts the new formats array and updates the array count.
858 // This MessageFormat instance owns the new formats.
859 
860 void
adoptFormats(Format ** newFormats,int32_t count)861 MessageFormat::adoptFormats(Format** newFormats,
862                             int32_t count) {
863     if (newFormats == NULL || count < 0) {
864         return;
865     }
866 
867     int32_t i;
868     if (allocateSubformats(count)) {
869         for (i=0; i<subformatCount; ++i) {
870             delete subformats[i].format;
871         }
872         for (i=0; i<count; ++i) {
873             subformats[i].format = newFormats[i];
874         }
875         subformatCount = count;
876     } else {
877         // An adopt method must always take ownership.  Delete
878         // the incoming format objects and return unchanged.
879         for (i=0; i<count; ++i) {
880             delete newFormats[i];
881         }
882     }
883 
884     // TODO: What about the .offset and .argNum fields?
885 }
886 
887 // -------------------------------------
888 // Sets the new formats array and updates the array count.
889 // This MessageFormat instance maks a copy of the new formats.
890 
891 void
setFormats(const Format ** newFormats,int32_t count)892 MessageFormat::setFormats(const Format** newFormats,
893                           int32_t count) {
894     if (newFormats == NULL || count < 0) {
895         return;
896     }
897 
898     if (allocateSubformats(count)) {
899         int32_t i;
900         for (i=0; i<subformatCount; ++i) {
901             delete subformats[i].format;
902         }
903         subformatCount = 0;
904 
905         for (i=0; i<count; ++i) {
906             subformats[i].format = newFormats[i] ? newFormats[i]->clone() : NULL;
907         }
908         subformatCount = count;
909     }
910 
911     // TODO: What about the .offset and .arg fields?
912 }
913 
914 // -------------------------------------
915 // Adopt a single format by format number.
916 // Do nothing if the format number is not less than the array count.
917 
918 void
adoptFormat(int32_t n,Format * newFormat)919 MessageFormat::adoptFormat(int32_t n, Format *newFormat) {
920     if (n < 0 || n >= subformatCount) {
921         delete newFormat;
922     } else {
923         delete subformats[n].format;
924         subformats[n].format = newFormat;
925     }
926 }
927 
928 // -------------------------------------
929 // Adopt a single format by format name.
930 // Do nothing if there is no match of formatName.
931 void
adoptFormat(const UnicodeString & formatName,Format * formatToAdopt,UErrorCode & status)932 MessageFormat::adoptFormat(const UnicodeString& formatName,
933                            Format* formatToAdopt,
934                            UErrorCode& status) {
935     if (isArgNumeric ) {
936         int32_t argumentNumber = stou(formatName);
937         if (argumentNumber<0) {
938             status = U_ARGUMENT_TYPE_MISMATCH;
939             return;
940         }
941         adoptFormat(argumentNumber, formatToAdopt);
942         return;
943     }
944     for (int32_t i=0; i<subformatCount; ++i) {
945         if (formatName==*subformats[i].argName) {
946             delete subformats[i].format;
947             if ( formatToAdopt== NULL) {
948                 // This should never happen -- but we'll be nice if it does
949                 subformats[i].format = NULL;
950             } else {
951                 subformats[i].format = formatToAdopt;
952             }
953         }
954     }
955 }
956 
957 // -------------------------------------
958 // Set a single format.
959 // Do nothing if the variable is not less than the array count.
960 
961 void
setFormat(int32_t n,const Format & newFormat)962 MessageFormat::setFormat(int32_t n, const Format& newFormat) {
963     if (n >= 0 && n < subformatCount) {
964         delete subformats[n].format;
965         if (&newFormat == NULL) {
966             // This should never happen -- but we'll be nice if it does
967             subformats[n].format = NULL;
968         } else {
969             subformats[n].format = newFormat.clone();
970         }
971     }
972 }
973 
974 // -------------------------------------
975 // Get a single format by format name.
976 // Do nothing if the variable is not less than the array count.
977 Format *
getFormat(const UnicodeString & formatName,UErrorCode & status)978 MessageFormat::getFormat(const UnicodeString& formatName, UErrorCode& status) {
979 
980     if (U_FAILURE(status)) return NULL;
981 
982     if (isArgNumeric ) {
983         int32_t argumentNumber = stou(formatName);
984         if (argumentNumber<0) {
985             status = U_ARGUMENT_TYPE_MISMATCH;
986             return NULL;
987         }
988         if (argumentNumber < 0 || argumentNumber >= subformatCount) {
989             return subformats[argumentNumber].format;
990         }
991         else {
992             return NULL;
993         }
994     }
995 
996     for (int32_t i=0; i<subformatCount; ++i) {
997         if (formatName==*subformats[i].argName)
998         {
999             return subformats[i].format;
1000         }
1001     }
1002     return NULL;
1003 }
1004 
1005 // -------------------------------------
1006 // Set a single format by format name
1007 // Do nothing if the variable is not less than the array count.
1008 void
setFormat(const UnicodeString & formatName,const Format & newFormat,UErrorCode & status)1009 MessageFormat::setFormat(const UnicodeString& formatName,
1010                          const Format& newFormat,
1011                          UErrorCode& status) {
1012     if (isArgNumeric) {
1013         status = U_ARGUMENT_TYPE_MISMATCH;
1014         return;
1015     }
1016     for (int32_t i=0; i<subformatCount; ++i) {
1017         if (formatName==*subformats[i].argName)
1018         {
1019             delete subformats[i].format;
1020             if (&newFormat == NULL) {
1021                 // This should never happen -- but we'll be nice if it does
1022                 subformats[i].format = NULL;
1023             } else {
1024                 subformats[i].format = newFormat.clone();
1025             }
1026             break;
1027         }
1028     }
1029 }
1030 
1031 // -------------------------------------
1032 // Gets the format array.
1033 
1034 const Format**
getFormats(int32_t & cnt) const1035 MessageFormat::getFormats(int32_t& cnt) const
1036 {
1037     // This old API returns an array (which we hold) of Format*
1038     // pointers.  The array is valid up to the next call to any
1039     // method on this object.  We construct and resize an array
1040     // on demand that contains aliases to the subformats[i].format
1041     // pointers.
1042     MessageFormat* t = (MessageFormat*) this;
1043     cnt = 0;
1044     if (formatAliases == NULL) {
1045         t->formatAliasesCapacity = (subformatCount<10) ? 10 : subformatCount;
1046         Format** a = (Format**)
1047             uprv_malloc(sizeof(Format*) * formatAliasesCapacity);
1048         if (a == NULL) {
1049             return NULL;
1050         }
1051         t->formatAliases = a;
1052     } else if (subformatCount > formatAliasesCapacity) {
1053         Format** a = (Format**)
1054             uprv_realloc(formatAliases, sizeof(Format*) * subformatCount);
1055         if (a == NULL) {
1056             return NULL;
1057         }
1058         t->formatAliases = a;
1059         t->formatAliasesCapacity = subformatCount;
1060     }
1061     for (int32_t i=0; i<subformatCount; ++i) {
1062         t->formatAliases[i] = subformats[i].format;
1063     }
1064     cnt = subformatCount;
1065     return (const Format**)formatAliases;
1066 }
1067 
1068 
1069 StringEnumeration*
getFormatNames(UErrorCode & status)1070 MessageFormat::getFormatNames(UErrorCode& status) {
1071     if (U_FAILURE(status))  return NULL;
1072 
1073     if (isArgNumeric) {
1074         status = U_ARGUMENT_TYPE_MISMATCH;
1075         return NULL;
1076     }
1077     UVector *fFormatNames = new UVector(status);
1078     if (U_FAILURE(status)) {
1079         status = U_MEMORY_ALLOCATION_ERROR;
1080         return NULL;
1081     }
1082     for (int32_t i=0; i<subformatCount; ++i) {
1083         fFormatNames->addElement(new UnicodeString(*subformats[i].argName), status);
1084     }
1085 
1086     StringEnumeration* nameEnumerator = new FormatNameEnumeration(fFormatNames, status);
1087     return nameEnumerator;
1088 }
1089 
1090 // -------------------------------------
1091 // Formats the source Formattable array and copy into the result buffer.
1092 // Ignore the FieldPosition result for error checking.
1093 
1094 UnicodeString&
format(const Formattable * source,int32_t cnt,UnicodeString & appendTo,FieldPosition & ignore,UErrorCode & success) const1095 MessageFormat::format(const Formattable* source,
1096                       int32_t cnt,
1097                       UnicodeString& appendTo,
1098                       FieldPosition& ignore,
1099                       UErrorCode& success) const
1100 {
1101     if (U_FAILURE(success))
1102         return appendTo;
1103 
1104     return format(source, cnt, appendTo, ignore, 0, success);
1105 }
1106 
1107 // -------------------------------------
1108 // Internally creates a MessageFormat instance based on the
1109 // pattern and formats the arguments Formattable array and
1110 // copy into the appendTo buffer.
1111 
1112 UnicodeString&
format(const UnicodeString & pattern,const Formattable * arguments,int32_t cnt,UnicodeString & appendTo,UErrorCode & success)1113 MessageFormat::format(  const UnicodeString& pattern,
1114                         const Formattable* arguments,
1115                         int32_t cnt,
1116                         UnicodeString& appendTo,
1117                         UErrorCode& success)
1118 {
1119     MessageFormat temp(pattern, success);
1120     FieldPosition ignore(0);
1121     temp.format(arguments, cnt, appendTo, ignore, success);
1122     return appendTo;
1123 }
1124 
1125 // -------------------------------------
1126 // Formats the source Formattable object and copy into the
1127 // appendTo buffer.  The Formattable object must be an array
1128 // of Formattable instances, returns error otherwise.
1129 
1130 UnicodeString&
format(const Formattable & source,UnicodeString & appendTo,FieldPosition & ignore,UErrorCode & success) const1131 MessageFormat::format(const Formattable& source,
1132                       UnicodeString& appendTo,
1133                       FieldPosition& ignore,
1134                       UErrorCode& success) const
1135 {
1136     int32_t cnt;
1137 
1138     if (U_FAILURE(success))
1139         return appendTo;
1140     if (source.getType() != Formattable::kArray) {
1141         success = U_ILLEGAL_ARGUMENT_ERROR;
1142         return appendTo;
1143     }
1144     const Formattable* tmpPtr = source.getArray(cnt);
1145 
1146     return format(tmpPtr, cnt, appendTo, ignore, 0, success);
1147 }
1148 
1149 
1150 UnicodeString&
format(const UnicodeString * argumentNames,const Formattable * arguments,int32_t count,UnicodeString & appendTo,UErrorCode & success) const1151 MessageFormat::format(const UnicodeString* argumentNames,
1152                       const Formattable* arguments,
1153                       int32_t count,
1154                       UnicodeString& appendTo,
1155                       UErrorCode& success) const {
1156     FieldPosition ignore(0);
1157     return format(arguments, argumentNames, count, appendTo, ignore, 0, success);
1158 }
1159 
1160 UnicodeString&
format(const Formattable * arguments,int32_t cnt,UnicodeString & appendTo,FieldPosition & status,int32_t recursionProtection,UErrorCode & success) const1161 MessageFormat::format(const Formattable* arguments,
1162                       int32_t cnt,
1163                       UnicodeString& appendTo,
1164                       FieldPosition& status,
1165                       int32_t recursionProtection,
1166                       UErrorCode& success) const
1167 {
1168     return format(arguments, NULL, cnt, appendTo, status, recursionProtection, success);
1169 }
1170 
1171 // -------------------------------------
1172 // Formats the arguments Formattable array and copy into the appendTo buffer.
1173 // Ignore the FieldPosition result for error checking.
1174 
1175 UnicodeString&
format(const Formattable * arguments,const UnicodeString * argumentNames,int32_t cnt,UnicodeString & appendTo,FieldPosition & status,int32_t recursionProtection,UErrorCode & success) const1176 MessageFormat::format(const Formattable* arguments,
1177                       const UnicodeString *argumentNames,
1178                       int32_t cnt,
1179                       UnicodeString& appendTo,
1180                       FieldPosition& status,
1181                       int32_t recursionProtection,
1182                       UErrorCode& success) const
1183 {
1184     int32_t lastOffset = 0;
1185     int32_t argumentNumber=0;
1186     if (cnt < 0 || (cnt && arguments == NULL)) {
1187         success = U_ILLEGAL_ARGUMENT_ERROR;
1188         return appendTo;
1189     }
1190 
1191     if ( !isArgNumeric && argumentNames== NULL ) {
1192         success = U_ILLEGAL_ARGUMENT_ERROR;
1193         return appendTo;
1194     }
1195 
1196     const Formattable *obj=NULL;
1197     for (int32_t i=0; i<subformatCount; ++i) {
1198         // Append the prefix of current format element.
1199         appendTo.append(fPattern, lastOffset, subformats[i].offset - lastOffset);
1200         lastOffset = subformats[i].offset;
1201         obj = NULL;
1202         if (isArgNumeric) {
1203             argumentNumber = subformats[i].argNum;
1204 
1205             // Checks the scope of the argument number.
1206             if (argumentNumber >= cnt) {
1207                 appendTo += LEFT_CURLY_BRACE;
1208                 itos(argumentNumber, appendTo);
1209                 appendTo += RIGHT_CURLY_BRACE;
1210                 continue;
1211             }
1212             obj = arguments+argumentNumber;
1213         }
1214         else {
1215             for (int32_t j=0; j<cnt; ++j) {
1216                 if (argumentNames[j]== *subformats[i].argName ) {
1217                     obj = arguments+j;
1218                     break;
1219                 }
1220             }
1221             if (obj == NULL ) {
1222                 appendTo += LEFT_CURLY_BRACE;
1223                 appendTo += *subformats[i].argName;
1224                 appendTo += RIGHT_CURLY_BRACE;
1225                 continue;
1226 
1227             }
1228         }
1229         Formattable::Type type = obj->getType();
1230 
1231         // Recursively calling the format process only if the current
1232         // format argument refers to either of the following:
1233         // a ChoiceFormat object, a PluralFormat object, a SelectFormat object.
1234         Format* fmt = subformats[i].format;
1235         if (fmt != NULL) {
1236             UnicodeString argNum;
1237             fmt->format(*obj, argNum, success);
1238 
1239             // Needs to reprocess the ChoiceFormat and PluralFormat and SelectFormat option by using the
1240             // MessageFormat pattern application.
1241             if ((dynamic_cast<ChoiceFormat*>(fmt) != NULL ||
1242                  dynamic_cast<PluralFormat*>(fmt) != NULL ||
1243                  dynamic_cast<SelectFormat*>(fmt) != NULL) &&
1244                 argNum.indexOf(LEFT_CURLY_BRACE) >= 0
1245             ) {
1246                 MessageFormat temp(argNum, fLocale, success);
1247                 // TODO: Implement recursion protection
1248                 if ( isArgNumeric ) {
1249                     temp.format(arguments, NULL, cnt, appendTo, status, recursionProtection, success);
1250                 }
1251                 else {
1252                     temp.format(arguments, argumentNames, cnt, appendTo, status, recursionProtection, success);
1253                 }
1254                 if (U_FAILURE(success)) {
1255                     return appendTo;
1256                 }
1257             }
1258             else {
1259                 appendTo += argNum;
1260             }
1261         }
1262         // If the obj data type is a number, use a NumberFormat instance.
1263         else if ((type == Formattable::kDouble) ||
1264                  (type == Formattable::kLong) ||
1265                  (type == Formattable::kInt64)) {
1266 
1267             const NumberFormat* nf = getDefaultNumberFormat(success);
1268             if (nf == NULL) {
1269                 return appendTo;
1270             }
1271             if (type == Formattable::kDouble) {
1272                 nf->format(obj->getDouble(), appendTo);
1273             } else if (type == Formattable::kLong) {
1274                 nf->format(obj->getLong(), appendTo);
1275             } else {
1276                 nf->format(obj->getInt64(), appendTo);
1277             }
1278         }
1279         // If the obj data type is a Date instance, use a DateFormat instance.
1280         else if (type == Formattable::kDate) {
1281             const DateFormat* df = getDefaultDateFormat(success);
1282             if (df == NULL) {
1283                 return appendTo;
1284             }
1285             df->format(obj->getDate(), appendTo);
1286         }
1287         else if (type == Formattable::kString) {
1288             appendTo += obj->getString();
1289         }
1290         else {
1291             success = U_ILLEGAL_ARGUMENT_ERROR;
1292             return appendTo;
1293         }
1294     }
1295     // Appends the rest of the pattern characters after the real last offset.
1296     appendTo.append(fPattern, lastOffset, 0x7fffffff);
1297     return appendTo;
1298 }
1299 
1300 
1301 // -------------------------------------
1302 // Parses the source pattern and returns the Formattable objects array,
1303 // the array count and the ending parse position.  The caller of this method
1304 // owns the array.
1305 
1306 Formattable*
parse(const UnicodeString & source,ParsePosition & pos,int32_t & count) const1307 MessageFormat::parse(const UnicodeString& source,
1308                      ParsePosition& pos,
1309                      int32_t& count) const
1310 {
1311     // Allocate at least one element.  Allocating an array of length
1312     // zero causes problems on some platforms (e.g. Win32).
1313     Formattable *resultArray = new Formattable[argTypeCount ? argTypeCount : 1];
1314     int32_t patternOffset = 0;
1315     int32_t sourceOffset = pos.getIndex();
1316     ParsePosition tempPos(0);
1317     count = 0; // {sfb} reset to zero
1318     int32_t len;
1319     // If resultArray could not be created, exit out.
1320     // Avoid crossing initialization of variables above.
1321     if (resultArray == NULL) {
1322         goto PARSE_ERROR;
1323     }
1324     for (int32_t i = 0; i < subformatCount; ++i) {
1325         // match up to format
1326         len = subformats[i].offset - patternOffset;
1327         if (len == 0 ||
1328             fPattern.compare(patternOffset, len, source, sourceOffset, len) == 0) {
1329             sourceOffset += len;
1330             patternOffset += len;
1331         }
1332         else {
1333             goto PARSE_ERROR;
1334         }
1335 
1336         // now use format
1337         Format* fmt = subformats[i].format;
1338         int32_t argNum = subformats[i].argNum;
1339         if (fmt == NULL) {   // string format
1340             // if at end, use longest possible match
1341             // otherwise uses first match to intervening string
1342             // does NOT recursively try all possibilities
1343             int32_t tempLength = (i+1<subformatCount) ?
1344                 subformats[i+1].offset : fPattern.length();
1345 
1346             int32_t next;
1347             if (patternOffset >= tempLength) {
1348                 next = source.length();
1349             }
1350             else {
1351                 UnicodeString buffer;
1352                 fPattern.extract(patternOffset,tempLength - patternOffset, buffer);
1353                 next = source.indexOf(buffer, sourceOffset);
1354             }
1355 
1356             if (next < 0) {
1357                 goto PARSE_ERROR;
1358             }
1359             else {
1360                 UnicodeString buffer;
1361                 source.extract(sourceOffset,next - sourceOffset, buffer);
1362                 UnicodeString strValue = buffer;
1363                 UnicodeString temp(LEFT_CURLY_BRACE);
1364                 // {sfb} check this later
1365                 if (isArgNumeric) {
1366                     itos(argNum, temp);
1367                 }
1368                 else {
1369                     temp+=(*subformats[i].argName);
1370                 }
1371                 temp += RIGHT_CURLY_BRACE;
1372                 if (strValue != temp) {
1373                     source.extract(sourceOffset,next - sourceOffset, buffer);
1374                     resultArray[argNum].setString(buffer);
1375                     // {sfb} not sure about this
1376                     if ((argNum + 1) > count) {
1377                         count = argNum + 1;
1378                     }
1379                 }
1380                 sourceOffset = next;
1381             }
1382         }
1383         else {
1384             tempPos.setIndex(sourceOffset);
1385             fmt->parseObject(source, resultArray[argNum], tempPos);
1386             if (tempPos.getIndex() == sourceOffset) {
1387                 goto PARSE_ERROR;
1388             }
1389 
1390             if ((argNum + 1) > count) {
1391                 count = argNum + 1;
1392             }
1393             sourceOffset = tempPos.getIndex(); // update
1394         }
1395     }
1396     len = fPattern.length() - patternOffset;
1397     if (len == 0 ||
1398         fPattern.compare(patternOffset, len, source, sourceOffset, len) == 0) {
1399         pos.setIndex(sourceOffset + len);
1400         return resultArray;
1401     }
1402     // else fall through...
1403 
1404  PARSE_ERROR:
1405     pos.setErrorIndex(sourceOffset);
1406     delete [] resultArray;
1407     count = 0;
1408     return NULL; // leave index as is to signal error
1409 }
1410 
1411 // -------------------------------------
1412 // Parses the source string and returns the array of
1413 // Formattable objects and the array count.  The caller
1414 // owns the returned array.
1415 
1416 Formattable*
parse(const UnicodeString & source,int32_t & cnt,UErrorCode & success) const1417 MessageFormat::parse(const UnicodeString& source,
1418                      int32_t& cnt,
1419                      UErrorCode& success) const
1420 {
1421     if (!isArgNumeric ) {
1422         success = U_ARGUMENT_TYPE_MISMATCH;
1423         return NULL;
1424     }
1425     ParsePosition status(0);
1426     // Calls the actual implementation method and starts
1427     // from zero offset of the source text.
1428     Formattable* result = parse(source, status, cnt);
1429     if (status.getIndex() == 0) {
1430         success = U_MESSAGE_PARSE_ERROR;
1431         delete[] result;
1432         return NULL;
1433     }
1434     return result;
1435 }
1436 
1437 // -------------------------------------
1438 // Parses the source text and copy into the result buffer.
1439 
1440 void
parseObject(const UnicodeString & source,Formattable & result,ParsePosition & status) const1441 MessageFormat::parseObject( const UnicodeString& source,
1442                             Formattable& result,
1443                             ParsePosition& status) const
1444 {
1445     int32_t cnt = 0;
1446     Formattable* tmpResult = parse(source, status, cnt);
1447     if (tmpResult != NULL)
1448         result.adoptArray(tmpResult, cnt);
1449 }
1450 
1451 UnicodeString
autoQuoteApostrophe(const UnicodeString & pattern,UErrorCode & status)1452 MessageFormat::autoQuoteApostrophe(const UnicodeString& pattern, UErrorCode& status) {
1453   UnicodeString result;
1454   if (U_SUCCESS(status)) {
1455     int32_t plen = pattern.length();
1456     const UChar* pat = pattern.getBuffer();
1457     int32_t blen = plen * 2 + 1; // space for null termination, convenience
1458     UChar* buf = result.getBuffer(blen);
1459     if (buf == NULL) {
1460       status = U_MEMORY_ALLOCATION_ERROR;
1461     } else {
1462       int32_t len = umsg_autoQuoteApostrophe(pat, plen, buf, blen, &status);
1463       result.releaseBuffer(U_SUCCESS(status) ? len : 0);
1464     }
1465   }
1466   if (U_FAILURE(status)) {
1467     result.setToBogus();
1468   }
1469   return result;
1470 }
1471 
1472 // -------------------------------------
1473 
makeRBNF(URBNFRuleSetTag tag,const Locale & locale,const UnicodeString & defaultRuleSet,UErrorCode & ec)1474 static Format* makeRBNF(URBNFRuleSetTag tag, const Locale& locale, const UnicodeString& defaultRuleSet, UErrorCode& ec) {
1475     RuleBasedNumberFormat* fmt = new RuleBasedNumberFormat(tag, locale, ec);
1476     if (fmt == NULL) {
1477         ec = U_MEMORY_ALLOCATION_ERROR;
1478     } else if (U_SUCCESS(ec) && defaultRuleSet.length() > 0) {
1479         UErrorCode localStatus = U_ZERO_ERROR; // ignore unrecognized default rule set
1480         fmt->setDefaultRuleSet(defaultRuleSet, localStatus);
1481     }
1482     return fmt;
1483 }
1484 
1485 /**
1486  * Reads the segments[] array (see applyPattern()) and parses the
1487  * segments[1..3] into a Format* object.  Stores the format object in
1488  * the subformats[] array.  Updates the argTypes[] array type
1489  * information for the corresponding argument.
1490  *
1491  * @param formatNumber index into subformats[] for this format
1492  * @param segments array of strings with the parsed pattern segments
1493  * @param parseError parse error data (output param)
1494  * @param ec error code
1495  */
1496 void
makeFormat(int32_t formatNumber,UnicodeString * segments,UParseError & parseError,UErrorCode & ec)1497 MessageFormat::makeFormat(int32_t formatNumber,
1498                           UnicodeString* segments,
1499                           UParseError& parseError,
1500                           UErrorCode& ec) {
1501     if (U_FAILURE(ec)) {
1502         return;
1503     }
1504 
1505     // Parse the argument number
1506     int32_t argumentNumber = stou(segments[1]); // always unlocalized!
1507     UnicodeString argumentName;
1508     if (argumentNumber < 0) {
1509         if ( (isArgNumeric==TRUE) && (formatNumber !=0) ) {
1510             ec = U_INVALID_FORMAT_ERROR;
1511             return;
1512         }
1513         isArgNumeric = FALSE;
1514         argumentNumber=formatNumber;
1515     }
1516     if (!isArgNumeric) {
1517         if ( !isLegalArgName(segments[1]) ) {
1518             ec = U_INVALID_FORMAT_ERROR;
1519             return;
1520         }
1521         argumentName = segments[1];
1522     }
1523 
1524     // Parse the format, recording the argument type and creating a
1525     // new Format object (except for string arguments).
1526     Formattable::Type argType;
1527     Format *fmt = NULL;
1528     int32_t typeID, styleID;
1529     DateFormat::EStyle style;
1530     UnicodeString unquotedPattern, quotedPattern;
1531     UBool inQuote = FALSE;
1532 
1533     switch (typeID = findKeyword(segments[2], TYPE_IDS)) {
1534 
1535     case 0: // string
1536         argType = Formattable::kString;
1537         break;
1538 
1539     case 1: // number
1540         argType = Formattable::kDouble;
1541 
1542         switch (findKeyword(segments[3], NUMBER_STYLE_IDS)) {
1543         case 0: // default
1544             fmt = NumberFormat::createInstance(fLocale, ec);
1545             break;
1546         case 1: // currency
1547             fmt = NumberFormat::createCurrencyInstance(fLocale, ec);
1548             break;
1549         case 2: // percent
1550             fmt = NumberFormat::createPercentInstance(fLocale, ec);
1551             break;
1552         case 3: // integer
1553             argType = Formattable::kLong;
1554             fmt = createIntegerFormat(fLocale, ec);
1555             break;
1556         default: // pattern
1557             fmt = NumberFormat::createInstance(fLocale, ec);
1558             if (fmt) {
1559                 DecimalFormat* decfmt = dynamic_cast<DecimalFormat*>(fmt);
1560                 if (decfmt != NULL) {
1561                     decfmt->applyPattern(segments[3],parseError,ec);
1562                 }
1563             }
1564             break;
1565         }
1566         break;
1567 
1568     case 2: // date
1569     case 3: // time
1570         argType = Formattable::kDate;
1571         styleID = findKeyword(segments[3], DATE_STYLE_IDS);
1572         style = (styleID >= 0) ? DATE_STYLES[styleID] : DateFormat::kDefault;
1573 
1574         if (typeID == 2) {
1575             fmt = DateFormat::createDateInstance(style, fLocale);
1576         } else {
1577             fmt = DateFormat::createTimeInstance(style, fLocale);
1578         }
1579 
1580         if (styleID < 0 && fmt != NULL) {
1581             SimpleDateFormat* sdtfmt = dynamic_cast<SimpleDateFormat*>(fmt);
1582             if (sdtfmt != NULL) {
1583                 sdtfmt->applyPattern(segments[3]);
1584             }
1585         }
1586         break;
1587 
1588     case 4: // choice
1589         argType = Formattable::kDouble;
1590 
1591         fmt = new ChoiceFormat(segments[3], parseError, ec);
1592         break;
1593 
1594     case 5: // spellout
1595         argType = Formattable::kDouble;
1596         fmt = makeRBNF(URBNF_SPELLOUT, fLocale, segments[3], ec);
1597         break;
1598     case 6: // ordinal
1599         argType = Formattable::kDouble;
1600         fmt = makeRBNF(URBNF_ORDINAL, fLocale, segments[3], ec);
1601         break;
1602     case 7: // duration
1603         argType = Formattable::kDouble;
1604         fmt = makeRBNF(URBNF_DURATION, fLocale, segments[3], ec);
1605         break;
1606     case 8: // plural
1607     case 9: // Select
1608         if(typeID == 8)
1609             argType = Formattable::kDouble;
1610         else
1611             argType = Formattable::kString;
1612         quotedPattern = segments[3];
1613         for (int32_t i = 0; i < quotedPattern.length(); ++i) {
1614             UChar ch = quotedPattern.charAt(i);
1615             if (ch == SINGLE_QUOTE) {
1616                 if (i+1 < quotedPattern.length() && quotedPattern.charAt(i+1)==SINGLE_QUOTE) {
1617                     unquotedPattern+=ch;
1618                     ++i;
1619                 }
1620                 else {
1621                     inQuote = !inQuote;
1622                 }
1623             }
1624             else {
1625                 unquotedPattern += ch;
1626             }
1627         }
1628         if(typeID == 8)
1629             fmt = new PluralFormat(fLocale, unquotedPattern, ec);
1630         else
1631             fmt = new SelectFormat(unquotedPattern, ec);
1632         break;
1633     default:
1634         argType = Formattable::kString;
1635         ec = U_ILLEGAL_ARGUMENT_ERROR;
1636         break;
1637     }
1638 
1639     if (fmt==NULL && argType!=Formattable::kString && U_SUCCESS(ec)) {
1640         ec = U_MEMORY_ALLOCATION_ERROR;
1641     }
1642 
1643     if (!allocateSubformats(formatNumber+1) ||
1644         !allocateArgTypes(argumentNumber+1)) {
1645         ec = U_MEMORY_ALLOCATION_ERROR;
1646     }
1647 
1648     if (U_FAILURE(ec)) {
1649         delete fmt;
1650         return;
1651     }
1652 
1653     // Parse succeeded; record results in our arrays
1654     subformats[formatNumber].format = fmt;
1655     subformats[formatNumber].offset = segments[0].length();
1656     if (isArgNumeric) {
1657         subformats[formatNumber].argName = NULL;
1658         subformats[formatNumber].argNum = argumentNumber;
1659     }
1660     else {
1661         subformats[formatNumber].argName = new UnicodeString(argumentName);
1662         subformats[formatNumber].argNum = -1;
1663     }
1664     subformatCount = formatNumber+1;
1665 
1666     // Careful here: argumentNumber may in general arrive out of
1667     // sequence, e.g., "There was {2} on {0,date} (see {1,number})."
1668     argTypes[argumentNumber] = argType;
1669     if (argumentNumber+1 > argTypeCount) {
1670         argTypeCount = argumentNumber+1;
1671     }
1672 }
1673 
1674 // -------------------------------------
1675 // Finds the string, s, in the string array, list.
findKeyword(const UnicodeString & s,const UChar * const * list)1676 int32_t MessageFormat::findKeyword(const UnicodeString& s,
1677                                    const UChar * const *list)
1678 {
1679     if (s.length() == 0)
1680         return 0; // default
1681 
1682     UnicodeString buffer = s;
1683     // Trims the space characters and turns all characters
1684     // in s to lower case.
1685     buffer.trim().toLower("");
1686     for (int32_t i = 0; list[i]; ++i) {
1687         if (!buffer.compare(list[i], u_strlen(list[i]))) {
1688             return i;
1689         }
1690     }
1691     return -1;
1692 }
1693 
1694 // -------------------------------------
1695 // Checks the range of the source text to quote the special
1696 // characters, { and ' and copy to target buffer.
1697 
1698 void
copyAndFixQuotes(const UnicodeString & source,int32_t start,int32_t end,UnicodeString & appendTo)1699 MessageFormat::copyAndFixQuotes(const UnicodeString& source,
1700                                 int32_t start,
1701                                 int32_t end,
1702                                 UnicodeString& appendTo)
1703 {
1704     UBool gotLB = FALSE;
1705 
1706     for (int32_t i = start; i < end; ++i) {
1707         UChar ch = source[i];
1708         if (ch == LEFT_CURLY_BRACE) {
1709             appendTo += SINGLE_QUOTE;
1710             appendTo += LEFT_CURLY_BRACE;
1711             appendTo += SINGLE_QUOTE;
1712             gotLB = TRUE;
1713         }
1714         else if (ch == RIGHT_CURLY_BRACE) {
1715             if(gotLB) {
1716                 appendTo += RIGHT_CURLY_BRACE;
1717                 gotLB = FALSE;
1718             }
1719             else {
1720                 // orig code.
1721                 appendTo += SINGLE_QUOTE;
1722                 appendTo += RIGHT_CURLY_BRACE;
1723                 appendTo += SINGLE_QUOTE;
1724             }
1725         }
1726         else if (ch == SINGLE_QUOTE) {
1727             appendTo += SINGLE_QUOTE;
1728             appendTo += SINGLE_QUOTE;
1729         }
1730         else {
1731             appendTo += ch;
1732         }
1733     }
1734 }
1735 
1736 /**
1737  * Convenience method that ought to be in NumberFormat
1738  */
1739 NumberFormat*
createIntegerFormat(const Locale & locale,UErrorCode & status) const1740 MessageFormat::createIntegerFormat(const Locale& locale, UErrorCode& status) const {
1741     NumberFormat *temp = NumberFormat::createInstance(locale, status);
1742     DecimalFormat *temp2;
1743     if (temp != NULL && (temp2 = dynamic_cast<DecimalFormat*>(temp)) != NULL) {
1744         temp2->setMaximumFractionDigits(0);
1745         temp2->setDecimalSeparatorAlwaysShown(FALSE);
1746         temp2->setParseIntegerOnly(TRUE);
1747     }
1748 
1749     return temp;
1750 }
1751 
1752 /**
1753  * Return the default number format.  Used to format a numeric
1754  * argument when subformats[i].format is NULL.  Returns NULL
1755  * on failure.
1756  *
1757  * Semantically const but may modify *this.
1758  */
getDefaultNumberFormat(UErrorCode & ec) const1759 const NumberFormat* MessageFormat::getDefaultNumberFormat(UErrorCode& ec) const {
1760     if (defaultNumberFormat == NULL) {
1761         MessageFormat* t = (MessageFormat*) this;
1762         t->defaultNumberFormat = NumberFormat::createInstance(fLocale, ec);
1763         if (U_FAILURE(ec)) {
1764             delete t->defaultNumberFormat;
1765             t->defaultNumberFormat = NULL;
1766         } else if (t->defaultNumberFormat == NULL) {
1767             ec = U_MEMORY_ALLOCATION_ERROR;
1768         }
1769     }
1770     return defaultNumberFormat;
1771 }
1772 
1773 /**
1774  * Return the default date format.  Used to format a date
1775  * argument when subformats[i].format is NULL.  Returns NULL
1776  * on failure.
1777  *
1778  * Semantically const but may modify *this.
1779  */
getDefaultDateFormat(UErrorCode & ec) const1780 const DateFormat* MessageFormat::getDefaultDateFormat(UErrorCode& ec) const {
1781     if (defaultDateFormat == NULL) {
1782         MessageFormat* t = (MessageFormat*) this;
1783         t->defaultDateFormat = DateFormat::createDateTimeInstance(DateFormat::kShort, DateFormat::kShort, fLocale);
1784         if (t->defaultDateFormat == NULL) {
1785             ec = U_MEMORY_ALLOCATION_ERROR;
1786         }
1787     }
1788     return defaultDateFormat;
1789 }
1790 
1791 UBool
usesNamedArguments() const1792 MessageFormat::usesNamedArguments() const {
1793     return !isArgNumeric;
1794 }
1795 
1796 UBool
isLegalArgName(const UnicodeString & argName) const1797 MessageFormat::isLegalArgName(const UnicodeString& argName) const {
1798     if(!u_hasBinaryProperty(argName.charAt(0), idStart)) {
1799         return FALSE;
1800     }
1801     for (int32_t i=1; i<argName.length(); ++i) {
1802         if(!u_hasBinaryProperty(argName.charAt(i), idContinue)) {
1803             return FALSE;
1804         }
1805     }
1806     return TRUE;
1807 }
1808 
1809 int32_t
getArgTypeCount() const1810 MessageFormat::getArgTypeCount() const {
1811         return argTypeCount;
1812 }
1813 
FormatNameEnumeration(UVector * fNameList,UErrorCode &)1814 FormatNameEnumeration::FormatNameEnumeration(UVector *fNameList, UErrorCode& /*status*/) {
1815     pos=0;
1816     fFormatNames = fNameList;
1817 }
1818 
1819 const UnicodeString*
snext(UErrorCode & status)1820 FormatNameEnumeration::snext(UErrorCode& status) {
1821     if (U_SUCCESS(status) && pos < fFormatNames->size()) {
1822         return (const UnicodeString*)fFormatNames->elementAt(pos++);
1823     }
1824     return NULL;
1825 }
1826 
1827 void
reset(UErrorCode &)1828 FormatNameEnumeration::reset(UErrorCode& /*status*/) {
1829     pos=0;
1830 }
1831 
1832 int32_t
count(UErrorCode &) const1833 FormatNameEnumeration::count(UErrorCode& /*status*/) const {
1834        return (fFormatNames==NULL) ? 0 : fFormatNames->size();
1835 }
1836 
~FormatNameEnumeration()1837 FormatNameEnumeration::~FormatNameEnumeration() {
1838     UnicodeString *s;
1839     for (int32_t i=0; i<fFormatNames->size(); ++i) {
1840         if ((s=(UnicodeString *)fFormatNames->elementAt(i))!=NULL) {
1841             delete s;
1842         }
1843     }
1844     delete fFormatNames;
1845 }
1846 U_NAMESPACE_END
1847 
1848 #endif /* #if !UCONFIG_NO_FORMATTING */
1849 
1850 //eof
1851