1 /*
2 ********************************************************************************
3 * Copyright (C) 1997-2011, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 ********************************************************************************
6 *
7 * File CHOICFMT.H
8 *
9 * Modification History:
10 *
11 * Date Name Description
12 * 02/19/97 aliu Converted from java.
13 * 03/20/97 helena Finished first cut of implementation and got rid
14 * of nextDouble/previousDouble and replaced with
15 * boolean array.
16 * 4/10/97 aliu Clean up. Modified to work on AIX.
17 * 8/6/97 nos Removed overloaded constructor, member var 'buffer'.
18 * 07/22/98 stephen Removed operator!= (implemented in Format)
19 ********************************************************************************
20 */
21
22 #ifndef CHOICFMT_H
23 #define CHOICFMT_H
24
25 #include "unicode/utypes.h"
26
27 /**
28 * \file
29 * \brief C++ API: Choice Format.
30 */
31
32 #if !UCONFIG_NO_FORMATTING
33
34 #include "unicode/fieldpos.h"
35 #include "unicode/format.h"
36 #include "unicode/messagepattern.h"
37 #include "unicode/numfmt.h"
38 #include "unicode/unistr.h"
39
40 U_NAMESPACE_BEGIN
41
42 class MessageFormat;
43
44 /**
45 * ChoiceFormat converts between ranges of numeric values and strings for those ranges.
46 * The strings must conform to the MessageFormat pattern syntax.
47 *
48 * <p><em><code>ChoiceFormat</code> is probably not what you need.
49 * Please use <code>MessageFormat</code>
50 * with <code>plural</code> arguments for proper plural selection,
51 * and <code>select</code> arguments for simple selection among a fixed set of choices!</em></p>
52 *
53 * <p>A <code>ChoiceFormat</code> splits
54 * the real number line \htmlonly<code>-∞</code> to
55 * <code>+∞</code>\endhtmlonly into two
56 * or more contiguous ranges. Each range is mapped to a
57 * string.</p>
58 *
59 * <p><code>ChoiceFormat</code> was originally intended
60 * for displaying grammatically correct
61 * plurals such as "There is one file." vs. "There are 2 files."
62 * <em>However,</em> plural rules for many languages
63 * are too complex for the capabilities of ChoiceFormat,
64 * and its requirement of specifying the precise rules for each message
65 * is unmanageable for translators.</p>
66 *
67 * <p>There are two methods of defining a <code>ChoiceFormat</code>; both
68 * are equivalent. The first is by using a string pattern. This is the
69 * preferred method in most cases. The second method is through direct
70 * specification of the arrays that logically make up the
71 * <code>ChoiceFormat</code>.</p>
72 *
73 * <p>Note: Typically, choice formatting is done (if done at all) via <code>MessageFormat</code>
74 * with a <code>choice</code> argument type,
75 * rather than using a stand-alone <code>ChoiceFormat</code>.</p>
76 *
77 * <h5>Patterns and Their Interpretation</h5>
78 *
79 * <p>The pattern string defines the range boundaries and the strings for each number range.
80 * Syntax:
81 * <pre>
82 * choiceStyle = number separator message ('|' number separator message)*
83 * number = normal_number | ['-'] \htmlonly∞\endhtmlonly (U+221E, infinity)
84 * normal_number = double value (unlocalized ASCII string)
85 * separator = less_than | less_than_or_equal
86 * less_than = '<'
87 * less_than_or_equal = '#' | \htmlonly≤\endhtmlonly (U+2264)
88 * message: see {@link MessageFormat}
89 * </pre>
90 * Pattern_White_Space between syntax elements is ignored, except
91 * around each range's sub-message.</p>
92 *
93 * <p>Each numeric sub-range extends from the current range's number
94 * to the next range's number.
95 * The number itself is included in its range if a <code>less_than_or_equal</code> sign is used,
96 * and excluded from its range (and instead included in the previous range)
97 * if a <code>less_than</code> sign is used.</p>
98 *
99 * <p>When a <code>ChoiceFormat</code> is constructed from
100 * arrays of numbers, closure flags and strings,
101 * they are interpreted just like
102 * the sequence of <code>(number separator string)</code> in an equivalent pattern string.
103 * <code>closure[i]==TRUE</code> corresponds to a <code>less_than</code> separator sign.
104 * The equivalent pattern string will be constructed automatically.</p>
105 *
106 * <p>During formatting, a number is mapped to the first range
107 * where the number is not greater than the range's upper limit.
108 * That range's message string is returned. A NaN maps to the very first range.</p>
109 *
110 * <p>During parsing, a range is selected for the longest match of
111 * any range's message. That range's number is returned, ignoring the separator/closure.
112 * Only a simple string match is performed, without parsing of arguments that
113 * might be specified in the message strings.</p>
114 *
115 * <p>Note that the first range's number is ignored in formatting
116 * but may be returned from parsing.</p>
117 *
118 * <h5>Examples</h5>
119 *
120 * <p>Here is an example of two arrays that map the number
121 * <code>1..7</code> to the English day of the week abbreviations
122 * <code>Sun..Sat</code>. No closures array is given; this is the same as
123 * specifying all closures to be <code>FALSE</code>.</p>
124 *
125 * <pre> {1,2,3,4,5,6,7},
126 * {"Sun","Mon","Tue","Wed","Thur","Fri","Sat"}</pre>
127 *
128 * <p>Here is an example that maps the ranges [-Inf, 1), [1, 1], and (1,
129 * +Inf] to three strings. That is, the number line is split into three
130 * ranges: x < 1.0, x = 1.0, and x > 1.0.
131 * (The round parentheses in the notation above indicate an exclusive boundary,
132 * like the turned bracket in European notation: [-Inf, 1) == [-Inf, 1[ )</p>
133 *
134 * <pre> {0, 1, 1},
135 * {FALSE, FALSE, TRUE},
136 * {"no files", "one file", "many files"}</pre>
137 *
138 * <p>Here is an example that shows formatting and parsing: </p>
139 *
 * \code
 *   #include <unicode/choicfmt.h>
 *   #include <unicode/unistr.h>
 *   #include <iostream>
 *
 *   int main(int argc, char *argv[]) {
 *       double limits[] = {1,2,3,4,5,6,7};
 *       UnicodeString dayNames[] = {
 *           "Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
 *       ChoiceFormat fmt(limits, dayNames, 7);
 *       UnicodeString str;
 *       char buf[256];
 *       for (double x = 1.0; x <= 8.0; x += 1.0) {
 *           fmt.format(x, str);
 *           str.extract(0, str.length(), buf, 256, "");
 *           str.truncate(0);
 *           std::cout << x << " -> "
 *                     << buf << std::endl;
 *       }
 *       std::cout << std::endl;
 *       return 0;
 *   }
 * \endcode
163 *
164 * <p><em>User subclasses are not supported.</em> While clients may write
165 * subclasses, such code will not necessarily work and will not be
166 * guaranteed to work stably from release to release.
167 */
168 class U_I18N_API ChoiceFormat: public NumberFormat {
169 public:
170 /**
171 * Constructs a new ChoiceFormat from the pattern string.
172 *
173 * @param pattern Pattern used to construct object.
174 * @param status Output param to receive success code. If the
175 * pattern cannot be parsed, set to failure code.
176 * @stable ICU 2.0
177 */
178 ChoiceFormat(const UnicodeString& pattern,
179 UErrorCode& status);
180
181
182 /**
183 * Constructs a new ChoiceFormat with the given limits and message strings.
184 * All closure flags default to <code>FALSE</code>,
185 * equivalent to <code>less_than_or_equal</code> separators.
186 *
187 * Copies the limits and formats instead of adopting them.
188 *
189 * @param limits Array of limit values.
190 * @param formats Array of formats.
191 * @param count Size of 'limits' and 'formats' arrays.
192 * @stable ICU 2.0
193 */
194 ChoiceFormat(const double* limits,
195 const UnicodeString* formats,
196 int32_t count );
197
198 /**
199 * Constructs a new ChoiceFormat with the given limits, closure flags and message strings.
200 *
201 * Copies the limits and formats instead of adopting them.
202 *
203 * @param limits Array of limit values
204 * @param closures Array of booleans specifying whether each
205 * element of 'limits' is open or closed. If FALSE, then the
206 * corresponding limit number is a member of its range.
207 * If TRUE, then the limit number belongs to the previous range it.
208 * @param formats Array of formats
209 * @param count Size of 'limits', 'closures', and 'formats' arrays
210 * @stable ICU 2.4
211 */
212 ChoiceFormat(const double* limits,
213 const UBool* closures,
214 const UnicodeString* formats,
215 int32_t count);
216
217 /**
218 * Copy constructor.
219 *
220 * @param that ChoiceFormat object to be copied from
221 * @stable ICU 2.0
222 */
223 ChoiceFormat(const ChoiceFormat& that);
224
225 /**
226 * Assignment operator.
227 *
228 * @param that ChoiceFormat object to be copied
229 * @stable ICU 2.0
230 */
231 const ChoiceFormat& operator=(const ChoiceFormat& that);
232
233 /**
234 * Destructor.
235 * @stable ICU 2.0
236 */
237 virtual ~ChoiceFormat();
238
239 /**
240 * Clones this Format object. The caller owns the
241 * result and must delete it when done.
242 *
243 * @return a copy of this object
244 * @stable ICU 2.0
245 */
246 virtual Format* clone(void) const;
247
248 /**
249 * Returns true if the given Format objects are semantically equal.
250 * Objects of different subclasses are considered unequal.
251 *
252 * @param other ChoiceFormat object to be compared
253 * @return true if other is the same as this.
254 * @stable ICU 2.0
255 */
256 virtual UBool operator==(const Format& other) const;
257
258 /**
259 * Sets the pattern.
260 * @param pattern The pattern to be applied.
261 * @param status Output param set to success/failure code on
262 * exit. If the pattern is invalid, this will be
263 * set to a failure result.
264 * @stable ICU 2.0
265 */
266 virtual void applyPattern(const UnicodeString& pattern,
267 UErrorCode& status);
268
269 /**
270 * Sets the pattern.
271 * @param pattern The pattern to be applied.
272 * @param parseError Struct to receive information on position
273 * of error if an error is encountered
274 * @param status Output param set to success/failure code on
275 * exit. If the pattern is invalid, this will be
276 * set to a failure result.
277 * @stable ICU 2.0
278 */
279 virtual void applyPattern(const UnicodeString& pattern,
280 UParseError& parseError,
281 UErrorCode& status);
282 /**
283 * Gets the pattern.
284 *
285 * @param pattern Output param which will receive the pattern
286 * Previous contents are deleted.
287 * @return A reference to 'pattern'
288 * @stable ICU 2.0
289 */
290 virtual UnicodeString& toPattern(UnicodeString &pattern) const;
291
292 /**
293 * Sets the choices to be used in formatting.
294 * For details see the constructor with the same parameter list.
295 *
296 * @param limitsToCopy Contains the top value that you want
297 * parsed with that format,and should be in
298 * ascending sorted order. When formatting X,
299 * the choice will be the i, where limit[i]
300 * <= X < limit[i+1].
301 * @param formatsToCopy The format strings you want to use for each limit.
302 * @param count The size of the above arrays.
303 * @stable ICU 2.0
304 */
305 virtual void setChoices(const double* limitsToCopy,
306 const UnicodeString* formatsToCopy,
307 int32_t count );
308
309 /**
310 * Sets the choices to be used in formatting.
311 * For details see the constructor with the same parameter list.
312 *
313 * @param limits Array of limits
314 * @param closures Array of limit booleans
315 * @param formats Array of format string
316 * @param count The size of the above arrays
317 * @stable ICU 2.4
318 */
319 virtual void setChoices(const double* limits,
320 const UBool* closures,
321 const UnicodeString* formats,
322 int32_t count);
323
324 /**
325 * Returns NULL and 0.
326 * Before ICU 4.8, this used to return the choice limits array.
327 *
328 * @param count Will be set to 0.
329 * @return NULL
330 * @deprecated ICU 4.8 Use the MessagePattern class to analyze a ChoiceFormat pattern.
331 */
332 virtual const double* getLimits(int32_t& count) const;
333
334 /**
335 * Returns NULL and 0.
336 * Before ICU 4.8, this used to return the limit booleans array.
337 *
338 * @param count Will be set to 0.
339 * @return NULL
340 * @deprecated ICU 4.8 Use the MessagePattern class to analyze a ChoiceFormat pattern.
341 */
342 virtual const UBool* getClosures(int32_t& count) const;
343
344 /**
345 * Returns NULL and 0.
346 * Before ICU 4.8, this used to return the array of choice strings.
347 *
348 * @param count Will be set to 0.
349 * @return NULL
350 * @deprecated ICU 4.8 Use the MessagePattern class to analyze a ChoiceFormat pattern.
351 */
352 virtual const UnicodeString* getFormats(int32_t& count) const;
353
354
355 using NumberFormat::format;
356
357 /**
358 * Formats a double number using this object's choices.
359 *
360 * @param number The value to be formatted.
361 * @param appendTo Output parameter to receive result.
362 * Result is appended to existing contents.
363 * @param pos On input: an alignment field, if desired.
364 * On output: the offsets of the alignment field.
365 * @return Reference to 'appendTo' parameter.
366 * @stable ICU 2.0
367 */
368 virtual UnicodeString& format(double number,
369 UnicodeString& appendTo,
370 FieldPosition& pos) const;
371 /**
372 * Formats an int32_t number using this object's choices.
373 *
374 * @param number The value to be formatted.
375 * @param appendTo Output parameter to receive result.
376 * Result is appended to existing contents.
377 * @param pos On input: an alignment field, if desired.
378 * On output: the offsets of the alignment field.
379 * @return Reference to 'appendTo' parameter.
380 * @stable ICU 2.0
381 */
382 virtual UnicodeString& format(int32_t number,
383 UnicodeString& appendTo,
384 FieldPosition& pos) const;
385
386 /**
387 * Formats an int64_t number using this object's choices.
388 *
389 * @param number The value to be formatted.
390 * @param appendTo Output parameter to receive result.
391 * Result is appended to existing contents.
392 * @param pos On input: an alignment field, if desired.
393 * On output: the offsets of the alignment field.
394 * @return Reference to 'appendTo' parameter.
395 * @stable ICU 2.8
396 */
397 virtual UnicodeString& format(int64_t number,
398 UnicodeString& appendTo,
399 FieldPosition& pos) const;
400
401 /**
402 * Formats an array of objects using this object's choices.
403 *
404 * @param objs The array of objects to be formatted.
405 * @param cnt The size of objs.
406 * @param appendTo Output parameter to receive result.
407 * Result is appended to existing contents.
408 * @param pos On input: an alignment field, if desired.
409 * On output: the offsets of the alignment field.
410 * @param success Output param set to success/failure code on
411 * exit.
412 * @return Reference to 'appendTo' parameter.
413 * @stable ICU 2.0
414 */
415 virtual UnicodeString& format(const Formattable* objs,
416 int32_t cnt,
417 UnicodeString& appendTo,
418 FieldPosition& pos,
419 UErrorCode& success) const;
420 /**
421 * Formats an object using this object's choices.
422 *
423 *
424 * @param obj The object to be formatted.
425 * @param appendTo Output parameter to receive result.
426 * Result is appended to existing contents.
427 * @param pos On input: an alignment field, if desired.
428 * On output: the offsets of the alignment field.
429 * @param status Output param set to success/failure code on
430 * exit.
431 * @return Reference to 'appendTo' parameter.
432 * @stable ICU 2.0
433 */
434 virtual UnicodeString& format(const Formattable& obj,
435 UnicodeString& appendTo,
436 FieldPosition& pos,
437 UErrorCode& status) const;
438
439 /**
440 * Redeclared NumberFormat method.
441 *
442 * @param obj The object to be formatted.
443 * @param appendTo Output parameter to receive result.
444 * Result is appended to existing contents.
445 * @param status Output param set to success/failure code on
446 * exit.
447 * @return Reference to 'appendTo' parameter.
448 * @stable ICU 2.0
449 */
450 UnicodeString& format(const Formattable& obj,
451 UnicodeString& appendTo,
452 UErrorCode& status) const;
453
454 /**
455 * Redeclared NumberFormat method.
456 * Formats a double number. These methods call the NumberFormat
457 * pure virtual format() methods with the default FieldPosition.
458 *
459 * @param number The value to be formatted.
460 * @param appendTo Output parameter to receive result.
461 * Result is appended to existing contents.
462 * @return Reference to 'appendTo' parameter.
463 * @stable ICU 2.0
464 */
465 UnicodeString& format( double number,
466 UnicodeString& appendTo) const;
467
468 /**
469 * Redeclared NumberFormat method.
470 * Formats an int32_t number. These methods call the NumberFormat
471 * pure virtual format() methods with the default FieldPosition.
472 *
473 * @param number The value to be formatted.
474 * @param appendTo Output parameter to receive result.
475 * Result is appended to existing contents.
476 * @return Reference to 'appendTo' parameter.
477 * @stable ICU 2.0
478 */
479 UnicodeString& format( int32_t number,
480 UnicodeString& appendTo) const;
481
482 /**
483 * Looks for the longest match of any message string on the input text and,
484 * if there is a match, sets the result object to the corresponding range's number.
485 *
486 * If no string matches, then the parsePosition is unchanged.
487 *
488 * @param text The text to be parsed.
489 * @param result Formattable to be set to the parse result.
490 * If parse fails, return contents are undefined.
491 * @param parsePosition The position to start parsing at on input.
492 * On output, moved to after the last successfully
493 * parse character. On parse failure, does not change.
494 * @stable ICU 2.0
495 */
496 virtual void parse(const UnicodeString& text,
497 Formattable& result,
498 ParsePosition& parsePosition) const;
499
500 /**
501 * Looks for the longest match of any message string on the input text and,
502 * if there is a match, sets the result object to the corresponding range's number.
503 *
504 * If no string matches, then the UErrorCode is set to U_INVALID_FORMAT_ERROR.
505 *
506 * @param text The text to be parsed.
507 * @param result Formattable to be set to the parse result.
508 * If parse fails, return contents are undefined.
509 * @param status Output param with the formatted string.
510 * @stable ICU 2.0
511 */
512 virtual void parse(const UnicodeString& text,
513 Formattable& result,
514 UErrorCode& status) const;
515
516 /**
517 * Returns a unique class ID POLYMORPHICALLY. Part of ICU's "poor man's RTTI".
518 *
519 * @return The class ID for this object. All objects of a
520 * given class have the same class ID. Objects of
521 * other classes have different class IDs.
522 * @stable ICU 2.0
523 */
524 virtual UClassID getDynamicClassID(void) const;
525
526 /**
527 * Returns the class ID for this class. This is useful only for
528 * comparing to a return value from getDynamicClassID(). For example:
529 * <pre>
530 * . Base* polymorphic_pointer = createPolymorphicObject();
531 * . if (polymorphic_pointer->getDynamicClassID() ==
532 * . Derived::getStaticClassID()) ...
533 * </pre>
534 * @return The class ID for all objects of this class.
535 * @stable ICU 2.0
536 */
537 static UClassID U_EXPORT2 getStaticClassID(void);
538
539 private:
540 /**
541 * Converts a double value to a string.
542 * @param value the double number to be converted.
543 * @param string the result string.
544 * @return the converted string.
545 */
546 static UnicodeString& dtos(double value, UnicodeString& string);
547
548 ChoiceFormat(); // default constructor not implemented
549
550 /**
551 * Construct a new ChoiceFormat with the limits and the corresponding formats
552 * based on the pattern.
553 *
554 * @param newPattern Pattern used to construct object.
555 * @param parseError Struct to receive information on position
556 * of error if an error is encountered.
557 * @param status Output param to receive success code. If the
558 * pattern cannot be parsed, set to failure code.
559 * @stable ICU 2.0
560 */
561 ChoiceFormat(const UnicodeString& newPattern,
562 UParseError& parseError,
563 UErrorCode& status);
564
565 friend class MessageFormat;
566
567 virtual void setChoices(const double* limits,
568 const UBool* closures,
569 const UnicodeString* formats,
570 int32_t count,
571 UErrorCode &errorCode);
572
573 /**
574 * Finds the ChoiceFormat sub-message for the given number.
575 * @param pattern A MessagePattern.
576 * @param partIndex the index of the first ChoiceFormat argument style part.
577 * @param number a number to be mapped to one of the ChoiceFormat argument's intervals
578 * @return the sub-message start part index.
579 */
580 static int32_t findSubMessage(const MessagePattern &pattern, int32_t partIndex, double number);
581
582 static double parseArgument(
583 const MessagePattern &pattern, int32_t partIndex,
584 const UnicodeString &source, ParsePosition &pos);
585
586 /**
587 * Matches the pattern string from the end of the partIndex to
588 * the beginning of the limitPartIndex,
589 * including all syntax except SKIP_SYNTAX,
590 * against the source string starting at sourceOffset.
591 * If they match, returns the length of the source string match.
592 * Otherwise returns -1.
593 */
594 static int32_t matchStringUntilLimitPart(
595 const MessagePattern &pattern, int32_t partIndex, int32_t limitPartIndex,
596 const UnicodeString &source, int32_t sourceOffset);
597
598 /**
599 * Some of the ChoiceFormat constructors do not have a UErrorCode paramater.
600 * We need _some_ way to provide one for the MessagePattern constructor.
601 * Alternatively, the MessagePattern could be a pointer field, but that is
602 * not nice either.
603 */
604 UErrorCode constructorErrorCode;
605
606 /**
607 * The MessagePattern which contains the parsed structure of the pattern string.
608 *
609 * Starting with ICU 4.8, the MessagePattern contains a sequence of
610 * numeric/selector/message parts corresponding to the parsed pattern.
611 * For details see the MessagePattern class API docs.
612 */
613 MessagePattern msgPattern;
614
615 /**
616 * Docs & fields from before ICU 4.8, before MessagePattern was used.
617 * Commented out, and left only for explanation of semantics.
618 * --------
619 * Each ChoiceFormat divides the range -Inf..+Inf into fCount
620 * intervals. The intervals are:
621 *
622 * 0: fChoiceLimits[0]..fChoiceLimits[1]
623 * 1: fChoiceLimits[1]..fChoiceLimits[2]
624 * ...
625 * fCount-2: fChoiceLimits[fCount-2]..fChoiceLimits[fCount-1]
626 * fCount-1: fChoiceLimits[fCount-1]..+Inf
627 *
628 * Interval 0 is special; during formatting (mapping numbers to
629 * strings), it also contains all numbers less than
630 * fChoiceLimits[0], as well as NaN values.
631 *
632 * Interval i maps to and from string fChoiceFormats[i]. When
633 * parsing (mapping strings to numbers), then intervals map to
634 * their lower limit, that is, interval i maps to fChoiceLimit[i].
635 *
636 * The intervals may be closed, half open, or open. This affects
637 * formatting but does not affect parsing. Interval i is affected
638 * by fClosures[i] and fClosures[i+1]. If fClosures[i]
639 * is FALSE, then the value fChoiceLimits[i] is in interval i.
640 * That is, intervals i and i are:
641 *
642 * i-1: ... x < fChoiceLimits[i]
643 * i: fChoiceLimits[i] <= x ...
644 *
645 * If fClosures[i] is TRUE, then the value fChoiceLimits[i] is
646 * in interval i-1. That is, intervals i-1 and i are:
647 *
648 * i-1: ... x <= fChoiceLimits[i]
649 * i: fChoiceLimits[i] < x ...
650 *
651 * Because of the nature of interval 0, fClosures[0] has no
652 * effect.
653 */
654 // double* fChoiceLimits;
655 // UBool* fClosures;
656 // UnicodeString* fChoiceFormats;
657 // int32_t fCount;
658 };
659
660 inline UnicodeString&
format(const Formattable & obj,UnicodeString & appendTo,UErrorCode & status)661 ChoiceFormat::format(const Formattable& obj,
662 UnicodeString& appendTo,
663 UErrorCode& status) const {
664 // Don't use Format:: - use immediate base class only,
665 // in case immediate base modifies behavior later.
666 return NumberFormat::format(obj, appendTo, status);
667 }
668
669 inline UnicodeString&
format(double number,UnicodeString & appendTo)670 ChoiceFormat::format(double number,
671 UnicodeString& appendTo) const {
672 return NumberFormat::format(number, appendTo);
673 }
674
675 inline UnicodeString&
format(int32_t number,UnicodeString & appendTo)676 ChoiceFormat::format(int32_t number,
677 UnicodeString& appendTo) const {
678 return NumberFormat::format(number, appendTo);
679 }
680 U_NAMESPACE_END
681
682 #endif /* #if !UCONFIG_NO_FORMATTING */
683
#endif // CHOICFMT_H
685 //eof
686