• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /********************************************************************
2  * COPYRIGHT:
3  * Copyright (c) 1997-2010, International Business Machines Corporation and
4  * others. All Rights Reserved.
5  * Copyright (C) 2010 , Yahoo! Inc.
6  ********************************************************************
7  *
8  * File SELFMT.H
9  *
10  * Modification History:
11  *
12  *   Date        Name        Description
13  *   11/11/09    kirtig      Finished first cut of implementation.
14  ********************************************************************/
15 
16 #ifndef SELFMT
17 #define SELFMT
18 
19 #include "unicode/utypes.h"
20 #include "unicode/numfmt.h"
21 
22 /**
23  * \file
24  * \brief C++ API: SelectFormat object
25  */
26 
27 #if !UCONFIG_NO_FORMATTING
28 
29 U_NAMESPACE_BEGIN
30 
31 class Hashtable;  // forward declaration: this header only uses Hashtable through pointers
32 
33 /**
34   * <p><code>SelectFormat</code> supports the creation of  internationalized
35   * messages by selecting phrases based on keywords. The pattern  specifies
36   * how to map keywords to phrases and provides a default phrase. The
37   * object provided to the format method is a string that's matched
38   * against the keywords. If there is a match, the corresponding phrase
39   * is selected; otherwise, the default phrase is used.</p>
40   *
41   * <h4>Using <code>SelectFormat</code> for Gender Agreement</h4>
42   *
43   * <p>The main use case for the select format is gender based  inflection.
44   * When names or nouns are inserted into sentences, their gender can  affect pronouns,
45   * verb forms, articles, and adjectives. Special care needs to be
46   * taken for the case where the gender cannot be determined.
47   * The impact varies between languages:</p>
48   * \htmlonly
49   * <ul>
50   * <li>English has three genders, and unknown gender is handled as a  special
51   * case. Names use the gender of the named person (if known), nouns  referring
52   * to people use natural gender, and inanimate objects are usually  neutral.
53   * The gender only affects pronouns: "he", "she", "it", "they".
54   *
55   * <li>German differs from English in that the gender of nouns is  rather
56   * arbitrary, even for nouns referring to people ("M&#x00E4;dchen", girl, is  neutral).
57   * The gender affects pronouns ("er", "sie", "es"), articles ("der",  "die",
58   * "das"), and adjective forms ("guter Mann", "gute Frau", "gutes  M&#x00E4;dchen").
59   *
60   * <li>French has only two genders; as in German the gender of nouns
61   * is rather arbitrary - for sun and moon, the genders
62   * are the opposite of those in German. The gender affects
63   * pronouns ("il", "elle"), articles ("le", "la"),
64   * adjective forms ("bon", "bonne"), and sometimes
65   * verb forms ("all&#x00E9;", "all&#x00E9;e").
66   *
67   * <li>Polish distinguishes five genders (or noun classes),
68   * human masculine, animate non-human masculine, inanimate masculine,
69   * feminine, and neuter.
70   * </ul>
71   * \endhtmlonly
72   * <p>Some other languages have noun classes that are not related to  gender,
73   * but similar in grammatical use.
74   * Some African languages have around 20 noun classes.</p>
75   *
76   * <p>To enable localizers to create sentence patterns that take their
77   * language's gender dependencies into consideration, software has to  provide
78   * information about the gender associated with a noun or name to
79   * <code>MessageFormat</code>.
80   * Two main cases can be distinguished:</p>
81   *
82   * <ul>
83   * <li>For people, natural gender information should be maintained  for each person.
84   * The keywords "male", "female", "mixed" (for groups of people)
85   * and "unknown" are used.
86   *
87   * <li>For nouns, grammatical gender information should be maintained  for
88   * each noun and per language, e.g., in resource bundles.
89   * The keywords "masculine", "feminine", and "neuter" are commonly  used,
90   * but some languages may require other keywords.
91   * </ul>
92   *
93   * <p>The resulting keyword is provided to <code>MessageFormat</code>  as a
94   * parameter separate from the name or noun it's associated with. For  example,
95   * to generate a message such as "Jean went to Paris", three separate  arguments
96   * would be provided: The name of the person as argument 0, the  gender of
97   * the person as argument 1, and the name of the city as argument 2.
98   * The sentence pattern for English, where the gender of the person has
99   * no impact on this simple sentence, would not refer to argument 1  at all:</p>
100   *
101   * <pre>{0} went to {2}.</pre>
102   *
103   * <p>The sentence pattern for French, where the gender of the person affects
104   * the form of the participle, uses a select format based on argument 1:</p>
105   *
106   * \htmlonly<pre>{0} est {1, select, female {all&#x00E9;e} other {all&#x00E9;}} &#x00E0; {2}.</pre>\endhtmlonly
107   *
108   * <p>Patterns can be nested, so that it's possible to handle  interactions of
109   * number and gender where necessary. For example, if the above  sentence should
110   * allow for the names of several people to be inserted, the  following sentence
111   * pattern can be used (with argument 0 the list of people's names,
112   * argument 1 the number of people, argument 2 their combined gender, and
113   * argument 3 the city name):</p>
114   *
115   * \htmlonly
116   * <pre>{0} {1, plural,
117   *                 one {est {2, select, female {all&#x00E9;e} other  {all&#x00E9;}}}
118   *                 other {sont {2, select, female {all&#x00E9;es} other {all&#x00E9;s}}}
119   *          }&#x00E0; {3}.</pre>
120   * \endhtmlonly
121   *
122   * <h4>Patterns and Their Interpretation</h4>
123   *
124   * <p>The <code>SelectFormat</code> pattern text defines the phrase  output
125   * for each user-defined keyword.
126   * The pattern is a sequence of <code><i>keyword</i>{<i>phrase</i>}</code>
127   * clauses.
128   * Each clause assigns the phrase <code><i>phrase</i></code>
129   * to the user-defined <code><i>keyword</i></code>.</p>
130   *
131   * <p>Keywords must match the pattern [a-zA-Z][a-zA-Z0-9_-]*; keywords
132   * that don't match this pattern result in the error code
133   * <code>U_ILLEGAL_CHARACTER</code>.
134   * You always have to define a phrase for the default keyword
135   * <code>other</code>; this phrase is returned when the keyword
136   * provided to
137   * the <code>format</code> method matches no other keyword.
138   * If a pattern does not provide a phrase for <code>other</code>, the  method
139   * it's provided to returns the error  <code>U_DEFAULT_KEYWORD_MISSING</code>.
140   * If a pattern provides more than one phrase for the same keyword, the
141   * error <code>U_DUPLICATE_KEYWORD</code> is returned.
142   * <br>
143   * Spaces between <code><i>keyword</i></code> and
144   * <code>{<i>phrase</i>}</code>  will be ignored; spaces within
145   * <code>{<i>phrase</i>}</code> will be preserved.</p>
146   *
147   * <p>The phrase for a particular select case may contain other message
148   * format patterns. <code>SelectFormat</code> preserves these so that  you
149   * can use the strings produced by <code>SelectFormat</code> with other
150   * formatters. If you are using <code>SelectFormat</code> inside a
151   * <code>MessageFormat</code> pattern, <code>MessageFormat</code> will
152   * automatically evaluate the resulting format pattern.
153   * Thus, curly braces (<code>{</code>, <code>}</code>) are <i>only</i> allowed
154   * in phrases to define a nested format pattern.</p>
155   *
156   * <p>Example:
157   * \htmlonly
158   *
159   * UErrorCode status = U_ZERO_ERROR;
160   * MessageFormat *msgFmt = new MessageFormat(UnicodeString("{0} est  {1, select, female {all&#x00E9;e} other {all&#x00E9;}} &#x00E0; Paris."), Locale("fr"),  status);
161   * if (U_FAILURE(status)) {
162   *       return;
163   * }
164   * FieldPosition ignore(FieldPosition::DONT_CARE);
165   * UnicodeString result;
166   *
167   * const char* str1 = "Kirti,female";
168   * Formattable args1[] = {"Kirti","female"};
169   * msgFmt->format(args1, 2, result, ignore, status);
170   * cout << "Input is " << str1 << " and result is: " << result << endl;
171   * delete msgFmt;
172   *
173   * \endhtmlonly
174   * </p>
175   *
176   * Produces the output:<br>
177   * \htmlonly
178   * <code>Kirti est all&#x00E9;e &#x00E0; Paris.</code>
179   * \endhtmlonly
180   *
181   * @stable ICU 4.4
182   */
183 
184 class U_I18N_API SelectFormat : public Format {
185 public:
186 
187     /**
188      * Creates a new <code>SelectFormat</code> for a given pattern string.
189      * @param pattern  the pattern for this <code>SelectFormat</code>;
190      *                 errors are returned to status if the pattern is invalid.
191      * @param status   output param set to success/failure code on exit, which
192      *                 must not indicate a failure before the function call.
193      * @stable ICU 4.4
194      */
195     SelectFormat(const UnicodeString& pattern, UErrorCode& status);
196 
197     /**
198      * Copy constructor: creates a <code>SelectFormat</code> from <code>other</code>.
199      * @stable ICU 4.4
200      */
201     SelectFormat(const SelectFormat& other);
202 
203     /**
204      * Destructor.
205      * @stable ICU 4.4
206      */
207     virtual ~SelectFormat();
208 
209     /**
210      * Sets the pattern used by this select format
211      * for the keyword rules.
212      * Patterns and their interpretation are specified in the class description.
213      *
214      * @param pattern the pattern for this select format;
215      *                errors are returned to status if the pattern is invalid.
216      * @param status  output param set to success/failure code on exit, which
217      *                must not indicate a failure before the function call.
218      * @stable ICU 4.4
219      */
220     void applyPattern(const UnicodeString& pattern, UErrorCode& status);
221 
222 
223     using Format::format;
224 
225     /**
226      * Selects the phrase for the given keyword.
227      *
228      * @param keyword  The keyword that is used to select an alternative.
229      * @param appendTo output parameter to receive result.
230      *                 result is appended to existing contents.
231      * @param pos      On input: an alignment field, if desired.
232      *                 On output: the offsets of the alignment field.
233      * @param status   output param set to success/failure code on exit, which
234      *                 must not indicate a failure before the function call.
235      * @return         Reference to 'appendTo' parameter.
236      * @stable ICU 4.4
237      */
238     UnicodeString& format(const UnicodeString& keyword,
239                             UnicodeString& appendTo,
240                             FieldPosition& pos,
241                             UErrorCode& status) const;
242 
243     /**
244      * Assignment operator.
245      *
246      * @param other    the SelectFormat object to copy from.
247      * @stable ICU 4.4
248      */
249     SelectFormat& operator=(const SelectFormat& other);
250 
251     /**
252      * Return true if another object is semantically equal to this one.
253      *
254      * @param other    the Format object to be compared with.
255      * @return         true if other is semantically equal to this.
256      * @stable ICU 4.4
257      */
258     virtual UBool operator==(const Format& other) const;
259 
260     /**
261      * Return true if another object is semantically unequal to this one.
262      *
263      * @param other    the Format object to be compared with.
264      * @return         true if other is semantically unequal to this.
265      * @stable ICU 4.4
266      */
267     virtual UBool operator!=(const Format& other) const;
268 
269     /**
270      * Clones this Format object polymorphically.  The caller owns the
271      * result and should delete it when done.
272      * @stable ICU 4.4
273      */
274     virtual Format* clone(void) const;
275 
276     /**
277      * Format an object to produce a string.
278      * This method handles keyword strings.
279      * If the Formattable object is not a <code>UnicodeString</code>,
280      * then it returns a failing UErrorCode.
281      *
282      * @param obj       A keyword string that is used to select an alternative.
283      * @param appendTo  output parameter to receive result.
284      *                  Result is appended to existing contents.
285      * @param pos       On input: an alignment field, if desired.
286      *                  On output: the offsets of the alignment field.
287      * @param status    output param filled with success/failure status.
288      * @return          Reference to 'appendTo' parameter.
289      * @stable ICU 4.4
290      */
291     UnicodeString& format(const Formattable& obj,
292                          UnicodeString& appendTo,
293                          FieldPosition& pos,
294                          UErrorCode& status) const;
295 
296     /**
297      * Returns the pattern from applyPattern() or constructor.
298      *
299      * @param appendTo  output parameter to receive result.
300      *                  Result is appended to existing contents.
301      * @return the UnicodeString with inserted pattern.
302      * @stable ICU 4.4
303      */
304     UnicodeString& toPattern(UnicodeString& appendTo);
305 
306     /**
307      * This method is not yet supported by <code>SelectFormat</code>.
308      * <P>
309      * Before calling, set parse_pos.index to the offset you want to start
310      * parsing at in the source. After calling, parse_pos.index is the end of
311      * the text you parsed. If an error occurs, index is unchanged.
312      * <P>
313      * When parsing, leading whitespace is discarded (with a successful parse),
314      * while trailing whitespace is left as is.
315      * <P>
316      * See Format::parseObject() for more.
317      *
318      * @param source     The string to be parsed into an object.
319      * @param result     Formattable to be set to the parse result.
320      *     If parse fails, return contents are undefined.
321      * @param parse_pos The position to start parsing at. Upon return
322      *     this param is set to the position after the
323      *     last character successfully parsed. If the
324      *     source is not parsed successfully, this param
325      *     will remain unchanged.
326      * @stable ICU 4.4
327      */
328     virtual void parseObject(const UnicodeString& source,
329                             Formattable& result,
330                             ParsePosition& parse_pos) const;
331 
332     /**
333      * ICU "poor man's RTTI", returns a UClassID for this class.
334      * @stable ICU 4.4
335      */
336     static UClassID U_EXPORT2 getStaticClassID(void);
337 
338     /**
339      * ICU "poor man's RTTI", returns a UClassID for the actual class.
340      * @stable ICU 4.4
341      */
342     virtual UClassID getDynamicClassID() const;
343 
344 private:
345     typedef enum classesForSelectFormat{ // character classes returned by classifyCharacter()
346         tStartKeyword,
347         tContinueKeyword,
348         tLeftBrace,
349         tRightBrace,
350         tSpace,
351         tOther
352     }CharacterClass;
353 
354     UnicodeString pattern; // NOTE(review): presumably the pattern text set by the constructor/applyPattern() - confirm in the implementation
355     // Hash table that stores the (keyword, phrase) pairs.
356     Hashtable  *parsedValuesHash;
357 
358     SelectFormat();   // default constructor not implemented.
359     void initHashTable(UErrorCode &status); // NOTE(review): presumably sets up parsedValuesHash - confirm in the implementation
360     void cleanHashTable(); // NOTE(review): presumably releases parsedValuesHash - confirm in the implementation
361 
362     // Used by applyPattern(): classifies a character into one of the CharacterClass values.
363     CharacterClass classifyCharacter(UChar ch) const;
364     // Checks whether the "other" keyword is present in the pattern.
365     UBool checkSufficientDefinition();
366     // Checks whether the keyword passed in is valid.
367     UBool checkValidKeyword(const UnicodeString& argKeyword) const;
368     void parsingFailure(); // NOTE(review): presumably invoked when pattern parsing fails - confirm in the implementation
369     void copyHashtable(Hashtable *other, UErrorCode& status); // NOTE(review): presumably copies the entries of other into parsedValuesHash - confirm
370 };
371 
372 U_NAMESPACE_END
373 
374 #endif /* #if !UCONFIG_NO_FORMATTING */
375 
376 #endif // SELFMT
377 //eof
378