1 /* 2 ******************************************************************************* 3 * Copyright (C) 2007-2014, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ******************************************************************************* 6 */ 7 8 package androidx.core.i18n.messageformat_icu.simple; 9 10 import java.io.IOException; 11 import java.io.ObjectInputStream; 12 import java.text.NumberFormat; 13 import java.text.ParsePosition; 14 import java.util.Locale; 15 import java.util.Map; 16 17 import androidx.annotation.RestrictTo; 18 import androidx.core.i18n.messageformat_icu.simple.PluralRules.FixedDecimal; 19 import androidx.core.i18n.messageformat_icu.simple.PluralRules.PluralType; 20 import androidx.core.i18n.messageformat_icu.text.MessagePattern; 21 22 /** 23 * <p> 24 * <code>PluralFormat</code> supports the creation of internationalized 25 * messages with plural inflection. It is based on <i>plural 26 * selection</i>, i.e. the caller specifies messages for each 27 * plural case that can appear in the user's language and the 28 * <code>PluralFormat</code> selects the appropriate message based on 29 * the number. 30 * </p> 31 * <h4>The Problem of Plural Forms in Internationalized Messages</h4> 32 * <p> 33 * Different languages have different ways to inflect 34 * plurals. Creating internationalized messages that include plural 35 * forms is only feasible when the framework is able to handle plural 36 * forms of <i>all</i> languages correctly. <code>ChoiceFormat</code> 37 * doesn't handle this well, because it attaches a number interval to 38 * each message and selects the message whose interval contains a 39 * given number. This can only handle a finite number of 40 * intervals. But in some languages, like Polish, one plural case 41 * applies to infinitely many intervals (e.g., the paucal case applies to 42 * numbers ending with 2, 3, or 4 except those ending with 12, 13, or 43 * 14). Thus <code>ChoiceFormat</code> is not adequate. 44 * </p><p> 45 * <code>PluralFormat</code> deals with this by breaking the problem 46 * into two parts: 47 * <ul> 48 * <li>It uses <code>PluralRules</code> that can define more complex 49 * conditions for a plural case than just a single interval. These plural 50 * rules define both what plural cases exist in a language, and to 51 * which numbers these cases apply. 52 * <li>It provides predefined plural rules for many languages. Thus, the programmer 53 * need not worry about the plural cases of a language and 54 * does not have to define the plural cases; they can simply 55 * use the predefined keywords. The whole plural formatting of messages can 56 * be done using localized patterns from resource bundles. For predefined plural 57 * rules, see the CLDR <i>Language Plural Rules</i> page at 58 * http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html 59 * </ul> 60 * </p> 61 * <h4>Usage of <code>PluralFormat</code></h4> 62 * <p>Note: Typically, plural formatting is done via <code>MessageFormat</code> 63 * with a <code>plural</code> argument type, 64 * rather than using a stand-alone <code>PluralFormat</code>. 65 * </p><p> 66 * This discussion assumes that you use <code>PluralFormat</code> with 67 * a predefined set of plural rules. You can create one using one of 68 * the constructors that takes a <code>ULocale</code> object. To 69 * specify the message pattern, you can either pass it to the 70 * constructor or set it explicitly using the 71 * <code>applyPattern()</code> method. The <code>format()</code> 72 * method takes a number object and selects the message of the 73 * matching plural case. This message will be returned. 74 * </p> 75 * <h5>Patterns and Their Interpretation</h5> 76 * <p> 77 * The pattern text defines the message output for each plural case of the 78 * specified locale. Syntax: 79 * <blockquote><pre> 80 * pluralStyle = [offsetValue] (selector '{' message '}')+ 81 * offsetValue = "offset:" number 82 * selector = explicitValue | keyword 83 * explicitValue = '=' number // adjacent, no white space in between 84 * keyword = [^[[:Pattern_Syntax:][:Pattern_White_Space:]]]+ 85 * message: see {@link MessageFormat} 86 * </pre></blockquote> 87 * Pattern_White_Space between syntax elements is ignored, except 88 * between the {curly braces} and their sub-message, 89 * and between the '=' and the number of an explicitValue. 90 * 91 * </p><p> 92 * There are 6 predefined case keywords in CLDR/ICU - 'zero', 'one', 'two', 'few', 'many' and 93 * 'other'. You always have to define a message text for the default plural case 94 * "<code>other</code>" which is contained in every rule set. 95 * If you do not specify a message text for a particular plural case, the 96 * message text of the plural case "<code>other</code>" gets assigned to this 97 * plural case. 98 * </p><p> 99 * When formatting, the input number is first matched against the explicitValue clauses. 100 * If there is no exact-number match, then a keyword is selected by calling 101 * the <code>PluralRules</code> with the input number <em>minus the offset</em>. 102 * (The offset defaults to 0 if it is omitted from the pattern string.) 103 * If there is no clause with that keyword, then the "other" clauses is returned. 104 * </p><p> 105 * An unquoted pound sign (<code>#</code>) in the selected sub-message 106 * itself (i.e., outside of arguments nested in the sub-message) 107 * is replaced by the input number minus the offset. 108 * The number-minus-offset value is formatted using a 109 * <code>NumberFormat</code> for the <code>PluralFormat</code>'s locale. If you 110 * need special number formatting, you have to use a <code>MessageFormat</code> 111 * and explicitly specify a <code>NumberFormat</code> argument. 112 * <strong>Note:</strong> That argument is formatting without subtracting the offset! 113 * If you need a custom format and have a non-zero offset, then you need to pass the 114 * number-minus-offset value as a separate parameter. 115 * </p> 116 * For a usage example, see the {@link MessageFormat} class documentation. 117 * 118 * <h4>Defining Custom Plural Rules</h4> 119 * <p>If you need to use <code>PluralFormat</code> with custom rules, you can 120 * create a <code>PluralRules</code> object and pass it to 121 * <code>PluralFormat</code>'s constructor. If you also specify a locale in this 122 * constructor, this locale will be used to format the number in the message 123 * texts. 124 * </p><p> 125 * For more information about <code>PluralRules</code>, see 126 * {@link PluralRules}. 127 * </p> 128 * 129 * @author tschumann (Tim Schumann) 130 * icu_annot::stable ICU 3.8 131 */ 132 @RestrictTo(RestrictTo.Scope.LIBRARY) 133 public class PluralFormat /* extends UFormat */ { 134 private static final long serialVersionUID = 1L; 135 136 /** 137 * The locale used for standard number formatting and getting the predefined 138 * plural rules (if they were not defined explicitly). 139 * @serial 140 */ 141 private Locale locale_ = null; 142 143 /** 144 * The plural rules used for plural selection. 145 * @serial 146 */ 147 private PluralRules pluralRules = null; 148 149 /** 150 * The applied pattern string. 151 * @serial 152 */ 153 private String pattern = null; 154 155 /** 156 * The MessagePattern which contains the parsed structure of the pattern string. 157 */ 158 transient private MessagePattern msgPattern; 159 160 /** 161 * Obsolete with use of MessagePattern since ICU 4.8. Used to be: 162 * The format messages for each plural case. It is a mapping: 163 * <code>String</code>(plural case keyword) --> <code>String</code> 164 * (message for this plural case). 165 * @serial 166 */ 167 private Map<String, String> parsedValues = null; 168 169 /** 170 * This <code>NumberFormat</code> is used for the standard formatting of 171 * the number inserted into the message. 172 * @serial 173 */ 174 private NumberFormat numberFormat = null; 175 176 /** 177 * The offset to subtract before invoking plural rules. 178 */ 179 // transient private double offset = 0; 180 181 /** 182 * Creates a new cardinal-number <code>PluralFormat</code> for the default <code>FORMAT</code> locale. 183 * This locale will be used to get the set of plural rules and for standard 184 * number formatting. 185 * @see Category#FORMAT 186 * icu_annot::stable ICU 3.8 187 */ PluralFormat()188 public PluralFormat() { 189 init(null, PluralType.CARDINAL, Locale.getDefault()); // Category.FORMAT 190 } 191 192 /** 193 * Creates a new cardinal-number <code>PluralFormat</code> for a given locale. 194 * @param locale the <code>PluralFormat</code> will be configured with 195 * rules for this locale. This locale will also be used for standard 196 * number formatting. 197 * icu_annot::stable ICU 3.8 198 */ PluralFormat(Locale locale)199 public PluralFormat(Locale locale) { 200 init(null, PluralType.CARDINAL, locale); 201 } 202 203 /** 204 * Creates a new <code>PluralFormat</code> for the plural type. 205 * The standard number formatting will be done using the given locale. 206 * @param locale the default number formatting will be done using this 207 * locale. 208 * @param type The plural type (e.g., cardinal or ordinal). 209 * icu_annot::stable ICU 50 210 */ PluralFormat(Locale locale, PluralType type)211 public PluralFormat(Locale locale, PluralType type) { 212 init(null, type, locale); 213 } 214 215 /* 216 * Initializes the <code>PluralRules</code> object. 217 * Postcondition:<br/> 218 * <code>ulocale</code> : is <code>locale</code><br/> 219 * <code>pluralRules</code>: if <code>rules</code> != <code>null</code> 220 * it's set to rules, otherwise it is the 221 * predefined plural rule set for the locale 222 * <code>ulocale</code>.<br/> 223 * <code>parsedValues</code>: is <code>null</code><br/> 224 * <code>pattern</code>: is <code>null</code><br/> 225 * <code>numberFormat</code>: a <code>NumberFormat</code> for the locale 226 * <code>ulocale</code>. 227 */ init(PluralRules rules, PluralType type, Locale locale)228 private void init(PluralRules rules, PluralType type, Locale locale) { 229 locale_ = locale; 230 pluralRules = (rules == null) ? PluralRules.forLocale(locale, type) 231 : rules; 232 resetPattern(); 233 numberFormat = NumberFormat.getInstance(locale); 234 } 235 resetPattern()236 private void resetPattern() { 237 pattern = null; 238 if(msgPattern != null) { 239 msgPattern.clear(); 240 } 241 // offset = 0; 242 } 243 244 /** 245 * Sets the pattern used by this plural format. 246 * The method parses the pattern and creates a map of format strings 247 * for the plural rules. 248 * Patterns and their interpretation are specified in the class description. 249 * 250 * @param pattern the pattern for this plural format. 251 * @throws IllegalArgumentException if the pattern is invalid. 252 * icu_annot::stable ICU 3.8 253 */ applyPattern(String pattern)254 public void applyPattern(String pattern) { 255 this.pattern = pattern; 256 if (msgPattern == null) { 257 msgPattern = new MessagePattern(); 258 } 259 try { 260 msgPattern.parsePluralStyle(pattern); 261 // offset = msgPattern.getPluralOffset(0); 262 } catch(RuntimeException e) { 263 resetPattern(); 264 throw e; 265 } 266 } 267 268 /** 269 * Returns the pattern for this PluralFormat. 270 * 271 * @return the pattern string 272 * icu_annot::stable ICU 4.2 273 */ toPattern()274 public String toPattern() { 275 return pattern; 276 } 277 278 /** 279 * Finds the PluralFormat sub-message for the given number, or the "other" sub-message. 280 * @param pattern A MessagePattern. 281 * @param partIndex the index of the first PluralFormat argument style part. 282 * @param selector the PluralSelector for mapping the number (minus offset) to a keyword. 283 * @param context worker object for the selector. 284 * @param number a number to be matched to one of the PluralFormat argument's explicit values, 285 * or mapped via the PluralSelector. 286 * @return the sub-message start part index. 287 */ findSubMessage( MessagePattern pattern, int partIndex, PluralSelector selector, Object context, double number)288 /*package*/ static int findSubMessage( 289 MessagePattern pattern, int partIndex, 290 PluralSelector selector, Object context, double number) { 291 int count=pattern.countParts(); 292 double offset; 293 MessagePattern.Part part=pattern.getPart(partIndex); 294 if(part.getType().hasNumericValue()) { 295 offset=pattern.getNumericValue(part); 296 ++partIndex; 297 } else { 298 offset=0; 299 } 300 // The keyword is null until we need to match against a non-explicit, not-"other" value. 301 // Then we get the keyword from the selector. 302 // (In other words, we never call the selector if we match against an explicit value, 303 // or if the only non-explicit keyword is "other".) 304 String keyword=null; 305 // When we find a match, we set msgStart>0 and also set this boolean to true 306 // to avoid matching the keyword again (duplicates are allowed) 307 // while we continue to look for an explicit-value match. 308 boolean haveKeywordMatch=false; 309 // msgStart is 0 until we find any appropriate sub-message. 310 // We remember the first "other" sub-message if we have not seen any 311 // appropriate sub-message before. 312 // We remember the first matching-keyword sub-message if we have not seen 313 // one of those before. 314 // (The parser allows [does not check for] duplicate keywords. 315 // We just have to make sure to take the first one.) 316 // We avoid matching the keyword twice by also setting haveKeywordMatch=true 317 // at the first keyword match. 318 // We keep going until we find an explicit-value match or reach the end of the plural style. 319 int msgStart=0; 320 // Iterate over (ARG_SELECTOR [ARG_INT|ARG_DOUBLE] message) tuples 321 // until ARG_LIMIT or end of plural-only pattern. 322 do { 323 part=pattern.getPart(partIndex++); 324 MessagePattern.Part.Type type=part.getType(); 325 if(type==MessagePattern.Part.Type.ARG_LIMIT) { 326 break; 327 } 328 assert type==MessagePattern.Part.Type.ARG_SELECTOR; 329 // part is an ARG_SELECTOR followed by an optional explicit value, and then a message 330 if(pattern.getPartType(partIndex).hasNumericValue()) { 331 // explicit value like "=2" 332 part=pattern.getPart(partIndex++); 333 if(number==pattern.getNumericValue(part)) { 334 // matches explicit value 335 return partIndex; 336 } 337 } else if(!haveKeywordMatch) { 338 // plural keyword like "few" or "other" 339 // Compare "other" first and call the selector if this is not "other". 340 if(pattern.partSubstringMatches(part, "other")) { 341 if(msgStart==0) { 342 msgStart=partIndex; 343 if(keyword!=null && keyword.equals("other")) { 344 // This is the first "other" sub-message, 345 // and the selected keyword is also "other". 346 // Do not match "other" again. 347 haveKeywordMatch=true; 348 } 349 } 350 } else { 351 if(keyword==null) { 352 keyword=selector.select(context, number-offset); 353 if(msgStart!=0 && keyword.equals("other")) { 354 // We have already seen an "other" sub-message. 355 // Do not match "other" again. 356 haveKeywordMatch=true; 357 // Skip keyword matching but do getLimitPartIndex(). 358 } 359 } 360 if(!haveKeywordMatch && pattern.partSubstringMatches(part, keyword)) { 361 // keyword matches 362 msgStart=partIndex; 363 // Do not match this keyword again. 364 haveKeywordMatch=true; 365 } 366 } 367 } 368 partIndex=pattern.getLimitPartIndex(partIndex); 369 } while(++partIndex<count); 370 return msgStart; 371 } 372 373 /** 374 * Interface for selecting PluralFormat keywords for numbers. 375 * The PluralRules class was intended to implement this interface, 376 * but there is no public API that uses a PluralSelector, 377 * only MessageFormat and PluralFormat have PluralSelector implementations. 378 * Therefore, PluralRules is not marked to implement this non-public interface, 379 * to avoid confusing users. 380 * icu_annot::internal 381 */ 382 /*package*/ interface PluralSelector { 383 /** 384 * Given a number, returns the appropriate PluralFormat keyword. 385 * 386 * @param context worker object for the selector. 387 * @param number The number to be plural-formatted. 388 * @return The selected PluralFormat keyword. 389 */ select(Object context, double number)390 public String select(Object context, double number); 391 } 392 393 // See PluralSelector: 394 // We could avoid this adapter class if we made PluralSelector public 395 // (or at least publicly visible) and had PluralRules implement PluralSelector. 396 // private final class PluralSelectorAdapter implements PluralSelector { 397 // @Override 398 // public String select(Object context, double number) { 399 // FixedDecimal dec = (FixedDecimal) context; 400 // assert dec.source == number; 401 // return pluralRules.select(dec); 402 // } 403 // } 404 // transient private PluralSelectorAdapter pluralRulesWrapper = new PluralSelectorAdapter(); 405 406 /** 407 * This method is not yet supported by <code>PluralFormat</code>. 408 * @param text the string to be parsed. 409 * @param parsePosition defines the position where parsing is to begin, 410 * and upon return, the position where parsing left off. If the position 411 * has not changed upon return, then parsing failed. 412 * @return nothing because this method is not yet implemented. 413 * @throws UnsupportedOperationException will always be thrown by this method. 414 * icu_annot::stable ICU 3.8 415 */ parse(String text, ParsePosition parsePosition)416 public Number parse(String text, ParsePosition parsePosition) { 417 throw new UnsupportedOperationException(); 418 } 419 420 /** 421 * This method is not yet supported by <code>PluralFormat</code>. 422 * @param source the string to be parsed. 423 * @param pos defines the position where parsing is to begin, 424 * and upon return, the position where parsing left off. If the position 425 * has not changed upon return, then parsing failed. 426 * @return nothing because this method is not yet implemented. 427 * @throws UnsupportedOperationException will always be thrown by this method. 428 * icu_annot::stable ICU 3.8 429 */ parseObject(String source, ParsePosition pos)430 public Object parseObject(String source, ParsePosition pos) { 431 throw new UnsupportedOperationException(); 432 } 433 434 /** 435 * Returns true if this equals the provided PluralFormat. 436 * @param rhs the PluralFormat to compare against 437 * @return true if this equals rhs 438 * icu_annot::stable ICU 3.8 439 */ 440 // public boolean equals(PluralFormat rhs) { 441 // return equals((Object)rhs); 442 // } 443 444 /** 445 * {@inheritDoc} 446 * icu_annot::stable ICU 3.8 447 */ 448 @Override hashCode()449 public int hashCode() { 450 return pluralRules.hashCode() ^ parsedValues.hashCode(); 451 } 452 453 /** 454 * {@inheritDoc} 455 * icu_annot::stable ICU 3.8 456 */ 457 @Override toString()458 public String toString() { 459 StringBuilder buf = new StringBuilder(); 460 buf.append("locale=" + locale_); 461 buf.append(", rules='" + pluralRules + "'"); 462 buf.append(", pattern='" + pattern + "'"); 463 buf.append(", format='" + numberFormat + "'"); 464 return buf.toString(); 465 } 466 467 /* 468 private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException { 469 in.defaultReadObject(); 470 pluralRulesWrapper = new PluralSelectorAdapter(); 471 // Ignore the parsedValues from an earlier class version (before ICU 4.8) 472 // and rebuild the msgPattern. 473 parsedValues = null; 474 if (pattern != null) { 475 applyPattern(pattern); 476 } 477 } 478 */ 479 } 480