// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
 *******************************************************************************
 * Copyright (C) 2012-2016, Google, International Business Machines Corporation and
 * others. All Rights Reserved.
 *******************************************************************************
 */
package com.ibm.icu.text;

import java.io.InvalidObjectException;
import java.text.AttributedCharacterIterator;
import java.text.Format;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.Locale;
import java.util.regex.Pattern;

import com.ibm.icu.impl.FormattedStringBuilder;
import com.ibm.icu.impl.FormattedValueStringBuilderImpl;
import com.ibm.icu.impl.FormattedValueStringBuilderImpl.SpanFieldPlaceholder;
import com.ibm.icu.impl.ICUCache;
import com.ibm.icu.impl.ICUData;
import com.ibm.icu.impl.ICUResourceBundle;
import com.ibm.icu.impl.SimpleCache;
import com.ibm.icu.impl.SimpleFormatterImpl;
import com.ibm.icu.impl.SimpleFormatterImpl.IterInternal;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.util.ULocale;
import com.ibm.icu.util.UResourceBundle;

/**
 * Immutable class for formatting a list, using data from CLDR (or supplied
 * separately). The class is not subclassable.
 *
 * @author Mark Davis
 * @stable ICU 50
 */
public final class ListFormatter {
    // Compiled SimpleFormatter patterns.
    private final String start;
    private final String middle;
    private final ULocale locale;

    // Supplies the compiled "two" and "end" patterns; implementations may pick a
    // pattern based on the text of the element that is about to be appended.
    private interface PatternHandler {
        String getTwoPattern(String text);

        String getEndPattern(String text);
    }

    private final PatternHandler patternHandler;

    /**
     * Type of meaning expressed by the list.
     *
     * @stable ICU 67
     */
    public enum Type {
        /**
         * Conjunction formatting, e.g. "Alice, Bob, Charlie, and Delta".
         *
         * @stable ICU 67
         */
        AND,

        /**
         * Disjunction (or alternative, or simply one of) formatting, e.g.
         * "Alice, Bob, Charlie, or Delta".
         *
         * @stable ICU 67
         */
        OR,

        /**
         * Formatting of a list of values with units, e.g. "5 pounds, 12 ounces".
         *
         * @stable ICU 67
         */
        UNITS
    }

    /**
     * Verbosity level of the list patterns.
     *
     * @stable ICU 67
     */
    public enum Width {
        /**
         * Use list formatting with full words (no abbreviations) when possible.
         *
         * @stable ICU 67
         */
        WIDE,

        /**
         * Use list formatting of typical length.
         *
         * @stable ICU 67
         */
        SHORT,

        /**
         * Use list formatting of the shortest possible length.
         *
         * @stable ICU 67
         */
        NARROW,
    }

    /**
     * Class for span fields in FormattedList.
     *
     * @stable ICU 67
     */
    public static final class SpanField extends UFormat.SpanField {
        private static final long serialVersionUID = 3563544214705634403L;

        /**
         * The concrete field used for spans in FormattedList.
         *
         * Instances of LIST_SPAN should have an associated value, the index
         * within the input list that is represented by the span.
         *
         * @stable ICU 67
         */
        public static final SpanField LIST_SPAN = new SpanField("list-span");

        private SpanField(String name) {
            super(name);
        }

        /**
         * Serialization method; resolves deserialized instances to the constant
         * ListFormatter.SpanField values.
         *
         * @throws InvalidObjectException if the name does not match a known constant.
         * @internal
         * @deprecated This API is ICU internal only.
         */
        @Deprecated
        @Override
        protected Object readResolve() throws InvalidObjectException {
            if (this.getName().equals(LIST_SPAN.getName())) {
                return LIST_SPAN;
            }

            throw new InvalidObjectException("An invalid object.");
        }
    }

    /**
     * Field selectors for format fields defined by ListFormatter.
     * @stable ICU 67
     */
    public static final class Field extends Format.Field {
        private static final long serialVersionUID = -8071145668708265437L;

        /**
         * The literal text in the result which came from the resources.
         * @stable ICU 67
         */
        public static Field LITERAL = new Field("literal");

        /**
         * The element text in the result which came from the input strings.
         * @stable ICU 67
         */
        public static Field ELEMENT = new Field("element");

        private Field(String name) {
            super(name);
        }

        /**
         * Serialization method; resolves deserialized instances to the constant
         * Field values.
         *
         * @throws InvalidObjectException if the name does not match a known constant.
         * @internal
         * @deprecated This API is ICU internal only.
         */
        @Deprecated
        @Override
        protected Object readResolve() throws InvalidObjectException {
            if (this.getName().equals(LITERAL.getName())) {
                return LITERAL;
            }
            if (this.getName().equals(ELEMENT.getName())) {
                return ELEMENT;
            }

            throw new InvalidObjectException("An invalid object.");
        }
    }

    /**
     * An immutable class containing the result of a list formatting operation.
     *
     * Instances of this class are immutable and thread-safe.
     *
     * Not intended for public subclassing.
     *
     * @stable ICU 67
     */
    public static final class FormattedList implements FormattedValue {
        private final FormattedStringBuilder string;

        FormattedList(FormattedStringBuilder string) {
            this.string = string;
        }

        /**
         * {@inheritDoc}
         * @stable ICU 67
         */
        @Override
        public String toString() {
            return string.toString();
        }

        /**
         * {@inheritDoc}
         * @stable ICU 67
         */
        @Override
        public int length() {
            return string.length();
        }

        /**
         * {@inheritDoc}
         * @stable ICU 67
         */
        @Override
        public char charAt(int index) {
            return string.charAt(index);
        }

        /**
         * {@inheritDoc}
         * @stable ICU 67
         */
        @Override
        public CharSequence subSequence(int start, int end) {
            return string.subString(start, end);
        }

        /**
         * {@inheritDoc}
         * @stable ICU 67
         */
        @Override
        public <A extends Appendable> A appendTo(A appendable) {
            return Utility.appendTo(string, appendable);
        }

        /**
         * {@inheritDoc}
         * @stable ICU 67
         */
        @Override
        public boolean nextPosition(ConstrainedFieldPosition cfpos) {
            return FormattedValueStringBuilderImpl.nextPosition(string, cfpos, null);
        }

        /**
         * {@inheritDoc}
         * @stable ICU 67
         */
        @Override
        public AttributedCharacterIterator toCharacterIterator() {
            return FormattedValueStringBuilderImpl.toCharacterIterator(string, null);
        }
    }

    /**
     * <b>Internal:</b> Create a ListFormatter from component strings,
     * with definitions as in LDML.
     *
     * @param two
     *            string for two items, containing {0} for the first, and {1}
     *            for the second.
     * @param start
     *            string for the start of a list items, containing {0} for the
     *            first, and {1} for the rest.
     * @param middle
     *            string for the middle of a list items, containing {0} for the
     *            first part of the list, and {1} for the rest of the list.
     * @param end
     *            string for the end of a list items, containing {0} for the
     *            first part of the list, and {1} for the last item.
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public ListFormatter(String two, String start, String middle, String end) {
        this(
            compilePattern(two, new StringBuilder()),
            compilePattern(start, new StringBuilder()),
            compilePattern(middle, new StringBuilder()),
            compilePattern(end, new StringBuilder()),
            null);
    }

    // Takes already-compiled patterns. The locale may be null (as passed by the
    // public constructor), in which case no language-specific handler is used.
    private ListFormatter(String two, String start, String middle, String end, ULocale locale) {
        this.start = start;
        this.middle = middle;
        // locale must be assigned before createPatternHandler() runs, because that
        // method reads this.locale.
        this.locale = locale;
        this.patternHandler = createPatternHandler(two, end);
    }

    // Compiles a pattern that must contain exactly two arguments.
    private static String compilePattern(String pattern, StringBuilder sb) {
        return SimpleFormatterImpl.compileToStringMinMaxArguments(pattern, sb, 2, 2);
    }

    /**
     * Create a list formatter that is appropriate for a locale.
     *
     * @param locale
     *            the locale in question.
     * @param type
     *            the type of meaning expressed by the list.
     * @param width
     *            the verbosity level of the list patterns.
     * @return ListFormatter
     * @stable ICU 67
     */
    public static ListFormatter getInstance(ULocale locale, Type type, Width width) {
        String styleName = typeWidthToStyleString(type, width);
        if (styleName == null) {
            throw new IllegalArgumentException("Invalid list format type/width");
        }
        return cache.get(locale, styleName);
    }

    /**
     * Create a list formatter that is appropriate for a locale.
     *
     * @param locale
     *            the locale in question.
     * @param type
     *            the type of meaning expressed by the list.
     * @param width
     *            the verbosity level of the list patterns.
     * @return ListFormatter
     * @stable ICU 67
     */
    public static ListFormatter getInstance(Locale locale, Type type, Width width) {
        return getInstance(ULocale.forLocale(locale), type, width);
    }

    /**
     * Create a list formatter that is appropriate for a locale.
     *
     * @param locale
     *            the locale in question.
     * @return ListFormatter
     * @stable ICU 50
     */
    public static ListFormatter getInstance(ULocale locale) {
        return getInstance(locale, Type.AND, Width.WIDE);
    }

    /**
     * Create a list formatter that is appropriate for a locale.
     *
     * @param locale
     *            the locale in question.
     * @return ListFormatter
     * @stable ICU 50
     */
    public static ListFormatter getInstance(Locale locale) {
        return getInstance(ULocale.forLocale(locale), Type.AND, Width.WIDE);
    }

    /**
     * Create a list formatter that is appropriate for the default FORMAT locale.
     *
     * @return ListFormatter
     * @stable ICU 50
     */
    public static ListFormatter getInstance() {
        return getInstance(ULocale.getDefault(ULocale.Category.FORMAT));
    }

    /**
     * Format a list of objects.
     *
     * @param items
     *            items to format. The toString() method is called on each.
     * @return items formatted into a string
     * @stable ICU 50
     */
    public String format(Object... items) {
        return format(Arrays.asList(items));
    }

    /**
     * Format a collection of objects. The toString() method is called on each.
     *
     * @param items
     *            items to format. The toString() method is called on each.
     * @return items formatted into a string
     * @stable ICU 50
     */
    public String format(Collection<?> items) {
        return formatImpl(items, false).toString();
    }

    /**
     * Format a list of objects to a FormattedList. You can access the offsets
     * of each element from the FormattedList.
     *
     * @param items
     *            items to format. The toString() method is called on each.
     * @return items formatted into a FormattedList
     * @stable ICU 67
     */
    public FormattedList formatToValue(Object... items) {
        return formatToValue(Arrays.asList(items));
    }

    /**
     * Format a collection of objects to a FormattedList. You can access the offsets
     * of each element from the FormattedList.
     *
     * @param items
     *            items to format. The toString() method is called on each.
     * @return items formatted into a FormattedList
     * @stable ICU 67
     */
    public FormattedList formatToValue(Collection<?> items) {
        return formatImpl(items, true).toValue();
    }

    // Formats a collection of objects into a FormattedListBuilder. Lists of zero, one
    // and two items are special-cased; longer lists use the start pattern once, the
    // middle pattern for each interior item, and the end pattern for the last item.
    // When needsFields is true the builder also records a span for every element.
    FormattedListBuilder formatImpl(Collection<?> items, boolean needsFields) {
        Iterator<?> it = items.iterator();
        int count = items.size();
        switch (count) {
        case 0:
            return new FormattedListBuilder("", needsFields);
        case 1:
            return new FormattedListBuilder(it.next(), needsFields);
        case 2:
            Object first = it.next();
            Object second = it.next();
            return new FormattedListBuilder(first, needsFields)
                .append(patternHandler.getTwoPattern(String.valueOf(second)), second, 1);
        }
        FormattedListBuilder builder = new FormattedListBuilder(it.next(), needsFields);
        builder.append(start, it.next(), 1);
        for (int idx = 2; idx < count - 1; ++idx) {
            builder.append(middle, it.next(), idx);
        }
        Object last = it.next();
        return builder.append(patternHandler.getEndPattern(String.valueOf(last)), last, count - 1);
    }

    // A static handler just returns the pattern without considering the input text.
    private static final class StaticHandler implements PatternHandler {
        StaticHandler(String two, String end) {
            twoPattern = two;
            endPattern = end;
        }

        @Override
        public String getTwoPattern(String text) {
            return twoPattern;
        }

        @Override
        public String getEndPattern(String text) {
            return endPattern;
        }

        private final String twoPattern;
        private final String endPattern;
    }

    // A contextual handler returns one of the two patterns depending on whether the
    // text matched the regexp.
    private static final class ContextualHandler implements PatternHandler {
        ContextualHandler(Pattern regexp, String thenTwo, String elseTwo, String thenEnd, String elseEnd) {
            this.regexp = regexp;
            thenTwoPattern = thenTwo;
            elseTwoPattern = elseTwo;
            thenEndPattern = thenEnd;
            elseEndPattern = elseEnd;
        }

        @Override
        public String getTwoPattern(String text) {
            if (regexp.matcher(text).matches()) {
                return thenTwoPattern;
            } else {
                return elseTwoPattern;
            }
        }

        @Override
        public String getEndPattern(String text) {
            if (regexp.matcher(text).matches()) {
                return thenEndPattern;
            } else {
                return elseEndPattern;
            }
        }

        private final Pattern regexp;
        private final String thenTwoPattern;
        private final String elseTwoPattern;
        private final String thenEndPattern;
        private final String elseEndPattern;
    }

    // Pattern in the ICU Data in which y might be replaced by e.
    private static final String compiledY = compilePattern("{0} y {1}", new StringBuilder());

    // The new pattern, replacing y with e.
    private static final String compiledE = compilePattern("{0} e {1}", new StringBuilder());

    // Pattern in the ICU Data in which o might be replaced by u.
    private static final String compiledO = compilePattern("{0} o {1}", new StringBuilder());

    // The new pattern, replacing o with u.
    private static final String compiledU = compilePattern("{0} u {1}", new StringBuilder());

    // Condition to change to e.
    // Starts with "hi" or "i" but not with "hie" nor "hia".
    private static final Pattern changeToE = Pattern.compile("(i.*|hi|hi[^ae].*)", Pattern.CASE_INSENSITIVE);

    // Condition to change to u.
    // Starts with "o", "ho", and "8". Also "11" by itself.
    private static final Pattern changeToU = Pattern.compile("((o|ho|8).*|11)", Pattern.CASE_INSENSITIVE);

    // Pattern in the ICU Data which might need to add a DASH after VAV.
    private static final String compiledVav = compilePattern("{0} \u05D5{1}", new StringBuilder());

    // Pattern to add a DASH after VAV.
    private static final String compiledVavDash = compilePattern("{0} \u05D5-{1}", new StringBuilder());

    // Condition to change to VAV followed by a dash.
    // Starts with a non-Hebrew letter.
    private static final Pattern changeToVavDash = Pattern.compile("^[\\P{InHebrew}].*$");

    // A factory function that creates the pattern handler based on the locale.
    // Handles the special cases of Spanish and Hebrew; every other case (including a
    // null locale) gets a StaticHandler.
    private PatternHandler createPatternHandler(String two, String end) {
        if (this.locale != null) {
            String language = this.locale.getLanguage();
            if (language.equals("es")) {
                boolean twoIsY = two.equals(compiledY);
                boolean endIsY = end.equals(compiledY);
                if (twoIsY || endIsY) {
                    return new ContextualHandler(
                        changeToE, twoIsY ? compiledE : two, two, endIsY ? compiledE : end, end);
                }
                boolean twoIsO = two.equals(compiledO);
                boolean endIsO = end.equals(compiledO);
                if (twoIsO || endIsO) {
                    return new ContextualHandler(
                        changeToU, twoIsO ? compiledU : two, two, endIsO ? compiledU : end, end);
                }
            } else if (language.equals("he") || language.equals("iw")) {
                boolean twoIsVav = two.equals(compiledVav);
                boolean endIsVav = end.equals(compiledVav);
                if (twoIsVav || endIsVav) {
                    return new ContextualHandler(changeToVavDash,
                        twoIsVav ? compiledVavDash : two, two, endIsVav ? compiledVavDash : end, end);
                }
            }
        }
        return new StaticHandler(two, end);
    }

    /**
     * Returns the pattern to use for a particular item count.
     * @param count the item count.
     * @return the pattern with {0}, {1}, {2}, etc. For English,
     * getPatternForNumItems(3) == "{0}, {1}, and {2}"
     * @throws IllegalArgumentException when count is 0 or negative.
     * @stable ICU 52
     */
    public String getPatternForNumItems(int count) {
        if (count <= 0) {
            throw new IllegalArgumentException("count must be > 0");
        }
        ArrayList<String> list = new ArrayList<>();
        for (int i = 0; i < count; i++) {
            // Build the placeholder by concatenation so the index is always written
            // with ASCII digits. String.format("{%d}", i) localizes the digits using
            // the default FORMAT locale, which corrupts the placeholders under
            // locales whose zero digit is not '0'.
            list.add("{" + i + "}");
        }
        return format(list);
    }

    /**
     * Returns the locale of this object.
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public ULocale getLocale() {
        return locale;
    }

    // Builds a formatted list
    static class FormattedListBuilder {
        private FormattedStringBuilder string;
        boolean needsFields;

        // Start is the first object in the list; If needsFields is true, enable the slightly
        // more expensive code path that records offsets of each element.
        public FormattedListBuilder(Object start, boolean needsFields) {
            string = new FormattedStringBuilder();
            this.needsFields = needsFields;
            string.setAppendableField(Field.LITERAL);
            appendElement(start, 0);
        }

        // Appends additional object. pattern is a template indicating where the new object gets
        // added in relation to the rest of the list. {0} represents the rest of the list; {1}
        // represents the new object in pattern. next is the object to be added. position is the
        // index of the next object in the list of inputs.
        public FormattedListBuilder append(String compiledPattern, Object next, int position) {
            assert SimpleFormatterImpl.getArgumentLimit(compiledPattern) == 2;
            // Start writing at the front, so pattern text preceding {0} is emitted
            // before the list built so far.
            string.setAppendIndex(0);
            long state = 0;
            while (true) {
                state = IterInternal.step(state, compiledPattern, string);
                if (state == IterInternal.DONE) {
                    break;
                }
                int argIndex = IterInternal.getArgIndex(state);
                if (argIndex == 0) {
                    // {0} is the existing content: skip past it and keep appending at the end.
                    string.setAppendIndex(string.length());
                } else {
                    appendElement(next, position);
                }
            }
            return this;
        }

        // Appends the string form of element; when needsFields is set, the text is tagged
        // with a span placeholder carrying the element's position in the input list.
        private void appendElement(Object element, int position) {
            String elementString = element.toString();
            if (needsFields) {
                SpanFieldPlaceholder field = new SpanFieldPlaceholder();
                field.spanField = SpanField.LIST_SPAN;
                field.normalField = Field.ELEMENT;
                field.value = position;
                field.start = -1;
                field.length = elementString.length();
                string.append(elementString, field);
            } else {
                string.append(elementString, null);
            }
        }

        public void appendTo(Appendable appendable) {
            Utility.appendTo(string, appendable);
        }

        public int getOffset(int fieldPositionFoundIndex) {
            return FormattedValueStringBuilderImpl.findSpan(string, fieldPositionFoundIndex);
        }

        @Override
        public String toString() {
            return string.toString();
        }

        public FormattedList toValue() {
            return new FormattedList(string);
        }
    }

    // Caches ListFormatter instances keyed by "<locale>:<style>".
    private static class Cache {
        private final ICUCache<String, ListFormatter> cache =
            new SimpleCache<>();

        public ListFormatter get(ULocale locale, String style) {
            String key = String.format("%s:%s", locale.toString(), style);
            ListFormatter result = cache.get(key);
            if (result == null) {
                result = load(locale, style);
                cache.put(key, result);
            }
            return result;
        }

        // Reads the four list patterns for the given style from the locale's resource
        // bundle and compiles them into a new ListFormatter.
        private static ListFormatter load(ULocale ulocale, String style) {
            ICUResourceBundle r = (ICUResourceBundle)UResourceBundle.
                    getBundleInstance(ICUData.ICU_BASE_NAME, ulocale);
            StringBuilder sb = new StringBuilder();
            return new ListFormatter(
                compilePattern(r.getWithFallback("listPattern/" + style + "/2").getString(), sb),
                compilePattern(r.getWithFallback("listPattern/" + style + "/start").getString(), sb),
                compilePattern(r.getWithFallback("listPattern/" + style + "/middle").getString(), sb),
                compilePattern(r.getWithFallback("listPattern/" + style + "/end").getString(), sb),
                ulocale);
        }
    }

    static Cache cache = new Cache();

    // Maps a (type, width) pair to the style name used in the "listPattern/" resource
    // paths; returns null if no case matches.
    static String typeWidthToStyleString(Type type, Width width) {
        switch (type) {
        case AND:
            switch (width) {
            case WIDE:
                return "standard";
            case SHORT:
                return "standard-short";
            case NARROW:
                return "standard-narrow";
            }
            break;

        case OR:
            switch (width) {
            case WIDE:
                return "or";
            case SHORT:
                return "or-short";
            case NARROW:
                return "or-narrow";
            }
            break;

        case UNITS:
            switch (width) {
            case WIDE:
                return "unit";
            case SHORT:
                return "unit-short";
            case NARROW:
                return "unit-narrow";
            }
        }

        return null;
    }
}