• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4  *******************************************************************************
5  * Copyright (C) 2012-2016, Google, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  *******************************************************************************
8  */
9 package com.ibm.icu.text;
10 
11 import java.io.InvalidObjectException;
12 import java.text.AttributedCharacterIterator;
13 import java.text.Format;
14 import java.util.ArrayList;
15 import java.util.Arrays;
16 import java.util.Collection;
17 import java.util.Iterator;
18 import java.util.Locale;
19 import java.util.regex.Pattern;
20 
21 import com.ibm.icu.impl.FormattedStringBuilder;
22 import com.ibm.icu.impl.FormattedValueStringBuilderImpl;
23 import com.ibm.icu.impl.FormattedValueStringBuilderImpl.SpanFieldPlaceholder;
24 import com.ibm.icu.impl.ICUCache;
25 import com.ibm.icu.impl.ICUData;
26 import com.ibm.icu.impl.ICUResourceBundle;
27 import com.ibm.icu.impl.SimpleCache;
28 import com.ibm.icu.impl.SimpleFormatterImpl;
29 import com.ibm.icu.impl.SimpleFormatterImpl.IterInternal;
30 import com.ibm.icu.impl.Utility;
31 import com.ibm.icu.util.ULocale;
32 import com.ibm.icu.util.UResourceBundle;
33 
34 /**
35  * Immutable class for formatting a list, using data from CLDR (or supplied
36  * separately). The class is not subclassable.
37  *
38  * @author Mark Davis
39  * @stable ICU 50
40  */
41 final public class ListFormatter {
42     // Compiled SimpleFormatter patterns.
43     private final String start;
44     private final String middle;
45     private final ULocale locale;
46 
47     private interface PatternHandler {
getTwoPattern(String text)48         public String getTwoPattern(String text);
getEndPattern(String text)49         public String getEndPattern(String text);
50     }
51     private final PatternHandler patternHandler;
52 
/**
 * The kind of meaning a formatted list expresses.
 *
 * @stable ICU 67
 */
public enum Type {
    /**
     * A conjunction ("and" list), e.g. "Alice, Bob, Charlie, and Delta".
     *
     * @stable ICU 67
     */
    AND,

    /**
     * A disjunction (alternatives, "one of"), e.g.
     * "Alice, Bob, Charlie, or Delta".
     *
     * @stable ICU 67
     */
    OR,

    /**
     * A list of values with units, e.g. "5 pounds, 12 ounces".
     *
     * @stable ICU 67
     */
    UNITS
}
81 
/**
 * How verbose the list patterns should be.
 *
 * @stable ICU 67
 */
public enum Width {
    /**
     * Full words (no abbreviations) whenever possible.
     *
     * @stable ICU 67
     */
    WIDE,

    /**
     * List formatting of typical length.
     *
     * @stable ICU 67
     */
    SHORT,

    /**
     * The shortest possible list formatting.
     *
     * @stable ICU 67
     */
    NARROW
}
109 
110     /**
111      * Class for span fields in FormattedList.
112      *
113      * @stable ICU 67
114      */
115     public static final class SpanField extends UFormat.SpanField {
116         private static final long serialVersionUID = 3563544214705634403L;
117 
118         /**
119          * The concrete field used for spans in FormattedList.
120          *
121          * Instances of LIST_SPAN should have an associated value, the index
122          * within the input list that is represented by the span.
123          *
124          * @stable ICU 67
125          */
126         public static final SpanField LIST_SPAN = new SpanField("list-span");
127 
SpanField(String name)128         private SpanField(String name) {
129             super(name);
130         }
131 
132         /**
133          * serialization method resolve instances to the constant
134          * ListFormatter.SpanField values
135          * @internal
136          * @deprecated This API is ICU internal only.
137          */
138         @Deprecated
139         @Override
readResolve()140         protected Object readResolve() throws InvalidObjectException {
141             if (this.getName().equals(LIST_SPAN.getName()))
142                 return LIST_SPAN;
143 
144             throw new InvalidObjectException("An invalid object.");
145         }
146     }
147 
148     /**
149      * Field selectors for format fields defined by ListFormatter.
150      * @stable ICU 67
151      */
152     public static final class Field extends Format.Field {
153         private static final long serialVersionUID = -8071145668708265437L;
154 
155         /**
156          * The literal text in the result which came from the resources.
157          * @stable ICU 67
158          */
159         public static Field LITERAL = new Field("literal");
160 
161         /**
162          * The element text in the result which came from the input strings.
163          * @stable ICU 67
164          */
165         public static Field ELEMENT = new Field("element");
166 
Field(String name)167         private Field(String name) {
168             super(name);
169         }
170 
171         /**
172          * Serialization method resolve instances to the constant Field values
173          *
174          * @internal
175          * @deprecated This API is ICU internal only.
176          */
177         @Deprecated
178         @Override
readResolve()179         protected Object readResolve() throws InvalidObjectException {
180             if (this.getName().equals(LITERAL.getName()))
181                 return LITERAL;
182             if (this.getName().equals(ELEMENT.getName()))
183                 return ELEMENT;
184 
185             throw new InvalidObjectException("An invalid object.");
186         }
187     }
188 
189     /**
190      * An immutable class containing the result of a list formatting operation.
191      *
192      * Instances of this class are immutable and thread-safe.
193      *
194      * Not intended for public subclassing.
195      *
196      * @stable ICU 67
197      */
198     public static final class FormattedList implements FormattedValue {
199         private final FormattedStringBuilder string;
200 
FormattedList(FormattedStringBuilder string)201         FormattedList(FormattedStringBuilder string) {
202             this.string = string;
203         }
204 
205         /**
206          * {@inheritDoc}
207          * @stable ICU 67
208          */
209         @Override
toString()210         public String toString() {
211             return string.toString();
212         }
213 
214         /**
215          * {@inheritDoc}
216          * @stable ICU 67
217          */
218         @Override
length()219         public int length() {
220             return string.length();
221         }
222 
223         /**
224          * {@inheritDoc}
225          * @stable ICU 67
226          */
227         @Override
charAt(int index)228         public char charAt(int index) {
229             return string.charAt(index);
230         }
231 
232         /**
233          * {@inheritDoc}
234          * @stable ICU 67
235          */
236         @Override
subSequence(int start, int end)237         public CharSequence subSequence(int start, int end) {
238             return string.subString(start, end);
239         }
240 
241         /**
242          * {@inheritDoc}
243          * @stable ICU 67
244          */
245         @Override
appendTo(A appendable)246         public <A extends Appendable> A appendTo(A appendable) {
247             return Utility.appendTo(string, appendable);
248         }
249 
250         /**
251          * {@inheritDoc}
252          * @stable ICU 67
253          */
254         @Override
nextPosition(ConstrainedFieldPosition cfpos)255         public boolean nextPosition(ConstrainedFieldPosition cfpos) {
256             return FormattedValueStringBuilderImpl.nextPosition(string, cfpos, null);
257         }
258 
259         /**
260          * {@inheritDoc}
261          * @stable ICU 67
262          */
263         @Override
toCharacterIterator()264         public AttributedCharacterIterator toCharacterIterator() {
265             return FormattedValueStringBuilderImpl.toCharacterIterator(string, null);
266         }
267     }
268 
/**
 * <b>Internal:</b> Builds a ListFormatter directly from the four LDML list
 * patterns. Each pattern is compiled with exactly two arguments required,
 * and the formatter is created without a locale.
 *
 * @param two
 *            pattern for a two-item list: {0} is the first item and {1}
 *            the second.
 * @param start
 *            pattern for the start of a longer list: {0} is the first
 *            item and {1} the rest.
 * @param middle
 *            pattern for the middle of a longer list: {0} is the first
 *            part of the list and {1} the rest of the list.
 * @param end
 *            pattern for the end of a longer list: {0} is the first part
 *            of the list and {1} the last item.
 * @internal
 * @deprecated This API is ICU internal only.
 */
@Deprecated
public ListFormatter(String two, String start, String middle, String end) {
    // A delegating constructor call must come first, so each pattern is
    // compiled inline with its own scratch buffer.
    this(compilePattern(two, new StringBuilder()),
         compilePattern(start, new StringBuilder()),
         compilePattern(middle, new StringBuilder()),
         compilePattern(end, new StringBuilder()),
         null);
}
297 
// Core constructor; all four patterns must already be compiled.
// locale may be null, in which case no language-specific handling applies.
private ListFormatter(String two, String start, String middle, String end, ULocale locale) {
    // locale is assigned first because createPatternHandler() reads this.locale.
    this.locale = locale;
    this.start = start;
    this.middle = middle;
    this.patternHandler = createPatternHandler(two, end);
}
304 
compilePattern(String pattern, StringBuilder sb)305     private static String compilePattern(String pattern, StringBuilder sb) {
306         return SimpleFormatterImpl.compileToStringMinMaxArguments(pattern, sb, 2, 2);
307     }
308 
309     /**
310      * Create a list formatter that is appropriate for a locale.
311      *
312      * @param locale
313      *            the locale in question.
314      * @return ListFormatter
315      * @stable ICU 67
316      */
getInstance(ULocale locale, Type type, Width width)317     public static ListFormatter getInstance(ULocale locale, Type type, Width width) {
318         String styleName = typeWidthToStyleString(type, width);
319         if (styleName == null) {
320             throw new IllegalArgumentException("Invalid list format type/width");
321         }
322         return cache.get(locale, styleName);
323     }
324 
325     /**
326      * Create a list formatter that is appropriate for a locale.
327      *
328      * @param locale
329      *            the locale in question.
330      * @return ListFormatter
331      * @stable ICU 67
332      */
getInstance(Locale locale, Type type, Width width)333     public static ListFormatter getInstance(Locale locale, Type type, Width width) {
334         return getInstance(ULocale.forLocale(locale), type, width);
335     }
336 
337     /**
338      * Create a list formatter that is appropriate for a locale.
339      *
340      * @param locale
341      *            the locale in question.
342      * @return ListFormatter
343      * @stable ICU 50
344      */
getInstance(ULocale locale)345     public static ListFormatter getInstance(ULocale locale) {
346       return getInstance(locale, Type.AND, Width.WIDE);
347     }
348 
349     /**
350      * Create a list formatter that is appropriate for a locale.
351      *
352      * @param locale
353      *            the locale in question.
354      * @return ListFormatter
355      * @stable ICU 50
356      */
getInstance(Locale locale)357     public static ListFormatter getInstance(Locale locale) {
358         return getInstance(ULocale.forLocale(locale), Type.AND, Width.WIDE);
359     }
360 
361     /**
362      * Create a list formatter that is appropriate for the default FORMAT locale.
363      *
364      * @return ListFormatter
365      * @stable ICU 50
366      */
getInstance()367     public static ListFormatter getInstance() {
368         return getInstance(ULocale.getDefault(ULocale.Category.FORMAT));
369     }
370 
371     /**
372      * Format a list of objects.
373      *
374      * @param items
375      *            items to format. The toString() method is called on each.
376      * @return items formatted into a string
377      * @stable ICU 50
378      */
format(Object... items)379     public String format(Object... items) {
380         return format(Arrays.asList(items));
381     }
382 
383     /**
384      * Format a collection of objects. The toString() method is called on each.
385      *
386      * @param items
387      *            items to format. The toString() method is called on each.
388      * @return items formatted into a string
389      * @stable ICU 50
390      */
format(Collection<?> items)391     public String format(Collection<?> items) {
392         return formatImpl(items, false).toString();
393     }
394 
395     /**
396      * Format a list of objects to a FormattedList. You can access the offsets
397      * of each element from the FormattedList.
398      *
399      * @param items
400      *            items to format. The toString() method is called on each.
401      * @return items formatted into a FormattedList
402      * @stable ICU 67
403      */
formatToValue(Object... items)404     public FormattedList formatToValue(Object... items) {
405         return formatToValue(Arrays.asList(items));
406     }
407 
408 
409     /**
410      * Format a collection of objects to a FormattedList. You can access the offsets
411      * of each element from the FormattedList.
412      *
413      * @param items
414      *            items to format. The toString() method is called on each.
415      * @return items formatted into a FormattedList
416      * @stable ICU 67
417      */
formatToValue(Collection<?> items)418     public FormattedList formatToValue(Collection<?> items) {
419         return formatImpl(items, true).toValue();
420     }
421 
422     // Formats a collection of objects and returns the formatted string plus the offset
423     // in the string where the index th element appears. index is zero based. If index is
424     // negative or greater than or equal to the size of items then this function returns -1 for
425     // the offset.
formatImpl(Collection<?> items, boolean needsFields)426     FormattedListBuilder formatImpl(Collection<?> items, boolean needsFields) {
427         Iterator<?> it = items.iterator();
428         int count = items.size();
429         switch (count) {
430         case 0:
431             return new FormattedListBuilder("", needsFields);
432         case 1:
433             return new FormattedListBuilder(it.next(), needsFields);
434         case 2:
435             Object first = it.next();
436             Object second = it.next();
437             return new FormattedListBuilder(first, needsFields)
438                 .append(patternHandler.getTwoPattern(String.valueOf(second)), second, 1);
439         }
440         FormattedListBuilder builder = new FormattedListBuilder(it.next(), needsFields);
441         builder.append(start, it.next(), 1);
442         for (int idx = 2; idx < count - 1; ++idx) {
443             builder.append(middle, it.next(), idx);
444         }
445         Object last = it.next();
446         return builder.append(patternHandler.getEndPattern(String.valueOf(last)), last, count - 1);
447     }
448 
449     // A static handler just returns the pattern without considering the input text.
450     private static final class StaticHandler implements PatternHandler {
StaticHandler(String two, String end)451         StaticHandler(String two, String end) {
452             twoPattern = two;
453             endPattern = end;
454         }
455 
456         @Override
getTwoPattern(String text)457         public String getTwoPattern(String text) { return twoPattern; }
458 
459         @Override
getEndPattern(String text)460         public String getEndPattern(String text) { return endPattern; }
461 
462         private final String twoPattern;
463         private final String endPattern;
464     }
465 
466     // A contextual handler returns one of the two patterns depending on whether the text matched the regexp.
467     private static final class ContextualHandler implements PatternHandler {
ContextualHandler(Pattern regexp, String thenTwo, String elseTwo, String thenEnd, String elseEnd)468         ContextualHandler(Pattern regexp, String thenTwo, String elseTwo, String thenEnd, String elseEnd) {
469             this.regexp = regexp;
470             thenTwoPattern = thenTwo;
471             elseTwoPattern = elseTwo;
472             thenEndPattern = thenEnd;
473             elseEndPattern = elseEnd;
474         }
475 
476         @Override
getTwoPattern(String text)477         public String getTwoPattern(String text) {
478             if(regexp.matcher(text).matches()) {
479                 return thenTwoPattern;
480             } else {
481                 return elseTwoPattern;
482             }
483         }
484 
485         @Override
getEndPattern(String text)486         public String getEndPattern(String text) {
487             if(regexp.matcher(text).matches()) {
488                 return thenEndPattern;
489             } else {
490                 return elseEndPattern;
491             }
492         }
493 
494         private final Pattern regexp;
495         private final String thenTwoPattern;
496         private final String elseTwoPattern;
497         private final String thenEndPattern;
498         private final String elseEndPattern;
499 
500     }
501 
502     // Pattern in the ICU Data which might be replaced y by e.
503     private static final String compiledY = compilePattern("{0} y {1}", new StringBuilder());
504 
505     // The new pattern to replace y to e
506     private static final String compiledE = compilePattern("{0} e {1}", new StringBuilder());
507 
508     // Pattern in the ICU Data which might be replaced o by u.
509     private static final String compiledO = compilePattern("{0} o {1}", new StringBuilder());
510 
511     // The new pattern, replacing o by u.
512     private static final String compiledU = compilePattern("{0} u {1}", new StringBuilder());
513 
514     // Condition to change to e.
515     // Starts with "hi" or "i" but not with "hie" nor "hia".
516     private static final Pattern changeToE = Pattern.compile("(i.*|hi|hi[^ae].*)", Pattern.CASE_INSENSITIVE);
517 
518     // Condition to change to u.
519     // Starts with "o", "ho", and "8". Also "11" by itself.
520     private static final Pattern changeToU = Pattern.compile("((o|ho|8).*|11)", Pattern.CASE_INSENSITIVE);
521 
522     // Pattern in the ICU Data which might need to add a DASH after VAV.
523     private static final String compiledVav = compilePattern("{0} \u05D5{1}", new StringBuilder());
524 
525     // Pattern to add a DASH after VAV.
526     private static final String compiledVavDash = compilePattern("{0} \u05D5-{1}", new StringBuilder());
527 
528     // Condition to change to VAV follow by a dash.
529     // Starts with non Hebrew letter.
530     private static final Pattern changeToVavDash = Pattern.compile("^[\\P{InHebrew}].*$");
531 
532     // A factory function to create function based on locale
533     // Handle specal case of Spanish and Hebrew
createPatternHandler(String two, String end)534     private PatternHandler createPatternHandler(String two, String end) {
535         if (this.locale != null) {
536             String language = this.locale.getLanguage();
537             if (language.equals("es")) {
538                 boolean twoIsY = two.equals(compiledY);
539                 boolean endIsY = end.equals(compiledY);
540                 if (twoIsY || endIsY) {
541                     return new ContextualHandler(
542                         changeToE, twoIsY ? compiledE : two, two, endIsY ? compiledE : end, end);
543                 }
544                 boolean twoIsO = two.equals(compiledO);
545                 boolean endIsO = end.equals(compiledO);
546                 if (twoIsO || endIsO) {
547                     return new ContextualHandler(
548                         changeToU, twoIsO ? compiledU : two, two, endIsO ? compiledU : end, end);
549                 }
550             } else if (language.equals("he") || language.equals("iw")) {
551                 boolean twoIsVav = two.equals(compiledVav);
552                 boolean endIsVav = end.equals(compiledVav);
553                 if (twoIsVav || endIsVav) {
554                     return new ContextualHandler(changeToVavDash,
555                         twoIsVav ? compiledVavDash : two, two, endIsVav ? compiledVavDash : end, end);
556                 }
557             }
558         }
559         return new StaticHandler(two, end);
560     }
561 
562     /**
563      * Returns the pattern to use for a particular item count.
564      * @param count the item count.
565      * @return the pattern with {0}, {1}, {2}, etc. For English,
566      * getPatternForNumItems(3) == "{0}, {1}, and {2}"
567      * @throws IllegalArgumentException when count is 0 or negative.
568      * @stable ICU 52
569      */
getPatternForNumItems(int count)570     public String getPatternForNumItems(int count) {
571         if (count <= 0) {
572             throw new IllegalArgumentException("count must be > 0");
573         }
574         ArrayList<String> list = new ArrayList<>();
575         for (int i = 0; i < count; i++) {
576             list.add(String.format("{%d}", i));
577         }
578         return format(list);
579     }
580 
581     /**
582      * Returns the locale of this object.
583      * @internal
584      * @deprecated This API is ICU internal only.
585      */
586     @Deprecated
getLocale()587     public ULocale getLocale() {
588         return locale;
589     }
590 
591     // Builds a formatted list
592     static class FormattedListBuilder {
593         private FormattedStringBuilder string;
594         boolean needsFields;
595 
596         // Start is the first object in the list; If needsFields is true, enable the slightly
597         // more expensive code path that records offsets of each element.
FormattedListBuilder(Object start, boolean needsFields)598         public FormattedListBuilder(Object start, boolean needsFields) {
599             string = new FormattedStringBuilder();
600             this.needsFields = needsFields;
601             string.setAppendableField(Field.LITERAL);
602             appendElement(start, 0);
603         }
604 
605         // Appends additional object. pattern is a template indicating where the new object gets
606         // added in relation to the rest of the list. {0} represents the rest of the list; {1}
607         // represents the new object in pattern. next is the object to be added. position is the
608         // index of the next object in the list of inputs.
append(String compiledPattern, Object next, int position)609         public FormattedListBuilder append(String compiledPattern, Object next, int position) {
610             assert SimpleFormatterImpl.getArgumentLimit(compiledPattern) == 2;
611             string.setAppendIndex(0);
612             long state = 0;
613             while (true) {
614                 state = IterInternal.step(state, compiledPattern, string);
615                 if (state == IterInternal.DONE) {
616                     break;
617                 }
618                 int argIndex = IterInternal.getArgIndex(state);
619                 if (argIndex == 0) {
620                     string.setAppendIndex(string.length());
621                 } else {
622                     appendElement(next, position);
623                 }
624             }
625             return this;
626         }
627 
appendElement(Object element, int position)628         private void appendElement(Object element, int position) {
629             String elementString = element.toString();
630             if (needsFields) {
631                 SpanFieldPlaceholder field = new SpanFieldPlaceholder();
632                 field.spanField = SpanField.LIST_SPAN;
633                 field.normalField = Field.ELEMENT;
634                 field.value = position;
635                 field.start = -1;
636                 field.length = elementString.length();
637                 string.append(elementString, field);
638             } else {
639                 string.append(elementString, null);
640             }
641         }
642 
appendTo(Appendable appendable)643         public void appendTo(Appendable appendable) {
644             Utility.appendTo(string, appendable);
645         }
646 
getOffset(int fieldPositionFoundIndex)647         public int getOffset(int fieldPositionFoundIndex) {
648             return FormattedValueStringBuilderImpl.findSpan(string, fieldPositionFoundIndex);
649         }
650 
651         @Override
toString()652         public String toString() {
653             return string.toString();
654         }
655 
toValue()656         public FormattedList toValue() {
657             return new FormattedList(string);
658         }
659     }
660 
661     private static class Cache {
662         private final ICUCache<String, ListFormatter> cache =
663             new SimpleCache<>();
664 
get(ULocale locale, String style)665         public ListFormatter get(ULocale locale, String style) {
666             String key = String.format("%s:%s", locale.toString(), style);
667             ListFormatter result = cache.get(key);
668             if (result == null) {
669                 result = load(locale, style);
670                 cache.put(key, result);
671             }
672             return result;
673         }
674 
load(ULocale ulocale, String style)675         private static ListFormatter load(ULocale ulocale, String style) {
676             ICUResourceBundle r = (ICUResourceBundle)UResourceBundle.
677                     getBundleInstance(ICUData.ICU_BASE_NAME, ulocale);
678             StringBuilder sb = new StringBuilder();
679             return new ListFormatter(
680                 compilePattern(r.getWithFallback("listPattern/" + style + "/2").getString(), sb),
681                 compilePattern(r.getWithFallback("listPattern/" + style + "/start").getString(), sb),
682                 compilePattern(r.getWithFallback("listPattern/" + style + "/middle").getString(), sb),
683                 compilePattern(r.getWithFallback("listPattern/" + style + "/end").getString(), sb),
684                 ulocale);
685         }
686     }
687 
688     static Cache cache = new Cache();
689 
typeWidthToStyleString(Type type, Width width)690     static String typeWidthToStyleString(Type type, Width width) {
691         switch (type) {
692             case AND:
693                 switch (width) {
694                     case WIDE:
695                         return "standard";
696                     case SHORT:
697                         return "standard-short";
698                     case NARROW:
699                         return "standard-narrow";
700                 }
701                 break;
702 
703             case OR:
704                 switch (width) {
705                     case WIDE:
706                         return "or";
707                     case SHORT:
708                         return "or-short";
709                     case NARROW:
710                         return "or-narrow";
711                 }
712                 break;
713 
714             case UNITS:
715                 switch (width) {
716                     case WIDE:
717                         return "unit";
718                     case SHORT:
719                         return "unit-short";
720                     case NARROW:
721                         return "unit-narrow";
722                 }
723         }
724 
725         return null;
726     }
727 }
728