• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
 *******************************************************************************
 * Copyright (C) 2014-2016, International Business Machines Corporation and
 * others. All Rights Reserved.
 *******************************************************************************
 */
9 package com.ibm.icu.impl;
10 
11 import java.io.IOException;
12 import java.text.Format;
13 
14 import com.ibm.icu.util.ICUUncheckedIOException;
15 
/**
 * Formats simple patterns like "{1} was born in {0}".
 * Internal version of {@link com.ibm.icu.text.SimpleFormatter}
 * with only static methods, to avoid wrapper objects.
 *
 * <p>This class "compiles" pattern strings into a binary format
 * and implements formatting etc. based on that.
 *
 * <p>Format:
 * Index 0: One more than the highest argument number.
 * Followed by zero or more arguments or literal-text segments.
 *
 * <p>An argument is stored as its number, less than ARG_NUM_LIMIT.
 * A literal-text segment is stored as its length (at least 1) offset by ARG_NUM_LIMIT,
 * followed by that many chars.
 */
32 public final class SimpleFormatterImpl {
33     /**
34      * Argument numbers must be smaller than this limit.
35      * Text segment lengths are offset by this much.
36      * This is currently the only unused char value in compiled patterns,
37      * except it is the maximum value of the first unit (max arg +1).
38      */
39     private static final int ARG_NUM_LIMIT = 0x100;
40     private static final char LEN1_CHAR = (char)(ARG_NUM_LIMIT + 1);
41     private static final char LEN2_CHAR = (char)(ARG_NUM_LIMIT + 2);
42     private static final char LEN3_CHAR = (char)(ARG_NUM_LIMIT + 3);
43     /**
44      * Initial and maximum char/UChar value set for a text segment.
45      * Segment length char values are from ARG_NUM_LIMIT+1 to this value here.
46      * Normally 0xffff, but can be as small as ARG_NUM_LIMIT+1 for testing.
47      */
48     private static final char SEGMENT_LENGTH_ARGUMENT_CHAR = (char)0xffff;
49     /**
50      * Maximum length of a text segment. Longer segments are split into shorter ones.
51      */
52     private static final int MAX_SEGMENT_LENGTH = SEGMENT_LENGTH_ARGUMENT_CHAR - ARG_NUM_LIMIT;
53 
54     /** "Intern" some common patterns. */
55     private static final String[][] COMMON_PATTERNS = {
56         { "{0} {1}", "\u0002\u0000" + LEN1_CHAR + " \u0001" },
57         { "{0} ({1})", "\u0002\u0000" + LEN2_CHAR + " (\u0001" + LEN1_CHAR + ')' },
58         { "{0}, {1}", "\u0002\u0000" + LEN2_CHAR + ", \u0001" },
59         { "{0} – {1}", "\u0002\u0000" + LEN3_CHAR + " – \u0001" },  // en dash
60     };
61 
62     /** Use only static methods. */
SimpleFormatterImpl()63     private SimpleFormatterImpl() {}
64 
65     /**
66      * Creates a compiled form of the pattern string, for use with appropriate static methods.
67      * The number of arguments checked against the given limits is the
68      * highest argument number plus one, not the number of occurrences of arguments.
69      *
70      * @param pattern The pattern string.
71      * @param sb A StringBuilder instance which may or may not be used.
72      * @param min The pattern must have at least this many arguments.
73      * @param max The pattern must have at most this many arguments.
74      * @return The compiled-pattern string.
75      * @throws IllegalArgumentException for bad argument syntax and too few or too many arguments.
76      */
compileToStringMinMaxArguments( CharSequence pattern, StringBuilder sb, int min, int max)77     public static String compileToStringMinMaxArguments(
78             CharSequence pattern, StringBuilder sb, int min, int max) {
79         // Return some precompiled common two-argument patterns.
80         if (min <= 2 && 2 <= max) {
81             for (String[] pair : COMMON_PATTERNS) {
82                 if (pair[0].contentEquals(pattern)) {
83                     assert pair[1].charAt(0) == 2;
84                     return pair[1];
85                 }
86             }
87         }
88         // Parse consistent with MessagePattern, but
89         // - support only simple numbered arguments
90         // - build a simple binary structure into the result string
91         int patternLength = pattern.length();
92         sb.ensureCapacity(patternLength);
93         // Reserve the first char for the number of arguments.
94         sb.setLength(1);
95         int textLength = 0;
96         int maxArg = -1;
97         boolean inQuote = false;
98         for (int i = 0; i < patternLength;) {
99             char c = pattern.charAt(i++);
100             if (c == '\'') {
101                 if (i < patternLength && (c = pattern.charAt(i)) == '\'') {
102                     // double apostrophe, skip the second one
103                     ++i;
104                 } else if (inQuote) {
105                     // skip the quote-ending apostrophe
106                     inQuote = false;
107                     continue;
108                 } else if (c == '{' || c == '}') {
109                     // Skip the quote-starting apostrophe, find the end of the quoted literal text.
110                     ++i;
111                     inQuote = true;
112                 } else {
113                     // The apostrophe is part of literal text.
114                     c = '\'';
115                 }
116             } else if (!inQuote && c == '{') {
117                 if (textLength > 0) {
118                     sb.setCharAt(sb.length() - textLength - 1, (char)(ARG_NUM_LIMIT + textLength));
119                     textLength = 0;
120                 }
121                 int argNumber;
122                 if ((i + 1) < patternLength &&
123                         0 <= (argNumber = pattern.charAt(i) - '0') && argNumber <= 9 &&
124                         pattern.charAt(i + 1) == '}') {
125                     i += 2;
126                 } else {
127                     // Multi-digit argument number (no leading zero) or syntax error.
128                     // MessagePattern permits PatternProps.skipWhiteSpace(pattern, index)
129                     // around the number, but this class does not.
130                     int argStart = i - 1;
131                     argNumber = -1;
132                     if (i < patternLength && '1' <= (c = pattern.charAt(i++)) && c <= '9') {
133                         argNumber = c - '0';
134                         while (i < patternLength && '0' <= (c = pattern.charAt(i++)) && c <= '9') {
135                             argNumber = argNumber * 10 + (c - '0');
136                             if (argNumber >= ARG_NUM_LIMIT) {
137                                 break;
138                             }
139                         }
140                     }
141                     if (argNumber < 0 || c != '}') {
142                         throw new IllegalArgumentException(
143                                 "Argument syntax error in pattern \"" + pattern +
144                                 "\" at index " + argStart +
145                                 ": " + pattern.subSequence(argStart, i));
146                     }
147                 }
148                 if (argNumber > maxArg) {
149                     maxArg = argNumber;
150                 }
151                 sb.append((char)argNumber);
152                 continue;
153             }  // else: c is part of literal text
154             // Append c and track the literal-text segment length.
155             if (textLength == 0) {
156                 // Reserve a char for the length of a new text segment, preset the maximum length.
157                 sb.append(SEGMENT_LENGTH_ARGUMENT_CHAR);
158             }
159             sb.append(c);
160             if (++textLength == MAX_SEGMENT_LENGTH) {
161                 textLength = 0;
162             }
163         }
164         if (textLength > 0) {
165             sb.setCharAt(sb.length() - textLength - 1, (char)(ARG_NUM_LIMIT + textLength));
166         }
167         int argCount = maxArg + 1;
168         if (argCount < min) {
169             throw new IllegalArgumentException(
170                     "Fewer than minimum " + min + " arguments in pattern \"" + pattern + "\"");
171         }
172         if (argCount > max) {
173             throw new IllegalArgumentException(
174                     "More than maximum " + max + " arguments in pattern \"" + pattern + "\"");
175         }
176         sb.setCharAt(0, (char)argCount);
177         return sb.toString();
178     }
179 
180     /**
181      * @param compiledPattern Compiled form of a pattern string.
182      * @return The max argument number + 1.
183      */
getArgumentLimit(String compiledPattern)184     public static int getArgumentLimit(String compiledPattern) {
185         return compiledPattern.charAt(0);
186     }
187 
188     /**
189      * Formats the given values.
190      *
191      * @param compiledPattern Compiled form of a pattern string.
192      */
formatCompiledPattern(String compiledPattern, CharSequence... values)193     public static String formatCompiledPattern(String compiledPattern, CharSequence... values) {
194         return formatAndAppend(compiledPattern, new StringBuilder(), null, values).toString();
195     }
196 
197     /**
198      * Formats the not-compiled pattern with the given values.
199      * Equivalent to compileToStringMinMaxArguments() followed by formatCompiledPattern().
200      * The number of arguments checked against the given limits is the
201      * highest argument number plus one, not the number of occurrences of arguments.
202      *
203      * @param pattern Not-compiled form of a pattern string.
204      * @param min The pattern must have at least this many arguments.
205      * @param max The pattern must have at most this many arguments.
206      * @return The compiled-pattern string.
207      * @throws IllegalArgumentException for bad argument syntax and too few or too many arguments.
208      */
formatRawPattern(String pattern, int min, int max, CharSequence... values)209     public static String formatRawPattern(String pattern, int min, int max, CharSequence... values) {
210         StringBuilder sb = new StringBuilder();
211         String compiledPattern = compileToStringMinMaxArguments(pattern, sb, min, max);
212         sb.setLength(0);
213         return formatAndAppend(compiledPattern, sb, null, values).toString();
214     }
215 
216     /**
217      * Formats the given values, appending to the appendTo builder.
218      *
219      * @param compiledPattern Compiled form of a pattern string.
220      * @param appendTo Gets the formatted pattern and values appended.
221      * @param offsets offsets[i] receives the offset of where
222      *                values[i] replaced pattern argument {i}.
223      *                Can be null, or can be shorter or longer than values.
224      *                If there is no {i} in the pattern, then offsets[i] is set to -1.
225      * @param values The argument values.
226      *               An argument value must not be the same object as appendTo.
227      *               values.length must be at least getArgumentLimit().
228      *               Can be null if getArgumentLimit()==0.
229      * @return appendTo
230      */
formatAndAppend( String compiledPattern, StringBuilder appendTo, int[] offsets, CharSequence... values)231     public static StringBuilder formatAndAppend(
232             String compiledPattern, StringBuilder appendTo, int[] offsets, CharSequence... values) {
233         int valuesLength = values != null ? values.length : 0;
234         if (valuesLength < getArgumentLimit(compiledPattern)) {
235             throw new IllegalArgumentException("Too few values.");
236         }
237         return format(compiledPattern, values, appendTo, null, true, offsets);
238     }
239 
240     /**
241      * Formats the given values, replacing the contents of the result builder.
242      * May optimize by actually appending to the result if it is the same object
243      * as the value corresponding to the initial argument in the pattern.
244      *
245      * @param compiledPattern Compiled form of a pattern string.
246      * @param result Gets its contents replaced by the formatted pattern and values.
247      * @param offsets offsets[i] receives the offset of where
248      *                values[i] replaced pattern argument {i}.
249      *                Can be null, or can be shorter or longer than values.
250      *                If there is no {i} in the pattern, then offsets[i] is set to -1.
251      * @param values The argument values.
252      *               An argument value may be the same object as result.
253      *               values.length must be at least getArgumentLimit().
254      * @return result
255      */
formatAndReplace( String compiledPattern, StringBuilder result, int[] offsets, CharSequence... values)256     public static StringBuilder formatAndReplace(
257             String compiledPattern, StringBuilder result, int[] offsets, CharSequence... values) {
258         int valuesLength = values != null ? values.length : 0;
259         if (valuesLength < getArgumentLimit(compiledPattern)) {
260             throw new IllegalArgumentException("Too few values.");
261         }
262 
263         // If the pattern starts with an argument whose value is the same object
264         // as the result, then we keep the result contents and append to it.
265         // Otherwise we replace its contents.
266         int firstArg = -1;
267         // If any non-initial argument value is the same object as the result,
268         // then we first copy its contents and use that instead while formatting.
269         String resultCopy = null;
270         if (getArgumentLimit(compiledPattern) > 0) {
271             for (int i = 1; i < compiledPattern.length();) {
272                 int n = compiledPattern.charAt(i++);
273                 if (n < ARG_NUM_LIMIT) {
274                     if (values[n] == result) {
275                         if (i == 2) {
276                             firstArg = n;
277                         } else if (resultCopy == null) {
278                             resultCopy = result.toString();
279                         }
280                     }
281                 } else {
282                     i += n - ARG_NUM_LIMIT;
283                 }
284             }
285         }
286         if (firstArg < 0) {
287             result.setLength(0);
288         }
289         return format(compiledPattern, values, result, resultCopy, false, offsets);
290     }
291 
292     /**
293      * Returns the pattern text with none of the arguments.
294      * Like formatting with all-empty string values.
295      *
296      * @param compiledPattern Compiled form of a pattern string.
297      */
getTextWithNoArguments(String compiledPattern)298     public static String getTextWithNoArguments(String compiledPattern) {
299         int capacity = compiledPattern.length() - 1 - getArgumentLimit(compiledPattern);
300         StringBuilder sb = new StringBuilder(capacity);
301         for (int i = 1; i < compiledPattern.length();) {
302             int segmentLength = compiledPattern.charAt(i++) - ARG_NUM_LIMIT;
303             if (segmentLength > 0) {
304                 int limit = i + segmentLength;
305                 sb.append(compiledPattern, i, limit);
306                 i = limit;
307             }
308         }
309         return sb.toString();
310     }
311 
312     /**
313      * Returns the length of the pattern text with none of the arguments.
314      * @param compiledPattern Compiled form of a pattern string.
315      * @param codePoints true to count code points; false to count code units.
316      * @return The number of code points or code units.
317      */
getLength(String compiledPattern, boolean codePoints)318     public static int getLength(String compiledPattern, boolean codePoints) {
319         int result = 0;
320         for (int i = 1; i < compiledPattern.length();) {
321             int segmentLength = compiledPattern.charAt(i++) - ARG_NUM_LIMIT;
322             if (segmentLength > 0) {
323                 int limit = i + segmentLength;
324                 if (codePoints) {
325                     result += Character.codePointCount(compiledPattern, i, limit);
326                 } else {
327                     result += (limit - i);
328                 }
329                 i = limit;
330             }
331         }
332         return result;
333     }
334 
335     /**
336      * Returns the length in code units of the pattern text up until the first argument.
337      * @param compiledPattern Compiled form of a pattern string.
338      * @return The number of code units.
339      */
getPrefixLength(String compiledPattern)340     public static int getPrefixLength(String compiledPattern) {
341         if (compiledPattern.length() == 1) {
342             return 0;
343         } else if (compiledPattern.charAt(0) == 0) {
344             return compiledPattern.length() - 2;
345         } else if (compiledPattern.charAt(1) <= ARG_NUM_LIMIT) {
346             return 0;
347         } else {
348             return compiledPattern.charAt(1) - ARG_NUM_LIMIT;
349         }
350     }
351 
352     /**
353      * Special case for using FormattedStringBuilder with patterns with 0 or 1 argument.
354      *
355      * With 1 argument, treat the current contents of the FormattedStringBuilder between
356      * start and end as the argument {0}. Insert the extra strings from compiledPattern
357      * to surround the argument in the output.
358      *
359      * With 0 arguments, overwrite the entire contents of the FormattedStringBuilder
360      * between start and end.
361      *
362      * @param compiledPattern Compiled form of a pattern string.
363      * @param field Field to use when adding chars to the output.
364      * @param start The start index of the argument already in the output string.
365      * @param end The end index of the argument already in the output string.
366      * @param output Destination for formatted output.
367      * @return Net number of characters added to the formatted string.
368      */
formatPrefixSuffix( String compiledPattern, Format.Field field, int start, int end, FormattedStringBuilder output)369     public static int formatPrefixSuffix(
370             String compiledPattern,
371             Format.Field field,
372             int start,
373             int end,
374             FormattedStringBuilder output) {
375         int argLimit = getArgumentLimit(compiledPattern);
376         if (argLimit == 0) {
377             // No arguments in compiled pattern; overwrite the entire segment with our string.
378             return output.splice(start, end, compiledPattern, 2, compiledPattern.length(), field);
379         } else {
380             assert argLimit == 1;
381             int suffixOffset;
382             int length = 0;
383             if (compiledPattern.charAt(1) != '\u0000') {
384                 int prefixLength = compiledPattern.charAt(1) - ARG_NUM_LIMIT;
385                 length = output.insert(start, compiledPattern, 2, 2 + prefixLength, field);
386                 suffixOffset = 3 + prefixLength;
387             } else {
388                 suffixOffset = 2;
389             }
390             if (suffixOffset < compiledPattern.length()) {
391                 int suffixLength = compiledPattern.charAt(suffixOffset) - ARG_NUM_LIMIT;
392                 length += output.insert(end + length, compiledPattern, 1 + suffixOffset,
393                         1 + suffixOffset + suffixLength, field);
394             }
395             return length;
396         }
397     }
398 
399     /** Internal iterator interface for maximum efficiency.
400      *
401      * Usage boilerplate:
402      *
403      * <pre>
404      * long state = 0;
405      * while (true) {
406      *     state = IterInternal.step(state, compiledPattern, output);
407      *     if (state == IterInternal.DONE) {
408      *         break;
409      *     }
410      *     int argIndex = IterInternal.getArgIndex(state);
411      *     // Append the string corresponding to argIndex to output
412      * }
413      * </pre>
414      *
415      */
416     public static class IterInternal {
417         public static final long DONE = -1;
418 
step(long state, CharSequence compiledPattern, Appendable output)419         public static long step(long state, CharSequence compiledPattern, Appendable output) {
420             int i = (int) (state >>> 32);
421             assert i < compiledPattern.length();
422             i++;
423             while (i < compiledPattern.length() && compiledPattern.charAt(i) > ARG_NUM_LIMIT) {
424                 int limit = i + compiledPattern.charAt(i) + 1 - ARG_NUM_LIMIT;
425                 try {
426                     output.append(compiledPattern, i + 1, limit);
427                 } catch (IOException e) {
428                     throw new ICUUncheckedIOException(e);
429                 }
430                 i = limit;
431             }
432             if (i == compiledPattern.length()) {
433                 return DONE;
434             }
435             return (((long) i) << 32) | compiledPattern.charAt(i);
436         }
437 
438         public static int getArgIndex(long state) {
439             return (int) state;
440         }
441     }
442 
443     private static StringBuilder format(
444             String compiledPattern, CharSequence[] values,
445             StringBuilder result, String resultCopy, boolean forbidResultAsValue,
446             int[] offsets) {
447         int offsetsLength;
448         if (offsets == null) {
449             offsetsLength = 0;
450         } else {
451             offsetsLength = offsets.length;
452             for (int i = 0; i < offsetsLength; i++) {
453                 offsets[i] = -1;
454             }
455         }
456         for (int i = 1; i < compiledPattern.length();) {
457             int n = compiledPattern.charAt(i++);
458             if (n < ARG_NUM_LIMIT) {
459                 CharSequence value = values[n];
460                 if (value == result) {
461                     if (forbidResultAsValue) {
462                         throw new IllegalArgumentException("Value must not be same object as result");
463                     }
464                     if (i == 2) {
465                         // We are appending to result which is also the first value object.
466                         if (n < offsetsLength) {
467                             offsets[n] = 0;
468                         }
469                     } else {
470                         if (n < offsetsLength) {
471                             offsets[n] = result.length();
472                         }
473                         result.append(resultCopy);
474                     }
475                 } else {
476                     if (n < offsetsLength) {
477                         offsets[n] = result.length();
478                     }
479                     result.append(value);
480                 }
481             } else {
482                 int limit = i + (n - ARG_NUM_LIMIT);
483                 result.append(compiledPattern, i, limit);
484                 i = limit;
485             }
486         }
487         return result;
488     }
489 }
490