• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html#License
3 /*
4  *******************************************************************************
5  * Copyright (C) 2014-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  *******************************************************************************
8  */
9 package com.ibm.icu.impl;
10 
/**
 * Formats simple patterns like "{1} was born in {0}".
 * Internal version of {@link com.ibm.icu.text.SimpleFormatter}
 * with only static methods, to avoid wrapper objects.
 *
 * <p>This class "compiles" pattern strings into a binary format
 * and implements formatting etc. based on that.
 *
 * <p>Compiled format:
 * <ul>
 * <li>Index 0: One more than the highest argument number.
 * <li>Followed by zero or more arguments or literal-text segments.
 * </ul>
 *
 * <p>An argument is stored as its number, less than ARG_NUM_LIMIT.
 * A literal-text segment is stored as its length (at least 1) offset by ARG_NUM_LIMIT,
 * followed by that many chars.
 */
public final class SimpleFormatterImpl {
    /**
     * Argument numbers must be smaller than this limit.
     * Text segment lengths are offset by this much.
     * This is currently the only unused char value in compiled patterns,
     * except it is the maximum value of the first unit (max arg +1).
     */
    private static final int ARG_NUM_LIMIT = 0x100;
    /** Length units for literal-text segments of 1, 2 and 3 chars (used by COMMON_PATTERNS). */
    private static final char LEN1_CHAR = (char)(ARG_NUM_LIMIT + 1);
    private static final char LEN2_CHAR = (char)(ARG_NUM_LIMIT + 2);
    private static final char LEN3_CHAR = (char)(ARG_NUM_LIMIT + 3);
    /**
     * Initial and maximum char/UChar value set for a text segment.
     * Segment length char values are from ARG_NUM_LIMIT+1 to this value here.
     * Normally 0xffff, but can be as small as ARG_NUM_LIMIT+1 for testing.
     */
    private static final char SEGMENT_LENGTH_ARGUMENT_CHAR = (char)0xffff;
    /**
     * Maximum length of a text segment. Longer segments are split into shorter ones.
     */
    private static final int MAX_SEGMENT_LENGTH = SEGMENT_LENGTH_ARGUMENT_CHAR - ARG_NUM_LIMIT;

    /**
     * "Intern" some common patterns.
     * Each entry is { pattern string, precompiled form }.
     */
    private static final String[][] COMMON_PATTERNS = {
        { "{0} {1}", "\u0002\u0000" + LEN1_CHAR + " \u0001" },
        { "{0} ({1})", "\u0002\u0000" + LEN2_CHAR + " (\u0001" + LEN1_CHAR + ')' },
        { "{0}, {1}", "\u0002\u0000" + LEN2_CHAR + ", \u0001" },
        { "{0} \u2013 {1}", "\u0002\u0000" + LEN3_CHAR + " \u2013 \u0001" },  // en dash
    };

    /** Use only static methods. */
    private SimpleFormatterImpl() {}

    /**
     * Creates a compiled form of the pattern string, for use with appropriate static methods.
     * The number of arguments checked against the given limits is the
     * highest argument number plus one, not the number of occurrences of arguments.
     *
     * @param pattern The pattern string.
     * @param sb A StringBuilder instance which may or may not be used.
     *           It is not used when one of the precompiled common patterns matches;
     *           otherwise its contents are overwritten with the compiled pattern.
     * @param min The pattern must have at least this many arguments.
     * @param max The pattern must have at most this many arguments.
     * @return The compiled-pattern string.
     * @throws IllegalArgumentException for bad argument syntax and too few or too many arguments.
     */
    public static String compileToStringMinMaxArguments(
            CharSequence pattern, StringBuilder sb, int min, int max) {
        // Return some precompiled common two-argument patterns.
        if (min <= 2 && 2 <= max) {
            for (String[] pair : COMMON_PATTERNS) {
                if (pair[0].contentEquals(pattern)) {
                    assert pair[1].charAt(0) == 2;
                    return pair[1];
                }
            }
        }
        // Parse consistent with MessagePattern, but
        // - support only simple numbered arguments
        // - build a simple binary structure into the result string
        int patternLength = pattern.length();
        sb.ensureCapacity(patternLength);
        // Reserve the first char for the number of arguments.
        sb.setLength(1);
        // Number of chars appended so far to the currently open literal-text segment.
        int textLength = 0;
        int maxArg = -1;
        boolean inQuote = false;
        for (int i = 0; i < patternLength;) {
            char c = pattern.charAt(i++);
            if (c == '\'') {
                // Note: the condition below peeks at the next char and stores it in c.
                if (i < patternLength && (c = pattern.charAt(i)) == '\'') {
                    // double apostrophe, skip the second one
                    ++i;
                } else if (inQuote) {
                    // skip the quote-ending apostrophe
                    inQuote = false;
                    continue;
                } else if (c == '{' || c == '}') {
                    // Skip the quote-starting apostrophe, find the end of the quoted literal text.
                    // c (the brace) is appended as literal text below.
                    ++i;
                    inQuote = true;
                } else {
                    // The apostrophe is part of literal text.
                    c = '\'';
                }
            } else if (!inQuote && c == '{') {
                if (textLength > 0) {
                    // Close the open text segment: overwrite its reserved length unit.
                    sb.setCharAt(sb.length() - textLength - 1, (char)(ARG_NUM_LIMIT + textLength));
                    textLength = 0;
                }
                int argNumber;
                // Fast path: a single digit immediately followed by '}'.
                if ((i + 1) < patternLength &&
                        0 <= (argNumber = pattern.charAt(i) - '0') && argNumber <= 9 &&
                        pattern.charAt(i + 1) == '}') {
                    i += 2;
                } else {
                    // Multi-digit argument number (no leading zero) or syntax error.
                    // MessagePattern permits PatternProps.skipWhiteSpace(pattern, index)
                    // around the number, but this class does not.
                    int argStart = i - 1;
                    argNumber = -1;
                    if (i < patternLength && '1' <= (c = pattern.charAt(i++)) && c <= '9') {
                        argNumber = c - '0';
                        while (i < patternLength && '0' <= (c = pattern.charAt(i++)) && c <= '9') {
                            argNumber = argNumber * 10 + (c - '0');
                            if (argNumber >= ARG_NUM_LIMIT) {
                                // Too large; c is still a digit, so the check below throws.
                                break;
                            }
                        }
                    }
                    // c is the last char consumed; a well-formed argument ends with '}'.
                    if (argNumber < 0 || c != '}') {
                        throw new IllegalArgumentException(
                                "Argument syntax error in pattern \"" + pattern +
                                "\" at index " + argStart +
                                ": " + pattern.subSequence(argStart, i));
                    }
                }
                if (argNumber > maxArg) {
                    maxArg = argNumber;
                }
                sb.append((char)argNumber);
                continue;
            }  // else: c is part of literal text
            // Append c and track the literal-text segment length.
            if (textLength == 0) {
                // Reserve a char for the length of a new text segment, preset the maximum length.
                sb.append(SEGMENT_LENGTH_ARGUMENT_CHAR);
            }
            sb.append(c);
            if (++textLength == MAX_SEGMENT_LENGTH) {
                // The segment is full; its preset length unit is already correct.
                // The next literal char starts a new segment.
                textLength = 0;
            }
        }
        if (textLength > 0) {
            // Close the trailing text segment.
            sb.setCharAt(sb.length() - textLength - 1, (char)(ARG_NUM_LIMIT + textLength));
        }
        int argCount = maxArg + 1;
        if (argCount < min) {
            throw new IllegalArgumentException(
                    "Fewer than minimum " + min + " arguments in pattern \"" + pattern + "\"");
        }
        if (argCount > max) {
            throw new IllegalArgumentException(
                    "More than maximum " + max + " arguments in pattern \"" + pattern + "\"");
        }
        sb.setCharAt(0, (char)argCount);
        return sb.toString();
    }

    /**
     * @param compiledPattern Compiled form of a pattern string.
     * @return The max argument number + 1.
     */
    public static int getArgumentLimit(String compiledPattern) {
        return compiledPattern.charAt(0);
    }

    /**
     * Formats the given values.
     *
     * @param compiledPattern Compiled form of a pattern string.
     * @param values The argument values.
     *               values.length must be at least getArgumentLimit().
     * @return The formatted pattern and values.
     * @throws IllegalArgumentException if there are too few values.
     */
    public static String formatCompiledPattern(String compiledPattern, CharSequence... values) {
        return formatAndAppend(compiledPattern, new StringBuilder(), null, values).toString();
    }

    /**
     * Formats the not-compiled pattern with the given values.
     * Equivalent to compileToStringMinMaxArguments() followed by formatCompiledPattern().
     * The number of arguments checked against the given limits is the
     * highest argument number plus one, not the number of occurrences of arguments.
     *
     * @param pattern Not-compiled form of a pattern string.
     * @param min The pattern must have at least this many arguments.
     * @param max The pattern must have at most this many arguments.
     * @param values The argument values.
     *               values.length must be at least the pattern's argument limit.
     * @return The formatted pattern and values.
     * @throws IllegalArgumentException for bad argument syntax, too few or too many arguments,
     *         and too few values.
     */
    public static String formatRawPattern(String pattern, int min, int max, CharSequence... values) {
        StringBuilder sb = new StringBuilder();
        String compiledPattern = compileToStringMinMaxArguments(pattern, sb, min, max);
        // Reuse the builder that was (possibly) used for compiling as the output buffer.
        sb.setLength(0);
        return formatAndAppend(compiledPattern, sb, null, values).toString();
    }

    /**
     * Formats the given values, appending to the appendTo builder.
     *
     * @param compiledPattern Compiled form of a pattern string.
     * @param appendTo Gets the formatted pattern and values appended.
     * @param offsets offsets[i] receives the offset of where
     *                values[i] replaced pattern argument {i}.
     *                Can be null, or can be shorter or longer than values.
     *                If there is no {i} in the pattern, then offsets[i] is set to -1.
     * @param values The argument values.
     *               An argument value must not be the same object as appendTo.
     *               values.length must be at least getArgumentLimit().
     *               Can be null if getArgumentLimit()==0.
     * @return appendTo
     * @throws IllegalArgumentException if there are too few values,
     *         or if a used value is the same object as appendTo.
     */
    public static StringBuilder formatAndAppend(
            String compiledPattern, StringBuilder appendTo, int[] offsets, CharSequence... values) {
        checkEnoughValues(compiledPattern, values);
        return format(compiledPattern, values, appendTo, null, true, offsets);
    }

    /**
     * Formats the given values, replacing the contents of the result builder.
     * May optimize by actually appending to the result if it is the same object
     * as the value corresponding to the initial argument in the pattern.
     *
     * @param compiledPattern Compiled form of a pattern string.
     * @param result Gets its contents replaced by the formatted pattern and values.
     * @param offsets offsets[i] receives the offset of where
     *                values[i] replaced pattern argument {i}.
     *                Can be null, or can be shorter or longer than values.
     *                If there is no {i} in the pattern, then offsets[i] is set to -1.
     * @param values The argument values.
     *               An argument value may be the same object as result.
     *               values.length must be at least getArgumentLimit().
     * @return result
     * @throws IllegalArgumentException if there are too few values.
     */
    public static StringBuilder formatAndReplace(
            String compiledPattern, StringBuilder result, int[] offsets, CharSequence... values) {
        checkEnoughValues(compiledPattern, values);

        // If the pattern starts with an argument whose value is the same object
        // as the result, then we keep the result contents and append to it.
        // Otherwise we replace its contents.
        int firstArg = -1;
        // If any non-initial argument value is the same object as the result,
        // then we first copy its contents and use that instead while formatting.
        String resultCopy = null;
        if (getArgumentLimit(compiledPattern) > 0) {
            for (int i = 1; i < compiledPattern.length();) {
                int n = compiledPattern.charAt(i++);
                if (n < ARG_NUM_LIMIT) {
                    if (values[n] == result) {
                        if (i == 2) {
                            // The argument is the very first unit after the header.
                            firstArg = n;
                        } else if (resultCopy == null) {
                            resultCopy = result.toString();
                        }
                    }
                } else {
                    // Skip the chars of the literal-text segment.
                    i += n - ARG_NUM_LIMIT;
                }
            }
        }
        if (firstArg < 0) {
            result.setLength(0);
        }
        return format(compiledPattern, values, result, resultCopy, false, offsets);
    }

    /**
     * Returns the pattern text with none of the arguments.
     * Like formatting with all-empty string values.
     *
     * @param compiledPattern Compiled form of a pattern string.
     * @return The concatenation of all literal-text segments of the pattern.
     */
    public static String getTextWithNoArguments(String compiledPattern) {
        // Rough size estimate only. It becomes negative when the highest argument number
        // exceeds the number of remaining units (e.g. the pattern "{5}"), and a negative
        // StringBuilder capacity throws NegativeArraySizeException, so clamp it at zero.
        int capacity = Math.max(
                0, compiledPattern.length() - 1 - getArgumentLimit(compiledPattern));
        StringBuilder sb = new StringBuilder(capacity);
        for (int i = 1; i < compiledPattern.length();) {
            int segmentLength = compiledPattern.charAt(i++) - ARG_NUM_LIMIT;
            if (segmentLength > 0) {
                // Literal-text segment: copy its chars. (Argument units are simply skipped.)
                int limit = i + segmentLength;
                sb.append(compiledPattern, i, limit);
                i = limit;
            }
        }
        return sb.toString();
    }

    /** Poor-man's iterator interface. See ICU-20406. */
    public static class Int64Iterator {
        /** State value returned by {@link #step} when the end of the pattern has been reached. */
        public static final long DONE = -1;

        /**
         * Advances past the unit at the index stored in the state, appends all
         * literal-text segments that follow to the output, and stops at the next
         * argument or at the end of the pattern.
         *
         * @param compiledPattern Compiled form of a pattern string.
         * @param state 0 for the first call; afterwards the value returned by the previous call.
         * @param output Receives the literal text that was stepped over.
         * @return {@link #DONE} if the end of the pattern was reached; otherwise a new state
         *         with the pattern index of the next argument in the upper 32 bits
         *         and its argument number in the lower 32 bits.
         */
        public static long step(CharSequence compiledPattern, long state, StringBuffer output) {
            int i = (int) (state >>> 32);
            assert i < compiledPattern.length();
            // Skip the header unit (first call) or the previously returned argument.
            i++;
            while (i < compiledPattern.length() && compiledPattern.charAt(i) > ARG_NUM_LIMIT) {
                int limit = i + compiledPattern.charAt(i) + 1 - ARG_NUM_LIMIT;
                output.append(compiledPattern, i + 1, limit);
                i = limit;
            }
            if (i == compiledPattern.length()) {
                return DONE;
            }
            return (((long) i) << 32) | compiledPattern.charAt(i);
        }

        /**
         * @param state A state returned by {@link #step}, other than {@link #DONE}.
         * @return The argument number stored in the lower 32 bits of the state.
         */
        public static int getArgIndex(long state) {
            return (int) state;
        }
    }

    /**
     * Verifies that there are at least getArgumentLimit() values.
     * A null values array counts as zero values.
     *
     * @throws IllegalArgumentException if there are too few values.
     */
    private static void checkEnoughValues(String compiledPattern, CharSequence[] values) {
        int valuesLength = values != null ? values.length : 0;
        if (valuesLength < getArgumentLimit(compiledPattern)) {
            throw new IllegalArgumentException("Too few values.");
        }
    }

    /**
     * Shared formatting loop: walks the compiled pattern and appends
     * literal text and argument values to the result.
     *
     * @param compiledPattern Compiled form of a pattern string.
     * @param values The argument values; indexed by the argument numbers in the pattern.
     * @param result Gets the output appended.
     * @param resultCopy Text appended in place of a non-initial argument value
     *                   that is the same object as result.
     * @param forbidResultAsValue If true, a used value that is the same object as result
     *                            causes an IllegalArgumentException.
     * @param offsets If not null, all elements are first set to -1, then offsets[n] is set
     *                to the result offset where argument n was inserted
     *                (for n less than offsets.length).
     * @return result
     */
    private static StringBuilder format(
            String compiledPattern, CharSequence[] values,
            StringBuilder result, String resultCopy, boolean forbidResultAsValue,
            int[] offsets) {
        int offsetsLength;
        if (offsets == null) {
            offsetsLength = 0;
        } else {
            offsetsLength = offsets.length;
            for (int i = 0; i < offsetsLength; i++) {
                offsets[i] = -1;
            }
        }
        for (int i = 1; i < compiledPattern.length();) {
            int n = compiledPattern.charAt(i++);
            if (n < ARG_NUM_LIMIT) {
                CharSequence value = values[n];
                if (value == result) {
                    if (forbidResultAsValue) {
                        throw new IllegalArgumentException("Value must not be same object as result");
                    }
                    if (i == 2) {
                        // We are appending to result which is also the first value object.
                        if (n < offsetsLength) {
                            offsets[n] = 0;
                        }
                    } else {
                        if (n < offsetsLength) {
                            offsets[n] = result.length();
                        }
                        result.append(resultCopy);
                    }
                } else {
                    if (n < offsetsLength) {
                        offsets[n] = result.length();
                    }
                    result.append(value);
                }
            } else {
                // Literal-text segment of length n - ARG_NUM_LIMIT.
                int limit = i + (n - ARG_NUM_LIMIT);
                result.append(compiledPattern, i, limit);
                i = limit;
            }
        }
        return result;
    }
}
378