1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2014-2016, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ******************************************************************************* 8 */ 9 package com.ibm.icu.impl; 10 11 /** 12 * Formats simple patterns like "{1} was born in {0}". 13 * Internal version of {@link com.ibm.icu.text.SimpleFormatter} 14 * with only static methods, to avoid wrapper objects. 15 * 16 * <p>This class "compiles" pattern strings into a binary format 17 * and implements formatting etc. based on that. 18 * 19 * <p>Format: 20 * Index 0: One more than the highest argument number. 21 * Followed by zero or more arguments or literal-text segments. 22 * 23 * <p>An argument is stored as its number, less than ARG_NUM_LIMIT. 24 * A literal-text segment is stored as its length (at least 1) offset by ARG_NUM_LIMIT, 25 * followed by that many chars. 26 */ 27 public final class SimpleFormatterImpl { 28 /** 29 * Argument numbers must be smaller than this limit. 30 * Text segment lengths are offset by this much. 31 * This is currently the only unused char value in compiled patterns, 32 * except it is the maximum value of the first unit (max arg +1). 33 */ 34 private static final int ARG_NUM_LIMIT = 0x100; 35 private static final char LEN1_CHAR = (char)(ARG_NUM_LIMIT + 1); 36 private static final char LEN2_CHAR = (char)(ARG_NUM_LIMIT + 2); 37 private static final char LEN3_CHAR = (char)(ARG_NUM_LIMIT + 3); 38 /** 39 * Initial and maximum char/UChar value set for a text segment. 40 * Segment length char values are from ARG_NUM_LIMIT+1 to this value here. 41 * Normally 0xffff, but can be as small as ARG_NUM_LIMIT+1 for testing. 42 */ 43 private static final char SEGMENT_LENGTH_ARGUMENT_CHAR = (char)0xffff; 44 /** 45 * Maximum length of a text segment. Longer segments are split into shorter ones. 46 */ 47 private static final int MAX_SEGMENT_LENGTH = SEGMENT_LENGTH_ARGUMENT_CHAR - ARG_NUM_LIMIT; 48 49 /** "Intern" some common patterns. */ 50 private static final String[][] COMMON_PATTERNS = { 51 { "{0} {1}", "\u0002\u0000" + LEN1_CHAR + " \u0001" }, 52 { "{0} ({1})", "\u0002\u0000" + LEN2_CHAR + " (\u0001" + LEN1_CHAR + ')' }, 53 { "{0}, {1}", "\u0002\u0000" + LEN2_CHAR + ", \u0001" }, 54 { "{0} – {1}", "\u0002\u0000" + LEN3_CHAR + " – \u0001" }, // en dash 55 }; 56 57 /** Use only static methods. */ SimpleFormatterImpl()58 private SimpleFormatterImpl() {} 59 60 /** 61 * Creates a compiled form of the pattern string, for use with appropriate static methods. 62 * The number of arguments checked against the given limits is the 63 * highest argument number plus one, not the number of occurrences of arguments. 64 * 65 * @param pattern The pattern string. 66 * @param sb A StringBuilder instance which may or may not be used. 67 * @param min The pattern must have at least this many arguments. 68 * @param max The pattern must have at most this many arguments. 69 * @return The compiled-pattern string. 70 * @throws IllegalArgumentException for bad argument syntax and too few or too many arguments. 71 */ compileToStringMinMaxArguments( CharSequence pattern, StringBuilder sb, int min, int max)72 public static String compileToStringMinMaxArguments( 73 CharSequence pattern, StringBuilder sb, int min, int max) { 74 // Return some precompiled common two-argument patterns. 75 if (min <= 2 && 2 <= max) { 76 for (String[] pair : COMMON_PATTERNS) { 77 if (pair[0].contentEquals(pattern)) { 78 assert pair[1].charAt(0) == 2; 79 return pair[1]; 80 } 81 } 82 } 83 // Parse consistent with MessagePattern, but 84 // - support only simple numbered arguments 85 // - build a simple binary structure into the result string 86 int patternLength = pattern.length(); 87 sb.ensureCapacity(patternLength); 88 // Reserve the first char for the number of arguments. 89 sb.setLength(1); 90 int textLength = 0; 91 int maxArg = -1; 92 boolean inQuote = false; 93 for (int i = 0; i < patternLength;) { 94 char c = pattern.charAt(i++); 95 if (c == '\'') { 96 if (i < patternLength && (c = pattern.charAt(i)) == '\'') { 97 // double apostrophe, skip the second one 98 ++i; 99 } else if (inQuote) { 100 // skip the quote-ending apostrophe 101 inQuote = false; 102 continue; 103 } else if (c == '{' || c == '}') { 104 // Skip the quote-starting apostrophe, find the end of the quoted literal text. 105 ++i; 106 inQuote = true; 107 } else { 108 // The apostrophe is part of literal text. 109 c = '\''; 110 } 111 } else if (!inQuote && c == '{') { 112 if (textLength > 0) { 113 sb.setCharAt(sb.length() - textLength - 1, (char)(ARG_NUM_LIMIT + textLength)); 114 textLength = 0; 115 } 116 int argNumber; 117 if ((i + 1) < patternLength && 118 0 <= (argNumber = pattern.charAt(i) - '0') && argNumber <= 9 && 119 pattern.charAt(i + 1) == '}') { 120 i += 2; 121 } else { 122 // Multi-digit argument number (no leading zero) or syntax error. 123 // MessagePattern permits PatternProps.skipWhiteSpace(pattern, index) 124 // around the number, but this class does not. 125 int argStart = i - 1; 126 argNumber = -1; 127 if (i < patternLength && '1' <= (c = pattern.charAt(i++)) && c <= '9') { 128 argNumber = c - '0'; 129 while (i < patternLength && '0' <= (c = pattern.charAt(i++)) && c <= '9') { 130 argNumber = argNumber * 10 + (c - '0'); 131 if (argNumber >= ARG_NUM_LIMIT) { 132 break; 133 } 134 } 135 } 136 if (argNumber < 0 || c != '}') { 137 throw new IllegalArgumentException( 138 "Argument syntax error in pattern \"" + pattern + 139 "\" at index " + argStart + 140 ": " + pattern.subSequence(argStart, i)); 141 } 142 } 143 if (argNumber > maxArg) { 144 maxArg = argNumber; 145 } 146 sb.append((char)argNumber); 147 continue; 148 } // else: c is part of literal text 149 // Append c and track the literal-text segment length. 150 if (textLength == 0) { 151 // Reserve a char for the length of a new text segment, preset the maximum length. 152 sb.append(SEGMENT_LENGTH_ARGUMENT_CHAR); 153 } 154 sb.append(c); 155 if (++textLength == MAX_SEGMENT_LENGTH) { 156 textLength = 0; 157 } 158 } 159 if (textLength > 0) { 160 sb.setCharAt(sb.length() - textLength - 1, (char)(ARG_NUM_LIMIT + textLength)); 161 } 162 int argCount = maxArg + 1; 163 if (argCount < min) { 164 throw new IllegalArgumentException( 165 "Fewer than minimum " + min + " arguments in pattern \"" + pattern + "\""); 166 } 167 if (argCount > max) { 168 throw new IllegalArgumentException( 169 "More than maximum " + max + " arguments in pattern \"" + pattern + "\""); 170 } 171 sb.setCharAt(0, (char)argCount); 172 return sb.toString(); 173 } 174 175 /** 176 * @param compiledPattern Compiled form of a pattern string. 177 * @return The max argument number + 1. 178 */ getArgumentLimit(String compiledPattern)179 public static int getArgumentLimit(String compiledPattern) { 180 return compiledPattern.charAt(0); 181 } 182 183 /** 184 * Formats the given values. 185 * 186 * @param compiledPattern Compiled form of a pattern string. 187 */ formatCompiledPattern(String compiledPattern, CharSequence... values)188 public static String formatCompiledPattern(String compiledPattern, CharSequence... values) { 189 return formatAndAppend(compiledPattern, new StringBuilder(), null, values).toString(); 190 } 191 192 /** 193 * Formats the not-compiled pattern with the given values. 194 * Equivalent to compileToStringMinMaxArguments() followed by formatCompiledPattern(). 195 * The number of arguments checked against the given limits is the 196 * highest argument number plus one, not the number of occurrences of arguments. 197 * 198 * @param pattern Not-compiled form of a pattern string. 199 * @param min The pattern must have at least this many arguments. 200 * @param max The pattern must have at most this many arguments. 201 * @return The compiled-pattern string. 202 * @throws IllegalArgumentException for bad argument syntax and too few or too many arguments. 203 */ formatRawPattern(String pattern, int min, int max, CharSequence... values)204 public static String formatRawPattern(String pattern, int min, int max, CharSequence... values) { 205 StringBuilder sb = new StringBuilder(); 206 String compiledPattern = compileToStringMinMaxArguments(pattern, sb, min, max); 207 sb.setLength(0); 208 return formatAndAppend(compiledPattern, sb, null, values).toString(); 209 } 210 211 /** 212 * Formats the given values, appending to the appendTo builder. 213 * 214 * @param compiledPattern Compiled form of a pattern string. 215 * @param appendTo Gets the formatted pattern and values appended. 216 * @param offsets offsets[i] receives the offset of where 217 * values[i] replaced pattern argument {i}. 218 * Can be null, or can be shorter or longer than values. 219 * If there is no {i} in the pattern, then offsets[i] is set to -1. 220 * @param values The argument values. 221 * An argument value must not be the same object as appendTo. 222 * values.length must be at least getArgumentLimit(). 223 * Can be null if getArgumentLimit()==0. 224 * @return appendTo 225 */ formatAndAppend( String compiledPattern, StringBuilder appendTo, int[] offsets, CharSequence... values)226 public static StringBuilder formatAndAppend( 227 String compiledPattern, StringBuilder appendTo, int[] offsets, CharSequence... values) { 228 int valuesLength = values != null ? values.length : 0; 229 if (valuesLength < getArgumentLimit(compiledPattern)) { 230 throw new IllegalArgumentException("Too few values."); 231 } 232 return format(compiledPattern, values, appendTo, null, true, offsets); 233 } 234 235 /** 236 * Formats the given values, replacing the contents of the result builder. 237 * May optimize by actually appending to the result if it is the same object 238 * as the value corresponding to the initial argument in the pattern. 239 * 240 * @param compiledPattern Compiled form of a pattern string. 241 * @param result Gets its contents replaced by the formatted pattern and values. 242 * @param offsets offsets[i] receives the offset of where 243 * values[i] replaced pattern argument {i}. 244 * Can be null, or can be shorter or longer than values. 245 * If there is no {i} in the pattern, then offsets[i] is set to -1. 246 * @param values The argument values. 247 * An argument value may be the same object as result. 248 * values.length must be at least getArgumentLimit(). 249 * @return result 250 */ formatAndReplace( String compiledPattern, StringBuilder result, int[] offsets, CharSequence... values)251 public static StringBuilder formatAndReplace( 252 String compiledPattern, StringBuilder result, int[] offsets, CharSequence... values) { 253 int valuesLength = values != null ? values.length : 0; 254 if (valuesLength < getArgumentLimit(compiledPattern)) { 255 throw new IllegalArgumentException("Too few values."); 256 } 257 258 // If the pattern starts with an argument whose value is the same object 259 // as the result, then we keep the result contents and append to it. 260 // Otherwise we replace its contents. 261 int firstArg = -1; 262 // If any non-initial argument value is the same object as the result, 263 // then we first copy its contents and use that instead while formatting. 264 String resultCopy = null; 265 if (getArgumentLimit(compiledPattern) > 0) { 266 for (int i = 1; i < compiledPattern.length();) { 267 int n = compiledPattern.charAt(i++); 268 if (n < ARG_NUM_LIMIT) { 269 if (values[n] == result) { 270 if (i == 2) { 271 firstArg = n; 272 } else if (resultCopy == null) { 273 resultCopy = result.toString(); 274 } 275 } 276 } else { 277 i += n - ARG_NUM_LIMIT; 278 } 279 } 280 } 281 if (firstArg < 0) { 282 result.setLength(0); 283 } 284 return format(compiledPattern, values, result, resultCopy, false, offsets); 285 } 286 287 /** 288 * Returns the pattern text with none of the arguments. 289 * Like formatting with all-empty string values. 290 * 291 * @param compiledPattern Compiled form of a pattern string. 292 */ getTextWithNoArguments(String compiledPattern)293 public static String getTextWithNoArguments(String compiledPattern) { 294 int capacity = compiledPattern.length() - 1 - getArgumentLimit(compiledPattern); 295 StringBuilder sb = new StringBuilder(capacity); 296 for (int i = 1; i < compiledPattern.length();) { 297 int segmentLength = compiledPattern.charAt(i++) - ARG_NUM_LIMIT; 298 if (segmentLength > 0) { 299 int limit = i + segmentLength; 300 sb.append(compiledPattern, i, limit); 301 i = limit; 302 } 303 } 304 return sb.toString(); 305 } 306 307 /** Poor-man's iterator interface. See ICU-20406. */ 308 public static class Int64Iterator { 309 public static final long DONE = -1; 310 step(CharSequence compiledPattern, long state, StringBuffer output)311 public static long step(CharSequence compiledPattern, long state, StringBuffer output) { 312 int i = (int) (state >>> 32); 313 assert i < compiledPattern.length(); 314 i++; 315 while (i < compiledPattern.length() && compiledPattern.charAt(i) > ARG_NUM_LIMIT) { 316 int limit = i + compiledPattern.charAt(i) + 1 - ARG_NUM_LIMIT; 317 output.append(compiledPattern, i + 1, limit); 318 i = limit; 319 } 320 if (i == compiledPattern.length()) { 321 return DONE; 322 } 323 return (((long) i) << 32) | compiledPattern.charAt(i); 324 } 325 326 public static int getArgIndex(long state) { 327 return (int) state; 328 } 329 } 330 331 private static StringBuilder format( 332 String compiledPattern, CharSequence[] values, 333 StringBuilder result, String resultCopy, boolean forbidResultAsValue, 334 int[] offsets) { 335 int offsetsLength; 336 if (offsets == null) { 337 offsetsLength = 0; 338 } else { 339 offsetsLength = offsets.length; 340 for (int i = 0; i < offsetsLength; i++) { 341 offsets[i] = -1; 342 } 343 } 344 for (int i = 1; i < compiledPattern.length();) { 345 int n = compiledPattern.charAt(i++); 346 if (n < ARG_NUM_LIMIT) { 347 CharSequence value = values[n]; 348 if (value == result) { 349 if (forbidResultAsValue) { 350 throw new IllegalArgumentException("Value must not be same object as result"); 351 } 352 if (i == 2) { 353 // We are appending to result which is also the first value object. 354 if (n < offsetsLength) { 355 offsets[n] = 0; 356 } 357 } else { 358 if (n < offsetsLength) { 359 offsets[n] = result.length(); 360 } 361 result.append(resultCopy); 362 } 363 } else { 364 if (n < offsetsLength) { 365 offsets[n] = result.length(); 366 } 367 result.append(value); 368 } 369 } else { 370 int limit = i + (n - ARG_NUM_LIMIT); 371 result.append(compiledPattern, i, limit); 372 i = limit; 373 } 374 } 375 return result; 376 } 377 } 378