1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2014-2016, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ******************************************************************************* 8 */ 9 package com.ibm.icu.impl; 10 11 import java.io.IOException; 12 import java.text.Format; 13 14 import com.ibm.icu.util.ICUUncheckedIOException; 15 16 /** 17 * Formats simple patterns like "{1} was born in {0}". 18 * Internal version of {@link com.ibm.icu.text.SimpleFormatter} 19 * with only static methods, to avoid wrapper objects. 20 * 21 * <p>This class "compiles" pattern strings into a binary format 22 * and implements formatting etc. based on that. 23 * 24 * <p>Format: 25 * Index 0: One more than the highest argument number. 26 * Followed by zero or more arguments or literal-text segments. 27 * 28 * <p>An argument is stored as its number, less than ARG_NUM_LIMIT. 29 * A literal-text segment is stored as its length (at least 1) offset by ARG_NUM_LIMIT, 30 * followed by that many chars. 31 */ 32 public final class SimpleFormatterImpl { 33 /** 34 * Argument numbers must be smaller than this limit. 35 * Text segment lengths are offset by this much. 36 * This is currently the only unused char value in compiled patterns, 37 * except it is the maximum value of the first unit (max arg +1). 38 */ 39 private static final int ARG_NUM_LIMIT = 0x100; 40 private static final char LEN1_CHAR = (char)(ARG_NUM_LIMIT + 1); 41 private static final char LEN2_CHAR = (char)(ARG_NUM_LIMIT + 2); 42 private static final char LEN3_CHAR = (char)(ARG_NUM_LIMIT + 3); 43 /** 44 * Initial and maximum char/UChar value set for a text segment. 45 * Segment length char values are from ARG_NUM_LIMIT+1 to this value here. 46 * Normally 0xffff, but can be as small as ARG_NUM_LIMIT+1 for testing. 47 */ 48 private static final char SEGMENT_LENGTH_ARGUMENT_CHAR = (char)0xffff; 49 /** 50 * Maximum length of a text segment. Longer segments are split into shorter ones. 51 */ 52 private static final int MAX_SEGMENT_LENGTH = SEGMENT_LENGTH_ARGUMENT_CHAR - ARG_NUM_LIMIT; 53 54 /** "Intern" some common patterns. */ 55 private static final String[][] COMMON_PATTERNS = { 56 { "{0} {1}", "\u0002\u0000" + LEN1_CHAR + " \u0001" }, 57 { "{0} ({1})", "\u0002\u0000" + LEN2_CHAR + " (\u0001" + LEN1_CHAR + ')' }, 58 { "{0}, {1}", "\u0002\u0000" + LEN2_CHAR + ", \u0001" }, 59 { "{0} – {1}", "\u0002\u0000" + LEN3_CHAR + " – \u0001" }, // en dash 60 }; 61 62 /** Use only static methods. */ SimpleFormatterImpl()63 private SimpleFormatterImpl() {} 64 65 /** 66 * Creates a compiled form of the pattern string, for use with appropriate static methods. 67 * The number of arguments checked against the given limits is the 68 * highest argument number plus one, not the number of occurrences of arguments. 69 * 70 * @param pattern The pattern string. 71 * @param sb A StringBuilder instance which may or may not be used. 72 * @param min The pattern must have at least this many arguments. 73 * @param max The pattern must have at most this many arguments. 74 * @return The compiled-pattern string. 75 * @throws IllegalArgumentException for bad argument syntax and too few or too many arguments. 76 */ compileToStringMinMaxArguments( CharSequence pattern, StringBuilder sb, int min, int max)77 public static String compileToStringMinMaxArguments( 78 CharSequence pattern, StringBuilder sb, int min, int max) { 79 // Return some precompiled common two-argument patterns. 80 if (min <= 2 && 2 <= max) { 81 for (String[] pair : COMMON_PATTERNS) { 82 if (pair[0].contentEquals(pattern)) { 83 assert pair[1].charAt(0) == 2; 84 return pair[1]; 85 } 86 } 87 } 88 // Parse consistent with MessagePattern, but 89 // - support only simple numbered arguments 90 // - build a simple binary structure into the result string 91 int patternLength = pattern.length(); 92 sb.ensureCapacity(patternLength); 93 // Reserve the first char for the number of arguments. 94 sb.setLength(1); 95 int textLength = 0; 96 int maxArg = -1; 97 boolean inQuote = false; 98 for (int i = 0; i < patternLength;) { 99 char c = pattern.charAt(i++); 100 if (c == '\'') { 101 if (i < patternLength && (c = pattern.charAt(i)) == '\'') { 102 // double apostrophe, skip the second one 103 ++i; 104 } else if (inQuote) { 105 // skip the quote-ending apostrophe 106 inQuote = false; 107 continue; 108 } else if (c == '{' || c == '}') { 109 // Skip the quote-starting apostrophe, find the end of the quoted literal text. 110 ++i; 111 inQuote = true; 112 } else { 113 // The apostrophe is part of literal text. 114 c = '\''; 115 } 116 } else if (!inQuote && c == '{') { 117 if (textLength > 0) { 118 sb.setCharAt(sb.length() - textLength - 1, (char)(ARG_NUM_LIMIT + textLength)); 119 textLength = 0; 120 } 121 int argNumber; 122 if ((i + 1) < patternLength && 123 0 <= (argNumber = pattern.charAt(i) - '0') && argNumber <= 9 && 124 pattern.charAt(i + 1) == '}') { 125 i += 2; 126 } else { 127 // Multi-digit argument number (no leading zero) or syntax error. 128 // MessagePattern permits PatternProps.skipWhiteSpace(pattern, index) 129 // around the number, but this class does not. 130 int argStart = i - 1; 131 argNumber = -1; 132 if (i < patternLength && '1' <= (c = pattern.charAt(i++)) && c <= '9') { 133 argNumber = c - '0'; 134 while (i < patternLength && '0' <= (c = pattern.charAt(i++)) && c <= '9') { 135 argNumber = argNumber * 10 + (c - '0'); 136 if (argNumber >= ARG_NUM_LIMIT) { 137 break; 138 } 139 } 140 } 141 if (argNumber < 0 || c != '}') { 142 throw new IllegalArgumentException( 143 "Argument syntax error in pattern \"" + pattern + 144 "\" at index " + argStart + 145 ": " + pattern.subSequence(argStart, i)); 146 } 147 } 148 if (argNumber > maxArg) { 149 maxArg = argNumber; 150 } 151 sb.append((char)argNumber); 152 continue; 153 } // else: c is part of literal text 154 // Append c and track the literal-text segment length. 155 if (textLength == 0) { 156 // Reserve a char for the length of a new text segment, preset the maximum length. 157 sb.append(SEGMENT_LENGTH_ARGUMENT_CHAR); 158 } 159 sb.append(c); 160 if (++textLength == MAX_SEGMENT_LENGTH) { 161 textLength = 0; 162 } 163 } 164 if (textLength > 0) { 165 sb.setCharAt(sb.length() - textLength - 1, (char)(ARG_NUM_LIMIT + textLength)); 166 } 167 int argCount = maxArg + 1; 168 if (argCount < min) { 169 throw new IllegalArgumentException( 170 "Fewer than minimum " + min + " arguments in pattern \"" + pattern + "\""); 171 } 172 if (argCount > max) { 173 throw new IllegalArgumentException( 174 "More than maximum " + max + " arguments in pattern \"" + pattern + "\""); 175 } 176 sb.setCharAt(0, (char)argCount); 177 return sb.toString(); 178 } 179 180 /** 181 * @param compiledPattern Compiled form of a pattern string. 182 * @return The max argument number + 1. 183 */ getArgumentLimit(String compiledPattern)184 public static int getArgumentLimit(String compiledPattern) { 185 return compiledPattern.charAt(0); 186 } 187 188 /** 189 * Formats the given values. 190 * 191 * @param compiledPattern Compiled form of a pattern string. 192 */ formatCompiledPattern(String compiledPattern, CharSequence... values)193 public static String formatCompiledPattern(String compiledPattern, CharSequence... values) { 194 return formatAndAppend(compiledPattern, new StringBuilder(), null, values).toString(); 195 } 196 197 /** 198 * Formats the not-compiled pattern with the given values. 199 * Equivalent to compileToStringMinMaxArguments() followed by formatCompiledPattern(). 200 * The number of arguments checked against the given limits is the 201 * highest argument number plus one, not the number of occurrences of arguments. 202 * 203 * @param pattern Not-compiled form of a pattern string. 204 * @param min The pattern must have at least this many arguments. 205 * @param max The pattern must have at most this many arguments. 206 * @return The compiled-pattern string. 207 * @throws IllegalArgumentException for bad argument syntax and too few or too many arguments. 208 */ formatRawPattern(String pattern, int min, int max, CharSequence... values)209 public static String formatRawPattern(String pattern, int min, int max, CharSequence... values) { 210 StringBuilder sb = new StringBuilder(); 211 String compiledPattern = compileToStringMinMaxArguments(pattern, sb, min, max); 212 sb.setLength(0); 213 return formatAndAppend(compiledPattern, sb, null, values).toString(); 214 } 215 216 /** 217 * Formats the given values, appending to the appendTo builder. 218 * 219 * @param compiledPattern Compiled form of a pattern string. 220 * @param appendTo Gets the formatted pattern and values appended. 221 * @param offsets offsets[i] receives the offset of where 222 * values[i] replaced pattern argument {i}. 223 * Can be null, or can be shorter or longer than values. 224 * If there is no {i} in the pattern, then offsets[i] is set to -1. 225 * @param values The argument values. 226 * An argument value must not be the same object as appendTo. 227 * values.length must be at least getArgumentLimit(). 228 * Can be null if getArgumentLimit()==0. 229 * @return appendTo 230 */ formatAndAppend( String compiledPattern, StringBuilder appendTo, int[] offsets, CharSequence... values)231 public static StringBuilder formatAndAppend( 232 String compiledPattern, StringBuilder appendTo, int[] offsets, CharSequence... values) { 233 int valuesLength = values != null ? values.length : 0; 234 if (valuesLength < getArgumentLimit(compiledPattern)) { 235 throw new IllegalArgumentException("Too few values."); 236 } 237 return format(compiledPattern, values, appendTo, null, true, offsets); 238 } 239 240 /** 241 * Formats the given values, replacing the contents of the result builder. 242 * May optimize by actually appending to the result if it is the same object 243 * as the value corresponding to the initial argument in the pattern. 244 * 245 * @param compiledPattern Compiled form of a pattern string. 246 * @param result Gets its contents replaced by the formatted pattern and values. 247 * @param offsets offsets[i] receives the offset of where 248 * values[i] replaced pattern argument {i}. 249 * Can be null, or can be shorter or longer than values. 250 * If there is no {i} in the pattern, then offsets[i] is set to -1. 251 * @param values The argument values. 252 * An argument value may be the same object as result. 253 * values.length must be at least getArgumentLimit(). 254 * @return result 255 */ formatAndReplace( String compiledPattern, StringBuilder result, int[] offsets, CharSequence... values)256 public static StringBuilder formatAndReplace( 257 String compiledPattern, StringBuilder result, int[] offsets, CharSequence... values) { 258 int valuesLength = values != null ? values.length : 0; 259 if (valuesLength < getArgumentLimit(compiledPattern)) { 260 throw new IllegalArgumentException("Too few values."); 261 } 262 263 // If the pattern starts with an argument whose value is the same object 264 // as the result, then we keep the result contents and append to it. 265 // Otherwise we replace its contents. 266 int firstArg = -1; 267 // If any non-initial argument value is the same object as the result, 268 // then we first copy its contents and use that instead while formatting. 269 String resultCopy = null; 270 if (getArgumentLimit(compiledPattern) > 0) { 271 for (int i = 1; i < compiledPattern.length();) { 272 int n = compiledPattern.charAt(i++); 273 if (n < ARG_NUM_LIMIT) { 274 if (values[n] == result) { 275 if (i == 2) { 276 firstArg = n; 277 } else if (resultCopy == null) { 278 resultCopy = result.toString(); 279 } 280 } 281 } else { 282 i += n - ARG_NUM_LIMIT; 283 } 284 } 285 } 286 if (firstArg < 0) { 287 result.setLength(0); 288 } 289 return format(compiledPattern, values, result, resultCopy, false, offsets); 290 } 291 292 /** 293 * Returns the pattern text with none of the arguments. 294 * Like formatting with all-empty string values. 295 * 296 * @param compiledPattern Compiled form of a pattern string. 297 */ getTextWithNoArguments(String compiledPattern)298 public static String getTextWithNoArguments(String compiledPattern) { 299 int capacity = compiledPattern.length() - 1 - getArgumentLimit(compiledPattern); 300 StringBuilder sb = new StringBuilder(capacity); 301 for (int i = 1; i < compiledPattern.length();) { 302 int segmentLength = compiledPattern.charAt(i++) - ARG_NUM_LIMIT; 303 if (segmentLength > 0) { 304 int limit = i + segmentLength; 305 sb.append(compiledPattern, i, limit); 306 i = limit; 307 } 308 } 309 return sb.toString(); 310 } 311 312 /** 313 * Returns the length of the pattern text with none of the arguments. 314 * @param compiledPattern Compiled form of a pattern string. 315 * @param codePoints true to count code points; false to count code units. 316 * @return The number of code points or code units. 317 */ getLength(String compiledPattern, boolean codePoints)318 public static int getLength(String compiledPattern, boolean codePoints) { 319 int result = 0; 320 for (int i = 1; i < compiledPattern.length();) { 321 int segmentLength = compiledPattern.charAt(i++) - ARG_NUM_LIMIT; 322 if (segmentLength > 0) { 323 int limit = i + segmentLength; 324 if (codePoints) { 325 result += Character.codePointCount(compiledPattern, i, limit); 326 } else { 327 result += (limit - i); 328 } 329 i = limit; 330 } 331 } 332 return result; 333 } 334 335 /** 336 * Returns the length in code units of the pattern text up until the first argument. 337 * @param compiledPattern Compiled form of a pattern string. 338 * @return The number of code units. 339 */ getPrefixLength(String compiledPattern)340 public static int getPrefixLength(String compiledPattern) { 341 if (compiledPattern.length() == 1) { 342 return 0; 343 } else if (compiledPattern.charAt(0) == 0) { 344 return compiledPattern.length() - 2; 345 } else if (compiledPattern.charAt(1) <= ARG_NUM_LIMIT) { 346 return 0; 347 } else { 348 return compiledPattern.charAt(1) - ARG_NUM_LIMIT; 349 } 350 } 351 352 /** 353 * Special case for using FormattedStringBuilder with patterns with 0 or 1 argument. 354 * 355 * With 1 argument, treat the current contents of the FormattedStringBuilder between 356 * start and end as the argument {0}. Insert the extra strings from compiledPattern 357 * to surround the argument in the output. 358 * 359 * With 0 arguments, overwrite the entire contents of the FormattedStringBuilder 360 * between start and end. 361 * 362 * @param compiledPattern Compiled form of a pattern string. 363 * @param field Field to use when adding chars to the output. 364 * @param start The start index of the argument already in the output string. 365 * @param end The end index of the argument already in the output string. 366 * @param output Destination for formatted output. 367 * @return Net number of characters added to the formatted string. 368 */ formatPrefixSuffix( String compiledPattern, Format.Field field, int start, int end, FormattedStringBuilder output)369 public static int formatPrefixSuffix( 370 String compiledPattern, 371 Format.Field field, 372 int start, 373 int end, 374 FormattedStringBuilder output) { 375 int argLimit = getArgumentLimit(compiledPattern); 376 if (argLimit == 0) { 377 // No arguments in compiled pattern; overwrite the entire segment with our string. 378 return output.splice(start, end, compiledPattern, 2, compiledPattern.length(), field); 379 } else { 380 assert argLimit == 1; 381 int suffixOffset; 382 int length = 0; 383 if (compiledPattern.charAt(1) != '\u0000') { 384 int prefixLength = compiledPattern.charAt(1) - ARG_NUM_LIMIT; 385 length = output.insert(start, compiledPattern, 2, 2 + prefixLength, field); 386 suffixOffset = 3 + prefixLength; 387 } else { 388 suffixOffset = 2; 389 } 390 if (suffixOffset < compiledPattern.length()) { 391 int suffixLength = compiledPattern.charAt(suffixOffset) - ARG_NUM_LIMIT; 392 length += output.insert(end + length, compiledPattern, 1 + suffixOffset, 393 1 + suffixOffset + suffixLength, field); 394 } 395 return length; 396 } 397 } 398 399 /** Internal iterator interface for maximum efficiency. 400 * 401 * Usage boilerplate: 402 * 403 * <pre> 404 * long state = 0; 405 * while (true) { 406 * state = IterInternal.step(state, compiledPattern, output); 407 * if (state == IterInternal.DONE) { 408 * break; 409 * } 410 * int argIndex = IterInternal.getArgIndex(state); 411 * // Append the string corresponding to argIndex to output 412 * } 413 * </pre> 414 * 415 */ 416 public static class IterInternal { 417 public static final long DONE = -1; 418 step(long state, CharSequence compiledPattern, Appendable output)419 public static long step(long state, CharSequence compiledPattern, Appendable output) { 420 int i = (int) (state >>> 32); 421 assert i < compiledPattern.length(); 422 i++; 423 while (i < compiledPattern.length() && compiledPattern.charAt(i) > ARG_NUM_LIMIT) { 424 int limit = i + compiledPattern.charAt(i) + 1 - ARG_NUM_LIMIT; 425 try { 426 output.append(compiledPattern, i + 1, limit); 427 } catch (IOException e) { 428 throw new ICUUncheckedIOException(e); 429 } 430 i = limit; 431 } 432 if (i == compiledPattern.length()) { 433 return DONE; 434 } 435 return (((long) i) << 32) | compiledPattern.charAt(i); 436 } 437 438 public static int getArgIndex(long state) { 439 return (int) state; 440 } 441 } 442 443 private static StringBuilder format( 444 String compiledPattern, CharSequence[] values, 445 StringBuilder result, String resultCopy, boolean forbidResultAsValue, 446 int[] offsets) { 447 int offsetsLength; 448 if (offsets == null) { 449 offsetsLength = 0; 450 } else { 451 offsetsLength = offsets.length; 452 for (int i = 0; i < offsetsLength; i++) { 453 offsets[i] = -1; 454 } 455 } 456 for (int i = 1; i < compiledPattern.length();) { 457 int n = compiledPattern.charAt(i++); 458 if (n < ARG_NUM_LIMIT) { 459 CharSequence value = values[n]; 460 if (value == result) { 461 if (forbidResultAsValue) { 462 throw new IllegalArgumentException("Value must not be same object as result"); 463 } 464 if (i == 2) { 465 // We are appending to result which is also the first value object. 466 if (n < offsetsLength) { 467 offsets[n] = 0; 468 } 469 } else { 470 if (n < offsetsLength) { 471 offsets[n] = result.length(); 472 } 473 result.append(resultCopy); 474 } 475 } else { 476 if (n < offsetsLength) { 477 offsets[n] = result.length(); 478 } 479 result.append(value); 480 } 481 } else { 482 int limit = i + (n - ARG_NUM_LIMIT); 483 result.append(compiledPattern, i, limit); 484 i = limit; 485 } 486 } 487 return result; 488 } 489 } 490