1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ****************************************************************************** 5 * Copyright (C) 2014-2016, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ****************************************************************************** 8 * simpleformatter.h 9 */ 10 11 #ifndef __SIMPLEFORMATTER_H__ 12 #define __SIMPLEFORMATTER_H__ 13 14 /** 15 * \file 16 * \brief C++ API: Simple formatter, minimal subset of MessageFormat. 17 */ 18 19 #include "unicode/utypes.h" 20 21 #if U_SHOW_CPLUSPLUS_API 22 23 #include "unicode/unistr.h" 24 25 U_NAMESPACE_BEGIN 26 27 // Forward declaration: 28 namespace number { 29 namespace impl { 30 class SimpleModifier; 31 } 32 } 33 34 /** 35 * Formats simple patterns like "{1} was born in {0}". 36 * Minimal subset of MessageFormat; fast, simple, minimal dependencies. 37 * Supports only numbered arguments with no type nor style parameters, 38 * and formats only string values. 39 * Quoting via ASCII apostrophe compatible with ICU MessageFormat default behavior. 40 * 41 * Factory methods set error codes for syntax errors 42 * and for too few or too many arguments/placeholders. 43 * 44 * SimpleFormatter objects are thread-safe except for assignment and applying new patterns. 45 * 46 * Example: 47 * <pre> 48 * UErrorCode errorCode = U_ZERO_ERROR; 49 * SimpleFormatter fmt("{1} '{born}' in {0}", errorCode); 50 * UnicodeString result; 51 * 52 * // Output: "paul {born} in england" 53 * fmt.format("england", "paul", result, errorCode); 54 * </pre> 55 * 56 * This class is not intended for public subclassing. 57 * 58 * @see MessageFormat 59 * @see UMessagePatternApostropheMode 60 * @stable ICU 57 61 */ 62 class U_COMMON_API SimpleFormatter U_FINAL : public UMemory { 63 public: 64 /** 65 * Default constructor. 66 * @stable ICU 57 67 */ SimpleFormatter()68 SimpleFormatter() : compiledPattern((char16_t)0) {} 69 70 /** 71 * Constructs a formatter from the pattern string. 72 * 73 * @param pattern The pattern string. 74 * @param errorCode ICU error code in/out parameter. 75 * Must fulfill U_SUCCESS before the function call. 76 * Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax. 77 * @stable ICU 57 78 */ SimpleFormatter(const UnicodeString & pattern,UErrorCode & errorCode)79 SimpleFormatter(const UnicodeString& pattern, UErrorCode &errorCode) { 80 applyPattern(pattern, errorCode); 81 } 82 83 /** 84 * Constructs a formatter from the pattern string. 85 * The number of arguments checked against the given limits is the 86 * highest argument number plus one, not the number of occurrences of arguments. 87 * 88 * @param pattern The pattern string. 89 * @param min The pattern must have at least this many arguments. 90 * @param max The pattern must have at most this many arguments. 91 * @param errorCode ICU error code in/out parameter. 92 * Must fulfill U_SUCCESS before the function call. 93 * Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax and 94 * too few or too many arguments. 95 * @stable ICU 57 96 */ SimpleFormatter(const UnicodeString & pattern,int32_t min,int32_t max,UErrorCode & errorCode)97 SimpleFormatter(const UnicodeString& pattern, int32_t min, int32_t max, 98 UErrorCode &errorCode) { 99 applyPatternMinMaxArguments(pattern, min, max, errorCode); 100 } 101 102 /** 103 * Copy constructor. 104 * @stable ICU 57 105 */ SimpleFormatter(const SimpleFormatter & other)106 SimpleFormatter(const SimpleFormatter& other) 107 : compiledPattern(other.compiledPattern) {} 108 109 /** 110 * Assignment operator. 111 * @stable ICU 57 112 */ 113 SimpleFormatter &operator=(const SimpleFormatter& other); 114 115 /** 116 * Destructor. 117 * @stable ICU 57 118 */ 119 ~SimpleFormatter(); 120 121 /** 122 * Changes this object according to the new pattern. 123 * 124 * @param pattern The pattern string. 125 * @param errorCode ICU error code in/out parameter. 126 * Must fulfill U_SUCCESS before the function call. 127 * Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax. 128 * @return true if U_SUCCESS(errorCode). 129 * @stable ICU 57 130 */ applyPattern(const UnicodeString & pattern,UErrorCode & errorCode)131 UBool applyPattern(const UnicodeString &pattern, UErrorCode &errorCode) { 132 return applyPatternMinMaxArguments(pattern, 0, INT32_MAX, errorCode); 133 } 134 135 /** 136 * Changes this object according to the new pattern. 137 * The number of arguments checked against the given limits is the 138 * highest argument number plus one, not the number of occurrences of arguments. 139 * 140 * @param pattern The pattern string. 141 * @param min The pattern must have at least this many arguments. 142 * @param max The pattern must have at most this many arguments. 143 * @param errorCode ICU error code in/out parameter. 144 * Must fulfill U_SUCCESS before the function call. 145 * Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax and 146 * too few or too many arguments. 147 * @return true if U_SUCCESS(errorCode). 148 * @stable ICU 57 149 */ 150 UBool applyPatternMinMaxArguments(const UnicodeString &pattern, 151 int32_t min, int32_t max, UErrorCode &errorCode); 152 153 /** 154 * @return The max argument number + 1. 155 * @stable ICU 57 156 */ getArgumentLimit()157 int32_t getArgumentLimit() const { 158 return getArgumentLimit(compiledPattern.getBuffer(), compiledPattern.length()); 159 } 160 161 /** 162 * Formats the given value, appending to the appendTo builder. 163 * The argument value must not be the same object as appendTo. 164 * getArgumentLimit() must be at most 1. 165 * 166 * @param value0 Value for argument {0}. 167 * @param appendTo Gets the formatted pattern and value appended. 168 * @param errorCode ICU error code in/out parameter. 169 * Must fulfill U_SUCCESS before the function call. 170 * @return appendTo 171 * @stable ICU 57 172 */ 173 UnicodeString &format( 174 const UnicodeString &value0, 175 UnicodeString &appendTo, UErrorCode &errorCode) const; 176 177 /** 178 * Formats the given values, appending to the appendTo builder. 179 * An argument value must not be the same object as appendTo. 180 * getArgumentLimit() must be at most 2. 181 * 182 * @param value0 Value for argument {0}. 183 * @param value1 Value for argument {1}. 184 * @param appendTo Gets the formatted pattern and values appended. 185 * @param errorCode ICU error code in/out parameter. 186 * Must fulfill U_SUCCESS before the function call. 187 * @return appendTo 188 * @stable ICU 57 189 */ 190 UnicodeString &format( 191 const UnicodeString &value0, 192 const UnicodeString &value1, 193 UnicodeString &appendTo, UErrorCode &errorCode) const; 194 195 /** 196 * Formats the given values, appending to the appendTo builder. 197 * An argument value must not be the same object as appendTo. 198 * getArgumentLimit() must be at most 3. 199 * 200 * @param value0 Value for argument {0}. 201 * @param value1 Value for argument {1}. 202 * @param value2 Value for argument {2}. 203 * @param appendTo Gets the formatted pattern and values appended. 204 * @param errorCode ICU error code in/out parameter. 205 * Must fulfill U_SUCCESS before the function call. 206 * @return appendTo 207 * @stable ICU 57 208 */ 209 UnicodeString &format( 210 const UnicodeString &value0, 211 const UnicodeString &value1, 212 const UnicodeString &value2, 213 UnicodeString &appendTo, UErrorCode &errorCode) const; 214 215 /** 216 * Formats the given values, appending to the appendTo string. 217 * 218 * @param values The argument values. 219 * An argument value must not be the same object as appendTo. 220 * Can be NULL if valuesLength==getArgumentLimit()==0. 221 * @param valuesLength The length of the values array. 222 * Must be at least getArgumentLimit(). 223 * @param appendTo Gets the formatted pattern and values appended. 224 * @param offsets offsets[i] receives the offset of where 225 * values[i] replaced pattern argument {i}. 226 * Can be shorter or longer than values. Can be NULL if offsetsLength==0. 227 * If there is no {i} in the pattern, then offsets[i] is set to -1. 228 * @param offsetsLength The length of the offsets array. 229 * @param errorCode ICU error code in/out parameter. 230 * Must fulfill U_SUCCESS before the function call. 231 * @return appendTo 232 * @stable ICU 57 233 */ 234 UnicodeString &formatAndAppend( 235 const UnicodeString *const *values, int32_t valuesLength, 236 UnicodeString &appendTo, 237 int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const; 238 239 /** 240 * Formats the given values, replacing the contents of the result string. 241 * May optimize by actually appending to the result if it is the same object 242 * as the value corresponding to the initial argument in the pattern. 243 * 244 * @param values The argument values. 245 * An argument value may be the same object as result. 246 * Can be NULL if valuesLength==getArgumentLimit()==0. 247 * @param valuesLength The length of the values array. 248 * Must be at least getArgumentLimit(). 249 * @param result Gets its contents replaced by the formatted pattern and values. 250 * @param offsets offsets[i] receives the offset of where 251 * values[i] replaced pattern argument {i}. 252 * Can be shorter or longer than values. Can be NULL if offsetsLength==0. 253 * If there is no {i} in the pattern, then offsets[i] is set to -1. 254 * @param offsetsLength The length of the offsets array. 255 * @param errorCode ICU error code in/out parameter. 256 * Must fulfill U_SUCCESS before the function call. 257 * @return result 258 * @stable ICU 57 259 */ 260 UnicodeString &formatAndReplace( 261 const UnicodeString *const *values, int32_t valuesLength, 262 UnicodeString &result, 263 int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const; 264 265 /** 266 * Returns the pattern text with none of the arguments. 267 * Like formatting with all-empty string values. 268 * @stable ICU 57 269 */ getTextWithNoArguments()270 UnicodeString getTextWithNoArguments() const { 271 return getTextWithNoArguments( 272 compiledPattern.getBuffer(), 273 compiledPattern.length(), 274 nullptr, 275 0); 276 } 277 278 #ifndef U_HIDE_INTERNAL_API 279 /** 280 * Returns the pattern text with none of the arguments. 281 * Like formatting with all-empty string values. 282 * 283 * TODO(ICU-20406): Replace this with an Iterator interface. 284 * 285 * @param offsets offsets[i] receives the offset of where {i} was located 286 * before it was replaced by an empty string. 287 * For example, "a{0}b{1}" produces offset 1 for i=0 and 2 for i=1. 288 * Can be nullptr if offsetsLength==0. 289 * If there is no {i} in the pattern, then offsets[i] is set to -1. 290 * @param offsetsLength The length of the offsets array. 291 * 292 * @internal 293 */ getTextWithNoArguments(int32_t * offsets,int32_t offsetsLength)294 UnicodeString getTextWithNoArguments(int32_t *offsets, int32_t offsetsLength) const { 295 return getTextWithNoArguments( 296 compiledPattern.getBuffer(), 297 compiledPattern.length(), 298 offsets, 299 offsetsLength); 300 } 301 #endif // U_HIDE_INTERNAL_API 302 303 private: 304 /** 305 * Binary representation of the compiled pattern. 306 * Index 0: One more than the highest argument number. 307 * Followed by zero or more arguments or literal-text segments. 308 * 309 * An argument is stored as its number, less than ARG_NUM_LIMIT. 310 * A literal-text segment is stored as its length (at least 1) offset by ARG_NUM_LIMIT, 311 * followed by that many chars. 312 */ 313 UnicodeString compiledPattern; 314 getArgumentLimit(const char16_t * compiledPattern,int32_t compiledPatternLength)315 static inline int32_t getArgumentLimit(const char16_t *compiledPattern, 316 int32_t compiledPatternLength) { 317 return compiledPatternLength == 0 ? 0 : compiledPattern[0]; 318 } 319 320 static UnicodeString getTextWithNoArguments( 321 const char16_t *compiledPattern, 322 int32_t compiledPatternLength, 323 int32_t *offsets, 324 int32_t offsetsLength); 325 326 static UnicodeString &format( 327 const char16_t *compiledPattern, int32_t compiledPatternLength, 328 const UnicodeString *const *values, 329 UnicodeString &result, const UnicodeString *resultCopy, UBool forbidResultAsValue, 330 int32_t *offsets, int32_t offsetsLength, 331 UErrorCode &errorCode); 332 333 // Give access to internals to SimpleModifier for number formatting 334 friend class number::impl::SimpleModifier; 335 }; 336 337 U_NAMESPACE_END 338 339 #endif /* U_SHOW_CPLUSPLUS_API */ 340 341 #endif // __SIMPLEFORMATTER_H__ 342