1 // © 2024 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 4 #include "unicode/utypes.h" 5 6 #ifndef MESSAGEFORMAT2_H 7 #define MESSAGEFORMAT2_H 8 9 #if U_SHOW_CPLUSPLUS_API 10 11 #if !UCONFIG_NO_FORMATTING 12 13 #if !UCONFIG_NO_MF2 14 15 /** 16 * \file 17 * \brief C++ API: Formats messages using the draft MessageFormat 2.0. 18 */ 19 20 #include "unicode/messageformat2_arguments.h" 21 #include "unicode/messageformat2_data_model.h" 22 #include "unicode/messageformat2_function_registry.h" 23 #include "unicode/unistr.h" 24 25 #ifndef U_HIDE_DEPRECATED_API 26 27 U_NAMESPACE_BEGIN 28 29 namespace message2 { 30 31 class Environment; 32 class MessageContext; 33 class ResolvedSelector; 34 class StaticErrors; 35 36 /** 37 * <p>MessageFormatter is a Technical Preview API implementing MessageFormat 2.0. 38 * 39 * <p>See <a target="github" href="https://github.com/unicode-org/message-format-wg/blob/main/spec/syntax.md">the 40 * description of the syntax with examples and use cases</a> and the corresponding 41 * <a target="github" href="https://github.com/unicode-org/message-format-wg/blob/main/spec/message.abnf">ABNF</a> grammar.</p> 42 * 43 * The MessageFormatter class is mutable and movable. It is not copyable. 44 * (It is mutable because if it has a custom function registry, the registry may include 45 * `FormatterFactory` objects implementing custom formatters, which are allowed to contain 46 * mutable state.) 47 * 48 * @internal ICU 75 technology preview 49 * @deprecated This API is for technology preview only. 50 */ 51 class U_I18N_API MessageFormatter : public UObject { 52 // Note: This class does not currently inherit from the existing 53 // `Format` class. 54 public: 55 /** 56 * Move assignment operator: 57 * The source MessageFormatter will be left in a valid but undefined state. 58 * 59 * @internal ICU 75 technology preview 60 * @deprecated This API is for technology preview only. 61 */ 62 MessageFormatter& operator=(MessageFormatter&&) noexcept; 63 /** 64 * Destructor. 65 * 66 * @internal ICU 75 technology preview 67 * @deprecated This API is for technology preview only. 68 */ 69 virtual ~MessageFormatter(); 70 71 /** 72 * Formats the message to a string, using the data model that was previously set or parsed, 73 * and the given `arguments` object. 74 * 75 * @param arguments Reference to message arguments 76 * @param status Input/output error code used to indicate syntax errors, data model 77 * errors, resolution errors, formatting errors, selection errors, as well 78 * as other errors (such as memory allocation failures). Partial output 79 * is still provided in the presence of most error types. 80 * @return The string result of formatting the message with the given arguments. 81 * 82 * @internal ICU 75 technology preview 83 * @deprecated This API is for technology preview only. 84 */ 85 UnicodeString formatToString(const MessageArguments& arguments, UErrorCode &status); 86 87 /** 88 * Not yet implemented; formats the message to a `FormattedMessage` object, 89 * using the data model that was previously set or parsed, 90 * and the given `arguments` object. 91 * 92 * @param arguments Reference to message arguments 93 * @param status Input/output error code used to indicate syntax errors, data model 94 * errors, resolution errors, formatting errors, selection errors, as well 95 * as other errors (such as memory allocation failures). Partial output 96 * is still provided in the presence of most error types. 97 * @return The `FormattedMessage` representing the formatted message. 98 * 99 * @internal ICU 75 technology preview 100 * @deprecated This API is for technology preview only. 101 */ format(const MessageArguments & arguments,UErrorCode & status)102 FormattedMessage format(const MessageArguments& arguments, UErrorCode &status) const { 103 (void) arguments; 104 if (U_SUCCESS(status)) { 105 status = U_UNSUPPORTED_ERROR; 106 } 107 return FormattedMessage(status); 108 } 109 110 /** 111 * Accesses the locale that this `MessageFormatter` object was created with. 112 * 113 * @return A reference to the locale. 114 * 115 * @internal ICU 75 technology preview 116 * @deprecated This API is for technology preview only. 117 */ getLocale()118 const Locale& getLocale() const { return locale; } 119 120 /** 121 * Serializes the data model as a string in MessageFormat 2.0 syntax. 122 * 123 * @return result A string representation of the data model. 124 * The string is a valid MessageFormat 2.0 message. 125 * 126 * @internal ICU 75 technology preview 127 * @deprecated This API is for technology preview only. 128 */ 129 UnicodeString getPattern() const; 130 131 /** 132 * Accesses the data model referred to by this 133 * `MessageFormatter` object. 134 * 135 * @return A reference to the data model. 136 * 137 * @internal ICU 75 technology preview 138 * @deprecated This API is for technology preview only. 139 */ 140 const MFDataModel& getDataModel() const; 141 142 /** 143 * Used in conjunction with the 144 * MessageFormatter::Builder::setErrorHandlingBehavior() method. 145 * 146 * @internal ICU 76 technology preview 147 * @deprecated This API is for technology preview only. 148 */ 149 typedef enum UMFErrorHandlingBehavior { 150 /** 151 * Suppress errors and return best-effort output. 152 * 153 * @internal ICU 76 technology preview 154 * @deprecated This API is for technology preview only. 155 */ 156 U_MF_BEST_EFFORT = 0, 157 /** 158 * Signal all MessageFormat errors using the UErrorCode 159 * argument. 160 * 161 * @internal ICU 76 technology preview 162 * @deprecated This API is for technology preview only. 163 */ 164 U_MF_STRICT 165 } UMFErrorHandlingBehavior; 166 167 /** 168 * The mutable Builder class allows each part of the MessageFormatter to be initialized 169 * separately; calling its `build()` method yields an immutable MessageFormatter. 170 * 171 * Not copyable or movable. 172 */ 173 class U_I18N_API Builder : public UObject { 174 private: 175 friend class MessageFormatter; 176 177 // The pattern to be parsed to generate the formatted message 178 UnicodeString pattern; 179 bool hasPattern = false; 180 bool hasDataModel = false; 181 // The data model to be used to generate the formatted message 182 // Initialized either by `setDataModel()`, or by the parser 183 // through a call to `setPattern()` 184 MFDataModel dataModel; 185 // Normalized representation of the pattern; 186 // ignored if `setPattern()` wasn't called 187 UnicodeString normalizedInput; 188 // Errors (internal representation of parse errors) 189 // Ignored if `setPattern()` wasn't called 190 StaticErrors* errors; 191 Locale locale; 192 // Not owned 193 const MFFunctionRegistry* customMFFunctionRegistry; 194 // Error behavior; see comment in `MessageFormatter` class 195 bool signalErrors = false; 196 197 void clearState(); 198 public: 199 /** 200 * Sets the locale to use for formatting. 201 * 202 * @param locale The desired locale. 203 * @return A reference to the builder. 204 * 205 * @internal ICU 75 technology preview 206 * @deprecated This API is for technology preview only. 207 */ 208 Builder& setLocale(const Locale& locale); 209 /** 210 * Sets the pattern (contents of the message) and parses it 211 * into a data model. If a data model was 212 * previously set, it is removed. 213 * 214 * @param pattern A string in MessageFormat 2.0 syntax. 215 * @param parseError Struct to receive information on the position 216 * of an error within the pattern. 217 * @param status Input/output error code. If the 218 * pattern cannot be parsed, set to failure code. 219 * @return A reference to the builder. 220 * 221 * @internal ICU 75 technology preview 222 * @deprecated This API is for technology preview only. 223 */ 224 Builder& setPattern(const UnicodeString& pattern, UParseError& parseError, UErrorCode& status); 225 /** 226 * Sets a custom function registry. 227 * 228 * @param functionRegistry Reference to the function registry to use. 229 * `functionRegistry` is not copied, 230 * and the caller must ensure its lifetime contains 231 * the lifetime of the `MessageFormatter` object built by this 232 * builder. 233 * @return A reference to the builder. 234 * 235 * @internal ICU 75 technology preview 236 * @deprecated This API is for technology preview only. 237 */ 238 Builder& setFunctionRegistry(const MFFunctionRegistry& functionRegistry); 239 /** 240 * Sets a data model. If a pattern was previously set, it is removed. 241 * 242 * @param dataModel Data model to format. Passed by move. 243 * @return A reference to the builder. 244 * 245 * @internal ICU 75 technology preview 246 * @deprecated This API is for technology preview only. 247 */ 248 Builder& setDataModel(MFDataModel&& dataModel); 249 /** 250 * Set the error handling behavior for this formatter. 251 * 252 * "Strict" error behavior means that that formatting methods 253 * will set their UErrorCode arguments to signal MessageFormat 254 * data model, resolution, and runtime errors. Syntax errors are 255 * always signaled. 256 * 257 * "Best effort" error behavior means that MessageFormat errors are 258 * suppressed: formatting methods will _not_ set their 259 * UErrorCode arguments to signal MessageFormat data model, 260 * resolution, or runtime errors. Best-effort output 261 * will be returned. Syntax errors are always signaled. 262 * This is the default behavior. 263 * 264 * @param type An enum with type UMFErrorHandlingBehavior; 265 * if type == `U_MF_STRICT`, then 266 * errors are handled strictly. 267 * If type == `U_MF_BEST_EFFORT`, then 268 * best-effort output is returned. 269 * 270 * The default is to suppress all MessageFormat errors 271 * and return best-effort output. 272 * 273 * @return A reference to the builder. 274 * 275 * @internal ICU 76 technology preview 276 * @deprecated This API is for technology preview only. 277 */ 278 Builder& setErrorHandlingBehavior(UMFErrorHandlingBehavior type); 279 /** 280 * Constructs a new immutable MessageFormatter using the pattern or data model 281 * that was previously set, and the locale (if it was previously set) 282 * or default locale (otherwise). 283 * 284 * The builder object (`this`) can still be used after calling `build()`. 285 * 286 * @param status Input/output error code. If neither the pattern 287 * nor the data model is set, set to failure code. 288 * @return The new MessageFormatter object 289 * 290 * @internal ICU 75 technology preview 291 * @deprecated This API is for technology preview only. 292 */ 293 MessageFormatter build(UErrorCode& status) const; 294 /** 295 * Default constructor. 296 * Returns a Builder with the default locale and with no 297 * data model or pattern set. Either `setPattern()` 298 * or `setDataModel()` has to be called before calling `build()`. 299 * 300 * @param status Input/output error code. 301 * 302 * @internal ICU 75 technology preview 303 * @deprecated This API is for technology preview only. 304 */ 305 Builder(UErrorCode& status); 306 /** 307 * Destructor. 308 * 309 * @internal ICU 75 technology preview 310 * @deprecated This API is for technology preview only. 311 */ 312 virtual ~Builder(); 313 }; // class MessageFormatter::Builder 314 315 // TODO: Shouldn't be public; only used for testing 316 /** 317 * Returns a string consisting of the input with optional spaces removed. 318 * 319 * @return A normalized string representation of the input 320 * 321 * @internal ICU 75 technology preview 322 * @deprecated This API is for technology preview only. 323 */ getNormalizedPattern()324 const UnicodeString& getNormalizedPattern() const { return normalizedInput; } 325 326 private: 327 friend class Builder; 328 friend class MessageContext; 329 330 MessageFormatter(const MessageFormatter::Builder& builder, UErrorCode &status); 331 332 MessageFormatter() = delete; // default constructor not implemented 333 334 // Do not define default assignment operator 335 const MessageFormatter &operator=(const MessageFormatter &) = delete; 336 337 ResolvedSelector resolveVariables(const Environment& env, const data_model::Operand&, MessageContext&, UErrorCode &) const; 338 ResolvedSelector resolveVariables(const Environment& env, const data_model::Expression&, MessageContext&, UErrorCode &) const; 339 340 // Selection methods 341 342 // Takes a vector of FormattedPlaceholders 343 void resolveSelectors(MessageContext&, const Environment& env, UErrorCode&, UVector&) const; 344 // Takes a vector of vectors of strings (input) and a vector of PrioritizedVariants (output) 345 void filterVariants(const UVector&, UVector&, UErrorCode&) const; 346 // Takes a vector of vectors of strings (input) and a vector of PrioritizedVariants (input/output) 347 void sortVariants(const UVector&, UVector&, UErrorCode&) const; 348 // Takes a vector of strings (input) and a vector of strings (output) 349 void matchSelectorKeys(const UVector&, MessageContext&, ResolvedSelector&& rv, UVector&, UErrorCode&) const; 350 // Takes a vector of FormattedPlaceholders (input), 351 // and a vector of vectors of strings (output) 352 void resolvePreferences(MessageContext&, UVector&, UVector&, UErrorCode&) const; 353 354 // Formatting methods 355 [[nodiscard]] FormattedPlaceholder formatLiteral(const data_model::Literal&) const; 356 void formatPattern(MessageContext&, const Environment&, const data_model::Pattern&, UErrorCode&, UnicodeString&) const; 357 // Formats a call to a formatting function 358 // Dispatches on argument type 359 [[nodiscard]] FormattedPlaceholder evalFormatterCall(FormattedPlaceholder&& argument, 360 MessageContext& context, 361 UErrorCode& status) const; 362 // Dispatches on function name 363 [[nodiscard]] FormattedPlaceholder evalFormatterCall(const FunctionName& functionName, 364 FormattedPlaceholder&& argument, 365 FunctionOptions&& options, 366 MessageContext& context, 367 UErrorCode& status) const; 368 // Formats an expression that appears as a selector 369 ResolvedSelector formatSelectorExpression(const Environment& env, const data_model::Expression&, MessageContext&, UErrorCode&) const; 370 // Formats an expression that appears in a pattern or as the definition of a local variable 371 [[nodiscard]] FormattedPlaceholder formatExpression(const Environment&, const data_model::Expression&, MessageContext&, UErrorCode&) const; 372 [[nodiscard]] FunctionOptions resolveOptions(const Environment& env, const OptionMap&, MessageContext&, UErrorCode&) const; 373 [[nodiscard]] FormattedPlaceholder formatOperand(const Environment&, const data_model::Operand&, MessageContext&, UErrorCode&) const; 374 [[nodiscard]] FormattedPlaceholder evalArgument(const data_model::VariableName&, MessageContext&, UErrorCode&) const; 375 void formatSelectors(MessageContext& context, const Environment& env, UErrorCode &status, UnicodeString& result) const; 376 377 // Function registry methods hasCustomMFFunctionRegistry()378 bool hasCustomMFFunctionRegistry() const { 379 return (customMFFunctionRegistry != nullptr); 380 } 381 382 // Precondition: custom function registry exists 383 // Note: this is non-const because the values in the MFFunctionRegistry are mutable 384 // (a FormatterFactory can have mutable state) 385 const MFFunctionRegistry& getCustomMFFunctionRegistry() const; 386 387 bool isCustomFormatter(const FunctionName&) const; 388 FormatterFactory* lookupFormatterFactory(const FunctionName&, UErrorCode& status) const; 389 bool isBuiltInSelector(const FunctionName&) const; 390 bool isBuiltInFormatter(const FunctionName&) const; 391 bool isCustomSelector(const FunctionName&) const; 392 const SelectorFactory* lookupSelectorFactory(MessageContext&, const FunctionName&, UErrorCode&) const; isSelector(const FunctionName & fn)393 bool isSelector(const FunctionName& fn) const { return isBuiltInSelector(fn) || isCustomSelector(fn); } isFormatter(const FunctionName & fn)394 bool isFormatter(const FunctionName& fn) const { return isBuiltInFormatter(fn) || isCustomFormatter(fn); } 395 const Formatter* lookupFormatter(const FunctionName&, UErrorCode&) const; 396 397 Selector* getSelector(MessageContext&, const FunctionName&, UErrorCode&) const; 398 Formatter* getFormatter(const FunctionName&, UErrorCode&) const; 399 bool getDefaultFormatterNameByType(const UnicodeString&, FunctionName&) const; 400 401 // Checking for resolution errors 402 void checkDeclarations(MessageContext&, Environment*&, UErrorCode&) const; 403 void check(MessageContext&, const Environment&, const data_model::Expression&, UErrorCode&) const; 404 void check(MessageContext&, const Environment&, const data_model::Operand&, UErrorCode&) const; 405 void check(MessageContext&, const Environment&, const OptionMap&, UErrorCode&) const; 406 407 void initErrors(UErrorCode&); 408 void clearErrors() const; 409 void cleanup() noexcept; 410 411 // The locale this MessageFormatter was created with 412 /* const */ Locale locale; 413 414 // Registry for built-in functions 415 MFFunctionRegistry standardMFFunctionRegistry; 416 // Registry for custom functions; may be null if no custom registry supplied 417 // Note: this is *not* owned by the MessageFormatter object 418 // The reason for this choice is to have a non-destructive MessageFormatter::Builder, 419 // while also not requiring the function registry to be deeply-copyable. Making the 420 // function registry copyable would impose a requirement on any implementations 421 // of the FormatterFactory and SelectorFactory interfaces to implement a custom 422 // clone() method, which is necessary to avoid sharing between copies of the 423 // function registry (and thus double-frees) 424 // Not deeply immutable (the values in the function registry are mutable, 425 // as a FormatterFactory can have mutable state 426 const MFFunctionRegistry* customMFFunctionRegistry; 427 428 // Data model, representing the parsed message 429 MFDataModel dataModel; 430 431 // Normalized version of the input string (optional whitespace removed) 432 UnicodeString normalizedInput; 433 434 // Errors -- only used while parsing and checking for data model errors; then 435 // the MessageContext keeps track of errors 436 // Must be a raw pointer to avoid including the internal header file 437 // defining StaticErrors 438 // Owned by `this` 439 StaticErrors* errors = nullptr; 440 441 // Error handling behavior. 442 // If true, then formatting methods set their UErrorCode arguments 443 // to signal MessageFormat errors, and no useful output is returned. 444 // If false, then MessageFormat errors are not signaled and the 445 // formatting methods return best-effort output. 446 // The default is false. 447 bool signalErrors = false; 448 }; // class MessageFormatter 449 450 } // namespace message2 451 452 U_NAMESPACE_END 453 454 #endif // U_HIDE_DEPRECATED_API 455 456 #endif /* #if !UCONFIG_NO_MF2 */ 457 458 #endif /* #if !UCONFIG_NO_FORMATTING */ 459 460 #endif /* U_SHOW_CPLUSPLUS_API */ 461 462 #endif // MESSAGEFORMAT2_H 463 464 // eof 465