1 // © 2024 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 #include "unicode/utypes.h"
5
6 #if !UCONFIG_NO_FORMATTING
7
8 #if !UCONFIG_NO_MF2
9
10 #include "unicode/messageformat2_arguments.h"
11 #include "unicode/messageformat2_data_model.h"
12 #include "unicode/messageformat2_formattable.h"
13 #include "unicode/messageformat2.h"
14 #include "unicode/unistr.h"
15 #include "messageformat2_allocation.h"
16 #include "messageformat2_evaluation.h"
17 #include "messageformat2_macros.h"
18
19
20 U_NAMESPACE_BEGIN
21
22 namespace message2 {
23
24 using namespace data_model;
25
26 // ------------------------------------------------------
27 // Formatting
28
29 // The result of formatting a literal is just itself.
evalLiteral(const Literal & lit)30 static Formattable evalLiteral(const Literal& lit) {
31 return Formattable(lit.unquoted());
32 }
33
34 // Assumes that `var` is a message argument; returns the argument's value.
evalArgument(const VariableName & var,MessageContext & context,UErrorCode & errorCode) const35 [[nodiscard]] FormattedPlaceholder MessageFormatter::evalArgument(const VariableName& var, MessageContext& context, UErrorCode& errorCode) const {
36 if (U_SUCCESS(errorCode)) {
37 // The fallback for a variable name is itself.
38 UnicodeString str(DOLLAR);
39 str += var;
40 const Formattable* val = context.getGlobal(var, errorCode);
41 if (U_SUCCESS(errorCode)) {
42 return (FormattedPlaceholder(*val, str));
43 }
44 }
45 return {};
46 }
47
48 // Returns the contents of the literal
formatLiteral(const Literal & lit) const49 [[nodiscard]] FormattedPlaceholder MessageFormatter::formatLiteral(const Literal& lit) const {
50 // The fallback for a literal is itself.
51 return FormattedPlaceholder(evalLiteral(lit), lit.quoted());
52 }
53
formatOperand(const Environment & env,const Operand & rand,MessageContext & context,UErrorCode & status) const54 [[nodiscard]] FormattedPlaceholder MessageFormatter::formatOperand(const Environment& env,
55 const Operand& rand,
56 MessageContext& context,
57 UErrorCode &status) const {
58 if (U_FAILURE(status)) {
59 return {};
60 }
61
62 if (rand.isNull()) {
63 return FormattedPlaceholder();
64 }
65 if (rand.isVariable()) {
66 // Check if it's local or global
67 // Note: there is no name shadowing; this is enforced by the parser
68 const VariableName& var = rand.asVariable();
69 // TODO: Currently, this code implements lazy evaluation of locals.
70 // That is, the environment binds names to a closure, not a resolved value.
71 // Eager vs. lazy evaluation is an open issue:
72 // see https://github.com/unicode-org/message-format-wg/issues/299
73
74 // Look up the variable in the environment
75 if (env.has(var)) {
76 // `var` is a local -- look it up
77 const Closure& rhs = env.lookup(var);
78 // Format the expression using the environment from the closure
79 return formatExpression(rhs.getEnv(), rhs.getExpr(), context, status);
80 }
81 // Variable wasn't found in locals -- check if it's global
82 FormattedPlaceholder result = evalArgument(var, context, status);
83 if (status == U_ILLEGAL_ARGUMENT_ERROR) {
84 status = U_ZERO_ERROR;
85 // Unbound variable -- set a resolution error
86 context.getErrors().setUnresolvedVariable(var, status);
87 // Use fallback per
88 // https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#fallback-resolution
89 UnicodeString str(DOLLAR);
90 str += var;
91 return FormattedPlaceholder(str);
92 }
93 return result;
94 } else {
95 U_ASSERT(rand.isLiteral());
96 return formatLiteral(rand.asLiteral());
97 }
98 }
99
100 // Resolves a function's options
resolveOptions(const Environment & env,const OptionMap & options,MessageContext & context,UErrorCode & status) const101 FunctionOptions MessageFormatter::resolveOptions(const Environment& env, const OptionMap& options, MessageContext& context, UErrorCode& status) const {
102 LocalPointer<UVector> optionsVector(createUVector(status));
103 if (U_FAILURE(status)) {
104 return {};
105 }
106 LocalPointer<ResolvedFunctionOption> resolvedOpt;
107 for (int i = 0; i < options.size(); i++) {
108 const Option& opt = options.getOption(i, status);
109 if (U_FAILURE(status)) {
110 return {};
111 }
112 const UnicodeString& k = opt.getName();
113 const Operand& v = opt.getValue();
114
115 // Options are fully evaluated before calling the function
116 // Format the operand
117 FormattedPlaceholder rhsVal = formatOperand(env, v, context, status);
118 if (U_FAILURE(status)) {
119 return {};
120 }
121 if (!rhsVal.isFallback()) {
122 resolvedOpt.adoptInstead(create<ResolvedFunctionOption>(ResolvedFunctionOption(k, rhsVal.asFormattable()), status));
123 if (U_FAILURE(status)) {
124 return {};
125 }
126 optionsVector->adoptElement(resolvedOpt.orphan(), status);
127 }
128 }
129
130 return FunctionOptions(std::move(*optionsVector), status);
131 }
132
133 // Overload that dispatches on argument type. Syntax doesn't provide for options in this case.
evalFormatterCall(FormattedPlaceholder && argument,MessageContext & context,UErrorCode & status) const134 [[nodiscard]] FormattedPlaceholder MessageFormatter::evalFormatterCall(FormattedPlaceholder&& argument,
135 MessageContext& context,
136 UErrorCode& status) const {
137 if (U_FAILURE(status)) {
138 return {};
139 }
140
141 // These cases should have been checked for already
142 U_ASSERT(!argument.isFallback() && !argument.isNullOperand());
143
144 const Formattable& toFormat = argument.asFormattable();
145 switch (toFormat.getType()) {
146 case UFMT_OBJECT: {
147 const FormattableObject* obj = toFormat.getObject(status);
148 U_ASSERT(U_SUCCESS(status));
149 U_ASSERT(obj != nullptr);
150 const UnicodeString& type = obj->tag();
151 FunctionName functionName;
152 if (!getDefaultFormatterNameByType(type, functionName)) {
153 // No formatter for this type -- follow default behavior
154 break;
155 }
156 return evalFormatterCall(functionName,
157 std::move(argument),
158 FunctionOptions(),
159 context,
160 status);
161 }
162 default: {
163 // TODO: The array case isn't handled yet; not sure whether it's desirable
164 // to have a default list formatter
165 break;
166 }
167 }
168 // No formatter for this type, or it's a primitive type (which will be formatted later)
169 // -- just return the argument itself
170 return std::move(argument);
171 }
172
173 // Overload that dispatches on function name
evalFormatterCall(const FunctionName & functionName,FormattedPlaceholder && argument,FunctionOptions && options,MessageContext & context,UErrorCode & status) const174 [[nodiscard]] FormattedPlaceholder MessageFormatter::evalFormatterCall(const FunctionName& functionName,
175 FormattedPlaceholder&& argument,
176 FunctionOptions&& options,
177 MessageContext& context,
178 UErrorCode& status) const {
179 if (U_FAILURE(status)) {
180 return {};
181 }
182
183 DynamicErrors& errs = context.getErrors();
184
185 UnicodeString fallback(COLON);
186 fallback += functionName;
187 if (!argument.isNullOperand()) {
188 fallback = argument.fallback;
189 }
190
191 if (isFormatter(functionName)) {
192 LocalPointer<Formatter> formatterImpl(getFormatter(functionName, status));
193 if (U_FAILURE(status)) {
194 if (status == U_MF_FORMATTING_ERROR) {
195 errs.setFormattingError(functionName, status);
196 status = U_ZERO_ERROR;
197 return {};
198 }
199 if (status == U_MF_UNKNOWN_FUNCTION_ERROR) {
200 errs.setUnknownFunction(functionName, status);
201 status = U_ZERO_ERROR;
202 return {};
203 }
204 // Other errors are non-recoverable
205 return {};
206 }
207 U_ASSERT(formatterImpl != nullptr);
208
209 UErrorCode savedStatus = status;
210 FormattedPlaceholder result = formatterImpl->format(std::move(argument), std::move(options), status);
211 // Update errors
212 if (savedStatus != status) {
213 if (U_FAILURE(status)) {
214 if (status == U_MF_OPERAND_MISMATCH_ERROR) {
215 status = U_ZERO_ERROR;
216 errs.setOperandMismatchError(functionName, status);
217 } else {
218 status = U_ZERO_ERROR;
219 // Convey any error generated by the formatter
220 // as a formatting error, except for operand mismatch errors
221 errs.setFormattingError(functionName, status);
222 }
223 return FormattedPlaceholder(fallback);
224 } else {
225 // Ignore warnings
226 status = savedStatus;
227 }
228 }
229 // Ignore the output if any errors occurred
230 if (errs.hasFormattingError()) {
231 return FormattedPlaceholder(fallback);
232 }
233 return result;
234 }
235 // No formatter with this name -- set error
236 if (isSelector(functionName)) {
237 errs.setFormattingError(functionName, status);
238 } else {
239 errs.setUnknownFunction(functionName, status);
240 }
241 return FormattedPlaceholder(fallback);
242 }
243
244 // Formats an expression using `globalEnv` for the values of variables
formatExpression(const Environment & globalEnv,const Expression & expr,MessageContext & context,UErrorCode & status) const245 [[nodiscard]] FormattedPlaceholder MessageFormatter::formatExpression(const Environment& globalEnv,
246 const Expression& expr,
247 MessageContext& context,
248 UErrorCode &status) const {
249 if (U_FAILURE(status)) {
250 return {};
251 }
252
253 const Operand& rand = expr.getOperand();
254 // Format the operand (formatOperand handles the case of a null operand)
255 FormattedPlaceholder randVal = formatOperand(globalEnv, rand, context, status);
256
257 // Don't call the function on error values
258 if (randVal.isFallback()) {
259 return randVal;
260 }
261
262 if (!expr.isFunctionCall()) {
263 // Dispatch based on type of `randVal`
264 return evalFormatterCall(std::move(randVal),
265 context,
266 status);
267 } else {
268 const Operator* rator = expr.getOperator(status);
269 U_ASSERT(U_SUCCESS(status));
270 const FunctionName& functionName = rator->getFunctionName();
271 const OptionMap& options = rator->getOptionsInternal();
272 // Resolve the options
273 FunctionOptions resolvedOptions = resolveOptions(globalEnv, options, context, status);
274
275 // Call the formatter function
276 // The fallback for a nullary function call is the function name
277 UnicodeString fallback;
278 if (rand.isNull()) {
279 fallback = UnicodeString(COLON);
280 fallback += functionName;
281 } else {
282 fallback = randVal.fallback;
283 }
284 return evalFormatterCall(functionName,
285 std::move(randVal),
286 std::move(resolvedOptions),
287 context,
288 status);
289 }
290 }
291
292 // Formats each text and expression part of a pattern, appending the results to `result`
formatPattern(MessageContext & context,const Environment & globalEnv,const Pattern & pat,UErrorCode & status,UnicodeString & result) const293 void MessageFormatter::formatPattern(MessageContext& context, const Environment& globalEnv, const Pattern& pat, UErrorCode &status, UnicodeString& result) const {
294 CHECK_ERROR(status);
295
296 for (int32_t i = 0; i < pat.numParts(); i++) {
297 const PatternPart& part = pat.getPart(i);
298 if (part.isText()) {
299 result += part.asText();
300 } else if (part.isMarkup()) {
301 // Markup is ignored
302 } else {
303 // Format the expression
304 FormattedPlaceholder partVal = formatExpression(globalEnv, part.contents(), context, status);
305 // Force full evaluation, e.g. applying default formatters to
306 // unformatted input (or formatting numbers as strings)
307 UnicodeString partResult = partVal.formatToString(locale, status);
308 result += partResult;
309 // Handle formatting errors. `formatToString()` can't take a context and thus can't
310 // register an error directly
311 if (status == U_MF_FORMATTING_ERROR) {
312 status = U_ZERO_ERROR;
313 // TODO: The name of the formatter that failed is unavailable.
314 // Not ideal, but it's hard for `formatToString()`
315 // to pass along more detailed diagnostics
316 context.getErrors().setFormattingError(status);
317 }
318 }
319 }
320 }
321
322 // ------------------------------------------------------
323 // Selection
324
325 // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#resolve-selectors
326 // `res` is a vector of ResolvedSelectors
resolveSelectors(MessageContext & context,const Environment & env,UErrorCode & status,UVector & res) const327 void MessageFormatter::resolveSelectors(MessageContext& context, const Environment& env, UErrorCode &status, UVector& res) const {
328 CHECK_ERROR(status);
329 U_ASSERT(!dataModel.hasPattern());
330
331 const Expression* selectors = dataModel.getSelectorsInternal();
332 // 1. Let res be a new empty list of resolved values that support selection.
333 // (Implicit, since `res` is an out-parameter)
334 // 2. For each expression exp of the message's selectors
335 for (int32_t i = 0; i < dataModel.numSelectors(); i++) {
336 // 2i. Let rv be the resolved value of exp.
337 ResolvedSelector rv = formatSelectorExpression(env, selectors[i], context, status);
338 if (rv.hasSelector()) {
339 // 2ii. If selection is supported for rv:
340 // (True if this code has been reached)
341 } else {
342 // 2iii. Else:
343 // Let nomatch be a resolved value for which selection always fails.
344 // Append nomatch as the last element of the list res.
345 // Emit a Selection Error.
346 // (Note: in this case, rv, being a fallback, serves as `nomatch`)
347 #if U_DEBUG
348 const DynamicErrors& err = context.getErrors();
349 U_ASSERT(err.hasError());
350 U_ASSERT(rv.argument().isFallback());
351 #endif
352 }
353 // 2ii(a). Append rv as the last element of the list res.
354 // (Also fulfills 2iii)
355 LocalPointer<ResolvedSelector> v(create<ResolvedSelector>(std::move(rv), status));
356 CHECK_ERROR(status);
357 res.adoptElement(v.orphan(), status);
358 }
359 }
360
361 // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#resolve-preferences
362 // `keys` and `matches` are vectors of strings
matchSelectorKeys(const UVector & keys,MessageContext & context,ResolvedSelector && rv,UVector & keysOut,UErrorCode & status) const363 void MessageFormatter::matchSelectorKeys(const UVector& keys,
364 MessageContext& context,
365 ResolvedSelector&& rv,
366 UVector& keysOut,
367 UErrorCode& status) const {
368 CHECK_ERROR(status);
369
370 if (!rv.hasSelector()) {
371 // Return an empty list of matches
372 return;
373 }
374
375 auto selectorImpl = rv.getSelector();
376 U_ASSERT(selectorImpl != nullptr);
377 UErrorCode savedStatus = status;
378
379 // Convert `keys` to an array
380 int32_t keysLen = keys.size();
381 UnicodeString* keysArr = new UnicodeString[keysLen];
382 if (keysArr == nullptr) {
383 status = U_MEMORY_ALLOCATION_ERROR;
384 return;
385 }
386 for (int32_t i = 0; i < keysLen; i++) {
387 const UnicodeString* k = static_cast<UnicodeString*>(keys[i]);
388 U_ASSERT(k != nullptr);
389 keysArr[i] = *k;
390 }
391 LocalArray<UnicodeString> adoptedKeys(keysArr);
392
393 // Create an array to hold the output
394 UnicodeString* prefsArr = new UnicodeString[keysLen];
395 if (prefsArr == nullptr) {
396 status = U_MEMORY_ALLOCATION_ERROR;
397 return;
398 }
399 LocalArray<UnicodeString> adoptedPrefs(prefsArr);
400 int32_t prefsLen = 0;
401
402 // Call the selector
403 selectorImpl->selectKey(rv.takeArgument(), rv.takeOptions(),
404 adoptedKeys.getAlias(), keysLen, adoptedPrefs.getAlias(), prefsLen,
405 status);
406
407 // Update errors
408 if (savedStatus != status) {
409 if (U_FAILURE(status)) {
410 status = U_ZERO_ERROR;
411 context.getErrors().setSelectorError(rv.getSelectorName(), status);
412 } else {
413 // Ignore warnings
414 status = savedStatus;
415 }
416 }
417
418 CHECK_ERROR(status);
419
420 // Copy the resulting keys (if there was no error)
421 keysOut.removeAllElements();
422 for (int32_t i = 0; i < prefsLen; i++) {
423 UnicodeString* k = message2::create<UnicodeString>(std::move(prefsArr[i]), status);
424 if (k == nullptr) {
425 status = U_MEMORY_ALLOCATION_ERROR;
426 return;
427 }
428 keysOut.adoptElement(k, status);
429 CHECK_ERROR(status);
430 }
431 }
432
433 // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#resolve-preferences
434 // `res` is a vector of FormattedPlaceholders;
435 // `pref` is a vector of vectors of strings
resolvePreferences(MessageContext & context,UVector & res,UVector & pref,UErrorCode & status) const436 void MessageFormatter::resolvePreferences(MessageContext& context, UVector& res, UVector& pref, UErrorCode &status) const {
437 CHECK_ERROR(status);
438
439 // 1. Let pref be a new empty list of lists of strings.
440 // (Implicit, since `pref` is an out-parameter)
441 UnicodeString ks;
442 LocalPointer<UnicodeString> ksP;
443 int32_t numVariants = dataModel.numVariants();
444 const Variant* variants = dataModel.getVariantsInternal();
445 // 2. For each index i in res
446 for (int32_t i = 0; i < res.size(); i++) {
447 // 2i. Let keys be a new empty list of strings.
448 LocalPointer<UVector> keys(createUVector(status));
449 CHECK_ERROR(status);
450 // 2ii. For each variant `var` of the message
451 for (int32_t variantNum = 0; variantNum < numVariants; variantNum++) {
452 const SelectorKeys& selectorKeys = variants[variantNum].getKeys();
453
454 // Note: Here, `var` names the key list of `var`,
455 // not a Variant itself
456 const Key* var = selectorKeys.getKeysInternal();
457 // 2ii(a). Let `key` be the `var` key at position i.
458 U_ASSERT(i < selectorKeys.len); // established by semantic check in formatSelectors()
459 const Key& key = var[i];
460 // 2ii(b). If `key` is not the catch-all key '*'
461 if (!key.isWildcard()) {
462 // 2ii(b)(a) Assert that key is a literal.
463 // (Not needed)
464 // 2ii(b)(b) Let `ks` be the resolved value of `key`.
465 ks = key.asLiteral().unquoted();
466 // 2ii(b)(c) Append `ks` as the last element of the list `keys`.
467 ksP.adoptInstead(create<UnicodeString>(std::move(ks), status));
468 CHECK_ERROR(status);
469 keys->adoptElement(ksP.orphan(), status);
470 }
471 }
472 // 2iii. Let `rv` be the resolved value at index `i` of `res`.
473 U_ASSERT(i < res.size());
474 ResolvedSelector rv = std::move(*(static_cast<ResolvedSelector*>(res[i])));
475 // 2iv. Let matches be the result of calling the method MatchSelectorKeys(rv, keys)
476 LocalPointer<UVector> matches(createUVector(status));
477 matchSelectorKeys(*keys, context, std::move(rv), *matches, status);
478 // 2v. Append `matches` as the last element of the list `pref`
479 pref.adoptElement(matches.orphan(), status);
480 }
481 }
482
483 // `v` is assumed to be a vector of strings
vectorFind(const UVector & v,const UnicodeString & k)484 static int32_t vectorFind(const UVector& v, const UnicodeString& k) {
485 for (int32_t i = 0; i < v.size(); i++) {
486 if (*static_cast<UnicodeString*>(v[i]) == k) {
487 return i;
488 }
489 }
490 return -1;
491 }
492
vectorContains(const UVector & v,const UnicodeString & k)493 static UBool vectorContains(const UVector& v, const UnicodeString& k) {
494 return (vectorFind(v, k) != -1);
495 }
496
497 // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#filter-variants
498 // `pref` is a vector of vectors of strings. `vars` is a vector of PrioritizedVariants
filterVariants(const UVector & pref,UVector & vars,UErrorCode & status) const499 void MessageFormatter::filterVariants(const UVector& pref, UVector& vars, UErrorCode& status) const {
500 const Variant* variants = dataModel.getVariantsInternal();
501
502 // 1. Let `vars` be a new empty list of variants.
503 // (Not needed since `vars` is an out-parameter)
504 // 2. For each variant `var` of the message:
505 for (int32_t j = 0; j < dataModel.numVariants(); j++) {
506 const SelectorKeys& selectorKeys = variants[j].getKeys();
507 const Pattern& p = variants[j].getPattern();
508
509 // Note: Here, `var` names the key list of `var`,
510 // not a Variant itself
511 const Key* var = selectorKeys.getKeysInternal();
512 // 2i. For each index `i` in `pref`:
513 bool noMatch = false;
514 for (int32_t i = 0; i < pref.size(); i++) {
515 // 2i(a). Let `key` be the `var` key at position `i`.
516 U_ASSERT(i < selectorKeys.len);
517 const Key& key = var[i];
518 // 2i(b). If key is the catch-all key '*':
519 if (key.isWildcard()) {
520 // 2i(b)(a). Continue the inner loop on pref.
521 continue;
522 }
523 // 2i(c). Assert that `key` is a literal.
524 // (Not needed)
525 // 2i(d). Let `ks` be the resolved value of `key`.
526 UnicodeString ks = key.asLiteral().unquoted();
527 // 2i(e). Let `matches` be the list of strings at index `i` of `pref`.
528 const UVector& matches = *(static_cast<UVector*>(pref[i])); // `matches` is a vector of strings
529 // 2i(f). If `matches` includes `ks`
530 if (vectorContains(matches, ks)) {
531 // 2i(f)(a). Continue the inner loop on `pref`.
532 continue;
533 }
534 // 2i(g). Else:
535 // 2i(g)(a). Continue the outer loop on message variants.
536 noMatch = true;
537 break;
538 }
539 if (!noMatch) {
540 // Append `var` as the last element of the list `vars`.
541 PrioritizedVariant* tuple = create<PrioritizedVariant>(PrioritizedVariant(-1, selectorKeys, p), status);
542 CHECK_ERROR(status);
543 vars.adoptElement(tuple, status);
544 }
545 }
546 }
547
548 // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#sort-variants
549 // Leaves the preferred variant as element 0 in `sortable`
550 // Note: this sorts in-place, so `sortable` is just `vars`
551 // `pref` is a vector of vectors of strings; `vars` is a vector of PrioritizedVariants
sortVariants(const UVector & pref,UVector & vars,UErrorCode & status) const552 void MessageFormatter::sortVariants(const UVector& pref, UVector& vars, UErrorCode& status) const {
553 CHECK_ERROR(status);
554
555 // Note: steps 1 and 2 are omitted since we use `vars` as `sortable` (we sort in-place)
556 // 1. Let `sortable` be a new empty list of (integer, variant) tuples.
557 // (Not needed since `sortable` is an out-parameter)
558 // 2. For each variant `var` of `vars`
559 // 2i. Let tuple be a new tuple (-1, var).
560 // 2ii. Append `tuple` as the last element of the list `sortable`.
561
562 // 3. Let `len` be the integer count of items in `pref`.
563 int32_t len = pref.size();
564 // 4. Let `i` be `len` - 1.
565 int32_t i = len - 1;
566 // 5. While i >= 0:
567 while (i >= 0) {
568 // 5i. Let `matches` be the list of strings at index `i` of `pref`.
569 U_ASSERT(pref[i] != nullptr);
570 const UVector& matches = *(static_cast<UVector*>(pref[i])); // `matches` is a vector of strings
571 // 5ii. Let `minpref` be the integer count of items in `matches`.
572 int32_t minpref = matches.size();
573 // 5iii. For each tuple `tuple` of `sortable`:
574 for (int32_t j = 0; j < vars.size(); j++) {
575 U_ASSERT(vars[j] != nullptr);
576 PrioritizedVariant& tuple = *(static_cast<PrioritizedVariant*>(vars[j]));
577 // 5iii(a). Let matchpref be an integer with the value minpref.
578 int32_t matchpref = minpref;
579 // 5iii(b). Let `key` be the tuple variant key at position `i`.
580 const Key* tupleVariantKeys = tuple.keys.getKeysInternal();
581 U_ASSERT(i < tuple.keys.len); // Given by earlier semantic checking
582 const Key& key = tupleVariantKeys[i];
583 // 5iii(c) If `key` is not the catch-all key '*':
584 if (!key.isWildcard()) {
585 // 5iii(c)(a). Assert that `key` is a literal.
586 // (Not needed)
587 // 5iii(c)(b). Let `ks` be the resolved value of `key`.
588 UnicodeString ks = key.asLiteral().unquoted();
589 // 5iii(c)(c) Let matchpref be the integer position of ks in `matches`.
590 matchpref = vectorFind(matches, ks);
591 U_ASSERT(matchpref >= 0);
592 }
593 // 5iii(d) Set the `tuple` integer value as matchpref.
594 tuple.priority = matchpref;
595 }
596 // 5iv. Set `sortable` to be the result of calling the method SortVariants(`sortable`)
597 vars.sort(comparePrioritizedVariants, status);
598 CHECK_ERROR(status);
599 // 5v. Set `i` to be `i` - 1.
600 i--;
601 }
602 // The caller is responsible for steps 6 and 7
603 // 6. Let `var` be the `variant` element of the first element of `sortable`.
604 // 7. Select the pattern of `var`
605 }
606
607
608 // Evaluate the operand
resolveVariables(const Environment & env,const Operand & rand,MessageContext & context,UErrorCode & status) const609 ResolvedSelector MessageFormatter::resolveVariables(const Environment& env, const Operand& rand, MessageContext& context, UErrorCode &status) const {
610 if (U_FAILURE(status)) {
611 return {};
612 }
613
614 if (rand.isNull()) {
615 return ResolvedSelector(FormattedPlaceholder());
616 }
617
618 if (rand.isLiteral()) {
619 return ResolvedSelector(formatLiteral(rand.asLiteral()));
620 }
621
622 // Must be variable
623 const VariableName& var = rand.asVariable();
624 // Resolve the variable
625 if (env.has(var)) {
626 const Closure& referent = env.lookup(var);
627 // Resolve the referent
628 return resolveVariables(referent.getEnv(), referent.getExpr(), context, status);
629 }
630 // Either this is a global var or an unbound var --
631 // either way, it can't be bound to a function call.
632 // Check globals
633 FormattedPlaceholder val = evalArgument(var, context, status);
634 if (status == U_ILLEGAL_ARGUMENT_ERROR) {
635 status = U_ZERO_ERROR;
636 // Unresolved variable -- could be a previous warning. Nothing to resolve
637 U_ASSERT(context.getErrors().hasUnresolvedVariableError());
638 return ResolvedSelector(FormattedPlaceholder(var));
639 }
640 // Pass through other errors
641 return ResolvedSelector(std::move(val));
642 }
643
644 // Evaluate the expression except for not performing the top-level function call
645 // (which is expected to be a selector, but may not be, in error cases)
resolveVariables(const Environment & env,const Expression & expr,MessageContext & context,UErrorCode & status) const646 ResolvedSelector MessageFormatter::resolveVariables(const Environment& env,
647 const Expression& expr,
648 MessageContext& context,
649 UErrorCode &status) const {
650 if (U_FAILURE(status)) {
651 return {};
652 }
653
654 // Function call -- resolve the operand and options
655 if (expr.isFunctionCall()) {
656 const Operator* rator = expr.getOperator(status);
657 U_ASSERT(U_SUCCESS(status));
658 // Already checked that rator is non-reserved
659 const FunctionName& selectorName = rator->getFunctionName();
660 if (isSelector(selectorName)) {
661 auto selector = getSelector(context, selectorName, status);
662 if (U_SUCCESS(status)) {
663 FunctionOptions resolvedOptions = resolveOptions(env, rator->getOptionsInternal(), context, status);
664 // Operand may be the null argument, but resolveVariables() handles that
665 FormattedPlaceholder argument = formatOperand(env, expr.getOperand(), context, status);
666 return ResolvedSelector(selectorName, selector, std::move(resolvedOptions), std::move(argument));
667 }
668 } else if (isFormatter(selectorName)) {
669 context.getErrors().setSelectorError(selectorName, status);
670 } else {
671 context.getErrors().setUnknownFunction(selectorName, status);
672 }
673 // Non-selector used as selector; an error would have been recorded earlier
674 UnicodeString fallback(COLON);
675 fallback += selectorName;
676 if (!expr.getOperand().isNull()) {
677 fallback = formatOperand(env, expr.getOperand(), context, status).fallback;
678 }
679 return ResolvedSelector(FormattedPlaceholder(fallback));
680 } else {
681 // Might be a variable reference, so expand one more level of variable
682 return resolveVariables(env, expr.getOperand(), context, status);
683 }
684 }
685
formatSelectorExpression(const Environment & globalEnv,const Expression & expr,MessageContext & context,UErrorCode & status) const686 ResolvedSelector MessageFormatter::formatSelectorExpression(const Environment& globalEnv, const Expression& expr, MessageContext& context, UErrorCode &status) const {
687 if (U_FAILURE(status)) {
688 return {};
689 }
690
691 // Resolve expression to determine if it's a function call
692 ResolvedSelector exprResult = resolveVariables(globalEnv, expr, context, status);
693
694 DynamicErrors& err = context.getErrors();
695
696 // If there is a selector, then `resolveVariables()` recorded it in the context
697 if (exprResult.hasSelector()) {
698 // Check if there was an error
699 if (exprResult.argument().isFallback()) {
700 // Use a null expression if it's a syntax or data model warning;
701 // create a valid (non-fallback) formatted placeholder from the
702 // fallback string otherwise
703 if (err.hasSyntaxError() || err.hasDataModelError()) {
704 return ResolvedSelector(FormattedPlaceholder()); // Null operand
705 } else {
706 return ResolvedSelector(exprResult.takeArgument());
707 }
708 }
709 return exprResult;
710 }
711
712 // No selector was found; error should already have been set
713 U_ASSERT(err.hasMissingSelectorAnnotationError() || err.hasUnknownFunctionError() || err.hasSelectorError());
714 return ResolvedSelector(FormattedPlaceholder(exprResult.argument().fallback));
715 }
716
formatSelectors(MessageContext & context,const Environment & env,UErrorCode & status,UnicodeString & result) const717 void MessageFormatter::formatSelectors(MessageContext& context, const Environment& env, UErrorCode &status, UnicodeString& result) const {
718 CHECK_ERROR(status);
719
720 // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#pattern-selection
721
722 // Resolve Selectors
723 // res is a vector of FormattedPlaceholders
724 LocalPointer<UVector> res(createUVector(status));
725 CHECK_ERROR(status);
726 resolveSelectors(context, env, status, *res);
727
728 // Resolve Preferences
729 // pref is a vector of vectors of strings
730 LocalPointer<UVector> pref(createUVector(status));
731 CHECK_ERROR(status);
732 resolvePreferences(context, *res, *pref, status);
733
734 // Filter Variants
735 // vars is a vector of PrioritizedVariants
736 LocalPointer<UVector> vars(createUVector(status));
737 CHECK_ERROR(status);
738 filterVariants(*pref, *vars, status);
739
740 // Sort Variants and select the final pattern
741 // Note: `sortable` in the spec is just `vars` here,
742 // which is sorted in-place
743 sortVariants(*pref, *vars, status);
744
745 CHECK_ERROR(status);
746
747 // 6. Let `var` be the `variant` element of the first element of `sortable`.
748 U_ASSERT(vars->size() > 0); // This should have been checked earlier (having 0 variants would be a data model error)
749 const PrioritizedVariant& var = *(static_cast<PrioritizedVariant*>(vars->elementAt(0)));
750 // 7. Select the pattern of `var`
751 const Pattern& pat = var.pat;
752
753 // Format the pattern
754 formatPattern(context, env, pat, status, result);
755 }
756
757 // Note: this is non-const due to the function registry being non-const, which is in turn
758 // due to the values (`FormatterFactory` objects in the map) having mutable state.
759 // In other words, formatting a message can mutate the underlying `MessageFormatter` by changing
760 // state within the factory objects that represent custom formatters.
formatToString(const MessageArguments & arguments,UErrorCode & status)761 UnicodeString MessageFormatter::formatToString(const MessageArguments& arguments, UErrorCode &status) {
762 EMPTY_ON_ERROR(status);
763
764 // Create a new environment that will store closures for all local variables
765 Environment* env = Environment::create(status);
766 // Create a new context with the given arguments and the `errors` structure
767 MessageContext context(arguments, *errors, status);
768
769 // Check for unresolved variable errors
770 checkDeclarations(context, env, status);
771 LocalPointer<Environment> globalEnv(env);
772
773 UnicodeString result;
774 if (dataModel.hasPattern()) {
775 formatPattern(context, *globalEnv, dataModel.getPattern(), status, result);
776 } else {
777 // Check for errors/warnings -- if so, then the result of pattern selection is the fallback value
778 // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#pattern-selection
779 const DynamicErrors& err = context.getErrors();
780 if (err.hasSyntaxError() || err.hasDataModelError()) {
781 result += REPLACEMENT;
782 } else {
783 formatSelectors(context, *globalEnv, status, result);
784 }
785 }
786 // Update status according to all errors seen while formatting
787 if (signalErrors) {
788 context.checkErrors(status);
789 }
790 if (U_FAILURE(status)) {
791 result.remove();
792 }
793 return result;
794 }
795
796 // ----------------------------------------
797 // Checking for resolution errors
798
check(MessageContext & context,const Environment & localEnv,const OptionMap & options,UErrorCode & status) const799 void MessageFormatter::check(MessageContext& context, const Environment& localEnv, const OptionMap& options, UErrorCode& status) const {
800 // Check the RHS of each option
801 for (int32_t i = 0; i < options.size(); i++) {
802 const Option& opt = options.getOption(i, status);
803 CHECK_ERROR(status);
804 check(context, localEnv, opt.getValue(), status);
805 }
806 }
807
check(MessageContext & context,const Environment & localEnv,const Operand & rand,UErrorCode & status) const808 void MessageFormatter::check(MessageContext& context, const Environment& localEnv, const Operand& rand, UErrorCode& status) const {
809 // Nothing to check for literals
810 if (rand.isLiteral() || rand.isNull()) {
811 return;
812 }
813
814 // Check that variable is in scope
815 const VariableName& var = rand.asVariable();
816 // Check local scope
817 if (localEnv.has(var)) {
818 return;
819 }
820 // Check global scope
821 context.getGlobal(var, status);
822 if (status == U_ILLEGAL_ARGUMENT_ERROR) {
823 status = U_ZERO_ERROR;
824 context.getErrors().setUnresolvedVariable(var, status);
825 }
826 // Either `var` is a global, or some other error occurred.
827 // Nothing more to do either way
828 return;
829 }
830
check(MessageContext & context,const Environment & localEnv,const Expression & expr,UErrorCode & status) const831 void MessageFormatter::check(MessageContext& context, const Environment& localEnv, const Expression& expr, UErrorCode& status) const {
832 // Check for unresolved variable errors
833 if (expr.isFunctionCall()) {
834 const Operator* rator = expr.getOperator(status);
835 U_ASSERT(U_SUCCESS(status));
836 const Operand& rand = expr.getOperand();
837 check(context, localEnv, rand, status);
838 check(context, localEnv, rator->getOptionsInternal(), status);
839 }
840 }
841
842 // Check for resolution errors
checkDeclarations(MessageContext & context,Environment * & env,UErrorCode & status) const843 void MessageFormatter::checkDeclarations(MessageContext& context, Environment*& env, UErrorCode &status) const {
844 CHECK_ERROR(status);
845
846 const Binding* decls = getDataModel().getLocalVariablesInternal();
847 U_ASSERT(env != nullptr && (decls != nullptr || getDataModel().bindingsLen == 0));
848
849 for (int32_t i = 0; i < getDataModel().bindingsLen; i++) {
850 const Binding& decl = decls[i];
851 const Expression& rhs = decl.getValue();
852 check(context, *env, rhs, status);
853
854 // Add a closure to the global environment,
855 // memoizing the value of localEnv up to this point
856
857 // Add the LHS to the environment for checking the next declaration
858 env = Environment::create(decl.getVariable(), Closure(rhs, *env), env, status);
859 CHECK_ERROR(status);
860 }
861 }
862 } // namespace message2
863
864 U_NAMESPACE_END
865
866 #endif /* #if !UCONFIG_NO_MF2 */
867
868 #endif /* #if !UCONFIG_NO_FORMATTING */
869