• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2024 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include "unicode/utypes.h"
5 
6 #if !UCONFIG_NO_FORMATTING
7 
8 #if !UCONFIG_NO_MF2
9 
10 #include "unicode/messageformat2_arguments.h"
11 #include "unicode/messageformat2_data_model.h"
12 #include "unicode/messageformat2_formattable.h"
13 #include "unicode/messageformat2.h"
14 #include "unicode/unistr.h"
15 #include "messageformat2_allocation.h"
16 #include "messageformat2_evaluation.h"
17 #include "messageformat2_macros.h"
18 
19 
20 U_NAMESPACE_BEGIN
21 
22 namespace message2 {
23 
24 using namespace data_model;
25 
26 // ------------------------------------------------------
27 // Formatting
28 
29 // The result of formatting a literal is just itself.
evalLiteral(const Literal & lit)30 static Formattable evalLiteral(const Literal& lit) {
31     return Formattable(lit.unquoted());
32 }
33 
34 // Assumes that `var` is a message argument; returns the argument's value.
evalArgument(const VariableName & var,MessageContext & context,UErrorCode & errorCode) const35 [[nodiscard]] FormattedPlaceholder MessageFormatter::evalArgument(const VariableName& var, MessageContext& context, UErrorCode& errorCode) const {
36     if (U_SUCCESS(errorCode)) {
37         // The fallback for a variable name is itself.
38         UnicodeString str(DOLLAR);
39         str += var;
40         const Formattable* val = context.getGlobal(var, errorCode);
41         if (U_SUCCESS(errorCode)) {
42             return (FormattedPlaceholder(*val, str));
43         }
44     }
45     return {};
46 }
47 
48 // Returns the contents of the literal
formatLiteral(const Literal & lit) const49 [[nodiscard]] FormattedPlaceholder MessageFormatter::formatLiteral(const Literal& lit) const {
50     // The fallback for a literal is itself.
51     return FormattedPlaceholder(evalLiteral(lit), lit.quoted());
52 }
53 
formatOperand(const Environment & env,const Operand & rand,MessageContext & context,UErrorCode & status) const54 [[nodiscard]] FormattedPlaceholder MessageFormatter::formatOperand(const Environment& env,
55                                                              const Operand& rand,
56                                                              MessageContext& context,
57                                                              UErrorCode &status) const {
58     if (U_FAILURE(status)) {
59         return {};
60     }
61 
62     if (rand.isNull()) {
63         return FormattedPlaceholder();
64     }
65     if (rand.isVariable()) {
66         // Check if it's local or global
67         // Note: there is no name shadowing; this is enforced by the parser
68         const VariableName& var = rand.asVariable();
69         // TODO: Currently, this code implements lazy evaluation of locals.
70         // That is, the environment binds names to a closure, not a resolved value.
71         // Eager vs. lazy evaluation is an open issue:
72         // see https://github.com/unicode-org/message-format-wg/issues/299
73 
74         // Look up the variable in the environment
75         if (env.has(var)) {
76           // `var` is a local -- look it up
77           const Closure& rhs = env.lookup(var);
78           // Format the expression using the environment from the closure
79           return formatExpression(rhs.getEnv(), rhs.getExpr(), context, status);
80         }
81         // Variable wasn't found in locals -- check if it's global
82         FormattedPlaceholder result = evalArgument(var, context, status);
83         if (status == U_ILLEGAL_ARGUMENT_ERROR) {
84             status = U_ZERO_ERROR;
85             // Unbound variable -- set a resolution error
86             context.getErrors().setUnresolvedVariable(var, status);
87             // Use fallback per
88             // https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#fallback-resolution
89             UnicodeString str(DOLLAR);
90             str += var;
91             return FormattedPlaceholder(str);
92         }
93         return result;
94     } else {
95         U_ASSERT(rand.isLiteral());
96         return formatLiteral(rand.asLiteral());
97     }
98 }
99 
100 // Resolves a function's options
resolveOptions(const Environment & env,const OptionMap & options,MessageContext & context,UErrorCode & status) const101 FunctionOptions MessageFormatter::resolveOptions(const Environment& env, const OptionMap& options, MessageContext& context, UErrorCode& status) const {
102     LocalPointer<UVector> optionsVector(createUVector(status));
103     if (U_FAILURE(status)) {
104         return {};
105     }
106     LocalPointer<ResolvedFunctionOption> resolvedOpt;
107     for (int i = 0; i < options.size(); i++) {
108         const Option& opt = options.getOption(i, status);
109         if (U_FAILURE(status)) {
110             return {};
111         }
112         const UnicodeString& k = opt.getName();
113         const Operand& v = opt.getValue();
114 
115         // Options are fully evaluated before calling the function
116         // Format the operand
117         FormattedPlaceholder rhsVal = formatOperand(env, v, context, status);
118         if (U_FAILURE(status)) {
119             return {};
120         }
121         if (!rhsVal.isFallback()) {
122             resolvedOpt.adoptInstead(create<ResolvedFunctionOption>(ResolvedFunctionOption(k, rhsVal.asFormattable()), status));
123             if (U_FAILURE(status)) {
124                 return {};
125             }
126             optionsVector->adoptElement(resolvedOpt.orphan(), status);
127         }
128     }
129 
130     return FunctionOptions(std::move(*optionsVector), status);
131 }
132 
133 // Overload that dispatches on argument type. Syntax doesn't provide for options in this case.
evalFormatterCall(FormattedPlaceholder && argument,MessageContext & context,UErrorCode & status) const134 [[nodiscard]] FormattedPlaceholder MessageFormatter::evalFormatterCall(FormattedPlaceholder&& argument,
135                                                                        MessageContext& context,
136                                                                        UErrorCode& status) const {
137     if (U_FAILURE(status)) {
138         return {};
139     }
140 
141     // These cases should have been checked for already
142     U_ASSERT(!argument.isFallback() && !argument.isNullOperand());
143 
144     const Formattable& toFormat = argument.asFormattable();
145     switch (toFormat.getType()) {
146     case UFMT_OBJECT: {
147         const FormattableObject* obj = toFormat.getObject(status);
148         U_ASSERT(U_SUCCESS(status));
149         U_ASSERT(obj != nullptr);
150         const UnicodeString& type = obj->tag();
151         FunctionName functionName;
152         if (!getDefaultFormatterNameByType(type, functionName)) {
153             // No formatter for this type -- follow default behavior
154             break;
155         }
156         return evalFormatterCall(functionName,
157                                  std::move(argument),
158                                  FunctionOptions(),
159                                  context,
160                                  status);
161     }
162     default: {
163         // TODO: The array case isn't handled yet; not sure whether it's desirable
164         // to have a default list formatter
165         break;
166     }
167     }
168     // No formatter for this type, or it's a primitive type (which will be formatted later)
169     // -- just return the argument itself
170     return std::move(argument);
171 }
172 
173 // Overload that dispatches on function name
evalFormatterCall(const FunctionName & functionName,FormattedPlaceholder && argument,FunctionOptions && options,MessageContext & context,UErrorCode & status) const174 [[nodiscard]] FormattedPlaceholder MessageFormatter::evalFormatterCall(const FunctionName& functionName,
175                                                                  FormattedPlaceholder&& argument,
176                                                                  FunctionOptions&& options,
177                                                                  MessageContext& context,
178                                                                  UErrorCode& status) const {
179     if (U_FAILURE(status)) {
180         return {};
181     }
182 
183     DynamicErrors& errs = context.getErrors();
184 
185     UnicodeString fallback(COLON);
186     fallback += functionName;
187     if (!argument.isNullOperand()) {
188         fallback = argument.fallback;
189     }
190 
191     if (isFormatter(functionName)) {
192         LocalPointer<Formatter> formatterImpl(getFormatter(functionName, status));
193         if (U_FAILURE(status)) {
194             if (status == U_MF_FORMATTING_ERROR) {
195                 errs.setFormattingError(functionName, status);
196                 status = U_ZERO_ERROR;
197                 return {};
198             }
199             if (status == U_MF_UNKNOWN_FUNCTION_ERROR) {
200                 errs.setUnknownFunction(functionName, status);
201                 status = U_ZERO_ERROR;
202                 return {};
203             }
204             // Other errors are non-recoverable
205             return {};
206         }
207         U_ASSERT(formatterImpl != nullptr);
208 
209         UErrorCode savedStatus = status;
210         FormattedPlaceholder result = formatterImpl->format(std::move(argument), std::move(options), status);
211         // Update errors
212         if (savedStatus != status) {
213             if (U_FAILURE(status)) {
214                 if (status == U_MF_OPERAND_MISMATCH_ERROR) {
215                     status = U_ZERO_ERROR;
216                     errs.setOperandMismatchError(functionName, status);
217                 } else {
218                     status = U_ZERO_ERROR;
219                     // Convey any error generated by the formatter
220                     // as a formatting error, except for operand mismatch errors
221                     errs.setFormattingError(functionName, status);
222                 }
223                 return FormattedPlaceholder(fallback);
224             } else {
225                 // Ignore warnings
226                 status = savedStatus;
227             }
228         }
229         // Ignore the output if any errors occurred
230         if (errs.hasFormattingError()) {
231             return FormattedPlaceholder(fallback);
232         }
233         return result;
234     }
235     // No formatter with this name -- set error
236     if (isSelector(functionName)) {
237         errs.setFormattingError(functionName, status);
238     } else {
239         errs.setUnknownFunction(functionName, status);
240     }
241     return FormattedPlaceholder(fallback);
242 }
243 
244 // Formats an expression using `globalEnv` for the values of variables
formatExpression(const Environment & globalEnv,const Expression & expr,MessageContext & context,UErrorCode & status) const245 [[nodiscard]] FormattedPlaceholder MessageFormatter::formatExpression(const Environment& globalEnv,
246                                                                 const Expression& expr,
247                                                                 MessageContext& context,
248                                                                 UErrorCode &status) const {
249     if (U_FAILURE(status)) {
250         return {};
251     }
252 
253     const Operand& rand = expr.getOperand();
254     // Format the operand (formatOperand handles the case of a null operand)
255     FormattedPlaceholder randVal = formatOperand(globalEnv, rand, context, status);
256 
257     // Don't call the function on error values
258     if (randVal.isFallback()) {
259         return randVal;
260     }
261 
262     if (!expr.isFunctionCall()) {
263         // Dispatch based on type of `randVal`
264         return evalFormatterCall(std::move(randVal),
265                                  context,
266                                  status);
267     } else {
268         const Operator* rator = expr.getOperator(status);
269         U_ASSERT(U_SUCCESS(status));
270         const FunctionName& functionName = rator->getFunctionName();
271         const OptionMap& options = rator->getOptionsInternal();
272         // Resolve the options
273         FunctionOptions resolvedOptions = resolveOptions(globalEnv, options, context, status);
274 
275         // Call the formatter function
276         // The fallback for a nullary function call is the function name
277         UnicodeString fallback;
278         if (rand.isNull()) {
279             fallback = UnicodeString(COLON);
280             fallback += functionName;
281         } else {
282             fallback = randVal.fallback;
283         }
284         return evalFormatterCall(functionName,
285                                  std::move(randVal),
286                                  std::move(resolvedOptions),
287                                  context,
288                                  status);
289     }
290 }
291 
292 // Formats each text and expression part of a pattern, appending the results to `result`
formatPattern(MessageContext & context,const Environment & globalEnv,const Pattern & pat,UErrorCode & status,UnicodeString & result) const293 void MessageFormatter::formatPattern(MessageContext& context, const Environment& globalEnv, const Pattern& pat, UErrorCode &status, UnicodeString& result) const {
294     CHECK_ERROR(status);
295 
296     for (int32_t i = 0; i < pat.numParts(); i++) {
297         const PatternPart& part = pat.getPart(i);
298         if (part.isText()) {
299             result += part.asText();
300         } else if (part.isMarkup()) {
301             // Markup is ignored
302         } else {
303 	      // Format the expression
304 	      FormattedPlaceholder partVal = formatExpression(globalEnv, part.contents(), context, status);
305 	      // Force full evaluation, e.g. applying default formatters to
306 	      // unformatted input (or formatting numbers as strings)
307               UnicodeString partResult = partVal.formatToString(locale, status);
308               result += partResult;
309               // Handle formatting errors. `formatToString()` can't take a context and thus can't
310               // register an error directly
311               if (status == U_MF_FORMATTING_ERROR) {
312                   status = U_ZERO_ERROR;
313                   // TODO: The name of the formatter that failed is unavailable.
314                   // Not ideal, but it's hard for `formatToString()`
315                   // to pass along more detailed diagnostics
316                   context.getErrors().setFormattingError(status);
317               }
318         }
319     }
320 }
321 
322 // ------------------------------------------------------
323 // Selection
324 
325 // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#resolve-selectors
326 // `res` is a vector of ResolvedSelectors
resolveSelectors(MessageContext & context,const Environment & env,UErrorCode & status,UVector & res) const327 void MessageFormatter::resolveSelectors(MessageContext& context, const Environment& env, UErrorCode &status, UVector& res) const {
328     CHECK_ERROR(status);
329     U_ASSERT(!dataModel.hasPattern());
330 
331     const Expression* selectors = dataModel.getSelectorsInternal();
332     // 1. Let res be a new empty list of resolved values that support selection.
333     // (Implicit, since `res` is an out-parameter)
334     // 2. For each expression exp of the message's selectors
335     for (int32_t i = 0; i < dataModel.numSelectors(); i++) {
336         // 2i. Let rv be the resolved value of exp.
337         ResolvedSelector rv = formatSelectorExpression(env, selectors[i], context, status);
338         if (rv.hasSelector()) {
339             // 2ii. If selection is supported for rv:
340             // (True if this code has been reached)
341         } else {
342             // 2iii. Else:
343             // Let nomatch be a resolved value for which selection always fails.
344             // Append nomatch as the last element of the list res.
345             // Emit a Selection Error.
346             // (Note: in this case, rv, being a fallback, serves as `nomatch`)
347             #if U_DEBUG
348             const DynamicErrors& err = context.getErrors();
349             U_ASSERT(err.hasError());
350             U_ASSERT(rv.argument().isFallback());
351             #endif
352         }
353         // 2ii(a). Append rv as the last element of the list res.
354         // (Also fulfills 2iii)
355         LocalPointer<ResolvedSelector> v(create<ResolvedSelector>(std::move(rv), status));
356         CHECK_ERROR(status);
357         res.adoptElement(v.orphan(), status);
358     }
359 }
360 
361 // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#resolve-preferences
362 // `keys` and `matches` are vectors of strings
matchSelectorKeys(const UVector & keys,MessageContext & context,ResolvedSelector && rv,UVector & keysOut,UErrorCode & status) const363 void MessageFormatter::matchSelectorKeys(const UVector& keys,
364                                          MessageContext& context,
365 					 ResolvedSelector&& rv,
366 					 UVector& keysOut,
367 					 UErrorCode& status) const {
368     CHECK_ERROR(status);
369 
370     if (!rv.hasSelector()) {
371         // Return an empty list of matches
372         return;
373     }
374 
375     auto selectorImpl = rv.getSelector();
376     U_ASSERT(selectorImpl != nullptr);
377     UErrorCode savedStatus = status;
378 
379     // Convert `keys` to an array
380     int32_t keysLen = keys.size();
381     UnicodeString* keysArr = new UnicodeString[keysLen];
382     if (keysArr == nullptr) {
383         status = U_MEMORY_ALLOCATION_ERROR;
384         return;
385     }
386     for (int32_t i = 0; i < keysLen; i++) {
387         const UnicodeString* k = static_cast<UnicodeString*>(keys[i]);
388         U_ASSERT(k != nullptr);
389         keysArr[i] = *k;
390     }
391     LocalArray<UnicodeString> adoptedKeys(keysArr);
392 
393     // Create an array to hold the output
394     UnicodeString* prefsArr = new UnicodeString[keysLen];
395     if (prefsArr == nullptr) {
396         status = U_MEMORY_ALLOCATION_ERROR;
397         return;
398     }
399     LocalArray<UnicodeString> adoptedPrefs(prefsArr);
400     int32_t prefsLen = 0;
401 
402     // Call the selector
403     selectorImpl->selectKey(rv.takeArgument(), rv.takeOptions(),
404                             adoptedKeys.getAlias(), keysLen, adoptedPrefs.getAlias(), prefsLen,
405                             status);
406 
407     // Update errors
408     if (savedStatus != status) {
409         if (U_FAILURE(status)) {
410             status = U_ZERO_ERROR;
411             context.getErrors().setSelectorError(rv.getSelectorName(), status);
412         } else {
413             // Ignore warnings
414             status = savedStatus;
415         }
416     }
417 
418     CHECK_ERROR(status);
419 
420     // Copy the resulting keys (if there was no error)
421     keysOut.removeAllElements();
422     for (int32_t i = 0; i < prefsLen; i++) {
423         UnicodeString* k = message2::create<UnicodeString>(std::move(prefsArr[i]), status);
424         if (k == nullptr) {
425             status = U_MEMORY_ALLOCATION_ERROR;
426             return;
427         }
428         keysOut.adoptElement(k, status);
429         CHECK_ERROR(status);
430     }
431 }
432 
433 // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#resolve-preferences
434 // `res` is a vector of FormattedPlaceholders;
435 // `pref` is a vector of vectors of strings
resolvePreferences(MessageContext & context,UVector & res,UVector & pref,UErrorCode & status) const436 void MessageFormatter::resolvePreferences(MessageContext& context, UVector& res, UVector& pref, UErrorCode &status) const {
437     CHECK_ERROR(status);
438 
439     // 1. Let pref be a new empty list of lists of strings.
440     // (Implicit, since `pref` is an out-parameter)
441     UnicodeString ks;
442     LocalPointer<UnicodeString> ksP;
443     int32_t numVariants = dataModel.numVariants();
444     const Variant* variants = dataModel.getVariantsInternal();
445     // 2. For each index i in res
446     for (int32_t i = 0; i < res.size(); i++) {
447         // 2i. Let keys be a new empty list of strings.
448         LocalPointer<UVector> keys(createUVector(status));
449         CHECK_ERROR(status);
450         // 2ii. For each variant `var` of the message
451         for (int32_t variantNum = 0; variantNum < numVariants; variantNum++) {
452             const SelectorKeys& selectorKeys = variants[variantNum].getKeys();
453 
454             // Note: Here, `var` names the key list of `var`,
455             // not a Variant itself
456             const Key* var = selectorKeys.getKeysInternal();
457             // 2ii(a). Let `key` be the `var` key at position i.
458             U_ASSERT(i < selectorKeys.len); // established by semantic check in formatSelectors()
459             const Key& key = var[i];
460             // 2ii(b). If `key` is not the catch-all key '*'
461             if (!key.isWildcard()) {
462                 // 2ii(b)(a) Assert that key is a literal.
463                 // (Not needed)
464                 // 2ii(b)(b) Let `ks` be the resolved value of `key`.
465                 ks = key.asLiteral().unquoted();
466                 // 2ii(b)(c) Append `ks` as the last element of the list `keys`.
467                 ksP.adoptInstead(create<UnicodeString>(std::move(ks), status));
468                 CHECK_ERROR(status);
469                 keys->adoptElement(ksP.orphan(), status);
470             }
471         }
472         // 2iii. Let `rv` be the resolved value at index `i` of `res`.
473         U_ASSERT(i < res.size());
474         ResolvedSelector rv = std::move(*(static_cast<ResolvedSelector*>(res[i])));
475         // 2iv. Let matches be the result of calling the method MatchSelectorKeys(rv, keys)
476         LocalPointer<UVector> matches(createUVector(status));
477         matchSelectorKeys(*keys, context, std::move(rv), *matches, status);
478         // 2v. Append `matches` as the last element of the list `pref`
479         pref.adoptElement(matches.orphan(), status);
480     }
481 }
482 
483 // `v` is assumed to be a vector of strings
vectorFind(const UVector & v,const UnicodeString & k)484 static int32_t vectorFind(const UVector& v, const UnicodeString& k) {
485     for (int32_t i = 0; i < v.size(); i++) {
486         if (*static_cast<UnicodeString*>(v[i]) == k) {
487             return i;
488         }
489     }
490     return -1;
491 }
492 
vectorContains(const UVector & v,const UnicodeString & k)493 static UBool vectorContains(const UVector& v, const UnicodeString& k) {
494     return (vectorFind(v, k) != -1);
495 }
496 
497 // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#filter-variants
498 // `pref` is a vector of vectors of strings. `vars` is a vector of PrioritizedVariants
filterVariants(const UVector & pref,UVector & vars,UErrorCode & status) const499 void MessageFormatter::filterVariants(const UVector& pref, UVector& vars, UErrorCode& status) const {
500     const Variant* variants = dataModel.getVariantsInternal();
501 
502     // 1. Let `vars` be a new empty list of variants.
503     // (Not needed since `vars` is an out-parameter)
504     // 2. For each variant `var` of the message:
505     for (int32_t j = 0; j < dataModel.numVariants(); j++) {
506         const SelectorKeys& selectorKeys = variants[j].getKeys();
507         const Pattern& p = variants[j].getPattern();
508 
509         // Note: Here, `var` names the key list of `var`,
510         // not a Variant itself
511         const Key* var = selectorKeys.getKeysInternal();
512         // 2i. For each index `i` in `pref`:
513         bool noMatch = false;
514         for (int32_t i = 0; i < pref.size(); i++) {
515             // 2i(a). Let `key` be the `var` key at position `i`.
516             U_ASSERT(i < selectorKeys.len);
517             const Key& key = var[i];
518             // 2i(b). If key is the catch-all key '*':
519             if (key.isWildcard()) {
520                 // 2i(b)(a). Continue the inner loop on pref.
521                 continue;
522             }
523             // 2i(c). Assert that `key` is a literal.
524             // (Not needed)
525             // 2i(d). Let `ks` be the resolved value of `key`.
526             UnicodeString ks = key.asLiteral().unquoted();
527             // 2i(e). Let `matches` be the list of strings at index `i` of `pref`.
528             const UVector& matches = *(static_cast<UVector*>(pref[i])); // `matches` is a vector of strings
529             // 2i(f). If `matches` includes `ks`
530             if (vectorContains(matches, ks)) {
531                 // 2i(f)(a). Continue the inner loop on `pref`.
532                 continue;
533             }
534             // 2i(g). Else:
535             // 2i(g)(a). Continue the outer loop on message variants.
536             noMatch = true;
537             break;
538         }
539         if (!noMatch) {
540             // Append `var` as the last element of the list `vars`.
541 	    PrioritizedVariant* tuple = create<PrioritizedVariant>(PrioritizedVariant(-1, selectorKeys, p), status);
542             CHECK_ERROR(status);
543             vars.adoptElement(tuple, status);
544         }
545     }
546 }
547 
548 // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#sort-variants
549 // Leaves the preferred variant as element 0 in `sortable`
550 // Note: this sorts in-place, so `sortable` is just `vars`
551 // `pref` is a vector of vectors of strings; `vars` is a vector of PrioritizedVariants
sortVariants(const UVector & pref,UVector & vars,UErrorCode & status) const552 void MessageFormatter::sortVariants(const UVector& pref, UVector& vars, UErrorCode& status) const {
553     CHECK_ERROR(status);
554 
555 // Note: steps 1 and 2 are omitted since we use `vars` as `sortable` (we sort in-place)
556     // 1. Let `sortable` be a new empty list of (integer, variant) tuples.
557     // (Not needed since `sortable` is an out-parameter)
558     // 2. For each variant `var` of `vars`
559     // 2i. Let tuple be a new tuple (-1, var).
560     // 2ii. Append `tuple` as the last element of the list `sortable`.
561 
562     // 3. Let `len` be the integer count of items in `pref`.
563     int32_t len = pref.size();
564     // 4. Let `i` be `len` - 1.
565     int32_t i = len - 1;
566     // 5. While i >= 0:
567     while (i >= 0) {
568         // 5i. Let `matches` be the list of strings at index `i` of `pref`.
569         U_ASSERT(pref[i] != nullptr);
570 	const UVector& matches = *(static_cast<UVector*>(pref[i])); // `matches` is a vector of strings
571         // 5ii. Let `minpref` be the integer count of items in `matches`.
572         int32_t minpref = matches.size();
573         // 5iii. For each tuple `tuple` of `sortable`:
574         for (int32_t j = 0; j < vars.size(); j++) {
575             U_ASSERT(vars[j] != nullptr);
576             PrioritizedVariant& tuple = *(static_cast<PrioritizedVariant*>(vars[j]));
577             // 5iii(a). Let matchpref be an integer with the value minpref.
578             int32_t matchpref = minpref;
579             // 5iii(b). Let `key` be the tuple variant key at position `i`.
580             const Key* tupleVariantKeys = tuple.keys.getKeysInternal();
581             U_ASSERT(i < tuple.keys.len); // Given by earlier semantic checking
582             const Key& key = tupleVariantKeys[i];
583             // 5iii(c) If `key` is not the catch-all key '*':
584             if (!key.isWildcard()) {
585                 // 5iii(c)(a). Assert that `key` is a literal.
586                 // (Not needed)
587                 // 5iii(c)(b). Let `ks` be the resolved value of `key`.
588                 UnicodeString ks = key.asLiteral().unquoted();
589                 // 5iii(c)(c) Let matchpref be the integer position of ks in `matches`.
590                 matchpref = vectorFind(matches, ks);
591                 U_ASSERT(matchpref >= 0);
592             }
593             // 5iii(d) Set the `tuple` integer value as matchpref.
594             tuple.priority = matchpref;
595         }
596         // 5iv. Set `sortable` to be the result of calling the method SortVariants(`sortable`)
597         vars.sort(comparePrioritizedVariants, status);
598         CHECK_ERROR(status);
599         // 5v. Set `i` to be `i` - 1.
600         i--;
601     }
602     // The caller is responsible for steps 6 and 7
603     // 6. Let `var` be the `variant` element of the first element of `sortable`.
604     // 7. Select the pattern of `var`
605 }
606 
607 
608 // Evaluate the operand
resolveVariables(const Environment & env,const Operand & rand,MessageContext & context,UErrorCode & status) const609 ResolvedSelector MessageFormatter::resolveVariables(const Environment& env, const Operand& rand, MessageContext& context, UErrorCode &status) const {
610     if (U_FAILURE(status)) {
611         return {};
612     }
613 
614     if (rand.isNull()) {
615         return ResolvedSelector(FormattedPlaceholder());
616     }
617 
618     if (rand.isLiteral()) {
619         return ResolvedSelector(formatLiteral(rand.asLiteral()));
620     }
621 
622     // Must be variable
623     const VariableName& var = rand.asVariable();
624     // Resolve the variable
625     if (env.has(var)) {
626         const Closure& referent = env.lookup(var);
627         // Resolve the referent
628         return resolveVariables(referent.getEnv(), referent.getExpr(), context, status);
629     }
630     // Either this is a global var or an unbound var --
631     // either way, it can't be bound to a function call.
632     // Check globals
633     FormattedPlaceholder val = evalArgument(var, context, status);
634     if (status == U_ILLEGAL_ARGUMENT_ERROR) {
635         status = U_ZERO_ERROR;
636         // Unresolved variable -- could be a previous warning. Nothing to resolve
637         U_ASSERT(context.getErrors().hasUnresolvedVariableError());
638         return ResolvedSelector(FormattedPlaceholder(var));
639     }
640     // Pass through other errors
641     return ResolvedSelector(std::move(val));
642 }
643 
644 // Evaluate the expression except for not performing the top-level function call
645 // (which is expected to be a selector, but may not be, in error cases)
resolveVariables(const Environment & env,const Expression & expr,MessageContext & context,UErrorCode & status) const646 ResolvedSelector MessageFormatter::resolveVariables(const Environment& env,
647                                                     const Expression& expr,
648                                                     MessageContext& context,
649                                                     UErrorCode &status) const {
650     if (U_FAILURE(status)) {
651         return {};
652     }
653 
654     // Function call -- resolve the operand and options
655     if (expr.isFunctionCall()) {
656         const Operator* rator = expr.getOperator(status);
657         U_ASSERT(U_SUCCESS(status));
658         // Already checked that rator is non-reserved
659         const FunctionName& selectorName = rator->getFunctionName();
660         if (isSelector(selectorName)) {
661             auto selector = getSelector(context, selectorName, status);
662             if (U_SUCCESS(status)) {
663                 FunctionOptions resolvedOptions = resolveOptions(env, rator->getOptionsInternal(), context, status);
664                 // Operand may be the null argument, but resolveVariables() handles that
665                 FormattedPlaceholder argument = formatOperand(env, expr.getOperand(), context, status);
666                 return ResolvedSelector(selectorName, selector, std::move(resolvedOptions), std::move(argument));
667             }
668         } else if (isFormatter(selectorName)) {
669             context.getErrors().setSelectorError(selectorName, status);
670         } else {
671             context.getErrors().setUnknownFunction(selectorName, status);
672         }
673         // Non-selector used as selector; an error would have been recorded earlier
674         UnicodeString fallback(COLON);
675         fallback += selectorName;
676         if (!expr.getOperand().isNull()) {
677             fallback = formatOperand(env, expr.getOperand(), context, status).fallback;
678         }
679         return ResolvedSelector(FormattedPlaceholder(fallback));
680     } else {
681         // Might be a variable reference, so expand one more level of variable
682         return resolveVariables(env, expr.getOperand(), context, status);
683     }
684 }
685 
formatSelectorExpression(const Environment & globalEnv,const Expression & expr,MessageContext & context,UErrorCode & status) const686 ResolvedSelector MessageFormatter::formatSelectorExpression(const Environment& globalEnv, const Expression& expr, MessageContext& context, UErrorCode &status) const {
687     if (U_FAILURE(status)) {
688         return {};
689     }
690 
691     // Resolve expression to determine if it's a function call
692     ResolvedSelector exprResult = resolveVariables(globalEnv, expr, context, status);
693 
694     DynamicErrors& err = context.getErrors();
695 
696     // If there is a selector, then `resolveVariables()` recorded it in the context
697     if (exprResult.hasSelector()) {
698         // Check if there was an error
699         if (exprResult.argument().isFallback()) {
700             // Use a null expression if it's a syntax or data model warning;
701             // create a valid (non-fallback) formatted placeholder from the
702             // fallback string otherwise
703             if (err.hasSyntaxError() || err.hasDataModelError()) {
704                 return ResolvedSelector(FormattedPlaceholder()); // Null operand
705             } else {
706                 return ResolvedSelector(exprResult.takeArgument());
707             }
708         }
709         return exprResult;
710     }
711 
712     // No selector was found; error should already have been set
713     U_ASSERT(err.hasMissingSelectorAnnotationError() || err.hasUnknownFunctionError() || err.hasSelectorError());
714     return ResolvedSelector(FormattedPlaceholder(exprResult.argument().fallback));
715 }
716 
formatSelectors(MessageContext & context,const Environment & env,UErrorCode & status,UnicodeString & result) const717 void MessageFormatter::formatSelectors(MessageContext& context, const Environment& env, UErrorCode &status, UnicodeString& result) const {
718     CHECK_ERROR(status);
719 
720     // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#pattern-selection
721 
722     // Resolve Selectors
723     // res is a vector of FormattedPlaceholders
724     LocalPointer<UVector> res(createUVector(status));
725     CHECK_ERROR(status);
726     resolveSelectors(context, env, status, *res);
727 
728     // Resolve Preferences
729     // pref is a vector of vectors of strings
730     LocalPointer<UVector> pref(createUVector(status));
731     CHECK_ERROR(status);
732     resolvePreferences(context, *res, *pref, status);
733 
734     // Filter Variants
735     // vars is a vector of PrioritizedVariants
736     LocalPointer<UVector> vars(createUVector(status));
737     CHECK_ERROR(status);
738     filterVariants(*pref, *vars, status);
739 
740     // Sort Variants and select the final pattern
741     // Note: `sortable` in the spec is just `vars` here,
742     // which is sorted in-place
743     sortVariants(*pref, *vars, status);
744 
745     CHECK_ERROR(status);
746 
747     // 6. Let `var` be the `variant` element of the first element of `sortable`.
748     U_ASSERT(vars->size() > 0); // This should have been checked earlier (having 0 variants would be a data model error)
749     const PrioritizedVariant& var = *(static_cast<PrioritizedVariant*>(vars->elementAt(0)));
750     // 7. Select the pattern of `var`
751     const Pattern& pat = var.pat;
752 
753     // Format the pattern
754     formatPattern(context, env, pat, status, result);
755 }
756 
757 // Note: this is non-const due to the function registry being non-const, which is in turn
758 // due to the values (`FormatterFactory` objects in the map) having mutable state.
759 // In other words, formatting a message can mutate the underlying `MessageFormatter` by changing
760 // state within the factory objects that represent custom formatters.
formatToString(const MessageArguments & arguments,UErrorCode & status)761 UnicodeString MessageFormatter::formatToString(const MessageArguments& arguments, UErrorCode &status) {
762     EMPTY_ON_ERROR(status);
763 
764     // Create a new environment that will store closures for all local variables
765     Environment* env = Environment::create(status);
766     // Create a new context with the given arguments and the `errors` structure
767     MessageContext context(arguments, *errors, status);
768 
769     // Check for unresolved variable errors
770     checkDeclarations(context, env, status);
771     LocalPointer<Environment> globalEnv(env);
772 
773     UnicodeString result;
774     if (dataModel.hasPattern()) {
775         formatPattern(context, *globalEnv, dataModel.getPattern(), status, result);
776     } else {
777         // Check for errors/warnings -- if so, then the result of pattern selection is the fallback value
778         // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#pattern-selection
779         const DynamicErrors& err = context.getErrors();
780         if (err.hasSyntaxError() || err.hasDataModelError()) {
781             result += REPLACEMENT;
782         } else {
783             formatSelectors(context, *globalEnv, status, result);
784         }
785     }
786     // Update status according to all errors seen while formatting
787     if (signalErrors) {
788         context.checkErrors(status);
789     }
790     if (U_FAILURE(status)) {
791         result.remove();
792     }
793     return result;
794 }
795 
796 // ----------------------------------------
797 // Checking for resolution errors
798 
check(MessageContext & context,const Environment & localEnv,const OptionMap & options,UErrorCode & status) const799 void MessageFormatter::check(MessageContext& context, const Environment& localEnv, const OptionMap& options, UErrorCode& status) const {
800     // Check the RHS of each option
801     for (int32_t i = 0; i < options.size(); i++) {
802         const Option& opt = options.getOption(i, status);
803         CHECK_ERROR(status);
804         check(context, localEnv, opt.getValue(), status);
805     }
806 }
807 
check(MessageContext & context,const Environment & localEnv,const Operand & rand,UErrorCode & status) const808 void MessageFormatter::check(MessageContext& context, const Environment& localEnv, const Operand& rand, UErrorCode& status) const {
809     // Nothing to check for literals
810     if (rand.isLiteral() || rand.isNull()) {
811         return;
812     }
813 
814     // Check that variable is in scope
815     const VariableName& var = rand.asVariable();
816     // Check local scope
817     if (localEnv.has(var)) {
818         return;
819     }
820     // Check global scope
821     context.getGlobal(var, status);
822     if (status == U_ILLEGAL_ARGUMENT_ERROR) {
823         status = U_ZERO_ERROR;
824         context.getErrors().setUnresolvedVariable(var, status);
825     }
826     // Either `var` is a global, or some other error occurred.
827     // Nothing more to do either way
828     return;
829 }
830 
check(MessageContext & context,const Environment & localEnv,const Expression & expr,UErrorCode & status) const831 void MessageFormatter::check(MessageContext& context, const Environment& localEnv, const Expression& expr, UErrorCode& status) const {
832     // Check for unresolved variable errors
833     if (expr.isFunctionCall()) {
834         const Operator* rator = expr.getOperator(status);
835         U_ASSERT(U_SUCCESS(status));
836         const Operand& rand = expr.getOperand();
837         check(context, localEnv, rand, status);
838         check(context, localEnv, rator->getOptionsInternal(), status);
839     }
840 }
841 
842 // Check for resolution errors
checkDeclarations(MessageContext & context,Environment * & env,UErrorCode & status) const843 void MessageFormatter::checkDeclarations(MessageContext& context, Environment*& env, UErrorCode &status) const {
844     CHECK_ERROR(status);
845 
846     const Binding* decls = getDataModel().getLocalVariablesInternal();
847     U_ASSERT(env != nullptr && (decls != nullptr || getDataModel().bindingsLen == 0));
848 
849     for (int32_t i = 0; i < getDataModel().bindingsLen; i++) {
850         const Binding& decl = decls[i];
851         const Expression& rhs = decl.getValue();
852         check(context, *env, rhs, status);
853 
854         // Add a closure to the global environment,
855         // memoizing the value of localEnv up to this point
856 
857         // Add the LHS to the environment for checking the next declaration
858         env = Environment::create(decl.getVariable(), Closure(rhs, *env), env, status);
859         CHECK_ERROR(status);
860     }
861 }
862 } // namespace message2
863 
864 U_NAMESPACE_END
865 
866 #endif /* #if !UCONFIG_NO_MF2 */
867 
868 #endif /* #if !UCONFIG_NO_FORMATTING */
869