• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2024 and later: Unicode, Inc. and others.
2 
3 #include "unicode/utypes.h"
4 
5 #if !UCONFIG_NO_FORMATTING
6 
7 #if !UCONFIG_NO_MF2
8 
9 #include <fstream>
10 #include <string>
11 
12 #include "charstr.h"
13 #include "json-json.hpp"
14 #include "messageformat2test.h"
15 #include "messageformat2test_utils.h"
16 
17 using namespace nlohmann;
18 
19 using namespace icu::message2;
20 
getExpectedRuntimeErrorFromString(const std::string & errorName)21 static UErrorCode getExpectedRuntimeErrorFromString(const std::string& errorName) {
22     if (errorName == "syntax-error") {
23         return U_MF_SYNTAX_ERROR;
24     }
25     if (errorName == "variant-key-mismatch") {
26         return U_MF_VARIANT_KEY_MISMATCH_ERROR;
27     }
28     if (errorName == "missing-fallback-variant") {
29         return U_MF_NONEXHAUSTIVE_PATTERN_ERROR;
30     }
31     if (errorName == "missing-selector-annotation") {
32         return U_MF_MISSING_SELECTOR_ANNOTATION_ERROR;
33     }
34     if (errorName == "unresolved-variable") {
35         return U_MF_UNRESOLVED_VARIABLE_ERROR;
36     }
37     if (errorName == "bad-operand") {
38         return U_MF_OPERAND_MISMATCH_ERROR;
39     }
40     if (errorName == "bad-option") {
41         return U_MF_FORMATTING_ERROR;
42     }
43     if (errorName == "unknown-function") {
44         return U_MF_UNKNOWN_FUNCTION_ERROR;
45     }
46     if (errorName == "duplicate-declaration") {
47         return U_MF_DUPLICATE_DECLARATION_ERROR;
48     }
49     if (errorName == "duplicate-option-name") {
50         return U_MF_DUPLICATE_OPTION_NAME_ERROR;
51     }
52     if (errorName == "duplicate-variant") {
53         return U_MF_DUPLICATE_VARIANT_ERROR;
54     }
55     if (errorName == "bad-selector") {
56         return U_MF_SELECTOR_ERROR;
57     }
58 // Arbitrary default
59     return U_MF_FORMATTING_ERROR;
60 }
61 
u_str(std::string s)62 static UnicodeString u_str(std::string s) {
63     return UnicodeString::fromUTF8(s);
64 }
65 
successTest(const std::string & testName,const std::string & messageText)66 static TestCase::Builder successTest(const std::string& testName,
67                                      const std::string& messageText) {
68     return TestCase::Builder().setName(u_str(testName))
69         .setPattern(u_str(messageText))
70         .setExpectSuccess();
71 }
72 
// Writes a human-readable label for a test into `buffer`.
//
// buffer   - destination for the NUL-terminated label
// size     - capacity of `buffer` in bytes; snprintf truncates longer labels
// fileName - name of the JSON file the test came from
// testNum  - number shown in brackets after the file name
//
// The label has the form "test from file: <fileName>[<testNum>]".
static void makeTestName(char* buffer, size_t size, const std::string& fileName, int32_t testNum) {
    // Print the number exactly as the caller supplied it. "%d" matches the
    // signed argument (the cast keeps the format portable should int32_t not
    // be plain int), and the value is no longer bumped a second time here.
    snprintf(buffer, size, "test from file: %s[%d]", fileName.c_str(), static_cast<int>(testNum));
}
76 
setArguments(TestMessageFormat2 & t,TestCase::Builder & test,const std::vector<json> & params,UErrorCode & errorCode)77 static bool setArguments(TestMessageFormat2& t,
78                          TestCase::Builder& test,
79                          const std::vector<json>& params,
80                          UErrorCode& errorCode) {
81     if (U_FAILURE(errorCode)) {
82         return true;
83     }
84     bool schemaError = false;
85     for (auto argsIter = params.begin(); argsIter != params.end(); ++argsIter) {
86         auto j_object = argsIter->template get<json::object_t>();
87         if (!j_object["name"].is_null()) {
88             const UnicodeString argName = u_str(j_object["name"].template get<std::string>());
89             if (!j_object["value"].is_null()) {
90                 json val = j_object["value"];
91                 // Determine type of value
92                 if (val.is_number()) {
93                     test.setArgument(argName,
94                                      val.template get<double>());
95                 } else if (val.is_string()) {
96                     test.setArgument(argName,
97                                      u_str(val.template get<std::string>()));
98                 } else if (val.is_object()) {
99                     // Dates: represent in tests as { "date" : timestamp }, to distinguish
100                     // from number values
101                     auto obj = val.template get<json::object_t>();
102                     if (obj["date"].is_number()) {
103                         test.setDateArgument(argName, val["date"]);
104                     } else if (obj["decimal"].is_string()) {
105                         // Decimal strings: represent in tests as { "decimal" : string },
106                         // to distinguish from string values
107                         test.setDecimalArgument(argName, obj["decimal"].template get<std::string>(), errorCode);
108                     }
109                 } else if (val.is_boolean() || val.is_null()) {
110                     return false; // For now, boolean and null arguments are unsupported
111                 }
112             } else {
113                schemaError = true;
114                break;
115             }
116         } else {
117             schemaError = true;
118             break;
119         }
120     }
121     if (schemaError) {
122         t.logln("Warning: test with missing 'name' or 'value' in params");
123         if (U_SUCCESS(errorCode)) {
124             errorCode = U_ILLEGAL_ARGUMENT_ERROR;
125         }
126     }
127     return true;
128 }
129 
130 
131 /*
132   Test files are expected to follow the schema in:
133   https://github.com/unicode-org/conformance/blob/main/schema/message_fmt2/testgen_schema.json
134   as of https://github.com/unicode-org/conformance/pull/255
135 */
runValidTest(TestMessageFormat2 & icuTest,const std::string & testName,const std::string & defaultError,const json & j,IcuTestErrorCode & errorCode)136 static void runValidTest(TestMessageFormat2& icuTest,
137                          const std::string& testName,
138                          const std::string& defaultError,
139                          const json& j,
140                          IcuTestErrorCode& errorCode) {
141     auto j_object = j.template get<json::object_t>();
142     std::string messageText;
143 
144     // src can be a single string or an array of strings
145     if (!j_object["src"].is_null()) {
146         if (j_object["src"].is_string()) {
147             messageText = j_object["src"].template get<std::string>();
148         } else {
149             auto strings = j_object["src"].template get<std::vector<std::string>>();
150             for (const auto &piece : strings) {
151                 messageText += piece;
152             }
153         }
154     }
155     // Otherwise, it should probably be an error, but we just
156     // treat this as the empty string
157 
158     TestCase::Builder test = successTest(testName, messageText);
159 
160     // Certain ICU4J tests don't work yet in ICU4C.
161     // See ICU-22754
162     // ignoreCpp => only works in Java
163     if (!j_object["ignoreCpp"].is_null()) {
164         return;
165     }
166 
167     if (!j_object["exp"].is_null()) {
168         // Set expected result if it's present
169         std::string expectedOutput = j["exp"].template get<std::string>();
170         test.setExpected(u_str(expectedOutput));
171     }
172 
173     if (!j_object["locale"].is_null()) {
174         std::string localeStr = j_object["locale"].template get<std::string>();
175         test.setLocale(Locale(localeStr.c_str()));
176     }
177 
178     if (!j_object["params"].is_null()) {
179         // `params` is an array of objects
180         auto params = j_object["params"].template get<std::vector<json>>();
181         if (!setArguments(icuTest, test, params, errorCode)) {
182             return; // Skip tests with unsupported arguments
183         }
184     }
185 
186     bool expectedError = false;
187     if (!j_object["expErrors"].is_null()) {
188         // Map from string to string
189         auto errors = j_object["expErrors"].template get<std::vector<std::map<std::string, std::string>>>();
190         // We only emit the first error, so we just hope the first error
191         // in the list in the test is also the error we emit
192         U_ASSERT(errors.size() > 0);
193         std::string errorType = errors[0]["type"];
194         if (errorType.length() <= 0) {
195             errorType = errors[0]["name"];
196         }
197         // See TODO(options); ignore these tests for now
198         if (errorType == "bad-option") {
199             return;
200         }
201         test.setExpectedError(getExpectedRuntimeErrorFromString(errorType));
202         expectedError = true;
203     } else if (defaultError.length() > 0) {
204         test.setExpectedError(getExpectedRuntimeErrorFromString(defaultError));
205         expectedError = true;
206     }
207 
208     // If no expected result and no error, then set the test builder to expect success
209     if (j_object["exp"].is_null() && !expectedError) {
210         test.setNoSyntaxError();
211     }
212 
213     // Check for expected diagnostic values
214     int32_t lineNumber = 0;
215     int32_t offset = -1;
216     if (!j_object["char"].is_null()) {
217         offset = j_object["char"].template get<int32_t>();
218     }
219     if (!j_object["line"].is_null()) {
220         lineNumber = j_object["line"].template get<int32_t>();
221     }
222     if (offset != -1) {
223         test.setExpectedLineNumberAndOffset(lineNumber, offset);
224     }
225 
226 
227     TestCase t = test.build();
228     TestUtils::runTestCase(icuTest, t, errorCode);
229 }
230 
231 // File name is relative to message2/ in the test data directory
runTestsFromJsonFile(TestMessageFormat2 & t,const std::string & fileName,IcuTestErrorCode & errorCode)232 static void runTestsFromJsonFile(TestMessageFormat2& t,
233                                       const std::string& fileName,
234                                       IcuTestErrorCode& errorCode) {
235     const char* testDataDirectory = IntlTest::getSharedTestData(errorCode);
236     CHECK_ERROR(errorCode);
237 
238     std::string testFileName(testDataDirectory);
239     testFileName.append("message2/");
240     testFileName.append(fileName);
241     std::ifstream testFile(testFileName);
242     json data = json::parse(testFile);
243 
244     int32_t testNum = 0;
245     char testName[100];
246 
247     auto j_object = data.template get<json::object_t>();
248 
249     // Some files have an expected error
250     std::string defaultError;
251     if (!j_object["defaultTestProperties"].is_null()
252         && !j_object["defaultTestProperties"]["expErrors"].is_null()) {
253         auto expErrors = j_object["defaultTestProperties"]["expErrors"];
254         // expErrors might also be a boolean, in which case we ignore it --
255         // so we have to check if it's an array
256         if (expErrors.is_array()) {
257             auto expErrorsObj = expErrors.template get<std::vector<json>>();
258             if (expErrorsObj.size() > 0) {
259                 if (!expErrorsObj[0]["type"].is_null()) {
260                     defaultError = expErrorsObj[0]["type"].template get<std::string>();
261                 }
262             }
263         }
264     }
265 
266     if (!j_object["tests"].is_null()) {
267         auto tests = j_object["tests"].template get<std::vector<json>>();
268         for (auto iter = tests.begin(); iter != tests.end(); ++iter) {
269             makeTestName(testName, sizeof(testName), fileName, ++testNum);
270             t.logln(testName);
271 
272             t.logln(u_str(iter->dump()));
273 
274             runValidTest(t, testName, defaultError, *iter, errorCode);
275         }
276     } else {
277         // Test doesn't follow schema -- probably an error
278         t.logln("Warning: no tests in filename: ");
279         t.logln(u_str(fileName));
280         (UErrorCode&) errorCode = U_ILLEGAL_ARGUMENT_ERROR;
281     }
282 }
283 
jsonTestsFromFiles(IcuTestErrorCode & errorCode)284 void TestMessageFormat2::jsonTestsFromFiles(IcuTestErrorCode& errorCode) {
285     // Spec tests are fairly limited as the spec doesn't dictate formatter
286     // output. Tests under testdata/message2/spec are taken from
287     // https://github.com/unicode-org/message-format-wg/tree/main/test .
288     // Tests directly under testdata/message2 are specific to ICU4C.
289 
290     // Do spec tests for syntax errors
291     runTestsFromJsonFile(*this, "spec/syntax-errors.json", errorCode);
292     runTestsFromJsonFile(*this, "unsupported-expressions.json", errorCode);
293     runTestsFromJsonFile(*this, "unsupported-statements.json", errorCode);
294     runTestsFromJsonFile(*this, "syntax-errors-reserved.json", errorCode);
295 
296     // Do tests for data model errors
297     runTestsFromJsonFile(*this, "spec/data-model-errors.json", errorCode);
298     runTestsFromJsonFile(*this, "more-data-model-errors.json", errorCode);
299 
300     // Do valid spec tests
301     runTestsFromJsonFile(*this, "spec/syntax.json", errorCode);
302 
303     // Do valid function tests
304     runTestsFromJsonFile(*this, "spec/functions/date.json", errorCode);
305     runTestsFromJsonFile(*this, "spec/functions/datetime.json", errorCode);
306     runTestsFromJsonFile(*this, "spec/functions/integer.json", errorCode);
307     runTestsFromJsonFile(*this, "spec/functions/number.json", errorCode);
308     runTestsFromJsonFile(*this, "spec/functions/string.json", errorCode);
309     runTestsFromJsonFile(*this, "spec/functions/time.json", errorCode);
310 
311     // Other tests (non-spec)
312     runTestsFromJsonFile(*this, "more-functions.json", errorCode);
313     runTestsFromJsonFile(*this, "valid-tests.json", errorCode);
314     runTestsFromJsonFile(*this, "resolution-errors.json", errorCode);
315     runTestsFromJsonFile(*this, "matches-whitespace.json", errorCode);
316     runTestsFromJsonFile(*this, "alias-selector-annotations.json", errorCode);
317     runTestsFromJsonFile(*this, "runtime-errors.json", errorCode);
318 
319     // Re: the expected output for the first test in this file:
320     // Note: the more "correct" fallback output seems like it should be "1.000 3" (ignoring the
321     // overriding .input binding of $var2) but that's hard to achieve
322     // as so-called "implicit declarations" can only be detected after parsing, at which
323     // point the data model can't be modified.
324     // Probably this is going to change anyway so that any data model error gets replaced
325     // with a fallback for the whole message.
326     // The second test has a similar issue with the output.
327     runTestsFromJsonFile(*this, "tricky-declarations.json", errorCode);
328 
329     // Markup is ignored when formatting to string
330     runTestsFromJsonFile(*this, "markup.json", errorCode);
331 
332     // TODO(duplicates): currently the expected output is based on using
333     // the last definition of the duplicate-declared variable;
334     // perhaps it's better to remove all declarations for $foo before formatting.
335     // however if https://github.com/unicode-org/message-format-wg/pull/704 lands,
336     // it'll be a moot point since the output will be expected to be the fallback string
337     // (This applies to the expected output for all the U_DUPLICATE_DECLARATION_ERROR tests)
338     runTestsFromJsonFile(*this, "duplicate-declarations.json", errorCode);
339 
340     // TODO(options):
341     // Bad options. The spec is unclear about this
342     // -- see https://github.com/unicode-org/message-format-wg/issues/738
343     // The current behavior is to set a U_MF_FORMATTING_ERROR for any invalid options.
344     runTestsFromJsonFile(*this, "invalid-options.json", errorCode);
345 
346     runTestsFromJsonFile(*this, "syntax-errors-end-of-input.json", errorCode);
347     runTestsFromJsonFile(*this, "syntax-errors-diagnostics.json", errorCode);
348     runTestsFromJsonFile(*this, "invalid-number-literals-diagnostics.json", errorCode);
349     runTestsFromJsonFile(*this, "syntax-errors-diagnostics-multiline.json", errorCode);
350 
351     // ICU4J tests
352     runTestsFromJsonFile(*this, "icu-test-functions.json", errorCode);
353     runTestsFromJsonFile(*this, "icu-parser-tests.json", errorCode);
354     runTestsFromJsonFile(*this, "icu-test-selectors.json", errorCode);
355     runTestsFromJsonFile(*this, "icu-test-previous-release.json", errorCode);
356 }
357 
358 #endif /* #if !UCONFIG_NO_MF2 */
359 
360 #endif /* #if !UCONFIG_NO_FORMATTING */
361