1 // © 2024 and later: Unicode, Inc. and others.
2
3 #include "unicode/utypes.h"
4
5 #if !UCONFIG_NO_FORMATTING
6
7 #if !UCONFIG_NO_MF2
8
9 #include <fstream>
10 #include <string>
11
12 #include "charstr.h"
13 #include "json-json.hpp"
14 #include "messageformat2test.h"
15 #include "messageformat2test_utils.h"
16
17 using namespace nlohmann;
18
19 using namespace icu::message2;
20
getExpectedRuntimeErrorFromString(const std::string & errorName)21 static UErrorCode getExpectedRuntimeErrorFromString(const std::string& errorName) {
22 if (errorName == "syntax-error") {
23 return U_MF_SYNTAX_ERROR;
24 }
25 if (errorName == "variant-key-mismatch") {
26 return U_MF_VARIANT_KEY_MISMATCH_ERROR;
27 }
28 if (errorName == "missing-fallback-variant") {
29 return U_MF_NONEXHAUSTIVE_PATTERN_ERROR;
30 }
31 if (errorName == "missing-selector-annotation") {
32 return U_MF_MISSING_SELECTOR_ANNOTATION_ERROR;
33 }
34 if (errorName == "unresolved-variable") {
35 return U_MF_UNRESOLVED_VARIABLE_ERROR;
36 }
37 if (errorName == "bad-operand") {
38 return U_MF_OPERAND_MISMATCH_ERROR;
39 }
40 if (errorName == "bad-option") {
41 return U_MF_FORMATTING_ERROR;
42 }
43 if (errorName == "unknown-function") {
44 return U_MF_UNKNOWN_FUNCTION_ERROR;
45 }
46 if (errorName == "duplicate-declaration") {
47 return U_MF_DUPLICATE_DECLARATION_ERROR;
48 }
49 if (errorName == "duplicate-option-name") {
50 return U_MF_DUPLICATE_OPTION_NAME_ERROR;
51 }
52 if (errorName == "duplicate-variant") {
53 return U_MF_DUPLICATE_VARIANT_ERROR;
54 }
55 if (errorName == "bad-selector") {
56 return U_MF_SELECTOR_ERROR;
57 }
58 // Arbitrary default
59 return U_MF_FORMATTING_ERROR;
60 }
61
u_str(std::string s)62 static UnicodeString u_str(std::string s) {
63 return UnicodeString::fromUTF8(s);
64 }
65
successTest(const std::string & testName,const std::string & messageText)66 static TestCase::Builder successTest(const std::string& testName,
67 const std::string& messageText) {
68 return TestCase::Builder().setName(u_str(testName))
69 .setPattern(u_str(messageText))
70 .setExpectSuccess();
71 }
72
// Writes a human-readable label for a test into `buffer`, in the form
//   "test from file: <fileName>[<testNum + 1>]"
//
// buffer   - destination; receives at most `size` bytes including the
//            terminating NUL (the label is truncated if it does not fit)
// size     - capacity of `buffer` in bytes
// fileName - name of the JSON file the test came from
// testNum  - number of the test; the label shows this value plus one
//
// The number is signed, so it is printed with %d (the previous %u did not
// match the signed argument and printed negative values as huge numbers).
static void makeTestName(char* buffer, size_t size, const std::string& fileName, int32_t testNum) {
    snprintf(buffer, size, "test from file: %s[%d]",
             fileName.c_str(), static_cast<int>(testNum + 1));
}
76
setArguments(TestMessageFormat2 & t,TestCase::Builder & test,const std::vector<json> & params,UErrorCode & errorCode)77 static bool setArguments(TestMessageFormat2& t,
78 TestCase::Builder& test,
79 const std::vector<json>& params,
80 UErrorCode& errorCode) {
81 if (U_FAILURE(errorCode)) {
82 return true;
83 }
84 bool schemaError = false;
85 for (auto argsIter = params.begin(); argsIter != params.end(); ++argsIter) {
86 auto j_object = argsIter->template get<json::object_t>();
87 if (!j_object["name"].is_null()) {
88 const UnicodeString argName = u_str(j_object["name"].template get<std::string>());
89 if (!j_object["value"].is_null()) {
90 json val = j_object["value"];
91 // Determine type of value
92 if (val.is_number()) {
93 test.setArgument(argName,
94 val.template get<double>());
95 } else if (val.is_string()) {
96 test.setArgument(argName,
97 u_str(val.template get<std::string>()));
98 } else if (val.is_object()) {
99 // Dates: represent in tests as { "date" : timestamp }, to distinguish
100 // from number values
101 auto obj = val.template get<json::object_t>();
102 if (obj["date"].is_number()) {
103 test.setDateArgument(argName, val["date"]);
104 } else if (obj["decimal"].is_string()) {
105 // Decimal strings: represent in tests as { "decimal" : string },
106 // to distinguish from string values
107 test.setDecimalArgument(argName, obj["decimal"].template get<std::string>(), errorCode);
108 }
109 } else if (val.is_boolean() || val.is_null()) {
110 return false; // For now, boolean and null arguments are unsupported
111 }
112 } else {
113 schemaError = true;
114 break;
115 }
116 } else {
117 schemaError = true;
118 break;
119 }
120 }
121 if (schemaError) {
122 t.logln("Warning: test with missing 'name' or 'value' in params");
123 if (U_SUCCESS(errorCode)) {
124 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
125 }
126 }
127 return true;
128 }
129
130
131 /*
132 Test files are expected to follow the schema in:
133 https://github.com/unicode-org/conformance/blob/main/schema/message_fmt2/testgen_schema.json
134 as of https://github.com/unicode-org/conformance/pull/255
135 */
runValidTest(TestMessageFormat2 & icuTest,const std::string & testName,const std::string & defaultError,const json & j,IcuTestErrorCode & errorCode)136 static void runValidTest(TestMessageFormat2& icuTest,
137 const std::string& testName,
138 const std::string& defaultError,
139 const json& j,
140 IcuTestErrorCode& errorCode) {
141 auto j_object = j.template get<json::object_t>();
142 std::string messageText;
143
144 // src can be a single string or an array of strings
145 if (!j_object["src"].is_null()) {
146 if (j_object["src"].is_string()) {
147 messageText = j_object["src"].template get<std::string>();
148 } else {
149 auto strings = j_object["src"].template get<std::vector<std::string>>();
150 for (const auto &piece : strings) {
151 messageText += piece;
152 }
153 }
154 }
155 // Otherwise, it should probably be an error, but we just
156 // treat this as the empty string
157
158 TestCase::Builder test = successTest(testName, messageText);
159
160 // Certain ICU4J tests don't work yet in ICU4C.
161 // See ICU-22754
162 // ignoreCpp => only works in Java
163 if (!j_object["ignoreCpp"].is_null()) {
164 return;
165 }
166
167 if (!j_object["exp"].is_null()) {
168 // Set expected result if it's present
169 std::string expectedOutput = j["exp"].template get<std::string>();
170 test.setExpected(u_str(expectedOutput));
171 }
172
173 if (!j_object["locale"].is_null()) {
174 std::string localeStr = j_object["locale"].template get<std::string>();
175 test.setLocale(Locale(localeStr.c_str()));
176 }
177
178 if (!j_object["params"].is_null()) {
179 // `params` is an array of objects
180 auto params = j_object["params"].template get<std::vector<json>>();
181 if (!setArguments(icuTest, test, params, errorCode)) {
182 return; // Skip tests with unsupported arguments
183 }
184 }
185
186 bool expectedError = false;
187 if (!j_object["expErrors"].is_null()) {
188 // Map from string to string
189 auto errors = j_object["expErrors"].template get<std::vector<std::map<std::string, std::string>>>();
190 // We only emit the first error, so we just hope the first error
191 // in the list in the test is also the error we emit
192 U_ASSERT(errors.size() > 0);
193 std::string errorType = errors[0]["type"];
194 if (errorType.length() <= 0) {
195 errorType = errors[0]["name"];
196 }
197 // See TODO(options); ignore these tests for now
198 if (errorType == "bad-option") {
199 return;
200 }
201 test.setExpectedError(getExpectedRuntimeErrorFromString(errorType));
202 expectedError = true;
203 } else if (defaultError.length() > 0) {
204 test.setExpectedError(getExpectedRuntimeErrorFromString(defaultError));
205 expectedError = true;
206 }
207
208 // If no expected result and no error, then set the test builder to expect success
209 if (j_object["exp"].is_null() && !expectedError) {
210 test.setNoSyntaxError();
211 }
212
213 // Check for expected diagnostic values
214 int32_t lineNumber = 0;
215 int32_t offset = -1;
216 if (!j_object["char"].is_null()) {
217 offset = j_object["char"].template get<int32_t>();
218 }
219 if (!j_object["line"].is_null()) {
220 lineNumber = j_object["line"].template get<int32_t>();
221 }
222 if (offset != -1) {
223 test.setExpectedLineNumberAndOffset(lineNumber, offset);
224 }
225
226
227 TestCase t = test.build();
228 TestUtils::runTestCase(icuTest, t, errorCode);
229 }
230
231 // File name is relative to message2/ in the test data directory
runTestsFromJsonFile(TestMessageFormat2 & t,const std::string & fileName,IcuTestErrorCode & errorCode)232 static void runTestsFromJsonFile(TestMessageFormat2& t,
233 const std::string& fileName,
234 IcuTestErrorCode& errorCode) {
235 const char* testDataDirectory = IntlTest::getSharedTestData(errorCode);
236 CHECK_ERROR(errorCode);
237
238 std::string testFileName(testDataDirectory);
239 testFileName.append("message2/");
240 testFileName.append(fileName);
241 std::ifstream testFile(testFileName);
242 json data = json::parse(testFile);
243
244 int32_t testNum = 0;
245 char testName[100];
246
247 auto j_object = data.template get<json::object_t>();
248
249 // Some files have an expected error
250 std::string defaultError;
251 if (!j_object["defaultTestProperties"].is_null()
252 && !j_object["defaultTestProperties"]["expErrors"].is_null()) {
253 auto expErrors = j_object["defaultTestProperties"]["expErrors"];
254 // expErrors might also be a boolean, in which case we ignore it --
255 // so we have to check if it's an array
256 if (expErrors.is_array()) {
257 auto expErrorsObj = expErrors.template get<std::vector<json>>();
258 if (expErrorsObj.size() > 0) {
259 if (!expErrorsObj[0]["type"].is_null()) {
260 defaultError = expErrorsObj[0]["type"].template get<std::string>();
261 }
262 }
263 }
264 }
265
266 if (!j_object["tests"].is_null()) {
267 auto tests = j_object["tests"].template get<std::vector<json>>();
268 for (auto iter = tests.begin(); iter != tests.end(); ++iter) {
269 makeTestName(testName, sizeof(testName), fileName, ++testNum);
270 t.logln(testName);
271
272 t.logln(u_str(iter->dump()));
273
274 runValidTest(t, testName, defaultError, *iter, errorCode);
275 }
276 } else {
277 // Test doesn't follow schema -- probably an error
278 t.logln("Warning: no tests in filename: ");
279 t.logln(u_str(fileName));
280 (UErrorCode&) errorCode = U_ILLEGAL_ARGUMENT_ERROR;
281 }
282 }
283
jsonTestsFromFiles(IcuTestErrorCode & errorCode)284 void TestMessageFormat2::jsonTestsFromFiles(IcuTestErrorCode& errorCode) {
285 // Spec tests are fairly limited as the spec doesn't dictate formatter
286 // output. Tests under testdata/message2/spec are taken from
287 // https://github.com/unicode-org/message-format-wg/tree/main/test .
288 // Tests directly under testdata/message2 are specific to ICU4C.
289
290 // Do spec tests for syntax errors
291 runTestsFromJsonFile(*this, "spec/syntax-errors.json", errorCode);
292 runTestsFromJsonFile(*this, "unsupported-expressions.json", errorCode);
293 runTestsFromJsonFile(*this, "unsupported-statements.json", errorCode);
294 runTestsFromJsonFile(*this, "syntax-errors-reserved.json", errorCode);
295
296 // Do tests for data model errors
297 runTestsFromJsonFile(*this, "spec/data-model-errors.json", errorCode);
298 runTestsFromJsonFile(*this, "more-data-model-errors.json", errorCode);
299
300 // Do valid spec tests
301 runTestsFromJsonFile(*this, "spec/syntax.json", errorCode);
302
303 // Do valid function tests
304 runTestsFromJsonFile(*this, "spec/functions/date.json", errorCode);
305 runTestsFromJsonFile(*this, "spec/functions/datetime.json", errorCode);
306 runTestsFromJsonFile(*this, "spec/functions/integer.json", errorCode);
307 runTestsFromJsonFile(*this, "spec/functions/number.json", errorCode);
308 runTestsFromJsonFile(*this, "spec/functions/string.json", errorCode);
309 runTestsFromJsonFile(*this, "spec/functions/time.json", errorCode);
310
311 // Other tests (non-spec)
312 runTestsFromJsonFile(*this, "more-functions.json", errorCode);
313 runTestsFromJsonFile(*this, "valid-tests.json", errorCode);
314 runTestsFromJsonFile(*this, "resolution-errors.json", errorCode);
315 runTestsFromJsonFile(*this, "matches-whitespace.json", errorCode);
316 runTestsFromJsonFile(*this, "alias-selector-annotations.json", errorCode);
317 runTestsFromJsonFile(*this, "runtime-errors.json", errorCode);
318
319 // Re: the expected output for the first test in this file:
320 // Note: the more "correct" fallback output seems like it should be "1.000 3" (ignoring the
321 // overriding .input binding of $var2) but that's hard to achieve
322 // as so-called "implicit declarations" can only be detected after parsing, at which
323 // point the data model can't be modified.
324 // Probably this is going to change anyway so that any data model error gets replaced
325 // with a fallback for the whole message.
326 // The second test has a similar issue with the output.
327 runTestsFromJsonFile(*this, "tricky-declarations.json", errorCode);
328
329 // Markup is ignored when formatting to string
330 runTestsFromJsonFile(*this, "markup.json", errorCode);
331
332 // TODO(duplicates): currently the expected output is based on using
333 // the last definition of the duplicate-declared variable;
334 // perhaps it's better to remove all declarations for $foo before formatting.
335 // however if https://github.com/unicode-org/message-format-wg/pull/704 lands,
336 // it'll be a moot point since the output will be expected to be the fallback string
337 // (This applies to the expected output for all the U_DUPLICATE_DECLARATION_ERROR tests)
338 runTestsFromJsonFile(*this, "duplicate-declarations.json", errorCode);
339
340 // TODO(options):
341 // Bad options. The spec is unclear about this
342 // -- see https://github.com/unicode-org/message-format-wg/issues/738
343 // The current behavior is to set a U_MF_FORMATTING_ERROR for any invalid options.
344 runTestsFromJsonFile(*this, "invalid-options.json", errorCode);
345
346 runTestsFromJsonFile(*this, "syntax-errors-end-of-input.json", errorCode);
347 runTestsFromJsonFile(*this, "syntax-errors-diagnostics.json", errorCode);
348 runTestsFromJsonFile(*this, "invalid-number-literals-diagnostics.json", errorCode);
349 runTestsFromJsonFile(*this, "syntax-errors-diagnostics-multiline.json", errorCode);
350
351 // ICU4J tests
352 runTestsFromJsonFile(*this, "icu-test-functions.json", errorCode);
353 runTestsFromJsonFile(*this, "icu-parser-tests.json", errorCode);
354 runTestsFromJsonFile(*this, "icu-test-selectors.json", errorCode);
355 runTestsFromJsonFile(*this, "icu-test-previous-release.json", errorCode);
356 }
357
358 #endif /* #if !UCONFIG_NO_MF2 */
359
360 #endif /* #if !UCONFIG_NO_FORMATTING */
361