1 // __ _____ _____ _____
2 // __| | __| | | | JSON for Modern C++ (supporting code)
3 // | | |__ | | | | | | version 3.11.2
4 // |_____|_____|_____|_|___| https://github.com/nlohmann/json
5 //
6 // SPDX-FileCopyrightText: 2013-2022 Niels Lohmann <https://nlohmann.me>
7 // SPDX-License-Identifier: MIT
8
9 #include "doctest_compatibility.h"
10
11 // for some reason including this after the json header leads to linker errors with VS 2017...
12 #include <locale>
13
14 #include <nlohmann/json.hpp>
15 using nlohmann::json;
16
17 #include <fstream>
18 #include <sstream>
19 #include <iostream>
20 #include <iomanip>
21 #include "make_test_data_available.hpp"
22
23 // this test suite uses static variables with non-trivial destructors
24 DOCTEST_CLANG_SUPPRESS_WARNING_PUSH
25 DOCTEST_CLANG_SUPPRESS_WARNING("-Wexit-time-destructors")
26
27 namespace
28 {
// running count of check_utf8string() invocations; drives the progress output
// NOTE(review): the separate extern declaration presumably exists to satisfy
// missing-declaration warnings - confirm before removing it
extern size_t calls;
size_t calls{0};
31
32 void check_utf8dump(bool success_expected, int byte1, int byte2, int byte3, int byte4);
33
check_utf8dump(bool success_expected,int byte1,int byte2=-1,int byte3=-1,int byte4=-1)34 void check_utf8dump(bool success_expected, int byte1, int byte2 = -1, int byte3 = -1, int byte4 = -1)
35 {
36 static std::string json_string;
37 json_string.clear();
38
39 CAPTURE(byte1)
40 CAPTURE(byte2)
41 CAPTURE(byte3)
42 CAPTURE(byte4)
43
44 json_string += std::string(1, static_cast<char>(byte1));
45
46 if (byte2 != -1)
47 {
48 json_string += std::string(1, static_cast<char>(byte2));
49 }
50
51 if (byte3 != -1)
52 {
53 json_string += std::string(1, static_cast<char>(byte3));
54 }
55
56 if (byte4 != -1)
57 {
58 json_string += std::string(1, static_cast<char>(byte4));
59 }
60
61 CAPTURE(json_string)
62
63 // store the string in a JSON value
64 static json j;
65 static json j2;
66 j = json_string;
67 j2 = "abc" + json_string + "xyz";
68
69 static std::string s_ignored;
70 static std::string s_ignored2;
71 static std::string s_ignored_ascii;
72 static std::string s_ignored2_ascii;
73 static std::string s_replaced;
74 static std::string s_replaced2;
75 static std::string s_replaced_ascii;
76 static std::string s_replaced2_ascii;
77
78 // dumping with ignore/replace must not throw in any case
79 s_ignored = j.dump(-1, ' ', false, json::error_handler_t::ignore);
80 s_ignored2 = j2.dump(-1, ' ', false, json::error_handler_t::ignore);
81 s_ignored_ascii = j.dump(-1, ' ', true, json::error_handler_t::ignore);
82 s_ignored2_ascii = j2.dump(-1, ' ', true, json::error_handler_t::ignore);
83 s_replaced = j.dump(-1, ' ', false, json::error_handler_t::replace);
84 s_replaced2 = j2.dump(-1, ' ', false, json::error_handler_t::replace);
85 s_replaced_ascii = j.dump(-1, ' ', true, json::error_handler_t::replace);
86 s_replaced2_ascii = j2.dump(-1, ' ', true, json::error_handler_t::replace);
87
88 if (success_expected)
89 {
90 static std::string s_strict;
91 // strict mode must not throw if success is expected
92 s_strict = j.dump();
93 // all dumps should agree on the string
94 CHECK(s_strict == s_ignored);
95 CHECK(s_strict == s_replaced);
96 }
97 else
98 {
99 // strict mode must throw if success is not expected
100 CHECK_THROWS_AS(j.dump(), json::type_error&);
101 // ignore and replace must create different dumps
102 CHECK(s_ignored != s_replaced);
103
104 // check that replace string contains a replacement character
105 CHECK(s_replaced.find("\xEF\xBF\xBD") != std::string::npos);
106 }
107
108 // check that prefix and suffix are preserved
109 CHECK(s_ignored2.substr(1, 3) == "abc");
110 CHECK(s_ignored2.substr(s_ignored2.size() - 4, 3) == "xyz");
111 CHECK(s_ignored2_ascii.substr(1, 3) == "abc");
112 CHECK(s_ignored2_ascii.substr(s_ignored2_ascii.size() - 4, 3) == "xyz");
113 CHECK(s_replaced2.substr(1, 3) == "abc");
114 CHECK(s_replaced2.substr(s_replaced2.size() - 4, 3) == "xyz");
115 CHECK(s_replaced2_ascii.substr(1, 3) == "abc");
116 CHECK(s_replaced2_ascii.substr(s_replaced2_ascii.size() - 4, 3) == "xyz");
117 }
118
119 void check_utf8string(bool success_expected, int byte1, int byte2, int byte3, int byte4);
120
121 // create and check a JSON string with up to four UTF-8 bytes
check_utf8string(bool success_expected,int byte1,int byte2=-1,int byte3=-1,int byte4=-1)122 void check_utf8string(bool success_expected, int byte1, int byte2 = -1, int byte3 = -1, int byte4 = -1)
123 {
124 if (++calls % 100000 == 0)
125 {
126 std::cout << calls << " of 1641521 UTF-8 strings checked" << std::endl;
127 }
128
129 static std::string json_string;
130 json_string = "\"";
131
132 CAPTURE(byte1)
133 json_string += std::string(1, static_cast<char>(byte1));
134
135 if (byte2 != -1)
136 {
137 CAPTURE(byte2)
138 json_string += std::string(1, static_cast<char>(byte2));
139 }
140
141 if (byte3 != -1)
142 {
143 CAPTURE(byte3)
144 json_string += std::string(1, static_cast<char>(byte3));
145 }
146
147 if (byte4 != -1)
148 {
149 CAPTURE(byte4)
150 json_string += std::string(1, static_cast<char>(byte4));
151 }
152
153 json_string += "\"";
154
155 CAPTURE(json_string)
156
157 json _;
158 if (success_expected)
159 {
160 CHECK_NOTHROW(_ = json::parse(json_string));
161 }
162 else
163 {
164 CHECK_THROWS_AS(_ = json::parse(json_string), json::parse_error&);
165 }
166 }
167 } // namespace
168
skip()169 TEST_CASE("Unicode (3/5)" * doctest::skip())
170 {
171 SECTION("RFC 3629")
172 {
173 /*
174 RFC 3629 describes in Sect. 4 the syntax of UTF-8 byte sequences as
175 follows:
176
177 A UTF-8 string is a sequence of octets representing a sequence of UCS
178 characters. An octet sequence is valid UTF-8 only if it matches the
179 following syntax, which is derived from the rules for encoding UTF-8
180 and is expressed in the ABNF of [RFC2234].
181
182 UTF8-octets = *( UTF8-char )
183 UTF8-char = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4
184 UTF8-1 = %x00-7F
185 UTF8-2 = %xC2-DF UTF8-tail
186 UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
187 %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
188 UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
189 %xF4 %x80-8F 2( UTF8-tail )
190 UTF8-tail = %x80-BF
191 */
192
193 SECTION("UTF8-4 (xF0 x90-BF UTF8-tail UTF8-tail)")
194 {
195 SECTION("well-formed")
196 {
197 for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1)
198 {
199 for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2)
200 {
201 for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
202 {
203 for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
204 {
205 check_utf8string(true, byte1, byte2, byte3, byte4);
206 check_utf8dump(true, byte1, byte2, byte3, byte4);
207 }
208 }
209 }
210 }
211 }
212
213 SECTION("ill-formed: missing second byte")
214 {
215 for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1)
216 {
217 check_utf8string(false, byte1);
218 check_utf8dump(false, byte1);
219 }
220 }
221
222 SECTION("ill-formed: missing third byte")
223 {
224 for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1)
225 {
226 for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2)
227 {
228 check_utf8string(false, byte1, byte2);
229 check_utf8dump(false, byte1, byte2);
230 }
231 }
232 }
233
234 SECTION("ill-formed: missing fourth byte")
235 {
236 for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1)
237 {
238 for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2)
239 {
240 for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
241 {
242 check_utf8string(false, byte1, byte2, byte3);
243 check_utf8dump(false, byte1, byte2, byte3);
244 }
245 }
246 }
247 }
248
249 SECTION("ill-formed: wrong second byte")
250 {
251 for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1)
252 {
253 for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2)
254 {
255 // skip correct second byte
256 if (0x90 <= byte2 && byte2 <= 0xBF)
257 {
258 continue;
259 }
260
261 for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
262 {
263 for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
264 {
265 check_utf8string(false, byte1, byte2, byte3, byte4);
266 check_utf8dump(false, byte1, byte2, byte3, byte4);
267 }
268 }
269 }
270 }
271 }
272
273 SECTION("ill-formed: wrong third byte")
274 {
275 for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1)
276 {
277 for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2)
278 {
279 for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3)
280 {
281 // skip correct third byte
282 if (0x80 <= byte3 && byte3 <= 0xBF)
283 {
284 continue;
285 }
286
287 for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
288 {
289 check_utf8string(false, byte1, byte2, byte3, byte4);
290 check_utf8dump(false, byte1, byte2, byte3, byte4);
291 }
292 }
293 }
294 }
295 }
296
297 SECTION("ill-formed: wrong fourth byte")
298 {
299 for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1)
300 {
301 for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2)
302 {
303 for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
304 {
305 for (int byte4 = 0x00; byte4 <= 0xFF; ++byte4)
306 {
307 // skip fourth second byte
308 if (0x80 <= byte3 && byte3 <= 0xBF)
309 {
310 continue;
311 }
312
313 check_utf8string(false, byte1, byte2, byte3, byte4);
314 check_utf8dump(false, byte1, byte2, byte3, byte4);
315 }
316 }
317 }
318 }
319 }
320 }
321 }
322 }
323
324 DOCTEST_CLANG_SUPPRESS_WARNING_POP
325