1 // Copyright 2013 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/json/string_escape.h"
6
7 #include <stddef.h>
8
9 #include "base/strings/string_util.h"
10 #include "base/strings/utf_string_conversions.h"
11 #include "testing/gtest/include/gtest/gtest.h"
12
13 namespace base {
14
// Checks JSON escaping of UTF-8 input. Every case is run three ways: passing
// the input as a C string, passing it as a std::string, and through
// GetQuotedJSONString(). Each produced string is also required to be valid
// UTF-8 (noncharacters allowed). Quoting and an embedded NUL are covered at
// the end using the first case as input.
TEST(JSONStringEscapeTest, EscapeUTF8) {
  const struct {
    const char* to_escape;
    const char* escaped;
  } cases[] = {
      {"\b\001aZ\"\\wee", "\\b\\u0001aZ\\\"\\\\wee"},
      {"a\b\f\n\r\t\v\1\\.\"z", "a\\b\\f\\n\\r\\t\\u000B\\u0001\\\\.\\\"z"},
      {"b\x0f\x7f\xf0\xff!",  // \xf0\xff is not a valid UTF-8 unit.
       "b\\u000F\x7F\xEF\xBF\xBD\xEF\xBF\xBD!"},
      {"c<>d", "c\\u003C>d"},
      {"Hello\xE2\x80\xA8world", "Hello\\u2028world"},  // U+2028
      {"\xE2\x80\xA9purple", "\\u2029purple"},          // U+2029
      // Unicode non-characters.
      {"\xEF\xB7\x90", "\xEF\xB7\x90"},          // U+FDD0
      {"\xEF\xB7\x9F", "\xEF\xB7\x9F"},          // U+FDDF
      {"\xEF\xB7\xAF", "\xEF\xB7\xAF"},          // U+FDEF
      {"\xEF\xBF\xBE", "\xEF\xBF\xBE"},          // U+FFFE
      {"\xEF\xBF\xBF", "\xEF\xBF\xBF"},          // U+FFFF
      {"\xF0\x9F\xBF\xBE", "\xF0\x9F\xBF\xBE"},  // U+01FFFE
      {"\xF0\x9F\xBF\xBF", "\xF0\x9F\xBF\xBF"},  // U+01FFFF
      {"\xF0\xAF\xBF\xBE", "\xF0\xAF\xBF\xBE"},  // U+02FFFE
      {"\xF0\xAF\xBF\xBF", "\xF0\xAF\xBF\xBF"},  // U+02FFFF
      {"\xF0\xBF\xBF\xBE", "\xF0\xBF\xBF\xBE"},  // U+03FFFE
      {"\xF0\xBF\xBF\xBF", "\xF0\xBF\xBF\xBF"},  // U+03FFFF
      {"\xF1\x8F\xBF\xBE", "\xF1\x8F\xBF\xBE"},  // U+04FFFE
      {"\xF1\x8F\xBF\xBF", "\xF1\x8F\xBF\xBF"},  // U+04FFFF
      {"\xF1\x9F\xBF\xBE", "\xF1\x9F\xBF\xBE"},  // U+05FFFE
      {"\xF1\x9F\xBF\xBF", "\xF1\x9F\xBF\xBF"},  // U+05FFFF
      {"\xF1\xAF\xBF\xBE", "\xF1\xAF\xBF\xBE"},  // U+06FFFE
      {"\xF1\xAF\xBF\xBF", "\xF1\xAF\xBF\xBF"},  // U+06FFFF
      {"\xF1\xBF\xBF\xBE", "\xF1\xBF\xBF\xBE"},  // U+07FFFE
      {"\xF1\xBF\xBF\xBF", "\xF1\xBF\xBF\xBF"},  // U+07FFFF
      {"\xF2\x8F\xBF\xBE", "\xF2\x8F\xBF\xBE"},  // U+08FFFE
      {"\xF2\x8F\xBF\xBF", "\xF2\x8F\xBF\xBF"},  // U+08FFFF
      {"\xF2\x9F\xBF\xBE", "\xF2\x9F\xBF\xBE"},  // U+09FFFE
      {"\xF2\x9F\xBF\xBF", "\xF2\x9F\xBF\xBF"},  // U+09FFFF
      {"\xF2\xAF\xBF\xBE", "\xF2\xAF\xBF\xBE"},  // U+0AFFFE
      {"\xF2\xAF\xBF\xBF", "\xF2\xAF\xBF\xBF"},  // U+0AFFFF
      {"\xF2\xBF\xBF\xBE", "\xF2\xBF\xBF\xBE"},  // U+0BFFFE
      {"\xF2\xBF\xBF\xBF", "\xF2\xBF\xBF\xBF"},  // U+0BFFFF
      {"\xF3\x8F\xBF\xBE", "\xF3\x8F\xBF\xBE"},  // U+0CFFFE
      {"\xF3\x8F\xBF\xBF", "\xF3\x8F\xBF\xBF"},  // U+0CFFFF
      {"\xF3\x9F\xBF\xBE", "\xF3\x9F\xBF\xBE"},  // U+0DFFFE
      {"\xF3\x9F\xBF\xBF", "\xF3\x9F\xBF\xBF"},  // U+0DFFFF
      {"\xF3\xAF\xBF\xBE", "\xF3\xAF\xBF\xBE"},  // U+0EFFFE
      {"\xF3\xAF\xBF\xBF", "\xF3\xAF\xBF\xBF"},  // U+0EFFFF
      {"\xF3\xBF\xBF\xBE", "\xF3\xBF\xBF\xBE"},  // U+0FFFFE
      {"\xF3\xBF\xBF\xBF", "\xF3\xBF\xBF\xBF"},  // U+0FFFFF
      {"\xF4\x8F\xBF\xBE", "\xF4\x8F\xBF\xBE"},  // U+10FFFE
      {"\xF4\x8F\xBF\xBF", "\xF4\x8F\xBF\xBF"},  // U+10FFFF
  };

  for (const auto& i : cases) {
    const char* in_ptr = i.to_escape;
    std::string in_str = in_ptr;

    // Input passed as a C string.
    std::string out;
    EscapeJSONString(in_ptr, false, &out);
    EXPECT_EQ(std::string(i.escaped), out);
    EXPECT_TRUE(IsStringUTF8AllowingNoncharacters(out));

    // Input passed as a std::string; start again from an empty buffer.
    out.clear();
    EscapeJSONString(in_str, false, &out);
    EXPECT_EQ(std::string(i.escaped), out);
    EXPECT_TRUE(IsStringUTF8AllowingNoncharacters(out));

    // The quoted helper must yield the same escaping wrapped in double quotes,
    // and the quoted result itself must be valid UTF-8.
    std::string quoted = GetQuotedJSONString(in_str);
    EXPECT_EQ("\"" + std::string(i.escaped) + "\"", quoted);
    EXPECT_TRUE(IsStringUTF8AllowingNoncharacters(quoted));
  }

  std::string in = cases[0].to_escape;
  std::string out;
  EscapeJSONString(in, false, &out);
  EXPECT_TRUE(IsStringUTF8AllowingNoncharacters(out));

  // test quoting: the quoted form is the unquoted form plus one leading and
  // one trailing character.
  std::string out_quoted;
  EscapeJSONString(in, true, &out_quoted);
  EXPECT_EQ(out.length() + 2, out_quoted.length());
  EXPECT_EQ(out_quoted.find(out), 1U);
  EXPECT_TRUE(IsStringUTF8AllowingNoncharacters(out_quoted));

  // now try with a NULL in the string
  std::string null_prepend = "test";
  null_prepend.push_back(0);
  in = null_prepend + in;
  std::string expected = "test\\u0000";
  expected += cases[0].escaped;
  out.clear();
  EscapeJSONString(in, false, &out);
  EXPECT_EQ(expected, out);
  EXPECT_TRUE(IsStringUTF8AllowingNoncharacters(out));
}
109
// Checks JSON escaping of UTF-16 input. Each wide-string case is converted
// with WideToUTF16(), escaped with EscapeJSONString() and with
// GetQuotedJSONString(), and compared against the expected UTF-8 output,
// which must also be valid UTF-8 (noncharacters allowed). Quoting and an
// embedded NUL are covered at the end using the first case as input.
TEST(JSONStringEscapeTest, EscapeUTF16) {
  const struct {
    const wchar_t* to_escape;
    const char* escaped;
  } cases[] = {
      {L"b\uffb1\u00ff", "b\xEF\xBE\xB1\xC3\xBF"},
      {L"\b\001aZ\"\\wee", "\\b\\u0001aZ\\\"\\\\wee"},
      {L"a\b\f\n\r\t\v\1\\.\"z", "a\\b\\f\\n\\r\\t\\u000B\\u0001\\\\.\\\"z"},
      {L"b\x0F\x7F\xF0\xFF!", "b\\u000F\x7F\xC3\xB0\xC3\xBF!"},
      {L"c<>d", "c\\u003C>d"},
      {L"Hello\u2028world", "Hello\\u2028world"},
      {L"\u2029purple", "\\u2029purple"},
      // Unicode non-characters.
      {L"\uFDD0", "\xEF\xB7\x90"},          // U+FDD0
      {L"\uFDDF", "\xEF\xB7\x9F"},          // U+FDDF
      {L"\uFDEF", "\xEF\xB7\xAF"},          // U+FDEF
      {L"\uFFFE", "\xEF\xBF\xBE"},          // U+FFFE
      {L"\uFFFF", "\xEF\xBF\xBF"},          // U+FFFF
      {L"\U0001FFFE", "\xF0\x9F\xBF\xBE"},  // U+01FFFE
      {L"\U0001FFFF", "\xF0\x9F\xBF\xBF"},  // U+01FFFF
      {L"\U0002FFFE", "\xF0\xAF\xBF\xBE"},  // U+02FFFE
      {L"\U0002FFFF", "\xF0\xAF\xBF\xBF"},  // U+02FFFF
      {L"\U0003FFFE", "\xF0\xBF\xBF\xBE"},  // U+03FFFE
      {L"\U0003FFFF", "\xF0\xBF\xBF\xBF"},  // U+03FFFF
      {L"\U0004FFFE", "\xF1\x8F\xBF\xBE"},  // U+04FFFE
      {L"\U0004FFFF", "\xF1\x8F\xBF\xBF"},  // U+04FFFF
      {L"\U0005FFFE", "\xF1\x9F\xBF\xBE"},  // U+05FFFE
      {L"\U0005FFFF", "\xF1\x9F\xBF\xBF"},  // U+05FFFF
      {L"\U0006FFFE", "\xF1\xAF\xBF\xBE"},  // U+06FFFE
      {L"\U0006FFFF", "\xF1\xAF\xBF\xBF"},  // U+06FFFF
      {L"\U0007FFFE", "\xF1\xBF\xBF\xBE"},  // U+07FFFE
      {L"\U0007FFFF", "\xF1\xBF\xBF\xBF"},  // U+07FFFF
      {L"\U0008FFFE", "\xF2\x8F\xBF\xBE"},  // U+08FFFE
      {L"\U0008FFFF", "\xF2\x8F\xBF\xBF"},  // U+08FFFF
      {L"\U0009FFFE", "\xF2\x9F\xBF\xBE"},  // U+09FFFE
      {L"\U0009FFFF", "\xF2\x9F\xBF\xBF"},  // U+09FFFF
      {L"\U000AFFFE", "\xF2\xAF\xBF\xBE"},  // U+0AFFFE
      {L"\U000AFFFF", "\xF2\xAF\xBF\xBF"},  // U+0AFFFF
      {L"\U000BFFFE", "\xF2\xBF\xBF\xBE"},  // U+0BFFFE
      {L"\U000BFFFF", "\xF2\xBF\xBF\xBF"},  // U+0BFFFF
      {L"\U000CFFFE", "\xF3\x8F\xBF\xBE"},  // U+0CFFFE
      {L"\U000CFFFF", "\xF3\x8F\xBF\xBF"},  // U+0CFFFF
      {L"\U000DFFFE", "\xF3\x9F\xBF\xBE"},  // U+0DFFFE
      {L"\U000DFFFF", "\xF3\x9F\xBF\xBF"},  // U+0DFFFF
      {L"\U000EFFFE", "\xF3\xAF\xBF\xBE"},  // U+0EFFFE
      {L"\U000EFFFF", "\xF3\xAF\xBF\xBF"},  // U+0EFFFF
      {L"\U000FFFFE", "\xF3\xBF\xBF\xBE"},  // U+0FFFFE
      {L"\U000FFFFF", "\xF3\xBF\xBF\xBF"},  // U+0FFFFF
      {L"\U0010FFFE", "\xF4\x8F\xBF\xBE"},  // U+10FFFE
      {L"\U0010FFFF", "\xF4\x8F\xBF\xBF"},  // U+10FFFF
  };

  for (const auto& i : cases) {
    std::u16string in = WideToUTF16(i.to_escape);

    std::string out;
    EscapeJSONString(in, false, &out);
    EXPECT_EQ(std::string(i.escaped), out);
    EXPECT_TRUE(IsStringUTF8AllowingNoncharacters(out));

    // |out| is reassigned here, so the checks below apply to the quoted form.
    out = GetQuotedJSONString(in);
    EXPECT_EQ("\"" + std::string(i.escaped) + "\"", out);
    EXPECT_TRUE(IsStringUTF8AllowingNoncharacters(out));
  }

  std::u16string in = WideToUTF16(cases[0].to_escape);
  std::string out;
  EscapeJSONString(in, false, &out);
  EXPECT_TRUE(IsStringUTF8AllowingNoncharacters(out));

  // test quoting: the quoted form is the unquoted form plus one leading and
  // one trailing character, and must itself be valid UTF-8.
  std::string out_quoted;
  EscapeJSONString(in, true, &out_quoted);
  EXPECT_EQ(out.length() + 2, out_quoted.length());
  EXPECT_EQ(out_quoted.find(out), 1U);
  EXPECT_TRUE(IsStringUTF8AllowingNoncharacters(out_quoted));

  // now try with a NULL in the string
  std::u16string null_prepend = u"test";
  null_prepend.push_back(0);
  in = null_prepend + in;
  std::string expected = "test\\u0000";
  expected += cases[0].escaped;
  out.clear();
  EscapeJSONString(in, false, &out);
  EXPECT_EQ(expected, out);
  EXPECT_TRUE(IsStringUTF8AllowingNoncharacters(out));
}
198
// Exercises UTF-16 input containing surrogate code units: two well-formed
// surrogate-pair inputs that must escape successfully to 4-byte UTF-8
// sequences, and one lone surrogate for which EscapeJSONString() must return
// false while emitting EF BF BD in its place.
TEST(JSONStringEscapeTest, EscapeUTF16OutsideBMP) {
  const struct {
    std::u16string utf16;
    bool expect_success;
    const char* expected_utf8;
  } kSurrogateCases[] = {
      // {a, U+10300, !}, SMP.
      {{u'a', 0xD800, 0xDF00, u'!'}, true, "a\xF0\x90\x8C\x80!"},
      // {U+20021, U+2002B}, SIP.
      {{0xD840, 0xDC21, 0xD840, 0xDC2B},
       true,
       "\xF0\xA0\x80\xA1\xF0\xA0\x80\xAB"},
      // {?, U+D800, @}, lone surrogate.
      {{u'?', 0xD800, u'@'}, false, "?\xEF\xBF\xBD@"},
  };

  for (const auto& surrogate_case : kSurrogateCases) {
    std::string escaped;
    const bool succeeded =
        EscapeJSONString(surrogate_case.utf16, false, &escaped);
    EXPECT_EQ(surrogate_case.expect_success, succeeded);
    EXPECT_EQ(surrogate_case.expected_utf8, escaped);
  }
}
233
// Checks EscapeBytesAsInvalidJSONString() on inputs that are not valid UTF-8.
// Per the expected strings below, such bytes come out as \u00XX escapes while
// plain ASCII characters such as 'O' and '9' pass through unchanged. Both the
// unquoted and quoted forms are verified, plus an input with an embedded NUL.
TEST(JSONStringEscapeTest, EscapeBytes) {
  struct ByteCase {
    const char* to_escape;
    const char* escaped;
  };
  const ByteCase kByteCases[] = {
      {"b\x0f\x7f\xf0\xff!", "b\\u000F\\u007F\\u00F0\\u00FF!"},
      {"\xe5\xc4\x4f\x05\xb6\xfd", "\\u00E5\\u00C4O\\u0005\\u00B6\\u00FD"},
  };

  for (const ByteCase& byte_case : kByteCases) {
    const std::string raw_bytes(byte_case.to_escape);
    const std::string want(byte_case.escaped);

    // Precondition: the input really is malformed UTF-8.
    EXPECT_FALSE(IsStringUTF8AllowingNoncharacters(raw_bytes));

    EXPECT_EQ(want, EscapeBytesAsInvalidJSONString(raw_bytes, false));
    EXPECT_EQ("\"" + want + "\"",
              EscapeBytesAsInvalidJSONString(raw_bytes, true));
  }

  // Built from an explicit length so the NUL in the middle is kept in the
  // string instead of terminating it.
  const char kEmbedNull[] = {'\xab', '\x39', '\0', '\x9f', '\xab'};
  const std::string with_nul(kEmbedNull, sizeof(kEmbedNull));
  EXPECT_FALSE(IsStringUTF8AllowingNoncharacters(with_nul));
  EXPECT_EQ(std::string("\\u00AB9\\u0000\\u009F\\u00AB"),
            EscapeBytesAsInvalidJSONString(with_nul, false));
}
259
260 } // namespace base
261