• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2013 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/json/string_escape.h"
6 
7 #include <stddef.h>
8 
9 #include "base/strings/string_util.h"
10 #include "base/strings/utf_string_conversions.h"
11 #include "testing/gtest/include/gtest/gtest.h"
12 
13 namespace base {
14 
TEST(JSONStringEscapeTest,EscapeUTF8)15 TEST(JSONStringEscapeTest, EscapeUTF8) {
16   const struct {
17     const char* to_escape;
18     const char* escaped;
19   } cases[] = {
20       {"\b\001aZ\"\\wee", "\\b\\u0001aZ\\\"\\\\wee"},
21       {"a\b\f\n\r\t\v\1\\.\"z", "a\\b\\f\\n\\r\\t\\u000B\\u0001\\\\.\\\"z"},
22       {"b\x0f\x7f\xf0\xff!",  // \xf0\xff is not a valid UTF-8 unit.
23        "b\\u000F\x7F\xEF\xBF\xBD\xEF\xBF\xBD!"},
24       {"c<>d", "c\\u003C>d"},
25       {"Hello\xE2\x80\xA8world", "Hello\\u2028world"},  // U+2028
26       {"\xE2\x80\xA9purple", "\\u2029purple"},          // U+2029
27       // Unicode non-characters.
28       {"\xEF\xB7\x90", "\xEF\xB7\x90"},          // U+FDD0
29       {"\xEF\xB7\x9F", "\xEF\xB7\x9F"},          // U+FDDF
30       {"\xEF\xB7\xAF", "\xEF\xB7\xAF"},          // U+FDEF
31       {"\xEF\xBF\xBE", "\xEF\xBF\xBE"},          // U+FFFE
32       {"\xEF\xBF\xBF", "\xEF\xBF\xBF"},          // U+FFFF
33       {"\xF0\x9F\xBF\xBE", "\xF0\x9F\xBF\xBE"},  // U+01FFFE
34       {"\xF0\x9F\xBF\xBF", "\xF0\x9F\xBF\xBF"},  // U+01FFFF
35       {"\xF0\xAF\xBF\xBE", "\xF0\xAF\xBF\xBE"},  // U+02FFFE
36       {"\xF0\xAF\xBF\xBF", "\xF0\xAF\xBF\xBF"},  // U+02FFFF
37       {"\xF0\xBF\xBF\xBE", "\xF0\xBF\xBF\xBE"},  // U+03FFFE
38       {"\xF0\xBF\xBF\xBF", "\xF0\xBF\xBF\xBF"},  // U+03FFFF
39       {"\xF1\x8F\xBF\xBE", "\xF1\x8F\xBF\xBE"},  // U+04FFFE
40       {"\xF1\x8F\xBF\xBF", "\xF1\x8F\xBF\xBF"},  // U+04FFFF
41       {"\xF1\x9F\xBF\xBE", "\xF1\x9F\xBF\xBE"},  // U+05FFFE
42       {"\xF1\x9F\xBF\xBF", "\xF1\x9F\xBF\xBF"},  // U+05FFFF
43       {"\xF1\xAF\xBF\xBE", "\xF1\xAF\xBF\xBE"},  // U+06FFFE
44       {"\xF1\xAF\xBF\xBF", "\xF1\xAF\xBF\xBF"},  // U+06FFFF
45       {"\xF1\xBF\xBF\xBE", "\xF1\xBF\xBF\xBE"},  // U+07FFFE
46       {"\xF1\xBF\xBF\xBF", "\xF1\xBF\xBF\xBF"},  // U+07FFFF
47       {"\xF2\x8F\xBF\xBE", "\xF2\x8F\xBF\xBE"},  // U+08FFFE
48       {"\xF2\x8F\xBF\xBF", "\xF2\x8F\xBF\xBF"},  // U+08FFFF
49       {"\xF2\x9F\xBF\xBE", "\xF2\x9F\xBF\xBE"},  // U+09FFFE
50       {"\xF2\x9F\xBF\xBF", "\xF2\x9F\xBF\xBF"},  // U+09FFFF
51       {"\xF2\xAF\xBF\xBE", "\xF2\xAF\xBF\xBE"},  // U+0AFFFE
52       {"\xF2\xAF\xBF\xBF", "\xF2\xAF\xBF\xBF"},  // U+0AFFFF
53       {"\xF2\xBF\xBF\xBE", "\xF2\xBF\xBF\xBE"},  // U+0BFFFE
54       {"\xF2\xBF\xBF\xBF", "\xF2\xBF\xBF\xBF"},  // U+0BFFFF
55       {"\xF3\x8F\xBF\xBE", "\xF3\x8F\xBF\xBE"},  // U+0CFFFE
56       {"\xF3\x8F\xBF\xBF", "\xF3\x8F\xBF\xBF"},  // U+0CFFFF
57       {"\xF3\x9F\xBF\xBE", "\xF3\x9F\xBF\xBE"},  // U+0DFFFE
58       {"\xF3\x9F\xBF\xBF", "\xF3\x9F\xBF\xBF"},  // U+0DFFFF
59       {"\xF3\xAF\xBF\xBE", "\xF3\xAF\xBF\xBE"},  // U+0EFFFE
60       {"\xF3\xAF\xBF\xBF", "\xF3\xAF\xBF\xBF"},  // U+0EFFFF
61       {"\xF3\xBF\xBF\xBE", "\xF3\xBF\xBF\xBE"},  // U+0FFFFE
62       {"\xF3\xBF\xBF\xBF", "\xF3\xBF\xBF\xBF"},  // U+0FFFFF
63       {"\xF4\x8F\xBF\xBE", "\xF4\x8F\xBF\xBE"},  // U+10FFFE
64       {"\xF4\x8F\xBF\xBF", "\xF4\x8F\xBF\xBF"},  // U+10FFFF
65   };
66 
67   for (const auto& i : cases) {
68     const char* in_ptr = i.to_escape;
69     std::string in_str = in_ptr;
70 
71     std::string out;
72     EscapeJSONString(in_ptr, false, &out);
73     EXPECT_EQ(std::string(i.escaped), out);
74     EXPECT_TRUE(IsStringUTF8AllowingNoncharacters(out));
75 
76     out.erase();
77     EscapeJSONString(in_str, false, &out);
78     EXPECT_EQ(std::string(i.escaped), out);
79     EXPECT_TRUE(IsStringUTF8AllowingNoncharacters(out));
80 
81     std::string fooout = GetQuotedJSONString(in_str);
82     EXPECT_EQ("\"" + std::string(i.escaped) + "\"", fooout);
83     EXPECT_TRUE(IsStringUTF8AllowingNoncharacters(out));
84   }
85 
86   std::string in = cases[0].to_escape;
87   std::string out;
88   EscapeJSONString(in, false, &out);
89   EXPECT_TRUE(IsStringUTF8AllowingNoncharacters(out));
90 
91   // test quoting
92   std::string out_quoted;
93   EscapeJSONString(in, true, &out_quoted);
94   EXPECT_EQ(out.length() + 2, out_quoted.length());
95   EXPECT_EQ(out_quoted.find(out), 1U);
96   EXPECT_TRUE(IsStringUTF8AllowingNoncharacters(out_quoted));
97 
98   // now try with a NULL in the string
99   std::string null_prepend = "test";
100   null_prepend.push_back(0);
101   in = null_prepend + in;
102   std::string expected = "test\\u0000";
103   expected += cases[0].escaped;
104   out.clear();
105   EscapeJSONString(in, false, &out);
106   EXPECT_EQ(expected, out);
107   EXPECT_TRUE(IsStringUTF8AllowingNoncharacters(out));
108 }
109 
TEST(JSONStringEscapeTest,EscapeUTF16)110 TEST(JSONStringEscapeTest, EscapeUTF16) {
111   const struct {
112     const wchar_t* to_escape;
113     const char* escaped;
114   } cases[] = {
115       {L"b\uffb1\u00ff", "b\xEF\xBE\xB1\xC3\xBF"},
116       {L"\b\001aZ\"\\wee", "\\b\\u0001aZ\\\"\\\\wee"},
117       {L"a\b\f\n\r\t\v\1\\.\"z", "a\\b\\f\\n\\r\\t\\u000B\\u0001\\\\.\\\"z"},
118       {L"b\x0F\x7F\xF0\xFF!", "b\\u000F\x7F\xC3\xB0\xC3\xBF!"},
119       {L"c<>d", "c\\u003C>d"},
120       {L"Hello\u2028world", "Hello\\u2028world"},
121       {L"\u2029purple", "\\u2029purple"},
122       // Unicode non-characters.
123       {L"\uFDD0", "\xEF\xB7\x90"},          // U+FDD0
124       {L"\uFDDF", "\xEF\xB7\x9F"},          // U+FDDF
125       {L"\uFDEF", "\xEF\xB7\xAF"},          // U+FDEF
126       {L"\uFFFE", "\xEF\xBF\xBE"},          // U+FFFE
127       {L"\uFFFF", "\xEF\xBF\xBF"},          // U+FFFF
128       {L"\U0001FFFE", "\xF0\x9F\xBF\xBE"},  // U+01FFFE
129       {L"\U0001FFFF", "\xF0\x9F\xBF\xBF"},  // U+01FFFF
130       {L"\U0002FFFE", "\xF0\xAF\xBF\xBE"},  // U+02FFFE
131       {L"\U0002FFFF", "\xF0\xAF\xBF\xBF"},  // U+02FFFF
132       {L"\U0003FFFE", "\xF0\xBF\xBF\xBE"},  // U+03FFFE
133       {L"\U0003FFFF", "\xF0\xBF\xBF\xBF"},  // U+03FFFF
134       {L"\U0004FFFE", "\xF1\x8F\xBF\xBE"},  // U+04FFFE
135       {L"\U0004FFFF", "\xF1\x8F\xBF\xBF"},  // U+04FFFF
136       {L"\U0005FFFE", "\xF1\x9F\xBF\xBE"},  // U+05FFFE
137       {L"\U0005FFFF", "\xF1\x9F\xBF\xBF"},  // U+05FFFF
138       {L"\U0006FFFE", "\xF1\xAF\xBF\xBE"},  // U+06FFFE
139       {L"\U0006FFFF", "\xF1\xAF\xBF\xBF"},  // U+06FFFF
140       {L"\U0007FFFE", "\xF1\xBF\xBF\xBE"},  // U+07FFFE
141       {L"\U0007FFFF", "\xF1\xBF\xBF\xBF"},  // U+07FFFF
142       {L"\U0008FFFE", "\xF2\x8F\xBF\xBE"},  // U+08FFFE
143       {L"\U0008FFFF", "\xF2\x8F\xBF\xBF"},  // U+08FFFF
144       {L"\U0009FFFE", "\xF2\x9F\xBF\xBE"},  // U+09FFFE
145       {L"\U0009FFFF", "\xF2\x9F\xBF\xBF"},  // U+09FFFF
146       {L"\U000AFFFE", "\xF2\xAF\xBF\xBE"},  // U+0AFFFE
147       {L"\U000AFFFF", "\xF2\xAF\xBF\xBF"},  // U+0AFFFF
148       {L"\U000BFFFE", "\xF2\xBF\xBF\xBE"},  // U+0BFFFE
149       {L"\U000BFFFF", "\xF2\xBF\xBF\xBF"},  // U+0BFFFF
150       {L"\U000CFFFE", "\xF3\x8F\xBF\xBE"},  // U+0CFFFE
151       {L"\U000CFFFF", "\xF3\x8F\xBF\xBF"},  // U+0CFFFF
152       {L"\U000DFFFE", "\xF3\x9F\xBF\xBE"},  // U+0DFFFE
153       {L"\U000DFFFF", "\xF3\x9F\xBF\xBF"},  // U+0DFFFF
154       {L"\U000EFFFE", "\xF3\xAF\xBF\xBE"},  // U+0EFFFE
155       {L"\U000EFFFF", "\xF3\xAF\xBF\xBF"},  // U+0EFFFF
156       {L"\U000FFFFE", "\xF3\xBF\xBF\xBE"},  // U+0FFFFE
157       {L"\U000FFFFF", "\xF3\xBF\xBF\xBF"},  // U+0FFFFF
158       {L"\U0010FFFE", "\xF4\x8F\xBF\xBE"},  // U+10FFFE
159       {L"\U0010FFFF", "\xF4\x8F\xBF\xBF"},  // U+10FFFF
160   };
161 
162   for (const auto& i : cases) {
163     std::u16string in = WideToUTF16(i.to_escape);
164 
165     std::string out;
166     EscapeJSONString(in, false, &out);
167     EXPECT_EQ(std::string(i.escaped), out);
168     EXPECT_TRUE(IsStringUTF8AllowingNoncharacters(out));
169 
170     out = GetQuotedJSONString(in);
171     EXPECT_EQ("\"" + std::string(i.escaped) + "\"", out);
172     EXPECT_TRUE(IsStringUTF8AllowingNoncharacters(out));
173   }
174 
175   std::u16string in = WideToUTF16(cases[0].to_escape);
176   std::string out;
177   EscapeJSONString(in, false, &out);
178   EXPECT_TRUE(IsStringUTF8AllowingNoncharacters(out));
179 
180   // test quoting
181   std::string out_quoted;
182   EscapeJSONString(in, true, &out_quoted);
183   EXPECT_EQ(out.length() + 2, out_quoted.length());
184   EXPECT_EQ(out_quoted.find(out), 1U);
185   EXPECT_TRUE(IsStringUTF8AllowingNoncharacters(out));
186 
187   // now try with a NULL in the string
188   std::u16string null_prepend = u"test";
189   null_prepend.push_back(0);
190   in = null_prepend + in;
191   std::string expected = "test\\u0000";
192   expected += cases[0].escaped;
193   out.clear();
194   EscapeJSONString(in, false, &out);
195   EXPECT_EQ(expected, out);
196   EXPECT_TRUE(IsStringUTF8AllowingNoncharacters(out));
197 }
198 
// Checks how EscapeJSONString() treats UTF-16 surrogates: well-formed pairs
// are emitted as four-byte UTF-8 sequences and the call reports success,
// while an unpaired surrogate yields U+FFFD (EF BF BD) and a false return.
TEST(JSONStringEscapeTest, EscapeUTF16OutsideBMP) {
  {
    // {a, U+10300, !}, SMP.
    const std::u16string smp_input{u'a', 0xD800, 0xDF00, u'!'};
    std::string escaped;
    EXPECT_TRUE(EscapeJSONString(smp_input, false, &escaped));
    EXPECT_EQ("a\xF0\x90\x8C\x80!", escaped);
  }
  {
    // {U+20021, U+2002B}, SIP.
    const std::u16string sip_input{0xD840, 0xDC21, 0xD840, 0xDC2B};
    std::string escaped;
    EXPECT_TRUE(EscapeJSONString(sip_input, false, &escaped));
    EXPECT_EQ("\xF0\xA0\x80\xA1\xF0\xA0\x80\xAB", escaped);
  }
  {
    // {?, U+D800, @}, lone surrogate.
    const std::u16string lone_surrogate_input{u'?', 0xD800, u'@'};
    std::string escaped;
    EXPECT_FALSE(EscapeJSONString(lone_surrogate_input, false, &escaped));
    EXPECT_EQ("?\xEF\xBF\xBD@", escaped);
  }
}
233 
// Checks EscapeBytesAsInvalidJSONString() on byte strings that are not valid
// UTF-8. In the expectations below, bytes such as 0x0F, 0x7F and 0xF0 appear
// as \u00XX escapes while characters like 'O' and '9' pass through unchanged.
TEST(JSONStringEscapeTest, EscapeBytes) {
  struct ByteCase {
    const char* to_escape;
    const char* escaped;
  };
  const ByteCase kCases[] = {
      {"b\x0f\x7f\xf0\xff!", "b\\u000F\\u007F\\u00F0\\u00FF!"},
      {"\xe5\xc4\x4f\x05\xb6\xfd", "\\u00E5\\u00C4O\\u0005\\u00B6\\u00FD"},
  };

  for (const ByteCase& byte_case : kCases) {
    const std::string raw(byte_case.to_escape);
    const std::string want(byte_case.escaped);

    // Precondition: the input really is malformed UTF-8.
    EXPECT_FALSE(IsStringUTF8AllowingNoncharacters(raw));

    // Without and with the surrounding double quotes.
    EXPECT_EQ(want, EscapeBytesAsInvalidJSONString(raw, false));
    EXPECT_EQ("\"" + want + "\"", EscapeBytesAsInvalidJSONString(raw, true));
  }

  // A buffer with an embedded NUL: the explicit length keeps the bytes after
  // '\0' in the input, and they must show up in the escaped output.
  const char kEmbedNull[] = {'\xab', '\x39', '\0', '\x9f', '\xab'};
  const std::string with_nul(kEmbedNull, std::size(kEmbedNull));
  EXPECT_FALSE(IsStringUTF8AllowingNoncharacters(with_nul));
  EXPECT_EQ(std::string("\\u00AB9\\u0000\\u009F\\u00AB"),
            EscapeBytesAsInvalidJSONString(with_nul, false));
}
259 
260 }  // namespace base
261