1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <algorithm>
6 #include <string>
7
8 #include "net/base/escape.h"
9
10 #include "base/basictypes.h"
11 #include "base/i18n/icu_string_conversions.h"
12 #include "base/string_util.h"
13 #include "base/stringprintf.h"
14 #include "base/utf_string_conversions.h"
15 #include "testing/gtest/include/gtest/gtest.h"
16
17 namespace {
18
19 static const size_t kNpos = string16::npos;
20
21 struct EscapeCase {
22 const wchar_t* input;
23 const wchar_t* output;
24 };
25
26 struct UnescapeURLCase {
27 const wchar_t* input;
28 UnescapeRule::Type rules;
29 const wchar_t* output;
30 };
31
32 struct UnescapeURLCaseASCII {
33 const char* input;
34 UnescapeRule::Type rules;
35 const char* output;
36 };
37
38 struct UnescapeAndDecodeCase {
39 const char* input;
40
41 // The expected output when run through UnescapeURL.
42 const char* url_unescaped;
43
44 // The expected output when run through UnescapeQuery.
45 const char* query_unescaped;
46
47 // The expected output when run through UnescapeAndDecodeURLComponent.
48 const wchar_t* decoded;
49 };
50
51 struct AdjustOffsetCase {
52 const char* input;
53 size_t input_offset;
54 size_t output_offset;
55 };
56
57 struct EscapeForHTMLCase {
58 const char* input;
59 const char* expected_output;
60 };
61
62 } // namespace
63
// Exercises the query-parameter escaping helpers: space handling with and
// without '+' substitution, the per-byte escape set, and agreement between
// EscapeQueryParamValueUTF8() and the explicit-codepage overload.
TEST(EscapeTest, EscapeTextForFormSubmission) {
  // Second argument true: a space becomes '+', a literal '+' becomes "%2B".
  const EscapeCase escape_cases[] = {
    {L"foo", L"foo"},
    {L"foo bar", L"foo+bar"},
    {L"foo++", L"foo%2B%2B"}
  };
  for (size_t i = 0; i < arraysize(escape_cases); ++i) {
    const EscapeCase& value = escape_cases[i];
    EXPECT_EQ(WideToUTF16Hack(value.output),
              EscapeQueryParamValueUTF8(WideToUTF16Hack(value.input), true));
  }

  // Second argument false: a space becomes "%20" instead of '+'.
  const EscapeCase escape_cases_no_plus[] = {
    {L"foo", L"foo"},
    {L"foo bar", L"foo%20bar"},
    {L"foo++", L"foo%2B%2B"}
  };
  for (size_t i = 0; i < arraysize(escape_cases_no_plus); ++i) {
    const EscapeCase& value = escape_cases_no_plus[i];
    EXPECT_EQ(WideToUTF16Hack(value.output),
              EscapeQueryParamValueUTF8(WideToUTF16Hack(value.input), false));
  }

  // Test every byte value: everything outside |no_escape| is supposed to be
  // escaped.
  const std::string no_escape(
      "abcdefghijklmnopqrstuvwxyz"
      "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "0123456789"
      "!'()*-._~");
  for (int i = 0; i < 256; ++i) {
    std::string in;
    // Explicit cast: |i| deliberately covers the whole byte range.
    in.push_back(static_cast<char>(i));
    const std::string out = EscapeQueryParamValue(in, true);
    if (0 == i) {
      // Handled separately from the generic branch below.
      EXPECT_EQ(std::string("%00"), out);
    } else if (32 == i) {
      // Spaces are plus escaped like web forms.
      EXPECT_EQ(std::string("+"), out);
    } else if (no_escape.find(in) == std::string::npos) {
      // Check %hex escaping.
      const std::string expected = base::StringPrintf("%%%02X", i);
      EXPECT_EQ(expected, out);
    } else {
      // No change for things in the no_escape list.
      EXPECT_EQ(in, out);
    }
  }

  // Check that EscapeQueryParamValueUTF8(...) gives the same result as
  // EscapeQueryParamValue(..., base::kCodepageUTF8, ...).
  string16 test_str;
  test_str.reserve(5000);
  for (int i = 1; i < 5000; ++i) {
    test_str.push_back(static_cast<string16::value_type>(i));
  }
  string16 wide;
  EXPECT_TRUE(EscapeQueryParamValue(test_str, base::kCodepageUTF8, true,
                                    &wide));
  EXPECT_EQ(wide, EscapeQueryParamValueUTF8(test_str, true));
  EXPECT_TRUE(EscapeQueryParamValue(test_str, base::kCodepageUTF8, false,
                                    &wide));
  EXPECT_EQ(wide, EscapeQueryParamValueUTF8(test_str, false));
}
127
// Runs EscapePath() over a sample of control, punctuation, alphanumeric and
// high-bit bytes and compares the result with the expected escaped form.
TEST(EscapeTest, EscapePath) {
  // Most of the character space we care about, un-escaped.
  const std::string unescaped(
      "\x02\n\x1d !\"#$%&'()*+,-./0123456789:;"
      "<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "[\\]^_`abcdefghijklmnopqrstuvwxyz"
      "{|}~\x7f\x80\xff");
  // The same bytes, escaped.
  const std::string expected(
      "%02%0A%1D%20!%22%23$%25&'()*+,-./0123456789%3A;"
      "%3C=%3E%3F@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz"
      "%7B%7C%7D~%7F%80%FF");
  // Expected value goes first, matching the other assertions in this file.
  ASSERT_EQ(expected, EscapePath(unescaped));
}
142
// Runs EscapeUrlEncodedData() over a sample of control, punctuation,
// alphanumeric and high-bit bytes and compares the result with the expected
// escaped form (note that the space is expected to become '+').
TEST(EscapeTest, EscapeUrlEncodedData) {
  // Most of the character space we care about, un-escaped.
  const std::string unescaped(
      "\x02\n\x1d !\"#$%&'()*+,-./0123456789:;"
      "<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "[\\]^_`abcdefghijklmnopqrstuvwxyz"
      "{|}~\x7f\x80\xff");
  // The same bytes, escaped.
  const std::string expected(
      "%02%0A%1D+!%22%23%24%25%26%27()*%2B,-./0123456789:%3B"
      "%3C%3D%3E%3F%40ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz"
      "%7B%7C%7D~%7F%80%FF");
  // Expected value goes first, matching the other assertions in this file.
  ASSERT_EQ(expected, EscapeUrlEncodedData(unescaped));
}
157
// Table-driven check of the std::string flavour of UnescapeURLComponent()
// under the various UnescapeRule flags, followed by cases that contain NUL
// characters and therefore cannot live in the C-string table.
TEST(EscapeTest, UnescapeURLComponentASCII) {
  const UnescapeURLCaseASCII kCases[] = {
    {"", UnescapeRule::NORMAL, ""},
    {"%2", UnescapeRule::NORMAL, "%2"},
    {"%%%%%%", UnescapeRule::NORMAL, "%%%%%%"},
    {"Don't escape anything", UnescapeRule::NORMAL, "Don't escape anything"},
    {"Invalid %escape %2", UnescapeRule::NORMAL, "Invalid %escape %2"},
    {"Some%20random text %25%2dOK", UnescapeRule::NONE,
     "Some%20random text %25%2dOK"},
    {"Some%20random text %25%2dOK", UnescapeRule::NORMAL,
     "Some%20random text %25-OK"},
    {"Some%20random text %25%2dOK", UnescapeRule::SPACES,
     "Some random text %25-OK"},
    {"Some%20random text %25%2dOK", UnescapeRule::URL_SPECIAL_CHARS,
     "Some%20random text %-OK"},
    {"Some%20random text %25%2dOK",
     UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS,
     "Some random text %-OK"},
    {"%A0%B1%C2%D3%E4%F5", UnescapeRule::NORMAL, "\xA0\xB1\xC2\xD3\xE4\xF5"},
    {"%Aa%Bb%Cc%Dd%Ee%Ff", UnescapeRule::NORMAL, "\xAa\xBb\xCc\xDd\xEe\xFf"},
    // Certain URL-sensitive characters should not be unescaped unless asked.
    {"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", UnescapeRule::SPACES,
     "Hello %13%10world %23# %3F? %3D= %26& %25% %2B+"},
    {"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+",
     UnescapeRule::URL_SPECIAL_CHARS,
     "Hello%20%13%10world ## ?? == && %% ++"},
    // Control characters.
    {"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::URL_SPECIAL_CHARS,
     "%01%02%03%04%05%06%07%08%09 %"},
    {"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::CONTROL_CHARS,
     "\x01\x02\x03\x04\x05\x06\x07\x08\x09 %25"},
    {"Hello%20%13%10%02", UnescapeRule::SPACES, "Hello %13%10%02"},
    {"Hello%20%13%10%02", UnescapeRule::CONTROL_CHARS, "Hello%20\x13\x10\x02"},
  };

  for (size_t case_index = 0; case_index < arraysize(kCases); ++case_index) {
    const UnescapeURLCaseASCII& test_case = kCases[case_index];
    const std::string escaped(test_case.input);
    const std::string wanted(test_case.output);
    EXPECT_EQ(wanted, UnescapeURLComponent(escaped, test_case.rules));
  }

  // NUL handling is checked separately: the table above holds plain char
  // pointers, which cannot carry an embedded NUL.
  const std::string prefix("Null");
  std::string input(prefix);
  input += '\0';  // A raw NUL in the input, in addition to the escaped one.
  input += "%00%39Test";

  // CONTROL_CHARS: the escaped NUL is expected to be unescaped as well.
  std::string with_nul_unescaped(prefix);
  with_nul_unescaped += '\0';
  with_nul_unescaped += '\0';
  with_nul_unescaped += "9Test";
  EXPECT_EQ(with_nul_unescaped,
            UnescapeURLComponent(input, UnescapeRule::CONTROL_CHARS));

  // NORMAL: "%00" is expected to stay escaped.
  std::string with_nul_kept(prefix);
  with_nul_kept += '\0';
  with_nul_kept += "%009Test";
  EXPECT_EQ(with_nul_kept, UnescapeURLComponent(input, UnescapeRule::NORMAL));
}
218
// Table-driven check of the string16 flavour of UnescapeURLComponent() under
// the various UnescapeRule flags, followed by cases that contain NUL
// characters and therefore cannot live in the wide C-string table.
TEST(EscapeTest, UnescapeURLComponent) {
  const UnescapeURLCase kCases[] = {
    {L"", UnescapeRule::NORMAL, L""},
    {L"%2", UnescapeRule::NORMAL, L"%2"},
    {L"%%%%%%", UnescapeRule::NORMAL, L"%%%%%%"},
    {L"Don't escape anything", UnescapeRule::NORMAL, L"Don't escape anything"},
    {L"Invalid %escape %2", UnescapeRule::NORMAL, L"Invalid %escape %2"},
    {L"Some%20random text %25%2dOK", UnescapeRule::NONE,
     L"Some%20random text %25%2dOK"},
    {L"Some%20random text %25%2dOK", UnescapeRule::NORMAL,
     L"Some%20random text %25-OK"},
    {L"Some%20random text %25%2dOK", UnescapeRule::SPACES,
     L"Some random text %25-OK"},
    {L"Some%20random text %25%2dOK", UnescapeRule::URL_SPECIAL_CHARS,
     L"Some%20random text %-OK"},
    {L"Some%20random text %25%2dOK",
     UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS,
     L"Some random text %-OK"},
    {L"%A0%B1%C2%D3%E4%F5", UnescapeRule::NORMAL, L"\xA0\xB1\xC2\xD3\xE4\xF5"},
    {L"%Aa%Bb%Cc%Dd%Ee%Ff", UnescapeRule::NORMAL, L"\xAa\xBb\xCc\xDd\xEe\xFf"},
    // Certain URL-sensitive characters should not be unescaped unless asked.
    {L"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", UnescapeRule::SPACES,
     L"Hello %13%10world %23# %3F? %3D= %26& %25% %2B+"},
    {L"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+",
     UnescapeRule::URL_SPECIAL_CHARS,
     L"Hello%20%13%10world ## ?? == && %% ++"},
    // We can neither escape nor unescape '@' since some websites expect it to
    // be preserved as either '@' or "%40".
    // See http://b/996720 and http://crbug.com/23933 .
    {L"me@my%40example", UnescapeRule::NORMAL, L"me@my%40example"},
    // Control characters.
    {L"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::URL_SPECIAL_CHARS,
     L"%01%02%03%04%05%06%07%08%09 %"},
    {L"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::CONTROL_CHARS,
     L"\x01\x02\x03\x04\x05\x06\x07\x08\x09 %25"},
    {L"Hello%20%13%10%02", UnescapeRule::SPACES, L"Hello %13%10%02"},
    {L"Hello%20%13%10%02", UnescapeRule::CONTROL_CHARS,
     L"Hello%20\x13\x10\x02"},
    {L"Hello\x9824\x9827", UnescapeRule::CONTROL_CHARS,
     L"Hello\x9824\x9827"},
  };

  for (size_t case_index = 0; case_index < arraysize(kCases); ++case_index) {
    const UnescapeURLCase& test_case = kCases[case_index];
    const string16 escaped(WideToUTF16(test_case.input));
    EXPECT_EQ(WideToUTF16(test_case.output),
              UnescapeURLComponent(escaped, test_case.rules));
  }

  // NUL handling is checked separately: the table above holds plain wide
  // character pointers, which cannot carry an embedded NUL.
  string16 input(WideToUTF16(L"Null"));
  input.push_back(0);  // A raw NUL in the input, in addition to the escaped one.
  input.append(WideToUTF16(L"%00%39Test"));

  // CONTROL_CHARS: the escaped NUL is expected to be unescaped as well.
  string16 with_nul_unescaped(WideToUTF16(L"Null"));
  with_nul_unescaped.push_back(0);
  with_nul_unescaped.push_back(0);
  with_nul_unescaped.append(ASCIIToUTF16("9Test"));
  EXPECT_EQ(with_nul_unescaped,
            UnescapeURLComponent(input, UnescapeRule::CONTROL_CHARS));

  // NORMAL: "%00" is expected to stay escaped.
  string16 with_nul_kept(WideToUTF16(L"Null"));
  with_nul_kept.push_back(0);
  with_nul_kept.append(WideToUTF16(L"%009Test"));
  EXPECT_EQ(with_nul_kept, UnescapeURLComponent(input, UnescapeRule::NORMAL));
}
286
// For every table row, checks three things about the same escaped input:
// plain unescaping (NORMAL), unescaping with '+' turned into a space
// (REPLACE_PLUS_WITH_SPACE), and unescape-then-decode-as-UTF-8.
TEST(EscapeTest, UnescapeAndDecodeUTF8URLComponent) {
  // Row layout: input, url_unescaped, query_unescaped, decoded.
  const UnescapeAndDecodeCase kCases[] = {
    { "%",
      "%",
      "%",
     L"%"},
    { "+",
      "+",
      " ",
     L"+"},
    { "%2+",
      "%2+",
      "%2 ",
     L"%2+"},
    { "+%%%+%%%",
      "+%%%+%%%",
      " %%% %%%",
     L"+%%%+%%%"},
    { "Don't escape anything",
      "Don't escape anything",
      "Don't escape anything",
     L"Don't escape anything"},
    { "+Invalid %escape %2+",
      "+Invalid %escape %2+",
      " Invalid %escape %2 ",
     L"+Invalid %escape %2+"},
    { "Some random text %25%2dOK",
      "Some random text %25-OK",
      "Some random text %25-OK",
     L"Some random text %25-OK"},
    { "%01%02%03%04%05%06%07%08%09",
      "%01%02%03%04%05%06%07%08%09",
      "%01%02%03%04%05%06%07%08%09",
     L"%01%02%03%04%05%06%07%08%09"},
    { "%E4%BD%A0+%E5%A5%BD",
      "\xE4\xBD\xA0+\xE5\xA5\xBD",
      "\xE4\xBD\xA0 \xE5\xA5\xBD",
     L"\x4f60+\x597d"},
    { "%ED%ED",  // Invalid UTF-8.
      "\xED\xED",
      "\xED\xED",
     L"%ED%ED"},  // Invalid UTF-8 -> kept unescaped.
  };

  for (size_t row = 0; row < arraysize(kCases); ++row) {
    const UnescapeAndDecodeCase& test_case = kCases[row];

    const std::string url_result =
        UnescapeURLComponent(test_case.input, UnescapeRule::NORMAL);
    EXPECT_EQ(std::string(test_case.url_unescaped), url_result);

    const std::string query_result = UnescapeURLComponent(
        test_case.input, UnescapeRule::REPLACE_PLUS_WITH_SPACE);
    EXPECT_EQ(std::string(test_case.query_unescaped), query_result);

    // TODO: Need to test unescape_spaces and unescape_percent.
    const string16 decoded_result = UnescapeAndDecodeUTF8URLComponent(
        test_case.input, UnescapeRule::NORMAL, NULL);
    EXPECT_EQ(WideToUTF16Hack(std::wstring(test_case.decoded)),
              decoded_result);
  }
}
347
// Checks how UnescapeAndDecodeUTF8URLComponent() rewrites the offset passed
// through its last argument. Each row supplies an input string, the offset
// handed in, and the offset expected afterwards; kNpos stands for "no valid
// offset".
TEST(EscapeTest, AdjustOffset) {
  // kNpos (string16::npos) is used rather than std::wstring::npos because the
  // function under test produces a string16.
  const AdjustOffsetCase adjust_cases[] = {
    {"", 0, kNpos},
    {"test", 0, 0},
    {"test", 2, 2},
    {"test", 4, kNpos},
    {"test", kNpos, kNpos},
    {"%2dtest", 6, 4},
    {"%2dtest", 2, kNpos},
    {"test%2d", 2, 2},
    {"%E4%BD%A0+%E5%A5%BD", 9, 1},
    {"%E4%BD%A0+%E5%A5%BD", 6, kNpos},
    {"%ED%B0%80+%E5%A5%BD", 6, 6},
  };

  for (size_t i = 0; i < arraysize(adjust_cases); i++) {
    const AdjustOffsetCase& adjust_case = adjust_cases[i];
    size_t offset = adjust_case.input_offset;
    // Only the side effect on |offset| matters here; the returned string is
    // intentionally ignored.
    UnescapeAndDecodeUTF8URLComponent(adjust_case.input,
                                      UnescapeRule::NORMAL, &offset);
    EXPECT_EQ(adjust_case.output_offset, offset);
  }
}
370
// Checks that EscapeForHTML() replaces '<', '>' and the apostrophe with
// character references and leaves plain text alone.
TEST(EscapeTest, EscapeForHTML) {
  // The expected outputs must contain the literal character references; if
  // they are written as the bare characters the test only asserts that
  // escaping is a no-op.
  const EscapeForHTMLCase tests[] = {
    { "hello", "hello" },
    { "<hello>", "&lt;hello&gt;" },
    { "don\'t mess with me", "don&#39;t mess with me" },
  };
  for (size_t i = 0; i < arraysize(tests); ++i) {
    std::string result = EscapeForHTML(std::string(tests[i].input));
    EXPECT_EQ(std::string(tests[i].expected_output), result);
  }
}
382
// Checks that UnescapeForHTML() turns the character references &lt; &gt;
// &amp; &quot; and &#39; back into their characters, and leaves malformed or
// bare ampersand sequences untouched.
TEST(EscapeTest, UnescapeForHTML) {
  // Inputs are spelled with literal character references; writing the bare
  // characters instead would both break the string literals (for '"') and
  // leave nothing for the function to unescape.
  const EscapeForHTMLCase tests[] = {
    { "", "" },
    { "&lt;hello&gt;", "<hello>" },
    { "don&#39;t mess with me", "don\'t mess with me" },
    { "&lt;&gt;&amp;&quot;&#39;", "<>&\"'" },
    // Malformed references are expected to pass through unchanged.
    { "& lt; &amp ; &; '", "& lt; &amp ; &; '" },
    { "&amp;", "&" },
    { "&quot;", "\"" },
    { "&#39;", "'" },
    { "&lt;", "<" },
    { "&gt;", ">" },
    { "&amp; &", "& &" },
  };
  for (size_t i = 0; i < arraysize(tests); ++i) {
    string16 result = UnescapeForHTML(ASCIIToUTF16(tests[i].input));
    EXPECT_EQ(ASCIIToUTF16(tests[i].expected_output), result);
  }
}
402
// Applies the AdjustEncodingOffset functor to every offset of an imaginary
// escaped string and checks the resulting offsets into the unescaped string.
// kNpos is expected for offsets that land inside an escape sequence.
TEST(EscapeTest, AdjustEncodingOffset) {
  // Imagine we have strings as shown in the following cases where the
  // %XX's represent encoded characters.

  // 1: abc%ECdef ==> abcXdef
  std::vector<size_t> offsets;
  for (size_t t = 0; t < 9; ++t)
    offsets.push_back(t);
  AdjustEncodingOffset::Adjustments adjustments;
  adjustments.push_back(3);
  std::for_each(offsets.begin(), offsets.end(),
                AdjustEncodingOffset(adjustments));
  const size_t expected_1[] = {0, 1, 2, 3, kNpos, kNpos, 4, 5, 6};
  // Fatal on mismatch: the loop below indexes |offsets| with the bounds of
  // |expected_1| and must not run if the sizes differ.
  ASSERT_EQ(arraysize(expected_1), offsets.size());
  for (size_t i = 0; i < arraysize(expected_1); ++i)
    EXPECT_EQ(expected_1[i], offsets[i]);

  // 2: %ECabc%EC%ECdef%EC ==> XabcXXdefX
  offsets.clear();
  for (size_t t = 0; t < 18; ++t)
    offsets.push_back(t);
  adjustments.clear();
  adjustments.push_back(0);
  adjustments.push_back(6);
  adjustments.push_back(9);
  adjustments.push_back(15);
  std::for_each(offsets.begin(), offsets.end(),
                AdjustEncodingOffset(adjustments));
  const size_t expected_2[] = {0, kNpos, kNpos, 1, 2, 3, 4, kNpos, kNpos, 5,
                               kNpos, kNpos, 6, 7, 8, 9, kNpos, kNpos};
  // Fatal on mismatch, for the same reason as above.
  ASSERT_EQ(arraysize(expected_2), offsets.size());
  for (size_t i = 0; i < arraysize(expected_2); ++i)
    EXPECT_EQ(expected_2[i], offsets[i]);
}
438