• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include <algorithm>
6 #include <string>
7 
8 #include "net/base/escape.h"
9 
10 #include "base/basictypes.h"
11 #include "base/i18n/icu_string_conversions.h"
12 #include "base/string_util.h"
13 #include "base/stringprintf.h"
14 #include "base/utf_string_conversions.h"
15 #include "testing/gtest/include/gtest/gtest.h"
16 
17 namespace {
18 
19 static const size_t kNpos = string16::npos;
20 
21 struct EscapeCase {
22   const wchar_t* input;
23   const wchar_t* output;
24 };
25 
26 struct UnescapeURLCase {
27   const wchar_t* input;
28   UnescapeRule::Type rules;
29   const wchar_t* output;
30 };
31 
32 struct UnescapeURLCaseASCII {
33   const char* input;
34   UnescapeRule::Type rules;
35   const char* output;
36 };
37 
38 struct UnescapeAndDecodeCase {
39   const char* input;
40 
41   // The expected output when run through UnescapeURL.
42   const char* url_unescaped;
43 
44   // The expected output when run through UnescapeQuery.
45   const char* query_unescaped;
46 
47   // The expected output when run through UnescapeAndDecodeURLComponent.
48   const wchar_t* decoded;
49 };
50 
51 struct AdjustOffsetCase {
52   const char* input;
53   size_t input_offset;
54   size_t output_offset;
55 };
56 
57 struct EscapeForHTMLCase {
58   const char* input;
59   const char* expected_output;
60 };
61 
62 }  // namespace
63 
TEST(EscapeTest,EscapeTextForFormSubmission)64 TEST(EscapeTest, EscapeTextForFormSubmission) {
65   const EscapeCase escape_cases[] = {
66     {L"foo", L"foo"},
67     {L"foo bar", L"foo+bar"},
68     {L"foo++", L"foo%2B%2B"}
69   };
70   for (size_t i = 0; i < arraysize(escape_cases); ++i) {
71     EscapeCase value = escape_cases[i];
72     EXPECT_EQ(WideToUTF16Hack(value.output),
73               EscapeQueryParamValueUTF8(WideToUTF16Hack(value.input), true));
74   }
75 
76   const EscapeCase escape_cases_no_plus[] = {
77     {L"foo", L"foo"},
78     {L"foo bar", L"foo%20bar"},
79     {L"foo++", L"foo%2B%2B"}
80   };
81   for (size_t i = 0; i < arraysize(escape_cases_no_plus); ++i) {
82     EscapeCase value = escape_cases_no_plus[i];
83     EXPECT_EQ(WideToUTF16Hack(value.output),
84               EscapeQueryParamValueUTF8(WideToUTF16Hack(value.input), false));
85   }
86 
87   // Test all the values in we're supposed to be escaping.
88   const std::string no_escape(
89     "abcdefghijklmnopqrstuvwxyz"
90     "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
91     "0123456789"
92     "!'()*-._~");
93   for (int i = 0; i < 256; ++i) {
94     std::string in;
95     in.push_back(i);
96     std::string out = EscapeQueryParamValue(in, true);
97     if (0 == i) {
98       EXPECT_EQ(out, std::string("%00"));
99     } else if (32 == i) {
100       // Spaces are plus escaped like web forms.
101       EXPECT_EQ(out, std::string("+"));
102     } else if (no_escape.find(in) == std::string::npos) {
103       // Check %hex escaping
104       std::string expected = base::StringPrintf("%%%02X", i);
105       EXPECT_EQ(expected, out);
106     } else {
107       // No change for things in the no_escape list.
108       EXPECT_EQ(out, in);
109     }
110   }
111 
112   // Check to see if EscapeQueryParamValueUTF8 is the same as
113   // EscapeQueryParamValue(..., kCodepageUTF8,)
114   string16 test_str;
115   test_str.reserve(5000);
116   for (int i = 1; i < 5000; ++i) {
117     test_str.push_back(i);
118   }
119   string16 wide;
120   EXPECT_TRUE(EscapeQueryParamValue(test_str, base::kCodepageUTF8, true,
121                                     &wide));
122   EXPECT_EQ(wide, EscapeQueryParamValueUTF8(test_str, true));
123   EXPECT_TRUE(EscapeQueryParamValue(test_str, base::kCodepageUTF8, false,
124                                     &wide));
125   EXPECT_EQ(wide, EscapeQueryParamValueUTF8(test_str, false));
126 }
127 
// Checks EscapePath() against a sample covering control characters, printable
// ASCII and bytes >= 0x80.
TEST(EscapeTest, EscapePath) {
  // Most of the character space we care about, un-escaped.
  const std::string unescaped(
      "\x02\n\x1d !\"#$%&'()*+,-./0123456789:;"
      "<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "[\\]^_`abcdefghijklmnopqrstuvwxyz"
      "{|}~\x7f\x80\xff");

  // The same sequence as it is expected to look after escaping.
  const std::string escaped(
      "%02%0A%1D%20!%22%23$%25&'()*+,-./0123456789%3A;"
      "%3C=%3E%3F@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz"
      "%7B%7C%7D~%7F%80%FF");

  // Expected value goes first so a failure is reported with the right labels.
  ASSERT_EQ(escaped, EscapePath(unescaped));
}
142 
// Checks EscapeUrlEncodedData() against a sample covering control characters,
// printable ASCII and bytes >= 0x80. The expected text has '+' for the space.
TEST(EscapeTest, EscapeUrlEncodedData) {
  // Most of the character space we care about, un-escaped.
  const std::string unescaped(
      "\x02\n\x1d !\"#$%&'()*+,-./0123456789:;"
      "<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "[\\]^_`abcdefghijklmnopqrstuvwxyz"
      "{|}~\x7f\x80\xff");

  // The same sequence as it is expected to look after escaping.
  const std::string escaped(
      "%02%0A%1D+!%22%23%24%25%26%27()*%2B,-./0123456789:%3B"
      "%3C%3D%3E%3F%40ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz"
      "%7B%7C%7D~%7F%80%FF");

  // Expected value goes first so a failure is reported with the right labels.
  ASSERT_EQ(escaped, EscapeUrlEncodedData(unescaped));
}
157 
TEST(EscapeTest,UnescapeURLComponentASCII)158 TEST(EscapeTest, UnescapeURLComponentASCII) {
159   const UnescapeURLCaseASCII unescape_cases[] = {
160     {"", UnescapeRule::NORMAL, ""},
161     {"%2", UnescapeRule::NORMAL, "%2"},
162     {"%%%%%%", UnescapeRule::NORMAL, "%%%%%%"},
163     {"Don't escape anything", UnescapeRule::NORMAL, "Don't escape anything"},
164     {"Invalid %escape %2", UnescapeRule::NORMAL, "Invalid %escape %2"},
165     {"Some%20random text %25%2dOK", UnescapeRule::NONE,
166      "Some%20random text %25%2dOK"},
167     {"Some%20random text %25%2dOK", UnescapeRule::NORMAL,
168      "Some%20random text %25-OK"},
169     {"Some%20random text %25%2dOK", UnescapeRule::SPACES,
170      "Some random text %25-OK"},
171     {"Some%20random text %25%2dOK", UnescapeRule::URL_SPECIAL_CHARS,
172      "Some%20random text %-OK"},
173     {"Some%20random text %25%2dOK",
174      UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS,
175      "Some random text %-OK"},
176     {"%A0%B1%C2%D3%E4%F5", UnescapeRule::NORMAL, "\xA0\xB1\xC2\xD3\xE4\xF5"},
177     {"%Aa%Bb%Cc%Dd%Ee%Ff", UnescapeRule::NORMAL, "\xAa\xBb\xCc\xDd\xEe\xFf"},
178     // Certain URL-sensitive characters should not be unescaped unless asked.
179     {"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", UnescapeRule::SPACES,
180      "Hello %13%10world %23# %3F? %3D= %26& %25% %2B+"},
181     {"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+",
182      UnescapeRule::URL_SPECIAL_CHARS,
183      "Hello%20%13%10world ## ?? == && %% ++"},
184     // Control characters.
185     {"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::URL_SPECIAL_CHARS,
186      "%01%02%03%04%05%06%07%08%09 %"},
187     {"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::CONTROL_CHARS,
188      "\x01\x02\x03\x04\x05\x06\x07\x08\x09 %25"},
189     {"Hello%20%13%10%02", UnescapeRule::SPACES, "Hello %13%10%02"},
190     {"Hello%20%13%10%02", UnescapeRule::CONTROL_CHARS, "Hello%20\x13\x10\x02"},
191   };
192 
193   for (size_t i = 0; i < arraysize(unescape_cases); i++) {
194     std::string str(unescape_cases[i].input);
195     EXPECT_EQ(std::string(unescape_cases[i].output),
196               UnescapeURLComponent(str, unescape_cases[i].rules));
197   }
198 
199   // Test the NULL character unescaping (which wouldn't work above since those
200   // are just char pointers).
201   std::string input("Null");
202   input.push_back(0);  // Also have a NULL in the input.
203   input.append("%00%39Test");
204 
205   // When we're unescaping NULLs
206   std::string expected("Null");
207   expected.push_back(0);
208   expected.push_back(0);
209   expected.append("9Test");
210   EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::CONTROL_CHARS));
211 
212   // When we're not unescaping NULLs.
213   expected = "Null";
214   expected.push_back(0);
215   expected.append("%009Test");
216   EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::NORMAL));
217 }
218 
TEST(EscapeTest,UnescapeURLComponent)219 TEST(EscapeTest, UnescapeURLComponent) {
220   const UnescapeURLCase unescape_cases[] = {
221     {L"", UnescapeRule::NORMAL, L""},
222     {L"%2", UnescapeRule::NORMAL, L"%2"},
223     {L"%%%%%%", UnescapeRule::NORMAL, L"%%%%%%"},
224     {L"Don't escape anything", UnescapeRule::NORMAL, L"Don't escape anything"},
225     {L"Invalid %escape %2", UnescapeRule::NORMAL, L"Invalid %escape %2"},
226     {L"Some%20random text %25%2dOK", UnescapeRule::NONE,
227      L"Some%20random text %25%2dOK"},
228     {L"Some%20random text %25%2dOK", UnescapeRule::NORMAL,
229      L"Some%20random text %25-OK"},
230     {L"Some%20random text %25%2dOK", UnescapeRule::SPACES,
231      L"Some random text %25-OK"},
232     {L"Some%20random text %25%2dOK", UnescapeRule::URL_SPECIAL_CHARS,
233      L"Some%20random text %-OK"},
234     {L"Some%20random text %25%2dOK",
235      UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS,
236      L"Some random text %-OK"},
237     {L"%A0%B1%C2%D3%E4%F5", UnescapeRule::NORMAL, L"\xA0\xB1\xC2\xD3\xE4\xF5"},
238     {L"%Aa%Bb%Cc%Dd%Ee%Ff", UnescapeRule::NORMAL, L"\xAa\xBb\xCc\xDd\xEe\xFf"},
239     // Certain URL-sensitive characters should not be unescaped unless asked.
240     {L"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", UnescapeRule::SPACES,
241      L"Hello %13%10world %23# %3F? %3D= %26& %25% %2B+"},
242     {L"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+",
243      UnescapeRule::URL_SPECIAL_CHARS,
244      L"Hello%20%13%10world ## ?? == && %% ++"},
245     // We can neither escape nor unescape '@' since some websites expect it to
246     // be preserved as either '@' or "%40".
247     // See http://b/996720 and http://crbug.com/23933 .
248     {L"me@my%40example", UnescapeRule::NORMAL, L"me@my%40example"},
249     // Control characters.
250     {L"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::URL_SPECIAL_CHARS,
251      L"%01%02%03%04%05%06%07%08%09 %"},
252     {L"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::CONTROL_CHARS,
253      L"\x01\x02\x03\x04\x05\x06\x07\x08\x09 %25"},
254     {L"Hello%20%13%10%02", UnescapeRule::SPACES, L"Hello %13%10%02"},
255     {L"Hello%20%13%10%02", UnescapeRule::CONTROL_CHARS,
256      L"Hello%20\x13\x10\x02"},
257     {L"Hello\x9824\x9827", UnescapeRule::CONTROL_CHARS,
258      L"Hello\x9824\x9827"},
259   };
260 
261   for (size_t i = 0; i < arraysize(unescape_cases); i++) {
262     string16 str(WideToUTF16(unescape_cases[i].input));
263     EXPECT_EQ(WideToUTF16(unescape_cases[i].output),
264               UnescapeURLComponent(str, unescape_cases[i].rules));
265   }
266 
267   // Test the NULL character unescaping (which wouldn't work above since those
268   // are just char pointers).
269   string16 input(WideToUTF16(L"Null"));
270   input.push_back(0);  // Also have a NULL in the input.
271   input.append(WideToUTF16(L"%00%39Test"));
272 
273   // When we're unescaping NULLs
274   string16 expected(WideToUTF16(L"Null"));
275   expected.push_back(0);
276   expected.push_back(0);
277   expected.append(ASCIIToUTF16("9Test"));
278   EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::CONTROL_CHARS));
279 
280   // When we're not unescaping NULLs.
281   expected = WideToUTF16(L"Null");
282   expected.push_back(0);
283   expected.append(WideToUTF16(L"%009Test"));
284   EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::NORMAL));
285 }
286 
// Runs each input through three paths and compares against the matching
// column of the table: UnescapeURLComponent() with NORMAL, with
// REPLACE_PLUS_WITH_SPACE, and UnescapeAndDecodeUTF8URLComponent().
TEST(EscapeTest, UnescapeAndDecodeUTF8URLComponent) {
  // Column order: input, url_unescaped, query_unescaped, decoded.
  const UnescapeAndDecodeCase cases[] = {
    { "%",
      "%",
      "%",
     L"%"},
    { "+",
      "+",
      " ",
     L"+"},
    { "%2+",
      "%2+",
      "%2 ",
     L"%2+"},
    { "+%%%+%%%",
      "+%%%+%%%",
      " %%% %%%",
     L"+%%%+%%%"},
    { "Don't escape anything",
      "Don't escape anything",
      "Don't escape anything",
     L"Don't escape anything"},
    { "+Invalid %escape %2+",
      "+Invalid %escape %2+",
      " Invalid %escape %2 ",
     L"+Invalid %escape %2+"},
    { "Some random text %25%2dOK",
      "Some random text %25-OK",
      "Some random text %25-OK",
     L"Some random text %25-OK"},
    { "%01%02%03%04%05%06%07%08%09",
      "%01%02%03%04%05%06%07%08%09",
      "%01%02%03%04%05%06%07%08%09",
     L"%01%02%03%04%05%06%07%08%09"},
    { "%E4%BD%A0+%E5%A5%BD",
      "\xE4\xBD\xA0+\xE5\xA5\xBD",
      "\xE4\xBD\xA0 \xE5\xA5\xBD",
     L"\x4f60+\x597d"},
    { "%ED%ED",  // Invalid UTF-8.
      "\xED\xED",
      "\xED\xED",
     L"%ED%ED"},  // Invalid UTF-8 -> kept unescaped.
  };

  for (size_t idx = 0; idx < arraysize(cases); ++idx) {
    const UnescapeAndDecodeCase& test_case = cases[idx];

    // Plain URL unescaping.
    const std::string url_result =
        UnescapeURLComponent(test_case.input, UnescapeRule::NORMAL);
    EXPECT_EQ(std::string(test_case.url_unescaped), url_result);

    // Query unescaping, where '+' stands for a space.
    const std::string query_result = UnescapeURLComponent(
        test_case.input, UnescapeRule::REPLACE_PLUS_WITH_SPACE);
    EXPECT_EQ(std::string(test_case.query_unescaped), query_result);

    // TODO: Need to test unescape_spaces and unescape_percent.
    const string16 decoded_result = UnescapeAndDecodeUTF8URLComponent(
        test_case.input, UnescapeRule::NORMAL, NULL);
    EXPECT_EQ(WideToUTF16Hack(std::wstring(test_case.decoded)),
              decoded_result);
  }
}
347 
// Checks how UnescapeAndDecodeUTF8URLComponent() updates an offset into the
// escaped input as it produces the decoded string16 output.
//
// The "no valid offset" sentinel is kNpos (string16::npos), because the
// offset refers to a string16 result rather than a std::wstring.
TEST(EscapeTest, AdjustOffset) {
  // Columns: escaped input, offset passed in, offset expected back.
  const AdjustOffsetCase adjust_cases[] = {
    // Offsets at or past the end of the input, or already npos.
    {"", 0, kNpos},
    {"test", 0, 0},
    {"test", 2, 2},
    {"test", 4, kNpos},
    {"test", kNpos, kNpos},
    // Offsets after, inside and before a single escape sequence.
    {"%2dtest", 6, 4},
    {"%2dtest", 2, kNpos},
    {"test%2d", 2, 2},
    // Escaped multi-byte UTF-8 sequences.
    {"%E4%BD%A0+%E5%A5%BD", 9, 1},
    {"%E4%BD%A0+%E5%A5%BD", 6, kNpos},
    // NOTE(review): the first sequence looks like an encoded surrogate,
    // presumably left escaped so the offset is unchanged - confirm.
    {"%ED%B0%80+%E5%A5%BD", 6, 6},
  };

  for (size_t i = 0; i < arraysize(adjust_cases); ++i) {
    // The call updates |offset| in place; its return value is not needed.
    size_t offset = adjust_cases[i].input_offset;
    UnescapeAndDecodeUTF8URLComponent(adjust_cases[i].input,
                                      UnescapeRule::NORMAL, &offset);
    EXPECT_EQ(adjust_cases[i].output_offset, offset);
  }
}
370 
// Table-driven check of the std::string EscapeForHTML() call.
TEST(EscapeTest, EscapeForHTML) {
  // Columns: raw text, text expected after HTML escaping.
  const EscapeForHTMLCase cases[] = {
    { "hello", "hello" },
    { "<hello>", "&lt;hello&gt;" },
    { "don\'t mess with me", "don&#39;t mess with me" },
  };
  for (size_t idx = 0; idx < arraysize(cases); ++idx) {
    const EscapeForHTMLCase& test_case = cases[idx];
    const std::string want(test_case.expected_output);
    const std::string got = EscapeForHTML(std::string(test_case.input));
    EXPECT_EQ(want, got);
  }
}
382 
// Table-driven check of the string16 UnescapeForHTML() call.
TEST(EscapeTest, UnescapeForHTML) {
  // Columns: HTML-escaped text, text expected after unescaping.
  const EscapeForHTMLCase cases[] = {
    { "", "" },
    { "&lt;hello&gt;", "<hello>" },
    { "don&#39;t mess with me", "don\'t mess with me" },
    { "&lt;&gt;&amp;&quot;&#39;", "<>&\"'" },
    { "& lt; &amp ; &; '", "& lt; &amp ; &; '" },
    { "&amp;", "&" },
    { "&quot;", "\"" },
    { "&#39;", "'" },
    { "&lt;", "<" },
    { "&gt;", ">" },
    { "&amp; &", "& &" },
  };
  for (size_t idx = 0; idx < arraysize(cases); ++idx) {
    const EscapeForHTMLCase& test_case = cases[idx];
    const string16 got = UnescapeForHTML(ASCIIToUTF16(test_case.input));
    const string16 want = ASCIIToUTF16(test_case.expected_output);
    EXPECT_EQ(want, got);
  }
}
402 
// Applies the AdjustEncodingOffset functor to every offset of a hypothetical
// escaped string and checks each adjusted offset against a hand-computed
// table.
//
// The size checks use ASSERT_EQ so that a length mismatch stops the test
// before the comparison loops can index |offsets| out of range.
TEST(EscapeTest, AdjustEncodingOffset) {
  // Imagine we have strings as shown in the following cases where the
  // %XX's represent encoded characters

  // 1: abc%ECdef ==> abcXdef
  std::vector<size_t> offsets;
  for (size_t t = 0; t < 9; ++t)
    offsets.push_back(t);
  AdjustEncodingOffset::Adjustments adjustments;
  adjustments.push_back(3);  // Position of the single %EC in the input.
  std::for_each(offsets.begin(), offsets.end(),
                AdjustEncodingOffset(adjustments));
  size_t expected_1[] = {0, 1, 2, 3, kNpos, kNpos, 4, 5, 6};
  ASSERT_EQ(arraysize(expected_1), offsets.size());
  for (size_t i = 0; i < arraysize(expected_1); ++i)
    EXPECT_EQ(expected_1[i], offsets[i]);


  // 2: %ECabc%EC%ECdef%EC ==> XabcXXdefX
  offsets.clear();
  for (size_t t = 0; t < 18; ++t)
    offsets.push_back(t);
  adjustments.clear();
  // Positions of the four %EC sequences in the input.
  adjustments.push_back(0);
  adjustments.push_back(6);
  adjustments.push_back(9);
  adjustments.push_back(15);
  std::for_each(offsets.begin(), offsets.end(),
                AdjustEncodingOffset(adjustments));
  size_t expected_2[] = {0, kNpos, kNpos, 1, 2, 3, 4, kNpos, kNpos, 5, kNpos,
                         kNpos, 6, 7, 8, 9, kNpos, kNpos};
  ASSERT_EQ(arraysize(expected_2), offsets.size());
  for (size_t i = 0; i < arraysize(expected_2); ++i)
    EXPECT_EQ(expected_2[i], offsets[i]);
}
438