• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2014 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifdef UNSAFE_BUFFERS_BUILD
6 // TODO(crbug.com/350788890): Remove this and spanify to fix the errors.
7 #pragma allow_unsafe_buffers
8 #endif
9 
10 #include "url/url_canon_icu.h"
11 
12 #include <stddef.h>
13 
14 #include "base/logging.h"
15 #include "base/memory/raw_ptr.h"
16 #include "testing/gtest/include/gtest/gtest.h"
17 #include "third_party/icu/source/common/unicode/ucnv.h"
18 #include "url/url_canon.h"
19 #include "url/url_canon_icu_test_helpers.h"
20 #include "url/url_canon_stdstring.h"
21 #include "url/url_test_utils.h"
22 
23 namespace url {
24 
25 namespace {
26 
// Exercises ICUCharsetConverter::ConvertFromUTF16 in two ways:
//   1. A table of UTF-16 inputs is converted into a named target encoding and
//      compared byte-for-byte against the expected output, including a
//      character that the target encoding cannot represent.
//   2. Inputs whose lengths straddle the fixed capacity of a RawCanonOutput
//      are converted to check that the output grows as needed.
TEST(URLCanonIcuTest, ICUCharsetConverter) {
  struct ICUCase {
    const wchar_t* input;   // Wide literal; narrowed to UTF-16 before use.
    const char* encoding;   // Converter name handed to test::UConvScoper.
    const char* expected;   // Expected bytes written by the converter.
  };
  const ICUCase kCases[] = {
      // UTF-8.
      {L"Hello, world", "utf-8", "Hello, world"},
      {L"\x4f60\x597d", "utf-8", "\xe4\xbd\xa0\xe5\xa5\xbd"},
      // Non-BMP UTF-8.
      {L"!\xd800\xdf00!", "utf-8", "!\xf0\x90\x8c\x80!"},
      // Big5
      {L"\x4f60\x597d", "big5", "\xa7\x41\xa6\x6e"},
      // Unrepresentable character in the destination set. U+06DE (decimal
      // 1758) is expected to come out as the percent-escaped form of
      // "&#1758;".
      {L"hello\x4f60\x06de\x597dworld", "big5",
       "hello\xa7\x41%26%231758%3B\xa6\x6eworld"},
  };

  for (const ICUCase& test_case : kCases) {
    // Identify the failing table row, since there is no loop index to report.
    SCOPED_TRACE(::testing::Message() << "encoding: " << test_case.encoding
                                      << ", expected: " << test_case.expected);

    test::UConvScoper conv(test_case.encoding);
    ASSERT_TRUE(conv.converter() != nullptr);
    ICUCharsetConverter converter(conv.converter());

    std::string str;
    StdStringCanonOutput output(&str);

    std::u16string input_str(
        test_utils::TruncateWStringToUTF16(test_case.input));
    converter.ConvertFromUTF16(input_str, &output);
    output.Complete();

    EXPECT_STREQ(test_case.expected, str.c_str());
  }

  // Test string sizes around the resize boundary for the output to make sure
  // the converter resizes as needed.
  constexpr int kStaticSize = 16;
  test::UConvScoper conv("utf-8");
  ASSERT_TRUE(conv.converter() != nullptr);
  ICUCharsetConverter converter(conv.converter());
  for (int length = kStaticSize - 2; length <= kStaticSize + 2; ++length) {
    SCOPED_TRACE(::testing::Message() << "input length: " << length);

    // Make a string with the appropriate length.
    const std::u16string input(static_cast<size_t>(length), u'a');

    RawCanonOutput<kStaticSize> output;
    converter.ConvertFromUTF16(input, &output);
    EXPECT_EQ(input.length(), output.length());
  }
}
78 
TEST(URLCanonIcuTest,QueryWithConverter)79 TEST(URLCanonIcuTest, QueryWithConverter) {
80   struct QueryCase {
81     const char* input8;
82     const wchar_t* input16;
83     const char* encoding;
84     const char* expected;
85   } query_cases[] = {
86       // Regular ASCII case in some different encodings.
87     {"foo=bar", L"foo=bar", "utf-8", "?foo=bar"},
88     {"foo=bar", L"foo=bar", "shift_jis", "?foo=bar"},
89     {"foo=bar", L"foo=bar", "gb2312", "?foo=bar"},
90       // Chinese input/output
91     {"q=\xe4\xbd\xa0\xe5\xa5\xbd", L"q=\x4f60\x597d", "gb2312",
92       "?q=%C4%E3%BA%C3"},
93     {"q=\xe4\xbd\xa0\xe5\xa5\xbd", L"q=\x4f60\x597d", "big5", "?q=%A7A%A6n"},
94       // Unencodable character in the destination character set should be
95       // escaped. The escape sequence unescapes to be the entity name:
96       // "?q=&#20320;"
97     {"q=Chinese\xef\xbc\xa7", L"q=Chinese\xff27", "iso-8859-1",
98       "?q=Chinese%26%2365319%3B"},
99   };
100 
101   for (size_t i = 0; i < std::size(query_cases); i++) {
102     Component out_comp;
103 
104     test::UConvScoper conv(query_cases[i].encoding);
105     ASSERT_TRUE(!query_cases[i].encoding || conv.converter());
106     ICUCharsetConverter converter(conv.converter());
107 
108     if (query_cases[i].input8) {
109       int len = static_cast<int>(strlen(query_cases[i].input8));
110       Component in_comp(0, len);
111       std::string out_str;
112 
113       StdStringCanonOutput output(&out_str);
114       CanonicalizeQuery(query_cases[i].input8, in_comp, &converter, &output,
115                         &out_comp);
116       output.Complete();
117 
118       EXPECT_EQ(query_cases[i].expected, out_str);
119     }
120 
121     if (query_cases[i].input16) {
122       std::u16string input16(
123           test_utils::TruncateWStringToUTF16(query_cases[i].input16));
124       int len = static_cast<int>(input16.length());
125       Component in_comp(0, len);
126       std::string out_str;
127 
128       StdStringCanonOutput output(&out_str);
129       CanonicalizeQuery(input16.c_str(), in_comp, &converter, &output,
130                         &out_comp);
131       output.Complete();
132 
133       EXPECT_EQ(query_cases[i].expected, out_str);
134     }
135   }
136 
137   // Extra test for input with embedded NULL;
138   std::string out_str;
139   StdStringCanonOutput output(&out_str);
140   Component out_comp;
141   CanonicalizeQuery("a \x00z\x01", Component(0, 5), NULL, &output, &out_comp);
142   output.Complete();
143   EXPECT_EQ("?a%20%00z%01", out_str);
144 }
145 
146 }  // namespace
147 
148 }  // namespace url
149