1 // Copyright 2012 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "net/base/data_url.h"
6
7 #include "base/memory/ref_counted.h"
8 #include "base/test/scoped_feature_list.h"
9 #include "net/base/features.h"
10 #include "net/base/net_errors.h"
11 #include "net/http/http_response_headers.h"
12 #include "net/http/http_version.h"
13 #include "testing/gtest/include/gtest/gtest.h"
14 #include "url/gurl.h"
15
16 namespace net {
17
18 namespace {
19
// One row of a DataURL::Parse() expectation table: the URL to parse and the
// values the parse is expected to produce.
struct ParseTestData {
  // URL text handed to GURL before parsing.
  const char* url;

  // Expected return value of DataURL::Parse().
  bool is_valid;

  // Expected MIME type output.
  const char* mime_type;

  // Expected charset output.
  const char* charset;

  // Expected decoded payload. Held as std::string (not const char*) so that
  // expectations may contain embedded NUL bytes.
  const std::string data;
};
27
28 } // namespace
29
30 class DataURLTest
31 : public testing::Test,
32 public ::testing::WithParamInterface<std::tuple<bool, bool>> {
33 public:
DataURLTest()34 DataURLTest() {
35 using FeatureList = std::vector<base::test::FeatureRef>;
36 FeatureList enabled_features;
37 FeatureList disabled_features;
38 const auto feature_set = [&](bool flag_on) -> FeatureList& {
39 return flag_on ? enabled_features : disabled_features;
40 };
41 feature_set(OptimizedParsing())
42 .push_back(features::kOptimizeParsingDataUrls);
43 feature_set(KeepWhitespace())
44 .push_back(features::kKeepWhitespaceForDataUrls);
45 feature_list_.InitWithFeatures(enabled_features, disabled_features);
46 }
47
OptimizedParsing() const48 bool OptimizedParsing() const { return std::get<0>(GetParam()); }
KeepWhitespace() const49 bool KeepWhitespace() const { return std::get<1>(GetParam()); }
50
51 private:
52 base::test::ScopedFeatureList feature_list_;
53 };
54
55 INSTANTIATE_TEST_SUITE_P(DataURLTest,
56 DataURLTest,
57 testing::Combine(
58 /*optimize_parsing=*/testing::Bool(),
59 /*keep_whitespace=*/testing::Bool()));
60
// Table-driven check of DataURL::Parse(): every row is wrapped in a GURL,
// parsed, and the returned validity, MIME type, charset and decoded data are
// compared against the row's expectations. Rows whose expected data depends
// on the kKeepWhitespaceForDataUrls feature consult KeepWhitespace().
TEST_P(DataURLTest, Parse) {
  const ParseTestData tests[] = {
      {"data:", false, "", "", ""},

      {"data:,", true, "text/plain", "US-ASCII", ""},

      {"data:;base64,", true, "text/plain", "US-ASCII", ""},

      {"data:;charset=,test", false, "", "", ""},

      {"data:TeXt/HtMl,<b>x</b>", true, "text/html", "", "<b>x</b>"},

      {"data:,foo", true, "text/plain", "US-ASCII", "foo"},

      {"data:;base64,aGVsbG8gd29ybGQ=", true, "text/plain", "US-ASCII",
       "hello world"},

      // Allow invalid mediatype for backward compatibility but set mime_type to
      // "text/plain" instead of the invalid mediatype.
      {"data:foo,boo", true, "text/plain", "US-ASCII", "boo"},

      // When accepting an invalid mediatype, override charset with "US-ASCII"
      {"data:foo;charset=UTF-8,boo", true, "text/plain", "US-ASCII", "boo"},

      // Invalid mediatype. Includes a slash but the type part is not a token.
      {"data:f(oo/bar;baz=1;charset=kk,boo", true, "text/plain", "US-ASCII",
       "boo"},

      {"data:foo/bar;baz=1;charset=kk,boo", true, "foo/bar", "kk", "boo"},

      {"data:foo/bar;charset=kk;baz=1,boo", true, "foo/bar", "kk", "boo"},

      {"data:text/html,%3Chtml%3E%3Cbody%3E%3Cb%3Ehello%20world"
       "%3C%2Fb%3E%3C%2Fbody%3E%3C%2Fhtml%3E",
       true, "text/html", "", "<html><body><b>hello world</b></body></html>"},

      {"data:text/html,<html><body><b>hello world</b></body></html>", true,
       "text/html", "", "<html><body><b>hello world</b></body></html>"},

      // the comma cannot be url-escaped!
      {"data:%2Cblah", false, "", "", ""},

      // invalid base64 content
      {"data:;base64,aGVs_-_-", false, "", "", ""},

      // Spaces in non-base64 encoded data URLs are preserved only when the
      // kKeepWhitespaceForDataUrls feature is enabled; otherwise they are
      // stripped.
      {"data:image/fractal,a b c d e f g", true, "image/fractal", "",
       KeepWhitespace() ? "a b c d e f g" : "abcdefg"},

      // Spaces should also be removed from anything base-64 encoded
      {"data:;base64,aGVs bG8gd2 9ybGQ=", true, "text/plain", "US-ASCII",
       "hello world"},

      // Other whitespace should also be removed from anything base-64 encoded.
      {"data:;base64,aGVs bG8gd2 \n9ybGQ=", true, "text/plain", "US-ASCII",
       "hello world"},

      // In base64 encoding, escaped whitespace should be stripped.
      // (This test was taken from acid3)
      // http://b/1054495
      {"data:text/javascript;base64,%20ZD%20Qg%0D%0APS%20An%20Zm91cic%0D%0A%207"
       "%20",
       true, "text/javascript", "", "d4 = 'four';"},

      // Escaped whitespace (%20, %0A) in non-base64 content always survives.
      // Literal whitespace survives only when kKeepWhitespaceForDataUrls is
      // enabled. In that case three spaces precede the newline: the literal
      // space after "B", the unescaped %20, and the literal space after it.
      {"data:img/png,A B %20 %0A C", true, "img/png", "",
       KeepWhitespace() ? "A B   \n C" : "AB \nC"},

      {"data:text/plain;charset=utf-8;base64,SGVsbMO2", true, "text/plain",
       "utf-8", "Hell\xC3\xB6"},

      // no mimetype
      {"data:;charset=utf-8;base64,SGVsbMO2", true, "text/plain", "utf-8",
       "Hell\xC3\xB6"},

      // Not sufficiently padded.
      {"data:;base64,aGVsbG8gd29ybGQ", true, "text/plain", "US-ASCII",
       "hello world"},

      // Not sufficiently padded with whitespace.
      {"data:;base64,aGV sbG8g d29ybGQ", true, "text/plain", "US-ASCII",
       "hello world"},

      // Not sufficiently padded with escaped whitespace.
      {"data:;base64,aGV%20sbG8g%20d29ybGQ", true, "text/plain", "US-ASCII",
       "hello world"},

      // Bad encoding (truncated).
      {"data:;base64,aGVsbG8gd29yb", false, "", "", ""},

      // BiDi control characters should be unescaped and preserved as is, and
      // should not be replaced with % versions. In the below case, \xE2\x80\x8F
      // is the RTL mark and the parsed text should preserve it as is.
      {"data:text/plain;charset=utf-8,\xE2\x80\x8Ftest", true, "text/plain",
       "utf-8", "\xE2\x80\x8Ftest"},

      // Same as above but with Arabic text after RTL mark.
      {"data:text/plain;charset=utf-8,"
       "\xE2\x80\x8F\xD8\xA7\xD8\xAE\xD8\xAA\xD8\xA8\xD8\xA7\xD8\xB1",
       true, "text/plain", "utf-8",
       "\xE2\x80\x8F\xD8\xA7\xD8\xAE\xD8\xAA\xD8\xA8\xD8\xA7\xD8\xB1"},

      // RTL mark encoded as %E2%80%8F should be unescaped too. Note that when
      // wrapped in a GURL, this URL and the next effectively become the same as
      // the previous two URLs.
      {"data:text/plain;charset=utf-8,%E2%80%8Ftest", true, "text/plain",
       "utf-8", "\xE2\x80\x8Ftest"},

      // Same as above but with Arabic text after RTL mark.
      {"data:text/plain;charset=utf-8,"
       "%E2%80%8F\xD8\xA7\xD8\xAE\xD8\xAA\xD8\xA8\xD8\xA7\xD8\xB1",
       true, "text/plain", "utf-8",
       "\xE2\x80\x8F\xD8\xA7\xD8\xAE\xD8\xAA\xD8\xA8\xD8\xA7\xD8\xB1"},

      // The 'data' of a data URI does not include any ref it has.
      {"data:text/plain,this/is/a/test/%23include/#dontinclude", true,
       "text/plain", "", "this/is/a/test/#include/"},

      // More unescaping tests and tests with nulls.
      {"data:%00text/plain%41,foo", true, "%00text/plain%41", "", "foo"},
      {"data:text/plain;charset=%00US-ASCII%41,foo", true, "text/plain",
       "%00US-ASCII%41", "foo"},
      {"data:text/plain,%00_%41", true, "text/plain", "",
       std::string("\x00_A", 3)},
      {"data:text/plain;base64,AA//", true, "text/plain", "",
       std::string("\x00\x0F\xFF", 3)},
      // "%62ase64" unescapes to base64, but should not be treated as such.
      {"data:text/plain;%62ase64,AA//", true, "text/plain", "", "AA//"},
  };

  for (const auto& test : tests) {
    // Attach the URL under test to any failure reported for this row.
    SCOPED_TRACE(test.url);

    std::string mime_type;
    std::string charset;
    std::string data;
    bool ok = DataURL::Parse(GURL(test.url), &mime_type, &charset, &data);
    EXPECT_EQ(ok, test.is_valid);
    EXPECT_EQ(test.mime_type, mime_type);
    EXPECT_EQ(test.charset, charset);
    EXPECT_EQ(test.data, data);
  }
}
204
// A minimal "data:,Hello" URL requested with GET must succeed, report the
// default MIME type and charset, return the payload, and synthesize headers
// carrying HTTP version 1.1, status text "OK" and a matching Content-Type.
TEST_P(DataURLTest, BuildResponseSimple) {
  const GURL url("data:,Hello");

  std::string parsed_mime_type;
  std::string parsed_charset;
  std::string body;
  scoped_refptr<HttpResponseHeaders> response_headers;

  const auto result =
      DataURL::BuildResponse(url, "GET", &parsed_mime_type, &parsed_charset,
                             &body, &response_headers);
  ASSERT_EQ(OK, result);

  EXPECT_EQ("text/plain", parsed_mime_type);
  EXPECT_EQ("US-ASCII", parsed_charset);
  EXPECT_EQ("Hello", body);

  // Everything below dereferences the headers, so bail out if none were built.
  ASSERT_TRUE(response_headers);
  EXPECT_EQ(1, response_headers->GetHttpVersion().major_value());
  EXPECT_EQ(1, response_headers->GetHttpVersion().minor_value());
  EXPECT_EQ("OK", response_headers->GetStatusText());
  EXPECT_EQ(response_headers->GetNormalizedHeader("Content-Type"),
            "text/plain;charset=US-ASCII");
}
226
// For a HEAD request, in any letter case, BuildResponse() must still succeed
// and produce the same metadata and headers as a GET, but with an empty body.
TEST_P(DataURLTest, BuildResponseHead) {
  static constexpr const char* kHeadSpellings[] = {"HEAD", "head", "hEaD"};

  for (const char* method : kHeadSpellings) {
    // Identify which spelling failed in any assertion output.
    SCOPED_TRACE(method);

    std::string parsed_mime_type;
    std::string parsed_charset;
    std::string body;
    scoped_refptr<HttpResponseHeaders> response_headers;

    const auto result = DataURL::BuildResponse(
        GURL("data:,Hello"), method, &parsed_mime_type, &parsed_charset, &body,
        &response_headers);
    ASSERT_EQ(OK, result);

    EXPECT_EQ("text/plain", parsed_mime_type);
    EXPECT_EQ("US-ASCII", parsed_charset);
    EXPECT_EQ("", body);

    ASSERT_TRUE(response_headers);
    const HttpVersion http_version = response_headers->GetHttpVersion();
    EXPECT_EQ(1, http_version.major_value());
    EXPECT_EQ(1, http_version.minor_value());
    EXPECT_EQ("OK", response_headers->GetStatusText());
    EXPECT_EQ(response_headers->GetNormalizedHeader("Content-Type"),
              "text/plain;charset=US-ASCII");
  }
}
252
// A URL that is not a data URL must be rejected with ERR_INVALID_URL, and
// none of the output arguments may be populated.
TEST_P(DataURLTest, BuildResponseInput) {
  const GURL not_a_data_url("bogus");

  std::string parsed_mime_type;
  std::string parsed_charset;
  std::string body;
  scoped_refptr<HttpResponseHeaders> response_headers;

  const auto result = DataURL::BuildResponse(
      not_a_data_url, "GET", &parsed_mime_type, &parsed_charset, &body,
      &response_headers);
  ASSERT_EQ(ERR_INVALID_URL, result);

  // Outputs must be left in their initial, empty state on failure.
  EXPECT_FALSE(response_headers);
  EXPECT_TRUE(parsed_mime_type.empty());
  EXPECT_TRUE(parsed_charset.empty());
  EXPECT_TRUE(body.empty());
}
267
// A mediatype containing delimiter characters is tolerated: the request still
// succeeds, and the generated Content-Type header is the one that would be
// produced for text/plain.
TEST_P(DataURLTest, BuildResponseInvalidMimeType) {
  const GURL url_with_bad_mediatype("data:f(o/b)r,test");

  std::string parsed_mime_type;
  std::string parsed_charset;
  std::string body;
  scoped_refptr<HttpResponseHeaders> response_headers;

  const auto result = DataURL::BuildResponse(
      url_with_bad_mediatype, "GET", &parsed_mime_type, &parsed_charset, &body,
      &response_headers);
  ASSERT_EQ(OK, result);

  ASSERT_TRUE(response_headers);
  EXPECT_EQ(response_headers->GetNormalizedHeader("Content-Type"),
            "text/plain;charset=US-ASCII");
}
283
// A charset parameter made of delimiter characters ("()") must cause the URL
// to be rejected with ERR_INVALID_URL, leaving all outputs unpopulated.
TEST_P(DataURLTest, InvalidCharset) {
  const GURL url_with_bad_charset("data:text/html;charset=(),test");

  std::string parsed_mime_type;
  std::string parsed_charset;
  std::string body;
  scoped_refptr<HttpResponseHeaders> response_headers;

  const auto result = DataURL::BuildResponse(
      url_with_bad_charset, "GET", &parsed_mime_type, &parsed_charset, &body,
      &response_headers);
  ASSERT_EQ(ERR_INVALID_URL, result);

  EXPECT_FALSE(response_headers);
  EXPECT_TRUE(parsed_mime_type.empty());
  EXPECT_TRUE(parsed_charset.empty());
  EXPECT_TRUE(body.empty());
}
299
300 // Test a slightly larger data URL.
TEST_P(DataURLTest,Image)301 TEST_P(DataURLTest, Image) {
302 // Use our nice little Chrome logo.
303 GURL image_url(
304 "data:image/png;base64,"
305 "iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAADVklEQVQ4jX2TfUwUB"
306 "BjG3w1y+HGcd9dxhXR8T4awOccJGgOSWclHImznLkTlSw0DDQXkrmgYgbUYnlQTqQ"
307 "xIEVxitD5UMCATRA1CEEg+Qjw3bWDxIauJv/5oumqs39/P827vnucRmYN0gyF01GI"
308 "5MpCVdW0gO7tvNC+vqSEtbZefk5NuLv1jdJ46p/zw0HeH4+PHr3h7c1mjoV2t5rKz"
309 "Mx1+fg9bAgK6zHq9cU5z+LpA3xOtx34+vTeT21onRuzssC3zxbbSwC13d/pFuC7Ck"
310 "IMDxQpF7r/MWq12UctI1dWWm99ypqSYmRUBdKem8MkrO/kgaTt1O7YzlpzE5GIVd0"
311 "WYUqt57yWf2McHTObYPbVD+ZwbtlLTVMZ3BW+TnLyXLaWtmEq6WJVbT3HBh3Svj2H"
312 "QQcm43XwmtoYM6vVKleh0uoWvnzW3v3MpidruPTQPf0bia7sJOtBM0ufTWNvus/nk"
313 "DFHF9ZS+uYVjRUasMeHUmyLYtcklTvzWGFZnNOXczThvpKIzjcahSqIzkvDLayDq6"
314 "D3eOjtBbNUEIZYyqsvj4V4wY92eNJ4IoyhTbxXX1T5xsV9tm9r4TQwHLiZw/pdDZJ"
315 "ea8TKmsmR/K0uLh/GwnCHghTja6lPhphezPfO5/5MrVvMzNaI3+ERHfrFzPKQukrQ"
316 "GI4d/3EFD/3E2mVNYvi4at7CXWREaxZGD+3hg28zD3gVMd6q5c8GdosynKmSeRuGz"
317 "pjyl1/9UDGtPR5HeaKT8Wjo17WXk579BXVUhN64ehF9fhRtq/uxxZKzNiZFGD0wRC"
318 "3NFROZ5mwIPL/96K/rKMMLrIzF9uhHr+/sYH7DAbwlgC4J+R2Z7FUx1qLnV7MGF40"
319 "smVSoJ/jvHRfYhQeUJd/SnYtGWhPHR0Sz+GE2F2yth0B36Vcz2KpnufBJbsysjjW4"
320 "kblBUiIjiURUWqJY65zxbnTy57GQyH58zgy0QBtTQv5gH15XMdKkYu+TGaJMnlm2O"
321 "34uI4b9tflqp1+QEFGzoW/ulmcofcpkZCYJhDfSpme7QcrHa+Xfji8paEQkTkSfmm"
322 "oRWRNZr/F1KfVMjW+IKEnv2FwZfKdzt0BQR6lClcZR0EfEXEfv/G6W9iLiIyCoReV"
323 "5EnhORIBHx+ufPj/gLB/zGI/G4Bk0AAAAASUVORK5CYII=");
324
325 std::string mime_type;
326 std::string charset;
327 std::string data;
328 scoped_refptr<HttpResponseHeaders> headers;
329
330 EXPECT_EQ(OK, DataURL::BuildResponse(image_url, "GET", &mime_type, &charset,
331 &data, &headers));
332
333 EXPECT_EQ(911u, data.size());
334 EXPECT_EQ("image/png", mime_type);
335 EXPECT_TRUE(charset.empty());
336
337 ASSERT_TRUE(headers);
338 std::string value;
339 EXPECT_EQ(headers->GetStatusLine(), "HTTP/1.1 200 OK");
340 EXPECT_EQ(headers->GetNormalizedHeader("Content-Type"), "image/png");
341 }
342
343 // Tests the application of the kRemoveWhitespaceForDataURLs command line
344 // switch.
TEST(DataURLRemoveWhitespaceTest,Parse)345 TEST(DataURLRemoveWhitespaceTest, Parse) {
346 base::CommandLine::ForCurrentProcess()->AppendSwitch(
347 kRemoveWhitespaceForDataURLs);
348 const ParseTestData tests[] = {
349 {"data:image/fractal,a b c d e f g", true, "image/fractal", "",
350 "abcdefg"},
351 {"data:img/png,A B %20 %0A C", true, "img/png", "", "AB \nC"},
352 };
353
354 for (const auto& test : tests) {
355 SCOPED_TRACE(test.url);
356
357 std::string mime_type;
358 std::string charset;
359 std::string data;
360 bool ok = DataURL::Parse(GURL(test.url), &mime_type, &charset, &data);
361 EXPECT_EQ(ok, test.is_valid);
362 EXPECT_EQ(test.mime_type, mime_type);
363 EXPECT_EQ(test.charset, charset);
364 EXPECT_EQ(test.data, data);
365 }
366 }
367
368 } // namespace net
369