• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2012 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
#include "net/base/data_url.h"

#include <string>
#include <tuple>
#include <vector>

#include "base/command_line.h"
#include "base/memory/ref_counted.h"
#include "base/test/scoped_command_line.h"
#include "base/test/scoped_feature_list.h"
#include "net/base/features.h"
#include "net/base/net_errors.h"
#include "net/http/http_response_headers.h"
#include "net/http/http_version.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "url/gurl.h"
15 
16 namespace net {
17 
18 namespace {
19 
// One row of a table-driven DataURL::Parse() test: the URL handed to the
// parser together with the results the test expects to get back.
struct ParseTestData {
  // URL text; the tests wrap it in a GURL before parsing.
  const char* url;
  // Expected return value of DataURL::Parse().
  bool is_valid;
  // Expected MIME type output.
  const char* mime_type;
  // Expected charset output.
  const char* charset;
  // Expected decoded payload. Stored as a std::string rather than a C string
  // so that an expectation can contain embedded NUL bytes. Intentionally not
  // a const member: the test tables are already declared const, and a const
  // member would only make the struct non-assignable and non-movable.
  std::string data;
};
27 
28 }  // namespace
29 
30 class DataURLTest
31     : public testing::Test,
32       public ::testing::WithParamInterface<std::tuple<bool, bool>> {
33  public:
DataURLTest()34   DataURLTest() {
35     using FeatureList = std::vector<base::test::FeatureRef>;
36     FeatureList enabled_features;
37     FeatureList disabled_features;
38     const auto feature_set = [&](bool flag_on) -> FeatureList& {
39       return flag_on ? enabled_features : disabled_features;
40     };
41     feature_set(OptimizedParsing())
42         .push_back(features::kOptimizeParsingDataUrls);
43     feature_set(KeepWhitespace())
44         .push_back(features::kKeepWhitespaceForDataUrls);
45     feature_list_.InitWithFeatures(enabled_features, disabled_features);
46   }
47 
OptimizedParsing() const48   bool OptimizedParsing() const { return std::get<0>(GetParam()); }
KeepWhitespace() const49   bool KeepWhitespace() const { return std::get<1>(GetParam()); }
50 
51  private:
52   base::test::ScopedFeatureList feature_list_;
53 };
54 
55 INSTANTIATE_TEST_SUITE_P(DataURLTest,
56                          DataURLTest,
57                          testing::Combine(
58                              /*optimize_parsing=*/testing::Bool(),
59                              /*keep_whitespace=*/testing::Bool()));
60 
TEST_P(DataURLTest,Parse)61 TEST_P(DataURLTest, Parse) {
62   const ParseTestData tests[] = {
63       {"data:", false, "", "", ""},
64 
65       {"data:,", true, "text/plain", "US-ASCII", ""},
66 
67       {"data:;base64,", true, "text/plain", "US-ASCII", ""},
68 
69       {"data:;charset=,test", false, "", "", ""},
70 
71       {"data:TeXt/HtMl,<b>x</b>", true, "text/html", "", "<b>x</b>"},
72 
73       {"data:,foo", true, "text/plain", "US-ASCII", "foo"},
74 
75       {"data:;base64,aGVsbG8gd29ybGQ=", true, "text/plain", "US-ASCII",
76        "hello world"},
77 
78       // Allow invalid mediatype for backward compatibility but set mime_type to
79       // "text/plain" instead of the invalid mediatype.
80       {"data:foo,boo", true, "text/plain", "US-ASCII", "boo"},
81 
82       // When accepting an invalid mediatype, override charset with "US-ASCII"
83       {"data:foo;charset=UTF-8,boo", true, "text/plain", "US-ASCII", "boo"},
84 
85       // Invalid mediatype. Includes a slash but the type part is not a token.
86       {"data:f(oo/bar;baz=1;charset=kk,boo", true, "text/plain", "US-ASCII",
87        "boo"},
88 
89       {"data:foo/bar;baz=1;charset=kk,boo", true, "foo/bar", "kk", "boo"},
90 
91       {"data:foo/bar;charset=kk;baz=1,boo", true, "foo/bar", "kk", "boo"},
92 
93       {"data:text/html,%3Chtml%3E%3Cbody%3E%3Cb%3Ehello%20world"
94        "%3C%2Fb%3E%3C%2Fbody%3E%3C%2Fhtml%3E",
95        true, "text/html", "", "<html><body><b>hello world</b></body></html>"},
96 
97       {"data:text/html,<html><body><b>hello world</b></body></html>", true,
98        "text/html", "", "<html><body><b>hello world</b></body></html>"},
99 
100       // the comma cannot be url-escaped!
101       {"data:%2Cblah", false, "", "", ""},
102 
103       // invalid base64 content
104       {"data:;base64,aGVs_-_-", false, "", "", ""},
105 
106       // Spaces should NOT be removed from non-base64 encoded data URLs.
107       {"data:image/fractal,a b c d e f g", true, "image/fractal", "",
108        KeepWhitespace() ? "a b c d e f g" : "abcdefg"},
109 
110       // Spaces should also be removed from anything base-64 encoded
111       {"data:;base64,aGVs bG8gd2  9ybGQ=", true, "text/plain", "US-ASCII",
112        "hello world"},
113 
114       // Other whitespace should also be removed from anything base-64 encoded.
115       {"data:;base64,aGVs bG8gd2  \n9ybGQ=", true, "text/plain", "US-ASCII",
116        "hello world"},
117 
118       // In base64 encoding, escaped whitespace should be stripped.
119       // (This test was taken from acid3)
120       // http://b/1054495
121       {"data:text/javascript;base64,%20ZD%20Qg%0D%0APS%20An%20Zm91cic%0D%0A%207"
122        "%20",
123        true, "text/javascript", "", "d4 = 'four';"},
124 
125       // All whitespace should be preserved on non-base64 encoded content.
126       {"data:img/png,A  B  %20  %0A  C", true, "img/png", "",
127        KeepWhitespace() ? "A  B     \n  C" : "AB \nC"},
128 
129       {"data:text/plain;charset=utf-8;base64,SGVsbMO2", true, "text/plain",
130        "utf-8", "Hell\xC3\xB6"},
131 
132       // no mimetype
133       {"data:;charset=utf-8;base64,SGVsbMO2", true, "text/plain", "utf-8",
134        "Hell\xC3\xB6"},
135 
136       // Not sufficiently padded.
137       {"data:;base64,aGVsbG8gd29ybGQ", true, "text/plain", "US-ASCII",
138        "hello world"},
139 
140       // Not sufficiently padded with whitespace.
141       {"data:;base64,aGV sbG8g d29ybGQ", true, "text/plain", "US-ASCII",
142        "hello world"},
143 
144       // Not sufficiently padded with escaped whitespace.
145       {"data:;base64,aGV%20sbG8g%20d29ybGQ", true, "text/plain", "US-ASCII",
146        "hello world"},
147 
148       // Bad encoding (truncated).
149       {"data:;base64,aGVsbG8gd29yb", false, "", "", ""},
150 
151       // BiDi control characters should be unescaped and preserved as is, and
152       // should not be replaced with % versions. In the below case, \xE2\x80\x8F
153       // is the RTL mark and the parsed text should preserve it as is.
154       {"data:text/plain;charset=utf-8,\xE2\x80\x8Ftest", true, "text/plain",
155        "utf-8", "\xE2\x80\x8Ftest"},
156 
157       // Same as above but with Arabic text after RTL mark.
158       {"data:text/plain;charset=utf-8,"
159        "\xE2\x80\x8F\xD8\xA7\xD8\xAE\xD8\xAA\xD8\xA8\xD8\xA7\xD8\xB1",
160        true, "text/plain", "utf-8",
161        "\xE2\x80\x8F\xD8\xA7\xD8\xAE\xD8\xAA\xD8\xA8\xD8\xA7\xD8\xB1"},
162 
163       // RTL mark encoded as %E2%80%8F should be unescaped too. Note that when
164       // wrapped in a GURL, this URL and the next effectively become the same as
165       // the previous two URLs.
166       {"data:text/plain;charset=utf-8,%E2%80%8Ftest", true, "text/plain",
167        "utf-8", "\xE2\x80\x8Ftest"},
168 
169       // Same as above but with Arabic text after RTL mark.
170       {"data:text/plain;charset=utf-8,"
171        "%E2%80%8F\xD8\xA7\xD8\xAE\xD8\xAA\xD8\xA8\xD8\xA7\xD8\xB1",
172        true, "text/plain", "utf-8",
173        "\xE2\x80\x8F\xD8\xA7\xD8\xAE\xD8\xAA\xD8\xA8\xD8\xA7\xD8\xB1"},
174 
175       // The 'data' of a data URI does not include any ref it has.
176       {"data:text/plain,this/is/a/test/%23include/#dontinclude", true,
177        "text/plain", "", "this/is/a/test/#include/"},
178 
179       // More unescaping tests and tests with nulls.
180       {"data:%00text/plain%41,foo", true, "%00text/plain%41", "", "foo"},
181       {"data:text/plain;charset=%00US-ASCII%41,foo", true, "text/plain",
182        "%00US-ASCII%41", "foo"},
183       {"data:text/plain,%00_%41", true, "text/plain", "",
184        std::string("\x00_A", 3)},
185       {"data:text/plain;base64,AA//", true, "text/plain", "",
186        std::string("\x00\x0F\xFF", 3)},
187       // "%62ase64" unescapes to base64, but should not be treated as such.
188       {"data:text/plain;%62ase64,AA//", true, "text/plain", "", "AA//"},
189   };
190 
191   for (const auto& test : tests) {
192     SCOPED_TRACE(test.url);
193 
194     std::string mime_type;
195     std::string charset;
196     std::string data;
197     bool ok = DataURL::Parse(GURL(test.url), &mime_type, &charset, &data);
198     EXPECT_EQ(ok, test.is_valid);
199     EXPECT_EQ(test.mime_type, mime_type);
200     EXPECT_EQ(test.charset, charset);
201     EXPECT_EQ(test.data, data);
202   }
203 }
204 
// A minimal data URL requested with GET: checks the reported media type,
// charset and payload, and the HTTP version, status text and Content-Type of
// the generated response headers.
TEST_P(DataURLTest, BuildResponseSimple) {
  const GURL url("data:,Hello");

  std::string mime_type;
  std::string charset;
  std::string data;
  scoped_refptr<HttpResponseHeaders> headers;

  const auto result = DataURL::BuildResponse(url, "GET", &mime_type, &charset,
                                             &data, &headers);
  ASSERT_EQ(OK, result);

  EXPECT_EQ("text/plain", mime_type);
  EXPECT_EQ("US-ASCII", charset);
  EXPECT_EQ("Hello", data);

  // Stop here if no headers were produced; everything below dereferences them.
  ASSERT_TRUE(headers);

  const HttpVersion http_version = headers->GetHttpVersion();
  EXPECT_EQ(1, http_version.major_value());
  EXPECT_EQ(1, http_version.minor_value());

  EXPECT_EQ("OK", headers->GetStatusText());
  EXPECT_EQ(headers->GetNormalizedHeader("Content-Type"),
            "text/plain;charset=US-ASCII");
}
226 
// HEAD requests, in several letter casings, must produce the same headers as
// a GET but with an empty body.
TEST_P(DataURLTest, BuildResponseHead) {
  static constexpr const char* kHeadSpellings[] = {"HEAD", "head", "hEaD"};

  for (const char* method : kHeadSpellings) {
    // Label any failure below with the method spelling being exercised.
    SCOPED_TRACE(method);

    std::string mime_type;
    std::string charset;
    std::string data;
    scoped_refptr<HttpResponseHeaders> headers;

    const auto result = DataURL::BuildResponse(
        GURL("data:,Hello"), method, &mime_type, &charset, &data, &headers);
    ASSERT_EQ(OK, result);

    EXPECT_EQ("text/plain", mime_type);
    EXPECT_EQ("US-ASCII", charset);
    // The payload "Hello" must not be returned for HEAD.
    EXPECT_EQ("", data);

    ASSERT_TRUE(headers);

    const HttpVersion http_version = headers->GetHttpVersion();
    EXPECT_EQ(1, http_version.major_value());
    EXPECT_EQ(1, http_version.minor_value());

    EXPECT_EQ("OK", headers->GetStatusText());
    EXPECT_EQ(headers->GetNormalizedHeader("Content-Type"),
              "text/plain;charset=US-ASCII");
  }
}
252 
// A URL that is not a data URL must be rejected with ERR_INVALID_URL, leaving
// every output argument empty.
TEST_P(DataURLTest, BuildResponseInput) {
  const GURL not_a_data_url("bogus");

  std::string mime_type;
  std::string charset;
  std::string data;
  scoped_refptr<HttpResponseHeaders> headers;

  const auto result = DataURL::BuildResponse(not_a_data_url, "GET", &mime_type,
                                             &charset, &data, &headers);
  ASSERT_EQ(ERR_INVALID_URL, result);

  EXPECT_FALSE(headers);
  EXPECT_TRUE(mime_type.empty());
  EXPECT_TRUE(charset.empty());
  EXPECT_TRUE(data.empty());
}
267 
// A media type containing delimiter characters is still accepted, but the
// generated Content-Type header must be the one used for text/plain.
TEST_P(DataURLTest, BuildResponseInvalidMimeType) {
  // "f(o/b)r" has a slash, but parentheses are not valid in a media type.
  const GURL url("data:f(o/b)r,test");

  std::string mime_type;
  std::string charset;
  std::string data;
  scoped_refptr<HttpResponseHeaders> headers;

  const auto result = DataURL::BuildResponse(url, "GET", &mime_type, &charset,
                                             &data, &headers);
  ASSERT_EQ(OK, result);

  ASSERT_TRUE(headers);
  EXPECT_EQ(headers->GetNormalizedHeader("Content-Type"),
            "text/plain;charset=US-ASCII");
}
283 
// A charset parameter made of delimiter characters must cause the whole URL
// to be rejected, leaving every output argument empty.
TEST_P(DataURLTest, InvalidCharset) {
  // The media type here is valid; it is the charset value "()" that contains
  // delimiters.
  const GURL url("data:text/html;charset=(),test");

  std::string mime_type;
  std::string charset;
  std::string data;
  scoped_refptr<HttpResponseHeaders> headers;

  const auto result = DataURL::BuildResponse(url, "GET", &mime_type, &charset,
                                             &data, &headers);
  ASSERT_EQ(ERR_INVALID_URL, result);

  EXPECT_FALSE(headers);
  EXPECT_TRUE(mime_type.empty());
  EXPECT_TRUE(charset.empty());
  EXPECT_TRUE(data.empty());
}
299 
300 // Test a slightly larger data URL.
TEST_P(DataURLTest,Image)301 TEST_P(DataURLTest, Image) {
302   // Use our nice little Chrome logo.
303   GURL image_url(
304       "data:image/png;base64,"
305       "iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAADVklEQVQ4jX2TfUwUB"
306       "BjG3w1y+HGcd9dxhXR8T4awOccJGgOSWclHImznLkTlSw0DDQXkrmgYgbUYnlQTqQ"
307       "xIEVxitD5UMCATRA1CEEg+Qjw3bWDxIauJv/5oumqs39/P827vnucRmYN0gyF01GI"
308       "5MpCVdW0gO7tvNC+vqSEtbZefk5NuLv1jdJ46p/zw0HeH4+PHr3h7c1mjoV2t5rKz"
309       "Mx1+fg9bAgK6zHq9cU5z+LpA3xOtx34+vTeT21onRuzssC3zxbbSwC13d/pFuC7Ck"
310       "IMDxQpF7r/MWq12UctI1dWWm99ypqSYmRUBdKem8MkrO/kgaTt1O7YzlpzE5GIVd0"
311       "WYUqt57yWf2McHTObYPbVD+ZwbtlLTVMZ3BW+TnLyXLaWtmEq6WJVbT3HBh3Svj2H"
312       "QQcm43XwmtoYM6vVKleh0uoWvnzW3v3MpidruPTQPf0bia7sJOtBM0ufTWNvus/nk"
313       "DFHF9ZS+uYVjRUasMeHUmyLYtcklTvzWGFZnNOXczThvpKIzjcahSqIzkvDLayDq6"
314       "D3eOjtBbNUEIZYyqsvj4V4wY92eNJ4IoyhTbxXX1T5xsV9tm9r4TQwHLiZw/pdDZJ"
315       "ea8TKmsmR/K0uLh/GwnCHghTja6lPhphezPfO5/5MrVvMzNaI3+ERHfrFzPKQukrQ"
316       "GI4d/3EFD/3E2mVNYvi4at7CXWREaxZGD+3hg28zD3gVMd6q5c8GdosynKmSeRuGz"
317       "pjyl1/9UDGtPR5HeaKT8Wjo17WXk579BXVUhN64ehF9fhRtq/uxxZKzNiZFGD0wRC"
318       "3NFROZ5mwIPL/96K/rKMMLrIzF9uhHr+/sYH7DAbwlgC4J+R2Z7FUx1qLnV7MGF40"
319       "smVSoJ/jvHRfYhQeUJd/SnYtGWhPHR0Sz+GE2F2yth0B36Vcz2KpnufBJbsysjjW4"
320       "kblBUiIjiURUWqJY65zxbnTy57GQyH58zgy0QBtTQv5gH15XMdKkYu+TGaJMnlm2O"
321       "34uI4b9tflqp1+QEFGzoW/ulmcofcpkZCYJhDfSpme7QcrHa+Xfji8paEQkTkSfmm"
322       "oRWRNZr/F1KfVMjW+IKEnv2FwZfKdzt0BQR6lClcZR0EfEXEfv/G6W9iLiIyCoReV"
323       "5EnhORIBHx+ufPj/gLB/zGI/G4Bk0AAAAASUVORK5CYII=");
324 
325   std::string mime_type;
326   std::string charset;
327   std::string data;
328   scoped_refptr<HttpResponseHeaders> headers;
329 
330   EXPECT_EQ(OK, DataURL::BuildResponse(image_url, "GET", &mime_type, &charset,
331                                        &data, &headers));
332 
333   EXPECT_EQ(911u, data.size());
334   EXPECT_EQ("image/png", mime_type);
335   EXPECT_TRUE(charset.empty());
336 
337   ASSERT_TRUE(headers);
338   std::string value;
339   EXPECT_EQ(headers->GetStatusLine(), "HTTP/1.1 200 OK");
340   EXPECT_EQ(headers->GetNormalizedHeader("Content-Type"), "image/png");
341 }
342 
343 // Tests the application of the kRemoveWhitespaceForDataURLs command line
344 // switch.
TEST(DataURLRemoveWhitespaceTest,Parse)345 TEST(DataURLRemoveWhitespaceTest, Parse) {
346   base::CommandLine::ForCurrentProcess()->AppendSwitch(
347       kRemoveWhitespaceForDataURLs);
348   const ParseTestData tests[] = {
349       {"data:image/fractal,a b c d e f g", true, "image/fractal", "",
350        "abcdefg"},
351       {"data:img/png,A  B  %20  %0A  C", true, "img/png", "", "AB \nC"},
352   };
353 
354   for (const auto& test : tests) {
355     SCOPED_TRACE(test.url);
356 
357     std::string mime_type;
358     std::string charset;
359     std::string data;
360     bool ok = DataURL::Parse(GURL(test.url), &mime_type, &charset, &data);
361     EXPECT_EQ(ok, test.is_valid);
362     EXPECT_EQ(test.mime_type, mime_type);
363     EXPECT_EQ(test.charset, charset);
364     EXPECT_EQ(test.data, data);
365   }
366 }
367 
368 }  // namespace net
369