// Copyright 2011 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef NET_BASE_DATA_URL_H_
#define NET_BASE_DATA_URL_H_

#include <string>
#include <string_view>

#include "base/memory/scoped_refptr.h"
#include "net/base/net_errors.h"
#include "net/base/net_export.h"

class GURL;

namespace net {

class HttpResponseHeaders;

// Command-line switch that provides a means to disable the data URL
// whitespace feature flag, kKeepWhitespaceForDataUrls. It is set as part of an
// enterprise policy and is intended as a kill switch.
inline constexpr std::string_view kRemoveWhitespaceForDataURLs =
    "remove-keep-whitespace-for-data-urls";

// Static helpers for parsing 'data' URLs. See RFC 2397 for a complete
// description of the 'data' URL scheme.
//
// Briefly, a 'data' URL has the form:
//
//   data:[<mediatype>][;base64],<data>
//
// The <mediatype> is an Internet media type specification (with optional
// parameters). The appearance of ";base64" means that the data is encoded as
// base64. Without ";base64", the data (as a sequence of octets) is represented
// using ASCII encoding for octets inside the range of safe URL characters and
// using the standard %xx hex encoding of URLs for octets outside that range.
// If <mediatype> is omitted, it defaults to text/plain;charset=US-ASCII. As a
// shorthand, "text/plain" can be omitted but the charset parameter supplied.
//
class NET_EXPORT DataURL {
 public:
  // Parses a 'data' URL into its component pieces.
  //
  // |mime_type| and |charset| must be non-null and point to empty strings.
  //
  // If |data| is null, then the <data> section will not be parsed or
  // validated. If non-null, it must point to an empty string.
  //
  // The resulting |mime_type| is normalized to lowercase. The resulting |data|
  // is the decoded data (e.g., if the data URL specifies base64 encoding, then
  // the returned data is base64 decoded, and any %-escaped bytes are
  // unescaped).
  //
  // If the media type value doesn't match the media-type production defined in
  // RFC 7231, |mime_type| will be set to the default value "text/plain". We
  // don't simply fail for this grammar violation since Chromium had been
  // accepting such invalid values. For example, an <img> element with the src
  // attribute set to a data URL with an invalid media type "image" (without a
  // slash and subtype) had been displayed. However, the value this method
  // stores in |mime_type| can be used for generating other headers, etc.,
  // which could lead to a security vulnerability. We don't want to accept an
  // arbitrary value and ask each caller to validate the returned value.
  //
  // If the charset parameter is specified but its value doesn't match the
  // token production defined in RFC 7230, this method simply fails and returns
  // false.
  //
  // If there's any other grammar violation in the URL, then this method will
  // return false, and all passed-in pointers will be unmodified. On success,
  // true is returned.
  [[nodiscard]] static bool Parse(const GURL& url,
                                  std::string* mime_type,
                                  std::string* charset,
                                  std::string* data);

  // Similar to Parse(), except that it also generates a bogus set of response
  // headers, with Content-Type populated, and takes a |method|. Only the
  // "HEAD" method modifies the response, resulting in a 0-length body.
  //
  // All std::string pointers must point to empty strings, and |*headers| must
  // be nullptr. Returns net::OK on success.
  //
  // NOTE(review): the previous wording here was "All arguments except must be
  // non-null", which is missing a word. Treat every pointer argument as
  // required to be non-null unless the implementation says otherwise; confirm
  // whether any argument (e.g. |data|, as in Parse()) is allowed to be null.
  [[nodiscard]] static Error BuildResponse(
      const GURL& url,
      std::string_view method,
      std::string* mime_type,
      std::string* charset,
      std::string* data,
      scoped_refptr<HttpResponseHeaders>* headers);
};

}  // namespace net

#endif  // NET_BASE_DATA_URL_H_