1 // Copyright 2012 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "net/base/mime_util.h"
6
7 #include <algorithm>
8 #include <iterator>
9 #include <map>
10 #include <optional>
11 #include <string>
12 #include <string_view>
13 #include <unordered_set>
14
15 #include "base/base64.h"
16 #include "base/check_op.h"
17 #include "base/containers/span.h"
18 #include "base/lazy_instance.h"
19 #include "base/memory/raw_ptr_exclusion.h"
20 #include "base/no_destructor.h"
21 #include "base/rand_util.h"
22 #include "base/strings/string_number_conversions.h"
23 #include "base/strings/string_split.h"
24 #include "base/strings/string_util.h"
25 #include "base/strings/utf_string_conversions.h"
26 #include "build/build_config.h"
27 #include "net/base/platform_mime_util.h"
28 #include "net/http/http_util.h"
29
30 using std::string;
31
32 namespace net {
33
34 namespace {
35
36 // Overrides the mime type for "get a mime type" functions below, for test
37 // purposes. (Empty string by default, indicates no override.)
GetOverridingMimeType()38 std::string& GetOverridingMimeType() {
39 static base::NoDestructor<std::string> overriding_mime_type;
40 return *overriding_mime_type;
41 }
42
43 } // namespace
44
// Singleton utility class for mime types.
//
// The free functions near the end of this file forward to the single instance
// of this class held in |g_mime_util|.
class MimeUtil : public PlatformMimeUtil {
 public:
  // Maps the extension |ext| (which must not include a leading dot) to a MIME
  // type, consulting the hardcoded mappings and the underlying platform.
  // Returns true and writes |mime_type| when a mapping is found.
  bool GetMimeTypeFromExtension(const base::FilePath::StringType& ext,
                                std::string* mime_type) const;

  // Same as GetMimeTypeFromExtension(), keyed on the extension of |file_path|.
  // Returns false when |file_path| has no extension.
  bool GetMimeTypeFromFile(const base::FilePath& file_path,
                           std::string* mime_type) const;

  // Like GetMimeTypeFromExtension(), but the underlying platform is not
  // consulted; only the hardcoded mappings are used.
  bool GetWellKnownMimeTypeFromExtension(const base::FilePath::StringType& ext,
                                         std::string* mime_type) const;

  // Maps |mime_type| to a file extension, trying the platform first and then
  // the hardcoded mappings. Returns true and writes |extension| on success.
  bool GetPreferredExtensionForMimeType(
      std::string_view mime_type,
      base::FilePath::StringType* extension) const;

  // Returns true if |mime_type| matches |mime_type_pattern|, which may contain
  // a single '*' wildcard and may carry parameters that must all be present in
  // |mime_type|.
  bool MatchesMimeType(std::string_view mime_type_pattern,
                       std::string_view mime_type) const;

  // Splits |type_string| of the form "type/subtype" into its two components.
  // Returns false unless there is exactly one '/' and both components are HTTP
  // tokens. |top_level_type| and |subtype| may each be null.
  bool ParseMimeTypeWithoutParameter(std::string_view type_string,
                                     std::string* top_level_type,
                                     std::string* subtype) const;

  // Returns true if |type_string| is one of kLegalTopLevelTypes (compared
  // case-insensitively) or is longer than two characters and starts with "x-".
  bool IsValidTopLevelMimeType(std::string_view type_string) const;

 private:
  // NOTE(review): presumably what lets base::LazyInstance construct the
  // singleton despite the private constructor -- confirm against
  // base/lazy_instance.h.
  friend struct base::LazyInstanceTraitsBase<MimeUtil>;

  MimeUtil();

  // Shared implementation of GetMimeTypeFromExtension() and
  // GetWellKnownMimeTypeFromExtension(); |include_platform_types| selects
  // whether the underlying platform is consulted.
  bool GetMimeTypeFromExtensionHelper(const base::FilePath::StringType& ext,
                                      bool include_platform_types,
                                      std::string* mime_type) const;
};  // class MimeUtil
79
// The MimeUtil singleton used by the wrapper functions in this file.
// This variable is Leaky because we need to access it from WorkerPool threads.
static base::LazyInstance<MimeUtil>::Leaky g_mime_util =
    LAZY_INSTANCE_INITIALIZER;
83
// One row of the hardcoded MIME tables below: a MIME type together with the
// file extensions associated with it.
struct MimeInfo {
  // The MIME type, e.g. "video/webm".
  const std::string_view mime_type;

  // Comma-separated list of possible extensions for the type. The first
  // extension is considered preferred.
  const std::string_view extensions;
};
91
92 // How to use the MIME maps
93 // ------------------------
94 // READ THIS BEFORE MODIFYING THE MIME MAPPINGS BELOW.
95 //
96 // There are two hardcoded mappings from MIME types: kPrimaryMappings and
97 // kSecondaryMappings.
98 //
99 // kPrimaryMappings:
100 //
101 // Use this for mappings that are critical to the web platform. Mappings you
102 // add to this list take priority over the underlying platform when converting
103 // from file extension -> MIME type. Thus file extensions listed here will
104 // work consistently across platforms.
105 //
106 // kSecondaryMappings:
107 //
108 // Use this for mappings that must exist, but can be overridden by user
109 // preferences.
110 //
111 // The following applies to both lists:
112 //
113 // * The same extension can appear multiple times in the same list under
114 // different MIME types. Extensions that appear earlier take precedence over
115 // those that appear later.
116 //
117 // * A MIME type must not appear more than once in a single list. It is valid
118 // for the same MIME type to appear in kPrimaryMappings and
119 // kSecondaryMappings.
120 //
121 // The MIME maps are used for three types of lookups:
122 //
123 // 1) MIME type -> file extension. Implemented as
124 // GetPreferredExtensionForMimeType().
125 //
126 // Sources are consulted in the following order:
127 //
128 // a) As a special case application/octet-stream is mapped to nothing. Web
129 // sites are supposed to use this MIME type to indicate that the content
130 // is opaque and shouldn't be parsed as any specific type of content. It
131 // doesn't make sense to map this to anything.
132 //
133 // b) The underlying platform. If the operating system has a mapping from
134 // the MIME type to a file extension, then that takes priority. The
135 // platform is assumed to represent the user's preference.
136 //
137 // c) kPrimaryMappings. Order doesn't matter since there should only be at
138 // most one entry per MIME type.
139 //
140 // d) kSecondaryMappings. Again, order doesn't matter.
141 //
142 // 2) File extension -> MIME type. Implemented in GetMimeTypeFromExtension().
143 //
144 // Sources are considered in the following order:
145 //
146 // a) kPrimaryMappings. Order matters here since file extensions can appear
147 // multiple times on these lists. The first mapping in order of
148 // appearance in the list wins.
149 //
150 // b) Underlying platform.
151 //
152 // c) kSecondaryMappings. Again, the order matters.
153 //
154 // 3) File extension -> Well known MIME type. Implemented as
155 // GetWellKnownMimeTypeFromExtension().
156 //
157 // This is similar to 2), with the exception that b) is skipped. I.e. Only
158 // considers the hardcoded mappings in kPrimaryMappings and
159 // kSecondaryMappings.
160
// See comments above for details on how this list is used.
//
// Each entry is {MIME type, comma-separated extensions}; the first extension
// of an entry is the preferred one for that MIME type.
static const MimeInfo kPrimaryMappings[] = {
    // Must precede audio/webm .
    {"video/webm", "webm"},

    // Must precede audio/mp3
    {"audio/mpeg", "mp3"},

    {"application/wasm", "wasm"},
    {"application/x-chrome-extension", "crx"},
    {"application/xhtml+xml", "xhtml,xht,xhtm"},
    {"audio/flac", "flac"},
    {"audio/mp3", "mp3"},
    {"audio/ogg", "ogg,oga,opus"},
    {"audio/wav", "wav"},
    {"audio/webm", "webm"},
    {"audio/x-m4a", "m4a"},
    {"image/avif", "avif"},
    {"image/gif", "gif"},
    {"image/jpeg", "jpeg,jpg"},
    {"image/png", "png"},
    {"image/apng", "png,apng"},
    {"image/svg+xml", "svg,svgz"},
    {"image/webp", "webp"},
    {"multipart/related", "mht,mhtml"},
    {"text/css", "css"},
    {"text/html", "html,htm,shtml,shtm"},
    {"text/javascript", "js,mjs"},
    {"text/xml", "xml"},
    {"video/mp4", "mp4,m4v"},
    {"video/ogg", "ogv,ogm"},

    // This is a primary mapping (overrides the platform) rather than secondary
    // to work around an issue when Excel is installed on Windows. Excel
    // registers csv as application/vnd.ms-excel instead of text/csv from RFC
    // 4180. See https://crbug.com/139105.
    {"text/csv", "csv"},
};
199
// See comments above for details on how this list is used.
//
// Each entry is {MIME type, comma-separated extensions}; the first extension
// of an entry is the preferred one for that MIME type.
static const MimeInfo kSecondaryMappings[] = {
    // Must precede image/vnd.microsoft.icon .
    {"image/x-icon", "ico"},

    {"application/epub+zip", "epub"},
    {"application/font-woff", "woff"},
    {"application/gzip", "gz,tgz"},
    {"application/javascript", "js"},
    {"application/json", "json"},  // Per http://www.ietf.org/rfc/rfc4627.txt.
    {"application/msword", "doc,dot"},
    {"application/octet-stream", "bin,exe,com"},
    {"application/pdf", "pdf"},
    {"application/pkcs7-mime", "p7m,p7c,p7z"},
    {"application/pkcs7-signature", "p7s"},
    {"application/postscript", "ps,eps,ai"},
    {"application/rdf+xml", "rdf"},
    {"application/rss+xml", "rss"},
    {"application/rtf", "rtf"},
    {"application/vnd.android.package-archive", "apk"},
    {"application/vnd.mozilla.xul+xml", "xul"},
    {"application/vnd.ms-excel", "xls"},
    {"application/vnd.ms-powerpoint", "ppt"},
    {"application/"
     "vnd.openxmlformats-officedocument.presentationml.presentation",
     "pptx"},
    {"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
     "xlsx"},
    {"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
     "docx"},
    {"application/x-gzip", "gz,tgz"},
    {"application/x-mpegurl", "m3u8"},
    {"application/x-shockwave-flash", "swf,swl"},
    {"application/x-tar", "tar"},
    {"application/x-x509-ca-cert", "cer,crt"},
    {"application/zip", "zip"},
    // This is the platform mapping on recent versions of Windows 10.
    {"audio/webm", "weba"},
    {"image/bmp", "bmp"},
    {"image/jpeg", "jfif,pjpeg,pjp"},
    {"image/tiff", "tiff,tif"},
    {"image/vnd.microsoft.icon", "ico"},
    {"image/x-png", "png"},
    {"image/x-xbitmap", "xbm"},
    {"message/rfc822", "eml"},
    {"text/calendar", "ics"},
    {"text/html", "ehtml"},
    {"text/plain", "txt,text"},
    {"text/vtt", "vtt"},
    {"text/x-sh", "sh"},
    {"text/xml", "xsl,xbl,xslt"},
    {"video/mpeg", "mpeg,mpg"},
};
253
254 // Finds mime type of |ext| from |mappings|.
255 template <size_t num_mappings>
FindMimeType(const MimeInfo (& mappings)[num_mappings],const std::string & ext)256 static std::optional<std::string_view> FindMimeType(
257 const MimeInfo (&mappings)[num_mappings],
258 const std::string& ext) {
259 for (const auto& mapping : mappings) {
260 for (std::string_view extension :
261 base::SplitStringPiece(mapping.extensions, ",", base::TRIM_WHITESPACE,
262 base::SPLIT_WANT_ALL)) {
263 if (base::EqualsCaseInsensitiveASCII(extension, ext)) {
264 return mapping.mime_type;
265 }
266 }
267 }
268 return std::nullopt;
269 }
270
StringToFilePathStringType(std::string_view string_piece)271 static base::FilePath::StringType StringToFilePathStringType(
272 std::string_view string_piece) {
273 #if BUILDFLAG(IS_WIN)
274 return base::UTF8ToWide(string_piece);
275 #else
276 return std::string(string_piece);
277 #endif
278 }
279
280 // Helper used in MimeUtil::GetPreferredExtensionForMimeType() to search
281 // preferred extension in MimeInfo arrays.
282 template <size_t num_mappings>
FindPreferredExtension(const MimeInfo (& mappings)[num_mappings],std::string_view mime_type,base::FilePath::StringType * result)283 static bool FindPreferredExtension(const MimeInfo (&mappings)[num_mappings],
284 std::string_view mime_type,
285 base::FilePath::StringType* result) {
286 // There is no preferred extension for "application/octet-stream".
287 if (mime_type == "application/octet-stream")
288 return false;
289
290 for (const auto& mapping : mappings) {
291 if (mapping.mime_type == mime_type) {
292 const size_t pos = mapping.extensions.find(',');
293 *result = StringToFilePathStringType(mapping.extensions.substr(0, pos));
294 return true;
295 }
296 }
297 return false;
298 }
299
GetMimeTypeFromExtension(const base::FilePath::StringType & ext,string * result) const300 bool MimeUtil::GetMimeTypeFromExtension(const base::FilePath::StringType& ext,
301 string* result) const {
302 return GetMimeTypeFromExtensionHelper(ext, true, result);
303 }
304
GetWellKnownMimeTypeFromExtension(const base::FilePath::StringType & ext,string * result) const305 bool MimeUtil::GetWellKnownMimeTypeFromExtension(
306 const base::FilePath::StringType& ext,
307 string* result) const {
308 return GetMimeTypeFromExtensionHelper(ext, false, result);
309 }
310
GetPreferredExtensionForMimeType(std::string_view mime_type,base::FilePath::StringType * extension) const311 bool MimeUtil::GetPreferredExtensionForMimeType(
312 std::string_view mime_type,
313 base::FilePath::StringType* extension) const {
314 // Search the MIME type in the platform DB first, then in kPrimaryMappings and
315 // kSecondaryMappings.
316 return GetPlatformPreferredExtensionForMimeType(mime_type, extension) ||
317 FindPreferredExtension(kPrimaryMappings, mime_type, extension) ||
318 FindPreferredExtension(kSecondaryMappings, mime_type, extension);
319 }
320
GetMimeTypeFromFile(const base::FilePath & file_path,string * result) const321 bool MimeUtil::GetMimeTypeFromFile(const base::FilePath& file_path,
322 string* result) const {
323 base::FilePath::StringType file_name_str = file_path.Extension();
324 if (file_name_str.empty())
325 return false;
326 return GetMimeTypeFromExtension(file_name_str.substr(1), result);
327 }
328
GetMimeTypeFromExtensionHelper(const base::FilePath::StringType & ext,bool include_platform_types,string * result) const329 bool MimeUtil::GetMimeTypeFromExtensionHelper(
330 const base::FilePath::StringType& ext,
331 bool include_platform_types,
332 string* result) const {
333 DCHECK(ext.empty() || ext[0] != '.')
334 << "extension passed in must not include leading dot";
335
336 // Used for tests.
337 if (!GetOverridingMimeType().empty()) {
338 *result = GetOverridingMimeType();
339 return true;
340 }
341
342 // Avoids crash when unable to handle a long file path. See crbug.com/48733.
343 const unsigned kMaxFilePathSize = 65536;
344 if (ext.length() > kMaxFilePathSize)
345 return false;
346
347 // Reject a string which contains null character.
348 base::FilePath::StringType::size_type nul_pos =
349 ext.find(FILE_PATH_LITERAL('\0'));
350 if (nul_pos != base::FilePath::StringType::npos)
351 return false;
352
353 // We implement the same algorithm as Mozilla for mapping a file extension to
354 // a mime type. That is, we first check a hard-coded list (that cannot be
355 // overridden), and then if not found there, we defer to the system registry.
356 // Finally, we scan a secondary hard-coded list to catch types that we can
357 // deduce but that we also want to allow the OS to override.
358
359 base::FilePath path_ext(ext);
360 const string ext_narrow_str = path_ext.AsUTF8Unsafe();
361 std::optional<std::string_view> mime_type =
362 FindMimeType(kPrimaryMappings, ext_narrow_str);
363 if (mime_type) {
364 *result = mime_type.value();
365 return true;
366 }
367
368 if (include_platform_types && GetPlatformMimeTypeFromExtension(ext, result))
369 return true;
370
371 mime_type = FindMimeType(kSecondaryMappings, ext_narrow_str);
372 if (mime_type) {
373 *result = mime_type.value();
374 return true;
375 }
376
377 return false;
378 }
379
380 MimeUtil::MimeUtil() = default;
381
382 // Tests for MIME parameter equality. Each parameter in the |mime_type_pattern|
383 // must be matched by a parameter in the |mime_type|. If there are no
384 // parameters in the pattern, the match is a success.
385 //
386 // According rfc2045 keys of parameters are case-insensitive, while values may
387 // or may not be case-sensitive, but they are usually case-sensitive. So, this
388 // function matches values in *case-sensitive* manner, however note that this
389 // may produce some false negatives.
MatchesMimeTypeParameters(std::string_view mime_type_pattern,std::string_view mime_type)390 bool MatchesMimeTypeParameters(std::string_view mime_type_pattern,
391 std::string_view mime_type) {
392 typedef std::map<std::string, std::string> StringPairMap;
393
394 const std::string_view::size_type semicolon = mime_type_pattern.find(';');
395 const std::string_view::size_type test_semicolon = mime_type.find(';');
396 if (semicolon != std::string::npos) {
397 if (test_semicolon == std::string::npos)
398 return false;
399
400 base::StringPairs pattern_parameters;
401 base::SplitStringIntoKeyValuePairs(mime_type_pattern.substr(semicolon + 1),
402 '=', ';', &pattern_parameters);
403 base::StringPairs test_parameters;
404 base::SplitStringIntoKeyValuePairs(mime_type.substr(test_semicolon + 1),
405 '=', ';', &test_parameters);
406
407 // Put the parameters to maps with the keys converted to lower case.
408 StringPairMap pattern_parameter_map;
409 for (const auto& pair : pattern_parameters) {
410 pattern_parameter_map[base::ToLowerASCII(pair.first)] = pair.second;
411 }
412
413 StringPairMap test_parameter_map;
414 for (const auto& pair : test_parameters) {
415 test_parameter_map[base::ToLowerASCII(pair.first)] = pair.second;
416 }
417
418 if (pattern_parameter_map.size() > test_parameter_map.size())
419 return false;
420
421 for (const auto& parameter_pair : pattern_parameter_map) {
422 const auto& test_parameter_pair_it =
423 test_parameter_map.find(parameter_pair.first);
424 if (test_parameter_pair_it == test_parameter_map.end())
425 return false;
426 if (parameter_pair.second != test_parameter_pair_it->second)
427 return false;
428 }
429 }
430
431 return true;
432 }
433
434 // This comparison handles absolute maching and also basic
435 // wildcards. The plugin mime types could be:
436 // application/x-foo
437 // application/*
438 // application/*+xml
439 // *
440 // Also tests mime parameters -- all parameters in the pattern must be present
441 // in the tested type for a match to succeed.
MatchesMimeType(std::string_view mime_type_pattern,std::string_view mime_type) const442 bool MimeUtil::MatchesMimeType(std::string_view mime_type_pattern,
443 std::string_view mime_type) const {
444 if (mime_type_pattern.empty())
445 return false;
446
447 std::string_view::size_type semicolon = mime_type_pattern.find(';');
448 const std::string_view base_pattern = mime_type_pattern.substr(0, semicolon);
449 semicolon = mime_type.find(';');
450 const std::string_view base_type = mime_type.substr(0, semicolon);
451
452 if (base_pattern == "*" || base_pattern == "*/*")
453 return MatchesMimeTypeParameters(mime_type_pattern, mime_type);
454
455 const std::string_view::size_type star = base_pattern.find('*');
456 if (star == std::string::npos) {
457 if (base::EqualsCaseInsensitiveASCII(base_pattern, base_type))
458 return MatchesMimeTypeParameters(mime_type_pattern, mime_type);
459 else
460 return false;
461 }
462
463 // Test length to prevent overlap between |left| and |right|.
464 if (base_type.length() < base_pattern.length() - 1)
465 return false;
466
467 std::string_view base_pattern_piece(base_pattern);
468 std::string_view left(base_pattern_piece.substr(0, star));
469 std::string_view right(base_pattern_piece.substr(star + 1));
470
471 if (!base::StartsWith(base_type, left, base::CompareCase::INSENSITIVE_ASCII))
472 return false;
473
474 if (!right.empty() &&
475 !base::EndsWith(base_type, right, base::CompareCase::INSENSITIVE_ASCII))
476 return false;
477
478 return MatchesMimeTypeParameters(mime_type_pattern, mime_type);
479 }
480
// Parses |type_str| as a media type followed by optional ';'-separated
// parameters.
//
// Returns false, without writing to |mime_type| or |params|, when |type_str|
// contains no '/' or when its first '/' lies beyond the end of the type token.
// Otherwise returns true and:
//   * if |mime_type| is non-null, stores the type token in it: leading LWS is
//     skipped and the token ends at the first LWS, ';' or '(';
//   * if |params| is non-null, clears it and appends one (name, value) pair
//     per parameter, in order of appearance. Parameters without '=' or with an
//     empty unquoted value are skipped.
bool ParseMimeType(std::string_view type_str,
                   std::string* mime_type,
                   base::StringPairs* params) {
  // Trim leading and trailing whitespace from type.  We include '(' in
  // the trailing trim set to catch media-type comments, which are not at all
  // standard, but may occur in rare cases.
  size_t type_val = type_str.find_first_not_of(HTTP_LWS);
  // Clamp npos (all-whitespace or empty input) to the string length.
  type_val = std::min(type_val, type_str.length());
  size_t type_end = type_str.find_first_of(HTTP_LWS ";(", type_val);
  if (type_end == std::string::npos)
    type_end = type_str.length();

  // Reject a mime-type if it does not include a slash.
  size_t slash_pos = type_str.find_first_of('/');
  if (slash_pos == std::string::npos || slash_pos > type_end)
    return false;
  if (mime_type)
    *mime_type = type_str.substr(type_val, type_end - type_val);

  // Iterate over parameters. Can't split the string around semicolons
  // preemptively because quoted strings may include semicolons. Mostly matches
  // logic in https://mimesniff.spec.whatwg.org/. Main differences: Does not
  // validate characters are HTTP token code points / HTTP quoted-string token
  // code points, and ignores spaces after "=" in parameters.
  if (params)
    params->clear();
  // Loop invariant: |offset| is either npos (which ends the loop) or the index
  // of a ';' that introduces the next parameter.
  std::string::size_type offset = type_str.find_first_of(';', type_end);
  while (offset < type_str.size()) {
    DCHECK_EQ(';', type_str[offset]);
    // Trim off the semicolon.
    ++offset;

    // Trim off any following spaces.
    offset = type_str.find_first_not_of(HTTP_LWS, offset);
    std::string::size_type param_name_start = offset;

    // Extend parameter name until run into a semicolon or equals sign.  Per
    // spec, trailing spaces are not removed.
    offset = type_str.find_first_of(";=", offset);

    // Nothing more to do if at end of string, or if there's no parameter
    // value, since names without values aren't allowed.
    if (offset == std::string::npos || type_str[offset] == ';')
      continue;

    auto param_name = base::MakeStringPiece(type_str.begin() + param_name_start,
                                            type_str.begin() + offset);

    // Now parse the value.
    DCHECK_EQ('=', type_str[offset]);
    // Trim off the '='.
    offset++;

    // Remove leading spaces. This violates the spec, though it matches
    // pre-existing behavior.
    //
    // TODO(mmenke): Consider doing this (only?) after parsing quotes, which
    // seems to align more with the spec - not the content-type spec, but the
    // GET spec's way of getting an encoding, and the spec for handling
    // boundary values as well.
    // See https://encoding.spec.whatwg.org/#names-and-labels.
    offset = type_str.find_first_not_of(HTTP_LWS, offset);

    std::string param_value;
    if (offset == std::string::npos || type_str[offset] == ';') {
      // Nothing to do here - an unquoted string of only whitespace should be
      // skipped.
      continue;
    } else if (type_str[offset] != '"') {
      // If the first character is not a quotation mark, copy data directly.
      std::string::size_type value_start = offset;
      offset = type_str.find_first_of(';', offset);
      std::string::size_type value_end = offset;

      // Remove terminal whitespace. If ran off the end of the string, have to
      // update |value_end| first.
      if (value_end == std::string::npos)
        value_end = type_str.size();
      while (value_end > value_start &&
             HttpUtil::IsLWS(type_str[value_end - 1])) {
        --value_end;
      }

      param_value = type_str.substr(value_start, value_end - value_start);
    } else {
      // Otherwise, append data, with special handling for backslashes, until
      // a close quote.  Do not trim whitespace for quoted-string.

      // Skip open quote.
      DCHECK_EQ('"', type_str[offset]);
      ++offset;

      while (offset < type_str.size() && type_str[offset] != '"') {
        // Skip over backslash and append the next character, when not at
        // the end of the string. Otherwise, copy the next character (Which may
        // be a backslash).
        if (type_str[offset] == '\\' && offset + 1 < type_str.size()) {
          ++offset;
        }
        param_value += type_str[offset];
        ++offset;
      }

      // Anything between the closing quote (or end of string) and the next
      // ';' is ignored.
      offset = type_str.find_first_of(';', offset);
    }
    if (params)
      params->emplace_back(param_name, param_value);
  }
  return true;
}
591
ParseMimeTypeWithoutParameter(std::string_view type_string,std::string * top_level_type,std::string * subtype) const592 bool MimeUtil::ParseMimeTypeWithoutParameter(std::string_view type_string,
593 std::string* top_level_type,
594 std::string* subtype) const {
595 std::vector<std::string_view> components = base::SplitStringPiece(
596 type_string, "/", base::KEEP_WHITESPACE, base::SPLIT_WANT_ALL);
597 if (components.size() != 2)
598 return false;
599 components[0] = TrimWhitespaceASCII(components[0], base::TRIM_LEADING);
600 components[1] = TrimWhitespaceASCII(components[1], base::TRIM_TRAILING);
601 if (!HttpUtil::IsToken(components[0]) || !HttpUtil::IsToken(components[1]))
602 return false;
603
604 if (top_level_type)
605 top_level_type->assign(std::string(components[0]));
606
607 if (subtype)
608 subtype->assign(std::string(components[1]));
609
610 return true;
611 }
612
// Top-level media types accepted by IsValidTopLevelMimeType(), all in lower
// case.
// See https://www.iana.org/assignments/media-types/media-types.xhtml
static const char* const kLegalTopLevelTypes[] = {
    "application", "audio", "example",   "font", "image",
    "message",     "model", "multipart", "text", "video",
};
618
IsValidTopLevelMimeType(std::string_view type_string) const619 bool MimeUtil::IsValidTopLevelMimeType(std::string_view type_string) const {
620 std::string lower_type = base::ToLowerASCII(type_string);
621 for (const char* const legal_type : kLegalTopLevelTypes) {
622 if (lower_type.compare(legal_type) == 0) {
623 return true;
624 }
625 }
626
627 return type_string.size() > 2 &&
628 base::StartsWith(type_string, "x-",
629 base::CompareCase::INSENSITIVE_ASCII);
630 }
631
632 //----------------------------------------------------------------------------
633 // Wrappers for the singleton
634 //----------------------------------------------------------------------------
635
GetMimeTypeFromExtension(const base::FilePath::StringType & ext,std::string * mime_type)636 bool GetMimeTypeFromExtension(const base::FilePath::StringType& ext,
637 std::string* mime_type) {
638 return g_mime_util.Get().GetMimeTypeFromExtension(ext, mime_type);
639 }
640
GetMimeTypeFromFile(const base::FilePath & file_path,std::string * mime_type)641 bool GetMimeTypeFromFile(const base::FilePath& file_path,
642 std::string* mime_type) {
643 return g_mime_util.Get().GetMimeTypeFromFile(file_path, mime_type);
644 }
645
GetWellKnownMimeTypeFromExtension(const base::FilePath::StringType & ext,std::string * mime_type)646 bool GetWellKnownMimeTypeFromExtension(const base::FilePath::StringType& ext,
647 std::string* mime_type) {
648 return g_mime_util.Get().GetWellKnownMimeTypeFromExtension(ext, mime_type);
649 }
650
GetPreferredExtensionForMimeType(std::string_view mime_type,base::FilePath::StringType * extension)651 bool GetPreferredExtensionForMimeType(std::string_view mime_type,
652 base::FilePath::StringType* extension) {
653 return g_mime_util.Get().GetPreferredExtensionForMimeType(mime_type,
654 extension);
655 }
656
MatchesMimeType(std::string_view mime_type_pattern,std::string_view mime_type)657 bool MatchesMimeType(std::string_view mime_type_pattern,
658 std::string_view mime_type) {
659 return g_mime_util.Get().MatchesMimeType(mime_type_pattern, mime_type);
660 }
661
ParseMimeTypeWithoutParameter(std::string_view type_string,std::string * top_level_type,std::string * subtype)662 bool ParseMimeTypeWithoutParameter(std::string_view type_string,
663 std::string* top_level_type,
664 std::string* subtype) {
665 return g_mime_util.Get().ParseMimeTypeWithoutParameter(
666 type_string, top_level_type, subtype);
667 }
668
IsValidTopLevelMimeType(std::string_view type_string)669 bool IsValidTopLevelMimeType(std::string_view type_string) {
670 return g_mime_util.Get().IsValidTopLevelMimeType(type_string);
671 }
672
673 namespace {
674
// Image MIME types paired with the "image/" prefix in kStandardTypes.
// From http://www.w3schools.com/media/media_mimeref.asp and
// http://plugindoc.mozdev.org/winmime.php
static const char* const kStandardImageTypes[] = {"image/avif",
                                                  "image/bmp",
                                                  "image/cis-cod",
                                                  "image/gif",
                                                  "image/heic",
                                                  "image/heif",
                                                  "image/ief",
                                                  "image/jpeg",
                                                  "image/webp",
                                                  "image/pict",
                                                  "image/pipeg",
                                                  "image/png",
                                                  "image/svg+xml",
                                                  "image/tiff",
                                                  "image/vnd.microsoft.icon",
                                                  "image/x-cmu-raster",
                                                  "image/x-cmx",
                                                  "image/x-icon",
                                                  "image/x-portable-anymap",
                                                  "image/x-portable-bitmap",
                                                  "image/x-portable-graymap",
                                                  "image/x-portable-pixmap",
                                                  "image/x-rgb",
                                                  "image/x-xbitmap",
                                                  "image/x-xpixmap",
                                                  "image/x-xwindowdump"};
// Audio MIME types paired with the "audio/" prefix in kStandardTypes.
static const char* const kStandardAudioTypes[] = {
  "audio/aac",
  "audio/aiff",
  "audio/amr",
  "audio/basic",
  "audio/flac",
  "audio/midi",
  "audio/mp3",
  "audio/mp4",
  "audio/mpeg",
  "audio/mpeg3",
  "audio/ogg",
  "audio/vorbis",
  "audio/wav",
  "audio/webm",
  "audio/x-m4a",
  "audio/x-ms-wma",
  "audio/vnd.rn-realaudio",
  "audio/vnd.wave"
};
// Font MIME types paired with the "font/" prefix in kStandardTypes.
// https://tools.ietf.org/html/rfc8081
static const char* const kStandardFontTypes[] = {
    "font/collection", "font/otf",  "font/sfnt",
    "font/ttf",        "font/woff", "font/woff2",
};
// Video MIME types paired with the "video/" prefix in kStandardTypes.
static const char* const kStandardVideoTypes[] = {
  "video/avi",
  "video/divx",
  "video/flc",
  "video/mp4",
  "video/mpeg",
  "video/ogg",
  "video/quicktime",
  "video/sd-video",
  "video/webm",
  "video/x-dv",
  "video/x-m4v",
  "video/x-mpeg",
  "video/x-ms-asf",
  "video/x-ms-wmv"
};
744
// Pairs a MIME type prefix with the list of full MIME types that are looked
// up on the platform when a wildcard for that prefix is requested.
struct StandardType {
  // Prefix including the trailing slash, e.g. "image/". Null in the final,
  // catch-all entry of kStandardTypes.
  const char* const leading_mime_type;
  // TODO(367764863) Rewrite to base::raw_span.
  RAW_PTR_EXCLUSION base::span<const char* const> standard_types;
};
// Table searched by GetExtensionsForMimeType() for "<prefix>/*" requests. The
// last entry (null prefix, empty list) is the default used when no prefix
// matches, so it must stay last.
static const StandardType kStandardTypes[] = {{"image/", kStandardImageTypes},
                                              {"audio/", kStandardAudioTypes},
                                              {"font/", kStandardFontTypes},
                                              {"video/", kStandardVideoTypes},
                                              {nullptr, {}}};
755
756 // GetExtensionsFromHardCodedMappings() adds file extensions (without a leading
757 // dot) to the set |extensions|, for all MIME types matching |mime_type|.
758 //
759 // The meaning of |mime_type| depends on the value of |prefix_match|:
760 //
761 // * If |prefix_match = false| then |mime_type| is an exact (case-insensitive)
762 // string such as "text/plain".
763 //
764 // * If |prefix_match = true| then |mime_type| is treated as the prefix for a
765 // (case-insensitive) string. For instance "Text/" would match "text/plain".
GetExtensionsFromHardCodedMappings(base::span<const MimeInfo> mappings,const std::string & mime_type,bool prefix_match,std::unordered_set<base::FilePath::StringType> * extensions)766 void GetExtensionsFromHardCodedMappings(
767 base::span<const MimeInfo> mappings,
768 const std::string& mime_type,
769 bool prefix_match,
770 std::unordered_set<base::FilePath::StringType>* extensions) {
771 for (const auto& mapping : mappings) {
772 std::string_view cur_mime_type(mapping.mime_type);
773
774 if (base::StartsWith(cur_mime_type, mime_type,
775 base::CompareCase::INSENSITIVE_ASCII) &&
776 (prefix_match || (cur_mime_type.length() == mime_type.length()))) {
777 for (std::string_view this_extension : base::SplitStringPiece(
778 mapping.extensions, ",", base::TRIM_WHITESPACE,
779 base::SPLIT_WANT_ALL)) {
780 extensions->insert(StringToFilePathStringType(this_extension));
781 }
782 }
783 }
784 }
785
GetExtensionsHelper(base::span<const char * const> standard_types,const std::string & leading_mime_type,std::unordered_set<base::FilePath::StringType> * extensions)786 void GetExtensionsHelper(
787 base::span<const char* const> standard_types,
788 const std::string& leading_mime_type,
789 std::unordered_set<base::FilePath::StringType>* extensions) {
790 for (auto* standard_type : standard_types) {
791 g_mime_util.Get().GetPlatformExtensionsForMimeType(standard_type,
792 extensions);
793 }
794
795 // Also look up the extensions from hard-coded mappings in case that some
796 // supported extensions are not registered in the system registry, like ogg.
797 GetExtensionsFromHardCodedMappings(kPrimaryMappings, leading_mime_type, true,
798 extensions);
799
800 GetExtensionsFromHardCodedMappings(kSecondaryMappings, leading_mime_type,
801 true, extensions);
802 }
803
// Appends every element of |source| to the end of |target|, in |source|'s
// iteration order. Elements already in |target| are kept and stay in front;
// |source| is left unchanged.
//
// A single range insert copy-constructs each element directly into place, so
// T does not have to be default-constructible and no element is
// default-constructed only to be overwritten.
template <class T>
void UnorderedSetToVector(std::unordered_set<T>* source,
                          std::vector<T>* target) {
  target->insert(target->end(), source->begin(), source->end());
}
815
// Characters to be used for mime multipart boundary: the 62 ASCII
// alphanumerics (digits, lower-case letters, upper-case letters).
//
// TODO(rsleevi): crbug.com/575779: Follow the spec or fix the spec.
// The RFC 2046 spec says the alphanumeric characters plus the
// following characters are legal for boundaries:  '()+_,-./:=?
// However the following characters, though legal, cause some sites
// to fail: (),./:=+
constexpr std::string_view kMimeBoundaryCharacters(
    "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");

// Size of mime multipart boundary.
// NOTE(review): presumably the total length in characters of the generated
// boundary string -- confirm against GenerateMimeMultipartBoundary().
const size_t kMimeBoundarySize = 69;
828
829 } // namespace
830
GetExtensionsForMimeType(std::string_view unsafe_mime_type,std::vector<base::FilePath::StringType> * extensions)831 void GetExtensionsForMimeType(
832 std::string_view unsafe_mime_type,
833 std::vector<base::FilePath::StringType>* extensions) {
834 if (unsafe_mime_type == "*/*" || unsafe_mime_type == "*")
835 return;
836
837 const std::string mime_type = base::ToLowerASCII(unsafe_mime_type);
838 std::unordered_set<base::FilePath::StringType> unique_extensions;
839
840 if (base::EndsWith(mime_type, "/*", base::CompareCase::INSENSITIVE_ASCII)) {
841 std::string leading_mime_type = mime_type.substr(0, mime_type.length() - 1);
842
843 // Find the matching StandardType from within kStandardTypes, or fall
844 // through to the last (default) StandardType.
845 const StandardType* type = nullptr;
846 for (const StandardType& standard_type : kStandardTypes) {
847 type = &standard_type;
848 if (type->leading_mime_type &&
849 leading_mime_type == type->leading_mime_type) {
850 break;
851 }
852 }
853 DCHECK(type);
854 GetExtensionsHelper(type->standard_types,
855 leading_mime_type,
856 &unique_extensions);
857 } else {
858 g_mime_util.Get().GetPlatformExtensionsForMimeType(mime_type,
859 &unique_extensions);
860
861 // Also look up the extensions from hard-coded mappings in case that some
862 // supported extensions are not registered in the system registry, like ogg.
863 GetExtensionsFromHardCodedMappings(kPrimaryMappings, mime_type, false,
864 &unique_extensions);
865
866 GetExtensionsFromHardCodedMappings(kSecondaryMappings, mime_type, false,
867 &unique_extensions);
868 }
869
870 UnorderedSetToVector(&unique_extensions, extensions);
871 }
872
GenerateMimeMultipartBoundary()873 NET_EXPORT std::string GenerateMimeMultipartBoundary() {
874 // Based on RFC 1341, section "7.2.1 Multipart: The common syntax":
875 // Because encapsulation boundaries must not appear in the body parts being
876 // encapsulated, a user agent must exercise care to choose a unique
877 // boundary. The boundary in the example above could have been the result of
878 // an algorithm designed to produce boundaries with a very low probability
879 // of already existing in the data to be encapsulated without having to
880 // prescan the data.
881 // [...]
882 // the boundary parameter [...] consists of 1 to 70 characters from a set of
883 // characters known to be very robust through email gateways, and NOT ending
884 // with white space.
885 // [...]
886 // boundary := 0*69<bchars> bcharsnospace
887 // bchars := bcharsnospace / " "
888 // bcharsnospace := DIGIT / ALPHA / "'" / "(" / ")" / "+" /
889 // "_" / "," / "-" / "." / "/" / ":" / "=" / "?"
890
891 std::string result;
892 result.reserve(kMimeBoundarySize);
893 result.append("----MultipartBoundary--");
894 while (result.size() < (kMimeBoundarySize - 4)) {
895 char c = kMimeBoundaryCharacters[base::RandInt(
896 0, kMimeBoundaryCharacters.size() - 1)];
897 result.push_back(c);
898 }
899 result.append("----");
900
901 // Not a strict requirement - documentation only.
902 DCHECK_EQ(kMimeBoundarySize, result.size());
903
904 return result;
905 }
906
AddMultipartValueForUpload(const std::string & value_name,const std::string & value,const std::string & mime_boundary,const std::string & content_type,std::string * post_data)907 void AddMultipartValueForUpload(const std::string& value_name,
908 const std::string& value,
909 const std::string& mime_boundary,
910 const std::string& content_type,
911 std::string* post_data) {
912 DCHECK(post_data);
913 // First line is the boundary.
914 post_data->append("--" + mime_boundary + "\r\n");
915 // Next line is the Content-disposition.
916 post_data->append("Content-Disposition: form-data; name=\"" +
917 value_name + "\"\r\n");
918 if (!content_type.empty()) {
919 // If Content-type is specified, the next line is that.
920 post_data->append("Content-Type: " + content_type + "\r\n");
921 }
922 // Leave an empty line and append the value.
923 post_data->append("\r\n" + value + "\r\n");
924 }
925
AddMultipartValueForUploadWithFileName(const std::string & value_name,const std::string & file_name,const std::string & value,const std::string & mime_boundary,const std::string & content_type,std::string * post_data)926 void AddMultipartValueForUploadWithFileName(const std::string& value_name,
927 const std::string& file_name,
928 const std::string& value,
929 const std::string& mime_boundary,
930 const std::string& content_type,
931 std::string* post_data) {
932 DCHECK(post_data);
933 // First line is the boundary.
934 post_data->append("--" + mime_boundary + "\r\n");
935 // Next line is the Content-disposition.
936 post_data->append("Content-Disposition: form-data; name=\"" + value_name +
937 "\"; filename=\"" + file_name + "\"\r\n");
938 if (!content_type.empty()) {
939 // If Content-type is specified, the next line is that.
940 post_data->append("Content-Type: " + content_type + "\r\n");
941 }
942 // Leave an empty line and append the value.
943 post_data->append("\r\n" + value + "\r\n");
944 }
945
AddMultipartFinalDelimiterForUpload(const std::string & mime_boundary,std::string * post_data)946 void AddMultipartFinalDelimiterForUpload(const std::string& mime_boundary,
947 std::string* post_data) {
948 DCHECK(post_data);
949 post_data->append("--" + mime_boundary + "--\r\n");
950 }
951
952 // TODO(toyoshim): We may prefer to implement a strict RFC2616 media-type
953 // (https://tools.ietf.org/html/rfc2616#section-3.7) parser.
ExtractMimeTypeFromMediaType(std::string_view type_string,bool accept_comma_separated)954 std::optional<std::string> ExtractMimeTypeFromMediaType(
955 std::string_view type_string,
956 bool accept_comma_separated) {
957 std::string::size_type end = type_string.find(';');
958 if (accept_comma_separated) {
959 end = std::min(end, type_string.find(','));
960 }
961 std::string top_level_type;
962 std::string subtype;
963 if (ParseMimeTypeWithoutParameter(type_string.substr(0, end), &top_level_type,
964 &subtype)) {
965 return top_level_type + "/" + subtype;
966 }
967 return std::nullopt;
968 }
969
ScopedOverrideGetMimeTypeForTesting(std::string_view overriding_mime_type)970 ScopedOverrideGetMimeTypeForTesting::ScopedOverrideGetMimeTypeForTesting(
971 std::string_view overriding_mime_type) {
972 GetOverridingMimeType() = overriding_mime_type;
973 }
974
~ScopedOverrideGetMimeTypeForTesting()975 ScopedOverrideGetMimeTypeForTesting::~ScopedOverrideGetMimeTypeForTesting() {
976 GetOverridingMimeType().clear();
977 }
978
979 } // namespace net
980