1 // Copyright 2012 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <algorithm>
6 #include <iterator>
7 #include <map>
8 #include <string>
9 #include <unordered_set>
10
11 #include "base/base64.h"
12 #include "base/check_op.h"
13 #include "base/containers/span.h"
14 #include "base/lazy_instance.h"
15 #include "base/rand_util.h"
16 #include "base/strings/string_number_conversions.h"
17 #include "base/strings/string_piece.h"
18 #include "base/strings/string_split.h"
19 #include "base/strings/string_util.h"
20 #include "base/strings/utf_string_conversions.h"
21 #include "build/build_config.h"
22 #include "net/base/mime_util.h"
23 #include "net/base/platform_mime_util.h"
24 #include "net/http/http_util.h"
25
26 using std::string;
27
28 namespace net {
29
// Singleton utility class for mime types. The constructor is private and only
// base::LazyInstance (befriended below) can create the object; the rest of
// this file reaches it through |g_mime_util|.
class MimeUtil : public PlatformMimeUtil {
 public:
  // Maps the file extension |ext| (no leading dot) to a MIME type stored in
  // |*mime_type|. Consults kPrimaryMappings, then the platform, then
  // kSecondaryMappings. Returns false if no source knows the extension.
  bool GetMimeTypeFromExtension(const base::FilePath::StringType& ext,
                                std::string* mime_type) const;

  // Same as GetMimeTypeFromExtension(), using the extension of |file_path|.
  // Returns false if |file_path| has no extension.
  bool GetMimeTypeFromFile(const base::FilePath& file_path,
                           std::string* mime_type) const;

  // Same as GetMimeTypeFromExtension(), except that the platform is never
  // consulted; only the hardcoded mapping tables are searched.
  bool GetWellKnownMimeTypeFromExtension(const base::FilePath::StringType& ext,
                                         std::string* mime_type) const;

  // Stores the preferred file extension for |mime_type| in |*extension|.
  // Consults the platform, then kPrimaryMappings, then kSecondaryMappings.
  // Returns false if no source knows the MIME type.
  bool GetPreferredExtensionForMimeType(
      const std::string& mime_type,
      base::FilePath::StringType* extension) const;

  // Returns true if |mime_type| matches |mime_type_pattern|. The pattern may
  // contain a single '*' wildcard and may carry parameters, all of which must
  // then be present in |mime_type|. An empty pattern never matches.
  bool MatchesMimeType(const std::string& mime_type_pattern,
                       const std::string& mime_type) const;

  // Splits a parameterless "type/subtype" string into its two components.
  // Either output pointer may be null. Returns false unless the string has
  // exactly one '/' and both components are HTTP tokens.
  bool ParseMimeTypeWithoutParameter(base::StringPiece type_string,
                                     std::string* top_level_type,
                                     std::string* subtype) const;

  // Returns true if |type_string| is one of kLegalTopLevelTypes (compared
  // case-insensitively) or an "x-" prefixed name of at least three characters.
  bool IsValidTopLevelMimeType(const std::string& type_string) const;

 private:
  // Lets base::LazyInstance call the private constructor.
  friend struct base::LazyInstanceTraitsBase<MimeUtil>;

  MimeUtil();

  // Shared implementation of the two extension -> MIME type lookups above.
  // |include_platform_types| selects whether the platform is consulted between
  // the primary and the secondary hardcoded tables.
  bool GetMimeTypeFromExtensionHelper(const base::FilePath::StringType& ext,
                                      bool include_platform_types,
                                      std::string* mime_type) const;
};  // class MimeUtil
64
// The MimeUtil instance shared by every wrapper function in this file.
// This variable is Leaky because we need to access it from WorkerPool threads.
static base::LazyInstance<MimeUtil>::Leaky g_mime_util =
    LAZY_INSTANCE_INITIALIZER;
68
// One row of the hardcoded MIME mapping tables (kPrimaryMappings and
// kSecondaryMappings): a MIME type together with the file extensions that map
// to it.
struct MimeInfo {
  // The MIME type, e.g. "text/html".
  const char* const mime_type;

  // Comma-separated list of possible extensions for the type, without leading
  // dots and without whitespace. The first extension is considered preferred.
  const char* const extensions;
};
76
77 // How to use the MIME maps
78 // ------------------------
79 // READ THIS BEFORE MODIFYING THE MIME MAPPINGS BELOW.
80 //
81 // There are two hardcoded mappings from MIME types: kPrimaryMappings and
82 // kSecondaryMappings.
83 //
84 // kPrimaryMappings:
85 //
86 // Use this for mappings that are critical to the web platform. Mappings you
87 // add to this list take priority over the underlying platform when converting
88 // from file extension -> MIME type. Thus file extensions listed here will
89 // work consistently across platforms.
90 //
91 // kSecondaryMappings:
92 //
93 // Use this for mappings that must exist, but can be overridden by user
94 // preferences.
95 //
96 // The following applies to both lists:
97 //
98 // * The same extension can appear multiple times in the same list under
99 // different MIME types. Extensions that appear earlier take precedence over
100 // those that appear later.
101 //
102 // * A MIME type must not appear more than once in a single list. It is valid
103 // for the same MIME type to appear in kPrimaryMappings and
104 // kSecondaryMappings.
105 //
106 // The MIME maps are used for three types of lookups:
107 //
108 // 1) MIME type -> file extension. Implemented as
109 // GetPreferredExtensionForMimeType().
110 //
111 // Sources are consulted in the following order:
112 //
113 // a) As a special case application/octet-stream is mapped to nothing. Web
114 // sites are supposed to use this MIME type to indicate that the content
115 // is opaque and shouldn't be parsed as any specific type of content. It
116 // doesn't make sense to map this to anything.
117 //
118 // b) The underlying platform. If the operating system has a mapping from
119 // the MIME type to a file extension, then that takes priority. The
120 // platform is assumed to represent the user's preference.
121 //
122 // c) kPrimaryMappings. Order doesn't matter since there should only be at
123 // most one entry per MIME type.
124 //
125 // d) kSecondaryMappings. Again, order doesn't matter.
126 //
127 // 2) File extension -> MIME type. Implemented in GetMimeTypeFromExtension().
128 //
129 // Sources are considered in the following order:
130 //
131 // a) kPrimaryMappings. Order matters here since file extensions can appear
132 // multiple times on these lists. The first mapping in order of
133 // appearance in the list wins.
134 //
135 // b) Underlying platform.
136 //
137 // c) kSecondaryMappings. Again, the order matters.
138 //
139 // 3) File extension -> Well known MIME type. Implemented as
140 // GetWellKnownMimeTypeFromExtension().
141 //
//      This is similar to 2), with the exception that b) is skipped, i.e. only
//      the hardcoded mappings in kPrimaryMappings and kSecondaryMappings are
//      considered.
145
// Mappings that take priority over the underlying platform when converting a
// file extension to a MIME type. Rows are searched top to bottom and the first
// row listing an extension wins, so row order is significant.
// See comments above for details on how this list is used.
static const MimeInfo kPrimaryMappings[] = {
    // Must precede audio/webm .
    {"video/webm", "webm"},

    // Must precede audio/mp3
    {"audio/mpeg", "mp3"},

    {"application/wasm", "wasm"},
    {"application/x-chrome-extension", "crx"},
    {"application/xhtml+xml", "xhtml,xht,xhtm"},
    {"audio/flac", "flac"},
    {"audio/mp3", "mp3"},
    {"audio/ogg", "ogg,oga,opus"},
    {"audio/wav", "wav"},
    {"audio/webm", "webm"},
    {"audio/x-m4a", "m4a"},
    {"image/avif", "avif"},
    {"image/gif", "gif"},
    {"image/jpeg", "jpeg,jpg"},
    // Listed before image/apng, which also claims "png", so that the "png"
    // extension resolves to image/png.
    {"image/png", "png"},
    {"image/apng", "png,apng"},
    {"image/svg+xml", "svg,svgz"},
    {"image/webp", "webp"},
    {"multipart/related", "mht,mhtml"},
    {"text/css", "css"},
    {"text/html", "html,htm,shtml,shtm"},
    {"text/javascript", "js,mjs"},
    {"text/xml", "xml"},
    {"video/mp4", "mp4,m4v"},
    {"video/ogg", "ogv,ogm"},

    // This is a primary mapping (overrides the platform) rather than secondary
    // to work around an issue when Excel is installed on Windows. Excel
    // registers csv as application/vnd.ms-excel instead of text/csv from RFC
    // 4180. See https://crbug.com/139105.
    {"text/csv", "csv"},
};
184
// Fallback mappings that are searched only after kPrimaryMappings and (for
// lookups that include it) the platform have failed. Rows are searched top to
// bottom and the first row listing an extension wins.
// See comments above for details on how this list is used.
static const MimeInfo kSecondaryMappings[] = {
    // Must precede image/vnd.microsoft.icon .
    {"image/x-icon", "ico"},

    {"application/epub+zip", "epub"},
    {"application/font-woff", "woff"},
    {"application/gzip", "gz,tgz"},
    {"application/javascript", "js"},
    {"application/json", "json"},  // Per http://www.ietf.org/rfc/rfc4627.txt.
    {"application/msword", "doc,dot"},
    {"application/octet-stream", "bin,exe,com"},
    {"application/pdf", "pdf"},
    {"application/pkcs7-mime", "p7m,p7c,p7z"},
    {"application/pkcs7-signature", "p7s"},
    {"application/postscript", "ps,eps,ai"},
    {"application/rdf+xml", "rdf"},
    {"application/rss+xml", "rss"},
    {"application/rtf", "rtf"},
    {"application/vnd.android.package-archive", "apk"},
    {"application/vnd.mozilla.xul+xml", "xul"},
    {"application/vnd.ms-excel", "xls"},
    {"application/vnd.ms-powerpoint", "ppt"},
    {"application/"
     "vnd.openxmlformats-officedocument.presentationml.presentation",
     "pptx"},
    {"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
     "xlsx"},
    {"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
     "docx"},
    // Same extensions as application/gzip above; that earlier row wins for
    // extension -> MIME type lookups.
    {"application/x-gzip", "gz,tgz"},
    {"application/x-mpegurl", "m3u8"},
    {"application/x-shockwave-flash", "swf,swl"},
    {"application/x-tar", "tar"},
    {"application/x-x509-ca-cert", "cer,crt"},
    {"application/zip", "zip"},
    // This is the platform mapping on recent versions of Windows 10.
    {"audio/webm", "weba"},
    {"image/bmp", "bmp"},
    {"image/jpeg", "jfif,pjpeg,pjp"},
    {"image/tiff", "tiff,tif"},
    {"image/vnd.microsoft.icon", "ico"},
    {"image/x-png", "png"},
    {"image/x-xbitmap", "xbm"},
    {"message/rfc822", "eml"},
    {"text/calendar", "ics"},
    {"text/html", "ehtml"},
    {"text/plain", "txt,text"},
    {"text/x-sh", "sh"},
    {"text/xml", "xsl,xbl,xslt"},
    {"video/mpeg", "mpeg,mpg"},
};
237
238 // Finds mime type of |ext| from |mappings|.
239 template <size_t num_mappings>
FindMimeType(const MimeInfo (& mappings)[num_mappings],const std::string & ext)240 static const char* FindMimeType(const MimeInfo (&mappings)[num_mappings],
241 const std::string& ext) {
242 for (const auto& mapping : mappings) {
243 const char* extensions = mapping.extensions;
244 for (;;) {
245 size_t end_pos = strcspn(extensions, ",");
246 // The length check is required to prevent the StringPiece below from
247 // including uninitialized memory if ext is longer than extensions.
248 if (end_pos == ext.size() &&
249 base::EqualsCaseInsensitiveASCII(
250 base::StringPiece(extensions, ext.size()), ext)) {
251 return mapping.mime_type;
252 }
253 extensions += end_pos;
254 if (!*extensions)
255 break;
256 extensions += 1; // skip over comma
257 }
258 }
259 return nullptr;
260 }
261
// Converts |string_piece| to the string type used by base::FilePath on the
// current platform: on Windows builds the text is converted from UTF-8 to a
// wide string, elsewhere the bytes are copied into a std::string unchanged.
static base::FilePath::StringType StringToFilePathStringType(
    base::StringPiece string_piece) {
#if BUILDFLAG(IS_WIN)
  return base::UTF8ToWide(string_piece);
#else
  return std::string(string_piece);
#endif
}
270
271 // Helper used in MimeUtil::GetPreferredExtensionForMimeType() to search
272 // preferred extension in MimeInfo arrays.
273 template <size_t num_mappings>
FindPreferredExtension(const MimeInfo (& mappings)[num_mappings],const std::string & mime_type,base::FilePath::StringType * result)274 static bool FindPreferredExtension(const MimeInfo (&mappings)[num_mappings],
275 const std::string& mime_type,
276 base::FilePath::StringType* result) {
277 // There is no preferred extension for "application/octet-stream".
278 if (mime_type == "application/octet-stream")
279 return false;
280
281 for (const auto& mapping : mappings) {
282 if (mapping.mime_type == mime_type) {
283 const char* extensions = mapping.extensions;
284 const char* extension_end = strchr(extensions, ',');
285 size_t len =
286 extension_end ? extension_end - extensions : strlen(extensions);
287 *result = StringToFilePathStringType(base::StringPiece(extensions, len));
288 return true;
289 }
290 }
291 return false;
292 }
293
// Extension -> MIME type lookup that includes the platform's mappings (the
// helper is called with include_platform_types = true).
bool MimeUtil::GetMimeTypeFromExtension(const base::FilePath::StringType& ext,
                                        string* result) const {
  return GetMimeTypeFromExtensionHelper(ext, true, result);
}
298
// Extension -> MIME type lookup restricted to the hardcoded tables (the helper
// is called with include_platform_types = false).
bool MimeUtil::GetWellKnownMimeTypeFromExtension(
    const base::FilePath::StringType& ext,
    string* result) const {
  return GetMimeTypeFromExtensionHelper(ext, false, result);
}
304
// Stores the preferred extension for |mime_type| in |*extension| and returns
// true, or returns false if no source has a mapping. The || chain stops at the
// first source that succeeds.
//
// NOTE(review): the "application/octet-stream" special case is implemented in
// FindPreferredExtension(), so it only takes effect after the platform lookup
// has failed. Whether the platform lookup can return an extension for that
// type is not visible from this file -- confirm against the PlatformMimeUtil
// implementations.
bool MimeUtil::GetPreferredExtensionForMimeType(
    const std::string& mime_type,
    base::FilePath::StringType* extension) const {
  // Search the MIME type in the platform DB first, then in kPrimaryMappings and
  // kSecondaryMappings.
  return GetPlatformPreferredExtensionForMimeType(mime_type, extension) ||
         FindPreferredExtension(kPrimaryMappings, mime_type, extension) ||
         FindPreferredExtension(kSecondaryMappings, mime_type, extension);
}
314
GetMimeTypeFromFile(const base::FilePath & file_path,string * result) const315 bool MimeUtil::GetMimeTypeFromFile(const base::FilePath& file_path,
316 string* result) const {
317 base::FilePath::StringType file_name_str = file_path.Extension();
318 if (file_name_str.empty())
319 return false;
320 return GetMimeTypeFromExtension(file_name_str.substr(1), result);
321 }
322
GetMimeTypeFromExtensionHelper(const base::FilePath::StringType & ext,bool include_platform_types,string * result) const323 bool MimeUtil::GetMimeTypeFromExtensionHelper(
324 const base::FilePath::StringType& ext,
325 bool include_platform_types,
326 string* result) const {
327 DCHECK(ext.empty() || ext[0] != '.')
328 << "extension passed in must not include leading dot";
329
330 // Avoids crash when unable to handle a long file path. See crbug.com/48733.
331 const unsigned kMaxFilePathSize = 65536;
332 if (ext.length() > kMaxFilePathSize)
333 return false;
334
335 // Reject a string which contains null character.
336 base::FilePath::StringType::size_type nul_pos =
337 ext.find(FILE_PATH_LITERAL('\0'));
338 if (nul_pos != base::FilePath::StringType::npos)
339 return false;
340
341 // We implement the same algorithm as Mozilla for mapping a file extension to
342 // a mime type. That is, we first check a hard-coded list (that cannot be
343 // overridden), and then if not found there, we defer to the system registry.
344 // Finally, we scan a secondary hard-coded list to catch types that we can
345 // deduce but that we also want to allow the OS to override.
346
347 base::FilePath path_ext(ext);
348 const string ext_narrow_str = path_ext.AsUTF8Unsafe();
349 const char* mime_type = FindMimeType(kPrimaryMappings, ext_narrow_str);
350 if (mime_type) {
351 *result = mime_type;
352 return true;
353 }
354
355 if (include_platform_types && GetPlatformMimeTypeFromExtension(ext, result))
356 return true;
357
358 mime_type = FindMimeType(kSecondaryMappings, ext_narrow_str);
359 if (mime_type) {
360 *result = mime_type;
361 return true;
362 }
363
364 return false;
365 }
366
// Defined out of line because the constructor is declared private in the
// class; nothing needs to be initialized.
MimeUtil::MimeUtil() = default;
368
369 // Tests for MIME parameter equality. Each parameter in the |mime_type_pattern|
370 // must be matched by a parameter in the |mime_type|. If there are no
371 // parameters in the pattern, the match is a success.
372 //
373 // According rfc2045 keys of parameters are case-insensitive, while values may
374 // or may not be case-sensitive, but they are usually case-sensitive. So, this
375 // function matches values in *case-sensitive* manner, however note that this
376 // may produce some false negatives.
MatchesMimeTypeParameters(const std::string & mime_type_pattern,const std::string & mime_type)377 bool MatchesMimeTypeParameters(const std::string& mime_type_pattern,
378 const std::string& mime_type) {
379 typedef std::map<std::string, std::string> StringPairMap;
380
381 const std::string::size_type semicolon = mime_type_pattern.find(';');
382 const std::string::size_type test_semicolon = mime_type.find(';');
383 if (semicolon != std::string::npos) {
384 if (test_semicolon == std::string::npos)
385 return false;
386
387 base::StringPairs pattern_parameters;
388 base::SplitStringIntoKeyValuePairs(mime_type_pattern.substr(semicolon + 1),
389 '=', ';', &pattern_parameters);
390 base::StringPairs test_parameters;
391 base::SplitStringIntoKeyValuePairs(mime_type.substr(test_semicolon + 1),
392 '=', ';', &test_parameters);
393
394 // Put the parameters to maps with the keys converted to lower case.
395 StringPairMap pattern_parameter_map;
396 for (const auto& pair : pattern_parameters) {
397 pattern_parameter_map[base::ToLowerASCII(pair.first)] = pair.second;
398 }
399
400 StringPairMap test_parameter_map;
401 for (const auto& pair : test_parameters) {
402 test_parameter_map[base::ToLowerASCII(pair.first)] = pair.second;
403 }
404
405 if (pattern_parameter_map.size() > test_parameter_map.size())
406 return false;
407
408 for (const auto& parameter_pair : pattern_parameter_map) {
409 const auto& test_parameter_pair_it =
410 test_parameter_map.find(parameter_pair.first);
411 if (test_parameter_pair_it == test_parameter_map.end())
412 return false;
413 if (parameter_pair.second != test_parameter_pair_it->second)
414 return false;
415 }
416 }
417
418 return true;
419 }
420
421 // This comparison handles absolute maching and also basic
422 // wildcards. The plugin mime types could be:
423 // application/x-foo
424 // application/*
425 // application/*+xml
426 // *
427 // Also tests mime parameters -- all parameters in the pattern must be present
428 // in the tested type for a match to succeed.
MatchesMimeType(const std::string & mime_type_pattern,const std::string & mime_type) const429 bool MimeUtil::MatchesMimeType(const std::string& mime_type_pattern,
430 const std::string& mime_type) const {
431 if (mime_type_pattern.empty())
432 return false;
433
434 std::string::size_type semicolon = mime_type_pattern.find(';');
435 const std::string base_pattern(mime_type_pattern.substr(0, semicolon));
436 semicolon = mime_type.find(';');
437 const std::string base_type(mime_type.substr(0, semicolon));
438
439 if (base_pattern == "*" || base_pattern == "*/*")
440 return MatchesMimeTypeParameters(mime_type_pattern, mime_type);
441
442 const std::string::size_type star = base_pattern.find('*');
443 if (star == std::string::npos) {
444 if (base::EqualsCaseInsensitiveASCII(base_pattern, base_type))
445 return MatchesMimeTypeParameters(mime_type_pattern, mime_type);
446 else
447 return false;
448 }
449
450 // Test length to prevent overlap between |left| and |right|.
451 if (base_type.length() < base_pattern.length() - 1)
452 return false;
453
454 base::StringPiece base_pattern_piece(base_pattern);
455 base::StringPiece left(base_pattern_piece.substr(0, star));
456 base::StringPiece right(base_pattern_piece.substr(star + 1));
457
458 if (!base::StartsWith(base_type, left, base::CompareCase::INSENSITIVE_ASCII))
459 return false;
460
461 if (!right.empty() &&
462 !base::EndsWith(base_type, right, base::CompareCase::INSENSITIVE_ASCII))
463 return false;
464
465 return MatchesMimeTypeParameters(mime_type_pattern, mime_type);
466 }
467
// Parses a media type string of the form `type/subtype; name=value; ...`.
//
// |type_str|  - the text to parse.
// |mime_type| - if non-null, receives the `type/subtype` portion with the
//               surrounding whitespace removed.
// |params|    - if non-null, is cleared and then receives one (name, value)
//               pair per parameter that has a value, in order of appearance.
//
// Returns false -- without writing to either output -- if |type_str| contains
// no '/' or if the first '/' lies beyond the end of the type token. Otherwise
// returns true; malformed parameters are skipped rather than reported.
bool ParseMimeType(const std::string& type_str,
                   std::string* mime_type,
                   base::StringPairs* params) {
  // Trim leading and trailing whitespace from type.  We include '(' in
  // the trailing trim set to catch media-type comments, which are not at all
  // standard, but may occur in rare cases.
  size_t type_val = type_str.find_first_not_of(HTTP_LWS);
  // An all-whitespace (or empty) string yields npos above; clamp it so that
  // the searches below start at the end of the string.
  type_val = std::min(type_val, type_str.length());
  size_t type_end = type_str.find_first_of(HTTP_LWS ";(", type_val);
  if (type_end == std::string::npos)
    type_end = type_str.length();

  // Reject a mime-type if it does not include a slash.
  size_t slash_pos = type_str.find_first_of('/');
  if (slash_pos == std::string::npos || slash_pos > type_end)
    return false;
  if (mime_type)
    *mime_type = type_str.substr(type_val, type_end - type_val);

  // Iterate over parameters. Can't split the string around semicolons
  // preemptively because quoted strings may include semicolons. Mostly matches
  // logic in https://mimesniff.spec.whatwg.org/. Main differences: Does not
  // validate characters are HTTP token code points / HTTP quoted-string token
  // code points, and ignores spaces after "=" in parameters.
  if (params)
    params->clear();
  // Invariant: at the top of each iteration |offset| is the index of a ';'.
  // It becomes npos (ending the loop) once no further ';' exists.
  std::string::size_type offset = type_str.find_first_of(';', type_end);
  while (offset < type_str.size()) {
    DCHECK_EQ(';', type_str[offset]);
    // Trim off the semicolon.
    ++offset;

    // Trim off any following spaces.
    offset = type_str.find_first_not_of(HTTP_LWS, offset);
    std::string::size_type param_name_start = offset;

    // Extend parameter name until run into a semicolon or equals sign.  Per
    // spec, trailing spaces are not removed.
    offset = type_str.find_first_of(";=", offset);

    // Nothing more to do if at end of string, or if there's no parameter
    // value, since names without values aren't allowed.
    // (The loop condition then either terminates on npos or resumes at the
    // ';' that |offset| points to.)
    if (offset == std::string::npos || type_str[offset] == ';')
      continue;

    auto param_name = base::MakeStringPiece(type_str.begin() + param_name_start,
                                            type_str.begin() + offset);

    // Now parse the value.
    DCHECK_EQ('=', type_str[offset]);
    // Trim off the '='.
    offset++;

    // Remove leading spaces. This violates the spec, though it matches
    // pre-existing behavior.
    //
    // TODO(mmenke): Consider doing this (only?) after parsing quotes, which
    // seems to align more with the spec - not the content-type spec, but the
    // GET spec's way of getting an encoding, and the spec for handling
    // boundary values as well.
    // See https://encoding.spec.whatwg.org/#names-and-labels.
    offset = type_str.find_first_not_of(HTTP_LWS, offset);

    std::string param_value;
    if (offset == std::string::npos || type_str[offset] == ';') {
      // Nothing to do here - an unquoted string of only whitespace should be
      // skipped.
      continue;
    } else if (type_str[offset] != '"') {
      // If the first character is not a quotation mark, copy data directly.
      std::string::size_type value_start = offset;
      offset = type_str.find_first_of(';', offset);
      std::string::size_type value_end = offset;

      // Remove terminal whitespace. If ran off the end of the string, have to
      // update |value_end| first.
      if (value_end == std::string::npos)
        value_end = type_str.size();
      while (value_end > value_start &&
             HttpUtil::IsLWS(type_str[value_end - 1])) {
        --value_end;
      }

      param_value = type_str.substr(value_start, value_end - value_start);
    } else {
      // Otherwise, append data, with special handling for backslashes, until
      // a close quote.  Do not trim whitespace for quoted-string.

      // Skip open quote.
      DCHECK_EQ('"', type_str[offset]);
      ++offset;

      while (offset < type_str.size() && type_str[offset] != '"') {
        // Skip over backslash and append the next character, when not at
        // the end of the string. Otherwise, copy the next character (Which may
        // be a backslash).
        if (type_str[offset] == '\\' && offset + 1 < type_str.size()) {
          ++offset;
        }
        param_value += type_str[offset];
        ++offset;
      }

      // Anything between the closing quote (or the end of the string, if the
      // quote was never closed) and the next ';' is ignored.
      offset = type_str.find_first_of(';', offset);
    }
    if (params)
      params->emplace_back(param_name, param_value);
  }
  return true;
}
578
ParseMimeTypeWithoutParameter(base::StringPiece type_string,std::string * top_level_type,std::string * subtype) const579 bool MimeUtil::ParseMimeTypeWithoutParameter(base::StringPiece type_string,
580 std::string* top_level_type,
581 std::string* subtype) const {
582 std::vector<base::StringPiece> components = base::SplitStringPiece(
583 type_string, "/", base::KEEP_WHITESPACE, base::SPLIT_WANT_ALL);
584 if (components.size() != 2)
585 return false;
586 components[0] = TrimWhitespaceASCII(components[0], base::TRIM_LEADING);
587 components[1] = TrimWhitespaceASCII(components[1], base::TRIM_TRAILING);
588 if (!HttpUtil::IsToken(components[0]) || !HttpUtil::IsToken(components[1]))
589 return false;
590
591 if (top_level_type)
592 top_level_type->assign(std::string(components[0]));
593
594 if (subtype)
595 subtype->assign(std::string(components[1]));
596
597 return true;
598 }
599
// Top-level media types accepted by MimeUtil::IsValidTopLevelMimeType(), all
// in lower case.
// See https://www.iana.org/assignments/media-types/media-types.xhtml
static const char* const kLegalTopLevelTypes[] = {
    "application", "audio", "example",   "font", "image",
    "message",     "model", "multipart", "text", "video",
};
605
IsValidTopLevelMimeType(const std::string & type_string) const606 bool MimeUtil::IsValidTopLevelMimeType(const std::string& type_string) const {
607 std::string lower_type = base::ToLowerASCII(type_string);
608 for (const char* const legal_type : kLegalTopLevelTypes) {
609 if (lower_type.compare(legal_type) == 0)
610 return true;
611 }
612
613 return type_string.size() > 2 &&
614 base::StartsWith(type_string, "x-",
615 base::CompareCase::INSENSITIVE_ASCII);
616 }
617
618 //----------------------------------------------------------------------------
619 // Wrappers for the singleton
620 //----------------------------------------------------------------------------
621
// Forwards to MimeUtil::GetMimeTypeFromExtension() on the shared instance.
bool GetMimeTypeFromExtension(const base::FilePath::StringType& ext,
                              std::string* mime_type) {
  return g_mime_util.Get().GetMimeTypeFromExtension(ext, mime_type);
}
626
// Forwards to MimeUtil::GetMimeTypeFromFile() on the shared instance.
bool GetMimeTypeFromFile(const base::FilePath& file_path,
                         std::string* mime_type) {
  return g_mime_util.Get().GetMimeTypeFromFile(file_path, mime_type);
}
631
// Forwards to MimeUtil::GetWellKnownMimeTypeFromExtension() on the shared
// instance.
bool GetWellKnownMimeTypeFromExtension(const base::FilePath::StringType& ext,
                                       std::string* mime_type) {
  return g_mime_util.Get().GetWellKnownMimeTypeFromExtension(ext, mime_type);
}
636
// Forwards to MimeUtil::GetPreferredExtensionForMimeType() on the shared
// instance.
bool GetPreferredExtensionForMimeType(const std::string& mime_type,
                                      base::FilePath::StringType* extension) {
  return g_mime_util.Get().GetPreferredExtensionForMimeType(mime_type,
                                                            extension);
}
642
// Forwards to MimeUtil::MatchesMimeType() on the shared instance.
bool MatchesMimeType(const std::string& mime_type_pattern,
                     const std::string& mime_type) {
  return g_mime_util.Get().MatchesMimeType(mime_type_pattern, mime_type);
}
647
// Forwards to MimeUtil::ParseMimeTypeWithoutParameter() on the shared
// instance.
bool ParseMimeTypeWithoutParameter(base::StringPiece type_string,
                                   std::string* top_level_type,
                                   std::string* subtype) {
  return g_mime_util.Get().ParseMimeTypeWithoutParameter(
      type_string, top_level_type, subtype);
}
654
// Forwards to MimeUtil::IsValidTopLevelMimeType() on the shared instance.
bool IsValidTopLevelMimeType(const std::string& type_string) {
  return g_mime_util.Get().IsValidTopLevelMimeType(type_string);
}
658
659 namespace {
660
// Image MIME types whose platform-registered extensions are collected when the
// extensions for "image/*" are requested.
// From http://www.w3schools.com/media/media_mimeref.asp and
// http://plugindoc.mozdev.org/winmime.php
static const char* const kStandardImageTypes[] = {"image/avif",
                                                  "image/bmp",
                                                  "image/cis-cod",
                                                  "image/gif",
                                                  "image/ief",
                                                  "image/jpeg",
                                                  "image/webp",
                                                  "image/pict",
                                                  "image/pipeg",
                                                  "image/png",
                                                  "image/svg+xml",
                                                  "image/tiff",
                                                  "image/vnd.microsoft.icon",
                                                  "image/x-cmu-raster",
                                                  "image/x-cmx",
                                                  "image/x-icon",
                                                  "image/x-portable-anymap",
                                                  "image/x-portable-bitmap",
                                                  "image/x-portable-graymap",
                                                  "image/x-portable-pixmap",
                                                  "image/x-rgb",
                                                  "image/x-xbitmap",
                                                  "image/x-xpixmap",
                                                  "image/x-xwindowdump"};
// Audio MIME types whose platform-registered extensions are collected when the
// extensions for "audio/*" are requested.
static const char* const kStandardAudioTypes[] = {
  "audio/aac",
  "audio/aiff",
  "audio/amr",
  "audio/basic",
  "audio/flac",
  "audio/midi",
  "audio/mp3",
  "audio/mp4",
  "audio/mpeg",
  "audio/mpeg3",
  "audio/ogg",
  "audio/vorbis",
  "audio/wav",
  "audio/webm",
  "audio/x-m4a",
  "audio/x-ms-wma",
  "audio/vnd.rn-realaudio",
  "audio/vnd.wave"
};
// Font MIME types whose platform-registered extensions are collected when the
// extensions for "font/*" are requested.
// https://tools.ietf.org/html/rfc8081
static const char* const kStandardFontTypes[] = {
    "font/collection", "font/otf",  "font/sfnt",
    "font/ttf",        "font/woff", "font/woff2",
};
// Video MIME types whose platform-registered extensions are collected when the
// extensions for "video/*" are requested.
static const char* const kStandardVideoTypes[] = {
  "video/avi",
  "video/divx",
  "video/flc",
  "video/mp4",
  "video/mpeg",
  "video/ogg",
  "video/quicktime",
  "video/sd-video",
  "video/webm",
  "video/x-dv",
  "video/x-m4v",
  "video/x-mpeg",
  "video/x-ms-asf",
  "video/x-ms-wmv"
};
728
// Associates a MIME type prefix with the list of concrete MIME types that a
// wildcard request for that prefix expands to.
struct StandardType {
  // Prefix including the trailing slash, e.g. "image/"; nullptr marks the
  // default entry.
  const char* const leading_mime_type;
  // The concrete MIME types belonging to the prefix.
  base::span<const char* const> standard_types;
};
// Table searched by GetExtensionsForMimeType() for "<prefix>/*" requests. The
// final entry (null prefix, empty list) must stay last: it is what the search
// ends up with when no prefix matches.
static const StandardType kStandardTypes[] = {{"image/", kStandardImageTypes},
                                              {"audio/", kStandardAudioTypes},
                                              {"font/", kStandardFontTypes},
                                              {"video/", kStandardVideoTypes},
                                              {nullptr, {}}};
738
739 // GetExtensionsFromHardCodedMappings() adds file extensions (without a leading
740 // dot) to the set |extensions|, for all MIME types matching |mime_type|.
741 //
742 // The meaning of |mime_type| depends on the value of |prefix_match|:
743 //
744 // * If |prefix_match = false| then |mime_type| is an exact (case-insensitive)
745 // string such as "text/plain".
746 //
747 // * If |prefix_match = true| then |mime_type| is treated as the prefix for a
748 // (case-insensitive) string. For instance "Text/" would match "text/plain".
GetExtensionsFromHardCodedMappings(base::span<const MimeInfo> mappings,const std::string & mime_type,bool prefix_match,std::unordered_set<base::FilePath::StringType> * extensions)749 void GetExtensionsFromHardCodedMappings(
750 base::span<const MimeInfo> mappings,
751 const std::string& mime_type,
752 bool prefix_match,
753 std::unordered_set<base::FilePath::StringType>* extensions) {
754 for (const auto& mapping : mappings) {
755 base::StringPiece cur_mime_type(mapping.mime_type);
756
757 if (base::StartsWith(cur_mime_type, mime_type,
758 base::CompareCase::INSENSITIVE_ASCII) &&
759 (prefix_match || (cur_mime_type.length() == mime_type.length()))) {
760 for (base::StringPiece this_extension : base::SplitStringPiece(
761 mapping.extensions, ",", base::TRIM_WHITESPACE,
762 base::SPLIT_WANT_ALL)) {
763 extensions->insert(StringToFilePathStringType(this_extension));
764 }
765 }
766 }
767 }
768
// Collects into |extensions| the extensions for a wildcard request such as
// "image/*": first whatever the platform reports for each MIME type in
// |standard_types|, then every extension the hardcoded tables list for a MIME
// type beginning with |leading_mime_type| (e.g. "image/").
void GetExtensionsHelper(
    base::span<const char* const> standard_types,
    const std::string& leading_mime_type,
    std::unordered_set<base::FilePath::StringType>* extensions) {
  for (auto* standard_type : standard_types) {
    g_mime_util.Get().GetPlatformExtensionsForMimeType(standard_type,
                                                       extensions);
  }

  // Also look up the extensions from hard-coded mappings in case that some
  // supported extensions are not registered in the system registry, like ogg.
  GetExtensionsFromHardCodedMappings(kPrimaryMappings, leading_mime_type, true,
                                     extensions);

  GetExtensionsFromHardCodedMappings(kSecondaryMappings, leading_mime_type,
                                     true, extensions);
}
786
// Appends every element of |source| to the end of |target|, in the set's
// iteration order. Elements already in |target| are kept and |source| is left
// unchanged.
//
// A single range insert is used rather than resize() followed by indexed
// assignment, so T does not need to be default-constructible and no element is
// value-initialized only to be overwritten.
template <class T>
void UnorderedSetToVector(std::unordered_set<T>* source,
                          std::vector<T>* target) {
  target->insert(target->end(), source->begin(), source->end());
}
798
// Characters to be used for mime multipart boundary: the ASCII digits and
// letters only.
//
// TODO(rsleevi): crbug.com/575779: Follow the spec or fix the spec.
// The RFC 2046 spec says the alphanumeric characters plus the
// following characters are legal for boundaries:  '()+_,-./:=?
// However the following characters, though legal, cause some sites
// to fail: (),./:=+
constexpr base::StringPiece kMimeBoundaryCharacters(
    "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
// Size of mime multipart boundary, in characters.
// NOTE(review): the RFC text quoted in GenerateMimeMultipartBoundary() allows
// 1 to 70 characters; how this constant is used is not visible here.
const size_t kMimeBoundarySize = 69;
811
812 } // namespace
813
GetExtensionsForMimeType(const std::string & unsafe_mime_type,std::vector<base::FilePath::StringType> * extensions)814 void GetExtensionsForMimeType(
815 const std::string& unsafe_mime_type,
816 std::vector<base::FilePath::StringType>* extensions) {
817 if (unsafe_mime_type == "*/*" || unsafe_mime_type == "*")
818 return;
819
820 const std::string mime_type = base::ToLowerASCII(unsafe_mime_type);
821 std::unordered_set<base::FilePath::StringType> unique_extensions;
822
823 if (base::EndsWith(mime_type, "/*", base::CompareCase::INSENSITIVE_ASCII)) {
824 std::string leading_mime_type = mime_type.substr(0, mime_type.length() - 1);
825
826 // Find the matching StandardType from within kStandardTypes, or fall
827 // through to the last (default) StandardType.
828 const StandardType* type = nullptr;
829 for (const StandardType& standard_type : kStandardTypes) {
830 type = &standard_type;
831 if (type->leading_mime_type &&
832 leading_mime_type == type->leading_mime_type) {
833 break;
834 }
835 }
836 DCHECK(type);
837 GetExtensionsHelper(type->standard_types,
838 leading_mime_type,
839 &unique_extensions);
840 } else {
841 g_mime_util.Get().GetPlatformExtensionsForMimeType(mime_type,
842 &unique_extensions);
843
844 // Also look up the extensions from hard-coded mappings in case that some
845 // supported extensions are not registered in the system registry, like ogg.
846 GetExtensionsFromHardCodedMappings(kPrimaryMappings, mime_type, false,
847 &unique_extensions);
848
849 GetExtensionsFromHardCodedMappings(kSecondaryMappings, mime_type, false,
850 &unique_extensions);
851 }
852
853 UnorderedSetToVector(&unique_extensions, extensions);
854 }
855
GenerateMimeMultipartBoundary()856 NET_EXPORT std::string GenerateMimeMultipartBoundary() {
857 // Based on RFC 1341, section "7.2.1 Multipart: The common syntax":
858 // Because encapsulation boundaries must not appear in the body parts being
859 // encapsulated, a user agent must exercise care to choose a unique
860 // boundary. The boundary in the example above could have been the result of
861 // an algorithm designed to produce boundaries with a very low probability
862 // of already existing in the data to be encapsulated without having to
863 // prescan the data.
864 // [...]
865 // the boundary parameter [...] consists of 1 to 70 characters from a set of
866 // characters known to be very robust through email gateways, and NOT ending
867 // with white space.
868 // [...]
869 // boundary := 0*69<bchars> bcharsnospace
870 // bchars := bcharsnospace / " "
871 // bcharsnospace := DIGIT / ALPHA / "'" / "(" / ")" / "+" /
872 // "_" / "," / "-" / "." / "/" / ":" / "=" / "?"
873
874 std::string result;
875 result.reserve(kMimeBoundarySize);
876 result.append("----MultipartBoundary--");
877 while (result.size() < (kMimeBoundarySize - 4)) {
878 char c = kMimeBoundaryCharacters[base::RandInt(
879 0, kMimeBoundaryCharacters.size() - 1)];
880 result.push_back(c);
881 }
882 result.append("----");
883
884 // Not a strict requirement - documentation only.
885 DCHECK_EQ(kMimeBoundarySize, result.size());
886
887 return result;
888 }
889
AddMultipartValueForUpload(const std::string & value_name,const std::string & value,const std::string & mime_boundary,const std::string & content_type,std::string * post_data)890 void AddMultipartValueForUpload(const std::string& value_name,
891 const std::string& value,
892 const std::string& mime_boundary,
893 const std::string& content_type,
894 std::string* post_data) {
895 DCHECK(post_data);
896 // First line is the boundary.
897 post_data->append("--" + mime_boundary + "\r\n");
898 // Next line is the Content-disposition.
899 post_data->append("Content-Disposition: form-data; name=\"" +
900 value_name + "\"\r\n");
901 if (!content_type.empty()) {
902 // If Content-type is specified, the next line is that.
903 post_data->append("Content-Type: " + content_type + "\r\n");
904 }
905 // Leave an empty line and append the value.
906 post_data->append("\r\n" + value + "\r\n");
907 }
908
AddMultipartValueForUploadWithFileName(const std::string & value_name,const std::string & file_name,const std::string & value,const std::string & mime_boundary,const std::string & content_type,std::string * post_data)909 void AddMultipartValueForUploadWithFileName(const std::string& value_name,
910 const std::string& file_name,
911 const std::string& value,
912 const std::string& mime_boundary,
913 const std::string& content_type,
914 std::string* post_data) {
915 DCHECK(post_data);
916 // First line is the boundary.
917 post_data->append("--" + mime_boundary + "\r\n");
918 // Next line is the Content-disposition.
919 post_data->append("Content-Disposition: form-data; name=\"" + value_name +
920 "\"; filename=\"" + file_name + "\"\r\n");
921 if (!content_type.empty()) {
922 // If Content-type is specified, the next line is that.
923 post_data->append("Content-Type: " + content_type + "\r\n");
924 }
925 // Leave an empty line and append the value.
926 post_data->append("\r\n" + value + "\r\n");
927 }
928
AddMultipartFinalDelimiterForUpload(const std::string & mime_boundary,std::string * post_data)929 void AddMultipartFinalDelimiterForUpload(const std::string& mime_boundary,
930 std::string* post_data) {
931 DCHECK(post_data);
932 post_data->append("--" + mime_boundary + "--\r\n");
933 }
934
935 // TODO(toyoshim): We may prefer to implement a strict RFC2616 media-type
936 // (https://tools.ietf.org/html/rfc2616#section-3.7) parser.
ExtractMimeTypeFromMediaType(const std::string & type_string,bool accept_comma_separated)937 absl::optional<std::string> ExtractMimeTypeFromMediaType(
938 const std::string& type_string,
939 bool accept_comma_separated) {
940 std::string::size_type end = type_string.find(';');
941 if (accept_comma_separated) {
942 end = std::min(end, type_string.find(','));
943 }
944 std::string top_level_type;
945 std::string subtype;
946 if (ParseMimeTypeWithoutParameter(type_string.substr(0, end), &top_level_type,
947 &subtype)) {
948 return top_level_type + "/" + subtype;
949 }
950 return absl::nullopt;
951 }
952
953 } // namespace net
954