• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2012 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "net/base/mime_util.h"
6 
7 #include <algorithm>
8 #include <iterator>
9 #include <map>
10 #include <optional>
11 #include <string>
12 #include <string_view>
13 #include <unordered_set>
14 
15 #include "base/base64.h"
16 #include "base/check_op.h"
17 #include "base/containers/span.h"
18 #include "base/lazy_instance.h"
19 #include "base/memory/raw_ptr_exclusion.h"
20 #include "base/no_destructor.h"
21 #include "base/rand_util.h"
22 #include "base/strings/string_number_conversions.h"
23 #include "base/strings/string_split.h"
24 #include "base/strings/string_util.h"
25 #include "base/strings/utf_string_conversions.h"
26 #include "build/build_config.h"
27 #include "net/base/platform_mime_util.h"
28 #include "net/http/http_util.h"
29 
30 using std::string;
31 
32 namespace net {
33 
34 namespace {
35 
36 // Overrides the mime type for "get a mime type" functions below, for test
37 // purposes. (Empty string by default, indicates no override.)
GetOverridingMimeType()38 std::string& GetOverridingMimeType() {
39   static base::NoDestructor<std::string> overriding_mime_type;
40   return *overriding_mime_type;
41 }
42 
43 }  // namespace
44 
45 // Singleton utility class for mime types.
46 class MimeUtil : public PlatformMimeUtil {
47  public:
48   bool GetMimeTypeFromExtension(const base::FilePath::StringType& ext,
49                                 std::string* mime_type) const;
50 
51   bool GetMimeTypeFromFile(const base::FilePath& file_path,
52                            std::string* mime_type) const;
53 
54   bool GetWellKnownMimeTypeFromExtension(const base::FilePath::StringType& ext,
55                                          std::string* mime_type) const;
56 
57   bool GetPreferredExtensionForMimeType(
58       std::string_view mime_type,
59       base::FilePath::StringType* extension) const;
60 
61   bool MatchesMimeType(std::string_view mime_type_pattern,
62                        std::string_view mime_type) const;
63 
64   bool ParseMimeTypeWithoutParameter(std::string_view type_string,
65                                      std::string* top_level_type,
66                                      std::string* subtype) const;
67 
68   bool IsValidTopLevelMimeType(std::string_view type_string) const;
69 
70  private:
71   friend struct base::LazyInstanceTraitsBase<MimeUtil>;
72 
73   MimeUtil();
74 
75   bool GetMimeTypeFromExtensionHelper(const base::FilePath::StringType& ext,
76                                       bool include_platform_types,
77                                       std::string* mime_type) const;
78 };  // class MimeUtil
79 
// The single MimeUtil instance used by the free-function wrappers in this
// file. This variable is Leaky because we need to access it from WorkerPool
// threads.
static base::LazyInstance<MimeUtil>::Leaky g_mime_util =
    LAZY_INSTANCE_INITIALIZER;
83 
// One row of a hard-coded MIME table: a MIME type together with the file
// extensions associated with it.
struct MimeInfo {
  // The MIME type, e.g. "text/html".
  const std::string_view mime_type;

  // Comma-separated list of possible extensions for the type, written without
  // leading dots (e.g. "html,htm"). The first extension is considered
  // preferred.
  const std::string_view extensions;
};
91 
92 // How to use the MIME maps
93 // ------------------------
94 // READ THIS BEFORE MODIFYING THE MIME MAPPINGS BELOW.
95 //
96 // There are two hardcoded mappings from MIME types: kPrimaryMappings and
97 // kSecondaryMappings.
98 //
99 // kPrimaryMappings:
100 //
101 //   Use this for mappings that are critical to the web platform.  Mappings you
102 //   add to this list take priority over the underlying platform when converting
103 //   from file extension -> MIME type.  Thus file extensions listed here will
104 //   work consistently across platforms.
105 //
106 // kSecondaryMappings:
107 //
108 //   Use this for mappings that must exist, but can be overridden by user
109 //   preferences.
110 //
111 // The following applies to both lists:
112 //
113 // * The same extension can appear multiple times in the same list under
114 //   different MIME types.  Extensions that appear earlier take precedence over
115 //   those that appear later.
116 //
117 // * A MIME type must not appear more than once in a single list.  It is valid
118 //   for the same MIME type to appear in kPrimaryMappings and
119 //   kSecondaryMappings.
120 //
121 // The MIME maps are used for three types of lookups:
122 //
123 // 1) MIME type -> file extension.  Implemented as
124 //    GetPreferredExtensionForMimeType().
125 //
126 //    Sources are consulted in the following order:
127 //
128 //    a) As a special case application/octet-stream is mapped to nothing.  Web
129 //       sites are supposed to use this MIME type to indicate that the content
130 //       is opaque and shouldn't be parsed as any specific type of content.  It
131 //       doesn't make sense to map this to anything.
132 //
133 //    b) The underlying platform.  If the operating system has a mapping from
134 //       the MIME type to a file extension, then that takes priority.  The
135 //       platform is assumed to represent the user's preference.
136 //
137 //    c) kPrimaryMappings.  Order doesn't matter since there should only be at
138 //       most one entry per MIME type.
139 //
140 //    d) kSecondaryMappings.  Again, order doesn't matter.
141 //
142 // 2) File extension -> MIME type.  Implemented in GetMimeTypeFromExtension().
143 //
144 //    Sources are considered in the following order:
145 //
146 //    a) kPrimaryMappings.  Order matters here since file extensions can appear
147 //       multiple times on these lists.  The first mapping in order of
148 //       appearance in the list wins.
149 //
150 //    b) Underlying platform.
151 //
152 //    c) kSecondaryMappings.  Again, the order matters.
153 //
154 // 3) File extension -> Well known MIME type.  Implemented as
155 //    GetWellKnownMimeTypeFromExtension().
156 //
157 //    This is similar to 2), with the exception that b) is skipped.  I.e.  Only
158 //    considers the hardcoded mappings in kPrimaryMappings and
159 //    kSecondaryMappings.
160 
161 // See comments above for details on how this list is used.
162 static const MimeInfo kPrimaryMappings[] = {
163     // Must precede audio/webm .
164     {"video/webm", "webm"},
165 
166     // Must precede audio/mp3
167     {"audio/mpeg", "mp3"},
168 
169     {"application/wasm", "wasm"},
170     {"application/x-chrome-extension", "crx"},
171     {"application/xhtml+xml", "xhtml,xht,xhtm"},
172     {"audio/flac", "flac"},
173     {"audio/mp3", "mp3"},
174     {"audio/ogg", "ogg,oga,opus"},
175     {"audio/wav", "wav"},
176     {"audio/webm", "webm"},
177     {"audio/x-m4a", "m4a"},
178     {"image/avif", "avif"},
179     {"image/gif", "gif"},
180     {"image/jpeg", "jpeg,jpg"},
181     {"image/png", "png"},
182     {"image/apng", "png,apng"},
183     {"image/svg+xml", "svg,svgz"},
184     {"image/webp", "webp"},
185     {"multipart/related", "mht,mhtml"},
186     {"text/css", "css"},
187     {"text/html", "html,htm,shtml,shtm"},
188     {"text/javascript", "js,mjs"},
189     {"text/xml", "xml"},
190     {"video/mp4", "mp4,m4v"},
191     {"video/ogg", "ogv,ogm"},
192 
193     // This is a primary mapping (overrides the platform) rather than secondary
194     // to work around an issue when Excel is installed on Windows. Excel
195     // registers csv as application/vnd.ms-excel instead of text/csv from RFC
196     // 4180. See https://crbug.com/139105.
197     {"text/csv", "csv"},
198 };
199 
200 // See comments above for details on how this list is used.
201 static const MimeInfo kSecondaryMappings[] = {
202     // Must precede image/vnd.microsoft.icon .
203     {"image/x-icon", "ico"},
204 
205     {"application/epub+zip", "epub"},
206     {"application/font-woff", "woff"},
207     {"application/gzip", "gz,tgz"},
208     {"application/javascript", "js"},
209     {"application/json", "json"},  // Per http://www.ietf.org/rfc/rfc4627.txt.
210     {"application/msword", "doc,dot"},
211     {"application/octet-stream", "bin,exe,com"},
212     {"application/pdf", "pdf"},
213     {"application/pkcs7-mime", "p7m,p7c,p7z"},
214     {"application/pkcs7-signature", "p7s"},
215     {"application/postscript", "ps,eps,ai"},
216     {"application/rdf+xml", "rdf"},
217     {"application/rss+xml", "rss"},
218     {"application/rtf", "rtf"},
219     {"application/vnd.android.package-archive", "apk"},
220     {"application/vnd.mozilla.xul+xml", "xul"},
221     {"application/vnd.ms-excel", "xls"},
222     {"application/vnd.ms-powerpoint", "ppt"},
223     {"application/"
224      "vnd.openxmlformats-officedocument.presentationml.presentation",
225      "pptx"},
226     {"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
227      "xlsx"},
228     {"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
229      "docx"},
230     {"application/x-gzip", "gz,tgz"},
231     {"application/x-mpegurl", "m3u8"},
232     {"application/x-shockwave-flash", "swf,swl"},
233     {"application/x-tar", "tar"},
234     {"application/x-x509-ca-cert", "cer,crt"},
235     {"application/zip", "zip"},
236     // This is the platform mapping on recent versions of Windows 10.
237     {"audio/webm", "weba"},
238     {"image/bmp", "bmp"},
239     {"image/jpeg", "jfif,pjpeg,pjp"},
240     {"image/tiff", "tiff,tif"},
241     {"image/vnd.microsoft.icon", "ico"},
242     {"image/x-png", "png"},
243     {"image/x-xbitmap", "xbm"},
244     {"message/rfc822", "eml"},
245     {"text/calendar", "ics"},
246     {"text/html", "ehtml"},
247     {"text/plain", "txt,text"},
248     {"text/vtt", "vtt"},
249     {"text/x-sh", "sh"},
250     {"text/xml", "xsl,xbl,xslt"},
251     {"video/mpeg", "mpeg,mpg"},
252 };
253 
254 // Finds mime type of |ext| from |mappings|.
255 template <size_t num_mappings>
FindMimeType(const MimeInfo (& mappings)[num_mappings],const std::string & ext)256 static std::optional<std::string_view> FindMimeType(
257     const MimeInfo (&mappings)[num_mappings],
258     const std::string& ext) {
259   for (const auto& mapping : mappings) {
260     for (std::string_view extension :
261          base::SplitStringPiece(mapping.extensions, ",", base::TRIM_WHITESPACE,
262                                 base::SPLIT_WANT_ALL)) {
263       if (base::EqualsCaseInsensitiveASCII(extension, ext)) {
264         return mapping.mime_type;
265       }
266     }
267   }
268   return std::nullopt;
269 }
270 
StringToFilePathStringType(std::string_view string_piece)271 static base::FilePath::StringType StringToFilePathStringType(
272     std::string_view string_piece) {
273 #if BUILDFLAG(IS_WIN)
274   return base::UTF8ToWide(string_piece);
275 #else
276   return std::string(string_piece);
277 #endif
278 }
279 
280 // Helper used in MimeUtil::GetPreferredExtensionForMimeType() to search
281 // preferred extension in MimeInfo arrays.
282 template <size_t num_mappings>
FindPreferredExtension(const MimeInfo (& mappings)[num_mappings],std::string_view mime_type,base::FilePath::StringType * result)283 static bool FindPreferredExtension(const MimeInfo (&mappings)[num_mappings],
284                                    std::string_view mime_type,
285                                    base::FilePath::StringType* result) {
286   // There is no preferred extension for "application/octet-stream".
287   if (mime_type == "application/octet-stream")
288     return false;
289 
290   for (const auto& mapping : mappings) {
291     if (mapping.mime_type == mime_type) {
292       const size_t pos = mapping.extensions.find(',');
293       *result = StringToFilePathStringType(mapping.extensions.substr(0, pos));
294       return true;
295     }
296   }
297   return false;
298 }
299 
GetMimeTypeFromExtension(const base::FilePath::StringType & ext,string * result) const300 bool MimeUtil::GetMimeTypeFromExtension(const base::FilePath::StringType& ext,
301                                         string* result) const {
302   return GetMimeTypeFromExtensionHelper(ext, true, result);
303 }
304 
GetWellKnownMimeTypeFromExtension(const base::FilePath::StringType & ext,string * result) const305 bool MimeUtil::GetWellKnownMimeTypeFromExtension(
306     const base::FilePath::StringType& ext,
307     string* result) const {
308   return GetMimeTypeFromExtensionHelper(ext, false, result);
309 }
310 
GetPreferredExtensionForMimeType(std::string_view mime_type,base::FilePath::StringType * extension) const311 bool MimeUtil::GetPreferredExtensionForMimeType(
312     std::string_view mime_type,
313     base::FilePath::StringType* extension) const {
314   // Search the MIME type in the platform DB first, then in kPrimaryMappings and
315   // kSecondaryMappings.
316   return GetPlatformPreferredExtensionForMimeType(mime_type, extension) ||
317          FindPreferredExtension(kPrimaryMappings, mime_type, extension) ||
318          FindPreferredExtension(kSecondaryMappings, mime_type, extension);
319 }
320 
GetMimeTypeFromFile(const base::FilePath & file_path,string * result) const321 bool MimeUtil::GetMimeTypeFromFile(const base::FilePath& file_path,
322                                    string* result) const {
323   base::FilePath::StringType file_name_str = file_path.Extension();
324   if (file_name_str.empty())
325     return false;
326   return GetMimeTypeFromExtension(file_name_str.substr(1), result);
327 }
328 
GetMimeTypeFromExtensionHelper(const base::FilePath::StringType & ext,bool include_platform_types,string * result) const329 bool MimeUtil::GetMimeTypeFromExtensionHelper(
330     const base::FilePath::StringType& ext,
331     bool include_platform_types,
332     string* result) const {
333   DCHECK(ext.empty() || ext[0] != '.')
334       << "extension passed in must not include leading dot";
335 
336   // Used for tests.
337   if (!GetOverridingMimeType().empty()) {
338     *result = GetOverridingMimeType();
339     return true;
340   }
341 
342   // Avoids crash when unable to handle a long file path. See crbug.com/48733.
343   const unsigned kMaxFilePathSize = 65536;
344   if (ext.length() > kMaxFilePathSize)
345     return false;
346 
347   // Reject a string which contains null character.
348   base::FilePath::StringType::size_type nul_pos =
349       ext.find(FILE_PATH_LITERAL('\0'));
350   if (nul_pos != base::FilePath::StringType::npos)
351     return false;
352 
353   // We implement the same algorithm as Mozilla for mapping a file extension to
354   // a mime type.  That is, we first check a hard-coded list (that cannot be
355   // overridden), and then if not found there, we defer to the system registry.
356   // Finally, we scan a secondary hard-coded list to catch types that we can
357   // deduce but that we also want to allow the OS to override.
358 
359   base::FilePath path_ext(ext);
360   const string ext_narrow_str = path_ext.AsUTF8Unsafe();
361   std::optional<std::string_view> mime_type =
362       FindMimeType(kPrimaryMappings, ext_narrow_str);
363   if (mime_type) {
364     *result = mime_type.value();
365     return true;
366   }
367 
368   if (include_platform_types && GetPlatformMimeTypeFromExtension(ext, result))
369     return true;
370 
371   mime_type = FindMimeType(kSecondaryMappings, ext_narrow_str);
372   if (mime_type) {
373     *result = mime_type.value();
374     return true;
375   }
376 
377   return false;
378 }
379 
// Defaulted: MimeUtil declares no data members of its own to initialize.
MimeUtil::MimeUtil() = default;
381 
382 // Tests for MIME parameter equality. Each parameter in the |mime_type_pattern|
383 // must be matched by a parameter in the |mime_type|. If there are no
384 // parameters in the pattern, the match is a success.
385 //
386 // According rfc2045 keys of parameters are case-insensitive, while values may
387 // or may not be case-sensitive, but they are usually case-sensitive. So, this
388 // function matches values in *case-sensitive* manner, however note that this
389 // may produce some false negatives.
MatchesMimeTypeParameters(std::string_view mime_type_pattern,std::string_view mime_type)390 bool MatchesMimeTypeParameters(std::string_view mime_type_pattern,
391                                std::string_view mime_type) {
392   typedef std::map<std::string, std::string> StringPairMap;
393 
394   const std::string_view::size_type semicolon = mime_type_pattern.find(';');
395   const std::string_view::size_type test_semicolon = mime_type.find(';');
396   if (semicolon != std::string::npos) {
397     if (test_semicolon == std::string::npos)
398       return false;
399 
400     base::StringPairs pattern_parameters;
401     base::SplitStringIntoKeyValuePairs(mime_type_pattern.substr(semicolon + 1),
402                                        '=', ';', &pattern_parameters);
403     base::StringPairs test_parameters;
404     base::SplitStringIntoKeyValuePairs(mime_type.substr(test_semicolon + 1),
405                                        '=', ';', &test_parameters);
406 
407     // Put the parameters to maps with the keys converted to lower case.
408     StringPairMap pattern_parameter_map;
409     for (const auto& pair : pattern_parameters) {
410       pattern_parameter_map[base::ToLowerASCII(pair.first)] = pair.second;
411     }
412 
413     StringPairMap test_parameter_map;
414     for (const auto& pair : test_parameters) {
415       test_parameter_map[base::ToLowerASCII(pair.first)] = pair.second;
416     }
417 
418     if (pattern_parameter_map.size() > test_parameter_map.size())
419       return false;
420 
421     for (const auto& parameter_pair : pattern_parameter_map) {
422       const auto& test_parameter_pair_it =
423           test_parameter_map.find(parameter_pair.first);
424       if (test_parameter_pair_it == test_parameter_map.end())
425         return false;
426       if (parameter_pair.second != test_parameter_pair_it->second)
427         return false;
428     }
429   }
430 
431   return true;
432 }
433 
434 // This comparison handles absolute maching and also basic
435 // wildcards.  The plugin mime types could be:
436 //      application/x-foo
437 //      application/*
438 //      application/*+xml
439 //      *
440 // Also tests mime parameters -- all parameters in the pattern must be present
441 // in the tested type for a match to succeed.
MatchesMimeType(std::string_view mime_type_pattern,std::string_view mime_type) const442 bool MimeUtil::MatchesMimeType(std::string_view mime_type_pattern,
443                                std::string_view mime_type) const {
444   if (mime_type_pattern.empty())
445     return false;
446 
447   std::string_view::size_type semicolon = mime_type_pattern.find(';');
448   const std::string_view base_pattern = mime_type_pattern.substr(0, semicolon);
449   semicolon = mime_type.find(';');
450   const std::string_view base_type = mime_type.substr(0, semicolon);
451 
452   if (base_pattern == "*" || base_pattern == "*/*")
453     return MatchesMimeTypeParameters(mime_type_pattern, mime_type);
454 
455   const std::string_view::size_type star = base_pattern.find('*');
456   if (star == std::string::npos) {
457     if (base::EqualsCaseInsensitiveASCII(base_pattern, base_type))
458       return MatchesMimeTypeParameters(mime_type_pattern, mime_type);
459     else
460       return false;
461   }
462 
463   // Test length to prevent overlap between |left| and |right|.
464   if (base_type.length() < base_pattern.length() - 1)
465     return false;
466 
467   std::string_view base_pattern_piece(base_pattern);
468   std::string_view left(base_pattern_piece.substr(0, star));
469   std::string_view right(base_pattern_piece.substr(star + 1));
470 
471   if (!base::StartsWith(base_type, left, base::CompareCase::INSENSITIVE_ASCII))
472     return false;
473 
474   if (!right.empty() &&
475       !base::EndsWith(base_type, right, base::CompareCase::INSENSITIVE_ASCII))
476     return false;
477 
478   return MatchesMimeTypeParameters(mime_type_pattern, mime_type);
479 }
480 
ParseMimeType(std::string_view type_str,std::string * mime_type,base::StringPairs * params)481 bool ParseMimeType(std::string_view type_str,
482                    std::string* mime_type,
483                    base::StringPairs* params) {
484   // Trim leading and trailing whitespace from type.  We include '(' in
485   // the trailing trim set to catch media-type comments, which are not at all
486   // standard, but may occur in rare cases.
487   size_t type_val = type_str.find_first_not_of(HTTP_LWS);
488   type_val = std::min(type_val, type_str.length());
489   size_t type_end = type_str.find_first_of(HTTP_LWS ";(", type_val);
490   if (type_end == std::string::npos)
491     type_end = type_str.length();
492 
493   // Reject a mime-type if it does not include a slash.
494   size_t slash_pos = type_str.find_first_of('/');
495   if (slash_pos == std::string::npos || slash_pos > type_end)
496     return false;
497   if (mime_type)
498     *mime_type = type_str.substr(type_val, type_end - type_val);
499 
500   // Iterate over parameters. Can't split the string around semicolons
501   // preemptively because quoted strings may include semicolons. Mostly matches
502   // logic in https://mimesniff.spec.whatwg.org/. Main differences: Does not
503   // validate characters are HTTP token code points / HTTP quoted-string token
504   // code points, and ignores spaces after "=" in parameters.
505   if (params)
506     params->clear();
507   std::string::size_type offset = type_str.find_first_of(';', type_end);
508   while (offset < type_str.size()) {
509     DCHECK_EQ(';', type_str[offset]);
510     // Trim off the semicolon.
511     ++offset;
512 
513     // Trim off any following spaces.
514     offset = type_str.find_first_not_of(HTTP_LWS, offset);
515     std::string::size_type param_name_start = offset;
516 
517     // Extend parameter name until run into a semicolon or equals sign.  Per
518     // spec, trailing spaces are not removed.
519     offset = type_str.find_first_of(";=", offset);
520 
521     // Nothing more to do if at end of string, or if there's no parameter
522     // value, since names without values aren't allowed.
523     if (offset == std::string::npos || type_str[offset] == ';')
524       continue;
525 
526     auto param_name = base::MakeStringPiece(type_str.begin() + param_name_start,
527                                             type_str.begin() + offset);
528 
529     // Now parse the value.
530     DCHECK_EQ('=', type_str[offset]);
531     // Trim off the '='.
532     offset++;
533 
534     // Remove leading spaces. This violates the spec, though it matches
535     // pre-existing behavior.
536     //
537     // TODO(mmenke): Consider doing this (only?) after parsing quotes, which
538     // seems to align more with the spec - not the content-type spec, but the
539     // GET spec's way of getting an encoding, and the spec for handling
540     // boundary values as well.
541     // See https://encoding.spec.whatwg.org/#names-and-labels.
542     offset = type_str.find_first_not_of(HTTP_LWS, offset);
543 
544     std::string param_value;
545     if (offset == std::string::npos || type_str[offset] == ';') {
546       // Nothing to do here - an unquoted string of only whitespace should be
547       // skipped.
548       continue;
549     } else if (type_str[offset] != '"') {
550       // If the first character is not a quotation mark, copy data directly.
551       std::string::size_type value_start = offset;
552       offset = type_str.find_first_of(';', offset);
553       std::string::size_type value_end = offset;
554 
555       // Remove terminal whitespace. If ran off the end of the string, have to
556       // update |value_end| first.
557       if (value_end == std::string::npos)
558         value_end = type_str.size();
559       while (value_end > value_start &&
560              HttpUtil::IsLWS(type_str[value_end - 1])) {
561         --value_end;
562       }
563 
564       param_value = type_str.substr(value_start, value_end - value_start);
565     } else {
566       // Otherwise, append data, with special handling for backslashes, until
567       // a close quote.  Do not trim whitespace for quoted-string.
568 
569       // Skip open quote.
570       DCHECK_EQ('"', type_str[offset]);
571       ++offset;
572 
573       while (offset < type_str.size() && type_str[offset] != '"') {
574         // Skip over backslash and append the next character, when not at
575         // the end of the string. Otherwise, copy the next character (Which may
576         // be a backslash).
577         if (type_str[offset] == '\\' && offset + 1 < type_str.size()) {
578           ++offset;
579         }
580         param_value += type_str[offset];
581         ++offset;
582       }
583 
584       offset = type_str.find_first_of(';', offset);
585     }
586     if (params)
587       params->emplace_back(param_name, param_value);
588   }
589   return true;
590 }
591 
ParseMimeTypeWithoutParameter(std::string_view type_string,std::string * top_level_type,std::string * subtype) const592 bool MimeUtil::ParseMimeTypeWithoutParameter(std::string_view type_string,
593                                              std::string* top_level_type,
594                                              std::string* subtype) const {
595   std::vector<std::string_view> components = base::SplitStringPiece(
596       type_string, "/", base::KEEP_WHITESPACE, base::SPLIT_WANT_ALL);
597   if (components.size() != 2)
598     return false;
599   components[0] = TrimWhitespaceASCII(components[0], base::TRIM_LEADING);
600   components[1] = TrimWhitespaceASCII(components[1], base::TRIM_TRAILING);
601   if (!HttpUtil::IsToken(components[0]) || !HttpUtil::IsToken(components[1]))
602     return false;
603 
604   if (top_level_type)
605     top_level_type->assign(std::string(components[0]));
606 
607   if (subtype)
608     subtype->assign(std::string(components[1]));
609 
610   return true;
611 }
612 
// Top-level media types accepted by IsValidTopLevelMimeType(), all lower case.
// See https://www.iana.org/assignments/media-types/media-types.xhtml
static constexpr const char* kLegalTopLevelTypes[] = {
    "application",
    "audio",
    "example",
    "font",
    "image",
    "message",
    "model",
    "multipart",
    "text",
    "video",
};
618 
IsValidTopLevelMimeType(std::string_view type_string) const619 bool MimeUtil::IsValidTopLevelMimeType(std::string_view type_string) const {
620   std::string lower_type = base::ToLowerASCII(type_string);
621   for (const char* const legal_type : kLegalTopLevelTypes) {
622     if (lower_type.compare(legal_type) == 0) {
623       return true;
624     }
625   }
626 
627   return type_string.size() > 2 &&
628          base::StartsWith(type_string, "x-",
629                           base::CompareCase::INSENSITIVE_ASCII);
630 }
631 
632 //----------------------------------------------------------------------------
633 // Wrappers for the singleton
634 //----------------------------------------------------------------------------
635 
GetMimeTypeFromExtension(const base::FilePath::StringType & ext,std::string * mime_type)636 bool GetMimeTypeFromExtension(const base::FilePath::StringType& ext,
637                               std::string* mime_type) {
638   return g_mime_util.Get().GetMimeTypeFromExtension(ext, mime_type);
639 }
640 
GetMimeTypeFromFile(const base::FilePath & file_path,std::string * mime_type)641 bool GetMimeTypeFromFile(const base::FilePath& file_path,
642                          std::string* mime_type) {
643   return g_mime_util.Get().GetMimeTypeFromFile(file_path, mime_type);
644 }
645 
GetWellKnownMimeTypeFromExtension(const base::FilePath::StringType & ext,std::string * mime_type)646 bool GetWellKnownMimeTypeFromExtension(const base::FilePath::StringType& ext,
647                                        std::string* mime_type) {
648   return g_mime_util.Get().GetWellKnownMimeTypeFromExtension(ext, mime_type);
649 }
650 
GetPreferredExtensionForMimeType(std::string_view mime_type,base::FilePath::StringType * extension)651 bool GetPreferredExtensionForMimeType(std::string_view mime_type,
652                                       base::FilePath::StringType* extension) {
653   return g_mime_util.Get().GetPreferredExtensionForMimeType(mime_type,
654                                                             extension);
655 }
656 
MatchesMimeType(std::string_view mime_type_pattern,std::string_view mime_type)657 bool MatchesMimeType(std::string_view mime_type_pattern,
658                      std::string_view mime_type) {
659   return g_mime_util.Get().MatchesMimeType(mime_type_pattern, mime_type);
660 }
661 
ParseMimeTypeWithoutParameter(std::string_view type_string,std::string * top_level_type,std::string * subtype)662 bool ParseMimeTypeWithoutParameter(std::string_view type_string,
663                                    std::string* top_level_type,
664                                    std::string* subtype) {
665   return g_mime_util.Get().ParseMimeTypeWithoutParameter(
666       type_string, top_level_type, subtype);
667 }
668 
IsValidTopLevelMimeType(std::string_view type_string)669 bool IsValidTopLevelMimeType(std::string_view type_string) {
670   return g_mime_util.Get().IsValidTopLevelMimeType(type_string);
671 }
672 
673 namespace {
674 
// Standard image/* MIME types.
// From http://www.w3schools.com/media/media_mimeref.asp and
// http://plugindoc.mozdev.org/winmime.php
static constexpr const char* kStandardImageTypes[] = {
    "image/avif",
    "image/bmp",
    "image/cis-cod",
    "image/gif",
    "image/heic",
    "image/heif",
    "image/ief",
    "image/jpeg",
    "image/webp",
    "image/pict",
    "image/pipeg",
    "image/png",
    "image/svg+xml",
    "image/tiff",
    "image/vnd.microsoft.icon",
    "image/x-cmu-raster",
    "image/x-cmx",
    "image/x-icon",
    "image/x-portable-anymap",
    "image/x-portable-bitmap",
    "image/x-portable-graymap",
    "image/x-portable-pixmap",
    "image/x-rgb",
    "image/x-xbitmap",
    "image/x-xpixmap",
    "image/x-xwindowdump",
};
// Standard audio/* MIME types.
static constexpr const char* kStandardAudioTypes[] = {
    "audio/aac",
    "audio/aiff",
    "audio/amr",
    "audio/basic",
    "audio/flac",
    "audio/midi",
    "audio/mp3",
    "audio/mp4",
    "audio/mpeg",
    "audio/mpeg3",
    "audio/ogg",
    "audio/vorbis",
    "audio/wav",
    "audio/webm",
    "audio/x-m4a",
    "audio/x-ms-wma",
    "audio/vnd.rn-realaudio",
    "audio/vnd.wave",
};
// Standard font/* MIME types, see https://tools.ietf.org/html/rfc8081
static constexpr const char* kStandardFontTypes[] = {
    "font/collection",
    "font/otf",
    "font/sfnt",
    "font/ttf",
    "font/woff",
    "font/woff2",
};
// Standard video/* MIME types.
static constexpr const char* kStandardVideoTypes[] = {
    "video/avi",
    "video/divx",
    "video/flc",
    "video/mp4",
    "video/mpeg",
    "video/ogg",
    "video/quicktime",
    "video/sd-video",
    "video/webm",
    "video/x-dv",
    "video/x-m4v",
    "video/x-mpeg",
    "video/x-ms-asf",
    "video/x-ms-wmv",
};
744 
745 struct StandardType {
746   const char* const leading_mime_type;
747   // TODO(367764863) Rewrite to base::raw_span.
748   RAW_PTR_EXCLUSION base::span<const char* const> standard_types;
749 };
750 static const StandardType kStandardTypes[] = {{"image/", kStandardImageTypes},
751                                               {"audio/", kStandardAudioTypes},
752                                               {"font/", kStandardFontTypes},
753                                               {"video/", kStandardVideoTypes},
754                                               {nullptr, {}}};
755 
756 // GetExtensionsFromHardCodedMappings() adds file extensions (without a leading
757 // dot) to the set |extensions|, for all MIME types matching |mime_type|.
758 //
759 // The meaning of |mime_type| depends on the value of |prefix_match|:
760 //
761 //  * If |prefix_match = false| then |mime_type| is an exact (case-insensitive)
762 //    string such as "text/plain".
763 //
764 //  * If |prefix_match = true| then |mime_type| is treated as the prefix for a
765 //    (case-insensitive) string. For instance "Text/" would match "text/plain".
GetExtensionsFromHardCodedMappings(base::span<const MimeInfo> mappings,const std::string & mime_type,bool prefix_match,std::unordered_set<base::FilePath::StringType> * extensions)766 void GetExtensionsFromHardCodedMappings(
767     base::span<const MimeInfo> mappings,
768     const std::string& mime_type,
769     bool prefix_match,
770     std::unordered_set<base::FilePath::StringType>* extensions) {
771   for (const auto& mapping : mappings) {
772     std::string_view cur_mime_type(mapping.mime_type);
773 
774     if (base::StartsWith(cur_mime_type, mime_type,
775                          base::CompareCase::INSENSITIVE_ASCII) &&
776         (prefix_match || (cur_mime_type.length() == mime_type.length()))) {
777       for (std::string_view this_extension : base::SplitStringPiece(
778                mapping.extensions, ",", base::TRIM_WHITESPACE,
779                base::SPLIT_WANT_ALL)) {
780         extensions->insert(StringToFilePathStringType(this_extension));
781       }
782     }
783   }
784 }
785 
GetExtensionsHelper(base::span<const char * const> standard_types,const std::string & leading_mime_type,std::unordered_set<base::FilePath::StringType> * extensions)786 void GetExtensionsHelper(
787     base::span<const char* const> standard_types,
788     const std::string& leading_mime_type,
789     std::unordered_set<base::FilePath::StringType>* extensions) {
790   for (auto* standard_type : standard_types) {
791     g_mime_util.Get().GetPlatformExtensionsForMimeType(standard_type,
792                                                        extensions);
793   }
794 
795   // Also look up the extensions from hard-coded mappings in case that some
796   // supported extensions are not registered in the system registry, like ogg.
797   GetExtensionsFromHardCodedMappings(kPrimaryMappings, leading_mime_type, true,
798                                      extensions);
799 
800   GetExtensionsFromHardCodedMappings(kSecondaryMappings, leading_mime_type,
801                                      true, extensions);
802 }
803 
// Appends a copy of every element of |source| to the end of |target|.
//
// Elements already in |target| are kept, in place. The appended elements
// follow |source|'s iteration order, which is unspecified for an unordered
// set. |source| itself is left unchanged.
template <class T>
void UnorderedSetToVector(std::unordered_set<T>* source,
                          std::vector<T>* target) {
  // A single range insert copy-constructs each element directly in its final
  // slot. Unlike resize() followed by indexed assignment, it does not first
  // default-construct placeholders, so T need not be default-constructible.
  target->insert(target->end(), source->begin(), source->end());
}
815 
// Alphabet from which the random part of a MIME multipart boundary is drawn.
//
// TODO(rsleevi): crbug.com/575779: Follow the spec or fix the spec.
// RFC 2046 allows the alphanumeric characters plus the following characters
// in a boundary:  '()+_,-./:=?
// Some of those, while legal, cause some sites to fail: (),./:=+
// Only alphanumeric characters are therefore listed here.
constexpr std::string_view kMimeBoundaryCharacters =
    "0123456789"
    "abcdefghijklmnopqrstuvwxyz"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
825 
// Total length, in characters, of a generated MIME multipart boundary.
constexpr size_t kMimeBoundarySize = 69;

// The boundary grammar (RFC 1341 section 7.2.1 / RFC 2046 section 5.1.1)
// limits a boundary to between 1 and 70 characters.
static_assert(kMimeBoundarySize >= 1 && kMimeBoundarySize <= 70,
              "MIME multipart boundaries must be 1 to 70 characters long");
828 
829 }  // namespace
830 
GetExtensionsForMimeType(std::string_view unsafe_mime_type,std::vector<base::FilePath::StringType> * extensions)831 void GetExtensionsForMimeType(
832     std::string_view unsafe_mime_type,
833     std::vector<base::FilePath::StringType>* extensions) {
834   if (unsafe_mime_type == "*/*" || unsafe_mime_type == "*")
835     return;
836 
837   const std::string mime_type = base::ToLowerASCII(unsafe_mime_type);
838   std::unordered_set<base::FilePath::StringType> unique_extensions;
839 
840   if (base::EndsWith(mime_type, "/*", base::CompareCase::INSENSITIVE_ASCII)) {
841     std::string leading_mime_type = mime_type.substr(0, mime_type.length() - 1);
842 
843     // Find the matching StandardType from within kStandardTypes, or fall
844     // through to the last (default) StandardType.
845     const StandardType* type = nullptr;
846     for (const StandardType& standard_type : kStandardTypes) {
847       type = &standard_type;
848       if (type->leading_mime_type &&
849           leading_mime_type == type->leading_mime_type) {
850         break;
851       }
852     }
853     DCHECK(type);
854     GetExtensionsHelper(type->standard_types,
855                         leading_mime_type,
856                         &unique_extensions);
857   } else {
858     g_mime_util.Get().GetPlatformExtensionsForMimeType(mime_type,
859                                                        &unique_extensions);
860 
861     // Also look up the extensions from hard-coded mappings in case that some
862     // supported extensions are not registered in the system registry, like ogg.
863     GetExtensionsFromHardCodedMappings(kPrimaryMappings, mime_type, false,
864                                        &unique_extensions);
865 
866     GetExtensionsFromHardCodedMappings(kSecondaryMappings, mime_type, false,
867                                        &unique_extensions);
868   }
869 
870   UnorderedSetToVector(&unique_extensions, extensions);
871 }
872 
GenerateMimeMultipartBoundary()873 NET_EXPORT std::string GenerateMimeMultipartBoundary() {
874   // Based on RFC 1341, section "7.2.1 Multipart: The common syntax":
875   //   Because encapsulation boundaries must not appear in the body parts being
876   //   encapsulated, a user agent must exercise care to choose a unique
877   //   boundary. The boundary in the example above could have been the result of
878   //   an algorithm designed to produce boundaries with a very low probability
879   //   of already existing in the data to be encapsulated without having to
880   //   prescan the data.
881   //   [...]
882   //   the boundary parameter [...] consists of 1 to 70 characters from a set of
883   //   characters known to be very robust through email gateways, and NOT ending
884   //   with white space.
885   //   [...]
886   //   boundary := 0*69<bchars> bcharsnospace
887   //   bchars := bcharsnospace / " "
888   //   bcharsnospace := DIGIT / ALPHA / "'" / "(" / ")" / "+" /
889   //            "_" / "," / "-" / "." / "/" / ":" / "=" / "?"
890 
891   std::string result;
892   result.reserve(kMimeBoundarySize);
893   result.append("----MultipartBoundary--");
894   while (result.size() < (kMimeBoundarySize - 4)) {
895     char c = kMimeBoundaryCharacters[base::RandInt(
896         0, kMimeBoundaryCharacters.size() - 1)];
897     result.push_back(c);
898   }
899   result.append("----");
900 
901   // Not a strict requirement - documentation only.
902   DCHECK_EQ(kMimeBoundarySize, result.size());
903 
904   return result;
905 }
906 
AddMultipartValueForUpload(const std::string & value_name,const std::string & value,const std::string & mime_boundary,const std::string & content_type,std::string * post_data)907 void AddMultipartValueForUpload(const std::string& value_name,
908                                 const std::string& value,
909                                 const std::string& mime_boundary,
910                                 const std::string& content_type,
911                                 std::string* post_data) {
912   DCHECK(post_data);
913   // First line is the boundary.
914   post_data->append("--" + mime_boundary + "\r\n");
915   // Next line is the Content-disposition.
916   post_data->append("Content-Disposition: form-data; name=\"" +
917                     value_name + "\"\r\n");
918   if (!content_type.empty()) {
919     // If Content-type is specified, the next line is that.
920     post_data->append("Content-Type: " + content_type + "\r\n");
921   }
922   // Leave an empty line and append the value.
923   post_data->append("\r\n" + value + "\r\n");
924 }
925 
AddMultipartValueForUploadWithFileName(const std::string & value_name,const std::string & file_name,const std::string & value,const std::string & mime_boundary,const std::string & content_type,std::string * post_data)926 void AddMultipartValueForUploadWithFileName(const std::string& value_name,
927                                             const std::string& file_name,
928                                             const std::string& value,
929                                             const std::string& mime_boundary,
930                                             const std::string& content_type,
931                                             std::string* post_data) {
932   DCHECK(post_data);
933   // First line is the boundary.
934   post_data->append("--" + mime_boundary + "\r\n");
935   // Next line is the Content-disposition.
936   post_data->append("Content-Disposition: form-data; name=\"" + value_name +
937                     "\"; filename=\"" + file_name + "\"\r\n");
938   if (!content_type.empty()) {
939     // If Content-type is specified, the next line is that.
940     post_data->append("Content-Type: " + content_type + "\r\n");
941   }
942   // Leave an empty line and append the value.
943   post_data->append("\r\n" + value + "\r\n");
944 }
945 
AddMultipartFinalDelimiterForUpload(const std::string & mime_boundary,std::string * post_data)946 void AddMultipartFinalDelimiterForUpload(const std::string& mime_boundary,
947                                          std::string* post_data) {
948   DCHECK(post_data);
949   post_data->append("--" + mime_boundary + "--\r\n");
950 }
951 
952 // TODO(toyoshim): We may prefer to implement a strict RFC2616 media-type
953 // (https://tools.ietf.org/html/rfc2616#section-3.7) parser.
ExtractMimeTypeFromMediaType(std::string_view type_string,bool accept_comma_separated)954 std::optional<std::string> ExtractMimeTypeFromMediaType(
955     std::string_view type_string,
956     bool accept_comma_separated) {
957   std::string::size_type end = type_string.find(';');
958   if (accept_comma_separated) {
959     end = std::min(end, type_string.find(','));
960   }
961   std::string top_level_type;
962   std::string subtype;
963   if (ParseMimeTypeWithoutParameter(type_string.substr(0, end), &top_level_type,
964                                     &subtype)) {
965     return top_level_type + "/" + subtype;
966   }
967   return std::nullopt;
968 }
969 
ScopedOverrideGetMimeTypeForTesting(std::string_view overriding_mime_type)970 ScopedOverrideGetMimeTypeForTesting::ScopedOverrideGetMimeTypeForTesting(
971     std::string_view overriding_mime_type) {
972   GetOverridingMimeType() = overriding_mime_type;
973 }
974 
~ScopedOverrideGetMimeTypeForTesting()975 ScopedOverrideGetMimeTypeForTesting::~ScopedOverrideGetMimeTypeForTesting() {
976   GetOverridingMimeType().clear();
977 }
978 
979 }  // namespace net
980