• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2012 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include <algorithm>
6 #include <iterator>
7 #include <map>
8 #include <string>
9 #include <unordered_set>
10 
11 #include "base/base64.h"
12 #include "base/check_op.h"
13 #include "base/containers/span.h"
14 #include "base/lazy_instance.h"
15 #include "base/rand_util.h"
16 #include "base/strings/string_number_conversions.h"
17 #include "base/strings/string_piece.h"
18 #include "base/strings/string_split.h"
19 #include "base/strings/string_util.h"
20 #include "base/strings/utf_string_conversions.h"
21 #include "build/build_config.h"
22 #include "net/base/mime_util.h"
23 #include "net/base/platform_mime_util.h"
24 #include "net/http/http_util.h"
25 
26 using std::string;
27 
28 namespace net {
29 
// Singleton utility class for mime types.
//
// Implements the extension <-> MIME type lookups and the MIME type matching /
// parsing helpers defined in this file. The constructor is private; the
// instance used by this file is |g_mime_util|.
class MimeUtil : public PlatformMimeUtil {
 public:
  // Maps |ext| (no leading dot) to a MIME type using the hardcoded mappings
  // and the platform. Returns true and writes |mime_type| on success.
  bool GetMimeTypeFromExtension(const base::FilePath::StringType& ext,
                                std::string* mime_type) const;

  // Same as GetMimeTypeFromExtension(), using the extension of |file_path|.
  // Returns false if the path has no extension.
  bool GetMimeTypeFromFile(const base::FilePath& file_path,
                           std::string* mime_type) const;

  // Like GetMimeTypeFromExtension(), but only consults the hardcoded
  // mappings; the platform is not queried.
  bool GetWellKnownMimeTypeFromExtension(const base::FilePath::StringType& ext,
                                         std::string* mime_type) const;

  // Maps |mime_type| to its preferred extension, asking the platform first
  // and then the hardcoded mappings. Returns true and writes |extension| on
  // success.
  bool GetPreferredExtensionForMimeType(
      const std::string& mime_type,
      base::FilePath::StringType* extension) const;

  // Returns true if |mime_type| matches |mime_type_pattern|, which may
  // contain a single '*' wildcard and parameters that must all be present in
  // |mime_type|.
  bool MatchesMimeType(const std::string& mime_type_pattern,
                       const std::string& mime_type) const;

  // Splits "type/subtype" (no parameters) into its two components. Either
  // output pointer may be null. Returns false if |type_string| does not have
  // exactly two '/'-separated components or either one is not an HTTP token.
  bool ParseMimeTypeWithoutParameter(base::StringPiece type_string,
                                     std::string* top_level_type,
                                     std::string* subtype) const;

  // Returns true if |type_string| is one of the known top-level types or an
  // "x-" prefixed type (compared case-insensitively).
  bool IsValidTopLevelMimeType(const std::string& type_string) const;

 private:
  // Lets base::LazyInstance reach the private constructor.
  friend struct base::LazyInstanceTraitsBase<MimeUtil>;

  MimeUtil();

  // Shared implementation of the two extension -> MIME type lookups.
  // |include_platform_types| controls whether the platform is consulted
  // between the primary and secondary hardcoded mappings.
  bool GetMimeTypeFromExtensionHelper(const base::FilePath::StringType& ext,
                                      bool include_platform_types,
                                      std::string* mime_type) const;
};  // class MimeUtil
64 
// The MimeUtil instance used by the free-function wrappers in this file, which
// reach it through g_mime_util.Get().
// This variable is Leaky because we need to access it from WorkerPool threads.
static base::LazyInstance<MimeUtil>::Leaky g_mime_util =
    LAZY_INSTANCE_INITIALIZER;
68 
// One row of a hardcoded MIME mapping table: a MIME type together with the
// file extensions associated with it.
struct MimeInfo {
  // The MIME type, e.g. "text/html".
  const char* const mime_type;

  // Comma-separated list of possible extensions for the type. The first
  // extension is considered preferred. Extensions are stored without a
  // leading dot and without whitespace around the commas.
  const char* const extensions;
};
76 
77 // How to use the MIME maps
78 // ------------------------
79 // READ THIS BEFORE MODIFYING THE MIME MAPPINGS BELOW.
80 //
81 // There are two hardcoded mappings from MIME types: kPrimaryMappings and
82 // kSecondaryMappings.
83 //
84 // kPrimaryMappings:
85 //
86 //   Use this for mappings that are critical to the web platform.  Mappings you
87 //   add to this list take priority over the underlying platform when converting
88 //   from file extension -> MIME type.  Thus file extensions listed here will
89 //   work consistently across platforms.
90 //
91 // kSecondaryMappings:
92 //
93 //   Use this for mappings that must exist, but can be overridden by user
94 //   preferences.
95 //
96 // The following applies to both lists:
97 //
98 // * The same extension can appear multiple times in the same list under
99 //   different MIME types.  Extensions that appear earlier take precedence over
100 //   those that appear later.
101 //
102 // * A MIME type must not appear more than once in a single list.  It is valid
103 //   for the same MIME type to appear in kPrimaryMappings and
104 //   kSecondaryMappings.
105 //
106 // The MIME maps are used for three types of lookups:
107 //
108 // 1) MIME type -> file extension.  Implemented as
109 //    GetPreferredExtensionForMimeType().
110 //
111 //    Sources are consulted in the following order:
112 //
113 //    a) As a special case application/octet-stream is mapped to nothing.  Web
114 //       sites are supposed to use this MIME type to indicate that the content
115 //       is opaque and shouldn't be parsed as any specific type of content.  It
116 //       doesn't make sense to map this to anything.
117 //
118 //    b) The underlying platform.  If the operating system has a mapping from
119 //       the MIME type to a file extension, then that takes priority.  The
120 //       platform is assumed to represent the user's preference.
121 //
122 //    c) kPrimaryMappings.  Order doesn't matter since there should only be at
123 //       most one entry per MIME type.
124 //
125 //    d) kSecondaryMappings.  Again, order doesn't matter.
126 //
127 // 2) File extension -> MIME type.  Implemented in GetMimeTypeFromExtension().
128 //
129 //    Sources are considered in the following order:
130 //
131 //    a) kPrimaryMappings.  Order matters here since file extensions can appear
132 //       multiple times on these lists.  The first mapping in order of
133 //       appearance in the list wins.
134 //
135 //    b) Underlying platform.
136 //
137 //    c) kSecondaryMappings.  Again, the order matters.
138 //
139 // 3) File extension -> Well known MIME type.  Implemented as
140 //    GetWellKnownMimeTypeFromExtension().
141 //
142 //    This is similar to 2), with the exception that b) is skipped.  I.e.  Only
143 //    considers the hardcoded mappings in kPrimaryMappings and
144 //    kSecondaryMappings.
145 
// See comments above for details on how this list is used.
//
// Extension lookups return the first row that lists the extension, so the
// relative order of rows sharing an extension is significant.
static const MimeInfo kPrimaryMappings[] = {
    // Must precede audio/webm .
    {"video/webm", "webm"},

    // Must precede audio/mp3
    {"audio/mpeg", "mp3"},

    {"application/wasm", "wasm"},
    {"application/x-chrome-extension", "crx"},
    {"application/xhtml+xml", "xhtml,xht,xhtm"},
    {"audio/flac", "flac"},
    {"audio/mp3", "mp3"},
    {"audio/ogg", "ogg,oga,opus"},
    {"audio/wav", "wav"},
    {"audio/webm", "webm"},
    {"audio/x-m4a", "m4a"},
    {"image/avif", "avif"},
    {"image/gif", "gif"},
    {"image/jpeg", "jpeg,jpg"},
    // image/png comes before image/apng, so "png" resolves to image/png.
    {"image/png", "png"},
    {"image/apng", "png,apng"},
    {"image/svg+xml", "svg,svgz"},
    {"image/webp", "webp"},
    {"multipart/related", "mht,mhtml"},
    {"text/css", "css"},
    {"text/html", "html,htm,shtml,shtm"},
    {"text/javascript", "js,mjs"},
    {"text/xml", "xml"},
    {"video/mp4", "mp4,m4v"},
    {"video/ogg", "ogv,ogm"},

    // This is a primary mapping (overrides the platform) rather than secondary
    // to work around an issue when Excel is installed on Windows. Excel
    // registers csv as application/vnd.ms-excel instead of text/csv from RFC
    // 4180. See https://crbug.com/139105.
    {"text/csv", "csv"},
};
184 
// See comments above for details on how this list is used.
//
// As with kPrimaryMappings, extension lookups return the first row listing
// the extension, so earlier rows win when an extension appears more than once
// (e.g. "ico", "gz", "tgz").
static const MimeInfo kSecondaryMappings[] = {
    // Must precede image/vnd.microsoft.icon .
    {"image/x-icon", "ico"},

    {"application/epub+zip", "epub"},
    {"application/font-woff", "woff"},
    {"application/gzip", "gz,tgz"},
    {"application/javascript", "js"},
    {"application/json", "json"},  // Per http://www.ietf.org/rfc/rfc4627.txt.
    {"application/msword", "doc,dot"},
    // Only used for extension -> MIME type lookups; the MIME type -> extension
    // lookup never returns an extension for application/octet-stream.
    {"application/octet-stream", "bin,exe,com"},
    {"application/pdf", "pdf"},
    {"application/pkcs7-mime", "p7m,p7c,p7z"},
    {"application/pkcs7-signature", "p7s"},
    {"application/postscript", "ps,eps,ai"},
    {"application/rdf+xml", "rdf"},
    {"application/rss+xml", "rss"},
    {"application/rtf", "rtf"},
    {"application/vnd.android.package-archive", "apk"},
    {"application/vnd.mozilla.xul+xml", "xul"},
    {"application/vnd.ms-excel", "xls"},
    {"application/vnd.ms-powerpoint", "ppt"},
    {"application/"
     "vnd.openxmlformats-officedocument.presentationml.presentation",
     "pptx"},
    {"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
     "xlsx"},
    {"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
     "docx"},
    {"application/x-gzip", "gz,tgz"},
    {"application/x-mpegurl", "m3u8"},
    {"application/x-shockwave-flash", "swf,swl"},
    {"application/x-tar", "tar"},
    {"application/x-x509-ca-cert", "cer,crt"},
    {"application/zip", "zip"},
    // This is the platform mapping on recent versions of Windows 10.
    {"audio/webm", "weba"},
    {"image/bmp", "bmp"},
    {"image/jpeg", "jfif,pjpeg,pjp"},
    {"image/tiff", "tiff,tif"},
    {"image/vnd.microsoft.icon", "ico"},
    {"image/x-png", "png"},
    {"image/x-xbitmap", "xbm"},
    {"message/rfc822", "eml"},
    {"text/calendar", "ics"},
    {"text/html", "ehtml"},
    {"text/plain", "txt,text"},
    {"text/x-sh", "sh"},
    {"text/xml", "xsl,xbl,xslt"},
    {"video/mpeg", "mpeg,mpg"},
};
237 
238 // Finds mime type of |ext| from |mappings|.
239 template <size_t num_mappings>
FindMimeType(const MimeInfo (& mappings)[num_mappings],const std::string & ext)240 static const char* FindMimeType(const MimeInfo (&mappings)[num_mappings],
241                                 const std::string& ext) {
242   for (const auto& mapping : mappings) {
243     const char* extensions = mapping.extensions;
244     for (;;) {
245       size_t end_pos = strcspn(extensions, ",");
246       // The length check is required to prevent the StringPiece below from
247       // including uninitialized memory if ext is longer than extensions.
248       if (end_pos == ext.size() &&
249           base::EqualsCaseInsensitiveASCII(
250               base::StringPiece(extensions, ext.size()), ext)) {
251         return mapping.mime_type;
252       }
253       extensions += end_pos;
254       if (!*extensions)
255         break;
256       extensions += 1;  // skip over comma
257     }
258   }
259   return nullptr;
260 }
261 
StringToFilePathStringType(base::StringPiece string_piece)262 static base::FilePath::StringType StringToFilePathStringType(
263     base::StringPiece string_piece) {
264 #if BUILDFLAG(IS_WIN)
265   return base::UTF8ToWide(string_piece);
266 #else
267   return std::string(string_piece);
268 #endif
269 }
270 
271 // Helper used in MimeUtil::GetPreferredExtensionForMimeType() to search
272 // preferred extension in MimeInfo arrays.
273 template <size_t num_mappings>
FindPreferredExtension(const MimeInfo (& mappings)[num_mappings],const std::string & mime_type,base::FilePath::StringType * result)274 static bool FindPreferredExtension(const MimeInfo (&mappings)[num_mappings],
275                                    const std::string& mime_type,
276                                    base::FilePath::StringType* result) {
277   // There is no preferred extension for "application/octet-stream".
278   if (mime_type == "application/octet-stream")
279     return false;
280 
281   for (const auto& mapping : mappings) {
282     if (mapping.mime_type == mime_type) {
283       const char* extensions = mapping.extensions;
284       const char* extension_end = strchr(extensions, ',');
285       size_t len =
286           extension_end ? extension_end - extensions : strlen(extensions);
287       *result = StringToFilePathStringType(base::StringPiece(extensions, len));
288       return true;
289     }
290   }
291   return false;
292 }
293 
GetMimeTypeFromExtension(const base::FilePath::StringType & ext,string * result) const294 bool MimeUtil::GetMimeTypeFromExtension(const base::FilePath::StringType& ext,
295                                         string* result) const {
296   return GetMimeTypeFromExtensionHelper(ext, true, result);
297 }
298 
GetWellKnownMimeTypeFromExtension(const base::FilePath::StringType & ext,string * result) const299 bool MimeUtil::GetWellKnownMimeTypeFromExtension(
300     const base::FilePath::StringType& ext,
301     string* result) const {
302   return GetMimeTypeFromExtensionHelper(ext, false, result);
303 }
304 
GetPreferredExtensionForMimeType(const std::string & mime_type,base::FilePath::StringType * extension) const305 bool MimeUtil::GetPreferredExtensionForMimeType(
306     const std::string& mime_type,
307     base::FilePath::StringType* extension) const {
308   // Search the MIME type in the platform DB first, then in kPrimaryMappings and
309   // kSecondaryMappings.
310   return GetPlatformPreferredExtensionForMimeType(mime_type, extension) ||
311          FindPreferredExtension(kPrimaryMappings, mime_type, extension) ||
312          FindPreferredExtension(kSecondaryMappings, mime_type, extension);
313 }
314 
GetMimeTypeFromFile(const base::FilePath & file_path,string * result) const315 bool MimeUtil::GetMimeTypeFromFile(const base::FilePath& file_path,
316                                    string* result) const {
317   base::FilePath::StringType file_name_str = file_path.Extension();
318   if (file_name_str.empty())
319     return false;
320   return GetMimeTypeFromExtension(file_name_str.substr(1), result);
321 }
322 
GetMimeTypeFromExtensionHelper(const base::FilePath::StringType & ext,bool include_platform_types,string * result) const323 bool MimeUtil::GetMimeTypeFromExtensionHelper(
324     const base::FilePath::StringType& ext,
325     bool include_platform_types,
326     string* result) const {
327   DCHECK(ext.empty() || ext[0] != '.')
328       << "extension passed in must not include leading dot";
329 
330   // Avoids crash when unable to handle a long file path. See crbug.com/48733.
331   const unsigned kMaxFilePathSize = 65536;
332   if (ext.length() > kMaxFilePathSize)
333     return false;
334 
335   // Reject a string which contains null character.
336   base::FilePath::StringType::size_type nul_pos =
337       ext.find(FILE_PATH_LITERAL('\0'));
338   if (nul_pos != base::FilePath::StringType::npos)
339     return false;
340 
341   // We implement the same algorithm as Mozilla for mapping a file extension to
342   // a mime type.  That is, we first check a hard-coded list (that cannot be
343   // overridden), and then if not found there, we defer to the system registry.
344   // Finally, we scan a secondary hard-coded list to catch types that we can
345   // deduce but that we also want to allow the OS to override.
346 
347   base::FilePath path_ext(ext);
348   const string ext_narrow_str = path_ext.AsUTF8Unsafe();
349   const char* mime_type = FindMimeType(kPrimaryMappings, ext_narrow_str);
350   if (mime_type) {
351     *result = mime_type;
352     return true;
353   }
354 
355   if (include_platform_types && GetPlatformMimeTypeFromExtension(ext, result))
356     return true;
357 
358   mime_type = FindMimeType(kSecondaryMappings, ext_narrow_str);
359   if (mime_type) {
360     *result = mime_type;
361     return true;
362   }
363 
364   return false;
365 }
366 
// Defaulted: nothing is initialized here. Declared private in the class.
MimeUtil::MimeUtil() = default;
368 
369 // Tests for MIME parameter equality. Each parameter in the |mime_type_pattern|
370 // must be matched by a parameter in the |mime_type|. If there are no
371 // parameters in the pattern, the match is a success.
372 //
373 // According rfc2045 keys of parameters are case-insensitive, while values may
374 // or may not be case-sensitive, but they are usually case-sensitive. So, this
375 // function matches values in *case-sensitive* manner, however note that this
376 // may produce some false negatives.
MatchesMimeTypeParameters(const std::string & mime_type_pattern,const std::string & mime_type)377 bool MatchesMimeTypeParameters(const std::string& mime_type_pattern,
378                                const std::string& mime_type) {
379   typedef std::map<std::string, std::string> StringPairMap;
380 
381   const std::string::size_type semicolon = mime_type_pattern.find(';');
382   const std::string::size_type test_semicolon = mime_type.find(';');
383   if (semicolon != std::string::npos) {
384     if (test_semicolon == std::string::npos)
385       return false;
386 
387     base::StringPairs pattern_parameters;
388     base::SplitStringIntoKeyValuePairs(mime_type_pattern.substr(semicolon + 1),
389                                        '=', ';', &pattern_parameters);
390     base::StringPairs test_parameters;
391     base::SplitStringIntoKeyValuePairs(mime_type.substr(test_semicolon + 1),
392                                        '=', ';', &test_parameters);
393 
394     // Put the parameters to maps with the keys converted to lower case.
395     StringPairMap pattern_parameter_map;
396     for (const auto& pair : pattern_parameters) {
397       pattern_parameter_map[base::ToLowerASCII(pair.first)] = pair.second;
398     }
399 
400     StringPairMap test_parameter_map;
401     for (const auto& pair : test_parameters) {
402       test_parameter_map[base::ToLowerASCII(pair.first)] = pair.second;
403     }
404 
405     if (pattern_parameter_map.size() > test_parameter_map.size())
406       return false;
407 
408     for (const auto& parameter_pair : pattern_parameter_map) {
409       const auto& test_parameter_pair_it =
410           test_parameter_map.find(parameter_pair.first);
411       if (test_parameter_pair_it == test_parameter_map.end())
412         return false;
413       if (parameter_pair.second != test_parameter_pair_it->second)
414         return false;
415     }
416   }
417 
418   return true;
419 }
420 
421 // This comparison handles absolute maching and also basic
422 // wildcards.  The plugin mime types could be:
423 //      application/x-foo
424 //      application/*
425 //      application/*+xml
426 //      *
427 // Also tests mime parameters -- all parameters in the pattern must be present
428 // in the tested type for a match to succeed.
MatchesMimeType(const std::string & mime_type_pattern,const std::string & mime_type) const429 bool MimeUtil::MatchesMimeType(const std::string& mime_type_pattern,
430                                const std::string& mime_type) const {
431   if (mime_type_pattern.empty())
432     return false;
433 
434   std::string::size_type semicolon = mime_type_pattern.find(';');
435   const std::string base_pattern(mime_type_pattern.substr(0, semicolon));
436   semicolon = mime_type.find(';');
437   const std::string base_type(mime_type.substr(0, semicolon));
438 
439   if (base_pattern == "*" || base_pattern == "*/*")
440     return MatchesMimeTypeParameters(mime_type_pattern, mime_type);
441 
442   const std::string::size_type star = base_pattern.find('*');
443   if (star == std::string::npos) {
444     if (base::EqualsCaseInsensitiveASCII(base_pattern, base_type))
445       return MatchesMimeTypeParameters(mime_type_pattern, mime_type);
446     else
447       return false;
448   }
449 
450   // Test length to prevent overlap between |left| and |right|.
451   if (base_type.length() < base_pattern.length() - 1)
452     return false;
453 
454   base::StringPiece base_pattern_piece(base_pattern);
455   base::StringPiece left(base_pattern_piece.substr(0, star));
456   base::StringPiece right(base_pattern_piece.substr(star + 1));
457 
458   if (!base::StartsWith(base_type, left, base::CompareCase::INSENSITIVE_ASCII))
459     return false;
460 
461   if (!right.empty() &&
462       !base::EndsWith(base_type, right, base::CompareCase::INSENSITIVE_ASCII))
463     return false;
464 
465   return MatchesMimeTypeParameters(mime_type_pattern, mime_type);
466 }
467 
// Parses a media type string such as `text/html; charset="utf-8"`.
//
// |type_str| is the raw string to parse. |mime_type| (optional, may be null)
// receives the type/subtype portion with surrounding whitespace removed.
// |params| (optional, may be null) is cleared and then filled with the
// (name, value) pairs of all parameters that have a non-empty value, in order
// of appearance.
//
// Returns false, without touching the outputs, if there is no '/' at or
// before the end of the type/subtype token. Returns true otherwise; malformed
// parameters are skipped rather than treated as errors.
bool ParseMimeType(const std::string& type_str,
                   std::string* mime_type,
                   base::StringPairs* params) {
  // Trim leading and trailing whitespace from type.  We include '(' in
  // the trailing trim set to catch media-type comments, which are not at all
  // standard, but may occur in rare cases.
  size_t type_val = type_str.find_first_not_of(HTTP_LWS);
  // find_first_not_of() returns npos for an all-whitespace string; clamp so
  // the arithmetic below stays in range.
  type_val = std::min(type_val, type_str.length());
  size_t type_end = type_str.find_first_of(HTTP_LWS ";(", type_val);
  if (type_end == std::string::npos)
    type_end = type_str.length();

  // Reject a mime-type if it does not include a slash.
  size_t slash_pos = type_str.find_first_of('/');
  if (slash_pos == std::string::npos || slash_pos > type_end)
    return false;
  if (mime_type)
    *mime_type = type_str.substr(type_val, type_end - type_val);

  // Iterate over parameters. Can't split the string around semicolons
  // preemptively because quoted strings may include semicolons. Mostly matches
  // logic in https://mimesniff.spec.whatwg.org/. Main differences: Does not
  // validate characters are HTTP token code points / HTTP quoted-string token
  // code points, and ignores spaces after "=" in parameters.
  if (params)
    params->clear();
  // Loop invariant: |offset| is either npos (which ends the loop) or the
  // index of the ';' that introduces the next parameter.
  std::string::size_type offset = type_str.find_first_of(';', type_end);
  while (offset < type_str.size()) {
    DCHECK_EQ(';', type_str[offset]);
    // Trim off the semicolon.
    ++offset;

    // Trim off any following spaces.
    offset = type_str.find_first_not_of(HTTP_LWS, offset);
    std::string::size_type param_name_start = offset;

    // Extend parameter name until run into a semicolon or equals sign.  Per
    // spec, trailing spaces are not removed.
    offset = type_str.find_first_of(";=", offset);

    // Nothing more to do if at end of string, or if there's no parameter
    // value, since names without values aren't allowed.
    if (offset == std::string::npos || type_str[offset] == ';')
      continue;

    auto param_name = base::MakeStringPiece(type_str.begin() + param_name_start,
                                            type_str.begin() + offset);

    // Now parse the value.
    DCHECK_EQ('=', type_str[offset]);
    // Trim off the '='.
    offset++;

    // Remove leading spaces. This violates the spec, though it matches
    // pre-existing behavior.
    //
    // TODO(mmenke): Consider doing this (only?) after parsing quotes, which
    // seems to align more with the spec - not the content-type spec, but the
    // GET spec's way of getting an encoding, and the spec for handling
    // boundary values as well.
    // See https://encoding.spec.whatwg.org/#names-and-labels.
    offset = type_str.find_first_not_of(HTTP_LWS, offset);

    std::string param_value;
    if (offset == std::string::npos || type_str[offset] == ';') {
      // Nothing to do here - an unquoted string of only whitespace should be
      // skipped.
      continue;
    } else if (type_str[offset] != '"') {
      // If the first character is not a quotation mark, copy data directly.
      std::string::size_type value_start = offset;
      offset = type_str.find_first_of(';', offset);
      std::string::size_type value_end = offset;

      // Remove terminal whitespace. If ran off the end of the string, have to
      // update |value_end| first.
      if (value_end == std::string::npos)
        value_end = type_str.size();
      while (value_end > value_start &&
             HttpUtil::IsLWS(type_str[value_end - 1])) {
        --value_end;
      }

      param_value = type_str.substr(value_start, value_end - value_start);
    } else {
      // Otherwise, append data, with special handling for backslashes, until
      // a close quote.  Do not trim whitespace for quoted-string.

      // Skip open quote.
      DCHECK_EQ('"', type_str[offset]);
      ++offset;

      while (offset < type_str.size() && type_str[offset] != '"') {
        // Skip over backslash and append the next character, when not at
        // the end of the string. Otherwise, copy the next character (Which may
        // be a backslash).
        if (type_str[offset] == '\\' && offset + 1 < type_str.size()) {
          ++offset;
        }
        param_value += type_str[offset];
        ++offset;
      }

      // Anything between the closing quote and the next ';' is ignored.
      offset = type_str.find_first_of(';', offset);
    }
    if (params)
      params->emplace_back(param_name, param_value);
  }
  return true;
}
578 
ParseMimeTypeWithoutParameter(base::StringPiece type_string,std::string * top_level_type,std::string * subtype) const579 bool MimeUtil::ParseMimeTypeWithoutParameter(base::StringPiece type_string,
580                                              std::string* top_level_type,
581                                              std::string* subtype) const {
582   std::vector<base::StringPiece> components = base::SplitStringPiece(
583       type_string, "/", base::KEEP_WHITESPACE, base::SPLIT_WANT_ALL);
584   if (components.size() != 2)
585     return false;
586   components[0] = TrimWhitespaceASCII(components[0], base::TRIM_LEADING);
587   components[1] = TrimWhitespaceASCII(components[1], base::TRIM_TRAILING);
588   if (!HttpUtil::IsToken(components[0]) || !HttpUtil::IsToken(components[1]))
589     return false;
590 
591   if (top_level_type)
592     top_level_type->assign(std::string(components[0]));
593 
594   if (subtype)
595     subtype->assign(std::string(components[1]));
596 
597   return true;
598 }
599 
600 // See https://www.iana.org/assignments/media-types/media-types.xhtml
601 static const char* const kLegalTopLevelTypes[] = {
602     "application", "audio", "example",   "font", "image",
603     "message",     "model", "multipart", "text", "video",
604 };
605 
IsValidTopLevelMimeType(const std::string & type_string) const606 bool MimeUtil::IsValidTopLevelMimeType(const std::string& type_string) const {
607   std::string lower_type = base::ToLowerASCII(type_string);
608   for (const char* const legal_type : kLegalTopLevelTypes) {
609     if (lower_type.compare(legal_type) == 0)
610       return true;
611   }
612 
613   return type_string.size() > 2 &&
614          base::StartsWith(type_string, "x-",
615                           base::CompareCase::INSENSITIVE_ASCII);
616 }
617 
618 //----------------------------------------------------------------------------
619 // Wrappers for the singleton
620 //----------------------------------------------------------------------------
621 
GetMimeTypeFromExtension(const base::FilePath::StringType & ext,std::string * mime_type)622 bool GetMimeTypeFromExtension(const base::FilePath::StringType& ext,
623                               std::string* mime_type) {
624   return g_mime_util.Get().GetMimeTypeFromExtension(ext, mime_type);
625 }
626 
GetMimeTypeFromFile(const base::FilePath & file_path,std::string * mime_type)627 bool GetMimeTypeFromFile(const base::FilePath& file_path,
628                          std::string* mime_type) {
629   return g_mime_util.Get().GetMimeTypeFromFile(file_path, mime_type);
630 }
631 
GetWellKnownMimeTypeFromExtension(const base::FilePath::StringType & ext,std::string * mime_type)632 bool GetWellKnownMimeTypeFromExtension(const base::FilePath::StringType& ext,
633                                        std::string* mime_type) {
634   return g_mime_util.Get().GetWellKnownMimeTypeFromExtension(ext, mime_type);
635 }
636 
GetPreferredExtensionForMimeType(const std::string & mime_type,base::FilePath::StringType * extension)637 bool GetPreferredExtensionForMimeType(const std::string& mime_type,
638                                       base::FilePath::StringType* extension) {
639   return g_mime_util.Get().GetPreferredExtensionForMimeType(mime_type,
640                                                             extension);
641 }
642 
MatchesMimeType(const std::string & mime_type_pattern,const std::string & mime_type)643 bool MatchesMimeType(const std::string& mime_type_pattern,
644                      const std::string& mime_type) {
645   return g_mime_util.Get().MatchesMimeType(mime_type_pattern, mime_type);
646 }
647 
ParseMimeTypeWithoutParameter(base::StringPiece type_string,std::string * top_level_type,std::string * subtype)648 bool ParseMimeTypeWithoutParameter(base::StringPiece type_string,
649                                    std::string* top_level_type,
650                                    std::string* subtype) {
651   return g_mime_util.Get().ParseMimeTypeWithoutParameter(
652       type_string, top_level_type, subtype);
653 }
654 
IsValidTopLevelMimeType(const std::string & type_string)655 bool IsValidTopLevelMimeType(const std::string& type_string) {
656   return g_mime_util.Get().IsValidTopLevelMimeType(type_string);
657 }
658 
659 namespace {
660 
// Lists of well-known MIME types, one list per top-level type. Each list is
// referenced from the kStandardTypes table.
//
// From http://www.w3schools.com/media/media_mimeref.asp and
// http://plugindoc.mozdev.org/winmime.php
static const char* const kStandardImageTypes[] = {"image/avif",
                                                  "image/bmp",
                                                  "image/cis-cod",
                                                  "image/gif",
                                                  "image/ief",
                                                  "image/jpeg",
                                                  "image/webp",
                                                  "image/pict",
                                                  "image/pipeg",
                                                  "image/png",
                                                  "image/svg+xml",
                                                  "image/tiff",
                                                  "image/vnd.microsoft.icon",
                                                  "image/x-cmu-raster",
                                                  "image/x-cmx",
                                                  "image/x-icon",
                                                  "image/x-portable-anymap",
                                                  "image/x-portable-bitmap",
                                                  "image/x-portable-graymap",
                                                  "image/x-portable-pixmap",
                                                  "image/x-rgb",
                                                  "image/x-xbitmap",
                                                  "image/x-xpixmap",
                                                  "image/x-xwindowdump"};
// Well-known "audio/" types.
static const char* const kStandardAudioTypes[] = {
  "audio/aac",
  "audio/aiff",
  "audio/amr",
  "audio/basic",
  "audio/flac",
  "audio/midi",
  "audio/mp3",
  "audio/mp4",
  "audio/mpeg",
  "audio/mpeg3",
  "audio/ogg",
  "audio/vorbis",
  "audio/wav",
  "audio/webm",
  "audio/x-m4a",
  "audio/x-ms-wma",
  "audio/vnd.rn-realaudio",
  "audio/vnd.wave"
};
// Well-known "font/" types.
// https://tools.ietf.org/html/rfc8081
static const char* const kStandardFontTypes[] = {
    "font/collection", "font/otf",  "font/sfnt",
    "font/ttf",        "font/woff", "font/woff2",
};
712 static const char* const kStandardVideoTypes[] = {
713   "video/avi",
714   "video/divx",
715   "video/flc",
716   "video/mp4",
717   "video/mpeg",
718   "video/ogg",
719   "video/quicktime",
720   "video/sd-video",
721   "video/webm",
722   "video/x-dv",
723   "video/x-m4v",
724   "video/x-mpeg",
725   "video/x-ms-asf",
726   "video/x-ms-wmv"
727 };
728 
729 struct StandardType {
730   const char* const leading_mime_type;
731   base::span<const char* const> standard_types;
732 };
// Table consulted by GetExtensionsForMimeType() for "<type>/*" patterns. The
// final {nullptr, {}} entry is a sentinel: the lookup loop falls through to it
// when no prefix matches, so it must remain the last element.
static const StandardType kStandardTypes[] = {{"image/", kStandardImageTypes},
                                              {"audio/", kStandardAudioTypes},
                                              {"font/", kStandardFontTypes},
                                              {"video/", kStandardVideoTypes},
                                              {nullptr, {}}};
738 
739 // GetExtensionsFromHardCodedMappings() adds file extensions (without a leading
740 // dot) to the set |extensions|, for all MIME types matching |mime_type|.
741 //
742 // The meaning of |mime_type| depends on the value of |prefix_match|:
743 //
744 //  * If |prefix_match = false| then |mime_type| is an exact (case-insensitive)
745 //    string such as "text/plain".
746 //
747 //  * If |prefix_match = true| then |mime_type| is treated as the prefix for a
748 //    (case-insensitive) string. For instance "Text/" would match "text/plain".
GetExtensionsFromHardCodedMappings(base::span<const MimeInfo> mappings,const std::string & mime_type,bool prefix_match,std::unordered_set<base::FilePath::StringType> * extensions)749 void GetExtensionsFromHardCodedMappings(
750     base::span<const MimeInfo> mappings,
751     const std::string& mime_type,
752     bool prefix_match,
753     std::unordered_set<base::FilePath::StringType>* extensions) {
754   for (const auto& mapping : mappings) {
755     base::StringPiece cur_mime_type(mapping.mime_type);
756 
757     if (base::StartsWith(cur_mime_type, mime_type,
758                          base::CompareCase::INSENSITIVE_ASCII) &&
759         (prefix_match || (cur_mime_type.length() == mime_type.length()))) {
760       for (base::StringPiece this_extension : base::SplitStringPiece(
761                mapping.extensions, ",", base::TRIM_WHITESPACE,
762                base::SPLIT_WANT_ALL)) {
763         extensions->insert(StringToFilePathStringType(this_extension));
764       }
765     }
766   }
767 }
768 
GetExtensionsHelper(base::span<const char * const> standard_types,const std::string & leading_mime_type,std::unordered_set<base::FilePath::StringType> * extensions)769 void GetExtensionsHelper(
770     base::span<const char* const> standard_types,
771     const std::string& leading_mime_type,
772     std::unordered_set<base::FilePath::StringType>* extensions) {
773   for (auto* standard_type : standard_types) {
774     g_mime_util.Get().GetPlatformExtensionsForMimeType(standard_type,
775                                                        extensions);
776   }
777 
778   // Also look up the extensions from hard-coded mappings in case that some
779   // supported extensions are not registered in the system registry, like ogg.
780   GetExtensionsFromHardCodedMappings(kPrimaryMappings, leading_mime_type, true,
781                                      extensions);
782 
783   GetExtensionsFromHardCodedMappings(kSecondaryMappings, leading_mime_type,
784                                      true, extensions);
785 }
786 
// Appends every element of |source| to the end of |target|. Elements already
// present in |target| are kept, and the appended elements follow the iteration
// order of |source|. |source| itself is only read.
template <class T>
void UnorderedSetToVector(std::unordered_set<T>* source,
                          std::vector<T>* target) {
  // A single range insert copy-constructs each element directly in place,
  // rather than default-constructing new slots and then assigning over them.
  target->insert(target->end(), source->begin(), source->end());
}
798 
// Alphabet from which the random part of a MIME multipart boundary is drawn
// (see GenerateMimeMultipartBoundary()).
//
// TODO(rsleevi): crbug.com/575779: Follow the spec or fix the spec.
// RFC 2046 allows the alphanumeric characters plus the following characters
// in a boundary:  '()+_,-./:=?
// However, the following characters, although legal, cause some sites to
// fail: (),./:=+
// For that reason only digits and ASCII letters are listed here.
constexpr base::StringPiece kMimeBoundaryCharacters(
    "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
808 
// Total length, in characters, of the boundary string produced by
// GenerateMimeMultipartBoundary().
constexpr size_t kMimeBoundarySize = 69;
811 
812 }  // namespace
813 
GetExtensionsForMimeType(const std::string & unsafe_mime_type,std::vector<base::FilePath::StringType> * extensions)814 void GetExtensionsForMimeType(
815     const std::string& unsafe_mime_type,
816     std::vector<base::FilePath::StringType>* extensions) {
817   if (unsafe_mime_type == "*/*" || unsafe_mime_type == "*")
818     return;
819 
820   const std::string mime_type = base::ToLowerASCII(unsafe_mime_type);
821   std::unordered_set<base::FilePath::StringType> unique_extensions;
822 
823   if (base::EndsWith(mime_type, "/*", base::CompareCase::INSENSITIVE_ASCII)) {
824     std::string leading_mime_type = mime_type.substr(0, mime_type.length() - 1);
825 
826     // Find the matching StandardType from within kStandardTypes, or fall
827     // through to the last (default) StandardType.
828     const StandardType* type = nullptr;
829     for (const StandardType& standard_type : kStandardTypes) {
830       type = &standard_type;
831       if (type->leading_mime_type &&
832           leading_mime_type == type->leading_mime_type) {
833         break;
834       }
835     }
836     DCHECK(type);
837     GetExtensionsHelper(type->standard_types,
838                         leading_mime_type,
839                         &unique_extensions);
840   } else {
841     g_mime_util.Get().GetPlatformExtensionsForMimeType(mime_type,
842                                                        &unique_extensions);
843 
844     // Also look up the extensions from hard-coded mappings in case that some
845     // supported extensions are not registered in the system registry, like ogg.
846     GetExtensionsFromHardCodedMappings(kPrimaryMappings, mime_type, false,
847                                        &unique_extensions);
848 
849     GetExtensionsFromHardCodedMappings(kSecondaryMappings, mime_type, false,
850                                        &unique_extensions);
851   }
852 
853   UnorderedSetToVector(&unique_extensions, extensions);
854 }
855 
GenerateMimeMultipartBoundary()856 NET_EXPORT std::string GenerateMimeMultipartBoundary() {
857   // Based on RFC 1341, section "7.2.1 Multipart: The common syntax":
858   //   Because encapsulation boundaries must not appear in the body parts being
859   //   encapsulated, a user agent must exercise care to choose a unique
860   //   boundary. The boundary in the example above could have been the result of
861   //   an algorithm designed to produce boundaries with a very low probability
862   //   of already existing in the data to be encapsulated without having to
863   //   prescan the data.
864   //   [...]
865   //   the boundary parameter [...] consists of 1 to 70 characters from a set of
866   //   characters known to be very robust through email gateways, and NOT ending
867   //   with white space.
868   //   [...]
869   //   boundary := 0*69<bchars> bcharsnospace
870   //   bchars := bcharsnospace / " "
871   //   bcharsnospace := DIGIT / ALPHA / "'" / "(" / ")" / "+" /
872   //            "_" / "," / "-" / "." / "/" / ":" / "=" / "?"
873 
874   std::string result;
875   result.reserve(kMimeBoundarySize);
876   result.append("----MultipartBoundary--");
877   while (result.size() < (kMimeBoundarySize - 4)) {
878     char c = kMimeBoundaryCharacters[base::RandInt(
879         0, kMimeBoundaryCharacters.size() - 1)];
880     result.push_back(c);
881   }
882   result.append("----");
883 
884   // Not a strict requirement - documentation only.
885   DCHECK_EQ(kMimeBoundarySize, result.size());
886 
887   return result;
888 }
889 
AddMultipartValueForUpload(const std::string & value_name,const std::string & value,const std::string & mime_boundary,const std::string & content_type,std::string * post_data)890 void AddMultipartValueForUpload(const std::string& value_name,
891                                 const std::string& value,
892                                 const std::string& mime_boundary,
893                                 const std::string& content_type,
894                                 std::string* post_data) {
895   DCHECK(post_data);
896   // First line is the boundary.
897   post_data->append("--" + mime_boundary + "\r\n");
898   // Next line is the Content-disposition.
899   post_data->append("Content-Disposition: form-data; name=\"" +
900                     value_name + "\"\r\n");
901   if (!content_type.empty()) {
902     // If Content-type is specified, the next line is that.
903     post_data->append("Content-Type: " + content_type + "\r\n");
904   }
905   // Leave an empty line and append the value.
906   post_data->append("\r\n" + value + "\r\n");
907 }
908 
AddMultipartValueForUploadWithFileName(const std::string & value_name,const std::string & file_name,const std::string & value,const std::string & mime_boundary,const std::string & content_type,std::string * post_data)909 void AddMultipartValueForUploadWithFileName(const std::string& value_name,
910                                             const std::string& file_name,
911                                             const std::string& value,
912                                             const std::string& mime_boundary,
913                                             const std::string& content_type,
914                                             std::string* post_data) {
915   DCHECK(post_data);
916   // First line is the boundary.
917   post_data->append("--" + mime_boundary + "\r\n");
918   // Next line is the Content-disposition.
919   post_data->append("Content-Disposition: form-data; name=\"" + value_name +
920                     "\"; filename=\"" + file_name + "\"\r\n");
921   if (!content_type.empty()) {
922     // If Content-type is specified, the next line is that.
923     post_data->append("Content-Type: " + content_type + "\r\n");
924   }
925   // Leave an empty line and append the value.
926   post_data->append("\r\n" + value + "\r\n");
927 }
928 
AddMultipartFinalDelimiterForUpload(const std::string & mime_boundary,std::string * post_data)929 void AddMultipartFinalDelimiterForUpload(const std::string& mime_boundary,
930                                          std::string* post_data) {
931   DCHECK(post_data);
932   post_data->append("--" + mime_boundary + "--\r\n");
933 }
934 
935 // TODO(toyoshim): We may prefer to implement a strict RFC2616 media-type
936 // (https://tools.ietf.org/html/rfc2616#section-3.7) parser.
ExtractMimeTypeFromMediaType(const std::string & type_string,bool accept_comma_separated)937 absl::optional<std::string> ExtractMimeTypeFromMediaType(
938     const std::string& type_string,
939     bool accept_comma_separated) {
940   std::string::size_type end = type_string.find(';');
941   if (accept_comma_separated) {
942     end = std::min(end, type_string.find(','));
943   }
944   std::string top_level_type;
945   std::string subtype;
946   if (ParseMimeTypeWithoutParameter(type_string.substr(0, end), &top_level_type,
947                                     &subtype)) {
948     return top_level_type + "/" + subtype;
949   }
950   return absl::nullopt;
951 }
952 
953 }  // namespace net
954