Lines Matching +full:html +full:- +full:encoding +full:- +full:sniffer
2 // Use of this source code is governed by a BSD-style license that can be
9 // HTML payload, no Content-Type header:
10 // * IE 7: Render as HTML
11 // * Firefox 2: Render as HTML
12 // * Safari 3: Render as HTML
13 // * Opera 9: Render as HTML
16 // => Chrome: Render as HTML
18 // HTML payload, Content-Type: "text/plain":
19 // * IE 7: Render as HTML
21 // * Safari 3: Render as text (Note: Safari will Render as HTML if the URL
22 // has an HTML extension)
28 // We generalize this as follows. If the Content-Type header is text/plain
31 // HTML payload, Content-Type: "application/octet-stream":
32 // * IE 7: Render as HTML
33 // * Firefox 2: Download as application/octet-stream
34 // * Safari 3: Render as HTML
35 // * Opera 9: Render as HTML
38 // => Chrome: Download as application/octet-stream
40 // application/octet-stream for .xhtml files (because they don't recognize
44 // GIF payload, no Content-Type header:
53 // Once we decide to render HTML without a Content-Type header, there isn't much
56 // GIF payload, Content-Type: "text/plain":
58 // * Firefox 2: Download as application/octet-stream (Note: Firefox will
68 // GIF payload, Content-Type: "application/octet-stream":
70 // * Firefox 2: Download as application/octet-stream (Note: Firefox will
77 // trigger downloads by sending application/octet-stream (even though they
78 // should be sending Content-Disposition: attachment). Although it is safe
81 // => Chrome: Download as application/octet-stream
83 // Note that our definition of HTML payload is much stricter than IE's
112 { (mime_type), std::string_view((magic), sizeof(magic) - 1), false, nullptr }
127 (mime_type), std::string_view((magic), verified_sizeof(magic, mask) - 1), \
133 { (mime_type), std::string_view((magic), sizeof(magic) - 1), true, nullptr }
136 // Source: HTML 5 specification
137 MAGIC_NUMBER("application/pdf", "%PDF-"),
138 MAGIC_NUMBER("application/postscript", "%!PS-Adobe-"),
150 MAGIC_NUMBER("application/x-gzip", "\x1F\x8B\x08"),
151 MAGIC_NUMBER("audio/x-pn-realaudio", "\x2E\x52\x4D\x46"),
152 MAGIC_NUMBER("video/x-ms-asf",
161 MAGIC_NUMBER("application/x-rar-compressed", "Rar!\x1A\x07\x00"),
162 MAGIC_NUMBER("application/x-msmetafile", "\xD7\xCD\xC6\x9A"),
163 MAGIC_NUMBER("application/octet-stream", "MZ"), // EXE
166 // MAGIC_NUMBER("application/x-shockwave-flash", "CWS"),
167 // MAGIC_NUMBER("application/x-shockwave-flash", "FLV"),
168 // MAGIC_NUMBER("application/x-shockwave-flash", "FWS"),
206 { (type), std::string_view((extension), sizeof(extension) - 1) }
218 MAGIC_NUMBER("image/x-xbitmap", "#define"),
219 MAGIC_NUMBER("image/x-icon", "\x00\x00\x01\x00"),
229 MAGIC_NUMBER("application/x-shockwave-flash", "CWS"),
230 MAGIC_NUMBER("application/x-shockwave-flash", "FWS"),
231 MAGIC_NUMBER("video/x-flv", "FLV"),
232 MAGIC_NUMBER("audio/x-flac", "fLaC"),
233 // Per https://tools.ietf.org/html/rfc3267#section-8.1
237 MAGIC_NUMBER("image/x-canon-cr2", "II\x2a\x00\x10\x00\x00\x00CR"),
238 MAGIC_NUMBER("image/x-canon-crw", "II\x1a\x00\x00\x00HEAPCCDR"),
239 MAGIC_NUMBER("image/x-minolta-mrw", "\x00MRM"),
240 MAGIC_NUMBER("image/x-olympus-orf", "MMOR"), // big-endian
241 MAGIC_NUMBER("image/x-olympus-orf", "IIRO"), // little-endian
242 MAGIC_NUMBER("image/x-olympus-orf", "IIRS"), // little-endian
243 MAGIC_NUMBER("image/x-fuji-raf", "FUJIFILMCCD-RAW "),
244 MAGIC_NUMBER("image/x-panasonic-raw",
246 MAGIC_NUMBER("image/x-panasonic-raw",
248 MAGIC_NUMBER("image/x-phaseone-raw", "MMMMRaw"),
249 MAGIC_NUMBER("image/x-x3f", "FOVb"),
252 // Our HTML sniffer differs slightly from Mozilla. For example, Mozilla will
253 // decide that a document that begins "<!DOCTYPE SOAP-ENV:Envelope PUBLIC " is
254 // HTML, but we will not.
257 MAGIC_STRING("text/html", "<" tag)
260 // XML processing directive. Although this is not an HTML mime type, we sniff
261 // for this in the HTML phase because text/xml is just as powerful as HTML and
265 MAGIC_HTML_TAG("!DOCTYPE html"), // HTML5 spec
268 MAGIC_HTML_TAG("html"), // HTML5 spec, Mozilla
269 MAGIC_HTML_TAG("!--"),
320 // Consistency check - string entries should have no embedded nulls. in MatchMagicNumber()
323 // Do a case-insensitive prefix comparison. in MatchMagicNumber()
335 result->assign(magic_entry.mime_type); in MatchMagicNumber()
358 *string_piece = string_piece->substr(0, max_size); in TruncateStringPiece()
359 return string_piece->length() == max_size; in TruncateStringPiece()
362 // Returns true and sets result if the content appears to be HTML.
367 // For HTML, we are willing to consider up to 512 bytes. This may be overly in SniffForHTML()
371 // We adopt a strategy similar to that used by Mozilla to sniff HTML tags, in SniffForHTML()
376 // |trimmed| now starts at first non-whitespace character (or is empty). in SniffForHTML()
425 *result = "application/vnd.ms-excel"; in SniffForOfficeDocs()
428 *result = "application/vnd.ms-powerpoint"; in SniffForOfficeDocs()
437 *result = "application/vnd.openxmlformats-officedocument." in SniffForOfficeDocs()
441 *result = "application/vnd.openxmlformats-officedocument." in SniffForOfficeDocs()
445 *result = "application/vnd.openxmlformats-officedocument." in SniffForOfficeDocs()
460 type_hint == "application/vnd.ms-excel" || in IsOfficeType()
461 type_hint == "application/vnd.ms-powerpoint" || in IsOfficeType()
462 type_hint == "application/vnd.openxmlformats-officedocument." in IsOfficeType()
464 type_hint == "application/vnd.openxmlformats-officedocument." in IsOfficeType()
466 type_hint == "application/vnd.openxmlformats-officedocument." in IsOfficeType()
468 type_hint == "application/vnd.ms-excel.sheet.macroenabled.12" || in IsOfficeType()
469 type_hint == "application/vnd.ms-word.document.macroenabled.12" || in IsOfficeType()
470 type_hint == "application/vnd.ms-powerpoint.presentation." in IsOfficeType()
474 type_hint == "application/vnd.ms-word" || in IsOfficeType()
475 type_hint == "application/vnd.ms-word.document.12" || in IsOfficeType()
483 // "application/octet-stream", otherwise it is not modified.
497 *result = "application/octet-stream"; in SniffForInvalidOfficeDocs()
516 // while HTML5 has a different recommendation -- what should we do?
517 // TODO(evanm): this is incorrect for documents whose encoding isn't a superset
518 // of ASCII -- do we care?
525 // This loop iterates through tag-looking offsets in the file. in SniffXML()
527 // and stop at the first "plain" tag, then make a decision on the mime-type in SniffXML()
569 MAGIC_NUMBER("text/plain", "\xFE\xFF"), // UTF-16BE
570 MAGIC_NUMBER("text/plain", "\xFF\xFE"), // UTF-16LE
571 MAGIC_NUMBER("text/plain", "\xEF\xBB\xBF"), // UTF-8
574 // Returns true and sets result to "application/octet-stream" if the content
593 result->assign("text/plain"); in SniffBinary()
599 result->assign("application/octet-stream"); in SniffBinary()
603 // No evidence either way. Default to non-binary and, if truncated, clear in SniffBinary()
607 result->assign("text/plain"); in SniffBinary()
647 MAGIC_NUMBER("application/x-chrome-extension", "Cr24\x02\x00\x00\x00"), in SniffCRX()
648 MAGIC_NUMBER("application/x-chrome-extension", "Cr24\x03\x00\x00\x00")}; in SniffCRX()
672 // We want to sniff application/octet-stream for in ShouldSniffMimeType()
673 // application/x-chrome-extension, but nothing else. in ShouldSniffMimeType()
674 "application/octet-stream", in ShouldSniffMimeType()
681 "application/vnd.ms-excel", in ShouldSniffMimeType()
682 "application/vnd.ms-powerpoint", in ShouldSniffMimeType()
683 "application/vnd.openxmlformats-officedocument.wordprocessingml.document", in ShouldSniffMimeType()
684 "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", in ShouldSniffMimeType()
685 "application/vnd.openxmlformats-officedocument.presentationml.presentation", in ShouldSniffMimeType()
686 "application/vnd.ms-excel.sheet.macroenabled.12", in ShouldSniffMimeType()
687 "application/vnd.ms-word.document.macroenabled.12", in ShouldSniffMimeType()
688 "application/vnd.ms-powerpoint.presentation.macroenabled.12", in ShouldSniffMimeType()
691 "application/vnd.ms-word", in ShouldSniffMimeType()
692 "application/vnd.ms-word.document.12", in ShouldSniffMimeType()
722 result->assign(type_hint); in SniffMimeType()
733 // First check for HTML, unless it's a file URL and in SniffMimeType()
738 // We're only willing to sniff HTML if the server has not supplied a mime in SniffMimeType()
742 return true; // We succeeded in sniffing HTML. No more content needed. in SniffMimeType()
750 // could be indicative of a mis-configuration that we shield the user from. in SniffMimeType()
785 // is application/octet-stream. Time to bail out. in SniffMimeType()
786 if (type_hint == "application/octet-stream") in SniffMimeType()
807 // https://mimesniff.spec.whatwg.org/#binary-data-byte in LooksLikeBinary()
811 // least-significant bit represents byte 0x00, the most-significant bit in LooksLikeBinary()