• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2011 Adam Barth. All Rights Reserved.
3  * Copyright (C) 2011 Daniel Bates (dbates@intudata.com).
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include "config.h"
28 #include "core/html/parser/XSSAuditor.h"
29 
30 #include "core/HTMLNames.h"
31 #include "core/SVGNames.h"
32 #include "core/XLinkNames.h"
33 #include "core/dom/Document.h"
34 #include "core/frame/LocalFrame.h"
35 #include "core/frame/Settings.h"
36 #include "core/frame/csp/ContentSecurityPolicy.h"
37 #include "core/html/HTMLParamElement.h"
38 #include "core/html/parser/HTMLDocumentParser.h"
39 #include "core/html/parser/HTMLParserIdioms.h"
40 #include "core/html/parser/TextResourceDecoder.h"
41 #include "core/html/parser/XSSAuditorDelegate.h"
42 #include "core/inspector/ConsoleMessage.h"
43 #include "core/loader/DocumentLoader.h"
44 #include "platform/JSONValues.h"
45 #include "platform/network/FormData.h"
46 #include "platform/text/DecodeEscapeSequences.h"
47 #include "wtf/ASCIICType.h"
48 #include "wtf/MainThread.h"
49 
namespace {

// URL literal used where a URL with a unique security origin is needed.
// SecurityOrigin::urlWithUniqueSecurityOrigin() would be the natural choice,
// but it can't be used cross-thread.
const char kURLWithUniqueOrigin[] = "data:,";

} // namespace
56 
57 namespace blink {
58 
59 using namespace HTMLNames;
60 
isNonCanonicalCharacter(UChar c)61 static bool isNonCanonicalCharacter(UChar c)
62 {
63     // We remove all non-ASCII characters, including non-printable ASCII characters.
64     //
65     // Note, we don't remove backslashes like PHP stripslashes(), which among other things converts "\\0" to the \0 character.
66     // Instead, we remove backslashes and zeros (since the string "\\0" =(remove backslashes)=> "0"). However, this has the
67     // adverse effect that we remove any legitimate zeros from a string.
68     //
69     // We also remove forward-slash, because it is common for some servers to collapse successive path components, eg,
70     // a//b becomes a/b.
71     //
72     // We also remove the questionmark character, since some severs replace invalid high-bytes with a questionmark. We
73     // are already stripping the high-bytes so we also strip the questionmark to match.
74     //
75     // For instance: new String("http://localhost:8000?x") => new String("http:localhost:8x").
76     return (c == '\\' || c == '0' || c == '\0' || c == '/' || c == '?' || c >= 127);
77 }
78 
isRequiredForInjection(UChar c)79 static bool isRequiredForInjection(UChar c)
80 {
81     return (c == '\'' || c == '"' || c == '<' || c == '>');
82 }
83 
isTerminatingCharacter(UChar c)84 static bool isTerminatingCharacter(UChar c)
85 {
86     return (c == '&' || c == '/' || c == '"' || c == '\'' || c == '<' || c == '>' || c == ',');
87 }
88 
isHTMLQuote(UChar c)89 static bool isHTMLQuote(UChar c)
90 {
91     return (c == '"' || c == '\'');
92 }
93 
isJSNewline(UChar c)94 static bool isJSNewline(UChar c)
95 {
96     // Per ecma-262 section 7.3 Line Terminators.
97     return (c == '\n' || c == '\r' || c == 0x2028 || c == 0x2029);
98 }
99 
startsHTMLCommentAt(const String & string,size_t start)100 static bool startsHTMLCommentAt(const String& string, size_t start)
101 {
102     return (start + 3 < string.length() && string[start] == '<' && string[start + 1] == '!' && string[start + 2] == '-' && string[start + 3] == '-');
103 }
104 
startsSingleLineCommentAt(const String & string,size_t start)105 static bool startsSingleLineCommentAt(const String& string, size_t start)
106 {
107     return (start + 1 < string.length() && string[start] == '/' && string[start + 1] == '/');
108 }
109 
startsMultiLineCommentAt(const String & string,size_t start)110 static bool startsMultiLineCommentAt(const String& string, size_t start)
111 {
112     return (start + 1 < string.length() && string[start] == '/' && string[start + 1] == '*');
113 }
114 
startsOpeningScriptTagAt(const String & string,size_t start)115 static bool startsOpeningScriptTagAt(const String& string, size_t start)
116 {
117     return start + 6 < string.length() && string[start] == '<'
118         && WTF::toASCIILowerUnchecked(string[start + 1]) == 's'
119         && WTF::toASCIILowerUnchecked(string[start + 2]) == 'c'
120         && WTF::toASCIILowerUnchecked(string[start + 3]) == 'r'
121         && WTF::toASCIILowerUnchecked(string[start + 4]) == 'i'
122         && WTF::toASCIILowerUnchecked(string[start + 5]) == 'p'
123         && WTF::toASCIILowerUnchecked(string[start + 6]) == 't';
124 }
125 
126 // If other files need this, we should move this to core/html/parser/HTMLParserIdioms.h
127 template<size_t inlineCapacity>
threadSafeMatch(const Vector<UChar,inlineCapacity> & vector,const QualifiedName & qname)128 bool threadSafeMatch(const Vector<UChar, inlineCapacity>& vector, const QualifiedName& qname)
129 {
130     return equalIgnoringNullity(vector, qname.localName().impl());
131 }
132 
hasName(const HTMLToken & token,const QualifiedName & name)133 static bool hasName(const HTMLToken& token, const QualifiedName& name)
134 {
135     return threadSafeMatch(token.name(), name);
136 }
137 
findAttributeWithName(const HTMLToken & token,const QualifiedName & name,size_t & indexOfMatchingAttribute)138 static bool findAttributeWithName(const HTMLToken& token, const QualifiedName& name, size_t& indexOfMatchingAttribute)
139 {
140     // Notice that we're careful not to ref the StringImpl here because we might be on a background thread.
141     const String& attrName = name.namespaceURI() == XLinkNames::xlinkNamespaceURI ? "xlink:" + name.localName().string() : name.localName().string();
142 
143     for (size_t i = 0; i < token.attributes().size(); ++i) {
144         if (equalIgnoringNullity(token.attributes().at(i).name, attrName)) {
145             indexOfMatchingAttribute = i;
146             return true;
147         }
148     }
149     return false;
150 }
151 
isNameOfInlineEventHandler(const Vector<UChar,32> & name)152 static bool isNameOfInlineEventHandler(const Vector<UChar, 32>& name)
153 {
154     const size_t lengthOfShortestInlineEventHandlerName = 5; // To wit: oncut.
155     if (name.size() < lengthOfShortestInlineEventHandlerName)
156         return false;
157     return name[0] == 'o' && name[1] == 'n';
158 }
159 
isDangerousHTTPEquiv(const String & value)160 static bool isDangerousHTTPEquiv(const String& value)
161 {
162     String equiv = value.stripWhiteSpace();
163     return equalIgnoringCase(equiv, "refresh") || equalIgnoringCase(equiv, "set-cookie");
164 }
165 
// Decodes %u-escape sequences in |string|.
static inline String decode16BitUnicodeEscapeSequences(const String& string)
{
    // Each %u-escape sequence represents a UTF-16 code unit, so the encoding
    // passed here is ignored.
    const WTF::TextEncoding& ignoredEncoding = UTF8Encoding();
    return decodeEscapeSequences<Unicode16BitEscapeSequence>(string, ignoredEncoding);
}
171 
// Decodes URL escape sequences in |string| using |encoding|.
static inline String decodeStandardURLEscapeSequences(const String& string, const WTF::TextEncoding& encoding)
{
    // decodeEscapeSequences() is used rather than decodeURLEscapeSequences()
    // (declared in weborigin/KURL.h) so that platform-specific URL decoding
    // differences (e.g. KURLGoogle) are avoided.
    String decoded = decodeEscapeSequences<URLEscapeSequence>(string, encoding);
    return decoded;
}
178 
// Repeatedly applies URL-escape and %u-escape decoding to |string| until a
// pass no longer shortens it, then replaces every '+' with a space.
static String fullyDecodeString(const String& string, const WTF::TextEncoding& encoding)
{
    String workingString = string;
    for (;;) {
        const size_t lengthBeforePass = workingString.length();
        const String urlDecoded = decodeStandardURLEscapeSequences(workingString, encoding);
        workingString = decode16BitUnicodeEscapeSequences(urlDecoded);
        // Stop as soon as a decoding pass fails to shrink the string.
        if (workingString.length() >= lengthBeforePass)
            break;
    }
    workingString.replace('+', ' ');
    return workingString;
}
190 
truncateForSrcLikeAttribute(String & decodedSnippet)191 static void truncateForSrcLikeAttribute(String& decodedSnippet)
192 {
193     // In HTTP URLs, characters following the first ?, #, or third slash may come from
194     // the page itself and can be merely ignored by an attacker's server when a remote
195     // script or script-like resource is requested. In DATA URLS, the payload starts at
196     // the first comma, and the the first /*, //, or <!-- may introduce a comment. Characters
197     // following this may come from the page itself and may be ignored when the script is
198     // executed. For simplicity, we don't differentiate based on URL scheme, and stop at
199     // the first # or ?, the third slash, or the first slash or < once a comma is seen.
200     int slashCount = 0;
201     bool commaSeen = false;
202     for (size_t currentLength = 0; currentLength < decodedSnippet.length(); ++currentLength) {
203         UChar currentChar = decodedSnippet[currentLength];
204         if (currentChar == '?'
205             || currentChar == '#'
206             || ((currentChar == '/' || currentChar == '\\') && (commaSeen || ++slashCount > 2))
207             || (currentChar == '<' && commaSeen)) {
208             decodedSnippet.truncate(currentLength);
209             return;
210         }
211         if (currentChar == ',')
212             commaSeen = true;
213     }
214 }
215 
truncateForScriptLikeAttribute(String & decodedSnippet)216 static void truncateForScriptLikeAttribute(String& decodedSnippet)
217 {
218     // Beware of trailing characters which came from the page itself, not the
219     // injected vector. Excluding the terminating character covers common cases
220     // where the page immediately ends the attribute, but doesn't cover more
221     // complex cases where there is other page data following the injection.
222     // Generally, these won't parse as javascript, so the injected vector
223     // typically excludes them from consideration via a single-line comment or
224     // by enclosing them in a string literal terminated later by the page's own
225     // closing punctuation. Since the snippet has not been parsed, the vector
226     // may also try to introduce these via entities. As a result, we'd like to
227     // stop before the first "//", the first <!--, the first entity, or the first
228     // quote not immediately following the first equals sign (taking whitespace
229     // into consideration). To keep things simpler, we don't try to distinguish
230     // between entity-introducing amperands vs. other uses, nor do we bother to
231     // check for a second slash for a comment, nor do we bother to check for
232     // !-- following a less-than sign. We stop instead on any ampersand
233     // slash, or less-than sign.
234     size_t position = 0;
235     if ((position = decodedSnippet.find("=")) != kNotFound
236         && (position = decodedSnippet.find(isNotHTMLSpace<UChar>, position + 1)) != kNotFound
237         && (position = decodedSnippet.find(isTerminatingCharacter, isHTMLQuote(decodedSnippet[position]) ? position + 1 : position)) != kNotFound) {
238         decodedSnippet.truncate(position);
239     }
240 }
241 
combineXSSProtectionHeaderAndCSP(ReflectedXSSDisposition xssProtection,ReflectedXSSDisposition reflectedXSS)242 static ReflectedXSSDisposition combineXSSProtectionHeaderAndCSP(ReflectedXSSDisposition xssProtection, ReflectedXSSDisposition reflectedXSS)
243 {
244     ReflectedXSSDisposition result = std::max(xssProtection, reflectedXSS);
245 
246     if (result == ReflectedXSSInvalid || result == FilterReflectedXSS || result == ReflectedXSSUnset)
247         return FilterReflectedXSS;
248 
249     return result;
250 }
251 
isSemicolonSeparatedAttribute(const HTMLToken::Attribute & attribute)252 static bool isSemicolonSeparatedAttribute(const HTMLToken::Attribute& attribute)
253 {
254     return threadSafeMatch(attribute.name, SVGNames::valuesAttr);
255 }
256 
// Splits |value| on ';' and returns the first piece that, once leading and
// trailing HTML spaces are stripped, is a javascript: URL. Returns the empty
// string when no piece qualifies.
static String semicolonSeparatedValueContainingJavaScriptURL(const String& value)
{
    Vector<String> pieces;
    value.split(';', pieces);

    for (size_t index = 0; index < pieces.size(); ++index) {
        String candidate = stripLeadingAndTrailingHTMLSpaces(pieces[index]);
        if (!protocolIsJavaScript(candidate))
            continue;
        return candidate;
    }
    return emptyString();
}
268 
XSSAuditor()269 XSSAuditor::XSSAuditor()
270     : m_isEnabled(false)
271     , m_xssProtection(FilterReflectedXSS)
272     , m_didSendValidCSPHeader(false)
273     , m_didSendValidXSSProtectionHeader(false)
274     , m_state(Uninitialized)
275     , m_scriptTagFoundInRequest(false)
276     , m_scriptTagNestingLevel(0)
277     , m_encoding(UTF8Encoding())
278 {
279     // Although tempting to call init() at this point, the various objects
280     // we want to reference might not all have been constructed yet.
281 }
282 
initForFragment()283 void XSSAuditor::initForFragment()
284 {
285     ASSERT(isMainThread());
286     ASSERT(m_state == Uninitialized);
287     m_state = FilteringTokens;
288     // When parsing a fragment, we don't enable the XSS auditor because it's
289     // too much overhead.
290     ASSERT(!m_isEnabled);
291 }
292 
init(Document * document,XSSAuditorDelegate * auditorDelegate)293 void XSSAuditor::init(Document* document, XSSAuditorDelegate* auditorDelegate)
294 {
295     ASSERT(isMainThread());
296     if (m_state != Uninitialized)
297         return;
298     m_state = FilteringTokens;
299 
300     if (Settings* settings = document->settings())
301         m_isEnabled = settings->xssAuditorEnabled();
302 
303     if (!m_isEnabled)
304         return;
305 
306     m_documentURL = document->url().copy();
307 
308     // In theory, the Document could have detached from the LocalFrame after the
309     // XSSAuditor was constructed.
310     if (!document->frame()) {
311         m_isEnabled = false;
312         return;
313     }
314 
315     if (m_documentURL.isEmpty()) {
316         // The URL can be empty when opening a new browser window or calling window.open("").
317         m_isEnabled = false;
318         return;
319     }
320 
321     if (m_documentURL.protocolIsData()) {
322         m_isEnabled = false;
323         return;
324     }
325 
326     if (document->encoding().isValid())
327         m_encoding = document->encoding();
328 
329     if (DocumentLoader* documentLoader = document->frame()->loader().documentLoader()) {
330         DEFINE_STATIC_LOCAL(const AtomicString, XSSProtectionHeader, ("X-XSS-Protection", AtomicString::ConstructFromLiteral));
331         const AtomicString& headerValue = documentLoader->response().httpHeaderField(XSSProtectionHeader);
332         String errorDetails;
333         unsigned errorPosition = 0;
334         String reportURL;
335         KURL xssProtectionReportURL;
336 
337         // Process the X-XSS-Protection header, then mix in the CSP header's value.
338         ReflectedXSSDisposition xssProtectionHeader = parseXSSProtectionHeader(headerValue, errorDetails, errorPosition, reportURL);
339         m_didSendValidXSSProtectionHeader = xssProtectionHeader != ReflectedXSSUnset && xssProtectionHeader != ReflectedXSSInvalid;
340         if ((xssProtectionHeader == FilterReflectedXSS || xssProtectionHeader == BlockReflectedXSS) && !reportURL.isEmpty()) {
341             xssProtectionReportURL = document->completeURL(reportURL);
342             if (MixedContentChecker::isMixedContent(document->securityOrigin(), xssProtectionReportURL)) {
343                 errorDetails = "insecure reporting URL for secure page";
344                 xssProtectionHeader = ReflectedXSSInvalid;
345                 xssProtectionReportURL = KURL();
346             }
347         }
348         if (xssProtectionHeader == ReflectedXSSInvalid)
349             document->addConsoleMessage(ConsoleMessage::create(SecurityMessageSource, ErrorMessageLevel, "Error parsing header X-XSS-Protection: " + headerValue + ": "  + errorDetails + " at character position " + String::format("%u", errorPosition) + ". The default protections will be applied."));
350 
351         ReflectedXSSDisposition cspHeader = document->contentSecurityPolicy()->reflectedXSSDisposition();
352         m_didSendValidCSPHeader = cspHeader != ReflectedXSSUnset && cspHeader != ReflectedXSSInvalid;
353 
354         m_xssProtection = combineXSSProtectionHeaderAndCSP(xssProtectionHeader, cspHeader);
355         // FIXME: Combine the two report URLs in some reasonable way.
356         if (auditorDelegate)
357             auditorDelegate->setReportURL(xssProtectionReportURL.copy());
358 
359         FormData* httpBody = documentLoader->request().httpBody();
360         if (httpBody && !httpBody->isEmpty())
361             m_httpBodyAsString = httpBody->flattenToString();
362     }
363 
364     setEncoding(m_encoding);
365 }
366 
setEncoding(const WTF::TextEncoding & encoding)367 void XSSAuditor::setEncoding(const WTF::TextEncoding& encoding)
368 {
369     const size_t miniumLengthForSuffixTree = 512; // FIXME: Tune this parameter.
370     const int suffixTreeDepth = 5;
371 
372     if (!encoding.isValid())
373         return;
374 
375     m_encoding = encoding;
376 
377     m_decodedURL = canonicalize(m_documentURL.string(), NoTruncation);
378     if (m_decodedURL.find(isRequiredForInjection) == kNotFound)
379         m_decodedURL = String();
380 
381     if (!m_httpBodyAsString.isEmpty()) {
382         m_decodedHTTPBody = canonicalize(m_httpBodyAsString, NoTruncation);
383         m_httpBodyAsString = String();
384         if (m_decodedHTTPBody.find(isRequiredForInjection) == kNotFound)
385             m_decodedHTTPBody = String();
386             if (m_decodedHTTPBody.length() >= miniumLengthForSuffixTree)
387                 m_decodedHTTPBodySuffixTree = adoptPtr(new SuffixTree<ASCIICodebook>(m_decodedHTTPBody, suffixTreeDepth));
388     }
389 
390     if (m_decodedURL.isEmpty() && m_decodedHTTPBody.isEmpty())
391         m_isEnabled = false;
392 }
393 
filterToken(const FilterTokenRequest & request)394 PassOwnPtr<XSSInfo> XSSAuditor::filterToken(const FilterTokenRequest& request)
395 {
396     ASSERT(m_state != Uninitialized);
397     if (!m_isEnabled || m_xssProtection == AllowReflectedXSS)
398         return nullptr;
399 
400     bool didBlockScript = false;
401     if (request.token.type() == HTMLToken::StartTag)
402         didBlockScript = filterStartToken(request);
403     else if (m_scriptTagNestingLevel) {
404         if (request.token.type() == HTMLToken::Character)
405             didBlockScript = filterCharacterToken(request);
406         else if (request.token.type() == HTMLToken::EndTag)
407             filterEndToken(request);
408     }
409 
410     if (didBlockScript) {
411         bool didBlockEntirePage = (m_xssProtection == BlockReflectedXSS);
412         OwnPtr<XSSInfo> xssInfo = XSSInfo::create(m_documentURL, didBlockEntirePage, m_didSendValidXSSProtectionHeader, m_didSendValidCSPHeader);
413         return xssInfo.release();
414     }
415     return nullptr;
416 }
417 
filterStartToken(const FilterTokenRequest & request)418 bool XSSAuditor::filterStartToken(const FilterTokenRequest& request)
419 {
420     m_state = FilteringTokens;
421     bool didBlockScript = eraseDangerousAttributesIfInjected(request);
422 
423     if (hasName(request.token, scriptTag)) {
424         didBlockScript |= filterScriptToken(request);
425         ASSERT(request.shouldAllowCDATA || !m_scriptTagNestingLevel);
426         m_scriptTagNestingLevel++;
427     } else if (hasName(request.token, objectTag))
428         didBlockScript |= filterObjectToken(request);
429     else if (hasName(request.token, paramTag))
430         didBlockScript |= filterParamToken(request);
431     else if (hasName(request.token, embedTag))
432         didBlockScript |= filterEmbedToken(request);
433     else if (hasName(request.token, appletTag))
434         didBlockScript |= filterAppletToken(request);
435     else if (hasName(request.token, iframeTag) || hasName(request.token, frameTag))
436         didBlockScript |= filterFrameToken(request);
437     else if (hasName(request.token, metaTag))
438         didBlockScript |= filterMetaToken(request);
439     else if (hasName(request.token, baseTag))
440         didBlockScript |= filterBaseToken(request);
441     else if (hasName(request.token, formTag))
442         didBlockScript |= filterFormToken(request);
443     else if (hasName(request.token, inputTag))
444         didBlockScript |= filterInputToken(request);
445     else if (hasName(request.token, buttonTag))
446         didBlockScript |= filterButtonToken(request);
447 
448     return didBlockScript;
449 }
450 
filterEndToken(const FilterTokenRequest & request)451 void XSSAuditor::filterEndToken(const FilterTokenRequest& request)
452 {
453     ASSERT(m_scriptTagNestingLevel);
454     m_state = FilteringTokens;
455     if (hasName(request.token, scriptTag)) {
456         m_scriptTagNestingLevel--;
457         ASSERT(request.shouldAllowCDATA || !m_scriptTagNestingLevel);
458     }
459 }
460 
filterCharacterToken(const FilterTokenRequest & request)461 bool XSSAuditor::filterCharacterToken(const FilterTokenRequest& request)
462 {
463     ASSERT(m_scriptTagNestingLevel);
464     ASSERT(m_state != Uninitialized);
465     if (m_state == PermittingAdjacentCharacterTokens)
466         return false;
467 
468     if ((m_state == SuppressingAdjacentCharacterTokens)
469         || (m_scriptTagFoundInRequest && isContainedInRequest(canonicalizedSnippetForJavaScript(request)))) {
470         request.token.eraseCharacters();
471         request.token.appendToCharacter(' '); // Technically, character tokens can't be empty.
472         m_state = SuppressingAdjacentCharacterTokens;
473         return true;
474     }
475 
476     m_state = PermittingAdjacentCharacterTokens;
477     return false;
478 }
479 
filterScriptToken(const FilterTokenRequest & request)480 bool XSSAuditor::filterScriptToken(const FilterTokenRequest& request)
481 {
482     ASSERT(request.token.type() == HTMLToken::StartTag);
483     ASSERT(hasName(request.token, scriptTag));
484 
485     bool didBlockScript = false;
486     m_scriptTagFoundInRequest = isContainedInRequest(canonicalizedSnippetForTagName(request));
487     if (m_scriptTagFoundInRequest) {
488         didBlockScript |= eraseAttributeIfInjected(request, srcAttr, blankURL().string(), SrcLikeAttributeTruncation);
489         didBlockScript |= eraseAttributeIfInjected(request, XLinkNames::hrefAttr, blankURL().string(), SrcLikeAttributeTruncation);
490     }
491     return didBlockScript;
492 }
493 
filterObjectToken(const FilterTokenRequest & request)494 bool XSSAuditor::filterObjectToken(const FilterTokenRequest& request)
495 {
496     ASSERT(request.token.type() == HTMLToken::StartTag);
497     ASSERT(hasName(request.token, objectTag));
498 
499     bool didBlockScript = false;
500     if (isContainedInRequest(canonicalizedSnippetForTagName(request))) {
501         didBlockScript |= eraseAttributeIfInjected(request, dataAttr, blankURL().string(), SrcLikeAttributeTruncation);
502         didBlockScript |= eraseAttributeIfInjected(request, typeAttr);
503         didBlockScript |= eraseAttributeIfInjected(request, classidAttr);
504     }
505     return didBlockScript;
506 }
507 
filterParamToken(const FilterTokenRequest & request)508 bool XSSAuditor::filterParamToken(const FilterTokenRequest& request)
509 {
510     ASSERT(request.token.type() == HTMLToken::StartTag);
511     ASSERT(hasName(request.token, paramTag));
512 
513     size_t indexOfNameAttribute;
514     if (!findAttributeWithName(request.token, nameAttr, indexOfNameAttribute))
515         return false;
516 
517     const HTMLToken::Attribute& nameAttribute = request.token.attributes().at(indexOfNameAttribute);
518     if (!HTMLParamElement::isURLParameter(String(nameAttribute.value)))
519         return false;
520 
521     return eraseAttributeIfInjected(request, valueAttr, blankURL().string(), SrcLikeAttributeTruncation);
522 }
523 
filterEmbedToken(const FilterTokenRequest & request)524 bool XSSAuditor::filterEmbedToken(const FilterTokenRequest& request)
525 {
526     ASSERT(request.token.type() == HTMLToken::StartTag);
527     ASSERT(hasName(request.token, embedTag));
528 
529     bool didBlockScript = false;
530     if (isContainedInRequest(canonicalizedSnippetForTagName(request))) {
531         didBlockScript |= eraseAttributeIfInjected(request, codeAttr, String(), SrcLikeAttributeTruncation);
532         didBlockScript |= eraseAttributeIfInjected(request, srcAttr, blankURL().string(), SrcLikeAttributeTruncation);
533         didBlockScript |= eraseAttributeIfInjected(request, typeAttr);
534     }
535     return didBlockScript;
536 }
537 
filterAppletToken(const FilterTokenRequest & request)538 bool XSSAuditor::filterAppletToken(const FilterTokenRequest& request)
539 {
540     ASSERT(request.token.type() == HTMLToken::StartTag);
541     ASSERT(hasName(request.token, appletTag));
542 
543     bool didBlockScript = false;
544     if (isContainedInRequest(canonicalizedSnippetForTagName(request))) {
545         didBlockScript |= eraseAttributeIfInjected(request, codeAttr, String(), SrcLikeAttributeTruncation);
546         didBlockScript |= eraseAttributeIfInjected(request, objectAttr);
547     }
548     return didBlockScript;
549 }
550 
filterFrameToken(const FilterTokenRequest & request)551 bool XSSAuditor::filterFrameToken(const FilterTokenRequest& request)
552 {
553     ASSERT(request.token.type() == HTMLToken::StartTag);
554     ASSERT(hasName(request.token, iframeTag) || hasName(request.token, frameTag));
555 
556     bool didBlockScript = eraseAttributeIfInjected(request, srcdocAttr, String(), ScriptLikeAttributeTruncation);
557     if (isContainedInRequest(canonicalizedSnippetForTagName(request)))
558         didBlockScript |= eraseAttributeIfInjected(request, srcAttr, String(), SrcLikeAttributeTruncation);
559 
560     return didBlockScript;
561 }
562 
filterMetaToken(const FilterTokenRequest & request)563 bool XSSAuditor::filterMetaToken(const FilterTokenRequest& request)
564 {
565     ASSERT(request.token.type() == HTMLToken::StartTag);
566     ASSERT(hasName(request.token, metaTag));
567 
568     return eraseAttributeIfInjected(request, http_equivAttr);
569 }
570 
filterBaseToken(const FilterTokenRequest & request)571 bool XSSAuditor::filterBaseToken(const FilterTokenRequest& request)
572 {
573     ASSERT(request.token.type() == HTMLToken::StartTag);
574     ASSERT(hasName(request.token, baseTag));
575 
576     return eraseAttributeIfInjected(request, hrefAttr);
577 }
578 
filterFormToken(const FilterTokenRequest & request)579 bool XSSAuditor::filterFormToken(const FilterTokenRequest& request)
580 {
581     ASSERT(request.token.type() == HTMLToken::StartTag);
582     ASSERT(hasName(request.token, formTag));
583 
584     return eraseAttributeIfInjected(request, actionAttr, kURLWithUniqueOrigin);
585 }
586 
filterInputToken(const FilterTokenRequest & request)587 bool XSSAuditor::filterInputToken(const FilterTokenRequest& request)
588 {
589     ASSERT(request.token.type() == HTMLToken::StartTag);
590     ASSERT(hasName(request.token, inputTag));
591 
592     return eraseAttributeIfInjected(request, formactionAttr, kURLWithUniqueOrigin, SrcLikeAttributeTruncation);
593 }
594 
filterButtonToken(const FilterTokenRequest & request)595 bool XSSAuditor::filterButtonToken(const FilterTokenRequest& request)
596 {
597     ASSERT(request.token.type() == HTMLToken::StartTag);
598     ASSERT(hasName(request.token, buttonTag));
599 
600     return eraseAttributeIfInjected(request, formactionAttr, kURLWithUniqueOrigin, SrcLikeAttributeTruncation);
601 }
602 
eraseDangerousAttributesIfInjected(const FilterTokenRequest & request)603 bool XSSAuditor::eraseDangerousAttributesIfInjected(const FilterTokenRequest& request)
604 {
605     DEFINE_STATIC_LOCAL(String, safeJavaScriptURL, ("javascript:void(0)"));
606 
607     bool didBlockScript = false;
608     for (size_t i = 0; i < request.token.attributes().size(); ++i) {
609         bool eraseAttribute = false;
610         bool valueContainsJavaScriptURL = false;
611         const HTMLToken::Attribute& attribute = request.token.attributes().at(i);
612         // FIXME: Don't create a new String for every attribute.value in the document.
613         if (isNameOfInlineEventHandler(attribute.name)) {
614             eraseAttribute = isContainedInRequest(canonicalize(snippetFromAttribute(request, attribute), ScriptLikeAttributeTruncation));
615         } else if (isSemicolonSeparatedAttribute(attribute)) {
616             String subValue = semicolonSeparatedValueContainingJavaScriptURL(String(attribute.value));
617             if (!subValue.isEmpty()) {
618                 valueContainsJavaScriptURL = true;
619                 eraseAttribute = isContainedInRequest(canonicalize(nameFromAttribute(request, attribute), NoTruncation))
620                     && isContainedInRequest(canonicalize(subValue, ScriptLikeAttributeTruncation));
621             }
622         } else if (protocolIsJavaScript(stripLeadingAndTrailingHTMLSpaces(String(attribute.value)))) {
623             valueContainsJavaScriptURL = true;
624             eraseAttribute = isContainedInRequest(canonicalize(snippetFromAttribute(request, attribute), ScriptLikeAttributeTruncation));
625         }
626         if (!eraseAttribute)
627             continue;
628         request.token.eraseValueOfAttribute(i);
629         if (valueContainsJavaScriptURL)
630             request.token.appendToAttributeValue(i, safeJavaScriptURL);
631         didBlockScript = true;
632     }
633     return didBlockScript;
634 }
635 
eraseAttributeIfInjected(const FilterTokenRequest & request,const QualifiedName & attributeName,const String & replacementValue,TruncationKind treatment)636 bool XSSAuditor::eraseAttributeIfInjected(const FilterTokenRequest& request, const QualifiedName& attributeName, const String& replacementValue, TruncationKind treatment)
637 {
638     size_t indexOfAttribute = 0;
639     if (!findAttributeWithName(request.token, attributeName, indexOfAttribute))
640         return false;
641 
642     const HTMLToken::Attribute& attribute = request.token.attributes().at(indexOfAttribute);
643     if (!isContainedInRequest(canonicalize(snippetFromAttribute(request, attribute), treatment)))
644         return false;
645 
646     if (threadSafeMatch(attributeName, srcAttr)) {
647         if (isLikelySafeResource(String(attribute.value)))
648             return false;
649     } else if (threadSafeMatch(attributeName, http_equivAttr)) {
650         if (!isDangerousHTTPEquiv(String(attribute.value)))
651             return false;
652     }
653 
654     request.token.eraseValueOfAttribute(indexOfAttribute);
655     if (!replacementValue.isEmpty())
656         request.token.appendToAttributeValue(indexOfAttribute, replacementValue);
657 
658     return true;
659 }
660 
canonicalizedSnippetForTagName(const FilterTokenRequest & request)661 String XSSAuditor::canonicalizedSnippetForTagName(const FilterTokenRequest& request)
662 {
663     // Grab a fixed number of characters equal to the length of the token's name plus one (to account for the "<").
664     return canonicalize(request.sourceTracker.sourceForToken(request.token).substring(0, request.token.name().size() + 1), NoTruncation);
665 }
666 
nameFromAttribute(const FilterTokenRequest & request,const HTMLToken::Attribute & attribute)667 String XSSAuditor::nameFromAttribute(const FilterTokenRequest& request, const HTMLToken::Attribute& attribute)
668 {
669     // The range inlcudes the character which terminates the name. So,
670     // for an input of |name="value"|, the snippet is |name=|.
671     int start = attribute.nameRange.start - request.token.startIndex();
672     int end = attribute.valueRange.start - request.token.startIndex();
673     return request.sourceTracker.sourceForToken(request.token).substring(start, end - start);
674 }
675 
snippetFromAttribute(const FilterTokenRequest & request,const HTMLToken::Attribute & attribute)676 String XSSAuditor::snippetFromAttribute(const FilterTokenRequest& request, const HTMLToken::Attribute& attribute)
677 {
678     // The range doesn't include the character which terminates the value. So,
679     // for an input of |name="value"|, the snippet is |name="value|. For an
680     // unquoted input of |name=value |, the snippet is |name=value|.
681     // FIXME: We should grab one character before the name also.
682     int start = attribute.nameRange.start - request.token.startIndex();
683     int end = attribute.valueRange.end - request.token.startIndex();
684     return request.sourceTracker.sourceForToken(request.token).substring(start, end - start);
685 }
686 
canonicalize(String snippet,TruncationKind treatment)687 String XSSAuditor::canonicalize(String snippet, TruncationKind treatment)
688 {
689     String decodedSnippet = fullyDecodeString(snippet, m_encoding);
690 
691     if (treatment != NoTruncation) {
692         decodedSnippet.truncate(kMaximumFragmentLengthTarget);
693         if (treatment == SrcLikeAttributeTruncation)
694             truncateForSrcLikeAttribute(decodedSnippet);
695         else if (treatment == ScriptLikeAttributeTruncation)
696             truncateForScriptLikeAttribute(decodedSnippet);
697     }
698 
699     return decodedSnippet.removeCharacters(&isNonCanonicalCharacter);
700 }
701 
canonicalizedSnippetForJavaScript(const FilterTokenRequest & request)702 String XSSAuditor::canonicalizedSnippetForJavaScript(const FilterTokenRequest& request)
703 {
704     String string = request.sourceTracker.sourceForToken(request.token);
705     size_t startPosition = 0;
706     size_t endPosition = string.length();
707     size_t foundPosition = kNotFound;
708     size_t lastNonSpacePosition = kNotFound;
709 
710     // Skip over initial comments to find start of code.
711     while (startPosition < endPosition) {
712         while (startPosition < endPosition && isHTMLSpace<UChar>(string[startPosition]))
713             startPosition++;
714 
715         // Under SVG/XML rules, only HTML comment syntax matters and the parser returns
716         // these as a separate comment tokens. Having consumed whitespace, we need not look
717         // further for these.
718         if (request.shouldAllowCDATA)
719             break;
720 
721         // Under HTML rules, both the HTML and JS comment synatx matters, and the HTML
722         // comment ends at the end of the line, not with -->.
723         if (startsHTMLCommentAt(string, startPosition) || startsSingleLineCommentAt(string, startPosition)) {
724             while (startPosition < endPosition && !isJSNewline(string[startPosition]))
725                 startPosition++;
726         } else if (startsMultiLineCommentAt(string, startPosition)) {
727             if (startPosition + 2 < endPosition && (foundPosition = string.find("*/", startPosition + 2)) != kNotFound)
728                 startPosition = foundPosition + 2;
729             else
730                 startPosition = endPosition;
731         } else
732             break;
733     }
734 
735     String result;
736     while (startPosition < endPosition && !result.length()) {
737         // Stop at next comment (using the same rules as above for SVG/XML vs HTML), when we encounter a comma,
738         // when we hit an opening <script> tag, or when we exceed the maximum length target. The comma rule
739         // covers a common parameter concatenation case performed by some web servers.
740         lastNonSpacePosition = kNotFound;
741         for (foundPosition = startPosition; foundPosition < endPosition; foundPosition++) {
742             if (!request.shouldAllowCDATA) {
743                 if (startsSingleLineCommentAt(string, foundPosition)
744                     || startsMultiLineCommentAt(string, foundPosition)
745                     || startsHTMLCommentAt(string, foundPosition)) {
746                     break;
747                 }
748             }
749             if (string[foundPosition] == ',')
750                 break;
751 
752             if (lastNonSpacePosition != kNotFound && startsOpeningScriptTagAt(string, foundPosition)) {
753                 foundPosition = lastNonSpacePosition;
754                 break;
755             }
756             if (foundPosition > startPosition + kMaximumFragmentLengthTarget) {
757                 // After hitting the length target, we can only stop at a point where we know we are
758                 // not in the middle of a %-escape sequence. For the sake of simplicity, approximate
759                 // not stopping inside a (possibly multiply encoded) %-escape sequence by breaking on
760                 // whitespace only. We should have enough text in these cases to avoid false positives.
761                 if (isHTMLSpace<UChar>(string[foundPosition]))
762                     break;
763             }
764             if (!isHTMLSpace<UChar>(string[foundPosition]))
765                 lastNonSpacePosition = foundPosition;
766         }
767         result = canonicalize(string.substring(startPosition, foundPosition - startPosition), NoTruncation);
768         startPosition = foundPosition + 1;
769     }
770 
771     return result;
772 }
773 
isContainedInRequest(const String & decodedSnippet)774 bool XSSAuditor::isContainedInRequest(const String& decodedSnippet)
775 {
776     if (decodedSnippet.isEmpty())
777         return false;
778     if (m_decodedURL.find(decodedSnippet, 0, false) != kNotFound)
779         return true;
780     if (m_decodedHTTPBodySuffixTree && !m_decodedHTTPBodySuffixTree->mightContain(decodedSnippet))
781         return false;
782     return m_decodedHTTPBody.find(decodedSnippet, 0, false) != kNotFound;
783 }
784 
isLikelySafeResource(const String & url)785 bool XSSAuditor::isLikelySafeResource(const String& url)
786 {
787     // Give empty URLs and about:blank a pass. Making a resourceURL from an
788     // empty string below will likely later fail the "no query args test" as
789     // it inherits the document's query args.
790     if (url.isEmpty() || url == blankURL().string())
791         return true;
792 
793     // If the resource is loaded from the same host as the enclosing page, it's
794     // probably not an XSS attack, so we reduce false positives by allowing the
795     // request, ignoring scheme and port considerations. If the resource has a
796     // query string, we're more suspicious, however, because that's pretty rare
797     // and the attacker might be able to trick a server-side script into doing
798     // something dangerous with the query string.
799     if (m_documentURL.host().isEmpty())
800         return false;
801 
802     KURL resourceURL(m_documentURL, url);
803     return (m_documentURL.host() == resourceURL.host() && resourceURL.query().isEmpty());
804 }
805 
isSafeToSendToAnotherThread() const806 bool XSSAuditor::isSafeToSendToAnotherThread() const
807 {
808     return m_documentURL.isSafeToSendToAnotherThread()
809         && m_decodedURL.isSafeToSendToAnotherThread()
810         && m_decodedHTTPBody.isSafeToSendToAnotherThread()
811         && m_httpBodyAsString.isSafeToSendToAnotherThread();
812 }
813 
814 } // namespace blink
815