• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2011 Adam Barth. All Rights Reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 
26 #include "config.h"
27 #include "XSSFilter.h"
28 
29 #include "DOMWindow.h"
30 #include "Document.h"
31 #include "DocumentLoader.h"
32 #include "Frame.h"
33 #include "HTMLDocumentParser.h"
34 #include "HTMLNames.h"
35 #include "HTMLParamElement.h"
36 #include "HTMLParserIdioms.h"
37 #include "Settings.h"
38 #include "TextEncoding.h"
39 #include "TextResourceDecoder.h"
40 #include <wtf/text/CString.h>
41 
42 namespace WebCore {
43 
44 using namespace HTMLNames;
45 
isNonCanonicalCharacter(UChar c)46 static bool isNonCanonicalCharacter(UChar c)
47 {
48     // We remove all non-ASCII characters, including non-printable ASCII characters.
49     //
50     // Note, we don't remove backslashes like PHP stripslashes(), which among other things converts "\\0" to the \0 character.
51     // Instead, we remove backslashes and zeros (since the string "\\0" =(remove backslashes)=> "0"). However, this has the
52     // adverse effect that we remove any legitimate zeros from a string.
53     //
54     // For instance: new String("http://localhost:8000") => new String("http://localhost:8").
55     return (c == '\\' || c == '0' || c == '\0' || c >= 127);
56 }
57 
// Returns a copy of |string| with every character rejected by
// isNonCanonicalCharacter() removed.
static String canonicalize(const String& string)
{
    String canonicalForm = string.removeCharacters(&isNonCanonicalCharacter);
    return canonicalForm;
}
62 
isRequiredForInjection(UChar c)63 static bool isRequiredForInjection(UChar c)
64 {
65     return (c == '\'' || c == '"' || c == '<' || c == '>');
66 }
67 
hasName(const HTMLToken & token,const QualifiedName & name)68 static bool hasName(const HTMLToken& token, const QualifiedName& name)
69 {
70     return equalIgnoringNullity(token.name(), static_cast<const String&>(name.localName()));
71 }
72 
findAttributeWithName(const HTMLToken & token,const QualifiedName & name,size_t & indexOfMatchingAttribute)73 static bool findAttributeWithName(const HTMLToken& token, const QualifiedName& name, size_t& indexOfMatchingAttribute)
74 {
75     for (size_t i = 0; i < token.attributes().size(); ++i) {
76         if (equalIgnoringNullity(token.attributes().at(i).m_name, name.localName())) {
77             indexOfMatchingAttribute = i;
78             return true;
79         }
80     }
81     return false;
82 }
83 
isNameOfInlineEventHandler(const Vector<UChar,32> & name)84 static bool isNameOfInlineEventHandler(const Vector<UChar, 32>& name)
85 {
86     const size_t lengthOfShortestInlineEventHandlerName = 5; // To wit: oncut.
87     if (name.size() < lengthOfShortestInlineEventHandlerName)
88         return false;
89     return name[0] == 'o' && name[1] == 'n';
90 }
91 
isDangerousHTTPEquiv(const String & value)92 static bool isDangerousHTTPEquiv(const String& value)
93 {
94     String equiv = value.stripWhiteSpace();
95     return equalIgnoringCase(equiv, "refresh") || equalIgnoringCase(equiv, "set-cookie");
96 }
97 
containsJavaScriptURL(const Vector<UChar,32> & value)98 static bool containsJavaScriptURL(const Vector<UChar, 32>& value)
99 {
100     static const char javaScriptScheme[] = "javascript:";
101     static const size_t lengthOfJavaScriptScheme = sizeof(javaScriptScheme) - 1;
102 
103     size_t i;
104     for (i = 0; i < value.size(); ++i) {
105         if (!isHTMLSpace(value[i]))
106             break;
107     }
108 
109     if (value.size() - i < lengthOfJavaScriptScheme)
110         return false;
111 
112     return equalIgnoringCase(value.data() + i, javaScriptScheme, lengthOfJavaScriptScheme);
113 }
114 
// Produces the canonical form of URL-encoded request data: '+' becomes a
// space, URL escape sequences are decoded, the result is serialized as UTF-8
// and decoded again with |encoding|, and the outcome is canonicalized.
static String decodeURL(const String& string, const TextEncoding& encoding)
{
    String unescaped = string;
    unescaped.replace('+', ' ');
    unescaped = decodeURLEscapeSequences(unescaped);

    CString unescapedUTF8 = unescaped.utf8();
    String reinterpreted = encoding.decode(unescapedUTF8.data(), unescapedUTF8.length());

    // FIXME: Is this check necessary?
    // If decoding with |encoding| produced nothing, fall back to the
    // unescaped text.
    return canonicalize(reinterpreted.isEmpty() ? unescaped : reinterpreted);
}
127 
XSSFilter(HTMLDocumentParser * parser)128 XSSFilter::XSSFilter(HTMLDocumentParser* parser)
129     : m_parser(parser)
130     , m_isEnabled(false)
131     , m_xssProtection(XSSProtectionEnabled)
132     , m_state(Uninitialized)
133 {
134     ASSERT(m_parser);
135     if (Frame* frame = parser->document()->frame()) {
136         if (Settings* settings = frame->settings())
137             m_isEnabled = settings->xssAuditorEnabled();
138     }
139     // Although tempting to call init() at this point, the various objects
140     // we want to reference might not all have been constructed yet.
141 }
142 
init()143 void XSSFilter::init()
144 {
145     const size_t miniumLengthForSuffixTree = 512; // FIXME: Tune this parameter.
146     const int suffixTreeDepth = 5;
147 
148     ASSERT(m_state == Uninitialized);
149     m_state = Initial;
150 
151     if (!m_isEnabled)
152         return;
153 
154     // In theory, the Document could have detached from the Frame after the
155     // XSSFilter was constructed.
156     if (!m_parser->document()->frame()) {
157         m_isEnabled = false;
158         return;
159     }
160 
161     const KURL& url = m_parser->document()->url();
162 
163     if (url.protocolIsData()) {
164         m_isEnabled = false;
165         return;
166     }
167 
168     TextResourceDecoder* decoder = m_parser->document()->decoder();
169     m_decodedURL = decoder ? decodeURL(url.string(), decoder->encoding()) : url.string();
170     if (m_decodedURL.find(isRequiredForInjection, 0) == notFound)
171         m_decodedURL = String();
172 
173     if (DocumentLoader* documentLoader = m_parser->document()->frame()->loader()->documentLoader()) {
174         DEFINE_STATIC_LOCAL(String, XSSProtectionHeader, ("X-XSS-Protection"));
175         m_xssProtection = parseXSSProtectionHeader(documentLoader->response().httpHeaderField(XSSProtectionHeader));
176 
177         FormData* httpBody = documentLoader->originalRequest().httpBody();
178         if (httpBody && !httpBody->isEmpty()) {
179             String httpBodyAsString = httpBody->flattenToString();
180             m_decodedHTTPBody = decoder ? decodeURL(httpBodyAsString, decoder->encoding()) : httpBodyAsString;
181             if (m_decodedHTTPBody.find(isRequiredForInjection, 0) == notFound)
182                 m_decodedHTTPBody = String();
183             if (m_decodedHTTPBody.length() >= miniumLengthForSuffixTree)
184                 m_decodedHTTPBodySuffixTree = adoptPtr(new SuffixTree<ASCIICodebook>(m_decodedHTTPBody, suffixTreeDepth));
185         }
186     }
187 
188     if (m_decodedURL.isEmpty() && m_decodedHTTPBody.isEmpty())
189         m_isEnabled = false;
190 }
191 
filterToken(HTMLToken & token)192 void XSSFilter::filterToken(HTMLToken& token)
193 {
194     if (m_state == Uninitialized) {
195         init();
196         ASSERT(m_state == Initial);
197     }
198 
199     if (!m_isEnabled || m_xssProtection == XSSProtectionDisabled)
200         return;
201 
202     bool didBlockScript = false;
203 
204     switch (m_state) {
205     case Uninitialized:
206         ASSERT_NOT_REACHED();
207         break;
208     case Initial:
209         didBlockScript = filterTokenInitial(token);
210         break;
211     case AfterScriptStartTag:
212         didBlockScript = filterTokenAfterScriptStartTag(token);
213         ASSERT(m_state == Initial);
214         m_cachedSnippet = String();
215         break;
216     }
217 
218     if (didBlockScript) {
219         // FIXME: Consider using a more helpful console message.
220         DEFINE_STATIC_LOCAL(String, consoleMessage, ("Refused to execute a JavaScript script. Source code of script found within request.\n"));
221         // FIXME: We should add the real line number to the console.
222         m_parser->document()->domWindow()->console()->addMessage(JSMessageSource, LogMessageType, ErrorMessageLevel, consoleMessage, 1, String());
223 
224         if (m_xssProtection == XSSProtectionBlockEnabled) {
225             m_parser->document()->frame()->loader()->stopAllLoaders();
226             m_parser->document()->frame()->navigationScheduler()->scheduleLocationChange(m_parser->document()->securityOrigin(), blankURL(), String());
227         }
228     }
229 }
230 
filterTokenInitial(HTMLToken & token)231 bool XSSFilter::filterTokenInitial(HTMLToken& token)
232 {
233     ASSERT(m_state == Initial);
234 
235     if (token.type() != HTMLToken::StartTag)
236         return false;
237 
238     bool didBlockScript = eraseDangerousAttributesIfInjected(token);
239 
240     if (hasName(token, scriptTag))
241         didBlockScript |= filterScriptToken(token);
242     else if (hasName(token, objectTag))
243         didBlockScript |= filterObjectToken(token);
244     else if (hasName(token, paramTag))
245         didBlockScript |= filterParamToken(token);
246     else if (hasName(token, embedTag))
247         didBlockScript |= filterEmbedToken(token);
248     else if (hasName(token, appletTag))
249         didBlockScript |= filterAppletToken(token);
250     else if (hasName(token, iframeTag))
251         didBlockScript |= filterIframeToken(token);
252     else if (hasName(token, metaTag))
253         didBlockScript |= filterMetaToken(token);
254     else if (hasName(token, baseTag))
255         didBlockScript |= filterBaseToken(token);
256     else if (hasName(token, formTag))
257         didBlockScript |= filterFormToken(token);
258 
259     return didBlockScript;
260 }
261 
filterTokenAfterScriptStartTag(HTMLToken & token)262 bool XSSFilter::filterTokenAfterScriptStartTag(HTMLToken& token)
263 {
264     ASSERT(m_state == AfterScriptStartTag);
265     m_state = Initial;
266 
267     if (token.type() != HTMLToken::Character) {
268         ASSERT(token.type() == HTMLToken::EndTag || token.type() == HTMLToken::EndOfFile);
269         return false;
270     }
271 
272     int start = 0;
273     // FIXME: We probably want to grab only the first few characters of the
274     //        contents of the script element.
275     int end = token.endIndex() - token.startIndex();
276     if (isContainedInRequest(m_cachedSnippet + snippetForRange(token, start, end))) {
277         token.eraseCharacters();
278         token.appendToCharacter(' '); // Technically, character tokens can't be empty.
279         return true;
280     }
281     return false;
282 }
283 
filterScriptToken(HTMLToken & token)284 bool XSSFilter::filterScriptToken(HTMLToken& token)
285 {
286     ASSERT(m_state == Initial);
287     ASSERT(token.type() == HTMLToken::StartTag);
288     ASSERT(hasName(token, scriptTag));
289 
290     if (eraseAttributeIfInjected(token, srcAttr, blankURL().string()))
291         return true;
292 
293     m_state = AfterScriptStartTag;
294     m_cachedSnippet = m_parser->sourceForToken(token);
295     return false;
296 }
297 
filterObjectToken(HTMLToken & token)298 bool XSSFilter::filterObjectToken(HTMLToken& token)
299 {
300     ASSERT(m_state == Initial);
301     ASSERT(token.type() == HTMLToken::StartTag);
302     ASSERT(hasName(token, objectTag));
303 
304     bool didBlockScript = false;
305 
306     didBlockScript |= eraseAttributeIfInjected(token, dataAttr, blankURL().string());
307     didBlockScript |= eraseAttributeIfInjected(token, typeAttr);
308     didBlockScript |= eraseAttributeIfInjected(token, classidAttr);
309 
310     return didBlockScript;
311 }
312 
filterParamToken(HTMLToken & token)313 bool XSSFilter::filterParamToken(HTMLToken& token)
314 {
315     ASSERT(m_state == Initial);
316     ASSERT(token.type() == HTMLToken::StartTag);
317     ASSERT(hasName(token, paramTag));
318 
319     size_t indexOfNameAttribute;
320     if (!findAttributeWithName(token, nameAttr, indexOfNameAttribute))
321         return false;
322 
323     const HTMLToken::Attribute& nameAttribute = token.attributes().at(indexOfNameAttribute);
324     String name = String(nameAttribute.m_value.data(), nameAttribute.m_value.size());
325 
326     if (!HTMLParamElement::isURLParameter(name))
327         return false;
328 
329     return eraseAttributeIfInjected(token, valueAttr, blankURL().string());
330 }
331 
filterEmbedToken(HTMLToken & token)332 bool XSSFilter::filterEmbedToken(HTMLToken& token)
333 {
334     ASSERT(m_state == Initial);
335     ASSERT(token.type() == HTMLToken::StartTag);
336     ASSERT(hasName(token, embedTag));
337 
338     bool didBlockScript = false;
339 
340     didBlockScript |= eraseAttributeIfInjected(token, srcAttr, blankURL().string());
341     didBlockScript |= eraseAttributeIfInjected(token, typeAttr);
342 
343     return didBlockScript;
344 }
345 
filterAppletToken(HTMLToken & token)346 bool XSSFilter::filterAppletToken(HTMLToken& token)
347 {
348     ASSERT(m_state == Initial);
349     ASSERT(token.type() == HTMLToken::StartTag);
350     ASSERT(hasName(token, appletTag));
351 
352     bool didBlockScript = false;
353 
354     didBlockScript |= eraseAttributeIfInjected(token, codeAttr);
355     didBlockScript |= eraseAttributeIfInjected(token, objectAttr);
356 
357     return didBlockScript;
358 }
359 
filterIframeToken(HTMLToken & token)360 bool XSSFilter::filterIframeToken(HTMLToken& token)
361 {
362     ASSERT(m_state == Initial);
363     ASSERT(token.type() == HTMLToken::StartTag);
364     ASSERT(hasName(token, iframeTag));
365 
366     return eraseAttributeIfInjected(token, srcAttr);
367 }
368 
filterMetaToken(HTMLToken & token)369 bool XSSFilter::filterMetaToken(HTMLToken& token)
370 {
371     ASSERT(m_state == Initial);
372     ASSERT(token.type() == HTMLToken::StartTag);
373     ASSERT(hasName(token, metaTag));
374 
375     return eraseAttributeIfInjected(token, http_equivAttr);
376 }
377 
filterBaseToken(HTMLToken & token)378 bool XSSFilter::filterBaseToken(HTMLToken& token)
379 {
380     ASSERT(m_state == Initial);
381     ASSERT(token.type() == HTMLToken::StartTag);
382     ASSERT(hasName(token, baseTag));
383 
384     return eraseAttributeIfInjected(token, hrefAttr);
385 }
386 
filterFormToken(HTMLToken & token)387 bool XSSFilter::filterFormToken(HTMLToken& token)
388 {
389     ASSERT(m_state == Initial);
390     ASSERT(token.type() == HTMLToken::StartTag);
391     ASSERT(hasName(token, formTag));
392 
393     return eraseAttributeIfInjected(token, actionAttr);
394 }
395 
eraseDangerousAttributesIfInjected(HTMLToken & token)396 bool XSSFilter::eraseDangerousAttributesIfInjected(HTMLToken& token)
397 {
398     DEFINE_STATIC_LOCAL(String, safeJavaScriptURL, ("javascript:void(0)"));
399 
400     bool didBlockScript = false;
401     for (size_t i = 0; i < token.attributes().size(); ++i) {
402         const HTMLToken::Attribute& attribute = token.attributes().at(i);
403         bool isInlineEventHandler = isNameOfInlineEventHandler(attribute.m_name);
404         bool valueContainsJavaScriptURL = isInlineEventHandler ? false : containsJavaScriptURL(attribute.m_value);
405         if (!isInlineEventHandler && !valueContainsJavaScriptURL)
406             continue;
407         if (!isContainedInRequest(snippetForAttribute(token, attribute)))
408             continue;
409         token.eraseValueOfAttribute(i);
410         if (valueContainsJavaScriptURL)
411             token.appendToAttributeValue(i, safeJavaScriptURL);
412         didBlockScript = true;
413     }
414     return didBlockScript;
415 }
416 
eraseAttributeIfInjected(HTMLToken & token,const QualifiedName & attributeName,const String & replacementValue)417 bool XSSFilter::eraseAttributeIfInjected(HTMLToken& token, const QualifiedName& attributeName, const String& replacementValue)
418 {
419     size_t indexOfAttribute;
420     if (findAttributeWithName(token, attributeName, indexOfAttribute)) {
421         const HTMLToken::Attribute& attribute = token.attributes().at(indexOfAttribute);
422         if (isContainedInRequest(snippetForAttribute(token, attribute))) {
423             if (attributeName == srcAttr && isSameOriginResource(String(attribute.m_value.data(), attribute.m_value.size())))
424                 return false;
425             if (attributeName == http_equivAttr && !isDangerousHTTPEquiv(String(attribute.m_value.data(), attribute.m_value.size())))
426                 return false;
427             token.eraseValueOfAttribute(indexOfAttribute);
428             if (!replacementValue.isEmpty())
429                 token.appendToAttributeValue(indexOfAttribute, replacementValue);
430             return true;
431         }
432     }
433     return false;
434 }
435 
snippetForRange(const HTMLToken & token,int start,int end)436 String XSSFilter::snippetForRange(const HTMLToken& token, int start, int end)
437 {
438     // FIXME: There's an extra allocation here that we could save by
439     //        passing the range to the parser.
440     return m_parser->sourceForToken(token).substring(start, end - start);
441 }
442 
snippetForAttribute(const HTMLToken & token,const HTMLToken::Attribute & attribute)443 String XSSFilter::snippetForAttribute(const HTMLToken& token, const HTMLToken::Attribute& attribute)
444 {
445     // FIXME: We should grab one character before the name also.
446     int start = attribute.m_nameRange.m_start - token.startIndex();
447     // FIXME: We probably want to grab only the first few characters of the attribute value.
448     int end = attribute.m_valueRange.m_end - token.startIndex();
449     return snippetForRange(token, start, end);
450 }
451 
isContainedInRequest(const String & snippet)452 bool XSSFilter::isContainedInRequest(const String& snippet)
453 {
454     ASSERT(!snippet.isEmpty());
455     String canonicalizedSnippet = canonicalize(snippet);
456     ASSERT(!canonicalizedSnippet.isEmpty());
457     if (m_decodedURL.find(canonicalizedSnippet, 0, false) != notFound)
458         return true;
459     if (m_decodedHTTPBodySuffixTree && !m_decodedHTTPBodySuffixTree->mightContain(canonicalizedSnippet))
460         return false;
461     return m_decodedHTTPBody.find(canonicalizedSnippet, 0, false) != notFound;
462 }
463 
isSameOriginResource(const String & url)464 bool XSSFilter::isSameOriginResource(const String& url)
465 {
466     // If the resource is loaded from the same URL as the enclosing page, it's
467     // probably not an XSS attack, so we reduce false positives by allowing the
468     // request. If the resource has a query string, we're more suspicious,
469     // however, because that's pretty rare and the attacker might be able to
470     // trick a server-side script into doing something dangerous with the query
471     // string.
472     KURL resourceURL(m_parser->document()->url(), url);
473     return (m_parser->document()->url().host() == resourceURL.host() && resourceURL.query().isEmpty());
474 }
475 
476 }
477