1 /*
2 * Copyright (C) 2009, 2010 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26 #include "config.h"
27 #include "UserContentURLPattern.h"
28 #include "KURL.h"
29 #include <wtf/StdLibExtras.h>
30
31 namespace WebCore {
32
matchesPatterns(const KURL & url,const Vector<String> * whitelist,const Vector<String> * blacklist)33 bool UserContentURLPattern::matchesPatterns(const KURL& url, const Vector<String>* whitelist, const Vector<String>* blacklist)
34 {
35 // In order for a URL to be a match it has to be present in the whitelist and not present in the blacklist.
36 // If there is no whitelist at all, then all URLs are assumed to be in the whitelist.
37 bool matchesWhitelist = !whitelist || whitelist->isEmpty();
38 if (!matchesWhitelist) {
39 for (unsigned i = 0; i < whitelist->size(); ++i) {
40 UserContentURLPattern contentPattern(whitelist->at(i));
41 if (contentPattern.matches(url)) {
42 matchesWhitelist = true;
43 break;
44 }
45 }
46 }
47
48 bool matchesBlacklist = false;
49 if (blacklist) {
50 for (unsigned i = 0; i < blacklist->size(); ++i) {
51 UserContentURLPattern contentPattern(blacklist->at(i));
52 if (contentPattern.matches(url)) {
53 matchesBlacklist = true;
54 break;
55 }
56 }
57 }
58
59 return matchesWhitelist && !matchesBlacklist;
60 }
61
parse(const String & pattern)62 bool UserContentURLPattern::parse(const String& pattern)
63 {
64 DEFINE_STATIC_LOCAL(const String, schemeSeparator, ("://"));
65
66 size_t schemeEndPos = pattern.find(schemeSeparator);
67 if (schemeEndPos == notFound)
68 return false;
69
70 m_scheme = pattern.left(schemeEndPos);
71
72 unsigned hostStartPos = schemeEndPos + schemeSeparator.length();
73 if (hostStartPos >= pattern.length())
74 return false;
75
76 int pathStartPos = 0;
77
78 if (equalIgnoringCase(m_scheme, "file"))
79 pathStartPos = hostStartPos;
80 else {
81 size_t hostEndPos = pattern.find("/", hostStartPos);
82 if (hostEndPos == notFound)
83 return false;
84
85 m_host = pattern.substring(hostStartPos, hostEndPos - hostStartPos);
86 m_matchSubdomains = false;
87
88 if (m_host == "*") {
89 // The pattern can be just '*', which means match all domains.
90 m_host = "";
91 m_matchSubdomains = true;
92 } else if (m_host.startsWith("*.")) {
93 // The first component can be '*', which means to match all subdomains.
94 m_host = m_host.substring(2); // Length of "*."
95 m_matchSubdomains = true;
96 }
97
98 // No other '*' can occur in the host.
99 if (m_host.find("*") != notFound)
100 return false;
101
102 pathStartPos = hostEndPos;
103 }
104
105 m_path = pattern.right(pattern.length() - pathStartPos);
106
107 return true;
108 }
109
matches(const KURL & test) const110 bool UserContentURLPattern::matches(const KURL& test) const
111 {
112 if (m_invalid)
113 return false;
114
115 if (!equalIgnoringCase(test.protocol(), m_scheme))
116 return false;
117
118 if (!equalIgnoringCase(m_scheme, "file") && !matchesHost(test))
119 return false;
120
121 return matchesPath(test);
122 }
123
matchesHost(const KURL & test) const124 bool UserContentURLPattern::matchesHost(const KURL& test) const
125 {
126 const String& host = test.host();
127 if (equalIgnoringCase(host, m_host))
128 return true;
129
130 if (!m_matchSubdomains)
131 return false;
132
133 // If we're matching subdomains, and we have no host, that means the pattern
134 // was <scheme>://*/<whatever>, so we match anything.
135 if (!m_host.length())
136 return true;
137
138 // Check if the domain is a subdomain of our host.
139 if (!host.endsWith(m_host, false))
140 return false;
141
142 ASSERT(host.length() > m_host.length());
143
144 // Check that the character before the suffix is a period.
145 return host[host.length() - m_host.length() - 1] == '.';
146 }
147
// Glob-style matcher used for the path component of a URL pattern.
//
// The pattern is compared with the test string character by character, with
// no case folding. A '*' in the pattern matches any run of characters in the
// test string, including an empty run; every other pattern character must
// match literally. Consecutive '*' characters behave like a single one.
struct MatchTester
{
    const String m_pattern;
    unsigned m_patternIndex; // Next pattern character to consume.

    const String m_test;
    unsigned m_testIndex; // Next test-string character to consume.

    MatchTester(const String& pattern, const String& test)
        : m_pattern(pattern)
        , m_patternIndex(0)
        , m_test(test)
        , m_testIndex(0)
    {
    }

    bool testStringFinished() const { return m_testIndex >= m_test.length(); }
    bool patternStringFinished() const { return m_patternIndex >= m_pattern.length(); }

    // Skips the run of '*' characters (if any) at the current pattern position.
    void eatWildcard()
    {
        while (!patternStringFinished() && m_pattern[m_patternIndex] == '*')
            m_patternIndex++;
    }

    // Advances both cursors over characters that match literally, stopping at
    // the first '*' in the pattern, the first mismatch, or the end of either
    // string.
    void eatSameChars()
    {
        while (!patternStringFinished() && !testStringFinished()) {
            if (m_pattern[m_patternIndex] == '*')
                return;
            if (m_pattern[m_patternIndex] != m_test[m_testIndex])
                return;
            m_patternIndex++;
            m_testIndex++;
        }
    }

    // Returns true when the whole test string (from the current cursors) is
    // matched by the whole pattern.
    //
    // Matching is iterative. On a mismatch we only ever need to go back to the
    // most recent '*' and let it absorb one more test character: wildcards
    // before it were already satisfied with the shortest possible prefix, so
    // revisiting them cannot turn a failure into a success. This bounds the
    // work by (pattern length x test length) instead of growing exponentially
    // with the number of wildcards.
    bool test()
    {
        bool sawWildcard = false;
        unsigned resumePatternIndex = 0; // Pattern position just after the last '*' run.
        unsigned resumeTestIndex = 0; // Test position where that '*' currently stops absorbing.

        while (true) {
            eatSameChars();

            if (testStringFinished())
                break;

            if (!patternStringFinished() && m_pattern[m_patternIndex] == '*') {
                eatWildcard();

                // A trailing wildcard swallows whatever is left of the test string.
                if (patternStringFinished())
                    return true;

                sawWildcard = true;
                resumePatternIndex = m_patternIndex;
                resumeTestIndex = m_testIndex;
                continue;
            }

            // Either a literal mismatch or the pattern ran out while test
            // characters remain. Without an earlier wildcard to stretch, this
            // cannot match.
            if (!sawWildcard)
                return false;

            // Let the last wildcard absorb one more character and retry the
            // remainder of the pattern from there.
            m_patternIndex = resumePatternIndex;
            m_testIndex = ++resumeTestIndex;
        }

        // The test string is exhausted; whatever is left of the pattern must
        // be nothing but wildcards.
        eatWildcard();
        return patternStringFinished();
    }
};
224
matchesPath(const KURL & test) const225 bool UserContentURLPattern::matchesPath(const KURL& test) const
226 {
227 MatchTester match(m_path, test.path());
228 return match.test();
229 }
230
231 } // namespace WebCore
232