// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5 #include "chrome/browser/supervised_user/supervised_user_url_filter.h"
6
7 #include "base/containers/hash_tables.h"
8 #include "base/files/file_path.h"
9 #include "base/json/json_file_value_serializer.h"
10 #include "base/metrics/histogram.h"
11 #include "base/sha1.h"
12 #include "base/strings/string_number_conversions.h"
13 #include "base/strings/string_util.h"
14 #include "base/task_runner_util.h"
15 #include "base/threading/sequenced_worker_pool.h"
16 #include "components/policy/core/browser/url_blacklist_manager.h"
17 #include "components/url_fixer/url_fixer.h"
18 #include "components/url_matcher/url_matcher.h"
19 #include "content/public/browser/browser_thread.h"
20 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
21 #include "url/gurl.h"
22
23 using content::BrowserThread;
24 using net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES;
25 using net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES;
26 using net::registry_controlled_domains::GetRegistryLength;
27 using policy::URLBlacklist;
28 using url_matcher::URLMatcher;
29 using url_matcher::URLMatcherConditionSet;
30
31 struct SupervisedUserURLFilter::Contents {
32 URLMatcher url_matcher;
33 std::map<URLMatcherConditionSet::ID, int> matcher_site_map;
34 base::hash_multimap<std::string, int> hash_site_map;
35 std::vector<SupervisedUserSiteList::Site> sites;
36 };
37
38 namespace {
39
// URL schemes not in this list (e.g., file:// and chrome://) will always be
// allowed. Both the strings and the pointers to them are const, so the table
// cannot be modified at runtime.
const char* const kFilteredSchemes[] = {
  "http",
  "https",
  "ftp",
  "gopher",
  "ws",
  "wss"
};
50
51
52 // This class encapsulates all the state that is required during construction of
53 // a new SupervisedUserURLFilter::Contents.
54 class FilterBuilder {
55 public:
56 FilterBuilder();
57 ~FilterBuilder();
58
59 // Adds a single URL pattern for the site identified by |site_id|.
60 bool AddPattern(const std::string& pattern, int site_id);
61
62 // Adds a single hostname SHA1 hash for the site identified by |site_id|.
63 void AddHostnameHash(const std::string& hash, int site_id);
64
65 // Adds all the sites in |site_list|, with URL patterns and hostname hashes.
66 void AddSiteList(SupervisedUserSiteList* site_list);
67
68 // Finalizes construction of the SupervisedUserURLFilter::Contents and returns
69 // them. This method should be called before this object is destroyed.
70 scoped_ptr<SupervisedUserURLFilter::Contents> Build();
71
72 private:
73 scoped_ptr<SupervisedUserURLFilter::Contents> contents_;
74 URLMatcherConditionSet::Vector all_conditions_;
75 URLMatcherConditionSet::ID matcher_id_;
76 };
77
FilterBuilder()78 FilterBuilder::FilterBuilder()
79 : contents_(new SupervisedUserURLFilter::Contents()),
80 matcher_id_(0) {}
81
~FilterBuilder()82 FilterBuilder::~FilterBuilder() {
83 DCHECK(!contents_.get());
84 }
85
AddPattern(const std::string & pattern,int site_id)86 bool FilterBuilder::AddPattern(const std::string& pattern, int site_id) {
87 DCHECK(BrowserThread::GetBlockingPool()->RunsTasksOnCurrentThread());
88 std::string scheme;
89 std::string host;
90 uint16 port;
91 std::string path;
92 std::string query;
93 bool match_subdomains = true;
94 URLBlacklist::SegmentURLCallback callback =
95 static_cast<URLBlacklist::SegmentURLCallback>(url_fixer::SegmentURL);
96 if (!URLBlacklist::FilterToComponents(
97 callback, pattern,
98 &scheme, &host, &match_subdomains, &port, &path, &query)) {
99 LOG(ERROR) << "Invalid pattern " << pattern;
100 return false;
101 }
102
103 scoped_refptr<URLMatcherConditionSet> condition_set =
104 URLBlacklist::CreateConditionSet(
105 &contents_->url_matcher, ++matcher_id_,
106 scheme, host, match_subdomains, port, path, query, true);
107 all_conditions_.push_back(condition_set);
108 contents_->matcher_site_map[matcher_id_] = site_id;
109 return true;
110 }
111
AddHostnameHash(const std::string & hash,int site_id)112 void FilterBuilder::AddHostnameHash(const std::string& hash, int site_id) {
113 contents_->hash_site_map.insert(std::make_pair(StringToUpperASCII(hash),
114 site_id));
115 }
116
AddSiteList(SupervisedUserSiteList * site_list)117 void FilterBuilder::AddSiteList(SupervisedUserSiteList* site_list) {
118 std::vector<SupervisedUserSiteList::Site> sites;
119 site_list->GetSites(&sites);
120 int site_id = contents_->sites.size();
121 for (std::vector<SupervisedUserSiteList::Site>::const_iterator it =
122 sites.begin(); it != sites.end(); ++it) {
123 const SupervisedUserSiteList::Site& site = *it;
124 contents_->sites.push_back(site);
125
126 for (std::vector<std::string>::const_iterator pattern_it =
127 site.patterns.begin();
128 pattern_it != site.patterns.end(); ++pattern_it) {
129 AddPattern(*pattern_it, site_id);
130 }
131
132 for (std::vector<std::string>::const_iterator hash_it =
133 site.hostname_hashes.begin();
134 hash_it != site.hostname_hashes.end(); ++hash_it) {
135 AddHostnameHash(*hash_it, site_id);
136 }
137
138 site_id++;
139 }
140 }
141
Build()142 scoped_ptr<SupervisedUserURLFilter::Contents> FilterBuilder::Build() {
143 DCHECK(BrowserThread::GetBlockingPool()->RunsTasksOnCurrentThread());
144 contents_->url_matcher.AddConditionSets(all_conditions_);
145 return contents_.Pass();
146 }
147
CreateWhitelistFromPatterns(const std::vector<std::string> & patterns)148 scoped_ptr<SupervisedUserURLFilter::Contents> CreateWhitelistFromPatterns(
149 const std::vector<std::string>& patterns) {
150 DCHECK(BrowserThread::GetBlockingPool()->RunsTasksOnCurrentThread());
151
152 FilterBuilder builder;
153 for (std::vector<std::string>::const_iterator it = patterns.begin();
154 it != patterns.end(); ++it) {
155 // TODO(bauerb): We should create a fake site for the whitelist.
156 builder.AddPattern(*it, -1);
157 }
158
159 return builder.Build();
160 }
161
162 scoped_ptr<SupervisedUserURLFilter::Contents>
LoadWhitelistsOnBlockingPoolThread(ScopedVector<SupervisedUserSiteList> site_lists)163 LoadWhitelistsOnBlockingPoolThread(
164 ScopedVector<SupervisedUserSiteList> site_lists) {
165 DCHECK(BrowserThread::GetBlockingPool()->RunsTasksOnCurrentThread());
166
167 FilterBuilder builder;
168 for (ScopedVector<SupervisedUserSiteList>::iterator it = site_lists.begin();
169 it != site_lists.end(); ++it) {
170 builder.AddSiteList(*it);
171 }
172
173 return builder.Build();
174 }
175
176 } // namespace
177
SupervisedUserURLFilter()178 SupervisedUserURLFilter::SupervisedUserURLFilter()
179 : default_behavior_(ALLOW),
180 contents_(new Contents()) {
181 // Detach from the current thread so we can be constructed on a different
182 // thread than the one where we're used.
183 DetachFromThread();
184 }
185
~SupervisedUserURLFilter()186 SupervisedUserURLFilter::~SupervisedUserURLFilter() {
187 DCHECK(CalledOnValidThread());
188 }
189
190 // static
191 SupervisedUserURLFilter::FilteringBehavior
BehaviorFromInt(int behavior_value)192 SupervisedUserURLFilter::BehaviorFromInt(int behavior_value) {
193 DCHECK_GE(behavior_value, ALLOW);
194 DCHECK_LE(behavior_value, BLOCK);
195 return static_cast<FilteringBehavior>(behavior_value);
196 }
197
198 // static
Normalize(const GURL & url)199 GURL SupervisedUserURLFilter::Normalize(const GURL& url) {
200 GURL normalized_url = url;
201 GURL::Replacements replacements;
202 // Strip username, password, query, and ref.
203 replacements.ClearUsername();
204 replacements.ClearPassword();
205 replacements.ClearQuery();
206 replacements.ClearRef();
207 return url.ReplaceComponents(replacements);
208 }
209
210 // static
HasFilteredScheme(const GURL & url)211 bool SupervisedUserURLFilter::HasFilteredScheme(const GURL& url) {
212 for (size_t i = 0; i < arraysize(kFilteredSchemes); ++i) {
213 if (url.scheme() == kFilteredSchemes[i])
214 return true;
215 }
216 return false;
217 }
218
GetHostnameHash(const GURL & url)219 std::string GetHostnameHash(const GURL& url) {
220 std::string hash = base::SHA1HashString(url.host());
221 return base::HexEncode(hash.data(), hash.length());
222 }
223
224 // static
HostMatchesPattern(const std::string & host,const std::string & pattern)225 bool SupervisedUserURLFilter::HostMatchesPattern(const std::string& host,
226 const std::string& pattern) {
227 std::string trimmed_pattern = pattern;
228 std::string trimmed_host = host;
229 if (EndsWith(pattern, ".*", true)) {
230 size_t registry_length = GetRegistryLength(
231 trimmed_host, EXCLUDE_UNKNOWN_REGISTRIES, EXCLUDE_PRIVATE_REGISTRIES);
232 // A host without a known registry part does not match.
233 if (registry_length == 0)
234 return false;
235
236 trimmed_pattern.erase(trimmed_pattern.length() - 2);
237 trimmed_host.erase(trimmed_host.length() - (registry_length + 1));
238 }
239
240 if (StartsWithASCII(trimmed_pattern, "*.", true)) {
241 trimmed_pattern.erase(0, 2);
242
243 // The remaining pattern should be non-empty, and it should not contain
244 // further stars. Also the trimmed host needs to end with the trimmed
245 // pattern.
246 if (trimmed_pattern.empty() ||
247 trimmed_pattern.find('*') != std::string::npos ||
248 !EndsWith(trimmed_host, trimmed_pattern, true)) {
249 return false;
250 }
251
252 // The trimmed host needs to have a dot separating the subdomain from the
253 // matched pattern piece, unless there is no subdomain.
254 int pos = trimmed_host.length() - trimmed_pattern.length();
255 DCHECK_GE(pos, 0);
256 return (pos == 0) || (trimmed_host[pos - 1] == '.');
257 }
258
259 return trimmed_host == trimmed_pattern;
260 }
261
262 SupervisedUserURLFilter::FilteringBehavior
GetFilteringBehaviorForURL(const GURL & url) const263 SupervisedUserURLFilter::GetFilteringBehaviorForURL(const GURL& url) const {
264 DCHECK(CalledOnValidThread());
265
266 // URLs with a non-standard scheme (e.g. chrome://) are always allowed.
267 if (!HasFilteredScheme(url))
268 return ALLOW;
269
270 // Check manual overrides for the exact URL.
271 std::map<GURL, bool>::const_iterator url_it = url_map_.find(Normalize(url));
272 if (url_it != url_map_.end())
273 return url_it->second ? ALLOW : BLOCK;
274
275 // Check manual overrides for the hostname.
276 std::string host = url.host();
277 std::map<std::string, bool>::const_iterator host_it = host_map_.find(host);
278 if (host_it != host_map_.end())
279 return host_it->second ? ALLOW : BLOCK;
280
281 // Look for patterns matching the hostname, with a value that is different
282 // from the default (a value of true in the map meaning allowed).
283 for (std::map<std::string, bool>::const_iterator host_it =
284 host_map_.begin(); host_it != host_map_.end(); ++host_it) {
285 if ((host_it->second == (default_behavior_ == BLOCK)) &&
286 HostMatchesPattern(host, host_it->first)) {
287 return host_it->second ? ALLOW : BLOCK;
288 }
289 }
290
291 // If the default behavior is to allow, we don't need to check anything else.
292 if (default_behavior_ == ALLOW)
293 return ALLOW;
294
295 // Check the list of URL patterns.
296 std::set<URLMatcherConditionSet::ID> matching_ids =
297 contents_->url_matcher.MatchURL(url);
298 if (!matching_ids.empty())
299 return ALLOW;
300
301 // Check the list of hostname hashes.
302 if (contents_->hash_site_map.count(GetHostnameHash(url)))
303 return ALLOW;
304
305 // Fall back to the default behavior.
306 return default_behavior_;
307 }
308
GetSites(const GURL & url,std::vector<SupervisedUserSiteList::Site * > * sites) const309 void SupervisedUserURLFilter::GetSites(
310 const GURL& url,
311 std::vector<SupervisedUserSiteList::Site*>* sites) const {
312 std::set<URLMatcherConditionSet::ID> matching_ids =
313 contents_->url_matcher.MatchURL(url);
314 for (std::set<URLMatcherConditionSet::ID>::const_iterator it =
315 matching_ids.begin(); it != matching_ids.end(); ++it) {
316 std::map<URLMatcherConditionSet::ID, int>::const_iterator entry =
317 contents_->matcher_site_map.find(*it);
318 if (entry == contents_->matcher_site_map.end()) {
319 NOTREACHED();
320 continue;
321 }
322 sites->push_back(&contents_->sites[entry->second]);
323 }
324
325 typedef base::hash_multimap<std::string, int>::const_iterator
326 hash_site_map_iterator;
327 std::pair<hash_site_map_iterator, hash_site_map_iterator> bounds =
328 contents_->hash_site_map.equal_range(GetHostnameHash(url));
329 for (hash_site_map_iterator hash_it = bounds.first;
330 hash_it != bounds.second; hash_it++) {
331 sites->push_back(&contents_->sites[hash_it->second]);
332 }
333 }
334
SetDefaultFilteringBehavior(FilteringBehavior behavior)335 void SupervisedUserURLFilter::SetDefaultFilteringBehavior(
336 FilteringBehavior behavior) {
337 DCHECK(CalledOnValidThread());
338 default_behavior_ = behavior;
339 }
340
LoadWhitelists(ScopedVector<SupervisedUserSiteList> site_lists)341 void SupervisedUserURLFilter::LoadWhitelists(
342 ScopedVector<SupervisedUserSiteList> site_lists) {
343 DCHECK(CalledOnValidThread());
344
345 base::PostTaskAndReplyWithResult(
346 BrowserThread::GetBlockingPool(),
347 FROM_HERE,
348 base::Bind(&LoadWhitelistsOnBlockingPoolThread,
349 base::Passed(&site_lists)),
350 base::Bind(&SupervisedUserURLFilter::SetContents, this));
351 }
352
SetFromPatterns(const std::vector<std::string> & patterns)353 void SupervisedUserURLFilter::SetFromPatterns(
354 const std::vector<std::string>& patterns) {
355 DCHECK(CalledOnValidThread());
356
357 base::PostTaskAndReplyWithResult(
358 BrowserThread::GetBlockingPool(),
359 FROM_HERE,
360 base::Bind(&CreateWhitelistFromPatterns, patterns),
361 base::Bind(&SupervisedUserURLFilter::SetContents, this));
362 }
363
SetManualHosts(const std::map<std::string,bool> * host_map)364 void SupervisedUserURLFilter::SetManualHosts(
365 const std::map<std::string, bool>* host_map) {
366 DCHECK(CalledOnValidThread());
367 host_map_ = *host_map;
368 UMA_HISTOGRAM_CUSTOM_COUNTS("ManagedMode.ManualHostsEntries",
369 host_map->size(), 1, 1000, 50);
370 }
371
SetManualURLs(const std::map<GURL,bool> * url_map)372 void SupervisedUserURLFilter::SetManualURLs(
373 const std::map<GURL, bool>* url_map) {
374 DCHECK(CalledOnValidThread());
375 url_map_ = *url_map;
376 UMA_HISTOGRAM_CUSTOM_COUNTS("ManagedMode.ManualURLsEntries",
377 url_map->size(), 1, 1000, 50);
378 }
379
AddObserver(Observer * observer)380 void SupervisedUserURLFilter::AddObserver(Observer* observer) {
381 observers_.AddObserver(observer);
382 }
383
RemoveObserver(Observer * observer)384 void SupervisedUserURLFilter::RemoveObserver(Observer* observer) {
385 observers_.RemoveObserver(observer);
386 }
387
SetContents(scoped_ptr<Contents> contents)388 void SupervisedUserURLFilter::SetContents(scoped_ptr<Contents> contents) {
389 DCHECK(CalledOnValidThread());
390 contents_ = contents.Pass();
391 FOR_EACH_OBSERVER(Observer, observers_, OnSiteListUpdated());
392 }
393