• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "chrome/browser/supervised_user/supervised_user_url_filter.h"
6 
7 #include "base/containers/hash_tables.h"
8 #include "base/files/file_path.h"
9 #include "base/json/json_file_value_serializer.h"
10 #include "base/metrics/histogram.h"
11 #include "base/sha1.h"
12 #include "base/strings/string_number_conversions.h"
13 #include "base/strings/string_util.h"
14 #include "base/task_runner_util.h"
15 #include "base/threading/sequenced_worker_pool.h"
16 #include "components/policy/core/browser/url_blacklist_manager.h"
17 #include "components/url_fixer/url_fixer.h"
18 #include "components/url_matcher/url_matcher.h"
19 #include "content/public/browser/browser_thread.h"
20 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
21 #include "url/gurl.h"
22 
23 using content::BrowserThread;
24 using net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES;
25 using net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES;
26 using net::registry_controlled_domains::GetRegistryLength;
27 using policy::URLBlacklist;
28 using url_matcher::URLMatcher;
29 using url_matcher::URLMatcherConditionSet;
30 
31 struct SupervisedUserURLFilter::Contents {
32   URLMatcher url_matcher;
33   std::map<URLMatcherConditionSet::ID, int> matcher_site_map;
34   base::hash_multimap<std::string, int> hash_site_map;
35   std::vector<SupervisedUserSiteList::Site> sites;
36 };
37 
38 namespace {
39 
// Schemes that are subject to filtering. URLs whose scheme is not in this list
// (e.g., file:// and chrome://) will always be allowed.
// Both the strings and the table entries are const, so the table cannot be
// modified at runtime.
const char* const kFilteredSchemes[] = {
  "http",
  "https",
  "ftp",
  "gopher",
  "ws",
  "wss"
};
50 
51 
52 // This class encapsulates all the state that is required during construction of
53 // a new SupervisedUserURLFilter::Contents.
54 class FilterBuilder {
55  public:
56   FilterBuilder();
57   ~FilterBuilder();
58 
59   // Adds a single URL pattern for the site identified by |site_id|.
60   bool AddPattern(const std::string& pattern, int site_id);
61 
62   // Adds a single hostname SHA1 hash for the site identified by |site_id|.
63   void AddHostnameHash(const std::string& hash, int site_id);
64 
65   // Adds all the sites in |site_list|, with URL patterns and hostname hashes.
66   void AddSiteList(SupervisedUserSiteList* site_list);
67 
68   // Finalizes construction of the SupervisedUserURLFilter::Contents and returns
69   // them. This method should be called before this object is destroyed.
70   scoped_ptr<SupervisedUserURLFilter::Contents> Build();
71 
72  private:
73   scoped_ptr<SupervisedUserURLFilter::Contents> contents_;
74   URLMatcherConditionSet::Vector all_conditions_;
75   URLMatcherConditionSet::ID matcher_id_;
76 };
77 
FilterBuilder()78 FilterBuilder::FilterBuilder()
79     : contents_(new SupervisedUserURLFilter::Contents()),
80       matcher_id_(0) {}
81 
~FilterBuilder()82 FilterBuilder::~FilterBuilder() {
83   DCHECK(!contents_.get());
84 }
85 
AddPattern(const std::string & pattern,int site_id)86 bool FilterBuilder::AddPattern(const std::string& pattern, int site_id) {
87   DCHECK(BrowserThread::GetBlockingPool()->RunsTasksOnCurrentThread());
88   std::string scheme;
89   std::string host;
90   uint16 port;
91   std::string path;
92   std::string query;
93   bool match_subdomains = true;
94   URLBlacklist::SegmentURLCallback callback =
95       static_cast<URLBlacklist::SegmentURLCallback>(url_fixer::SegmentURL);
96   if (!URLBlacklist::FilterToComponents(
97           callback, pattern,
98           &scheme, &host, &match_subdomains, &port, &path, &query)) {
99     LOG(ERROR) << "Invalid pattern " << pattern;
100     return false;
101   }
102 
103   scoped_refptr<URLMatcherConditionSet> condition_set =
104       URLBlacklist::CreateConditionSet(
105           &contents_->url_matcher, ++matcher_id_,
106           scheme, host, match_subdomains, port, path, query, true);
107   all_conditions_.push_back(condition_set);
108   contents_->matcher_site_map[matcher_id_] = site_id;
109   return true;
110 }
111 
AddHostnameHash(const std::string & hash,int site_id)112 void FilterBuilder::AddHostnameHash(const std::string& hash, int site_id) {
113   contents_->hash_site_map.insert(std::make_pair(StringToUpperASCII(hash),
114                                                  site_id));
115 }
116 
AddSiteList(SupervisedUserSiteList * site_list)117 void FilterBuilder::AddSiteList(SupervisedUserSiteList* site_list) {
118   std::vector<SupervisedUserSiteList::Site> sites;
119   site_list->GetSites(&sites);
120   int site_id = contents_->sites.size();
121   for (std::vector<SupervisedUserSiteList::Site>::const_iterator it =
122            sites.begin(); it != sites.end(); ++it) {
123     const SupervisedUserSiteList::Site& site = *it;
124     contents_->sites.push_back(site);
125 
126     for (std::vector<std::string>::const_iterator pattern_it =
127              site.patterns.begin();
128          pattern_it != site.patterns.end(); ++pattern_it) {
129       AddPattern(*pattern_it, site_id);
130     }
131 
132     for (std::vector<std::string>::const_iterator hash_it =
133              site.hostname_hashes.begin();
134          hash_it != site.hostname_hashes.end(); ++hash_it) {
135       AddHostnameHash(*hash_it, site_id);
136     }
137 
138     site_id++;
139   }
140 }
141 
Build()142 scoped_ptr<SupervisedUserURLFilter::Contents> FilterBuilder::Build() {
143   DCHECK(BrowserThread::GetBlockingPool()->RunsTasksOnCurrentThread());
144   contents_->url_matcher.AddConditionSets(all_conditions_);
145   return contents_.Pass();
146 }
147 
CreateWhitelistFromPatterns(const std::vector<std::string> & patterns)148 scoped_ptr<SupervisedUserURLFilter::Contents> CreateWhitelistFromPatterns(
149     const std::vector<std::string>& patterns) {
150   DCHECK(BrowserThread::GetBlockingPool()->RunsTasksOnCurrentThread());
151 
152   FilterBuilder builder;
153   for (std::vector<std::string>::const_iterator it = patterns.begin();
154        it != patterns.end(); ++it) {
155     // TODO(bauerb): We should create a fake site for the whitelist.
156     builder.AddPattern(*it, -1);
157   }
158 
159   return builder.Build();
160 }
161 
162 scoped_ptr<SupervisedUserURLFilter::Contents>
LoadWhitelistsOnBlockingPoolThread(ScopedVector<SupervisedUserSiteList> site_lists)163 LoadWhitelistsOnBlockingPoolThread(
164     ScopedVector<SupervisedUserSiteList> site_lists) {
165   DCHECK(BrowserThread::GetBlockingPool()->RunsTasksOnCurrentThread());
166 
167   FilterBuilder builder;
168   for (ScopedVector<SupervisedUserSiteList>::iterator it = site_lists.begin();
169        it != site_lists.end(); ++it) {
170     builder.AddSiteList(*it);
171   }
172 
173   return builder.Build();
174 }
175 
176 }  // namespace
177 
SupervisedUserURLFilter()178 SupervisedUserURLFilter::SupervisedUserURLFilter()
179     : default_behavior_(ALLOW),
180       contents_(new Contents()) {
181   // Detach from the current thread so we can be constructed on a different
182   // thread than the one where we're used.
183   DetachFromThread();
184 }
185 
~SupervisedUserURLFilter()186 SupervisedUserURLFilter::~SupervisedUserURLFilter() {
187   DCHECK(CalledOnValidThread());
188 }
189 
190 // static
191 SupervisedUserURLFilter::FilteringBehavior
BehaviorFromInt(int behavior_value)192 SupervisedUserURLFilter::BehaviorFromInt(int behavior_value) {
193   DCHECK_GE(behavior_value, ALLOW);
194   DCHECK_LE(behavior_value, BLOCK);
195   return static_cast<FilteringBehavior>(behavior_value);
196 }
197 
198 // static
Normalize(const GURL & url)199 GURL SupervisedUserURLFilter::Normalize(const GURL& url) {
200   GURL normalized_url = url;
201   GURL::Replacements replacements;
202   // Strip username, password, query, and ref.
203   replacements.ClearUsername();
204   replacements.ClearPassword();
205   replacements.ClearQuery();
206   replacements.ClearRef();
207   return url.ReplaceComponents(replacements);
208 }
209 
210 // static
HasFilteredScheme(const GURL & url)211 bool SupervisedUserURLFilter::HasFilteredScheme(const GURL& url) {
212   for (size_t i = 0; i < arraysize(kFilteredSchemes); ++i) {
213       if (url.scheme() == kFilteredSchemes[i])
214         return true;
215     }
216   return false;
217 }
218 
GetHostnameHash(const GURL & url)219 std::string GetHostnameHash(const GURL& url) {
220   std::string hash = base::SHA1HashString(url.host());
221   return base::HexEncode(hash.data(), hash.length());
222 }
223 
224 // static
HostMatchesPattern(const std::string & host,const std::string & pattern)225 bool SupervisedUserURLFilter::HostMatchesPattern(const std::string& host,
226                                                  const std::string& pattern) {
227   std::string trimmed_pattern = pattern;
228   std::string trimmed_host = host;
229   if (EndsWith(pattern, ".*", true)) {
230     size_t registry_length = GetRegistryLength(
231         trimmed_host, EXCLUDE_UNKNOWN_REGISTRIES, EXCLUDE_PRIVATE_REGISTRIES);
232     // A host without a known registry part does not match.
233     if (registry_length == 0)
234       return false;
235 
236     trimmed_pattern.erase(trimmed_pattern.length() - 2);
237     trimmed_host.erase(trimmed_host.length() - (registry_length + 1));
238   }
239 
240   if (StartsWithASCII(trimmed_pattern, "*.", true)) {
241     trimmed_pattern.erase(0, 2);
242 
243     // The remaining pattern should be non-empty, and it should not contain
244     // further stars. Also the trimmed host needs to end with the trimmed
245     // pattern.
246     if (trimmed_pattern.empty() ||
247         trimmed_pattern.find('*') != std::string::npos ||
248         !EndsWith(trimmed_host, trimmed_pattern, true)) {
249       return false;
250     }
251 
252     // The trimmed host needs to have a dot separating the subdomain from the
253     // matched pattern piece, unless there is no subdomain.
254     int pos = trimmed_host.length() - trimmed_pattern.length();
255     DCHECK_GE(pos, 0);
256     return (pos == 0) || (trimmed_host[pos - 1] == '.');
257   }
258 
259   return trimmed_host == trimmed_pattern;
260 }
261 
262 SupervisedUserURLFilter::FilteringBehavior
GetFilteringBehaviorForURL(const GURL & url) const263 SupervisedUserURLFilter::GetFilteringBehaviorForURL(const GURL& url) const {
264   DCHECK(CalledOnValidThread());
265 
266   // URLs with a non-standard scheme (e.g. chrome://) are always allowed.
267   if (!HasFilteredScheme(url))
268     return ALLOW;
269 
270   // Check manual overrides for the exact URL.
271   std::map<GURL, bool>::const_iterator url_it = url_map_.find(Normalize(url));
272   if (url_it != url_map_.end())
273     return url_it->second ? ALLOW : BLOCK;
274 
275   // Check manual overrides for the hostname.
276   std::string host = url.host();
277   std::map<std::string, bool>::const_iterator host_it = host_map_.find(host);
278   if (host_it != host_map_.end())
279     return host_it->second ? ALLOW : BLOCK;
280 
281   // Look for patterns matching the hostname, with a value that is different
282   // from the default (a value of true in the map meaning allowed).
283   for (std::map<std::string, bool>::const_iterator host_it =
284       host_map_.begin(); host_it != host_map_.end(); ++host_it) {
285     if ((host_it->second == (default_behavior_ == BLOCK)) &&
286         HostMatchesPattern(host, host_it->first)) {
287       return host_it->second ? ALLOW : BLOCK;
288     }
289   }
290 
291   // If the default behavior is to allow, we don't need to check anything else.
292   if (default_behavior_ == ALLOW)
293     return ALLOW;
294 
295   // Check the list of URL patterns.
296   std::set<URLMatcherConditionSet::ID> matching_ids =
297       contents_->url_matcher.MatchURL(url);
298   if (!matching_ids.empty())
299     return ALLOW;
300 
301   // Check the list of hostname hashes.
302   if (contents_->hash_site_map.count(GetHostnameHash(url)))
303     return ALLOW;
304 
305   // Fall back to the default behavior.
306   return default_behavior_;
307 }
308 
GetSites(const GURL & url,std::vector<SupervisedUserSiteList::Site * > * sites) const309 void SupervisedUserURLFilter::GetSites(
310     const GURL& url,
311     std::vector<SupervisedUserSiteList::Site*>* sites) const {
312   std::set<URLMatcherConditionSet::ID> matching_ids =
313       contents_->url_matcher.MatchURL(url);
314   for (std::set<URLMatcherConditionSet::ID>::const_iterator it =
315            matching_ids.begin(); it != matching_ids.end(); ++it) {
316     std::map<URLMatcherConditionSet::ID, int>::const_iterator entry =
317         contents_->matcher_site_map.find(*it);
318     if (entry == contents_->matcher_site_map.end()) {
319       NOTREACHED();
320       continue;
321     }
322     sites->push_back(&contents_->sites[entry->second]);
323   }
324 
325   typedef base::hash_multimap<std::string, int>::const_iterator
326       hash_site_map_iterator;
327   std::pair<hash_site_map_iterator, hash_site_map_iterator> bounds =
328       contents_->hash_site_map.equal_range(GetHostnameHash(url));
329   for (hash_site_map_iterator hash_it = bounds.first;
330        hash_it != bounds.second; hash_it++) {
331     sites->push_back(&contents_->sites[hash_it->second]);
332   }
333 }
334 
SetDefaultFilteringBehavior(FilteringBehavior behavior)335 void SupervisedUserURLFilter::SetDefaultFilteringBehavior(
336     FilteringBehavior behavior) {
337   DCHECK(CalledOnValidThread());
338   default_behavior_ = behavior;
339 }
340 
LoadWhitelists(ScopedVector<SupervisedUserSiteList> site_lists)341 void SupervisedUserURLFilter::LoadWhitelists(
342     ScopedVector<SupervisedUserSiteList> site_lists) {
343   DCHECK(CalledOnValidThread());
344 
345   base::PostTaskAndReplyWithResult(
346       BrowserThread::GetBlockingPool(),
347       FROM_HERE,
348       base::Bind(&LoadWhitelistsOnBlockingPoolThread,
349                  base::Passed(&site_lists)),
350       base::Bind(&SupervisedUserURLFilter::SetContents, this));
351 }
352 
SetFromPatterns(const std::vector<std::string> & patterns)353 void SupervisedUserURLFilter::SetFromPatterns(
354     const std::vector<std::string>& patterns) {
355   DCHECK(CalledOnValidThread());
356 
357   base::PostTaskAndReplyWithResult(
358       BrowserThread::GetBlockingPool(),
359       FROM_HERE,
360       base::Bind(&CreateWhitelistFromPatterns, patterns),
361       base::Bind(&SupervisedUserURLFilter::SetContents, this));
362 }
363 
SetManualHosts(const std::map<std::string,bool> * host_map)364 void SupervisedUserURLFilter::SetManualHosts(
365     const std::map<std::string, bool>* host_map) {
366   DCHECK(CalledOnValidThread());
367   host_map_ = *host_map;
368   UMA_HISTOGRAM_CUSTOM_COUNTS("ManagedMode.ManualHostsEntries",
369                               host_map->size(), 1, 1000, 50);
370 }
371 
SetManualURLs(const std::map<GURL,bool> * url_map)372 void SupervisedUserURLFilter::SetManualURLs(
373     const std::map<GURL, bool>* url_map) {
374   DCHECK(CalledOnValidThread());
375   url_map_ = *url_map;
376   UMA_HISTOGRAM_CUSTOM_COUNTS("ManagedMode.ManualURLsEntries",
377                               url_map->size(), 1, 1000, 50);
378 }
379 
AddObserver(Observer * observer)380 void SupervisedUserURLFilter::AddObserver(Observer* observer) {
381   observers_.AddObserver(observer);
382 }
383 
RemoveObserver(Observer * observer)384 void SupervisedUserURLFilter::RemoveObserver(Observer* observer) {
385   observers_.RemoveObserver(observer);
386 }
387 
SetContents(scoped_ptr<Contents> contents)388 void SupervisedUserURLFilter::SetContents(scoped_ptr<Contents> contents) {
389   DCHECK(CalledOnValidThread());
390   contents_ = contents.Pass();
391   FOR_EACH_OBSERVER(Observer, observers_, OnSiteListUpdated());
392 }
393