• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "chrome/browser/managed_mode/managed_mode_url_filter.h"
6 
7 #include "base/containers/hash_tables.h"
8 #include "base/files/file_path.h"
9 #include "base/json/json_file_value_serializer.h"
10 #include "base/metrics/histogram.h"
11 #include "base/sha1.h"
12 #include "base/strings/string_number_conversions.h"
13 #include "base/strings/string_util.h"
14 #include "base/task_runner_util.h"
15 #include "base/threading/sequenced_worker_pool.h"
16 #include "chrome/browser/policy/url_blacklist_manager.h"
17 #include "components/url_matcher/url_matcher.h"
18 #include "content/public/browser/browser_thread.h"
19 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
20 #include "url/gurl.h"
21 
22 using content::BrowserThread;
23 using net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES;
24 using net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES;
25 using net::registry_controlled_domains::GetRegistryLength;
26 using url_matcher::URLMatcher;
27 using url_matcher::URLMatcherConditionSet;
28 
29 struct ManagedModeURLFilter::Contents {
30   URLMatcher url_matcher;
31   std::map<URLMatcherConditionSet::ID, int> matcher_site_map;
32   base::hash_multimap<std::string, int> hash_site_map;
33   std::vector<ManagedModeSiteList::Site> sites;
34 };
35 
36 namespace {
37 
// URL schemes not in this list (e.g., file:// and chrome://) will always be
// allowed. Both the array itself and the strings it points to are immutable.
const char* const kFilteredSchemes[] = {
  "http",
  "https",
  "ftp",
  "gopher",
  "ws",
  "wss"
};
48 
49 
50 // This class encapsulates all the state that is required during construction of
51 // a new ManagedModeURLFilter::Contents.
52 class FilterBuilder {
53  public:
54   FilterBuilder();
55   ~FilterBuilder();
56 
57   // Adds a single URL pattern for the site identified by |site_id|.
58   bool AddPattern(const std::string& pattern, int site_id);
59 
60   // Adds a single hostname SHA1 hash for the site identified by |site_id|.
61   void AddHostnameHash(const std::string& hash, int site_id);
62 
63   // Adds all the sites in |site_list|, with URL patterns and hostname hashes.
64   void AddSiteList(ManagedModeSiteList* site_list);
65 
66   // Finalizes construction of the ManagedModeURLFilter::Contents and returns
67   // them. This method should be called before this object is destroyed.
68   scoped_ptr<ManagedModeURLFilter::Contents> Build();
69 
70  private:
71   scoped_ptr<ManagedModeURLFilter::Contents> contents_;
72   URLMatcherConditionSet::Vector all_conditions_;
73   URLMatcherConditionSet::ID matcher_id_;
74 };
75 
FilterBuilder()76 FilterBuilder::FilterBuilder()
77     : contents_(new ManagedModeURLFilter::Contents()),
78       matcher_id_(0) {}
79 
~FilterBuilder()80 FilterBuilder::~FilterBuilder() {
81   DCHECK(!contents_.get());
82 }
83 
AddPattern(const std::string & pattern,int site_id)84 bool FilterBuilder::AddPattern(const std::string& pattern, int site_id) {
85   DCHECK(BrowserThread::GetBlockingPool()->RunsTasksOnCurrentThread());
86   std::string scheme;
87   std::string host;
88   uint16 port;
89   std::string path;
90   bool match_subdomains = true;
91   if (!policy::URLBlacklist::FilterToComponents(
92           pattern, &scheme, &host, &match_subdomains, &port, &path)) {
93     LOG(ERROR) << "Invalid pattern " << pattern;
94     return false;
95   }
96 
97   scoped_refptr<URLMatcherConditionSet> condition_set =
98       policy::URLBlacklist::CreateConditionSet(
99           &contents_->url_matcher, ++matcher_id_,
100           scheme, host, match_subdomains, port, path);
101   all_conditions_.push_back(condition_set);
102   contents_->matcher_site_map[matcher_id_] = site_id;
103   return true;
104 }
105 
AddHostnameHash(const std::string & hash,int site_id)106 void FilterBuilder::AddHostnameHash(const std::string& hash, int site_id) {
107   contents_->hash_site_map.insert(std::make_pair(StringToUpperASCII(hash),
108                                                  site_id));
109 }
110 
AddSiteList(ManagedModeSiteList * site_list)111 void FilterBuilder::AddSiteList(ManagedModeSiteList* site_list) {
112   std::vector<ManagedModeSiteList::Site> sites;
113   site_list->GetSites(&sites);
114   int site_id = contents_->sites.size();
115   for (std::vector<ManagedModeSiteList::Site>::const_iterator it =
116            sites.begin(); it != sites.end(); ++it) {
117     const ManagedModeSiteList::Site& site = *it;
118     contents_->sites.push_back(site);
119 
120     for (std::vector<std::string>::const_iterator pattern_it =
121              site.patterns.begin();
122          pattern_it != site.patterns.end(); ++pattern_it) {
123       AddPattern(*pattern_it, site_id);
124     }
125 
126     for (std::vector<std::string>::const_iterator hash_it =
127              site.hostname_hashes.begin();
128          hash_it != site.hostname_hashes.end(); ++hash_it) {
129       AddHostnameHash(*hash_it, site_id);
130     }
131 
132     site_id++;
133   }
134 }
135 
Build()136 scoped_ptr<ManagedModeURLFilter::Contents> FilterBuilder::Build() {
137   DCHECK(BrowserThread::GetBlockingPool()->RunsTasksOnCurrentThread());
138   contents_->url_matcher.AddConditionSets(all_conditions_);
139   return contents_.Pass();
140 }
141 
CreateWhitelistFromPatterns(const std::vector<std::string> & patterns)142 scoped_ptr<ManagedModeURLFilter::Contents> CreateWhitelistFromPatterns(
143     const std::vector<std::string>& patterns) {
144   DCHECK(BrowserThread::GetBlockingPool()->RunsTasksOnCurrentThread());
145 
146   FilterBuilder builder;
147   for (std::vector<std::string>::const_iterator it = patterns.begin();
148        it != patterns.end(); ++it) {
149     // TODO(bauerb): We should create a fake site for the whitelist.
150     builder.AddPattern(*it, -1);
151   }
152 
153   return builder.Build();
154 }
155 
LoadWhitelistsOnBlockingPoolThread(ScopedVector<ManagedModeSiteList> site_lists)156 scoped_ptr<ManagedModeURLFilter::Contents> LoadWhitelistsOnBlockingPoolThread(
157     ScopedVector<ManagedModeSiteList> site_lists) {
158   DCHECK(BrowserThread::GetBlockingPool()->RunsTasksOnCurrentThread());
159 
160   FilterBuilder builder;
161   for (ScopedVector<ManagedModeSiteList>::iterator it = site_lists.begin();
162        it != site_lists.end(); ++it) {
163     builder.AddSiteList(*it);
164   }
165 
166   return builder.Build();
167 }
168 
169 }  // namespace
170 
ManagedModeURLFilter()171 ManagedModeURLFilter::ManagedModeURLFilter()
172     : default_behavior_(ALLOW),
173       contents_(new Contents()) {
174   // Detach from the current thread so we can be constructed on a different
175   // thread than the one where we're used.
176   DetachFromThread();
177 }
178 
~ManagedModeURLFilter()179 ManagedModeURLFilter::~ManagedModeURLFilter() {
180   DCHECK(CalledOnValidThread());
181 }
182 
183 // static
184 ManagedModeURLFilter::FilteringBehavior
BehaviorFromInt(int behavior_value)185 ManagedModeURLFilter::BehaviorFromInt(int behavior_value) {
186   DCHECK_GE(behavior_value, ALLOW);
187   DCHECK_LE(behavior_value, BLOCK);
188   return static_cast<FilteringBehavior>(behavior_value);
189 }
190 
191 // static
Normalize(const GURL & url)192 GURL ManagedModeURLFilter::Normalize(const GURL& url) {
193   GURL normalized_url = url;
194   GURL::Replacements replacements;
195   // Strip username, password, query, and ref.
196   replacements.ClearUsername();
197   replacements.ClearPassword();
198   replacements.ClearQuery();
199   replacements.ClearRef();
200   return url.ReplaceComponents(replacements);
201 }
202 
203 // static
HasFilteredScheme(const GURL & url)204 bool ManagedModeURLFilter::HasFilteredScheme(const GURL& url) {
205   for (size_t i = 0; i < arraysize(kFilteredSchemes); ++i) {
206       if (url.scheme() == kFilteredSchemes[i])
207         return true;
208     }
209   return false;
210 }
211 
GetHostnameHash(const GURL & url)212 std::string GetHostnameHash(const GURL& url) {
213   std::string hash = base::SHA1HashString(url.host());
214   return base::HexEncode(hash.data(), hash.length());
215 }
216 
217 // static
HostMatchesPattern(const std::string & host,const std::string & pattern)218 bool ManagedModeURLFilter::HostMatchesPattern(const std::string& host,
219                                               const std::string& pattern) {
220   std::string trimmed_pattern = pattern;
221   std::string trimmed_host = host;
222   if (EndsWith(pattern, ".*", true)) {
223     size_t registry_length = GetRegistryLength(
224         trimmed_host, EXCLUDE_UNKNOWN_REGISTRIES, EXCLUDE_PRIVATE_REGISTRIES);
225     // A host without a known registry part does not match.
226     if (registry_length == 0)
227       return false;
228 
229     trimmed_pattern.erase(trimmed_pattern.length() - 2);
230     trimmed_host.erase(trimmed_host.length() - (registry_length + 1));
231   }
232 
233   if (StartsWithASCII(trimmed_pattern, "*.", true)) {
234     trimmed_pattern.erase(0, 2);
235 
236     // The remaining pattern should be non-empty, and it should not contain
237     // further stars. Also the trimmed host needs to end with the trimmed
238     // pattern.
239     if (trimmed_pattern.empty() ||
240         trimmed_pattern.find('*') != std::string::npos ||
241         !EndsWith(trimmed_host, trimmed_pattern, true)) {
242       return false;
243     }
244 
245     // The trimmed host needs to have a dot separating the subdomain from the
246     // matched pattern piece, unless there is no subdomain.
247     int pos = trimmed_host.length() - trimmed_pattern.length();
248     DCHECK_GE(pos, 0);
249     return (pos == 0) || (trimmed_host[pos - 1] == '.');
250   }
251 
252   return trimmed_host == trimmed_pattern;
253 }
254 
255 ManagedModeURLFilter::FilteringBehavior
GetFilteringBehaviorForURL(const GURL & url) const256 ManagedModeURLFilter::GetFilteringBehaviorForURL(const GURL& url) const {
257   DCHECK(CalledOnValidThread());
258 
259   // URLs with a non-standard scheme (e.g. chrome://) are always allowed.
260   if (!HasFilteredScheme(url))
261     return ALLOW;
262 
263   // Check manual overrides for the exact URL.
264   std::map<GURL, bool>::const_iterator url_it = url_map_.find(Normalize(url));
265   if (url_it != url_map_.end())
266     return url_it->second ? ALLOW : BLOCK;
267 
268   // Check manual overrides for the hostname.
269   std::string host = url.host();
270   std::map<std::string, bool>::const_iterator host_it = host_map_.find(host);
271   if (host_it != host_map_.end())
272     return host_it->second ? ALLOW : BLOCK;
273 
274   // Look for patterns matching the hostname, with a value that is different
275   // from the default (a value of true in the map meaning allowed).
276   for (std::map<std::string, bool>::const_iterator host_it =
277       host_map_.begin(); host_it != host_map_.end(); ++host_it) {
278     if ((host_it->second == (default_behavior_ == BLOCK)) &&
279         HostMatchesPattern(host, host_it->first)) {
280       return host_it->second ? ALLOW : BLOCK;
281     }
282   }
283 
284   // If the default behavior is to allow, we don't need to check anything else.
285   if (default_behavior_ == ALLOW)
286     return ALLOW;
287 
288   // Check the list of URL patterns.
289   std::set<URLMatcherConditionSet::ID> matching_ids =
290       contents_->url_matcher.MatchURL(url);
291   if (!matching_ids.empty())
292     return ALLOW;
293 
294   // Check the list of hostname hashes.
295   if (contents_->hash_site_map.count(GetHostnameHash(url)))
296     return ALLOW;
297 
298   // Fall back to the default behavior.
299   return default_behavior_;
300 }
301 
GetSites(const GURL & url,std::vector<ManagedModeSiteList::Site * > * sites) const302 void ManagedModeURLFilter::GetSites(
303     const GURL& url,
304     std::vector<ManagedModeSiteList::Site*>* sites) const {
305   std::set<URLMatcherConditionSet::ID> matching_ids =
306       contents_->url_matcher.MatchURL(url);
307   for (std::set<URLMatcherConditionSet::ID>::const_iterator it =
308            matching_ids.begin(); it != matching_ids.end(); ++it) {
309     std::map<URLMatcherConditionSet::ID, int>::const_iterator entry =
310         contents_->matcher_site_map.find(*it);
311     if (entry == contents_->matcher_site_map.end()) {
312       NOTREACHED();
313       continue;
314     }
315     sites->push_back(&contents_->sites[entry->second]);
316   }
317 
318   typedef base::hash_multimap<std::string, int>::const_iterator
319       hash_site_map_iterator;
320   std::pair<hash_site_map_iterator, hash_site_map_iterator> bounds =
321       contents_->hash_site_map.equal_range(GetHostnameHash(url));
322   for (hash_site_map_iterator hash_it = bounds.first;
323        hash_it != bounds.second; hash_it++) {
324     sites->push_back(&contents_->sites[hash_it->second]);
325   }
326 }
327 
SetDefaultFilteringBehavior(FilteringBehavior behavior)328 void ManagedModeURLFilter::SetDefaultFilteringBehavior(
329     FilteringBehavior behavior) {
330   DCHECK(CalledOnValidThread());
331   default_behavior_ = behavior;
332 }
333 
LoadWhitelists(ScopedVector<ManagedModeSiteList> site_lists)334 void ManagedModeURLFilter::LoadWhitelists(
335     ScopedVector<ManagedModeSiteList> site_lists) {
336   DCHECK(CalledOnValidThread());
337 
338   base::PostTaskAndReplyWithResult(
339       BrowserThread::GetBlockingPool(),
340       FROM_HERE,
341       base::Bind(&LoadWhitelistsOnBlockingPoolThread,
342                  base::Passed(&site_lists)),
343       base::Bind(&ManagedModeURLFilter::SetContents, this));
344 }
345 
SetFromPatterns(const std::vector<std::string> & patterns)346 void ManagedModeURLFilter::SetFromPatterns(
347     const std::vector<std::string>& patterns) {
348   DCHECK(CalledOnValidThread());
349 
350   base::PostTaskAndReplyWithResult(
351       BrowserThread::GetBlockingPool(),
352       FROM_HERE,
353       base::Bind(&CreateWhitelistFromPatterns, patterns),
354       base::Bind(&ManagedModeURLFilter::SetContents, this));
355 }
356 
SetManualHosts(const std::map<std::string,bool> * host_map)357 void ManagedModeURLFilter::SetManualHosts(
358     const std::map<std::string, bool>* host_map) {
359   DCHECK(CalledOnValidThread());
360   host_map_ = *host_map;
361   UMA_HISTOGRAM_CUSTOM_COUNTS("ManagedMode.ManualHostsEntries",
362                               host_map->size(), 1, 1000, 50);
363 }
364 
SetManualURLs(const std::map<GURL,bool> * url_map)365 void ManagedModeURLFilter::SetManualURLs(
366     const std::map<GURL, bool>* url_map) {
367   DCHECK(CalledOnValidThread());
368   url_map_ = *url_map;
369   UMA_HISTOGRAM_CUSTOM_COUNTS("ManagedMode.ManualURLsEntries",
370                               url_map->size(), 1, 1000, 50);
371 }
372 
AddObserver(Observer * observer)373 void ManagedModeURLFilter::AddObserver(Observer* observer) {
374   observers_.AddObserver(observer);
375 }
376 
RemoveObserver(Observer * observer)377 void ManagedModeURLFilter::RemoveObserver(Observer* observer) {
378   observers_.RemoveObserver(observer);
379 }
380 
SetContents(scoped_ptr<Contents> contents)381 void ManagedModeURLFilter::SetContents(scoped_ptr<Contents> contents) {
382   DCHECK(CalledOnValidThread());
383   contents_ = contents.Pass();
384   FOR_EACH_OBSERVER(Observer, observers_, OnSiteListUpdated());
385 }
386