• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef COMPONENTS_URL_MATCHER_REGEX_SET_MATCHER_H_
6 #define COMPONENTS_URL_MATCHER_REGEX_SET_MATCHER_H_
7 
8 #include <map>
9 #include <set>
10 #include <string>
11 #include <vector>
12 
13 #include "base/memory/scoped_ptr.h"
14 #include "components/url_matcher/string_pattern.h"
15 #include "components/url_matcher/substring_set_matcher.h"
16 #include "components/url_matcher/url_matcher_export.h"
17 
18 namespace re2 {
19 class FilteredRE2;
20 }
21 
22 namespace url_matcher {
23 
24 // Efficiently matches URLs against a collection of regular expressions,
25 // using FilteredRE2 to reduce the number of regexes that must be matched
26 // by pre-filtering with substring matching. See:
27 // http://swtch.com/~rsc/regexp/regexp3.html#analysis
28 class URL_MATCHER_EXPORT RegexSetMatcher {
29  public:
30   RegexSetMatcher();
31   virtual ~RegexSetMatcher();
32 
33   // Adds the regex patterns in |regex_list| to the matcher. Also rebuilds
34   // the FilteredRE2 matcher; thus, for efficiency, prefer adding multiple
35   // patterns at once.
36   // Ownership of the patterns remains with the caller.
37   void AddPatterns(const std::vector<const StringPattern*>& regex_list);
38 
39   // Removes all regex patterns.
40   void ClearPatterns();
41 
42   // Appends the IDs of regular expressions in our set that match the |text|
43   // to |matches|.
44   bool Match(const std::string& text,
45              std::set<StringPattern::ID>* matches) const;
46 
47   bool IsEmpty() const;
48 
49  private:
50   typedef int RE2ID;
51   typedef std::map<StringPattern::ID, const StringPattern*> RegexMap;
52   typedef std::vector<StringPattern::ID> RE2IDMap;
53 
54   // Use Aho-Corasick SubstringSetMatcher to find which literal patterns
55   // match the |text|.
56   std::vector<RE2ID> FindSubstringMatches(const std::string& text) const;
57 
58   // Rebuild FilteredRE2 from scratch. Needs to be called whenever
59   // our set of regexes changes.
60   // TODO(yoz): investigate if it could be done incrementally;
61   // apparently not supported by FilteredRE2.
62   void RebuildMatcher();
63 
64   // Clean up StringPatterns in |substring_patterns_|.
65   void DeleteSubstringPatterns();
66 
67   // Mapping of regex StringPattern::IDs to regexes.
68   RegexMap regexes_;
69   // Mapping of RE2IDs from FilteredRE2 (which are assigned in order)
70   // to regex StringPattern::IDs.
71   RE2IDMap re2_id_map_;
72 
73   scoped_ptr<re2::FilteredRE2> filtered_re2_;
74   scoped_ptr<SubstringSetMatcher> substring_matcher_;
75 
76   // The substring patterns from FilteredRE2, which are used in
77   // |substring_matcher_| but whose lifetime is managed here.
78   std::vector<const StringPattern*> substring_patterns_;
79 };
80 
81 }  // namespace url_matcher
82 
83 #endif  // COMPONENTS_URL_MATCHER_REGEX_SET_MATCHER_H_
84