1 // Copyright 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef COMPONENTS_URL_MATCHER_REGEX_SET_MATCHER_H_ 6 #define COMPONENTS_URL_MATCHER_REGEX_SET_MATCHER_H_ 7 8 #include <map> 9 #include <set> 10 #include <string> 11 #include <vector> 12 13 #include "base/memory/scoped_ptr.h" 14 #include "components/url_matcher/string_pattern.h" 15 #include "components/url_matcher/substring_set_matcher.h" 16 #include "components/url_matcher/url_matcher_export.h" 17 18 namespace re2 { 19 class FilteredRE2; 20 } 21 22 namespace url_matcher { 23 24 // Efficiently matches URLs against a collection of regular expressions, 25 // using FilteredRE2 to reduce the number of regexes that must be matched 26 // by pre-filtering with substring matching. See: 27 // http://swtch.com/~rsc/regexp/regexp3.html#analysis 28 class URL_MATCHER_EXPORT RegexSetMatcher { 29 public: 30 RegexSetMatcher(); 31 virtual ~RegexSetMatcher(); 32 33 // Adds the regex patterns in |regex_list| to the matcher. Also rebuilds 34 // the FilteredRE2 matcher; thus, for efficiency, prefer adding multiple 35 // patterns at once. 36 // Ownership of the patterns remains with the caller. 37 void AddPatterns(const std::vector<const StringPattern*>& regex_list); 38 39 // Removes all regex patterns. 40 void ClearPatterns(); 41 42 // Appends the IDs of regular expressions in our set that match the |text| 43 // to |matches|. 44 bool Match(const std::string& text, 45 std::set<StringPattern::ID>* matches) const; 46 47 bool IsEmpty() const; 48 49 private: 50 typedef int RE2ID; 51 typedef std::map<StringPattern::ID, const StringPattern*> RegexMap; 52 typedef std::vector<StringPattern::ID> RE2IDMap; 53 54 // Use Aho-Corasick SubstringSetMatcher to find which literal patterns 55 // match the |text|. 56 std::vector<RE2ID> FindSubstringMatches(const std::string& text) const; 57 58 // Rebuild FilteredRE2 from scratch. Needs to be called whenever 59 // our set of regexes changes. 60 // TODO(yoz): investigate if it could be done incrementally; 61 // apparently not supported by FilteredRE2. 62 void RebuildMatcher(); 63 64 // Clean up StringPatterns in |substring_patterns_|. 65 void DeleteSubstringPatterns(); 66 67 // Mapping of regex StringPattern::IDs to regexes. 68 RegexMap regexes_; 69 // Mapping of RE2IDs from FilteredRE2 (which are assigned in order) 70 // to regex StringPattern::IDs. 71 RE2IDMap re2_id_map_; 72 73 scoped_ptr<re2::FilteredRE2> filtered_re2_; 74 scoped_ptr<SubstringSetMatcher> substring_matcher_; 75 76 // The substring patterns from FilteredRE2, which are used in 77 // |substring_matcher_| but whose lifetime is managed here. 78 std::vector<const StringPattern*> substring_patterns_; 79 }; 80 81 } // namespace url_matcher 82 83 #endif // COMPONENTS_URL_MATCHER_REGEX_SET_MATCHER_H_ 84