1 // Copyright 2009 The RE2 Authors. All Rights Reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // The class FilteredRE2 is used as a wrapper to multiple RE2 regexps. 6 // It provides a prefilter mechanism that helps in cutting down the 7 // number of regexps that need to be actually searched. 8 // 9 // By design, it does not include a string matching engine. This is to 10 // allow the user of the class to use their favorite string match 11 // engine. The overall flow is: Add all the regexps using Add, then 12 // Compile the FilteredRE2. The compile returns strings that need to 13 // be matched. Note that all returned strings are lowercase. For 14 // applying regexps to a search text, the caller does the string 15 // matching using the strings returned. When doing the string match, 16 // note that the caller has to do that on lower cased version of the 17 // search text. Then call FirstMatch or AllMatches with a vector of 18 // indices of strings that were found in the text to get the actual 19 // regexp matches. 20 21 #ifndef RE2_FILTERED_RE2_H_ 22 #define RE2_FILTERED_RE2_H_ 23 24 #include <vector> 25 #include "re2/re2.h" 26 27 namespace re2 { 28 using std::vector; 29 30 class PrefilterTree; 31 32 class FilteredRE2 { 33 public: 34 FilteredRE2(); 35 ~FilteredRE2(); 36 37 // Uses RE2 constructor to create a RE2 object (re). Returns 38 // re->error_code(). If error_code is other than NoError, then re is 39 // deleted and not added to re2_vec_. 40 RE2::ErrorCode Add(const StringPiece& pattern, 41 const RE2::Options& options, 42 int *id); 43 44 // Prepares the regexps added by Add for filtering. Returns a set 45 // of strings that the caller should check for in candidate texts. 46 // The returned strings are lowercased. When doing string matching, 47 // the search text should be lowercased first to find matching 48 // strings from the set of strings returned by Compile. Call after 49 // all Add calls are done. 50 void Compile(vector<string>* strings_to_match); 51 52 // Returns the index of the first matching regexp. 53 // Returns -1 on no match. Can be called prior to Compile. 54 // Does not do any filtering: simply tries to Match the 55 // regexps in a loop. 56 int SlowFirstMatch(const StringPiece& text) const; 57 58 // Returns the index of the first matching regexp. 59 // Returns -1 on no match. Compile has to be called before 60 // calling this. 61 int FirstMatch(const StringPiece& text, 62 const vector<int>& atoms) const; 63 64 // Returns the indices of all matching regexps, after first clearing 65 // matched_regexps. 66 bool AllMatches(const StringPiece& text, 67 const vector<int>& atoms, 68 vector<int>* matching_regexps) const; 69 70 // The number of regexps added. NumRegexps()71 int NumRegexps() const { return re2_vec_.size(); } 72 73 private: 74 75 // Get the individual RE2 objects. Useful for testing. GetRE2(int regexpid)76 RE2* GetRE2(int regexpid) const { return re2_vec_[regexpid]; } 77 78 // Print prefilter. 79 void PrintPrefilter(int regexpid); 80 81 // Useful for testing and debugging. 82 void RegexpsGivenStrings(const vector<int>& matched_atoms, 83 vector<int>* passed_regexps); 84 85 // All the regexps in the FilteredRE2. 86 vector<RE2*> re2_vec_; 87 88 // Has the FilteredRE2 been compiled using Compile() 89 bool compiled_; 90 91 // An AND-OR tree of string atoms used for filtering regexps. 92 PrefilterTree* prefilter_tree_; 93 94 //DISALLOW_EVIL_CONSTRUCTORS(FilteredRE2); 95 FilteredRE2(const FilteredRE2&); 96 void operator=(const FilteredRE2&); 97 }; 98 99 } // namespace re2 100 101 #endif // RE2_FILTERED_RE2_H_ 102