1 // Copyright 2009 The RE2 Authors. All Rights Reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 #ifndef RE2_PREFILTER_H_ 6 #define RE2_PREFILTER_H_ 7 8 // Prefilter is the class used to extract string guards from regexps. 9 // Rather than using Prefilter class directly, use FilteredRE2. 10 // See filtered_re2.h 11 12 #include <set> 13 #include <string> 14 #include <vector> 15 16 #include "absl/log/absl_check.h" 17 #include "absl/log/absl_log.h" 18 19 namespace re2 { 20 21 class RE2; 22 23 class Regexp; 24 25 class Prefilter { 26 // Instead of using Prefilter directly, use FilteredRE2; see filtered_re2.h 27 public: 28 enum Op { 29 ALL = 0, // Everything matches 30 NONE, // Nothing matches 31 ATOM, // The string atom() must match 32 AND, // All in subs() must match 33 OR, // One of subs() must match 34 }; 35 36 explicit Prefilter(Op op); 37 ~Prefilter(); 38 op()39 Op op() { return op_; } atom()40 const std::string& atom() const { return atom_; } set_unique_id(int id)41 void set_unique_id(int id) { unique_id_ = id; } unique_id()42 int unique_id() const { return unique_id_; } 43 44 // The children of the Prefilter node. subs()45 std::vector<Prefilter*>* subs() { 46 ABSL_DCHECK(op_ == AND || op_ == OR); 47 return subs_; 48 } 49 50 // Set the children vector. Prefilter takes ownership of subs and 51 // subs_ will be deleted when Prefilter is deleted. set_subs(std::vector<Prefilter * > * subs)52 void set_subs(std::vector<Prefilter*>* subs) { subs_ = subs; } 53 54 // Given a RE2, return a Prefilter. The caller takes ownership of 55 // the Prefilter and should deallocate it. Returns NULL if Prefilter 56 // cannot be formed. 57 static Prefilter* FromRE2(const RE2* re2); 58 59 // Returns a readable debug string of the prefilter. 60 std::string DebugString() const; 61 62 private: 63 template <typename H> AbslHashValue(H h,const Prefilter & a)64 friend H AbslHashValue(H h, const Prefilter& a) { 65 h = H::combine(std::move(h), a.op_); 66 if (a.op_ == ATOM) { 67 h = H::combine(std::move(h), a.atom_); 68 } else if (a.op_ == AND || a.op_ == OR) { 69 h = H::combine(std::move(h), a.subs_->size()); 70 for (size_t i = 0; i < a.subs_->size(); ++i) { 71 h = H::combine(std::move(h), (*a.subs_)[i]->unique_id_); 72 } 73 } 74 return h; 75 } 76 77 friend bool operator==(const Prefilter& a, const Prefilter& b) { 78 if (&a == &b) { 79 return true; 80 } 81 if (a.op_ != b.op_) { 82 return false; 83 } 84 if (a.op_ == ATOM) { 85 if (a.atom_ != b.atom_) { 86 return false; 87 } 88 } else if (a.op_ == AND || a.op_ == OR) { 89 if (a.subs_->size() != b.subs_->size()) { 90 return false; 91 } 92 for (size_t i = 0; i < a.subs_->size(); ++i) { 93 if ((*a.subs_)[i]->unique_id_ != (*b.subs_)[i]->unique_id_) { 94 return false; 95 } 96 } 97 } 98 return true; 99 } 100 101 // A comparator used to store exact strings. We compare by length, 102 // then lexicographically. This ordering makes it easier to reduce the 103 // set of strings in SimplifyStringSet. 104 struct LengthThenLex { operatorLengthThenLex105 bool operator()(const std::string& a, const std::string& b) const { 106 return (a.size() < b.size()) || (a.size() == b.size() && a < b); 107 } 108 }; 109 110 class Info; 111 112 using SSet = std::set<std::string, LengthThenLex>; 113 using SSIter = SSet::iterator; 114 using ConstSSIter = SSet::const_iterator; 115 116 // Combines two prefilters together to create an AND. The passed 117 // Prefilters will be part of the returned Prefilter or deleted. 118 static Prefilter* And(Prefilter* a, Prefilter* b); 119 120 // Combines two prefilters together to create an OR. The passed 121 // Prefilters will be part of the returned Prefilter or deleted. 122 static Prefilter* Or(Prefilter* a, Prefilter* b); 123 124 // Generalized And/Or 125 static Prefilter* AndOr(Op op, Prefilter* a, Prefilter* b); 126 127 static Prefilter* FromRegexp(Regexp* a); 128 129 static Prefilter* FromString(const std::string& str); 130 131 static Prefilter* OrStrings(SSet* ss); 132 133 static Info* BuildInfo(Regexp* re); 134 135 Prefilter* Simplify(); 136 137 // Removes redundant strings from the set. A string is redundant if 138 // any of the other strings appear as a substring. The empty string 139 // is a special case, which is ignored. 140 static void SimplifyStringSet(SSet* ss); 141 142 // Adds the cross-product of a and b to dst. 143 // (For each string i in a and j in b, add i+j.) 144 static void CrossProduct(const SSet& a, const SSet& b, SSet* dst); 145 146 // Kind of Prefilter. 147 Op op_; 148 149 // Sub-matches for AND or OR Prefilter. 150 std::vector<Prefilter*>* subs_; 151 152 // Actual string to match in leaf node. 153 std::string atom_; 154 155 // If different prefilters have the same string atom, or if they are 156 // structurally the same (e.g., OR of same atom strings) they are 157 // considered the same unique nodes. This is the id for each unique 158 // node. This field is populated with a unique id for every node, 159 // and -1 for duplicate nodes. 160 int unique_id_; 161 162 Prefilter(const Prefilter&) = delete; 163 Prefilter& operator=(const Prefilter&) = delete; 164 }; 165 166 } // namespace re2 167 168 #endif // RE2_PREFILTER_H_ 169