• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2009 The RE2 Authors.  All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 #ifndef RE2_PREFILTER_H_
6 #define RE2_PREFILTER_H_
7 
8 // Prefilter is the class used to extract string guards from regexps.
9 // Rather than using Prefilter class directly, use FilteredRE2.
10 // See filtered_re2.h
11 
12 #include <set>
13 #include <string>
14 #include <vector>
15 
16 #include "absl/log/absl_check.h"
17 #include "absl/log/absl_log.h"
18 
19 namespace re2 {
20 
21 class RE2;
22 
23 class Regexp;
24 
25 class Prefilter {
26   // Instead of using Prefilter directly, use FilteredRE2; see filtered_re2.h
27  public:
28   enum Op {
29     ALL = 0,  // Everything matches
30     NONE,  // Nothing matches
31     ATOM,  // The string atom() must match
32     AND,   // All in subs() must match
33     OR,   // One of subs() must match
34   };
35 
36   explicit Prefilter(Op op);
37   ~Prefilter();
38 
op()39   Op op() { return op_; }
atom()40   const std::string& atom() const { return atom_; }
set_unique_id(int id)41   void set_unique_id(int id) { unique_id_ = id; }
unique_id()42   int unique_id() const { return unique_id_; }
43 
44   // The children of the Prefilter node.
subs()45   std::vector<Prefilter*>* subs() {
46     ABSL_DCHECK(op_ == AND || op_ == OR);
47     return subs_;
48   }
49 
50   // Set the children vector. Prefilter takes ownership of subs and
51   // subs_ will be deleted when Prefilter is deleted.
set_subs(std::vector<Prefilter * > * subs)52   void set_subs(std::vector<Prefilter*>* subs) { subs_ = subs; }
53 
54   // Given a RE2, return a Prefilter. The caller takes ownership of
55   // the Prefilter and should deallocate it. Returns NULL if Prefilter
56   // cannot be formed.
57   static Prefilter* FromRE2(const RE2* re2);
58 
59   // Returns a readable debug string of the prefilter.
60   std::string DebugString() const;
61 
62  private:
63   template <typename H>
AbslHashValue(H h,const Prefilter & a)64   friend H AbslHashValue(H h, const Prefilter& a) {
65     h = H::combine(std::move(h), a.op_);
66     if (a.op_ == ATOM) {
67       h = H::combine(std::move(h), a.atom_);
68     } else if (a.op_ == AND || a.op_ == OR) {
69       h = H::combine(std::move(h), a.subs_->size());
70       for (size_t i = 0; i < a.subs_->size(); ++i) {
71         h = H::combine(std::move(h), (*a.subs_)[i]->unique_id_);
72       }
73     }
74     return h;
75   }
76 
77   friend bool operator==(const Prefilter& a, const Prefilter& b) {
78     if (&a == &b) {
79       return true;
80     }
81     if (a.op_ != b.op_) {
82       return false;
83     }
84     if (a.op_ == ATOM) {
85       if (a.atom_ != b.atom_) {
86         return false;
87       }
88     } else if (a.op_ == AND || a.op_ == OR) {
89       if (a.subs_->size() != b.subs_->size()) {
90         return false;
91       }
92       for (size_t i = 0; i < a.subs_->size(); ++i) {
93         if ((*a.subs_)[i]->unique_id_ != (*b.subs_)[i]->unique_id_) {
94           return false;
95         }
96       }
97     }
98     return true;
99   }
100 
101   // A comparator used to store exact strings. We compare by length,
102   // then lexicographically. This ordering makes it easier to reduce the
103   // set of strings in SimplifyStringSet.
104   struct LengthThenLex {
operatorLengthThenLex105     bool operator()(const std::string& a, const std::string& b) const {
106        return (a.size() < b.size()) || (a.size() == b.size() && a < b);
107     }
108   };
109 
110   class Info;
111 
112   using SSet = std::set<std::string, LengthThenLex>;
113   using SSIter = SSet::iterator;
114   using ConstSSIter = SSet::const_iterator;
115 
116   // Combines two prefilters together to create an AND. The passed
117   // Prefilters will be part of the returned Prefilter or deleted.
118   static Prefilter* And(Prefilter* a, Prefilter* b);
119 
120   // Combines two prefilters together to create an OR. The passed
121   // Prefilters will be part of the returned Prefilter or deleted.
122   static Prefilter* Or(Prefilter* a, Prefilter* b);
123 
124   // Generalized And/Or
125   static Prefilter* AndOr(Op op, Prefilter* a, Prefilter* b);
126 
127   static Prefilter* FromRegexp(Regexp* a);
128 
129   static Prefilter* FromString(const std::string& str);
130 
131   static Prefilter* OrStrings(SSet* ss);
132 
133   static Info* BuildInfo(Regexp* re);
134 
135   Prefilter* Simplify();
136 
137   // Removes redundant strings from the set. A string is redundant if
138   // any of the other strings appear as a substring. The empty string
139   // is a special case, which is ignored.
140   static void SimplifyStringSet(SSet* ss);
141 
142   // Adds the cross-product of a and b to dst.
143   // (For each string i in a and j in b, add i+j.)
144   static void CrossProduct(const SSet& a, const SSet& b, SSet* dst);
145 
146   // Kind of Prefilter.
147   Op op_;
148 
149   // Sub-matches for AND or OR Prefilter.
150   std::vector<Prefilter*>* subs_;
151 
152   // Actual string to match in leaf node.
153   std::string atom_;
154 
155   // If different prefilters have the same string atom, or if they are
156   // structurally the same (e.g., OR of same atom strings) they are
157   // considered the same unique nodes. This is the id for each unique
158   // node. This field is populated with a unique id for every node,
159   // and -1 for duplicate nodes.
160   int unique_id_;
161 
162   Prefilter(const Prefilter&) = delete;
163   Prefilter& operator=(const Prefilter&) = delete;
164 };
165 
166 }  // namespace re2
167 
168 #endif  // RE2_PREFILTER_H_
169