• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2009 The RE2 Authors.  All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 #ifndef RE2_FILTERED_RE2_H_
6 #define RE2_FILTERED_RE2_H_
7 
8 // The class FilteredRE2 is used as a wrapper to multiple RE2 regexps.
9 // It provides a prefilter mechanism that helps in cutting down the
10 // number of regexps that need to be actually searched.
11 //
12 // By design, it does not include a string matching engine. This is to
13 // allow the user of the class to use their favorite string matching
14 // engine. The overall flow is: Add all the regexps using Add, then
15 // Compile the FilteredRE2. Compile returns strings that need to be
16 // matched. Note that the returned strings are lowercased and distinct.
17 // For applying regexps to a search text, the caller does the string
18 // matching using the returned strings. When doing the string match,
19 // note that the caller has to do that in a case-insensitive way or
20 // on a lowercased version of the search text. Then call FirstMatch
21 // or AllMatches with a vector of indices of strings that were found
22 // in the text to get the actual regexp matches.
23 
24 #include <memory>
25 #include <string>
26 #include <vector>
27 
28 #include "absl/strings/string_view.h"
29 #include "re2/re2.h"
30 
31 namespace re2 {
32 
33 class PrefilterTree;
34 
35 class FilteredRE2 {
36  public:
37   FilteredRE2();
38   explicit FilteredRE2(int min_atom_len);
39   ~FilteredRE2();
40 
41   // Not copyable.
42   FilteredRE2(const FilteredRE2&) = delete;
43   FilteredRE2& operator=(const FilteredRE2&) = delete;
44   // Movable.
45   FilteredRE2(FilteredRE2&& other);
46   FilteredRE2& operator=(FilteredRE2&& other);
47 
48   // Uses RE2 constructor to create a RE2 object (re). Returns
49   // re->error_code(). If error_code is other than NoError, then re is
50   // deleted and not added to re2_vec_.
51   RE2::ErrorCode Add(absl::string_view pattern,
52                      const RE2::Options& options,
53                      int* id);
54 
55   // Prepares the regexps added by Add for filtering.  Returns a set
56   // of strings that the caller should check for in candidate texts.
57   // The returned strings are lowercased and distinct. When doing
58   // string matching, it should be performed in a case-insensitive
59   // way or the search text should be lowercased first.  Call after
60   // all Add calls are done.
61   void Compile(std::vector<std::string>* strings_to_match);
62 
63   // Returns the index of the first matching regexp.
64   // Returns -1 on no match. Can be called prior to Compile.
65   // Does not do any filtering: simply tries to Match the
66   // regexps in a loop.
67   int SlowFirstMatch(absl::string_view text) const;
68 
69   // Returns the index of the first matching regexp.
70   // Returns -1 on no match. Compile has to be called before
71   // calling this.
72   int FirstMatch(absl::string_view text,
73                  const std::vector<int>& atoms) const;
74 
75   // Returns the indices of all matching regexps, after first clearing
76   // matched_regexps.
77   bool AllMatches(absl::string_view text,
78                   const std::vector<int>& atoms,
79                   std::vector<int>* matching_regexps) const;
80 
81   // Returns the indices of all potentially matching regexps after first
82   // clearing potential_regexps.
83   // A regexp is potentially matching if it passes the filter.
84   // If a regexp passes the filter it may still not match.
85   // A regexp that does not pass the filter is guaranteed to not match.
86   void AllPotentials(const std::vector<int>& atoms,
87                      std::vector<int>* potential_regexps) const;
88 
89   // The number of regexps added.
NumRegexps()90   int NumRegexps() const { return static_cast<int>(re2_vec_.size()); }
91 
92   // Get the individual RE2 objects.
GetRE2(int regexpid)93   const RE2& GetRE2(int regexpid) const { return *re2_vec_[regexpid]; }
94 
95  private:
96   // Print prefilter.
97   void PrintPrefilter(int regexpid);
98 
99   // Useful for testing and debugging.
100   void RegexpsGivenStrings(const std::vector<int>& matched_atoms,
101                            std::vector<int>* passed_regexps);
102 
103   // All the regexps in the FilteredRE2.
104   std::vector<RE2*> re2_vec_;
105 
106   // Has the FilteredRE2 been compiled using Compile()
107   bool compiled_;
108 
109   // An AND-OR tree of string atoms used for filtering regexps.
110   std::unique_ptr<PrefilterTree> prefilter_tree_;
111 };
112 
113 }  // namespace re2
114 
115 #endif  // RE2_FILTERED_RE2_H_
116