• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2008 The RE2 Authors.  All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 #ifndef RE2_TESTING_TESTER_H_
6 #define RE2_TESTING_TESTER_H_
7 
8 // Comparative tester for regular expression matching.
9 // Checks all implementations against each other.
10 
11 #include <vector>
12 
13 #include "re2/stringpiece.h"
14 #include "re2/prog.h"
15 #include "re2/regexp.h"
16 #include "re2/re2.h"
17 #include "util/pcre.h"
18 
19 namespace re2 {
20 
// Every regexp engine that the tester knows how to drive.
// The enumerators are numbered consecutively starting at zero, and
// kEngineMax comes last, so its value equals the number of engines
// listed before it.
enum Engine {
  // Prog::UnsafeSearchBacktrack
  kEngineBacktrack = 0,
  // Prog::SearchNFA
  kEngineNFA,
  // Prog::SearchDFA, only ask whether it matched
  kEngineDFA,
  // Prog::SearchDFA, ask for match[0]
  kEngineDFA1,
  // Prog::SearchOnePass, if applicable
  kEngineOnePass,
  // Prog::SearchBitState
  kEngineBitState,
  // RE2, all submatches
  kEngineRE2,
  // RE2, only ask for match[0]
  kEngineRE2a,
  // RE2, only ask whether it matched
  kEngineRE2b,
  // PCRE (util/pcre.h)
  kEnginePCRE,

  // One past the last real engine.
  kEngineMax,
};
36 
37 // Make normal math on the enum preserve the type.
38 // By default, C++ doesn't define ++ on enum, and e+1 has type int.
39 static inline void operator++(Engine& e, int unused) {
40   e = static_cast<Engine>(e+1);
41 }
42 
43 static inline Engine operator+(Engine e, int i) {
44   return static_cast<Engine>(static_cast<int>(e)+i);
45 }
46 
47 // A TestInstance caches per-regexp state for a given
48 // regular expression in a given configuration
49 // (UTF-8 vs Latin1, longest vs first match, etc.).
50 class TestInstance {
51  public:
52   struct Result;
53 
54   TestInstance(const StringPiece& regexp, Prog::MatchKind kind,
55                Regexp::ParseFlags flags);
56   ~TestInstance();
flags()57   Regexp::ParseFlags flags() { return flags_; }
error()58   bool error() { return error_; }
59 
60   // Runs a single test case: search in text, which is in context,
61   // using the given anchoring.
62   bool RunCase(const StringPiece& text, const StringPiece& context,
63                Prog::Anchor anchor);
64 
65  private:
66   // Runs a single search using the named engine type.
67   void RunSearch(Engine type,
68                  const StringPiece& text, const StringPiece& context,
69                  Prog::Anchor anchor,
70                  Result *result);
71 
72   void LogMatch(const char* prefix, Engine e, const StringPiece& text,
73                 const StringPiece& context, Prog::Anchor anchor);
74 
75   const StringPiece regexp_str_;    // regexp being tested
76   Prog::MatchKind kind_;            // kind of match
77   Regexp::ParseFlags flags_;        // flags for parsing regexp_str_
78   bool error_;                      // error during constructor?
79 
80   Regexp* regexp_;                  // parsed regexp
81   int num_captures_;                // regexp_->NumCaptures() cached
82   Prog* prog_;                      // compiled program
83   Prog* rprog_;                     // compiled reverse program
84   PCRE* re_;                        // PCRE implementation
85   RE2* re2_;                        // RE2 implementation
86 
87   TestInstance(const TestInstance&) = delete;
88   TestInstance& operator=(const TestInstance&) = delete;
89 };
90 
91 // A group of TestInstances for all possible configurations.
92 class Tester {
93  public:
94   explicit Tester(const StringPiece& regexp);
95   ~Tester();
96 
error()97   bool error() { return error_; }
98 
99   // Runs a single test case: search in text, which is in context,
100   // using the given anchoring.
101   bool TestCase(const StringPiece& text, const StringPiece& context,
102                 Prog::Anchor anchor);
103 
104   // Run TestCase(text, text, anchor) for all anchoring modes.
105   bool TestInput(const StringPiece& text);
106 
107   // Run TestCase(text, context, anchor) for all anchoring modes.
108   bool TestInputInContext(const StringPiece& text, const StringPiece& context);
109 
110  private:
111   bool error_;
112   std::vector<TestInstance*> v_;
113 
114   Tester(const Tester&) = delete;
115   Tester& operator=(const Tester&) = delete;
116 };
117 
// Run all possible tests using regexp and text.
// NOTE(review): the bool result presumably reports whether every test
// passed -- confirm against the definition, which is not in this header.
bool TestRegexpOnText(const StringPiece& regexp, const StringPiece& text);
120 
121 }  // namespace re2
122 
123 #endif  // RE2_TESTING_TESTER_H_
124