1 // Copyright 2008 The RE2 Authors. All Rights Reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 #ifndef RE2_TESTING_TESTER_H_ 6 #define RE2_TESTING_TESTER_H_ 7 8 // Comparative tester for regular expression matching. 9 // Checks all implementations against each other. 10 11 #include <vector> 12 13 #include "absl/strings/string_view.h" 14 #include "re2/prog.h" 15 #include "re2/re2.h" 16 #include "re2/regexp.h" 17 #include "util/pcre.h" 18 19 namespace re2 { 20 21 // All the supported regexp engines. 22 enum Engine { 23 kEngineBacktrack = 0, // Prog::UnsafeSearchBacktrack 24 kEngineNFA, // Prog::SearchNFA 25 kEngineDFA, // Prog::SearchDFA, only ask whether it matched 26 kEngineDFA1, // Prog::SearchDFA, ask for match[0] 27 kEngineOnePass, // Prog::SearchOnePass, if applicable 28 kEngineBitState, // Prog::SearchBitState 29 kEngineRE2, // RE2, all submatches 30 kEngineRE2a, // RE2, only ask for match[0] 31 kEngineRE2b, // RE2, only ask whether it matched 32 kEnginePCRE, // PCRE (util/pcre.h) 33 34 kEngineMax, 35 }; 36 37 // Make normal math on the enum preserve the type. 38 // By default, C++ doesn't define ++ on enum, and e+1 has type int. 39 static inline void operator++(Engine& e, int unused) { 40 e = static_cast<Engine>(e+1); 41 } 42 43 static inline Engine operator+(Engine e, int i) { 44 return static_cast<Engine>(static_cast<int>(e)+i); 45 } 46 47 // A TestInstance caches per-regexp state for a given 48 // regular expression in a given configuration 49 // (UTF-8 vs Latin1, longest vs first match, etc.). 50 class TestInstance { 51 public: 52 struct Result; 53 54 TestInstance(absl::string_view regexp, Prog::MatchKind kind, 55 Regexp::ParseFlags flags); 56 ~TestInstance(); flags()57 Regexp::ParseFlags flags() { return flags_; } error()58 bool error() { return error_; } 59 60 // Runs a single test case: search in text, which is in context, 61 // using the given anchoring. 62 bool RunCase(absl::string_view text, absl::string_view context, 63 Prog::Anchor anchor); 64 65 private: 66 // Runs a single search using the named engine type. 67 void RunSearch(Engine type, absl::string_view text, absl::string_view context, 68 Prog::Anchor anchor, Result* result); 69 70 void LogMatch(const char* prefix, Engine e, absl::string_view text, 71 absl::string_view context, Prog::Anchor anchor); 72 73 absl::string_view regexp_str_; // regexp being tested 74 Prog::MatchKind kind_; // kind of match 75 Regexp::ParseFlags flags_; // flags for parsing regexp_str_ 76 bool error_; // error during constructor? 77 78 Regexp* regexp_; // parsed regexp 79 int num_captures_; // regexp_->NumCaptures() cached 80 Prog* prog_; // compiled program 81 Prog* rprog_; // compiled reverse program 82 PCRE* re_; // PCRE implementation 83 RE2* re2_; // RE2 implementation 84 85 TestInstance(const TestInstance&) = delete; 86 TestInstance& operator=(const TestInstance&) = delete; 87 }; 88 89 // A group of TestInstances for all possible configurations. 90 class Tester { 91 public: 92 explicit Tester(absl::string_view regexp); 93 ~Tester(); 94 error()95 bool error() { return error_; } 96 97 // Runs a single test case: search in text, which is in context, 98 // using the given anchoring. 99 bool TestCase(absl::string_view text, absl::string_view context, 100 Prog::Anchor anchor); 101 102 // Run TestCase(text, text, anchor) for all anchoring modes. 103 bool TestInput(absl::string_view text); 104 105 // Run TestCase(text, context, anchor) for all anchoring modes. 106 bool TestInputInContext(absl::string_view text, absl::string_view context); 107 108 private: 109 bool error_; 110 std::vector<TestInstance*> v_; 111 112 Tester(const Tester&) = delete; 113 Tester& operator=(const Tester&) = delete; 114 }; 115 116 // Run all possible tests using regexp and text. 117 bool TestRegexpOnText(absl::string_view regexp, absl::string_view text); 118 119 } // namespace re2 120 121 #endif // RE2_TESTING_TESTER_H_ 122