1 // Copyright 2008 The RE2 Authors. All Rights Reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 #ifndef RE2_TESTING_TESTER_H_ 6 #define RE2_TESTING_TESTER_H_ 7 8 // Comparative tester for regular expression matching. 9 // Checks all implementations against each other. 10 11 #include <vector> 12 13 #include "re2/stringpiece.h" 14 #include "re2/prog.h" 15 #include "re2/regexp.h" 16 #include "re2/re2.h" 17 #include "util/pcre.h" 18 19 namespace re2 { 20 21 // All the supported regexp engines. 22 enum Engine { 23 kEngineBacktrack = 0, // Prog::UnsafeSearchBacktrack 24 kEngineNFA, // Prog::SearchNFA 25 kEngineDFA, // Prog::SearchDFA, only ask whether it matched 26 kEngineDFA1, // Prog::SearchDFA, ask for match[0] 27 kEngineOnePass, // Prog::SearchOnePass, if applicable 28 kEngineBitState, // Prog::SearchBitState 29 kEngineRE2, // RE2, all submatches 30 kEngineRE2a, // RE2, only ask for match[0] 31 kEngineRE2b, // RE2, only ask whether it matched 32 kEnginePCRE, // PCRE (util/pcre.h) 33 34 kEngineMax, 35 }; 36 37 // Make normal math on the enum preserve the type. 38 // By default, C++ doesn't define ++ on enum, and e+1 has type int. 39 static inline void operator++(Engine& e, int unused) { 40 e = static_cast<Engine>(e+1); 41 } 42 43 static inline Engine operator+(Engine e, int i) { 44 return static_cast<Engine>(static_cast<int>(e)+i); 45 } 46 47 // A TestInstance caches per-regexp state for a given 48 // regular expression in a given configuration 49 // (UTF-8 vs Latin1, longest vs first match, etc.). 50 class TestInstance { 51 public: 52 struct Result; 53 54 TestInstance(const StringPiece& regexp, Prog::MatchKind kind, 55 Regexp::ParseFlags flags); 56 ~TestInstance(); flags()57 Regexp::ParseFlags flags() { return flags_; } error()58 bool error() { return error_; } 59 60 // Runs a single test case: search in text, which is in context, 61 // using the given anchoring. 62 bool RunCase(const StringPiece& text, const StringPiece& context, 63 Prog::Anchor anchor); 64 65 private: 66 // Runs a single search using the named engine type. 67 void RunSearch(Engine type, 68 const StringPiece& text, const StringPiece& context, 69 Prog::Anchor anchor, 70 Result *result); 71 72 void LogMatch(const char* prefix, Engine e, const StringPiece& text, 73 const StringPiece& context, Prog::Anchor anchor); 74 75 const StringPiece regexp_str_; // regexp being tested 76 Prog::MatchKind kind_; // kind of match 77 Regexp::ParseFlags flags_; // flags for parsing regexp_str_ 78 bool error_; // error during constructor? 79 80 Regexp* regexp_; // parsed regexp 81 int num_captures_; // regexp_->NumCaptures() cached 82 Prog* prog_; // compiled program 83 Prog* rprog_; // compiled reverse program 84 PCRE* re_; // PCRE implementation 85 RE2* re2_; // RE2 implementation 86 87 TestInstance(const TestInstance&) = delete; 88 TestInstance& operator=(const TestInstance&) = delete; 89 }; 90 91 // A group of TestInstances for all possible configurations. 92 class Tester { 93 public: 94 explicit Tester(const StringPiece& regexp); 95 ~Tester(); 96 error()97 bool error() { return error_; } 98 99 // Runs a single test case: search in text, which is in context, 100 // using the given anchoring. 101 bool TestCase(const StringPiece& text, const StringPiece& context, 102 Prog::Anchor anchor); 103 104 // Run TestCase(text, text, anchor) for all anchoring modes. 105 bool TestInput(const StringPiece& text); 106 107 // Run TestCase(text, context, anchor) for all anchoring modes. 108 bool TestInputInContext(const StringPiece& text, const StringPiece& context); 109 110 private: 111 bool error_; 112 std::vector<TestInstance*> v_; 113 114 Tester(const Tester&) = delete; 115 Tester& operator=(const Tester&) = delete; 116 }; 117 118 // Run all possible tests using regexp and text. 119 bool TestRegexpOnText(const StringPiece& regexp, const StringPiece& text); 120 121 } // namespace re2 122 123 #endif // RE2_TESTING_TESTER_H_ 124