• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2008 The RE2 Authors.  All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 // Exhaustive testing of regular expression matching.
6 
7 #include <stddef.h>
8 #include <memory>
9 #include <string>
10 #include <vector>
11 
12 #include "util/test.h"
13 #include "re2/testing/exhaustive_tester.h"
14 
15 namespace re2 {
16 
17 // Test empty string matches (aka "(?:)")
TEST(EmptyString, Exhaustive) {
  // Regexp building blocks: the empty regexp "(?:)" and the literal "a".
  // NOTE(review): argument roles are inferred from how the other tests in
  // this file call ExhaustiveTest; confirm against its declaration.
  const auto regexp_pieces = Split(" ", "(?:) a");
  // Pieces used for the strings to match against (presumably "a" and "b").
  const auto alphabet = Split("", "ab");
  ExhaustiveTest(2, 2, regexp_pieces, RegexpGenerator::EgrepOps(),
                 5, alphabet, "", "");
}
23 
24 // Test escaped versions of regexp syntax.
TEST(Punctuation, Literals) {
  // The regexp punctuation characters, one entry per element
  // (presumably one character each -- confirm Explode's contract).
  const std::vector<std::string> punctuation = Explode("()*+?{}[]\\^$.");

  // Build the backslash-escaped form of every entry, in the same order.
  std::vector<std::string> backslashed;
  backslashed.reserve(punctuation.size());
  for (const std::string& symbol : punctuation)
    backslashed.push_back("\\" + symbol);

  // The escaped forms go in as the regexp pieces; the unescaped
  // punctuation is passed as the alphabet.
  ExhaustiveTest(1, 1, backslashed, RegexpGenerator::EgrepOps(),
                 2, punctuation, "", "");
}
33 
34 // Test ^ $ . \A \z in presence of line endings.
35 // Have to wrap the empty-width ones in (?:) so that
36 // they can be repeated -- PCRE rejects ^* but allows (?:^)*
TEST(LineEnds, Exhaustive) {
  // Regexp building blocks. The empty-width assertions (^, $, \A, \z) are
  // wrapped in (?:) so that they can be repeated.
  const auto regexp_pieces =
      Split(" ", "(?:^) (?:$) . a \\n (?:\\A) (?:\\z)");
  // Alphabet for the strings to match against; it includes a newline so
  // that line endings are exercised.
  const auto alphabet = Explode("ab\n");
  ExhaustiveTest(2, 2, regexp_pieces, RegexpGenerator::EgrepOps(),
                 4, alphabet, "", "");
}
42 
43 // Test what does and does not match \n.
44 // This would be a good test, except that PCRE seems to have a bug:
45 // in single-byte character set mode (the default),
46 // [^a] matches \n, but in UTF-8 mode it does not.
47 // So when we run the test, the tester complains that
48 // we don't agree with PCRE, but it's PCRE that is at fault.
49 // For what it's worth, Perl gets this right (matches
50 // regardless of whether UTF-8 input is selected):
51 //
52 //     #!/usr/bin/perl
53 //     use POSIX qw(locale_h);
54 //     print "matches in latin1\n" if "\n" =~ /[^a]/;
55 //     setlocale("en_US.utf8");
56 //     print "matches in utf8\n" if "\n" =~ /[^a]/;
57 //
58 // The rule chosen for RE2 is that by default, like Perl,
59 // dot does not match \n but negated character classes [^a] do.
60 // (?s) will allow dot to match \n; there is no way in RE2
61 // to stop [^a] from matching \n, though the underlying library
62 // provides a mechanism, and RE2 could add new syntax if needed.
63 //
64 // TEST(Newlines, Exhaustive) {
65 //   std::vector<std::string> empty_vector;
66 //   ExhaustiveTest(1, 1, Split(" ", "\\n . a [^a]"),
67 //                  RegexpGenerator::EgrepOps(),
68 //                  4, Explode("a\n"), "");
69 // }
70 
71 }  // namespace re2
72 
73