1 use regex::internal::ExecBuilder;
2
3 /// Given a regex, check if all of the backends produce the same
4 /// results on a number of different inputs.
5 ///
6 /// For now this just throws quickcheck at the problem, which
7 /// is not very good because it only really tests half of the
8 /// problem space. It is pretty unlikely that a random string
9 /// will match any given regex, so this will probably just
10 /// be checking that the different backends fail in the same
11 /// way. This is still worthwhile to test, but is definitely not
12 /// the whole story.
13 ///
14 /// TODO(ethan): In order to cover the other half of the problem
15 /// space, we should generate a random matching string by inspecting
16 /// the AST of the input regex. The right way to do this probably
17 /// involves adding a custom Arbitrary instance around a couple
18 /// of newtypes. That way we can respect the quickcheck size hinting
19 /// and shrinking and whatnot.
backends_are_consistent(re: &str) -> Result<u64, String>20 pub fn backends_are_consistent(re: &str) -> Result<u64, String> {
21 let standard_backends = vec![
22 (
23 "bounded_backtracking_re",
24 ExecBuilder::new(re)
25 .bounded_backtracking()
26 .build()
27 .map(|exec| exec.into_regex())
28 .map_err(|err| format!("{}", err))?,
29 ),
30 (
31 "pikevm_re",
32 ExecBuilder::new(re)
33 .nfa()
34 .build()
35 .map(|exec| exec.into_regex())
36 .map_err(|err| format!("{}", err))?,
37 ),
38 (
39 "default_re",
40 ExecBuilder::new(re)
41 .build()
42 .map(|exec| exec.into_regex())
43 .map_err(|err| format!("{}", err))?,
44 ),
45 ];
46
47 let utf8bytes_backends = vec![
48 (
49 "bounded_backtracking_utf8bytes_re",
50 ExecBuilder::new(re)
51 .bounded_backtracking()
52 .bytes(true)
53 .build()
54 .map(|exec| exec.into_regex())
55 .map_err(|err| format!("{}", err))?,
56 ),
57 (
58 "pikevm_utf8bytes_re",
59 ExecBuilder::new(re)
60 .nfa()
61 .bytes(true)
62 .build()
63 .map(|exec| exec.into_regex())
64 .map_err(|err| format!("{}", err))?,
65 ),
66 (
67 "default_utf8bytes_re",
68 ExecBuilder::new(re)
69 .bytes(true)
70 .build()
71 .map(|exec| exec.into_regex())
72 .map_err(|err| format!("{}", err))?,
73 ),
74 ];
75
76 let bytes_backends = vec![
77 (
78 "bounded_backtracking_bytes_re",
79 ExecBuilder::new(re)
80 .bounded_backtracking()
81 .only_utf8(false)
82 .build()
83 .map(|exec| exec.into_byte_regex())
84 .map_err(|err| format!("{}", err))?,
85 ),
86 (
87 "pikevm_bytes_re",
88 ExecBuilder::new(re)
89 .nfa()
90 .only_utf8(false)
91 .build()
92 .map(|exec| exec.into_byte_regex())
93 .map_err(|err| format!("{}", err))?,
94 ),
95 (
96 "default_bytes_re",
97 ExecBuilder::new(re)
98 .only_utf8(false)
99 .build()
100 .map(|exec| exec.into_byte_regex())
101 .map_err(|err| format!("{}", err))?,
102 ),
103 ];
104
105 Ok(string_checker::check_backends(&standard_backends)?
106 + string_checker::check_backends(&utf8bytes_backends)?
107 + bytes_checker::check_backends(&bytes_backends)?)
108 }
109
110 //
111 // A consistency checker parameterized by the input type (&str or &[u8]).
112 //
113
/// Generates a module named `$module_name` containing a consistency
/// checker for regexes of type `$regex_type`.
///
/// `$mk_input` is an expression that is called with the quickcheck
/// generator and must produce an owned input whose reference can be
/// passed to the regex search methods (the invocations in this file use
/// `String` and `Vec<u8>`).
macro_rules! checker {
    ($module_name:ident, $regex_type:path, $mk_input:expr) => {
        mod $module_name {
            use quickcheck;
            use quickcheck::{Arbitrary, TestResult};

            /// Compares every backend after the first against the first
            /// one, which serves as the reference.
            ///
            /// Returns the sum of the counts reported by the individual
            /// comparisons, or the first inconsistency message. An empty
            /// or single-element slice has nothing to compare and yields
            /// `Ok(0)`.
            pub fn check_backends(
                backends: &[(&str, $regex_type)],
            ) -> Result<u64, String> {
                match backends.split_first() {
                    // Summing `Result`s stops at the first `Err`, so no
                    // further comparisons run after a failure.
                    Some((reference, others)) => others
                        .iter()
                        .map(|other| quickcheck_regex_eq(reference, other))
                        .sum(),
                    None => Ok(0),
                }
            }

            /// Runs quickcheck on a pair of named regexes and converts a
            /// failure into a message naming both backends.
            fn quickcheck_regex_eq(
                &(name1, ref re1): &(&str, $regex_type),
                &(name2, ref re2): &(&str, $regex_type),
            ) -> Result<u64, String> {
                quickcheck::QuickCheck::new()
                    .quicktest(RegexEqualityTest::new(
                        re1.clone(),
                        re2.clone(),
                    ))
                    .map_err(|err| {
                        // The `\` continuation swallows the newline and
                        // the next line's indentation, so the separating
                        // space has to come before it.
                        format!(
                            "{}(/{}/) and {}(/{}/) are inconsistent. \
                             QuickCheck Err: {:?}",
                            name1, re1, name2, re2, err
                        )
                    })
            }

            /// A quickcheck property holding the two regexes to compare.
            struct RegexEqualityTest {
                re1: $regex_type,
                re2: $regex_type,
            }

            impl RegexEqualityTest {
                fn new(re1: $regex_type, re2: $regex_type) -> Self {
                    RegexEqualityTest { re1, re2 }
                }
            }

            impl quickcheck::Testable for RegexEqualityTest {
                /// Generates one input and checks that `find`,
                /// `captures`, `find_iter`, `captures_iter` and `split`
                /// give the same results for both regexes, including the
                /// same number of results.
                fn result(&self, gen: &mut quickcheck::Gen) -> TestResult {
                    let input = $mk_input(gen);
                    let input = &input;

                    // Builds the failure result for the named operation.
                    let mismatch = |what: &str| {
                        TestResult::error(format!(
                            "{} mismatch input={:?}",
                            what, input
                        ))
                    };

                    if self.re1.find(input) != self.re2.find(input) {
                        return mismatch("find");
                    }

                    let captures_agree = match (
                        self.re1.captures(input),
                        self.re2.captures(input),
                    ) {
                        (None, None) => true,
                        (Some(cap1), Some(cap2)) => {
                            cap1.iter().eq(cap2.iter())
                        }
                        _ => false,
                    };
                    if !captures_agree {
                        return mismatch("captures");
                    }

                    // `Iterator::eq` (unlike `zip`) also fails when one
                    // side yields more items than the other.
                    let fi1 = self.re1.find_iter(input);
                    let fi2 = self.re2.find_iter(input);
                    if !fi1.eq(fi2) {
                        return mismatch("find_iter");
                    }

                    // Walk both capture iterators in lockstep so that a
                    // difference in the number of matches is reported.
                    let mut ci1 = self.re1.captures_iter(input);
                    let mut ci2 = self.re2.captures_iter(input);
                    loop {
                        match (ci1.next(), ci2.next()) {
                            (None, None) => break,
                            (Some(cap1), Some(cap2)) => {
                                if !cap1.iter().eq(cap2.iter()) {
                                    return mismatch("captures_iter");
                                }
                            }
                            _ => return mismatch("captures_iter"),
                        }
                    }

                    let s1 = self.re1.split(input);
                    let s2 = self.re2.split(input);
                    if !s1.eq(s2) {
                        return mismatch("split");
                    }

                    TestResult::passed()
                }
            }
        } // mod
    }; // rule case
} // macro_rules!
234
235 checker!(string_checker, ::regex::Regex, |gen| String::arbitrary(gen));
236 checker!(bytes_checker, ::regex::bytes::Regex, |gen| Vec::<u8>::arbitrary(
237 gen
238 ));
239