• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #![cfg_attr(feature = "pattern", feature(pattern))]
2 
3 use regex;
4 
5 // Due to macro scoping rules, this definition only applies for the modules
6 // defined below. Effectively, it allows us to use the same tests for both
7 // native and dynamic regexes.
8 //
9 // This is also used to test the various matching engines. This one exercises
10 // the normal code path which automatically chooses the engine based on the
11 // regex and the input. Other dynamic tests explicitly set the engine to use.
macro_rules! regex_new {
    // Expands to a block that compiles the given pattern expression with
    // `regex::Regex::new` and evaluates to the resulting `Result`. Nothing is
    // unwrapped here, so callers can inspect the error case.
    ($pattern:expr) => {{
        use regex::Regex;
        Regex::new($pattern)
    }};
}
18 
macro_rules! regex {
    // Delegates to `regex_new!` and unwraps the result, so a pattern that
    // fails to compile panics at the point of use.
    ($pattern:expr) => {
        regex_new!($pattern).unwrap()
    };
}
24 
macro_rules! regex_set_new {
    // Expands to a block that passes the given expression to
    // `regex::RegexSet::new` and evaluates to the resulting `Result`.
    ($patterns:expr) => {{
        use regex::RegexSet;
        RegexSet::new($patterns)
    }};
}
31 
macro_rules! regex_set {
    // Delegates to `regex_set_new!` and unwraps the result, so a set that
    // fails to build panics at the point of use.
    ($patterns:expr) => {
        regex_set_new!($patterns).unwrap()
    };
}
37 
38 // Must come before other module definitions.
39 include!("macros_str.rs");
40 include!("macros.rs");
41 
42 mod api;
43 mod api_str;
44 mod crazy;
45 mod flags;
46 mod fowler;
47 mod misc;
48 mod multiline;
49 mod noparse;
50 mod regression;
51 mod regression_fuzz;
52 mod replace;
53 mod searcher;
54 mod set;
55 mod shortest_match;
56 mod suffix_reverse;
57 #[cfg(feature = "unicode")]
58 mod unicode;
59 #[cfg(feature = "unicode-perl")]
60 mod word_boundary;
61 #[cfg(feature = "unicode-perl")]
62 mod word_boundary_unicode;
63 
#[test]
fn disallow_non_utf8() {
    use regex::Regex;

    // Reports whether `regex::Regex` accepts the pattern.
    fn compiles(pattern: &str) -> bool {
        Regex::new(pattern).is_ok()
    }

    // Every pattern below switches off the `u` flag and is expected to be
    // rejected by the `&str`-based `Regex` constructor.
    assert!(!compiles(r"(?-u)\xFF"));
    assert!(!compiles(r"(?-u)."));
    assert!(!compiles(r"(?-u)[\xFF]"));
    assert!(!compiles(r"(?-u)☃"));
}
71 
#[test]
fn disallow_octal() {
    use regex::Regex;

    // With default settings, the pattern `\0` must fail to compile.
    let compiled = Regex::new(r"\0");
    assert!(compiled.is_err());
}
76 
#[test]
fn allow_octal() {
    use regex::RegexBuilder;

    // The same pattern that `disallow_octal` expects to fail must build once
    // the builder's `octal` option is switched on.
    let mut builder = RegexBuilder::new(r"\0");
    builder.octal(true);
    assert!(builder.build().is_ok());
}
81 
#[test]
fn oibits() {
    use regex::bytes;
    use regex::{Regex, RegexBuilder, RegexSet, RegexSetBuilder};
    use std::panic::{RefUnwindSafe, UnwindSafe};

    // Compile-time check only: instantiating this function for `T` requires
    // `T` to be `Send`, `Sync`, `UnwindSafe` and `RefUnwindSafe` all at once.
    // The body is empty, so nothing happens at run time.
    fn assert_oibits<T>()
    where
        T: Send + Sync + UnwindSafe + RefUnwindSafe,
    {
    }

    // Unicode (`&str`) regexes and their builder.
    assert_oibits::<Regex>();
    assert_oibits::<RegexBuilder>();

    // Byte-oriented regexes and their builder.
    assert_oibits::<bytes::Regex>();
    assert_oibits::<bytes::RegexBuilder>();

    // Unicode regex sets and their builder.
    assert_oibits::<RegexSet>();
    assert_oibits::<RegexSetBuilder>();

    // Byte-oriented regex sets and their builder.
    assert_oibits::<bytes::RegexSet>();
    assert_oibits::<bytes::RegexSetBuilder>();
}
129 
130 // See: https://github.com/rust-lang/regex/issues/568
#[test]
fn oibits_regression() {
    use regex::Regex;
    use std::panic::catch_unwind;

    // Compile a trivial regex inside `catch_unwind`. The outcome is discarded
    // on purpose: the test only needs this call to type-check and run.
    let compile = || Regex::new("a").unwrap();
    let _ = catch_unwind(compile);
}
138 
139 // See: https://github.com/rust-lang/regex/issues/750
#[test]
#[cfg(target_pointer_width = "64")]
fn regex_is_reasonably_small() {
    use regex::bytes::{Regex as BytesRegex, RegexSet as BytesRegexSet};
    use regex::{Regex, RegexSet};
    use std::mem::size_of;

    // This test is only built for 64-bit targets (see the `cfg` attribute),
    // where each of these types is expected to occupy exactly this many bytes.
    const EXPECTED_BYTES: usize = 16;

    assert_eq!(EXPECTED_BYTES, size_of::<Regex>());
    assert_eq!(EXPECTED_BYTES, size_of::<RegexSet>());
    assert_eq!(EXPECTED_BYTES, size_of::<BytesRegex>());
    assert_eq!(EXPECTED_BYTES, size_of::<BytesRegexSet>());
}
153 
154 // See: https://github.com/rust-lang/regex/security/advisories/GHSA-m5pq-gvj9-9vr8
155 // See: CVE-2022-24713
156 //
157 // We test that our regex compiler will correctly return a "too big" error when
158 // we try to use a very large repetition on an *empty* sub-expression.
159 //
160 // At the time this test was written, the regex compiler does not represent
161 // empty sub-expressions with any bytecode instructions. In effect, it's an
162 // "optimization" to leave them out, since they would otherwise correspond
163 // to an unconditional JUMP in the regex bytecode (i.e., an unconditional
164 // epsilon transition in the NFA graph). Therefore, an empty sub-expression
165 // represents an interesting case for the compiler's size limits. Since it
166 // doesn't actually contribute any additional memory to the compiled regex
167 // instructions, the size limit machinery never detects it. Instead, it just
168 // dumbly tries to compile the empty sub-expression N times, where N is the
169 // repetition size.
170 //
171 // When N is very large, this will cause the compiler to essentially spin and
172 // do nothing for a decently large amount of time. It causes the regex to take
173 // quite a bit of time to compile, despite the concrete syntax of the regex
174 // being quite small.
175 //
176 // The degree to which this is actually a problem is somewhat of a judgment
177 // call. Some regexes simply take a long time to compile. But in general, you
178 // should be able to reasonably control this by setting lower or higher size
179 // limits on the compiled object size. But this mitigation doesn't work at all
180 // for this case.
181 //
182 // This particular test is somewhat narrow. It merely checks that regex
183 // compilation will, at some point, return a "too big" error. Before the
184 // fix landed, this test would eventually fail because the regex would be
185 // successfully compiled (after enough time elapsed). So while this test
186 // doesn't check that we exit in a reasonable amount of time, it does at least
187 // check that we are properly returning an error at some point.
#[test]
fn big_empty_regex_fails() {
    use regex::Regex;

    // An empty non-capturing group with a repetition count of 4294967295
    // (`u32::MAX`) must be rejected rather than compiled.
    let pattern = "(?:){4294967295}";
    assert!(Regex::new(pattern).is_err());
}
195 
196 // Below is a "billion laughs" variant of the previous test case.
#[test]
fn big_empty_reps_chain_regex_fails() {
    use regex::Regex;

    // Six stacked `{64}` repetitions applied to an empty group must also be
    // rejected rather than compiled.
    let pattern = "(?:){64}{64}{64}{64}{64}{64}";
    assert!(Regex::new(pattern).is_err());
}
204 
205 // Below is another situation where a zero-length sub-expression can be
206 // introduced.
#[test]
fn big_zero_reps_regex_fails() {
    use regex::Regex;

    // `x{0}` is repeated 4294967295 (`u32::MAX`) times; compilation must
    // report an error.
    let pattern = r"x{0}{4294967295}";
    assert!(Regex::new(pattern).is_err());
}
214 
215 // Testing another case for completeness.
#[test]
fn empty_alt_regex_fails() {
    use regex::Regex;

    // An alternation of two empty branches, repeated 4294967295 (`u32::MAX`)
    // times, must fail to compile.
    let pattern = r"(?:|){4294967295}";
    assert!(Regex::new(pattern).is_err());
}
223