• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #![allow(non_camel_case_types)]
2 
3 use libc::{c_int, c_uchar, c_void};
4 
/// Regex wraps an RE2 regular expression.
///
/// It cannot be used safely from multiple threads simultaneously.
pub struct Regex {
    /// Raw handle obtained from `re2_regexp_new`; it is passed to
    /// `re2_regexp_free` when the `Regex` is dropped.
    re: *mut re2_regexp,
}
11 
// The raw pointer field makes `Regex` `!Send` by default, so the impl below
// opts in by hand.
// SAFETY: NOTE(review) — this is only sound if the object behind the pointer
// may be moved to, used on, and freed from a different thread than the one
// that created it; confirm against the C++ wrapper.
unsafe impl Send for Regex {}
13 
14 impl Drop for Regex {
drop(&mut self)15     fn drop(&mut self) {
16         unsafe {
17             re2_regexp_free(self.re);
18         }
19     }
20 }
21 
/// Error type returned by `Regex::new`.
///
/// It carries no detail: its only field is a private unit value, so code
/// outside this module can neither construct nor inspect it.
#[derive(Debug)]
pub struct Error(());
24 
25 impl Regex {
new(pattern: &str) -> Result<Regex, Error>26     pub fn new(pattern: &str) -> Result<Regex, Error> {
27         unsafe { Ok(Regex { re: re2_regexp_new(pattern.into()) }) }
28     }
29 
is_match(&self, text: &str) -> bool30     pub fn is_match(&self, text: &str) -> bool {
31         unsafe {
32             re2_regexp_match(self.re, text.into(), 0, text.len() as c_int)
33         }
34     }
35 
find_iter<'r, 't>(&'r self, text: &'t str) -> FindMatches<'r, 't>36     pub fn find_iter<'r, 't>(&'r self, text: &'t str) -> FindMatches<'r, 't> {
37         FindMatches { re: self, text: text, last_end: 0, last_match: None }
38     }
39 
find_at(&self, text: &str, start: usize) -> Option<(usize, usize)>40     fn find_at(&self, text: &str, start: usize) -> Option<(usize, usize)> {
41         let (mut s, mut e): (c_int, c_int) = (0, 0);
42         let matched = unsafe {
43             re2_regexp_find(
44                 self.re,
45                 text.into(),
46                 start as c_int,
47                 text.len() as c_int,
48                 &mut s,
49                 &mut e,
50             )
51         };
52         if matched {
53             Some((s as usize, e as usize))
54         } else {
55             None
56         }
57     }
58 }
59 
/// Iterator over the `(start, end)` offsets of successive matches of a
/// `Regex` in a string. Created by `Regex::find_iter`.
pub struct FindMatches<'r, 't> {
    /// Regex used for every search.
    re: &'r Regex,
    /// Text being searched.
    text: &'t str,
    /// Byte offset at which the next search starts. Once it exceeds
    /// `text.len()` the iterator is exhausted.
    last_end: usize,
    /// Offset recorded when the previous match was yielded (`None` before
    /// the first one); `next` compares it with the end of an empty match to
    /// decide whether that match must be skipped.
    last_match: Option<usize>,
}
66 
67 // This implementation is identical to the one Rust uses, since both Rust's
68 // regex engine and RE2 handle empty matches in the same way.
69 impl<'r, 't> Iterator for FindMatches<'r, 't> {
70     type Item = (usize, usize);
71 
next(&mut self) -> Option<(usize, usize)>72     fn next(&mut self) -> Option<(usize, usize)> {
73         fn next_after_empty(text: &str, i: usize) -> usize {
74             let b = match text.as_bytes().get(i) {
75                 None => return text.len() + 1,
76                 Some(&b) => b,
77             };
78             let inc = if b <= 0x7F {
79                 1
80             } else if b <= 0b110_11111 {
81                 2
82             } else if b <= 0b1110_1111 {
83                 3
84             } else {
85                 4
86             };
87             i + inc
88         }
89 
90         if self.last_end > self.text.len() {
91             return None;
92         }
93         let (s, e) = match self.re.find_at(self.text, self.last_end) {
94             None => return None,
95             Some((s, e)) => (s, e),
96         };
97         assert!(s >= self.last_end);
98         if s == e {
99             // This is an empty match. To ensure we make progress, start
100             // the next search at the smallest possible starting position
101             // of the next match following this one.
102             self.last_end = next_after_empty(&self.text, e);
103             // Don't accept empty matches immediately following a match.
104             // Just move on to the next match.
105             if Some(e) == self.last_match {
106                 return self.next();
107             }
108         } else {
109             self.last_end = e;
110         }
111         self.last_match = Some(self.last_end);
112         Some((s, e))
113     }
114 }
115 
// RE2 FFI is below. Note that this uses a hand-rolled C API that is defined
// in re2.cpp.

/// Opaque stand-in for the regex object on the C side. This file only ever
/// holds it behind a `*mut re2_regexp` and never looks inside.
type re2_regexp = c_void;
120 
/// Pointer/length pair used to pass string data by value to the C wrapper.
///
/// It is built from a `&str` without copying, so it does not own the bytes
/// it points to. `#[repr(C)]` gives it C field layout.
/// NOTE(review): the layout must agree with the matching C definition,
/// which is not visible from this file.
#[repr(C)]
struct re2_string {
    /// Pointer to the first byte of the string data.
    text: *const c_uchar,
    /// Length of the string data in bytes.
    len: c_int,
}
126 
127 impl<'a> From<&'a str> for re2_string {
from(s: &'a str) -> re2_string128     fn from(s: &'a str) -> re2_string {
129         re2_string { text: s.as_ptr(), len: s.len() as c_int }
130     }
131 }
132 
// Declarations for the hand-rolled C wrapper. They must agree with the
// definitions on the C++ side, which are not part of this file.
extern "C" {
    /// Builds a regex object from `pat` and returns a handle to it.
    /// NOTE(review): how (or whether) a failed compile is signalled is not
    /// visible here — confirm in the C++ source.
    fn re2_regexp_new(pat: re2_string) -> *mut re2_regexp;
    /// Releases a handle; called from `Drop for Regex`.
    fn re2_regexp_free(re: *mut re2_regexp);
    /// Reports whether the regex matches within `text`. `Regex::is_match`
    /// passes `0` and `text.len()` for `startpos` and `endpos`.
    fn re2_regexp_match(
        re: *mut re2_regexp,
        text: re2_string,
        startpos: c_int,
        endpos: c_int,
    ) -> bool;
    /// Searches `text` between `startpos` and `endpos`. When it returns
    /// `true`, the caller reads the match bounds back from `match_start`
    /// and `match_end`, so both must point to writable `c_int`s.
    /// NOTE(review): what is written on a failed search is not visible here.
    fn re2_regexp_find(
        re: *mut re2_regexp,
        text: re2_string,
        startpos: c_int,
        endpos: c_int,
        match_start: *mut c_int,
        match_end: *mut c_int,
    ) -> bool;
}
151