• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #![allow(non_snake_case)]
2 
3 use std::ffi::{CStr, CString};
4 use std::fmt;
5 use std::ptr;
6 
7 use libc::{c_char, c_int, c_void};
8 use libpcre_sys::{
9     pcre, pcre_compile, pcre_exec, pcre_extra, pcre_free, pcre_free_study,
10     pcre_study, PCRE_ERROR_NOMATCH, PCRE_NO_UTF8_CHECK, PCRE_UTF8,
11 };
12 
13 const PCRE_UCP: c_int = 0x20000000;
14 const PCRE_STUDY_JIT_COMPLETE: c_int = 0x0001;
15 
16 // We use libpcre-sys directly because the pcre crate has unavoidable
17 // performance problems in its core matching routines. (e.g., It always
18 // allocates an ovector.)
19 pub struct Regex {
20     code: *mut pcre,
21     extra: *mut pcre_extra,
22 }
23 
24 unsafe impl Send for Regex {}
25 
26 impl Drop for Regex {
drop(&mut self)27     fn drop(&mut self) {
28         unsafe {
29             pcre_free_study(self.extra);
30             pcre_free(self.code as *mut c_void);
31         }
32     }
33 }
34 
/// Failure reported while building a `Regex`.
pub struct Error {
    /// Human-readable description of what went wrong.
    msg: String,
    /// Offset reported alongside the message; `Regex::new` fills this with the
    /// value PCRE wrote while compiling, or `0` for a study failure.
    offset: c_int,
}
39 
40 impl Regex {
new(pattern: &str) -> Result<Regex, Error>41     pub fn new(pattern: &str) -> Result<Regex, Error> {
42         let pattern = CString::new(pattern.to_owned()).unwrap();
43         let mut errptr: *const c_char = ptr::null();
44         let mut erroffset: c_int = 0;
45         let code = unsafe {
46             pcre_compile(
47                 pattern.as_ptr(),
48                 PCRE_UCP | PCRE_UTF8,
49                 &mut errptr,
50                 &mut erroffset,
51                 ptr::null(),
52             )
53         };
54         if code.is_null() {
55             let msg =
56                 unsafe { CStr::from_ptr(errptr).to_str().unwrap().to_owned() };
57             return Err(Error { msg: msg, offset: erroffset });
58         }
59 
60         let extra =
61             unsafe { pcre_study(code, PCRE_STUDY_JIT_COMPLETE, &mut errptr) };
62         if extra.is_null() {
63             if errptr.is_null() {
64                 panic!("unexpected error. Maybe JIT support isn't enabled?");
65             }
66             let msg =
67                 unsafe { CStr::from_ptr(errptr).to_str().unwrap().to_owned() };
68             return Err(Error { msg: msg, offset: 0 });
69         }
70         Ok(Regex { code: code, extra: extra })
71     }
72 
is_match(&self, text: &str) -> bool73     pub fn is_match(&self, text: &str) -> bool {
74         self.find_at(text, 0).is_some()
75     }
76 
find_iter<'r, 't>(&'r self, text: &'t str) -> FindMatches<'r, 't>77     pub fn find_iter<'r, 't>(&'r self, text: &'t str) -> FindMatches<'r, 't> {
78         FindMatches { re: self, text: text, last_match_end: 0 }
79     }
80 
find_at(&self, text: &str, start: usize) -> Option<(usize, usize)>81     fn find_at(&self, text: &str, start: usize) -> Option<(usize, usize)> {
82         const OVEC_SIZE: usize = 15 * 3; // hopefully enough for benchmarks?
83         let mut ovec: [c_int; OVEC_SIZE] = [0; OVEC_SIZE];
84         let err = unsafe {
85             pcre_exec(
86                 self.code,
87                 self.extra,
88                 text.as_ptr() as *const i8,
89                 text.len() as c_int,
90                 start as c_int,
91                 PCRE_NO_UTF8_CHECK,
92                 ovec.as_mut_ptr(),
93                 OVEC_SIZE as c_int,
94             )
95         };
96         if err == PCRE_ERROR_NOMATCH {
97             None
98         } else if err < 0 {
99             panic!("unknown error code: {:?}", err)
100         } else {
101             Some((ovec[0] as usize, ovec[1] as usize))
102         }
103     }
104 }
105 
106 pub struct FindMatches<'r, 't> {
107     re: &'r Regex,
108     text: &'t str,
109     last_match_end: usize,
110 }
111 
112 impl<'r, 't> Iterator for FindMatches<'r, 't> {
113     type Item = (usize, usize);
114 
next(&mut self) -> Option<(usize, usize)>115     fn next(&mut self) -> Option<(usize, usize)> {
116         match self.re.find_at(self.text, self.last_match_end) {
117             None => None,
118             Some((s, e)) => {
119                 self.last_match_end = e;
120                 Some((s, e))
121             }
122         }
123     }
124 }
125 
126 impl fmt::Debug for Error {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result127     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
128         write!(f, "PCRE error at {:?}: {}", self.offset, self.msg)
129     }
130 }
131