1 #![allow(non_snake_case)] 2 3 use std::ffi::{CStr, CString}; 4 use std::fmt; 5 use std::ptr; 6 7 use libc::{c_char, c_int, c_void}; 8 use libpcre_sys::{ 9 pcre, pcre_compile, pcre_exec, pcre_extra, pcre_free, pcre_free_study, 10 pcre_study, PCRE_ERROR_NOMATCH, PCRE_NO_UTF8_CHECK, PCRE_UTF8, 11 }; 12 13 const PCRE_UCP: c_int = 0x20000000; 14 const PCRE_STUDY_JIT_COMPLETE: c_int = 0x0001; 15 16 // We use libpcre-sys directly because the pcre crate has unavoidable 17 // performance problems in its core matching routines. (e.g., It always 18 // allocates an ovector.) 19 pub struct Regex { 20 code: *mut pcre, 21 extra: *mut pcre_extra, 22 } 23 24 unsafe impl Send for Regex {} 25 26 impl Drop for Regex { drop(&mut self)27 fn drop(&mut self) { 28 unsafe { 29 pcre_free_study(self.extra); 30 pcre_free(self.code as *mut c_void); 31 } 32 } 33 } 34 35 pub struct Error { 36 msg: String, 37 offset: c_int, 38 } 39 40 impl Regex { new(pattern: &str) -> Result<Regex, Error>41 pub fn new(pattern: &str) -> Result<Regex, Error> { 42 let pattern = CString::new(pattern.to_owned()).unwrap(); 43 let mut errptr: *const c_char = ptr::null(); 44 let mut erroffset: c_int = 0; 45 let code = unsafe { 46 pcre_compile( 47 pattern.as_ptr(), 48 PCRE_UCP | PCRE_UTF8, 49 &mut errptr, 50 &mut erroffset, 51 ptr::null(), 52 ) 53 }; 54 if code.is_null() { 55 let msg = 56 unsafe { CStr::from_ptr(errptr).to_str().unwrap().to_owned() }; 57 return Err(Error { msg: msg, offset: erroffset }); 58 } 59 60 let extra = 61 unsafe { pcre_study(code, PCRE_STUDY_JIT_COMPLETE, &mut errptr) }; 62 if extra.is_null() { 63 if errptr.is_null() { 64 panic!("unexpected error. Maybe JIT support isn't enabled?"); 65 } 66 let msg = 67 unsafe { CStr::from_ptr(errptr).to_str().unwrap().to_owned() }; 68 return Err(Error { msg: msg, offset: 0 }); 69 } 70 Ok(Regex { code: code, extra: extra }) 71 } 72 is_match(&self, text: &str) -> bool73 pub fn is_match(&self, text: &str) -> bool { 74 self.find_at(text, 0).is_some() 75 } 76 find_iter<'r, 't>(&'r self, text: &'t str) -> FindMatches<'r, 't>77 pub fn find_iter<'r, 't>(&'r self, text: &'t str) -> FindMatches<'r, 't> { 78 FindMatches { re: self, text: text, last_match_end: 0 } 79 } 80 find_at(&self, text: &str, start: usize) -> Option<(usize, usize)>81 fn find_at(&self, text: &str, start: usize) -> Option<(usize, usize)> { 82 const OVEC_SIZE: usize = 15 * 3; // hopefully enough for benchmarks? 83 let mut ovec: [c_int; OVEC_SIZE] = [0; OVEC_SIZE]; 84 let err = unsafe { 85 pcre_exec( 86 self.code, 87 self.extra, 88 text.as_ptr() as *const i8, 89 text.len() as c_int, 90 start as c_int, 91 PCRE_NO_UTF8_CHECK, 92 ovec.as_mut_ptr(), 93 OVEC_SIZE as c_int, 94 ) 95 }; 96 if err == PCRE_ERROR_NOMATCH { 97 None 98 } else if err < 0 { 99 panic!("unknown error code: {:?}", err) 100 } else { 101 Some((ovec[0] as usize, ovec[1] as usize)) 102 } 103 } 104 } 105 106 pub struct FindMatches<'r, 't> { 107 re: &'r Regex, 108 text: &'t str, 109 last_match_end: usize, 110 } 111 112 impl<'r, 't> Iterator for FindMatches<'r, 't> { 113 type Item = (usize, usize); 114 next(&mut self) -> Option<(usize, usize)>115 fn next(&mut self) -> Option<(usize, usize)> { 116 match self.re.find_at(self.text, self.last_match_end) { 117 None => None, 118 Some((s, e)) => { 119 self.last_match_end = e; 120 Some((s, e)) 121 } 122 } 123 } 124 } 125 126 impl fmt::Debug for Error { fmt(&self, f: &mut fmt::Formatter) -> fmt::Result127 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 128 write!(f, "PCRE error at {:?}: {}", self.offset, self.msg) 129 } 130 } 131