1 #![allow(non_camel_case_types)] 2 3 use std::mem; 4 use std::ptr; 5 use std::sync::Once; 6 7 use libc::{c_char, c_int, c_long, c_void}; 8 9 // Used to initialize the TCL interpreter exactly once. 10 static ONCE: Once = Once::new(); 11 12 /// Text is a TCL string object backed by a Rust string. 13 /// 14 /// This is a special type that is created once per benchmark and is not 15 /// included in timings. In particular, all regex searches execute on values 16 /// of this type, so we're careful to avoid the overhead of creating such 17 /// objects on every search. 18 pub struct Text { 19 s: String, 20 obj: *mut tcl_obj, 21 } 22 23 // TCL's objects are ref-counted in a thread-unsafe manner, which would 24 // normally disqualify a Send bound. However, we don't permit Text to be used 25 // in a way that can lead to unsafety. In particular, the ref count is always 26 // 1, until it is dropped, in which the ref count is decreased to zero and 27 // the underlying memory is freed. 28 unsafe impl Send for Text {} 29 30 impl Drop for Text { drop(&mut self)31 fn drop(&mut self) { 32 unsafe { 33 assert_eq!((*self.obj).ref_count, 1); 34 // This will drop the ref count to 0 and cause it to be freed. 35 (*self.obj).decr_ref_count(); 36 } 37 } 38 } 39 40 impl Text { new(text: String) -> Text41 pub fn new(text: String) -> Text { 42 let ptr = text.as_ptr() as *const c_char; 43 let len = text.len() as c_int; 44 let obj = unsafe { Tcl_NewStringObj(ptr, len) }; 45 unsafe { 46 (*obj).incr_ref_count(); 47 } 48 Text { s: text, obj: obj } 49 } 50 len(&self) -> usize51 pub fn len(&self) -> usize { 52 self.s.len() 53 } 54 } 55 56 /// Regex wraps a TCL regex. It owns a TCL string object and a pointer to a 57 /// regexp object. The two share storage. 58 /// 59 /// There's no Drop impl for Regex because the memory for the regex will be 60 /// freed when `pat` is dropped. 61 pub struct Regex { 62 pat: Text, 63 re: *mut tcl_regexp, 64 } 65 66 unsafe impl Send for Regex {} 67 68 #[derive(Debug)] 69 pub struct Error(()); 70 71 impl Regex { new(pattern: &str) -> Result<Regex, Error>72 pub fn new(pattern: &str) -> Result<Regex, Error> { 73 ONCE.call_once(|| unsafe { 74 Tcl_CreateInterp(); 75 }); 76 77 let pat = Text::new(pattern.to_owned()); 78 let re = unsafe { 79 Tcl_GetRegExpFromObj(ptr::null_mut(), pat.obj, TCL_REG_ADVANCED) 80 }; 81 if re.is_null() { 82 return Err(Error(())); 83 } 84 Ok(Regex { pat: pat, re: re }) 85 } 86 is_match(&self, text: &Text) -> bool87 pub fn is_match(&self, text: &Text) -> bool { 88 let result = unsafe { 89 Tcl_RegExpExecObj(ptr::null_mut(), self.re, text.obj, 0, 1, 0) 90 }; 91 if result == -1 { 92 panic!("Tcl_RegExpExecObj failed"); 93 } 94 result > 0 95 } 96 find_iter<'r, 't>(&'r self, text: &'t Text) -> FindMatches<'r, 't>97 pub fn find_iter<'r, 't>(&'r self, text: &'t Text) -> FindMatches<'r, 't> { 98 FindMatches { re: self, text: text, last_match: 0 } 99 } 100 find_at(&self, text: &Text, start: usize) -> Option<(usize, usize)>101 fn find_at(&self, text: &Text, start: usize) -> Option<(usize, usize)> { 102 let result = unsafe { 103 Tcl_RegExpExecObj( 104 ptr::null_mut(), 105 self.re, 106 text.obj, 107 start as c_int, 108 1, 109 0, 110 ) 111 }; 112 if result == -1 { 113 panic!("Tcl_RegExpExecObj failed"); 114 } else if result == 0 { 115 return None; 116 } 117 let mut info: tcl_regexp_info = unsafe { mem::zeroed() }; 118 unsafe { 119 Tcl_RegExpGetInfo(self.re, &mut info); 120 let s = start as c_long + (*info.matches).start; 121 let e = start as c_long + (*info.matches).end; 122 Some((s as usize, e as usize)) 123 } 124 } 125 } 126 127 pub struct FindMatches<'r, 't> { 128 re: &'r Regex, 129 text: &'t Text, 130 last_match: usize, 131 } 132 133 impl<'r, 't> Iterator for FindMatches<'r, 't> { 134 type Item = (usize, usize); 135 next(&mut self) -> Option<(usize, usize)>136 fn next(&mut self) -> Option<(usize, usize)> { 137 match self.re.find_at(self.text, self.last_match) { 138 None => None, 139 Some((s, e)) => { 140 self.last_match = e; 141 Some((s, e)) 142 } 143 } 144 } 145 } 146 147 // TCL's FFI. We only wrap the bits we need. 148 149 const TCL_REG_ADVANCED: c_int = 3; 150 151 type tcl_interp = c_void; 152 type tcl_regexp = c_void; 153 154 #[repr(C)] 155 struct tcl_obj { 156 ref_count: c_int, 157 // There are more fields, but we don't care about them. 158 // We're careful to only access ref_count so we can increment/decrement it. 159 // This is necessary because Tcl_IncRefCount and Tcl_DecrRefCount are 160 // macros. 161 } 162 163 impl tcl_obj { incr_ref_count(&mut self)164 unsafe fn incr_ref_count(&mut self) { 165 self.ref_count += 1; 166 } 167 decr_ref_count(&mut self)168 unsafe fn decr_ref_count(&mut self) { 169 self.ref_count -= 1; 170 if self.ref_count <= 0 { 171 TclFreeObj(self); 172 } 173 } 174 } 175 176 #[repr(C)] 177 struct tcl_regexp_info { 178 nsubs: c_int, 179 matches: *mut tcl_regexp_indices, 180 extend_start: c_long, 181 reserved: c_long, 182 } 183 184 #[repr(C)] 185 struct tcl_regexp_indices { 186 start: c_long, 187 end: c_long, 188 } 189 190 extern "C" { Tcl_CreateInterp() -> *mut tcl_interp191 fn Tcl_CreateInterp() -> *mut tcl_interp; 192 Tcl_NewStringObj(pat: *const c_char, len: c_int) -> *mut tcl_obj193 fn Tcl_NewStringObj(pat: *const c_char, len: c_int) -> *mut tcl_obj; 194 TclFreeObj(obj: *mut tcl_obj)195 fn TclFreeObj(obj: *mut tcl_obj); 196 Tcl_GetRegExpFromObj( int: *mut tcl_interp, pat: *mut tcl_obj, flags: c_int, ) -> *mut tcl_regexp197 fn Tcl_GetRegExpFromObj( 198 int: *mut tcl_interp, 199 pat: *mut tcl_obj, 200 flags: c_int, 201 ) -> *mut tcl_regexp; 202 Tcl_RegExpExecObj( int: *mut tcl_interp, re: *mut tcl_regexp, text: *mut tcl_obj, offset: c_int, nmatches: c_int, flags: c_int, ) -> c_int203 fn Tcl_RegExpExecObj( 204 int: *mut tcl_interp, 205 re: *mut tcl_regexp, 206 text: *mut tcl_obj, 207 offset: c_int, 208 nmatches: c_int, 209 flags: c_int, 210 ) -> c_int; 211 Tcl_RegExpGetInfo(re: *mut tcl_regexp, info: *mut tcl_regexp_info)212 fn Tcl_RegExpGetInfo(re: *mut tcl_regexp, info: *mut tcl_regexp_info); 213 } 214