//! Minimal FFI wrapper around TCL's regex engine, used for benchmarking.
1 #![allow(non_camel_case_types)]
2 
3 use std::mem;
4 use std::ptr;
5 use std::sync::Once;
6 
7 use libc::{c_char, c_int, c_long, c_void};
8 
// Guards process-wide TCL initialization: `Regex::new` runs
// `Tcl_CreateInterp` through this exactly once.
static ONCE: Once = Once::new();
11 
12 /// Text is a TCL string object backed by a Rust string.
13 ///
14 /// This is a special type that is created once per benchmark and is not
15 /// included in timings. In particular, all regex searches execute on values
16 /// of this type, so we're careful to avoid the overhead of creating such
17 /// objects on every search.
18 pub struct Text {
19     s: String,
20     obj: *mut tcl_obj,
21 }
22 
23 // TCL's objects are ref-counted in a thread-unsafe manner, which would
24 // normally disqualify a Send bound. However, we don't permit Text to be used
25 // in a way that can lead to unsafety. In particular, the ref count is always
26 // 1, until it is dropped, in which the ref count is decreased to zero and
27 // the underlying memory is freed.
28 unsafe impl Send for Text {}
29 
30 impl Drop for Text {
drop(&mut self)31     fn drop(&mut self) {
32         unsafe {
33             assert_eq!((*self.obj).ref_count, 1);
34             // This will drop the ref count to 0 and cause it to be freed.
35             (*self.obj).decr_ref_count();
36         }
37     }
38 }
39 
40 impl Text {
new(text: String) -> Text41     pub fn new(text: String) -> Text {
42         let ptr = text.as_ptr() as *const c_char;
43         let len = text.len() as c_int;
44         let obj = unsafe { Tcl_NewStringObj(ptr, len) };
45         unsafe {
46             (*obj).incr_ref_count();
47         }
48         Text { s: text, obj: obj }
49     }
50 
len(&self) -> usize51     pub fn len(&self) -> usize {
52         self.s.len()
53     }
54 }
55 
56 /// Regex wraps a TCL regex. It owns a TCL string object and a pointer to a
57 /// regexp object. The two share storage.
58 ///
59 /// There's no Drop impl for Regex because the memory for the regex will be
60 /// freed when `pat` is dropped.
61 pub struct Regex {
62     pat: Text,
63     re: *mut tcl_regexp,
64 }
65 
66 unsafe impl Send for Regex {}
67 
/// The error returned by `Regex::new` when TCL cannot compile a pattern.
///
/// It carries no detail: the regex is compiled without an interpreter, so
/// there is nowhere for TCL to leave an error message.
#[derive(Debug)]
pub struct Error(());

impl std::fmt::Display for Error {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("TCL failed to compile the regular expression")
    }
}

impl std::error::Error for Error {}
70 
71 impl Regex {
new(pattern: &str) -> Result<Regex, Error>72     pub fn new(pattern: &str) -> Result<Regex, Error> {
73         ONCE.call_once(|| unsafe {
74             Tcl_CreateInterp();
75         });
76 
77         let pat = Text::new(pattern.to_owned());
78         let re = unsafe {
79             Tcl_GetRegExpFromObj(ptr::null_mut(), pat.obj, TCL_REG_ADVANCED)
80         };
81         if re.is_null() {
82             return Err(Error(()));
83         }
84         Ok(Regex { pat: pat, re: re })
85     }
86 
is_match(&self, text: &Text) -> bool87     pub fn is_match(&self, text: &Text) -> bool {
88         let result = unsafe {
89             Tcl_RegExpExecObj(ptr::null_mut(), self.re, text.obj, 0, 1, 0)
90         };
91         if result == -1 {
92             panic!("Tcl_RegExpExecObj failed");
93         }
94         result > 0
95     }
96 
find_iter<'r, 't>(&'r self, text: &'t Text) -> FindMatches<'r, 't>97     pub fn find_iter<'r, 't>(&'r self, text: &'t Text) -> FindMatches<'r, 't> {
98         FindMatches { re: self, text: text, last_match: 0 }
99     }
100 
find_at(&self, text: &Text, start: usize) -> Option<(usize, usize)>101     fn find_at(&self, text: &Text, start: usize) -> Option<(usize, usize)> {
102         let result = unsafe {
103             Tcl_RegExpExecObj(
104                 ptr::null_mut(),
105                 self.re,
106                 text.obj,
107                 start as c_int,
108                 1,
109                 0,
110             )
111         };
112         if result == -1 {
113             panic!("Tcl_RegExpExecObj failed");
114         } else if result == 0 {
115             return None;
116         }
117         let mut info: tcl_regexp_info = unsafe { mem::zeroed() };
118         unsafe {
119             Tcl_RegExpGetInfo(self.re, &mut info);
120             let s = start as c_long + (*info.matches).start;
121             let e = start as c_long + (*info.matches).end;
122             Some((s as usize, e as usize))
123         }
124     }
125 }
126 
127 pub struct FindMatches<'r, 't> {
128     re: &'r Regex,
129     text: &'t Text,
130     last_match: usize,
131 }
132 
133 impl<'r, 't> Iterator for FindMatches<'r, 't> {
134     type Item = (usize, usize);
135 
next(&mut self) -> Option<(usize, usize)>136     fn next(&mut self) -> Option<(usize, usize)> {
137         match self.re.find_at(self.text, self.last_match) {
138             None => None,
139             Some((s, e)) => {
140                 self.last_match = e;
141                 Some((s, e))
142             }
143         }
144     }
145 }
146 
// TCL's FFI. We only wrap the bits we need.

/// Compile flag passed to `Tcl_GetRegExpFromObj` to select TCL's "advanced"
/// regex syntax.
const TCL_REG_ADVANCED: c_int = 3;

/// Opaque stand-in for TCL's interpreter type; only ever used behind a
/// pointer.
type tcl_interp = c_void;
/// Opaque stand-in for TCL's compiled regexp handle; only ever used behind a
/// pointer.
type tcl_regexp = c_void;
153 
/// The leading portion of TCL's object struct.
#[repr(C)]
struct tcl_obj {
    /// The object's reference count.
    ref_count: c_int,
    // There are more fields, but we don't care about them.
    // We're careful to only access ref_count so we can increment/decrement it.
    // This is necessary because Tcl_IncrRefCount and Tcl_DecrRefCount are
    // macros.
}
162 
163 impl tcl_obj {
incr_ref_count(&mut self)164     unsafe fn incr_ref_count(&mut self) {
165         self.ref_count += 1;
166     }
167 
decr_ref_count(&mut self)168     unsafe fn decr_ref_count(&mut self) {
169         self.ref_count -= 1;
170         if self.ref_count <= 0 {
171             TclFreeObj(self);
172         }
173     }
174 }
175 
176 #[repr(C)]
177 struct tcl_regexp_info {
178     nsubs: c_int,
179     matches: *mut tcl_regexp_indices,
180     extend_start: c_long,
181     reserved: c_long,
182 }
183 
/// The range of a single match, as reported by TCL relative to the offset
/// at which the search began.
#[repr(C)]
struct tcl_regexp_indices {
    /// Offset of the start of the match.
    start: c_long,
    /// Offset of the end of the match.
    end: c_long,
}
189 
190 extern "C" {
Tcl_CreateInterp() -> *mut tcl_interp191     fn Tcl_CreateInterp() -> *mut tcl_interp;
192 
Tcl_NewStringObj(pat: *const c_char, len: c_int) -> *mut tcl_obj193     fn Tcl_NewStringObj(pat: *const c_char, len: c_int) -> *mut tcl_obj;
194 
TclFreeObj(obj: *mut tcl_obj)195     fn TclFreeObj(obj: *mut tcl_obj);
196 
Tcl_GetRegExpFromObj( int: *mut tcl_interp, pat: *mut tcl_obj, flags: c_int, ) -> *mut tcl_regexp197     fn Tcl_GetRegExpFromObj(
198         int: *mut tcl_interp,
199         pat: *mut tcl_obj,
200         flags: c_int,
201     ) -> *mut tcl_regexp;
202 
Tcl_RegExpExecObj( int: *mut tcl_interp, re: *mut tcl_regexp, text: *mut tcl_obj, offset: c_int, nmatches: c_int, flags: c_int, ) -> c_int203     fn Tcl_RegExpExecObj(
204         int: *mut tcl_interp,
205         re: *mut tcl_regexp,
206         text: *mut tcl_obj,
207         offset: c_int,
208         nmatches: c_int,
209         flags: c_int,
210     ) -> c_int;
211 
Tcl_RegExpGetInfo(re: *mut tcl_regexp, info: *mut tcl_regexp_info)212     fn Tcl_RegExpGetInfo(re: *mut tcl_regexp, info: *mut tcl_regexp_info);
213 }
214