• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 use std::collections::HashMap;
2 use std::ffi::{CStr, CString};
3 use std::ops::Deref;
4 use std::ptr;
5 use std::slice;
6 use std::str;
7 
8 use libc::{c_char, size_t};
9 use regex::bytes;
10 
11 use crate::error::{Error, ErrorKind};
12 
// Bit flags understood by the `flags` argument of `rure_compile` and
// `rure_compile_set`. Each bit switches on the builder option named in the
// comment next to it.
// CASEI -> `case_insensitive`
13 const RURE_FLAG_CASEI: u32 = 1 << 0;
// MULTI -> `multi_line`
14 const RURE_FLAG_MULTI: u32 = 1 << 1;
// DOTNL -> `dot_matches_new_line`
15 const RURE_FLAG_DOTNL: u32 = 1 << 2;
// SWAP_GREED -> `swap_greed`
16 const RURE_FLAG_SWAP_GREED: u32 = 1 << 3;
// SPACE -> `ignore_whitespace`
17 const RURE_FLAG_SPACE: u32 = 1 << 4;
// UNICODE -> `unicode`
18 const RURE_FLAG_UNICODE: u32 = 1 << 5;
// Flag set used by `rure_compile_must`: only Unicode mode is enabled.
19 const RURE_DEFAULT_FLAGS: u32 = RURE_FLAG_UNICODE;
20 
/// A compiled regex as handed out through the C API by `rure_compile`.
21 pub struct Regex {
    /// The underlying byte-oriented regex; reachable through `Deref`.
22     re: bytes::Regex,
    /// Maps each named capture group to its index; filled in once by
    /// `rure_compile` and queried by `rure_capture_name_index`.
23     capture_names: HashMap<String, i32>,
24 }
25 
/// Compilation limits that `rure_compile` and `rure_compile_set` forward to
/// the regex builder when a non-null options pointer is supplied.
26 pub struct Options {
    /// Forwarded to the builder's `size_limit`.
27     size_limit: usize,
    /// Forwarded to the builder's `dfa_size_limit`.
28     dfa_size_limit: usize,
29 }
30 
31 // The `RegexSet` is not exposed with option support or matching at an
32 // arbitrary position with a crate just yet. To circumvent this, we use
33 // the `Exec` structure directly.
// NOTE(review): the comment above mentions `Exec`, but the field below is a
// `bytes::RegexSet`; the comment may be stale — confirm.
/// A compiled set of regexes as handed out by `rure_compile_set`.
34 pub struct RegexSet {
    /// The underlying byte-oriented regex set; reachable through `Deref`.
35     re: bytes::RegexSet,
36 }
37 
/// C-layout pair of offsets describing one match. The search functions fill
/// it from the `start()` / `end()` values of the match they found.
38 #[repr(C)]
39 pub struct rure_match {
    /// Offset where the match begins.
40     pub start: size_t,
    /// Offset where the match ends.
41     pub end: size_t,
42 }
43 
/// Capture-group locations: created by `rure_captures_new`, filled in by the
/// `*_captures` search functions and read back with `rure_captures_at`.
44 pub struct Captures(bytes::Locations);
45 
/// State of an iteration over successive matches (see `rure_iter_next`).
46 pub struct Iter {
    /// The regex being iterated; stored as the raw pointer that was given to
    /// `rure_iter_new`.
47     re: *const Regex,
    /// Offset at which the next search starts.
48     last_end: usize,
    /// End offset of the most recently reported match, if any.
49     last_match: Option<usize>,
50 }
51 
/// State of an iteration over the capture-group names of a regex.
52 pub struct IterCaptureNames {
    /// The underlying name iterator obtained from the regex.
53     capture_names: bytes::CaptureNames<'static>,
    /// Every C string handed out by `rure_iter_capture_names_next`; they are
    /// reclaimed in `rure_iter_capture_names_free`.
54     name_ptrs: Vec<*mut c_char>,
55 }
56 
57 impl Deref for Regex {
58     type Target = bytes::Regex;
deref(&self) -> &bytes::Regex59     fn deref(&self) -> &bytes::Regex {
60         &self.re
61     }
62 }
63 
64 impl Deref for RegexSet {
65     type Target = bytes::RegexSet;
deref(&self) -> &bytes::RegexSet66     fn deref(&self) -> &bytes::RegexSet {
67         &self.re
68     }
69 }
70 
71 impl Default for Options {
default() -> Options72     fn default() -> Options {
73         Options { size_limit: 10 * (1 << 20), dfa_size_limit: 2 * (1 << 20) }
74     }
75 }
76 
77 ffi_fn! {
78     fn rure_compile_must(pattern: *const c_char) -> *const Regex {
79         let len = unsafe { CStr::from_ptr(pattern).to_bytes().len() };
80         let pat = pattern as *const u8;
81         let mut err = Error::new(ErrorKind::None);
82         let re = rure_compile(
83             pat, len, RURE_DEFAULT_FLAGS, ptr::null(), &mut err);
84         if err.is_err() {
85             let _ = writeln!(&mut io::stderr(), "{}", err);
86             let _ = writeln!(
87                 &mut io::stderr(), "aborting from rure_compile_must");
88             unsafe { abort() }
89         }
90         re
91     }
92 }
93 
94 ffi_fn! {
95     fn rure_compile(
96         pattern: *const u8,
97         length: size_t,
98         flags: u32,
99         options: *const Options,
100         error: *mut Error,
101     ) -> *const Regex {
102         let pat = unsafe { slice::from_raw_parts(pattern, length) };
103         let pat = match str::from_utf8(pat) {
104             Ok(pat) => pat,
105             Err(err) => {
106                 unsafe {
107                     if !error.is_null() {
108                         *error = Error::new(ErrorKind::Str(err));
109                     }
110                     return ptr::null();
111                 }
112             }
113         };
114         let mut builder = bytes::RegexBuilder::new(pat);
115         if !options.is_null() {
116             let options = unsafe { &*options };
117             builder.size_limit(options.size_limit);
118             builder.dfa_size_limit(options.dfa_size_limit);
119         }
120         builder.case_insensitive(flags & RURE_FLAG_CASEI > 0);
121         builder.multi_line(flags & RURE_FLAG_MULTI > 0);
122         builder.dot_matches_new_line(flags & RURE_FLAG_DOTNL > 0);
123         builder.swap_greed(flags & RURE_FLAG_SWAP_GREED > 0);
124         builder.ignore_whitespace(flags & RURE_FLAG_SPACE > 0);
125         builder.unicode(flags & RURE_FLAG_UNICODE > 0);
126         match builder.build() {
127             Ok(re) => {
128                 let mut capture_names = HashMap::new();
129                 for (i, name) in re.capture_names().enumerate() {
130                     if let Some(name) = name {
131                         capture_names.insert(name.to_owned(), i as i32);
132                     }
133                 }
134                 let re = Regex {
135                     re: re,
136                     capture_names: capture_names,
137                 };
138                 Box::into_raw(Box::new(re))
139             }
140             Err(err) => {
141                 unsafe {
142                     if !error.is_null() {
143                         *error = Error::new(ErrorKind::Regex(err));
144                     }
145                     ptr::null()
146                 }
147             }
148         }
149     }
150 }
151 
152 ffi_fn! {
153     fn rure_free(re: *const Regex) {
154         unsafe { drop(Box::from_raw(re as *mut Regex)); }
155     }
156 }
157 
158 ffi_fn! {
159     fn rure_is_match(
160         re: *const Regex,
161         haystack: *const u8,
162         len: size_t,
163         start: size_t,
164     ) -> bool {
165         let re = unsafe { &*re };
166         let haystack = unsafe { slice::from_raw_parts(haystack, len) };
167         re.is_match_at(haystack, start)
168     }
169 }
170 
171 ffi_fn! {
172     fn rure_find(
173         re: *const Regex,
174         haystack: *const u8,
175         len: size_t,
176         start: size_t,
177         match_info: *mut rure_match,
178     ) -> bool {
179         let re = unsafe { &*re };
180         let haystack = unsafe { slice::from_raw_parts(haystack, len) };
181         re.find_at(haystack, start).map(|m| unsafe {
182             if !match_info.is_null() {
183                 (*match_info).start = m.start();
184                 (*match_info).end = m.end();
185             }
186         }).is_some()
187     }
188 }
189 
190 ffi_fn! {
191     fn rure_find_captures(
192         re: *const Regex,
193         haystack: *const u8,
194         len: size_t,
195         start: size_t,
196         captures: *mut Captures,
197     ) -> bool {
198         let re = unsafe { &*re };
199         let haystack = unsafe { slice::from_raw_parts(haystack, len) };
200         let slots = unsafe { &mut (*captures).0 };
201         re.read_captures_at(slots, haystack, start).is_some()
202     }
203 }
204 
205 ffi_fn! {
206     fn rure_shortest_match(
207         re: *const Regex,
208         haystack: *const u8,
209         len: size_t,
210         start: size_t,
211         end: *mut usize,
212     ) -> bool {
213         let re = unsafe { &*re };
214         let haystack = unsafe { slice::from_raw_parts(haystack, len) };
215         match re.shortest_match_at(haystack, start) {
216             None => false,
217             Some(i) => {
218                 if !end.is_null() {
219                     unsafe {
220                         *end = i;
221                     }
222                 }
223                 true
224             }
225         }
226     }
227 }
228 
229 ffi_fn! {
230     fn rure_capture_name_index(
231         re: *const Regex,
232         name: *const c_char,
233     ) -> i32 {
234         let re = unsafe { &*re };
235         let name = unsafe { CStr::from_ptr(name) };
236         let name = match name.to_str() {
237             Err(_) => return -1,
238             Ok(name) => name,
239         };
240         re.capture_names.get(name).map(|&i|i).unwrap_or(-1)
241     }
242 }
243 
244 ffi_fn! {
245     fn rure_iter_capture_names_new(
246         re: *const Regex,
247     ) -> *mut IterCaptureNames {
248         let re = unsafe { &*re };
249         Box::into_raw(Box::new(IterCaptureNames {
250             capture_names: re.re.capture_names(),
251             name_ptrs: Vec::new(),
252         }))
253     }
254 }
255 
256 ffi_fn! {
257     fn rure_iter_capture_names_free(it: *mut IterCaptureNames) {
258         unsafe {
259             let it = &mut *it;
260             while let Some(ptr) = it.name_ptrs.pop() {
261                 drop(CString::from_raw(ptr));
262             }
263             drop(Box::from_raw(it));
264         }
265     }
266 }
267 
268 ffi_fn! {
269     fn rure_iter_capture_names_next(
270         it: *mut IterCaptureNames,
271         capture_name: *mut *mut c_char,
272     ) -> bool {
273         if capture_name.is_null() {
274             return false;
275         }
276 
277         let it = unsafe { &mut *it };
278         let cn = match it.capture_names.next() {
279             // Top-level iterator ran out of capture groups
280             None => return false,
281             Some(val) => {
282                 let name = match val {
283                     // inner Option didn't have a name
284                     None => "",
285                     Some(name) => name
286                 };
287                 name
288             }
289         };
290 
291         unsafe {
292             let cs = match CString::new(cn.as_bytes()) {
293                 Result::Ok(val) => val,
294                 Result::Err(_) => return false
295             };
296             let ptr = cs.into_raw();
297             it.name_ptrs.push(ptr);
298             *capture_name = ptr;
299         }
300         true
301 
302     }
303 }
304 
305 ffi_fn! {
306     fn rure_iter_new(
307         re: *const Regex,
308     ) -> *mut Iter {
309         Box::into_raw(Box::new(Iter {
310             re: re,
311             last_end: 0,
312             last_match: None,
313         }))
314     }
315 }
316 
317 ffi_fn! {
318     fn rure_iter_free(it: *mut Iter) {
319         unsafe { drop(Box::from_raw(it)); }
320     }
321 }
322 
323 ffi_fn! {
324     fn rure_iter_next(
325         it: *mut Iter,
326         haystack: *const u8,
327         len: size_t,
328         match_info: *mut rure_match,
329     ) -> bool {
330         let it = unsafe { &mut *it };
331         let re = unsafe { &*it.re };
332         let text = unsafe { slice::from_raw_parts(haystack, len) };
333         if it.last_end > text.len() {
334             return false;
335         }
336         let (s, e) = match re.find_at(text, it.last_end) {
337             None => return false,
338             Some(m) => (m.start(), m.end()),
339         };
340         if s == e {
341             // This is an empty match. To ensure we make progress, start
342             // the next search at the smallest possible starting position
343             // of the next match following this one.
344             it.last_end += 1;
345             // Don't accept empty matches immediately following a match.
346             // Just move on to the next match.
347             if Some(e) == it.last_match {
348                 return rure_iter_next(it, haystack, len, match_info);
349             }
350         } else {
351             it.last_end = e;
352         }
353         it.last_match = Some(e);
354         if !match_info.is_null() {
355             unsafe {
356                 (*match_info).start = s;
357                 (*match_info).end = e;
358             }
359         }
360         true
361     }
362 }
363 
364 ffi_fn! {
365     fn rure_iter_next_captures(
366         it: *mut Iter,
367         haystack: *const u8,
368         len: size_t,
369         captures: *mut Captures,
370     ) -> bool {
371         let it = unsafe { &mut *it };
372         let re = unsafe { &*it.re };
373         let slots = unsafe { &mut (*captures).0 };
374         let text = unsafe { slice::from_raw_parts(haystack, len) };
375         if it.last_end > text.len() {
376             return false;
377         }
378         let (s, e) = match re.read_captures_at(slots, text, it.last_end) {
379             None => return false,
380             Some(m) => (m.start(), m.end()),
381         };
382         if s == e {
383             // This is an empty match. To ensure we make progress, start
384             // the next search at the smallest possible starting position
385             // of the next match following this one.
386             it.last_end += 1;
387             // Don't accept empty matches immediately following a match.
388             // Just move on to the next match.
389             if Some(e) == it.last_match {
390                 return rure_iter_next_captures(it, haystack, len, captures);
391             }
392         } else {
393             it.last_end = e;
394         }
395         it.last_match = Some(e);
396         true
397     }
398 }
399 
400 ffi_fn! {
401     fn rure_captures_new(re: *const Regex) -> *mut Captures {
402         let re = unsafe { &*re };
403         let captures = Captures(re.locations());
404         Box::into_raw(Box::new(captures))
405     }
406 }
407 
408 ffi_fn! {
409     fn rure_captures_free(captures: *const Captures) {
410         unsafe { drop(Box::from_raw(captures as *mut Captures)); }
411     }
412 }
413 
414 ffi_fn! {
415     fn rure_captures_at(
416         captures: *const Captures,
417         i: size_t,
418         match_info: *mut rure_match,
419     ) -> bool {
420         let locs = unsafe { &(*captures).0 };
421         match locs.pos(i) {
422             Some((start, end)) => {
423                 if !match_info.is_null() {
424                     unsafe {
425                         (*match_info).start = start;
426                         (*match_info).end = end;
427                     }
428                 }
429                 true
430             }
431             _ => false
432         }
433     }
434 }
435 
436 ffi_fn! {
437     fn rure_captures_len(captures: *const Captures) -> size_t {
438         unsafe { (*captures).0.len() }
439     }
440 }
441 
442 ffi_fn! {
443     fn rure_options_new() -> *mut Options {
444         Box::into_raw(Box::new(Options::default()))
445     }
446 }
447 
448 ffi_fn! {
449     fn rure_options_free(options: *mut Options) {
450         unsafe { drop(Box::from_raw(options)); }
451     }
452 }
453 
454 ffi_fn! {
455     fn rure_options_size_limit(options: *mut Options, limit: size_t) {
456         let options = unsafe { &mut *options };
457         options.size_limit = limit;
458     }
459 }
460 
461 ffi_fn! {
462     fn rure_options_dfa_size_limit(options: *mut Options, limit: size_t) {
463         let options = unsafe { &mut *options };
464         options.dfa_size_limit = limit;
465     }
466 }
467 
468 ffi_fn! {
469     fn rure_compile_set(
470         patterns: *const *const u8,
471         patterns_lengths: *const size_t,
472         patterns_count: size_t,
473         flags: u32,
474         options: *const Options,
475         error: *mut Error
476     ) -> *const RegexSet {
477         let (raw_pats, raw_patsl) = unsafe {
478             (
479                 slice::from_raw_parts(patterns, patterns_count),
480                 slice::from_raw_parts(patterns_lengths, patterns_count)
481             )
482         };
483 
484         let mut pats = Vec::with_capacity(patterns_count);
485         for (&raw_pat, &raw_patl) in raw_pats.iter().zip(raw_patsl) {
486             let pat = unsafe { slice::from_raw_parts(raw_pat, raw_patl) };
487             pats.push(match str::from_utf8(pat) {
488                 Ok(pat) => pat,
489                 Err(err) => {
490                     unsafe {
491                         if !error.is_null() {
492                             *error = Error::new(ErrorKind::Str(err));
493                         }
494                         return ptr::null();
495                     }
496                 }
497             });
498         }
499 
500         let mut builder = bytes::RegexSetBuilder::new(pats);
501         if !options.is_null() {
502             let options = unsafe { &*options };
503             builder.size_limit(options.size_limit);
504             builder.dfa_size_limit(options.dfa_size_limit);
505         }
506         builder.case_insensitive(flags & RURE_FLAG_CASEI > 0);
507         builder.multi_line(flags & RURE_FLAG_MULTI > 0);
508         builder.dot_matches_new_line(flags & RURE_FLAG_DOTNL > 0);
509         builder.swap_greed(flags & RURE_FLAG_SWAP_GREED > 0);
510         builder.ignore_whitespace(flags & RURE_FLAG_SPACE > 0);
511         builder.unicode(flags & RURE_FLAG_UNICODE > 0);
512         match builder.build() {
513             Ok(re) => {
514                 Box::into_raw(Box::new(RegexSet { re: re }))
515             }
516             Err(err) => {
517                 unsafe {
518                     if !error.is_null() {
519                         *error = Error::new(ErrorKind::Regex(err))
520                     }
521                     ptr::null()
522                 }
523             }
524         }
525     }
526 }
527 
528 ffi_fn! {
529     fn rure_set_free(re: *const RegexSet) {
530         unsafe { drop(Box::from_raw(re as *mut RegexSet)); }
531     }
532 }
533 
534 ffi_fn! {
535     fn rure_set_is_match(
536         re: *const RegexSet,
537         haystack: *const u8,
538         len: size_t,
539         start: size_t
540     ) -> bool {
541         let re = unsafe { &*re };
542         let haystack = unsafe { slice::from_raw_parts(haystack, len) };
543         re.is_match_at(haystack, start)
544     }
545 }
546 
547 ffi_fn! {
548     fn rure_set_matches(
549         re: *const RegexSet,
550         haystack: *const u8,
551         len: size_t,
552         start: size_t,
553         matches: *mut bool
554     ) -> bool {
555         let re = unsafe { &*re };
556         let mut matches = unsafe {
557             slice::from_raw_parts_mut(matches, re.len())
558         };
559         let haystack = unsafe { slice::from_raw_parts(haystack, len) };
560 
561         // read_matches_at isn't guaranteed to set non-matches to false
562         for item in matches.iter_mut() {
563             *item = false;
564         }
565         re.read_matches_at(&mut matches, haystack, start)
566     }
567 }
568 
569 ffi_fn! {
570     fn rure_set_len(re: *const RegexSet) -> size_t {
571         unsafe { (*re).len() }
572     }
573 }
574 
575 ffi_fn! {
576     fn rure_escape_must(pattern: *const c_char) -> *const c_char {
577         let len = unsafe { CStr::from_ptr(pattern).to_bytes().len() };
578         let pat = pattern as *const u8;
579         let mut err = Error::new(ErrorKind::None);
580         let esc = rure_escape(pat, len, &mut err);
581         if err.is_err() {
582             let _ = writeln!(&mut io::stderr(), "{}", err);
583             let _ = writeln!(
584                 &mut io::stderr(), "aborting from rure_escape_must");
585             unsafe { abort() }
586         }
587         esc
588     }
589 }
590 
591 /// A helper function that implements fallible escaping in a way that returns
592 /// an error if escaping failed.
593 ///
594 /// This should ideally be exposed, but it needs API design work. In
595 /// particular, this should not return a C string, but a `const uint8_t *`
596 /// instead, since it may contain a NUL byte.
rure_escape( pattern: *const u8, length: size_t, error: *mut Error, ) -> *const c_char597 fn rure_escape(
598     pattern: *const u8,
599     length: size_t,
600     error: *mut Error,
601 ) -> *const c_char {
602     let pat: &[u8] = unsafe { slice::from_raw_parts(pattern, length) };
603     let str_pat = match str::from_utf8(pat) {
604         Ok(val) => val,
605         Err(err) => unsafe {
606             if !error.is_null() {
607                 *error = Error::new(ErrorKind::Str(err));
608             }
609             return ptr::null();
610         },
611     };
612     let esc_pat = regex::escape(str_pat);
613     let c_esc_pat = match CString::new(esc_pat) {
614         Ok(val) => val,
615         Err(err) => unsafe {
616             if !error.is_null() {
617                 *error = Error::new(ErrorKind::Nul(err));
618             }
619             return ptr::null();
620         },
621     };
622     c_esc_pat.into_raw() as *const c_char
623 }
624 
625 ffi_fn! {
626     fn rure_cstring_free(s: *mut c_char) {
627         unsafe { drop(CString::from_raw(s)); }
628     }
629 }
630