• Home
  • Raw
  • Download

Lines Matching +full:to +full:- +full:regex

17 /// match. Thus, in order to find the bounds of any given match, a forward
21 /// The type of the DFA used by a `Regex` corresponds to the `D` type
27 /// By default, a regex's DFA type parameter is set to
28 /// `DenseDFA<Vec<usize>, usize>`. For most in-memory work loads, this is the
33 /// Since a `Regex` is generic over the `DFA` trait, it can be used with any
35 /// enough to build corresponding sparse DFAs, and then build a regex from
39 /// use regex_automata::Regex;
41 /// # fn example() -> Result<(), regex_automata::Error> {
42 /// // First, build a regex that uses dense DFAs.
43 /// let dense_re = Regex::new("foo[0-9]+")?;
49 /// // Third, build a new regex from the constituent sparse DFAs.
50 /// let sparse_re = Regex::from_dfas(fwd, rev);
52 /// // A regex that uses sparse DFAs can be used just like with dense DFAs.
58 pub struct Regex<D: DFA = DenseDFA<Vec<usize>, usize>> { struct
69 /// match. Thus, in order to find the bounds of any given match, a forward
73 /// The type of the DFA used by a `Regex` corresponds to the `D` type argument
79 /// When using this crate without the standard library, the `Regex` type has
84 /// Since a `Regex` is generic over the `DFA` trait, it can be used with any
86 /// enough to build corresponding sparse DFAs, and then build a regex from
90 /// use regex_automata::Regex;
92 /// # fn example() -> Result<(), regex_automata::Error> {
93 /// // First, build a regex that uses dense DFAs.
94 /// let dense_re = Regex::new("foo[0-9]+")?;
100 /// // Third, build a new regex from the constituent sparse DFAs.
101 /// let sparse_re = Regex::from_dfas(fwd, rev);
103 /// // A regex that uses sparse DFAs can be used just like with dense DFAs.
109 pub struct Regex<D> { struct
115 impl Regex { argument
117 /// return the corresponding regex.
123 /// If you want a non-default configuration, then use the
125 /// to set your own configuration.
130 /// use regex_automata::Regex;
132 /// # fn example() -> Result<(), regex_automata::Error> {
133 /// let re = Regex::new("foo[0-9]+bar")?;
137 pub fn new(pattern: &str) -> Result<Regex> { in new() argument
143 impl Regex<SparseDFA<Vec<u8>, usize>> { implementation
145 /// return the corresponding regex using sparse DFAs.
151 /// If you want a non-default configuration, then use the
153 /// to set your own configuration.
158 /// use regex_automata::Regex;
160 /// # fn example() -> Result<(), regex_automata::Error> {
161 /// let re = Regex::new_sparse("foo[0-9]+bar")?;
167 ) -> Result<Regex<SparseDFA<Vec<u8>, usize>>> { in new_sparse()
172 impl<D: DFA> Regex<D> { impl
176 /// will never lead to a different result. In particular, if the underlying
183 /// use regex_automata::Regex;
185 /// # fn example() -> Result<(), regex_automata::Error> {
186 /// let re = Regex::new("foo[0-9]+bar")?;
191 pub fn is_match(&self, input: &[u8]) -> bool { in is_match()
205 /// use regex_automata::Regex;
207 /// # fn example() -> Result<(), regex_automata::Error> {
208 /// let re = Regex::new("foo[0-9]+")?;
213 /// let re = Regex::new("abc|a")?;
217 pub fn shortest_match(&self, input: &[u8]) -> Option<usize> { in shortest_match()
224 /// The "leftmost first" match corresponds to the match with the smallest
231 /// regular expressions tend to work. This is in contrast to POSIX-style
239 /// use regex_automata::Regex;
241 /// # fn example() -> Result<(), regex_automata::Error> {
242 /// let re = Regex::new("foo[0-9]+")?;
248 /// let re = Regex::new("abc|a")?;
252 pub fn find(&self, input: &[u8]) -> Option<(usize, usize)> { in find()
262 pub fn is_match_at(&self, input: &[u8], start: usize) -> bool { in is_match_at()
276 ) -> Option<usize> { in shortest_match_at()
290 ) -> Option<(usize, usize)> { in find_at()
303 /// Returns an iterator over all non-overlapping leftmost first matches
307 /// Note that if the regex can match the empty string, then it is
308 /// possible for the iterator to yield a zero-width match at a location
309 /// that is not a valid UTF-8 boundary (for example, between the code units
310 /// of a UTF-8 encoded codepoint). This can happen regardless of whether
317 /// use regex_automata::Regex;
319 /// # fn example() -> Result<(), regex_automata::Error> {
320 /// let re = Regex::new("foo[0-9]+")?;
326 pub fn find_iter<'r, 't>(&'r self, input: &'t [u8]) -> Matches<'r, 't, D> { in find_iter()
330 /// Build a new regex from its constituent forward and reverse DFAs.
332 /// This is useful when deserializing a regex from some arbitrary
340 /// it later to build a regex.
343 /// use regex_automata::Regex;
345 /// # fn example() -> Result<(), regex_automata::Error> {
346 /// let initial_re = Regex::new("foo[0-9]+")?;
350 /// let re = Regex::from_dfas(fwd, rev);
356 /// smaller DFAs to build a new regex.
359 /// use regex_automata::Regex;
361 /// # fn example() -> Result<(), regex_automata::Error> {
362 /// let initial_re = Regex::new("foo[0-9]+")?;
367 /// let re = Regex::from_dfas(fwd, rev);
372 /// This example shows how to build a `Regex` that uses sparse DFAs instead
376 /// use regex_automata::Regex;
378 /// # fn example() -> Result<(), regex_automata::Error> {
379 /// let initial_re = Regex::new("foo[0-9]+")?;
384 /// let re = Regex::from_dfas(fwd, rev);
388 pub fn from_dfas(forward: D, reverse: D) -> Regex<D> { in from_dfas()
389 Regex { forward, reverse } in from_dfas()
393 pub fn forward(&self) -> &D { in forward()
398 pub fn reverse(&self) -> &D { in reverse()
403 /// An iterator over all non-overlapping matches for a particular search.
409 /// `S` is the type used to represent state identifiers in the underlying
410 /// regex. The lifetime variables are as follows:
416 re: &'r Regex<D>,
423 fn new(re: &'r Regex<D>, text: &'t [u8]) -> Matches<'r, 't, D> { in new()
431 fn next(&mut self) -> Option<(usize, usize)> { in next()
440 // This is an empty match. To ensure we make progress, start in next()
445 // Just move on to the next match. in next()
457 /// A builder for a regex based on deterministic finite automatons.
466 /// start of a match. If you only need to detect whether something matched,
469 /// to construct a single DFA, which is cheaper than building two DFAs.
478 /// Create a new regex builder with the default configuration.
479 pub fn new() -> RegexBuilder { in new()
483 /// Build a regex from the given pattern.
487 pub fn build(&self, pattern: &str) -> Result<Regex> { in build() argument
491 /// Build a regex from the given pattern using sparse DFAs.
498 ) -> Result<Regex<SparseDFA<Vec<u8>, usize>>> { in build_sparse()
502 /// Build a regex from the given pattern using a specific representation
511 /// of specifying a representation for state IDs is to reduce the memory
518 /// still return an error. To get a minimized DFA with a smaller state ID
522 /// Finally, reconstitute the regex via
523 /// [`Regex::from_dfa`](struct.Regex.html#method.from_dfa).
527 ) -> Result<Regex<DenseDFA<Vec<S>, S>>> { in build_with_size()
536 Ok(Regex::from_dfas(forward, reverse)) in build_with_size()
539 /// Build a regex from the given pattern using a specific representation
544 ) -> Result<Regex<SparseDFA<Vec<u8>, S>>> { in build_with_size_sparse()
548 Ok(Regex::from_dfas(fwd, rev)) in build_with_size_sparse()
554 /// disabled, the regex will act as if the pattern started with a `.*?`,
555 /// which enables a match to appear anywhere.
558 pub fn anchored(&mut self, yes: bool) -> &mut RegexBuilder { in anchored()
567 pub fn case_insensitive(&mut self, yes: bool) -> &mut RegexBuilder { in case_insensitive()
580 pub fn ignore_whitespace(&mut self, yes: bool) -> &mut RegexBuilder { in ignore_whitespace()
589 pub fn dot_matches_new_line(&mut self, yes: bool) -> &mut RegexBuilder { in dot_matches_new_line()
598 pub fn swap_greed(&mut self, yes: bool) -> &mut RegexBuilder { in swap_greed()
609 /// default), a regular expression will fail to parse if Unicode mode is
610 /// disabled and a sub-expression could possibly match invalid UTF-8.
611 pub fn unicode(&mut self, yes: bool) -> &mut RegexBuilder { in unicode()
617 /// expression that may match invalid UTF-8.
619 /// When disabled (the default), the builder is guaranteed to produce a
620 /// regex that will only ever match valid UTF-8 (otherwise, the builder
622 pub fn allow_invalid_utf8(&mut self, yes: bool) -> &mut RegexBuilder { in allow_invalid_utf8()
630 /// to be. If the AST exceeds the given limit (e.g., with too many nested
633 /// The purpose of this limit is to act as a heuristic to prevent stack
640 /// if callers want to put a limit on the amount of heap space used, then
643 /// limit itself to heap space proportional to the length of the pattern
652 pub fn nest_limit(&mut self, limit: u32) -> &mut RegexBuilder { in nest_limit()
659 /// When enabled, the DFAs powering the resulting regex will be minimized
663 /// you're willing to pay and how much you care about its benefits. In
667 /// space and time, so it should only be done if you're willing to wait
668 /// longer to produce a DFA. In general, you might want a minimal DFA in
671 /// 1. You would like to optimize for the size of the automaton. This can
676 /// building many DFAs and putting them on the heap, you'll be able to
683 /// inherent difference between matching with a bigger-than-minimal
686 /// 3. You are trying to establish an equivalence between regular
687 /// languages. The standard method for this is to build a minimal DFA
689 /// (up to state renaming), then the languages are equivalent.
692 pub fn minimize(&mut self, yes: bool) -> &mut RegexBuilder { in minimize()
699 /// When enabled, state identifiers are premultiplied to point to their
710 /// This has been observed to lead to a 20% performance benefit in
711 /// micro-benchmarks.
714 /// that they require a larger integer size to represent. For example,
716 /// 16 bits to represent every possible state identifier, whereas its
717 /// non-premultiplied form only requires 8 bits.
720 pub fn premultiply(&mut self, yes: bool) -> &mut RegexBuilder { in premultiply()
725 /// Shrink the size of the underlying DFA alphabet by mapping bytes to
728 /// When enabled, each DFA will use a map from all possible bytes to their
730 /// set of bytes that does not discriminate between a match and a non-match
735 /// and a non-match.
738 /// be reduced drastically from `#states * 256 * sizeof(id)` to
745 /// passed through this map before it can be used to determine the next
749 pub fn byte_classes(&mut self, yes: bool) -> &mut RegexBuilder { in byte_classes()
754 /// Apply best effort heuristics to shrink the NFA at the expense of more
758 /// the `regex-automata-debug` tool.
760 pub fn shrink(&mut self, yes: bool) -> &mut RegexBuilder { in shrink()
768 fn default() -> RegexBuilder { in default()