regex.rs - OpenGrok cross reference for /external/rust/crates/regex-automata/src/regex.rs

Lines Matching +full:to +full:- +full:regex
17 /// match. Thus, in order to find the bounds of any given match, a forward
21 /// The type of the DFA used by a `Regex` corresponds to the `D` type
27 /// By default, a regex's DFA type parameter is set to
28 /// `DenseDFA<Vec<usize>, usize>`. For most in-memory work loads, this is the
33 /// Since a `Regex` is generic over the `DFA` trait, it can be used with any
35 /// enough to build corresponding sparse DFAs, and then build a regex from
39 /// use regex_automata::Regex;
41 /// # fn example() -> Result<(), regex_automata::Error> {
42 /// // First, build a regex that uses dense DFAs.
43 /// let dense_re = Regex::new("foo[0-9]+")?;
49 /// // Third, build a new regex from the constituent sparse DFAs.
50 /// let sparse_re = Regex::from_dfas(fwd, rev);
52 /// // A regex that uses sparse DFAs can be used just like with dense DFAs.
58 pub struct Regex<D: DFA = DenseDFA<Vec<usize>, usize>> {  struct
69 /// match. Thus, in order to find the bounds of any given match, a forward
73 /// The type of the DFA used by a `Regex` corresponds to the `D` type  argument
79 /// When using this crate without the standard library, the `Regex` type has
84 /// Since a `Regex` is generic over the `DFA` trait, it can be used with any
86 /// enough to build corresponding sparse DFAs, and then build a regex from
90 /// use regex_automata::Regex;
92 /// # fn example() -> Result<(), regex_automata::Error> {
93 /// // First, build a regex that uses dense DFAs.
94 /// let dense_re = Regex::new("foo[0-9]+")?;
100 /// // Third, build a new regex from the constituent sparse DFAs.
101 /// let sparse_re = Regex::from_dfas(fwd, rev);
103 /// // A regex that uses sparse DFAs can be used just like with dense DFAs.
109 pub struct Regex<D> {  struct
115 impl Regex {  argument
117     /// return the corresponding regex.
123     /// If you want a non-default configuration, then use the
125     /// to set your own configuration.
130     /// use regex_automata::Regex;
132     /// # fn example() -> Result<(), regex_automata::Error> {
133     /// let re = Regex::new("foo[0-9]+bar")?;
137     pub fn new(pattern: &str) -> Result<Regex> {  in new()  argument
143 impl Regex<SparseDFA<Vec<u8>, usize>> {  implementation
145     /// return the corresponding regex using sparse DFAs.
151     /// If you want a non-default configuration, then use the
153     /// to set your own configuration.
158     /// use regex_automata::Regex;
160     /// # fn example() -> Result<(), regex_automata::Error> {
161     /// let re = Regex::new_sparse("foo[0-9]+bar")?;
167     ) -> Result<Regex<SparseDFA<Vec<u8>, usize>>> {  in new_sparse()
172 impl<D: DFA> Regex<D> {  impl
176     /// will never lead to a different result. In particular, if the underlying
183     /// use regex_automata::Regex;
185     /// # fn example() -> Result<(), regex_automata::Error> {
186     /// let re = Regex::new("foo[0-9]+bar")?;
191     pub fn is_match(&self, input: &[u8]) -> bool {  in is_match()
205     /// use regex_automata::Regex;
207     /// # fn example() -> Result<(), regex_automata::Error> {
208     /// let re = Regex::new("foo[0-9]+")?;
213     /// let re = Regex::new("abc|a")?;
217     pub fn shortest_match(&self, input: &[u8]) -> Option<usize> {  in shortest_match()
224     /// The "leftmost first" match corresponds to the match with the smallest
231     /// regular expressions tend to work. This is in contrast to POSIX-style
239     /// use regex_automata::Regex;
241     /// # fn example() -> Result<(), regex_automata::Error> {
242     /// let re = Regex::new("foo[0-9]+")?;
248     /// let re = Regex::new("abc|a")?;
252     pub fn find(&self, input: &[u8]) -> Option<(usize, usize)> {  in find()
262     pub fn is_match_at(&self, input: &[u8], start: usize) -> bool {  in is_match_at()
276     ) -> Option<usize> {  in shortest_match_at()
290     ) -> Option<(usize, usize)> {  in find_at()
303     /// Returns an iterator over all non-overlapping leftmost first matches
307     /// Note that if the regex can match the empty string, then it is
308     /// possible for the iterator to yield a zero-width match at a location
309     /// that is not a valid UTF-8 boundary (for example, between the code units
310     /// of a UTF-8 encoded codepoint). This can happen regardless of whether
317     /// use regex_automata::Regex;
319     /// # fn example() -> Result<(), regex_automata::Error> {
320     /// let re = Regex::new("foo[0-9]+")?;
326     pub fn find_iter<'r, 't>(&'r self, input: &'t [u8]) -> Matches<'r, 't, D> {  in find_iter()
330     /// Build a new regex from its constituent forward and reverse DFAs.
332     /// This is useful when deserializing a regex from some arbitrary
340     /// it later to build a regex.
343     /// use regex_automata::Regex;
345     /// # fn example() -> Result<(), regex_automata::Error> {
346     /// let initial_re = Regex::new("foo[0-9]+")?;
350     /// let re = Regex::from_dfas(fwd, rev);
356     /// smaller DFAs to build a new regex.
359     /// use regex_automata::Regex;
361     /// # fn example() -> Result<(), regex_automata::Error> {
362     /// let initial_re = Regex::new("foo[0-9]+")?;
367     /// let re = Regex::from_dfas(fwd, rev);
372     /// This example shows how to build a `Regex` that uses sparse DFAs instead
376     /// use regex_automata::Regex;
378     /// # fn example() -> Result<(), regex_automata::Error> {
379     /// let initial_re = Regex::new("foo[0-9]+")?;
384     /// let re = Regex::from_dfas(fwd, rev);
388     pub fn from_dfas(forward: D, reverse: D) -> Regex<D> {  in from_dfas()
389         Regex { forward, reverse }  in from_dfas()
393     pub fn forward(&self) -> &D {  in forward()
398     pub fn reverse(&self) -> &D {  in reverse()
403 /// An iterator over all non-overlapping matches for a particular search.
409 /// `S` is the type used to represent state identifiers in the underlying
410 /// regex. The lifetime variables are as follows:
416     re: &'r Regex<D>,
423     fn new(re: &'r Regex<D>, text: &'t [u8]) -> Matches<'r, 't, D> {  in new()
431     fn next(&mut self) -> Option<(usize, usize)> {  in next()
440             // This is an empty match. To ensure we make progress, start  in next()
445             // Just move on to the next match.  in next()
457 /// A builder for a regex based on deterministic finite automatons.
466 /// start of a match. If you only need to detect whether something matched,
469 /// to construct a single DFA, which is cheaper than building two DFAs.
478     /// Create a new regex builder with the default configuration.
479     pub fn new() -> RegexBuilder {  in new()
483     /// Build a regex from the given pattern.
487     pub fn build(&self, pattern: &str) -> Result<Regex> {  in build()  argument
491     /// Build a regex from the given pattern using sparse DFAs.
498     ) -> Result<Regex<SparseDFA<Vec<u8>, usize>>> {  in build_sparse()
502     /// Build a regex from the given pattern using a specific representation
511     /// of specifying a representation for state IDs is to reduce the memory
518     /// still return an error. To get a minimized DFA with a smaller state ID
522     /// Finally, reconstitute the regex via
523     /// [`Regex::from_dfas`](struct.Regex.html#method.from_dfas).
527     ) -> Result<Regex<DenseDFA<Vec<S>, S>>> {  in build_with_size()
536         Ok(Regex::from_dfas(forward, reverse))  in build_with_size()
539     /// Build a regex from the given pattern using a specific representation
544     ) -> Result<Regex<SparseDFA<Vec<u8>, S>>> {  in build_with_size_sparse()
548         Ok(Regex::from_dfas(fwd, rev))  in build_with_size_sparse()
554     /// disabled, the regex will act as if the pattern started with a `.*?`,
555     /// which enables a match to appear anywhere.
558     pub fn anchored(&mut self, yes: bool) -> &mut RegexBuilder {  in anchored()
567     pub fn case_insensitive(&mut self, yes: bool) -> &mut RegexBuilder {  in case_insensitive()
580     pub fn ignore_whitespace(&mut self, yes: bool) -> &mut RegexBuilder {  in ignore_whitespace()
589     pub fn dot_matches_new_line(&mut self, yes: bool) -> &mut RegexBuilder {  in dot_matches_new_line()
598     pub fn swap_greed(&mut self, yes: bool) -> &mut RegexBuilder {  in swap_greed()
609     /// default), a regular expression will fail to parse if Unicode mode is
610     /// disabled and a sub-expression could possibly match invalid UTF-8.
611     pub fn unicode(&mut self, yes: bool) -> &mut RegexBuilder {  in unicode()
617     /// expression that may match invalid UTF-8.
619     /// When disabled (the default), the builder is guaranteed to produce a
620     /// regex that will only ever match valid UTF-8 (otherwise, the builder
622     pub fn allow_invalid_utf8(&mut self, yes: bool) -> &mut RegexBuilder {  in allow_invalid_utf8()
630     /// to be. If the AST exceeds the given limit (e.g., with too many nested
633     /// The purpose of this limit is to act as a heuristic to prevent stack
640     /// if callers want to put a limit on the amount of heap space used, then
643     /// limit itself to heap space proportional to the length of the pattern
652     pub fn nest_limit(&mut self, limit: u32) -> &mut RegexBuilder {  in nest_limit()
659     /// When enabled, the DFAs powering the resulting regex will be minimized
663     /// you're willing to pay and how much you care about its benefits. In
667     /// space and time, so it should only be done if you're willing to wait
668     /// longer to produce a DFA. In general, you might want a minimal DFA in
671     /// 1. You would like to optimize for the size of the automaton. This can
676     ///    building many DFAs and putting them on the heap, you'll be able to
683     ///    inherent difference between matching with a bigger-than-minimal
686     /// 3. You are trying to establish an equivalence between regular
687     ///    languages. The standard method for this is to build a minimal DFA
689     ///    (up to state renaming), then the languages are equivalent.
692     pub fn minimize(&mut self, yes: bool) -> &mut RegexBuilder {  in minimize()
699     /// When enabled, state identifiers are premultiplied to point to their
710     /// This has been observed to lead to a 20% performance benefit in
711     /// micro-benchmarks.
714     /// that they require a larger integer size to represent. For example,
716     /// 16 bits to represent every possible state identifier, whereas its
717     /// non-premultiplied form only requires 8 bits.
720     pub fn premultiply(&mut self, yes: bool) -> &mut RegexBuilder {  in premultiply()
725     /// Shrink the size of the underlying DFA alphabet by mapping bytes to
728     /// When enabled, each DFA will use a map from all possible bytes to their
730     /// set of bytes that does not discriminate between a match and a non-match
735     /// and a non-match.
738     /// be reduced drastically from `#states * 256 * sizeof(id)` to
745     /// passed through this map before it can be used to determine the next
749     pub fn byte_classes(&mut self, yes: bool) -> &mut RegexBuilder {  in byte_classes()
754     /// Apply best effort heuristics to shrink the NFA at the expense of more
758     /// the `regex-automata-debug` tool.
760     pub fn shrink(&mut self, yes: bool) -> &mut RegexBuilder {  in shrink()
768     fn default() -> RegexBuilder {  in default()