prog.rs - OpenGrok cross reference for /third_party/rust/crates/regex/src/prog.rs

Lines Matching +full:to +full:- +full:regex +full:- +full:range
12 /// `InstPtr` represents the index of an instruction in a regex program.
21     /// Pointers to each Match instruction in the sequence.
23     /// This is always length 1 unless this program represents a regex set.
28     /// Pointers to all named capture groups into `captures`.
30     /// A pointer to the start instruction. This can vary depending on how
39     /// When true, this program can only match valid UTF-8.
41     /// When true, this program uses byte range instructions instead of Unicode
42     /// range instructions.
51     /// Whether the regex must match from the start of the input.
53     /// Whether the regex must match at the end of the input.
59     /// A limit on the size of the cache that the DFA is allowed to use while
62     /// The cache limit specifies approximately how much space we're willing to
63     /// give to the state cache. Once the state cache exceeds the size, it is
64     /// wiped and all states must be re-computed.
66     /// Note that this value does not impact correctness. It can be set to 0
71     /// if the same regex is used to search text across multiple threads
80     pub fn new() -> Self {  in new()
100     /// If pc is an index to a no-op instruction (like Save), then return the
101     /// next pc that is not a no-op instruction.
102     pub fn skip(&self, mut pc: usize) -> usize {  in skip()
112     /// always lead to a match.
113     pub fn leads_to_match(&self, pc: usize) -> bool {  in leads_to_match()
115             // If we have a regex set, then we have more than one ending  in leads_to_match()
116             // state, so leading to one of those states is generally  in leads_to_match()
127     /// `.*?` be prepended to the instruction sequence.
128     pub fn needs_dotstar(&self) -> bool {  in needs_dotstar()
133     /// Char/Range instructions.
134     pub fn uses_bytes(&self) -> bool {  in uses_bytes()
138     /// Returns true if this program exclusively matches valid UTF-8 bytes.
140     /// That is, if an invalid UTF-8 byte is seen, then no match is possible.
141     pub fn only_utf8(&self) -> bool {  in only_utf8()
147     pub fn approximate_size(&self) -> usize {  in approximate_size()
149         // Unicode codepoint programs) to store non-overlapping codepoint  in approximate_size()
150         // ranges. To keep this operation constant time, we ignore them.  in approximate_size()
164     #[cfg_attr(feature = "perf-inline", inline(always))]
165     fn deref(&self) -> &Self::Target {  in deref()
171     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {  in fmt()
174         fn with_goto(cur: usize, goto: usize, fmtd: String) -> String {  in fmt()
182         fn visible_byte(b: u8) -> String {  in fmt()
214                         .map(|r| format!("{:?}-{:?}", r.0, r.1))  in fmt()
245     fn into_iter(self) -> Self::IntoIter {  in into_iter()
250 /// Inst is an instruction code in a Regex program.
252 /// Regrettably, a regex program either contains Unicode codepoint
254 /// A regex program can never contain both.
257 /// then figuring out how to make the matching engines polymorphic over those
262 /// instructions from the `Inst` enum, then its size shrinks from 32 bytes to
265 /// their Unicode analogues (because they can decode UTF-8 directly), this ends
271     /// The number in the match corresponds to the Nth logical regular
272     /// expression in this program. This index is always 0 for normal regex
273     /// programs. Values greater than 0 appear when compiling regex sets, and
275     /// to the Nth regex in the set.
277     /// Save causes the program to save the current location of the input in
280     /// Split causes the program to diverge to one of two paths in the
283     /// EmptyLook represents a zero-width assertion in a regex program. A
284     /// zero-width assertion does not consume any of the input text.
286     /// Char requires the regex program to match the character in InstChar at
289     /// Ranges requires the regex program to match the character at the current
292     /// Bytes is like Ranges, except it expresses a single byte range. It is
293     /// used in conjunction with Split instructions to implement multi-byte
300     pub fn is_match(&self) -> bool {  in is_match()
311     /// The next location to execute in the program.
313     /// The capture slot (there are two slots for every capture in a regex,
321     /// The first instruction to try. A match resulting from following goto1
324     /// The second instruction to try. A match resulting from following goto1
332     /// The next location to execute in the program if this instruction
335     /// The type of zero-width assertion to check.
339 /// The set of zero-width match instructions.
350     /// Word character on one side and non-word character on other.
352     /// Word character on both sides or non-word character on both sides.
363     /// The next location to execute in the program if this instruction
366     /// The character to test.
373     /// The next location to execute in the program if this instruction
376     /// The set of Unicode scalar value ranges to test.
382     pub fn matches(&self, c: Char) -> bool {  in matches()
409     pub fn num_chars(&self) -> usize {  in num_chars()
412             .map(|&(s, e)| 1 + (e as u32) - (s as u32))  in num_chars()
420     /// The next location to execute in the program if this instruction
423     /// The start (inclusive) of this byte range.
425     /// The end (inclusive) of this byte range.
430     /// Returns true if and only if the given byte is in this range.
431     pub fn matches(&self, byte: u8) -> bool {  in matches()