Lines Matching +full:to +full:- +full:regex +full:- +full:range
12 /// `InstPtr` represents the index of an instruction in a regex program.
21 /// Pointers to each Match instruction in the sequence.
23 /// This is always length 1 unless this program represents a regex set.
28 /// Pointers to all named capture groups into `captures`.
30 /// A pointer to the start instruction. This can vary depending on how
39 /// When true, this program can only match valid UTF-8.
41 /// When true, this program uses byte range instructions instead of Unicode
42 /// range instructions.
51 /// Whether the regex must match from the start of the input.
53 /// Whether the regex must match at the end of the input.
59 /// A limit on the size of the cache that the DFA is allowed to use while
62 /// The cache limit specifies approximately how much space we're willing to
63 /// give to the state cache. Once the state cache exceeds the size, it is
64 /// wiped and all states must be re-computed.
66 /// Note that this value does not impact correctness. It can be set to 0
71 /// if the same regex is used to search text across multiple threads
80 pub fn new() -> Self { in new()
100 /// If pc is an index to a no-op instruction (like Save), then return the
101 /// next pc that is not a no-op instruction.
102 pub fn skip(&self, mut pc: usize) -> usize { in skip()
112 /// always lead to a match.
113 pub fn leads_to_match(&self, pc: usize) -> bool { in leads_to_match()
115 // If we have a regex set, then we have more than one ending in leads_to_match()
116 // state, so leading to one of those states is generally in leads_to_match()
127 /// `.*?` be prepended to the instruction sequence.
128 pub fn needs_dotstar(&self) -> bool { in needs_dotstar()
133 /// Char/Range instructions.
134 pub fn uses_bytes(&self) -> bool { in uses_bytes()
138 /// Returns true if this program exclusively matches valid UTF-8 bytes.
140 /// That is, if an invalid UTF-8 byte is seen, then no match is possible.
141 pub fn only_utf8(&self) -> bool { in only_utf8()
147 pub fn approximate_size(&self) -> usize { in approximate_size()
149 // Unicode codepoint programs) to store non-overlapping codepoint in approximate_size()
150 // ranges. To keep this operation constant time, we ignore them. in approximate_size()
164 #[cfg_attr(feature = "perf-inline", inline(always))]
165 fn deref(&self) -> &Self::Target { in deref()
171 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { in fmt()
174 fn with_goto(cur: usize, goto: usize, fmtd: String) -> String { in fmt()
182 fn visible_byte(b: u8) -> String { in fmt()
214 .map(|r| format!("{:?}-{:?}", r.0, r.1)) in fmt()
245 fn into_iter(self) -> Self::IntoIter { in into_iter()
250 /// Inst is an instruction code in a Regex program.
252 /// Regrettably, a regex program either contains Unicode codepoint
254 /// A regex program can never contain both.
257 /// then figuring out how to make the matching engines polymorphic over those
262 /// instructions from the `Inst` enum, then its size shrinks from 32 bytes to
265 /// their Unicode analogues (because they can decode UTF-8 directly), this ends
271 /// The number in the match corresponds to the Nth logical regular
272 /// expression in this program. This index is always 0 for normal regex
273 /// programs. Values greater than 0 appear when compiling regex sets, and
275 /// to the Nth regex in the set.
277 /// Save causes the program to save the current location of the input in
280 /// Split causes the program to diverge to one of two paths in the
283 /// EmptyLook represents a zero-width assertion in a regex program. A
284 /// zero-width assertion does not consume any of the input text.
286 /// Char requires the regex program to match the character in InstChar at
289 /// Ranges requires the regex program to match the character at the current
292 /// Bytes is like Ranges, except it expresses a single byte range. It is
293 /// used in conjunction with Split instructions to implement multi-byte
300 pub fn is_match(&self) -> bool { in is_match()
311 /// The next location to execute in the program.
313 /// The capture slot (there are two slots for every capture in a regex,
321 /// The first instruction to try. A match resulting from following goto1
324 /// The second instruction to try. A match resulting from following goto1
332 /// The next location to execute in the program if this instruction
335 /// The type of zero-width assertion to check.
339 /// The set of zero-width match instructions.
350 /// Word character on one side and non-word character on other.
352 /// Word character on both sides or non-word character on both sides.
363 /// The next location to execute in the program if this instruction
366 /// The character to test.
373 /// The next location to execute in the program if this instruction
376 /// The set of Unicode scalar value ranges to test.
382 pub fn matches(&self, c: Char) -> bool { in matches()
409 pub fn num_chars(&self) -> usize { in num_chars()
412 .map(|&(s, e)| 1 + (e as u32) - (s as u32)) in num_chars()
420 /// The next location to execute in the program if this instruction
423 /// The start (inclusive) of this byte range.
425 /// The end (inclusive) of this byte range.
430 /// Returns true if and only if the given byte is in this range.
431 pub fn matches(&self, byte: u8) -> bool { in matches()