• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 use std::fmt::Debug;
2 use std::hash::Hash;
3 
4 use crate::error::{Error, Result};
5 
6 // NOTE: Most of this code was copied from regex-automata, but without the
7 // (de)serialization specific stuff.
8 
9 /// Check that the premultiplication of the given state identifier can
10 /// fit into the representation indicated by `S`. If it cannot, or if it
11 /// overflows `usize` itself, then an error is returned.
premultiply_overflow_error<S: StateID>( last_state: S, alphabet_len: usize, ) -> Result<()>12 pub fn premultiply_overflow_error<S: StateID>(
13     last_state: S,
14     alphabet_len: usize,
15 ) -> Result<()> {
16     let requested = match last_state.to_usize().checked_mul(alphabet_len) {
17         Some(requested) => requested,
18         None => return Err(Error::premultiply_overflow(0, 0)),
19     };
20     if requested > S::max_id() {
21         return Err(Error::premultiply_overflow(S::max_id(), requested));
22     }
23     Ok(())
24 }
25 
26 /// Convert the given `usize` to the chosen state identifier
27 /// representation. If the given value cannot fit in the chosen
28 /// representation, then an error is returned.
usize_to_state_id<S: StateID>(value: usize) -> Result<S>29 pub fn usize_to_state_id<S: StateID>(value: usize) -> Result<S> {
30     if value > S::max_id() {
31         Err(Error::state_id_overflow(S::max_id()))
32     } else {
33         Ok(S::from_usize(value))
34     }
35 }
36 
37 /// Return the unique identifier for an automaton's fail state in the chosen
38 /// representation indicated by `S`.
fail_id<S: StateID>() -> S39 pub fn fail_id<S: StateID>() -> S {
40     S::from_usize(0)
41 }
42 
43 /// Return the unique identifier for an automaton's fail state in the chosen
44 /// representation indicated by `S`.
dead_id<S: StateID>() -> S45 pub fn dead_id<S: StateID>() -> S {
46     S::from_usize(1)
47 }
48 
49 mod private {
50     /// Sealed stops crates other than aho-corasick from implementing any
51     /// traits that use it.
52     pub trait Sealed {}
53     impl Sealed for u8 {}
54     impl Sealed for u16 {}
55     impl Sealed for u32 {}
56     impl Sealed for u64 {}
57     impl Sealed for usize {}
58 }
59 
60 /// A trait describing the representation of an automaton's state identifier.
61 ///
62 /// The purpose of this trait is to safely express both the possible state
63 /// identifier representations that can be used in an automaton and to convert
64 /// between state identifier representations and types that can be used to
65 /// efficiently index memory (such as `usize`).
66 ///
67 /// In general, one should not need to implement this trait explicitly. Indeed,
68 /// for now, this trait is sealed such that it cannot be implemented by any
69 /// other type. In particular, this crate provides implementations for `u8`,
70 /// `u16`, `u32`, `u64` and `usize`. (`u32` and `u64` are only provided for
71 /// targets that can represent all corresponding values in a `usize`.)
72 pub trait StateID:
73     private::Sealed
74     + Clone
75     + Copy
76     + Debug
77     + Eq
78     + Hash
79     + PartialEq
80     + PartialOrd
81     + Ord
82 {
83     /// Convert from a `usize` to this implementation's representation.
84     ///
85     /// Implementors may assume that `n <= Self::max_id`. That is, implementors
86     /// do not need to check whether `n` can fit inside this implementation's
87     /// representation.
from_usize(n: usize) -> Self88     fn from_usize(n: usize) -> Self;
89 
90     /// Convert this implementation's representation to a `usize`.
91     ///
92     /// Implementors must not return a `usize` value greater than
93     /// `Self::max_id` and must not permit overflow when converting between the
94     /// implementor's representation and `usize`. In general, the preferred
95     /// way for implementors to achieve this is to simply not provide
96     /// implementations of `StateID` that cannot fit into the target platform's
97     /// `usize`.
to_usize(self) -> usize98     fn to_usize(self) -> usize;
99 
100     /// Return the maximum state identifier supported by this representation.
101     ///
102     /// Implementors must return a correct bound. Doing otherwise may result
103     /// in unspecified behavior (but will not violate memory safety).
max_id() -> usize104     fn max_id() -> usize;
105 }
106 
107 impl StateID for usize {
108     #[inline]
from_usize(n: usize) -> usize109     fn from_usize(n: usize) -> usize {
110         n
111     }
112 
113     #[inline]
to_usize(self) -> usize114     fn to_usize(self) -> usize {
115         self
116     }
117 
118     #[inline]
max_id() -> usize119     fn max_id() -> usize {
120         ::std::usize::MAX
121     }
122 }
123 
124 impl StateID for u8 {
125     #[inline]
from_usize(n: usize) -> u8126     fn from_usize(n: usize) -> u8 {
127         n as u8
128     }
129 
130     #[inline]
to_usize(self) -> usize131     fn to_usize(self) -> usize {
132         self as usize
133     }
134 
135     #[inline]
max_id() -> usize136     fn max_id() -> usize {
137         ::std::u8::MAX as usize
138     }
139 }
140 
141 impl StateID for u16 {
142     #[inline]
from_usize(n: usize) -> u16143     fn from_usize(n: usize) -> u16 {
144         n as u16
145     }
146 
147     #[inline]
to_usize(self) -> usize148     fn to_usize(self) -> usize {
149         self as usize
150     }
151 
152     #[inline]
max_id() -> usize153     fn max_id() -> usize {
154         ::std::u16::MAX as usize
155     }
156 }
157 
158 #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))]
159 impl StateID for u32 {
160     #[inline]
from_usize(n: usize) -> u32161     fn from_usize(n: usize) -> u32 {
162         n as u32
163     }
164 
165     #[inline]
to_usize(self) -> usize166     fn to_usize(self) -> usize {
167         self as usize
168     }
169 
170     #[inline]
max_id() -> usize171     fn max_id() -> usize {
172         ::std::u32::MAX as usize
173     }
174 }
175 
176 #[cfg(target_pointer_width = "64")]
177 impl StateID for u64 {
178     #[inline]
from_usize(n: usize) -> u64179     fn from_usize(n: usize) -> u64 {
180         n as u64
181     }
182 
183     #[inline]
to_usize(self) -> usize184     fn to_usize(self) -> usize {
185         self as usize
186     }
187 
188     #[inline]
max_id() -> usize189     fn max_id() -> usize {
190         ::std::u64::MAX as usize
191     }
192 }
193