// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4 
//! Options for building and reading from a ZeroTrie.
//!
//! These options are internal to the crate. A small selection of options
//! is exported by way of the different public types on this crate.
9 
/// Whether to use the perfect hash function (PHF) in the ZeroTrie.
#[derive(Copy, Clone)]
pub(crate) enum PhfMode {
    /// Use binary search for all branch nodes.
    BinaryOnly,
    /// Use the perfect hash function for large branch nodes.
    UsePhf,
}
18 
19 impl PhfMode {
20     #[cfg(feature = "serde")]
to_u8_flag(self) -> u821     const fn to_u8_flag(self) -> u8 {
22         match self {
23             Self::BinaryOnly => 0,
24             Self::UsePhf => 0x1,
25         }
26     }
27 }
28 
/// Whether to support non-ASCII data in the ZeroTrie.
#[derive(Copy, Clone)]
pub(crate) enum AsciiMode {
    /// Support only ASCII, returning an error if non-ASCII is found.
    AsciiOnly,
    /// Support all data, creating span nodes for non-ASCII bytes.
    BinarySpans,
}
37 
38 impl AsciiMode {
39     #[cfg(feature = "serde")]
to_u8_flag(self) -> u840     const fn to_u8_flag(self) -> u8 {
41         match self {
42             Self::AsciiOnly => 0,
43             Self::BinarySpans => 0x2,
44         }
45     }
46 }
47 
/// Whether to enforce a limit to the capacity of the ZeroTrie.
#[derive(Copy, Clone)]
pub(crate) enum CapacityMode {
    /// Return an error if the trie requires a branch of more than 2^32 bytes.
    Normal,
    /// Construct the trie without returning an error.
    Extended,
}
56 
57 impl CapacityMode {
58     #[cfg(feature = "serde")]
to_u8_flag(self) -> u859     const fn to_u8_flag(self) -> u8 {
60         match self {
61             Self::Normal => 0,
62             Self::Extended => 0x4,
63         }
64     }
65 }
66 
/// How to handle strings with mixed ASCII case at a node, such as "abc" and "Abc".
#[derive(Copy, Clone)]
pub(crate) enum CaseSensitivity {
    /// Allow all strings and sort them by byte value.
    Sensitive,
    /// Reject strings with different case and sort them as if `to_ascii_lowercase` is called.
    IgnoreCase,
}
75 
76 impl CaseSensitivity {
77     #[cfg(feature = "serde")]
to_u8_flag(self) -> u878     const fn to_u8_flag(self) -> u8 {
79         match self {
80             Self::Sensitive => 0,
81             Self::IgnoreCase => 0x8,
82         }
83     }
84 }
85 
86 #[derive(Copy, Clone)]
87 pub(crate) struct ZeroTrieBuilderOptions {
88     pub phf_mode: PhfMode,
89     pub ascii_mode: AsciiMode,
90     pub capacity_mode: CapacityMode,
91     pub case_sensitivity: CaseSensitivity,
92 }
93 
94 impl ZeroTrieBuilderOptions {
95     #[cfg(feature = "serde")]
to_u8_flags(self) -> u896     pub(crate) const fn to_u8_flags(self) -> u8 {
97         self.phf_mode.to_u8_flag()
98             | self.ascii_mode.to_u8_flag()
99             | self.capacity_mode.to_u8_flag()
100             | self.case_sensitivity.to_u8_flag()
101     }
102 }
103 
104 pub(crate) trait ZeroTrieWithOptions {
105     const OPTIONS: ZeroTrieBuilderOptions;
106 }
107 
108 /// All branch nodes are binary search
109 /// and there are no span nodes.
110 impl<S: ?Sized> ZeroTrieWithOptions for crate::ZeroTrieSimpleAscii<S> {
111     const OPTIONS: ZeroTrieBuilderOptions = ZeroTrieBuilderOptions {
112         phf_mode: PhfMode::BinaryOnly,
113         ascii_mode: AsciiMode::AsciiOnly,
114         capacity_mode: CapacityMode::Normal,
115         case_sensitivity: CaseSensitivity::Sensitive,
116     };
117 }
118 
119 impl<S: ?Sized> crate::ZeroTrieSimpleAscii<S> {
120     #[cfg(feature = "serde")]
121     pub(crate) const FLAGS: u8 = Self::OPTIONS.to_u8_flags();
122 }
123 
124 /// All branch nodes are binary search
125 /// and nodes use case-insensitive matching.
126 impl<S: ?Sized> ZeroTrieWithOptions for crate::ZeroAsciiIgnoreCaseTrie<S> {
127     const OPTIONS: ZeroTrieBuilderOptions = ZeroTrieBuilderOptions {
128         phf_mode: PhfMode::BinaryOnly,
129         ascii_mode: AsciiMode::AsciiOnly,
130         capacity_mode: CapacityMode::Normal,
131         case_sensitivity: CaseSensitivity::IgnoreCase,
132     };
133 }
134 
135 /// Branch nodes could be either binary search or PHF.
136 impl<S: ?Sized> ZeroTrieWithOptions for crate::ZeroTriePerfectHash<S> {
137     const OPTIONS: ZeroTrieBuilderOptions = ZeroTrieBuilderOptions {
138         phf_mode: PhfMode::UsePhf,
139         ascii_mode: AsciiMode::BinarySpans,
140         capacity_mode: CapacityMode::Normal,
141         case_sensitivity: CaseSensitivity::Sensitive,
142     };
143 }
144 
145 /// No limited capacity assertion.
146 impl<S: ?Sized> ZeroTrieWithOptions for crate::ZeroTrieExtendedCapacity<S> {
147     const OPTIONS: ZeroTrieBuilderOptions = ZeroTrieBuilderOptions {
148         phf_mode: PhfMode::UsePhf,
149         ascii_mode: AsciiMode::BinarySpans,
150         capacity_mode: CapacityMode::Extended,
151         case_sensitivity: CaseSensitivity::Sensitive,
152     };
153 }
154