1 // This file is part of ICU4X. For terms of use, please see the file 2 // called LICENSE at the top level of the ICU4X source tree 3 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 4 5 //! Options for building and reading from a ZeroTrie. 6 //! 7 //! These options are internal to the crate. A small selection of options 8 //! are exported by way of the different public types on this crate. 9 10 /// Whether to use the perfect hash function in the ZeroTrie. 11 #[derive(Copy, Clone)] 12 pub(crate) enum PhfMode { 13 /// Use binary search for all branch nodes. 14 BinaryOnly, 15 /// Use the perfect hash function for large branch nodes. 16 UsePhf, 17 } 18 19 impl PhfMode { 20 #[cfg(feature = "serde")] to_u8_flag(self) -> u821 const fn to_u8_flag(self) -> u8 { 22 match self { 23 Self::BinaryOnly => 0, 24 Self::UsePhf => 0x1, 25 } 26 } 27 } 28 29 /// Whether to support non-ASCII data in the ZeroTrie. 30 #[derive(Copy, Clone)] 31 pub(crate) enum AsciiMode { 32 /// Support only ASCII, returning an error if non-ASCII is found. 33 AsciiOnly, 34 /// Support all data, creating span nodes for non-ASCII bytes. 35 BinarySpans, 36 } 37 38 impl AsciiMode { 39 #[cfg(feature = "serde")] to_u8_flag(self) -> u840 const fn to_u8_flag(self) -> u8 { 41 match self { 42 Self::AsciiOnly => 0, 43 Self::BinarySpans => 0x2, 44 } 45 } 46 } 47 48 /// Whether to enforce a limit to the capacity of the ZeroTrie. 49 #[derive(Copy, Clone)] 50 pub(crate) enum CapacityMode { 51 /// Return an error if the trie requires a branch of more than 2^32 bytes. 52 Normal, 53 /// Construct the trie without returning an error. 54 Extended, 55 } 56 57 impl CapacityMode { 58 #[cfg(feature = "serde")] to_u8_flag(self) -> u859 const fn to_u8_flag(self) -> u8 { 60 match self { 61 Self::Normal => 0, 62 Self::Extended => 0x4, 63 } 64 } 65 } 66 67 /// How to handle strings with mixed ASCII case at a node, such as "abc" and "Abc" 68 #[derive(Copy, Clone)] 69 pub(crate) enum CaseSensitivity { 70 /// Allow all strings and sort them by byte value. 71 Sensitive, 72 /// Reject strings with different case and sort them as if `to_ascii_lowercase` is called. 73 IgnoreCase, 74 } 75 76 impl CaseSensitivity { 77 #[cfg(feature = "serde")] to_u8_flag(self) -> u878 const fn to_u8_flag(self) -> u8 { 79 match self { 80 Self::Sensitive => 0, 81 Self::IgnoreCase => 0x8, 82 } 83 } 84 } 85 86 #[derive(Copy, Clone)] 87 pub(crate) struct ZeroTrieBuilderOptions { 88 pub phf_mode: PhfMode, 89 pub ascii_mode: AsciiMode, 90 pub capacity_mode: CapacityMode, 91 pub case_sensitivity: CaseSensitivity, 92 } 93 94 impl ZeroTrieBuilderOptions { 95 #[cfg(feature = "serde")] to_u8_flags(self) -> u896 pub(crate) const fn to_u8_flags(self) -> u8 { 97 self.phf_mode.to_u8_flag() 98 | self.ascii_mode.to_u8_flag() 99 | self.capacity_mode.to_u8_flag() 100 | self.case_sensitivity.to_u8_flag() 101 } 102 } 103 104 pub(crate) trait ZeroTrieWithOptions { 105 const OPTIONS: ZeroTrieBuilderOptions; 106 } 107 108 /// All branch nodes are binary search 109 /// and there are no span nodes. 110 impl<S: ?Sized> ZeroTrieWithOptions for crate::ZeroTrieSimpleAscii<S> { 111 const OPTIONS: ZeroTrieBuilderOptions = ZeroTrieBuilderOptions { 112 phf_mode: PhfMode::BinaryOnly, 113 ascii_mode: AsciiMode::AsciiOnly, 114 capacity_mode: CapacityMode::Normal, 115 case_sensitivity: CaseSensitivity::Sensitive, 116 }; 117 } 118 119 impl<S: ?Sized> crate::ZeroTrieSimpleAscii<S> { 120 #[cfg(feature = "serde")] 121 pub(crate) const FLAGS: u8 = Self::OPTIONS.to_u8_flags(); 122 } 123 124 /// All branch nodes are binary search 125 /// and nodes use case-insensitive matching. 126 impl<S: ?Sized> ZeroTrieWithOptions for crate::ZeroAsciiIgnoreCaseTrie<S> { 127 const OPTIONS: ZeroTrieBuilderOptions = ZeroTrieBuilderOptions { 128 phf_mode: PhfMode::BinaryOnly, 129 ascii_mode: AsciiMode::AsciiOnly, 130 capacity_mode: CapacityMode::Normal, 131 case_sensitivity: CaseSensitivity::IgnoreCase, 132 }; 133 } 134 135 /// Branch nodes could be either binary search or PHF. 136 impl<S: ?Sized> ZeroTrieWithOptions for crate::ZeroTriePerfectHash<S> { 137 const OPTIONS: ZeroTrieBuilderOptions = ZeroTrieBuilderOptions { 138 phf_mode: PhfMode::UsePhf, 139 ascii_mode: AsciiMode::BinarySpans, 140 capacity_mode: CapacityMode::Normal, 141 case_sensitivity: CaseSensitivity::Sensitive, 142 }; 143 } 144 145 /// No limited capacity assertion. 146 impl<S: ?Sized> ZeroTrieWithOptions for crate::ZeroTrieExtendedCapacity<S> { 147 const OPTIONS: ZeroTrieBuilderOptions = ZeroTrieBuilderOptions { 148 phf_mode: PhfMode::UsePhf, 149 ascii_mode: AsciiMode::BinarySpans, 150 capacity_mode: CapacityMode::Extended, 151 case_sensitivity: CaseSensitivity::Sensitive, 152 }; 153 } 154