1 // This file is part of ICU4X. For terms of use, please see the file
2 // called LICENSE at the top level of the ICU4X source tree
3 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5 use crate::bidi::BidiMirroringGlyph;
6 use crate::props::{
7 BidiClass, CanonicalCombiningClass, EastAsianWidth, GeneralCategory, GeneralCategoryGroup,
8 GraphemeClusterBreak, HangulSyllableType, IndicSyllabicCategory, JoiningType, LineBreak,
9 Script, SentenceBreak, WordBreak,
10 };
11 use crate::script::ScriptWithExt;
12 use core::convert::TryInto;
13 use core::num::TryFromIntError;
14 use zerovec::ule::{AsULE, RawBytesULE};
15
16 use icu_collections::codepointtrie::TrieValue;
17
18 use core::convert::TryFrom;
19
20 impl TrieValue for CanonicalCombiningClass {
21 type TryFromU32Error = TryFromIntError;
22
try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error>23 fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
24 u8::try_from(i).map(Self)
25 }
26
to_u32(self) -> u3227 fn to_u32(self) -> u32 {
28 u32::from(self.0)
29 }
30 }
31
32 impl TrieValue for BidiClass {
33 type TryFromU32Error = TryFromIntError;
34
try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error>35 fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
36 u8::try_from(i).map(Self)
37 }
38
to_u32(self) -> u3239 fn to_u32(self) -> u32 {
40 u32::from(self.0)
41 }
42 }
43
44 impl TrieValue for GeneralCategory {
45 type TryFromU32Error = &'static str;
46
try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error>47 fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
48 // If the u32 is out of range, fall back to u8::MAX, which is out of range of the GeneralCategory enum.
49 GeneralCategory::new_from_u8(i.try_into().unwrap_or(u8::MAX))
50 .ok_or("Cannot parse GeneralCategory from integer")
51 }
52
to_u32(self) -> u3253 fn to_u32(self) -> u32 {
54 u32::from(self as u8)
55 }
56 }
57
58 impl TrieValue for Script {
59 type TryFromU32Error = TryFromIntError;
60
try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error>61 fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
62 u16::try_from(i).map(Script)
63 }
64
to_u32(self) -> u3265 fn to_u32(self) -> u32 {
66 u32::from(self.0)
67 }
68 }
69
70 impl TrieValue for HangulSyllableType {
71 type TryFromU32Error = TryFromIntError;
72
try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error>73 fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
74 u8::try_from(i).map(Self)
75 }
76
to_u32(self) -> u3277 fn to_u32(self) -> u32 {
78 u32::from(self.0)
79 }
80 }
81
82 impl TrieValue for ScriptWithExt {
83 type TryFromU32Error = TryFromIntError;
84
try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error>85 fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
86 u16::try_from(i).map(Self)
87 }
88
to_u32(self) -> u3289 fn to_u32(self) -> u32 {
90 u32::from(self.0)
91 }
92 }
93
94 impl TrieValue for EastAsianWidth {
95 type TryFromU32Error = TryFromIntError;
96
try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error>97 fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
98 u8::try_from(i).map(Self)
99 }
100
to_u32(self) -> u32101 fn to_u32(self) -> u32 {
102 u32::from(self.0)
103 }
104 }
105
106 impl TrieValue for LineBreak {
107 type TryFromU32Error = TryFromIntError;
108
try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error>109 fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
110 u8::try_from(i).map(Self)
111 }
112
to_u32(self) -> u32113 fn to_u32(self) -> u32 {
114 u32::from(self.0)
115 }
116 }
117
118 impl TrieValue for GraphemeClusterBreak {
119 type TryFromU32Error = TryFromIntError;
120
try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error>121 fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
122 u8::try_from(i).map(Self)
123 }
124
to_u32(self) -> u32125 fn to_u32(self) -> u32 {
126 u32::from(self.0)
127 }
128 }
129
130 impl TrieValue for WordBreak {
131 type TryFromU32Error = TryFromIntError;
132
try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error>133 fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
134 u8::try_from(i).map(Self)
135 }
136
to_u32(self) -> u32137 fn to_u32(self) -> u32 {
138 u32::from(self.0)
139 }
140 }
141
142 impl TrieValue for SentenceBreak {
143 type TryFromU32Error = TryFromIntError;
144
try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error>145 fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
146 u8::try_from(i).map(Self)
147 }
148
to_u32(self) -> u32149 fn to_u32(self) -> u32 {
150 u32::from(self.0)
151 }
152 }
153
154 impl TrieValue for IndicSyllabicCategory {
155 type TryFromU32Error = TryFromIntError;
156
try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error>157 fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
158 u8::try_from(i).map(Self)
159 }
160
to_u32(self) -> u32161 fn to_u32(self) -> u32 {
162 u32::from(self.0)
163 }
164 }
165
166 // GCG is not used inside tries, but it is used in the name lookup type, and we want
167 // to squeeze it into a u16 for storage. Its named mask values are specced so we can
168 // do this in code.
169 //
170 // This is done by:
171 // - Single-value masks are translated to their corresponding GeneralCategory values
172 // - we know all of the multi-value masks and we give them special values
173 // - Anything else goes to 0xFF00, though this code path shouldn't be hit unless working with malformed icuexportdata
174 //
175 // In the reverse direction, unknown values go to the empty mask, but this codepath should not be hit except
176 // with malformed ICU4X generated data.
177 impl AsULE for GeneralCategoryGroup {
178 type ULE = RawBytesULE<2>;
to_unaligned(self) -> Self::ULE179 fn to_unaligned(self) -> Self::ULE {
180 let value = gcg_to_packed_u16(self);
181 value.to_unaligned()
182 }
from_unaligned(ule: Self::ULE) -> Self183 fn from_unaligned(ule: Self::ULE) -> Self {
184 let value = ule.as_unsigned_int();
185 packed_u16_to_gcg(value)
186 }
187 }
188
packed_u16_to_gcg(value: u16) -> GeneralCategoryGroup189 fn packed_u16_to_gcg(value: u16) -> GeneralCategoryGroup {
190 match value {
191 0xFFFF => GeneralCategoryGroup::CasedLetter,
192 0xFFFE => GeneralCategoryGroup::Letter,
193 0xFFFD => GeneralCategoryGroup::Mark,
194 0xFFFC => GeneralCategoryGroup::Number,
195 0xFFFB => GeneralCategoryGroup::Separator,
196 0xFFFA => GeneralCategoryGroup::Other,
197 0xFFF9 => GeneralCategoryGroup::Punctuation,
198 0xFFF8 => GeneralCategoryGroup::Symbol,
199 v if v < 32 => GeneralCategory::new_from_u8(v as u8)
200 .map(|gc| gc.into())
201 .unwrap_or(GeneralCategoryGroup(0)),
202 // unknown values produce an empty mask
203 _ => GeneralCategoryGroup(0),
204 }
205 }
206
gcg_to_packed_u16(gcg: GeneralCategoryGroup) -> u16207 fn gcg_to_packed_u16(gcg: GeneralCategoryGroup) -> u16 {
208 // if it's a single property, translate to that property
209 if gcg.0.is_power_of_two() {
210 // inverse operation of a bitshift
211 gcg.0.trailing_zeros() as u16
212 } else {
213 match gcg {
214 GeneralCategoryGroup::CasedLetter => 0xFFFF,
215 GeneralCategoryGroup::Letter => 0xFFFE,
216 GeneralCategoryGroup::Mark => 0xFFFD,
217 GeneralCategoryGroup::Number => 0xFFFC,
218 GeneralCategoryGroup::Separator => 0xFFFB,
219 GeneralCategoryGroup::Other => 0xFFFA,
220 GeneralCategoryGroup::Punctuation => 0xFFF9,
221 GeneralCategoryGroup::Symbol => 0xFFF8,
222 _ => 0xFF00, // random sentinel value
223 }
224 }
225 }
226
227 impl TrieValue for GeneralCategoryGroup {
228 type TryFromU32Error = TryFromIntError;
try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error>229 fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
230 // Even though we're dealing with u32s here, TrieValue is about converting
231 // trie storage types to the actual type. This type will always be a packed u16
232 // in our case since the names map upcasts from u16
233 u16::try_from(i).map(packed_u16_to_gcg)
234 }
235
to_u32(self) -> u32236 fn to_u32(self) -> u32 {
237 u32::from(gcg_to_packed_u16(self))
238 }
239 }
240
241 impl TrieValue for BidiMirroringGlyph {
242 type TryFromU32Error = u32;
243
try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error>244 fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
245 let code_point = i & 0x1FFFFF;
246 let mirroring_glyph = if code_point == 0 {
247 None
248 } else {
249 Some(char::try_from_u32(code_point).map_err(|_| i)?)
250 };
251 let mirrored = ((i >> 21) & 0x1) == 1;
252 let paired_bracket_type = {
253 let value = ((i >> 22) & 0x3) as u8;
254 match value {
255 0 => crate::bidi::BidiPairedBracketType::None,
256 1 => crate::bidi::BidiPairedBracketType::Open,
257 2 => crate::bidi::BidiPairedBracketType::Close,
258 _ => return Err(i),
259 }
260 };
261 Ok(Self {
262 mirrored,
263 mirroring_glyph,
264 paired_bracket_type,
265 })
266 }
267
to_u32(self) -> u32268 fn to_u32(self) -> u32 {
269 self.mirroring_glyph.unwrap_or_default() as u32
270 | ((self.mirrored as u32) << 21)
271 | (match self.paired_bracket_type {
272 crate::bidi::BidiPairedBracketType::None => 0,
273 crate::bidi::BidiPairedBracketType::Open => 1,
274 crate::bidi::BidiPairedBracketType::Close => 2,
275 } << 22)
276 }
277 }
278
279 impl TrieValue for JoiningType {
280 type TryFromU32Error = TryFromIntError;
281
try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error>282 fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
283 u8::try_from(i).map(Self)
284 }
285
to_u32(self) -> u32286 fn to_u32(self) -> u32 {
287 u32::from(self.0)
288 }
289 }
290