• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4 
5 use crate::bidi::BidiMirroringGlyph;
6 use crate::props::{
7     BidiClass, CanonicalCombiningClass, EastAsianWidth, GeneralCategory, GeneralCategoryGroup,
8     GraphemeClusterBreak, HangulSyllableType, IndicSyllabicCategory, JoiningType, LineBreak,
9     Script, SentenceBreak, WordBreak,
10 };
11 use crate::script::ScriptWithExt;
12 use core::convert::TryInto;
13 use core::num::TryFromIntError;
14 use zerovec::ule::{AsULE, RawBytesULE};
15 
16 use icu_collections::codepointtrie::TrieValue;
17 
18 use core::convert::TryFrom;
19 
20 impl TrieValue for CanonicalCombiningClass {
21     type TryFromU32Error = TryFromIntError;
22 
try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error>23     fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
24         u8::try_from(i).map(Self)
25     }
26 
to_u32(self) -> u3227     fn to_u32(self) -> u32 {
28         u32::from(self.0)
29     }
30 }
31 
32 impl TrieValue for BidiClass {
33     type TryFromU32Error = TryFromIntError;
34 
try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error>35     fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
36         u8::try_from(i).map(Self)
37     }
38 
to_u32(self) -> u3239     fn to_u32(self) -> u32 {
40         u32::from(self.0)
41     }
42 }
43 
44 impl TrieValue for GeneralCategory {
45     type TryFromU32Error = &'static str;
46 
try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error>47     fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
48         // If the u32 is out of range, fall back to u8::MAX, which is out of range of the GeneralCategory enum.
49         GeneralCategory::new_from_u8(i.try_into().unwrap_or(u8::MAX))
50             .ok_or("Cannot parse GeneralCategory from integer")
51     }
52 
to_u32(self) -> u3253     fn to_u32(self) -> u32 {
54         u32::from(self as u8)
55     }
56 }
57 
58 impl TrieValue for Script {
59     type TryFromU32Error = TryFromIntError;
60 
try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error>61     fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
62         u16::try_from(i).map(Script)
63     }
64 
to_u32(self) -> u3265     fn to_u32(self) -> u32 {
66         u32::from(self.0)
67     }
68 }
69 
70 impl TrieValue for HangulSyllableType {
71     type TryFromU32Error = TryFromIntError;
72 
try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error>73     fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
74         u8::try_from(i).map(Self)
75     }
76 
to_u32(self) -> u3277     fn to_u32(self) -> u32 {
78         u32::from(self.0)
79     }
80 }
81 
82 impl TrieValue for ScriptWithExt {
83     type TryFromU32Error = TryFromIntError;
84 
try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error>85     fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
86         u16::try_from(i).map(Self)
87     }
88 
to_u32(self) -> u3289     fn to_u32(self) -> u32 {
90         u32::from(self.0)
91     }
92 }
93 
94 impl TrieValue for EastAsianWidth {
95     type TryFromU32Error = TryFromIntError;
96 
try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error>97     fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
98         u8::try_from(i).map(Self)
99     }
100 
to_u32(self) -> u32101     fn to_u32(self) -> u32 {
102         u32::from(self.0)
103     }
104 }
105 
106 impl TrieValue for LineBreak {
107     type TryFromU32Error = TryFromIntError;
108 
try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error>109     fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
110         u8::try_from(i).map(Self)
111     }
112 
to_u32(self) -> u32113     fn to_u32(self) -> u32 {
114         u32::from(self.0)
115     }
116 }
117 
118 impl TrieValue for GraphemeClusterBreak {
119     type TryFromU32Error = TryFromIntError;
120 
try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error>121     fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
122         u8::try_from(i).map(Self)
123     }
124 
to_u32(self) -> u32125     fn to_u32(self) -> u32 {
126         u32::from(self.0)
127     }
128 }
129 
130 impl TrieValue for WordBreak {
131     type TryFromU32Error = TryFromIntError;
132 
try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error>133     fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
134         u8::try_from(i).map(Self)
135     }
136 
to_u32(self) -> u32137     fn to_u32(self) -> u32 {
138         u32::from(self.0)
139     }
140 }
141 
142 impl TrieValue for SentenceBreak {
143     type TryFromU32Error = TryFromIntError;
144 
try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error>145     fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
146         u8::try_from(i).map(Self)
147     }
148 
to_u32(self) -> u32149     fn to_u32(self) -> u32 {
150         u32::from(self.0)
151     }
152 }
153 
154 impl TrieValue for IndicSyllabicCategory {
155     type TryFromU32Error = TryFromIntError;
156 
try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error>157     fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
158         u8::try_from(i).map(Self)
159     }
160 
to_u32(self) -> u32161     fn to_u32(self) -> u32 {
162         u32::from(self.0)
163     }
164 }
165 
166 // GCG is not used inside tries, but it is used in the name lookup type, and we want
167 // to squeeze it into a u16 for storage. Its named mask values are specced so we can
168 // do this in code.
169 //
170 // This is done by:
171 // - Single-value masks are translated to their corresponding GeneralCategory values
172 // - we know all of the multi-value masks and we give them special values
173 // - Anything else goes to 0xFF00, though this code path shouldn't be hit unless working with malformed icuexportdata
174 //
175 // In the reverse direction, unknown values go to the empty mask, but this codepath should not be hit except
176 // with malformed ICU4X generated data.
177 impl AsULE for GeneralCategoryGroup {
178     type ULE = RawBytesULE<2>;
to_unaligned(self) -> Self::ULE179     fn to_unaligned(self) -> Self::ULE {
180         let value = gcg_to_packed_u16(self);
181         value.to_unaligned()
182     }
from_unaligned(ule: Self::ULE) -> Self183     fn from_unaligned(ule: Self::ULE) -> Self {
184         let value = ule.as_unsigned_int();
185         packed_u16_to_gcg(value)
186     }
187 }
188 
packed_u16_to_gcg(value: u16) -> GeneralCategoryGroup189 fn packed_u16_to_gcg(value: u16) -> GeneralCategoryGroup {
190     match value {
191         0xFFFF => GeneralCategoryGroup::CasedLetter,
192         0xFFFE => GeneralCategoryGroup::Letter,
193         0xFFFD => GeneralCategoryGroup::Mark,
194         0xFFFC => GeneralCategoryGroup::Number,
195         0xFFFB => GeneralCategoryGroup::Separator,
196         0xFFFA => GeneralCategoryGroup::Other,
197         0xFFF9 => GeneralCategoryGroup::Punctuation,
198         0xFFF8 => GeneralCategoryGroup::Symbol,
199         v if v < 32 => GeneralCategory::new_from_u8(v as u8)
200             .map(|gc| gc.into())
201             .unwrap_or(GeneralCategoryGroup(0)),
202         // unknown values produce an empty mask
203         _ => GeneralCategoryGroup(0),
204     }
205 }
206 
gcg_to_packed_u16(gcg: GeneralCategoryGroup) -> u16207 fn gcg_to_packed_u16(gcg: GeneralCategoryGroup) -> u16 {
208     // if it's a single property, translate to that property
209     if gcg.0.is_power_of_two() {
210         // inverse operation of a bitshift
211         gcg.0.trailing_zeros() as u16
212     } else {
213         match gcg {
214             GeneralCategoryGroup::CasedLetter => 0xFFFF,
215             GeneralCategoryGroup::Letter => 0xFFFE,
216             GeneralCategoryGroup::Mark => 0xFFFD,
217             GeneralCategoryGroup::Number => 0xFFFC,
218             GeneralCategoryGroup::Separator => 0xFFFB,
219             GeneralCategoryGroup::Other => 0xFFFA,
220             GeneralCategoryGroup::Punctuation => 0xFFF9,
221             GeneralCategoryGroup::Symbol => 0xFFF8,
222             _ => 0xFF00, // random sentinel value
223         }
224     }
225 }
226 
227 impl TrieValue for GeneralCategoryGroup {
228     type TryFromU32Error = TryFromIntError;
try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error>229     fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
230         // Even though we're dealing with u32s here, TrieValue is about converting
231         // trie storage types to the actual type. This type will always be a packed u16
232         // in our case since the names map upcasts from u16
233         u16::try_from(i).map(packed_u16_to_gcg)
234     }
235 
to_u32(self) -> u32236     fn to_u32(self) -> u32 {
237         u32::from(gcg_to_packed_u16(self))
238     }
239 }
240 
241 impl TrieValue for BidiMirroringGlyph {
242     type TryFromU32Error = u32;
243 
try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error>244     fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
245         let code_point = i & 0x1FFFFF;
246         let mirroring_glyph = if code_point == 0 {
247             None
248         } else {
249             Some(char::try_from_u32(code_point).map_err(|_| i)?)
250         };
251         let mirrored = ((i >> 21) & 0x1) == 1;
252         let paired_bracket_type = {
253             let value = ((i >> 22) & 0x3) as u8;
254             match value {
255                 0 => crate::bidi::BidiPairedBracketType::None,
256                 1 => crate::bidi::BidiPairedBracketType::Open,
257                 2 => crate::bidi::BidiPairedBracketType::Close,
258                 _ => return Err(i),
259             }
260         };
261         Ok(Self {
262             mirrored,
263             mirroring_glyph,
264             paired_bracket_type,
265         })
266     }
267 
to_u32(self) -> u32268     fn to_u32(self) -> u32 {
269         self.mirroring_glyph.unwrap_or_default() as u32
270             | ((self.mirrored as u32) << 21)
271             | (match self.paired_bracket_type {
272                 crate::bidi::BidiPairedBracketType::None => 0,
273                 crate::bidi::BidiPairedBracketType::Open => 1,
274                 crate::bidi::BidiPairedBracketType::Close => 2,
275             } << 22)
276     }
277 }
278 
279 impl TrieValue for JoiningType {
280     type TryFromU32Error = TryFromIntError;
281 
try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error>282     fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
283         u8::try_from(i).map(Self)
284     }
285 
to_u32(self) -> u32286     fn to_u32(self) -> u32 {
287         u32::from(self.0)
288     }
289 }
290