• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // This file is part of ICU4X. For terms of use, please see the file
2 // called LICENSE at the top level of the ICU4X source tree
3 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4 
5 pub use super::errors::ParseError;
6 use crate::extensions::unicode::{Attribute, Key, Value};
7 use crate::extensions::ExtensionType;
8 use crate::parser::SubtagIterator;
9 #[cfg(feature = "alloc")]
10 use crate::shortvec::ShortBoxSlice;
11 use crate::subtags::Subtag;
12 #[cfg(feature = "alloc")]
13 use crate::LanguageIdentifier;
14 use crate::{extensions, subtags};
15 
16 #[derive(PartialEq, Clone, Copy)]
17 pub enum ParserMode {
18     LanguageIdentifier,
19     Locale,
20     #[allow(dead_code)]
21     Partial,
22 }
23 
24 #[derive(PartialEq, Clone, Copy)]
25 enum ParserPosition {
26     Script,
27     Region,
28     Variant,
29 }
30 
31 #[cfg(feature = "alloc")]
parse_language_identifier_from_iter( iter: &mut SubtagIterator, mode: ParserMode, ) -> Result<LanguageIdentifier, ParseError>32 pub fn parse_language_identifier_from_iter(
33     iter: &mut SubtagIterator,
34     mode: ParserMode,
35 ) -> Result<LanguageIdentifier, ParseError> {
36     let mut script = None;
37     let mut region = None;
38     let mut variants = ShortBoxSlice::new();
39 
40     let language = if let Some(subtag) = iter.next() {
41         subtags::Language::try_from_utf8(subtag)?
42     } else {
43         return Err(ParseError::InvalidLanguage);
44     };
45 
46     let mut position = ParserPosition::Script;
47 
48     while let Some(subtag) = iter.peek() {
49         if mode != ParserMode::LanguageIdentifier && subtag.len() == 1 {
50             break;
51         }
52 
53         if position == ParserPosition::Script {
54             if let Ok(s) = subtags::Script::try_from_utf8(subtag) {
55                 script = Some(s);
56                 position = ParserPosition::Region;
57             } else if let Ok(s) = subtags::Region::try_from_utf8(subtag) {
58                 region = Some(s);
59                 position = ParserPosition::Variant;
60             } else if let Ok(v) = subtags::Variant::try_from_utf8(subtag) {
61                 if let Err(idx) = variants.binary_search(&v) {
62                     variants.insert(idx, v);
63                 }
64                 position = ParserPosition::Variant;
65             } else if mode == ParserMode::Partial {
66                 break;
67             } else {
68                 return Err(ParseError::InvalidSubtag);
69             }
70         } else if position == ParserPosition::Region {
71             if let Ok(s) = subtags::Region::try_from_utf8(subtag) {
72                 region = Some(s);
73                 position = ParserPosition::Variant;
74             } else if let Ok(v) = subtags::Variant::try_from_utf8(subtag) {
75                 if let Err(idx) = variants.binary_search(&v) {
76                     variants.insert(idx, v);
77                 }
78                 position = ParserPosition::Variant;
79             } else if mode == ParserMode::Partial {
80                 break;
81             } else {
82                 return Err(ParseError::InvalidSubtag);
83             }
84         } else if let Ok(v) = subtags::Variant::try_from_utf8(subtag) {
85             if let Err(idx) = variants.binary_search(&v) {
86                 variants.insert(idx, v);
87             } else {
88                 return Err(ParseError::InvalidSubtag);
89             }
90         } else if mode == ParserMode::Partial {
91             break;
92         } else {
93             return Err(ParseError::InvalidSubtag);
94         }
95         iter.next();
96     }
97 
98     Ok(LanguageIdentifier {
99         language,
100         script,
101         region,
102         variants: subtags::Variants::from_short_slice_unchecked(variants),
103     })
104 }
105 
106 #[cfg(feature = "alloc")]
parse_language_identifier( t: &[u8], mode: ParserMode, ) -> Result<LanguageIdentifier, ParseError>107 pub fn parse_language_identifier(
108     t: &[u8],
109     mode: ParserMode,
110 ) -> Result<LanguageIdentifier, ParseError> {
111     let mut iter = SubtagIterator::new(t);
112     parse_language_identifier_from_iter(&mut iter, mode)
113 }
114 
115 #[allow(clippy::type_complexity)]
parse_locale_with_single_variant_single_keyword_unicode_extension_from_iter( mut iter: SubtagIterator, mode: ParserMode, ) -> Result< ( subtags::Language, Option<subtags::Script>, Option<subtags::Region>, Option<subtags::Variant>, Option<(extensions::unicode::Key, Option<Subtag>)>, ), ParseError, >116 pub const fn parse_locale_with_single_variant_single_keyword_unicode_extension_from_iter(
117     mut iter: SubtagIterator,
118     mode: ParserMode,
119 ) -> Result<
120     (
121         subtags::Language,
122         Option<subtags::Script>,
123         Option<subtags::Region>,
124         Option<subtags::Variant>,
125         Option<(extensions::unicode::Key, Option<Subtag>)>,
126     ),
127     ParseError,
128 > {
129     let language;
130     let mut script = None;
131     let mut region = None;
132     let mut variant = None;
133     let mut keyword = None;
134 
135     if let (i, Some(subtag)) = iter.next_const() {
136         iter = i;
137         match subtags::Language::try_from_utf8(subtag) {
138             Ok(l) => language = l,
139             Err(e) => return Err(e),
140         }
141     } else {
142         return Err(ParseError::InvalidLanguage);
143     }
144 
145     let mut position = ParserPosition::Script;
146 
147     while let Some(subtag) = iter.peek() {
148         if !matches!(mode, ParserMode::LanguageIdentifier) && subtag.len() == 1 {
149             break;
150         }
151 
152         if matches!(position, ParserPosition::Script) {
153             if let Ok(s) = subtags::Script::try_from_utf8(subtag) {
154                 script = Some(s);
155                 position = ParserPosition::Region;
156             } else if let Ok(r) = subtags::Region::try_from_utf8(subtag) {
157                 region = Some(r);
158                 position = ParserPosition::Variant;
159             } else if let Ok(v) = subtags::Variant::try_from_utf8(subtag) {
160                 // We cannot handle multiple variants in a const context
161                 debug_assert!(variant.is_none());
162                 variant = Some(v);
163                 position = ParserPosition::Variant;
164             } else if matches!(mode, ParserMode::Partial) {
165                 break;
166             } else {
167                 return Err(ParseError::InvalidSubtag);
168             }
169         } else if matches!(position, ParserPosition::Region) {
170             if let Ok(s) = subtags::Region::try_from_utf8(subtag) {
171                 region = Some(s);
172                 position = ParserPosition::Variant;
173             } else if let Ok(v) = subtags::Variant::try_from_utf8(subtag) {
174                 // We cannot handle multiple variants in a const context
175                 debug_assert!(variant.is_none());
176                 variant = Some(v);
177                 position = ParserPosition::Variant;
178             } else if matches!(mode, ParserMode::Partial) {
179                 break;
180             } else {
181                 return Err(ParseError::InvalidSubtag);
182             }
183         } else if let Ok(v) = subtags::Variant::try_from_utf8(subtag) {
184             debug_assert!(matches!(position, ParserPosition::Variant));
185             if variant.is_some() {
186                 // We cannot handle multiple variants in a const context
187                 return Err(ParseError::InvalidSubtag);
188             }
189             variant = Some(v);
190         } else if matches!(mode, ParserMode::Partial) {
191             break;
192         } else {
193             return Err(ParseError::InvalidSubtag);
194         }
195 
196         iter = iter.next_const().0;
197     }
198 
199     if matches!(mode, ParserMode::Locale) {
200         if let Some(subtag) = iter.peek() {
201             match ExtensionType::try_from_utf8(subtag) {
202                 Ok(ExtensionType::Unicode) => {
203                     iter = iter.next_const().0;
204                     if let Some(peek) = iter.peek() {
205                         if Attribute::try_from_utf8(peek).is_ok() {
206                             // We cannot handle Attributes in a const context
207                             return Err(ParseError::InvalidSubtag);
208                         }
209                     }
210 
211                     let mut key = None;
212                     let mut current_type = None;
213 
214                     while let Some(peek) = iter.peek() {
215                         if peek.len() == 2 {
216                             if key.is_some() {
217                                 // We cannot handle more than one Key in a const context
218                                 return Err(ParseError::InvalidSubtag);
219                             }
220                             match Key::try_from_utf8(peek) {
221                                 Ok(k) => key = Some(k),
222                                 Err(e) => return Err(e),
223                             };
224                         } else if key.is_some() {
225                             match Value::parse_subtag_from_utf8(peek) {
226                                 Ok(Some(t)) => {
227                                     if current_type.is_some() {
228                                         // We cannot handle more than one type in a const context
229                                         return Err(ParseError::InvalidSubtag);
230                                     }
231                                     current_type = Some(t);
232                                 }
233                                 Ok(None) => {}
234                                 Err(e) => return Err(e),
235                             }
236                         } else {
237                             break;
238                         }
239                         iter = iter.next_const().0;
240                     }
241                     if let Some(k) = key {
242                         keyword = Some((k, current_type));
243                     }
244                 }
245                 // We cannot handle Transform, Private, Other extensions in a const context
246                 Ok(_) => return Err(ParseError::InvalidSubtag),
247                 Err(e) => return Err(e),
248             }
249         }
250     }
251 
252     Ok((language, script, region, variant, keyword))
253 }
254 
255 #[allow(clippy::type_complexity)]
parse_language_identifier_with_single_variant( t: &[u8], mode: ParserMode, ) -> Result< ( subtags::Language, Option<subtags::Script>, Option<subtags::Region>, Option<subtags::Variant>, ), ParseError, >256 pub const fn parse_language_identifier_with_single_variant(
257     t: &[u8],
258     mode: ParserMode,
259 ) -> Result<
260     (
261         subtags::Language,
262         Option<subtags::Script>,
263         Option<subtags::Region>,
264         Option<subtags::Variant>,
265     ),
266     ParseError,
267 > {
268     let iter = SubtagIterator::new(t);
269     match parse_locale_with_single_variant_single_keyword_unicode_extension_from_iter(iter, mode) {
270         Ok((l, s, r, v, _)) => Ok((l, s, r, v)),
271         Err(e) => Err(e),
272     }
273 }
274