1 // This file is part of ICU4X. For terms of use, please see the file
2 // called LICENSE at the top level of the ICU4X source tree
3 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5 #[cfg(feature = "alloc")]
6 use alloc::borrow::Cow;
7 #[cfg(feature = "alloc")]
8 use alloc::borrow::ToOwned;
9 #[cfg(feature = "alloc")]
10 use alloc::boxed::Box;
11 #[cfg(feature = "alloc")]
12 use alloc::string::String;
13 #[cfg(feature = "alloc")]
14 use core::cmp::Ordering;
15 use core::default::Default;
16 use core::fmt;
17 use core::fmt::Debug;
18 use core::hash::Hash;
19 use core::ops::Deref;
20 #[cfg(feature = "alloc")]
21 use zerovec::ule::VarULE;
22
23 pub use icu_locale_core::DataLocale;
24
25 /// The request type passed into all data provider implementations.
26 #[derive(Default, Debug, Clone, Copy, PartialEq, Eq)]
27 #[allow(clippy::exhaustive_structs)] // this type is stable
28 pub struct DataRequest<'a> {
29 /// The data identifier for which to load data.
30 ///
31 /// If locale fallback is enabled, the resulting data may be from a different identifier
32 /// than the one requested here.
33 pub id: DataIdentifierBorrowed<'a>,
34 /// Metadata that may affect the behavior of the data provider.
35 pub metadata: DataRequestMetadata,
36 }
37
/// Metadata for data requests.
///
/// The struct is non-exhaustive: it may be extended with further options, for example
/// for tuning locale fallback, buffer layout, and so forth.
#[non_exhaustive]
#[derive(Clone, Copy, Debug, Default, Eq, Ord, PartialEq, PartialOrd)]
pub struct DataRequestMetadata {
    /// Silent requests do not log errors. This can be used for exploratory querying, such as fallbacks.
    pub silent: bool,
    /// Whether to allow prefix matches for the data marker attributes.
    pub attributes_prefix_match: bool,
}
48
49 /// The borrowed version of a [`DataIdentifierCow`].
50 #[derive(Default, Debug, Clone, Copy, PartialEq, Eq)]
51 #[non_exhaustive]
52 pub struct DataIdentifierBorrowed<'a> {
53 /// Marker-specific request attributes
54 pub marker_attributes: &'a DataMarkerAttributes,
55 /// The CLDR locale
56 pub locale: &'a DataLocale,
57 }
58
59 impl fmt::Display for DataIdentifierBorrowed<'_> {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result60 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
61 fmt::Display::fmt(self.locale, f)?;
62 if !self.marker_attributes.is_empty() {
63 write!(f, "/{}", self.marker_attributes.as_str())?;
64 }
65 Ok(())
66 }
67 }
68
69 impl<'a> DataIdentifierBorrowed<'a> {
70 /// Creates a [`DataIdentifierBorrowed`] for a borrowed [`DataLocale`].
for_locale(locale: &'a DataLocale) -> Self71 pub fn for_locale(locale: &'a DataLocale) -> Self {
72 Self {
73 locale,
74 ..Default::default()
75 }
76 }
77
78 /// Creates a [`DataIdentifierBorrowed`] for a borrowed [`DataMarkerAttributes`].
for_marker_attributes(marker_attributes: &'a DataMarkerAttributes) -> Self79 pub fn for_marker_attributes(marker_attributes: &'a DataMarkerAttributes) -> Self {
80 Self {
81 marker_attributes,
82 ..Default::default()
83 }
84 }
85
86 /// Creates a [`DataIdentifierBorrowed`] for a borrowed [`DataMarkerAttributes`] and [`DataLocale`].
for_marker_attributes_and_locale( marker_attributes: &'a DataMarkerAttributes, locale: &'a DataLocale, ) -> Self87 pub fn for_marker_attributes_and_locale(
88 marker_attributes: &'a DataMarkerAttributes,
89 locale: &'a DataLocale,
90 ) -> Self {
91 Self {
92 marker_attributes,
93 locale,
94 }
95 }
96
97 /// Converts this [`DataIdentifierBorrowed`] into a [`DataIdentifierCow<'static>`].
98 #[cfg(feature = "alloc")]
into_owned(self) -> DataIdentifierCow<'static>99 pub fn into_owned(self) -> DataIdentifierCow<'static> {
100 DataIdentifierCow {
101 marker_attributes: Cow::Owned(self.marker_attributes.to_owned()),
102 locale: *self.locale,
103 }
104 }
105
106 /// Borrows this [`DataIdentifierBorrowed`] as a [`DataIdentifierCow<'a>`].
107 #[cfg(feature = "alloc")]
as_cow(self) -> DataIdentifierCow<'a>108 pub fn as_cow(self) -> DataIdentifierCow<'a> {
109 DataIdentifierCow {
110 marker_attributes: Cow::Borrowed(self.marker_attributes),
111 locale: *self.locale,
112 }
113 }
114 }
115
/// A data identifier identifies a particular version of data, such as "English".
///
/// It pairs a [`DataLocale`] with [`DataMarkerAttributes`]; the attributes may be
/// either borrowed or owned.
#[cfg(feature = "alloc")]
#[non_exhaustive]
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct DataIdentifierCow<'a> {
    /// Marker-specific request attributes
    pub marker_attributes: Cow<'a, DataMarkerAttributes>,
    /// The CLDR locale
    pub locale: DataLocale,
}
128
/// Partial ordering that always agrees with the total ordering from [`Ord`].
#[cfg(feature = "alloc")]
impl PartialOrd for DataIdentifierCow<'_> {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(Ord::cmp(self, other))
    }
}
135
/// Orders identifiers by marker attributes first, and by locale only to break ties.
#[cfg(feature = "alloc")]
impl Ord for DataIdentifierCow<'_> {
    fn cmp(&self, other: &Self) -> Ordering {
        match self.marker_attributes.cmp(&other.marker_attributes) {
            // Same attributes: the locale decides.
            Ordering::Equal => self.locale.total_cmp(&other.locale),
            unequal => unequal,
        }
    }
}
144
/// Formats the identifier as the locale, followed by `/` and the marker attributes
/// when the marker attributes are not empty.
#[cfg(feature = "alloc")]
impl fmt::Display for DataIdentifierCow<'_> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        fmt::Display::fmt(&self.locale, f)?;
        if self.marker_attributes.is_empty() {
            // Nothing to append after the locale.
            return Ok(());
        }
        write!(f, "/{}", self.marker_attributes.as_str())
    }
}
155
#[cfg(feature = "alloc")]
impl<'a> DataIdentifierCow<'a> {
    /// Borrows this [`DataIdentifierCow`] as a [`DataIdentifierBorrowed<'a>`].
    ///
    /// No data is copied: both fields of the result point into `self`.
    pub fn as_borrowed(&'a self) -> DataIdentifierBorrowed<'a> {
        DataIdentifierBorrowed {
            marker_attributes: &self.marker_attributes,
            locale: &self.locale,
        }
    }

    /// Creates a [`DataIdentifierCow`] from an owned [`DataLocale`].
    ///
    /// The marker attributes are empty.
    pub fn from_locale(locale: DataLocale) -> Self {
        Self {
            marker_attributes: Cow::Borrowed(DataMarkerAttributes::empty()),
            locale,
        }
    }

    /// Creates a [`DataIdentifierCow`] from a borrowed [`DataMarkerAttributes`].
    ///
    /// The locale takes its default value.
    pub fn from_marker_attributes(marker_attributes: &'a DataMarkerAttributes) -> Self {
        Self {
            marker_attributes: Cow::Borrowed(marker_attributes),
            locale: Default::default(),
        }
    }

    /// Creates a [`DataIdentifierCow`] from an owned [`DataMarkerAttributes`].
    ///
    /// The locale takes its default value.
    pub fn from_marker_attributes_owned(marker_attributes: Box<DataMarkerAttributes>) -> Self {
        Self {
            marker_attributes: Cow::Owned(marker_attributes),
            locale: Default::default(),
        }
    }

    /// Creates a [`DataIdentifierCow`] from an owned [`DataMarkerAttributes`] and an owned [`DataLocale`].
    // No per-method `cfg` here: the enclosing `impl` is already gated on the `alloc` feature.
    pub fn from_owned(marker_attributes: Box<DataMarkerAttributes>, locale: DataLocale) -> Self {
        Self {
            marker_attributes: Cow::Owned(marker_attributes),
            locale,
        }
    }

    /// Creates a [`DataIdentifierCow`] from a borrowed [`DataMarkerAttributes`] and an owned [`DataLocale`].
    pub fn from_borrowed_and_owned(
        marker_attributes: &'a DataMarkerAttributes,
        locale: DataLocale,
    ) -> Self {
        Self {
            marker_attributes: Cow::Borrowed(marker_attributes),
            locale,
        }
    }

    /// Returns whether this id is equal to the default.
    ///
    /// That is the case when the marker attributes are empty and the locale reports
    /// itself as the default locale.
    pub fn is_default(&self) -> bool {
        self.marker_attributes.is_empty() && self.locale.is_default()
    }
}
215
/// The default identifier: empty (borrowed) marker attributes and the default locale.
#[cfg(feature = "alloc")]
impl Default for DataIdentifierCow<'_> {
    fn default() -> Self {
        let marker_attributes = Cow::Borrowed(DataMarkerAttributes::empty());
        let locale = DataLocale::default();
        Self {
            marker_attributes,
            locale,
        }
    }
}
225
/// An additional key to identify data beyond a [`DataLocale`].
///
/// This is a loose wrapper around a string, with semantics defined by each [`DataMarker`](crate::DataMarker).
#[repr(transparent)]
#[derive(Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct DataMarkerAttributes {
    // Validated to contain only ASCII alphanumerics, hyphens and underscores.
    // The string may be empty.
    value: str,
}
235
236 impl Default for &DataMarkerAttributes {
default() -> Self237 fn default() -> Self {
238 DataMarkerAttributes::empty()
239 }
240 }
241
242 impl Deref for DataMarkerAttributes {
243 type Target = str;
244 #[inline]
deref(&self) -> &Self::Target245 fn deref(&self) -> &Self::Target {
246 &self.value
247 }
248 }
249
250 impl Debug for DataMarkerAttributes {
251 #[inline]
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result252 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
253 self.value.fmt(f)
254 }
255 }
256
/// Invalid character.
///
/// Returned when a candidate [`DataMarkerAttributes`] string contains a character
/// other than `[a-zA-Z0-9_\-]`.
#[non_exhaustive]
#[derive(Debug)]
pub struct AttributeParseError;
261
262 impl DataMarkerAttributes {
263 /// Safety-usable invariant: validated bytes are ASCII only
validate(s: &[u8]) -> Result<(), AttributeParseError>264 const fn validate(s: &[u8]) -> Result<(), AttributeParseError> {
265 let mut i = 0;
266 while i < s.len() {
267 #[allow(clippy::indexing_slicing)] // duh
268 if !matches!(s[i], b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'-' | b'_') {
269 return Err(AttributeParseError);
270 }
271 i += 1;
272 }
273 Ok(())
274 }
275
276 /// Creates a borrowed [`DataMarkerAttributes`] from a borrowed string.
277 ///
278 /// Returns an error if the string contains characters other than `[a-zA-Z0-9_\-]`.
try_from_str(s: &str) -> Result<&Self, AttributeParseError>279 pub const fn try_from_str(s: &str) -> Result<&Self, AttributeParseError> {
280 Self::try_from_utf8(s.as_bytes())
281 }
282
283 /// Attempts to create a borrowed [`DataMarkerAttributes`] from a borrowed UTF-8 encoded byte slice.
284 ///
285 /// # Examples
286 ///
287 /// ```
288 /// use icu_provider::prelude::*;
289 ///
290 /// let bytes = b"long-meter";
291 /// let marker = DataMarkerAttributes::try_from_utf8(bytes).unwrap();
292 /// assert_eq!(marker.to_string(), "long-meter");
293 /// ```
294 ///
295 /// # Errors
296 ///
297 /// Returns an error if the byte slice contains code units other than `[a-zA-Z0-9_\-]`.
try_from_utf8(code_units: &[u8]) -> Result<&Self, AttributeParseError>298 pub const fn try_from_utf8(code_units: &[u8]) -> Result<&Self, AttributeParseError> {
299 let Ok(()) = Self::validate(code_units) else {
300 return Err(AttributeParseError);
301 };
302
303 // SAFETY: `validate` requires a UTF-8 subset
304 let s = unsafe { core::str::from_utf8_unchecked(code_units) };
305
306 // SAFETY: `Self` has the same layout as `str`
307 Ok(unsafe { &*(s as *const str as *const Self) })
308 }
309
310 /// Creates an owned [`DataMarkerAttributes`] from an owned string.
311 ///
312 /// Returns an error if the string contains characters other than `[a-zA-Z0-9_\-]`.
313 #[cfg(feature = "alloc")]
try_from_string(s: String) -> Result<Box<Self>, AttributeParseError>314 pub fn try_from_string(s: String) -> Result<Box<Self>, AttributeParseError> {
315 let Ok(()) = Self::validate(s.as_bytes()) else {
316 return Err(AttributeParseError);
317 };
318
319 // SAFETY: `Self` has the same layout as `str`
320 Ok(unsafe { core::mem::transmute::<Box<str>, Box<Self>>(s.into_boxed_str()) })
321 }
322
323 /// Creates a borrowed [`DataMarkerAttributes`] from a borrowed string.
324 ///
325 /// Panics if the string contains characters other than `[a-zA-Z0-9_\-]`.
from_str_or_panic(s: &str) -> &Self326 pub const fn from_str_or_panic(s: &str) -> &Self {
327 let Ok(r) = Self::try_from_str(s) else {
328 panic!("Invalid marker attribute syntax")
329 };
330 r
331 }
332
333 /// Creates an empty [`DataMarkerAttributes`].
empty() -> &'static Self334 pub const fn empty() -> &'static Self {
335 // SAFETY: `Self` has the same layout as `str`
336 unsafe { &*("" as *const str as *const Self) }
337 }
338
339 /// Returns this [`DataMarkerAttributes`] as a `&str`.
as_str(&self) -> &str340 pub const fn as_str(&self) -> &str {
341 &self.value
342 }
343 }
344
/// The owned form of [`DataMarkerAttributes`] is a boxed copy of the same string.
#[cfg(feature = "alloc")]
impl ToOwned for DataMarkerAttributes {
    type Owned = Box<Self>;

    fn to_owned(&self) -> Self::Owned {
        let boxed: Box<str> = self.as_str().to_boxed();
        // SAFETY: `Self` has the same layout as `str`
        unsafe { core::mem::transmute::<Box<str>, Box<Self>>(boxed) }
    }
}
353
/// Valid attribute byte strings parse successfully and stringify back to the same bytes.
#[test]
fn test_data_marker_attributes_from_utf8() {
    let cases: [&[u8]; 5] = [
        b"long-meter",
        b"long",
        b"meter",
        b"short-meter-second",
        b"usd",
    ];

    for &expected in &cases {
        let parsed = DataMarkerAttributes::try_from_utf8(expected).unwrap();
        assert_eq!(parsed.to_string().as_bytes(), expected);
    }
}
369