• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // This file is part of ICU4X. For terms of use, please see the file
2 // called LICENSE at the top level of the ICU4X source tree
3 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4 
5 use crate::asciibyte::AsciiByte;
6 use crate::int_ops::{Aligned4, Aligned8};
7 use crate::ParseError;
8 use core::borrow::Borrow;
9 use core::fmt;
10 use core::ops::Deref;
11 use core::str::{self, FromStr};
12 
13 #[repr(transparent)]
14 #[derive(PartialEq, Eq, Ord, PartialOrd, Copy, Clone, Hash)]
15 pub struct TinyAsciiStr<const N: usize> {
16     bytes: [AsciiByte; N],
17 }
18 
19 impl<const N: usize> TinyAsciiStr<N> {
20     #[inline]
try_from_str(s: &str) -> Result<Self, ParseError>21     pub const fn try_from_str(s: &str) -> Result<Self, ParseError> {
22         Self::try_from_utf8(s.as_bytes())
23     }
24 
25     /// Creates a `TinyAsciiStr<N>` from the given UTF-8 slice.
26     /// `code_units` may contain at most `N` non-null ASCII code points.
27     #[inline]
try_from_utf8(code_units: &[u8]) -> Result<Self, ParseError>28     pub const fn try_from_utf8(code_units: &[u8]) -> Result<Self, ParseError> {
29         Self::try_from_utf8_inner(code_units, false)
30     }
31 
32     /// Creates a `TinyAsciiStr<N>` from the given UTF-16 slice.
33     /// `code_units` may contain at most `N` non-null ASCII code points.
34     #[inline]
try_from_utf16(code_units: &[u16]) -> Result<Self, ParseError>35     pub const fn try_from_utf16(code_units: &[u16]) -> Result<Self, ParseError> {
36         Self::try_from_utf16_inner(code_units, 0, code_units.len(), false)
37     }
38 
39     /// Creates a `TinyAsciiStr<N>` from a UTF-8 slice, replacing invalid code units.
40     ///
41     /// Invalid code units, as well as null or non-ASCII code points
42     /// (i.e. those outside the range U+0001..=U+007F`)
43     /// will be replaced with the replacement byte.
44     ///
45     /// The input slice will be truncated if its length exceeds `N`.
from_utf8_lossy(code_units: &[u8], replacement: u8) -> Self46     pub const fn from_utf8_lossy(code_units: &[u8], replacement: u8) -> Self {
47         let mut out = [0; N];
48         let mut i = 0;
49         // Ord is not available in const, so no `.min(N)`
50         let len = if code_units.len() > N {
51             N
52         } else {
53             code_units.len()
54         };
55 
56         // Indexing is protected by the len check above
57         #[allow(clippy::indexing_slicing)]
58         while i < len {
59             let b = code_units[i];
60             if b > 0 && b < 0x80 {
61                 out[i] = b;
62             } else {
63                 out[i] = replacement;
64             }
65             i += 1;
66         }
67 
68         Self {
69             // SAFETY: `out` only contains ASCII bytes and has same size as `self.bytes`
70             bytes: unsafe { AsciiByte::to_ascii_byte_array(&out) },
71         }
72     }
73 
74     /// Creates a `TinyAsciiStr<N>` from a UTF-16 slice, replacing invalid code units.
75     ///
76     /// Invalid code units, as well as null or non-ASCII code points
77     /// (i.e. those outside the range U+0001..=U+007F`)
78     /// will be replaced with the replacement byte.
79     ///
80     /// The input slice will be truncated if its length exceeds `N`.
from_utf16_lossy(code_units: &[u16], replacement: u8) -> Self81     pub const fn from_utf16_lossy(code_units: &[u16], replacement: u8) -> Self {
82         let mut out = [0; N];
83         let mut i = 0;
84         // Ord is not available in const, so no `.min(N)`
85         let len = if code_units.len() > N {
86             N
87         } else {
88             code_units.len()
89         };
90 
91         // Indexing is protected by the len check above
92         #[allow(clippy::indexing_slicing)]
93         while i < len {
94             let b = code_units[i];
95             if b > 0 && b < 0x80 {
96                 out[i] = b as u8;
97             } else {
98                 out[i] = replacement;
99             }
100             i += 1;
101         }
102 
103         Self {
104             // SAFETY: `out` only contains ASCII bytes and has same size as `self.bytes`
105             bytes: unsafe { AsciiByte::to_ascii_byte_array(&out) },
106         }
107     }
108 
109     /// Attempts to parse a fixed-length byte array to a `TinyAsciiStr`.
110     ///
111     /// The byte array may contain trailing NUL bytes.
112     ///
113     /// # Example
114     ///
115     /// ```
116     /// use tinystr::tinystr;
117     /// use tinystr::TinyAsciiStr;
118     ///
119     /// assert_eq!(
120     ///     TinyAsciiStr::<3>::try_from_raw(*b"GB\0"),
121     ///     Ok(tinystr!(3, "GB"))
122     /// );
123     /// assert_eq!(
124     ///     TinyAsciiStr::<3>::try_from_raw(*b"USD"),
125     ///     Ok(tinystr!(3, "USD"))
126     /// );
127     /// assert!(matches!(TinyAsciiStr::<3>::try_from_raw(*b"\0A\0"), Err(_)));
128     /// ```
try_from_raw(raw: [u8; N]) -> Result<Self, ParseError>129     pub const fn try_from_raw(raw: [u8; N]) -> Result<Self, ParseError> {
130         Self::try_from_utf8_inner(&raw, true)
131     }
132 
try_from_utf8_inner( code_units: &[u8], allow_trailing_null: bool, ) -> Result<Self, ParseError>133     pub(crate) const fn try_from_utf8_inner(
134         code_units: &[u8],
135         allow_trailing_null: bool,
136     ) -> Result<Self, ParseError> {
137         if code_units.len() > N {
138             return Err(ParseError::TooLong {
139                 max: N,
140                 len: code_units.len(),
141             });
142         }
143 
144         let mut out = [0; N];
145         let mut i = 0;
146         let mut found_null = false;
147         // Indexing is protected by TinyStrError::TooLarge
148         #[allow(clippy::indexing_slicing)]
149         while i < code_units.len() {
150             let b = code_units[i];
151 
152             if b == 0 {
153                 found_null = true;
154             } else if b >= 0x80 {
155                 return Err(ParseError::NonAscii);
156             } else if found_null {
157                 // Error if there are contentful bytes after null
158                 return Err(ParseError::ContainsNull);
159             }
160             out[i] = b;
161 
162             i += 1;
163         }
164 
165         if !allow_trailing_null && found_null {
166             // We found some trailing nulls, error
167             return Err(ParseError::ContainsNull);
168         }
169 
170         Ok(Self {
171             // SAFETY: `out` only contains ASCII bytes and has same size as `self.bytes`
172             bytes: unsafe { AsciiByte::to_ascii_byte_array(&out) },
173         })
174     }
175 
try_from_utf16_inner( code_units: &[u16], start: usize, end: usize, allow_trailing_null: bool, ) -> Result<Self, ParseError>176     pub(crate) const fn try_from_utf16_inner(
177         code_units: &[u16],
178         start: usize,
179         end: usize,
180         allow_trailing_null: bool,
181     ) -> Result<Self, ParseError> {
182         let len = end - start;
183         if len > N {
184             return Err(ParseError::TooLong { max: N, len });
185         }
186 
187         let mut out = [0; N];
188         let mut i = 0;
189         let mut found_null = false;
190         // Indexing is protected by TinyStrError::TooLarge
191         #[allow(clippy::indexing_slicing)]
192         while i < len {
193             let b = code_units[start + i];
194 
195             if b == 0 {
196                 found_null = true;
197             } else if b >= 0x80 {
198                 return Err(ParseError::NonAscii);
199             } else if found_null {
200                 // Error if there are contentful bytes after null
201                 return Err(ParseError::ContainsNull);
202             }
203             out[i] = b as u8;
204 
205             i += 1;
206         }
207 
208         if !allow_trailing_null && found_null {
209             // We found some trailing nulls, error
210             return Err(ParseError::ContainsNull);
211         }
212 
213         Ok(Self {
214             // SAFETY: `out` only contains ASCII bytes and has same size as `self.bytes`
215             bytes: unsafe { AsciiByte::to_ascii_byte_array(&out) },
216         })
217     }
218 
219     #[inline]
as_str(&self) -> &str220     pub const fn as_str(&self) -> &str {
221         // as_utf8 is valid utf8
222         unsafe { str::from_utf8_unchecked(self.as_utf8()) }
223     }
224 
225     #[inline]
226     #[must_use]
len(&self) -> usize227     pub const fn len(&self) -> usize {
228         if N <= 4 {
229             Aligned4::from_ascii_bytes(&self.bytes).len()
230         } else if N <= 8 {
231             Aligned8::from_ascii_bytes(&self.bytes).len()
232         } else {
233             let mut i = 0;
234             #[allow(clippy::indexing_slicing)] // < N is safe
235             while i < N && self.bytes[i] as u8 != AsciiByte::B0 as u8 {
236                 i += 1
237             }
238             i
239         }
240     }
241 
242     #[inline]
243     #[must_use]
is_empty(&self) -> bool244     pub const fn is_empty(&self) -> bool {
245         self.bytes[0] as u8 == AsciiByte::B0 as u8
246     }
247 
248     #[inline]
249     #[must_use]
as_utf8(&self) -> &[u8]250     pub const fn as_utf8(&self) -> &[u8] {
251         // Safe because `self.bytes.as_slice()` pointer-casts to `&[u8]`,
252         // and changing the length of that slice to self.len() < N is safe.
253         unsafe {
254             core::slice::from_raw_parts(self.bytes.as_slice().as_ptr() as *const u8, self.len())
255         }
256     }
257 
258     #[inline]
259     #[must_use]
all_bytes(&self) -> &[u8; N]260     pub const fn all_bytes(&self) -> &[u8; N] {
261         // SAFETY: `self.bytes` has same size as [u8; N]
262         unsafe { &*(self.bytes.as_ptr() as *const [u8; N]) }
263     }
264 
265     #[inline]
266     #[must_use]
267     /// Resizes a `TinyAsciiStr<N>` to a `TinyAsciiStr<M>`.
268     ///
269     /// If `M < len()` the string gets truncated, otherwise only the
270     /// memory representation changes.
resize<const M: usize>(self) -> TinyAsciiStr<M>271     pub const fn resize<const M: usize>(self) -> TinyAsciiStr<M> {
272         let mut bytes = [0; M];
273         let mut i = 0;
274         // Indexing is protected by the loop guard
275         #[allow(clippy::indexing_slicing)]
276         while i < M && i < N {
277             bytes[i] = self.bytes[i] as u8;
278             i += 1;
279         }
280         // `self.bytes` only contains ASCII bytes, with no null bytes between
281         // ASCII characters, so this also holds for `bytes`.
282         unsafe { TinyAsciiStr::from_utf8_unchecked(bytes) }
283     }
284 
285     #[inline]
286     #[must_use]
287     /// Returns a `TinyAsciiStr<Q>` with the concatenation of this string,
288     /// `TinyAsciiStr<N>`, and another string, `TinyAsciiStr<M>`.
289     ///
290     /// If `Q < N + M`, the string gets truncated.
291     ///
292     /// # Examples
293     ///
294     /// ```
295     /// use tinystr::tinystr;
296     /// use tinystr::TinyAsciiStr;
297     ///
298     /// let abc = tinystr!(6, "abc");
299     /// let defg = tinystr!(6, "defg");
300     ///
301     /// // The concatenation is successful if Q is large enough...
302     /// assert_eq!(abc.concat(defg), tinystr!(16, "abcdefg"));
303     /// assert_eq!(abc.concat(defg), tinystr!(12, "abcdefg"));
304     /// assert_eq!(abc.concat(defg), tinystr!(8, "abcdefg"));
305     /// assert_eq!(abc.concat(defg), tinystr!(7, "abcdefg"));
306     ///
307     /// /// ...but it truncates of Q is too small.
308     /// assert_eq!(abc.concat(defg), tinystr!(6, "abcdef"));
309     /// assert_eq!(abc.concat(defg), tinystr!(2, "ab"));
310     /// ```
concat<const M: usize, const Q: usize>( self, other: TinyAsciiStr<M>, ) -> TinyAsciiStr<Q>311     pub const fn concat<const M: usize, const Q: usize>(
312         self,
313         other: TinyAsciiStr<M>,
314     ) -> TinyAsciiStr<Q> {
315         let mut result = self.resize::<Q>();
316         let mut i = self.len();
317         let mut j = 0;
318         // Indexing is protected by the loop guard
319         #[allow(clippy::indexing_slicing)]
320         while i < Q && j < M {
321             result.bytes[i] = other.bytes[j];
322             i += 1;
323             j += 1;
324         }
325         result
326     }
327 
328     /// # Safety
329     /// Must be called with a bytes array made of valid ASCII bytes, with no null bytes
330     /// between ASCII characters
331     #[must_use]
from_utf8_unchecked(code_units: [u8; N]) -> Self332     pub const unsafe fn from_utf8_unchecked(code_units: [u8; N]) -> Self {
333         Self {
334             bytes: AsciiByte::to_ascii_byte_array(&code_units),
335         }
336     }
337 }
338 
// Expands to the body of a `const fn ... -> bool` predicate on `TinyAsciiStr<N>`.
//
// For `N <= 4` and `N <= 8` the check is delegated to the `$check_int` method of
// `Aligned4` / `Aligned8`. For larger `N` the bytes up to the first null are
// tested one by one with the given `u8` predicates.
macro_rules! check_is {
    // Every content byte must satisfy `$check_u8`.
    ($self:ident, $check_int:ident, $check_u8:ident) => {
        if N <= 4 {
            Aligned4::from_ascii_bytes(&$self.bytes).$check_int()
        } else if N <= 8 {
            Aligned8::from_ascii_bytes(&$self.bytes).$check_int()
        } else {
            let mut idx = 0;
            // `idx < N` keeps the indexing in bounds
            #[allow(clippy::indexing_slicing)]
            while idx < N {
                let byte = $self.bytes[idx] as u8;
                if byte == AsciiByte::B0 as u8 {
                    break;
                }
                if !byte.$check_u8() {
                    return false;
                }
                idx += 1;
            }
            true
        }
    };
    // The first byte must NOT satisfy `$check_u8_0_inv`, and no later content byte
    // may satisfy `$check_u8_1_inv`.
    ($self:ident, $check_int:ident, !$check_u8_0_inv:ident, !$check_u8_1_inv:ident) => {
        if N <= 4 {
            Aligned4::from_ascii_bytes(&$self.bytes).$check_int()
        } else if N <= 8 {
            Aligned8::from_ascii_bytes(&$self.bytes).$check_int()
        } else {
            // `N > 8` in this branch, so index 0 exists
            if ($self.bytes[0] as u8).$check_u8_0_inv() {
                return false;
            }
            let mut idx = 1;
            // `idx < N` keeps the indexing in bounds
            #[allow(clippy::indexing_slicing)]
            while idx < N {
                let byte = $self.bytes[idx] as u8;
                if byte == AsciiByte::B0 as u8 {
                    break;
                }
                if byte.$check_u8_1_inv() {
                    return false;
                }
                idx += 1;
            }
            true
        }
    };
    // The first byte must satisfy `$check_u8_0_inv`, and every later content byte
    // must satisfy `$check_u8_1_inv`.
    ($self:ident, $check_int:ident, $check_u8_0_inv:ident, $check_u8_1_inv:ident) => {
        if N <= 4 {
            Aligned4::from_ascii_bytes(&$self.bytes).$check_int()
        } else if N <= 8 {
            Aligned8::from_ascii_bytes(&$self.bytes).$check_int()
        } else {
            // `N > 8` in this branch, so index 0 exists
            if !($self.bytes[0] as u8).$check_u8_0_inv() {
                return false;
            }
            let mut idx = 1;
            // `idx < N` keeps the indexing in bounds
            #[allow(clippy::indexing_slicing)]
            while idx < N {
                let byte = $self.bytes[idx] as u8;
                if byte == AsciiByte::B0 as u8 {
                    break;
                }
                if !byte.$check_u8_1_inv() {
                    return false;
                }
                idx += 1;
            }
            true
        }
    };
}
403 
404 impl<const N: usize> TinyAsciiStr<N> {
405     /// Checks if the value is composed of ASCII alphabetic characters:
406     ///
407     ///  * U+0041 'A' ..= U+005A 'Z', or
408     ///  * U+0061 'a' ..= U+007A 'z'.
409     ///
410     /// # Examples
411     ///
412     /// ```
413     /// use tinystr::TinyAsciiStr;
414     ///
415     /// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
416     /// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse.");
417     ///
418     /// assert!(s1.is_ascii_alphabetic());
419     /// assert!(!s2.is_ascii_alphabetic());
420     /// ```
421     #[inline]
422     #[must_use]
is_ascii_alphabetic(&self) -> bool423     pub const fn is_ascii_alphabetic(&self) -> bool {
424         check_is!(self, is_ascii_alphabetic, is_ascii_alphabetic)
425     }
426 
427     /// Checks if the value is composed of ASCII alphanumeric characters:
428     ///
429     ///  * U+0041 'A' ..= U+005A 'Z', or
430     ///  * U+0061 'a' ..= U+007A 'z', or
431     ///  * U+0030 '0' ..= U+0039 '9'.
432     ///
433     /// # Examples
434     ///
435     /// ```
436     /// use tinystr::TinyAsciiStr;
437     ///
438     /// let s1: TinyAsciiStr<4> = "A15b".parse().expect("Failed to parse.");
439     /// let s2: TinyAsciiStr<4> = "[3@w".parse().expect("Failed to parse.");
440     ///
441     /// assert!(s1.is_ascii_alphanumeric());
442     /// assert!(!s2.is_ascii_alphanumeric());
443     /// ```
444     #[inline]
445     #[must_use]
is_ascii_alphanumeric(&self) -> bool446     pub const fn is_ascii_alphanumeric(&self) -> bool {
447         check_is!(self, is_ascii_alphanumeric, is_ascii_alphanumeric)
448     }
449 
450     /// Checks if the value is composed of ASCII decimal digits:
451     ///
452     ///  * U+0030 '0' ..= U+0039 '9'.
453     ///
454     /// # Examples
455     ///
456     /// ```
457     /// use tinystr::TinyAsciiStr;
458     ///
459     /// let s1: TinyAsciiStr<4> = "312".parse().expect("Failed to parse.");
460     /// let s2: TinyAsciiStr<4> = "3d".parse().expect("Failed to parse.");
461     ///
462     /// assert!(s1.is_ascii_numeric());
463     /// assert!(!s2.is_ascii_numeric());
464     /// ```
465     #[inline]
466     #[must_use]
is_ascii_numeric(&self) -> bool467     pub const fn is_ascii_numeric(&self) -> bool {
468         check_is!(self, is_ascii_numeric, is_ascii_digit)
469     }
470 
471     /// Checks if the value is in ASCII lower case.
472     ///
473     /// All letter characters are checked for case. Non-letter characters are ignored.
474     ///
475     /// # Examples
476     ///
477     /// ```
478     /// use tinystr::TinyAsciiStr;
479     ///
480     /// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
481     /// let s2: TinyAsciiStr<4> = "test".parse().expect("Failed to parse.");
482     /// let s3: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
483     ///
484     /// assert!(!s1.is_ascii_lowercase());
485     /// assert!(s2.is_ascii_lowercase());
486     /// assert!(s3.is_ascii_lowercase());
487     /// ```
488     #[inline]
489     #[must_use]
is_ascii_lowercase(&self) -> bool490     pub const fn is_ascii_lowercase(&self) -> bool {
491         check_is!(
492             self,
493             is_ascii_lowercase,
494             !is_ascii_uppercase,
495             !is_ascii_uppercase
496         )
497     }
498 
499     /// Checks if the value is in ASCII title case.
500     ///
501     /// This verifies that the first character is ASCII uppercase and all others ASCII lowercase.
502     /// Non-letter characters are ignored.
503     ///
504     /// # Examples
505     ///
506     /// ```
507     /// use tinystr::TinyAsciiStr;
508     ///
509     /// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
510     /// let s2: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
511     /// let s3: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
512     ///
513     /// assert!(!s1.is_ascii_titlecase());
514     /// assert!(s2.is_ascii_titlecase());
515     /// assert!(s3.is_ascii_titlecase());
516     /// ```
517     #[inline]
518     #[must_use]
is_ascii_titlecase(&self) -> bool519     pub const fn is_ascii_titlecase(&self) -> bool {
520         check_is!(
521             self,
522             is_ascii_titlecase,
523             !is_ascii_lowercase,
524             !is_ascii_uppercase
525         )
526     }
527 
528     /// Checks if the value is in ASCII upper case.
529     ///
530     /// All letter characters are checked for case. Non-letter characters are ignored.
531     ///
532     /// # Examples
533     ///
534     /// ```
535     /// use tinystr::TinyAsciiStr;
536     ///
537     /// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
538     /// let s2: TinyAsciiStr<4> = "TEST".parse().expect("Failed to parse.");
539     /// let s3: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
540     ///
541     /// assert!(!s1.is_ascii_uppercase());
542     /// assert!(s2.is_ascii_uppercase());
543     /// assert!(!s3.is_ascii_uppercase());
544     /// ```
545     #[inline]
546     #[must_use]
is_ascii_uppercase(&self) -> bool547     pub const fn is_ascii_uppercase(&self) -> bool {
548         check_is!(
549             self,
550             is_ascii_uppercase,
551             !is_ascii_lowercase,
552             !is_ascii_lowercase
553         )
554     }
555 
556     /// Checks if the value is composed of ASCII alphabetic lower case characters:
557     ///
558     ///  * U+0061 'a' ..= U+007A 'z',
559     ///
560     /// # Examples
561     ///
562     /// ```
563     /// use tinystr::TinyAsciiStr;
564     ///
565     /// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
566     /// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse.");
567     /// let s3: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
568     /// let s4: TinyAsciiStr<4> = "test".parse().expect("Failed to parse.");
569     /// let s5: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
570     ///
571     /// assert!(!s1.is_ascii_alphabetic_lowercase());
572     /// assert!(!s2.is_ascii_alphabetic_lowercase());
573     /// assert!(!s3.is_ascii_alphabetic_lowercase());
574     /// assert!(s4.is_ascii_alphabetic_lowercase());
575     /// assert!(!s5.is_ascii_alphabetic_lowercase());
576     /// ```
577     #[inline]
578     #[must_use]
is_ascii_alphabetic_lowercase(&self) -> bool579     pub const fn is_ascii_alphabetic_lowercase(&self) -> bool {
580         check_is!(
581             self,
582             is_ascii_alphabetic_lowercase,
583             is_ascii_lowercase,
584             is_ascii_lowercase
585         )
586     }
587 
588     /// Checks if the value is composed of ASCII alphabetic, with the first character being ASCII uppercase, and all others ASCII lowercase.
589     ///
590     /// # Examples
591     ///
592     /// ```
593     /// use tinystr::TinyAsciiStr;
594     ///
595     /// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
596     /// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse.");
597     /// let s3: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
598     /// let s4: TinyAsciiStr<4> = "test".parse().expect("Failed to parse.");
599     /// let s5: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
600     ///
601     /// assert!(s1.is_ascii_alphabetic_titlecase());
602     /// assert!(!s2.is_ascii_alphabetic_titlecase());
603     /// assert!(!s3.is_ascii_alphabetic_titlecase());
604     /// assert!(!s4.is_ascii_alphabetic_titlecase());
605     /// assert!(!s5.is_ascii_alphabetic_titlecase());
606     /// ```
607     #[inline]
608     #[must_use]
is_ascii_alphabetic_titlecase(&self) -> bool609     pub const fn is_ascii_alphabetic_titlecase(&self) -> bool {
610         check_is!(
611             self,
612             is_ascii_alphabetic_titlecase,
613             is_ascii_uppercase,
614             is_ascii_lowercase
615         )
616     }
617 
618     /// Checks if the value is composed of ASCII alphabetic upper case characters:
619     ///
620     ///  * U+0041 'A' ..= U+005A 'Z',
621     ///
622     /// # Examples
623     ///
624     /// ```
625     /// use tinystr::TinyAsciiStr;
626     ///
627     /// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
628     /// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse.");
629     /// let s3: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
630     /// let s4: TinyAsciiStr<4> = "TEST".parse().expect("Failed to parse.");
631     /// let s5: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
632     ///
633     /// assert!(!s1.is_ascii_alphabetic_uppercase());
634     /// assert!(!s2.is_ascii_alphabetic_uppercase());
635     /// assert!(!s3.is_ascii_alphabetic_uppercase());
636     /// assert!(s4.is_ascii_alphabetic_uppercase());
637     /// assert!(!s5.is_ascii_alphabetic_uppercase());
638     /// ```
639     #[inline]
640     #[must_use]
is_ascii_alphabetic_uppercase(&self) -> bool641     pub const fn is_ascii_alphabetic_uppercase(&self) -> bool {
642         check_is!(
643             self,
644             is_ascii_alphabetic_uppercase,
645             is_ascii_uppercase,
646             is_ascii_uppercase
647         )
648     }
649 }
650 
// Expands to the body of a case-conversion method taking `mut self` on
// `TinyAsciiStr<N>`, and evaluates to the converted `$self`.
//
// For `N <= 4` and `N <= 8` the conversion is delegated to the `$to` method of
// `Aligned4` / `Aligned8`. For larger `N`, `$later_char_to` is applied to every
// content byte and then, if given, `$first_char_to` is applied to the first byte.
macro_rules! to {
    ($self:ident, $to:ident, $later_char_to:ident $(,$first_char_to:ident)?) => {{
        if N <= 4 {
            let converted = Aligned4::from_ascii_bytes(&$self.bytes).$to().to_ascii_bytes();
            let mut pos = 0;
            // Won't panic because self.bytes has length N and converted has length >= N
            #[allow(clippy::indexing_slicing)]
            while pos < N {
                $self.bytes[pos] = converted[pos];
                pos += 1;
            }
        } else if N <= 8 {
            let converted = Aligned8::from_ascii_bytes(&$self.bytes).$to().to_ascii_bytes();
            let mut pos = 0;
            // Won't panic because self.bytes has length N and converted has length >= N
            #[allow(clippy::indexing_slicing)]
            while pos < N {
                $self.bytes[pos] = converted[pos];
                pos += 1;
            }
        } else {
            let mut pos = 0;
            // Won't panic because self.bytes has length N
            #[allow(clippy::indexing_slicing)]
            while pos < N {
                let current = $self.bytes[pos] as u8;
                // Stop at the null padding
                if current == AsciiByte::B0 as u8 {
                    break;
                }
                // SAFETY: AsciiByte is repr(u8) and has same size as u8
                $self.bytes[pos] = unsafe {
                    core::mem::transmute::<u8, AsciiByte>(current.$later_char_to())
                };
                pos += 1;
            }
            $(
                // The first byte gets its own conversion, applied after the loop.
                // SAFETY: AsciiByte is repr(u8) and has same size as u8
                $self.bytes[0] = unsafe {
                    core::mem::transmute::<u8, AsciiByte>(($self.bytes[0] as u8).$first_char_to())
                };
            )?
        }
        $self
    }};
}
692 
693 impl<const N: usize> TinyAsciiStr<N> {
694     /// Converts this type to its ASCII lower case equivalent in-place.
695     ///
696     /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', other characters are unchanged.
697     ///
698     /// # Examples
699     ///
700     /// ```
701     /// use tinystr::TinyAsciiStr;
702     ///
703     /// let s1: TinyAsciiStr<4> = "TeS3".parse().expect("Failed to parse.");
704     ///
705     /// assert_eq!(&*s1.to_ascii_lowercase(), "tes3");
706     /// ```
707     #[inline]
708     #[must_use]
to_ascii_lowercase(mut self) -> Self709     pub const fn to_ascii_lowercase(mut self) -> Self {
710         to!(self, to_ascii_lowercase, to_ascii_lowercase)
711     }
712 
713     /// Converts this type to its ASCII title case equivalent in-place.
714     ///
715     /// The first character is converted to ASCII uppercase; the remaining characters
716     /// are converted to ASCII lowercase.
717     ///
718     /// # Examples
719     ///
720     /// ```
721     /// use tinystr::TinyAsciiStr;
722     ///
723     /// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
724     ///
725     /// assert_eq!(&*s1.to_ascii_titlecase(), "Test");
726     /// ```
727     #[inline]
728     #[must_use]
to_ascii_titlecase(mut self) -> Self729     pub const fn to_ascii_titlecase(mut self) -> Self {
730         to!(
731             self,
732             to_ascii_titlecase,
733             to_ascii_lowercase,
734             to_ascii_uppercase
735         )
736     }
737 
738     /// Converts this type to its ASCII upper case equivalent in-place.
739     ///
740     /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', other characters are unchanged.
741     ///
742     /// # Examples
743     ///
744     /// ```
745     /// use tinystr::TinyAsciiStr;
746     ///
747     /// let s1: TinyAsciiStr<4> = "Tes3".parse().expect("Failed to parse.");
748     ///
749     /// assert_eq!(&*s1.to_ascii_uppercase(), "TES3");
750     /// ```
751     #[inline]
752     #[must_use]
to_ascii_uppercase(mut self) -> Self753     pub const fn to_ascii_uppercase(mut self) -> Self {
754         to!(self, to_ascii_uppercase, to_ascii_uppercase)
755     }
756 }
757 
758 impl<const N: usize> fmt::Debug for TinyAsciiStr<N> {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result759     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
760         fmt::Debug::fmt(self.as_str(), f)
761     }
762 }
763 
764 impl<const N: usize> fmt::Display for TinyAsciiStr<N> {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result765     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
766         fmt::Display::fmt(self.as_str(), f)
767     }
768 }
769 
770 impl<const N: usize> Deref for TinyAsciiStr<N> {
771     type Target = str;
772     #[inline]
deref(&self) -> &str773     fn deref(&self) -> &str {
774         self.as_str()
775     }
776 }
777 
778 impl<const N: usize> Borrow<str> for TinyAsciiStr<N> {
779     #[inline]
borrow(&self) -> &str780     fn borrow(&self) -> &str {
781         self.as_str()
782     }
783 }
784 
785 impl<const N: usize> FromStr for TinyAsciiStr<N> {
786     type Err = ParseError;
787     #[inline]
from_str(s: &str) -> Result<Self, Self::Err>788     fn from_str(s: &str) -> Result<Self, Self::Err> {
789         Self::try_from_str(s)
790     }
791 }
792 
793 impl<const N: usize> PartialEq<str> for TinyAsciiStr<N> {
eq(&self, other: &str) -> bool794     fn eq(&self, other: &str) -> bool {
795         self.deref() == other
796     }
797 }
798 
799 impl<const N: usize> PartialEq<&str> for TinyAsciiStr<N> {
eq(&self, other: &&str) -> bool800     fn eq(&self, other: &&str) -> bool {
801         self.deref() == *other
802     }
803 }
804 
/// Compares the string content against an owned `String`.
#[cfg(feature = "alloc")]
impl<const N: usize> PartialEq<alloc::string::String> for TinyAsciiStr<N> {
    fn eq(&self, other: &alloc::string::String) -> bool {
        let lhs: &str = self.as_str();
        let rhs: &str = other.as_str();
        lhs == rhs
    }
}
811 
/// Compares an owned `String` against the content of a `TinyAsciiStr`.
#[cfg(feature = "alloc")]
impl<const N: usize> PartialEq<TinyAsciiStr<N>> for alloc::string::String {
    fn eq(&self, other: &TinyAsciiStr<N>) -> bool {
        let lhs: &str = self.as_str();
        let rhs: &str = other.as_str();
        lhs == rhs
    }
}
818 
819 #[cfg(test)]
820 mod test {
821     use super::*;
822     use rand::distributions::Distribution;
823     use rand::distributions::Standard;
824     use rand::rngs::SmallRng;
825     use rand::seq::SliceRandom;
826     use rand::SeedableRng;
827 
// Hand-picked inputs: various lengths, mixed case, digits and punctuation.
#[rustfmt::skip]
const STRINGS: [&str; 26] = [
    "Latn", "laTn", "windows", "AR", "Hans", "macos", "AT",
    "infiniband", "FR", "en", "Cyrl", "FromIntegral", "NO", "419",
    "MacintoshOSX2019",
    "a3z", "A3z", "A3Z", "a3Z",
    "3A", "3Z", "3a", "3z",
    "@@[`{", "UK", "E12",
];
856 
gen_strings(num_strings: usize, allowed_lengths: &[usize]) -> Vec<String>857     fn gen_strings(num_strings: usize, allowed_lengths: &[usize]) -> Vec<String> {
858         let mut rng = SmallRng::seed_from_u64(2022);
859         // Need to do this in 2 steps since the RNG is needed twice
860         let string_lengths = core::iter::repeat_with(|| *allowed_lengths.choose(&mut rng).unwrap())
861             .take(num_strings)
862             .collect::<Vec<usize>>();
863         string_lengths
864             .iter()
865             .map(|len| {
866                 Standard
867                     .sample_iter(&mut rng)
868                     .filter(|b: &u8| *b > 0 && *b < 0x80)
869                     .take(*len)
870                     .collect::<Vec<u8>>()
871             })
872             .map(|byte_vec| String::from_utf8(byte_vec).expect("All ASCII"))
873             .collect()
874     }
875 
check_operation<T, F1, F2, const N: usize>(reference_f: F1, tinystr_f: F2) where F1: Fn(&str) -> T, F2: Fn(TinyAsciiStr<N>) -> T, T: core::fmt::Debug + core::cmp::PartialEq,876     fn check_operation<T, F1, F2, const N: usize>(reference_f: F1, tinystr_f: F2)
877     where
878         F1: Fn(&str) -> T,
879         F2: Fn(TinyAsciiStr<N>) -> T,
880         T: core::fmt::Debug + core::cmp::PartialEq,
881     {
882         for s in STRINGS
883             .into_iter()
884             .map(str::to_owned)
885             .chain(gen_strings(100, &[3, 4, 5, 8, 12]))
886         {
887             let t = match TinyAsciiStr::<N>::from_str(&s) {
888                 Ok(t) => t,
889                 Err(ParseError::TooLong { .. }) => continue,
890                 Err(e) => panic!("{}", e),
891             };
892             let expected = reference_f(&s);
893             let actual = tinystr_f(t);
894             assert_eq!(expected, actual, "TinyAsciiStr<{N}>: {s:?}");
895 
896             let s_utf16: Vec<u16> = s.encode_utf16().collect();
897             let t = match TinyAsciiStr::<N>::try_from_utf16(&s_utf16) {
898                 Ok(t) => t,
899                 Err(ParseError::TooLong { .. }) => continue,
900                 Err(e) => panic!("{}", e),
901             };
902             let expected = reference_f(&s);
903             let actual = tinystr_f(t);
904             assert_eq!(expected, actual, "TinyAsciiStr<{N}>: {s:?}");
905         }
906     }
907 
908     #[test]
test_is_ascii_alphabetic()909     fn test_is_ascii_alphabetic() {
910         fn check<const N: usize>() {
911             check_operation(
912                 |s| s.chars().all(|c| c.is_ascii_alphabetic()),
913                 |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_alphabetic(&t),
914             )
915         }
916         check::<2>();
917         check::<3>();
918         check::<4>();
919         check::<5>();
920         check::<8>();
921         check::<16>();
922     }
923 
924     #[test]
test_is_ascii_alphanumeric()925     fn test_is_ascii_alphanumeric() {
926         fn check<const N: usize>() {
927             check_operation(
928                 |s| s.chars().all(|c| c.is_ascii_alphanumeric()),
929                 |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_alphanumeric(&t),
930             )
931         }
932         check::<2>();
933         check::<3>();
934         check::<4>();
935         check::<5>();
936         check::<8>();
937         check::<16>();
938     }
939 
940     #[test]
test_is_ascii_numeric()941     fn test_is_ascii_numeric() {
942         fn check<const N: usize>() {
943             check_operation(
944                 |s| s.chars().all(|c| c.is_ascii_digit()),
945                 |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_numeric(&t),
946             )
947         }
948         check::<2>();
949         check::<3>();
950         check::<4>();
951         check::<5>();
952         check::<8>();
953         check::<16>();
954     }
955 
956     #[test]
test_is_ascii_lowercase()957     fn test_is_ascii_lowercase() {
958         fn check<const N: usize>() {
959             check_operation(
960                 |s| {
961                     s == TinyAsciiStr::<16>::try_from_str(s)
962                         .unwrap()
963                         .to_ascii_lowercase()
964                         .as_str()
965                 },
966                 |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_lowercase(&t),
967             )
968         }
969         check::<2>();
970         check::<3>();
971         check::<4>();
972         check::<5>();
973         check::<8>();
974         check::<16>();
975     }
976 
977     #[test]
test_is_ascii_titlecase()978     fn test_is_ascii_titlecase() {
979         fn check<const N: usize>() {
980             check_operation(
981                 |s| {
982                     s == TinyAsciiStr::<16>::try_from_str(s)
983                         .unwrap()
984                         .to_ascii_titlecase()
985                         .as_str()
986                 },
987                 |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_titlecase(&t),
988             )
989         }
990         check::<2>();
991         check::<3>();
992         check::<4>();
993         check::<5>();
994         check::<8>();
995         check::<16>();
996     }
997 
998     #[test]
test_is_ascii_uppercase()999     fn test_is_ascii_uppercase() {
1000         fn check<const N: usize>() {
1001             check_operation(
1002                 |s| {
1003                     s == TinyAsciiStr::<16>::try_from_str(s)
1004                         .unwrap()
1005                         .to_ascii_uppercase()
1006                         .as_str()
1007                 },
1008                 |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_uppercase(&t),
1009             )
1010         }
1011         check::<2>();
1012         check::<3>();
1013         check::<4>();
1014         check::<5>();
1015         check::<8>();
1016         check::<16>();
1017     }
1018 
1019     #[test]
test_is_ascii_alphabetic_lowercase()1020     fn test_is_ascii_alphabetic_lowercase() {
1021         fn check<const N: usize>() {
1022             check_operation(
1023                 |s| {
1024                     // Check alphabetic
1025                     s.chars().all(|c| c.is_ascii_alphabetic()) &&
1026                     // Check lowercase
1027                     s == TinyAsciiStr::<16>::try_from_str(s)
1028                         .unwrap()
1029                         .to_ascii_lowercase()
1030                         .as_str()
1031                 },
1032                 |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_alphabetic_lowercase(&t),
1033             )
1034         }
1035         check::<2>();
1036         check::<3>();
1037         check::<4>();
1038         check::<5>();
1039         check::<8>();
1040         check::<16>();
1041     }
1042 
1043     #[test]
test_is_ascii_alphabetic_titlecase()1044     fn test_is_ascii_alphabetic_titlecase() {
1045         fn check<const N: usize>() {
1046             check_operation(
1047                 |s| {
1048                     // Check alphabetic
1049                     s.chars().all(|c| c.is_ascii_alphabetic()) &&
1050                     // Check titlecase
1051                     s == TinyAsciiStr::<16>::try_from_str(s)
1052                         .unwrap()
1053                         .to_ascii_titlecase()
1054                         .as_str()
1055                 },
1056                 |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_alphabetic_titlecase(&t),
1057             )
1058         }
1059         check::<2>();
1060         check::<3>();
1061         check::<4>();
1062         check::<5>();
1063         check::<8>();
1064         check::<16>();
1065     }
1066 
1067     #[test]
test_is_ascii_alphabetic_uppercase()1068     fn test_is_ascii_alphabetic_uppercase() {
1069         fn check<const N: usize>() {
1070             check_operation(
1071                 |s| {
1072                     // Check alphabetic
1073                     s.chars().all(|c| c.is_ascii_alphabetic()) &&
1074                     // Check uppercase
1075                     s == TinyAsciiStr::<16>::try_from_str(s)
1076                         .unwrap()
1077                         .to_ascii_uppercase()
1078                         .as_str()
1079                 },
1080                 |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_alphabetic_uppercase(&t),
1081             )
1082         }
1083         check::<2>();
1084         check::<3>();
1085         check::<4>();
1086         check::<5>();
1087         check::<8>();
1088         check::<16>();
1089     }
1090 
1091     #[test]
test_to_ascii_lowercase()1092     fn test_to_ascii_lowercase() {
1093         fn check<const N: usize>() {
1094             check_operation(
1095                 |s| {
1096                     s.chars()
1097                         .map(|c| c.to_ascii_lowercase())
1098                         .collect::<String>()
1099                 },
1100                 |t: TinyAsciiStr<N>| TinyAsciiStr::to_ascii_lowercase(t).as_str().to_owned(),
1101             )
1102         }
1103         check::<2>();
1104         check::<3>();
1105         check::<4>();
1106         check::<5>();
1107         check::<8>();
1108         check::<16>();
1109     }
1110 
1111     #[test]
test_to_ascii_titlecase()1112     fn test_to_ascii_titlecase() {
1113         fn check<const N: usize>() {
1114             check_operation(
1115                 |s| {
1116                     let mut r = s
1117                         .chars()
1118                         .map(|c| c.to_ascii_lowercase())
1119                         .collect::<String>();
1120                     // Safe because the string is nonempty and an ASCII string
1121                     unsafe { r.as_bytes_mut()[0].make_ascii_uppercase() };
1122                     r
1123                 },
1124                 |t: TinyAsciiStr<N>| TinyAsciiStr::to_ascii_titlecase(t).as_str().to_owned(),
1125             )
1126         }
1127         check::<2>();
1128         check::<3>();
1129         check::<4>();
1130         check::<5>();
1131         check::<8>();
1132         check::<16>();
1133     }
1134 
1135     #[test]
test_to_ascii_uppercase()1136     fn test_to_ascii_uppercase() {
1137         fn check<const N: usize>() {
1138             check_operation(
1139                 |s| {
1140                     s.chars()
1141                         .map(|c| c.to_ascii_uppercase())
1142                         .collect::<String>()
1143                 },
1144                 |t: TinyAsciiStr<N>| TinyAsciiStr::to_ascii_uppercase(t).as_str().to_owned(),
1145             )
1146         }
1147         check::<2>();
1148         check::<3>();
1149         check::<4>();
1150         check::<5>();
1151         check::<8>();
1152         check::<16>();
1153     }
1154 
1155     #[test]
lossy_constructor()1156     fn lossy_constructor() {
1157         assert_eq!(TinyAsciiStr::<4>::from_utf8_lossy(b"", b'?').as_str(), "");
1158         assert_eq!(
1159             TinyAsciiStr::<4>::from_utf8_lossy(b"oh\0o", b'?').as_str(),
1160             "oh?o"
1161         );
1162         assert_eq!(
1163             TinyAsciiStr::<4>::from_utf8_lossy(b"\0", b'?').as_str(),
1164             "?"
1165         );
1166         assert_eq!(
1167             TinyAsciiStr::<4>::from_utf8_lossy(b"toolong", b'?').as_str(),
1168             "tool"
1169         );
1170         assert_eq!(
1171             TinyAsciiStr::<4>::from_utf8_lossy(&[b'a', 0x80, 0xFF, b'1'], b'?').as_str(),
1172             "a??1"
1173         );
1174     }
1175 }
1176