• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 use std::char;
2 use std::char::DecodeUtf16;
3 use std::iter::FusedIterator;
4 use std::num::NonZeroU16;
5 
6 use crate::util::BYTE_SHIFT;
7 use crate::util::CONT_MASK;
8 use crate::util::CONT_TAG;
9 
10 use super::CodePoints;
11 use super::Result;
12 
/// First UTF-16 high (leading) surrogate. The encoder ORs the upper bits of a
/// supplementary code point's offset into this base value.
const MIN_HIGH_SURROGATE: u16 = 0xD800;

/// First UTF-16 low (trailing) surrogate. The encoder ORs the lower ten bits
/// of a supplementary code point's offset into this base value.
const MIN_LOW_SURROGATE: u16 = 0xDC00;

/// Smallest code point that no longer fits in a single `u16`
/// (`u16::MAX + 1`), i.e. the first one that needs a surrogate pair.
const MIN_SURROGATE_CODE: u32 = 0x1_0000;
18 
/// Checks `$predicate` at compile time.
///
/// The expression is evaluated in a constant context; when it is `false`,
/// compilation fails with the message "static assertion failed".
macro_rules! static_assert {
    ( $predicate:expr ) => {
        const _: () = {
            assert!($predicate, "static assertion failed");
        };
    };
}
24 
25 pub(in super::super) struct DecodeWide<I>
26 where
27     I: Iterator<Item = u16>,
28 {
29     iter: DecodeUtf16<I>,
30     code_point: u32,
31     shifts: u8,
32 }
33 
34 impl<I> DecodeWide<I>
35 where
36     I: Iterator<Item = u16>,
37 {
new<S>(string: S) -> Self where S: IntoIterator<IntoIter = I, Item = I::Item>,38     pub(in super::super) fn new<S>(string: S) -> Self
39     where
40         S: IntoIterator<IntoIter = I, Item = I::Item>,
41     {
42         Self {
43             iter: char::decode_utf16(string),
44             code_point: 0,
45             shifts: 0,
46         }
47     }
48 
49     #[inline(always)]
get_raw_byte(&self) -> u850     fn get_raw_byte(&self) -> u8 {
51         (self.code_point >> (self.shifts * BYTE_SHIFT)) as u8
52     }
53 }
54 
55 impl<I> Iterator for DecodeWide<I>
56 where
57     I: Iterator<Item = u16>,
58 {
59     type Item = u8;
60 
next(&mut self) -> Option<Self::Item>61     fn next(&mut self) -> Option<Self::Item> {
62         if let Some(shifts) = self.shifts.checked_sub(1) {
63             self.shifts = shifts;
64             return Some((self.get_raw_byte() & CONT_MASK) | CONT_TAG);
65         }
66 
67         self.code_point = self
68             .iter
69             .next()?
70             .map(Into::into)
71             .unwrap_or_else(|x| x.unpaired_surrogate().into());
72 
73         macro_rules! decode {
74             ( $tag:expr ) => {
75                 Some(self.get_raw_byte() | $tag)
76             };
77         }
78         macro_rules! try_decode {
79             ( $tag:expr , $upper_bound:expr ) => {
80                 if self.code_point < $upper_bound {
81                     return decode!($tag);
82                 }
83                 self.shifts += 1;
84             };
85         }
86         try_decode!(0, 0x80);
87         try_decode!(0xC0, 0x800);
88         try_decode!(0xE0, MIN_SURROGATE_CODE);
89         decode!(0xF0)
90     }
91 
size_hint(&self) -> (usize, Option<usize>)92     fn size_hint(&self) -> (usize, Option<usize>) {
93         let (low, high) = self.iter.size_hint();
94         let shifts = self.shifts.into();
95         (
96             low.saturating_add(shifts),
97             high.and_then(|x| x.checked_mul(4))
98                 .and_then(|x| x.checked_add(shifts)),
99         )
100     }
101 }
102 
103 pub(in super::super) struct EncodeWide<I>
104 where
105     I: Iterator<Item = u8>,
106 {
107     iter: CodePoints<I>,
108     surrogate: Option<NonZeroU16>,
109 }
110 
111 impl<I> EncodeWide<I>
112 where
113     I: Iterator<Item = u8>,
114 {
new<S>(string: S) -> Self where S: IntoIterator<IntoIter = I>,115     fn new<S>(string: S) -> Self
116     where
117         S: IntoIterator<IntoIter = I>,
118     {
119         Self {
120             iter: CodePoints::new(string),
121             surrogate: None,
122         }
123     }
124 
is_still_utf8(&self) -> bool125     pub(in super::super) fn is_still_utf8(&self) -> bool {
126         self.iter.is_still_utf8()
127     }
128 }
129 
130 impl<I> FusedIterator for EncodeWide<I> where
131     I: FusedIterator + Iterator<Item = u8>
132 {
133 }
134 
135 impl<I> Iterator for EncodeWide<I>
136 where
137     I: Iterator<Item = u8>,
138 {
139     type Item = Result<u16>;
140 
next(&mut self) -> Option<Self::Item>141     fn next(&mut self) -> Option<Self::Item> {
142         if let Some(surrogate) = self.surrogate.take() {
143             return Some(Ok(surrogate.get()));
144         }
145 
146         self.iter.next().map(|code_point| {
147             code_point.map(|code_point| {
148                 code_point
149                     .checked_sub(MIN_SURROGATE_CODE)
150                     .map(|offset| {
151                         static_assert!(MIN_LOW_SURROGATE != 0);
152 
153                         // SAFETY: The above static assertion guarantees that
154                         // this value will not be zero.
155                         self.surrogate = Some(unsafe {
156                             NonZeroU16::new_unchecked(
157                                 (offset & 0x3FF) as u16 | MIN_LOW_SURROGATE,
158                             )
159                         });
160                         (offset >> 10) as u16 | MIN_HIGH_SURROGATE
161                     })
162                     .unwrap_or(code_point as u16)
163             })
164         })
165     }
166 
size_hint(&self) -> (usize, Option<usize>)167     fn size_hint(&self) -> (usize, Option<usize>) {
168         let (low, high) = self.iter.inner_size_hint();
169         let additional = self.surrogate.is_some().into();
170         (
171             (low.saturating_add(2) / 3).saturating_add(additional),
172             high.and_then(|x| x.checked_add(additional)),
173         )
174     }
175 }
176 
encode_wide( string: &[u8], ) -> EncodeWide<impl '_ + Iterator<Item = u8>>177 pub(in super::super) fn encode_wide(
178     string: &[u8],
179 ) -> EncodeWide<impl '_ + Iterator<Item = u8>> {
180     EncodeWide::new(string.iter().copied())
181 }
182