1 use std::char;
2 use std::char::DecodeUtf16;
3 use std::iter::FusedIterator;
4 use std::num::NonZeroU16;
5
6 use crate::util::BYTE_SHIFT;
7 use crate::util::CONT_MASK;
8 use crate::util::CONT_TAG;
9
10 use super::CodePoints;
11 use super::Result;
12
/// First code unit of the UTF-16 high (leading) surrogate range.
const MIN_HIGH_SURROGATE: u16 = 0xD800;

/// First code unit of the UTF-16 low (trailing) surrogate range.
const MIN_LOW_SURROGATE: u16 = 0xDC00;

/// Smallest code point that no longer fits in a single `u16` and therefore
/// has to be written as a surrogate pair.
const MIN_SURROGATE_CODE: u32 = 1 << u16::BITS;
18
/// Checks a boolean constant expression at compile time.
///
/// The build fails with the message "static assertion failed" when the
/// expression evaluates to `false`.
macro_rules! static_assert {
    ( $check:expr ) => {
        const _: () = {
            assert!($check, "static assertion failed");
        };
    };
}
24
25 pub(in super::super) struct DecodeWide<I>
26 where
27 I: Iterator<Item = u16>,
28 {
29 iter: DecodeUtf16<I>,
30 code_point: u32,
31 shifts: u8,
32 }
33
34 impl<I> DecodeWide<I>
35 where
36 I: Iterator<Item = u16>,
37 {
new<S>(string: S) -> Self where S: IntoIterator<IntoIter = I, Item = I::Item>,38 pub(in super::super) fn new<S>(string: S) -> Self
39 where
40 S: IntoIterator<IntoIter = I, Item = I::Item>,
41 {
42 Self {
43 iter: char::decode_utf16(string),
44 code_point: 0,
45 shifts: 0,
46 }
47 }
48
49 #[inline(always)]
get_raw_byte(&self) -> u850 fn get_raw_byte(&self) -> u8 {
51 (self.code_point >> (self.shifts * BYTE_SHIFT)) as u8
52 }
53 }
54
55 impl<I> Iterator for DecodeWide<I>
56 where
57 I: Iterator<Item = u16>,
58 {
59 type Item = u8;
60
next(&mut self) -> Option<Self::Item>61 fn next(&mut self) -> Option<Self::Item> {
62 if let Some(shifts) = self.shifts.checked_sub(1) {
63 self.shifts = shifts;
64 return Some((self.get_raw_byte() & CONT_MASK) | CONT_TAG);
65 }
66
67 self.code_point = self
68 .iter
69 .next()?
70 .map(Into::into)
71 .unwrap_or_else(|x| x.unpaired_surrogate().into());
72
73 macro_rules! decode {
74 ( $tag:expr ) => {
75 Some(self.get_raw_byte() | $tag)
76 };
77 }
78 macro_rules! try_decode {
79 ( $tag:expr , $upper_bound:expr ) => {
80 if self.code_point < $upper_bound {
81 return decode!($tag);
82 }
83 self.shifts += 1;
84 };
85 }
86 try_decode!(0, 0x80);
87 try_decode!(0xC0, 0x800);
88 try_decode!(0xE0, MIN_SURROGATE_CODE);
89 decode!(0xF0)
90 }
91
size_hint(&self) -> (usize, Option<usize>)92 fn size_hint(&self) -> (usize, Option<usize>) {
93 let (low, high) = self.iter.size_hint();
94 let shifts = self.shifts.into();
95 (
96 low.saturating_add(shifts),
97 high.and_then(|x| x.checked_mul(4))
98 .and_then(|x| x.checked_add(shifts)),
99 )
100 }
101 }
102
103 pub(in super::super) struct EncodeWide<I>
104 where
105 I: Iterator<Item = u8>,
106 {
107 iter: CodePoints<I>,
108 surrogate: Option<NonZeroU16>,
109 }
110
111 impl<I> EncodeWide<I>
112 where
113 I: Iterator<Item = u8>,
114 {
new<S>(string: S) -> Self where S: IntoIterator<IntoIter = I>,115 fn new<S>(string: S) -> Self
116 where
117 S: IntoIterator<IntoIter = I>,
118 {
119 Self {
120 iter: CodePoints::new(string),
121 surrogate: None,
122 }
123 }
124
is_still_utf8(&self) -> bool125 pub(in super::super) fn is_still_utf8(&self) -> bool {
126 self.iter.is_still_utf8()
127 }
128 }
129
130 impl<I> FusedIterator for EncodeWide<I> where
131 I: FusedIterator + Iterator<Item = u8>
132 {
133 }
134
135 impl<I> Iterator for EncodeWide<I>
136 where
137 I: Iterator<Item = u8>,
138 {
139 type Item = Result<u16>;
140
next(&mut self) -> Option<Self::Item>141 fn next(&mut self) -> Option<Self::Item> {
142 if let Some(surrogate) = self.surrogate.take() {
143 return Some(Ok(surrogate.get()));
144 }
145
146 self.iter.next().map(|code_point| {
147 code_point.map(|code_point| {
148 code_point
149 .checked_sub(MIN_SURROGATE_CODE)
150 .map(|offset| {
151 static_assert!(MIN_LOW_SURROGATE != 0);
152
153 // SAFETY: The above static assertion guarantees that
154 // this value will not be zero.
155 self.surrogate = Some(unsafe {
156 NonZeroU16::new_unchecked(
157 (offset & 0x3FF) as u16 | MIN_LOW_SURROGATE,
158 )
159 });
160 (offset >> 10) as u16 | MIN_HIGH_SURROGATE
161 })
162 .unwrap_or(code_point as u16)
163 })
164 })
165 }
166
size_hint(&self) -> (usize, Option<usize>)167 fn size_hint(&self) -> (usize, Option<usize>) {
168 let (low, high) = self.iter.inner_size_hint();
169 let additional = self.surrogate.is_some().into();
170 (
171 (low.saturating_add(2) / 3).saturating_add(additional),
172 high.and_then(|x| x.checked_add(additional)),
173 )
174 }
175 }
176
encode_wide( string: &[u8], ) -> EncodeWide<impl '_ + Iterator<Item = u8>>177 pub(in super::super) fn encode_wide(
178 string: &[u8],
179 ) -> EncodeWide<impl '_ + Iterator<Item = u8>> {
180 EncodeWide::new(string.iter().copied())
181 }
182