use std::char; use std::char::DecodeUtf16; use std::iter::FusedIterator; use std::num::NonZeroU16; use crate::util::BYTE_SHIFT; use crate::util::CONT_MASK; use crate::util::CONT_TAG; use super::CodePoints; use super::Result; const MIN_HIGH_SURROGATE: u16 = 0xD800; const MIN_LOW_SURROGATE: u16 = 0xDC00; const MIN_SURROGATE_CODE: u32 = (u16::MAX as u32) + 1; macro_rules! static_assert { ( $condition:expr ) => { const _: () = assert!($condition, "static assertion failed"); }; } pub(in super::super) struct DecodeWide where I: Iterator, { iter: DecodeUtf16, code_point: u32, shifts: u8, } impl DecodeWide where I: Iterator, { pub(in super::super) fn new(string: S) -> Self where S: IntoIterator, { Self { iter: char::decode_utf16(string), code_point: 0, shifts: 0, } } #[inline(always)] const fn get_raw_byte(&self) -> u8 { (self.code_point >> (self.shifts * BYTE_SHIFT)) as u8 } } impl Iterator for DecodeWide where I: Iterator, { type Item = u8; fn next(&mut self) -> Option { if let Some(shifts) = self.shifts.checked_sub(1) { self.shifts = shifts; return Some((self.get_raw_byte() & CONT_MASK) | CONT_TAG); } self.code_point = self .iter .next()? .map(Into::into) .unwrap_or_else(|x| x.unpaired_surrogate().into()); macro_rules! decode { ( $tag:expr ) => { Some(self.get_raw_byte() | $tag) }; } macro_rules! try_decode { ( $tag:expr , $upper_bound:expr ) => { if self.code_point < $upper_bound { return decode!($tag); } self.shifts += 1; }; } try_decode!(0, 0x80); try_decode!(0xC0, 0x800); try_decode!(0xE0, MIN_SURROGATE_CODE); decode!(0xF0) } fn size_hint(&self) -> (usize, Option) { let (low, high) = self.iter.size_hint(); let shifts = self.shifts.into(); ( low.saturating_add(shifts), high.and_then(|x| x.checked_mul(4)) .and_then(|x| x.checked_add(shifts)), ) } } pub(in super::super) struct EncodeWide where I: Iterator, { iter: CodePoints, surrogate: Option, } impl EncodeWide where I: Iterator, { fn new(string: S) -> Self where S: IntoIterator, { Self { iter: CodePoints::new(string), surrogate: None, } } pub(in super::super) const fn is_still_utf8(&self) -> bool { self.iter.is_still_utf8() } } impl FusedIterator for EncodeWide where I: FusedIterator + Iterator { } impl Iterator for EncodeWide where I: Iterator, { type Item = Result; fn next(&mut self) -> Option { if let Some(surrogate) = self.surrogate.take() { return Some(Ok(surrogate.get())); } self.iter.next().map(|code_point| { code_point.map(|code_point| { code_point .checked_sub(MIN_SURROGATE_CODE) .map(|offset| { static_assert!(MIN_LOW_SURROGATE != 0); // SAFETY: The above static assertion guarantees that // this value will not be zero. self.surrogate = Some(unsafe { NonZeroU16::new_unchecked( (offset & 0x3FF) as u16 | MIN_LOW_SURROGATE, ) }); (offset >> 10) as u16 | MIN_HIGH_SURROGATE }) .unwrap_or(code_point as u16) }) }) } fn size_hint(&self) -> (usize, Option) { let (low, high) = self.iter.inner_size_hint(); let additional = self.surrogate.is_some().into(); ( (low.saturating_add(2) / 3).saturating_add(additional), high.and_then(|x| x.checked_add(additional)), ) } } pub(in super::super) fn encode_wide( string: &[u8], ) -> EncodeWide> { EncodeWide::new(string.iter().copied()) }