// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use crate::codepointtrie::CodePointMapRange; /// This is an iterator that coalesces adjacent ranges in an iterator over code /// point ranges pub(crate) struct RangeListIteratorCoalescer { iter: I, peek: Option>, } impl RangeListIteratorCoalescer where I: Iterator>, { pub fn new(iter: I) -> Self { Self { iter, peek: None } } } impl Iterator for RangeListIteratorCoalescer where I: Iterator>, { type Item = CodePointMapRange; fn next(&mut self) -> Option { // Get the initial range we're working with: either a leftover // range from last time, or the next range let mut ret = if let Some(peek) = self.peek.take() { peek } else if let Some(next) = self.iter.next() { next } else { // No ranges, exit early return None; }; // Keep pulling ranges #[allow(clippy::while_let_on_iterator)] // can't move the iterator, also we want it to be explicit that we're not draining the iterator while let Some(next) = self.iter.next() { if *next.range.start() == ret.range.end() + 1 && next.value == ret.value { // Range has no gap, coalesce ret.range = *ret.range.start()..=*next.range.end(); } else { // Range has a gap, return what we have so far, update // peek self.peek = Some(next); return Some(ret); } } // Ran out of elements, exit Some(ret) } } #[cfg(test)] mod tests { use core::fmt::Debug; use icu::collections::codepointinvlist::CodePointInversionListBuilder; use icu::properties::props::{BinaryProperty, EnumeratedProperty}; use icu::properties::{CodePointMapData, CodePointSetData}; fn test_set(name: &str) { let mut builder = CodePointInversionListBuilder::new(); let mut builder_complement = CodePointInversionListBuilder::new(); for range in CodePointSetData::new::

().iter_ranges() { builder.add_range32(range) } for range in CodePointSetData::new::

().iter_ranges_complemented() { builder_complement.add_range32(range) } builder.complement(); let set1 = builder.build(); let set2 = builder_complement.build(); assert_eq!(set1, set2, "Set {name} failed to complement correctly"); } fn test_map(value: T, name: &str) { let mut builder = CodePointInversionListBuilder::new(); let mut builder_complement = CodePointInversionListBuilder::new(); for range in CodePointMapData::::new().iter_ranges_for_value(value) { builder.add_range32(range) } for range in CodePointMapData::::new().iter_ranges_for_value_complemented(value) { builder_complement.add_range32(range) } builder.complement(); let set1 = builder.build(); let set2 = builder_complement.build(); assert_eq!( set1, set2, "Map {name} failed to complement correctly with value {value:?}" ); } #[test] fn test_complement_sets() { use icu::properties::props::*; // Stress test the RangeListIteratorComplementer logic by ensuring it works for // a whole bunch of binary properties test_set::("ASCII_Hex_Digit"); test_set::("Alnum"); test_set::("Alphabetic"); test_set::("Bidi_Control"); test_set::("Bidi_Mirrored"); test_set::("Blank"); test_set::("Cased"); test_set::("Case_Ignorable"); test_set::("Full_Composition_Exclusion"); test_set::("Changes_When_Casefolded"); test_set::("Changes_When_Casemapped"); test_set::("Changes_When_NFKC_Casefolded"); test_set::("Changes_When_Lowercased"); test_set::("Changes_When_Titlecased"); test_set::("Changes_When_Uppercased"); test_set::("Dash"); test_set::("Deprecated"); test_set::("Default_Ignorable_Code_Point"); test_set::("Diacritic"); test_set::("Emoji_Modifier_Base"); test_set::("Emoji_Component"); test_set::("Emoji_Modifier"); test_set::("Emoji"); test_set::("Emoji_Presentation"); test_set::("Extender"); test_set::("Extended_Pictographic"); test_set::("Graph"); test_set::("Grapheme_Base"); test_set::("Grapheme_Extend"); test_set::("Grapheme_Link"); test_set::("Hex_Digit"); test_set::("Hyphen"); test_set::("Id_Continue"); test_set::("Ideographic"); test_set::("Id_Start"); test_set::("Ids_Binary_Operator"); test_set::("Ids_Trinary_Operator"); test_set::("Join_Control"); test_set::("Logical_Order_Exception"); test_set::("Lowercase"); test_set::("Math"); test_set::("Noncharacter_Code_Point"); test_set::("NFC_Inert"); test_set::("NFD_Inert"); test_set::("NFKC_Inert"); test_set::("NFKD_Inert"); test_set::("Pattern_Syntax"); test_set::("Pattern_White_Space"); test_set::("Prepended_Concatenation_Mark"); test_set::("Print"); test_set::("Quotation_Mark"); test_set::("Radical"); test_set::("Regional_Indicator"); test_set::("Soft_Dotted"); test_set::("Segment_Starter"); test_set::("Case_Sensitive"); test_set::("Sentence_Terminal"); test_set::("Terminal_Punctuation"); test_set::("Unified_Ideograph"); test_set::("Uppercase"); test_set::("Variation_Selector"); test_set::("White_Space"); test_set::("Xdigit"); test_set::("XID_Continue"); test_set::("XID_Start"); } #[test] fn test_complement_maps() { use icu::properties::props::{GeneralCategory, Script}; test_map(GeneralCategory::UppercaseLetter, "gc"); test_map(GeneralCategory::OtherPunctuation, "gc"); test_map(Script::Devanagari, "script"); test_map(Script::Latin, "script"); test_map(Script::Common, "script"); } }