1 // This file is part of ICU4X. For terms of use, please see the file 2 // called LICENSE at the top level of the ICU4X source tree 3 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 4 5 use crate::codepointtrie::CodePointMapRange; 6 7 /// This is an iterator that coalesces adjacent ranges in an iterator over code 8 /// point ranges 9 pub(crate) struct RangeListIteratorCoalescer<I, T> { 10 iter: I, 11 peek: Option<CodePointMapRange<T>>, 12 } 13 14 impl<I, T: Eq> RangeListIteratorCoalescer<I, T> 15 where 16 I: Iterator<Item = CodePointMapRange<T>>, 17 { new(iter: I) -> Self18 pub fn new(iter: I) -> Self { 19 Self { iter, peek: None } 20 } 21 } 22 23 impl<I, T: Eq> Iterator for RangeListIteratorCoalescer<I, T> 24 where 25 I: Iterator<Item = CodePointMapRange<T>>, 26 { 27 type Item = CodePointMapRange<T>; 28 next(&mut self) -> Option<Self::Item>29 fn next(&mut self) -> Option<Self::Item> { 30 // Get the initial range we're working with: either a leftover 31 // range from last time, or the next range 32 let mut ret = if let Some(peek) = self.peek.take() { 33 peek 34 } else if let Some(next) = self.iter.next() { 35 next 36 } else { 37 // No ranges, exit early 38 return None; 39 }; 40 41 // Keep pulling ranges 42 #[allow(clippy::while_let_on_iterator)] 43 // can't move the iterator, also we want it to be explicit that we're not draining the iterator 44 while let Some(next) = self.iter.next() { 45 if *next.range.start() == ret.range.end() + 1 && next.value == ret.value { 46 // Range has no gap, coalesce 47 ret.range = *ret.range.start()..=*next.range.end(); 48 } else { 49 // Range has a gap, return what we have so far, update 50 // peek 51 self.peek = Some(next); 52 return Some(ret); 53 } 54 } 55 56 // Ran out of elements, exit 57 Some(ret) 58 } 59 } 60 61 #[cfg(test)] 62 mod tests { 63 use core::fmt::Debug; 64 use icu::collections::codepointinvlist::CodePointInversionListBuilder; 65 use icu::properties::props::{BinaryProperty, EnumeratedProperty}; 66 use icu::properties::{CodePointMapData, CodePointSetData}; 67 test_set<P: BinaryProperty>(name: &str)68 fn test_set<P: BinaryProperty>(name: &str) { 69 let mut builder = CodePointInversionListBuilder::new(); 70 let mut builder_complement = CodePointInversionListBuilder::new(); 71 72 for range in CodePointSetData::new::<P>().iter_ranges() { 73 builder.add_range32(range) 74 } 75 76 for range in CodePointSetData::new::<P>().iter_ranges_complemented() { 77 builder_complement.add_range32(range) 78 } 79 80 builder.complement(); 81 let set1 = builder.build(); 82 let set2 = builder_complement.build(); 83 assert_eq!(set1, set2, "Set {name} failed to complement correctly"); 84 } 85 test_map<T: EnumeratedProperty + Debug>(value: T, name: &str)86 fn test_map<T: EnumeratedProperty + Debug>(value: T, name: &str) { 87 let mut builder = CodePointInversionListBuilder::new(); 88 let mut builder_complement = CodePointInversionListBuilder::new(); 89 90 for range in CodePointMapData::<T>::new().iter_ranges_for_value(value) { 91 builder.add_range32(range) 92 } 93 94 for range in CodePointMapData::<T>::new().iter_ranges_for_value_complemented(value) { 95 builder_complement.add_range32(range) 96 } 97 98 builder.complement(); 99 let set1 = builder.build(); 100 let set2 = builder_complement.build(); 101 assert_eq!( 102 set1, set2, 103 "Map {name} failed to complement correctly with value {value:?}" 104 ); 105 } 106 107 #[test] test_complement_sets()108 fn test_complement_sets() { 109 use icu::properties::props::*; 110 // Stress test the RangeListIteratorComplementer logic by ensuring it works for 111 // a whole bunch of binary properties 112 test_set::<AsciiHexDigit>("ASCII_Hex_Digit"); 113 test_set::<Alnum>("Alnum"); 114 test_set::<Alphabetic>("Alphabetic"); 115 test_set::<BidiControl>("Bidi_Control"); 116 test_set::<BidiMirrored>("Bidi_Mirrored"); 117 test_set::<Blank>("Blank"); 118 test_set::<Cased>("Cased"); 119 test_set::<CaseIgnorable>("Case_Ignorable"); 120 test_set::<FullCompositionExclusion>("Full_Composition_Exclusion"); 121 test_set::<ChangesWhenCasefolded>("Changes_When_Casefolded"); 122 test_set::<ChangesWhenCasemapped>("Changes_When_Casemapped"); 123 test_set::<ChangesWhenNfkcCasefolded>("Changes_When_NFKC_Casefolded"); 124 test_set::<ChangesWhenLowercased>("Changes_When_Lowercased"); 125 test_set::<ChangesWhenTitlecased>("Changes_When_Titlecased"); 126 test_set::<ChangesWhenUppercased>("Changes_When_Uppercased"); 127 test_set::<Dash>("Dash"); 128 test_set::<Deprecated>("Deprecated"); 129 test_set::<DefaultIgnorableCodePoint>("Default_Ignorable_Code_Point"); 130 test_set::<Diacritic>("Diacritic"); 131 test_set::<EmojiModifierBase>("Emoji_Modifier_Base"); 132 test_set::<EmojiComponent>("Emoji_Component"); 133 test_set::<EmojiModifier>("Emoji_Modifier"); 134 test_set::<Emoji>("Emoji"); 135 test_set::<EmojiPresentation>("Emoji_Presentation"); 136 test_set::<Extender>("Extender"); 137 test_set::<ExtendedPictographic>("Extended_Pictographic"); 138 test_set::<Graph>("Graph"); 139 test_set::<GraphemeBase>("Grapheme_Base"); 140 test_set::<GraphemeExtend>("Grapheme_Extend"); 141 test_set::<GraphemeLink>("Grapheme_Link"); 142 test_set::<HexDigit>("Hex_Digit"); 143 test_set::<Hyphen>("Hyphen"); 144 test_set::<IdContinue>("Id_Continue"); 145 test_set::<Ideographic>("Ideographic"); 146 test_set::<IdStart>("Id_Start"); 147 test_set::<IdsBinaryOperator>("Ids_Binary_Operator"); 148 test_set::<IdsTrinaryOperator>("Ids_Trinary_Operator"); 149 test_set::<JoinControl>("Join_Control"); 150 test_set::<LogicalOrderException>("Logical_Order_Exception"); 151 test_set::<Lowercase>("Lowercase"); 152 test_set::<Math>("Math"); 153 test_set::<NoncharacterCodePoint>("Noncharacter_Code_Point"); 154 test_set::<NfcInert>("NFC_Inert"); 155 test_set::<NfdInert>("NFD_Inert"); 156 test_set::<NfkcInert>("NFKC_Inert"); 157 test_set::<NfkdInert>("NFKD_Inert"); 158 test_set::<PatternSyntax>("Pattern_Syntax"); 159 test_set::<PatternWhiteSpace>("Pattern_White_Space"); 160 test_set::<PrependedConcatenationMark>("Prepended_Concatenation_Mark"); 161 test_set::<Print>("Print"); 162 test_set::<QuotationMark>("Quotation_Mark"); 163 test_set::<Radical>("Radical"); 164 test_set::<RegionalIndicator>("Regional_Indicator"); 165 test_set::<SoftDotted>("Soft_Dotted"); 166 test_set::<SegmentStarter>("Segment_Starter"); 167 test_set::<CaseSensitive>("Case_Sensitive"); 168 test_set::<SentenceTerminal>("Sentence_Terminal"); 169 test_set::<TerminalPunctuation>("Terminal_Punctuation"); 170 test_set::<UnifiedIdeograph>("Unified_Ideograph"); 171 test_set::<Uppercase>("Uppercase"); 172 test_set::<VariationSelector>("Variation_Selector"); 173 test_set::<WhiteSpace>("White_Space"); 174 test_set::<Xdigit>("Xdigit"); 175 test_set::<XidContinue>("XID_Continue"); 176 test_set::<XidStart>("XID_Start"); 177 } 178 179 #[test] test_complement_maps()180 fn test_complement_maps() { 181 use icu::properties::props::{GeneralCategory, Script}; 182 test_map(GeneralCategory::UppercaseLetter, "gc"); 183 test_map(GeneralCategory::OtherPunctuation, "gc"); 184 test_map(Script::Devanagari, "script"); 185 test_map(Script::Latin, "script"); 186 test_map(Script::Common, "script"); 187 } 188 } 189