• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // This file is part of ICU4X. For terms of use, please see the file
2 // called LICENSE at the top level of the ICU4X source tree
3 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4 
5 use crate::codepointtrie::CodePointMapRange;
6 
/// This is an iterator that coalesces adjacent ranges in an iterator over code
/// point ranges.
///
/// Two consecutive ranges from the wrapped iterator are merged into one when
/// the second starts exactly one past the end of the first and both carry
/// equal values.
pub(crate) struct RangeListIteratorCoalescer<I, T> {
    /// The wrapped iterator that supplies the ranges to coalesce.
    iter: I,
    /// A range already pulled from `iter` that could not be merged into the
    /// range returned last; it becomes the starting range of the next call
    /// to `next`.
    peek: Option<CodePointMapRange<T>>,
}
13 
14 impl<I, T: Eq> RangeListIteratorCoalescer<I, T>
15 where
16     I: Iterator<Item = CodePointMapRange<T>>,
17 {
new(iter: I) -> Self18     pub fn new(iter: I) -> Self {
19         Self { iter, peek: None }
20     }
21 }
22 
23 impl<I, T: Eq> Iterator for RangeListIteratorCoalescer<I, T>
24 where
25     I: Iterator<Item = CodePointMapRange<T>>,
26 {
27     type Item = CodePointMapRange<T>;
28 
next(&mut self) -> Option<Self::Item>29     fn next(&mut self) -> Option<Self::Item> {
30         // Get the initial range we're working with: either a leftover
31         // range from last time, or the next range
32         let mut ret = if let Some(peek) = self.peek.take() {
33             peek
34         } else if let Some(next) = self.iter.next() {
35             next
36         } else {
37             // No ranges, exit early
38             return None;
39         };
40 
41         // Keep pulling ranges
42         #[allow(clippy::while_let_on_iterator)]
43         // can't move the iterator, also we want it to be explicit that we're not draining the iterator
44         while let Some(next) = self.iter.next() {
45             if *next.range.start() == ret.range.end() + 1 && next.value == ret.value {
46                 // Range has no gap, coalesce
47                 ret.range = *ret.range.start()..=*next.range.end();
48             } else {
49                 // Range has a gap, return what we have so far, update
50                 // peek
51                 self.peek = Some(next);
52                 return Some(ret);
53             }
54         }
55 
56         // Ran out of elements, exit
57         Some(ret)
58     }
59 }
60 
#[cfg(test)]
mod tests {
    use core::fmt::Debug;
    use icu::collections::codepointinvlist::CodePointInversionListBuilder;
    use icu::properties::props::{BinaryProperty, EnumeratedProperty};
    use icu::properties::{CodePointSetData, CodePointMapData};

    /// Checks that complementing the set built from binary property `P`'s
    /// ranges gives the same set as the one built from its complemented ranges.
    fn test_set<P>(name: &str)
    where
        P: BinaryProperty,
    {
        // Set built from the property's own ranges, then inverted.
        let mut direct = CodePointInversionListBuilder::new();
        for r in CodePointSetData::new::<P>().iter_ranges() {
            direct.add_range32(r);
        }
        direct.complement();
        let inverted = direct.build();

        // Set built straight from the complemented range iterator.
        let mut complemented = CodePointInversionListBuilder::new();
        for r in CodePointSetData::new::<P>().iter_ranges_complemented() {
            complemented.add_range32(r);
        }
        let from_complement_iter = complemented.build();

        assert_eq!(
            inverted, from_complement_iter,
            "Set {name} failed to complement correctly"
        );
    }

    /// Checks that complementing the set of code points mapped to `value`
    /// gives the same set as the one built from the complemented ranges for
    /// `value`.
    fn test_map<T>(value: T, name: &str)
    where
        T: EnumeratedProperty + Debug,
    {
        // Set of code points carrying `value`, then inverted.
        let mut direct = CodePointInversionListBuilder::new();
        for r in CodePointMapData::<T>::new().iter_ranges_for_value(value) {
            direct.add_range32(r);
        }
        direct.complement();
        let inverted = direct.build();

        // Set built straight from the complemented range iterator.
        let mut complemented = CodePointInversionListBuilder::new();
        for r in CodePointMapData::<T>::new().iter_ranges_for_value_complemented(value) {
            complemented.add_range32(r);
        }
        let from_complement_iter = complemented.build();

        assert_eq!(
            inverted, from_complement_iter,
            "Map {name} failed to complement correctly with value {value:?}"
        );
    }

    #[test]
    fn test_complement_sets() {
        use icu::properties::props::*;

        // Expands to one `test_set::<Prop>("Name")` call per entry, in order.
        macro_rules! check_sets {
            ($($prop:ident => $name:literal),* $(,)?) => {
                $( test_set::<$prop>($name); )*
            };
        }

        // Stress test complemented range iteration by running the check
        // against a whole bunch of binary properties
        check_sets! {
            AsciiHexDigit => "ASCII_Hex_Digit",
            Alnum => "Alnum",
            Alphabetic => "Alphabetic",
            BidiControl => "Bidi_Control",
            BidiMirrored => "Bidi_Mirrored",
            Blank => "Blank",
            Cased => "Cased",
            CaseIgnorable => "Case_Ignorable",
            FullCompositionExclusion => "Full_Composition_Exclusion",
            ChangesWhenCasefolded => "Changes_When_Casefolded",
            ChangesWhenCasemapped => "Changes_When_Casemapped",
            ChangesWhenNfkcCasefolded => "Changes_When_NFKC_Casefolded",
            ChangesWhenLowercased => "Changes_When_Lowercased",
            ChangesWhenTitlecased => "Changes_When_Titlecased",
            ChangesWhenUppercased => "Changes_When_Uppercased",
            Dash => "Dash",
            Deprecated => "Deprecated",
            DefaultIgnorableCodePoint => "Default_Ignorable_Code_Point",
            Diacritic => "Diacritic",
            EmojiModifierBase => "Emoji_Modifier_Base",
            EmojiComponent => "Emoji_Component",
            EmojiModifier => "Emoji_Modifier",
            Emoji => "Emoji",
            EmojiPresentation => "Emoji_Presentation",
            Extender => "Extender",
            ExtendedPictographic => "Extended_Pictographic",
            Graph => "Graph",
            GraphemeBase => "Grapheme_Base",
            GraphemeExtend => "Grapheme_Extend",
            GraphemeLink => "Grapheme_Link",
            HexDigit => "Hex_Digit",
            Hyphen => "Hyphen",
            IdContinue => "Id_Continue",
            Ideographic => "Ideographic",
            IdStart => "Id_Start",
            IdsBinaryOperator => "Ids_Binary_Operator",
            IdsTrinaryOperator => "Ids_Trinary_Operator",
            JoinControl => "Join_Control",
            LogicalOrderException => "Logical_Order_Exception",
            Lowercase => "Lowercase",
            Math => "Math",
            NoncharacterCodePoint => "Noncharacter_Code_Point",
            NfcInert => "NFC_Inert",
            NfdInert => "NFD_Inert",
            NfkcInert => "NFKC_Inert",
            NfkdInert => "NFKD_Inert",
            PatternSyntax => "Pattern_Syntax",
            PatternWhiteSpace => "Pattern_White_Space",
            PrependedConcatenationMark => "Prepended_Concatenation_Mark",
            Print => "Print",
            QuotationMark => "Quotation_Mark",
            Radical => "Radical",
            RegionalIndicator => "Regional_Indicator",
            SoftDotted => "Soft_Dotted",
            SegmentStarter => "Segment_Starter",
            CaseSensitive => "Case_Sensitive",
            SentenceTerminal => "Sentence_Terminal",
            TerminalPunctuation => "Terminal_Punctuation",
            UnifiedIdeograph => "Unified_Ideograph",
            Uppercase => "Uppercase",
            VariationSelector => "Variation_Selector",
            WhiteSpace => "White_Space",
            Xdigit => "Xdigit",
            XidContinue => "XID_Continue",
            XidStart => "XID_Start",
        }
    }

    #[test]
    fn test_complement_maps() {
        use icu::properties::props::{GeneralCategory, Script};

        // Same check for a handful of enumerated property values.
        for category in [
            GeneralCategory::UppercaseLetter,
            GeneralCategory::OtherPunctuation,
        ] {
            test_map(category, "gc");
        }
        for script in [Script::Devanagari, Script::Latin, Script::Common] {
            test_map(script, "script");
        }
    }
}
189