1 // This file is part of ICU4X. For terms of use, please see the file 2 // called LICENSE at the top level of the ICU4X source tree 3 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 4 5 use alloc::vec; 6 use alloc::vec::Vec; 7 use core::{char, cmp::Ordering, ops::RangeBounds}; 8 use potential_utf::PotentialCodePoint; 9 10 use crate::codepointinvlist::{utils::deconstruct_range, CodePointInversionList}; 11 use zerovec::{ule::AsULE, ZeroVec}; 12 13 /// A builder for [`CodePointInversionList`]. 14 /// 15 /// Provides exposure to builder functions and conversion to [`CodePointInversionList`] 16 #[derive(Default)] 17 pub struct CodePointInversionListBuilder { 18 // A sorted list of even length, with values <= char::MAX + 1 19 intervals: Vec<u32>, 20 } 21 22 impl CodePointInversionListBuilder { 23 /// Returns empty [`CodePointInversionListBuilder`] new() -> Self24 pub const fn new() -> Self { 25 Self { intervals: vec![] } 26 } 27 28 /// Returns a [`CodePointInversionList`] and consumes the [`CodePointInversionListBuilder`] build(self) -> CodePointInversionList<'static>29 pub fn build(self) -> CodePointInversionList<'static> { 30 let inv_list: ZeroVec<PotentialCodePoint> = self 31 .intervals 32 .into_iter() 33 .map(PotentialCodePoint::from_u24) 34 .collect(); 35 #[allow(clippy::unwrap_used)] // by invariant 36 CodePointInversionList::try_from_inversion_list(inv_list).unwrap() 37 } 38 39 /// Abstraction for adding/removing a range from start..end 40 /// 41 /// If add is true add, else remove add_remove_middle(&mut self, start: u32, end: u32, add: bool)42 fn add_remove_middle(&mut self, start: u32, end: u32, add: bool) { 43 if start >= end || end > char::MAX as u32 + 1 { 44 return; 45 } 46 let start_res = self.intervals.binary_search(&start); 47 let end_res = self.intervals.binary_search(&end); 48 let mut start_ind = start_res.unwrap_or_else(|x| x); 49 let mut end_ind = end_res.unwrap_or_else(|x| x); 50 let start_pos_check = (start_ind % 2 == 0) == add; 51 let end_pos_check = (end_ind % 2 == 0) == add; 52 let start_eq_end = start_ind == end_ind; 53 54 #[allow(clippy::indexing_slicing)] // all indices are binary search results 55 if start_eq_end && start_pos_check && end_res.is_err() { 56 self.intervals.splice(start_ind..end_ind, [start, end]); 57 } else { 58 if start_pos_check { 59 self.intervals[start_ind] = start; 60 start_ind += 1; 61 } 62 if end_pos_check { 63 if end_res.is_ok() { 64 end_ind += 1; 65 } else { 66 end_ind -= 1; 67 self.intervals[end_ind] = end; 68 } 69 } 70 if start_ind < end_ind { 71 self.intervals.drain(start_ind..end_ind); 72 } 73 } 74 } 75 76 /// Add the range to the [`CodePointInversionListBuilder`] 77 /// 78 /// Accomplishes this through binary search for the start and end indices and merges intervals 79 /// in between with inplace memory. Performs `O(1)` operation if adding to end of list, and `O(N)` otherwise, 80 /// where `N` is the number of endpoints. add(&mut self, start: u32, end: u32)81 fn add(&mut self, start: u32, end: u32) { 82 if start >= end { 83 return; 84 } 85 if self.intervals.is_empty() { 86 self.intervals.extend_from_slice(&[start, end]); 87 return; 88 } 89 self.add_remove_middle(start, end, true); 90 } 91 92 /// Add the character to the [`CodePointInversionListBuilder`] 93 /// 94 /// # Examples 95 /// 96 /// ``` 97 /// use icu::collections::codepointinvlist::CodePointInversionListBuilder; 98 /// let mut builder = CodePointInversionListBuilder::new(); 99 /// builder.add_char('a'); 100 /// let check = builder.build(); 101 /// assert_eq!(check.iter_chars().next(), Some('a')); 102 /// ``` add_char(&mut self, c: char)103 pub fn add_char(&mut self, c: char) { 104 let to_add = c as u32; 105 self.add(to_add, to_add + 1); 106 } 107 108 /// Add the code point value to the [`CodePointInversionListBuilder`] 109 /// 110 /// Note: Even though [`u32`] and [`prim@char`] in Rust are non-negative 4-byte 111 /// values, there is an important difference. A [`u32`] can take values up to 112 /// a very large integer value, while a [`prim@char`] in Rust is defined to be in 113 /// the range from 0 to the maximum valid Unicode Scalar Value. 114 /// 115 /// # Examples 116 /// 117 /// ``` 118 /// use icu::collections::codepointinvlist::CodePointInversionListBuilder; 119 /// let mut builder = CodePointInversionListBuilder::new(); 120 /// builder.add32(0x41); 121 /// let check = builder.build(); 122 /// assert!(check.contains32(0x41)); 123 /// ``` add32(&mut self, c: u32)124 pub fn add32(&mut self, c: u32) { 125 if c <= char::MAX as u32 { 126 // we already know 0 <= c because c: u32 127 self.add(c, c + 1); 128 } 129 } 130 131 /// Add the range of characters to the [`CodePointInversionListBuilder`] 132 /// 133 /// # Examples 134 /// 135 /// ``` 136 /// use icu::collections::codepointinvlist::CodePointInversionListBuilder; 137 /// let mut builder = CodePointInversionListBuilder::new(); 138 /// builder.add_range('A'..='Z'); 139 /// let check = builder.build(); 140 /// assert_eq!(check.iter_chars().next(), Some('A')); 141 /// ``` add_range(&mut self, range: impl RangeBounds<char>)142 pub fn add_range(&mut self, range: impl RangeBounds<char>) { 143 let (start, end) = deconstruct_range(range); 144 self.add(start, end); 145 } 146 147 /// Add the range of characters, represented as u32, to the [`CodePointInversionListBuilder`] 148 /// 149 /// # Examples 150 /// 151 /// ``` 152 /// use icu::collections::codepointinvlist::CodePointInversionListBuilder; 153 /// let mut builder = CodePointInversionListBuilder::new(); 154 /// builder.add_range32(0xd800..=0xdfff); 155 /// let check = builder.build(); 156 /// assert!(check.contains32(0xd900)); 157 /// ``` add_range32(&mut self, range: impl RangeBounds<u32>)158 pub fn add_range32(&mut self, range: impl RangeBounds<u32>) { 159 let (start, end) = deconstruct_range(range); 160 // Sets that include char::MAX need to allow an end value of MAX + 1 161 if start <= end && end <= char::MAX as u32 + 1 { 162 self.add(start, end); 163 } 164 } 165 166 /// Add the [`CodePointInversionList`] reference to the [`CodePointInversionListBuilder`] 167 /// 168 /// # Examples 169 /// 170 /// ``` 171 /// use icu::collections::codepointinvlist::{ 172 /// CodePointInversionList, CodePointInversionListBuilder, 173 /// }; 174 /// let mut builder = CodePointInversionListBuilder::new(); 175 /// let set = CodePointInversionList::try_from_u32_inversion_list_slice(&[ 176 /// 0x41, 0x4C, 177 /// ]) 178 /// .unwrap(); 179 /// builder.add_set(&set); 180 /// let check = builder.build(); 181 /// assert_eq!(check.iter_chars().next(), Some('A')); 182 /// ``` 183 #[allow(unused_assignments)] add_set(&mut self, set: &CodePointInversionList)184 pub fn add_set(&mut self, set: &CodePointInversionList) { 185 #[allow(clippy::indexing_slicing)] // chunks 186 set.as_inversion_list() 187 .as_ule_slice() 188 .chunks(2) 189 .for_each(|pair| { 190 self.add( 191 u32::from(PotentialCodePoint::from_unaligned(pair[0])), 192 u32::from(PotentialCodePoint::from_unaligned(pair[1])), 193 ) 194 }); 195 } 196 197 /// Removes the range from the [`CodePointInversionListBuilder`] 198 /// 199 /// Performs binary search to find start and end affected intervals, then removes in an `O(N)` fashion 200 /// where `N` is the number of endpoints, with in-place memory. remove(&mut self, start: u32, end: u32)201 fn remove(&mut self, start: u32, end: u32) { 202 if start >= end || self.intervals.is_empty() { 203 return; 204 } 205 if let Some(&last) = self.intervals.last() { 206 #[allow(clippy::indexing_slicing)] 207 // by invariant, if we have a last we have a (different) first 208 if start <= self.intervals[0] && end >= last { 209 self.intervals.clear(); 210 } else { 211 self.add_remove_middle(start, end, false); 212 } 213 } 214 } 215 216 /// Remove the character from the [`CodePointInversionListBuilder`] 217 /// 218 /// # Examples 219 /// 220 /// ``` 221 /// use icu::collections::codepointinvlist::CodePointInversionListBuilder; 222 /// let mut builder = CodePointInversionListBuilder::new(); 223 /// builder.add_range('A'..='Z'); 224 /// builder.remove_char('A'); 225 /// let check = builder.build(); 226 /// assert_eq!(check.iter_chars().next(), Some('B')); remove_char(&mut self, c: char)227 pub fn remove_char(&mut self, c: char) { 228 self.remove32(c as u32) 229 } 230 231 /// See [`Self::remove_char`] remove32(&mut self, c: u32)232 pub fn remove32(&mut self, c: u32) { 233 self.remove(c, c + 1); 234 } 235 236 /// Remove the range of characters from the [`CodePointInversionListBuilder`] 237 /// 238 /// # Examples 239 /// 240 /// ``` 241 /// use icu::collections::codepointinvlist::CodePointInversionListBuilder; 242 /// let mut builder = CodePointInversionListBuilder::new(); 243 /// builder.add_range('A'..='Z'); 244 /// builder.remove_range('A'..='C'); 245 /// let check = builder.build(); 246 /// assert_eq!(check.iter_chars().next(), Some('D')); remove_range(&mut self, range: impl RangeBounds<char>)247 pub fn remove_range(&mut self, range: impl RangeBounds<char>) { 248 let (start, end) = deconstruct_range(range); 249 self.remove(start, end); 250 } 251 252 /// See [`Self::remove_range`] remove_range32(&mut self, range: impl RangeBounds<u32>)253 pub fn remove_range32(&mut self, range: impl RangeBounds<u32>) { 254 let (start, end) = deconstruct_range(range); 255 self.remove(start, end); 256 } 257 258 /// Remove the [`CodePointInversionList`] from the [`CodePointInversionListBuilder`] 259 /// 260 /// # Examples 261 /// 262 /// ``` 263 /// use icu::collections::codepointinvlist::{CodePointInversionList, CodePointInversionListBuilder}; 264 /// let mut builder = CodePointInversionListBuilder::new(); 265 /// let set = CodePointInversionList::try_from_u32_inversion_list_slice(&[0x41, 0x46]).unwrap(); 266 /// builder.add_range('A'..='Z'); 267 /// builder.remove_set(&set); // removes 'A'..='E' 268 /// let check = builder.build(); 269 /// assert_eq!(check.iter_chars().next(), Some('F')); 270 #[allow(clippy::indexing_slicing)] // chunks remove_set(&mut self, set: &CodePointInversionList)271 pub fn remove_set(&mut self, set: &CodePointInversionList) { 272 set.as_inversion_list() 273 .as_ule_slice() 274 .chunks(2) 275 .for_each(|pair| { 276 self.remove( 277 u32::from(PotentialCodePoint::from_unaligned(pair[0])), 278 u32::from(PotentialCodePoint::from_unaligned(pair[1])), 279 ) 280 }); 281 } 282 283 /// Retain the specified character in the [`CodePointInversionListBuilder`] if it exists 284 /// 285 /// # Examples 286 /// 287 /// ``` 288 /// use icu::collections::codepointinvlist::CodePointInversionListBuilder; 289 /// let mut builder = CodePointInversionListBuilder::new(); 290 /// builder.add_range('A'..='Z'); 291 /// builder.retain_char('A'); 292 /// let set = builder.build(); 293 /// let mut check = set.iter_chars(); 294 /// assert_eq!(check.next(), Some('A')); 295 /// assert_eq!(check.next(), None); 296 /// ``` retain_char(&mut self, c: char)297 pub fn retain_char(&mut self, c: char) { 298 self.retain32(c as u32) 299 } 300 301 /// See [`Self::retain_char`] retain32(&mut self, c: u32)302 pub fn retain32(&mut self, c: u32) { 303 self.remove(0, c); 304 self.remove(c + 1, (char::MAX as u32) + 1); 305 } 306 307 /// Retain the range of characters located within the [`CodePointInversionListBuilder`] 308 /// 309 /// # Examples 310 /// 311 /// ``` 312 /// use icu::collections::codepointinvlist::CodePointInversionListBuilder; 313 /// let mut builder = CodePointInversionListBuilder::new(); 314 /// builder.add_range('A'..='Z'); 315 /// builder.retain_range('A'..='B'); 316 /// let set = builder.build(); 317 /// let mut check = set.iter_chars(); 318 /// assert_eq!(check.next(), Some('A')); 319 /// assert_eq!(check.next(), Some('B')); 320 /// assert_eq!(check.next(), None); 321 /// ``` retain_range(&mut self, range: impl RangeBounds<char>)322 pub fn retain_range(&mut self, range: impl RangeBounds<char>) { 323 let (start, end) = deconstruct_range(range); 324 self.remove(0, start); 325 self.remove(end, (char::MAX as u32) + 1); 326 } 327 328 /// See [`Self::retain_range`] retain_range32(&mut self, range: impl RangeBounds<u32>)329 pub fn retain_range32(&mut self, range: impl RangeBounds<u32>) { 330 let (start, end) = deconstruct_range(range); 331 self.remove(0, start); 332 self.remove(end, (char::MAX as u32) + 1); 333 } 334 335 /// Retain the elements in the specified set within the [`CodePointInversionListBuilder`] 336 /// 337 /// # Examples 338 /// 339 /// ``` 340 /// use icu::collections::codepointinvlist::{ 341 /// CodePointInversionList, CodePointInversionListBuilder, 342 /// }; 343 /// let mut builder = CodePointInversionListBuilder::new(); 344 /// let set = 345 /// CodePointInversionList::try_from_u32_inversion_list_slice(&[65, 70]) 346 /// .unwrap(); 347 /// builder.add_range('A'..='Z'); 348 /// builder.retain_set(&set); // retains 'A'..='E' 349 /// let check = builder.build(); 350 /// assert!(check.contains('A')); 351 /// assert!(!check.contains('G')); 352 /// ``` 353 #[allow(clippy::indexing_slicing)] // chunks retain_set(&mut self, set: &CodePointInversionList)354 pub fn retain_set(&mut self, set: &CodePointInversionList) { 355 let mut prev = 0; 356 for pair in set.as_inversion_list().as_ule_slice().chunks(2) { 357 let range_start = u32::from(PotentialCodePoint::from_unaligned(pair[0])); 358 let range_limit = u32::from(PotentialCodePoint::from_unaligned(pair[1])); 359 self.remove(prev, range_start); 360 prev = range_limit; 361 } 362 self.remove(prev, (char::MAX as u32) + 1); 363 } 364 365 /// Computes the complement of the argument, adding any elements that do not yet exist in the builder, 366 /// and removing any elements that already exist in the builder. See public functions for examples. 367 /// 368 /// Performs in `O(B + S)`, where `B` is the number of endpoints in the Builder, and `S` is the number 369 /// of endpoints in the argument. complement_list(&mut self, set_iter: impl IntoIterator<Item = u32>)370 fn complement_list(&mut self, set_iter: impl IntoIterator<Item = u32>) { 371 let mut res: Vec<u32> = vec![]; // not the biggest fan of having to allocate new memory 372 let mut ai = self.intervals.iter(); 373 let mut bi = set_iter.into_iter(); 374 let mut a = ai.next(); 375 let mut b = bi.next(); 376 while let (Some(c), Some(d)) = (a, b) { 377 match c.cmp(&d) { 378 Ordering::Less => { 379 res.push(*c); 380 a = ai.next(); 381 } 382 Ordering::Greater => { 383 res.push(d); 384 b = bi.next(); 385 } 386 Ordering::Equal => { 387 a = ai.next(); 388 b = bi.next(); 389 } 390 } 391 } 392 if let Some(c) = a { 393 res.push(*c) 394 } 395 if let Some(d) = b { 396 res.push(d) 397 } 398 res.extend(ai); 399 res.extend(bi); 400 self.intervals = res; 401 } 402 403 /// Computes the complement of the builder, inverting the builder (any elements in the builder are removed, 404 /// while any elements not in the builder are added) 405 /// 406 /// # Examples 407 /// 408 /// ``` 409 /// use icu::collections::codepointinvlist::{ 410 /// CodePointInversionList, CodePointInversionListBuilder, 411 /// }; 412 /// let mut builder = CodePointInversionListBuilder::new(); 413 /// let set = CodePointInversionList::try_from_u32_inversion_list_slice(&[ 414 /// 0x0, 415 /// 0x41, 416 /// 0x46, 417 /// (std::char::MAX as u32) + 1, 418 /// ]) 419 /// .unwrap(); 420 /// builder.add_set(&set); 421 /// builder.complement(); 422 /// let check = builder.build(); 423 /// assert_eq!(check.iter_chars().next(), Some('A')); 424 /// ``` complement(&mut self)425 pub fn complement(&mut self) { 426 if !self.intervals.is_empty() { 427 #[allow(clippy::indexing_slicing)] // by invariant 428 if self.intervals[0] == 0 { 429 self.intervals.drain(0..1); 430 } else { 431 self.intervals.insert(0, 0); 432 } 433 if self.intervals.last() == Some(&(char::MAX as u32 + 1)) { 434 self.intervals.pop(); 435 } else { 436 self.intervals.push(char::MAX as u32 + 1); 437 } 438 } else { 439 self.intervals 440 .extend_from_slice(&[0, (char::MAX as u32 + 1)]); 441 } 442 } 443 444 /// Complements the character in the builder, adding it if not in the builder, and removing it otherwise. 445 /// 446 /// # Examples 447 /// 448 /// ``` 449 /// use icu::collections::codepointinvlist::CodePointInversionListBuilder; 450 /// let mut builder = CodePointInversionListBuilder::new(); 451 /// builder.add_range('A'..='D'); 452 /// builder.complement_char('A'); 453 /// builder.complement_char('E'); 454 /// let check = builder.build(); 455 /// assert!(check.contains('E')); 456 /// assert!(!check.contains('A')); 457 /// ``` complement_char(&mut self, c: char)458 pub fn complement_char(&mut self, c: char) { 459 self.complement32(c as u32); 460 } 461 462 /// See [`Self::complement_char`] complement32(&mut self, c: u32)463 pub fn complement32(&mut self, c: u32) { 464 self.complement_list([c, c + 1]); 465 } 466 467 /// Complements the range in the builder, adding any elements in the range if not in the builder, and 468 /// removing them otherwise. 469 /// 470 /// # Examples 471 /// 472 /// ``` 473 /// use icu::collections::codepointinvlist::CodePointInversionListBuilder; 474 /// let mut builder = CodePointInversionListBuilder::new(); 475 /// builder.add_range('A'..='D'); 476 /// builder.complement_range('C'..='F'); 477 /// let check = builder.build(); 478 /// assert!(check.contains('F')); 479 /// assert!(!check.contains('C')); 480 /// ``` complement_range(&mut self, range: impl RangeBounds<char>)481 pub fn complement_range(&mut self, range: impl RangeBounds<char>) { 482 let (start, end) = deconstruct_range(range); 483 self.complement_list([start, end]); 484 } 485 486 /// See [`Self::complement_range`] complement_range32(&mut self, range: impl RangeBounds<u32>)487 pub fn complement_range32(&mut self, range: impl RangeBounds<u32>) { 488 let (start, end) = deconstruct_range(range); 489 self.complement_list([start, end]); 490 } 491 492 /// Complements the set in the builder, adding any elements in the set if not in the builder, and 493 /// removing them otherwise. 494 /// 495 /// # Examples 496 /// 497 /// ``` 498 /// use icu::collections::codepointinvlist::{ 499 /// CodePointInversionList, CodePointInversionListBuilder, 500 /// }; 501 /// let mut builder = CodePointInversionListBuilder::new(); 502 /// let set = CodePointInversionList::try_from_u32_inversion_list_slice(&[ 503 /// 0x41, 0x46, 0x4B, 0x5A, 504 /// ]) 505 /// .unwrap(); 506 /// builder.add_range('C'..='N'); // 67 - 78 507 /// builder.complement_set(&set); 508 /// let check = builder.build(); 509 /// assert!(check.contains('Q')); // 81 510 /// assert!(!check.contains('N')); // 78 511 /// ``` complement_set(&mut self, set: &CodePointInversionList)512 pub fn complement_set(&mut self, set: &CodePointInversionList) { 513 let inv_list_iter_owned = set.as_inversion_list().iter().map(u32::from); 514 self.complement_list(inv_list_iter_owned); 515 } 516 517 /// Returns whether the build is empty. 518 /// 519 /// # Examples 520 /// 521 /// ``` 522 /// use icu::collections::codepointinvlist::CodePointInversionListBuilder; 523 /// let mut builder = CodePointInversionListBuilder::new(); 524 /// let check = builder.build(); 525 /// assert!(check.is_empty()); 526 /// ``` is_empty(&self) -> bool527 pub fn is_empty(&self) -> bool { 528 self.intervals.is_empty() 529 } 530 } 531 532 #[cfg(test)] 533 mod tests { 534 use super::{CodePointInversionList, CodePointInversionListBuilder}; 535 use core::char; 536 generate_tester(ex: &[u32]) -> CodePointInversionListBuilder537 fn generate_tester(ex: &[u32]) -> CodePointInversionListBuilder { 538 let check = CodePointInversionList::try_from_u32_inversion_list_slice(ex).unwrap(); 539 let mut builder = CodePointInversionListBuilder::new(); 540 builder.add_set(&check); 541 builder 542 } 543 544 #[test] test_new()545 fn test_new() { 546 let ex = CodePointInversionListBuilder::new(); 547 assert!(ex.intervals.is_empty()); 548 } 549 550 #[test] test_build()551 fn test_build() { 552 let mut builder = CodePointInversionListBuilder::new(); 553 builder.add(0x41, 0x42); 554 let check: CodePointInversionList = builder.build(); 555 assert_eq!(check.iter_chars().next(), Some('A')); 556 } 557 558 #[test] test_empty_build()559 fn test_empty_build() { 560 let builder = CodePointInversionListBuilder::new(); 561 let check: CodePointInversionList = builder.build(); 562 assert!(check.is_empty()); 563 } 564 565 #[test] test_add_to_empty()566 fn test_add_to_empty() { 567 let mut builder = CodePointInversionListBuilder::new(); 568 builder.add(0x0, 0xA); 569 assert_eq!(builder.intervals, [0x0, 0xA]); 570 } 571 572 #[test] test_add_invalid()573 fn test_add_invalid() { 574 let mut builder = CodePointInversionListBuilder::new(); 575 builder.add(0x0, 0x0); 576 builder.add(0x5, 0x0); 577 assert!(builder.intervals.is_empty()); 578 } 579 580 #[test] test_add_to_start()581 fn test_add_to_start() { 582 let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); 583 builder.add(0x0, 0x5); 584 let expected = [0x0, 0x5, 0xA, 0x14, 0x28, 0x32]; 585 assert_eq!(builder.intervals, expected); 586 } 587 588 #[test] test_add_to_start_overlap()589 fn test_add_to_start_overlap() { 590 let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); 591 builder.add(0x0, 0xE); 592 let expected = [0x0, 0x14, 0x28, 0x32]; 593 assert_eq!(builder.intervals, expected); 594 } 595 596 #[test] test_add_to_end()597 fn test_add_to_end() { 598 let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); 599 builder.add(0x3C, 0x46); 600 let expected = [0xA, 0x14, 0x28, 0x32, 60, 70]; 601 assert_eq!(builder.intervals, expected); 602 } 603 604 #[test] test_add_to_end_overlap()605 fn test_add_to_end_overlap() { 606 let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); 607 builder.add(0x2B, 0x46); 608 let expected = [0xA, 0x14, 0x28, 0x46]; 609 assert_eq!(builder.intervals, expected); 610 } 611 612 #[test] test_add_to_middle_no_overlap()613 fn test_add_to_middle_no_overlap() { 614 let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); 615 builder.add(0x19, 0x1B); 616 let expected = [0xA, 0x14, 0x19, 0x1B, 0x28, 0x32]; 617 assert_eq!(builder.intervals, expected); 618 } 619 620 #[test] test_add_to_middle_inside()621 fn test_add_to_middle_inside() { 622 let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); 623 builder.add(0xA, 0x14); 624 let expected = [0xA, 0x14, 0x28, 0x32]; 625 assert_eq!(builder.intervals, expected); 626 } 627 628 #[test] test_add_to_middle_left_overlap()629 fn test_add_to_middle_left_overlap() { 630 let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); 631 builder.add(0xF, 0x19); 632 let expected = [0xA, 0x19, 0x28, 0x32]; 633 assert_eq!(builder.intervals, expected); 634 } 635 636 #[test] test_add_to_middle_right_overlap()637 fn test_add_to_middle_right_overlap() { 638 let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); 639 builder.add(0x1E, 0x28); 640 let expected = [0xA, 0x14, 0x1E, 0x32]; 641 assert_eq!(builder.intervals, expected); 642 } 643 644 #[test] test_add_to_full_encompass()645 fn test_add_to_full_encompass() { 646 let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); 647 builder.add(0x0, 0x3C); 648 let expected = [0x0, 0x3C]; 649 assert_eq!(builder.intervals, expected); 650 } 651 652 #[test] test_add_to_partial_encompass()653 fn test_add_to_partial_encompass() { 654 let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); 655 builder.add(0x0, 0x23); 656 let expected = [0x0, 0x23, 0x28, 0x32]; 657 assert_eq!(builder.intervals, expected); 658 } 659 660 #[test] test_add_aligned_front()661 fn test_add_aligned_front() { 662 let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); 663 builder.add(5, 10); 664 let expected = [5, 0x14, 0x28, 0x32]; 665 assert_eq!(builder.intervals, expected); 666 } 667 668 #[test] test_add_aligned_back()669 fn test_add_aligned_back() { 670 let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); 671 builder.add(0x32, 0x37); 672 let expected = [0xA, 0x14, 0x28, 0x37]; 673 assert_eq!(builder.intervals, expected); 674 } 675 676 #[test] test_add_aligned_start_middle()677 fn test_add_aligned_start_middle() { 678 let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); 679 builder.add(0x14, 0x19); 680 let expected = [0xA, 0x19, 0x28, 0x32]; 681 assert_eq!(builder.intervals, expected); 682 } 683 684 #[test] test_add_aligned_end_middle()685 fn test_add_aligned_end_middle() { 686 let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); 687 builder.add(0x23, 0x28); 688 let expected = [0xA, 0x14, 0x23, 0x32]; 689 assert_eq!(builder.intervals, expected); 690 } 691 692 #[test] test_add_aligned_in_between_end()693 fn test_add_aligned_in_between_end() { 694 let mut builder = generate_tester(&[0xA, 0x14, 0x1E, 0x28, 0x32, 0x3C]); 695 builder.add(0xF, 0x1E); 696 let expected = [0xA, 0x28, 0x32, 0x3C]; 697 assert_eq!(builder.intervals, expected); 698 } 699 700 #[test] test_add_aligned_in_between_start()701 fn test_add_aligned_in_between_start() { 702 let mut builder = generate_tester(&[0xA, 0x14, 0x1E, 0x28, 0x32, 0x3C]); 703 builder.add(20, 35); 704 let expected = [0xA, 0x28, 0x32, 0x3C]; 705 assert_eq!(builder.intervals, expected); 706 } 707 708 #[test] test_add_adjacent_ranges()709 fn test_add_adjacent_ranges() { 710 let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); 711 builder.add(0x13, 0x14); 712 builder.add(0x14, 0x15); 713 builder.add(0x15, 0x16); 714 let expected = [0xA, 0x16, 0x28, 0x32]; 715 assert_eq!(builder.intervals, expected); 716 } 717 718 #[test] test_add_codepointinversionlist()719 fn test_add_codepointinversionlist() { 720 let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); 721 let check = CodePointInversionList::try_from_u32_inversion_list_slice(&[ 722 0x5, 0xA, 0x16, 0x21, 0x2C, 0x33, 723 ]) 724 .unwrap(); 725 builder.add_set(&check); 726 let expected = [0x5, 0x14, 0x16, 0x21, 0x28, 0x33]; 727 assert_eq!(builder.intervals, expected); 728 } 729 #[test] test_add_char()730 fn test_add_char() { 731 let mut builder = CodePointInversionListBuilder::new(); 732 builder.add_char('a'); 733 let expected = [0x61, 0x62]; 734 assert_eq!(builder.intervals, expected); 735 } 736 737 #[test] test_add_range()738 fn test_add_range() { 739 let mut builder = CodePointInversionListBuilder::new(); 740 builder.add_range('A'..='Z'); 741 let expected = [0x41, 0x5B]; 742 assert_eq!(builder.intervals, expected); 743 } 744 745 #[test] test_add_range32()746 fn test_add_range32() { 747 let mut builder = CodePointInversionListBuilder::new(); 748 builder.add_range32(0xd800..=0xdfff); 749 let expected = [0xd800, 0xe000]; 750 assert_eq!(builder.intervals, expected); 751 } 752 753 #[test] test_add_invalid_range()754 fn test_add_invalid_range() { 755 let mut builder = CodePointInversionListBuilder::new(); 756 builder.add_range('Z'..='A'); 757 assert!(builder.intervals.is_empty()); 758 } 759 760 #[test] test_remove_empty()761 fn test_remove_empty() { 762 let mut builder = CodePointInversionListBuilder::new(); 763 builder.remove(0x0, 0xA); 764 assert!(builder.intervals.is_empty()); 765 } 766 767 #[test] test_remove_entire_builder()768 fn test_remove_entire_builder() { 769 let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); 770 builder.remove(0xA, 0x32); 771 assert!(builder.intervals.is_empty()); 772 } 773 774 #[test] test_remove_entire_range()775 fn test_remove_entire_range() { 776 let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); 777 builder.remove(0xA, 0x14); 778 let expected = [0x28, 0x32]; 779 assert_eq!(builder.intervals, expected); 780 } 781 782 #[test] test_remove_partial_range_left()783 fn test_remove_partial_range_left() { 784 let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); 785 builder.remove(0xA, 0x2B); 786 let expected = [0x2B, 0x32]; 787 assert_eq!(builder.intervals, expected); 788 } 789 790 #[test] test_remove_ne_range()791 fn test_remove_ne_range() { 792 let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); 793 builder.remove(0x14, 0x28); 794 let expected = [0xA, 0x14, 0x28, 0x32]; 795 assert_eq!(builder.intervals, expected); 796 } 797 798 #[test] test_remove_partial_range_right()799 fn test_remove_partial_range_right() { 800 let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); 801 builder.remove(0xF, 0x37); 802 let expected = [0xA, 0xF]; 803 assert_eq!(builder.intervals, expected); 804 } 805 806 #[test] test_remove_middle_range()807 fn test_remove_middle_range() { 808 let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); 809 builder.remove(0xC, 0x12); 810 let expected = [0xA, 0xC, 0x12, 0x14, 0x28, 0x32]; 811 assert_eq!(builder.intervals, expected); 812 } 813 814 #[test] test_remove_ne_middle_range()815 fn test_remove_ne_middle_range() { 816 let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); 817 builder.remove(0x19, 0x1B); 818 let expected = [0xA, 0x14, 0x28, 0x32]; 819 assert_eq!(builder.intervals, expected); 820 } 821 822 #[test] test_remove_encompassed_range()823 fn test_remove_encompassed_range() { 824 let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32, 70, 80]); 825 builder.remove(0x19, 0x37); 826 let expected = [0xA, 0x14, 0x46, 0x50]; 827 assert_eq!(builder.intervals, expected); 828 } 829 #[test] test_remove_adjacent_ranges()830 fn test_remove_adjacent_ranges() { 831 let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); 832 builder.remove(0x27, 0x28); 833 builder.remove(0x28, 0x29); 834 builder.remove(0x29, 0x2A); 835 let expected = [0xA, 0x14, 0x2A, 0x32]; 836 assert_eq!(builder.intervals, expected); 837 } 838 839 #[test] test_remove_char()840 fn test_remove_char() { 841 let mut builder = generate_tester(&[0x41, 0x46]); 842 builder.remove_char('A'); // 65 843 let expected = [0x42, 0x46]; 844 assert_eq!(builder.intervals, expected); 845 } 846 847 #[test] test_remove_range()848 fn test_remove_range() { 849 let mut builder = generate_tester(&[0x41, 0x5A]); 850 builder.remove_range('A'..'L'); // 65 - 76 851 let expected = [0x4C, 0x5A]; 852 assert_eq!(builder.intervals, expected); 853 } 854 855 #[test] test_remove_set()856 fn test_remove_set() { 857 let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32, 70, 80]); 858 let remove = 859 CodePointInversionList::try_from_u32_inversion_list_slice(&[0xA, 0x14, 0x2D, 0x4B]) 860 .unwrap(); 861 builder.remove_set(&remove); 862 let expected = [0x28, 0x2D, 0x4B, 0x50]; 863 assert_eq!(builder.intervals, expected); 864 } 865 866 #[test] test_retain_char()867 fn test_retain_char() { 868 let mut builder = generate_tester(&[0x41, 0x5A]); 869 builder.retain_char('A'); // 65 870 let expected = [0x41, 0x42]; 871 assert_eq!(builder.intervals, expected); 872 } 873 874 #[test] test_retain_range()875 fn test_retain_range() { 876 let mut builder = generate_tester(&[0x41, 0x5A]); 877 builder.retain_range('C'..'F'); // 67 - 70 878 let expected = [0x43, 0x46]; 879 assert_eq!(builder.intervals, expected); 880 } 881 882 #[test] test_retain_range_empty()883 fn test_retain_range_empty() { 884 let mut builder = generate_tester(&[0x41, 0x46]); 885 builder.retain_range('F'..'Z'); 886 assert!(builder.intervals.is_empty()); 887 } 888 889 #[test] test_retain_set()890 fn test_retain_set() { 891 let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32, 70, 80]); 892 let retain = CodePointInversionList::try_from_u32_inversion_list_slice(&[ 893 0xE, 0x14, 0x19, 0x37, 0x4D, 0x51, 894 ]) 895 .unwrap(); 896 builder.retain_set(&retain); 897 let expected = [0xE, 0x14, 0x28, 0x32, 0x4D, 0x50]; 898 assert_eq!(builder.intervals, expected); 899 } 900 901 #[test] test_complement()902 fn test_complement() { 903 let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); 904 builder.complement(); 905 let expected = [0x0, 0xA, 0x14, 0x28, 0x32, (char::MAX as u32) + 1]; 906 assert_eq!(builder.intervals, expected); 907 } 908 909 #[test] test_complement_empty()910 fn test_complement_empty() { 911 let mut builder = generate_tester(&[]); 912 builder.complement(); 913 let expected = [0x0, (char::MAX as u32) + 1]; 914 assert_eq!(builder.intervals, expected); 915 916 builder.complement(); 917 let expected: [u32; 0] = []; 918 assert_eq!(builder.intervals, expected); 919 } 920 921 #[test] test_complement_zero_max()922 fn test_complement_zero_max() { 923 let mut builder = generate_tester(&[0x0, 0xA, 0x5A, (char::MAX as u32) + 1]); 924 builder.complement(); 925 let expected = [0xA, 0x5A]; 926 assert_eq!(builder.intervals, expected); 927 } 928 929 #[test] test_complement_interior()930 fn test_complement_interior() { 931 let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); 932 builder.complement_list([0xE, 0x14]); 933 let expected = [0xA, 0xE, 0x28, 0x32]; 934 assert_eq!(builder.intervals, expected); 935 } 936 937 #[test] test_complement_exterior()938 fn test_complement_exterior() { 939 let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); 940 builder.complement_list([0x19, 0x23]); 941 let expected = [0xA, 0x14, 0x19, 0x23, 0x28, 0x32]; 942 assert_eq!(builder.intervals, expected); 943 } 944 945 #[test] test_complement_larger_list()946 fn test_complement_larger_list() { 947 let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); 948 builder.complement_list([0x1E, 0x37, 0x3C, 0x46]); 949 let expected = [0xA, 0x14, 0x1E, 0x28, 0x32, 0x37, 0x3C, 0x46]; 950 assert_eq!(builder.intervals, expected); 951 } 952 953 #[test] test_complement_char()954 fn test_complement_char() { 955 let mut builder = generate_tester(&[0x41, 0x4C]); // A - K 956 builder.complement_char('A'); 957 builder.complement_char('L'); 958 let expected = [0x42, 0x4D]; 959 assert_eq!(builder.intervals, expected); 960 } 961 962 #[test] test_complement_range()963 fn test_complement_range() { 964 let mut builder = generate_tester(&[0x46, 0x4C]); // F - K 965 builder.complement_range('A'..='Z'); 966 let expected = [0x41, 0x46, 0x4C, 0x5B]; 967 assert_eq!(builder.intervals, expected); 968 } 969 970 #[test] test_complement_set()971 fn test_complement_set() { 972 let mut builder = generate_tester(&[0x43, 0x4E]); 973 let set = 974 CodePointInversionList::try_from_u32_inversion_list_slice(&[0x41, 0x46, 0x4B, 0x5A]) 975 .unwrap(); 976 builder.complement_set(&set); 977 let expected = [0x41, 0x43, 0x46, 0x4B, 0x4E, 0x5A]; 978 assert_eq!(builder.intervals, expected); 979 } 980 981 #[test] test_is_empty()982 fn test_is_empty() { 983 let builder = CodePointInversionListBuilder::new(); 984 assert!(builder.is_empty()); 985 } 986 } 987