1 // This file is part of ICU4X. For terms of use, please see the file 2 // called LICENSE at the top level of the ICU4X source tree 3 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 4 5 use crate::provider::{CaseMapUnfold, CaseMapUnfoldV1, CaseMapV1}; 6 use crate::set::ClosureSink; 7 use crate::{CaseMapper, CaseMapperBorrowed}; 8 9 use icu_provider::prelude::*; 10 11 /// A wrapper around [`CaseMapper`] that can produce case mapping closures 12 /// over a character or string. This wrapper can be constructed directly, or 13 /// by wrapping a reference to an existing [`CaseMapper`]. 14 /// 15 /// Most methods for this type live on [`CaseMapCloserBorrowed`], which you can obtain via 16 /// [`CaseMapCloser::new()`] or [`CaseMapCloser::as_borrowed()`]. 17 /// 18 /// # Examples 19 /// 20 /// ```rust 21 /// use icu::casemap::CaseMapCloser; 22 /// use icu::collections::codepointinvlist::CodePointInversionListBuilder; 23 /// 24 /// let cm = CaseMapCloser::new(); 25 /// let mut builder = CodePointInversionListBuilder::new(); 26 /// let found = cm.add_string_case_closure_to("ffi", &mut builder); 27 /// assert!(found); 28 /// let set = builder.build(); 29 /// 30 /// assert!(set.contains('ffi')); 31 /// 32 /// let mut builder = CodePointInversionListBuilder::new(); 33 /// let found = cm.add_string_case_closure_to("ss", &mut builder); 34 /// assert!(found); 35 /// let set = builder.build(); 36 /// 37 /// assert!(set.contains('ß')); 38 /// assert!(set.contains('ẞ')); 39 /// ``` 40 #[derive(Clone, Debug)] 41 pub struct CaseMapCloser<CM> { 42 cm: CM, 43 unfold: DataPayload<CaseMapUnfoldV1>, 44 } 45 46 impl CaseMapCloser<CaseMapper> { 47 icu_provider::gen_buffer_data_constructors!(() -> error: DataError, 48 functions: [ 49 new: skip, 50 try_new_with_buffer_provider, 51 try_new_unstable, 52 Self, 53 ]); 54 55 #[doc = icu_provider::gen_buffer_unstable_docs!(UNSTABLE, Self::new)] try_new_unstable<P>(provider: &P) -> Result<Self, DataError> where P: DataProvider<CaseMapV1> + DataProvider<CaseMapUnfoldV1> + ?Sized,56 pub fn try_new_unstable<P>(provider: &P) -> Result<Self, DataError> 57 where 58 P: DataProvider<CaseMapV1> + DataProvider<CaseMapUnfoldV1> + ?Sized, 59 { 60 let cm = CaseMapper::try_new_unstable(provider)?; 61 let unfold = provider.load(Default::default())?.payload; 62 Ok(Self { cm, unfold }) 63 } 64 } 65 66 impl CaseMapCloser<CaseMapper> { 67 /// A constructor which creates a [`CaseMapCloserBorrowed`] using compiled data. 68 /// 69 /// # Examples 70 /// 71 /// ```rust 72 /// use icu::casemap::CaseMapCloser; 73 /// use icu::collections::codepointinvlist::CodePointInversionListBuilder; 74 /// 75 /// let cm = CaseMapCloser::new(); 76 /// let mut builder = CodePointInversionListBuilder::new(); 77 /// let found = cm.add_string_case_closure_to("ffi", &mut builder); 78 /// assert!(found); 79 /// let set = builder.build(); 80 /// 81 /// assert!(set.contains('ffi')); 82 /// 83 /// let mut builder = CodePointInversionListBuilder::new(); 84 /// let found = cm.add_string_case_closure_to("ss", &mut builder); 85 /// assert!(found); 86 /// let set = builder.build(); 87 /// 88 /// assert!(set.contains('ß')); 89 /// assert!(set.contains('ẞ')); 90 /// ``` 91 /// 92 /// ✨ *Enabled with the `compiled_data` Cargo feature.* 93 /// 94 /// [ Help choosing a constructor](icu_provider::constructors) 95 #[cfg(feature = "compiled_data")] 96 #[allow(clippy::new_ret_no_self)] // Intentional new() -> CaseMapCloserBorrowed<'static>97 pub const fn new() -> CaseMapCloserBorrowed<'static> { 98 CaseMapCloserBorrowed::new() 99 } 100 } 101 102 // We use Borrow, not AsRef, since we want the blanket impl on T 103 impl<CM: AsRef<CaseMapper>> CaseMapCloser<CM> { 104 icu_provider::gen_buffer_data_constructors!((casemapper: CM) -> error: DataError, 105 functions: [ 106 new_with_mapper: skip, 107 try_new_with_mapper_with_buffer_provider, 108 try_new_with_mapper_unstable, 109 Self, 110 ]); 111 112 /// A constructor which creates a [`CaseMapCloser`] from an existing [`CaseMapper`] 113 /// (either owned or as a reference) 114 /// 115 /// ✨ *Enabled with the `compiled_data` Cargo feature.* 116 /// 117 /// [ Help choosing a constructor](icu_provider::constructors) 118 #[cfg(feature = "compiled_data")] new_with_mapper(casemapper: CM) -> Self119 pub const fn new_with_mapper(casemapper: CM) -> Self { 120 Self { 121 cm: casemapper, 122 unfold: DataPayload::from_static_ref( 123 crate::provider::Baked::SINGLETON_CASE_MAP_UNFOLD_V1, 124 ), 125 } 126 } 127 128 /// Construct this object to wrap an existing CaseMapper (or a reference to one), loading additional data as needed. 129 #[doc = icu_provider::gen_buffer_unstable_docs!(UNSTABLE, Self::new_with_mapper)] try_new_with_mapper_unstable<P>(provider: &P, casemapper: CM) -> Result<Self, DataError> where P: DataProvider<CaseMapV1> + DataProvider<CaseMapUnfoldV1> + ?Sized,130 pub fn try_new_with_mapper_unstable<P>(provider: &P, casemapper: CM) -> Result<Self, DataError> 131 where 132 P: DataProvider<CaseMapV1> + DataProvider<CaseMapUnfoldV1> + ?Sized, 133 { 134 let unfold = provider.load(Default::default())?.payload; 135 Ok(Self { 136 cm: casemapper, 137 unfold, 138 }) 139 } 140 141 /// Constructs a borrowed version of this type for more efficient querying. as_borrowed(&self) -> CaseMapCloserBorrowed<'_>142 pub fn as_borrowed(&self) -> CaseMapCloserBorrowed<'_> { 143 CaseMapCloserBorrowed { 144 cm: self.cm.as_ref().as_borrowed(), 145 unfold: self.unfold.get(), 146 } 147 } 148 } 149 150 /// A borrowed [`CaseMapCloser`]. 151 /// 152 /// See methods or [`CaseMapCloser`] for examples. 153 #[derive(Clone, Debug, Copy)] 154 pub struct CaseMapCloserBorrowed<'a> { 155 cm: CaseMapperBorrowed<'a>, 156 unfold: &'a CaseMapUnfold<'a>, 157 } 158 159 impl CaseMapCloserBorrowed<'static> { 160 /// A constructor which creates a [`CaseMapCloserBorrowed`] using compiled data. 161 /// 162 /// # Examples 163 /// 164 /// ```rust 165 /// use icu::casemap::CaseMapCloser; 166 /// use icu::collections::codepointinvlist::CodePointInversionListBuilder; 167 /// 168 /// let cm = CaseMapCloser::new(); 169 /// let mut builder = CodePointInversionListBuilder::new(); 170 /// let found = cm.add_string_case_closure_to("ffi", &mut builder); 171 /// assert!(found); 172 /// let set = builder.build(); 173 /// 174 /// assert!(set.contains('ffi')); 175 /// 176 /// let mut builder = CodePointInversionListBuilder::new(); 177 /// let found = cm.add_string_case_closure_to("ss", &mut builder); 178 /// assert!(found); 179 /// let set = builder.build(); 180 /// 181 /// assert!(set.contains('ß')); 182 /// assert!(set.contains('ẞ')); 183 /// ``` 184 /// 185 /// ✨ *Enabled with the `compiled_data` Cargo feature.* 186 /// 187 /// [ Help choosing a constructor](icu_provider::constructors) 188 #[cfg(feature = "compiled_data")] new() -> CaseMapCloserBorrowed<'static>189 pub const fn new() -> CaseMapCloserBorrowed<'static> { 190 CaseMapCloserBorrowed { 191 cm: CaseMapper::new(), 192 unfold: crate::provider::Baked::SINGLETON_CASE_MAP_UNFOLD_V1, 193 } 194 } 195 /// Cheaply converts a [`CaseMapCloserBorrowed<'static>`] into a [`CaseMapCloser`]. 196 /// 197 /// Note: Due to branching and indirection, using [`CaseMapCloser`] might inhibit some 198 /// compile-time optimizations that are possible with [`CaseMapCloserBorrowed`]. static_to_owned(self) -> CaseMapCloser<CaseMapper>199 pub const fn static_to_owned(self) -> CaseMapCloser<CaseMapper> { 200 CaseMapCloser { 201 cm: self.cm.static_to_owned(), 202 unfold: DataPayload::from_static_ref(self.unfold), 203 } 204 } 205 } 206 207 #[cfg(feature = "compiled_data")] 208 impl Default for CaseMapCloserBorrowed<'static> { default() -> Self209 fn default() -> Self { 210 Self::new() 211 } 212 } 213 214 impl CaseMapCloserBorrowed<'_> { 215 /// Adds all simple case mappings and the full case folding for `c` to `set`. 216 /// Also adds special case closure mappings. 217 /// 218 /// In other words, this adds all strings/characters that this casemaps to, as 219 /// well as all characters that may casemap to this one. 220 /// 221 /// The character itself is not added. 222 /// 223 /// For example, the mappings 224 /// - for s include long s 225 /// - for sharp s include ss 226 /// - for k include the Kelvin sign 227 /// 228 /// This function is identical to [`CaseMapperBorrowed::add_case_closure_to()`]; if you don't 229 /// need [`Self::add_string_case_closure_to()`] consider using a [`CaseMapper`] to avoid 230 /// loading additional data. 231 /// 232 /// # Examples 233 /// 234 /// ```rust 235 /// use icu::casemap::CaseMapCloser; 236 /// use icu::collections::codepointinvlist::CodePointInversionListBuilder; 237 /// 238 /// let cm = CaseMapCloser::new(); 239 /// let mut builder = CodePointInversionListBuilder::new(); 240 /// cm.add_case_closure_to('s', &mut builder); 241 /// 242 /// let set = builder.build(); 243 /// 244 /// assert!(set.contains('S')); 245 /// assert!(set.contains('ſ')); 246 /// assert!(!set.contains('s')); // does not contain itself 247 /// ``` add_case_closure_to<S: ClosureSink>(self, c: char, set: &mut S)248 pub fn add_case_closure_to<S: ClosureSink>(self, c: char, set: &mut S) { 249 self.cm.add_case_closure_to(c, set); 250 } 251 252 /// Finds all characters and strings which may casemap to `s` as their full case folding string 253 /// and adds them to the set. Includes the full case closure of each character mapping. 254 /// 255 /// In other words, this performs a reverse full case folding and then 256 /// adds the case closure items of the resulting code points. 257 /// 258 /// The string itself is not added to the set. 259 /// 260 /// Returns true if the string was found 261 /// 262 /// # Examples 263 /// 264 /// ```rust 265 /// use icu::casemap::CaseMapCloser; 266 /// use icu::collections::codepointinvlist::CodePointInversionListBuilder; 267 /// 268 /// let cm = CaseMapCloser::new(); 269 /// let mut builder = CodePointInversionListBuilder::new(); 270 /// let found = cm.add_string_case_closure_to("ffi", &mut builder); 271 /// assert!(found); 272 /// let set = builder.build(); 273 /// 274 /// assert!(set.contains('ffi')); 275 /// 276 /// let mut builder = CodePointInversionListBuilder::new(); 277 /// let found = cm.add_string_case_closure_to("ss", &mut builder); 278 /// assert!(found); 279 /// let set = builder.build(); 280 /// 281 /// assert!(set.contains('ß')); 282 /// assert!(set.contains('ẞ')); 283 /// ``` add_string_case_closure_to<S: ClosureSink>(self, s: &str, set: &mut S) -> bool284 pub fn add_string_case_closure_to<S: ClosureSink>(self, s: &str, set: &mut S) -> bool { 285 self.cm.data.add_string_case_closure_to(s, set, self.unfold) 286 } 287 } 288