• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // This file is part of ICU4X. For terms of use, please see the file
2 // called LICENSE at the top level of the ICU4X source tree
3 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4 
5 use crate::provider::{CaseMapUnfold, CaseMapUnfoldV1, CaseMapV1};
6 use crate::set::ClosureSink;
7 use crate::{CaseMapper, CaseMapperBorrowed};
8 
9 use icu_provider::prelude::*;
10 
11 /// A wrapper around [`CaseMapper`] that can produce case mapping closures
12 /// over a character or string. This wrapper can be constructed directly, or
13 /// by wrapping a reference to an existing [`CaseMapper`].
14 ///
15 /// Most methods for this type live on [`CaseMapCloserBorrowed`], which you can obtain via
16 /// [`CaseMapCloser::new()`] or [`CaseMapCloser::as_borrowed()`].
17 ///
18 /// # Examples
19 ///
20 /// ```rust
21 /// use icu::casemap::CaseMapCloser;
22 /// use icu::collections::codepointinvlist::CodePointInversionListBuilder;
23 ///
24 /// let cm = CaseMapCloser::new();
25 /// let mut builder = CodePointInversionListBuilder::new();
26 /// let found = cm.add_string_case_closure_to("ffi", &mut builder);
27 /// assert!(found);
28 /// let set = builder.build();
29 ///
30 /// assert!(set.contains('ffi'));
31 ///
32 /// let mut builder = CodePointInversionListBuilder::new();
33 /// let found = cm.add_string_case_closure_to("ss", &mut builder);
34 /// assert!(found);
35 /// let set = builder.build();
36 ///
37 /// assert!(set.contains('ß'));
38 /// assert!(set.contains('ẞ'));
39 /// ```
40 #[derive(Clone, Debug)]
41 pub struct CaseMapCloser<CM> {
42     cm: CM,
43     unfold: DataPayload<CaseMapUnfoldV1>,
44 }
45 
46 impl CaseMapCloser<CaseMapper> {
47     icu_provider::gen_buffer_data_constructors!(() -> error: DataError,
48     functions: [
49         new: skip,
50         try_new_with_buffer_provider,
51         try_new_unstable,
52         Self,
53     ]);
54 
55     #[doc = icu_provider::gen_buffer_unstable_docs!(UNSTABLE, Self::new)]
try_new_unstable<P>(provider: &P) -> Result<Self, DataError> where P: DataProvider<CaseMapV1> + DataProvider<CaseMapUnfoldV1> + ?Sized,56     pub fn try_new_unstable<P>(provider: &P) -> Result<Self, DataError>
57     where
58         P: DataProvider<CaseMapV1> + DataProvider<CaseMapUnfoldV1> + ?Sized,
59     {
60         let cm = CaseMapper::try_new_unstable(provider)?;
61         let unfold = provider.load(Default::default())?.payload;
62         Ok(Self { cm, unfold })
63     }
64 }
65 
66 impl CaseMapCloser<CaseMapper> {
67     /// A constructor which creates a [`CaseMapCloserBorrowed`] using compiled data.
68     ///
69     /// # Examples
70     ///
71     /// ```rust
72     /// use icu::casemap::CaseMapCloser;
73     /// use icu::collections::codepointinvlist::CodePointInversionListBuilder;
74     ///
75     /// let cm = CaseMapCloser::new();
76     /// let mut builder = CodePointInversionListBuilder::new();
77     /// let found = cm.add_string_case_closure_to("ffi", &mut builder);
78     /// assert!(found);
79     /// let set = builder.build();
80     ///
81     /// assert!(set.contains('ffi'));
82     ///
83     /// let mut builder = CodePointInversionListBuilder::new();
84     /// let found = cm.add_string_case_closure_to("ss", &mut builder);
85     /// assert!(found);
86     /// let set = builder.build();
87     ///
88     /// assert!(set.contains('ß'));
89     /// assert!(set.contains('ẞ'));
90     /// ```
91     ///
92     /// ✨ *Enabled with the `compiled_data` Cargo feature.*
93     ///
94     /// [�� Help choosing a constructor](icu_provider::constructors)
95     #[cfg(feature = "compiled_data")]
96     #[allow(clippy::new_ret_no_self)] // Intentional
new() -> CaseMapCloserBorrowed<'static>97     pub const fn new() -> CaseMapCloserBorrowed<'static> {
98         CaseMapCloserBorrowed::new()
99     }
100 }
101 
102 // We use Borrow, not AsRef, since we want the blanket impl on T
103 impl<CM: AsRef<CaseMapper>> CaseMapCloser<CM> {
104     icu_provider::gen_buffer_data_constructors!((casemapper: CM) -> error: DataError,
105     functions: [
106         new_with_mapper: skip,
107         try_new_with_mapper_with_buffer_provider,
108         try_new_with_mapper_unstable,
109         Self,
110     ]);
111 
112     /// A constructor which creates a [`CaseMapCloser`] from an existing [`CaseMapper`]
113     /// (either owned or as a reference)
114     ///
115     /// ✨ *Enabled with the `compiled_data` Cargo feature.*
116     ///
117     /// [�� Help choosing a constructor](icu_provider::constructors)
118     #[cfg(feature = "compiled_data")]
new_with_mapper(casemapper: CM) -> Self119     pub const fn new_with_mapper(casemapper: CM) -> Self {
120         Self {
121             cm: casemapper,
122             unfold: DataPayload::from_static_ref(
123                 crate::provider::Baked::SINGLETON_CASE_MAP_UNFOLD_V1,
124             ),
125         }
126     }
127 
128     /// Construct this object to wrap an existing CaseMapper (or a reference to one), loading additional data as needed.
129     #[doc = icu_provider::gen_buffer_unstable_docs!(UNSTABLE, Self::new_with_mapper)]
try_new_with_mapper_unstable<P>(provider: &P, casemapper: CM) -> Result<Self, DataError> where P: DataProvider<CaseMapV1> + DataProvider<CaseMapUnfoldV1> + ?Sized,130     pub fn try_new_with_mapper_unstable<P>(provider: &P, casemapper: CM) -> Result<Self, DataError>
131     where
132         P: DataProvider<CaseMapV1> + DataProvider<CaseMapUnfoldV1> + ?Sized,
133     {
134         let unfold = provider.load(Default::default())?.payload;
135         Ok(Self {
136             cm: casemapper,
137             unfold,
138         })
139     }
140 
141     /// Constructs a borrowed version of this type for more efficient querying.
as_borrowed(&self) -> CaseMapCloserBorrowed<'_>142     pub fn as_borrowed(&self) -> CaseMapCloserBorrowed<'_> {
143         CaseMapCloserBorrowed {
144             cm: self.cm.as_ref().as_borrowed(),
145             unfold: self.unfold.get(),
146         }
147     }
148 }
149 
150 /// A borrowed [`CaseMapCloser`].
151 ///
152 /// See methods or [`CaseMapCloser`] for examples.
153 #[derive(Clone, Debug, Copy)]
154 pub struct CaseMapCloserBorrowed<'a> {
155     cm: CaseMapperBorrowed<'a>,
156     unfold: &'a CaseMapUnfold<'a>,
157 }
158 
159 impl CaseMapCloserBorrowed<'static> {
160     /// A constructor which creates a [`CaseMapCloserBorrowed`] using compiled data.
161     ///
162     /// # Examples
163     ///
164     /// ```rust
165     /// use icu::casemap::CaseMapCloser;
166     /// use icu::collections::codepointinvlist::CodePointInversionListBuilder;
167     ///
168     /// let cm = CaseMapCloser::new();
169     /// let mut builder = CodePointInversionListBuilder::new();
170     /// let found = cm.add_string_case_closure_to("ffi", &mut builder);
171     /// assert!(found);
172     /// let set = builder.build();
173     ///
174     /// assert!(set.contains('ffi'));
175     ///
176     /// let mut builder = CodePointInversionListBuilder::new();
177     /// let found = cm.add_string_case_closure_to("ss", &mut builder);
178     /// assert!(found);
179     /// let set = builder.build();
180     ///
181     /// assert!(set.contains('ß'));
182     /// assert!(set.contains('ẞ'));
183     /// ```
184     ///
185     /// ✨ *Enabled with the `compiled_data` Cargo feature.*
186     ///
187     /// [�� Help choosing a constructor](icu_provider::constructors)
188     #[cfg(feature = "compiled_data")]
new() -> CaseMapCloserBorrowed<'static>189     pub const fn new() -> CaseMapCloserBorrowed<'static> {
190         CaseMapCloserBorrowed {
191             cm: CaseMapper::new(),
192             unfold: crate::provider::Baked::SINGLETON_CASE_MAP_UNFOLD_V1,
193         }
194     }
195     /// Cheaply converts a [`CaseMapCloserBorrowed<'static>`] into a [`CaseMapCloser`].
196     ///
197     /// Note: Due to branching and indirection, using [`CaseMapCloser`] might inhibit some
198     /// compile-time optimizations that are possible with [`CaseMapCloserBorrowed`].
static_to_owned(self) -> CaseMapCloser<CaseMapper>199     pub const fn static_to_owned(self) -> CaseMapCloser<CaseMapper> {
200         CaseMapCloser {
201             cm: self.cm.static_to_owned(),
202             unfold: DataPayload::from_static_ref(self.unfold),
203         }
204     }
205 }
206 
#[cfg(feature = "compiled_data")]
impl Default for CaseMapCloserBorrowed<'static> {
    /// Equivalent to [`CaseMapCloserBorrowed::new()`].
    fn default() -> Self {
        CaseMapCloserBorrowed::new()
    }
}
213 
214 impl CaseMapCloserBorrowed<'_> {
215     /// Adds all simple case mappings and the full case folding for `c` to `set`.
216     /// Also adds special case closure mappings.
217     ///
218     /// In other words, this adds all strings/characters that this casemaps to, as
219     /// well as all characters that may casemap to this one.
220     ///
221     /// The character itself is not added.
222     ///
223     /// For example, the mappings
224     /// - for s include long s
225     /// - for sharp s include ss
226     /// - for k include the Kelvin sign
227     ///
228     /// This function is identical to [`CaseMapperBorrowed::add_case_closure_to()`]; if you don't
229     /// need [`Self::add_string_case_closure_to()`] consider using a [`CaseMapper`] to avoid
230     /// loading additional data.
231     ///
232     /// # Examples
233     ///
234     /// ```rust
235     /// use icu::casemap::CaseMapCloser;
236     /// use icu::collections::codepointinvlist::CodePointInversionListBuilder;
237     ///
238     /// let cm = CaseMapCloser::new();
239     /// let mut builder = CodePointInversionListBuilder::new();
240     /// cm.add_case_closure_to('s', &mut builder);
241     ///
242     /// let set = builder.build();
243     ///
244     /// assert!(set.contains('S'));
245     /// assert!(set.contains('ſ'));
246     /// assert!(!set.contains('s')); // does not contain itself
247     /// ```
add_case_closure_to<S: ClosureSink>(self, c: char, set: &mut S)248     pub fn add_case_closure_to<S: ClosureSink>(self, c: char, set: &mut S) {
249         self.cm.add_case_closure_to(c, set);
250     }
251 
252     /// Finds all characters and strings which may casemap to `s` as their full case folding string
253     /// and adds them to the set. Includes the full case closure of each character mapping.
254     ///
255     /// In other words, this performs a reverse full case folding and then
256     /// adds the case closure items of the resulting code points.
257     ///
258     /// The string itself is not added to the set.
259     ///
260     /// Returns true if the string was found
261     ///
262     /// # Examples
263     ///
264     /// ```rust
265     /// use icu::casemap::CaseMapCloser;
266     /// use icu::collections::codepointinvlist::CodePointInversionListBuilder;
267     ///
268     /// let cm = CaseMapCloser::new();
269     /// let mut builder = CodePointInversionListBuilder::new();
270     /// let found = cm.add_string_case_closure_to("ffi", &mut builder);
271     /// assert!(found);
272     /// let set = builder.build();
273     ///
274     /// assert!(set.contains('ffi'));
275     ///
276     /// let mut builder = CodePointInversionListBuilder::new();
277     /// let found = cm.add_string_case_closure_to("ss", &mut builder);
278     /// assert!(found);
279     /// let set = builder.build();
280     ///
281     /// assert!(set.contains('ß'));
282     /// assert!(set.contains('ẞ'));
283     /// ```
add_string_case_closure_to<S: ClosureSink>(self, s: &str, set: &mut S) -> bool284     pub fn add_string_case_closure_to<S: ClosureSink>(self, s: &str, set: &mut S) -> bool {
285         self.cm.data.add_string_case_closure_to(s, set, self.unfold)
286     }
287 }
288