// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

5 #[diplomat::bridge]
6 #[diplomat::abi_rename = "icu4x_{0}_mv1"]
7 #[diplomat::attr(auto, namespace = "icu4x")]
8 pub mod ffi {
9     use alloc::boxed::Box;
10 
11     #[cfg(any(feature = "compiled_data", feature = "buffer_provider"))]
12     use crate::locale_core::ffi::Locale;
13     #[cfg(feature = "buffer_provider")]
14     use crate::{errors::ffi::DataError, provider::ffi::DataProvider};
15     use diplomat_runtime::DiplomatOption;
16     #[cfg(any(feature = "compiled_data", feature = "buffer_provider"))]
17     use icu_segmenter::options::LineBreakOptions;
18 
19     #[diplomat::opaque]
20     /// An ICU4X line-break segmenter, capable of finding breakpoints in strings.
21     #[diplomat::rust_link(icu::segmenter::LineSegmenter, Struct)]
22     pub struct LineSegmenter(icu_segmenter::LineSegmenter);
23 
24     #[diplomat::rust_link(icu::segmenter::options::LineBreakStrictness, Enum)]
25     #[diplomat::enum_convert(icu_segmenter::options::LineBreakStrictness, needs_wildcard)]
26     pub enum LineBreakStrictness {
27         Loose,
28         Normal,
29         Strict,
30         Anywhere,
31     }
32 
33     #[diplomat::rust_link(icu::segmenter::options::LineBreakWordOption, Enum)]
34     #[diplomat::enum_convert(icu_segmenter::options::LineBreakWordOption, needs_wildcard)]
35     pub enum LineBreakWordOption {
36         Normal,
37         BreakAll,
38         KeepAll,
39     }
40 
41     #[diplomat::rust_link(icu::segmenter::options::LineBreakOptions, Struct)]
42     #[diplomat::attr(supports = non_exhaustive_structs, rename = "LineBreakOptions")]
43     pub struct LineBreakOptionsV2 {
44         pub strictness: DiplomatOption<LineBreakStrictness>,
45         pub word_option: DiplomatOption<LineBreakWordOption>,
46     }
47 
48     #[diplomat::opaque]
49     #[diplomat::rust_link(icu::segmenter::LineBreakIterator, Struct)]
50     #[diplomat::rust_link(
51         icu::segmenter::LineBreakIteratorPotentiallyIllFormedUtf8,
52         Typedef,
53         compact
54     )]
55     #[diplomat::rust_link(icu::segmenter::LineBreakIteratorUtf8, Typedef, hidden)]
56     pub struct LineBreakIteratorUtf8<'a>(
57         icu_segmenter::LineBreakIteratorPotentiallyIllFormedUtf8<'a, 'a>,
58     );
59 
60     #[diplomat::opaque]
61     #[diplomat::rust_link(icu::segmenter::LineBreakIterator, Struct)]
62     #[diplomat::rust_link(icu::segmenter::LineBreakIteratorUtf16, Typedef, compact)]
63     pub struct LineBreakIteratorUtf16<'a>(icu_segmenter::LineBreakIteratorUtf16<'a, 'a>);
64 
65     #[diplomat::opaque]
66     #[diplomat::rust_link(icu::segmenter::LineBreakIterator, Struct)]
67     #[diplomat::rust_link(icu::segmenter::LineBreakIteratorLatin1, Typedef, compact)]
68     pub struct LineBreakIteratorLatin1<'a>(icu_segmenter::LineBreakIteratorLatin1<'a, 'a>);
69 
70     impl LineSegmenter {
71         /// Construct a [`LineSegmenter`] with default options (no locale-based tailoring) using compiled data. It automatically loads the best
72         /// available payload data for Burmese, Khmer, Lao, and Thai.
73         #[diplomat::rust_link(icu::segmenter::LineSegmenter::new_auto, FnInStruct)]
74         #[diplomat::attr(auto, named_constructor = "auto")]
75         #[cfg(feature = "compiled_data")]
create_auto() -> Box<LineSegmenter>76         pub fn create_auto() -> Box<LineSegmenter> {
77             Box::new(LineSegmenter(icu_segmenter::LineSegmenter::new_auto(
78                 Default::default(),
79             )))
80         }
81 
82         /// Construct a [`LineSegmenter`] with default options (no locale-based tailoring) and LSTM payload data for
83         /// Burmese, Khmer, Lao, and Thai, using compiled data.
84         #[diplomat::rust_link(icu::segmenter::LineSegmenter::new_lstm, FnInStruct)]
85         #[diplomat::attr(auto, named_constructor = "lstm")]
86         #[cfg(feature = "compiled_data")]
create_lstm() -> Box<LineSegmenter>87         pub fn create_lstm() -> Box<LineSegmenter> {
88             Box::new(LineSegmenter(icu_segmenter::LineSegmenter::new_lstm(
89                 Default::default(),
90             )))
91         }
92 
93         /// Construct a [`LineSegmenter`] with default options (no locale-based tailoring) and dictionary payload data for
94         /// Burmese, Khmer, Lao, and Thai, using compiled data
95         #[diplomat::rust_link(icu::segmenter::LineSegmenter::new_dictionary, FnInStruct)]
96         #[diplomat::attr(auto, named_constructor = "dictionary")]
97         #[cfg(feature = "compiled_data")]
create_dictionary() -> Box<LineSegmenter>98         pub fn create_dictionary() -> Box<LineSegmenter> {
99             Box::new(LineSegmenter(icu_segmenter::LineSegmenter::new_dictionary(
100                 Default::default(),
101             )))
102         }
103 
104         /// Construct a [`LineSegmenter`] with custom options using compiled data. It automatically loads the best
105         /// available payload data for Burmese, Khmer, Lao, and Thai.
106         #[diplomat::rust_link(icu::segmenter::LineSegmenter::new_auto, FnInStruct)]
107         #[diplomat::attr(supports = non_exhaustive_structs, rename = "auto_with_options")]
108         #[diplomat::attr(all(supports = non_exhaustive_structs, supports = named_constructors), named_constructor = "auto_with_options")]
109         #[diplomat::attr(all(not(supports = non_exhaustive_structs), supports = named_constructors), named_constructor = "auto_with_options_v2")]
110         #[cfg(feature = "compiled_data")]
create_auto_with_options_v2( content_locale: Option<&Locale>, options: LineBreakOptionsV2, ) -> Box<LineSegmenter>111         pub fn create_auto_with_options_v2(
112             content_locale: Option<&Locale>,
113             options: LineBreakOptionsV2,
114         ) -> Box<LineSegmenter> {
115             let mut options: LineBreakOptions = options.into();
116             options.content_locale = content_locale.map(|c| &c.0.id);
117             Box::new(LineSegmenter(icu_segmenter::LineSegmenter::new_auto(
118                 options,
119             )))
120         }
121         /// Construct a [`LineSegmenter`] with custom options. It automatically loads the best
122         /// available payload data for Burmese, Khmer, Lao, and Thai, using a particular data source.
123         #[diplomat::rust_link(icu::segmenter::LineSegmenter::new_auto, FnInStruct)]
124         #[diplomat::attr(supports = non_exhaustive_structs, rename = "auto_with_options_and_provider")]
125         #[diplomat::attr(all(supports = non_exhaustive_structs, supports = fallible_constructors, supports = named_constructors), named_constructor = "auto_with_options_and_provider")]
126         #[diplomat::attr(all(not(supports = non_exhaustive_structs), supports = fallible_constructors, supports = named_constructors), named_constructor = "auto_with_options_v2_and_provider")]
127         #[cfg(feature = "buffer_provider")]
create_auto_with_options_v2_and_provider( provider: &DataProvider, content_locale: Option<&Locale>, options: LineBreakOptionsV2, ) -> Result<Box<LineSegmenter>, DataError>128         pub fn create_auto_with_options_v2_and_provider(
129             provider: &DataProvider,
130             content_locale: Option<&Locale>,
131             options: LineBreakOptionsV2,
132         ) -> Result<Box<LineSegmenter>, DataError> {
133             let mut options: LineBreakOptions = options.into();
134             options.content_locale = content_locale.map(|c| &c.0.id);
135 
136             Ok(Box::new(LineSegmenter(
137                 icu_segmenter::LineSegmenter::try_new_auto_with_buffer_provider(
138                     provider.get()?,
139                     options,
140                 )?,
141             )))
142         }
143         /// Construct a [`LineSegmenter`] with custom options and LSTM payload data for
144         /// Burmese, Khmer, Lao, and Thai, using compiled data.
145         #[diplomat::rust_link(icu::segmenter::LineSegmenter::new_lstm, FnInStruct)]
146         #[diplomat::attr(supports = non_exhaustive_structs, rename = "lstm_with_options")]
147         #[diplomat::attr(all(supports = non_exhaustive_structs, supports = named_constructors), named_constructor = "lstm_with_options")]
148         #[diplomat::attr(all(not(supports = non_exhaustive_structs), supports = named_constructors), named_constructor = "lstm_with_options_v2")]
149         #[cfg(feature = "compiled_data")]
create_lstm_with_options_v2( content_locale: Option<&Locale>, options: LineBreakOptionsV2, ) -> Box<LineSegmenter>150         pub fn create_lstm_with_options_v2(
151             content_locale: Option<&Locale>,
152             options: LineBreakOptionsV2,
153         ) -> Box<LineSegmenter> {
154             let mut options: LineBreakOptions = options.into();
155             options.content_locale = content_locale.map(|c| &c.0.id);
156 
157             Box::new(LineSegmenter(icu_segmenter::LineSegmenter::new_lstm(
158                 options,
159             )))
160         }
161         /// Construct a [`LineSegmenter`] with custom options and LSTM payload data for
162         /// Burmese, Khmer, Lao, and Thai, using a particular data source.
163         #[diplomat::rust_link(icu::segmenter::LineSegmenter::new_lstm, FnInStruct)]
164         #[diplomat::attr(supports = non_exhaustive_structs, rename = "lstm_with_options_and_provider")]
165         #[diplomat::attr(all(supports = non_exhaustive_structs, supports = fallible_constructors, supports = named_constructors), named_constructor = "lstm_with_options_and_provider")]
166         #[diplomat::attr(all(not(supports = non_exhaustive_structs), supports = fallible_constructors, supports = named_constructors), named_constructor = "lstm_with_options_v2_and_provider")]
167         #[cfg(feature = "buffer_provider")]
create_lstm_with_options_v2_and_provider( provider: &DataProvider, content_locale: Option<&Locale>, options: LineBreakOptionsV2, ) -> Result<Box<LineSegmenter>, DataError>168         pub fn create_lstm_with_options_v2_and_provider(
169             provider: &DataProvider,
170             content_locale: Option<&Locale>,
171             options: LineBreakOptionsV2,
172         ) -> Result<Box<LineSegmenter>, DataError> {
173             let mut options: LineBreakOptions = options.into();
174             options.content_locale = content_locale.map(|c| &c.0.id);
175 
176             Ok(Box::new(LineSegmenter(
177                 icu_segmenter::LineSegmenter::try_new_lstm_with_buffer_provider(
178                     provider.get()?,
179                     options,
180                 )?,
181             )))
182         }
183         /// Construct a [`LineSegmenter`] with custom options and dictionary payload data for
184         /// Burmese, Khmer, Lao, and Thai, using compiled data.
185         #[diplomat::rust_link(icu::segmenter::LineSegmenter::new_dictionary, FnInStruct)]
186         #[diplomat::attr(supports = non_exhaustive_structs, rename = "dictionary_with_options")]
187         #[diplomat::attr(all(supports = non_exhaustive_structs, supports = named_constructors), named_constructor = "dictionary_with_options")]
188         #[diplomat::attr(all(not(supports = non_exhaustive_structs), supports = named_constructors), named_constructor = "dictionary_with_options_v2")]
189         #[cfg(feature = "compiled_data")]
create_dictionary_with_options_v2( content_locale: Option<&Locale>, options: LineBreakOptionsV2, ) -> Box<LineSegmenter>190         pub fn create_dictionary_with_options_v2(
191             content_locale: Option<&Locale>,
192             options: LineBreakOptionsV2,
193         ) -> Box<LineSegmenter> {
194             let mut options: LineBreakOptions = options.into();
195             options.content_locale = content_locale.map(|c| &c.0.id);
196 
197             Box::new(LineSegmenter(icu_segmenter::LineSegmenter::new_dictionary(
198                 options,
199             )))
200         }
201         /// Construct a [`LineSegmenter`] with custom options and dictionary payload data for
202         /// Burmese, Khmer, Lao, and Thai, using a particular data source.
203         #[diplomat::rust_link(icu::segmenter::LineSegmenter::new_dictionary, FnInStruct)]
204         #[diplomat::attr(supports = non_exhaustive_structs, rename = "dictionary_with_options_and_provider")]
205         #[diplomat::attr(all(supports = non_exhaustive_structs, supports = fallible_constructors, supports = named_constructors), named_constructor = "dictionary_with_options_and_provider")]
206         #[diplomat::attr(all(not(supports = non_exhaustive_structs), supports = fallible_constructors, supports = named_constructors), named_constructor = "dictionary_with_options_v2_and_provider")]
207         #[cfg(feature = "buffer_provider")]
create_dictionary_with_options_v2_and_provider( provider: &DataProvider, content_locale: Option<&Locale>, options: LineBreakOptionsV2, ) -> Result<Box<LineSegmenter>, DataError>208         pub fn create_dictionary_with_options_v2_and_provider(
209             provider: &DataProvider,
210             content_locale: Option<&Locale>,
211             options: LineBreakOptionsV2,
212         ) -> Result<Box<LineSegmenter>, DataError> {
213             let mut options: LineBreakOptions = options.into();
214             options.content_locale = content_locale.map(|c| &c.0.id);
215 
216             Ok(Box::new(LineSegmenter(
217                 icu_segmenter::LineSegmenter::try_new_dictionary_with_buffer_provider(
218                     provider.get()?,
219                     options,
220                 )?,
221             )))
222         }
223         /// Segments a string.
224         ///
225         /// Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs according
226         /// to the WHATWG Encoding Standard.
227         #[diplomat::rust_link(icu::segmenter::LineSegmenter::segment_utf8, FnInStruct)]
228         #[diplomat::rust_link(icu::segmenter::LineSegmenter::segment_str, FnInStruct, hidden)]
229         #[diplomat::attr(not(supports = utf8_strings), disable)]
230         #[diplomat::attr(*, rename = "segment")]
segment_utf8<'a>( &'a self, input: &'a DiplomatStr, ) -> Box<LineBreakIteratorUtf8<'a>>231         pub fn segment_utf8<'a>(
232             &'a self,
233             input: &'a DiplomatStr,
234         ) -> Box<LineBreakIteratorUtf8<'a>> {
235             Box::new(LineBreakIteratorUtf8(self.0.segment_utf8(input)))
236         }
237 
238         /// Segments a string.
239         ///
240         /// Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs according
241         /// to the WHATWG Encoding Standard.
242         #[diplomat::rust_link(icu::segmenter::LineSegmenter::segment_utf16, FnInStruct)]
243         #[diplomat::attr(not(supports = utf8_strings), rename = "segment")]
244         #[diplomat::attr(supports = utf8_strings, rename = "segment16")]
segment_utf16<'a>( &'a self, input: &'a DiplomatStr16, ) -> Box<LineBreakIteratorUtf16<'a>>245         pub fn segment_utf16<'a>(
246             &'a self,
247             input: &'a DiplomatStr16,
248         ) -> Box<LineBreakIteratorUtf16<'a>> {
249             Box::new(LineBreakIteratorUtf16(self.0.segment_utf16(input)))
250         }
251 
252         /// Segments a Latin-1 string.
253         #[diplomat::rust_link(icu::segmenter::LineSegmenter::segment_latin1, FnInStruct)]
254         #[diplomat::attr(not(supports = utf8_strings), disable)]
segment_latin1<'a>(&'a self, input: &'a [u8]) -> Box<LineBreakIteratorLatin1<'a>>255         pub fn segment_latin1<'a>(&'a self, input: &'a [u8]) -> Box<LineBreakIteratorLatin1<'a>> {
256             Box::new(LineBreakIteratorLatin1(self.0.segment_latin1(input)))
257         }
258     }
259 
260     impl<'a> LineBreakIteratorUtf8<'a> {
261         /// Finds the next breakpoint. Returns -1 if at the end of the string or if the index is
262         /// out of range of a 32-bit signed integer.
263         #[diplomat::rust_link(icu::segmenter::LineBreakIterator::next, FnInStruct)]
264         #[diplomat::rust_link(
265             icu::segmenter::LineBreakIterator::Item,
266             AssociatedTypeInStruct,
267             hidden
268         )]
next(&mut self) -> i32269         pub fn next(&mut self) -> i32 {
270             self.0
271                 .next()
272                 .and_then(|u| i32::try_from(u).ok())
273                 .unwrap_or(-1)
274         }
275     }
276 
277     impl<'a> LineBreakIteratorUtf16<'a> {
278         /// Finds the next breakpoint. Returns -1 if at the end of the string or if the index is
279         /// out of range of a 32-bit signed integer.
280         #[diplomat::rust_link(icu::segmenter::LineBreakIterator::next, FnInStruct)]
281         #[diplomat::rust_link(
282             icu::segmenter::LineBreakIterator::Item,
283             AssociatedTypeInStruct,
284             hidden
285         )]
next(&mut self) -> i32286         pub fn next(&mut self) -> i32 {
287             self.0
288                 .next()
289                 .and_then(|u| i32::try_from(u).ok())
290                 .unwrap_or(-1)
291         }
292     }
293 
294     impl<'a> LineBreakIteratorLatin1<'a> {
295         /// Finds the next breakpoint. Returns -1 if at the end of the string or if the index is
296         /// out of range of a 32-bit signed integer.
297         #[diplomat::rust_link(icu::segmenter::LineBreakIterator::next, FnInStruct)]
298         #[diplomat::rust_link(
299             icu::segmenter::LineBreakIterator::Item,
300             AssociatedTypeInStruct,
301             hidden
302         )]
next(&mut self) -> i32303         pub fn next(&mut self) -> i32 {
304             self.0
305                 .next()
306                 .and_then(|u| i32::try_from(u).ok())
307                 .unwrap_or(-1)
308         }
309     }
310 }
311 
312 impl From<ffi::LineBreakOptionsV2> for icu_segmenter::options::LineBreakOptions<'_> {
from(other: ffi::LineBreakOptionsV2) -> Self313     fn from(other: ffi::LineBreakOptionsV2) -> Self {
314         let mut options = icu_segmenter::options::LineBreakOptions::default();
315         options.strictness = other.strictness.into_converted_option();
316         options.word_option = other.word_option.into_converted_option();
317         options
318     }
319 }
320