// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4 
5 #[diplomat::bridge]
6 #[diplomat::abi_rename = "icu4x_{0}_mv1"]
7 #[diplomat::attr(auto, namespace = "icu4x")]
8 pub mod ffi {
9     use alloc::boxed::Box;
10 
11     #[cfg(feature = "buffer_provider")]
12     use crate::provider::ffi::DataProvider;
13     #[cfg(any(feature = "compiled_data", feature = "buffer_provider"))]
14     use crate::{errors::ffi::DataError, locale_core::ffi::Locale};
15 
16     #[diplomat::opaque]
17     /// An ICU4X sentence-break segmenter, capable of finding sentence breakpoints in strings.
18     #[diplomat::rust_link(icu::segmenter::SentenceSegmenter, Struct)]
19     pub struct SentenceSegmenter(icu_segmenter::SentenceSegmenter);
20 
    // Opaque FFI handle for iterating sentence breakpoints over UTF-8 input; created by
    // `SentenceSegmenter::segment_utf8`. It wraps the potentially-ill-formed UTF-8 iterator and
    // supplies the single lifetime `'a` for both of that iterator's lifetime parameters.
    #[diplomat::opaque]
    #[diplomat::rust_link(icu::segmenter::SentenceBreakIterator, Struct)]
    #[diplomat::rust_link(
        icu::segmenter::SentenceBreakIteratorPotentiallyIllFormedUtf8,
        Typedef,
        hidden
    )]
    #[diplomat::rust_link(icu::segmenter::SentenceBreakIteratorUtf8, Typedef, hidden)]
    pub struct SentenceBreakIteratorUtf8<'a>(
        icu_segmenter::SentenceBreakIteratorPotentiallyIllFormedUtf8<'a, 'a>,
    );
32 
    // Opaque FFI handle for iterating sentence breakpoints over UTF-16 input; created by
    // `SentenceSegmenter::segment_utf16`. The single lifetime `'a` is supplied for both lifetime
    // parameters of the wrapped iterator.
    #[diplomat::opaque]
    #[diplomat::rust_link(icu::segmenter::SentenceBreakIterator, Struct)]
    #[diplomat::rust_link(icu::segmenter::SentenceBreakIteratorUtf16, Typedef, hidden)]
    pub struct SentenceBreakIteratorUtf16<'a>(icu_segmenter::SentenceBreakIteratorUtf16<'a, 'a>);
37 
    // Opaque FFI handle for iterating sentence breakpoints over Latin-1 input; created by
    // `SentenceSegmenter::segment_latin1`. The single lifetime `'a` is supplied for both lifetime
    // parameters of the wrapped iterator.
    #[diplomat::opaque]
    #[diplomat::rust_link(icu::segmenter::SentenceBreakIterator, Struct)]
    #[diplomat::rust_link(icu::segmenter::SentenceBreakIteratorLatin1, Typedef, hidden)]
    pub struct SentenceBreakIteratorLatin1<'a>(icu_segmenter::SentenceBreakIteratorLatin1<'a, 'a>);
42 
43     impl SentenceSegmenter {
44         /// Construct a [`SentenceSegmenter`] using compiled data. This does not assume any content locale.
45         #[diplomat::rust_link(icu::segmenter::SentenceSegmenter::new, FnInStruct)]
46         #[diplomat::rust_link(
47             icu::segmenter::options::SentenceBreakInvariantOptions,
48             Struct,
49             hidden
50         )]
51         #[diplomat::attr(auto, constructor)]
52         #[cfg(feature = "compiled_data")]
create() -> Box<SentenceSegmenter>53         pub fn create() -> Box<SentenceSegmenter> {
54             Box::new(SentenceSegmenter(icu_segmenter::SentenceSegmenter::new(
55                 Default::default(),
56             )))
57         }
58         /// Construct a [`SentenceSegmenter`] for content known to be of a given locale, using compiled data.
59         #[diplomat::rust_link(icu::segmenter::SentenceSegmenter::try_new, FnInStruct, hidden)]
60         #[diplomat::rust_link(icu::segmenter::options::SentenceBreakOptions, Struct, hidden)]
61         #[diplomat::attr(all(supports = fallible_constructors, supports = named_constructors), named_constructor = "with_content_locale")]
62         #[cfg(feature = "compiled_data")]
create_with_content_locale( locale: &Locale, ) -> Result<Box<SentenceSegmenter>, DataError>63         pub fn create_with_content_locale(
64             locale: &Locale,
65         ) -> Result<Box<SentenceSegmenter>, DataError> {
66             Ok(Box::new(SentenceSegmenter(
67                 icu_segmenter::SentenceSegmenter::try_new(locale.into())?,
68             )))
69         }
70 
71         /// Construct a [`SentenceSegmenter`]  for content known to be of a given locale, using a particular data source.
72         #[diplomat::rust_link(icu::segmenter::SentenceSegmenter::try_new, FnInStruct, hidden)]
73         #[diplomat::attr(all(supports = fallible_constructors, supports = named_constructors), named_constructor = "with_content_locale_and_provider")]
74         #[cfg(feature = "buffer_provider")]
create_with_content_locale_and_provider( provider: &DataProvider, locale: &Locale, ) -> Result<Box<SentenceSegmenter>, DataError>75         pub fn create_with_content_locale_and_provider(
76             provider: &DataProvider,
77             locale: &Locale,
78         ) -> Result<Box<SentenceSegmenter>, DataError> {
79             Ok(Box::new(SentenceSegmenter(
80                 icu_segmenter::SentenceSegmenter::try_new_with_buffer_provider(
81                     provider.get()?,
82                     locale.into(),
83                 )?,
84             )))
85         }
86 
87         /// Segments a string.
88         ///
89         /// Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs according
90         /// to the WHATWG Encoding Standard.
91         #[diplomat::rust_link(icu::segmenter::SentenceSegmenter::segment_utf8, FnInStruct)]
92         #[diplomat::rust_link(icu::segmenter::SentenceSegmenter::segment_str, FnInStruct, hidden)]
93         #[diplomat::attr(not(supports = utf8_strings), disable)]
94         #[diplomat::attr(*, rename = "segment")]
segment_utf8<'a>( &'a self, input: &'a DiplomatStr, ) -> Box<SentenceBreakIteratorUtf8<'a>>95         pub fn segment_utf8<'a>(
96             &'a self,
97             input: &'a DiplomatStr,
98         ) -> Box<SentenceBreakIteratorUtf8<'a>> {
99             Box::new(SentenceBreakIteratorUtf8(self.0.segment_utf8(input)))
100         }
101 
102         /// Segments a string.
103         ///
104         /// Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs according
105         /// to the WHATWG Encoding Standard.
106         #[diplomat::rust_link(icu::segmenter::SentenceSegmenter::segment_utf16, FnInStruct)]
107         #[diplomat::attr(not(supports = utf8_strings), rename = "segment")]
108         #[diplomat::attr(supports = utf8_strings, rename = "segment16")]
segment_utf16<'a>( &'a self, input: &'a DiplomatStr16, ) -> Box<SentenceBreakIteratorUtf16<'a>>109         pub fn segment_utf16<'a>(
110             &'a self,
111             input: &'a DiplomatStr16,
112         ) -> Box<SentenceBreakIteratorUtf16<'a>> {
113             Box::new(SentenceBreakIteratorUtf16(self.0.segment_utf16(input)))
114         }
115 
116         /// Segments a Latin-1 string.
117         #[diplomat::rust_link(icu::segmenter::SentenceSegmenter::segment_latin1, FnInStruct)]
118         #[diplomat::attr(not(supports = utf8_strings), disable)]
segment_latin1<'a>( &'a self, input: &'a [u8], ) -> Box<SentenceBreakIteratorLatin1<'a>>119         pub fn segment_latin1<'a>(
120             &'a self,
121             input: &'a [u8],
122         ) -> Box<SentenceBreakIteratorLatin1<'a>> {
123             Box::new(SentenceBreakIteratorLatin1(self.0.segment_latin1(input)))
124         }
125     }
126 
127     impl<'a> SentenceBreakIteratorUtf8<'a> {
128         /// Finds the next breakpoint. Returns -1 if at the end of the string or if the index is
129         /// out of range of a 32-bit signed integer.
130         #[diplomat::rust_link(icu::segmenter::SentenceBreakIterator::next, FnInStruct)]
131         #[diplomat::rust_link(
132             icu::segmenter::SentenceBreakIterator::Item,
133             AssociatedTypeInStruct,
134             hidden
135         )]
next(&mut self) -> i32136         pub fn next(&mut self) -> i32 {
137             self.0
138                 .next()
139                 .and_then(|u| i32::try_from(u).ok())
140                 .unwrap_or(-1)
141         }
142     }
143 
144     impl<'a> SentenceBreakIteratorUtf16<'a> {
145         /// Finds the next breakpoint. Returns -1 if at the end of the string or if the index is
146         /// out of range of a 32-bit signed integer.
147         #[diplomat::rust_link(icu::segmenter::SentenceBreakIterator::next, FnInStruct)]
148         #[diplomat::rust_link(
149             icu::segmenter::SentenceBreakIterator::Item,
150             AssociatedTypeInStruct,
151             hidden
152         )]
next(&mut self) -> i32153         pub fn next(&mut self) -> i32 {
154             self.0
155                 .next()
156                 .and_then(|u| i32::try_from(u).ok())
157                 .unwrap_or(-1)
158         }
159     }
160 
161     impl<'a> SentenceBreakIteratorLatin1<'a> {
162         /// Finds the next breakpoint. Returns -1 if at the end of the string or if the index is
163         /// out of range of a 32-bit signed integer.
164         #[diplomat::rust_link(icu::segmenter::SentenceBreakIterator::next, FnInStruct)]
165         #[diplomat::rust_link(
166             icu::segmenter::SentenceBreakIterator::Item,
167             AssociatedTypeInStruct,
168             hidden
169         )]
next(&mut self) -> i32170         pub fn next(&mut self) -> i32 {
171             self.0
172                 .next()
173                 .and_then(|u| i32::try_from(u).ok())
174                 .unwrap_or(-1)
175         }
176     }
177 }
178 
179 impl<'a> From<&'a crate::locale_core::ffi::Locale>
180     for icu_segmenter::options::SentenceBreakOptions<'a>
181 {
from(other: &'a crate::locale_core::ffi::Locale) -> Self182     fn from(other: &'a crate::locale_core::ffi::Locale) -> Self {
183         let mut options = icu_segmenter::options::SentenceBreakOptions::default();
184         options.content_locale = Some(&other.0.id);
185         options
186     }
187 }
188