1 // This file is part of ICU4X. For terms of use, please see the file 2 // called LICENSE at the top level of the ICU4X source tree 3 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 4 5 #[diplomat::bridge] 6 #[diplomat::abi_rename = "icu4x_{0}_mv1"] 7 #[diplomat::attr(auto, namespace = "icu4x")] 8 pub mod ffi { 9 use alloc::boxed::Box; 10 11 #[cfg(feature = "buffer_provider")] 12 use crate::{errors::ffi::DataError, provider::ffi::DataProvider}; 13 14 #[diplomat::opaque] 15 /// An ICU4X grapheme-cluster-break segmenter, capable of finding grapheme cluster breakpoints 16 /// in strings. 17 #[diplomat::rust_link(icu::segmenter::GraphemeClusterSegmenter, Struct)] 18 pub struct GraphemeClusterSegmenter(icu_segmenter::GraphemeClusterSegmenter); 19 20 #[diplomat::opaque] 21 #[diplomat::rust_link(icu::segmenter::GraphemeClusterBreakIterator, Struct)] 22 #[diplomat::rust_link( 23 icu::segmenter::GraphemeClusterBreakIteratorPotentiallyIllFormedUtf8, 24 Typedef, 25 hidden 26 )] 27 #[diplomat::rust_link(icu::segmenter::GraphemeClusterBreakIteratorUtf8, Typedef, hidden)] 28 pub struct GraphemeClusterBreakIteratorUtf8<'a>( 29 icu_segmenter::GraphemeClusterBreakIteratorPotentiallyIllFormedUtf8<'a, 'a>, 30 ); 31 32 #[diplomat::opaque] 33 #[diplomat::rust_link(icu::segmenter::GraphemeClusterBreakIterator, Struct)] 34 #[diplomat::rust_link(icu::segmenter::GraphemeClusterBreakIteratorUtf16, Typedef, hidden)] 35 pub struct GraphemeClusterBreakIteratorUtf16<'a>( 36 icu_segmenter::GraphemeClusterBreakIteratorUtf16<'a, 'a>, 37 ); 38 39 #[diplomat::opaque] 40 #[diplomat::rust_link(icu::segmenter::GraphemeClusterBreakIterator, Struct)] 41 #[diplomat::rust_link(icu::segmenter::GraphemeClusterBreakIteratorLatin1, Typedef, hidden)] 42 pub struct GraphemeClusterBreakIteratorLatin1<'a>( 43 icu_segmenter::GraphemeClusterBreakIteratorLatin1<'a, 'a>, 44 ); 45 46 impl GraphemeClusterSegmenter { 47 /// Construct an [`GraphemeClusterSegmenter`] using compiled data. 48 #[diplomat::rust_link(icu::segmenter::GraphemeClusterSegmenter::new, FnInStruct)] 49 #[diplomat::attr(auto, constructor)] 50 #[cfg(feature = "compiled_data")] create() -> Box<GraphemeClusterSegmenter>51 pub fn create() -> Box<GraphemeClusterSegmenter> { 52 Box::new(GraphemeClusterSegmenter( 53 icu_segmenter::GraphemeClusterSegmenter::new(), 54 )) 55 } 56 /// Construct an [`GraphemeClusterSegmenter`]. 57 #[diplomat::rust_link(icu::segmenter::GraphemeClusterSegmenter::new, FnInStruct)] 58 #[diplomat::attr(all(supports = fallible_constructors, supports = named_constructors), named_constructor = "with_provider")] 59 #[cfg(feature = "buffer_provider")] create_with_provider( provider: &DataProvider, ) -> Result<Box<GraphemeClusterSegmenter>, DataError>60 pub fn create_with_provider( 61 provider: &DataProvider, 62 ) -> Result<Box<GraphemeClusterSegmenter>, DataError> { 63 Ok(Box::new(GraphemeClusterSegmenter( 64 icu_segmenter::GraphemeClusterSegmenter::try_new_with_buffer_provider( 65 provider.get()?, 66 )?, 67 ))) 68 } 69 /// Segments a string. 70 /// 71 /// Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs according 72 /// to the WHATWG Encoding Standard. 73 #[diplomat::rust_link( 74 icu::segmenter::GraphemeClusterSegmenter::segment_str, 75 FnInStruct, 76 hidden 77 )] 78 #[diplomat::rust_link(icu::segmenter::GraphemeClusterSegmenter::segment_utf8, FnInStruct)] 79 #[diplomat::attr(not(supports = utf8_strings), disable)] 80 #[diplomat::attr(*, rename = "segment")] segment_utf8<'a>( &'a self, input: &'a DiplomatStr, ) -> Box<GraphemeClusterBreakIteratorUtf8<'a>>81 pub fn segment_utf8<'a>( 82 &'a self, 83 input: &'a DiplomatStr, 84 ) -> Box<GraphemeClusterBreakIteratorUtf8<'a>> { 85 Box::new(GraphemeClusterBreakIteratorUtf8(self.0.segment_utf8(input))) 86 } 87 88 /// Segments a string. 89 /// 90 /// Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs according 91 /// to the WHATWG Encoding Standard. 92 #[diplomat::rust_link(icu::segmenter::GraphemeClusterSegmenter::segment_utf16, FnInStruct)] 93 #[diplomat::attr(not(supports = utf8_strings), rename = "segment")] 94 #[diplomat::attr(supports = utf8_strings, rename = "segment16")] segment_utf16<'a>( &'a self, input: &'a DiplomatStr16, ) -> Box<GraphemeClusterBreakIteratorUtf16<'a>>95 pub fn segment_utf16<'a>( 96 &'a self, 97 input: &'a DiplomatStr16, 98 ) -> Box<GraphemeClusterBreakIteratorUtf16<'a>> { 99 Box::new(GraphemeClusterBreakIteratorUtf16( 100 self.0.segment_utf16(input), 101 )) 102 } 103 104 /// Segments a Latin-1 string. 105 #[diplomat::rust_link(icu::segmenter::GraphemeClusterSegmenter::segment_latin1, FnInStruct)] 106 #[diplomat::attr(not(supports = utf8_strings), disable)] segment_latin1<'a>( &'a self, input: &'a [u8], ) -> Box<GraphemeClusterBreakIteratorLatin1<'a>>107 pub fn segment_latin1<'a>( 108 &'a self, 109 input: &'a [u8], 110 ) -> Box<GraphemeClusterBreakIteratorLatin1<'a>> { 111 Box::new(GraphemeClusterBreakIteratorLatin1( 112 self.0.segment_latin1(input), 113 )) 114 } 115 } 116 117 impl<'a> GraphemeClusterBreakIteratorUtf8<'a> { 118 /// Finds the next breakpoint. Returns -1 if at the end of the string or if the index is 119 /// out of range of a 32-bit signed integer. 120 #[diplomat::rust_link(icu::segmenter::GraphemeClusterBreakIterator::next, FnInStruct)] 121 #[diplomat::rust_link( 122 icu::segmenter::GraphemeClusterBreakIterator::Item, 123 AssociatedTypeInStruct, 124 hidden 125 )] next(&mut self) -> i32126 pub fn next(&mut self) -> i32 { 127 self.0 128 .next() 129 .and_then(|u| i32::try_from(u).ok()) 130 .unwrap_or(-1) 131 } 132 } 133 134 impl<'a> GraphemeClusterBreakIteratorUtf16<'a> { 135 /// Finds the next breakpoint. Returns -1 if at the end of the string or if the index is 136 /// out of range of a 32-bit signed integer. 137 #[diplomat::rust_link(icu::segmenter::GraphemeClusterBreakIterator::next, FnInStruct)] 138 #[diplomat::rust_link( 139 icu::segmenter::GraphemeClusterBreakIterator::Item, 140 AssociatedTypeInStruct, 141 hidden 142 )] next(&mut self) -> i32143 pub fn next(&mut self) -> i32 { 144 self.0 145 .next() 146 .and_then(|u| i32::try_from(u).ok()) 147 .unwrap_or(-1) 148 } 149 } 150 151 impl<'a> GraphemeClusterBreakIteratorLatin1<'a> { 152 /// Finds the next breakpoint. Returns -1 if at the end of the string or if the index is 153 /// out of range of a 32-bit signed integer. 154 #[diplomat::rust_link(icu::segmenter::GraphemeClusterBreakIterator::next, FnInStruct)] 155 #[diplomat::rust_link( 156 icu::segmenter::GraphemeClusterBreakIterator::Item, 157 AssociatedTypeInStruct, 158 hidden 159 )] next(&mut self) -> i32160 pub fn next(&mut self) -> i32 { 161 self.0 162 .next() 163 .and_then(|u| i32::try_from(u).ok()) 164 .unwrap_or(-1) 165 } 166 } 167 } 168