1 // This file is part of ICU4X. For terms of use, please see the file 2 // called LICENSE at the top level of the ICU4X source tree 3 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 4 5 //! This module defines all available properties. 6 //! 7 //! Properties may be empty marker types and implement [`BinaryProperty`], or enumerations[^1] 8 //! and implement [`EnumeratedProperty`]. 9 //! 10 //! [`BinaryProperty`]s are queried through a [`CodePointSetData`](crate::CodePointSetData), 11 //! while [`EnumeratedProperty`]s are queried through [`CodePointMapData`](crate::CodePointMapData). 12 //! 13 //! In addition, some [`EnumeratedProperty`]s also implement [`ParseableEnumeratedProperty`] or 14 //! [`NamedEnumeratedProperty`]. For these properties, [`PropertyParser`](crate::PropertyParser), 15 //! [`PropertyNamesLong`](crate::PropertyNamesLong), and [`PropertyNamesShort`](crate::PropertyNamesShort) 16 //! can be constructed. 17 //! 18 //! [^1]: either Rust `enum`s, or Rust `struct`s with associated constants (open enums) 19 20 pub use crate::names::{NamedEnumeratedProperty, ParseableEnumeratedProperty}; 21 22 pub use crate::bidi::{BidiMirroringGlyph, BidiPairedBracketType}; 23 24 /// See [`test_enumerated_property_completeness`] for usage. 25 /// Example input: 26 /// ```ignore 27 /// impl EastAsianWidth { 28 /// pub const Neutral: EastAsianWidth = EastAsianWidth(0); 29 /// pub const Ambiguous: EastAsianWidth = EastAsianWidth(1); 30 /// ... 31 /// } 32 /// ``` 33 /// Produces `const ALL_VALUES = &[("Neutral", 0u16), ...];` by 34 /// explicitly casting first field of the struct to u16. 35 macro_rules! 
create_const_array { 36 ( 37 $ ( #[$meta:meta] )* 38 impl $enum_ty:ident { 39 $( $(#[$const_meta:meta])* $v:vis const $i:ident: $t:ty = $e:expr; )* 40 } 41 ) => { 42 $( #[$meta] )* 43 impl $enum_ty { 44 $( 45 $(#[$const_meta])* 46 $v const $i: $t = $e; 47 )* 48 49 /// All possible values of this enum in the Unicode version 50 /// from this ICU4X release. 51 pub const ALL_VALUES: &'static [$enum_ty] = &[ 52 $($enum_ty::$i),* 53 ]; 54 } 55 56 57 impl From<$enum_ty> for u16 { 58 fn from(other: $enum_ty) -> Self { 59 other.0 as u16 60 } 61 } 62 } 63 } 64 65 pub use crate::code_point_map::EnumeratedProperty; 66 67 macro_rules! make_enumerated_property { 68 ( 69 name: $name:literal; 70 short_name: $short_name:literal; 71 ident: $value_ty:path; 72 data_marker: $data_marker:ty; 73 singleton: $singleton:ident; 74 $(ule_ty: $ule_ty:ty;)? 75 func: 76 $(#[$doc:meta])* 77 ) => { 78 impl crate::private::Sealed for $value_ty {} 79 80 impl EnumeratedProperty for $value_ty { 81 type DataMarker = $data_marker; 82 #[cfg(feature = "compiled_data")] 83 const SINGLETON: &'static crate::provider::PropertyCodePointMap<'static, Self> = 84 crate::provider::Baked::$singleton; 85 const NAME: &'static [u8] = $name.as_bytes(); 86 const SHORT_NAME: &'static [u8] = $short_name.as_bytes(); 87 } 88 89 $( 90 impl zerovec::ule::AsULE for $value_ty { 91 type ULE = $ule_ty; 92 93 fn to_unaligned(self) -> Self::ULE { 94 self.0.to_unaligned() 95 } 96 fn from_unaligned(unaligned: Self::ULE) -> Self { 97 Self(zerovec::ule::AsULE::from_unaligned(unaligned)) 98 } 99 } 100 )? 101 }; 102 } 103 104 /// Enumerated property Bidi_Class 105 /// 106 /// These are the categories required by the Unicode Bidirectional Algorithm. 107 /// For the property values, see [Bidirectional Class Values](https://unicode.org/reports/tr44/#Bidi_Class_Values). 108 /// For more information, see [Unicode Standard Annex #9](https://unicode.org/reports/tr41/tr41-28.html#UAX9). 
109 #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] 110 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] 111 #[cfg_attr(feature = "datagen", derive(databake::Bake))] 112 #[cfg_attr(feature = "datagen", databake(path = icu_properties::props))] 113 #[allow(clippy::exhaustive_structs)] // newtype 114 #[repr(transparent)] 115 pub struct BidiClass(pub(crate) u8); 116 117 impl BidiClass { 118 /// Returns an ICU4C `UBidiClass` value. to_icu4c_value(self) -> u8119 pub const fn to_icu4c_value(self) -> u8 { 120 self.0 121 } 122 /// Constructor from an ICU4C `UBidiClass` value. from_icu4c_value(value: u8) -> Self123 pub const fn from_icu4c_value(value: u8) -> Self { 124 Self(value) 125 } 126 } 127 128 create_const_array! { 129 #[allow(non_upper_case_globals)] 130 impl BidiClass { 131 /// (`L`) any strong left-to-right character 132 pub const LeftToRight: BidiClass = BidiClass(0); 133 /// (`R`) any strong right-to-left (non-Arabic-type) character 134 pub const RightToLeft: BidiClass = BidiClass(1); 135 /// (`EN`) any ASCII digit or Eastern Arabic-Indic digit 136 pub const EuropeanNumber: BidiClass = BidiClass(2); 137 /// (`ES`) plus and minus signs 138 pub const EuropeanSeparator: BidiClass = BidiClass(3); 139 /// (`ET`) a terminator in a numeric format context, includes currency signs 140 pub const EuropeanTerminator: BidiClass = BidiClass(4); 141 /// (`AN`) any Arabic-Indic digit 142 pub const ArabicNumber: BidiClass = BidiClass(5); 143 /// (`CS`) commas, colons, and slashes 144 pub const CommonSeparator: BidiClass = BidiClass(6); 145 /// (`B`) various newline characters 146 pub const ParagraphSeparator: BidiClass = BidiClass(7); 147 /// (`S`) various segment-related control codes 148 pub const SegmentSeparator: BidiClass = BidiClass(8); 149 /// (`WS`) spaces 150 pub const WhiteSpace: BidiClass = BidiClass(9); 151 /// (`ON`) most other symbols and punctuation marks 152 pub const OtherNeutral: BidiClass = BidiClass(10); 153 /// 
(`LRE`) U+202A: the LR embedding control 154 pub const LeftToRightEmbedding: BidiClass = BidiClass(11); 155 /// (`LRO`) U+202D: the LR override control 156 pub const LeftToRightOverride: BidiClass = BidiClass(12); 157 /// (`AL`) any strong right-to-left (Arabic-type) character 158 pub const ArabicLetter: BidiClass = BidiClass(13); 159 /// (`RLE`) U+202B: the RL embedding control 160 pub const RightToLeftEmbedding: BidiClass = BidiClass(14); 161 /// (`RLO`) U+202E: the RL override control 162 pub const RightToLeftOverride: BidiClass = BidiClass(15); 163 /// (`PDF`) U+202C: terminates an embedding or override control 164 pub const PopDirectionalFormat: BidiClass = BidiClass(16); 165 /// (`NSM`) any nonspacing mark 166 pub const NonspacingMark: BidiClass = BidiClass(17); 167 /// (`BN`) most format characters, control codes, or noncharacters 168 pub const BoundaryNeutral: BidiClass = BidiClass(18); 169 /// (`FSI`) U+2068: the first strong isolate control 170 pub const FirstStrongIsolate: BidiClass = BidiClass(19); 171 /// (`LRI`) U+2066: the LR isolate control 172 pub const LeftToRightIsolate: BidiClass = BidiClass(20); 173 /// (`RLI`) U+2067: the RL isolate control 174 pub const RightToLeftIsolate: BidiClass = BidiClass(21); 175 /// (`PDI`) U+2069: terminates an isolate control 176 pub const PopDirectionalIsolate: BidiClass = BidiClass(22); 177 } 178 } 179 180 make_enumerated_property! { 181 name: "Bidi_Class"; 182 short_name: "bc"; 183 ident: BidiClass; 184 data_marker: crate::provider::BidiClassV1; 185 singleton: SINGLETON_BIDI_CLASS_V1; 186 ule_ty: u8; 187 func: 188 /// Return a [`CodePointMapDataBorrowed`] for the Bidi_Class Unicode enumerated property. See [`BidiClass`]. 
189 /// 190 /// # Example 191 /// 192 /// ``` 193 /// use icu::properties::{maps, BidiClass}; 194 /// 195 /// assert_eq!(maps::bidi_class().get('y'), BidiClass::LeftToRight); // U+0079 196 /// assert_eq!(maps::bidi_class().get('ع'), BidiClass::ArabicLetter); // U+0639 197 /// ``` 198 } 199 200 // This exists to encapsulate GeneralCategoryULE so that it can exist in the provider module rather than props 201 pub(crate) mod gc { 202 /// Enumerated property General_Category. 203 /// 204 /// General_Category specifies the most general classification of a code point, usually 205 /// determined based on the primary characteristic of the assigned character. For example, is the 206 /// character a letter, a mark, a number, punctuation, or a symbol, and if so, of what type? 207 /// 208 /// GeneralCategory only supports specific subcategories (eg `UppercaseLetter`). 209 /// It does not support grouped categories (eg `Letter`). For grouped categories, use [`GeneralCategoryGroup`]( 210 /// crate::props::GeneralCategoryGroup). 
211 #[derive(Copy, Clone, PartialEq, Eq, Debug, Ord, PartialOrd, Hash)] 212 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] 213 #[cfg_attr(feature = "datagen", derive(databake::Bake))] 214 #[cfg_attr(feature = "datagen", databake(path = icu_properties::props))] 215 #[allow(clippy::exhaustive_enums)] // this type is stable 216 #[zerovec::make_ule(GeneralCategoryULE)] 217 #[repr(u8)] 218 pub enum GeneralCategory { 219 /// (`Cn`) A reserved unassigned code point or a noncharacter 220 Unassigned = 0, 221 222 /// (`Lu`) An uppercase letter 223 UppercaseLetter = 1, 224 /// (`Ll`) A lowercase letter 225 LowercaseLetter = 2, 226 /// (`Lt`) A digraphic letter, with first part uppercase 227 TitlecaseLetter = 3, 228 /// (`Lm`) A modifier letter 229 ModifierLetter = 4, 230 /// (`Lo`) Other letters, including syllables and ideographs 231 OtherLetter = 5, 232 233 /// (`Mn`) A nonspacing combining mark (zero advance width) 234 NonspacingMark = 6, 235 /// (`Mc`) A spacing combining mark (positive advance width) 236 SpacingMark = 8, 237 /// (`Me`) An enclosing combining mark 238 EnclosingMark = 7, 239 240 /// (`Nd`) A decimal digit 241 DecimalNumber = 9, 242 /// (`Nl`) A letterlike numeric character 243 LetterNumber = 10, 244 /// (`No`) A numeric character of other type 245 OtherNumber = 11, 246 247 /// (`Zs`) A space character (of various non-zero widths) 248 SpaceSeparator = 12, 249 /// (`Zl`) U+2028 LINE SEPARATOR only 250 LineSeparator = 13, 251 /// (`Zp`) U+2029 PARAGRAPH SEPARATOR only 252 ParagraphSeparator = 14, 253 254 /// (`Cc`) A C0 or C1 control code 255 Control = 15, 256 /// (`Cf`) A format control character 257 Format = 16, 258 /// (`Co`) A private-use character 259 PrivateUse = 17, 260 /// (`Cs`) A surrogate code point 261 Surrogate = 18, 262 263 /// (`Pd`) A dash or hyphen punctuation mark 264 DashPunctuation = 19, 265 /// (`Ps`) An opening punctuation mark (of a pair) 266 OpenPunctuation = 20, 267 /// (`Pe`) A closing punctuation mark 
(of a pair) 268 ClosePunctuation = 21, 269 /// (`Pc`) A connecting punctuation mark, like a tie 270 ConnectorPunctuation = 22, 271 /// (`Pi`) An initial quotation mark 272 InitialPunctuation = 28, 273 /// (`Pf`) A final quotation mark 274 FinalPunctuation = 29, 275 /// (`Po`) A punctuation mark of other type 276 OtherPunctuation = 23, 277 278 /// (`Sm`) A symbol of mathematical use 279 MathSymbol = 24, 280 /// (`Sc`) A currency sign 281 CurrencySymbol = 25, 282 /// (`Sk`) A non-letterlike modifier symbol 283 ModifierSymbol = 26, 284 /// (`So`) A symbol of other type 285 OtherSymbol = 27, 286 } 287 } 288 289 pub use gc::GeneralCategory; 290 291 impl GeneralCategory { 292 /// All possible values of this enum 293 pub const ALL_VALUES: &'static [GeneralCategory] = &[ 294 GeneralCategory::Unassigned, 295 GeneralCategory::UppercaseLetter, 296 GeneralCategory::LowercaseLetter, 297 GeneralCategory::TitlecaseLetter, 298 GeneralCategory::ModifierLetter, 299 GeneralCategory::OtherLetter, 300 GeneralCategory::NonspacingMark, 301 GeneralCategory::SpacingMark, 302 GeneralCategory::EnclosingMark, 303 GeneralCategory::DecimalNumber, 304 GeneralCategory::LetterNumber, 305 GeneralCategory::OtherNumber, 306 GeneralCategory::SpaceSeparator, 307 GeneralCategory::LineSeparator, 308 GeneralCategory::ParagraphSeparator, 309 GeneralCategory::Control, 310 GeneralCategory::Format, 311 GeneralCategory::PrivateUse, 312 GeneralCategory::Surrogate, 313 GeneralCategory::DashPunctuation, 314 GeneralCategory::OpenPunctuation, 315 GeneralCategory::ClosePunctuation, 316 GeneralCategory::ConnectorPunctuation, 317 GeneralCategory::InitialPunctuation, 318 GeneralCategory::FinalPunctuation, 319 GeneralCategory::OtherPunctuation, 320 GeneralCategory::MathSymbol, 321 GeneralCategory::CurrencySymbol, 322 GeneralCategory::ModifierSymbol, 323 GeneralCategory::OtherSymbol, 324 ]; 325 } 326 327 #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Hash, Default)] 328 /// Error value for `impl TryFrom<u8> 
for GeneralCategory`. 329 #[non_exhaustive] 330 pub struct GeneralCategoryOutOfBoundsError; 331 332 impl TryFrom<u8> for GeneralCategory { 333 type Error = GeneralCategoryOutOfBoundsError; 334 /// Construct this [`GeneralCategory`] from an integer, returning 335 /// an error if it is out of bounds try_from(val: u8) -> Result<Self, GeneralCategoryOutOfBoundsError>336 fn try_from(val: u8) -> Result<Self, GeneralCategoryOutOfBoundsError> { 337 GeneralCategory::new_from_u8(val).ok_or(GeneralCategoryOutOfBoundsError) 338 } 339 } 340 341 make_enumerated_property! { 342 name: "General_Category"; 343 short_name: "gc"; 344 ident: GeneralCategory; 345 data_marker: crate::provider::GeneralCategoryV1; 346 singleton: SINGLETON_GENERAL_CATEGORY_V1; 347 func: 348 /// Return a [`CodePointMapDataBorrowed`] for the General_Category Unicode enumerated property. See [`GeneralCategory`]. 349 /// 350 /// # Example 351 /// 352 /// ``` 353 /// use icu::properties::{maps, GeneralCategory}; 354 /// 355 /// assert_eq!(maps::general_category().get('木'), GeneralCategory::OtherLetter); // U+6728 356 /// assert_eq!(maps::general_category().get(''), GeneralCategory::OtherSymbol); // U+1F383 JACK-O-LANTERN 357 /// ``` 358 } 359 360 /// Groupings of multiple General_Category property values. 361 /// 362 /// Instances of `GeneralCategoryGroup` represent the defined multi-category 363 /// values that are useful for users in certain contexts, such as regex. In 364 /// other words, unlike [`GeneralCategory`], this supports groups of general 365 /// categories: for example, `Letter` /// is the union of `UppercaseLetter`, 366 /// `LowercaseLetter`, etc. 367 /// 368 /// See <https://www.unicode.org/reports/tr44/> . 369 /// 370 /// The discriminants correspond to the `U_GC_XX_MASK` constants in ICU4C. 371 /// Unlike [`GeneralCategory`], this supports groups of general categories: for example, `Letter` 372 /// is the union of `UppercaseLetter`, `LowercaseLetter`, etc. 
373 /// 374 /// See `UCharCategory` and `U_GET_GC_MASK` in ICU4C. 375 #[derive(Copy, Clone, PartialEq, Debug, Eq)] 376 #[allow(clippy::exhaustive_structs)] // newtype 377 #[repr(transparent)] 378 pub struct GeneralCategoryGroup(pub(crate) u32); 379 380 impl crate::private::Sealed for GeneralCategoryGroup {} 381 382 use GeneralCategory as GC; 383 use GeneralCategoryGroup as GCG; 384 385 #[allow(non_upper_case_globals)] 386 impl GeneralCategoryGroup { 387 /// (`Lu`) An uppercase letter 388 pub const UppercaseLetter: GeneralCategoryGroup = GCG(1 << (GC::UppercaseLetter as u32)); 389 /// (`Ll`) A lowercase letter 390 pub const LowercaseLetter: GeneralCategoryGroup = GCG(1 << (GC::LowercaseLetter as u32)); 391 /// (`Lt`) A digraphic letter, with first part uppercase 392 pub const TitlecaseLetter: GeneralCategoryGroup = GCG(1 << (GC::TitlecaseLetter as u32)); 393 /// (`Lm`) A modifier letter 394 pub const ModifierLetter: GeneralCategoryGroup = GCG(1 << (GC::ModifierLetter as u32)); 395 /// (`Lo`) Other letters, including syllables and ideographs 396 pub const OtherLetter: GeneralCategoryGroup = GCG(1 << (GC::OtherLetter as u32)); 397 /// (`LC`) The union of UppercaseLetter, LowercaseLetter, and TitlecaseLetter 398 pub const CasedLetter: GeneralCategoryGroup = GCG((1 << (GC::UppercaseLetter as u32)) 399 | (1 << (GC::LowercaseLetter as u32)) 400 | (1 << (GC::TitlecaseLetter as u32))); 401 /// (`L`) The union of all letter categories 402 pub const Letter: GeneralCategoryGroup = GCG((1 << (GC::UppercaseLetter as u32)) 403 | (1 << (GC::LowercaseLetter as u32)) 404 | (1 << (GC::TitlecaseLetter as u32)) 405 | (1 << (GC::ModifierLetter as u32)) 406 | (1 << (GC::OtherLetter as u32))); 407 408 /// (`Mn`) A nonspacing combining mark (zero advance width) 409 pub const NonspacingMark: GeneralCategoryGroup = GCG(1 << (GC::NonspacingMark as u32)); 410 /// (`Mc`) A spacing combining mark (positive advance width) 411 pub const EnclosingMark: GeneralCategoryGroup = GCG(1 << 
(GC::EnclosingMark as u32)); 412 /// (`Me`) An enclosing combining mark 413 pub const SpacingMark: GeneralCategoryGroup = GCG(1 << (GC::SpacingMark as u32)); 414 /// (`M`) The union of all mark categories 415 pub const Mark: GeneralCategoryGroup = GCG((1 << (GC::NonspacingMark as u32)) 416 | (1 << (GC::EnclosingMark as u32)) 417 | (1 << (GC::SpacingMark as u32))); 418 419 /// (`Nd`) A decimal digit 420 pub const DecimalNumber: GeneralCategoryGroup = GCG(1 << (GC::DecimalNumber as u32)); 421 /// (`Nl`) A letterlike numeric character 422 pub const LetterNumber: GeneralCategoryGroup = GCG(1 << (GC::LetterNumber as u32)); 423 /// (`No`) A numeric character of other type 424 pub const OtherNumber: GeneralCategoryGroup = GCG(1 << (GC::OtherNumber as u32)); 425 /// (`N`) The union of all number categories 426 pub const Number: GeneralCategoryGroup = GCG((1 << (GC::DecimalNumber as u32)) 427 | (1 << (GC::LetterNumber as u32)) 428 | (1 << (GC::OtherNumber as u32))); 429 430 /// (`Zs`) A space character (of various non-zero widths) 431 pub const SpaceSeparator: GeneralCategoryGroup = GCG(1 << (GC::SpaceSeparator as u32)); 432 /// (`Zl`) U+2028 LINE SEPARATOR only 433 pub const LineSeparator: GeneralCategoryGroup = GCG(1 << (GC::LineSeparator as u32)); 434 /// (`Zp`) U+2029 PARAGRAPH SEPARATOR only 435 pub const ParagraphSeparator: GeneralCategoryGroup = GCG(1 << (GC::ParagraphSeparator as u32)); 436 /// (`Z`) The union of all separator categories 437 pub const Separator: GeneralCategoryGroup = GCG((1 << (GC::SpaceSeparator as u32)) 438 | (1 << (GC::LineSeparator as u32)) 439 | (1 << (GC::ParagraphSeparator as u32))); 440 441 /// (`Cc`) A C0 or C1 control code 442 pub const Control: GeneralCategoryGroup = GCG(1 << (GC::Control as u32)); 443 /// (`Cf`) A format control character 444 pub const Format: GeneralCategoryGroup = GCG(1 << (GC::Format as u32)); 445 /// (`Co`) A private-use character 446 pub const PrivateUse: GeneralCategoryGroup = GCG(1 << (GC::PrivateUse as u32)); 
447 /// (`Cs`) A surrogate code point 448 pub const Surrogate: GeneralCategoryGroup = GCG(1 << (GC::Surrogate as u32)); 449 /// (`Cn`) A reserved unassigned code point or a noncharacter 450 pub const Unassigned: GeneralCategoryGroup = GCG(1 << (GC::Unassigned as u32)); 451 /// (`C`) The union of all control code, reserved, and unassigned categories 452 pub const Other: GeneralCategoryGroup = GCG((1 << (GC::Control as u32)) 453 | (1 << (GC::Format as u32)) 454 | (1 << (GC::PrivateUse as u32)) 455 | (1 << (GC::Surrogate as u32)) 456 | (1 << (GC::Unassigned as u32))); 457 458 /// (`Pd`) A dash or hyphen punctuation mark 459 pub const DashPunctuation: GeneralCategoryGroup = GCG(1 << (GC::DashPunctuation as u32)); 460 /// (`Ps`) An opening punctuation mark (of a pair) 461 pub const OpenPunctuation: GeneralCategoryGroup = GCG(1 << (GC::OpenPunctuation as u32)); 462 /// (`Pe`) A closing punctuation mark (of a pair) 463 pub const ClosePunctuation: GeneralCategoryGroup = GCG(1 << (GC::ClosePunctuation as u32)); 464 /// (`Pc`) A connecting punctuation mark, like a tie 465 pub const ConnectorPunctuation: GeneralCategoryGroup = 466 GCG(1 << (GC::ConnectorPunctuation as u32)); 467 /// (`Pi`) An initial quotation mark 468 pub const InitialPunctuation: GeneralCategoryGroup = GCG(1 << (GC::InitialPunctuation as u32)); 469 /// (`Pf`) A final quotation mark 470 pub const FinalPunctuation: GeneralCategoryGroup = GCG(1 << (GC::FinalPunctuation as u32)); 471 /// (`Po`) A punctuation mark of other type 472 pub const OtherPunctuation: GeneralCategoryGroup = GCG(1 << (GC::OtherPunctuation as u32)); 473 /// (`P`) The union of all punctuation categories 474 pub const Punctuation: GeneralCategoryGroup = GCG((1 << (GC::DashPunctuation as u32)) 475 | (1 << (GC::OpenPunctuation as u32)) 476 | (1 << (GC::ClosePunctuation as u32)) 477 | (1 << (GC::ConnectorPunctuation as u32)) 478 | (1 << (GC::OtherPunctuation as u32)) 479 | (1 << (GC::InitialPunctuation as u32)) 480 | (1 << (GC::FinalPunctuation 
as u32))); 481 482 /// (`Sm`) A symbol of mathematical use 483 pub const MathSymbol: GeneralCategoryGroup = GCG(1 << (GC::MathSymbol as u32)); 484 /// (`Sc`) A currency sign 485 pub const CurrencySymbol: GeneralCategoryGroup = GCG(1 << (GC::CurrencySymbol as u32)); 486 /// (`Sk`) A non-letterlike modifier symbol 487 pub const ModifierSymbol: GeneralCategoryGroup = GCG(1 << (GC::ModifierSymbol as u32)); 488 /// (`So`) A symbol of other type 489 pub const OtherSymbol: GeneralCategoryGroup = GCG(1 << (GC::OtherSymbol as u32)); 490 /// (`S`) The union of all symbol categories 491 pub const Symbol: GeneralCategoryGroup = GCG((1 << (GC::MathSymbol as u32)) 492 | (1 << (GC::CurrencySymbol as u32)) 493 | (1 << (GC::ModifierSymbol as u32)) 494 | (1 << (GC::OtherSymbol as u32))); 495 496 const ALL: u32 = (1 << (GC::FinalPunctuation as u32 + 1)) - 1; 497 498 /// Return whether the code point belongs in the provided multi-value category. 499 /// 500 /// ``` 501 /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup}; 502 /// use icu::properties::CodePointMapData; 503 /// 504 /// let gc = CodePointMapData::<GeneralCategory>::new(); 505 /// 506 /// assert_eq!(gc.get('A'), GeneralCategory::UppercaseLetter); 507 /// assert!(GeneralCategoryGroup::CasedLetter.contains(gc.get('A'))); 508 /// 509 /// // U+0B1E ORIYA LETTER NYA 510 /// assert_eq!(gc.get('ଞ'), GeneralCategory::OtherLetter); 511 /// assert!(GeneralCategoryGroup::Letter.contains(gc.get('ଞ'))); 512 /// assert!(!GeneralCategoryGroup::CasedLetter.contains(gc.get('ଞ'))); 513 /// 514 /// // U+0301 COMBINING ACUTE ACCENT 515 /// assert_eq!(gc.get('\u{0301}'), GeneralCategory::NonspacingMark); 516 /// assert!(GeneralCategoryGroup::Mark.contains(gc.get('\u{0301}'))); 517 /// assert!(!GeneralCategoryGroup::Letter.contains(gc.get('\u{0301}'))); 518 /// 519 /// assert_eq!(gc.get('0'), GeneralCategory::DecimalNumber); 520 /// assert!(GeneralCategoryGroup::Number.contains(gc.get('0'))); 521 /// 
assert!(!GeneralCategoryGroup::Mark.contains(gc.get('0'))); 522 /// 523 /// assert_eq!(gc.get('('), GeneralCategory::OpenPunctuation); 524 /// assert!(GeneralCategoryGroup::Punctuation.contains(gc.get('('))); 525 /// assert!(!GeneralCategoryGroup::Number.contains(gc.get('('))); 526 /// 527 /// // U+2713 CHECK MARK 528 /// assert_eq!(gc.get('✓'), GeneralCategory::OtherSymbol); 529 /// assert!(GeneralCategoryGroup::Symbol.contains(gc.get('✓'))); 530 /// assert!(!GeneralCategoryGroup::Punctuation.contains(gc.get('✓'))); 531 /// 532 /// assert_eq!(gc.get(' '), GeneralCategory::SpaceSeparator); 533 /// assert!(GeneralCategoryGroup::Separator.contains(gc.get(' '))); 534 /// assert!(!GeneralCategoryGroup::Symbol.contains(gc.get(' '))); 535 /// 536 /// // U+E007F CANCEL TAG 537 /// assert_eq!(gc.get('\u{E007F}'), GeneralCategory::Format); 538 /// assert!(GeneralCategoryGroup::Other.contains(gc.get('\u{E007F}'))); 539 /// assert!(!GeneralCategoryGroup::Separator.contains(gc.get('\u{E007F}'))); 540 /// ``` contains(self, val: GeneralCategory) -> bool541 pub const fn contains(self, val: GeneralCategory) -> bool { 542 0 != (1 << (val as u32)) & self.0 543 } 544 545 /// Produce a GeneralCategoryGroup that is the inverse of this one 546 /// 547 /// # Example 548 /// 549 /// ```rust 550 /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup}; 551 /// 552 /// let letter = GeneralCategoryGroup::Letter; 553 /// let not_letter = letter.complement(); 554 /// 555 /// assert!(not_letter.contains(GeneralCategory::MathSymbol)); 556 /// assert!(!letter.contains(GeneralCategory::MathSymbol)); 557 /// assert!(not_letter.contains(GeneralCategory::OtherPunctuation)); 558 /// assert!(!letter.contains(GeneralCategory::OtherPunctuation)); 559 /// assert!(!not_letter.contains(GeneralCategory::UppercaseLetter)); 560 /// assert!(letter.contains(GeneralCategory::UppercaseLetter)); 561 /// ``` complement(self) -> Self562 pub const fn complement(self) -> Self { 563 // Mask off things 
not in Self::ALL to guarantee the mask 564 // values stay in-range 565 GeneralCategoryGroup(!self.0 & Self::ALL) 566 } 567 568 /// Return the group representing all GeneralCategory values 569 /// 570 /// # Example 571 /// 572 /// ```rust 573 /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup}; 574 /// 575 /// let all = GeneralCategoryGroup::all(); 576 /// 577 /// assert!(all.contains(GeneralCategory::MathSymbol)); 578 /// assert!(all.contains(GeneralCategory::OtherPunctuation)); 579 /// assert!(all.contains(GeneralCategory::UppercaseLetter)); 580 /// ``` all() -> Self581 pub const fn all() -> Self { 582 Self(Self::ALL) 583 } 584 585 /// Return the empty group 586 /// 587 /// # Example 588 /// 589 /// ```rust 590 /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup}; 591 /// 592 /// let empty = GeneralCategoryGroup::empty(); 593 /// 594 /// assert!(!empty.contains(GeneralCategory::MathSymbol)); 595 /// assert!(!empty.contains(GeneralCategory::OtherPunctuation)); 596 /// assert!(!empty.contains(GeneralCategory::UppercaseLetter)); 597 /// ``` empty() -> Self598 pub const fn empty() -> Self { 599 Self(0) 600 } 601 602 /// Take the union of two groups 603 /// 604 /// # Example 605 /// 606 /// ```rust 607 /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup}; 608 /// 609 /// let letter = GeneralCategoryGroup::Letter; 610 /// let symbol = GeneralCategoryGroup::Symbol; 611 /// let union = letter.union(symbol); 612 /// 613 /// assert!(union.contains(GeneralCategory::MathSymbol)); 614 /// assert!(!union.contains(GeneralCategory::OtherPunctuation)); 615 /// assert!(union.contains(GeneralCategory::UppercaseLetter)); 616 /// ``` union(self, other: Self) -> Self617 pub const fn union(self, other: Self) -> Self { 618 Self(self.0 | other.0) 619 } 620 621 /// Take the intersection of two groups 622 /// 623 /// # Example 624 /// 625 /// ```rust 626 /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup}; 627 /// 
628 /// let letter = GeneralCategoryGroup::Letter; 629 /// let lu = GeneralCategoryGroup::UppercaseLetter; 630 /// let intersection = letter.intersection(lu); 631 /// 632 /// assert!(!intersection.contains(GeneralCategory::MathSymbol)); 633 /// assert!(!intersection.contains(GeneralCategory::OtherPunctuation)); 634 /// assert!(intersection.contains(GeneralCategory::UppercaseLetter)); 635 /// assert!(!intersection.contains(GeneralCategory::LowercaseLetter)); 636 /// ``` intersection(self, other: Self) -> Self637 pub const fn intersection(self, other: Self) -> Self { 638 Self(self.0 & other.0) 639 } 640 } 641 642 impl From<GeneralCategory> for GeneralCategoryGroup { from(subcategory: GeneralCategory) -> Self643 fn from(subcategory: GeneralCategory) -> Self { 644 GeneralCategoryGroup(1 << (subcategory as u32)) 645 } 646 } 647 impl From<u32> for GeneralCategoryGroup { from(mask: u32) -> Self648 fn from(mask: u32) -> Self { 649 // Mask off things not in Self::ALL to guarantee the mask 650 // values stay in-range 651 GeneralCategoryGroup(mask & Self::ALL) 652 } 653 } 654 impl From<GeneralCategoryGroup> for u32 { from(group: GeneralCategoryGroup) -> Self655 fn from(group: GeneralCategoryGroup) -> Self { 656 group.0 657 } 658 } 659 660 /// Enumerated property Script. 661 /// 662 /// This is used with both the Script and Script_Extensions Unicode properties. 663 /// Each character is assigned a single Script, but characters that are used in 664 /// a particular subset of scripts will be in more than one Script_Extensions set. 665 /// For example, DEVANAGARI DIGIT NINE has Script=Devanagari, but is also in the 666 /// Script_Extensions set for Dogra, Kaithi, and Mahajani. 667 /// 668 /// For more information, see UAX #24: <http://www.unicode.org/reports/tr24/>. 669 /// See `UScriptCode` in ICU4C. 
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "datagen", derive(databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_properties::props))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
// Open enum: a transparent newtype over the raw `u16` code. The named values are
// the associated constants declared in the `create_const_array!` block below.
pub struct Script(pub(crate) u16);

impl Script {
    /// Returns an ICU4C `UScriptCode` value.
    ///
    /// This is the wrapped `u16`, returned unchanged.
    pub const fn to_icu4c_value(self) -> u16 {
        self.0
    }
    /// Constructor from an ICU4C `UScriptCode` value.
    ///
    /// The value is wrapped as-is; no check is made that it matches one of the
    /// named constants.
    pub const fn from_icu4c_value(value: u16) -> Self {
        Self(value)
    }
}

// `create_const_array!` (defined at the top of this file) re-emits the constants
// below unchanged and additionally generates `Script::ALL_VALUES`, which lists
// every constant in declaration order, plus `impl From<Script> for u16`.
create_const_array! {
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl Script {
    pub const Adlam: Script = Script(167);
    pub const Ahom: Script = Script(161);
    pub const AnatolianHieroglyphs: Script = Script(156);
    pub const Arabic: Script = Script(2);
    pub const Armenian: Script = Script(3);
    pub const Avestan: Script = Script(117);
    pub const Balinese: Script = Script(62);
    pub const Bamum: Script = Script(130);
    pub const BassaVah: Script = Script(134);
    pub const Batak: Script = Script(63);
    pub const Bengali: Script = Script(4);
    pub const Bhaiksuki: Script = Script(168);
    pub const Bopomofo: Script = Script(5);
    pub const Brahmi: Script = Script(65);
    pub const Braille: Script = Script(46);
    pub const Buginese: Script = Script(55);
    pub const Buhid: Script = Script(44);
    pub const CanadianAboriginal: Script = Script(40);
    pub const Carian: Script = Script(104);
    pub const CaucasianAlbanian: Script = Script(159);
    pub const Chakma: Script = Script(118);
    pub const Cham: Script = Script(66);
    pub const Cherokee: Script = Script(6);
    pub const Chorasmian: Script = Script(189);
    pub const Common: Script = Script(0);
    pub const Coptic: Script = Script(7);
    pub const Cuneiform: Script = Script(101);
    pub const Cypriot: Script = Script(47);
    pub const CyproMinoan: Script = Script(193);
    pub const Cyrillic: Script = Script(8);
    pub const Deseret: Script = Script(9);
    pub const Devanagari: Script = Script(10);
    pub const DivesAkuru: Script = Script(190);
    pub const Dogra: Script = Script(178);
    pub const Duployan: Script = Script(135);
    pub const EgyptianHieroglyphs: Script = Script(71);
    pub const Elbasan: Script = Script(136);
    pub const Elymaic: Script = Script(185);
    pub const Ethiopian: Script = Script(11);
    pub const Georgian: Script = Script(12);
    pub const Glagolitic: Script = Script(56);
    pub const Gothic: Script = Script(13);
    pub const Grantha: Script = Script(137);
    pub const Greek: Script = Script(14);
    pub const Gujarati: Script = Script(15);
    pub const GunjalaGondi: Script = Script(179);
    pub const Gurmukhi: Script = Script(16);
    pub const Han: Script = Script(17);
    pub const Hangul: Script = Script(18);
    pub const HanifiRohingya: Script = Script(182);
    pub const Hanunoo: Script = Script(43);
    pub const Hatran: Script = Script(162);
    pub const Hebrew: Script = Script(19);
    pub const Hiragana: Script = Script(20);
    pub const ImperialAramaic: Script = Script(116);
    pub const Inherited: Script = Script(1);
    pub const InscriptionalPahlavi: Script = Script(122);
    pub const InscriptionalParthian: Script = Script(125);
    pub const Javanese: Script = Script(78);
    pub const Kaithi: Script = Script(120);
    pub const Kannada: Script = Script(21);
    pub const Katakana: Script = Script(22);
    pub const Kawi: Script = Script(198);
    pub const KayahLi: Script = Script(79);
    pub const Kharoshthi: Script = Script(57);
    pub const KhitanSmallScript: Script = Script(191);
    pub const Khmer: Script = Script(23);
    pub const Khojki: Script = Script(157);
    pub const Khudawadi: Script = Script(145);
    pub const Lao: Script = Script(24);
    pub const Latin: Script = Script(25);
    pub const Lepcha: Script = Script(82);
    pub const Limbu: Script = Script(48);
    pub const LinearA: Script = Script(83);
    pub const LinearB: Script = Script(49);
    pub const Lisu: Script = Script(131);
    pub const Lycian: Script = Script(107);
    pub const Lydian: Script = Script(108);
    pub const Mahajani: Script = Script(160);
    pub const Makasar: Script = Script(180);
    pub const Malayalam: Script = Script(26);
    pub const Mandaic: Script = Script(84);
    pub const Manichaean: Script = Script(121);
    pub const Marchen: Script = Script(169);
    pub const MasaramGondi: Script = Script(175);
    pub const Medefaidrin: Script = Script(181);
    pub const MeeteiMayek: Script = Script(115);
    pub const MendeKikakui: Script = Script(140);
    pub const MeroiticCursive: Script = Script(141);
    pub const MeroiticHieroglyphs: Script = Script(86);
    pub const Miao: Script = Script(92);
    pub const Modi: Script = Script(163);
    pub const Mongolian: Script = Script(27);
    pub const Mro: Script = Script(149);
    pub const Multani: Script = Script(164);
    pub const Myanmar: Script = Script(28);
    pub const Nabataean: Script = Script(143);
    pub const NagMundari: Script = Script(199);
    pub const Nandinagari: Script = Script(187);
    pub const Nastaliq: Script = Script(200);
    pub const NewTaiLue: Script = Script(59);
    pub const Newa: Script = Script(170);
    pub const Nko: Script = Script(87);
    pub const Nushu: Script = Script(150);
    pub const NyiakengPuachueHmong: Script = Script(186);
    pub const Ogham: Script = Script(29);
    pub const OlChiki: Script = Script(109);
    pub const OldHungarian: Script = Script(76);
    pub const OldItalic: Script = Script(30);
    pub const OldNorthArabian: Script = Script(142);
    pub const OldPermic: Script = Script(89);
    pub const OldPersian: Script = Script(61);
    pub const OldSogdian: Script = Script(184);
    pub const OldSouthArabian: Script = Script(133);
    pub const OldTurkic: Script = Script(88);
    pub const OldUyghur: Script = Script(194);
    pub const Oriya: Script = Script(31);
    pub const Osage: Script = Script(171);
    pub const Osmanya: Script = Script(50);
    pub const PahawhHmong: Script = Script(75);
    pub const Palmyrene: Script = Script(144);
    pub const PauCinHau: Script = Script(165);
    pub const PhagsPa: Script = Script(90);
    pub const Phoenician: Script = Script(91);
    pub const PsalterPahlavi: Script = Script(123);
    pub const Rejang: Script = Script(110);
    pub const Runic: Script = Script(32);
    pub const Samaritan: Script = Script(126);
    pub const Saurashtra: Script = Script(111);
    pub const Sharada: Script = Script(151);
    pub const Shavian: Script = Script(51);
    pub const Siddham: Script = Script(166);
    pub const SignWriting: Script = Script(112);
    pub const Sinhala: Script = Script(33);
    pub const Sogdian: Script = Script(183);
    pub const SoraSompeng: Script = Script(152);
    pub const Soyombo: Script = Script(176);
    pub const Sundanese: Script = Script(113);
    pub const SylotiNagri: Script = Script(58);
    pub const Syriac: Script = Script(34);
    pub const Tagalog: Script = Script(42);
    pub const Tagbanwa: Script = Script(45);
    pub const TaiLe: Script = Script(52);
    pub const TaiTham: Script = Script(106);
    pub const TaiViet: Script = Script(127);
    pub const Takri: Script = Script(153);
    pub const Tamil: Script = Script(35);
    pub const Tangsa: Script = Script(195);
    pub const Tangut: Script = Script(154);
    pub const Telugu: Script = Script(36);
    pub const Thaana: Script = Script(37);
    pub const Thai: Script = Script(38);
    pub const Tibetan: Script = Script(39);
    pub const Tifinagh: Script = Script(60);
    pub const Tirhuta: Script = Script(158);
    pub const Toto: Script = Script(196);
    pub const Ugaritic: Script = Script(53);
    pub const Unknown: Script = Script(103);
    pub const Vai: Script = Script(99);
    pub const Vithkuqi: Script = Script(197);
    pub const Wancho: Script = Script(188);
    pub const WarangCiti: Script = Script(146);
    pub const Yezidi: Script = Script(192);
    pub const Yi: Script = Script(41);
    pub const ZanabazarSquare: Script = Script(177);
}
}

// `make_enumerated_property!` (defined at the top of this file) implements
// `Sealed` and `EnumeratedProperty` for `Script` (with `NAME`/`SHORT_NAME` taken
// from `name`/`short_name`) and, because `ule_ty` is supplied, `zerovec::ule::AsULE`.
// The doc lines after `func:` are accepted by the macro's pattern but do not
// appear in its expansion, so they are reference text only.
make_enumerated_property! {
    name: "Script";
    short_name: "sc";
    ident: Script;
    data_marker: crate::provider::ScriptV1;
    singleton: SINGLETON_SCRIPT_V1;
    ule_ty: <u16 as zerovec::ule::AsULE>::ULE;
    func:
    /// Return a [`CodePointMapDataBorrowed`] for the Script Unicode enumerated property. See [`Script`].
    ///
    /// **Note:** Some code points are associated with multiple scripts. If you are trying to
    /// determine whether a code point belongs to a certain script, you should use
    /// [`load_script_with_extensions_unstable`] and [`ScriptWithExtensionsBorrowed::has_script`]
    /// instead of this function.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::{maps, Script};
    ///
    /// assert_eq!(maps::script().get('木'), Script::Han); // U+6728
    /// assert_eq!(maps::script().get('\u{1F383}'), Script::Common); // U+1F383 JACK-O-LANTERN
    /// ```
    /// [`load_script_with_extensions_unstable`]: crate::script::load_script_with_extensions_unstable
    /// [`ScriptWithExtensionsBorrowed::has_script`]: crate::script::ScriptWithExtensionsBorrowed::has_script
}

/// Enumerated property Hangul_Syllable_Type
///
/// The Unicode standard provides both precomposed Hangul syllables and conjoining Jamo to compose
/// arbitrary Hangul syllables. This property provides that ontology of Hangul code points.
///
/// For more information, see the [Unicode Korean FAQ](https://www.unicode.org/faq/korean.html).
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "datagen", derive(databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_properties::props))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
// Open enum: a transparent newtype over the raw `u8` value. The named values are
// the associated constants declared in the `create_const_array!` block below.
pub struct HangulSyllableType(pub(crate) u8);

impl HangulSyllableType {
    /// Returns an ICU4C `UHangulSyllableType` value.
    ///
    /// This is the wrapped `u8`, returned unchanged.
    pub const fn to_icu4c_value(self) -> u8 {
        self.0
    }
    /// Constructor from an ICU4C `UHangulSyllableType` value.
    ///
    /// The value is wrapped as-is; no check is made that it matches one of the
    /// named constants.
    pub const fn from_icu4c_value(value: u8) -> Self {
        Self(value)
    }
}

// `create_const_array!` (defined at the top of this file) re-emits the constants
// below, docs included, and additionally generates `ALL_VALUES` (every constant,
// in declaration order) plus `impl From<HangulSyllableType> for u16`.
create_const_array! {
#[allow(non_upper_case_globals)]
impl HangulSyllableType {
    /// (`NA`) not applicable (e.g. not a Hangul code point).
    pub const NotApplicable: HangulSyllableType = HangulSyllableType(0);
    /// (`L`) a conjoining leading consonant Jamo.
    pub const LeadingJamo: HangulSyllableType = HangulSyllableType(1);
    /// (`V`) a conjoining vowel Jamo.
    pub const VowelJamo: HangulSyllableType = HangulSyllableType(2);
    /// (`T`) a conjoining trailing consonant Jamo.
    pub const TrailingJamo: HangulSyllableType = HangulSyllableType(3);
    /// (`LV`) a precomposed syllable with a leading consonant and a vowel.
    pub const LeadingVowelSyllable: HangulSyllableType = HangulSyllableType(4);
    /// (`LVT`) a precomposed syllable with a leading consonant, a vowel, and a trailing consonant.
    pub const LeadingVowelTrailingSyllable: HangulSyllableType = HangulSyllableType(5);
}
}

// `make_enumerated_property!` (defined at the top of this file) implements
// `Sealed`, `EnumeratedProperty` and, because `ule_ty` is supplied,
// `zerovec::ule::AsULE` for the type. The doc lines after `func:` are accepted by
// the macro's pattern but do not appear in its expansion.
make_enumerated_property! {
    name: "Hangul_Syllable_Type";
    short_name: "hst";
    ident: HangulSyllableType;
    data_marker: crate::provider::HangulSyllableTypeV1;
    singleton: SINGLETON_HANGUL_SYLLABLE_TYPE_V1;
    ule_ty: u8;
    func:
    /// Returns a [`CodePointMapDataBorrowed`] for the Hangul_Syllable_Type
    /// Unicode enumerated property. See [`HangulSyllableType`].
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::{maps, HangulSyllableType};
    ///
    /// assert_eq!(maps::hangul_syllable_type().get('ᄀ'), HangulSyllableType::LeadingJamo); // U+1100
    /// assert_eq!(maps::hangul_syllable_type().get('가'), HangulSyllableType::LeadingVowelSyllable); // U+AC00
    /// ```

}

/// Enumerated property East_Asian_Width.
///
/// See "Definition" in UAX #11 for the summary of each property value:
/// <https://www.unicode.org/reports/tr11/#Definitions>
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "datagen", derive(databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_properties::props))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct EastAsianWidth(pub(crate) u8);

impl EastAsianWidth {
    /// Returns an ICU4C `UEastAsianWidth` value.
    ///
    /// This is the wrapped `u8`, returned unchanged.
    pub const fn to_icu4c_value(self) -> u8 {
        self.0
    }
    /// Constructor from an ICU4C `UEastAsianWidth` value.
    ///
    /// The value is wrapped as-is; no check is made that it matches one of the
    /// named constants.
    pub const fn from_icu4c_value(value: u8) -> Self {
        Self(value)
    }
}

// Generates `EastAsianWidth::ALL_VALUES` and `impl From<EastAsianWidth> for u16`
// in addition to the constants themselves. The trailing `name="…"` comments give
// the short alias of each value.
create_const_array! {
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl EastAsianWidth {
    pub const Neutral: EastAsianWidth = EastAsianWidth(0); // name="N"
    pub const Ambiguous: EastAsianWidth = EastAsianWidth(1); // name="A"
    pub const Halfwidth: EastAsianWidth = EastAsianWidth(2); // name="H"
    pub const Fullwidth: EastAsianWidth = EastAsianWidth(3); // name="F"
    pub const Narrow: EastAsianWidth = EastAsianWidth(4); // name="Na"
    pub const Wide: EastAsianWidth = EastAsianWidth(5); // name="W"
}
}

// Implements `Sealed`, `EnumeratedProperty` and `zerovec::ule::AsULE` for
// `EastAsianWidth`; the doc lines after `func:` are not part of the expansion.
make_enumerated_property! {
    name: "East_Asian_Width";
    short_name: "ea";
    ident: EastAsianWidth;
    data_marker: crate::provider::EastAsianWidthV1;
    singleton: SINGLETON_EAST_ASIAN_WIDTH_V1;
    ule_ty: u8;
    func:
    /// Return a [`CodePointMapDataBorrowed`] for the East_Asian_Width Unicode enumerated
    /// property. See [`EastAsianWidth`].
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::{maps, EastAsianWidth};
    ///
    /// assert_eq!(maps::east_asian_width().get('ｱ'), EastAsianWidth::Halfwidth); // U+FF71: Halfwidth Katakana Letter A
    /// assert_eq!(maps::east_asian_width().get('ア'), EastAsianWidth::Wide); // U+30A2: Katakana Letter A
    /// ```
}

/// Enumerated property Line_Break.
///
/// See "Line Breaking Properties" in UAX #14 for the summary of each property
/// value: <https://www.unicode.org/reports/tr14/#Properties>
///
/// The numeric value is compatible with `ULineBreak` in ICU4C.
1016 #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] 1017 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] 1018 #[cfg_attr(feature = "datagen", derive(databake::Bake))] 1019 #[cfg_attr(feature = "datagen", databake(path = icu_properties::props))] 1020 #[allow(clippy::exhaustive_structs)] // newtype 1021 #[repr(transparent)] 1022 pub struct LineBreak(pub(crate) u8); 1023 1024 impl LineBreak { 1025 /// Returns an ICU4C `ULineBreak` value. to_icu4c_value(self) -> u81026 pub const fn to_icu4c_value(self) -> u8 { 1027 self.0 1028 } 1029 /// Constructor from an ICU4C `ULineBreak` value. from_icu4c_value(value: u8) -> Self1030 pub const fn from_icu4c_value(value: u8) -> Self { 1031 Self(value) 1032 } 1033 } 1034 1035 create_const_array! { 1036 #[allow(missing_docs)] // These constants don't need individual documentation. 1037 #[allow(non_upper_case_globals)] 1038 impl LineBreak { 1039 pub const Unknown: LineBreak = LineBreak(0); // name="XX" 1040 pub const Ambiguous: LineBreak = LineBreak(1); // name="AI" 1041 pub const Alphabetic: LineBreak = LineBreak(2); // name="AL" 1042 pub const BreakBoth: LineBreak = LineBreak(3); // name="B2" 1043 pub const BreakAfter: LineBreak = LineBreak(4); // name="BA" 1044 pub const BreakBefore: LineBreak = LineBreak(5); // name="BB" 1045 pub const MandatoryBreak: LineBreak = LineBreak(6); // name="BK" 1046 pub const ContingentBreak: LineBreak = LineBreak(7); // name="CB" 1047 pub const ClosePunctuation: LineBreak = LineBreak(8); // name="CL" 1048 pub const CombiningMark: LineBreak = LineBreak(9); // name="CM" 1049 pub const CarriageReturn: LineBreak = LineBreak(10); // name="CR" 1050 pub const Exclamation: LineBreak = LineBreak(11); // name="EX" 1051 pub const Glue: LineBreak = LineBreak(12); // name="GL" 1052 pub const Hyphen: LineBreak = LineBreak(13); // name="HY" 1053 pub const Ideographic: LineBreak = LineBreak(14); // name="ID" 1054 pub const Inseparable: LineBreak = LineBreak(15); // 
name="IN" 1055 pub const InfixNumeric: LineBreak = LineBreak(16); // name="IS" 1056 pub const LineFeed: LineBreak = LineBreak(17); // name="LF" 1057 pub const Nonstarter: LineBreak = LineBreak(18); // name="NS" 1058 pub const Numeric: LineBreak = LineBreak(19); // name="NU" 1059 pub const OpenPunctuation: LineBreak = LineBreak(20); // name="OP" 1060 pub const PostfixNumeric: LineBreak = LineBreak(21); // name="PO" 1061 pub const PrefixNumeric: LineBreak = LineBreak(22); // name="PR" 1062 pub const Quotation: LineBreak = LineBreak(23); // name="QU" 1063 pub const ComplexContext: LineBreak = LineBreak(24); // name="SA" 1064 pub const Surrogate: LineBreak = LineBreak(25); // name="SG" 1065 pub const Space: LineBreak = LineBreak(26); // name="SP" 1066 pub const BreakSymbols: LineBreak = LineBreak(27); // name="SY" 1067 pub const ZWSpace: LineBreak = LineBreak(28); // name="ZW" 1068 pub const NextLine: LineBreak = LineBreak(29); // name="NL" 1069 pub const WordJoiner: LineBreak = LineBreak(30); // name="WJ" 1070 pub const H2: LineBreak = LineBreak(31); // name="H2" 1071 pub const H3: LineBreak = LineBreak(32); // name="H3" 1072 pub const JL: LineBreak = LineBreak(33); // name="JL" 1073 pub const JT: LineBreak = LineBreak(34); // name="JT" 1074 pub const JV: LineBreak = LineBreak(35); // name="JV" 1075 pub const CloseParenthesis: LineBreak = LineBreak(36); // name="CP" 1076 pub const ConditionalJapaneseStarter: LineBreak = LineBreak(37); // name="CJ" 1077 pub const HebrewLetter: LineBreak = LineBreak(38); // name="HL" 1078 pub const RegionalIndicator: LineBreak = LineBreak(39); // name="RI" 1079 pub const EBase: LineBreak = LineBreak(40); // name="EB" 1080 pub const EModifier: LineBreak = LineBreak(41); // name="EM" 1081 pub const ZWJ: LineBreak = LineBreak(42); // name="ZWJ" 1082 1083 // Added in ICU 74: 1084 pub const Aksara: LineBreak = LineBreak(43); // name="AK" 1085 pub const AksaraPrebase: LineBreak = LineBreak(44); // name=AP" 1086 pub const AksaraStart: 
LineBreak = LineBreak(45); // name=AS" 1087 pub const ViramaFinal: LineBreak = LineBreak(46); // name=VF" 1088 pub const Virama: LineBreak = LineBreak(47); // name=VI" 1089 } 1090 } 1091 1092 make_enumerated_property! { 1093 name: "Line_Break"; 1094 short_name: "lb"; 1095 ident: LineBreak; 1096 data_marker: crate::provider::LineBreakV1; 1097 singleton: SINGLETON_LINE_BREAK_V1; 1098 ule_ty: u8; 1099 func: 1100 /// Return a [`CodePointMapDataBorrowed`] for the Line_Break Unicode enumerated 1101 /// property. See [`LineBreak`]. 1102 /// 1103 /// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation. 1104 /// 1105 /// # Example 1106 /// 1107 /// ``` 1108 /// use icu::properties::{maps, LineBreak}; 1109 /// 1110 /// assert_eq!(maps::line_break().get(')'), LineBreak::CloseParenthesis); // U+0029: Right Parenthesis 1111 /// assert_eq!(maps::line_break().get('ぁ'), LineBreak::ConditionalJapaneseStarter); //U+3041: Hiragana Letter Small A 1112 /// ``` 1113 } 1114 1115 /// Enumerated property Grapheme_Cluster_Break. 1116 /// 1117 /// See "Default Grapheme Cluster Boundary Specification" in UAX #29 for the 1118 /// summary of each property value: 1119 /// <https://www.unicode.org/reports/tr29/#Default_Grapheme_Cluster_Table> 1120 #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] 1121 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] 1122 #[cfg_attr(feature = "datagen", derive(databake::Bake))] 1123 #[cfg_attr(feature = "datagen", databake(path = icu_properties::props))] 1124 #[allow(clippy::exhaustive_structs)] // this type is stable 1125 #[repr(transparent)] 1126 pub struct GraphemeClusterBreak(pub(crate) u8); 1127 1128 impl GraphemeClusterBreak { 1129 /// Returns an ICU4C `UGraphemeClusterBreak` value. to_icu4c_value(self) -> u81130 pub const fn to_icu4c_value(self) -> u8 { 1131 self.0 1132 } 1133 /// Constructor from an ICU4C `UGraphemeClusterBreak` value. 
from_icu4c_value(value: u8) -> Self1134 pub const fn from_icu4c_value(value: u8) -> Self { 1135 Self(value) 1136 } 1137 } 1138 1139 create_const_array! { 1140 #[allow(missing_docs)] // These constants don't need individual documentation. 1141 #[allow(non_upper_case_globals)] 1142 impl GraphemeClusterBreak { 1143 pub const Other: GraphemeClusterBreak = GraphemeClusterBreak(0); // name="XX" 1144 pub const Control: GraphemeClusterBreak = GraphemeClusterBreak(1); // name="CN" 1145 pub const CR: GraphemeClusterBreak = GraphemeClusterBreak(2); // name="CR" 1146 pub const Extend: GraphemeClusterBreak = GraphemeClusterBreak(3); // name="EX" 1147 pub const L: GraphemeClusterBreak = GraphemeClusterBreak(4); // name="L" 1148 pub const LF: GraphemeClusterBreak = GraphemeClusterBreak(5); // name="LF" 1149 pub const LV: GraphemeClusterBreak = GraphemeClusterBreak(6); // name="LV" 1150 pub const LVT: GraphemeClusterBreak = GraphemeClusterBreak(7); // name="LVT" 1151 pub const T: GraphemeClusterBreak = GraphemeClusterBreak(8); // name="T" 1152 pub const V: GraphemeClusterBreak = GraphemeClusterBreak(9); // name="V" 1153 pub const SpacingMark: GraphemeClusterBreak = GraphemeClusterBreak(10); // name="SM" 1154 pub const Prepend: GraphemeClusterBreak = GraphemeClusterBreak(11); // name="PP" 1155 pub const RegionalIndicator: GraphemeClusterBreak = GraphemeClusterBreak(12); // name="RI" 1156 /// This value is obsolete and unused. 1157 pub const EBase: GraphemeClusterBreak = GraphemeClusterBreak(13); // name="EB" 1158 /// This value is obsolete and unused. 1159 pub const EBaseGAZ: GraphemeClusterBreak = GraphemeClusterBreak(14); // name="EBG" 1160 /// This value is obsolete and unused. 1161 pub const EModifier: GraphemeClusterBreak = GraphemeClusterBreak(15); // name="EM" 1162 /// This value is obsolete and unused. 
1163 pub const GlueAfterZwj: GraphemeClusterBreak = GraphemeClusterBreak(16); // name="GAZ" 1164 pub const ZWJ: GraphemeClusterBreak = GraphemeClusterBreak(17); // name="ZWJ" 1165 } 1166 } 1167 1168 make_enumerated_property! { 1169 name: "Grapheme_Cluster_Break"; 1170 short_name: "GCB"; 1171 ident: GraphemeClusterBreak; 1172 data_marker: crate::provider::GraphemeClusterBreakV1; 1173 singleton: SINGLETON_GRAPHEME_CLUSTER_BREAK_V1; 1174 ule_ty: u8; 1175 func: 1176 /// Return a [`CodePointMapDataBorrowed`] for the Grapheme_Cluster_Break Unicode enumerated 1177 /// property. See [`GraphemeClusterBreak`]. 1178 /// 1179 /// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation. 1180 /// 1181 /// # Example 1182 /// 1183 /// ``` 1184 /// use icu::properties::{maps, GraphemeClusterBreak}; 1185 /// 1186 /// assert_eq!(maps::grapheme_cluster_break().get(''), GraphemeClusterBreak::RegionalIndicator); // U+1F1E6: Regional Indicator Symbol Letter A 1187 /// assert_eq!(maps::grapheme_cluster_break().get('ำ'), GraphemeClusterBreak::SpacingMark); //U+0E33: Thai Character Sara Am 1188 /// ``` 1189 } 1190 1191 /// Enumerated property Word_Break. 1192 /// 1193 /// See "Default Word Boundary Specification" in UAX #29 for the summary of 1194 /// each property value: 1195 /// <https://www.unicode.org/reports/tr29/#Default_Word_Boundaries>. 1196 #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] 1197 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] 1198 #[cfg_attr(feature = "datagen", derive(databake::Bake))] 1199 #[cfg_attr(feature = "datagen", databake(path = icu_properties::props))] 1200 #[allow(clippy::exhaustive_structs)] // newtype 1201 #[repr(transparent)] 1202 pub struct WordBreak(pub(crate) u8); 1203 1204 impl WordBreak { 1205 /// Returns an ICU4C `UWordBreak` value. to_icu4c_value(self) -> u81206 pub const fn to_icu4c_value(self) -> u8 { 1207 self.0 1208 } 1209 /// Constructor from an ICU4C `UWordBreak` value. 
from_icu4c_value(value: u8) -> Self1210 pub const fn from_icu4c_value(value: u8) -> Self { 1211 Self(value) 1212 } 1213 } 1214 1215 create_const_array! { 1216 #[allow(missing_docs)] // These constants don't need individual documentation. 1217 #[allow(non_upper_case_globals)] 1218 impl WordBreak { 1219 pub const Other: WordBreak = WordBreak(0); // name="XX" 1220 pub const ALetter: WordBreak = WordBreak(1); // name="LE" 1221 pub const Format: WordBreak = WordBreak(2); // name="FO" 1222 pub const Katakana: WordBreak = WordBreak(3); // name="KA" 1223 pub const MidLetter: WordBreak = WordBreak(4); // name="ML" 1224 pub const MidNum: WordBreak = WordBreak(5); // name="MN" 1225 pub const Numeric: WordBreak = WordBreak(6); // name="NU" 1226 pub const ExtendNumLet: WordBreak = WordBreak(7); // name="EX" 1227 pub const CR: WordBreak = WordBreak(8); // name="CR" 1228 pub const Extend: WordBreak = WordBreak(9); // name="Extend" 1229 pub const LF: WordBreak = WordBreak(10); // name="LF" 1230 pub const MidNumLet: WordBreak = WordBreak(11); // name="MB" 1231 pub const Newline: WordBreak = WordBreak(12); // name="NL" 1232 pub const RegionalIndicator: WordBreak = WordBreak(13); // name="RI" 1233 pub const HebrewLetter: WordBreak = WordBreak(14); // name="HL" 1234 pub const SingleQuote: WordBreak = WordBreak(15); // name="SQ" 1235 pub const DoubleQuote: WordBreak = WordBreak(16); // name=DQ 1236 /// This value is obsolete and unused. 1237 pub const EBase: WordBreak = WordBreak(17); // name="EB" 1238 /// This value is obsolete and unused. 1239 pub const EBaseGAZ: WordBreak = WordBreak(18); // name="EBG" 1240 /// This value is obsolete and unused. 1241 pub const EModifier: WordBreak = WordBreak(19); // name="EM" 1242 /// This value is obsolete and unused. 
1243 pub const GlueAfterZwj: WordBreak = WordBreak(20); // name="GAZ" 1244 pub const ZWJ: WordBreak = WordBreak(21); // name="ZWJ" 1245 pub const WSegSpace: WordBreak = WordBreak(22); // name="WSegSpace" 1246 } 1247 } 1248 1249 make_enumerated_property! { 1250 name: "Word_Break"; 1251 short_name: "WB"; 1252 ident: WordBreak; 1253 data_marker: crate::provider::WordBreakV1; 1254 singleton: SINGLETON_WORD_BREAK_V1; 1255 ule_ty: u8; 1256 func: 1257 /// Return a [`CodePointMapDataBorrowed`] for the Word_Break Unicode enumerated 1258 /// property. See [`WordBreak`]. 1259 /// 1260 /// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation. 1261 /// 1262 /// # Example 1263 /// 1264 /// ``` 1265 /// use icu::properties::{maps, WordBreak}; 1266 /// 1267 /// assert_eq!(maps::word_break().get('.'), WordBreak::MidNumLet); // U+002E: Full Stop 1268 /// assert_eq!(maps::word_break().get(','), WordBreak::MidNum); // U+FF0C: Fullwidth Comma 1269 /// ``` 1270 } 1271 1272 /// Enumerated property Sentence_Break. 1273 /// See "Default Sentence Boundary Specification" in UAX #29 for the summary of 1274 /// each property value: 1275 /// <https://www.unicode.org/reports/tr29/#Default_Word_Boundaries>. 1276 #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] 1277 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] 1278 #[cfg_attr(feature = "datagen", derive(databake::Bake))] 1279 #[cfg_attr(feature = "datagen", databake(path = icu_properties::props))] 1280 #[allow(clippy::exhaustive_structs)] // newtype 1281 #[repr(transparent)] 1282 pub struct SentenceBreak(pub(crate) u8); 1283 1284 impl SentenceBreak { 1285 /// Returns an ICU4C `USentenceBreak` value. to_icu4c_value(self) -> u81286 pub const fn to_icu4c_value(self) -> u8 { 1287 self.0 1288 } 1289 /// Constructor from an ICU4C `USentenceBreak` value. 
from_icu4c_value(value: u8) -> Self1290 pub const fn from_icu4c_value(value: u8) -> Self { 1291 Self(value) 1292 } 1293 } 1294 1295 create_const_array! { 1296 #[allow(missing_docs)] // These constants don't need individual documentation. 1297 #[allow(non_upper_case_globals)] 1298 impl SentenceBreak { 1299 pub const Other: SentenceBreak = SentenceBreak(0); // name="XX" 1300 pub const ATerm: SentenceBreak = SentenceBreak(1); // name="AT" 1301 pub const Close: SentenceBreak = SentenceBreak(2); // name="CL" 1302 pub const Format: SentenceBreak = SentenceBreak(3); // name="FO" 1303 pub const Lower: SentenceBreak = SentenceBreak(4); // name="LO" 1304 pub const Numeric: SentenceBreak = SentenceBreak(5); // name="NU" 1305 pub const OLetter: SentenceBreak = SentenceBreak(6); // name="LE" 1306 pub const Sep: SentenceBreak = SentenceBreak(7); // name="SE" 1307 pub const Sp: SentenceBreak = SentenceBreak(8); // name="SP" 1308 pub const STerm: SentenceBreak = SentenceBreak(9); // name="ST" 1309 pub const Upper: SentenceBreak = SentenceBreak(10); // name="UP" 1310 pub const CR: SentenceBreak = SentenceBreak(11); // name="CR" 1311 pub const Extend: SentenceBreak = SentenceBreak(12); // name="EX" 1312 pub const LF: SentenceBreak = SentenceBreak(13); // name="LF" 1313 pub const SContinue: SentenceBreak = SentenceBreak(14); // name="SC" 1314 } 1315 } 1316 1317 make_enumerated_property! { 1318 name: "Sentence_Break"; 1319 short_name: "SB"; 1320 ident: SentenceBreak; 1321 data_marker: crate::provider::SentenceBreakV1; 1322 singleton: SINGLETON_SENTENCE_BREAK_V1; 1323 ule_ty: u8; 1324 func: 1325 /// Return a [`CodePointMapDataBorrowed`] for the Sentence_Break Unicode enumerated 1326 /// property. See [`SentenceBreak`]. 1327 /// 1328 /// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation. 
1329 /// 1330 /// # Example 1331 /// 1332 /// ``` 1333 /// use icu::properties::{maps, SentenceBreak}; 1334 /// 1335 /// assert_eq!(maps::sentence_break().get('9'), SentenceBreak::Numeric); // U+FF19: Fullwidth Digit Nine 1336 /// assert_eq!(maps::sentence_break().get(','), SentenceBreak::SContinue); // U+002C: Comma 1337 /// ``` 1338 } 1339 1340 /// Property Canonical_Combining_Class. 1341 /// See UAX #15: 1342 /// <https://www.unicode.org/reports/tr15/>. 1343 /// 1344 /// See `icu::normalizer::properties::CanonicalCombiningClassMap` for the API 1345 /// to look up the Canonical_Combining_Class property by scalar value. 1346 // 1347 // NOTE: The Pernosco debugger has special knowledge 1348 // of this struct. Please do not change the bit layout 1349 // or the crate-module-qualified name of this struct 1350 // without coordination. 1351 #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] 1352 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] 1353 #[cfg_attr(feature = "datagen", derive(databake::Bake))] 1354 #[cfg_attr(feature = "datagen", databake(path = icu_properties::props))] 1355 #[allow(clippy::exhaustive_structs)] // newtype 1356 #[repr(transparent)] 1357 pub struct CanonicalCombiningClass(pub(crate) u8); 1358 1359 impl CanonicalCombiningClass { 1360 /// Returns an ICU4C `UCanonicalCombiningClass` value. to_icu4c_value(self) -> u81361 pub const fn to_icu4c_value(self) -> u8 { 1362 self.0 1363 } 1364 /// Constructor from an ICU4C `UCanonicalCombiningClass` value. from_icu4c_value(value: u8) -> Self1365 pub const fn from_icu4c_value(value: u8) -> Self { 1366 Self(value) 1367 } 1368 } 1369 1370 create_const_array! { 1371 // These constant names come from PropertyValueAliases.txt 1372 #[allow(missing_docs)] // These constants don't need individual documentation. 
1373 #[allow(non_upper_case_globals)] 1374 impl CanonicalCombiningClass { 1375 pub const NotReordered: CanonicalCombiningClass = CanonicalCombiningClass(0); // name="NR" 1376 pub const Overlay: CanonicalCombiningClass = CanonicalCombiningClass(1); // name="OV" 1377 pub const HanReading: CanonicalCombiningClass = CanonicalCombiningClass(6); // name="HANR" 1378 pub const Nukta: CanonicalCombiningClass = CanonicalCombiningClass(7); // name="NK" 1379 pub const KanaVoicing: CanonicalCombiningClass = CanonicalCombiningClass(8); // name="KV" 1380 pub const Virama: CanonicalCombiningClass = CanonicalCombiningClass(9); // name="VR" 1381 pub const CCC10: CanonicalCombiningClass = CanonicalCombiningClass(10); // name="CCC10" 1382 pub const CCC11: CanonicalCombiningClass = CanonicalCombiningClass(11); // name="CCC11" 1383 pub const CCC12: CanonicalCombiningClass = CanonicalCombiningClass(12); // name="CCC12" 1384 pub const CCC13: CanonicalCombiningClass = CanonicalCombiningClass(13); // name="CCC13" 1385 pub const CCC14: CanonicalCombiningClass = CanonicalCombiningClass(14); // name="CCC14" 1386 pub const CCC15: CanonicalCombiningClass = CanonicalCombiningClass(15); // name="CCC15" 1387 pub const CCC16: CanonicalCombiningClass = CanonicalCombiningClass(16); // name="CCC16" 1388 pub const CCC17: CanonicalCombiningClass = CanonicalCombiningClass(17); // name="CCC17" 1389 pub const CCC18: CanonicalCombiningClass = CanonicalCombiningClass(18); // name="CCC18" 1390 pub const CCC19: CanonicalCombiningClass = CanonicalCombiningClass(19); // name="CCC19" 1391 pub const CCC20: CanonicalCombiningClass = CanonicalCombiningClass(20); // name="CCC20" 1392 pub const CCC21: CanonicalCombiningClass = CanonicalCombiningClass(21); // name="CCC21" 1393 pub const CCC22: CanonicalCombiningClass = CanonicalCombiningClass(22); // name="CCC22" 1394 pub const CCC23: CanonicalCombiningClass = CanonicalCombiningClass(23); // name="CCC23" 1395 pub const CCC24: CanonicalCombiningClass = 
CanonicalCombiningClass(24); // name="CCC24" 1396 pub const CCC25: CanonicalCombiningClass = CanonicalCombiningClass(25); // name="CCC25" 1397 pub const CCC26: CanonicalCombiningClass = CanonicalCombiningClass(26); // name="CCC26" 1398 pub const CCC27: CanonicalCombiningClass = CanonicalCombiningClass(27); // name="CCC27" 1399 pub const CCC28: CanonicalCombiningClass = CanonicalCombiningClass(28); // name="CCC28" 1400 pub const CCC29: CanonicalCombiningClass = CanonicalCombiningClass(29); // name="CCC29" 1401 pub const CCC30: CanonicalCombiningClass = CanonicalCombiningClass(30); // name="CCC30" 1402 pub const CCC31: CanonicalCombiningClass = CanonicalCombiningClass(31); // name="CCC31" 1403 pub const CCC32: CanonicalCombiningClass = CanonicalCombiningClass(32); // name="CCC32" 1404 pub const CCC33: CanonicalCombiningClass = CanonicalCombiningClass(33); // name="CCC33" 1405 pub const CCC34: CanonicalCombiningClass = CanonicalCombiningClass(34); // name="CCC34" 1406 pub const CCC35: CanonicalCombiningClass = CanonicalCombiningClass(35); // name="CCC35" 1407 pub const CCC36: CanonicalCombiningClass = CanonicalCombiningClass(36); // name="CCC36" 1408 pub const CCC84: CanonicalCombiningClass = CanonicalCombiningClass(84); // name="CCC84" 1409 pub const CCC91: CanonicalCombiningClass = CanonicalCombiningClass(91); // name="CCC91" 1410 pub const CCC103: CanonicalCombiningClass = CanonicalCombiningClass(103); // name="CCC103" 1411 pub const CCC107: CanonicalCombiningClass = CanonicalCombiningClass(107); // name="CCC107" 1412 pub const CCC118: CanonicalCombiningClass = CanonicalCombiningClass(118); // name="CCC118" 1413 pub const CCC122: CanonicalCombiningClass = CanonicalCombiningClass(122); // name="CCC122" 1414 pub const CCC129: CanonicalCombiningClass = CanonicalCombiningClass(129); // name="CCC129" 1415 pub const CCC130: CanonicalCombiningClass = CanonicalCombiningClass(130); // name="CCC130" 1416 pub const CCC132: CanonicalCombiningClass = 
CanonicalCombiningClass(132); // name="CCC132" 1417 pub const CCC133: CanonicalCombiningClass = CanonicalCombiningClass(133); // name="CCC133" // RESERVED 1418 pub const AttachedBelowLeft: CanonicalCombiningClass = CanonicalCombiningClass(200); // name="ATBL" 1419 pub const AttachedBelow: CanonicalCombiningClass = CanonicalCombiningClass(202); // name="ATB" 1420 pub const AttachedAbove: CanonicalCombiningClass = CanonicalCombiningClass(214); // name="ATA" 1421 pub const AttachedAboveRight: CanonicalCombiningClass = CanonicalCombiningClass(216); // name="ATAR" 1422 pub const BelowLeft: CanonicalCombiningClass = CanonicalCombiningClass(218); // name="BL" 1423 pub const Below: CanonicalCombiningClass = CanonicalCombiningClass(220); // name="B" 1424 pub const BelowRight: CanonicalCombiningClass = CanonicalCombiningClass(222); // name="BR" 1425 pub const Left: CanonicalCombiningClass = CanonicalCombiningClass(224); // name="L" 1426 pub const Right: CanonicalCombiningClass = CanonicalCombiningClass(226); // name="R" 1427 pub const AboveLeft: CanonicalCombiningClass = CanonicalCombiningClass(228); // name="AL" 1428 pub const Above: CanonicalCombiningClass = CanonicalCombiningClass(230); // name="A" 1429 pub const AboveRight: CanonicalCombiningClass = CanonicalCombiningClass(232); // name="AR" 1430 pub const DoubleBelow: CanonicalCombiningClass = CanonicalCombiningClass(233); // name="DB" 1431 pub const DoubleAbove: CanonicalCombiningClass = CanonicalCombiningClass(234); // name="DA" 1432 pub const IotaSubscript: CanonicalCombiningClass = CanonicalCombiningClass(240); // name="IS" 1433 } 1434 } 1435 1436 make_enumerated_property! { 1437 name: "Canonical_Combining_Class"; 1438 short_name: "ccc"; 1439 ident: CanonicalCombiningClass; 1440 data_marker: crate::provider::CanonicalCombiningClassV1; 1441 singleton: SINGLETON_CANONICAL_COMBINING_CLASS_V1; 1442 ule_ty: u8; 1443 func: 1444 /// Return a [`CodePointMapData`] for the Canonical_Combining_Class Unicode property. 
See 1445 /// [`CanonicalCombiningClass`]. 1446 /// 1447 /// **Note:** See `icu::normalizer::CanonicalCombiningClassMap` for the preferred API 1448 /// to look up the Canonical_Combining_Class property by scalar value. 1449 /// 1450 /// # Example 1451 /// 1452 /// ``` 1453 /// use icu::properties::{maps, CanonicalCombiningClass}; 1454 /// 1455 /// assert_eq!(maps::canonical_combining_class().get('a'), CanonicalCombiningClass::NotReordered); // U+0061: LATIN SMALL LETTER A 1456 /// assert_eq!(maps::canonical_combining_class().get('\u{0301}'), CanonicalCombiningClass::Above); // U+0301: COMBINING ACUTE ACCENT 1457 /// ``` 1458 } 1459 1460 /// Property Indic_Syllabic_Category. 1461 /// See UAX #44: 1462 /// <https://www.unicode.org/reports/tr44/#Indic_Syllabic_Category>. 1463 #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] 1464 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] 1465 #[cfg_attr(feature = "datagen", derive(databake::Bake))] 1466 #[cfg_attr(feature = "datagen", databake(path = icu_properties::props))] 1467 #[allow(clippy::exhaustive_structs)] // newtype 1468 #[repr(transparent)] 1469 pub struct IndicSyllabicCategory(pub(crate) u8); 1470 1471 impl IndicSyllabicCategory { 1472 /// Returns an ICU4C `UIndicSyllabicCategory` value. 1473 pub const fn to_icu4c_value(self) -> u8 { 1474 self.0 1475 } 1476 /// Constructor from an ICU4C `UIndicSyllabicCategory` value. 1477 pub const fn from_icu4c_value(value: u8) -> Self { 1478 Self(value) 1479 } 1480 } 1481 1482 create_const_array! { 1483 #[allow(missing_docs)] // These constants don't need individual documentation. 
1484 #[allow(non_upper_case_globals)] 1485 impl IndicSyllabicCategory { 1486 pub const Other: IndicSyllabicCategory = IndicSyllabicCategory(0); 1487 pub const Avagraha: IndicSyllabicCategory = IndicSyllabicCategory(1); 1488 pub const Bindu: IndicSyllabicCategory = IndicSyllabicCategory(2); 1489 pub const BrahmiJoiningNumber: IndicSyllabicCategory = IndicSyllabicCategory(3); 1490 pub const CantillationMark: IndicSyllabicCategory = IndicSyllabicCategory(4); 1491 pub const Consonant: IndicSyllabicCategory = IndicSyllabicCategory(5); 1492 pub const ConsonantDead: IndicSyllabicCategory = IndicSyllabicCategory(6); 1493 pub const ConsonantFinal: IndicSyllabicCategory = IndicSyllabicCategory(7); 1494 pub const ConsonantHeadLetter: IndicSyllabicCategory = IndicSyllabicCategory(8); 1495 pub const ConsonantInitialPostfixed: IndicSyllabicCategory = IndicSyllabicCategory(9); 1496 pub const ConsonantKiller: IndicSyllabicCategory = IndicSyllabicCategory(10); 1497 pub const ConsonantMedial: IndicSyllabicCategory = IndicSyllabicCategory(11); 1498 pub const ConsonantPlaceholder: IndicSyllabicCategory = IndicSyllabicCategory(12); 1499 pub const ConsonantPrecedingRepha: IndicSyllabicCategory = IndicSyllabicCategory(13); 1500 pub const ConsonantPrefixed: IndicSyllabicCategory = IndicSyllabicCategory(14); 1501 pub const ConsonantSucceedingRepha: IndicSyllabicCategory = IndicSyllabicCategory(15); 1502 pub const ConsonantSubjoined: IndicSyllabicCategory = IndicSyllabicCategory(16); 1503 pub const ConsonantWithStacker: IndicSyllabicCategory = IndicSyllabicCategory(17); 1504 pub const GeminationMark: IndicSyllabicCategory = IndicSyllabicCategory(18); 1505 pub const InvisibleStacker: IndicSyllabicCategory = IndicSyllabicCategory(19); 1506 pub const Joiner: IndicSyllabicCategory = IndicSyllabicCategory(20); 1507 pub const ModifyingLetter: IndicSyllabicCategory = IndicSyllabicCategory(21); 1508 pub const NonJoiner: IndicSyllabicCategory = IndicSyllabicCategory(22); 1509 pub const Nukta: 
IndicSyllabicCategory = IndicSyllabicCategory(23); 1510 pub const Number: IndicSyllabicCategory = IndicSyllabicCategory(24); 1511 pub const NumberJoiner: IndicSyllabicCategory = IndicSyllabicCategory(25); 1512 pub const PureKiller: IndicSyllabicCategory = IndicSyllabicCategory(26); 1513 pub const RegisterShifter: IndicSyllabicCategory = IndicSyllabicCategory(27); 1514 pub const SyllableModifier: IndicSyllabicCategory = IndicSyllabicCategory(28); 1515 pub const ToneLetter: IndicSyllabicCategory = IndicSyllabicCategory(29); 1516 pub const ToneMark: IndicSyllabicCategory = IndicSyllabicCategory(30); 1517 pub const Virama: IndicSyllabicCategory = IndicSyllabicCategory(31); 1518 pub const Visarga: IndicSyllabicCategory = IndicSyllabicCategory(32); 1519 pub const Vowel: IndicSyllabicCategory = IndicSyllabicCategory(33); 1520 pub const VowelDependent: IndicSyllabicCategory = IndicSyllabicCategory(34); 1521 pub const VowelIndependent: IndicSyllabicCategory = IndicSyllabicCategory(35); 1522 pub const ReorderingKiller: IndicSyllabicCategory = IndicSyllabicCategory(36); 1523 } 1524 } 1525 1526 make_enumerated_property! { 1527 name: "Indic_Syllabic_Category"; 1528 short_name: "InSC"; 1529 ident: IndicSyllabicCategory; 1530 data_marker: crate::provider::IndicSyllabicCategoryV1; 1531 singleton: SINGLETON_INDIC_SYLLABIC_CATEGORY_V1; 1532 ule_ty: u8; 1533 func: 1534 /// Return a [`CodePointMapData`] for the Indic_Syllabic_Category Unicode property. See 1535 /// [`IndicSyllabicCategory`]. 1536 /// 1537 /// # Example 1538 /// 1539 /// ``` 1540 /// use icu::properties::{maps, IndicSyllabicCategory}; 1541 /// 1542 /// assert_eq!(maps::indic_syllabic_category().get('a'), IndicSyllabicCategory::Other); 1543 /// assert_eq!(maps::indic_syllabic_category().get('\u{0900}'), IndicSyllabicCategory::Bindu); // U+0900: DEVANAGARI SIGN INVERTED CANDRABINDU 1544 /// ``` 1545 } 1546 1547 /// Enumerated property Joining_Type. 
1548 /// See Section 9.2, Arabic Cursive Joining in The Unicode Standard for the summary of 1549 /// each property value. 1550 #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] 1551 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] 1552 #[cfg_attr(feature = "datagen", derive(databake::Bake))] 1553 #[cfg_attr(feature = "datagen", databake(path = icu_properties::props))] 1554 #[allow(clippy::exhaustive_structs)] // newtype 1555 #[repr(transparent)] 1556 pub struct JoiningType(pub(crate) u8); 1557 1558 impl JoiningType { 1559 /// Returns an ICU4C `UJoiningType` value. 1560 pub const fn to_icu4c_value(self) -> u8 { 1561 self.0 1562 } 1563 /// Constructor from an ICU4C `UJoiningType` value. 1564 pub const fn from_icu4c_value(value: u8) -> Self { 1565 Self(value) 1566 } 1567 } 1568 1569 create_const_array! { 1570 #[allow(missing_docs)] // These constants don't need individual documentation. 1571 #[allow(non_upper_case_globals)] 1572 impl JoiningType { 1573 pub const NonJoining: JoiningType = JoiningType(0); // name="U" 1574 pub const JoinCausing: JoiningType = JoiningType(1); // name="C" 1575 pub const DualJoining: JoiningType = JoiningType(2); // name="D" 1576 pub const LeftJoining: JoiningType = JoiningType(3); // name="L" 1577 pub const RightJoining: JoiningType = JoiningType(4); // name="R" 1578 pub const Transparent: JoiningType = JoiningType(5); // name="T" 1579 } 1580 } 1581 1582 make_enumerated_property! { 1583 name: "Joining_Type"; 1584 short_name: "jt"; 1585 ident: JoiningType; 1586 data_marker: crate::provider::JoiningTypeV1; 1587 singleton: SINGLETON_JOINING_TYPE_V1; 1588 ule_ty: u8; 1589 func: 1590 /// Return a [`CodePointMapDataBorrowed`] for the Joining_Type Unicode enumerated 1591 /// property. See [`JoiningType`]. 
1592 /// 1593 /// # Example 1594 /// 1595 /// ``` 1596 /// use icu::properties::{maps, JoiningType}; 1597 /// 1598 /// assert_eq!(maps::joining_type().get('ؠ'), JoiningType::DualJoining); // U+0620: Arabic Letter Kashmiri Yeh 1599 /// assert_eq!(maps::joining_type().get('\u{10ACD}'), JoiningType::LeftJoining); // U+10ACD: Manichaean Letter Heth 1600 /// ``` 1601 } 1602 1603 pub use crate::code_point_set::BinaryProperty; 1604 1605 macro_rules! make_binary_property { 1606 ( 1607 name: $name:literal; 1608 short_name: $short_name:literal; 1609 ident: $d:ident; 1610 data_marker: $data_marker:ty; 1611 singleton: $singleton:ident; 1612 func: 1613 $(#[$doc:meta])+ 1614 ) => { 1615 $(#[$doc])+ 1616 #[derive(Debug)] 1617 #[non_exhaustive] 1618 pub struct $d; 1619 1620 impl crate::private::Sealed for $d {} 1621 1622 impl BinaryProperty for $d { 1623 type DataMarker = $data_marker; 1624 #[cfg(feature = "compiled_data")] 1625 const SINGLETON: &'static crate::provider::PropertyCodePointSet<'static> = 1626 &crate::provider::Baked::$singleton; 1627 const NAME: &'static [u8] = $name.as_bytes(); 1628 const SHORT_NAME: &'static [u8] = $short_name.as_bytes(); 1629 } 1630 }; 1631 } 1632 1633 make_binary_property! { 1634 name: "ASCII_Hex_Digit"; 1635 short_name: "AHex"; 1636 ident: AsciiHexDigit; 1637 data_marker: crate::provider::AsciiHexDigitV1; 1638 singleton: SINGLETON_ASCII_HEX_DIGIT_V1; 1639 func: 1640 /// ASCII characters commonly used for the representation of hexadecimal numbers. 
1641 /// 1642 /// # Example 1643 /// 1644 /// ``` 1645 /// use icu::properties::CodePointSetData; 1646 /// use icu::properties::props::AsciiHexDigit; 1647 /// 1648 /// let ascii_hex_digit = CodePointSetData::new::<AsciiHexDigit>(); 1649 /// 1650 /// assert!(ascii_hex_digit.contains('3')); 1651 /// assert!(!ascii_hex_digit.contains('੩')); // U+0A69 GURMUKHI DIGIT THREE 1652 /// assert!(ascii_hex_digit.contains('A')); 1653 /// assert!(!ascii_hex_digit.contains('Ä')); // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS 1654 /// ``` 1655 } 1656 1657 make_binary_property! { 1658 name: "Alnum"; 1659 short_name: "Alnum"; 1660 ident: Alnum; 1661 data_marker: crate::provider::AlnumV1; 1662 singleton: SINGLETON_ALNUM_V1; 1663 func: 1664 /// Characters with the `Alphabetic` or `Decimal_Number` property. 1665 /// 1666 /// This is defined for POSIX compatibility. 1667 } 1668 1669 make_binary_property! { 1670 name: "Alphabetic"; 1671 short_name: "Alpha"; 1672 ident: Alphabetic; 1673 data_marker: crate::provider::AlphabeticV1; 1674 singleton: SINGLETON_ALPHABETIC_V1; 1675 func: 1676 /// Alphabetic characters. 1677 /// 1678 /// # Example 1679 /// 1680 /// ``` 1681 /// use icu::properties::CodePointSetData; 1682 /// use icu::properties::props::Alphabetic; 1683 /// 1684 /// let alphabetic = CodePointSetData::new::<Alphabetic>(); 1685 /// 1686 /// assert!(!alphabetic.contains('3')); 1687 /// assert!(!alphabetic.contains('੩')); // U+0A69 GURMUKHI DIGIT THREE 1688 /// assert!(alphabetic.contains('A')); 1689 /// assert!(alphabetic.contains('Ä')); // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS 1690 /// ``` 1691 1692 } 1693 1694 make_binary_property! { 1695 name: "Bidi_Control"; 1696 short_name: "Bidi_C"; 1697 ident: BidiControl; 1698 data_marker: crate::provider::BidiControlV1; 1699 singleton: SINGLETON_BIDI_CONTROL_V1; 1700 func: 1701 /// Format control characters which have specific functions in the Unicode Bidirectional 1702 /// Algorithm. 
1703 /// 1704 /// # Example 1705 /// 1706 /// ``` 1707 /// use icu::properties::CodePointSetData; 1708 /// use icu::properties::props::BidiControl; 1709 /// 1710 /// let bidi_control = CodePointSetData::new::<BidiControl>(); 1711 /// 1712 /// assert!(bidi_control.contains('\u{200F}')); // RIGHT-TO-LEFT MARK 1713 /// assert!(!bidi_control.contains('ش')); // U+0634 ARABIC LETTER SHEEN 1714 /// ``` 1715 1716 } 1717 1718 make_binary_property! { 1719 name: "Bidi_Mirrored"; 1720 short_name: "Bidi_M"; 1721 ident: BidiMirrored; 1722 data_marker: crate::provider::BidiMirroredV1; 1723 singleton: SINGLETON_BIDI_MIRRORED_V1; 1724 func: 1725 /// Characters that are mirrored in bidirectional text. 1726 /// 1727 /// # Example 1728 /// 1729 /// ``` 1730 /// use icu::properties::CodePointSetData; 1731 /// use icu::properties::props::BidiMirrored; 1732 /// 1733 /// let bidi_mirrored = CodePointSetData::new::<BidiMirrored>(); 1734 /// 1735 /// assert!(bidi_mirrored.contains('[')); 1736 /// assert!(bidi_mirrored.contains(']')); 1737 /// assert!(bidi_mirrored.contains('∑')); // U+2211 N-ARY SUMMATION 1738 /// assert!(!bidi_mirrored.contains('ཉ')); // U+0F49 TIBETAN LETTER NYA 1739 /// ``` 1740 1741 } 1742 1743 make_binary_property! { 1744 name: "Blank"; 1745 short_name: "Blank"; 1746 ident: Blank; 1747 data_marker: crate::provider::BlankV1; 1748 singleton: SINGLETON_BLANK_V1; 1749 func: 1750 /// Horizontal whitespace characters 1751 1752 } 1753 1754 make_binary_property! { 1755 name: "Cased"; 1756 short_name: "Cased"; 1757 ident: Cased; 1758 data_marker: crate::provider::CasedV1; 1759 singleton: SINGLETON_CASED_V1; 1760 func: 1761 /// Uppercase, lowercase, and titlecase characters. 
1762 /// 1763 /// # Example 1764 /// 1765 /// ``` 1766 /// use icu::properties::CodePointSetData; 1767 /// use icu::properties::props::Cased; 1768 /// 1769 /// let cased = CodePointSetData::new::<Cased>(); 1770 /// 1771 /// assert!(cased.contains('Ꙡ')); // U+A660 CYRILLIC CAPITAL LETTER REVERSED TSE 1772 /// assert!(!cased.contains('ދ')); // U+078B THAANA LETTER DHAALU 1773 /// ``` 1774 1775 } 1776 1777 make_binary_property! { 1778 name: "Case_Ignorable"; 1779 short_name: "CI"; 1780 ident: CaseIgnorable; 1781 data_marker: crate::provider::CaseIgnorableV1; 1782 singleton: SINGLETON_CASE_IGNORABLE_V1; 1783 func: 1784 /// Characters which are ignored for casing purposes. 1785 /// 1786 /// # Example 1787 /// 1788 /// ``` 1789 /// use icu::properties::CodePointSetData; 1790 /// use icu::properties::props::CaseIgnorable; 1791 /// 1792 /// let case_ignorable = CodePointSetData::new::<CaseIgnorable>(); 1793 /// 1794 /// assert!(case_ignorable.contains(':')); 1795 /// assert!(!case_ignorable.contains('λ')); // U+03BB GREEK SMALL LETTER LAMBDA 1796 /// ``` 1797 1798 } 1799 1800 make_binary_property! { 1801 name: "Full_Composition_Exclusion"; 1802 short_name: "Comp_Ex"; 1803 ident: FullCompositionExclusion; 1804 data_marker: crate::provider::FullCompositionExclusionV1; 1805 singleton: SINGLETON_FULL_COMPOSITION_EXCLUSION_V1; 1806 func: 1807 /// Characters that are excluded from composition. 1808 /// 1809 /// See <https://unicode.org/Public/UNIDATA/CompositionExclusions.txt> 1810 1811 } 1812 1813 make_binary_property! { 1814 name: "Changes_When_Casefolded"; 1815 short_name: "CWCF"; 1816 ident: ChangesWhenCasefolded; 1817 data_marker: crate::provider::ChangesWhenCasefoldedV1; 1818 singleton: SINGLETON_CHANGES_WHEN_CASEFOLDED_V1; 1819 func: 1820 /// Characters whose normalized forms are not stable under case folding. 
1821 /// 1822 /// # Example 1823 /// 1824 /// ``` 1825 /// use icu::properties::CodePointSetData; 1826 /// use icu::properties::props::ChangesWhenCasefolded; 1827 /// 1828 /// let changes_when_casefolded = CodePointSetData::new::<ChangesWhenCasefolded>(); 1829 /// 1830 /// assert!(changes_when_casefolded.contains('ß')); // U+00DF LATIN SMALL LETTER SHARP S 1831 /// assert!(!changes_when_casefolded.contains('ᜉ')); // U+1709 TAGALOG LETTER PA 1832 /// ``` 1833 1834 } 1835 1836 make_binary_property! { 1837 name: "Changes_When_Casemapped"; 1838 short_name: "CWCM"; 1839 ident: ChangesWhenCasemapped; 1840 data_marker: crate::provider::ChangesWhenCasemappedV1; 1841 singleton: SINGLETON_CHANGES_WHEN_CASEMAPPED_V1; 1842 func: 1843 /// Characters which may change when they undergo case mapping. 1844 1845 } 1846 1847 make_binary_property! { 1848 name: "Changes_When_NFKC_Casefolded"; 1849 short_name: "CWKCF"; 1850 ident: ChangesWhenNfkcCasefolded; 1851 data_marker: crate::provider::ChangesWhenNfkcCasefoldedV1; 1852 singleton: SINGLETON_CHANGES_WHEN_NFKC_CASEFOLDED_V1; 1853 func: 1854 /// Characters which are not identical to their `NFKC_Casefold` mapping. 1855 /// 1856 /// # Example 1857 /// 1858 /// ``` 1859 /// use icu::properties::CodePointSetData; 1860 /// use icu::properties::props::ChangesWhenNfkcCasefolded; 1861 /// 1862 /// let changes_when_nfkc_casefolded = CodePointSetData::new::<ChangesWhenNfkcCasefolded>(); 1863 /// 1864 /// assert!(changes_when_nfkc_casefolded.contains('\u{1F135}')); // U+1F135 SQUARED LATIN CAPITAL LETTER F 1865 /// assert!(!changes_when_nfkc_casefolded.contains('f')); 1866 /// ``` 1867 1868 } 1869 1870 make_binary_property! { 1871 name: "Changes_When_Lowercased"; 1872 short_name: "CWL"; 1873 ident: ChangesWhenLowercased; 1874 data_marker: crate::provider::ChangesWhenLowercasedV1; 1875 singleton: SINGLETON_CHANGES_WHEN_LOWERCASED_V1; 1876 func: 1877 /// Characters whose normalized forms are not stable under a `toLowercase` mapping. 
1878 /// 1879 /// # Example 1880 /// 1881 /// ``` 1882 /// use icu::properties::CodePointSetData; 1883 /// use icu::properties::props::ChangesWhenLowercased; 1884 /// 1885 /// let changes_when_lowercased = CodePointSetData::new::<ChangesWhenLowercased>(); 1886 /// 1887 /// assert!(changes_when_lowercased.contains('Ⴔ')); // U+10B4 GEORGIAN CAPITAL LETTER PHAR 1888 /// assert!(!changes_when_lowercased.contains('ფ')); // U+10E4 GEORGIAN LETTER PHAR 1889 /// ``` 1890 1891 } 1892 1893 make_binary_property! { 1894 name: "Changes_When_Titlecased"; 1895 short_name: "CWT"; 1896 ident: ChangesWhenTitlecased; 1897 data_marker: crate::provider::ChangesWhenTitlecasedV1; 1898 singleton: SINGLETON_CHANGES_WHEN_TITLECASED_V1; 1899 func: 1900 /// Characters whose normalized forms are not stable under a `toTitlecase` mapping. 1901 /// 1902 /// # Example 1903 /// 1904 /// ``` 1905 /// use icu::properties::CodePointSetData; 1906 /// use icu::properties::props::ChangesWhenTitlecased; 1907 /// 1908 /// let changes_when_titlecased = CodePointSetData::new::<ChangesWhenTitlecased>(); 1909 /// 1910 /// assert!(changes_when_titlecased.contains('æ')); // U+00E6 LATIN SMALL LETTER AE 1911 /// assert!(!changes_when_titlecased.contains('Æ')); // U+00C6 LATIN CAPITAL LETTER AE 1912 /// ``` 1913 1914 } 1915 1916 make_binary_property! { 1917 name: "Changes_When_Uppercased"; 1918 short_name: "CWU"; 1919 ident: ChangesWhenUppercased; 1920 data_marker: crate::provider::ChangesWhenUppercasedV1; 1921 singleton: SINGLETON_CHANGES_WHEN_UPPERCASED_V1; 1922 func: 1923 /// Characters whose normalized forms are not stable under a `toUppercase` mapping. 
1924 /// 1925 /// # Example 1926 /// 1927 /// ``` 1928 /// use icu::properties::CodePointSetData; 1929 /// use icu::properties::props::ChangesWhenUppercased; 1930 /// 1931 /// let changes_when_uppercased = CodePointSetData::new::<ChangesWhenUppercased>(); 1932 /// 1933 /// assert!(changes_when_uppercased.contains('ւ')); // U+0582 ARMENIAN SMALL LETTER YIWN 1934 /// assert!(!changes_when_uppercased.contains('Ւ')); // U+0552 ARMENIAN CAPITAL LETTER YIWN 1935 /// ``` 1936 1937 } 1938 1939 make_binary_property! { 1940 name: "Dash"; 1941 short_name: "Dash"; 1942 ident: Dash; 1943 data_marker: crate::provider::DashV1; 1944 singleton: SINGLETON_DASH_V1; 1945 func: 1946 /// Punctuation characters explicitly called out as dashes in the Unicode Standard, plus 1947 /// their compatibility equivalents. 1948 /// 1949 /// # Example 1950 /// 1951 /// ``` 1952 /// use icu::properties::CodePointSetData; 1953 /// use icu::properties::props::Dash; 1954 /// 1955 /// let dash = CodePointSetData::new::<Dash>(); 1956 /// 1957 /// assert!(dash.contains('⸺')); // U+2E3A TWO-EM DASH 1958 /// assert!(dash.contains('-')); // U+002D 1959 /// assert!(!dash.contains('=')); // U+003D 1960 /// ``` 1961 1962 } 1963 1964 make_binary_property! { 1965 name: "Deprecated"; 1966 short_name: "Dep"; 1967 ident: Deprecated; 1968 data_marker: crate::provider::DeprecatedV1; 1969 singleton: SINGLETON_DEPRECATED_V1; 1970 func: 1971 /// Deprecated characters. 1972 /// 1973 /// No characters will ever be removed from the standard, but the 1974 /// usage of deprecated characters is strongly discouraged. 1975 /// 1976 /// # Example 1977 /// 1978 /// ``` 1979 /// use icu::properties::CodePointSetData; 1980 /// use icu::properties::props::Deprecated; 1981 /// 1982 /// let deprecated = CodePointSetData::new::<Deprecated>(); 1983 /// 1984 /// assert!(deprecated.contains('ឣ')); // U+17A3 KHMER INDEPENDENT VOWEL QAQ 1985 /// assert!(!deprecated.contains('A')); 1986 /// ``` 1987 1988 } 1989 1990 make_binary_property! 
{ 1991 name: "Default_Ignorable_Code_Point"; 1992 short_name: "DI"; 1993 ident: DefaultIgnorableCodePoint; 1994 data_marker: crate::provider::DefaultIgnorableCodePointV1; 1995 singleton: SINGLETON_DEFAULT_IGNORABLE_CODE_POINT_V1; 1996 func: 1997 /// For programmatic determination of default ignorable code points. 1998 /// 1999 /// New characters that 2000 /// should be ignored in rendering (unless explicitly supported) will be assigned in these 2001 /// ranges, permitting programs to correctly handle the default rendering of such 2002 /// characters when not otherwise supported. 2003 /// 2004 /// # Example 2005 /// 2006 /// ``` 2007 /// use icu::properties::CodePointSetData; 2008 /// use icu::properties::props::DefaultIgnorableCodePoint; 2009 /// 2010 /// let default_ignorable_code_point = CodePointSetData::new::<DefaultIgnorableCodePoint>(); 2011 /// 2012 /// assert!(default_ignorable_code_point.contains('\u{180B}')); // MONGOLIAN FREE VARIATION SELECTOR ONE 2013 /// assert!(!default_ignorable_code_point.contains('E')); 2014 /// ``` 2015 2016 } 2017 2018 make_binary_property! { 2019 name: "Diacritic"; 2020 short_name: "Dia"; 2021 ident: Diacritic; 2022 data_marker: crate::provider::DiacriticV1; 2023 singleton: SINGLETON_DIACRITIC_V1; 2024 func: 2025 /// Characters that linguistically modify the meaning of another character to which they apply. 2026 /// 2027 /// # Example 2028 /// 2029 /// ``` 2030 /// use icu::properties::CodePointSetData; 2031 /// use icu::properties::props::Diacritic; 2032 /// 2033 /// let diacritic = CodePointSetData::new::<Diacritic>(); 2034 /// 2035 /// assert!(diacritic.contains('\u{05B3}')); // HEBREW POINT HATAF QAMATS 2036 /// assert!(!diacritic.contains('א')); // U+05D0 HEBREW LETTER ALEF 2037 /// ``` 2038 2039 } 2040 2041 make_binary_property! 
{ 2042 name: "Emoji_Modifier_Base"; 2043 short_name: "EBase"; 2044 ident: EmojiModifierBase; 2045 data_marker: crate::provider::EmojiModifierBaseV1; 2046 singleton: SINGLETON_EMOJI_MODIFIER_BASE_V1; 2047 func: 2048 /// Characters that can serve as a base for emoji modifiers. 2049 /// 2050 /// # Example 2051 /// 2052 /// ``` 2053 /// use icu::properties::CodePointSetData; 2054 /// use icu::properties::props::EmojiModifierBase; 2055 /// 2056 /// let emoji_modifier_base = CodePointSetData::new::<EmojiModifierBase>(); 2057 /// 2058 /// assert!(emoji_modifier_base.contains('✊')); // U+270A RAISED FIST 2059 /// assert!(!emoji_modifier_base.contains('⛰')); // U+26F0 MOUNTAIN 2060 /// ``` 2061 2062 } 2063 2064 make_binary_property! { 2065 name: "Emoji_Component"; 2066 short_name: "EComp"; 2067 ident: EmojiComponent; 2068 data_marker: crate::provider::EmojiComponentV1; 2069 singleton: SINGLETON_EMOJI_COMPONENT_V1; 2070 func: 2071 /// Characters used in emoji sequences that normally do not appear on emoji keyboards as 2072 /// separate choices, such as base characters for emoji keycaps. 2073 /// 2074 /// # Example 2075 /// 2076 /// ``` 2077 /// use icu::properties::CodePointSetData; 2078 /// use icu::properties::props::EmojiComponent; 2079 /// 2080 /// let emoji_component = CodePointSetData::new::<EmojiComponent>(); 2081 /// 2082 /// assert!(emoji_component.contains('\u{1F1F9}')); // U+1F1F9 REGIONAL INDICATOR SYMBOL LETTER T 2083 /// assert!(emoji_component.contains('\u{20E3}')); // COMBINING ENCLOSING KEYCAP 2084 /// assert!(emoji_component.contains('7')); 2085 /// assert!(!emoji_component.contains('T')); 2086 /// ``` 2087 2088 } 2089 2090 make_binary_property! { 2091 name: "Emoji_Modifier"; 2092 short_name: "EMod"; 2093 ident: EmojiModifier; 2094 data_marker: crate::provider::EmojiModifierV1; 2095 singleton: SINGLETON_EMOJI_MODIFIER_V1; 2096 func: 2097 /// Characters that are emoji modifiers. 
2098 /// 2099 /// # Example 2100 /// 2101 /// ``` 2102 /// use icu::properties::CodePointSetData; 2103 /// use icu::properties::props::EmojiModifier; 2104 /// 2105 /// let emoji_modifier = CodePointSetData::new::<EmojiModifier>(); 2106 /// 2107 /// assert!(emoji_modifier.contains('\u{1F3FD}')); // EMOJI MODIFIER FITZPATRICK TYPE-4 2108 /// assert!(!emoji_modifier.contains('\u{200C}')); // ZERO WIDTH NON-JOINER 2109 /// ``` 2110 2111 } 2112 2113 make_binary_property! { 2114 name: "Emoji"; 2115 short_name: "Emoji"; 2116 ident: Emoji; 2117 data_marker: crate::provider::EmojiV1; 2118 singleton: SINGLETON_EMOJI_V1; 2119 func: 2120 /// Characters that are emoji. 2121 /// 2122 /// # Example 2123 /// 2124 /// ``` 2125 /// use icu::properties::CodePointSetData; 2126 /// use icu::properties::props::Emoji; 2127 /// 2128 /// let emoji = CodePointSetData::new::<Emoji>(); 2129 /// 2130 /// assert!(emoji.contains('\u{1F525}')); // U+1F525 FIRE 2131 /// assert!(!emoji.contains('V')); 2132 /// ``` 2133 2134 } 2135 2136 make_binary_property! { 2137 name: "Emoji_Presentation"; 2138 short_name: "EPres"; 2139 ident: EmojiPresentation; 2140 data_marker: crate::provider::EmojiPresentationV1; 2141 singleton: SINGLETON_EMOJI_PRESENTATION_V1; 2142 func: 2143 /// Characters that have emoji presentation by default. 2144 /// 2145 /// # Example 2146 /// 2147 /// ``` 2148 /// use icu::properties::CodePointSetData; 2149 /// use icu::properties::props::EmojiPresentation; 2150 /// 2151 /// let emoji_presentation = CodePointSetData::new::<EmojiPresentation>(); 2152 /// 2153 /// assert!(emoji_presentation.contains('\u{1F9AC}')); // U+1F9AC BISON 2154 /// assert!(!emoji_presentation.contains('♻')); // U+267B BLACK UNIVERSAL RECYCLING SYMBOL 2155 /// ``` 2156 2157 } 2158 2159 make_binary_property! 
{ 2160 name: "Extender"; 2161 short_name: "Ext"; 2162 ident: Extender; 2163 data_marker: crate::provider::ExtenderV1; 2164 singleton: SINGLETON_EXTENDER_V1; 2165 func: 2166 /// Characters whose principal function is to extend the value of a preceding alphabetic 2167 /// character or to extend the shape of adjacent characters. 2168 /// 2169 /// # Example 2170 /// 2171 /// ``` 2172 /// use icu::properties::CodePointSetData; 2173 /// use icu::properties::props::Extender; 2174 /// 2175 /// let extender = CodePointSetData::new::<Extender>(); 2176 /// 2177 /// assert!(extender.contains('ヾ')); // U+30FE KATAKANA VOICED ITERATION MARK 2178 /// assert!(extender.contains('ー')); // U+30FC KATAKANA-HIRAGANA PROLONGED SOUND MARK 2179 /// assert!(!extender.contains('・')); // U+30FB KATAKANA MIDDLE DOT 2180 /// ``` 2181 2182 } 2183 2184 make_binary_property! { 2185 name: "Extended_Pictographic"; 2186 short_name: "ExtPict"; 2187 ident: ExtendedPictographic; 2188 data_marker: crate::provider::ExtendedPictographicV1; 2189 singleton: SINGLETON_EXTENDED_PICTOGRAPHIC_V1; 2190 func: 2191 /// Pictographic symbols, as well as reserved ranges in blocks largely associated with 2192 /// emoji characters 2193 /// 2194 /// # Example 2195 /// 2196 /// ``` 2197 /// use icu::properties::CodePointSetData; 2198 /// use icu::properties::props::ExtendedPictographic; 2199 /// 2200 /// let extended_pictographic = CodePointSetData::new::<ExtendedPictographic>(); 2201 /// 2202 /// assert!(extended_pictographic.contains('\u{1F973}')); // U+1F973 FACE WITH PARTY HORN AND PARTY HAT 2203 /// assert!(!extended_pictographic.contains('\u{1F1EA}')); // U+1F1EA REGIONAL INDICATOR SYMBOL LETTER E 2204 /// ``` 2205 2206 } 2207 2208 make_binary_property! { 2209 name: "Graph"; 2210 short_name: "Graph"; 2211 ident: Graph; 2212 data_marker: crate::provider::GraphV1; 2213 singleton: SINGLETON_GRAPH_V1; 2214 func: 2215 /// Visible characters. 2216 /// 2217 /// This is defined for POSIX compatibility. 
}

make_binary_property! {
    name: "Grapheme_Base";
    short_name: "Gr_Base";
    ident: GraphemeBase;
    data_marker: crate::provider::GraphemeBaseV1;
    singleton: SINGLETON_GRAPHEME_BASE_V1;
    func:
    /// Property used together with the definition of Standard Korean Syllable Block to define
    /// "Grapheme base".
    ///
    /// See D58 in Chapter 3, Conformance in the Unicode Standard.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::GraphemeBase;
    ///
    /// let grapheme_base = CodePointSetData::new::<GraphemeBase>();
    ///
    /// assert!(grapheme_base.contains('ക')); // U+0D15 MALAYALAM LETTER KA
    /// assert!(grapheme_base.contains('\u{0D3F}')); // U+0D3F MALAYALAM VOWEL SIGN I
    /// assert!(!grapheme_base.contains('\u{0D3E}')); // U+0D3E MALAYALAM VOWEL SIGN AA
    /// ```

}

make_binary_property! {
    name: "Grapheme_Extend";
    short_name: "Gr_Ext";
    ident: GraphemeExtend;
    data_marker: crate::provider::GraphemeExtendV1;
    singleton: SINGLETON_GRAPHEME_EXTEND_V1;
    func:
    /// Property used to define "Grapheme extender".
    ///
    /// See D59 in Chapter 3, Conformance in the
    /// Unicode Standard.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::GraphemeExtend;
    ///
    /// let grapheme_extend = CodePointSetData::new::<GraphemeExtend>();
    ///
    /// assert!(!grapheme_extend.contains('ക')); // U+0D15 MALAYALAM LETTER KA
    /// assert!(!grapheme_extend.contains('\u{0D3F}')); // U+0D3F MALAYALAM VOWEL SIGN I
    /// assert!(grapheme_extend.contains('\u{0D3E}')); // U+0D3E MALAYALAM VOWEL SIGN AA
    /// ```

}

make_binary_property! {
    name: "Grapheme_Link";
    short_name: "Gr_Link";
    ident: GraphemeLink;
    data_marker: crate::provider::GraphemeLinkV1;
    singleton: SINGLETON_GRAPHEME_LINK_V1;
    func:
    /// Deprecated property.
    ///
    /// Formerly proposed for programmatic determination of grapheme
    /// cluster boundaries.

}

make_binary_property! {
    name: "Hex_Digit";
    short_name: "Hex";
    ident: HexDigit;
    data_marker: crate::provider::HexDigitV1;
    singleton: SINGLETON_HEX_DIGIT_V1;
    func:
    /// Characters commonly used for the representation of hexadecimal numbers, plus their
    /// compatibility equivalents.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::HexDigit;
    ///
    /// let hex_digit = CodePointSetData::new::<HexDigit>();
    ///
    /// assert!(hex_digit.contains('0'));
    /// assert!(!hex_digit.contains('੩')); // U+0A69 GURMUKHI DIGIT THREE
    /// assert!(hex_digit.contains('f'));
    /// assert!(hex_digit.contains('\u{FF46}')); // U+FF46 FULLWIDTH LATIN SMALL LETTER F
    /// assert!(hex_digit.contains('\u{FF26}')); // U+FF26 FULLWIDTH LATIN CAPITAL LETTER F
    /// assert!(!hex_digit.contains('Ä')); // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
    /// ```

}

make_binary_property! {
    name: "Hyphen";
    short_name: "Hyphen";
    ident: Hyphen;
    data_marker: crate::provider::HyphenV1;
    singleton: SINGLETON_HYPHEN_V1;
    func:
    /// Deprecated property.
    ///
    /// Dashes which are used to mark connections between pieces of
    /// words, plus the Katakana middle dot.

}

make_binary_property! {
    name: "Id_Continue";
    short_name: "IDC";
    ident: IdContinue;
    data_marker: crate::provider::IdContinueV1;
    singleton: SINGLETON_ID_CONTINUE_V1;
    func:
    /// Characters that can come after the first character in an identifier.
    ///
    /// If using NFKC to
    /// fold differences between characters, use [`XidContinue`] instead. See
    /// [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for
    /// more details.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::IdContinue;
    ///
    /// let id_continue = CodePointSetData::new::<IdContinue>();
    ///
    /// assert!(id_continue.contains('x'));
    /// assert!(id_continue.contains('1'));
    /// assert!(id_continue.contains('_'));
    /// assert!(id_continue.contains('ߝ')); // U+07DD NKO LETTER FA
    /// assert!(!id_continue.contains('ⓧ')); // U+24E7 CIRCLED LATIN SMALL LETTER X
    /// assert!(id_continue.contains('\u{FC5E}')); // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
    /// ```

}

make_binary_property! {
    name: "Ideographic";
    short_name: "Ideo";
    ident: Ideographic;
    data_marker: crate::provider::IdeographicV1;
    singleton: SINGLETON_IDEOGRAPHIC_V1;
    func:
    /// Characters considered to be CJKV (Chinese, Japanese, Korean, and Vietnamese)
    /// ideographs, or related siniform ideographs.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Ideographic;
    ///
    /// let ideographic = CodePointSetData::new::<Ideographic>();
    ///
    /// assert!(ideographic.contains('川')); // U+5DDD CJK UNIFIED IDEOGRAPH-5DDD
    /// assert!(!ideographic.contains('밥')); // U+BC25 HANGUL SYLLABLE BAB
    /// ```

}

make_binary_property! {
    name: "Id_Start";
    short_name: "IDS";
    ident: IdStart;
    data_marker: crate::provider::IdStartV1;
    singleton: SINGLETON_ID_START_V1;
    func:
    /// Characters that can begin an identifier.
    ///
    /// If using NFKC to fold differences between
    /// characters, use [`XidStart`] instead. See [`Unicode Standard Annex
    /// #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for more details.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::IdStart;
    ///
    /// let id_start = CodePointSetData::new::<IdStart>();
    ///
    /// assert!(id_start.contains('x'));
    /// assert!(!id_start.contains('1'));
    /// assert!(!id_start.contains('_'));
    /// assert!(id_start.contains('ߝ')); // U+07DD NKO LETTER FA
    /// assert!(!id_start.contains('ⓧ')); // U+24E7 CIRCLED LATIN SMALL LETTER X
    /// assert!(id_start.contains('\u{FC5E}')); // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
    /// ```

}

make_binary_property! {
    name: "Ids_Binary_Operator";
    short_name: "IDSB";
    ident: IdsBinaryOperator;
    data_marker: crate::provider::IdsBinaryOperatorV1;
    singleton: SINGLETON_IDS_BINARY_OPERATOR_V1;
    func:
    /// Characters used in Ideographic Description Sequences.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::IdsBinaryOperator;
    ///
    /// let ids_binary_operator = CodePointSetData::new::<IdsBinaryOperator>();
    ///
    /// assert!(ids_binary_operator.contains('\u{2FF5}')); // IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM ABOVE
    /// assert!(!ids_binary_operator.contains('\u{3006}')); // IDEOGRAPHIC CLOSING MARK
    /// ```

}

make_binary_property! {
    name: "Ids_Trinary_Operator";
    short_name: "IDST";
    ident: IdsTrinaryOperator;
    data_marker: crate::provider::IdsTrinaryOperatorV1;
    singleton: SINGLETON_IDS_TRINARY_OPERATOR_V1;
    func:
    /// Characters used in Ideographic Description Sequences.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::IdsTrinaryOperator;
    ///
    /// let ids_trinary_operator = CodePointSetData::new::<IdsTrinaryOperator>();
    ///
    /// assert!(ids_trinary_operator.contains('\u{2FF2}')); // IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO MIDDLE AND RIGHT
    /// assert!(ids_trinary_operator.contains('\u{2FF3}')); // IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO MIDDLE AND BELOW
    /// assert!(!ids_trinary_operator.contains('\u{2FF4}')); // IDEOGRAPHIC DESCRIPTION CHARACTER FULL SURROUND
    /// assert!(!ids_trinary_operator.contains('\u{2FF5}')); // IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM ABOVE
    /// assert!(!ids_trinary_operator.contains('\u{3006}')); // IDEOGRAPHIC CLOSING MARK
    /// ```

}

make_binary_property! {
    name: "Join_Control";
    short_name: "Join_C";
    ident: JoinControl;
    data_marker: crate::provider::JoinControlV1;
    singleton: SINGLETON_JOIN_CONTROL_V1;
    func:
    /// Format control characters which have specific functions for control of cursive joining
    /// and ligation.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::JoinControl;
    ///
    /// let join_control = CodePointSetData::new::<JoinControl>();
    ///
    /// assert!(join_control.contains('\u{200C}')); // ZERO WIDTH NON-JOINER
    /// assert!(join_control.contains('\u{200D}')); // ZERO WIDTH JOINER
    /// assert!(!join_control.contains('\u{200E}')); // LEFT-TO-RIGHT MARK
    /// ```

}

make_binary_property! {
    name: "Logical_Order_Exception";
    short_name: "LOE";
    ident: LogicalOrderException;
    data_marker: crate::provider::LogicalOrderExceptionV1;
    singleton: SINGLETON_LOGICAL_ORDER_EXCEPTION_V1;
    func:
    /// A small number of spacing vowel letters occurring in certain Southeast Asian scripts such as Thai and Lao.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::LogicalOrderException;
    ///
    /// let logical_order_exception = CodePointSetData::new::<LogicalOrderException>();
    ///
    /// assert!(logical_order_exception.contains('ແ')); // U+0EC1 LAO VOWEL SIGN EI
    /// assert!(!logical_order_exception.contains('ະ')); // U+0EB0 LAO VOWEL SIGN A
    /// ```

}

make_binary_property! {
    name: "Lowercase";
    short_name: "Lower";
    ident: Lowercase;
    data_marker: crate::provider::LowercaseV1;
    singleton: SINGLETON_LOWERCASE_V1;
    func:
    /// Lowercase characters.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Lowercase;
    ///
    /// let lowercase = CodePointSetData::new::<Lowercase>();
    ///
    /// assert!(lowercase.contains('a'));
    /// assert!(!lowercase.contains('A'));
    /// ```

}

make_binary_property! {
    name: "Math";
    short_name: "Math";
    ident: Math;
    data_marker: crate::provider::MathV1;
    singleton: SINGLETON_MATH_V1;
    func:
    /// Characters used in mathematical notation.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Math;
    ///
    /// let math = CodePointSetData::new::<Math>();
    ///
    /// assert!(math.contains('='));
    /// assert!(math.contains('+'));
    /// assert!(!math.contains('-'));
    /// assert!(math.contains('−')); // U+2212 MINUS SIGN
    /// assert!(!math.contains('/'));
    /// assert!(math.contains('∕')); // U+2215 DIVISION SLASH
    /// ```

}

make_binary_property! {
    name: "Noncharacter_Code_Point";
    short_name: "NChar";
    ident: NoncharacterCodePoint;
    data_marker: crate::provider::NoncharacterCodePointV1;
    singleton: SINGLETON_NONCHARACTER_CODE_POINT_V1;
    func:
    /// Code points permanently reserved for internal use.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::NoncharacterCodePoint;
    ///
    /// let noncharacter_code_point = CodePointSetData::new::<NoncharacterCodePoint>();
    ///
    /// assert!(noncharacter_code_point.contains('\u{FDD0}'));
    /// assert!(noncharacter_code_point.contains('\u{FFFF}'));
    /// assert!(!noncharacter_code_point.contains('\u{10000}'));
    /// ```

}

make_binary_property! {
    name: "NFC_Inert";
    short_name: "NFC_Inert";
    ident: NfcInert;
    data_marker: crate::provider::NfcInertV1;
    singleton: SINGLETON_NFC_INERT_V1;
    func:
    /// Characters that are inert under NFC, i.e., they do not interact with adjacent characters.

}

make_binary_property! {
    name: "NFD_Inert";
    short_name: "NFD_Inert";
    ident: NfdInert;
    data_marker: crate::provider::NfdInertV1;
    singleton: SINGLETON_NFD_INERT_V1;
    func:
    /// Characters that are inert under NFD, i.e., they do not interact with adjacent characters.

}

make_binary_property! {
    name: "NFKC_Inert";
    short_name: "NFKC_Inert";
    ident: NfkcInert;
    data_marker: crate::provider::NfkcInertV1;
    singleton: SINGLETON_NFKC_INERT_V1;
    func:
    /// Characters that are inert under NFKC, i.e., they do not interact with adjacent characters.

}

make_binary_property! {
    name: "NFKD_Inert";
    short_name: "NFKD_Inert";
    ident: NfkdInert;
    data_marker: crate::provider::NfkdInertV1;
    singleton: SINGLETON_NFKD_INERT_V1;
    func:
    /// Characters that are inert under NFKD, i.e., they do not interact with adjacent characters.

}

make_binary_property! {
    name: "Pattern_Syntax";
    short_name: "Pat_Syn";
    ident: PatternSyntax;
    data_marker: crate::provider::PatternSyntaxV1;
    singleton: SINGLETON_PATTERN_SYNTAX_V1;
    func:
    /// Characters used as syntax in patterns (such as regular expressions).
    ///
    /// See [`Unicode
    /// Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for more
    /// details.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::PatternSyntax;
    ///
    /// let pattern_syntax = CodePointSetData::new::<PatternSyntax>();
    ///
    /// assert!(pattern_syntax.contains('{'));
    /// assert!(pattern_syntax.contains('⇒')); // U+21D2 RIGHTWARDS DOUBLE ARROW
    /// assert!(!pattern_syntax.contains('0'));
    /// ```

}

make_binary_property! {
    name: "Pattern_White_Space";
    short_name: "Pat_WS";
    ident: PatternWhiteSpace;
    data_marker: crate::provider::PatternWhiteSpaceV1;
    singleton: SINGLETON_PATTERN_WHITE_SPACE_V1;
    func:
    /// Characters used as whitespace in patterns (such as regular expressions).
    ///
    /// See
    /// [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for
    /// more details.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::PatternWhiteSpace;
    ///
    /// let pattern_white_space = CodePointSetData::new::<PatternWhiteSpace>();
    ///
    /// assert!(pattern_white_space.contains(' '));
    /// assert!(pattern_white_space.contains('\u{2029}')); // PARAGRAPH SEPARATOR
    /// assert!(pattern_white_space.contains('\u{000A}')); // NEW LINE
    /// assert!(!pattern_white_space.contains('\u{00A0}')); // NO-BREAK SPACE
    /// ```

}

make_binary_property! {
    name: "Prepended_Concatenation_Mark";
    short_name: "PCM";
    ident: PrependedConcatenationMark;
    data_marker: crate::provider::PrependedConcatenationMarkV1;
    singleton: SINGLETON_PREPENDED_CONCATENATION_MARK_V1;
    func:
    /// A small class of visible format controls, which precede and then span a sequence of
    /// other characters, usually digits.

}

make_binary_property! {
    name: "Print";
    short_name: "Print";
    ident: Print;
    data_marker: crate::provider::PrintV1;
    singleton: SINGLETON_PRINT_V1;
    func:
    /// Printable characters (visible characters and whitespace).
    ///
    /// This is defined for POSIX compatibility.

}

make_binary_property! {
    name: "Quotation_Mark";
    short_name: "QMark";
    ident: QuotationMark;
    data_marker: crate::provider::QuotationMarkV1;
    singleton: SINGLETON_QUOTATION_MARK_V1;
    func:
    /// Punctuation characters that function as quotation marks.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::QuotationMark;
    ///
    /// let quotation_mark = CodePointSetData::new::<QuotationMark>();
    ///
    /// assert!(quotation_mark.contains('\''));
    /// assert!(quotation_mark.contains('„')); // U+201E DOUBLE LOW-9 QUOTATION MARK
    /// assert!(!quotation_mark.contains('<'));
    /// ```

}

make_binary_property! {
    name: "Radical";
    short_name: "Radical";
    ident: Radical;
    data_marker: crate::provider::RadicalV1;
    singleton: SINGLETON_RADICAL_V1;
    func:
    /// Characters used in the definition of Ideographic Description Sequences.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Radical;
    ///
    /// let radical = CodePointSetData::new::<Radical>();
    ///
    /// assert!(radical.contains('⺆')); // U+2E86 CJK RADICAL BOX
    /// assert!(!radical.contains('\u{F95E}')); // U+F95E CJK COMPATIBILITY IDEOGRAPH-F95E
    /// ```

}

make_binary_property! {
    name: "Regional_Indicator";
    short_name: "RI";
    ident: RegionalIndicator;
    data_marker: crate::provider::RegionalIndicatorV1;
    singleton: SINGLETON_REGIONAL_INDICATOR_V1;
    func:
    /// Regional indicator characters, `U+1F1E6..U+1F1FF`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::RegionalIndicator;
    ///
    /// let regional_indicator = CodePointSetData::new::<RegionalIndicator>();
    ///
    /// assert!(regional_indicator.contains('\u{1F1F9}')); // U+1F1F9 REGIONAL INDICATOR SYMBOL LETTER T
    /// assert!(!regional_indicator.contains('Ⓣ')); // U+24C9 CIRCLED LATIN CAPITAL LETTER T
    /// assert!(!regional_indicator.contains('T'));
    /// ```

}

make_binary_property! {
    name: "Soft_Dotted";
    short_name: "SD";
    ident: SoftDotted;
    data_marker: crate::provider::SoftDottedV1;
    singleton: SINGLETON_SOFT_DOTTED_V1;
    func:
    /// Characters with a "soft dot", like i or j.
    ///
    /// An accent placed on these characters causes
    /// the dot to disappear.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::SoftDotted;
    ///
    /// let soft_dotted = CodePointSetData::new::<SoftDotted>();
    ///
    /// assert!(soft_dotted.contains('і')); // U+0456 CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
    /// assert!(!soft_dotted.contains('ı')); // U+0131 LATIN SMALL LETTER DOTLESS I
    /// ```

}

make_binary_property! {
    name: "Segment_Starter";
    short_name: "Segment_Starter";
    ident: SegmentStarter;
    data_marker: crate::provider::SegmentStarterV1;
    singleton: SINGLETON_SEGMENT_STARTER_V1;
    func:
    /// Characters that are starters in terms of Unicode normalization and combining character
    /// sequences.

}

make_binary_property! {
    name: "Case_Sensitive";
    short_name: "Case_Sensitive";
    ident: CaseSensitive;
    data_marker: crate::provider::CaseSensitiveV1;
    singleton: SINGLETON_CASE_SENSITIVE_V1;
    func:
    /// Characters that are either the source of a case mapping or in the target of a case
    /// mapping.

}

make_binary_property! {
    name: "Sentence_Terminal";
    short_name: "STerm";
    ident: SentenceTerminal;
    data_marker: crate::provider::SentenceTerminalV1;
    singleton: SINGLETON_SENTENCE_TERMINAL_V1;
    func:
    /// Punctuation characters that generally mark the end of sentences.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::SentenceTerminal;
    ///
    /// let sentence_terminal = CodePointSetData::new::<SentenceTerminal>();
    ///
    /// assert!(sentence_terminal.contains('.'));
    /// assert!(sentence_terminal.contains('?'));
    /// assert!(sentence_terminal.contains('᪨')); // U+1AA8 TAI THAM SIGN KAAN
    /// assert!(!sentence_terminal.contains(','));
    /// assert!(!sentence_terminal.contains('¿')); // U+00BF INVERTED QUESTION MARK
    /// ```

}

make_binary_property! {
    name: "Terminal_Punctuation";
    short_name: "Term";
    ident: TerminalPunctuation;
    data_marker: crate::provider::TerminalPunctuationV1;
    singleton: SINGLETON_TERMINAL_PUNCTUATION_V1;
    func:
    /// Punctuation characters that generally mark the end of textual units.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::TerminalPunctuation;
    ///
    /// let terminal_punctuation = CodePointSetData::new::<TerminalPunctuation>();
    ///
    /// assert!(terminal_punctuation.contains('.'));
    /// assert!(terminal_punctuation.contains('?'));
    /// assert!(terminal_punctuation.contains('᪨')); // U+1AA8 TAI THAM SIGN KAAN
    /// assert!(terminal_punctuation.contains(','));
    /// assert!(!terminal_punctuation.contains('¿')); // U+00BF INVERTED QUESTION MARK
    /// ```

}

make_binary_property! {
    name: "Unified_Ideograph";
    short_name: "UIdeo";
    ident: UnifiedIdeograph;
    data_marker: crate::provider::UnifiedIdeographV1;
    singleton: SINGLETON_UNIFIED_IDEOGRAPH_V1;
    func:
    /// A property which specifies the exact set of Unified CJK Ideographs in the standard.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::UnifiedIdeograph;
    ///
    /// let unified_ideograph = CodePointSetData::new::<UnifiedIdeograph>();
    ///
    /// assert!(unified_ideograph.contains('川')); // U+5DDD CJK UNIFIED IDEOGRAPH-5DDD
    /// assert!(unified_ideograph.contains('木')); // U+6728 CJK UNIFIED IDEOGRAPH-6728
    /// assert!(!unified_ideograph.contains('\u{1B178}')); // U+1B178 NUSHU CHARACTER-1B178
    /// ```

}

make_binary_property! {
    name: "Uppercase";
    short_name: "Upper";
    ident: Uppercase;
    data_marker: crate::provider::UppercaseV1;
    singleton: SINGLETON_UPPERCASE_V1;
    func:
    /// Uppercase characters.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Uppercase;
    ///
    /// let uppercase = CodePointSetData::new::<Uppercase>();
    ///
    /// assert!(uppercase.contains('U'));
    /// assert!(!uppercase.contains('u'));
    /// ```

}

make_binary_property! {
    name: "Variation_Selector";
    short_name: "VS";
    ident: VariationSelector;
    data_marker: crate::provider::VariationSelectorV1;
    singleton: SINGLETON_VARIATION_SELECTOR_V1;
    func:
    /// Characters that are Variation Selectors.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::VariationSelector;
    ///
    /// let variation_selector = CodePointSetData::new::<VariationSelector>();
    ///
    /// assert!(variation_selector.contains('\u{180D}')); // MONGOLIAN FREE VARIATION SELECTOR THREE
    /// assert!(!variation_selector.contains('\u{303E}')); // IDEOGRAPHIC VARIATION INDICATOR
    /// assert!(variation_selector.contains('\u{FE0F}')); // VARIATION SELECTOR-16
    /// assert!(!variation_selector.contains('\u{FE10}')); // PRESENTATION FORM FOR VERTICAL COMMA
    /// assert!(variation_selector.contains('\u{E01EF}')); // VARIATION SELECTOR-256
    /// ```

}

make_binary_property! {
    name: "White_Space";
    short_name: "space";
    ident: WhiteSpace;
    data_marker: crate::provider::WhiteSpaceV1;
    singleton: SINGLETON_WHITE_SPACE_V1;
    func:
    /// Spaces, separator characters and other control characters which should be treated by
    /// programming languages as "white space" for the purpose of parsing elements.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::WhiteSpace;
    ///
    /// let white_space = CodePointSetData::new::<WhiteSpace>();
    ///
    /// assert!(white_space.contains(' '));
    /// assert!(white_space.contains('\u{000A}')); // NEW LINE
    /// assert!(white_space.contains('\u{00A0}')); // NO-BREAK SPACE
    /// assert!(!white_space.contains('\u{200B}')); // ZERO WIDTH SPACE
    /// ```

}

make_binary_property! {
    name: "Xdigit";
    short_name: "Xdigit";
    ident: Xdigit;
    data_marker: crate::provider::XdigitV1;
    singleton: SINGLETON_XDIGIT_V1;
    func:
    /// Hexadecimal digits.
    ///
    /// This is defined for POSIX compatibility.

}

make_binary_property! {
    name: "XID_Continue";
    short_name: "XIDC";
    ident: XidContinue;
    data_marker: crate::provider::XidContinueV1;
    singleton: SINGLETON_XID_CONTINUE_V1;
    func:
    /// Characters that can come after the first character in an identifier.
    ///
    /// See [`Unicode Standard Annex
    /// #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for more details.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::XidContinue;
    ///
    /// let xid_continue = CodePointSetData::new::<XidContinue>();
    ///
    /// assert!(xid_continue.contains('x'));
    /// assert!(xid_continue.contains('1'));
    /// assert!(xid_continue.contains('_'));
    /// assert!(xid_continue.contains('ߝ')); // U+07DD NKO LETTER FA
    /// assert!(!xid_continue.contains('ⓧ')); // U+24E7 CIRCLED LATIN SMALL LETTER X
    /// assert!(!xid_continue.contains('\u{FC5E}')); // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
    /// ```

}

make_binary_property! {
    name: "XID_Start";
    short_name: "XIDS";
    ident: XidStart;
    data_marker: crate::provider::XidStartV1;
    singleton: SINGLETON_XID_START_V1;
    func:
    /// Characters that can begin an identifier.
    ///
    /// See [`Unicode
    /// Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for more
    /// details.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::XidStart;
    ///
    /// let xid_start = CodePointSetData::new::<XidStart>();
    ///
    /// assert!(xid_start.contains('x'));
    /// assert!(!xid_start.contains('1'));
    /// assert!(!xid_start.contains('_'));
    /// assert!(xid_start.contains('ߝ')); // U+07DD NKO LETTER FA
    /// assert!(!xid_start.contains('ⓧ')); // U+24E7 CIRCLED LATIN SMALL LETTER X
    /// assert!(!xid_start.contains('\u{FC5E}')); // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
    /// ```

}

pub use crate::emoji::EmojiSet;

// Declares a public unit marker struct for an emoji set and implements
// `Sealed` and `EmojiSet` for it.
//
// Inputs, in order: the marker's identifier, its data marker type, the name of
// the `Baked` singleton constant, and (after the literal `func:` token) one or
// more attributes — in practice doc comments — that are attached to the
// generated struct. The `SINGLETON` constant is only emitted when the
// `compiled_data` feature is enabled.
macro_rules! make_emoji_set {
    (
        ident: $marker_name:ident;
        data_marker: $data_marker:ty;
        singleton: $singleton:ident;
        func:
        $(#[$doc:meta])+
    ) => {
        $(#[$doc])+
        #[derive(Debug)]
        #[non_exhaustive]
        pub struct $marker_name;

        impl crate::private::Sealed for $marker_name {}

        impl EmojiSet for $marker_name {
            type DataMarker = $data_marker;
            #[cfg(feature = "compiled_data")]
            const SINGLETON: &'static crate::provider::PropertyUnicodeSet<'static> =
                &crate::provider::Baked::$singleton;
        }
    }
}

make_emoji_set! {
    ident: BasicEmoji;
    data_marker: crate::provider::BasicEmojiV1;
    singleton: SINGLETON_BASIC_EMOJI_V1;
    func:
    /// Characters and character sequences intended for general-purpose, independent, direct input.
    ///
    /// See [`Unicode Technical Standard #51`](https://unicode.org/reports/tr51/) for more
    /// details.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::EmojiSetData;
    /// use icu::properties::props::BasicEmoji;
    ///
    /// let basic_emoji = EmojiSetData::new::<BasicEmoji>();
    ///
    /// assert!(!basic_emoji.contains('\u{0020}'));
    /// assert!(!basic_emoji.contains('\n'));
    /// assert!(basic_emoji.contains('\u{1F983}')); // U+1F983 TURKEY
    /// assert!(basic_emoji.contains_str("\u{1F983}"));
    /// assert!(basic_emoji.contains_str("\u{1F6E4}\u{FE0F}")); // railway track
    /// assert!(!basic_emoji.contains_str("\u{0033}\u{FE0F}\u{20E3}")); // Emoji_Keycap_Sequence, keycap 3
    /// ```
}

#[cfg(test)]
mod test_enumerated_property_completeness {
    use super::*;
    use std::collections::BTreeMap;

    /// Asserts that the values listed in `consts` and the values present in the
    /// `lookup` name-to-value data are the same set.
    ///
    /// Each const is converted to its numeric value and removed from a map built
    /// from `lookup`. Consts with no entry in the data, and data entries left
    /// over once all consts have been removed, are gathered into one report; the
    /// test panics with that report if it is non-empty.
    fn check_enum<'a, T: NamedEnumeratedProperty>(
        lookup: &crate::provider::names::PropertyValueNameToEnumMap<'static>,
        consts: impl IntoIterator<Item = &'a T>,
    ) where
        u16: From<T>,
    {
        // Keyed by numeric value so that entries can be matched against consts.
        let mut data: BTreeMap<_, _> = lookup
            .map
            .iter()
            .map(|(name, value)| (value, (name, "Data")))
            .collect();

        let names = crate::PropertyNamesLong::<T>::new();
        let consts = consts.into_iter().map(|value| {
            (
                u16::from(*value) as usize,
                (
                    names.get(*value).unwrap_or("<unknown>").to_string(),
                    "Consts",
                ),
            )
        });

        // First collect consts that are missing from the data...
        let mut diff = Vec::new();
        for t @ (value, _) in consts {
            if data.remove(&value).is_none() {
                diff.push(t);
            }
        }
        // ...then whatever is still in `data` has no corresponding const.
        diff.extend(data);

        let mut fmt_diff = String::new();
        for (value, (name, source)) in diff {
            fmt_diff.push_str(&format!("{source}:\t{name} = {value:?}\n"));
        }

        assert!(
            fmt_diff.is_empty(),
            "Values defined in data do not match values defined in consts. Difference:\n{}",
            fmt_diff
        );
    }

    #[test]
    fn test_ea() {
        check_enum(
            crate::provider::Baked::SINGLETON_EAST_ASIAN_WIDTH_NAME_TO_VALUE_V2,
            EastAsianWidth::ALL_VALUES,
        );
    }

    #[test]
    fn test_ccc() {
        check_enum(
            crate::provider::Baked::SINGLETON_CANONICAL_COMBINING_CLASS_NAME_TO_VALUE_V2,
            CanonicalCombiningClass::ALL_VALUES,
        );
    }

    #[test]
    fn test_jt() {
        check_enum(
            crate::provider::Baked::SINGLETON_JOINING_TYPE_NAME_TO_VALUE_V2,
            JoiningType::ALL_VALUES,
        );
    }

    #[test]
    fn test_insc() {
        check_enum(
            crate::provider::Baked::SINGLETON_INDIC_SYLLABIC_CATEGORY_NAME_TO_VALUE_V2,
            IndicSyllabicCategory::ALL_VALUES,
        );
    }

    #[test]
    fn test_sb() {
        check_enum(
            crate::provider::Baked::SINGLETON_SENTENCE_BREAK_NAME_TO_VALUE_V2,
            SentenceBreak::ALL_VALUES,
        );
    }

    #[test]
    fn test_wb() {
        check_enum(
            crate::provider::Baked::SINGLETON_WORD_BREAK_NAME_TO_VALUE_V2,
            WordBreak::ALL_VALUES,
        );
    }

    #[test]
    fn test_bc() {
        check_enum(
            crate::provider::Baked::SINGLETON_BIDI_CLASS_NAME_TO_VALUE_V2,
            BidiClass::ALL_VALUES,
        );
    }

    #[test]
    fn test_hst() {
        check_enum(
            crate::provider::Baked::SINGLETON_HANGUL_SYLLABLE_TYPE_NAME_TO_VALUE_V2,
            HangulSyllableType::ALL_VALUES,
        );
    }
}