• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // This file is part of ICU4X. For terms of use, please see the file
2 // called LICENSE at the top level of the ICU4X source tree
3 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4 
5 //! This module defines all available properties.
6 //!
7 //! Properties may be empty marker types and implement [`BinaryProperty`], or enumerations[^1]
8 //! and implement [`EnumeratedProperty`].
9 //!
10 //! [`BinaryProperty`]s are queried through a [`CodePointSetData`](crate::CodePointSetData),
11 //! while [`EnumeratedProperty`]s are queried through [`CodePointMapData`](crate::CodePointMapData).
12 //!
13 //! In addition, some [`EnumeratedProperty`]s also implement [`ParseableEnumeratedProperty`] or
14 //! [`NamedEnumeratedProperty`]. For these properties, [`PropertyParser`](crate::PropertyParser),
15 //! [`PropertyNamesLong`](crate::PropertyNamesLong), and [`PropertyNamesShort`](crate::PropertyNamesShort)
16 //! can be constructed.
17 //!
18 //! [^1]: either Rust `enum`s, or Rust `struct`s with associated constants (open enums)
19 
20 pub use crate::names::{NamedEnumeratedProperty, ParseableEnumeratedProperty};
21 
22 pub use crate::bidi::{BidiMirroringGlyph, BidiPairedBracketType};
23 
/// Wraps an inherent `impl` block of associated constants on an open-enum
/// newtype, re-emits it unchanged, and additionally generates:
///
/// * `ALL_VALUES`, a `&'static [Self]` holding every declared constant in
///   declaration order, and
/// * `impl From<Self> for u16`, which explicitly casts the first field of
///   the struct to `u16`.
///
/// See [`test_enumerated_property_completeness`] for usage.
///
/// Example input:
/// ```ignore
/// impl EastAsianWidth {
///     pub const Neutral: EastAsianWidth = EastAsianWidth(0);
///     pub const Ambiguous: EastAsianWidth = EastAsianWidth(1);
///     ...
/// }
/// ```
/// Produces, next to the constants themselves,
/// `pub const ALL_VALUES: &'static [EastAsianWidth] = &[EastAsianWidth::Neutral, ...];`.
macro_rules! create_const_array {
    (
        $ ( #[$meta:meta] )*
        impl $enum_ty:ident {
            $( $(#[$const_meta:meta])* $v:vis const $i:ident: $t:ty = $e:expr; )*
        }
    ) => {
        // Re-emit the caller's impl block verbatim (attributes included) ...
        $( #[$meta] )*
        impl $enum_ty {
            $(
                $(#[$const_meta])*
                $v const $i: $t = $e;
            )*

            // ... and append the list of every constant declared above.
            /// All possible values of this enum in the Unicode version
            /// from this ICU4X release.
            pub const ALL_VALUES: &'static [$enum_ty] = &[
                $($enum_ty::$i),*
            ];
        }


        impl From<$enum_ty> for u16  {
            fn from(other: $enum_ty) -> Self {
                other.0 as u16
            }
        }
    }
}
64 
65 pub use crate::code_point_map::EnumeratedProperty;
66 
/// Implements the property plumbing for one enumerated property value type.
///
/// The expansion seals `ident`, implements [`EnumeratedProperty`] for it (data
/// marker, the baked singleton behind the `compiled_data` feature, and the
/// long/short property names as byte strings) and, only when `ule_ty` is
/// given, implements `zerovec::ule::AsULE` by delegating to the wrapped field.
///
/// Attributes written after `func:` are matched but not emitted.
macro_rules! make_enumerated_property {
    (
        name: $long:literal;
        short_name: $short:literal;
        ident: $prop:path;
        data_marker: $marker:ty;
        singleton: $baked:ident;
        $(ule_ty: $ule:ty;)?
        func:
        $(#[$attr:meta])*
    ) => {
        impl crate::private::Sealed for $prop {}

        impl EnumeratedProperty for $prop {
            type DataMarker = $marker;
            #[cfg(feature = "compiled_data")]
            const SINGLETON: &'static crate::provider::PropertyCodePointMap<'static, Self> =
                crate::provider::Baked::$baked;
            const NAME: &'static [u8] = $long.as_bytes();
            const SHORT_NAME: &'static [u8] = $short.as_bytes();
        }

        $(
            impl zerovec::ule::AsULE for $prop {
                type ULE = $ule;

                // Both directions simply forward to the inner field's own conversion.
                fn to_unaligned(self) -> Self::ULE {
                    zerovec::ule::AsULE::to_unaligned(self.0)
                }
                fn from_unaligned(unaligned: Self::ULE) -> Self {
                    Self(zerovec::ule::AsULE::from_unaligned(unaligned))
                }
            }
        )?
    };
}
103 
/// Enumerated property Bidi_Class
///
/// The values are the character categories that the Unicode Bidirectional
/// Algorithm requires.
/// The property values are listed under [Bidirectional Class Values](https://unicode.org/reports/tr44/#Bidi_Class_Values);
/// further background is in [Unicode Standard Annex #9](https://unicode.org/reports/tr41/tr41-28.html#UAX9).
#[repr(transparent)]
#[allow(clippy::exhaustive_structs)] // newtype
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "datagen", derive(databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_properties::props))]
pub struct BidiClass(pub(crate) u8);
116 
117 impl BidiClass {
118     /// Returns an ICU4C `UBidiClass` value.
to_icu4c_value(self) -> u8119     pub const fn to_icu4c_value(self) -> u8 {
120         self.0
121     }
122     /// Constructor from an ICU4C `UBidiClass` value.
from_icu4c_value(value: u8) -> Self123     pub const fn from_icu4c_value(value: u8) -> Self {
124         Self(value)
125     }
126 }
127 
128 create_const_array! {
129 #[allow(non_upper_case_globals)]
130 impl BidiClass {
131     /// (`L`) any strong left-to-right character
132     pub const LeftToRight: BidiClass = BidiClass(0);
133     /// (`R`) any strong right-to-left (non-Arabic-type) character
134     pub const RightToLeft: BidiClass = BidiClass(1);
135     /// (`EN`) any ASCII digit or Eastern Arabic-Indic digit
136     pub const EuropeanNumber: BidiClass = BidiClass(2);
137     /// (`ES`) plus and minus signs
138     pub const EuropeanSeparator: BidiClass = BidiClass(3);
139     /// (`ET`) a terminator in a numeric format context, includes currency signs
140     pub const EuropeanTerminator: BidiClass = BidiClass(4);
141     /// (`AN`) any Arabic-Indic digit
142     pub const ArabicNumber: BidiClass = BidiClass(5);
143     /// (`CS`) commas, colons, and slashes
144     pub const CommonSeparator: BidiClass = BidiClass(6);
145     /// (`B`) various newline characters
146     pub const ParagraphSeparator: BidiClass = BidiClass(7);
147     /// (`S`) various segment-related control codes
148     pub const SegmentSeparator: BidiClass = BidiClass(8);
149     /// (`WS`) spaces
150     pub const WhiteSpace: BidiClass = BidiClass(9);
151     /// (`ON`) most other symbols and punctuation marks
152     pub const OtherNeutral: BidiClass = BidiClass(10);
153     /// (`LRE`) U+202A: the LR embedding control
154     pub const LeftToRightEmbedding: BidiClass = BidiClass(11);
155     /// (`LRO`) U+202D: the LR override control
156     pub const LeftToRightOverride: BidiClass = BidiClass(12);
157     /// (`AL`) any strong right-to-left (Arabic-type) character
158     pub const ArabicLetter: BidiClass = BidiClass(13);
159     /// (`RLE`) U+202B: the RL embedding control
160     pub const RightToLeftEmbedding: BidiClass = BidiClass(14);
161     /// (`RLO`) U+202E: the RL override control
162     pub const RightToLeftOverride: BidiClass = BidiClass(15);
163     /// (`PDF`) U+202C: terminates an embedding or override control
164     pub const PopDirectionalFormat: BidiClass = BidiClass(16);
165     /// (`NSM`) any nonspacing mark
166     pub const NonspacingMark: BidiClass = BidiClass(17);
167     /// (`BN`) most format characters, control codes, or noncharacters
168     pub const BoundaryNeutral: BidiClass = BidiClass(18);
169     /// (`FSI`) U+2068: the first strong isolate control
170     pub const FirstStrongIsolate: BidiClass = BidiClass(19);
171     /// (`LRI`) U+2066: the LR isolate control
172     pub const LeftToRightIsolate: BidiClass = BidiClass(20);
173     /// (`RLI`) U+2067: the RL isolate control
174     pub const RightToLeftIsolate: BidiClass = BidiClass(21);
175     /// (`PDI`) U+2069: terminates an isolate control
176     pub const PopDirectionalIsolate: BidiClass = BidiClass(22);
177 }
178 }
179 
180 make_enumerated_property! {
181     name: "Bidi_Class";
182     short_name: "bc";
183     ident: BidiClass;
184     data_marker: crate::provider::BidiClassV1;
185     singleton: SINGLETON_BIDI_CLASS_V1;
186     ule_ty: u8;
187     func:
188     /// Return a [`CodePointMapDataBorrowed`] for the Bidi_Class Unicode enumerated property. See [`BidiClass`].
189     ///
190     /// # Example
191     ///
192     /// ```
193     /// use icu::properties::{maps, BidiClass};
194     ///
195     /// assert_eq!(maps::bidi_class().get('y'), BidiClass::LeftToRight);  // U+0079
196     /// assert_eq!(maps::bidi_class().get('ع'), BidiClass::ArabicLetter);  // U+0639
197     /// ```
198 }
199 
// This exists to encapsulate GeneralCategoryULE so that it can exist in the provider module rather than props
pub(crate) mod gc {
    /// Enumerated property General_Category.
    ///
    /// General_Category specifies the most general classification of a code point, usually
    /// determined based on the primary characteristic of the assigned character. For example, is the
    /// character a letter, a mark, a number, punctuation, or a symbol, and if so, of what type?
    ///
    /// GeneralCategory only supports specific subcategories (eg `UppercaseLetter`).
    /// It does not support grouped categories (eg `Letter`). For grouped categories, use [`GeneralCategoryGroup`](
    /// crate::props::GeneralCategoryGroup).
    #[derive(Copy, Clone, PartialEq, Eq, Debug, Ord, PartialOrd, Hash)]
    #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
    #[cfg_attr(feature = "datagen", derive(databake::Bake))]
    #[cfg_attr(feature = "datagen", databake(path = icu_properties::props))]
    #[allow(clippy::exhaustive_enums)] // this type is stable
    #[zerovec::make_ule(GeneralCategoryULE)]
    #[repr(u8)]
    pub enum GeneralCategory {
        /// (`Cn`) A reserved unassigned code point or a noncharacter
        Unassigned = 0,

        /// (`Lu`) An uppercase letter
        UppercaseLetter = 1,
        /// (`Ll`) A lowercase letter
        LowercaseLetter = 2,
        /// (`Lt`) A digraphic letter, with first part uppercase
        TitlecaseLetter = 3,
        /// (`Lm`) A modifier letter
        ModifierLetter = 4,
        /// (`Lo`) Other letters, including syllables and ideographs
        OtherLetter = 5,

        // NOTE(review): the mark values are not in declaration order
        // (`SpacingMark` = 8 is listed before `EnclosingMark` = 7). Presumably
        // the numbers mirror ICU4C's `UCharCategory` — confirm before reordering.
        /// (`Mn`) A nonspacing combining mark (zero advance width)
        NonspacingMark = 6,
        /// (`Mc`) A spacing combining mark (positive advance width)
        SpacingMark = 8,
        /// (`Me`) An enclosing combining mark
        EnclosingMark = 7,

        /// (`Nd`) A decimal digit
        DecimalNumber = 9,
        /// (`Nl`) A letterlike numeric character
        LetterNumber = 10,
        /// (`No`) A numeric character of other type
        OtherNumber = 11,

        /// (`Zs`) A space character (of various non-zero widths)
        SpaceSeparator = 12,
        /// (`Zl`) U+2028 LINE SEPARATOR only
        LineSeparator = 13,
        /// (`Zp`) U+2029 PARAGRAPH SEPARATOR only
        ParagraphSeparator = 14,

        /// (`Cc`) A C0 or C1 control code
        Control = 15,
        /// (`Cf`) A format control character
        Format = 16,
        /// (`Co`) A private-use character
        PrivateUse = 17,
        /// (`Cs`) A surrogate code point
        Surrogate = 18,

        // The punctuation values are likewise out of declaration order:
        // `InitialPunctuation` (28) and `FinalPunctuation` (29) sit numerically
        // after the symbol values. `FinalPunctuation` is the largest
        // discriminant of this enum; `GeneralCategoryGroup::ALL` derives its
        // bit mask from it.
        /// (`Pd`) A dash or hyphen punctuation mark
        DashPunctuation = 19,
        /// (`Ps`) An opening punctuation mark (of a pair)
        OpenPunctuation = 20,
        /// (`Pe`) A closing punctuation mark (of a pair)
        ClosePunctuation = 21,
        /// (`Pc`) A connecting punctuation mark, like a tie
        ConnectorPunctuation = 22,
        /// (`Pi`) An initial quotation mark
        InitialPunctuation = 28,
        /// (`Pf`) A final quotation mark
        FinalPunctuation = 29,
        /// (`Po`) A punctuation mark of other type
        OtherPunctuation = 23,

        /// (`Sm`) A symbol of mathematical use
        MathSymbol = 24,
        /// (`Sc`) A currency sign
        CurrencySymbol = 25,
        /// (`Sk`) A non-letterlike modifier symbol
        ModifierSymbol = 26,
        /// (`So`) A symbol of other type
        OtherSymbol = 27,
    }
}
288 
289 pub use gc::GeneralCategory;
290 
291 impl GeneralCategory {
292     /// All possible values of this enum
293     pub const ALL_VALUES: &'static [GeneralCategory] = &[
294         GeneralCategory::Unassigned,
295         GeneralCategory::UppercaseLetter,
296         GeneralCategory::LowercaseLetter,
297         GeneralCategory::TitlecaseLetter,
298         GeneralCategory::ModifierLetter,
299         GeneralCategory::OtherLetter,
300         GeneralCategory::NonspacingMark,
301         GeneralCategory::SpacingMark,
302         GeneralCategory::EnclosingMark,
303         GeneralCategory::DecimalNumber,
304         GeneralCategory::LetterNumber,
305         GeneralCategory::OtherNumber,
306         GeneralCategory::SpaceSeparator,
307         GeneralCategory::LineSeparator,
308         GeneralCategory::ParagraphSeparator,
309         GeneralCategory::Control,
310         GeneralCategory::Format,
311         GeneralCategory::PrivateUse,
312         GeneralCategory::Surrogate,
313         GeneralCategory::DashPunctuation,
314         GeneralCategory::OpenPunctuation,
315         GeneralCategory::ClosePunctuation,
316         GeneralCategory::ConnectorPunctuation,
317         GeneralCategory::InitialPunctuation,
318         GeneralCategory::FinalPunctuation,
319         GeneralCategory::OtherPunctuation,
320         GeneralCategory::MathSymbol,
321         GeneralCategory::CurrencySymbol,
322         GeneralCategory::ModifierSymbol,
323         GeneralCategory::OtherSymbol,
324     ];
325 }
326 
/// Error value for `impl TryFrom<u8> for GeneralCategory`, returned when the
/// integer is out of bounds.
#[non_exhaustive]
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct GeneralCategoryOutOfBoundsError;
331 
332 impl TryFrom<u8> for GeneralCategory {
333     type Error = GeneralCategoryOutOfBoundsError;
334     /// Construct this [`GeneralCategory`] from an integer, returning
335     /// an error if it is out of bounds
try_from(val: u8) -> Result<Self, GeneralCategoryOutOfBoundsError>336     fn try_from(val: u8) -> Result<Self, GeneralCategoryOutOfBoundsError> {
337         GeneralCategory::new_from_u8(val).ok_or(GeneralCategoryOutOfBoundsError)
338     }
339 }
340 
341 make_enumerated_property! {
342     name: "General_Category";
343     short_name: "gc";
344     ident: GeneralCategory;
345     data_marker: crate::provider::GeneralCategoryV1;
346     singleton: SINGLETON_GENERAL_CATEGORY_V1;
347     func:
348     /// Return a [`CodePointMapDataBorrowed`] for the General_Category Unicode enumerated property. See [`GeneralCategory`].
349     ///
350     /// # Example
351     ///
352     /// ```
353     /// use icu::properties::{maps, GeneralCategory};
354     ///
355     /// assert_eq!(maps::general_category().get('木'), GeneralCategory::OtherLetter);  // U+6728
356     /// assert_eq!(maps::general_category().get('��'), GeneralCategory::OtherSymbol);  // U+1F383 JACK-O-LANTERN
357     /// ```
358 }
359 
/// Groupings of multiple General_Category property values.
///
/// Instances of `GeneralCategoryGroup` represent the defined multi-category
/// values that are useful for users in certain contexts, such as regex. In
/// other words, unlike [`GeneralCategory`], this supports groups of general
/// categories: for example, `Letter` is the union of `UppercaseLetter`,
/// `LowercaseLetter`, etc.
///
/// See <https://www.unicode.org/reports/tr44/> .
///
/// The discriminants correspond to the `U_GC_XX_MASK` constants in ICU4C.
///
/// See `UCharCategory` and `U_GET_GC_MASK` in ICU4C.
#[derive(Copy, Clone, PartialEq, Debug, Eq)]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct GeneralCategoryGroup(pub(crate) u32);
379 
// Implements the crate-private `Sealed` marker trait for `GeneralCategoryGroup`.
// NOTE(review): presumably required by a sealed trait implemented for this
// type elsewhere in the crate — confirm.
impl crate::private::Sealed for GeneralCategoryGroup {}
381 
382 use GeneralCategory as GC;
383 use GeneralCategoryGroup as GCG;
384 
385 #[allow(non_upper_case_globals)]
386 impl GeneralCategoryGroup {
387     /// (`Lu`) An uppercase letter
388     pub const UppercaseLetter: GeneralCategoryGroup = GCG(1 << (GC::UppercaseLetter as u32));
389     /// (`Ll`) A lowercase letter
390     pub const LowercaseLetter: GeneralCategoryGroup = GCG(1 << (GC::LowercaseLetter as u32));
391     /// (`Lt`) A digraphic letter, with first part uppercase
392     pub const TitlecaseLetter: GeneralCategoryGroup = GCG(1 << (GC::TitlecaseLetter as u32));
393     /// (`Lm`) A modifier letter
394     pub const ModifierLetter: GeneralCategoryGroup = GCG(1 << (GC::ModifierLetter as u32));
395     /// (`Lo`) Other letters, including syllables and ideographs
396     pub const OtherLetter: GeneralCategoryGroup = GCG(1 << (GC::OtherLetter as u32));
397     /// (`LC`) The union of UppercaseLetter, LowercaseLetter, and TitlecaseLetter
398     pub const CasedLetter: GeneralCategoryGroup = GCG((1 << (GC::UppercaseLetter as u32))
399         | (1 << (GC::LowercaseLetter as u32))
400         | (1 << (GC::TitlecaseLetter as u32)));
401     /// (`L`) The union of all letter categories
402     pub const Letter: GeneralCategoryGroup = GCG((1 << (GC::UppercaseLetter as u32))
403         | (1 << (GC::LowercaseLetter as u32))
404         | (1 << (GC::TitlecaseLetter as u32))
405         | (1 << (GC::ModifierLetter as u32))
406         | (1 << (GC::OtherLetter as u32)));
407 
408     /// (`Mn`) A nonspacing combining mark (zero advance width)
409     pub const NonspacingMark: GeneralCategoryGroup = GCG(1 << (GC::NonspacingMark as u32));
410     /// (`Mc`) A spacing combining mark (positive advance width)
411     pub const EnclosingMark: GeneralCategoryGroup = GCG(1 << (GC::EnclosingMark as u32));
412     /// (`Me`) An enclosing combining mark
413     pub const SpacingMark: GeneralCategoryGroup = GCG(1 << (GC::SpacingMark as u32));
414     /// (`M`) The union of all mark categories
415     pub const Mark: GeneralCategoryGroup = GCG((1 << (GC::NonspacingMark as u32))
416         | (1 << (GC::EnclosingMark as u32))
417         | (1 << (GC::SpacingMark as u32)));
418 
419     /// (`Nd`) A decimal digit
420     pub const DecimalNumber: GeneralCategoryGroup = GCG(1 << (GC::DecimalNumber as u32));
421     /// (`Nl`) A letterlike numeric character
422     pub const LetterNumber: GeneralCategoryGroup = GCG(1 << (GC::LetterNumber as u32));
423     /// (`No`) A numeric character of other type
424     pub const OtherNumber: GeneralCategoryGroup = GCG(1 << (GC::OtherNumber as u32));
425     /// (`N`) The union of all number categories
426     pub const Number: GeneralCategoryGroup = GCG((1 << (GC::DecimalNumber as u32))
427         | (1 << (GC::LetterNumber as u32))
428         | (1 << (GC::OtherNumber as u32)));
429 
430     /// (`Zs`) A space character (of various non-zero widths)
431     pub const SpaceSeparator: GeneralCategoryGroup = GCG(1 << (GC::SpaceSeparator as u32));
432     /// (`Zl`) U+2028 LINE SEPARATOR only
433     pub const LineSeparator: GeneralCategoryGroup = GCG(1 << (GC::LineSeparator as u32));
434     /// (`Zp`) U+2029 PARAGRAPH SEPARATOR only
435     pub const ParagraphSeparator: GeneralCategoryGroup = GCG(1 << (GC::ParagraphSeparator as u32));
436     /// (`Z`) The union of all separator categories
437     pub const Separator: GeneralCategoryGroup = GCG((1 << (GC::SpaceSeparator as u32))
438         | (1 << (GC::LineSeparator as u32))
439         | (1 << (GC::ParagraphSeparator as u32)));
440 
441     /// (`Cc`) A C0 or C1 control code
442     pub const Control: GeneralCategoryGroup = GCG(1 << (GC::Control as u32));
443     /// (`Cf`) A format control character
444     pub const Format: GeneralCategoryGroup = GCG(1 << (GC::Format as u32));
445     /// (`Co`) A private-use character
446     pub const PrivateUse: GeneralCategoryGroup = GCG(1 << (GC::PrivateUse as u32));
447     /// (`Cs`) A surrogate code point
448     pub const Surrogate: GeneralCategoryGroup = GCG(1 << (GC::Surrogate as u32));
449     /// (`Cn`) A reserved unassigned code point or a noncharacter
450     pub const Unassigned: GeneralCategoryGroup = GCG(1 << (GC::Unassigned as u32));
451     /// (`C`) The union of all control code, reserved, and unassigned categories
452     pub const Other: GeneralCategoryGroup = GCG((1 << (GC::Control as u32))
453         | (1 << (GC::Format as u32))
454         | (1 << (GC::PrivateUse as u32))
455         | (1 << (GC::Surrogate as u32))
456         | (1 << (GC::Unassigned as u32)));
457 
458     /// (`Pd`) A dash or hyphen punctuation mark
459     pub const DashPunctuation: GeneralCategoryGroup = GCG(1 << (GC::DashPunctuation as u32));
460     /// (`Ps`) An opening punctuation mark (of a pair)
461     pub const OpenPunctuation: GeneralCategoryGroup = GCG(1 << (GC::OpenPunctuation as u32));
462     /// (`Pe`) A closing punctuation mark (of a pair)
463     pub const ClosePunctuation: GeneralCategoryGroup = GCG(1 << (GC::ClosePunctuation as u32));
464     /// (`Pc`) A connecting punctuation mark, like a tie
465     pub const ConnectorPunctuation: GeneralCategoryGroup =
466         GCG(1 << (GC::ConnectorPunctuation as u32));
467     /// (`Pi`) An initial quotation mark
468     pub const InitialPunctuation: GeneralCategoryGroup = GCG(1 << (GC::InitialPunctuation as u32));
469     /// (`Pf`) A final quotation mark
470     pub const FinalPunctuation: GeneralCategoryGroup = GCG(1 << (GC::FinalPunctuation as u32));
471     /// (`Po`) A punctuation mark of other type
472     pub const OtherPunctuation: GeneralCategoryGroup = GCG(1 << (GC::OtherPunctuation as u32));
473     /// (`P`) The union of all punctuation categories
474     pub const Punctuation: GeneralCategoryGroup = GCG((1 << (GC::DashPunctuation as u32))
475         | (1 << (GC::OpenPunctuation as u32))
476         | (1 << (GC::ClosePunctuation as u32))
477         | (1 << (GC::ConnectorPunctuation as u32))
478         | (1 << (GC::OtherPunctuation as u32))
479         | (1 << (GC::InitialPunctuation as u32))
480         | (1 << (GC::FinalPunctuation as u32)));
481 
482     /// (`Sm`) A symbol of mathematical use
483     pub const MathSymbol: GeneralCategoryGroup = GCG(1 << (GC::MathSymbol as u32));
484     /// (`Sc`) A currency sign
485     pub const CurrencySymbol: GeneralCategoryGroup = GCG(1 << (GC::CurrencySymbol as u32));
486     /// (`Sk`) A non-letterlike modifier symbol
487     pub const ModifierSymbol: GeneralCategoryGroup = GCG(1 << (GC::ModifierSymbol as u32));
488     /// (`So`) A symbol of other type
489     pub const OtherSymbol: GeneralCategoryGroup = GCG(1 << (GC::OtherSymbol as u32));
490     /// (`S`) The union of all symbol categories
491     pub const Symbol: GeneralCategoryGroup = GCG((1 << (GC::MathSymbol as u32))
492         | (1 << (GC::CurrencySymbol as u32))
493         | (1 << (GC::ModifierSymbol as u32))
494         | (1 << (GC::OtherSymbol as u32)));
495 
496     const ALL: u32 = (1 << (GC::FinalPunctuation as u32 + 1)) - 1;
497 
498     /// Return whether the code point belongs in the provided multi-value category.
499     ///
500     /// ```
501     /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
502     /// use icu::properties::CodePointMapData;
503     ///
504     /// let gc = CodePointMapData::<GeneralCategory>::new();
505     ///
506     /// assert_eq!(gc.get('A'), GeneralCategory::UppercaseLetter);
507     /// assert!(GeneralCategoryGroup::CasedLetter.contains(gc.get('A')));
508     ///
509     /// // U+0B1E ORIYA LETTER NYA
510     /// assert_eq!(gc.get('ଞ'), GeneralCategory::OtherLetter);
511     /// assert!(GeneralCategoryGroup::Letter.contains(gc.get('ଞ')));
512     /// assert!(!GeneralCategoryGroup::CasedLetter.contains(gc.get('ଞ')));
513     ///
514     /// // U+0301 COMBINING ACUTE ACCENT
515     /// assert_eq!(gc.get('\u{0301}'), GeneralCategory::NonspacingMark);
516     /// assert!(GeneralCategoryGroup::Mark.contains(gc.get('\u{0301}')));
517     /// assert!(!GeneralCategoryGroup::Letter.contains(gc.get('\u{0301}')));
518     ///
519     /// assert_eq!(gc.get('0'), GeneralCategory::DecimalNumber);
520     /// assert!(GeneralCategoryGroup::Number.contains(gc.get('0')));
521     /// assert!(!GeneralCategoryGroup::Mark.contains(gc.get('0')));
522     ///
523     /// assert_eq!(gc.get('('), GeneralCategory::OpenPunctuation);
524     /// assert!(GeneralCategoryGroup::Punctuation.contains(gc.get('(')));
525     /// assert!(!GeneralCategoryGroup::Number.contains(gc.get('(')));
526     ///
527     /// // U+2713 CHECK MARK
528     /// assert_eq!(gc.get('✓'), GeneralCategory::OtherSymbol);
529     /// assert!(GeneralCategoryGroup::Symbol.contains(gc.get('✓')));
530     /// assert!(!GeneralCategoryGroup::Punctuation.contains(gc.get('✓')));
531     ///
532     /// assert_eq!(gc.get(' '), GeneralCategory::SpaceSeparator);
533     /// assert!(GeneralCategoryGroup::Separator.contains(gc.get(' ')));
534     /// assert!(!GeneralCategoryGroup::Symbol.contains(gc.get(' ')));
535     ///
536     /// // U+E007F CANCEL TAG
537     /// assert_eq!(gc.get('\u{E007F}'), GeneralCategory::Format);
538     /// assert!(GeneralCategoryGroup::Other.contains(gc.get('\u{E007F}')));
539     /// assert!(!GeneralCategoryGroup::Separator.contains(gc.get('\u{E007F}')));
540     /// ```
contains(self, val: GeneralCategory) -> bool541     pub const fn contains(self, val: GeneralCategory) -> bool {
542         0 != (1 << (val as u32)) & self.0
543     }
544 
545     /// Produce a GeneralCategoryGroup that is the inverse of this one
546     ///
547     /// # Example
548     ///
549     /// ```rust
550     /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
551     ///
552     /// let letter = GeneralCategoryGroup::Letter;
553     /// let not_letter = letter.complement();
554     ///
555     /// assert!(not_letter.contains(GeneralCategory::MathSymbol));
556     /// assert!(!letter.contains(GeneralCategory::MathSymbol));
557     /// assert!(not_letter.contains(GeneralCategory::OtherPunctuation));
558     /// assert!(!letter.contains(GeneralCategory::OtherPunctuation));
559     /// assert!(!not_letter.contains(GeneralCategory::UppercaseLetter));
560     /// assert!(letter.contains(GeneralCategory::UppercaseLetter));
561     /// ```
complement(self) -> Self562     pub const fn complement(self) -> Self {
563         // Mask off things not in Self::ALL to guarantee the mask
564         // values stay in-range
565         GeneralCategoryGroup(!self.0 & Self::ALL)
566     }
567 
568     /// Return the group representing all GeneralCategory values
569     ///
570     /// # Example
571     ///
572     /// ```rust
573     /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
574     ///
575     /// let all = GeneralCategoryGroup::all();
576     ///
577     /// assert!(all.contains(GeneralCategory::MathSymbol));
578     /// assert!(all.contains(GeneralCategory::OtherPunctuation));
579     /// assert!(all.contains(GeneralCategory::UppercaseLetter));
580     /// ```
all() -> Self581     pub const fn all() -> Self {
582         Self(Self::ALL)
583     }
584 
585     /// Return the empty group
586     ///
587     /// # Example
588     ///
589     /// ```rust
590     /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
591     ///
592     /// let empty = GeneralCategoryGroup::empty();
593     ///
594     /// assert!(!empty.contains(GeneralCategory::MathSymbol));
595     /// assert!(!empty.contains(GeneralCategory::OtherPunctuation));
596     /// assert!(!empty.contains(GeneralCategory::UppercaseLetter));
597     /// ```
empty() -> Self598     pub const fn empty() -> Self {
599         Self(0)
600     }
601 
602     /// Take the union of two groups
603     ///
604     /// # Example
605     ///
606     /// ```rust
607     /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
608     ///
609     /// let letter = GeneralCategoryGroup::Letter;
610     /// let symbol = GeneralCategoryGroup::Symbol;
611     /// let union = letter.union(symbol);
612     ///
613     /// assert!(union.contains(GeneralCategory::MathSymbol));
614     /// assert!(!union.contains(GeneralCategory::OtherPunctuation));
615     /// assert!(union.contains(GeneralCategory::UppercaseLetter));
616     /// ```
union(self, other: Self) -> Self617     pub const fn union(self, other: Self) -> Self {
618         Self(self.0 | other.0)
619     }
620 
621     /// Take the intersection of two groups
622     ///
623     /// # Example
624     ///
625     /// ```rust
626     /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
627     ///
628     /// let letter = GeneralCategoryGroup::Letter;
629     /// let lu = GeneralCategoryGroup::UppercaseLetter;
630     /// let intersection = letter.intersection(lu);
631     ///
632     /// assert!(!intersection.contains(GeneralCategory::MathSymbol));
633     /// assert!(!intersection.contains(GeneralCategory::OtherPunctuation));
634     /// assert!(intersection.contains(GeneralCategory::UppercaseLetter));
635     /// assert!(!intersection.contains(GeneralCategory::LowercaseLetter));
636     /// ```
intersection(self, other: Self) -> Self637     pub const fn intersection(self, other: Self) -> Self {
638         Self(self.0 & other.0)
639     }
640 }
641 
642 impl From<GeneralCategory> for GeneralCategoryGroup {
from(subcategory: GeneralCategory) -> Self643     fn from(subcategory: GeneralCategory) -> Self {
644         GeneralCategoryGroup(1 << (subcategory as u32))
645     }
646 }
647 impl From<u32> for GeneralCategoryGroup {
from(mask: u32) -> Self648     fn from(mask: u32) -> Self {
649         // Mask off things not in Self::ALL to guarantee the mask
650         // values stay in-range
651         GeneralCategoryGroup(mask & Self::ALL)
652     }
653 }
654 impl From<GeneralCategoryGroup> for u32 {
from(group: GeneralCategoryGroup) -> Self655     fn from(group: GeneralCategoryGroup) -> Self {
656         group.0
657     }
658 }
659 
/// Enumerated property Script.
///
/// The same value type serves both the Script and the Script_Extensions
/// Unicode properties. Every character is assigned a single Script, while a
/// character that is used in a particular subset of scripts belongs to more
/// than one Script_Extensions set. DEVANAGARI DIGIT NINE, for instance, has
/// Script=Devanagari, yet it is also in the Script_Extensions set for Dogra,
/// Kaithi, and Mahajani.
///
/// For more information, see UAX #24: <http://www.unicode.org/reports/tr24/>.
/// See `UScriptCode` in ICU4C.
#[repr(transparent)]
#[allow(clippy::exhaustive_structs)] // newtype
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "datagen", derive(databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_properties::props))]
pub struct Script(pub(crate) u16);
677 
678 impl Script {
679     /// Returns an ICU4C `UScriptCode` value.
to_icu4c_value(self) -> u16680     pub const fn to_icu4c_value(self) -> u16 {
681         self.0
682     }
683     /// Constructor from an ICU4C `UScriptCode` value.
from_icu4c_value(value: u16) -> Self684     pub const fn from_icu4c_value(value: u16) -> Self {
685         Self(value)
686     }
687 }
688 
// Named `Script` values. Each constant wraps the numeric value of the
// corresponding ICU4C `UScriptCode`. The list is ordered by name rather than
// by numeric value, so neighbouring entries are not numerically adjacent.
// Per its documentation, `create_const_array!` also emits an `ALL_VALUES`
// table of `(name, value)` pairs built from these constants.
689 create_const_array! {
690 #[allow(missing_docs)] // These constants don't need individual documentation.
691 #[allow(non_upper_case_globals)]
692 impl Script {
693     pub const Adlam: Script = Script(167);
694     pub const Ahom: Script = Script(161);
695     pub const AnatolianHieroglyphs: Script = Script(156);
696     pub const Arabic: Script = Script(2);
697     pub const Armenian: Script = Script(3);
698     pub const Avestan: Script = Script(117);
699     pub const Balinese: Script = Script(62);
700     pub const Bamum: Script = Script(130);
701     pub const BassaVah: Script = Script(134);
702     pub const Batak: Script = Script(63);
703     pub const Bengali: Script = Script(4);
704     pub const Bhaiksuki: Script = Script(168);
705     pub const Bopomofo: Script = Script(5);
706     pub const Brahmi: Script = Script(65);
707     pub const Braille: Script = Script(46);
708     pub const Buginese: Script = Script(55);
709     pub const Buhid: Script = Script(44);
710     pub const CanadianAboriginal: Script = Script(40);
711     pub const Carian: Script = Script(104);
712     pub const CaucasianAlbanian: Script = Script(159);
713     pub const Chakma: Script = Script(118);
714     pub const Cham: Script = Script(66);
715     pub const Cherokee: Script = Script(6);
716     pub const Chorasmian: Script = Script(189);
717     pub const Common: Script = Script(0);
718     pub const Coptic: Script = Script(7);
719     pub const Cuneiform: Script = Script(101);
720     pub const Cypriot: Script = Script(47);
721     pub const CyproMinoan: Script = Script(193);
722     pub const Cyrillic: Script = Script(8);
723     pub const Deseret: Script = Script(9);
724     pub const Devanagari: Script = Script(10);
725     pub const DivesAkuru: Script = Script(190);
726     pub const Dogra: Script = Script(178);
727     pub const Duployan: Script = Script(135);
728     pub const EgyptianHieroglyphs: Script = Script(71);
729     pub const Elbasan: Script = Script(136);
730     pub const Elymaic: Script = Script(185);
731     pub const Ethiopian: Script = Script(11);
732     pub const Georgian: Script = Script(12);
733     pub const Glagolitic: Script = Script(56);
734     pub const Gothic: Script = Script(13);
735     pub const Grantha: Script = Script(137);
736     pub const Greek: Script = Script(14);
737     pub const Gujarati: Script = Script(15);
738     pub const GunjalaGondi: Script = Script(179);
739     pub const Gurmukhi: Script = Script(16);
740     pub const Han: Script = Script(17);
741     pub const Hangul: Script = Script(18);
742     pub const HanifiRohingya: Script = Script(182);
743     pub const Hanunoo: Script = Script(43);
744     pub const Hatran: Script = Script(162);
745     pub const Hebrew: Script = Script(19);
746     pub const Hiragana: Script = Script(20);
747     pub const ImperialAramaic: Script = Script(116);
748     pub const Inherited: Script = Script(1);
749     pub const InscriptionalPahlavi: Script = Script(122);
750     pub const InscriptionalParthian: Script = Script(125);
751     pub const Javanese: Script = Script(78);
752     pub const Kaithi: Script = Script(120);
753     pub const Kannada: Script = Script(21);
754     pub const Katakana: Script = Script(22);
755     pub const Kawi: Script = Script(198);
756     pub const KayahLi: Script = Script(79);
757     pub const Kharoshthi: Script = Script(57);
758     pub const KhitanSmallScript: Script = Script(191);
759     pub const Khmer: Script = Script(23);
760     pub const Khojki: Script = Script(157);
761     pub const Khudawadi: Script = Script(145);
762     pub const Lao: Script = Script(24);
763     pub const Latin: Script = Script(25);
764     pub const Lepcha: Script = Script(82);
765     pub const Limbu: Script = Script(48);
766     pub const LinearA: Script = Script(83);
767     pub const LinearB: Script = Script(49);
768     pub const Lisu: Script = Script(131);
769     pub const Lycian: Script = Script(107);
770     pub const Lydian: Script = Script(108);
771     pub const Mahajani: Script = Script(160);
772     pub const Makasar: Script = Script(180);
773     pub const Malayalam: Script = Script(26);
774     pub const Mandaic: Script = Script(84);
775     pub const Manichaean: Script = Script(121);
776     pub const Marchen: Script = Script(169);
777     pub const MasaramGondi: Script = Script(175);
778     pub const Medefaidrin: Script = Script(181);
779     pub const MeeteiMayek: Script = Script(115);
780     pub const MendeKikakui: Script = Script(140);
781     pub const MeroiticCursive: Script = Script(141);
782     pub const MeroiticHieroglyphs: Script = Script(86);
783     pub const Miao: Script = Script(92);
784     pub const Modi: Script = Script(163);
785     pub const Mongolian: Script = Script(27);
786     pub const Mro: Script = Script(149);
787     pub const Multani: Script = Script(164);
788     pub const Myanmar: Script = Script(28);
789     pub const Nabataean: Script = Script(143);
790     pub const NagMundari: Script = Script(199);
791     pub const Nandinagari: Script = Script(187);
792     pub const Nastaliq: Script = Script(200);
793     pub const NewTaiLue: Script = Script(59);
794     pub const Newa: Script = Script(170);
795     pub const Nko: Script = Script(87);
796     pub const Nushu: Script = Script(150);
797     pub const NyiakengPuachueHmong: Script = Script(186);
798     pub const Ogham: Script = Script(29);
799     pub const OlChiki: Script = Script(109);
800     pub const OldHungarian: Script = Script(76);
801     pub const OldItalic: Script = Script(30);
802     pub const OldNorthArabian: Script = Script(142);
803     pub const OldPermic: Script = Script(89);
804     pub const OldPersian: Script = Script(61);
805     pub const OldSogdian: Script = Script(184);
806     pub const OldSouthArabian: Script = Script(133);
807     pub const OldTurkic: Script = Script(88);
808     pub const OldUyghur: Script = Script(194);
809     pub const Oriya: Script = Script(31);
810     pub const Osage: Script = Script(171);
811     pub const Osmanya: Script = Script(50);
812     pub const PahawhHmong: Script = Script(75);
813     pub const Palmyrene: Script = Script(144);
814     pub const PauCinHau: Script = Script(165);
815     pub const PhagsPa: Script = Script(90);
816     pub const Phoenician: Script = Script(91);
817     pub const PsalterPahlavi: Script = Script(123);
818     pub const Rejang: Script = Script(110);
819     pub const Runic: Script = Script(32);
820     pub const Samaritan: Script = Script(126);
821     pub const Saurashtra: Script = Script(111);
822     pub const Sharada: Script = Script(151);
823     pub const Shavian: Script = Script(51);
824     pub const Siddham: Script = Script(166);
825     pub const SignWriting: Script = Script(112);
826     pub const Sinhala: Script = Script(33);
827     pub const Sogdian: Script = Script(183);
828     pub const SoraSompeng: Script = Script(152);
829     pub const Soyombo: Script = Script(176);
830     pub const Sundanese: Script = Script(113);
831     pub const SylotiNagri: Script = Script(58);
832     pub const Syriac: Script = Script(34);
833     pub const Tagalog: Script = Script(42);
834     pub const Tagbanwa: Script = Script(45);
835     pub const TaiLe: Script = Script(52);
836     pub const TaiTham: Script = Script(106);
837     pub const TaiViet: Script = Script(127);
838     pub const Takri: Script = Script(153);
839     pub const Tamil: Script = Script(35);
840     pub const Tangsa: Script = Script(195);
841     pub const Tangut: Script = Script(154);
842     pub const Telugu: Script = Script(36);
843     pub const Thaana: Script = Script(37);
844     pub const Thai: Script = Script(38);
845     pub const Tibetan: Script = Script(39);
846     pub const Tifinagh: Script = Script(60);
847     pub const Tirhuta: Script = Script(158);
848     pub const Toto: Script = Script(196);
849     pub const Ugaritic: Script = Script(53);
850     pub const Unknown: Script = Script(103);
851     pub const Vai: Script = Script(99);
852     pub const Vithkuqi: Script = Script(197);
853     pub const Wancho: Script = Script(188);
854     pub const WarangCiti: Script = Script(146);
855     pub const Yezidi: Script = Script(192);
856     pub const Yi: Script = Script(41);
857     pub const ZanabazarSquare: Script = Script(177);
858 }
859 }
860 
861 make_enumerated_property! {
862     name: "Script";
863     short_name: "sc";
864     ident: Script;
865     data_marker: crate::provider::ScriptV1;
866     singleton: SINGLETON_SCRIPT_V1;
867     ule_ty: <u16 as zerovec::ule::AsULE>::ULE;
868     func:
869     /// Return a [`CodePointMapDataBorrowed`] for the Script Unicode enumerated property. See [`Script`].
870     ///
871     /// **Note:** Some code points are associated with multiple scripts. If you are trying to
872     /// determine whether a code point belongs to a certain script, you should use
873     /// [`load_script_with_extensions_unstable`] and [`ScriptWithExtensionsBorrowed::has_script`]
874     /// instead of this function.
875     ///
876     /// # Example
877     ///
878     /// ```
879     /// use icu::properties::{maps, Script};
880     ///
881     /// assert_eq!(maps::script().get('木'), Script::Han);  // U+6728
882     /// assert_eq!(maps::script().get('🎃'), Script::Common);  // U+1F383 JACK-O-LANTERN
883     /// ```
884     /// [`load_script_with_extensions_unstable`]: crate::script::load_script_with_extensions_unstable
885     /// [`ScriptWithExtensionsBorrowed::has_script`]: crate::script::ScriptWithExtensionsBorrowed::has_script
886 }
887 
888 /// Enumerated property Hangul_Syllable_Type.
889 ///
890 /// The Unicode standard provides both precomposed Hangul syllables and conjoining Jamo to compose
891 /// arbitrary Hangul syllables. This property provides that ontology of Hangul code points.
892 ///
/// The wrapped `u8` is the ICU4C `UHangulSyllableType` value.
///
893 /// For more information, see the [Unicode Korean FAQ](https://www.unicode.org/faq/korean.html).
894 #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
895 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
896 #[cfg_attr(feature = "datagen", derive(databake::Bake))]
897 #[cfg_attr(feature = "datagen", databake(path = icu_properties::props))]
898 #[allow(clippy::exhaustive_structs)] // newtype
899 #[repr(transparent)]
900 pub struct HangulSyllableType(pub(crate) u8);
901 
902 impl HangulSyllableType {
903     /// Returns an ICU4C `UHangulSyllableType` value.
to_icu4c_value(self) -> u8904     pub const fn to_icu4c_value(self) -> u8 {
905         self.0
906     }
907     /// Constructor from an ICU4C `UHangulSyllableType` value.
from_icu4c_value(value: u8) -> Self908     pub const fn from_icu4c_value(value: u8) -> Self {
909         Self(value)
910     }
911 }
912 
// Named `HangulSyllableType` values. Each doc comment starts with the value's
// abbreviation in parentheses. Per its documentation, `create_const_array!`
// also emits an `ALL_VALUES` table of `(name, value)` pairs from this list.
913 create_const_array! {
914 #[allow(non_upper_case_globals)]
915 impl HangulSyllableType {
916     /// (`NA`) not applicable (e.g. not a Hangul code point).
917     pub const NotApplicable: HangulSyllableType = HangulSyllableType(0);
918     /// (`L`) a conjoining leading consonant Jamo.
919     pub const LeadingJamo: HangulSyllableType = HangulSyllableType(1);
920     /// (`V`) a conjoining vowel Jamo.
921     pub const VowelJamo: HangulSyllableType = HangulSyllableType(2);
922     /// (`T`) a conjoining trailing consonant Jamo.
923     pub const TrailingJamo: HangulSyllableType = HangulSyllableType(3);
924     /// (`LV`) a precomposed syllable with a leading consonant and a vowel.
925     pub const LeadingVowelSyllable: HangulSyllableType = HangulSyllableType(4);
926     /// (`LVT`) a precomposed syllable with a leading consonant, a vowel, and a trailing consonant.
927     pub const LeadingVowelTrailingSyllable: HangulSyllableType = HangulSyllableType(5);
928 }
929 }
930 
931 make_enumerated_property! {
932     name: "Hangul_Syllable_Type";
933     short_name: "hst";
934     ident: HangulSyllableType;
935     data_marker: crate::provider::HangulSyllableTypeV1;
936     singleton: SINGLETON_HANGUL_SYLLABLE_TYPE_V1;
937     ule_ty: u8;
938     func:
939     /// Returns a [`CodePointMapDataBorrowed`] for the Hangul_Syllable_Type
940     /// Unicode enumerated property. See [`HangulSyllableType`].
941     ///
942     /// # Example
943     ///
944     /// ```
945     /// use icu::properties::{maps, HangulSyllableType};
946     ///
947     /// assert_eq!(maps::hangul_syllable_type().get('ᄀ'), HangulSyllableType::LeadingJamo);  // U+1100: Hangul Choseong Kiyeok
948     /// assert_eq!(maps::hangul_syllable_type().get('가'), HangulSyllableType::LeadingVowelSyllable);  // U+AC00: Hangul Syllable Ga
949     /// ```
950 
951 }
952 
953 /// Enumerated property East_Asian_Width.
954 ///
955 /// See "Definition" in UAX #11 for the summary of each property value:
956 /// <https://www.unicode.org/reports/tr11/#Definitions>
///
/// The wrapped `u8` is the ICU4C `UEastAsianWidth` value.
957 #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
958 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
959 #[cfg_attr(feature = "datagen", derive(databake::Bake))]
960 #[cfg_attr(feature = "datagen", databake(path = icu_properties::props))]
961 #[allow(clippy::exhaustive_structs)] // newtype
962 #[repr(transparent)]
963 pub struct EastAsianWidth(pub(crate) u8);
964 
965 impl EastAsianWidth {
966     /// Returns an ICU4C `UEastAsianWidth` value.
to_icu4c_value(self) -> u8967     pub const fn to_icu4c_value(self) -> u8 {
968         self.0
969     }
970     /// Constructor from an ICU4C `UEastAsianWidth` value.
from_icu4c_value(value: u8) -> Self971     pub const fn from_icu4c_value(value: u8) -> Self {
972         Self(value)
973     }
974 }
975 
// Named `EastAsianWidth` values. Per its documentation, `create_const_array!`
// also emits an `ALL_VALUES` table of `(name, value)` pairs from this list.
976 create_const_array! {
977 #[allow(missing_docs)] // These constants don't need individual documentation.
978 #[allow(non_upper_case_globals)]
979 impl EastAsianWidth {
980     pub const Neutral: EastAsianWidth = EastAsianWidth(0); // name="N"
981     pub const Ambiguous: EastAsianWidth = EastAsianWidth(1); // name="A"
982     pub const Halfwidth: EastAsianWidth = EastAsianWidth(2); // name="H"
983     pub const Fullwidth: EastAsianWidth = EastAsianWidth(3); // name="F"
984     pub const Narrow: EastAsianWidth = EastAsianWidth(4); // name="Na"
985     pub const Wide: EastAsianWidth = EastAsianWidth(5); // name="W"
986 }
987 }
988 
989 make_enumerated_property! {
990     name: "East_Asian_Width";
991     short_name: "ea";
992     ident: EastAsianWidth;
993     data_marker: crate::provider::EastAsianWidthV1;
994     singleton: SINGLETON_EAST_ASIAN_WIDTH_V1;
995     ule_ty: u8;
996     func:
997     /// Return a [`CodePointMapDataBorrowed`] for the East_Asian_Width Unicode enumerated
998     /// property. See [`EastAsianWidth`].
999     ///
1000     /// # Example
1001     ///
1002     /// ```
1003     /// use icu::properties::{maps, EastAsianWidth};
1004     ///
1005     /// assert_eq!(maps::east_asian_width().get('ｱ'), EastAsianWidth::Halfwidth); // U+FF71: Halfwidth Katakana Letter A
1006     /// assert_eq!(maps::east_asian_width().get('ア'), EastAsianWidth::Wide); // U+30A2: Katakana Letter A
1007     /// ```
1008 }
1009 
1010 /// Enumerated property Line_Break.
1011 ///
1012 /// See "Line Breaking Properties" in UAX #14 for the summary of each property
1013 /// value: <https://www.unicode.org/reports/tr14/#Properties>
1014 ///
1015 /// The numeric value (the wrapped `u8`) is compatible with `ULineBreak` in ICU4C.
1016 #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
1017 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
1018 #[cfg_attr(feature = "datagen", derive(databake::Bake))]
1019 #[cfg_attr(feature = "datagen", databake(path = icu_properties::props))]
1020 #[allow(clippy::exhaustive_structs)] // newtype
1021 #[repr(transparent)]
1022 pub struct LineBreak(pub(crate) u8);
1023 
1024 impl LineBreak {
1025     /// Returns an ICU4C `ULineBreak` value.
to_icu4c_value(self) -> u81026     pub const fn to_icu4c_value(self) -> u8 {
1027         self.0
1028     }
1029     /// Constructor from an ICU4C `ULineBreak` value.
from_icu4c_value(value: u8) -> Self1030     pub const fn from_icu4c_value(value: u8) -> Self {
1031         Self(value)
1032     }
1033 }
1034 
// Named `LineBreak` values, listed in increasing numeric order. Per its
// documentation, `create_const_array!` also emits an `ALL_VALUES` table of
// `(name, value)` pairs from this list.
1035 create_const_array! {
1036 #[allow(missing_docs)] // These constants don't need individual documentation.
1037 #[allow(non_upper_case_globals)]
1038 impl LineBreak {
1039     pub const Unknown: LineBreak = LineBreak(0); // name="XX"
1040     pub const Ambiguous: LineBreak = LineBreak(1); // name="AI"
1041     pub const Alphabetic: LineBreak = LineBreak(2); // name="AL"
1042     pub const BreakBoth: LineBreak = LineBreak(3); // name="B2"
1043     pub const BreakAfter: LineBreak = LineBreak(4); // name="BA"
1044     pub const BreakBefore: LineBreak = LineBreak(5); // name="BB"
1045     pub const MandatoryBreak: LineBreak = LineBreak(6); // name="BK"
1046     pub const ContingentBreak: LineBreak = LineBreak(7); // name="CB"
1047     pub const ClosePunctuation: LineBreak = LineBreak(8); // name="CL"
1048     pub const CombiningMark: LineBreak = LineBreak(9); // name="CM"
1049     pub const CarriageReturn: LineBreak = LineBreak(10); // name="CR"
1050     pub const Exclamation: LineBreak = LineBreak(11); // name="EX"
1051     pub const Glue: LineBreak = LineBreak(12); // name="GL"
1052     pub const Hyphen: LineBreak = LineBreak(13); // name="HY"
1053     pub const Ideographic: LineBreak = LineBreak(14); // name="ID"
1054     pub const Inseparable: LineBreak = LineBreak(15); // name="IN"
1055     pub const InfixNumeric: LineBreak = LineBreak(16); // name="IS"
1056     pub const LineFeed: LineBreak = LineBreak(17); // name="LF"
1057     pub const Nonstarter: LineBreak = LineBreak(18); // name="NS"
1058     pub const Numeric: LineBreak = LineBreak(19); // name="NU"
1059     pub const OpenPunctuation: LineBreak = LineBreak(20); // name="OP"
1060     pub const PostfixNumeric: LineBreak = LineBreak(21); // name="PO"
1061     pub const PrefixNumeric: LineBreak = LineBreak(22); // name="PR"
1062     pub const Quotation: LineBreak = LineBreak(23); // name="QU"
1063     pub const ComplexContext: LineBreak = LineBreak(24); // name="SA"
1064     pub const Surrogate: LineBreak = LineBreak(25); // name="SG"
1065     pub const Space: LineBreak = LineBreak(26); // name="SP"
1066     pub const BreakSymbols: LineBreak = LineBreak(27); // name="SY"
1067     pub const ZWSpace: LineBreak = LineBreak(28); // name="ZW"
1068     pub const NextLine: LineBreak = LineBreak(29); // name="NL"
1069     pub const WordJoiner: LineBreak = LineBreak(30); // name="WJ"
1070     pub const H2: LineBreak = LineBreak(31); // name="H2"
1071     pub const H3: LineBreak = LineBreak(32); // name="H3"
1072     pub const JL: LineBreak = LineBreak(33); // name="JL"
1073     pub const JT: LineBreak = LineBreak(34); // name="JT"
1074     pub const JV: LineBreak = LineBreak(35); // name="JV"
1075     pub const CloseParenthesis: LineBreak = LineBreak(36); // name="CP"
1076     pub const ConditionalJapaneseStarter: LineBreak = LineBreak(37); // name="CJ"
1077     pub const HebrewLetter: LineBreak = LineBreak(38); // name="HL"
1078     pub const RegionalIndicator: LineBreak = LineBreak(39); // name="RI"
1079     pub const EBase: LineBreak = LineBreak(40); // name="EB"
1080     pub const EModifier: LineBreak = LineBreak(41); // name="EM"
1081     pub const ZWJ: LineBreak = LineBreak(42); // name="ZWJ"
1082 
1083     // Added in ICU 74:
1084     pub const Aksara: LineBreak = LineBreak(43); // name="AK"
1085     pub const AksaraPrebase: LineBreak = LineBreak(44); // name="AP"
1086     pub const AksaraStart: LineBreak = LineBreak(45); // name="AS"
1087     pub const ViramaFinal: LineBreak = LineBreak(46); // name="VF"
1088     pub const Virama: LineBreak = LineBreak(47); // name="VI"
1089 }
1090 }
1091 
1092 make_enumerated_property! {
1093     name: "Line_Break";
1094     short_name: "lb";
1095     ident: LineBreak;
1096     data_marker: crate::provider::LineBreakV1;
1097     singleton: SINGLETON_LINE_BREAK_V1;
1098     ule_ty: u8;
1099     func:
1100     /// Return a [`CodePointMapDataBorrowed`] for the Line_Break Unicode enumerated
1101     /// property. See [`LineBreak`].
1102     ///
1103     /// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation.
1104     ///
1105     /// # Example
1106     ///
1107     /// ```
1108     /// use icu::properties::{maps, LineBreak};
1109     ///
1110     /// assert_eq!(maps::line_break().get(')'), LineBreak::CloseParenthesis); // U+0029: Right Parenthesis
1111     /// assert_eq!(maps::line_break().get('ぁ'), LineBreak::ConditionalJapaneseStarter); // U+3041: Hiragana Letter Small A
1112     /// ```
1113 }
1114 
1115 /// Enumerated property Grapheme_Cluster_Break.
1116 ///
1117 /// See "Default Grapheme Cluster Boundary Specification" in UAX #29 for the
1118 /// summary of each property value:
1119 /// <https://www.unicode.org/reports/tr29/#Default_Grapheme_Cluster_Table>
///
/// The wrapped `u8` is the ICU4C `UGraphemeClusterBreak` value.
1120 #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
1121 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
1122 #[cfg_attr(feature = "datagen", derive(databake::Bake))]
1123 #[cfg_attr(feature = "datagen", databake(path = icu_properties::props))]
1124 #[allow(clippy::exhaustive_structs)] // this type is stable
1125 #[repr(transparent)]
1126 pub struct GraphemeClusterBreak(pub(crate) u8);
1127 
1128 impl GraphemeClusterBreak {
1129     /// Returns an ICU4C `UGraphemeClusterBreak` value.
to_icu4c_value(self) -> u81130     pub const fn to_icu4c_value(self) -> u8 {
1131         self.0
1132     }
1133     /// Constructor from an ICU4C `UGraphemeClusterBreak` value.
from_icu4c_value(value: u8) -> Self1134     pub const fn from_icu4c_value(value: u8) -> Self {
1135         Self(value)
1136     }
1137 }
1138 
// Named `GraphemeClusterBreak` values, listed in increasing numeric order.
// Values 13 through 16 are documented below as obsolete and unused. Per its
// documentation, `create_const_array!` also emits an `ALL_VALUES` table of
// `(name, value)` pairs from this list.
1139 create_const_array! {
1140 #[allow(missing_docs)] // These constants don't need individual documentation.
1141 #[allow(non_upper_case_globals)]
1142 impl GraphemeClusterBreak {
1143     pub const Other: GraphemeClusterBreak = GraphemeClusterBreak(0); // name="XX"
1144     pub const Control: GraphemeClusterBreak = GraphemeClusterBreak(1); // name="CN"
1145     pub const CR: GraphemeClusterBreak = GraphemeClusterBreak(2); // name="CR"
1146     pub const Extend: GraphemeClusterBreak = GraphemeClusterBreak(3); // name="EX"
1147     pub const L: GraphemeClusterBreak = GraphemeClusterBreak(4); // name="L"
1148     pub const LF: GraphemeClusterBreak = GraphemeClusterBreak(5); // name="LF"
1149     pub const LV: GraphemeClusterBreak = GraphemeClusterBreak(6); // name="LV"
1150     pub const LVT: GraphemeClusterBreak = GraphemeClusterBreak(7); // name="LVT"
1151     pub const T: GraphemeClusterBreak = GraphemeClusterBreak(8); // name="T"
1152     pub const V: GraphemeClusterBreak = GraphemeClusterBreak(9); // name="V"
1153     pub const SpacingMark: GraphemeClusterBreak = GraphemeClusterBreak(10); // name="SM"
1154     pub const Prepend: GraphemeClusterBreak = GraphemeClusterBreak(11); // name="PP"
1155     pub const RegionalIndicator: GraphemeClusterBreak = GraphemeClusterBreak(12); // name="RI"
1156     /// This value is obsolete and unused.
1157     pub const EBase: GraphemeClusterBreak = GraphemeClusterBreak(13); // name="EB"
1158     /// This value is obsolete and unused.
1159     pub const EBaseGAZ: GraphemeClusterBreak = GraphemeClusterBreak(14); // name="EBG"
1160     /// This value is obsolete and unused.
1161     pub const EModifier: GraphemeClusterBreak = GraphemeClusterBreak(15); // name="EM"
1162     /// This value is obsolete and unused.
1163     pub const GlueAfterZwj: GraphemeClusterBreak = GraphemeClusterBreak(16); // name="GAZ"
1164     pub const ZWJ: GraphemeClusterBreak = GraphemeClusterBreak(17); // name="ZWJ"
1165 }
1166 }
1167 
1168 make_enumerated_property! {
1169     name: "Grapheme_Cluster_Break";
1170     short_name: "GCB";
1171     ident: GraphemeClusterBreak;
1172     data_marker: crate::provider::GraphemeClusterBreakV1;
1173     singleton: SINGLETON_GRAPHEME_CLUSTER_BREAK_V1;
1174     ule_ty: u8;
1175     func:
1176     /// Return a [`CodePointMapDataBorrowed`] for the Grapheme_Cluster_Break Unicode enumerated
1177     /// property. See [`GraphemeClusterBreak`].
1178     ///
1179     /// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation.
1180     ///
1181     /// # Example
1182     ///
1183     /// ```
1184     /// use icu::properties::{maps, GraphemeClusterBreak};
1185     ///
1186     /// assert_eq!(maps::grapheme_cluster_break().get('🇦'), GraphemeClusterBreak::RegionalIndicator); // U+1F1E6: Regional Indicator Symbol Letter A
1187     /// assert_eq!(maps::grapheme_cluster_break().get('ำ'), GraphemeClusterBreak::SpacingMark); // U+0E33: Thai Character Sara Am
1188     /// ```
1189 }
1190 
1191 /// Enumerated property Word_Break.
1192 ///
1193 /// See "Default Word Boundary Specification" in UAX #29 for the summary of
1194 /// each property value:
1195 /// <https://www.unicode.org/reports/tr29/#Default_Word_Boundaries>.
///
/// The wrapped `u8` is the ICU4C `UWordBreak` value.
1196 #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
1197 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
1198 #[cfg_attr(feature = "datagen", derive(databake::Bake))]
1199 #[cfg_attr(feature = "datagen", databake(path = icu_properties::props))]
1200 #[allow(clippy::exhaustive_structs)] // newtype
1201 #[repr(transparent)]
1202 pub struct WordBreak(pub(crate) u8);
1203 
1204 impl WordBreak {
1205     /// Returns an ICU4C `UWordBreak` value.
to_icu4c_value(self) -> u81206     pub const fn to_icu4c_value(self) -> u8 {
1207         self.0
1208     }
1209     /// Constructor from an ICU4C `UWordBreak` value.
from_icu4c_value(value: u8) -> Self1210     pub const fn from_icu4c_value(value: u8) -> Self {
1211         Self(value)
1212     }
1213 }
1214 
// Named `WordBreak` values, listed in increasing numeric order. Values 17
// through 20 are documented below as obsolete and unused. Per its
// documentation, `create_const_array!` also emits an `ALL_VALUES` table of
// `(name, value)` pairs from this list.
1215 create_const_array! {
1216 #[allow(missing_docs)] // These constants don't need individual documentation.
1217 #[allow(non_upper_case_globals)]
1218 impl WordBreak {
1219     pub const Other: WordBreak = WordBreak(0); // name="XX"
1220     pub const ALetter: WordBreak = WordBreak(1); // name="LE"
1221     pub const Format: WordBreak = WordBreak(2); // name="FO"
1222     pub const Katakana: WordBreak = WordBreak(3); // name="KA"
1223     pub const MidLetter: WordBreak = WordBreak(4); // name="ML"
1224     pub const MidNum: WordBreak = WordBreak(5); // name="MN"
1225     pub const Numeric: WordBreak = WordBreak(6); // name="NU"
1226     pub const ExtendNumLet: WordBreak = WordBreak(7); // name="EX"
1227     pub const CR: WordBreak = WordBreak(8); // name="CR"
1228     pub const Extend: WordBreak = WordBreak(9); // name="Extend"
1229     pub const LF: WordBreak = WordBreak(10); // name="LF"
1230     pub const MidNumLet: WordBreak = WordBreak(11); // name="MB"
1231     pub const Newline: WordBreak = WordBreak(12); // name="NL"
1232     pub const RegionalIndicator: WordBreak = WordBreak(13); // name="RI"
1233     pub const HebrewLetter: WordBreak = WordBreak(14); // name="HL"
1234     pub const SingleQuote: WordBreak = WordBreak(15); // name="SQ"
1235     pub const DoubleQuote: WordBreak = WordBreak(16); // name="DQ"
1236     /// This value is obsolete and unused.
1237     pub const EBase: WordBreak = WordBreak(17); // name="EB"
1238     /// This value is obsolete and unused.
1239     pub const EBaseGAZ: WordBreak = WordBreak(18); // name="EBG"
1240     /// This value is obsolete and unused.
1241     pub const EModifier: WordBreak = WordBreak(19); // name="EM"
1242     /// This value is obsolete and unused.
1243     pub const GlueAfterZwj: WordBreak = WordBreak(20); // name="GAZ"
1244     pub const ZWJ: WordBreak = WordBreak(21); // name="ZWJ"
1245     pub const WSegSpace: WordBreak = WordBreak(22); // name="WSegSpace"
1246 }
1247 }
1248 
1249 make_enumerated_property! {
1250     name: "Word_Break";
1251     short_name: "WB";
1252     ident: WordBreak;
1253     data_marker: crate::provider::WordBreakV1;
1254     singleton: SINGLETON_WORD_BREAK_V1;
1255     ule_ty: u8;
1256     func:
1257     /// Return a [`CodePointMapDataBorrowed`] for the Word_Break Unicode enumerated
1258     /// property. See [`WordBreak`].
1259     ///
1260     /// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation.
1261     ///
1262     /// # Example
1263     ///
1264     /// ```
1265     /// use icu::properties::{maps, WordBreak};
1266     ///
1267     /// assert_eq!(maps::word_break().get('.'), WordBreak::MidNumLet); // U+002E: Full Stop
1268     /// assert_eq!(maps::word_break().get('，'), WordBreak::MidNum); // U+FF0C: Fullwidth Comma
1269     /// ```
1270 }
1271 
1272 /// Enumerated property Sentence_Break.
///
1273 /// See "Default Sentence Boundary Specification" in UAX #29 for the summary of
1274 /// each property value:
1275 /// <https://www.unicode.org/reports/tr29/#Default_Sentence_Boundaries>.
///
/// The wrapped `u8` is the ICU4C `USentenceBreak` value.
1276 #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
1277 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
1278 #[cfg_attr(feature = "datagen", derive(databake::Bake))]
1279 #[cfg_attr(feature = "datagen", databake(path = icu_properties::props))]
1280 #[allow(clippy::exhaustive_structs)] // newtype
1281 #[repr(transparent)]
1282 pub struct SentenceBreak(pub(crate) u8);
1283 
1284 impl SentenceBreak {
1285     /// Returns an ICU4C `USentenceBreak` value.
to_icu4c_value(self) -> u81286     pub const fn to_icu4c_value(self) -> u8 {
1287         self.0
1288     }
1289     /// Constructor from an ICU4C `USentenceBreak` value.
from_icu4c_value(value: u8) -> Self1290     pub const fn from_icu4c_value(value: u8) -> Self {
1291         Self(value)
1292     }
1293 }
1294 
// Named `SentenceBreak` values, listed in increasing numeric order. Per its
// documentation, `create_const_array!` also emits an `ALL_VALUES` table of
// `(name, value)` pairs from this list.
1295 create_const_array! {
1296 #[allow(missing_docs)] // These constants don't need individual documentation.
1297 #[allow(non_upper_case_globals)]
1298 impl SentenceBreak {
1299     pub const Other: SentenceBreak = SentenceBreak(0); // name="XX"
1300     pub const ATerm: SentenceBreak = SentenceBreak(1); // name="AT"
1301     pub const Close: SentenceBreak = SentenceBreak(2); // name="CL"
1302     pub const Format: SentenceBreak = SentenceBreak(3); // name="FO"
1303     pub const Lower: SentenceBreak = SentenceBreak(4); // name="LO"
1304     pub const Numeric: SentenceBreak = SentenceBreak(5); // name="NU"
1305     pub const OLetter: SentenceBreak = SentenceBreak(6); // name="LE"
1306     pub const Sep: SentenceBreak = SentenceBreak(7); // name="SE"
1307     pub const Sp: SentenceBreak = SentenceBreak(8); // name="SP"
1308     pub const STerm: SentenceBreak = SentenceBreak(9); // name="ST"
1309     pub const Upper: SentenceBreak = SentenceBreak(10); // name="UP"
1310     pub const CR: SentenceBreak = SentenceBreak(11); // name="CR"
1311     pub const Extend: SentenceBreak = SentenceBreak(12); // name="EX"
1312     pub const LF: SentenceBreak = SentenceBreak(13); // name="LF"
1313     pub const SContinue: SentenceBreak = SentenceBreak(14); // name="SC"
1314 }
1315 }
1316 
1317 make_enumerated_property! {
1318     name: "Sentence_Break";
1319     short_name: "SB";
1320     ident: SentenceBreak;
1321     data_marker: crate::provider::SentenceBreakV1;
1322     singleton: SINGLETON_SENTENCE_BREAK_V1;
1323     ule_ty: u8;
1324     func:
1325     /// Return a [`CodePointMapDataBorrowed`] for the Sentence_Break Unicode enumerated
1326     /// property. See [`SentenceBreak`].
1327     ///
1328     /// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation.
1329     ///
1330     /// # Example
1331     ///
1332     /// ```
1333     /// use icu::properties::{maps, SentenceBreak};
1334     ///
1335     /// assert_eq!(maps::sentence_break().get('９'), SentenceBreak::Numeric); // U+FF19: Fullwidth Digit Nine
1336     /// assert_eq!(maps::sentence_break().get(','), SentenceBreak::SContinue); // U+002C: Comma
1337     /// ```
1338 }
1339 
1340 /// Property Canonical_Combining_Class.
///
1341 /// See UAX #15:
1342 /// <https://www.unicode.org/reports/tr15/>.
1343 ///
1344 /// See `icu::normalizer::properties::CanonicalCombiningClassMap` for the API
1345 /// to look up the Canonical_Combining_Class property by scalar value.
///
/// The wrapped `u8` is the ICU4C `UCanonicalCombiningClass` value.
1346 //
1347 // NOTE: The Pernosco debugger has special knowledge
1348 // of this struct. Please do not change the bit layout
1349 // or the crate-module-qualified name of this struct
1350 // without coordination.
1351 #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
1352 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
1353 #[cfg_attr(feature = "datagen", derive(databake::Bake))]
1354 #[cfg_attr(feature = "datagen", databake(path = icu_properties::props))]
1355 #[allow(clippy::exhaustive_structs)] // newtype
1356 #[repr(transparent)]
1357 pub struct CanonicalCombiningClass(pub(crate) u8);
1358 
1359 impl CanonicalCombiningClass {
1360     /// Returns an ICU4C `UCanonicalCombiningClass` value.
to_icu4c_value(self) -> u81361     pub const fn to_icu4c_value(self) -> u8 {
1362         self.0
1363     }
1364     /// Constructor from an ICU4C `UCanonicalCombiningClass` value.
from_icu4c_value(value: u8) -> Self1365     pub const fn from_icu4c_value(value: u8) -> Self {
1366         Self(value)
1367     }
1368 }
1369 
create_const_array! {
// These constant names come from PropertyValueAliases.txt
//
// Each constant wraps the numeric combining class value. The trailing `name="..."`
// comment presumably records the value's short alias -- confirm against
// PropertyValueAliases.txt before relying on it.
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl CanonicalCombiningClass {
    pub const NotReordered: CanonicalCombiningClass = CanonicalCombiningClass(0); // name="NR"
    pub const Overlay: CanonicalCombiningClass = CanonicalCombiningClass(1); // name="OV"
    pub const HanReading: CanonicalCombiningClass = CanonicalCombiningClass(6); // name="HANR"
    pub const Nukta: CanonicalCombiningClass = CanonicalCombiningClass(7); // name="NK"
    pub const KanaVoicing: CanonicalCombiningClass = CanonicalCombiningClass(8); // name="KV"
    pub const Virama: CanonicalCombiningClass = CanonicalCombiningClass(9); // name="VR"
    pub const CCC10: CanonicalCombiningClass = CanonicalCombiningClass(10); // name="CCC10"
    pub const CCC11: CanonicalCombiningClass = CanonicalCombiningClass(11); // name="CCC11"
    pub const CCC12: CanonicalCombiningClass = CanonicalCombiningClass(12); // name="CCC12"
    pub const CCC13: CanonicalCombiningClass = CanonicalCombiningClass(13); // name="CCC13"
    pub const CCC14: CanonicalCombiningClass = CanonicalCombiningClass(14); // name="CCC14"
    pub const CCC15: CanonicalCombiningClass = CanonicalCombiningClass(15); // name="CCC15"
    pub const CCC16: CanonicalCombiningClass = CanonicalCombiningClass(16); // name="CCC16"
    pub const CCC17: CanonicalCombiningClass = CanonicalCombiningClass(17); // name="CCC17"
    pub const CCC18: CanonicalCombiningClass = CanonicalCombiningClass(18); // name="CCC18"
    pub const CCC19: CanonicalCombiningClass = CanonicalCombiningClass(19); // name="CCC19"
    pub const CCC20: CanonicalCombiningClass = CanonicalCombiningClass(20); // name="CCC20"
    pub const CCC21: CanonicalCombiningClass = CanonicalCombiningClass(21); // name="CCC21"
    pub const CCC22: CanonicalCombiningClass = CanonicalCombiningClass(22); // name="CCC22"
    pub const CCC23: CanonicalCombiningClass = CanonicalCombiningClass(23); // name="CCC23"
    pub const CCC24: CanonicalCombiningClass = CanonicalCombiningClass(24); // name="CCC24"
    pub const CCC25: CanonicalCombiningClass = CanonicalCombiningClass(25); // name="CCC25"
    pub const CCC26: CanonicalCombiningClass = CanonicalCombiningClass(26); // name="CCC26"
    pub const CCC27: CanonicalCombiningClass = CanonicalCombiningClass(27); // name="CCC27"
    pub const CCC28: CanonicalCombiningClass = CanonicalCombiningClass(28); // name="CCC28"
    pub const CCC29: CanonicalCombiningClass = CanonicalCombiningClass(29); // name="CCC29"
    pub const CCC30: CanonicalCombiningClass = CanonicalCombiningClass(30); // name="CCC30"
    pub const CCC31: CanonicalCombiningClass = CanonicalCombiningClass(31); // name="CCC31"
    pub const CCC32: CanonicalCombiningClass = CanonicalCombiningClass(32); // name="CCC32"
    pub const CCC33: CanonicalCombiningClass = CanonicalCombiningClass(33); // name="CCC33"
    pub const CCC34: CanonicalCombiningClass = CanonicalCombiningClass(34); // name="CCC34"
    pub const CCC35: CanonicalCombiningClass = CanonicalCombiningClass(35); // name="CCC35"
    pub const CCC36: CanonicalCombiningClass = CanonicalCombiningClass(36); // name="CCC36"
    pub const CCC84: CanonicalCombiningClass = CanonicalCombiningClass(84); // name="CCC84"
    pub const CCC91: CanonicalCombiningClass = CanonicalCombiningClass(91); // name="CCC91"
    pub const CCC103: CanonicalCombiningClass = CanonicalCombiningClass(103); // name="CCC103"
    pub const CCC107: CanonicalCombiningClass = CanonicalCombiningClass(107); // name="CCC107"
    pub const CCC118: CanonicalCombiningClass = CanonicalCombiningClass(118); // name="CCC118"
    pub const CCC122: CanonicalCombiningClass = CanonicalCombiningClass(122); // name="CCC122"
    pub const CCC129: CanonicalCombiningClass = CanonicalCombiningClass(129); // name="CCC129"
    pub const CCC130: CanonicalCombiningClass = CanonicalCombiningClass(130); // name="CCC130"
    pub const CCC132: CanonicalCombiningClass = CanonicalCombiningClass(132); // name="CCC132"
    pub const CCC133: CanonicalCombiningClass = CanonicalCombiningClass(133); // name="CCC133" // RESERVED
    pub const AttachedBelowLeft: CanonicalCombiningClass = CanonicalCombiningClass(200); // name="ATBL"
    pub const AttachedBelow: CanonicalCombiningClass = CanonicalCombiningClass(202); // name="ATB"
    pub const AttachedAbove: CanonicalCombiningClass = CanonicalCombiningClass(214); // name="ATA"
    pub const AttachedAboveRight: CanonicalCombiningClass = CanonicalCombiningClass(216); // name="ATAR"
    pub const BelowLeft: CanonicalCombiningClass = CanonicalCombiningClass(218); // name="BL"
    pub const Below: CanonicalCombiningClass = CanonicalCombiningClass(220); // name="B"
    pub const BelowRight: CanonicalCombiningClass = CanonicalCombiningClass(222); // name="BR"
    pub const Left: CanonicalCombiningClass = CanonicalCombiningClass(224); // name="L"
    pub const Right: CanonicalCombiningClass = CanonicalCombiningClass(226); // name="R"
    pub const AboveLeft: CanonicalCombiningClass = CanonicalCombiningClass(228); // name="AL"
    pub const Above: CanonicalCombiningClass = CanonicalCombiningClass(230); // name="A"
    pub const AboveRight: CanonicalCombiningClass = CanonicalCombiningClass(232); // name="AR"
    pub const DoubleBelow: CanonicalCombiningClass = CanonicalCombiningClass(233); // name="DB"
    pub const DoubleAbove: CanonicalCombiningClass = CanonicalCombiningClass(234); // name="DA"
    pub const IotaSubscript: CanonicalCombiningClass = CanonicalCombiningClass(240); // name="IS"
}
}
1435 
make_enumerated_property! {
    name: "Canonical_Combining_Class";
    short_name: "ccc";
    ident: CanonicalCombiningClass;
    data_marker: crate::provider::CanonicalCombiningClassV1;
    singleton: SINGLETON_CANONICAL_COMBINING_CLASS_V1;
    ule_ty: u8;
    func:
    // NOTE(review): the example below goes through `maps::canonical_combining_class()`, while
    // the module docs describe lookups through `CodePointMapData` -- confirm this accessor
    // still exists.
    /// Return a [`CodePointMapData`] for the Canonical_Combining_Class Unicode property. See
    /// [`CanonicalCombiningClass`].
    ///
    /// **Note:** See `icu::normalizer::properties::CanonicalCombiningClassMap` for the preferred API
    /// to look up the Canonical_Combining_Class property by scalar value.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::{maps, CanonicalCombiningClass};
    ///
    /// assert_eq!(maps::canonical_combining_class().get('a'), CanonicalCombiningClass::NotReordered); // U+0061: LATIN SMALL LETTER A
    /// assert_eq!(maps::canonical_combining_class().get('\u{0301}'), CanonicalCombiningClass::Above); // U+0301: COMBINING ACUTE ACCENT
    /// ```
}
1459 
/// Property Indic_Syllabic_Category.
/// See UAX #44:
/// <https://www.unicode.org/reports/tr44/#Indic_Syllabic_Category>.
///
/// This is an open enum: a transparent newtype over `u8` whose known values
/// are exposed as associated constants.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "datagen", derive(databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_properties::props))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct IndicSyllabicCategory(pub(crate) u8);
1470 
1471 impl IndicSyllabicCategory {
1472     /// Returns an ICU4C `UIndicSyllabicCategory` value.
to_icu4c_value(self) -> u81473     pub const fn to_icu4c_value(self) -> u8 {
1474         self.0
1475     }
1476     /// Constructor from an ICU4C `UIndicSyllabicCategory` value.
from_icu4c_value(value: u8) -> Self1477     pub const fn from_icu4c_value(value: u8) -> Self {
1478         Self(value)
1479     }
1480 }
1481 
create_const_array! {
// Known values of `IndicSyllabicCategory`, numbered consecutively from 0 to 36.
// `ReorderingKiller` sits last, out of alphabetical order -- presumably it was appended
// later so the existing numbers stayed stable; confirm before renumbering anything.
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl IndicSyllabicCategory {
    pub const Other: IndicSyllabicCategory = IndicSyllabicCategory(0);
    pub const Avagraha: IndicSyllabicCategory = IndicSyllabicCategory(1);
    pub const Bindu: IndicSyllabicCategory = IndicSyllabicCategory(2);
    pub const BrahmiJoiningNumber: IndicSyllabicCategory = IndicSyllabicCategory(3);
    pub const CantillationMark: IndicSyllabicCategory = IndicSyllabicCategory(4);
    pub const Consonant: IndicSyllabicCategory = IndicSyllabicCategory(5);
    pub const ConsonantDead: IndicSyllabicCategory = IndicSyllabicCategory(6);
    pub const ConsonantFinal: IndicSyllabicCategory = IndicSyllabicCategory(7);
    pub const ConsonantHeadLetter: IndicSyllabicCategory = IndicSyllabicCategory(8);
    pub const ConsonantInitialPostfixed: IndicSyllabicCategory = IndicSyllabicCategory(9);
    pub const ConsonantKiller: IndicSyllabicCategory = IndicSyllabicCategory(10);
    pub const ConsonantMedial: IndicSyllabicCategory = IndicSyllabicCategory(11);
    pub const ConsonantPlaceholder: IndicSyllabicCategory = IndicSyllabicCategory(12);
    pub const ConsonantPrecedingRepha: IndicSyllabicCategory = IndicSyllabicCategory(13);
    pub const ConsonantPrefixed: IndicSyllabicCategory = IndicSyllabicCategory(14);
    pub const ConsonantSucceedingRepha: IndicSyllabicCategory = IndicSyllabicCategory(15);
    pub const ConsonantSubjoined: IndicSyllabicCategory = IndicSyllabicCategory(16);
    pub const ConsonantWithStacker: IndicSyllabicCategory = IndicSyllabicCategory(17);
    pub const GeminationMark: IndicSyllabicCategory = IndicSyllabicCategory(18);
    pub const InvisibleStacker: IndicSyllabicCategory = IndicSyllabicCategory(19);
    pub const Joiner: IndicSyllabicCategory = IndicSyllabicCategory(20);
    pub const ModifyingLetter: IndicSyllabicCategory = IndicSyllabicCategory(21);
    pub const NonJoiner: IndicSyllabicCategory = IndicSyllabicCategory(22);
    pub const Nukta: IndicSyllabicCategory = IndicSyllabicCategory(23);
    pub const Number: IndicSyllabicCategory = IndicSyllabicCategory(24);
    pub const NumberJoiner: IndicSyllabicCategory = IndicSyllabicCategory(25);
    pub const PureKiller: IndicSyllabicCategory = IndicSyllabicCategory(26);
    pub const RegisterShifter: IndicSyllabicCategory = IndicSyllabicCategory(27);
    pub const SyllableModifier: IndicSyllabicCategory = IndicSyllabicCategory(28);
    pub const ToneLetter: IndicSyllabicCategory = IndicSyllabicCategory(29);
    pub const ToneMark: IndicSyllabicCategory = IndicSyllabicCategory(30);
    pub const Virama: IndicSyllabicCategory = IndicSyllabicCategory(31);
    pub const Visarga: IndicSyllabicCategory = IndicSyllabicCategory(32);
    pub const Vowel: IndicSyllabicCategory = IndicSyllabicCategory(33);
    pub const VowelDependent: IndicSyllabicCategory = IndicSyllabicCategory(34);
    pub const VowelIndependent: IndicSyllabicCategory = IndicSyllabicCategory(35);
    pub const ReorderingKiller: IndicSyllabicCategory = IndicSyllabicCategory(36);
}
}
1525 
make_enumerated_property! {
    name: "Indic_Syllabic_Category";
    short_name: "InSC";
    ident: IndicSyllabicCategory;
    data_marker: crate::provider::IndicSyllabicCategoryV1;
    singleton: SINGLETON_INDIC_SYLLABIC_CATEGORY_V1;
    ule_ty: u8;
    func:
    // NOTE(review): the example below goes through `maps::indic_syllabic_category()`, while
    // the module docs describe lookups through `CodePointMapData` -- confirm this accessor
    // still exists.
    /// Return a [`CodePointMapData`] for the Indic_Syllabic_Category Unicode property. See
    /// [`IndicSyllabicCategory`].
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::{maps, IndicSyllabicCategory};
    ///
    /// assert_eq!(maps::indic_syllabic_category().get('a'), IndicSyllabicCategory::Other); // U+0061: LATIN SMALL LETTER A
    /// assert_eq!(maps::indic_syllabic_category().get('\u{0900}'), IndicSyllabicCategory::Bindu); // U+0900: DEVANAGARI SIGN INVERTED CANDRABINDU
    /// ```
}
1546 
/// Enumerated property Joining_Type.
/// See Section 9.2, Arabic Cursive Joining in The Unicode Standard for the summary of
/// each property value.
///
/// This is an open enum: a transparent newtype over `u8` whose known values
/// are exposed as associated constants.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "datagen", derive(databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_properties::props))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct JoiningType(pub(crate) u8);
1557 
1558 impl JoiningType {
1559     /// Returns an ICU4C `UJoiningType` value.
to_icu4c_value(self) -> u81560     pub const fn to_icu4c_value(self) -> u8 {
1561         self.0
1562     }
1563     /// Constructor from an ICU4C `UJoiningType` value.
from_icu4c_value(value: u8) -> Self1564     pub const fn from_icu4c_value(value: u8) -> Self {
1565         Self(value)
1566     }
1567 }
1568 
create_const_array! {
// Known values of `JoiningType`, numbered consecutively from 0 to 5. The trailing
// `name="..."` comment presumably records the value's short alias -- confirm against
// PropertyValueAliases.txt before relying on it.
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl JoiningType {
    pub const NonJoining: JoiningType = JoiningType(0); // name="U"
    pub const JoinCausing: JoiningType = JoiningType(1); // name="C"
    pub const DualJoining: JoiningType = JoiningType(2); // name="D"
    pub const LeftJoining: JoiningType = JoiningType(3); // name="L"
    pub const RightJoining: JoiningType = JoiningType(4); // name="R"
    pub const Transparent: JoiningType = JoiningType(5); // name="T"
}
}
1581 
1582 make_enumerated_property! {
1583     name: "Joining_Type";
1584     short_name: "jt";
1585     ident: JoiningType;
1586     data_marker: crate::provider::JoiningTypeV1;
1587     singleton: SINGLETON_JOINING_TYPE_V1;
1588     ule_ty: u8;
1589     func:
1590     /// Return a [`CodePointMapDataBorrowed`] for the Joining_Type Unicode enumerated
1591     /// property. See [`JoiningType`].
1592     ///
1593     /// # Example
1594     ///
1595     /// ```
1596     /// use icu::properties::{maps, JoiningType};
1597     ///
1598     /// assert_eq!(maps::joining_type().get('ؠ'), JoiningType::DualJoining); // U+0620: Arabic Letter Kashmiri Yeh
1599     /// assert_eq!(maps::joining_type().get('��'), JoiningType::LeftJoining); // U+10ACD: Manichaean Letter Heth
1600     /// ```
1601 }
1602 
1603 pub use crate::code_point_set::BinaryProperty;
1604 
/// Declares a binary property marker type.
///
/// Expands to a unit struct named by `ident` (carrying the doc attributes given after
/// `func:`, plus `#[derive(Debug)]` and `#[non_exhaustive]`), seals it through
/// `crate::private::Sealed`, and implements `BinaryProperty` for it:
///
/// - `data_marker` becomes `BinaryProperty::DataMarker`.
/// - `singleton` names the item on `crate::provider::Baked` that `BinaryProperty::SINGLETON`
///   points at; that constant is only emitted with the `compiled_data` feature.
/// - `name` and `short_name` are exposed as bytes through `NAME` and `SHORT_NAME`.
///
/// At least one doc attribute must follow `func:`.
macro_rules! make_binary_property {
    (
        name: $name:literal;
        short_name: $short_name:literal;
        ident: $d:ident;
        data_marker: $data_marker:ty;
        singleton: $singleton:ident;
        func:
        $(#[$doc:meta])+
    ) => {
        $(#[$doc])+
        #[derive(Debug)]
        #[non_exhaustive]
        pub struct $d;

        impl crate::private::Sealed for $d {}

        impl BinaryProperty for $d {
            type DataMarker = $data_marker;
            #[cfg(feature = "compiled_data")]
            const SINGLETON: &'static crate::provider::PropertyCodePointSet<'static> =
                &crate::provider::Baked::$singleton;
            const NAME: &'static [u8] = $name.as_bytes();
            const SHORT_NAME: &'static [u8] = $short_name.as_bytes();
        }
    };
}
1632 
make_binary_property! {
    name: "ASCII_Hex_Digit";
    short_name: "AHex";
    ident: AsciiHexDigit;
    data_marker: crate::provider::AsciiHexDigitV1;
    singleton: SINGLETON_ASCII_HEX_DIGIT_V1;
    func:
    /// ASCII characters commonly used for the representation of hexadecimal numbers.
    ///
    /// Property name: `ASCII_Hex_Digit` (short name: `AHex`).
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::AsciiHexDigit;
    ///
    /// let ascii_hex_digit = CodePointSetData::new::<AsciiHexDigit>();
    ///
    /// assert!(ascii_hex_digit.contains('3'));
    /// assert!(!ascii_hex_digit.contains('੩'));  // U+0A69 GURMUKHI DIGIT THREE
    /// assert!(ascii_hex_digit.contains('A'));
    /// assert!(!ascii_hex_digit.contains('Ä'));  // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
    /// ```
}
1656 
make_binary_property! {
    name: "Alnum";
    short_name: "Alnum";
    ident: Alnum;
    data_marker: crate::provider::AlnumV1;
    singleton: SINGLETON_ALNUM_V1;
    func:
    /// Characters with the `Alphabetic` or `Decimal_Number` property.
    ///
    /// This is defined for POSIX compatibility.
    ///
    /// Property name: `Alnum` (short name: `Alnum`).
}
1668 
make_binary_property! {
    name: "Alphabetic";
    short_name: "Alpha";
    ident: Alphabetic;
    data_marker: crate::provider::AlphabeticV1;
    singleton: SINGLETON_ALPHABETIC_V1;
    func:
    /// Alphabetic characters.
    ///
    /// Property name: `Alphabetic` (short name: `Alpha`).
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Alphabetic;
    ///
    /// let alphabetic = CodePointSetData::new::<Alphabetic>();
    ///
    /// assert!(!alphabetic.contains('3'));
    /// assert!(!alphabetic.contains('੩'));  // U+0A69 GURMUKHI DIGIT THREE
    /// assert!(alphabetic.contains('A'));
    /// assert!(alphabetic.contains('Ä'));  // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
    /// ```
}
1693 
make_binary_property! {
    name: "Bidi_Control";
    short_name: "Bidi_C";
    ident: BidiControl;
    data_marker: crate::provider::BidiControlV1;
    singleton: SINGLETON_BIDI_CONTROL_V1;
    func:
    /// Format control characters which have specific functions in the Unicode Bidirectional
    /// Algorithm.
    ///
    /// Property name: `Bidi_Control` (short name: `Bidi_C`).
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::BidiControl;
    ///
    /// let bidi_control = CodePointSetData::new::<BidiControl>();
    ///
    /// assert!(bidi_control.contains('\u{200F}'));  // RIGHT-TO-LEFT MARK
    /// assert!(!bidi_control.contains('ش'));  // U+0634 ARABIC LETTER SHEEN
    /// ```
}
1717 
make_binary_property! {
    name: "Bidi_Mirrored";
    short_name: "Bidi_M";
    ident: BidiMirrored;
    data_marker: crate::provider::BidiMirroredV1;
    singleton: SINGLETON_BIDI_MIRRORED_V1;
    func:
    /// Characters that are mirrored in bidirectional text.
    ///
    /// Property name: `Bidi_Mirrored` (short name: `Bidi_M`).
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::BidiMirrored;
    ///
    /// let bidi_mirrored = CodePointSetData::new::<BidiMirrored>();
    ///
    /// assert!(bidi_mirrored.contains('['));  // U+005B LEFT SQUARE BRACKET
    /// assert!(bidi_mirrored.contains(']'));  // U+005D RIGHT SQUARE BRACKET
    /// assert!(bidi_mirrored.contains('∑'));  // U+2211 N-ARY SUMMATION
    /// assert!(!bidi_mirrored.contains('ཉ'));  // U+0F49 TIBETAN LETTER NYA
    /// ```
}
1742 
make_binary_property! {
    name: "Blank";
    short_name: "Blank";
    ident: Blank;
    data_marker: crate::provider::BlankV1;
    singleton: SINGLETON_BLANK_V1;
    func:
    /// Horizontal whitespace characters.
    ///
    /// Property name: `Blank` (short name: `Blank`).
}
1753 
make_binary_property! {
    name: "Cased";
    short_name: "Cased";
    ident: Cased;
    data_marker: crate::provider::CasedV1;
    singleton: SINGLETON_CASED_V1;
    func:
    /// Uppercase, lowercase, and titlecase characters.
    ///
    /// Property name: `Cased` (short name: `Cased`).
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Cased;
    ///
    /// let cased = CodePointSetData::new::<Cased>();
    ///
    /// assert!(cased.contains('Ꙡ'));  // U+A660 CYRILLIC CAPITAL LETTER REVERSED TSE
    /// assert!(!cased.contains('ދ'));  // U+078B THAANA LETTER DHAALU
    /// ```
}
1776 
make_binary_property! {
    name: "Case_Ignorable";
    short_name: "CI";
    ident: CaseIgnorable;
    data_marker: crate::provider::CaseIgnorableV1;
    singleton: SINGLETON_CASE_IGNORABLE_V1;
    func:
    /// Characters which are ignored for casing purposes.
    ///
    /// Property name: `Case_Ignorable` (short name: `CI`).
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::CaseIgnorable;
    ///
    /// let case_ignorable = CodePointSetData::new::<CaseIgnorable>();
    ///
    /// assert!(case_ignorable.contains(':'));  // U+003A COLON
    /// assert!(!case_ignorable.contains('λ'));  // U+03BB GREEK SMALL LETTER LAMBDA
    /// ```
}
1799 
make_binary_property! {
    name: "Full_Composition_Exclusion";
    short_name: "Comp_Ex";
    ident: FullCompositionExclusion;
    data_marker: crate::provider::FullCompositionExclusionV1;
    singleton: SINGLETON_FULL_COMPOSITION_EXCLUSION_V1;
    func:
    /// Characters that are excluded from composition.
    ///
    /// See <https://unicode.org/Public/UNIDATA/CompositionExclusions.txt>
    ///
    /// Property name: `Full_Composition_Exclusion` (short name: `Comp_Ex`).
}
1812 
make_binary_property! {
    name: "Changes_When_Casefolded";
    short_name: "CWCF";
    ident: ChangesWhenCasefolded;
    data_marker: crate::provider::ChangesWhenCasefoldedV1;
    singleton: SINGLETON_CHANGES_WHEN_CASEFOLDED_V1;
    func:
    /// Characters whose normalized forms are not stable under case folding.
    ///
    /// Property name: `Changes_When_Casefolded` (short name: `CWCF`).
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::ChangesWhenCasefolded;
    ///
    /// let changes_when_casefolded = CodePointSetData::new::<ChangesWhenCasefolded>();
    ///
    /// assert!(changes_when_casefolded.contains('ß'));  // U+00DF LATIN SMALL LETTER SHARP S
    /// assert!(!changes_when_casefolded.contains('ᜉ'));  // U+1709 TAGALOG LETTER PA
    /// ```
}
1835 
make_binary_property! {
    name: "Changes_When_Casemapped";
    short_name: "CWCM";
    ident: ChangesWhenCasemapped;
    data_marker: crate::provider::ChangesWhenCasemappedV1;
    singleton: SINGLETON_CHANGES_WHEN_CASEMAPPED_V1;
    func:
    /// Characters which may change when they undergo case mapping.
    ///
    /// Property name: `Changes_When_Casemapped` (short name: `CWCM`).
}
1846 
1847 make_binary_property! {
1848     name: "Changes_When_NFKC_Casefolded";
1849     short_name: "CWKCF";
1850     ident: ChangesWhenNfkcCasefolded;
1851     data_marker: crate::provider::ChangesWhenNfkcCasefoldedV1;
1852     singleton: SINGLETON_CHANGES_WHEN_NFKC_CASEFOLDED_V1;
1853     func:
1854     /// Characters which are not identical to their `NFKC_Casefold` mapping.
1855     ///
1856     /// # Example
1857     ///
1858     /// ```
1859     /// use icu::properties::CodePointSetData;
1860     /// use icu::properties::props::ChangesWhenNfkcCasefolded;
1861     ///
1862     /// let changes_when_nfkc_casefolded = CodePointSetData::new::<ChangesWhenNfkcCasefolded>();
1863     ///
1864     /// assert!(changes_when_nfkc_casefolded.contains('��'));  // U+1F135 SQUARED LATIN CAPITAL LETTER F
1865     /// assert!(!changes_when_nfkc_casefolded.contains('f'));
1866     /// ```
1867 
1868 }
1869 
make_binary_property! {
    name: "Changes_When_Lowercased";
    short_name: "CWL";
    ident: ChangesWhenLowercased;
    data_marker: crate::provider::ChangesWhenLowercasedV1;
    singleton: SINGLETON_CHANGES_WHEN_LOWERCASED_V1;
    func:
    /// Characters whose normalized forms are not stable under a `toLowercase` mapping.
    ///
    /// Property name: `Changes_When_Lowercased` (short name: `CWL`).
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::ChangesWhenLowercased;
    ///
    /// let changes_when_lowercased = CodePointSetData::new::<ChangesWhenLowercased>();
    ///
    /// assert!(changes_when_lowercased.contains('Ⴔ'));  // U+10B4 GEORGIAN CAPITAL LETTER PHAR
    /// assert!(!changes_when_lowercased.contains('ფ'));  // U+10E4 GEORGIAN LETTER PHAR
    /// ```
}
1892 
make_binary_property! {
    name: "Changes_When_Titlecased";
    short_name: "CWT";
    ident: ChangesWhenTitlecased;
    data_marker: crate::provider::ChangesWhenTitlecasedV1;
    singleton: SINGLETON_CHANGES_WHEN_TITLECASED_V1;
    func:
    /// Characters whose normalized forms are not stable under a `toTitlecase` mapping.
    ///
    /// Property name: `Changes_When_Titlecased` (short name: `CWT`).
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::ChangesWhenTitlecased;
    ///
    /// let changes_when_titlecased = CodePointSetData::new::<ChangesWhenTitlecased>();
    ///
    /// assert!(changes_when_titlecased.contains('æ'));  // U+00E6 LATIN SMALL LETTER AE
    /// assert!(!changes_when_titlecased.contains('Æ'));  // U+00C6 LATIN CAPITAL LETTER AE
    /// ```
}
1915 
make_binary_property! {
    name: "Changes_When_Uppercased";
    short_name: "CWU";
    ident: ChangesWhenUppercased;
    data_marker: crate::provider::ChangesWhenUppercasedV1;
    singleton: SINGLETON_CHANGES_WHEN_UPPERCASED_V1;
    func:
    /// Characters whose normalized forms are not stable under a `toUppercase` mapping.
    ///
    /// Property name: `Changes_When_Uppercased` (short name: `CWU`).
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::ChangesWhenUppercased;
    ///
    /// let changes_when_uppercased = CodePointSetData::new::<ChangesWhenUppercased>();
    ///
    /// assert!(changes_when_uppercased.contains('ւ'));  // U+0582 ARMENIAN SMALL LETTER YIWN
    /// assert!(!changes_when_uppercased.contains('Ւ'));  // U+0552 ARMENIAN CAPITAL LETTER YIWN
    /// ```
}
1938 
make_binary_property! {
    name: "Dash";
    short_name: "Dash";
    ident: Dash;
    data_marker: crate::provider::DashV1;
    singleton: SINGLETON_DASH_V1;
    func:
    /// Punctuation characters explicitly called out as dashes in the Unicode Standard, plus
    /// their compatibility equivalents.
    ///
    /// Property name: `Dash` (short name: `Dash`).
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Dash;
    ///
    /// let dash = CodePointSetData::new::<Dash>();
    ///
    /// assert!(dash.contains('⸺'));  // U+2E3A TWO-EM DASH
    /// assert!(dash.contains('-'));  // U+002D HYPHEN-MINUS
    /// assert!(!dash.contains('='));  // U+003D EQUALS SIGN
    /// ```
}
1963 
make_binary_property! {
    name: "Deprecated";
    short_name: "Dep";
    ident: Deprecated;
    data_marker: crate::provider::DeprecatedV1;
    singleton: SINGLETON_DEPRECATED_V1;
    func:
    /// Deprecated characters.
    ///
    /// No characters will ever be removed from the standard, but the
    /// usage of deprecated characters is strongly discouraged.
    ///
    /// Property name: `Deprecated` (short name: `Dep`).
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Deprecated;
    ///
    /// let deprecated = CodePointSetData::new::<Deprecated>();
    ///
    /// assert!(deprecated.contains('ឣ'));  // U+17A3 KHMER INDEPENDENT VOWEL QAQ
    /// assert!(!deprecated.contains('A'));
    /// ```
}
1989 
make_binary_property! {
    name: "Default_Ignorable_Code_Point";
    short_name: "DI";
    ident: DefaultIgnorableCodePoint;
    data_marker: crate::provider::DefaultIgnorableCodePointV1;
    singleton: SINGLETON_DEFAULT_IGNORABLE_CODE_POINT_V1;
    func:
    /// For programmatic determination of default ignorable code points.
    ///
    /// New characters that should be ignored in rendering (unless explicitly supported)
    /// will be assigned in these ranges, permitting programs to correctly handle the
    /// default rendering of such characters when not otherwise supported.
    ///
    /// Property name: `Default_Ignorable_Code_Point` (short name: `DI`).
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::DefaultIgnorableCodePoint;
    ///
    /// let default_ignorable_code_point = CodePointSetData::new::<DefaultIgnorableCodePoint>();
    ///
    /// assert!(default_ignorable_code_point.contains('\u{180B}'));  // MONGOLIAN FREE VARIATION SELECTOR ONE
    /// assert!(!default_ignorable_code_point.contains('E'));
    /// ```
}
2017 
make_binary_property! {
    name: "Diacritic";
    short_name: "Dia";
    ident: Diacritic;
    data_marker: crate::provider::DiacriticV1;
    singleton: SINGLETON_DIACRITIC_V1;
    func:
    /// Characters that linguistically modify the meaning of another character to which they apply.
    ///
    /// Property name: `Diacritic` (short name: `Dia`).
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Diacritic;
    ///
    /// let diacritic = CodePointSetData::new::<Diacritic>();
    ///
    /// assert!(diacritic.contains('\u{05B3}'));  // HEBREW POINT HATAF QAMATS
    /// assert!(!diacritic.contains('א'));  // U+05D0 HEBREW LETTER ALEF
    /// ```
}
2040 
make_binary_property! {
    name: "Emoji_Modifier_Base";
    short_name: "EBase";
    ident: EmojiModifierBase;
    data_marker: crate::provider::EmojiModifierBaseV1;
    singleton: SINGLETON_EMOJI_MODIFIER_BASE_V1;
    func:
    /// Characters that can serve as a base for emoji modifiers.
    ///
    /// Property name: `Emoji_Modifier_Base` (short name: `EBase`).
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::EmojiModifierBase;
    ///
    /// let emoji_modifier_base = CodePointSetData::new::<EmojiModifierBase>();
    ///
    /// assert!(emoji_modifier_base.contains('✊'));  // U+270A RAISED FIST
    /// assert!(!emoji_modifier_base.contains('⛰'));  // U+26F0 MOUNTAIN
    /// ```
}
2063 
2064 make_binary_property! {
2065     name: "Emoji_Component";
2066     short_name: "EComp";
2067     ident: EmojiComponent;
2068     data_marker: crate::provider::EmojiComponentV1;
2069     singleton: SINGLETON_EMOJI_COMPONENT_V1;
2070     func:
2071     /// Characters used in emoji sequences that normally do not appear on emoji keyboards as
2072     /// separate choices, such as base characters for emoji keycaps.
2073     ///
         /// The property name is `Emoji_Component`; the short name is `EComp`.
         ///
2074     /// # Example
2075     ///
2076     /// ```
2077     /// use icu::properties::CodePointSetData;
2078     /// use icu::properties::props::EmojiComponent;
2079     ///
2080     /// let emoji_component = CodePointSetData::new::<EmojiComponent>();
2081     ///
2082     /// assert!(emoji_component.contains('\u{1F1F9}'));  // U+1F1F9 REGIONAL INDICATOR SYMBOL LETTER T
2083     /// assert!(emoji_component.contains('\u{20E3}'));  // COMBINING ENCLOSING KEYCAP
2084     /// assert!(emoji_component.contains('7'));
2085     /// assert!(!emoji_component.contains('T'));
2086     /// ```
2087 
2088 }
2089 
2090 make_binary_property! {
2091     name: "Emoji_Modifier";
2092     short_name: "EMod";
2093     ident: EmojiModifier;
2094     data_marker: crate::provider::EmojiModifierV1;
2095     singleton: SINGLETON_EMOJI_MODIFIER_V1;
2096     func:
2097     /// Characters that are emoji modifiers.
2098     ///
         /// The property name is `Emoji_Modifier`; the short name is `EMod`.
         ///
2099     /// # Example
2100     ///
2101     /// ```
2102     /// use icu::properties::CodePointSetData;
2103     /// use icu::properties::props::EmojiModifier;
2104     ///
2105     /// let emoji_modifier = CodePointSetData::new::<EmojiModifier>();
2106     ///
2107     /// assert!(emoji_modifier.contains('\u{1F3FD}'));  // EMOJI MODIFIER FITZPATRICK TYPE-4
2108     /// assert!(!emoji_modifier.contains('\u{200C}'));  // ZERO WIDTH NON-JOINER
2109     /// ```
2110 
2111 }
2112 
2113 make_binary_property! {
2114     name: "Emoji";
2115     short_name: "Emoji";
2116     ident: Emoji;
2117     data_marker: crate::provider::EmojiV1;
2118     singleton: SINGLETON_EMOJI_V1;
2119     func:
2120     /// Characters that are emoji.
2121     ///
         /// The property name and the short name are both `Emoji`.
         ///
2122     /// # Example
2123     ///
2124     /// ```
2125     /// use icu::properties::CodePointSetData;
2126     /// use icu::properties::props::Emoji;
2127     ///
2128     /// let emoji = CodePointSetData::new::<Emoji>();
2129     ///
2130     /// assert!(emoji.contains('\u{1F525}'));  // U+1F525 FIRE
2131     /// assert!(!emoji.contains('V'));
2132     /// ```
2133 
2134 }
2135 
2136 make_binary_property! {
2137     name: "Emoji_Presentation";
2138     short_name: "EPres";
2139     ident: EmojiPresentation;
2140     data_marker: crate::provider::EmojiPresentationV1;
2141     singleton: SINGLETON_EMOJI_PRESENTATION_V1;
2142     func:
2143     /// Characters that have emoji presentation by default.
2144     ///
         /// The property name is `Emoji_Presentation`; the short name is `EPres`.
         ///
2145     /// # Example
2146     ///
2147     /// ```
2148     /// use icu::properties::CodePointSetData;
2149     /// use icu::properties::props::EmojiPresentation;
2150     ///
2151     /// let emoji_presentation = CodePointSetData::new::<EmojiPresentation>();
2152     ///
2153     /// assert!(emoji_presentation.contains('\u{1F9AC}')); // U+1F9AC BISON
2154     /// assert!(!emoji_presentation.contains('♻'));  // U+267B BLACK UNIVERSAL RECYCLING SYMBOL
2155     /// ```
2156 
2157 }
2158 
2159 make_binary_property! {
2160     name: "Extender";
2161     short_name: "Ext";
2162     ident: Extender;
2163     data_marker: crate::provider::ExtenderV1;
2164     singleton: SINGLETON_EXTENDER_V1;
2165     func:
2166     /// Characters whose principal function is to extend the value of a preceding alphabetic
2167     /// character or to extend the shape of adjacent characters.
2168     ///
         /// The property name is `Extender`; the short name is `Ext`.
         ///
2169     /// # Example
2170     ///
2171     /// ```
2172     /// use icu::properties::CodePointSetData;
2173     /// use icu::properties::props::Extender;
2174     ///
2175     /// let extender = CodePointSetData::new::<Extender>();
2176     ///
2177     /// assert!(extender.contains('ヾ'));  // U+30FE KATAKANA VOICED ITERATION MARK
2178     /// assert!(extender.contains('ー'));  // U+30FC KATAKANA-HIRAGANA PROLONGED SOUND MARK
2179     /// assert!(!extender.contains('・'));  // U+30FB KATAKANA MIDDLE DOT
2180     /// ```
2181 
2182 }
2183 
2184 make_binary_property! {
2185     name: "Extended_Pictographic";
2186     short_name: "ExtPict";
2187     ident: ExtendedPictographic;
2188     data_marker: crate::provider::ExtendedPictographicV1;
2189     singleton: SINGLETON_EXTENDED_PICTOGRAPHIC_V1;
2190     func:
2191     /// Pictographic symbols, as well as reserved ranges in blocks largely associated with
2192     /// emoji characters.
2193     ///
         /// The property name is `Extended_Pictographic`; the short name is `ExtPict`.
         ///
2194     /// # Example
2195     ///
2196     /// ```
2197     /// use icu::properties::CodePointSetData;
2198     /// use icu::properties::props::ExtendedPictographic;
2199     ///
2200     /// let extended_pictographic = CodePointSetData::new::<ExtendedPictographic>();
2201     ///
2202     /// assert!(extended_pictographic.contains('\u{1F973}')); // U+1F973 FACE WITH PARTY HORN AND PARTY HAT
2203     /// assert!(!extended_pictographic.contains('\u{1F1EA}'));  // U+1F1EA REGIONAL INDICATOR SYMBOL LETTER E
2204     /// ```
2205 
2206 }
2207 
2208 make_binary_property! {
2209     name: "Graph";
2210     short_name: "Graph";
2211     ident: Graph;
2212     data_marker: crate::provider::GraphV1;
2213     singleton: SINGLETON_GRAPH_V1;
2214     func:
2215     /// Visible characters.
2216     ///
2217     /// This is defined for POSIX compatibility.
         ///
         /// The property name and the short name are both `Graph`.
2218 
2219 }
2220 
2221 make_binary_property! {
2222     name: "Grapheme_Base";
2223     short_name: "Gr_Base";
2224     ident: GraphemeBase;
2225     data_marker: crate::provider::GraphemeBaseV1;
2226     singleton: SINGLETON_GRAPHEME_BASE_V1;
2227     func:
2228     /// Property used together with the definition of Standard Korean Syllable Block to define
2229     /// "Grapheme base".
2230     ///
2231     /// See D58 in Chapter 3, Conformance in the Unicode Standard.
2232     ///
         /// The property name is `Grapheme_Base`; the short name is `Gr_Base`.
         ///
2233     /// # Example
2234     ///
2235     /// ```
2236     /// use icu::properties::CodePointSetData;
2237     /// use icu::properties::props::GraphemeBase;
2238     ///
2239     /// let grapheme_base = CodePointSetData::new::<GraphemeBase>();
2240     ///
2241     /// assert!(grapheme_base.contains('ക'));  // U+0D15 MALAYALAM LETTER KA
2242     /// assert!(grapheme_base.contains('\u{0D3F}'));  // U+0D3F MALAYALAM VOWEL SIGN I
2243     /// assert!(!grapheme_base.contains('\u{0D3E}'));  // U+0D3E MALAYALAM VOWEL SIGN AA
2244     /// ```
2245 
2246 }
2247 
2248 make_binary_property! {
2249     name: "Grapheme_Extend";
2250     short_name: "Gr_Ext";
2251     ident: GraphemeExtend;
2252     data_marker: crate::provider::GraphemeExtendV1;
2253     singleton: SINGLETON_GRAPHEME_EXTEND_V1;
2254     func:
2255     /// Property used to define "Grapheme extender".
2256     ///
2257     /// See D59 in Chapter 3, Conformance in the
2258     /// Unicode Standard.
2259     ///
         /// The property name is `Grapheme_Extend`; the short name is `Gr_Ext`.
         ///
2260     /// # Example
2261     ///
2262     /// ```
2263     /// use icu::properties::CodePointSetData;
2264     /// use icu::properties::props::GraphemeExtend;
2265     ///
2266     /// let grapheme_extend = CodePointSetData::new::<GraphemeExtend>();
2267     ///
2268     /// assert!(!grapheme_extend.contains('ക'));  // U+0D15 MALAYALAM LETTER KA
2269     /// assert!(!grapheme_extend.contains('\u{0D3F}'));  // U+0D3F MALAYALAM VOWEL SIGN I
2270     /// assert!(grapheme_extend.contains('\u{0D3E}'));  // U+0D3E MALAYALAM VOWEL SIGN AA
2271     /// ```
2272 
2273 }
2274 
2275 make_binary_property! {
2276     name: "Grapheme_Link";
2277     short_name: "Gr_Link";
2278     ident: GraphemeLink;
2279     data_marker: crate::provider::GraphemeLinkV1;
2280     singleton: SINGLETON_GRAPHEME_LINK_V1;
2281     func:
2282     /// Deprecated property.
2283     ///
2284     /// Formerly proposed for programmatic determination of grapheme
2285     /// cluster boundaries.
         ///
         /// The property name is `Grapheme_Link`; the short name is `Gr_Link`.
2286 
2287 }
2288 
2289 make_binary_property! {
2290     name: "Hex_Digit";
2291     short_name: "Hex";
2292     ident: HexDigit;
2293     data_marker: crate::provider::HexDigitV1;
2294     singleton: SINGLETON_HEX_DIGIT_V1;
2295     func:
2296     /// Characters commonly used for the representation of hexadecimal numbers, plus their
2297     /// compatibility equivalents.
2298     ///
         /// The property name is `Hex_Digit`; the short name is `Hex`.
         ///
2299     /// # Example
2300     ///
2301     /// ```
2302     /// use icu::properties::CodePointSetData;
2303     /// use icu::properties::props::HexDigit;
2304     ///
2305     /// let hex_digit = CodePointSetData::new::<HexDigit>();
2306     ///
2307     /// assert!(hex_digit.contains('0'));
2308     /// assert!(!hex_digit.contains('੩'));  // U+0A69 GURMUKHI DIGIT THREE
2309     /// assert!(hex_digit.contains('f'));
2310     /// assert!(hex_digit.contains('\u{FF46}'));  // U+FF46 FULLWIDTH LATIN SMALL LETTER F
2311     /// assert!(hex_digit.contains('\u{FF26}'));  // U+FF26 FULLWIDTH LATIN CAPITAL LETTER F
2312     /// assert!(!hex_digit.contains('Ä'));  // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
2313     /// ```
2314 
2315 }
2316 
2317 make_binary_property! {
2318     name: "Hyphen";
2319     short_name: "Hyphen";
2320     ident: Hyphen;
2321     data_marker: crate::provider::HyphenV1;
2322     singleton: SINGLETON_HYPHEN_V1;
2323     func:
2324     /// Deprecated property.
2325     ///
2326     /// Dashes which are used to mark connections between pieces of
2327     /// words, plus the Katakana middle dot.
         ///
         /// The property name and the short name are both `Hyphen`.
2328 
2329 }
2330 
2331 make_binary_property! {
2332     name: "Id_Continue";
2333     short_name: "IDC";
2334     ident: IdContinue;
2335     data_marker: crate::provider::IdContinueV1;
2336     singleton: SINGLETON_ID_CONTINUE_V1;
2337     func:
2338     /// Characters that can come after the first character in an identifier.
2339     ///
2340     /// If using NFKC to
2341     /// fold differences between characters, use [`XidContinue`] instead.  See
2342     /// [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for
2343     /// more details.
2344     ///
         /// The property name is `Id_Continue`; the short name is `IDC`.
         ///
2345     /// # Example
2346     ///
2347     /// ```
2348     /// use icu::properties::CodePointSetData;
2349     /// use icu::properties::props::IdContinue;
2350     ///
2351     /// let id_continue = CodePointSetData::new::<IdContinue>();
2352     ///
2353     /// assert!(id_continue.contains('x'));
2354     /// assert!(id_continue.contains('1'));
2355     /// assert!(id_continue.contains('_'));
2356     /// assert!(id_continue.contains('ߝ'));  // U+07DD NKO LETTER FA
2357     /// assert!(!id_continue.contains('ⓧ'));  // U+24E7 CIRCLED LATIN SMALL LETTER X
2358     /// assert!(id_continue.contains('\u{FC5E}'));  // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
2359     /// ```
2360 
2361 }
2362 
2363 make_binary_property! {
2364     name: "Ideographic";
2365     short_name: "Ideo";
2366     ident: Ideographic;
2367     data_marker: crate::provider::IdeographicV1;
2368     singleton: SINGLETON_IDEOGRAPHIC_V1;
2369     func:
2370     /// Characters considered to be CJKV (Chinese, Japanese, Korean, and Vietnamese)
2371     /// ideographs, or related siniform ideographs.
2372     ///
         /// The property name is `Ideographic`; the short name is `Ideo`.
         ///
2373     /// # Example
2374     ///
2375     /// ```
2376     /// use icu::properties::CodePointSetData;
2377     /// use icu::properties::props::Ideographic;
2378     ///
2379     /// let ideographic = CodePointSetData::new::<Ideographic>();
2380     ///
2381     /// assert!(ideographic.contains('川'));  // U+5DDD CJK UNIFIED IDEOGRAPH-5DDD
2382     /// assert!(!ideographic.contains('밥'));  // U+BC25 HANGUL SYLLABLE BAB
2383     /// ```
2384 
2385 }
2386 
2387 make_binary_property! {
2388     name: "Id_Start";
2389     short_name: "IDS";
2390     ident: IdStart;
2391     data_marker: crate::provider::IdStartV1;
2392     singleton: SINGLETON_ID_START_V1;
2393     func:
2394     /// Characters that can begin an identifier.
2395     ///
2396     /// If using NFKC to fold differences between
2397     /// characters, use [`XidStart`] instead.  See [`Unicode Standard Annex
2398     /// #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for more details.
2399     ///
         /// The property name is `Id_Start`; the short name is `IDS`.
         ///
2400     /// # Example
2401     ///
2402     /// ```
2403     /// use icu::properties::CodePointSetData;
2404     /// use icu::properties::props::IdStart;
2405     ///
2406     /// let id_start = CodePointSetData::new::<IdStart>();
2407     ///
2408     /// assert!(id_start.contains('x'));
2409     /// assert!(!id_start.contains('1'));
2410     /// assert!(!id_start.contains('_'));
2411     /// assert!(id_start.contains('ߝ'));  // U+07DD NKO LETTER FA
2412     /// assert!(!id_start.contains('ⓧ'));  // U+24E7 CIRCLED LATIN SMALL LETTER X
2413     /// assert!(id_start.contains('\u{FC5E}'));  // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
2414     /// ```
2415 
2416 }
2417 
2418 make_binary_property! {
2419     name: "Ids_Binary_Operator";
2420     short_name: "IDSB";
2421     ident: IdsBinaryOperator;
2422     data_marker: crate::provider::IdsBinaryOperatorV1;
2423     singleton: SINGLETON_IDS_BINARY_OPERATOR_V1;
2424     func:
2425     /// Characters used in Ideographic Description Sequences.
2426     ///
         /// The property name is `Ids_Binary_Operator`; the short name is `IDSB`.
         ///
2427     /// # Example
2428     ///
2429     /// ```
2430     /// use icu::properties::CodePointSetData;
2431     /// use icu::properties::props::IdsBinaryOperator;
2432     ///
2433     /// let ids_binary_operator = CodePointSetData::new::<IdsBinaryOperator>();
2434     ///
2435     /// assert!(ids_binary_operator.contains('\u{2FF5}'));  // IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM ABOVE
2436     /// assert!(!ids_binary_operator.contains('\u{3006}'));  // IDEOGRAPHIC CLOSING MARK
2437     /// ```
2438 
2439 }
2440 
2441 make_binary_property! {
2442     name: "Ids_Trinary_Operator";
2443     short_name: "IDST";
2444     ident: IdsTrinaryOperator;
2445     data_marker: crate::provider::IdsTrinaryOperatorV1;
2446     singleton: SINGLETON_IDS_TRINARY_OPERATOR_V1;
2447     func:
2448     /// Characters used in Ideographic Description Sequences.
2449     ///
         /// The property name is `Ids_Trinary_Operator`; the short name is `IDST`.
         ///
2450     /// # Example
2451     ///
2452     /// ```
2453     /// use icu::properties::CodePointSetData;
2454     /// use icu::properties::props::IdsTrinaryOperator;
2455     ///
2456     /// let ids_trinary_operator = CodePointSetData::new::<IdsTrinaryOperator>();
2457     ///
2458     /// assert!(ids_trinary_operator.contains('\u{2FF2}'));  // IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO MIDDLE AND RIGHT
2459     /// assert!(ids_trinary_operator.contains('\u{2FF3}'));  // IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO MIDDLE AND BELOW
2460     /// assert!(!ids_trinary_operator.contains('\u{2FF4}'));  // IDEOGRAPHIC DESCRIPTION CHARACTER FULL SURROUND
2461     /// assert!(!ids_trinary_operator.contains('\u{2FF5}'));  // IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM ABOVE
2462     /// assert!(!ids_trinary_operator.contains('\u{3006}'));  // IDEOGRAPHIC CLOSING MARK
2463     /// ```
2464 
2465 }
2466 
2467 make_binary_property! {
2468     name: "Join_Control";
2469     short_name: "Join_C";
2470     ident: JoinControl;
2471     data_marker: crate::provider::JoinControlV1;
2472     singleton: SINGLETON_JOIN_CONTROL_V1;
2473     func:
2474     /// Format control characters which have specific functions for control of cursive joining
2475     /// and ligation.
2476     ///
         /// The property name is `Join_Control`; the short name is `Join_C`.
         ///
2477     /// # Example
2478     ///
2479     /// ```
2480     /// use icu::properties::CodePointSetData;
2481     /// use icu::properties::props::JoinControl;
2482     ///
2483     /// let join_control = CodePointSetData::new::<JoinControl>();
2484     ///
2485     /// assert!(join_control.contains('\u{200C}'));  // ZERO WIDTH NON-JOINER
2486     /// assert!(join_control.contains('\u{200D}'));  // ZERO WIDTH JOINER
2487     /// assert!(!join_control.contains('\u{200E}'));  // LEFT-TO-RIGHT MARK
2488     /// ```
2489 
2490 }
2491 
2492 make_binary_property! {
2493     name: "Logical_Order_Exception";
2494     short_name: "LOE";
2495     ident: LogicalOrderException;
2496     data_marker: crate::provider::LogicalOrderExceptionV1;
2497     singleton: SINGLETON_LOGICAL_ORDER_EXCEPTION_V1;
2498     func:
2499     /// A small number of spacing vowel letters occurring in certain Southeast Asian scripts such as Thai and Lao.
2500     ///
         /// The property name is `Logical_Order_Exception`; the short name is `LOE`.
         ///
2501     /// # Example
2502     ///
2503     /// ```
2504     /// use icu::properties::CodePointSetData;
2505     /// use icu::properties::props::LogicalOrderException;
2506     ///
2507     /// let logical_order_exception = CodePointSetData::new::<LogicalOrderException>();
2508     ///
2509     /// assert!(logical_order_exception.contains('ແ'));  // U+0EC1 LAO VOWEL SIGN EI
2510     /// assert!(!logical_order_exception.contains('ະ'));  // U+0EB0 LAO VOWEL SIGN A
2511     /// ```
2512 
2513 }
2514 
2515 make_binary_property! {
2516     name: "Lowercase";
2517     short_name: "Lower";
2518     ident: Lowercase;
2519     data_marker: crate::provider::LowercaseV1;
2520     singleton: SINGLETON_LOWERCASE_V1;
2521     func:
2522     /// Lowercase characters.
2523     ///
         /// The property name is `Lowercase`; the short name is `Lower`.
         ///
2524     /// # Example
2525     ///
2526     /// ```
2527     /// use icu::properties::CodePointSetData;
2528     /// use icu::properties::props::Lowercase;
2529     ///
2530     /// let lowercase = CodePointSetData::new::<Lowercase>();
2531     ///
2532     /// assert!(lowercase.contains('a'));
2533     /// assert!(!lowercase.contains('A'));
2534     /// ```
2535 
2536 }
2537 
2538 make_binary_property! {
2539     name: "Math";
2540     short_name: "Math";
2541     ident: Math;
2542     data_marker: crate::provider::MathV1;
2543     singleton: SINGLETON_MATH_V1;
2544     func:
2545     /// Characters used in mathematical notation.
2546     ///
         /// The property name and the short name are both `Math`.
         ///
2547     /// # Example
2548     ///
2549     /// ```
2550     /// use icu::properties::CodePointSetData;
2551     /// use icu::properties::props::Math;
2552     ///
2553     /// let math = CodePointSetData::new::<Math>();
2554     ///
2555     /// assert!(math.contains('='));
2556     /// assert!(math.contains('+'));
2557     /// assert!(!math.contains('-'));
2558     /// assert!(math.contains('−'));  // U+2212 MINUS SIGN
2559     /// assert!(!math.contains('/'));
2560     /// assert!(math.contains('∕'));  // U+2215 DIVISION SLASH
2561     /// ```
2562 
2563 }
2564 
2565 make_binary_property! {
2566     name: "Noncharacter_Code_Point";
2567     short_name: "NChar";
2568     ident: NoncharacterCodePoint;
2569     data_marker: crate::provider::NoncharacterCodePointV1;
2570     singleton: SINGLETON_NONCHARACTER_CODE_POINT_V1;
2571     func:
2572     /// Code points permanently reserved for internal use.
2573     ///
         /// The property name is `Noncharacter_Code_Point`; the short name is `NChar`.
         ///
2574     /// # Example
2575     ///
2576     /// ```
2577     /// use icu::properties::CodePointSetData;
2578     /// use icu::properties::props::NoncharacterCodePoint;
2579     ///
2580     /// let noncharacter_code_point = CodePointSetData::new::<NoncharacterCodePoint>();
2581     ///
2582     /// assert!(noncharacter_code_point.contains('\u{FDD0}'));  // noncharacter
2583     /// assert!(noncharacter_code_point.contains('\u{FFFF}'));  // noncharacter
2584     /// assert!(!noncharacter_code_point.contains('\u{10000}'));  // LINEAR B SYLLABLE B008 A
2585     /// ```
2586 
2587 }
2588 
2589 make_binary_property! {
2590     name: "NFC_Inert";
2591     short_name: "NFC_Inert";
2592     ident: NfcInert;
2593     data_marker: crate::provider::NfcInertV1;
2594     singleton: SINGLETON_NFC_INERT_V1;
2595     func:
2596     /// Characters that are inert under NFC, i.e., they do not interact with adjacent characters.
         ///
         /// The property name and the short name are both `NFC_Inert`.
2597 
2598 }
2599 
2600 make_binary_property! {
2601     name: "NFD_Inert";
2602     short_name: "NFD_Inert";
2603     ident: NfdInert;
2604     data_marker: crate::provider::NfdInertV1;
2605     singleton: SINGLETON_NFD_INERT_V1;
2606     func:
2607     /// Characters that are inert under NFD, i.e., they do not interact with adjacent characters.
         ///
         /// The property name and the short name are both `NFD_Inert`.
2608 
2609 }
2610 
2611 make_binary_property! {
2612     name: "NFKC_Inert";
2613     short_name: "NFKC_Inert";
2614     ident: NfkcInert;
2615     data_marker: crate::provider::NfkcInertV1;
2616     singleton: SINGLETON_NFKC_INERT_V1;
2617     func:
2618     /// Characters that are inert under NFKC, i.e., they do not interact with adjacent characters.
         ///
         /// The property name and the short name are both `NFKC_Inert`.
2619 
2620 }
2621 
2622 make_binary_property! {
2623     name: "NFKD_Inert";
2624     short_name: "NFKD_Inert";
2625     ident: NfkdInert;
2626     data_marker: crate::provider::NfkdInertV1;
2627     singleton: SINGLETON_NFKD_INERT_V1;
2628     func:
2629     /// Characters that are inert under NFKD, i.e., they do not interact with adjacent characters.
         ///
         /// The property name and the short name are both `NFKD_Inert`.
2630 
2631 }
2632 
2633 make_binary_property! {
2634     name: "Pattern_Syntax";
2635     short_name: "Pat_Syn";
2636     ident: PatternSyntax;
2637     data_marker: crate::provider::PatternSyntaxV1;
2638     singleton: SINGLETON_PATTERN_SYNTAX_V1;
2639     func:
2640     /// Characters used as syntax in patterns (such as regular expressions).
2641     ///
2642     /// See [`Unicode
2643     /// Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for more
2644     /// details.
2645     ///
         /// The property name is `Pattern_Syntax`; the short name is `Pat_Syn`.
         ///
2646     /// # Example
2647     ///
2648     /// ```
2649     /// use icu::properties::CodePointSetData;
2650     /// use icu::properties::props::PatternSyntax;
2651     ///
2652     /// let pattern_syntax = CodePointSetData::new::<PatternSyntax>();
2653     ///
2654     /// assert!(pattern_syntax.contains('{'));
2655     /// assert!(pattern_syntax.contains('⇒'));  // U+21D2 RIGHTWARDS DOUBLE ARROW
2656     /// assert!(!pattern_syntax.contains('0'));
2657     /// ```
2658 
2659 }
2660 
2661 make_binary_property! {
2662     name: "Pattern_White_Space";
2663     short_name: "Pat_WS";
2664     ident: PatternWhiteSpace;
2665     data_marker: crate::provider::PatternWhiteSpaceV1;
2666     singleton: SINGLETON_PATTERN_WHITE_SPACE_V1;
2667     func:
2668     /// Characters used as whitespace in patterns (such as regular expressions).
2669     ///
2670     /// See
2671     /// [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for
2672     /// more details.
2673     ///
         /// The property name is `Pattern_White_Space`; the short name is `Pat_WS`.
         ///
2674     /// # Example
2675     ///
2676     /// ```
2677     /// use icu::properties::CodePointSetData;
2678     /// use icu::properties::props::PatternWhiteSpace;
2679     ///
2680     /// let pattern_white_space = CodePointSetData::new::<PatternWhiteSpace>();
2681     ///
2682     /// assert!(pattern_white_space.contains(' '));
2683     /// assert!(pattern_white_space.contains('\u{2029}'));  // PARAGRAPH SEPARATOR
2684     /// assert!(pattern_white_space.contains('\u{000A}'));  // NEW LINE
2685     /// assert!(!pattern_white_space.contains('\u{00A0}'));  // NO-BREAK SPACE
2686     /// ```
2687 
2688 }
2689 
2690 make_binary_property! {
2691     name: "Prepended_Concatenation_Mark";
2692     short_name: "PCM";
2693     ident: PrependedConcatenationMark;
2694     data_marker: crate::provider::PrependedConcatenationMarkV1;
2695     singleton: SINGLETON_PREPENDED_CONCATENATION_MARK_V1;
2696     func:
2697     /// A small class of visible format controls, which precede and then span a sequence of
2698     /// other characters, usually digits.
         ///
         /// The property name is `Prepended_Concatenation_Mark`; the short name is `PCM`.
2699 
2700 }
2701 
2702 make_binary_property! {
2703     name: "Print";
2704     short_name: "Print";
2705     ident: Print;
2706     data_marker: crate::provider::PrintV1;
2707     singleton: SINGLETON_PRINT_V1;
2708     func:
2709     /// Printable characters (visible characters and whitespace).
2710     ///
2711     /// This is defined for POSIX compatibility.
         ///
         /// The property name and the short name are both `Print`.
2712 
2713 }
2714 
2715 make_binary_property! {
2716     name: "Quotation_Mark";
2717     short_name: "QMark";
2718     ident: QuotationMark;
2719     data_marker: crate::provider::QuotationMarkV1;
2720     singleton: SINGLETON_QUOTATION_MARK_V1;
2721     func:
2722     /// Punctuation characters that function as quotation marks.
2723     ///
         /// The property name is `Quotation_Mark`; the short name is `QMark`.
         ///
2724     /// # Example
2725     ///
2726     /// ```
2727     /// use icu::properties::CodePointSetData;
2728     /// use icu::properties::props::QuotationMark;
2729     ///
2730     /// let quotation_mark = CodePointSetData::new::<QuotationMark>();
2731     ///
2732     /// assert!(quotation_mark.contains('\''));
2733     /// assert!(quotation_mark.contains('„'));  // U+201E DOUBLE LOW-9 QUOTATION MARK
2734     /// assert!(!quotation_mark.contains('<'));
2735     /// ```
2736 
2737 }
2738 
2739 make_binary_property! {
2740     name: "Radical";
2741     short_name: "Radical";
2742     ident: Radical;
2743     data_marker: crate::provider::RadicalV1;
2744     singleton: SINGLETON_RADICAL_V1;
2745     func:
2746     /// Characters used in the definition of Ideographic Description Sequences.
2747     ///
         /// The property name and the short name are both `Radical`.
         ///
2748     /// # Example
2749     ///
2750     /// ```
2751     /// use icu::properties::CodePointSetData;
2752     /// use icu::properties::props::Radical;
2753     ///
2754     /// let radical = CodePointSetData::new::<Radical>();
2755     ///
2756     /// assert!(radical.contains('⺆'));  // U+2E86 CJK RADICAL BOX
2757     /// assert!(!radical.contains('\u{F95E}'));  // U+F95E CJK COMPATIBILITY IDEOGRAPH-F95E
2758     /// ```
2759 
2760 }
2761 
2762 make_binary_property! {
2763     name: "Regional_Indicator";
2764     short_name: "RI";
2765     ident: RegionalIndicator;
2766     data_marker: crate::provider::RegionalIndicatorV1;
2767     singleton: SINGLETON_REGIONAL_INDICATOR_V1;
2768     func:
2769     /// Regional indicator characters, `U+1F1E6..U+1F1FF`.
2770     ///
         /// The property name is `Regional_Indicator`; the short name is `RI`.
         ///
2771     /// # Example
2772     ///
2773     /// ```
2774     /// use icu::properties::CodePointSetData;
2775     /// use icu::properties::props::RegionalIndicator;
2776     ///
2777     /// let regional_indicator = CodePointSetData::new::<RegionalIndicator>();
2778     ///
2779     /// assert!(regional_indicator.contains('\u{1F1F9}'));  // U+1F1F9 REGIONAL INDICATOR SYMBOL LETTER T
2780     /// assert!(!regional_indicator.contains('Ⓣ'));  // U+24C9 CIRCLED LATIN CAPITAL LETTER T
2781     /// assert!(!regional_indicator.contains('T'));
2782     /// ```
2783 
2784 }
2785 
2786 make_binary_property! {
2787     name: "Soft_Dotted";
2788     short_name: "SD";
2789     ident: SoftDotted;
2790     data_marker: crate::provider::SoftDottedV1;
2791     singleton: SINGLETON_SOFT_DOTTED_V1;
2792     func:
2793     /// Characters with a "soft dot", like i or j.
2794     ///
2795     /// An accent placed on these characters causes
2796     /// the dot to disappear.
2797     ///
         /// The property name is `Soft_Dotted`; the short name is `SD`.
         ///
2798     /// # Example
2799     ///
2800     /// ```
2801     /// use icu::properties::CodePointSetData;
2802     /// use icu::properties::props::SoftDotted;
2803     ///
2804     /// let soft_dotted = CodePointSetData::new::<SoftDotted>();
2805     ///
2806     /// assert!(soft_dotted.contains('і'));  // U+0456 CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
2807     /// assert!(!soft_dotted.contains('ı'));  // U+0131 LATIN SMALL LETTER DOTLESS I
2808     /// ```
2809 
2810 }
2811 
2812 make_binary_property! {
2813     name: "Segment_Starter";
2814     short_name: "Segment_Starter";
2815     ident: SegmentStarter;
2816     data_marker: crate::provider::SegmentStarterV1;
2817     singleton: SINGLETON_SEGMENT_STARTER_V1;
2818     func:
2819     /// Characters that are starters in terms of Unicode normalization and combining character
2820     /// sequences.
         ///
         /// The property name and the short name are both `Segment_Starter`.
2821 
2822 }
2823 
2824 make_binary_property! {
2825     name: "Case_Sensitive";
2826     short_name: "Case_Sensitive";
2827     ident: CaseSensitive;
2828     data_marker: crate::provider::CaseSensitiveV1;
2829     singleton: SINGLETON_CASE_SENSITIVE_V1;
2830     func:
2831     /// Characters that are either the source of a case mapping or in the target of a case
2832     /// mapping.
         ///
         /// The property name and the short name are both `Case_Sensitive`.
2833 
2834 }
2835 
2836 make_binary_property! {
2837     name: "Sentence_Terminal";
2838     short_name: "STerm";
2839     ident: SentenceTerminal;
2840     data_marker: crate::provider::SentenceTerminalV1;
2841     singleton: SINGLETON_SENTENCE_TERMINAL_V1;
2842     func:
2843     /// Punctuation characters that generally mark the end of sentences.
2844     ///
         /// The property name is `Sentence_Terminal`; the short name is `STerm`.
         ///
2845     /// # Example
2846     ///
2847     /// ```
2848     /// use icu::properties::CodePointSetData;
2849     /// use icu::properties::props::SentenceTerminal;
2850     ///
2851     /// let sentence_terminal = CodePointSetData::new::<SentenceTerminal>();
2852     ///
2853     /// assert!(sentence_terminal.contains('.'));
2854     /// assert!(sentence_terminal.contains('?'));
2855     /// assert!(sentence_terminal.contains('᪨'));  // U+1AA8 TAI THAM SIGN KAAN
2856     /// assert!(!sentence_terminal.contains(','));
2857     /// assert!(!sentence_terminal.contains('¿'));  // U+00BF INVERTED QUESTION MARK
2858     /// ```
2859 
2860 }
2861 
2862 make_binary_property! {
2863     name: "Terminal_Punctuation";
2864     short_name: "Term";
2865     ident: TerminalPunctuation;
2866     data_marker: crate::provider::TerminalPunctuationV1;
2867     singleton: SINGLETON_TERMINAL_PUNCTUATION_V1;
2868     func:
2869     /// Punctuation characters that generally mark the end of textual units.
2870     ///
         /// The property name is `Terminal_Punctuation`; the short name is `Term`.
         ///
2871     /// # Example
2872     ///
2873     /// ```
2874     /// use icu::properties::CodePointSetData;
2875     /// use icu::properties::props::TerminalPunctuation;
2876     ///
2877     /// let terminal_punctuation = CodePointSetData::new::<TerminalPunctuation>();
2878     ///
2879     /// assert!(terminal_punctuation.contains('.'));
2880     /// assert!(terminal_punctuation.contains('?'));
2881     /// assert!(terminal_punctuation.contains('᪨'));  // U+1AA8 TAI THAM SIGN KAAN
2882     /// assert!(terminal_punctuation.contains(','));
2883     /// assert!(!terminal_punctuation.contains('¿'));  // U+00BF INVERTED QUESTION MARK
2884     /// ```
2885 
2886 }
2887 
2888 make_binary_property! {
2889     name: "Unified_Ideograph";
2890     short_name: "UIdeo";
2891     ident: UnifiedIdeograph;
2892     data_marker: crate::provider::UnifiedIdeographV1;
2893     singleton: SINGLETON_UNIFIED_IDEOGRAPH_V1;
2894     func:
2895     /// A property which specifies the exact set of Unified CJK Ideographs in the standard.
2896     ///
         /// The property name is `Unified_Ideograph`; the short name is `UIdeo`.
         ///
2897     /// # Example
2898     ///
2899     /// ```
2900     /// use icu::properties::CodePointSetData;
2901     /// use icu::properties::props::UnifiedIdeograph;
2902     ///
2903     /// let unified_ideograph = CodePointSetData::new::<UnifiedIdeograph>();
2904     ///
2905     /// assert!(unified_ideograph.contains('川'));  // U+5DDD CJK UNIFIED IDEOGRAPH-5DDD
2906     /// assert!(unified_ideograph.contains('木'));  // U+6728 CJK UNIFIED IDEOGRAPH-6728
2907     /// assert!(!unified_ideograph.contains('\u{1B178}'));  // U+1B178 NUSHU CHARACTER-1B178
2908     /// ```
2909 
2910 }
2911 
2912 make_binary_property! {
2913     name: "Uppercase";
2914     short_name: "Upper";
2915     ident: Uppercase;
2916     data_marker: crate::provider::UppercaseV1;
2917     singleton: SINGLETON_UPPERCASE_V1;
2918     func:
2919     /// Uppercase characters.
2920     ///
         /// The property name is `Uppercase`; the short name is `Upper`.
         ///
2921     /// # Example
2922     ///
2923     /// ```
2924     /// use icu::properties::CodePointSetData;
2925     /// use icu::properties::props::Uppercase;
2926     ///
2927     /// let uppercase = CodePointSetData::new::<Uppercase>();
2928     ///
2929     /// assert!(uppercase.contains('U'));
2930     /// assert!(!uppercase.contains('u'));
2931     /// ```
2932 
2933 }
2934 
2935 make_binary_property! {
2936     name: "Variation_Selector";
2937     short_name: "VS";
2938     ident: VariationSelector;
2939     data_marker: crate::provider::VariationSelectorV1;
2940     singleton: SINGLETON_VARIATION_SELECTOR_V1;
2941     func:
2942     /// Characters that are Variation Selectors.
2943     ///
         /// The property name is `Variation_Selector`; the short name is `VS`.
         ///
2944     /// # Example
2945     ///
2946     /// ```
2947     /// use icu::properties::CodePointSetData;
2948     /// use icu::properties::props::VariationSelector;
2949     ///
2950     /// let variation_selector = CodePointSetData::new::<VariationSelector>();
2951     ///
2952     /// assert!(variation_selector.contains('\u{180D}'));  // MONGOLIAN FREE VARIATION SELECTOR THREE
2953     /// assert!(!variation_selector.contains('\u{303E}'));  // IDEOGRAPHIC VARIATION INDICATOR
2954     /// assert!(variation_selector.contains('\u{FE0F}'));  // VARIATION SELECTOR-16
2955     /// assert!(!variation_selector.contains('\u{FE10}'));  // PRESENTATION FORM FOR VERTICAL COMMA
2956     /// assert!(variation_selector.contains('\u{E01EF}'));  // VARIATION SELECTOR-256
2957     /// ```
2958 
2959 }
2960 
make_binary_property! {
    name: "White_Space";
    short_name: "space";
    ident: WhiteSpace;
    data_marker: crate::provider::WhiteSpaceV1;
    singleton: SINGLETON_WHITE_SPACE_V1;
    func:
    /// Spaces, separator characters and other control characters which should be treated by
    /// programming languages as "white space" for the purpose of parsing elements.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::WhiteSpace;
    ///
    /// let white_space = CodePointSetData::new::<WhiteSpace>();
    ///
    /// assert!(white_space.contains(' '));  // U+0020 SPACE
    /// assert!(white_space.contains('\u{000A}'));  // U+000A LINE FEED (NEW LINE)
    /// assert!(white_space.contains('\u{00A0}'));  // U+00A0 NO-BREAK SPACE
    /// assert!(!white_space.contains('\u{200B}'));  // U+200B ZERO WIDTH SPACE
    /// ```

}
2986 
make_binary_property! {
    name: "Xdigit";
    short_name: "Xdigit";
    ident: Xdigit;
    data_marker: crate::provider::XdigitV1;
    singleton: SINGLETON_XDIGIT_V1;
    func:
    /// Hexadecimal digits.
    ///
    /// This is defined for POSIX compatibility.

}
2998 
make_binary_property! {
    name: "XID_Continue";
    short_name: "XIDC";
    ident: XidContinue;
    data_marker: crate::provider::XidContinueV1;
    singleton: SINGLETON_XID_CONTINUE_V1;
    func:
    /// Characters that can come after the first character in an identifier.
    ///
    /// See [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html)
    /// for more details.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::XidContinue;
    ///
    /// let xid_continue = CodePointSetData::new::<XidContinue>();
    ///
    /// assert!(xid_continue.contains('x'));
    /// assert!(xid_continue.contains('1'));
    /// assert!(xid_continue.contains('_'));
    /// assert!(xid_continue.contains('ߝ'));  // U+07DD NKO LETTER FA
    /// assert!(!xid_continue.contains('ⓧ'));  // U+24E7 CIRCLED LATIN SMALL LETTER X
    /// assert!(!xid_continue.contains('\u{FC5E}'));  // U+FC5E ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
    /// ```

}
3028 
make_binary_property! {
    name: "XID_Start";
    short_name: "XIDS";
    ident: XidStart;
    data_marker: crate::provider::XidStartV1;
    singleton: SINGLETON_XID_START_V1;
    func:
    /// Characters that can begin an identifier.
    ///
    /// See [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html)
    /// for more details.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::XidStart;
    ///
    /// let xid_start = CodePointSetData::new::<XidStart>();
    ///
    /// assert!(xid_start.contains('x'));
    /// assert!(!xid_start.contains('1'));
    /// assert!(!xid_start.contains('_'));
    /// assert!(xid_start.contains('ߝ'));  // U+07DD NKO LETTER FA
    /// assert!(!xid_start.contains('ⓧ'));  // U+24E7 CIRCLED LATIN SMALL LETTER X
    /// assert!(!xid_start.contains('\u{FC5E}'));  // U+FC5E ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
    /// ```

}
3059 
3060 pub use crate::emoji::EmojiSet;
3061 
/// Defines a public unit struct that implements [`EmojiSet`].
///
/// The invocation supplies the struct's identifier, the data marker type used as
/// `EmojiSet::DataMarker`, the name of the baked constant used as `EmojiSet::SINGLETON`
/// (only with the `compiled_data` feature), and one or more attributes (normally the doc
/// comment) that are attached to the generated struct.
macro_rules! make_emoji_set {
    (
        ident: $set_ty:ident;
        data_marker: $marker_ty:ty;
        singleton: $baked:ident;
        func:
        $(#[$attr:meta])+
    ) => {
        $(#[$attr])+
        #[derive(Debug)]
        #[non_exhaustive]
        pub struct $set_ty;

        // `Sealed` is implemented alongside `EmojiSet` for every generated type.
        impl crate::private::Sealed for $set_ty {}

        impl EmojiSet for $set_ty {
            type DataMarker = $marker_ty;
            #[cfg(feature = "compiled_data")]
            const SINGLETON: &'static crate::provider::PropertyUnicodeSet<'static> =
                &crate::provider::Baked::$baked;
        }
    };
}
3085 
make_emoji_set! {
    ident: BasicEmoji;
    data_marker: crate::provider::BasicEmojiV1;
    singleton: SINGLETON_BASIC_EMOJI_V1;
    func:
    /// Characters and character sequences intended for general-purpose, independent, direct input.
    ///
    /// See [`Unicode Technical Standard #51`](https://unicode.org/reports/tr51/) for more
    /// details.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::EmojiSetData;
    /// use icu::properties::props::BasicEmoji;
    ///
    /// let basic_emoji = EmojiSetData::new::<BasicEmoji>();
    ///
    /// assert!(!basic_emoji.contains('\u{0020}'));  // U+0020 SPACE
    /// assert!(!basic_emoji.contains('\n'));  // U+000A LINE FEED
    /// assert!(basic_emoji.contains('\u{1F983}')); // U+1F983 TURKEY
    /// assert!(basic_emoji.contains_str("\u{1F983}"));
    /// assert!(basic_emoji.contains_str("\u{1F6E4}\u{FE0F}")); // railway track
    /// assert!(!basic_emoji.contains_str("\u{0033}\u{FE0F}\u{20E3}"));  // Emoji_Keycap_Sequence, keycap 3
    /// ```
}
3112 
#[cfg(test)]
mod test_enumerated_property_completeness {
    use super::*;
    use std::collections::BTreeMap;

    /// Asserts that the values listed in the `lookup` data and the values yielded by `consts`
    /// are the same set.
    ///
    /// Any value found on only one side is listed in the panic message, one per line, tagged
    /// with the side it came from (`Consts` or `Data`).
    fn check_enum<'a, T: NamedEnumeratedProperty>(
        lookup: &crate::provider::names::PropertyValueNameToEnumMap<'static>,
        consts: impl IntoIterator<Item = &'a T>,
    ) where
        u16: From<T>,
    {
        // Everything the data knows about, keyed by numeric value. Entries are removed as the
        // matching constant is seen, so whatever remains afterwards exists only in the data.
        let mut data_only: BTreeMap<_, _> = lookup
            .map
            .iter()
            .map(|(name, value)| (value, (name, "Data")))
            .collect();

        let long_names = crate::PropertyNamesLong::<T>::new();

        // Constants with no counterpart in the data come first, in iteration order.
        let mut mismatches = Vec::new();
        for constant in consts {
            let numeric = u16::from(*constant) as usize;
            if data_only.remove(&numeric).is_some() {
                continue;
            }
            let label = long_names
                .get(*constant)
                .unwrap_or("<unknown>")
                .to_string();
            mismatches.push((numeric, (label, "Consts")));
        }
        // Then the data entries that no constant claimed.
        mismatches.extend(data_only);

        let report: String = mismatches
            .into_iter()
            .map(|(value, (name, source))| format!("{source}:\t{name} = {value:?}\n"))
            .collect();

        assert!(
            report.is_empty(),
            "Values defined in data do not match values defined in consts. Difference:\n{}",
            report
        );
    }

    /// `EastAsianWidth` constants agree with the baked name-to-value data.
    #[test]
    fn test_ea() {
        check_enum(
            crate::provider::Baked::SINGLETON_EAST_ASIAN_WIDTH_NAME_TO_VALUE_V2,
            EastAsianWidth::ALL_VALUES,
        );
    }

    /// `CanonicalCombiningClass` constants agree with the baked name-to-value data.
    #[test]
    fn test_ccc() {
        check_enum(
            crate::provider::Baked::SINGLETON_CANONICAL_COMBINING_CLASS_NAME_TO_VALUE_V2,
            CanonicalCombiningClass::ALL_VALUES,
        );
    }

    /// `JoiningType` constants agree with the baked name-to-value data.
    #[test]
    fn test_jt() {
        check_enum(
            crate::provider::Baked::SINGLETON_JOINING_TYPE_NAME_TO_VALUE_V2,
            JoiningType::ALL_VALUES,
        );
    }

    /// `IndicSyllabicCategory` constants agree with the baked name-to-value data.
    #[test]
    fn test_insc() {
        check_enum(
            crate::provider::Baked::SINGLETON_INDIC_SYLLABIC_CATEGORY_NAME_TO_VALUE_V2,
            IndicSyllabicCategory::ALL_VALUES,
        );
    }

    /// `SentenceBreak` constants agree with the baked name-to-value data.
    #[test]
    fn test_sb() {
        check_enum(
            crate::provider::Baked::SINGLETON_SENTENCE_BREAK_NAME_TO_VALUE_V2,
            SentenceBreak::ALL_VALUES,
        );
    }

    /// `WordBreak` constants agree with the baked name-to-value data.
    #[test]
    fn test_wb() {
        check_enum(
            crate::provider::Baked::SINGLETON_WORD_BREAK_NAME_TO_VALUE_V2,
            WordBreak::ALL_VALUES,
        );
    }

    /// `BidiClass` constants agree with the baked name-to-value data.
    #[test]
    fn test_bc() {
        check_enum(
            crate::provider::Baked::SINGLETON_BIDI_CLASS_NAME_TO_VALUE_V2,
            BidiClass::ALL_VALUES,
        );
    }

    /// `HangulSyllableType` constants agree with the baked name-to-value data.
    #[test]
    fn test_hst() {
        check_enum(
            crate::provider::Baked::SINGLETON_HANGUL_SYLLABLE_TYPE_NAME_TO_VALUE_V2,
            HangulSyllableType::ALL_VALUES,
        );
    }
}
3225