• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#
2# Copyright (C) 2016 and later: Unicode, Inc. and others.
3# License & terms of use: http://www.unicode.org/copyright.html
4# Copyright (c) 2016, International Business Machines Corporation and others. All Rights Reserved.
5
6# file: grapheme.txt
7#
8# Reference Grapheme Break rules for intltest rbbi/RBBIMonkeyTest
9#
10#
11# Note: Rule syntax and the monkey test itself are still a work in progress.
12#       They are expected to change with review and the addition of support for rule tailoring.
13
# Test header: the kind of boundary rules this file holds, followed by the locale setting.
14type = grapheme;      # one of grapheme | word | line | sentence
# NOTE(review): presumably the locale of the break iterator these rules are checked against - confirm in RBBIMonkeyTest.
15locale = en;
16
# Character classes: one named set per Grapheme_Cluster_Break property value.
# The GBn rules further down refer to these sets by name.
17CR                 = [\p{Grapheme_Cluster_Break = CR}];
18LF                 = [\p{Grapheme_Cluster_Break = LF}];
19
# NOTE(review): Control and Extend wrap their property set in a second pair of brackets,
#               unlike the other classes; this looks redundant - confirm before simplifying.
20Control            = [[\p{Grapheme_Cluster_Break = Control}]];
21Extend             = [[\p{Grapheme_Cluster_Break = Extend}]];
22ZWJ                = [\p{Grapheme_Cluster_Break = ZWJ}];
23Regional_Indicator = [\p{Grapheme_Cluster_Break = Regional_Indicator}];
24Prepend            = [\p{Grapheme_Cluster_Break = Prepend}];
25SpacingMark        = [\p{Grapheme_Cluster_Break = SpacingMark}];
26
27#
28# Korean Syllable Definitions
29#
# Sets for the Grapheme_Cluster_Break values L, V, T, LV and LVT.
# Rules GB6, GB7 and GB8 combine them.
30L                  = [\p{Grapheme_Cluster_Break = L}];
31V                  = [\p{Grapheme_Cluster_Break = V}];
32T                  = [\p{Grapheme_Cluster_Break = T}];
33LV                 = [\p{Grapheme_Cluster_Break = LV}];
34LVT                = [\p{Grapheme_Cluster_Break = LVT}];
35
36# Emoji definitions
37
# Extended_Pict is written in [:name:] set notation rather than \p{...}; rule GB11 uses it.
# NOTE(review): ExtPict is presumably the short alias of the Extended_Pictographic property - confirm.
38Extended_Pict      = [:ExtPict:];
39
40# Indic Sequences
# Virama_ and LinkingConsonant are each the intersection (&) of the same six-term script set
# with one Indic_Syllabic_Category value (Virama and Consonant respectively). Rule GB9c uses both.
# NOTE(review): \p{Gujr} omits the "sc=" prefix that the other five script terms carry;
#               presumably it still selects the script - confirm, or add the prefix for consistency.
41Virama_            = [[\p{Gujr}\p{sc=Telu}\p{sc=Mlym}\p{sc=Orya}\p{sc=Beng}\p{sc=Deva}] & [\p{Indic_Syllabic_Category=Virama}]];
42
43LinkingConsonant   = [[\p{Gujr}\p{sc=Telu}\p{sc=Mlym}\p{sc=Orya}\p{sc=Beng}\p{sc=Deva}] & [\p{Indic_Syllabic_Category=Consonant}]];
44
# ExtCccZwj: the Extend set with the ccc=0 characters removed, together with ZWJ.
45ExtCccZwj          = [[Extend-[\p{ccc=0}]] ZWJ];
46
# Line terminators and controls.
# NOTE(review): the ÷ sign appears to mark a break position, and a rule without it to keep
#               its match unbroken - confirm against the monkey test rule syntax.
# GB3 has no ÷ between CR and LF. GB4 puts ÷ after, and GB5 puts ÷ before, any of Control, CR, LF.
47GB3:     CR LF;
48GB4:     (Control | CR | LF) ÷;
49GB5:     . ÷ (Control | CR | LF);
50
# Hangul syllable sequences built from the L, V, T, LV and LVT sets; none of these rules contains ÷.
51GB6:     L (L | V | LV | LVT);
52GB7:     (LV | V) (V | T);
53GB8:     (LVT | T) T;
54
# GB11: Extended_Pict, any number of Extend, one ZWJ, then Extended_Pict.
55GB11:    Extended_Pict Extend* ZWJ Extended_Pict;
# GB9c: two LinkingConsonants with a Virama_ between them; ExtCccZwj may repeat on either side of the Virama_.
56GB9c:    LinkingConsonant ExtCccZwj* Virama_ ExtCccZwj* LinkingConsonant;
# GB9: any character followed by Extend or ZWJ.
# NOTE(review): GB11 and GB9c are listed ahead of GB9, presumably so the longer sequences
#               are tried first - confirm that rule order is significant before reordering.
57GB9:     . (Extend | ZWJ);
58
# GB9a: any character followed by SpacingMark. GB9b: Prepend followed by any character.
59GB9a:    . SpacingMark;
60GB9b:    Prepend .;
61
62# Regional Indicators, split into pairs.
63#      Note that a pair of RIs that is not followed by a third RI will fall into
64#      the normal rules for Extend, etc.
65#
# GB12: two Regional_Indicators, then ÷ before a third one.
# GB13: two Regional_Indicators with no ÷ in the rule.
66GB12:  Regional_Indicator Regional_Indicator ÷ Regional_Indicator;
67GB13:  Regional_Indicator Regional_Indicator;
68
# Last rule in the file: ÷ after any single character.
69GB999:     . ÷;
70