#
# Copyright (C) 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
# Copyright (c) 2016, International Business Machines Corporation and others. All Rights Reserved.

# file: grapheme.txt
#
# Reference Grapheme Break rules for intltest rbbi/RBBIMonkeyTest
#
# Layout of this file:
#   1. Settings (break type, locale).
#   2. Named character classes, each terminated by ';'.
#   3. Named break rules (GBnn: ...;). The rule names follow the rule numbers
#      of the grapheme cluster boundary rules in UAX #29.
#
# Note: Rule syntax and the monkey test itself are still a work in progress.
#       They are expected to change with review and the addition of support for rule tailoring.

# --- Settings ---------------------------------------------------------------

type = grapheme; # one of grapheme | word | line | sentence
locale = en;

# --- Character classes --------------------------------------------------------
# Each class below selects characters by Grapheme_Cluster_Break property value.

CR = [\p{Grapheme_Cluster_Break = CR}];
LF = [\p{Grapheme_Cluster_Break = LF}];

Control = [[\p{Grapheme_Cluster_Break = Control}]];
Extend = [[\p{Grapheme_Cluster_Break = Extend}]];
ZWJ = [\p{Grapheme_Cluster_Break = ZWJ}];
Regional_Indicator = [\p{Grapheme_Cluster_Break = Regional_Indicator}];
Prepend = [\p{Grapheme_Cluster_Break = Prepend}];
SpacingMark = [\p{Grapheme_Cluster_Break = SpacingMark}];

#
# Korean Syllable Definitions
#
L = [\p{Grapheme_Cluster_Break = L}];
V = [\p{Grapheme_Cluster_Break = V}];
T = [\p{Grapheme_Cluster_Break = T}];
LV = [\p{Grapheme_Cluster_Break = LV}];
LVT = [\p{Grapheme_Cluster_Break = LVT}];

# Emoji definitions
# ExtPict is the short alias of the Extended_Pictographic property.

Extended_Pict = [:ExtPict:];

# Indic Sequences
# Virama_ and LinkingConsonant share the same script restriction (Gujarati,
# Telugu, Malayalam, Oriya, Bengali, Devanagari) and differ only in the
# Indic_Syllabic_Category value they intersect it with.
Virama_ = [[\p{Gujr}\p{sc=Telu}\p{sc=Mlym}\p{sc=Orya}\p{sc=Beng}\p{sc=Deva}] & [\p{Indic_Syllabic_Category=Virama}]];

LinkingConsonant = [[\p{Gujr}\p{sc=Telu}\p{sc=Mlym}\p{sc=Orya}\p{sc=Beng}\p{sc=Deva}] & [\p{Indic_Syllabic_Category=Consonant}]];

# Extend characters with a non-zero canonical combining class, plus ZWJ.
ExtCccZwj = [[Extend-[\p{ccc=0}]] ZWJ];

# --- Break rules ------------------------------------------------------------
# '÷' marks a break position within a rule. Rules written without '÷'
# presumably describe sequences that must not be broken (the "×" rules of
# UAX #29) -- confirm against the RBBIMonkeyTest rule parser.
#
# NOTE(review): rule order looks significant. GB11 and GB9c are listed ahead
# of GB9, presumably so the longer emoji-ZWJ and Indic conjunct sequences are
# tried before the general "anything followed by Extend or ZWJ" rule. Verify
# against the test implementation before reordering anything below.

# Line terminators and controls.
GB3: CR LF;
GB4: (Control | CR | LF) ÷;
GB5: . ÷ (Control | CR | LF);

# Hangul syllable sequences.
GB6: L (L | V | LV | LVT);
GB7: (LV | V) (V | T);
GB8: (LVT | T) T;

# Emoji ZWJ sequences, Indic conjuncts, then general extending characters.
GB11: Extended_Pict Extend* ZWJ Extended_Pict;
GB9c: LinkingConsonant ExtCccZwj* Virama_ ExtCccZwj* LinkingConsonant;
GB9: . (Extend | ZWJ);

# Spacing marks attach to what precedes them; Prepend attaches to what follows.
GB9a: . SpacingMark;
GB9b: Prepend .;

# Regional Indicators, split into pairs.
#      Note that a pair of RIs that is not followed by a third RI will fall into
#      the normal rules for Extend, etc.
#
GB12: Regional_Indicator Regional_Indicator ÷ Regional_Indicator;
GB13: Regional_Indicator Regional_Indicator;

# Fallback: break after any single character not covered by a rule above.
GB999: . ÷;