• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#
2#   Copyright (C) 2002-2013, International Business Machines Corporation and others.
3#       All Rights Reserved.
4#
5#   file:  char.txt
6#
7#   ICU Character Break Rules, also known as Grapheme Cluster Boundaries
8#      See Unicode Standard Annex #29.
9#      These rules are based on UAX #29 Revision 20 for Unicode Version 6.2
10#
11
12#
13#  Character Class Definitions.
14#
# Each variable below names the set of code points whose Unicode
# Grapheme_Cluster_Break property has the given value. The rule sections
# later in this file refer to these sets by variable name.
# $CR / $LF / $Control are the classes that the Extend and SpacingMark rules
# explicitly exclude as a preceding (base) character.
15$CR          = [\p{Grapheme_Cluster_Break = CR}];
16$LF          = [\p{Grapheme_Cluster_Break = LF}];
17$Control     = [\p{Grapheme_Cluster_Break = Control}];
18# TODO: Restore if the Prepend set becomes non-empty again: $Prepend     = [\p{Grapheme_Cluster_Break = Prepend}];
# $Extend and $SpacingMark are the classes that get attached to a preceding
# non-control character by the rules below.
19$Extend      = [\p{Grapheme_Cluster_Break = Extend}];
20$SpacingMark = [\p{Grapheme_Cluster_Break = SpacingMark}];
# Regional indicator symbols; the rules below keep an adjacent pair together.
21$Regional_Indicator = [\p{Grapheme_Cluster_Break = Regional_Indicator}];
22
23#
24# Korean Syllable Definitions
25#
# Hangul syllable classes, again taken directly from the
# Grapheme_Cluster_Break property. In UAX #29 terms: L = leading consonant
# jamo, V = vowel jamo, T = trailing consonant jamo.
26$L       = [\p{Grapheme_Cluster_Break = L}];
27$V       = [\p{Grapheme_Cluster_Break = V}];
28$T       = [\p{Grapheme_Cluster_Break = T}];
29
# Precomposed syllables: LV = leading consonant + vowel,
# LVT = leading consonant + vowel + trailing consonant.
30$LV      = [\p{Grapheme_Cluster_Break = LV}];
31$LVT     = [\p{Grapheme_Cluster_Break = LVT}];
32
33
34## -------------------------------------------------
# !!chain turns on rule chaining for this rule set (see the ICU break-rule
# documentation): two-character rules below can then link up to cover longer
# sequences, e.g. a base character followed by several $Extend characters.
35!!chain;
36
# Forward rules. Each rule lists a pair of adjacent character classes that
# must stay in the same grapheme cluster (no boundary between them).
37!!forward;
38
# CR followed by LF is a single cluster (UAX #29 rule GB3).
39$CR $LF;
40
# Hangul syllable sequences (UAX #29 rules GB6, GB7, GB8):
#   L  followed by L, V, LV or LVT
#   LV or V followed by V or T
#   LVT or T followed by T
41$L ($L | $V | $LV | $LVT);
42($LV | $V) ($V | $T);
43($LVT | $T) $T;
44
# Two adjacent regional indicator symbols stay together (GB8a).
45$Regional_Indicator $Regional_Indicator;
46
# Any character other than Control, CR or LF keeps a following Extend
# character attached (GB9).
47[^$Control $CR $LF] $Extend;
48
# Same as above for a following SpacingMark character (GB9a).
49[^$Control $CR $LF] $SpacingMark;
50# TODO: Restore if the Prepend set becomes non-empty again: $Prepend [^$Control $CR $LF];
51
52
53## -------------------------------------------------
54
# Reverse rules. These are the forward rules with the operand order swapped,
# so the same pairs are kept together when the text is scanned backwards.
55!!reverse;
# LF preceded by CR (mirror of "$CR $LF").
56$LF $CR;
# Hangul sequences, mirrored from the three forward Hangul rules.
57($L | $V | $LV | $LVT) $L;
58($V | $T) ($LV | $V);
59$T ($LVT | $T);
60
# A pair of regional indicators is symmetric, so this rule is unchanged.
61$Regional_Indicator $Regional_Indicator;
62
# Extend / SpacingMark preceded by any character other than Control, CR or LF.
63$Extend      [^$Control $CR $LF];
64$SpacingMark [^$Control $CR $LF];
65# TODO: Restore if the Prepend set becomes non-empty again: [^$Control $CR $LF] $Prepend;
66
67
68## -------------------------------------------------
69#  Safe rules are not logically required for character breaks, but if none are provided at all,
70#  the break engine's preceding() and following() will fall back to the
71#  old-style, inefficient algorithm.
72
# Safe-reverse rule set. Only the CR LF pair is listed, written in reverse
# order (LF then CR) to match the backwards scan direction.
73!!safe_reverse;
74$LF $CR;
75
76## -------------------------------------------------
77
# Safe-forward rule set. Again only the CR LF pair, in forward order.
78!!safe_forward;
79$CR $LF;
80
81