#
#   Copyright (C) 2002-2010, International Business Machines Corporation and others.
#       All Rights Reserved.
#
#   file:  char.txt
#
#   ICU Character Break Rules, also known as Grapheme Cluster Boundaries
#      See Unicode Standard Annex #29.
#      These rules are based on TR29 Revision 16, for Unicode Version 6.0
#
#   Layout of this file:
#      1. Character class definitions, one set per Grapheme_Cluster_Break value.
#      2. !!forward rules: each rule is a two-element sequence of classes.
#      3. !!reverse rules: the same sequences with the two elements swapped.
#      4. !!safe_reverse and !!safe_forward sections, both left empty.
#

#
#  Character Class Definitions.
#  Each variable is the set of code points carrying the named
#  Grapheme_Cluster_Break property value.
#
$CR          = [\p{Grapheme_Cluster_Break = CR}];
$LF          = [\p{Grapheme_Cluster_Break = LF}];
$Control     = [\p{Grapheme_Cluster_Break = Control}];
$Prepend     = [\p{Grapheme_Cluster_Break = Prepend}];
$Extend      = [\p{Grapheme_Cluster_Break = Extend}];
$SpacingMark = [\p{Grapheme_Cluster_Break = SpacingMark}];

#
#  Korean Syllable Definitions
#
$L           = [\p{Grapheme_Cluster_Break = L}];
$V           = [\p{Grapheme_Cluster_Break = V}];
$T           = [\p{Grapheme_Cluster_Break = T}];

$LV          = [\p{Grapheme_Cluster_Break = LV}];
$LVT         = [\p{Grapheme_Cluster_Break = LVT}];

#
#  Everything that is not Control, CR or LF.  This set is the neighbor
#  class in every Extend, SpacingMark and Prepend rule below, so it is
#  named once here instead of being spelled out in each rule.
#
$NonControl  = [^$Control $CR $LF];


## -------------------------------------------------
##  Forward rules.
##  Alternatives between character classes are written as set unions.
## -------------------------------------------------
!!chain;

!!forward;

# CR followed by LF.
$CR $LF;

# Hangul syllable sequences.
$L [$L $V $LV $LVT];
[$LV $V] [$V $T];
[$LVT $T] $T;

# A non-control character followed by an Extend or a SpacingMark.
$NonControl [$Extend $SpacingMark];

# A Prepend character followed by a non-control character.
$Prepend $NonControl;


## -------------------------------------------------
##  Reverse rules.
##  Each rule is the corresponding forward rule with its two elements swapped.
## -------------------------------------------------

!!reverse;

$LF $CR;

[$L $V $LV $LVT] $L;
[$V $T] [$LV $V];
$T [$LVT $T];

[$Extend $SpacingMark] $NonControl;
$NonControl $Prepend;


## -------------------------------------------------
##  Safe reverse rules: none defined.
## -------------------------------------------------

!!safe_reverse;


## -------------------------------------------------
##  Safe forward rules: none defined.
## -------------------------------------------------

!!safe_forward;
