• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1<?xml version="1.0" encoding="UTF-8" ?>
2<!DOCTYPE ldml SYSTEM "../../common/dtd/ldml.dtd">
3<!--
4Copyright © 1991-2024 Unicode, Inc.
5CLDR data files are interpreted according to the LDML specification (https://unicode.org/reports/tr35/)
6For terms of use and license, see https://www.unicode.org/terms_of_use.html
7-->
8<ldml>
9	<identity>
		<!-- File identity: a revision keyword as the version number and "root" as the language. -->
10		<version number="$Revision: 13690 $"/>
11		<language type="root"/>
12	</identity>
13	<segmentations>
14		<segmentation type="GraphemeClusterBreak">
15			<variables>
				<!-- Named character sets for the GraphemeClusterBreak rules. Each value is a set expression over -->
				<!-- Unicode properties; the rules of this segmentation refer to the sets by their $-prefixed id. -->
16				<!-- VARIABLES -->
17				<variable id="$CR">\p{Grapheme_Cluster_Break=CR}</variable>
18				<variable id="$LF">\p{Grapheme_Cluster_Break=LF}</variable>
19				<variable id="$Control">\p{Grapheme_Cluster_Break=Control}</variable>
20				<variable id="$Extend">\p{Grapheme_Cluster_Break=Extend}</variable>
21				<variable id="$ZWJ">\p{Grapheme_Cluster_Break=ZWJ}</variable>
22				<variable id="$RI">\p{Grapheme_Cluster_Break=Regional_Indicator}</variable>
23				<variable id="$Prepend">\p{Grapheme_Cluster_Break=Prepend}</variable>
24				<variable id="$SpacingMark">\p{Grapheme_Cluster_Break=SpacingMark}</variable>
25				<variable id="$L">\p{Grapheme_Cluster_Break=L}</variable>
26				<variable id="$V">\p{Grapheme_Cluster_Break=V}</variable>
27				<variable id="$T">\p{Grapheme_Cluster_Break=T}</variable>
28				<variable id="$LV">\p{Grapheme_Cluster_Break=LV}</variable>
29				<variable id="$LVT">\p{Grapheme_Cluster_Break=LVT}</variable>
30				<!-- Note: The following may overlap with the above -->
31				<!-- Note: ConjunctLinkingScripts is not used anymore, instead that list exists in the derivation of Indic_Conjunct_Break. -->
32				<!-- It is kept here so that the diff of the generated test cases compared to the Unicode 15.1 β is minimal. -->
33				<!-- TODO(egg): Consider removing in Unicode 16.0. -->
34				<variable id="$ConjunctLinkingScripts">[\p{Gujr}\p{sc=Telu}\p{sc=Mlym}\p{sc=Orya}\p{sc=Beng}\p{sc=Deva}]</variable>
				<!-- Sets based on Indic_Conjunct_Break and Extended_Pictographic; rule 9.3 uses the first, second and -->
				<!-- fourth, rule 11 uses $ExtPict. $ExtCccZwj is the union of the Linker and Extend values. -->
35				<variable id="$ConjunctLinker">\p{Indic_Conjunct_Break=Linker}</variable>
36				<variable id="$LinkingConsonant">\p{Indic_Conjunct_Break=Consonant}</variable>
37				<variable id="$ExtPict">\p{Extended_Pictographic}</variable>
38				<variable id="$ExtCccZwj">[\p{Indic_Conjunct_Break=Linker}\p{Indic_Conjunct_Break=Extend}]</variable>
39			</variables>
40			<segmentRules>
41				<!-- RULES -->
				<!-- Notation: "×" marks a position where no break is allowed and "÷" a position where a break occurs; -->
				<!-- the patterns to its left and right describe the text before and after that position. -->
42				<!-- Break at the start and end of text, unless the text is empty. -->
43				<!-- Do not break between a CR and LF. Otherwise, break before and after controls. -->
44				<rule id="3"> $CR × $LF </rule>
45				<rule id="4"> ( $Control | $CR | $LF ) ÷ </rule>
46				<rule id="5"> ÷ ( $Control | $CR | $LF ) </rule>
47				<!-- Do not break Hangul syllable sequences. -->
48				<rule id="6"> $L × ( $L | $V | $LV | $LVT ) </rule>
49				<rule id="7"> ( $LV | $V ) × ( $V | $T ) </rule>
50				<rule id="8"> ( $LVT | $T) × $T </rule>
				<!-- Do not break before Extend characters or ZWJ. -->
51				<rule id="9"> × ($Extend | $ZWJ) </rule>
52				<!-- Only for extended grapheme clusters: Do not break before SpacingMarks, or after Prepend characters. -->
53				<rule id="9.1"> × $SpacingMark </rule>
54				<rule id="9.2"> $Prepend × </rule>
				<!-- Do not break before a linking consonant that follows consonant + linker, allowing -->
				<!-- Indic_Conjunct_Break Extend/Linker characters on either side of the linker. -->
55				<rule id="9.3"> $LinkingConsonant $ExtCccZwj* $ConjunctLinker $ExtCccZwj* × $LinkingConsonant </rule>
				<!-- Do not break between ZWJ and an Extended_Pictographic character when the ZWJ follows -->
				<!-- an Extended_Pictographic character and optional Extend characters. -->
56				<rule id="11"> $ExtPict $Extend* $ZWJ × $ExtPict </rule>
57				<!-- Do not break within emoji flag sequences. That is, do not break between regional indicator (RI) symbols if there is an odd number of RI characters before the break point. -->
				<!-- Rule 12 covers a run of RIs at the start of text (^); rule 13 covers a run preceded by a non-RI character. -->
58				<rule id="12"> ^ ($RI $RI)* $RI × $RI </rule>
59				<rule id="13"> [^$RI] ($RI $RI)* $RI × $RI </rule>
60				<!-- Otherwise, break everywhere. -->
61			</segmentRules>
62		</segmentation>
63		<segmentation type="LineBreak">
64			<variables>
				<!-- Named character sets and macros for the LineBreak rules. Several ids are defined more than once; -->
				<!-- each redefinition is written in terms of the value defined before it, so the order of the -->
				<!-- lines in this element matters. -->
65				<!-- VARIABLES -->
66				<variable id="$AI">\p{Line_Break=Ambiguous}</variable>
67				<variable id="$AK">\p{Line_Break=Aksara}</variable>
68				<variable id="$AL">\p{Line_Break=Alphabetic}</variable>
69				<variable id="$AP">\p{Line_Break=Aksara_Prebase}</variable>
70				<variable id="$AS">\p{Line_Break=Aksara_Start}</variable>
71				<variable id="$B2">\p{Line_Break=Break_Both}</variable>
72				<variable id="$BA">\p{Line_Break=Break_After}</variable>
73				<variable id="$BB">\p{Line_Break=Break_Before}</variable>
74				<variable id="$BK">\p{Line_Break=Mandatory_Break}</variable>
75				<variable id="$CB">\p{Line_Break=Contingent_Break}</variable>
76				<variable id="$CL">\p{Line_Break=Close_Punctuation}</variable>
77				<variable id="$CP">\p{Line_Break=CP}</variable>
78				<variable id="$CM1">\p{Line_Break=Combining_Mark}</variable>
79				<variable id="$CR">\p{Line_Break=Carriage_Return}</variable>
80				<variable id="$EX">\p{Line_Break=Exclamation}</variable>
81				<variable id="$GL">\p{Line_Break=Glue}</variable>
82				<variable id="$H2">\p{Line_Break=H2}</variable>
83				<variable id="$H3">\p{Line_Break=H3}</variable>
84				<variable id="$HL">\p{Line_Break=HL}</variable>
85				<variable id="$HY">\p{Line_Break=Hyphen}</variable>
86				<variable id="$ID">\p{Line_Break=Ideographic}</variable>
87				<variable id="$IN">\p{Line_Break=Inseparable}</variable>
88				<variable id="$IS">\p{Line_Break=Infix_Numeric}</variable>
89				<variable id="$JL">\p{Line_Break=JL}</variable>
90				<variable id="$JT">\p{Line_Break=JT}</variable>
91				<variable id="$JV">\p{Line_Break=JV}</variable>
92				<variable id="$LF">\p{Line_Break=Line_Feed}</variable>
93				<variable id="$NL">\p{Line_Break=Next_Line}</variable>
94				<variable id="$NS">\p{Line_Break=Nonstarter}</variable>
95				<variable id="$NU">\p{Line_Break=Numeric}</variable>
96				<variable id="$OP">\p{Line_Break=Open_Punctuation}</variable>
97				<variable id="$PO">\p{Line_Break=Postfix_Numeric}</variable>
98				<variable id="$PR">\p{Line_Break=Prefix_Numeric}</variable>
99				<variable id="$QU">\p{Line_Break=Quotation}</variable>
100				<variable id="$SA">\p{Line_Break=Complex_Context}</variable>
101				<variable id="$SG">\p{Line_Break=Surrogate}</variable>
102				<variable id="$SP">\p{Line_Break=Space}</variable>
103				<variable id="$SY">\p{Line_Break=Break_Symbols}</variable>
104				<variable id="$VF">\p{Line_Break=Virama_Final}</variable>
105				<variable id="$VI">\p{Line_Break=Virama}</variable>
106				<variable id="$WJ">\p{Line_Break=Word_Joiner}</variable>
107				<variable id="$XX">\p{Line_Break=Unknown}</variable>
108				<variable id="$ZW">\p{Line_Break=ZWSpace}</variable>
109				<variable id="$CJ">\p{Line_Break=Conditional_Japanese_Starter}</variable>
110				<variable id="$RI">\p{Line_Break=Regional_Indicator}</variable>
111				<variable id="$EB">\p{Line_Break=E_Base}</variable>
112				<variable id="$EM">\p{Line_Break=E_Modifier}</variable>
				<!-- $ZWJ_O keeps the plain ZWJ set (used by rule 8.1); $ZWJ is redefined under MACROS with $X appended. -->
113				<variable id="$ZWJ_O">\p{Line_Break=ZWJ}</variable>
114				<variable id="$ZWJ">\p{Line_Break=ZWJ}</variable>
				<!-- Quotation marks split by general category: initial (Pi), final (Pf), and QU minus each of them. -->
115				<variable id="$QU_Pi">[$QU &amp; \p{gc=Pi}]</variable>
116				<variable id="$QU_Pf">[$QU &amp; \p{gc=Pf}]</variable>
117				<variable id="$QUmPi">[$QU - \p{gc=Pi}]</variable>
118				<variable id="$QUmPf">[$QU - \p{gc=Pf}]</variable>
				<!-- Sets that exclude characters whose East_Asian_Width is F, W or H. -->
119				<variable id="$NotEastAsian">[^\p{ea=F}\p{ea=W}\p{ea=H}]</variable>
120				<variable id="$NonEastAsianBA">[$BA &amp; $NotEastAsian]</variable>
121				<variable id="$DottedCircle">◌</variable>
122				<variable id="$Hyphen">[\u2010]</variable>
123				<variable id="$CP30">[$CP-[\p{ea=F}\p{ea=W}\p{ea=H}]]</variable>
124				<variable id="$OP30">[$OP-[\p{ea=F}\p{ea=W}\p{ea=H}]]</variable>
125				<variable id="$ExtPictUnassigned">[\p{Extended_Pictographic}&amp;\p{gc=Cn}]</variable>
126				<!-- Some rules refer to the start and end of text.  We could just use a literal ^ for sot, but naming -->
127				<!-- it as in the spec makes it easier to compare.  The parser will eat (and choke on) $, so we play a -->
128				<!-- stupid trick instead. -->
129				<variable id="$sot">^</variable>
130				<variable id="$eot">(?!.)</variable>
131				<!-- SPECIAL EXTENSIONS -->
132				<variable id="$CM">[$CM1 $ZWJ]</variable>
133				<!-- LB 1  Assign a line breaking class to each code point of the input. -->
134				<!-- Resolve AI, CB, SA, SG, and XX into other line breaking classes depending on criteria outside the scope of this algorithm. -->
135				<!-- NOTE: CB is ok to fall through, but must handle others here. -->
136				<variable id="$AL">[$AI $AL $SG $XX $SA]</variable>
137				<variable id="$NS">[$NS $CJ]</variable>
138				<!-- WARNING: Fixes for Rule 9 -->
139				<!-- Treat X (CM | ZWJ)* as if it were X. -->
140				<!-- Where X is any line break class except SP, BK, CR, LF, NL or ZW. -->
141				<variable id="$X">$CM*</variable>
142				<!-- MACROS -->
				<!-- $Spec1_ is the set of classes excluded from the rule 9 treatment; $Spec2_ is its complement. -->
143				<variable id="$Spec1_">[$SP $BK $CR $LF $NL $ZW]</variable>
144				<variable id="$Spec2_">[^ $SP $BK $CR $LF $NL $ZW]</variable>
145				<variable id="$Spec3a_">[^ $SP $BA $HY $CM]</variable>
146				<variable id="$Spec3b_">[^ $BA $HY $CM]</variable>
147				<variable id="$Spec4_">[^ $NU $CM]</variable>
				<!-- Each class below is redefined as itself followed by $X, i.e. with any trailing $CM characters attached. -->
				<!-- Every entry must reference its own previous value with the leading $ sigil. -->
148				<variable id="$AI">($AI $X)</variable>
149				<variable id="$AK">($AK $X)</variable>
150				<variable id="$AL">($AL $X)</variable>
151				<variable id="$AP">($AP $X)</variable>
152				<variable id="$AS">($AS $X)</variable>
153				<variable id="$B2">($B2 $X)</variable>
154				<variable id="$BA">($BA $X)</variable>
155				<variable id="$BB">($BB $X)</variable>
156				<variable id="$CB">($CB $X)</variable>
157				<variable id="$CL">($CL $X)</variable>
158				<variable id="$CP">($CP $X)</variable>
159				<variable id="$CM">($CM $X)</variable>
160				<variable id="$EX">($EX $X)</variable>
161				<variable id="$GL">($GL $X)</variable>
162				<variable id="$H2">($H2 $X)</variable>
163				<variable id="$H3">($H3 $X)</variable>
164				<variable id="$HL">($HL $X)</variable>
165				<variable id="$HY">($HY $X)</variable>
166				<variable id="$ID">($ID $X)</variable>
167				<variable id="$IN">($IN $X)</variable>
168				<variable id="$IS">($IS $X)</variable>
169				<variable id="$JL">($JL $X)</variable>
170				<variable id="$JT">($JT $X)</variable>
171				<variable id="$JV">($JV $X)</variable>
172				<variable id="$NS">($NS $X)</variable>
173				<variable id="$NU">($NU $X)</variable>
174				<variable id="$OP">($OP $X)</variable>
175				<variable id="$PO">($PO $X)</variable>
176				<variable id="$PR">($PR $X)</variable>
177				<variable id="$QU">($QU $X)</variable>
178				<variable id="$SA">($SA $X)</variable>
179				<variable id="$SG">($SG $X)</variable>
180				<variable id="$SY">($SY $X)</variable>
181				<variable id="$VF">($VF $X)</variable>
182				<variable id="$VI">($VI $X)</variable>
183				<variable id="$WJ">($WJ $X)</variable>
184				<variable id="$XX">($XX $X)</variable>
185				<variable id="$RI">($RI $X)</variable>
186				<variable id="$EB">($EB $X)</variable>
187				<variable id="$EM">($EM $X)</variable>
188				<variable id="$ZWJ">($ZWJ $X)</variable>
189				<variable id="$QU_Pi">($QU_Pi $X)</variable>
190				<variable id="$QU_Pf">($QU_Pf $X)</variable>
191				<variable id="$QUmPi">($QUmPi $X)</variable>
192				<variable id="$QUmPf">($QUmPf $X)</variable>
193				<variable id="$NotEastAsian">( $NotEastAsian | [$NotEastAsian - $Spec1_] $X)</variable>
194				<variable id="$NonEastAsianBA">($NonEastAsianBA $X)</variable>
195				<variable id="$DottedCircle">($DottedCircle $X)</variable>
196				<variable id="$Hyphen">($Hyphen $X)</variable>
197				<variable id="$CP30">($CP30 $X)</variable>
198				<variable id="$OP30">($OP30 $X)</variable>
199				<!-- OUT OF ORDER ON PURPOSE -->
200				<!-- LB 10  Treat any remaining combining mark as AL and non-$EastAsian. -->
201				<variable id="$AL">($AL | ^ $CM | (?&lt;=$Spec1_) $CM)</variable>
202				<variable id="$NotEastAsian">( $NotEastAsian | ^ $CM | (?&lt;=$Spec1_) $CM )</variable>
203			</variables>
204			<segmentRules>
205				<!-- RULES -->
				<!-- Notation: "×" marks a position where no break is allowed and "÷" a position where a break occurs; -->
				<!-- the patterns to its left and right describe the text before and after that position. -->
206				<!-- LB 4  Always break after hard line breaks (but never between CR and LF). -->
207				<rule id="4"> $BK ÷ </rule>
208				<!-- LB 5  Treat CR followed by LF, as well as CR, LF and NL as hard line breaks. -->
209				<rule id="5.01"> $CR × $LF </rule>
210				<rule id="5.02"> $CR ÷ </rule>
211				<rule id="5.03"> $LF ÷ </rule>
212				<rule id="5.04"> $NL ÷ </rule>
213				<!-- LB 6  Do not break before hard line breaks. -->
214				<rule id="6"> × ( $BK | $CR | $LF | $NL ) </rule>
215				<!-- LB 7  Do not break before spaces or zero-width space. -->
216				<rule id="7.01"> × $SP </rule>
217				<rule id="7.02"> × $ZW </rule>
218				<!-- LB 8  Break before any character following a zero-width space, even if one or more spaces intervene. -->
219				<rule id="8"> $ZW $SP* ÷ </rule>
220				<!-- LB 8a  Do not break after a zero width joiner (for use in emoji ZWJ sequences). -->
221				<rule id="8.1"> $ZWJ_O × </rule>
222				<!-- LB 9  Do not break a combining character sequence; treat it as if it has the LB class of the base character -->
223				<!-- in all of the following rules. (Where X is any line break class except SP, BK, CR, LF, NL or ZW.) -->
224				<rule id="9"> $Spec2_ × $CM </rule>
				<!-- LB 11  Do not break before or after Word Joiner and related characters. -->
225				<rule id="11.01"> × $WJ </rule>
226				<rule id="11.02"> $WJ × </rule>
227				<!-- LB 12  Do not break after NBSP and related characters. -->
228				<rule id="12"> $GL × </rule>
				<!-- LB 12a  Do not break before Glue, except after SP, BA or HY (12.2 and 12.3 handle preceding combining marks). -->
229				<rule id="12.1"> $Spec3a_ × $GL </rule>
230				<rule id="12.2"> $Spec3b_ $CM+ × $GL </rule>
231				<rule id="12.3"> ^ $CM+ × $GL </rule>
232				<!-- LB 13  Do not break before ‘]’ or ‘!’ or ‘;’ or ‘/’, even after spaces. -->
233				<rule id="13.01"> × $EX </rule>
234				<rule id="13.02"> × $CL </rule>
235				<rule id="13.03"> × $CP </rule>
236				<rule id="13.04"> × $SY </rule>
237				<!-- LB 14  Do not break after ‘[’, even after spaces. -->
238				<rule id="14"> $OP $SP* × </rule>
239				<!-- LB 15a Do not break after an unresolved initial punctuation that lies at the start of the line, -->
240				<!-- after a space, after opening punctuation, or after an unresolved quotation mark, even after -->
241				<!-- spaces. -->
242				<rule id="15.11"> ( $sot | $BK | $CR | $LF | $NL | $OP | $QU | $GL | $SP | $ZW ) $QU_Pi $SP* × </rule>
243				<!-- LB 15b Do not break before an unresolved final punctuation that lies at the end of the line, before -->
244				<!-- a space, before a prohibited break, or before an unresolved quotation mark, even before spaces. -->
245				<rule id="15.21"> × $QU_Pf ( $SP | $GL | $WJ | $CL | $QU | $CP | $EX | $IS | $SY | $BK | $CR | $LF | $NL | $ZW | $eot ) </rule>
246				<!-- LB 15c Break before numbers starting with a decimal mark. -->
247				<rule id="15.3"> $SP ÷ $IS $NU </rule>
248				<!-- LB 15d Otherwise, do not break before commas or full stops. -->
249				<rule id="15.4"> × $IS </rule>
250				<!-- LB 16  Do not break between closing punctuation and a nonstarter (lb=NS), even with intervening spaces. -->
251				<rule id="16"> ($CL | $CP) $SP* × $NS </rule>
252				<!-- LB 17  Do not break within ‘——’, even with intervening spaces. -->
253				<rule id="17"> $B2 $SP* × $B2 </rule>
254				<!-- LB 18  Break after spaces. -->
255				<rule id="18"> $SP ÷ </rule>
256				<!-- LB 19  Do not break before or after ‘"’. -->
257				<rule id="19.01"> × $QUmPi </rule>
258				<rule id="19.02"> $QUmPf × </rule>
259				<!-- LB 19a Unless surrounded by East Asian Characters, do not break either side of any unresolved quotation marks. -->
260				<rule id="19.1"> $NotEastAsian × $QU </rule>
261				<rule id="19.11"> × $QU ( $NotEastAsian | $eot ) </rule>
262				<rule id="19.12"> $QU × $NotEastAsian </rule>
263				<rule id="19.13"> ( $sot | $NotEastAsian ) $QU × </rule>
264				<!-- LB 20  Break before and after unresolved CB. -->
265				<rule id="20.01"> ÷ $CB </rule>
266				<rule id="20.02"> $CB ÷ </rule>
267				<!-- LB 20a Do not break after a hyphen that follows break opportunity, a space, or the start of text. -->
268				<rule id="20.1"> ( $sot | $BK | $CR | $LF | $NL | $SP | $ZW | $CB | $GL ) ( $HY | $Hyphen ) × $AL </rule>
269				<!-- LB 21  Do not break before hyphen-minus, other hyphens, fixed-width spaces, small kana and other non-starters, or after acute accents. -->
270				<rule id="21.01"> × $BA </rule>
271				<rule id="21.02"> × $HY </rule>
272				<rule id="21.03"> × $NS </rule>
273				<rule id="21.04"> $BB × </rule>
274				<!-- LB 21a Do not break after the hyphen in Hebrew-hyphen-non-Hebrew. -->
275				<rule id="21.1"> $HL ($HY | $NonEastAsianBA) × [^$HL] </rule>
276				<!-- LB 21b Don’t break between Solidus and Hebrew letters. -->
277				<rule id="21.2"> $SY × $HL </rule>
278				<!-- LB 22  Do not break before ellipses. -->
279				<rule id="22"> × $IN </rule>
280				<!-- LB 23  Do not break between digits and letters. -->
281				<rule id="23.02"> ($AL | $HL) × $NU </rule>
282				<rule id="23.03"> $NU × ($AL | $HL) </rule>
283				<!-- LB 23a Do not break between numeric prefixes and ideographs, or between ideographs and numeric postfixes. -->
284				<rule id="23.12"> $PR × ($ID | $EB | $EM) </rule>
285				<rule id="23.13"> ($ID | $EB | $EM) × $PO </rule>
286				<!-- LB 24  Do not break between numeric prefix/postfix and letters, or between letters and prefix/postfix. -->
287				<rule id="24.02"> ($PR | $PO) × ($AL | $HL) </rule>
288				<rule id="24.03"> ($AL | $HL) × ($PR | $PO) </rule>
289				<!-- LB 25 Do not break numbers. -->
290				<rule id="25.01"> $NU ( $SY | $IS )* $CL × $PO </rule>
291				<rule id="25.02"> $NU ( $SY | $IS )* $CP × $PO </rule>
292				<rule id="25.03"> $NU ( $SY | $IS )* $CL × $PR </rule>
293				<rule id="25.04"> $NU ( $SY | $IS )* $CP × $PR </rule>
294				<rule id="25.05"> $NU ( $SY | $IS )* × $PO </rule>
295				<rule id="25.06"> $NU ( $SY | $IS )* × $PR </rule>
296				<rule id="25.07"> $PO × $OP $NU </rule>
297				<rule id="25.08"> $PO × $OP $IS $NU </rule>
298				<rule id="25.09"> $PO × $NU </rule>
299				<rule id="25.1"> $PR × $OP $NU </rule>
300				<rule id="25.11"> $PR × $OP $IS $NU </rule>
301				<rule id="25.12"> $PR × $NU </rule>
302				<rule id="25.13"> $HY × $NU </rule>
303				<rule id="25.14"> $IS × $NU </rule>
304				<rule id="25.15"> $NU ( $SY | $IS )* × $NU </rule>
305				<!-- LB 26 Do not break a Korean syllable. -->
				<!-- NOTE(review): the alternations in rules 26.x and 27.x are not parenthesized, unlike rules 23 to 25; -->
				<!-- presumably each side of × is read as one whole pattern. Confirm before adding grouping. -->
306				<rule id="26.01"> $JL × $JL | $JV | $H2 | $H3 </rule>
307				<rule id="26.02"> $JV | $H2 × $JV | $JT </rule>
308				<rule id="26.03"> $JT | $H3 × $JT </rule>
309				<!-- LB 27 Treat a Korean Syllable Block the same as ID. -->
310				<rule id="27.01"> $JL | $JV | $JT | $H2 | $H3 × $PO </rule>
311				<rule id="27.02"> $PR × $JL | $JV | $JT | $H2 | $H3 </rule>
312				<!-- LB 28  Do not break between alphabetics ("at"). -->
313				<rule id="28"> ($AL | $HL) × ($AL | $HL) </rule>
314				<!-- LB 28a Do not break inside the orthographic syllables of Brahmic scripts. -->
315				<rule id="28.11"> $AP × ($AK | $DottedCircle | $AS) </rule>
316				<rule id="28.12"> ($AK | $DottedCircle | $AS) × ($VF | $VI) </rule>
317				<rule id="28.13"> ($AK | $DottedCircle | $AS) $VI × ($AK | $DottedCircle) </rule>
318				<rule id="28.14"> ($AK | $DottedCircle | $AS) × ($AK | $DottedCircle | $AS) $VF </rule>
319				<!-- LB 29  Do not break between numeric punctuation and alphabetics ("e.g."). -->
320				<rule id="29"> $IS × ($AL | $HL) </rule>
321				<!-- LB 30  Do not break between letters, numbers or ordinary symbols and opening or closing punctuation. -->
322				<rule id="30.01"> ($AL | $HL | $NU) × $OP30 </rule>
323				<rule id="30.02"> $CP30 × ($AL | $HL | $NU) </rule>
324				<!-- LB 30a  Break between two Regional Indicators if and only if there is an even number of them before the point being considered. -->
325				<rule id="30.11"> $sot ($RI $RI)* $RI × $RI </rule>
326				<rule id="30.12"> [^$RI] ($RI $RI)* $RI × $RI </rule>
327				<rule id="30.13"> $RI ÷ $RI </rule>
328				<!-- LB 30b Do not break between an emoji base (or potential emoji) and an emoji modifier. -->
329				<rule id="30.21"> $EB × $EM </rule>
330				<rule id="30.22"> $ExtPictUnassigned × $EM </rule>
331			</segmentRules>
332		</segmentation>
333		<segmentation type="SentenceBreak">
334			<variables>
				<!-- Named character sets and macros for the SentenceBreak rules. Ids under SPECIAL EXTENSIONS are -->
				<!-- redefined in terms of their earlier value, so the order of the lines in this element matters. -->
335				<!-- VARIABLES -->
336				<variable id="$CR">\p{Sentence_Break=CR}</variable>
337				<variable id="$LF">\p{Sentence_Break=LF}</variable>
338				<variable id="$Extend">\p{Sentence_Break=Extend}</variable>
339				<variable id="$Format">\p{Sentence_Break=Format}</variable>
340				<variable id="$Sep">\p{Sentence_Break=Sep}</variable>
341				<variable id="$Sp">\p{Sentence_Break=Sp}</variable>
342				<variable id="$Lower">\p{Sentence_Break=Lower}</variable>
343				<variable id="$Upper">\p{Sentence_Break=Upper}</variable>
344				<variable id="$OLetter">\p{Sentence_Break=OLetter}</variable>
345				<variable id="$Numeric">\p{Sentence_Break=Numeric}</variable>
346				<variable id="$ATerm">\p{Sentence_Break=ATerm}</variable>
347				<variable id="$STerm">\p{Sentence_Break=STerm}</variable>
348				<variable id="$Close">\p{Sentence_Break=Close}</variable>
349				<variable id="$SContinue">\p{Sentence_Break=SContinue}</variable>
350				<variable id="$Any">.</variable>
351				<!-- SPECIAL EXTENSIONS -->
352				<!-- WARNING: For Rule 5, now add format and extend to everything but Sep, Format, and Extend -->
				<!-- ($CR and $LF are likewise left without the $FE* suffix below.) -->
353				<variable id="$FE">[$Format $Extend]</variable>
				<!-- $NotPreLower_ is defined before the redefinitions below, so it is built from the plain property sets. -->
354				<variable id="$NotPreLower_">[^ $OLetter $Upper $Lower $Sep $CR $LF $STerm $ATerm]</variable>
355				<variable id="$Sp">($Sp $FE*)</variable>
356				<variable id="$Lower">($Lower $FE*)</variable>
357				<variable id="$Upper">($Upper $FE*)</variable>
358				<variable id="$OLetter">($OLetter $FE*)</variable>
359				<variable id="$Numeric">($Numeric $FE*)</variable>
360				<variable id="$ATerm">($ATerm $FE*)</variable>
361				<variable id="$STerm">($STerm $FE*)</variable>
362				<variable id="$Close">($Close $FE*)</variable>
363				<variable id="$SContinue">($SContinue $FE*)</variable>
364				<!-- MACROS -->
				<!-- Shorthand alternations used by the rules: paragraph separators and sentence terminators. -->
365				<variable id="$ParaSep">($Sep | $CR | $LF)</variable>
366				<variable id="$SATerm">($STerm | $ATerm)</variable>
367			</variables>
368			<segmentRules>
369				<!-- RULES -->
				<!-- Notation: "×" marks a position where no break is allowed and "÷" a position where a break occurs; -->
				<!-- the patterns to its left and right describe the text before and after that position. -->
370				<!-- Break at the start and end of text, unless the text is empty. -->
371				<!-- Do not break within CRLF. -->
372				<rule id="3"> $CR × $LF </rule>
373				<!-- Break after paragraph separators. -->
374				<rule id="4"> $ParaSep ÷ </rule>
375				<!-- Ignore Format and Extend characters, except after sot, ParaSep, and within CRLF. (See Section 6.2, Replacing Ignore Rules.) This also has the effect of: Any × (Format | Extend) -->
376				<!-- WARNING: Implemented as don't break before format (except after linebreaks), -->
377				<!-- AND add format and extend in all variables definitions that appear after this point! -->
378				<rule id="5"> × [$Format $Extend] </rule>
379				<!-- Do not break after full stop in certain contexts. [See note below.] -->
380				<!-- Do not break after ambiguous terminators like period, if immediately followed by a number or lowercase letter, -->
381				<!-- is between uppercase letters, or if the first following letter (optionally after certain punctuation) is lowercase. -->
382				<!-- For example, a period may be an abbreviation or numeric period, and not mark the end of a sentence. -->
383				<rule id="6"> $ATerm × $Numeric </rule>
384				<rule id="7"> ($Upper | $Lower) $ATerm × $Upper </rule>
385				<rule id="8"> $ATerm $Close* $Sp* × $NotPreLower_* $Lower </rule>
386				<rule id="8.1"> $SATerm $Close* $Sp* × ($SContinue | $SATerm) </rule>
387				<!-- Break after sentence terminators, but include closing punctuation, trailing spaces, and any paragraph separator. [See note below.] Include closing punctuation, trailing spaces, and (optionally) a paragraph separator. -->
388				<rule id="9"> $SATerm $Close* × ( $Close | $Sp | $ParaSep ) </rule>
389				<!-- Note the fix to $Sp*, $Sep? -->
390				<rule id="10"> $SATerm $Close* $Sp* × ( $Sp | $ParaSep ) </rule>
391				<rule id="11"> $SATerm $Close* $Sp* $ParaSep? ÷ </rule>
392				<!-- Otherwise, do not break -->
				<!-- ($Any is defined as "." in the variables of this segmentation.) -->
393				<rule id="998"> × $Any </rule>
394			</segmentRules>
395		</segmentation>
396		<segmentation type="WordBreak">
397			<variables>
				<!-- Named character sets and macros for the WordBreak rules. Ids under SPECIAL EXTENSIONS are -->
				<!-- redefined in terms of their earlier value, so the order of the lines in this element matters. -->
398				<!-- VARIABLES -->
399				<variable id="$CR">\p{Word_Break=CR}</variable>
400				<variable id="$LF">\p{Word_Break=LF}</variable>
401				<variable id="$Newline">\p{Word_Break=Newline}</variable>
402				<variable id="$Extend">\p{Word_Break=Extend}</variable>
403				<!-- Now normal variables -->
404				<variable id="$Format">[\p{Word_Break=Format}]</variable>
405				<variable id="$Katakana">\p{Word_Break=Katakana}</variable>
406				<variable id="$ALetter">\p{Word_Break=ALetter}</variable>
407				<variable id="$MidLetter">\p{Word_Break=MidLetter}</variable>
408				<variable id="$MidNum">\p{Word_Break=MidNum}</variable>
409				<variable id="$MidNumLet">\p{Word_Break=MidNumLet}</variable>
410				<variable id="$Numeric">\p{Word_Break=Numeric}</variable>
411				<variable id="$ExtendNumLet">\p{Word_Break=ExtendNumLet}</variable>
412				<variable id="$RI">\p{Word_Break=Regional_Indicator}</variable>
413				<variable id="$Hebrew_Letter">\p{Word_Break=Hebrew_Letter}</variable>
414				<variable id="$Double_Quote">\p{Word_Break=Double_Quote}</variable>
415				<variable id="$Single_Quote">\p{Word_Break=Single_Quote}</variable>
416				<variable id="$ZWJ">\p{Word_Break=ZWJ}</variable>
417				<!-- Note: The following may overlap with the above -->
418				<variable id="$ExtPict">\p{Extended_Pictographic}</variable>
419				<variable id="$WSegSpace">\p{Word_Break=WSegSpace}</variable>
420				<!-- MACROS -->
				<!-- Shorthand alternations: any letter (ALetter or Hebrew_Letter), and MidNumLet or Single_Quote. -->
421				<variable id="$AHLetter">($ALetter | $Hebrew_Letter)</variable>
422				<variable id="$MidNumLetQ">($MidNumLet | $Single_Quote)</variable>
423				<!-- SPECIAL EXTENSIONS -->
424				<!-- Add format and extend to everything -->
				<!-- $FE also includes $ZWJ. The sets $CR, $LF, $Newline, $Extend, $Format, $ZWJ, $ExtPict and -->
				<!-- $WSegSpace are not given the $FE* suffix below. -->
425				<variable id="$FE">[$Format $Extend $ZWJ]</variable>
426				<variable id="$NotBreak_">[^ $Newline $CR $LF ]</variable>
427				<variable id="$Katakana">($Katakana $FE*)</variable>
428				<variable id="$ALetter">($ALetter $FE*)</variable>
429				<variable id="$MidLetter">($MidLetter $FE*)</variable>
430				<variable id="$MidNum">($MidNum $FE*)</variable>
431				<variable id="$MidNumLet">($MidNumLet $FE*)</variable>
432				<variable id="$Numeric">($Numeric $FE*)</variable>
433				<variable id="$ExtendNumLet">($ExtendNumLet $FE*)</variable>
434				<variable id="$RI">($RI $FE*)</variable>
435				<variable id="$Hebrew_Letter">($Hebrew_Letter $FE*)</variable>
436				<variable id="$Double_Quote">($Double_Quote $FE*)</variable>
437				<variable id="$Single_Quote">($Single_Quote $FE*)</variable>
438				<variable id="$AHLetter">($AHLetter $FE*)</variable>
439				<variable id="$MidNumLetQ">($MidNumLetQ $FE*)</variable>
440			</variables>
441			<segmentRules>
442				<!-- RULES -->
				<!-- Notation: "×" marks a position where no break is allowed and "÷" a position where a break occurs; -->
				<!-- the patterns to its left and right describe the text before and after that position. -->
443				<!-- Break at the start and end of text, unless the text is empty. -->
444				<!-- Do not break within CRLF. -->
445				<rule id="3"> $CR × $LF </rule>
446				<!-- Otherwise break before and after Newlines (including CR and LF) -->
447				<rule id="3.1"> ($Newline | $CR | $LF) ÷ </rule>
448				<rule id="3.2"> ÷ ($Newline | $CR | $LF) </rule>
449				<!-- Do not break within emoji zwj sequences. -->
450				<rule id="3.3"> $ZWJ × $ExtPict </rule>
				<!-- Do not break between two WSegSpace characters. -->
451				<rule id="3.4"> $WSegSpace × $WSegSpace </rule>
452				<!-- Ignore Format and Extend characters, except after sot, CR, LF, and Newline. (See Section 6.2, Replacing Ignore Rules.) This also has the effect of: Any × (Format | Extend) -->
453				<!-- WARNING: Implemented as don't break before format (except after linebreaks), -->
454				<!-- AND add format and extend in all variables definitions that appear after this point! -->
455				<rule id="4"> $NotBreak_ × [$Format $Extend $ZWJ] </rule>
456				<!-- VANILLA RULES -->
457				<!-- Do not break between most letters. -->
458				<rule id="5"> $AHLetter × $AHLetter </rule>
459				<!-- Do not break letters across certain punctuation. -->
460				<rule id="6"> $AHLetter × ($MidLetter | $MidNumLetQ) $AHLetter </rule>
461				<rule id="7"> $AHLetter ($MidLetter | $MidNumLetQ) × $AHLetter </rule>
				<!-- Hebrew letters: do not break before a following single quote, nor on either side of a double quote -->
				<!-- that sits between two Hebrew letters. -->
462				<rule id="7.1"> $Hebrew_Letter × $Single_Quote </rule>
463				<rule id="7.2"> $Hebrew_Letter × $Double_Quote $Hebrew_Letter </rule>
464				<rule id="7.3"> $Hebrew_Letter $Double_Quote × $Hebrew_Letter </rule>
465				<!-- Do not break within sequences of digits, or digits adjacent to letters (“3a”, or “A3”). -->
466				<rule id="8"> $Numeric × $Numeric </rule>
467				<rule id="9"> $AHLetter × $Numeric </rule>
468				<rule id="10"> $Numeric × $AHLetter </rule>
469				<!-- Do not break within sequences, such as “3.2” or “3,456.789”. -->
470				<rule id="11"> $Numeric ($MidNum | $MidNumLetQ) × $Numeric </rule>
471				<rule id="12"> $Numeric × ($MidNum | $MidNumLetQ) $Numeric </rule>
472				<!-- Do not break between Katakana. -->
473				<rule id="13"> $Katakana × $Katakana </rule>
474				<!-- Do not break from extenders. -->
475				<rule id="13.1"> ($AHLetter | $Numeric | $Katakana | $ExtendNumLet) × $ExtendNumLet </rule>
476				<rule id="13.2"> $ExtendNumLet × ($AHLetter | $Numeric | $Katakana) </rule>
477				<!-- Do not break within emoji flag sequences. That is, do not break between regional indicator (RI) symbols if there is an odd number of RI characters before the break point. -->
				<!-- Rule 15 covers a run of RIs at the start of text (^); rule 16 covers a run preceded by a non-RI character. -->
478				<rule id="15"> ^ ($RI $RI)* $RI × $RI </rule>
479				<rule id="16"> [^$RI] ($RI $RI)* $RI × $RI </rule>
480				<!-- Otherwise, break everywhere (including around ideographs). -->
481			</segmentRules>
482		</segmentation>
483	</segmentations>
484</ldml>
485