1diff --git a/source/data/brkitr/brklocal.mk b/source/data/brkitr/brklocal.mk 2index 91754f1..ccac4d1 100644 3--- a/source/data/brkitr/brklocal.mk 4+++ b/source/data/brkitr/brklocal.mk 5@@ -34,15 +34,15 @@ BRK_RES_ALIAS_SOURCE = $(BRK_RES_SYNTHETIC_ALIAS) 6 7 8 # List of compact trie dictionary files (ctd). 9-BRK_CTD_SOURCE = thaidict.txt cjdict.txt 10+BRK_CTD_SOURCE = thaidict.txt 11 12 13 # List of break iterator files (brk). 14-# Chrome change: remove word_ja.txt and line_he.txt 15-BRK_SOURCE = sent_el.txt word_POSIX.txt line_fi.txt char.txt word.txt line.txt sent.txt title.txt char_th.txt 16+# Chrome change: remove line_he.txt 17+BRK_SOURCE = sent_el.txt word_POSIX.txt line_fi.txt word_ja.txt char.txt word.txt line.txt sent.txt title.txt char_th.txt 18 19 20 # Ordinary resources 21-# Chrome change: remove ja.txt and he.txt 22+# Chrome change: remove he.txt 23 BRK_RES_SOURCE = el.txt en.txt en_US.txt en_US_POSIX.txt\ 24- fi.txt th.txt 25+ fi.txt ja.txt th.txt 26diff --git a/source/data/brkitr/root.txt b/source/data/brkitr/root.txt 27index fb83ac3..5d839bd 100644 28--- a/source/data/brkitr/root.txt 29+++ b/source/data/brkitr/root.txt 30@@ -17,8 +17,5 @@ root{ 31 } 32 dictionaries{ 33 Thai:process(dependency){"thaidict.ctd"} 34- Hani:process(dependency){"cjdict.ctd"} 35- Hira:process(dependency){"cjdict.ctd"} 36- Kata:process(dependency){"cjdict.ctd"} 37 } 38 } 39diff --git a/source/data/brkitr/word.txt b/source/data/brkitr/word.txt 40index 0b49377..a0e1ceb 100644 41--- a/source/data/brkitr/word.txt 42+++ b/source/data/brkitr/word.txt 43@@ -60,11 +60,10 @@ $Control = [\p{Grapheme_Cluster_Break = Control}]; 44 $HangulSyllable = [\uac00-\ud7a3]; 45 $ComplexContext = [:LineBreak = Complex_Context:]; 46 $KanaKanji = [$Han $Hiragana $Katakana]; 47-$dictionaryCJK = [$KanaKanji $HangulSyllable]; 48-$dictionary = [$ComplexContext $dictionaryCJK]; 49+$dictionary = [:LineBreak = Complex_Context:]; 50 51-# leave CJK scripts out of ALetterPlus 52-$ALetterPlus = [$ALetter-$dictionaryCJK [$ComplexContext-$Extend-$Control]]; 53+$ALetterPlus = [$ALetter [$dictionary-$Extend-$Control]]; # Note: default ALetter does not 54+ # include the dictionary characters. 55 56 57 # 58@@ -99,8 +98,7 @@ $CR $LF; 59 # begins with a group of Format chars, or with a "word" consisting of a single 60 # char that is not in any of the listed word break categories followed by 61 # format char(s). 62- # format char(s), or is not a CJK dictionary character. 63-[^$CR $LF $Newline $dictionaryCJK]? ($Extend | $Format)+; 64+[^$CR $LF $Newline]? ($Extend | $Format)+; 65 66 $NumericEx {100}; 67 $ALetterEx {200}; 68@@ -155,9 +153,6 @@ $ExtendNumLetEx $ALetterEx {200}; # (13b) 69 $ExtendNumLetEx $NumericEx {100}; # (13b) 70 $ExtendNumLetEx $KatakanaEx {400}; # (13b) 71 72-# special handling for CJK characters: chain for later dictionary segmentation 73-$HangulSyllable $HangulSyllable {200}; 74-$KanaKanji $KanaKanji {400}; #different rule status if both kanji and kana found 75 76 77 ## ------------------------------------------------- 78@@ -179,7 +174,7 @@ $BackHebrewLetEx = ($Format | $Extend)* $HebrewLet; 79 $LF $CR; 80 81 # rule 4 82-($Format | $Extend)* [^$CR $LF $Newline $dictionaryCJK]?; 83+($Format | $Extend)* [^$CR $LF $Newline]?; 84 85 # rule 5 86 87@@ -217,10 +212,6 @@ $BackKatakanaEx $BackKatakanaEx; 88 $BackExtendNumLetEx ($BackALetterEx | $BackNumericEx | $BackKatakanaEx | $BackExtendNumLetEx); 89 ($BackALetterEx | $BackNumericEx | $BackKatakanaEx) $BackExtendNumLetEx; 90 91-# special handling for CJK characters: chain for later dictionary segmentation 92-$HangulSyllable $HangulSyllable; 93-$KanaKanji $KanaKanji; #different rule status if both kanji and kana found 94- 95 ## ------------------------------------------------- 96 97 !!safe_reverse; 98