Index: source/data/brkitr/word.txt =================================================================== --- source/data/brkitr/word.txt (revision 264859) +++ source/data/brkitr/word.txt (working copy) @@ -56,15 +56,13 @@ # 5.0 or later as the definition of Complex_Context was corrected to include all # characters requiring dictionary break. -$Control = [\p{Grapheme_Cluster_Break = Control}]; +$Control = [\p{Grapheme_Cluster_Break = Control}]; $HangulSyllable = [\uac00-\ud7a3]; $ComplexContext = [:LineBreak = Complex_Context:]; $KanaKanji = [$Han $Hiragana $Katakana]; -$dictionaryCJK = [$KanaKanji $HangulSyllable]; -$dictionary = [$ComplexContext $dictionaryCJK]; +$dictionary = [$ComplexContext]; -# leave CJK scripts out of ALetterPlus -$ALetterPlus = [$ALetter-$dictionaryCJK [$ComplexContext-$Extend-$Control]]; +$ALetterPlus = [$ALetter [$ComplexContext-$Extend-$Control]]; # @@ -166,11 +164,6 @@ $Regional_IndicatorEx $Regional_IndicatorEx; -# special handling for CJK characters: chain for later dictionary segmentation -$HangulSyllable $HangulSyllable {200}; -$KanaKanji $KanaKanji {400}; # different rule status if both kana and kanji found - - ## ------------------------------------------------- !!reverse; @@ -237,10 +230,6 @@ $BackRegional_IndicatorEx $BackRegional_IndicatorEx; -# special handling for CJK characters: chain for later dictionary segmentation -$HangulSyllable $HangulSyllable; -$KanaKanji $KanaKanji; #different rule status if both kanji and kana found - ## ------------------------------------------------- !!safe_reverse; Index: source/data/brkitr/brklocal.mk =================================================================== --- source/data/brkitr/brklocal.mk (revision 264859) +++ source/data/brkitr/brklocal.mk (working copy) @@ -34,13 +34,13 @@ # List of dictionary files (dict). -BRK_DICT_SOURCE = cjdict.txt khmerdict.txt laodict.txt thaidict.txt +BRK_DICT_SOURCE = khmerdict.txt laodict.txt thaidict.txt # List of break iterator files (brk). -BRK_SOURCE = char.txt line.txt line_fi.txt sent.txt sent_el.txt title.txt word.txt +BRK_SOURCE = char.txt line.txt line_fi.txt sent.txt sent_el.txt title.txt word.txt word_ja.txt # Ordinary resources -BRK_RES_SOURCE = el.txt en.txt en_US.txt fi.txt +BRK_RES_SOURCE = el.txt en.txt en_US.txt fi.txt ja.txt Index: source/data/brkitr/root.txt =================================================================== --- source/data/brkitr/root.txt (revision 264859) +++ source/data/brkitr/root.txt (working copy) @@ -16,9 +16,6 @@ word:process(dependency){"word.brk"} } dictionaries{ - Hani:process(dependency){"cjdict.dict"} - Hira:process(dependency){"cjdict.dict"} - Kata:process(dependency){"cjdict.dict"} Khmr:process(dependency){"khmerdict.dict"} Laoo:process(dependency){"laodict.dict"} Thai:process(dependency){"thaidict.dict"} Index: source/data/brkitr/ja.txt =================================================================== --- source/data/brkitr/ja.txt (revision 264859) +++ source/data/brkitr/ja.txt (working copy) @@ -9,6 +9,6 @@ ja{ Version{"1.1"} boundaries{ - line:process(dependency){"line_ja.brk"} + word:process(dependency){"word_ja.brk"} } }