• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1Index: source/data/brkitr/word.txt
2===================================================================
3--- source/data/brkitr/word.txt	(revision 264859)
4+++ source/data/brkitr/word.txt	(working copy)
5@@ -56,15 +56,13 @@
6 #   5.0 or later as the definition of Complex_Context was corrected to include all
7 #   characters requiring dictionary break.
8
9-$Control        = [\p{Grapheme_Cluster_Break = Control}];
10+$Control        = [\p{Grapheme_Cluster_Break = Control}];
11 $HangulSyllable = [\uac00-\ud7a3];
12 $ComplexContext = [:LineBreak = Complex_Context:];
13 $KanaKanji      = [$Han $Hiragana $Katakana];
14-$dictionaryCJK  = [$KanaKanji $HangulSyllable];
15-$dictionary     = [$ComplexContext $dictionaryCJK];
16+$dictionary     = [$ComplexContext];
17
18-# leave CJK scripts out of ALetterPlus
19-$ALetterPlus  = [$ALetter-$dictionaryCJK [$ComplexContext-$Extend-$Control]];
20+$ALetterPlus  = [$ALetter [$ComplexContext-$Extend-$Control]];
21
22
23 #
24@@ -166,11 +164,6 @@
25
26 $Regional_IndicatorEx $Regional_IndicatorEx;
27
28-# special handling for CJK characters: chain for later dictionary segmentation
29-$HangulSyllable $HangulSyllable {200};
30-$KanaKanji $KanaKanji {400}; # different rule status if both kana and kanji found
31-
32-
33 ## -------------------------------------------------
34
35 !!reverse;
36@@ -237,10 +230,6 @@
37
38 $BackRegional_IndicatorEx $BackRegional_IndicatorEx;
39
40-# special handling for CJK characters: chain for later dictionary segmentation
41-$HangulSyllable $HangulSyllable;
42-$KanaKanji $KanaKanji; #different rule status if both kanji and kana found
43-
44 ## -------------------------------------------------
45
46 !!safe_reverse;
47Index: source/data/brkitr/brklocal.mk
48===================================================================
49--- source/data/brkitr/brklocal.mk	(revision 264859)
50+++ source/data/brkitr/brklocal.mk	(working copy)
51@@ -34,13 +34,13 @@
52
53
54 # List of dictionary files (dict).
55-BRK_DICT_SOURCE = cjdict.txt khmerdict.txt laodict.txt thaidict.txt
56+BRK_DICT_SOURCE = khmerdict.txt laodict.txt thaidict.txt
57
58
59 # List of break iterator files (brk).
60-BRK_SOURCE = char.txt line.txt line_fi.txt sent.txt sent_el.txt title.txt word.txt
61+BRK_SOURCE = char.txt line.txt line_fi.txt sent.txt sent_el.txt title.txt word.txt word_ja.txt
62
63
64 # Ordinary resources
65-BRK_RES_SOURCE = el.txt en.txt en_US.txt fi.txt
66+BRK_RES_SOURCE = el.txt en.txt en_US.txt fi.txt ja.txt
67
68Index: source/data/brkitr/root.txt
69===================================================================
70--- source/data/brkitr/root.txt	(revision 264859)
71+++ source/data/brkitr/root.txt	(working copy)
72@@ -16,9 +16,6 @@
73         word:process(dependency){"word.brk"}
74     }
75     dictionaries{
76-        Hani:process(dependency){"cjdict.dict"}
77-        Hira:process(dependency){"cjdict.dict"}
78-        Kata:process(dependency){"cjdict.dict"}
79         Khmr:process(dependency){"khmerdict.dict"}
80         Laoo:process(dependency){"laodict.dict"}
81         Thai:process(dependency){"thaidict.dict"}
82Index: source/data/brkitr/ja.txt
83===================================================================
84--- source/data/brkitr/ja.txt	(revision 264859)
85+++ source/data/brkitr/ja.txt	(working copy)
86@@ -9,6 +9,6 @@
87 ja{
88     Version{"1.1"}
89     boundaries{
90-        line:process(dependency){"line_ja.brk"}
91+        word:process(dependency){"word_ja.brk"}
92     }
93 }
94