• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1diff --git a/source/data/brkitr/brklocal.mk b/source/data/brkitr/brklocal.mk
2index 91754f1..ccac4d1 100644
3--- a/source/data/brkitr/brklocal.mk
4+++ b/source/data/brkitr/brklocal.mk
5@@ -34,15 +34,15 @@ BRK_RES_ALIAS_SOURCE = $(BRK_RES_SYNTHETIC_ALIAS)
6
7
8 # List of compact trie dictionary files (ctd).
9-BRK_CTD_SOURCE = thaidict.txt cjdict.txt
10+BRK_CTD_SOURCE = thaidict.txt
11
12
13 # List of break iterator files (brk).
14-# Chrome change: remove word_ja.txt and line_he.txt
15-BRK_SOURCE =  sent_el.txt word_POSIX.txt line_fi.txt char.txt word.txt line.txt sent.txt title.txt char_th.txt
16+# Chrome change: remove line_he.txt
17+BRK_SOURCE =  sent_el.txt word_POSIX.txt line_fi.txt word_ja.txt char.txt word.txt line.txt sent.txt title.txt char_th.txt
18
19
20 # Ordinary resources
21-# Chrome change: remove ja.txt and he.txt
22+# Chrome change: remove he.txt
23 BRK_RES_SOURCE = el.txt en.txt en_US.txt en_US_POSIX.txt\
24- fi.txt   th.txt
25+ fi.txt ja.txt th.txt
26diff --git a/source/data/brkitr/root.txt b/source/data/brkitr/root.txt
27index fb83ac3..5d839bd 100644
28--- a/source/data/brkitr/root.txt
29+++ b/source/data/brkitr/root.txt
30@@ -17,8 +17,5 @@ root{
31     }
32     dictionaries{
33         Thai:process(dependency){"thaidict.ctd"}
34-        Hani:process(dependency){"cjdict.ctd"}
35-        Hira:process(dependency){"cjdict.ctd"}
36-        Kata:process(dependency){"cjdict.ctd"}
37     }
38 }
39diff --git a/source/data/brkitr/word.txt b/source/data/brkitr/word.txt
40index 0b49377..a0e1ceb 100644
41--- a/source/data/brkitr/word.txt
42+++ b/source/data/brkitr/word.txt
43@@ -60,11 +60,10 @@ $Control        = [\p{Grapheme_Cluster_Break = Control}];
44 $HangulSyllable = [\uac00-\ud7a3];
45 $ComplexContext = [:LineBreak = Complex_Context:];
46 $KanaKanji      = [$Han $Hiragana $Katakana];
47-$dictionaryCJK  = [$KanaKanji $HangulSyllable];
48-$dictionary     = [$ComplexContext $dictionaryCJK];
49+$dictionary   = [:LineBreak = Complex_Context:];
50
51-# leave CJK scripts out of ALetterPlus
52-$ALetterPlus  = [$ALetter-$dictionaryCJK [$ComplexContext-$Extend-$Control]];
53+$ALetterPlus  = [$ALetter [$dictionary-$Extend-$Control]];   # Note:  default ALetter does not
54+                                                             #  include the dictionary characters.
55
56
57 #
58@@ -99,8 +98,7 @@ $CR $LF;
59 #          begins with a group of Format chars, or with a "word" consisting of a single
60 #          char that is not in any of the listed word break categories followed by
61 #          format char(s).
62- #          format char(s), or is not a CJK dictionary character.
63-[^$CR $LF $Newline $dictionaryCJK]? ($Extend |  $Format)+;
64+[^$CR $LF $Newline]? ($Extend |  $Format)+;
65
66 $NumericEx {100};
67 $ALetterEx {200};
68@@ -155,9 +153,6 @@ $ExtendNumLetEx $ALetterEx  {200};    #  (13b)
69 $ExtendNumLetEx $NumericEx  {100};    #  (13b)
70 $ExtendNumLetEx $KatakanaEx {400};    #  (13b)
71
72-# special handling for CJK characters: chain for later dictionary segmentation
73-$HangulSyllable $HangulSyllable {200};
74-$KanaKanji $KanaKanji {400}; #different rule status if both kanji and kana found
75
76
77 ## -------------------------------------------------
78@@ -179,7 +174,7 @@ $BackHebrewLetEx   = ($Format | $Extend)* $HebrewLet;
79 $LF $CR;
80
81 # rule 4
82-($Format | $Extend)*  [^$CR $LF $Newline $dictionaryCJK]?;
83+($Format | $Extend)*  [^$CR $LF $Newline]?;
84
85 # rule 5
86
87@@ -217,10 +212,6 @@ $BackKatakanaEx $BackKatakanaEx;
88 $BackExtendNumLetEx ($BackALetterEx | $BackNumericEx | $BackKatakanaEx | $BackExtendNumLetEx);
89 ($BackALetterEx | $BackNumericEx | $BackKatakanaEx) $BackExtendNumLetEx;
90
91-# special handling for CJK characters: chain for later dictionary segmentation
92-$HangulSyllable $HangulSyllable;
93-$KanaKanji $KanaKanji; #different rule status if both kanji and kana found
94-
95 ## -------------------------------------------------
96
97 !!safe_reverse;
98