% LuaTeX helpers that load hyphenation patterns from the hyph-utf8 text files
% and record where LuaTeX puts discretionary breaks in a piece of text.
%
% Notes for editing the Lua chunks in this file: the chunks are read by TeX
% before Lua sees them, so
%   * comments are written with the TeX comment character (or as Lua block
%     comments); a Lua line comment would swallow the rest of the chunk because
%     TeX joins the lines with spaces;
%   * there are no blank lines inside \directlua (TeX turns a blank line into
%     a \par token, which would be passed on to Lua as text);
%   * characters special to TeX (tilde, backslash, ...) are written with \string.
\directlua{
  % load_patterns(tag, dir)
  %   tag: language tag used in the file names hyph-<tag>.pat.txt,
  %        hyph-<tag>.hyp.txt and hyph-<tag>.chr.txt; the function returns
  %        silently when it is missing or not a string.
  %   dir: optional directory holding those files; defaults to the relative
  %        hyph-utf8 path below.
  % Creates a new language object, feeds it the patterns and the hyphenation
  % exceptions, prints "\language <id>" back to TeX, and then, for every line
  % of the .chr.txt file, makes its first two characters letters (catcode 11)
  % whose lccode is the first character.
  % Raises an error naming the file when the .pat.txt or .hyp.txt file cannot
  % be opened.
  function load_patterns(tag, dir)
    if not tag or type(tag) \string~= "string" then return end
    %
    local new_lang = lang.new()
    local pattdir = dir or "../hyph-utf8/tex/generic/hyph-utf8/patterns/txt"
    local prefix = pattdir .. "/hyph-" .. tag
    %
    % Read a whole file and close the handle again; assert turns the
    % (nil, message) result of a failed io.open into a readable error.
    local function slurp(path)
      local file = assert(io.open(path, "r"))
      local content = file:read("*all")
      file:close()
      return content
    end
    %
    lang.patterns(new_lang, slurp(prefix .. ".pat.txt"))
    lang.hyphenation(new_lang, slurp(prefix .. ".hyp.txt"))
    tex.print("\string\\language") tex.print(lang.id(new_lang))
    %
    local usv = unicode.utf8.byte
    %
    % make_letter and set_lccode are (re)defined as globals on every call,
    % as before, in case other code relies on them.
    % make_letter(c): print "\catcode <code point of c> =11\relax" to TeX.
    function make_letter(inputcode)
      tex.print("\string\\catcode")
      tex.print(usv(inputcode))
      tex.print("=11\string\\relax")
    end
    %
    % set_lccode(c, lc): print "\lccode <code point of c> = <code point of lc>\relax".
    function set_lccode(inputcode, lccode)
      tex.print("\string\\lccode")
      tex.print(usv(inputcode))
      tex.print("=")
      tex.print(usv(lccode))
      tex.print("\string\\relax")
    end
    %
    local charfile = prefix .. ".chr.txt"
    for line in io.lines(charfile) do
      local chars = { }
      for char in unicode.utf8.gmatch(line, ".") do
        table.insert(chars, char)
      end
      % Only the first two characters of a line are used. Empty lines are
      % skipped and a line with a single character is handled on its own
      % instead of failing on the missing second character.
      local first, second = chars[1], chars[2]
      if first then
        make_letter(first)
        if second then make_letter(second) end
        set_lccode(first, first)
        if second then set_lccode(second, first) end
      end
    end
  end
}

\directlua{
  % dump_node(n): write the type of node n to terminal and log; for whatsits
  % also the subtype name (plus the box widths of a local_par whatsit), for
  % glyphs also the font number, the character code and the character itself.
  function dump_node(n)
    local kind = node.type(n.id)
    texio.write_nl("term and log", "NODE type " .. kind)
    if kind == "whatsit" then
      local subtype = node.whatsits()[n.subtype]
      texio.write_nl("term and log", " subtype " .. subtype)
      if subtype == "local_par" then
        texio.write_nl("term and log", " box_left_width " .. n.box_left_width .. ", box_right_width " .. n.box_right_width)
      end
    elseif kind == "glyph" then
      texio.write("term and log", " FONT " .. n.font .. " CHAR " .. n.char)
      texio.write_nl("term and log", "(" .. unicode.utf8.char(n.char) .. ")")
    end
  end
  %
  % Shortcuts shared by the functions in the rest of this chunk.
  local ni = node.id
  local hlist_id, glyph_id, disc_id, kern_id = ni("hlist"), ni("glyph"), ni("disc"), ni("kern")
  local tw, twnl, chr = texio.write, texio.write_nl, unicode.utf8.char
  %
  % dump_node_list(head): write one line per node of the list starting at head,
  % giving the node type and, for glyphs, the code point and the character.
  function dump_node_list(head)
    local n = head
    twnl("")
    while n do
      tw("NODE type " .. node.type(n.id))
      if n.id == glyph_id then
        tw(" CHAR ") ; tw(string.format("U+\string\%04X", n.char))
        tw(" (") ; tw(chr(n.char)) ; tw(")")
      end
      tw("\string\n")
      n = n.next
    end
  end
  %
  % hyphen_collector: coroutine that accumulates word records. Every resume
  % may pass a table with the fields origword and hyphenatedword:
  %   * hyphenatedword == "<reset>" empties the accumulated list;
  %   * a hyphenatedword longer than one byte appends the record;
  %   * anything else (including no argument) leaves the list untouched.
  % Each resume yields the current list.
  hyphen_collector = coroutine.create(function(wd)
    local list_of_words = { }
    local w = wd
    while true do
      if not w then w = { } end
      local h = w.hyphenatedword
      if h == "<reset>" then list_of_words = { } ; h = "" end
      if h and tostring(h):len() > 1 then table.insert(list_of_words, w) end
      w = coroutine.yield(list_of_words)
    end
  end)
  %
  % insert_explicit_hyphen(head): walk the node list starting at head. For each
  % run of glyph, disc and kern nodes that starts at a glyph, build the word as
  % typed (origword) and the word with "-" at every discretionary
  % (hyphenatedword), replace each discretionary by a real hyphen glyph
  % (char 45 in the current font), and hand both strings to hyphen_collector.
  % A discretionary directly after a hyphen glyph is removed without adding
  % another hyphen.
  % Returns nothing.
  % NOTE(review): it is registered as linebreak_filter below; presumably LuaTeX
  % falls back to its own line breaking when that callback returns nothing;
  % confirm against the manual.
  function insert_explicit_hyphen(head)
    local n = head
    % Template glyph; only copies of it are linked into the list.
    local hard_hyphen = node.new(glyph_id)
    hard_hyphen.char = 45 ; hard_hyphen.font = font.current()
    %
    local prev_is_hyphen = false
    while n do
      % Skip ahead to the first glyph of the next word.
      while n and n.id \string~= glyph_id do
        n = n.next
      end
      %
      local origword = ""
      local hyphenatedword = ""
      while n and (n.id == glyph_id or n.id == disc_id or n.id == kern_id) do
        % Remember the successor first: n may be unlinked and freed below.
        local following = n.next
        if n.id == glyph_id then
          % Track whether the glyph just seen is a hyphen, so that a stale
          % flag from an earlier hyphen cannot suppress a later break point.
          prev_is_hyphen = (n.char == 45)
          % tw(chr(n.char))
          origword = origword .. chr(n.char)
          hyphenatedword = hyphenatedword .. chr(n.char)
        elseif n.id == disc_id then
          if prev_is_hyphen then
            prev_is_hyphen = false
          else
            % tw("-")
            hyphenatedword = hyphenatedword .. "-"
            head = node.insert_before(head, n, node.copy(hard_hyphen))
          end
          head = node.remove(head, n)
          % node.remove only unlinks the node; release it explicitly.
          node.free(n)
        end
        n = following
      end
      coroutine.resume(hyphen_collector, { origword = origword, hyphenatedword = hyphenatedword })
    end
    % The template itself never entered the list.
    node.free(hard_hyphen)
  end
}

% Load an external Lua file from a machine-specific absolute path.
\directlua{
  dofile("/Users/arthur/TeX/LuaTeX/share/lua/use\string_open\string_or\string_truetype\string_font.lua")
}

\directlua{
  --[[ catch_discretionary() ]]
  % callback.register("pre_linebreak_filter", insert_explicit_hyphen)
  %
  % par(): print \par back to TeX.
  function par()
    --[[
    tex.print(unicode.utf8.char(tex.endlinechar))
    tex.print(unicode.utf8.char(tex.endlinechar))
    --]]
    tex.print("\string\\par")
  end
  %
  % par()
  % Grr... Assertion failed: (0), function ship_out, file ../../../source/texk/web2c/luatexdir/pdf/pdfshipout.w, line 281.
  % tex.shipout(255)
}

% \loadpatterns{<tag>}: call load_patterns for <tag> with the default directory
% and store the tag in languages.tag.
% NOTE(review): the table "languages" is not defined in this file; presumably
% it comes from the file loaded with dofile above; confirm.
\def\loadpatterns#1{\directlua{load_patterns("#1") ; languages.tag = "#1"}}

\directlua{
  % resethyphens(): tell hyphen_collector to forget the words collected so far.
  function resethyphens()
    local t = { hyphenatedword = "<reset>" }
    coroutine.resume(hyphen_collector, t)
  end
}

% Use callback.original_register_callback when it exists, otherwise the plain
% callback.register.
\directlua{
  register_callback = callback.original_register_callback or callback.register
}

% \startcollecthyphens: empty the collector and install insert_explicit_hyphen
% as the linebreak_filter callback.
\def\startcollecthyphens{%
  \directlua{
    resethyphens()
    register_callback("linebreak_filter", insert_explicit_hyphen)}%
  }

% \stopcollecthyphens: end the current paragraph, then remove the callback.
\def\stopcollecthyphens{%
  \par
  \directlua{register_callback("linebreak_filter", nil)}%
}

\directlua{%
  % Lua entry points that print the corresponding TeX macros back to TeX.
  function startcollecthyphens() tex.print"\string\\startcollecthyphens" end
  function stopcollecthyphens() tex.print"\string\\stopcollecthyphens" end
  %
  % collecthyphens(): resume hyphen_collector without a new word and return
  % what it yields (the list of collected word records).
  function collecthyphens()
    local retvalue, res = coroutine.resume(hyphen_collector)
    return res
    % return { [1] = { hyphenatedword = "m-a-tu-la", origword = "ma-tula" } }
  end
}

% \resethyphens: TeX-level access to resethyphens().
\def\resethyphens{\directlua{resethyphens()}}

% \collecthyphens{<text>}: typeset <text> as a paragraph with the collecting
% callback active, then write every collected word to terminal and log in the
% form "hyphenatedword (origword)", one per line.
\def\collecthyphens#1{%
  \startcollecthyphens
  #1
  \stopcollecthyphens
  \directlua{
    local t = collecthyphens()
    % texio.write_nl("term and log", "(1) collecthyphens() returned a " .. type(t))
    texio.write_nl("term and log", "")
    for n = 1, table.maxn(t) do
      texio.write("term and log", tostring(t[n].hyphenatedword), " (", tostring(t[n].origword), ")")
      texio.write_nl("term and log", "")
      % texio.write("term and log", t[n], " ")
    end
    % texio.write_nl("")
  }
}

% \dumpnodelist{<text>}: typeset <text> as a paragraph while a
% pre_linebreak_filter callback dumps its node list with dump_node_list.
% NOTE(review): the callback returns nothing; check in the LuaTeX manual what
% that means for the paragraph itself.
\def\dumpnodelist#1{%
  \directlua{register_callback("pre_linebreak_filter", function(head)
    dump_node_list(head)
  end)}
  #1\par
  \directlua{register_callback("pre_linebreak_filter", nil)}
}

\loadpatterns{sl}

% Load a font from a machine-specific absolute path and select it.
\font\gentium=/Users/arthur/Library/Fonts/GenR102.TTF\gentium

% \callLuafromTeXfromLua{<lua code>}: for use inside a \directlua chunk;
% expands to a tex.print call that sends "\directlua{<lua code>}" back to TeX.
\def\callLuafromTeXfromLua#1{tex.print"\string\\directlua{#1}"}

\directlua{
  % table.nentries(t): number of key/value pairs in t, counted with pairs.
  function table.nentries(t)
    local n = 0
    for _ in pairs(t) do n = n + 1 end
    return n
  end
}

% \comparehyphenation{<words>}: split <words> at spaces, tabs and line ends.
% Every piece is stored in the global table origs; the piece with its "-"
% characters removed is printed back to TeX between \startcollecthyphens and
% \stopcollecthyphens, followed by a \directlua call that appends the first
% collected hyphenatedword to the global table hyphens. A second chunk then
% writes "hyphens[k] (origs[k])" for every piece to terminal and log.
% origs and hyphens have to be global: they are shared between separate
% \directlua chunks.
\long\def\comparehyphenation#1{%
  \def\nonspace{[\string^ \string\t\string\r\string\n]}
  \directlua{
    origs = { }
    hyphens = { }
    for o in unicode.utf8.gmatch("#1", "\nonspace*") do
      if o and o:len() > 0 then
        table.insert(origs, o)
        %
        local w = o:gsub("-", "")
        resethyphens()
        startcollecthyphens()
        tex.print(w)
        stopcollecthyphens()
        \callLuafromTeXfromLua{
          local t = collecthyphens()
          local h = t[1].hyphenatedword
          table.insert(hyphens, h)
        }
      end
    end
  }%
%
  \directlua{
    local n = table.maxn(origs)
    for k = 1, n do
      texio.write_nl("term and log", tostring(hyphens[k]) .. " (" .. tostring(origs[k]) .. ")")
    end
    %
    --[[
    origs = nil
    hyphens = nil
    --]]
  }
  \let\nonspace\undefined
}

\endinput