• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
\directlua{
  % load_patterns(tag, dir): create a new LuaTeX language from plain-text hyph-utf8
  % files, load its patterns and exceptions, print \language <id> back to TeX and
  % set catcodes and lccodes from the character file.
  %   tag  string used in the file names hyph-<tag>.pat.txt, .hyp.txt and .chr.txt
  %   dir  optional directory containing those files; when absent the relative
  %        hyph-utf8 path below is used
  % Returns nothing.  Does nothing when tag is missing or not a string.  Raises a
  % Lua error when the pattern file cannot be opened.
  function load_patterns(tag, dir)
    if not tag or type(tag) \string~= "string" then return end

    local new_lang = lang.new()
    local pattdir = dir or "../hyph-utf8/tex/generic/hyph-utf8/patterns/txt"
    local stem = pattdir .. "/hyph-" .. tag

    % Read a whole file and close the handle again.  Returns nil plus the
    % io.open message when the file cannot be opened.
    local function slurp(path)
      local handle, err = io.open(path, "r")
      if not handle then return nil, err end
      local content = handle:read("*all")
      handle:close()
      return content
    end

    local pattlist, patterr = slurp(stem .. ".pat.txt")
    if not pattlist then
      error("load_patterns: cannot read patterns: " .. tostring(patterr))
    end
    lang.patterns(new_lang, pattlist)

    % A missing exception file is reported and skipped instead of crashing on a
    % nil file handle.
    local hyphlist, hypherr = slurp(stem .. ".hyp.txt")
    if hyphlist then
      lang.hyphenation(new_lang, hyphlist)
    else
      texio.write_nl("term and log",
        "load_patterns: no hyphenation exceptions loaded: " .. tostring(hypherr))
    end
    tex.print("\string\\language") tex.print(lang.id(new_lang))

    local usv = unicode.utf8.byte

    % make_letter and set_lccode are left as globals, exactly as before, in case
    % code outside this block calls them.

    % Print \catcode<code point>=11\relax for the given character.
    function make_letter(inputcode)
      tex.print("\string\\catcode")
      tex.print(usv(inputcode))
      tex.print("=11\string\\relax")
    end

    % Print \lccode<code point of inputcode>=<code point of lccode>\relax.
    function set_lccode(inputcode, lccode)
      tex.print("\string\\lccode")
      tex.print(usv(inputcode))
      tex.print("=")
      tex.print(usv(lccode))
      tex.print("\string\\relax")
    end

    % Each line of the character file is split into characters: the first one is
    % used as the lccode of itself and of the second one.
    for line in io.lines(stem .. ".chr.txt") do
      local chars = { }
      for char in unicode.utf8.gmatch(line, ".") do
        table.insert(chars, char)
      end
      local lower, upper = chars[1], chars[2]
      % Empty lines are skipped and a line with a single character is handled
      % without its missing partner.
      if lower then
        make_letter(lower)
        if upper then make_letter(upper) end
        set_lccode(lower, lower)
        if upper then set_lccode(upper, lower) end
      end
    end
  end
}
46
\directlua{
  % dump_node(n): write the type of node n to terminal and log; for a whatsit also
  % its subtype (plus the box widths of a local_par), for a glyph its font and
  % character.
  function dump_node(n)
    local kind = node.type(n.id)
    texio.write_nl("term and log", "NODE type " .. kind)
    if kind == "whatsit" then
      local subtype = node.whatsits()[n.subtype]
      texio.write_nl("term and log", " subtype " .. subtype)
      if subtype == "local_par" then
        texio.write_nl("term and log", " box_left_width " .. n.box_left_width .. ", box_right_width " .. n.box_right_width)
      end
    elseif kind == "glyph" then
      texio.write("term and log", " FONT " .. n.font .. " CHAR " .. n.char)
      texio.write_nl("term and log", "(" .. unicode.utf8.char(n.char) .. ")")
    end
  end

  % Node ids and output helpers shared by the functions below.
  local ni = node.id
  local hlist_id, glyph_id, disc_id, kern_id = ni("hlist"), ni("glyph"), ni("disc"), ni("kern")
  local tw, twnl, chr = texio.write, texio.write_nl, unicode.utf8.char

  % dump_node_list(head): write one line per node of the list starting at head,
  % giving the node type and, for glyphs, the code point and the character.
  function dump_node_list(head)
    local n = head
    twnl("")
    while n do
      tw("NODE type " .. node.type(n.id))
      if n.id == glyph_id then
        tw(" CHAR ") ; tw(string.format("U+\string\%04X", n.char))
        tw(" (") ; tw(chr(n.char)) ; tw(")")
      end
      tw("\string\n")
      n = n.next
    end
  end

  % hyphen_collector: coroutine that accumulates word tables.  Each resume may
  % pass a table with the fields origword and hyphenatedword and always yields
  % the current list.  A hyphenatedword of "<reset>" empties the list; entries
  % whose hyphenatedword is at most one byte long are not stored.
  hyphen_collector = coroutine.create(function(wd)
    local list_of_words = { }
    local w = wd
    while true do
      if not w then w = { } end
      local h = w.hyphenatedword
      if h == "<reset>" then list_of_words = { } ; h = "" end
      if h and tostring(h):len() > 1 then table.insert(list_of_words, w) end
      w = coroutine.yield(list_of_words)
    end
  end)

  % insert_explicit_hyphen(head): walk the node list and, for every run of glyph,
  % discretionary and kern nodes, replace each discretionary by a real hyphen glyph
  % (char 45 in the current font) and report the word with and without the inserted
  % hyphens to hyphen_collector.  A discretionary that directly follows an explicit
  % hyphen glyph is removed without inserting a second hyphen.
  % NOTE(review): nothing is returned; as a linebreak_filter this presumably leaves
  % line breaking to LuaTeX itself -- confirm against the manual.
  function insert_explicit_hyphen(head)
    local n = head
    % Template glyph, copied in front of every discretionary that gets a hyphen.
    local hard_hyphen = node.new(glyph_id)
    hard_hyphen.char = 45 ; hard_hyphen.font = font.current()

    local prev_is_hyphen = false
    while n do
      % Skip ahead to the first glyph of the next word.
      while n and n.id \string~= glyph_id do
        n = n.next
      end

      local origword = ""
      local hyphenatedword = ""
      while n and (n.id == glyph_id or n.id == disc_id or n.id == kern_id) do
        if n.id == glyph_id then
          % Track only the immediately preceding glyph, so that an explicit hyphen
          % cannot suppress a later, unrelated discretionary.
          prev_is_hyphen = (n.char == 45)
          origword = origword .. chr(n.char)
          hyphenatedword = hyphenatedword .. chr(n.char)
          n = n.next
        elseif n.id == disc_id then
          if prev_is_hyphen then
            prev_is_hyphen = false
          else
            hyphenatedword = hyphenatedword .. "-"
            % Keep head current: the list functions return the new head.
            head = node.insert_before(head, n, node.copy(hard_hyphen))
          end
          % node.remove hands back the node after the removed one, so the walk
          % never reads a field of a node that has left the list.
          local disc = n
          head, n = node.remove(head, disc)
          node.free(disc)
        else
          n = n.next
        end
      end
      coroutine.resume(hyphen_collector, { origword = origword, hyphenatedword = hyphenatedword })
    end
    % Only copies of the template were linked into the list.
    node.free(hard_hyphen)
  end
}
131
\directlua{
  % Run an external Lua script; judging by its file name it deals with OpenType
  % and TrueType font loading.  The path is specific to one machine.
  local helper_script = "/Users/arthur/TeX/LuaTeX/share/lua/use\string_open\string_or\string_truetype\string_font.lua"
  dofile(helper_script)
}
135
\directlua{
  % Disabled experiment (catch_discretionary): insert_explicit_hyphen used to be
  % registered on pre_linebreak_filter at this point.

  % par(): hand a \par control sequence back to TeX.
  % An earlier, disabled variant printed the end-of-line character twice first.
  par = function()
    tex.print("\string\\par")
  end

  % Calling par() followed by tex.shipout(255) was tried here and given up: it
  % tripped an assertion in ship_out (pdfshipout.w, line 281).
}
152
% \loadpatterns{<tag>}: call load_patterns for <tag> and store the tag in
% languages.tag.
% NOTE(review): the table languages is not defined in the visible part of this
% file; presumably an earlier script provides it -- confirm.
\def\loadpatterns#1{%
  \directlua{
    load_patterns("#1") ;
    languages.tag = "#1"
  }%
}
154
\directlua{
  % resethyphens(): send the "<reset>" marker to hyphen_collector, which makes it
  % start over with an empty word list.
  function resethyphens()
    coroutine.resume(hyphen_collector, { hyphenatedword = "<reset>" })
  end
}
161
\directlua{
  % Global alias used by the macros below: callback.original_register_callback
  % when that exists, callback.register otherwise.
  if callback.original_register_callback then
    register_callback = callback.original_register_callback
  else
    register_callback = callback.register
  end
}
165
% \startcollecthyphens: empty the collected word list and register
% insert_explicit_hyphen as the linebreak_filter callback.
\def\startcollecthyphens{%
  \directlua{
    resethyphens()
    register_callback("linebreak_filter", insert_explicit_hyphen)
  }%
  % The percent sign after the closing brace keeps the line end from adding a
  % space token to the macro, which would otherwise be typeset in front of the
  % text that follows.
}
171
% \stopcollecthyphens: end the current paragraph with \par, then clear the
% linebreak_filter callback again.
\def\stopcollecthyphens{%
  \par
  \directlua{
    register_callback("linebreak_filter", nil)
  }
}
176
\directlua{%
  % Lua-side entry points that print the TeX macros of the same name.
  function startcollecthyphens()
    tex.print("\string\\startcollecthyphens")
  end

  function stopcollecthyphens()
    tex.print("\string\\stopcollecthyphens")
  end

  % collecthyphens(): resume hyphen_collector without passing a word and return
  % the value it yields.  An example result noted by the original author:
  % { [1] = { hyphenatedword = "m-a-tu-la", origword = "ma-tula" } }
  function collecthyphens()
    local _, words = coroutine.resume(hyphen_collector)
    return words
  end
}
187
% \resethyphens: TeX-level entry point for the Lua function resethyphens().
\def\resethyphens{%
  \directlua{resethyphens()}%
}
189
% \collecthyphens{<text>}: process <text> between \startcollecthyphens and
% \stopcollecthyphens, then write every collected word to terminal and log as
% "hyphenatedword (origword)", one word per line.
\def\collecthyphens#1{%
  \startcollecthyphens
  #1
  \stopcollecthyphens
  \directlua{
    local target = "term and log"
    local words = collecthyphens()
    texio.write_nl(target, "")
    local count = table.maxn(words)
    local i = 1
    while i <= count do
      local entry = words[i]
      texio.write(target, tostring(entry.hyphenatedword), " (", tostring(entry.origword), ")")
      texio.write_nl(target, "")
      i = i + 1
    end
  }
}
206
% \dumpnodelist{<text>}: typeset <text> as a paragraph while a temporary
% pre_linebreak_filter callback writes its node list with dump_node_list.
\def\dumpnodelist#1{%
  \directlua{register_callback("pre_linebreak_filter", function(head)
    dump_node_list(head)
    % Return true so that the unchanged list is kept; the LuaTeX manual documents
    % only true, false or a node as results of this callback.
    return true
  end)}
  #1\par
  \directlua{register_callback("pre_linebreak_filter", nil)}
}
214
% Load the hyphenation patterns for the language tag sl.
\loadpatterns{sl}

% Define \gentium from a TrueType file at a machine-specific path and select it.
\font\gentium=/Users/arthur/Library/Fonts/GenR102.TTF
\gentium
218
% \callLuafromTeXfromLua{<lua>}: for use inside \directlua.  Expands to a tex.print
% call whose string is \directlua{<lua>}, so <lua> runs once TeX reads the printed
% line.  <lua> must not contain double quotes, since it ends up inside a
% double-quoted Lua string.
\def\callLuafromTeXfromLua#1{%
  tex.print("\string\\directlua{#1}")%
}
220
\directlua{
  % table.nentries(t): number of key/value pairs that pairs(t) visits.
  function table.nentries(t)
    local step, state, key = pairs(t)
    local count = 0
    key = step(state, key)
    while key \string~= nil do
      count = count + 1
      key = step(state, key)
    end
    return count
  end
}
228
% \comparehyphenation{<words>}: for every run of non-whitespace characters in
% <words>, typeset it with its "-" characters removed while hyphens are being
% collected, then write one line "computed (given)" per word to terminal and log.
\long\def\comparehyphenation#1{%
  % Lua character class matching anything but space, tab, CR and LF.  The percent
  % sign at the end of the next line keeps a stray space out of the macro.
  \def\nonspace{[\string^ \string\t\string\r\string\n]}%
  \directlua{
    % origs and hyphens have to be global: they are shared with the chunks that
    % are printed back to TeX below and with the reporting chunk further down.
    origs = { }
    hyphens = { }
    for o in unicode.utf8.gmatch("#1", "\nonspace*") do
      if o and o:len() > 0 then
        table.insert(origs, o)

        local w = o:gsub("-", "")
        resethyphens()
        startcollecthyphens()
        tex.print(w)
        stopcollecthyphens()
        % This chunk runs later, after TeX has processed the printed word.  When
        % nothing was collected for the word a placeholder is stored, so that
        % hyphens stays aligned with origs instead of failing on a nil entry.
        \callLuafromTeXfromLua{
          local t = collecthyphens()
          local first = t and t[1]
          table.insert(hyphens, first and first.hyphenatedword or '<none>')
        }
      end
    end
  }%
%
  \directlua{
    local n = table.maxn(origs)
    for k = 1, n do
      texio.write_nl("term and log", tostring(hyphens[k]) .. " (" .. tostring(origs[k]) .. ")")
    end
    % origs and hyphens are deliberately left set; clearing them here was
    % disabled by the original author.
  }%
  \let\nonspace\undefined
}
265
266\endinput
267