#!/usr/bin/env ruby
# -*- coding: utf-8 -*-
3
# This file generates FOP XML hyphenation patterns.
# Original author: Simon Pepping

# Use 'gem install unicode' if unicode is missing on your computer.
# (The requires below are currently disabled.)
# require 'jcode'
# require 'rubygems'
# require 'unicode'
11
# This script lives in collaboration/source/offo, so the top of the tree
# is three directory levels up. The path is relative, hence it is resolved
# against the directory the script is started from.
$path_to_top = '../../..'

# Directory (below the top of the tree) that contains languages.rb:
# hyph-utf8/source/generic/hyph-utf8
$path_lang = 'hyph-utf8/source/generic/hyph-utf8'
load File.join($path_to_top, $path_lang, 'languages.rb')
20
# modify methods of class Language
#
# Re-opens Language and (re)defines the readers used by this script. Every
# reader takes the directory that holds the plain-text files
# hyph-<code>.<kind>.txt and caches its result in an instance variable, so
# each file is read at most once per Language object (later calls ignore
# pattern_path and return the cached value).
class Language
  # Hyphenation exceptions from hyph-<code>.hyp.txt.
  #
  # pattern_path - directory containing the file.
  #
  # Returns an Array of Strings, one per whitespace-separated word, with
  # TeX comments (% to end of line) removed; empty when the file holds no
  # words. Raises Errno::ENOENT when the file does not exist.
  def get_exceptions(pattern_path)
    @exceptions1 ||= offo_uncommented_text(pattern_path, 'hyp').split
  end

  # Hyphenation patterns from hyph-<code>.pat.txt.
  #
  # pattern_path - directory containing the file.
  #
  # Returns an Array of Strings, one per whitespace-separated pattern, with
  # TeX comments removed. For the code 'eo' the \adj{}, \nom{} and \ver{}
  # macros are expanded into their word endings before splitting.
  # Raises Errno::ENOENT when the file does not exist.
  def get_patterns(pattern_path)
    @patterns ||= begin
      text = offo_uncommented_text(pattern_path, 'pat')
      text = offo_expand_esperanto_macros(text) if @code == 'eo'
      text.split
    end
  end

  # Comments and licence text from hyph-<code>.lic.txt.
  #
  # pattern_path - directory containing the file.
  #
  # Returns an Array with the raw lines of the file (line terminators
  # included, nothing stripped).
  def get_comments_and_licence(pattern_path)
    @comments_and_licence ||=
      File.readlines(File.expand_path(offo_source_file(pattern_path, 'lic')))
  end

  # Character classes from hyph-<code>.chr.txt.
  #
  # pattern_path - directory containing the file.
  #
  # Returns the complete, unmodified file content as one String.
  def get_classes(pattern_path)
    @classes ||= File.read(File.expand_path(offo_source_file(pattern_path, 'chr')))
  end

  private

  # Builds the name of the source file of the given kind
  # ('pat', 'hyp', 'lic' or 'chr') for this language.
  def offo_source_file(pattern_path, kind)
    "#{pattern_path}/hyph-#{@code}.#{kind}.txt"
  end

  # Reads a source file in one go and drops TeX comments.
  # No /m flag: a comment ends at the end of its own line.
  def offo_uncommented_text(pattern_path, kind)
    File.read(offo_source_file(pattern_path, kind)).gsub(/%.*/, '')
  end

  # Expands the Esperanto stem macros into the listed word endings.
  def offo_expand_esperanto_macros(text)
    text.
      gsub(/\\adj\{(.*?)\}/m, '\1a. \1aj. \1ajn. \1an. \1e.').
      gsub(/\\nom\{(.*?)\}/m, '\1a. \1aj. \1ajn. \1an. \1e. \1o. \1oj. \1ojn. \1on.').
      gsub(/\\ver\{(.*?)\}/m, '\1as. \1i. \1is. \1os. \1u. \1us.')
  end
end
85
# source patterns
# hyph-utf8/tex/generic/hyph-utf8/patterns/txt
$path_src_pat = 'hyph-utf8/tex/generic/hyph-utf8/patterns/txt'

# XML patterns
# collaboration/repository/offo
$path_offo = "collaboration/repository/offo"

# Input and output directories, both expressed relative to $path_to_top.
$rel_path_offo = "#{$path_to_top}/#{$path_offo}"
$rel_path_patterns = "#{$path_to_top}/#{$path_src_pat}"

$l = Languages.new
# TODO: should be singleton

# TODO: we should rewrite this
# not using: eo, el
# todo: mn, no!, sa, sh
# codes = ['bg', 'ca', 'cs', 'cy', 'da', 'de-1901', 'de-1996', 'de-ch-1901', 'en-gb', 'en-us', 'es', 'et', 'eu', 'fi', 'fr', 'ga', 'gl', 'hr', 'hsb', 'hu', 'ia', 'id', 'is', 'it', 'kmr', 'la', 'lt', 'lv', 'nl', 'no', 'pl', 'pt', 'ro', 'ru', 'sk', 'sl', 'sr-cyrl', 'sv', 'tr', 'uk']

# Maps the TeX language code to the code used for the generated XML file.
# Every language starts out mapped to its own code; the entries below
# override that. A nil value means "do not generate a file".
language_codes = {}
$l.list.each do |language|
  language_codes[language.code] = language.code
end
language_codes['de-1901']      = 'de_1901'
language_codes['de-1996']      = 'de'
language_codes['de-ch-1901']   = 'de_CH'
language_codes['el-monoton']   = 'el'
language_codes['el-polyton']   = 'el_Polyton'
language_codes['en-gb']        = 'en_GB'
# FOP requires a pattern file for en; use the US patterns
language_codes['en-us']        = 'en'
# hu patterns cause a stack overflow when compiled with FOP
language_codes['hu']           = nil
language_codes['la-x-classic'] = 'la_CL'
language_codes['mn-cyrl']      = 'mn'
language_codes['mn-cyrl-x-lmc'] = nil # no such pattern
language_codes['mul-ethi']     = 'mul_ET'
language_codes['sh-cyrl']      = 'sr_Cyrl'
language_codes['sh-latn']      = 'sr_Latn'
language_codes['zh-latn-pinyin']      = 'zh_Latn'

# Languages are processed in the order of their TeX code. Sorting on the
# mapped code instead would first need the nil entries filtered out,
# because nil cannot be compared with a String.
languages = $l.list.sort_by { |language| language.code }

# Block form of File.open: the file is closed even when generation of one
# of the languages raises.
File.open("#{$rel_path_offo}/info/languages-info.xml", 'w') do |languages_info|
  languages_info.puts '<?xml version="1.0" encoding="utf-8"?>'
  languages_info.puts '<?xml-stylesheet type="text/xsl" href="languages-info.xsl"?>'
  languages_info.puts '<languages-info>'

  languages.each do |language|
    code = language_codes[language.code]
    # Skip languages that do not use the new loader or have no target code.
    next unless language.use_new_loader && code

    puts "generating #{code}"
    languages_info.puts '<language>'
    languages_info.puts "<name>#{language.name}</name>"
    languages_info.puts "<tex-code>#{language.code}</tex-code>"
    languages_info.puts "<code>#{code}</code>"
    languages_info.puts "<message>#{language.message}</message>"
    languages_info.puts '</language>'

    # Read all inputs before the output file is created, so that a missing
    # input does not leave an empty XML file behind.
    comments_and_licence = language.get_comments_and_licence($rel_path_patterns)
    # do not use classes (SP 2010/10/26)
    # classes    = language.get_classes($rel_path_patterns)
    classes = nil
    exceptions = language.get_exceptions($rel_path_patterns)
    patterns   = language.get_patterns($rel_path_patterns)

    # if code == 'nn' or code == 'nb'
    # 	patterns = ""
    # 	patterns = $l['no'].get_patterns
    # end

    File.open("#{$rel_path_offo}/#{code}.xml", 'w') do |offo|
      offo.puts '<?xml version="1.0" encoding="utf-8"?>'
      offo.puts '<hyphenation-info>'
      offo.puts

      # comments and license, optional
      # (an Array of lines, so test for emptiness rather than against "")
      unless comments_and_licence.empty?
        comments_and_licence.each do |line|
          # "--" is not allowed inside an XML comment, so it is replaced by
          # two U+2010 hyphens; the TeX comment sign is dropped and every
          # non-blank line is wrapped in an XML comment.
          offo.puts line.
            gsub('--', '‐‐').
            gsub('%', '').
            gsub(/^\s*(\S+.*)$/, '<!-- \1 -->')
        end
        offo.puts
      end

      # hyphenmin, optional
      hyphenmin = language.hyphenmin
      unless hyphenmin.nil? || hyphenmin.length.zero?
        offo.puts "<hyphen-min before=\"#{hyphenmin[0]}\" after=\"#{hyphenmin[1]}\"/>"
        offo.puts
      end

      # classes, optional (currently always skipped, see above)
      unless classes.nil?
        offo.puts '<classes>'
        offo.puts classes
        offo.puts '</classes>'
        offo.puts
      end

      # exceptions, optional
      # (an Array of words; the element is left out when there are none)
      unless exceptions.empty?
        offo.puts '<exceptions>'
        offo.puts exceptions
        offo.puts '</exceptions>'
        offo.puts
      end

      # patterns; the ASCII apostrophe is written as U+2019
      offo.puts '<patterns>'
      patterns.each do |pattern|
        offo.puts pattern.gsub("'", "’")
      end
      offo.puts '</patterns>'
      offo.puts

      offo.puts '</hyphenation-info>'
    end
  end

  languages_info.puts '</languages-info>'
end
222