#!/usr/bin/env ruby
# -*- coding: utf-8 -*-

# This file generates FOP XML hyphenation patterns (OFFO) from the plain-text
# pattern files of hyph-utf8.
# original author: Simon Pepping

# use 'gem install unicode' if unicode is missing on your computer
# require 'jcode'
# require 'rubygems'
# require 'unicode'

# Relative path to the top of the checkout. The original note places this
# script in collaboration/source/offo, so it is presumably meant to be run
# from that directory.
$path_to_top = '../../..'

# languages.rb lives in hyph-utf8/source/generic/hyph-utf8
$path_lang = 'hyph-utf8/source/generic/hyph-utf8'
load "#{$path_to_top}/#{$path_lang}/languages.rb"

# Reopen class Language (loaded from languages.rb above) and define the readers
# this script uses for the per-language text files hyph-<code>.*.txt.
#
# Every reader memoizes its result in an instance variable, so the
# +pattern_path+ argument is only honoured on the first call.
class Language
  # Returns the entries of hyph-<code>.hyp.txt as an Array of Strings:
  # %-comments are removed and the remainder is split on whitespace.
  #
  # pattern_path - directory that contains the hyph-*.txt files
  #
  # Raises the usual Errno::* error if the file cannot be read.
  def get_exceptions(pattern_path)
    @exceptions1 ||= offo_tokenize(File.read("#{pattern_path}/hyph-#{@code}.hyp.txt"))
  end

  # Returns the entries of hyph-<code>.pat.txt as an Array of Strings:
  # %-comments are removed and the remainder is split on whitespace.
  #
  # For Esperanto ('eo') the \adj{..}, \nom{..} and \ver{..} macros are
  # expanded into their listed endings before the text is split.
  #
  # pattern_path - directory that contains the hyph-*.txt files
  #
  # Raises the usual Errno::* error if the file cannot be read.
  def get_patterns(pattern_path)
    @patterns ||= begin
      text = File.read("#{pattern_path}/hyph-#{@code}.pat.txt")
      if @code == 'eo'
        offo_tokenize(text) { |body| offo_expand_esperanto_macros(body) }
      else
        offo_tokenize(text)
      end
    end
  end

  # Returns the lines of hyph-<code>.lic.txt as an Array of Strings, each
  # still carrying its line terminator.
  #
  # pattern_path - directory that contains the hyph-*.txt files
  def get_comments_and_licence(pattern_path)
    @comments_and_licence ||=
      IO.readlines(File.expand_path("#{pattern_path}/hyph-#{@code}.lic.txt"))
  end

  # Returns the complete, unmodified contents of hyph-<code>.chr.txt as one
  # String.
  #
  # pattern_path - directory that contains the hyph-*.txt files
  def get_classes(pattern_path)
    @classes ||= File.read(File.expand_path("#{pattern_path}/hyph-#{@code}.chr.txt"))
  end

  private

  # Strips %-comments (from '%' to the end of the line) from +text+, optionally
  # lets a block transform the comment-free text, and returns the
  # whitespace-separated tokens. Empty or whitespace-only input yields [].
  def offo_tokenize(text)
    body = text.gsub(/%.*/, '')
    body = yield(body) if block_given?
    # Awk-style split: runs of whitespace separate tokens, and leading and
    # trailing whitespace never produce empty tokens.
    body.split(' ')
  end

  # Expands the macros used in the Esperanto pattern file: the macro argument
  # is repeated once for every ending listed in the replacement string.
  def offo_expand_esperanto_macros(text)
    text.
      gsub(/\\adj\{(.*?)\}/m, '\1a. \1aj. \1ajn. \1an. \1e.').
      gsub(/\\nom\{(.*?)\}/m, '\1a. \1aj. \1ajn. \1an. \1e. \1o. \1oj. \1ojn. \1on.').
      gsub(/\\ver\{(.*?)\}/m, '\1as. \1i. \1is. \1os. \1u. \1us.')
  end
end

# source patterns
# hyph-utf8/tex/generic/hyph-utf8/patterns/txt
$path_src_pat = 'hyph-utf8/tex/generic/hyph-utf8/patterns/txt'

# XML patterns
# collaboration/repository/offo
$path_offo = "collaboration/repository/offo"

$rel_path_offo = "#{$path_to_top}/#{$path_offo}"
$rel_path_patterns = "#{$path_to_top}/#{$path_src_pat}"

# TODO: should be singleton
$l = Languages.new()

# TODO: we should rewrite this
# not using: eo, el
# todo: mn, no!, sa, sh
# codes = ['bg', 'ca', 'cs', 'cy', 'da', 'de-1901', 'de-1996', 'de-ch-1901', 'en-gb', 'en-us', 'es', 'et', 'eu', 'fi', 'fr', 'ga', 'gl', 'hr', 'hsb', 'hu', 'ia', 'id', 'is', 'it', 'kmr', 'la', 'lt', 'lv', 'nl', 'no', 'pl', 'pt', 'ro', 'ru', 'sk', 'sl', 'sr-cyrl', 'sv', 'tr', 'uk']

# Maps a TeX language code to the code used for the FOP pattern file.
# Every language maps to its own code unless overridden below; a nil value
# excludes the language from the output.
language_codes = {}
$l.list.each { |language| language_codes[language.code] = language.code }
language_codes.update(
  'de-1901'        => 'de_1901',
  'de-1996'        => 'de',
  'de-ch-1901'     => 'de_CH',
  'el-monoton'     => 'el',
  'el-polyton'     => 'el_Polyton',
  'en-gb'          => 'en_GB',
  # FOP requires a pattern file for en; use the US patterns
  'en-us'          => 'en',
  # hu patterns cause a stack overflow when compiled with FOP
  'hu'             => nil,
  'la-x-classic'   => 'la_CL',
  'mn-cyrl'        => 'mn',
  'mn-cyrl-x-lmc'  => nil, # no such pattern
  'mul-ethi'       => 'mul_ET',
  'sh-cyrl'        => 'sr_Cyrl',
  'sh-latn'        => 'sr_Latn',
  'zh-latn-pinyin' => 'zh_Latn'
)

# Languages are processed in order of their TeX code.
# TODO: it would be nicer to sort on the FOP code (language_codes[...]) and to
# filter out the excluded languages here; nil entries make a plain <=> on the
# mapped codes fail.
languages = $l.list.sort_by(&:code)

# Turns one line of the licence file into one line of XML comment.
# '--' is not allowed inside an XML comment, so each hyphen of a '--' pair is
# replaced by the typographic hyphen; the TeX comment sign '%' is dropped.
# Lines without any non-blank character are returned without comment markers.
def offo_xml_comment(line)
  line.
    gsub(/--/, '‐‐').
    gsub(/%/, '').
    gsub(/^\s*(\S+.*)$/, '<!-- \1 -->')
end

# Writes the FOP hyphenation file +filename+ for +language+.
#
# filename    - path of the XML file to create (an existing file is overwritten)
# language    - Language object providing the get_* readers and #hyphenmin
# pattern_dir - directory that contains the hyph-*.txt source files
#
# All input is read before the output file is opened, so a missing source file
# does not leave a truncated XML file behind.
def write_offo_pattern_file(filename, language, pattern_dir)
  comments_and_licence = language.get_comments_and_licence(pattern_dir)
  # do not use classes (SP 2010/10/26)
  # classes = language.get_classes
  classes = nil
  exceptions = language.get_exceptions(pattern_dir)
  patterns = language.get_patterns(pattern_dir)

  # if code == 'nn' or code == 'nb'
  #   patterns = ""
  #   patterns = $l['no'].get_patterns
  # end

  # Block form: the file is closed even if writing raises.
  File.open(filename, 'w') do |out|
    out.puts '<?xml version="1.0" encoding="utf-8"?>'
    out.puts '<hyphenation-info>'
    out.puts

    # comments and license, optional
    unless comments_and_licence.empty?
      comments_and_licence.each { |line| out.puts offo_xml_comment(line) }
      out.puts
    end

    # hyphenmin, optional
    hyphenmin = language.hyphenmin
    if hyphenmin && hyphenmin.length > 0
      out.puts %(<hyphen-min before="#{hyphenmin[0]}" after="#{hyphenmin[1]}"/>)
      out.puts
    end

    # classes, optional (currently always skipped, see above)
    if classes
      out.puts '<classes>'
      out.puts classes
      out.puts '</classes>'
      out.puts
    end

    # exceptions, optional: the element is only written when there is at
    # least one exception
    unless exceptions.empty?
      out.puts '<exceptions>'
      out.puts exceptions
      out.puts '</exceptions>'
      out.puts
    end

    # patterns; the ASCII apostrophe is replaced by the typographic one
    out.puts '<patterns>'
    patterns.each { |pattern| out.puts pattern.tr("'", "’") }
    out.puts '</patterns>'
    out.puts

    out.puts '</hyphenation-info>'
  end
end

# Write the overview file and one pattern file per included language.
File.open("#{$rel_path_offo}/info/languages-info.xml", 'w') do |languages_info|
  languages_info.puts '<?xml version="1.0" encoding="utf-8"?>'
  languages_info.puts '<?xml-stylesheet type="text/xsl" href="languages-info.xsl"?>'
  languages_info.puts '<languages-info>'

  languages.each do |language|
    code = language_codes[language.code]
    # Skip languages that do not use the new loader or have no FOP code.
    next unless language.use_new_loader && code

    puts "generating #{code}"
    # NOTE(review): name and message are interpolated without XML escaping;
    # this assumes they never contain '&' or '<' -- confirm against languages.rb.
    languages_info.puts '<language>'
    languages_info.puts "<name>#{language.name}</name>"
    languages_info.puts "<tex-code>#{language.code}</tex-code>"
    languages_info.puts "<code>#{code}</code>"
    languages_info.puts "<message>#{language.message}</message>"
    languages_info.puts '</language>'

    write_offo_pattern_file("#{$rel_path_offo}/#{code}.xml", language, $rel_path_patterns)
  end

  languages_info.puts '</languages-info>'
end