#!/usr/bin/env ruby
# frozen_string_literal: true
#
# This script generates hyphenation patterns for Turkmen
#
# This script has been written by Mojca Miklavec <mojca dot miklavec dot lists at gmail dot com>
#
# Usage: ruby <this script> [output-file]
# Without an argument the patterns are written to DEFAULT_OUTPUT_PATH.

# Default destination of the generated file (its place in the TDS tree),
# resolved against the current working directory. Pass "hyph-tk.tex" as the
# first argument to write the file into the current directory instead.
DEFAULT_OUTPUT_PATH = '../../../../../tex/generic/hyph-utf8/patterns/tex/hyph-tk.tex'

# Classes of vowels and consonants. The vowels are split into front and back
# groups so that unnecessary permutations are not generated.
FRONT_VOWELS = %w[ä e i ö ü].freeze
BACK_VOWELS = %w[a y o u].freeze
CONSONANTS = %w[b ç d f g h j k l m n p r s t w ý z ň ž ş].freeze
# Restricted consonant list used to eliminate impossible combinations.
COMMON_SUFFIX_CONSONANTS = %w[b ç d g j k l m n p s t ý z ş].freeze

# Vowel pairs (first vowel, second vowel) that do not come from the same
# vowel group; each pair is combined with every consonant in between.
MIXED_VOWEL_PAIRS = [
  %w[a i], %w[a e],
  %w[y ä], %w[y i], %w[y e],
  %w[o i], %w[o e],
  %w[u i], %w[u e],
  %w[i a], %w[i o],
  %w[e a], %w[e o],
  %w[ä o], %w[ä a],
  %w[ö a]
].freeze

# Two-consonant pattern prefixes; each one is followed by every consonant of
# COMMON_SUFFIX_CONSONANTS to form a triple-consonant pattern.
TRIPLE_CONSONANT_PREFIXES = %w[
  ý2t1 ý2n1 ý2d1 r2t1 ý2p1 l2p1 l2t1 g2t1 n2t1 r2k1
  r2p1 k2t1 r2h1 s2t1 l2k1 w2p1 n2s1 r2s1 l2m1
].freeze

# Fixed patterns that are emitted verbatim at the end of the pattern list.
EXCEPTION_PATTERNS = %w[s2k1d l1s2k l1s2t s1t2r n2g1l n1g2r s2k1w].freeze

# Text of the comment block placed at the top of the generated file.
# It ends with a line break, so the formatted comment ends with a bare "% " line.
HEADER = <<'HEADER'
Hyphenation patterns for Turkmen (hyph-tk.tex)

Author: Nazar Annagurban <nazartm at gmail.com>
License: Public domain
Version: 0.1
Date: 16 March 2010

----------------------------------------------------------------------

The file has been auto-generated from generate_patterns_tk.rb
that is part of hyph-utf8.

For more information about UTF-8 hyphenation patterns for TeX and
links to this file see
 http://www.tug.org/tex-hyphen/
HEADER

# Formats +str+ as TeX comment text.
#
# "% " is put in front of the text and after every line break inside it; a
# "% " that ends up directly before a line break (i.e. an empty line of
# +str+) is shortened to "%" so that no trailing space is produced there.
#
# @param str [String] the comment text, possibly spanning several lines
# @return [String] the text with comment markers, without a final line break
def tex_comment(str)
  continued = str.gsub(/\n/, "\n% ").gsub(/% \n/, "%\n")
  "% #{continued}"
end

# Patterns "v1 1 c v2" for every vowel pair taken from the same group with a
# single consonant in between. The first vowel varies slowest, the second
# vowel fastest.
#
# @param vowels [Array<String>] one vowel group
# @return [Array<String>]
def same_group_vowel_patterns(vowels)
  vowels.product(CONSONANTS, vowels).map { |v1, c, v2| "#{v1}1#{c}#{v2}" }
end

# Patterns for the vowel pairs of MIXED_VOWEL_PAIRS, grouped by consonant.
#
# @return [Array<String>]
def mixed_vowel_patterns
  CONSONANTS.flat_map do |c|
    MIXED_VOWEL_PAIRS.map { |v1, v2| "#{v1}1#{c}#{v2}" }
  end
end

# For every ordered pair of consonants: a pattern allowing a break between
# them, directly followed by a pattern for the same pair at the start of a
# word (leading ".").
#
# @return [Array<String>]
def consonant_pair_patterns
  CONSONANTS.product(CONSONANTS).flat_map do |c1, c2|
    ["#{c1}1#{c2}", ".#{c1}2#{c2}"]
  end
end

# Triple-consonant patterns, grouped by the final consonant.
#
# @return [Array<String>]
def triple_consonant_patterns
  COMMON_SUFFIX_CONSONANTS.flat_map do |c|
    TRIPLE_CONSONANT_PREFIXES.map { |prefix| "#{prefix}#{c}" }
  end
end

# Builds the complete content of the pattern file, in output order.
#
# The following remark is used for Basque and applies here as well:
#
#   Some of the patterns below represent combinations that never
#   happen in Turkmen. Would they happen, they would be hyphenated
#   according to the rules.
#
# NOTE(review): two of the comment texts below contain a line break ("\n");
# it is reproduced as it appears in the previous revision of this script and
# makes the comment continue on a second "% " line - confirm it is intended.
#
# @return [Array<String>] entries to be written with +puts+ (an entry may
#   itself span several lines)
def turkmen_pattern_lines
  [
    tex_comment(HEADER),
    '\patterns{',
    tex_comment("Some suffixes are added through a hyphen. \nWhen hyphenating these words, a hyphen is added before the hyphen so that the line ends with a hyphen and the new line starts with a hyphen."),
    '1-4',
    tex_comment('Allow hyphen after a vowel if and only if there is a single consonant before next the vowel'),
    same_group_vowel_patterns(FRONT_VOWELS),
    same_group_vowel_patterns(BACK_VOWELS),
    tex_comment('These combinations occur in words of foreign origin or joined words'),
    mixed_vowel_patterns,
    tex_comment('Allow hyphen between two consonants (if there is only two of them), except when they are at the begining of the word'),
    consonant_pair_patterns,
    tex_comment('Patterns for triple consonants. There may be additions to this category, as this list is not exhaustive.'),
    triple_consonant_patterns,
    tex_comment("Exceptions and single word occurence patterns for words of foreign origin i.e. \nRussian"),
    EXCEPTION_PATTERNS,
    '}'
  ].flatten
end

# Generate everything first so that a failure cannot leave a half-written
# file behind, then write it; the block form closes the file even on errors.
pattern_lines = turkmen_pattern_lines
File.open(ARGV.fetch(0, DEFAULT_OUTPUT_PATH), 'w') do |out|
  out.puts(pattern_lines)
end