• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env ruby
2#
3# This script generates hyphenation patterns for Turkmen
4#
5# This script has been written by Mojca Miklavec <mojca dot miklavec dot lists at gmail dot com>
6
# Open the file the patterns are written to.
# For a quick local run, write next to the current directory instead:
# $tr = File.new("hyph-tk.tex", "w")
# In TDS: the target is resolved relative to the location of this script
# (not the current working directory), so the script can be started from anywhere.
$tr = File.new(File.expand_path("../../../../../tex/generic/hyph-utf8/patterns/tex/hyph-tk.tex", File.dirname(__FILE__)), "w")
11
# Write +str+ into the output file ($tr) as TeX comment lines.
#
# Every line of +str+ is prefixed with "% ". An empty line becomes a bare "%"
# so that no trailing whitespace is emitted -- this includes the final empty
# line produced when +str+ ends with a newline, and an empty +str+.
#
# str:: the (possibly multi-line) comment text
def add_comment(str)
	# limit -1 keeps trailing empty fields, so a final "\n" still yields a "%" line
	lines = str.split("\n", -1)
	# splitting an empty string gives no fields at all; still emit one comment line
	lines = [""] if lines.empty?
	lines.each do |line|
		$tr.puts(line.empty? ? "%" : "% #{line}")
	end
end
16
# Letter classes the patterns are built from.
# Vowels are split into two groups (front and back) so that unnecessary
# permutations are not generated.
front_vowels = %w[ä e i ö ü]
back_vowels = %w[a y o u]
consonants = %w[b ç d f g h j k l m n p r s t w ý z ň ž ş]
# A reduced consonant list, used to eliminate impossible combinations.
common_suffix_consonants = %w[b ç d g j k l m n p s t ý z ş]
24
25
# Start the file: emit the header (title, author, license, provenance)
# as a block of TeX comments.
add_comment(<<HEADER)
Hyphenation patterns for Turkmen (hyph-tk.tex)

Author:  Nazar Annagurban <nazartm at gmail.com>
License: Public domain
Version: 0.1
Date:    16 March 2010

----------------------------------------------------------------------

The file has been auto-generated from generate_patterns_tk.rb
that is part of hyph-utf8.

For more information about UTF-8 hyphenation patterns for TeX and
links to this file see
    http://www.tug.org/tex-hyphen/
HEADER
44
45# we have the following comment for Basque:
46#
47# Some of the patterns below represent combinations that never
48# happen in Turkmen. Would they happen, they would be hyphenated
49# according to the rules.
50
# Open the \patterns{ group in the generated file.
$tr.puts "\\patterns{"
# The explanatory note is written as a comment right before the "1-4" pattern.
hyphen_note = "Some suffixes are added through a hyphen. When hyphenating these words, a hyphen is added before the hyphen so that the line ends with a hyphen and the new line starts with a hyphen."
add_comment(hyphen_note)
$tr.puts '1-4'
54
add_comment("Allow hyphen after a vowel if and only if there is a single consonant before next the vowel")
# Vowel-consonant-vowel patterns. Only vowels of the same group are combined:
# first all front/front combinations, then all back/back ones.
[front_vowels, back_vowels].each do |vowels|
	# product iterates with the first vowel varying slowest and the last fastest
	vowels.product(consonants, vowels) do |first, middle, last|
		$tr.puts "#{first}1#{middle}#{last}"
	end
end
71
add_comment("These combinations occur in words of foreign origin or joined words")
# Vowel pairs that mix the two vowel groups (back/front and front/back),
# listed in the order in which they are written for each consonant.
mixed_vowel_pairs = [
	%w[a i], %w[a e],
	%w[y ä], %w[y i], %w[y e],
	%w[o i], %w[o e],
	%w[u i], %w[u e],
	%w[i a], %w[i o],
	%w[e a], %w[e o],
	%w[ä o], %w[ä a],
	%w[ö a]
]
consonants.each do |c|
	mixed_vowel_pairs.each do |before, after|
		$tr.puts "#{before}1#{c}#{after}"
	end
end
91
add_comment("Allow hyphen between two consonants (if there is only two of them), except when they are at the begining of the word")
# For every ordered pair of consonants write two patterns: one with "1"
# between them and one anchored with a leading "." that uses "2" instead.
consonants.product(consonants) do |left, right|
	$tr.puts "#{left}1#{right}"
	$tr.puts ".#{left}2#{right}"
end
99
add_comment("Patterns for triple consonants. There may be additions to this category, as this list is not exhaustive.")
# Leading two-consonant parts of the triple-consonant patterns, in output order;
# each one is completed with "1" and a consonant from common_suffix_consonants.
cluster_heads = %w[
	ý2t ý2n ý2d r2t ý2p l2p l2t g2t n2t r2k
	r2p k2t r2h s2t l2k w2p n2s r2s l2m
]
common_suffix_consonants.each do |c|
	cluster_heads.each do |head|
		$tr.puts "#{head}1#{c}"
	end
end
122
add_comment("Exceptions and single word occurence patterns for words of foreign origin i.e. Russian")
# Fixed patterns, written one per line in the listed order.
%w[s2k1d l1s2k l1s2t s1t2r n2g1l n1g2r s2k1w].each do |pattern|
	$tr.puts pattern
end
131
# End the file: close the \patterns{ group, then close the output file.
$tr.write("}\n")
$tr.close
135
136