require 'yaml'
require 'hydra'
require 'byebug'

require_relative 'path'

# Text helpers used when picking TeX pattern files apart.
# NOTE(review): this reopens the core String class, so the helpers are
# visible on every String in the process.
class String
  # Strips surrounding whitespace, then removes everything from a "%" up to
  # the end of its line.
  def superstrip
    strip.gsub(/%.*$/, '')
  end

  # Splits the string on runs of whitespace (newlines included), ignoring
  # leading and trailing whitespace. Returns an Array of Strings.
  def supersplit
    strip.gsub(/\s+/m, "\n").split("\n")
  end

  # Returns a copy with all whitespace and "-" characters removed.
  def safe
    gsub(/[\s-]/, '')
  end

  # Capitalises every whitespace-separated word and joins them with single
  # spaces.
  def titlecase
    split.map(&:capitalize).join(' ')
  end
end

module TeX
  module Hyphen
    # Raised when the header of a pattern file cannot be used as metadata.
    class InvalidMetadata < StandardError; end
    # Raised by Language#extract_metadata when the header has no "authors" entry.
    class NoAuthor < InvalidMetadata; end
    # Raised by Language#extract_metadata when the header has no "licence" entry.
    class NoLicence < InvalidMetadata; end

    include PATH

    # A pattern author, plus class-level access to the authors.yml file that
    # sits next to this source file.
    class Author
      def initialize(name, surname, email, contacted1, contacted2)
        @name = name
        @surname = surname
        @email = email
        # this mostly means if email has been recently checked
        @contacted1 = contacted1
        # this means if we made more cooperation with author,
        # exchanging patches etc.
        @contacted2 = contacted2
      end

      attr_reader :name, :surname, :email

      # Parsed contents of authors.yml, loaded once and cached for the
      # lifetime of the process.
      def self.authors
        @@authors ||= YAML.load(File.read(File.expand_path('authors.yml', __dir__)))
      end

      # All values stored in authors.yml.
      def self.all
        authors.values
      end

      # Looks up a single entry of authors.yml by its key.
      def self.[](a)
        authors[a]
      end
    end

    # One hyphenation pattern file (hyph-<bcp47>.tex under PATH::TEX) together
    # with the metadata found in the header of that file.
    class Language
      attr_reader :bcp47

      # A line containing this marker ends the metadata header
      # (see #extract_metadata).
      @@eohmarker = '=' * 42

      # First subtag of the BCP 47 tag, e.g. "de" for "de-1996".
      def iso639
        @bcp47.split('-').first
      end

      # The readers below are filled in lazily from the header of the pattern
      # file; see #metadata_attribute for when the header is (re)parsed.
      def babelname
        metadata_attribute :@babelname
      end

      def description
        metadata_attribute :@description
      end

      def use_old_loader
        metadata_attribute :@use_old_loader
      end

      def use_old_patterns_comment
        metadata_attribute :@use_old_patterns_comment
      end

      def legacy_patterns
        metadata_attribute :@legacy_patterns
      end

      def message
        metadata_attribute :@message
      end

      def known_bugs
        metadata_attribute :@known_bugs
      end

      def initialize(bcp47 = nil)
        @bcp47 = bcp47
        # Becomes true once #extract_metadata has run to completion.
        @metadata_extracted = false
      end

      # Hash keyed by the BCP 47 tag of every hyph-*.tex file found in
      # PATH::TEX. Values start out as nil; .all replaces them with Language
      # instances on first use.
      def self.languages
        @@languages ||= begin
          texfiles = Dir.glob(File.join(PATH::TEX, 'hyph-*.tex'))
          texfiles.map { |texfile| [texfile.gsub(/^.*\/hyph-(.*)\.tex$/, '\1'), nil] }.to_h
        end
      end
      # Build the table as soon as the class body is evaluated.
      @@languages = Language.languages

      # Array of Language instances, one per known tag except "sr-cyrl".
      # Instances are created on first call and cached in the .languages table.
      def self.all
        @@all ||= languages.map do |bcp47, _language|
          next if bcp47 == 'sr-cyrl' # FIXME Remove later
          @@languages[bcp47] ||= Language.new(bcp47)
        end.compact
      end

      # Hash mapping each #iso639 code to the Array of languages sharing it.
      def self.all_by_iso639
        all.group_by(&:iso639)
      end

      # Returns the entry stored under +bcp47+ in the .languages table.
      # NOTE(review): entries are nil until .all has instantiated them, so this
      # returns nil for every tag if .all has not been called yet - confirm
      # that callers always go through .all first.
      def self.find_by_bcp47(bcp47)
        languages[bcp47]
      end

      # True when the tag's first subtag is three characters long, starts
      # with "q" and has a second letter in "a".."t".
      def private_use?
        (@bcp47.length == 3 || @bcp47[3] == '-') &&
          @bcp47[0] == 'q' &&
          ('a'..'t').include?(@bcp47[1])
      end

      # TODO This should probably become “macrolanguage name” or something similar
      # @@displaynames = {
      #   'el' => 'Greek',
      #   'nb' => 'Norwegian',
      #   'nn' => 'Norwegian',
      #   'sh' => 'Serbian',
      # }

      def licences
        metadata_attribute :@licences
      end

      def lefthyphenmin
        metadata_attribute :@lefthyphenmin
      end

      def righthyphenmin
        metadata_attribute :@righthyphenmin
      end

      # Strictly speaking a misnomer, because grc-x-ibycus should also return true.
      # But useful for a number of apostrophe-related routines
      def isgreek?
        ['grc', 'el-polyton', 'el-monoton'].include? @bcp47
      end

      # Truthy (the match index, 0) when the tag starts with "sh-", nil otherwise.
      def serbian?
        @bcp47 =~ /^sh-/
      end

      def italic?
        ['it', 'pms', 'rm'].include? @bcp47
      end

      # True when the language is not one of the #isgreek? tags and at least
      # one pattern contains a straight apostrophe.
      # NOTE(review): #readtexfile already rescues Errno::ENOENT itself, so the
      # rescue here looks like a leftover safety net - kept as is.
      def has_apostrophes?
        !isgreek? && patterns.any? { |pattern| pattern =~ /'/ }
      rescue Errno::ENOENT
        false
      end

      # True when at least one pattern contains a "-".
      def has_hyphens?
        patterns.any? { |pattern| pattern =~ /-/ }
      rescue Errno::ENOENT
        false
      end

      # Returns { plain: [...], with_apostrophe: [...] or nil }.
      # :plain holds every pattern, each apostrophe pattern being immediately
      # followed by a copy that uses ’ instead of '. :with_apostrophe holds
      # only those copies and stays nil when there are none. Nothing is
      # duplicated for the #isgreek? tags.
      def extract_apostrophes
        plain = []
        with_apostrophe = nil

        patterns.each do |pattern|
          plain << pattern
          next unless pattern =~ /'/ && !isgreek?

          curly = pattern.gsub(/'/, "’")
          plain << curly
          (with_apostrophe ||= []) << curly
        end

        { plain: plain, with_apostrophe: with_apostrophe }
      end

      # Returns one two-character string (the character followed by its
      # upcased form) for every distinct character used in the patterns, dots
      # and digits excluded, ordered by code point. A "’’" entry follows the
      # apostrophe entry for languages that are not #isgreek?.
      def extract_characters # FIXME Turkish and others iİ; check Church Slavonic
        codepoints = patterns.join.gsub(/[.0-9]/, '').unpack('U*').sort.uniq

        codepoints.each_with_object([]) do |codepoint, characters|
          ch = [codepoint].pack('U')
          characters << ch + ch.upcase
          characters << "’’" if ch == "'" && !isgreek?
        end
      end

      # Everything in the pattern file that precedes the last \patterns
      # command (memoised; the whole file when there is no such command).
      def comments_and_licence # Major TODO extract everything into YAML, and write .yml
        @comments_and_licence ||= readtexfile.gsub(/(.*)\\patterns.*/m, '\1')
      end

      def authors
        metadata_attribute :@authors
      end

      def github_link
        "https://github.com/hyphenation/tex-hyphen/tree/master/hyph-utf8/tex/generic/hyph-utf8/patterns/tex/hyph-#{@bcp47}.tex"
      end

      # Orders languages by BCP 47 tag, except that babelname "serbian" always
      # sorts before "serbianc".
      def <=>(other)
        # TODO Remove when practical
        a = babelname
        b = other.babelname

        return -1 if a == 'serbian' && b == 'serbianc'
        return 1 if a == 'serbianc' && b == 'serbian'

        bcp47 <=> other.bcp47
      end

      # Cache of file contents shared by all instances, keyed by tag.
      @@texfile = {}

      # Contents of hyph-<bcp47>.tex, read once per tag. A missing file is
      # cached and returned as the empty string instead of raising.
      def readtexfile(bcp47 = @bcp47)
        @@texfile[bcp47] ||= File.read(File.join(PATH::TEX, "hyph-#{bcp47}.tex"))
      rescue Errno::ENOENT
        @@texfile[bcp47] = ""
      end

      # Array of the patterns inside the \patterns{...} block (memoised).
      #
      # * "nb" and "nn" take their patterns from the "no" file.
      # * For "eo" the \adj{}, \nom{} and \ver{} macros are expanded into the
      #   word endings listed below before splitting.
      def patterns
        @patterns ||= begin
          source = ['nb', 'nn'].include?(@bcp47) ? 'no' : @bcp47
          text = readtexfile(source).superstrip

          if @bcp47 == 'eo'
            # Greedy capture: the block itself contains braces (the macros
            # expanded just below), so it has to run up to the last "}".
            body = text.gsub(/.*\\patterns\s*\{(.*)\}.*/m, '\1')
            body = body.gsub(/\\adj\{(.*?)\}/m, '\1a. \1aj. \1ajn. \1an. \1e.')
            body = body.gsub(/\\nom\{(.*?)\}/m, '\1a. \1aj. \1ajn. \1an. \1e. \1o. \1oj. \1ojn. \1on.')
            body = body.gsub(/\\ver\{(.*?)\}/m, '\1as. \1i. \1is. \1os. \1u. \1us.')
            body.supersplit
          else
            text.gsub(/.*\\patterns\s*\{(.*?)\}.*/m, '\1').supersplit
          end
        end
      end

      # Array of the entries inside the \hyphenation{...} block, or the empty
      # String "" when the file has no such block (kept for backward
      # compatibility with existing callers). As a side effect fills
      # @hyphenation, a Hash from the unhyphenated word to its hyphenated form.
      def exceptions
        unless @exceptions
          text = readtexfile.superstrip
          if text.index('\hyphenation')
            @exceptions = text.gsub(/.*\\hyphenation\s*\{(.*?)\}.*/m, '\1').supersplit
            @hyphenation = @exceptions.map { |exception| [exception.gsub('-', ''), exception] }.to_h
          else
            @exceptions = ""
            @hyphenation = {}
          end
        end

        @exceptions
      end

      # Hyphenates +word+: an entry from the exception list wins, otherwise the
      # word is handed to a Hydra built from #patterns. The Hydra is built with
      # the file metadata when the header is valid and from the bare patterns
      # when #extract_metadata raises InvalidMetadata.
      def hyphenate(word)
        exceptions # make sure @hyphenation is populated
        known = @hyphenation[word] # FIXME Better name
        return known if known

        @hydra ||= begin
          metadata = extract_metadata
          Hydra.new patterns, :lax, '', metadata
        rescue InvalidMetadata
          Hydra.new patterns
        end
        @hydra.showhyphens(word)
      end

      # Parses the YAML header of the pattern file, stores its fields in
      # instance variables and returns the parsed Hash.
      #
      # The header is every line before the first one that contains \patterns
      # or the end-of-header marker, with a leading "% " and any remaining
      # "%..." comment removed from each line.
      #
      # Raises InvalidMetadata when the header is not valid YAML or is not a
      # mapping, NoLicence when there is no "licence" entry and NoAuthor when
      # there is no "authors" entry. It also raises a RuntimeError carrying the
      # tag when a licence entry is a plain string, and lets Errno::ENOENT
      # through when the file does not exist (the file is read directly, not
      # through #readtexfile).
      def extract_metadata
        end_of_header = /\\patterns|#{@@eohmarker}/
        texfile = File.read(File.join(PATH::TEX, "hyph-#{@bcp47}.tex"))
        header = texfile.each_line
                        .take_while { |line| line !~ end_of_header }
                        .map { |line| line.gsub(/^% /, '').gsub(/%.*/, '') }
                        .join

        begin
          metadata = YAML.load(header)
          raise InvalidMetadata unless metadata.is_a? Hash
        rescue Psych::SyntaxError
          raise InvalidMetadata
        end

        @name = metadata.dig('language', 'name')
        @lefthyphenmin = metadata.dig('hyphenmins', 'typesetting', 'left') ||
                         metadata.dig('hyphenmins', 'generation', 'left')
        @righthyphenmin = metadata.dig('hyphenmins', 'typesetting', 'right') ||
                          metadata.dig('hyphenmins', 'generation', 'right')
        # FIXME That’s not nice
        # Only objects that expose a public #synonyms method get these two set.
        if respond_to? :synonyms
          @synonyms = metadata.dig('texlive', 'synonyms') || []
          @encoding = metadata.dig('texlive', 'encoding')
        end
        @message = metadata.dig('texlive', 'message')
        @legacy_patterns = metadata.dig('texlive', 'legacy_patterns')
        @use_old_loader = metadata.dig('texlive', 'use_old_loader')
        @use_old_patterns_comment = metadata.dig('texlive', 'use_old_patterns_comment')
        @description = metadata.dig('texlive', 'description')
        @babelname = metadata.dig('texlive', 'babelname')
        @package = metadata.dig('texlive', 'package')

        licences = metadata['licence']
        raise NoLicence unless licences
        licences = [licences] unless licences.is_a? Array
        @licences = licences.map do |licence|
          raise bcp47 if licence.is_a? String
          next if licence.values == [nil] # single key without a value: skipped
          licence['name'] || 'custom'
        end.compact

        authors = metadata['authors']
        unless authors
          @authors = nil
          raise NoAuthor # FIXME
        end
        @authors = authors.map { |author| author['name'] }

        @known_bugs = metadata['known_bugs']

        # raise NoAuthor unless @authors && @authors.count > 0 # TODO Later ;-) AR 2018-09-13

        # Only reached when nothing above raised: from now on the lazy readers
        # can trust a nil attribute instead of parsing the file again.
        @metadata_extracted = true

        metadata
      end

      private

      # Returns the value of the instance variable +ivar+ (a Symbol such as
      # :@babelname), parsing the file header first when that is needed.
      #
      # The header is parsed when the variable is still nil/false and no
      # earlier #extract_metadata call has run to completion. Once a parse has
      # completed, attributes that are simply absent from the header are
      # answered from memory instead of re-reading the file on every call.
      # Exceptions raised by #extract_metadata propagate unchanged.
      def metadata_attribute(ivar)
        extract_metadata unless @metadata_extracted || instance_variable_get(ivar)
        instance_variable_get(ivar)
      end
    end
  end
end