require 'psych'
require 'hydra'
require 'byebug'
require 'date'

require_relative 'path'

# Convenience helpers used when parsing TeX pattern files.
class String
  # Strips surrounding whitespace, then deletes every TeX comment,
  # i.e. everything from a `%` up to the end of its line.
  def superstrip
    strip.gsub(/%.*$/, '')
  end

  # Splits the stripped string on runs of whitespace (newlines included).
  def supersplit
    strip.gsub(/\s+/m, "\n").split("\n")
  end

  # Removes all whitespace characters and hyphens.
  def safe
    gsub(/[\s-]/, '')
  end

  # Capitalizes each whitespace-separated word and joins the words with
  # single spaces.
  def titlecase
    split.map(&:capitalize).join(' ')
  end
end

module TeX
  module Hyphen
    # Raised when the metadata header of a pattern file cannot be used.
    class InvalidMetadata < StandardError; end
    # Raised when the metadata header has no `authors` entry.
    class NoAuthor < InvalidMetadata; end
    # Raised when the metadata header has no `licence` entry.
    class NoLicence < InvalidMetadata; end

    include PATH

    # An entry of authors.yml (located next to this file).
    class Author
      def initialize(name, surname, email, contacted1, contacted2)
        @name = name
        @surname = surname
        @email = email
        # this mostly means if email has been recently checked
        @contacted1 = contacted1
        # this means if we made more cooperation with author,
        # exchanging patches etc.
        @contacted2 = contacted2
      end

      attr_reader :name, :surname, :email

      # Loads authors.yml once and caches the parsed result.
      def self.authors
        @@authors ||= Psych::safe_load(File.read(File.expand_path('authors.yml', __dir__)), permitted_classes: [Author, Date])
      end

      # All values of the authors table.
      def self.all
        authors.values
      end

      # Looks up one entry of the authors table by its key.
      def self.[](a)
        authors[a]
      end
    end

    # One set of hyphenation patterns, identified by its BCP 47 tag and
    # backed by the file hyph-<tag>.tex in PATH::TEX.
    class Language
      attr_reader :bcp47

      # A line containing this marker ends the metadata header of a pattern file.
      @@eohmarker = '=' * 42

      # First subtag of the BCP 47 tag.
      def iso639
        @bcp47.split('-').first
      end

      # Lazy readers for values taken from the metadata header.  Each one
      # triggers #extract_metadata if needed (and may therefore raise whatever
      # #extract_metadata raises), then returns the instance variable of the
      # same name.  A successful extraction is remembered, so fields that are
      # simply absent from the header no longer cause the file to be re-read
      # and re-parsed on every call.
      %i[
        babelname description use_old_loader use_old_patterns_comment
        legacy_patterns message known_bugs licences lefthyphenmin
        righthyphenmin authors
      ].each do |field|
        define_method(field) { metadata_field(:"@#{field}") }
      end

      def initialize(bcp47 = nil)
        @bcp47 = bcp47
      end

      # Hash keyed by the BCP 47 tag of every hyph-*.tex file in PATH::TEX.
      # Values start out as nil; the Language objects are created by .all.
      def self.languages
        @@languages ||= Dir.glob(File.join(PATH::TEX, 'hyph-*.tex')).map do |texfile|
          [texfile.gsub(/^.*\/hyph-(.*)\.tex$/, '\1'), nil]
        end.to_h
      end
      @@languages = Language.languages

      # Array of Language objects for every known tag except 'sr-cyrl'.
      # Instantiates them on first use and stores them in the .languages table.
      def self.all
        @@all ||= languages.keys.map do |bcp47|
          next if bcp47 == 'sr-cyrl' # FIXME Remove later
          @@languages[bcp47] ||= Language.new(bcp47)
        end.compact
      end

      # Groups all languages by their first BCP 47 subtag.
      def self.all_by_iso639
        all.group_by(&:iso639)
      end

      # Returns the Language for the given tag, or nil if there is none.
      def self.find_by_bcp47(bcp47)
        # The table only holds nil placeholders until .all has run; without
        # this call every lookup made before .all returned nil.
        all
        languages[bcp47]
      end

      # True for tags whose first subtag has three letters and lies in the
      # range qaa..qtz.
      def private_use?
        (@bcp47.length == 3 || @bcp47[3] == '-') &&
          @bcp47[0] == 'q' &&
          ('a'..'t').include?(@bcp47[1])
      end

      # TODO This should probably become “macrolanguage name” or something similar
      # @@displaynames = {
      #   'el' => 'Greek',
      #   'nb' => 'Norwegian',
      #   'nn' => 'Norwegian',
      #   'sh' => 'Serbian',
      # }

      # Strictly speaking a misnomer, because grc-x-ibycus should also return true.
      # But useful for a number of apostrophe-related routines
      def isgreek?
        ['grc', 'el-polyton', 'el-monoton'].include? @bcp47
      end

      # Truthy (the match index) when the tag starts with "sh-", nil otherwise.
      def serbian?
        @bcp47 =~ /^sh-/
      end

      def italic?
        ['it', 'pms', 'rm'].include? @bcp47
      end

      # True if the language is not Greek and any pattern contains an ASCII
      # apostrophe.
      def has_apostrophes?
        !isgreek? && patterns.any? { |pattern| pattern.include?("'") }
      rescue Errno::ENOENT
        false
      end

      # True if any pattern contains a hyphen.
      def has_hyphens?
        patterns.any? { |pattern| pattern.include?('-') }
      rescue Errno::ENOENT
        false
      end

      # Returns { plain:, with_apostrophe: }.  :plain holds every pattern and,
      # for non-Greek languages, directly after each pattern containing an
      # ASCII apostrophe a copy with ’ substituted.  :with_apostrophe holds
      # only those copies, or nil when there are none.
      def extract_apostrophes
        plain = []
        with_apostrophe = nil

        patterns.each do |pattern|
          plain << pattern
          next unless pattern.include?("'") && !isgreek?

          curly = pattern.gsub("'", "’")
          plain << curly
          (with_apostrophe ||= []) << curly
        end

        { plain: plain, with_apostrophe: with_apostrophe }
      end

      # One two-character string (the character followed by its upcased form)
      # per distinct character used in the patterns, dots and digits excluded,
      # in code point order.  For non-Greek languages an ASCII apostrophe is
      # additionally followed by the entry "’’".
      def extract_characters # FIXME Turkish and others iİ; check Church Slavonic
        characters = []

        codepoints = patterns.join.gsub(/[.0-9]/, '').unpack('U*').sort.uniq
        codepoints.each do |codepoint|
          ch = [codepoint].pack('U')
          characters << ch + ch.upcase
          characters << "’’" if ch == "'" && !isgreek?
        end

        characters
      end

      # Everything in the pattern file that precedes \patterns (cached).
      def comments_and_licence # Major TODO extract everything into YAML, and write .yml
        @comments_and_licence ||= readtexfile.gsub(/(.*)\\patterns.*/m, '\1')
      end

      def github_link
        sprintf 'https://github.com/hyphenation/tex-hyphen/tree/master/hyph-utf8/tex/generic/hyph-utf8/patterns/tex/hyph-%s.tex', @bcp47
      end

      # Orders languages by BCP 47 tag, except that babelname 'serbian' sorts
      # before 'serbianc'.  Reads babelname, so it may raise whatever
      # #extract_metadata raises.
      def <=>(other)
        # TODO Remove when practical
        a = self.babelname
        b = other.babelname

        if a == 'serbian' && b == 'serbianc'
          return -1
        elsif a == 'serbianc' && b == 'serbian'
          return 1
        end

        self.bcp47 <=> other.bcp47
      end

      # Cache of file contents, shared by all instances and keyed by tag.
      @@texfile = Hash.new

      # Contents of hyph-<bcp47>.tex, or "" if that file does not exist.
      def readtexfile(bcp47 = @bcp47)
        @@texfile[bcp47] ||= File.read(File.join(PATH::TEX, sprintf('hyph-%s.tex', bcp47)))
      rescue Errno::ENOENT
        @@texfile[bcp47] = ""
      end

      # Array of patterns taken from the \patterns{...} block (cached).
      # For 'eo' the \adj, \nom and \ver macros are expanded into the listed
      # endings first; 'nb' and 'nn' read the patterns of the 'no' file.
      def patterns
        @patterns ||= if @bcp47 == 'eo'
          readtexfile.superstrip.
            gsub(/.*\\patterns\s*\{(.*)\}.*/m, '\1').
            #
            gsub(/\\adj\{(.*?)\}/m, '\1a. \1aj. \1ajn. \1an. \1e.').
            gsub(/\\nom\{(.*?)\}/m, '\1a. \1aj. \1ajn. \1an. \1e. \1o. \1oj. \1ojn. \1on.').
            gsub(/\\ver\{(.*?)\}/m, '\1as. \1i. \1is. \1os. \1u. \1us.').
            #
            supersplit
        else
          source = ['nb', 'nn'].include?(@bcp47) ? 'no' : @bcp47
          readtexfile(source).superstrip.
            gsub(/.*\\patterns\s*\{(.*?)\}.*/m, '\1').
            supersplit
        end
      end

      # Array of hyphenation exceptions from the \hyphenation{...} block, or
      # "" when the file has no such block (cached).  Also fills @hyphenation,
      # which maps each exception with its hyphens removed to the hyphenated form.
      def exceptions
        return @exceptions if @exceptions

        source = readtexfile.superstrip
        if source.index('\hyphenation')
          @exceptions = source.gsub(/.*\\hyphenation\s*\{(.*?)\}.*/m, '\1').supersplit
          @hyphenation = @exceptions.map { |exception| [exception.gsub('-', ''), exception] }.to_h
        else
          @exceptions = ""
          @hyphenation = { }
        end

        @exceptions
      end

      # Hyphenates a word: a listed exception wins, otherwise the patterns are
      # applied through Hydra.  Falls back to a Hydra built without metadata
      # when the metadata header is invalid.
      def hyphenate(word)
        exceptions # populates @hyphenation
        return @hyphenation[word] if @hyphenation[word] # FIXME Better name

        unless @hydra
          begin
            metadata = extract_metadata
            @hydra = Hydra.new patterns, :lax, '', metadata
          rescue InvalidMetadata
            @hydra = Hydra.new patterns
          end
        end
        @hydra.showhyphens(word)
      end

      # Parses the YAML metadata header of hyph-<bcp47>.tex (the lines before
      # the first one containing \patterns or the end-of-header marker, with
      # the leading "% " and any remaining %-comment removed), stores the
      # individual values in instance variables and returns the parsed Hash.
      #
      # Raises InvalidMetadata if the header is not valid YAML or is not a
      # mapping, NoLicence if it has no `licence` entry, NoAuthor if it has no
      # `authors` entry, and RuntimeError (message: the BCP 47 tag) if a
      # licence entry is a plain string.  Errno::ENOENT propagates if the file
      # is missing.
      def extract_metadata
        end_of_header = /\\patterns|#{@@eohmarker}/
        texfile = File.join(PATH::TEX, sprintf('hyph-%s.tex', @bcp47))
        header = File.read(texfile).each_line.
          take_while { |line| line !~ end_of_header }.
          map { |line| line.gsub(/^% /, '').gsub(/%.*/, '') }.
          join

        begin
          metadata = Psych::safe_load(header, permitted_classes: [Date])
          raise InvalidMetadata unless metadata.is_a? Hash
        rescue Psych::SyntaxError
          raise InvalidMetadata
        end

        @name = metadata.dig('language', 'name')
        @lefthyphenmin = metadata.dig('hyphenmins', 'typesetting', 'left') || metadata.dig('hyphenmins', 'generation', 'left')
        @righthyphenmin = metadata.dig('hyphenmins', 'typesetting', 'right') || metadata.dig('hyphenmins', 'generation', 'right')
        # FIXME That’s not nice
        if respond_to? :synonyms
          @synonyms = metadata.dig('texlive', 'synonyms') || []
          @encoding = metadata.dig('texlive', 'encoding')
        end
        @message = metadata.dig('texlive', 'message')
        @legacy_patterns = metadata.dig('texlive', 'legacy_patterns')
        @use_old_loader = metadata.dig('texlive', 'use_old_loader')
        @use_old_patterns_comment = metadata.dig('texlive', 'use_old_patterns_comment')
        @description = metadata.dig('texlive', 'description')
        @babelname = metadata.dig('texlive', 'babelname')
        @package = metadata.dig('texlive', 'package')

        licences = metadata.dig('licence')
        raise NoLicence unless licences
        licences = [licences] unless licences.is_a? Array
        @licences = licences.map do |licence|
          raise bcp47 if licence.is_a? String
          next if licence.values == [nil] # single-key entry without a value
          licence.dig('name') || 'custom'
        end.compact

        authors = metadata.dig('authors')
        unless authors
          @authors = nil
          raise NoAuthor # FIXME
        end
        @authors = authors.map { |author| author['name'] }
        @known_bugs = metadata.dig('known_bugs')

        # raise NoAuthor unless @authors && @authors.count > 0 # TODO Later ;-) AR 2018-09-13

        # Only reached when nothing above raised; lets the lazy readers skip
        # re-extraction for fields that are absent from the header.
        @metadata_loaded = true

        metadata
      end

      private

      # Returns the given instance variable, running #extract_metadata first
      # unless the variable already holds a truthy value or a previous
      # extraction completed successfully.
      def metadata_field(ivar)
        extract_metadata unless @metadata_loaded || instance_variable_get(ivar)
        instance_variable_get(ivar)
      end
    end
  end
end