• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1require 'psych'
2require 'hydra'
3require 'byebug'
4require 'date'
5
6require_relative 'path'
7
# Text helpers added to String for working with TeX pattern files.
class String
  # Strips surrounding whitespace, then removes every `%` comment up to the
  # end of its line. Whitespace that preceded a comment is left in place.
  def superstrip
    strip.gsub(/%.*$/, '')
  end

  # Returns the whitespace-separated words of the string as an array;
  # an empty or blank string gives an empty array.
  def supersplit
    strip.split(/\s+/)
  end

  # Removes all whitespace and hyphens.
  def safe
    gsub(/[\s-]/, '')
  end

  # Capitalises each whitespace-separated word and joins them with one space.
  def titlecase
    split.map { |word| word.capitalize }.join(' ')
  end
end
25
26module TeX
27  module Hyphen
28    class InvalidMetadata < StandardError; end
29    class NoAuthor < InvalidMetadata; end
30    class NoLicence < InvalidMetadata; end
31
32    include PATH
33
34    class Author
35      def initialize(name,surname,email,contacted1,contacted2)
36        @name       = name
37        @surname    = surname
38        @email      = email
39        # this mostly means if email has been recently checked
40        @contacted1 = contacted1
41        # this means if we made more cooperation with author,
42        # exchanging patches etc.
43        @contacted2 = contacted2
44      end
45
46      attr_reader :name, :surname, :email
47
48      def self.authors
49        @@authors ||= Psych::safe_load(File.read(File.expand_path('authors.yml', __dir__)), permitted_classes: [Author, Date])
50      end
51
52      def self.all
53        authors.values
54      end
55
56      def self.[] a
57        authors[a]
58      end
59    end
60
61    class Language
62      attr_reader :bcp47
63
64      @@eohmarker = '=' * 42
65
66      def iso639
67        @bcp47.split('-').first
68      end
69
      # FIXME Find something better than all that extract_metadata unless @blah nonsense
71      def babelname
72        extract_metadata unless @babelname
73        @babelname
74      end
75
76      def description
77        extract_metadata unless @description
78        @description
79      end
80
81      def use_old_loader
82        extract_metadata unless @use_old_loader
83        @use_old_loader
84      end
85
86      def use_old_patterns_comment
87        extract_metadata unless @use_old_patterns_comment
88        @use_old_patterns_comment
89      end
90
91      def legacy_patterns
92        extract_metadata unless @legacy_patterns
93        @legacy_patterns
94      end
95
96      def message
97        extract_metadata unless @message
98        @message
99      end
100
101      def known_bugs
102        extract_metadata unless @known_bugs
103        @known_bugs
104      end
105
106      def initialize(bcp47 = nil)
107        @bcp47 = bcp47
108      end
109
110      def self.languages
111        @@languages ||= Dir.glob(File.join(PATH::TEX, 'hyph-*.tex')).inject [] do |languages, texfile|
112          bcp47 = texfile.gsub /^.*\/hyph-(.*)\.tex$/, '\1'
113          # languages << [bcp47, Language.new(bcp47)]
114          languages << [bcp47, nil]
115        end.to_h
116      end
117      @@languages = Language.languages
118
119      def self.all
120        @@all ||= languages.map do |bcp47, language|
121          next if bcp47 == 'sr-cyrl' # FIXME Remove later
122          @@languages[bcp47] ||= Language.new(bcp47)
123        end.compact
124      end
125
126      def self.all_by_iso639
127        all.inject(Hash.new) do |maintags, language|
128          (maintags[language.iso639] ||= Array.new) << language
129          maintags
130        end
131      end
132
133      def self.find_by_bcp47(bcp47)
134        languages[bcp47]
135      end
136
137      def private_use?
138        @bcp47.length == 3 || @bcp47[3] == '-' and @bcp47[0] == 'q' and ('a'..'t').include? @bcp47[1]
139      end
140
141      # TODO This should probably become “macrolanguage name” or something similar
142      # @@displaynames = {
143      #   'el' => 'Greek',
144      #   'nb' => 'Norwegian',
145      #   'nn' => 'Norwegian',
146      #   'sh' => 'Serbian',
147      # }
148
149      def licences
150        extract_metadata unless @licences
151        @licences
152      end
153
154      def lefthyphenmin
155        extract_metadata unless @lefthyphenmin
156        @lefthyphenmin
157      end
158
159      def righthyphenmin
160        extract_metadata unless @righthyphenmin
161        @righthyphenmin
162      end
163
164      # Strictly speaking a misnomer, because grc-x-ibycus should also return true.
165      # But useful for a number of apostrophe-related routines
166      def isgreek?
167        ['grc', 'el-polyton', 'el-monoton'].include? @bcp47
168      end
169
170      def serbian?
171        @bcp47 =~ /^sh-/
172      end
173
174      def italic?
175        ['it', 'pms', 'rm'].include? @bcp47
176      end
177
178      def has_apostrophes?
179        begin
180          !isgreek? && patterns.any? { |p| p =~ /'/ }
181        rescue Errno::ENOENT
182          false
183        end
184      end
185
186      def has_hyphens?
187        begin
188          patterns.any? { |p| p =~ /-/ }
189        rescue Errno::ENOENT
190          false
191        end
192      end
193
194      def extract_apostrophes
195        plain, with_apostrophe = Array.new, nil
196
197        patterns.each do |pattern|
198          plain << pattern
199          if pattern =~ /'/ && !isgreek?
200            pattern_with_apostrophe = pattern.gsub(/'/,"’")
201            plain << pattern_with_apostrophe
202            (with_apostrophe ||= []) << pattern_with_apostrophe
203          end
204        end
205
206        { plain: plain, with_apostrophe: with_apostrophe }
207      end
208
209      def extract_characters # FIXME Turkish and others iİ; check Church Slavonic
210        characters = Array.new
211
212        characters_indexes = patterns.join.gsub(/[.0-9]/,'').unpack('U*').sort.uniq
213        characters_indexes.each do |c|
214          ch = [c].pack('U')
215          characters << ch + ch.upcase
216          characters << "’’" if ch == "'" && !isgreek?
217        end
218
219        characters
220      end
221
222      def comments_and_licence # Major TODO extract everything into YAML, and write .yml
223        @comments_and_licence ||= readtexfile.gsub(/(.*)\\patterns.*/m,'\1')
224      end
225
226      def authors
227        extract_metadata unless @authors
228        @authors
229      end
230
231      def github_link
232        sprintf 'https://github.com/hyphenation/tex-hyphen/tree/master/hyph-utf8/tex/generic/hyph-utf8/patterns/tex/hyph-%s.tex', @bcp47
233      end
234
235      def <=>(other)
236        # TODO Remove when practical
237        a = self.babelname
238        b = other.babelname
239
240        if a == 'serbian' && b == 'serbianc'
241          return -1
242        elsif a == 'serbianc' && b == 'serbian'
243          return 1
244        end
245
246        self.bcp47 <=> other.bcp47
247      end
248
249      @@texfile = Hash.new
250      def readtexfile(bcp47 = @bcp47)
251        begin
252          @@texfile[bcp47] ||= File.read(File.join(PATH::TEX, sprintf('hyph-%s.tex', bcp47)))
253        rescue Errno::ENOENT
254          @@texfile[bcp47] = ""
255        end
256      end
257
258      def patterns
259        @patterns ||= if @bcp47 == 'eo' then
260          readtexfile.superstrip.
261            gsub(/.*\\patterns\s*\{(.*)\}.*/m,'\1').
262            #
263            gsub(/\\adj\{(.*?)\}/m,'\1a. \1aj. \1ajn. \1an. \1e.').
264            gsub(/\\nom\{(.*?)\}/m,'\1a. \1aj. \1ajn. \1an. \1e. \1o. \1oj. \1ojn. \1on.').
265            gsub(/\\ver\{(.*?)\}/m,'\1as. \1i. \1is. \1os. \1u. \1us.').
266            #
267            supersplit
268        else
269          readtexfile(if ['nb', 'nn'].include? @bcp47 then 'no' else @bcp47 end).superstrip.
270            gsub(/.*\\patterns\s*\{(.*?)\}.*/m,'\1').
271            supersplit
272        end
273      end
274
275      def exceptions
276        unless @exceptions
277          if readtexfile.superstrip.index('\hyphenation')
278            @exceptions = readtexfile.superstrip.gsub(/.*\\hyphenation\s*\{(.*?)\}.*/m,'\1').supersplit
279            if @exceptions != ""
280              @hyphenation = @exceptions.inject [] do |exceptions, exception|
281                exceptions << [exception.gsub('-', ''), exception]
282              end.to_h
283            else
284              @hyphenation = { }
285            end
286          else
287            @exceptions = ""
288            @hyphenation = { }
289          end
290        end
291
292        @exceptions
293      end
294
295      def hyphenate(word)
296        exceptions
297        return @hyphenation[word] if @hyphenation[word] # FIXME Better name
298
299        unless @hydra
300          begin
301            metadata = extract_metadata
302            @hydra = Hydra.new patterns, :lax, '', metadata
303          rescue InvalidMetadata
304            @hydra = Hydra.new patterns
305          end
306        end
307        @hydra.showhyphens(word)
308      end
309
310      def extract_metadata
311        header = ""
312        File.read(File.join(PATH::TEX, sprintf('hyph-%s.tex', @bcp47))).each_line do |line|
313          break if line =~ /\\patterns|#{@@eohmarker}/
314          header += line.gsub(/^% /, '').gsub(/%.*/, '')
315        end
316        begin
317          metadata = Psych::safe_load(header, permitted_classes: [Date])
318          raise InvalidMetadata unless metadata.is_a? Hash
319        rescue Psych::SyntaxError
320          raise InvalidMetadata
321        end
322
323        @name = metadata.dig('language', 'name')
324        @lefthyphenmin = metadata.dig('hyphenmins', 'typesetting', 'left') || metadata.dig('hyphenmins', 'generation', 'left')
325        @righthyphenmin = metadata.dig('hyphenmins', 'typesetting', 'right') || metadata.dig('hyphenmins', 'generation', 'right')
326        # FIXME That’s not nice
327        if respond_to? :synonyms
328          @synonyms = metadata.dig('texlive', 'synonyms') || []
329          @encoding = metadata.dig('texlive', 'encoding')
330        end
331        @message = metadata.dig('texlive', 'message')
332        @legacy_patterns = metadata.dig('texlive', 'legacy_patterns')
333        @use_old_loader = metadata.dig('texlive', 'use_old_loader')
334        @use_old_patterns_comment = metadata.dig('texlive', 'use_old_patterns_comment')
335        @description = metadata.dig('texlive', 'description')
336        @babelname = metadata.dig('texlive', 'babelname')
337        @package = metadata.dig('texlive', 'package')
338        licences = metadata.dig('licence')
339        raise NoLicence unless licences
340        licences = [licences] unless licences.is_a? Array
341        @licences = licences.map do |licence|
342          raise bcp47 if licence.is_a? String
343          next if licence.values == [nil]
344          licence.dig('name') || 'custom'
345        end.compact
346        authors = metadata.dig('authors')
347        if authors
348          @authors = authors.map do |author|
349            author['name']
350          end
351        else
352          @authors = nil
353          raise NoAuthor.new # FIXME
354        end
355        @known_bugs = metadata.dig('known_bugs')
356
357        # raise NoAuthor unless @authors && @authors.count > 0 # TODO Later ;-) AR 2018-09-13
358
359        metadata
360      end
361    end
362  end
363end
364