• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1require 'yaml'
2require 'hydra'
3require 'byebug'
4
5require_relative 'path'
6
# Convenience helpers added to String for processing TeX pattern files.
class String
  # Trims surrounding whitespace, then removes every TeX comment
  # (a percent sign and everything after it on the same line).
  def superstrip
    trimmed = strip
    trimmed.gsub(/%.*$/, '')
  end

  # Breaks the string into its whitespace-separated pieces.
  # Returns an Array of Strings (empty for a blank string).
  def supersplit
    one_item_per_line = strip.gsub(/\s+/m, "\n")
    one_item_per_line.split("\n")
  end

  # Returns a copy with all whitespace and hyphens removed.
  def safe
    gsub(/[\s-]/, '')
  end

  # Capitalises each whitespace-separated word and joins the words
  # with single spaces.
  def titlecase
    words = split
    words.map { |word| word.capitalize }.join(' ')
  end
end
24
25module TeX
26  module Hyphen
27    class InvalidMetadata < StandardError; end
28    class NoAuthor < InvalidMetadata; end
29    class NoLicence < InvalidMetadata; end
30
31    include PATH
32
33    class Author
34      def initialize(name,surname,email,contacted1,contacted2)
35        @name       = name
36        @surname    = surname
37        @email      = email
38        # this mostly means if email has been recently checked
39        @contacted1 = contacted1
40        # this means if we made more cooperation with author,
41        # exchanging patches etc.
42        @contacted2 = contacted2
43      end
44
45      attr_reader :name, :surname, :email
46
47      def self.authors
48        @@authors ||= YAML::load File.read File.expand_path 'authors.yml', __dir__
49      end
50
51      def self.all
52        authors.values
53      end
54
55      def self.[] a
56        authors[a]
57      end
58    end
59
60    class Language
      # Tag identifying the language; it is the <tag> part of the
      # hyph-<tag>.tex pattern file name.
      attr_reader :bcp47

      # Marker line (42 '=' signs); extract_metadata stops collecting the
      # header at the first line containing it.
      @@eohmarker = '=' * 42
64
65      def iso639
66        @bcp47.split('-').first
67      end
68
      # FIXME Find something better than all that “extract_metadata unless @blah” nonsense
70      def babelname
71        extract_metadata unless @babelname
72        @babelname
73      end
74
75      def description
76        extract_metadata unless @description
77        @description
78      end
79
80      def use_old_loader
81        extract_metadata unless @use_old_loader
82        @use_old_loader
83      end
84
85      def use_old_patterns_comment
86        extract_metadata unless @use_old_patterns_comment
87        @use_old_patterns_comment
88      end
89
90      def legacy_patterns
91        extract_metadata unless @legacy_patterns
92        @legacy_patterns
93      end
94
95      def message
96        extract_metadata unless @message
97        @message
98      end
99
100      def known_bugs
101        extract_metadata unless @known_bugs
102        @known_bugs
103      end
104
105      def initialize(bcp47 = nil)
106        @bcp47 = bcp47
107      end
108
109      def self.languages
110        @@languages ||= Dir.glob(File.join(PATH::TEX, 'hyph-*.tex')).inject [] do |languages, texfile|
111          bcp47 = texfile.gsub /^.*\/hyph-(.*)\.tex$/, '\1'
112          # languages << [bcp47, Language.new(bcp47)]
113          languages << [bcp47, nil]
114        end.to_h
115      end
116      @@languages = Language.languages
117
118      def self.all
119        @@all ||= languages.map do |bcp47, language|
120          next if bcp47 == 'sr-cyrl' # FIXME Remove later
121          @@languages[bcp47] ||= Language.new(bcp47)
122        end.compact
123      end
124
125      def self.all_by_iso639
126        all.inject(Hash.new) do |maintags, language|
127          (maintags[language.iso639] ||= Array.new) << language
128          maintags
129        end
130      end
131
132      def self.find_by_bcp47(bcp47)
133        languages[bcp47]
134      end
135
136      def private_use?
137        @bcp47.length == 3 || @bcp47[3] == '-' and @bcp47[0] == 'q' and ('a'..'t').include? @bcp47[1]
138      end
139
140      # TODO This should probably become “macrolanguage name” or something similar
141      # @@displaynames = {
142      #   'el' => 'Greek',
143      #   'nb' => 'Norwegian',
144      #   'nn' => 'Norwegian',
145      #   'sh' => 'Serbian',
146      # }
147
148      def licences
149        extract_metadata unless @licences
150        @licences
151      end
152
153      def lefthyphenmin
154        extract_metadata unless @lefthyphenmin
155        @lefthyphenmin
156      end
157
158      def righthyphenmin
159        extract_metadata unless @righthyphenmin
160        @righthyphenmin
161      end
162
163      # Strictly speaking a misnomer, because grc-x-ibycus should also return true.
164      # But useful for a number of apostrophe-related routines
165      def isgreek?
166        ['grc', 'el-polyton', 'el-monoton'].include? @bcp47
167      end
168
169      def serbian?
170        @bcp47 =~ /^sh-/
171      end
172
173      def italic?
174        ['it', 'pms', 'rm'].include? @bcp47
175      end
176
177      def has_apostrophes?
178        begin
179          !isgreek? && patterns.any? { |p| p =~ /'/ }
180        rescue Errno::ENOENT
181          false
182        end
183      end
184
185      def has_hyphens?
186        begin
187          patterns.any? { |p| p =~ /-/ }
188        rescue Errno::ENOENT
189          false
190        end
191      end
192
193      def extract_apostrophes
194        plain, with_apostrophe = Array.new, nil
195
196        patterns.each do |pattern|
197          plain << pattern
198          if pattern =~ /'/ && !isgreek?
199            pattern_with_apostrophe = pattern.gsub(/'/,"’")
200            plain << pattern_with_apostrophe
201            (with_apostrophe ||= []) << pattern_with_apostrophe
202          end
203        end
204
205        { plain: plain, with_apostrophe: with_apostrophe }
206      end
207
208      def extract_characters # FIXME Turkish and others iİ; check Church Slavonic
209        characters = Array.new
210
211        characters_indexes = patterns.join.gsub(/[.0-9]/,'').unpack('U*').sort.uniq
212        characters_indexes.each do |c|
213          ch = [c].pack('U')
214          characters << ch + ch.upcase
215          characters << "’’" if ch == "'" && !isgreek?
216        end
217
218        characters
219      end
220
221      def comments_and_licence # Major TODO extract everything into YAML, and write .yml
222        @comments_and_licence ||= readtexfile.gsub(/(.*)\\patterns.*/m,'\1')
223      end
224
225      def authors
226        extract_metadata unless @authors
227        @authors
228      end
229
230      def github_link
231        sprintf 'https://github.com/hyphenation/tex-hyphen/tree/master/hyph-utf8/tex/generic/hyph-utf8/patterns/tex/hyph-%s.tex', @bcp47
232      end
233
234      def <=>(other)
235        # TODO Remove when practical
236        a = self.babelname
237        b = other.babelname
238
239        if a == 'serbian' && b == 'serbianc'
240          return -1
241        elsif a == 'serbianc' && b == 'serbian'
242          return 1
243        end
244
245        self.bcp47 <=> other.bcp47
246      end
247
248      @@texfile = Hash.new
249      def readtexfile(bcp47 = @bcp47)
250        begin
251          @@texfile[bcp47] ||= File.read(File.join(PATH::TEX, sprintf('hyph-%s.tex', bcp47)))
252        rescue Errno::ENOENT
253          @@texfile[bcp47] = ""
254        end
255      end
256
257      def patterns
258        @patterns ||= if @bcp47 == 'eo' then
259          readtexfile.superstrip.
260            gsub(/.*\\patterns\s*\{(.*)\}.*/m,'\1').
261            #
262            gsub(/\\adj\{(.*?)\}/m,'\1a. \1aj. \1ajn. \1an. \1e.').
263            gsub(/\\nom\{(.*?)\}/m,'\1a. \1aj. \1ajn. \1an. \1e. \1o. \1oj. \1ojn. \1on.').
264            gsub(/\\ver\{(.*?)\}/m,'\1as. \1i. \1is. \1os. \1u. \1us.').
265            #
266            supersplit
267        else
268          readtexfile(if ['nb', 'nn'].include? @bcp47 then 'no' else @bcp47 end).superstrip.
269            gsub(/.*\\patterns\s*\{(.*?)\}.*/m,'\1').
270            supersplit
271        end
272      end
273
274      def exceptions
275        unless @exceptions
276          if readtexfile.superstrip.index('\hyphenation')
277            @exceptions = readtexfile.superstrip.gsub(/.*\\hyphenation\s*\{(.*?)\}.*/m,'\1').supersplit
278            if @exceptions != ""
279              @hyphenation = @exceptions.inject [] do |exceptions, exception|
280                exceptions << [exception.gsub('-', ''), exception]
281              end.to_h
282            else
283              @hyphenation = { }
284            end
285          else
286            @exceptions = ""
287            @hyphenation = { }
288          end
289        end
290
291        @exceptions
292      end
293
294      def hyphenate(word)
295        exceptions
296        return @hyphenation[word] if @hyphenation[word] # FIXME Better name
297
298        unless @hydra
299          begin
300            metadata = extract_metadata
301            @hydra = Hydra.new patterns, :lax, '', metadata
302          rescue InvalidMetadata
303            @hydra = Hydra.new patterns
304          end
305        end
306        @hydra.showhyphens(word)
307      end
308
      # Parses the YAML metadata header of hyph-<tag>.tex and stores its
      # fields in instance variables.
      #
      # Returns the parsed metadata Hash.
      #
      # Raises InvalidMetadata when the header is not valid YAML or does not
      # parse to a Hash, NoLicence when there is no 'licence' entry, NoAuthor
      # when there is no 'authors' entry, and a RuntimeError whose message is
      # the tag when a licence entry is a plain String.
      #
      # Instance variables assigned before a raise stay set. The file is read
      # directly with File.read (not through readtexfile), so a missing file
      # is not rescued here.
      def extract_metadata
        header = ""
        File.read(File.join(PATH::TEX, sprintf('hyph-%s.tex', @bcp47))).each_line do |line|
          # The header ends at the \patterns command or at the marker line.
          break if line =~ /\\patterns|#{@@eohmarker}/
          # Drop the leading "% " comment prefix, then any remaining % comment.
          header += line.gsub(/^% /, '').gsub(/%.*/, '')
        end
        begin
          metadata = YAML::load header
          raise InvalidMetadata unless metadata.is_a? Hash
        rescue Psych::SyntaxError
          raise InvalidMetadata
        end

        @name = metadata.dig('language', 'name')
        # Typesetting values take precedence over generation values.
        @lefthyphenmin = metadata.dig('hyphenmins', 'typesetting', 'left') || metadata.dig('hyphenmins', 'generation', 'left')
        @righthyphenmin = metadata.dig('hyphenmins', 'typesetting', 'right') || metadata.dig('hyphenmins', 'generation', 'right')
        # FIXME That’s not nice
        # Only objects that respond to :synonyms get these two fields
        # (presumably a subclass defined elsewhere — not visible here).
        if respond_to? :synonyms
          @synonyms = metadata.dig('texlive', 'synonyms') || []
          @encoding = metadata.dig('texlive', 'encoding')
        end
        @message = metadata.dig('texlive', 'message')
        @legacy_patterns = metadata.dig('texlive', 'legacy_patterns')
        @use_old_loader = metadata.dig('texlive', 'use_old_loader')
        @use_old_patterns_comment = metadata.dig('texlive', 'use_old_patterns_comment')
        @description = metadata.dig('texlive', 'description')
        @babelname = metadata.dig('texlive', 'babelname')
        @package = metadata.dig('texlive', 'package')
        licences = metadata.dig('licence')
        raise NoLicence unless licences
        # A single licence entry is treated as a one-element list.
        licences = [licences] unless licences.is_a? Array
        @licences = licences.map do |licence|
          # NOTE(review): raises a bare RuntimeError carrying only the tag —
          # looks like a debugging aid; confirm before relying on it.
          raise bcp47 if licence.is_a? String
          # Skip entries whose only value is nil.
          next if licence.values == [nil]
          licence.dig('name') || 'custom'
        end.compact
        authors = metadata.dig('authors')
        if authors
          @authors = authors.map do |author|
            author['name']
          end
        else
          @authors = nil
          raise NoAuthor.new # FIXME
        end
        # Only reached when both licence and authors are present.
        @known_bugs = metadata.dig('known_bugs')

        # raise NoAuthor unless @authors && @authors.count > 0 # TODO Later ;-) AR 2018-09-13

        metadata
      end
360    end
361  end
362end
363