• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
# encoding: UTF-8
2
module TeX
  module Hyphen
    # Converts the body of a TeX \patterns{...} block from an 8-bit encoding
    # to UTF-8, using a byte => character table loaded from a mapping file.
    #
    # Usage: call #read with the mapping file first, then #convert with the
    # pattern file; converted lines are written to standard output.
    class Converter
      # Parses one line of a mapping file.
      #
      # Recognised shape (fields separated by single tab characters):
      #
      #   0xHH  U+HHHH  [flag  name]
      #
      # where +flag+ is either "1" or empty and +name+ is a run of word
      # characters. The last two fields are optional, but only as a pair.
      #
      # @param line [String] a single line of the mapping file
      # @return [Array, nil] +[byte, char, flag, name]+ where +byte+ is the
      #   Integer value of the 0xHH field, +char+ is the UTF-8 character for
      #   the U+HHHH field, +flag+ is an Integer (0 when absent or empty) and
      #   +name+ is a String or nil; nil when the line does not match
      def self.scan_line(line)
        m = line.match(/^(0x\h{2})\tU\+(\h{4})(?:\t(1?)(?:\t(\w+)))?$/)
        return nil unless m

        [m[1].to_i(16), m[2].to_i(16).chr(Encoding::UTF_8), m[3].to_i, m[4]]
      end

      # Loads the byte => UTF-8 character table from a mapping file,
      # replacing any table loaded earlier.
      #
      # Lines starting with "#" are comments. Lines that .scan_line cannot
      # parse (blank lines included) are skipped.
      #
      # @param conversion [String] path of the mapping file
      def read(conversion)
        @mapping = {}
        File.foreach(conversion) do |line|
          next if line.start_with?('#')

          eightbit, usv = Converter.scan_line(line)
          # scan_line yields nil for blank/unparseable lines; skipping them
          # avoids calling methods on nil.
          next unless eightbit

          @mapping[eightbit] = usv
        end
      end

      # Prints the UTF-8 conversion of every line found between a line
      # containing "\patterns{" and the next line containing "}" (both
      # delimiter lines are excluded from the output).
      #
      # Each such line is stripped and translated byte by byte through the
      # table loaded by #read. Bytes without an entry in the table contribute
      # nothing to the output.
      #
      # @param filename [String] path of the 8-bit encoded pattern file
      # @raise [RuntimeError] when #read has not been called yet
      def convert(filename)
        raise "Please define the encoding mapping first with #read" unless @mapping

        doconvert = false
        # Block form guarantees the file handle is closed when done.
        File.open(filename, external_encoding: Encoding::ASCII_8BIT) do |file|
          file.each_line do |line|
            if line =~ /\\patterns\{/
              doconvert = true
              next
            end
            doconvert = false if doconvert && line =~ /\}/
            next unless doconvert

            # nil entries (unmapped bytes) become empty strings in #join.
            puts line.strip.each_byte.map { |byte| @mapping[byte] }.join
          end
        end
      end
    end
  end
end
43