1# encoding: UTF-8 2 3module TeX 4 module Hyphen 5 class Converter 6 def self.scan_line(line) 7 m = line.match /^(0x\h{2})\tU\+(\h{4})(?:\t(1?)(?:\t(\w+)))?$/ 8 return nil unless m 9 10 return m[1].to_i(16), m[2].to_i(16).chr(Encoding::UTF_8), m[3].to_i, m[4] 11 end 12 13 def read(conversion) 14 @mapping = { } 15 File.read(conversion).each_line do |line| 16 next if line =~ /^#/ 17 18 eightbit, usv = Converter.scan_line line 19 @mapping[eightbit] = usv.chr 20 end 21 end 22 23 def convert(filename) 24 raise "Please define the encoding mapping first with #read" unless @mapping 25 doconvert = false 26 File.open(filename, external_encoding: Encoding::ASCII_8BIT).each_line do |line| 27 if line =~ /\\patterns{/ 28 doconvert = true 29 next 30 end 31 doconvert = false if doconvert && line =~ /}/ 32 33 if doconvert 34 puts (line.strip.each_byte.map do |byte| 35 @mapping[byte] 36 end || '').join 37 end 38 end 39 end 40 end 41 end 42end 43