#!/usr/bin/env ruby

# Generates data/enc2unicode/lth2.dat from two inputs:
#   * data/UnicodeData.txt         -- supplies name and general category
#                                     for each code point
#   * data/thai/conv-utf8-hex.sed  -- sed rules of the form
#                                     s/<character>/^^<hex>/...
# Every sed rule found becomes one tab-separated output row:
#   0x<hex after ^^>  U+<code point of character>  <flag>
# where <flag> is "1" when the character's general category contains
# "Lo" or "Mn", and empty otherwise.  Lines of the sed file that are not
# such rules are echoed to stdout so they can be inspected.

$path_data = "data"
$filename_thai_enc = File.join($path_data, "thai", "conv-utf8-hex.sed")
$filename_unicode_data = File.join($path_data, "UnicodeData.txt")

# Both tables are keyed by the integer code point.
$unicode_name = {}
$unicode_type = {}

# Sample UnicodeData.txt records (semicolon-separated; the first three
# fields are code point, character name and general category):
# 00F0;LATIN SMALL LETTER ETH;Ll;0;L;;;;;N;;Icelandic;00D0;;00D0
# FB01;LATIN SMALL LIGATURE FI;Ll;0;L;<compat> 0066 0069;;;;N;;;;;
unicode_record = /^([0-9A-F]*);[^;]*;[^;]*;.*$/

# Only this code point range is recorded (presumably the Thai characters,
# going by the name of the sed input -- confirm against the Unicode charts).
wanted_codepoints = 0x0E01..0x0E5B

# File.foreach closes the file once iteration finishes; the previous
# File.open(...).grep form left the handle open.
File.foreach($filename_unicode_data) do |line|
  next unless line =~ unicode_record

  hex_code, name, category = line.split(/;/)
  codepoint = hex_code.hex
  next unless wanted_codepoints.cover?(codepoint)

  $unicode_name[codepoint] = name
  $unicode_type[codepoint] = category
end

enc = "lth2"
$filename_encoding2unicode = File.join($path_data, "enc2unicode", "#{enc}.dat")
$file_encoding2unicode = File.open($filename_encoding2unicode, "w")

# Matches  s/<character>/^^<hex>  and captures <character> and <hex>.
sed_rule = %r{^s/([^/]*)/\^\^([^/]*)}

begin
  # The sed file holds non-ASCII characters; reading it explicitly as UTF-8
  # keeps the regexp match independent of the process locale.
  File.foreach($filename_thai_enc, encoding: "UTF-8") do |line|
    rule = sed_rule.match(line)
    unless rule
      # Not a conversion rule: show it on stdout instead of dropping it.
      puts line
      next
    end

    letter = rule[1]
    code = rule[2].hex
    # 'U' decodes the first UTF-8 character of the capture to its code point.
    unicode_code = letter.unpack("U").first

    # Code points outside the recorded range have no category (nil -> "").
    category = $unicode_type[unicode_code].to_s
    flag = (category =~ /(Lo|Mn)/) ? "1" : ""

    $file_encoding2unicode.puts format("0x%02X\tU+%04X\t%s\t", code, unicode_code, flag)
  end
ensure
  # Close (and flush) the output even when reading the sed file fails.
  $file_encoding2unicode.close
end