• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env ruby
2
# Input locations, all resolved relative to the data directory.
$path_data = "data"
# sed script read further down for the Thai character/code rules.
$filename_thai_enc = File.join($path_data, *%w[thai conv-utf8-hex.sed])
# Unicode character database table.
$filename_unicode_data = File.join([$path_data, "UnicodeData.txt"])
6
7
# Character name and general category for the code points 0x0E01..0x0E5B
# (the Thai range), keyed by the integer code point.
$unicode_name = {}
$unicode_type = {}

# UnicodeData.txt is a semicolon-separated table whose first three fields are
# the code point in hex, the character name and the general category, e.g.
#   00F0;LATIN SMALL LETTER ETH;Ll;0;L;;;;;N;;Icelandic;00D0;;00D0
#   FB01;LATIN SMALL LIGATURE FI;Ll;0;L;<compat> 0066 0069;;;;N;;;;;
# File.foreach closes the file when iteration ends; a bare File.open(...).grep
# would leave the handle open until it is garbage collected.
File.foreach($filename_unicode_data) do |line|
	# Only consider lines that start with an (uppercase hex) code point field.
	next unless line =~ /^([0-9A-F]*);[^;]*;[^;]*;.*$/

	unicode, name, category = line.split(/;/)
	codepoint = unicode.hex
	next unless (0x0E01..0x0E5B).cover?(codepoint)

	$unicode_name[codepoint] = name
	$unicode_type[codepoint] = category
end
20
# Name of the encoding being generated; it also names the output file.
enc = "lth2"
$filename_encoding2unicode = File.join($path_data, "enc2unicode", "#{enc}.dat")
$file_encoding2unicode = File.open($filename_encoding2unicode, "w")

# The output handle is kept in its global, but begin/ensure guarantees it is
# flushed and closed even if a malformed input line raises part-way through.
begin
	File.foreach($filename_thai_enc) do |line|
		# Rules of interest have the form  s/<character>/^^<hex code>...
		rule = /^s\/([^\/]*)\/\^{2}([^\/]*)/.match(line)
		unless rule
			# Anything else is echoed to stdout so it can be inspected.
			puts line
			next
		end

		code = rule[2].hex
		# 'U' decodes the first UTF-8 character of the pattern to its code point.
		unicode_code = rule[1].unpack('U').first

		# Third column is "1" when the general category recorded for the code
		# point matches Lo or Mn, and empty otherwise (including unknown code
		# points). NOTE(review): presumably marks letter-like characters for
		# the consumer of the .dat file -- confirm.
		letter_flag = ($unicode_type[unicode_code] =~ /(Lo|Mn)/) ? "1" : ""

		$file_encoding2unicode.puts(format("0x%02X\tU+%04X\t%s\t", code, unicode_code, letter_flag))
	end
ensure
	$file_encoding2unicode.close
end