• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/perl
2
3#
4# Copyright (C) 2017 and later: Unicode, Inc. and others.
5# License & terms of use: http://www.unicode.org/copyright.html
6#
7
8use strict;
9use Unicode::UCD 'charinfo';
10use Unicode::Normalize;
11use utf8;
12use open ':utf8';
13
14my $printout = 0;
15my $braces = 0;
16my $colls = 0;
17my $aliased = 0;
18my $newName = "";
19my $filename;
20my $suffix;
21my $locale;
22
23NEW_FILE:
24foreach my $arg (@ARGV) {
25  if($newName =~ /^$/) {
26    $locale = $arg;
27    $locale =~ s#^.*/##g;
28    $locale =~ s/\.txt//;
29  } else {
30    $newName = "";
31  }
32  my $command = "/home/weiv/build/current/bin/uconv -x hex-any/Java -f utf8 -t utf8 $arg";
33  print $command."\n";
34  my @bundle = `$command`;
35  foreach $_ (@bundle) {
36 #while(<>) {
37   #print $ARGV if eof;
38   if(/^\/\//) {
39     next;
40   }
41   if(/collations/) {
42     print "found Collations\n";
43     $colls = 1;
44     if(/alias/) {
45       print "collations are aliased\n";
46       $aliased = 1;
47     }
48   }
49   if($aliased) {
50     print "processing aliased data: $_\n";
51     if(/\{/) {
52       print "Braces opened\n";
53       $braces = 1;
54     }
55     if($braces && /\"(.*)\"/) {
56       $newName = $1;
57       print "Aliasing to $newName\n";
58     }
59     if($braces && /\}/) {
60       $braces = 0;
61       print "Braces closed\n";
62       $aliased = 0;
63       print "Switching from $filename to $newName\n";
64       $arg =~ s/$locale\.txt$/$newName\.txt/;
65       print "$arg\n";
66       redo NEW_FILE;
67     }
68
69   }
70   if(/standard|phonebook|traditional|pinyin|stroke|direct/ && $colls) {
71     print "found $& collation\n";
72     $suffix = "_".uc($&);
73     if(/standard/) {
74       $suffix = "";
75     }
76   }
77   if(/Sequence/ && $colls) {
78     #binmode ARGV, ":utf8";
79     $printout = 1;
80     #$filename = $ARGV;
81     $filename = $locale;
82     if($suffix) {
83       $filename .= "_".$suffix;
84     }
85     $filename .= "_collation.html";
86     print "filename is $filename\n";
87     #open(OUT, ">:utf8", "$filename");
88     open(OUT, ">$filename");
89     printHeading($arg);
90     #next;
91   }
92   my $line = $_;
93   if($line =~ /\{/ && $printout) {
94     $braces++;
95   }
96   if($printout) {
97     print OUT processLine($line);
98     print OUT "\n";
99   }
100   if( $line =~ /\}/ && $printout) {
101     $braces--;
102     if($braces == 0) {
103       $printout = 0;
104       printFooting();
105       close(OUT);
106     }
107   }
108 }
109}
110
111sub processLine {
112  my $line = shift;
113  $_ = $line;
114  my $i = 0;
115  my $j = 0;
116  my $result;
117# remove comments
118  s#//.*$##g;
119# remove "Sequence" if present
120  s/Sequence\s*//;
121# remove leading brace if present
122  s/^\s*{//;
123# remove trailing brace if present
124  s/}\s*$//;
125# remove trailing quote
126  s/"\s*$//;
127#remove lead quote
128  s/^\s*"//;
129#separate options
130  s/(\[.*\])/\n\1/g;
131#separate resets
132  s/\s*\&\s*/\n\& /g;
133#separate strengths and insert spaces
134  s/\s*(<{1,4})\s*/\n\1 /g;
135#separate equals and insert spaces
136  s/\s*=\s*/\n= /g;
137
138# break into individual reset/strength/setting lines
139  my @lines = split(/\n/);
140
141  my $line;
142  my $name;
143  my $spanEnd = "";
144  my $result = "";
145  my $names = "";
146  my $codes = "";
147  my $lrm = "";
148
149  foreach $line (@lines) {
150    # skip empty lines
151    if($line =~ /^$/) {
152      next;
153    }
154    $spanEnd = "";
155    $name = "";
156    $lrm = "";
157    $line = NFC($line);
158    # for resets and strengths we will get name for elements
159    if($line =~ /<{1,4} |= |& \[.*\]|& /) {
160      $name = "<span title=\"";
161      $names = "";
162      $codes = "";
163      my $start = $&;
164      my $rest = $';
165      for ($j = 0; $j < length($rest); $j++) {
166	my $char = substr($rest, $j, 1);
167	my $charVal = ord($char);
168	# some of elements are part of the syntax, so they are
169	# entered without translation to the name
170	if($charVal == 0x002F || $charVal == 0x007C) {
171	  $name .= $codes.$names." $char ";
172	  $codes = "";
173	  $names = "";
174	} elsif($charVal == 0x0027) { #quote requires more processing
175	  #$name .= "'";
176	} else {
177	  my $charinfo = charinfo($charVal);
178	  $codes .= $charinfo->{'code'}." ";
179	  $names .= "{".$charinfo->{'name'}."} ";
180          if($charinfo->{'bidi'} eq "R" || $charinfo->{'bidi'} eq "AL") {
181            $lrm = "&lrm;";
182          }
183	  #$name .= $charinfo->{'code'}." {".$charinfo->{'name'}."} ";
184	}
185      }
186      $name .= $codes.$names."\" >";
187      $spanEnd = "</span>";
188    }
189    #print $name."\n";
190    if($line =~ /^<<<</) {
191      $line = "    $line";
192    } elsif($line =~ /^<<</) {
193      $line = "   $line";
194    } elsif($line =~ /^<</) {
195      $line = "  $line";
196    } elsif($line =~ /^</) {
197      $line = " $line";
198    } elsif($line =~ /^=/) {
199      $line = "    $line";
200    }
201    # insert spaces around vertical bars (fix prefixes)
202
203    # insert spaces around slashes (fix expansions)
204    $line =~ s#/# / #g;
205    # replace &
206    $line =~ s/\&/&amp;/g;
207    # replace spaces
208    $line =~ s/ /&nbsp;/g;
209    # replace <
210    $line =~ s/</&lt;/g;
211    # replace >
212    $line =~ s/>/&gt;/g;
213
214    #$lines[$i] = $name.$lrm.$line."</span><br>";
215    #$i++;
216    $result .=  $name.$lrm.$line.$spanEnd."<br>\n";
217  }
218
219  #$_ = join("\n", @lines);
220  return $result;
221
222}
223
224sub printHeading {
225my $filename = shift;
226$filename =~ s/\.txt//;
227print OUT <<"EndOfHeading";
228<html>
229<head>
230<meta http-equiv="content-type" content="text/html; charset=utf-8">
231</head>
232# Collation data resource bundle generated for locale: $filename<br>
233# For platform icu reference platform UCA<br><br>
234
235
236$filename&nbsp;{<br>
237&nbsp;&nbsp;CollationElements&nbsp;{<br>
238&nbsp;&nbsp;&nbsp;&nbsp;Sequence&nbsp;{<br>
239EndOfHeading
240}
241
242sub printFooting {
243print OUT <<"EndOfFooting";
244&nbsp;&nbsp;&nbsp;&nbsp;}<br>
245&nbsp;&nbsp;}<br>
246}<br>
247
248</pre>
249</html>
250EndOfFooting
251}
252