#!/usr/bin/perl
#*
#*******************************************************************************
#* Copyright (C) 2006, International Business Machines
#* Corporation and others. All Rights Reserved.
#*******************************************************************************
#*
#* file name: genspva.pl
#* encoding: US-ASCII
#* tab size: 8 (not used)
#* indentation:4
#*
#* Created by: Ram Viswanadha
#*
#* This file filters iso15924-utf8-<date>.txt
#*

use strict;
use warnings;

use File::Find;
use File::Basename;
use IO::File;
use Cwd;
use File::Copy;
use Getopt::Long;
use File::Path;
use File::Copy;

#run the program
main();

#---------------------------------------------------------------------
# The main program
#
# Reads the script (sc) entries from the property value aliases file given
# by --prop and the script list given by --iso15924, then writes one
# "sc ; <code> ; <code>" record to <destdir>/SyntheticPropertyValueAliases.txt
# for every ISO 15924 code that is neither already present in the --prop
# file nor a private-use code.
#
# When --code-start is given, matching UScriptCode enum entries are also
# printed to standard output, numbered consecutively from that value.
#
# Dies if any of the three files cannot be opened or if the output file
# cannot be closed; prints the usage text and exits with status 1 when the
# options are invalid or a required option is missing.
sub main {
    my ($destdir, $iso, $prop, $code);

    # Getopt::Long strips the leading "--" from option specifications.
    GetOptions(
        "--destdir=s"    => \$destdir,
        "--iso15924=s"   => \$iso,
        "--prop=s"       => \$prop,
        "--code-start=s" => \$code,
    ) or usage(1);
    usage(1) unless defined $destdir;
    usage(1) unless defined $iso;
    usage(1) unless defined $prop;

    my $outfile = "$destdir/SyntheticPropertyValueAliases.txt";
    my $propFH = IO::File->new($prop, "r")
        or die "could not open the file $prop for reading: $! \n";
    my $isoFH = IO::File->new($iso, "r")
        or die "could not open the file $iso for reading: $! \n";
    my $outFH = IO::File->new($outfile, "w")
        or die "could not open the file $outfile for writing: $! \n";

    # Keep only the script (sc) lines of the property value aliases file.
    my @propLines;
    while (my $propLine = <$propFH>) {
        next unless $propLine =~ /sc ; /;
        push(@propLines, $propLine);
    }

    printHeader($outFH);
    if (defined $code) {
        print "Please add the following to UScriptCode enum in uscript.h.\n";
        print "#ifndef U_HIDE_DRAFT_API\n";
    }

    while (my $line = <$isoFH>) {
        next if $line =~ /^#/;      #skip if the line starts with a comment char
        $line =~ s/\r?\n\z//;       #drop the line ending (LF or CRLF)
        next if $line =~ /^\s*$/;   #blank lines carry no script code

        # The first field is the script code, the third is the name.
        my ($script, undef, $name) = split(/;/, $line, 4);
        next unless defined $script && $script ne '';

        #sc ; Arab
        my $outstr = "sc ; $script";

        # search the propLines to make sure that this script code is not
        # encoded in Unicode; a literal substring test keeps any regex
        # metacharacters in the input from being interpreted
        my $encoded = 0; #false
        foreach my $propLine (@propLines) {
            if (index($propLine, $outstr) >= 0) {
                $encoded = 1;
                last;
            }
        }
        next if $encoded;

        #ignore private use codes
        next if $script =~ /Qa[ab][a-z]/;

        #if($script eq "Qaaa"){
        #    $outstr = $outstr." ; Private_Use_Start\n";
        #}elsif($script eq "Qabx"){
        #    $outstr = $outstr." ; Private_Use_End\n";
        #}else{
        #    $outstr = $outstr." ; $script \n";
        #}

        print {$outFH} "$outstr ; $script \n";

        #print to console
        if (defined $code) {
            # Names that are missing or contain a parenthesis, whitespace,
            # a comma or a non-ASCII byte cannot be turned into an
            # identifier, so the four-letter code is used instead.
            if (!defined $name || $name eq '' || $name =~ /[(\s,\x80-\xFF]/) {
                $name = $script;
            }
            $name =~ s/-/_/g;

            my $scriptcode = "USCRIPT_" . uc($name);
            print " $scriptcode = $code, /* $script */\n";
            $code++;
        }
    }

    if (defined $code) {
        print "#endif /* U_HIDE_DRAFT_API */\n";
    }

    close($isoFH);
    close($propFH);
    # Buffered write errors only surface when the handle is closed.
    close($outFH)
        or die "could not close the file $outfile: $! \n";
}
#-----------------------------------------------------------------------
# Writes the copyright banner and explanatory comments that start the
# generated file.
#
# Parameters:
#   $outFH - output handle, already opened for writing
#
# The end year of the copyright range is taken from the local clock.
sub printHeader {
    my ($outFH) = @_;
    my $YEAR = (localtime)[5] + 1900;
    #We will print our copyright here + warnings
    print {$outFH} <<"END_HEADER_COMMENT";
########################################################################
# Copyright (c) 2006-$YEAR, International Business Machines
# Corporation and others. All Rights Reserved.
########################################################################
# file name: SyntheticPropertyValueAliases.txt
# encoding: US-ASCII
# tab size: 8 (not used)
# indentation: 4
# created by: gensvpa.pl
########################################################################

# This file follows the format of PropertyValueAliases.txt
# It contains synthetic property value aliases not present
# in the UCD. Unlike PropertyValueAliases.txt, it should
# NOT contain a version number.

########################################################################
# THIS FILE IS MACHINE-GENERATED, DON'T PLAY WITH IT IF YOU DON'T KNOW
# WHAT YOU ARE DOING, OTHERWISE VERY BAD THINGS WILL HAPPEN!
########################################################################

# set the same names as short and long names to fit the syntax without
# inventing names that we would have to support forever

# Script (sc)

END_HEADER_COMMENT
}
#-----------------------------------------------------------------------
# Prints the command line help to standard output and terminates.
#
# Parameters:
#   $status - optional process exit status; defaults to 0
sub usage {
    my ($status) = @_;
    $status = 0 unless defined $status;
    print <<"END";
Usage:
gensvpa.pl
Options:
 --destdir=<directory>
 --iso15924=<file name>
 --prop=<PropertyValueAliases.txt>
 --code-start=s
e.g.: gensvpa.pl --destdir=<icu>/source/tools/genpname --iso15924=iso15924-utf8-20041025.txt --prop=<icu>/source/data/unidata --code-start=60
END
    exit($status);
}