# Rewrite a pronunciation dictionary, replacing each "lph" token of every
# pronunciation with its "sph" equivalent (presumably long vs. short phoneme
# names -- confirm against the pht file format documentation).
#
# Usage:
#   <script> -i <input dict> -ok <output dict> [-pht <phtfile>]
#
# Options:
#   -i    input dictionary; one entry per line: word <TAB> lph lph ...
#   -ok   output dictionary; one entry per line: word <SPACE> concatenated sphs
#   -pht  lph/sph table.  When omitted, the table is taken from
#         $ESRSDK/config/<language>/models/generic.pht, where <language> comes
#         from the first "LANG = xx-YY" line of the input (lower-cased, with
#         "-" replaced by ".").
#
# A file name of "-" means STDIN (for reading) or STDOUT (for writing).

use strict;
use warnings;

use Getopt::Long;

# Lookup tables filled by load_phtfile().  Only %sph_for_lph is read by the
# conversion loop below; the other two are populated but not consulted in
# this file.
my %lph_for_sph;
my %sph_for_lph;
my %lphhash;

# When true, input lines containing "##" are dropped entirely instead of
# having the "##..." suffix stripped.  Nothing in this file ever sets it.
my $skip_funnies;

# Open $path with the given mode ('<' or '>') and return the filehandle.
# $what is a short description of the file used in the error message.
# Dies if the file cannot be opened.
sub open_or_die
{
    my ($mode, $path, $what) = @_;

    # Three-argument open treats "-" as a literal file name; keep the
    # STDIN/STDOUT meaning that the two-argument form gives it.
    if ($path eq '-') {
        return $mode eq '<' ? \*STDIN : \*STDOUT;
    }

    open(my $fh, $mode, $path)
        or die "error opening $what $path: $!\n";
    return $fh;
}

# Load the lph <-> sph table from $phtfile into the lookup hashes.
#
# The first line of the file is a header and is discarded.  Every other line
# is split on whitespace: the first field is ignored, the second is the lph
# and the third is the sph (a fourth field, if any, is ignored).  Lines with
# fewer than three fields are skipped.  Two fixed entries are added after the
# file has been read: "&" <-> "&" and lph "iwt" <-> sph "#".
#
# Dies if the file cannot be opened.
sub load_phtfile
{
    my $phtfile = shift(@_);

    my $pht_fh = open_or_die('<', $phtfile, "phtfile");
    print STDERR "using phtfile $phtfile\n";

    my $header_line = <$pht_fh>;    # header, intentionally discarded

    while (my $row = <$pht_fh>) {
        $row =~ s/\s+$//;
        my (undef, $lph, $sph) = split(/\s+/, $row);
        next if !defined $lph || !defined $sph;    # blank or short row
        $lph_for_sph{$sph} = $lph;
        $sph_for_lph{$lph} = $sph;
        $lphhash{$lph}++;
    }
    close($pht_fh);

    # Built-in entries that are always present, whatever the file contains.
    foreach my $pair (["&", "&"], ["iwt", "#"]) {
        my ($lph, $sph) = @$pair;
        $lph_for_sph{$sph} = $lph;
        $sph_for_lph{$lph} = $sph;
        $lphhash{$lph}++;
    }
    return;
}

my ($phtfile, $oklongfile, $okfile);
my $usage = "usage: $0 -i <input dict> -ok <output dict> [-pht <phtfile>]\n";

GetOptions("pht=s" => \$phtfile,
           "i=s"   => \$oklongfile,
           "ok=s"  => \$okfile)
    or die $usage;
die "error: no input dict given (-i)\n$usage"   if !defined $oklongfile;
die "error: no output dict given (-ok)\n$usage" if !defined $okfile;

# An explicitly given table is loaded before any dictionary file is touched,
# so a bad -pht never truncates an existing output file.
load_phtfile($phtfile) if defined $phtfile;

my $in_fh  = open_or_die('<', $oklongfile, "input dict");
my $out_fh = open_or_die('>', $okfile,     "output dict");

while (my $line = <$in_fh>) {
    $line =~ s/\s+$//;

    if ($line =~ /^LANG\s*=\s*(\S+)/) {    # LANG = EN-US
        my $language = lc($1);
        $language =~ tr/-/./;
        # Only the first LANG line can trigger a load: once $phtfile is set
        # (by -pht or by an earlier LANG line) the table is never reloaded.
        if (!defined $phtfile) {
            die "Error: ESRSDK is not defined\n" if !defined $ENV{ESRSDK};
            $phtfile = "$ENV{ESRSDK}/config/$language/models/generic.pht";
            load_phtfile($phtfile);
        }
        print {$out_fh} "$line\n";    # header line is copied through as-is
        next;
    }

    $line =~ s/^\s+//;
    if ($line =~ /\#\#/) {
        next if $skip_funnies;
        $line =~ s/\#\#.*$//;
    }

    # word <TAB> pronunciation; a line without a tab has no pronunciation.
    my ($word, $pron) = split(/\s*\t\s*/, $line, 2);
    # NOTE(review): blank or comment-only lines still produce an output line
    # holding a single space, as before -- confirm whether that is wanted.
    $word = '' if !defined $word;
    $pron = '' if !defined $pron;

    my @sphlist;
    foreach my $lph (split(/\s+/, $pron)) {
        die "error: unknown lph $lph in $word\n"
            if !defined $sph_for_lph{$lph};
        push(@sphlist, $sph_for_lph{$lph});
    }

    print {$out_fh} "$word ", join("", @sphlist), "\n";
}

close($in_fh);
# Buffered write errors (e.g. disk full) only surface when the handle closes.
close($out_fh) or die "error closing output dict $okfile: $!\n";