1 /*---------------------------------------------------------------------------*
2 * make_ve_grammar.c *
3 * *
4 * Copyright 2007, 2008 Nuance Communciations, Inc. *
5 * *
6 * Licensed under the Apache License, Version 2.0 (the 'License'); *
7 * you may not use this file except in compliance with the License. *
8 * *
9 * You may obtain a copy of the License at *
10 * http://www.apache.org/licenses/LICENSE-2.0 *
11 * *
12 * Unless required by applicable law or agreed to in writing, software *
13 * distributed under the License is distributed on an 'AS IS' BASIS, *
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15 * See the License for the specific language governing permissions and *
16 * limitations under the License. *
17 * *
18 *---------------------------------------------------------------------------*/
19
20
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24
25 #include "plog.h"
26 #include "passert.h"
27 #include "duk_args.h"
28 #include "duk_err.h"
29 #include "ptrd.h"
30
31 #include "srec_arb.h"
32 #include "simapi.h"
33
34 #include "PFileSystem.h"
35 #include "PANSIFileSystem.h"
36
37 #define MAX_FILE_NAME_LEN 64
38 #define DEFAULT_WWTRIPHONE_SILMODE 3
39
40
/*
 * Tell whether an allophone name denotes a word-specific phoneme; such
 * phonemes are not enrolled into the ve grammar.
 *
 * Only the first character of allo_phoneme is inspected. It is compared
 * against a fixed set of marker characters (the digit '6' is not in the set).
 *
 * Returns 1 when the first character is one of the markers, 0 otherwise
 * (an empty name yields 0).
 */
int ws_verify(char * allo_phoneme){
  static const char ws_markers[] = "(.012345789=>BFGHKMQRWXY[\\|+";
  const char first = allo_phoneme[0];

  /* strchr() also matches the terminating NUL, so rule it out explicitly */
  if(first == '\0')
    return 0;

  return (strchr(ws_markers, first) != NULL) ? 1 : 0;
}
75
main(int argc,char ** argv)76 int main (int argc, char **argv)
77 {
78 int i;
79 char filen[MAX_FILE_NAME_LEN]="";
80 CA_Arbdata *ca_arbdata = NULL; /* new, link btw acc/syn */
81 char *arbfile = NULL;
82 char *base = NULL;
83
84 FILE* pfile;
85 FILE* pFile_PCLG;
86 FILE* pFile_map;
87 FILE* pFile_P;
88 FILE* pFile_Grev;
89 FILE* pFile_script;
90
91 int num_hmms;
92 int num_wd = 0;
93 int script_line = 0;
94 int cflag = 0, fnode = 0;
95 int sil_model = DEFAULT_WWTRIPHONE_SILMODE;
96 int rc;
97 srec_arbdata *allotree = NULL;
98
99 nodeID startNode = 0;
100 nodeID pauEndNode = 1;
101 nodeID modelStartNode = 2;
102 nodeID modelEndNode = 3;
103 nodeID pau2StartNode = 4;
104 nodeID pau2EndNode = 5;
105 nodeID endNode = 6;
106
107 /* initial memory */
108 CHKLOG(rc, PMemInit());
109
110 if(argc<5){
111 printf("USAGE: -swiarb <swiarb file> -base <output base name>\n");
112 exit(1);
113 }
114
115
116 for(i=1; i<argc; i++) {
117 if(!strcmp(argv[i],"-swiarb")) {
118 arbfile = argv[++i];
119 printf("using swiarb from file %s\n", arbfile);
120 }
121 else if(!strcmp(argv[i],"-base")){
122 base = argv[++i];
123 }
124 else {
125 printf("error_usage: argument [%s]\n", argv[i]);
126 exit(1);
127 }
128 }
129
130 /* check arb file exist*/
131 if ( (pfile = fopen(arbfile, "r")) != NULL ){
132 fclose(pfile);
133 }
134 else{
135 printf("ERROR: the specified swiarb file does not exist.\n");
136 exit(1);
137 }
138
139
140 ca_arbdata = CA_LoadArbdata(arbfile);
141
142 allotree = (srec_arbdata*)ca_arbdata;
143 num_hmms = allotree->num_hmms;
144
145
146 /* Dump out VE .PCLG.txt, .Grev2.det.txt, .P.txt, .script and .map files; .P.txt, .script and .map are not necessary for voice enroll, so just dump out to create .g2g file. Xufang */
147
148 printf("Dumping out VE files\n");
149
150 strcat(filen,base);
151 strcat(filen,".PCLG.txt");
152 pFile_PCLG = fopen(filen,"w");
153
154 filen[0]='\0';
155 strcat(filen,base);
156 strcat(filen,".map");
157 pFile_map = fopen(filen,"w");
158
159 filen[0]='\0';
160 strcat(filen,base);
161 strcat(filen,".P.txt");
162 pFile_P = fopen(filen,"w");
163
164 filen[0]='\0';
165 strcat(filen,base);
166 strcat(filen,".Grev2.det.txt");
167 pFile_Grev = fopen(filen,"w");
168
169 filen[0]='\0';
170 strcat(filen,base);
171 strcat(filen,".script");
172 pFile_script = fopen(filen,"w");
173
174 fprintf(pFile_Grev,"0\t1\teps\t80\n");
175 fprintf(pFile_Grev,"1\t2\t%s.grxml@VE_Words\n",base);
176
177 fprintf(pFile_map,"eps %d\n",num_wd++);
178 fprintf(pFile_map,"%s.grxml@ROOT %d\n",base,num_wd++);
179 fprintf(pFile_map,"%s.grxml@VE_Words %d\n",base,num_wd++);
180 fprintf(pFile_map,"-pau- %d\n",num_wd++);
181 fprintf(pFile_map,"-pau2- %d\n",num_wd++);
182 fprintf(pFile_map,"@VE_Words %d\n",num_wd++);
183
184 fprintf(pFile_P,"0\t1\teps\t{\t\n");
185 fprintf(pFile_P,"1\t2\teps\t{\t\n");
186 fprintf(pFile_P,"2\t3\teps\t{\t\n");
187 fprintf(pFile_P,"2\t4\teps\t{\t\n");
188 fprintf(pFile_P,"3\t5\t%s.grxml@VE_Words\t%s.grxml@VE_Words\t\n",base,base);
189 fprintf(pFile_P,"4\t8\teps\t{\t\n");
190 fprintf(pFile_P,"5\t6\teps\t_3\t\n");
191 fprintf(pFile_P,"6\t7\teps\tVE_Words}\t\n");
192 fprintf(pFile_P,"7\t9\teps\t_2\t\n");
193
194 fprintf(pFile_script,"%d type=SENT.type;meaning=SENT.V;\n",script_line++);
195 fprintf(pFile_script,"%d type='NEW';V=UTT.V;\n",script_line++);
196 fprintf(pFile_script,"%d type='OLD';V=VE_Words.V;\n",script_line++);
197 fprintf(pFile_script,"%d V=UTT.V?UTT.V:'--';\n",script_line++);
198 fprintf(pFile_script,"%d V=PHONEME.V\n",script_line++);
199
200 for(i=0;i<num_hmms;i++){
201 if(ws_verify(allotree->hmm_infos[i].name))
202 continue;
203 if(!strcmp(allotree->hmm_infos[i].name,"#")){
204 sil_model = i;
205 fprintf(pFile_PCLG,"%d\t%d\thmm%d_#sil#\t-pau-\n", startNode, pauEndNode, i);
206 fprintf(pFile_PCLG,"%d\t%d\t.wb\teps\n", pauEndNode, modelStartNode);
207 }
208 else{
209 if(strlen(allotree->hmm_infos[i].name)>0){
210 if(cflag==0){
211 fnode = i;
212 cflag = 1;
213 }
214 fprintf(pFile_PCLG,"%d\t%d\thmm%d_%s\twd_hmm%d_%s\t40\n", modelStartNode, modelEndNode,
215 i,allotree->hmm_infos[i].name,i,allotree->hmm_infos[i].name);
216 fprintf(pFile_map,"wd_hmm%d_%s %d\n",i,allotree->hmm_infos[i].name,num_wd++);
217 fprintf(pFile_Grev,"1\t3\twd_hmm%d_%s\n",i,allotree->hmm_infos[i].name);
218 fprintf(pFile_P,"8\t10\twd_hmm%d_%s\t_%d\t\n",i,allotree->hmm_infos[i].name,script_line);
219 fprintf(pFile_script,"%d V=V?V:'';V=V+'wd_hmm%d_%s';\n",script_line++,i,allotree->hmm_infos[i].name);
220 }
221 }
222 }
223
224 fprintf(pFile_PCLG,"%d\t%d\t.wb\teps\n", modelEndNode, modelStartNode);
225 fprintf(pFile_PCLG,"%d\t%d\t.wb\teps\n", modelEndNode, pau2StartNode);
226 fprintf(pFile_PCLG,"%d\t%d\thmm%d_#sil#\t-pau2-\n",pau2StartNode, pau2EndNode, sil_model);
227 fprintf(pFile_PCLG,"%d\t%d\t.wb\teps\n", pau2EndNode, endNode);
228 fprintf(pFile_PCLG,"%d\n", endNode);
229
230 fprintf(pFile_Grev,"2\n");
231 for(i=fnode;i<num_hmms;i++){
232 if(ws_verify(allotree->hmm_infos[i].name))
233 continue;
234 fprintf(pFile_Grev,"3\t3\twd_hmm%d_%s\t40\n",i,allotree->hmm_infos[i].name);
235 }
236 fprintf(pFile_Grev,"3\n");
237
238 fprintf(pFile_P,"9\t11\teps\tSENT}\t\n");
239 fprintf(pFile_P,"10\t12\teps\tPHONEME}\t\n");
240 fprintf(pFile_P,"11\t13\teps\t_0\t\n");
241 fprintf(pFile_P,"12\t14\teps\t_4\t\n");
242 fprintf(pFile_P,"13\t15\teps\tROOT}\t\n");
243 fprintf(pFile_P,"14\t16\teps\teps\t\n");
244 fprintf(pFile_P,"15\t\n");
245 fprintf(pFile_P,"16\t17\teps\tUTT}\t\n");
246 fprintf(pFile_P,"16\t8\teps\t{\t\n");
247 fprintf(pFile_P,"17\t9\teps\t_1\t\n");
248
249 fclose(pFile_PCLG);
250 printf("Creating %s.PCLG.txt...\n",base);
251 fclose(pFile_Grev);
252 printf("Creating %s.Grev2.det.txt...\n",base);
253 fclose(pFile_map);
254 printf("Creating %s.map...\n",base);
255 fclose(pFile_P);
256 printf("Creating %s.P.txt...\n",base);
257 fclose(pFile_script);
258 printf("Creating %s.script...\n",base);
259 printf("SUCCESS!\n");
260
261
262 CA_FreeArbdata( ca_arbdata);
263
264 PMemShutdown();
265 return 0;
266 CLEANUP:
267 return 1;
268 }
269
270