1 /*
2 * Copyright (C) 2007 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 * use this file except in compliance with the License. You may obtain a copy of
6 * the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations under
14 * the License.
15 */
16
17
18 //
19 // This converts the data found at http://www.speech.cs.cmu.edu/cgi-bin/cmudict
20 // into the *.ok format used by Nuance.
// We use the file c0.6, which corresponds to version 0.6 of the dictionary.
22 //
23 // to run: make cmu2nuance && ./cmu2nuance <c0.6 >c0.6.ok
24 //
25 // TODO: look at generation of 'L', ')', and ','
26 //
27
28 #include <stdio.h>
29 #include <string.h>
30 #include <ctype.h>
31
32
/*
 * Try to match one CMU phoneme spelling at the start of `phone`.
 *
 * phone:  NUL-terminated text positioned at the start of a phoneme.
 * cmu:    CMU spelling to match; may contain a space to match a pair of
 *         phonemes as a unit (e.g. "AA1 R").
 * nuance: text written to stdout when the match succeeds.
 *
 * A match requires `phone` to begin with `cmu` AND the character right after
 * it to be whitespace, so "D" does not match the start of "DH".
 *
 * Returns a pointer just past the matched text (after writing `nuance` to
 * stdout), or NULL, with nothing written, when there is no match.
 */
static const char* xlate(const char* phone, const char* cmu, const char* nuance) {
    const size_t ncmu = strlen(cmu);

    if (strncmp(phone, cmu, ncmu) != 0) return NULL;

    // <ctype.h> functions require an unsigned char value (or EOF); passing a
    // negative plain char is undefined behavior, hence the cast.
    if (!isspace((unsigned char)phone[ncmu])) return NULL;

    fputs(nuance, stdout);
    return phone + ncmu;
}
39
40
/* One CMU -> Nuance mapping.  `cmu` may span two phonemes (e.g. "AA1 R"). */
struct phoneme_map {
    const char* cmu;
    const char* nuance;
};

/*
 * Mappings are tried in order and the first match wins, so the two-phoneme
 * entries ("AA1 R", "AH0 N", "AH0 M", "EH1 R") must stay ahead of the
 * single-phoneme entries they begin with.
 */
static const struct phoneme_map phoneme_table[] = {
    { "AA1 R", ")r" },  // odd     AA D
    { "AA0",   "o"  },  // odd     AA D
    { "AA1",   "o"  },  // odd     AA D
    { "AA2",   "o"  },  // odd     AA D

    { "AE0",   "a"  },  // at      AE T
    { "AE1",   "a"  },  // at      AE T
    { "AE2",   "a"  },  // at      AE T

    // "AH0 L" -> "L" is deliberately not mapped: it drops accuracy by 1%
    { "AH0 N", "~"  },  // hut     HH AH T - from jean
    { "AH0 M", "}"  },  // hut     HH AH T - from jean
    { "AH0",   "@"  },  // hut     HH AH T - from jean
    { "AH1",   "u"  },  // hut     HH AH T
    { "AH2",   "u"  },  // hut     HH AH T

    { "AO0",   "{"  },  // ought   AO T
    { "AO1",   "{"  },  // ought   AO T
    { "AO2",   "{"  },  // ought   AO T

    { "AW0",   "?"  },  // cow     K AW
    { "AW1",   "?"  },  // cow     K AW
    { "AW2",   "?"  },  // cow     K AW

    { "AY0",   "I"  },  // hide    HH AY D
    { "AY1",   "I"  },  // hide    HH AY D
    { "AY2",   "I"  },  // hide    HH AY D

    { "B",     "b"  },  // be      B IY
    { "CH",    "C"  },  // cheese  CH IY Z
    { "D",     "d"  },  // dee     D IY
    { "DH",    "D"  },  // thee    DH IY

    { "EH1 R", ",r" },  // Ed      EH D
    { "EH0",   "c"  },  // Ed      EH D - from jean
    { "EH1",   "e"  },  // Ed      EH D
    { "EH2",   "e"  },  // Ed      EH D

    { "ER0",   "P"  },  // hurt    HH ER T
    { "ER1",   "V"  },  // hurt    HH ER T
    { "ER2",   "V"  },  // hurt    HH ER T

    { "EY0",   "A"  },  // ate     EY T
    { "EY1",   "A"  },  // ate     EY T
    { "EY2",   "A"  },  // ate     EY T

    { "F",     "f"  },  // fee     F IY
    { "G",     "g"  },  // green   G R IY N
    { "HH",    "h"  },  // he      HH IY

    { "IH0",   "6"  },  // it      IH T
    { "IH1",   "i"  },  // it      IH T
    { "IH2",   "i"  },  // it      IH T

    { "IY0",   "/"  },  // eat     IY T - from jean
    { "IY1",   "E"  },  // eat     IY T
    { "IY2",   "E"  },  // eat     IY T

    { "JH",    "j"  },  // gee     JH IY
    { "K",     "k"  },  // key     K IY
    { "L",     "l"  },  // lee     L IY
    { "M",     "m"  },  // me      M IY
    { "N",     "n"  },  // knee    N IY
    { "NG",    "N"  },  // ping    P IH NG

    { "OW0",   "]"  },  // oat     OW T
    { "OW1",   "O"  },  // oat     OW T
    { "OW2",   "O"  },  // oat     OW T

    { "OY0",   "<"  },  // toy     T OY
    { "OY1",   "<"  },  // toy     T OY
    { "OY2",   "<"  },  // toy     T OY

    { "P",     "p"  },  // pee     P IY
    { "R",     "r"  },  // read    R IY D
    { "S",     "s"  },  // sea     S IY
    { "SH",    "S"  },  // she     SH IY
    { "T",     "t"  },  // tea     T IY
    { "TH",    "T"  },  // theta   TH EY T AH

    { "UH0",   "q"  },  // hood    HH UH D
    { "UH1",   "q"  },  // hood    HH UH D
    { "UH2",   "q"  },  // hood    HH UH D

    { "UW0",   "U"  },  // two     T UW
    { "UW1",   "U"  },  // two     T UW
    { "UW2",   "U"  },  // two     T UW

    { "V",     "v"  },  // vee     V IY
    { "W",     "w"  },  // we      W IY
    { "Y",     "y"  },  // yield   Y IY L D
    { "Z",     "z"  },  // zee     Z IY
    { "ZH",    "Z"  },  // seizure S IY ZH ER
};

/*
 * Translate the phoneme(s) at `p` using the first matching table entry.
 * xlate() writes the Nuance text to stdout on a match.
 *
 * Returns a pointer just past the consumed CMU text, or NULL if no entry
 * matches (in which case nothing has been written).
 */
static const char* xlate_phoneme(const char* p) {
    const size_t count = sizeof(phoneme_table) / sizeof(phoneme_table[0]);

    for (size_t i = 0; i < count; i++) {
        const char* next = xlate(p, phoneme_table[i].cmu, phoneme_table[i].nuance);
        if (next != NULL) return next;
    }
    return NULL;
}

/*
 * Reads CMU dictionary lines from stdin and writes the converted entries to
 * stdout, one per input entry, preceded by a "#LANG=EN-US" header line.
 * Warnings and errors are reported on stderr.  Command-line arguments are
 * not used.
 *
 * All <ctype.h> calls cast their argument to unsigned char: passing a
 * negative plain char (any byte >= 0x80 where char is signed) is undefined
 * behavior.
 */
int main(int argc, const char* argv[]) {
    (void)argc;
    (void)argv;

    // NOTE: fgets() splits input lines longer than this buffer; the remainder
    // is then processed (and counted) as if it were a separate line.
    char line[200];

    fputs("#LANG=EN-US\n", stdout);

    for (int lineno = 1; NULL != fgets(line, sizeof(line), stdin); lineno++)
    {
        if (line[0] == '#') continue;   // comment line
        if (line[0] == 0) continue;
        if (!isalnum((unsigned char)line[0])) {
            fprintf(stderr, "warning: ignoring line %d - %s", lineno, line);
            continue;
        }

        const char* p = line;

        // parse name, echoing in lower case and skipping (2) suffix
        while (!isspace((unsigned char)*p)) {
            if (*p == 0) {
                // hit end of string before any whitespace: no phonemes follow
                fprintf(stderr, "can't read name at line %d\n", lineno);
                break;
            }
            // alternate-pronunciation marker: "(" digit ")" followed by whitespace
            if (p[0] == '(' && isdigit((unsigned char)p[1]) && p[2] == ')' &&
                isspace((unsigned char)p[3])) {
                p += 3;
                break;
            }
            fputc(tolower((unsigned char)*p), stdout);
            p++;
        }
        fputc(' ', stdout);

        // loop over whitespace delimited phonemes
        while (1) {
            // skip leading whitespace
            while (isspace((unsigned char)*p)) p++;
            if (*p == 0) break;

            const char* next = xlate_phoneme(p);
            if (next == NULL) {
                // give up on the rest of this entry; the partial output line
                // is still terminated below
                fprintf(stderr, "can't pronounce line %d: %s", lineno, p);
                break;
            }
            p = next;
        }

        fputc('\n', stdout);
    }

    return 0;
}
187