1 /*---------------------------------------------------------------------------*
2 * Vocabulary.c *
3 * *
4 * Copyright 2007, 2008 Nuance Communications, Inc. *
5 * *
6 * Licensed under the Apache License, Version 2.0 (the 'License'); *
7 * you may not use this file except in compliance with the License. *
8 * *
9 * You may obtain a copy of the License at *
10 * http://www.apache.org/licenses/LICENSE-2.0 *
11 * *
12 * Unless required by applicable law or agreed to in writing, software *
13 * distributed under the License is distributed on an 'AS IS' BASIS, *
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15 * See the License for the specific language governing permissions and *
16 * limitations under the License. *
17 * *
18 *---------------------------------------------------------------------------*/
19
20 #include "plog.h"
21 #include "SR_Vocabulary.h"
22 #include "SR_VocabularyImpl.h"
23
24
SR_VocabularyCreate(ESR_Locale locale,SR_Vocabulary ** self)25 ESR_ReturnCode SR_VocabularyCreate(ESR_Locale locale, SR_Vocabulary** self)
26 {
27 SR_Vocabulary* Interface;
28 SR_VocabularyImpl* impl;
29 ESR_ReturnCode rc;
30
31 CHK(rc, SR_VocabularyCreateImpl(&Interface));
32 impl = (SR_VocabularyImpl*) Interface;
33 impl->locale = locale;
34 impl->ttp_lang = TTP_LANG(locale);
35
36 #ifdef USE_TTP
37 /* impl->ttp_lang should be set to the current language before G2P is created */
38 rc = SR_CreateG2P(Interface);
39 if (rc != ESR_SUCCESS)
40 {
41 SR_VocabularyDestroyImpl(Interface);
42 goto CLEANUP;
43 }
44 #endif
45
46 *self = Interface;
47 return ESR_SUCCESS;
48 CLEANUP:
49 return rc;
50 }
51
SR_VocabularyLoad(const LCHAR * filename,SR_Vocabulary ** self)52 ESR_ReturnCode SR_VocabularyLoad(const LCHAR* filename, SR_Vocabulary** self)
53 {
54 SR_Vocabulary* Interface;
55 ESR_ReturnCode rc;
56
57 CHK(rc, SR_VocabularyLoadImpl(filename, &Interface));
58
59 *self = Interface;
60 return ESR_SUCCESS;
61 CLEANUP:
62 return rc;
63 }
64
SR_VocabularySave(SR_Vocabulary * self,const LCHAR * filename)65 ESR_ReturnCode SR_VocabularySave(SR_Vocabulary* self, const LCHAR* filename)
66 {
67 if (self==NULL)
68 {
69 PLogError(L("ESR_INVALID_ARGUMENT"));
70 return ESR_INVALID_ARGUMENT;
71 }
72 return self->save(self, filename);
73 }
74
SR_VocabularyGetLanguage(SR_Vocabulary * self,ESR_Locale * locale)75 ESR_ReturnCode SR_VocabularyGetLanguage(SR_Vocabulary* self, ESR_Locale* locale)
76 {
77 if (self==NULL)
78 {
79 PLogError(L("ESR_INVALID_ARGUMENT"));
80 return ESR_INVALID_ARGUMENT;
81 }
82 return self->getLanguage(self, locale);
83 }
84
SR_VocabularyDestroy(SR_Vocabulary * self)85 ESR_ReturnCode SR_VocabularyDestroy(SR_Vocabulary* self)
86 {
87 if (self==NULL)
88 {
89 PLogError(L("ESR_INVALID_ARGUMENT"));
90 return ESR_INVALID_ARGUMENT;
91 }
92 return self->destroy(self);
93 }
94
SR_VocabularyGetPronunciation(SR_Vocabulary * self,const LCHAR * word,LCHAR * phoneme,size_t * len)95 ESR_ReturnCode SR_VocabularyGetPronunciation(SR_Vocabulary* self, const LCHAR* word, LCHAR* phoneme, size_t* len)
96 {
97 if (self==NULL)
98 {
99 PLogError(L("ESR_INVALID_ARGUMENT"));
100 return ESR_INVALID_ARGUMENT;
101 }
102 return self->getPronunciation(self, word, phoneme, len);
103 }
104
105 /****************************
106 * ETI to INFINITIVE Phoneme conversion stuff
107 */
108
/* Number of slots in a single-character -> multi-character conversion table.
 * Declared as an enumeration constant because, in C, a `static const int` is
 * not an integer constant expression: arrays sized with it would be
 * variable-length arrays instead of ordinary fixed-size arrays. */
enum { CH_MAX = 128 };
110
getTable(ESR_Locale locale,const LCHAR * m[])111 static ESR_ReturnCode getTable(ESR_Locale locale, const LCHAR* m[])
112 {
113 int i;
114 for(i = 0; i< CH_MAX; i++) m[i] = "";
115
116 switch (locale)
117 {
118 case ESR_LOCALE_EN_US:
119 case ESR_LOCALE_EN_GB:
120 /* enu_d2f_fray_g.pht */
121 m['}']="um"; m['?']="OW"; m['~']="un"; m['@']="uh"; m['A']="EY";
122 m['C']="ch"; m['D']="dh"; m['E']="EE"; m['I']="AY"; m['J']="jnk";
123 m['L']="ul"; m['N']="ng"; m['O']="OH"; m['P']="ur"; m['S']="sh";
124 m['T']="th"; m['U']="OOH"; m['V']="UR"; m['Z']="zh"; m[']']="oh";
125 m['^']="ENV"; m['#']="sil"; m['a']="AA"; m['b']="b"; m['c']="eh";
126 m['d']="d"; m['e']="EH"; m['f']="f"; m[')']="AH"; m['g']="g";
127 m['h']="h"; m['i']="IH"; m['j']="j"; m[',']="AE"; m['k']="k";
128 m['l']="l"; m['m']="m"; m['/']="ee"; m['n']="n"; m['o']="AW";
129 m['p']="p"; m['q']="OO"; m['r']="r"; m['s']="s"; m['t']="t";
130 m['6']="ih"; m['u']="UH"; m['v']="v"; m['w']="w"; m['y']="y";
131 m['z']="z"; m['<']="OY"; m['{']="AWH";
132 break;
133 case ESR_LOCALE_FR_FR:
134 /* fra_t22_m.pht */
135 m['A']="ACI"; m[3]="OEE"; m[6]="OEN"; m['E']="EAC"; m['J']="jnk";
136 m['M']="gn"; m[16]="QQ"; m['N']="ng"; m['O']="OCI"; m[19]="AE";
137 m['S']="sh"; m['U']="UY"; m['W']="yw"; m['Y']="EN"; m['Z']="ge";
138 m[31]="OE"; m['^']="ENV"; m['#']="sil"; m['a']="AGR"; m['b']="b";
139 m['d']="d"; m['e']="ECI"; m['f']="f"; m[')']="AN"; m['g']="g";
140 m['i']="II"; m['k']="k"; m['l']="l"; m['m']="m"; m['n']="n";
141 m['o']="OO"; m['p']="p"; m['r']="r"; m['s']="s"; m['t']="t";
142 m['u']="UGR"; m['v']="v"; m['w']="w"; m['y']="y"; m['z']="z";
143 m['{']="ON";
144 break;
145
146 case ESR_LOCALE_DE_DE:
147 m['@']="utt"; m['A']="AH"; m[4]="eu"; m['C']="ich"; m[6]="EU";
148 m['E']="EH"; m['H']="ue"; m['I']="IH"; m['J']="jnk"; m['K']="ach";
149 m['N']="ng"; m['O']="OH"; m['S']="sch"; m['T']="hr"; m['U']="UH";
150 m['V']="UEH"; m['W']="wu"; m['Z']="zh"; m['[']="ott"; m['^']="ENV";
151 m['!']="att"; m['#']="sil"; m['a']="ATT"; m['b']="b"; m['c']="ett";
152 m['d']="d"; m['e']="ETT"; m['f']="f"; m['g']="g"; m['h']="h";
153 m['i']="ITT"; m['j']="j"; m[',']="AEH"; m['k']="k"; m['l']="l";
154 m['m']="m"; m['n']="n"; m['o']="OTT"; m['p']="p"; m['q']="UE";
155 m['r']="r"; m['s']="s"; m['t']="t"; m['6']="itt"; m['u']="UTT";
156 m['w']="w"; m['x']="@@"; m[':']="oe"; m['z']="z"; m['<']="OE";
157 m['{']="OEH";
158 break;
159 case ESR_LOCALE_ES_ES:
160 m['@']="uu"; m['C']="ch"; m['D']="rr"; m['E']="EY"; m['J']="jnk";
161 m['M']="ks"; m['N']="nn"; m['T']="Z"; m['[']="oo"; m['^']="ENV";
162 m['!']="aa"; m['#']="sil"; m['a']="AA"; m['b']="b"; m['c']="ee";
163 m['d']="d"; m['e']="EE"; m['f']="f"; m[')']="AU"; m['g']="g";
164 m['i']="II"; m['j']="j"; m['k']="k"; m['l']="l"; m['m']="m";
165 m['n']="n"; m['o']="OO"; m['p']="p"; m['r']="r"; m['s']="s";
166 m['6']="ii"; m['t']="t"; m['u']="UU"; m['w']="w"; m['y']="y";
167 break;
168 case ESR_LOCALE_NL_NL:
169 m['S']="S"; m['a']="a"; m['N']="nK"; m['d']="d"; m['E']="E";
170 m['2']="ep"; m['j']="j"; m['y']="y"; m['Z']="Z"; m['u']="u";
171 m['1']="AA"; m['k']="k"; m['g']="g"; m['t']="t"; m['e']="e";
172 m['J']="jnk"; m['v']="v"; m['s']="s"; m['^']="ENV"; m['b']="b";
173 m['I']="I"; m['G']="G"; m['z']="z"; m['w']="w"; m['$']="$";
174 m['r']="r"; m['x']="x"; m['h']="h"; m['f']="f"; m['i']="i";
175 m['A']="A"; m['6']="A%t"; m['O']="O"; m['n']="n"; m['3']="Ei";
176 m['#']="sil"; m['m']="m"; m['8']="O%t"; m['l']="l"; m['4']="yy";
177 m['p']="p"; m['5']="Au"; m['o']="o";
178 break;
179 case ESR_LOCALE_IT_IT:
180 m['@']="uu"; m['A']="AI"; m['C']="ci"; m['E']="EI"; m['J']="jnk";
181 m['K']="rr"; m['M']="gi"; m['N']="gn"; m['O']="OI"; m[21]="gl";
182 m['S']="sci"; m['Y']="ETT"; m['[']="oo"; m['^']="ENV"; m['!']="aa";
183 m['#']="sil"; m['a']="AA"; m['b']="b"; m['c']="ee"; m['d']="d";
184 m['e']="EE"; m['f']="f"; m[')']="AU"; m['g']="g"; m['i']="II";
185 m['j']="j"; m['k']="k"; m['l']="l"; m['m']="m"; m['n']="n";
186 m['o']="OO"; m['p']="p"; m['r']="r"; m['s']="s"; m['t']="t";
187 m['6']="ii"; m['u']="UU"; m['v']="v"; m['w']="w"; m['z']="z";
188 m['{']="OTT";
189 break;
190 case ESR_LOCALE_PT_PT:
191 m['A']="ao"; m['B']="ojn"; m['E']="eh"; m['I']="ix"; m['J']="jnk";
192 m['L']="lj"; m['N']="nj"; m['O']="on"; m['R']="rr"; m['S']="sh";
193 m['U']="un"; m['Z']="zh"; m['^']="ENV"; m['#']="sil"; m['a']="a";
194 m['b']="b"; m['c']="ew"; m['d']="d"; m['e']="e"; m['f']="f";
195 m['g']="g"; m['h']="in"; m['i']="i"; m['j']="j"; m['k']="k";
196 m['l']="l"; m['m']="m"; m['n']="n"; m['1']="aj"; m['o']="o";
197 m['p']="p"; m['2']="ajn"; m['3']="an"; m['q']="iw"; m['r']="r";
198 m['4']="aw"; m['s']="s"; m['5']="awn"; m['t']="t"; m['6']="ax";
199 m['u']="u"; m['7']="axn"; m['v']="v"; m['8']="ej"; m['w']="w";
200 m['9']="en"; m['x']="ls"; m['y']="oj"; m['z']="z";
201 break;
202 case ESR_LOCALE_JA_JP:
203 return ESR_NOT_SUPPORTED;
204 break;
205 }
206 m['#']="iwt"; m['&']="&";
207
208 return ESR_SUCCESS;
209 }
210
SR_Vocabulary_etiinf_conv_multichar(ESR_Locale locale,const LCHAR * single,LCHAR * multi,size_t max_len)211 ESR_ReturnCode SR_Vocabulary_etiinf_conv_multichar(ESR_Locale locale, const LCHAR* single, LCHAR* multi, size_t max_len)
212 {
213 const LCHAR* m[CH_MAX];
214
215 ESR_ReturnCode rc = getTable(locale, m);
216 if (rc != ESR_SUCCESS) return rc;
217
218 for (*multi='\0'; *single; ++single)
219 {
220 LSTRCAT(multi, m[(int)*single]);
221 if (*(single+1)) LSTRCAT(multi, " ");
222 }
223 return ESR_SUCCESS;
224 }
225
SR_Vocabulary_etiinf_conv_from_multichar(ESR_Locale locale,const LCHAR * multi,LCHAR * single)226 ESR_ReturnCode SR_Vocabulary_etiinf_conv_from_multichar(ESR_Locale locale, const LCHAR* multi, LCHAR* single)
227 {
228 const LCHAR* m[CH_MAX];
229 int i;
230
231 ESR_ReturnCode rc = getTable(locale, m);
232 if (rc != ESR_SUCCESS) return rc;
233
234 for (i = 0; i < CH_MAX; i++) {
235 if (!LSTRCMP(m[i], multi)) {
236 *single = (LCHAR)i;
237 return ESR_SUCCESS;
238 }
239 }
240 return ESR_NO_MATCH_ERROR;
241 }
242