• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*---------------------------------------------------------------------------*
2  *  Vocabulary.c                                                             *
3  *                                                                           *
4  *  Copyright 2007, 2008 Nuance Communciations, Inc.                         *
5  *                                                                           *
6  *  Licensed under the Apache License, Version 2.0 (the 'License');          *
7  *  you may not use this file except in compliance with the License.         *
8  *                                                                           *
9  *  You may obtain a copy of the License at                                  *
10  *      http://www.apache.org/licenses/LICENSE-2.0                           *
11  *                                                                           *
12  *  Unless required by applicable law or agreed to in writing, software      *
13  *  distributed under the License is distributed on an 'AS IS' BASIS,        *
14  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15  *  See the License for the specific language governing permissions and      *
16  *  limitations under the License.                                           *
17  *                                                                           *
18  *---------------------------------------------------------------------------*/
19 
20 #include "plog.h"
21 #include "SR_Vocabulary.h"
22 #include "SR_VocabularyImpl.h"
23 
24 
SR_VocabularyCreate(ESR_Locale locale,SR_Vocabulary ** self)25 ESR_ReturnCode SR_VocabularyCreate(ESR_Locale locale, SR_Vocabulary** self)
26 {
27      SR_Vocabulary* Interface;
28      SR_VocabularyImpl* impl;
29      ESR_ReturnCode rc;
30 
31      CHK(rc, SR_VocabularyCreateImpl(&Interface));
32      impl = (SR_VocabularyImpl*) Interface;
33      impl->locale = locale;
34      impl->ttp_lang = TTP_LANG(locale);
35 
36 #ifdef USE_TTP
37      /* impl->ttp_lang should be set to the current language before G2P is created */
38      rc = SR_CreateG2P(Interface);
39      if (rc != ESR_SUCCESS)
40      {
41           SR_VocabularyDestroyImpl(Interface);
42           goto CLEANUP;
43      }
44 #endif
45 
46      *self = Interface;
47      return ESR_SUCCESS;
48  CLEANUP:
49      return rc;
50 }
51 
SR_VocabularyLoad(const LCHAR * filename,SR_Vocabulary ** self)52 ESR_ReturnCode SR_VocabularyLoad(const LCHAR* filename, SR_Vocabulary** self)
53 {
54      SR_Vocabulary* Interface;
55      ESR_ReturnCode rc;
56 
57      CHK(rc, SR_VocabularyLoadImpl(filename, &Interface));
58 
59      *self = Interface;
60      return ESR_SUCCESS;
61  CLEANUP:
62      return rc;
63 }
64 
SR_VocabularySave(SR_Vocabulary * self,const LCHAR * filename)65 ESR_ReturnCode SR_VocabularySave(SR_Vocabulary* self, const LCHAR* filename)
66 {
67   if (self==NULL)
68   {
69     PLogError(L("ESR_INVALID_ARGUMENT"));
70     return ESR_INVALID_ARGUMENT;
71   }
72   return self->save(self, filename);
73 }
74 
SR_VocabularyGetLanguage(SR_Vocabulary * self,ESR_Locale * locale)75 ESR_ReturnCode SR_VocabularyGetLanguage(SR_Vocabulary* self, ESR_Locale* locale)
76 {
77   if (self==NULL)
78   {
79     PLogError(L("ESR_INVALID_ARGUMENT"));
80     return ESR_INVALID_ARGUMENT;
81   }
82   return self->getLanguage(self, locale);
83 }
84 
SR_VocabularyDestroy(SR_Vocabulary * self)85 ESR_ReturnCode SR_VocabularyDestroy(SR_Vocabulary* self)
86 {
87   if (self==NULL)
88   {
89     PLogError(L("ESR_INVALID_ARGUMENT"));
90     return ESR_INVALID_ARGUMENT;
91   }
92   return self->destroy(self);
93 }
94 
SR_VocabularyGetPronunciation(SR_Vocabulary * self,const LCHAR * word,LCHAR * phoneme,size_t * len)95 ESR_ReturnCode SR_VocabularyGetPronunciation(SR_Vocabulary* self, const LCHAR* word, LCHAR* phoneme, size_t* len)
96 {
97   if (self==NULL)
98   {
99     PLogError(L("ESR_INVALID_ARGUMENT"));
100     return ESR_INVALID_ARGUMENT;
101   }
102   return self->getPronunciation(self, word, phoneme, len);
103 }
104 
105 /****************************
106  * ETI to INFINITIVE Phoneme conversion stuff
107  */
108 
/*
 * Number of slots in the phoneme lookup tables built in this file; table
 * indices are character codes in the range 0 .. CH_MAX-1.
 *
 * Declared as an enumeration constant rather than a `static const int`: in C
 * a const-qualified object is not an integer constant expression, so arrays
 * dimensioned with it would be variable-length arrays.
 */
enum { CH_MAX = 128 };
110 
getTable(ESR_Locale locale,const LCHAR * m[])111 static ESR_ReturnCode getTable(ESR_Locale locale, const LCHAR* m[])
112 {
113      int i;
114      for(i = 0; i< CH_MAX; i++) m[i] = "";
115 
116      switch (locale)
117      {
118      case ESR_LOCALE_EN_US:
119      case ESR_LOCALE_EN_GB:
120           /* enu_d2f_fray_g.pht */
121           m['}']="um";  m['?']="OW";  m['~']="un";  m['@']="uh";  m['A']="EY";
122           m['C']="ch";  m['D']="dh";  m['E']="EE";  m['I']="AY";  m['J']="jnk";
123           m['L']="ul";  m['N']="ng";  m['O']="OH";  m['P']="ur";  m['S']="sh";
124           m['T']="th";  m['U']="OOH"; m['V']="UR";  m['Z']="zh";  m[']']="oh";
125           m['^']="ENV"; m['#']="sil"; m['a']="AA";  m['b']="b";   m['c']="eh";
126           m['d']="d";   m['e']="EH";  m['f']="f";   m[')']="AH";  m['g']="g";
127           m['h']="h";   m['i']="IH";  m['j']="j";   m[',']="AE";  m['k']="k";
128           m['l']="l";   m['m']="m";   m['/']="ee";  m['n']="n";   m['o']="AW";
129           m['p']="p";   m['q']="OO";  m['r']="r";   m['s']="s";   m['t']="t";
130           m['6']="ih";  m['u']="UH";  m['v']="v";   m['w']="w";   m['y']="y";
131           m['z']="z";   m['<']="OY";  m['{']="AWH";
132           break;
133      case ESR_LOCALE_FR_FR:
134           /* fra_t22_m.pht */
135           m['A']="ACI"; m[3]="OEE";   m[6]="OEN";   m['E']="EAC"; m['J']="jnk";
136           m['M']="gn";  m[16]="QQ";   m['N']="ng";  m['O']="OCI"; m[19]="AE";
137           m['S']="sh";  m['U']="UY";  m['W']="yw";  m['Y']="EN";  m['Z']="ge";
138           m[31]="OE";   m['^']="ENV"; m['#']="sil"; m['a']="AGR"; m['b']="b";
139           m['d']="d";   m['e']="ECI"; m['f']="f";   m[')']="AN";  m['g']="g";
140           m['i']="II";  m['k']="k";   m['l']="l";   m['m']="m";   m['n']="n";
141           m['o']="OO";  m['p']="p";   m['r']="r";   m['s']="s";   m['t']="t";
142           m['u']="UGR"; m['v']="v";   m['w']="w";   m['y']="y";   m['z']="z";
143           m['{']="ON";
144           break;
145 
146      case ESR_LOCALE_DE_DE:
147           m['@']="utt"; m['A']="AH";  m[4]="eu";    m['C']="ich"; m[6]="EU";
148           m['E']="EH";  m['H']="ue";  m['I']="IH";  m['J']="jnk"; m['K']="ach";
149           m['N']="ng";  m['O']="OH";  m['S']="sch"; m['T']="hr";  m['U']="UH";
150           m['V']="UEH"; m['W']="wu";  m['Z']="zh";  m['[']="ott"; m['^']="ENV";
151           m['!']="att"; m['#']="sil"; m['a']="ATT"; m['b']="b";   m['c']="ett";
152           m['d']="d";   m['e']="ETT"; m['f']="f";   m['g']="g";   m['h']="h";
153           m['i']="ITT"; m['j']="j";   m[',']="AEH"; m['k']="k";   m['l']="l";
154           m['m']="m";   m['n']="n";   m['o']="OTT"; m['p']="p";   m['q']="UE";
155           m['r']="r";   m['s']="s";   m['t']="t";   m['6']="itt"; m['u']="UTT";
156           m['w']="w";   m['x']="@@";  m[':']="oe";  m['z']="z";   m['<']="OE";
157           m['{']="OEH";
158           break;
159      case ESR_LOCALE_ES_ES:
160           m['@']="uu";  m['C']="ch";  m['D']="rr";  m['E']="EY";  m['J']="jnk";
161           m['M']="ks";  m['N']="nn";  m['T']="Z";   m['[']="oo";  m['^']="ENV";
162           m['!']="aa";  m['#']="sil"; m['a']="AA";  m['b']="b";   m['c']="ee";
163           m['d']="d";   m['e']="EE";  m['f']="f";   m[')']="AU";  m['g']="g";
164           m['i']="II";  m['j']="j";   m['k']="k";   m['l']="l";   m['m']="m";
165           m['n']="n";   m['o']="OO";  m['p']="p";   m['r']="r";   m['s']="s";
166           m['6']="ii";  m['t']="t";   m['u']="UU";  m['w']="w";   m['y']="y";
167           break;
168      case ESR_LOCALE_NL_NL:
169           m['S']="S";   m['a']="a";   m['N']="nK";  m['d']="d";   m['E']="E";
170           m['2']="ep";  m['j']="j";   m['y']="y";   m['Z']="Z";   m['u']="u";
171           m['1']="AA";  m['k']="k";   m['g']="g";   m['t']="t";   m['e']="e";
172           m['J']="jnk"; m['v']="v";   m['s']="s";   m['^']="ENV"; m['b']="b";
173           m['I']="I";   m['G']="G";   m['z']="z";   m['w']="w";   m['$']="$";
174           m['r']="r";   m['x']="x";   m['h']="h";   m['f']="f";   m['i']="i";
175           m['A']="A";   m['6']="A%t"; m['O']="O";   m['n']="n";   m['3']="Ei";
176           m['#']="sil"; m['m']="m";   m['8']="O%t"; m['l']="l";   m['4']="yy";
177           m['p']="p";   m['5']="Au";  m['o']="o";
178           break;
179      case ESR_LOCALE_IT_IT:
180           m['@']="uu";  m['A']="AI";  m['C']="ci";  m['E']="EI";  m['J']="jnk";
181           m['K']="rr";  m['M']="gi";  m['N']="gn";  m['O']="OI";  m[21]="gl";
182           m['S']="sci"; m['Y']="ETT"; m['[']="oo";  m['^']="ENV"; m['!']="aa";
183           m['#']="sil"; m['a']="AA";  m['b']="b";   m['c']="ee";  m['d']="d";
184           m['e']="EE";  m['f']="f";   m[')']="AU";  m['g']="g";   m['i']="II";
185           m['j']="j";   m['k']="k";   m['l']="l";   m['m']="m";   m['n']="n";
186           m['o']="OO";  m['p']="p";   m['r']="r";   m['s']="s";   m['t']="t";
187           m['6']="ii";  m['u']="UU";  m['v']="v";   m['w']="w";   m['z']="z";
188           m['{']="OTT";
189           break;
190      case ESR_LOCALE_PT_PT:
191           m['A']="ao";  m['B']="ojn"; m['E']="eh";  m['I']="ix";  m['J']="jnk";
192           m['L']="lj";  m['N']="nj";  m['O']="on";  m['R']="rr";  m['S']="sh";
193           m['U']="un";  m['Z']="zh";  m['^']="ENV"; m['#']="sil"; m['a']="a";
194           m['b']="b";   m['c']="ew";  m['d']="d";   m['e']="e";   m['f']="f";
195           m['g']="g";   m['h']="in";  m['i']="i";   m['j']="j";   m['k']="k";
196           m['l']="l";   m['m']="m";   m['n']="n";   m['1']="aj";  m['o']="o";
197           m['p']="p";   m['2']="ajn"; m['3']="an";  m['q']="iw";  m['r']="r";
198           m['4']="aw";  m['s']="s";   m['5']="awn"; m['t']="t";   m['6']="ax";
199           m['u']="u";   m['7']="axn"; m['v']="v";   m['8']="ej";  m['w']="w";
200           m['9']="en";  m['x']="ls";  m['y']="oj";  m['z']="z";
201           break;
202      case ESR_LOCALE_JA_JP:
203           return ESR_NOT_SUPPORTED;
204           break;
205      }
206      m['#']="iwt"; m['&']="&";
207 
208      return ESR_SUCCESS;
209 }
210 
SR_Vocabulary_etiinf_conv_multichar(ESR_Locale locale,const LCHAR * single,LCHAR * multi,size_t max_len)211 ESR_ReturnCode SR_Vocabulary_etiinf_conv_multichar(ESR_Locale locale, const LCHAR* single, LCHAR* multi, size_t max_len)
212 {
213     const LCHAR* m[CH_MAX];
214 
215     ESR_ReturnCode rc = getTable(locale, m);
216     if (rc != ESR_SUCCESS) return rc;
217 
218     for (*multi='\0'; *single; ++single)
219     {
220         LSTRCAT(multi, m[(int)*single]);
221         if (*(single+1)) LSTRCAT(multi, " ");
222     }
223     return ESR_SUCCESS;
224 }
225 
SR_Vocabulary_etiinf_conv_from_multichar(ESR_Locale locale,const LCHAR * multi,LCHAR * single)226 ESR_ReturnCode SR_Vocabulary_etiinf_conv_from_multichar(ESR_Locale locale, const LCHAR* multi, LCHAR* single)
227 {
228     const LCHAR* m[CH_MAX];
229     int i;
230 
231     ESR_ReturnCode rc = getTable(locale, m);
232     if (rc != ESR_SUCCESS) return rc;
233 
234     for (i = 0; i < CH_MAX; i++) {
235         if (!LSTRCMP(m[i], multi)) {
236             *single = (LCHAR)i;
237             return ESR_SUCCESS;
238         }
239     }
240     return ESR_NO_MATCH_ERROR;
241 }
242