1 /* com_svox_picottsengine.cpp
2
3 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 * This is the Manager layer. It sits on top of the native Pico engine
18 * and provides the interface to the defined Google TTS engine API.
19 * The Google engine API is the boundary to allow a TTS engine to be swapped.
 * The Manager layer also provides the SSML tag interpretation.
21 * The supported SSML tags are mapped to corresponding tags natively supported by Pico.
22 * Native Pico functions always begin with picoXXX.
23 *
 * In the Pico engine, the language cannot be changed independently of the voice.
25 * If either the voice or locale/language are changed, a new resource is loaded.
26 *
27 * Only a subset of SSML 1.0 tags are supported.
28 * Some SSML tags involve significant complexity.
29 * If the language is changed through an SSML tag, there is a latency for the load.
30 *
31 */
32 //#define LOG_NDEBUG 0
33
34 #include <stdio.h>
35 #include <unistd.h>
36 #include <stdlib.h>
37
38 #define LOG_TAG "SVOX Pico Engine"
39
40 #include <utils/Log.h>
41 #include <utils/String16.h> /* for strlen16 */
42 #include <TtsEngine.h>
43
44 #include <cutils/jstring.h>
45 #include <picoapi.h>
46 #include <picodefs.h>
47
48 #include "svox_ssml_parser.h"
49
50 using namespace android;
51
/* adaptation layer defines */
/* size passed to pico_initialize() together with picoMemArea */
#define PICO_MEM_SIZE 2500000
/* speaking rate: lower bound, upper bound and default value */
#define PICO_MIN_RATE 20
#define PICO_MAX_RATE 500
#define PICO_DEF_RATE 100
/* speaking pitch: lower bound, upper bound and default value */
#define PICO_MIN_PITCH 50
#define PICO_MAX_PITCH 200
#define PICO_DEF_PITCH 100
/* speaking volume: lower bound, upper bound and default value */
#define PICO_MIN_VOLUME 0
#define PICO_MAX_VOLUME 500
#define PICO_DEF_VOLUME 100

/* string constants */
#define MAX_OUTBUF_SIZE 128
/* system lingware directory; the trailing '/' is required because file names
   are appended to it directly */
const char * PICO_SYSTEM_LINGWARE_PATH = "/system/tts/lang_pico/";
/* NOTE(review): presumably the default value for pico_alt_lingware_path - confirm;
   it is not referenced by the helper functions in this part of the file */
const char * PICO_LINGWARE_PATH = "/sdcard/svox/";
/* name under which the single Pico voice definition is created and released */
const char * PICO_VOICE_NAME = "PicoVoice";
/* Pico markup templates used by doAddProperties(); the "%d" of each open tag
   is replaced with the current property value */
const char * PICO_SPEED_OPEN_TAG = "<speed level='%d'>";
const char * PICO_SPEED_CLOSE_TAG = "</speed>";
const char * PICO_PITCH_OPEN_TAG = "<pitch level='%d'>";
const char * PICO_PITCH_CLOSE_TAG = "</pitch>";
const char * PICO_VOLUME_OPEN_TAG = "<volume level='%d'>";
const char * PICO_VOLUME_CLOSE_TAG = "</volume>";
/* prefix and suffix wrapped around every word by createPhonemeString() */
const char * PICO_PHONEME_OPEN_TAG = "<phoneme ph='";
const char * PICO_PHONEME_CLOSE_TAG = "'/>";

/* supported voices
   Pico does not separately specify the voice and locale.
   The arrays below are parallel: entry i of each array describes the same voice,
   and picoNumSupportedVocs is the number of entries in each of them. */
const char * picoSupportedLangIso3[] = { "eng", "eng", "deu", "spa", "fra", "ita" };
const char * picoSupportedCountryIso3[] = { "USA", "GBR", "DEU", "ESP", "FRA", "ITA" };
const char * picoSupportedLang[] = { "en-US", "en-GB", "de-DE", "es-ES", "fr-FR", "it-IT" };
const char * picoInternalLang[] = { "en-US", "en-GB", "de-DE", "es-ES", "fr-FR", "it-IT" };
/* text analysis ("ta"), signal generation ("sg") and optional "utpp" lingware file names */
const char * picoInternalTaLingware[] = { "en-US_ta.bin", "en-GB_ta.bin", "de-DE_ta.bin", "es-ES_ta.bin", "fr-FR_ta.bin", "it-IT_ta.bin" };
const char * picoInternalSgLingware[] = { "en-US_lh0_sg.bin", "en-GB_kh0_sg.bin", "de-DE_gl0_sg.bin", "es-ES_zl0_sg.bin", "fr-FR_nk0_sg.bin", "it-IT_cm0_sg.bin" };
const char * picoInternalUtppLingware[] = { "en-US_utpp.bin", "en-GB_utpp.bin", "de-DE_utpp.bin", "es-ES_utpp.bin", "fr-FR_utpp.bin", "it-IT_utpp.bin" };
const int picoNumSupportedVocs = 6;

/* supported properties; picoNumSupportedProperties is the number of entries */
const char * picoSupportedProperties[] = { "language", "rate", "pitch", "volume" };
const int picoNumSupportedProperties = 4;
95
96
/* adaptation layer global variables */
/* NOTE(review): presumably the client callback invoked with synthesized audio - confirm;
   it is not used by the helper functions in this part of the file */
synthDoneCB_t * picoSynthDoneCBPtr;
/* memory block handed to pico_initialize() (PICO_MEM_SIZE) */
void * picoMemArea = NULL;
/* Pico system object; created by pico_initialize(), destroyed by cleanResources() */
pico_System picoSystem = NULL;
/* loaded lingware resources: text analysis, signal generation, optional utpp */
pico_Resource picoTaResource = NULL;
pico_Resource picoSgResource = NULL;
pico_Resource picoUtppResource = NULL;
/* engine created for PICO_VOICE_NAME once all resources are loaded */
pico_Engine picoEngine = NULL;
/* heap-allocated full paths of the lingware files; released by cleanFiles() */
pico_Char * picoTaFileName = NULL;
pico_Char * picoSgFileName = NULL;
pico_Char * picoUtppFileName = NULL;
/* heap-allocated resource names reported by pico_getResourceName(); released by cleanFiles() */
pico_Char * picoTaResourceName = NULL;
pico_Char * picoSgResourceName = NULL;
pico_Char * picoUtppResourceName = NULL;
/* NOTE(review): presumably a flag requesting that a running synthesis stops - confirm */
int picoSynthAbort = 0;
char * picoProp_currLang = NULL; /* current language */
int picoProp_currRate = PICO_DEF_RATE; /* current rate */
int picoProp_currPitch = PICO_DEF_PITCH; /* current pitch */
int picoProp_currVolume = PICO_DEF_VOLUME; /* current volume */

/* index of the loaded voice in the picoSupported... arrays, -1 while nothing is loaded */
int picoCurrentLangIndex = -1;

/* alternative lingware directory, probed in addition to PICO_SYSTEM_LINGWARE_PATH;
   NULL until it is assigned (the assignment is not in this part of the file) */
char * pico_alt_lingware_path = NULL;
120
121
122 /* internal helper functions */
123
124 /** checkForLocale
125 * Check whether the requested locale is among the supported locales.
126 * @locale - the locale to check, either in xx or xx-YY format
127 * return index of the locale, or -1 if not supported.
128 */
checkForLocale(const char * locale)129 static int checkForLocale( const char * locale )
130 {
131 int found = -1; /* language not found */
132 int i;
133 if (locale == NULL) {
134 ALOGE("checkForLocale called with NULL language");
135 return found;
136 }
137
138 /* Verify that the requested locale is a locale that we support. */
139 for (i = 0; i < picoNumSupportedVocs; i ++) {
140 if (strcmp(locale, picoSupportedLang[i]) == 0) { /* in array */
141 found = i;
142 break;
143 }
144 };
145
146 /* The exact locale was not found. */
147 if (found < 0) {
148 /* We didn't find an exact match; it may have been specified with only the first 2 characters.
149 This could overmatch ISO 639-3 language codes.%% */
150
151 /* check whether the current language matches the locale's language */
152 if ((picoCurrentLangIndex > -1) &&
153 (strncmp(locale, picoSupportedLang[picoCurrentLangIndex], 2) == 0)) {
154 /* the current language matches the requested language, let's use it */
155 found = picoCurrentLangIndex;
156 } else {
157 /* check whether we can find a match at least on the language */
158 for (i = 0; i < picoNumSupportedVocs; i ++) {
159 if (strncmp(locale, picoSupportedLang[i], 2) == 0) {
160 found = i;
161 break;
162 }
163 }
164 }
165
166 if (found < 0) {
167 ALOGE("TtsEngine::set language called with unsupported locale %s", locale);
168 }
169 };
170 return found;
171 }
172
173
174 /** cleanResources
175 * Unloads any loaded Pico resources.
176 */
cleanResources(void)177 static void cleanResources( void )
178 {
179 if (picoEngine) {
180 pico_disposeEngine( picoSystem, &picoEngine );
181 pico_releaseVoiceDefinition( picoSystem, (pico_Char *) PICO_VOICE_NAME );
182 picoEngine = NULL;
183 }
184 if (picoUtppResource) {
185 pico_unloadResource( picoSystem, &picoUtppResource );
186 picoUtppResource = NULL;
187 }
188 if (picoTaResource) {
189 pico_unloadResource( picoSystem, &picoTaResource );
190 picoTaResource = NULL;
191 }
192 if (picoSgResource) {
193 pico_unloadResource( picoSystem, &picoSgResource );
194 picoSgResource = NULL;
195 }
196
197 if (picoSystem) {
198 pico_terminate(&picoSystem);
199 picoSystem = NULL;
200 }
201 picoCurrentLangIndex = -1;
202 }
203
204
205 /** cleanFiles
206 * Frees any memory allocated for file and resource strings.
207 */
cleanFiles(void)208 static void cleanFiles( void )
209 {
210 if (picoProp_currLang) {
211 free( picoProp_currLang );
212 picoProp_currLang = NULL;
213 }
214
215 if (picoTaFileName) {
216 free( picoTaFileName );
217 picoTaFileName = NULL;
218 }
219
220 if (picoSgFileName) {
221 free( picoSgFileName );
222 picoSgFileName = NULL;
223 }
224
225 if (picoUtppFileName) {
226 free( picoUtppFileName );
227 picoUtppFileName = NULL;
228 }
229
230 if (picoTaResourceName) {
231 free( picoTaResourceName );
232 picoTaResourceName = NULL;
233 }
234
235 if (picoSgResourceName) {
236 free( picoSgResourceName );
237 picoSgResourceName = NULL;
238 }
239
240 if (picoUtppResourceName) {
241 free( picoUtppResourceName );
242 picoUtppResourceName = NULL;
243 }
244 }
245
246 /** hasResourcesForLanguage
247 * Check to see if the resources required to load the language at the specified index
248 * are properly installed
249 * @langIndex - the index of the language to check the resources for. The index is valid.
250 * return true if the required resources are installed, false otherwise
251 */
hasResourcesForLanguage(int langIndex)252 static bool hasResourcesForLanguage(int langIndex) {
253 FILE * pFile;
254 char* fileName = (char*)malloc(PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE);
255
256 /* check resources on system (under PICO_SYSTEM_LINGWARE_PATH). */
257 strcpy((char*)fileName, PICO_SYSTEM_LINGWARE_PATH);
258 strcat((char*)fileName, (const char*)picoInternalTaLingware[langIndex]);
259 pFile = fopen(fileName, "r");
260 if (pFile != NULL) {
261 /* "ta" file found. */
262 fclose (pFile);
263 /* now look for "sg" file. */
264 strcpy((char*)fileName, PICO_SYSTEM_LINGWARE_PATH);
265 strcat((char*)fileName, (const char*)picoInternalSgLingware[langIndex]);
266 pFile = fopen(fileName, "r");
267 if (pFile != NULL) {
268 /* "sg" file found, no need to continue checking, return success. */
269 fclose(pFile);
270 free(fileName);
271 return true;
272 }
273 }
274
275 /* resources not found on system, check resources on alternative location */
276 /* (under pico_alt_lingware_path). */
277 strcpy((char*)fileName, pico_alt_lingware_path);
278 strcat((char*)fileName, (const char*)picoInternalTaLingware[langIndex]);
279 pFile = fopen(fileName, "r");
280 if (pFile == NULL) {
281 free(fileName);
282 return false;
283 } else {
284 fclose (pFile);
285 }
286
287 strcpy((char*)fileName, pico_alt_lingware_path);
288 strcat((char*)fileName, (const char*)picoInternalSgLingware[langIndex]);
289 pFile = fopen(fileName, "r");
290 if (pFile == NULL) {
291 free(fileName);
292 return false;
293 } else {
294 fclose(pFile);
295 free(fileName);
296 return true;
297 }
298 }
299
300 /** doLanguageSwitchFromLangIndex
301 * Switch to the requested locale.
302 * If the locale is already loaded, it returns immediately.
303 * If another locale is already is loaded, it will first be unloaded and the new one then loaded.
304 * If no locale is loaded, the requested locale will be loaded.
305 * @langIndex - the index of the locale/voice to load, which is guaranteed to be supported.
306 * return TTS_SUCCESS or TTS_FAILURE
307 */
doLanguageSwitchFromLangIndex(int langIndex)308 static tts_result doLanguageSwitchFromLangIndex( int langIndex )
309 {
310 int ret; /* function result code */
311
312 if (langIndex>=0) {
313 /* If we already have a loaded locale, check whether it is the same one as requested. */
314 if (picoProp_currLang && (strcmp(picoProp_currLang, picoSupportedLang[langIndex]) == 0)) {
315 //ALOGI("Language already loaded (%s == %s)", picoProp_currLang,
316 // picoSupportedLang[langIndex]);
317 return TTS_SUCCESS;
318 }
319 }
320
321 /* It is not the same locale; unload the current one first. Also invalidates the system object*/
322 cleanResources();
323
324 /* Allocate memory for file and resource names. */
325 cleanFiles();
326
327 if (picoSystem==NULL) {
328 /*re-init system object*/
329 ret = pico_initialize( picoMemArea, PICO_MEM_SIZE, &picoSystem );
330 if (PICO_OK != ret) {
331 ALOGE("Failed to initialize the pico system object\n");
332 return TTS_FAILURE;
333 }
334 }
335
336 picoProp_currLang = (char *) malloc( 10 );
337 picoTaFileName = (pico_Char *) malloc( PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE );
338 picoSgFileName = (pico_Char *) malloc( PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE );
339 picoUtppFileName = (pico_Char *) malloc( PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE );
340 picoTaResourceName = (pico_Char *) malloc( PICO_MAX_RESOURCE_NAME_SIZE );
341 picoSgResourceName = (pico_Char *) malloc( PICO_MAX_RESOURCE_NAME_SIZE );
342 picoUtppResourceName =(pico_Char *) malloc( PICO_MAX_RESOURCE_NAME_SIZE );
343
344 if (
345 (picoProp_currLang==NULL) || (picoTaFileName==NULL) || (picoSgFileName==NULL) ||
346 (picoUtppFileName==NULL) || (picoTaResourceName==NULL) || (picoSgResourceName==NULL) ||
347 (picoUtppResourceName==NULL)
348 ) {
349 ALOGE("Failed to allocate memory for internal strings\n");
350 cleanResources();
351 return TTS_FAILURE;
352 }
353
354 /* Find where to load the resource files from: system or alternative location */
355 /* based on availability of the Ta file. Try the alternative location first, this is where */
356 /* more recent language file updates would be installed (under pico_alt_lingware_path). */
357 bool bUseSystemPath = true;
358 FILE * pFile;
359 char* tmpFileName = (char*)malloc(PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE);
360 strcpy((char*)tmpFileName, pico_alt_lingware_path);
361 strcat((char*)tmpFileName, (const char*)picoInternalTaLingware[langIndex]);
362 pFile = fopen(tmpFileName, "r");
363 if (pFile != NULL) {
364 /* "ta" file found under pico_alt_lingware_path, don't use the system path. */
365 fclose (pFile);
366 bUseSystemPath = false;
367 }
368 free(tmpFileName);
369
370 /* Set the path and file names for resource files. */
371 if (bUseSystemPath) {
372 strcpy((char *) picoTaFileName, PICO_SYSTEM_LINGWARE_PATH);
373 strcpy((char *) picoSgFileName, PICO_SYSTEM_LINGWARE_PATH);
374 strcpy((char *) picoUtppFileName, PICO_SYSTEM_LINGWARE_PATH);
375 } else {
376 strcpy((char *) picoTaFileName, pico_alt_lingware_path);
377 strcpy((char *) picoSgFileName, pico_alt_lingware_path);
378 strcpy((char *) picoUtppFileName, pico_alt_lingware_path);
379 }
380 strcat((char *) picoTaFileName, (const char *) picoInternalTaLingware[langIndex]);
381 strcat((char *) picoSgFileName, (const char *) picoInternalSgLingware[langIndex]);
382 strcat((char *) picoUtppFileName, (const char *) picoInternalUtppLingware[langIndex]);
383
384 /* Load the text analysis Lingware resource file. */
385 ret = pico_loadResource( picoSystem, picoTaFileName, &picoTaResource );
386 if (PICO_OK != ret) {
387 ALOGE("Failed to load textana resource for %s [%d]", picoSupportedLang[langIndex], ret);
388 cleanResources();
389 cleanFiles();
390 return TTS_FAILURE;
391 }
392
393 /* Load the signal generation Lingware resource file. */
394 ret = pico_loadResource( picoSystem, picoSgFileName, &picoSgResource );
395 if (PICO_OK != ret) {
396 ALOGE("Failed to load siggen resource for %s [%d]", picoSupportedLang[langIndex], ret);
397 cleanResources();
398 cleanFiles();
399 return TTS_FAILURE;
400 }
401
402 /* Load the utpp Lingware resource file if exists - NOTE: this file is optional
403 and is currently not used. Loading is only attempted for future compatibility.
404 If this file is not present the loading will still succeed. */
405 ret = pico_loadResource( picoSystem, picoUtppFileName, &picoUtppResource );
406 if ((PICO_OK != ret) && (ret != PICO_EXC_CANT_OPEN_FILE)) {
407 ALOGE("Failed to load utpp resource for %s [%d]", picoSupportedLang[langIndex], ret);
408 cleanResources();
409 cleanFiles();
410 return TTS_FAILURE;
411 }
412
413 /* Get the text analysis resource name. */
414 ret = pico_getResourceName( picoSystem, picoTaResource, (char *) picoTaResourceName );
415 if (PICO_OK != ret) {
416 ALOGE("Failed to get textana resource name for %s [%d]", picoSupportedLang[langIndex], ret);
417 cleanResources();
418 cleanFiles();
419 return TTS_FAILURE;
420 }
421
422 /* Get the signal generation resource name. */
423 ret = pico_getResourceName( picoSystem, picoSgResource, (char *) picoSgResourceName );
424 if ((PICO_OK == ret) && (picoUtppResource != NULL)) {
425 /* Get utpp resource name - optional: see note above. */
426 ret = pico_getResourceName( picoSystem, picoUtppResource, (char *) picoUtppResourceName );
427 if (PICO_OK != ret) {
428 ALOGE("Failed to get utpp resource name for %s [%d]", picoSupportedLang[langIndex], ret);
429 cleanResources();
430 cleanFiles();
431 return TTS_FAILURE;
432 }
433 }
434 if (PICO_OK != ret) {
435 ALOGE("Failed to get siggen resource name for %s [%d]", picoSupportedLang[langIndex], ret);
436 cleanResources();
437 cleanFiles();
438 return TTS_FAILURE;
439 }
440
441 /* Create a voice definition. */
442 ret = pico_createVoiceDefinition( picoSystem, (const pico_Char *) PICO_VOICE_NAME );
443 if (PICO_OK != ret) {
444 ALOGE("Failed to create voice for %s [%d]", picoSupportedLang[langIndex], ret);
445 cleanResources();
446 cleanFiles();
447 return TTS_FAILURE;
448 }
449
450 /* Add the text analysis resource to the voice. */
451 ret = pico_addResourceToVoiceDefinition( picoSystem, (const pico_Char *) PICO_VOICE_NAME, picoTaResourceName );
452 if (PICO_OK != ret) {
453 ALOGE("Failed to add textana resource to voice for %s [%d]", picoSupportedLang[langIndex], ret);
454 cleanResources();
455 cleanFiles();
456 return TTS_FAILURE;
457 }
458
459 /* Add the signal generation resource to the voice. */
460 ret = pico_addResourceToVoiceDefinition( picoSystem, (const pico_Char *) PICO_VOICE_NAME, picoSgResourceName );
461 if ((PICO_OK == ret) && (picoUtppResource != NULL)) {
462 /* Add utpp resource to voice - optional: see note above. */
463 ret = pico_addResourceToVoiceDefinition( picoSystem, (const pico_Char *) PICO_VOICE_NAME, picoUtppResourceName );
464 if (PICO_OK != ret) {
465 ALOGE("Failed to add utpp resource to voice for %s [%d]", picoSupportedLang[langIndex], ret);
466 cleanResources();
467 cleanFiles();
468 return TTS_FAILURE;
469 }
470 }
471
472 if (PICO_OK != ret) {
473 ALOGE("Failed to add siggen resource to voice for %s [%d]", picoSupportedLang[langIndex], ret);
474 cleanResources();
475 cleanFiles();
476 return TTS_FAILURE;
477 }
478
479 ret = pico_newEngine( picoSystem, (const pico_Char *) PICO_VOICE_NAME, &picoEngine );
480 if (PICO_OK != ret) {
481 ALOGE("Failed to create engine for %s [%d]", picoSupportedLang[langIndex], ret);
482 cleanResources();
483 cleanFiles();
484 return TTS_FAILURE;
485 }
486
487 /* Set the current locale/voice. */
488 strcpy( picoProp_currLang, picoSupportedLang[langIndex] );
489 picoCurrentLangIndex = langIndex;
490 ALOGI("loaded %s successfully", picoProp_currLang);
491 return TTS_SUCCESS;
492 }
493
494
495 /** doLanguageSwitch
496 * Switch to the requested locale.
497 * If this locale is already loaded, it returns immediately.
498 * If another locale is already loaded, this will first be unloaded
499 * and the new one then loaded.
500 * If no locale is loaded, the requested will be loaded.
501 * @locale - the locale to check, either in xx or xx-YY format (i.e "en" or "en-US")
502 * return TTS_SUCCESS or TTS_FAILURE
503 */
doLanguageSwitch(const char * locale)504 static tts_result doLanguageSwitch( const char * locale )
505 {
506 int loclIndex; /* locale index */
507
508 /* Load the new locale. */
509 loclIndex = checkForLocale( locale );
510 if (loclIndex < 0) {
511 ALOGE("Tried to swith to non-supported locale %s", locale);
512 return TTS_FAILURE;
513 }
514 //ALOGI("Found supported locale %s", picoSupportedLang[loclIndex]);
515 return doLanguageSwitchFromLangIndex( loclIndex );
516 }
517
518
519 /** doAddProperties
520 * Add <speed>, <pitch> and <volume> tags to the text,
521 * if the properties have been set to non-default values, and return the new string.
522 * The calling function is responsible for freeing the returned string.
523 * @str - text to apply tags to
524 * return new string with tags applied
525 */
doAddProperties(const char * str)526 static char * doAddProperties( const char * str )
527 {
528 char * data = NULL;
529 int haspitch, hasspeed, hasvol; /* parameters */
530 int textlen; /* property string length */
531 haspitch = 0; hasspeed = 0; hasvol = 0;
532 textlen = strlen(str) + 1;
533 if (picoProp_currPitch != PICO_DEF_PITCH) { /* non-default pitch */
534 textlen += strlen(PICO_PITCH_OPEN_TAG) + 5;
535 textlen += strlen(PICO_PITCH_CLOSE_TAG);
536 haspitch = 1;
537 }
538 if (picoProp_currRate != PICO_DEF_RATE) { /* non-default rate */
539 textlen += strlen(PICO_SPEED_OPEN_TAG) + 5;
540 textlen += strlen(PICO_SPEED_CLOSE_TAG);
541 hasspeed = 1;
542 }
543
544 if (picoProp_currVolume != PICO_DEF_VOLUME) { /* non-default volume */
545 textlen += strlen(PICO_VOLUME_OPEN_TAG) + 5;
546 textlen += strlen(PICO_VOLUME_CLOSE_TAG);
547 hasvol = 1;
548 }
549
550 /* Compose the property strings. */
551 data = (char *) malloc( textlen ); /* allocate string */
552 if (!data) {
553 return NULL;
554 }
555 memset(data, 0, textlen); /* clear it */
556 if (haspitch) {
557 char* tmp = (char*)malloc(strlen(PICO_PITCH_OPEN_TAG) + strlen(PICO_PITCH_CLOSE_TAG) + 5);
558 sprintf(tmp, PICO_PITCH_OPEN_TAG, picoProp_currPitch);
559 strcat(data, tmp);
560 free(tmp);
561 }
562
563 if (hasspeed) {
564 char* tmp = (char*)malloc(strlen(PICO_SPEED_OPEN_TAG) + strlen(PICO_SPEED_CLOSE_TAG) + 5);
565 sprintf(tmp, PICO_SPEED_OPEN_TAG, picoProp_currRate);
566 strcat(data, tmp);
567 free(tmp);
568 }
569
570 if (hasvol) {
571 char* tmp = (char*)malloc(strlen(PICO_VOLUME_OPEN_TAG) + strlen(PICO_VOLUME_CLOSE_TAG) + 5);
572 sprintf(tmp, PICO_VOLUME_OPEN_TAG, picoProp_currVolume);
573 strcat(data, tmp);
574 free(tmp);
575 }
576
577 strcat(data, str);
578 if (hasvol) {
579 strcat(data, PICO_VOLUME_CLOSE_TAG);
580 }
581
582 if (hasspeed) {
583 strcat(data, PICO_SPEED_CLOSE_TAG);
584 }
585
586 if (haspitch) {
587 strcat(data, PICO_PITCH_CLOSE_TAG);
588 }
589 return data;
590 }
591
592
/** get_tok
 *  Finds the next token in a string, i.e. the next run of characters that
 *  are not separators. The string terminator counts as a separator.
 *  @str      - text to be processed
 *  @pos      - position of first character to be searched in str
 *  @textlen  - position just behind the last character to be searched
 *  @tokstart - address of a variable to receive the start of the token found
 *  @toklen   - address of a variable to receive the length of the token found
 *  return : 1=token found, 0=token not found (tokstart and toklen are left untouched)
 *  notes : the token separator set could be enlarged adding characters in "kSeparators"
*/
static int get_tok(const char * str , int pos, int textlen, int *tokstart, int *toklen)
{
    static const char * const kSeparators = " ";
    int cursor = pos;
    int first;

    /* Step over the separators in front of the token. */
    for (; cursor < textlen; cursor++) {
        if (strchr(kSeparators, str[cursor]) == NULL) {
            break;
        }
    }
    if (cursor == textlen) {
        /* nothing but separators up to the end */
        return 0;
    }

    /* The token extends up to the next separator or the end of the range. */
    first = cursor;
    for (; cursor < textlen; cursor++) {
        if (strchr(kSeparators, str[cursor]) != NULL) {
            break;
        }
    }

    *tokstart = first;
    *toklen = cursor - first;
    return 1;
}/*get_tok*/
623
624
/** get_sub_tok
 *  Finds the next sub token in a token having a compound structure with camel case
 *  like "xxxYyyy". A sub token is a (possibly empty) run of upper case letters
 *  followed by the characters up to the next upper case letter.
 *  @str      - text to be processed
 *  @pos      - position of first character to be searched in str
 *  @textlen  - position just behind the last character to be searched in str
 *  @tokstart - address of a variable to receive the start of the sub token found
 *  @toklen   - address of a variable to receive the length of the sub token found
 *  return : 1=sub token found, 0=sub token not found
 *  notes : the sub token separator set could be enlarged adding characters in "kUpperCase"
*/
static int get_sub_tok(const char * str , int pos, int textlen, int *tokstart, int *toklen) {

    static const char * const kUpperCase = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
    const int first = pos;
    int cursor = pos;

    if (pos == textlen) {
        return 0;
    }

    /* leading upper case letters belong to the sub token */
    while ((cursor < textlen) && (strchr(kUpperCase, str[cursor]) != NULL)) {
        ++cursor;
    }
    /* so does everything up to the next upper case letter or the end of the token */
    while ((cursor < textlen) && (strchr(kUpperCase, str[cursor]) == NULL)) {
        ++cursor;
    }

    *tokstart = first;
    *toklen = cursor - first;
    return 1;
}/*get_sub_tok*/
667
668
669 /** doCamelCase
670 * Searches for tokens having a compound structure with camel case and transforms them as follows :
671 * "XxxxYyyy" -->> "Xxxx Yyyy",
672 * "xxxYyyy" -->> "xxx Yyyy",
673 * "XXXYyyy" -->> "XXXYyyy"
674 * etc....
675 * The calling function is responsible for freeing the returned string.
676 * @str - text to be processed
677 * return new string with text processed
678 */
doCamelCase(const char * str)679 static char * doCamelCase( const char * str )
680 {
681 int textlen; /* input string length */
682 int totlen; /* output string length */
683 int tlen_2, nsubtok; /* nuber of subtokens */
684 int toklen, tokstart; /*legnth and start of generic token*/
685 int stoklen, stokstart; /*legnth and start of generic sub-token*/
686 int pos, tokpos, outpos; /*postion of current char in input string and token and output*/
687 char *data; /*pointer of the returned string*/
688
689 pos = 0;
690 tokpos = 0;
691 toklen = 0;
692 stoklen = 0;
693 tlen_2 = 0;
694 totlen = 0;
695
696 textlen = strlen(str) + 1;
697
698 /*counting characters after sub token splitting including spaces*/
699 //while ((pos<textlen) && (str[pos]!=0)) {
700 while (get_tok(str, pos, textlen, &tokstart, &toklen)) {
701 tokpos = tokstart;
702 tlen_2 = 0;
703 nsubtok = 0;
704 while (get_sub_tok(str, tokpos, tokstart+toklen, &stokstart, &stoklen)) {
705 totlen += stoklen;
706 tlen_2 += stoklen;
707 tokpos = stokstart + stoklen;
708 nsubtok += 1;
709 }
710 totlen += nsubtok; /*add spaces between subtokens*/
711 pos = tokstart + tlen_2;
712 }
713 //}
714 /* Allocate the return string */
715
716 data = (char *) malloc( totlen ); /* allocate string */
717 if (!data) {
718 return NULL;
719 }
720 memset(data, 0, totlen); /* clear it */
721 outpos = 0;
722 pos = 0;
723 /*copying characters*/
724 //while ((pos<textlen) && (str[pos]!=0)) {
725 while (get_tok (str, pos, textlen, &tokstart, &toklen)) {
726 tokpos = tokstart;
727 tlen_2 = 0;
728 nsubtok = 0;
729 while (get_sub_tok(str, tokpos, tokstart+toklen, &stokstart, &stoklen)) {
730 strncpy(&(data[outpos]), &(str[stokstart]), stoklen);
731 outpos += stoklen;
732 strncpy(&(data[outpos]), " ", 1);
733 tlen_2 += stoklen;
734 outpos += 1;
735 tokpos = stokstart + stoklen;
736 }
737 pos=tokstart+tlen_2;
738 }
739 //}
740 if (outpos == 0) {
741 outpos = 1;
742 }
743 data[outpos-1] = 0;
744 return data;
745 }/*doCamelCase*/
746
747
748 /** createPhonemeString
749 * Wrap all individual words in <phoneme> tags.
750 * The Pico <phoneme> tag only supports one word in each tag,
751 * therefore they must be individually wrapped!
752 * @xsampa - text to convert to Pico phomene string
753 * @length - length of the input string
754 * return new string with tags applied
755 */
createPhonemeString(const char * xsampa,int length)756 extern char * createPhonemeString( const char * xsampa, int length )
757 {
758 char * convstring = NULL;
759 int origStrLen = strlen(xsampa);
760 int numWords = 1;
761 int start, totalLength, i, j;
762
763 for (i = 0; i < origStrLen; i ++) {
764 if ((xsampa[i] == ' ') || (xsampa[i] == '#')) {
765 numWords ++;
766 }
767 }
768
769 if (numWords == 1) {
770 convstring = new char[origStrLen + 17];
771 convstring[0] = '\0';
772 strcat(convstring, PICO_PHONEME_OPEN_TAG);
773 strcat(convstring, xsampa);
774 strcat(convstring, PICO_PHONEME_CLOSE_TAG);
775 } else {
776 char * words[numWords];
777 start = 0; totalLength = 0; i = 0; j = 0;
778 for (i=0, j=0; i < origStrLen; i++) {
779 if ((xsampa[i] == ' ') || (xsampa[i] == '#')) {
780 words[j] = new char[i+1-start+17];
781 words[j][0] = '\0';
782 strcat( words[j], PICO_PHONEME_OPEN_TAG);
783 strncat(words[j], xsampa+start, i-start);
784 strcat( words[j], PICO_PHONEME_CLOSE_TAG);
785 start = i + 1;
786 j++;
787 totalLength += strlen(words[j-1]);
788 }
789 }
790 words[j] = new char[i+1-start+17];
791 words[j][0] = '\0';
792 strcat(words[j], PICO_PHONEME_OPEN_TAG);
793 strcat(words[j], xsampa+start);
794 strcat(words[j], PICO_PHONEME_CLOSE_TAG);
795 totalLength += strlen(words[j]);
796 convstring = new char[totalLength + 1];
797 convstring[0] = '\0';
798 for (i=0 ; i < numWords ; i++) {
799 strcat(convstring, words[i]);
800 delete [] words[i];
801 }
802 }
803
804 return convstring;
805 }
806
/* One row of the IPA to XSAMPA conversion table.
   The XSAMPA uses as many as 5 characters to represent a single IPA code;
   the sixth byte of strXSAMPA holds the string terminator. */
typedef struct tagPhnArr
{
    char16_t strIPA; /* IPA Unicode symbol */
    char strXSAMPA[6]; /* SAMPA sequence */
} PArr;
813
/* Number of rows in PhnAry; CnvIPAPnt() searches exactly this many rows. */
#define phn_cnt (134+7)

PArr PhnAry[phn_cnt] = {

    /* XSAMPA conversion table
       This maps a single IPA symbol to a sequence representing XSAMPA.
       This relies upon a direct one-to-one correspondence
       including diphthongs and affricates.
       In most rows a backslash of the XSAMPA sequence is stored twice
       (the literal "\\\\" is two backslash characters). */

    /* Vowels (23) complete */
    {0x025B, "E"},
    {0x0251, "A"},
    {0x0254, "O"},
    {0x00F8, "2"},
    {0x0153, "9"},
    {0x0276, "&"},
    {0x0252, "Q"},
    {0x028C, "V"},
    {0x0264, "7"},
    {0x026F, "M"},
    {0x0268, "1"},
    {0x0289, "}"},
    {0x026A, "I"},
    {0x028F, "Y"},
    {0x028A, "U"},
    {0x0259, "@"},
    {0x0275, "8"},
    {0x0250, "6"},
    {0x00E6, "{"},
    {0x025C, "3"},
    {0x025A, "@`"},
    {0x025E, "3\\\\"},
    {0x0258, "@\\\\"},

    /* Consonants (60) complete */
    {0x0288, "t`"},
    {0x0256, "d`"},
    {0x025F, "J\\\\"},
    {0x0261, "g"},
    {0x0262, "G\\\\"},
    {0x0294, "?"},
    {0x0271, "F"},
    {0x0273, "n`"},
    {0x0272, "J"},
    {0x014B, "N"},
    {0x0274, "N\\\\"},
    {0x0299, "B\\\\"},
    {0x0280, "R\\\\"},
    {0x027E, "4"},
    {0x027D, "r`"},
    {0x0278, "p\\\\"},
    {0x03B2, "B"},
    {0x03B8, "T"},
    {0x00F0, "D"},
    {0x0283, "S"},
    {0x0292, "Z"},
    {0x0282, "s`"},
    {0x0290, "z`"},
    {0x00E7, "C"},
    {0x029D, "j\\\\"},
    {0x0263, "G"},
    {0x03C7, "X"},
    {0x0281, "R"},
    {0x0127, "X\\\\"},
    {0x0295, "?\\\\"},
    {0x0266, "h\\\\"},
    {0x026C, "K"},
    {0x026E, "K\\\\"},
    {0x028B, "P"},
    {0x0279, "r\\\\"},
    {0x027B, "r\\\\'"},
    {0x0270, "M\\\\"},
    {0x026D, "l`"},
    {0x028E, "L"},
    {0x029F, "L\\\\"},
    {0x0253, "b_<"},
    {0x0257, "d_<"},
    {0x0284, "J\\_<"}, /* NOTE(review): single backslash, unlike most rows - confirm intended */
    {0x0260, "g_<"},
    {0x029B, "G\\_<"}, /* NOTE(review): single backslash, unlike most rows - confirm intended */
    {0x028D, "W"},
    {0x0265, "H"},
    {0x029C, "H\\\\"},
    {0x02A1, ">\\\\"},
    {0x02A2, "<\\\\"},
    {0x0267, "x\\\\"}, /* hooktop heng */
    {0x0298, "O\\\\"},
    {0x01C0, "|\\\\"},
    {0x01C3, "!\\\\"},
    {0x01C2, "=\\"}, /* NOTE(review): single backslash, unlike most rows - confirm intended */
    {0x01C1, "|\\|\\"}, /* NOTE(review): single backslashes; doubled they would not fit strXSAMPA[6] */
    {0x027A, "l\\\\"},
    {0x0255, "s\\\\"},
    {0x0291, "z\\\\"},
    {0x026B, "l_G"},


    /* Diacritics (37) complete */
    {0x02BC, "_>"},
    {0x0325, "_0"},
    {0x030A, "_0"},
    {0x032C, "_v"},
    {0x02B0, "_h"},
    {0x0324, "_t"},
    {0x0330, "_k"},
    {0x033C, "_N"},
    {0x032A, "_d"},
    {0x033A, "_a"},
    {0x033B, "_m"},
    {0x0339, "_O"},
    {0x031C, "_c"},
    {0x031F, "_+"},
    {0x0320, "_-"},
    {0x0308, "_\""}, /* centralized */
    {0x033D, "_x"},
    {0x0318, "_A"},
    {0x0319, "_q"},
    {0x02DE, "`"},
    {0x02B7, "_w"},
    {0x02B2, "_j"},
    {0x02E0, "_G"},
    {0x02E4, "_?\\\\"}, /* pharyngealized */
    {0x0303, "~"}, /* nasalized */
    {0x207F, "_n"},
    {0x02E1, "_l"},
    {0x031A, "_}"},
    {0x0334, "_e"},
    {0x031D, "_r"}, /* raised equivalent to 02D4 */
    {0x02D4, "_r"}, /* raised equivalent to 031D */
    {0x031E, "_o"}, /* lowered equivalent to 02D5 */
    {0x02D5, "_o"}, /* lowered equivalent to 031E */
    {0x0329, "="}, /* syllabic */
    {0x032F, "_^"}, /* non-syllabic */
    {0x0361, "_"}, /* top tie bar */
    {0x035C, "_"},

    /* Suprasegmental (15) incomplete */
    {0x02C8, "\""}, /* primary stress */
    {0x02CC, "%"}, /* secondary stress */
    {0x02D0, ":"}, /* long */
    {0x02D1, ":\\\\"}, /* half-long */
    {0x0306, "_X"}, /* extra short */

    {0x2016, "||"}, /* major group */
    {0x203F, "-\\\\"}, /* bottom tie bar */
    {0x2197, "<R>"}, /* global rise */
    {0x2198, "<F>"}, /* global fall */
    {0x2193, "<D>"}, /* downstep */
    {0x2191, "<U>"}, /* upstep */
    {0x02E5, "<T>"}, /* extra high level */
    {0x02E7, "<M>"}, /* mid level */
    {0x02E9, "<B>"}, /* extra low level */

    {0x025D, "3`:"}, /* non-IPA %% */

    /* Affricates (6) complete */
    {0x02A3, "d_z"},
    {0x02A4, "d_Z"},
    {0x02A5, "d_z\\\\"},
    {0x02A6, "t_s"},
    {0x02A7, "t_S"},
    {0x02A8, "t_s\\\\"}
};
977
978
CnvIPAPnt(const char16_t IPnt,char * XPnt)979 void CnvIPAPnt( const char16_t IPnt, char * XPnt )
980 {
981 char16_t ThisPnt = IPnt; /* local copy of single IPA codepoint */
982 int idx; /* index into table */
983
984 /* Convert an individual IPA codepoint.
985 A single IPA code could map to a string.
986 Search the table. If it is not found, use the same character.
987 Since most codepoints can be contained within 16 bits,
988 they are represented as wide chars. */
989 XPnt[0] = 0; /* clear the result string */
990
991 /* Search the table for the conversion. */
992 for (idx = 0; idx < phn_cnt; idx ++) { /* for each item in table */
993 if (IPnt == PhnAry[idx].strIPA) { /* matches IPA code */
994 strcat( XPnt, (const char *)&(PhnAry[idx].strXSAMPA) ); /* copy the XSAMPA string */
995 return;
996 }
997 }
998 strcat(XPnt, (const char *)&ThisPnt); /* just copy it */
999 }
1000
1001
/** cnvIpaToXsampa
 *  Convert an IPA character string to an XSAMPA character string.
 *  @ipaString - input IPA string to convert
 *  @ipaStringSize - number of char16_t elements of ipaString to convert
 *  @outXsampaString - converted XSAMPA string is passed back in this parameter;
 *                     the caller is responsible for freeing it
 *  return size of the new string
*/
1008
cnvIpaToXsampa(const char16_t * ipaString,size_t ipaStringSize,char ** outXsampaString)1009 int cnvIpaToXsampa( const char16_t * ipaString, size_t ipaStringSize, char ** outXsampaString )
1010 {
1011 size_t xsize; /* size of result */
1012 size_t ipidx; /* index into IPA string */
1013 char * XPnt; /* short XSAMPA char sequence */
1014
1015 /* Convert an IPA string to an XSAMPA string and store the xsampa string in *outXsampaString.
1016 It is the responsibility of the caller to free the allocated string.
1017 Increment through the string. For each base & combination convert it to the XSAMP equivalent.
1018 Because of the XSAMPA limitations, not all IPA characters will be covered. */
1019 XPnt = (char *) malloc(6);
1020 xsize = (4 * ipaStringSize) + 8; /* assume more than double size */
1021 *outXsampaString = (char *) malloc( xsize );/* allocate return string */
1022 *outXsampaString[0] = 0;
1023 xsize = 0; /* clear final */
1024
1025 for (ipidx = 0; ipidx < ipaStringSize; ipidx ++) { /* for each IPA code */
1026 CnvIPAPnt( ipaString[ipidx], XPnt ); /* get converted character */
1027 strcat((char *)*outXsampaString, XPnt ); /* concatenate XSAMPA */
1028 }
1029 free(XPnt);
1030 xsize = strlen(*outXsampaString); /* get the final length */
1031 return xsize;
1032 }
1033
1034
1035 /* Google Engine API function implementations */
1036
1037 /** init
1038 * Allocates Pico memory block and initializes the Pico system.
1039 * synthDoneCBPtr - Pointer to callback function which will receive generated samples
1040 * config - the engine configuration parameters, here only contains the non-system path
1041 * for the lingware location
1042 * return tts_result
1043 */
init(synthDoneCB_t synthDoneCBPtr,const char * config)1044 tts_result TtsEngine::init( synthDoneCB_t synthDoneCBPtr, const char *config )
1045 {
1046 if (synthDoneCBPtr == NULL) {
1047 ALOGE("Callback pointer is NULL");
1048 return TTS_FAILURE;
1049 }
1050
1051 picoMemArea = malloc( PICO_MEM_SIZE );
1052 if (!picoMemArea) {
1053 ALOGE("Failed to allocate memory for Pico system");
1054 return TTS_FAILURE;
1055 }
1056
1057 pico_Status ret = pico_initialize( picoMemArea, PICO_MEM_SIZE, &picoSystem );
1058 if (PICO_OK != ret) {
1059 ALOGE("Failed to initialize Pico system");
1060 free( picoMemArea );
1061 picoMemArea = NULL;
1062 return TTS_FAILURE;
1063 }
1064
1065 picoSynthDoneCBPtr = synthDoneCBPtr;
1066
1067 picoCurrentLangIndex = -1;
1068
1069 // was the initialization given an alternative path for the lingware location?
1070 if ((config != NULL) && (strlen(config) > 0)) {
1071 int max_filename_length = PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE;
1072 if (strlen(config) >= max_filename_length) {
1073 ALOGE("The length of engine config is too long (should be less than %d bytes).",
1074 max_filename_length);
1075 return TTS_FAILURE;
1076 }
1077 pico_alt_lingware_path = (char*)malloc(strlen(config) + 1);
1078 strcpy((char*)pico_alt_lingware_path, config);
1079 ALOGV("Alternative lingware path %s", pico_alt_lingware_path);
1080 } else {
1081 pico_alt_lingware_path = (char*)malloc(strlen(PICO_LINGWARE_PATH) + 1);
1082 strcpy((char*)pico_alt_lingware_path, PICO_LINGWARE_PATH);
1083 ALOGV("Using predefined lingware path %s", pico_alt_lingware_path);
1084 }
1085
1086 return TTS_SUCCESS;
1087 }
1088
1089
1090 /** shutdown
1091 * Unloads all Pico resources; terminates Pico system and frees Pico memory block.
1092 * return tts_result
1093 */
shutdown(void)1094 tts_result TtsEngine::shutdown( void )
1095 {
1096 cleanResources();
1097
1098 if (picoSystem) {
1099 pico_terminate(&picoSystem);
1100 picoSystem = NULL;
1101 }
1102 if (picoMemArea) {
1103 free(picoMemArea);
1104 picoMemArea = NULL;
1105 }
1106
1107 cleanFiles();
1108 return TTS_SUCCESS;
1109 }
1110
1111
1112 /** loadLanguage
1113 * Load a new language.
1114 * @lang - string with ISO 3 letter language code.
1115 * @country - string with ISO 3 letter country code .
1116 * @variant - string with language variant for that language and country pair.
1117 * return tts_result
1118 */
loadLanguage(const char * lang,const char * country,const char * variant)1119 tts_result TtsEngine::loadLanguage(const char *lang, const char *country, const char *variant)
1120 {
1121 return TTS_FAILURE;
1122 //return setProperty("language", value, size);
1123 }
1124
1125
1126 /** setLanguage
1127 * Load a new language (locale). Use the ISO 639-3 language codes.
1128 * @lang - string with ISO 639-3 language code.
1129 * @country - string with ISO 3 letter country code.
1130 * @variant - string with language variant for that language and country pair.
1131 * return tts_result
1132 */
setLanguage(const char * lang,const char * country,const char * variant)1133 tts_result TtsEngine::setLanguage( const char * lang, const char * country, const char * variant )
1134 {
1135 //ALOGI("TtsEngine::setLanguage %s %s %s", lang, country, variant);
1136 int langIndex;
1137 int countryIndex;
1138 int i;
1139
1140 if (lang == NULL)
1141 {
1142 ALOGE("TtsEngine::setLanguage called with NULL language");
1143 return TTS_FAILURE;
1144 }
1145
1146 /* We look for a match on the language first
1147 then we look for a match on the country.
1148 If no match on the language:
1149 return an error.
1150 If match on the language, but no match on the country:
1151 load the language found for the language match.
1152 If match on the language, and match on the country:
1153 load the language found for the country match. */
1154
1155 /* Find a match on the language. */
1156 langIndex = -1; /* no match */
1157 for (i = 0; i < picoNumSupportedVocs; i ++)
1158 {
1159 if (strcmp(lang, picoSupportedLangIso3[i]) == 0)
1160 {
1161 langIndex = i;
1162 break;
1163 }
1164 }
1165 if (langIndex < 0)
1166 {
1167 /* The language isn't supported. */
1168 ALOGE("TtsEngine::setLanguage called with unsupported language");
1169 return TTS_FAILURE;
1170 }
1171
1172 /* Find a match on the country, if there is one. */
1173 if (country != NULL)
1174 {
1175 countryIndex = -1;
1176 for (i = langIndex; i < picoNumSupportedVocs; i ++)
1177 {
1178 if ( (strcmp(lang, picoSupportedLangIso3[i]) == 0)
1179 && (strcmp(country, picoSupportedCountryIso3[i]) == 0))
1180 {
1181 countryIndex = i;
1182 break;
1183 }
1184 }
1185
1186 if (countryIndex < 0)
1187 {
1188 /* We didn't find a match on the country, but we had a match on the language.
1189 Use that language. */
1190 ALOGI("TtsEngine::setLanguage found matching language(%s) but not matching country(%s).",
1191 lang, country);
1192 }
1193 else
1194 {
1195 /* We have a match on both the language and the country. */
1196 langIndex = countryIndex;
1197 }
1198 }
1199
1200 return doLanguageSwitchFromLangIndex( langIndex ); /* switch the language */
1201 }
1202
1203
1204 /** isLanguageAvailable
1205 * Returns the level of support for a language.
1206 * @lang - string with ISO 3 letter language code.
1207 * @country - string with ISO 3 letter country code .
1208 * @variant - string with language variant for that language and country pair.
1209 * return tts_support_result
1210 */
isLanguageAvailable(const char * lang,const char * country,const char * variant)1211 tts_support_result TtsEngine::isLanguageAvailable(const char *lang, const char *country,
1212 const char *variant) {
1213 int langIndex = -1;
1214 int countryIndex = -1;
1215 //-------------------------
1216 // language matching
1217 // if no language specified
1218 if (lang == NULL) {
1219 ALOGE("TtsEngine::isLanguageAvailable called with no language");
1220 return TTS_LANG_NOT_SUPPORTED;
1221 }
1222
1223 // find a match on the language
1224 for (int i = 0; i < picoNumSupportedVocs; i++)
1225 {
1226 if (strcmp(lang, picoSupportedLangIso3[i]) == 0) {
1227 langIndex = i;
1228 break;
1229 }
1230 }
1231 if (langIndex < 0) {
1232 // language isn't supported
1233 ALOGV("TtsEngine::isLanguageAvailable called with unsupported language");
1234 return TTS_LANG_NOT_SUPPORTED;
1235 }
1236
1237 //-------------------------
1238 // country matching
1239 // if no country specified
1240 if ((country == NULL) || (strlen(country) == 0)) {
1241 // check installation of matched language
1242 return (hasResourcesForLanguage(langIndex) ? TTS_LANG_AVAILABLE : TTS_LANG_MISSING_DATA);
1243 }
1244
1245 // find a match on the country
1246 for (int i = langIndex; i < picoNumSupportedVocs; i++) {
1247 if ((strcmp(lang, picoSupportedLangIso3[i]) == 0)
1248 && (strcmp(country, picoSupportedCountryIso3[i]) == 0)) {
1249 countryIndex = i;
1250 break;
1251 }
1252 }
1253 if (countryIndex < 0) {
1254 // we didn't find a match on the country, but we had a match on the language
1255 // check installation of matched language
1256 return (hasResourcesForLanguage(langIndex) ? TTS_LANG_AVAILABLE : TTS_LANG_MISSING_DATA);
1257 } else {
1258 // we have a match on the language and the country
1259 langIndex = countryIndex;
1260 // check installation of matched language + country
1261 return (hasResourcesForLanguage(langIndex) ? TTS_LANG_COUNTRY_AVAILABLE : TTS_LANG_MISSING_DATA);
1262 }
1263
1264 // no variants supported in this library, TTS_LANG_COUNTRY_VAR_AVAILABLE cannot be returned.
1265 }
1266
1267
1268 /** getLanguage
1269 * Get the currently loaded language - if any.
1270 * @lang - string with current ISO 3 letter language code, empty string if no loaded language.
1271 * @country - string with current ISO 3 letter country code, empty string if no loaded language.
1272 * @variant - string with current language variant, empty string if no loaded language.
1273 * return tts_result
1274 */
getLanguage(char * language,char * country,char * variant)1275 tts_result TtsEngine::getLanguage(char *language, char *country, char *variant)
1276 {
1277 if (picoCurrentLangIndex == -1) {
1278 strcpy(language, "\0");
1279 strcpy(country, "\0");
1280 strcpy(variant, "\0");
1281 } else {
1282 strcpy(language, picoSupportedLangIso3[picoCurrentLangIndex]);
1283 strcpy(country, picoSupportedCountryIso3[picoCurrentLangIndex]);
1284 // no variant in this implementation
1285 strcpy(variant, "\0");
1286 }
1287 return TTS_SUCCESS;
1288 }
1289
1290
1291 /** setAudioFormat
1292 * sets the audio format to use for synthesis, returns what is actually used.
1293 * @encoding - reference to encoding format
1294 * @rate - reference to sample rate
1295 * @channels - reference to number of channels
1296 * return tts_result
1297 * */
setAudioFormat(tts_audio_format & encoding,uint32_t & rate,int & channels)1298 tts_result TtsEngine::setAudioFormat(tts_audio_format& encoding, uint32_t& rate,
1299 int& channels)
1300 {
1301 // ignore the input parameters, the enforced audio parameters are fixed here
1302 encoding = TTS_AUDIO_FORMAT_PCM_16_BIT;
1303 rate = 16000;
1304 channels = 1;
1305 return TTS_SUCCESS;
1306 }
1307
1308
1309 /** setProperty
1310 * Set property. The supported properties are: language, rate, pitch and volume.
1311 * @property - name of property to set
1312 * @value - value to set
1313 * @size - size of value
1314 * return tts_result
1315 */
setProperty(const char * property,const char * value,const size_t size)1316 tts_result TtsEngine::setProperty( const char * property, const char * value, const size_t size )
1317 {
1318 int rate;
1319 int pitch;
1320 int volume;
1321
1322 /* Set a specific property for the engine.
1323 Supported properties include: language (locale), rate, pitch, volume. */
1324 /* Sanity check */
1325 if (property == NULL) {
1326 ALOGE("setProperty called with property NULL");
1327 return TTS_PROPERTY_UNSUPPORTED;
1328 }
1329
1330 if (value == NULL) {
1331 ALOGE("setProperty called with value NULL");
1332 return TTS_VALUE_INVALID;
1333 }
1334
1335 if (strncmp(property, "language", 8) == 0) {
1336 /* Verify it's in correct format. */
1337 if (strlen(value) != 2 && strlen(value) != 6) {
1338 ALOGE("change language called with incorrect format");
1339 return TTS_VALUE_INVALID;
1340 }
1341
1342 /* Try to switch to specified language. */
1343 if (doLanguageSwitch(value) == TTS_FAILURE) {
1344 ALOGE("failed to load language");
1345 return TTS_FAILURE;
1346 } else {
1347 return TTS_SUCCESS;
1348 }
1349 } else if (strncmp(property, "rate", 4) == 0) {
1350 rate = atoi(value);
1351 if (rate < PICO_MIN_RATE) {
1352 rate = PICO_MIN_RATE;
1353 }
1354 if (rate > PICO_MAX_RATE) {
1355 rate = PICO_MAX_RATE;
1356 }
1357 picoProp_currRate = rate;
1358 return TTS_SUCCESS;
1359 } else if (strncmp(property, "pitch", 5) == 0) {
1360 pitch = atoi(value);
1361 if (pitch < PICO_MIN_PITCH) {
1362 pitch = PICO_MIN_PITCH;
1363 }
1364 if (pitch > PICO_MAX_PITCH) {
1365 pitch = PICO_MAX_PITCH;
1366 }
1367 picoProp_currPitch = pitch;
1368 return TTS_SUCCESS;
1369 } else if (strncmp(property, "volume", 6) == 0) {
1370 volume = atoi(value);
1371 if (volume < PICO_MIN_VOLUME) {
1372 volume = PICO_MIN_VOLUME;
1373 }
1374 if (volume > PICO_MAX_VOLUME) {
1375 volume = PICO_MAX_VOLUME;
1376 }
1377 picoProp_currVolume = volume;
1378 return TTS_SUCCESS;
1379 }
1380
1381 return TTS_PROPERTY_UNSUPPORTED;
1382 }
1383
1384
1385 /** getProperty
1386 * Get the property. Supported properties are: language, rate, pitch and volume.
1387 * @property - name of property to get
1388 * @value - buffer which will receive value of property
1389 * @iosize - size of value - if size is too small on return this will contain actual size needed
1390 * return tts_result
1391 */
getProperty(const char * property,char * value,size_t * iosize)1392 tts_result TtsEngine::getProperty( const char * property, char * value, size_t * iosize )
1393 {
1394 /* Get the property for the engine.
1395 This property was previously set by setProperty or by default. */
1396 /* sanity check */
1397 if (property == NULL) {
1398 ALOGE("getProperty called with property NULL");
1399 return TTS_PROPERTY_UNSUPPORTED;
1400 }
1401
1402 if (value == NULL) {
1403 ALOGE("getProperty called with value NULL");
1404 return TTS_VALUE_INVALID;
1405 }
1406
1407 if (strncmp(property, "language", 8) == 0) {
1408 if (picoProp_currLang == NULL) {
1409 strcpy(value, "");
1410 } else {
1411 if (*iosize < strlen(picoProp_currLang)+1) {
1412 *iosize = strlen(picoProp_currLang) + 1;
1413 return TTS_PROPERTY_SIZE_TOO_SMALL;
1414 }
1415 strcpy(value, picoProp_currLang);
1416 }
1417 return TTS_SUCCESS;
1418 } else if (strncmp(property, "rate", 4) == 0) {
1419 char tmprate[4];
1420 sprintf(tmprate, "%d", picoProp_currRate);
1421 if (*iosize < strlen(tmprate)+1) {
1422 *iosize = strlen(tmprate) + 1;
1423 return TTS_PROPERTY_SIZE_TOO_SMALL;
1424 }
1425 strcpy(value, tmprate);
1426 return TTS_SUCCESS;
1427 } else if (strncmp(property, "pitch", 5) == 0) {
1428 char tmppitch[4];
1429 sprintf(tmppitch, "%d", picoProp_currPitch);
1430 if (*iosize < strlen(tmppitch)+1) {
1431 *iosize = strlen(tmppitch) + 1;
1432 return TTS_PROPERTY_SIZE_TOO_SMALL;
1433 }
1434 strcpy(value, tmppitch);
1435 return TTS_SUCCESS;
1436 } else if (strncmp(property, "volume", 6) == 0) {
1437 char tmpvol[4];
1438 sprintf(tmpvol, "%d", picoProp_currVolume);
1439 if (*iosize < strlen(tmpvol)+1) {
1440 *iosize = strlen(tmpvol) + 1;
1441 return TTS_PROPERTY_SIZE_TOO_SMALL;
1442 }
1443 strcpy(value, tmpvol);
1444 return TTS_SUCCESS;
1445 }
1446
1447 /* Unknown property */
1448 ALOGE("Unsupported property");
1449 return TTS_PROPERTY_UNSUPPORTED;
1450 }
1451
1452
1453 /** synthesizeText
1454 * Synthesizes a text string.
1455 * The text string could be annotated with SSML tags.
1456 * @text - text to synthesize
1457 * @buffer - buffer which will receive generated samples
1458 * @bufferSize - size of buffer
1459 * @userdata - pointer to user data which will be passed back to callback function
1460 * return tts_result
1461 */
synthesizeText(const char * text,int8_t * buffer,size_t bufferSize,void * userdata)1462 tts_result TtsEngine::synthesizeText( const char * text, int8_t * buffer, size_t bufferSize, void * userdata )
1463 {
1464 int err;
1465 int cbret;
1466 pico_Char * inp = NULL;
1467 char * expanded_text = NULL;
1468 pico_Char * local_text = NULL;
1469 short outbuf[MAX_OUTBUF_SIZE/2];
1470 pico_Int16 bytes_sent, bytes_recv, text_remaining, out_data_type;
1471 pico_Status ret;
1472 SvoxSsmlParser * parser = NULL;
1473
1474 picoSynthAbort = 0;
1475 if (text == NULL) {
1476 ALOGE("synthesizeText called with NULL string");
1477 return TTS_FAILURE;
1478 }
1479
1480 if (strlen(text) == 0) {
1481 return TTS_SUCCESS;
1482 }
1483
1484 if (buffer == NULL) {
1485 ALOGE("synthesizeText called with NULL buffer");
1486 return TTS_FAILURE;
1487 }
1488
1489 if ( (strncmp(text, "<speak", 6) == 0) || (strncmp(text, "<?xml", 5) == 0) ) {
1490 /* SSML input */
1491 parser = new SvoxSsmlParser();
1492 if (parser && parser->initSuccessful()) {
1493 err = parser->parseDocument(text, 1);
1494 if (err == XML_STATUS_ERROR) {
1495 /* Note: for some reason expat always thinks the input document has an error
1496 at the end, even when the XML document is perfectly formed */
1497 ALOGI("Warning: SSML document parsed with errors");
1498 }
1499 char * parsed_text = parser->getParsedDocument();
1500 if (parsed_text) {
1501 /* Add property tags to the string - if any. */
1502 local_text = (pico_Char *) doAddProperties( parsed_text );
1503 if (!local_text) {
1504 ALOGE("Failed to allocate memory for text string");
1505 delete parser;
1506 return TTS_FAILURE;
1507 }
1508 char * lang = parser->getParsedDocumentLanguage();
1509 if (lang != NULL) {
1510 if (doLanguageSwitch(lang) == TTS_FAILURE) {
1511 ALOGE("Failed to switch to language (%s) specified in SSML document.", lang);
1512 delete parser;
1513 return TTS_FAILURE;
1514 }
1515 } else {
1516 // lang is NULL, pick a language so the synthesis can be performed
1517 if (picoCurrentLangIndex == -1) {
1518 // no current language loaded, pick the first one and load it
1519 if (doLanguageSwitchFromLangIndex(0) == TTS_FAILURE) {
1520 ALOGE("Failed to switch to default language.");
1521 delete parser;
1522 return TTS_FAILURE;
1523 }
1524 }
1525 //ALOGI("No language in SSML, using current language (%s).", picoProp_currLang);
1526 }
1527 delete parser;
1528 } else {
1529 ALOGE("Failed to parse SSML document");
1530 delete parser;
1531 return TTS_FAILURE;
1532 }
1533 } else {
1534 ALOGE("Failed to create SSML parser");
1535 if (parser) {
1536 delete parser;
1537 }
1538 return TTS_FAILURE;
1539 }
1540 } else {
1541 /* camelCase pre-processing */
1542 expanded_text = doCamelCase(text);
1543 /* Add property tags to the string - if any. */
1544 local_text = (pico_Char *) doAddProperties( expanded_text );
1545 if (expanded_text) {
1546 free( expanded_text );
1547 }
1548 if (!local_text) {
1549 ALOGE("Failed to allocate memory for text string");
1550 return TTS_FAILURE;
1551 }
1552 }
1553
1554 text_remaining = strlen((const char *) local_text) + 1;
1555
1556 inp = (pico_Char *) local_text;
1557
1558 size_t bufused = 0;
1559
1560 /* synthesis loop */
1561 while (text_remaining) {
1562 if (picoSynthAbort) {
1563 ret = pico_resetEngine( picoEngine, PICO_RESET_SOFT );
1564 break;
1565 }
1566
1567 /* Feed the text into the engine. */
1568 ret = pico_putTextUtf8( picoEngine, inp, text_remaining, &bytes_sent );
1569 if (ret != PICO_OK) {
1570 ALOGE("Error synthesizing string '%s': [%d]", text, ret);
1571 if (local_text) {
1572 free( local_text );
1573 }
1574 return TTS_FAILURE;
1575 }
1576
1577 text_remaining -= bytes_sent;
1578 inp += bytes_sent;
1579 do {
1580 if (picoSynthAbort) {
1581 ret = pico_resetEngine( picoEngine, PICO_RESET_SOFT );
1582 break;
1583 }
1584 /* Retrieve the samples and add them to the buffer. */
1585 ret = pico_getData( picoEngine, (void *) outbuf, MAX_OUTBUF_SIZE, &bytes_recv,
1586 &out_data_type );
1587 if (bytes_recv) {
1588 if ((bufused + bytes_recv) <= bufferSize) {
1589 memcpy(buffer+bufused, (int8_t *) outbuf, bytes_recv);
1590 bufused += bytes_recv;
1591 } else {
1592 /* The buffer filled; pass this on to the callback function. */
1593 cbret = picoSynthDoneCBPtr(userdata, 16000, TTS_AUDIO_FORMAT_PCM_16_BIT, 1, buffer,
1594 bufused, TTS_SYNTH_PENDING);
1595 if (cbret == TTS_CALLBACK_HALT) {
1596 ALOGI("Halt requested by caller. Halting.");
1597 picoSynthAbort = 1;
1598 ret = pico_resetEngine( picoEngine, PICO_RESET_SOFT );
1599 break;
1600 }
1601 bufused = 0;
1602 memcpy(buffer, (int8_t *) outbuf, bytes_recv);
1603 bufused += bytes_recv;
1604 }
1605 }
1606 } while (PICO_STEP_BUSY == ret);
1607
1608 /* This chunk of synthesis is finished; pass the remaining samples.
1609 Use 16 KHz, 16-bit samples. */
1610 if (!picoSynthAbort) {
1611 picoSynthDoneCBPtr( userdata, 16000, TTS_AUDIO_FORMAT_PCM_16_BIT, 1, buffer, bufused,
1612 TTS_SYNTH_PENDING);
1613 }
1614 picoSynthAbort = 0;
1615
1616 if (ret != PICO_STEP_IDLE) {
1617 if (ret != 0){
1618 ALOGE("Error occurred during synthesis [%d]", ret);
1619 }
1620 if (local_text) {
1621 free(local_text);
1622 }
1623 ALOGV("Synth loop: sending TTS_SYNTH_DONE after error");
1624 picoSynthDoneCBPtr( userdata, 16000, TTS_AUDIO_FORMAT_PCM_16_BIT, 1, buffer, bufused,
1625 TTS_SYNTH_DONE);
1626 pico_resetEngine( picoEngine, PICO_RESET_SOFT );
1627 return TTS_FAILURE;
1628 }
1629 }
1630
1631 /* Synthesis is done; notify the caller */
1632 ALOGV("Synth loop: sending TTS_SYNTH_DONE after all done, or was asked to stop");
1633 picoSynthDoneCBPtr( userdata, 16000, TTS_AUDIO_FORMAT_PCM_16_BIT, 1, buffer, bufused,
1634 TTS_SYNTH_DONE);
1635
1636 if (local_text) {
1637 free( local_text );
1638 }
1639 return TTS_SUCCESS;
1640 }
1641
1642
1643
1644 /** stop
1645 * Aborts the running synthesis.
1646 * return tts_result
1647 */
stop(void)1648 tts_result TtsEngine::stop( void )
1649 {
1650 picoSynthAbort = 1;
1651 return TTS_SUCCESS;
1652 }
1653
1654
1655 #ifdef __cplusplus
1656 extern "C" {
1657 #endif
1658
getTtsEngine(void)1659 TtsEngine * getTtsEngine( void )
1660 {
1661 return new TtsEngine();
1662 }
1663
1664 #ifdef __cplusplus
1665 }
1666 #endif
1667