1 /* GStreamer
2 * Copyright (C) <1999> Erik Walthinsen <omega@cse.ogi.edu>
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Library General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Library General Public License for more details.
13 *
14 * You should have received a copy of the GNU Library General Public
15 * License along with this library; if not, write to the
16 * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
17 * Boston, MA 02110-1301, USA.
18 */
19 /*************************************************************************/
20 /* */
21 /* Centre for Speech Technology Research */
22 /* University of Edinburgh, UK */
23 /* Copyright (c) 1999 */
24 /* All Rights Reserved. */
25 /* */
26 /* Permission is hereby granted, free of charge, to use and distribute */
27 /* this software and its documentation without restriction, including */
28 /* without limitation the rights to use, copy, modify, merge, publish, */
29 /* distribute, sublicense, and/or sell copies of this work, and to */
30 /* permit persons to whom this work is furnished to do so, subject to */
31 /* the following conditions: */
32 /* 1. The code must retain the above copyright notice, this list of */
33 /* conditions and the following disclaimer. */
34 /* 2. Any modifications must be clearly marked as such. */
35 /* 3. Original authors' names are not deleted. */
36 /* 4. The authors' names are not used to endorse or promote products */
37 /* derived from this software without specific prior written */
38 /* permission. */
39 /* */
40 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
41 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
42 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
43 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
44 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
45 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
46 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
47 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
48 /* THIS SOFTWARE. */
49 /* */
50 /*************************************************************************/
51 /* Author : Alan W Black (awb@cstr.ed.ac.uk) */
52 /* Date : March 1999 */
53 /*-----------------------------------------------------------------------*/
54 /* */
55 /* Client end of Festival server API in C designed specifically for */
56 /* Galaxy Communicator use though might be of use for other things */
57 /* */
58 /* This is a modified version of the standalone client as provided in */
59 /* festival example code: festival_client.c */
60 /* */
61 /*=======================================================================*/
62
63 /**
64 * SECTION:element-festival
65 * @title: festival
66 *
67 * This element connects to a
68 * <ulink url="http://www.festvox.org/festival/index.html">festival</ulink>
 * server process and uses it to synthesize speech. Festival must already be
 * running in server mode, started as <screen>festival --server</screen>.
71 *
72 * ## Example pipeline
73 * |[
74 * echo 'Hello G-Streamer!' | gst-launch-1.0 fdsrc fd=0 ! festival ! wavparse ! audioconvert ! alsasink
75 * ]|
76 *
77 */
78
79 #ifdef HAVE_CONFIG_H
80 #include "config.h"
81 #endif
82
83 #include <glib.h> /* Needed for G_OS_XXXX macros */
84
85 #include <stdio.h>
86 #include <stdlib.h>
87
88 #ifdef HAVE_UNISTD_H
89 #include <unistd.h>
90 #endif
91
92 #include <ctype.h>
93 #include <string.h>
94 #include <sys/types.h>
95 #ifdef G_OS_WIN32
96 #include <winsock2.h>
97 #include <ws2tcpip.h>
98 #else
99 #include <sys/socket.h>
100 #include <netdb.h>
101 #include <netinet/in.h>
102 #include <arpa/inet.h>
103 #endif
104
105 #include "gstfestival.h"
106 #include <gst/audio/audio.h>
107
108 GST_DEBUG_CATEGORY_STATIC (festival_debug);
109 #define GST_CAT_DEFAULT festival_debug
110
111 static void gst_festival_finalize (GObject * object);
112
113 static GstFlowReturn gst_festival_chain (GstPad * pad, GstObject * parent,
114 GstBuffer * buf);
115 static gboolean gst_festival_src_query (GstPad * pad, GstObject * parent,
116 GstQuery * query);
117 static GstStateChangeReturn gst_festival_change_state (GstElement * element,
118 GstStateChange transition);
119
120 static FT_Info *festival_default_info (void);
121 static char *socket_receive_file_to_buff (int fd, int *size);
122 static char *client_accept_s_expr (int fd);
123
124 static GstStaticPadTemplate sink_template_factory =
125 GST_STATIC_PAD_TEMPLATE ("sink",
126 GST_PAD_SINK,
127 GST_PAD_ALWAYS,
128 GST_STATIC_CAPS ("text/x-raw, format=(string)utf8")
129 );
130
131 static GstStaticPadTemplate src_template_factory =
132 GST_STATIC_PAD_TEMPLATE ("src",
133 GST_PAD_SRC,
134 GST_PAD_ALWAYS,
135 GST_STATIC_CAPS ("audio/x-wav")
136 );
137
/* Festival signals and args.
 *
 * The element defines no signals and no properties yet, so each enumeration
 * only contains its single placeholder entry. */
enum
{
  /* add signal ids above this line */
  LAST_SIGNAL
};

enum
{
  PROP_0
      /* add property ids below this line */
};

/*static guint gst_festival_signals[LAST_SIGNAL] = { 0 }; */
152
/* Defines the GstFestival type as a subclass of GstElement; this is what
 * makes gst_festival_parent_class available to the functions below. */
G_DEFINE_TYPE (GstFestival, gst_festival, GST_TYPE_ELEMENT);
154
155 static void gst_festival_class_init (GstFestivalClass * klass)
156 {
157 GObjectClass *gobject_class;
158 GstElementClass *gstelement_class;
159
160 gobject_class = G_OBJECT_CLASS (klass);
161 gstelement_class = GST_ELEMENT_CLASS (klass);
162
163 gobject_class->finalize = GST_DEBUG_FUNCPTR (gst_festival_finalize);
164 gstelement_class->change_state =
165 GST_DEBUG_FUNCPTR (gst_festival_change_state);
166
167 /* register pads */
168 gst_element_class_add_static_pad_template (gstelement_class,
169 &sink_template_factory);
170 gst_element_class_add_static_pad_template (gstelement_class,
171 &src_template_factory);
172
173 gst_element_class_set_static_metadata (gstelement_class,
174 "Festival Text-to-Speech synthesizer", "Filter/Effect/Audio",
175 "Synthesizes plain text into audio",
176 "Wim Taymans <wim.taymans@gmail.com>");
177 }
178
179 static void
gst_festival_init(GstFestival * festival)180 gst_festival_init (GstFestival * festival)
181 {
182 festival->sinkpad =
183 gst_pad_new_from_static_template (&sink_template_factory, "sink");
184 gst_pad_set_chain_function (festival->sinkpad, gst_festival_chain);
185 gst_element_add_pad (GST_ELEMENT (festival), festival->sinkpad);
186
187 festival->srcpad =
188 gst_pad_new_from_static_template (&src_template_factory, "src");
189 gst_pad_set_query_function (festival->srcpad, gst_festival_src_query);
190 gst_element_add_pad (GST_ELEMENT (festival), festival->srcpad);
191
192 festival->info = festival_default_info ();
193 }
194
195 static void
gst_festival_finalize(GObject * object)196 gst_festival_finalize (GObject * object)
197 {
198 GstFestival *festival = GST_FESTIVAL (object);
199
200 g_free (festival->info);
201
202 G_OBJECT_CLASS (gst_festival_parent_class)->finalize (object);
203 }
204
205 static gboolean
read_response(GstFestival * festival)206 read_response (GstFestival * festival)
207 {
208 char ack[4];
209 char *data;
210 int filesize;
211 int fd;
212 int n;
213 gboolean ret = TRUE;
214
215 fd = festival->info->server_fd;
216 do {
217 for (n = 0; n < 3;)
218 n += read (fd, ack + n, 3 - n);
219 ack[3] = '\0';
220 GST_DEBUG_OBJECT (festival, "got response %s", ack);
221 if (strcmp (ack, "WV\n") == 0) {
222 GstBuffer *buffer;
223
224 /* receive a waveform */
225 data = socket_receive_file_to_buff (fd, &filesize);
226 GST_DEBUG_OBJECT (festival, "received %d bytes of waveform data",
227 filesize);
228
229 /* push contents as a buffer */
230 buffer = gst_buffer_new_wrapped (data, filesize);
231 GST_BUFFER_TIMESTAMP (buffer) = GST_CLOCK_TIME_NONE;
232 gst_pad_push (festival->srcpad, buffer);
233
234 } else if (strcmp (ack, "LP\n") == 0) {
235 /* receive an s-expr */
236 data = client_accept_s_expr (fd);
237 GST_DEBUG_OBJECT (festival, "received s-expression: %s", data);
238 g_free (data);
239 } else if (strcmp (ack, "ER\n") == 0) {
240 /* server got an error */
241 GST_ELEMENT_ERROR (festival,
242 LIBRARY,
243 FAILED,
244 ("Festival speech server returned an error"),
245 ("Make sure you have voices/languages installed"));
246 ret = FALSE;
247 break;
248 }
249
250 } while (strcmp (ack, "OK\n") != 0);
251
252 return ret;
253 }
254
255 static GstFlowReturn
gst_festival_chain(GstPad * pad,GstObject * parent,GstBuffer * buf)256 gst_festival_chain (GstPad * pad, GstObject * parent, GstBuffer * buf)
257 {
258 GstFlowReturn ret = GST_FLOW_OK;
259 GstFestival *festival;
260 GstMapInfo info;
261 guint8 *p, *ep;
262 gint f;
263 FILE *fd;
264
265 festival = GST_FESTIVAL (parent);
266
267 GST_LOG_OBJECT (festival, "Got text buffer, %" G_GSIZE_FORMAT " bytes",
268 gst_buffer_get_size (buf));
269
270 f = dup (festival->info->server_fd);
271 if (f < 0)
272 goto fail_open;
273 fd = fdopen (f, "wb");
274 if (fd == NULL) {
275 close (f);
276 goto fail_open;
277 }
278
279 /* Copy text over to server, escaping any quotes */
280 fprintf (fd, "(Parameter.set 'Audio_Required_Rate 16000)\n");
281 fflush (fd);
282 GST_DEBUG_OBJECT (festival, "issued Parameter.set command");
283 if (read_response (festival) == FALSE) {
284 fclose (fd);
285 goto fail_read;
286 }
287
288 fprintf (fd, "(tts_textall \"");
289 gst_buffer_map (buf, &info, GST_MAP_READ);
290 p = info.data;
291 ep = p + info.size;
292 for (; p < ep && (*p != '\0'); p++) {
293 if ((*p == '"') || (*p == '\\')) {
294 putc ('\\', fd);
295 }
296
297 putc (*p, fd);
298 }
299 fprintf (fd, "\" \"%s\")\n", festival->info->text_mode);
300 fclose (fd);
301 gst_buffer_unmap (buf, &info);
302
303 GST_DEBUG_OBJECT (festival, "issued tts_textall command");
304
305 /* Read back info from server */
306 if (read_response (festival) == FALSE)
307 goto fail_read;
308
309 out:
310 gst_buffer_unref (buf);
311 return ret;
312
313 /* ERRORS */
314 fail_open:
315 {
316 GST_ELEMENT_ERROR (festival, RESOURCE, OPEN_WRITE, (NULL), (NULL));
317 ret = GST_FLOW_ERROR;
318 goto out;
319 }
320 fail_read:
321 {
322 GST_ELEMENT_ERROR (festival, RESOURCE, READ, (NULL), (NULL));
323 ret = GST_FLOW_ERROR;
324 goto out;
325 }
326 }
327
328 static FT_Info *
festival_default_info(void)329 festival_default_info (void)
330 {
331 FT_Info *info;
332
333 info = (FT_Info *) malloc (1 * sizeof (FT_Info));
334
335 info->server_host = FESTIVAL_DEFAULT_SERVER_HOST;
336 info->server_port = FESTIVAL_DEFAULT_SERVER_PORT;
337 info->text_mode = FESTIVAL_DEFAULT_TEXT_MODE;
338
339 info->server_fd = -1;
340
341 return info;
342 }
343
/*
 * Opens a TCP connection to a festival server.
 *
 * @host is either a dotted-quad IPv4 address or a host name that is resolved
 * with gethostbyname(); @port is the TCP port in host byte order.
 *
 * Returns the connected socket descriptor, or -1 on failure (a message is
 * printed to stderr and no descriptor is left open).
 */
static int
festival_socket_open (const char *host, int port)
{
  struct sockaddr_in serv_addr;
  struct hostent *serverhost;
  int fd;

  if (host == NULL) {
    fprintf (stderr, "festival_client: no server host given\n");
    return -1;
  }

  fd = socket (AF_INET, SOCK_STREAM, IPPROTO_TCP);

  if (fd < 0) {
    fprintf (stderr, "festival_client: can't get socket\n");
    return -1;
  }
  memset (&serv_addr, 0, sizeof (serv_addr));
  serv_addr.sin_addr.s_addr = inet_addr (host);
  if (serv_addr.sin_addr.s_addr == INADDR_NONE) {
    /* it's a name rather than an ipnum */
    serverhost = gethostbyname (host);
    if (serverhost == NULL) {
      fprintf (stderr, "festival_client: gethostbyname failed\n");
      close (fd);
      return -1;
    }
    /* only copy an address that really is IPv4 and fits into sin_addr */
    if (serverhost->h_addrtype != AF_INET
        || serverhost->h_addr_list[0] == NULL
        || serverhost->h_length <= 0
        || (size_t) serverhost->h_length > sizeof (serv_addr.sin_addr)) {
      fprintf (stderr, "festival_client: unsupported address for host\n");
      close (fd);
      return -1;
    }
    memmove (&serv_addr.sin_addr, serverhost->h_addr_list[0],
        (size_t) serverhost->h_length);
  }
  serv_addr.sin_family = AF_INET;
  serv_addr.sin_port = htons (port);

  if (connect (fd, (struct sockaddr *) &serv_addr, sizeof (serv_addr)) != 0) {
    fprintf (stderr, "festival_client: connect to server failed\n");
    close (fd);
    return -1;
  }

  return fd;
}
380
/*
 * Reads one s-expression from the server and returns it as a
 * NUL-terminated string. The string comes from
 * socket_receive_file_to_buff(); the caller releases it.
 */
static char *
client_accept_s_expr (int fd)
{
  int length;
  char *text = socket_receive_file_to_buff (fd, &length);

  /* the receive function leaves room for this terminator */
  text[length] = '\0';
  return text;
}
393
/*
 * Receives one file (typically a waveform) from the server.
 *
 * Festival terminates the data with a key string instead of closing the
 * stream or using OOB data. Where the key itself occurs inside the data the
 * sender inserts an 'X' before the key's last character; that 'X' is
 * dropped here and the partial key is kept as data.
 *
 * @fd:   descriptor to read from
 * @size: set to the number of payload bytes received
 *
 * Returns a buffer allocated with g_malloc(). It always has room for at
 * least one byte beyond *size, so a caller may append a NUL terminator.
 * Reception stops at the key, at end of stream or on a read error; in the
 * last two cases the data received so far is returned.
 */
static char *
socket_receive_file_to_buff (int fd, int *size)
{
  static const char file_stuff_key[] = "ft_StUfF_key";  /* must == Festival's key */
  char *buff;
  int bufflen;
  int n, k, i;
  char c;

  bufflen = 1024;
  buff = (char *) g_malloc (bufflen);
  *size = 0;

  /* k counts how many characters of the key have matched so far */
  for (k = 0; file_stuff_key[k] != '\0';) {
    n = read (fd, &c, 1);
    if (n <= 0)
      break;                    /* stream eof or read error before end of file */

    if ((*size) + k + 1 >= bufflen) {
      /* +1 so you can add a NULL if you want */
      bufflen += bufflen / 4;
      buff = (char *) g_realloc (buff, bufflen);
    }
    if (file_stuff_key[k] == c)
      k++;
    else if ((c == 'X') && (file_stuff_key[k + 1] == '\0')) {
      /* It looked like the key but wasn't */
      for (i = 0; i < k; i++, (*size)++)
        buff[*size] = file_stuff_key[i];
      k = 0;
      /* omit the stuffed 'X' */
    } else {
      /* mismatch: the partially matched key characters were data after all */
      for (i = 0; i < k; i++, (*size)++)
        buff[*size] = file_stuff_key[i];
      k = 0;
      buff[*size] = c;
      (*size)++;
    }
  }

  return buff;
}
440
441 /***********************************************************************/
442 /* Public Functions to this API */
443 /***********************************************************************/
444
445 static gboolean
gst_festival_open(GstFestival * festival)446 gst_festival_open (GstFestival * festival)
447 {
448 /* Open socket to server */
449 if (festival->info == NULL)
450 festival->info = festival_default_info ();
451
452 festival->info->server_fd =
453 festival_socket_open (festival->info->server_host,
454 festival->info->server_port);
455 if (festival->info->server_fd == -1) {
456 GST_ERROR
457 ("Could not talk to festival server (no server running or wrong host/port?)");
458 return FALSE;
459 }
460 GST_OBJECT_FLAG_SET (festival, GST_FESTIVAL_OPEN);
461 return TRUE;
462 }
463
464 static void
gst_festival_close(GstFestival * festival)465 gst_festival_close (GstFestival * festival)
466 {
467 if (festival->info == NULL)
468 return;
469
470 if (festival->info->server_fd != -1)
471 close (festival->info->server_fd);
472 GST_OBJECT_FLAG_UNSET (festival, GST_FESTIVAL_OPEN);
473 return;
474 }
475
476 static GstStateChangeReturn
gst_festival_change_state(GstElement * element,GstStateChange transition)477 gst_festival_change_state (GstElement * element, GstStateChange transition)
478 {
479 g_return_val_if_fail (GST_IS_FESTIVAL (element), GST_STATE_CHANGE_FAILURE);
480
481 if (GST_STATE_PENDING (element) == GST_STATE_NULL) {
482 if (GST_OBJECT_FLAG_IS_SET (element, GST_FESTIVAL_OPEN)) {
483 GST_DEBUG ("Closing connection ");
484 gst_festival_close (GST_FESTIVAL (element));
485 }
486 } else {
487 if (!GST_OBJECT_FLAG_IS_SET (element, GST_FESTIVAL_OPEN)) {
488 GST_DEBUG ("Opening connection ");
489 if (!gst_festival_open (GST_FESTIVAL (element)))
490 return GST_STATE_CHANGE_FAILURE;
491 }
492 }
493
494 if (GST_ELEMENT_CLASS (gst_festival_parent_class)->change_state)
495 return GST_ELEMENT_CLASS (gst_festival_parent_class)->change_state (element,
496 transition);
497
498 return GST_STATE_CHANGE_SUCCESS;
499 }
500
501 static gboolean
gst_festival_src_query(GstPad * pad,GstObject * parent,GstQuery * query)502 gst_festival_src_query (GstPad * pad, GstObject * parent, GstQuery * query)
503 {
504 switch (GST_QUERY_TYPE (query)) {
505 case GST_QUERY_POSITION:
506 /* Not supported */
507 return FALSE;
508 case GST_QUERY_DURATION:
509 gst_query_set_duration (query, GST_FORMAT_BYTES, -1);
510 return TRUE;
511 case GST_QUERY_SEEKING:
512 gst_query_set_seeking (query, GST_FORMAT_BYTES, FALSE, 0, -1);
513 return TRUE;
514 case GST_QUERY_FORMATS:
515 gst_query_set_formats (query, 1, GST_FORMAT_BYTES);
516 return TRUE;
517 default:
518 break;
519 }
520
521 return gst_pad_query_default (pad, parent, query);
522 }
523
524 static gboolean
plugin_init(GstPlugin * plugin)525 plugin_init (GstPlugin * plugin)
526 {
527 GST_DEBUG_CATEGORY_INIT (festival_debug, "festival",
528 0, "Festival text-to-speech synthesizer");
529
530 if (!gst_element_register (plugin, "festival", GST_RANK_NONE,
531 GST_TYPE_FESTIVAL))
532 return FALSE;
533
534 return TRUE;
535 }
536
537 GST_PLUGIN_DEFINE (GST_VERSION_MAJOR,
538 GST_VERSION_MINOR,
539 festival,
540 "Synthesizes plain text into audio",
541 plugin_init, VERSION, "LGPL", GST_PACKAGE_NAME, GST_PACKAGE_ORIGIN);
542