• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * alsa audio handling
3  *
4  * Written in 2010-2020 by Andy Green <andy@warmcat.com>
5  *
6  * This file is made available under the Creative Commons CC0 1.0
7  * Universal Public Domain Dedication.
8  */
9 
10 #include <libwebsockets.h>
11 #include <string.h>
12 #include <signal.h>
13 #include <sys/types.h>
14 #include <sys/stat.h>
15 #include <fcntl.h>
16 
17 #include <alsa/asoundlib.h>
18 #include <pv_porcupine.h>
19 
20 #include <mpg123.h>
21 
22 #include "private.h"
23 
/*
 * Secure Streams handles defined outside this file.  Only hss_avs_sync is
 * referenced below (to request tx while capturing).
 */
extern struct lws_ss_handle *hss_avs_event, *hss_avs_sync;

/*
 * Defined outside this file; called below when the wakeword is detected.
 * A zero return is treated as success and moves us into capture mode.
 */
int
avs_query_start(struct lws_context *context);
28 
/* Values held in raw_vhd.mode */
enum {
	MODE_IDLE	= 0,	/* scanning captured audio for the wakeword */
	MODE_CAPTURING	= 1,	/* spooling captured audio for upload */
	MODE_PLAYING	= 2	/* feeding decoded mp3 to the playback channel */
};
34 
35 struct raw_vhd {
36 	int16_t			p[8 * 1024]; /* 500ms at 16kHz 16-bit PCM */
37 	pv_porcupine_object_t	*porc;
38 	snd_pcm_t		*pcm_capture;
39 	snd_pcm_t		*pcm_playback;
40 	snd_pcm_hw_params_t	*params;
41 	snd_pcm_uframes_t	frames;
42 	int16_t			*porcbuf;
43 
44 	mpg123_handle		*mh;
45 
46 	mp3_done_cb		done_cb;
47 	void			*opaque;
48 
49 	int			mode;
50 	int			rate;
51 
52 	int			porc_spf;
53 	int			filefd;
54 	int			rpos;
55 	int			wpos;
56 	int			porcpos;
57 	int			npos;
58 	int			times;
59 	int			quietcount;
60 	int			anycount;
61 
62 	int			wplay;
63 	int			rplay;
64 
65 	char			last_wake_detect;
66 	char			destroy_mh_on_drain;
67 };
68 
/*
 * The single audio vhd; set at LWS_CALLBACK_PROTOCOL_INIT and used by
 * spool_capture() and play_mp3(), which are called without a wsi.
 */
static struct raw_vhd *avhd;
70 
71 /*
72  * called from alexa.c to grab the next chunk of audio capture buffer
73  * for upload
74  */
75 
76 int
spool_capture(uint8_t * buf,size_t len)77 spool_capture(uint8_t *buf, size_t len)
78 {
79 	int16_t *sam = (int16_t *)buf;
80 	size_t s, os;
81 
82 	if (avhd->mode != MODE_CAPTURING)
83 		return -1;
84 
85 	os = s = len / 2;
86 
87 	while (s && avhd->wpos != avhd->npos) {
88 		*sam++ = avhd->p[avhd->npos];
89 		avhd->npos = (avhd->npos + 1)  % LWS_ARRAY_SIZE(avhd->p);
90 		s--;
91 	}
92 
93 	lwsl_info("Copied %d samples (%d %d)\n", (int)(os - s),
94 			avhd->wpos, avhd->npos);
95 
96 	return (os - s) * 2;
97 }
98 
99 /*
100  * Called from alexa.c to control when the mp3 playback should begin and end
101  */
102 
103 int
play_mp3(mpg123_handle * mh,mp3_done_cb cb,void * opaque)104 play_mp3(mpg123_handle *mh, mp3_done_cb cb, void *opaque)
105 {
106 	if (mh) {
107 		avhd->mh = mh;
108 		avhd->mode = MODE_PLAYING;
109 		snd_pcm_prepare(avhd->pcm_playback);
110 
111 		return 0;
112 	}
113 
114 	avhd->destroy_mh_on_drain = 1;
115 	avhd->done_cb = cb;
116 	avhd->opaque = opaque;
117 
118 	return 0;
119 }
120 
121 /*
122  * Helper used to set alsa hwparams on both capture and playback channels
123  */
124 
125 static int
set_hw_params(struct lws_vhost * vh,snd_pcm_t ** pcm,int type)126 set_hw_params(struct lws_vhost *vh, snd_pcm_t **pcm, int type)
127 {
128 	unsigned int rate = pv_sample_rate(); /* it's 16kHz */
129 	snd_pcm_hw_params_t *params;
130 	lws_sock_file_fd_type u;
131 	struct pollfd pfd;
132 	struct lws *wsi1;
133 	int n;
134 
135 	n = snd_pcm_open(pcm, "default", type, SND_PCM_NONBLOCK);
136 	if (n < 0) {
137 		lwsl_err("%s: Can't open default for playback: %s\n",
138 			 __func__, snd_strerror(n));
139 
140 		return -1;
141 	}
142 
143 	if (snd_pcm_poll_descriptors(*pcm, &pfd, 1) != 1) {
144 		lwsl_err("%s: failed to get playback desc\n", __func__);
145 		return -1;
146 	}
147 
148 	u.filefd = (lws_filefd_type)(long long)pfd.fd;
149 	wsi1 = lws_adopt_descriptor_vhost(vh, LWS_ADOPT_RAW_FILE_DESC, u,
150 					  "lws-audio-test", NULL);
151 	if (!wsi1) {
152 		lwsl_err("%s: Failed to adopt playback desc\n", __func__);
153 		goto bail;
154 	}
155 	if (type == SND_PCM_STREAM_PLAYBACK)
156 		lws_rx_flow_control(wsi1, 0); /* no POLLIN */
157 
158 	snd_pcm_hw_params_malloc(&params);
159 	snd_pcm_hw_params_any(*pcm, params);
160 
161 	n = snd_pcm_hw_params_set_access(*pcm, params,
162 					 SND_PCM_ACCESS_RW_INTERLEAVED);
163 	if (n < 0)
164 		goto bail1;
165 
166 	n = snd_pcm_hw_params_set_format(*pcm, params, SND_PCM_FORMAT_S16_LE);
167 	if (n < 0)
168 		goto bail1;
169 
170 	n = snd_pcm_hw_params_set_channels(*pcm, params, 1);
171 	if (n < 0)
172 		goto bail1;
173 
174 	n = snd_pcm_hw_params_set_rate_near(*pcm, params, &rate, 0);
175 	if (n < 0)
176 		goto bail1;
177 
178 	lwsl_notice("%s: %s rate %d\n", __func__,
179 		type == SND_PCM_STREAM_PLAYBACK ? "Playback" : "Capture", rate);
180 
181 	n = snd_pcm_hw_params(*pcm, params);
182 	snd_pcm_hw_params_free(params);
183 	if (n < 0)
184 		goto bail;
185 
186 	return 0;
187 
188 bail1:
189 	snd_pcm_hw_params_free(params);
190 bail:
191 	lwsl_err("%s: Set hw params failed: %s\n", __func__, snd_strerror(n));
192 
193 	return -1;
194 }
195 
196 /*
197  * The lws RAW file protocol handler that wraps ALSA.
198  *
199  * The timing is coming from ALSA capture channel... since they are both set to
200  * 16kHz, it's enough just to have the one.
201  */
202 
203 static int
callback_audio(struct lws * wsi,enum lws_callback_reasons reason,void * user,void * in,size_t len)204 callback_audio(struct lws *wsi, enum lws_callback_reasons reason, void *user,
205 	       void *in, size_t len)
206 {
207 	struct raw_vhd *vhd = (struct raw_vhd *)lws_protocol_vh_priv_get(
208 				   lws_get_vhost(wsi), lws_get_protocol(wsi));
209 	uint16_t rands[50];
210 	int16_t temp[256];
211 	bool det;
212 	long avg;
213 	int n, s;
214 
215 	switch (reason) {
216 	case LWS_CALLBACK_PROTOCOL_INIT:
217 
218 		if (avhd) /* just on one vhost */
219 			return 0;
220 
221 		avhd = vhd = lws_protocol_vh_priv_zalloc(lws_get_vhost(wsi),
222 				lws_get_protocol(wsi), sizeof(struct raw_vhd));
223 
224 		/*
225 		 * Set up the wakeword library
226 		 */
227 
228 		n = pv_porcupine_init("porcupine_params.pv", "alexa_linux.ppn",
229 					1.0, &vhd->porc);
230 		if (n) {
231 			lwsl_err("%s: porcupine init fail %d\n", __func__, n);
232 
233 			return -1;
234 		}
235 		vhd->porc_spf = pv_porcupine_frame_length();
236 		vhd->porcbuf = malloc(vhd->porc_spf * 2);
237 		lwsl_info("%s: %s porc frame length is %d samples\n", __func__,
238 				lws_get_vhost_name(lws_get_vhost(wsi)),
239 				vhd->porc_spf);
240 
241 		vhd->rate = pv_sample_rate(); /* 16kHz */
242 
243 		/* set up alsa */
244 
245 		if (set_hw_params(lws_get_vhost(wsi), &vhd->pcm_playback,
246 				  SND_PCM_STREAM_PLAYBACK))  {
247 			lwsl_err("%s: Can't open default for playback\n",
248 				 __func__);
249 
250 			return -1;
251 		}
252 
253 		if (set_hw_params(lws_get_vhost(wsi), &vhd->pcm_capture,
254 				  SND_PCM_STREAM_CAPTURE))  {
255 			lwsl_err("%s: Can't open default for capture\n",
256 				 __func__);
257 
258 			return -1;
259 		}
260 
261 		snd_config_update_free_global();
262 
263 		break;
264 
265 	case LWS_CALLBACK_PROTOCOL_DESTROY:
266 		lwsl_info("%s: LWS_CALLBACK_PROTOCOL_DESTROY\n", __func__);
267 		if (!vhd)
268 			break;
269 
270 		if (vhd->porcbuf) {
271 			free(vhd->porcbuf);
272 			vhd->porcbuf = NULL;
273 		}
274 		if (vhd->pcm_playback) {
275 			snd_pcm_drop(vhd->pcm_playback);
276 			snd_pcm_close(vhd->pcm_playback);
277 			vhd->pcm_playback = NULL;
278 		}
279 		if (vhd->pcm_capture) {
280 			snd_pcm_drop(vhd->pcm_capture);
281 			snd_pcm_close(vhd->pcm_capture);
282 			vhd->pcm_capture = NULL;
283 		}
284 		if (vhd->porc) {
285 			pv_porcupine_delete(vhd->porc);
286 			vhd->porc = NULL;
287 		}
288 
289 		/* avoid most of the valgrind mess from alsa */
290 		snd_config_update_free_global();
291 
292 		break;
293 
294 	case LWS_CALLBACK_RAW_CLOSE_FILE:
295 		lwsl_info("%s: closed\n", __func__);
296 		break;
297 
298 	case LWS_CALLBACK_RAW_RX_FILE:
299 		/* we come here about every 250ms */
300 
301 		/*
302 		 * Playing back the mp3?
303 		 */
304 		if (vhd->mode == MODE_PLAYING && vhd->mh) {
305 			size_t amt, try;
306 
307 			do {
308 				try = snd_pcm_avail(vhd->pcm_playback);
309 				if (try > LWS_ARRAY_SIZE(vhd->p))
310 					try = LWS_ARRAY_SIZE(vhd->p);
311 
312 				n = mpg123_read(vhd->mh, (uint8_t *)vhd->p,
313 						try * 2, &amt);
314 				lwsl_info("%s: PLAYING: mpg123 read %d, n %d\n",
315 						__func__, (int)amt, n);
316 				if (n == MPG123_NEW_FORMAT) {
317 					snd_pcm_start(vhd->pcm_playback);
318 					memset(vhd->p, 0, try);
319 					snd_pcm_writei(vhd->pcm_playback,
320 						       vhd->p, try / 2);
321 					snd_pcm_prepare(vhd->pcm_playback);
322 				}
323 			} while (n == MPG123_NEW_FORMAT);
324 
325 			if (amt) {
326 				n = snd_pcm_writei(vhd->pcm_playback,
327 						   vhd->p, amt / 2);
328 				if (n < 0)
329 					lwsl_notice("%s: snd_pcm_writei: %d %s\n",
330 						    __func__, n, snd_strerror(n));
331 				if (n == -EPIPE) {
332 					lwsl_err("%s: did EPIPE prep\n", __func__);
333 					snd_pcm_prepare(vhd->pcm_playback);
334 				}
335 			} else
336 				if (vhd->destroy_mh_on_drain &&
337 				    n != MPG123_NEW_FORMAT) {
338 					snd_pcm_drain(vhd->pcm_playback);
339 					vhd->destroy_mh_on_drain = 0;
340 					lwsl_notice("%s: mp3 destroyed\n",
341 							__func__);
342 					mpg123_close(vhd->mh);
343 					mpg123_delete(vhd->mh);
344 					vhd->mh = NULL;
345 					vhd->mode = MODE_IDLE;
346 
347 					if (vhd->done_cb)
348 						vhd->done_cb(vhd->opaque);
349 				}
350 		}
351 
352 		/*
353 		 * Get the capture data
354 		 */
355 
356 		n = snd_pcm_readi(vhd->pcm_capture, temp, LWS_ARRAY_SIZE(temp));
357 		s = 0;
358 		while (s < n) {
359 			vhd->p[(vhd->wpos + s) % LWS_ARRAY_SIZE(vhd->p)] = temp[s];
360 			s++;
361 		}
362 
363 		if (vhd->mode == MODE_CAPTURING) {
364 
365 			/*
366 			 * We are recording an utterance.
367 			 *
368 			 * Estimate the sound density in the frame by picking 50
369 			 * samples at random and averaging the sampled
370 			 * [abs()^2] / 10000 to create a Figure of Merit.
371 			 *
372 			 * Speaking on my laptop gets us 1000 - 5000, silence
373 			 * is typ under 30.  The wakeword tells us there was
374 			 * speech at the start, end the capture when there's
375 			 * ~750ms (12000 samples) under 125 FOM.
376 			 */
377 
378 #define SILENCE_THRESH 125
379 
380 			avg = 0;
381 			lws_get_random(lws_get_context(wsi), rands, sizeof(rands));
382 			for (s = 0; s < (int)LWS_ARRAY_SIZE(rands); s++) {
383 				long q;
384 
385 				q = temp[rands[s] % n];
386 
387 				avg += (q * q);
388 			}
389 			avg = (avg / (int)LWS_ARRAY_SIZE(rands)) / 10000;
390 
391 			lwsl_notice("est audio energy: %ld %d\n", avg, vhd->mode);
392 
393 			/*
394 			 * Only start looking for "silence" after 1.5s, in case
395 			 * he does a long pause after the wakeword
396 			 */
397 
398 			if (vhd->anycount < (3 *vhd->rate) / 2 &&
399 			    avg < SILENCE_THRESH) {
400 				vhd->quietcount += n;
401 				/* then 500ms of "silence" does it for us */
402 				if (vhd->quietcount >= ((vhd->rate * 3) / 4)) {
403 					lwsl_warn("%s: ended capture\n", __func__);
404 					vhd->mode = MODE_IDLE;
405 					vhd->quietcount = 0;
406 				}
407 			}
408 
409 			/* if we're not "silent", reset the count */
410 			if (avg > SILENCE_THRESH * 2)
411 				vhd->quietcount = 0;
412 
413 			/*
414 			 * Since we are in capturing mode, we have something
415 			 * new to send now.
416 			 *
417 			 * We must send an extra one at the end so we can finish
418 			 * the tx.
419 			 */
420 			lws_ss_request_tx(hss_avs_sync);
421 		}
422 
423 		/*
424 		 * Just waiting for a wakeword
425 		 */
426 
427 		while (vhd->mode == MODE_IDLE) {
428 			int m = 0, ppold = vhd->porcpos;
429 
430 			s = (vhd->wpos - vhd->porcpos) % LWS_ARRAY_SIZE(vhd->p);
431 			if (s < vhd->porc_spf)
432 				goto eol;
433 
434 			while (m < vhd->porc_spf) {
435 				vhd->porcbuf[m++] = avhd->p[vhd->porcpos];
436 				vhd->porcpos = (vhd->porcpos + 1) %
437 							LWS_ARRAY_SIZE(vhd->p);
438 			}
439 
440 			if (pv_porcupine_process(vhd->porc, vhd->porcbuf, &det))
441 				lwsl_err("%s: porc_process failed\n", __func__);
442 
443 			if (!det && vhd->last_wake_detect &&
444 			    vhd->mode == MODE_IDLE) {
445 				lwsl_warn("************* Wakeword\n");
446 				if (!avs_query_start(lws_get_context(wsi))) {
447 					vhd->mode = MODE_CAPTURING;
448 					vhd->quietcount = 0;
449 					vhd->last_wake_detect = det;
450 					vhd->npos = ppold;
451 					break;
452 				}
453 			}
454 			vhd->last_wake_detect = det;
455 		}
456 
457 eol:
458 		vhd->wpos = (vhd->wpos + n) % LWS_ARRAY_SIZE(vhd->p);
459 		break;
460 
461 	default:
462 		break;
463 	}
464 
465 	return 0;
466 }
467 
/*
 * Protocol entry for the audio handler.  The name must match the protocol
 * name set_hw_params() passes when adopting the ALSA descriptors.
 */
struct lws_protocols protocol_audio_test =
	{ "lws-audio-test", callback_audio, 0, 0 };
470