1 /*
2 * alsa audio handling
3 *
4 * Written in 2010-2020 by Andy Green <andy@warmcat.com>
5 *
6 * This file is made available under the Creative Commons CC0 1.0
7 * Universal Public Domain Dedication.
8 */
9
10 #include <libwebsockets.h>
11 #include <string.h>
12 #include <signal.h>
13 #include <sys/types.h>
14 #include <sys/stat.h>
15 #include <fcntl.h>
16
17 #include <alsa/asoundlib.h>
18 #include <pv_porcupine.h>
19
20 #include <mpg123.h>
21
22 #include "private.h"
23
/*
 * Secure Streams handles that are defined outside this file.  The code in
 * this file only uses hss_avs_sync, to request tx while an utterance is
 * being captured.
 */
extern struct lws_ss_handle *hss_avs_event, *hss_avs_sync;

/*
 * Defined outside this file.  Called from the audio callback when the
 * wakeword has been seen; a 0 return is what moves the audio state into
 * MODE_CAPTURING.
 */
int
avs_query_start(struct lws_context *context);
28
/* Audio state machine modes, held in raw_vhd.mode */
enum {
	MODE_IDLE,	/* feeding capture audio to the wakeword detector */
	MODE_CAPTURING,	/* recording an utterance for spool_capture() */
	MODE_PLAYING	/* decoding the mp3 handle into the playback pcm */
};
34
/*
 * Per-vhost state for the audio protocol.  Only one instance is created
 * (see avhd), and it is allocated zeroed.
 */
struct raw_vhd {
	/*
	 * Ring buffer of captured samples, indexed by wpos / porcpos / npos.
	 * While in MODE_PLAYING the same storage is also used as the mp3
	 * decode buffer.
	 */
	int16_t p[8 * 1024]; /* 500ms at 16kHz 16-bit PCM */
	pv_porcupine_object_t *porc;	/* wakeword detector instance */
	snd_pcm_t *pcm_capture;		/* ALSA capture handle */
	snd_pcm_t *pcm_playback;	/* ALSA playback handle */
	snd_pcm_hw_params_t *params;	/* not referenced in the code visible here */
	snd_pcm_uframes_t frames;	/* not referenced in the code visible here */
	int16_t *porcbuf;		/* one wakeword frame, porc_spf samples */

	mpg123_handle *mh;		/* mp3 decoder given to play_mp3() */

	mp3_done_cb done_cb;		/* called after the mp3 has drained */
	void *opaque;			/* argument passed to done_cb */

	int mode;			/* one of MODE_* */
	int rate;			/* sample rate, from pv_sample_rate() */

	int porc_spf;			/* samples per wakeword frame */
	int filefd;			/* not referenced in the code visible here */
	int rpos;			/* not referenced in the code visible here */
	int wpos;			/* ring index where capture writes next */
	int porcpos;			/* ring index the wakeword detector reads from */
	int npos;			/* ring index spool_capture() reads from */
	int times;			/* not referenced in the code visible here */
	int quietcount;			/* samples of "silence" seen while capturing */
	int anycount;			/* compared against 1.5s of samples, but never
					 * written in the code visible here */

	int wplay;			/* not referenced in the code visible here */
	int rplay;			/* not referenced in the code visible here */

	char last_wake_detect;		/* detector result from the previous frame */
	char destroy_mh_on_drain;	/* set by play_mp3(NULL, ...): tear down mh
					 * once the decoder yields no more data */
};
68
/*
 * The single audio state instance, set during LWS_CALLBACK_PROTOCOL_INIT on
 * the first vhost that initializes this protocol.  It lets spool_capture()
 * and play_mp3(), which have no wsi, reach the state.
 */
static struct raw_vhd *avhd;
70
71 /*
72 * called from alexa.c to grab the next chunk of audio capture buffer
73 * for upload
74 */
75
76 int
spool_capture(uint8_t * buf,size_t len)77 spool_capture(uint8_t *buf, size_t len)
78 {
79 int16_t *sam = (int16_t *)buf;
80 size_t s, os;
81
82 if (avhd->mode != MODE_CAPTURING)
83 return -1;
84
85 os = s = len / 2;
86
87 while (s && avhd->wpos != avhd->npos) {
88 *sam++ = avhd->p[avhd->npos];
89 avhd->npos = (avhd->npos + 1) % LWS_ARRAY_SIZE(avhd->p);
90 s--;
91 }
92
93 lwsl_info("Copied %d samples (%d %d)\n", (int)(os - s),
94 avhd->wpos, avhd->npos);
95
96 return (os - s) * 2;
97 }
98
99 /*
100 * Called from alexa.c to control when the mp3 playback should begin and end
101 */
102
103 int
play_mp3(mpg123_handle * mh,mp3_done_cb cb,void * opaque)104 play_mp3(mpg123_handle *mh, mp3_done_cb cb, void *opaque)
105 {
106 if (mh) {
107 avhd->mh = mh;
108 avhd->mode = MODE_PLAYING;
109 snd_pcm_prepare(avhd->pcm_playback);
110
111 return 0;
112 }
113
114 avhd->destroy_mh_on_drain = 1;
115 avhd->done_cb = cb;
116 avhd->opaque = opaque;
117
118 return 0;
119 }
120
121 /*
122 * Helper used to set alsa hwparams on both capture and playback channels
123 */
124
125 static int
set_hw_params(struct lws_vhost * vh,snd_pcm_t ** pcm,int type)126 set_hw_params(struct lws_vhost *vh, snd_pcm_t **pcm, int type)
127 {
128 unsigned int rate = pv_sample_rate(); /* it's 16kHz */
129 snd_pcm_hw_params_t *params;
130 lws_sock_file_fd_type u;
131 struct pollfd pfd;
132 struct lws *wsi1;
133 int n;
134
135 n = snd_pcm_open(pcm, "default", type, SND_PCM_NONBLOCK);
136 if (n < 0) {
137 lwsl_err("%s: Can't open default for playback: %s\n",
138 __func__, snd_strerror(n));
139
140 return -1;
141 }
142
143 if (snd_pcm_poll_descriptors(*pcm, &pfd, 1) != 1) {
144 lwsl_err("%s: failed to get playback desc\n", __func__);
145 return -1;
146 }
147
148 u.filefd = (lws_filefd_type)(long long)pfd.fd;
149 wsi1 = lws_adopt_descriptor_vhost(vh, LWS_ADOPT_RAW_FILE_DESC, u,
150 "lws-audio-test", NULL);
151 if (!wsi1) {
152 lwsl_err("%s: Failed to adopt playback desc\n", __func__);
153 goto bail;
154 }
155 if (type == SND_PCM_STREAM_PLAYBACK)
156 lws_rx_flow_control(wsi1, 0); /* no POLLIN */
157
158 snd_pcm_hw_params_malloc(¶ms);
159 snd_pcm_hw_params_any(*pcm, params);
160
161 n = snd_pcm_hw_params_set_access(*pcm, params,
162 SND_PCM_ACCESS_RW_INTERLEAVED);
163 if (n < 0)
164 goto bail1;
165
166 n = snd_pcm_hw_params_set_format(*pcm, params, SND_PCM_FORMAT_S16_LE);
167 if (n < 0)
168 goto bail1;
169
170 n = snd_pcm_hw_params_set_channels(*pcm, params, 1);
171 if (n < 0)
172 goto bail1;
173
174 n = snd_pcm_hw_params_set_rate_near(*pcm, params, &rate, 0);
175 if (n < 0)
176 goto bail1;
177
178 lwsl_notice("%s: %s rate %d\n", __func__,
179 type == SND_PCM_STREAM_PLAYBACK ? "Playback" : "Capture", rate);
180
181 n = snd_pcm_hw_params(*pcm, params);
182 snd_pcm_hw_params_free(params);
183 if (n < 0)
184 goto bail;
185
186 return 0;
187
188 bail1:
189 snd_pcm_hw_params_free(params);
190 bail:
191 lwsl_err("%s: Set hw params failed: %s\n", __func__, snd_strerror(n));
192
193 return -1;
194 }
195
196 /*
197 * The lws RAW file protocol handler that wraps ALSA.
198 *
199 * The timing is coming from ALSA capture channel... since they are both set to
200 * 16kHz, it's enough just to have the one.
201 */
202
203 static int
callback_audio(struct lws * wsi,enum lws_callback_reasons reason,void * user,void * in,size_t len)204 callback_audio(struct lws *wsi, enum lws_callback_reasons reason, void *user,
205 void *in, size_t len)
206 {
207 struct raw_vhd *vhd = (struct raw_vhd *)lws_protocol_vh_priv_get(
208 lws_get_vhost(wsi), lws_get_protocol(wsi));
209 uint16_t rands[50];
210 int16_t temp[256];
211 bool det;
212 long avg;
213 int n, s;
214
215 switch (reason) {
216 case LWS_CALLBACK_PROTOCOL_INIT:
217
218 if (avhd) /* just on one vhost */
219 return 0;
220
221 avhd = vhd = lws_protocol_vh_priv_zalloc(lws_get_vhost(wsi),
222 lws_get_protocol(wsi), sizeof(struct raw_vhd));
223
224 /*
225 * Set up the wakeword library
226 */
227
228 n = pv_porcupine_init("porcupine_params.pv", "alexa_linux.ppn",
229 1.0, &vhd->porc);
230 if (n) {
231 lwsl_err("%s: porcupine init fail %d\n", __func__, n);
232
233 return -1;
234 }
235 vhd->porc_spf = pv_porcupine_frame_length();
236 vhd->porcbuf = malloc(vhd->porc_spf * 2);
237 lwsl_info("%s: %s porc frame length is %d samples\n", __func__,
238 lws_get_vhost_name(lws_get_vhost(wsi)),
239 vhd->porc_spf);
240
241 vhd->rate = pv_sample_rate(); /* 16kHz */
242
243 /* set up alsa */
244
245 if (set_hw_params(lws_get_vhost(wsi), &vhd->pcm_playback,
246 SND_PCM_STREAM_PLAYBACK)) {
247 lwsl_err("%s: Can't open default for playback\n",
248 __func__);
249
250 return -1;
251 }
252
253 if (set_hw_params(lws_get_vhost(wsi), &vhd->pcm_capture,
254 SND_PCM_STREAM_CAPTURE)) {
255 lwsl_err("%s: Can't open default for capture\n",
256 __func__);
257
258 return -1;
259 }
260
261 snd_config_update_free_global();
262
263 break;
264
265 case LWS_CALLBACK_PROTOCOL_DESTROY:
266 lwsl_info("%s: LWS_CALLBACK_PROTOCOL_DESTROY\n", __func__);
267 if (!vhd)
268 break;
269
270 if (vhd->porcbuf) {
271 free(vhd->porcbuf);
272 vhd->porcbuf = NULL;
273 }
274 if (vhd->pcm_playback) {
275 snd_pcm_drop(vhd->pcm_playback);
276 snd_pcm_close(vhd->pcm_playback);
277 vhd->pcm_playback = NULL;
278 }
279 if (vhd->pcm_capture) {
280 snd_pcm_drop(vhd->pcm_capture);
281 snd_pcm_close(vhd->pcm_capture);
282 vhd->pcm_capture = NULL;
283 }
284 if (vhd->porc) {
285 pv_porcupine_delete(vhd->porc);
286 vhd->porc = NULL;
287 }
288
289 /* avoid most of the valgrind mess from alsa */
290 snd_config_update_free_global();
291
292 break;
293
294 case LWS_CALLBACK_RAW_CLOSE_FILE:
295 lwsl_info("%s: closed\n", __func__);
296 break;
297
298 case LWS_CALLBACK_RAW_RX_FILE:
299 /* we come here about every 250ms */
300
301 /*
302 * Playing back the mp3?
303 */
304 if (vhd->mode == MODE_PLAYING && vhd->mh) {
305 size_t amt, try;
306
307 do {
308 try = snd_pcm_avail(vhd->pcm_playback);
309 if (try > LWS_ARRAY_SIZE(vhd->p))
310 try = LWS_ARRAY_SIZE(vhd->p);
311
312 n = mpg123_read(vhd->mh, (uint8_t *)vhd->p,
313 try * 2, &amt);
314 lwsl_info("%s: PLAYING: mpg123 read %d, n %d\n",
315 __func__, (int)amt, n);
316 if (n == MPG123_NEW_FORMAT) {
317 snd_pcm_start(vhd->pcm_playback);
318 memset(vhd->p, 0, try);
319 snd_pcm_writei(vhd->pcm_playback,
320 vhd->p, try / 2);
321 snd_pcm_prepare(vhd->pcm_playback);
322 }
323 } while (n == MPG123_NEW_FORMAT);
324
325 if (amt) {
326 n = snd_pcm_writei(vhd->pcm_playback,
327 vhd->p, amt / 2);
328 if (n < 0)
329 lwsl_notice("%s: snd_pcm_writei: %d %s\n",
330 __func__, n, snd_strerror(n));
331 if (n == -EPIPE) {
332 lwsl_err("%s: did EPIPE prep\n", __func__);
333 snd_pcm_prepare(vhd->pcm_playback);
334 }
335 } else
336 if (vhd->destroy_mh_on_drain &&
337 n != MPG123_NEW_FORMAT) {
338 snd_pcm_drain(vhd->pcm_playback);
339 vhd->destroy_mh_on_drain = 0;
340 lwsl_notice("%s: mp3 destroyed\n",
341 __func__);
342 mpg123_close(vhd->mh);
343 mpg123_delete(vhd->mh);
344 vhd->mh = NULL;
345 vhd->mode = MODE_IDLE;
346
347 if (vhd->done_cb)
348 vhd->done_cb(vhd->opaque);
349 }
350 }
351
352 /*
353 * Get the capture data
354 */
355
356 n = snd_pcm_readi(vhd->pcm_capture, temp, LWS_ARRAY_SIZE(temp));
357 s = 0;
358 while (s < n) {
359 vhd->p[(vhd->wpos + s) % LWS_ARRAY_SIZE(vhd->p)] = temp[s];
360 s++;
361 }
362
363 if (vhd->mode == MODE_CAPTURING) {
364
365 /*
366 * We are recording an utterance.
367 *
368 * Estimate the sound density in the frame by picking 50
369 * samples at random and averaging the sampled
370 * [abs()^2] / 10000 to create a Figure of Merit.
371 *
372 * Speaking on my laptop gets us 1000 - 5000, silence
373 * is typ under 30. The wakeword tells us there was
374 * speech at the start, end the capture when there's
375 * ~750ms (12000 samples) under 125 FOM.
376 */
377
378 #define SILENCE_THRESH 125
379
380 avg = 0;
381 lws_get_random(lws_get_context(wsi), rands, sizeof(rands));
382 for (s = 0; s < (int)LWS_ARRAY_SIZE(rands); s++) {
383 long q;
384
385 q = temp[rands[s] % n];
386
387 avg += (q * q);
388 }
389 avg = (avg / (int)LWS_ARRAY_SIZE(rands)) / 10000;
390
391 lwsl_notice("est audio energy: %ld %d\n", avg, vhd->mode);
392
393 /*
394 * Only start looking for "silence" after 1.5s, in case
395 * he does a long pause after the wakeword
396 */
397
398 if (vhd->anycount < (3 *vhd->rate) / 2 &&
399 avg < SILENCE_THRESH) {
400 vhd->quietcount += n;
401 /* then 500ms of "silence" does it for us */
402 if (vhd->quietcount >= ((vhd->rate * 3) / 4)) {
403 lwsl_warn("%s: ended capture\n", __func__);
404 vhd->mode = MODE_IDLE;
405 vhd->quietcount = 0;
406 }
407 }
408
409 /* if we're not "silent", reset the count */
410 if (avg > SILENCE_THRESH * 2)
411 vhd->quietcount = 0;
412
413 /*
414 * Since we are in capturing mode, we have something
415 * new to send now.
416 *
417 * We must send an extra one at the end so we can finish
418 * the tx.
419 */
420 lws_ss_request_tx(hss_avs_sync);
421 }
422
423 /*
424 * Just waiting for a wakeword
425 */
426
427 while (vhd->mode == MODE_IDLE) {
428 int m = 0, ppold = vhd->porcpos;
429
430 s = (vhd->wpos - vhd->porcpos) % LWS_ARRAY_SIZE(vhd->p);
431 if (s < vhd->porc_spf)
432 goto eol;
433
434 while (m < vhd->porc_spf) {
435 vhd->porcbuf[m++] = avhd->p[vhd->porcpos];
436 vhd->porcpos = (vhd->porcpos + 1) %
437 LWS_ARRAY_SIZE(vhd->p);
438 }
439
440 if (pv_porcupine_process(vhd->porc, vhd->porcbuf, &det))
441 lwsl_err("%s: porc_process failed\n", __func__);
442
443 if (!det && vhd->last_wake_detect &&
444 vhd->mode == MODE_IDLE) {
445 lwsl_warn("************* Wakeword\n");
446 if (!avs_query_start(lws_get_context(wsi))) {
447 vhd->mode = MODE_CAPTURING;
448 vhd->quietcount = 0;
449 vhd->last_wake_detect = det;
450 vhd->npos = ppold;
451 break;
452 }
453 }
454 vhd->last_wake_detect = det;
455 }
456
457 eol:
458 vhd->wpos = (vhd->wpos + n) % LWS_ARRAY_SIZE(vhd->p);
459 break;
460
461 default:
462 break;
463 }
464
465 return 0;
466 }
467
/*
 * Protocol table entry for the audio handler.  The name here is the same
 * string set_hw_params() passes when adopting the ALSA descriptors, which is
 * how those wsi get bound to callback_audio.
 */
struct lws_protocols protocol_audio_test =
	{ "lws-audio-test", callback_audio, 0, 0 };
470