/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package android.speech.tts;

import android.media.AudioFormat;
import android.media.AudioTrack;
import android.text.TextUtils;
import android.util.Log;

import java.util.Iterator;
import java.util.concurrent.PriorityBlockingQueue;
import java.util.concurrent.atomic.AtomicLong;

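/**
 * Plays synthesized speech, audio files and silence on a dedicated thread,
 * servicing requests taken from a priority queue.
 */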
class AudioPlaybackHandler {
    private static final String TAG = "TTS.AudioPlaybackHandler";
    private static final boolean DBG_THREADING = false;
    private static final boolean DBG = false;

    private static final int MIN_AUDIO_BUFFER_SIZE = 8192;

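    // Message types processed by the playback thread.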
    private static final int SYNTHESIS_START = 1;
    private static final int SYNTHESIS_DATA_AVAILABLE = 2;
    private static final int SYNTHESIS_DONE = 3;

    private static final int PLAY_AUDIO = 5;
    private static final int PLAY_SILENCE = 6;

    private static final int SHUTDOWN = -1;

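    // Priorities for queue entries; lower values are handled first.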
    private static final int DEFAULT_PRIORITY = 1;
    private static final int HIGH_PRIORITY = 0;

    private final PriorityBlockingQueue<ListEntry> mQueue =
            new PriorityBlockingQueue<ListEntry>();
    private final Thread mHandlerThread;

    private volatile MessageParams mCurrentParams = null;
    // Used only for bookkeeping and error detection.
    private volatile SynthesisMessageParams mLastSynthesisRequest = null;
    // Used to order incoming messages in our priority queue.
    private final AtomicLong mSequenceIdCtr = new AtomicLong(0);


    AudioPlaybackHandler() {
        mHandlerThread = new Thread(new MessageLoop(), "TTS.AudioPlaybackThread");
    }

    public void start() {
        mHandlerThread.start();
    }

    /**
     * Stops all synthesis for a given {@code token}. If the current token
     * is currently being processed, an effort will be made to stop it but
     * that is not guaranteed.
     *
     * NOTE: This assumes that all other messages in the queue with {@code token}
     * have been removed already.
     *
     * NOTE: Must be called synchronized on {@code AudioPlaybackHandler.this}.
     */
    private void stop(MessageParams token) {
        if (token == null) {
            return;
        }

        if (DBG) Log.d(TAG, "Stopping token : " + token);

        if (token.getType() == MessageParams.TYPE_SYNTHESIS) {
            AudioTrack current = ((SynthesisMessageParams) token).getAudioTrack();
            if (current != null) {
                // Stop the current audio track if it's still playing.
                // The audio track is thread safe in this regard. The current
                // handleSynthesisDataAvailable call will return soon after this
                // call.
                current.stop();
            }
            // This is safe because PlaybackSynthesisCallback#stop would have
            // been called before this method, and will no longer enqueue any
            // audio for this token.
            //
            // (Even if it did, all it would result in is a warning message).
            mQueue.add(new ListEntry(SYNTHESIS_DONE, token, HIGH_PRIORITY));
        } else if (token.getType() == MessageParams.TYPE_AUDIO) {
            ((AudioMessageParams) token).getPlayer().stop();
            // No cleanup required for audio messages.
        } else if (token.getType() == MessageParams.TYPE_SILENCE) {
            ((SilenceMessageParams) token).getConditionVariable().open();
            // No cleanup required for silence messages.
        }
    }

    // -----------------------------------------------------
    // Methods that add and remove elements from the queue. These do not
    // need to be synchronized strictly speaking, but they make the behaviour
    // a lot more predictable. (though it would still be correct without
    // synchronization).
    // -----------------------------------------------------

    synchronized public void removePlaybackItems(String callingApp) {
        if (DBG_THREADING) Log.d(TAG, "Removing all callback items for : " + callingApp);
        removeMessages(callingApp);

        final MessageParams current = getCurrentParams();
        if (current != null && TextUtils.equals(callingApp, current.getCallingApp())) {
            stop(current);
        }
    }

    synchronized public void removeAllItems() {
        if (DBG_THREADING) Log.d(TAG, "Removing all items");
        removeAllMessages();
        stop(getCurrentParams());
    }

    /**
     * @return false iff the queue is empty and no queue item is currently
     *        being handled, true otherwise.
     */
    public boolean isSpeaking() {
        return (mQueue.peek() != null) || (mCurrentParams != null);
    }

    /**
     * Shut down the audio playback thread.
     */
    synchronized public void quit() {
        removeAllMessages();
        stop(getCurrentParams());
        mQueue.add(new ListEntry(SHUTDOWN, null, HIGH_PRIORITY));
    }

    synchronized void enqueueSynthesisStart(SynthesisMessageParams token) {
        if (DBG_THREADING) Log.d(TAG, "Enqueuing synthesis start : " + token);
        mQueue.add(new ListEntry(SYNTHESIS_START, token));
    }

    synchronized void enqueueSynthesisDataAvailable(SynthesisMessageParams token) {
        if (DBG_THREADING) Log.d(TAG, "Enqueuing synthesis data available : " + token);
        mQueue.add(new ListEntry(SYNTHESIS_DATA_AVAILABLE, token));
    }

    synchronized void enqueueSynthesisDone(SynthesisMessageParams token) {
        if (DBG_THREADING) Log.d(TAG, "Enqueuing synthesis done : " + token);
        mQueue.add(new ListEntry(SYNTHESIS_DONE, token));
    }

    synchronized void enqueueAudio(AudioMessageParams token) {
        if (DBG_THREADING) Log.d(TAG, "Enqueuing audio : " + token);
        mQueue.add(new ListEntry(PLAY_AUDIO, token));
    }

    synchronized void enqueueSilence(SilenceMessageParams token) {
        if (DBG_THREADING) Log.d(TAG, "Enqueuing silence : " + token);
        mQueue.add(new ListEntry(PLAY_SILENCE, token));
    }

    // -----------------------------------------
    // End of public API methods.
    // -----------------------------------------

    // -----------------------------------------
    // Methods for managing the message queue.
    // -----------------------------------------

    /*
     * The MessageLoop is a handler like implementation that
     * processes messages from a priority queue.
     */
    private final class MessageLoop implements Runnable {
        @Override
        public void run() {
            while (true) {
                ListEntry entry = null;
                try {
                    entry = mQueue.take();
                } catch (InterruptedException ie) {
                    return;
                }

                if (entry.mWhat == SHUTDOWN) {
                    if (DBG) Log.d(TAG, "MessageLoop : Shutting down");
                    return;
                }

                if (DBG) {
                    Log.d(TAG, "MessageLoop : Handling message :" + entry.mWhat
                            + " ,seqId : " + entry.mSequenceId);
                }

                setCurrentParams(entry.mMessage);
                handleMessage(entry);
                setCurrentParams(null);
            }
        }
    }

    /*
     * Atomically clear the queue of all messages.
     */
    synchronized private void removeAllMessages() {
        mQueue.clear();
    }

    /*
     * Remove all messages that originate from a given calling app.
     */
    synchronized private void removeMessages(String callingApp) {
        Iterator<ListEntry> it = mQueue.iterator();

        while (it.hasNext()) {
            final ListEntry current = it.next();
            // The null check is to prevent us from removing control messages,
            // such as a shutdown message.
            if (current.mMessage != null &&
                    callingApp.equals(current.mMessage.getCallingApp())) {
                it.remove();
            }
        }
    }

    /*
     * An element of our priority queue of messages. Each message has a priority,
     * and a sequence id (defined by the order of enqueue calls). Among messages
     * with the same priority, messages that were received earlier win out.
     */
    private final class ListEntry implements Comparable<ListEntry> {
        final int mWhat;
        final MessageParams mMessage;
        final int mPriority;
        final long mSequenceId;

        private ListEntry(int what, MessageParams message) {
            this(what, message, DEFAULT_PRIORITY);
        }

        private ListEntry(int what, MessageParams message, int priority) {
            mWhat = what;
            mMessage = message;
            mPriority = priority;
            mSequenceId = mSequenceIdCtr.incrementAndGet();
        }

        @Override
        public int compareTo(ListEntry that) {
            if (that == this) {
                return 0;
            }

            // Note that this is always 0, 1 or -1.
            int priorityDiff = mPriority - that.mPriority;
            if (priorityDiff == 0) {
                // The == case cannot occur.
                return (mSequenceId < that.mSequenceId) ? -1 : 1;
            }

            return priorityDiff;
        }
    }

    private void setCurrentParams(MessageParams p) {
        if (DBG_THREADING) {
            if (p != null) {
                Log.d(TAG, "Started handling :" + p);
            } else {
                Log.d(TAG, "End handling : " + mCurrentParams);
            }
        }
        mCurrentParams = p;
    }

    private MessageParams getCurrentParams() {
        return mCurrentParams;
    }

    // -----------------------------------------
    // Methods for dealing with individual messages, the methods
    // below do the actual work.
    // -----------------------------------------

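    // Dispatches a dequeued entry to the handler for its message type.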
    private void handleMessage(ListEntry entry) {
        final MessageParams msg = entry.mMessage;
        if (entry.mWhat == SYNTHESIS_START) {
            handleSynthesisStart(msg);
        } else if (entry.mWhat == SYNTHESIS_DATA_AVAILABLE) {
            handleSynthesisDataAvailable(msg);
        } else if (entry.mWhat == SYNTHESIS_DONE) {
            handleSynthesisDone(msg);
        } else if (entry.mWhat == PLAY_AUDIO) {
            handleAudio(msg);
        } else if (entry.mWhat == PLAY_SILENCE) {
            handleSilence(msg);
        }
    }

    // Currently implemented as blocking the audio playback thread for the
    // specified duration. If a call to stop() is made, the thread
    // unblocks.
    private void handleSilence(MessageParams msg) {
        if (DBG) Log.d(TAG, "handleSilence()");
        SilenceMessageParams params = (SilenceMessageParams) msg;
        if (params.getSilenceDurationMs() > 0) {
            params.getConditionVariable().block(params.getSilenceDurationMs());
        }
        params.getDispatcher().dispatchUtteranceCompleted();
        if (DBG) Log.d(TAG, "handleSilence() done.");
    }

    // Plays back audio from a given URI. No TTS engine involvement here.
    private void handleAudio(MessageParams msg) {
        if (DBG) Log.d(TAG, "handleAudio()");
        AudioMessageParams params = (AudioMessageParams) msg;
        // Note that the BlockingMediaPlayer spawns a separate thread.
        //
        // TODO: This can be avoided.
        params.getPlayer().startAndWait();
        params.getDispatcher().dispatchUtteranceCompleted();
        if (DBG) Log.d(TAG, "handleAudio() done.");
    }

    // Denotes the start of a new synthesis request. We create a new
    // audio track, and prepare it for incoming data.
    //
    // Note that since all TTS synthesis happens on a single thread, we
    // should ALWAYS see the following order :
    //
    // handleSynthesisStart -> handleSynthesisDataAvailable(*) -> handleSynthesisDone
    // OR
    // handleSynthesisCompleteDataAvailable.
    private void handleSynthesisStart(MessageParams msg) {
        if (DBG) Log.d(TAG, "handleSynthesisStart()");
        final SynthesisMessageParams param = (SynthesisMessageParams) msg;

        // Oops, looks like the engine forgot to call done(). We go through
        // extra trouble to clean the data to prevent the AudioTrack resources
        // from being leaked.
        if (mLastSynthesisRequest != null) {
            Log.w(TAG, "Error : Missing call to done() for request : " +
                    mLastSynthesisRequest);
            handleSynthesisDone(mLastSynthesisRequest);
        }

        mLastSynthesisRequest = param;

        // Create the audio track.
        final AudioTrack audioTrack = createStreamingAudioTrack(param);

        if (DBG) Log.d(TAG, "Created audio track [" + audioTrack.hashCode() + "]");

        param.setAudioTrack(audioTrack);
    }

    // More data available to be flushed to the audio track.
    private void handleSynthesisDataAvailable(MessageParams msg) {
        final SynthesisMessageParams param = (SynthesisMessageParams) msg;
        if (param.getAudioTrack() == null) {
            Log.w(TAG, "Error : null audio track in handleDataAvailable : " + param);
            return;
        }

        if (param != mLastSynthesisRequest) {
            Log.e(TAG, "Call to dataAvailable without done() / start()");
            return;
        }

        final AudioTrack audioTrack = param.getAudioTrack();
        final SynthesisMessageParams.ListEntry bufferCopy = param.getNextBuffer();

        if (bufferCopy == null) {
            Log.e(TAG, "No buffers available to play.");
            return;
        }

        int playState = audioTrack.getPlayState();
        if (playState == AudioTrack.PLAYSTATE_STOPPED) {
            if (DBG) Log.d(TAG, "AudioTrack stopped, restarting : " + audioTrack.hashCode());
            audioTrack.play();
        }
        int count = 0;
        while (count < bufferCopy.mBytes.length) {
            // Note that we don't take bufferCopy.mOffset into account because
            // it is guaranteed to be 0.
            int written = audioTrack.write(bufferCopy.mBytes, count,
                    bufferCopy.mBytes.length - count);
            if (written <= 0) {
                break;
            }
            count += written;
        }
        param.mBytesWritten += count;
        param.mLogger.onPlaybackStart();
    }

    // Wait for the audio track to stop playing, and then release its resources.
    private void handleSynthesisDone(MessageParams msg) {
        final SynthesisMessageParams params = (SynthesisMessageParams) msg;

        if (DBG) Log.d(TAG, "handleSynthesisDone()");
        final AudioTrack audioTrack = params.getAudioTrack();

        if (audioTrack == null) {
            return;
        }

        if (params.mBytesWritten < params.mAudioBufferSize) {
            if (DBG) Log.d(TAG, "Stopping audio track to flush audio, state was : " +
                    audioTrack.getPlayState());
            params.mIsShortUtterance = true;
            audioTrack.stop();
        }

        if (DBG) Log.d(TAG, "Waiting for audio track to complete : " +
                audioTrack.hashCode());
        blockUntilDone(params);
        if (DBG) Log.d(TAG, "Releasing audio track [" + audioTrack.hashCode() + "]");

        // The last call to AudioTrack.write( ) will return only after
        // all data from the audioTrack has been sent to the mixer, so
        // it's safe to release at this point. Make sure release() and the call
        // that set the audio track to null are performed atomically.
        synchronized (this) {
            // Never allow the audioTrack to be observed in a state where
            // it is released but non null. The only case this might happen
            // is in the various stopFoo methods that call AudioTrack#stop from
            // different threads, but they are synchronized on AudioPlayBackHandler#this
            // too.
            audioTrack.release();
            params.setAudioTrack(null);
        }
        params.getDispatcher().dispatchUtteranceCompleted();
        mLastSynthesisRequest = null;
        params.mLogger.onWriteData();
    }

    /**
     * The minimum increment of time to wait for an audiotrack to finish
     * playing.
     */
    private static final long MIN_SLEEP_TIME_MS = 20;

    /**
     * The maximum increment of time to sleep while waiting for an audiotrack
     * to finish playing.
     */
    private static final long MAX_SLEEP_TIME_MS = 2500;

    /**
     * The maximum amount of time to wait for an audio track to make progress while
     * it remains in PLAYSTATE_PLAYING. This should never happen in normal usage, but
     * could happen in exceptional circumstances like a media_server crash.
     */
    private static final long MAX_PROGRESS_WAIT_MS = MAX_SLEEP_TIME_MS;

    private static void blockUntilDone(SynthesisMessageParams params) {
        if (params.mAudioTrack == null || params.mBytesWritten <= 0) {
            return;
        }

        if (params.mIsShortUtterance) {
            // In this case we would have called AudioTrack#stop() to flush
            // buffers to the mixer. This makes the playback head position
            // unobservable and notification markers do not work reliably. We
            // have no option but to wait until we think the track would finish
            // playing and release it after.
            //
            // This isn't as bad as it looks because (a) We won't end up waiting
            // for much longer than we should because even at 4khz mono, a short
            // utterance weighs in at about 2 seconds, and (b) such short utterances
            // are expected to be relatively infrequent and in a stream of utterances
            // this shows up as a slightly longer pause.
            blockUntilEstimatedCompletion(params);
        } else {
            blockUntilCompletion(params);
        }
    }

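    // Sleeps for the estimated duration of the written audio, computed from the
    // number of frames written and the sample rate.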
    private static void blockUntilEstimatedCompletion(SynthesisMessageParams params) {
        final int lengthInFrames = params.mBytesWritten / params.mBytesPerFrame;
        final long estimatedTimeMs = (lengthInFrames * 1000 / params.mSampleRateInHz);

        if (DBG) Log.d(TAG, "About to sleep for: " + estimatedTimeMs + "ms for a short utterance");

        try {
            Thread.sleep(estimatedTimeMs);
        } catch (InterruptedException ie) {
            // Do nothing.
        }
    }

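    // Polls the playback head position, sleeping in increments proportional to the
    // remaining audio, until all written frames have played, the track leaves
    // PLAYSTATE_PLAYING, or it stops making progress.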
    private static void blockUntilCompletion(SynthesisMessageParams params) {
        final AudioTrack audioTrack = params.mAudioTrack;
        final int lengthInFrames = params.mBytesWritten / params.mBytesPerFrame;

        int previousPosition = -1;
        int currentPosition = 0;
        long blockedTimeMs = 0;

        while ((currentPosition = audioTrack.getPlaybackHeadPosition()) < lengthInFrames &&
                audioTrack.getPlayState() == AudioTrack.PLAYSTATE_PLAYING) {

            final long estimatedTimeMs = ((lengthInFrames - currentPosition) * 1000) /
                    audioTrack.getSampleRate();
            final long sleepTimeMs = clip(estimatedTimeMs, MIN_SLEEP_TIME_MS, MAX_SLEEP_TIME_MS);

            // Check if the audio track has made progress since the last loop
            // iteration. We should then add in the amount of time that was
            // spent sleeping in the last iteration.
            if (currentPosition == previousPosition) {
                // This works only because the sleep time that would have been calculated
                // would be the same in the previous iteration too.
                blockedTimeMs += sleepTimeMs;
                // If we've taken too long to make progress, bail.
                if (blockedTimeMs > MAX_PROGRESS_WAIT_MS) {
                    Log.w(TAG, "Waited unsuccessfully for " + MAX_PROGRESS_WAIT_MS + "ms " +
                            "for AudioTrack to make progress, Aborting");
                    break;
                }
            } else {
                blockedTimeMs = 0;
            }
            previousPosition = currentPosition;

            if (DBG) Log.d(TAG, "About to sleep for : " + sleepTimeMs + " ms," +
                    " Playback position : " + currentPosition + ", Length in frames : "
                    + lengthInFrames);
            try {
                Thread.sleep(sleepTimeMs);
            } catch (InterruptedException ie) {
                break;
            }
        }
    }

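    // Clamps value to the range [min, max].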
    private static final long clip(long value, long min, long max) {
        if (value < min) {
            return min;
        }

        if (value > max) {
            return max;
        }

        return value;
    }

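    // Creates a streaming mode AudioTrack for the given synthesis parameters, with a
    // buffer of at least MIN_AUDIO_BUFFER_SIZE bytes. Returns null if the track could
    // not be initialized.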
    private static AudioTrack createStreamingAudioTrack(SynthesisMessageParams params) {
        final int channelConfig = getChannelConfig(params.mChannelCount);
        final int sampleRateInHz = params.mSampleRateInHz;
        final int audioFormat = params.mAudioFormat;

        int minBufferSizeInBytes
                = AudioTrack.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat);
        int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes);

        AudioTrack audioTrack = new AudioTrack(params.mStreamType, sampleRateInHz, channelConfig,
                audioFormat, bufferSizeInBytes, AudioTrack.MODE_STREAM);
        if (audioTrack.getState() != AudioTrack.STATE_INITIALIZED) {
            Log.w(TAG, "Unable to create audio track.");
            audioTrack.release();
            return null;
        }
        params.mAudioBufferSize = bufferSizeInBytes;

        setupVolume(audioTrack, params.mVolume, params.mPan);
        return audioTrack;
    }

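    // Maps a channel count to the corresponding AudioFormat channel configuration,
    // or 0 if the count is unsupported.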
    static int getChannelConfig(int channelCount) {
        if (channelCount == 1) {
            return AudioFormat.CHANNEL_OUT_MONO;
        } else if (channelCount == 2) {
            return AudioFormat.CHANNEL_OUT_STEREO;
        }

        return 0;
    }

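    // Applies the requested volume and pan by scaling the per-channel volumes of the track.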
    private static void setupVolume(AudioTrack audioTrack, float volume, float pan) {
        float vol = clip(volume, 0.0f, 1.0f);
        float panning = clip(pan, -1.0f, 1.0f);
        float volLeft = vol;
        float volRight = vol;
        if (panning > 0.0f) {
            volLeft *= (1.0f - panning);
        } else if (panning < 0.0f) {
            volRight *= (1.0f + panning);
        }
        if (DBG) Log.d(TAG, "volLeft=" + volLeft + ",volRight=" + volRight);
        if (audioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) {
            Log.e(TAG, "Failed to set volume");
        }
    }

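    // Clamps value to the range [min, max].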
    private static float clip(float value, float min, float max) {
        return value > max ? max : (value < min ? min : value);
    }

}