• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2010 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.voicedialer;
18 
19 import android.app.Activity;
20 import android.app.AlertDialog;
21 import android.bluetooth.BluetoothAdapter;
22 import android.bluetooth.BluetoothDevice;
23 import android.bluetooth.BluetoothHeadset;
24 import android.bluetooth.BluetoothProfile;
25 import android.content.BroadcastReceiver;
26 import android.content.Context;
27 import android.content.DialogInterface;
28 import android.content.Intent;
29 import android.content.IntentFilter;
30 import android.media.AudioManager;
31 import android.media.ToneGenerator;
32 import android.os.Bundle;
33 import android.os.Handler;
34 import android.os.PowerManager;
35 import android.os.PowerManager.WakeLock;
36 import android.os.SystemProperties;
37 import android.os.Vibrator;
38 import android.speech.tts.TextToSpeech;
39 import android.util.Log;
40 import android.view.View;
41 import android.view.WindowManager;
42 import android.widget.TextView;
43 
44 import java.io.File;
45 import java.io.IOException;
46 import java.io.InputStream;
47 import java.util.HashMap;
48 import java.util.List;
49 
50 /**
51  * TODO: get rid of the anonymous classes
52  *
53  * This class is the user interface of the VoiceDialer application.
54  * It begins in the INITIALIZING state.
55  *
56  * INITIALIZING :
57  *  This transitions out on events from TTS and the BluetoothHeadset
58  *   once TTS initialized and SCO channel set up:
59  *     * prompt the user "speak now"
60  *     * transition to the SPEAKING_GREETING state
61  *
62  * SPEAKING_GREETING:
63  *  This transitions out only on events from TTS or the fallback runnable
64  *   once the greeting utterance completes:
65  *     * begin listening for the command using the {@link CommandRecognizerEngine}
66  *     * transition to the WAITING_FOR_COMMAND state
67  *
68  * WAITING_FOR_COMMAND :
69  * This transitions out only on events from the recognizer
70  *   on RecognitionFailure or RecognitionError:
71  *     * begin speaking "try again."
72  *     * transition to state SPEAKING_TRY_AGAIN
73  *   on RecognitionSuccess:
74  *     single result:
75  *       * begin speaking the sentence describing the intent
76  *       * transition to the SPEAKING_CHOSEN_ACTION
77  *     multiple results:
78  *       * begin speaking each of the choices in order
79  *       * transition to the SPEAKING_CHOICES state
80  *
81  * SPEAKING_TRY_AGAIN:
82  * This transitions out only on events from TTS or the fallback runnable
83  *   once the try again utterance completes:
84  *     * begin listening for the command using the {@link CommandRecognizerEngine}
 *     * transition to the WAITING_FOR_COMMAND state
86  *
87  * SPEAKING_CHOSEN_ACTION:
88  *  This transitions out only on events from TTS or the fallback runnable
89  *   once the utterance completes:
90  *     * dispatch the intent that was chosen
91  *     * transition to the EXITING state
92  *     * finish the activity
93  *
94  * SPEAKING_CHOICES:
95  *  This transitions out only on events from TTS or the fallback runnable
96  *   once the utterance completes:
97  *     * begin listening for the user's choice using the
98  *         {@link PhoneTypeChoiceRecognizerEngine}
99  *     * transition to the WAITING_FOR_CHOICE state.
100  *
101  * WAITING_FOR_CHOICE:
102  *  This transitions out only on events from the recognizer
103  *   on RecognitionFailure or RecognitionError:
104  *     * begin speaking the "invalid choice" message, along with the list
105  *       of choices
106  *     * transition to the SPEAKING_CHOICES state
107  *   on RecognitionSuccess:
108  *     if the result is "try again", prompt the user to say a command, begin
109  *       listening for the command, and transition back to the WAITING_FOR_COMMAND
110  *       state.
 *     if the result is "exit", then begin speaking the "goodbye" message and
 *       transition to the SPEAKING_GOODBYE state.
 *     if the result is a valid choice, begin speaking the action chosen, initiate
 *       the command the user has chosen and exit.
115  *     if not a valid choice, speak the "invalid choice" message, begin
116  *       speaking the choices in order again, transition to the
117  *       SPEAKING_CHOICES
118  *
119  * SPEAKING_GOODBYE:
120  *  This transitions out only on events from TTS or the fallback runnable
121  *   after a time out, finish the activity.
122  *
123  */
124 
125 public class VoiceDialerActivity extends Activity {
126 
    // Tag used for every android.util.Log call in this activity.
    private static final String TAG = "VoiceDialerActivity";

    // Names of extras; CONTACTS_EXTRA is passed to getArg() in onCreate to
    // locate the contacts file. NOTE(review): MICROPHONE_EXTRA is presumably
    // read the same way elsewhere in the file -- confirm.
    private static final String MICROPHONE_EXTRA = "microphone";
    private static final String CONTACTS_EXTRA = "contacts";

    // Utterance ids stored under TextToSpeech.Engine.KEY_PARAM_UTTERANCE_ID
    // before each call to TextToSpeech.speak().
    private static final String SPEAK_NOW_UTTERANCE = "speak_now";
    private static final String TRY_AGAIN_UTTERANCE = "try_again";
    private static final String CHOSEN_ACTION_UTTERANCE = "chose_action";
    private static final String GOODBYE_UTTERANCE = "goodbye";
    private static final String CHOICES_UTTERANCE = "choices";

    // Delays handed to Handler.postDelayed(), i.e. milliseconds.
    // FIRST_UTTERANCE_DELAY: wait before the greeting is spoken.
    // MAX_TTS_DELAY: how long to wait for an utterance-completed callback
    // before the fallback runnable fires.
    // NOTE(review): EXIT_DELAY presumably delays finishing the activity -- confirm.
    private static final int FIRST_UTTERANCE_DELAY = 300;
    private static final int MAX_TTS_DELAY = 6000;
    private static final int EXIT_DELAY = 2000;

    // Values assigned to mSampleRate depending on whether a Bluetooth
    // headset is in use.
    private static final int BLUETOOTH_SAMPLE_RATE = 8000;
    private static final int REGULAR_SAMPLE_RATE = 11025;

    // Values of mState; see the state machine description in the class comment.
    private static final int INITIALIZING = 0;
    private static final int SPEAKING_GREETING = 1;
    private static final int WAITING_FOR_COMMAND = 2;
    private static final int SPEAKING_TRY_AGAIN = 3;
    private static final int SPEAKING_CHOICES = 4;
    private static final int WAITING_FOR_CHOICE = 5;
    private static final int WAITING_FOR_DIALOG_CHOICE = 6;
    private static final int SPEAKING_CHOSEN_ACTION = 7;
    private static final int SPEAKING_GOODBYE = 8;
    private static final int EXITING = 9;

    // Recognizer engines are static, so they are shared by every instance
    // of this activity.
    private static final CommandRecognizerEngine mCommandEngine =
            new CommandRecognizerEngine();
    private static final PhoneTypeChoiceRecognizerEngine mPhoneTypeChoiceEngine =
            new PhoneTypeChoiceRecognizerEngine();
    // Callback objects created in onCreate.
    private CommandRecognizerClient mCommandClient;
    private ChoiceRecognizerClient mChoiceClient;
    // Tone generator created on STREAM_RING in onCreate.
    private ToneGenerator mToneGenerator;
    // Handler created in onCreate; used to post UI work and delayed fallbacks.
    private Handler mHandler;
    private Thread mRecognizerThread = null;
    private AudioManager mAudioManager;
    // Headset profile proxy; set in onServiceConnected and cleared in
    // onServiceDisconnected.
    private BluetoothHeadset mBluetoothHeadset;
    // Headset device currently in use, or null after a disconnect broadcast.
    private BluetoothDevice mBluetoothDevice;
    private BluetoothAdapter mAdapter;
    // Text-to-speech engine; only created when a Bluetooth headset is connected.
    private TextToSpeech mTts;
    // Parameters passed to every TextToSpeech.speak() call.
    private HashMap<String, String> mTtsParams;
    private VoiceDialerBroadcastReceiver mReceiver;
    // True from TTS creation until TtsInitListener.onInit succeeds.
    private boolean mWaitingForTts;
    // True from headset connection until the audio state becomes connected.
    private boolean mWaitingForScoConnection;
    private Intent[] mAvailableChoices;
    // Intent that is dispatched once the chosen action has been spoken.
    private Intent mChosenAction;
    // STREAM_BLUETOOTH_SCO volume captured in TtsInitListener.onInit before
    // it is (possibly) lowered.
    private int mBluetoothVoiceVolume;
    // Current state; one of the int state constants above.
    private int mState;
    private AlertDialog mAlertDialog;
    // Pending delayed OnTtsCompletionRunnable, or null once the utterance
    // completion callback has been delivered.
    private Runnable mFallbackRunnable;
    private boolean mUsingBluetooth = false;
    // Either BLUETOOTH_SAMPLE_RATE or REGULAR_SAMPLE_RATE.
    private int mSampleRate;
    private WakeLock mWakeLock;
183 
184     @Override
onCreate(Bundle icicle)185     protected void onCreate(Bundle icicle) {
186         super.onCreate(icicle);
187         // TODO: All of this state management and holding of
188         // connections to the TTS engine and recognizer really
189         // belongs in a service.  The activity can be stopped or deleted
190         // and recreated for lots of reasons.
191         // It's way too late in the ICS release cycle for a change
192         // like this now though.
193         // MHibdon Sept 20 2011
194         mHandler = new Handler();
195         mAudioManager = (AudioManager)getSystemService(AUDIO_SERVICE);
196         mToneGenerator = new ToneGenerator(AudioManager.STREAM_RING,
197                 ToneGenerator.MAX_VOLUME);
198 
199         acquireWakeLock(this);
200 
201         mState = INITIALIZING;
202         mChosenAction = null;
203         mAudioManager.requestAudioFocus(
204                 null, AudioManager.STREAM_MUSIC,
205                 AudioManager.AUDIOFOCUS_GAIN_TRANSIENT);
206 
207         // set this flag so this activity will stay in front of the keyguard
208         int flags = WindowManager.LayoutParams.FLAG_SHOW_WHEN_LOCKED;
209         getWindow().addFlags(flags);
210 
211         // open main window
212         setTheme(android.R.style.Theme_Dialog);
213         setTitle(R.string.title);
214         setContentView(R.layout.voice_dialing);
215         findViewById(R.id.microphone_view).setVisibility(View.INVISIBLE);
216         findViewById(R.id.retry_view).setVisibility(View.INVISIBLE);
217         findViewById(R.id.microphone_loading_view).setVisibility(View.VISIBLE);
218         if (RecognizerLogger.isEnabled(this)) {
219             ((TextView) findViewById(R.id.substate)).setText(R.string.logging_enabled);
220         }
221 
222         // Get handle to BluetoothHeadset object
223         IntentFilter audioStateFilter;
224         audioStateFilter = new IntentFilter();
225         audioStateFilter.addAction(BluetoothHeadset.ACTION_CONNECTION_STATE_CHANGED);
226         audioStateFilter.addAction(BluetoothHeadset.ACTION_AUDIO_STATE_CHANGED);
227         mReceiver = new VoiceDialerBroadcastReceiver();
228         registerReceiver(mReceiver, audioStateFilter);
229 
230         mCommandEngine.setContactsFile(newFile(getArg(CONTACTS_EXTRA)));
231         mCommandEngine.setMinimizeResults(true);
232         mCommandEngine.setAllowOpenEntries(false);
233         mCommandClient = new CommandRecognizerClient();
234         mChoiceClient = new ChoiceRecognizerClient();
235 
236         mAdapter = BluetoothAdapter.getDefaultAdapter();
237         if (BluetoothHeadset.isBluetoothVoiceDialingEnabled(this) && mAdapter != null) {
238            if (!mAdapter.getProfileProxy(this, mBluetoothHeadsetServiceListener,
239                                          BluetoothProfile.HEADSET)) {
240                Log.e(TAG, "Getting Headset Proxy failed");
241            }
242 
243         } else {
244             mUsingBluetooth = false;
245             if (false) Log.d(TAG, "bluetooth unavailable");
246             mSampleRate = REGULAR_SAMPLE_RATE;
247             mCommandEngine.setMinimizeResults(false);
248             mCommandEngine.setAllowOpenEntries(true);
249 
250             // we're not using bluetooth apparently, just start listening.
251             listenForCommand();
252         }
253 
254     }
255 
256     class ErrorRunnable implements Runnable {
257         private int mErrorMsg;
ErrorRunnable(int errorMsg)258         public ErrorRunnable(int errorMsg) {
259             mErrorMsg = errorMsg;
260         }
261 
run()262         public void run() {
263             // put up an error and exit
264             mHandler.removeCallbacks(mMicFlasher);
265             ((TextView)findViewById(R.id.state)).setText(R.string.failure);
266             ((TextView)findViewById(R.id.substate)).setText(mErrorMsg);
267             ((TextView)findViewById(R.id.substate)).setText(
268                     R.string.headset_connection_lost);
269             findViewById(R.id.microphone_view).setVisibility(View.INVISIBLE);
270             findViewById(R.id.retry_view).setVisibility(View.VISIBLE);
271 
272 
273             if (!mUsingBluetooth) {
274                 playSound(ToneGenerator.TONE_PROP_NACK);
275             }
276         }
277     }
278 
279     class OnTtsCompletionRunnable implements Runnable {
280         private boolean mFallback;
281 
OnTtsCompletionRunnable(boolean fallback)282         OnTtsCompletionRunnable(boolean fallback) {
283             mFallback = fallback;
284         }
285 
run()286         public void run() {
287             if (mFallback) {
288                 Log.e(TAG, "utterance completion not delivered, using fallback");
289             }
290             Log.d(TAG, "onTtsCompletionRunnable");
291             if (mState == SPEAKING_GREETING || mState == SPEAKING_TRY_AGAIN) {
292                 listenForCommand();
293             } else if (mState == SPEAKING_CHOICES) {
294                 listenForChoice();
295             } else if (mState == SPEAKING_GOODBYE) {
296                 mState = EXITING;
297                 finish();
298             } else if (mState == SPEAKING_CHOSEN_ACTION) {
299                 mState = EXITING;
300                 startActivityHelp(mChosenAction);
301                 finish();
302             }
303         }
304     }
305 
306     class GreetingRunnable implements Runnable {
run()307         public void run() {
308             mState = SPEAKING_GREETING;
309             mTtsParams.put(TextToSpeech.Engine.KEY_PARAM_UTTERANCE_ID,
310                     SPEAK_NOW_UTTERANCE);
311             mTts.speak(getString(R.string.speak_now_tts),
312                 TextToSpeech.QUEUE_FLUSH,
313                 mTtsParams);
314             // Normally, we will begin listening for the command after the
315             // utterance completes.  As a fallback in case the utterance
316             // does not complete, post a delayed runnable to fire
317             // the intent.
318             mFallbackRunnable = new OnTtsCompletionRunnable(true);
319             mHandler.postDelayed(mFallbackRunnable, MAX_TTS_DELAY);
320         }
321     }
322 
323     class TtsInitListener implements TextToSpeech.OnInitListener {
onInit(int status)324         public void onInit(int status) {
325             // status can be either TextToSpeech.SUCCESS or TextToSpeech.ERROR.
326             if (false) Log.d(TAG, "onInit for tts");
327             if (status != TextToSpeech.SUCCESS) {
328                 // Initialization failed.
329                 Log.e(TAG, "Could not initialize TextToSpeech.");
330                 mHandler.post(new ErrorRunnable(R.string.recognition_error));
331                 exitActivity();
332                 return;
333             }
334 
335             if (mTts == null) {
336                 Log.e(TAG, "null tts");
337                 mHandler.post(new ErrorRunnable(R.string.recognition_error));
338                 exitActivity();
339                 return;
340             }
341 
342             mTts.setOnUtteranceCompletedListener(new OnUtteranceCompletedListener());
343 
344             // The TTS engine has been successfully initialized.
345             mWaitingForTts = false;
346 
347             // TTS over bluetooth is really loud,
348             // Limit volume to -18dB. Stream volume range represents approximately 50dB
349             // (See AudioSystem.cpp linearToLog()) so the number of steps corresponding
350             // to 18dB is 18 / (50 / maxSteps).
351             mBluetoothVoiceVolume = mAudioManager.getStreamVolume(
352                     AudioManager.STREAM_BLUETOOTH_SCO);
353             int maxVolume = mAudioManager.getStreamMaxVolume(AudioManager.STREAM_BLUETOOTH_SCO);
354             int volume = maxVolume - ((18 / (50/maxVolume)) + 1);
355             if (mBluetoothVoiceVolume > volume) {
356                 mAudioManager.setStreamVolume(AudioManager.STREAM_BLUETOOTH_SCO, volume, 0);
357             }
358 
359             if (mWaitingForScoConnection) {
360                 // the bluetooth connection is not up yet, still waiting.
361             } else {
362                 // we now have SCO connection and TTS, so we can start.
363                 mHandler.postDelayed(new GreetingRunnable(), FIRST_UTTERANCE_DELAY);
364             }
365         }
366     }
367 
368     class OnUtteranceCompletedListener
369             implements TextToSpeech.OnUtteranceCompletedListener {
onUtteranceCompleted(String utteranceId)370         public void onUtteranceCompleted(String utteranceId) {
371             if (false) Log.d(TAG, "onUtteranceCompleted " + utteranceId);
372             // since the utterance has completed, we no longer need the fallback.
373             mHandler.removeCallbacks(mFallbackRunnable);
374             mFallbackRunnable = null;
375             mHandler.post(new OnTtsCompletionRunnable(false));
376         }
377     }
378 
updateBluetoothParameters(boolean connected)379     private void updateBluetoothParameters(boolean connected) {
380         if (connected) {
381             if (false) Log.d(TAG, "using bluetooth");
382             mUsingBluetooth = true;
383 
384             mBluetoothHeadset.startVoiceRecognition(mBluetoothDevice);
385 
386             mSampleRate = BLUETOOTH_SAMPLE_RATE;
387             mCommandEngine.setMinimizeResults(true);
388             mCommandEngine.setAllowOpenEntries(false);
389 
390             // we can't start recognizing until we get connected to the BluetoothHeadset
391             // and have a connected audio state.  We will listen for these
392             // states to change.
393             mWaitingForScoConnection = true;
394 
395             // initialize the text to speech system
396             mWaitingForTts = true;
397             mTts = new TextToSpeech(VoiceDialerActivity.this, new TtsInitListener());
398             mTtsParams = new HashMap<String, String>();
399             mTtsParams.put(TextToSpeech.Engine.KEY_PARAM_STREAM,
400                     String.valueOf(AudioManager.STREAM_VOICE_CALL));
401             // we need to wait for the TTS system and the SCO connection
402             // before we can start listening.
403         } else {
404             if (false) Log.d(TAG, "not using bluetooth");
405             mUsingBluetooth = false;
406             mSampleRate = REGULAR_SAMPLE_RATE;
407             mCommandEngine.setMinimizeResults(false);
408             mCommandEngine.setAllowOpenEntries(true);
409 
410             // we're not using bluetooth apparently, just start listening.
411             listenForCommand();
412         }
413     }
414 
415     private BluetoothProfile.ServiceListener mBluetoothHeadsetServiceListener =
416             new BluetoothProfile.ServiceListener() {
417         public void onServiceConnected(int profile, BluetoothProfile proxy) {
418             if (false) Log.d(TAG, "onServiceConnected");
419             mBluetoothHeadset = (BluetoothHeadset) proxy;
420 
421             List<BluetoothDevice> deviceList = mBluetoothHeadset.getConnectedDevices();
422 
423             if (deviceList.size() > 0) {
424                 mBluetoothDevice = deviceList.get(0);
425                 int state = mBluetoothHeadset.getConnectionState(mBluetoothDevice);
426                 if (false) Log.d(TAG, "headset status " + state);
427 
428                 // We are already connnected to a headset
429                 if (state == BluetoothHeadset.STATE_CONNECTED) {
430                     updateBluetoothParameters(true);
431                     return;
432                 }
433             }
434             updateBluetoothParameters(false);
435         }
436 
437         public void onServiceDisconnected(int profile) {
438             mBluetoothHeadset = null;
439         }
440     };
441 
442     private class VoiceDialerBroadcastReceiver extends BroadcastReceiver {
443         @Override
onReceive(Context context, Intent intent)444         public void onReceive(Context context, Intent intent) {
445             String action = intent.getAction();
446             if (action.equals(BluetoothHeadset.ACTION_CONNECTION_STATE_CHANGED)) {
447 
448                 BluetoothDevice device = intent.getParcelableExtra(BluetoothDevice.EXTRA_DEVICE);
449                 int state = intent.getIntExtra(BluetoothProfile.EXTRA_STATE, -1);
450 
451                 if (false) Log.d(TAG, "HEADSET STATE -> " + state);
452 
453                 if (state == BluetoothProfile.STATE_CONNECTED) {
454                     if (device == null) {
455                         return;
456                     }
457                     mBluetoothDevice = device;
458                     updateBluetoothParameters(true);
459                 } else if (state == BluetoothProfile.STATE_DISCONNECTED) {
460                     mBluetoothDevice = null;
461                     updateBluetoothParameters(false);
462                 }
463             } else if (action.equals(BluetoothHeadset.ACTION_AUDIO_STATE_CHANGED)) {
464                 int state = intent.getIntExtra(BluetoothProfile.EXTRA_STATE, -1);
465                 int prevState = intent.getIntExtra(BluetoothProfile.EXTRA_PREVIOUS_STATE, -1);
466                 if (state == BluetoothHeadset.STATE_AUDIO_CONNECTED &&
467                     mWaitingForScoConnection) {
468                     // SCO channel has just become available.
469                     mWaitingForScoConnection = false;
470                     if (mWaitingForTts) {
471                         // still waiting for the TTS to be set up.
472                     } else {
473                         // we now have SCO connection and TTS, so we can start.
474                         mHandler.postDelayed(new GreetingRunnable(), FIRST_UTTERANCE_DELAY);
475                     }
476                 } else if (prevState == BluetoothHeadset.STATE_AUDIO_CONNECTED) {
477                     if (!mWaitingForScoConnection && mState != EXITING) {
478                         // apparently our connection to the headset has dropped.
479                         // we won't be able to continue voicedialing.
480                         if (false) Log.d(TAG, "lost sco connection");
481 
482                         mHandler.post(new ErrorRunnable(
483                                 R.string.headset_connection_lost));
484 
485                         exitActivity();
486                     }
487                 }
488             }
489         }
490     }
491 
askToTryAgain()492     private void askToTryAgain() {
493         // get work off UAPI thread
494         mHandler.post(new Runnable() {
495             public void run() {
496                 if (mAlertDialog != null) {
497                     mAlertDialog.dismiss();
498                 }
499 
500                 mHandler.removeCallbacks(mMicFlasher);
501                 ((TextView)findViewById(R.id.state)).setText(R.string.please_try_again);
502                 findViewById(R.id.state).setVisibility(View.VISIBLE);
503                 findViewById(R.id.microphone_view).setVisibility(View.INVISIBLE);
504                 findViewById(R.id.retry_view).setVisibility(View.VISIBLE);
505 
506                 if (mUsingBluetooth) {
507                     mState = SPEAKING_TRY_AGAIN;
508                     mTtsParams.put(TextToSpeech.Engine.KEY_PARAM_UTTERANCE_ID,
509                             TRY_AGAIN_UTTERANCE);
510                     mTts.speak(getString(R.string.no_results_tts),
511                         TextToSpeech.QUEUE_FLUSH,
512                         mTtsParams);
513 
514                     // Normally, the we will start listening after the
515                     // utterance completes.  As a fallback in case the utterance
516                     // does not complete, post a delayed runnable to fire
517                     // the intent.
518                     mFallbackRunnable = new OnTtsCompletionRunnable(true);
519                     mHandler.postDelayed(mFallbackRunnable, MAX_TTS_DELAY);
520                 } else {
521                     try {
522                         Thread.sleep(playSound(ToneGenerator.TONE_PROP_NACK));
523                     } catch (InterruptedException e) {
524                     }
525                     // we are not using tts, so we just start listening again.
526                     listenForCommand();
527                 }
528             }
529         });
530     }
531 
performChoice()532     private void performChoice() {
533         if (mUsingBluetooth) {
534             String sentenceSpoken = spaceOutDigits(
535                     mChosenAction.getStringExtra(
536                         RecognizerEngine.SENTENCE_EXTRA));
537 
538             mState = SPEAKING_CHOSEN_ACTION;
539             mTtsParams.put(TextToSpeech.Engine.KEY_PARAM_UTTERANCE_ID,
540                     CHOSEN_ACTION_UTTERANCE);
541             mTts.speak(sentenceSpoken,
542                 TextToSpeech.QUEUE_FLUSH,
543                 mTtsParams);
544 
545             // Normally, the intent will be dispatched after the
546             // utterance completes.  As a fallback in case the utterance
547             // does not complete, post a delayed runnable to fire
548             // the intent.
549             mFallbackRunnable = new OnTtsCompletionRunnable(true);
550             mHandler.postDelayed(mFallbackRunnable, MAX_TTS_DELAY);
551         } else {
552             // just dispatch the intent
553             startActivityHelp(mChosenAction);
554             finish();
555         }
556     }
557 
waitForChoice()558     private void waitForChoice() {
559         if (mUsingBluetooth) {
560             // We are running in bluetooth mode, and we have
561             // multiple matches.  Speak the choices and let
562             // the user choose.
563 
564             // We will not start listening until the utterance
565             // of the choice list completes.
566             speakChoices();
567 
568             // Normally, listening will begin after the
569             // utterance completes.  As a fallback in case the utterance
570             // does not complete, post a delayed runnable to begin
571             // listening.
572             mFallbackRunnable = new OnTtsCompletionRunnable(true);
573             mHandler.postDelayed(mFallbackRunnable, MAX_TTS_DELAY);
574         } else {
575             // We are not running in bluetooth mode, so all
576             // we need to do is wait for the user to select
577             // a choice from the alert dialog.  We will wait
578             // indefinitely for this.
579             mState = WAITING_FOR_DIALOG_CHOICE;
580         }
581     }
582 
583     private class CommandRecognizerClient implements RecognizerClient {
584          static final int MIN_VOLUME_TO_SKIP = 2;
585        /**
586          * Called by the {@link RecognizerEngine} when the microphone is started.
587          */
onMicrophoneStart(InputStream mic)588         public void onMicrophoneStart(InputStream mic) {
589             if (false) Log.d(TAG, "onMicrophoneStart");
590 
591            if (!mUsingBluetooth) {
592                playSound(ToneGenerator.TONE_PROP_BEEP);
593 
594                 int ringVolume = mAudioManager.getStreamVolume(
595                         AudioManager.STREAM_RING);
596                 Log.d(TAG, "ringVolume " + ringVolume);
597 
598                 if (ringVolume >= MIN_VOLUME_TO_SKIP) {
599                     // now we're playing a sound, and corrupting the input sample.
600                     // So we need to pull that junk off of the input stream so that the
601                     // recognizer won't see it.
602                     try {
603                         skipBeep(mic);
604                     } catch (java.io.IOException e) {
605                         Log.e(TAG, "IOException " + e);
606                     }
607                 } else {
608                     if (false) Log.d(TAG, "no tone");
609                 }
610             }
611 
612             mHandler.post(new Runnable() {
613                 public void run() {
614                     findViewById(R.id.retry_view).setVisibility(View.INVISIBLE);
615                     findViewById(R.id.microphone_loading_view).setVisibility(
616                             View.INVISIBLE);
617                     ((TextView)findViewById(R.id.state)).setText(R.string.listening);
618                     mHandler.post(mMicFlasher);
619                 }
620             });
621         }
622 
623         /**
624          *  Beep detection
625          */
626         private static final int START_WINDOW_MS = 500;  // Beep detection window duration in ms
627         private static final int SINE_FREQ = 400;        // base sine frequency on beep
628         private static final int NUM_PERIODS_BLOCK = 10; // number of sine periods in one energy averaging block
629         private static final int THRESHOLD = 8;          // absolute pseudo energy threshold
630         private static final int START = 0;              // beep detection start
631         private static final int RISING = 1;             // beep rising edge start
632         private static final int TOP = 2;                // beep constant energy detected
633 
skipBeep(InputStream is)634         void skipBeep(InputStream is) throws IOException {
635             int sampleCount = ((mSampleRate / SINE_FREQ) * NUM_PERIODS_BLOCK);
636             int blockSize = 2 * sampleCount; // energy averaging block
637 
638             if (is == null || blockSize == 0) {
639                 return;
640             }
641 
642             byte[] buf = new byte[blockSize];
643             int maxBytes = 2 * ((START_WINDOW_MS * mSampleRate) / 1000);
644             maxBytes = ((maxBytes-1) / blockSize + 1) * blockSize;
645 
646             int count = 0;
647             int state = START;  // detection state
648             long prevE = 0; // previous pseudo energy
649             long peak = 0;
650             int threshold =  THRESHOLD*sampleCount;  // absolute energy threshold
651             Log.d(TAG, "blockSize " + blockSize);
652 
653             while (count < maxBytes) {
654                 int cnt = 0;
655                 while (cnt < blockSize) {
656                     int n = is.read(buf, cnt, blockSize-cnt);
657                     if (n < 0) {
658                         throw new java.io.IOException();
659                     }
660                     cnt += n;
661                 }
662 
663                 // compute pseudo energy
664                 cnt = blockSize;
665                 long sumx = 0;
666                 long sumxx = 0;
667                 while (cnt >= 2) {
668                     short smp = (short)((buf[cnt - 1] << 8) + (buf[cnt - 2] & 0xFF));
669                     sumx += smp;
670                     sumxx += smp*smp;
671                     cnt -= 2;
672                 }
673                 long energy = (sumxx*sampleCount - sumx*sumx)/(sampleCount*sampleCount);
674                 Log.d(TAG, "sumx " + sumx + " sumxx " + sumxx + " ee " + energy);
675 
676                 switch (state) {
677                     case START:
678                         if (energy > threshold && energy > (prevE * 2) && prevE != 0) {
679                             // rising edge if energy doubled and > abs threshold
680                             state = RISING;
681                             if (false) Log.d(TAG, "start RISING: " + count +" time: "+ (((1000*count)/2)/mSampleRate));
682                         }
683                         break;
684                     case RISING:
685                         if (energy < threshold || energy < (prevE / 2)){
686                             // energy fell back below half of previous, back to start
687                             if (false) Log.d(TAG, "back to START: " + count +" time: "+ (((1000*count)/2)/mSampleRate));
688                             peak = 0;
689                             state = START;
690                         } else if (energy > (prevE / 2) && energy < (prevE * 2)) {
691                             // Start of constant energy
692                             if (false) Log.d(TAG, "start TOP: " + count +" time: "+ (((1000*count)/2)/mSampleRate));
693                             if (peak < energy) {
694                                 peak = energy;
695                             }
696                             state = TOP;
697                         }
698                         break;
699                     case TOP:
700                         if (energy < threshold || energy < (peak / 2)) {
701                             // e went to less than half of the peak
702                             if (false) Log.d(TAG, "end TOP: " + count +" time: "+ (((1000*count)/2)/mSampleRate));
703                             return;
704                         }
705                         break;
706                     }
707                 prevE = energy;
708                 count += blockSize;
709             }
710             if (false) Log.d(TAG, "no beep detected, timed out");
711         }
712 
713         /**
714          * Called by the {@link RecognizerEngine} if the recognizer fails.
715          */
onRecognitionFailure(final String msg)716         public void onRecognitionFailure(final String msg) {
717             if (false) Log.d(TAG, "onRecognitionFailure " + msg);
718             // we had zero results.  Just try again.
719             askToTryAgain();
720         }
721 
722         /**
723          * Called by the {@link RecognizerEngine} on an internal error.
724          */
onRecognitionError(final String msg)725         public void onRecognitionError(final String msg) {
726             if (false) Log.d(TAG, "onRecognitionError " + msg);
727             mHandler.post(new ErrorRunnable(R.string.recognition_error));
728             exitActivity();
729         }
730 
731         /**
732          * Called by the {@link RecognizerEngine} when it succeeds.  If there is
733          * only one item, then the Intent is dispatched immediately.
734          * If there are more, then an AlertDialog is displayed and the user is
735          * prompted to select.
736          * @param intents a list of Intents corresponding to the sentences.
737          */
onRecognitionSuccess(final Intent[] intents)738         public void onRecognitionSuccess(final Intent[] intents) {
739             if (false) Log.d(TAG, "CommandRecognizerClient onRecognitionSuccess " +
740                     intents.length);
741             if (mState != WAITING_FOR_COMMAND) {
742                 if (false) Log.d(TAG, "not waiting for command, ignoring");
743                 return;
744             }
745 
746             // store the intents in a member variable so that we can access it
747             // later when the user chooses which action to perform.
748             mAvailableChoices = intents;
749 
750             mHandler.post(new Runnable() {
751                 public void run() {
752                     if (!mUsingBluetooth) {
753                         playSound(ToneGenerator.TONE_PROP_ACK);
754                     }
755                     mHandler.removeCallbacks(mMicFlasher);
756 
757                     String[] sentences = new String[intents.length];
758                     for (int i = 0; i < intents.length; i++) {
759                         sentences[i] = intents[i].getStringExtra(
760                                 RecognizerEngine.SENTENCE_EXTRA);
761                     }
762 
763                     if (intents.length == 0) {
764                         onRecognitionFailure("zero intents");
765                         return;
766                     }
767 
768                     if (intents.length > 0) {
769                         // see if we the response was "exit" or "cancel".
770                         String value = intents[0].getStringExtra(
771                             RecognizerEngine.SEMANTIC_EXTRA);
772                         if (false) Log.d(TAG, "value " + value);
773                         if ("X".equals(value)) {
774                             exitActivity();
775                             return;
776                         }
777                     }
778 
779                     if (mUsingBluetooth &&
780                             (intents.length == 1 ||
781                              !Intent.ACTION_CALL_PRIVILEGED.equals(
782                                     intents[0].getAction()))) {
783                         // When we're running in bluetooth mode, we expect
784                         // that the user is not looking at the screen and cannot
785                         // interact with the device in any way besides voice
786                         // commands.  In this case we need to minimize how many
787                         // interactions the user has to perform in order to call
788                         // someone.
789                         // So if there is only one match, instead of making the
790                         // user confirm, we just assume it's correct, speak
791                         // the choice over TTS, and then dispatch it.
792                         // If there are multiple matches for some intent type
793                         // besides "call", it's too difficult for the user to
794                         // explain which one they meant, so we just take the highest
795                         // confidence match and dispatch that.
796 
797                         // Speak the sentence for the action we are about
798                         // to dispatch so that the user knows what is happening.
799                         mChosenAction = intents[0];
800                         performChoice();
801 
802                         return;
803                     } else {
804                         // Either we are not running in bluetooth mode,
805                         // or we had multiple matches.  Either way, we need
806                         // the user to confirm the choice.
807                         // Put up a dialog from which the user can select
808                         // his/her choice.
809                         DialogInterface.OnCancelListener cancelListener =
810                             new DialogInterface.OnCancelListener() {
811 
812                             public void onCancel(DialogInterface dialog) {
813                                 if (false) {
814                                     Log.d(TAG, "cancelListener.onCancel");
815                                 }
816                                 dialog.dismiss();
817                                 finish();
818                             }
819                        };
820 
821                         DialogInterface.OnClickListener clickListener =
822                             new DialogInterface.OnClickListener() {
823 
824                             public void onClick(DialogInterface dialog, int which) {
825                                 if (false) {
826                                     Log.d(TAG, "clickListener.onClick " + which);
827                                 }
828                                 startActivityHelp(intents[which]);
829                                 dialog.dismiss();
830                                 finish();
831                             }
832                         };
833 
834                         DialogInterface.OnClickListener negativeListener =
835                             new DialogInterface.OnClickListener() {
836 
837                             public void onClick(DialogInterface dialog, int which) {
838                                 if (false) {
839                                     Log.d(TAG, "negativeListener.onClick " +
840                                         which);
841                                 }
842                                 dialog.dismiss();
843                                 finish();
844                             }
845                         };
846 
847                         mAlertDialog =
848                                 new AlertDialog.Builder(VoiceDialerActivity.this,
849                                         AlertDialog.THEME_HOLO_DARK)
850                                 .setTitle(R.string.title)
851                                 .setItems(sentences, clickListener)
852                                 .setOnCancelListener(cancelListener)
853                                 .setNegativeButton(android.R.string.cancel,
854                                         negativeListener)
855                                 .show();
856 
857                         waitForChoice();
858                     }
859                 }
860             });
861         }
862     }
863 
864     private class ChoiceRecognizerClient implements RecognizerClient {
onRecognitionSuccess(final Intent[] intents)865         public void onRecognitionSuccess(final Intent[] intents) {
866             if (false) Log.d(TAG, "ChoiceRecognizerClient onRecognitionSuccess");
867             if (mState != WAITING_FOR_CHOICE) {
868                 if (false) Log.d(TAG, "not waiting for choice, ignoring");
869                 return;
870             }
871 
872             if (mAlertDialog != null) {
873                 mAlertDialog.dismiss();
874             }
875 
876             // disregard all but the first intent.
877             if (intents.length > 0) {
878                 String value = intents[0].getStringExtra(
879                     RecognizerEngine.SEMANTIC_EXTRA);
880                 if (false) Log.d(TAG, "value " + value);
881                 if ("R".equals(value)) {
882                     if (mUsingBluetooth) {
883                         mHandler.post(new GreetingRunnable());
884                     } else {
885                         listenForCommand();
886                     }
887                 } else if ("X".equals(value)) {
888                     exitActivity();
889                 } else {
890                     // it's a phone type response
891                     mChosenAction = null;
892                     for (int i = 0; i < mAvailableChoices.length; i++) {
893                         if (value.equalsIgnoreCase(
894                                 mAvailableChoices[i].getStringExtra(
895                                         CommandRecognizerEngine.PHONE_TYPE_EXTRA))) {
896                             mChosenAction = mAvailableChoices[i];
897                         }
898                     }
899 
900                     if (mChosenAction != null) {
901                         performChoice();
902                     } else {
903                         // invalid choice
904                         if (false) Log.d(TAG, "invalid choice" + value);
905 
906                         if (mUsingBluetooth) {
907                             mTtsParams.remove(TextToSpeech.Engine.KEY_PARAM_UTTERANCE_ID);
908                             mTts.speak(getString(R.string.invalid_choice_tts),
909                                 TextToSpeech.QUEUE_FLUSH,
910                                 mTtsParams);
911                         }
912                         waitForChoice();
913                     }
914                 }
915             }
916         }
917 
onRecognitionFailure(String msg)918         public void onRecognitionFailure(String msg) {
919             if (false) Log.d(TAG, "ChoiceRecognizerClient onRecognitionFailure");
920             exitActivity();
921         }
922 
onRecognitionError(String err)923         public void onRecognitionError(String err) {
924             if (false) Log.d(TAG, "ChoiceRecognizerClient onRecognitionError");
925             mHandler.post(new ErrorRunnable(R.string.recognition_error));
926             exitActivity();
927         }
928 
onMicrophoneStart(InputStream mic)929         public void onMicrophoneStart(InputStream mic) {
930             if (false) Log.d(TAG, "ChoiceRecognizerClient onMicrophoneStart");
931         }
932     }
933 
speakChoices()934     private void speakChoices() {
935         if (false) Log.d(TAG, "speakChoices");
936         mState = SPEAKING_CHOICES;
937 
938         String sentenceSpoken = spaceOutDigits(
939                 mAvailableChoices[0].getStringExtra(
940                     RecognizerEngine.SENTENCE_EXTRA));
941 
942         // When we have multiple choices, they will be of the form
943         // "call jack jones at home", "call jack jones on mobile".
944         // Speak the entire first sentence, then the last word from each
945         // of the remaining sentences.  This will come out to something
946         // like "call jack jones at home mobile or work".
947         StringBuilder builder = new StringBuilder();
948         builder.append(sentenceSpoken);
949 
950         int count = mAvailableChoices.length;
951         for (int i=1; i < count; i++) {
952             if (i == count-1) {
953                 builder.append(" or ");
954             } else {
955                 builder.append(" ");
956             }
957             String tmpSentence = mAvailableChoices[i].getStringExtra(
958                     RecognizerEngine.SENTENCE_EXTRA);
959             String[] words = tmpSentence.trim().split(" ");
960             builder.append(words[words.length-1]);
961         }
962         mTtsParams.put(TextToSpeech.Engine.KEY_PARAM_UTTERANCE_ID,
963                 CHOICES_UTTERANCE);
964         mTts.speak(builder.toString(),
965             TextToSpeech.QUEUE_ADD,
966             mTtsParams);
967     }
968 
969 
spaceOutDigits(String sentenceDisplay)970     private static String spaceOutDigits(String sentenceDisplay) {
971         // if we have a sentence of the form "dial 123 456 7890",
972         // we need to insert a space between each digit, otherwise
973         // the TTS engine will say "dial one hundred twenty three...."
974         // When there already is a space, we also insert a comma,
975         // so that it pauses between sections.  For the displayable
976         // sentence "dial 123 456 7890" it will speak
977         // "dial 1 2 3, 4 5 6, 7 8 9 0"
978         char buffer[] = sentenceDisplay.toCharArray();
979         StringBuilder builder = new StringBuilder();
980         boolean buildingNumber = false;
981         int l = sentenceDisplay.length();
982         for (int index = 0; index < l; index++) {
983             char c = buffer[index];
984             if (Character.isDigit(c)) {
985                 if (buildingNumber) {
986                     builder.append(" ");
987                 }
988                 buildingNumber = true;
989                 builder.append(c);
990             } else if (c == ' ') {
991                 if (buildingNumber) {
992                     builder.append(",");
993                 } else {
994                     builder.append(" ");
995                 }
996             } else {
997                 buildingNumber = false;
998                 builder.append(c);
999             }
1000         }
1001         return builder.toString();
1002     }
1003 
startActivityHelp(Intent intent)1004     private void startActivityHelp(Intent intent) {
1005         startActivity(intent);
1006     }
1007 
listenForCommand()1008     private void listenForCommand() {
1009         if (false) Log.d(TAG, ""
1010                 + "Command(): MICROPHONE_EXTRA: "+getArg(MICROPHONE_EXTRA)+
1011                 ", CONTACTS_EXTRA: "+getArg(CONTACTS_EXTRA));
1012 
1013         mState = WAITING_FOR_COMMAND;
1014         mRecognizerThread = new Thread() {
1015             public void run() {
1016                 mCommandEngine.recognize(mCommandClient,
1017                         VoiceDialerActivity.this,
1018                         newFile(getArg(MICROPHONE_EXTRA)),
1019                         mSampleRate);
1020             }
1021         };
1022         mRecognizerThread.start();
1023     }
1024 
listenForChoice()1025     private void listenForChoice() {
1026         if (false) Log.d(TAG, "listenForChoice(): MICROPHONE_EXTRA: " +
1027                 getArg(MICROPHONE_EXTRA));
1028 
1029         mState = WAITING_FOR_CHOICE;
1030         mRecognizerThread = new Thread() {
1031             public void run() {
1032                 mPhoneTypeChoiceEngine.recognize(mChoiceClient,
1033                         VoiceDialerActivity.this,
1034                         newFile(getArg(MICROPHONE_EXTRA)), mSampleRate);
1035             }
1036         };
1037         mRecognizerThread.start();
1038     }
1039 
exitActivity()1040     private void exitActivity() {
1041         synchronized(this) {
1042             if (mState != EXITING) {
1043                 if (false) Log.d(TAG, "exitActivity");
1044                 mState = SPEAKING_GOODBYE;
1045                 if (mUsingBluetooth) {
1046                     mTtsParams.put(TextToSpeech.Engine.KEY_PARAM_UTTERANCE_ID,
1047                             GOODBYE_UTTERANCE);
1048                     mTts.speak(getString(R.string.goodbye_tts),
1049                         TextToSpeech.QUEUE_FLUSH,
1050                         mTtsParams);
1051                     // Normally, the activity will finish() after the
1052                     // utterance completes.  As a fallback in case the utterance
1053                     // does not complete, post a delayed runnable finish the
1054                     // activity.
1055                     mFallbackRunnable = new OnTtsCompletionRunnable(true);
1056                     mHandler.postDelayed(mFallbackRunnable, MAX_TTS_DELAY);
1057                 } else {
1058                     mHandler.postDelayed(new Runnable() {
1059                         public void run() {
1060                             finish();
1061                         }
1062                     }, EXIT_DELAY);
1063                 }
1064             }
1065         }
1066     }
1067 
getArg(String name)1068     private String getArg(String name) {
1069         if (name == null) return null;
1070         String arg = getIntent().getStringExtra(name);
1071         if (arg != null) return arg;
1072         arg = SystemProperties.get("app.voicedialer." + name);
1073         return arg != null && arg.length() > 0 ? arg : null;
1074     }
1075 
newFile(String name)1076     private static File newFile(String name) {
1077         return name != null ? new File(name) : null;
1078     }
1079 
playSound(int toneType)1080     private int playSound(int toneType) {
1081         int msecDelay = 1;
1082 
1083         // use the MediaPlayer to prompt the user
1084         if (mToneGenerator != null) {
1085             mToneGenerator.startTone(toneType);
1086             msecDelay = StrictMath.max(msecDelay, 300);
1087         }
1088         // use the Vibrator to prompt the user
1089         if (mAudioManager != null &&
1090                 mAudioManager.shouldVibrate(AudioManager.VIBRATE_TYPE_RINGER)) {
1091             final int VIBRATOR_TIME = 150;
1092             final int VIBRATOR_GUARD_TIME = 150;
1093             Vibrator vibrator = (Vibrator)getSystemService(VIBRATOR_SERVICE);
1094             vibrator.vibrate(VIBRATOR_TIME);
1095             msecDelay = StrictMath.max(msecDelay,
1096                     VIBRATOR_TIME + VIBRATOR_GUARD_TIME);
1097         }
1098 
1099 
1100         return msecDelay;
1101     }
1102 
onDestroy()1103     protected void onDestroy() {
1104         synchronized(this) {
1105             mState = EXITING;
1106         }
1107 
1108         if (mAlertDialog != null) {
1109             mAlertDialog.dismiss();
1110         }
1111 
1112         // set the volume back to the level it was before we started.
1113         mAudioManager.setStreamVolume(AudioManager.STREAM_BLUETOOTH_SCO,
1114                                       mBluetoothVoiceVolume, 0);
1115         mAudioManager.abandonAudioFocus(null);
1116 
1117         // shut down bluetooth, if it exists
1118         if (mBluetoothHeadset != null) {
1119             mBluetoothHeadset.stopVoiceRecognition(mBluetoothDevice);
1120             mAdapter.closeProfileProxy(BluetoothProfile.HEADSET, mBluetoothHeadset);
1121             mBluetoothHeadset = null;
1122         }
1123 
1124         // shut down recognizer and wait for the thread to complete
1125         if (mRecognizerThread !=  null) {
1126             mRecognizerThread.interrupt();
1127             try {
1128                 mRecognizerThread.join();
1129             } catch (InterruptedException e) {
1130                 if (false) Log.d(TAG, "onStop mRecognizerThread.join exception " + e);
1131             }
1132             mRecognizerThread = null;
1133         }
1134 
1135         // clean up UI
1136         mHandler.removeCallbacks(mMicFlasher);
1137         mHandler.removeMessages(0);
1138 
1139         if (mTts != null) {
1140             mTts.stop();
1141             mTts.shutdown();
1142             mTts = null;
1143         }
1144         unregisterReceiver(mReceiver);
1145 
1146         super.onDestroy();
1147 
1148         releaseWakeLock();
1149     }
1150 
acquireWakeLock(Context context)1151     private void acquireWakeLock(Context context) {
1152         if (mWakeLock == null) {
1153             PowerManager pm = (PowerManager)context.getSystemService(Context.POWER_SERVICE);
1154             mWakeLock = pm.newWakeLock(PowerManager.PARTIAL_WAKE_LOCK,
1155                                        "VoiceDialer");
1156             mWakeLock.acquire();
1157         }
1158     }
1159 
releaseWakeLock()1160     private void releaseWakeLock() {
1161         if (mWakeLock != null) {
1162             mWakeLock.release();
1163             mWakeLock = null;
1164         }
1165     }
1166 
1167     private Runnable mMicFlasher = new Runnable() {
1168         int visible = View.VISIBLE;
1169 
1170         public void run() {
1171             findViewById(R.id.microphone_view).setVisibility(visible);
1172             findViewById(R.id.state).setVisibility(visible);
1173             visible = visible == View.VISIBLE ? View.INVISIBLE : View.VISIBLE;
1174             mHandler.postDelayed(this, 750);
1175         }
1176     };
1177 }
1178