// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

package org.chromium.content.browser;

import android.content.ComponentName;
import android.content.Context;
import android.content.Intent;
import android.content.pm.PackageManager;
import android.content.pm.PackageManager.NameNotFoundException;
import android.content.pm.ResolveInfo;
import android.content.pm.ServiceInfo;
import android.os.Bundle;
import android.speech.RecognitionListener;
import android.speech.RecognitionService;
import android.speech.RecognizerIntent;
import android.speech.SpeechRecognizer;

import org.chromium.base.CalledByNative;
import org.chromium.base.JNINamespace;

import java.util.ArrayList;
import java.util.List;

/**
 * This class uses Android's SpeechRecognizer to perform speech recognition for the Web Speech API
 * on Android. Using Android's platform recognizer offers several benefits, like good quality and
 * good local fallback when no data connection is available.
 */
@JNINamespace("content")
public class SpeechRecognition {

    // Constants describing the speech recognition provider we depend on.
    private static final String PROVIDER_PACKAGE_NAME = "com.google.android.googlequicksearchbox";
    private static final int PROVIDER_MIN_VERSION = 300207030;

    // We track the recognition state to remember what events we need to send when recognition is
    // being aborted. Once Android's recognizer is cancelled, its listener won't yield any more
    // events, but we still need to call OnSoundEnd and OnAudioEnd if corresponding On*Start were
    // called before.
    private static final int STATE_IDLE = 0;
    private static final int STATE_AWAITING_SPEECH = 1;
    private static final int STATE_CAPTURING_SPEECH = 2;
    private int mState;

    // The speech recognition provider (if any) matching PROVIDER_PACKAGE_NAME and
    // PROVIDER_MIN_VERSION as selected by initialize().
    private static ComponentName sRecognitionProvider;

    private final Context mContext;
    private final Intent mIntent;
    private final RecognitionListener mListener;
    private SpeechRecognizer mRecognizer;

    // Native pointer to C++ SpeechRecognizerImplAndroid.
    private long mNativeSpeechRecognizerImplAndroid;

    // Remember if we are using continuous recognition.
    private boolean mContinuous;

    // Internal class to handle events from Android's SpeechRecognizer and route them to native.
    class Listener implements RecognitionListener {

        @Override
        public void onBeginningOfSpeech() {
            mState = STATE_CAPTURING_SPEECH;
            nativeOnSoundStart(mNativeSpeechRecognizerImplAndroid);
        }

        @Override
        public void onBufferReceived(byte[] buffer) { }

        @Override
        public void onEndOfSpeech() {
            // Ignore onEndOfSpeech in continuous mode to let terminate() take care of ending
            // events. The Android API documentation is vague as to when onEndOfSpeech is called in
            // continuous mode, whereas the Web Speech API defines a stronger semantic on the
            // equivalent (onsoundend) event. Thus, the only way to provide a valid onsoundend
            // event is to trigger it when the last result is received or the session is aborted.
            if (!mContinuous) {
                nativeOnSoundEnd(mNativeSpeechRecognizerImplAndroid);
                // Since Android doesn't have a dedicated event for when audio capture is finished,
                // we fire it after speech has ended.
                nativeOnAudioEnd(mNativeSpeechRecognizerImplAndroid);
                mState = STATE_IDLE;
            }
        }

        @Override
        public void onError(int error) {
            int code = SpeechRecognitionError.NONE;

            // Translate Android SpeechRecognizer errors to Web Speech API errors.
            switch (error) {
                case SpeechRecognizer.ERROR_AUDIO:
                    code = SpeechRecognitionError.AUDIO;
                    break;
                case SpeechRecognizer.ERROR_CLIENT:
                    code = SpeechRecognitionError.ABORTED;
                    break;
                case SpeechRecognizer.ERROR_RECOGNIZER_BUSY:
                case SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS:
                    code = SpeechRecognitionError.NOT_ALLOWED;
                    break;
                case SpeechRecognizer.ERROR_NETWORK_TIMEOUT:
                case SpeechRecognizer.ERROR_NETWORK:
                case SpeechRecognizer.ERROR_SERVER:
                    code = SpeechRecognitionError.NETWORK;
                    break;
                case SpeechRecognizer.ERROR_NO_MATCH:
                    code = SpeechRecognitionError.NO_MATCH;
                    break;
                case SpeechRecognizer.ERROR_SPEECH_TIMEOUT:
                    code = SpeechRecognitionError.NO_SPEECH;
                    break;
                default:
                    assert false;
                    return;
            }

            terminate(code);
        }

        @Override
        public void onEvent(int event, Bundle bundle) { }

        @Override
        public void onPartialResults(Bundle bundle) {
            handleResults(bundle, true);
        }

        @Override
        public void onReadyForSpeech(Bundle bundle) {
            mState = STATE_AWAITING_SPEECH;
            nativeOnAudioStart(mNativeSpeechRecognizerImplAndroid);
        }

        @Override
        public void onResults(Bundle bundle) {
            handleResults(bundle, false);
            // We assume that onResults is called only once, at the end of a session, thus we
            // terminate. If one day the recognition provider changes dictation mode behavior to
            // call onResults several times, we should terminate only if (!mContinuous).
            terminate(SpeechRecognitionError.NONE);
        }

        @Override
        public void onRmsChanged(float rms) { }

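        // Extracts the recognition hypotheses and confidence scores from |bundle| and forwards
        // them to native. |provisional| indicates whether these are interim (partial) results.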
        private void handleResults(Bundle bundle, boolean provisional) {
            if (mContinuous && provisional) {
                // In continuous mode, Android's recognizer sends final results as provisional.
                provisional = false;
            }

            ArrayList<String> list = bundle.getStringArrayList(
                    SpeechRecognizer.RESULTS_RECOGNITION);
            String[] results = list.toArray(new String[list.size()]);

            float[] scores = bundle.getFloatArray(SpeechRecognizer.CONFIDENCE_SCORES);

            nativeOnRecognitionResults(mNativeSpeechRecognizerImplAndroid,
                                       results,
                                       scores,
                                       provisional);
        }
    }

    /**
     * This method must be called before any instance of SpeechRecognition can be created. It will
     * query Android's package manager to find a suitable speech recognition provider that supports
     * continuous recognition.
     */
    public static boolean initialize(Context context) {
        if (!SpeechRecognizer.isRecognitionAvailable(context))
            return false;

        PackageManager pm = context.getPackageManager();
        Intent intent = new Intent(RecognitionService.SERVICE_INTERFACE);
        final List<ResolveInfo> list = pm.queryIntentServices(intent, PackageManager.GET_SERVICES);

        for (ResolveInfo resolve : list) {
            ServiceInfo service = resolve.serviceInfo;

            if (!service.packageName.equals(PROVIDER_PACKAGE_NAME))
                continue;

            int versionCode;
            try {
                versionCode = pm.getPackageInfo(service.packageName, 0).versionCode;
            } catch (NameNotFoundException e) {
                continue;
            }

            if (versionCode < PROVIDER_MIN_VERSION)
                continue;

            sRecognitionProvider = new ComponentName(service.packageName, service.name);

            return true;
        }

        // If we reach this point, we failed to find a suitable recognition provider.
        return false;
    }

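    // Builds the recognition intent and creates the SpeechRecognizer, bound to the provider
    // selected by initialize() when available and to the system default otherwise.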
    private SpeechRecognition(final Context context, long nativeSpeechRecognizerImplAndroid) {
        mContext = context;
        mContinuous = false;
        mNativeSpeechRecognizerImplAndroid = nativeSpeechRecognizerImplAndroid;
        mListener = new Listener();
        mIntent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);

        if (sRecognitionProvider != null) {
            mRecognizer = SpeechRecognizer.createSpeechRecognizer(mContext, sRecognitionProvider);
        } else {
            // It is possible to force-enable the speech recognition web platform feature (using a
            // command-line flag) even if initialize() failed to find the PROVIDER_PACKAGE_NAME
            // provider, in which case the first available speech recognition provider is used.
            // Caveat: Continuous mode may not work as expected with a different provider.
            mRecognizer = SpeechRecognizer.createSpeechRecognizer(mContext);
        }

        mRecognizer.setRecognitionListener(mListener);
    }

    // This function destroys everything when recognition is done, taking care to properly tear
    // down by calling On{Sound,Audio}End if corresponding On{Audio,Sound}Start were called.
    private void terminate(int error) {

        if (mState != STATE_IDLE) {
            if (mState == STATE_CAPTURING_SPEECH) {
                nativeOnSoundEnd(mNativeSpeechRecognizerImplAndroid);
            }
            nativeOnAudioEnd(mNativeSpeechRecognizerImplAndroid);
            mState = STATE_IDLE;
        }

        if (error != SpeechRecognitionError.NONE)
            nativeOnRecognitionError(mNativeSpeechRecognizerImplAndroid, error);

        mRecognizer.destroy();
        mRecognizer = null;
        nativeOnRecognitionEnd(mNativeSpeechRecognizerImplAndroid);
        mNativeSpeechRecognizerImplAndroid = 0;
    }

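    // Factory invoked over JNI by the C++ SpeechRecognizerImplAndroid, which passes a pointer to
    // itself so that recognition events can be routed back to it.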
    @CalledByNative
    private static SpeechRecognition createSpeechRecognition(
            Context context, long nativeSpeechRecognizerImplAndroid) {
        return new SpeechRecognition(context, nativeSpeechRecognizerImplAndroid);
    }

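    // Starts a recognition session. "android.speech.extra.DICTATION_MODE" is not a public
    // RecognizerIntent constant; it is a provider-specific extra used here to request continuous
    // recognition (hence the caveat in the constructor about other providers).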
    @CalledByNative
    private void startRecognition(String language, boolean continuous, boolean interimResults) {
        if (mRecognizer == null)
            return;

        mContinuous = continuous;
        mIntent.putExtra("android.speech.extra.DICTATION_MODE", continuous);
        mIntent.putExtra(RecognizerIntent.EXTRA_LANGUAGE, language);
        mIntent.putExtra(RecognizerIntent.EXTRA_PARTIAL_RESULTS, interimResults);
        mRecognizer.startListening(mIntent);
    }

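    // Cancels the ongoing session and reports it to the native side as aborted.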
    @CalledByNative
    private void abortRecognition() {
        if (mRecognizer == null)
            return;

        mRecognizer.cancel();
        terminate(SpeechRecognitionError.ABORTED);
    }

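    // Stops audio capture; speech captured so far is still recognized and delivered through
    // onResults(). Clearing mContinuous lets the usual non-continuous ending events fire.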
    @CalledByNative
    private void stopRecognition() {
        if (mRecognizer == null)
            return;

        mContinuous = false;
        mRecognizer.stopListening();
    }

    // Native JNI calls to content/browser/speech/speech_recognizer_impl_android.cc
    private native void nativeOnAudioStart(long nativeSpeechRecognizerImplAndroid);
    private native void nativeOnSoundStart(long nativeSpeechRecognizerImplAndroid);
    private native void nativeOnSoundEnd(long nativeSpeechRecognizerImplAndroid);
    private native void nativeOnAudioEnd(long nativeSpeechRecognizerImplAndroid);
    private native void nativeOnRecognitionResults(long nativeSpeechRecognizerImplAndroid,
                                                   String[] results,
                                                   float[] scores,
                                                   boolean provisional);
    private native void nativeOnRecognitionError(long nativeSpeechRecognizerImplAndroid, int error);
    private native void nativeOnRecognitionEnd(long nativeSpeechRecognizerImplAndroid);
}