// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

package org.chromium.content.browser;

import android.content.ComponentName;
import android.content.Context;
import android.content.Intent;
import android.content.pm.PackageManager;
import android.content.pm.PackageManager.NameNotFoundException;
import android.content.pm.ResolveInfo;
import android.content.pm.ServiceInfo;
import android.os.Bundle;
import android.speech.RecognitionListener;
import android.speech.RecognitionService;
import android.speech.RecognizerIntent;
import android.speech.SpeechRecognizer;

import org.chromium.base.CalledByNative;
import org.chromium.base.JNINamespace;

import java.util.ArrayList;
import java.util.List;

/**
 * This class uses Android's SpeechRecognizer to perform speech recognition for the Web Speech API
 * on Android. Using Android's platform recognizer offers several benefits, like good quality and
 * good local fallback when no data connection is available.
 */
@JNINamespace("content")
public class SpeechRecognition {

    // Constants describing the speech recognition provider we depend on.
    private static final String PROVIDER_PACKAGE_NAME = "com.google.android.googlequicksearchbox";
    private static final int PROVIDER_MIN_VERSION = 300207030;

    // We track the recognition state to remember what events we need to send when recognition is
    // being aborted. Once Android's recognizer is cancelled, its listener won't yield any more
    // events, but we still need to call OnSoundEnd and OnAudioEnd if corresponding On*Start were
    // called before.
    private static final int STATE_IDLE = 0;
    private static final int STATE_AWAITING_SPEECH = 1;
    private static final int STATE_CAPTURING_SPEECH = 2;
    private int mState;

    // The speech recognition provider (if any) matching PROVIDER_PACKAGE_NAME and
    // PROVIDER_MIN_VERSION as selected by initialize().
    private static ComponentName sRecognitionProvider;

    private final Context mContext;
    private final Intent mIntent;
    private final RecognitionListener mListener;
    private SpeechRecognizer mRecognizer;

    // Native pointer to C++ SpeechRecognizerImplAndroid.
    private long mNativeSpeechRecognizerImplAndroid;

    // Remember if we are using continuous recognition.
    private boolean mContinuous;

    // Internal class to handle events from Android's SpeechRecognizer and route them to native.
    class Listener implements RecognitionListener {

        @Override
        public void onBeginningOfSpeech() {
            mState = STATE_CAPTURING_SPEECH;
            nativeOnSoundStart(mNativeSpeechRecognizerImplAndroid);
        }

        @Override
        public void onBufferReceived(byte[] buffer) { }

        @Override
        public void onEndOfSpeech() {
            // Ignore onEndOfSpeech in continuous mode to let terminate() take care of ending
            // events. The Android API documentation is vague as to when onEndOfSpeech is called in
            // continuous mode, whereas the Web Speech API defines a stronger semantic on the
            // equivalent (onsoundend) event. Thus, the only way to provide a valid onsoundend
            // event is to trigger it when the last result is received or the session is aborted.
            if (!mContinuous) {
                nativeOnSoundEnd(mNativeSpeechRecognizerImplAndroid);
                // Since Android doesn't have a dedicated event for when audio capture is finished,
                // we fire it after speech has ended.
                nativeOnAudioEnd(mNativeSpeechRecognizerImplAndroid);
                mState = STATE_IDLE;
            }
        }

        @Override
        public void onError(int error) {
            int code = SpeechRecognitionError.NONE;

            // Translate Android SpeechRecognizer errors to Web Speech API errors.
            switch (error) {
                case SpeechRecognizer.ERROR_AUDIO:
                    code = SpeechRecognitionError.AUDIO;
                    break;
                case SpeechRecognizer.ERROR_CLIENT:
                    code = SpeechRecognitionError.ABORTED;
                    break;
                case SpeechRecognizer.ERROR_RECOGNIZER_BUSY:
                case SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS:
                    code = SpeechRecognitionError.NOT_ALLOWED;
                    break;
                case SpeechRecognizer.ERROR_NETWORK_TIMEOUT:
                case SpeechRecognizer.ERROR_NETWORK:
                case SpeechRecognizer.ERROR_SERVER:
                    code = SpeechRecognitionError.NETWORK;
                    break;
                case SpeechRecognizer.ERROR_NO_MATCH:
                    code = SpeechRecognitionError.NO_MATCH;
                    break;
                case SpeechRecognizer.ERROR_SPEECH_TIMEOUT:
                    code = SpeechRecognitionError.NO_SPEECH;
                    break;
                default:
                    assert false;
                    return;
            }

            terminate(code);
        }

        @Override
        public void onEvent(int event, Bundle bundle) { }

        @Override
        public void onPartialResults(Bundle bundle) {
            handleResults(bundle, true);
        }

        @Override
        public void onReadyForSpeech(Bundle bundle) {
            mState = STATE_AWAITING_SPEECH;
            nativeOnAudioStart(mNativeSpeechRecognizerImplAndroid);
        }

        @Override
        public void onResults(Bundle bundle) {
            handleResults(bundle, false);
            // We assume that onResults is called only once, at the end of a session, thus we
            // terminate. If one day the recognition provider changes dictation mode behavior to
            // call onResults several times, we should terminate only if (!mContinuous).
            terminate(SpeechRecognitionError.NONE);
        }

        @Override
        public void onRmsChanged(float rms) { }

        private void handleResults(Bundle bundle, boolean provisional) {
            if (mContinuous && provisional) {
                // In continuous mode, Android's recognizer sends final results as provisional.
                provisional = false;
            }

            ArrayList<String> list = bundle.getStringArrayList(
                    SpeechRecognizer.RESULTS_RECOGNITION);
            String[] results = list.toArray(new String[list.size()]);

            float[] scores = bundle.getFloatArray(SpeechRecognizer.CONFIDENCE_SCORES);

            nativeOnRecognitionResults(mNativeSpeechRecognizerImplAndroid,
                                       results,
                                       scores,
                                       provisional);
        }
    }

    /**
     * This method must be called before any instance of SpeechRecognition can be created. It will
     * query Android's package manager to find a suitable speech recognition provider that supports
     * continuous recognition.
     */
    public static boolean initialize(Context context) {
        if (!SpeechRecognizer.isRecognitionAvailable(context))
            return false;

        PackageManager pm = context.getPackageManager();
        Intent intent = new Intent(RecognitionService.SERVICE_INTERFACE);
        final List<ResolveInfo> list = pm.queryIntentServices(intent, PackageManager.GET_SERVICES);

        for (ResolveInfo resolve : list) {
            ServiceInfo service = resolve.serviceInfo;

            if (!service.packageName.equals(PROVIDER_PACKAGE_NAME))
                continue;

            int versionCode;
            try {
                versionCode = pm.getPackageInfo(service.packageName, 0).versionCode;
            } catch (NameNotFoundException e) {
                continue;
            }

            if (versionCode < PROVIDER_MIN_VERSION)
                continue;

            sRecognitionProvider = new ComponentName(service.packageName, service.name);

            return true;
        }

        // If we reach this point, we failed to find a suitable recognition provider.
        return false;
    }

    private SpeechRecognition(final Context context, long nativeSpeechRecognizerImplAndroid) {
        mContext = context;
        mContinuous = false;
        mNativeSpeechRecognizerImplAndroid = nativeSpeechRecognizerImplAndroid;
        mListener = new Listener();
        mIntent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);

        if (sRecognitionProvider != null) {
            mRecognizer = SpeechRecognizer.createSpeechRecognizer(mContext, sRecognitionProvider);
        } else {
            // It is possible to force-enable the speech recognition web platform feature (using a
            // command-line flag) even if initialize() failed to find the PROVIDER_PACKAGE_NAME
            // provider, in which case the first available speech recognition provider is used.
            // Caveat: Continuous mode may not work as expected with a different provider.
            mRecognizer = SpeechRecognizer.createSpeechRecognizer(mContext);
        }

        mRecognizer.setRecognitionListener(mListener);
    }

    // This function destroys everything when recognition is done, taking care to properly tear
    // down by calling On{Sound,Audio}End if corresponding On{Audio,Sound}Start were called.
    private void terminate(int error) {

        if (mState != STATE_IDLE) {
            if (mState == STATE_CAPTURING_SPEECH) {
                nativeOnSoundEnd(mNativeSpeechRecognizerImplAndroid);
            }
            nativeOnAudioEnd(mNativeSpeechRecognizerImplAndroid);
            mState = STATE_IDLE;
        }

        if (error != SpeechRecognitionError.NONE)
            nativeOnRecognitionError(mNativeSpeechRecognizerImplAndroid, error);

        mRecognizer.destroy();
        mRecognizer = null;
        nativeOnRecognitionEnd(mNativeSpeechRecognizerImplAndroid);
        mNativeSpeechRecognizerImplAndroid = 0;
    }

    @CalledByNative
    private static SpeechRecognition createSpeechRecognition(
            Context context, long nativeSpeechRecognizerImplAndroid) {
        return new SpeechRecognition(context, nativeSpeechRecognizerImplAndroid);
    }

    @CalledByNative
    private void startRecognition(String language, boolean continuous, boolean interimResults) {
        if (mRecognizer == null)
            return;

        mContinuous = continuous;
        mIntent.putExtra("android.speech.extra.DICTATION_MODE", continuous);
        mIntent.putExtra(RecognizerIntent.EXTRA_LANGUAGE, language);
        mIntent.putExtra(RecognizerIntent.EXTRA_PARTIAL_RESULTS, interimResults);
        mRecognizer.startListening(mIntent);
    }

    @CalledByNative
    private void abortRecognition() {
        if (mRecognizer == null)
            return;

        mRecognizer.cancel();
        terminate(SpeechRecognitionError.ABORTED);
    }

    @CalledByNative
    private void stopRecognition() {
        if (mRecognizer == null)
            return;

        mContinuous = false;
        mRecognizer.stopListening();
    }

    // Native JNI calls to content/browser/speech/speech_recognizer_impl_android.cc
    private native void nativeOnAudioStart(long nativeSpeechRecognizerImplAndroid);
    private native void nativeOnSoundStart(long nativeSpeechRecognizerImplAndroid);
    private native void nativeOnSoundEnd(long nativeSpeechRecognizerImplAndroid);
    private native void nativeOnAudioEnd(long nativeSpeechRecognizerImplAndroid);
    private native void nativeOnRecognitionResults(long nativeSpeechRecognizerImplAndroid,
                                                   String[] results,
                                                   float[] scores,
                                                   boolean provisional);
    private native void nativeOnRecognitionError(long nativeSpeechRecognizerImplAndroid, int error);
    private native void nativeOnRecognitionEnd(long nativeSpeechRecognizerImplAndroid);
}