1 /* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 * use this file except in compliance with the License. You may obtain a copy of 6 * the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 * License for the specific language governing permissions and limitations under 14 * the License. 15 */ 16 17 package android.speech; 18 19 import android.Manifest; 20 import android.annotation.NonNull; 21 import android.annotation.Nullable; 22 import android.annotation.SdkConstant; 23 import android.annotation.SdkConstant.SdkConstantType; 24 import android.annotation.SuppressLint; 25 import android.app.AppOpsManager; 26 import android.app.Service; 27 import android.content.AttributionSource; 28 import android.content.Context; 29 import android.content.ContextParams; 30 import android.content.Intent; 31 import android.content.PermissionChecker; 32 import android.os.Binder; 33 import android.os.Bundle; 34 import android.os.Handler; 35 import android.os.IBinder; 36 import android.os.Looper; 37 import android.os.Message; 38 import android.os.RemoteException; 39 import android.util.Log; 40 41 import com.android.internal.util.function.pooled.PooledLambda; 42 43 import java.lang.ref.WeakReference; 44 import java.util.Objects; 45 46 /** 47 * This class provides a base class for recognition service implementations. This class should be 48 * extended only in case you wish to implement a new speech recognizer. Please note that the 49 * implementation of this service is stateless. 50 */ 51 public abstract class RecognitionService extends Service { 52 /** 53 * The {@link Intent} that must be declared as handled by the service. 54 */ 55 @SdkConstant(SdkConstantType.SERVICE_ACTION) 56 public static final String SERVICE_INTERFACE = "android.speech.RecognitionService"; 57 58 /** 59 * Name under which a RecognitionService component publishes information about itself. 60 * This meta-data should reference an XML resource containing a 61 * <code><{@link android.R.styleable#RecognitionService recognition-service}></code> or 62 * <code><{@link android.R.styleable#RecognitionService on-device-recognition-service} 63 * ></code> tag. 64 */ 65 public static final String SERVICE_META_DATA = "android.speech"; 66 67 /** Log messages identifier */ 68 private static final String TAG = "RecognitionService"; 69 70 /** Debugging flag */ 71 private static final boolean DBG = false; 72 73 /** Binder of the recognition service */ 74 private RecognitionServiceBinder mBinder = new RecognitionServiceBinder(this); 75 76 /** 77 * The current callback of an application that invoked the 78 * 79 * {@link RecognitionService#onStartListening(Intent, Callback)} method 80 */ 81 private Callback mCurrentCallback = null; 82 83 private boolean mStartedDataDelivery; 84 85 private static final int MSG_START_LISTENING = 1; 86 87 private static final int MSG_STOP_LISTENING = 2; 88 89 private static final int MSG_CANCEL = 3; 90 91 private static final int MSG_RESET = 4; 92 93 private final Handler mHandler = new Handler() { 94 @Override 95 public void handleMessage(Message msg) { 96 switch (msg.what) { 97 case MSG_START_LISTENING: 98 StartListeningArgs args = (StartListeningArgs) msg.obj; 99 dispatchStartListening(args.mIntent, args.mListener, args.mAttributionSource); 100 break; 101 case MSG_STOP_LISTENING: 102 dispatchStopListening((IRecognitionListener) msg.obj); 103 break; 104 case MSG_CANCEL: 105 dispatchCancel((IRecognitionListener) msg.obj); 106 break; 107 case MSG_RESET: 108 dispatchClearCallback(); 109 break; 110 } 111 } 112 }; 113 dispatchStartListening(Intent intent, final IRecognitionListener listener, @NonNull AttributionSource attributionSource)114 private void dispatchStartListening(Intent intent, final IRecognitionListener listener, 115 @NonNull AttributionSource attributionSource) { 116 try { 117 if (mCurrentCallback == null) { 118 boolean preflightPermissionCheckPassed = checkPermissionForPreflight( 119 attributionSource); 120 if (preflightPermissionCheckPassed) { 121 if (DBG) { 122 Log.d(TAG, "created new mCurrentCallback, listener = " 123 + listener.asBinder()); 124 } 125 mCurrentCallback = new Callback(listener, attributionSource); 126 RecognitionService.this.onStartListening(intent, mCurrentCallback); 127 } 128 129 if (!preflightPermissionCheckPassed || !checkPermissionAndStartDataDelivery()) { 130 listener.onError(SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS); 131 if (preflightPermissionCheckPassed) { 132 // If we attempted to start listening, cancel the callback 133 RecognitionService.this.onCancel(mCurrentCallback); 134 dispatchClearCallback(); 135 } 136 Log.i(TAG, "caller doesn't have permission:" 137 + Manifest.permission.RECORD_AUDIO); 138 } 139 } else { 140 listener.onError(SpeechRecognizer.ERROR_RECOGNIZER_BUSY); 141 Log.i(TAG, "concurrent startListening received - ignoring this call"); 142 } 143 } catch (RemoteException e) { 144 Log.d(TAG, "onError call from startListening failed"); 145 } 146 } 147 dispatchStopListening(IRecognitionListener listener)148 private void dispatchStopListening(IRecognitionListener listener) { 149 try { 150 if (mCurrentCallback == null) { 151 listener.onError(SpeechRecognizer.ERROR_CLIENT); 152 Log.w(TAG, "stopListening called with no preceding startListening - ignoring"); 153 } else if (mCurrentCallback.mListener.asBinder() != listener.asBinder()) { 154 listener.onError(SpeechRecognizer.ERROR_RECOGNIZER_BUSY); 155 Log.w(TAG, "stopListening called by other caller than startListening - ignoring"); 156 } else { // the correct state 157 RecognitionService.this.onStopListening(mCurrentCallback); 158 } 159 } catch (RemoteException e) { // occurs if onError fails 160 Log.d(TAG, "onError call from stopListening failed"); 161 } 162 } 163 dispatchCancel(IRecognitionListener listener)164 private void dispatchCancel(IRecognitionListener listener) { 165 if (mCurrentCallback == null) { 166 if (DBG) Log.d(TAG, "cancel called with no preceding startListening - ignoring"); 167 } else if (mCurrentCallback.mListener.asBinder() != listener.asBinder()) { 168 Log.w(TAG, "cancel called by client who did not call startListening - ignoring"); 169 } else { // the correct state 170 RecognitionService.this.onCancel(mCurrentCallback); 171 dispatchClearCallback(); 172 if (DBG) Log.d(TAG, "canceling - setting mCurrentCallback to null"); 173 } 174 } 175 dispatchClearCallback()176 private void dispatchClearCallback() { 177 finishDataDelivery(); 178 mCurrentCallback = null; 179 mStartedDataDelivery = false; 180 } 181 182 private class StartListeningArgs { 183 public final Intent mIntent; 184 185 public final IRecognitionListener mListener; 186 public final @NonNull AttributionSource mAttributionSource; 187 StartListeningArgs(Intent intent, IRecognitionListener listener, @NonNull AttributionSource attributionSource)188 public StartListeningArgs(Intent intent, IRecognitionListener listener, 189 @NonNull AttributionSource attributionSource) { 190 this.mIntent = intent; 191 this.mListener = listener; 192 this.mAttributionSource = attributionSource; 193 } 194 } 195 196 /** 197 * Notifies the service that it should start listening for speech. 198 * 199 * <p> If you are recognizing speech from the microphone, in this callback you 200 * should create an attribution context for the caller such that when you access 201 * the mic the caller would be properly blamed (and their permission checked in 202 * the process) for accessing the microphone and that you served as a proxy for 203 * this sensitive data (and your permissions would be checked in the process). 204 * You should also open the mic in this callback via the attribution context 205 * and close the mic before returning the recognized result. If you don't do 206 * that then the caller would be blamed and you as being a proxy as well as you 207 * would get one more blame on yourself when you open the microphone. 208 * 209 * <pre> 210 * Context attributionContext = context.createContext(new ContextParams.Builder() 211 * .setNextAttributionSource(callback.getCallingAttributionSource()) 212 * .build()); 213 * 214 * AudioRecord recorder = AudioRecord.Builder() 215 * .setContext(attributionContext); 216 * . . . 217 * .build(); 218 * 219 * recorder.startRecording() 220 * </pre> 221 * 222 * @param recognizerIntent contains parameters for the recognition to be performed. The intent 223 * may also contain optional extras, see {@link RecognizerIntent}. If these values are 224 * not set explicitly, default values should be used by the recognizer. 225 * @param listener that will receive the service's callbacks 226 */ onStartListening(Intent recognizerIntent, Callback listener)227 protected abstract void onStartListening(Intent recognizerIntent, Callback listener); 228 229 /** 230 * Notifies the service that it should cancel the speech recognition. 231 */ onCancel(Callback listener)232 protected abstract void onCancel(Callback listener); 233 234 /** 235 * Notifies the service that it should stop listening for speech. Speech captured so far should 236 * be recognized as if the user had stopped speaking at this point. This method is only called 237 * if the application calls it explicitly. 238 */ onStopListening(Callback listener)239 protected abstract void onStopListening(Callback listener); 240 241 @Override 242 @SuppressLint("MissingNullability") createContext(@onNull ContextParams contextParams)243 public Context createContext(@NonNull ContextParams contextParams) { 244 if (contextParams.getNextAttributionSource() != null) { 245 if (mHandler.getLooper().equals(Looper.myLooper())) { 246 handleAttributionContextCreation(contextParams.getNextAttributionSource()); 247 } else { 248 mHandler.sendMessage( 249 PooledLambda.obtainMessage(this::handleAttributionContextCreation, 250 contextParams.getNextAttributionSource())); 251 } 252 } 253 return super.createContext(contextParams); 254 } 255 handleAttributionContextCreation(@onNull AttributionSource attributionSource)256 private void handleAttributionContextCreation(@NonNull AttributionSource attributionSource) { 257 if (mCurrentCallback != null 258 && mCurrentCallback.mCallingAttributionSource.equals(attributionSource)) { 259 mCurrentCallback.mAttributionContextCreated = true; 260 } 261 } 262 263 @Override onBind(final Intent intent)264 public final IBinder onBind(final Intent intent) { 265 if (DBG) Log.d(TAG, "onBind, intent=" + intent); 266 return mBinder; 267 } 268 269 @Override onDestroy()270 public void onDestroy() { 271 if (DBG) Log.d(TAG, "onDestroy"); 272 finishDataDelivery(); 273 mCurrentCallback = null; 274 mBinder.clearReference(); 275 super.onDestroy(); 276 } 277 278 /** 279 * This class receives callbacks from the speech recognition service and forwards them to the 280 * user. An instance of this class is passed to the 281 * {@link RecognitionService#onStartListening(Intent, Callback)} method. Recognizers may call 282 * these methods on any thread. 283 */ 284 public class Callback { 285 private final IRecognitionListener mListener; 286 private final @NonNull AttributionSource mCallingAttributionSource; 287 private @Nullable Context mAttributionContext; 288 private boolean mAttributionContextCreated; 289 Callback(IRecognitionListener listener, @NonNull AttributionSource attributionSource)290 private Callback(IRecognitionListener listener, 291 @NonNull AttributionSource attributionSource) { 292 mListener = listener; 293 mCallingAttributionSource = attributionSource; 294 } 295 296 /** 297 * The service should call this method when the user has started to speak. 298 */ beginningOfSpeech()299 public void beginningOfSpeech() throws RemoteException { 300 mListener.onBeginningOfSpeech(); 301 } 302 303 /** 304 * The service should call this method when sound has been received. The purpose of this 305 * function is to allow giving feedback to the user regarding the captured audio. 306 * 307 * @param buffer a buffer containing a sequence of big-endian 16-bit integers representing a 308 * single channel audio stream. The sample rate is implementation dependent. 309 */ bufferReceived(byte[] buffer)310 public void bufferReceived(byte[] buffer) throws RemoteException { 311 mListener.onBufferReceived(buffer); 312 } 313 314 /** 315 * The service should call this method after the user stops speaking. 316 */ endOfSpeech()317 public void endOfSpeech() throws RemoteException { 318 mListener.onEndOfSpeech(); 319 } 320 321 /** 322 * The service should call this method when a network or recognition error occurred. 323 * 324 * @param error code is defined in {@link SpeechRecognizer} 325 */ error(@peechRecognizer.RecognitionError int error)326 public void error(@SpeechRecognizer.RecognitionError int error) throws RemoteException { 327 Message.obtain(mHandler, MSG_RESET).sendToTarget(); 328 mListener.onError(error); 329 } 330 331 /** 332 * The service should call this method when partial recognition results are available. This 333 * method can be called at any time between {@link #beginningOfSpeech()} and 334 * {@link #results(Bundle)} when partial results are ready. This method may be called zero, 335 * one or multiple times for each call to {@link SpeechRecognizer#startListening(Intent)}, 336 * depending on the speech recognition service implementation. 337 * 338 * @param partialResults the returned results. To retrieve the results in 339 * ArrayList<String> format use {@link Bundle#getStringArrayList(String)} with 340 * {@link SpeechRecognizer#RESULTS_RECOGNITION} as a parameter 341 */ partialResults(Bundle partialResults)342 public void partialResults(Bundle partialResults) throws RemoteException { 343 mListener.onPartialResults(partialResults); 344 } 345 346 /** 347 * The service should call this method when the endpointer is ready for the user to start 348 * speaking. 349 * 350 * @param params parameters set by the recognition service. Reserved for future use. 351 */ readyForSpeech(Bundle params)352 public void readyForSpeech(Bundle params) throws RemoteException { 353 mListener.onReadyForSpeech(params); 354 } 355 356 /** 357 * The service should call this method when recognition results are ready. 358 * 359 * @param results the recognition results. To retrieve the results in {@code 360 * ArrayList<String>} format use {@link Bundle#getStringArrayList(String)} with 361 * {@link SpeechRecognizer#RESULTS_RECOGNITION} as a parameter 362 */ results(Bundle results)363 public void results(Bundle results) throws RemoteException { 364 Message.obtain(mHandler, MSG_RESET).sendToTarget(); 365 mListener.onResults(results); 366 } 367 368 /** 369 * The service should call this method when the sound level in the audio stream has changed. 370 * There is no guarantee that this method will be called. 371 * 372 * @param rmsdB the new RMS dB value 373 */ rmsChanged(float rmsdB)374 public void rmsChanged(float rmsdB) throws RemoteException { 375 mListener.onRmsChanged(rmsdB); 376 } 377 378 /** 379 * Return the Linux uid assigned to the process that sent you the current transaction that 380 * is being processed. This is obtained from {@link Binder#getCallingUid()}. 381 */ getCallingUid()382 public int getCallingUid() { 383 return mCallingAttributionSource.getUid(); 384 } 385 386 /** 387 * Gets the permission identity of the calling app. If you want to attribute 388 * the mic access to the calling app you can create an attribution context 389 * via {@link android.content.Context#createContext(android.content.ContextParams)} 390 * and passing this identity to {@link 391 * android.content.ContextParams.Builder#setNextAttributionSource(AttributionSource)}. 392 * 393 * @return The permission identity of the calling app. 394 * 395 * @see android.content.ContextParams.Builder#setNextAttributionSource( 396 * AttributionSource) 397 */ 398 @SuppressLint("CallbackMethodName") getCallingAttributionSource()399 public @NonNull AttributionSource getCallingAttributionSource() { 400 return mCallingAttributionSource; 401 } 402 getAttributionContextForCaller()403 @NonNull Context getAttributionContextForCaller() { 404 if (mAttributionContext == null) { 405 mAttributionContext = createContext(new ContextParams.Builder() 406 .setNextAttributionSource(mCallingAttributionSource) 407 .build()); 408 } 409 return mAttributionContext; 410 } 411 } 412 413 /** Binder of the recognition service */ 414 private static final class RecognitionServiceBinder extends IRecognitionService.Stub { 415 private final WeakReference<RecognitionService> mServiceRef; 416 RecognitionServiceBinder(RecognitionService service)417 public RecognitionServiceBinder(RecognitionService service) { 418 mServiceRef = new WeakReference<>(service); 419 } 420 421 @Override startListening(Intent recognizerIntent, IRecognitionListener listener, @NonNull AttributionSource attributionSource)422 public void startListening(Intent recognizerIntent, IRecognitionListener listener, 423 @NonNull AttributionSource attributionSource) { 424 Objects.requireNonNull(attributionSource); 425 attributionSource.enforceCallingUid(); 426 if (DBG) Log.d(TAG, "startListening called by:" + listener.asBinder()); 427 final RecognitionService service = mServiceRef.get(); 428 if (service != null) { 429 service.mHandler.sendMessage(Message.obtain(service.mHandler, 430 MSG_START_LISTENING, service.new StartListeningArgs( 431 recognizerIntent, listener, attributionSource))); 432 } 433 } 434 435 @Override stopListening(IRecognitionListener listener)436 public void stopListening(IRecognitionListener listener) { 437 if (DBG) Log.d(TAG, "stopListening called by:" + listener.asBinder()); 438 final RecognitionService service = mServiceRef.get(); 439 if (service != null) { 440 service.mHandler.sendMessage( 441 Message.obtain(service.mHandler, MSG_STOP_LISTENING, listener)); 442 } 443 } 444 445 @Override cancel(IRecognitionListener listener, boolean isShutdown)446 public void cancel(IRecognitionListener listener, boolean isShutdown) { 447 if (DBG) Log.d(TAG, "cancel called by:" + listener.asBinder()); 448 final RecognitionService service = mServiceRef.get(); 449 if (service != null) { 450 service.mHandler.sendMessage( 451 Message.obtain(service.mHandler, MSG_CANCEL, listener)); 452 } 453 } 454 clearReference()455 public void clearReference() { 456 mServiceRef.clear(); 457 } 458 } 459 checkPermissionAndStartDataDelivery()460 private boolean checkPermissionAndStartDataDelivery() { 461 if (mCurrentCallback.mAttributionContextCreated) { 462 return true; 463 } 464 if (PermissionChecker.checkPermissionAndStartDataDelivery( 465 RecognitionService.this, Manifest.permission.RECORD_AUDIO, 466 mCurrentCallback.getAttributionContextForCaller().getAttributionSource(), 467 /*message*/ null) == PermissionChecker.PERMISSION_GRANTED) { 468 mStartedDataDelivery = true; 469 } 470 return mStartedDataDelivery; 471 } 472 checkPermissionForPreflight(AttributionSource attributionSource)473 private boolean checkPermissionForPreflight(AttributionSource attributionSource) { 474 return PermissionChecker.checkPermissionForPreflight(RecognitionService.this, 475 Manifest.permission.RECORD_AUDIO, attributionSource) 476 == PermissionChecker.PERMISSION_GRANTED; 477 } 478 finishDataDelivery()479 void finishDataDelivery() { 480 if (mStartedDataDelivery) { 481 mStartedDataDelivery = false; 482 final String op = AppOpsManager.permissionToOp(Manifest.permission.RECORD_AUDIO); 483 PermissionChecker.finishDataDelivery(RecognitionService.this, op, 484 mCurrentCallback.getAttributionContextForCaller().getAttributionSource()); 485 } 486 } 487 } 488