• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2010 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5  * use this file except in compliance with the License. You may obtain a copy of
6  * the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13  * License for the specific language governing permissions and limitations under
14  * the License.
15  */
16 
17 package android.speech;
18 
19 import android.Manifest;
20 import android.annotation.NonNull;
21 import android.annotation.Nullable;
22 import android.annotation.SdkConstant;
23 import android.annotation.SdkConstant.SdkConstantType;
24 import android.annotation.SuppressLint;
25 import android.app.AppOpsManager;
26 import android.app.Service;
27 import android.content.AttributionSource;
28 import android.content.Context;
29 import android.content.ContextParams;
30 import android.content.Intent;
31 import android.content.PermissionChecker;
32 import android.os.Binder;
33 import android.os.Bundle;
34 import android.os.Handler;
35 import android.os.IBinder;
36 import android.os.Looper;
37 import android.os.Message;
38 import android.os.RemoteException;
39 import android.util.Log;
40 
41 import com.android.internal.util.function.pooled.PooledLambda;
42 
43 import java.lang.ref.WeakReference;
44 import java.util.Objects;
45 
46 /**
47  * This class provides a base class for recognition service implementations. This class should be
48  * extended only in case you wish to implement a new speech recognizer. Please note that the
49  * implementation of this service is stateless.
50  */
51 public abstract class RecognitionService extends Service {
52     /**
53      * The {@link Intent} that must be declared as handled by the service.
54      */
55     @SdkConstant(SdkConstantType.SERVICE_ACTION)
56     public static final String SERVICE_INTERFACE = "android.speech.RecognitionService";
57 
58     /**
59      * Name under which a RecognitionService component publishes information about itself.
60      * This meta-data should reference an XML resource containing a
61      * <code>&lt;{@link android.R.styleable#RecognitionService recognition-service}&gt;</code> or
62      * <code>&lt;{@link android.R.styleable#RecognitionService on-device-recognition-service}
63      * &gt;</code> tag.
64      */
65     public static final String SERVICE_META_DATA = "android.speech";
66 
67     /** Log messages identifier */
68     private static final String TAG = "RecognitionService";
69 
70     /** Debugging flag */
71     private static final boolean DBG = false;
72 
73     /** Binder of the recognition service */
74     private RecognitionServiceBinder mBinder = new RecognitionServiceBinder(this);
75 
76     /**
77      * The current callback of an application that invoked the
78      *
79      * {@link RecognitionService#onStartListening(Intent, Callback)} method
80      */
81     private Callback mCurrentCallback = null;
82 
83     private boolean mStartedDataDelivery;
84 
85     private static final int MSG_START_LISTENING = 1;
86 
87     private static final int MSG_STOP_LISTENING = 2;
88 
89     private static final int MSG_CANCEL = 3;
90 
91     private static final int MSG_RESET = 4;
92 
93     private final Handler mHandler = new Handler() {
94         @Override
95         public void handleMessage(Message msg) {
96             switch (msg.what) {
97                 case MSG_START_LISTENING:
98                     StartListeningArgs args = (StartListeningArgs) msg.obj;
99                     dispatchStartListening(args.mIntent, args.mListener, args.mAttributionSource);
100                     break;
101                 case MSG_STOP_LISTENING:
102                     dispatchStopListening((IRecognitionListener) msg.obj);
103                     break;
104                 case MSG_CANCEL:
105                     dispatchCancel((IRecognitionListener) msg.obj);
106                     break;
107                 case MSG_RESET:
108                     dispatchClearCallback();
109                     break;
110             }
111         }
112     };
113 
dispatchStartListening(Intent intent, final IRecognitionListener listener, @NonNull AttributionSource attributionSource)114     private void dispatchStartListening(Intent intent, final IRecognitionListener listener,
115             @NonNull AttributionSource attributionSource) {
116         try {
117             if (mCurrentCallback == null) {
118                 boolean preflightPermissionCheckPassed = checkPermissionForPreflight(
119                         attributionSource);
120                 if (preflightPermissionCheckPassed) {
121                     if (DBG) {
122                         Log.d(TAG, "created new mCurrentCallback, listener = "
123                                 + listener.asBinder());
124                     }
125                     mCurrentCallback = new Callback(listener, attributionSource);
126                     RecognitionService.this.onStartListening(intent, mCurrentCallback);
127                 }
128 
129                 if (!preflightPermissionCheckPassed || !checkPermissionAndStartDataDelivery()) {
130                     listener.onError(SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS);
131                     if (preflightPermissionCheckPassed) {
132                         // If we attempted to start listening, cancel the callback
133                         RecognitionService.this.onCancel(mCurrentCallback);
134                         dispatchClearCallback();
135                     }
136                     Log.i(TAG, "caller doesn't have permission:"
137                             + Manifest.permission.RECORD_AUDIO);
138                 }
139             } else {
140                 listener.onError(SpeechRecognizer.ERROR_RECOGNIZER_BUSY);
141                 Log.i(TAG, "concurrent startListening received - ignoring this call");
142             }
143         } catch (RemoteException e) {
144             Log.d(TAG, "onError call from startListening failed");
145         }
146     }
147 
dispatchStopListening(IRecognitionListener listener)148     private void dispatchStopListening(IRecognitionListener listener) {
149         try {
150             if (mCurrentCallback == null) {
151                 listener.onError(SpeechRecognizer.ERROR_CLIENT);
152                 Log.w(TAG, "stopListening called with no preceding startListening - ignoring");
153             } else if (mCurrentCallback.mListener.asBinder() != listener.asBinder()) {
154                 listener.onError(SpeechRecognizer.ERROR_RECOGNIZER_BUSY);
155                 Log.w(TAG, "stopListening called by other caller than startListening - ignoring");
156             } else { // the correct state
157                 RecognitionService.this.onStopListening(mCurrentCallback);
158             }
159         } catch (RemoteException e) { // occurs if onError fails
160             Log.d(TAG, "onError call from stopListening failed");
161         }
162     }
163 
dispatchCancel(IRecognitionListener listener)164     private void dispatchCancel(IRecognitionListener listener) {
165         if (mCurrentCallback == null) {
166             if (DBG) Log.d(TAG, "cancel called with no preceding startListening - ignoring");
167         } else if (mCurrentCallback.mListener.asBinder() != listener.asBinder()) {
168             Log.w(TAG, "cancel called by client who did not call startListening - ignoring");
169         } else { // the correct state
170             RecognitionService.this.onCancel(mCurrentCallback);
171             dispatchClearCallback();
172             if (DBG) Log.d(TAG, "canceling - setting mCurrentCallback to null");
173         }
174     }
175 
dispatchClearCallback()176     private void dispatchClearCallback() {
177         finishDataDelivery();
178         mCurrentCallback = null;
179         mStartedDataDelivery = false;
180     }
181 
182     private class StartListeningArgs {
183         public final Intent mIntent;
184 
185         public final IRecognitionListener mListener;
186         public final @NonNull AttributionSource mAttributionSource;
187 
StartListeningArgs(Intent intent, IRecognitionListener listener, @NonNull AttributionSource attributionSource)188         public StartListeningArgs(Intent intent, IRecognitionListener listener,
189                 @NonNull AttributionSource attributionSource) {
190             this.mIntent = intent;
191             this.mListener = listener;
192             this.mAttributionSource = attributionSource;
193         }
194     }
195 
    /**
     * Notifies the service that it should start listening for speech.
     *
     * <p> If you are recognizing speech from the microphone, in this callback you
     * should create an attribution context for the caller such that when you access
     * the mic the caller would be properly blamed (and their permission checked in
     * the process) for accessing the microphone and that you served as a proxy for
     * this sensitive data (and your permissions would be checked in the process).
     * You should also open the mic in this callback via the attribution context
     * and close the mic before returning the recognized result. If you don't do
     * that, the caller is still blamed with you as the proxy, and you additionally
     * get one more blame on yourself when you open the microphone.
     *
     * <pre>
     * Context attributionContext = context.createContext(new ContextParams.Builder()
     *     .setNextAttributionSource(callback.getCallingAttributionSource())
     *     .build());
     *
     * AudioRecord recorder = new AudioRecord.Builder()
     *     .setContext(attributionContext)
     *     . . .
     *     .build();
     *
     * recorder.startRecording();
     * </pre>
     *
     * @param recognizerIntent contains parameters for the recognition to be performed. The intent
     *        may also contain optional extras, see {@link RecognizerIntent}. If these values are
     *        not set explicitly, default values should be used by the recognizer.
     * @param listener that will receive the service's callbacks
     */
    protected abstract void onStartListening(Intent recognizerIntent, Callback listener);
228 
    /**
     * Notifies the service that it should cancel the speech recognition.
     *
     * @param listener the callback that was passed to
     *        {@link #onStartListening(Intent, Callback)} for the recognition being cancelled
     */
    protected abstract void onCancel(Callback listener);
233 
    /**
     * Notifies the service that it should stop listening for speech. Speech captured so far should
     * be recognized as if the user had stopped speaking at this point. This method is only called
     * if the application calls it explicitly.
     *
     * @param listener the callback that was passed to
     *        {@link #onStartListening(Intent, Callback)} for the recognition being stopped
     */
    protected abstract void onStopListening(Callback listener);
240 
241     @Override
242     @SuppressLint("MissingNullability")
createContext(@onNull ContextParams contextParams)243     public Context createContext(@NonNull ContextParams contextParams) {
244         if (contextParams.getNextAttributionSource() != null) {
245             if (mHandler.getLooper().equals(Looper.myLooper())) {
246                 handleAttributionContextCreation(contextParams.getNextAttributionSource());
247             } else {
248                 mHandler.sendMessage(
249                         PooledLambda.obtainMessage(this::handleAttributionContextCreation,
250                                 contextParams.getNextAttributionSource()));
251             }
252         }
253         return super.createContext(contextParams);
254     }
255 
handleAttributionContextCreation(@onNull AttributionSource attributionSource)256     private void handleAttributionContextCreation(@NonNull AttributionSource attributionSource) {
257         if (mCurrentCallback != null
258                 && mCurrentCallback.mCallingAttributionSource.equals(attributionSource)) {
259             mCurrentCallback.mAttributionContextCreated = true;
260         }
261     }
262 
263     @Override
onBind(final Intent intent)264     public final IBinder onBind(final Intent intent) {
265         if (DBG) Log.d(TAG, "onBind, intent=" + intent);
266         return mBinder;
267     }
268 
269     @Override
onDestroy()270     public void onDestroy() {
271         if (DBG) Log.d(TAG, "onDestroy");
272         finishDataDelivery();
273         mCurrentCallback = null;
274         mBinder.clearReference();
275         super.onDestroy();
276     }
277 
278     /**
279      * This class receives callbacks from the speech recognition service and forwards them to the
280      * user. An instance of this class is passed to the
281      * {@link RecognitionService#onStartListening(Intent, Callback)} method. Recognizers may call
282      * these methods on any thread.
283      */
284     public class Callback {
285         private final IRecognitionListener mListener;
286         private final @NonNull AttributionSource mCallingAttributionSource;
287         private @Nullable Context mAttributionContext;
288         private boolean mAttributionContextCreated;
289 
Callback(IRecognitionListener listener, @NonNull AttributionSource attributionSource)290         private Callback(IRecognitionListener listener,
291                 @NonNull AttributionSource attributionSource) {
292             mListener = listener;
293             mCallingAttributionSource = attributionSource;
294         }
295 
296         /**
297          * The service should call this method when the user has started to speak.
298          */
beginningOfSpeech()299         public void beginningOfSpeech() throws RemoteException {
300             mListener.onBeginningOfSpeech();
301         }
302 
303         /**
304          * The service should call this method when sound has been received. The purpose of this
305          * function is to allow giving feedback to the user regarding the captured audio.
306          *
307          * @param buffer a buffer containing a sequence of big-endian 16-bit integers representing a
308          *        single channel audio stream. The sample rate is implementation dependent.
309          */
bufferReceived(byte[] buffer)310         public void bufferReceived(byte[] buffer) throws RemoteException {
311             mListener.onBufferReceived(buffer);
312         }
313 
314         /**
315          * The service should call this method after the user stops speaking.
316          */
endOfSpeech()317         public void endOfSpeech() throws RemoteException {
318             mListener.onEndOfSpeech();
319         }
320 
321         /**
322          * The service should call this method when a network or recognition error occurred.
323          *
324          * @param error code is defined in {@link SpeechRecognizer}
325          */
error(@peechRecognizer.RecognitionError int error)326         public void error(@SpeechRecognizer.RecognitionError int error) throws RemoteException {
327             Message.obtain(mHandler, MSG_RESET).sendToTarget();
328             mListener.onError(error);
329         }
330 
331         /**
332          * The service should call this method when partial recognition results are available. This
333          * method can be called at any time between {@link #beginningOfSpeech()} and
334          * {@link #results(Bundle)} when partial results are ready. This method may be called zero,
335          * one or multiple times for each call to {@link SpeechRecognizer#startListening(Intent)},
336          * depending on the speech recognition service implementation.
337          *
338          * @param partialResults the returned results. To retrieve the results in
339          *        ArrayList&lt;String&gt; format use {@link Bundle#getStringArrayList(String)} with
340          *        {@link SpeechRecognizer#RESULTS_RECOGNITION} as a parameter
341          */
partialResults(Bundle partialResults)342         public void partialResults(Bundle partialResults) throws RemoteException {
343             mListener.onPartialResults(partialResults);
344         }
345 
346         /**
347          * The service should call this method when the endpointer is ready for the user to start
348          * speaking.
349          *
350          * @param params parameters set by the recognition service. Reserved for future use.
351          */
readyForSpeech(Bundle params)352         public void readyForSpeech(Bundle params) throws RemoteException {
353             mListener.onReadyForSpeech(params);
354         }
355 
356         /**
357          * The service should call this method when recognition results are ready.
358          *
359          * @param results the recognition results. To retrieve the results in {@code
360          *        ArrayList<String>} format use {@link Bundle#getStringArrayList(String)} with
361          *        {@link SpeechRecognizer#RESULTS_RECOGNITION} as a parameter
362          */
results(Bundle results)363         public void results(Bundle results) throws RemoteException {
364             Message.obtain(mHandler, MSG_RESET).sendToTarget();
365             mListener.onResults(results);
366         }
367 
368         /**
369          * The service should call this method when the sound level in the audio stream has changed.
370          * There is no guarantee that this method will be called.
371          *
372          * @param rmsdB the new RMS dB value
373          */
rmsChanged(float rmsdB)374         public void rmsChanged(float rmsdB) throws RemoteException {
375             mListener.onRmsChanged(rmsdB);
376         }
377 
378         /**
379          * Return the Linux uid assigned to the process that sent you the current transaction that
380          * is being processed. This is obtained from {@link Binder#getCallingUid()}.
381          */
getCallingUid()382         public int getCallingUid() {
383             return mCallingAttributionSource.getUid();
384         }
385 
386         /**
387          * Gets the permission identity of the calling app. If you want to attribute
388          * the mic access to the calling app you can create an attribution context
389          * via {@link android.content.Context#createContext(android.content.ContextParams)}
390          * and passing this identity to {@link
391          * android.content.ContextParams.Builder#setNextAttributionSource(AttributionSource)}.
392          *
393          * @return The permission identity of the calling app.
394          *
395          * @see android.content.ContextParams.Builder#setNextAttributionSource(
396          * AttributionSource)
397          */
398         @SuppressLint("CallbackMethodName")
getCallingAttributionSource()399         public @NonNull AttributionSource getCallingAttributionSource() {
400             return mCallingAttributionSource;
401         }
402 
getAttributionContextForCaller()403         @NonNull Context getAttributionContextForCaller() {
404             if (mAttributionContext == null) {
405                 mAttributionContext = createContext(new ContextParams.Builder()
406                         .setNextAttributionSource(mCallingAttributionSource)
407                         .build());
408             }
409             return mAttributionContext;
410         }
411     }
412 
413     /** Binder of the recognition service */
414     private static final class RecognitionServiceBinder extends IRecognitionService.Stub {
415         private final WeakReference<RecognitionService> mServiceRef;
416 
RecognitionServiceBinder(RecognitionService service)417         public RecognitionServiceBinder(RecognitionService service) {
418             mServiceRef = new WeakReference<>(service);
419         }
420 
421         @Override
startListening(Intent recognizerIntent, IRecognitionListener listener, @NonNull AttributionSource attributionSource)422         public void startListening(Intent recognizerIntent, IRecognitionListener listener,
423                 @NonNull AttributionSource attributionSource) {
424             Objects.requireNonNull(attributionSource);
425             attributionSource.enforceCallingUid();
426             if (DBG) Log.d(TAG, "startListening called by:" + listener.asBinder());
427             final RecognitionService service = mServiceRef.get();
428             if (service != null) {
429                 service.mHandler.sendMessage(Message.obtain(service.mHandler,
430                         MSG_START_LISTENING, service.new StartListeningArgs(
431                                 recognizerIntent, listener, attributionSource)));
432             }
433         }
434 
435         @Override
stopListening(IRecognitionListener listener)436         public void stopListening(IRecognitionListener listener) {
437             if (DBG) Log.d(TAG, "stopListening called by:" + listener.asBinder());
438             final RecognitionService service = mServiceRef.get();
439             if (service != null) {
440                 service.mHandler.sendMessage(
441                         Message.obtain(service.mHandler, MSG_STOP_LISTENING, listener));
442             }
443         }
444 
445         @Override
cancel(IRecognitionListener listener, boolean isShutdown)446         public void cancel(IRecognitionListener listener, boolean isShutdown) {
447             if (DBG) Log.d(TAG, "cancel called by:" + listener.asBinder());
448             final RecognitionService service = mServiceRef.get();
449             if (service != null) {
450                 service.mHandler.sendMessage(
451                         Message.obtain(service.mHandler, MSG_CANCEL, listener));
452             }
453         }
454 
clearReference()455         public void clearReference() {
456             mServiceRef.clear();
457         }
458     }
459 
checkPermissionAndStartDataDelivery()460     private boolean checkPermissionAndStartDataDelivery() {
461         if (mCurrentCallback.mAttributionContextCreated) {
462             return true;
463         }
464         if (PermissionChecker.checkPermissionAndStartDataDelivery(
465                 RecognitionService.this, Manifest.permission.RECORD_AUDIO,
466                 mCurrentCallback.getAttributionContextForCaller().getAttributionSource(),
467                 /*message*/ null) == PermissionChecker.PERMISSION_GRANTED) {
468             mStartedDataDelivery = true;
469         }
470         return mStartedDataDelivery;
471     }
472 
checkPermissionForPreflight(AttributionSource attributionSource)473     private boolean checkPermissionForPreflight(AttributionSource attributionSource) {
474         return PermissionChecker.checkPermissionForPreflight(RecognitionService.this,
475                 Manifest.permission.RECORD_AUDIO, attributionSource)
476                 == PermissionChecker.PERMISSION_GRANTED;
477     }
478 
finishDataDelivery()479     void finishDataDelivery() {
480         if (mStartedDataDelivery) {
481             mStartedDataDelivery = false;
482             final String op = AppOpsManager.permissionToOp(Manifest.permission.RECORD_AUDIO);
483             PermissionChecker.finishDataDelivery(RecognitionService.this, op,
484                     mCurrentCallback.getAttributionContextForCaller().getAttributionSource());
485         }
486     }
487 }
488