• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package android.media.soundtrigger;
18 import static android.hardware.soundtrigger.SoundTrigger.STATUS_OK;
19 
20 import android.annotation.IntDef;
21 import android.annotation.NonNull;
22 import android.annotation.Nullable;
23 import android.annotation.RequiresPermission;
24 import android.annotation.SystemApi;
25 import android.compat.annotation.UnsupportedAppUsage;
26 import android.hardware.soundtrigger.IRecognitionStatusCallback;
27 import android.hardware.soundtrigger.SoundTrigger;
28 import android.hardware.soundtrigger.SoundTrigger.ModuleProperties;
29 import android.hardware.soundtrigger.SoundTrigger.RecognitionConfig;
30 import android.media.AudioFormat;
31 import android.os.Handler;
32 import android.os.Looper;
33 import android.os.Message;
34 import android.os.ParcelUuid;
35 import android.os.RemoteException;
36 import android.util.Slog;
37 
38 import com.android.internal.app.ISoundTriggerService;
39 
40 import java.io.PrintWriter;
41 import java.lang.annotation.Retention;
42 import java.lang.annotation.RetentionPolicy;
43 import java.util.UUID;
44 
/**
 * A class that allows interaction with the actual sound trigger detection on the system.
 * Sound trigger detection refers to detectors that match generic sound patterns that are
 * not voice-based. Voice-based recognition models should use the {@link
 * VoiceInteractionService} instead. Access to this class is protected by a permission
 * granted only to system or privileged apps.
 *
 * @hide
 */
54 @SystemApi
55 public final class SoundTriggerDetector {
    // Gates the debug log emitted at the top of startRecognition().
    private static final boolean DBG = false;
    // Log tag used for every Slog call in this class.
    private static final String TAG = "SoundTriggerDetector";

    // Message codes posted to mHandler so that service events are relayed to the client
    // Callback from the handler rather than from the binder callback itself.
    // NOTE(review): MSG_AVAILABILITY_CHANGED is never sent or handled in this file.
    private static final int MSG_AVAILABILITY_CHANGED = 1;
    private static final int MSG_SOUND_TRIGGER_DETECTED = 2;
    private static final int MSG_DETECTION_ERROR = 3;
    private static final int MSG_DETECTION_PAUSE = 4;
    private static final int MSG_DETECTION_RESUME = 5;

    // Held by dump(); no other use is visible in this file.
    private final Object mLock = new Object();

    // Remote service that start/stopRecognition calls are forwarded to.
    private final ISoundTriggerService mSoundTriggerService;
    // Identifies the sound model that start/stopRecognition operate on.
    private final UUID mSoundModelId;
    // Client callback invoked from MyHandler.handleMessage().
    private final Callback mCallback;
    // Built on the caller-supplied handler's looper, or default-constructed when none is given.
    private final Handler mHandler;
    // Binder stub passed to the service on start/stopRecognition; posts events to mHandler.
    private final RecognitionCallback mRecognitionCallback;
72 
    /**
     * Marks an {@code int} as a bitwise combination of the {@code RECOGNITION_FLAG_*}
     * constants accepted by {@link #startRecognition(int)}.
     *
     * @hide
     */
    @Retention(RetentionPolicy.SOURCE)
    @IntDef(flag = true,
            value = {
                RECOGNITION_FLAG_NONE,
                RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO,
                RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS,
                RECOGNITION_FLAG_ENABLE_AUDIO_ECHO_CANCELLATION,
                RECOGNITION_FLAG_ENABLE_AUDIO_NOISE_SUPPRESSION,
            })
    public @interface RecognitionFlags {}
84 
    /**
     * Empty flag for {@link #startRecognition(int)}.
     *
     * @hide
     */
    public static final int RECOGNITION_FLAG_NONE = 0;

    /**
     * Recognition flag for {@link #startRecognition(int)} that indicates
     * whether the trigger audio needs to be captured.
     */
    public static final int RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO = 0x1;

    /**
     * Recognition flag for {@link #startRecognition(int)} that indicates
     * whether the recognition should keep going on even after the
     * model triggers.
     * If this flag is specified, it's possible to get multiple
     * triggers after a call to {@link #startRecognition(int)}, if the model
     * triggers multiple times.
     * When this isn't specified, the default behavior is to stop recognition once the
     * trigger happens, until the caller starts recognition again.
     */
    public static final int RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS = 0x2;

    /**
     * Audio capabilities flag for {@link #startRecognition(int)} that indicates
     * if the underlying recognition should use AEC (acoustic echo cancellation).
     * This capability may or may not be supported by the system, and support can be queried
     * by calling {@link SoundTriggerManager#getModuleProperties()} and checking
     * {@link ModuleProperties#audioCapabilities}. The corresponding capabilities field for
     * this flag is {@link SoundTrigger.ModuleProperties#AUDIO_CAPABILITY_ECHO_CANCELLATION}.
     * If this flag is passed without the audio capability supported, there will be no audio
     * effect applied.
     */
    public static final int RECOGNITION_FLAG_ENABLE_AUDIO_ECHO_CANCELLATION = 0x4;

    /**
     * Audio capabilities flag for {@link #startRecognition(int)} that indicates
     * if the underlying recognition should use noise suppression.
     * This capability may or may not be supported by the system, and support can be queried
     * by calling {@link SoundTriggerManager#getModuleProperties()} and checking
     * {@link ModuleProperties#audioCapabilities}. The corresponding capabilities field for
     * this flag is {@link SoundTrigger.ModuleProperties#AUDIO_CAPABILITY_NOISE_SUPPRESSION}.
     * If this flag is passed without the audio capability supported, there will be no audio
     * effect applied.
     */
    public static final int RECOGNITION_FLAG_ENABLE_AUDIO_NOISE_SUPPRESSION = 0x8;
133 
134     /**
135      * Additional payload for {@link Callback#onDetected}.
136      */
137     public static class EventPayload {
138         private final boolean mTriggerAvailable;
139 
140         // Indicates if {@code captureSession} can be used to continue capturing more audio
141         // from the DSP hardware.
142         private final boolean mCaptureAvailable;
143         // The session to use when attempting to capture more audio from the DSP hardware.
144         private final int mCaptureSession;
145         private final AudioFormat mAudioFormat;
146         // Raw data associated with the event.
147         // This is the audio that triggered the keyphrase if {@code isTriggerAudio} is true.
148         private final byte[] mData;
149 
EventPayload(boolean triggerAvailable, boolean captureAvailable, AudioFormat audioFormat, int captureSession, byte[] data)150         private EventPayload(boolean triggerAvailable, boolean captureAvailable,
151                 AudioFormat audioFormat, int captureSession, byte[] data) {
152             mTriggerAvailable = triggerAvailable;
153             mCaptureAvailable = captureAvailable;
154             mCaptureSession = captureSession;
155             mAudioFormat = audioFormat;
156             mData = data;
157         }
158 
159         /**
160          * Gets the format of the audio obtained using {@link #getTriggerAudio()}.
161          * May be null if there's no audio present.
162          */
163         @Nullable
getCaptureAudioFormat()164         public AudioFormat getCaptureAudioFormat() {
165             return mAudioFormat;
166         }
167 
168         /**
169          * Gets the raw audio that triggered the detector.
170          * This may be null if the trigger audio isn't available.
171          * If non-null, the format of the audio can be obtained by calling
172          * {@link #getCaptureAudioFormat()}.
173          *
174          * @see AlwaysOnHotwordDetector#RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO
175          */
176         @Nullable
getTriggerAudio()177         public byte[] getTriggerAudio() {
178             if (mTriggerAvailable) {
179                 return mData;
180             } else {
181                 return null;
182             }
183         }
184 
185         /**
186          * Gets the opaque data passed from the detection engine for the event.
187          * This may be null if it was not populated by the engine, or if the data is known to
188          * contain the trigger audio.
189          *
190          * @see #getTriggerAudio
191          *
192          * @hide
193          */
194         @Nullable
195         @UnsupportedAppUsage
getData()196         public byte[] getData() {
197             if (!mTriggerAvailable) {
198                 return mData;
199             } else {
200                 return null;
201             }
202         }
203 
204         /**
205          * Gets the session ID to start a capture from the DSP.
206          * This may be null if streaming capture isn't possible.
207          * If non-null, the format of the audio that can be captured can be
208          * obtained using {@link #getCaptureAudioFormat()}.
209          *
210          * TODO: Candidate for Public API when the API to start capture with a session ID
211          * is made public.
212          *
213          * TODO: Add this to {@link #getCaptureAudioFormat()}:
214          * "Gets the format of the audio obtained using {@link #getTriggerAudio()}
215          * or {@link #getCaptureSession()}. May be null if no audio can be obtained
216          * for either the trigger or a streaming session."
217          *
218          * TODO: Should this return a known invalid value instead?
219          *
220          * @hide
221          */
222         @Nullable
223         @UnsupportedAppUsage
getCaptureSession()224         public Integer getCaptureSession() {
225             if (mCaptureAvailable) {
226                 return mCaptureSession;
227             } else {
228                 return null;
229             }
230         }
231     }
232 
233     public static abstract class Callback {
234         /**
235          * Called when the availability of the sound model changes.
236          */
onAvailabilityChanged(int status)237         public abstract void onAvailabilityChanged(int status);
238 
239         /**
240          * Called when the sound model has triggered (such as when it matched a
241          * given sound pattern).
242          */
onDetected(@onNull EventPayload eventPayload)243         public abstract void onDetected(@NonNull EventPayload eventPayload);
244 
245         /**
246          *  Called when the detection fails due to an error.
247          */
onError()248         public abstract void onError();
249 
250         /**
251          * Called when the recognition is paused temporarily for some reason.
252          * This is an informational callback, and the clients shouldn't be doing anything here
253          * except showing an indication on their UI if they have to.
254          */
onRecognitionPaused()255         public abstract void onRecognitionPaused();
256 
257         /**
258          * Called when the recognition is resumed after it was temporarily paused.
259          * This is an informational callback, and the clients shouldn't be doing anything here
260          * except showing an indication on their UI if they have to.
261          */
onRecognitionResumed()262         public abstract void onRecognitionResumed();
263     }
264 
265     /**
266      * This class should be constructed by the {@link SoundTriggerManager}.
267      * @hide
268      */
SoundTriggerDetector(ISoundTriggerService soundTriggerService, UUID soundModelId, @NonNull Callback callback, @Nullable Handler handler)269     SoundTriggerDetector(ISoundTriggerService soundTriggerService, UUID soundModelId,
270             @NonNull Callback callback, @Nullable Handler handler) {
271         mSoundTriggerService = soundTriggerService;
272         mSoundModelId = soundModelId;
273         mCallback = callback;
274         if (handler == null) {
275             mHandler = new MyHandler();
276         } else {
277             mHandler = new MyHandler(handler.getLooper());
278         }
279         mRecognitionCallback = new RecognitionCallback();
280     }
281 
282     /**
283      * Starts recognition on the associated sound model. Result is indicated via the
284      * {@link Callback}.
285      * @return Indicates whether the call succeeded or not.
286      */
287     @RequiresPermission(android.Manifest.permission.MANAGE_SOUND_TRIGGER)
startRecognition(@ecognitionFlags int recognitionFlags)288     public boolean startRecognition(@RecognitionFlags int recognitionFlags) {
289         if (DBG) {
290             Slog.d(TAG, "startRecognition()");
291         }
292         boolean captureTriggerAudio =
293                 (recognitionFlags & RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO) != 0;
294 
295         boolean allowMultipleTriggers =
296                 (recognitionFlags & RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS) != 0;
297 
298         int audioCapabilities = 0;
299         if ((recognitionFlags & RECOGNITION_FLAG_ENABLE_AUDIO_ECHO_CANCELLATION) != 0) {
300             audioCapabilities |= SoundTrigger.ModuleProperties.AUDIO_CAPABILITY_ECHO_CANCELLATION;
301         }
302         if ((recognitionFlags & RECOGNITION_FLAG_ENABLE_AUDIO_NOISE_SUPPRESSION) != 0) {
303             audioCapabilities |= SoundTrigger.ModuleProperties.AUDIO_CAPABILITY_NOISE_SUPPRESSION;
304         }
305 
306         int status;
307         try {
308             status = mSoundTriggerService.startRecognition(new ParcelUuid(mSoundModelId),
309                     mRecognitionCallback, new RecognitionConfig(captureTriggerAudio,
310                         allowMultipleTriggers, null, null, audioCapabilities));
311         } catch (RemoteException e) {
312             return false;
313         }
314         return status == STATUS_OK;
315     }
316 
317     /**
318      * Stops recognition for the associated model.
319      */
320     @RequiresPermission(android.Manifest.permission.MANAGE_SOUND_TRIGGER)
stopRecognition()321     public boolean stopRecognition() {
322         int status = STATUS_OK;
323         try {
324             status = mSoundTriggerService.stopRecognition(new ParcelUuid(mSoundModelId),
325                     mRecognitionCallback);
326         } catch (RemoteException e) {
327             return false;
328         }
329         return status == STATUS_OK;
330     }
331 
332     /**
333      * @hide
334      */
dump(String prefix, PrintWriter pw)335     public void dump(String prefix, PrintWriter pw) {
336         synchronized (mLock) {
337             // TODO: Dump useful debug information.
338         }
339     }
340 
341     /**
342      * Callback that handles events from the lower sound trigger layer.
343      *
344      * Note that these callbacks will be called synchronously from the SoundTriggerService
345      * layer and thus should do minimal work (such as sending a message on a handler to do
346      * the real work).
347      * @hide
348      */
349     private class RecognitionCallback extends IRecognitionStatusCallback.Stub {
350 
351         /**
352          * @hide
353          */
354         @Override
onGenericSoundTriggerDetected(SoundTrigger.GenericRecognitionEvent event)355         public void onGenericSoundTriggerDetected(SoundTrigger.GenericRecognitionEvent event) {
356             Slog.d(TAG, "onGenericSoundTriggerDetected()" + event);
357             Message.obtain(mHandler,
358                     MSG_SOUND_TRIGGER_DETECTED,
359                     new EventPayload(event.triggerInData, event.captureAvailable,
360                             event.captureFormat, event.captureSession, event.data))
361                     .sendToTarget();
362         }
363 
364         @Override
onKeyphraseDetected(SoundTrigger.KeyphraseRecognitionEvent event)365         public void onKeyphraseDetected(SoundTrigger.KeyphraseRecognitionEvent event) {
366             Slog.e(TAG, "Ignoring onKeyphraseDetected() called for " + event);
367         }
368 
369         /**
370          * @hide
371          */
372         @Override
onError(int status)373         public void onError(int status) {
374             Slog.d(TAG, "onError()" + status);
375             mHandler.sendEmptyMessage(MSG_DETECTION_ERROR);
376         }
377 
378         /**
379          * @hide
380          */
381         @Override
onRecognitionPaused()382         public void onRecognitionPaused() {
383             Slog.d(TAG, "onRecognitionPaused()");
384             mHandler.sendEmptyMessage(MSG_DETECTION_PAUSE);
385         }
386 
387         /**
388          * @hide
389          */
390         @Override
onRecognitionResumed()391         public void onRecognitionResumed() {
392             Slog.d(TAG, "onRecognitionResumed()");
393             mHandler.sendEmptyMessage(MSG_DETECTION_RESUME);
394         }
395     }
396 
397     private class MyHandler extends Handler {
398 
MyHandler()399         MyHandler() {
400             super();
401         }
402 
MyHandler(Looper looper)403         MyHandler(Looper looper) {
404             super(looper);
405         }
406 
407         @Override
handleMessage(Message msg)408         public void handleMessage(Message msg) {
409             if (mCallback == null) {
410                   Slog.w(TAG, "Received message: " + msg.what + " for NULL callback.");
411                   return;
412             }
413             switch (msg.what) {
414                 case MSG_SOUND_TRIGGER_DETECTED:
415                     mCallback.onDetected((EventPayload) msg.obj);
416                     break;
417                 case MSG_DETECTION_ERROR:
418                     mCallback.onError();
419                     break;
420                 case MSG_DETECTION_PAUSE:
421                     mCallback.onRecognitionPaused();
422                     break;
423                 case MSG_DETECTION_RESUME:
424                     mCallback.onRecognitionResumed();
425                     break;
426                 default:
427                     super.handleMessage(msg);
428 
429             }
430         }
431     }
432 }
433