1 /** 2 * Copyright (C) 2014 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package android.media.soundtrigger; 18 import static android.hardware.soundtrigger.SoundTrigger.STATUS_OK; 19 20 import android.annotation.IntDef; 21 import android.annotation.NonNull; 22 import android.annotation.Nullable; 23 import android.annotation.RequiresPermission; 24 import android.annotation.SystemApi; 25 import android.compat.annotation.UnsupportedAppUsage; 26 import android.hardware.soundtrigger.IRecognitionStatusCallback; 27 import android.hardware.soundtrigger.SoundTrigger; 28 import android.hardware.soundtrigger.SoundTrigger.ModuleProperties; 29 import android.hardware.soundtrigger.SoundTrigger.RecognitionConfig; 30 import android.media.AudioFormat; 31 import android.os.Handler; 32 import android.os.Looper; 33 import android.os.Message; 34 import android.os.ParcelUuid; 35 import android.os.RemoteException; 36 import android.util.Slog; 37 38 import com.android.internal.app.ISoundTriggerService; 39 40 import java.io.PrintWriter; 41 import java.lang.annotation.Retention; 42 import java.lang.annotation.RetentionPolicy; 43 import java.util.UUID; 44 45 /** 46 * A class that allows interaction with the actual sound trigger detection on the system. 47 * Sound trigger detection refers to a detectors that match generic sound patterns that are 48 * not voice-based. The voice-based recognition models should utilize the {@link 49 * VoiceInteractionService} instead. Access to this class is protected by a permission 50 * granted only to system or privileged apps. 51 * 52 * @hide 53 */ 54 @SystemApi 55 public final class SoundTriggerDetector { 56 private static final boolean DBG = false; 57 private static final String TAG = "SoundTriggerDetector"; 58 59 private static final int MSG_AVAILABILITY_CHANGED = 1; 60 private static final int MSG_SOUND_TRIGGER_DETECTED = 2; 61 private static final int MSG_DETECTION_ERROR = 3; 62 private static final int MSG_DETECTION_PAUSE = 4; 63 private static final int MSG_DETECTION_RESUME = 5; 64 65 private final Object mLock = new Object(); 66 67 private final ISoundTriggerService mSoundTriggerService; 68 private final UUID mSoundModelId; 69 private final Callback mCallback; 70 private final Handler mHandler; 71 private final RecognitionCallback mRecognitionCallback; 72 73 /** @hide */ 74 @Retention(RetentionPolicy.SOURCE) 75 @IntDef(flag = true, 76 value = { 77 RECOGNITION_FLAG_NONE, 78 RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO, 79 RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS, 80 RECOGNITION_FLAG_ENABLE_AUDIO_ECHO_CANCELLATION, 81 RECOGNITION_FLAG_ENABLE_AUDIO_NOISE_SUPPRESSION, 82 }) 83 public @interface RecognitionFlags {} 84 85 /** 86 * Empty flag for {@link #startRecognition(int)}. 87 * 88 * @hide 89 */ 90 public static final int RECOGNITION_FLAG_NONE = 0; 91 92 /** 93 * Recognition flag for {@link #startRecognition(int)} that indicates 94 * whether the trigger audio for hotword needs to be captured. 95 */ 96 public static final int RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO = 0x1; 97 98 /** 99 * Recognition flag for {@link #startRecognition(int)} that indicates 100 * whether the recognition should keep going on even after the 101 * model triggers. 102 * If this flag is specified, it's possible to get multiple 103 * triggers after a call to {@link #startRecognition(int)}, if the model 104 * triggers multiple times. 105 * When this isn't specified, the default behavior is to stop recognition once the 106 * trigger happens, till the caller starts recognition again. 107 */ 108 public static final int RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS = 0x2; 109 110 /** 111 * Audio capabilities flag for {@link #startRecognition(int)} that indicates 112 * if the underlying recognition should use AEC. 113 * This capability may or may not be supported by the system, and support can be queried 114 * by calling {@link SoundTriggerManager#getModuleProperties()} and checking 115 * {@link ModuleProperties#audioCapabilities}. The corresponding capabilities field for 116 * this flag is {@link SoundTrigger.ModuleProperties#AUDIO_CAPABILITY_ECHO_CANCELLATION}. 117 * If this flag is passed without the audio capability supported, there will be no audio effect 118 * applied. 119 */ 120 public static final int RECOGNITION_FLAG_ENABLE_AUDIO_ECHO_CANCELLATION = 0x4; 121 122 /** 123 * Audio capabilities flag for {@link #startRecognition(int)} that indicates 124 * if the underlying recognition should use noise suppression. 125 * This capability may or may not be supported by the system, and support can be queried 126 * by calling {@link SoundTriggerManager#getModuleProperties()} and checking 127 * {@link ModuleProperties#audioCapabilities}. The corresponding capabilities field for 128 * this flag is {@link SoundTrigger.ModuleProperties#AUDIO_CAPABILITY_NOISE_SUPPRESSION}. 129 * If this flag is passed without the audio capability supported, there will be no audio effect 130 * applied. 131 */ 132 public static final int RECOGNITION_FLAG_ENABLE_AUDIO_NOISE_SUPPRESSION = 0x8; 133 134 /** 135 * Additional payload for {@link Callback#onDetected}. 136 */ 137 public static class EventPayload { 138 private final boolean mTriggerAvailable; 139 140 // Indicates if {@code captureSession} can be used to continue capturing more audio 141 // from the DSP hardware. 142 private final boolean mCaptureAvailable; 143 // The session to use when attempting to capture more audio from the DSP hardware. 144 private final int mCaptureSession; 145 private final AudioFormat mAudioFormat; 146 // Raw data associated with the event. 147 // This is the audio that triggered the keyphrase if {@code isTriggerAudio} is true. 148 private final byte[] mData; 149 EventPayload(boolean triggerAvailable, boolean captureAvailable, AudioFormat audioFormat, int captureSession, byte[] data)150 private EventPayload(boolean triggerAvailable, boolean captureAvailable, 151 AudioFormat audioFormat, int captureSession, byte[] data) { 152 mTriggerAvailable = triggerAvailable; 153 mCaptureAvailable = captureAvailable; 154 mCaptureSession = captureSession; 155 mAudioFormat = audioFormat; 156 mData = data; 157 } 158 159 /** 160 * Gets the format of the audio obtained using {@link #getTriggerAudio()}. 161 * May be null if there's no audio present. 162 */ 163 @Nullable getCaptureAudioFormat()164 public AudioFormat getCaptureAudioFormat() { 165 return mAudioFormat; 166 } 167 168 /** 169 * Gets the raw audio that triggered the detector. 170 * This may be null if the trigger audio isn't available. 171 * If non-null, the format of the audio can be obtained by calling 172 * {@link #getCaptureAudioFormat()}. 173 * 174 * @see AlwaysOnHotwordDetector#RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO 175 */ 176 @Nullable getTriggerAudio()177 public byte[] getTriggerAudio() { 178 if (mTriggerAvailable) { 179 return mData; 180 } else { 181 return null; 182 } 183 } 184 185 /** 186 * Gets the opaque data passed from the detection engine for the event. 187 * This may be null if it was not populated by the engine, or if the data is known to 188 * contain the trigger audio. 189 * 190 * @see #getTriggerAudio 191 * 192 * @hide 193 */ 194 @Nullable 195 @UnsupportedAppUsage getData()196 public byte[] getData() { 197 if (!mTriggerAvailable) { 198 return mData; 199 } else { 200 return null; 201 } 202 } 203 204 /** 205 * Gets the session ID to start a capture from the DSP. 206 * This may be null if streaming capture isn't possible. 207 * If non-null, the format of the audio that can be captured can be 208 * obtained using {@link #getCaptureAudioFormat()}. 209 * 210 * TODO: Candidate for Public API when the API to start capture with a session ID 211 * is made public. 212 * 213 * TODO: Add this to {@link #getCaptureAudioFormat()}: 214 * "Gets the format of the audio obtained using {@link #getTriggerAudio()} 215 * or {@link #getCaptureSession()}. May be null if no audio can be obtained 216 * for either the trigger or a streaming session." 217 * 218 * TODO: Should this return a known invalid value instead? 219 * 220 * @hide 221 */ 222 @Nullable 223 @UnsupportedAppUsage getCaptureSession()224 public Integer getCaptureSession() { 225 if (mCaptureAvailable) { 226 return mCaptureSession; 227 } else { 228 return null; 229 } 230 } 231 } 232 233 public static abstract class Callback { 234 /** 235 * Called when the availability of the sound model changes. 236 */ onAvailabilityChanged(int status)237 public abstract void onAvailabilityChanged(int status); 238 239 /** 240 * Called when the sound model has triggered (such as when it matched a 241 * given sound pattern). 242 */ onDetected(@onNull EventPayload eventPayload)243 public abstract void onDetected(@NonNull EventPayload eventPayload); 244 245 /** 246 * Called when the detection fails due to an error. 247 */ onError()248 public abstract void onError(); 249 250 /** 251 * Called when the recognition is paused temporarily for some reason. 252 * This is an informational callback, and the clients shouldn't be doing anything here 253 * except showing an indication on their UI if they have to. 254 */ onRecognitionPaused()255 public abstract void onRecognitionPaused(); 256 257 /** 258 * Called when the recognition is resumed after it was temporarily paused. 259 * This is an informational callback, and the clients shouldn't be doing anything here 260 * except showing an indication on their UI if they have to. 261 */ onRecognitionResumed()262 public abstract void onRecognitionResumed(); 263 } 264 265 /** 266 * This class should be constructed by the {@link SoundTriggerManager}. 267 * @hide 268 */ SoundTriggerDetector(ISoundTriggerService soundTriggerService, UUID soundModelId, @NonNull Callback callback, @Nullable Handler handler)269 SoundTriggerDetector(ISoundTriggerService soundTriggerService, UUID soundModelId, 270 @NonNull Callback callback, @Nullable Handler handler) { 271 mSoundTriggerService = soundTriggerService; 272 mSoundModelId = soundModelId; 273 mCallback = callback; 274 if (handler == null) { 275 mHandler = new MyHandler(); 276 } else { 277 mHandler = new MyHandler(handler.getLooper()); 278 } 279 mRecognitionCallback = new RecognitionCallback(); 280 } 281 282 /** 283 * Starts recognition on the associated sound model. Result is indicated via the 284 * {@link Callback}. 285 * @return Indicates whether the call succeeded or not. 286 */ 287 @RequiresPermission(android.Manifest.permission.MANAGE_SOUND_TRIGGER) startRecognition(@ecognitionFlags int recognitionFlags)288 public boolean startRecognition(@RecognitionFlags int recognitionFlags) { 289 if (DBG) { 290 Slog.d(TAG, "startRecognition()"); 291 } 292 boolean captureTriggerAudio = 293 (recognitionFlags & RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO) != 0; 294 295 boolean allowMultipleTriggers = 296 (recognitionFlags & RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS) != 0; 297 298 int audioCapabilities = 0; 299 if ((recognitionFlags & RECOGNITION_FLAG_ENABLE_AUDIO_ECHO_CANCELLATION) != 0) { 300 audioCapabilities |= SoundTrigger.ModuleProperties.AUDIO_CAPABILITY_ECHO_CANCELLATION; 301 } 302 if ((recognitionFlags & RECOGNITION_FLAG_ENABLE_AUDIO_NOISE_SUPPRESSION) != 0) { 303 audioCapabilities |= SoundTrigger.ModuleProperties.AUDIO_CAPABILITY_NOISE_SUPPRESSION; 304 } 305 306 int status; 307 try { 308 status = mSoundTriggerService.startRecognition(new ParcelUuid(mSoundModelId), 309 mRecognitionCallback, new RecognitionConfig(captureTriggerAudio, 310 allowMultipleTriggers, null, null, audioCapabilities)); 311 } catch (RemoteException e) { 312 return false; 313 } 314 return status == STATUS_OK; 315 } 316 317 /** 318 * Stops recognition for the associated model. 319 */ 320 @RequiresPermission(android.Manifest.permission.MANAGE_SOUND_TRIGGER) stopRecognition()321 public boolean stopRecognition() { 322 int status = STATUS_OK; 323 try { 324 status = mSoundTriggerService.stopRecognition(new ParcelUuid(mSoundModelId), 325 mRecognitionCallback); 326 } catch (RemoteException e) { 327 return false; 328 } 329 return status == STATUS_OK; 330 } 331 332 /** 333 * @hide 334 */ dump(String prefix, PrintWriter pw)335 public void dump(String prefix, PrintWriter pw) { 336 synchronized (mLock) { 337 // TODO: Dump useful debug information. 338 } 339 } 340 341 /** 342 * Callback that handles events from the lower sound trigger layer. 343 * 344 * Note that these callbacks will be called synchronously from the SoundTriggerService 345 * layer and thus should do minimal work (such as sending a message on a handler to do 346 * the real work). 347 * @hide 348 */ 349 private class RecognitionCallback extends IRecognitionStatusCallback.Stub { 350 351 /** 352 * @hide 353 */ 354 @Override onGenericSoundTriggerDetected(SoundTrigger.GenericRecognitionEvent event)355 public void onGenericSoundTriggerDetected(SoundTrigger.GenericRecognitionEvent event) { 356 Slog.d(TAG, "onGenericSoundTriggerDetected()" + event); 357 Message.obtain(mHandler, 358 MSG_SOUND_TRIGGER_DETECTED, 359 new EventPayload(event.triggerInData, event.captureAvailable, 360 event.captureFormat, event.captureSession, event.data)) 361 .sendToTarget(); 362 } 363 364 @Override onKeyphraseDetected(SoundTrigger.KeyphraseRecognitionEvent event)365 public void onKeyphraseDetected(SoundTrigger.KeyphraseRecognitionEvent event) { 366 Slog.e(TAG, "Ignoring onKeyphraseDetected() called for " + event); 367 } 368 369 /** 370 * @hide 371 */ 372 @Override onError(int status)373 public void onError(int status) { 374 Slog.d(TAG, "onError()" + status); 375 mHandler.sendEmptyMessage(MSG_DETECTION_ERROR); 376 } 377 378 /** 379 * @hide 380 */ 381 @Override onRecognitionPaused()382 public void onRecognitionPaused() { 383 Slog.d(TAG, "onRecognitionPaused()"); 384 mHandler.sendEmptyMessage(MSG_DETECTION_PAUSE); 385 } 386 387 /** 388 * @hide 389 */ 390 @Override onRecognitionResumed()391 public void onRecognitionResumed() { 392 Slog.d(TAG, "onRecognitionResumed()"); 393 mHandler.sendEmptyMessage(MSG_DETECTION_RESUME); 394 } 395 } 396 397 private class MyHandler extends Handler { 398 MyHandler()399 MyHandler() { 400 super(); 401 } 402 MyHandler(Looper looper)403 MyHandler(Looper looper) { 404 super(looper); 405 } 406 407 @Override handleMessage(Message msg)408 public void handleMessage(Message msg) { 409 if (mCallback == null) { 410 Slog.w(TAG, "Received message: " + msg.what + " for NULL callback."); 411 return; 412 } 413 switch (msg.what) { 414 case MSG_SOUND_TRIGGER_DETECTED: 415 mCallback.onDetected((EventPayload) msg.obj); 416 break; 417 case MSG_DETECTION_ERROR: 418 mCallback.onError(); 419 break; 420 case MSG_DETECTION_PAUSE: 421 mCallback.onRecognitionPaused(); 422 break; 423 case MSG_DETECTION_RESUME: 424 mCallback.onRecognitionResumed(); 425 break; 426 default: 427 super.handleMessage(msg); 428 429 } 430 } 431 } 432 } 433