/*
 * Copyright (C) 2021 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package android.voiceinteraction.service;

import static android.media.AudioFormat.CHANNEL_IN_FRONT;

import android.media.AudioAttributes;
import android.media.AudioFormat;
import android.media.AudioRecord;
import android.media.MediaRecorder;
import android.os.Handler;
import android.os.Looper;
import android.os.ParcelFileDescriptor;
import android.os.PersistableBundle;
import android.os.Process;
import android.os.SharedMemory;
import android.os.SystemClock;
import android.service.voice.AlwaysOnHotwordDetector;
import android.service.voice.HotwordDetectedResult;
import android.service.voice.HotwordDetectionService;
import android.service.voice.HotwordRejectedResult;
import android.service.voice.SandboxedDetectionInitializer;
import android.system.ErrnoException;
import android.text.TextUtils;
import android.util.Log;
import android.voiceinteraction.common.Utils;

import androidx.annotation.NonNull;
import androidx.annotation.Nullable;

import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.util.function.IntConsumer;

import javax.annotation.concurrent.GuardedBy;

/**
 * Test implementation of {@link HotwordDetectionService}.
 *
 * <p>The service reports canned detection/rejection results. Which result is reported, and
 * when, is controlled by the test-scenario options delivered through
 * {@link #onUpdateState(PersistableBundle, SharedMemory, long, IntConsumer)}.
 */
public class MainHotwordDetectionService extends HotwordDetectionService {
    static final String TAG = "MainHotwordDetectionService";

    public static byte[] FAKE_HOTWORD_AUDIO_DATA =
            new byte[]{'h', 'o', 't', 'w', 'o', 'r', 'd', '!'};
    public static String KEY_FAKE_DATA = "fakeData";
    public static String VALUE_FAKE_DATA = "fakeData";
    public static final int DEFAULT_PHRASE_ID = 5;
    public static final HotwordDetectedResult DETECTED_RESULT =
            new HotwordDetectedResult.Builder()
                    .setAudioChannel(CHANNEL_IN_FRONT)
                    .setConfidenceLevel(HotwordDetectedResult.CONFIDENCE_LEVEL_HIGH)
                    .setHotwordDetectionPersonalized(true)
                    .setHotwordDurationMillis(1000)
                    .setHotwordOffsetMillis(500)
                    .setHotwordPhraseId(DEFAULT_PHRASE_ID)
                    .setPersonalizedScore(10)
                    .setScore(15)
                    .setBackgroundAudioPower(50)
                    .build();
    public static final HotwordDetectedResult DETECTED_RESULT_AFTER_STOP_DETECTION =
            new HotwordDetectedResult.Builder()
                    .setHotwordPhraseId(DEFAULT_PHRASE_ID)
                    .setScore(57)
                    .build();
    public static final HotwordDetectedResult DETECTED_RESULT_FOR_MIC_FAILURE =
            new HotwordDetectedResult.Builder()
                    .setHotwordPhraseId(DEFAULT_PHRASE_ID)
                    .setScore(58)
                    .build();
    public static final HotwordRejectedResult REJECTED_RESULT =
            new HotwordRejectedResult.Builder()
                    .setConfidenceLevel(HotwordRejectedResult.CONFIDENCE_LEVEL_MEDIUM)
                    .build();

    /** Number of leading bytes of an external audio stream compared with the fake hotword. */
    private static final int HOTWORD_HEADER_LENGTH = 8;
    /** Maximum time to wait for the external audio stream to provide the header bytes. */
    private static final long EXTERNAL_AUDIO_TIMEOUT_MS = 3000;
    /** Sleep interval between polls of the external audio stream. */
    private static final long EXTERNAL_AUDIO_POLL_INTERVAL_MS = 10;
    /** Delay before the microphone-source detection job reports its result. */
    private static final long SOFTWARE_DETECTION_DELAY_MS = 1500;

    @NonNull
    private final Object mLock = new Object();
    private Handler mHandler;
    @GuardedBy("mLock")
    private boolean mStopDetectionCalled;
    @GuardedBy("mLock")
    private int mDetectionDelayMs = 0;
    private long mServiceCreatedTimeMillis = -1;

    @GuardedBy("mLock")
    @Nullable
    private Runnable mDetectionJob;

    private boolean mIsTestUnexpectedCallback;

    private boolean mIsTestAudioEgress;

    /**
     * It only works when {@link #mIsTestAudioEgress} is true
     */
    private boolean mUseIllegalAudioEgressCopyBufferSize;

    private boolean mIsNoNeedActionDuringDetection;

    private boolean mCheckAudioDataIsNotZero;

    /**
     * Creates the main-looper handler used to schedule delayed results and records the
     * creation time (elapsed realtime) of this service instance.
     */
    @Override
    public void onCreate() {
        super.onCreate();
        mHandler = Handler.createAsync(Looper.getMainLooper());
        mServiceCreatedTimeMillis = SystemClock.elapsedRealtime();
        Log.d(TAG, "onCreate");
    }

    /**
     * Handles a detection request for the DSP source.
     *
     * <p>Does nothing (once) when the "no need action" scenario is armed. Reports
     * {@link #DETECTED_RESULT_FOR_MIC_FAILURE} when audio cannot be read. Otherwise a non-empty
     * payload leads to a detected result posted after {@code mDetectionDelayMs}, and an empty
     * payload leads to {@link #REJECTED_RESULT}.
     *
     * @param eventPayload payload of the DSP trigger event
     * @param timeoutMillis timeout supplied by the framework; not used by this implementation
     * @param callback callback that receives the detected or rejected result
     */
    @Override
    public void onDetect(@NonNull AlwaysOnHotwordDetector.EventPayload eventPayload,
            long timeoutMillis, @NonNull Callback callback) {
        Log.d(TAG, "onDetect for DSP source");

        if (mIsNoNeedActionDuringDetection) {
            // One-shot flag: swallow this request and behave normally afterwards.
            mIsNoNeedActionDuringDetection = false;
            return;
        }

        if (!canReadAudio()) {
            callback.onDetected(DETECTED_RESULT_FOR_MIC_FAILURE);
            return;
        }

        // TODO: Check the capture session (needs to be reflectively accessed).
        byte[] data = eventPayload.getData();
        if (data != null && data.length > 0) {
            if (mIsTestUnexpectedCallback) {
                Log.d(TAG, "callback onDetected twice");
                callback.onDetected(DETECTED_RESULT);
                callback.onDetected(DETECTED_RESULT);
                mIsTestUnexpectedCallback = false;
                return;
            }

            // Create the unaccepted HotwordDetectedResult first to test the protection in the
            // onDetected callback function of HotwordDetectionService. When the bundle data of
            // HotwordDetectedResult is larger than max bundle size, it will throw the
            // IllegalArgumentException.
            PersistableBundle persistableBundle = new PersistableBundle();
            HotwordDetectedResult hotwordDetectedResult =
                    new HotwordDetectedResult.Builder()
                            .setHotwordPhraseId(eventPayload.getKeyphraseRecognitionExtras().get(
                                    0).getKeyphraseId())
                            .setExtras(persistableBundle)
                            .build();
            // Grow the bundle (after it was attached to the result) until it exceeds the
            // maximum allowed size.
            int key = 0;
            do {
                persistableBundle.putInt(Integer.toString(key), 0);
                key++;
            } while (Utils.getParcelableSize(persistableBundle)
                    <= HotwordDetectedResult.getMaxBundleSize());

            synchronized (mLock) {
                mHandler.postDelayed(() -> {
                    try {
                        callback.onDetected(selectDetectedResult(hotwordDetectedResult));
                    } catch (IllegalArgumentException e) {
                        // The oversized result was refused; report the regular result instead.
                        callback.onDetected(DETECTED_RESULT);
                    }
                }, mDetectionDelayMs);
            }
        } else {
            callback.onRejected(REJECTED_RESULT);
            if (mIsTestUnexpectedCallback) {
                Log.d(TAG, "callback onRejected again");
                callback.onRejected(REJECTED_RESULT);
                mIsTestUnexpectedCallback = false;
            }
        }
    }

    /**
     * Handles a detection request for an external audio source.
     *
     * <p>Waits (up to {@link #EXTERNAL_AUDIO_TIMEOUT_MS}) for the first
     * {@link #HOTWORD_HEADER_LENGTH} bytes of {@code audioStream} and reports a detected result
     * when they match {@link #FAKE_HOTWORD_AUDIO_DATA}. Reports {@link #REJECTED_RESULT}
     * immediately when {@code options} requests a rejection. The stream is closed before
     * returning once reading has started.
     *
     * @param audioStream stream carrying the audio data
     * @param audioFormat format of the audio; not used by this implementation
     * @param options optional test options, may request a rejection
     * @param callback callback that receives the detected or rejected result
     */
    @Override
    public void onDetect(
            @NonNull ParcelFileDescriptor audioStream,
            @NonNull AudioFormat audioFormat,
            @Nullable PersistableBundle options,
            @NonNull Callback callback) {
        Log.d(TAG, "onDetect for external source");

        if (callback == null) {
            Log.w(TAG, "callback is null");
            return;
        }
        if (audioStream == null) {
            Log.w(TAG, "audioStream is null");
            return;
        }
        if (options != null) {
            if (options.getBoolean(Utils.KEY_DETECTION_REJECTED, false)) {
                Log.d(TAG, "Call onRejected for external source");
                callback.onRejected(REJECTED_RESULT);
                return;
            }
        }

        // Use the monotonic clock so wall-clock adjustments cannot shorten or extend the wait.
        final long startTime = SystemClock.elapsedRealtime();
        try (InputStream fis =
                     new ParcelFileDescriptor.AutoCloseInputStream(audioStream)) {

            // We added the fake audio data and set "hotword!" string at the head. Then we simulated
            // to verify the audio data with "hotword!" in HotwordDetectionService. If the audio
            // data includes "hotword!", it means that the hotword is valid.
            while (fis.available() < HOTWORD_HEADER_LENGTH) {
                try {
                    Thread.sleep(EXTERNAL_AUDIO_POLL_INTERVAL_MS);
                } catch (InterruptedException e) {
                    // Preserve the interrupt for the caller and stop waiting; continuing to
                    // loop would make every following sleep fail immediately.
                    Thread.currentThread().interrupt();
                    Log.w(TAG, "Interrupted while waiting for audio data");
                    return;
                }
                if (SystemClock.elapsedRealtime() - startTime > EXTERNAL_AUDIO_TIMEOUT_MS) {
                    Log.w(TAG, "Over timeout");
                    return;
                }
            }
            Log.d(TAG, "fis.available() = " + fis.available());
            byte[] buffer = new byte[HOTWORD_HEADER_LENGTH];
            int bytesRead = fis.read(buffer, 0, buffer.length);
            if (bytesRead != buffer.length) {
                // A partially filled buffer can never match the fake hotword.
                Log.w(TAG, "Short read of audio header: " + bytesRead);
                return;
            }
            if (isSame(buffer, FAKE_HOTWORD_AUDIO_DATA,
                    buffer.length)) {
                Log.d(TAG, "call callback.onDetected");
                callback.onDetected(selectDetectedResult(DETECTED_RESULT));
            }
        } catch (IOException e) {
            Log.w(TAG, "Failed to read data : ", e);
        }
    }

    /**
     * Handles a detection request for the microphone source.
     *
     * <p>Unless {@link #onStopDetection()} was called before, a job is posted that reports a
     * detected result after {@link #SOFTWARE_DETECTION_DELAY_MS}. If detection was already
     * stopped, {@link #DETECTED_RESULT_AFTER_STOP_DETECTION} is reported immediately.
     *
     * @param callback callback that receives the detected result
     * @throws IllegalStateException if a detection job is still registered
     */
    @Override
    public void onDetect(@NonNull Callback callback) {
        Log.d(TAG, "onDetect for Mic source");
        synchronized (mLock) {
            if (mDetectionJob != null) {
                throw new IllegalStateException("onDetect called while already detecting");
            }
            if (!mStopDetectionCalled) {
                // Delaying this allows us to test other flows, such as stopping detection. It's
                // also more realistic to schedule it onto another thread.
                mDetectionJob = () -> {
                    Log.d(TAG, "Sending detected result");
                    if (mIsTestUnexpectedCallback) {
                        Log.d(TAG, "callback onDetected twice");
                        callback.onDetected(DETECTED_RESULT);
                        callback.onDetected(DETECTED_RESULT);
                        mIsTestUnexpectedCallback = false;
                        return;
                    }
                    if (canReadAudio()) {
                        callback.onDetected(selectDetectedResult(DETECTED_RESULT));
                    } else {
                        callback.onDetected(DETECTED_RESULT_FOR_MIC_FAILURE);
                    }
                };
                mHandler.postDelayed(mDetectionJob, SOFTWARE_DETECTION_DELAY_MS);
            } else {
                Log.d(TAG, "Sending detected result after stop detection");
                // We can't store and use this callback in onStopDetection (not valid anymore
                // there), so instead we trigger detection again to report the event.
                callback.onDetected(DETECTED_RESULT_AFTER_STOP_DETECTION);
            }
        }
    }

    /**
     * Cancels the pending microphone-source detection job, if any, and remembers that
     * detection has been stopped.
     */
    @Override
    public void onStopDetection() {
        super.onStopDetection();
        Log.d(TAG, "onStopDetection");
        synchronized (mLock) {
            if (mDetectionJob != null) {
                mHandler.removeCallbacks(mDetectionJob);
                mDetectionJob = null;
            }
            mStopDetectionCalled = true;
        }
    }

    /**
     * Applies the test configuration carried by {@code options} and {@code sharedMemory}.
     *
     * <p>When {@code statusCallback} is non-null (service initialization), the pending detection
     * job and the stop-detection flag are reset first. Most test scenarios return early without
     * reporting a status; if no scenario matches, the fake data matches and the shared memory
     * (if any) is read-only, {@code INITIALIZATION_STATUS_SUCCESS} is reported.
     *
     * @param options test options; may be {@code null}
     * @param sharedMemory shared memory expected to be read-only; closed before returning
     *         once it has been inspected
     * @param callbackTimeoutMillis timeout supplied by the framework; only forwarded to super
     * @param statusCallback receiver of the initialization status; {@code null} when the
     *         service is not initializing
     */
    @Override
    public void onUpdateState(
            @Nullable PersistableBundle options,
            @Nullable SharedMemory sharedMemory,
            long callbackTimeoutMillis,
            @Nullable IntConsumer statusCallback) {
        super.onUpdateState(options, sharedMemory, callbackTimeoutMillis, statusCallback);
        Log.d(TAG, "onUpdateState");

        // Only consulted inside "options != null" branches below.
        final int testScenario =
                (options != null) ? options.getInt(Utils.KEY_TEST_SCENARIO, -1) : -1;

        // Reset mDetectionJob and mStopDetectionCalled when service is initializing.
        synchronized (mLock) {
            // When the service is initializing, the statusCallback will be not null.
            if (statusCallback != null) {
                if (mDetectionJob != null) {
                    Log.d(TAG, "onUpdateState mDetectionJob is not null");
                    mHandler.removeCallbacks(mDetectionJob);
                    mDetectionJob = null;
                }
                mStopDetectionCalled = false;

                if (options != null) {
                    if (testScenario
                            == Utils.EXTRA_HOTWORD_DETECTION_SERVICE_SEND_OVER_MAX_INIT_STATUS) {
                        Log.d(TAG, "send over the max custom initialization status");
                        final int initializationStatus =
                                SandboxedDetectionInitializer.getMaxCustomInitializationStatus();
                        try {
                            statusCallback.accept(initializationStatus + 1);
                        } catch (IllegalArgumentException ex) {
                            Log.d(TAG, "expect to get IllegalArgumentException here");
                            statusCallback.accept(initializationStatus);
                        }
                        return;
                    } else if (testScenario
                            == Utils.EXTRA_HOTWORD_DETECTION_SERVICE_SEND_CUSTOM_INIT_STATUS) {
                        Log.d(TAG, "send custom initialization status");
                        statusCallback.accept(options.getInt(Utils.KEY_INITIALIZATION_STATUS, -1));
                        return;
                    } else if (testScenario
                            == Utils.EXTRA_HOTWORD_DETECTION_SERVICE_SEND_SUCCESS_IF_CREATED_AFTER) {
                        // verify that the HotwordDetectionService was created after the timestamp
                        // passed in via the options parameter
                        long optionsTimestampMillis = options.getLong(Utils.KEY_TIMESTAMP_MILLIS,
                                -1);
                        Log.d(TAG, "send initialization success if created after: "
                                + optionsTimestampMillis
                                + ", onCreate timestamp=" + mServiceCreatedTimeMillis);
                        statusCallback.accept((mServiceCreatedTimeMillis > optionsTimestampMillis)
                                ? SandboxedDetectionInitializer.INITIALIZATION_STATUS_SUCCESS
                                : SandboxedDetectionInitializer.getMaxCustomInitializationStatus());
                        return;
                    }
                }
            }
            if (options != null) {
                mDetectionDelayMs = options.getInt(Utils.KEY_DETECTION_DELAY_MS, 0);

                if (testScenario
                        == Utils.EXTRA_HOTWORD_DETECTION_SERVICE_CLEAR_SOFTWARE_DETECTION_JOB) {
                    Log.d(TAG, "options : Clear software detection job");
                    if (mDetectionJob != null) {
                        Log.d(TAG, "Clear mDetectionJob");
                        mHandler.removeCallbacks(mDetectionJob);
                        mDetectionJob = null;
                    }
                    return;
                }
            }
        }

        if (options != null) {
            if (testScenario
                    == Utils.EXTRA_HOTWORD_DETECTION_SERVICE_ON_UPDATE_STATE_CRASH) {
                Log.d(TAG, "Crash itself. Pid: " + Process.myPid());
                Process.killProcess(Process.myPid());
                return;
            }
            if (testScenario
                    == Utils.EXTRA_HOTWORD_DETECTION_SERVICE_ON_UPDATE_STATE_UNEXPECTED_CALLBACK) {
                Log.d(TAG, "options : Test unexpected callback");
                mIsTestUnexpectedCallback = true;
                return;
            }
            if (testScenario
                    == Utils.EXTRA_HOTWORD_DETECTION_SERVICE_ENABLE_AUDIO_EGRESS) {
                mUseIllegalAudioEgressCopyBufferSize = options.getBoolean(
                        Utils.KEY_AUDIO_EGRESS_USE_ILLEGAL_COPY_BUFFER_SIZE,
                        /* defaultValue= */ false);
                Log.d(TAG, "options : Test audio egress use illegal copy buffer size = "
                        + mUseIllegalAudioEgressCopyBufferSize);
                mIsTestAudioEgress = true;
                return;
            }
            if (testScenario
                    == Utils.EXTRA_HOTWORD_DETECTION_SERVICE_NO_NEED_ACTION_DURING_DETECTION) {
                Log.d(TAG, "options : Test no need action during detection");
                mIsNoNeedActionDuringDetection = true;
                return;
            }
            if (testScenario
                    == Utils.EXTRA_HOTWORD_DETECTION_SERVICE_CAN_READ_AUDIO_DATA_IS_NOT_ZERO) {
                Log.d(TAG, "options : Test can read audio, and data is not zero");
                mCheckAudioDataIsNotZero = true;
                return;
            }

            String fakeData = options.getString(KEY_FAKE_DATA);
            if (!TextUtils.equals(fakeData, VALUE_FAKE_DATA)) {
                Log.d(TAG, "options : data is not the same");
                return;
            }
        }

        if (sharedMemory != null) {
            try {
                ByteBuffer writableMapping = sharedMemory.mapReadWrite();
                // Reaching this line means the memory is writable. Release the probe mapping
                // right away: closing the SharedMemory does not unmap existing mappings.
                SharedMemory.unmap(writableMapping);
                Log.d(TAG, "sharedMemory : is not read-only");
                return;
            } catch (ErrnoException e) {
                // For read-only case
            } finally {
                sharedMemory.close();
            }
        }

        // Report success
        Log.d(TAG, "onUpdateState success");
        if (statusCallback != null) {
            statusCallback.accept(INITIALIZATION_STATUS_SUCCESS);
        }
    }

    /**
     * Picks the result to report for a successful detection.
     *
     * @param defaultResult result to use when the audio-egress scenario is not enabled
     * @return one of the audio-egress results from {@link Utils} when the audio-egress scenario
     *         is enabled, otherwise {@code defaultResult}
     */
    private HotwordDetectedResult selectDetectedResult(
            @NonNull HotwordDetectedResult defaultResult) {
        if (!mIsTestAudioEgress) {
            return defaultResult;
        }
        return mUseIllegalAudioEgressCopyBufferSize
                ? Utils.AUDIO_EGRESS_DETECTED_RESULT_WRONG_COPY_BUFFER_SIZE
                : Utils.AUDIO_EGRESS_DETECTED_RESULT;
    }

    /**
     * Compares the first {@code length} bytes of two arrays.
     *
     * @param array1 first array, may be {@code null}
     * @param array2 second array, may be {@code null}
     * @param length number of leading bytes to compare
     * @return {@code true} only if {@code length} is positive, both arrays are non-null and at
     *         least {@code length} long, and their first {@code length} bytes are equal
     */
    private boolean isSame(byte[] array1, byte[] array2, int length) {
        if (length <= 0) {
            return false;
        }
        if (array1 == null || array2 == null || array1.length < length || array2.length < length) {
            return false;
        }
        for (int i = 0; i < length; i++) {
            if (array1[i] != array2[i]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Tries to read one second of 16 kHz, mono, 16-bit PCM audio from the
     * {@link MediaRecorder.AudioSource#HOTWORD} source.
     *
     * @return {@code false} if the {@link AudioRecord} cannot be initialized, a read reports an
     *         error, or (when the "data is not zero" scenario is enabled on a non-virtual
     *         device) every byte read is zero; {@code true} otherwise
     */
    private boolean canReadAudio() {
        int bytesPerSample = 2; // for ENCODING_PCM_16BIT
        int sampleRate = 16000;
        int bytesPerSecond = bytesPerSample * sampleRate; // for single channel
        AudioRecord record =
                new AudioRecord.Builder()
                        .setAudioAttributes(
                                new AudioAttributes.Builder()
                                        .setInternalCapturePreset(MediaRecorder.AudioSource.HOTWORD)
                                        .build())
                        .setAudioFormat(
                                new AudioFormat.Builder()
                                        .setChannelMask(AudioFormat.CHANNEL_IN_MONO)
                                        .setEncoding(AudioFormat.ENCODING_PCM_16BIT)
                                        .setSampleRate(sampleRate)
                                        .build())
                        .setBufferSizeInBytes(bytesPerSecond)
                        .build();
        if (record.getState() != AudioRecord.STATE_INITIALIZED) {
            Log.e(TAG, "Failed to initialize AudioRecord");
            record.release();
            return false;
        }

        record.startRecording();
        try {
            byte[] buffer = new byte[bytesPerSecond]; // read 1 second of audio
            int numBytes = 0;
            while (numBytes < buffer.length) {
                int bytesRead =
                        record.read(buffer, numBytes, Math.min(1024, buffer.length - numBytes));
                if (bytesRead < 0) {
                    Log.e(TAG, "Error reading from mic: " + bytesRead);
                    return false;
                }
                numBytes += bytesRead;
            }
            // The audio data will be zero on virtual device, so it would be better to skip to
            // check the audio data.
            if (Utils.isVirtualDevice()) {
                return true;
            }
            if (mCheckAudioDataIsNotZero) {
                for (byte b : buffer) {
                    // TODO: Maybe check that some portion of the bytes are non-zero.
                    if (b != 0) {
                        return true;
                    }
                }
                Log.d(TAG, "All data are zero");
                return false;
            }

            return true;
        } finally {
            record.release();
        }
    }
}