1 /* 2 * Copyright (C) 2021 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package android.voiceinteraction.service; 18 19 import static android.media.AudioFormat.CHANNEL_IN_FRONT; 20 21 import android.media.AudioAttributes; 22 import android.media.AudioFormat; 23 import android.media.AudioRecord; 24 import android.media.MediaRecorder; 25 import android.os.Handler; 26 import android.os.Looper; 27 import android.os.ParcelFileDescriptor; 28 import android.os.PersistableBundle; 29 import android.os.Process; 30 import android.os.SharedMemory; 31 import android.service.voice.AlwaysOnHotwordDetector; 32 import android.service.voice.HotwordDetectedResult; 33 import android.service.voice.HotwordDetectionService; 34 import android.service.voice.HotwordRejectedResult; 35 import android.system.ErrnoException; 36 import android.text.TextUtils; 37 import android.util.Log; 38 import android.voiceinteraction.common.Utils; 39 40 import androidx.annotation.NonNull; 41 import androidx.annotation.Nullable; 42 43 import java.io.IOException; 44 import java.io.InputStream; 45 import java.util.function.IntConsumer; 46 47 import javax.annotation.concurrent.GuardedBy; 48 49 public class MainHotwordDetectionService extends HotwordDetectionService { 50 static final String TAG = "MainHotwordDetectionService"; 51 52 public static final int DEFAULT_PHRASE_ID = 5; 53 public static final HotwordDetectedResult DETECTED_RESULT = 54 new HotwordDetectedResult.Builder() 55 .setAudioChannel(CHANNEL_IN_FRONT) 56 .setConfidenceLevel(HotwordDetectedResult.CONFIDENCE_LEVEL_HIGH) 57 .setHotwordDetectionPersonalized(true) 58 .setHotwordDurationMillis(1000) 59 .setHotwordOffsetMillis(500) 60 .setHotwordPhraseId(DEFAULT_PHRASE_ID) 61 .setPersonalizedScore(10) 62 .setScore(15) 63 .build(); 64 public static final HotwordDetectedResult DETECTED_RESULT_AFTER_STOP_DETECTION = 65 new HotwordDetectedResult.Builder() 66 .setHotwordPhraseId(DEFAULT_PHRASE_ID) 67 .setScore(57) 68 .build(); 69 public static final HotwordDetectedResult DETECTED_RESULT_FOR_MIC_FAILURE = 70 new HotwordDetectedResult.Builder() 71 .setHotwordPhraseId(DEFAULT_PHRASE_ID) 72 .setScore(58) 73 .build(); 74 public static final HotwordRejectedResult REJECTED_RESULT = 75 new HotwordRejectedResult.Builder() 76 .setConfidenceLevel(HotwordRejectedResult.CONFIDENCE_LEVEL_MEDIUM) 77 .build(); 78 @NonNull 79 private final Object mLock = new Object(); 80 private Handler mHandler; 81 @GuardedBy("mLock") 82 private boolean mStopDetectionCalled; 83 84 @GuardedBy("mLock") 85 @Nullable 86 private Runnable mDetectionJob; 87 88 @Override onCreate()89 public void onCreate() { 90 super.onCreate(); 91 mHandler = Handler.createAsync(Looper.getMainLooper()); 92 Log.d(TAG, "onCreate"); 93 } 94 95 @Override onDetect(@onNull AlwaysOnHotwordDetector.EventPayload eventPayload, long timeoutMillis, @NonNull Callback callback)96 public void onDetect(@NonNull AlwaysOnHotwordDetector.EventPayload eventPayload, 97 long timeoutMillis, @NonNull Callback callback) { 98 Log.d(TAG, "onDetect for DSP source"); 99 100 if (!canReadAudio()) { 101 callback.onDetected(DETECTED_RESULT_FOR_MIC_FAILURE); 102 return; 103 } 104 105 // TODO: Check the capture session (needs to be reflectively accessed). 106 byte[] data = eventPayload.getTriggerAudio(); 107 if (data != null && data.length > 0) { 108 // Create the unaccepted HotwordDetectedResult first to test the protection in the 109 // onDetected callback function of HotwordDetectionService. When the bundle data of 110 // HotwordDetectedResult is larger than max bundle size, it will throw the 111 // IllegalArgumentException. 112 PersistableBundle persistableBundle = new PersistableBundle(); 113 HotwordDetectedResult hotwordDetectedResult = 114 new HotwordDetectedResult.Builder() 115 .setHotwordPhraseId(eventPayload.getKeyphraseRecognitionExtras().get( 116 0).getKeyphraseId()) 117 .setExtras(persistableBundle) 118 .build(); 119 int key = 0; 120 do { 121 persistableBundle.putInt(Integer.toString(key), 0); 122 key++; 123 } while (Utils.getParcelableSize(persistableBundle) 124 <= HotwordDetectedResult.getMaxBundleSize()); 125 126 try { 127 callback.onDetected(hotwordDetectedResult); 128 } catch (IllegalArgumentException e) { 129 callback.onDetected(DETECTED_RESULT); 130 } 131 } else { 132 callback.onRejected(REJECTED_RESULT); 133 } 134 } 135 136 @Override onDetect( @onNull ParcelFileDescriptor audioStream, @NonNull AudioFormat audioFormat, @Nullable PersistableBundle options, @NonNull Callback callback)137 public void onDetect( 138 @NonNull ParcelFileDescriptor audioStream, 139 @NonNull AudioFormat audioFormat, 140 @Nullable PersistableBundle options, 141 @NonNull Callback callback) { 142 Log.d(TAG, "onDetect for external source"); 143 144 if (callback == null) { 145 Log.w(TAG, "callback is null"); 146 return; 147 } 148 if (audioStream == null) { 149 Log.w(TAG, "audioStream is null"); 150 return; 151 } 152 153 long startTime = System.currentTimeMillis(); 154 try (InputStream fis = 155 new ParcelFileDescriptor.AutoCloseInputStream(audioStream)) { 156 157 // We added the fake audio data and set "hotword!" string at the head. Then we simulated 158 // to verify the audio data with "hotword!" in HotwordDetectionService. If the audio 159 // data includes "hotword!", it means that the hotword is valid. 160 while (fis.available() < 8) { 161 try { 162 Thread.sleep(10); 163 } catch (InterruptedException e) { 164 // Nothing 165 } 166 if (System.currentTimeMillis() - startTime > 3000) { 167 Log.w(TAG, "Over timeout"); 168 return; 169 } 170 } 171 Log.d(TAG, "fis.available() = " + fis.available()); 172 byte[] buffer = new byte[8]; 173 fis.read(buffer, 0, 8); 174 if (isSame(buffer, BasicVoiceInteractionService.FAKE_HOTWORD_AUDIO_DATA, 175 buffer.length)) { 176 Log.d(TAG, "call callback.onDetected"); 177 callback.onDetected(DETECTED_RESULT); 178 } 179 } catch (IOException e) { 180 Log.w(TAG, "Failed to read data : ", e); 181 } 182 } 183 184 @Override onDetect(@onNull Callback callback)185 public void onDetect(@NonNull Callback callback) { 186 Log.d(TAG, "onDetect for Mic source"); 187 synchronized (mLock) { 188 if (mDetectionJob != null) { 189 throw new IllegalStateException("onDetect called while already detecting"); 190 } 191 if (!mStopDetectionCalled) { 192 // Delaying this allows us to test other flows, such as stopping detection. It's 193 // also more realistic to schedule it onto another thread. 194 mDetectionJob = () -> { 195 Log.d(TAG, "Sending detected result"); 196 197 if (canReadAudio()) { 198 callback.onDetected(DETECTED_RESULT); 199 } else { 200 callback.onDetected(DETECTED_RESULT_FOR_MIC_FAILURE); 201 } 202 }; 203 mHandler.postDelayed(mDetectionJob, 1500); 204 } else { 205 Log.d(TAG, "Sending detected result after stop detection"); 206 // We can't store and use this callback in onStopDetection (not valid anymore 207 // there), so instead we trigger detection again to report the event. 208 callback.onDetected(DETECTED_RESULT_AFTER_STOP_DETECTION); 209 } 210 } 211 } 212 213 @Override onStopDetection()214 public void onStopDetection() { 215 super.onStopDetection(); 216 Log.d(TAG, "onStopDetection"); 217 synchronized (mLock) { 218 mHandler.removeCallbacks(mDetectionJob); 219 mDetectionJob = null; 220 mStopDetectionCalled = true; 221 } 222 } 223 224 @Override onUpdateState( @ullable PersistableBundle options, @Nullable SharedMemory sharedMemory, long callbackTimeoutMillis, @Nullable IntConsumer statusCallback)225 public void onUpdateState( 226 @Nullable PersistableBundle options, 227 @Nullable SharedMemory sharedMemory, 228 long callbackTimeoutMillis, 229 @Nullable IntConsumer statusCallback) { 230 super.onUpdateState(options, sharedMemory, callbackTimeoutMillis, statusCallback); 231 Log.d(TAG, "onUpdateState"); 232 233 // Reset mDetectionJob and mStopDetectionCalled when service is initializing. 234 synchronized (mLock) { 235 if (statusCallback != null) { 236 if (mDetectionJob != null) { 237 Log.d(TAG, "onUpdateState mDetectionJob is not null"); 238 mHandler.removeCallbacks(mDetectionJob); 239 mDetectionJob = null; 240 } 241 mStopDetectionCalled = false; 242 } 243 } 244 245 if (options != null) { 246 if (options.getInt(Utils.KEY_TEST_SCENARIO, -1) 247 == Utils.HOTWORD_DETECTION_SERVICE_ON_UPDATE_STATE_CRASH) { 248 Log.d(TAG, "Crash itself. Pid: " + Process.myPid()); 249 Process.killProcess(Process.myPid()); 250 return; 251 } 252 String fakeData = options.getString(BasicVoiceInteractionService.KEY_FAKE_DATA); 253 if (!TextUtils.equals(fakeData, BasicVoiceInteractionService.VALUE_FAKE_DATA)) { 254 Log.d(TAG, "options : data is not the same"); 255 return; 256 } 257 } 258 259 if (sharedMemory != null) { 260 try { 261 sharedMemory.mapReadWrite(); 262 Log.d(TAG, "sharedMemory : is not read-only"); 263 return; 264 } catch (ErrnoException e) { 265 // For read-only case 266 } finally { 267 sharedMemory.close(); 268 } 269 } 270 271 // Report success 272 Log.d(TAG, "onUpdateState success"); 273 if (statusCallback != null) { 274 statusCallback.accept(INITIALIZATION_STATUS_SUCCESS); 275 } 276 } 277 isSame(byte[] array1, byte[] array2, int length)278 private boolean isSame(byte[] array1, byte[] array2, int length) { 279 if (length <= 0) { 280 return false; 281 } 282 if (array1 == null || array2 == null || array1.length < length || array2.length < length) { 283 return false; 284 } 285 for (int i = 0; i < length; i++) { 286 if (array1[i] != array2[i]) { 287 return false; 288 } 289 } 290 return true; 291 } 292 canReadAudio()293 private boolean canReadAudio() { 294 int bytesPerSample = 2; // for ENCODING_PCM_16BIT 295 int sampleRate = 16000; 296 int bytesPerSecond = bytesPerSample * sampleRate; // for single channel 297 AudioRecord record = 298 new AudioRecord.Builder() 299 .setAudioAttributes( 300 new AudioAttributes.Builder() 301 .setInternalCapturePreset(MediaRecorder.AudioSource.HOTWORD) 302 .build()) 303 .setAudioFormat( 304 new AudioFormat.Builder() 305 .setChannelMask(AudioFormat.CHANNEL_IN_MONO) 306 .setEncoding(AudioFormat.ENCODING_PCM_16BIT) 307 .setSampleRate(sampleRate) 308 .build()) 309 .setBufferSizeInBytes(bytesPerSecond) 310 .build(); 311 if (record.getState() != AudioRecord.STATE_INITIALIZED) { 312 Log.e(TAG, "Failed to initialize AudioRecord"); 313 record.release(); 314 return false; 315 } 316 317 record.startRecording(); 318 try { 319 byte[] buffer = new byte[bytesPerSecond]; // read 1 second of audio 320 int numBytes = 0; 321 while (numBytes < buffer.length) { 322 int bytesRead = 323 record.read(buffer, numBytes, Math.min(1024, buffer.length - numBytes)); 324 if (bytesRead < 0) { 325 Log.e(TAG, "Error reading from mic: " + bytesRead); 326 return false; 327 } 328 numBytes += bytesRead; 329 } 330 // The audio data will be zero on virtual device, so it would be better to skip to 331 // check the audio data. 332 if (Utils.isVirtualDevice()) { 333 return true; 334 } 335 for (byte b : buffer) { 336 // TODO: Maybe check that some portion of the bytes are non-zero. 337 if (b != 0) { 338 return true; 339 } 340 } 341 Log.d(TAG, "All data are zero"); 342 return false; 343 } finally { 344 record.release(); 345 } 346 } 347 } 348