/*
 * Copyright (C) 2021 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
17 package android.voiceinteraction.service;
18 
19 import static android.media.AudioFormat.CHANNEL_IN_FRONT;
20 
21 import android.media.AudioAttributes;
22 import android.media.AudioFormat;
23 import android.media.AudioRecord;
24 import android.media.MediaRecorder;
25 import android.os.Handler;
26 import android.os.Looper;
27 import android.os.ParcelFileDescriptor;
28 import android.os.PersistableBundle;
29 import android.os.Process;
30 import android.os.SharedMemory;
31 import android.os.SystemClock;
32 import android.service.voice.AlwaysOnHotwordDetector;
33 import android.service.voice.HotwordDetectedResult;
34 import android.service.voice.HotwordDetectionService;
35 import android.service.voice.HotwordRejectedResult;
36 import android.service.voice.SandboxedDetectionInitializer;
37 import android.system.ErrnoException;
38 import android.text.TextUtils;
39 import android.util.Log;
40 import android.voiceinteraction.common.Utils;
41 
42 import androidx.annotation.NonNull;
43 import androidx.annotation.Nullable;
44 
45 import java.io.IOException;
46 import java.io.InputStream;
47 import java.util.function.IntConsumer;
48 
49 import javax.annotation.concurrent.GuardedBy;
50 
51 public class MainHotwordDetectionService extends HotwordDetectionService {
52     static final String TAG = "MainHotwordDetectionService";
53 
54     public static byte[] FAKE_HOTWORD_AUDIO_DATA =
55             new byte[]{'h', 'o', 't', 'w', 'o', 'r', 'd', '!'};
56     public static String KEY_FAKE_DATA = "fakeData";
57     public static String VALUE_FAKE_DATA = "fakeData";
58     public static final int DEFAULT_PHRASE_ID = 5;
59     public static final HotwordDetectedResult DETECTED_RESULT =
60             new HotwordDetectedResult.Builder()
61                     .setAudioChannel(CHANNEL_IN_FRONT)
62                     .setConfidenceLevel(HotwordDetectedResult.CONFIDENCE_LEVEL_HIGH)
63                     .setHotwordDetectionPersonalized(true)
64                     .setHotwordDurationMillis(1000)
65                     .setHotwordOffsetMillis(500)
66                     .setHotwordPhraseId(DEFAULT_PHRASE_ID)
67                     .setPersonalizedScore(10)
68                     .setScore(15)
69                     .setBackgroundAudioPower(50)
70                     .build();
71     public static final HotwordDetectedResult DETECTED_RESULT_AFTER_STOP_DETECTION =
72             new HotwordDetectedResult.Builder()
73                     .setHotwordPhraseId(DEFAULT_PHRASE_ID)
74                     .setScore(57)
75                     .build();
76     public static final HotwordDetectedResult DETECTED_RESULT_FOR_MIC_FAILURE =
77             new HotwordDetectedResult.Builder()
78                     .setHotwordPhraseId(DEFAULT_PHRASE_ID)
79                     .setScore(58)
80                     .build();
81     public static final HotwordRejectedResult REJECTED_RESULT =
82             new HotwordRejectedResult.Builder()
83                     .setConfidenceLevel(HotwordRejectedResult.CONFIDENCE_LEVEL_MEDIUM)
84                     .build();
85 
86     @NonNull
87     private final Object mLock = new Object();
88     private Handler mHandler;
89     @GuardedBy("mLock")
90     private boolean mStopDetectionCalled;
91     @GuardedBy("mLock")
92     private int mDetectionDelayMs = 0;
93     private long mServiceCreatedTimeMillis = -1;
94 
95     @GuardedBy("mLock")
96     @Nullable
97     private Runnable mDetectionJob;
98 
99     private boolean mIsTestUnexpectedCallback;
100 
101     private boolean mIsTestAudioEgress;
102 
103     /**
104      * It only works when {@link #mIsTestAudioEgress} is true
105      */
106     private boolean mUseIllegalAudioEgressCopyBufferSize;
107 
108     private boolean mIsNoNeedActionDuringDetection;
109 
110     private boolean mCheckAudioDataIsNotZero;
111 
112     @Override
onCreate()113     public void onCreate() {
114         super.onCreate();
115         mHandler = Handler.createAsync(Looper.getMainLooper());
116         mServiceCreatedTimeMillis = SystemClock.elapsedRealtime();
117         Log.d(TAG, "onCreate");
118     }
119 
120     @Override
onDetect(@onNull AlwaysOnHotwordDetector.EventPayload eventPayload, long timeoutMillis, @NonNull Callback callback)121     public void onDetect(@NonNull AlwaysOnHotwordDetector.EventPayload eventPayload,
122             long timeoutMillis, @NonNull Callback callback) {
123         Log.d(TAG, "onDetect for DSP source");
124 
125         if (mIsNoNeedActionDuringDetection) {
126             mIsNoNeedActionDuringDetection = false;
127             return;
128         }
129 
130         if (!canReadAudio()) {
131             callback.onDetected(DETECTED_RESULT_FOR_MIC_FAILURE);
132             return;
133         }
134 
135         // TODO: Check the capture session (needs to be reflectively accessed).
136         byte[] data = eventPayload.getData();
137         if (data != null && data.length > 0) {
138             if (mIsTestUnexpectedCallback) {
139                 Log.d(TAG, "callback onDetected twice");
140                 callback.onDetected(DETECTED_RESULT);
141                 callback.onDetected(DETECTED_RESULT);
142                 mIsTestUnexpectedCallback = false;
143                 return;
144             }
145 
146             // Create the unaccepted HotwordDetectedResult first to test the protection in the
147             // onDetected callback function of HotwordDetectionService. When the bundle data of
148             // HotwordDetectedResult is larger than max bundle size, it will throw the
149             // IllegalArgumentException.
150             PersistableBundle persistableBundle = new PersistableBundle();
151             HotwordDetectedResult hotwordDetectedResult =
152                     new HotwordDetectedResult.Builder()
153                             .setHotwordPhraseId(eventPayload.getKeyphraseRecognitionExtras().get(
154                                     0).getKeyphraseId())
155                             .setExtras(persistableBundle)
156                             .build();
157             int key = 0;
158             do {
159                 persistableBundle.putInt(Integer.toString(key), 0);
160                 key++;
161             } while (Utils.getParcelableSize(persistableBundle)
162                     <= HotwordDetectedResult.getMaxBundleSize());
163 
164             synchronized (mLock) {
165                 mHandler.postDelayed(() -> {
166                     try {
167                         if (mIsTestAudioEgress) {
168                             if (mUseIllegalAudioEgressCopyBufferSize) {
169                                 callback.onDetected(
170                                         Utils.AUDIO_EGRESS_DETECTED_RESULT_WRONG_COPY_BUFFER_SIZE);
171                             } else {
172                                 callback.onDetected(Utils.AUDIO_EGRESS_DETECTED_RESULT);
173                             }
174                         } else {
175                             callback.onDetected(hotwordDetectedResult);
176                         }
177                     } catch (IllegalArgumentException e) {
178                         callback.onDetected(DETECTED_RESULT);
179                     }
180                 }, mDetectionDelayMs);
181             }
182         } else {
183             callback.onRejected(REJECTED_RESULT);
184             if (mIsTestUnexpectedCallback) {
185                 Log.d(TAG, "callback onRejected again");
186                 callback.onRejected(REJECTED_RESULT);
187                 mIsTestUnexpectedCallback = false;
188             }
189         }
190     }
191 
192     @Override
onDetect( @onNull ParcelFileDescriptor audioStream, @NonNull AudioFormat audioFormat, @Nullable PersistableBundle options, @NonNull Callback callback)193     public void onDetect(
194             @NonNull ParcelFileDescriptor audioStream,
195             @NonNull AudioFormat audioFormat,
196             @Nullable PersistableBundle options,
197             @NonNull Callback callback) {
198         Log.d(TAG, "onDetect for external source");
199 
200         if (callback == null) {
201             Log.w(TAG, "callback is null");
202             return;
203         }
204         if (audioStream == null) {
205             Log.w(TAG, "audioStream is null");
206             return;
207         }
208         if (options != null) {
209             if (options.getBoolean(Utils.KEY_DETECTION_REJECTED, false)) {
210                 Log.d(TAG, "Call onRejected for external source");
211                 callback.onRejected(REJECTED_RESULT);
212                 return;
213             }
214         }
215 
216         long startTime = System.currentTimeMillis();
217         try (InputStream fis =
218                      new ParcelFileDescriptor.AutoCloseInputStream(audioStream)) {
219 
220             // We added the fake audio data and set "hotword!" string at the head. Then we simulated
221             // to verify the audio data with "hotword!" in HotwordDetectionService. If the audio
222             // data includes "hotword!", it means that the hotword is valid.
223             while (fis.available() < 8) {
224                 try {
225                     Thread.sleep(10);
226                 } catch (InterruptedException e) {
227                     // Nothing
228                 }
229                 if (System.currentTimeMillis() - startTime > 3000) {
230                     Log.w(TAG, "Over timeout");
231                     return;
232                 }
233             }
234             Log.d(TAG, "fis.available() = " + fis.available());
235             byte[] buffer = new byte[8];
236             fis.read(buffer, 0, 8);
237             if (isSame(buffer, FAKE_HOTWORD_AUDIO_DATA,
238                     buffer.length)) {
239                 Log.d(TAG, "call callback.onDetected");
240                 if (mIsTestAudioEgress) {
241                     if (mUseIllegalAudioEgressCopyBufferSize) {
242                         callback.onDetected(
243                                 Utils.AUDIO_EGRESS_DETECTED_RESULT_WRONG_COPY_BUFFER_SIZE);
244                     } else {
245                         callback.onDetected(Utils.AUDIO_EGRESS_DETECTED_RESULT);
246                     }
247                 } else {
248                     callback.onDetected(DETECTED_RESULT);
249                 }
250             }
251         } catch (IOException e) {
252             Log.w(TAG, "Failed to read data : ", e);
253         }
254     }
255 
256     @Override
onDetect(@onNull Callback callback)257     public void onDetect(@NonNull Callback callback) {
258         Log.d(TAG, "onDetect for Mic source");
259         synchronized (mLock) {
260             if (mDetectionJob != null) {
261                 throw new IllegalStateException("onDetect called while already detecting");
262             }
263             if (!mStopDetectionCalled) {
264                 // Delaying this allows us to test other flows, such as stopping detection. It's
265                 // also more realistic to schedule it onto another thread.
266                 mDetectionJob = () -> {
267                     Log.d(TAG, "Sending detected result");
268                     if (mIsTestUnexpectedCallback) {
269                         Log.d(TAG, "callback onDetected twice");
270                         callback.onDetected(DETECTED_RESULT);
271                         callback.onDetected(DETECTED_RESULT);
272                         mIsTestUnexpectedCallback = false;
273                         return;
274                     }
275                     if (canReadAudio()) {
276                         if (mIsTestAudioEgress) {
277                             if (mUseIllegalAudioEgressCopyBufferSize) {
278                                 callback.onDetected(
279                                         Utils.AUDIO_EGRESS_DETECTED_RESULT_WRONG_COPY_BUFFER_SIZE);
280                             } else {
281                                 callback.onDetected(Utils.AUDIO_EGRESS_DETECTED_RESULT);
282                             }
283                         } else {
284                             callback.onDetected(DETECTED_RESULT);
285                         }
286                     } else {
287                         callback.onDetected(DETECTED_RESULT_FOR_MIC_FAILURE);
288                     }
289                 };
290                 mHandler.postDelayed(mDetectionJob, 1500);
291             } else {
292                 Log.d(TAG, "Sending detected result after stop detection");
293                 // We can't store and use this callback in onStopDetection (not valid anymore
294                 // there), so instead we trigger detection again to report the event.
295                 callback.onDetected(DETECTED_RESULT_AFTER_STOP_DETECTION);
296             }
297         }
298     }
299 
300     @Override
onStopDetection()301     public void onStopDetection() {
302         super.onStopDetection();
303         Log.d(TAG, "onStopDetection");
304         synchronized (mLock) {
305             mHandler.removeCallbacks(mDetectionJob);
306             mDetectionJob = null;
307             mStopDetectionCalled = true;
308         }
309     }
310 
311     @Override
onUpdateState( @ullable PersistableBundle options, @Nullable SharedMemory sharedMemory, long callbackTimeoutMillis, @Nullable IntConsumer statusCallback)312     public void onUpdateState(
313             @Nullable PersistableBundle options,
314             @Nullable SharedMemory sharedMemory,
315             long callbackTimeoutMillis,
316             @Nullable IntConsumer statusCallback) {
317         super.onUpdateState(options, sharedMemory, callbackTimeoutMillis, statusCallback);
318         Log.d(TAG, "onUpdateState");
319 
320         // Reset mDetectionJob and mStopDetectionCalled when service is initializing.
321         synchronized (mLock) {
322             // When the service is initializing, the statusCallback will be not null.
323             if (statusCallback != null) {
324                 if (mDetectionJob != null) {
325                     Log.d(TAG, "onUpdateState mDetectionJob is not null");
326                     mHandler.removeCallbacks(mDetectionJob);
327                     mDetectionJob = null;
328                 }
329                 mStopDetectionCalled = false;
330 
331                 if (options != null) {
332                     if (options.getInt(Utils.KEY_TEST_SCENARIO, -1)
333                             == Utils.EXTRA_HOTWORD_DETECTION_SERVICE_SEND_OVER_MAX_INIT_STATUS) {
334                         Log.d(TAG, "send over the max custom initialization status");
335                         final int initializationStatus =
336                                 SandboxedDetectionInitializer.getMaxCustomInitializationStatus();
337                         try {
338                             statusCallback.accept(initializationStatus + 1);
339                         } catch (IllegalArgumentException ex) {
340                             Log.d(TAG, "expect to get IllegalArgumentException here");
341                             statusCallback.accept(initializationStatus);
342                         }
343                         return;
344                     } else if (options.getInt(Utils.KEY_TEST_SCENARIO, -1)
345                             == Utils.EXTRA_HOTWORD_DETECTION_SERVICE_SEND_CUSTOM_INIT_STATUS) {
346                         Log.d(TAG, "send custom initialization status");
347                         statusCallback.accept(options.getInt(Utils.KEY_INITIALIZATION_STATUS, -1));
348                         return;
349                     } else if (options.getInt(Utils.KEY_TEST_SCENARIO, -1)
350                             == Utils.EXTRA_HOTWORD_DETECTION_SERVICE_SEND_SUCCESS_IF_CREATED_AFTER) {
351                         // verify that the HotwordDetectionService was created after the timestamp
352                         // passed in via the options parameter
353                         long optionsTimestampMillis = options.getLong(Utils.KEY_TIMESTAMP_MILLIS,
354                                 -1);
355                         Log.d(TAG, "send initialization success if created after: "
356                                 + optionsTimestampMillis
357                                 + ", onCreate timestamp=" + mServiceCreatedTimeMillis);
358                         statusCallback.accept((mServiceCreatedTimeMillis > optionsTimestampMillis)
359                                 ? SandboxedDetectionInitializer.INITIALIZATION_STATUS_SUCCESS
360                                 : SandboxedDetectionInitializer.getMaxCustomInitializationStatus());
361                         return;
362                     }
363                 }
364             }
365             if (options != null) {
366                 mDetectionDelayMs = options.getInt(Utils.KEY_DETECTION_DELAY_MS, 0);
367 
368                 if (options.getInt(Utils.KEY_TEST_SCENARIO, -1)
369                         == Utils.EXTRA_HOTWORD_DETECTION_SERVICE_CLEAR_SOFTWARE_DETECTION_JOB) {
370                     Log.d(TAG, "options : Clear software detection job");
371                     if (mDetectionJob != null) {
372                         Log.d(TAG, "Clear mDetectionJob");
373                         mHandler.removeCallbacks(mDetectionJob);
374                         mDetectionJob = null;
375                     }
376                     return;
377                 }
378             }
379         }
380 
381         if (options != null) {
382             if (options.getInt(Utils.KEY_TEST_SCENARIO, -1)
383                     == Utils.EXTRA_HOTWORD_DETECTION_SERVICE_ON_UPDATE_STATE_CRASH) {
384                 Log.d(TAG, "Crash itself. Pid: " + Process.myPid());
385                 Process.killProcess(Process.myPid());
386                 return;
387             }
388             if (options.getInt(Utils.KEY_TEST_SCENARIO, -1)
389                     == Utils.EXTRA_HOTWORD_DETECTION_SERVICE_ON_UPDATE_STATE_UNEXPECTED_CALLBACK) {
390                 Log.d(TAG, "options : Test unexpected callback");
391                 mIsTestUnexpectedCallback = true;
392                 return;
393             }
394             if (options.getInt(Utils.KEY_TEST_SCENARIO, -1)
395                     == Utils.EXTRA_HOTWORD_DETECTION_SERVICE_ENABLE_AUDIO_EGRESS) {
396                 mUseIllegalAudioEgressCopyBufferSize = options.getBoolean(
397                         Utils.KEY_AUDIO_EGRESS_USE_ILLEGAL_COPY_BUFFER_SIZE,
398                         /* defaultValue= */ false);
399                 Log.d(TAG, "options : Test audio egress use illegal copy buffer size = "
400                         + mUseIllegalAudioEgressCopyBufferSize);
401                 mIsTestAudioEgress = true;
402                 return;
403             }
404             if (options.getInt(Utils.KEY_TEST_SCENARIO, -1)
405                     == Utils.EXTRA_HOTWORD_DETECTION_SERVICE_NO_NEED_ACTION_DURING_DETECTION) {
406                 Log.d(TAG, "options : Test no need action during detection");
407                 mIsNoNeedActionDuringDetection = true;
408                 return;
409             }
410             if (options.getInt(Utils.KEY_TEST_SCENARIO, -1)
411                     == Utils.EXTRA_HOTWORD_DETECTION_SERVICE_CAN_READ_AUDIO_DATA_IS_NOT_ZERO) {
412                 Log.d(TAG, "options : Test can read audio, and data is not zero");
413                 mCheckAudioDataIsNotZero = true;
414                 return;
415             }
416 
417             String fakeData = options.getString(KEY_FAKE_DATA);
418             if (!TextUtils.equals(fakeData, VALUE_FAKE_DATA)) {
419                 Log.d(TAG, "options : data is not the same");
420                 return;
421             }
422         }
423 
424         if (sharedMemory != null) {
425             try {
426                 sharedMemory.mapReadWrite();
427                 Log.d(TAG, "sharedMemory : is not read-only");
428                 return;
429             } catch (ErrnoException e) {
430                 // For read-only case
431             } finally {
432                 sharedMemory.close();
433             }
434         }
435 
436         // Report success
437         Log.d(TAG, "onUpdateState success");
438         if (statusCallback != null) {
439             statusCallback.accept(INITIALIZATION_STATUS_SUCCESS);
440         }
441     }
442 
isSame(byte[] array1, byte[] array2, int length)443     private boolean isSame(byte[] array1, byte[] array2, int length) {
444         if (length <= 0) {
445             return false;
446         }
447         if (array1 == null || array2 == null || array1.length < length || array2.length < length) {
448             return false;
449         }
450         for (int i = 0; i < length; i++) {
451             if (array1[i] != array2[i]) {
452                 return false;
453             }
454         }
455         return true;
456     }
457 
canReadAudio()458     private boolean canReadAudio() {
459         int bytesPerSample = 2; // for ENCODING_PCM_16BIT
460         int sampleRate = 16000;
461         int bytesPerSecond = bytesPerSample * sampleRate; // for single channel
462         AudioRecord record =
463                 new AudioRecord.Builder()
464                         .setAudioAttributes(
465                                 new AudioAttributes.Builder()
466                                         .setInternalCapturePreset(MediaRecorder.AudioSource.HOTWORD)
467                                         .build())
468                         .setAudioFormat(
469                                 new AudioFormat.Builder()
470                                         .setChannelMask(AudioFormat.CHANNEL_IN_MONO)
471                                         .setEncoding(AudioFormat.ENCODING_PCM_16BIT)
472                                         .setSampleRate(sampleRate)
473                                         .build())
474                         .setBufferSizeInBytes(bytesPerSecond)
475                         .build();
476         if (record.getState() != AudioRecord.STATE_INITIALIZED) {
477             Log.e(TAG, "Failed to initialize AudioRecord");
478             record.release();
479             return false;
480         }
481 
482         record.startRecording();
483         try {
484             byte[] buffer = new byte[bytesPerSecond]; // read 1 second of audio
485             int numBytes = 0;
486             while (numBytes < buffer.length) {
487                 int bytesRead =
488                         record.read(buffer, numBytes, Math.min(1024, buffer.length - numBytes));
489                 if (bytesRead < 0) {
490                     Log.e(TAG, "Error reading from mic: " + bytesRead);
491                     return false;
492                 }
493                 numBytes += bytesRead;
494             }
495             // The audio data will be zero on virtual device, so it would be better to skip to
496             // check the audio data.
497             if (Utils.isVirtualDevice()) {
498                 return true;
499             }
500             if (mCheckAudioDataIsNotZero) {
501                 for (byte b : buffer) {
502                     // TODO: Maybe check that some portion of the bytes are non-zero.
503                     if (b != 0) {
504                         return true;
505                     }
506                 }
507                 Log.d(TAG, "All data are zero");
508                 return false;
509             }
510 
511             return true;
512         } finally {
513             record.release();
514         }
515     }
516 }
517