/*
 * Copyright (C) 2022 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

17 package android.service.voice;
18 
19 import android.annotation.NonNull;
20 import android.annotation.Nullable;
21 import android.compat.annotation.UnsupportedAppUsage;
22 import android.media.AudioFormat;
23 import android.media.AudioRecord;
24 import android.media.AudioTimestamp;
25 import android.os.Parcel;
26 import android.os.ParcelFileDescriptor;
27 import android.os.Parcelable;
28 import android.os.PersistableBundle;
29 
30 import java.util.Arrays;
31 import java.util.Objects;
32 
33 /**
34  * Represents an audio stream supporting the hotword detection.
35  *
36  * @hide
37  */
38 public final class HotwordAudioStream implements Parcelable {
39 
40     /**
41      * Key for int value to be read from {@link #getMetadata()}. The value is read by the system and
42      * is the length (in bytes) of the byte buffers created to copy bytes in the
43      * {@link #getAudioStreamParcelFileDescriptor()} written by the {@link HotwordDetectionService}.
44      * The buffer length should be chosen such that no additional latency is introduced. Typically,
45      * this should be <em>at least</em> the size of byte chunks written by the
46      * {@link HotwordDetectionService}.
47      *
48      * <p>If no value specified in the metadata for the buffer length, or if the value is less than
49      * 1, or if it is greater than 65,536, or if it is not an int, the default value of 2,560 will
50      * be used.</p>
51      */
52     public static final String KEY_AUDIO_STREAM_COPY_BUFFER_LENGTH_BYTES =
53             "android.service.voice.key.AUDIO_STREAM_COPY_BUFFER_LENGTH_BYTES";
54 
55     /**
56      * The {@link AudioFormat} of the audio stream.
57      */
58     @NonNull
59     @UnsupportedAppUsage
60     private final AudioFormat mAudioFormat;
61 
62     /**
63      * This stream typically starts with the audio bytes used for hotword detection, but continues
64      * streaming the audio (e.g., with the query) until the stream is shutdown by the
65      * {@link HotwordDetectionService}. The data format is expected to match
66      * {@link #getAudioFormat()}.
67      *
68      * <p>
69      * Alternatively, the {@link HotwordDetectionService} may use {@link #getInitialAudio()}
70      * to pass the start of the audio instead of streaming it here. This may prevent added latency
71      * caused by the streaming buffer (see {@link #KEY_AUDIO_STREAM_COPY_BUFFER_LENGTH_BYTES}) not
72      * being large enough to handle this initial chunk of audio.
73      * </p>
74      */
75     @NonNull
76     @UnsupportedAppUsage
77     private final ParcelFileDescriptor mAudioStreamParcelFileDescriptor;
78 
79     /**
80      * The timestamp when the audio stream was captured by the Audio platform.
81      *
82      * <p>
83      * The {@link HotwordDetectionService} egressing the audio is the owner of the underlying
84      * AudioRecord. The {@link HotwordDetectionService} is expected to optionally populate this
85      * field by {@link AudioRecord#getTimestamp}.
86      * </p>
87      *
88      * <p>
89      * This timestamp can be used in conjunction with the
90      * {@link HotwordDetectedResult#getHotwordOffsetMillis()} and
91      * {@link HotwordDetectedResult#getHotwordDurationMillis()} to translate these durations to
92      * timestamps.
93      * </p>
94      *
95      * @see #getAudioStreamParcelFileDescriptor()
96      */
97     @Nullable
98     @UnsupportedAppUsage
99     private final AudioTimestamp mTimestamp;
100 
defaultTimestamp()101     private static AudioTimestamp defaultTimestamp() {
102         return null;
103     }
104 
105     /**
106      * The metadata associated with the audio stream.
107      */
108     @NonNull
109     @UnsupportedAppUsage
110     private final PersistableBundle mMetadata;
111 
defaultMetadata()112     private static PersistableBundle defaultMetadata() {
113         return new PersistableBundle();
114     }
115 
timestampToString()116     private String timestampToString() {
117         if (mTimestamp == null) {
118             return "";
119         }
120         return "TimeStamp:"
121                 + " framePos=" + mTimestamp.framePosition
122                 + " nanoTime=" + mTimestamp.nanoTime;
123     }
124 
parcelTimestamp(Parcel dest, int flags)125     private void parcelTimestamp(Parcel dest, int flags) {
126         if (mTimestamp != null) {
127             // mTimestamp is not null, we write it to the parcel, set true.
128             dest.writeBoolean(true);
129             dest.writeLong(mTimestamp.framePosition);
130             dest.writeLong(mTimestamp.nanoTime);
131         } else {
132             // mTimestamp is null, we don't write any value out, set false.
133             dest.writeBoolean(false);
134         }
135     }
136 
137     @Nullable
unparcelTimestamp(Parcel in)138     private static AudioTimestamp unparcelTimestamp(Parcel in) {
139         // If it is true, it means we wrote the value to the parcel before, parse it.
140         // Otherwise, return null.
141         if (in.readBoolean()) {
142             final AudioTimestamp timeStamp = new AudioTimestamp();
143             timeStamp.framePosition = in.readLong();
144             timeStamp.nanoTime = in.readLong();
145             return timeStamp;
146         } else {
147             return null;
148         }
149     }
150 
151     /**
152      * The start of the audio used for hotword detection. The data format is expected to match
153      * {@link #getAudioFormat()}.
154      *
155      * <p>
156      * The {@link HotwordDetectionService} may use this instead of using
157      * {@link #getAudioStreamParcelFileDescriptor()} to stream these initial bytes of audio. This
158      * may prevent added latency caused by the streaming buffer (see
159      * {@link #KEY_AUDIO_STREAM_COPY_BUFFER_LENGTH_BYTES}) not being large enough to handle this
160      * initial chunk of audio.
161      * </p>
162      */
163     @NonNull
164     @UnsupportedAppUsage
165     private final byte[] mInitialAudio;
166 
167     private static final byte[] DEFAULT_INITIAL_EMPTY_AUDIO = {};
168 
defaultInitialAudio()169     private static byte[] defaultInitialAudio() {
170         return DEFAULT_INITIAL_EMPTY_AUDIO;
171     }
172 
initialAudioToString()173     private String initialAudioToString() {
174         return "length=" + mInitialAudio.length;
175     }
176 
177     /**
178      * Provides an instance of {@link Builder} with state corresponding to this instance.
179      * @hide
180      */
buildUpon()181     public Builder buildUpon() {
182         return new Builder(mAudioFormat, mAudioStreamParcelFileDescriptor)
183             .setTimestamp(mTimestamp)
184             .setMetadata(mMetadata)
185             .setInitialAudio(mInitialAudio);
186     }
187 
188     /* package-private */
HotwordAudioStream( @onNull AudioFormat audioFormat, @NonNull ParcelFileDescriptor audioStreamParcelFileDescriptor, @Nullable AudioTimestamp timestamp, @NonNull PersistableBundle metadata, @NonNull byte[] initialAudio)189     HotwordAudioStream(
190             @NonNull AudioFormat audioFormat,
191             @NonNull ParcelFileDescriptor audioStreamParcelFileDescriptor,
192             @Nullable AudioTimestamp timestamp,
193             @NonNull PersistableBundle metadata,
194             @NonNull byte[] initialAudio) {
195         this.mAudioFormat = audioFormat;
196         com.android.internal.util.AnnotationValidations.validate(
197                 NonNull.class, null, mAudioFormat);
198         this.mAudioStreamParcelFileDescriptor = audioStreamParcelFileDescriptor;
199         com.android.internal.util.AnnotationValidations.validate(
200                 NonNull.class, null, mAudioStreamParcelFileDescriptor);
201         this.mTimestamp = timestamp;
202         this.mMetadata = metadata;
203         com.android.internal.util.AnnotationValidations.validate(
204                 NonNull.class, null, mMetadata);
205         this.mInitialAudio = initialAudio;
206         com.android.internal.util.AnnotationValidations.validate(
207                 NonNull.class, null, mInitialAudio);
208 
209         // onConstructed(); // You can define this method to get a callback
210     }
211 
212     /**
213      * The {@link AudioFormat} of the audio stream.
214      */
215     @UnsupportedAppUsage
216     @NonNull
getAudioFormat()217     public AudioFormat getAudioFormat() {
218         return mAudioFormat;
219     }
220 
221     /**
222      * This stream typically starts with the audio bytes used for hotword detection, but continues
223      * streaming the audio (e.g., with the query) until the stream is shutdown by the
224      * {@link HotwordDetectionService}. The data format is expected to match
225      * {@link #getAudioFormat()}.
226      *
227      * <p>
228      * Alternatively, the {@link HotwordDetectionService} may use {@link #getInitialAudio()}
229      * to pass the start of the audio instead of streaming it here. This may prevent added latency
230      * caused by the streaming buffer (see {@link #KEY_AUDIO_STREAM_COPY_BUFFER_LENGTH_BYTES}) not
231      * being large enough to handle this initial chunk of audio.
232      * </p>
233      */
234     @UnsupportedAppUsage
235     @NonNull
getAudioStreamParcelFileDescriptor()236     public ParcelFileDescriptor getAudioStreamParcelFileDescriptor() {
237         return mAudioStreamParcelFileDescriptor;
238     }
239 
240     /**
241      * The timestamp when the audio stream was captured by the Audio platform.
242      *
243      * <p>
244      * The {@link HotwordDetectionService} egressing the audio is the owner of the underlying
245      * AudioRecord. The {@link HotwordDetectionService} is expected to optionally populate this
246      * field by {@link AudioRecord#getTimestamp}.
247      * </p>
248      *
249      * <p>
250      * This timestamp can be used in conjunction with the
251      * {@link HotwordDetectedResult#getHotwordOffsetMillis()} and
252      * {@link HotwordDetectedResult#getHotwordDurationMillis()} to translate these durations to
253      * timestamps.
254      * </p>
255      *
256      * @see #getAudioStreamParcelFileDescriptor()
257      */
258     @UnsupportedAppUsage
259     @Nullable
getTimestamp()260     public AudioTimestamp getTimestamp() {
261         return mTimestamp;
262     }
263 
264     /**
265      * The metadata associated with the audio stream.
266      */
267     @UnsupportedAppUsage
268     @NonNull
getMetadata()269     public PersistableBundle getMetadata() {
270         return mMetadata;
271     }
272 
273     /**
274      * The start of the audio used for hotword detection. The data format is expected to match
275      * {@link #getAudioFormat()}.
276      *
277      * <p>
278      * The {@link HotwordDetectionService} may use this instead of using
279      * {@link #getAudioStreamParcelFileDescriptor()} to stream these initial bytes of audio. This
280      * may prevent added latency caused by the streaming buffer (see
281      * {@link #KEY_AUDIO_STREAM_COPY_BUFFER_LENGTH_BYTES}) not being large enough to handle this
282      * initial chunk of audio.
283      * </p>
284      */
285     @UnsupportedAppUsage
286     @NonNull
getInitialAudio()287     public byte[] getInitialAudio() {
288         return mInitialAudio;
289     }
290 
291     @Override
toString()292     public String toString() {
293         // You can override field toString logic by defining methods like:
294         // String fieldNameToString() { ... }
295 
296         return "HotwordAudioStream { "
297                 + "audioFormat = " + mAudioFormat + ", "
298                 + "audioStreamParcelFileDescriptor = " + mAudioStreamParcelFileDescriptor + ", "
299                 + "timestamp = " + timestampToString() + ", "
300                 + "metadata = " + mMetadata + ", "
301                 + "initialAudio = " + initialAudioToString() + " }";
302     }
303 
304     @Override
equals(@ullable Object o)305     public boolean equals(@Nullable Object o) {
306         // You can override field equality logic by defining either of the methods like:
307         // boolean fieldNameEquals(HotwordAudioStream other) { ... }
308         // boolean fieldNameEquals(FieldType otherValue) { ... }
309 
310         if (this == o) return true;
311         if (o == null || getClass() != o.getClass()) return false;
312         @SuppressWarnings("unchecked")
313         HotwordAudioStream that = (HotwordAudioStream) o;
314         //noinspection PointlessBooleanExpression
315         return Objects.equals(mAudioFormat, that.mAudioFormat)
316                 && Objects.equals(mAudioStreamParcelFileDescriptor,
317                 that.mAudioStreamParcelFileDescriptor)
318                 && Objects.equals(mTimestamp, that.mTimestamp)
319                 && Objects.equals(mMetadata, that.mMetadata)
320                 && Arrays.equals(mInitialAudio, that.mInitialAudio);
321     }
322 
323     @Override
hashCode()324     public int hashCode() {
325         // You can override field hashCode logic by defining methods like:
326         // int fieldNameHashCode() { ... }
327 
328         int _hash = 1;
329         _hash = 31 * _hash + Objects.hashCode(mAudioFormat);
330         _hash = 31 * _hash + Objects.hashCode(mAudioStreamParcelFileDescriptor);
331         _hash = 31 * _hash + Objects.hashCode(mTimestamp);
332         _hash = 31 * _hash + Objects.hashCode(mMetadata);
333         _hash = 31 * _hash + Arrays.hashCode(mInitialAudio);
334         return _hash;
335     }
336 
337     @Override
writeToParcel(@onNull Parcel dest, int flags)338     public void writeToParcel(@NonNull Parcel dest, int flags) {
339         // You can override field parcelling by defining methods like:
340         // void parcelFieldName(Parcel dest, int flags) { ... }
341 
342         byte flg = 0;
343         if (mTimestamp != null) flg |= 0x4;
344         dest.writeByte(flg);
345         dest.writeTypedObject(mAudioFormat, flags);
346         dest.writeTypedObject(mAudioStreamParcelFileDescriptor, flags);
347         parcelTimestamp(dest, flags);
348         dest.writeTypedObject(mMetadata, flags);
349         dest.writeByteArray(mInitialAudio);
350     }
351 
352     @Override
describeContents()353     public int describeContents() {
354         return 0;
355     }
356 
357     /** @hide */
358     @SuppressWarnings({"unchecked", "RedundantCast"})
359     /* package-private */
HotwordAudioStream(@onNull Parcel in)360     HotwordAudioStream(@NonNull Parcel in) {
361         // You can override field unparcelling by defining methods like:
362         // static FieldType unparcelFieldName(Parcel in) { ... }
363 
364         byte flg = in.readByte();
365         AudioFormat audioFormat = (AudioFormat) in.readTypedObject(AudioFormat.CREATOR);
366         ParcelFileDescriptor audioStreamParcelFileDescriptor =
367                 (ParcelFileDescriptor) in.readTypedObject(ParcelFileDescriptor.CREATOR);
368         AudioTimestamp timestamp = unparcelTimestamp(in);
369         PersistableBundle metadata = (PersistableBundle) in.readTypedObject(
370                 PersistableBundle.CREATOR);
371         byte[] initialAudio = in.createByteArray();
372 
373         this.mAudioFormat = audioFormat;
374         com.android.internal.util.AnnotationValidations.validate(
375                 NonNull.class, null, mAudioFormat);
376         this.mAudioStreamParcelFileDescriptor = audioStreamParcelFileDescriptor;
377         com.android.internal.util.AnnotationValidations.validate(
378                 NonNull.class, null, mAudioStreamParcelFileDescriptor);
379         this.mTimestamp = timestamp;
380         this.mMetadata = metadata;
381         com.android.internal.util.AnnotationValidations.validate(
382                 NonNull.class, null, mMetadata);
383         this.mInitialAudio = initialAudio;
384         com.android.internal.util.AnnotationValidations.validate(
385                 NonNull.class, null, mInitialAudio);
386 
387         // onConstructed(); // You can define this method to get a callback
388     }
389 
390     @NonNull
391     public static final Parcelable.Creator<HotwordAudioStream> CREATOR =
392             new Parcelable.Creator<HotwordAudioStream>() {
393                 @Override
394                 public HotwordAudioStream[] newArray(int size) {
395                     return new HotwordAudioStream[size];
396                 }
397 
398                 @Override
399                 public HotwordAudioStream createFromParcel(@NonNull Parcel in) {
400                     return new HotwordAudioStream(in);
401                 }
402             };
403 
404     /**
405      * A builder for {@link HotwordAudioStream}
406      */
407     @SuppressWarnings("WeakerAccess")
408     public static final class Builder {
409 
410         @NonNull
411         private AudioFormat mAudioFormat;
412         @NonNull
413         private ParcelFileDescriptor mAudioStreamParcelFileDescriptor;
414         @Nullable
415         private AudioTimestamp mTimestamp;
416         @NonNull
417         private PersistableBundle mMetadata;
418         @NonNull
419         private byte[] mInitialAudio;
420 
421         private long mBuilderFieldsSet = 0L;
422 
423         /**
424          * Creates a new Builder.
425          *
426          * @param audioFormat
427          *   The {@link AudioFormat} of the audio stream.
428          * @param audioStreamParcelFileDescriptor
429          *   This stream typically starts with the audio bytes used for hotword detection, but
430          *   continues streaming the audio (e.g., with the query) until the stream is shutdown by
431          *   the {@link HotwordDetectionService}. The data format is expected to match
432          *   {@link #getAudioFormat()}.
433          *
434          *   <p>
435          *   Alternatively, the {@link HotwordDetectionService} may use {@link #getInitialAudio()}
436          *   to pass the start of the audio instead of streaming it here. This may prevent added
437          *   latency caused by the streaming buffer
438          *   (see {@link #KEY_AUDIO_STREAM_COPY_BUFFER_LENGTH_BYTES}) not being large enough to
439          *   handle this initial chunk of audio.
440          *   </p>
441          */
442         @UnsupportedAppUsage
Builder( @onNull AudioFormat audioFormat, @NonNull ParcelFileDescriptor audioStreamParcelFileDescriptor)443         public Builder(
444                 @NonNull AudioFormat audioFormat,
445                 @NonNull ParcelFileDescriptor audioStreamParcelFileDescriptor) {
446             mAudioFormat = audioFormat;
447             com.android.internal.util.AnnotationValidations.validate(
448                     NonNull.class, null, mAudioFormat);
449             mAudioStreamParcelFileDescriptor = audioStreamParcelFileDescriptor;
450             com.android.internal.util.AnnotationValidations.validate(
451                     NonNull.class, null, mAudioStreamParcelFileDescriptor);
452         }
453 
454         /**
455          * The {@link AudioFormat} of the audio stream.
456          */
457         @UnsupportedAppUsage
458         @NonNull
setAudioFormat(@onNull AudioFormat value)459         public Builder setAudioFormat(@NonNull AudioFormat value) {
460             checkNotUsed();
461             mBuilderFieldsSet |= 0x1;
462             mAudioFormat = value;
463             return this;
464         }
465 
466         /**
467          * This stream typically starts with the audio bytes used for hotword detection, but
468          * continues streaming the audio (e.g., with the query) until the stream is shutdown by the
469          * {@link HotwordDetectionService}. The data format is expected to match
470          * {@link #getAudioFormat()}.
471          *
472          * <p>
473          * Alternatively, the {@link HotwordDetectionService} may use {@link #getInitialAudio()}
474          * to pass the start of the audio instead of streaming it here. This may prevent added
475          * latency caused by the streaming buffer
476          * (see {@link #KEY_AUDIO_STREAM_COPY_BUFFER_LENGTH_BYTES}) not being large enough to handle
477          * this initial chunk of audio.
478          * </p>
479          */
480         @UnsupportedAppUsage
481         @NonNull
setAudioStreamParcelFileDescriptor(@onNull ParcelFileDescriptor value)482         public Builder setAudioStreamParcelFileDescriptor(@NonNull ParcelFileDescriptor value) {
483             checkNotUsed();
484             mBuilderFieldsSet |= 0x2;
485             mAudioStreamParcelFileDescriptor = value;
486             return this;
487         }
488 
489         /**
490          * The timestamp when the audio stream was captured by the Audio platform.
491          *
492          * <p>
493          * The {@link HotwordDetectionService} egressing the audio is the owner of the underlying
494          * AudioRecord. The {@link HotwordDetectionService} is expected to optionally populate this
495          * field by {@link AudioRecord#getTimestamp}.
496          * </p>
497          *
498          * <p>
499          * This timestamp can be used in conjunction with the
500          * {@link HotwordDetectedResult#getHotwordOffsetMillis()} and
501          * {@link HotwordDetectedResult#getHotwordDurationMillis()} to translate these durations to
502          * timestamps.
503          * </p>
504          *
505          * @see #getAudioStreamParcelFileDescriptor()
506          */
507         @UnsupportedAppUsage
508         @NonNull
setTimestamp(@onNull AudioTimestamp value)509         public Builder setTimestamp(@NonNull AudioTimestamp value) {
510             checkNotUsed();
511             mBuilderFieldsSet |= 0x4;
512             mTimestamp = value;
513             return this;
514         }
515 
516         /**
517          * The metadata associated with the audio stream.
518          */
519         @UnsupportedAppUsage
520         @NonNull
setMetadata(@onNull PersistableBundle value)521         public Builder setMetadata(@NonNull PersistableBundle value) {
522             checkNotUsed();
523             mBuilderFieldsSet |= 0x8;
524             mMetadata = value;
525             return this;
526         }
527 
528         /**
529          * The start of the audio used for hotword detection. The data format is expected to match
530          * {@link #getAudioFormat()}.
531          *
532          * <p>
533          * The {@link HotwordDetectionService} may use this instead of using
534          * {@link #getAudioStreamParcelFileDescriptor()} to stream these initial bytes of audio.
535          * This may prevent added latency caused by the streaming buffer (see
536          * {@link #KEY_AUDIO_STREAM_COPY_BUFFER_LENGTH_BYTES}) not being large enough to handle this
537          * initial chunk of audio.
538          * </p>
539          */
540         @UnsupportedAppUsage
541         @NonNull
setInitialAudio(@onNull byte[] value)542         public Builder setInitialAudio(@NonNull byte[] value) {
543             checkNotUsed();
544             mBuilderFieldsSet |= 0x10;
545             mInitialAudio = value;
546             return this;
547         }
548 
549         /** Builds the instance. This builder should not be touched after calling this! */
550         @UnsupportedAppUsage
551         @NonNull
build()552         public HotwordAudioStream build() {
553             checkNotUsed();
554             mBuilderFieldsSet |= 0x20; // Mark builder used
555 
556             if ((mBuilderFieldsSet & 0x4) == 0) {
557                 mTimestamp = defaultTimestamp();
558             }
559             if ((mBuilderFieldsSet & 0x8) == 0) {
560                 mMetadata = defaultMetadata();
561             }
562             if ((mBuilderFieldsSet & 0x10) == 0) {
563                 mInitialAudio = defaultInitialAudio();
564             }
565             HotwordAudioStream o = new HotwordAudioStream(
566                     mAudioFormat,
567                     mAudioStreamParcelFileDescriptor,
568                     mTimestamp,
569                     mMetadata,
570                     mInitialAudio);
571             return o;
572         }
573 
checkNotUsed()574         private void checkNotUsed() {
575             if ((mBuilderFieldsSet & 0x20) != 0) {
576                 throw new IllegalStateException(
577                         "This Builder should not be reused. Use a new Builder instance instead");
578             }
579         }
580     }
581 }
582