/*
 * Copyright (C) 2022 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package android.service.voice;

import android.annotation.NonNull;
import android.annotation.Nullable;
import android.compat.annotation.UnsupportedAppUsage;
import android.media.AudioFormat;
import android.media.AudioRecord;
import android.media.AudioTimestamp;
import android.os.Parcel;
import android.os.ParcelFileDescriptor;
import android.os.Parcelable;
import android.os.PersistableBundle;

import java.util.Arrays;
import java.util.Objects;

/**
 * Represents an audio stream supporting the hotword detection.
 *
 * <p>Instances are immutable value holders created through {@link Builder} or restored from a
 * {@link Parcel} through {@link #CREATOR}.</p>
 *
 * @hide
 */
public final class HotwordAudioStream implements Parcelable {

    /**
     * Key for int value to be read from {@link #getMetadata()}. The value is read by the system and
     * is the length (in bytes) of the byte buffers created to copy bytes in the
     * {@link #getAudioStreamParcelFileDescriptor()} written by the {@link HotwordDetectionService}.
     * The buffer length should be chosen such that no additional latency is introduced. Typically,
     * this should be <em>at least</em> the size of byte chunks written by the
     * {@link HotwordDetectionService}.
     *
     * <p>If no value specified in the metadata for the buffer length, or if the value is less than
     * 1, or if it is greater than 65,536, or if it is not an int, the default value of 2,560 will
     * be used.</p>
     */
    public static final String KEY_AUDIO_STREAM_COPY_BUFFER_LENGTH_BYTES =
            "android.service.voice.key.AUDIO_STREAM_COPY_BUFFER_LENGTH_BYTES";

    /**
     * The {@link AudioFormat} of the audio stream.
     */
    @NonNull
    @UnsupportedAppUsage
    private final AudioFormat mAudioFormat;

    /**
     * This stream typically starts with the audio bytes used for hotword detection, but continues
     * streaming the audio (e.g., with the query) until the stream is shutdown by the
     * {@link HotwordDetectionService}. The data format is expected to match
     * {@link #getAudioFormat()}.
     *
     * <p>
     * Alternatively, the {@link HotwordDetectionService} may use {@link #getInitialAudio()}
     * to pass the start of the audio instead of streaming it here. This may prevent added latency
     * caused by the streaming buffer (see {@link #KEY_AUDIO_STREAM_COPY_BUFFER_LENGTH_BYTES}) not
     * being large enough to handle this initial chunk of audio.
     * </p>
     */
    @NonNull
    @UnsupportedAppUsage
    private final ParcelFileDescriptor mAudioStreamParcelFileDescriptor;

    /**
     * The timestamp when the audio stream was captured by the Audio platform.
     *
     * <p>
     * The {@link HotwordDetectionService} egressing the audio is the owner of the underlying
     * AudioRecord. The {@link HotwordDetectionService} is expected to optionally populate this
     * field by {@link AudioRecord#getTimestamp}.
     * </p>
     *
     * <p>
     * This timestamp can be used in conjunction with the
     * {@link HotwordDetectedResult#getHotwordOffsetMillis()} and
     * {@link HotwordDetectedResult#getHotwordDurationMillis()} to translate these durations to
     * timestamps.
     * </p>
     *
     * @see #getAudioStreamParcelFileDescriptor()
     */
    @Nullable
    @UnsupportedAppUsage
    private final AudioTimestamp mTimestamp;

    /** Returns the timestamp used when the builder was never given one: {@code null}. */
    private static AudioTimestamp defaultTimestamp() {
        return null;
    }

    /**
     * The metadata associated with the audio stream.
     */
    @NonNull
    @UnsupportedAppUsage
    private final PersistableBundle mMetadata;

    /** Returns the metadata used when the builder was never given any: a new, empty bundle. */
    private static PersistableBundle defaultMetadata() {
        return new PersistableBundle();
    }

    /**
     * Renders {@link #mTimestamp} for {@link #toString()}.
     *
     * @return the frame position and nano time of the timestamp, or an empty string when there is
     *         no timestamp
     */
    private String timestampToString() {
        if (mTimestamp == null) {
            return "";
        }
        return "TimeStamp:"
                + " framePos=" + mTimestamp.framePosition
                + " nanoTime=" + mTimestamp.nanoTime;
    }

    /**
     * Writes {@link #mTimestamp} to {@code dest} as a boolean presence marker followed, only when
     * the timestamp is present, by its frame position and nano time.
     *
     * @param dest  the parcel to write into
     * @param flags the flags passed to {@link #writeToParcel(Parcel, int)}; not used here
     */
    private void parcelTimestamp(Parcel dest, int flags) {
        if (mTimestamp != null) {
            // mTimestamp is not null, we write it to the parcel, set true.
            dest.writeBoolean(true);
            dest.writeLong(mTimestamp.framePosition);
            dest.writeLong(mTimestamp.nanoTime);
        } else {
            // mTimestamp is null, we don't write any value out, set false.
            dest.writeBoolean(false);
        }
    }

    /**
     * Reads back what {@link #parcelTimestamp(Parcel, int)} wrote.
     *
     * @param in the parcel positioned at the timestamp's presence marker
     * @return the restored timestamp, or {@code null} when the marker says none was written
     */
    @Nullable
    private static AudioTimestamp unparcelTimestamp(Parcel in) {
        // If it is true, it means we wrote the value to the parcel before, parse it.
        // Otherwise, return null.
        if (in.readBoolean()) {
            final AudioTimestamp timeStamp = new AudioTimestamp();
            timeStamp.framePosition = in.readLong();
            timeStamp.nanoTime = in.readLong();
            return timeStamp;
        } else {
            return null;
        }
    }

    /**
     * The start of the audio used for hotword detection. The data format is expected to match
     * {@link #getAudioFormat()}.
     *
     * <p>
     * The {@link HotwordDetectionService} may use this instead of using
     * {@link #getAudioStreamParcelFileDescriptor()} to stream these initial bytes of audio. This
     * may prevent added latency caused by the streaming buffer (see
     * {@link #KEY_AUDIO_STREAM_COPY_BUFFER_LENGTH_BYTES}) not being large enough to handle this
     * initial chunk of audio.
     * </p>
     */
    @NonNull
    @UnsupportedAppUsage
    private final byte[] mInitialAudio;

    private static final byte[] DEFAULT_INITIAL_EMPTY_AUDIO = {};

    /** Returns the initial audio used when the builder was never given any: an empty array. */
    private static byte[] defaultInitialAudio() {
        return DEFAULT_INITIAL_EMPTY_AUDIO;
    }

    /** Renders {@link #mInitialAudio} for {@link #toString()} as its length only. */
    private String initialAudioToString() {
        return "length=" + mInitialAudio.length;
    }

    /**
     * Provides an instance of {@link Builder} with state corresponding to this instance.
     * @hide
     */
    public Builder buildUpon() {
        final Builder builder = new Builder(mAudioFormat, mAudioStreamParcelFileDescriptor)
                .setMetadata(mMetadata)
                .setInitialAudio(mInitialAudio);
        // Builder#setTimestamp declares its argument @NonNull, so only forward a timestamp that
        // exists. Leaving it unset makes build() fall back to defaultTimestamp(), i.e. null.
        if (mTimestamp != null) {
            builder.setTimestamp(mTimestamp);
        }
        return builder;
    }

    /**
     * Creates an instance from already-resolved field values, validating every {@link NonNull}
     * argument.
     *
     * @param audioFormat the format of the audio stream
     * @param audioStreamParcelFileDescriptor the descriptor the audio is streamed through
     * @param timestamp the capture timestamp, or {@code null} when unknown
     * @param metadata the metadata associated with the audio stream
     * @param initialAudio the start of the audio used for hotword detection
     */
    /* package-private */
    HotwordAudioStream(
            @NonNull AudioFormat audioFormat,
            @NonNull ParcelFileDescriptor audioStreamParcelFileDescriptor,
            @Nullable AudioTimestamp timestamp,
            @NonNull PersistableBundle metadata,
            @NonNull byte[] initialAudio) {
        this.mAudioFormat = audioFormat;
        com.android.internal.util.AnnotationValidations.validate(
                NonNull.class, null, mAudioFormat);
        this.mAudioStreamParcelFileDescriptor = audioStreamParcelFileDescriptor;
        com.android.internal.util.AnnotationValidations.validate(
                NonNull.class, null, mAudioStreamParcelFileDescriptor);
        this.mTimestamp = timestamp;
        this.mMetadata = metadata;
        com.android.internal.util.AnnotationValidations.validate(
                NonNull.class, null, mMetadata);
        this.mInitialAudio = initialAudio;
        com.android.internal.util.AnnotationValidations.validate(
                NonNull.class, null, mInitialAudio);
    }

    /**
     * The {@link AudioFormat} of the audio stream.
     */
    @UnsupportedAppUsage
    @NonNull
    public AudioFormat getAudioFormat() {
        return mAudioFormat;
    }

    /**
     * This stream typically starts with the audio bytes used for hotword detection, but continues
     * streaming the audio (e.g., with the query) until the stream is shutdown by the
     * {@link HotwordDetectionService}. The data format is expected to match
     * {@link #getAudioFormat()}.
     *
     * <p>
     * Alternatively, the {@link HotwordDetectionService} may use {@link #getInitialAudio()}
     * to pass the start of the audio instead of streaming it here. This may prevent added latency
     * caused by the streaming buffer (see {@link #KEY_AUDIO_STREAM_COPY_BUFFER_LENGTH_BYTES}) not
     * being large enough to handle this initial chunk of audio.
     * </p>
     */
    @UnsupportedAppUsage
    @NonNull
    public ParcelFileDescriptor getAudioStreamParcelFileDescriptor() {
        return mAudioStreamParcelFileDescriptor;
    }

    /**
     * The timestamp when the audio stream was captured by the Audio platform.
     *
     * <p>
     * The {@link HotwordDetectionService} egressing the audio is the owner of the underlying
     * AudioRecord. The {@link HotwordDetectionService} is expected to optionally populate this
     * field by {@link AudioRecord#getTimestamp}.
     * </p>
     *
     * <p>
     * This timestamp can be used in conjunction with the
     * {@link HotwordDetectedResult#getHotwordOffsetMillis()} and
     * {@link HotwordDetectedResult#getHotwordDurationMillis()} to translate these durations to
     * timestamps.
     * </p>
     *
     * @see #getAudioStreamParcelFileDescriptor()
     */
    @UnsupportedAppUsage
    @Nullable
    public AudioTimestamp getTimestamp() {
        return mTimestamp;
    }

    /**
     * The metadata associated with the audio stream.
     */
    @UnsupportedAppUsage
    @NonNull
    public PersistableBundle getMetadata() {
        return mMetadata;
    }

    /**
     * The start of the audio used for hotword detection. The data format is expected to match
     * {@link #getAudioFormat()}.
     *
     * <p>
     * The {@link HotwordDetectionService} may use this instead of using
     * {@link #getAudioStreamParcelFileDescriptor()} to stream these initial bytes of audio. This
     * may prevent added latency caused by the streaming buffer (see
     * {@link #KEY_AUDIO_STREAM_COPY_BUFFER_LENGTH_BYTES}) not being large enough to handle this
     * initial chunk of audio.
     * </p>
     */
    @UnsupportedAppUsage
    @NonNull
    public byte[] getInitialAudio() {
        return mInitialAudio;
    }

    @Override
    public String toString() {
        // The timestamp and the initial audio use dedicated renderers: the former prints its two
        // fields (or nothing when absent), the latter prints only the array length.
        return "HotwordAudioStream { "
                + "audioFormat = " + mAudioFormat + ", "
                + "audioStreamParcelFileDescriptor = " + mAudioStreamParcelFileDescriptor + ", "
                + "timestamp = " + timestampToString() + ", "
                + "metadata = " + mMetadata + ", "
                + "initialAudio = " + initialAudioToString() + " }";
    }

    @Override
    public boolean equals(@Nullable Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;
        @SuppressWarnings("unchecked")
        HotwordAudioStream that = (HotwordAudioStream) o;
        // The initial audio is compared by content; every other field defers to its own equals().
        return Objects.equals(mAudioFormat, that.mAudioFormat)
                && Objects.equals(mAudioStreamParcelFileDescriptor,
                that.mAudioStreamParcelFileDescriptor)
                && Objects.equals(mTimestamp, that.mTimestamp)
                && Objects.equals(mMetadata, that.mMetadata)
                && Arrays.equals(mInitialAudio, that.mInitialAudio);
    }

    @Override
    public int hashCode() {
        // Mirrors equals(): content hash for the byte array, Objects.hashCode for the rest.
        int _hash = 1;
        _hash = 31 * _hash + Objects.hashCode(mAudioFormat);
        _hash = 31 * _hash + Objects.hashCode(mAudioStreamParcelFileDescriptor);
        _hash = 31 * _hash + Objects.hashCode(mTimestamp);
        _hash = 31 * _hash + Objects.hashCode(mMetadata);
        _hash = 31 * _hash + Arrays.hashCode(mInitialAudio);
        return _hash;
    }

    @Override
    public void writeToParcel(@NonNull Parcel dest, int flags) {
        // Layout: flag byte, audio format, file descriptor, timestamp (presence marker plus
        // optional payload), metadata, initial audio. The Parcel constructor reads the same order.
        byte flg = 0;
        if (mTimestamp != null) flg |= 0x4;
        dest.writeByte(flg);
        dest.writeTypedObject(mAudioFormat, flags);
        dest.writeTypedObject(mAudioStreamParcelFileDescriptor, flags);
        parcelTimestamp(dest, flags);
        dest.writeTypedObject(mMetadata, flags);
        dest.writeByteArray(mInitialAudio);
    }

    @Override
    public int describeContents() {
        return 0;
    }

    /**
     * Restores an instance from a parcel written by {@link #writeToParcel(Parcel, int)},
     * validating every {@link NonNull} field after reading.
     *
     * @param in the parcel positioned at the start of a serialized instance
     * @hide
     */
    @SuppressWarnings({"unchecked", "RedundantCast"})
    /* package-private */
    HotwordAudioStream(@NonNull Parcel in) {
        // Consume the flag byte so the read position stays in step with writeToParcel(). Its value
        // is not consulted: unparcelTimestamp() relies on its own presence marker instead.
        in.readByte();
        AudioFormat audioFormat = (AudioFormat) in.readTypedObject(AudioFormat.CREATOR);
        ParcelFileDescriptor audioStreamParcelFileDescriptor =
                (ParcelFileDescriptor) in.readTypedObject(ParcelFileDescriptor.CREATOR);
        AudioTimestamp timestamp = unparcelTimestamp(in);
        PersistableBundle metadata = (PersistableBundle) in.readTypedObject(
                PersistableBundle.CREATOR);
        byte[] initialAudio = in.createByteArray();

        this.mAudioFormat = audioFormat;
        com.android.internal.util.AnnotationValidations.validate(
                NonNull.class, null, mAudioFormat);
        this.mAudioStreamParcelFileDescriptor = audioStreamParcelFileDescriptor;
        com.android.internal.util.AnnotationValidations.validate(
                NonNull.class, null, mAudioStreamParcelFileDescriptor);
        this.mTimestamp = timestamp;
        this.mMetadata = metadata;
        com.android.internal.util.AnnotationValidations.validate(
                NonNull.class, null, mMetadata);
        this.mInitialAudio = initialAudio;
        com.android.internal.util.AnnotationValidations.validate(
                NonNull.class, null, mInitialAudio);
    }

    @NonNull
    public static final Parcelable.Creator<HotwordAudioStream> CREATOR =
            new Parcelable.Creator<HotwordAudioStream>() {
        @Override
        public HotwordAudioStream[] newArray(int size) {
            return new HotwordAudioStream[size];
        }

        @Override
        public HotwordAudioStream createFromParcel(@NonNull Parcel in) {
            return new HotwordAudioStream(in);
        }
    };

    /**
     * A builder for {@link HotwordAudioStream}
     */
    @SuppressWarnings("WeakerAccess")
    public static final class Builder {

        @NonNull
        private AudioFormat mAudioFormat;
        @NonNull
        private ParcelFileDescriptor mAudioStreamParcelFileDescriptor;
        @Nullable
        private AudioTimestamp mTimestamp;
        @NonNull
        private PersistableBundle mMetadata;
        @NonNull
        private byte[] mInitialAudio;

        // Bit set tracking which setters were called (0x1 .. 0x10, one bit per field in
        // declaration order) plus 0x20, which marks the builder as already used by build().
        private long mBuilderFieldsSet = 0L;

        /**
         * Creates a new Builder.
         *
         * @param audioFormat
         *   The {@link AudioFormat} of the audio stream.
         * @param audioStreamParcelFileDescriptor
         *   This stream typically starts with the audio bytes used for hotword detection, but
         *   continues streaming the audio (e.g., with the query) until the stream is shutdown by
         *   the {@link HotwordDetectionService}. The data format is expected to match
         *   {@link #getAudioFormat()}.
         *
         *   <p>
         *   Alternatively, the {@link HotwordDetectionService} may use {@link #getInitialAudio()}
         *   to pass the start of the audio instead of streaming it here. This may prevent added
         *   latency caused by the streaming buffer
         *   (see {@link #KEY_AUDIO_STREAM_COPY_BUFFER_LENGTH_BYTES}) not being large enough to
         *   handle this initial chunk of audio.
         *   </p>
         */
        @UnsupportedAppUsage
        public Builder(
                @NonNull AudioFormat audioFormat,
                @NonNull ParcelFileDescriptor audioStreamParcelFileDescriptor) {
            mAudioFormat = audioFormat;
            com.android.internal.util.AnnotationValidations.validate(
                    NonNull.class, null, mAudioFormat);
            mAudioStreamParcelFileDescriptor = audioStreamParcelFileDescriptor;
            com.android.internal.util.AnnotationValidations.validate(
                    NonNull.class, null, mAudioStreamParcelFileDescriptor);
        }

        /**
         * The {@link AudioFormat} of the audio stream.
         */
        @UnsupportedAppUsage
        @NonNull
        public Builder setAudioFormat(@NonNull AudioFormat value) {
            checkNotUsed();
            mBuilderFieldsSet |= 0x1;
            mAudioFormat = value;
            return this;
        }

        /**
         * This stream typically starts with the audio bytes used for hotword detection, but
         * continues streaming the audio (e.g., with the query) until the stream is shutdown by the
         * {@link HotwordDetectionService}. The data format is expected to match
         * {@link #getAudioFormat()}.
         *
         * <p>
         * Alternatively, the {@link HotwordDetectionService} may use {@link #getInitialAudio()}
         * to pass the start of the audio instead of streaming it here. This may prevent added
         * latency caused by the streaming buffer
         * (see {@link #KEY_AUDIO_STREAM_COPY_BUFFER_LENGTH_BYTES}) not being large enough to handle
         * this initial chunk of audio.
         * </p>
         */
        @UnsupportedAppUsage
        @NonNull
        public Builder setAudioStreamParcelFileDescriptor(@NonNull ParcelFileDescriptor value) {
            checkNotUsed();
            mBuilderFieldsSet |= 0x2;
            mAudioStreamParcelFileDescriptor = value;
            return this;
        }

        /**
         * The timestamp when the audio stream was captured by the Audio platform.
         *
         * <p>
         * The {@link HotwordDetectionService} egressing the audio is the owner of the underlying
         * AudioRecord. The {@link HotwordDetectionService} is expected to optionally populate this
         * field by {@link AudioRecord#getTimestamp}.
         * </p>
         *
         * <p>
         * This timestamp can be used in conjunction with the
         * {@link HotwordDetectedResult#getHotwordOffsetMillis()} and
         * {@link HotwordDetectedResult#getHotwordDurationMillis()} to translate these durations to
         * timestamps.
         * </p>
         *
         * @see #getAudioStreamParcelFileDescriptor()
         */
        @UnsupportedAppUsage
        @NonNull
        public Builder setTimestamp(@NonNull AudioTimestamp value) {
            checkNotUsed();
            mBuilderFieldsSet |= 0x4;
            mTimestamp = value;
            return this;
        }

        /**
         * The metadata associated with the audio stream.
         */
        @UnsupportedAppUsage
        @NonNull
        public Builder setMetadata(@NonNull PersistableBundle value) {
            checkNotUsed();
            mBuilderFieldsSet |= 0x8;
            mMetadata = value;
            return this;
        }

        /**
         * The start of the audio used for hotword detection. The data format is expected to match
         * {@link #getAudioFormat()}.
         *
         * <p>
         * The {@link HotwordDetectionService} may use this instead of using
         * {@link #getAudioStreamParcelFileDescriptor()} to stream these initial bytes of audio.
         * This may prevent added latency caused by the streaming buffer (see
         * {@link #KEY_AUDIO_STREAM_COPY_BUFFER_LENGTH_BYTES}) not being large enough to handle this
         * initial chunk of audio.
         * </p>
         */
        @UnsupportedAppUsage
        @NonNull
        public Builder setInitialAudio(@NonNull byte[] value) {
            checkNotUsed();
            mBuilderFieldsSet |= 0x10;
            mInitialAudio = value;
            return this;
        }

        /** Builds the instance. This builder should not be touched after calling this! */
        @UnsupportedAppUsage
        @NonNull
        public HotwordAudioStream build() {
            checkNotUsed();
            mBuilderFieldsSet |= 0x20; // Mark builder used

            // Fill in defaults for every optional field whose setter was never called.
            if ((mBuilderFieldsSet & 0x4) == 0) {
                mTimestamp = defaultTimestamp();
            }
            if ((mBuilderFieldsSet & 0x8) == 0) {
                mMetadata = defaultMetadata();
            }
            if ((mBuilderFieldsSet & 0x10) == 0) {
                mInitialAudio = defaultInitialAudio();
            }
            HotwordAudioStream o = new HotwordAudioStream(
                    mAudioFormat,
                    mAudioStreamParcelFileDescriptor,
                    mTimestamp,
                    mMetadata,
                    mInitialAudio);
            return o;
        }

        /**
         * Guards every mutator and {@link #build()} against use after the builder was consumed.
         *
         * @throws IllegalStateException if {@link #build()} has already been called
         */
        private void checkNotUsed() {
            if ((mBuilderFieldsSet & 0x20) != 0) {
                throw new IllegalStateException(
                        "This Builder should not be reused. Use a new Builder instance instead");
            }
        }
    }
}