1 /* 2 * Copyright (C) 2018 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 package com.google.android.exoplayer2.audio; 17 18 import androidx.annotation.IntDef; 19 import com.google.android.exoplayer2.C; 20 import com.google.android.exoplayer2.util.Util; 21 import java.lang.annotation.Documented; 22 import java.lang.annotation.Retention; 23 import java.lang.annotation.RetentionPolicy; 24 import java.nio.ByteBuffer; 25 26 /** 27 * An {@link AudioProcessor} that skips silence in the input stream. Input and output are 16-bit 28 * PCM. 29 */ 30 public final class SilenceSkippingAudioProcessor extends BaseAudioProcessor { 31 32 /** 33 * The minimum duration of audio that must be below {@link #SILENCE_THRESHOLD_LEVEL} to classify 34 * that part of audio as silent, in microseconds. 35 */ 36 private static final long MINIMUM_SILENCE_DURATION_US = 150_000; 37 /** 38 * The duration of silence by which to extend non-silent sections, in microseconds. The value must 39 * not exceed {@link #MINIMUM_SILENCE_DURATION_US}. 40 */ 41 private static final long PADDING_SILENCE_US = 20_000; 42 /** 43 * The absolute level below which an individual PCM sample is classified as silent. Note: the 44 * specified value will be rounded so that the threshold check only depends on the more 45 * significant byte, for efficiency. 46 */ 47 private static final short SILENCE_THRESHOLD_LEVEL = 1024; 48 49 /** 50 * Threshold for classifying an individual PCM sample as silent based on its more significant 51 * byte. This is {@link #SILENCE_THRESHOLD_LEVEL} divided by 256 with rounding. 52 */ 53 private static final byte SILENCE_THRESHOLD_LEVEL_MSB = (SILENCE_THRESHOLD_LEVEL + 128) >> 8; 54 55 /** Trimming states. */ 56 @Documented 57 @Retention(RetentionPolicy.SOURCE) 58 @IntDef({ 59 STATE_NOISY, 60 STATE_MAYBE_SILENT, 61 STATE_SILENT, 62 }) 63 private @interface State {} 64 /** State when the input is not silent. */ 65 private static final int STATE_NOISY = 0; 66 /** State when the input may be silent but we haven't read enough yet to know. */ 67 private static final int STATE_MAYBE_SILENT = 1; 68 /** State when the input is silent. */ 69 private static final int STATE_SILENT = 2; 70 71 private int bytesPerFrame; 72 73 private boolean enabled; 74 75 /** 76 * Buffers audio data that may be classified as silence while in {@link #STATE_MAYBE_SILENT}. If 77 * the input becomes noisy before the buffer has filled, it will be output. Otherwise, the buffer 78 * contents will be dropped and the state will transition to {@link #STATE_SILENT}. 79 */ 80 private byte[] maybeSilenceBuffer; 81 82 /** 83 * Stores the latest part of the input while silent. It will be output as padding if the next 84 * input is noisy. 85 */ 86 private byte[] paddingBuffer; 87 88 @State private int state; 89 private int maybeSilenceBufferSize; 90 private int paddingSize; 91 private boolean hasOutputNoise; 92 private long skippedFrames; 93 94 /** Creates a new silence trimming audio processor. */ SilenceSkippingAudioProcessor()95 public SilenceSkippingAudioProcessor() { 96 maybeSilenceBuffer = Util.EMPTY_BYTE_ARRAY; 97 paddingBuffer = Util.EMPTY_BYTE_ARRAY; 98 } 99 100 /** 101 * Sets whether to skip silence in the input. This method may only be called after draining data 102 * through the processor. The value returned by {@link #isActive()} may change, and the processor 103 * must be {@link #flush() flushed} before queueing more data. 104 * 105 * @param enabled Whether to skip silence in the input. 106 */ setEnabled(boolean enabled)107 public void setEnabled(boolean enabled) { 108 this.enabled = enabled; 109 } 110 111 /** 112 * Returns the total number of frames of input audio that were skipped due to being classified as 113 * silence since the last call to {@link #flush()}. 114 */ getSkippedFrames()115 public long getSkippedFrames() { 116 return skippedFrames; 117 } 118 119 // AudioProcessor implementation. 120 121 @Override onConfigure(AudioFormat inputAudioFormat)122 public AudioFormat onConfigure(AudioFormat inputAudioFormat) 123 throws UnhandledAudioFormatException { 124 if (inputAudioFormat.encoding != C.ENCODING_PCM_16BIT) { 125 throw new UnhandledAudioFormatException(inputAudioFormat); 126 } 127 return enabled ? inputAudioFormat : AudioFormat.NOT_SET; 128 } 129 130 @Override isActive()131 public boolean isActive() { 132 return enabled; 133 } 134 135 @Override queueInput(ByteBuffer inputBuffer)136 public void queueInput(ByteBuffer inputBuffer) { 137 while (inputBuffer.hasRemaining() && !hasPendingOutput()) { 138 switch (state) { 139 case STATE_NOISY: 140 processNoisy(inputBuffer); 141 break; 142 case STATE_MAYBE_SILENT: 143 processMaybeSilence(inputBuffer); 144 break; 145 case STATE_SILENT: 146 processSilence(inputBuffer); 147 break; 148 default: 149 throw new IllegalStateException(); 150 } 151 } 152 } 153 154 @Override onQueueEndOfStream()155 protected void onQueueEndOfStream() { 156 if (maybeSilenceBufferSize > 0) { 157 // We haven't received enough silence to transition to the silent state, so output the buffer. 158 output(maybeSilenceBuffer, maybeSilenceBufferSize); 159 } 160 if (!hasOutputNoise) { 161 skippedFrames += paddingSize / bytesPerFrame; 162 } 163 } 164 165 @Override onFlush()166 protected void onFlush() { 167 if (enabled) { 168 bytesPerFrame = inputAudioFormat.bytesPerFrame; 169 int maybeSilenceBufferSize = durationUsToFrames(MINIMUM_SILENCE_DURATION_US) * bytesPerFrame; 170 if (maybeSilenceBuffer.length != maybeSilenceBufferSize) { 171 maybeSilenceBuffer = new byte[maybeSilenceBufferSize]; 172 } 173 paddingSize = durationUsToFrames(PADDING_SILENCE_US) * bytesPerFrame; 174 if (paddingBuffer.length != paddingSize) { 175 paddingBuffer = new byte[paddingSize]; 176 } 177 } 178 state = STATE_NOISY; 179 skippedFrames = 0; 180 maybeSilenceBufferSize = 0; 181 hasOutputNoise = false; 182 } 183 184 @Override onReset()185 protected void onReset() { 186 enabled = false; 187 paddingSize = 0; 188 maybeSilenceBuffer = Util.EMPTY_BYTE_ARRAY; 189 paddingBuffer = Util.EMPTY_BYTE_ARRAY; 190 } 191 192 // Internal methods. 193 194 /** 195 * Incrementally processes new input from {@code inputBuffer} while in {@link #STATE_NOISY}, 196 * updating the state if needed. 197 */ processNoisy(ByteBuffer inputBuffer)198 private void processNoisy(ByteBuffer inputBuffer) { 199 int limit = inputBuffer.limit(); 200 201 // Check if there's any noise within the maybe silence buffer duration. 202 inputBuffer.limit(Math.min(limit, inputBuffer.position() + maybeSilenceBuffer.length)); 203 int noiseLimit = findNoiseLimit(inputBuffer); 204 if (noiseLimit == inputBuffer.position()) { 205 // The buffer contains the start of possible silence. 206 state = STATE_MAYBE_SILENT; 207 } else { 208 inputBuffer.limit(noiseLimit); 209 output(inputBuffer); 210 } 211 212 // Restore the limit. 213 inputBuffer.limit(limit); 214 } 215 216 /** 217 * Incrementally processes new input from {@code inputBuffer} while in {@link 218 * #STATE_MAYBE_SILENT}, updating the state if needed. 219 */ processMaybeSilence(ByteBuffer inputBuffer)220 private void processMaybeSilence(ByteBuffer inputBuffer) { 221 int limit = inputBuffer.limit(); 222 int noisePosition = findNoisePosition(inputBuffer); 223 int maybeSilenceInputSize = noisePosition - inputBuffer.position(); 224 int maybeSilenceBufferRemaining = maybeSilenceBuffer.length - maybeSilenceBufferSize; 225 if (noisePosition < limit && maybeSilenceInputSize < maybeSilenceBufferRemaining) { 226 // The maybe silence buffer isn't full, so output it and switch back to the noisy state. 227 output(maybeSilenceBuffer, maybeSilenceBufferSize); 228 maybeSilenceBufferSize = 0; 229 state = STATE_NOISY; 230 } else { 231 // Fill as much of the maybe silence buffer as possible. 232 int bytesToWrite = Math.min(maybeSilenceInputSize, maybeSilenceBufferRemaining); 233 inputBuffer.limit(inputBuffer.position() + bytesToWrite); 234 inputBuffer.get(maybeSilenceBuffer, maybeSilenceBufferSize, bytesToWrite); 235 maybeSilenceBufferSize += bytesToWrite; 236 if (maybeSilenceBufferSize == maybeSilenceBuffer.length) { 237 // We've reached a period of silence, so skip it, taking in to account padding for both 238 // the noisy to silent transition and any future silent to noisy transition. 239 if (hasOutputNoise) { 240 output(maybeSilenceBuffer, paddingSize); 241 skippedFrames += (maybeSilenceBufferSize - paddingSize * 2) / bytesPerFrame; 242 } else { 243 skippedFrames += (maybeSilenceBufferSize - paddingSize) / bytesPerFrame; 244 } 245 updatePaddingBuffer(inputBuffer, maybeSilenceBuffer, maybeSilenceBufferSize); 246 maybeSilenceBufferSize = 0; 247 state = STATE_SILENT; 248 } 249 250 // Restore the limit. 251 inputBuffer.limit(limit); 252 } 253 } 254 255 /** 256 * Incrementally processes new input from {@code inputBuffer} while in {@link #STATE_SILENT}, 257 * updating the state if needed. 258 */ processSilence(ByteBuffer inputBuffer)259 private void processSilence(ByteBuffer inputBuffer) { 260 int limit = inputBuffer.limit(); 261 int noisyPosition = findNoisePosition(inputBuffer); 262 inputBuffer.limit(noisyPosition); 263 skippedFrames += inputBuffer.remaining() / bytesPerFrame; 264 updatePaddingBuffer(inputBuffer, paddingBuffer, paddingSize); 265 if (noisyPosition < limit) { 266 // Output the padding, which may include previous input as well as new input, then transition 267 // back to the noisy state. 268 output(paddingBuffer, paddingSize); 269 state = STATE_NOISY; 270 271 // Restore the limit. 272 inputBuffer.limit(limit); 273 } 274 } 275 276 /** 277 * Copies {@code length} elements from {@code data} to populate a new output buffer from the 278 * processor. 279 */ output(byte[] data, int length)280 private void output(byte[] data, int length) { 281 replaceOutputBuffer(length).put(data, 0, length).flip(); 282 if (length > 0) { 283 hasOutputNoise = true; 284 } 285 } 286 287 /** 288 * Copies remaining bytes from {@code data} to populate a new output buffer from the processor. 289 */ output(ByteBuffer data)290 private void output(ByteBuffer data) { 291 int length = data.remaining(); 292 replaceOutputBuffer(length).put(data).flip(); 293 if (length > 0) { 294 hasOutputNoise = true; 295 } 296 } 297 298 /** 299 * Fills {@link #paddingBuffer} using data from {@code input}, plus any additional buffered data 300 * at the end of {@code buffer} (up to its {@code size}) required to fill it, advancing the input 301 * position. 302 */ updatePaddingBuffer(ByteBuffer input, byte[] buffer, int size)303 private void updatePaddingBuffer(ByteBuffer input, byte[] buffer, int size) { 304 int fromInputSize = Math.min(input.remaining(), paddingSize); 305 int fromBufferSize = paddingSize - fromInputSize; 306 System.arraycopy( 307 /* src= */ buffer, 308 /* srcPos= */ size - fromBufferSize, 309 /* dest= */ paddingBuffer, 310 /* destPos= */ 0, 311 /* length= */ fromBufferSize); 312 input.position(input.limit() - fromInputSize); 313 input.get(paddingBuffer, fromBufferSize, fromInputSize); 314 } 315 316 /** 317 * Returns the number of input frames corresponding to {@code durationUs} microseconds of audio. 318 */ durationUsToFrames(long durationUs)319 private int durationUsToFrames(long durationUs) { 320 return (int) ((durationUs * inputAudioFormat.sampleRate) / C.MICROS_PER_SECOND); 321 } 322 323 /** 324 * Returns the earliest byte position in [position, limit) of {@code buffer} that contains a frame 325 * classified as a noisy frame, or the limit of the buffer if no such frame exists. 326 */ findNoisePosition(ByteBuffer buffer)327 private int findNoisePosition(ByteBuffer buffer) { 328 // The input is in ByteOrder.nativeOrder(), which is little endian on Android. 329 for (int i = buffer.position() + 1; i < buffer.limit(); i += 2) { 330 if (Math.abs(buffer.get(i)) > SILENCE_THRESHOLD_LEVEL_MSB) { 331 // Round to the start of the frame. 332 return bytesPerFrame * (i / bytesPerFrame); 333 } 334 } 335 return buffer.limit(); 336 } 337 338 /** 339 * Returns the earliest byte position in [position, limit) of {@code buffer} such that all frames 340 * from the byte position to the limit are classified as silent. 341 */ findNoiseLimit(ByteBuffer buffer)342 private int findNoiseLimit(ByteBuffer buffer) { 343 // The input is in ByteOrder.nativeOrder(), which is little endian on Android. 344 for (int i = buffer.limit() - 1; i >= buffer.position(); i -= 2) { 345 if (Math.abs(buffer.get(i)) > SILENCE_THRESHOLD_LEVEL_MSB) { 346 // Return the start of the next frame. 347 return bytesPerFrame * (i / bytesPerFrame) + bytesPerFrame; 348 } 349 } 350 return buffer.position(); 351 } 352 } 353