• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 package com.google.android.exoplayer2.audio;
17 
18 import androidx.annotation.IntDef;
19 import com.google.android.exoplayer2.C;
20 import com.google.android.exoplayer2.util.Util;
21 import java.lang.annotation.Documented;
22 import java.lang.annotation.Retention;
23 import java.lang.annotation.RetentionPolicy;
24 import java.nio.ByteBuffer;
25 
26 /**
27  * An {@link AudioProcessor} that skips silence in the input stream. Input and output are 16-bit
28  * PCM.
29  */
30 public final class SilenceSkippingAudioProcessor extends BaseAudioProcessor {
31 
32   /**
33    * The minimum duration of audio that must be below {@link #SILENCE_THRESHOLD_LEVEL} to classify
34    * that part of audio as silent, in microseconds.
35    */
36   private static final long MINIMUM_SILENCE_DURATION_US = 150_000;
37   /**
38    * The duration of silence by which to extend non-silent sections, in microseconds. The value must
39    * not exceed {@link #MINIMUM_SILENCE_DURATION_US}.
40    */
41   private static final long PADDING_SILENCE_US = 20_000;
42   /**
43    * The absolute level below which an individual PCM sample is classified as silent. Note: the
44    * specified value will be rounded so that the threshold check only depends on the more
45    * significant byte, for efficiency.
46    */
47   private static final short SILENCE_THRESHOLD_LEVEL = 1024;
48 
49   /**
50    * Threshold for classifying an individual PCM sample as silent based on its more significant
51    * byte. This is {@link #SILENCE_THRESHOLD_LEVEL} divided by 256 with rounding.
52    */
53   private static final byte SILENCE_THRESHOLD_LEVEL_MSB = (SILENCE_THRESHOLD_LEVEL + 128) >> 8;
54 
55   /** Trimming states. */
56   @Documented
57   @Retention(RetentionPolicy.SOURCE)
58   @IntDef({
59     STATE_NOISY,
60     STATE_MAYBE_SILENT,
61     STATE_SILENT,
62   })
63   private @interface State {}
64   /** State when the input is not silent. */
65   private static final int STATE_NOISY = 0;
66   /** State when the input may be silent but we haven't read enough yet to know. */
67   private static final int STATE_MAYBE_SILENT = 1;
68   /** State when the input is silent. */
69   private static final int STATE_SILENT = 2;
70 
71   private int bytesPerFrame;
72 
73   private boolean enabled;
74 
75   /**
76    * Buffers audio data that may be classified as silence while in {@link #STATE_MAYBE_SILENT}. If
77    * the input becomes noisy before the buffer has filled, it will be output. Otherwise, the buffer
78    * contents will be dropped and the state will transition to {@link #STATE_SILENT}.
79    */
80   private byte[] maybeSilenceBuffer;
81 
82   /**
83    * Stores the latest part of the input while silent. It will be output as padding if the next
84    * input is noisy.
85    */
86   private byte[] paddingBuffer;
87 
88   @State private int state;
89   private int maybeSilenceBufferSize;
90   private int paddingSize;
91   private boolean hasOutputNoise;
92   private long skippedFrames;
93 
94   /** Creates a new silence trimming audio processor. */
SilenceSkippingAudioProcessor()95   public SilenceSkippingAudioProcessor() {
96     maybeSilenceBuffer = Util.EMPTY_BYTE_ARRAY;
97     paddingBuffer = Util.EMPTY_BYTE_ARRAY;
98   }
99 
100   /**
101    * Sets whether to skip silence in the input. This method may only be called after draining data
102    * through the processor. The value returned by {@link #isActive()} may change, and the processor
103    * must be {@link #flush() flushed} before queueing more data.
104    *
105    * @param enabled Whether to skip silence in the input.
106    */
setEnabled(boolean enabled)107   public void setEnabled(boolean enabled) {
108     this.enabled = enabled;
109   }
110 
111   /**
112    * Returns the total number of frames of input audio that were skipped due to being classified as
113    * silence since the last call to {@link #flush()}.
114    */
getSkippedFrames()115   public long getSkippedFrames() {
116     return skippedFrames;
117   }
118 
119   // AudioProcessor implementation.
120 
121   @Override
onConfigure(AudioFormat inputAudioFormat)122   public AudioFormat onConfigure(AudioFormat inputAudioFormat)
123       throws UnhandledAudioFormatException {
124     if (inputAudioFormat.encoding != C.ENCODING_PCM_16BIT) {
125       throw new UnhandledAudioFormatException(inputAudioFormat);
126     }
127     return enabled ? inputAudioFormat : AudioFormat.NOT_SET;
128   }
129 
130   @Override
isActive()131   public boolean isActive() {
132     return enabled;
133   }
134 
135   @Override
queueInput(ByteBuffer inputBuffer)136   public void queueInput(ByteBuffer inputBuffer) {
137     while (inputBuffer.hasRemaining() && !hasPendingOutput()) {
138       switch (state) {
139         case STATE_NOISY:
140           processNoisy(inputBuffer);
141           break;
142         case STATE_MAYBE_SILENT:
143           processMaybeSilence(inputBuffer);
144           break;
145         case STATE_SILENT:
146           processSilence(inputBuffer);
147           break;
148         default:
149           throw new IllegalStateException();
150       }
151     }
152   }
153 
154   @Override
onQueueEndOfStream()155   protected void onQueueEndOfStream() {
156     if (maybeSilenceBufferSize > 0) {
157       // We haven't received enough silence to transition to the silent state, so output the buffer.
158       output(maybeSilenceBuffer, maybeSilenceBufferSize);
159     }
160     if (!hasOutputNoise) {
161       skippedFrames += paddingSize / bytesPerFrame;
162     }
163   }
164 
165   @Override
onFlush()166   protected void onFlush() {
167     if (enabled) {
168       bytesPerFrame = inputAudioFormat.bytesPerFrame;
169       int maybeSilenceBufferSize = durationUsToFrames(MINIMUM_SILENCE_DURATION_US) * bytesPerFrame;
170       if (maybeSilenceBuffer.length != maybeSilenceBufferSize) {
171         maybeSilenceBuffer = new byte[maybeSilenceBufferSize];
172       }
173       paddingSize = durationUsToFrames(PADDING_SILENCE_US) * bytesPerFrame;
174       if (paddingBuffer.length != paddingSize) {
175         paddingBuffer = new byte[paddingSize];
176       }
177     }
178     state = STATE_NOISY;
179     skippedFrames = 0;
180     maybeSilenceBufferSize = 0;
181     hasOutputNoise = false;
182   }
183 
184   @Override
onReset()185   protected void onReset() {
186     enabled = false;
187     paddingSize = 0;
188     maybeSilenceBuffer = Util.EMPTY_BYTE_ARRAY;
189     paddingBuffer = Util.EMPTY_BYTE_ARRAY;
190   }
191 
192   // Internal methods.
193 
194   /**
195    * Incrementally processes new input from {@code inputBuffer} while in {@link #STATE_NOISY},
196    * updating the state if needed.
197    */
processNoisy(ByteBuffer inputBuffer)198   private void processNoisy(ByteBuffer inputBuffer) {
199     int limit = inputBuffer.limit();
200 
201     // Check if there's any noise within the maybe silence buffer duration.
202     inputBuffer.limit(Math.min(limit, inputBuffer.position() + maybeSilenceBuffer.length));
203     int noiseLimit = findNoiseLimit(inputBuffer);
204     if (noiseLimit == inputBuffer.position()) {
205       // The buffer contains the start of possible silence.
206       state = STATE_MAYBE_SILENT;
207     } else {
208       inputBuffer.limit(noiseLimit);
209       output(inputBuffer);
210     }
211 
212     // Restore the limit.
213     inputBuffer.limit(limit);
214   }
215 
216   /**
217    * Incrementally processes new input from {@code inputBuffer} while in {@link
218    * #STATE_MAYBE_SILENT}, updating the state if needed.
219    */
processMaybeSilence(ByteBuffer inputBuffer)220   private void processMaybeSilence(ByteBuffer inputBuffer) {
221     int limit = inputBuffer.limit();
222     int noisePosition = findNoisePosition(inputBuffer);
223     int maybeSilenceInputSize = noisePosition - inputBuffer.position();
224     int maybeSilenceBufferRemaining = maybeSilenceBuffer.length - maybeSilenceBufferSize;
225     if (noisePosition < limit && maybeSilenceInputSize < maybeSilenceBufferRemaining) {
226       // The maybe silence buffer isn't full, so output it and switch back to the noisy state.
227       output(maybeSilenceBuffer, maybeSilenceBufferSize);
228       maybeSilenceBufferSize = 0;
229       state = STATE_NOISY;
230     } else {
231       // Fill as much of the maybe silence buffer as possible.
232       int bytesToWrite = Math.min(maybeSilenceInputSize, maybeSilenceBufferRemaining);
233       inputBuffer.limit(inputBuffer.position() + bytesToWrite);
234       inputBuffer.get(maybeSilenceBuffer, maybeSilenceBufferSize, bytesToWrite);
235       maybeSilenceBufferSize += bytesToWrite;
236       if (maybeSilenceBufferSize == maybeSilenceBuffer.length) {
237         // We've reached a period of silence, so skip it, taking in to account padding for both
238         // the noisy to silent transition and any future silent to noisy transition.
239         if (hasOutputNoise) {
240           output(maybeSilenceBuffer, paddingSize);
241           skippedFrames += (maybeSilenceBufferSize - paddingSize * 2) / bytesPerFrame;
242         } else {
243           skippedFrames += (maybeSilenceBufferSize - paddingSize) / bytesPerFrame;
244         }
245         updatePaddingBuffer(inputBuffer, maybeSilenceBuffer, maybeSilenceBufferSize);
246         maybeSilenceBufferSize = 0;
247         state = STATE_SILENT;
248       }
249 
250       // Restore the limit.
251       inputBuffer.limit(limit);
252     }
253   }
254 
255   /**
256    * Incrementally processes new input from {@code inputBuffer} while in {@link #STATE_SILENT},
257    * updating the state if needed.
258    */
processSilence(ByteBuffer inputBuffer)259   private void processSilence(ByteBuffer inputBuffer) {
260     int limit = inputBuffer.limit();
261     int noisyPosition = findNoisePosition(inputBuffer);
262     inputBuffer.limit(noisyPosition);
263     skippedFrames += inputBuffer.remaining() / bytesPerFrame;
264     updatePaddingBuffer(inputBuffer, paddingBuffer, paddingSize);
265     if (noisyPosition < limit) {
266       // Output the padding, which may include previous input as well as new input, then transition
267       // back to the noisy state.
268       output(paddingBuffer, paddingSize);
269       state = STATE_NOISY;
270 
271       // Restore the limit.
272       inputBuffer.limit(limit);
273     }
274   }
275 
276   /**
277    * Copies {@code length} elements from {@code data} to populate a new output buffer from the
278    * processor.
279    */
output(byte[] data, int length)280   private void output(byte[] data, int length) {
281     replaceOutputBuffer(length).put(data, 0, length).flip();
282     if (length > 0) {
283       hasOutputNoise = true;
284     }
285   }
286 
287   /**
288    * Copies remaining bytes from {@code data} to populate a new output buffer from the processor.
289    */
output(ByteBuffer data)290   private void output(ByteBuffer data) {
291     int length = data.remaining();
292     replaceOutputBuffer(length).put(data).flip();
293     if (length > 0) {
294       hasOutputNoise = true;
295     }
296   }
297 
298   /**
299    * Fills {@link #paddingBuffer} using data from {@code input}, plus any additional buffered data
300    * at the end of {@code buffer} (up to its {@code size}) required to fill it, advancing the input
301    * position.
302    */
updatePaddingBuffer(ByteBuffer input, byte[] buffer, int size)303   private void updatePaddingBuffer(ByteBuffer input, byte[] buffer, int size) {
304     int fromInputSize = Math.min(input.remaining(), paddingSize);
305     int fromBufferSize = paddingSize - fromInputSize;
306     System.arraycopy(
307         /* src= */ buffer,
308         /* srcPos= */ size - fromBufferSize,
309         /* dest= */ paddingBuffer,
310         /* destPos= */ 0,
311         /* length= */ fromBufferSize);
312     input.position(input.limit() - fromInputSize);
313     input.get(paddingBuffer, fromBufferSize, fromInputSize);
314   }
315 
316   /**
317    * Returns the number of input frames corresponding to {@code durationUs} microseconds of audio.
318    */
durationUsToFrames(long durationUs)319   private int durationUsToFrames(long durationUs) {
320     return (int) ((durationUs * inputAudioFormat.sampleRate) / C.MICROS_PER_SECOND);
321   }
322 
323   /**
324    * Returns the earliest byte position in [position, limit) of {@code buffer} that contains a frame
325    * classified as a noisy frame, or the limit of the buffer if no such frame exists.
326    */
findNoisePosition(ByteBuffer buffer)327   private int findNoisePosition(ByteBuffer buffer) {
328     // The input is in ByteOrder.nativeOrder(), which is little endian on Android.
329     for (int i = buffer.position() + 1; i < buffer.limit(); i += 2) {
330       if (Math.abs(buffer.get(i)) > SILENCE_THRESHOLD_LEVEL_MSB) {
331         // Round to the start of the frame.
332         return bytesPerFrame * (i / bytesPerFrame);
333       }
334     }
335     return buffer.limit();
336   }
337 
338   /**
339    * Returns the earliest byte position in [position, limit) of {@code buffer} such that all frames
340    * from the byte position to the limit are classified as silent.
341    */
findNoiseLimit(ByteBuffer buffer)342   private int findNoiseLimit(ByteBuffer buffer) {
343     // The input is in ByteOrder.nativeOrder(), which is little endian on Android.
344     for (int i = buffer.limit() - 1; i >= buffer.position(); i -= 2) {
345       if (Math.abs(buffer.get(i)) > SILENCE_THRESHOLD_LEVEL_MSB) {
346         // Return the start of the next frame.
347         return bytesPerFrame * (i / bytesPerFrame) + bytesPerFrame;
348       }
349     }
350     return buffer.position();
351   }
352 }
353