/*
 *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef WEBRTC_INTERNAL_BEAMFORMER_BLOCKER_H_
#define WEBRTC_INTERNAL_BEAMFORMER_BLOCKER_H_

#include "webrtc/base/scoped_ptr.h"
#include "webrtc/common_audio/audio_ring_buffer.h"
#include "webrtc/common_audio/channel_buffer.h"

namespace webrtc {

// The callback function to process audio in the time domain. Input has already
// been windowed, and output will be windowed. The number of input channels
// must be >= the number of output channels.
class BlockerCallback {
 public:
  virtual ~BlockerCallback() {}

  virtual void ProcessBlock(const float* const* input,
                            size_t num_frames,
                            size_t num_input_channels,
                            size_t num_output_channels,
                            float* const* output) = 0;
};

// The main purpose of Blocker is to abstract away the fact that we often
// receive a different number of audio frames than our transform takes. For
// example, most FFTs work best when the fft-size is a power of 2, but suppose
// we receive 20ms of audio at a sample rate of 48000. That comes to 960 frames
// of audio, which is not a power of 2. Blocker allows us to specify the
// transform and all other necessary processing via the Process() callback
// function without any constraints on the transform-size
// (read: |block_size_|) or received-audio-size (read: |chunk_size_|).
// We handle this for the multichannel audio case, allowing for different
// numbers of input and output channels (for example, beamforming takes 2 or
// more input channels and returns 1 output channel). Audio signals are
// represented as deinterleaved floats in the range [-1, 1].
//
// Blocker is responsible for:
// - blocking audio while handling potential discontinuities on the edges
//   of chunks
// - windowing blocks before sending them to Process()
// - windowing processed blocks, and overlap-adding them together before
//   sending back a processed chunk
//
// To use blocker:
// 1. Implement a BlockerCallback object |bc|.
// 2. Instantiate a Blocker object |b|, passing in |bc|.
// 3. As you receive audio, call b.ProcessChunk() to get processed audio.
// (See the illustrative usage sketch at the end of this file.)
//
// A small amount of delay is added to the first received chunk to deal with
// the difference in chunk/block sizes. This delay is <= chunk_size.
//
// Ownership of window is retained by the caller. That is, Blocker makes a
// copy of window and does not attempt to delete it.
class Blocker {
 public:
  Blocker(size_t chunk_size,
          size_t block_size,
          size_t num_input_channels,
          size_t num_output_channels,
          const float* window,
          size_t shift_amount,
          BlockerCallback* callback);

  void ProcessChunk(const float* const* input,
                    size_t chunk_size,
                    size_t num_input_channels,
                    size_t num_output_channels,
                    float* const* output);

 private:
  const size_t chunk_size_;
  const size_t block_size_;
  const size_t num_input_channels_;
  const size_t num_output_channels_;

  // The number of frames of delay to add at the beginning of the first chunk.
  const size_t initial_delay_;

  // The frame index into the input buffer where the first block should be read
  // from. This is necessary because shift_amount_ is not necessarily a
  // multiple of chunk_size_, so blocks won't line up at the start of the
  // buffer.
  size_t frame_offset_;

  // Since blocks nearly always overlap, there are certain blocks that require
  // frames from the end of one chunk and the beginning of the next chunk. The
  // input and output buffers are responsible for saving those frames between
  // calls to ProcessChunk().
  //
  // Both contain |initial_delay_| + |chunk_size_| frames. The input is a
  // fairly standard FIFO, but due to the overlap-add it's harder to use an
  // AudioRingBuffer for the output.
  AudioRingBuffer input_buffer_;
  ChannelBuffer<float> output_buffer_;

  // Space for the input block (can't wrap because of windowing).
  ChannelBuffer<float> input_block_;

  // Space for the output block (can't wrap because of overlap-add).
  ChannelBuffer<float> output_block_;

  rtc::scoped_ptr<float[]> window_;

  // The number of frames between the starts of contiguous blocks. For example,
  // |shift_amount_| = |block_size_| / 2 for a Hann window.
  size_t shift_amount_;

  BlockerCallback* callback_;
};

}  // namespace webrtc

#endif  // WEBRTC_INTERNAL_BEAMFORMER_BLOCKER_H_
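
// ---------------------------------------------------------------------------
// Illustrative usage sketch (not part of this header's API). It is a minimal
// example assuming the 50%-overlap configuration mentioned above:
// |shift_amount| = |block_size| / 2 with a periodic Hann window.
// PassthroughCallback, Example(), and the constants below are hypothetical
// names used only for illustration, and the include path of this header
// depends on where it lives in the source tree.
//
//   #include <cmath>
//   #include <vector>
//
//   #include "blocker.h"  // This header; adjust the path as needed.
//
//   namespace {
//
//   // Hypothetical callback that copies each windowed block unchanged.
//   class PassthroughCallback : public webrtc::BlockerCallback {
//    public:
//     void ProcessBlock(const float* const* input,
//                       size_t num_frames,
//                       size_t num_input_channels,
//                       size_t num_output_channels,
//                       float* const* output) override {
//       for (size_t ch = 0; ch < num_output_channels; ++ch) {
//         for (size_t i = 0; i < num_frames; ++i) {
//           output[ch][i] = input[ch][i];
//         }
//       }
//     }
//   };
//
//   }  // namespace
//
//   void Example() {
//     const size_t kChunkSize = 960;  // 20 ms at 48 kHz.
//     const size_t kBlockSize = 512;  // Power-of-2 transform size.
//     const size_t kNumChannels = 1;
//     const float kPi = 3.14159265358979323846f;
//
//     // Periodic Hann window of length |kBlockSize|. Ownership stays with
//     // the caller; Blocker copies it.
//     std::vector<float> window(kBlockSize);
//     for (size_t i = 0; i < kBlockSize; ++i) {
//       window[i] = 0.5f * (1.f - std::cos(2.f * kPi * i / kBlockSize));
//     }
//
//     PassthroughCallback callback;
//     webrtc::Blocker blocker(kChunkSize, kBlockSize, kNumChannels,
//                             kNumChannels, window.data(), kBlockSize / 2,
//                             &callback);
//
//     // Deinterleaved chunks, one pointer per channel, samples in [-1, 1].
//     std::vector<float> in(kChunkSize, 0.f);
//     std::vector<float> out(kChunkSize, 0.f);
//     float* in_ptrs[] = {in.data()};
//     float* out_ptrs[] = {out.data()};
//     blocker.ProcessChunk(in_ptrs, kChunkSize, kNumChannels, kNumChannels,
//                          out_ptrs);
//   }
// ---------------------------------------------------------------------------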