• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "modules/audio_processing/test/conversational_speech/simulator.h"
12 
13 #include <math.h>
14 
15 #include <algorithm>
16 #include <memory>
17 #include <set>
18 #include <utility>
19 #include <vector>
20 
21 #include "api/array_view.h"
22 #include "common_audio/include/audio_util.h"
23 #include "common_audio/wav_file.h"
24 #include "modules/audio_processing/test/conversational_speech/wavreader_interface.h"
25 #include "rtc_base/constructor_magic.h"
26 #include "rtc_base/logging.h"
27 #include "rtc_base/numerics/safe_conversions.h"
28 #include "test/testsupport/file_utils.h"
29 
30 namespace webrtc {
31 namespace test {
32 namespace {
33 
34 using conversational_speech::MultiEndCall;
35 using conversational_speech::SpeakerOutputFilePaths;
36 using conversational_speech::WavReaderInterface;
37 
38 // Combines output path and speaker names to define the output file paths for
39 // the near-end and far=end audio tracks.
40 std::unique_ptr<std::map<std::string, SpeakerOutputFilePaths>>
InitSpeakerOutputFilePaths(const std::set<std::string> & speaker_names,const std::string & output_path)41 InitSpeakerOutputFilePaths(const std::set<std::string>& speaker_names,
42                            const std::string& output_path) {
43   // Create map.
44   auto speaker_output_file_paths_map =
45       std::make_unique<std::map<std::string, SpeakerOutputFilePaths>>();
46 
47   // Add near-end and far-end output paths into the map.
48   for (const auto& speaker_name : speaker_names) {
49     const std::string near_end_path =
50         test::JoinFilename(output_path, "s_" + speaker_name + "-near_end.wav");
51     RTC_LOG(LS_VERBOSE) << "The near-end audio track will be created in "
52                         << near_end_path << ".";
53 
54     const std::string far_end_path =
55         test::JoinFilename(output_path, "s_" + speaker_name + "-far_end.wav");
56     RTC_LOG(LS_VERBOSE) << "The far-end audio track will be created in "
57                         << far_end_path << ".";
58 
59     // Add to map.
60     speaker_output_file_paths_map->emplace(
61         std::piecewise_construct, std::forward_as_tuple(speaker_name),
62         std::forward_as_tuple(near_end_path, far_end_path));
63   }
64 
65   return speaker_output_file_paths_map;
66 }
67 
68 // Class that provides one WavWriter for the near-end and one for the far-end
69 // output track of a speaker.
70 class SpeakerWavWriters {
71  public:
SpeakerWavWriters(const SpeakerOutputFilePaths & output_file_paths,int sample_rate)72   SpeakerWavWriters(const SpeakerOutputFilePaths& output_file_paths,
73                     int sample_rate)
74       : near_end_wav_writer_(output_file_paths.near_end, sample_rate, 1u),
75         far_end_wav_writer_(output_file_paths.far_end, sample_rate, 1u) {}
near_end_wav_writer()76   WavWriter* near_end_wav_writer() { return &near_end_wav_writer_; }
far_end_wav_writer()77   WavWriter* far_end_wav_writer() { return &far_end_wav_writer_; }
78 
79  private:
80   WavWriter near_end_wav_writer_;
81   WavWriter far_end_wav_writer_;
82 };
83 
84 // Initializes one WavWriter instance for each speaker and both the near-end and
85 // far-end output tracks.
86 std::unique_ptr<std::map<std::string, SpeakerWavWriters>>
InitSpeakersWavWriters(const std::map<std::string,SpeakerOutputFilePaths> & speaker_output_file_paths,int sample_rate)87 InitSpeakersWavWriters(const std::map<std::string, SpeakerOutputFilePaths>&
88                            speaker_output_file_paths,
89                        int sample_rate) {
90   // Create map.
91   auto speaker_wav_writers_map =
92       std::make_unique<std::map<std::string, SpeakerWavWriters>>();
93 
94   // Add SpeakerWavWriters instance into the map.
95   for (auto it = speaker_output_file_paths.begin();
96        it != speaker_output_file_paths.end(); ++it) {
97     speaker_wav_writers_map->emplace(
98         std::piecewise_construct, std::forward_as_tuple(it->first),
99         std::forward_as_tuple(it->second, sample_rate));
100   }
101 
102   return speaker_wav_writers_map;
103 }
104 
105 // Reads all the samples for each audio track.
PreloadAudioTracks(const std::map<std::string,std::unique_ptr<WavReaderInterface>> & audiotrack_readers)106 std::unique_ptr<std::map<std::string, std::vector<int16_t>>> PreloadAudioTracks(
107     const std::map<std::string, std::unique_ptr<WavReaderInterface>>&
108         audiotrack_readers) {
109   // Create map.
110   auto audiotracks_map =
111       std::make_unique<std::map<std::string, std::vector<int16_t>>>();
112 
113   // Add audio track vectors.
114   for (auto it = audiotrack_readers.begin(); it != audiotrack_readers.end();
115        ++it) {
116     // Add map entry.
117     audiotracks_map->emplace(std::piecewise_construct,
118                              std::forward_as_tuple(it->first),
119                              std::forward_as_tuple(it->second->NumSamples()));
120 
121     // Read samples.
122     it->second->ReadInt16Samples(audiotracks_map->at(it->first));
123   }
124 
125   return audiotracks_map;
126 }
127 
128 // Writes all the values in |source_samples| via |wav_writer|. If the number of
129 // previously written samples in |wav_writer| is less than |interval_begin|, it
130 // adds zeros as left padding. The padding corresponds to intervals during which
131 // a speaker is not active.
PadLeftWriteChunk(rtc::ArrayView<const int16_t> source_samples,size_t interval_begin,WavWriter * wav_writer)132 void PadLeftWriteChunk(rtc::ArrayView<const int16_t> source_samples,
133                        size_t interval_begin,
134                        WavWriter* wav_writer) {
135   // Add left padding.
136   RTC_CHECK(wav_writer);
137   RTC_CHECK_GE(interval_begin, wav_writer->num_samples());
138   size_t padding_size = interval_begin - wav_writer->num_samples();
139   if (padding_size != 0) {
140     const std::vector<int16_t> padding(padding_size, 0);
141     wav_writer->WriteSamples(padding.data(), padding_size);
142   }
143 
144   // Write source samples.
145   wav_writer->WriteSamples(source_samples.data(), source_samples.size());
146 }
147 
148 // Appends zeros via |wav_writer|. The number of zeros is always non-negative
149 // and equal to the difference between the previously written samples and
150 // |pad_samples|.
PadRightWrite(WavWriter * wav_writer,size_t pad_samples)151 void PadRightWrite(WavWriter* wav_writer, size_t pad_samples) {
152   RTC_CHECK(wav_writer);
153   RTC_CHECK_GE(pad_samples, wav_writer->num_samples());
154   size_t padding_size = pad_samples - wav_writer->num_samples();
155   if (padding_size != 0) {
156     const std::vector<int16_t> padding(padding_size, 0);
157     wav_writer->WriteSamples(padding.data(), padding_size);
158   }
159 }
160 
ScaleSignal(rtc::ArrayView<const int16_t> source_samples,int gain,rtc::ArrayView<int16_t> output_samples)161 void ScaleSignal(rtc::ArrayView<const int16_t> source_samples,
162                  int gain,
163                  rtc::ArrayView<int16_t> output_samples) {
164   const float gain_linear = DbToRatio(gain);
165   RTC_DCHECK_EQ(source_samples.size(), output_samples.size());
166   std::transform(source_samples.begin(), source_samples.end(),
167                  output_samples.begin(), [gain_linear](int16_t x) -> int16_t {
168                    return rtc::saturated_cast<int16_t>(x * gain_linear);
169                  });
170 }
171 
172 }  // namespace
173 
174 namespace conversational_speech {
175 
Simulate(const MultiEndCall & multiend_call,const std::string & output_path)176 std::unique_ptr<std::map<std::string, SpeakerOutputFilePaths>> Simulate(
177     const MultiEndCall& multiend_call,
178     const std::string& output_path) {
179   // Set output file paths and initialize wav writers.
180   const auto& speaker_names = multiend_call.speaker_names();
181   auto speaker_output_file_paths =
182       InitSpeakerOutputFilePaths(speaker_names, output_path);
183   auto speakers_wav_writers = InitSpeakersWavWriters(
184       *speaker_output_file_paths, multiend_call.sample_rate());
185 
186   // Preload all the input audio tracks.
187   const auto& audiotrack_readers = multiend_call.audiotrack_readers();
188   auto audiotracks = PreloadAudioTracks(audiotrack_readers);
189 
190   // TODO(alessiob): When speaker_names.size() == 2, near-end and far-end
191   // across the 2 speakers are symmetric; hence, the code below could be
192   // replaced by only creating the near-end or the far-end. However, this would
193   // require to split the unit tests and document the behavior in README.md.
194   // In practice, it should not be an issue since the files are not expected to
195   // be signinificant.
196 
197   // Write near-end and far-end output tracks.
198   for (const auto& speaking_turn : multiend_call.speaking_turns()) {
199     const std::string& active_speaker_name = speaking_turn.speaker_name;
200     const auto source_audiotrack =
201         audiotracks->at(speaking_turn.audiotrack_file_name);
202     std::vector<int16_t> scaled_audiotrack(source_audiotrack.size());
203     ScaleSignal(source_audiotrack, speaking_turn.gain, scaled_audiotrack);
204 
205     // Write active speaker's chunk to active speaker's near-end.
206     PadLeftWriteChunk(
207         scaled_audiotrack, speaking_turn.begin,
208         speakers_wav_writers->at(active_speaker_name).near_end_wav_writer());
209 
210     // Write active speaker's chunk to other participants' far-ends.
211     for (const std::string& speaker_name : speaker_names) {
212       if (speaker_name == active_speaker_name)
213         continue;
214       PadLeftWriteChunk(
215           scaled_audiotrack, speaking_turn.begin,
216           speakers_wav_writers->at(speaker_name).far_end_wav_writer());
217     }
218   }
219 
220   // Finalize all the output tracks with right padding.
221   // This is required to make all the output tracks duration equal.
222   size_t duration_samples = multiend_call.total_duration_samples();
223   for (const std::string& speaker_name : speaker_names) {
224     PadRightWrite(speakers_wav_writers->at(speaker_name).near_end_wav_writer(),
225                   duration_samples);
226     PadRightWrite(speakers_wav_writers->at(speaker_name).far_end_wav_writer(),
227                   duration_samples);
228   }
229 
230   return speaker_output_file_paths;
231 }
232 
233 }  // namespace conversational_speech
234 }  // namespace test
235 }  // namespace webrtc
236