1 /*
2 * Copyright (c) 2023, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 3-Clause Clear License
5 * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
6 * License was not distributed with this source code in the LICENSE file, you
7 * can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the
8 * Alliance for Open Media Patent License 1.0 was not distributed with this
9 * source code in the PATENTS file, you can obtain it at
10 * www.aomedia.org/license/patent.
11 */
12 #include "iamf/cli/renderer/loudspeakers_renderer.h"
13
14 #include <algorithm>
15 #include <cstddef>
16 #include <cstdint>
17 #include <iomanip>
18 #include <sstream>
19 #include <string>
20 #include <utility>
21 #include <vector>
22
23 #include "absl/base/no_destructor.h"
24 #include "absl/log/check.h"
25 #include "absl/log/log.h"
26 #include "absl/status/status.h"
27 #include "absl/strings/str_cat.h"
28 #include "absl/strings/string_view.h"
29 #include "absl/types/span.h"
30 #include "iamf/cli/channel_label.h"
31 #include "iamf/cli/renderer/precomputed_gains.h"
32 #include "iamf/common/utils/macros.h"
33 #include "iamf/common/utils/map_utils.h"
34 #include "iamf/common/utils/validation_utils.h"
35 #include "iamf/obu/audio_element.h"
36 #include "iamf/obu/demixing_info_parameter_data.h"
37 #include "iamf/obu/types.h"
38
39 namespace iamf_tools {
40
41 namespace {
42
ComputeGains(absl::string_view input_layout_string,absl::string_view output_layout_string,const DownMixingParams & down_mixing_params,std::vector<std::vector<double>> & gains)43 absl::Status ComputeGains(absl::string_view input_layout_string,
44 absl::string_view output_layout_string,
45 const DownMixingParams& down_mixing_params,
46 std::vector<std::vector<double>>& gains) {
47 const auto alpha = down_mixing_params.alpha;
48 const auto beta = down_mixing_params.beta;
49 const auto gamma = down_mixing_params.gamma;
50 const auto delta = down_mixing_params.delta;
51 const auto w = down_mixing_params.w;
52 // TODO(b/292174366): Strictly follow IAMF spec logic of when to use demixers
53 // vs. libear renderer.
54 LOG_FIRST_N(INFO, 5)
55 << "Rendering may be buggy or not follow the spec "
56 "recommendations. Computing gains based on demixing params: "
57 << input_layout_string << " --> " << output_layout_string;
58 if (input_layout_string == "4+7+0" && output_layout_string == "3.1.2") {
59 // Values checked; fixed.
60 gains = {{1, 0, 0, 0, 0, 0},
61 {0, 1, 0, 0, 0, 0},
62 {0, 0, 1, 0, 0, 0},
63 {0, 0, 0, 1, 0, 0},
64 // Lss7
65 {alpha * delta, 0, 0, 0, alpha * w * delta, 0},
66 // Rss7
67 {0, alpha * delta, 0, 0, 0, alpha * w * delta},
68 {beta * delta, 0, 0, 0, beta * w * delta, 0},
69 {0, beta * delta, 0, 0, 0, beta * w * delta},
70 {0, 0, 0, 0, 1, 0},
71 {0, 0, 0, 0, 0, 1},
72 {0, 0, 0, 0, gamma, 0},
73 {0, 0, 0, 0, 0, gamma}};
74 } else if (input_layout_string == "4+7+0" &&
75 output_layout_string == "7.1.2") {
76 // Just drop the last two channels.
77 gains = {
78 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0}, {0, 1, 0, 0, 0, 0, 0, 0, 0, 0},
79 {0, 0, 1, 0, 0, 0, 0, 0, 0, 0}, {0, 0, 0, 1, 0, 0, 0, 0, 0, 0},
80 {0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 1, 0, 0, 0, 0},
81 {0, 0, 0, 0, 0, 0, 1, 0, 0, 0}, {0, 0, 0, 0, 0, 0, 0, 1, 0, 0},
82 {0, 0, 0, 0, 0, 0, 0, 0, 1, 0}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 1},
83 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
84 };
85 } else {
86 return absl::UnknownError(absl::StrCat(
87 "The encoder did not implement matrices for ", input_layout_string,
88 " to ", output_layout_string, " yet."));
89 }
90 return absl::OkStatus();
91 }
92
LayoutStringHasHeightChannels(absl::string_view layout_string,bool & result)93 absl::Status LayoutStringHasHeightChannels(absl::string_view layout_string,
94 bool& result) {
95 // TODO(b/292174366): Fill in all possible layouts or determine this in a
96 // better way.
97 if (layout_string == "4+7+0" || layout_string == "7.1.2" ||
98 layout_string == "4+5+0" || layout_string == "2+5+0" ||
99 layout_string == "3.1.2") {
100 result = true;
101 return absl::OkStatus();
102 } else if (layout_string == "0+7+0" || layout_string == "0+5+0" ||
103 layout_string == "0+2+0" || layout_string == "0+1+0") {
104 result = false;
105 return absl::OkStatus();
106 } else {
107 return absl::UnknownError(
108 absl::StrCat("Unknown if ", layout_string, " has height channels"));
109 }
110 }
111
ComputeChannelLayoutToLoudspeakersGains(const std::vector<ChannelLabel::Label> & channel_labels,const DownMixingParams & down_mixing_params,absl::string_view input_layout_string,absl::string_view output_layout_string,std::vector<std::vector<double>> & gains)112 absl::Status ComputeChannelLayoutToLoudspeakersGains(
113 const std::vector<ChannelLabel::Label>& channel_labels,
114 const DownMixingParams& down_mixing_params,
115 absl::string_view input_layout_string,
116 absl::string_view output_layout_string,
117 std::vector<std::vector<double>>& gains) {
118 gains.clear();
119 if (!down_mixing_params.in_bitstream) {
120 // There is no DownMixingParamDefinition, which is fine. Do not fill the
121 // gains and let the caller use default precomputed gains.
122 return absl::OkStatus();
123 }
124
125 // TODO(b/292174366): Remove hacks. Updates logic of when to use demixers vs
126 // libear renderer.
127 bool input_layout_has_height_channels;
128 RETURN_IF_NOT_OK(LayoutStringHasHeightChannels(
129 input_layout_string, input_layout_has_height_channels));
130 bool playback_has_height_channels;
131 RETURN_IF_NOT_OK(LayoutStringHasHeightChannels(output_layout_string,
132 playback_has_height_channels));
133 if (!playback_has_height_channels && input_layout_has_height_channels) {
134 return absl::OkStatus();
135 }
136
137 // The bitstream tells use how to compute the gains. Use those.
138 RETURN_IF_NOT_OK(ComputeGains(input_layout_string, output_layout_string,
139 down_mixing_params, gains));
140
141 // Examine the computed gains.
142 LOG_FIRST_N(INFO, 5) << "Computed gains:";
143 auto fmt = std::setw(7);
144 std::stringstream ss;
145 for (const auto& label : channel_labels) {
146 ss << fmt << absl::StrCat(label);
147 }
148 LOG_FIRST_N(INFO, 5) << ss.str();
149 for (size_t i = 0; i < gains.front().size(); i++) {
150 ss.str({});
151 ss.clear();
152 ss << std::setprecision(3);
153 for (size_t j = 0; j < gains.size(); j++) {
154 ss << fmt << gains.at(j).at(i);
155 }
156 LOG_FIRST_N(INFO, 5) << ss.str();
157 }
158
159 return absl::OkStatus();
160 }
161
// Converts a signed Q15 fixed-point value to floating point by dividing the
// raw 16-bit integer by 2^15, so the result lies in [-1.0, 1.0).
double Q15ToSignedDouble(const int16_t input) {
  constexpr double kQ15Denominator = 32768.0;  // 2^15.
  const double widened = input;
  return widened / kQ15Denominator;
}
165
ProjectSamplesToRender(absl::Span<const std::vector<InternalSampleType>> & input_samples,const int16_t * demixing_matrix,const int output_channel_count)166 std::vector<std::vector<InternalSampleType>> ProjectSamplesToRender(
167 absl::Span<const std::vector<InternalSampleType>>& input_samples,
168 const int16_t* demixing_matrix, const int output_channel_count) {
169 CHECK_NE(demixing_matrix, nullptr);
170 std::vector<std::vector<InternalSampleType>> samples_to_render(
171 input_samples.size(),
172 std::vector<InternalSampleType>(output_channel_count, 0.0));
173
174 for (int t = 0; t < samples_to_render.size(); t++) {
175 for (int out_channel = 0; out_channel < output_channel_count;
176 out_channel++) {
177 // Project with `demixing_matrix`, which is encoded as Q15 and stored
178 // in column major.
179 for (int in_channel = 0; in_channel < input_samples[0].size();
180 in_channel++) {
181 samples_to_render[t][out_channel] +=
182 Q15ToSignedDouble(
183 demixing_matrix[in_channel * output_channel_count +
184 out_channel]) *
185 input_samples[t][in_channel];
186 }
187 }
188 }
189 return samples_to_render;
190 }
191
RenderSamplesUsingGains(absl::Span<const std::vector<InternalSampleType>> & input_samples,const std::vector<std::vector<double>> & gains,const int16_t * demixing_matrix,std::vector<InternalSampleType> & rendered_samples)192 void RenderSamplesUsingGains(
193 absl::Span<const std::vector<InternalSampleType>>& input_samples,
194 const std::vector<std::vector<double>>& gains,
195 const int16_t* demixing_matrix,
196 std::vector<InternalSampleType>& rendered_samples) {
197 // Project with `demixing_matrix` when in projection mode.
198 absl::Span<const std::vector<InternalSampleType>> samples_to_render_double;
199 std::vector<std::vector<InternalSampleType>> projected_samples;
200 if (demixing_matrix != nullptr) {
201 projected_samples =
202 ProjectSamplesToRender(input_samples, demixing_matrix, gains.size());
203 samples_to_render_double = absl::MakeConstSpan(projected_samples);
204 } else {
205 samples_to_render_double = input_samples;
206 }
207
208 int rendered_samples_index = 0;
209 std::fill(rendered_samples.begin(), rendered_samples.end(), 0);
210 for (int t = 0; t < samples_to_render_double.size(); t++) {
211 for (int out_channel = 0; out_channel < gains[0].size(); out_channel++) {
212 for (int in_channel = 0; in_channel < samples_to_render_double[0].size();
213 in_channel++) {
214 rendered_samples[rendered_samples_index] +=
215 samples_to_render_double[t][in_channel] *
216 gains[in_channel][out_channel];
217 }
218
219 rendered_samples_index++;
220 }
221 }
222 }
223
224 } // namespace
225
LookupPrecomputedGains(absl::string_view input_key,absl::string_view output_key)226 absl::StatusOr<std::vector<std::vector<double>>> LookupPrecomputedGains(
227 absl::string_view input_key, absl::string_view output_key) {
228 static const absl::NoDestructor<PrecomputedGains> precomputed_gains(
229 InitPrecomputedGains());
230
231 const std::string input_key_debug_message =
232 absl::StrCat("Precomputed gains not found for input_key= ", input_key);
233 // Search throughs two layers of maps. We want to find the gains associated
234 // with `[input_key][output_key]`.
235 auto input_key_it = precomputed_gains->find(input_key);
236 if (input_key_it == precomputed_gains->end()) [[unlikely]] {
237 return absl::NotFoundError(input_key_debug_message);
238 }
239
240 return LookupInMap(input_key_it->second, std::string(output_key),
241 absl::StrCat(input_key_debug_message, " and output_key"));
242 }
243
RenderChannelLayoutToLoudspeakers(absl::Span<const std::vector<InternalSampleType>> & input_samples,const DownMixingParams & down_mixing_params,const std::vector<ChannelLabel::Label> & channel_labels,absl::string_view input_key,absl::string_view output_key,const std::vector<std::vector<double>> & precomputed_gains,std::vector<InternalSampleType> & rendered_samples)244 absl::Status RenderChannelLayoutToLoudspeakers(
245 absl::Span<const std::vector<InternalSampleType>>& input_samples,
246 const DownMixingParams& down_mixing_params,
247 const std::vector<ChannelLabel::Label>& channel_labels,
248 absl::string_view input_key, absl::string_view output_key,
249 const std::vector<std::vector<double>>& precomputed_gains,
250 std::vector<InternalSampleType>& rendered_samples) {
251 // When the demixing parameters are in the bitstream, recompute for every
252 // frame and do not store the result in the map.
253 // TODO(b/292174366): Find a better solution and strictly follow the spec for
254 // which renderer to use.
255 std::vector<std::vector<double>> newly_computed_gains;
256 RETURN_IF_NOT_OK(ComputeChannelLayoutToLoudspeakersGains(
257 channel_labels, down_mixing_params, input_key, output_key,
258 newly_computed_gains));
259 const std::vector<std::vector<double>>& gains_to_use =
260 newly_computed_gains.empty() ? precomputed_gains : newly_computed_gains;
261
262 RenderSamplesUsingGains(input_samples, gains_to_use,
263 /*demixing_matrix=*/nullptr, rendered_samples);
264 return absl::OkStatus();
265 }
266
RenderAmbisonicsToLoudspeakers(absl::Span<const std::vector<InternalSampleType>> & input_samples,const AmbisonicsConfig & ambisonics_config,const std::vector<std::vector<double>> & gains,std::vector<InternalSampleType> & rendered_samples)267 absl::Status RenderAmbisonicsToLoudspeakers(
268 absl::Span<const std::vector<InternalSampleType>>& input_samples,
269 const AmbisonicsConfig& ambisonics_config,
270 const std::vector<std::vector<double>>& gains,
271 std::vector<InternalSampleType>& rendered_samples) {
272 // Exclude unsupported mode first, and deal with only mono or projection
273 // in the rest of the code.
274 const auto mode = ambisonics_config.ambisonics_mode;
275 if (mode != AmbisonicsConfig::kAmbisonicsModeMono &&
276 mode != AmbisonicsConfig::kAmbisonicsModeProjection) {
277 return absl::UnimplementedError(
278 absl::StrCat("Unsupported ambisonics mode. mode= ", mode));
279 }
280 const bool is_mono = mode == AmbisonicsConfig::kAmbisonicsModeMono;
281
282 // Input key for ambisonics is "A{ambisonics_order}".
283 const uint8_t output_channel_count =
284 is_mono
285 ? std::get<AmbisonicsMonoConfig>(ambisonics_config.ambisonics_config)
286 .output_channel_count
287 : std::get<AmbisonicsProjectionConfig>(
288 ambisonics_config.ambisonics_config)
289 .output_channel_count;
290
291 RETURN_IF_NOT_OK(
292 ValidateContainerSizeEqual("gains", gains, output_channel_count));
293
294 RenderSamplesUsingGains(input_samples, gains,
295 is_mono ? nullptr
296 : std::get<AmbisonicsProjectionConfig>(
297 ambisonics_config.ambisonics_config)
298 .demixing_matrix.data(),
299 rendered_samples);
300
301 return absl::OkStatus();
302 }
303
304 } // namespace iamf_tools
305