1 /*
2 * Copyright (c) 2024, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 3-Clause Clear License
5 * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
6 * License was not distributed with this source code in the LICENSE file, you
7 * can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the
8 * Alliance for Open Media Patent License 1.0 was not distributed with this
9 * source code in the PATENTS file, you can obtain it at
10 * www.aomedia.org/license/patent.
11 */
12
13 #include "iamf/cli/adm_to_user_metadata/adm/panner.h"
14
15 #include <algorithm>
16 #include <cerrno>
17 #include <cmath>
18 #include <cstddef>
19 #include <cstdint>
20 #include <cstdio>
21 #include <cstring>
22 #include <numbers>
23 #include <string>
24 #include <vector>
25
26 #include "Eigen/Core"
27 #include "absl/log/check.h"
28 #include "absl/log/log.h"
29 #include "absl/status/status.h"
30 #include "absl/strings/str_cat.h"
31 #include "absl/strings/str_format.h"
32 #include "iamf/cli/adm_to_user_metadata/adm/adm_elements.h"
33 #include "iamf/cli/ambisonic_encoder/ambisonic_encoder.h"
34 #include "iamf/cli/wav_writer.h"
35 #include "iamf/common/utils/macros.h"
36 #include "iamf/common/utils/numeric_utils.h"
37 #include "iamf/common/utils/sample_processing_utils.h"
38 #include "src/dsp/read_wav_file.h"
39 #include "src/dsp/read_wav_info.h"
40
41 namespace iamf_tools {
42 namespace adm_to_user_metadata {
43
44 namespace {
45 constexpr int kDestinationAlignmentBytes = 4;
46 constexpr int kAmbisonicOrder = 3;
47 constexpr int kBufferSize = 256;
48 constexpr int kBitsPerByte = 8;
49 constexpr int kBitDepth16 = 16;
50 constexpr int kBitDepth24 = 24;
51 constexpr int kBitDepth32 = 32;
52
53 constexpr double kRadiansToDegrees = 180.0 / std::numbers::pi_v<double>;
54
55 } // namespace
56
PanObjectsToAmbisonics(const std::string & input_filename,const ADM & input_adm,const std::vector<size_t> & block_indices,WavWriter & wav_writer)57 absl::Status PanObjectsToAmbisonics(const std::string& input_filename,
58 const ADM& input_adm,
59 const std::vector<size_t>& block_indices,
60 WavWriter& wav_writer) {
61 // Read input wav file.
62 FILE* input_file = std::fopen(input_filename.c_str(), "rb");
63 if (input_file == nullptr) {
64 return absl::FailedPreconditionError(
65 absl::StrCat("Failed to open file: \"", input_filename,
66 "\" with error: ", std::strerror(errno), "."));
67 }
68
69 // Read header of input wav file.
70 ReadWavInfo info;
71 CHECK_NE(ReadWavHeader(input_file, &info), 0)
72 << "Error reading header of file \"" << input_file << "\"";
73
74 const int ip_wav_bits_per_sample = info.bit_depth;
75 const int ip_wav_nch = info.num_channels;
76 const int op_wav_nch = kOutputWavChannels;
77 const size_t ip_wav_total_num_samples = info.remaining_samples;
78 info.destination_alignment_bytes = kDestinationAlignmentBytes;
79 const size_t num_samples_per_channel = ip_wav_total_num_samples / ip_wav_nch;
80 const size_t buffer_size = num_samples_per_channel < kBufferSize
81 ? num_samples_per_channel
82 : kBufferSize;
83
84 if (ip_wav_bits_per_sample != kBitDepth16 &&
85 ip_wav_bits_per_sample != kBitDepth24 &&
86 ip_wav_bits_per_sample != kBitDepth32) {
87 return absl::NotFoundError(absl::StrFormat(
88 "Unsupported number of bits per sample: %d\n", ip_wav_bits_per_sample));
89 }
90
91 // Initialize the buffers.
92 const size_t ip_buffer_alloc_size = buffer_size * ip_wav_nch;
93 const size_t op_buffer_alloc_size = buffer_size * op_wav_nch;
94 std::vector<int32_t> ip_buffer_int32(ip_buffer_alloc_size);
95 std::vector<int32_t> op_buffer_int32(op_buffer_alloc_size);
96 std::vector<float> ip_buffer_float(ip_buffer_alloc_size);
97 std::vector<float> op_buffer_float(op_buffer_alloc_size);
98
99 // Create an Ambisonic encoder object.
100 AmbisonicEncoder encoder(buffer_size, info.num_channels, kAmbisonicOrder);
101
102 // Assign sources to the encoder at all available input channels.
103 for (int i = 0; i < ip_wav_nch; ++i) {
104 auto& audio_block =
105 input_adm.audio_channels[i].audio_blocks[block_indices[i]];
106 auto x = audio_block.position.x;
107 auto y = audio_block.position.y;
108 auto z = audio_block.position.z;
109 auto gain = audio_block.gain;
110
111 Eigen::Vector3d position(x, y, z);
112 auto azimuth = -((atan2(position[0], position[1])) * kRadiansToDegrees);
113 auto elevation = (atan2(position[2], hypot(position[0], position[1]))) *
114 kRadiansToDegrees;
115 auto distance = position.norm();
116
117 encoder.SetSource(i, gain, azimuth, elevation, distance);
118 }
119
120 // Main processing loop.
121 size_t samples_remaining = ip_wav_total_num_samples;
122 size_t num_samples_to_read = buffer_size;
123 auto max_value_db = 0.0f;
124 while (samples_remaining > 0) {
125 CHECK_EQ(num_samples_to_read, buffer_size);
126 // When remaining samples is below buffer capacity, pad unused buffer space
127 // with zeros to ensure only valid sample data is processed.
128 if (samples_remaining < ip_buffer_alloc_size) {
129 num_samples_to_read = samples_remaining / ip_wav_nch;
130 std::fill(ip_buffer_int32.begin() + samples_remaining,
131 ip_buffer_int32.end(), 0);
132 }
133 // Read from the input file.
134 const size_t samples_read = ReadWavSamples(
135 input_file, &info, ip_buffer_int32.data(), ip_buffer_alloc_size);
136 CHECK_EQ(samples_read, num_samples_to_read * ip_wav_nch);
137
138 // Convert int32 interleaved to float planar.
139 for (size_t smp = 0; smp < buffer_size; ++smp) {
140 for (size_t ch = 0; ch < ip_wav_nch; ++ch) {
141 ip_buffer_float[ch * buffer_size + smp] =
142 Int32ToNormalizedFloatingPoint<float>(
143 ip_buffer_int32[smp * ip_wav_nch + ch]);
144 }
145 }
146
147 // Process.
148 encoder.ProcessPlanarAudioData(ip_buffer_float, op_buffer_float);
149
150 // Warn if level exceeds 0 dBFS.
151 for (size_t smp = 0; smp < buffer_size; ++smp) {
152 auto ch = 0; // Only look at the first channel, as the scene is SN3D
153 // normalized. Therefore, the first channel is the loudest.
154
155 if (std::abs(op_buffer_float[ch * buffer_size + smp]) > 1.0f) {
156 auto timestamp = ip_wav_total_num_samples - samples_remaining + smp;
157 float level =
158 20 * std::log10(std::abs(op_buffer_float[ch * buffer_size + smp]));
159 max_value_db = std::max(max_value_db, level);
160
161 LOG_FIRST_N(WARNING, 5) << absl::StrFormat(
162 "Clipping detected at sample %d. Sample exceeds 0 dBFS by: "
163 "%.2f dB.",
164 timestamp, level);
165 }
166 }
167
168 // Convert float planar to int32 interleaved.
169 for (size_t smp = 0; smp < buffer_size; ++smp) {
170 for (size_t ch = 0; ch < op_wav_nch; ++ch) {
171 RETURN_IF_NOT_OK(NormalizedFloatingPointToInt32(
172 op_buffer_float[ch * buffer_size + smp],
173 op_buffer_int32[smp * op_wav_nch + ch]));
174 }
175 }
176
177 // Write to the output file.
178 std::vector<uint8_t> output_buffer_char(
179 num_samples_to_read * op_wav_nch *
180 (ip_wav_bits_per_sample / kBitsPerByte));
181 size_t write_position = 0;
182 for (size_t i = 0; i < num_samples_to_read * op_wav_nch; ++i) {
183 RETURN_IF_NOT_OK(WritePcmSample(
184 op_buffer_int32[i], ip_wav_bits_per_sample,
185 /*big_endian=*/false, output_buffer_char.data(), write_position));
186 }
187 RETURN_IF_NOT_OK(wav_writer.WritePcmSamples(output_buffer_char));
188
189 samples_remaining -= samples_read;
190 }
191
192 if (max_value_db > 0.0f) {
193 LOG(WARNING) << absl::StrFormat(
194 "Clipping detected during objects to Ambisonics panning. Maximum level "
195 "exceeded 0 dBFS by: "
196 "%.2f dB.",
197 max_value_db);
198 }
199
200 std::fclose(input_file);
201
202 return absl::OkStatus();
203 }
204
205 } // namespace adm_to_user_metadata
206 } // namespace iamf_tools
207