• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2024, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 3-Clause Clear License
5  * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
6  * License was not distributed with this source code in the LICENSE file, you
7  * can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the
8  * Alliance for Open Media Patent License 1.0 was not distributed with this
9  * source code in the PATENTS file, you can obtain it at
10  * www.aomedia.org/license/patent.
11  */
12 
#include "iamf/cli/adm_to_user_metadata/adm/panner.h"

#include <algorithm>
#include <cerrno>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <memory>
#include <numbers>
#include <string>
#include <vector>

#include "Eigen/Core"
#include "absl/log/check.h"
#include "absl/log/log.h"
#include "absl/status/status.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_format.h"
#include "iamf/cli/adm_to_user_metadata/adm/adm_elements.h"
#include "iamf/cli/ambisonic_encoder/ambisonic_encoder.h"
#include "iamf/cli/wav_writer.h"
#include "iamf/common/utils/macros.h"
#include "iamf/common/utils/numeric_utils.h"
#include "iamf/common/utils/sample_processing_utils.h"
#include "src/dsp/read_wav_file.h"
#include "src/dsp/read_wav_info.h"
40 
41 namespace iamf_tools {
42 namespace adm_to_user_metadata {
43 
44 namespace {
45 constexpr int kDestinationAlignmentBytes = 4;
46 constexpr int kAmbisonicOrder = 3;
47 constexpr int kBufferSize = 256;
48 constexpr int kBitsPerByte = 8;
49 constexpr int kBitDepth16 = 16;
50 constexpr int kBitDepth24 = 24;
51 constexpr int kBitDepth32 = 32;
52 
53 constexpr double kRadiansToDegrees = 180.0 / std::numbers::pi_v<double>;
54 
55 }  // namespace
56 
PanObjectsToAmbisonics(const std::string & input_filename,const ADM & input_adm,const std::vector<size_t> & block_indices,WavWriter & wav_writer)57 absl::Status PanObjectsToAmbisonics(const std::string& input_filename,
58                                     const ADM& input_adm,
59                                     const std::vector<size_t>& block_indices,
60                                     WavWriter& wav_writer) {
61   // Read input wav file.
62   FILE* input_file = std::fopen(input_filename.c_str(), "rb");
63   if (input_file == nullptr) {
64     return absl::FailedPreconditionError(
65         absl::StrCat("Failed to open file: \"", input_filename,
66                      "\" with error: ", std::strerror(errno), "."));
67   }
68 
69   // Read header of input wav file.
70   ReadWavInfo info;
71   CHECK_NE(ReadWavHeader(input_file, &info), 0)
72       << "Error reading header of file \"" << input_file << "\"";
73 
74   const int ip_wav_bits_per_sample = info.bit_depth;
75   const int ip_wav_nch = info.num_channels;
76   const int op_wav_nch = kOutputWavChannels;
77   const size_t ip_wav_total_num_samples = info.remaining_samples;
78   info.destination_alignment_bytes = kDestinationAlignmentBytes;
79   const size_t num_samples_per_channel = ip_wav_total_num_samples / ip_wav_nch;
80   const size_t buffer_size = num_samples_per_channel < kBufferSize
81                                  ? num_samples_per_channel
82                                  : kBufferSize;
83 
84   if (ip_wav_bits_per_sample != kBitDepth16 &&
85       ip_wav_bits_per_sample != kBitDepth24 &&
86       ip_wav_bits_per_sample != kBitDepth32) {
87     return absl::NotFoundError(absl::StrFormat(
88         "Unsupported number of bits per sample: %d\n", ip_wav_bits_per_sample));
89   }
90 
91   // Initialize the buffers.
92   const size_t ip_buffer_alloc_size = buffer_size * ip_wav_nch;
93   const size_t op_buffer_alloc_size = buffer_size * op_wav_nch;
94   std::vector<int32_t> ip_buffer_int32(ip_buffer_alloc_size);
95   std::vector<int32_t> op_buffer_int32(op_buffer_alloc_size);
96   std::vector<float> ip_buffer_float(ip_buffer_alloc_size);
97   std::vector<float> op_buffer_float(op_buffer_alloc_size);
98 
99   // Create an Ambisonic encoder object.
100   AmbisonicEncoder encoder(buffer_size, info.num_channels, kAmbisonicOrder);
101 
102   // Assign sources to the encoder at all available input channels.
103   for (int i = 0; i < ip_wav_nch; ++i) {
104     auto& audio_block =
105         input_adm.audio_channels[i].audio_blocks[block_indices[i]];
106     auto x = audio_block.position.x;
107     auto y = audio_block.position.y;
108     auto z = audio_block.position.z;
109     auto gain = audio_block.gain;
110 
111     Eigen::Vector3d position(x, y, z);
112     auto azimuth = -((atan2(position[0], position[1])) * kRadiansToDegrees);
113     auto elevation = (atan2(position[2], hypot(position[0], position[1]))) *
114                      kRadiansToDegrees;
115     auto distance = position.norm();
116 
117     encoder.SetSource(i, gain, azimuth, elevation, distance);
118   }
119 
120   // Main processing loop.
121   size_t samples_remaining = ip_wav_total_num_samples;
122   size_t num_samples_to_read = buffer_size;
123   auto max_value_db = 0.0f;
124   while (samples_remaining > 0) {
125     CHECK_EQ(num_samples_to_read, buffer_size);
126     // When remaining samples is below buffer capacity, pad unused buffer space
127     // with zeros to ensure only valid sample data is processed.
128     if (samples_remaining < ip_buffer_alloc_size) {
129       num_samples_to_read = samples_remaining / ip_wav_nch;
130       std::fill(ip_buffer_int32.begin() + samples_remaining,
131                 ip_buffer_int32.end(), 0);
132     }
133     // Read from the input file.
134     const size_t samples_read = ReadWavSamples(
135         input_file, &info, ip_buffer_int32.data(), ip_buffer_alloc_size);
136     CHECK_EQ(samples_read, num_samples_to_read * ip_wav_nch);
137 
138     // Convert int32 interleaved to float planar.
139     for (size_t smp = 0; smp < buffer_size; ++smp) {
140       for (size_t ch = 0; ch < ip_wav_nch; ++ch) {
141         ip_buffer_float[ch * buffer_size + smp] =
142             Int32ToNormalizedFloatingPoint<float>(
143                 ip_buffer_int32[smp * ip_wav_nch + ch]);
144       }
145     }
146 
147     // Process.
148     encoder.ProcessPlanarAudioData(ip_buffer_float, op_buffer_float);
149 
150     // Warn if level exceeds 0 dBFS.
151     for (size_t smp = 0; smp < buffer_size; ++smp) {
152       auto ch = 0;  // Only look at the first channel, as the scene is SN3D
153                     // normalized. Therefore, the first channel is the loudest.
154 
155       if (std::abs(op_buffer_float[ch * buffer_size + smp]) > 1.0f) {
156         auto timestamp = ip_wav_total_num_samples - samples_remaining + smp;
157         float level =
158             20 * std::log10(std::abs(op_buffer_float[ch * buffer_size + smp]));
159         max_value_db = std::max(max_value_db, level);
160 
161         LOG_FIRST_N(WARNING, 5) << absl::StrFormat(
162             "Clipping detected at sample %d. Sample exceeds 0 dBFS by: "
163             "%.2f dB.",
164             timestamp, level);
165       }
166     }
167 
168     // Convert float planar to int32 interleaved.
169     for (size_t smp = 0; smp < buffer_size; ++smp) {
170       for (size_t ch = 0; ch < op_wav_nch; ++ch) {
171         RETURN_IF_NOT_OK(NormalizedFloatingPointToInt32(
172             op_buffer_float[ch * buffer_size + smp],
173             op_buffer_int32[smp * op_wav_nch + ch]));
174       }
175     }
176 
177     // Write to the output file.
178     std::vector<uint8_t> output_buffer_char(
179         num_samples_to_read * op_wav_nch *
180         (ip_wav_bits_per_sample / kBitsPerByte));
181     size_t write_position = 0;
182     for (size_t i = 0; i < num_samples_to_read * op_wav_nch; ++i) {
183       RETURN_IF_NOT_OK(WritePcmSample(
184           op_buffer_int32[i], ip_wav_bits_per_sample,
185           /*big_endian=*/false, output_buffer_char.data(), write_position));
186     }
187     RETURN_IF_NOT_OK(wav_writer.WritePcmSamples(output_buffer_char));
188 
189     samples_remaining -= samples_read;
190   }
191 
192   if (max_value_db > 0.0f) {
193     LOG(WARNING) << absl::StrFormat(
194         "Clipping detected during objects to Ambisonics panning. Maximum level "
195         "exceeded 0 dBFS by: "
196         "%.2f dB.",
197         max_value_db);
198   }
199 
200   std::fclose(input_file);
201 
202   return absl::OkStatus();
203 }
204 
205 }  // namespace adm_to_user_metadata
206 }  // namespace iamf_tools
207