1 /*
2 * Copyright (c) 2024, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 3-Clause Clear License
5 * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
6 * License was not distributed with this source code in the LICENSE file, you
7 * can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the
8 * Alliance for Open Media Patent License 1.0 was not distributed with this
9 * source code in the PATENTS file, you can obtain it at
10 * www.aomedia.org/license/patent.
11 */
12
13 #include "iamf/cli/adm_to_user_metadata/adm/wav_file_splicer.h"
14
15 #include <algorithm>
16 #include <cmath>
17 #include <cstddef>
18 #include <cstdint>
19 #include <filesystem>
20 #include <fstream>
21 #include <iostream>
22 #include <iterator>
23 #include <memory>
24 #include <string>
25 #include <utility>
26 #include <vector>
27
28 #include "absl/log/check.h"
29 #include "absl/log/log.h"
30 #include "absl/status/status.h"
31 #include "absl/strings/str_cat.h"
32 #include "absl/strings/string_view.h"
33 #include "iamf/cli/adm_to_user_metadata/adm/adm_elements.h"
34 #include "iamf/cli/adm_to_user_metadata/adm/bw64_reader.h"
35 #include "iamf/cli/adm_to_user_metadata/adm/format_info_chunk.h"
36 #include "iamf/cli/adm_to_user_metadata/adm/panner.h"
37 #include "iamf/cli/wav_writer.h"
38 #include "iamf/common/utils/macros.h"
39 #include "iamf/obu/ia_sequence_header.h"
40
41 namespace iamf_tools {
42 namespace adm_to_user_metadata {
43
44 namespace {
45
// Number of bits in a byte; used to turn a bit depth into a width in bytes.
constexpr int32_t kBitsPerByte{8};

// Size, in bytes, a sample buffer has to reach before it is flushed to a wav
// writer.
constexpr size_t kSizeToFlush{4096};

// Arbitrary limit on how many samples will be written to the wav file at
// once. Derived from `kSizeToFlush` so that both agree even in the case of
// 16-bit samples and a single channel.
constexpr size_t kMaxNumSamplesPerFrame{kSizeToFlush / sizeof(int16_t)};

// Error tolerance for timing comparisons. Set to the minimum precision an ADM
// file is allowed to use when describing timing related parameters.
constexpr double kErrorTolerance{1e-5};

// Offset for the data chunk within an extensible format wav file.
constexpr int32_t kExtensibleOffset{72};

// Standard size for a wav header.
constexpr int32_t kHeaderSize{8};

// Total number of channels allowed per mix for the IAMF base enhanced profile.
constexpr int kMaxChannelsPerMixBaseEnhanced{28};
63 // Max LFE channels allowed per mix for the IAMF base enhanced profile.
64 constexpr int kMaxLfeChannelsAllowed =
65 kMaxChannelsPerMixBaseEnhanced - kOutputWavChannels;
66
67 // Creates a map for the audioObject(s) and the audioTrack(s) present within.
GetAudioTracksForAudioObjects(const std::vector<struct AudioObject> & audio_objects)68 std::vector<std::vector<int32_t>> GetAudioTracksForAudioObjects(
69 const std::vector<struct AudioObject>& audio_objects) {
70 std::vector<std::vector<int32_t>> audio_tracks_for_audio_objects(
71 audio_objects.size(), std::vector<int32_t>());
72 int32_t audio_object_index = -1;
73 int32_t audio_track_index = -1;
74 for (const auto& audio_object : audio_objects) {
75 auto& audio_tracks_for_audio_object =
76 audio_tracks_for_audio_objects[++audio_object_index];
77 for (auto unused_audio_track : audio_object.audio_track_uid_ref) {
78 audio_tracks_for_audio_object.push_back(++audio_track_index);
79 }
80 }
81 return audio_tracks_for_audio_objects;
82 }
83
AbortAllWavWriters(std::vector<std::unique_ptr<WavWriter>> & audio_object_index_to_wav_writer)84 void AbortAllWavWriters(
85 std::vector<std::unique_ptr<WavWriter>>& audio_object_index_to_wav_writer) {
86 for (auto& wav_writer : audio_object_index_to_wav_writer) {
87 wav_writer->Abort();
88 }
89 }
90
FlushToWavWriter(std::vector<uint8_t> & samples_to_flush,WavWriter & wav_writer)91 absl::Status FlushToWavWriter(std::vector<uint8_t>& samples_to_flush,
92 WavWriter& wav_writer) {
93 RETURN_IF_NOT_OK(wav_writer.WritePcmSamples(samples_to_flush));
94 samples_to_flush.clear();
95 return absl::OkStatus();
96 }
97
// Describes how one interleaved frame splits into runs of non-LFE channels and
// single LFE channels.
//
// Each returned pair holds (number of channels in the segment, wav writer
// index). Non-LFE segments always use writer index 0; the LFE channels use
// writer indices 1, 2, ... in the order they appear in `lfe_ids`. `lfe_ids`
// holds 1-based channel positions. A non-LFE segment is emitted before every
// LFE channel and after the last one, even when it is empty (size 0).
//
// For e.g., consider an input wav with layout 3.1.2 (where the 4th channel
// corresponds to LFE).
//   Channel layout     : {L3, R3, Centre, LFE, Ltf3, Rtf3}
//   Input LFE-Id list  : {4}
//
//   The segmentation required: [L3, R3, Centre]; [LFE]; [Ltf3, Rtf3]
//   Segment layout obtained  : <3,0>, <1,1>, <2,0>
std::vector<std::pair<int, int>> GenerateSegmentLayout(
    const std::vector<int>& lfe_ids, const int num_channels) {
  std::vector<std::pair<int, int>> segment_layout;
  segment_layout.reserve(2 * lfe_ids.size() + 1);

  // 0-based index of the first channel which is not part of a segment yet.
  int next_channel = 0;
  int lfe_writer_index = 0;
  for (const int lfe_id : lfe_ids) {
    // The LFE sits at 0-based index `lfe_id - 1`; everything between the
    // cursor and that index belongs to the non-LFE writer.
    segment_layout.emplace_back(lfe_id - 1 - next_channel, 0);
    // The LFE channel itself: always one channel wide, with its own writer.
    segment_layout.emplace_back(1, ++lfe_writer_index);
    next_channel = lfe_id;
  }

  // Whatever follows the last LFE channel (or every channel, when there is no
  // LFE at all) is one more non-LFE segment.
  segment_layout.emplace_back(num_channels - next_channel, 0);
  return segment_layout;
}
128
129 // Distributes audio samples from the input buffer to WavWriter objects,
130 // segmenting them by LFE and non-LFE channels based on the provided layout.
131 // Samples are transformed and periodically flushed to each WavWriter upon
132 // reaching kSizeToFlush.
FlushLfeNonLfeWavs(const std::vector<char> & buffer,const size_t bytes_to_read,const int num_channels,const int32_t bytes_per_sample,const std::vector<std::pair<int,int>> & segment_layout,std::vector<std::unique_ptr<WavWriter>> & writers)133 absl::Status FlushLfeNonLfeWavs(
134 const std::vector<char>& buffer, const size_t bytes_to_read,
135 const int num_channels, const int32_t bytes_per_sample,
136 const std::vector<std::pair<int, int>>& segment_layout,
137 std::vector<std::unique_ptr<WavWriter>>& writers) {
138 // A vector of buffers to store the samples corresponding to non-LFE and LFE
139 // channels respectively.
140 std::vector<std::vector<uint8_t>> nonlfe_lfe_buffer(writers.size(),
141 std::vector<uint8_t>());
142 for (size_t sample_index = 0; sample_index < bytes_to_read;
143 sample_index += static_cast<size_t>(bytes_per_sample) * num_channels) {
144 int channel_offset = 0;
145 for (const auto& [segment_size, writer_index] : segment_layout) {
146 const size_t offset =
147 sample_index + static_cast<size_t>(channel_offset) * bytes_per_sample;
148 std::transform(buffer.begin() + offset,
149 buffer.begin() + offset +
150 static_cast<size_t>(segment_size) * bytes_per_sample,
151 std::back_inserter(nonlfe_lfe_buffer[writer_index]),
152 [](char c) { return static_cast<uint8_t>(c); });
153
154 channel_offset += segment_size;
155 }
156
157 // Occasionally flush the buffer to the corresponding wav writer.
158 // To avoid intermittent padding, ensure that the samples to flush is always
159 // even.
160 for (int index = 0; index < writers.size(); ++index) {
161 auto buffer_size = nonlfe_lfe_buffer[index].size();
162 if (buffer_size >= kSizeToFlush && buffer_size % 2 == 0) {
163 RETURN_IF_NOT_OK(
164 FlushToWavWriter(nonlfe_lfe_buffer[index], *writers[index]));
165 }
166 }
167 }
168
169 // Flush the remaining buffers.
170 for (int index = 0; index < writers.size(); ++index) {
171 RETURN_IF_NOT_OK(
172 FlushToWavWriter(nonlfe_lfe_buffer[index], *writers[index]));
173 }
174
175 return absl::OkStatus();
176 }
177
178 // Splices the wav to obtain the wav segment.
SpliceWavSegment(std::istream & input_stream,const size_t & sample_length,const size_t & total_channel_size,std::vector<uint8_t> & samples_buffer,WavWriter & wav_writer)179 absl::Status SpliceWavSegment(std::istream& input_stream,
180 const size_t& sample_length,
181 const size_t& total_channel_size,
182 std::vector<uint8_t>& samples_buffer,
183 WavWriter& wav_writer) {
184 for (size_t data_chunk_pos = 0;
185 data_chunk_pos < sample_length * total_channel_size;
186 data_chunk_pos += total_channel_size) {
187 std::vector<char> sample(total_channel_size);
188 if (!input_stream.read(sample.data(), sample.size())) {
189 wav_writer.Abort();
190 return absl::OutOfRangeError(
191 "Reached end of stream before the implied end of the `data` "
192 "chunk.");
193 }
194 std::transform(sample.begin(), sample.end(),
195 std::back_inserter(samples_buffer),
196 [](char c) { return static_cast<uint8_t>(c); });
197 auto buffer_size = samples_buffer.size();
198 if (buffer_size >= kSizeToFlush && buffer_size % 2 == 0) {
199 RETURN_IF_NOT_OK(FlushToWavWriter(samples_buffer, wav_writer));
200 }
201 }
202 RETURN_IF_NOT_OK(FlushToWavWriter(samples_buffer, wav_writer));
203 return absl::OkStatus();
204 }
205
206 // Calculates the total duration of the wav file.
CalculateTotalDuration(const size_t & data_chunk_size,const FormatInfoChunk & wav_file_fmt,const size_t & total_channel_size)207 double CalculateTotalDuration(const size_t& data_chunk_size,
208 const FormatInfoChunk& wav_file_fmt,
209 const size_t& total_channel_size) {
210 const auto& total_samples_per_channel = data_chunk_size / total_channel_size;
211 double total_duration = static_cast<double>(total_samples_per_channel) /
212 static_cast<double>(wav_file_fmt.samples_per_sec);
213 return total_duration;
214 }
215
216 // Computes the duration in seconds.
ConvertTimeToSeconds(const BlockTime & time)217 double ConvertTimeToSeconds(const BlockTime& time) {
218 return time.hour * 3600.0 + time.minute * 60.0 + time.second;
219 }
220
221 // Computes the audio block duration as the diff of start time between 2
222 // consecutive blocks.
CalculateBlockDuration(const std::vector<AudioBlockFormat> & audio_block,const int & block_index)223 double CalculateBlockDuration(const std::vector<AudioBlockFormat>& audio_block,
224 const int& block_index) {
225 double seg_duration = 0.0;
226 if (block_index < audio_block.size() - 1) {
227 const auto block_rtime =
228 ConvertTimeToSeconds(audio_block[block_index].rtime);
229 const auto next_block_rtime =
230 ConvertTimeToSeconds(audio_block[block_index + 1].rtime);
231 seg_duration = (next_block_rtime - block_rtime);
232 } else {
233 seg_duration = ConvertTimeToSeconds(audio_block[block_index].duration);
234 }
235 return seg_duration;
236 }
237
238 // Retrieves LFE channel IDs from the audio channels list, adds them to
239 // "lfe_ids" vector, and checks if the count exceeds the allowed limit.
GetLfeChannelIDs(const std::vector<AudioChannelFormat> & audio_channels)240 std::vector<int> GetLfeChannelIDs(
241 const std::vector<AudioChannelFormat>& audio_channels) {
242 std::vector<int> lfe_ids;
243 for (int index = 0; index < audio_channels.size(); ++index) {
244 if (audio_channels[index].name == "RoomCentricLFE") {
245 if (lfe_ids.size() < kMaxLfeChannelsAllowed) {
246 lfe_ids.push_back(index + 1);
247 } else {
248 LOG(WARNING)
249 << "The number of LFE channels exceeds the allowed limit. Only the "
250 "first "
251 << kMaxLfeChannelsAllowed
252 << " LFE channels will be processed as unique audio element(s). "
253 "The remaining LFE channels would be panned with rest of the "
254 "channels to obtain 3OA.";
255 break;
256 }
257 }
258 }
259 return lfe_ids;
260 }
261
262 // Updates wav splicing parameters such as remaining durations and block indices
263 // for each audio channel.
UpdateWavSplicingParams(const double & this_seg_duration,const std::vector<AudioChannelFormat> & audio_channels,std::vector<double> & seg_duration,std::vector<size_t> & audio_block_indices)264 void UpdateWavSplicingParams(
265 const double& this_seg_duration,
266 const std::vector<AudioChannelFormat>& audio_channels,
267 std::vector<double>& seg_duration,
268 std::vector<size_t>& audio_block_indices) {
269 for (size_t ch = 0; ch < audio_channels.size(); ++ch) {
270 if (seg_duration[ch] > kErrorTolerance)
271 seg_duration[ch] -= this_seg_duration;
272 if (seg_duration[ch] <= kErrorTolerance) {
273 size_t next_index = audio_block_indices[ch] + 1;
274 const auto& this_channel_block = audio_channels[ch].audio_blocks;
275 if (next_index < this_channel_block.size()) {
276 audio_block_indices[ch] = next_index;
277 seg_duration[ch] =
278 CalculateBlockDuration(this_channel_block, next_index);
279 } else {
280 seg_duration[ch] = 0.0;
281 }
282 }
283 }
284 }
285
286 // This function handles the splicing of wav data into segments to respect the
287 // positional metadata defined by audioBlockFormat and invokes the panner to
288 // obtain 3OA. The panned wav for each segment is appended to obtain the final
289 // output wav.
ConvertFromObjectsTo3OA(const std::filesystem::path & output_file_path,absl::string_view file_prefix,const ADM & input_adm,const FormatInfoChunk & wav_file_fmt,std::istream & input_stream,const iamf_tools::adm_to_user_metadata::Bw64Reader::ChunkInfo & data_chunk_info)290 absl::Status ConvertFromObjectsTo3OA(
291 const std::filesystem::path& output_file_path,
292 absl::string_view file_prefix, const ADM& input_adm,
293 const FormatInfoChunk& wav_file_fmt, std::istream& input_stream,
294 const iamf_tools::adm_to_user_metadata::Bw64Reader::ChunkInfo&
295 data_chunk_info) {
296 const std::streamoff audio_data_position =
297 data_chunk_info.offset + Bw64Reader::kChunkHeaderOffset;
298 input_stream.seekg(audio_data_position);
299
300 // Buffer to temporarily store audio samples before writing to file.
301 std::vector<uint8_t> samples_buffer;
302 samples_buffer.reserve(kSizeToFlush);
303
304 // Prepare the file paths and initialize necessary file handling.
305 const auto temp_file_dir = std::filesystem::temp_directory_path();
306 std::filesystem::path input_file =
307 temp_file_dir / absl::StrCat(file_prefix, "_adm_segment.wav");
308 std::filesystem::path output_file =
309 output_file_path / absl::StrCat(file_prefix, "_converted1.wav");
310
311 // Output channels set to 16 as objects get panned to 3OA.
312 auto output_wav_writer = WavWriter::Create(
313 output_file.string(), kOutputWavChannels, wav_file_fmt.samples_per_sec,
314 wav_file_fmt.bits_per_sample, kMaxNumSamplesPerFrame);
315
316 // Calculate number of bytes per sample based on bits per sample.
317 const int32_t bytes_per_sample =
318 static_cast<int32_t>(wav_file_fmt.bits_per_sample) / kBitsPerByte;
319 const int32_t total_channels = wav_file_fmt.num_channels;
320 const size_t total_channel_size =
321 static_cast<size_t>(bytes_per_sample) * wav_file_fmt.num_channels;
322 const size_t data_chunk_size = data_chunk_info.size;
323 const auto total_duration =
324 CalculateTotalDuration(data_chunk_size, wav_file_fmt, total_channel_size);
325 const size_t total_samples =
326 total_duration * static_cast<size_t>(wav_file_fmt.samples_per_sec);
327
328 // Initialize vectors required to hold intermediate values.
329 std::vector<size_t> audio_block_indices(total_channels, 0);
330 std::vector<double> seg_duration(total_channels, 0);
331
332 // Holds the duration of current segment.
333 double this_seg_duration = 0.0;
334 double total_processed_duration = 0.0;
335 // Holds the number of samples left over from the previous segment due to
336 // rounding error.
337 float leftover_sample_duration = 0.0f;
338 int num_samples_count = 0;
339
340 // Initialize segment duration for all channels with the corresponding first
341 // audio block duration.
342 auto& audio_channels = input_adm.audio_channels;
343 for (int ch = 0; ch < total_channels; ++ch) {
344 seg_duration[ch] =
345 CalculateBlockDuration(audio_channels[ch].audio_blocks, 0);
346 if (seg_duration[ch] <= kErrorTolerance) {
347 seg_duration[ch] = total_duration;
348 }
349 }
350
351 // Iterate over the audio blocks within the audio channel which holds
352 // time-varying positional metadata. Splice the channels into segments such
353 // that segments in each channels have a constant metadata. Invoke the panner
354 // for the wav segments to obtain 3OA and later append the output wav file.
355 //
356 // For e.g., Consider 2 channels (containing 10 samples each) having varying
357 // positional metadata associated with it during different time duration as
358 // below:
359 //
360 // CH1 -----|---|--|
361 // CH2 ---|---|----|
362 //
363 // The above wav data will be spliced to 5 wav segments as below:
364 //
365 // seg1|seg2|seg3|seg4|seg5
366 // CH1 ---| -- | - | -- | --
367 // CH2 ---| -- | - | -- | --
368 while (true) {
369 // Find the minimum non-zero segment duration.
370 auto min_duration = std::min_element(
371 seg_duration.begin(), seg_duration.end(), [](double a, double b) {
372 return (a > kErrorTolerance && (b <= kErrorTolerance || a < b));
373 });
374
375 if (*min_duration > kErrorTolerance) {
376 this_seg_duration = *min_duration;
377 } else if (*min_duration < kErrorTolerance) {
378 break;
379 } else {
380 CHECK_GE(*min_duration, -kErrorTolerance)
381 << "Minimum duration should not be negative";
382 }
383
384 total_processed_duration += this_seg_duration;
385
386 // Read audio samples corresponding to the minimum segment duration
387 // and write to an intermediate wav file which will be input to the
388 // panner.
389 {
390 auto wav_writer = WavWriter::Create(
391 input_file.string(), wav_file_fmt.num_channels,
392 wav_file_fmt.samples_per_sec, wav_file_fmt.bits_per_sample,
393 kMaxNumSamplesPerFrame);
394 // Compute the length of audio samples corresponding to the current
395 // segment duration. The samples excluded due the rounding error at each
396 // segment is accounted in the next segment.
397 const float this_seg_length =
398 (this_seg_duration * wav_file_fmt.samples_per_sec) +
399 leftover_sample_duration;
400 // Length of the processed audio segment. Samples are rounded off for the
401 // current segment.
402 const auto processed_seg_length = std::floor(this_seg_length);
403 leftover_sample_duration = this_seg_length - processed_seg_length;
404
405 num_samples_count += processed_seg_length;
406
407 CHECK_LE(processed_seg_length, total_samples)
408 << "Samples in segment should not be greater than actual samples in "
409 "the wav file";
410
411 RETURN_IF_NOT_OK(SpliceWavSegment(input_stream, processed_seg_length,
412 total_channel_size, samples_buffer,
413 *wav_writer));
414 }
415
416 // Pan the current wav segment to 3OA and append the output wav.
417 RETURN_IF_NOT_OK(PanObjectsToAmbisonics(input_file.string(), input_adm,
418 audio_block_indices,
419 *output_wav_writer));
420
421 UpdateWavSplicingParams(this_seg_duration, audio_channels, seg_duration,
422 audio_block_indices);
423 }
424
425 CHECK_LE(fabs(total_processed_duration - total_duration), kErrorTolerance);
426 CHECK_LE(fabs(num_samples_count - total_samples), kErrorTolerance);
427
428 // Delete the temporary files.
429 if (!std::filesystem::remove(input_file)) {
430 return absl::InternalError("Error while removing temporary file.");
431 }
432 return absl::OkStatus();
433 }
434
435 // Separates each LFE channel present in the channel bed to individual wav
436 // file(s).
SeparateLfeChannels(const std::filesystem::path & output_file_path,absl::string_view file_prefix,const std::string & non_lfe_file_path,std::istream & input_stream,const FormatInfoChunk & wav_file_fmt,const Bw64Reader::ChunkInfo & data_chunk_info,const std::vector<int> & lfe_ids)437 absl::Status SeparateLfeChannels(const std::filesystem::path& output_file_path,
438 absl::string_view file_prefix,
439 const std::string& non_lfe_file_path,
440 std::istream& input_stream,
441 const FormatInfoChunk& wav_file_fmt,
442 const Bw64Reader::ChunkInfo& data_chunk_info,
443 const std::vector<int>& lfe_ids) {
444 const size_t bits_per_sample = wav_file_fmt.bits_per_sample;
445 const int32_t bytes_per_sample = bits_per_sample / kBitsPerByte;
446 const int num_channels = wav_file_fmt.num_channels;
447 const size_t samples_per_sec = wav_file_fmt.samples_per_sec;
448 const int lfe_count = lfe_ids.size();
449 const int non_lfe_count = num_channels - lfe_count;
450
451 // Create wav writers to separate LFE and non-LFE channels. Index 0 holds
452 // the wav writer corresponding to non-LFE channels and subsequent indices
453 // correspond to each LFE channel present.
454 std::vector<std::unique_ptr<WavWriter>> nonlfe_lfe_wav_writer;
455 nonlfe_lfe_wav_writer.emplace_back(
456 WavWriter::Create(non_lfe_file_path, non_lfe_count, samples_per_sec,
457 bits_per_sample, kMaxNumSamplesPerFrame));
458 for (int lfe_index = 1; lfe_index <= lfe_ids.size(); ++lfe_index) {
459 nonlfe_lfe_wav_writer.emplace_back(WavWriter::Create(
460 (output_file_path /
461 absl::StrCat(file_prefix, "_converted", lfe_index + 1, ".wav"))
462 .string(),
463 1, samples_per_sec, bits_per_sample, kMaxNumSamplesPerFrame));
464 }
465
466 // The samples in the input wav are packed in a channel-interleaved fashion.
467 // To facilitate the splicing of LFE channels from non-LFE channels, a
468 // segment layout is generated, which is a vector of size equal to the total
469 // number of channels, and each element in the vector contains a pair that
470 // holds the size of the segment (which equals the number of channels in a
471 // segment) and the writer index respectively. The writer index for non-LFE
472 // channels is 0 and LFE channels have a writer index starting from 1
473 // (increasing in 1 increments). The channels are grouped together in
474 // sequence if they are non-LFE.
475 std::vector<std::pair<int, int>> segment_layout =
476 GenerateSegmentLayout(lfe_ids, num_channels);
477
478 const std::streamoff audio_data_position =
479 data_chunk_info.offset + Bw64Reader::kChunkHeaderOffset;
480 input_stream.seekg(audio_data_position);
481
482 size_t num_samples_to_read = kSizeToFlush;
483 std::vector<char> temp_buffer(num_samples_to_read * bytes_per_sample *
484 num_channels);
485
486 // Perform the file read in chunks and use the temporary buffer for further
487 // processing.
488 for (size_t data_chunk_pos = 0; data_chunk_pos < data_chunk_info.size;
489 data_chunk_pos += temp_buffer.capacity()) {
490 const size_t bytes_to_read =
491 std::min(temp_buffer.capacity(), data_chunk_info.size - data_chunk_pos);
492 if (!input_stream.read(temp_buffer.data(), bytes_to_read)) {
493 AbortAllWavWriters(nonlfe_lfe_wav_writer);
494 return absl::OutOfRangeError(
495 "Reached end of stream before the implied end of the `data` "
496 "chunk.");
497 }
498
499 RETURN_IF_NOT_OK(FlushLfeNonLfeWavs(temp_buffer, bytes_to_read,
500 num_channels, bytes_per_sample,
501 segment_layout, nonlfe_lfe_wav_writer));
502 temp_buffer.clear();
503 }
504 return absl::OkStatus();
505 }
506
507 // Separates each LFE channel present in the channel bed to individual wav
508 // file(s). The non-LFE channels and audio object(s) are panned to obtain
509 // 3rd-order ambisonics (3OA).
SeparateLfeAndConvertTo3OA(const std::filesystem::path & output_file_path,absl::string_view file_prefix,const Bw64Reader & reader,std::istream & input_stream,const Bw64Reader::ChunkInfo & data_chunk_info,int & lfe_count)510 absl::Status SeparateLfeAndConvertTo3OA(
511 const std::filesystem::path& output_file_path,
512 absl::string_view file_prefix, const Bw64Reader& reader,
513 std::istream& input_stream, const Bw64Reader::ChunkInfo& data_chunk_info,
514 int& lfe_count) {
515 std::string non_lfe_file_name = absl::StrCat(file_prefix, "_non_lfe.wav");
516 const FormatInfoChunk& wav_file_fmt = reader.format_info_;
517 const int num_channels = wav_file_fmt.num_channels;
518
519 // Holds the track position corresponding to LFE channels.
520 std::vector<int> lfe_ids = GetLfeChannelIDs(reader.adm_.audio_channels);
521 lfe_count = lfe_ids.size();
522
523 if (lfe_count == 0) {
524 // If no LFE channels are present, pan all the channels to 3OA.
525 return ConvertFromObjectsTo3OA(output_file_path, file_prefix, reader.adm_,
526 reader.format_info_, input_stream,
527 data_chunk_info);
528 }
529
530 CHECK_LT(lfe_count, num_channels);
531 const int non_lfe_count = num_channels - lfe_count;
532 const auto& non_lfe_file_path =
533 (output_file_path / non_lfe_file_name).string();
534
535 // Separate LFE channels to individual wavs
536 RETURN_IF_NOT_OK(SeparateLfeChannels(output_file_path, file_prefix,
537 non_lfe_file_path, input_stream,
538 wav_file_fmt, data_chunk_info, lfe_ids));
539
540 std::ifstream non_lfe_file(non_lfe_file_path,
541 std::ios::binary | std::ios::in);
542
543 // Remove LFE channel related info from ADM before invoking the panner for
544 // non-LFE channels. The vector lfe_ids is sorted, so erasing in reverse does
545 // not invalidate the iterators, allows safe removal of lfe channels from the
546 // ADM.
547 ADM non_lfe_adm = reader.adm_;
548 for (int index = lfe_count - 1; index >= 0; --index) {
549 non_lfe_adm.audio_channels.erase(non_lfe_adm.audio_channels.begin() +
550 lfe_ids[index]);
551 }
552
553 // Modify FormatInfoChunk with non-LFE channel count before invoking the
554 // panner.
555 FormatInfoChunk non_lfe_format_info = reader.format_info_;
556 non_lfe_format_info.num_channels = non_lfe_count;
557
558 // Calculate data chunk size and set data chunk info for the generated non-LFE
559 // file and invoke the panner for the non-LFE file.
560 const size_t file_size = std::filesystem::file_size(non_lfe_file_path);
561 const size_t data_chunk_size = file_size - kExtensibleOffset - kHeaderSize;
562 Bw64Reader::ChunkInfo non_lfe_data_chunk_info = {data_chunk_size,
563 kExtensibleOffset};
564 RETURN_IF_NOT_OK(ConvertFromObjectsTo3OA(
565 output_file_path, file_prefix, non_lfe_adm, non_lfe_format_info,
566 non_lfe_file, non_lfe_data_chunk_info));
567 non_lfe_file.close();
568
569 // Delete the temporary file.
570 if (!std::filesystem::remove(non_lfe_file_path)) {
571 return absl::InternalError("Error while removing temporary file.");
572 }
573 return absl::OkStatus();
574 }
575
576 } // namespace
577
578 // Splices the input wav file depending on the ADM file type.
SpliceWavFilesFromAdm(const std::filesystem::path & output_file_path,absl::string_view file_prefix,ProfileVersion profile_version,const Bw64Reader & reader,std::istream & input_stream,int & lfe_count)579 absl::Status SpliceWavFilesFromAdm(
580 const std::filesystem::path& output_file_path,
581 absl::string_view file_prefix, ProfileVersion profile_version,
582 const Bw64Reader& reader, std::istream& input_stream, int& lfe_count) {
583 const auto& data_chunk_info = reader.GetChunkInfo("data");
584 const auto& fmt_chunk_info = reader.GetChunkInfo("fmt ");
585 if (!data_chunk_info.ok() || !fmt_chunk_info.ok()) {
586 return absl::NotFoundError("Missing data or fmt chunk.");
587 }
588 auto adm_file_type = reader.adm_.file_type;
589
590 // Separates the input wav file to 'n' number of wav file(s), where 'n' is the
591 // number of audioObject(s) present in the XML metadata.
592 if (adm_file_type == kAdmFileTypeDefault) {
593 const auto audio_tracks_for_audio_objects =
594 GetAudioTracksForAudioObjects(reader.adm_.audio_objects);
595
596 if (audio_tracks_for_audio_objects.empty()) {
597 return absl::NotFoundError("No audioObject present.");
598 };
599
600 // Construct the wav writers to use a file name of the form 'converted'
601 // followed by the 1-indexed content.
602 std::vector<std::unique_ptr<WavWriter>> audio_object_index_to_wav_writer;
603 audio_object_index_to_wav_writer.reserve(
604 audio_tracks_for_audio_objects.size());
605 const FormatInfoChunk& wav_file_fmt = reader.format_info_;
606 for (int audio_object_index = 0;
607 audio_object_index < audio_tracks_for_audio_objects.size();
608 ++audio_object_index) {
609 audio_object_index_to_wav_writer.emplace_back(WavWriter::Create(
610 (output_file_path / absl::StrCat(file_prefix, "_converted",
611 audio_object_index + 1, ".wav"))
612 .string(),
613 audio_tracks_for_audio_objects[audio_object_index].size(),
614 wav_file_fmt.samples_per_sec, wav_file_fmt.bits_per_sample,
615 kMaxNumSamplesPerFrame));
616 }
617
618 // Write audio samples into the corresponding output wav file(s).
619 const std::streamoff audio_data_position =
620 data_chunk_info->offset + Bw64Reader::kChunkHeaderOffset;
621 input_stream.seekg(audio_data_position);
622
623 // Buffer to store samples per audio object. They will be flushed
624 // occasionally when the buffer is full. The buffer will expand, so it is OK
625 // if it goes over the target size to flush.
626 std::vector<std::vector<uint8_t>> interlaced_samples_for_audio_objects(
627 audio_tracks_for_audio_objects.size(), std::vector<uint8_t>());
628
629 // Read audio samples from the buffer and organize them into individual
630 // audio tracks, based on the mapping specified in
631 // 'audio_tracks_for_audio_objects'. Write the audio track data to
632 // corresponding `WavWriter`s.
633 const int32_t bytes_per_sample =
634 static_cast<int32_t>(wav_file_fmt.bits_per_sample) / kBitsPerByte;
635 const int32_t channels = wav_file_fmt.num_channels;
636 for (size_t data_chunk_pos = 0; data_chunk_pos < data_chunk_info->size;
637 data_chunk_pos += static_cast<size_t>(bytes_per_sample) * channels) {
638 for (int audio_object_index = 0;
639 audio_object_index < audio_tracks_for_audio_objects.size();
640 ++audio_object_index) {
641 // Read in the samples for the current audio object.
642 std::vector<char> sample(
643 static_cast<size_t>(bytes_per_sample) *
644 audio_tracks_for_audio_objects[audio_object_index].size());
645
646 if (!input_stream.read(sample.data(), sample.size())) {
647 AbortAllWavWriters(audio_object_index_to_wav_writer);
648 return absl::OutOfRangeError(
649 "Reached end of stream before the implied end of the `data` "
650 "chunk.");
651 }
652
653 // Store the samples in the buffer.
654 auto& samples_for_audio_object =
655 interlaced_samples_for_audio_objects[audio_object_index];
656 std::transform(sample.begin(), sample.end(),
657 std::back_inserter(samples_for_audio_object),
658 [](char c) { return static_cast<uint8_t>(c); });
659
660 // Occasionally flush the buffer to the corresponding wav writer.
661 if (samples_for_audio_object.size() >= kSizeToFlush) {
662 RETURN_IF_NOT_OK(FlushToWavWriter(
663 samples_for_audio_object,
664 *audio_object_index_to_wav_writer[audio_object_index]));
665 }
666 }
667 }
668
669 // Flush the remaining buffers.
670 for (int audio_object_index = 0;
671 audio_object_index < audio_tracks_for_audio_objects.size();
672 ++audio_object_index) {
673 RETURN_IF_NOT_OK(FlushToWavWriter(
674 interlaced_samples_for_audio_objects[audio_object_index],
675 *audio_object_index_to_wav_writer[audio_object_index]));
676 }
677 } else {
678 CHECK_EQ(adm_file_type, kAdmFileTypeDolby);
679 using enum iamf_tools::ProfileVersion;
680 if (profile_version == kIamfBaseProfile) {
681 // For base profile version, convert the channel beds and audio objects
682 // present to 3OA (16 channels) to facilitate IAMF encoding.
683 RETURN_IF_NOT_OK(ConvertFromObjectsTo3OA(
684 output_file_path, file_prefix, reader.adm_, reader.format_info_,
685 input_stream, data_chunk_info.value()));
686 } else {
687 CHECK_EQ(static_cast<int>(profile_version),
688 static_cast<int>(kIamfBaseEnhancedProfile));
689 // For base enhanced profile version, convert the LFE channel(s) (if
690 // present) to separate wav file(s) and the remaining channels to 3OA (16
691 // channels) to facilitate IAMF encoding.
692 RETURN_IF_NOT_OK(SeparateLfeAndConvertTo3OA(
693 output_file_path, file_prefix, reader, input_stream,
694 data_chunk_info.value(), lfe_count));
695 }
696 }
697 return absl::OkStatus();
698 }
699
700 } // namespace adm_to_user_metadata
701 } // namespace iamf_tools
702