1 /*
2  * Copyright (c) 2024, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 3-Clause Clear License
5  * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
6  * License was not distributed with this source code in the LICENSE file, you
7  * can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the
8  * Alliance for Open Media Patent License 1.0 was not distributed with this
9  * source code in the PATENTS file, you can obtain it at
10  * www.aomedia.org/license/patent.
11  */
12 
13 #include "iamf/cli/adm_to_user_metadata/adm/wav_file_splicer.h"
14 
15 #include <algorithm>
16 #include <cmath>
17 #include <cstddef>
18 #include <cstdint>
19 #include <filesystem>
20 #include <fstream>
21 #include <iostream>
22 #include <iterator>
23 #include <memory>
24 #include <string>
25 #include <utility>
26 #include <vector>
27 
28 #include "absl/log/check.h"
29 #include "absl/log/log.h"
30 #include "absl/status/status.h"
31 #include "absl/strings/str_cat.h"
32 #include "absl/strings/string_view.h"
33 #include "iamf/cli/adm_to_user_metadata/adm/adm_elements.h"
34 #include "iamf/cli/adm_to_user_metadata/adm/bw64_reader.h"
35 #include "iamf/cli/adm_to_user_metadata/adm/format_info_chunk.h"
36 #include "iamf/cli/adm_to_user_metadata/adm/panner.h"
37 #include "iamf/cli/wav_writer.h"
38 #include "iamf/common/utils/macros.h"
39 #include "iamf/obu/ia_sequence_header.h"
40 
41 namespace iamf_tools {
42 namespace adm_to_user_metadata {
43 
44 namespace {
45 
// Number of bits in one byte; used to turn `bits_per_sample` into a byte count.
constexpr int32_t kBitsPerByte = 8;
// Number of buffered bytes at or above which pending samples are flushed to a
// wav writer. It is also reused as the number of interleaved frames read per
// chunk when the LFE channels are separated.
constexpr size_t kSizeToFlush = 4096;

// Arbitrary limit on how many samples will be written to the wav file at
// once. Chosen to agree with `kSizeToFlush`, even if there are 16-bit
// samples and one channel.
constexpr size_t kMaxNumSamplesPerFrame = kSizeToFlush / 2;

// Error tolerance, in seconds, set to the minimum precision allowed by an ADM
// file to describe timing related parameters. Durations at or below this value
// are treated as zero by the splicing logic.
constexpr double kErrorTolerance = 1e-5;
// Offset of the data chunk within an extensible format wav file.
constexpr int32_t kExtensibleOffset = 72;
// Number of bytes subtracted, together with `kExtensibleOffset`, from the size
// of a generated wav file to obtain the size of its data chunk.
// NOTE(review): presumably the 8-byte header (ID + size) of the `data` chunk —
// confirm.
constexpr int32_t kHeaderSize = 8;
// Total number of channels allowed per mix for the IAMF base enhanced profile.
constexpr int kMaxChannelsPerMixBaseEnhanced = 28;
// Max LFE channels allowed per mix for the IAMF base enhanced profile: the
// channels left over once `kOutputWavChannels` are reserved for the panned
// output.
constexpr int kMaxLfeChannelsAllowed =
    kMaxChannelsPerMixBaseEnhanced - kOutputWavChannels;
66 
67 // Creates a map for the audioObject(s) and the audioTrack(s) present within.
GetAudioTracksForAudioObjects(const std::vector<struct AudioObject> & audio_objects)68 std::vector<std::vector<int32_t>> GetAudioTracksForAudioObjects(
69     const std::vector<struct AudioObject>& audio_objects) {
70   std::vector<std::vector<int32_t>> audio_tracks_for_audio_objects(
71       audio_objects.size(), std::vector<int32_t>());
72   int32_t audio_object_index = -1;
73   int32_t audio_track_index = -1;
74   for (const auto& audio_object : audio_objects) {
75     auto& audio_tracks_for_audio_object =
76         audio_tracks_for_audio_objects[++audio_object_index];
77     for (auto unused_audio_track : audio_object.audio_track_uid_ref) {
78       audio_tracks_for_audio_object.push_back(++audio_track_index);
79     }
80   }
81   return audio_tracks_for_audio_objects;
82 }
83 
AbortAllWavWriters(std::vector<std::unique_ptr<WavWriter>> & audio_object_index_to_wav_writer)84 void AbortAllWavWriters(
85     std::vector<std::unique_ptr<WavWriter>>& audio_object_index_to_wav_writer) {
86   for (auto& wav_writer : audio_object_index_to_wav_writer) {
87     wav_writer->Abort();
88   }
89 }
90 
FlushToWavWriter(std::vector<uint8_t> & samples_to_flush,WavWriter & wav_writer)91 absl::Status FlushToWavWriter(std::vector<uint8_t>& samples_to_flush,
92                               WavWriter& wav_writer) {
93   RETURN_IF_NOT_OK(wav_writer.WritePcmSamples(samples_to_flush));
94   samples_to_flush.clear();
95   return absl::OkStatus();
96 }
97 
// Describes how one interleaved frame is split between the wav writers.
//
// The result is a list of <segment size, wav writer index> pairs that covers
// the `num_channels` channels of a frame in order. Runs of non-LFE channels go
// to writer 0, and the i-th LFE channel (counting from 1) goes to writer i.
// `lfe_ids` holds the 1-based channel positions of the LFE channels in
// ascending order.
//
// For e.g., consider an input wav with layout 3.1.2 (where the 4th channel
// corresponds to LFE).
// Channel layout : {L3, R3, Centre, LFE, Ltf3, Rtf3}
// Input LFE-Id list : {4}
//
// The segmentation required: [L3, R3, Centre]; [LFE]; [Ltf3, Rtf3]
// Segment layout obtained: <3,0>, <1,1>, <2,0>
//
// A non-LFE run is emitted before every LFE channel and after the last one,
// even when that run is empty (size 0).
std::vector<std::pair<int, int>> GenerateSegmentLayout(
    const std::vector<int>& lfe_ids, const int num_channels) {
  std::vector<std::pair<int, int>> layout;
  layout.reserve(2 * lfe_ids.size() + 1);

  // 0-based index of the first channel of the current non-LFE run.
  int run_start = 0;
  int lfe_writer_index = 0;
  for (const int lfe_id : lfe_ids) {
    // The 0-based position of this LFE channel ends the current run.
    const int lfe_position = lfe_id - 1;
    layout.emplace_back(lfe_position - run_start, 0);

    // The LFE channel itself is always a segment of one channel.
    ++lfe_writer_index;
    layout.emplace_back(1, lfe_writer_index);

    // The next run starts right after the LFE channel.
    run_start = lfe_id;
  }

  // Trailing non-LFE channels after the last LFE channel.
  layout.emplace_back(num_channels - run_start, 0);
  return layout;
}
128 
129 // Distributes audio samples from the input buffer to WavWriter objects,
130 // segmenting them by LFE and non-LFE channels based on the provided layout.
131 // Samples are transformed and periodically flushed to each WavWriter upon
132 // reaching kSizeToFlush.
FlushLfeNonLfeWavs(const std::vector<char> & buffer,const size_t bytes_to_read,const int num_channels,const int32_t bytes_per_sample,const std::vector<std::pair<int,int>> & segment_layout,std::vector<std::unique_ptr<WavWriter>> & writers)133 absl::Status FlushLfeNonLfeWavs(
134     const std::vector<char>& buffer, const size_t bytes_to_read,
135     const int num_channels, const int32_t bytes_per_sample,
136     const std::vector<std::pair<int, int>>& segment_layout,
137     std::vector<std::unique_ptr<WavWriter>>& writers) {
138   // A vector of buffers to store the samples corresponding to non-LFE and LFE
139   // channels respectively.
140   std::vector<std::vector<uint8_t>> nonlfe_lfe_buffer(writers.size(),
141                                                       std::vector<uint8_t>());
142   for (size_t sample_index = 0; sample_index < bytes_to_read;
143        sample_index += static_cast<size_t>(bytes_per_sample) * num_channels) {
144     int channel_offset = 0;
145     for (const auto& [segment_size, writer_index] : segment_layout) {
146       const size_t offset =
147           sample_index + static_cast<size_t>(channel_offset) * bytes_per_sample;
148       std::transform(buffer.begin() + offset,
149                      buffer.begin() + offset +
150                          static_cast<size_t>(segment_size) * bytes_per_sample,
151                      std::back_inserter(nonlfe_lfe_buffer[writer_index]),
152                      [](char c) { return static_cast<uint8_t>(c); });
153 
154       channel_offset += segment_size;
155     }
156 
157     // Occasionally flush the buffer to the corresponding wav writer.
158     // To avoid intermittent padding, ensure that the samples to flush is always
159     // even.
160     for (int index = 0; index < writers.size(); ++index) {
161       auto buffer_size = nonlfe_lfe_buffer[index].size();
162       if (buffer_size >= kSizeToFlush && buffer_size % 2 == 0) {
163         RETURN_IF_NOT_OK(
164             FlushToWavWriter(nonlfe_lfe_buffer[index], *writers[index]));
165       }
166     }
167   }
168 
169   // Flush the remaining buffers.
170   for (int index = 0; index < writers.size(); ++index) {
171     RETURN_IF_NOT_OK(
172         FlushToWavWriter(nonlfe_lfe_buffer[index], *writers[index]));
173   }
174 
175   return absl::OkStatus();
176 }
177 
178 // Splices the wav to obtain the wav segment.
SpliceWavSegment(std::istream & input_stream,const size_t & sample_length,const size_t & total_channel_size,std::vector<uint8_t> & samples_buffer,WavWriter & wav_writer)179 absl::Status SpliceWavSegment(std::istream& input_stream,
180                               const size_t& sample_length,
181                               const size_t& total_channel_size,
182                               std::vector<uint8_t>& samples_buffer,
183                               WavWriter& wav_writer) {
184   for (size_t data_chunk_pos = 0;
185        data_chunk_pos < sample_length * total_channel_size;
186        data_chunk_pos += total_channel_size) {
187     std::vector<char> sample(total_channel_size);
188     if (!input_stream.read(sample.data(), sample.size())) {
189       wav_writer.Abort();
190       return absl::OutOfRangeError(
191           "Reached end of stream before the implied end of the `data` "
192           "chunk.");
193     }
194     std::transform(sample.begin(), sample.end(),
195                    std::back_inserter(samples_buffer),
196                    [](char c) { return static_cast<uint8_t>(c); });
197     auto buffer_size = samples_buffer.size();
198     if (buffer_size >= kSizeToFlush && buffer_size % 2 == 0) {
199       RETURN_IF_NOT_OK(FlushToWavWriter(samples_buffer, wav_writer));
200     }
201   }
202   RETURN_IF_NOT_OK(FlushToWavWriter(samples_buffer, wav_writer));
203   return absl::OkStatus();
204 }
205 
206 // Calculates the total duration of the wav file.
CalculateTotalDuration(const size_t & data_chunk_size,const FormatInfoChunk & wav_file_fmt,const size_t & total_channel_size)207 double CalculateTotalDuration(const size_t& data_chunk_size,
208                               const FormatInfoChunk& wav_file_fmt,
209                               const size_t& total_channel_size) {
210   const auto& total_samples_per_channel = data_chunk_size / total_channel_size;
211   double total_duration = static_cast<double>(total_samples_per_channel) /
212                           static_cast<double>(wav_file_fmt.samples_per_sec);
213   return total_duration;
214 }
215 
216 // Computes the duration in seconds.
ConvertTimeToSeconds(const BlockTime & time)217 double ConvertTimeToSeconds(const BlockTime& time) {
218   return time.hour * 3600.0 + time.minute * 60.0 + time.second;
219 }
220 
221 // Computes the audio block duration as the diff of start time between 2
222 // consecutive blocks.
CalculateBlockDuration(const std::vector<AudioBlockFormat> & audio_block,const int & block_index)223 double CalculateBlockDuration(const std::vector<AudioBlockFormat>& audio_block,
224                               const int& block_index) {
225   double seg_duration = 0.0;
226   if (block_index < audio_block.size() - 1) {
227     const auto block_rtime =
228         ConvertTimeToSeconds(audio_block[block_index].rtime);
229     const auto next_block_rtime =
230         ConvertTimeToSeconds(audio_block[block_index + 1].rtime);
231     seg_duration = (next_block_rtime - block_rtime);
232   } else {
233     seg_duration = ConvertTimeToSeconds(audio_block[block_index].duration);
234   }
235   return seg_duration;
236 }
237 
238 // Retrieves LFE channel IDs from the audio channels list, adds them to
239 // "lfe_ids" vector, and checks if the count exceeds the allowed limit.
GetLfeChannelIDs(const std::vector<AudioChannelFormat> & audio_channels)240 std::vector<int> GetLfeChannelIDs(
241     const std::vector<AudioChannelFormat>& audio_channels) {
242   std::vector<int> lfe_ids;
243   for (int index = 0; index < audio_channels.size(); ++index) {
244     if (audio_channels[index].name == "RoomCentricLFE") {
245       if (lfe_ids.size() < kMaxLfeChannelsAllowed) {
246         lfe_ids.push_back(index + 1);
247       } else {
248         LOG(WARNING)
249             << "The number of LFE channels exceeds the allowed limit. Only the "
250                "first "
251             << kMaxLfeChannelsAllowed
252             << " LFE channels will be processed as unique audio element(s). "
253                "The remaining LFE channels would be panned with rest of the "
254                "channels to obtain 3OA.";
255         break;
256       }
257     }
258   }
259   return lfe_ids;
260 }
261 
262 // Updates wav splicing parameters such as remaining durations and block indices
263 // for each audio channel.
UpdateWavSplicingParams(const double & this_seg_duration,const std::vector<AudioChannelFormat> & audio_channels,std::vector<double> & seg_duration,std::vector<size_t> & audio_block_indices)264 void UpdateWavSplicingParams(
265     const double& this_seg_duration,
266     const std::vector<AudioChannelFormat>& audio_channels,
267     std::vector<double>& seg_duration,
268     std::vector<size_t>& audio_block_indices) {
269   for (size_t ch = 0; ch < audio_channels.size(); ++ch) {
270     if (seg_duration[ch] > kErrorTolerance)
271       seg_duration[ch] -= this_seg_duration;
272     if (seg_duration[ch] <= kErrorTolerance) {
273       size_t next_index = audio_block_indices[ch] + 1;
274       const auto& this_channel_block = audio_channels[ch].audio_blocks;
275       if (next_index < this_channel_block.size()) {
276         audio_block_indices[ch] = next_index;
277         seg_duration[ch] =
278             CalculateBlockDuration(this_channel_block, next_index);
279       } else {
280         seg_duration[ch] = 0.0;
281       }
282     }
283   }
284 }
285 
286 // This function handles the splicing of wav data into segments to respect the
287 // positional metadata defined by audioBlockFormat and invokes the panner to
288 // obtain 3OA. The panned wav for each segment is appended to obtain the final
289 // output wav.
ConvertFromObjectsTo3OA(const std::filesystem::path & output_file_path,absl::string_view file_prefix,const ADM & input_adm,const FormatInfoChunk & wav_file_fmt,std::istream & input_stream,const iamf_tools::adm_to_user_metadata::Bw64Reader::ChunkInfo & data_chunk_info)290 absl::Status ConvertFromObjectsTo3OA(
291     const std::filesystem::path& output_file_path,
292     absl::string_view file_prefix, const ADM& input_adm,
293     const FormatInfoChunk& wav_file_fmt, std::istream& input_stream,
294     const iamf_tools::adm_to_user_metadata::Bw64Reader::ChunkInfo&
295         data_chunk_info) {
296   const std::streamoff audio_data_position =
297       data_chunk_info.offset + Bw64Reader::kChunkHeaderOffset;
298   input_stream.seekg(audio_data_position);
299 
300   // Buffer to temporarily store audio samples before writing to file.
301   std::vector<uint8_t> samples_buffer;
302   samples_buffer.reserve(kSizeToFlush);
303 
304   // Prepare the file paths and initialize necessary file handling.
305   const auto temp_file_dir = std::filesystem::temp_directory_path();
306   std::filesystem::path input_file =
307       temp_file_dir / absl::StrCat(file_prefix, "_adm_segment.wav");
308   std::filesystem::path output_file =
309       output_file_path / absl::StrCat(file_prefix, "_converted1.wav");
310 
311   // Output channels set to 16 as objects get panned to 3OA.
312   auto output_wav_writer = WavWriter::Create(
313       output_file.string(), kOutputWavChannels, wav_file_fmt.samples_per_sec,
314       wav_file_fmt.bits_per_sample, kMaxNumSamplesPerFrame);
315 
316   // Calculate number of bytes per sample based on bits per sample.
317   const int32_t bytes_per_sample =
318       static_cast<int32_t>(wav_file_fmt.bits_per_sample) / kBitsPerByte;
319   const int32_t total_channels = wav_file_fmt.num_channels;
320   const size_t total_channel_size =
321       static_cast<size_t>(bytes_per_sample) * wav_file_fmt.num_channels;
322   const size_t data_chunk_size = data_chunk_info.size;
323   const auto total_duration =
324       CalculateTotalDuration(data_chunk_size, wav_file_fmt, total_channel_size);
325   const size_t total_samples =
326       total_duration * static_cast<size_t>(wav_file_fmt.samples_per_sec);
327 
328   // Initialize vectors required to hold intermediate values.
329   std::vector<size_t> audio_block_indices(total_channels, 0);
330   std::vector<double> seg_duration(total_channels, 0);
331 
332   // Holds the duration of current segment.
333   double this_seg_duration = 0.0;
334   double total_processed_duration = 0.0;
335   // Holds the number of samples left over from the previous segment due to
336   // rounding error.
337   float leftover_sample_duration = 0.0f;
338   int num_samples_count = 0;
339 
340   // Initialize segment duration for all channels with the corresponding first
341   // audio block duration.
342   auto& audio_channels = input_adm.audio_channels;
343   for (int ch = 0; ch < total_channels; ++ch) {
344     seg_duration[ch] =
345         CalculateBlockDuration(audio_channels[ch].audio_blocks, 0);
346     if (seg_duration[ch] <= kErrorTolerance) {
347       seg_duration[ch] = total_duration;
348     }
349   }
350 
351   // Iterate over the audio blocks within the audio channel which holds
352   // time-varying positional metadata. Splice the channels into segments such
353   // that segments in each channels have a constant metadata. Invoke the panner
354   // for the wav segments to obtain 3OA and later append the output wav file.
355   //
356   // For e.g., Consider 2 channels (containing 10 samples each) having varying
357   // positional metadata associated with it during different time duration as
358   // below:
359   //
360   // CH1 -----|---|--|
361   // CH2 ---|---|----|
362   //
363   // The above wav data will be spliced to 5 wav segments as below:
364   //
365   //     seg1|seg2|seg3|seg4|seg5
366   // CH1  ---| -- | -  | -- | --
367   // CH2  ---| -- | -  | -- | --
368   while (true) {
369     // Find the minimum non-zero segment duration.
370     auto min_duration = std::min_element(
371         seg_duration.begin(), seg_duration.end(), [](double a, double b) {
372           return (a > kErrorTolerance && (b <= kErrorTolerance || a < b));
373         });
374 
375     if (*min_duration > kErrorTolerance) {
376       this_seg_duration = *min_duration;
377     } else if (*min_duration < kErrorTolerance) {
378       break;
379     } else {
380       CHECK_GE(*min_duration, -kErrorTolerance)
381           << "Minimum duration should not be negative";
382     }
383 
384     total_processed_duration += this_seg_duration;
385 
386     // Read audio samples corresponding to the minimum segment duration
387     // and write to an intermediate wav file which will be input to the
388     // panner.
389     {
390       auto wav_writer = WavWriter::Create(
391           input_file.string(), wav_file_fmt.num_channels,
392           wav_file_fmt.samples_per_sec, wav_file_fmt.bits_per_sample,
393           kMaxNumSamplesPerFrame);
394       // Compute the length of audio samples corresponding to the current
395       // segment duration. The samples excluded due the rounding error at each
396       // segment is accounted in the next segment.
397       const float this_seg_length =
398           (this_seg_duration * wav_file_fmt.samples_per_sec) +
399           leftover_sample_duration;
400       // Length of the processed audio segment. Samples are rounded off for the
401       // current segment.
402       const auto processed_seg_length = std::floor(this_seg_length);
403       leftover_sample_duration = this_seg_length - processed_seg_length;
404 
405       num_samples_count += processed_seg_length;
406 
407       CHECK_LE(processed_seg_length, total_samples)
408           << "Samples in segment should not be greater than actual samples in "
409              "the wav file";
410 
411       RETURN_IF_NOT_OK(SpliceWavSegment(input_stream, processed_seg_length,
412                                         total_channel_size, samples_buffer,
413                                         *wav_writer));
414     }
415 
416     // Pan the current wav segment to 3OA and append the output wav.
417     RETURN_IF_NOT_OK(PanObjectsToAmbisonics(input_file.string(), input_adm,
418                                             audio_block_indices,
419                                             *output_wav_writer));
420 
421     UpdateWavSplicingParams(this_seg_duration, audio_channels, seg_duration,
422                             audio_block_indices);
423   }
424 
425   CHECK_LE(fabs(total_processed_duration - total_duration), kErrorTolerance);
426   CHECK_LE(fabs(num_samples_count - total_samples), kErrorTolerance);
427 
428   // Delete the temporary files.
429   if (!std::filesystem::remove(input_file)) {
430     return absl::InternalError("Error while removing temporary file.");
431   }
432   return absl::OkStatus();
433 }
434 
435 // Separates each LFE channel present in the channel bed to individual wav
436 // file(s).
SeparateLfeChannels(const std::filesystem::path & output_file_path,absl::string_view file_prefix,const std::string & non_lfe_file_path,std::istream & input_stream,const FormatInfoChunk & wav_file_fmt,const Bw64Reader::ChunkInfo & data_chunk_info,const std::vector<int> & lfe_ids)437 absl::Status SeparateLfeChannels(const std::filesystem::path& output_file_path,
438                                  absl::string_view file_prefix,
439                                  const std::string& non_lfe_file_path,
440                                  std::istream& input_stream,
441                                  const FormatInfoChunk& wav_file_fmt,
442                                  const Bw64Reader::ChunkInfo& data_chunk_info,
443                                  const std::vector<int>& lfe_ids) {
444   const size_t bits_per_sample = wav_file_fmt.bits_per_sample;
445   const int32_t bytes_per_sample = bits_per_sample / kBitsPerByte;
446   const int num_channels = wav_file_fmt.num_channels;
447   const size_t samples_per_sec = wav_file_fmt.samples_per_sec;
448   const int lfe_count = lfe_ids.size();
449   const int non_lfe_count = num_channels - lfe_count;
450 
451   // Create wav writers to separate LFE and non-LFE channels. Index 0 holds
452   // the wav writer corresponding to non-LFE channels and subsequent indices
453   // correspond to each LFE channel present.
454   std::vector<std::unique_ptr<WavWriter>> nonlfe_lfe_wav_writer;
455   nonlfe_lfe_wav_writer.emplace_back(
456       WavWriter::Create(non_lfe_file_path, non_lfe_count, samples_per_sec,
457                         bits_per_sample, kMaxNumSamplesPerFrame));
458   for (int lfe_index = 1; lfe_index <= lfe_ids.size(); ++lfe_index) {
459     nonlfe_lfe_wav_writer.emplace_back(WavWriter::Create(
460         (output_file_path /
461          absl::StrCat(file_prefix, "_converted", lfe_index + 1, ".wav"))
462             .string(),
463         1, samples_per_sec, bits_per_sample, kMaxNumSamplesPerFrame));
464   }
465 
466   // The samples in the input wav are packed in a channel-interleaved fashion.
467   // To facilitate the splicing of LFE channels from non-LFE channels, a
468   // segment layout is generated, which is a vector of size equal to the total
469   // number of channels, and each element in the vector contains a pair that
470   // holds the size of the segment (which equals the number of channels in a
471   // segment) and the writer index respectively. The writer index for non-LFE
472   // channels is 0 and LFE channels have a writer index starting from 1
473   // (increasing in 1 increments). The channels are grouped together in
474   // sequence if they are non-LFE.
475   std::vector<std::pair<int, int>> segment_layout =
476       GenerateSegmentLayout(lfe_ids, num_channels);
477 
478   const std::streamoff audio_data_position =
479       data_chunk_info.offset + Bw64Reader::kChunkHeaderOffset;
480   input_stream.seekg(audio_data_position);
481 
482   size_t num_samples_to_read = kSizeToFlush;
483   std::vector<char> temp_buffer(num_samples_to_read * bytes_per_sample *
484                                 num_channels);
485 
486   // Perform the file read in chunks and use the temporary buffer for further
487   // processing.
488   for (size_t data_chunk_pos = 0; data_chunk_pos < data_chunk_info.size;
489        data_chunk_pos += temp_buffer.capacity()) {
490     const size_t bytes_to_read =
491         std::min(temp_buffer.capacity(), data_chunk_info.size - data_chunk_pos);
492     if (!input_stream.read(temp_buffer.data(), bytes_to_read)) {
493       AbortAllWavWriters(nonlfe_lfe_wav_writer);
494       return absl::OutOfRangeError(
495           "Reached end of stream before the implied end of the `data` "
496           "chunk.");
497     }
498 
499     RETURN_IF_NOT_OK(FlushLfeNonLfeWavs(temp_buffer, bytes_to_read,
500                                         num_channels, bytes_per_sample,
501                                         segment_layout, nonlfe_lfe_wav_writer));
502     temp_buffer.clear();
503   }
504   return absl::OkStatus();
505 }
506 
507 // Separates each LFE channel present in the channel bed to individual wav
508 // file(s). The non-LFE channels and audio object(s) are panned to obtain
509 // 3rd-order ambisonics (3OA).
SeparateLfeAndConvertTo3OA(const std::filesystem::path & output_file_path,absl::string_view file_prefix,const Bw64Reader & reader,std::istream & input_stream,const Bw64Reader::ChunkInfo & data_chunk_info,int & lfe_count)510 absl::Status SeparateLfeAndConvertTo3OA(
511     const std::filesystem::path& output_file_path,
512     absl::string_view file_prefix, const Bw64Reader& reader,
513     std::istream& input_stream, const Bw64Reader::ChunkInfo& data_chunk_info,
514     int& lfe_count) {
515   std::string non_lfe_file_name = absl::StrCat(file_prefix, "_non_lfe.wav");
516   const FormatInfoChunk& wav_file_fmt = reader.format_info_;
517   const int num_channels = wav_file_fmt.num_channels;
518 
519   // Holds the track position corresponding to LFE channels.
520   std::vector<int> lfe_ids = GetLfeChannelIDs(reader.adm_.audio_channels);
521   lfe_count = lfe_ids.size();
522 
523   if (lfe_count == 0) {
524     // If no LFE channels are present, pan all the channels to 3OA.
525     return ConvertFromObjectsTo3OA(output_file_path, file_prefix, reader.adm_,
526                                    reader.format_info_, input_stream,
527                                    data_chunk_info);
528   }
529 
530   CHECK_LT(lfe_count, num_channels);
531   const int non_lfe_count = num_channels - lfe_count;
532   const auto& non_lfe_file_path =
533       (output_file_path / non_lfe_file_name).string();
534 
535   // Separate LFE channels to individual wavs
536   RETURN_IF_NOT_OK(SeparateLfeChannels(output_file_path, file_prefix,
537                                        non_lfe_file_path, input_stream,
538                                        wav_file_fmt, data_chunk_info, lfe_ids));
539 
540   std::ifstream non_lfe_file(non_lfe_file_path,
541                              std::ios::binary | std::ios::in);
542 
543   // Remove LFE channel related info from ADM before invoking the panner for
544   // non-LFE channels. The vector lfe_ids is sorted, so erasing in reverse does
545   // not invalidate the iterators, allows safe removal of lfe channels from the
546   // ADM.
547   ADM non_lfe_adm = reader.adm_;
548   for (int index = lfe_count - 1; index >= 0; --index) {
549     non_lfe_adm.audio_channels.erase(non_lfe_adm.audio_channels.begin() +
550                                      lfe_ids[index]);
551   }
552 
553   // Modify FormatInfoChunk with non-LFE channel count before invoking the
554   // panner.
555   FormatInfoChunk non_lfe_format_info = reader.format_info_;
556   non_lfe_format_info.num_channels = non_lfe_count;
557 
558   // Calculate data chunk size and set data chunk info for the generated non-LFE
559   // file and invoke the panner for the non-LFE file.
560   const size_t file_size = std::filesystem::file_size(non_lfe_file_path);
561   const size_t data_chunk_size = file_size - kExtensibleOffset - kHeaderSize;
562   Bw64Reader::ChunkInfo non_lfe_data_chunk_info = {data_chunk_size,
563                                                    kExtensibleOffset};
564   RETURN_IF_NOT_OK(ConvertFromObjectsTo3OA(
565       output_file_path, file_prefix, non_lfe_adm, non_lfe_format_info,
566       non_lfe_file, non_lfe_data_chunk_info));
567   non_lfe_file.close();
568 
569   // Delete the temporary file.
570   if (!std::filesystem::remove(non_lfe_file_path)) {
571     return absl::InternalError("Error while removing temporary file.");
572   }
573   return absl::OkStatus();
574 }
575 
576 }  // namespace
577 
578 // Splices the input wav file depending on the ADM file type.
SpliceWavFilesFromAdm(const std::filesystem::path & output_file_path,absl::string_view file_prefix,ProfileVersion profile_version,const Bw64Reader & reader,std::istream & input_stream,int & lfe_count)579 absl::Status SpliceWavFilesFromAdm(
580     const std::filesystem::path& output_file_path,
581     absl::string_view file_prefix, ProfileVersion profile_version,
582     const Bw64Reader& reader, std::istream& input_stream, int& lfe_count) {
583   const auto& data_chunk_info = reader.GetChunkInfo("data");
584   const auto& fmt_chunk_info = reader.GetChunkInfo("fmt ");
585   if (!data_chunk_info.ok() || !fmt_chunk_info.ok()) {
586     return absl::NotFoundError("Missing data or fmt chunk.");
587   }
588   auto adm_file_type = reader.adm_.file_type;
589 
590   // Separates the input wav file to 'n' number of wav file(s), where 'n' is the
591   // number of audioObject(s) present in the XML metadata.
592   if (adm_file_type == kAdmFileTypeDefault) {
593     const auto audio_tracks_for_audio_objects =
594         GetAudioTracksForAudioObjects(reader.adm_.audio_objects);
595 
596     if (audio_tracks_for_audio_objects.empty()) {
597       return absl::NotFoundError("No audioObject present.");
598     };
599 
600     // Construct the wav writers to use a file name of the form 'converted'
601     // followed by the 1-indexed content.
602     std::vector<std::unique_ptr<WavWriter>> audio_object_index_to_wav_writer;
603     audio_object_index_to_wav_writer.reserve(
604         audio_tracks_for_audio_objects.size());
605     const FormatInfoChunk& wav_file_fmt = reader.format_info_;
606     for (int audio_object_index = 0;
607          audio_object_index < audio_tracks_for_audio_objects.size();
608          ++audio_object_index) {
609       audio_object_index_to_wav_writer.emplace_back(WavWriter::Create(
610           (output_file_path / absl::StrCat(file_prefix, "_converted",
611                                            audio_object_index + 1, ".wav"))
612               .string(),
613           audio_tracks_for_audio_objects[audio_object_index].size(),
614           wav_file_fmt.samples_per_sec, wav_file_fmt.bits_per_sample,
615           kMaxNumSamplesPerFrame));
616     }
617 
618     // Write audio samples into the corresponding output wav file(s).
619     const std::streamoff audio_data_position =
620         data_chunk_info->offset + Bw64Reader::kChunkHeaderOffset;
621     input_stream.seekg(audio_data_position);
622 
623     // Buffer to store samples per audio object. They will be flushed
624     // occasionally when the buffer is full. The buffer will expand, so it is OK
625     // if it goes over the target size to flush.
626     std::vector<std::vector<uint8_t>> interlaced_samples_for_audio_objects(
627         audio_tracks_for_audio_objects.size(), std::vector<uint8_t>());
628 
629     // Read audio samples from the buffer and organize them into individual
630     // audio tracks, based on the mapping specified in
631     // 'audio_tracks_for_audio_objects'. Write the audio track data to
632     // corresponding `WavWriter`s.
633     const int32_t bytes_per_sample =
634         static_cast<int32_t>(wav_file_fmt.bits_per_sample) / kBitsPerByte;
635     const int32_t channels = wav_file_fmt.num_channels;
636     for (size_t data_chunk_pos = 0; data_chunk_pos < data_chunk_info->size;
637          data_chunk_pos += static_cast<size_t>(bytes_per_sample) * channels) {
638       for (int audio_object_index = 0;
639            audio_object_index < audio_tracks_for_audio_objects.size();
640            ++audio_object_index) {
641         // Read in the samples for the current audio object.
642         std::vector<char> sample(
643             static_cast<size_t>(bytes_per_sample) *
644             audio_tracks_for_audio_objects[audio_object_index].size());
645 
646         if (!input_stream.read(sample.data(), sample.size())) {
647           AbortAllWavWriters(audio_object_index_to_wav_writer);
648           return absl::OutOfRangeError(
649               "Reached end of stream before the implied end of the `data` "
650               "chunk.");
651         }
652 
653         // Store the samples in the buffer.
654         auto& samples_for_audio_object =
655             interlaced_samples_for_audio_objects[audio_object_index];
656         std::transform(sample.begin(), sample.end(),
657                        std::back_inserter(samples_for_audio_object),
658                        [](char c) { return static_cast<uint8_t>(c); });
659 
660         // Occasionally flush the buffer to the corresponding wav writer.
661         if (samples_for_audio_object.size() >= kSizeToFlush) {
662           RETURN_IF_NOT_OK(FlushToWavWriter(
663               samples_for_audio_object,
664               *audio_object_index_to_wav_writer[audio_object_index]));
665         }
666       }
667     }
668 
669     // Flush the remaining buffers.
670     for (int audio_object_index = 0;
671          audio_object_index < audio_tracks_for_audio_objects.size();
672          ++audio_object_index) {
673       RETURN_IF_NOT_OK(FlushToWavWriter(
674           interlaced_samples_for_audio_objects[audio_object_index],
675           *audio_object_index_to_wav_writer[audio_object_index]));
676     }
677   } else {
678     CHECK_EQ(adm_file_type, kAdmFileTypeDolby);
679     using enum iamf_tools::ProfileVersion;
680     if (profile_version == kIamfBaseProfile) {
681       // For base profile version, convert the channel beds and audio objects
682       // present to 3OA (16 channels) to facilitate IAMF encoding.
683       RETURN_IF_NOT_OK(ConvertFromObjectsTo3OA(
684           output_file_path, file_prefix, reader.adm_, reader.format_info_,
685           input_stream, data_chunk_info.value()));
686     } else {
687       CHECK_EQ(static_cast<int>(profile_version),
688                static_cast<int>(kIamfBaseEnhancedProfile));
689       // For base enhanced profile version, convert the LFE channel(s) (if
690       // present) to separate wav file(s) and the remaining channels to 3OA (16
691       // channels) to facilitate IAMF encoding.
692       RETURN_IF_NOT_OK(SeparateLfeAndConvertTo3OA(
693           output_file_path, file_prefix, reader, input_stream,
694           data_chunk_info.value(), lfe_count));
695     }
696   }
697   return absl::OkStatus();
698 }
699 
700 }  // namespace adm_to_user_metadata
701 }  // namespace iamf_tools
702