android-16.0.0_r2/s

/*
 * Copyright (c) 2023, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 3-Clause Clear License
 * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
 * License was not distributed with this source code in the LICENSE file, you
 * can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the
 * Alliance for Open Media Patent License 1.0 was not distributed with this
 * source code in the PATENTS file, you can obtain it at
 * www.aomedia.org/license/patent.
 */

#ifndef CLI_DEMIXING_MODULE_H_
#define CLI_DEMIXING_MODULE_H_

#include <cstdint>
#include <deque>
#include <list>
#include <utility>
#include <vector>

#include "absl/container/flat_hash_map.h"
#include "absl/container/flat_hash_set.h"
#include "absl/container/node_hash_map.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "iamf/cli/audio_element_with_data.h"
#include "iamf/cli/audio_frame_decoder.h"
#include "iamf/cli/audio_frame_with_data.h"
#include "iamf/cli/channel_label.h"
#include "iamf/obu/audio_element.h"
#include "iamf/obu/demixing_info_parameter_data.h"
#include "iamf/obu/recon_gain_info_parameter_data.h"
#include "iamf/obu/types.h"

namespace iamf_tools {

/*!\brief Sample queues, output gains, and trimming counts for one substream.
 *
 * Scalar members have default initializers so that a default-initialized
 * instance never holds indeterminate values. The struct is still an aggregate,
 * so existing brace-initialization keeps working.
 */
struct SubstreamData {
  uint32_t substream_id = 0;

  // Samples arranged in a FIFO queue with a vector of channels. There can only
  // be one or two channels. Includes "virtual" samples that are output from the
  // encoder, but are not passed to the encoder.
  std::deque<std::vector<int32_t>> samples_obu;
  // Samples to pass to encoder.
  std::deque<std::vector<int32_t>> samples_encode;
  // One or two elements; corresponding to the output gain to be applied to
  // each channel.
  std::vector<double> output_gains_linear;
  // Number of samples to trim at the end and at the start, respectively.
  uint32_t num_samples_to_trim_at_end = 0;
  uint32_t num_samples_to_trim_at_start = 0;
};

// Associates each channel label with one frame of samples for that channel.
using LabelSamplesMap =
    absl::node_hash_map<ChannelLabel::Label, std::vector<InternalSampleType>>;

/*!\brief One frame of samples keyed by channel label, plus its metadata.
 *
 * Scalar members have default initializers so that a default-initialized
 * instance never holds indeterminate values. The struct is still an aggregate.
 */
struct LabeledFrame {
  int32_t end_timestamp = 0;
  uint32_t samples_to_trim_at_end = 0;
  uint32_t samples_to_trim_at_start = 0;
  // Samples of this frame, organized by channel label.
  LabelSamplesMap label_to_samples;
  DownMixingParams demixing_params;
  ReconGainInfoParameterData recon_gain_info_parameter_data;
  // Vector of length `num_layers`. Only populated for scalable channel audio.
  std::vector<ChannelAudioLayerConfig::LoudspeakerLayout>
      loudspeaker_layout_per_layer;
};

// Associates each audio element ID with its `LabeledFrame`.
using IdLabeledFrameMap = absl::flat_hash_map<DecodedUleb128, LabeledFrame>;

// Pointer to a function that takes down-mixing parameters and a mutable
// label-to-samples map, and reports the outcome as a status.
using Demixer = absl::Status (*)(const DownMixingParams&, LabelSamplesMap&);

/*!\brief Manages data and processing to down-mix and demix audio elements.
 *
 * This class relates to the "Element Reconstructor" as used in the IAMF
 * specifications. "An Element Reconstructor re-assembles the Audio Elements by
 * combining the Channel Group(s) guided by Descriptors and Parameter
 * Substream(s)." This class does not apply the reconstruction gain, so
 * additional post processing is needed to finish audio element reconstruction.
 *
 * Down-mixers are used to down-mix the input channels to the substream
 * channels. Typically there are down-mixers for scalable channel audio
 * elements with more than one layer. Down-mixers are created according to
 * https://aomediacodec.github.io/iamf/#iamfgeneration-scalablechannelaudio-downmixmechanism
 *
 * Demixers are used to recreate the original audio from the substreams.
 * Demixers are created according to
 * https://aomediacodec.github.io/iamf/#processing-scalablechannelaudio.
 */
class DemixingModule {
 public:
  /*!\brief Demixing metadata stored for a single audio element ID. */
  struct DemixingMetadataForAudioElementId {
    std::list<Demixer> demixers;
    std::list<Demixer> down_mixers;
    SubstreamIdLabelsMap substream_id_to_labels;
    LabelGainMap label_to_output_gain;
  };

  /*!\brief Per-audio-element input to `CreateForDownMixingAndReconstruction`.
   *
   * Holds the user-provided labels together with the `substream_id_to_labels`
   * and `label_to_output_gain` of the corresponding `AudioElementWithData`.
   */
  struct DownmixingAndReconstructionConfig {
    absl::flat_hash_set<ChannelLabel::Label> user_labels;
    SubstreamIdLabelsMap substream_id_to_labels;
    LabelGainMap label_to_output_gain;
  };

  /*!\brief Creates a `DemixingModule` for down-mixing and reconstruction.
   *
   * This is most useful from the context of an encoder. For example, to encode
   * a scalable channel audio element with two layers, the input channels are
   * down-mixed according to various rules in the spec.
   *
   * Initializes metadata for each input audio element ID. The metadata includes
   * information about the channels and the specific down-mixers and demixers
   * needed for that audio element.
   *
   * NOTE(review): the parameter is a `const` rvalue reference, which cannot be
   * moved from and does not bind to lvalues. The type is kept as-is because it
   * must match the out-of-line definition; consider `const&` or by-value in a
   * change that updates both.
   *
   * \param id_to_config_map Map of Audio Element IDs to
   *        `DownmixingAndReconstructionConfig`, which contains the
   *        user-provided labels and the `substream_id_to_labels` and
   *        `label_to_output_gain` from the corresponding
   *        `AudioElementWithData`.
   * \return `DemixingModule` on success. A specific status on failure.
   */
  static absl::StatusOr<DemixingModule> CreateForDownMixingAndReconstruction(
      const absl::flat_hash_map<DecodedUleb128,
                                DownmixingAndReconstructionConfig>&&
          id_to_config_map);

  /*!\brief Creates a `DemixingModule` for reconstruction (demixing).
   *
   * This is most useful from the context of a decoder. For example, to decode
   * a scalable channel audio element with two layers, the substreams are
   * demixed according to various rules in the spec.
   *
   * Initializes metadata for each input audio element ID. The metadata includes
   * information about the channels and the specific down-mixers and demixers
   * needed for that audio element.
   *
   * \param audio_elements Audio elements.
   * \return `DemixingModule` on success. A specific status on failure.
   */
  static absl::StatusOr<DemixingModule> CreateForReconstruction(
      const absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
          audio_elements);

  /*!\brief Searches the input map for the target samples or demixed samples.
   *
   * \param label Label of the channel (or its demixed version) to search for.
   * \param label_to_samples Map of label to samples to search.
   * \param samples Output argument for the samples if found.
   * \return `absl::OkStatus()` on success. `absl::UnknownError()` if the search
   *         failed.
   */
  static absl::Status FindSamplesOrDemixedSamples(
      ChannelLabel::Label label, const LabelSamplesMap& label_to_samples,
      const std::vector<InternalSampleType>** samples);

  /*!\brief Down-mixes samples of input channels to substreams.
   *
   * \param audio_element_id Audio Element ID of these substreams.
   * \param down_mixing_params Down mixing parameters to use. Ignored when
   *        there is no associated down-mixer.
   * \param input_label_to_samples Samples in input channels organized by the
   *        channel labels.
   * \param substream_id_to_substream_data Mapping from substream IDs to
   *        substream data.
   * \return `absl::OkStatus()` on success. A specific status on failure.
   */
  absl::Status DownMixSamplesToSubstreams(
      DecodedUleb128 audio_element_id,
      const DownMixingParams& down_mixing_params,
      LabelSamplesMap& input_label_to_samples,
      absl::flat_hash_map<uint32_t, SubstreamData>&
          substream_id_to_substream_data) const;

  /*!\brief Demix original audio samples.
   *
   * This is most useful when the original (before lossy codec) samples are
   * known, such as when encoding original audio.
   *
   * \param audio_frames Audio Frames.
   * \return Output data structure for samples, or a specific status on failure.
   */
  absl::StatusOr<IdLabeledFrameMap> DemixOriginalAudioSamples(
      const std::list<AudioFrameWithData>& audio_frames) const;

  /*!\brief Demix decoded audio samples.
   *
   * This is most useful when the decoded (after lossy codec) samples are
   * known, such as when decoding an IA Sequence, or when analyzing the effect
   * of a lossy codec to determine appropriate recon gain values.
   *
   * \param decoded_audio_frame Decoded Audio Frames.
   * \return Output data structure for samples, or a specific status on failure.
   */
  absl::StatusOr<IdLabeledFrameMap> DemixDecodedAudioSamples(
      const std::list<DecodedAudioFrame>& decoded_audio_frame) const;

  /*!\brief Gets the down-mixers associated with an Audio Element ID.
   *
   * \param audio_element_id Audio Element ID
   * \param down_mixers Output pointer to the list of down-mixers.
   * \return `absl::OkStatus()` on success. A specific status on failure.
   */
  absl::Status GetDownMixers(DecodedUleb128 audio_element_id,
                             const std::list<Demixer>*& down_mixers) const;

  /*!\brief Gets the demixers associated with an Audio Element ID.
   *
   * \param audio_element_id Audio Element ID
   * \param demixers Output pointer to the list of demixers.
   * \return `absl::OkStatus()` on success. A specific status on failure.
   */
  absl::Status GetDemixers(DecodedUleb128 audio_element_id,
                           const std::list<Demixer>*& demixers) const;

 private:
  // Mode of the class; one enumerator per public factory function.
  enum class DemixingMode { kDownMixingAndReconstruction, kReconstruction };

  /*!\brief Private constructor.
   *
   * For use with `CreateForDownMixingAndReconstruction` and
   * `CreateForReconstruction`.
   *
   * \param demixing_mode Mode of the class.
   * \param audio_element_id_to_demixing_metadata Mapping from audio element ID
   *        to demixing metadata. Moved into the new instance.
   */
  DemixingModule(
      DemixingMode demixing_mode,
      absl::flat_hash_map<DecodedUleb128, DemixingMetadataForAudioElementId>&&
          audio_element_id_to_demixing_metadata)
      : demixing_mode_(demixing_mode),
        audio_element_id_to_demixing_metadata_(
            std::move(audio_element_id_to_demixing_metadata)) {}

  DemixingMode demixing_mode_;

  // Deliberately not `const`: a `const` data member makes the implicit move
  // constructor copy the whole map (e.g. when the module is returned inside an
  // `absl::StatusOr`) and deletes the assignment operators. Every public
  // non-static member function is `const`, so the map remains read-only
  // through the public API after construction.
  absl::flat_hash_map<DecodedUleb128, DemixingMetadataForAudioElementId>
      audio_element_id_to_demixing_metadata_;
};

}  // namespace iamf_tools

#endif  // CLI_DEMIXING_MODULE_H_