cli/renderer/loudspeakers_renderer.cc

/*
 * Copyright (c) 2023, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 3-Clause Clear License
 * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
 * License was not distributed with this source code in the LICENSE file, you
 * can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the
 * Alliance for Open Media Patent License 1.0 was not distributed with this
 * source code in the PATENTS file, you can obtain it at
 * www.aomedia.org/license/patent.
 */
#include "iamf/cli/renderer/loudspeakers_renderer.h"

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <iomanip>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#include "absl/base/no_destructor.h"
#include "absl/log/check.h"
#include "absl/log/log.h"
#include "absl/status/status.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
#include "absl/types/span.h"
#include "iamf/cli/channel_label.h"
#include "iamf/cli/renderer/precomputed_gains.h"
#include "iamf/common/utils/macros.h"
#include "iamf/common/utils/map_utils.h"
#include "iamf/common/utils/validation_utils.h"
#include "iamf/obu/audio_element.h"
#include "iamf/obu/demixing_info_parameter_data.h"
#include "iamf/obu/types.h"

namespace iamf_tools {

namespace {

// Builds a gain matrix from the demixing parameters carried in the bitstream.
//
// On success `gains` is overwritten and is indexed as
// `gains[input_channel][output_channel]`. Only the layout pairs
// "4+7+0" -> "3.1.2" and "4+7+0" -> "7.1.2" are handled; any other pair
// returns `absl::UnknownError` and leaves `gains` untouched.
absl::Status ComputeGains(absl::string_view input_layout_string,
                          absl::string_view output_layout_string,
                          const DownMixingParams& down_mixing_params,
                          std::vector<std::vector<double>>& gains) {
  // TODO(b/292174366): Strictly follow IAMF spec logic of when to use demixers
  //                    vs. libear renderer.
  LOG_FIRST_N(INFO, 5)
      << "Rendering  may be buggy or not follow the spec "
         "recommendations. Computing gains based on demixing params: "
      << input_layout_string << " --> " << output_layout_string;

  const bool input_is_4_7_0 = (input_layout_string == "4+7+0");

  if (input_is_4_7_0 && output_layout_string == "3.1.2") {
    const auto alpha = down_mixing_params.alpha;
    const auto beta = down_mixing_params.beta;
    const auto gamma = down_mixing_params.gamma;
    const auto delta = down_mixing_params.delta;
    const auto w = down_mixing_params.w;

    // Contribution of the alpha-weighted (Lss7/Rss7) and beta-weighted input
    // pairs to the front and top output channels.
    const auto alpha_to_front = alpha * delta;
    const auto alpha_to_top = alpha * w * delta;
    const auto beta_to_front = beta * delta;
    const auto beta_to_top = beta * w * delta;

    // Values checked; fixed.
    gains = {
        // First four input channels pass straight through.
        {1, 0, 0, 0, 0, 0},
        {0, 1, 0, 0, 0, 0},
        {0, 0, 1, 0, 0, 0},
        {0, 0, 0, 1, 0, 0},
        // Lss7
        {alpha_to_front, 0, 0, 0, alpha_to_top, 0},
        // Rss7
        {0, alpha_to_front, 0, 0, 0, alpha_to_top},
        // Next left/right pair uses the beta weighting.
        {beta_to_front, 0, 0, 0, beta_to_top, 0},
        {0, beta_to_front, 0, 0, 0, beta_to_top},
        // Unity contribution to the two top output channels.
        {0, 0, 0, 0, 1, 0},
        {0, 0, 0, 0, 0, 1},
        // Gamma-weighted contribution to the two top output channels.
        {0, 0, 0, 0, gamma, 0},
        {0, 0, 0, 0, 0, gamma},
    };
    return absl::OkStatus();
  }

  if (input_is_4_7_0 && output_layout_string == "7.1.2") {
    // Just drop the last two channels: the first ten inputs map one-to-one
    // onto the ten outputs, and the remaining two rows stay all-zero.
    constexpr size_t kNumInputChannels = 12;
    constexpr size_t kNumOutputChannels = 10;
    gains.assign(kNumInputChannels,
                 std::vector<double>(kNumOutputChannels, 0.0));
    for (size_t channel = 0; channel < kNumOutputChannels; ++channel) {
      gains[channel][channel] = 1.0;
    }
    return absl::OkStatus();
  }

  return absl::UnknownError(absl::StrCat(
      "The encoder did not implement matrices for ", input_layout_string,
      " to ", output_layout_string, " yet."));
}

// Reports via `result` whether `layout_string` names a layout with height
// channels.
//
// Only a fixed list of layout strings is recognized. For anything else an
// `absl::UnknownError` is returned and `result` is left unmodified.
absl::Status LayoutStringHasHeightChannels(absl::string_view layout_string,
                                           bool& result) {
  // TODO(b/292174366): Fill in all possible layouts or determine this in a
  //                    better way.
  static constexpr absl::string_view kLayoutsWithHeight[] = {
      "4+7+0", "7.1.2", "4+5+0", "2+5+0", "3.1.2"};
  static constexpr absl::string_view kLayoutsWithoutHeight[] = {
      "0+7+0", "0+5+0", "0+2+0", "0+1+0"};

  for (const absl::string_view known_layout : kLayoutsWithHeight) {
    if (layout_string == known_layout) {
      result = true;
      return absl::OkStatus();
    }
  }
  for (const absl::string_view known_layout : kLayoutsWithoutHeight) {
    if (layout_string == known_layout) {
      result = false;
      return absl::OkStatus();
    }
  }

  return absl::UnknownError(
      absl::StrCat("Unknown if ", layout_string, " has height channels"));
}

// Computes bitstream-driven gains for rendering a channel layout, when
// applicable.
//
// `gains` is always cleared first. It is left empty (with an OK status) when
// the down-mixing parameters are not in the bitstream, or when the input
// layout has height channels but the output layout does not; an empty result
// signals the caller to fall back to its precomputed gains. Otherwise `gains`
// is filled by `ComputeGains` and the matrix is logged.
//
// Returns an error if either layout string is unrecognized or if no matrix is
// implemented for the requested layout pair.
absl::Status ComputeChannelLayoutToLoudspeakersGains(
    const std::vector<ChannelLabel::Label>& channel_labels,
    const DownMixingParams& down_mixing_params,
    absl::string_view input_layout_string,
    absl::string_view output_layout_string,
    std::vector<std::vector<double>>& gains) {
  gains.clear();

  // Without a DownMixingParamDefinition there is nothing to compute, which is
  // fine: the caller uses the default precomputed gains.
  if (!down_mixing_params.in_bitstream) {
    return absl::OkStatus();
  }

  // TODO(b/292174366): Remove hacks. Updates logic of when to use demixers vs
  //                    libear renderer.
  bool input_has_height = false;
  RETURN_IF_NOT_OK(
      LayoutStringHasHeightChannels(input_layout_string, input_has_height));
  bool output_has_height = false;
  RETURN_IF_NOT_OK(
      LayoutStringHasHeightChannels(output_layout_string, output_has_height));
  const bool is_height_to_flat = input_has_height && !output_has_height;
  if (is_height_to_flat) {
    return absl::OkStatus();
  }

  // The bitstream tells us how to compute the gains. Use those.
  RETURN_IF_NOT_OK(ComputeGains(input_layout_string, output_layout_string,
                                down_mixing_params, gains));

  // Examine the computed gains: a header row of input channel labels followed
  // by one row per output channel.
  constexpr int kColumnWidth = 7;
  LOG_FIRST_N(INFO, 5) << "Computed gains:";
  std::stringstream header_row;
  for (const auto& label : channel_labels) {
    header_row << std::setw(kColumnWidth) << absl::StrCat(label);
  }
  LOG_FIRST_N(INFO, 5) << header_row.str();

  const size_t num_output_channels = gains.front().size();
  for (size_t out_channel = 0; out_channel < num_output_channels;
       ++out_channel) {
    std::stringstream gain_row;
    gain_row << std::setprecision(3);
    for (const auto& gains_for_input : gains) {
      gain_row << std::setw(kColumnWidth) << gains_for_input.at(out_channel);
    }
    LOG_FIRST_N(INFO, 5) << gain_row.str();
  }

  return absl::OkStatus();
}

// Converts a Q15 fixed-point value to a double in the range [-1.0, 1.0).
double Q15ToSignedDouble(const int16_t input) {
  // 2^-15 is exactly representable, so scaling by it matches dividing by 2^15.
  constexpr double kQ15Scale = 1.0 / 32768.0;
  return kQ15Scale * static_cast<double>(input);
}

// Applies `demixing_matrix` to every tick of `input_samples`.
//
// `input_samples` is indexed as `[tick][input_channel]`; the result is indexed
// as `[tick][output_channel]` with `output_channel_count` channels per tick.
// The number of input channels is taken from the first tick.
//
// `demixing_matrix` must not be null (this is CHECKed). It holds Q15 values
// stored in column major; the element for a given pair is read from
// `demixing_matrix[in_channel * output_channel_count + out_channel]`.
std::vector<std::vector<InternalSampleType>> ProjectSamplesToRender(
    absl::Span<const std::vector<InternalSampleType>>& input_samples,
    const int16_t* demixing_matrix, const int output_channel_count) {
  CHECK_NE(demixing_matrix, nullptr);
  std::vector<std::vector<InternalSampleType>> samples_to_render(
      input_samples.size(),
      std::vector<InternalSampleType>(output_channel_count, 0.0));

  for (size_t tick = 0; tick < samples_to_render.size(); ++tick) {
    const std::vector<InternalSampleType>& input_frame = input_samples[tick];
    std::vector<InternalSampleType>& projected_frame = samples_to_render[tick];
    const size_t num_input_channels = input_samples[0].size();

    // Walk the matrix one input channel at a time. Every output accumulator
    // still receives its terms in increasing input-channel order.
    for (size_t in_channel = 0; in_channel < num_input_channels;
         ++in_channel) {
      const int16_t* const column =
          demixing_matrix + in_channel * output_channel_count;
      for (int out_channel = 0; out_channel < output_channel_count;
           ++out_channel) {
        projected_frame[out_channel] +=
            Q15ToSignedDouble(column[out_channel]) * input_frame[in_channel];
      }
    }
  }
  return samples_to_render;
}

// Renders `input_samples` to interleaved output samples using `gains`.
//
// Args:
//   input_samples: Samples indexed as `[tick][input_channel]`. The channel
//     count is taken from the first tick.
//   gains: Matrix indexed as `[input_channel][output_channel]`. The number of
//     output channels is taken from the first row.
//   demixing_matrix: When non-null (projection mode), the input is first
//     projected to `gains.size()` channels with this Q15 matrix.
//   rendered_samples: Output buffer. It is zeroed first, then written
//     tick-major, i.e. element `tick * num_output_channels + output_channel`.
//     It is NOT resized here; the caller must provide at least
//     `num_ticks * num_output_channels` elements.
//
// When there are no input ticks or no gains, `rendered_samples` is zeroed and
// nothing else happens. Violating the size preconditions is a CHECK failure
// rather than an out-of-bounds access.
void RenderSamplesUsingGains(
    absl::Span<const std::vector<InternalSampleType>>& input_samples,
    const std::vector<std::vector<double>>& gains,
    const int16_t* demixing_matrix,
    std::vector<InternalSampleType>& rendered_samples) {
  std::fill(rendered_samples.begin(), rendered_samples.end(), 0);

  // Nothing to mix. Bail out before indexing `gains[0]` or the first tick.
  if (input_samples.empty() || gains.empty()) {
    return;
  }

  // Project with `demixing_matrix` when in projection mode.
  absl::Span<const std::vector<InternalSampleType>> samples_to_render =
      input_samples;
  std::vector<std::vector<InternalSampleType>> projected_samples;
  if (demixing_matrix != nullptr) {
    projected_samples =
        ProjectSamplesToRender(input_samples, demixing_matrix, gains.size());
    samples_to_render = absl::MakeConstSpan(projected_samples);
  }

  const size_t num_ticks = samples_to_render.size();
  const size_t num_input_channels = samples_to_render[0].size();
  const size_t num_output_channels = gains[0].size();

  // Every input channel needs a row of gains, and the output buffer must be
  // large enough for all interleaved samples.
  CHECK_GE(gains.size(), num_input_channels);
  CHECK_GE(rendered_samples.size(), num_ticks * num_output_channels);

  size_t rendered_samples_index = 0;
  for (size_t tick = 0; tick < num_ticks; ++tick) {
    const std::vector<InternalSampleType>& frame = samples_to_render[tick];
    for (size_t out_channel = 0; out_channel < num_output_channels;
         ++out_channel) {
      InternalSampleType& rendered_sample =
          rendered_samples[rendered_samples_index];
      for (size_t in_channel = 0; in_channel < num_input_channels;
           ++in_channel) {
        rendered_sample += frame[in_channel] * gains[in_channel][out_channel];
      }
      ++rendered_samples_index;
    }
  }
}

}  // namespace

// Looks up the precomputed gain matrix for `[input_key][output_key]`.
//
// The table is built once, on first use, by `InitPrecomputedGains()`. Returns
// `absl::NotFoundError` when `input_key` is absent; otherwise returns whatever
// `LookupInMap` yields for `output_key` in the inner map.
absl::StatusOr<std::vector<std::vector<double>>> LookupPrecomputedGains(
    absl::string_view input_key, absl::string_view output_key) {
  static const absl::NoDestructor<PrecomputedGains> precomputed_gains(
      InitPrecomputedGains());

  // Search through two layers of maps: first by input key, then by output
  // key.
  const auto gains_for_input = precomputed_gains->find(input_key);
  const std::string not_found_message =
      absl::StrCat("Precomputed gains not found for input_key= ", input_key);
  if (gains_for_input == precomputed_gains->end()) [[unlikely]] {
    return absl::NotFoundError(not_found_message);
  }

  const auto& gains_by_output_key = gains_for_input->second;
  return LookupInMap(gains_by_output_key, std::string(output_key),
                     absl::StrCat(not_found_message, " and output_key"));
}

// Renders channel-based `input_samples` into `rendered_samples`.
//
// Gains derived from `down_mixing_params` take priority when they can be
// computed for the `input_key` -> `output_key` pair; otherwise the supplied
// `precomputed_gains` are used. Returns an error if computing the
// bitstream-driven gains fails.
absl::Status RenderChannelLayoutToLoudspeakers(
    absl::Span<const std::vector<InternalSampleType>>& input_samples,
    const DownMixingParams& down_mixing_params,
    const std::vector<ChannelLabel::Label>& channel_labels,
    absl::string_view input_key, absl::string_view output_key,
    const std::vector<std::vector<double>>& precomputed_gains,
    std::vector<InternalSampleType>& rendered_samples) {
  // When the demixing parameters are in the bitstream, recompute for every
  // frame and do not store the result in the map.
  // TODO(b/292174366): Find a better solution and strictly follow the spec for
  //                    which renderer to use.
  std::vector<std::vector<double>> bitstream_gains;
  RETURN_IF_NOT_OK(ComputeChannelLayoutToLoudspeakersGains(
      channel_labels, down_mixing_params, input_key, output_key,
      bitstream_gains));

  // An empty result means "no bitstream-driven gains"; fall back.
  const bool has_bitstream_gains = !bitstream_gains.empty();
  RenderSamplesUsingGains(
      input_samples, has_bitstream_gains ? bitstream_gains : precomputed_gains,
      /*demixing_matrix=*/nullptr, rendered_samples);
  return absl::OkStatus();
}

// Renders ambisonics `input_samples` into `rendered_samples` using `gains`.
//
// Only mono and projection modes are supported; any other mode returns
// `absl::UnimplementedError`. `gains` must have exactly as many rows as the
// config's `output_channel_count`, otherwise the validation error is
// returned. In projection mode the config's demixing matrix is applied to the
// input before the gains.
absl::Status RenderAmbisonicsToLoudspeakers(
    absl::Span<const std::vector<InternalSampleType>>& input_samples,
    const AmbisonicsConfig& ambisonics_config,
    const std::vector<std::vector<double>>& gains,
    std::vector<InternalSampleType>& rendered_samples) {
  // Reject unsupported modes up front, so that the rest of the code only has
  // to distinguish mono from projection.
  const auto mode = ambisonics_config.ambisonics_mode;
  const bool is_projection =
      (mode == AmbisonicsConfig::kAmbisonicsModeProjection);
  if (!is_projection && mode != AmbisonicsConfig::kAmbisonicsModeMono) {
    return absl::UnimplementedError(
        absl::StrCat("Unsupported ambisonics mode. mode= ", mode));
  }

  // Null in mono mode; points at the projection config otherwise.
  const auto* projection_config =
      is_projection ? &std::get<AmbisonicsProjectionConfig>(
                          ambisonics_config.ambisonics_config)
                    : nullptr;

  const uint8_t output_channel_count =
      projection_config != nullptr
          ? projection_config->output_channel_count
          : std::get<AmbisonicsMonoConfig>(ambisonics_config.ambisonics_config)
                .output_channel_count;

  RETURN_IF_NOT_OK(
      ValidateContainerSizeEqual("gains", gains, output_channel_count));

  // Mono mode renders the input directly, so no demixing matrix is passed.
  const int16_t* demixing_matrix =
      projection_config != nullptr ? projection_config->demixing_matrix.data()
                                   : nullptr;
  RenderSamplesUsingGains(input_samples, gains, demixing_matrix,
                          rendered_samples);

  return absl::OkStatus();
}

}  // namespace iamf_tools