/* * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "modules/audio_coding/neteq/tools/neteq_quality_test.h" #include #include #include "absl/flags/flag.h" #include "modules/audio_coding/neteq/default_neteq_factory.h" #include "modules/audio_coding/neteq/tools/neteq_quality_test.h" #include "modules/audio_coding/neteq/tools/output_audio_file.h" #include "modules/audio_coding/neteq/tools/output_wav_file.h" #include "modules/audio_coding/neteq/tools/resample_input_audio_file.h" #include "rtc_base/checks.h" #include "system_wrappers/include/clock.h" #include "test/testsupport/file_utils.h" const std::string& DefaultInFilename() { static const std::string path = ::webrtc::test::ResourcePath("audio_coding/speech_mono_16kHz", "pcm"); return path; } const std::string& DefaultOutFilename() { static const std::string path = ::webrtc::test::OutputPath() + "neteq_quality_test_out.pcm"; return path; } ABSL_FLAG( std::string, in_filename, DefaultInFilename(), "Filename for input audio (specify sample rate with --input_sample_rate, " "and channels with --channels)."); ABSL_FLAG(int, input_sample_rate, 16000, "Sample rate of input file in Hz."); ABSL_FLAG(int, channels, 1, "Number of channels in input audio."); ABSL_FLAG(std::string, out_filename, DefaultOutFilename(), "Name of output audio file."); ABSL_FLAG( int, runtime_ms, 10000, "Simulated runtime (milliseconds). -1 will consume the complete file."); ABSL_FLAG(int, packet_loss_rate, 10, "Percentile of packet loss."); ABSL_FLAG(int, random_loss_mode, ::webrtc::test::kUniformLoss, "Random loss mode: 0--no loss, 1--uniform loss, 2--Gilbert Elliot " "loss, 3--fixed loss."); ABSL_FLAG(int, burst_length, 30, "Burst length in milliseconds, only valid for Gilbert Elliot loss."); ABSL_FLAG(float, drift_factor, 0.0, "Time drift factor."); ABSL_FLAG(int, preload_packets, 1, "Preload the buffer with this many packets."); ABSL_FLAG(std::string, loss_events, "", "List of loss events time and duration separated by comma: " " , " ", ..."); namespace webrtc { namespace test { namespace { std::unique_ptr CreateNetEq( const NetEq::Config& config, Clock* clock, const rtc::scoped_refptr& decoder_factory) { return DefaultNetEqFactory().CreateNetEq(config, decoder_factory, clock); } } // namespace const uint8_t kPayloadType = 95; const int kOutputSizeMs = 10; const int kInitSeed = 0x12345678; const int kPacketLossTimeUnitMs = 10; // Common validator for file names. static bool ValidateFilename(const std::string& value, bool is_output) { if (!is_output) { RTC_CHECK_NE(value.substr(value.find_last_of(".") + 1), "wav") << "WAV file input is not supported"; } FILE* fid = is_output ? fopen(value.c_str(), "wb") : fopen(value.c_str(), "rb"); if (fid == nullptr) return false; fclose(fid); return true; } // ProbTrans00Solver() is to calculate the transition probability from no-loss // state to itself in a modified Gilbert Elliot packet loss model. The result is // to achieve the target packet loss rate |loss_rate|, when a packet is not // lost only if all |units| drawings within the duration of the packet result in // no-loss. static double ProbTrans00Solver(int units, double loss_rate, double prob_trans_10) { if (units == 1) return prob_trans_10 / (1.0f - loss_rate) - prob_trans_10; // 0 == prob_trans_00 ^ (units - 1) + (1 - loss_rate) / prob_trans_10 * // prob_trans_00 - (1 - loss_rate) * (1 + 1 / prob_trans_10). // There is a unique solution between 0.0 and 1.0, due to the monotonicity and // an opposite sign at 0.0 and 1.0. // For simplicity, we reformulate the equation as // f(x) = x ^ (units - 1) + a x + b. // Its derivative is // f'(x) = (units - 1) x ^ (units - 2) + a. // The derivative is strictly greater than 0 when x is between 0 and 1. // We use Newton's method to solve the equation, iteration is // x(k+1) = x(k) - f(x) / f'(x); const double kPrecision = 0.001f; const int kIterations = 100; const double a = (1.0f - loss_rate) / prob_trans_10; const double b = (loss_rate - 1.0f) * (1.0f + 1.0f / prob_trans_10); double x = 0.0; // Starting point; double f = b; double f_p; int iter = 0; while ((f >= kPrecision || f <= -kPrecision) && iter < kIterations) { f_p = (units - 1.0f) * std::pow(x, units - 2) + a; x -= f / f_p; if (x > 1.0f) { x = 1.0f; } else if (x < 0.0f) { x = 0.0f; } f = std::pow(x, units - 1) + a * x + b; iter++; } return x; } NetEqQualityTest::NetEqQualityTest( int block_duration_ms, int in_sampling_khz, int out_sampling_khz, const SdpAudioFormat& format, const rtc::scoped_refptr& decoder_factory) : audio_format_(format), channels_(absl::GetFlag(FLAGS_channels)), decoded_time_ms_(0), decodable_time_ms_(0), drift_factor_(absl::GetFlag(FLAGS_drift_factor)), packet_loss_rate_(absl::GetFlag(FLAGS_packet_loss_rate)), block_duration_ms_(block_duration_ms), in_sampling_khz_(in_sampling_khz), out_sampling_khz_(out_sampling_khz), in_size_samples_( static_cast(in_sampling_khz_ * block_duration_ms_)), payload_size_bytes_(0), max_payload_bytes_(0), in_file_( new ResampleInputAudioFile(absl::GetFlag(FLAGS_in_filename), absl::GetFlag(FLAGS_input_sample_rate), in_sampling_khz * 1000, absl::GetFlag(FLAGS_runtime_ms) > 0)), rtp_generator_( new RtpGenerator(in_sampling_khz_, 0, 0, decodable_time_ms_)), total_payload_size_bytes_(0) { // Flag validation RTC_CHECK(ValidateFilename(absl::GetFlag(FLAGS_in_filename), false)) << "Invalid input filename."; RTC_CHECK(absl::GetFlag(FLAGS_input_sample_rate) == 8000 || absl::GetFlag(FLAGS_input_sample_rate) == 16000 || absl::GetFlag(FLAGS_input_sample_rate) == 32000 || absl::GetFlag(FLAGS_input_sample_rate) == 48000) << "Invalid sample rate should be 8000, 16000, 32000 or 48000 Hz."; RTC_CHECK_EQ(absl::GetFlag(FLAGS_channels), 1) << "Invalid number of channels, current support only 1."; RTC_CHECK(ValidateFilename(absl::GetFlag(FLAGS_out_filename), true)) << "Invalid output filename."; RTC_CHECK(absl::GetFlag(FLAGS_packet_loss_rate) >= 0 && absl::GetFlag(FLAGS_packet_loss_rate) <= 100) << "Invalid packet loss percentile, should be between 0 and 100."; RTC_CHECK(absl::GetFlag(FLAGS_random_loss_mode) >= 0 && absl::GetFlag(FLAGS_random_loss_mode) < kLastLossMode) << "Invalid random packet loss mode, should be between 0 and " << kLastLossMode - 1 << "."; RTC_CHECK_GE(absl::GetFlag(FLAGS_burst_length), kPacketLossTimeUnitMs) << "Invalid burst length, should be greater than or equal to " << kPacketLossTimeUnitMs << " ms."; RTC_CHECK_GT(absl::GetFlag(FLAGS_drift_factor), -0.1) << "Invalid drift factor, should be greater than -0.1."; RTC_CHECK_GE(absl::GetFlag(FLAGS_preload_packets), 0) << "Invalid number of packets to preload; must be non-negative."; const std::string out_filename = absl::GetFlag(FLAGS_out_filename); const std::string log_filename = out_filename + ".log"; log_file_.open(log_filename.c_str(), std::ofstream::out); RTC_CHECK(log_file_.is_open()); if (out_filename.size() >= 4 && out_filename.substr(out_filename.size() - 4) == ".wav") { // Open a wav file. output_.reset( new webrtc::test::OutputWavFile(out_filename, 1000 * out_sampling_khz)); } else { // Open a pcm file. output_.reset(new webrtc::test::OutputAudioFile(out_filename)); } NetEq::Config config; config.sample_rate_hz = out_sampling_khz_ * 1000; neteq_ = CreateNetEq(config, Clock::GetRealTimeClock(), decoder_factory); max_payload_bytes_ = in_size_samples_ * channels_ * sizeof(int16_t); in_data_.reset(new int16_t[in_size_samples_ * channels_]); } NetEqQualityTest::~NetEqQualityTest() { log_file_.close(); } bool NoLoss::Lost(int now_ms) { return false; } UniformLoss::UniformLoss(double loss_rate) : loss_rate_(loss_rate) {} bool UniformLoss::Lost(int now_ms) { int drop_this = rand(); return (drop_this < loss_rate_ * RAND_MAX); } GilbertElliotLoss::GilbertElliotLoss(double prob_trans_11, double prob_trans_01) : prob_trans_11_(prob_trans_11), prob_trans_01_(prob_trans_01), lost_last_(false), uniform_loss_model_(new UniformLoss(0)) {} GilbertElliotLoss::~GilbertElliotLoss() {} bool GilbertElliotLoss::Lost(int now_ms) { // Simulate bursty channel (Gilbert model). // (1st order) Markov chain model with memory of the previous/last // packet state (lost or received). if (lost_last_) { // Previous packet was not received. uniform_loss_model_->set_loss_rate(prob_trans_11_); return lost_last_ = uniform_loss_model_->Lost(now_ms); } else { uniform_loss_model_->set_loss_rate(prob_trans_01_); return lost_last_ = uniform_loss_model_->Lost(now_ms); } } FixedLossModel::FixedLossModel( std::set loss_events) : loss_events_(loss_events) { loss_events_it_ = loss_events_.begin(); } FixedLossModel::~FixedLossModel() {} bool FixedLossModel::Lost(int now_ms) { if (loss_events_it_ != loss_events_.end() && now_ms > loss_events_it_->start_ms) { if (now_ms <= loss_events_it_->start_ms + loss_events_it_->duration_ms) { return true; } else { ++loss_events_it_; return false; } } return false; } void NetEqQualityTest::SetUp() { ASSERT_TRUE(neteq_->RegisterPayloadType(kPayloadType, audio_format_)); rtp_generator_->set_drift_factor(drift_factor_); int units = block_duration_ms_ / kPacketLossTimeUnitMs; switch (absl::GetFlag(FLAGS_random_loss_mode)) { case kUniformLoss: { // |unit_loss_rate| is the packet loss rate for each unit time interval // (kPacketLossTimeUnitMs). Since a packet loss event is generated if any // of |block_duration_ms_ / kPacketLossTimeUnitMs| unit time intervals of // a full packet duration is drawn with a loss, |unit_loss_rate| fulfills // (1 - unit_loss_rate) ^ (block_duration_ms_ / kPacketLossTimeUnitMs) == // 1 - packet_loss_rate. double unit_loss_rate = (1.0 - std::pow(1.0 - 0.01 * packet_loss_rate_, 1.0 / units)); loss_model_.reset(new UniformLoss(unit_loss_rate)); break; } case kGilbertElliotLoss: { // |FLAGS_burst_length| should be integer times of kPacketLossTimeUnitMs. ASSERT_EQ(0, absl::GetFlag(FLAGS_burst_length) % kPacketLossTimeUnitMs); // We do not allow 100 percent packet loss in Gilbert Elliot model, which // makes no sense. ASSERT_GT(100, packet_loss_rate_); // To guarantee the overall packet loss rate, transition probabilities // need to satisfy: // pi_0 * (1 - prob_trans_01_) ^ units + // pi_1 * prob_trans_10_ ^ (units - 1) == 1 - loss_rate // pi_0 = prob_trans_10 / (prob_trans_10 + prob_trans_01_) // is the stationary state probability of no-loss // pi_1 = prob_trans_01_ / (prob_trans_10 + prob_trans_01_) // is the stationary state probability of loss // After a derivation prob_trans_00 should satisfy: // prob_trans_00 ^ (units - 1) = (loss_rate - 1) / prob_trans_10 * // prob_trans_00 + (1 - loss_rate) * (1 + 1 / prob_trans_10). double loss_rate = 0.01f * packet_loss_rate_; double prob_trans_10 = 1.0f * kPacketLossTimeUnitMs / absl::GetFlag(FLAGS_burst_length); double prob_trans_00 = ProbTrans00Solver(units, loss_rate, prob_trans_10); loss_model_.reset( new GilbertElliotLoss(1.0f - prob_trans_10, 1.0f - prob_trans_00)); break; } case kFixedLoss: { std::istringstream loss_events_stream(absl::GetFlag(FLAGS_loss_events)); std::string loss_event_string; std::set loss_events; while (std::getline(loss_events_stream, loss_event_string, ',')) { std::vector loss_event_params; std::istringstream loss_event_params_stream(loss_event_string); std::copy(std::istream_iterator(loss_event_params_stream), std::istream_iterator(), std::back_inserter(loss_event_params)); RTC_CHECK_EQ(loss_event_params.size(), 2); auto result = loss_events.insert( FixedLossEvent(loss_event_params[0], loss_event_params[1])); RTC_CHECK(result.second); } RTC_CHECK_GT(loss_events.size(), 0); loss_model_.reset(new FixedLossModel(loss_events)); break; } default: { loss_model_.reset(new NoLoss); break; } } // Make sure that the packet loss profile is same for all derived tests. srand(kInitSeed); } std::ofstream& NetEqQualityTest::Log() { return log_file_; } bool NetEqQualityTest::PacketLost() { int cycles = block_duration_ms_ / kPacketLossTimeUnitMs; // The loop is to make sure that codecs with different block lengths share the // same packet loss profile. bool lost = false; for (int idx = 0; idx < cycles; idx++) { if (loss_model_->Lost(decoded_time_ms_)) { // The packet will be lost if any of the drawings indicates a loss, but // the loop has to go on to make sure that codecs with different block // lengths keep the same pace. lost = true; } } return lost; } int NetEqQualityTest::Transmit() { int packet_input_time_ms = rtp_generator_->GetRtpHeader( kPayloadType, in_size_samples_, &rtp_header_); Log() << "Packet of size " << payload_size_bytes_ << " bytes, for frame at " << packet_input_time_ms << " ms "; if (payload_size_bytes_ > 0) { if (!PacketLost()) { int ret = neteq_->InsertPacket( rtp_header_, rtc::ArrayView(payload_.data(), payload_size_bytes_)); if (ret != NetEq::kOK) return -1; Log() << "was sent."; } else { Log() << "was lost."; } } Log() << std::endl; return packet_input_time_ms; } int NetEqQualityTest::DecodeBlock() { bool muted; int ret = neteq_->GetAudio(&out_frame_, &muted); RTC_CHECK(!muted); if (ret != NetEq::kOK) { return -1; } else { RTC_DCHECK_EQ(out_frame_.num_channels_, channels_); RTC_DCHECK_EQ(out_frame_.samples_per_channel_, static_cast(kOutputSizeMs * out_sampling_khz_)); RTC_CHECK(output_->WriteArray( out_frame_.data(), out_frame_.samples_per_channel_ * out_frame_.num_channels_)); return static_cast(out_frame_.samples_per_channel_); } } void NetEqQualityTest::Simulate() { int audio_size_samples; bool end_of_input = false; int runtime_ms = absl::GetFlag(FLAGS_runtime_ms) >= 0 ? absl::GetFlag(FLAGS_runtime_ms) : INT_MAX; while (!end_of_input && decoded_time_ms_ < runtime_ms) { // Preload the buffer if needed. while (decodable_time_ms_ - absl::GetFlag(FLAGS_preload_packets) * block_duration_ms_ < decoded_time_ms_) { if (!in_file_->Read(in_size_samples_ * channels_, &in_data_[0])) { end_of_input = true; ASSERT_TRUE(end_of_input && absl::GetFlag(FLAGS_runtime_ms) < 0); break; } payload_.Clear(); payload_size_bytes_ = EncodeBlock(&in_data_[0], in_size_samples_, &payload_, max_payload_bytes_); total_payload_size_bytes_ += payload_size_bytes_; decodable_time_ms_ = Transmit() + block_duration_ms_; } audio_size_samples = DecodeBlock(); if (audio_size_samples > 0) { decoded_time_ms_ += audio_size_samples / out_sampling_khz_; } } Log() << "Average bit rate was " << 8.0f * total_payload_size_bytes_ / absl::GetFlag(FLAGS_runtime_ms) << " kbps" << std::endl; } } // namespace test } // namespace webrtc