1 /*
2 * Copyright (c) 2023-2024 Shenzhen Kaihong Digital Industry Development Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
#include "audio_aac_codec.h"
#include <cstdint>
#include <cstdlib>
#include <libswresample/swresample.h>
#include <memory>
#include <securec.h>
#include "common_macro.h"
#include "const_def.h"
#include "sharing_log.h"
24
25 namespace OHOS {
26 namespace Sharing {
// Largest resampled PCM buffer (in bytes) the decoder accepts.
constexpr int32_t MAX_AUDIO_BUFFER_SIZE{100 * 100 * 1024};

// Length of the ADTS header written in front of every encoded AAC frame.
constexpr uint32_t ADTS_HEADER_SIZE{7};

// Fixed header bytes: byte 0, byte 6 and byte 1 respectively.
constexpr uint32_t ADTS_HEADER_BEGIN{0xFF};
constexpr uint32_t ADTS_HEADER_END{0xFC};
constexpr uint32_t ADTS_HEADER_MPEG4_AACLC{0xF1};

// Shifts/masks that pack profile, sampling-frequency index and channel
// configuration into header bytes 2 and 3.
constexpr uint32_t ADTS_HEADER_PROFILE_SHIFT{6};
constexpr uint32_t ADTS_HEADER_SAMPLE_MASK{0x0F};
constexpr uint32_t ADTS_HEADER_SAMPLE_SHIFT{2};
constexpr uint32_t ADTS_HEADER_CHANNEL_SHIFT{2};
constexpr uint32_t ADTS_HEADER_CHANNEL_MASK{0x01};
constexpr uint32_t ADTS_HEADER_CHANNEL_SHIFT1{6};
constexpr uint32_t ADTS_HEADER_CHANNEL_MASK1{0x03};

// Shifts/masks that spread the frame length (payload + header offset)
// across header bytes 3, 4 and 5.
constexpr uint32_t ADTS_HEADER_DATA_SZIE_OFFSET{7};
constexpr uint32_t ADTS_HEADER_DATA_SZIE_SHIFT{11};
constexpr uint32_t ADTS_HEADER_DATA_SZIE_SHIFT1{3};
constexpr uint32_t ADTS_HEADER_DATA_SZIE_SHIFT2{5};
constexpr uint32_t ADTS_HEADER_DATA_SZIE_MASK{0xFF};
constexpr uint32_t ADTS_HEADER_DATA_SZIE_MASK1{0x1F};

// Named indices into the 7-byte header array.
constexpr uint32_t ADTS_HEADER_INDEX_2{2};
constexpr uint32_t ADTS_HEADER_INDEX_3{3};
constexpr uint32_t ADTS_HEADER_INDEX_4{4};
constexpr uint32_t ADTS_HEADER_INDEX_5{5};
constexpr uint32_t ADTS_HEADER_INDEX_6{6};

namespace {
// Captured once during static initialisation of this translation unit.
std::chrono::system_clock::time_point start{std::chrono::system_clock::now()};
// Milliseconds between `start` and the first encoder frame; 0 until it is captured.
uint64_t duration{0};
} // namespace
53
AudioAACDecoder()54 AudioAACDecoder::AudioAACDecoder()
55 {
56 SHARING_LOGD("trace.");
57 }
58
~AudioAACDecoder()59 AudioAACDecoder::~AudioAACDecoder()
60 {
61 SHARING_LOGD("trace.");
62 if (avFrame_) {
63 av_frame_free(&avFrame_);
64 }
65
66 if (avPacket_) {
67 av_packet_free(&avPacket_);
68 }
69
70 if (swrContext_) {
71 swr_free(&swrContext_);
72 }
73
74 if (swrOutBuffer_) {
75 av_freep(&swrOutBuffer_);
76 }
77
78 if (codecCtx_) {
79 avcodec_free_context(&codecCtx_);
80 }
81 }
82
Init(const AudioTrack & audioTrack)83 int32_t AudioAACDecoder::Init(const AudioTrack &audioTrack)
84 {
85 SHARING_LOGD("trace.");
86 const AVCodec *dec = avcodec_find_decoder(AV_CODEC_ID_AAC);
87 if (!dec) {
88 SHARING_LOGE("Failed to find codec.");
89 return -1;
90 }
91
92 codecCtx_ = avcodec_alloc_context3(dec);
93 if (!codecCtx_) {
94 SHARING_LOGE("Failed to allocate the codec context.");
95 return -1;
96 }
97
98 if (avcodec_open2(codecCtx_, dec, nullptr) < 0) {
99 SHARING_LOGE("Failed to open codec.");
100 return -1;
101 }
102
103 avPacket_ = av_packet_alloc();
104 if (avPacket_ == nullptr) {
105 SHARING_LOGE("Failed to alloc packet.");
106 return -1;
107 }
108
109 avFrame_ = av_frame_alloc();
110 if (avFrame_ == nullptr) {
111 SHARING_LOGE("Failed to alloc frame.");
112 return -1;
113 }
114 return 0;
115 }
116
OnFrame(const Frame::Ptr & frame)117 void AudioAACDecoder::OnFrame(const Frame::Ptr &frame)
118 {
119 if (frame == nullptr) {
120 SHARING_LOGE("frame is nullptr!");
121 return;
122 }
123
124 if (avPacket_ == nullptr || avFrame_ == nullptr || codecCtx_ == nullptr) {
125 return;
126 }
127
128 av_packet_unref(avPacket_);
129 av_frame_unref(avFrame_);
130
131 avPacket_->data = frame->Data();
132 avPacket_->size = frame->Size();
133
134 avcodec_send_packet(codecCtx_, avPacket_);
135 avcodec_receive_frame(codecCtx_, avFrame_);
136
137 if (swrContext_ == nullptr) {
138 swrContext_ = swr_alloc_set_opts(nullptr, (int64_t)avFrame_->channel_layout, // out_ch_layout
139 AV_SAMPLE_FMT_S16, // out_sample_fmt
140 avFrame_->sample_rate, // out_sample_rate
141 (int64_t)avFrame_->channel_layout, // in_ch_layout
142 (AVSampleFormat)avFrame_->format, // AV_SAMPLE_FMT_FLTP
143 avFrame_->sample_rate, // out_sample_rate
144 0, nullptr);
145 if (swrContext_ == nullptr) {
146 SHARING_LOGE("swrContext_ alloc failed!");
147 return;
148 }
149
150 swr_init(swrContext_);
151
152 swrOutBufferSize_ =
153 av_samples_get_buffer_size(nullptr, avFrame_->channels, avFrame_->nb_samples, AV_SAMPLE_FMT_S16, 0);
154 if (swrOutBufferSize_ <= 0 || swrOutBufferSize_ > MAX_AUDIO_BUFFER_SIZE) {
155 SHARING_LOGE("invalid buffer size %{public}d", swrOutBufferSize_);
156 return;
157 }
158
159 swrOutBuffer_ = (uint8_t *)av_malloc(swrOutBufferSize_);
160 if (swrOutBuffer_ == nullptr) {
161 SHARING_LOGE("swrOutBuffer_ av_malloc failed!");
162 return;
163 }
164 }
165
166 int nbSamples = swr_convert(swrContext_, &swrOutBuffer_, avFrame_->nb_samples, (const uint8_t **)avFrame_->data,
167 avFrame_->nb_samples);
168 if (nbSamples != avFrame_->nb_samples) {
169 SHARING_LOGE("swr_convert failed!");
170 return;
171 }
172
173 auto pcmFrame = FrameImpl::Create();
174 pcmFrame->codecId_ = CODEC_PCM;
175 pcmFrame->Assign((char *)swrOutBuffer_, swrOutBufferSize_);
176 DeliverFrame(pcmFrame);
177 }
178
AudioAACEncoder()179 AudioAACEncoder::AudioAACEncoder()
180 {
181 SHARING_LOGD("trace.");
182 }
183
~AudioAACEncoder()184 AudioAACEncoder::~AudioAACEncoder()
185 {
186 SHARING_LOGD("trace.");
187 if (encFrame_) {
188 av_frame_free(&encFrame_);
189 }
190
191 if (encPacket_) {
192 av_packet_free(&encPacket_);
193 }
194
195 if (swr_) {
196 swr_free(&swr_);
197 }
198
199 if (swrData_) {
200 av_freep(&swrData_);
201 }
202
203 if (outBuffer_) {
204 av_freep(&outBuffer_);
205 }
206
207 if (enc_) {
208 avcodec_free_context(&enc_);
209 }
210 }
211
InitSwr()212 int AudioAACEncoder::InitSwr()
213 {
214 int64_t in_ch_layout = AV_CH_LAYOUT_STEREO;
215 if (inChannels_ == 1) {
216 in_ch_layout = AV_CH_LAYOUT_MONO;
217 }
218 AVSampleFormat in_sample_fmt = AV_SAMPLE_FMT_S16;
219 if (inSampleBit_ == AUDIO_SAMPLE_BIT_U8) {
220 in_sample_fmt = AV_SAMPLE_FMT_U8;
221 }
222 int in_sample_rate = (int)inSampleRate_;
223 swr_ = swr_alloc_set_opts(NULL, enc_->channel_layout, enc_->sample_fmt, enc_->sample_rate, in_ch_layout,
224 in_sample_fmt, in_sample_rate, 0, NULL);
225 if (!swr_) {
226 SHARING_LOGE("alloc swr failed.");
227 }
228
229 int error;
230 char errBuf[AV_ERROR_MAX_STRING_SIZE] = {0};
231 if ((error = swr_init(swr_)) < 0) {
232 SHARING_LOGE("open swr(%{public}d:%{public}s)", error,
233 av_make_error_string(errBuf, AV_ERROR_MAX_STRING_SIZE, error));
234 }
235
236 if (!(swrData_ = (uint8_t **)calloc(enc_->channels, sizeof(*swrData_)))) {
237 SHARING_LOGE("alloc swr buffer failed!");
238 }
239
240 if ((error = av_samples_alloc(swrData_, NULL, enc_->channels, enc_->frame_size, enc_->sample_fmt, 0)) < 0) {
241 SHARING_LOGE("alloc swr buffer(%{public}d:%{public}s)\n", error,
242 av_make_error_string(errBuf, AV_ERROR_MAX_STRING_SIZE, error));
243 }
244
245 return 0;
246 }
247
InitEncoderCtx(uint32_t channels,uint32_t sampleBit,uint32_t sampleRate)248 void AudioAACEncoder::InitEncoderCtx(uint32_t channels, uint32_t sampleBit, uint32_t sampleRate)
249 {
250 enc_->sample_rate = (int32_t)sampleRate; // dst_samplerate;
251 enc_->channels = (int32_t)channels; // dst_channels;
252 enc_->channel_layout = (uint64_t)av_get_default_channel_layout(channels);
253 enc_->bit_rate = AUDIO_BIT_RATE_12800;
254 enc_->time_base.num = 1;
255 enc_->time_base.den = (int32_t)sampleRate;
256 enc_->compression_level = 1;
257 enc_->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL;
258 }
259
InitEncPacket()260 void AudioAACEncoder::InitEncPacket()
261 {
262 av_init_packet(encPacket_);
263 encPacket_->data = NULL;
264 encPacket_->size = 0;
265 }
266
Init(uint32_t channels,uint32_t sampleBit,uint32_t sampleRate)267 int32_t AudioAACEncoder::Init(uint32_t channels, uint32_t sampleBit, uint32_t sampleRate)
268 {
269 SHARING_LOGD("trace.");
270 inChannels_ = channels;
271 inSampleBit_ = sampleBit;
272 inSampleRate_ = sampleRate;
273 const AVCodec *codec = avcodec_find_encoder(AV_CODEC_ID_AAC);
274 if (!codec) {
275 SHARING_LOGE("Codec not found failed!");
276 return 1;
277 }
278
279 enc_ = avcodec_alloc_context3(codec);
280 if (!enc_) {
281 SHARING_LOGE("Could not allocate audio codec context ");
282 return 1;
283 }
284 enc_->sample_fmt = codec->sample_fmts[0]; // only supports AV_SAMPLE_FMT_FLTP
285 InitEncoderCtx(channels, sampleBit, sampleRate);
286
287 if (avcodec_open2(enc_, codec, NULL) < 0) {
288 SHARING_LOGE("Could not open codec");
289 }
290
291 encFrame_ = av_frame_alloc();
292 if (!encFrame_) {
293 SHARING_LOGE("Could not allocate audio encode in frame");
294 return 1;
295 }
296 encFrame_->format = enc_->sample_fmt;
297 encFrame_->nb_samples = enc_->frame_size;
298 encFrame_->channel_layout = enc_->channel_layout;
299
300 if (av_frame_get_buffer(encFrame_, 0) < 0) {
301 SHARING_LOGE("Could not get audio frame buffer");
302 return 1;
303 }
304 encPacket_ = av_packet_alloc();
305 if (!encPacket_) {
306 SHARING_LOGE("Could not allocate audio encode out packet");
307 return 1;
308 }
309 if (!(fifo_ = av_audio_fifo_alloc(enc_->sample_fmt, enc_->channels, enc_->frame_size))) {
310 SHARING_LOGE("Could not allocate FIFO");
311 return 1;
312 }
313 auto bufferSize = av_samples_get_buffer_size(nullptr, encFrame_->channels, encFrame_->nb_samples,
314 AVSampleFormat(encFrame_->format), 0);
315 outBuffer_ = (uint8_t *)av_malloc(bufferSize);
316 if (outBuffer_ == nullptr) {
317 SHARING_LOGE("outBuffer_ av_malloc failed!");
318 return 1;
319 }
320
321 return 0;
322 }
323
AddSamplesToFifo(uint8_t ** samples,int frame_size)324 int AudioAACEncoder::AddSamplesToFifo(uint8_t **samples, int frame_size)
325 {
326 char errBuf[AV_ERROR_MAX_STRING_SIZE] = {0};
327 int error;
328
329 if ((error = av_audio_fifo_realloc(fifo_, av_audio_fifo_size(fifo_) + frame_size)) < 0) {
330 SHARING_LOGE("Could not reallocate FIFO(%{public}d:%{public}s)", error,
331 av_make_error_string(errBuf, AV_ERROR_MAX_STRING_SIZE, error));
332 }
333
334 if ((error = av_audio_fifo_write(fifo_, (void **)samples, frame_size)) < frame_size) {
335 SHARING_LOGE("Could not write data to FIFO(%{public}d:%{public}s)", error,
336 av_make_error_string(errBuf, AV_ERROR_MAX_STRING_SIZE, error));
337 }
338
339 return 0;
340 }
341
AddAdtsHeader(uint8_t * data,int dataSize)342 void AddAdtsHeader(uint8_t *data, int dataSize)
343 {
344 // ADTS header format (7 or 9 bytes):
345 // 12 bits syncword (0xFFF)
346 // 1 bit MPEG version (0 for MPEG-4, 1 for MPEG-2)
347 // 2 bits layer (always 0 for MPEG-4)
348 // 1 bit protection absent
349 // 2 bits profile (audio object type)
350 // 4 bits sampling frequency index
351 // 1 bit private bit
352 // 3 bits channel configuration
353 // 1 bit original/copy
354 // 1 bit home
355 // variable bits variable header length
356 // 16 bits frame length
357 // 16 bits buffer fullness
358 // 1 bit number of raw data blocks in frame (set to 0)
359
360 uint8_t adtsHeader[ADTS_HEADER_SIZE];
361 int profile = 2; // 2: AAC LC
362 int samplingFrequencyIndex = 3; // 3: 48Khz, 4: 44.1kHz
363 int channelConfiguration = 2; // 2: Stereo
364
365 adtsHeader[0] = ADTS_HEADER_BEGIN;
366 adtsHeader[1] = ADTS_HEADER_MPEG4_AACLC;
367 adtsHeader[ADTS_HEADER_INDEX_2] =
368 (static_cast<uint32_t>(profile - 1) << ADTS_HEADER_PROFILE_SHIFT) |
369 ((static_cast<uint32_t>(samplingFrequencyIndex) & ADTS_HEADER_SAMPLE_MASK) << ADTS_HEADER_SAMPLE_SHIFT) |
370 ((static_cast<uint32_t>(channelConfiguration) >> ADTS_HEADER_CHANNEL_SHIFT) & ADTS_HEADER_CHANNEL_MASK);
371 adtsHeader[ADTS_HEADER_INDEX_3] =
372 ((static_cast<uint32_t>(channelConfiguration) & ADTS_HEADER_CHANNEL_MASK1) << ADTS_HEADER_CHANNEL_SHIFT1) |
373 (static_cast<uint32_t>(dataSize + ADTS_HEADER_DATA_SZIE_OFFSET) >> ADTS_HEADER_DATA_SZIE_SHIFT);
374 adtsHeader[ADTS_HEADER_INDEX_4] =
375 (static_cast<uint32_t>(dataSize + ADTS_HEADER_DATA_SZIE_OFFSET) >> ADTS_HEADER_DATA_SZIE_SHIFT1) &
376 ADTS_HEADER_DATA_SZIE_MASK;
377 adtsHeader[ADTS_HEADER_INDEX_5] =
378 (static_cast<uint32_t>(dataSize + ADTS_HEADER_DATA_SZIE_OFFSET) << ADTS_HEADER_DATA_SZIE_SHIFT2) |
379 ADTS_HEADER_DATA_SZIE_MASK1;
380 adtsHeader[ADTS_HEADER_INDEX_6] = ADTS_HEADER_END;
381
382 if (memcpy_s(data, sizeof(adtsHeader), adtsHeader, sizeof(adtsHeader)) != EOK) {
383 SHARING_LOGE("copy adtsHeader failed!");
384 }
385 }
386
DoSwr(const Frame::Ptr & frame)387 void AudioAACEncoder::DoSwr(const Frame::Ptr &frame)
388 {
389 RETURN_IF_NULL(frame);
390 int err = 0;
391 int error = 0;
392 int in_samples = frame->Size();
393 uint8_t *in_sample[1];
394 in_sample[0] = frame->Data();
395 char errBuf[AV_ERROR_MAX_STRING_SIZE] = {0};
396
397 do {
398 int sample_size = (int)(inChannels_ * inSampleBit_ / 8);
399 if (sample_size == 0) {
400 continue;
401 }
402 in_samples = in_samples / sample_size;
403
404 int frame_size = swr_convert(swr_, swrData_, enc_->frame_size, (const uint8_t **)in_sample, in_samples);
405 if ((error = frame_size) < 0) {
406 SHARING_LOGE("Could not convert input samples(%{public}d:%{public}s)", error,
407 av_make_error_string(errBuf, AV_ERROR_MAX_STRING_SIZE, error));
408 }
409
410 in_sample[0] = NULL;
411 in_samples = 0;
412 if ((err = AddSamplesToFifo(swrData_, frame_size)) != 0) {
413 SHARING_LOGE("write samples failed");
414 }
415 } while (swr_get_out_samples(swr_, in_samples) >= enc_->frame_size);
416 }
417
OnFrame(const Frame::Ptr & frame)418 void AudioAACEncoder::OnFrame(const Frame::Ptr &frame)
419 {
420 RETURN_IF_NULL(frame);
421 if (duration == 0) {
422 std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
423 duration = (uint64_t)std::chrono::duration_cast<std::chrono::milliseconds>(now - start).count();
424 }
425
426 int error = 0;
427 if ((error = InitSwr()) != 0 && !swr_) {
428 SHARING_LOGE("resample init failed!");
429 return;
430 }
431 DoSwr(frame);
432
433 char errBuf[AV_ERROR_MAX_STRING_SIZE] = {0};
434 encFrame_->format = AV_SAMPLE_FMT_FLTP;
435 while (av_audio_fifo_size(fifo_) >= enc_->frame_size) {
436 if (av_frame_make_writable(encFrame_) < 0) {
437 SHARING_LOGE("Could not make writable frame");
438 }
439 if (av_audio_fifo_read(fifo_, (void **)encFrame_->data, enc_->frame_size) < enc_->frame_size) {
440 SHARING_LOGE("Could not read data from FIFO");
441 }
442 encFrame_->pts = nextOutPts_;
443 nextOutPts_ += enc_->frame_size;
444 error = avcodec_send_frame(enc_, encFrame_);
445 if (error < 0) {
446 SHARING_LOGE("send failed:%{public}s", av_make_error_string(errBuf, AV_ERROR_MAX_STRING_SIZE, error));
447 }
448
449 InitEncPacket();
450 while (error >= 0) {
451 error = avcodec_receive_packet(enc_, encPacket_);
452 if (error == AVERROR(EAGAIN) || error == AVERROR_EOF) {
453 break;
454 } else if (error < 0) {
455 SHARING_LOGE("recv failed:%{public}s", av_make_error_string(errBuf, AV_ERROR_MAX_STRING_SIZE, error));
456 }
457
458 encPacket_->dts = av_rescale(encPacket_->dts, 1000, enc_->time_base.den); // rescale time base 1000.
459 encPacket_->pts = av_rescale(encPacket_->pts, 1000, enc_->time_base.den); // rescale time base 1000.
460 if (memcpy_s(outBuffer_ + ADTS_HEADER_SIZE, encPacket_->size, encPacket_->data, encPacket_->size) != EOK) {
461 SHARING_LOGE("copy data failed!");
462 break;
463 }
464 AddAdtsHeader((uint8_t *)outBuffer_, encPacket_->size);
465 auto aacFrame = FrameImpl::Create();
466 aacFrame->codecId_ = CODEC_AAC;
467 aacFrame->pts_ = (uint32_t)((int64_t)duration + encPacket_->pts);
468 aacFrame->Assign((char *)outBuffer_, encPacket_->size + 7); // 7: size offset
469 DeliverFrame(aacFrame);
470 }
471 }
472 }
473 } // namespace Sharing
474 } // namespace OHOS
475