/*
 * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 *
 */

#import "RTCVideoEncoderH264.h"

#import <VideoToolbox/VideoToolbox.h>
#include <vector>

#if defined(WEBRTC_IOS)
#import "helpers/UIDevice+RTCDevice.h"
#endif
#import "RTCCodecSpecificInfoH264.h"
#import "RTCH264ProfileLevelId.h"
#import "api/peerconnection/RTCRtpFragmentationHeader+Private.h"
#import "api/peerconnection/RTCVideoCodecInfo+Private.h"
#import "base/RTCCodecSpecificInfo.h"
#import "base/RTCI420Buffer.h"
#import "base/RTCVideoEncoder.h"
#import "base/RTCVideoFrame.h"
#import "base/RTCVideoFrameBuffer.h"
#import "components/video_frame_buffer/RTCCVPixelBuffer.h"
#import "helpers.h"

#include "common_video/h264/h264_bitstream_parser.h"
#include "common_video/h264/profile_level_id.h"
#include "common_video/include/bitrate_adjuster.h"
#include "modules/include/module_common_types.h"
#include "modules/video_coding/include/video_error_codes.h"
#include "rtc_base/buffer.h"
#include "rtc_base/logging.h"
#include "rtc_base/time_utils.h"
#include "sdk/objc/components/video_codec/nalu_rewriter.h"
#include "third_party/libyuv/include/libyuv/convert_from.h"

@interface RTC_OBJC_TYPE (RTCVideoEncoderH264) ()

- (void)frameWasEncoded:(OSStatus)status
                  flags:(VTEncodeInfoFlags)infoFlags
           sampleBuffer:(CMSampleBufferRef)sampleBuffer
      codecSpecificInfo:(id<RTC_OBJC_TYPE(RTCCodecSpecificInfo)>)codecSpecificInfo
                  width:(int32_t)width
                 height:(int32_t)height
           renderTimeMs:(int64_t)renderTimeMs
              timestamp:(uint32_t)timestamp
               rotation:(RTCVideoRotation)rotation;

@end

namespace {  // anonymous namespace

// The ratio between kVTCompressionPropertyKey_DataRateLimits and
// kVTCompressionPropertyKey_AverageBitRate. The data rate limit is set higher
// than the average bit rate to avoid undershooting the target.
const float kLimitToAverageBitRateFactor = 1.5f;
// These thresholds deviate from the default h264 QP thresholds, as they
// have been found to work better on devices that support VideoToolbox.
const int kLowH264QpThreshold = 28;
const int kHighH264QpThreshold = 39;

const OSType kNV12PixelFormat = kCVPixelFormatType_420YpCbCr8BiPlanarFullRange;

// Struct that we pass to the encoder per frame to encode. We receive it again
// in the encoder callback.
struct RTCFrameEncodeParams {
  RTCFrameEncodeParams(RTC_OBJC_TYPE(RTCVideoEncoderH264) * e,
                       RTC_OBJC_TYPE(RTCCodecSpecificInfoH264) * csi,
                       int32_t w,
                       int32_t h,
                       int64_t rtms,
                       uint32_t ts,
                       RTCVideoRotation r)
      : encoder(e), width(w), height(h), render_time_ms(rtms), timestamp(ts), rotation(r) {
    if (csi) {
      codecSpecificInfo = csi;
    } else {
      codecSpecificInfo = [[RTC_OBJC_TYPE(RTCCodecSpecificInfoH264) alloc] init];
    }
  }

  RTC_OBJC_TYPE(RTCVideoEncoderH264) * encoder;
  RTC_OBJC_TYPE(RTCCodecSpecificInfoH264) * codecSpecificInfo;
  int32_t width;
  int32_t height;
  int64_t render_time_ms;
  uint32_t timestamp;
  RTCVideoRotation rotation;
};
// We receive I420Frames as input, but we need to feed CVPixelBuffers into the
// encoder. This performs the copy and format conversion.
// TODO(tkchin): See if encoder will accept i420 frames and compare performance.
bool CopyVideoFrameToNV12PixelBuffer(id<RTC_OBJC_TYPE(RTCI420Buffer)> frameBuffer,
                                     CVPixelBufferRef pixelBuffer) {
  RTC_DCHECK(pixelBuffer);
  RTC_DCHECK_EQ(CVPixelBufferGetPixelFormatType(pixelBuffer), kNV12PixelFormat);
  RTC_DCHECK_EQ(CVPixelBufferGetHeightOfPlane(pixelBuffer, 0), frameBuffer.height);
  RTC_DCHECK_EQ(CVPixelBufferGetWidthOfPlane(pixelBuffer, 0), frameBuffer.width);

  CVReturn cvRet = CVPixelBufferLockBaseAddress(pixelBuffer, 0);
  if (cvRet != kCVReturnSuccess) {
    RTC_LOG(LS_ERROR) << "Failed to lock base address: " << cvRet;
    return false;
  }
  uint8_t *dstY = reinterpret_cast<uint8_t *>(CVPixelBufferGetBaseAddressOfPlane(pixelBuffer, 0));
  int dstStrideY = CVPixelBufferGetBytesPerRowOfPlane(pixelBuffer, 0);
  uint8_t *dstUV = reinterpret_cast<uint8_t *>(CVPixelBufferGetBaseAddressOfPlane(pixelBuffer, 1));
  int dstStrideUV = CVPixelBufferGetBytesPerRowOfPlane(pixelBuffer, 1);
  // Convert I420 to NV12.
  int ret = libyuv::I420ToNV12(frameBuffer.dataY,
                               frameBuffer.strideY,
                               frameBuffer.dataU,
                               frameBuffer.strideU,
                               frameBuffer.dataV,
                               frameBuffer.strideV,
                               dstY,
                               dstStrideY,
                               dstUV,
                               dstStrideUV,
                               frameBuffer.width,
                               frameBuffer.height);
  CVPixelBufferUnlockBaseAddress(pixelBuffer, 0);
  if (ret) {
    RTC_LOG(LS_ERROR) << "Error converting I420 VideoFrame to NV12: " << ret;
    return false;
  }
  return true;
}

CVPixelBufferRef CreatePixelBuffer(CVPixelBufferPoolRef pixel_buffer_pool) {
  if (!pixel_buffer_pool) {
    RTC_LOG(LS_ERROR) << "Failed to get pixel buffer pool.";
    return nullptr;
  }
  CVPixelBufferRef pixel_buffer;
  CVReturn ret = CVPixelBufferPoolCreatePixelBuffer(nullptr, pixel_buffer_pool, &pixel_buffer);
  if (ret != kCVReturnSuccess) {
    RTC_LOG(LS_ERROR) << "Failed to create pixel buffer: " << ret;
    // We probably want to drop frames here, since failure probably means
    // that the pool is empty.
    return nullptr;
  }
  return pixel_buffer;
}

// This is the callback function that VideoToolbox calls when encode is
// complete. From inspection this happens on its own queue.
void compressionOutputCallback(void *encoder,
                               void *params,
                               OSStatus status,
                               VTEncodeInfoFlags infoFlags,
                               CMSampleBufferRef sampleBuffer) {
  if (!params) {
    // If there are pending callbacks when the encoder is destroyed, this can happen.
    return;
  }
  std::unique_ptr<RTCFrameEncodeParams> encodeParams(
      reinterpret_cast<RTCFrameEncodeParams *>(params));
  [encodeParams->encoder frameWasEncoded:status
                                   flags:infoFlags
                            sampleBuffer:sampleBuffer
                       codecSpecificInfo:encodeParams->codecSpecificInfo
                                   width:encodeParams->width
                                  height:encodeParams->height
                            renderTimeMs:encodeParams->render_time_ms
                               timestamp:encodeParams->timestamp
                                rotation:encodeParams->rotation];
}

// Extract VideoToolbox profile out of the webrtc::SdpVideoFormat. If there is
// no specific VideoToolbox profile for the specified level, AutoLevel will be
// returned. The user must initialize the encoder with a resolution and
// framerate conforming to the selected H264 level regardless.
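// For example, the common SDP profile-level-id "42e01f" parses as Constrained
// Baseline level 3.1 and maps to kVTProfileLevel_H264_Baseline_3_1 below.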
CFStringRef ExtractProfile(const webrtc::H264::ProfileLevelId &profile_level_id) {
  switch (profile_level_id.profile) {
    case webrtc::H264::kProfileConstrainedBaseline:
    case webrtc::H264::kProfileBaseline:
      switch (profile_level_id.level) {
        case webrtc::H264::kLevel3:
          return kVTProfileLevel_H264_Baseline_3_0;
        case webrtc::H264::kLevel3_1:
          return kVTProfileLevel_H264_Baseline_3_1;
        case webrtc::H264::kLevel3_2:
          return kVTProfileLevel_H264_Baseline_3_2;
        case webrtc::H264::kLevel4:
          return kVTProfileLevel_H264_Baseline_4_0;
        case webrtc::H264::kLevel4_1:
          return kVTProfileLevel_H264_Baseline_4_1;
        case webrtc::H264::kLevel4_2:
          return kVTProfileLevel_H264_Baseline_4_2;
        case webrtc::H264::kLevel5:
          return kVTProfileLevel_H264_Baseline_5_0;
        case webrtc::H264::kLevel5_1:
          return kVTProfileLevel_H264_Baseline_5_1;
        case webrtc::H264::kLevel5_2:
          return kVTProfileLevel_H264_Baseline_5_2;
        case webrtc::H264::kLevel1:
        case webrtc::H264::kLevel1_b:
        case webrtc::H264::kLevel1_1:
        case webrtc::H264::kLevel1_2:
        case webrtc::H264::kLevel1_3:
        case webrtc::H264::kLevel2:
        case webrtc::H264::kLevel2_1:
        case webrtc::H264::kLevel2_2:
          return kVTProfileLevel_H264_Baseline_AutoLevel;
      }

    case webrtc::H264::kProfileMain:
      switch (profile_level_id.level) {
        case webrtc::H264::kLevel3:
          return kVTProfileLevel_H264_Main_3_0;
        case webrtc::H264::kLevel3_1:
          return kVTProfileLevel_H264_Main_3_1;
        case webrtc::H264::kLevel3_2:
          return kVTProfileLevel_H264_Main_3_2;
        case webrtc::H264::kLevel4:
          return kVTProfileLevel_H264_Main_4_0;
        case webrtc::H264::kLevel4_1:
          return kVTProfileLevel_H264_Main_4_1;
        case webrtc::H264::kLevel4_2:
          return kVTProfileLevel_H264_Main_4_2;
        case webrtc::H264::kLevel5:
          return kVTProfileLevel_H264_Main_5_0;
        case webrtc::H264::kLevel5_1:
          return kVTProfileLevel_H264_Main_5_1;
        case webrtc::H264::kLevel5_2:
          return kVTProfileLevel_H264_Main_5_2;
        case webrtc::H264::kLevel1:
        case webrtc::H264::kLevel1_b:
        case webrtc::H264::kLevel1_1:
        case webrtc::H264::kLevel1_2:
        case webrtc::H264::kLevel1_3:
        case webrtc::H264::kLevel2:
        case webrtc::H264::kLevel2_1:
        case webrtc::H264::kLevel2_2:
          return kVTProfileLevel_H264_Main_AutoLevel;
      }

    case webrtc::H264::kProfileConstrainedHigh:
    case webrtc::H264::kProfileHigh:
      switch (profile_level_id.level) {
        case webrtc::H264::kLevel3:
          return kVTProfileLevel_H264_High_3_0;
        case webrtc::H264::kLevel3_1:
          return kVTProfileLevel_H264_High_3_1;
        case webrtc::H264::kLevel3_2:
          return kVTProfileLevel_H264_High_3_2;
        case webrtc::H264::kLevel4:
          return kVTProfileLevel_H264_High_4_0;
        case webrtc::H264::kLevel4_1:
          return kVTProfileLevel_H264_High_4_1;
        case webrtc::H264::kLevel4_2:
          return kVTProfileLevel_H264_High_4_2;
        case webrtc::H264::kLevel5:
          return kVTProfileLevel_H264_High_5_0;
        case webrtc::H264::kLevel5_1:
          return kVTProfileLevel_H264_High_5_1;
        case webrtc::H264::kLevel5_2:
          return kVTProfileLevel_H264_High_5_2;
        case webrtc::H264::kLevel1:
        case webrtc::H264::kLevel1_b:
        case webrtc::H264::kLevel1_1:
        case webrtc::H264::kLevel1_2:
        case webrtc::H264::kLevel1_3:
        case webrtc::H264::kLevel2:
        case webrtc::H264::kLevel2_1:
        case webrtc::H264::kLevel2_2:
          return kVTProfileLevel_H264_High_AutoLevel;
      }
  }
}

// The function returns the max allowed sample rate (pixels per second) that
// can be processed by a given encoder with |profile_level_id|.
// See https://www.itu.int/rec/dologin_pub.asp?lang=e&id=T-REC-H.264-201610-S!!PDF-E&type=items
// for details.
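// For example, level 3.1 allows 27,648,000 samples per second, so a 1280x720
// stream (921,600 samples per frame) can run at up to 30 fps.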
NSUInteger GetMaxSampleRate(const webrtc::H264::ProfileLevelId &profile_level_id) {
  switch (profile_level_id.level) {
    case webrtc::H264::kLevel3:
      return 10368000;
    case webrtc::H264::kLevel3_1:
      return 27648000;
    case webrtc::H264::kLevel3_2:
      return 55296000;
    case webrtc::H264::kLevel4:
    case webrtc::H264::kLevel4_1:
      return 62914560;
    case webrtc::H264::kLevel4_2:
      return 133693440;
    case webrtc::H264::kLevel5:
      return 150994944;
    case webrtc::H264::kLevel5_1:
      return 251658240;
    case webrtc::H264::kLevel5_2:
      return 530841600;
    case webrtc::H264::kLevel1:
    case webrtc::H264::kLevel1_b:
    case webrtc::H264::kLevel1_1:
    case webrtc::H264::kLevel1_2:
    case webrtc::H264::kLevel1_3:
    case webrtc::H264::kLevel2:
    case webrtc::H264::kLevel2_1:
    case webrtc::H264::kLevel2_2:
      // Zero means auto rate setting.
      return 0;
  }
}
}  // namespace

@implementation RTC_OBJC_TYPE (RTCVideoEncoderH264) {
  RTC_OBJC_TYPE(RTCVideoCodecInfo) * _codecInfo;
  std::unique_ptr<webrtc::BitrateAdjuster> _bitrateAdjuster;
  uint32_t _targetBitrateBps;
  uint32_t _encoderBitrateBps;
  uint32_t _encoderFrameRate;
  uint32_t _maxAllowedFrameRate;
  RTCH264PacketizationMode _packetizationMode;
  absl::optional<webrtc::H264::ProfileLevelId> _profile_level_id;
  RTCVideoEncoderCallback _callback;
  int32_t _width;
  int32_t _height;
  VTCompressionSessionRef _compressionSession;
  CVPixelBufferPoolRef _pixelBufferPool;
  RTCVideoCodecMode _mode;

  webrtc::H264BitstreamParser _h264BitstreamParser;
  std::vector<uint8_t> _frameScaleBuffer;
}

// .5 is set as a minimum to prevent overcompensating for large temporary
// overshoots. We don't want to degrade video quality too badly.
// .95 is set to prevent oscillations. When a lower bitrate is set on the
// encoder than previously set, its output seems to have a brief period of
// drastically reduced bitrate, so we want to avoid that. In steady state
// conditions, 0.95 seems to give us better overall bitrate over long periods
// of time.
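// (Roughly speaking, the BitrateAdjuster tracks the bitrate the encoder
// actually produces and scales the bitrate we request from VideoToolbox,
// clamped between these two fractions of the target.)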
- (instancetype)initWithCodecInfo:(RTC_OBJC_TYPE(RTCVideoCodecInfo) *)codecInfo {
  if (self = [super init]) {
    _codecInfo = codecInfo;
    _bitrateAdjuster.reset(new webrtc::BitrateAdjuster(.5, .95));
    _packetizationMode = RTCH264PacketizationModeNonInterleaved;
    _profile_level_id =
        webrtc::H264::ParseSdpProfileLevelId([codecInfo nativeSdpVideoFormat].parameters);
    RTC_DCHECK(_profile_level_id);
    RTC_LOG(LS_INFO) << "Using profile " << CFStringToString(ExtractProfile(*_profile_level_id));
    RTC_CHECK([codecInfo.name isEqualToString:kRTCVideoCodecH264Name]);
  }
  return self;
}

- (void)dealloc {
  [self destroyCompressionSession];
}

- (NSInteger)startEncodeWithSettings:(RTC_OBJC_TYPE(RTCVideoEncoderSettings) *)settings
                       numberOfCores:(int)numberOfCores {
  RTC_DCHECK(settings);
  RTC_DCHECK([settings.name isEqualToString:kRTCVideoCodecH264Name]);

  _width = settings.width;
  _height = settings.height;
  _mode = settings.mode;

  // Round dimensions up to a multiple of 16 (the macroblock size) before
  // deriving the maximum frame rate allowed by the configured H264 level.
  uint32_t aligned_width = (((_width + 15) >> 4) << 4);
  uint32_t aligned_height = (((_height + 15) >> 4) << 4);
  _maxAllowedFrameRate = static_cast<uint32_t>(GetMaxSampleRate(*_profile_level_id) /
                                               (aligned_width * aligned_height));

  // We can only set average bitrate on the HW encoder.
  _targetBitrateBps = settings.startBitrate * 1000;  // startBitrate is in kbps.
  _bitrateAdjuster->SetTargetBitrateBps(_targetBitrateBps);
  _encoderFrameRate = MIN(settings.maxFramerate, _maxAllowedFrameRate);
  if (settings.maxFramerate > _maxAllowedFrameRate && _maxAllowedFrameRate > 0) {
    RTC_LOG(LS_WARNING) << "Initial encoder frame rate setting " << settings.maxFramerate
                        << " is larger than the "
                        << "maximal allowed frame rate " << _maxAllowedFrameRate << ".";
  }

  // TODO(tkchin): Try setting payload size via
  // kVTCompressionPropertyKey_MaxH264SliceBytes.

  return [self resetCompressionSessionWithPixelFormat:kNV12PixelFormat];
}
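// Encode flow: obtain an encoder-compatible CVPixelBuffer (the captured buffer
// itself, or a cropped/scaled/converted copy from the session's pool), apply
// any pending bitrate change, then hand the buffer to
// VTCompressionSessionEncodeFrame. The result arrives asynchronously in
// compressionOutputCallback.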
- (NSInteger)encode:(RTC_OBJC_TYPE(RTCVideoFrame) *)frame
    codecSpecificInfo:(nullable id<RTC_OBJC_TYPE(RTCCodecSpecificInfo)>)codecSpecificInfo
           frameTypes:(NSArray<NSNumber *> *)frameTypes {
  RTC_DCHECK_EQ(frame.width, _width);
  RTC_DCHECK_EQ(frame.height, _height);
  if (!_callback || !_compressionSession) {
    return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
  }
  BOOL isKeyframeRequired = NO;

  // Get a pixel buffer from the pool and copy frame data over.
  if ([self resetCompressionSessionIfNeededWithFrame:frame]) {
    isKeyframeRequired = YES;
  }

  CVPixelBufferRef pixelBuffer = nullptr;
  if ([frame.buffer isKindOfClass:[RTC_OBJC_TYPE(RTCCVPixelBuffer) class]]) {
    // Native frame buffer.
    RTC_OBJC_TYPE(RTCCVPixelBuffer) *rtcPixelBuffer =
        (RTC_OBJC_TYPE(RTCCVPixelBuffer) *)frame.buffer;
    if (![rtcPixelBuffer requiresCropping]) {
      // This pixel buffer might have a higher resolution than what the
      // compression session is configured to. The compression session can
      // handle that and will output encoded frames in the configured
      // resolution regardless of the input pixel buffer resolution.
      pixelBuffer = rtcPixelBuffer.pixelBuffer;
      CVBufferRetain(pixelBuffer);
    } else {
      // Cropping required, we need to crop and scale to a new pixel buffer.
      pixelBuffer = CreatePixelBuffer(_pixelBufferPool);
      if (!pixelBuffer) {
        return WEBRTC_VIDEO_CODEC_ERROR;
      }
      int dstWidth = CVPixelBufferGetWidth(pixelBuffer);
      int dstHeight = CVPixelBufferGetHeight(pixelBuffer);
      if ([rtcPixelBuffer requiresScalingToWidth:dstWidth height:dstHeight]) {
        int size =
            [rtcPixelBuffer bufferSizeForCroppingAndScalingToWidth:dstWidth height:dstHeight];
        _frameScaleBuffer.resize(size);
      } else {
        _frameScaleBuffer.clear();
      }
      _frameScaleBuffer.shrink_to_fit();
      if (![rtcPixelBuffer cropAndScaleTo:pixelBuffer withTempBuffer:_frameScaleBuffer.data()]) {
        CVBufferRelease(pixelBuffer);
        return WEBRTC_VIDEO_CODEC_ERROR;
      }
    }
  }

  if (!pixelBuffer) {
    // We did not have a native frame buffer.
    pixelBuffer = CreatePixelBuffer(_pixelBufferPool);
    if (!pixelBuffer) {
      return WEBRTC_VIDEO_CODEC_ERROR;
    }
    RTC_DCHECK(pixelBuffer);
    if (!CopyVideoFrameToNV12PixelBuffer([frame.buffer toI420], pixelBuffer)) {
      RTC_LOG(LS_ERROR) << "Failed to copy frame data.";
      CVBufferRelease(pixelBuffer);
      return WEBRTC_VIDEO_CODEC_ERROR;
    }
  }

  // Check if we need a keyframe.
  if (!isKeyframeRequired && frameTypes) {
    for (NSNumber *frameType in frameTypes) {
      if ((RTCFrameType)frameType.intValue == RTCFrameTypeVideoFrameKey) {
        isKeyframeRequired = YES;
        break;
      }
    }
  }

  // Capture time expressed in milliseconds on a 1 kHz timescale.
  CMTime presentationTimeStamp = CMTimeMake(frame.timeStampNs / rtc::kNumNanosecsPerMillisec, 1000);
  CFDictionaryRef frameProperties = nullptr;
  if (isKeyframeRequired) {
    CFTypeRef keys[] = {kVTEncodeFrameOptionKey_ForceKeyFrame};
    CFTypeRef values[] = {kCFBooleanTrue};
    frameProperties = CreateCFTypeDictionary(keys, values, 1);
  }

  std::unique_ptr<RTCFrameEncodeParams> encodeParams;
  encodeParams.reset(new RTCFrameEncodeParams(self,
                                              codecSpecificInfo,
                                              _width,
                                              _height,
                                              frame.timeStampNs / rtc::kNumNanosecsPerMillisec,
                                              frame.timeStamp,
                                              frame.rotation));
  encodeParams->codecSpecificInfo.packetizationMode = _packetizationMode;

  // Update the bitrate if needed.
  [self setBitrateBps:_bitrateAdjuster->GetAdjustedBitrateBps() frameRate:_encoderFrameRate];

  OSStatus status = VTCompressionSessionEncodeFrame(_compressionSession,
                                                    pixelBuffer,
                                                    presentationTimeStamp,
                                                    kCMTimeInvalid,
                                                    frameProperties,
                                                    encodeParams.release(),
                                                    nullptr);
  if (frameProperties) {
    CFRelease(frameProperties);
  }
  if (pixelBuffer) {
    CVBufferRelease(pixelBuffer);
  }
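  // Invalid-session and encoder-malfunction errors are treated as recoverable:
  // rebuild the session and report no output so the caller simply drops this
  // frame and continues.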
  if (status == kVTInvalidSessionErr) {
    // This error occurs when entering foreground after backgrounding the app.
    RTC_LOG(LS_ERROR) << "Invalid compression session, resetting.";
    [self resetCompressionSessionWithPixelFormat:[self pixelFormatOfFrame:frame]];

    return WEBRTC_VIDEO_CODEC_NO_OUTPUT;
  } else if (status == kVTVideoEncoderMalfunctionErr) {
    // Sometimes the encoder malfunctions and needs to be restarted.
    RTC_LOG(LS_ERROR)
        << "Encountered video encoder malfunction error. Resetting compression session.";
    [self resetCompressionSessionWithPixelFormat:[self pixelFormatOfFrame:frame]];

    return WEBRTC_VIDEO_CODEC_NO_OUTPUT;
  } else if (status != noErr) {
    RTC_LOG(LS_ERROR) << "Failed to encode frame with code: " << status;
    return WEBRTC_VIDEO_CODEC_ERROR;
  }
  return WEBRTC_VIDEO_CODEC_OK;
}

- (void)setCallback:(RTCVideoEncoderCallback)callback {
  _callback = callback;
}

- (int)setBitrate:(uint32_t)bitrateKbit framerate:(uint32_t)framerate {
  _targetBitrateBps = 1000 * bitrateKbit;
  _bitrateAdjuster->SetTargetBitrateBps(_targetBitrateBps);
  if (framerate > _maxAllowedFrameRate && _maxAllowedFrameRate > 0) {
    RTC_LOG(LS_WARNING) << "Encoder frame rate setting " << framerate << " is larger than the "
                        << "maximal allowed frame rate " << _maxAllowedFrameRate << ".";
  }
  framerate = MIN(framerate, _maxAllowedFrameRate);
  [self setBitrateBps:_bitrateAdjuster->GetAdjustedBitrateBps() frameRate:framerate];
  return WEBRTC_VIDEO_CODEC_OK;
}

#pragma mark - Private

- (NSInteger)releaseEncoder {
  // Need to destroy so that the session is invalidated and won't use the
  // callback anymore. Do not remove callback until the session is invalidated
  // since async encoder callbacks can occur until invalidation.
  [self destroyCompressionSession];
  _callback = nullptr;
  return WEBRTC_VIDEO_CODEC_OK;
}

- (OSType)pixelFormatOfFrame:(RTC_OBJC_TYPE(RTCVideoFrame) *)frame {
  // Use NV12 for non-native frames.
  if ([frame.buffer isKindOfClass:[RTC_OBJC_TYPE(RTCCVPixelBuffer) class]]) {
    RTC_OBJC_TYPE(RTCCVPixelBuffer) *rtcPixelBuffer =
        (RTC_OBJC_TYPE(RTCCVPixelBuffer) *)frame.buffer;
    return CVPixelBufferGetPixelFormatType(rtcPixelBuffer.pixelBuffer);
  }

  return kNV12PixelFormat;
}
- (BOOL)resetCompressionSessionIfNeededWithFrame:(RTC_OBJC_TYPE(RTCVideoFrame) *)frame {
  BOOL resetCompressionSession = NO;

  // If we're capturing native frames in another pixel format than the compression session is
  // configured with, make sure the compression session is reset using the correct pixel format.
  OSType framePixelFormat = [self pixelFormatOfFrame:frame];

  if (_compressionSession) {
    // The pool attribute `kCVPixelBufferPixelFormatTypeKey` can contain either an array of pixel
    // formats or a single pixel format.
    NSDictionary *poolAttributes =
        (__bridge NSDictionary *)CVPixelBufferPoolGetPixelBufferAttributes(_pixelBufferPool);
    id pixelFormats =
        [poolAttributes objectForKey:(__bridge NSString *)kCVPixelBufferPixelFormatTypeKey];
    NSArray<NSNumber *> *compressionSessionPixelFormats = nil;
    if ([pixelFormats isKindOfClass:[NSArray class]]) {
      compressionSessionPixelFormats = (NSArray *)pixelFormats;
    } else if ([pixelFormats isKindOfClass:[NSNumber class]]) {
      compressionSessionPixelFormats = @[ (NSNumber *)pixelFormats ];
    }

    if (![compressionSessionPixelFormats
            containsObject:[NSNumber numberWithLong:framePixelFormat]]) {
      resetCompressionSession = YES;
      RTC_LOG(LS_INFO) << "Resetting compression session due to non-matching pixel format.";
    }
  } else {
    resetCompressionSession = YES;
  }

  if (resetCompressionSession) {
    [self resetCompressionSessionWithPixelFormat:framePixelFormat];
  }
  return resetCompressionSession;
}

- (int)resetCompressionSessionWithPixelFormat:(OSType)framePixelFormat {
  [self destroyCompressionSession];

  // Set source image buffer attributes. These attributes will be present on
  // buffers retrieved from the encoder's pixel buffer pool.
  const size_t attributesSize = 3;
  CFTypeRef keys[attributesSize] = {
#if defined(WEBRTC_IOS)
    kCVPixelBufferOpenGLESCompatibilityKey,
#elif defined(WEBRTC_MAC)
    kCVPixelBufferOpenGLCompatibilityKey,
#endif
    kCVPixelBufferIOSurfacePropertiesKey,
    kCVPixelBufferPixelFormatTypeKey
  };
  CFDictionaryRef ioSurfaceValue = CreateCFTypeDictionary(nullptr, nullptr, 0);
  int64_t pixelFormatType = framePixelFormat;
  CFNumberRef pixelFormat = CFNumberCreate(nullptr, kCFNumberLongType, &pixelFormatType);
  CFTypeRef values[attributesSize] = {kCFBooleanTrue, ioSurfaceValue, pixelFormat};
  CFDictionaryRef sourceAttributes = CreateCFTypeDictionary(keys, values, attributesSize);
  if (ioSurfaceValue) {
    CFRelease(ioSurfaceValue);
    ioSurfaceValue = nullptr;
  }
  if (pixelFormat) {
    CFRelease(pixelFormat);
    pixelFormat = nullptr;
  }
  CFMutableDictionaryRef encoder_specs = nullptr;
#if defined(WEBRTC_MAC) && !defined(WEBRTC_IOS)
  // Currently, hw acceleration is only supported above 360p on Mac; below 360p
  // the compression session will be created with hw acceleration disabled.
  encoder_specs = CFDictionaryCreateMutable(
      nullptr, 1, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks);
  CFDictionarySetValue(encoder_specs,
                       kVTVideoEncoderSpecification_EnableHardwareAcceleratedVideoEncoder,
                       kCFBooleanTrue);
#endif
  OSStatus status =
      VTCompressionSessionCreate(nullptr,  // use default allocator
                                 _width,
                                 _height,
                                 kCMVideoCodecType_H264,
                                 encoder_specs,  // use hardware accelerated encoder if available
                                 sourceAttributes,
                                 nullptr,  // use default compressed data allocator
                                 compressionOutputCallback,
                                 nullptr,
                                 &_compressionSession);
  if (sourceAttributes) {
    CFRelease(sourceAttributes);
    sourceAttributes = nullptr;
  }
  if (encoder_specs) {
    CFRelease(encoder_specs);
    encoder_specs = nullptr;
  }
  if (status != noErr) {
    RTC_LOG(LS_ERROR) << "Failed to create compression session: " << status;
    return WEBRTC_VIDEO_CODEC_ERROR;
  }
#if defined(WEBRTC_MAC) && !defined(WEBRTC_IOS)
  CFBooleanRef hwaccl_enabled = nullptr;
  status = VTSessionCopyProperty(_compressionSession,
                                 kVTCompressionPropertyKey_UsingHardwareAcceleratedVideoEncoder,
                                 nullptr,
                                 &hwaccl_enabled);
  if (status == noErr && (CFBooleanGetValue(hwaccl_enabled))) {
    RTC_LOG(LS_INFO) << "Compression session created with hw accl enabled";
  } else {
    RTC_LOG(LS_INFO) << "Compression session created with hw accl disabled";
  }
#endif
  [self configureCompressionSession];

  // The pixel buffer pool is dependent on the compression session so if the session is reset, the
  // pool should be reset as well.
  _pixelBufferPool = VTCompressionSessionGetPixelBufferPool(_compressionSession);

  return WEBRTC_VIDEO_CODEC_OK;
}

- (void)configureCompressionSession {
  RTC_DCHECK(_compressionSession);
  SetVTSessionProperty(_compressionSession, kVTCompressionPropertyKey_RealTime, true);
  SetVTSessionProperty(_compressionSession,
                       kVTCompressionPropertyKey_ProfileLevel,
                       ExtractProfile(*_profile_level_id));
  SetVTSessionProperty(_compressionSession, kVTCompressionPropertyKey_AllowFrameReordering, false);
  [self setEncoderBitrateBps:_targetBitrateBps frameRate:_encoderFrameRate];
  // TODO(tkchin): Look at entropy mode and colorspace matrices.
  // TODO(tkchin): Investigate to see if there's any way to make this work.
  // May need it to interop with Android. Currently this call just fails.
  // On inspecting encoder output on iOS8, this value is set to 6.
  // internal::SetVTSessionProperty(compression_session_,
  //     kVTCompressionPropertyKey_MaxFrameDelayCount,
  //     1);

  // Set a relatively large value for keyframe emission (7200 frames or 4 minutes).
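  // (7200 frames is 4 minutes at 30 fps; the duration property below also caps
  // the interval at 240 seconds, whichever limit is hit first.)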
  SetVTSessionProperty(_compressionSession, kVTCompressionPropertyKey_MaxKeyFrameInterval, 7200);
  SetVTSessionProperty(
      _compressionSession, kVTCompressionPropertyKey_MaxKeyFrameIntervalDuration, 240);
}

- (void)destroyCompressionSession {
  if (_compressionSession) {
    VTCompressionSessionInvalidate(_compressionSession);
    CFRelease(_compressionSession);
    _compressionSession = nullptr;
    _pixelBufferPool = nullptr;
  }
}

- (NSString *)implementationName {
  return @"VideoToolbox";
}

- (void)setBitrateBps:(uint32_t)bitrateBps frameRate:(uint32_t)frameRate {
  if (_encoderBitrateBps != bitrateBps || _encoderFrameRate != frameRate) {
    [self setEncoderBitrateBps:bitrateBps frameRate:frameRate];
  }
}

- (void)setEncoderBitrateBps:(uint32_t)bitrateBps frameRate:(uint32_t)frameRate {
  if (_compressionSession) {
    SetVTSessionProperty(_compressionSession, kVTCompressionPropertyKey_AverageBitRate, bitrateBps);

    // With zero |_maxAllowedFrameRate|, we fall back to automatic frame rate detection.
    if (_maxAllowedFrameRate > 0) {
      SetVTSessionProperty(
          _compressionSession, kVTCompressionPropertyKey_ExpectedFrameRate, frameRate);
    }

    // TODO(tkchin): Add a helper method to set array value.
    // kVTCompressionPropertyKey_DataRateLimits takes (bytes, seconds) pairs;
    // here we cap output at 1.5x the average bitrate over a one-second window.
    int64_t dataLimitBytesPerSecondValue =
        static_cast<int64_t>(bitrateBps * kLimitToAverageBitRateFactor / 8);
    CFNumberRef bytesPerSecond =
        CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt64Type, &dataLimitBytesPerSecondValue);
    int64_t oneSecondValue = 1;
    CFNumberRef oneSecond =
        CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt64Type, &oneSecondValue);
    const void *nums[2] = {bytesPerSecond, oneSecond};
    CFArrayRef dataRateLimits = CFArrayCreate(nullptr, nums, 2, &kCFTypeArrayCallBacks);
    OSStatus status = VTSessionSetProperty(
        _compressionSession, kVTCompressionPropertyKey_DataRateLimits, dataRateLimits);
    if (bytesPerSecond) {
      CFRelease(bytesPerSecond);
    }
    if (oneSecond) {
      CFRelease(oneSecond);
    }
    if (dataRateLimits) {
      CFRelease(dataRateLimits);
    }
    if (status != noErr) {
      RTC_LOG(LS_ERROR) << "Failed to set data rate limit with code: " << status;
    }

    _encoderBitrateBps = bitrateBps;
    _encoderFrameRate = frameRate;
  }
}

- (void)frameWasEncoded:(OSStatus)status
                  flags:(VTEncodeInfoFlags)infoFlags
           sampleBuffer:(CMSampleBufferRef)sampleBuffer
      codecSpecificInfo:(id<RTC_OBJC_TYPE(RTCCodecSpecificInfo)>)codecSpecificInfo
                  width:(int32_t)width
                 height:(int32_t)height
           renderTimeMs:(int64_t)renderTimeMs
              timestamp:(uint32_t)timestamp
               rotation:(RTCVideoRotation)rotation {
  if (status != noErr) {
    RTC_LOG(LS_ERROR) << "H264 encode failed with code: " << status;
    return;
  }
  if (infoFlags & kVTEncodeInfo_FrameDropped) {
    RTC_LOG(LS_INFO) << "H264 encode dropped frame.";
    return;
  }

  BOOL isKeyframe = NO;
  CFArrayRef attachments = CMSampleBufferGetSampleAttachmentsArray(sampleBuffer, 0);
  if (attachments != nullptr && CFArrayGetCount(attachments)) {
    CFDictionaryRef attachment =
        static_cast<CFDictionaryRef>(CFArrayGetValueAtIndex(attachments, 0));
    isKeyframe = !CFDictionaryContainsKey(attachment, kCMSampleAttachmentKey_NotSync);
  }

  if (isKeyframe) {
    RTC_LOG(LS_INFO) << "Generated keyframe";
  }
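  // Convert the encoded sample buffer (length-prefixed AVCC NALUs) to an
  // Annex B buffer with start codes, and build the fragmentation header that
  // records each NALU's offset and length for packetization.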
  __block std::unique_ptr<rtc::Buffer> buffer = std::make_unique<rtc::Buffer>();
  RTC_OBJC_TYPE(RTCRtpFragmentationHeader) * header;
  {
    std::unique_ptr<webrtc::RTPFragmentationHeader> header_cpp;
    bool result =
        H264CMSampleBufferToAnnexBBuffer(sampleBuffer, isKeyframe, buffer.get(), &header_cpp);
    header = [[RTC_OBJC_TYPE(RTCRtpFragmentationHeader) alloc]
        initWithNativeFragmentationHeader:header_cpp.get()];
    if (!result) {
      return;
    }
  }

  RTC_OBJC_TYPE(RTCEncodedImage) *frame = [[RTC_OBJC_TYPE(RTCEncodedImage) alloc] init];
  // This assumes ownership of `buffer` and is responsible for freeing it when done.
  frame.buffer = [[NSData alloc] initWithBytesNoCopy:buffer->data()
                                              length:buffer->size()
                                         deallocator:^(void *bytes, NSUInteger size) {
                                           buffer.reset();
                                         }];
  frame.encodedWidth = width;
  frame.encodedHeight = height;
  frame.completeFrame = YES;
  frame.frameType = isKeyframe ? RTCFrameTypeVideoFrameKey : RTCFrameTypeVideoFrameDelta;
  frame.captureTimeMs = renderTimeMs;
  frame.timeStamp = timestamp;
  frame.rotation = rotation;
  frame.contentType = (_mode == RTCVideoCodecModeScreensharing) ? RTCVideoContentTypeScreenshare :
                                                                  RTCVideoContentTypeUnspecified;
  frame.flags = webrtc::VideoSendTiming::kInvalid;

  int qp;
  _h264BitstreamParser.ParseBitstream(buffer->data(), buffer->size());
  _h264BitstreamParser.GetLastSliceQp(&qp);
  frame.qp = @(qp);

  BOOL res = _callback(frame, codecSpecificInfo, header);
  if (!res) {
    RTC_LOG(LS_ERROR) << "Encode callback failed";
    return;
  }
  _bitrateAdjuster->Update(frame.buffer.length);
}

- (nullable RTC_OBJC_TYPE(RTCVideoEncoderQpThresholds) *)scalingSettings {
  return [[RTC_OBJC_TYPE(RTCVideoEncoderQpThresholds) alloc]
      initWithThresholdsLow:kLowH264QpThreshold
                       high:kHighH264QpThreshold];
}

@end