/* * Copyright (c) 2017-2023, Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ //! //! \file codechal_encoder_base.cpp //! \brief Implements the encode interface for CodecHal. //! \details The encode interface is further sub-divided by standard, this file is for the base interface which is shared by all encode standards. //! 
#include "codechal_encoder_base.h"
#include "mos_solo_generic.h"
#include "hal_oca_interface.h"
#include "codechal_encode_csc_ds.h"
#include "mos_os_cp_interface_specific.h"
#if defined (_HEVC_ENCODE_VME_SUPPORTED) || defined (_HEVC_ENCODE_VDENC_SUPPORTED)
#include "codechal_encode_tracked_buffer_hevc.h"
#endif

//!
//! \brief    Choose the video GPU node to request for the encode video context.
//! \details  When multi-node scaling is supported, the node opposite to the one
//!           returned by pfnGetLatestVirtualNode(COMPONENT_Decode) is selected
//!           (nothing is changed if that call returns MOS_GPU_NODE_MAX).
//!           Otherwise MOS_GPU_NODE_VIDEO is forced when m_vdboxOneDefaultUsed is
//!           set, or when m_needCheckCpEnabled is set and either CP is enabled or
//!           VDEnc is in use.
//! \param    [in,out] videoGpuNode
//!           Node to use; only written when a node is forced.
//! \param    [in,out] setVideoNode
//!           Set to true when videoGpuNode has been forced; otherwise left as is.
//!
void CodechalEncoderState::PrepareNodes(
    MOS_GPU_NODE& videoGpuNode,
    bool&         setVideoNode)
{
    if (MOS_VE_MULTINODESCALING_SUPPORTED(m_osInterface))
    {
        MOS_GPU_NODE node = m_osInterface->pfnGetLatestVirtualNode(m_osInterface, COMPONENT_Decode);
        if (node != MOS_GPU_NODE_MAX)
        {
            setVideoNode = true;
            videoGpuNode = (node == MOS_GPU_NODE_VIDEO) ? MOS_GPU_NODE_VIDEO2 : MOS_GPU_NODE_VIDEO;
        }
        return;
    }

    if (m_vdboxOneDefaultUsed)
    {
        setVideoNode = true;
        videoGpuNode = MOS_GPU_NODE_VIDEO;
    }
    else if (m_needCheckCpEnabled)
    {
        if (m_osInterface->osCpInterface->IsCpEnabled() || m_vdencEnabled)
        {
            setVideoNode = true;
            videoGpuNode = MOS_GPU_NODE_VIDEO;
        }
    }
}

//!
//! \brief    Allocate the GPU context creation options stored in m_gpuCtxCreatOpt.
//! \return   MOS_STATUS_SUCCESS when the object was allocated; otherwise the
//!           error produced by CODECHAL_ENCODE_CHK_NULL_RETURN.
//!
MOS_STATUS CodechalEncoderState::SetGpuCtxCreatOption()
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    m_gpuCtxCreatOpt = MOS_New(MOS_GPUCTX_CREATOPTIONS);
    CODECHAL_ENCODE_CHK_NULL_RETURN(m_gpuCtxCreatOpt);

    return eStatus;
}

//!
//! \brief    Create (or fall back to existing) GPU contexts for the video and
//!           render engines and record them in m_videoContext / m_renderContext.
//! \details  Video engine: optionally creates a video node association, then
//!           tries to create a VIDEO3 / VDBOX2_VIDEO3 context. If creation fails
//!           the already existing contexts on the same VDBox are probed, and for
//!           VDBox2 the contexts on VDBox1 are probed as a last resort.
//!           Render engine: creates a render/compute context (RA variants when
//!           heavy mode is enabled and the SKU requires it) and falls back to
//!           MOS_GPU_CONTEXT_RENDER if creation fails.
//!           Finally m_vdboxIndex is derived from m_videoGpuNode.
//! \return   MOS_STATUS_SUCCESS when every required context is available,
//!           including through a fallback; otherwise the failing status.
//!
MOS_STATUS CodechalEncoderState::CreateGpuContexts()
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    if (CodecHalUsesVideoEngine(m_codecFunction))
    {
        MOS_GPU_NODE videoGpuNode = MOS_GPU_NODE_VIDEO;
        bool         setVideoNode = false;

        // Create Video Context
        if (MEDIA_IS_SKU(m_skuTable, FtrVcs2) ||
            (MOS_VE_MULTINODESCALING_SUPPORTED(m_osInterface) && m_numVdbox > 1))   // Eventually move this functionality to Mhw
        {
            setVideoNode = false;

            PrepareNodes(videoGpuNode, setVideoNode);

            CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnCreateVideoNodeAssociation(
                m_osInterface,
                setVideoNode,
                &videoGpuNode));
            m_videoNodeAssociationCreated = true;
        }
        m_videoGpuNode = videoGpuNode;

        CODECHAL_ENCODE_CHK_STATUS_RETURN(SetGpuCtxCreatOption());
        CODECHAL_ENCODE_CHK_NULL_RETURN(m_gpuCtxCreatOpt);

        MOS_GPU_CONTEXT gpuContext = (videoGpuNode == MOS_GPU_NODE_VIDEO2) && !MOS_VE_MULTINODESCALING_SUPPORTED(m_osInterface) ? MOS_GPU_CONTEXT_VDBOX2_VIDEO3 : MOS_GPU_CONTEXT_VIDEO3;

        eStatus = (MOS_STATUS)m_osInterface->pfnCreateGpuContext(
            m_osInterface,
            gpuContext,
            videoGpuNode,
            m_gpuCtxCreatOpt);

        if (eStatus != MOS_STATUS_SUCCESS)
        {
            // Failed to create new context. Try to reuse the existing one on the same VDBox.
            // Every validity probe is stored in eStatus: if the first candidate is
            // valid it must not be treated as "no valid context" further down.
            if (videoGpuNode == MOS_GPU_NODE_VIDEO2)
            {
                // check other GPU contexts on VDBox2
                gpuContext = MOS_GPU_CONTEXT_VDBOX2_VIDEO;
                eStatus    = (MOS_STATUS)m_osInterface->pfnIsGpuContextValid(m_osInterface, gpuContext);
                if (eStatus != MOS_STATUS_SUCCESS)
                {
                    gpuContext = MOS_GPU_CONTEXT_VDBOX2_VIDEO2;
                    eStatus    = (MOS_STATUS)m_osInterface->pfnIsGpuContextValid(m_osInterface, gpuContext);
                }
            }
            else // videoGpuNode == MOS_GPU_NODE_VIDEO
            {
                // check other GPU contexts on VDBox1
                gpuContext = MOS_GPU_CONTEXT_VIDEO;
                eStatus    = (MOS_STATUS)m_osInterface->pfnIsGpuContextValid(m_osInterface, gpuContext);
                if (eStatus != MOS_STATUS_SUCCESS)
                {
                    gpuContext = MOS_GPU_CONTEXT_VIDEO2;
                    eStatus    = (MOS_STATUS)m_osInterface->pfnIsGpuContextValid(m_osInterface, gpuContext);
                }
            }

            if (eStatus != MOS_STATUS_SUCCESS)
            {
                // No valid GPU context on current VDBox, so destroy the video node association.
                if (MEDIA_IS_SKU(m_skuTable, FtrVcs2))
                {
                    m_osInterface->pfnDestroyVideoNodeAssociation(m_osInterface, videoGpuNode);
                    m_videoNodeAssociationCreated = false;
                }

                if (videoGpuNode == MOS_GPU_NODE_VIDEO2)
                {
                    // If no valid GPU context on VDBox2, check GPU contexts on VDBox1
                    gpuContext = MOS_GPU_CONTEXT_VIDEO3;
                    if (m_osInterface->pfnIsGpuContextValid(m_osInterface, gpuContext) != MOS_STATUS_SUCCESS)
                    {
                        gpuContext = MOS_GPU_CONTEXT_VIDEO;
                        if (m_osInterface->pfnIsGpuContextValid(m_osInterface, gpuContext) != MOS_STATUS_SUCCESS)
                        {
                            // If this context is also invalid, return an error as no context for the video engine
                            // is available, so PAK cannot occur
                            gpuContext = MOS_GPU_CONTEXT_VIDEO2;
                            CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnIsGpuContextValid(m_osInterface, gpuContext));
                        }
                    }

                    // When using existing VDBOX1, UMD needs to notify KMD to increase the VDBOX1 counter
                    setVideoNode = true;
                    videoGpuNode = MOS_GPU_NODE_VIDEO;
                    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnCreateVideoNodeAssociation(
                        m_osInterface,
                        setVideoNode,
                        &videoGpuNode));
                    m_videoNodeAssociationCreated = true;
                }
                else // videoGpuNode == MOS_GPU_NODE_VIDEO
                {
                    // We won't check GPU contexts on VDBox2 if there is no valid GPU context on VDBox1
                    // since VDBox2 is not full featured.
                    CODECHAL_ENCODE_CHK_STATUS_RETURN(eStatus);
                }
            }

            // Every failing path above has already returned, so an existing context
            // is being reused. Clear the stale creation failure; otherwise it would
            // be handed back by the final "return eStatus".
            eStatus = MOS_STATUS_SUCCESS;

            // save the updated VDBox ordinal
            m_videoGpuNode = videoGpuNode;
        }

        if (m_videoNodeAssociationCreated)
        {
            CODECHAL_UPDATE_VDBOX_USER_FEATURE(videoGpuNode, m_osInterface->pOsContext);
        }

        m_videoContext = gpuContext;

        m_osInterface->pfnSetEncodePakContext(m_osInterface, m_videoContext);
    }

    if (m_hwInterface->UsesRenderEngine(m_codecFunction, m_standard))
    {
        MOS_GPU_CONTEXT         gpuContext    = MOS_GPU_CONTEXT_RENDER2;
        MOS_GPU_NODE            renderGpuNode = MOS_GPU_NODE_3D;
        MOS_GPUCTX_CREATOPTIONS createOption;

        // The compute context can only be used when the SKU exposes a CCS node.
        if (!MEDIA_IS_SKU(m_skuTable, FtrCCSNode))
        {
            m_computeContextEnabled = false;
        }

        if (m_osInterface->osCpInterface->IsHMEnabled() && (MEDIA_IS_SKU(m_skuTable, FtrRAMode) || MEDIA_IS_SKU(m_skuTable, FtrProtectedEnableBitRequired)))
        {
            if (m_computeContextEnabled)
            {
                gpuContext    = MOS_GPU_CONTEXT_COMPUTE_RA;
                renderGpuNode = MOS_GPU_NODE_COMPUTE;
            }
            else
            {
                gpuContext    = MOS_GPU_CONTEXT_RENDER_RA;
                renderGpuNode = MOS_GPU_NODE_3D;
            }
            createOption.RAMode      = MEDIA_IS_SKU(m_skuTable, FtrRAMode);
            createOption.ProtectMode = MEDIA_IS_SKU(m_skuTable, FtrProtectedEnableBitRequired);
        }
        else
        {
            if (m_computeContextEnabled)
            {
                gpuContext    = MOS_GPU_CONTEXT_COMPUTE;
                renderGpuNode = MOS_GPU_NODE_COMPUTE;
            }
            else
            {
                gpuContext    = MOS_GPU_CONTEXT_RENDER2;
                renderGpuNode = MOS_GPU_NODE_3D;
            }
            createOption.RAMode      = 0;
            createOption.ProtectMode = 0;
        }

        if (m_hwInterface->m_slicePowerGate)
        {
            // NOTE(review): SliceCount and SubSliceCount are used as divisors without
            // a zero check - confirm m_gtSystemInfo always reports non-zero values here.
            createOption.packed.SubSliceCount = (m_gtSystemInfo->SubSliceCount / m_gtSystemInfo->SliceCount);
            // If there are multiple sub slices, disable half of sub slices.
            if (createOption.packed.SubSliceCount > 1)
                createOption.packed.SubSliceCount >>= 1;
            createOption.packed.SliceCount            = (uint8_t)m_gtSystemInfo->SliceCount;
            createOption.packed.MaxEUcountPerSubSlice = (uint8_t)(m_gtSystemInfo->EUCount / m_gtSystemInfo->SubSliceCount);
            createOption.packed.MinEUcountPerSubSlice = (uint8_t)(m_gtSystemInfo->EUCount / m_gtSystemInfo->SubSliceCount);
        }

        eStatus = (MOS_STATUS)m_osInterface->pfnCreateGpuContext(m_osInterface, gpuContext, renderGpuNode, &createOption);

        if (eStatus != MOS_STATUS_SUCCESS)
        {
            // If this context is also invalid, return an error as no context for the 3D engine
            // is available, so ENC cannot occur
            gpuContext = MOS_GPU_CONTEXT_RENDER;
            CODECHAL_ENCODE_ASSERTMESSAGE("create gpu context failure for Render Engine!");
            CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnIsGpuContextValid(m_osInterface, gpuContext));

            // The existing render context is usable: do not report the earlier
            // creation failure to the caller.
            eStatus = MOS_STATUS_SUCCESS;
        }

        m_renderContext = gpuContext;
        m_osInterface->pfnSetEncodeEncContext(m_osInterface, m_renderContext);
    }

    // Set Vdbox index in use
    m_vdboxIndex = (m_videoGpuNode == MOS_GPU_NODE_VIDEO2)? MHW_VDBOX_NODE_2 : MHW_VDBOX_NODE_1;

    return eStatus;
}

//!
//! \brief    Free the graphics resources referenced by an HmeParams structure.
//! \details  Each surface pointer is optional; only non-null entries are freed.
//!           The MOS_SURFACE objects themselves are not released here.
//! \param    [in] param
//!           HME parameters holding the surface pointers. Must not be null.
//! \return   MOS_STATUS_SUCCESS, or the null-check error when param is null.
//!
MOS_STATUS CodechalEncoderState::DestroyMeResources(
    HmeParams* param)
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    CODECHAL_ENCODE_FUNCTION_ENTER;

    CODECHAL_ENCODE_CHK_NULL_RETURN(param);

    if (param->ps16xMeMvDataBuffer != nullptr)
    {
        m_osInterface->pfnFreeResource(
            m_osInterface,
            &param->ps16xMeMvDataBuffer->OsResource);
    }

    if (param->ps32xMeMvDataBuffer != nullptr)
    {
        m_osInterface->pfnFreeResource(
            m_osInterface,
            &param->ps32xMeMvDataBuffer->OsResource);
    }

    if (param->ps4xMeDistortionBuffer != nullptr)
    {
        m_osInterface->pfnFreeResource(
            m_osInterface,
            &param->ps4xMeDistortionBuffer->OsResource);
    }

    if (param->ps4xMeMvDataBuffer != nullptr)
    {
        m_osInterface->pfnFreeResource(
            m_osInterface,
            &param->ps4xMeMvDataBuffer->OsResource);
    }

    if (param->presMvAndDistortionSumSurface != nullptr)
    {
        m_osInterface->pfnFreeResource(
            m_osInterface,
            param->presMvAndDistortionSumSurface);
    }

    return eStatus;
}

//!
//! \brief    Zero the contents of a freshly allocated buffer resource.
//! \details  The resource is locked write-only, cleared and unlocked again.
//!           Format_Buffer clears dwBytes bytes, Format_Buffer_2D clears
//!           dwHeight * dwWidth bytes; any other format is rejected.
//! \param    [in] resource
//!           Resource to clear. Must not be null.
//! \param    [in] allocParams
//!           Parameters the resource was allocated with. Must not be null.
//! \return   MOS_STATUS_SUCCESS on success, MOS_STATUS_NULL_POINTER when the
//!           lock fails, MOS_STATUS_INVALID_PARAMETER for unsupported formats.
//!
MOS_STATUS CodechalEncoderState::CleanUpResource(
    PMOS_RESOURCE            resource,
    PMOS_ALLOC_GFXRES_PARAMS allocParams)
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    CODECHAL_ENCODE_FUNCTION_ENTER;

    CODECHAL_ENCODE_CHK_NULL_RETURN(resource);
    CODECHAL_ENCODE_CHK_NULL_RETURN(allocParams);

    MOS_LOCK_PARAMS lockFlag;
    MOS_ZeroMemory(&lockFlag, sizeof(lockFlag));
    lockFlag.WriteOnly = true;

    uint8_t* data = (uint8_t*)m_osInterface->pfnLockResource(m_osInterface, resource, &lockFlag);
    if(data == nullptr)
    {
        return MOS_STATUS_NULL_POINTER;
    }

    if(allocParams->Format == Format_Buffer)
    {
        MOS_ZeroMemory(data, allocParams->dwBytes);
    }
    else if(allocParams->Format == Format_Buffer_2D)
    {
        MOS_ZeroMemory(data, allocParams->dwHeight * allocParams->dwWidth);
    }
    else
    {
        eStatus = MOS_STATUS_INVALID_PARAMETER;
    }

    // Unlock on every path that obtained the lock, including the unsupported-format one.
    m_osInterface->pfnUnlockResource(m_osInterface, resource);

    return eStatus;
}

MOS_STATUS CodechalEncoderState::AllocateResources4xMe(
    HmeParams* param)
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    CODECHAL_ENCODE_FUNCTION_ENTER;

    CODECHAL_ENCODE_CHK_NULL_RETURN(param);

    if(!m_encEnabled
|| !m_hmeSupported) { return eStatus; } MOS_ALLOC_GFXRES_PARAMS allocParamsForBuffer2D; MOS_ZeroMemory(&allocParamsForBuffer2D, sizeof(MOS_ALLOC_GFXRES_PARAMS)); allocParamsForBuffer2D.Type = MOS_GFXRES_2D; allocParamsForBuffer2D.TileType = MOS_TILE_LINEAR; allocParamsForBuffer2D.Format = Format_Buffer_2D; MOS_ZeroMemory(param->ps4xMeMvDataBuffer, sizeof(MOS_SURFACE)); param->ps4xMeMvDataBuffer->TileType = MOS_TILE_LINEAR; param->ps4xMeMvDataBuffer->bArraySpacing = true; param->ps4xMeMvDataBuffer->Format = Format_Buffer_2D; param->ps4xMeMvDataBuffer->dwWidth = MOS_ALIGN_CEIL((m_downscaledWidthInMb4x * 32), 64); // MediaBlockRW requires pitch multiple of 64 bytes when linear. param->ps4xMeMvDataBuffer->dwHeight = (m_downscaledHeightInMb4x * 2 * 4 * CODECHAL_ENCODE_ME_DATA_SIZE_MULTIPLIER); param->ps4xMeMvDataBuffer->dwPitch = param->ps4xMeMvDataBuffer->dwWidth; allocParamsForBuffer2D.dwWidth = param->ps4xMeMvDataBuffer->dwWidth; allocParamsForBuffer2D.dwHeight = param->ps4xMeMvDataBuffer->dwHeight; allocParamsForBuffer2D.pBufName = "4xME MV Data Buffer"; eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource( m_osInterface, &allocParamsForBuffer2D, ¶m->ps4xMeMvDataBuffer->OsResource); if (eStatus != MOS_STATUS_SUCCESS) { CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate 4xME MV Data Buffer."); return eStatus; } CleanUpResource(¶m->ps4xMeMvDataBuffer->OsResource, &allocParamsForBuffer2D); if (param->b4xMeDistortionBufferSupported) { uint32_t adjustedHeight = m_downscaledHeightInMb4x * CODECHAL_MACROBLOCK_HEIGHT * SCALE_FACTOR_4x; uint32_t downscaledFieldHeightInMb4x = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(((adjustedHeight + 1) >> 1)/4); MOS_ZeroMemory(param->ps4xMeDistortionBuffer, sizeof(MOS_SURFACE)); param->ps4xMeDistortionBuffer->TileType = MOS_TILE_LINEAR; param->ps4xMeDistortionBuffer->bArraySpacing = true; param->ps4xMeDistortionBuffer->Format = Format_Buffer_2D; param->ps4xMeDistortionBuffer->dwWidth = MOS_ALIGN_CEIL((m_downscaledWidthInMb4x * 8), 64); 
param->ps4xMeDistortionBuffer->dwHeight = 2 * MOS_ALIGN_CEIL((downscaledFieldHeightInMb4x * 4 * 10), 8); param->ps4xMeDistortionBuffer->dwPitch = MOS_ALIGN_CEIL((m_downscaledWidthInMb4x * 8), 64); allocParamsForBuffer2D.dwWidth = param->ps4xMeDistortionBuffer->dwWidth; allocParamsForBuffer2D.dwHeight = param->ps4xMeDistortionBuffer->dwHeight; allocParamsForBuffer2D.pBufName = "4xME Distortion Buffer"; eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource( m_osInterface, &allocParamsForBuffer2D, ¶m->ps4xMeDistortionBuffer->OsResource); if (eStatus != MOS_STATUS_SUCCESS) { CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate 4xME Distortion Buffer."); return eStatus; } CleanUpResource(¶m->ps4xMeDistortionBuffer->OsResource, &allocParamsForBuffer2D); } return eStatus; } MOS_STATUS CodechalEncoderState::AllocateResources16xMe( HmeParams* param) { MOS_STATUS eStatus = MOS_STATUS_SUCCESS; CODECHAL_ENCODE_FUNCTION_ENTER; CODECHAL_ENCODE_CHK_NULL_RETURN(param); if (!m_encEnabled || !m_hmeSupported) { return eStatus; } MOS_ALLOC_GFXRES_PARAMS allocParamsForBuffer2D; MOS_ZeroMemory(&allocParamsForBuffer2D, sizeof(MOS_ALLOC_GFXRES_PARAMS)); allocParamsForBuffer2D.Type = MOS_GFXRES_2D; allocParamsForBuffer2D.TileType = MOS_TILE_LINEAR; allocParamsForBuffer2D.Format = Format_Buffer_2D; if (m_16xMeSupported) { MOS_ZeroMemory(param->ps16xMeMvDataBuffer, sizeof(MOS_SURFACE)); param->ps16xMeMvDataBuffer->TileType = MOS_TILE_LINEAR; param->ps16xMeMvDataBuffer->bArraySpacing = true; param->ps16xMeMvDataBuffer->Format = Format_Buffer_2D; param->ps16xMeMvDataBuffer->dwWidth = MOS_ALIGN_CEIL((m_downscaledWidthInMb16x * 32), 64); // MediaBlockRW requires pitch multiple of 64 bytes when linear param->ps16xMeMvDataBuffer->dwHeight = (m_downscaledHeightInMb16x * 2 * 4 * CODECHAL_ENCODE_ME_DATA_SIZE_MULTIPLIER); param->ps16xMeMvDataBuffer->dwPitch = param->ps16xMeMvDataBuffer->dwWidth; allocParamsForBuffer2D.dwWidth = param->ps16xMeMvDataBuffer->dwWidth; allocParamsForBuffer2D.dwHeight = 
param->ps16xMeMvDataBuffer->dwHeight; allocParamsForBuffer2D.pBufName = "16xME MV Data Buffer"; eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource( m_osInterface, &allocParamsForBuffer2D, ¶m->ps16xMeMvDataBuffer->OsResource); if (eStatus != MOS_STATUS_SUCCESS) { CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate 16xME MV Data Buffer."); return eStatus; } CleanUpResource(¶m->ps16xMeMvDataBuffer->OsResource, &allocParamsForBuffer2D); } return eStatus; } MOS_STATUS CodechalEncoderState::AllocateResources32xMe( HmeParams* param) { MOS_STATUS eStatus = MOS_STATUS_SUCCESS; CODECHAL_ENCODE_FUNCTION_ENTER; CODECHAL_ENCODE_CHK_NULL_RETURN(param); if (!m_encEnabled || !m_hmeSupported) { return eStatus; } MOS_ALLOC_GFXRES_PARAMS allocParamsForBuffer2D; MOS_ZeroMemory(&allocParamsForBuffer2D, sizeof(MOS_ALLOC_GFXRES_PARAMS)); allocParamsForBuffer2D.Type = MOS_GFXRES_2D; allocParamsForBuffer2D.TileType = MOS_TILE_LINEAR; allocParamsForBuffer2D.Format = Format_Buffer_2D; if (m_32xMeSupported) { MOS_ZeroMemory(param->ps32xMeMvDataBuffer, sizeof(MOS_SURFACE)); param->ps32xMeMvDataBuffer->TileType = MOS_TILE_LINEAR; param->ps32xMeMvDataBuffer->bArraySpacing = true; param->ps32xMeMvDataBuffer->Format = Format_Buffer_2D; param->ps32xMeMvDataBuffer->dwWidth = MOS_ALIGN_CEIL((m_downscaledWidthInMb32x * 32), 64); // MediaBlockRW requires pitch multiple of 64 bytes when linear param->ps32xMeMvDataBuffer->dwHeight = (m_downscaledHeightInMb32x * 2 * 4 * CODECHAL_ENCODE_ME_DATA_SIZE_MULTIPLIER); param->ps32xMeMvDataBuffer->dwPitch = param->ps32xMeMvDataBuffer->dwWidth; allocParamsForBuffer2D.dwWidth = param->ps32xMeMvDataBuffer->dwWidth; allocParamsForBuffer2D.dwHeight = param->ps32xMeMvDataBuffer->dwHeight; allocParamsForBuffer2D.pBufName = "32xME MV Data Buffer"; eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource( m_osInterface, &allocParamsForBuffer2D, ¶m->ps32xMeMvDataBuffer->OsResource); if (eStatus != MOS_STATUS_SUCCESS) { CODECHAL_ENCODE_ASSERTMESSAGE("%s: Failed to 
allocate 32xME MV Data Buffer\n", __FUNCTION__); return eStatus; } CleanUpResource(¶m->ps32xMeMvDataBuffer->OsResource, &allocParamsForBuffer2D); } return eStatus; } // Encode Public Interface Functions MOS_STATUS CodechalEncoderState::Allocate(CodechalSetting * codecHalSettings) { CODECHAL_ENCODE_FUNCTION_ENTER; if (m_cscDsState) { // call before m_hwInterface->Initialize() to reserve ISH space for CscDs kernel CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cscDsState->Initialize()); } { CODECHAL_PUBLIC_FUNCTION_ENTER; CODECHAL_PUBLIC_CHK_NULL_RETURN(codecHalSettings); CODECHAL_PUBLIC_CHK_NULL_RETURN(m_hwInterface); //CODECHAL_PUBLIC_CHK_NULL_RETURN(m_osInterface); MOS_TraceEvent(EVENT_CODECHAL_CREATE, EVENT_TYPE_INFO, &codecHalSettings->codecFunction, sizeof(uint32_t), nullptr, 0); CODECHAL_PUBLIC_CHK_STATUS_RETURN(m_hwInterface->Initialize(codecHalSettings)); MOS_NULL_RENDERING_FLAGS nullHWAccelerationEnable; nullHWAccelerationEnable.Value = 0; #if (_DEBUG || _RELEASE_INTERNAL) if (!m_statusReportDebugInterface) { m_statusReportDebugInterface = MOS_New(CodechalDebugInterface); CODECHAL_PUBLIC_CHK_NULL_RETURN(m_statusReportDebugInterface); CODECHAL_PUBLIC_CHK_STATUS_RETURN( m_statusReportDebugInterface->Initialize(m_hwInterface, codecHalSettings->codecFunction)); } ReadUserSettingForDebug( m_userSettingPtr, nullHWAccelerationEnable.Value, __MEDIA_USER_FEATURE_VALUE_NULL_HW_ACCELERATION_ENABLE, MediaUserSetting::Group::Device); m_useNullHw[MOS_GPU_CONTEXT_VIDEO] = (nullHWAccelerationEnable.CodecGlobal || nullHWAccelerationEnable.CtxVideo); m_useNullHw[MOS_GPU_CONTEXT_VIDEO2] = (nullHWAccelerationEnable.CodecGlobal || nullHWAccelerationEnable.CtxVideo2); m_useNullHw[MOS_GPU_CONTEXT_VIDEO3] = (nullHWAccelerationEnable.CodecGlobal || nullHWAccelerationEnable.CtxVideo3); m_useNullHw[MOS_GPU_CONTEXT_VDBOX2_VIDEO] = (nullHWAccelerationEnable.CodecGlobal || nullHWAccelerationEnable.CtxVDBox2Video); m_useNullHw[MOS_GPU_CONTEXT_VDBOX2_VIDEO2] = (nullHWAccelerationEnable.CodecGlobal 
|| nullHWAccelerationEnable.CtxVDBox2Video2); m_useNullHw[MOS_GPU_CONTEXT_VDBOX2_VIDEO3] = (nullHWAccelerationEnable.CodecGlobal || nullHWAccelerationEnable.CtxVDBox2Video3); m_useNullHw[MOS_GPU_CONTEXT_RENDER] = (nullHWAccelerationEnable.CodecGlobal || nullHWAccelerationEnable.CtxRender); m_useNullHw[MOS_GPU_CONTEXT_RENDER2] = (nullHWAccelerationEnable.CodecGlobal || nullHWAccelerationEnable.CtxRender2); #endif // _DEBUG || _RELEASE_INTERNAL } CODECHAL_ENCODE_CHK_STATUS_RETURN(Initialize(codecHalSettings)); // Create MMC state if (m_mmcState == nullptr) { CODECHAL_ENCODE_CHK_NULL_RETURN(m_mmcState = MOS_New(CodecHalMmcState, m_hwInterface)); } // create resource allocator CODECHAL_ENCODE_CHK_NULL_RETURN(m_allocator = MOS_New(CodechalEncodeAllocator, this)); // create tracked buffer state #if defined (_HEVC_ENCODE_VME_SUPPORTED) || defined (_HEVC_ENCODE_VDENC_SUPPORTED) if (m_standard == CODECHAL_HEVC) { CODECHAL_ENCODE_CHK_NULL_RETURN(m_trackedBuf = MOS_New(CodechalEncodeTrackedBufferHevc, this)); } else #endif { CODECHAL_ENCODE_CHK_NULL_RETURN(m_trackedBuf = MOS_New(CodechalEncodeTrackedBuffer, this)); } MotionEstimationDisableCheck(); CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateResources()); CODECHAL_ENCODE_CHK_STATUS_RETURN(CreateGpuContexts()); if (m_hwInterface->UsesRenderEngine(codecHalSettings->codecFunction, codecHalSettings->standard)) { m_renderContextUsesNullHw = m_useNullHw[m_renderContext]; } if (CodecHalUsesVideoEngine(codecHalSettings->codecFunction)) { m_videoContextUsesNullHw = m_useNullHw[m_videoContext]; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnRegisterBBCompleteNotifyEvent( m_osInterface, m_videoContext)); } else { CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnRegisterBBCompleteNotifyEvent( m_osInterface, m_renderContext)); } if (!m_perfProfiler) { m_perfProfiler = MediaPerfProfiler::Instance(); CODECHAL_ENCODE_CHK_NULL_RETURN(m_perfProfiler); CODECHAL_ENCODE_CHK_STATUS_RETURN(m_perfProfiler->Initialize((void*)this, 
m_osInterface)); } return MOS_STATUS_SUCCESS; } MOS_STATUS CodechalEncoderState::Execute(void *params) { CODECHAL_ENCODE_FUNCTION_ENTER; PERF_UTILITY_AUTO(__FUNCTION__, PERF_ENCODE, PERF_LEVEL_HAL); MOS_TraceEventExt(EVENT_CODECHAL_EXECUTE, EVENT_TYPE_START, &m_codecFunction, sizeof(m_codecFunction), nullptr, 0); CODECHAL_ENCODE_CHK_STATUS_RETURN(Codechal::Execute(params)); EncoderParams *encodeParams = (EncoderParams *)params; // MSDK event handling CODECHAL_ENCODE_CHK_STATUS_RETURN(Mos_Solo_SetGpuAppTaskEvent(m_osInterface,encodeParams->gpuAppTaskEvent)); CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->SetWatchdogTimerThreshold(m_frameWidth, m_frameHeight)); if (m_frameNum == 0) { m_osInterface->pfnSetLatestVirtualNode(m_osInterface, m_videoGpuNode); } if (m_codecFunction == CODECHAL_FUNCTION_FEI_PRE_ENC) { CODECHAL_ENCODE_CHK_STATUS_RETURN(ExecutePreEnc(encodeParams)); } else { CODECHAL_ENCODE_CHK_STATUS_RETURN(ExecuteEnc(encodeParams)); } MOS_TraceEventExt(EVENT_CODECHAL_EXECUTE, EVENT_TYPE_END, nullptr, 0, nullptr, 0); return MOS_STATUS_SUCCESS; } // Encoder Public Interface Functions MOS_STATUS CodechalEncoderState::Initialize( CodechalSetting * settings) { MOS_STATUS eStatus = MOS_STATUS_SUCCESS; MOS_STATUS statusKey = MOS_STATUS_SUCCESS; CODECHAL_ENCODE_FUNCTION_ENTER; CODECHAL_ENCODE_CHK_NULL_RETURN(settings); m_storeData = 1; m_firstFrame = true; m_firstTwoFrames = true; m_standard = settings->standard; m_mode = settings->mode; m_codecFunction = settings->codecFunction; if (CodecHalUsesVideoEngine(m_codecFunction)) { m_pakEnabled = true; } if (m_hwInterface->UsesRenderEngine(m_codecFunction, m_standard)) { m_encEnabled = true; } MOS_USER_FEATURE_VALUE_DATA userFeatureData; if (m_encEnabled) { m_brcPakStatisticsSize = CODECHAL_ENCODE_BRC_PAK_STATISTICS_SIZE; m_hwScoreboardType = 1; m_encodeVfeMaxThreads = 0; #if (_DEBUG || _RELEASE_INTERNAL) MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); MOS_UserFeature_ReadValue_ID( nullptr, 
__MEDIA_USER_FEATURE_VALUE_ENCODE_VFE_MAX_THREADS_ID, &userFeatureData, m_osInterface->pOsContext); m_encodeVfeMaxThreads = (uint32_t)userFeatureData.u32Data; #endif // _DEBUG || _RELEASE_INTERNAL m_encodeVfeMaxThreadsScaling = 0; #if (_DEBUG || _RELEASE_INTERNAL) MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); MOS_UserFeature_ReadValue_ID( nullptr, __MEDIA_USER_FEATURE_VALUE_ENCODE_VFE_MAX_THREADS_SCALING_ID, &userFeatureData, m_osInterface->pOsContext); m_encodeVfeMaxThreadsScaling = (uint32_t)userFeatureData.i32Data; #endif // _DEBUG || _RELEASE_INTERNAL { MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); MOS_UserFeature_ReadValue_ID( nullptr, __MEDIA_USER_FEATURE_VALUE_ENCODE_HW_WALKER_ID, &userFeatureData, m_osInterface->pOsContext); m_hwWalker = (userFeatureData.i32Data) ? true : false; if (m_hwWalker) { m_walkerMode = (MHW_WALKER_MODE)0; #if (_DEBUG || _RELEASE_INTERNAL) MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); MOS_UserFeature_ReadValue_ID( nullptr, __MEDIA_USER_FEATURE_VALUE_ENCODE_HW_WALKER_MODE_ID, &userFeatureData, m_osInterface->pOsContext); m_walkerMode = (MHW_WALKER_MODE)userFeatureData.u32Data; #endif // _DEBUG || _RELEASE_INTERNAL if (MEDIA_IS_SKU(m_skuTable, FtrSliceShutdownOverride)) { //Default Slice State m_sliceShutdownDefaultState = (uint32_t)0; #if (_DEBUG || _RELEASE_INTERNAL) MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); MOS_UserFeature_ReadValue_ID( nullptr, __MEDIA_USER_FEATURE_VALUE_SLICE_SHUTDOWN_DEFAULT_STATE_ID, &userFeatureData, m_osInterface->pOsContext); m_sliceShutdownDefaultState = (uint32_t)userFeatureData.u32Data; #endif // _DEBUG || _RELEASE_INTERNAL //Requested Slice State m_sliceShutdownRequestState = (uint32_t)0; #if (_DEBUG || _RELEASE_INTERNAL) MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); MOS_UserFeature_ReadValue_ID( nullptr, __MEDIA_USER_FEATURE_VALUE_SLICE_SHUTDOWN_REQUEST_STATE_ID, &userFeatureData, m_osInterface->pOsContext); m_sliceShutdownRequestState 
= (uint32_t)userFeatureData.u32Data; #endif // _DEBUG || _RELEASE_INTERNAL //Slice Shutdown Resolution Threshold m_ssdResolutionThreshold = (uint32_t)0; #if (_DEBUG || _RELEASE_INTERNAL) MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); MOS_UserFeature_ReadValue_ID( nullptr, __MEDIA_USER_FEATURE_VALUE_SLICE_SHUTDOWN_RESOLUTION_THRESHOLD_ID, &userFeatureData, m_osInterface->pOsContext); m_ssdResolutionThreshold = (uint32_t)userFeatureData.i32Data; #endif // _DEBUG || _RELEASE_INTERNAL //Slice Shutdown Target Usage Threshold m_ssdTargetUsageThreshold = (uint32_t)0; #if (_DEBUG || _RELEASE_INTERNAL) MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); MOS_UserFeature_ReadValue_ID( nullptr, __MEDIA_USER_FEATURE_VALUE_SLICE_SHUTDOWN_TARGET_USAGE_THRESHOLD_ID, &userFeatureData, m_osInterface->pOsContext); m_ssdTargetUsageThreshold = (uint32_t)userFeatureData.i32Data; #endif // _DEBUG || _RELEASE_INTERNAL if (!m_sliceShutdownDefaultState && !m_sliceShutdownRequestState && !m_ssdTargetUsageThreshold && !m_ssdResolutionThreshold) { // slice shutdown used for power efficiency // use it in case of ult and if hw has more than 2 slices if (MEDIA_IS_SKU(m_skuTable, FtrULT)) { if ((GFX_IS_GEN_10_OR_LATER(m_platform) && m_gtSystemInfo->SliceCount >= 2) || MEDIA_IS_SKU(m_skuTable, FtrGT3)) { m_sliceShutdownDefaultState = CODECHAL_SLICE_SHUTDOWN_ONE_SLICE; m_sliceShutdownRequestState = CODECHAL_SLICE_SHUTDOWN_TWO_SLICES; m_ssdResolutionThreshold = m_hwInterface->m_ssdResolutionThreshold; m_ssdTargetUsageThreshold = m_hwInterface->m_ssdTargetUsageThreshold; } } else if (MEDIA_IS_SKU(m_skuTable, FtrGT4)) { m_sliceShutdownDefaultState = CODECHAL_SLICE_SHUTDOWN_ONE_SLICE; m_sliceShutdownRequestState = CODECHAL_SLICE_SHUTDOWN_TWO_SLICES; m_ssdResolutionThreshold = m_hwInterface->m_ssdResolutionThreshold; m_ssdTargetUsageThreshold = m_hwInterface->m_ssdTargetUsageThreshold; } } } } } #if (_DEBUG || _RELEASE_INTERNAL) MOS_ZeroMemory(&userFeatureData, 
sizeof(userFeatureData)); MOS_UserFeature_ReadValue_ID( nullptr, __MEDIA_USER_FEATURE_VALUE_ENCODE_BRC_SOFTWARE_ID, &userFeatureData, m_osInterface->pOsContext); if (userFeatureData.i32Data) { char path_buffer[256]; MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); MOS_ZeroMemory(path_buffer, 256); userFeatureData.StringData.pStringData = path_buffer; statusKey = MOS_UserFeature_ReadValue_ID( nullptr, __MEDIA_USER_FEATURE_VALUE_ENCODE_BRC_SOFTWARE_PATH_ID, &userFeatureData, m_osInterface->pOsContext); if (statusKey == MOS_STATUS_SUCCESS && userFeatureData.StringData.uSize > 0) { CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnLoadLibrary(m_osInterface, path_buffer, &m_swBrcMode)); } } // SW BRC DLL Reporting CodecHalEncode_WriteKey(__MEDIA_USER_FEATURE_VALUE_ENCODE_BRC_SOFTWARE_IN_USE_ID, ((m_swBrcMode == nullptr) ? false : true), m_osInterface->pOsContext); #endif // _DEBUG || _RELEASE_INTERNAL if (MEDIA_IS_SKU(m_skuTable, FtrSliceShutdown)) { MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); MOS_UserFeature_ReadValue_ID( nullptr, __MEDIA_USER_FEATURE_VALUE_SLICE_SHUTDOWN_ENABLE_ID, &userFeatureData, m_osInterface->pOsContext); m_sliceShutdownEnable = (userFeatureData.i32Data) ? true : false; } m_targetUsageOverride = (uint8_t)0; #if (_DEBUG || _RELEASE_INTERNAL) MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); MOS_UserFeature_ReadValue_ID( nullptr, __MEDIA_USER_FEATURE_VALUE_ENCODE_TARGET_USAGE_OVERRIDE_ID, &userFeatureData, m_osInterface->pOsContext); m_targetUsageOverride = (uint8_t)userFeatureData.u32Data; #endif // _DEBUG || _RELEASE_INTERNAL } if (m_pakEnabled) { //RCPanic settings MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); MOS_UserFeature_ReadValue_ID( nullptr, __MEDIA_USER_FEATURE_VALUE_RC_PANIC_ENABLE_ID, &userFeatureData, m_osInterface->pOsContext); m_panicEnable = (userFeatureData.i32Data) ? 
true : false; MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); userFeatureData.i32Data = 1; userFeatureData.i32DataFlag = MOS_USER_FEATURE_VALUE_DATA_FLAG_CUSTOM_DEFAULT_VALUE_TYPE; MOS_UserFeature_ReadValue_ID( nullptr, __MEDIA_USER_FEATURE_VALUE_ENCODE_SUPPRESS_RECON_PIC_ENABLE_ID, &userFeatureData, m_osInterface->pOsContext); m_suppressReconPicSupported = (userFeatureData.u32Data) ? true : false; } #if (_DEBUG || _RELEASE_INTERNAL) MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); statusKey = MOS_UserFeature_ReadValue_ID( NULL, __MEDIA_USER_FEATURE_VALUE_ENCODE_ENABLE_COMPUTE_CONTEXT_ID, &userFeatureData, m_osInterface->pOsContext); if (statusKey == MOS_STATUS_SUCCESS) { // Change the default value only when CCS=on/off is set directly m_computeContextEnabled = (userFeatureData.u32Data) ? true : false; } #endif #if USE_CODECHAL_DEBUG_TOOL MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); MOS_UserFeature_ReadValue_ID( nullptr, __MEDIA_USER_FEATURE_VALUE_CODECHAL_ENABLE_FAKE_HEADER_SIZE_ID, &userFeatureData, m_osInterface->pOsContext); m_enableFakeHrdSize = (uint32_t)userFeatureData.u32Data; MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); MOS_UserFeature_ReadValue_ID( nullptr, __MEDIA_USER_FEATURE_VALUE_CODECHAL_FAKE_IFRAME_HEADER_SIZE_ID, &userFeatureData, m_osInterface->pOsContext); m_fakeIFrameHrdSize = (uint32_t)userFeatureData.u32Data; MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); MOS_UserFeature_ReadValue_ID( nullptr, __MEDIA_USER_FEATURE_VALUE_CODECHAL_FAKE_PBFRAME_HEADER_SIZE_ID, &userFeatureData, m_osInterface->pOsContext); m_fakePBFrameHrdSize = (uint32_t)userFeatureData.u32Data; #endif m_oriFrameWidth = settings->width; m_oriFrameHeight = settings->height; m_picWidthInMb = (uint16_t)CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_oriFrameWidth); m_picHeightInMb = (uint16_t)CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_oriFrameHeight); m_frameWidth = m_picWidthInMb * CODECHAL_MACROBLOCK_WIDTH; m_frameHeight = m_picHeightInMb 
* CODECHAL_MACROBLOCK_HEIGHT; m_createWidth = m_frameWidth; m_createHeight = m_frameHeight; // HME Scaling WxH m_downscaledWidthInMb4x = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_frameWidth / SCALE_FACTOR_4x); m_downscaledHeightInMb4x = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameHeight / SCALE_FACTOR_4x); m_downscaledWidth4x = m_downscaledWidthInMb4x * CODECHAL_MACROBLOCK_WIDTH; m_downscaledHeight4x = m_downscaledHeightInMb4x * CODECHAL_MACROBLOCK_HEIGHT; // SuperHME Scaling WxH m_downscaledWidthInMb16x = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_frameWidth / SCALE_FACTOR_16x); m_downscaledHeightInMb16x = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameHeight / SCALE_FACTOR_16x); m_downscaledWidth16x = m_downscaledWidthInMb16x * CODECHAL_MACROBLOCK_WIDTH; m_downscaledHeight16x = m_downscaledHeightInMb16x * CODECHAL_MACROBLOCK_HEIGHT; // UltraHME Scaling WxH m_downscaledWidthInMb32x = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_frameWidth / SCALE_FACTOR_32x); m_downscaledHeightInMb32x = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameHeight / SCALE_FACTOR_32x); m_downscaledWidth32x = m_downscaledWidthInMb32x * CODECHAL_MACROBLOCK_WIDTH; m_downscaledHeight32x = m_downscaledHeightInMb32x * CODECHAL_MACROBLOCK_HEIGHT; m_minScaledDimension = CODECHAL_ENCODE_MIN_SCALED_SURFACE_SIZE; m_minScaledDimensionInMb = (CODECHAL_ENCODE_MIN_SCALED_SURFACE_SIZE + 15) >> 4; m_currOriginalPic.PicFlags = PICTURE_INVALID; m_currOriginalPic.FrameIdx = 0; m_currOriginalPic.PicEntry = 0; m_hwInterface->GetCpInterface()->RegisterParams(settings->GetCpParams()); // flag to enable kmd for the frame tracking (so encoder driver doesn't need to send a separate command buffer // for frame tracking purpose). Currently this feature is disabled for HEVC. // For HEVC, this feature will be enabled later. 
MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); statusKey = MOS_UserFeature_ReadValue_ID( nullptr, __MEDIA_USER_FEATURE_VALUE_ENCODE_ENABLE_FRAME_TRACKING_ID, &userFeatureData, m_osInterface->pOsContext); if (statusKey == MOS_STATUS_SUCCESS) { m_frameTrackingEnabled = userFeatureData.i32Data ? true : false; } else { m_frameTrackingEnabled = m_osInterface->bEnableKmdMediaFrameTracking ? true: false; } if (m_standard == CODECHAL_AVC) { if (CodecHalUsesVideoEngine(m_codecFunction) && !(MEDIA_IS_WA(m_waTable, WaForceAllocateLML3))) { m_inlineEncodeStatusUpdate = m_osInterface->bInlineCodecStatusUpdate ? true: false; } } if (m_standard == CODECHAL_AVC) { m_bRenderOcaEnabled = true; } // Disable SHME and UHME if HME is disabled if(!m_hmeSupported) { m_16xMeSupported = false; m_32xMeSupported = false; } // Disable UHME if SHME is disabled else if(!m_16xMeSupported) { m_32xMeSupported = false; } // Set Vdbox index in use m_vdboxIndex = (m_videoGpuNode == MOS_GPU_NODE_VIDEO2)? MHW_VDBOX_NODE_2 : MHW_VDBOX_NODE_1; if (!m_feiEnable) { eStatus = AllocateMDFResources(); } if (eStatus != MOS_STATUS_SUCCESS) { Destroy(); } return eStatus; } MOS_STATUS CodechalEncoderState::AllocateMDFResources() { uint32_t devOp; if (CodecHalIsFeiEncode(m_codecFunction) && m_codecFunction != CODECHAL_FUNCTION_FEI_PAK) { devOp = CM_DEVICE_CREATE_OPTION_SCRATCH_SPACE_DISABLE; if (m_cmDev == nullptr) { CODECHAL_ENCODE_CHK_NULL_RETURN(m_osInterface); m_osInterface->pfnNotifyStreamIndexSharing(m_osInterface); CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnCreateCmDevice(m_osInterface->pOsContext, m_cmDev, devOp, CM_DEVICE_CREATE_PRIORITY_DEFAULT)); } //just WA for issues in MDF null support if (!m_cmQueue) { CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->CreateQueue(m_cmQueue)); } if (!m_cmTask) { CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->CreateTask(m_cmTask)); } } return MOS_STATUS_SUCCESS; } MOS_STATUS CodechalEncoderState::DestroyMDFResources() { uint32_t i; if (m_cmDev && m_cmTask) { 
m_cmDev->DestroyTask(m_cmTask); m_cmTask = nullptr; } if (m_cmDev && m_osInterface) { m_osInterface->pfnDestroyCmDevice(m_cmDev); m_cmDev = nullptr; } return MOS_STATUS_SUCCESS; } MOS_STATUS CodechalEncoderState::SetMfeSharedState(MfeSharedState *pMfeSharedState) { CODECHAL_ENCODE_FUNCTION_ENTER; CODECHAL_ENCODE_CHK_NULL_RETURN(pMfeSharedState); m_mfeEncodeSharedState = pMfeSharedState; return MOS_STATUS_SUCCESS; } MOS_STATUS CodechalEncoderState::AddKernelMdf( CmDevice * device, CmQueue * queue, CmKernel * kernel, CmTask * task, CmThreadSpace *threadspace, CmEvent *& event, bool isEnqueue) { CODECHAL_ENCODE_CHK_NULL_RETURN(device); CODECHAL_ENCODE_CHK_NULL_RETURN(kernel); CODECHAL_ENCODE_CHK_NULL_RETURN(queue); CODECHAL_ENCODE_CHK_NULL_RETURN(task); CODECHAL_ENCODE_CHK_NULL_RETURN(threadspace); CODECHAL_ENCODE_CHK_STATUS_RETURN(kernel->AssociateThreadSpace(threadspace)); CODECHAL_ENCODE_CHK_STATUS_RETURN(task->AddKernel(kernel)); if (isEnqueue) { CODECHAL_ENCODE_CHK_STATUS_RETURN(queue->Enqueue(task, event)); task->Reset(); } else { CODECHAL_ENCODE_CHK_STATUS_RETURN(task->AddSync()); } return MOS_STATUS_SUCCESS; } MOS_STATUS CodechalEncoderState::CreateMDFKernelResource( CodechalEncodeMdfKernelResource *resource, uint8_t kernelNum, uint8_t bufNum, uint8_t surfNum, uint8_t vmeSurfNum, uint16_t curbeSize) { MOS_STATUS eStatus = MOS_STATUS_SUCCESS; CODECHAL_ENCODE_CHK_NULL_RETURN(resource); if (kernelNum > 0) { resource->ppKernel = (CmKernel **)MOS_AllocAndZeroMemory(sizeof(CmKernel *) * kernelNum); resource->KernelNum = kernelNum; } if (bufNum > 0) { resource->ppCmBuf = (CmBuffer **)MOS_AllocAndZeroMemory(sizeof(CmBuffer *) * bufNum); resource->BufNum = bufNum; } if (surfNum > 0) { resource->ppCmSurf = (CmSurface2D **)MOS_AllocAndZeroMemory(sizeof(CmSurface2D *) * surfNum); resource->SurfNum = surfNum; } if (vmeSurfNum > 0) { resource->ppCmVmeSurf = (SurfaceIndex **)MOS_AllocAndZeroMemory(sizeof(SurfaceIndex *) * vmeSurfNum); resource->VmeSurfNum = vmeSurfNum; } if 
(curbeSize > 0) { resource->pCurbe = (uint8_t *)MOS_AllocAndZeroMemory(curbeSize); resource->wCurbeSize = curbeSize; } resource->e = nullptr; return MOS_STATUS_SUCCESS; } MOS_STATUS CodechalEncoderState::DestroyMDFKernelResource( CodechalEncodeMdfKernelResource *resource) { int i; CODECHAL_ENCODE_CHK_NULL_RETURN(resource); CODECHAL_ENCODE_CHK_STATUS_RETURN(FreeMDFKernelSurfaces(resource)); if (resource->ppKernel && resource->KernelNum) { for (i = 0; i < resource->KernelNum; i++) { if (resource->ppKernel != nullptr) { m_cmDev->DestroyKernel(resource->ppKernel[i]); resource->ppKernel[i] = nullptr; } } MOS_FreeMemory(resource->ppKernel); resource->ppKernel = nullptr; } if (resource->pTS) { m_cmDev->DestroyThreadSpace(resource->pTS); resource->pTS = nullptr; } if (resource->ppCmBuf && resource->BufNum) { MOS_FreeMemory(resource->ppCmBuf); resource->ppCmBuf = nullptr; resource->BufNum = 0; } if (resource->ppCmSurf && resource->SurfNum) { MOS_FreeMemory(resource->ppCmSurf); resource->ppCmSurf = nullptr; resource->SurfNum = 0; } if (resource->ppCmVmeSurf && resource->VmeSurfNum) { MOS_FreeMemory(resource->ppCmVmeSurf); resource->ppCmVmeSurf = nullptr; resource->VmeSurfNum = 0; } if (resource->ppKernel && resource->KernelNum) { MOS_FreeMemory(resource->ppKernel); resource->ppKernel = nullptr; resource->KernelNum = 0; } if (resource->pCurbe && resource->wCurbeSize) { MOS_FreeMemory(resource->pCurbe); resource->pCurbe = nullptr; resource->wCurbeSize = 0; } if (resource->pCommonISA) { MOS_FreeMemory(resource->pCommonISA); resource->pCommonISA = nullptr; } return MOS_STATUS_SUCCESS; } MOS_STATUS CodechalEncoderState::FreeMDFKernelSurfaces( CodechalEncodeMdfKernelResource* resource) { int i; for (i = 0; i < resource->VmeSurfNum; i++) { if (resource->ppCmVmeSurf[i] != nullptr && resource->ppCmVmeSurf[i] != (SurfaceIndex *)CM_NULL_SURFACE) { CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroyVmeSurfaceG7_5(resource->ppCmVmeSurf[i])); resource->ppCmVmeSurf[i] = nullptr; } } for (i 
= 0; i < resource->BufNum; i++) { if (resource->ppCmBuf[i] != nullptr) { CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroySurface(resource->ppCmBuf[i])); resource->ppCmBuf[i] = nullptr; } } for (i = 0; i < resource->SurfNum; i++) { if (resource->ppCmSurf[i] != nullptr) { CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroySurface(resource->ppCmSurf[i])); resource->ppCmSurf[i] = nullptr; } } return MOS_STATUS_SUCCESS; } MOS_STATUS CodechalEncoderState::InitCommon() { MOS_STATUS eStatus = MOS_STATUS_SUCCESS; CODECHAL_ENCODE_FUNCTION_ENTER; EncoderParams* encodeParams = &m_encodeParams; m_newSeq = encodeParams->bNewSeq ? true: false; // used by all except JPEG m_mbDataBufferSize = encodeParams->dwMbDataBufferSize; // used by all except JPEG m_newVuiData = encodeParams->bNewVuiData ? true: false; // used by AVC and MPEG2 m_picQuant = encodeParams->bPicQuant ? true: false; // used by AVC and MPEG2 m_newQmatrixData = encodeParams->bNewQmatrixData ? true: false; // used by AVC and MPEG2 m_numSlices = encodeParams->dwNumSlices; // used by all except VP9 m_slcData = (PCODEC_ENCODER_SLCDATA)(encodeParams->pSlcHeaderData); // used by AVC, MPEG2, and HEVC CODECHAL_ENCODE_CHK_NULL_RETURN(encodeParams->presBitstreamBuffer); m_rawSurface = *(encodeParams->psRawSurface); // used by all m_resBitstreamBuffer = *(encodeParams->presBitstreamBuffer); // used by all if(encodeParams->presMetadataBuffer) { m_presMetadataBuffer = encodeParams->presMetadataBuffer; m_metaDataOffset = encodeParams->metaDataOffset; } CODECHAL_ENCODE_CHK_COND_RETURN( Mos_ResourceIsNull(&m_rawSurface.OsResource), "Raw surface is nullptr!"); m_rawSurfaceToEnc = m_rawSurfaceToPak = &m_rawSurface; if(encodeParams->psReconSurface) { m_reconSurface = *(encodeParams->psReconSurface); // used by all except JPEG } if(encodeParams->pBSBuffer) { m_bsBuffer = *(encodeParams->pBSBuffer); // used by all except VP9 } return eStatus; } void CodechalEncoderState::ResizeOnResChange() { CODECHAL_ENCODE_FUNCTION_ENTER; // if 
resolution changed, free existing tracked buffer resources m_trackedBuf->Resize(); } MOS_STATUS CodechalEncoderState::CheckResChangeAndCsc() { CODECHAL_ENCODE_FUNCTION_ENTER; if (m_resolutionChanged) { ResizeOnResChange(); } if (m_cscDsState) { // check recon surface's alignment meet HW requirement CODECHAL_ENCODE_CHK_STATUS_RETURN( m_cscDsState->CheckReconSurfaceAlignment(&m_reconSurface)); if (!m_cscDsState->IsEnabled() || CodecHal_PictureIsField(m_currOriginalPic) || CodecHal_PictureIsInterlacedFrame(m_currOriginalPic)) { // CSC disabled for interlaced frame m_cscDsState->ResetCscFlag(); // check raw surface's alignment meet HW requirement CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cscDsState->CheckRawSurfaceAlignment(m_rawSurfaceToEnc)); } else { // check if we need to do CSC or copy non-aligned surface CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cscDsState->CheckCondition()); } } return MOS_STATUS_SUCCESS; } // Function to allocate all resources common to all encoders MOS_STATUS CodechalEncoderState::AllocateResources() { MOS_STATUS eStatus = MOS_STATUS_SUCCESS; CODECHAL_ENCODE_FUNCTION_ENTER; uint32_t numMbs = m_picWidthInMb * ((m_picHeightInMb+1)>>1)<<1; // initiate allocation paramters and lock flags MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear; MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS)); allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER; allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR; allocParamsForBufferLinear.Format = Format_Buffer; MOS_ALLOC_GFXRES_PARAMS allocParams2D; MOS_ZeroMemory(&allocParams2D, sizeof(allocParams2D)); allocParams2D.Type = MOS_GFXRES_2D; allocParams2D.TileType = MOS_TILE_LINEAR; allocParams2D.Format = Format_Buffer_2D; MOS_LOCK_PARAMS lockFlagsNoOverWrite;; MOS_ZeroMemory(&lockFlagsNoOverWrite, sizeof(MOS_LOCK_PARAMS)); lockFlagsNoOverWrite.WriteOnly = 1; lockFlagsNoOverWrite.NoOverWrite = 1; MOS_LOCK_PARAMS lockFlags; MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS)); lockFlags.WriteOnly = 1; // 
create VME and MFX sync objects if ((m_codecFunction == CODECHAL_FUNCTION_ENC_PAK) || (m_codecFunction == (CODECHAL_FUNCTION_ENC | CODECHAL_FUNCTION_ENC_PAK)) || (m_codecFunction == CODECHAL_FUNCTION_FEI_ENC_PAK) || (m_codecFunction == (CODECHAL_FUNCTION_FEI_ENC | CODECHAL_FUNCTION_FEI_ENC_PAK))) { // Create OS synchronization object to sync between MFX => VME // if 3 is not good enough, need to increase MBCode buffer number m_semaphoreMaxCount = MOS_MAX_SEMAPHORE_COUNT; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnCreateSyncResource(m_osInterface, &m_resSyncObjectRenderContextInUse)); // Create OS synchronization object to sync between VME => MFX CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnCreateSyncResource(m_osInterface, &m_resSyncObjectVideoContextInUse)); } // Create VME and VDENC/PAK sync objects if (m_codecFunction == CODECHAL_FUNCTION_ENC_VDENC_PAK) { m_semaphoreMaxCount = MOS_MAX_SEMAPHORE_COUNT; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnCreateSyncResource(m_osInterface, &m_resSyncObjectRenderContextInUse)); } //For HEVC, moved to standard specific as LCU size is not available here if (m_hwInterface->GetMfxInterface()->IsRowStoreCachingSupported() && ((m_mode == CODECHAL_ENCODE_MODE_AVC) || (m_mode == CODECHAL_ENCODE_MODE_VP9 && m_vdencEnabled))) { MHW_VDBOX_ROWSTORE_PARAMS rowstoreParams = {}; rowstoreParams.Mode = m_mode; rowstoreParams.dwPicWidth = m_frameWidth; rowstoreParams.bMbaff = false; m_hwInterface->SetRowstoreCachingOffsets(&rowstoreParams); } // eStatus query reporting // HW requires the MI_CONDITIONAL_BATCH_BUFFER_END compare address aligned with cache line since TGL, // this change will guarantee the multi pak pass BRC works correctly m_encodeStatusBuf.dwReportSize = MOS_ALIGN_CEIL(sizeof(EncodeStatus), MHW_CACHELINE_SIZE); uint32_t size = m_encodeStatusBuf.dwReportSize * CODECHAL_ENCODE_STATUS_NUM + sizeof(uint32_t) * 2; allocParamsForBufferLinear.dwBytes = size; allocParamsForBufferLinear.pBufName = 
"StatusQueryBuffer"; allocParamsForBufferLinear.bIsPersistent = true; // keeping status buffer persistent since its used in all command buffers CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN( m_osInterface->pfnAllocateResource( m_osInterface, &allocParamsForBufferLinear, &m_encodeStatusBuf.resStatusBuffer), "Failed to allocate Encode eStatus Buffer."); CODECHAL_ENCODE_CHK_STATUS_RETURN( m_osInterface->pfnSkipResourceSync( &m_encodeStatusBuf.resStatusBuffer)); uint8_t* data = (uint8_t*)m_osInterface->pfnLockResource( m_osInterface, &(m_encodeStatusBuf.resStatusBuffer), &lockFlagsNoOverWrite); MOS_ZeroMemory(data, size); m_encodeStatusBuf.pData = (uint32_t*)data; m_encodeStatusBuf.pEncodeStatus = (uint8_t*)(data + sizeof(uint32_t) * 2); m_encodeStatusBuf.dwSize = size; // Addresses writen to by HW commands (MI_STORE_DATA_IMM, MI_FLUSH_DW, PIPE_CONTROL) must be QW aligned since these // commands are capable of writing QWs so the least significant 3 bits of the address field are not used for the // actual address m_encodeStatusBuf.dwStoreDataOffset = 0; m_encodeStatusBuf.dwBSByteCountOffset = CODECHAL_OFFSETOF(EncodeStatus, dwMFCBitstreamByteCountPerFrame); m_encodeStatusBuf.dwBSSEBitCountOffset = CODECHAL_OFFSETOF(EncodeStatus, dwMFCBitstreamSyntaxElementOnlyBitCount); m_encodeStatusBuf.dwImageStatusMaskOffset = CODECHAL_OFFSETOF(EncodeStatus, dwImageStatusMask); m_encodeStatusBuf.dwImageStatusCtrlOffset = CODECHAL_OFFSETOF(EncodeStatus, ImageStatusCtrl); m_encodeStatusBuf.dwNumSlicesOffset = CODECHAL_OFFSETOF(EncodeStatus, NumSlices); m_encodeStatusBuf.dwErrorFlagOffset = CODECHAL_OFFSETOF(EncodeStatus, dwErrorFlags); m_encodeStatusBuf.dwBRCQPReportOffset = CODECHAL_OFFSETOF(EncodeStatus, BrcQPReport); m_encodeStatusBuf.dwNumPassesOffset = CODECHAL_OFFSETOF(EncodeStatus, dwNumberPasses); m_encodeStatusBuf.dwQpStatusCountOffset = CODECHAL_OFFSETOF(EncodeStatus, QpStatusCount); m_encodeStatusBuf.dwImageStatusCtrlOfLastBRCPassOffset = CODECHAL_OFFSETOF(EncodeStatus, 
ImageStatusCtrlOfLastBRCPass); m_encodeStatusBuf.dwSceneChangedOffset = CODECHAL_OFFSETOF(EncodeStatus, dwSceneChangedFlag); m_encodeStatusBuf.dwSumSquareErrorOffset = CODECHAL_OFFSETOF(EncodeStatus, sumSquareError[0]); m_encodeStatusBuf.dwSliceReportOffset = CODECHAL_OFFSETOF(EncodeStatus, sliceReport); m_encodeStatusBuf.dwHuCStatusMaskOffset = CODECHAL_OFFSETOF(EncodeStatus, HuCStatusRegMask); m_encodeStatusBuf.dwHuCStatusRegOffset = CODECHAL_OFFSETOF(EncodeStatus, HuCStatusReg); m_encodeStatusBuf.dwHuCStatus2RegOffset = CODECHAL_OFFSETOF(EncodeStatus, HuCStatus2Reg); m_encodeStatusBuf.dwLookaheadStatusOffset = CODECHAL_OFFSETOF(EncodeStatus, lookaheadStatus); m_encodeStatusBuf.dwSADLumaOffset = CODECHAL_OFFSETOF(EncodeStatus, dwSADLuma); m_encodeStatusBuf.dwIntraBlockCountOffset = CODECHAL_OFFSETOF(EncodeStatus, dwIntraBlockCount); m_encodeStatusBuf.dwInterBlockCountOffset = CODECHAL_OFFSETOF(EncodeStatus, dwInterBlockCount); m_encodeStatusBuf.dwSkipBlockCountOffset = CODECHAL_OFFSETOF(EncodeStatus, dwSkipBlockCount); m_encodeStatusBuf.wCurrIndex = 0; m_encodeStatusBuf.wFirstIndex = 0; if (m_encEnabled) { m_encodeStatusBufRcs.dwReportSize = MOS_ALIGN_CEIL(sizeof(EncodeStatus), sizeof(uint64_t)); size = m_encodeStatusBufRcs.dwReportSize * CODECHAL_ENCODE_STATUS_NUM + sizeof(uint32_t) * 2; allocParamsForBufferLinear.dwBytes = size; allocParamsForBufferLinear.pBufName = "StatusQueryBufferRcs"; allocParamsForBufferLinear.bIsPersistent = true; // keeping status buffer persistent since its used in all command buffers eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource( m_osInterface, &allocParamsForBufferLinear, &m_encodeStatusBufRcs.resStatusBuffer); if (eStatus != MOS_STATUS_SUCCESS) { CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Encode eStatus Buffer."); return eStatus; } data = (uint8_t*)m_osInterface->pfnLockResource( m_osInterface, &(m_encodeStatusBufRcs.resStatusBuffer), &lockFlagsNoOverWrite); if (data == nullptr) { 
CODECHAL_ENCODE_ASSERTMESSAGE("Failed to lock Encode eStatus Buffer RCS."); return eStatus; } MOS_ZeroMemory(data, size); m_encodeStatusBufRcs.pData = (uint32_t*)data; m_encodeStatusBufRcs.pEncodeStatus = (uint8_t*)(data + sizeof(uint32_t) * 2); m_encodeStatusBufRcs.dwSize = size; m_encodeStatusBufRcs.dwStoreDataOffset = 0; m_encodeStatusBufRcs.wCurrIndex = 0; m_encodeStatusBufRcs.wFirstIndex = 0; } if (m_pakEnabled) { m_stateHeapInterface->pfnSetCmdBufStatusPtr(m_stateHeapInterface, m_encodeStatusBuf.pData); } else { m_stateHeapInterface->pfnSetCmdBufStatusPtr(m_stateHeapInterface, m_encodeStatusBufRcs.pData); } MOS_LOCK_PARAMS lockFlagsWriteOnly; MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS)); lockFlagsWriteOnly.WriteOnly = 1; if(m_inlineEncodeStatusUpdate) { m_atomicScratchBuf.dwSize = MOS_ALIGN_CEIL(sizeof(AtomicScratchBuffer), sizeof(uint64_t)); allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER; allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR; allocParamsForBufferLinear.Format = Format_Buffer; size = MHW_CACHELINE_SIZE * 4 * 2; // each set of scratch is 4 cacheline size, and allocate 2 set. 
allocParamsForBufferLinear.dwBytes = size; allocParamsForBufferLinear.pBufName = "atomic sratch buffer"; eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource( m_osInterface, &allocParamsForBufferLinear, &(m_atomicScratchBuf.resAtomicScratchBuffer)); if (eStatus != MOS_STATUS_SUCCESS) { CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Finger Print Source Buffer."); return eStatus; } data = (uint8_t*)m_osInterface->pfnLockResource( m_osInterface, &(m_atomicScratchBuf.resAtomicScratchBuffer), &lockFlagsWriteOnly); if (data == nullptr) { CODECHAL_ENCODE_ASSERTMESSAGE("Failed to lock Finger Print Source Buffer."); return eStatus; } MOS_ZeroMemory(data, size); m_atomicScratchBuf.pData = (uint32_t*)data; m_atomicScratchBuf.dwSize = size; m_atomicScratchBuf.dwZeroValueOffset = 0; m_atomicScratchBuf.dwOperand1Offset = MHW_CACHELINE_SIZE; m_atomicScratchBuf.dwOperand2Offset = MHW_CACHELINE_SIZE * 2; m_atomicScratchBuf.dwOperand3Offset = MHW_CACHELINE_SIZE * 3; m_atomicScratchBuf.wEncodeUpdateIndex = 0; m_atomicScratchBuf.wTearDownIndex = 1; m_atomicScratchBuf.dwOperandSetSize = MHW_CACHELINE_SIZE * 4; } if (m_pakEnabled) { if(m_hwInterface->GetMfxInterface()->IsDeblockingFilterRowstoreCacheEnabled() == false) { // Deblocking Filter Row Store Scratch buffer allocParamsForBufferLinear.dwBytes = m_picWidthInMb * 4 * CODECHAL_CACHELINE_SIZE; // 4 cachelines per MB allocParamsForBufferLinear.pBufName = "Deblocking Filter Row Store Scratch Buffer"; CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN( m_osInterface->pfnAllocateResource( m_osInterface, &allocParamsForBufferLinear, &m_resDeblockingFilterRowStoreScratchBuffer), "Failed to allocate Deblocking Filter Row Store Scratch Buffer."); } if(m_hwInterface->GetMfxInterface()->IsBsdMpcRowstoreCacheEnabled() == false) { // MPC Row Store Scratch buffer allocParamsForBufferLinear.dwBytes = m_picWidthInMb * 2 * 64; // 2 cachelines per MB allocParamsForBufferLinear.pBufName = "MPC Row Store Scratch Buffer"; 
CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN( m_osInterface->pfnAllocateResource( m_osInterface, &allocParamsForBufferLinear, &m_resMPCRowStoreScratchBuffer), "Failed to allocate MPC Row Store Scratch Buffer."); } if (!m_vdencEnabled && m_standard != CODECHAL_HEVC) // StreamOut is needed for HEVC VDEnc { // streamout data buffer allocParamsForBufferLinear.dwBytes = numMbs * MFX_PAK_STREAMOUT_DATA_BYTE_SIZE * sizeof(uint32_t); allocParamsForBufferLinear.pBufName = "Pak StreamOut Buffer"; for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++) { CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN( m_osInterface->pfnAllocateResource( m_osInterface, &allocParamsForBufferLinear, &m_resStreamOutBuffer[i]), "Failed to allocate Pak Stream Out Buffer."); } } } if (m_encEnabled || m_vdencEnabled) { // Scaled surfaces are required to run both HME and IFrameDist CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateScalingResources()); } if(m_encEnabled && (!m_vdencEnabled)) { for (auto i = 0; i < CODECHAL_ENCODE_MAX_NUM_MAD_BUFFERS; i++) { allocParamsForBufferLinear.dwBytes = CODECHAL_MAD_BUFFER_SIZE; allocParamsForBufferLinear.pBufName = "MAD Data Buffer"; CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN( m_osInterface->pfnAllocateResource( m_osInterface, &allocParamsForBufferLinear, &m_resMadDataBuffer[i]), "Failed to allocate MAD Data Buffer."); } } if (m_vdencEnabled) { // VDENC BRC PAK MMIO buffer allocParamsForBufferLinear.dwBytes = sizeof(VdencBrcPakMmio); allocParamsForBufferLinear.pBufName = "VDENC BRC PAK MMIO Buffer"; CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN( m_osInterface->pfnAllocateResource( m_osInterface, &allocParamsForBufferLinear, &m_resPakMmioBuffer), "%s: Failed to allocate VDENC BRC PAK MMIO Buffer\n", __FUNCTION__); // VDENC Huc Error Status Buffer allocParamsForBufferLinear.dwBytes = sizeof(VdencHucErrorStatus); allocParamsForBufferLinear.pBufName = "VDENC Huc Error Status Buffer"; CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN( m_osInterface->pfnAllocateResource( m_osInterface, 
&allocParamsForBufferLinear, &m_resHucErrorStatusBuffer), "%s: Failed to allocate VDENC Huc Error Status Buffer\n", __FUNCTION__); // VDEnc StreamIn data buffers, shared between driver/ME kernel/VDEnc if ((m_mode == CODECHAL_ENCODE_MODE_HEVC) || (m_mode == CODECHAL_ENCODE_MODE_VP9)) { allocParamsForBufferLinear.dwBytes = (MOS_ALIGN_CEIL(m_frameWidth, 64)/32) * (MOS_ALIGN_CEIL(m_frameHeight, 64)/32) * CODECHAL_CACHELINE_SIZE; } else { allocParamsForBufferLinear.dwBytes = m_picWidthInMb * m_picHeightInMb * CODECHAL_CACHELINE_SIZE; } allocParamsForBufferLinear.pBufName = "VDEnc StreamIn Data Buffer"; for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++) { CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN( m_osInterface->pfnAllocateResource( m_osInterface, &allocParamsForBufferLinear, &m_resVdencStreamInBuffer[i]), "Failed to allocate VDEnc StreamIn Data Buffer."); data = (uint8_t*)m_osInterface->pfnLockResource( m_osInterface, &m_resVdencStreamInBuffer[i], &lockFlags); CODECHAL_ENCODE_CHK_NULL_RETURN(data); MOS_ZeroMemory( data, allocParamsForBufferLinear.dwBytes); m_osInterface->pfnUnlockResource(m_osInterface, &m_resVdencStreamInBuffer[i]); } } if (m_vdencEnabled) { // HUC STATUS 2 Buffer for HuC status check in COND_BB_END allocParamsForBufferLinear.dwBytes = sizeof(uint64_t); allocParamsForBufferLinear.pBufName = "HUC STATUS 2 Buffer"; CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN( m_osInterface->pfnAllocateResource( m_osInterface, &allocParamsForBufferLinear, &m_resHucStatus2Buffer), "%s: Failed to allocate HUC STATUS 2 Buffer\n", __FUNCTION__); } allocParamsForBufferLinear.dwBytes = sizeof(uint32_t); allocParamsForBufferLinear.pBufName = "PredicationBuffer"; CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN( m_osInterface->pfnAllocateResource( m_osInterface, &allocParamsForBufferLinear, &m_predicationBuffer), "%s: Failed to allocate predication buffer\n", __FUNCTION__); return eStatus; } MOS_STATUS CodechalEncoderState::AllocateScalingResources() { uint32_t numMBs, size; 
MOS_ALLOC_GFXRES_PARAMS allocParamsForBuffer2D; MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear; MOS_STATUS eStatus = MOS_STATUS_SUCCESS; CODECHAL_ENCODE_FUNCTION_ENTER; //Allocate the Batch Buffer for scaling Kernel. numMBs = m_picWidthInMb * ((m_picHeightInMb + 1) >> 1) << 1; size = m_hwInterface->GetMediaObjectBufferSize( numMBs, 64); for (int i = 0; i < CODECHAL_ENCODE_VME_BBUF_NUM; i++) { MOS_ZeroMemory(&m_scalingBBUF[i].BatchBuffer, sizeof(m_scalingBBUF[0].BatchBuffer)); /* For CM based Downscale kernel, unlike the old asm based downscale kernel, HW walker can be used as no inline data is required by the kernel. */ if (!m_useCmScalingKernel && !m_useMwWlkrForAsmScalingKernel) { m_scalingBBUF[i].BatchBuffer.bSecondLevel = true; CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_AllocateBb( m_osInterface, &m_scalingBBUF[i].BatchBuffer, NULL, size)); m_scalingBBUF[i].dwSize = size; m_scalingBBUF[i].dwNumMbsInBBuf = 0; m_scalingBBufIdx = CODECHAL_ENCODE_VME_BBUF_NUM - 1; } } //MB stats buffer is supported by AVC kernels on g9+. if(m_mbStatsSupported) { MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS)); allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER; allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR; allocParamsForBufferLinear.Format = Format_Buffer; // Starting from g9 HVS kernel, MBEnc Curbe is decoupled from BRC kernel and a new MBEnc BRC surface is added. 
// new HVS-based BRC kernel requires size of MBStat surface be 1024-aligned m_hwInterface->m_avcMbStatBufferSize = MOS_ALIGN_CEIL(m_picWidthInMb * 16 * sizeof(uint32_t)* (4 * m_downscaledHeightInMb4x), 1024); allocParamsForBufferLinear.dwBytes = m_hwInterface->m_avcMbStatBufferSize; allocParamsForBufferLinear.pBufName = "MB Statistics Buffer"; CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource( m_osInterface, &allocParamsForBufferLinear, &m_resMbStatsBuffer), "Failed to allocate MB Statistics Buffer."); m_mbStatsBottomFieldOffset = m_picWidthInMb * 16 * sizeof(uint32_t) * (2 * m_downscaledHeightInMb4x); MOS_LOCK_PARAMS lockFlagsWriteOnly; MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS)); lockFlagsWriteOnly.WriteOnly = 1; uint8_t* pData = (uint8_t*)m_osInterface->pfnLockResource( m_osInterface, &m_resMbStatsBuffer, &lockFlagsWriteOnly); if (pData == nullptr) { CODECHAL_ENCODE_ASSERTMESSAGE("Failed to Lock m_resMbStatsBuffer"); eStatus = MOS_STATUS_UNKNOWN; return eStatus; } MOS_ZeroMemory(pData, m_hwInterface->m_avcMbStatBufferSize); m_osInterface->pfnUnlockResource( m_osInterface, &m_resMbStatsBuffer); } else if(m_flatnessCheckSupported) { MOS_ZeroMemory(&m_flatnessCheckSurface, sizeof(MOS_SURFACE)); m_flatnessCheckSurface.TileType = MOS_TILE_LINEAR; m_flatnessCheckSurface.bArraySpacing = true; m_flatnessCheckSurface.Format = Format_Buffer_2D; MOS_ZeroMemory(&allocParamsForBuffer2D, sizeof(MOS_ALLOC_GFXRES_PARAMS)); allocParamsForBuffer2D.Type = MOS_GFXRES_2D; allocParamsForBuffer2D.TileType = MOS_TILE_LINEAR; allocParamsForBuffer2D.Format = Format_Buffer_2D; // Data size for 1MB is 1DWORDs (4Bytes) allocParamsForBuffer2D.dwWidth = MOS_ALIGN_CEIL(m_picWidthInMb * 4, 64); // Because FlatnessCheckSurface was referenced and filled during 4xDownScaling operation, // the height should be fit to MediaWalker height setting for 4xDS Kernel. 
allocParamsForBuffer2D.dwHeight = MOS_ALIGN_CEIL(4 * m_downscaledHeightInMb4x, 64); allocParamsForBuffer2D.pBufName = "Flatness Check Surface"; eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource( m_osInterface, &allocParamsForBuffer2D, &m_flatnessCheckSurface.OsResource); if (eStatus != MOS_STATUS_SUCCESS) { CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate FlatnessCheck Surface."); return eStatus; } CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalGetResourceInfo( m_osInterface, &m_flatnessCheckSurface)); m_flatnessCheckBottomFieldOffset = m_flatnessCheckSurface.dwPitch * m_flatnessCheckSurface.dwHeight >> 1; } return eStatus; } MOS_STATUS CodechalEncoderState::ExecuteMeKernel( MeCurbeParams *meParams, MeSurfaceParams *meSurfaceParams, HmeLevel hmeLevel) { MOS_STATUS eStatus = MOS_STATUS_SUCCESS; CODECHAL_ENCODE_FUNCTION_ENTER; CODECHAL_ENCODE_CHK_NULL_RETURN(meParams); CODECHAL_ENCODE_CHK_NULL_RETURN(meSurfaceParams); PerfTagSetting perfTag; perfTag.Value = 0; perfTag.Mode = (uint16_t)m_mode & CODECHAL_ENCODE_MODE_BIT_MASK; perfTag.CallType = m_singleTaskPhaseSupported ? CODECHAL_ENCODE_PERFTAG_CALL_SCALING_KERNEL : CODECHAL_ENCODE_PERFTAG_CALL_ME_KERNEL; perfTag.PictureCodingType = m_pictureCodingType; m_osInterface->pfnSetPerfTag(m_osInterface, perfTag.Value); // Each ME kernel buffer counts as a separate perf task m_osInterface->pfnResetPerfBufferID(m_osInterface); CODECHAL_MEDIA_STATE_TYPE encFunctionType = (hmeLevel == HME_LEVEL_32x) ? CODECHAL_MEDIA_STATE_32X_ME : (hmeLevel == HME_LEVEL_16x) ? CODECHAL_MEDIA_STATE_16X_ME : CODECHAL_MEDIA_STATE_4X_ME; bool vdencMeInUse = false; if (m_vdencEnabled && (encFunctionType == CODECHAL_MEDIA_STATE_4X_ME)) { vdencMeInUse = true; // Non legacy stream in is for hevc vp9 streamin kernel encFunctionType = m_useNonLegacyStreamin ? CODECHAL_MEDIA_STATE_4X_ME : CODECHAL_MEDIA_STATE_ME_VDENC_STREAMIN; } uint32_t krnStateIdx = vdencMeInUse ? CODECHAL_ENCODE_ME_IDX_VDENC : ((m_pictureCodingType == P_TYPE) ? 
CODECHAL_ENCODE_ME_IDX_P : CODECHAL_ENCODE_ME_IDX_B); PMHW_KERNEL_STATE kernelState = &m_meKernelStates[krnStateIdx]; // If Single Task Phase is not enabled, use BT count for the kernel state. if (m_firstTaskInPhase == true || !m_singleTaskPhaseSupported) { uint32_t maxBtCount = m_singleTaskPhaseSupported ? m_maxBtCount : kernelState->KernelParams.iBTCount; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnRequestSshSpaceForCmdBuf( m_stateHeapInterface, maxBtCount)); m_vmeStatesSize = m_hwInterface->GetKernelLoadCommandSize(maxBtCount); CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable()); } CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace( m_stateHeapInterface, kernelState, false, 0, false, m_storeData)); MHW_INTERFACE_DESCRIPTOR_PARAMS idParams; MOS_ZeroMemory(&idParams, sizeof(idParams)); idParams.pKernelState = kernelState; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor( m_stateHeapInterface, 1, &idParams)); // Setup Additional MeParams (Most of them set up in codec specific function, so don't zero out here) meParams->hmeLvl = hmeLevel; meParams->pKernelState = kernelState; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_encoderGenState->SetCurbeMe(meParams)); CODECHAL_DEBUG_TOOL( CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion( encFunctionType, MHW_DSH_TYPE, kernelState)); CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe( encFunctionType, kernelState)); CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion( encFunctionType, MHW_ISH_TYPE, kernelState)); ) MOS_COMMAND_BUFFER cmdBuffer; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0)); SendKernelCmdsParams sendKernelCmdsParams; sendKernelCmdsParams = SendKernelCmdsParams(); sendKernelCmdsParams.EncFunctionType = encFunctionType; sendKernelCmdsParams.pKernelState = kernelState; CODECHAL_ENCODE_CHK_STATUS_RETURN(SendGenericKernelCmds(&cmdBuffer, 
&sendKernelCmdsParams)); // Add binding table CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable( m_stateHeapInterface, kernelState)); // Setup Additional ME surface params (Most of them set up in codec specific function, so don't zero out here) meSurfaceParams->dwDownscaledWidthInMb = (hmeLevel == HME_LEVEL_32x) ? m_downscaledWidthInMb32x : (hmeLevel == HME_LEVEL_16x) ? m_downscaledWidthInMb16x : m_downscaledWidthInMb4x; meSurfaceParams->dwDownscaledHeightInMb = (hmeLevel == HME_LEVEL_32x) ? m_downscaledFrameFieldHeightInMb32x : (hmeLevel == HME_LEVEL_16x) ? m_downscaledFrameFieldHeightInMb16x : m_downscaledFrameFieldHeightInMb4x; meSurfaceParams->b32xMeInUse = (hmeLevel == HME_LEVEL_32x) ? true : false; meSurfaceParams->b16xMeInUse = (hmeLevel == HME_LEVEL_16x) ? true : false; meSurfaceParams->pKernelState = kernelState; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_encoderGenState->SendMeSurfaces(&cmdBuffer, meSurfaceParams)); // Dump SSH for ME kernel CODECHAL_DEBUG_TOOL( CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion( encFunctionType, MHW_SSH_TYPE, kernelState))); /* zero out the mv data memory and me distortion buffer for the driver ULT kernel only writes out this data used for current frame, in some cases the the data used for previous frames would be left in the buffer (for example, the L1 mv for B frame would still show in the P frame mv data buffer */ // Zeroing out the buffers has perf impact, so zero it out only when dumps are actually enabled CODECHAL_DEBUG_TOOL( CODECHAL_ENCODE_CHK_NULL_RETURN(m_debugInterface); uint8_t* data = NULL; uint32_t size = 0; bool driverMeDumpEnabled = m_debugInterface->DumpIsEnabled(CodechalDbgAttr::attrOutput, encFunctionType); if (driverMeDumpEnabled) { MOS_LOCK_PARAMS lockFlags; MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS)); lockFlags.WriteOnly = 1; switch (hmeLevel) { case HME_LEVEL_32x: data = (uint8_t*)m_osInterface->pfnLockResource( m_osInterface, 
&meSurfaceParams->ps32xMeMvDataBuffer->OsResource, &lockFlags); CODECHAL_ENCODE_CHK_NULL_RETURN(data); size = MOS_ALIGN_CEIL((m_downscaledWidthInMb32x * 32), 64) * (m_downscaledHeightInMb32x * 2 * 4 * CODECHAL_ENCODE_ME_DATA_SIZE_MULTIPLIER); MOS_ZeroMemory(data, size); m_osInterface->pfnUnlockResource( m_osInterface, &meSurfaceParams->ps32xMeMvDataBuffer->OsResource); break; case HME_LEVEL_16x: data = (uint8_t*)m_osInterface->pfnLockResource( m_osInterface, &meSurfaceParams->ps16xMeMvDataBuffer->OsResource, &lockFlags); CODECHAL_ENCODE_CHK_NULL_RETURN(data); size = MOS_ALIGN_CEIL((m_downscaledWidthInMb16x * 32), 64) * (m_downscaledHeightInMb16x * 2 * 4 * CODECHAL_ENCODE_ME_DATA_SIZE_MULTIPLIER); MOS_ZeroMemory(data, size); m_osInterface->pfnUnlockResource( m_osInterface, &meSurfaceParams->ps16xMeMvDataBuffer->OsResource); break; case HME_LEVEL_4x: if (!m_vdencEnabled) { data = (uint8_t*)m_osInterface->pfnLockResource( m_osInterface, &meSurfaceParams->ps4xMeMvDataBuffer->OsResource, &lockFlags); CODECHAL_ENCODE_CHK_NULL_RETURN(data); size = MOS_ALIGN_CEIL((m_downscaledWidthInMb4x * 32), 64) * (m_downscaledHeightInMb4x * 2 * 4 * CODECHAL_ENCODE_ME_DATA_SIZE_MULTIPLIER); MOS_ZeroMemory(data, size); m_osInterface->pfnUnlockResource( m_osInterface, &meSurfaceParams->ps4xMeMvDataBuffer->OsResource); } break; default: return MOS_STATUS_INVALID_PARAMETER; } // zeroing out ME dist buffer if (meSurfaceParams->b4xMeDistortionBufferSupported) { data = (uint8_t*)m_osInterface->pfnLockResource( m_osInterface, &meSurfaceParams->psMeDistortionBuffer->OsResource, &lockFlags); CODECHAL_ENCODE_CHK_NULL_RETURN(data); size = meSurfaceParams->psMeDistortionBuffer->dwHeight * meSurfaceParams->psMeDistortionBuffer->dwPitch; MOS_ZeroMemory(data, size); m_osInterface->pfnUnlockResource( m_osInterface, &meSurfaceParams->psMeDistortionBuffer->OsResource); } } ); uint32_t scalingFactor = (hmeLevel == HME_LEVEL_32x) ? SCALE_FACTOR_32x : (hmeLevel == HME_LEVEL_16x) ? 
SCALE_FACTOR_16x : SCALE_FACTOR_4x; uint32_t resolutionX = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_frameWidth / scalingFactor); uint32_t resolutionY = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameFieldHeight / scalingFactor); CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams; MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams)); walkerCodecParams.WalkerMode = m_walkerMode; walkerCodecParams.dwResolutionX = resolutionX; walkerCodecParams.dwResolutionY = resolutionY; walkerCodecParams.bNoDependency = true; walkerCodecParams.bMbaff = meSurfaceParams->bMbaff; walkerCodecParams.bGroupIdSelectSupported = m_groupIdSelectSupported; walkerCodecParams.ucGroupId = m_groupId; MHW_WALKER_PARAMS walkerParams; CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams( m_hwInterface, &walkerParams, &walkerCodecParams)); HalOcaInterface::TraceMessage(cmdBuffer, (MOS_CONTEXT_HANDLE)m_osInterface->pOsContext, __FUNCTION__, sizeof(__FUNCTION__)); HalOcaInterface::OnDispatch(cmdBuffer, *m_osInterface, *m_miInterface, *m_renderEngineInterface->GetMmioRegisters()); CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd( &cmdBuffer, &walkerParams)); CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, encFunctionType)); CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSubmitBlocks( m_stateHeapInterface, kernelState)); if (!m_singleTaskPhaseSupported || m_lastTaskInPhase) { CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnUpdateGlobalCmdBufId( m_stateHeapInterface)); CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr)); } CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer( &cmdBuffer, encFunctionType, nullptr))); m_hwInterface->UpdateSSEuForCmdBuffer(&cmdBuffer, m_singleTaskPhaseSupported, m_lastTaskInPhase); m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0); if (!m_singleTaskPhaseSupported || m_lastTaskInPhase) { 
HalOcaInterface::On1stLevelBBEnd(cmdBuffer, *m_osInterface); m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &cmdBuffer, m_renderContextUsesNullHw); m_lastTaskInPhase = false; } return eStatus; } bool CodechalEncoderState::CheckSupportedFormat( PMOS_SURFACE surface) { bool isColorFormatSupported = true; if (!surface) { CODECHAL_ENCODE_ASSERTMESSAGE("Invalid (NULL) Pointer."); return isColorFormatSupported; } // if input is not Tile-Y, or color format not NV12, invoke Ds+Copy kernel if (!IS_Y_MAJOR_TILE_FORMAT(surface->TileType) || surface->Format != Format_NV12) { isColorFormatSupported = false; } return isColorFormatSupported; } void CodechalEncoderState::FreeResources() { CODECHAL_ENCODE_FUNCTION_ENTER; // destroy sync objects if (!Mos_ResourceIsNull(&m_resSyncObjectRenderContextInUse)) { m_osInterface->pfnDestroySyncResource(m_osInterface, &m_resSyncObjectRenderContextInUse); } if (!Mos_ResourceIsNull(&m_resSyncObjectVideoContextInUse)) { m_osInterface->pfnDestroySyncResource(m_osInterface, &m_resSyncObjectVideoContextInUse); } // Release eStatus buffer if (!Mos_ResourceIsNull(&m_encodeStatusBuf.resStatusBuffer)) { if(m_encodeStatusBuf.pEncodeStatus != nullptr) { EncodeStatus* tmpEncodeStatus = nullptr; for(int i = 0; i < CODECHAL_ENCODE_STATUS_NUM; i++) { tmpEncodeStatus = (EncodeStatus*)(m_encodeStatusBuf.pEncodeStatus + i * m_encodeStatusBuf.dwReportSize); if(tmpEncodeStatus != nullptr && tmpEncodeStatus->encodeStatusReport.pHEVCTileinfo != nullptr) { MOS_FreeMemory(tmpEncodeStatus->encodeStatusReport.pHEVCTileinfo); tmpEncodeStatus->encodeStatusReport.pHEVCTileinfo = nullptr; } } } m_osInterface->pfnUnlockResource( m_osInterface, &(m_encodeStatusBuf.resStatusBuffer)); m_osInterface->pfnFreeResource( m_osInterface, &m_encodeStatusBuf.resStatusBuffer); m_encodeStatusBuf.pData = nullptr; m_encodeStatusBuf.pEncodeStatus = nullptr; } // Release HW Counter buffer if (m_osInterface->osCpInterface->IsCpEnabled() && 
m_hwInterface->GetCpInterface()->IsHwCounterIncrement(m_osInterface) && m_skipFrameBasedHWCounterRead == false) { if (!Mos_ResourceIsNull(&m_resHwCount)) { m_osInterface->pfnUnlockResource( m_osInterface, &(m_resHwCount)); m_osInterface->pfnFreeResource( m_osInterface, &m_resHwCount); } } if (!Mos_ResourceIsNull(&m_encodeStatusBufRcs.resStatusBuffer)) { if(m_encodeStatusBufRcs.pEncodeStatus != nullptr) { EncodeStatus* tmpEncodeStatus = nullptr; for(int i = 0; i < CODECHAL_ENCODE_STATUS_NUM; i++) { tmpEncodeStatus = (EncodeStatus*)(m_encodeStatusBufRcs.pEncodeStatus + i * m_encodeStatusBufRcs.dwReportSize); if(tmpEncodeStatus != nullptr && tmpEncodeStatus->encodeStatusReport.pHEVCTileinfo != nullptr) { MOS_FreeMemory(tmpEncodeStatus->encodeStatusReport.pHEVCTileinfo); tmpEncodeStatus->encodeStatusReport.pHEVCTileinfo = nullptr; } } } m_osInterface->pfnUnlockResource( m_osInterface, &(m_encodeStatusBufRcs.resStatusBuffer)); m_osInterface->pfnFreeResource( m_osInterface, &m_encodeStatusBufRcs.resStatusBuffer); m_encodeStatusBufRcs.pData = nullptr; m_encodeStatusBufRcs.pEncodeStatus = nullptr; } if (m_pakEnabled) { if (!Mos_ResourceIsNull(&m_resDeblockingFilterRowStoreScratchBuffer)) { m_osInterface->pfnFreeResource( m_osInterface, &m_resDeblockingFilterRowStoreScratchBuffer); } if (!Mos_ResourceIsNull(&m_resMPCRowStoreScratchBuffer)) { m_osInterface->pfnFreeResource( m_osInterface, &m_resMPCRowStoreScratchBuffer); } for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++) { if (!Mos_ResourceIsNull(&m_resStreamOutBuffer[i])) { m_osInterface->pfnFreeResource( m_osInterface, &m_resStreamOutBuffer[i]); } if (!Mos_ResourceIsNull(&m_sliceMapSurface[i].OsResource)) { m_osInterface->pfnFreeResource( m_osInterface, &m_sliceMapSurface[i].OsResource); } } } // release CSC Downscaling kernel resources if (m_cscDsState) { MOS_Delete(m_cscDsState); m_cscDsState = nullptr; } if (m_encoderGenState) { MOS_Delete(m_encoderGenState); m_encoderGenState = nullptr; } 
if(m_inlineEncodeStatusUpdate) { if (!Mos_ResourceIsNull(&m_atomicScratchBuf.resAtomicScratchBuffer)) { m_osInterface->pfnUnlockResource( m_osInterface, &(m_atomicScratchBuf.resAtomicScratchBuffer)); m_osInterface->pfnFreeResource( m_osInterface, &m_atomicScratchBuf.resAtomicScratchBuffer); } } if (m_encEnabled) { for (auto i = 0; i < CODECHAL_ENCODE_VME_BBUF_NUM; i++) { if (!Mos_ResourceIsNull(&m_scalingBBUF[i].BatchBuffer.OsResource)) { Mhw_FreeBb(m_osInterface, &m_scalingBBUF[i].BatchBuffer, nullptr); } } if(!Mos_ResourceIsNull(&m_flatnessCheckSurface.OsResource)) { m_osInterface->pfnFreeResource( m_osInterface, &m_flatnessCheckSurface.OsResource); } if(!Mos_ResourceIsNull(&m_resMbStatsBuffer)) { m_osInterface->pfnFreeResource( m_osInterface, &m_resMbStatsBuffer); } for (auto i = 0; i < CODECHAL_ENCODE_MAX_NUM_MAD_BUFFERS; i++) { if (!Mos_ResourceIsNull(&m_resMadDataBuffer[i])) { m_osInterface->pfnFreeResource( m_osInterface, &m_resMadDataBuffer[i]); } } } if (m_vdencEnabled) { m_osInterface->pfnFreeResource( m_osInterface, &m_resPakMmioBuffer); m_osInterface->pfnFreeResource( m_osInterface, &m_resHucErrorStatusBuffer); m_osInterface->pfnFreeResource(m_osInterface, &m_resHucFwBuffer); for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++) { m_osInterface->pfnFreeResource( m_osInterface, &m_resVdencStreamInBuffer[i]); } } if (m_vdencEnabled) { m_osInterface->pfnFreeResource(m_osInterface, &m_resHucStatus2Buffer); } m_osInterface->pfnFreeResource(m_osInterface, &m_resVdencCmdInitializerDmemBuffer); for (auto i = 0; i < 2; i++) { m_osInterface->pfnFreeResource(m_osInterface, &m_resVdencCmdInitializerDataBuffer[i]); } m_osInterface->pfnFreeResource(m_osInterface, &m_predicationBuffer); return; } void CodechalEncoderState::Destroy() { CODECHAL_ENCODE_FUNCTION_ENTER; if (m_videoNodeAssociationCreated && MEDIA_IS_SKU(m_skuTable, FtrVcs2) && (m_videoGpuNode < MOS_GPU_NODE_MAX)) { // Destroy encode video node associations 
m_osInterface->pfnDestroyVideoNodeAssociation(m_osInterface, m_videoGpuNode); m_osInterface->pfnSetLatestVirtualNode(m_osInterface, MOS_GPU_NODE_MAX); } if (m_mmcState != nullptr) { MOS_Delete(m_mmcState); m_mmcState = nullptr; } MOS_Delete(m_allocator); m_allocator = nullptr; MOS_Delete(m_trackedBuf); m_trackedBuf = nullptr; // Release encoder resources FreeResources(); return; } uint32_t CodechalEncoderState::CalculateCommandBufferSize() { uint32_t commandBufferSize = m_pictureStatesSize + m_extraPictureStatesSize + (m_sliceStatesSize * m_numSlices); if (m_singleTaskPhaseSupported) { commandBufferSize *= (m_numPasses + 1); } // 4K align since allocation is in chunks of 4K bytes. commandBufferSize = MOS_ALIGN_CEIL(commandBufferSize, 0x1000); return commandBufferSize; } MOS_STATUS CodechalEncoderState::VerifySpaceAvailable() { MOS_STATUS eStatus = MOS_STATUS_SUCCESS; CODECHAL_ENCODE_FUNCTION_ENTER; uint32_t requestedSize = 0; if (m_osInterface->pfnGetGpuContext(m_osInterface) == m_renderContext) { requestedSize = m_vmeStatesSize; eStatus = (MOS_STATUS)m_osInterface->pfnVerifyCommandBufferSize( m_osInterface, requestedSize, 0); return eStatus; } uint32_t requestedPatchListSize = 0; MOS_STATUS statusPatchList = MOS_STATUS_SUCCESS, statusCmdBuf; bool m_usePatchList = m_osInterface->bUsesPatchList || MEDIA_IS_SKU(m_skuTable, FtrMediaPatchless); if (m_osInterface->pfnGetGpuContext(m_osInterface) == m_videoContext) { if (m_usePatchList) { requestedPatchListSize = m_picturePatchListSize + m_extraPicturePatchListSize + (m_slicePatchListSize * m_numSlices); if (m_singleTaskPhaseSupported) { requestedPatchListSize *= (m_numPasses + 1); } } requestedSize = CalculateCommandBufferSize(); // Try a maximum of 3 attempts to request the required sizes from OS // OS could reset the sizes if necessary, therefore, requires to re-verify for (auto i = 0; i < 3; i++) { //Experiment shows resizing CmdBuf size and PatchList size in two calls one after the other would cause previously 
//successfully requested size to fallback to wrong value, hence never satisfying the requirement. So we call pfnResize() //only once depending on whether CmdBuf size not enough, or PatchList size not enough, or both. if (m_usePatchList) { statusPatchList = (MOS_STATUS)m_osInterface->pfnVerifyPatchListSize( m_osInterface, requestedPatchListSize); } statusCmdBuf = (MOS_STATUS)m_osInterface->pfnVerifyCommandBufferSize( m_osInterface, requestedSize, 0); if (statusPatchList != MOS_STATUS_SUCCESS && statusCmdBuf != MOS_STATUS_SUCCESS) { CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->ResizeCommandBufferAndPatchList(requestedSize + COMMAND_BUFFER_RESERVED_SPACE, requestedPatchListSize)); } else if (statusPatchList != MOS_STATUS_SUCCESS) { CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->ResizeCommandBufferAndPatchList(0, requestedPatchListSize)); } else if (statusCmdBuf != MOS_STATUS_SUCCESS) { CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->ResizeCommandBufferAndPatchList(requestedSize + COMMAND_BUFFER_RESERVED_SPACE, 0)); } else { m_singleTaskPhaseSupportedInPak = m_singleTaskPhaseSupported; return eStatus; } } } if (m_usePatchList) { statusPatchList = (MOS_STATUS)m_osInterface->pfnVerifyPatchListSize( m_osInterface, requestedPatchListSize); } statusCmdBuf = (MOS_STATUS)m_osInterface->pfnVerifyCommandBufferSize( m_osInterface, requestedSize, 0); if ((statusCmdBuf == MOS_STATUS_SUCCESS) && (statusPatchList == MOS_STATUS_SUCCESS)) { m_singleTaskPhaseSupportedInPak = m_singleTaskPhaseSupported; return eStatus; } if (m_singleTaskPhaseSupported) { uint32_t requestedSizeOriginal = 0, requestedPatchListSizeOriginal = 0; for (auto i = 0; i < 3; i++) { //Experiment shows resizing CmdBuf size and PatchList size in two calls one after the other would cause previously //successfully requested size to fallback to wrong value, hence never satisfying the requirement. 
So we call pfnResize() //only once depending on whether CmdBuf size not enough, or PatchList size not enough, or both. if (m_usePatchList) { statusPatchList = (MOS_STATUS)m_osInterface->pfnVerifyPatchListSize( m_osInterface, requestedPatchListSizeOriginal); } statusCmdBuf = (MOS_STATUS)m_osInterface->pfnVerifyCommandBufferSize( m_osInterface, requestedSizeOriginal, 0); if (statusPatchList != MOS_STATUS_SUCCESS && statusCmdBuf != MOS_STATUS_SUCCESS) { CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->ResizeCommandBufferAndPatchList(requestedSizeOriginal + COMMAND_BUFFER_RESERVED_SPACE, requestedPatchListSizeOriginal)); } else if (statusPatchList != MOS_STATUS_SUCCESS) { CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->ResizeCommandBufferAndPatchList(0, requestedPatchListSizeOriginal)); } else if (statusCmdBuf != MOS_STATUS_SUCCESS) { CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->ResizeCommandBufferAndPatchList(requestedSizeOriginal + COMMAND_BUFFER_RESERVED_SPACE, 0)); } else { m_singleTaskPhaseSupportedInPak = false; return eStatus; } } if (m_usePatchList) { statusPatchList = (MOS_STATUS)m_osInterface->pfnVerifyPatchListSize( m_osInterface, requestedPatchListSizeOriginal); } statusCmdBuf = (MOS_STATUS)m_osInterface->pfnVerifyCommandBufferSize( m_osInterface, requestedSizeOriginal, 0); if (statusPatchList == MOS_STATUS_SUCCESS && statusCmdBuf == MOS_STATUS_SUCCESS) { m_singleTaskPhaseSupportedInPak = false; } else { eStatus = MOS_STATUS_NO_SPACE; } } else { eStatus = MOS_STATUS_NO_SPACE; } return eStatus; } MOS_STATUS CodechalEncoderState::AddMediaVfeCmd( PMOS_COMMAND_BUFFER cmdBuffer, SendKernelCmdsParams *params) { CODECHAL_ENCODE_CHK_NULL_RETURN(params); MHW_VFE_PARAMS vfeParams = {}; vfeParams.pKernelState = params->pKernelState; vfeParams.eVfeSliceDisable = MHW_VFE_SLICE_ALL; vfeParams.Scoreboard.ScoreboardEnable = m_useHwScoreboard; vfeParams.Scoreboard.ScoreboardType = m_hwScoreboardType; vfeParams.dwMaximumNumberofThreads = m_encodeVfeMaxThreads; if 
(!m_useHwScoreboard) { vfeParams.Scoreboard.ScoreboardMask = 0; } else if (params->bEnableCustomScoreBoard == true) { MOS_SecureMemcpy(&vfeParams.Scoreboard, sizeof(vfeParams.Scoreboard), params->pCustomScoreBoard, sizeof(*params->pCustomScoreBoard)); } else if (params->bEnable45ZWalkingPattern == true) { vfeParams.Scoreboard.ScoreboardMask = 0x0F; vfeParams.Scoreboard.ScoreboardType = 1; // Scoreboard 0 vfeParams.Scoreboard.ScoreboardDelta[0].x = 0; vfeParams.Scoreboard.ScoreboardDelta[0].y = 0xF; // Scoreboard 1 vfeParams.Scoreboard.ScoreboardDelta[1].x = 0; vfeParams.Scoreboard.ScoreboardDelta[1].y = 0xE; // Scoreboard 2 vfeParams.Scoreboard.ScoreboardDelta[2].x = 0xF; vfeParams.Scoreboard.ScoreboardDelta[2].y = 3; // Scoreboard 3 vfeParams.Scoreboard.ScoreboardDelta[3].x = 0xF; vfeParams.Scoreboard.ScoreboardDelta[3].y = 1; } else { vfeParams.Scoreboard.ScoreboardMask = 0xFF; // Scoreboard 0 vfeParams.Scoreboard.ScoreboardDelta[0].x = 0xF; vfeParams.Scoreboard.ScoreboardDelta[0].y = 0; // Scoreboard 1 vfeParams.Scoreboard.ScoreboardDelta[1].x = 0; vfeParams.Scoreboard.ScoreboardDelta[1].y = 0xF; // Scoreboard 2 vfeParams.Scoreboard.ScoreboardDelta[2].x = 1; vfeParams.Scoreboard.ScoreboardDelta[2].y = 0xF; // Scoreboard 3 vfeParams.Scoreboard.ScoreboardDelta[3].x = 0xF; vfeParams.Scoreboard.ScoreboardDelta[3].y = 0xF; // Scoreboard 4 vfeParams.Scoreboard.ScoreboardDelta[4].x = 0xF; vfeParams.Scoreboard.ScoreboardDelta[4].y = 1; // Scoreboard 5 vfeParams.Scoreboard.ScoreboardDelta[5].x = 0; vfeParams.Scoreboard.ScoreboardDelta[5].y = 0xE; // Scoreboard 6 vfeParams.Scoreboard.ScoreboardDelta[6].x = 1; vfeParams.Scoreboard.ScoreboardDelta[6].y = 0xE; // Scoreboard 7 vfeParams.Scoreboard.ScoreboardDelta[7].x = 0xF; vfeParams.Scoreboard.ScoreboardDelta[7].y = 0xE; } if (MEDIA_IS_WA(m_waTable, WaUseStallingScoreBoard)) vfeParams.Scoreboard.ScoreboardType = 0; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaVfeCmd(cmdBuffer, &vfeParams)); return 
//!
//! \brief    Adds the commands that are common to every encode kernel launch.
//! \details  In order: optional default SSEU setting, command buffer prolog
//!           (only for the first task of a phase, or always when single task
//!           phase is not supported), status report start, L3 cache setup,
//!           preemption enable, pipeline select, state base address and the
//!           walker context.
//! \param    [in] cmdBuffer
//!           Command buffer the commands are added to, must not be null.
//! \param    [in] params
//!           Kernel command parameters, must not be null and must carry a
//!           valid pKernelState.
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS if success, else fail reason
//!
MOS_STATUS CodechalEncoderState::SendGenericKernelCmds(
    PMOS_COMMAND_BUFFER   cmdBuffer,
    SendKernelCmdsParams *params)
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    // params has to be validated before any of its members is touched.
    CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
    CODECHAL_ENCODE_CHK_NULL_RETURN(params);
    CODECHAL_ENCODE_CHK_NULL_RETURN(params->pKernelState);

    if (MEDIA_IS_SKU(m_skuTable, FtrSSEUPowerGating))
    {
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetDefaultSSEuSetting(
            params->EncFunctionType,
            m_setRequestedEUSlices,
            m_setRequestedSubSlices,
            m_setRequestedEUs));
    }

    if (!m_singleTaskPhaseSupported || m_firstTaskInPhase)
    {
        bool requestFrameTracking = false;

        if (CodecHalUsesOnlyRenderEngine(m_codecFunction) && m_lastEncPhase)
        {
            // frame tracking tag is only added in the last command buffer header
            requestFrameTracking = m_singleTaskPhaseSupported ? m_firstTaskInPhase : m_lastTaskInPhase;
        }

        // Send command buffer header at the beginning (OS dependent)
        CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(
            cmdBuffer,
            requestFrameTracking,
            m_bRenderOcaEnabled ? m_renderEngineInterface->GetMmioRegisters() : nullptr));

        m_firstTaskInPhase = false;
    }

    CODECHAL_ENCODE_CHK_STATUS_RETURN(StartStatusReport(cmdBuffer, params->EncFunctionType));

    if (m_renderEngineInterface->GetL3CacheConfig()->bL3CachingEnabled)
    {
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->SetL3Cache(cmdBuffer));
    }

    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->EnablePreemption(cmdBuffer));

    // Add Pipeline select command
    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddPipelineSelectCmd(cmdBuffer, m_computeContextEnabled));

    // Add State Base Addr command
    MHW_STATE_BASE_ADDR_PARAMS stateBaseAddrParams;
    MOS_ZeroMemory(&stateBaseAddrParams, sizeof(stateBaseAddrParams));

    // This bit will not be used in Driver ID but it will be used to determine if Render Target Flag has to be Clear or Set
    // Read this bit in pfnAddStateBaseAddrCmd and propagate it using ResourceParams via bRenderTarget
    stateBaseAddrParams.bDynamicStateRenderTarget = params->bDshInUse;

    MOS_RESOURCE *dsh = params->pKernelState->m_dshRegion.GetResource();
    CODECHAL_ENCODE_CHK_NULL_RETURN(dsh);
    MOS_RESOURCE *ish = params->pKernelState->m_ishRegion.GetResource();
    CODECHAL_ENCODE_CHK_NULL_RETURN(ish);

    stateBaseAddrParams.presDynamicState        = dsh;
    stateBaseAddrParams.dwDynamicStateSize      = params->pKernelState->m_dshRegion.GetHeapSize();
    stateBaseAddrParams.presInstructionBuffer   = ish;
    stateBaseAddrParams.dwInstructionBufferSize = params->pKernelState->m_ishRegion.GetHeapSize();

    if (m_computeContextEnabled)
    {
        // On the compute context the DSH also backs the general state and the
        // indirect object buffer, and it is not flagged as a render target.
        stateBaseAddrParams.presGeneralState           = dsh;
        stateBaseAddrParams.dwGeneralStateSize         = params->pKernelState->m_dshRegion.GetHeapSize();
        stateBaseAddrParams.presIndirectObjectBuffer   = dsh;
        stateBaseAddrParams.dwIndirectObjectBufferSize = params->pKernelState->m_dshRegion.GetHeapSize();
        stateBaseAddrParams.bDynamicStateRenderTarget  = false;
    }

    if (m_standard == CODECHAL_HEVC)
    {
        stateBaseAddrParams.mocs4InstructionCache =
            m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_L3].Value;
    }

    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddStateBaseAddrCmd(cmdBuffer, &stateBaseAddrParams));

    CODECHAL_ENCODE_CHK_STATUS_RETURN(SetupWalkerContext(cmdBuffer, params));

    return eStatus;
}
MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams)); miStoreRegMemParams.presStoreBuffer = params->presBrcPakStatisticBuffer; miStoreRegMemParams.dwOffset = 0; miStoreRegMemParams.dwRegister = mmioRegisters->mfcBitstreamBytecountFrameRegOffset; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &miStoreRegMemParams)); miStoreRegMemParams.presStoreBuffer = params->presBrcPakStatisticBuffer; miStoreRegMemParams.dwOffset = sizeof(uint32_t); miStoreRegMemParams.dwRegister = mmioRegisters->mfcBitstreamBytecountSliceRegOffset; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &miStoreRegMemParams)); MHW_MI_STORE_DATA_PARAMS storeDataParams; storeDataParams.pOsResource = params->presBrcPakStatisticBuffer; storeDataParams.dwResourceOffset = sizeof(uint32_t) * 2; storeDataParams.dwValue = params->ucPass + 1; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(cmdBuffer, &storeDataParams)); storeDataParams.pOsResource = params->presStatusBuffer; storeDataParams.dwResourceOffset = params->dwStatusBufNumPassesOffset; storeDataParams.dwValue = params->ucPass + 1; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(cmdBuffer, &storeDataParams)); miStoreRegMemParams.presStoreBuffer = params->presBrcPakStatisticBuffer; miStoreRegMemParams.dwOffset = sizeof(uint32_t) * (4 + params->ucPass); miStoreRegMemParams.dwRegister = mmioRegisters->mfcImageStatusCtrlRegOffset; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &miStoreRegMemParams)); return eStatus; } //------------------------------------------------------------------------------ //| Purpose: Retrieves the MFC image eStatus information //| Return: N/A //------------------------------------------------------------------------------ MOS_STATUS CodechalEncoderState::ReadImageStatus( PMOS_COMMAND_BUFFER cmdBuffer) { MOS_STATUS eStatus = MOS_STATUS_SUCCESS; 
CODECHAL_ENCODE_FUNCTION_ENTER; CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer); CODECHAL_ENCODE_CHK_COND_RETURN((m_vdboxIndex > m_hwInterface->GetMfxInterface()->GetMaxVdboxIndex()),"ERROR - vdbox index exceed the maximum"); MmioRegistersMfx* mmioRegisters = m_hwInterface->SelectVdboxAndGetMmioRegister(m_vdboxIndex, cmdBuffer); EncodeStatusBuffer* encodeStatusBuf = &m_encodeStatusBuf; uint32_t baseOffset = (encodeStatusBuf->wCurrIndex * m_encodeStatusBuf.dwReportSize) + sizeof(uint32_t) * 2; // pEncodeStatus is offset by 2 DWs in the resource MHW_MI_STORE_REGISTER_MEM_PARAMS miStoreRegMemParams; MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams)); miStoreRegMemParams.presStoreBuffer = &encodeStatusBuf->resStatusBuffer; miStoreRegMemParams.dwOffset = baseOffset + encodeStatusBuf->dwImageStatusMaskOffset; miStoreRegMemParams.dwRegister = mmioRegisters->mfcImageStatusMaskRegOffset; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &miStoreRegMemParams)); miStoreRegMemParams.presStoreBuffer = &encodeStatusBuf->resStatusBuffer; miStoreRegMemParams.dwOffset = baseOffset + encodeStatusBuf->dwImageStatusCtrlOffset; miStoreRegMemParams.dwRegister = mmioRegisters->mfcImageStatusCtrlRegOffset; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &miStoreRegMemParams)); // VDEnc dynamic slice overflow semaphore, DW0 is SW programmed mask(MFX_IMAGE_MASK does not support), DW1 is MFX_IMAGE_STATUS_CONTROL if (m_vdencBrcEnabled) { MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams; // Added for VDEnc slice overflow bit in MFC_IMAGE_STATUS_CONTROL // The bit is connected on the non-AVC encoder side of MMIO register. // Need a dummy MFX_PIPE_MODE_SELECT to decoder and read this register. 
if (m_waReadVDEncOverflowStatus) { pipeModeSelectParams = {}; pipeModeSelectParams.Mode = CODECHAL_DECODE_MODE_AVCVLD; m_hwInterface->GetMfxInterface()->SetDecodeInUse(true); CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetMfxInterface()->AddMfxPipeModeSelectCmd(cmdBuffer, &pipeModeSelectParams)); } // Store MFC_IMAGE_STATUS_CONTROL MMIO to DMEM for HuC next BRC pass of current frame and first pass of next frame. for (int i = 0; i < 2; i++) { if (m_resVdencBrcUpdateDmemBufferPtr[i]) { miStoreRegMemParams.presStoreBuffer = m_resVdencBrcUpdateDmemBufferPtr[i]; miStoreRegMemParams.dwOffset = 7 * sizeof(uint32_t); // offset of SliceSizeViolation in HUC_BRC_UPDATE_DMEM miStoreRegMemParams.dwRegister = mmioRegisters->mfcImageStatusCtrlRegOffset; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &miStoreRegMemParams)); } } // Restore MFX_PIPE_MODE_SELECT to encode mode if (m_waReadVDEncOverflowStatus) { pipeModeSelectParams = {}; pipeModeSelectParams.Mode = m_mode; pipeModeSelectParams.bVdencEnabled = true; m_hwInterface->GetMfxInterface()->SetDecodeInUse(false); CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetMfxInterface()->AddMfxPipeModeSelectCmd(cmdBuffer, &pipeModeSelectParams)); } } MHW_MI_FLUSH_DW_PARAMS flushDwParams; MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams)); CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(cmdBuffer, &flushDwParams)); return eStatus; } //------------------------------------------------------------------------------ //| Purpose: Retrieves the MFC registers and stores them in the eStatus report //| Return: N/A //------------------------------------------------------------------------------ MOS_STATUS CodechalEncoderState::ReadMfcStatus( PMOS_COMMAND_BUFFER cmdBuffer) { MOS_STATUS eStatus = MOS_STATUS_SUCCESS; CODECHAL_ENCODE_FUNCTION_ENTER; CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer); CODECHAL_ENCODE_CHK_COND_RETURN((m_vdboxIndex > 
m_hwInterface->GetMfxInterface()->GetMaxVdboxIndex()),"ERROR - vdbox index exceed the maximum"); MmioRegistersMfx* mmioRegisters = m_hwInterface->SelectVdboxAndGetMmioRegister(m_vdboxIndex, cmdBuffer); EncodeStatusBuffer* encodeStatusBuf = &m_encodeStatusBuf; uint32_t baseOffset = (encodeStatusBuf->wCurrIndex * m_encodeStatusBuf.dwReportSize) + sizeof(uint32_t) * 2; // pEncodeStatus is offset by 2 DWs in the resource MHW_MI_FLUSH_DW_PARAMS flushDwParams; MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams)); CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(cmdBuffer, &flushDwParams)); MHW_MI_STORE_REGISTER_MEM_PARAMS miStoreRegMemParams; MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams)); miStoreRegMemParams.presStoreBuffer = &encodeStatusBuf->resStatusBuffer; miStoreRegMemParams.dwOffset = baseOffset + encodeStatusBuf->dwBSByteCountOffset; miStoreRegMemParams.dwRegister = mmioRegisters->mfcBitstreamBytecountFrameRegOffset; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &miStoreRegMemParams)); miStoreRegMemParams.presStoreBuffer = &encodeStatusBuf->resStatusBuffer; miStoreRegMemParams.dwOffset = baseOffset + encodeStatusBuf->dwBSSEBitCountOffset; miStoreRegMemParams.dwRegister = mmioRegisters->mfcBitstreamSeBitcountFrameRegOffset; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &miStoreRegMemParams)); miStoreRegMemParams.presStoreBuffer = &encodeStatusBuf->resStatusBuffer; miStoreRegMemParams.dwOffset = baseOffset + encodeStatusBuf->dwQpStatusCountOffset; miStoreRegMemParams.dwRegister = mmioRegisters->mfcQPStatusCountOffset; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &miStoreRegMemParams)); if (mmioRegisters->mfcAvcNumSlicesRegOffset > 0) { //read MFC_AVC_NUM_SLICES register to status report miStoreRegMemParams.presStoreBuffer = &encodeStatusBuf->resStatusBuffer; miStoreRegMemParams.dwOffset = baseOffset + 
encodeStatusBuf->dwNumSlicesOffset; miStoreRegMemParams.dwRegister = mmioRegisters->mfcAvcNumSlicesRegOffset; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &miStoreRegMemParams)); } if (m_vdencBrcEnabled) { // Store PAK FrameSize MMIO to DMEM for HuC next BRC pass of current frame and first pass of next frame. for (int i = 0; i < 2; i++) { if (m_resVdencBrcUpdateDmemBufferPtr[i]) { miStoreRegMemParams.presStoreBuffer = m_resVdencBrcUpdateDmemBufferPtr[i]; miStoreRegMemParams.dwOffset = 5 * sizeof(uint32_t); miStoreRegMemParams.dwRegister = mmioRegisters->mfcBitstreamBytecountFrameRegOffset; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &miStoreRegMemParams)); if (m_vdencBrcNumOfSliceOffset) { miStoreRegMemParams.presStoreBuffer = m_resVdencBrcUpdateDmemBufferPtr[i]; miStoreRegMemParams.dwOffset = m_vdencBrcNumOfSliceOffset; miStoreRegMemParams.dwRegister = mmioRegisters->mfcAvcNumSlicesRegOffset; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &miStoreRegMemParams)); } } } } CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadImageStatus(cmdBuffer)); return eStatus; } //------------------------------------------------------------------------------ //| Purpose: Retrieves the MFC registers and stores them in the eStatus report //| Return: N/A //------------------------------------------------------------------------------ MOS_STATUS CodechalEncoderState::SetStatusReportParams( PCODEC_REF_LIST currRefList) { MOS_STATUS eStatus = MOS_STATUS_SUCCESS; CODECHAL_ENCODE_FUNCTION_ENTER; EncodeStatusBuffer* encodeStatusBuf = nullptr; if ((m_codecFunction == CODECHAL_FUNCTION_ENC) || (m_codecFunction == CODECHAL_FUNCTION_FEI_PRE_ENC) || (m_codecFunction == CODECHAL_FUNCTION_FEI_ENC) || (m_codecFunction == CODECHAL_FUNCTION_HYBRIDPAK)) { encodeStatusBuf = &m_encodeStatusBufRcs; } else { encodeStatusBuf = &m_encodeStatusBuf; } EncodeStatus* encodeStatus = 
(EncodeStatus*)(encodeStatusBuf->pEncodeStatus + encodeStatusBuf->wCurrIndex * encodeStatusBuf->dwReportSize); EncodeStatusReport* encodeStatusReport = &encodeStatus->encodeStatusReport; encodeStatus->dwStoredData = m_storeData; encodeStatusReport->StatusReportNumber = m_statusReportFeedbackNumber; encodeStatusReport->CurrOriginalPic = m_currOriginalPic; encodeStatus->wPictureCodingType = m_pictureCodingType; switch (m_codecFunction) { case CODECHAL_FUNCTION_ENC: encodeStatusReport->Func = CODECHAL_ENCODE_ENC_ID; break; case CODECHAL_FUNCTION_PAK: encodeStatusReport->Func = CODECHAL_ENCODE_PAK_ID; break; case CODECHAL_FUNCTION_ENC_PAK: case CODECHAL_FUNCTION_ENC_VDENC_PAK: encodeStatusReport->Func = CODECHAL_ENCODE_ENC_PAK_ID; break; case CODECHAL_FUNCTION_FEI_PRE_ENC: encodeStatusReport->Func = CODECHAL_ENCODE_FEI_PRE_ENC_ID; break; case CODECHAL_FUNCTION_FEI_ENC: encodeStatusReport->Func = CODECHAL_ENCODE_FEI_ENC_ID; break; case CODECHAL_FUNCTION_FEI_PAK: encodeStatusReport->Func = CODECHAL_ENCODE_FEI_PAK_ID; break; case CODECHAL_FUNCTION_FEI_ENC_PAK: encodeStatusReport->Func = CODECHAL_ENCODE_FEI_ENC_PAK_ID; break; case CODECHAL_FUNCTION_HYBRIDPAK: encodeStatusReport->Func = CODECHAL_ENCODE_ENC_ID; /* Only the render engine(EU) is used, MFX is not used */ break; default: break; } encodeStatusReport->pCurrRefList = m_currRefList; encodeStatusReport->NumberTilesInFrame = m_numberTilesInFrame; encodeStatusReport->UsedVdBoxNumber = m_numUsedVdbox; if (m_lookaheadDepth > 0) { uint32_t laStatusIndex = (encodeStatusBuf->wCurrIndex + m_lookaheadDepth - 1) % CODECHAL_ENCODE_STATUS_NUM; EncodeStatus* encodeStatus = (EncodeStatus*)(encodeStatusBuf->pEncodeStatus + laStatusIndex * encodeStatusBuf->dwReportSize); encodeStatus->lookaheadStatus.StatusReportNumber = m_statusReportFeedbackNumber; } return eStatus; } //------------------------------------------------------------------------------ //| Purpose: Set each of status report buffer to completed status (only render 
context) //| Return: N/A //------------------------------------------------------------------------------ MOS_STATUS CodechalEncoderState::InitStatusReport() { MOS_STATUS eStatus = MOS_STATUS_SUCCESS; CODECHAL_ENCODE_FUNCTION_ENTER; EncodeStatusBuffer* encodeStatusBuf = &m_encodeStatusBuf; CODECHAL_ENCODE_CHK_NULL_RETURN(encodeStatusBuf); EncodeStatus* encodeStatus = (EncodeStatus*)(encodeStatusBuf->pEncodeStatus + encodeStatusBuf->wCurrIndex * encodeStatusBuf->dwReportSize); CODECHAL_ENCODE_CHK_NULL_RETURN(encodeStatus); for (auto i = 0; i < CODECHAL_NUM_MEDIA_STATES; i += 1) { encodeStatus->qwStoredDataEnc[i].dwStoredData = CODECHAL_STATUS_QUERY_END_FLAG; } if (m_encEnabled) { EncodeStatusBuffer* encodeStatusBufRcs = &m_encodeStatusBufRcs; CODECHAL_ENCODE_CHK_NULL_RETURN(encodeStatusBufRcs); encodeStatus = (EncodeStatus*)(encodeStatusBufRcs->pEncodeStatus + encodeStatusBufRcs->wCurrIndex * encodeStatusBufRcs->dwReportSize); CODECHAL_ENCODE_CHK_NULL_RETURN(encodeStatus); for (auto i = 0; i < CODECHAL_NUM_MEDIA_STATES; i += 1) { encodeStatus->qwStoredDataEnc[i].dwStoredData = CODECHAL_STATUS_QUERY_END_FLAG; } } return eStatus; } //------------------------------------------------------------------------------ //| Purpose: Indicates to the driver that the batch buffer has started processing //| Return: N/A //------------------------------------------------------------------------------ MOS_STATUS CodechalEncoderState::StartStatusReport( PMOS_COMMAND_BUFFER cmdBuffer, CODECHAL_MEDIA_STATE_TYPE encFunctionType) { MOS_STATUS eStatus = MOS_STATUS_SUCCESS; CODECHAL_ENCODE_FUNCTION_ENTER; CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer); EncodeStatusBuffer* encodeStatusBuf = &m_encodeStatusBuf; EncodeStatusBuffer* encodeStatusBufRcs = &m_encodeStatusBufRcs; if (m_osInterface->pfnGetGpuContext(m_osInterface) == m_renderContext) { uint32_t offset = (encodeStatusBufRcs->wCurrIndex * m_encodeStatusBufRcs.dwReportSize) + encodeStatusBufRcs->dwStoreDataOffset + 8 + // VME stored data 
offset is 2nd sizeof(uint32_t) * 2 * encFunctionType + // Each VME stored data is 1 QW sizeof(uint32_t) * 2; // encodeStatus is offset by 2 DWs in the resource MHW_PIPE_CONTROL_PARAMS pipeControlParams; MOS_ZeroMemory(&pipeControlParams, sizeof(pipeControlParams)); pipeControlParams.presDest = &encodeStatusBufRcs->resStatusBuffer; pipeControlParams.dwPostSyncOp = MHW_FLUSH_WRITE_IMMEDIATE_DATA; pipeControlParams.dwResourceOffset = offset; pipeControlParams.dwDataDW1 = CODECHAL_STATUS_QUERY_START_FLAG; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddPipeControl( cmdBuffer, nullptr, &pipeControlParams)); } if (m_osInterface->pfnGetGpuContext(m_osInterface) == m_videoContext) { uint32_t offset = (encodeStatusBuf->wCurrIndex * m_encodeStatusBuf.dwReportSize) + encodeStatusBuf->dwStoreDataOffset + // MFX stored data offset is 1st, so no additional offset is needed sizeof(uint32_t) * 2; // encodeStatus is offset by 2 DWs in the resource MHW_MI_STORE_DATA_PARAMS storeDataParams; storeDataParams.pOsResource = &encodeStatusBuf->resStatusBuffer; storeDataParams.dwResourceOffset = offset; storeDataParams.dwValue = CODECHAL_STATUS_QUERY_START_FLAG; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd( cmdBuffer, &storeDataParams)); if (m_osInterface->osCpInterface->IsCpEnabled() && m_hwInterface->GetCpInterface()->IsHwCounterIncrement(m_osInterface) && m_skipFrameBasedHWCounterRead == false ) { uint32_t writeOffset = sizeof(HwCounter) * CODECHAL_ENCODE_STATUS_NUM; CODECHAL_ENCODE_CHK_NULL_RETURN(m_hwInterface->GetCpInterface()); // Lazy allocation if (Mos_ResourceIsNull(&m_resHwCount)) { MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear; MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS)); allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER; allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR; allocParamsForBufferLinear.Format = Format_Buffer; MOS_LOCK_PARAMS lockFlagsNoOverWrite;; MOS_ZeroMemory(&lockFlagsNoOverWrite, 
sizeof(MOS_LOCK_PARAMS)); lockFlagsNoOverWrite.WriteOnly = 1; lockFlagsNoOverWrite.NoOverWrite = 1; // eStatus query reporting m_encodeStatusBuf.dwReportSize = MOS_ALIGN_CEIL(sizeof(EncodeStatus), MHW_CACHELINE_SIZE); uint32_t size = sizeof(HwCounter) * CODECHAL_ENCODE_STATUS_NUM + sizeof(HwCounter); allocParamsForBufferLinear.dwBytes = size; allocParamsForBufferLinear.pBufName = "HWCounterQueryBuffer"; allocParamsForBufferLinear.bIsPersistent = true; // keeping status buffer persistent since its used in all command buffers eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource( m_osInterface, &allocParamsForBufferLinear, &m_resHwCount); if (eStatus != MOS_STATUS_SUCCESS) { CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Encode eStatus Buffer."); return eStatus; } CODECHAL_ENCODE_CHK_STATUS_RETURN( m_osInterface->pfnSkipResourceSync( &m_resHwCount)); uint8_t *dataHwCount = (uint8_t *)m_osInterface->pfnLockResource( m_osInterface, &(m_resHwCount), &lockFlagsNoOverWrite); if (!dataHwCount) { CODECHAL_ENCODE_ASSERTMESSAGE("Failed to Local Resource for MbEnc Adv Count Query Buffer."); return eStatus; } MOS_ZeroMemory(dataHwCount, size); m_dataHwCount = (uint32_t*)dataHwCount; } CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetCpInterface()->ReadEncodeCounterFromHW( m_osInterface, cmdBuffer, &m_resHwCount, encodeStatusBuf->wCurrIndex)); } } CODECHAL_ENCODE_CHK_STATUS_RETURN(m_perfProfiler->AddPerfCollectStartCmd((void*)this, m_osInterface, m_miInterface, cmdBuffer)); CODECHAL_ENCODE_CHK_STATUS_RETURN(NullHW::StartPredicate(m_osInterface, m_miInterface, cmdBuffer)); return eStatus; } //------------------------------------------------------------------------------ //| Purpose: Indicates to the driver that the batch buffer has completed processing //| Return: N/A //------------------------------------------------------------------------------ MOS_STATUS CodechalEncoderState::EndStatusReport( PMOS_COMMAND_BUFFER cmdBuffer, CODECHAL_MEDIA_STATE_TYPE encFunctionType) { 
MOS_STATUS eStatus = MOS_STATUS_SUCCESS; CODECHAL_ENCODE_FUNCTION_ENTER; CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer); CODECHAL_ENCODE_CHK_STATUS_RETURN(NullHW::StopPredicate(m_osInterface, m_miInterface, cmdBuffer)); // Update the tag in GPU Sync eStatus buffer (H/W Tag) to match the current S/W tag if applicable if (m_frameTrackingEnabled && m_osInterface->bTagResourceSync) { MOS_SYNC_PARAMS syncParams = g_cInitSyncParams; bool writeResourceSyncTag = false; if (m_osInterface->pfnGetGpuContext(m_osInterface) == m_renderContext) { syncParams.GpuContext = m_renderContext; // Enc only and VDEnc SHME requires render engine GPU tag if (CodecHalUsesOnlyRenderEngine(m_codecFunction) || (m_vdencEnabled && m_16xMeSupported)) { writeResourceSyncTag = m_lastEncPhase && m_lastTaskInPhase; } } else if (m_osInterface->pfnGetGpuContext(m_osInterface) == m_videoContext) { syncParams.GpuContext = m_videoContext; writeResourceSyncTag = m_lastTaskInPhase; } if (writeResourceSyncTag) { if (!m_firstField || CodecHal_PictureIsFrame(m_currOriginalPic)) { CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->WriteSyncTagToResource(cmdBuffer, &syncParams)); } } } MHW_MI_STORE_DATA_PARAMS storeDataParams = {}; uint32_t offset = 0; if (m_osInterface->pfnGetGpuContext(m_osInterface) == m_renderContext) { // Flush the write cache for ENC output MHW_PIPE_CONTROL_PARAMS pipeControlParams; MOS_ZeroMemory(&pipeControlParams, sizeof(pipeControlParams)); pipeControlParams.dwFlushMode = MHW_FLUSH_WRITE_CACHE; pipeControlParams.bGenericMediaStateClear = true; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddPipeControl(cmdBuffer, nullptr, &pipeControlParams)); if (MEDIA_IS_WA(m_waTable, WaSendDummyVFEafterPipelineSelect)) { MHW_VFE_PARAMS vfeStateParams = {}; vfeStateParams.dwNumberofURBEntries = 1; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaVfeCmd(cmdBuffer, &vfeStateParams)); } offset = (m_encodeStatusBufRcs.wCurrIndex * m_encodeStatusBufRcs.dwReportSize) + 
m_encodeStatusBufRcs.dwStoreDataOffset + 8 + // VME stored data offset is 2nd sizeof(uint32_t) * 2 * encFunctionType + // Each VME stored data is 1 QW sizeof(uint32_t) * 2; // encodeStatus is offset by 2 DWs in the resource storeDataParams.pOsResource = &m_encodeStatusBufRcs.resStatusBuffer; } else if (m_osInterface->pfnGetGpuContext(m_osInterface) == m_videoContext) { offset = m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize + m_encodeStatusBuf.dwStoreDataOffset + // MFX stored data offset is 1st, so no additional offset is needed sizeof(uint32_t) * 2; // encodeStatus is offset by 2 DWs in the resource storeDataParams.pOsResource = &m_encodeStatusBuf.resStatusBuffer; } storeDataParams.dwResourceOffset = offset; storeDataParams.dwValue = CODECHAL_STATUS_QUERY_END_FLAG; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd( cmdBuffer, &storeDataParams)); if (encFunctionType == CODECHAL_NUM_MEDIA_STATES && m_inlineEncodeStatusUpdate) { if (m_currPass < m_numPasses) { if(m_vdencBrcEnabled) { //delay to check at the beginning of next pass util huc status updated; } else { // inc m_storeData conditionaly UpdateEncodeStatus(cmdBuffer, false); } } else { // inc m_storeData forcely UpdateEncodeStatus(cmdBuffer, true); } } CODECHAL_ENCODE_CHK_STATUS_RETURN(m_perfProfiler->AddPerfCollectEndCmd((void*)this, m_osInterface, m_miInterface, cmdBuffer)); return eStatus; } //! //! \brief Update m_storeData in offset 0 of statusReport. //! \details Add conditonal encode status report to avoid of extra small batch buffer //! to avoid of extra context switch interrupt. if ImageStatusRegister show //! encoding completion, update the m_storeData, otherwise continue. //! n0 n1 n2 n3 //! +-------+--------+--------+--------+--------+--------+--------+--------+ //! | 0 0 | 0 | val/0 1/0 | 0 1 | //! +-------+--------+--------+--------+--------+--------+--------+--------+ //! low high low high low high low high //! //! if(m_forceOperation==true) //! 
step-1: m_storeData = m_storeData + 1 // ADD directly //! else //! step-1: n2_lo = ImageStatusCtrl & dwImageStatusMask // AND //! step-2: n2_lo = (n2_lo == 0) ? 0 : n2_lo // uint32_t CMP //! step-3: n2_lo:n2_hi = (n2_lo:n2_hi == 0:1) ? 0:0 : n2_lo:n2_hi // uint64_t CMP //! step-4: n2_hi = n2_hi ^ n3_hi // XOR //! step-5: m_storeData = m_storeData + n2_hi // ADD conditionaly //! //! \param [in] cmdBuffer //! Command buffer //! \param [in] forceOperation //! whether add m_storeData directly //! \return MOS_STATUS //! MOS_STATUS_SUCCESS if success, else fail reason //! MOS_STATUS CodechalEncoderState::UpdateEncodeStatus( PMOS_COMMAND_BUFFER cmdBuffer, bool forceOperation) { MmioRegistersMfx *mmioRegisters; MOS_STATUS eStatus = MOS_STATUS_SUCCESS; CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer); CODECHAL_ENCODE_CHK_COND_RETURN((m_vdboxIndex > m_hwInterface->GetMfxInterface()->GetMaxVdboxIndex()),"ERROR - vdbox index exceed the maximum"); mmioRegisters = m_hwInterface->SelectVdboxAndGetMmioRegister(m_vdboxIndex, cmdBuffer); // Get the right offset of EncodeStatusUpdate Operand scratch buffer uint32_t baseOffset = m_atomicScratchBuf.dwOperandSetSize * m_atomicScratchBuf.wEncodeUpdateIndex; uint32_t zeroValueOffset = baseOffset; uint32_t operand1Offset = baseOffset + m_atomicScratchBuf.dwOperand1Offset; uint32_t operand2Offset = baseOffset + m_atomicScratchBuf.dwOperand2Offset; uint32_t operand3Offset = baseOffset + m_atomicScratchBuf.dwOperand3Offset; MHW_MI_FLUSH_DW_PARAMS flushDwParams; MOS_ZeroMemory(&flushDwParams , sizeof(flushDwParams)); flushDwParams.bVideoPipelineCacheInvalidate = true; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd( cmdBuffer, &flushDwParams)); MHW_MI_LOAD_REGISTER_MEM_PARAMS miLoadRegMemParams; miLoadRegMemParams.presStoreBuffer = &m_encodeStatusBuf.resStatusBuffer; miLoadRegMemParams.dwOffset = 0; miLoadRegMemParams.dwRegister = mmioRegisters->generalPurposeRegister0LoOffset; 
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd(cmdBuffer, &miLoadRegMemParams)); MHW_MI_LOAD_REGISTER_IMM_PARAMS miLoadRegImmParams; miLoadRegImmParams.dwData = 0; miLoadRegImmParams.dwRegister = mmioRegisters->generalPurposeRegister0HiOffset; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterImmCmd(cmdBuffer, &miLoadRegImmParams)); miLoadRegImmParams.dwData = 0xFFFFFFFF; miLoadRegImmParams.dwRegister = mmioRegisters->generalPurposeRegister4LoOffset; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterImmCmd(cmdBuffer, &miLoadRegImmParams)); miLoadRegImmParams.dwData = 0; miLoadRegImmParams.dwRegister = mmioRegisters->generalPurposeRegister4HiOffset; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterImmCmd(cmdBuffer, &miLoadRegImmParams)); CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd( cmdBuffer, &flushDwParams)); MHW_MI_ALU_PARAMS aluParams[20]; int aluCount = 0; aluCount = 0; //load1 srca, reg1 aluParams[aluCount].AluOpcode = MHW_MI_ALU_LOAD; aluParams[aluCount].Operand1 = MHW_MI_ALU_SRCA; aluParams[aluCount].Operand2 = MHW_MI_ALU_GPREG0; ++aluCount; //load srcb, reg2 aluParams[aluCount].AluOpcode = MHW_MI_ALU_LOAD; aluParams[aluCount].Operand1 = MHW_MI_ALU_SRCB; aluParams[aluCount].Operand2 = MHW_MI_ALU_GPREG4; ++aluCount; //add aluParams[aluCount].AluOpcode = MHW_MI_ALU_SUB; aluParams[aluCount].Operand1 = MHW_MI_ALU_SRCB; aluParams[aluCount].Operand2 = MHW_MI_ALU_GPREG4; ++aluCount; //store reg1, accu aluParams[aluCount].AluOpcode = MHW_MI_ALU_STORE; aluParams[aluCount].Operand1 = MHW_MI_ALU_GPREG0; aluParams[aluCount].Operand2 = MHW_MI_ALU_ZF; ++aluCount; MHW_MI_MATH_PARAMS miMathParams; miMathParams.dwNumAluParams = aluCount; miMathParams.pAluPayload = aluParams; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiMathCmd(cmdBuffer, &miMathParams)); miLoadRegImmParams.dwData = 1; miLoadRegImmParams.dwRegister = mmioRegisters->generalPurposeRegister4LoOffset; 
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterImmCmd(cmdBuffer, &miLoadRegImmParams)); miLoadRegImmParams.dwData = 0; miLoadRegImmParams.dwRegister = mmioRegisters->generalPurposeRegister4HiOffset; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterImmCmd(cmdBuffer, &miLoadRegImmParams)); aluCount = 0; //load1 srca, reg1 aluParams[aluCount].AluOpcode = MHW_MI_ALU_LOAD; aluParams[aluCount].Operand1 = MHW_MI_ALU_SRCA; aluParams[aluCount].Operand2 = MHW_MI_ALU_GPREG0; ++aluCount; //load srcb, reg2 aluParams[aluCount].AluOpcode = MHW_MI_ALU_LOAD; aluParams[aluCount].Operand1 = MHW_MI_ALU_SRCB; aluParams[aluCount].Operand2 = MHW_MI_ALU_GPREG4; ++aluCount; //add aluParams[aluCount].AluOpcode = MHW_MI_ALU_AND; ++aluCount; //store reg1, accu aluParams[aluCount].AluOpcode = MHW_MI_ALU_STORE; aluParams[aluCount].Operand1 = MHW_MI_ALU_GPREG0; aluParams[aluCount].Operand2 = MHW_MI_ALU_ACCU; ++aluCount; miMathParams.dwNumAluParams = aluCount; miMathParams.pAluPayload = aluParams; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiMathCmd(cmdBuffer, &miMathParams)); MHW_MI_STORE_REGISTER_MEM_PARAMS miStoreRegMemParams; MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams)); miStoreRegMemParams.presStoreBuffer = &m_atomicScratchBuf.resAtomicScratchBuffer; miStoreRegMemParams.dwOffset = operand1Offset; miStoreRegMemParams.dwRegister = mmioRegisters->generalPurposeRegister0LoOffset; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &miStoreRegMemParams)); // Make Flush DW call to make sure all previous work is done MOS_ZeroMemory(&flushDwParams , sizeof(flushDwParams)); flushDwParams.bVideoPipelineCacheInvalidate = true; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd( cmdBuffer, &flushDwParams)); // Forcely ADD m_storeData, always happened in last pass. 
if(forceOperation) { // Make Flush DW call to make sure all previous work is done MHW_MI_FLUSH_DW_PARAMS flushDwParams; MOS_ZeroMemory(&flushDwParams , sizeof(flushDwParams)); CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd( cmdBuffer, &flushDwParams)); // n2_hi = 0x1 MHW_MI_STORE_DATA_PARAMS storeDataParams; MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams)); storeDataParams.pOsResource = &(m_atomicScratchBuf.resAtomicScratchBuffer); storeDataParams.dwResourceOffset = operand2Offset + sizeof(uint32_t) ; storeDataParams.dwValue = 0x1; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd( cmdBuffer, &storeDataParams)); // VCS_GPR0_Lo = n2_hi = 0x1 MHW_MI_STORE_REGISTER_MEM_PARAMS registerMemParams; MOS_ZeroMemory(®isterMemParams, sizeof(registerMemParams)); registerMemParams.presStoreBuffer = &(m_atomicScratchBuf.resAtomicScratchBuffer); registerMemParams.dwOffset = operand2Offset + sizeof(uint32_t) ; registerMemParams.dwRegister = mmioRegisters->generalPurposeRegister0LoOffset; // VCS_GPR0_Lo CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd( cmdBuffer, ®isterMemParams)); // Make Flush DW call to make sure all previous work is done CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd( cmdBuffer, &flushDwParams)); // m_storeData = m_storeData + VCS_GPR0_Lo = m_storeData + 1 MHW_MI_ATOMIC_PARAMS atomicParams; MOS_ZeroMemory(&atomicParams, sizeof(atomicParams)); atomicParams.pOsResource =&m_encodeStatusBuf.resStatusBuffer; atomicParams.dwResourceOffset = 0; atomicParams.dwDataSize = sizeof(uint32_t); atomicParams.Operation = MHW_MI_ATOMIC_ADD; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiAtomicCmd( cmdBuffer, &atomicParams)); // Make Flush DW call to make sure all previous work is done CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd( cmdBuffer, &flushDwParams)); MOS_ZeroMemory(®isterMemParams, sizeof(registerMemParams)); registerMemParams.presStoreBuffer = 
&(m_atomicScratchBuf.resAtomicScratchBuffer); registerMemParams.dwOffset = operand1Offset; registerMemParams.dwRegister = mmioRegisters->generalPurposeRegister0LoOffset; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd( cmdBuffer, ®isterMemParams)); MOS_ZeroMemory(&atomicParams, sizeof(atomicParams)); atomicParams.pOsResource =&m_encodeStatusBuf.resStatusBuffer; atomicParams.dwResourceOffset = 0; atomicParams.dwDataSize = sizeof(uint32_t); atomicParams.Operation = MHW_MI_ATOMIC_ADD; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiAtomicCmd( cmdBuffer, &atomicParams)); CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd( cmdBuffer, &flushDwParams)); return MOS_STATUS_SUCCESS; } else { // Make Flush DW call to make sure all previous work is done MHW_MI_FLUSH_DW_PARAMS flushDwParams; MOS_ZeroMemory(&flushDwParams , sizeof(flushDwParams)); CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd( cmdBuffer, &flushDwParams)); // n2_lo = dwImageStatusMask MHW_MI_COPY_MEM_MEM_PARAMS copyMemMemParams; MOS_ZeroMemory(©MemMemParams , sizeof(copyMemMemParams)); if(!m_vdencBrcEnabled) { copyMemMemParams.presSrc = &m_encodeStatusBuf.resStatusBuffer; copyMemMemParams.dwSrcOffset = (m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize) + m_encodeStatusBuf.dwImageStatusMaskOffset + (sizeof(uint32_t) * 2); } else { copyMemMemParams.presSrc = &m_resPakMmioBuffer; copyMemMemParams.dwSrcOffset = (sizeof(uint32_t) * 1); } copyMemMemParams.presDst = &m_atomicScratchBuf.resAtomicScratchBuffer; copyMemMemParams.dwDstOffset = operand2Offset; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd( cmdBuffer, ©MemMemParams)); // VCS_GPR0_Lo = ImageStatusCtrl MHW_MI_STORE_REGISTER_MEM_PARAMS registerMemParams; MOS_ZeroMemory(®isterMemParams, sizeof(registerMemParams)); if(!m_vdencBrcEnabled) { registerMemParams.presStoreBuffer = &m_encodeStatusBuf.resStatusBuffer; registerMemParams.dwOffset = 
(m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize) + m_encodeStatusBuf.dwImageStatusMaskOffset + (sizeof(uint32_t) * 2) + sizeof(uint32_t); } else { registerMemParams.presStoreBuffer = &m_resPakMmioBuffer; registerMemParams.dwOffset = (sizeof(uint32_t) * 0); } registerMemParams.dwRegister = mmioRegisters->generalPurposeRegister0LoOffset; // VCS_GPR0_Lo CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd( cmdBuffer, ®isterMemParams)); // Reset GPR4_Lo MOS_ZeroMemory(®isterMemParams, sizeof(registerMemParams)); registerMemParams.presStoreBuffer = &(m_atomicScratchBuf.resAtomicScratchBuffer); registerMemParams.dwOffset = zeroValueOffset; //Offset 0, has value of 0. registerMemParams.dwRegister = mmioRegisters->generalPurposeRegister4LoOffset; // VCS_GPR4 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd( cmdBuffer, ®isterMemParams)); // Make Flush DW call to make sure all previous work is done CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd( cmdBuffer, &flushDwParams)); // step-1: n2_lo = n2_lo & VCS_GPR0_Lo = dwImageStatusMask & ImageStatusCtrl MHW_MI_ATOMIC_PARAMS atomicParams; MOS_ZeroMemory(&atomicParams, sizeof(atomicParams)); atomicParams.pOsResource = &m_atomicScratchBuf.resAtomicScratchBuffer; atomicParams.dwResourceOffset = operand2Offset; atomicParams.dwDataSize = sizeof(uint32_t); atomicParams.Operation = MHW_MI_ATOMIC_AND; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiAtomicCmd( cmdBuffer, &atomicParams)); // n3_lo = 0 MHW_MI_STORE_DATA_PARAMS storeDataParams; MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams)); storeDataParams.pOsResource = &(m_atomicScratchBuf.resAtomicScratchBuffer); storeDataParams.dwResourceOffset = operand3Offset; storeDataParams.dwValue = 0; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd( cmdBuffer, &storeDataParams)); // Make Flush DW call to make sure all previous work is done 
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd( cmdBuffer, &flushDwParams)); MOS_ZeroMemory(©MemMemParams , sizeof(copyMemMemParams)); copyMemMemParams.presSrc = &m_atomicScratchBuf.resAtomicScratchBuffer; copyMemMemParams.dwSrcOffset = operand2Offset; copyMemMemParams.presDst = &m_atomicScratchBuf.resAtomicScratchBuffer; copyMemMemParams.dwDstOffset = operand1Offset + sizeof(uint32_t); CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd( cmdBuffer, ©MemMemParams)); // GPR0_lo = n1_lo = 0 MOS_ZeroMemory(®isterMemParams, sizeof(registerMemParams)); registerMemParams.presStoreBuffer = &(m_atomicScratchBuf.resAtomicScratchBuffer); registerMemParams.dwOffset = zeroValueOffset; registerMemParams.dwRegister = mmioRegisters->generalPurposeRegister0LoOffset; // VCS_GPR0 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd( cmdBuffer, ®isterMemParams)); // Reset GPR4_Lo MOS_ZeroMemory(®isterMemParams, sizeof(registerMemParams)); registerMemParams.presStoreBuffer = &(m_atomicScratchBuf.resAtomicScratchBuffer); registerMemParams.dwOffset = zeroValueOffset; //Offset 0, has value of 0. registerMemParams.dwRegister = mmioRegisters->generalPurposeRegister4LoOffset; // VCS_GPR4 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd( cmdBuffer, ®isterMemParams)); // Make Flush DW call to make sure all previous work is done CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd( cmdBuffer, &flushDwParams)); // step-2: n2_lo == n1_lo ? 0 : n2_lo // compare n1 vs n2. i.e. GRP0 vs. 
memory of operand2 MOS_ZeroMemory(&atomicParams, sizeof(atomicParams)); atomicParams.pOsResource = &(m_atomicScratchBuf.resAtomicScratchBuffer); atomicParams.dwResourceOffset = operand2Offset; atomicParams.dwDataSize = sizeof(uint32_t); atomicParams.Operation = MHW_MI_ATOMIC_CMP; atomicParams.bReturnData = true; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiAtomicCmd( cmdBuffer, &atomicParams)); // n2_hi = 1 MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams)); storeDataParams.pOsResource =&(m_atomicScratchBuf.resAtomicScratchBuffer); storeDataParams.dwResourceOffset = operand2Offset + sizeof(uint32_t); storeDataParams.dwValue = 1; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd( cmdBuffer, &storeDataParams)); // n3_hi = 1 MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams)); storeDataParams.pOsResource =&(m_atomicScratchBuf.resAtomicScratchBuffer); storeDataParams.dwResourceOffset = operand3Offset + sizeof(uint32_t); storeDataParams.dwValue = 1; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd( cmdBuffer, &storeDataParams)); // VCS_GPR0_Lo = n3_lo = 0 MOS_ZeroMemory(®isterMemParams, sizeof(registerMemParams)); registerMemParams.presStoreBuffer = &(m_atomicScratchBuf.resAtomicScratchBuffer); registerMemParams.dwOffset = operand3Offset; registerMemParams.dwRegister = mmioRegisters->generalPurposeRegister0LoOffset; // VCS_GPR0_Lo CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd( cmdBuffer, ®isterMemParams)); // GPR0_Hi = n2_hi = 1 MOS_ZeroMemory(®isterMemParams, sizeof(registerMemParams)); registerMemParams.presStoreBuffer = &(m_atomicScratchBuf.resAtomicScratchBuffer); registerMemParams.dwOffset = operand2Offset + sizeof(uint32_t) ; // update 1 registerMemParams.dwRegister = mmioRegisters->generalPurposeRegister0HiOffset; // VCS_GPR0_Hi CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd( cmdBuffer, ®isterMemParams)); // Reset GPR4_Lo and GPR4_Hi 
MOS_ZeroMemory(®isterMemParams, sizeof(registerMemParams)); registerMemParams.presStoreBuffer =&(m_atomicScratchBuf.resAtomicScratchBuffer); registerMemParams.dwOffset = zeroValueOffset ; registerMemParams.dwRegister = mmioRegisters->generalPurposeRegister4LoOffset; // VCS_GPR4_Hi CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd( cmdBuffer, ®isterMemParams)); MOS_ZeroMemory(®isterMemParams, sizeof(registerMemParams)); registerMemParams.presStoreBuffer = &(m_atomicScratchBuf.resAtomicScratchBuffer); registerMemParams.dwOffset = zeroValueOffset ; registerMemParams.dwRegister = mmioRegisters->generalPurposeRegister4HiOffset; // VCS_GPR4_Hi CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd( cmdBuffer, ®isterMemParams)); // Make Flush DW call to make sure all previous work is done CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd( cmdBuffer, &flushDwParams)); // steop-3: n2 = (n2 == 0:1) ? 0:0 : n2 // uint64_t CMP // If n2==0 (Lo) and 1 (Hi), covert n2 to 0 (Lo)and 0 (Hi), else no change. // n2 == 0:1 means encoding completsion. the n2 memory will be updated with 0:0, otherwise, no change. 
MOS_ZeroMemory(&atomicParams, sizeof(atomicParams)); atomicParams.pOsResource = &(m_atomicScratchBuf.resAtomicScratchBuffer); atomicParams.dwResourceOffset = operand2Offset; atomicParams.dwDataSize = sizeof(uint64_t); atomicParams.Operation = MHW_MI_ATOMIC_CMP; atomicParams.bReturnData = true; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiAtomicCmd( cmdBuffer, &atomicParams)); // Make Flush DW call to make sure all previous work is done CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd( cmdBuffer, &flushDwParams)); // VCS_GPR0_Lo = n3_hi = 1 MOS_ZeroMemory(®isterMemParams, sizeof(registerMemParams)); registerMemParams.presStoreBuffer = &(m_atomicScratchBuf.resAtomicScratchBuffer); registerMemParams.dwOffset = operand3Offset + sizeof(uint32_t); registerMemParams.dwRegister = mmioRegisters->generalPurposeRegister0LoOffset; // VCS_GPR0_Lo CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd( cmdBuffer, ®isterMemParams)); // Make Flush DW call to make sure all previous work is done CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd( cmdBuffer, &flushDwParams)); // step-4: n2_hi = n2_hi ^ VCS_GPR0_Lo = n2_hi ^ n3_hi MOS_ZeroMemory(&atomicParams, sizeof(atomicParams)); atomicParams.pOsResource =&(m_atomicScratchBuf.resAtomicScratchBuffer); atomicParams.dwResourceOffset = operand2Offset + sizeof(uint32_t); atomicParams.dwDataSize = sizeof(uint32_t); atomicParams.Operation = MHW_MI_ATOMIC_XOR; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiAtomicCmd( cmdBuffer, &atomicParams)); // VCS_GPR0_Lo = n2_hi MOS_ZeroMemory(®isterMemParams, sizeof(registerMemParams)); registerMemParams.presStoreBuffer = &(m_atomicScratchBuf.resAtomicScratchBuffer); registerMemParams.dwOffset = operand2Offset + sizeof(uint32_t) ; registerMemParams.dwRegister = mmioRegisters->generalPurposeRegister0LoOffset; // VCS_GPR0_Lo CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd( cmdBuffer, ®isterMemParams)); // step-5: 
m_storeData = m_storeData + VCS_GPR0_Lo = m_storeData + n2_hi // if not completed n2_hi should be 0, then m_storeData = m_storeData + 0 // if completed, n2_hi should be 1, then m_storeData = m_storeData + 1 MOS_ZeroMemory(&atomicParams, sizeof(atomicParams)); atomicParams.pOsResource =&m_encodeStatusBuf.resStatusBuffer; atomicParams.dwResourceOffset = 0; atomicParams.dwDataSize = sizeof(uint32_t); atomicParams.Operation = MHW_MI_ATOMIC_ADD; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiAtomicCmd( cmdBuffer, &atomicParams)); // Make Flush DW call to make sure all previous work is done CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd( cmdBuffer, &flushDwParams)); MOS_ZeroMemory(®isterMemParams, sizeof(registerMemParams)); registerMemParams.presStoreBuffer = &(m_atomicScratchBuf.resAtomicScratchBuffer); registerMemParams.dwOffset = operand1Offset + sizeof(uint32_t); registerMemParams.dwRegister = mmioRegisters->generalPurposeRegister0LoOffset; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd( cmdBuffer, ®isterMemParams)); MOS_ZeroMemory(&atomicParams, sizeof(atomicParams)); atomicParams.pOsResource = &m_atomicScratchBuf.resAtomicScratchBuffer; atomicParams.dwResourceOffset = operand1Offset; atomicParams.dwDataSize = sizeof(uint32_t); atomicParams.Operation = MHW_MI_ATOMIC_AND; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiAtomicCmd( cmdBuffer, &atomicParams)); CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd( cmdBuffer, &flushDwParams)); MOS_ZeroMemory(®isterMemParams, sizeof(registerMemParams)); registerMemParams.presStoreBuffer = &(m_atomicScratchBuf.resAtomicScratchBuffer); registerMemParams.dwOffset = operand1Offset; registerMemParams.dwRegister = mmioRegisters->generalPurposeRegister0LoOffset; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd( cmdBuffer, ®isterMemParams)); MOS_ZeroMemory(&atomicParams, sizeof(atomicParams)); atomicParams.pOsResource 
=&m_encodeStatusBuf.resStatusBuffer;
        atomicParams.dwResourceOffset = 0;
        atomicParams.dwDataSize = sizeof(uint32_t);
        atomicParams.Operation = MHW_MI_ATOMIC_ADD;
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiAtomicCmd(
            cmdBuffer,
            &atomicParams));

        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(
            cmdBuffer,
            &flushDwParams));
    }

    return eStatus;
}

//------------------------------------------------------------------------------
//| Purpose:    Sets up the eStatus reporting values for the next frame:
//|             optionally submits a small command buffer that stores the
//|             current S/W tag, advances the status ring index and tag, and
//|             clears the status entry that the next frame will use.
//| Return:     MOS_STATUS_SUCCESS, or the status propagated by one of the
//|             CODECHAL_ENCODE_CHK_* checks
//------------------------------------------------------------------------------
MOS_STATUS CodechalEncoderState::ResetStatusReport()
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    CODECHAL_ENCODE_FUNCTION_ENTER;

    CODECHAL_ENCODE_CHK_NULL_RETURN(m_encodeStatusBuf.pEncodeStatus);

    EncodeStatusBuffer* encodeStatusBuf = &m_encodeStatusBuf;
    EncodeStatusBuffer* encodeStatusBufRcs = &m_encodeStatusBufRcs;

    // Status entry of the frame that has just been programmed (current ring slot).
    EncodeStatus* encodeStatus = (EncodeStatus*)(encodeStatusBuf->pEncodeStatus + encodeStatusBuf->wCurrIndex * encodeStatusBuf->dwReportSize);

    // A standalone command buffer is only submitted when neither frame tracking
    // nor the inline status update is enabled.
    if (!m_frameTrackingEnabled && !m_inlineEncodeStatusUpdate)
    {
        bool renderEngineInUse = m_osInterface->pfnGetGpuContext(m_osInterface) == m_renderContext;
        bool nullRendering = false;
        MOS_SYNC_PARAMS syncParams = g_cInitSyncParams;

        // Use the sync context and null-HW flag that belong to the GPU context currently selected.
        if (renderEngineInUse)
        {
            syncParams.GpuContext = m_renderContext;
            nullRendering = m_renderContextUsesNullHw;
        }
        else
        {
            syncParams.GpuContext = m_videoContext;
            nullRendering = m_videoContextUsesNullHw;
        }

        m_osInterface->pfnResetOsStates(m_osInterface);
        MOS_COMMAND_BUFFER cmdBuffer;
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0));

        cmdBuffer.Attributes.bTurboMode = m_hwInterface->m_turboMode;
        cmdBuffer.Attributes.dwNumRequestedEUSlices = m_hwInterface->m_numRequestedEuSlices;
        CODECHAL_ENCODE_CHK_STATUS_RETURN(UpdateCmdBufAttribute(&cmdBuffer, renderEngineInUse));

        // The generic prolog stores m_storeData into the status buffer of the engine in use.
        MHW_GENERIC_PROLOG_PARAMS genericPrologParams;
        MOS_ZeroMemory(&genericPrologParams, sizeof(genericPrologParams));
        genericPrologParams.pOsInterface = m_osInterface;
        genericPrologParams.pvMiInterface = m_miInterface;
        genericPrologParams.bMmcEnabled = m_mmcState ? m_mmcState->IsMmcEnabled() : false;
        genericPrologParams.presStoreData = (renderEngineInUse) ? &encodeStatusBufRcs->resStatusBuffer : &encodeStatusBuf->resStatusBuffer;
        genericPrologParams.dwStoreDataValue = m_storeData;
        CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_SendGenericPrologCmd(&cmdBuffer, &genericPrologParams));

        // Update the tag in GPU Sync eStatus buffer (H/W Tag) to match the current S/W tag
        if (m_osInterface->bTagResourceSync)
        {
            // Skipped only for the first field of a field picture.
            if (!m_firstField || CodecHal_PictureIsFrame(m_currOriginalPic))
            {
                CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->WriteSyncTagToResource(&cmdBuffer, &syncParams));
            }
        }

        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(
            &cmdBuffer,
            nullptr));

        CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
            &cmdBuffer,
            CODECHAL_NUM_MEDIA_STATES,
            "_RESET_STATUS")));

        m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0);

        HalOcaInterface::On1stLevelBBEnd(cmdBuffer, *m_osInterface);
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &cmdBuffer, nullRendering));
    }

    // With null HW on either context the tag is written from the CPU side here.
    if (m_videoContextUsesNullHw ||
        m_renderContextUsesNullHw)
    {
        if (CodecHalUsesOnlyRenderEngine(m_codecFunction))
        {
            *(encodeStatusBufRcs->pData) = m_storeData;
        }
        else
        {
            *(encodeStatusBuf->pData) = m_storeData;
        }
    }

    encodeStatus->dwHeaderBytesInserted = m_headerBytesInserted;
    m_headerBytesInserted = 0;

    if (!m_disableStatusReport)
    {
        // Advance the S/W tag; if the increment wraps to 0 the tag restarts at 1.
        m_storeData = m_storeData + 1 ? m_storeData + 1 : 1;
        // Move both status rings (video and render) to the next slot.
        encodeStatusBuf->wCurrIndex = (encodeStatusBuf->wCurrIndex + 1) % CODECHAL_ENCODE_STATUS_NUM;
        encodeStatusBufRcs->wCurrIndex = (encodeStatusBufRcs->wCurrIndex + 1) % CODECHAL_ENCODE_STATUS_NUM;
    }

    // clean up the Status for next frame
    encodeStatus = (EncodeStatus*)(encodeStatusBuf->pEncodeStatus + encodeStatusBuf->wCurrIndex * encodeStatusBuf->dwReportSize);
    // The look-ahead StatusReportNumber is saved and restored around the zeroing.
    uint32_t statusReportNumber = encodeStatus->lookaheadStatus.StatusReportNumber;
    MOS_ZeroMemory((uint8_t*)encodeStatus, sizeof(EncodeStatus));
    encodeStatus->lookaheadStatus.StatusReportNumber = statusReportNumber;

    if (m_encEnabled)
    {
        EncodeStatus* pEncodeStatusRcs = (EncodeStatus*)(encodeStatusBufRcs->pEncodeStatus + encodeStatusBufRcs->wCurrIndex * encodeStatusBufRcs->dwReportSize);
        MOS_ZeroMemory((uint8_t*)pEncodeStatusRcs, sizeof(EncodeStatus));
    }

    return eStatus;
}

//------------------------------------------------------------------------------
//| Purpose:    Fills encodeStatusReport->HWCounterValue (Count and IV) either
//|             from m_dataHwCount at slot `index` or from the value returned
//|             by the CP interface's GetCounterValue(); both fields are passed
//|             through SwapEndianness().
//| Return:     MOS_STATUS_SUCCESS, MOS_STATUS_NULL_POINTER when m_resHwCount
//|             is null, or the status returned by GetCounterValue()
//------------------------------------------------------------------------------
MOS_STATUS CodechalEncoderState::ReadCounterValue(uint16_t index, EncodeStatusReport* encodeStatusReport)
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
    CODECHAL_ENCODE_FUNCTION_ENTER;
    CODECHAL_ENCODE_CHK_NULL_RETURN(encodeStatusReport);
    uint64_t *address2Counter = nullptr;
    uint32_t ctr[4] = { 0 };

    if (m_hwInterface->GetCpInterface()->IsHwCounterIncrement(m_osInterface))
    {
        if (Mos_ResourceIsNull(&m_resHwCount))
        {
            CODECHAL_ENCODE_ASSERTMESSAGE("m_resHwCount is not allocated");
            return MOS_STATUS_NULL_POINTER;
        }

        //Report HW counter by command output resource
        address2Counter = (uint64_t *)(((char *)(m_dataHwCount)) + (index * sizeof(HwCounter)));
    }
    else
    {
        //Report driver generated counter which was submitted to HW by command
        eStatus = m_hwInterface->GetCpInterface()->GetCounterValue(ctr);
        if (MOS_STATUS_SUCCESS == eStatus)
        {
            // The four 32-bit words are read below as two consecutive 64-bit values.
            address2Counter = (uint64_t *)ctr;
        }
        else
        {
            return eStatus;
        }
    }
    encodeStatusReport->HWCounterValue.Count = *address2Counter;
    //Report back in Big endian
    encodeStatusReport->HWCounterValue.Count = SwapEndianness(encodeStatusReport->HWCounterValue.Count);
    //IV value computation
    encodeStatusReport->HWCounterValue.IV =
*(++address2Counter); encodeStatusReport->HWCounterValue.IV = SwapEndianness(encodeStatusReport->HWCounterValue.IV); CODECHAL_ENCODE_NORMALMESSAGE( "encodeStatusReport->HWCounterValue.Count = 0x%llx, encodeStatusReport->HWCounterValue.IV = 0x%llx", encodeStatusReport->HWCounterValue.Count, encodeStatusReport->HWCounterValue.IV); return eStatus; } //------------------------------------------------------------------------------ //| Purpose: Gets available eStatus report data //| Return: N/A //------------------------------------------------------------------------------ MOS_STATUS CodechalEncoderState::GetStatusReport( void *status, uint16_t numStatus) { MOS_STATUS eStatus = MOS_STATUS_SUCCESS; CODECHAL_ENCODE_FUNCTION_ENTER; CODECHAL_ENCODE_CHK_NULL_RETURN(status); EncodeStatusReport *codecStatus = (EncodeStatusReport *)status; EncodeStatusBuffer* encodeStatusBuf = nullptr; if (m_pakEnabled) { encodeStatusBuf = &m_encodeStatusBuf; } else { encodeStatusBuf = &m_encodeStatusBufRcs; } CODECHAL_ENCODE_CHK_NULL_RETURN(encodeStatusBuf->pEncodeStatus); uint16_t numReportsAvailable = (encodeStatusBuf->wCurrIndex - encodeStatusBuf->wFirstIndex) & (CODECHAL_ENCODE_STATUS_NUM - 1); // max is (CODECHAL_ENCODE_STATUS_NUM - 1) uint32_t globalHWStoredData = 0; if (m_pakEnabled) { globalHWStoredData = *(m_encodeStatusBuf.pData); // HW stored Data } else { globalHWStoredData = *(m_encodeStatusBufRcs.pData); // HW stored Data } uint32_t globalCount = m_storeData - globalHWStoredData; uint16_t reportsGenerated = 0; if (m_videoContextUsesNullHw || m_renderContextUsesNullHw) { for (auto i = 0; i < numReportsAvailable; i++) { codecStatus[i].CodecStatus = CODECHAL_STATUS_SUCCESSFUL; // Set fake bitstream size to avoid DDI report error codecStatus[i].bitstreamSize = 1024; reportsGenerated++; } encodeStatusBuf->wFirstIndex = (encodeStatusBuf->wFirstIndex + reportsGenerated) % CODECHAL_ENCODE_STATUS_NUM; return eStatus; } CODECHAL_ENCODE_VERBOSEMESSAGE(" numStatus = %d, dwNumReportsAvailable 
= %d.", numStatus, numReportsAvailable); CODECHAL_ENCODE_VERBOSEMESSAGE(" hwstoreData = %d, globalCount = %d", globalHWStoredData, globalCount); if (numReportsAvailable < numStatus && numStatus < CODECHAL_ENCODE_STATUS_NUM) { for (auto i = numReportsAvailable; i < numStatus; i++) { codecStatus[i].CodecStatus = CODECHAL_STATUS_UNAVAILABLE; } numStatus = numReportsAvailable; } if (numReportsAvailable == 0) { CODECHAL_ENCODE_ASSERTMESSAGE("No reports available, wCurrIndex = %d, wFirstIndex = %d", encodeStatusBuf->wCurrIndex, encodeStatusBuf->wFirstIndex); return MOS_STATUS_SUCCESS; } uint16_t index = 0; for (auto i = 0; i < numStatus; i++) { if(codecStatus->bSequential) { index = (encodeStatusBuf->wFirstIndex + i) & (CODECHAL_ENCODE_STATUS_NUM - 1); } else { index = (encodeStatusBuf->wFirstIndex + numStatus - i - 1) & (CODECHAL_ENCODE_STATUS_NUM - 1); } EncodeStatus* encodeStatus = (EncodeStatus*)(encodeStatusBuf->pEncodeStatus + index * encodeStatusBuf->dwReportSize); EncodeStatusReport* encodeStatusReport = &encodeStatus->encodeStatusReport; PCODEC_REF_LIST refList = encodeStatusReport->pCurrRefList; uint32_t localCount = encodeStatus->dwStoredData - globalHWStoredData; encodeStatusReport->pFrmStatsInfo = codecStatus[i].pFrmStatsInfo; encodeStatusReport->pBlkStatsInfo = codecStatus[i].pBlkStatsInfo; if (localCount == 0 || localCount > globalCount) { CODECHAL_DEBUG_TOOL( m_statusReportDebugInterface->m_bufferDumpFrameNum = encodeStatus->dwStoredData; ) // to be discussed, how to identify whether huc involved in pipeline if (!m_swBrcMode && m_vdencEnabled && m_vdencBrcEnabled && (m_standard == CODECHAL_HEVC || m_standard == CODECHAL_AVC || m_standard == CODECHAL_VP9)) { MOS_USER_FEATURE_VALUE_WRITE_DATA userFeatureWriteData; MOS_ZeroMemory(&userFeatureWriteData, sizeof(MOS_USER_FEATURE_VALUE_WRITE_DATA)); if (!MEDIA_IS_SKU(m_hwInterface->GetSkuTable(), FtrEnableMediaKernels)) { CODECHAL_ENCODE_ASSERTMESSAGE("Failed to load HuC firmware!"); // Reporting 
MOS_ZeroMemory(&userFeatureWriteData, sizeof(MOS_USER_FEATURE_VALUE_WRITE_DATA)); userFeatureWriteData = __NULL_USER_FEATURE_VALUE_WRITE_DATA__; userFeatureWriteData.Value.i32Data = 1; userFeatureWriteData.ValueID = __MEDIA_USER_FEATURE_VALUE_ENCODE_HUC_FIRMWARE_LOAD_FAILED_ID; MOS_UserFeature_WriteValues_ID(nullptr, &userFeatureWriteData, 1, m_osInterface->pOsContext); return MOS_STATUS_HUC_KERNEL_FAILED; } else if (!(encodeStatus->HuCStatus2Reg & m_hucInterface->GetHucStatus2ImemLoadedMask())) { CODECHAL_ENCODE_ASSERTMESSAGE("HuC status2 indicates Valid Imem Load failed!"); // Reporting MOS_ZeroMemory(&userFeatureWriteData, sizeof(MOS_USER_FEATURE_VALUE_WRITE_DATA)); userFeatureWriteData = __NULL_USER_FEATURE_VALUE_WRITE_DATA__; userFeatureWriteData.Value.i32Data = 1; userFeatureWriteData.ValueID = __MEDIA_USER_FEATURE_VALUE_ENCODE_HUC_IMEM_LOAD_FALIED_ID; MOS_UserFeature_WriteValues_ID(nullptr, &userFeatureWriteData, 1, m_osInterface->pOsContext); #if (_DEBUG || _RELEASE_INTERNAL) MOS_ZeroMemory(&userFeatureWriteData, sizeof(MOS_USER_FEATURE_VALUE_WRITE_DATA)); userFeatureWriteData = __NULL_USER_FEATURE_VALUE_WRITE_DATA__; userFeatureWriteData.Value.u32Data = encodeStatus->HuCStatus2Reg; userFeatureWriteData.ValueID = __MEDIA_USER_FEATURE_VALUE_ENCODE_HUC_STATUS2_VALUE; MOS_UserFeature_WriteValues_ID(nullptr, &userFeatureWriteData, 1, m_osInterface->pOsContext); #endif return MOS_STATUS_HUC_KERNEL_FAILED; } } if ((m_standard == CODECHAL_HEVC && m_vdencEnabled && (encodeStatus->HuCStatusReg & m_hucInterface->GetHevcVdencHucErrorFlagMask())) || (m_standard == CODECHAL_AVC && m_vdencEnabled && (encodeStatus->HuCStatusReg & m_hucInterface->GetAvcVdencHucErrorFlagMask())) || (m_standard == CODECHAL_VP9 && m_vdencEnabled && (encodeStatus->HuCStatusReg & m_hucInterface->GetVp9VdencHucErrorFlagMask())) ) { CODECHAL_ENCODE_ASSERTMESSAGE("HuC status indicates error"); CODECHAL_DEBUG_TOOL( CODECHAL_ENCODE_CHK_STATUS_RETURN(m_statusReportDebugInterface->DumpData( 
&encodeStatus->HuCStatusReg, sizeof(encodeStatus->HuCStatusReg), CodechalDbgAttr::attrStatusReport, "HuC_StatusRegister"));) } // Current command is executed if (m_osInterface->pfnIsGPUHung(m_osInterface)) { encodeStatusReport->CodecStatus = CODECHAL_STATUS_ERROR; *(encodeStatusBuf->pData) += 1; } else if (encodeStatusReport->Func != CODECHAL_ENCODE_ENC_ID && encodeStatusReport->Func != CODECHAL_ENCODE_FEI_ENC_ID && encodeStatus->dwStoredDataMfx != CODECHAL_STATUS_QUERY_END_FLAG) { if(encodeStatusReport->Func == CODECHAL_ENCODE_FEI_PRE_ENC_ID) { CODECHAL_DEBUG_TOOL( m_statusReportDebugInterface->m_scaledBottomFieldOffset = m_scaledBottomFieldOffset; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_statusReportDebugInterface->DumpYUVSurface( m_trackedBuf->Get4xDsSurface(CODEC_CURR_TRACKED_BUFFER), CodechalDbgAttr::attrReconstructedSurface, "4xScaledSurf")); /*CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgDumpEncode1Dbuffer( m_debugInterface, pEncoder));*/ // dump EncodeFeiPreproc FeiPreEncParams PreEncParams; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_statusReportDebugInterface->DumpBuffer( CodecHal_PictureIsBottomField(m_currOriginalPic) ? &PreEncParams.resStatsBotFieldBuffer : &PreEncParams.resStatsBotFieldBuffer, CodechalDbgAttr::attrOutput, "MbStats", m_picWidthInMb * m_frameFieldHeightInMb * 64, CodecHal_PictureIsBottomField(m_currOriginalPic) ? 
m_mbvProcStatsBottomFieldOffset : 0, CODECHAL_MEDIA_STATE_PREPROC));) encodeStatusReport->CodecStatus = CODECHAL_STATUS_SUCCESSFUL; } else { CODECHAL_ENCODE_NORMALMESSAGE("Media reset may have occured."); encodeStatusReport->CodecStatus = CODECHAL_STATUS_ERROR; } } else if (m_hwWalker && encodeStatusReport->Func == CODECHAL_ENCODE_ENC_ID) { // iterate over all media states and check that all of them completed for (auto j = 0; j < CODECHAL_NUM_MEDIA_STATES; j += 1) { if (encodeStatus->qwStoredDataEnc[j].dwStoredData != CODECHAL_STATUS_QUERY_END_FLAG) { // some media state failed to complete CODECHAL_ENCODE_ASSERTMESSAGE("Error: Unable to finish encoding"); encodeStatusReport->CodecStatus = CODECHAL_STATUS_ERROR; break; } } encodeStatusReport->CodecStatus = CODECHAL_STATUS_SUCCESSFUL; if (m_codecFunction == CODECHAL_FUNCTION_HYBRIDPAK && m_mode == CODECHAL_ENCODE_MODE_VP9 && encodeStatusReport->CodecStatus != CODECHAL_STATUS_ERROR) { unsigned int size = ((m_frameWidth + 63) >> 6) * ((m_frameHeight + 63) >> 6) + 1; encodeStatusReport->bitstreamSize = CODECHAL_VP9_MB_CODE_SIZE * sizeof(uint32_t) * size; } } else { if (m_codecGetStatusReportDefined) { // Call corresponding CODEC's status report function if existing eStatus = GetStatusReport(encodeStatus, encodeStatusReport); } else { eStatus = GetStatusReportCommon(encodeStatus, encodeStatusReport); } if (MOS_STATUS_SUCCESS != eStatus) { return eStatus; } CODECHAL_ENCODE_CHK_STATUS_RETURN(GetStatusReportExt(encodeStatus, encodeStatusReport, index)); if (m_osInterface->osCpInterface->IsCpEnabled() && m_skipFrameBasedHWCounterRead == false) { eStatus = ReadCounterValue(index, encodeStatusReport); if (MOS_STATUS_SUCCESS != eStatus) { return eStatus; } } if (encodeStatusReport->bitstreamSize > m_bitstreamUpperBound) { encodeStatusReport->CodecStatus = CODECHAL_STATUS_ERROR; encodeStatusReport->bitstreamSize = 0; CODECHAL_ENCODE_ASSERTMESSAGE("Bit-stream size exceeds upper bound!"); return MOS_STATUS_NOT_ENOUGH_BUFFER; } 
if(refList && refList->bMADEnabled) { // set lock flag to READ_ONLY MOS_LOCK_PARAMS lockFlags; MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS)); lockFlags.ReadOnly = 1; uint8_t* data = (uint8_t* )m_osInterface->pfnLockResource( m_osInterface, &m_resMadDataBuffer[refList->ucMADBufferIdx], &lockFlags); CODECHAL_ENCODE_CHK_NULL_RETURN(data); eStatus = MOS_SecureMemcpy( &encodeStatusReport->MAD, CODECHAL_MAD_BUFFER_SIZE, data, CODECHAL_MAD_BUFFER_SIZE); if(eStatus != MOS_STATUS_SUCCESS) { CODECHAL_ENCODE_ASSERTMESSAGE("Failed to copy memory."); return eStatus; } m_osInterface->pfnUnlockResource( m_osInterface, &m_resMadDataBuffer[refList->ucMADBufferIdx]); // The driver needs to divide the output distortion by 4 before sending to the app encodeStatusReport->MAD /= 4; } else { encodeStatusReport->MAD = 0; } CODECHAL_DEBUG_TOOL( CODEC_REF_LIST currRefList = *refList; currRefList.RefPic = encodeStatusReport->CurrOriginalPic; m_statusReportDebugInterface->m_currPic = encodeStatusReport->CurrOriginalPic; m_statusReportDebugInterface->m_bufferDumpFrameNum = encodeStatus->dwStoredData; m_statusReportDebugInterface->m_frameType = encodeStatus->wPictureCodingType; if (!m_vdencEnabled) { if (currRefList.bMADEnabled) { CODECHAL_ENCODE_CHK_STATUS_RETURN( m_statusReportDebugInterface->DumpBuffer( &m_resMadDataBuffer[currRefList.ucMADBufferIdx], CodechalDbgAttr::attrInput, "MADWrite", CODECHAL_MAD_BUFFER_SIZE, 0, CODECHAL_MEDIA_STATE_ENC_NORMAL)); } DumpMbEncPakOutput(refList, m_statusReportDebugInterface); } if (CodecHalUsesVideoEngine(m_codecFunction)) { /* Only where the MFX engine is used the bitstream surface will be available */ CODECHAL_ENCODE_CHK_STATUS_RETURN(m_statusReportDebugInterface->DumpBuffer( &currRefList.resBitstreamBuffer, CodechalDbgAttr::attrBitstream, "_PAK", encodeStatusReport->bitstreamSize, 0, CODECHAL_NUM_MEDIA_STATES)); CODECHAL_ENCODE_CHK_STATUS_RETURN(m_statusReportDebugInterface->DumpData( encodeStatusReport, sizeof(EncodeStatusReport), 
CodechalDbgAttr::attrStatusReport, "EncodeStatusReport_Buffer")); CODECHAL_ENCODE_CHK_STATUS_RETURN(m_statusReportDebugInterface->DumpEncodeStatusReport( encodeStatusReport)); CODECHAL_ENCODE_CHK_STATUS_RETURN(DumpFrameStatsBuffer(m_statusReportDebugInterface)); if (m_vdencEnabled) { /*CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgDumpEncodeVdencOutputs( m_debugInterface, pEncoder)); if (m_cmdGenHucUsed) { CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgDumpEncodeHucCmdGen( m_debugInterface, pEncoder)); }*/ } } if (currRefList.b32xScalingUsed) { m_statusReportDebugInterface->m_scaledBottomFieldOffset = m_scaled32xBottomFieldOffset; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_statusReportDebugInterface->DumpYUVSurface( m_trackedBuf->Get32xDsSurface(currRefList.ucScalingIdx), CodechalDbgAttr::attrReconstructedSurface, "32xScaledSurf")) } if (currRefList.b2xScalingUsed) // Currently only used for Gen10 Hevc Encode { m_statusReportDebugInterface->m_scaledBottomFieldOffset = 0; // No bottom field offset for Hevc CODECHAL_ENCODE_CHK_STATUS_RETURN(m_statusReportDebugInterface->DumpYUVSurface( m_trackedBuf->Get2xDsSurface(currRefList.ucScalingIdx), CodechalDbgAttr::attrReconstructedSurface, "2xScaledSurf")) } if (currRefList.b16xScalingUsed) { m_statusReportDebugInterface->m_scaledBottomFieldOffset = m_scaled16xBottomFieldOffset; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_statusReportDebugInterface->DumpYUVSurface( m_trackedBuf->Get16xDsSurface(currRefList.ucScalingIdx), CodechalDbgAttr::attrReconstructedSurface, "16xScaledSurf")) } if (currRefList.b4xScalingUsed) { m_statusReportDebugInterface->m_scaledBottomFieldOffset = m_scaledBottomFieldOffset; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_statusReportDebugInterface->DumpYUVSurface( m_trackedBuf->Get4xDsSurface(currRefList.ucScalingIdx), CodechalDbgAttr::attrReconstructedSurface, "4xScaledSurf")) } if (!(m_codecFunction == CODECHAL_FUNCTION_ENC || m_codecFunction == CODECHAL_FUNCTION_FEI_ENC)) { if (m_codecFunction == 
CODECHAL_FUNCTION_HYBRIDPAK) { m_statusReportDebugInterface->m_hybridPakP1 = false; } CODECHAL_ENCODE_CHK_STATUS_RETURN(m_statusReportDebugInterface->DumpYUVSurface( &currRefList.sRefReconBuffer, CodechalDbgAttr::attrReconstructedSurface, "ReconSurf")) } if (currRefList.bUsedAsRef && m_vdencMvTemporalBufferSize) { uint8_t coloc_idx = (m_currRefList->bIsIntra) ? CODEC_CURR_TRACKED_BUFFER : currRefList.ucScalingIdx; auto coloc_buffer = m_trackedBuf->GetMvTemporalBuffer(coloc_idx); CODECHAL_ENCODE_CHK_STATUS_RETURN(m_statusReportDebugInterface->DumpBuffer( coloc_buffer, CodechalDbgAttr::attrMvData, "_CoLocated_Out", m_vdencMvTemporalBufferSize, 0, CODECHAL_NUM_MEDIA_STATES)); }) } CODECHAL_ENCODE_VERBOSEMESSAGE("Incrementing reports generated to %d.", (reportsGenerated + 1)); reportsGenerated++; } else { //update GPU status, and skip the hang frame if(m_osInterface->pfnIsGPUHung(m_osInterface)) { *(encodeStatusBuf->pData) += 1; reportsGenerated++; } CODECHAL_ENCODE_VERBOSEMESSAGE("Status buffer %d is INCOMPLETE.", i); encodeStatusReport->CodecStatus = CODECHAL_STATUS_INCOMPLETE; } codecStatus[i] = *encodeStatusReport; NullHW::StatusReport(m_osInterface, (uint32_t &)codecStatus[i].CodecStatus, codecStatus[i].bitstreamSize); } encodeStatusBuf->wFirstIndex = (encodeStatusBuf->wFirstIndex + reportsGenerated) % CODECHAL_ENCODE_STATUS_NUM; CODECHAL_ENCODE_VERBOSEMESSAGE("wFirstIndex now becomes %d.", encodeStatusBuf->wFirstIndex); return eStatus; } MOS_STATUS CodechalEncoderState::GetStatusReportCommon( EncodeStatus* encodeStatus, EncodeStatusReport* encodeStatusReport) { MOS_STATUS eStatus = MOS_STATUS_SUCCESS; encodeStatusReport->CodecStatus = CODECHAL_STATUS_SUCCESSFUL; encodeStatusReport->bitstreamSize = encodeStatus->dwMFCBitstreamByteCountPerFrame + encodeStatus->dwHeaderBytesInserted; // dwHeaderBytesInserted is for WAAVCSWHeaderInsertion // and is 0 otherwise encodeStatusReport->QpY = encodeStatus->BrcQPReport.DW0.QPPrimeY; encodeStatusReport->SuggestedQpYDelta = 
encodeStatus->ImageStatusCtrl.CumulativeSliceDeltaQP;
    encodeStatusReport->NumberPasses = (uint8_t)(encodeStatus->ImageStatusCtrl.TotalNumPass + 1);
    encodeStatusReport->SceneChangeDetected = (encodeStatus->dwSceneChangedFlag & CODECHAL_ENCODE_SCENE_CHANGE_DETECTED_MASK) ? 1 : 0;

    CODECHAL_ENCODE_CHK_NULL_RETURN(m_skuTable);

    // AverageQp is only written when the MB count is non-zero (avoids a division by zero).
    if (m_picWidthInMb != 0 && m_frameFieldHeightInMb != 0)
    {
        encodeStatusReport->AverageQp = (unsigned char)(((uint32_t)encodeStatus->QpStatusCount.cumulativeQP) / (m_picWidthInMb * m_frameFieldHeightInMb));
    }
    encodeStatusReport->PanicMode = encodeStatus->ImageStatusCtrl.Panic;

    // If Num slices is greater than spec limit set NumSlicesNonCompliant to 1 and report error
    PMHW_VDBOX_PAK_NUM_OF_SLICES numSlices = &encodeStatus->NumSlices;
    if (numSlices->NumberOfSlices > m_maxNumSlicesAllowed)
    {
        encodeStatusReport->NumSlicesNonCompliant = 1;
    }
    encodeStatusReport->NumberSlices = numSlices->NumberOfSlices;

    return eStatus;
}

//------------------------------------------------------------------------------
//| Purpose:    Reports user feature keys used for encoding (HW walker, slice
//|             shutdown and, in debug/release-internal builds, CSC method,
//|             raw surface tile type/format, compute context and single task
//|             phase settings)
//| Return:     Always MOS_STATUS_SUCCESS; results of the individual
//|             MOS_UserFeature_WriteValues_ID calls are not checked
//------------------------------------------------------------------------------
MOS_STATUS CodechalEncoderState::UserFeatureKeyReport()
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    CODECHAL_ENCODE_FUNCTION_ENTER;

    MOS_USER_FEATURE_VALUE_WRITE_DATA userFeatureWriteData;

    // Encode HW Walker Reporting
    userFeatureWriteData = __NULL_USER_FEATURE_VALUE_WRITE_DATA__;
    userFeatureWriteData.Value.i32Data = m_hwWalker;
    userFeatureWriteData.ValueID = __MEDIA_USER_FEATURE_VALUE_ENCODE_HW_WALKER_ID;
    MOS_UserFeature_WriteValues_ID(nullptr, &userFeatureWriteData, 1, m_osInterface->pOsContext);

    if (m_hwWalker)
    {
        // Encode HW Walker m_mode Reporting
#if (_DEBUG || _RELEASE_INTERNAL)
        userFeatureWriteData = __NULL_USER_FEATURE_VALUE_WRITE_DATA__;
        userFeatureWriteData.Value.i32Data = m_walkerMode;
        userFeatureWriteData.ValueID = __MEDIA_USER_FEATURE_VALUE_ENCODE_HW_WALKER_MODE_ID;
        MOS_UserFeature_WriteValues_ID(nullptr, &userFeatureWriteData, 1, m_osInterface->pOsContext);
#endif // _DEBUG || _RELEASE_INTERNAL
    }

    if (MEDIA_IS_SKU(m_skuTable, FtrSliceShutdown))
    {
        // SliceShutdownEnable Reporting
        userFeatureWriteData = __NULL_USER_FEATURE_VALUE_WRITE_DATA__;
        userFeatureWriteData.Value.i32Data = m_sliceShutdownEnable;
        userFeatureWriteData.ValueID = __MEDIA_USER_FEATURE_VALUE_SLICE_SHUTDOWN_ENABLE_ID;
        MOS_UserFeature_WriteValues_ID(nullptr, &userFeatureWriteData, 1, m_osInterface->pOsContext);
    }

#if (_DEBUG || _RELEASE_INTERNAL)
    // report encode CSC method
    if (m_cscDsState)
    {
        userFeatureWriteData = __NULL_USER_FEATURE_VALUE_WRITE_DATA__;
        userFeatureWriteData.Value.i32Data = m_cscDsState->CscMethod();
        userFeatureWriteData.ValueID = __MEDIA_USER_FEATURE_VALUE_ENCODE_CSC_METHOD_ID;
        MOS_UserFeature_WriteValues_ID(nullptr, &userFeatureWriteData, 1, m_osInterface->pOsContext);

        // Raw surface tile type
        userFeatureWriteData = __NULL_USER_FEATURE_VALUE_WRITE_DATA__;
        userFeatureWriteData.Value.u32Data = (uint32_t)m_rawSurface.TileType;
        userFeatureWriteData.ValueID = __MEDIA_USER_FEATURE_VALUE_ENCODE_RAW_TILE_ID;
        MOS_UserFeature_WriteValues_ID(nullptr, &userFeatureWriteData, 1, m_osInterface->pOsContext);

        // Raw surface format
        userFeatureWriteData = __NULL_USER_FEATURE_VALUE_WRITE_DATA__;
        userFeatureWriteData.Value.u32Data = (uint32_t)m_rawSurface.Format;
        userFeatureWriteData.ValueID = __MEDIA_USER_FEATURE_VALUE_ENCODE_RAW_FORMAT_ID;
        MOS_UserFeature_WriteValues_ID(nullptr, &userFeatureWriteData, 1, m_osInterface->pOsContext);
    }

    // Encode compute context Reporting
    userFeatureWriteData = __NULL_USER_FEATURE_VALUE_WRITE_DATA__;
    userFeatureWriteData.Value.i32Data = m_computeContextEnabled;
    userFeatureWriteData.ValueID = __MEDIA_USER_FEATURE_VALUE_ENCODE_ENABLE_COMPUTE_CONTEXT_ID;
    MOS_UserFeature_WriteValues_ID(nullptr, &userFeatureWriteData, 1, m_osInterface->pOsContext);

    // Single Task Phase support reporting
    userFeatureWriteData = __NULL_USER_FEATURE_VALUE_WRITE_DATA__;
    userFeatureWriteData.Value.i32Data = m_singleTaskPhaseSupported;
    userFeatureWriteData.ValueID = __MEDIA_USER_FEATURE_VALUE_SINGLE_TASK_PHASE_ENABLE_ID;
    MOS_UserFeature_WriteValues_ID(nullptr, &userFeatureWriteData, 1, m_osInterface->pOsContext);
#endif

    return eStatus;
}

//------------------------------------------------------------------------------
//| Purpose:    Notifies HalOcaInterface that the first-level batch buffer has
//|             ended, then submits the command buffer through the OS interface
//| Params:     cmdBuffer      - command buffer to submit; must not be null
//|             bNullRendering - forwarded unchanged to pfnSubmitCommandBuffer
//| Return:     MOS_STATUS_SUCCESS, or the status of the failing check
//------------------------------------------------------------------------------
MOS_STATUS CodechalEncoderState::SubmitCommandBuffer(
    PMOS_COMMAND_BUFFER cmdBuffer,
    bool bNullRendering)
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    CODECHAL_ENCODE_FUNCTION_ENTER;

    CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);

    HalOcaInterface::On1stLevelBBEnd(*cmdBuffer, *m_osInterface);
    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(m_osInterface, cmdBuffer, bNullRendering));
    return eStatus;
}

//------------------------------------------------------------------------------
//| Purpose:    Disables 16x/32x motion estimation when the downscaled surface
//|             would be smaller than the minimum scaled dimension, and clamps
//|             the affected downscaled width/height (and their MB counts) up
//|             to that minimum. Only the first failing level (4x, then 16x,
//|             then 32x) is handled per call.
//------------------------------------------------------------------------------
void CodechalEncoderState::MotionEstimationDisableCheck()
{
    CODECHAL_ENCODE_FUNCTION_ENTER;

    // 4x surface too small: neither 16x nor 32x ME can be used.
    if (m_downscaledWidth4x < m_minScaledDimension || m_downscaledWidthInMb4x < m_minScaledDimensionInMb ||
        m_downscaledHeight4x < m_minScaledDimension || m_downscaledHeightInMb4x < m_minScaledDimensionInMb)
    {
        m_32xMeSupported = false;
        m_16xMeSupported = false;
        if (m_downscaledWidth4x < m_minScaledDimension || m_downscaledWidthInMb4x < m_minScaledDimensionInMb)
        {
            m_downscaledWidth4x = m_minScaledDimension;
            m_downscaledWidthInMb4x = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_downscaledWidth4x);
        }
        if (m_downscaledHeight4x < m_minScaledDimension || m_downscaledHeightInMb4x < m_minScaledDimensionInMb)
        {
            m_downscaledHeight4x = m_minScaledDimension;
            m_downscaledHeightInMb4x = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_downscaledHeight4x);
        }
    }
    // 16x surface too small: only 32x ME is disabled.
    else if (m_downscaledWidth16x < m_minScaledDimension || m_downscaledWidthInMb16x < m_minScaledDimensionInMb ||
             m_downscaledHeight16x < m_minScaledDimension || m_downscaledHeightInMb16x < m_minScaledDimensionInMb)
    {
        m_32xMeSupported = false;
        if (m_downscaledWidth16x < m_minScaledDimension || m_downscaledWidthInMb16x < m_minScaledDimensionInMb)
        {
            m_downscaledWidth16x = m_minScaledDimension;
            m_downscaledWidthInMb16x = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_downscaledWidth16x);
        }
        if (m_downscaledHeight16x <
m_minScaledDimension || m_downscaledHeightInMb16x < m_minScaledDimensionInMb)
        {
            m_downscaledHeight16x = m_minScaledDimension;
            m_downscaledHeightInMb16x = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_downscaledHeight16x);
        }
    }
    else
    {
        // 4x and 16x are large enough: only the 32x dimensions are clamped, no ME level is disabled.
        if (m_downscaledWidth32x < m_minScaledDimension || m_downscaledWidthInMb32x < m_minScaledDimensionInMb)
        {
            m_downscaledWidth32x = m_minScaledDimension;
            m_downscaledWidthInMb32x = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_downscaledWidth32x);
        }
        if (m_downscaledHeight32x < m_minScaledDimension || m_downscaledHeightInMb32x < m_minScaledDimensionInMb)
        {
            m_downscaledHeight32x = m_minScaledDimension;
            m_downscaledHeightInMb32x = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_downscaledHeight32x);
        }
    }
}

//------------------------------------------------------------------------------
//| Purpose:    Emits the command-buffer prolog: optional start marker, command
//|             buffer attributes (including optional media frame tracking),
//|             the MMC prolog when _MMC_SUPPORTED is defined, the generic
//|             prolog, and the optional predication command
//| Params:     cmdBuffer              - command buffer to program; must not be null
//|             frameTrackingRequested - frame tracking is enabled only when this
//|                                      and m_frameTrackingEnabled are both set
//|             mmioRegister           - forwarded to Mhw_SendGenericPrologCmd
//| Return:     MOS_STATUS_SUCCESS, or the status of the failing check
//------------------------------------------------------------------------------
MOS_STATUS CodechalEncoderState::SendPrologWithFrameTracking(
    PMOS_COMMAND_BUFFER cmdBuffer,
    bool frameTrackingRequested,
    MHW_MI_MMIOREGISTERS* mmioRegister)
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    CODECHAL_ENCODE_FUNCTION_ENTER;

    CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);

    MOS_GPU_CONTEXT gpuContext = m_osInterface->pfnGetGpuContext(m_osInterface);

    // Send Start Marker command
    if (m_encodeParams.m_setMarkerEnabled)
    {
        CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMarkerCommand(cmdBuffer, MOS_RCS_ENGINE_USED(gpuContext)));
    }

    // initialize command buffer attributes
    cmdBuffer->Attributes.bTurboMode = m_hwInterface->m_turboMode;
    // Preemption is only queried from the render engine interface when the RCS engine is in use.
    cmdBuffer->Attributes.bMediaPreemptionEnabled = MOS_RCS_ENGINE_USED(gpuContext) ? m_renderEngineInterface->IsPreemptionEnabled() : 0;
    cmdBuffer->Attributes.dwNumRequestedEUSlices = m_hwInterface->m_numRequestedEuSlices;
    cmdBuffer->Attributes.dwNumRequestedSubSlices = m_hwInterface->m_numRequestedSubSlices;
    cmdBuffer->Attributes.dwNumRequestedEUs = m_hwInterface->m_numRequestedEus;
    cmdBuffer->Attributes.bValidPowerGatingRequest = true;

    if (frameTrackingRequested && m_frameTrackingEnabled)
    {
        cmdBuffer->Attributes.bEnableMediaFrameTracking = true;
        cmdBuffer->Attributes.resMediaFrameTrackingSurface = &m_encodeStatusBuf.resStatusBuffer;
        cmdBuffer->Attributes.dwMediaFrameTrackingTag = m_storeData;
        // Set media frame tracking address offset(the offset from the encoder status buffer page)
        cmdBuffer->Attributes.dwMediaFrameTrackingAddrOffset = 0;
    }

#ifdef _MMC_SUPPORTED
    CODECHAL_ENCODE_CHK_NULL_RETURN(m_mmcState);
    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_mmcState->SendPrologCmd(m_miInterface, cmdBuffer, gpuContext));
#endif

    MHW_GENERIC_PROLOG_PARAMS genericPrologParams;
    MOS_ZeroMemory(&genericPrologParams, sizeof(genericPrologParams));
    genericPrologParams.pOsInterface = m_osInterface;
    genericPrologParams.pvMiInterface = m_miInterface;
    genericPrologParams.bMmcEnabled = m_mmcState ? m_mmcState->IsMmcEnabled() : false;
    // Note: the stored value is the previous tag (m_storeData - 1), not the current one.
    genericPrologParams.dwStoreDataValue = m_storeData - 1;
    CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_SendGenericPrologCmd(cmdBuffer, &genericPrologParams, mmioRegister));

    // Send predication command
    if (m_encodeParams.m_predicationEnabled)
    {
        CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPredicationCommand(cmdBuffer));
    }

    return eStatus;
}

//------------------------------------------------------------------------------
//| Purpose:    Programs predication: combines the two 32-bit halves of the
//|             64-bit predication value with MI_MATH, stores the result in
//|             m_predicationBuffer, then adds a conditional batch-buffer-end
//|             that tests that buffer
//| Params:     cmdBuffer - command buffer to program; must not be null
//| Return:     MOS_STATUS_SUCCESS, or the status of the failing check
//------------------------------------------------------------------------------
MOS_STATUS CodechalEncoderState::SendPredicationCommand(
    PMOS_COMMAND_BUFFER cmdBuffer)
{
    CODECHAL_ENCODE_FUNCTION_ENTER;

    CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
    CODECHAL_ENCODE_CHK_NULL_RETURN(m_miInterface);

    // Predication can be set based on the value of 64-bits within a buffer
    auto PreparePredicationBuf = [&](bool predicationNotEqualZero) {
        auto mmioRegistersMfx = m_hwInterface->SelectVdboxAndGetMmioRegister(m_vdboxIndex, cmdBuffer);
        MHW_MI_FLUSH_DW_PARAMS flushDwParams;
        MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(cmdBuffer, &flushDwParams));

        // load presPredication to general purpose register0
        MHW_MI_LOAD_REGISTER_MEM_PARAMS loadRegisterMemParams;
        MOS_ZeroMemory(&loadRegisterMemParams, sizeof(loadRegisterMemParams));
        loadRegisterMemParams.presStoreBuffer = m_encodeParams.m_presPredication;
        loadRegisterMemParams.dwOffset = (uint32_t)m_encodeParams.m_predicationResOffset;
        loadRegisterMemParams.dwRegister = mmioRegistersMfx->generalPurposeRegister0LoOffset;
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd(
            cmdBuffer,
            &loadRegisterMemParams));
        // Clear the high half of register0.
        MHW_MI_LOAD_REGISTER_IMM_PARAMS loadRegisterImmParams;
        MOS_ZeroMemory(&loadRegisterImmParams, sizeof(loadRegisterImmParams));
        loadRegisterImmParams.dwData = 0;
        loadRegisterImmParams.dwRegister = mmioRegistersMfx->generalPurposeRegister0HiOffset;
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterImmCmd(
            cmdBuffer,
            &loadRegisterImmParams));

        // Load the next 32 bits of the predication value (offset + sizeof(uint32_t))
        // into general purpose register4 and clear its high half.
        MOS_ZeroMemory(&loadRegisterMemParams, sizeof(loadRegisterMemParams));
        loadRegisterMemParams.presStoreBuffer = m_encodeParams.m_presPredication;
        loadRegisterMemParams.dwOffset = (uint32_t)m_encodeParams.m_predicationResOffset + sizeof(uint32_t);
        loadRegisterMemParams.dwRegister = mmioRegistersMfx->generalPurposeRegister4LoOffset;
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd(
            cmdBuffer,
            &loadRegisterMemParams));
        MOS_ZeroMemory(&loadRegisterImmParams, sizeof(loadRegisterImmParams));
        loadRegisterImmParams.dwData = 0;
        loadRegisterImmParams.dwRegister = mmioRegistersMfx->generalPurposeRegister4HiOffset;
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterImmCmd(
            cmdBuffer,
            &loadRegisterImmParams));

        //perform operation
        MHW_MI_MATH_PARAMS miMathParams;
        MHW_MI_ALU_PARAMS miAluParams[4];
        MOS_ZeroMemory(&miMathParams, sizeof(miMathParams));
        MOS_ZeroMemory(&miAluParams, sizeof(miAluParams));
        // load srcA, reg0
        miAluParams[0].AluOpcode = MHW_MI_ALU_LOAD;
        miAluParams[0].Operand1 = MHW_MI_ALU_SRCA;
        miAluParams[0].Operand2 = MHW_MI_ALU_GPREG0;
        // load srcB, reg4
        miAluParams[1].AluOpcode = MHW_MI_ALU_LOAD;
        miAluParams[1].Operand1 = MHW_MI_ALU_SRCB;
        miAluParams[1].Operand2 = MHW_MI_ALU_GPREG4;
        // combine srcA and srcB: ADD when predicating on "not equal zero", OR otherwise
        miAluParams[2].AluOpcode = predicationNotEqualZero ? MHW_MI_ALU_ADD : MHW_MI_ALU_OR;
        miAluParams[2].Operand1 = MHW_MI_ALU_SRCB;
        miAluParams[2].Operand2 = MHW_MI_ALU_GPREG4;
        // store reg0, ZF (or the accumulator when predicating on "equal zero")
        miAluParams[3].AluOpcode = MHW_MI_ALU_STORE;
        miAluParams[3].Operand1 = MHW_MI_ALU_GPREG0;
        miAluParams[3].Operand2 = predicationNotEqualZero ? MHW_MI_ALU_ZF : MHW_MI_ALU_ACCU;

        miMathParams.pAluPayload = miAluParams;
        miMathParams.dwNumAluParams = 4; // the four ALU commands set up above make up this operation
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiMathCmd(
            cmdBuffer,
            &miMathParams));

        // if MHW_MI_ALU_ZF, the zero flag will be 0xFFFFFFFF, else zero flag will be 0x0.
        // if MHW_MI_ALU_ACCU, the OR result directly copied
        MHW_MI_STORE_REGISTER_MEM_PARAMS storeRegParams;
        MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams));
        storeRegParams.presStoreBuffer = &m_predicationBuffer;
        storeRegParams.dwOffset = 0;
        storeRegParams.dwRegister = mmioRegistersMfx->generalPurposeRegister0LoOffset;
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(
            cmdBuffer,
            &storeRegParams));

        // Publish the internal predication buffer through the caller-supplied pointer.
        CODECHAL_ENCODE_CHK_NULL_RETURN(m_encodeParams.m_tempPredicationBuffer);
        *m_encodeParams.m_tempPredicationBuffer = &m_predicationBuffer;

        return MOS_STATUS_SUCCESS;
    };

    CODECHAL_ENCODE_CHK_STATUS_RETURN(PreparePredicationBuf(m_encodeParams.m_predicationNotEqualZero));

    // Conditional batch-buffer-end on the first dword of m_predicationBuffer, compared against 0.
    MHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS condBBEndParams;
    MOS_ZeroMemory(&condBBEndParams, sizeof(condBBEndParams));
    condBBEndParams.presSemaphoreBuffer = &m_predicationBuffer;
    condBBEndParams.dwOffset = 0;
    condBBEndParams.dwValue = 0;
    condBBEndParams.bDisableCompareMask = true;
    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiConditionalBatchBufferEndCmd(
        cmdBuffer,
        &condBBEndParams));

    return MOS_STATUS_SUCCESS;
}

MOS_STATUS CodechalEncoderState::SendMarkerCommand(
    PMOS_COMMAND_BUFFER cmdBuffer,
    bool isRender)
{
    CODECHAL_ENCODE_FUNCTION_ENTER;

    CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
    CODECHAL_ENCODE_CHK_NULL_RETURN(m_miInterface);

    PMOS_RESOURCE presSetMarker = (PMOS_RESOURCE) m_encodeParams.m_presSetMarker;

    if (Mos_ResourceIsNull(presSetMarker))
    {
        return MOS_STATUS_SUCCESS;
    }

    if (isRender)
    {
        // Send pipe_control to get the timestamp
        MHW_PIPE_CONTROL_PARAMS pipeControlParams;
        MOS_ZeroMemory(&pipeControlParams, sizeof(pipeControlParams));
        pipeControlParams.presDest = presSetMarker;
        pipeControlParams.dwResourceOffset = 0;
        pipeControlParams.dwPostSyncOp = MHW_FLUSH_WRITE_TIMESTAMP_REG;
        pipeControlParams.dwFlushMode = MHW_FLUSH_WRITE_CACHE;

        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddPipeControl(cmdBuffer, NULL, &pipeControlParams));
    }
    else
    {
        // Send flush_dw to get the
timestamp MHW_MI_FLUSH_DW_PARAMS flushDwParams; MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams)); flushDwParams.pOsResource = presSetMarker; flushDwParams.dwResourceOffset = 0; flushDwParams.postSyncOperation = MHW_FLUSH_WRITE_TIMESTAMP_REG; flushDwParams.bQWordEnable = 1; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(cmdBuffer, &flushDwParams)); } return MOS_STATUS_SUCCESS; } MOS_STATUS CodechalEncoderState::UpdateCmdBufAttribute( PMOS_COMMAND_BUFFER cmdBuffer, bool renderEngineInUse) { MOS_STATUS eStatus = MOS_STATUS_SUCCESS; return eStatus; } MOS_STATUS CodechalEncoderState::ExecuteEnc( EncoderParams* encodeParams) { MOS_STATUS eStatus = MOS_STATUS_SUCCESS; CODECHAL_ENCODE_FUNCTION_ENTER; CODECHAL_ENCODE_CHK_NULL_RETURN(m_hwInterface->GetCpInterface()); if (m_mfeEnabled == false || encodeParams->ExecCodecFunction == CODECHAL_FUNCTION_ENC || encodeParams->ExecCodecFunction == CODECHAL_FUNCTION_FEI_ENC) { MOS_TraceEventExt(EVENT_CODECHAL_EXECUTE, EVENT_TYPE_START, &encodeParams->ExecCodecFunction, sizeof(encodeParams->ExecCodecFunction), nullptr, 0); // No need to wait if the driver is executing on a simulator EncodeStatusBuffer* pencodeStatusBuf = CodecHalUsesOnlyRenderEngine(m_codecFunction) ? 
&m_encodeStatusBufRcs : &m_encodeStatusBuf; if (!m_osInterface->bSimIsActive && m_recycledBufStatusNum[m_currRecycledBufIdx] > *(pencodeStatusBuf->pData)) { uint32_t waitMs; // Wait for Batch Buffer complete event OR timeout for (waitMs = MHW_TIMEOUT_MS_DEFAULT; waitMs > 0; waitMs -= MHW_EVENT_TIMEOUT_MS) { if (m_recycledBufStatusNum[m_currRecycledBufIdx] <= *(pencodeStatusBuf->pData)) { break; } MosUtilities::MosSleep(MHW_EVENT_TIMEOUT_MS); } CODECHAL_ENCODE_VERBOSEMESSAGE("Waited for %d ms", (MHW_TIMEOUT_MS_DEFAULT - waitMs)); if (m_recycledBufStatusNum[m_currRecycledBufIdx] > *(pencodeStatusBuf->pData)) { CODECHAL_ENCODE_ASSERTMESSAGE("No recycled buffers available, wait timed out at %d ms!", MHW_TIMEOUT_MS_DEFAULT); CODECHAL_ENCODE_ASSERTMESSAGE("m_storeData = %d, m_recycledBufStatusNum[%d] = %d, data = %d", m_storeData, m_currRecycledBufIdx, m_recycledBufStatusNum[m_currRecycledBufIdx], *(pencodeStatusBuf->pData)); return MOS_STATUS_CLIENT_AR_NO_SPACE; } } m_recycledBufStatusNum[m_currRecycledBufIdx] = m_storeData; // These parameters are updated at the DDI level if (encodeParams->bMbDisableSkipMapEnabled) { CodecHalGetResourceInfo(m_osInterface, encodeParams->psMbDisableSkipMapSurface); } CODECHAL_ENCODE_CHK_NULL_RETURN(encodeParams->psRawSurface); CodecHalGetResourceInfo(m_osInterface, encodeParams->psRawSurface); if (encodeParams->bMbQpDataEnabled) { CodecHalGetResourceInfo(m_osInterface, encodeParams->psMbQpDataSurface); } if (m_standard != CODECHAL_JPEG) { CODECHAL_ENCODE_CHK_NULL_RETURN(encodeParams->psReconSurface); CodecHalGetResourceInfo(m_osInterface, encodeParams->psReconSurface); } m_encodeParams = *encodeParams; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetCpInterface()->UpdateParams(true)); if (CodecHalUsesVideoEngine(m_codecFunction)) { // Get resource details of the bitstream resource MOS_SURFACE details; MOS_ZeroMemory(&details, sizeof(details)); details.Format = Format_Invalid; 
CODECHAL_ENCODE_CHK_NULL_RETURN(encodeParams->presBitstreamBuffer); CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetResourceInfo(m_osInterface, encodeParams->presBitstreamBuffer, &details)); m_encodeParams.dwBitstreamSize = details.dwHeight * details.dwWidth; } m_osInterface->pfnIncPerfFrameID(m_osInterface); // init function common to all codecs, before encode each frame CODECHAL_ENCODE_CHK_STATUS_RETURN(InitCommon()); CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(InitializePicture(m_encodeParams), "Encoding initialization failed."); if (m_newSeq) { CODECHAL_ENCODE_CHK_STATUS_RETURN(CheckResChangeAndCsc()); } if (FRAME_SKIP_NORMAL == m_skipFrameFlag) { if (m_standard == CODECHAL_MPEG2) { CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(EncodeCopySkipFrame(), "Skip-frame failed.\n"); m_skipFrameFlag = FRAME_NO_SKIP; CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(ResetStatusReport(), "Flushing encode status buffer for skipped frame failed.\n"); m_firstFrame = false; return eStatus; } } MOS_SYNC_PARAMS syncParams = g_cInitSyncParams; syncParams.bReadOnly = true; // Synchronize MB QP data surface resource if any. if (encodeParams->bMbQpDataEnabled) { syncParams.presSyncResource = &encodeParams->psMbQpDataSurface->OsResource; syncParams.GpuContext = m_renderContext; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnResourceWait(m_osInterface, &syncParams)); } // Check if source surface needs to be synchronized and should wait for decode or VPP or any other context syncParams.presSyncResource = &m_rawSurface.OsResource; if (m_hwInterface->UsesRenderEngine(m_codecFunction, m_standard) && m_firstField) { syncParams.GpuContext = m_renderContext; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnResourceWait(m_osInterface, &syncParams)); if (CodecHalUsesVideoEngine(m_codecFunction)) { // Perform Sync on PAK context if it is not ENC only case. 
// This is done to set the read mask for PAK context for on demand sync syncParams.GpuContext = m_videoContext; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnResourceWait(m_osInterface, &syncParams)); } // Update the resource tag (s/w tag) for On-Demand Sync // set the tag on render context for ENC case only, else set it on video context for ENC+PAK case m_osInterface->pfnSetResourceSyncTag(m_osInterface, &syncParams); } else if (CodecHalUsesVideoEngine(m_codecFunction)) { // Perform resource sync for encode uses only video engine syncParams.GpuContext = m_videoContext; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnResourceWait(m_osInterface, &syncParams)); m_osInterface->pfnSetResourceSyncTag(m_osInterface, &syncParams); } CODECHAL_ENCODE_CHK_NULL_RETURN(m_currRefList); if (CODECHAL_JPEG != m_standard && m_firstField) { for (int i = 0; i < m_currRefList->ucNumRef; i++) { CODECHAL_ENCODE_CHK_COND_RETURN( m_currReconstructedPic.FrameIdx == m_currRefList->RefList[i].FrameIdx, "the same frame (FrameIdx = %d) cannot be used as both Recon surface and ref frame", m_currReconstructedPic.FrameIdx); } // clear flags m_currRefList->b2xScalingUsed = m_currRefList->b4xScalingUsed = m_currRefList->b16xScalingUsed = m_currRefList->b32xScalingUsed = false; // allocate tracked buffer for current frame CODECHAL_ENCODE_CHK_STATUS_RETURN(m_trackedBuf->AllocateForCurrFrame()); m_currRefList->ucScalingIdx = m_trackedBuf->GetCurrIndex(); if (m_trackedBuf->IsMbCodeAllocationNeeded()) { // MbCode/MvData buffer can be tracked using the same index as DS surface m_currRefList->ucMbCodeIdx = m_currMbCodeIdx = m_trackedBuf->GetCurrIndexMbCode(); m_resMbCodeSurface = m_currRefList->resRefMbCodeBuffer = *m_trackedBuf->GetCurrMbCodeBuffer(); if (m_trackedBuf->GetCurrMvDataBuffer()) { m_resMvDataSurface = m_currRefList->resRefMvDataBuffer = *m_trackedBuf->GetCurrMvDataBuffer(); } } else { CODECHAL_ENCODE_NORMALMESSAGE("App provides MbCode and MvData buffer!"); if(CODECHAL_AVC == 
m_standard) { m_currRefList->resRefMbCodeBuffer = m_resMbCodeSurface; m_currRefList->resRefMvDataBuffer = m_resMvDataSurface; } } m_trackedBuf->SetAllocationFlag(false); } if (m_hwInterface->UsesRenderEngine(m_codecFunction, m_standard)) { // set render engine context m_osInterface->pfnSetGpuContext(m_osInterface, m_renderContext); m_osInterface->pfnResetOsStates(m_osInterface); // set all status reports to completed state InitStatusReport(); // on-demand sync for tracked buffer syncParams = g_cInitSyncParams; syncParams.GpuContext = m_renderContext; syncParams.bReadOnly = false; if (m_trackedBuf->GetWait() && !Mos_ResourceIsNull(&m_resMbCodeSurface)) { syncParams.presSyncResource = &m_resMbCodeSurface; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnResourceWait(m_osInterface, &syncParams)); m_osInterface->pfnSetResourceSyncTag(m_osInterface, &syncParams); } // Call ENC Kernels CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(ExecuteKernelFunctions(), "ENC failed."); } MOS_TraceEventExt(EVENT_CODECHAL_EXECUTE, EVENT_TYPE_END, nullptr, 0, nullptr, 0); } if (m_mfeEnabled == false || encodeParams->ExecCodecFunction == CODECHAL_FUNCTION_PAK || encodeParams->ExecCodecFunction == CODECHAL_FUNCTION_FEI_PAK) { MOS_TraceEventExt(EVENT_CODECHAL_EXECUTE, EVENT_TYPE_START, &encodeParams->ExecCodecFunction, sizeof(encodeParams->ExecCodecFunction), nullptr, 0); CODECHAL_ENCODE_CHK_STATUS_RETURN(Mos_Solo_PreProcessEncode(m_osInterface, &m_resBitstreamBuffer, &m_reconSurface)); if (CodecHalUsesVideoEngine(m_codecFunction)) { // Set to video context m_osInterface->pfnSetGpuContext(m_osInterface, m_videoContext); m_osInterface->pfnResetOsStates(m_osInterface); m_currPass = 0; for (m_currPass = 0; m_currPass <= m_numPasses; m_currPass++) { m_firstTaskInPhase = (m_currPass == 0); m_lastTaskInPhase = (m_currPass == m_numPasses); if (m_firstTaskInPhase || !m_singleTaskPhaseSupported) CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable()); // Setup picture level PAK commands 
CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(ExecutePictureLevel(), "Picture level encoding failed."); // Setup slice level PAK commands CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(ExecuteSliceLevel(), "Slice level encoding failed."); m_lastTaskInPhase = false; } } m_prevRawSurface = *m_rawSurfaceToPak; // User Feature Key Reporting - only happens after first frame if (m_firstFrame == true) { CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(UserFeatureKeyReport(), "Reporting user feature keys failed."); } m_currRecycledBufIdx = (m_currRecycledBufIdx + 1) % CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; if (m_currRecycledBufIdx == 0) { MOS_ZeroMemory(m_recycledBufStatusNum, sizeof(m_recycledBufStatusNum)); } m_currLaDataIdx = (m_currLaDataIdx + 1) % m_numLaDataEntry; // Flush encode eStatus buffer CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(ResetStatusReport(), "Flushing encode eStatus buffer failed."); if (m_firstFrame == false && m_firstTwoFrames == true) { m_firstTwoFrames = false; } m_firstFrame = false; CODECHAL_ENCODE_CHK_STATUS_RETURN(Mos_Solo_PostProcessEncode(m_osInterface, &m_resBitstreamBuffer, &m_reconSurface)); MOS_TraceEventExt(EVENT_CODECHAL_EXECUTE, EVENT_TYPE_END, nullptr, 0, nullptr, 0); } return eStatus; } uint8_t CodechalEncoderState::GetNumBrcPakPasses(uint16_t usBRCPrecision) { uint8_t numBRCPAKPasses = CODECHAL_ENCODE_BRC_DEFAULT_NUM_PASSES; switch (usBRCPrecision) { case 0: case 2: numBRCPAKPasses = CODECHAL_ENCODE_BRC_DEFAULT_NUM_PASSES; break; case 1: numBRCPAKPasses = CODECHAL_ENCODE_BRC_MINIMUM_NUM_PASSES; break; case 3: numBRCPAKPasses = CODECHAL_ENCODE_BRC_MAXIMUM_NUM_PASSES; break; default: CODECHAL_ENCODE_ASSERT("Invalid BRC Precision value in Pic Params."); numBRCPAKPasses = CODECHAL_ENCODE_BRC_DEFAULT_NUM_PASSES; break; } return numBRCPAKPasses; } CodechalEncoderGenState::CodechalEncoderGenState(CodechalEncoderState* encoder) { CODECHAL_ENCODE_ASSERT(encoder); m_encoder = encoder; m_hwInterface = encoder->GetHwInterface(); m_osInterface = 
encoder->GetOsInterface(); m_miInterface = encoder->m_miInterface; m_renderEngineInterface = encoder->m_renderEngineInterface; m_stateHeapInterface = encoder->m_stateHeapInterface; } CodechalEncoderState::CodechalEncoderState( CodechalHwInterface* hwInterface, CodechalDebugInterface* debugInterface, PCODECHAL_STANDARD_INFO standardInfo): Codechal((hwInterface==nullptr) ?nullptr:hwInterface->m_hwInterfaceNext, debugInterface) { m_hwInterface = hwInterface; pfnGetKernelHeaderAndSize = nullptr; // Add Null checks here for all interfaces. CODECHAL_ENCODE_CHK_NULL_NO_STATUS_RETURN(m_hwInterface); m_mfxInterface = m_hwInterface->GetMfxInterface(); m_hcpInterface = m_hwInterface->GetHcpInterface(); m_hucInterface = m_hwInterface->GetHucInterface(); m_vdencInterface = m_hwInterface->GetVdencInterface(); m_miInterface = hwInterface->GetMiInterface(); m_renderEngineInterface = hwInterface->GetRenderInterface(); CODECHAL_ENCODE_CHK_NULL_NO_STATUS_RETURN(m_renderEngineInterface); m_stateHeapInterface = m_renderEngineInterface->m_stateHeapInterface; CODECHAL_ENCODE_ASSERT(m_renderEngineInterface->GetHwCaps()); m_osInterface = hwInterface->GetOsInterface(); CODECHAL_ENCODE_CHK_NULL_NO_STATUS_RETURN(m_osInterface); m_userSettingPtr = m_osInterface->pfnGetUserSettingInstance(m_osInterface); m_osInterface->pfnGetPlatform(m_osInterface, &m_platform); m_skuTable = m_osInterface->pfnGetSkuTable(m_osInterface); m_waTable = m_osInterface->pfnGetWaTable(m_osInterface); m_gtSystemInfo = m_osInterface->pfnGetGtSystemInfo(m_osInterface); m_videoGpuNode = MOS_GPU_NODE_MAX; m_renderContext = MOS_GPU_CONTEXT_INVALID_HANDLE; m_videoContext = MOS_GPU_CONTEXT_INVALID_HANDLE; m_vdencEnabled = CodecHalUsesVdencEngine(standardInfo->CodecFunction); m_codecFunction = standardInfo->CodecFunction; m_vdencMeKernelState = MHW_KERNEL_STATE(); m_vdencStreaminKernelState = MHW_KERNEL_STATE(); m_vdencMeKernelStateRAB = MHW_KERNEL_STATE(); m_vdencStreaminKernelStateRAB = MHW_KERNEL_STATE(); for (auto i = 0; i 
< CODEC_NUM_FIELDS_PER_FRAME; i++) { m_scaling2xKernelStates[i] = MHW_KERNEL_STATE(); m_scaling4xKernelStates[i] = MHW_KERNEL_STATE(); } for (auto i = 0; i < CODECHAL_ENCODE_ME_IDX_NUM; i++) { m_meKernelStates[i] = MHW_KERNEL_STATE(); } MOS_ZeroMemory(&m_encodeParams, sizeof(m_encodeParams)); MOS_ZeroMemory(&m_resHwCount, sizeof(m_resHwCount)); MOS_ZeroMemory(&m_rawSurface, sizeof(m_rawSurface)); // Pointer to MOS_SURFACE of raw surface MOS_ZeroMemory(&m_reconSurface, sizeof(m_reconSurface)); // Pointer to MOS_SURFACE of reconstructed surface MOS_ZeroMemory(&m_resBitstreamBuffer, sizeof(m_resBitstreamBuffer)); // Pointer to MOS_SURFACE of bitstream surface MOS_ZeroMemory(&m_resMbCodeSurface, sizeof(m_resMbCodeSurface)); // Pointer to MOS_SURFACE of MbCode surface MOS_ZeroMemory(&m_resMvDataSurface, sizeof(m_resMvDataSurface)); // Pointer to MOS_SURFACE of MvData surface MOS_ZeroMemory(&m_resSyncObjectRenderContextInUse, sizeof(m_resSyncObjectRenderContextInUse)); MOS_ZeroMemory(&m_resSyncObjectVideoContextInUse, sizeof(m_resSyncObjectVideoContextInUse)); MOS_ZeroMemory(&m_encodeStatusBuf, sizeof(m_encodeStatusBuf)); // Stores all the status_query related data for PAK engine MOS_ZeroMemory(&m_encodeStatusBufRcs, sizeof(m_encodeStatusBufRcs)); // Stores all the status_query related data for render ring (RCS) MOS_ZeroMemory(&m_imgStatusControlBuffer, sizeof(m_imgStatusControlBuffer)); // Stores image eStatus control data MOS_ZeroMemory(&m_atomicScratchBuf, sizeof(m_atomicScratchBuf)); // Stores atomic operands and result MOS_ZeroMemory(&m_bsBuffer, sizeof(m_bsBuffer)); MOS_ZeroMemory(&m_resVdencCmdInitializerDmemBuffer, sizeof(m_resVdencCmdInitializerDmemBuffer)); MOS_ZeroMemory(&m_resVdencCmdInitializerDataBuffer, sizeof(m_resVdencCmdInitializerDataBuffer)); MOS_ZeroMemory(&m_resDistortionBuffer, sizeof(m_resDistortionBuffer)); // MBEnc Distortion Buffer for (auto i = 0; i < CODECHAL_ENCODE_MAX_NUM_MAD_BUFFERS; i++) { MOS_ZeroMemory(&m_resMadDataBuffer[i], 
sizeof(m_resMadDataBuffer[i])); // Buffers to store Mean of Absolute Differences } for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++) { MOS_ZeroMemory(&m_sliceMapSurface[i], sizeof(m_sliceMapSurface[i])); } for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++) { MOS_ZeroMemory(&m_resVdencStreamInBuffer[i], sizeof(m_resVdencStreamInBuffer[i])); } MOS_ZeroMemory(&m_resPakMmioBuffer, sizeof(m_resPakMmioBuffer)); MOS_ZeroMemory(&m_resHucErrorStatusBuffer, sizeof(m_resHucErrorStatusBuffer)); MOS_ZeroMemory(&m_resHucStatus2Buffer, sizeof(m_resHucStatus2Buffer)); MOS_ZeroMemory(&m_resHucFwBuffer, sizeof(m_resHucFwBuffer)); MOS_ZeroMemory(&m_resDeblockingFilterRowStoreScratchBuffer, sizeof(m_resDeblockingFilterRowStoreScratchBuffer)); // Handle of deblock row store surface MOS_ZeroMemory(&m_resMPCRowStoreScratchBuffer, sizeof(m_resMPCRowStoreScratchBuffer)); // Handle of mpc row store surface for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++) { MOS_ZeroMemory(&m_resStreamOutBuffer[i], sizeof(m_resStreamOutBuffer[i])); // Handle of streamout data surface } MOS_ZeroMemory(&m_scaling4xBindingTable, sizeof(m_scaling4xBindingTable)); MOS_ZeroMemory(&m_scaling2xBindingTable, sizeof(m_scaling2xBindingTable)); for (auto i = 0; i < CODECHAL_ENCODE_VME_BBUF_NUM; i++) { MOS_ZeroMemory(&m_scalingBBUF[i], sizeof(m_scalingBBUF[i])); // This Batch Buffer is used for scaling kernel. 
} MOS_ZeroMemory(&m_flatnessCheckSurface, sizeof(m_flatnessCheckSurface)); MOS_ZeroMemory(&m_resMbStatisticsSurface, sizeof(m_resMbStatisticsSurface)); MOS_ZeroMemory(&m_resMbStatsBuffer, sizeof(m_resMbStatsBuffer)); MOS_ZeroMemory(&m_meBindingTable, sizeof(m_meBindingTable)); MOS_ZeroMemory(&m_vdencMeKernelBindingTable, sizeof(m_vdencMeKernelBindingTable)); MOS_ZeroMemory(&m_vdencStreaminKernelBindingTable, sizeof(m_vdencStreaminKernelBindingTable)); } CodechalEncoderState::~CodechalEncoderState() { if (m_gpuCtxCreatOpt) { MOS_Delete(m_gpuCtxCreatOpt); m_gpuCtxCreatOpt = nullptr; } DestroyMDFResources(); if (m_perfProfiler) { MediaPerfProfiler::Destroy(m_perfProfiler, (void*)this, m_osInterface); m_perfProfiler = nullptr; } // Destroy HW interface objects (GSH, SSH, etc) if (m_hwInterface != nullptr) { MOS_Delete(m_hwInterface); m_hwInterface = nullptr; Codechal::m_hwInterface = nullptr; } } MOS_STATUS CodechalEncoderState::SetupWalkerContext( MOS_COMMAND_BUFFER* cmdBuffer, SendKernelCmdsParams* params) { MOS_STATUS eStatus = MOS_STATUS_SUCCESS; CODECHAL_ENCODE_CHK_NULL_RETURN(params); CODECHAL_ENCODE_CHK_NULL_RETURN(params->pKernelState); MOS_RESOURCE* dsh = params->pKernelState->m_dshRegion.GetResource(); CODECHAL_ENCODE_CHK_NULL_RETURN(dsh); // Add Media VFE command CODECHAL_ENCODE_CHK_STATUS_RETURN(AddMediaVfeCmd(cmdBuffer, params)); // Add Media Curbe Load command if (params->pKernelState->KernelParams.iCurbeLength) { MHW_CURBE_LOAD_PARAMS curbeLoadParams; MOS_ZeroMemory(&curbeLoadParams, sizeof(curbeLoadParams)); curbeLoadParams.pKernelState = params->pKernelState; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaCurbeLoadCmd(cmdBuffer, &curbeLoadParams)); HalOcaInterface::OnIndirectState( *cmdBuffer, (MOS_CONTEXT_HANDLE)m_osInterface->pOsContext, dsh, params->pKernelState->m_dshRegion.GetOffset() + params->pKernelState->dwCurbeOffset, false, params->pKernelState->KernelParams.iCurbeLength); } uint32_t InterfaceDescriptorTotalLength = 
m_stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData(); uint32_t InterfaceDescriptorDataStartOffset = MOS_ALIGN_CEIL( params->pKernelState->m_dshRegion.GetOffset() + params->pKernelState->dwIdOffset, m_stateHeapInterface->pStateHeapInterface->GetIdAlignment()); // Media_State_Flush should be used before MEDIA_INTERFACE_DESCRIPTOR_LOAD to ensure that the temporary Interface Descriptor storage is cleared MHW_MEDIA_STATE_FLUSH_PARAM mediaStateFlushParams; MOS_ZeroMemory(&mediaStateFlushParams, sizeof(mediaStateFlushParams)); mediaStateFlushParams.bFlushToGo = true; mediaStateFlushParams.ui8InterfaceDescriptorOffset = (uint8_t)InterfaceDescriptorDataStartOffset; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMediaStateFlush(cmdBuffer, nullptr, &mediaStateFlushParams)); MHW_ID_LOAD_PARAMS idLoadParams; MOS_ZeroMemory(&idLoadParams, sizeof(idLoadParams)); idLoadParams.pKernelState = params->pKernelState; idLoadParams.dwNumKernelsLoaded = 1; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaIDLoadCmd(cmdBuffer, &idLoadParams)); HalOcaInterface::OnIndirectState( *cmdBuffer, (MOS_CONTEXT_HANDLE)m_osInterface->pOsContext, dsh, InterfaceDescriptorDataStartOffset, false, InterfaceDescriptorTotalLength); return eStatus; } MOS_STATUS CodechalEncoderState::ResolveMetaData( PMOS_RESOURCE pHwLayoutMetaData, PMOS_RESOURCE pResolvedLayoutMetadata) { CODECHAL_ENCODE_FUNCTION_ENTER; MOS_STATUS eStatus = MOS_STATUS_SUCCESS; MOS_COMMAND_BUFFER cmdBuffer; MOS_ZeroMemory(&cmdBuffer, sizeof(cmdBuffer)); CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0)); MHW_MI_COPY_MEM_MEM_PARAMS CpyParams; CpyParams.presSrc = pHwLayoutMetaData; CpyParams.presDst = pResolvedLayoutMetadata; int bufSize = m_metaDataOffset.dwMetaDataSize + m_numSlices * m_metaDataOffset.dwMetaDataSubRegionSize; for (int i = 0; i < bufSize; i = i + 4) { CpyParams.dwSrcOffset = i; CpyParams.dwDstOffset = i; 
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(&cmdBuffer, &CpyParams)); } CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr)); m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0); CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &cmdBuffer, false)); return eStatus; } MOS_STATUS CodechalEncoderState::ReportErrorFlag( PMOS_RESOURCE pMetadataBuffer, uint32_t size, uint32_t offset, uint32_t flag) { CODECHAL_ENCODE_FUNCTION_ENTER; m_metaDataOffset.dwMetaDataSize = size; // init common MOS_COMMAND_BUFFER cmdBuffer; MOS_ZeroMemory(&cmdBuffer, sizeof(cmdBuffer)); CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0)); MHW_MI_STORE_DATA_PARAMS storeDataParams; MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams)); // Report error flags to metadata buffer storeDataParams.pOsResource = pMetadataBuffer; storeDataParams.dwResourceOffset = offset; storeDataParams.dwValue = flag; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(&cmdBuffer, &storeDataParams)); CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr)); m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0); CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &cmdBuffer, false)); return MOS_STATUS_SUCCESS; } MOS_STATUS CodechalEncoderState::StoreHuCStatus2Report(PMOS_COMMAND_BUFFER cmdBuffer) { CODECHAL_ENCODE_FUNCTION_ENTER; #if (_DEBUG || _RELEASE_INTERNAL) if (m_swBrcMode != nullptr) { // Skip check if SW BRC DLL path return MOS_STATUS_SUCCESS; } #endif // _DEBUG || _RELEASE_INTERNAL CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer); EncodeStatusBuffer encodeStatusBuf = m_encodeStatusBuf; uint32_t baseOffset = (encodeStatusBuf.wCurrIndex * encodeStatusBuf.dwReportSize) + sizeof(uint32_t) * 2; // pEncodeStatus is offset by 2 DWs in the resource // store 
HUC_STATUS2 register MHW_MI_STORE_REGISTER_MEM_PARAMS storeRegParams; MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams)); storeRegParams.presStoreBuffer = &encodeStatusBuf.resStatusBuffer; storeRegParams.dwOffset = baseOffset + encodeStatusBuf.dwHuCStatus2RegOffset; storeRegParams.dwRegister = m_hucInterface->GetMmioRegisters(m_vdboxIndex)->hucStatus2RegOffset; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &storeRegParams)); return MOS_STATUS_SUCCESS; } #if USE_CODECHAL_DEBUG_TOOL MOS_STATUS CodechalEncoderState::DumpMbEncPakOutput(PCODEC_REF_LIST currRefList, CodechalDebugInterface* debugInterface) { CODECHAL_ENCODE_FUNCTION_ENTER; CODECHAL_ENCODE_CHK_NULL_RETURN(currRefList); CODECHAL_ENCODE_CHK_NULL_RETURN(debugInterface); CODECHAL_ENCODE_CHK_STATUS_RETURN(debugInterface->DumpBuffer( &currRefList->resRefMbCodeBuffer, CodechalDbgAttr::attrOutput, "MbCode", m_picWidthInMb * m_frameFieldHeightInMb * 64, CodecHal_PictureIsBottomField(currRefList->RefPic) ? m_frameFieldHeightInMb * m_picWidthInMb * 64 : 0, (m_codecFunction != CODECHAL_FUNCTION_HYBRIDPAK) ? CODECHAL_MEDIA_STATE_ENC_NORMAL : CODECHAL_MEDIA_STATE_HYBRID_PAK_P2)); if (m_mvDataSize) { CODECHAL_ENCODE_CHK_STATUS_RETURN(debugInterface->DumpBuffer( &currRefList->resRefMvDataBuffer, CodechalDbgAttr::attrOutput, "MbData", m_picWidthInMb * m_frameFieldHeightInMb * (32 * 4), CodecHal_PictureIsBottomField(currRefList->RefPic) ? MOS_ALIGN_CEIL(m_frameFieldHeightInMb * m_picWidthInMb * (32 * 4), 0x1000) : 0, (m_codecFunction != CODECHAL_FUNCTION_HYBRIDPAK) ? CODECHAL_MEDIA_STATE_ENC_NORMAL : CODECHAL_MEDIA_STATE_HYBRID_PAK_P2)); } if (CodecHalIsFeiEncode(m_codecFunction)) { CODECHAL_ENCODE_CHK_STATUS_RETURN(debugInterface->DumpBuffer( &m_resDistortionBuffer, CodechalDbgAttr::attrOutput, "DistortionSurf", m_picWidthInMb * m_frameFieldHeightInMb * 48, CodecHal_PictureIsBottomField(currRefList->RefPic) ? 
MOS_ALIGN_CEIL(m_frameFieldHeightInMb * m_picWidthInMb * 48, 0x1000) : 0, (m_codecFunction != CODECHAL_FUNCTION_HYBRIDPAK) ? CODECHAL_MEDIA_STATE_ENC_NORMAL : CODECHAL_MEDIA_STATE_HYBRID_PAK_P2)); } return MOS_STATUS_SUCCESS; } MOS_STATUS CodechalEncoderState::AddBufferWithIMMValue( PMOS_COMMAND_BUFFER cmdBuffer, PMOS_RESOURCE presStoreBuffer, uint32_t offset, uint32_t value, bool bAdd) { MHW_MI_STORE_REGISTER_MEM_PARAMS StoreRegParams; MHW_MI_LOAD_REGISTER_REG_PARAMS LoadRegRegParams; MHW_MI_LOAD_REGISTER_IMM_PARAMS LoadRegisterImmParams; MHW_MI_FLUSH_DW_PARAMS FlushDwParams; MHW_MI_MATH_PARAMS MiMathParams; MHW_MI_ALU_PARAMS MiAluParams[4]; MOS_STATUS eStatus = MOS_STATUS_SUCCESS; CODECHAL_ENCODE_FUNCTION_ENTER; if (m_vdboxIndex > m_mfxInterface->GetMaxVdboxIndex()) { CODECHAL_ENCODE_ASSERTMESSAGE("ERROR - vdbox index exceed the maximum"); eStatus = MOS_STATUS_INVALID_PARAMETER; return eStatus; } auto pMmioRegistersMfx = m_mfxInterface->GetMmioRegisters(m_vdboxIndex); auto pMmioRegistersHcp = m_hcpInterface->GetMmioRegisters(m_vdboxIndex); MOS_ZeroMemory(&FlushDwParams, sizeof(FlushDwParams)); CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(cmdBuffer, &FlushDwParams)); MOS_ZeroMemory(&LoadRegRegParams, sizeof(LoadRegRegParams)); MHW_MI_LOAD_REGISTER_MEM_PARAMS miLoadRegMemParams; MOS_ZeroMemory(&miLoadRegMemParams, sizeof(miLoadRegMemParams)); miLoadRegMemParams.presStoreBuffer = presStoreBuffer; miLoadRegMemParams.dwOffset = offset; miLoadRegMemParams.dwRegister = pMmioRegistersMfx->generalPurposeRegister0LoOffset; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd(cmdBuffer, &miLoadRegMemParams)); MOS_ZeroMemory(&LoadRegisterImmParams, sizeof(LoadRegisterImmParams)); LoadRegisterImmParams.dwData = 0; LoadRegisterImmParams.dwRegister = pMmioRegistersMfx->generalPurposeRegister0HiOffset; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterImmCmd( cmdBuffer, &LoadRegisterImmParams)); 
MOS_ZeroMemory(&LoadRegisterImmParams, sizeof(LoadRegisterImmParams)); LoadRegisterImmParams.dwData = value; LoadRegisterImmParams.dwRegister = pMmioRegistersMfx->generalPurposeRegister4LoOffset; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterImmCmd( cmdBuffer, &LoadRegisterImmParams)); MOS_ZeroMemory(&LoadRegisterImmParams, sizeof(LoadRegisterImmParams)); LoadRegisterImmParams.dwData = 0; LoadRegisterImmParams.dwRegister = pMmioRegistersMfx->generalPurposeRegister4HiOffset; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterImmCmd( cmdBuffer, &LoadRegisterImmParams)); MOS_ZeroMemory(&MiMathParams, sizeof(MiMathParams)); MOS_ZeroMemory(&MiAluParams, sizeof(MiAluParams)); // load srcA, reg0 MiAluParams[0].AluOpcode = MHW_MI_ALU_LOAD; MiAluParams[0].Operand1 = MHW_MI_ALU_SRCA; MiAluParams[0].Operand2 = MHW_MI_ALU_GPREG0; // load srcB, reg4 MiAluParams[1].AluOpcode = MHW_MI_ALU_LOAD; MiAluParams[1].Operand1 = MHW_MI_ALU_SRCB; MiAluParams[1].Operand2 = MHW_MI_ALU_GPREG4; if (bAdd) { // add srcA, srcB MiAluParams[2].AluOpcode = MHW_MI_ALU_ADD; } else { // sub srcA, srcB MiAluParams[2].AluOpcode = MHW_MI_ALU_SUB; } // store reg0, ACCU MiAluParams[3].AluOpcode = MHW_MI_ALU_STORE; MiAluParams[3].Operand1 = MHW_MI_ALU_GPREG0; MiAluParams[3].Operand2 = MHW_MI_ALU_ACCU; MiMathParams.pAluPayload = MiAluParams; MiMathParams.dwNumAluParams = 4; // four ALU commands needed for this substract opertaion. see following ALU commands. 
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiMathCmd( cmdBuffer, &MiMathParams)); // update the value MOS_ZeroMemory(&StoreRegParams, sizeof(StoreRegParams)); StoreRegParams.presStoreBuffer = presStoreBuffer; StoreRegParams.dwOffset = offset; StoreRegParams.dwRegister = pMmioRegistersMfx->generalPurposeRegister0LoOffset; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &StoreRegParams)); return eStatus; } MOS_STATUS CodechalEncoderState::SetBufferWithIMMValueU16( PMOS_COMMAND_BUFFER cmdBuffer, PMOS_RESOURCE presStoreBuffer, uint32_t offset, uint32_t value, bool bSecond) { MHW_MI_STORE_REGISTER_MEM_PARAMS StoreRegParams; MHW_MI_LOAD_REGISTER_REG_PARAMS LoadRegRegParams; MHW_MI_LOAD_REGISTER_IMM_PARAMS LoadRegisterImmParams; MHW_MI_FLUSH_DW_PARAMS FlushDwParams; MHW_MI_MATH_PARAMS MiMathParams; MHW_MI_ALU_PARAMS MiAluParams[4]; // is used twice MOS_STATUS eStatus = MOS_STATUS_SUCCESS; CODECHAL_ENCODE_FUNCTION_ENTER; if (m_vdboxIndex > m_mfxInterface->GetMaxVdboxIndex()) { CODECHAL_ENCODE_ASSERTMESSAGE("ERROR - vdbox index exceed the maximum"); eStatus = MOS_STATUS_INVALID_PARAMETER; return eStatus; } auto pMmioRegistersMfx = m_mfxInterface->GetMmioRegisters(m_vdboxIndex); auto pMmioRegistersHcp = m_hcpInterface->GetMmioRegisters(m_vdboxIndex); MOS_ZeroMemory(&FlushDwParams, sizeof(FlushDwParams)); CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(cmdBuffer, &FlushDwParams)); MOS_ZeroMemory(&LoadRegRegParams, sizeof(LoadRegRegParams)); MHW_MI_LOAD_REGISTER_MEM_PARAMS miLoadRegMemParams; MOS_ZeroMemory(&miLoadRegMemParams, sizeof(miLoadRegMemParams)); miLoadRegMemParams.presStoreBuffer = presStoreBuffer; miLoadRegMemParams.dwOffset = offset; miLoadRegMemParams.dwRegister = pMmioRegistersMfx->generalPurposeRegister0LoOffset; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd(cmdBuffer, &miLoadRegMemParams)); MOS_ZeroMemory(&LoadRegisterImmParams, sizeof(LoadRegisterImmParams)); 
LoadRegisterImmParams.dwData = 0; LoadRegisterImmParams.dwRegister = pMmioRegistersMfx->generalPurposeRegister0HiOffset; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterImmCmd( cmdBuffer, &LoadRegisterImmParams)); uint32_t mask = bSecond ? 0xffff : 0xffff0000; value = bSecond ? value << 16 : value; MOS_ZeroMemory(&LoadRegisterImmParams, sizeof(LoadRegisterImmParams)); LoadRegisterImmParams.dwData = mask; LoadRegisterImmParams.dwRegister = pMmioRegistersMfx->generalPurposeRegister4LoOffset; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterImmCmd( cmdBuffer, &LoadRegisterImmParams)); MOS_ZeroMemory(&LoadRegisterImmParams, sizeof(LoadRegisterImmParams)); LoadRegisterImmParams.dwData = 0; LoadRegisterImmParams.dwRegister = pMmioRegistersMfx->generalPurposeRegister4HiOffset; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterImmCmd( cmdBuffer, &LoadRegisterImmParams)); MOS_ZeroMemory(&MiMathParams, sizeof(MiMathParams)); MOS_ZeroMemory(&MiAluParams, sizeof(MiAluParams)); // load srcA, reg0 MiAluParams[0].AluOpcode = MHW_MI_ALU_LOAD; MiAluParams[0].Operand1 = MHW_MI_ALU_SRCA; MiAluParams[0].Operand2 = MHW_MI_ALU_GPREG0; // load srcB, reg4 MiAluParams[1].AluOpcode = MHW_MI_ALU_LOAD; MiAluParams[1].Operand1 = MHW_MI_ALU_SRCB; MiAluParams[1].Operand2 = MHW_MI_ALU_GPREG4; // and srcA, srcB MiAluParams[2].AluOpcode = MHW_MI_ALU_AND; // store reg0, ACCU MiAluParams[3].AluOpcode = MHW_MI_ALU_STORE; MiAluParams[3].Operand1 = MHW_MI_ALU_GPREG0; MiAluParams[3].Operand2 = MHW_MI_ALU_ACCU; MiMathParams.pAluPayload = MiAluParams; MiMathParams.dwNumAluParams = 4; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiMathCmd( cmdBuffer, &MiMathParams)); MOS_ZeroMemory(&LoadRegisterImmParams, sizeof(LoadRegisterImmParams)); LoadRegisterImmParams.dwData = value; LoadRegisterImmParams.dwRegister = pMmioRegistersMfx->generalPurposeRegister4LoOffset; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterImmCmd( cmdBuffer, 
&LoadRegisterImmParams)); MOS_ZeroMemory(&LoadRegisterImmParams, sizeof(LoadRegisterImmParams)); LoadRegisterImmParams.dwData = 0; LoadRegisterImmParams.dwRegister = pMmioRegistersMfx->generalPurposeRegister4HiOffset; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterImmCmd( cmdBuffer, &LoadRegisterImmParams)); // load srcA, reg0 MiAluParams[0].AluOpcode = MHW_MI_ALU_LOAD; MiAluParams[0].Operand1 = MHW_MI_ALU_SRCA; MiAluParams[0].Operand2 = MHW_MI_ALU_GPREG0; // load srcB, reg4 MiAluParams[1].AluOpcode = MHW_MI_ALU_LOAD; MiAluParams[1].Operand1 = MHW_MI_ALU_SRCB; MiAluParams[1].Operand2 = MHW_MI_ALU_GPREG4; // or srcA, srcB MiAluParams[2].AluOpcode = MHW_MI_ALU_OR; // store reg0, ACCU MiAluParams[3].AluOpcode = MHW_MI_ALU_STORE; MiAluParams[3].Operand1 = MHW_MI_ALU_GPREG0; MiAluParams[3].Operand2 = MHW_MI_ALU_ACCU; MiMathParams.pAluPayload = MiAluParams; MiMathParams.dwNumAluParams = 4; // 4 ALU commands needed for this opertaion. see following ALU commands. CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiMathCmd( cmdBuffer, &MiMathParams)); // update the value MOS_ZeroMemory(&StoreRegParams, sizeof(StoreRegParams)); StoreRegParams.presStoreBuffer = presStoreBuffer; StoreRegParams.dwOffset = offset; StoreRegParams.dwRegister = pMmioRegistersMfx->generalPurposeRegister0LoOffset; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &StoreRegParams)); return eStatus; } #endif