/* * Copyright (c) 2017-2020, Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ //! //! \file codechal_encode_wp.cpp //! \brief Defines base class for weighted prediction kernel //! #include "codechal_encoder_base.h" #include "codechal_encode_wp.h" #include "hal_oca_interface.h" MOS_STATUS CodechalEncodeWP::AllocateResources() { CODECHAL_ENCODE_FUNCTION_ENTER; MOS_STATUS eStatus = MOS_STATUS_SUCCESS; if (Mos_ResourceIsNull(&m_surfaceParams.weightedPredOutputPicList[m_surfaceParams.wpOutListIdx].OsResource)) { MOS_ZeroMemory(&m_surfaceParams.weightedPredOutputPicList[m_surfaceParams.wpOutListIdx], sizeof(MOS_SURFACE)); MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferNV12; MOS_ZeroMemory(&allocParamsForBufferNV12, sizeof(MOS_ALLOC_GFXRES_PARAMS)); allocParamsForBufferNV12.Type = MOS_GFXRES_2D; allocParamsForBufferNV12.TileType = MOS_TILE_Y; allocParamsForBufferNV12.Format = Format_NV12; allocParamsForBufferNV12.dwWidth = m_frameWidth; allocParamsForBufferNV12.dwHeight = m_frameHeight; allocParamsForBufferNV12.pBufName = "WP Scaled output Buffer"; CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource( m_osInterface, &allocParamsForBufferNV12, &m_surfaceParams.weightedPredOutputPicList[m_surfaceParams.wpOutListIdx].OsResource), "Failed to allocate WP Scaled output Buffer."); CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalGetResourceInfo(m_osInterface, &m_surfaceParams.weightedPredOutputPicList[m_surfaceParams.wpOutListIdx])); } return eStatus; } void CodechalEncodeWP::ReleaseResources() { for (auto i = 0; i < CODEC_NUM_WP_FRAME; i++) { if (!Mos_ResourceIsNull(&m_surfaceParams.weightedPredOutputPicList[i].OsResource)) { m_osInterface->pfnFreeResource( m_osInterface, &m_surfaceParams.weightedPredOutputPicList[i].OsResource); } } } uint8_t CodechalEncodeWP::GetBTCount() { return (uint8_t)wpNumSurfaces; } MOS_STATUS CodechalEncodeWP::InitKernelState() { CODECHAL_ENCODE_FUNCTION_ENTER; MOS_STATUS eStatus = MOS_STATUS_SUCCESS; if (!m_kernelState) { CODECHAL_ENCODE_CHK_NULL_RETURN(m_kernelState = MOS_New(MHW_KERNEL_STATE)); } uint8_t* binary; CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalGetKernelBinaryAndSize( m_kernelBase, m_kernelUID, &binary, &m_combinedKernelSize)); auto kernelSize = m_combinedKernelSize; CODECHAL_KERNEL_HEADER currKrnHeader; CODECHAL_ENCODE_CHK_STATUS_RETURN(pfnGetKernelHeaderAndSize( binary, ENC_WP, 0, &currKrnHeader, &kernelSize)); m_kernelState->KernelParams.iBTCount = wpNumSurfaces; m_kernelState->KernelParams.iThreadCount = m_renderInterface->GetHwCaps()->dwMaxThreads; m_kernelState->KernelParams.iCurbeLength = m_curbeLength; m_kernelState->KernelParams.iBlockWidth = CODECHAL_MACROBLOCK_WIDTH; m_kernelState->KernelParams.iBlockHeight = CODECHAL_MACROBLOCK_HEIGHT; m_kernelState->KernelParams.iIdCount = 1; m_kernelState->KernelParams.iInlineDataLength = 0; m_kernelState->dwCurbeOffset = m_stateHeapInterface->GetSizeofCmdInterfaceDescriptorData(); m_kernelState->KernelParams.pBinary = binary + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT); m_kernelState->KernelParams.iSize = kernelSize; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->CalculateSshAndBtSizesRequested( m_kernelState->KernelParams.iBTCount, &m_kernelState->dwSshSize, &m_kernelState->dwBindingTableSize)); CODECHAL_ENCODE_CHK_NULL_RETURN(m_renderInterface->m_stateHeapInterface); CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_renderInterface->m_stateHeapInterface, m_kernelState)); return eStatus; } MOS_STATUS CodechalEncodeWP::SetCurbe() { CODECHAL_ENCODE_FUNCTION_ENTER; MOS_STATUS eStatus = MOS_STATUS_SUCCESS; CurbeData curbe; MOS_ZeroMemory(&curbe, sizeof(CurbeData)); /* Weights[i][j][k][m] is interpreted as: i refers to reference picture list 0 or 1; j refers to reference list entry 0-31; k refers to data for the luma (Y) component when it is 0, the Cb chroma component when it is 1 and the Cr chroma component when it is 2; m refers to weight when it is 0 and offset when it is 1 */ //C Model hard code log2WeightDenom = 6. No need to send WD paramters to WP Kernel. curbe.DW0.defaultWeight = m_curbeParams.slcParams->weights[m_curbeParams.refPicListIdx][m_curbeParams.wpIdx][0][0]; curbe.DW0.defaultOffset = m_curbeParams.slcParams->weights[m_curbeParams.refPicListIdx][m_curbeParams.wpIdx][0][1]; curbe.DW49.inputSurface = wpInputRefSurface; curbe.DW50.outputSurface = wpOutputScaledSurface; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_kernelState->m_dshRegion.AddData( &curbe, m_kernelState->dwCurbeOffset, sizeof(curbe))); return eStatus; } MOS_STATUS CodechalEncodeWP::SendSurface(PMOS_COMMAND_BUFFER cmdBuffer) { MOS_STATUS eStatus = MOS_STATUS_SUCCESS; CODECHAL_ENCODE_FUNCTION_ENTER; CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer); auto currFieldPicture = CodecHal_PictureIsField(m_currOriginalPic); // Program the surface based on current picture's field/frame mode uint32_t refBindingTableOffset; uint32_t refVerticalLineStride; uint32_t refVerticalLineStrideOffset; uint8_t refVDirection; if (currFieldPicture) // if current picture is field { if (m_surfaceParams.refIsBottomField) { refVDirection = CODECHAL_VDIRECTION_BOT_FIELD; refVerticalLineStride = CODECHAL_VLINESTRIDE_FIELD; refVerticalLineStrideOffset = CODECHAL_VLINESTRIDEOFFSET_BOT_FIELD; } else { refVDirection = CODECHAL_VDIRECTION_TOP_FIELD; refVerticalLineStride = CODECHAL_VLINESTRIDE_FIELD; refVerticalLineStrideOffset = CODECHAL_VLINESTRIDEOFFSET_TOP_FIELD; } } else // if current picture is frame { refVDirection = CODECHAL_VDIRECTION_FRAME; refVerticalLineStride = CODECHAL_VLINESTRIDE_FRAME; refVerticalLineStrideOffset = CODECHAL_VLINESTRIDEOFFSET_TOP_FIELD; } CODECHAL_SURFACE_CODEC_PARAMS surfaceCodecParams; MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams)); surfaceCodecParams.bIs2DSurface = true; surfaceCodecParams.bMediaBlockRW = true; surfaceCodecParams.psSurface = m_surfaceParams.refFrameInput; // Input surface surfaceCodecParams.bIsWritable = false; surfaceCodecParams.bRenderTarget = false; surfaceCodecParams.dwBindingTableOffset = wpInputRefSurface; surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_WP_DOWNSAMPLED_ENCODE].Value; surfaceCodecParams.dwVerticalLineStride = refVerticalLineStride; surfaceCodecParams.dwVerticalLineStrideOffset = refVerticalLineStrideOffset; surfaceCodecParams.ucVDirection = refVDirection; CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( m_hwInterface, cmdBuffer, &surfaceCodecParams, m_kernelState)); MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams)); surfaceCodecParams.bIs2DSurface = true; surfaceCodecParams.bMediaBlockRW = true; surfaceCodecParams.psSurface = &m_surfaceParams.weightedPredOutputPicList[m_surfaceParams.wpOutListIdx]; // output surface surfaceCodecParams.bIsWritable = true; surfaceCodecParams.bRenderTarget = true; surfaceCodecParams.dwBindingTableOffset = wpOutputScaledSurface; surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_WP_DOWNSAMPLED_ENCODE].Value; surfaceCodecParams.dwVerticalLineStride = refVerticalLineStride; surfaceCodecParams.dwVerticalLineStrideOffset = refVerticalLineStrideOffset; surfaceCodecParams.ucVDirection = refVDirection; CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( m_hwInterface, cmdBuffer, &surfaceCodecParams, m_kernelState)); return eStatus; } MOS_STATUS CodechalEncodeWP::Execute(KernelParams *params) { MOS_STATUS eStatus = MOS_STATUS_SUCCESS; CODECHAL_ENCODE_FUNCTION_ENTER; CODECHAL_ENCODE_CHK_NULL_RETURN(params); if (params->slcWPParams && params->slcWPParams->luma_log2_weight_denom != 6) { eStatus = MOS_STATUS_INVALID_PARAMETER; CODECHAL_ENCODE_ASSERTMESSAGE("Weighted Prediction Kernel does not support Log2LumaWeightDenom != 6!"); return eStatus; } PerfTagSetting perfTag; CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_WP_KERNEL); if (params->useRefPicList1) { *(params->useWeightedSurfaceForL1) = true; m_surfaceParams.wpOutListIdx = CODEC_WP_OUTPUT_L1_START + params->wpIndex; } else { *(params->useWeightedSurfaceForL0) = true; m_surfaceParams.wpOutListIdx = CODEC_WP_OUTPUT_L0_START + params->wpIndex; } if (m_surfaceParams.wpOutListIdx >= CODEC_NUM_WP_FRAME) { eStatus = MOS_STATUS_INVALID_PARAMETER; CODECHAL_ENCODE_ASSERTMESSAGE("index exceeds maximum value of array weightedPredOutputPicList."); return eStatus; } // Allocate output surface CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateResources()); // If Single Task Phase is not enabled, use BT count for the kernel state. if (m_firstTaskInPhase == true || !m_singleTaskPhaseSupported) { auto maxBtCount = m_singleTaskPhaseSupported ? m_maxBtCount : m_kernelState->KernelParams.iBTCount; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->RequestSshSpaceForCmdBuf(maxBtCount)); m_vmeStatesSize = m_hwInterface->GetKernelLoadCommandSize(maxBtCount); CODECHAL_ENCODE_CHK_STATUS_RETURN(m_encoder->VerifySpaceAvailable()); } // setup DSH and Interface Descriptor auto stateHeapInterface = m_renderInterface->m_stateHeapInterface; CODECHAL_ENCODE_CHK_NULL_RETURN(stateHeapInterface); CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace( stateHeapInterface, m_kernelState, false, 0, false, m_storeData)); MHW_INTERFACE_DESCRIPTOR_PARAMS idParams; MOS_ZeroMemory(&idParams, sizeof(idParams)); idParams.pKernelState = m_kernelState; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->SetInterfaceDescriptor(1, &idParams)); // Setup Curbe m_curbeParams.refPicListIdx = (params->useRefPicList1) ? LIST_1 : LIST_0; m_curbeParams.wpIdx = params->wpIndex; m_curbeParams.slcParams = params->slcWPParams; CODECHAL_ENCODE_CHK_STATUS_RETURN(SetCurbe()); auto encFunctionType = CODECHAL_MEDIA_STATE_ENC_WP; CODECHAL_DEBUG_TOOL( CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion( encFunctionType, MHW_DSH_TYPE, m_kernelState)); CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe( encFunctionType, m_kernelState)); CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion( encFunctionType, MHW_ISH_TYPE, m_kernelState)); ) MOS_COMMAND_BUFFER cmdBuffer; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0)); SendKernelCmdsParams sendKernelCmdsParams = SendKernelCmdsParams(); sendKernelCmdsParams.EncFunctionType = encFunctionType; sendKernelCmdsParams.pKernelState = m_kernelState; CODECHAL_ENCODE_CHK_STATUS_RETURN(m_encoder->SendGenericKernelCmds(&cmdBuffer, &sendKernelCmdsParams)); // add binding table CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->SetBindingTable(m_kernelState)); (params->useRefPicList1) ? (*params->useWeightedSurfaceForL1 = true) : (*params->useWeightedSurfaceForL0 = true); CodecHalGetResourceInfo(m_osInterface, params->refFrameInput); //Set Surface States m_surfaceParams.refFrameInput = params->refFrameInput; m_surfaceParams.refIsBottomField = params->refIsBottomField; CODECHAL_ENCODE_CHK_STATUS_RETURN(SendSurface(&cmdBuffer)); CODECHAL_DEBUG_TOOL( CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion( encFunctionType, MHW_SSH_TYPE, m_kernelState)); ) // Thread Dispatch Pattern - MEDIA OBJECT WALKER if (m_hwWalker) { auto resolutionX = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_frameWidth); auto resolutionY = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameFieldHeight); CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams; MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams)); walkerCodecParams.WalkerMode = m_walkerMode; walkerCodecParams.bUseScoreboard = m_useHwScoreboard; walkerCodecParams.dwResolutionX = resolutionX; walkerCodecParams.dwResolutionY = resolutionY; walkerCodecParams.bGroupIdSelectSupported = m_groupIdSelectSupported; walkerCodecParams.ucGroupId = m_groupId; walkerCodecParams.bNoDependency = true; MHW_WALKER_PARAMS walkerParams; CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(m_hwInterface, &walkerParams, &walkerCodecParams)); HalOcaInterface::TraceMessage(cmdBuffer, (MOS_CONTEXT_HANDLE)m_osInterface->pOsContext, __FUNCTION__, sizeof(__FUNCTION__)); HalOcaInterface::OnDispatch(cmdBuffer, *m_osInterface, *m_miInterface, *m_renderInterface->GetMmioRegisters()); CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderInterface->AddMediaObjectWalkerCmd(&cmdBuffer, &walkerParams)); } CODECHAL_ENCODE_CHK_STATUS_RETURN(m_encoder->EndStatusReport(&cmdBuffer, encFunctionType)); CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->SubmitBlocks(m_kernelState)); if (!m_singleTaskPhaseSupported || m_lastTaskInPhase) { CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->UpdateGlobalCmdBufId()); CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr)); } CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer( &cmdBuffer, encFunctionType, nullptr))); CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->UpdateSSEuForCmdBuffer( &cmdBuffer, m_singleTaskPhaseSupported, m_lastTaskInPhase)); m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0); if (!m_singleTaskPhaseSupported || m_lastTaskInPhase) { HalOcaInterface::On1stLevelBBEnd(cmdBuffer, *m_osInterface); m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &cmdBuffer, m_renderContextUsesNullHw); m_lastTaskInPhase = false; } return eStatus; } CodechalEncodeWP::CodechalEncodeWP(CodechalEncoderState *encoder) : m_useHwScoreboard(encoder->m_useHwScoreboard), m_renderContextUsesNullHw(encoder->m_renderContextUsesNullHw), m_groupIdSelectSupported(encoder->m_groupIdSelectSupported), m_singleTaskPhaseSupported(encoder->m_singleTaskPhaseSupported), m_firstTaskInPhase(encoder->m_firstTaskInPhase), m_lastTaskInPhase(encoder->m_lastTaskInPhase), m_hwWalker(encoder->m_hwWalker), m_groupId(encoder->m_groupId), m_pictureCodingType(encoder->m_pictureCodingType), m_mode(encoder->m_mode), m_verticalLineStride(encoder->m_verticalLineStride), m_maxBtCount(encoder->m_maxBtCount), m_vmeStatesSize(encoder->m_vmeStatesSize), m_storeData(encoder->m_storeData), m_frameWidth(encoder->m_frameWidth), m_frameHeight(encoder->m_frameHeight), m_frameFieldHeight(encoder->m_frameFieldHeight), m_currOriginalPic(encoder->m_currOriginalPic), m_walkerMode(encoder->m_walkerMode) { CODECHAL_ENCODE_CHK_NULL_NO_STATUS_RETURN(encoder); // Initilize interface pointers m_encoder = encoder; m_osInterface = encoder->GetOsInterface(); m_hwInterface = encoder->GetHwInterface(); m_debugInterface = encoder->GetDebugInterface(); m_miInterface = m_hwInterface->GetMiInterface(); m_renderInterface = m_hwInterface->GetRenderInterface(); m_stateHeapInterface = m_renderInterface->m_stateHeapInterface->pStateHeapInterface; m_curbeLength = sizeof(CurbeData); } CodechalEncodeWP::~CodechalEncodeWP() { // free weighted prediction surface ReleaseResources(); MOS_Delete(m_kernelState); m_kernelState = nullptr; }