/* * Copyright (c) 2016-2020, Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ //! //! \file codechal_decode_scalability.cpp //! \brief Implements the decode interface extension for decode scalability. //! \details Implements all functions required by CodecHal for scalability decoding. //! #include "codechal_decoder.h" #include "codechal_decode_scalability.h" #include "mos_util_user_interface.h" #include "mos_solo_generic.h" #include "mos_os_virtualengine_next.h" //! //! \brief calculate secondary cmd buffer index //! \details calculate secondary cmd buffer index to get or return secondary cmd buffer //! \param [in] pScalabilityState //! pointer to scalability decode state //! \param [in] pdwBufIdxPlus1 //! pointer to buf index, will contain the returned buf index value. //! \return MOS_STATUS //! MOS_STATUS_SUCCESS if success, else fail reason //! static MOS_STATUS CodecHalDecodeScalability_CalculateScdryCmdBufIndex( PCODECHAL_DECODE_SCALABILITY_STATE pScalabilityState, uint32_t *pdwBufIdxPlus1) { uint32_t HcpDecPhaseForBufIdx; MOS_STATUS eStatus = MOS_STATUS_SUCCESS; CODECHAL_DECODE_FUNCTION_ENTER; CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState); CODECHAL_DECODE_CHK_NULL_RETURN(pdwBufIdxPlus1); HcpDecPhaseForBufIdx = pScalabilityState->HcpDecPhase; if (pScalabilityState->HcpDecPhase == CodechalDecode::CodechalHcpDecodePhaseLegacyS2L) { //S2L commands put in the FE secondary command buffer. CODECHAL_DECODE_ASSERT(pScalabilityState->bShortFormatInUse); HcpDecPhaseForBufIdx = CODECHAL_HCP_DECODE_PHASE_FE; } //buffer index order is same as the buffer order in resScalableBatchBufs[] of MOS_VIRTUALENGINE_HINT_PARAMS *pdwBufIdxPlus1 = HcpDecPhaseForBufIdx - (pScalabilityState->bFESeparateSubmission ? CODECHAL_HCP_DECODE_PHASE_BE0 : CODECHAL_HCP_DECODE_PHASE_FE) + 1; return eStatus; } //! //! \brief check if valid decode phase //! \param [in] pScalabilityState //! pointer to scalability decode state //! \param [in] HcpDecPhase //! Hcp Decode Phase //! \return MOS_STATUS //! MOS_STATUS_SUCCESS if valid decode phase, else fail reason //! static MOS_STATUS CodecHalDecodeScalability_CheckDecPhaseValidity( PCODECHAL_DECODE_SCALABILITY_STATE pScalabilityState, uint32_t HcpDecPhase) { bool bInValidPhase = false; MOS_STATUS eStatus = MOS_STATUS_SUCCESS; CODECHAL_DECODE_FUNCTION_ENTER; CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState); switch (HcpDecPhase) { case CodechalDecode::CodechalHcpDecodePhaseInitialized: break; case CodechalDecode::CodechalHcpDecodePhaseLegacyS2L: if (!pScalabilityState->bShortFormatInUse) { bInValidPhase = true; } break; case CodechalDecode::CodechalHcpDecodePhaseLegacyLong: if (pScalabilityState->bScalableDecodeMode) { bInValidPhase = true; } break; case CODECHAL_HCP_DECODE_PHASE_FE: case CODECHAL_HCP_DECODE_PHASE_BE0: if (!pScalabilityState->bScalableDecodeMode) { bInValidPhase = true; } else if (pScalabilityState->ucScalablePipeNum < 2) { //at least 2 pipe bInValidPhase = true; } break; case CODECHAL_HCP_DECODE_PHASE_BE1: case CODECHAL_HCP_DECODE_PHASE_RESERVED: if (!pScalabilityState->bScalableDecodeMode) { bInValidPhase = true; } else if (pScalabilityState->ucScalablePipeNum < (HcpDecPhase - CODECHAL_HCP_DECODE_PHASE_BE0 + 1)) { bInValidPhase = true; } break; default: bInValidPhase = true; break; } if (bInValidPhase) { eStatus = MOS_STATUS_INVALID_PARAMETER; CODECHAL_DECODE_ASSERTMESSAGE("invalid decode phase : %d !", HcpDecPhase); } return eStatus; } //! //! \brief Allocate fixed size resources for scalability decode //! \details Allocate fixed size resources for scalability decode //! \param [in] pScalabilityState //! pointer to scalability decode state //! \return MOS_STATUS //! MOS_STATUS_SUCCESS if success, else fail reason //! MOS_STATUS CodecHalDecodeScalability_AllocateResources_FixedSizes( PCODECHAL_DECODE_SCALABILITY_STATE pScalabilityState) { PMOS_INTERFACE pOsInterface; MOS_ALLOC_GFXRES_PARAMS AllocParamsForBufferLinear; MOS_LOCK_PARAMS LockFlagsWriteOnly; uint8_t *pData; MOS_STATUS eStatus = MOS_STATUS_SUCCESS; CODECHAL_DECODE_FUNCTION_ENTER; CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState); CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState->pHwInterface); CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState->pHwInterface->GetOsInterface()); pOsInterface = pScalabilityState->pHwInterface->GetOsInterface(); MOS_ZeroMemory(&LockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS)); LockFlagsWriteOnly.WriteOnly = 1; // initiate allocation paramters MOS_ZeroMemory(&AllocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS)); AllocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER; AllocParamsForBufferLinear.TileType = MOS_TILE_LINEAR; AllocParamsForBufferLinear.Format = Format_Buffer; if (pScalabilityState->Standard == CODECHAL_HEVC)//Confirmed by HW that VP9 does not need this buffer { //for Scalability --- Slice State Stream Out Buffer AllocParamsForBufferLinear.dwBytes = CODECHAL_HEVC_MAX_NUM_SLICES_LVL_6 * pScalabilityState->sliceStateCLs * CODECHAL_CACHELINE_SIZE; AllocParamsForBufferLinear.pBufName = "SliceStateStreamOut"; eStatus = (MOS_STATUS)pOsInterface->pfnAllocateResource( pOsInterface, &AllocParamsForBufferLinear, &pScalabilityState->resSliceStateStreamOutBuffer); if (eStatus != MOS_STATUS_SUCCESS) { CODECHAL_DECODE_ASSERTMESSAGE("Failed to allocate SliceState StreamOut Buffer."); return eStatus; } } //Semaphore memory for BEs to start at the same time AllocParamsForBufferLinear.dwBytes = sizeof(uint32_t); AllocParamsForBufferLinear.pBufName = "BESemaphoreMemory"; eStatus = (MOS_STATUS)pOsInterface->pfnAllocateResource( pOsInterface, &AllocParamsForBufferLinear, &pScalabilityState->resSemaMemBEs); if (eStatus != MOS_STATUS_SUCCESS) { CODECHAL_DECODE_ASSERTMESSAGE("Failed to allocate BE Semaphore memory."); return eStatus; } pData = (uint8_t*)pOsInterface->pfnLockResource( pOsInterface, &pScalabilityState->resSemaMemBEs, &LockFlagsWriteOnly); CODECHAL_DECODE_CHK_NULL_RETURN(pData); MOS_ZeroMemory(pData, sizeof(uint32_t)); CODECHAL_DECODE_CHK_STATUS_RETURN(pOsInterface->pfnUnlockResource( pOsInterface, &pScalabilityState->resSemaMemBEs)); AllocParamsForBufferLinear.dwBytes = sizeof(uint32_t); AllocParamsForBufferLinear.pBufName = "DelayMinusMemory"; eStatus = (MOS_STATUS)pOsInterface->pfnAllocateResource( pOsInterface, &AllocParamsForBufferLinear, &pScalabilityState->resDelayMinus); if (eStatus != MOS_STATUS_SUCCESS) { CODECHAL_DECODE_ASSERTMESSAGE("Failed to allocate delay minus memory."); return eStatus; } pData = (uint8_t*)pOsInterface->pfnLockResource( pOsInterface, &pScalabilityState->resDelayMinus, &LockFlagsWriteOnly); CODECHAL_DECODE_CHK_NULL_RETURN(pData); MOS_ZeroMemory(pData, sizeof(uint32_t)); CODECHAL_DECODE_CHK_STATUS_RETURN(pOsInterface->pfnUnlockResource( pOsInterface, &pScalabilityState->resDelayMinus)); if (pScalabilityState->pHwInterface->GetMfxInterface()->GetNumVdbox() > 2) { if (pScalabilityState->bFESeparateSubmission) { //SW Semaphore sync object for FE/BE synchronization CODECHAL_DECODE_CHK_STATUS_RETURN(pOsInterface->pfnCreateSyncResource(pOsInterface, &pScalabilityState->resFeBeSyncObject)); } else if (pOsInterface->bUseHwSemaForResSyncInVE) { //Semaphore memory for FE /BE synchronization AllocParamsForBufferLinear.dwBytes = sizeof(uint32_t); AllocParamsForBufferLinear.pBufName = "FEBESemaphMemory"; eStatus = (MOS_STATUS)pOsInterface->pfnAllocateResource( pOsInterface, &AllocParamsForBufferLinear, &pScalabilityState->resSemaMemFEBE); if (eStatus != MOS_STATUS_SUCCESS) { CODECHAL_DECODE_ASSERTMESSAGE("Failed to allocate FEBE Semaph memory."); return eStatus; } pData = (uint8_t*)pOsInterface->pfnLockResource( pOsInterface, &pScalabilityState->resSemaMemFEBE, &LockFlagsWriteOnly); CODECHAL_DECODE_CHK_NULL_RETURN(pData); MOS_ZeroMemory(pData, sizeof(uint32_t)); CODECHAL_DECODE_CHK_STATUS_RETURN(pOsInterface->pfnUnlockResource( pOsInterface, &pScalabilityState->resSemaMemFEBE)); } } AllocParamsForBufferLinear.dwBytes = sizeof(CODECHAL_DECODE_SCALABILITY_FE_STATUS); AllocParamsForBufferLinear.pBufName = "FEStatusBuffer"; eStatus = (MOS_STATUS)pOsInterface->pfnAllocateResource( pOsInterface, &AllocParamsForBufferLinear, &pScalabilityState->resFEStatusBuffer); if (eStatus != MOS_STATUS_SUCCESS) { CODECHAL_DECODE_ASSERTMESSAGE("Failed to allocate FE status Buffer."); return eStatus; } //Semaphore memory for frame decode completion synchronization AllocParamsForBufferLinear.dwBytes = sizeof(uint32_t); AllocParamsForBufferLinear.pBufName = "CompletionSemaphMemory"; eStatus = (MOS_STATUS)pOsInterface->pfnAllocateResource( pOsInterface, &AllocParamsForBufferLinear, &pScalabilityState->resSemaMemCompletion); if (eStatus != MOS_STATUS_SUCCESS) { CODECHAL_DECODE_ASSERTMESSAGE("Failed to allocate Completion Semaph memory."); return eStatus; } pData = (uint8_t*)pOsInterface->pfnLockResource( pOsInterface, &pScalabilityState->resSemaMemCompletion, &LockFlagsWriteOnly); CODECHAL_DECODE_CHK_NULL_RETURN(pData); MOS_ZeroMemory(pData, sizeof(uint32_t)); CODECHAL_DECODE_CHK_STATUS_RETURN(pOsInterface->pfnUnlockResource( pOsInterface, &pScalabilityState->resSemaMemCompletion)); return eStatus; } //! //! \brief Get secondary cmd buffer //! \param [in] pScalabilityState //! Scalability decode state //! \param [in] pSdryCmdBuf //! secondary cmd buffer address //! \return MOS_STATUS //! MOS_STATUS_SUCCESS if success, else fail reason //! static MOS_STATUS CodecHalDecodeScalability_GetVESecondaryCmdBuffer( PCODECHAL_DECODE_SCALABILITY_STATE pScalabilityState, PMOS_COMMAND_BUFFER pSdryCmdBuf) { PMOS_INTERFACE pOsInterface; uint32_t HcpDecPhase; uint32_t dwBufIdxPlus1 = 0; MOS_STATUS eStatus = MOS_STATUS_SUCCESS; CODECHAL_DECODE_FUNCTION_ENTER; CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState); CODECHAL_DECODE_CHK_NULL_RETURN(pSdryCmdBuf); CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState->pHwInterface); CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState->pHwInterface->GetOsInterface()); pOsInterface = pScalabilityState->pHwInterface->GetOsInterface(); HcpDecPhase = pScalabilityState->HcpDecPhase; //calculate bufidx for getting secondary cmd buffer. CODECHAL_DECODE_CHK_STATUS_RETURN(CodecHalDecodeScalability_CalculateScdryCmdBufIndex(pScalabilityState, &dwBufIdxPlus1)); //Check if valid decode phase CODECHAL_DECODE_CHK_STATUS_RETURN(CodecHalDecodeScalability_CheckDecPhaseValidity(pScalabilityState, HcpDecPhase)); //Get batch buffer according to current decode phase switch (HcpDecPhase) { case CodechalDecode::CodechalHcpDecodePhaseLegacyS2L: //Note: no break here, S2L and FE commands put in one secondary command buffer. case CODECHAL_HCP_DECODE_PHASE_FE: if(!pScalabilityState->bFESeparateSubmission) { CODECHAL_DECODE_CHK_STATUS_RETURN(pOsInterface->pfnGetCommandBuffer(pOsInterface, pSdryCmdBuf, dwBufIdxPlus1)); CODECHAL_DECODE_CHK_NULL_RETURN(pSdryCmdBuf); } else { //if FE separate submission, S2L and FE cmd buffer are in primary cmd buffer, shall not call this function to get secondary cmd buffer eStatus = MOS_STATUS_INVALID_PARAMETER; CODECHAL_DECODE_ASSERTMESSAGE("S2L or FE does not need secondary cmd buffer in FE separate submission!"); } break; case CODECHAL_HCP_DECODE_PHASE_BE0: case CODECHAL_HCP_DECODE_PHASE_BE1: case CODECHAL_HCP_DECODE_PHASE_RESERVED: CODECHAL_DECODE_CHK_STATUS_RETURN(pOsInterface->pfnGetCommandBuffer(pOsInterface, pSdryCmdBuf, dwBufIdxPlus1)); CODECHAL_DECODE_CHK_NULL_RETURN(pSdryCmdBuf); break; default: //never comes here because other decode phase already checked invalid in function CodecHalDecodeScalability_CheckDecPhaseValidity, eStatus = MOS_STATUS_INVALID_PARAMETER; break; } return eStatus; } MOS_STATUS CodecHalDecodeScalability_AllocateResources_VariableSizes( PCODECHAL_DECODE_SCALABILITY_STATE pScalabilityState, PMHW_VDBOX_HCP_BUFFER_SIZE_PARAMS pHcpBufSizeParam, PMHW_VDBOX_HCP_BUFFER_REALLOC_PARAMS pAllocParam) { MOS_ALLOC_GFXRES_PARAMS AllocParamsForBufferLinear; PMOS_INTERFACE pOsInterface; MOS_STATUS eStatus = MOS_STATUS_SUCCESS; CODECHAL_DECODE_FUNCTION_ENTER; CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState); CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState->pHwInterface); CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState->pHwInterface->GetOsInterface()); CODECHAL_DECODE_CHK_NULL_RETURN(pHcpBufSizeParam); CODECHAL_DECODE_CHK_NULL_RETURN(pAllocParam); pOsInterface = pScalabilityState->pHwInterface->GetOsInterface(); // initiate allocation paramters MOS_ZeroMemory(&AllocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS)); AllocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER; AllocParamsForBufferLinear.TileType = MOS_TILE_LINEAR; AllocParamsForBufferLinear.Format = Format_Buffer; if (pScalabilityState->Standard == CODECHAL_HEVC)//Confirmed by HW that this buffer is not used in VP9 decoding { CODECHAL_DECODE_CHK_STATUS_RETURN(pScalabilityState->pfnIsHcpBufferReallocNeeded(pScalabilityState->pHwInterface, MHW_VDBOX_HCP_INTERNAL_BUFFER_MV_UP_RT_COL, pAllocParam)); if (pAllocParam->bNeedBiggerSize || Mos_ResourceIsNull(&pScalabilityState->resMvUpRightColStoreBuffer)) { if (!Mos_ResourceIsNull(&pScalabilityState->resMvUpRightColStoreBuffer)) { pOsInterface->pfnFreeResource( pOsInterface, &pScalabilityState->resMvUpRightColStoreBuffer); } // MV UpperRight Column Store Buffer CODECHAL_DECODE_CHK_STATUS_RETURN(pScalabilityState->pfnGetHcpBufferSize(pScalabilityState->pHwInterface, MHW_VDBOX_HCP_INTERNAL_BUFFER_MV_UP_RT_COL, pHcpBufSizeParam)); AllocParamsForBufferLinear.dwBytes = pHcpBufSizeParam->dwBufferSize; AllocParamsForBufferLinear.pBufName = "MVUpperRightColumnStore"; eStatus = (MOS_STATUS)pOsInterface->pfnAllocateResource( pOsInterface, &AllocParamsForBufferLinear, &pScalabilityState->resMvUpRightColStoreBuffer); if (eStatus != MOS_STATUS_SUCCESS) { CODECHAL_DECODE_ASSERTMESSAGE("Failed to allocate MV UpperRight Column Store Buffer."); return eStatus; } } } CODECHAL_DECODE_CHK_STATUS_RETURN(pScalabilityState->pfnIsHcpBufferReallocNeeded(pScalabilityState->pHwInterface, MHW_VDBOX_HCP_INTERNAL_BUFFER_INTRA_PRED_UP_RIGHT_COL, pAllocParam)); if (pAllocParam->bNeedBiggerSize || Mos_ResourceIsNull(&pScalabilityState->resIntraPredUpRightColStoreBuffer)) { if (!Mos_ResourceIsNull(&pScalabilityState->resIntraPredUpRightColStoreBuffer)) { pOsInterface->pfnFreeResource( pOsInterface, &pScalabilityState->resIntraPredUpRightColStoreBuffer); } // IntraPred UpperRight Column Store Buffer CODECHAL_DECODE_CHK_STATUS_RETURN(pScalabilityState->pfnGetHcpBufferSize(pScalabilityState->pHwInterface, MHW_VDBOX_HCP_INTERNAL_BUFFER_INTRA_PRED_UP_RIGHT_COL, pHcpBufSizeParam)); AllocParamsForBufferLinear.dwBytes = pHcpBufSizeParam->dwBufferSize; AllocParamsForBufferLinear.pBufName = "IntraPredUpperRightColumnStore"; eStatus = (MOS_STATUS)pOsInterface->pfnAllocateResource( pOsInterface, &AllocParamsForBufferLinear, &pScalabilityState->resIntraPredUpRightColStoreBuffer); if (eStatus != MOS_STATUS_SUCCESS) { CODECHAL_DECODE_ASSERTMESSAGE("Failed to allocate IntraPred UpperRight Column Store Buffer."); return eStatus; } } CODECHAL_DECODE_CHK_STATUS_RETURN(pScalabilityState->pfnIsHcpBufferReallocNeeded(pScalabilityState->pHwInterface, MHW_VDBOX_HCP_INTERNAL_BUFFER_INTRA_PRED_LFT_RECON_COL, pAllocParam)); if (pAllocParam->bNeedBiggerSize || Mos_ResourceIsNull(&pScalabilityState->resIntraPredLeftReconColStoreBuffer)) { if (!Mos_ResourceIsNull(&pScalabilityState->resIntraPredLeftReconColStoreBuffer)) { pOsInterface->pfnFreeResource( pOsInterface, &pScalabilityState->resIntraPredLeftReconColStoreBuffer); } // IntraPred Left Recon Column Store Buffer CODECHAL_DECODE_CHK_STATUS_RETURN(pScalabilityState->pfnGetHcpBufferSize(pScalabilityState->pHwInterface, MHW_VDBOX_HCP_INTERNAL_BUFFER_INTRA_PRED_LFT_RECON_COL, pHcpBufSizeParam)); AllocParamsForBufferLinear.dwBytes = pHcpBufSizeParam->dwBufferSize; AllocParamsForBufferLinear.pBufName = "IntraPredLeftReconColumnStore"; eStatus = (MOS_STATUS)pOsInterface->pfnAllocateResource( pOsInterface, &AllocParamsForBufferLinear, &pScalabilityState->resIntraPredLeftReconColStoreBuffer); if (eStatus != MOS_STATUS_SUCCESS) { CODECHAL_DECODE_ASSERTMESSAGE("Failed to allocate IntraPred Left Recon Column Store Buffer."); return eStatus; } } CODECHAL_DECODE_CHK_STATUS_RETURN(CodecHalDecodeScalability_AllocateCABACStreamOutBuffer(pScalabilityState, pHcpBufSizeParam, pAllocParam, &pScalabilityState->resCABACSyntaxStreamOutBuffer[0])); pScalabilityState->presCABACStreamOutBuffer = &pScalabilityState->resCABACSyntaxStreamOutBuffer[0]; return eStatus; } MOS_STATUS CodecHalDecodeScalability_AllocateCABACStreamOutBuffer( PCODECHAL_DECODE_SCALABILITY_STATE pScalabilityState, PMHW_VDBOX_HCP_BUFFER_SIZE_PARAMS pHcpBufSizeParam, PMHW_VDBOX_HCP_BUFFER_REALLOC_PARAMS pAllocParam, PMOS_RESOURCE presCABACStreamOutBuffer) { PMOS_INTERFACE pOsInterface; MOS_ALLOC_GFXRES_PARAMS AllocParamsForBufferLinear; MOS_STATUS eStatus = MOS_STATUS_SUCCESS; CODECHAL_DECODE_FUNCTION_ENTER; CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState); CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState->pHwInterface); CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState->pHwInterface->GetOsInterface()); CODECHAL_DECODE_CHK_NULL_RETURN(pHcpBufSizeParam); CODECHAL_DECODE_CHK_NULL_RETURN(pAllocParam); CODECHAL_DECODE_CHK_NULL_RETURN(presCABACStreamOutBuffer); pOsInterface = pScalabilityState->pHwInterface->GetOsInterface(); // initiate allocation paramters MOS_ZeroMemory(&AllocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS)); AllocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER; AllocParamsForBufferLinear.TileType = MOS_TILE_LINEAR; AllocParamsForBufferLinear.Format = Format_Buffer; CODECHAL_DECODE_CHK_STATUS_RETURN(pScalabilityState->pfnIsHcpBufferReallocNeeded(pScalabilityState->pHwInterface, MHW_VDBOX_HCP_INTERNAL_BUFFER_CABAC_STREAMOUT, pAllocParam)); if (pAllocParam->bNeedBiggerSize || Mos_ResourceIsNull(presCABACStreamOutBuffer)) { if (!Mos_ResourceIsNull(presCABACStreamOutBuffer)) { pOsInterface->pfnFreeResource( pOsInterface, presCABACStreamOutBuffer); } CODECHAL_DECODE_CHK_STATUS_RETURN(pScalabilityState->pfnGetHcpBufferSize(pScalabilityState->pHwInterface, MHW_VDBOX_HCP_INTERNAL_BUFFER_CABAC_STREAMOUT, pHcpBufSizeParam)); pScalabilityState->dwCABACSyntaxStreamOutBufferSize = pHcpBufSizeParam->dwBufferSize; AllocParamsForBufferLinear.dwBytes = pHcpBufSizeParam->dwBufferSize; AllocParamsForBufferLinear.pBufName = "CABACStreamOutBuffer"; eStatus = (MOS_STATUS)pOsInterface->pfnAllocateResource( pOsInterface, &AllocParamsForBufferLinear, presCABACStreamOutBuffer); if (eStatus != MOS_STATUS_SUCCESS) { CODECHAL_DECODE_ASSERTMESSAGE("Failed to allocate CABAC StreamOut Buffer."); return eStatus; } } return eStatus; } void CodecHalDecodeScalability_Destroy ( PCODECHAL_DECODE_SCALABILITY_STATE pScalabilityState) { PMOS_INTERFACE pOsInterface; CODECHAL_DECODE_FUNCTION_ENTER; CODECHAL_DECODE_CHK_NULL_NO_STATUS_RETURN(pScalabilityState); CODECHAL_DECODE_CHK_NULL_NO_STATUS_RETURN(pScalabilityState->pHwInterface); CODECHAL_DECODE_CHK_NULL_NO_STATUS_RETURN(pScalabilityState->pHwInterface->GetOsInterface()); pOsInterface = pScalabilityState->pHwInterface->GetOsInterface(); pOsInterface->pfnFreeResource( pOsInterface, &pScalabilityState->resSliceStateStreamOutBuffer); pOsInterface->pfnFreeResource( pOsInterface, &pScalabilityState->resMvUpRightColStoreBuffer); pOsInterface->pfnFreeResource( pOsInterface, &pScalabilityState->resIntraPredLeftReconColStoreBuffer); pOsInterface->pfnFreeResource( pOsInterface, &pScalabilityState->resIntraPredUpRightColStoreBuffer); for (int i = 0; i < CODECHAL_HCP_STREAMOUT_BUFFER_MAX_NUM; i++) { pOsInterface->pfnFreeResource( pOsInterface, &pScalabilityState->resCABACSyntaxStreamOutBuffer[i]); } pOsInterface->pfnFreeResource( pOsInterface, &pScalabilityState->resSemaMemBEs); pOsInterface->pfnFreeResource( pOsInterface, &pScalabilityState->resDelayMinus); if (pOsInterface->bUseHwSemaForResSyncInVE) { pOsInterface->pfnFreeResource( pOsInterface, &pScalabilityState->resSemaMemFEBE); } pOsInterface->pfnFreeResource( pOsInterface, &pScalabilityState->resFEStatusBuffer); pOsInterface->pfnDestroySyncResource(pOsInterface, &pScalabilityState->resFeBeSyncObject); pOsInterface->pfnFreeResource( pOsInterface, &pScalabilityState->resSemaMemCompletion); return; } MOS_STATUS CodecHalDecodeScalability_GetCmdBufferToUse( PCODECHAL_DECODE_SCALABILITY_STATE pScalabilityState, PMOS_COMMAND_BUFFER pScdryCmdBuf, PMOS_COMMAND_BUFFER *ppCmdBufToUse) { MOS_STATUS eStatus = MOS_STATUS_SUCCESS; CODECHAL_DECODE_FUNCTION_ENTER; CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState); CODECHAL_DECODE_CHK_NULL_RETURN(ppCmdBufToUse); if (!CodecHalDecodeScalabilityIsFESeparateSubmission(pScalabilityState) || CodecHalDecodeScalabilityIsBEPhase(pScalabilityState)) { pScalabilityState->bUseSecdryCmdBuffer = true; CODECHAL_DECODE_CHK_STATUS_RETURN(CodecHalDecodeScalability_GetVESecondaryCmdBuffer(pScalabilityState, pScdryCmdBuf)); *ppCmdBufToUse = pScdryCmdBuf; } else { pScalabilityState->bUseSecdryCmdBuffer = false; } return eStatus; } MOS_STATUS CodecHalDecodeScalability_ReturnSdryCmdBuffer( PCODECHAL_DECODE_SCALABILITY_STATE pScalabilityState, PMOS_COMMAND_BUFFER pSdryCmdBuf) { PMOS_INTERFACE pOsInterface; uint32_t HcpDecPhase; uint32_t dwBufIdxPlus1; MOS_STATUS eStatus = MOS_STATUS_SUCCESS; CODECHAL_DECODE_FUNCTION_ENTER; CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState); CODECHAL_DECODE_CHK_NULL_RETURN(pSdryCmdBuf); CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState->pHwInterface); CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState->pHwInterface->GetOsInterface()); pOsInterface = pScalabilityState->pHwInterface->GetOsInterface(); if (!pScalabilityState->bUseSecdryCmdBuffer) { return eStatus; } HcpDecPhase = pScalabilityState->HcpDecPhase; //calculate bufidx for getting secondary cmd buffer. CODECHAL_DECODE_CHK_STATUS_RETURN(CodecHalDecodeScalability_CalculateScdryCmdBufIndex(pScalabilityState, &dwBufIdxPlus1)); //Check if valid decode phase CODECHAL_DECODE_CHK_STATUS_RETURN(CodecHalDecodeScalability_CheckDecPhaseValidity(pScalabilityState, HcpDecPhase)); //Get batch buffer according to current decode phase switch (HcpDecPhase) { case CodechalDecode::CodechalHcpDecodePhaseLegacyS2L: //Note: no break here, S2L and FE commands put in one secondary command buffer. case CODECHAL_HCP_DECODE_PHASE_FE: if(!pScalabilityState->bFESeparateSubmission) { pOsInterface->pfnReturnCommandBuffer(pOsInterface, pSdryCmdBuf, dwBufIdxPlus1); } else { //if FE separate submission, S2L and FE cmd buffer are in primary cmd buffer, shall not call this function to get secondary cmd buffer eStatus = MOS_STATUS_INVALID_PARAMETER; CODECHAL_DECODE_ASSERTMESSAGE("S2L or FE does not need secondary cmd buffer in FE separate submission!"); } break; case CODECHAL_HCP_DECODE_PHASE_BE0: case CODECHAL_HCP_DECODE_PHASE_BE1: case CODECHAL_HCP_DECODE_PHASE_RESERVED: pOsInterface->pfnReturnCommandBuffer(pOsInterface, pSdryCmdBuf, dwBufIdxPlus1); break; default: //never comes here because other decode phase already checked invalid in function CodecHalDecodeScalability_CheckDecPhaseValidity, eStatus = MOS_STATUS_INVALID_PARAMETER; break; } return eStatus; } MOS_STATUS CodecHalDecodeScalablity_SetFECabacStreamoutOverflowStatus( PCODECHAL_DECODE_SCALABILITY_STATE pScalabilityState, PMOS_COMMAND_BUFFER pCmdBufferInUse) { MhwMiInterface *pMiInterface; CodechalHwInterface *pHwInterface; MmioRegistersMfx *pMmioRegistersMfx; MmioRegistersHcp *pMmioRegistersHcp; MHW_MI_STORE_REGISTER_MEM_PARAMS StoreRegParams; MHW_MI_STORE_DATA_PARAMS StoreDataParams; MHW_MI_LOAD_REGISTER_REG_PARAMS LoadRegRegParams; MHW_MI_LOAD_REGISTER_IMM_PARAMS LoadRegisterImmParams; MHW_MI_FLUSH_DW_PARAMS FlushDwParams; MHW_MI_MATH_PARAMS MiMathParams; MHW_MI_ALU_PARAMS MiAluParams[4]; MOS_STATUS eStatus = MOS_STATUS_SUCCESS; CODECHAL_DECODE_FUNCTION_ENTER; CODECHAL_DECODE_CHK_NULL_RETURN(pCmdBufferInUse); CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState); CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState->pHwInterface); CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState->pHwInterface->GetMiInterface()); pMiInterface = pScalabilityState->pHwInterface->GetMiInterface(); pHwInterface = pScalabilityState->pHwInterface; //relative MMIO addressing pMmioRegistersMfx = pHwInterface->GetMfxInterface()->GetMmioRegisters(MHW_VDBOX_NODE_1); pMmioRegistersHcp = pHwInterface->GetHcpInterface()->GetMmioRegisters(MHW_VDBOX_NODE_1); MOS_ZeroMemory(&FlushDwParams, sizeof(FlushDwParams)); CODECHAL_DECODE_CHK_STATUS_RETURN(pMiInterface->AddMiFlushDwCmd(pCmdBufferInUse, &FlushDwParams)); // store stream out size register to general purpose register0 - Hi=0, Lo=FE stream out size MOS_ZeroMemory(&LoadRegRegParams, sizeof(LoadRegRegParams)); LoadRegRegParams.dwSrcRegister = pMmioRegistersHcp->hcpDebugFEStreamOutSizeRegOffset; LoadRegRegParams.dwDstRegister = pMmioRegistersMfx->generalPurposeRegister0LoOffset; CODECHAL_DECODE_CHK_STATUS_RETURN(pMiInterface->AddMiLoadRegisterRegCmd( pCmdBufferInUse, &LoadRegRegParams)); MOS_ZeroMemory(&LoadRegisterImmParams, sizeof(LoadRegisterImmParams)); LoadRegisterImmParams.dwData = 0; LoadRegisterImmParams.dwRegister = pMmioRegistersMfx->generalPurposeRegister0HiOffset; CODECHAL_DECODE_CHK_STATUS_RETURN(pMiInterface->AddMiLoadRegisterImmCmd( pCmdBufferInUse, &LoadRegisterImmParams)); // load allocated size to general purpose register4 - Hi = 0 MOS_ZeroMemory(&LoadRegisterImmParams, sizeof(LoadRegisterImmParams)); LoadRegisterImmParams.dwData = pScalabilityState->dwCABACSyntaxStreamOutBufferSize; LoadRegisterImmParams.dwRegister = pMmioRegistersMfx->generalPurposeRegister4LoOffset; CODECHAL_DECODE_CHK_STATUS_RETURN(pMiInterface->AddMiLoadRegisterImmCmd( pCmdBufferInUse, &LoadRegisterImmParams)); MOS_ZeroMemory(&LoadRegisterImmParams, sizeof(LoadRegisterImmParams)); LoadRegisterImmParams.dwData = 0; LoadRegisterImmParams.dwRegister = pMmioRegistersMfx->generalPurposeRegister4HiOffset; CODECHAL_DECODE_CHK_STATUS_RETURN(pMiInterface->AddMiLoadRegisterImmCmd( pCmdBufferInUse, &LoadRegisterImmParams)); //perform the sub operation(reported size - allocated size) MOS_ZeroMemory(&MiMathParams, sizeof(MiMathParams)); MOS_ZeroMemory(&MiAluParams, sizeof(MiAluParams)); // load srcA, reg0 MiAluParams[0].AluOpcode = MHW_MI_ALU_LOAD; MiAluParams[0].Operand1 = MHW_MI_ALU_SRCA; MiAluParams[0].Operand2 = MHW_MI_ALU_GPREG0; // load srcB, reg4 MiAluParams[1].AluOpcode = MHW_MI_ALU_LOAD; MiAluParams[1].Operand1 = MHW_MI_ALU_SRCB; MiAluParams[1].Operand2 = MHW_MI_ALU_GPREG4; // sub srcA, srcB MiAluParams[2].AluOpcode = MHW_MI_ALU_SUB; MiAluParams[2].Operand1 = MHW_MI_ALU_SRCB; MiAluParams[2].Operand2 = MHW_MI_ALU_GPREG4; // store reg0, CF MiAluParams[3].AluOpcode = MHW_MI_ALU_STORE; MiAluParams[3].Operand1 = MHW_MI_ALU_GPREG0; MiAluParams[3].Operand2 = MHW_MI_ALU_CF; MiMathParams.pAluPayload = MiAluParams; MiMathParams.dwNumAluParams = 4; // four ALU commands needed for this substract opertaion. see following ALU commands. CODECHAL_DECODE_CHK_STATUS_RETURN(pMiInterface->AddMiMathCmd( pCmdBufferInUse, &MiMathParams)); // store the carry flag of (reported size - allocated size), // if reported size < allocated size, the carry flag will be 0xFFFFFFFF, else carry flag will be 0x0. MOS_ZeroMemory(&StoreRegParams, sizeof(StoreRegParams)); StoreRegParams.presStoreBuffer = &pScalabilityState->resFEStatusBuffer; StoreRegParams.dwOffset = CODECHAL_OFFSETOF(CODECHAL_DECODE_SCALABILITY_FE_STATUS, dwCarryFlagOfReportedSizeMinusAllocSize); StoreRegParams.dwRegister = pMmioRegistersMfx->generalPurposeRegister0LoOffset; CODECHAL_DECODE_CHK_STATUS_RETURN(pMiInterface->AddMiStoreRegisterMemCmd(pCmdBufferInUse, &StoreRegParams)); return eStatus; } MOS_STATUS CodecHalDecodeScalablity_GetFEReportedCabacStreamoutBufferSize( PCODECHAL_DECODE_SCALABILITY_STATE pScalabilityState, PMOS_COMMAND_BUFFER pCmdBufferInUse) { MhwMiInterface *pMiInterface; CodechalHwInterface *pHwInterface; MmioRegistersHcp *pMmioRegistersHcp; MHW_MI_STORE_REGISTER_MEM_PARAMS StoreRegParams; MHW_MI_FLUSH_DW_PARAMS FlushDwParams; MOS_STATUS eStatus = MOS_STATUS_SUCCESS; CODECHAL_DECODE_FUNCTION_ENTER; CODECHAL_DECODE_CHK_NULL_RETURN(pCmdBufferInUse); CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState); CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState->pHwInterface); pMiInterface = pScalabilityState->pHwInterface->GetMiInterface(); pHwInterface = pScalabilityState->pHwInterface; //relative MMIO addressing pMmioRegistersHcp = pHwInterface->GetHcpInterface()->GetMmioRegisters(MHW_VDBOX_NODE_1); MOS_ZeroMemory(&FlushDwParams, sizeof(FlushDwParams)); CODECHAL_DECODE_CHK_STATUS_RETURN(pMiInterface->AddMiFlushDwCmd(pCmdBufferInUse, &FlushDwParams)); //store the cabac streamout buff size in register into mem MOS_ZeroMemory(&StoreRegParams, sizeof(StoreRegParams)); StoreRegParams.presStoreBuffer = &pScalabilityState->resFEStatusBuffer; StoreRegParams.dwOffset = CODECHAL_OFFSETOF(CODECHAL_DECODE_SCALABILITY_FE_CABAC_STREAMOUT_BUFF_SIZE, dwCabacStreamoutBuffSize); StoreRegParams.dwRegister = pMmioRegistersHcp->hcpDebugFEStreamOutSizeRegOffset; CODECHAL_DECODE_CHK_STATUS_RETURN(pMiInterface->AddMiStoreRegisterMemCmd(pCmdBufferInUse, &StoreRegParams)); return eStatus; } MOS_STATUS CodecHalDecodeScalability_DetermineDecodePhase( PCODECHAL_DECODE_SCALABILITY_STATE pScalabilityState, uint32_t *pHcpDecPhase) { uint32_t CurPhase; MOS_STATUS eStatus = MOS_STATUS_SUCCESS; CODECHAL_DECODE_FUNCTION_ENTER; CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState); CODECHAL_DECODE_CHK_NULL_RETURN(pHcpDecPhase); CurPhase = *pHcpDecPhase; //Check if valid decode phase CODECHAL_DECODE_CHK_STATUS_RETURN(CodecHalDecodeScalability_CheckDecPhaseValidity(pScalabilityState, CurPhase)); if (CodecHalDecodeScalabilityIsScalableMode(pScalabilityState)) { switch (CurPhase) { case CodechalDecode::CodechalHcpDecodePhaseInitialized: if (pScalabilityState->bShortFormatInUse) { *pHcpDecPhase = CodechalDecode::CodechalHcpDecodePhaseLegacyS2L; } else { *pHcpDecPhase = CODECHAL_HCP_DECODE_PHASE_FE; } break; case CodechalDecode::CodechalHcpDecodePhaseLegacyS2L: *pHcpDecPhase = CODECHAL_HCP_DECODE_PHASE_FE; break; case CODECHAL_HCP_DECODE_PHASE_FE: *pHcpDecPhase = CODECHAL_HCP_DECODE_PHASE_BE0; break; case CODECHAL_HCP_DECODE_PHASE_BE0: *pHcpDecPhase = CODECHAL_HCP_DECODE_PHASE_BE1; break; case CODECHAL_HCP_DECODE_PHASE_BE1: *pHcpDecPhase = CODECHAL_HCP_DECODE_PHASE_RESERVED; break; default: //never comes here because other decode phase already checked invalid in function CodecHalDecodeScalability_CheckDecPhaseValidity, eStatus = MOS_STATUS_INVALID_PARAMETER; break; } } else { switch (CurPhase) { case CodechalDecode::CodechalHcpDecodePhaseInitialized: if (pScalabilityState->bShortFormatInUse) { *pHcpDecPhase = CodechalDecode::CodechalHcpDecodePhaseLegacyS2L; } else { *pHcpDecPhase = CodechalDecode::CodechalHcpDecodePhaseLegacyLong; } break; case CodechalDecode::CodechalHcpDecodePhaseLegacyS2L: *pHcpDecPhase = CodechalDecode::CodechalHcpDecodePhaseLegacyLong; break; default: //never comes here because other decode phase already checked invalid in function CodecHalDecodeScalability_CheckDecPhaseValidity, eStatus = MOS_STATUS_INVALID_PARAMETER; break; } } pScalabilityState->HcpDecPhase = *pHcpDecPhase; return eStatus; } MOS_STATUS CodecHalDecodeScalability_DetermineSendWatchdogTimerStart( PCODECHAL_DECODE_SCALABILITY_STATE pScalabilityState, bool *pbSend) { uint32_t HcpDecPhase; MOS_STATUS eStatus = MOS_STATUS_SUCCESS; CODECHAL_DECODE_FUNCTION_ENTER; CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState); HcpDecPhase = pScalabilityState->HcpDecPhase; //Check if valid decode phase CODECHAL_DECODE_CHK_STATUS_RETURN(CodecHalDecodeScalability_CheckDecPhaseValidity(pScalabilityState, HcpDecPhase)); switch (HcpDecPhase) { case CodechalDecode::CodechalHcpDecodePhaseLegacyS2L: *pbSend = true; break; case CODECHAL_HCP_DECODE_PHASE_FE: *pbSend = !pScalabilityState->bShortFormatInUse; break; case CODECHAL_HCP_DECODE_PHASE_BE0: case CODECHAL_HCP_DECODE_PHASE_BE1: case CODECHAL_HCP_DECODE_PHASE_RESERVED: *pbSend = true; break; default: //never comes here because other decode phase already checked invalid in function CodecHalDecodeScalability_CheckDecPhaseValidity, eStatus = MOS_STATUS_INVALID_PARAMETER; break; } return eStatus; } MOS_STATUS CodecHalDecodeScalability_SwitchGpuContext( PCODECHAL_DECODE_SCALABILITY_STATE pScalabilityState) { PMOS_INTERFACE pOsInterface; MOS_GPU_CONTEXT GpuContext; uint32_t HcpDecPhase; bool bFESepSwitchContextFlag = false; MOS_STATUS eStatus = MOS_STATUS_SUCCESS; CODECHAL_DECODE_FUNCTION_ENTER; CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState); CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState->pHwInterface); CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState->pHwInterface->GetOsInterface()); pOsInterface = pScalabilityState->pHwInterface->GetOsInterface(); HcpDecPhase = pScalabilityState->HcpDecPhase; if (pScalabilityState->bFESeparateSubmission) { if (CodecHalDecodeScalability1stPhaseofSubmission(pScalabilityState)) { bFESepSwitchContextFlag = true; if (pScalabilityState->bFESeparateSubmission && HcpDecPhase == CODECHAL_HCP_DECODE_PHASE_BE0) { GpuContext = pScalabilityState->VideoContext; } else { GpuContext = pScalabilityState->VideoContextForFE; } } } #if (_DEBUG || _RELEASE_INTERNAL) if (CodecHalDecodeScalability1stDecPhase(pScalabilityState)) { // report in-use MOS_USER_FEATURE_VALUE_WRITE_DATA UserFeatureWriteData; MOS_ZeroMemory(&UserFeatureWriteData, sizeof(UserFeatureWriteData)); UserFeatureWriteData.Value.i32Data = pScalabilityState->bFESeparateSubmission; UserFeatureWriteData.ValueID = __MEDIA_USER_FEATURE_VALUE_SCALABILITY_FE_SEPARATE_SUBMISSION_IN_USE_ID; MOS_UserFeature_WriteValues_ID(nullptr, &UserFeatureWriteData, 1, pOsInterface->pOsContext); } #endif if (bFESepSwitchContextFlag) { CODECHAL_DECODE_VERBOSEMESSAGE("Change Decode GPU Ctxt to %d.", GpuContext); // Switch GPU context CODECHAL_DECODE_CHK_STATUS_RETURN(pOsInterface->pfnSetGpuContext(pOsInterface, GpuContext)); // Reset allocation list and house keeping pOsInterface->pfnResetOsStates(pOsInterface); } return eStatus; } MOS_STATUS CodecHalDecodeScalability_InitSemaMemResources( PCODECHAL_DECODE_SCALABILITY_STATE pScalabilityState, PMOS_COMMAND_BUFFER pCmdBuffer) { CodechalHwInterface *pHwInterface; MhwMiInterface *pMiInterface; MOS_STATUS eStatus = MOS_STATUS_SUCCESS; CODECHAL_DECODE_FUNCTION_ENTER; CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState); CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState->pHwInterface); CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState->pHwInterface->GetMiInterface()); pHwInterface = pScalabilityState->pHwInterface; pMiInterface = pHwInterface->GetMiInterface(); MHW_MI_STORE_DATA_PARAMS StoreDataParams; MOS_ZeroMemory(&StoreDataParams, sizeof(StoreDataParams)); if (!Mos_ResourceIsNull(&pScalabilityState->resSemaMemCompletion)) { StoreDataParams.pOsResource = &pScalabilityState->resSemaMemCompletion; StoreDataParams.dwResourceOffset = 0; StoreDataParams.dwValue = 0; CODECHAL_DECODE_CHK_STATUS_RETURN(pMiInterface->AddMiStoreDataImmCmd( pCmdBuffer, &StoreDataParams)); } return eStatus; } MOS_STATUS CodecHalDecodeScalability_DecidePipeNum( PCODECHAL_DECODE_SCALABILITY_STATE pScalState, PCODECHAL_DECODE_SCALABILITY_INIT_PARAMS pInitParams) { PMOS_VIRTUALENGINE_INTERFACE pVEInterface; MOS_STATUS eStatus = MOS_STATUS_SUCCESS; CODECHAL_DECODE_FUNCTION_ENTER; CODECHAL_DECODE_CHK_NULL_RETURN(pScalState); CODECHAL_DECODE_CHK_NULL_RETURN(pScalState->pVEInterface); CODECHAL_DECODE_CHK_NULL_RETURN(pInitParams); pVEInterface = pScalState->pVEInterface; pScalState->ucScalablePipeNum = CODECHAL_DECODE_HCP_Legacy_PIPE_NUM_1; if (pInitParams->usingSFC) { //using SFC can only work in single pipe mode. return MOS_STATUS_SUCCESS; } #if (_DEBUG || _RELEASE_INTERNAL) if (pScalState->bAlwaysFrameSplit) { if (pScalState->ucNumVdbox != 1) { if (pScalState->ucNumVdbox == 2) { pScalState->ucScalablePipeNum = CODECHAL_DECODE_HCP_SCALABLE_PIPE_NUM_2; } else { pScalState->ucScalablePipeNum = CODECHAL_DECODE_HCP_SCALABLE_PIPE_NUM_RESERVED; } } } else #endif { if (pScalState->ucNumVdbox != 1) { if (pScalState->ucNumVdbox == 2) { if (pScalState->dwHcpDecModeSwtichTh1Width != 0) { if (pInitParams->u32PicWidthInPixel >= pScalState->dwHcpDecModeSwtichTh1Width) { pScalState->ucScalablePipeNum = CODECHAL_DECODE_HCP_SCALABLE_PIPE_NUM_2; } } else if ((!CodechalDecodeNonRextFormat(pInitParams->format) && CodechalDecodeResolutionEqualLargerThan4k(pInitParams->u32PicWidthInPixel, pInitParams->u32PicHeightInPixel)) || (CodechalDecodeNonRextFormat(pInitParams->format) && CodechalDecodeResolutionEqualLargerThan5k(pInitParams->u32PicWidthInPixel, pInitParams->u32PicHeightInPixel))) { pScalState->ucScalablePipeNum = CODECHAL_DECODE_HCP_SCALABLE_PIPE_NUM_2; } if (pScalState->bIsEvenSplit == false) { // disable scalability for clips with width less than split condition when MMC is on if (pInitParams->u32PicWidthInPixel <= CODEC_SCALABILITY_FIRST_TILE_WIDTH_4K) { pScalState->ucScalablePipeNum = CODECHAL_DECODE_HCP_Legacy_PIPE_NUM_1; } } } else { if (pScalState->dwHcpDecModeSwtichTh1Width != 0 && pScalState->dwHcpDecModeSwtichTh2Width != 0) { if (pInitParams->u32PicWidthInPixel >= pScalState->dwHcpDecModeSwtichTh2Width) { pScalState->ucScalablePipeNum = CODECHAL_DECODE_HCP_SCALABLE_PIPE_NUM_RESERVED; } else if (pInitParams->u32PicWidthInPixel >= pScalState->dwHcpDecModeSwtichTh1Width) { pScalState->ucScalablePipeNum = CODECHAL_DECODE_HCP_SCALABLE_PIPE_NUM_2; } } else { if ((pInitParams->u32PicWidthInPixel * pInitParams->u32PicHeightInPixel) >= (CODECHAL_HCP_DECODE_SCALABLE_THRESHOLD4_WIDTH * CODECHAL_HCP_DECODE_SCALABLE_THRESHOLD4_HEIGHT)) { pScalState->ucScalablePipeNum = CODECHAL_DECODE_HCP_SCALABLE_PIPE_NUM_RESERVED; } else if ((!CodechalDecodeNonRextFormat(pInitParams->format) && CodechalDecodeResolutionEqualLargerThan4k(pInitParams->u32PicWidthInPixel, pInitParams->u32PicHeightInPixel)) || (CodechalDecodeNonRextFormat(pInitParams->format) && CodechalDecodeResolutionEqualLargerThan5k(pInitParams->u32PicWidthInPixel, pInitParams->u32PicHeightInPixel))) { pScalState->ucScalablePipeNum = CODECHAL_DECODE_HCP_SCALABLE_PIPE_NUM_2; } } } } } return eStatus; } MOS_STATUS CodechalDecodeScalability_MapPipeNumToLRCACount( PCODECHAL_DECODE_SCALABILITY_STATE pScalState, uint32_t *LRCACount) { MOS_STATUS eStatus = MOS_STATUS_SUCCESS; CODECHAL_DECODE_FUNCTION_ENTER; CODECHAL_DECODE_CHK_NULL_RETURN(pScalState); CODECHAL_DECODE_CHK_NULL_RETURN(LRCACount); *LRCACount = 1; // initialzed to 1. switch (pScalState->ucScalablePipeNum) { case CODECHAL_DECODE_HCP_SCALABLE_PIPE_NUM_RESERVED: *LRCACount = pScalState->bFESeparateSubmission ? 3 : 4; break; case CODECHAL_DECODE_HCP_SCALABLE_PIPE_NUM_2: // on GT2 or debug override enabled, FE separate submission = false, FE run on the same engine of BEs; // on GT3, FE separate submission = true, scalability submission includes only BEs. *LRCACount = 2; break; case CODECHAL_DECODE_HCP_Legacy_PIPE_NUM_1: *LRCACount = 1; break; default: CODECHAL_DECODE_ASSERTMESSAGE("invalid pipe number.") return MOS_STATUS_INVALID_PARAMETER; } if (*LRCACount > pScalState->ucNumVdbox) { CODECHAL_DECODE_ASSERTMESSAGE("LRCA count can not exceed vdbox number."); return MOS_STATUS_INVALID_PARAMETER; } return eStatus; } MOS_STATUS CodechalDecodeScalability_ChkGpuCtxReCreation( PCODECHAL_DECODE_SCALABILITY_STATE pScalabilityState, PMOS_GPUCTX_CREATOPTIONS_ENHANCED CurgpuCtxCreatOpts) { PMOS_INTERFACE pOsInterface; bool changed = false; uint32_t NewLRCACount = 0, PreLRCACount = 0; MOS_STATUS eStatus = MOS_STATUS_SUCCESS; CODECHAL_DECODE_FUNCTION_ENTER; CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState); CODECHAL_DECODE_CHK_NULL_RETURN(CurgpuCtxCreatOpts); pOsInterface = pScalabilityState->pHwInterface->GetOsInterface(); #if (_DEBUG || _RELEASE_INTERNAL) if (pOsInterface->bEnableDbgOvrdInVE) { changed = false; } else #endif { CODECHAL_DECODE_CHK_STATUS_RETURN(pScalabilityState->pfnMapPipeNumToLRCACount( pScalabilityState, &NewLRCACount)); if (CurgpuCtxCreatOpts->LRCACount != NewLRCACount) { changed = true; PreLRCACount = CurgpuCtxCreatOpts->LRCACount; CurgpuCtxCreatOpts->LRCACount = NewLRCACount; } else { changed = false; } } if (changed) { auto contextToCreate = MOS_GPU_CONTEXT_VIDEO; switch (NewLRCACount) { case 2: contextToCreate = pScalabilityState->VideoContextForMP; break; case 3: contextToCreate = pScalabilityState->VideoContextFor3P; break; default: contextToCreate = pScalabilityState->VideoContextForSP; break; } CODECHAL_DECODE_CHK_STATUS_RETURN(pOsInterface->pfnCreateGpuContext( pOsInterface, contextToCreate, MOS_GPU_NODE_VIDEO, CurgpuCtxCreatOpts)); CODECHAL_DECODE_CHK_STATUS_RETURN(pOsInterface->pfnRegisterBBCompleteNotifyEvent( pOsInterface, contextToCreate)); // Switch across single pipe/ scalable mode gpu contexts MOS_GPU_CONTEXT GpuContext = contextToCreate; CODECHAL_DECODE_VERBOSEMESSAGE("Change Decode GPU Ctxt to %d.", GpuContext); CODECHAL_DECODE_CHK_STATUS_RETURN(pOsInterface->pfnSetGpuContext(pOsInterface, GpuContext)); // Reset allocation list and house keeping pOsInterface->pfnResetOsStates(pOsInterface); // Gpu context re-use pScalabilityState->VideoContext = GpuContext; } return eStatus; } #if (_DEBUG || _RELEASE_INTERNAL) MOS_STATUS CodechalDecodeScalability_DebugOvrdDecidePipeNum( PCODECHAL_DECODE_SCALABILITY_STATE pScalState) { PMOS_VIRTUALENGINE_INTERFACE pVEInterface; MOS_STATUS eStatus = MOS_STATUS_SUCCESS; CODECHAL_DECODE_CHK_NULL_RETURN(pScalState); CODECHAL_DECODE_CHK_NULL_RETURN(pScalState->pVEInterface); CODECHAL_DECODE_CHK_NULL_RETURN(pScalState->pHwInterface); pVEInterface = pScalState->pVEInterface; PMOS_INTERFACE pOsInterface = pScalState->pHwInterface->GetOsInterface(); CODECHAL_DECODE_CHK_NULL_RETURN(pOsInterface); if (pOsInterface->apoMosEnabled) { CODECHAL_DECODE_CHK_NULL_RETURN(pVEInterface->veInterface); auto veInterface = pVEInterface->veInterface; if (veInterface->GetEngineCount() == 1) { pScalState->ucScalablePipeNum = CODECHAL_DECODE_HCP_Legacy_PIPE_NUM_1; } else if (veInterface->GetEngineCount() == 2) { //engine count = 2, only support FE run on the same engine as one of BE for now. pScalState->ucScalablePipeNum = CODECHAL_DECODE_HCP_SCALABLE_PIPE_NUM_2; } else if (veInterface->GetEngineCount() == 4 && veInterface->GetEngineLogicId(3) != veInterface->GetEngineLogicId(0) && veInterface->GetEngineLogicId(3) != veInterface->GetEngineLogicId(1) && veInterface->GetEngineLogicId(3) != veInterface->GetEngineLogicId(2)) { pScalState->ucScalablePipeNum = CODECHAL_DECODE_HCP_SCALABLE_PIPE_NUM_RESERVED; } else { CODECHAL_DECODE_ASSERTMESSAGE("invalid parameter settings in debug override."); return MOS_STATUS_INVALID_PARAMETER; } return eStatus; } // debug override for virtual tile if (pVEInterface->ucEngineCount == 1) { pScalState->ucScalablePipeNum = CODECHAL_DECODE_HCP_Legacy_PIPE_NUM_1; } else if (pVEInterface->ucEngineCount == 2) { //engine count = 2, only support FE run on the same engine as one of BE for now. pScalState->ucScalablePipeNum = CODECHAL_DECODE_HCP_SCALABLE_PIPE_NUM_2; } else if (pVEInterface->ucEngineCount == 4 && pVEInterface->EngineLogicId[3] != pVEInterface->EngineLogicId[0] && pVEInterface->EngineLogicId[3] != pVEInterface->EngineLogicId[1] && pVEInterface->EngineLogicId[3] != pVEInterface->EngineLogicId[2]) { pScalState->ucScalablePipeNum = CODECHAL_DECODE_HCP_SCALABLE_PIPE_NUM_RESERVED; } else { CODECHAL_DECODE_ASSERTMESSAGE("invalid parameter settings in debug override."); return MOS_STATUS_INVALID_PARAMETER; } return eStatus; } #endif MOS_STATUS CodechalDecodeScalability_ConstructParmsForGpuCtxCreation( PCODECHAL_DECODE_SCALABILITY_STATE pScalState, PMOS_GPUCTX_CREATOPTIONS_ENHANCED gpuCtxCreatOpts, CodechalSetting * codecHalSetting) { PMOS_INTERFACE pOsInterface; CODECHAL_DECODE_SCALABILITY_INIT_PARAMS initParams; MOS_STATUS eStatus = MOS_STATUS_SUCCESS; CODECHAL_DECODE_FUNCTION_ENTER; CODECHAL_DECODE_CHK_NULL_RETURN(pScalState); CODECHAL_DECODE_CHK_NULL_RETURN(pScalState->pHwInterface); CODECHAL_DECODE_CHK_NULL_RETURN(gpuCtxCreatOpts); CODECHAL_DECODE_CHK_NULL_RETURN(codecHalSetting); bool sfcInUse = codecHalSetting->sfcInUseHinted && codecHalSetting->downsamplingHinted && (MEDIA_IS_SKU(pScalState->pHwInterface->GetSkuTable(), FtrSFCPipe) && !MEDIA_IS_SKU(pScalState->pHwInterface->GetSkuTable(), FtrDisableVDBox2SFC)); pOsInterface = pScalState->pHwInterface->GetOsInterface(); MEDIA_FEATURE_TABLE *m_skuTable = pOsInterface->pfnGetSkuTable(pOsInterface); #if (_DEBUG || _RELEASE_INTERNAL) if (pOsInterface->bEnableDbgOvrdInVE) { PMOS_VIRTUALENGINE_INTERFACE pVEInterface = pScalState->pVEInterface; CODECHAL_DECODE_CHK_NULL_RETURN(pVEInterface); gpuCtxCreatOpts->DebugOverride = true; if (MEDIA_IS_SKU(m_skuTable, FtrSfcScalability)) { gpuCtxCreatOpts->UsingSFC = false;// this param ignored when dbgoverride enabled } else { gpuCtxCreatOpts->UsingSFC = sfcInUse; // this param ignored when dbgoverride enabled } CODECHAL_DECODE_CHK_STATUS_RETURN(pScalState->pfnDebugOvrdDecidePipeNum(pScalState)); if (pOsInterface->apoMosEnabled) { CODECHAL_DECODE_CHK_NULL_RETURN(pVEInterface->veInterface); for (uint32_t i = 0; i < pVEInterface->veInterface->GetEngineCount(); i++) { gpuCtxCreatOpts->EngineInstance[i] = pVEInterface->veInterface->GetEngineLogicId(i); } } else { for (uint32_t i = 0; i < pVEInterface->ucEngineCount; i++) { gpuCtxCreatOpts->EngineInstance[i] = pVEInterface->EngineLogicId[i]; } } } else #endif { if (MEDIA_IS_SKU(m_skuTable, FtrSfcScalability)) { gpuCtxCreatOpts->UsingSFC = false; } else { gpuCtxCreatOpts->UsingSFC = sfcInUse; } MOS_ZeroMemory(&initParams, sizeof(initParams)); initParams.u32PicWidthInPixel = MOS_ALIGN_CEIL(codecHalSetting->width, 8); initParams.u32PicHeightInPixel = MOS_ALIGN_CEIL(codecHalSetting->height, 8); if (((codecHalSetting->standard == CODECHAL_VP9) || (codecHalSetting->standard == CODECHAL_HEVC)) && (codecHalSetting->chromaFormat == HCP_CHROMA_FORMAT_YUV420)) { initParams.format = Format_NV12; if (codecHalSetting->lumaChromaDepth == CODECHAL_LUMA_CHROMA_DEPTH_10_BITS) { initParams.format = Format_P010; } } initParams.usingSFC = sfcInUse; initParams.usingSecureDecode = codecHalSetting->secureMode; CODECHAL_DECODE_CHK_STATUS_RETURN(pScalState->pfnDecidePipeNum( pScalState, &initParams)); } CODECHAL_DECODE_CHK_STATUS_RETURN(pScalState->pfnMapPipeNumToLRCACount( pScalState, &gpuCtxCreatOpts->LRCACount)); return eStatus; } MOS_STATUS CodecHalDecodeScalability_InitScalableParams( PCODECHAL_DECODE_SCALABILITY_STATE pScalabilityState, PCODECHAL_DECODE_SCALABILITY_INIT_PARAMS pInitParams, uint16_t *pucDecPassNum) { PMOS_INTERFACE pOsInterface; PMOS_VIRTUALENGINE_INTERFACE pVEInterface; MOS_STATUS eStatus = MOS_STATUS_SUCCESS; CODECHAL_DECODE_FUNCTION_ENTER; CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState); CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState->pHwInterface); CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState->pHwInterface->GetOsInterface()); pOsInterface = pScalabilityState->pHwInterface->GetOsInterface(); pVEInterface = pScalabilityState->pVEInterface; if (!pOsInterface->bSupportVirtualEngine) { eStatus = MOS_STATUS_INVALID_PARAMETER; CODECHAL_DECODE_ASSERTMESSAGE("Scalability decode must run with virtual engine interface.\n"); return eStatus; } pScalabilityState->bScalableDecodeMode = false; // initialized to false #if (_DEBUG || _RELEASE_INTERNAL) if (pOsInterface->bEnableDbgOvrdInVE) { if (!MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(pOsInterface)) { if (pOsInterface->apoMosEnabled) { CODECHAL_DECODE_CHK_NULL_RETURN(pVEInterface->veInterface); pScalabilityState->ucScalablePipeNum = pVEInterface->veInterface->GetEngineCount() - 1; } else { pScalabilityState->ucScalablePipeNum = pVEInterface->ucEngineCount - 1; } pScalabilityState->bScalableDecodeMode = true; } else { // do nothing since pipe number already decided at the gpu context creation. } } else #endif { // Decide pipe number CODECHAL_DECODE_CHK_STATUS_RETURN(pScalabilityState->pfnDecidePipeNum(pScalabilityState, pInitParams)); } // Decide scalable mode or single pipe mode if (pScalabilityState->ucScalablePipeNum > 1 && pOsInterface->frameSplit) { pScalabilityState->bScalableDecodeMode = true; } CODECHAL_DECODE_CHK_NULL_RETURN(pucDecPassNum); // Decide Decode pass number - pucDecPassNum if (pScalabilityState->bScalableDecodeMode) { *pucDecPassNum = pScalabilityState->ucScalablePipeNum + 1; // FE + all BEs } else { *pucDecPassNum = 1; } // Add one pass for S2L conversion in short format. if (pScalabilityState->bShortFormatInUse) { *pucDecPassNum = *pucDecPassNum + 1; } pScalabilityState->VideoContext = pInitParams->gpuCtxInUse; return eStatus; } MOS_STATUS CodecHalDecodeScalability_SetHintParams( PCODECHAL_DECODE_SCALABILITY_STATE pScalabilityState, PCODECHAL_DECODE_SCALABILITY_SETHINT_PARMS pSetHintParms) { PMOS_VIRTUALENGINE_INTERFACE pVEInterface; MOS_VIRTUALENGINE_SET_PARAMS VEParams; PMOS_INTERFACE pOsInterface; MOS_STATUS eStatus = MOS_STATUS_SUCCESS; CODECHAL_DECODE_FUNCTION_ENTER; CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState); CODECHAL_DECODE_CHK_NULL_RETURN(pSetHintParms); CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState->pHwInterface); CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState->pHwInterface->GetOsInterface()); pOsInterface = pScalabilityState->pHwInterface->GetOsInterface(); pVEInterface = pScalabilityState->pVEInterface; MOS_ZeroMemory(&VEParams, sizeof(VEParams)); if(!MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(pOsInterface)) { //not used by VE2.0 VEParams.bNeedSyncWithPrevious = pSetHintParms->bNeedSyncWithPrevious; VEParams.bSameEngineAsLastSubmission = pSetHintParms->bSameEngineAsLastSubmission; VEParams.bSFCInUse = pSetHintParms->bSFCInUse; } VEParams.ucScalablePipeNum = pScalabilityState->ucScalablePipeNum; if (pScalabilityState->bScalableDecodeMode) { if (pScalabilityState->bFESeparateSubmission) { //set Hint parameter for FE submission VEParams.bScalableMode = false; if (pVEInterface->pfnVESetHintParams) { CODECHAL_DECODE_CHK_STATUS_RETURN(pVEInterface->pfnVESetHintParams(pVEInterface, &VEParams)); } } VEParams.bScalableMode = true; VEParams.bHaveFrontEndCmds = (pScalabilityState->bFESeparateSubmission ? false : true); if (pVEInterface->pfnVESetHintParams) { CODECHAL_DECODE_CHK_STATUS_RETURN(pVEInterface->pfnVESetHintParams(pVEInterface, &VEParams)); } } else { VEParams.bScalableMode = false; if (pVEInterface->pfnVESetHintParams) { CODECHAL_DECODE_CHK_STATUS_RETURN(pVEInterface->pfnVESetHintParams(pVEInterface, &VEParams)); } } return eStatus; } #if (_DEBUG || _RELEASE_INTERNAL) MOS_STATUS CodecHalDecodeScalability_DbgDumpCmdBuffer( CodechalDecode *pDecoder, PCODECHAL_DECODE_SCALABILITY_STATE pScalabilityState, CodechalDebugInterface *debugInterface, PMOS_COMMAND_BUFFER pPrimCmdBuf) { MOS_STATUS eStatus = MOS_STATUS_SUCCESS; MOS_COMMAND_BUFFER ScdryCmdBuffer; PMOS_COMMAND_BUFFER pCmdBufferInUse; CODECHAL_DECODE_FUNCTION_ENTER; CODECHAL_DECODE_CHK_NULL_RETURN(pDecoder); CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState); CODECHAL_DECODE_CHK_NULL_RETURN(pPrimCmdBuf); if (!pScalabilityState->bScalableDecodeMode) { eStatus = MOS_STATUS_INVALID_PARAMETER; CODECHAL_DECODE_ASSERTMESSAGE("Invalid decode parameters!"); return eStatus; } if (CodecHalDecodeScalabilityIsFESeparateSubmission(pScalabilityState) && CodecHalDecodeScalabilityIsFEPhase(pScalabilityState)) { pCmdBufferInUse = pPrimCmdBuf; } else { CODECHAL_DECODE_CHK_STATUS_RETURN(CodecHalDecodeScalability_GetVESecondaryCmdBuffer(pScalabilityState, &ScdryCmdBuffer)); pCmdBufferInUse = &ScdryCmdBuffer; } CODECHAL_DECODE_CHK_STATUS_RETURN(debugInterface->DumpCmdBuffer( pCmdBufferInUse, CODECHAL_NUM_MEDIA_STATES, "_DEC")); return eStatus; } #endif MOS_STATUS CodecHalDecodeScalability_PopulateHintParams( PCODECHAL_DECODE_SCALABILITY_STATE pScalabilityState, PMOS_COMMAND_BUFFER pPrimCmdBuf) { MOS_STATUS eStatus = MOS_STATUS_SUCCESS; PMOS_CMD_BUF_ATTRI_VE pAttriVe; CODECHAL_DECODE_FUNCTION_ENTER; CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState); CODECHAL_DECODE_CHK_NULL_RETURN(pPrimCmdBuf); pAttriVe = (PMOS_CMD_BUF_ATTRI_VE)(pPrimCmdBuf->Attributes.pAttriVe); if (pAttriVe) { if ((CodecHalDecodeScalabilityIsScalableMode(pScalabilityState) && !CodecHalDecodeScalabilityIsFESeparateSubmission(pScalabilityState)) || (CodecHalDecodeScalabilityIsFESeparateSubmission(pScalabilityState) && CodecHalDecodeScalabilityIsBEPhase(pScalabilityState))) { CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState->pScalHintParms); pAttriVe->VEngineHintParams = *(pScalabilityState->pScalHintParms); } else { CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState->pSingleHintParms); pAttriVe->VEngineHintParams = *(pScalabilityState->pSingleHintParms); } pAttriVe->bUseVirtualEngineHint = true; } return eStatus; } MOS_STATUS CodecHalDecodeScalability_SignalFE2BESemaphore( PCODECHAL_DECODE_SCALABILITY_STATE pScalabilityState, PMOS_COMMAND_BUFFER pCmdBufferInUse) { PMOS_INTERFACE pOsInterface; MhwMiInterface *pMiInterface; MOS_SYNC_PARAMS SyncParams; MOS_STATUS eStatus = MOS_STATUS_SUCCESS; CODECHAL_DECODE_FUNCTION_ENTER; CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState); CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState->pHwInterface); CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState->pHwInterface->GetOsInterface()); CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState->pHwInterface->GetMiInterface()); CODECHAL_DECODE_CHK_NULL_RETURN(pCmdBufferInUse); pOsInterface = pScalabilityState->pHwInterface->GetOsInterface(); pMiInterface = pScalabilityState->pHwInterface->GetMiInterface(); // FE semaphore to BE0 for FE/BE Sync. if (pScalabilityState->bScalableDecodeMode && pScalabilityState->ucNumVdbox > 2) { // When FE separate submission enabled, use SW semaphore between FE/BE0. Otherwise use HW semaphore if (pScalabilityState->bFESeparateSubmission) { SyncParams = g_cInitSyncParams; SyncParams.GpuContext = pScalabilityState->VideoContextForFE; SyncParams.presSyncResource = &pScalabilityState->resFeBeSyncObject; CODECHAL_DECODE_CHK_STATUS_RETURN(pOsInterface->pfnEngineSignal(pOsInterface, &SyncParams)); } else { //post HW semaphore (FE-BE) after FE completion , mi atomic increase 1 CODECHAL_DECODE_CHK_STATUS_RETURN(pScalabilityState->pHwInterface->SendMiAtomicDwordCmd(&pScalabilityState->resSemaMemFEBE, 1, MHW_MI_ATOMIC_INC, pCmdBufferInUse)); } } return eStatus; } MOS_STATUS CodecHalDecodeScalability_FEBESync( PCODECHAL_DECODE_SCALABILITY_STATE pScalabilityState, PMOS_COMMAND_BUFFER pCmdBufferInUse) { PMOS_INTERFACE pOsInterface; MhwMiInterface *pMiInterface; uint32_t HcpDecPhase; MOS_STATUS eStatus = MOS_STATUS_SUCCESS; CODECHAL_DECODE_FUNCTION_ENTER; CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState); CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState->pHwInterface); CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState->pHwInterface->GetOsInterface()); CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState->pHwInterface->GetMiInterface()); CODECHAL_DECODE_CHK_NULL_RETURN(pCmdBufferInUse); pOsInterface = pScalabilityState->pHwInterface->GetOsInterface(); pMiInterface = pScalabilityState->pHwInterface->GetMiInterface(); HcpDecPhase = pScalabilityState->HcpDecPhase; //FE& BE0 Sync. if (HcpDecPhase == CODECHAL_HCP_DECODE_PHASE_BE0 && pScalabilityState->pHwInterface->GetMfxInterface()->GetNumVdbox() > 2) { if (pScalabilityState->bFESeparateSubmission) { MOS_SYNC_PARAMS SyncParams; SyncParams = g_cInitSyncParams; SyncParams.GpuContext = pScalabilityState->VideoContext; SyncParams.presSyncResource = &pScalabilityState->resFeBeSyncObject; CODECHAL_DECODE_CHK_STATUS_RETURN(pOsInterface->pfnEngineWait(pOsInterface, &SyncParams)); } else { pMiInterface->AddWatchdogTimerStopCmd(pCmdBufferInUse); CODECHAL_DECODE_CHK_STATUS_RETURN(pScalabilityState->pHwInterface->SendHwSemaphoreWaitCmd(&pScalabilityState->resSemaMemFEBE, 1, MHW_MI_SAD_EQUAL_SDD, pCmdBufferInUse)); //reset semaphore. mi atomic decrease 1 CODECHAL_DECODE_CHK_STATUS_RETURN(pScalabilityState->pHwInterface->SendMiAtomicDwordCmd(&pScalabilityState->resSemaMemFEBE, 1, MHW_MI_ATOMIC_DEC, pCmdBufferInUse)); } } if (CodecHalDecodeScalabilityIsBEPhase(pScalabilityState)) { // Stop Watchdog before BEs wait pMiInterface->AddWatchdogTimerStopCmd(pCmdBufferInUse); //HW Semaphore for BEs Starting at the same time CODECHAL_DECODE_CHK_STATUS_RETURN(pScalabilityState->pHwInterface->SendMiAtomicDwordCmd(&pScalabilityState->resSemaMemBEs, 1, MHW_MI_ATOMIC_INC, pCmdBufferInUse)); CODECHAL_DECODE_CHK_STATUS_RETURN(pScalabilityState->pHwInterface->SendHwSemaphoreWaitCmd( &pScalabilityState->resSemaMemBEs, pScalabilityState->ucScalablePipeNum, MHW_MI_SAD_EQUAL_SDD, pCmdBufferInUse)); // Program some placeholder cmds to resolve the hazard between BEs sync MHW_MI_STORE_DATA_PARAMS dataParams; dataParams.pOsResource = &pScalabilityState->resDelayMinus; dataParams.dwResourceOffset = 0; dataParams.dwValue = 0xDE1A; for (uint32_t i = 0; i < pScalabilityState->numDelay; i++) { CODECHAL_DECODE_CHK_STATUS_RETURN(pScalabilityState->pHwInterface->GetMiInterface()->AddMiStoreDataImmCmd( pCmdBufferInUse, &dataParams)); } //reset HW semaphore CODECHAL_DECODE_CHK_STATUS_RETURN(pScalabilityState->pHwInterface->SendMiAtomicDwordCmd(&pScalabilityState->resSemaMemBEs, 1, MHW_MI_ATOMIC_DEC, pCmdBufferInUse)); // Condidtional BB END for streamout buffer writing over allocated size CODECHAL_DECODE_CHK_STATUS_RETURN(pScalabilityState->pHwInterface->SendCondBbEndCmd( &pScalabilityState->resFEStatusBuffer, CODECHAL_OFFSETOF(CODECHAL_DECODE_SCALABILITY_FE_STATUS, dwCarryFlagOfReportedSizeMinusAllocSize), 0, true, pCmdBufferInUse)); } return eStatus; } MOS_STATUS CodecHalDecodeScalability_BEsCompletionSync( PCODECHAL_DECODE_SCALABILITY_STATE pScalabilityState, PMOS_COMMAND_BUFFER pCmdBufferInUse) { MhwMiInterface *pMiInterface; uint32_t HcpDecPhase; MOS_STATUS eStatus = MOS_STATUS_SUCCESS; CODECHAL_DECODE_FUNCTION_ENTER; CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState); CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState->pHwInterface); CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState->pHwInterface->GetMiInterface()); CODECHAL_DECODE_CHK_NULL_RETURN(pCmdBufferInUse); HcpDecPhase = pScalabilityState->HcpDecPhase; if (CodecHalDecodeScalabilityIsLastCompletePhase(pScalabilityState)) { CODECHAL_DECODE_CHK_STATUS_RETURN(pScalabilityState->pHwInterface->SendHwSemaphoreWaitCmd( &pScalabilityState->resSemaMemCompletion, pScalabilityState->ucScalablePipeNum - 1, MHW_MI_SAD_EQUAL_SDD, pCmdBufferInUse)); for (int i = 0; i < pScalabilityState->ucScalablePipeNum - 1; i++) { CODECHAL_DECODE_CHK_STATUS_RETURN(pScalabilityState->pHwInterface->SendMiAtomicDwordCmd( &pScalabilityState->resSemaMemCompletion, 1, MHW_MI_ATOMIC_DEC, pCmdBufferInUse)); } } else { CODECHAL_DECODE_CHK_STATUS_RETURN(pScalabilityState->pHwInterface->SendMiAtomicDwordCmd( &pScalabilityState->resSemaMemCompletion, 1, MHW_MI_ATOMIC_INC, pCmdBufferInUse)); } return eStatus; } MOS_STATUS CodecHalDecodeScalability_ReadCSEngineIDReg( PCODECHAL_DECODE_SCALABILITY_STATE pScalabilityState, CodechalDecodeStatusBuffer *pDecodeStatusBuf, PMOS_COMMAND_BUFFER pCmdBufferInUse) { MHW_MI_STORE_REGISTER_MEM_PARAMS StoreRegParams; MhwMiInterface *pMiInterface; MmioRegistersHcp *pMmioRegisters; uint8_t ucPhaseIndex = 0; uint32_t dwOffset = 0; uint32_t dwCurrIndex = 0; uint32_t dwPreIndex = 0; MOS_STATUS eStatus = MOS_STATUS_SUCCESS; CODECHAL_DECODE_FUNCTION_ENTER; CODECHAL_DECODE_CHK_NULL_RETURN(pDecodeStatusBuf); CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState); CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState->pHwInterface); CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState->pHwInterface->GetMiInterface()); pMiInterface = pScalabilityState->pHwInterface->GetMiInterface(); pMmioRegisters = pScalabilityState->pHwInterface->GetHcpInterface()->GetMmioRegisters(MHW_VDBOX_NODE_1); //relative mmio addressing. ucPhaseIndex = (pScalabilityState->HcpDecPhase < CODECHAL_HCP_DECODE_PHASE_FE) ? 0 : (pScalabilityState->HcpDecPhase - CODECHAL_HCP_DECODE_PHASE_FE); if (ucPhaseIndex >= CODECHAL_HCP_DECODE_SCALABLE_MAX_PHASE_NUM) { eStatus = MOS_STATUS_INVALID_PARAMETER; CODECHAL_DECODE_ASSERTMESSAGE("Invalid HCP decode phase!"); return eStatus; } if (CodecHalDecodeScalabilityIsScalableMode(pScalabilityState) && pScalabilityState->HcpDecPhase > CODECHAL_HCP_DECODE_PHASE_BE0) { if (pDecodeStatusBuf->m_currIndex == 0) { dwPreIndex = CODECHAL_DECODE_STATUS_NUM - 1; } else { dwPreIndex = pDecodeStatusBuf->m_currIndex - 1; } } dwCurrIndex = (CodecHalDecodeScalabilityIsScalableMode(pScalabilityState) && pScalabilityState->HcpDecPhase > CODECHAL_HCP_DECODE_PHASE_BE0) ? dwPreIndex : pDecodeStatusBuf->m_currIndex; dwOffset = (dwCurrIndex * sizeof(CodechalDecodeStatus)) + pDecodeStatusBuf->m_csEngineIdOffset + sizeof(uint32_t)* ucPhaseIndex + sizeof(uint32_t)* 2; MOS_ZeroMemory(&StoreRegParams, sizeof(StoreRegParams)); StoreRegParams.presStoreBuffer = &pDecodeStatusBuf->m_statusBuffer; StoreRegParams.dwOffset = dwOffset; StoreRegParams.dwRegister = pMmioRegisters->csEngineIdOffset; CODECHAL_DECODE_CHK_STATUS_RETURN(pMiInterface->AddMiStoreRegisterMemCmd(pCmdBufferInUse, &StoreRegParams)); return eStatus; } MOS_STATUS IsHevcBufferReallocNeeded( CodechalHwInterface *hwInterface, MHW_VDBOX_HCP_INTERNAL_BUFFER_TYPE bufferType, PMHW_VDBOX_HCP_BUFFER_REALLOC_PARAMS reallocParam) { return hwInterface->GetHcpInterface()->IsHevcBufferReallocNeeded(bufferType, reallocParam); } MOS_STATUS GetHevcBufferSize( CodechalHwInterface *hwInterface, MHW_VDBOX_HCP_INTERNAL_BUFFER_TYPE bufferType, PMHW_VDBOX_HCP_BUFFER_SIZE_PARAMS hcpBufSizeParam) { return hwInterface->GetHcpInterface()->GetHevcBufferSize(bufferType, hcpBufSizeParam); } MOS_STATUS IsVp9BufferReallocNeeded( CodechalHwInterface *hwInterface, MHW_VDBOX_HCP_INTERNAL_BUFFER_TYPE bufferType, PMHW_VDBOX_HCP_BUFFER_REALLOC_PARAMS reallocParam) { return hwInterface->GetHcpInterface()->IsVp9BufferReallocNeeded(bufferType, reallocParam); } MOS_STATUS GetVp9BufferSize( CodechalHwInterface *hwInterface, MHW_VDBOX_HCP_INTERNAL_BUFFER_TYPE bufferType, PMHW_VDBOX_HCP_BUFFER_SIZE_PARAMS hcpBufSizeParam) { return hwInterface->GetHcpInterface()->GetVp9BufferSize(bufferType, hcpBufSizeParam); } MOS_STATUS CodecHalDecodeScalability_InitializeState ( CodechalDecode *pDecoder, PCODECHAL_DECODE_SCALABILITY_STATE pScalabilityState, CodechalHwInterface *hwInterface, bool bShortFormat) { PMOS_VIRTUALENGINE_INTERFACE pVEInterface; MOS_VIRTUALENGINE_INIT_PARAMS VEInitParms; MOS_USER_FEATURE_VALUE_DATA UserFeatureData; PMOS_INTERFACE osInterface; MhwVdboxMfxInterface *vdboxMfxInterface; uint8_t vdboxNum; MOS_STATUS eStatus = MOS_STATUS_SUCCESS; CODECHAL_DECODE_FUNCTION_ENTER; CODECHAL_DECODE_CHK_NULL_RETURN(pDecoder); CODECHAL_DECODE_CHK_NULL_RETURN(pScalabilityState); CODECHAL_DECODE_CHK_NULL_RETURN(hwInterface); osInterface = hwInterface->GetOsInterface(); CODECHAL_DECODE_CHK_NULL_RETURN(osInterface); vdboxMfxInterface = hwInterface->GetMfxInterface(); CODECHAL_DECODE_CHK_NULL_RETURN(vdboxMfxInterface); vdboxNum = vdboxMfxInterface->GetNumVdbox(); if (vdboxNum < 2 || !osInterface->bHcpDecScalabilityMode) { eStatus = MOS_STATUS_INVALID_PARAMETER; CODECHAL_DECODE_ASSERTMESSAGE("not support scalability on this platform."); return eStatus; } pScalabilityState->VideoContextForSP = MOS_GPU_CONTEXT_VIDEO; pScalabilityState->VideoContextForMP = MOS_VE_MULTINODESCALING_SUPPORTED(osInterface) ? MOS_GPU_CONTEXT_VIDEO5 : MOS_GPU_CONTEXT_VDBOX2_VIDEO; pScalabilityState->VideoContextFor3P = MOS_VE_MULTINODESCALING_SUPPORTED(osInterface) ? MOS_GPU_CONTEXT_VIDEO7 : MOS_GPU_CONTEXT_VDBOX2_VIDEO2; pScalabilityState->numDelay = 15; #if (_DEBUG || _RELEASE_INTERNAL) // Reg key of the threshold for mode switch single pipe <-> 2 pipe. Using pic width value to control mode switch for now MOS_ZeroMemory(&UserFeatureData, sizeof(UserFeatureData)); MOS_UserFeature_ReadValue_ID( nullptr, __MEDIA_USER_FEATURE_VALUE_HCP_DECODE_MODE_SWITCH_THRESHOLD1_ID, &UserFeatureData, osInterface->pOsContext); pScalabilityState->dwHcpDecModeSwtichTh1Width = UserFeatureData.u32Data; MOS_ZeroMemory(&UserFeatureData, sizeof(UserFeatureData)); MOS_UserFeature_ReadValue_ID( nullptr, __MEDIA_USER_FEATURE_VALUE_HCP_DECODE_MODE_SWITCH_THRESHOLD2_ID, &UserFeatureData, osInterface->pOsContext); pScalabilityState->dwHcpDecModeSwtichTh2Width = UserFeatureData.u32Data; MOS_ZeroMemory(&UserFeatureData, sizeof(UserFeatureData)); MOS_UserFeature_ReadValue_ID( nullptr, __MEDIA_USER_FEATURE_VALUE_SCALABILITY_OVERRIDE_SPLIT_WIDTH_IN_MINCB, &UserFeatureData, osInterface->pOsContext); pScalabilityState->dbgOvrdWidthInMinCb = UserFeatureData.u32Data; #endif // enable FE separate submission by default in multi-pipe mode if (hwInterface->GetMfxInterface()->GetNumVdbox() > 2) { pScalabilityState->bFESeparateSubmission = true; } else { // no benefit to enable FE separate submission on 2 vdbox config. pScalabilityState->bFESeparateSubmission = false; } #if (_DEBUG || _RELEASE_INTERNAL) if (osInterface->bEnableDbgOvrdInVE || Mos_Solo_IsInUse(osInterface)) { //if DbgOverride is enabled, FE separate submission is not supported pScalabilityState->bFESeparateSubmission = false; } #endif if (pScalabilityState->bFESeparateSubmission) { MOS_GPU_CONTEXT GpuContext = MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(osInterface) ? MOS_GPU_CONTEXT_VIDEO : MOS_GPU_CONTEXT_VIDEO4; GpuContext = MOS_VE_MULTINODESCALING_SUPPORTED(osInterface) ? MOS_GPU_CONTEXT_VIDEO4 : GpuContext; MHW_VDBOX_GPUNODE_LIMIT gpuNodeLimit; MOS_ZeroMemory(&gpuNodeLimit, sizeof(MHW_VDBOX_GPUNODE_LIMIT)); CODECHAL_DECODE_CHK_STATUS_RETURN(vdboxMfxInterface->FindGpuNodeToUse( &gpuNodeLimit)); MOS_GPU_NODE videoGpuNode = (MOS_GPU_NODE)(gpuNodeLimit.dwGpuNodeToUse); MOS_GPUCTX_CREATOPTIONS createOpts; CODECHAL_DECODE_CHK_STATUS_RETURN(osInterface->pfnCreateGpuContext( osInterface, GpuContext, videoGpuNode, &createOpts)); pScalabilityState->VideoContextForFE = GpuContext; } pScalabilityState->Standard = pDecoder->GetStandard(); pScalabilityState->VideoContext = pDecoder->GetVideoContext(); pScalabilityState->bShortFormatInUse = bShortFormat; pScalabilityState->ucNumVdbox = vdboxNum; pScalabilityState->pHwInterface = hwInterface; //virtual engine init with scalability MOS_ZeroMemory(&VEInitParms, sizeof(VEInitParms)); VEInitParms.bScalabilitySupported = true; VEInitParms.bFESeparateSubmit = pScalabilityState->bFESeparateSubmission; VEInitParms.ucMaxNumPipesInUse = (vdboxNum == 2) ? 2 : 3; VEInitParms.ucNumOfSdryCmdBufSets = CODECHAL_SCALABILITY_DECODE_SECONDARY_CMDBUFSET_NUM; VEInitParms.ucMaxNumOfSdryCmdBufInOneFrame = (pScalabilityState->bFESeparateSubmission) ? VEInitParms.ucMaxNumPipesInUse : (VEInitParms.ucMaxNumPipesInUse + 1); CODECHAL_DECODE_CHK_STATUS_RETURN(osInterface->pfnVirtualEngineInterfaceInitialize(osInterface, &VEInitParms)); pScalabilityState->pVEInterface = pVEInterface = osInterface->pVEInterf; if (pVEInterface->pfnVEGetHintParams) { CODECHAL_DECODE_CHK_STATUS_RETURN(pVEInterface->pfnVEGetHintParams(pVEInterface, true, &pScalabilityState->pScalHintParms)); } if (pVEInterface->pfnVEGetHintParams) { CODECHAL_DECODE_CHK_STATUS_RETURN(pVEInterface->pfnVEGetHintParams(pVEInterface, false, &pScalabilityState->pSingleHintParms)); } #if (_DEBUG || _RELEASE_INTERNAL) MOS_ZeroMemory(&UserFeatureData, sizeof(UserFeatureData)); MOS_UserFeature_ReadValue_ID( nullptr, __MEDIA_USER_FEATURE_VALUE_HCP_DECODE_ALWAYS_FRAME_SPLIT_ID, &UserFeatureData, osInterface->pOsContext); pScalabilityState->bAlwaysFrameSplit = UserFeatureData.u32Data ? true : false; #endif pScalabilityState->bIsEvenSplit = true; if (pDecoder->GetStandard() == CODECHAL_HEVC) { pScalabilityState->pfnGetHcpBufferSize = GetHevcBufferSize; pScalabilityState->pfnIsHcpBufferReallocNeeded = IsHevcBufferReallocNeeded; } else if (pDecoder->GetStandard() == CODECHAL_VP9) { pScalabilityState->pfnGetHcpBufferSize = GetVp9BufferSize; pScalabilityState->pfnIsHcpBufferReallocNeeded = IsVp9BufferReallocNeeded; } else { eStatus = MOS_STATUS_INVALID_PARAMETER; CODECHAL_DECODE_ASSERTMESSAGE("unsupported decode format for scalability mode."); return eStatus; } pScalabilityState->bToggleCABACStreamOutBuffer = false; pScalabilityState->sliceStateCLs = CODECHAL_SCALABILITY_SLICE_STATE_CACHELINES_PER_SLICE; pScalabilityState->pfnDecidePipeNum = CodecHalDecodeScalability_DecidePipeNum; pScalabilityState->pfnMapPipeNumToLRCACount = CodechalDecodeScalability_MapPipeNumToLRCACount; #if (_DEBUG || _RELEASE_INTERNAL) pScalabilityState->pfnDebugOvrdDecidePipeNum = CodechalDecodeScalability_DebugOvrdDecidePipeNum; #endif CODECHAL_DECODE_CHK_STATUS_RETURN(CodecHalDecodeScalability_AllocateResources_FixedSizes(pScalabilityState)); return eStatus; } bool CodecHalDecodeScalabilityIsToSubmitCmdBuffer( PCODECHAL_DECODE_SCALABILITY_STATE pScalabilityState) { if (pScalabilityState == nullptr) { return false; } else { return (CodecHalDecodeScalabilityIsFinalBEPhase(pScalabilityState) || (pScalabilityState->HcpDecPhase == CODECHAL_HCP_DECODE_PHASE_FE && pScalabilityState->bFESeparateSubmission)); } } void CodecHalDecodeScalability_DecPhaseToSubmissionType( PCODECHAL_DECODE_SCALABILITY_STATE pScalabilityState, PMOS_COMMAND_BUFFER pCmdBuffer) { switch (pScalabilityState->HcpDecPhase) { case CodechalDecode::CodechalHcpDecodePhaseLegacyS2L: //Note: no break here, S2L and FE commands put in one secondary command buffer. case CODECHAL_HCP_DECODE_PHASE_FE: pCmdBuffer->iSubmissionType = SUBMISSION_TYPE_MULTI_PIPE_ALONE; break; case CODECHAL_HCP_DECODE_PHASE_BE0: pCmdBuffer->iSubmissionType = SUBMISSION_TYPE_MULTI_PIPE_MASTER; break; case CODECHAL_HCP_DECODE_PHASE_BE1: pCmdBuffer->iSubmissionType = SUBMISSION_TYPE_MULTI_PIPE_SLAVE | SUBMISSION_TYPE_MULTI_PIPE_FLAGS_LAST_PIPE; break; case CODECHAL_HCP_DECODE_PHASE_RESERVED: default: pCmdBuffer->iSubmissionType = SUBMISSION_TYPE_MULTI_PIPE_ALONE; break; } }