/*
* Copyright (c) 2013-2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
//!
//! \file     codechal_memdecomp.cpp
//! \brief    This module sets up a kernel for media memory decompression.

#include "codechal_memdecomp.h"
#include "codeckrnheader.h"
#include "mos_os_cp_interface_specific.h"

//!
//! \class MediaObjectCopyCurbe
//! \brief Media object memory decompress copy kernel curbe.
//!        Note: Curbe data DW0-6 must be defined at the beginning of the class.
//!
class MediaObjectCopyCurbe { public: // DW 0 union { struct { uint32_t srcSurface0Index; }; struct { uint32_t value; }; } m_dw0; // DW 1 union { struct { uint32_t srcSurface1Index; }; struct { uint32_t value; }; } m_dw1; // DW 2 union { struct { uint32_t srcSurface2Index; }; struct { uint32_t value; }; } m_dw2; // DW 3 union { struct { uint32_t dstSurface0Index; }; struct { uint32_t value; }; } m_dw3; // DW 4 union { struct { uint32_t dstSurface1Index; }; struct { uint32_t value; }; } m_dw4; // DW 5 union { struct { uint32_t dstSurface2Index; }; struct { uint32_t value; }; } m_dw5; // DW 6 union { struct { uint32_t surfaceWidth; }; struct { uint32_t value; }; } m_dw6; //! //! \brief Constructor //! MediaObjectCopyCurbe(); //! //! \brief Destructor //! ~MediaObjectCopyCurbe(){}; static const size_t m_byteSize = 28; //!< Byte size of cube data DW0-6. } ; MediaObjectCopyCurbe::MediaObjectCopyCurbe() { MOS_ZeroMemory(this, m_byteSize); } MediaMemDecompState::~MediaMemDecompState() { MHW_FUNCTION_ENTER; if (m_cpInterface) { if (m_osInterface) { m_osInterface->pfnDeleteMhwCpInterface(m_cpInterface); m_cpInterface = nullptr; } else { MHW_ASSERTMESSAGE("Failed to destroy cpInterface."); } } if (m_cmdBufIdGlobal) { if (m_osInterface) { m_osInterface->pfnUnlockResource(m_osInterface, &m_resCmdBufIdGlobal); m_osInterface->pfnFreeResource(m_osInterface, &m_resCmdBufIdGlobal); m_cmdBufIdGlobal = nullptr; } else { MHW_ASSERTMESSAGE("Failed to destroy command buffer global Id."); } } if (m_miInterface) { MOS_Delete(m_miInterface); m_miInterface = nullptr; } if (m_renderInterface) { MOS_Delete(m_renderInterface); m_renderInterface = nullptr; } if (m_osInterface) { m_osInterface->pfnDestroy(m_osInterface, false); MOS_FreeMemory(m_osInterface); m_osInterface = nullptr; } } MediaMemDecompState::MediaMemDecompState() : MediaMemDecompBaseState(), m_currCmdBufId(0) { MHW_FUNCTION_ENTER; m_stateHeapSettings.m_ishBehavior = HeapManager::Behavior::clientControlled; 
m_stateHeapSettings.m_dshBehavior = HeapManager::Behavior::destructiveExtend; m_stateHeapSettings.m_keepDshLocked = true; m_stateHeapSettings.dwDshIncrement = 2 * MOS_PAGE_SIZE; MOS_ZeroMemory(&m_renderContext, sizeof(m_renderContext)); MOS_ZeroMemory(&m_krnUniId, sizeof(m_krnUniId)); MOS_ZeroMemory(&m_kernelSize, sizeof(m_kernelSize)); MOS_ZeroMemory(&m_resCmdBufIdGlobal, sizeof(m_resCmdBufIdGlobal)); for (uint8_t idx = decompKernelStatePa; idx < decompKernelStateMax; idx++) { m_kernelBinary[idx] = nullptr; m_kernelStates[idx] = MHW_KERNEL_STATE(); } m_krnUniId[decompKernelStatePa] = IDR_CODEC_ALLPACopy; m_krnUniId[decompKernelStatePl2] = IDR_CODEC_ALLPL2Copy; } MOS_STATUS MediaMemDecompState::GetKernelBinaryAndSize( uint8_t *kernelBase, uint32_t krnUniId, uint8_t **kernelBinary, uint32_t *kernelSize) { MOS_STATUS eStatus = MOS_STATUS_SUCCESS; MHW_CHK_NULL_RETURN(kernelBase); MHW_CHK_NULL_RETURN(kernelBinary); MHW_CHK_NULL_RETURN(kernelSize); if (krnUniId >= IDR_CODEC_TOTAL_NUM_KERNELS) { eStatus = MOS_STATUS_INVALID_PARAMETER; return eStatus; } uint32_t *kernelOffsetTable = (uint32_t*)kernelBase; uint8_t *base = (uint8_t*)(kernelOffsetTable + IDR_CODEC_TOTAL_NUM_KERNELS + 1); *kernelSize = kernelOffsetTable[krnUniId + 1] - kernelOffsetTable[krnUniId]; *kernelBinary = ((*kernelSize) > 0) ? 
(base + kernelOffsetTable[krnUniId]) : nullptr; return eStatus; } MOS_STATUS MediaMemDecompState::InitKernelState( uint32_t kernelStateIdx) { MOS_STATUS eStatus = MOS_STATUS_SUCCESS; MHW_FUNCTION_ENTER; if (kernelStateIdx >= decompKernelStateMax) { eStatus = MOS_STATUS_INVALID_PARAMETER; return eStatus; } uint8_t **kernelBase = &m_kernelBinary[kernelStateIdx]; uint32_t *kernelSize = &m_kernelSize[kernelStateIdx]; MHW_CHK_STATUS_RETURN(GetKernelBinaryAndSize( m_kernelBase, m_krnUniId[kernelStateIdx], kernelBase, kernelSize)); m_stateHeapSettings.dwIshSize += MOS_ALIGN_CEIL(*kernelSize, (1 << MHW_KERNEL_OFFSET_SHIFT)); m_stateHeapSettings.dwDshSize += MHW_CACHELINE_SIZE* m_numMemDecompSyncTags; m_stateHeapSettings.dwNumSyncTags += m_numMemDecompSyncTags; return eStatus; } MOS_STATUS MediaMemDecompState::MemoryDecompress( PMOS_RESOURCE targetResource) { MOS_STATUS eStatus = MOS_STATUS_SUCCESS; MHW_FUNCTION_ENTER; MHW_CHK_NULL_RETURN(targetResource); MOS_SURFACE targetSurface; MOS_ZeroMemory(&targetSurface, sizeof(MOS_SURFACE)); targetSurface.Format = Format_Invalid; targetSurface.OsResource = *targetResource; MHW_CHK_STATUS_RETURN(GetResourceInfo(&targetSurface)); //Set context before proceeding auto gpuContext = m_osInterface->CurrentGpuContextOrdinal; m_osInterface->pfnSetGpuContext(m_osInterface, m_renderContext); m_osInterface->pfnResetOsStates(m_osInterface); DecompKernelStateIdx kernelStateIdx; bool useUVPlane; if ((targetSurface.Format == Format_YUY2) || (targetSurface.Format == Format_UYVY)) { kernelStateIdx = decompKernelStatePa; useUVPlane = false; } else if ((targetSurface.Format == Format_NV12) || (targetSurface.Format == Format_P010)) { kernelStateIdx = decompKernelStatePl2; useUVPlane = true; } else { eStatus = MOS_STATUS_INVALID_PARAMETER; return eStatus; } auto kernelState = &m_kernelStates[kernelStateIdx]; kernelState->m_currTrackerId = m_currCmdBufId; // preprocess in cp first m_osInterface->osCpInterface->PrepareResources((void **)&targetResource, 
1, nullptr, 0); if (kernelStateIdx == decompKernelStatePl2) { if (m_osInterface->osCpInterface->IsSMEnabled()) { uint32_t *kernelBase = nullptr; uint32_t kernelSize = 0; MHW_CHK_STATUS_RETURN(m_osInterface->osCpInterface->GetTK( &kernelBase, &kernelSize, nullptr)); if (nullptr == kernelBase || 0 == kernelSize) { MHW_ASSERT("Could not get TK kernels for MMC!"); eStatus = MOS_STATUS_INVALID_PARAMETER; return eStatus; } kernelState->KernelParams.pBinary = (uint8_t *)kernelBase; } else { kernelState->KernelParams.pBinary = m_kernelBinary[kernelStateIdx]; } MHW_CHK_STATUS_RETURN(kernelState->m_ishRegion.AddData( kernelState->KernelParams.pBinary, 0, kernelState->KernelParams.iSize)); } MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnRequestSshSpaceForCmdBuf( m_stateHeapInterface, kernelState->KernelParams.iBTCount)); uint32_t dshSize = m_stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData() + MOS_ALIGN_CEIL(kernelState->KernelParams.iCurbeLength, m_stateHeapInterface->pStateHeapInterface->GetCurbeAlignment()); eStatus = m_stateHeapInterface->pfnAssignSpaceInStateHeap( m_stateHeapInterface, MHW_DSH_TYPE, kernelState, dshSize, false, true); if (eStatus == MOS_STATUS_CLIENT_AR_NO_SPACE) { MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnAssignSpaceInStateHeap( m_stateHeapInterface, MHW_DSH_TYPE, kernelState, dshSize, false, true)); } else if (eStatus != MOS_STATUS_SUCCESS) { return eStatus; } MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnAssignSpaceInStateHeap( m_stateHeapInterface, MHW_SSH_TYPE, kernelState, kernelState->dwSshSize, false, false)); MHW_INTERFACE_DESCRIPTOR_PARAMS idParams; MOS_ZeroMemory(&idParams, sizeof(idParams)); idParams.pKernelState = kernelState; MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor( m_stateHeapInterface, 1, &idParams)); MHW_CHK_STATUS_RETURN(SetMediaObjectCopyCurbe(kernelStateIdx)); MOS_COMMAND_BUFFER cmdBuffer; // Send HW commands (including SSH) 
MHW_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0)); MHW_GENERIC_PROLOG_PARAMS genericPrologParams; MOS_ZeroMemory(&genericPrologParams, sizeof(genericPrologParams)); genericPrologParams.pOsInterface = m_osInterface; genericPrologParams.pvMiInterface = m_miInterface; genericPrologParams.bMmcEnabled = true; MHW_CHK_STATUS_RETURN(Mhw_SendGenericPrologCmd(&cmdBuffer, &genericPrologParams)); MHW_CHK_NULL_RETURN(m_renderInterface); if (m_renderInterface->GetL3CacheConfig()->bL3CachingEnabled) { MHW_CHK_STATUS_RETURN(m_renderInterface->SetL3Cache(&cmdBuffer)); } MHW_CHK_STATUS_RETURN(m_renderInterface->EnablePreemption(&cmdBuffer)); MHW_CHK_STATUS_RETURN(m_renderInterface->AddPipelineSelectCmd(&cmdBuffer, false)); MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable( m_stateHeapInterface, kernelState)); MHW_RCS_SURFACE_PARAMS surfaceParams; MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams)); surfaceParams.dwNumPlanes = useUVPlane ? 2 : 1; // Y+UV : Y surfaceParams.psSurface = &targetSurface; // Y Plane surfaceParams.dwBindingTableOffset[MHW_Y_PLANE] = copySurfaceSrcY; if (surfaceParams.psSurface->Format == Format_YUY2) { surfaceParams.ForceSurfaceFormat[MHW_Y_PLANE] = MHW_GFX3DSTATE_SURFACEFORMAT_YCRCB_NORMAL; } else if (surfaceParams.psSurface->Format == Format_UYVY) { surfaceParams.ForceSurfaceFormat[MHW_Y_PLANE] = MHW_GFX3DSTATE_SURFACEFORMAT_YCRCB_SWAPY; } else if (surfaceParams.psSurface->Format == Format_P010) { surfaceParams.ForceSurfaceFormat[MHW_Y_PLANE] = MHW_GFX3DSTATE_SURFACEFORMAT_R16_UNORM; } else //NV12 { surfaceParams.ForceSurfaceFormat[MHW_Y_PLANE] = MHW_GFX3DSTATE_SURFACEFORMAT_R8_UNORM; } uint32_t widthInBytes = GetSurfaceWidthInBytes(surfaceParams.psSurface); surfaceParams.dwWidthToUse[MHW_Y_PLANE] = MHW_WIDTH_IN_DW(widthInBytes); // UV Plane if (useUVPlane) { surfaceParams.dwBindingTableOffset[MHW_U_PLANE] = copySurfaceSrcU; if (surfaceParams.psSurface->Format == Format_P010) { 
surfaceParams.ForceSurfaceFormat[MHW_U_PLANE] = MHW_GFX3DSTATE_SURFACEFORMAT_YCRCB_SWAPUVY; } else //NV12 { surfaceParams.ForceSurfaceFormat[MHW_U_PLANE] = MHW_GFX3DSTATE_SURFACEFORMAT_R16_UINT; } surfaceParams.dwBaseAddrOffset[MHW_U_PLANE] = targetSurface.dwPitch * MOS_ALIGN_FLOOR(targetSurface.UPlaneOffset.iYOffset, MOS_YTILE_H_ALIGNMENT); surfaceParams.dwWidthToUse[MHW_U_PLANE] = MHW_WIDTH_IN_DW(widthInBytes); surfaceParams.dwHeightToUse[MHW_U_PLANE] = surfaceParams.psSurface->dwHeight / 2; surfaceParams.dwYOffset[MHW_U_PLANE] = (targetSurface.UPlaneOffset.iYOffset % MOS_YTILE_H_ALIGNMENT); } m_osInterface->pfnGetMemoryCompressionMode( m_osInterface, &targetSurface.OsResource, (PMOS_MEMCOMP_STATE)&surfaceParams.psSurface->CompressionMode); MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetSurfaceState( m_stateHeapInterface, kernelState, &cmdBuffer, 1, &surfaceParams)); //In place decompression: src shares the same surface with dst. surfaceParams.bIsWritable = true; surfaceParams.dwBindingTableOffset[MHW_Y_PLANE] = copySurfaceDstY; if (useUVPlane) { surfaceParams.dwBindingTableOffset[MHW_U_PLANE] = copySurfaceDstU; } MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetSurfaceState( m_stateHeapInterface, kernelState, &cmdBuffer, 1, &surfaceParams)); MHW_STATE_BASE_ADDR_PARAMS stateBaseAddrParams; MOS_ZeroMemory(&stateBaseAddrParams, sizeof(stateBaseAddrParams)); MOS_RESOURCE *dsh = nullptr, *ish = nullptr; MHW_CHK_NULL_RETURN(dsh = kernelState->m_dshRegion.GetResource()); MHW_CHK_NULL_RETURN(ish = kernelState->m_ishRegion.GetResource()); stateBaseAddrParams.presDynamicState = dsh; stateBaseAddrParams.dwDynamicStateSize = kernelState->m_dshRegion.GetHeapSize(); stateBaseAddrParams.presInstructionBuffer = ish; stateBaseAddrParams.dwInstructionBufferSize = kernelState->m_ishRegion.GetHeapSize(); MHW_CHK_STATUS_RETURN(m_renderInterface->AddStateBaseAddrCmd( &cmdBuffer, &stateBaseAddrParams)); MHW_VFE_PARAMS vfeParams = {}; vfeParams.pKernelState = kernelState; auto 
waTable = m_osInterface->pfnGetWaTable(m_osInterface); vfeParams.eVfeSliceDisable = MHW_VFE_SLICE_ALL; MHW_CHK_STATUS_RETURN(m_renderInterface->AddMediaVfeCmd( &cmdBuffer, &vfeParams)); MHW_CURBE_LOAD_PARAMS curbeLoadParams; MOS_ZeroMemory(&curbeLoadParams, sizeof(curbeLoadParams)); curbeLoadParams.pKernelState = kernelState; MHW_CHK_STATUS_RETURN(m_renderInterface->AddMediaCurbeLoadCmd( &cmdBuffer, &curbeLoadParams)); MHW_ID_LOAD_PARAMS idLoadParams; MOS_ZeroMemory(&idLoadParams, sizeof(idLoadParams)); idLoadParams.pKernelState = kernelState; idLoadParams.dwNumKernelsLoaded = 1; MHW_CHK_STATUS_RETURN(m_renderInterface->AddMediaIDLoadCmd( &cmdBuffer, &idLoadParams)); uint32_t resolutionX; if (kernelStateIdx == decompKernelStatePa) // Format_YUY2, Format_UYVY { resolutionX = MOS_ROUNDUP_DIVIDE(targetSurface.dwWidth * 2, 32); } else // DecompKernelStatePl2: Format_NV12, Format_P010 { if (targetSurface.Format == Format_P010) // Format_P010 { resolutionX = MOS_ROUNDUP_DIVIDE(targetSurface.dwWidth * 2, 32); } else // Format_NV12 { resolutionX = MOS_ROUNDUP_DIVIDE(targetSurface.dwWidth, 32); } } uint32_t resolutionY = MOS_ROUNDUP_DIVIDE(targetSurface.dwHeight, 16); MHW_WALKER_PARAMS walkerParams; MOS_ZeroMemory(&walkerParams, sizeof(walkerParams)); walkerParams.WalkerMode = MHW_WALKER_MODE_SINGLE; walkerParams.BlockResolution.x = resolutionX; walkerParams.BlockResolution.y = resolutionY; walkerParams.GlobalResolution.x = resolutionX; walkerParams.GlobalResolution.y = resolutionY; walkerParams.GlobalOutlerLoopStride.x = resolutionX; walkerParams.GlobalOutlerLoopStride.y = 0; walkerParams.GlobalInnerLoopUnit.x = 0; walkerParams.GlobalInnerLoopUnit.y = resolutionY; walkerParams.dwLocalLoopExecCount = 0xFFFF; //MAX VALUE walkerParams.dwGlobalLoopExecCount = 0xFFFF; //MAX VALUE // No dependency walkerParams.ScoreboardMask = 0; // Raster scan walking pattern walkerParams.LocalOutLoopStride.x = 0; walkerParams.LocalOutLoopStride.y = 1; walkerParams.LocalInnerLoopUnit.x = 1; 
walkerParams.LocalInnerLoopUnit.y = 0; walkerParams.LocalEnd.x = resolutionX - 1; walkerParams.LocalEnd.y = 0; MHW_CHK_STATUS_RETURN(m_renderInterface->AddMediaObjectWalkerCmd( &cmdBuffer, &walkerParams)); // Check if destination surface needs to be synchronized, before command buffer submission MOS_SYNC_PARAMS syncParams; MOS_ZeroMemory(&syncParams, sizeof(syncParams)); syncParams.uiSemaphoreCount = 1; syncParams.GpuContext = m_renderContext; syncParams.presSyncResource = &targetSurface.OsResource; syncParams.bReadOnly = false; syncParams.bDisableDecodeSyncLock = m_disableDecodeSyncLock; syncParams.bDisableLockForTranscode = m_disableLockForTranscode; MHW_CHK_STATUS_RETURN(m_osInterface->pfnPerformOverlaySync(m_osInterface, &syncParams)); MHW_CHK_STATUS_RETURN(m_osInterface->pfnResourceWait(m_osInterface, &syncParams)); // Update the resource tag (s/w tag) for On-Demand Sync m_osInterface->pfnSetResourceSyncTag(m_osInterface, &syncParams); // Update the tag in GPU Sync eStatus buffer (H/W Tag) to match the current S/W tag if (m_osInterface->bTagResourceSync) { MHW_PIPE_CONTROL_PARAMS pipeControlParams; MOS_ZeroMemory(&pipeControlParams, sizeof(pipeControlParams)); pipeControlParams.dwFlushMode = MHW_FLUSH_WRITE_CACHE; MHW_CHK_STATUS_RETURN(m_miInterface->AddPipeControl( &cmdBuffer, nullptr, &pipeControlParams)); MHW_CHK_STATUS_RETURN(WriteSyncTagToResourceCmd(&cmdBuffer)); } MHW_MI_STORE_DATA_PARAMS miStoreDataParams; MOS_ZeroMemory(&miStoreDataParams, sizeof(miStoreDataParams)); miStoreDataParams.pOsResource = &m_resCmdBufIdGlobal; miStoreDataParams.dwValue = m_currCmdBufId; MHW_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd( &cmdBuffer, &miStoreDataParams)); MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSubmitBlocks( m_stateHeapInterface, kernelState)); MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnUpdateGlobalCmdBufId( m_stateHeapInterface)); // Add PipeControl to invalidate ISP and MediaState to avoid PageFault issue // This code is temporal and it 
will be moved to batch buffer end in short PLATFORM platform; m_osInterface->pfnGetPlatform(m_osInterface, &platform); if (GFX_IS_GEN_9_OR_LATER(platform)) { MHW_PIPE_CONTROL_PARAMS pipeControlParams; MOS_ZeroMemory(&pipeControlParams, sizeof(pipeControlParams)); pipeControlParams.dwFlushMode = MHW_FLUSH_WRITE_CACHE; pipeControlParams.bGenericMediaStateClear = true; pipeControlParams.bIndirectStatePointersDisable = true; pipeControlParams.bDisableCSStall = false; MHW_CHK_STATUS_RETURN(m_miInterface->AddPipeControl(&cmdBuffer, NULL, &pipeControlParams)); if (MEDIA_IS_WA(m_osInterface->pfnGetWaTable(m_osInterface), WaSendDummyVFEafterPipelineSelect)) { MHW_VFE_PARAMS vfeStateParams = {}; vfeStateParams.dwNumberofURBEntries = 1; MHW_CHK_STATUS_RETURN(m_renderInterface->AddMediaVfeCmd(&cmdBuffer, &vfeStateParams)); } } MHW_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd( &cmdBuffer, nullptr)); m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0); MHW_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer( m_osInterface, &cmdBuffer, m_renderContextUsesNullHw)); // Update the compression mode MHW_CHK_STATUS_RETURN(m_osInterface->pfnSetMemoryCompressionMode( m_osInterface, targetResource, MOS_MEMCOMP_DISABLED)); MHW_CHK_STATUS_RETURN(m_osInterface->pfnSetMemoryCompressionHint( m_osInterface, targetResource, false)); //Update CmdBufId... 
m_currCmdBufId++; if (m_currCmdBufId == MemoryBlock::m_invalidTrackerId) { m_currCmdBufId++; } // Send the signal to indicate decode completion, in case On-Demand Sync is not present MHW_CHK_STATUS_RETURN(m_osInterface->pfnResourceSignal(m_osInterface, &syncParams)); if (gpuContext != m_renderContext) { m_osInterface->pfnSetGpuContext(m_osInterface, gpuContext); } return eStatus; } MOS_STATUS MediaMemDecompState::GetResourceInfo( PMOS_SURFACE surface) { MOS_STATUS eStatus = MOS_STATUS_SUCCESS; MHW_CHK_NULL_RETURN(m_osInterface); MHW_CHK_NULL_RETURN(surface); MOS_SURFACE details; MOS_ZeroMemory(&details, sizeof(details)); details.Format = Format_Invalid; MHW_CHK_STATUS_RETURN(m_osInterface->pfnGetResourceInfo( m_osInterface, &surface->OsResource, &details)); surface->Format = details.Format; surface->dwWidth = details.dwWidth; surface->dwHeight = details.dwHeight; surface->dwPitch = details.dwPitch; surface->dwDepth = details.dwDepth; surface->bArraySpacing = details.bArraySpacing; surface->TileType = details.TileType; surface->TileModeGMM = details.TileModeGMM; surface->bGMMTileEnabled = details.bGMMTileEnabled; surface->dwOffset = details.RenderOffset.YUV.Y.BaseOffset; surface->UPlaneOffset.iSurfaceOffset = details.RenderOffset.YUV.U.BaseOffset; surface->UPlaneOffset.iXOffset = details.RenderOffset.YUV.U.XOffset; surface->UPlaneOffset.iYOffset = (surface->UPlaneOffset.iSurfaceOffset - surface->dwOffset) / surface->dwPitch + details.RenderOffset.YUV.U.YOffset; surface->VPlaneOffset.iSurfaceOffset = details.RenderOffset.YUV.V.BaseOffset; surface->VPlaneOffset.iXOffset = details.RenderOffset.YUV.V.XOffset; surface->VPlaneOffset.iYOffset = (surface->VPlaneOffset.iSurfaceOffset - surface->dwOffset) / surface->dwPitch + details.RenderOffset.YUV.V.YOffset; surface->bCompressible = details.bCompressible; surface->bIsCompressed = details.bIsCompressed; surface->CompressionMode = details.CompressionMode; return eStatus; } uint32_t 
MediaMemDecompState::GetSurfaceWidthInBytes(PMOS_SURFACE surface) { uint32_t widthInBytes; switch (surface->Format) { case Format_IMC1: case Format_IMC3: case Format_IMC2: case Format_IMC4: case Format_NV12: case Format_YV12: case Format_I420: case Format_IYUV: case Format_400P: case Format_411P: case Format_422H: case Format_422V: case Format_444P: case Format_RGBP: case Format_BGRP: widthInBytes = surface->dwWidth; break; case Format_YUY2: case Format_YUYV: case Format_YVYU: case Format_UYVY: case Format_VYUY: case Format_P010: widthInBytes = surface->dwWidth << 1; break; case Format_A8R8G8B8: case Format_X8R8G8B8: case Format_A8B8G8R8: widthInBytes = surface->dwWidth << 2; break; default: widthInBytes = surface->dwWidth; break; } return widthInBytes; } MOS_STATUS MediaMemDecompState::WriteSyncTagToResourceCmd( PMOS_COMMAND_BUFFER cmdBuffer) { MOS_STATUS eStatus = MOS_STATUS_SUCCESS; MHW_FUNCTION_ENTER; PMOS_RESOURCE globalGpuContextSyncTagBuffer = nullptr; MHW_CHK_STATUS_RETURN(m_osInterface->pfnGetGpuStatusBufferResource( m_osInterface, globalGpuContextSyncTagBuffer)); MHW_CHK_NULL_RETURN(globalGpuContextSyncTagBuffer); uint32_t offset = m_osInterface->pfnGetGpuStatusTagOffset( m_osInterface, m_osInterface->CurrentGpuContextOrdinal); uint32_t value = m_osInterface->pfnGetGpuStatusTag( m_osInterface, m_osInterface->CurrentGpuContextOrdinal); MHW_MI_STORE_DATA_PARAMS params; params.pOsResource = globalGpuContextSyncTagBuffer; params.dwResourceOffset = offset; params.dwValue = value; MHW_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(cmdBuffer, ¶ms)); // Increment GPU Context Tag for next use m_osInterface->pfnIncrementGpuStatusTag(m_osInterface, m_osInterface->CurrentGpuContextOrdinal); return eStatus; } MOS_STATUS MediaMemDecompState::SetMediaObjectCopyCurbe( DecompKernelStateIdx kernelStateIdx) { MOS_STATUS eStatus = MOS_STATUS_SUCCESS; MHW_FUNCTION_ENTER; if ((kernelStateIdx >= decompKernelStateMax)) { eStatus = MOS_STATUS_INVALID_PARAMETER; return 
eStatus; } MediaObjectCopyCurbe cmd; cmd.m_dw0.srcSurface0Index = copySurfaceSrcY; cmd.m_dw3.dstSurface0Index = copySurfaceDstY; if (kernelStateIdx == decompKernelStatePl2) { cmd.m_dw1.srcSurface1Index = copySurfaceSrcU; cmd.m_dw4.dstSurface1Index = copySurfaceDstU; } MHW_CHK_STATUS_RETURN(m_kernelStates[kernelStateIdx].m_dshRegion.AddData( &cmd, m_kernelStates[kernelStateIdx].dwCurbeOffset, sizeof(cmd))); return eStatus; } MOS_STATUS MediaMemDecompState::SetKernelStateParams() { MOS_STATUS eStatus = MOS_STATUS_SUCCESS; MHW_FUNCTION_ENTER; MHW_CHK_NULL_RETURN(m_renderInterface->GetHwCaps()); for (uint32_t krnIdx = 0; krnIdx < decompKernelStateMax; krnIdx++) { auto kernelState = &m_kernelStates[krnIdx]; kernelState->KernelParams.pBinary = m_kernelBinary[krnIdx]; kernelState->KernelParams.iSize = m_kernelSize[krnIdx]; kernelState->KernelParams.iBTCount = copySurfaceNum; kernelState->KernelParams.iThreadCount = m_renderInterface->GetHwCaps()->dwMaxThreads; kernelState->KernelParams.iCurbeLength = MOS_ALIGN_CEIL( MediaObjectCopyCurbe::m_byteSize, m_stateHeapInterface->pStateHeapInterface->GetCurbeAlignment()); kernelState->KernelParams.iBlockWidth = 32; kernelState->KernelParams.iBlockHeight = 16; kernelState->KernelParams.iIdCount = 1; kernelState->dwCurbeOffset = m_stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData(); MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnCalculateSshAndBtSizesRequested( m_stateHeapInterface, kernelState->KernelParams.iBTCount, &kernelState->dwSshSize, &kernelState->dwBindingTableSize)); kernelState->dwKernelBinaryOffset = 0; eStatus = m_stateHeapInterface->pfnAssignSpaceInStateHeap( m_stateHeapInterface, MHW_ISH_TYPE, kernelState, kernelState->KernelParams.iSize, true, false); if (eStatus == MOS_STATUS_CLIENT_AR_NO_SPACE) { MHW_ASSERTMESSAGE("CodecHal does not handle this case"); return eStatus; } else if (eStatus != MOS_STATUS_SUCCESS) { return eStatus; } MHW_CHK_STATUS_RETURN(kernelState->m_ishRegion.AddData( 
kernelState->KernelParams.pBinary, 0, kernelState->KernelParams.iSize)); } return eStatus; } MOS_STATUS MediaMemDecompState::Initialize( PMOS_INTERFACE osInterface, MhwCpInterface *cpInterface, MhwMiInterface *miInterface, MhwRenderInterface *renderInterface) { MOS_STATUS eStatus = MOS_STATUS_SUCCESS; MediaUserSettingSharedPtr userSettingPtr = nullptr; MOS_USER_FEATURE_VALUE_DATA userFeatureData = {}; MHW_FUNCTION_ENTER; MHW_CHK_NULL_RETURN(osInterface); MHW_CHK_NULL_RETURN(cpInterface); MHW_CHK_NULL_RETURN(miInterface); MHW_CHK_NULL_RETURN(renderInterface); m_osInterface = osInterface; m_cpInterface = cpInterface; m_miInterface = miInterface; m_renderInterface = renderInterface; userSettingPtr = osInterface->pfnGetUserSettingInstance(osInterface); for (uint8_t kernelIdx = decompKernelStatePa; kernelIdx < decompKernelStateMax; kernelIdx++) { MHW_CHK_STATUS_RETURN(InitKernelState(kernelIdx)); } if (m_stateHeapSettings.dwIshSize > 0 && m_stateHeapSettings.dwDshSize > 0 && m_stateHeapSettings.dwNumSyncTags > 0) { MHW_CHK_STATUS_RETURN(m_renderInterface->AllocateHeaps( m_stateHeapSettings)); } m_stateHeapInterface = m_renderInterface->m_stateHeapInterface; MHW_CHK_NULL_RETURN(m_stateHeapInterface); if (m_osInterface->pfnIsGpuContextValid(m_osInterface, MOS_GPU_CONTEXT_RENDER) == MOS_STATUS_SUCCESS) { m_renderContext = MOS_GPU_CONTEXT_RENDER; } else { MOS_GPUCTX_CREATOPTIONS createOption; MHW_CHK_STATUS_RETURN(m_osInterface->pfnCreateGpuContext( m_osInterface, MOS_GPU_CONTEXT_RENDER, MOS_GPU_NODE_3D, &createOption)); m_renderContext = MOS_GPU_CONTEXT_RENDER; } MOS_NULL_RENDERING_FLAGS nullHWAccelerationEnable; nullHWAccelerationEnable.Value = 0; m_disableDecodeSyncLock = false; #if (_DEBUG || _RELEASE_INTERNAL) ReadUserSettingForDebug( userSettingPtr, nullHWAccelerationEnable.Value, __MEDIA_USER_FEATURE_VALUE_NULL_HW_ACCELERATION_ENABLE, MediaUserSetting::Group::Device); MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); MOS_UserFeature_ReadValue_ID( nullptr, 
__MEDIA_USER_FEATURE_VALUE_DECODE_LOCK_DISABLE_ID, &userFeatureData, m_osInterface->pOsContext); m_disableDecodeSyncLock = userFeatureData.u32Data ? true : false; #endif // _DEBUG || _RELEASE_INTERNAL m_disableLockForTranscode = MEDIA_IS_WA(m_osInterface->pfnGetWaTable(m_osInterface), WaDisableLockForTranscodePerf); MHW_CHK_STATUS_RETURN(SetKernelStateParams()); m_renderContextUsesNullHw = ((m_renderContext == MOS_GPU_CONTEXT_RENDER) ? nullHWAccelerationEnable.CtxRender : nullHWAccelerationEnable.CtxRender2) || nullHWAccelerationEnable.Mmc; MOS_ALLOC_GFXRES_PARAMS allocParams; MOS_ZeroMemory(&allocParams, sizeof(allocParams)); allocParams.Type = MOS_GFXRES_BUFFER; allocParams.TileType = MOS_TILE_LINEAR; allocParams.Format = Format_Buffer; allocParams.dwBytes = MHW_CACHELINE_SIZE; allocParams.pBufName = "CmdBufIdGlobal"; MHW_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource( m_osInterface, &allocParams, &m_resCmdBufIdGlobal)); m_currCmdBufId = MemoryBlock::m_invalidTrackerId + 1; MOS_LOCK_PARAMS lockParams; MOS_ZeroMemory(&lockParams, sizeof(lockParams)); lockParams.WriteOnly = 1; m_cmdBufIdGlobal = (uint32_t *)m_osInterface->pfnLockResource( m_osInterface, &m_resCmdBufIdGlobal, &lockParams); MHW_CHK_NULL_RETURN(m_cmdBufIdGlobal); MOS_ZeroMemory(m_cmdBufIdGlobal, allocParams.dwBytes); MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetCmdBufStatusPtr( m_stateHeapInterface, m_cmdBufIdGlobal)); return eStatus; }