/* * Copyright (c) 2017, Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ //! //! \file cm_queue.h //! \brief Contains CmQueue declarations. //! #ifndef MEDIADRIVER_AGNOSTIC_COMMON_CM_CMQUEUE_H_ #define MEDIADRIVER_AGNOSTIC_COMMON_CM_CMQUEUE_H_ #include "cm_def.h" enum CM_FASTCOPY_OPTION { CM_FASTCOPY_OPTION_NONBLOCKING = 0x00, CM_FASTCOPY_OPTION_BLOCKING = 0x01, CM_FASTCOPY_OPTION_DISABLE_TURBO_BOOST = 0x02 }; enum CM_GPUCOPY_DIRECTION { CM_FASTCOPY_GPU2CPU = 0, CM_FASTCOPY_CPU2GPU = 1, CM_FASTCOPY_GPU2GPU = 2, CM_FASTCOPY_CPU2CPU = 3 }; namespace CMRT_UMD { class CmTask; class CmEvent; class CmThreadSpace; class CmThreadGroupSpace; class CmVebox; class CmSurface2D; class CmBuffer; //! //! \brief CmQueue class for task queue management. //! \details The CmQueue object represents a CM task queue. Each task //! represented by a CmTask object has one or more kernels that //! can be run concurrently. Each kernel can run in multiple //! threads concurrently. The CmQueue is an in-order queue. //! Tasks get executed according to the order they get enqueued. //! The next task doesn't start execution until the current task //! finishes. Hence an output of one task can be assumed to be //! available for use as input for any subsequent task in the //! queue.Each Enqueue generates a CmEvent. Task with thread //! space should call Enqueue with a valid thread space pointer, //! while task with group specification should call //! EnqueueWithGroup with a valid pTGS group space pointer. //! class CmQueue { public: //! //! \brief Enqueue a task for execution with per-task thread space. //! \details This function enqueues a task represented by the CmTask object. //! The kernels in the CmTask object may be run concurrently. //! Tasks get executed according to the order they get enqueued. //! This is a non-blocking call. It returns immediately without waiting //! for GPU to start or finish execution. A CmEvent is generated each time //! a task is enqueued. The CmEvent can be used to check the status of task. //! The generated event needs to be managed and released by user. //! Since event is not useful in some cases, runtime provides the capability //! to avoid generating event. //! If thread space is valid, the dependency defined by thread space will be honored. //! \param [in] task //! pointer to task to submit //! \param [in,out] event //! reference to pointer of event generated. If it is set as CM_NO_EVENT, //! its value returned by runtime is NULL. //! \param [in] threadSpace //! pointer to thread space which can define the thread dependency within the task. //! This is a per task thread space. If this task has multiple kernels, each kernel //! will have the thread space of same dimension, same dependency etc. If it is nullptr, //! there is no thread dependency and the maximum thread space width will be asssumed //! to calculate the coordinates for each thread. For each kernel , the per kernel thread space //! that is defined by calling CmKernel::AssociateThreadSpace() overwrites the per task thread space. //! \retval CM_SUCCESS if the task is successfully enqueued. //! \retval CM_OUT_OF_HOST_MEMORY if out of host memory //! \retval CM_FAILURE otherwise //! CM_RT_API virtual int32_t Enqueue(CmTask *task, CmEvent *&event, const CmThreadSpace *threadSpace = nullptr) = 0; //! //! \brief Destroy the CmEvent generated by Enqueue. //! \details Destroy the event object previously generated by Enqueue. //! The CmEvent object can be destroyed even before the corresponding task flushed or finished. //! If this happens, there is no way the app can get the task status. //! \param [in] event //! reference to pointer to event //! \retval CM_SUCCESS if event destroyed successfully //! \retval CM_FAILURE otherwise //! CM_RT_API virtual int32_t DestroyEvent(CmEvent *&event) = 0; //! //! \brief Enqueue the task with thread group space. //! \details //! \param [in] task //! pointer to task to submit //! \param [in,out] event //! reference to pointer of event generated. If it is set as CM_NO_EVENT, //! its value returned by runtime is NULL. //! \param [in] threadGroupSpace //! pointer to thread group space which defines the dimensions of the task. //! threadGroupSpace can not be NULL. //! \retval CM_SUCCESS if the task is successfully enqueued. //! \retval CM_INVALID_ARG_VALUE if input task is not valid //! \retval CM_EXCEED_MAX_KERNEL_PER_ENQUEUE if the task's kernel number exceeds limitation. //! \retval CM_INVALID_THREAD_GROUP_SPACE if the thread group space specification is invalid. //! \retval CM_THREAD_ARG_NOT_ALLOWED if user has per thread arguments //! CM_RT_API virtual int32_t EnqueueWithGroup(CmTask *task, CmEvent *&event, const CmThreadGroupSpace *threadGroupSpace = nullptr) = 0; //! //! \brief Enqueues the kernel to copy from system(CPU) memory to video(GPU) memory. //! \details This function enqueues a task, which contains a pre-defined kernel to copy from host //! system memory to video surface. //! This is a non-blocking call. A CmEvent is generated each time a task is enqueued. //! The CmEvent can be used to check the status. //! The host memory sysMem must be 16-Byte aligned and surface's width in bytes must be 16-Byte aligned as well. //! \param [in] surface //! surface as copy destination, surface's width in bytes must be 16-Byte aligned //! \param [in] sysMem //! host memory as copy source, must be 16-Byte aligned //! \param [in,out] event //! reference to pointer of event generated. If it is set as CM_NO_EVENT, //! its value returned by runtime is NULL. //! \retval CM_SUCCESS if the task is successfully enqueued //! \retval CM_GPUCOPY_INVALID_WIDTH if surface's width in bytes is not 16-Byte aligned //! or more than CM_MAX_GPUCOPY_SURFACE_WIDTH_IN_BYTE. //! \retval CM_GPUCOPY_INVALID_SYSMEM if sysMem is not 16-Byte aligned. //! \retval CM_GPUCOPY_OUT_OF_RESOURCE if runtime runs out of resources //! \retval CM_GPUCOPY_INVALID_SIZE if surface's height is more than CM_MAX_GPUCOPY_SURFACE_HEIGHT //! \retval CM_FAILURE otherwise //! CM_RT_API virtual int32_t EnqueueCopyCPUToGPU(CmSurface2D *surface, const unsigned char *sysMem, CmEvent* &event) = 0; //! //! \brief Enqueues the kernel to copy from video(GPU) memory to system(CPU) memory. //! \details This function enqueues a task, which contains a pre-defined kernel to copy from surface to system memory. //! This is a non-blocking call. A CmEvent is generated each time a task is enqueued. //! The CmEvent can be used to check the status or other data regarding the task execution. //! The host memory sysMem must be 16-Byte aligned and surface's width in bytes must be 16-Byte aligned as well. //! \param [in] surface //! surface as copy source, surface's width in bytes must be 16-Byte aligned //! \param [in] sysMem //! host memory as copy destination, must be 16-Byte aligned //! \param [in,out] event //! reference to pointer of event generated. If it is set as CM_NO_EVENT, //! its value returned by runtime is NULL. //! \retval CM_SUCCESS if the task is successfully enqueued //! \retval CM_GPUCOPY_INVALID_WIDTH if surface's width in bytes is not 16-Byte aligned //! or more than CM_MAX_GPUCOPY_SURFACE_WIDTH_IN_BYTE. //! \retval CM_GPUCOPY_INVALID_SYSMEM if sysMem is not 16-Byte aligned. //! \retval CM_GPUCOPY_OUT_OF_RESOURCE if runtime runs out of resources //! \retval CM_GPUCOPY_INVALID_SIZE if surface's height is more than CM_MAX_GPUCOPY_SURFACE_HEIGHT //! \retval CM_FAILURE otherwise //! CM_RT_API virtual int32_t EnqueueCopyGPUToCPU(CmSurface2D *surface, unsigned char *sysMem, CmEvent *&event) = 0; //! //! \brief Enqueues the kernel to initialize a 2D surface. //! \details This function enqueues a task, which contains a pre-defined kernel to initialize a surface 2d //! This is a non-blocking call. A CmEvent is generated each time a task is enqueued. //! The CmEvent can be used to check the status or other data regarding the task execution. //! \param [in] surface //! surface to initialize //! \param [in] initValue //! value to fill the surface //! \param [in,out] event //! reference to pointer of event generated. If it is set as CM_NO_EVENT, //! its value returned by runtime is NULL. //! \retval CM_SUCCESS if the task is successfully enqueued //! \retval CM_FAILURE otherwise //! CM_RT_API virtual int32_t EnqueueInitSurface2D(CmSurface2D *surface, const uint32_t initValue, CmEvent *&event) = 0; //! //! \brief Enqueue the kernel to copy memory between surfaces. //! \details This function enqueues a task, which contains a pre-defined kernel to copy memory between surfaces. //! This is a non-blocking call. A CmEvent is generated each time a task is enqueued. //! The CmEvent can be used to check the status or other data regarding the task execution. //! The input and output surfaces should have the same width, height and format. //! \param [in] inputSurface //! surface as copy source //! \param [in] outputSurface //! surface as copy destination //! \param [in] option //! If it is "CM_FASTCOPY_OPTION_NONBLOCKING", it returns immediately without waiting for GPU to start or finish.\n //! If it is "CM_FASTCOPY_OPTION_BLOCKING", this function will return until copy is finished indeed.\n //! If it is "CM_FASTCOPY_OPTION_DISABLE_TURBO_BOOST", mdf turbo boost is disabled. //! \param [in,out] event //! reference to pointer of event generated. If it is set as CM_NO_EVENT, //! its value returned by runtime is NULL. //! \retval CM_SUCCESS if the task is successfully enqueued //! \retval CM_GPUCOPY_INVALID_SURFACES if the input and output surfaces have different //! width, height and format. //! \retval CM_GPUCOPY_INVALID_SIZE if surface's height is more than CM_MAX_GPUCOPY_SURFACE_HEIGHT //! \retval CM_FAILURE otherwise //! CM_RT_API virtual int32_t EnqueueCopyGPUToGPU(CmSurface2D *outputSurface, CmSurface2D *inputSurface, uint32_t option, CmEvent *&event) = 0; //! //! \brief Enqueues the kernel to copy memory between host memories. //! \details This function enqueues a task, which contains a pre-defined kernel to copy memory from src to dest memory. //! Both dstSysMem and srcSysMem need to be 16-Byte aligned. The maximum size is determined by sytem's memory //! and it should be less than CM_MAX_1D_SURF_WIDTH bytes which is 1G bytes now. If the copy size is less than //! 1K bytes, the event will not be generated and it is a blocking call. //! For the size larger than 1K bytes, this is a non-blocking call. //! A CmEvent is generated to check the status or other data regarding the task execution. //! To avoid generating event, user can set the event as CM_NO_EVENT and pass it to this function //! \param [in] dstSysMem //! destination memory, must be 16-Byte aligned //! \param [in] srcSysMem //! source memory, must be 16-Byte aligned //! \param [in] size //! size of memory to copy in bytes //! \param [in] option //! If it is "CM_FASTCOPY_OPTION_NONBLOCKING", it returns immediately without waiting for GPU to start or finish.\n //! If it is "CM_FASTCOPY_OPTION_BLOCKING", this function will return until copy is finished indeed.\n //! If it is "CM_FASTCOPY_OPTION_DISABLE_TURBO_BOOST", mdf turbo boost is disabled. //! \param [in,out] event //! reference to pointer of event generated. If it is set as CM_NO_EVENT, //! its value returned by runtime is NULL. //! \retval CM_SUCCESS if the task is successfully enqueued //! \retval CM_GPUCOPY_INVALID_SYSMEM if dstSysMem or srcSysMem is not 16-Byte aligned. //! \retval CM_GPUCOPY_OUT_OF_RESOURCE if runtime runs out of resources //! \retval CM_FAILURE otherwise //! CM_RT_API virtual int32_t EnqueueCopyCPUToCPU(unsigned char *dstSysMem, unsigned char *srcSysMem, uint32_t size, uint32_t option, CmEvent *&event) = 0; //! //! \brief Enqueue the kernel to copy memory from system memory to video memory with width and height stride. //! \details This function enqueues a task, which contains a pre-defined kernel to copy from system memory to a surface. //! Depending on user "opiton", this is a non-blocking or blocking call. //! A CmEvent is generated each time a task is enqueued. The CmEvent can be used to check the status or other data //! regarding the task execution. To avoid generating event, user can set the event as CM_NO_EVENT and pass it to //! this function. The host memory sysMem's width stride must be 16-Byte aligned, and height stride has no any //! alignment restriction. //! \param [in] surface //! surface as copy destination //! \param [in] sysMem //! system memory as copy source must be 16-Byte aligned //! \param [in] widthStride //! width stride of memory stored in host memory, in bytes, must be 16-Byte aligned //! \param [in] heightStride //! height stride of memory stored in host memory, in bytes. //! \param [in] option //! If it is "CM_FASTCOPY_OPTION_NONBLOCKING", it returns immediately without waiting for GPU to start or finish.\n //! If it is "CM_FASTCOPY_OPTION_BLOCKING", this function will return until copy is finished indeed.\n //! If it is "CM_FASTCOPY_OPTION_DISABLE_TURBO_BOOST", mdf turbo boost is disabled. //! \param [in,out] event //! reference to pointer of event generated. If it is set as CM_NO_EVENT, //! its value returned by runtime is NULL. //! \retval CM_SUCCESS if the task is successfully enqueued //! \retval CM_GPUCOPY_INVALID_WIDTH if surface's width in bytes is not 16-Byte aligned //! or more than CM_MAX_GPUCOPY_SURFACE_WIDTH_IN_BYTE. //! \retval CM_GPUCOPY_INVALID_SYSMEM if sysMem is not 16-Byte aligned. //! \retval CM_GPUCOPY_OUT_OF_RESOURCE if runtime runs out of resources //! \retval CM_GPUCOPY_INVALID_SIZE if surface's height is more than CM_MAX_GPUCOPY_SURFACE_HEIGHT //! \retval CM_FAILURE otherwise //! CM_RT_API virtual int32_t EnqueueCopyCPUToGPUFullStride(CmSurface2D *surface, const unsigned char *sysMem, const uint32_t widthStride, const uint32_t heightStride, const uint32_t option, CmEvent *& event) = 0; //! //! \brief Enqueue the kernel to copy memory from video memory to system memory with width and height stride. //! \details This function enqueues a task, which contains a pre-defined kernel to copy from surface to system memory. //! Depending on user "opiton", this is a non-blocking or blocking call. //! A CmEvent is generated each time a task is enqueued. The CmEvent can be used to check the status or other data //! regarding the task execution. To avoid generating event, user can set the event as CM_NO_EVENT and pass it to //! this function. The host memory sysMem's width stride must be 16-Byte aligned, and height stride has no any //! alignment restriction. //! \param [in] surface //! surface as copy source //! \param [in] sysMem //! system memory as copy destination, must be 16-Byte aligned //! \param [in] widthStride //! width stride of memory stored in host memory, in bytes, must be 16-Byte aligned //! \param [in] heightStride //! height stride of memory stored in host memory, in bytes, //! \param [in] option //! If it is "CM_FASTCOPY_OPTION_NONBLOCKING", it returns immediately without waiting for GPU to start or finish.\n //! If it is "CM_FASTCOPY_OPTION_BLOCKING", this function will return until copy is finished indeed.\n //! If it is "CM_FASTCOPY_OPTION_DISABLE_TURBO_BOOST", mdf turbo boost is disabled. //! \param [in,out] event //! reference to pointer of event generated. If it is set as CM_NO_EVENT, //! its value returned by runtime is NULL. //! \retval CM_SUCCESS if the task is successfully enqueued //! \retval CM_GPUCOPY_INVALID_STRIDE if stride is not 16-Byte aligned or less than surface's width in bytes. //! \retval CM_GPUCOPY_INVALID_SYSMEM if sysMem is not 16-Byte aligned. //! \retval CM_GPUCOPY_INVALID_SIZE if surface's height is more than CM_MAX_GPUCOPY_SURFACE_HEIGHT //! \retval CM_GPUCOPY_OUT_OF_RESOURCE if runtime runs out of resources //! \retval CM_FAILURE otherwise //! CM_RT_API virtual int32_t EnqueueCopyGPUToCPUFullStride(CmSurface2D *surface, unsigned char *sysMem, const uint32_t widthStride, const uint32_t heightStride, const uint32_t option, CmEvent *& event) = 0; //! //! \brief Enqueue a task for execution with hints. //! \details This API is designed to saturate the EUs when running a large dependency kernel. //! At least two kernels must exist in the task. The ideal case is at least one large dependency kernel //! running with smaller kernels. The idea is to get the smaller kernels for free during the time it already //! takes to execute the large dependency kernel. Each task can have up to CAP_KERNEL_COUNT_PER_TASK kernels. //! The 0th bit of the hints indicates to use media object or media walker. Currently, only media object is valid. //! The next bits indicate whether the next kernel is in the same or different kernel group. //! For example, if the 1th bit is set then the second kernel is in a different kernel group from the first kernel, //! if it is not set it is in the same kernel group. The kernels are interleaved between different kernel groups //! and run concurrently. Within a kernel group, the kernels are dispatched in order. The kernel groups are dispatched //! to separate sub-slices. The assumption is made that the kernel groups are comparable in kernel execution time. //! There can be no dependency between different kernels; all kernels in the task should be independent of one another. //! Additionally, pKernel->AssociateThreadSpace(CmThreadSpace*& pTS) must be called for each kernel. //! A CmEvent is generated to check the status or other data regarding the task execution. //! To avoid generating event, user can set the event as CM_NO_EVENT and pass it to this function. //! \param [in] task //! pointer to task to submit //! \param [in,out] event //! reference to pointer of event generated. If it is set as CM_NO_EVENT, //! its value returned by runtime is NULL. //! \param [in] hints //! Hints about work load from host to driver. //! \retval CM_SUCCESS if the task is successfully enqueued. //! \retval CM_OUT_OF_HOST_MEMORY if out of host memory //! \retval CM_FAILURE otherwise //! CM_RT_API virtual int32_t EnqueueWithHints(CmTask *task, CmEvent *&event, uint32_t hints = 0) = 0; //! //! \brief Enqueue a vebox task to vebox engine. //! \details This call submits a VEBOX task to VEBOX engine for execution. //! Before this function is called, user need call CmDevice::CreateVebox() to create a CmVebox object, //! and call the APIs in CmVebox class to set up VEBOX state and surfaces. //! \param [in] vebox //! Pointer to a CmVebox object. //! \param [in,out] event //! reference to pointer of event generated. If it is set as CM_NO_EVENT, //! its value returned by runtime is NULL. //! \retval CM_SUCCESS if the task is successfully enqueued. //! \retval CM_OUT_OF_HOST_MEMORY if out of host memory //! \retval CM_INVALID_ARG_VALUE if input vebox is not valid //! \retval CM_FAILURE otherwise //! CM_RT_API virtual int32_t EnqueueVebox(CmVebox *vebox, CmEvent *&event) = 0; //! //! \brief Enqueue a task for execution with per-task thread space in a fast path. //! \details This function enqueues a task represented by the CmTask object. //! The kernels in the CmTask object may be run concurrently. //! Tasks get executed according to the order they get enqueued. //! This is a non-blocking call. It returns immediately without waiting //! for GPU to start or finish execution. A CmEvent is generated each time //! a task is enqueued. The CmEvent can be used to check the status of task. //! The generated event needs to be managed and released by user. //! Since event is not useful in some cases, runtime provides the capability //! to avoid generating event. //! If thread space is valid, the dependency defined by thread space will be honored. //! \param [in] task //! pointer to task to submit //! \param [in,out] event //! reference to pointer of event generated. If it is set as CM_NO_EVENT, //! its value returned by runtime is NULL. //! \param [in] threadSpace //! pointer to thread space which can define the thread dependency within the task. //! This is a per task thread space. If this task has multiple kernels, each kernel //! will have the thread space of same dimension, same dependency etc. If it is nullptr, //! there is no thread dependency and the maximum thread space width will be asssumed //! to calculate the coordinates for each thread. For each kernel , the per kernel thread space //! that is defined by calling CmKernel::AssociateThreadSpace() overwrites the per task thread space. //! \retval CM_SUCCESS if the task is successfully enqueued. //! \retval CM_OUT_OF_HOST_MEMORY if out of host memory //! \retval CM_FAILURE otherwise //! CM_RT_API virtual int32_t EnqueueFast(CmTask *task, CmEvent *&event, const CmThreadSpace *threadSpace = nullptr) = 0; //! //! \brief Destroy the CmEvent generated by EnqueueFast. //! \details Destroy the event object previously generated by EnqueueFast. //! The CmEvent object can be destroyed even before the corresponding task flushed or finished. //! If this happens, there is no way the app can get the task status. //! \param [in] event //! reference to pointer to event //! \retval CM_SUCCESS if event destroyed successfully //! \retval CM_FAILURE otherwise //! CM_RT_API virtual int32_t DestroyEventFast(CmEvent *&event) = 0; //! //! \brief Enqueue the task with thread group space in a fast path. //! \details //! \param [in]task //! pointer to task to submit //! \param [in,out] event //! reference to pointer of event generated. If it is set as CM_NO_EVENT, //! its value returned by runtime is NULL. //! \param [in] threadGroupSpace //! pointer to thread group space which defines the dimensions of the task. //! pThreadGroupSpace can not be NULL. //! \retval CM_SUCCESS if the task is successfully enqueued. //! \retval CM_INVALID_ARG_VALUE if input task is not valid //! \retval CM_EXCEED_MAX_KERNEL_PER_ENQUEUE if the task's kernel number exceeds limitation. //! \retval CM_INVALID_THREAD_GROUP_SPACE if the thread group space specification is invalid. //! \retval CM_THREAD_ARG_NOT_ALLOWED if user has per thread arguments //! CM_RT_API virtual int32_t EnqueueWithGroupFast(CmTask *task, CmEvent *&event, const CmThreadGroupSpace *threadGroupSpace = nullptr) = 0; }; };//namespace #endif // #ifndef MEDIADRIVER_AGNOSTIC_COMMON_CM_CMQUEUE_H_