1 /*
2 * Copyright (c) 2018-2019, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file cm_command_buffer.cpp
24 //! \brief Contains Class CmCommandBuffer definitions
25 //!
26
27 #include "cm_command_buffer.h"
28 #include "cm_ish.h"
29 #include "cm_ssh.h"
30 #include "cm_media_state.h"
31 #include "cm_thread_space_rt.h"
32 #include "cm_mem.h"
33 #include "cm_kernel_ex.h"
34 #include "cm_group_space.h"
35 #if IGFX_GEN11_SUPPORTED
36 #include "mhw_render_g11_X.h"
37 #endif
38 #if IGFX_GEN12_SUPPORTED
39 #include "mhw_render_g12_X.h"
40 #include "mhw_mi_g12_X.h"
41 #include "cm_hal_g12.h"
42 #endif
43 #include "mos_solo_generic.h"
44 #include "mos_os_cp_interface_specific.h"
45
CmCommandBuffer(CM_HAL_STATE * cmhal)46 CmCommandBuffer::CmCommandBuffer(CM_HAL_STATE *cmhal):
47 m_cmhal(cmhal),
48 m_osInterface(nullptr),
49 m_miInterface(nullptr),
50 m_hwRender(nullptr),
51 m_ssh(nullptr),
52 m_origRemain(0)
53 {
54 MOS_ZeroMemory(&m_cmdBuf, sizeof(m_cmdBuf));
55 MOS_ZeroMemory(m_masks, sizeof(m_masks));
56 }
57
~CmCommandBuffer()58 CmCommandBuffer::~CmCommandBuffer()
59 {
60 if (m_ssh)
61 {
62 MOS_Delete(m_ssh);
63 }
64 }
65
Initialize()66 MOS_STATUS CmCommandBuffer::Initialize()
67 {
68 if (m_cmhal == nullptr)
69 {
70 return MOS_STATUS_NULL_POINTER;
71 }
72 m_osInterface = m_cmhal->osInterface;
73 m_miInterface = m_cmhal->renderHal->pMhwMiInterface;
74 m_hwRender = m_cmhal->renderHal->pMhwRenderInterface;
75 if (m_osInterface == nullptr)
76 {
77 return MOS_STATUS_NULL_POINTER;
78 }
79
80 CM_CHK_MOSSTATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &m_cmdBuf, 0));
81
82 m_cmdBuf.Attributes.bIsMdfLoad = true;
83 m_origRemain = m_cmdBuf.iRemaining;
84
85 return MOS_STATUS_SUCCESS;
86 }
87
GetSSH()88 CmSSH* CmCommandBuffer::GetSSH()
89 {
90 if (m_ssh != nullptr)
91 {
92 return m_ssh;
93 }
94 m_ssh = MOS_New(CmSSH, m_cmhal, &m_cmdBuf);
95 return m_ssh;
96 }
97
AddFlushCacheAndSyncTask(bool isRead,bool rtCache,MOS_RESOURCE * syncBuffer)98 MOS_STATUS CmCommandBuffer::AddFlushCacheAndSyncTask(bool isRead,
99 bool rtCache,
100 MOS_RESOURCE *syncBuffer)
101 {
102 MHW_PIPE_CONTROL_PARAMS pipeCtlParams;
103 MOS_ZeroMemory(&pipeCtlParams, sizeof(pipeCtlParams));
104 pipeCtlParams.presDest = syncBuffer;
105 pipeCtlParams.bFlushRenderTargetCache = rtCache;
106 pipeCtlParams.dwFlushMode = isRead ? MHW_FLUSH_READ_CACHE : MHW_FLUSH_WRITE_CACHE;
107 pipeCtlParams.dwPostSyncOp = MHW_FLUSH_NOWRITE;
108 return m_miInterface->AddPipeControl(&m_cmdBuf, nullptr, &pipeCtlParams);
109 }
110
AddReadTimeStamp(MOS_RESOURCE * resource,uint32_t offset,bool isRead)111 MOS_STATUS CmCommandBuffer::AddReadTimeStamp(MOS_RESOURCE *resource, uint32_t offset, bool isRead)
112 {
113 MHW_PIPE_CONTROL_PARAMS pipeCtlParams;
114 MOS_ZeroMemory(&pipeCtlParams, sizeof(pipeCtlParams));
115 pipeCtlParams.bFlushRenderTargetCache = true;
116 pipeCtlParams.presDest = resource;
117 pipeCtlParams.dwResourceOffset = offset;
118 pipeCtlParams.dwPostSyncOp = MHW_FLUSH_WRITE_TIMESTAMP_REG;
119 pipeCtlParams.dwFlushMode = isRead ? MHW_FLUSH_READ_CACHE : MHW_FLUSH_WRITE_CACHE;
120 return m_miInterface->AddPipeControl(&m_cmdBuf, nullptr, &pipeCtlParams);
121 }
122
AddL3CacheConfig(L3ConfigRegisterValues * l3Values)123 MOS_STATUS CmCommandBuffer::AddL3CacheConfig(L3ConfigRegisterValues *l3Values)
124 {
125 if (m_cmhal->platform.eRenderCoreFamily <= IGFX_GEN10_CORE) //gen10-
126 {
127 MHW_RENDER_ENGINE_L3_CACHE_SETTINGS l3CacheSettting = {};
128 if (l3Values->config_register3)
129 {
130 l3CacheSettting.dwCntlReg = l3Values->config_register3;
131 }
132 else
133 {
134 l3CacheSettting.dwCntlReg = 0x60000060;
135 }
136 CM_CHK_MOSSTATUS_RETURN(m_hwRender->EnableL3Caching(&l3CacheSettting));
137 return m_hwRender->SetL3Cache(&m_cmdBuf);
138 }
139 #if IGFX_GEN11_SUPPORTED
140 else if (m_cmhal->platform.eRenderCoreFamily == IGFX_GEN11_CORE)
141 {
142 MHW_RENDER_ENGINE_L3_CACHE_SETTINGS_G11 l3CacheSettting = {};
143 l3CacheSettting.dwTcCntlReg = l3Values->config_register1;
144 l3CacheSettting.dwCntlReg = (l3Values->config_register0 == 0)?0xA0000420:l3Values->config_register0;
145 CM_CHK_MOSSTATUS_RETURN(m_hwRender->EnableL3Caching(&l3CacheSettting));
146 return m_hwRender->SetL3Cache(&m_cmdBuf);
147 }
148 #endif
149 #if IGFX_GEN12_SUPPORTED
150 else //gen12
151 {
152 MHW_RENDER_ENGINE_L3_CACHE_SETTINGS_G12 l3CacheSettting = {};
153
154 l3CacheSettting.dwAllocReg = (l3Values->config_register0 == 0)?
155 m_cmhal->cmHalInterface->m_l3Plane[0].config_register0
156 :l3Values->config_register0;
157 l3CacheSettting.dwTcCntlReg = (l3Values->config_register1 == 0)?
158 m_cmhal->cmHalInterface->m_l3Plane[0].config_register1
159 :l3Values->config_register1;
160
161 CM_CHK_MOSSTATUS_RETURN(m_hwRender->EnableL3Caching(&l3CacheSettting));
162 return m_hwRender->SetL3Cache(&m_cmdBuf);
163 }
164 #endif
165 return MOS_STATUS_UNKNOWN;
166 }
167
AddPipelineSelect(bool gpgpu)168 MOS_STATUS CmCommandBuffer::AddPipelineSelect(bool gpgpu)
169 {
170 return m_hwRender->AddPipelineSelectCmd(&m_cmdBuf, gpgpu);
171 }
172
AddStateBaseAddress(CmISH * ish,CmMediaState * mediaState)173 MOS_STATUS CmCommandBuffer::AddStateBaseAddress(CmISH *ish, CmMediaState *mediaState)
174 {
175 MHW_STATE_BASE_ADDR_PARAMS stateBaseAddressParams;
176 MOS_ZeroMemory(&stateBaseAddressParams, sizeof(stateBaseAddressParams));
177
178 MOS_RESOURCE *gshResource = mediaState->GetHeapResource();
179 uint32_t gshSize = mediaState->GetHeapSize();
180 MOS_RESOURCE *ishResource = ish->GetResource();
181 uint32_t ishSize = ish->GetSize();
182
183 stateBaseAddressParams.presGeneralState = gshResource;
184 stateBaseAddressParams.dwGeneralStateSize = gshSize;
185 stateBaseAddressParams.presDynamicState = gshResource;
186 stateBaseAddressParams.dwDynamicStateSize = gshSize;
187 stateBaseAddressParams.bDynamicStateRenderTarget = false;
188 stateBaseAddressParams.presIndirectObjectBuffer = gshResource;
189 stateBaseAddressParams.dwIndirectObjectBufferSize = gshSize;
190 stateBaseAddressParams.presInstructionBuffer = ishResource;
191 stateBaseAddressParams.dwInstructionBufferSize = ishSize;
192
193 uint32_t heapMocs = m_osInterface->pfnCachePolicyGetMemoryObject(MOS_CM_RESOURCE_USAGE_SurfaceState,
194 m_osInterface->pfnGetGmmClientContext(m_osInterface)).DwordValue;
195 stateBaseAddressParams.mocs4DynamicState = heapMocs;
196 stateBaseAddressParams.mocs4GeneralState = heapMocs;
197 stateBaseAddressParams.mocs4InstructionCache = heapMocs;
198 stateBaseAddressParams.mocs4SurfaceState = heapMocs;
199 stateBaseAddressParams.mocs4IndirectObjectBuffer = heapMocs;
200 stateBaseAddressParams.mocs4StatelessDataport = heapMocs;
201
202 return m_hwRender->AddStateBaseAddrCmd(&m_cmdBuf, &stateBaseAddressParams);
203 }
204
AddMediaVFE(CmMediaState * mediaState,bool fusedEuDispatch,CMRT_UMD::CmThreadSpaceRT ** threadSpaces,uint32_t count)205 MOS_STATUS CmCommandBuffer::AddMediaVFE(CmMediaState *mediaState, bool fusedEuDispatch, CMRT_UMD::CmThreadSpaceRT **threadSpaces, uint32_t count)
206 {
207 MHW_VFE_PARAMS vfeParams = {};
208 #if IGFX_GEN12_SUPPORTED
209 MHW_VFE_PARAMS_G12 vfeParamsG12 = {};
210 #endif
211 MHW_VFE_PARAMS *param = nullptr;
212 if (m_cmhal->platform.eRenderCoreFamily <= IGFX_GEN11_CORE)
213 {
214 param = &vfeParams;
215 }
216 #if IGFX_GEN12_SUPPORTED
217 else
218 {
219 param = &vfeParamsG12;
220 vfeParamsG12.bFusedEuDispatch = fusedEuDispatch;
221 }
222 #endif
223 MHW_RENDER_ENGINE_CAPS *hwCaps = m_hwRender->GetHwCaps();
224
225 param->dwDebugCounterControl = MEDIASTATE_DEBUG_COUNTER_FREE_RUNNING;
226 param->dwNumberofURBEntries = 32;
227 param->dwMaximumNumberofThreads = hwCaps->dwMaxThreads;
228 param->dwCURBEAllocationSize = MOS_ROUNDUP_SHIFT(mediaState->GetCurbeSize(), 5) << 5;
229 param->dwURBEntryAllocationSize = 1;
230 param->dwPerThreadScratchSpace = 0;
231 param->dwScratchSpaceBasePointer = mediaState->GetScratchSpaceOffset();
232
233 uint32_t scratchSizePerThread = mediaState->GetScratchSizePerThread();
234 if (scratchSizePerThread > 0)
235 {
236 scratchSizePerThread = scratchSizePerThread >> 9;
237 int remain = scratchSizePerThread % 2;
238 scratchSizePerThread = scratchSizePerThread / 2;
239 int sizeParam = 0;
240 while ((scratchSizePerThread / 2) && !remain)
241 {
242 sizeParam++;
243 remain = scratchSizePerThread % 2;
244 scratchSizePerThread = scratchSizePerThread / 2;
245 }
246 param->dwPerThreadScratchSpace = sizeParam;
247 }
248
249 if (threadSpaces != nullptr && m_cmhal->cmHalInterface->IsScoreboardParamNeeded())
250 {
251 bool globalSpace = (count == 0);
252 uint32_t spaceCount = globalSpace ? 1 : count;
253
254 uint8_t map[256] = {0}; // x*4+y as index, order in the global dependency vector as value (starting from 1)
255 uint8_t index = 1;
256 for (uint32_t i = 0; i < spaceCount; i ++)
257 {
258 if (threadSpaces[i] == nullptr)
259 {
260 continue;
261 }
262 CM_HAL_DEPENDENCY *dependency;
263 threadSpaces[i]->GetDependency(dependency);
264 for (uint32_t j = 0; j < dependency->count; j ++)
265 {
266 uint8_t depVec = (uint8_t)(dependency->deltaX[j]) * 16 + (uint8_t)(dependency->deltaY[j]);
267 if (map[depVec] == 0)
268 {
269 param->Scoreboard.ScoreboardDelta[index - 1].x = (uint8_t)(dependency->deltaX[j]);
270 param->Scoreboard.ScoreboardDelta[index - 1].y = (uint8_t)(dependency->deltaY[j]);
271 map[depVec] = index ++;
272
273 }
274 m_masks[i] |= 1 << (map[depVec] - 1);
275 }
276 }
277
278 if (globalSpace)
279 {
280 CmSafeMemSet(m_masks, m_masks[0], sizeof(m_masks));
281 }
282
283 param->Scoreboard.ScoreboardEnable = 1;
284 param->Scoreboard.ScoreboardMask = (1 << (index-1)) - 1;
285 param->Scoreboard.ScoreboardType = (param->Scoreboard.ScoreboardMask != 0);
286 }
287 else
288 {
289 param->Scoreboard.ScoreboardEnable = 1;
290 }
291
292 return m_hwRender->AddMediaVfeCmd(&m_cmdBuf, param);
293
294 }
295
AddCurbeLoad(CmMediaState * mediaState)296 MOS_STATUS CmCommandBuffer::AddCurbeLoad(CmMediaState *mediaState)
297 {
298 MHW_CURBE_LOAD_PARAMS curbeLoadParams;
299 MOS_ZeroMemory(&curbeLoadParams, sizeof(curbeLoadParams));
300
301 uint32_t curbeSize = mediaState->GetCurbeSize();
302 if (curbeSize > 0)
303 {
304 curbeLoadParams.pKernelState = nullptr;
305 curbeLoadParams.bOldInterface = false;
306 curbeLoadParams.dwCURBETotalDataLength = curbeSize;
307 curbeLoadParams.dwCURBEDataStartAddress = mediaState->GetCurbeOffset();
308
309 return m_hwRender->AddMediaCurbeLoadCmd(&m_cmdBuf, &curbeLoadParams);
310 }
311 return MOS_STATUS_SUCCESS;
312 }
313
AddMediaIDLoad(CmMediaState * mediaState)314 MOS_STATUS CmCommandBuffer::AddMediaIDLoad(CmMediaState *mediaState)
315 {
316 MHW_ID_LOAD_PARAMS idLoadParams;
317 MOS_ZeroMemory(&idLoadParams, sizeof(idLoadParams));
318
319 idLoadParams.dwInterfaceDescriptorStartOffset = mediaState->GetMediaIDOffset();
320 idLoadParams.dwInterfaceDescriptorLength = mediaState->GetMediaIDSize();
321
322 return m_hwRender->AddMediaIDLoadCmd(&m_cmdBuf, &idLoadParams);
323 }
324
AddSyncBetweenKernels()325 MOS_STATUS CmCommandBuffer::AddSyncBetweenKernels()
326 {
327 MHW_PIPE_CONTROL_PARAMS pipeCtlParams;
328 MOS_ZeroMemory(&pipeCtlParams, sizeof(pipeCtlParams));
329 pipeCtlParams.bInvalidateTextureCache = true;
330 pipeCtlParams.bFlushRenderTargetCache = true;
331 pipeCtlParams.dwFlushMode = MHW_FLUSH_CUSTOM;
332 pipeCtlParams.dwPostSyncOp = MHW_FLUSH_NOWRITE;
333 return m_miInterface->AddPipeControl(&m_cmdBuf, nullptr, &pipeCtlParams);
334 }
335
AddMediaObjectWalker(CMRT_UMD::CmThreadSpaceRT * threadSpace,uint32_t mediaID)336 MOS_STATUS CmCommandBuffer::AddMediaObjectWalker(CMRT_UMD::CmThreadSpaceRT *threadSpace, uint32_t mediaID)
337 {
338 MHW_WALKER_PARAMS mediaWalkerParams;
339 MOS_ZeroMemory(&mediaWalkerParams, sizeof(mediaWalkerParams));
340
341 mediaWalkerParams.CmWalkerEnable = true;
342 mediaWalkerParams.InterfaceDescriptorOffset = mediaID;
343
344 mediaWalkerParams.InlineDataLength = 0;
345 mediaWalkerParams.pInlineData = nullptr;
346
347 uint32_t colorCountM1 = 0;
348 CM_MW_GROUP_SELECT groupSelect = CM_MW_GROUP_NONE;
349
350 CM_WALKING_PATTERN walkPattern = CM_WALK_DEFAULT;
351 CM_DEPENDENCY_PATTERN dependencyPattern = CM_NONE_DEPENDENCY;
352 uint32_t threadSpaceWidth = 1;
353 uint32_t threadSpaceHeight = 1;
354
355 if (threadSpace != nullptr)
356 {
357 threadSpace->GetColorCountMinusOne(colorCountM1);
358 threadSpace->GetMediaWalkerGroupSelect(groupSelect);
359 threadSpace->GetWalkingPattern(walkPattern);
360 threadSpace->GetDependencyPatternType(dependencyPattern);
361 threadSpace->GetThreadSpaceSize(threadSpaceWidth, threadSpaceHeight);
362 }
363
364 mediaWalkerParams.ColorCountMinusOne = colorCountM1;
365 mediaWalkerParams.GroupIdLoopSelect = (uint32_t)groupSelect;
366
367 uint32_t threadCount = threadSpaceWidth * threadSpaceHeight;
368 switch (dependencyPattern)
369 {
370 case CM_NONE_DEPENDENCY:
371 break;
372 case CM_HORIZONTAL_WAVE:
373 walkPattern = CM_WALK_HORIZONTAL;
374 break;
375 case CM_VERTICAL_WAVE:
376 walkPattern = CM_WALK_VERTICAL;
377 break;
378 case CM_WAVEFRONT:
379 walkPattern = CM_WALK_WAVEFRONT;
380 break;
381 case CM_WAVEFRONT26:
382 walkPattern = CM_WALK_WAVEFRONT26;
383 break;
384 case CM_WAVEFRONT26X:
385 if (threadSpaceWidth > 1)
386 {
387 walkPattern = CM_WALK_WAVEFRONT26X;
388 }
389 else
390 {
391 walkPattern = CM_WALK_DEFAULT;
392 }
393 break;
394 case CM_WAVEFRONT26ZIG:
395 if (threadSpaceWidth > 2)
396 {
397 walkPattern = CM_WALK_WAVEFRONT26ZIG;
398 }
399 else
400 {
401 walkPattern = CM_WALK_DEFAULT;
402 }
403 break;
404 default:
405 CM_ASSERTMESSAGE("Error: Invalid walking pattern.");
406 walkPattern = CM_WALK_DEFAULT;
407 break;
408 }
409
410 mediaWalkerParams.BlockResolution.x = threadSpaceWidth;
411 mediaWalkerParams.BlockResolution.y = threadSpaceHeight;
412
413 mediaWalkerParams.LocalStart.x = 0;
414 mediaWalkerParams.LocalStart.y = 0;
415 mediaWalkerParams.LocalEnd.x = 0;
416 mediaWalkerParams.LocalEnd.y = 0;
417
418 mediaWalkerParams.dwGlobalLoopExecCount = 1;
419 mediaWalkerParams.MidLoopUnitX = 0;
420 mediaWalkerParams.MidLoopUnitY = 0;
421 mediaWalkerParams.MiddleLoopExtraSteps = 0;
422
423 uint32_t adjHeight = ((threadSpaceHeight + 1) >> 1) << 1;
424 uint32_t adjWidth = ((threadSpaceWidth + 1) >> 1) << 1;
425
426 uint32_t maxThreadWidth = m_cmhal->cmHalInterface->GetMediaWalkerMaxThreadWidth();
427
428 switch (walkPattern)
429 {
430 case CM_WALK_DEFAULT:
431 case CM_WALK_HORIZONTAL:
432 if (threadSpaceWidth == threadCount && threadSpaceHeight == 1 && maxThreadWidth)
433 {
434 mediaWalkerParams.BlockResolution.x = MOS_MIN(threadCount, maxThreadWidth);
435 mediaWalkerParams.BlockResolution.y = 1 + threadCount / maxThreadWidth;
436 }
437 mediaWalkerParams.dwLocalLoopExecCount = mediaWalkerParams.BlockResolution.y - 1;
438
439 mediaWalkerParams.LocalOutLoopStride.x = 0;
440 mediaWalkerParams.LocalOutLoopStride.y = 1;
441 mediaWalkerParams.LocalInnerLoopUnit.x = 1;
442 mediaWalkerParams.LocalInnerLoopUnit.y = 0;
443
444 mediaWalkerParams.LocalEnd.x = mediaWalkerParams.BlockResolution.x - 1;
445
446 break;
447
448 case CM_WALK_WAVEFRONT:
449 mediaWalkerParams.dwLocalLoopExecCount = threadSpaceWidth + (threadSpaceHeight - 1) * 1 - 1;
450
451 mediaWalkerParams.LocalOutLoopStride.x = 1;
452 mediaWalkerParams.LocalOutLoopStride.y = 0;
453 mediaWalkerParams.LocalInnerLoopUnit.x = 0xFFFF; // -1 in uint32_t:16
454 mediaWalkerParams.LocalInnerLoopUnit.y = 1;
455 break;
456
457 case CM_WALK_WAVEFRONT26:
458 mediaWalkerParams.dwLocalLoopExecCount = threadSpaceWidth + (threadSpaceHeight - 1) * 2 - 1;
459
460 mediaWalkerParams.LocalOutLoopStride.x = 1;
461 mediaWalkerParams.LocalOutLoopStride.y = 0;
462 mediaWalkerParams.LocalInnerLoopUnit.x = 0xFFFE; // -2 in uint32_t:16
463 mediaWalkerParams.LocalInnerLoopUnit.y = 1;
464 break;
465
466 case CM_WALK_WAVEFRONT26X:
467 case CM_WALK_WAVEFRONT26XALT:
468 mediaWalkerParams.dwLocalLoopExecCount = 0x7ff;
469 mediaWalkerParams.dwGlobalLoopExecCount = 0;
470
471 mediaWalkerParams.LocalOutLoopStride.x = 1;
472 mediaWalkerParams.LocalOutLoopStride.y = 0;
473 mediaWalkerParams.LocalInnerLoopUnit.x = 0xFFFE; // -2 in uint32_t:16
474 mediaWalkerParams.LocalInnerLoopUnit.y = 2;
475
476 mediaWalkerParams.MiddleLoopExtraSteps = 1;
477 mediaWalkerParams.MidLoopUnitX = 0;
478 mediaWalkerParams.MidLoopUnitY = 1;
479 break;
480
481 case CM_WALK_WAVEFRONT26ZIG:
482 mediaWalkerParams.dwLocalLoopExecCount = 1;
483 mediaWalkerParams.dwGlobalLoopExecCount = (adjHeight / 2 - 1) * 2 + (adjWidth / 2) - 1;
484
485 mediaWalkerParams.LocalOutLoopStride.x = 0;
486 mediaWalkerParams.LocalOutLoopStride.y = 1;
487 mediaWalkerParams.LocalInnerLoopUnit.x = 1;
488 mediaWalkerParams.LocalInnerLoopUnit.y = 0;
489
490 mediaWalkerParams.BlockResolution.x = 2;
491 mediaWalkerParams.BlockResolution.y = 2;
492
493 mediaWalkerParams.LocalEnd.x = mediaWalkerParams.BlockResolution.x - 1;
494 break;
495
496 case CM_WALK_VERTICAL:
497 mediaWalkerParams.dwLocalLoopExecCount = mediaWalkerParams.BlockResolution.x - 1;
498
499 mediaWalkerParams.LocalOutLoopStride.x = 1;
500 mediaWalkerParams.LocalOutLoopStride.y = 0;
501 mediaWalkerParams.LocalInnerLoopUnit.x = 0;
502 mediaWalkerParams.LocalInnerLoopUnit.y = 1;
503
504 mediaWalkerParams.LocalEnd.y = mediaWalkerParams.BlockResolution.y - 1;
505
506 break;
507
508 case CM_WALK_WAVEFRONT45D:
509 mediaWalkerParams.dwLocalLoopExecCount = 0x7ff;
510 mediaWalkerParams.dwGlobalLoopExecCount = 0x7ff;
511
512 mediaWalkerParams.LocalStart.x = threadSpaceWidth;
513 mediaWalkerParams.LocalOutLoopStride.x = 1;
514 mediaWalkerParams.LocalOutLoopStride.y = 0;
515 mediaWalkerParams.LocalInnerLoopUnit.x = 0xFFFF; // -1 in uint32_t:16
516 mediaWalkerParams.LocalInnerLoopUnit.y = 1;
517 break;
518
519 case CM_WALK_WAVEFRONT45XD_2:
520 mediaWalkerParams.dwLocalLoopExecCount = 0x7ff;
521 mediaWalkerParams.dwGlobalLoopExecCount = 0x7ff;
522
523 // Local
524 mediaWalkerParams.LocalStart.x = threadSpaceWidth;
525 mediaWalkerParams.LocalOutLoopStride.x = 1;
526 mediaWalkerParams.LocalOutLoopStride.y = 0;
527 mediaWalkerParams.LocalInnerLoopUnit.x = 0xFFFF; // -1 in uint32_t:16
528 mediaWalkerParams.LocalInnerLoopUnit.y = 2;
529
530 // Mid
531 mediaWalkerParams.MiddleLoopExtraSteps = 1;
532 mediaWalkerParams.MidLoopUnitX = 0;
533 mediaWalkerParams.MidLoopUnitY = 1;
534
535 break;
536
537 case CM_WALK_WAVEFRONT26D:
538 mediaWalkerParams.dwLocalLoopExecCount = 0x7ff;
539 mediaWalkerParams.dwGlobalLoopExecCount = 0x7ff;
540
541 mediaWalkerParams.LocalStart.x = threadSpaceWidth;
542 mediaWalkerParams.LocalOutLoopStride.x = 1;
543 mediaWalkerParams.LocalOutLoopStride.y = 0;
544 mediaWalkerParams.LocalInnerLoopUnit.x = 0xFFFE; // -2 in uint32_t:16
545 mediaWalkerParams.LocalInnerLoopUnit.y = 1;
546 break;
547
548 case CM_WALK_WAVEFRONT26XD:
549 mediaWalkerParams.dwLocalLoopExecCount = 0x7ff;
550 mediaWalkerParams.dwGlobalLoopExecCount = 0x7ff;
551
552 // Local
553 mediaWalkerParams.LocalStart.x = threadSpaceWidth;
554 mediaWalkerParams.LocalOutLoopStride.x = 1;
555 mediaWalkerParams.LocalOutLoopStride.y = 0;
556 mediaWalkerParams.LocalInnerLoopUnit.x = 0xFFFE; // -2 in uint32_t:16
557 mediaWalkerParams.LocalInnerLoopUnit.y = 2;
558
559 // Mid
560 mediaWalkerParams.MiddleLoopExtraSteps = 1;
561 mediaWalkerParams.MidLoopUnitX = 0;
562 mediaWalkerParams.MidLoopUnitY = 1;
563 break;
564
565 default:
566 mediaWalkerParams.dwLocalLoopExecCount = MOS_MIN(threadCount, 0x3FF);
567
568 mediaWalkerParams.LocalOutLoopStride.x = 0;
569 mediaWalkerParams.LocalOutLoopStride.y = 1;
570 mediaWalkerParams.LocalInnerLoopUnit.x = 1;
571 mediaWalkerParams.LocalInnerLoopUnit.y = 0;
572 break;
573 }
574
575 //Global loop parameters: execution count, resolution and strides
576 //Since no global loop, global resolution equals block resolution.
577 mediaWalkerParams.GlobalStart.x = 0;
578 mediaWalkerParams.GlobalStart.y = 0;
579 mediaWalkerParams.GlobalOutlerLoopStride.y = 0;
580
581 if (walkPattern == CM_WALK_WAVEFRONT26ZIG)
582 {
583 mediaWalkerParams.GlobalResolution.x = threadSpaceWidth;
584 mediaWalkerParams.GlobalResolution.y = threadSpaceHeight;
585 mediaWalkerParams.GlobalOutlerLoopStride.x = 2;
586 mediaWalkerParams.GlobalInnerLoopUnit.x = 0xFFFC;
587 mediaWalkerParams.GlobalInnerLoopUnit.y = 2;
588 }
589 else
590 {
591 mediaWalkerParams.GlobalResolution.x = mediaWalkerParams.BlockResolution.x;
592 mediaWalkerParams.GlobalResolution.y = mediaWalkerParams.BlockResolution.y;
593 mediaWalkerParams.GlobalOutlerLoopStride.x = mediaWalkerParams.GlobalResolution.x;
594 mediaWalkerParams.GlobalInnerLoopUnit.x = 0;
595 mediaWalkerParams.GlobalInnerLoopUnit.y = mediaWalkerParams.GlobalResolution.y;
596 }
597
598 mediaWalkerParams.UseScoreboard = 1;
599 mediaWalkerParams.ScoreboardMask = m_masks[mediaID];
600
601 return m_hwRender->AddMediaObjectWalkerCmd(&m_cmdBuf, &mediaWalkerParams);
602 }
603
AddDummyVFE()604 MOS_STATUS CmCommandBuffer::AddDummyVFE()
605 {
606 // Add PipeControl to invalidate ISP and MediaState to avoid PageFault issue
607 MHW_PIPE_CONTROL_PARAMS pipeControlParams;
608
609 MOS_ZeroMemory(&pipeControlParams, sizeof(pipeControlParams));
610 pipeControlParams.dwFlushMode = MHW_FLUSH_WRITE_CACHE;
611 pipeControlParams.bGenericMediaStateClear = true;
612 pipeControlParams.bIndirectStatePointersDisable = true;
613 pipeControlParams.bDisableCSStall = false;
614 CM_CHK_MOSSTATUS_RETURN(m_miInterface->AddPipeControl(&m_cmdBuf, nullptr, &pipeControlParams));
615
616 if (MEDIA_IS_WA(m_cmhal->renderHal->pWaTable, WaSendDummyVFEafterPipelineSelect))
617 {
618 MHW_VFE_PARAMS vfeStateParams;
619
620 MOS_ZeroMemory(&vfeStateParams, sizeof(vfeStateParams));
621 vfeStateParams.dwNumberofURBEntries = 1;
622 CM_CHK_MOSSTATUS_RETURN(m_hwRender->AddMediaVfeCmd(&m_cmdBuf, &vfeStateParams));
623 }
624
625 return MOS_STATUS_SUCCESS;
626 }
627
AddBatchBufferEnd()628 MOS_STATUS CmCommandBuffer::AddBatchBufferEnd()
629 {
630 return m_miInterface->AddMiBatchBufferEnd(&m_cmdBuf, nullptr);
631 }
632
AddMMCProlog()633 MOS_STATUS CmCommandBuffer::AddMMCProlog()
634 {
635 #if IGFX_GEN12_SUPPORTED
636 uint64_t auxTableBaseAddr = 0;
637
638 auxTableBaseAddr = m_cmhal->osInterface->pfnGetAuxTableBaseAddr(m_cmhal->osInterface);
639
640 if (auxTableBaseAddr)
641 {
642 MHW_MI_LOAD_REGISTER_IMM_PARAMS lriParams;
643 MOS_ZeroMemory(&lriParams, sizeof(MHW_MI_LOAD_REGISTER_IMM_PARAMS));
644
645 lriParams.dwRegister = MhwMiInterfaceG12::m_mmioRcsAuxTableBaseLow;
646 lriParams.dwData = (auxTableBaseAddr & 0xffffffff);
647 CM_CHK_MOSSTATUS_RETURN(m_miInterface->AddMiLoadRegisterImmCmd(&m_cmdBuf, &lriParams));
648
649 lriParams.dwRegister = MhwMiInterfaceG12::m_mmioRcsAuxTableBaseHigh;
650 lriParams.dwData = ((auxTableBaseAddr >> 32) & 0xffffffff);
651 CM_CHK_MOSSTATUS_RETURN(m_miInterface->AddMiLoadRegisterImmCmd(&m_cmdBuf, &lriParams));
652 }
653 #endif
654 return MOS_STATUS_SUCCESS;
655 }
656
AddProtectedProlog()657 MOS_STATUS CmCommandBuffer::AddProtectedProlog()
658 {
659 return m_miInterface->AddProtectedProlog(&m_cmdBuf);
660 }
661
ReturnUnusedBuffer()662 void CmCommandBuffer::ReturnUnusedBuffer()
663 {
664 m_osInterface->pfnReturnCommandBuffer(m_osInterface, &m_cmdBuf, 0);
665 }
666
ReturnWholeBuffer()667 void CmCommandBuffer::ReturnWholeBuffer()
668 {
669 int tmp = m_origRemain - m_cmdBuf.iRemaining;
670 m_cmdBuf.iRemaining = m_origRemain;
671 m_cmdBuf.iOffset -= tmp;
672 m_cmdBuf.pCmdPtr = m_cmdBuf.pCmdBase + m_cmdBuf.iOffset/sizeof(uint32_t);
673
674 m_osInterface->pfnReturnCommandBuffer(m_osInterface, &m_cmdBuf, 0);
675 }
676
Submit()677 MOS_STATUS CmCommandBuffer::Submit()
678 {
679 return m_osInterface->pfnSubmitCommandBuffer(m_osInterface,
680 &m_cmdBuf,
681 m_cmhal->nullHwRenderCm);
682 }
683
AddPreemptionConfig(bool isGpgpu)684 MOS_STATUS CmCommandBuffer::AddPreemptionConfig(bool isGpgpu)
685 {
686 bool csrEnable = !m_cmhal->midThreadPreemptionDisabled;
687 if (MEDIA_IS_SKU(m_cmhal->skuTable, FtrPerCtxtPreemptionGranularityControl))
688 {
689 MHW_MI_LOAD_REGISTER_IMM_PARAMS loadRegImm;
690 MOS_ZeroMemory(&loadRegImm, sizeof(MHW_MI_LOAD_REGISTER_IMM_PARAMS));
691
692 loadRegImm.dwRegister = MHW_RENDER_ENGINE_PREEMPTION_CONTROL_OFFSET;
693
694 // Same reg offset and value for gpgpu pipe and media pipe
695 if (isGpgpu)
696 {
697 if (MEDIA_IS_SKU(m_cmhal->skuTable, FtrGpGpuMidThreadLevelPreempt))
698 {
699 if (csrEnable)
700 {
701 loadRegImm.dwData = MHW_RENDER_ENGINE_MID_THREAD_PREEMPT_VALUE;
702 }
703 else
704 {
705 loadRegImm.dwData = MHW_RENDER_ENGINE_THREAD_GROUP_PREEMPT_VALUE;
706 }
707 }
708 else if (MEDIA_IS_SKU(m_cmhal->skuTable, FtrGpGpuThreadGroupLevelPreempt))
709 {
710 loadRegImm.dwData = MHW_RENDER_ENGINE_THREAD_GROUP_PREEMPT_VALUE;
711 }
712 else if (MEDIA_IS_SKU(m_cmhal->skuTable, FtrGpGpuMidBatchPreempt))
713 {
714 loadRegImm.dwData = MHW_RENDER_ENGINE_MID_BATCH_PREEMPT_VALUE;
715 }
716 else
717 {
718 // if hit this branch then platform does not support any media preemption in render engine. Still program the register to avoid GPU hang
719 loadRegImm.dwData = MHW_RENDER_ENGINE_MID_BATCH_PREEMPT_VALUE;
720 }
721 }
722 else
723 {
724 if ( MEDIA_IS_SKU(m_cmhal->skuTable, FtrMediaMidThreadLevelPreempt))
725 {
726 loadRegImm.dwData = MHW_RENDER_ENGINE_MID_THREAD_PREEMPT_VALUE;
727 }
728 else if ( MEDIA_IS_SKU(m_cmhal->skuTable, FtrMediaThreadGroupLevelPreempt) )
729 {
730 loadRegImm.dwData = MHW_RENDER_ENGINE_THREAD_GROUP_PREEMPT_VALUE;
731 }
732 else if ( MEDIA_IS_SKU(m_cmhal->skuTable, FtrMediaMidBatchPreempt))
733 {
734 loadRegImm.dwData = MHW_RENDER_ENGINE_MID_BATCH_PREEMPT_VALUE;
735 }
736 else
737 {
738 // if hit this branch then platform does not support any media preemption in render engine. Still program the register to avoid GPU hang
739 loadRegImm.dwData = MHW_RENDER_ENGINE_MID_BATCH_PREEMPT_VALUE;
740 }
741 }
742 m_cmdBuf.Attributes.bMediaPreemptionEnabled = m_hwRender->IsPreemptionEnabled();
743 CM_CHK_MOSSTATUS_RETURN(m_miInterface->AddMiLoadRegisterImmCmd(&m_cmdBuf, &loadRegImm));
744 }
745
746 return MOS_STATUS_SUCCESS;
747 }
748
AddSipState(uint32_t sipKernelOffset)749 MOS_STATUS CmCommandBuffer::AddSipState(uint32_t sipKernelOffset)
750 {
751 if (m_cmhal->midThreadPreemptionDisabled)
752 {
753 return MOS_STATUS_SUCCESS;
754 }
755
756 // Send CS_STALL pipe control
757 //Insert a pipe control as synchronization
758 MHW_PIPE_CONTROL_PARAMS pipeCtlParams;
759 MOS_ZeroMemory(&pipeCtlParams, sizeof(MHW_PIPE_CONTROL_PARAMS));
760 pipeCtlParams.presDest = &m_cmhal->renderTimeStampResource.osResource;
761 pipeCtlParams.dwPostSyncOp = MHW_FLUSH_NOWRITE;
762 pipeCtlParams.dwFlushMode = MHW_FLUSH_WRITE_CACHE;
763 pipeCtlParams.bDisableCSStall = 0;
764 pipeCtlParams.bFlushRenderTargetCache = true;
765 CM_CHK_MOSSTATUS_RETURN(m_miInterface->AddPipeControl(&m_cmdBuf, nullptr, &pipeCtlParams));
766
767 MHW_SIP_STATE_PARAMS sipStateParams;
768 MOS_ZeroMemory(&sipStateParams, sizeof(MHW_SIP_STATE_PARAMS));
769 sipStateParams.bSipKernel = true;
770 sipStateParams.dwSipBase = sipKernelOffset;
771
772 CM_CHK_MOSSTATUS_RETURN(m_hwRender->AddSipStateCmd(&m_cmdBuf, &sipStateParams));
773
774 return MOS_STATUS_SUCCESS;
775 }
776
AddCsrBaseAddress(MOS_RESOURCE * resource)777 MOS_STATUS CmCommandBuffer::AddCsrBaseAddress(MOS_RESOURCE *resource)
778 {
779 if (m_cmhal->midThreadPreemptionDisabled)
780 {
781 return MOS_STATUS_SUCCESS;
782 }
783
784 // Send csr base addr command
785 CM_CHK_MOSSTATUS_RETURN(m_hwRender->AddGpgpuCsrBaseAddrCmd(&m_cmdBuf, resource));
786
787 return MOS_STATUS_SUCCESS;
788 }
789
AddConditionalBatchBufferEnd(CM_HAL_CONDITIONAL_BB_END_INFO * cbbInfo)790 MOS_STATUS CmCommandBuffer::AddConditionalBatchBufferEnd(CM_HAL_CONDITIONAL_BB_END_INFO *cbbInfo)
791 {
792 MHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS cbbParams;
793 MOS_ZeroMemory(&cbbParams, sizeof(MHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS));
794
795 cbbParams.presSemaphoreBuffer = &(m_cmhal->bufferTable[cbbInfo->bufferTableIndex].osResource);
796 cbbParams.dwValue = cbbInfo->compareValue;
797 cbbParams.bDisableCompareMask = cbbInfo->disableCompareMask;
798 cbbParams.dwOffset = cbbInfo->offset;
799
800 CM_CHK_MOSSTATUS_RETURN(m_miInterface->AddMiConditionalBatchBufferEndCmd(&m_cmdBuf, &cbbParams));
801
802 return MOS_STATUS_SUCCESS;
803 }
804
AddPowerOption(CM_POWER_OPTION * option)805 MOS_STATUS CmCommandBuffer::AddPowerOption(CM_POWER_OPTION *option)
806 {
807 if (option == nullptr)
808 {
809 return MOS_STATUS_SUCCESS;
810 }
811 if (m_cmhal->cmHalInterface->IsOverridePowerOptionPerGpuContext())
812 {
813 return MOS_STATUS_SUCCESS;
814 }
815
816 MEDIA_FEATURE_TABLE *skuTable = m_cmhal->renderHal->pSkuTable;
817 MEDIA_SYSTEM_INFO *gtSystemInfo = m_osInterface->pfnGetGtSystemInfo(m_osInterface);
818
819 // set command buffer attributes
820 if (skuTable &&
821 (MEDIA_IS_SKU(skuTable, FtrSSEUPowerGating)|| MEDIA_IS_SKU(skuTable, FtrSSEUPowerGatingControlByUMD)))
822 {
823 if ((option->nSlice || option->nSubSlice || option->nEU)
824 && (gtSystemInfo != nullptr && gtSystemInfo->SliceCount && gtSystemInfo->SubSliceCount))
825 {
826 m_cmdBuf.Attributes.dwNumRequestedEUSlices = NonZeroMin(option->nSlice, gtSystemInfo->SliceCount);
827 m_cmdBuf.Attributes.dwNumRequestedSubSlices = NonZeroMin(option->nSubSlice,
828 (gtSystemInfo->SubSliceCount / gtSystemInfo->SliceCount));
829 m_cmdBuf.Attributes.dwNumRequestedEUs = NonZeroMin(option->nEU,
830 (gtSystemInfo->EUCount / gtSystemInfo->SubSliceCount));
831 m_cmdBuf.Attributes.bValidPowerGatingRequest = true;
832 if (m_cmhal->platform.eRenderCoreFamily == IGFX_GEN12_CORE)
833 {
834 m_cmdBuf.Attributes.bUmdSSEUEnable = true;
835 }
836 }
837 if (m_cmhal->requestSingleSlice)
838 {
839 m_cmdBuf.Attributes.dwNumRequestedEUSlices = 1;
840 }
841
842 if (GFX_IS_PRODUCT(m_cmhal->platform, IGFX_SKYLAKE) && m_osInterface->pfnSetSliceCount)
843 {
844 uint32_t sliceCount = m_cmdBuf.Attributes.dwNumRequestedEUSlices;
845 m_osInterface->pfnSetSliceCount(m_osInterface, &sliceCount);
846 }
847 }
848
849 // Add Load register command
850 if(m_cmdBuf.Attributes.bUmdSSEUEnable)
851 {
852 MHW_MI_LOAD_REGISTER_IMM_PARAMS MiLoadRegImmParams;
853 MHW_RENDER_PWR_CLK_STATE_PARAMS params;
854
855 MOS_ZeroMemory(¶ms, sizeof(params));
856 params.PowerClkStateEn = true;
857 params.SCountEn = true;
858 params.SSCountEn = true;
859 params.SliceCount = m_cmdBuf.Attributes.dwNumRequestedEUSlices;
860 params.SubSliceCount = m_cmdBuf.Attributes.dwNumRequestedSubSlices;
861 params.EUmax = m_cmdBuf.Attributes.dwNumRequestedEUs;
862 params.EUmin = m_cmdBuf.Attributes.dwNumRequestedEUs;
863
864 MOS_ZeroMemory(&MiLoadRegImmParams, sizeof(MiLoadRegImmParams));
865 MiLoadRegImmParams.dwRegister = MHW__PWR_CLK_STATE_REG;
866 MiLoadRegImmParams.dwData = params.Data;
867 CM_CHK_MOSSTATUS_RETURN(m_miInterface->AddMiLoadRegisterImmCmd(
868 &m_cmdBuf,
869 &MiLoadRegImmParams));
870 }
871
872 return MOS_STATUS_SUCCESS;
873 }
874
AddUmdProfilerStart()875 MOS_STATUS CmCommandBuffer::AddUmdProfilerStart()
876 {
877 if (m_cmhal->perfProfiler != nullptr)
878 {
879 CM_CHK_MOSSTATUS_RETURN(m_cmhal->perfProfiler->AddPerfCollectStartCmd((void *)m_cmhal, m_osInterface, m_miInterface, &m_cmdBuf));
880 }
881 return MOS_STATUS_SUCCESS;
882 }
883
AddUmdProfilerEnd()884 MOS_STATUS CmCommandBuffer::AddUmdProfilerEnd()
885 {
886 if (m_cmhal->perfProfiler != nullptr)
887 {
888 CM_CHK_MOSSTATUS_RETURN(m_cmhal->perfProfiler->AddPerfCollectEndCmd((void *)m_cmhal, m_osInterface, m_miInterface, &m_cmdBuf));
889 }
890 return MOS_STATUS_SUCCESS;
891 }
892
AddGpgpuWalker(CMRT_UMD::CmThreadGroupSpace * threadGroupSpace,CmKernelEx * kernel,uint32_t mediaID)893 MOS_STATUS CmCommandBuffer::AddGpgpuWalker(CMRT_UMD::CmThreadGroupSpace *threadGroupSpace,
894 CmKernelEx *kernel,
895 uint32_t mediaID)
896 {
897 MHW_GPGPU_WALKER_PARAMS gpGpuWalkerParams;
898 MOS_ZeroMemory(&gpGpuWalkerParams, sizeof(MHW_GPGPU_WALKER_PARAMS));
899 gpGpuWalkerParams.InterfaceDescriptorOffset = mediaID;
900 gpGpuWalkerParams.GpGpuEnable = true;
901
902 threadGroupSpace->GetThreadGroupSpaceSize(gpGpuWalkerParams.ThreadWidth,
903 gpGpuWalkerParams.ThreadHeight,
904 gpGpuWalkerParams.ThreadDepth,
905 gpGpuWalkerParams.GroupWidth,
906 gpGpuWalkerParams.GroupHeight,
907 gpGpuWalkerParams.GroupDepth);
908 gpGpuWalkerParams.SLMSize = kernel->GetSLMSize();
909
910 CM_CHK_MOSSTATUS_RETURN(m_hwRender->AddGpGpuWalkerStateCmd(&m_cmdBuf, &gpGpuWalkerParams));
911 return MOS_STATUS_SUCCESS;
912 }
913
#if IGFX_GEN12_SUPPORTED
//!
//! \brief    Layout of a surface state entry: a common token followed by
//!           either a render or a media (advanced) Gen12 surface state command.
//! \details  Dump() uses the offset of cmdSurfaceState within this structure.
//!
struct PACKET_SURFACE_STATE
{
    SURFACE_STATE_TOKEN_COMMON token;  //!< Token placed ahead of the surface state command
    union
    {
        mhw_state_heap_g12_X::RENDER_SURFACE_STATE_CMD cmdSurfaceState;     //!< Render surface state
        mhw_state_heap_g12_X::MEDIA_SURFACE_STATE_CMD  cmdSurfaceStateAdv;  //!< Media surface state
    };
};
#endif
925
Dump()926 void CmCommandBuffer::Dump()
927 {
928 #if MDF_COMMAND_BUFFER_DUMP
929 if (m_cmhal->dumpCommandBuffer)
930 {
931 m_cmhal->pfnDumpCommadBuffer(
932 m_cmhal,
933 &m_cmdBuf,
934 offsetof(PACKET_SURFACE_STATE, cmdSurfaceState),
935 mhw_state_heap_g12_X::RENDER_SURFACE_STATE_CMD::byteSize);
936 }
937 #endif
938 }
939