1 /*
2 * Copyright © 2021 Red Hat
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "anv_private.h"
25
26 #include "genxml/gen_macros.h"
27 #include "genxml/genX_pack.h"
28
29 #include "util/vl_zscan_data.h"
30
31 void
genX(CmdBeginVideoCodingKHR)32 genX(CmdBeginVideoCodingKHR)(VkCommandBuffer commandBuffer,
33 const VkVideoBeginCodingInfoKHR *pBeginInfo)
34 {
35 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
36 ANV_FROM_HANDLE(anv_video_session, vid, pBeginInfo->videoSession);
37 ANV_FROM_HANDLE(anv_video_session_params, params, pBeginInfo->videoSessionParameters);
38
39 cmd_buffer->video.vid = vid;
40 cmd_buffer->video.params = params;
41 }
42
43 void
genX(CmdControlVideoCodingKHR)44 genX(CmdControlVideoCodingKHR)(VkCommandBuffer commandBuffer,
45 const VkVideoCodingControlInfoKHR *pCodingControlInfo)
46 {
47 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
48
49 if (pCodingControlInfo->flags & VK_VIDEO_CODING_CONTROL_RESET_BIT_KHR) {
50 anv_batch_emit(&cmd_buffer->batch, GENX(MI_FLUSH_DW), flush) {
51 flush.VideoPipelineCacheInvalidate = 1;
52 }
53 }
54 }
55
56 void
genX(CmdEndVideoCodingKHR)57 genX(CmdEndVideoCodingKHR)(VkCommandBuffer commandBuffer,
58 const VkVideoEndCodingInfoKHR *pEndCodingInfo)
59 {
60 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
61
62 cmd_buffer->video.vid = NULL;
63 cmd_buffer->video.params = NULL;
64 }
65
66 static void
scaling_list(struct anv_cmd_buffer * cmd_buffer,const StdVideoH265ScalingLists * scaling_list)67 scaling_list(struct anv_cmd_buffer *cmd_buffer,
68 const StdVideoH265ScalingLists *scaling_list)
69 {
70 /* 4x4, 8x8, 16x16, 32x32 */
71 for (uint8_t size = 0; size < 4; size++) {
72 /* Intra, Inter */
73 for (uint8_t pred = 0; pred < 2; pred++) {
74 /* Y, Cb, Cr */
75 for (uint8_t color = 0; color < 3; color++) {
76 if (size == 3 && color > 0)
77 continue;
78
79 anv_batch_emit(&cmd_buffer->batch, GENX(HCP_QM_STATE), qm) {
80 qm.SizeID = size;
81 qm.PredictionType = pred;
82 qm.ColorComponent = color;
83
84 qm.DCCoefficient = size > 1 ?
85 (size == 2 ? scaling_list->ScalingListDCCoef16x16[3 * pred + color] :
86 scaling_list->ScalingListDCCoef32x32[pred]) : 0;
87
88 if (size == 0) {
89 for (uint8_t i = 0; i < 4; i++)
90 for (uint8_t j = 0; j < 4; j++)
91 qm.QuantizerMatrix8x8[4 * i + j] =
92 scaling_list->ScalingList4x4[3 * pred + color][4 * i + j];
93 } else if (size == 1) {
94 for (uint8_t i = 0; i < 8; i++)
95 for (uint8_t j = 0; j < 8; j++)
96 qm.QuantizerMatrix8x8[8 * i + j] =
97 scaling_list->ScalingList8x8[3 * pred + color][8 * i + j];
98 } else if (size == 2) {
99 for (uint8_t i = 0; i < 8; i++)
100 for (uint8_t j = 0; j < 8; j++)
101 qm.QuantizerMatrix8x8[8 * i + j] =
102 scaling_list->ScalingList16x16[3 * pred + color][8 * i + j];
103 } else if (size == 3) {
104 for (uint8_t i = 0; i < 8; i++)
105 for (uint8_t j = 0; j < 8; j++)
106 qm.QuantizerMatrix8x8[8 * i + j] =
107 scaling_list->ScalingList32x32[pred][8 * i + j];
108 }
109 }
110 }
111 }
112 }
113 }
114
115 static void
anv_h265_decode_video(struct anv_cmd_buffer * cmd_buffer,const VkVideoDecodeInfoKHR * frame_info)116 anv_h265_decode_video(struct anv_cmd_buffer *cmd_buffer,
117 const VkVideoDecodeInfoKHR *frame_info)
118 {
119 ANV_FROM_HANDLE(anv_buffer, src_buffer, frame_info->srcBuffer);
120 struct anv_video_session *vid = cmd_buffer->video.vid;
121 struct anv_video_session_params *params = cmd_buffer->video.params;
122
123 const struct VkVideoDecodeH265PictureInfoKHR *h265_pic_info =
124 vk_find_struct_const(frame_info->pNext, VIDEO_DECODE_H265_PICTURE_INFO_KHR);
125
126 const StdVideoH265SequenceParameterSet *sps =
127 vk_video_find_h265_dec_std_sps(¶ms->vk, h265_pic_info->pStdPictureInfo->pps_seq_parameter_set_id);
128 const StdVideoH265PictureParameterSet *pps =
129 vk_video_find_h265_dec_std_pps(¶ms->vk, h265_pic_info->pStdPictureInfo->pps_pic_parameter_set_id);
130
131 struct vk_video_h265_reference ref_slots[2][8] = { 0 };
132 uint8_t dpb_idx[ANV_VIDEO_H265_MAX_NUM_REF_FRAME] = { 0,};
133 bool is_10bit = sps->bit_depth_chroma_minus8 || sps->bit_depth_luma_minus8;
134
135 anv_batch_emit(&cmd_buffer->batch, GENX(MI_FLUSH_DW), flush) {
136 flush.VideoPipelineCacheInvalidate = 1;
137 };
138
139 #if GFX_VER >= 12
140 anv_batch_emit(&cmd_buffer->batch, GENX(MI_FORCE_WAKEUP), wake) {
141 wake.HEVCPowerWellControl = 1;
142 wake.MaskBits = 768;
143 }
144
145 anv_batch_emit(&cmd_buffer->batch, GENX(VD_CONTROL_STATE), cs) {
146 cs.PipelineInitialization = true;
147 }
148
149 anv_batch_emit(&cmd_buffer->batch, GENX(MFX_WAIT), mfx) {
150 mfx.MFXSyncControlFlag = 1;
151 }
152 #endif
153
154 anv_batch_emit(&cmd_buffer->batch, GENX(HCP_PIPE_MODE_SELECT), sel) {
155 sel.CodecSelect = Decode;
156 sel.CodecStandardSelect = HEVC;
157 }
158
159 #if GFX_VER >= 12
160 anv_batch_emit(&cmd_buffer->batch, GENX(MFX_WAIT), mfx) {
161 mfx.MFXSyncControlFlag = 1;
162 }
163 #endif
164
165 const struct anv_image_view *iv =
166 anv_image_view_from_handle(frame_info->dstPictureResource.imageViewBinding);
167 const struct anv_image *img = iv->image;
168
169 anv_batch_emit(&cmd_buffer->batch, GENX(HCP_SURFACE_STATE), ss) {
170 ss.SurfacePitch = img->planes[0].primary_surface.isl.row_pitch_B - 1;
171 ss.SurfaceID = HCP_CurrentDecodedPicture;
172 ss.SurfaceFormat = is_10bit ? P010 : PLANAR_420_8;
173
174 ss.YOffsetforUCb = img->planes[1].primary_surface.memory_range.offset /
175 img->planes[0].primary_surface.isl.row_pitch_B;
176
177 #if GFX_VER >= 11
178 ss.DefaultAlphaValue = 0xffff;
179 #endif
180 }
181
182 #if GFX_VER >= 12
183 /* Seems to need to set same states to ref as decode on gen12 */
184 anv_batch_emit(&cmd_buffer->batch, GENX(HCP_SURFACE_STATE), ss) {
185 ss.SurfacePitch = img->planes[0].primary_surface.isl.row_pitch_B - 1;
186 ss.SurfaceID = HCP_ReferencePicture;
187 ss.SurfaceFormat = is_10bit ? P010 : PLANAR_420_8;
188
189 ss.YOffsetforUCb = img->planes[1].primary_surface.memory_range.offset /
190 img->planes[0].primary_surface.isl.row_pitch_B;
191
192 ss.DefaultAlphaValue = 0xffff;
193 }
194 #endif
195
196 anv_batch_emit(&cmd_buffer->batch, GENX(HCP_PIPE_BUF_ADDR_STATE), buf) {
197 buf.DecodedPictureAddress =
198 anv_image_address(img, &img->planes[0].primary_surface.memory_range);
199
200 buf.DecodedPictureMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
201 .MOCS = anv_mocs(cmd_buffer->device, buf.DecodedPictureAddress.bo, 0),
202 };
203
204 buf.DeblockingFilterLineBufferAddress = (struct anv_address) {
205 vid->vid_mem[ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_LINE].mem->bo,
206 vid->vid_mem[ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_LINE].offset
207 };
208
209 buf.DeblockingFilterLineBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
210 .MOCS = anv_mocs(cmd_buffer->device, buf.DeblockingFilterLineBufferAddress.bo, 0),
211 };
212
213 buf.DeblockingFilterTileLineBufferAddress = (struct anv_address) {
214 vid->vid_mem[ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_TILE_LINE].mem->bo,
215 vid->vid_mem[ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_TILE_LINE].offset
216 };
217
218 buf.DeblockingFilterTileLineBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
219 .MOCS = anv_mocs(cmd_buffer->device, buf.DeblockingFilterTileLineBufferAddress.bo, 0),
220 };
221
222 buf.DeblockingFilterTileColumnBufferAddress = (struct anv_address) {
223 vid->vid_mem[ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_TILE_COLUMN].mem->bo,
224 vid->vid_mem[ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_TILE_COLUMN].offset
225 };
226
227 buf.DeblockingFilterTileColumnBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
228 .MOCS = anv_mocs(cmd_buffer->device, buf.DeblockingFilterTileColumnBufferAddress.bo, 0),
229 };
230
231 buf.MetadataLineBufferAddress = (struct anv_address) {
232 vid->vid_mem[ANV_VID_MEM_H265_METADATA_LINE].mem->bo,
233 vid->vid_mem[ANV_VID_MEM_H265_METADATA_LINE].offset
234 };
235
236 buf.MetadataLineBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
237 .MOCS = anv_mocs(cmd_buffer->device, buf.MetadataLineBufferAddress.bo, 0),
238 };
239
240 buf.MetadataTileLineBufferAddress = (struct anv_address) {
241 vid->vid_mem[ANV_VID_MEM_H265_METADATA_TILE_LINE].mem->bo,
242 vid->vid_mem[ANV_VID_MEM_H265_METADATA_TILE_LINE].offset
243 };
244
245 buf.MetadataTileLineBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
246 .MOCS = anv_mocs(cmd_buffer->device, buf.MetadataTileLineBufferAddress.bo, 0),
247 };
248
249 buf.MetadataTileColumnBufferAddress = (struct anv_address) {
250 vid->vid_mem[ANV_VID_MEM_H265_METADATA_TILE_COLUMN].mem->bo,
251 vid->vid_mem[ANV_VID_MEM_H265_METADATA_TILE_COLUMN].offset
252 };
253
254 buf.MetadataTileColumnBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
255 .MOCS = anv_mocs(cmd_buffer->device, buf.MetadataTileColumnBufferAddress.bo, 0),
256 };
257
258 buf.SAOLineBufferAddress = (struct anv_address) {
259 vid->vid_mem[ANV_VID_MEM_H265_SAO_LINE].mem->bo,
260 vid->vid_mem[ANV_VID_MEM_H265_SAO_LINE].offset
261 };
262
263 buf.SAOLineBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
264 .MOCS = anv_mocs(cmd_buffer->device, buf.SAOLineBufferAddress.bo, 0),
265 };
266
267 buf.SAOTileLineBufferAddress = (struct anv_address) {
268 vid->vid_mem[ANV_VID_MEM_H265_SAO_TILE_LINE].mem->bo,
269 vid->vid_mem[ANV_VID_MEM_H265_SAO_TILE_LINE].offset
270 };
271
272 buf.SAOTileLineBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
273 .MOCS = anv_mocs(cmd_buffer->device, buf.SAOTileLineBufferAddress.bo, 0),
274 };
275
276 buf.SAOTileColumnBufferAddress = (struct anv_address) {
277 vid->vid_mem[ANV_VID_MEM_H265_SAO_TILE_COLUMN].mem->bo,
278 vid->vid_mem[ANV_VID_MEM_H265_SAO_TILE_COLUMN].offset
279 };
280
281 buf.SAOTileColumnBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
282 .MOCS = anv_mocs(cmd_buffer->device, buf.SAOTileColumnBufferAddress.bo, 0),
283 };
284
285 buf.CurrentMVTemporalBufferAddress = anv_image_address(img, &img->vid_dmv_top_surface);
286
287 buf.CurrentMVTemporalBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
288 .MOCS = anv_mocs(cmd_buffer->device, buf.CurrentMVTemporalBufferAddress.bo, 0),
289 };
290
291 for (unsigned i = 0; i < frame_info->referenceSlotCount; i++) {
292 const struct anv_image_view *ref_iv =
293 anv_image_view_from_handle(frame_info->pReferenceSlots[i].pPictureResource->imageViewBinding);
294 int slot_idx = frame_info->pReferenceSlots[i].slotIndex;
295
296 assert(slot_idx < ANV_VIDEO_H265_MAX_NUM_REF_FRAME);
297 dpb_idx[slot_idx] = i;
298
299 buf.ReferencePictureAddress[i] =
300 anv_image_address(ref_iv->image, &ref_iv->image->planes[0].primary_surface.memory_range);
301 }
302
303 buf.ReferencePictureMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
304 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
305 };
306
307 buf.OriginalUncompressedPictureSourceMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
308 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
309 };
310
311 buf.StreamOutDataDestinationMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
312 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
313 };
314
315 buf.DecodedPictureStatusBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
316 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
317 };
318
319 buf.LCUILDBStreamOutBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
320 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
321 };
322
323 for (unsigned i = 0; i < frame_info->referenceSlotCount; i++) {
324 const struct anv_image_view *ref_iv =
325 anv_image_view_from_handle(frame_info->pReferenceSlots[i].pPictureResource->imageViewBinding);
326
327 buf.CollocatedMVTemporalBufferAddress[i] =
328 anv_image_address(ref_iv->image, &ref_iv->image->vid_dmv_top_surface);
329 }
330
331 buf.CollocatedMVTemporalBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
332 .MOCS = anv_mocs(cmd_buffer->device, buf.CollocatedMVTemporalBufferAddress[0].bo, 0),
333 };
334
335 buf.VP9ProbabilityBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
336 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
337 };
338
339 buf.VP9SegmentIDBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
340 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
341 };
342
343 buf.VP9HVDLineRowStoreBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
344 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
345 };
346
347 buf.VP9HVDTileRowStoreBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
348 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
349 };
350 #if GFX_VER >= 11
351 buf.SAOStreamOutDataDestinationBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
352 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
353 };
354 buf.FrameStatisticsStreamOutDataDestinationBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
355 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
356 };
357 buf.SSESourcePixelRowStoreBufferMemoryAddressAttributesReadWrite = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
358 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
359 };
360 buf.HCPScalabilitySliceStateBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
361 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
362 };
363 buf.HCPScalabilityCABACDecodedSyntaxElementsBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
364 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
365 };
366 buf.MVUpperRightColumnStoreBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
367 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
368 };
369 buf.IntraPredictionUpperRightColumnStoreBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
370 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
371 };
372 buf.IntraPredictionLeftReconColumnStoreBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
373 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
374 };
375 #endif
376 }
377
378 anv_batch_emit(&cmd_buffer->batch, GENX(HCP_IND_OBJ_BASE_ADDR_STATE), indirect) {
379 indirect.HCPIndirectBitstreamObjectBaseAddress =
380 anv_address_add(src_buffer->address, frame_info->srcBufferOffset & ~4095);
381
382 indirect.HCPIndirectBitstreamObjectMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
383 .MOCS = anv_mocs(cmd_buffer->device, src_buffer->address.bo, 0),
384 };
385
386 indirect.HCPIndirectBitstreamObjectAccessUpperBound =
387 anv_address_add(src_buffer->address, align64(frame_info->srcBufferRange, 4096));
388
389 indirect.HCPIndirectCUObjectMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
390 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
391 };
392
393 indirect.HCPPAKBSEObjectMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
394 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
395 };
396
397 #if GFX_VER >= 11
398 indirect.HCPVP9PAKCompressedHeaderSyntaxStreamInMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
399 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
400 };
401 indirect.HCPVP9PAKProbabilityCounterStreamOutMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
402 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
403 };
404 indirect.HCPVP9PAKProbabilityDeltasStreamInMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
405 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
406 };
407 indirect.HCPVP9PAKTileRecordStreamOutMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
408 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
409 };
410 indirect.HCPVP9PAKCULevelStatisticStreamOutMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
411 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
412 };
413 #endif
414 }
415
416 if (sps->flags.scaling_list_enabled_flag) {
417 if (pps->flags.pps_scaling_list_data_present_flag) {
418 scaling_list(cmd_buffer, pps->pScalingLists);
419 } else if (sps->flags.sps_scaling_list_data_present_flag) {
420 scaling_list(cmd_buffer, sps->pScalingLists);
421 }
422 } else {
423 for (uint8_t size = 0; size < 4; size++) {
424 for (uint8_t pred = 0; pred < 2; pred++) {
425 for (uint8_t color = 0; color < 3; color++) {
426
427 if (size == 3 && color > 0)
428 continue;
429
430 anv_batch_emit(&cmd_buffer->batch, GENX(HCP_QM_STATE), qm) {
431 qm.SizeID = size;
432 qm.PredictionType = pred;
433 qm.ColorComponent = color;
434 qm.DCCoefficient = (size > 1) ? 16 : 0;
435 unsigned len = (size == 0) ? 16 : 64;
436
437 for (uint8_t q = 0; q < len; q++)
438 qm.QuantizerMatrix8x8[q] = 0x10;
439 }
440 }
441 }
442 }
443 }
444
445 anv_batch_emit(&cmd_buffer->batch, GENX(HCP_PIC_STATE), pic) {
446 pic.FrameWidthInMinimumCodingBlockSize =
447 sps->pic_width_in_luma_samples / (1 << (sps->log2_min_luma_coding_block_size_minus3 + 3)) - 1;
448 pic.FrameHeightInMinimumCodingBlockSize =
449 sps->pic_height_in_luma_samples / (1 << (sps->log2_min_luma_coding_block_size_minus3 + 3)) - 1;
450
451 pic.MinCUSize = sps->log2_min_luma_coding_block_size_minus3 & 0x3;
452 pic.LCUSize = (sps->log2_diff_max_min_luma_coding_block_size +
453 sps->log2_min_luma_coding_block_size_minus3) & 0x3;
454
455 pic.MinTUSize = sps->log2_min_luma_transform_block_size_minus2 & 0x3;
456 pic.MaxTUSize = (sps->log2_diff_max_min_luma_transform_block_size + sps->log2_min_luma_transform_block_size_minus2) & 0x3;
457 pic.MinPCMSize = sps->log2_min_pcm_luma_coding_block_size_minus3 & 0x3;
458 pic.MaxPCMSize = (sps->log2_diff_max_min_pcm_luma_coding_block_size + sps->log2_min_pcm_luma_coding_block_size_minus3) & 0x3;
459
460 #if GFX_VER >= 11
461 pic.Log2SAOOffsetScaleLuma = pps->log2_sao_offset_scale_luma;
462 pic.Log2SAOOffsetScaleChroma = pps->log2_sao_offset_scale_chroma;
463 pic.ChromaQPOffsetListLength = pps->chroma_qp_offset_list_len_minus1;
464 pic.DiffCUChromaQPOffsetDepth = pps->diff_cu_chroma_qp_offset_depth;
465 pic.ChromaQPOffsetListEnable = pps->flags.chroma_qp_offset_list_enabled_flag;
466 pic.ChromaSubsampling = sps->chroma_format_idc;
467
468 pic.HighPrecisionOffsetsEnable = sps->flags.high_precision_offsets_enabled_flag;
469 pic.Log2MaxTransformSkipSize = pps->log2_max_transform_skip_block_size_minus2 + 2;
470 pic.CrossComponentPredictionEnable = pps->flags.cross_component_prediction_enabled_flag;
471 pic.CABACBypassAlignmentEnable = sps->flags.cabac_bypass_alignment_enabled_flag;
472 pic.PersistentRiceAdaptationEnable = sps->flags.persistent_rice_adaptation_enabled_flag;
473 pic.IntraSmoothingDisable = sps->flags.intra_smoothing_disabled_flag;
474 pic.ExplicitRDPCMEnable = sps->flags.explicit_rdpcm_enabled_flag;
475 pic.ImplicitRDPCMEnable = sps->flags.implicit_rdpcm_enabled_flag;
476 pic.TransformSkipContextEnable = sps->flags.transform_skip_context_enabled_flag;
477 pic.TransformSkipRotationEnable = sps->flags.transform_skip_rotation_enabled_flag;
478 pic.SPSRangeExtensionEnable = sps->flags.sps_range_extension_flag;
479 #endif
480
481 pic.CollocatedPictureIsISlice = false;
482 pic.CurrentPictureIsISlice = false;
483 pic.SampleAdaptiveOffsetEnable = sps->flags.sample_adaptive_offset_enabled_flag;
484 pic.PCMEnable = sps->flags.pcm_enabled_flag;
485 pic.CUQPDeltaEnable = pps->flags.cu_qp_delta_enabled_flag;
486 pic.MaxDQPDepth = pps->diff_cu_qp_delta_depth;
487 pic.PCMLoopFilterDisable = sps->flags.pcm_loop_filter_disabled_flag;
488 pic.ConstrainedIntraPrediction = pps->flags.constrained_intra_pred_flag;
489 pic.Log2ParallelMergeLevel = pps->log2_parallel_merge_level_minus2;
490 pic.SignDataHiding = pps->flags.sign_data_hiding_enabled_flag;
491 pic.LoopFilterEnable = pps->flags.loop_filter_across_tiles_enabled_flag;
492 pic.EntropyCodingSyncEnable = pps->flags.entropy_coding_sync_enabled_flag;
493 pic.TilingEnable = pps->flags.tiles_enabled_flag;
494 pic.WeightedBiPredicationEnable = pps->flags.weighted_bipred_flag;
495 pic.WeightedPredicationEnable = pps->flags.weighted_pred_flag;
496 pic.FieldPic = 0;
497 pic.TopField = true;
498 pic.TransformSkipEnable = pps->flags.transform_skip_enabled_flag;
499 pic.AMPEnable = sps->flags.amp_enabled_flag;
500 pic.TransquantBypassEnable = pps->flags.transquant_bypass_enabled_flag;
501 pic.StrongIntraSmoothingEnable = sps->flags.strong_intra_smoothing_enabled_flag;
502 pic.CUPacketStructure = 0;
503
504 pic.PictureCbQPOffset = pps->pps_cb_qp_offset;
505 pic.PictureCrQPOffset = pps->pps_cr_qp_offset;
506 pic.IntraMaxTransformHierarchyDepth = sps->max_transform_hierarchy_depth_intra;
507 pic.InterMaxTransformHierarchyDepth = sps->max_transform_hierarchy_depth_inter;
508 pic.ChromaPCMSampleBitDepth = sps->pcm_sample_bit_depth_chroma_minus1 & 0xf;
509 pic.LumaPCMSampleBitDepth = sps->pcm_sample_bit_depth_luma_minus1 & 0xf;
510
511 pic.ChromaBitDepth = sps->bit_depth_chroma_minus8;
512 pic.LumaBitDepth = sps->bit_depth_luma_minus8;
513
514 #if GFX_VER >= 11
515 pic.CbQPOffsetList0 = pps->cb_qp_offset_list[0];
516 pic.CbQPOffsetList1 = pps->cb_qp_offset_list[1];
517 pic.CbQPOffsetList2 = pps->cb_qp_offset_list[2];
518 pic.CbQPOffsetList3 = pps->cb_qp_offset_list[3];
519 pic.CbQPOffsetList4 = pps->cb_qp_offset_list[4];
520 pic.CbQPOffsetList5 = pps->cb_qp_offset_list[5];
521
522 pic.CrQPOffsetList0 = pps->cr_qp_offset_list[0];
523 pic.CrQPOffsetList1 = pps->cr_qp_offset_list[1];
524 pic.CrQPOffsetList2 = pps->cr_qp_offset_list[2];
525 pic.CrQPOffsetList3 = pps->cr_qp_offset_list[3];
526 pic.CrQPOffsetList4 = pps->cr_qp_offset_list[4];
527 pic.CrQPOffsetList5 = pps->cr_qp_offset_list[5];
528 #endif
529 }
530
531 if (pps->flags.tiles_enabled_flag) {
532 int cum = 0;
533 anv_batch_emit(&cmd_buffer->batch, GENX(HCP_TILE_STATE), tile) {
534 tile.NumberofTileColumns = pps->num_tile_columns_minus1;
535 tile.NumberofTileRows = pps->num_tile_rows_minus1;
536 for (unsigned i = 0; i < 5; i++) {
537 tile.ColumnPosition[i].CtbPos0i = cum;
538 if ((4 * i) == pps->num_tile_columns_minus1)
539 break;
540
541 cum += pps->column_width_minus1[4 * i] + 1;
542 tile.ColumnPosition[i].CtbPos1i = cum;
543
544 if ((4 * i + 1) == pps->num_tile_columns_minus1)
545 break;
546 cum += pps->column_width_minus1[4 * i + 1] + 1;
547 tile.ColumnPosition[i].CtbPos2i = cum;
548
549 if ((4 * i + 2) == pps->num_tile_columns_minus1)
550 break;
551 cum += pps->column_width_minus1[4 * i + 2] + 1;
552 tile.ColumnPosition[i].CtbPos3i = cum;
553
554 if ((4 * i + 3) >= MIN2(pps->num_tile_columns_minus1,
555 ARRAY_SIZE(pps->column_width_minus1)))
556 break;
557
558 cum += pps->column_width_minus1[4 * i + 3] + 1;
559 }
560
561 cum = 0;
562
563 for (unsigned i = 0; i < 5; i++) {
564 tile.Rowposition[i].CtbPos0i = cum;
565 if ((4 * i) == pps->num_tile_rows_minus1)
566 break;
567
568 cum += pps->row_height_minus1[4 * i] + 1;
569 tile.Rowposition[i].CtbPos1i = cum;
570
571 if ((4 * i + 1) == pps->num_tile_rows_minus1)
572 break;
573 cum += pps->row_height_minus1[4 * i + 1] + 1;
574 tile.Rowposition[i].CtbPos2i = cum;
575
576 if ((4 * i + 2) == pps->num_tile_rows_minus1)
577 break;
578 cum += pps->row_height_minus1[4 * i + 2] + 1;
579 tile.Rowposition[i].CtbPos3i = cum;
580
581 if ((4 * i + 3) == pps->num_tile_rows_minus1)
582 break;
583
584 cum += pps->row_height_minus1[4 * i + 3] + 1;
585 }
586
587 if (pps->num_tile_rows_minus1 == 20) {
588 tile.Rowposition[5].CtbPos0i = cum;
589 }
590 if (pps->num_tile_rows_minus1 == 20) {
591 tile.Rowposition[5].CtbPos0i = cum;
592 cum += pps->row_height_minus1[20] + 1;
593 tile.Rowposition[5].CtbPos1i = cum;
594 }
595 }
596 }
597
598 /* Slice parsing */
599 uint32_t last_slice = h265_pic_info->sliceSegmentCount - 1;
600 void *slice_map = anv_gem_mmap(cmd_buffer->device, src_buffer->address.bo,
601 src_buffer->address.offset, frame_info->srcBufferRange);
602
603 struct vk_video_h265_slice_params slice_params[h265_pic_info->sliceSegmentCount];
604
605 /* All slices should be parsed in advance to collect information necessary */
606 for (unsigned s = 0; s < h265_pic_info->sliceSegmentCount; s++) {
607 uint32_t current_offset = h265_pic_info->pSliceSegmentOffsets[s];
608 void *map = slice_map + current_offset;
609 uint32_t slice_size = 0;
610
611 if (s == last_slice)
612 slice_size = frame_info->srcBufferRange - current_offset;
613 else
614 slice_size = h265_pic_info->pSliceSegmentOffsets[s + 1] - current_offset;
615
616 vk_video_parse_h265_slice_header(frame_info, h265_pic_info, sps, pps, map, slice_size, &slice_params[s]);
617 vk_fill_video_h265_reference_info(frame_info, h265_pic_info, &slice_params[s], ref_slots);
618 }
619
620 anv_gem_munmap(cmd_buffer->device, slice_map, frame_info->srcBufferRange);
621
622 for (unsigned s = 0; s < h265_pic_info->sliceSegmentCount; s++) {
623 uint32_t ctb_size = 1 << (sps->log2_diff_max_min_luma_coding_block_size +
624 sps->log2_min_luma_coding_block_size_minus3 + 3);
625 uint32_t pic_width_in_min_cbs_y = sps->pic_width_in_luma_samples /
626 (1 << (sps->log2_min_luma_coding_block_size_minus3 + 3));
627 uint32_t width_in_pix = (1 << (sps->log2_min_luma_coding_block_size_minus3 + 3)) *
628 pic_width_in_min_cbs_y;
629 uint32_t ctb_w = DIV_ROUND_UP(width_in_pix, ctb_size);
630 bool is_last = (s == last_slice);
631 int slice_qp = (slice_params[s].slice_qp_delta + pps->init_qp_minus26 + 26) & 0x3f;
632
633 anv_batch_emit(&cmd_buffer->batch, GENX(HCP_SLICE_STATE), slice) {
634 slice.SliceHorizontalPosition = slice_params[s].slice_segment_address % ctb_w;
635 slice.SliceVerticalPosition = slice_params[s].slice_segment_address / ctb_w;
636
637 if (is_last) {
638 slice.NextSliceHorizontalPosition = 0;
639 slice.NextSliceVerticalPosition = 0;
640 } else {
641 slice.NextSliceHorizontalPosition = (slice_params[s + 1].slice_segment_address) % ctb_w;
642 slice.NextSliceVerticalPosition = (slice_params[s + 1].slice_segment_address) / ctb_w;
643 }
644
645 slice.SliceType = slice_params[s].slice_type;
646 slice.LastSlice = is_last;
647 slice.DependentSlice = slice_params[s].dependent_slice_segment;
648 slice.SliceTemporalMVPEnable = slice_params[s].temporal_mvp_enable;
649 slice.SliceQP = abs(slice_qp);
650 slice.SliceQPSign = slice_qp >= 0 ? 0 : 1;
651 slice.SliceCbQPOffset = slice_params[s].slice_cb_qp_offset;
652 slice.SliceCrQPOffset = slice_params[s].slice_cr_qp_offset;
653 slice.SliceHeaderDisableDeblockingFilter = pps->flags.deblocking_filter_override_enabled_flag ?
654 slice_params[s].disable_deblocking_filter_idc : pps->flags.pps_deblocking_filter_disabled_flag;
655 slice.SliceTCOffsetDiv2 = slice_params[s].tc_offset_div2;
656 slice.SliceBetaOffsetDiv2 = slice_params[s].beta_offset_div2;
657 slice.SliceLoopFilterEnable = slice_params[s].loop_filter_across_slices_enable;
658 slice.SliceSAOChroma = slice_params[s].sao_chroma_flag;
659 slice.SliceSAOLuma = slice_params[s].sao_luma_flag;
660 slice.MVDL1Zero = slice_params[s].mvd_l1_zero_flag;
661
662 uint8_t low_delay = true;
663
664 if (slice_params[s].slice_type == STD_VIDEO_H265_SLICE_TYPE_I) {
665 low_delay = false;
666 } else {
667 for (unsigned i = 0; i < slice_params[s].num_ref_idx_l0_active; i++) {
668 int slot_idx = ref_slots[0][i].slot_index;
669
670 if (vk_video_h265_poc_by_slot(frame_info, slot_idx) >
671 h265_pic_info->pStdPictureInfo->PicOrderCntVal) {
672 low_delay = false;
673 break;
674 }
675 }
676
677 for (unsigned i = 0; i < slice_params[s].num_ref_idx_l1_active; i++) {
678 int slot_idx = ref_slots[1][i].slot_index;
679 if (vk_video_h265_poc_by_slot(frame_info, slot_idx) >
680 h265_pic_info->pStdPictureInfo->PicOrderCntVal) {
681 low_delay = false;
682 break;
683 }
684 }
685 }
686
687 slice.LowDelay = low_delay;
688 slice.CollocatedFromL0 = slice_params[s].collocated_list == 0 ? true : false;
689 slice.Log2WeightDenominatorChroma = slice_params[s].luma_log2_weight_denom +
690 (slice_params[s].chroma_log2_weight_denom - slice_params[s].luma_log2_weight_denom);
691 slice.Log2WeightDenominatorLuma = slice_params[s].luma_log2_weight_denom;
692 slice.CABACInit = slice_params[s].cabac_init_idc;
693 slice.MaxMergeIndex = slice_params[s].max_num_merge_cand - 1;
694 slice.CollocatedMVTemporalBufferIndex =
695 dpb_idx[ref_slots[slice_params[s].collocated_list][slice_params[s].collocated_ref_idx].slot_index];
696 assert(slice.CollocatedMVTemporalBufferIndex < ANV_VIDEO_H265_HCP_NUM_REF_FRAME);
697
698 slice.SliceHeaderLength = slice_params[s].slice_data_bytes_offset;
699 slice.CABACZeroWordInsertionEnable = false;
700 slice.EmulationByteSliceInsertEnable = false;
701 slice.TailInsertionPresent = false;
702 slice.SliceDataInsertionPresent = false;
703 slice.HeaderInsertionPresent = false;
704
705 slice.IndirectPAKBSEDataStartOffset = 0;
706 slice.TransformSkipLambda = 0;
707 slice.TransformSkipNumberofNonZeroCoeffsFactor0 = 0;
708 slice.TransformSkipNumberofZeroCoeffsFactor0 = 0;
709 slice.TransformSkipNumberofNonZeroCoeffsFactor1 = 0;
710 slice.TransformSkipNumberofZeroCoeffsFactor1 = 0;
711
712 #if GFX_VER >= 12
713 slice.OriginalSliceStartCtbX = slice_params[s].slice_segment_address % ctb_w;
714 slice.OriginalSliceStartCtbY = slice_params[s].slice_segment_address / ctb_w;
715 #endif
716 }
717
718 if (slice_params[s].slice_type != STD_VIDEO_H265_SLICE_TYPE_I) {
719 anv_batch_emit(&cmd_buffer->batch, GENX(HCP_REF_IDX_STATE), ref) {
720 ref.ReferencePictureListSelect = 0;
721 ref.NumberofReferenceIndexesActive = slice_params[s].num_ref_idx_l0_active - 1;
722
723 for (unsigned i = 0; i < ref.NumberofReferenceIndexesActive + 1; i++) {
724 int slot_idx = ref_slots[0][i].slot_index;
725 unsigned poc = ref_slots[0][i].pic_order_cnt;
726 int32_t diff_poc = h265_pic_info->pStdPictureInfo->PicOrderCntVal - poc;
727
728 assert(dpb_idx[slot_idx] < ANV_VIDEO_H265_HCP_NUM_REF_FRAME);
729
730 ref.ReferenceListEntry[i].ListEntry = dpb_idx[slot_idx];
731 ref.ReferenceListEntry[i].ReferencePicturetbValue = CLAMP(diff_poc, -128, 127) & 0xff;
732 ref.ReferenceListEntry[i].TopField = true;
733 }
734 }
735 }
736
737 if (slice_params[s].slice_type == STD_VIDEO_H265_SLICE_TYPE_B) {
738 anv_batch_emit(&cmd_buffer->batch, GENX(HCP_REF_IDX_STATE), ref) {
739 ref.ReferencePictureListSelect = 1;
740 ref.NumberofReferenceIndexesActive = slice_params[s].num_ref_idx_l1_active - 1;
741
742 for (unsigned i = 0; i < ref.NumberofReferenceIndexesActive + 1; i++) {
743 int slot_idx = ref_slots[1][i].slot_index;;
744 unsigned poc = ref_slots[1][i].pic_order_cnt;
745 int32_t diff_poc = h265_pic_info->pStdPictureInfo->PicOrderCntVal - poc;
746
747 assert(dpb_idx[slot_idx] < ANV_VIDEO_H265_HCP_NUM_REF_FRAME);
748
749 ref.ReferenceListEntry[i].ListEntry = dpb_idx[slot_idx];
750 ref.ReferenceListEntry[i].ReferencePicturetbValue = CLAMP(diff_poc, -128, 127) & 0xff;
751 ref.ReferenceListEntry[i].TopField = true;
752 }
753 }
754 }
755
756 if ((pps->flags.weighted_pred_flag && (slice_params[s].slice_type == STD_VIDEO_H265_SLICE_TYPE_P)) ||
757 (pps->flags.weighted_bipred_flag && (slice_params[s].slice_type == STD_VIDEO_H265_SLICE_TYPE_B))) {
758 anv_batch_emit(&cmd_buffer->batch, GENX(HCP_WEIGHTOFFSET_STATE), w) {
759 w.ReferencePictureListSelect = 0;
760
761 for (unsigned i = 0; i < ANV_VIDEO_H265_MAX_NUM_REF_FRAME; i++) {
762 w.LumaOffsets[i].DeltaLumaWeightLX = slice_params[s].delta_luma_weight_l0[i] & 0xff;
763 w.LumaOffsets[i].LumaOffsetLX = slice_params[s].luma_offset_l0[i] & 0xff;
764 w.ChromaOffsets[i].DeltaChromaWeightLX0 = slice_params[s].delta_chroma_weight_l0[i][0] & 0xff;
765 w.ChromaOffsets[i].ChromaOffsetLX0 = slice_params[s].chroma_offset_l0[i][0] & 0xff;
766 w.ChromaOffsets[i].DeltaChromaWeightLX1 = slice_params[s].delta_chroma_weight_l0[i][1] & 0xff;
767 w.ChromaOffsets[i].ChromaOffsetLX1 = slice_params[s].chroma_offset_l0[i][1] & 0xff;
768 }
769 }
770
771 if (slice_params[s].slice_type == STD_VIDEO_H265_SLICE_TYPE_B) {
772 anv_batch_emit(&cmd_buffer->batch, GENX(HCP_WEIGHTOFFSET_STATE), w) {
773 w.ReferencePictureListSelect = 1;
774
775 for (unsigned i = 0; i < ANV_VIDEO_H265_MAX_NUM_REF_FRAME; i++) {
776 w.LumaOffsets[i].DeltaLumaWeightLX = slice_params[s].delta_luma_weight_l1[i] & 0xff;
777 w.LumaOffsets[i].LumaOffsetLX = slice_params[s].luma_offset_l1[i] & 0xff;
778 w.ChromaOffsets[i].DeltaChromaWeightLX0 = slice_params[s].delta_chroma_weight_l1[i][0] & 0xff;
779 w.ChromaOffsets[i].DeltaChromaWeightLX1 = slice_params[s].delta_chroma_weight_l1[i][1] & 0xff;
780 w.ChromaOffsets[i].ChromaOffsetLX0 = slice_params[s].chroma_offset_l1[i][0] & 0xff;
781 w.ChromaOffsets[i].ChromaOffsetLX1 = slice_params[s].chroma_offset_l1[i][1] & 0xff;
782 }
783 }
784 }
785 }
786
787 uint32_t buffer_offset = frame_info->srcBufferOffset & 4095;
788
789 anv_batch_emit(&cmd_buffer->batch, GENX(HCP_BSD_OBJECT), bsd) {
790 bsd.IndirectBSDDataLength = slice_params[s].slice_size - 3;
791 bsd.IndirectBSDDataStartAddress = buffer_offset + h265_pic_info->pSliceSegmentOffsets[s] + 3;
792 }
793 }
794
795 #if GFX_VER >= 12
796 anv_batch_emit(&cmd_buffer->batch, GENX(VD_CONTROL_STATE), cs) {
797 cs.MemoryImplicitFlush = true;
798 }
799 #endif
800
801 anv_batch_emit(&cmd_buffer->batch, GENX(VD_PIPELINE_FLUSH), flush) {
802 flush.HEVCPipelineDone = true;
803 flush.HEVCPipelineCommandFlush = true;
804 flush.VDCommandMessageParserDone = true;
805 }
806 }
807
808 static void
anv_h264_decode_video(struct anv_cmd_buffer * cmd_buffer,const VkVideoDecodeInfoKHR * frame_info)809 anv_h264_decode_video(struct anv_cmd_buffer *cmd_buffer,
810 const VkVideoDecodeInfoKHR *frame_info)
811 {
812 ANV_FROM_HANDLE(anv_buffer, src_buffer, frame_info->srcBuffer);
813 struct anv_video_session *vid = cmd_buffer->video.vid;
814 struct anv_video_session_params *params = cmd_buffer->video.params;
815 const struct VkVideoDecodeH264PictureInfoKHR *h264_pic_info =
816 vk_find_struct_const(frame_info->pNext, VIDEO_DECODE_H264_PICTURE_INFO_KHR);
817 const StdVideoH264SequenceParameterSet *sps = vk_video_find_h264_dec_std_sps(¶ms->vk, h264_pic_info->pStdPictureInfo->seq_parameter_set_id);
818 const StdVideoH264PictureParameterSet *pps = vk_video_find_h264_dec_std_pps(¶ms->vk, h264_pic_info->pStdPictureInfo->pic_parameter_set_id);
819
820 anv_batch_emit(&cmd_buffer->batch, GENX(MI_FLUSH_DW), flush) {
821 flush.DWordLength = 2;
822 flush.VideoPipelineCacheInvalidate = 1;
823 };
824
825 #if GFX_VER >= 12
826 anv_batch_emit(&cmd_buffer->batch, GENX(MI_FORCE_WAKEUP), wake) {
827 wake.MFXPowerWellControl = 1;
828 wake.MaskBits = 768;
829 }
830
831 anv_batch_emit(&cmd_buffer->batch, GENX(MFX_WAIT), mfx) {
832 mfx.MFXSyncControlFlag = 1;
833 }
834 #endif
835
836 anv_batch_emit(&cmd_buffer->batch, GENX(MFX_PIPE_MODE_SELECT), sel) {
837 sel.StandardSelect = SS_AVC;
838 sel.CodecSelect = Decode;
839 sel.DecoderShortFormatMode = ShortFormatDriverInterface;
840 sel.DecoderModeSelect = VLDMode; // Hardcoded
841
842 sel.PreDeblockingOutputEnable = 0;
843 sel.PostDeblockingOutputEnable = 1;
844 }
845
846 #if GFX_VER >= 12
847 anv_batch_emit(&cmd_buffer->batch, GENX(MFX_WAIT), mfx) {
848 mfx.MFXSyncControlFlag = 1;
849 }
850 #endif
851
852 const struct anv_image_view *iv = anv_image_view_from_handle(frame_info->dstPictureResource.imageViewBinding);
853 const struct anv_image *img = iv->image;
854 anv_batch_emit(&cmd_buffer->batch, GENX(MFX_SURFACE_STATE), ss) {
855 ss.Width = img->vk.extent.width - 1;
856 ss.Height = img->vk.extent.height - 1;
857 ss.SurfaceFormat = PLANAR_420_8; // assert on this?
858 ss.InterleaveChroma = 1;
859 ss.SurfacePitch = img->planes[0].primary_surface.isl.row_pitch_B - 1;
860 ss.TiledSurface = img->planes[0].primary_surface.isl.tiling != ISL_TILING_LINEAR;
861 ss.TileWalk = TW_YMAJOR;
862
863 ss.YOffsetforUCb = ss.YOffsetforVCr =
864 img->planes[1].primary_surface.memory_range.offset / img->planes[0].primary_surface.isl.row_pitch_B;
865 }
866
867 anv_batch_emit(&cmd_buffer->batch, GENX(MFX_PIPE_BUF_ADDR_STATE), buf) {
868 bool use_pre_deblock = false;
869 if (use_pre_deblock) {
870 buf.PreDeblockingDestinationAddress = anv_image_address(img,
871 &img->planes[0].primary_surface.memory_range);
872 } else {
873 buf.PostDeblockingDestinationAddress = anv_image_address(img,
874 &img->planes[0].primary_surface.memory_range);
875 }
876 buf.PreDeblockingDestinationAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
877 .MOCS = anv_mocs(cmd_buffer->device, buf.PreDeblockingDestinationAddress.bo, 0),
878 };
879 buf.PostDeblockingDestinationAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
880 .MOCS = anv_mocs(cmd_buffer->device, buf.PostDeblockingDestinationAddress.bo, 0),
881 };
882
883 buf.IntraRowStoreScratchBufferAddress = (struct anv_address) { vid->vid_mem[ANV_VID_MEM_H264_INTRA_ROW_STORE].mem->bo, vid->vid_mem[ANV_VID_MEM_H264_INTRA_ROW_STORE].offset };
884 buf.IntraRowStoreScratchBufferAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
885 .MOCS = anv_mocs(cmd_buffer->device, buf.IntraRowStoreScratchBufferAddress.bo, 0),
886 };
887 buf.DeblockingFilterRowStoreScratchAddress = (struct anv_address) { vid->vid_mem[ANV_VID_MEM_H264_DEBLOCK_FILTER_ROW_STORE].mem->bo, vid->vid_mem[ANV_VID_MEM_H264_DEBLOCK_FILTER_ROW_STORE].offset };
888 buf.DeblockingFilterRowStoreScratchAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
889 .MOCS = anv_mocs(cmd_buffer->device, buf.DeblockingFilterRowStoreScratchAddress.bo, 0),
890 };
891 buf.MBStatusBufferAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
892 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
893 };
894 buf.MBILDBStreamOutBufferAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
895 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
896 };
897 buf.SecondMBILDBStreamOutBufferAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
898 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
899 };
900 buf.ScaledReferenceSurfaceAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
901 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
902 };
903 buf.OriginalUncompressedPictureSourceAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
904 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
905 };
906 buf.StreamOutDataDestinationAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
907 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
908 };
909
910 struct anv_bo *ref_bo = NULL;
911 for (unsigned i = 0; i < frame_info->referenceSlotCount; i++) {
912 const struct anv_image_view *ref_iv = anv_image_view_from_handle(frame_info->pReferenceSlots[i].pPictureResource->imageViewBinding);
913 int idx = frame_info->pReferenceSlots[i].slotIndex;
914 buf.ReferencePictureAddress[idx] = anv_image_address(ref_iv->image,
915 &ref_iv->image->planes[0].primary_surface.memory_range);
916
917 if (i == 0) {
918 ref_bo = ref_iv->image->bindings[0].address.bo;
919 }
920 }
921 buf.ReferencePictureAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
922 .MOCS = anv_mocs(cmd_buffer->device, ref_bo, 0),
923 };
924 }
925
926 anv_batch_emit(&cmd_buffer->batch, GENX(MFX_IND_OBJ_BASE_ADDR_STATE), index_obj) {
927 index_obj.MFXIndirectBitstreamObjectAddress = anv_address_add(src_buffer->address,
928 frame_info->srcBufferOffset & ~4095);
929 index_obj.MFXIndirectBitstreamObjectAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
930 .MOCS = anv_mocs(cmd_buffer->device, src_buffer->address.bo, 0),
931 };
932 index_obj.MFXIndirectMVObjectAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
933 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
934 };
935 index_obj.MFDIndirectITCOEFFObjectAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
936 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
937 };
938 index_obj.MFDIndirectITDBLKObjectAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
939 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
940 };
941 index_obj.MFCIndirectPAKBSEObjectAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
942 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
943 };
944 }
945
946 anv_batch_emit(&cmd_buffer->batch, GENX(MFX_BSP_BUF_BASE_ADDR_STATE), bsp) {
947 bsp.BSDMPCRowStoreScratchBufferAddress = (struct anv_address) { vid->vid_mem[ANV_VID_MEM_H264_BSD_MPC_ROW_SCRATCH].mem->bo,
948 vid->vid_mem[ANV_VID_MEM_H264_BSD_MPC_ROW_SCRATCH].offset };
949
950 bsp.BSDMPCRowStoreScratchBufferAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
951 .MOCS = anv_mocs(cmd_buffer->device, bsp.BSDMPCRowStoreScratchBufferAddress.bo, 0),
952 };
953 bsp.MPRRowStoreScratchBufferAddress = (struct anv_address) { vid->vid_mem[ANV_VID_MEM_H264_MPR_ROW_SCRATCH].mem->bo,
954 vid->vid_mem[ANV_VID_MEM_H264_BSD_MPC_ROW_SCRATCH].offset };
955
956 bsp.MPRRowStoreScratchBufferAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
957 .MOCS = anv_mocs(cmd_buffer->device, bsp.MPRRowStoreScratchBufferAddress.bo, 0),
958 };
959 bsp.BitplaneReadBufferAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
960 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
961 };
962 }
963
964 anv_batch_emit(&cmd_buffer->batch, GENX(MFD_AVC_DPB_STATE), avc_dpb) {
965 for (unsigned i = 0; i < frame_info->referenceSlotCount; i++) {
966 const struct VkVideoDecodeH264DpbSlotInfoKHR *dpb_slot =
967 vk_find_struct_const(frame_info->pReferenceSlots[i].pNext, VIDEO_DECODE_H264_DPB_SLOT_INFO_KHR);
968 const StdVideoDecodeH264ReferenceInfo *ref_info = dpb_slot->pStdReferenceInfo;
969 int idx = frame_info->pReferenceSlots[i].slotIndex;
970 avc_dpb.NonExistingFrame[idx] = ref_info->flags.is_non_existing;
971 avc_dpb.LongTermFrame[idx] = ref_info->flags.used_for_long_term_reference;
972 if (!ref_info->flags.top_field_flag && !ref_info->flags.bottom_field_flag)
973 avc_dpb.UsedforReference[idx] = 3;
974 else
975 avc_dpb.UsedforReference[idx] = ref_info->flags.top_field_flag | (ref_info->flags.bottom_field_flag << 1);
976 avc_dpb.LTSTFrameNumberList[idx] = ref_info->FrameNum;
977 }
978 }
979
980 anv_batch_emit(&cmd_buffer->batch, GENX(MFD_AVC_PICID_STATE), picid) {
981 picid.PictureIDRemappingDisable = true;
982 }
983
984 uint32_t pic_height = sps->pic_height_in_map_units_minus1 + 1;
985 if (!sps->flags.frame_mbs_only_flag)
986 pic_height *= 2;
987 anv_batch_emit(&cmd_buffer->batch, GENX(MFX_AVC_IMG_STATE), avc_img) {
988 avc_img.FrameWidth = sps->pic_width_in_mbs_minus1;
989 avc_img.FrameHeight = pic_height - 1;
990 avc_img.FrameSize = (sps->pic_width_in_mbs_minus1 + 1) * pic_height;
991
992 if (!h264_pic_info->pStdPictureInfo->flags.field_pic_flag)
993 avc_img.ImageStructure = FramePicture;
994 else if (h264_pic_info->pStdPictureInfo->flags.bottom_field_flag)
995 avc_img.ImageStructure = BottomFieldPicture;
996 else
997 avc_img.ImageStructure = TopFieldPicture;
998
999 avc_img.WeightedBiPredictionIDC = pps->weighted_bipred_idc;
1000 avc_img.WeightedPredictionEnable = pps->flags.weighted_pred_flag;
1001 avc_img.FirstChromaQPOffset = pps->chroma_qp_index_offset;
1002 avc_img.SecondChromaQPOffset = pps->second_chroma_qp_index_offset;
1003 avc_img.FieldPicture = h264_pic_info->pStdPictureInfo->flags.field_pic_flag;
1004 avc_img.MBAFFMode = (sps->flags.mb_adaptive_frame_field_flag &&
1005 !h264_pic_info->pStdPictureInfo->flags.field_pic_flag);
1006 avc_img.FrameMBOnly = sps->flags.frame_mbs_only_flag;
1007 avc_img._8x8IDCTTransformMode = pps->flags.transform_8x8_mode_flag;
1008 avc_img.Direct8x8Inference = sps->flags.direct_8x8_inference_flag;
1009 avc_img.ConstrainedIntraPrediction = pps->flags.constrained_intra_pred_flag;
1010 avc_img.NonReferencePicture = !h264_pic_info->pStdPictureInfo->flags.is_reference;
1011 avc_img.EntropyCodingSyncEnable = pps->flags.entropy_coding_mode_flag;
1012 avc_img.ChromaFormatIDC = sps->chroma_format_idc;
1013 avc_img.TrellisQuantizationChromaDisable = true;
1014 avc_img.NumberofReferenceFrames = frame_info->referenceSlotCount;
1015 avc_img.NumberofActiveReferencePicturesfromL0 = pps->num_ref_idx_l0_default_active_minus1 + 1;
1016 avc_img.NumberofActiveReferencePicturesfromL1 = pps->num_ref_idx_l1_default_active_minus1 + 1;
1017 avc_img.InitialQPValue = pps->pic_init_qp_minus26;
1018 avc_img.PicOrderPresent = pps->flags.bottom_field_pic_order_in_frame_present_flag;
1019 avc_img.DeltaPicOrderAlwaysZero = sps->flags.delta_pic_order_always_zero_flag;
1020 avc_img.PicOrderCountType = sps->pic_order_cnt_type;
1021 avc_img.DeblockingFilterControlPresent = pps->flags.deblocking_filter_control_present_flag;
1022 avc_img.RedundantPicCountPresent = pps->flags.redundant_pic_cnt_present_flag;
1023 avc_img.Log2MaxFrameNumber = sps->log2_max_frame_num_minus4;
1024 avc_img.Log2MaxPicOrderCountLSB = sps->log2_max_pic_order_cnt_lsb_minus4;
1025 avc_img.CurrentPictureFrameNumber = h264_pic_info->pStdPictureInfo->frame_num;
1026 }
1027
1028 StdVideoH264ScalingLists scaling_lists;
1029 vk_video_derive_h264_scaling_list(sps, pps, &scaling_lists);
1030 anv_batch_emit(&cmd_buffer->batch, GENX(MFX_QM_STATE), qm) {
1031 qm.DWordLength = 16;
1032 qm.AVC = AVC_4x4_Intra_MATRIX;
1033 for (unsigned m = 0; m < 3; m++)
1034 for (unsigned q = 0; q < 16; q++)
1035 qm.ForwardQuantizerMatrix[m * 16 + vl_zscan_normal_16[q]] = scaling_lists.ScalingList4x4[m][q];
1036 }
1037 anv_batch_emit(&cmd_buffer->batch, GENX(MFX_QM_STATE), qm) {
1038 qm.DWordLength = 16;
1039 qm.AVC = AVC_4x4_Inter_MATRIX;
1040 for (unsigned m = 0; m < 3; m++)
1041 for (unsigned q = 0; q < 16; q++)
1042 qm.ForwardQuantizerMatrix[m * 16 + vl_zscan_normal_16[q]] = scaling_lists.ScalingList4x4[m + 3][q];
1043 }
1044 if (pps->flags.transform_8x8_mode_flag) {
1045 anv_batch_emit(&cmd_buffer->batch, GENX(MFX_QM_STATE), qm) {
1046 qm.DWordLength = 16;
1047 qm.AVC = AVC_8x8_Intra_MATRIX;
1048 for (unsigned q = 0; q < 64; q++)
1049 qm.ForwardQuantizerMatrix[vl_zscan_normal[q]] = scaling_lists.ScalingList8x8[0][q];
1050 }
1051 anv_batch_emit(&cmd_buffer->batch, GENX(MFX_QM_STATE), qm) {
1052 qm.DWordLength = 16;
1053 qm.AVC = AVC_8x8_Inter_MATRIX;
1054 for (unsigned q = 0; q < 64; q++)
1055 qm.ForwardQuantizerMatrix[vl_zscan_normal[q]] = scaling_lists.ScalingList8x8[1][q];
1056 }
1057 }
1058
1059 anv_batch_emit(&cmd_buffer->batch, GENX(MFX_AVC_DIRECTMODE_STATE), avc_directmode) {
1060 /* bind reference frame DMV */
1061 struct anv_bo *dmv_bo = NULL;
1062 for (unsigned i = 0; i < frame_info->referenceSlotCount; i++) {
1063 int idx = frame_info->pReferenceSlots[i].slotIndex;
1064 const struct VkVideoDecodeH264DpbSlotInfoKHR *dpb_slot =
1065 vk_find_struct_const(frame_info->pReferenceSlots[i].pNext, VIDEO_DECODE_H264_DPB_SLOT_INFO_KHR);
1066 const struct anv_image_view *ref_iv = anv_image_view_from_handle(frame_info->pReferenceSlots[i].pPictureResource->imageViewBinding);
1067 const StdVideoDecodeH264ReferenceInfo *ref_info = dpb_slot->pStdReferenceInfo;
1068 avc_directmode.DirectMVBufferAddress[idx] = anv_image_address(ref_iv->image,
1069 &ref_iv->image->vid_dmv_top_surface);
1070 if (i == 0) {
1071 dmv_bo = ref_iv->image->bindings[0].address.bo;
1072 }
1073 avc_directmode.POCList[2 * idx] = ref_info->PicOrderCnt[0];
1074 avc_directmode.POCList[2 * idx + 1] = ref_info->PicOrderCnt[1];
1075 }
1076 avc_directmode.DirectMVBufferAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
1077 .MOCS = anv_mocs(cmd_buffer->device, dmv_bo, 0),
1078 };
1079
1080 avc_directmode.DirectMVBufferWriteAddress = anv_image_address(img,
1081 &img->vid_dmv_top_surface);
1082 avc_directmode.DirectMVBufferWriteAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
1083 .MOCS = anv_mocs(cmd_buffer->device, img->bindings[0].address.bo, 0),
1084 };
1085 avc_directmode.POCList[32] = h264_pic_info->pStdPictureInfo->PicOrderCnt[0];
1086 avc_directmode.POCList[33] = h264_pic_info->pStdPictureInfo->PicOrderCnt[1];
1087 }
1088
1089 uint32_t buffer_offset = frame_info->srcBufferOffset & 4095;
1090 #define HEADER_OFFSET 3
1091 for (unsigned s = 0; s < h264_pic_info->sliceCount; s++) {
1092 bool last_slice = s == (h264_pic_info->sliceCount - 1);
1093 uint32_t current_offset = h264_pic_info->pSliceOffsets[s];
1094 uint32_t this_end;
1095 if (!last_slice) {
1096 uint32_t next_offset = h264_pic_info->pSliceOffsets[s + 1];
1097 uint32_t next_end = h264_pic_info->pSliceOffsets[s + 2];
1098 if (s == h264_pic_info->sliceCount - 2)
1099 next_end = frame_info->srcBufferRange;
1100 anv_batch_emit(&cmd_buffer->batch, GENX(MFD_AVC_SLICEADDR), sliceaddr) {
1101 sliceaddr.IndirectBSDDataLength = next_end - next_offset - HEADER_OFFSET;
1102 /* start decoding after the 3-byte header. */
1103 sliceaddr.IndirectBSDDataStartAddress = buffer_offset + next_offset + HEADER_OFFSET;
1104 };
1105 this_end = next_offset;
1106 } else
1107 this_end = frame_info->srcBufferRange;
1108 anv_batch_emit(&cmd_buffer->batch, GENX(MFD_AVC_BSD_OBJECT), avc_bsd) {
1109 avc_bsd.IndirectBSDDataLength = this_end - current_offset - HEADER_OFFSET;
1110 /* start decoding after the 3-byte header. */
1111 avc_bsd.IndirectBSDDataStartAddress = buffer_offset + current_offset + HEADER_OFFSET;
1112 avc_bsd.InlineData.LastSlice = last_slice;
1113 avc_bsd.InlineData.FixPrevMBSkipped = 1;
1114 avc_bsd.InlineData.IntraPredictionErrorControl = 1;
1115 avc_bsd.InlineData.Intra8x84x4PredictionErrorConcealmentControl = 1;
1116 avc_bsd.InlineData.ISliceConcealmentMode = 1;
1117 };
1118 }
1119 }
1120
1121 void
genX(CmdDecodeVideoKHR)1122 genX(CmdDecodeVideoKHR)(VkCommandBuffer commandBuffer,
1123 const VkVideoDecodeInfoKHR *frame_info)
1124 {
1125 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
1126
1127 switch (cmd_buffer->video.vid->vk.op) {
1128 case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR:
1129 anv_h264_decode_video(cmd_buffer, frame_info);
1130 break;
1131 case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR:
1132 anv_h265_decode_video(cmd_buffer, frame_info);
1133 break;
1134 default:
1135 assert(0);
1136 }
1137 }
1138
#ifdef VK_ENABLE_BETA_EXTENSIONS
/**
 * vkCmdEncodeVideoKHR entry point, compiled only when
 * VK_ENABLE_BETA_EXTENSIONS is defined.
 *
 * The body records no commands; both arguments are ignored.
 */
void
genX(CmdEncodeVideoKHR)(VkCommandBuffer commandBuffer,
                        const VkVideoEncodeInfoKHR *pEncodeInfo)
{
   (void)commandBuffer;
   (void)pEncodeInfo;
}
#endif
1146