• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2021 Red Hat
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "anv_private.h"
25 
26 #include "genxml/gen_macros.h"
27 #include "genxml/genX_pack.h"
28 
29 #include "util/vl_zscan_data.h"
30 
31 void
genX(CmdBeginVideoCodingKHR)32 genX(CmdBeginVideoCodingKHR)(VkCommandBuffer commandBuffer,
33                              const VkVideoBeginCodingInfoKHR *pBeginInfo)
34 {
35    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
36    ANV_FROM_HANDLE(anv_video_session, vid, pBeginInfo->videoSession);
37    ANV_FROM_HANDLE(anv_video_session_params, params, pBeginInfo->videoSessionParameters);
38 
39    cmd_buffer->video.vid = vid;
40    cmd_buffer->video.params = params;
41 }
42 
43 void
genX(CmdControlVideoCodingKHR)44 genX(CmdControlVideoCodingKHR)(VkCommandBuffer commandBuffer,
45                                const VkVideoCodingControlInfoKHR *pCodingControlInfo)
46 {
47    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
48 
49    if (pCodingControlInfo->flags & VK_VIDEO_CODING_CONTROL_RESET_BIT_KHR) {
50       anv_batch_emit(&cmd_buffer->batch, GENX(MI_FLUSH_DW), flush) {
51          flush.VideoPipelineCacheInvalidate = 1;
52       }
53    }
54 }
55 
56 void
genX(CmdEndVideoCodingKHR)57 genX(CmdEndVideoCodingKHR)(VkCommandBuffer commandBuffer,
58                            const VkVideoEndCodingInfoKHR *pEndCodingInfo)
59 {
60    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
61 
62    cmd_buffer->video.vid = NULL;
63    cmd_buffer->video.params = NULL;
64 }
65 
66 static void
scaling_list(struct anv_cmd_buffer * cmd_buffer,const StdVideoH265ScalingLists * scaling_list)67 scaling_list(struct anv_cmd_buffer *cmd_buffer,
68              const StdVideoH265ScalingLists *scaling_list)
69 {
70    /* 4x4, 8x8, 16x16, 32x32 */
71    for (uint8_t size = 0; size < 4; size++) {
72       /* Intra, Inter */
73       for (uint8_t pred = 0; pred < 2; pred++) {
74          /* Y, Cb, Cr */
75          for (uint8_t color = 0; color < 3; color++) {
76             if (size == 3 && color > 0)
77                continue;
78 
79             anv_batch_emit(&cmd_buffer->batch, GENX(HCP_QM_STATE), qm) {
80                qm.SizeID = size;
81                qm.PredictionType = pred;
82                qm.ColorComponent = color;
83 
84                qm.DCCoefficient = size > 1 ?
85                   (size == 2 ? scaling_list->ScalingListDCCoef16x16[3 * pred + color] :
86                                scaling_list->ScalingListDCCoef32x32[pred]) : 0;
87 
88                if (size == 0) {
89                   for (uint8_t i = 0; i < 4; i++)
90                      for (uint8_t j = 0; j < 4; j++)
91                         qm.QuantizerMatrix8x8[4 * i + j] =
92                            scaling_list->ScalingList4x4[3 * pred + color][4 * i + j];
93                } else if (size == 1) {
94                   for (uint8_t i = 0; i < 8; i++)
95                      for (uint8_t j = 0; j < 8; j++)
96                         qm.QuantizerMatrix8x8[8 * i + j] =
97                            scaling_list->ScalingList8x8[3 * pred + color][8 * i + j];
98                } else if (size == 2) {
99                   for (uint8_t i = 0; i < 8; i++)
100                      for (uint8_t j = 0; j < 8; j++)
101                         qm.QuantizerMatrix8x8[8 * i + j] =
102                            scaling_list->ScalingList16x16[3 * pred + color][8 * i + j];
103                } else if (size == 3) {
104                   for (uint8_t i = 0; i < 8; i++)
105                      for (uint8_t j = 0; j < 8; j++)
106                         qm.QuantizerMatrix8x8[8 * i + j] =
107                            scaling_list->ScalingList32x32[pred][8 * i + j];
108                }
109             }
110          }
111       }
112    }
113 }
114 
115 static void
anv_h265_decode_video(struct anv_cmd_buffer * cmd_buffer,const VkVideoDecodeInfoKHR * frame_info)116 anv_h265_decode_video(struct anv_cmd_buffer *cmd_buffer,
117                       const VkVideoDecodeInfoKHR *frame_info)
118 {
119    ANV_FROM_HANDLE(anv_buffer, src_buffer, frame_info->srcBuffer);
120    struct anv_video_session *vid = cmd_buffer->video.vid;
121    struct anv_video_session_params *params = cmd_buffer->video.params;
122 
123    const struct VkVideoDecodeH265PictureInfoKHR *h265_pic_info =
124       vk_find_struct_const(frame_info->pNext, VIDEO_DECODE_H265_PICTURE_INFO_KHR);
125 
126    const StdVideoH265SequenceParameterSet *sps =
127       vk_video_find_h265_dec_std_sps(&params->vk, h265_pic_info->pStdPictureInfo->pps_seq_parameter_set_id);
128    const StdVideoH265PictureParameterSet *pps =
129       vk_video_find_h265_dec_std_pps(&params->vk, h265_pic_info->pStdPictureInfo->pps_pic_parameter_set_id);
130 
131    struct vk_video_h265_reference ref_slots[2][8] = { 0 };
132    uint8_t dpb_idx[ANV_VIDEO_H265_MAX_NUM_REF_FRAME] = { 0,};
133    bool is_10bit = sps->bit_depth_chroma_minus8 || sps->bit_depth_luma_minus8;
134 
135    anv_batch_emit(&cmd_buffer->batch, GENX(MI_FLUSH_DW), flush) {
136       flush.VideoPipelineCacheInvalidate = 1;
137    };
138 
139 #if GFX_VER >= 12
140    anv_batch_emit(&cmd_buffer->batch, GENX(MI_FORCE_WAKEUP), wake) {
141       wake.HEVCPowerWellControl = 1;
142       wake.MaskBits = 768;
143    }
144 
145    anv_batch_emit(&cmd_buffer->batch, GENX(VD_CONTROL_STATE), cs) {
146       cs.PipelineInitialization = true;
147    }
148 
149    anv_batch_emit(&cmd_buffer->batch, GENX(MFX_WAIT), mfx) {
150       mfx.MFXSyncControlFlag = 1;
151    }
152 #endif
153 
154    anv_batch_emit(&cmd_buffer->batch, GENX(HCP_PIPE_MODE_SELECT), sel) {
155       sel.CodecSelect = Decode;
156       sel.CodecStandardSelect = HEVC;
157    }
158 
159 #if GFX_VER >= 12
160    anv_batch_emit(&cmd_buffer->batch, GENX(MFX_WAIT), mfx) {
161       mfx.MFXSyncControlFlag = 1;
162    }
163 #endif
164 
165    const struct anv_image_view *iv =
166       anv_image_view_from_handle(frame_info->dstPictureResource.imageViewBinding);
167    const struct anv_image *img = iv->image;
168 
169    anv_batch_emit(&cmd_buffer->batch, GENX(HCP_SURFACE_STATE), ss) {
170       ss.SurfacePitch = img->planes[0].primary_surface.isl.row_pitch_B - 1;
171       ss.SurfaceID = HCP_CurrentDecodedPicture;
172       ss.SurfaceFormat = is_10bit ? P010 : PLANAR_420_8;
173 
174       ss.YOffsetforUCb = img->planes[1].primary_surface.memory_range.offset /
175          img->planes[0].primary_surface.isl.row_pitch_B;
176 
177 #if GFX_VER >= 11
178       ss.DefaultAlphaValue = 0xffff;
179 #endif
180    }
181 
182 #if GFX_VER >= 12
183    /* Seems to need to set same states to ref as decode on gen12 */
184    anv_batch_emit(&cmd_buffer->batch, GENX(HCP_SURFACE_STATE), ss) {
185       ss.SurfacePitch = img->planes[0].primary_surface.isl.row_pitch_B - 1;
186       ss.SurfaceID = HCP_ReferencePicture;
187       ss.SurfaceFormat = is_10bit ? P010 : PLANAR_420_8;
188 
189       ss.YOffsetforUCb = img->planes[1].primary_surface.memory_range.offset /
190          img->planes[0].primary_surface.isl.row_pitch_B;
191 
192       ss.DefaultAlphaValue = 0xffff;
193    }
194 #endif
195 
196    anv_batch_emit(&cmd_buffer->batch, GENX(HCP_PIPE_BUF_ADDR_STATE), buf) {
197       buf.DecodedPictureAddress =
198          anv_image_address(img, &img->planes[0].primary_surface.memory_range);
199 
200       buf.DecodedPictureMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
201          .MOCS = anv_mocs(cmd_buffer->device, buf.DecodedPictureAddress.bo, 0),
202       };
203 
204       buf.DeblockingFilterLineBufferAddress = (struct anv_address) {
205          vid->vid_mem[ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_LINE].mem->bo,
206          vid->vid_mem[ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_LINE].offset
207       };
208 
209       buf.DeblockingFilterLineBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
210          .MOCS = anv_mocs(cmd_buffer->device, buf.DeblockingFilterLineBufferAddress.bo, 0),
211       };
212 
213       buf.DeblockingFilterTileLineBufferAddress = (struct anv_address) {
214          vid->vid_mem[ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_TILE_LINE].mem->bo,
215          vid->vid_mem[ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_TILE_LINE].offset
216       };
217 
218       buf.DeblockingFilterTileLineBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
219          .MOCS = anv_mocs(cmd_buffer->device, buf.DeblockingFilterTileLineBufferAddress.bo, 0),
220       };
221 
222       buf.DeblockingFilterTileColumnBufferAddress = (struct anv_address) {
223          vid->vid_mem[ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_TILE_COLUMN].mem->bo,
224          vid->vid_mem[ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_TILE_COLUMN].offset
225       };
226 
227       buf.DeblockingFilterTileColumnBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
228          .MOCS = anv_mocs(cmd_buffer->device, buf.DeblockingFilterTileColumnBufferAddress.bo, 0),
229       };
230 
231       buf.MetadataLineBufferAddress = (struct anv_address) {
232          vid->vid_mem[ANV_VID_MEM_H265_METADATA_LINE].mem->bo,
233          vid->vid_mem[ANV_VID_MEM_H265_METADATA_LINE].offset
234       };
235 
236       buf.MetadataLineBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
237          .MOCS = anv_mocs(cmd_buffer->device, buf.MetadataLineBufferAddress.bo, 0),
238       };
239 
240       buf.MetadataTileLineBufferAddress = (struct anv_address) {
241          vid->vid_mem[ANV_VID_MEM_H265_METADATA_TILE_LINE].mem->bo,
242          vid->vid_mem[ANV_VID_MEM_H265_METADATA_TILE_LINE].offset
243       };
244 
245       buf.MetadataTileLineBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
246          .MOCS = anv_mocs(cmd_buffer->device, buf.MetadataTileLineBufferAddress.bo, 0),
247       };
248 
249       buf.MetadataTileColumnBufferAddress = (struct anv_address) {
250          vid->vid_mem[ANV_VID_MEM_H265_METADATA_TILE_COLUMN].mem->bo,
251          vid->vid_mem[ANV_VID_MEM_H265_METADATA_TILE_COLUMN].offset
252       };
253 
254       buf.MetadataTileColumnBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
255          .MOCS = anv_mocs(cmd_buffer->device, buf.MetadataTileColumnBufferAddress.bo, 0),
256       };
257 
258       buf.SAOLineBufferAddress = (struct anv_address) {
259          vid->vid_mem[ANV_VID_MEM_H265_SAO_LINE].mem->bo,
260          vid->vid_mem[ANV_VID_MEM_H265_SAO_LINE].offset
261       };
262 
263       buf.SAOLineBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
264          .MOCS = anv_mocs(cmd_buffer->device, buf.SAOLineBufferAddress.bo, 0),
265       };
266 
267       buf.SAOTileLineBufferAddress = (struct anv_address) {
268          vid->vid_mem[ANV_VID_MEM_H265_SAO_TILE_LINE].mem->bo,
269          vid->vid_mem[ANV_VID_MEM_H265_SAO_TILE_LINE].offset
270       };
271 
272       buf.SAOTileLineBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
273          .MOCS = anv_mocs(cmd_buffer->device, buf.SAOTileLineBufferAddress.bo, 0),
274       };
275 
276       buf.SAOTileColumnBufferAddress = (struct anv_address) {
277          vid->vid_mem[ANV_VID_MEM_H265_SAO_TILE_COLUMN].mem->bo,
278          vid->vid_mem[ANV_VID_MEM_H265_SAO_TILE_COLUMN].offset
279       };
280 
281       buf.SAOTileColumnBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
282          .MOCS = anv_mocs(cmd_buffer->device, buf.SAOTileColumnBufferAddress.bo, 0),
283       };
284 
285       buf.CurrentMVTemporalBufferAddress = anv_image_address(img, &img->vid_dmv_top_surface);
286 
287       buf.CurrentMVTemporalBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
288          .MOCS = anv_mocs(cmd_buffer->device, buf.CurrentMVTemporalBufferAddress.bo, 0),
289       };
290 
291       for (unsigned i = 0; i < frame_info->referenceSlotCount; i++) {
292          const struct anv_image_view *ref_iv =
293             anv_image_view_from_handle(frame_info->pReferenceSlots[i].pPictureResource->imageViewBinding);
294          int slot_idx = frame_info->pReferenceSlots[i].slotIndex;
295 
296          assert(slot_idx < ANV_VIDEO_H265_MAX_NUM_REF_FRAME);
297          dpb_idx[slot_idx] = i;
298 
299          buf.ReferencePictureAddress[i] =
300             anv_image_address(ref_iv->image, &ref_iv->image->planes[0].primary_surface.memory_range);
301       }
302 
303       buf.ReferencePictureMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
304          .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
305       };
306 
307       buf.OriginalUncompressedPictureSourceMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
308          .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
309       };
310 
311       buf.StreamOutDataDestinationMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
312          .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
313       };
314 
315       buf.DecodedPictureStatusBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
316          .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
317       };
318 
319       buf.LCUILDBStreamOutBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
320          .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
321       };
322 
323       for (unsigned i = 0; i < frame_info->referenceSlotCount; i++) {
324          const struct anv_image_view *ref_iv =
325             anv_image_view_from_handle(frame_info->pReferenceSlots[i].pPictureResource->imageViewBinding);
326 
327          buf.CollocatedMVTemporalBufferAddress[i] =
328             anv_image_address(ref_iv->image, &ref_iv->image->vid_dmv_top_surface);
329       }
330 
331       buf.CollocatedMVTemporalBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
332          .MOCS = anv_mocs(cmd_buffer->device, buf.CollocatedMVTemporalBufferAddress[0].bo, 0),
333       };
334 
335       buf.VP9ProbabilityBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
336          .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
337       };
338 
339       buf.VP9SegmentIDBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
340          .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
341       };
342 
343       buf.VP9HVDLineRowStoreBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
344          .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
345       };
346 
347       buf.VP9HVDTileRowStoreBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
348          .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
349       };
350 #if GFX_VER >= 11
351       buf.SAOStreamOutDataDestinationBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
352          .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
353       };
354       buf.FrameStatisticsStreamOutDataDestinationBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
355          .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
356       };
357       buf.SSESourcePixelRowStoreBufferMemoryAddressAttributesReadWrite = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
358          .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
359       };
360       buf.HCPScalabilitySliceStateBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
361          .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
362       };
363       buf.HCPScalabilityCABACDecodedSyntaxElementsBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
364          .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
365       };
366       buf.MVUpperRightColumnStoreBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
367          .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
368       };
369       buf.IntraPredictionUpperRightColumnStoreBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
370          .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
371       };
372       buf.IntraPredictionLeftReconColumnStoreBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
373          .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
374       };
375 #endif
376    }
377 
378    anv_batch_emit(&cmd_buffer->batch, GENX(HCP_IND_OBJ_BASE_ADDR_STATE), indirect) {
379       indirect.HCPIndirectBitstreamObjectBaseAddress =
380          anv_address_add(src_buffer->address, frame_info->srcBufferOffset & ~4095);
381 
382       indirect.HCPIndirectBitstreamObjectMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
383          .MOCS = anv_mocs(cmd_buffer->device, src_buffer->address.bo, 0),
384       };
385 
386       indirect.HCPIndirectBitstreamObjectAccessUpperBound =
387          anv_address_add(src_buffer->address, align64(frame_info->srcBufferRange, 4096));
388 
389       indirect.HCPIndirectCUObjectMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
390          .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
391       };
392 
393       indirect.HCPPAKBSEObjectMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
394          .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
395       };
396 
397 #if GFX_VER >= 11
398       indirect.HCPVP9PAKCompressedHeaderSyntaxStreamInMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
399          .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
400       };
401       indirect.HCPVP9PAKProbabilityCounterStreamOutMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
402          .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
403       };
404       indirect.HCPVP9PAKProbabilityDeltasStreamInMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
405          .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
406       };
407       indirect.HCPVP9PAKTileRecordStreamOutMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
408          .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
409       };
410       indirect.HCPVP9PAKCULevelStatisticStreamOutMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
411          .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
412       };
413 #endif
414    }
415 
416    if (sps->flags.scaling_list_enabled_flag) {
417       if (pps->flags.pps_scaling_list_data_present_flag) {
418          scaling_list(cmd_buffer, pps->pScalingLists);
419       } else if (sps->flags.sps_scaling_list_data_present_flag) {
420          scaling_list(cmd_buffer, sps->pScalingLists);
421       }
422    } else {
423       for (uint8_t size = 0; size < 4; size++) {
424          for (uint8_t pred = 0; pred < 2; pred++) {
425             for (uint8_t color = 0; color < 3; color++) {
426 
427                if (size == 3 && color > 0)
428                   continue;
429 
430                anv_batch_emit(&cmd_buffer->batch, GENX(HCP_QM_STATE), qm) {
431                   qm.SizeID = size;
432                   qm.PredictionType = pred;
433                   qm.ColorComponent = color;
434                   qm.DCCoefficient = (size > 1) ? 16 : 0;
435                   unsigned len = (size == 0) ? 16 : 64;
436 
437                   for (uint8_t q = 0; q < len; q++)
438                      qm.QuantizerMatrix8x8[q] = 0x10;
439                }
440             }
441          }
442       }
443    }
444 
445    anv_batch_emit(&cmd_buffer->batch, GENX(HCP_PIC_STATE), pic) {
446       pic.FrameWidthInMinimumCodingBlockSize =
447          sps->pic_width_in_luma_samples / (1 << (sps->log2_min_luma_coding_block_size_minus3 + 3)) - 1;
448       pic.FrameHeightInMinimumCodingBlockSize =
449          sps->pic_height_in_luma_samples / (1 << (sps->log2_min_luma_coding_block_size_minus3 + 3))  - 1;
450 
451       pic.MinCUSize = sps->log2_min_luma_coding_block_size_minus3 & 0x3;
452       pic.LCUSize = (sps->log2_diff_max_min_luma_coding_block_size +
453                      sps->log2_min_luma_coding_block_size_minus3) & 0x3;
454 
455       pic.MinTUSize = sps->log2_min_luma_transform_block_size_minus2 & 0x3;
456       pic.MaxTUSize = (sps->log2_diff_max_min_luma_transform_block_size + sps->log2_min_luma_transform_block_size_minus2) & 0x3;
457       pic.MinPCMSize = sps->log2_min_pcm_luma_coding_block_size_minus3 & 0x3;
458       pic.MaxPCMSize = (sps->log2_diff_max_min_pcm_luma_coding_block_size + sps->log2_min_pcm_luma_coding_block_size_minus3) & 0x3;
459 
460 #if GFX_VER >= 11
461       pic.Log2SAOOffsetScaleLuma = pps->log2_sao_offset_scale_luma;
462       pic.Log2SAOOffsetScaleChroma = pps->log2_sao_offset_scale_chroma;
463       pic.ChromaQPOffsetListLength = pps->chroma_qp_offset_list_len_minus1;
464       pic.DiffCUChromaQPOffsetDepth = pps->diff_cu_chroma_qp_offset_depth;
465       pic.ChromaQPOffsetListEnable = pps->flags.chroma_qp_offset_list_enabled_flag;
466       pic.ChromaSubsampling = sps->chroma_format_idc;
467 
468       pic.HighPrecisionOffsetsEnable = sps->flags.high_precision_offsets_enabled_flag;
469       pic.Log2MaxTransformSkipSize = pps->log2_max_transform_skip_block_size_minus2 + 2;
470       pic.CrossComponentPredictionEnable = pps->flags.cross_component_prediction_enabled_flag;
471       pic.CABACBypassAlignmentEnable = sps->flags.cabac_bypass_alignment_enabled_flag;
472       pic.PersistentRiceAdaptationEnable = sps->flags.persistent_rice_adaptation_enabled_flag;
473       pic.IntraSmoothingDisable = sps->flags.intra_smoothing_disabled_flag;
474       pic.ExplicitRDPCMEnable = sps->flags.explicit_rdpcm_enabled_flag;
475       pic.ImplicitRDPCMEnable = sps->flags.implicit_rdpcm_enabled_flag;
476       pic.TransformSkipContextEnable = sps->flags.transform_skip_context_enabled_flag;
477       pic.TransformSkipRotationEnable = sps->flags.transform_skip_rotation_enabled_flag;
478       pic.SPSRangeExtensionEnable = sps->flags.sps_range_extension_flag;
479 #endif
480 
481       pic.CollocatedPictureIsISlice = false;
482       pic.CurrentPictureIsISlice = false;
483       pic.SampleAdaptiveOffsetEnable = sps->flags.sample_adaptive_offset_enabled_flag;
484       pic.PCMEnable = sps->flags.pcm_enabled_flag;
485       pic.CUQPDeltaEnable = pps->flags.cu_qp_delta_enabled_flag;
486       pic.MaxDQPDepth = pps->diff_cu_qp_delta_depth;
487       pic.PCMLoopFilterDisable = sps->flags.pcm_loop_filter_disabled_flag;
488       pic.ConstrainedIntraPrediction = pps->flags.constrained_intra_pred_flag;
489       pic.Log2ParallelMergeLevel = pps->log2_parallel_merge_level_minus2;
490       pic.SignDataHiding = pps->flags.sign_data_hiding_enabled_flag;
491       pic.LoopFilterEnable = pps->flags.loop_filter_across_tiles_enabled_flag;
492       pic.EntropyCodingSyncEnable = pps->flags.entropy_coding_sync_enabled_flag;
493       pic.TilingEnable = pps->flags.tiles_enabled_flag;
494       pic.WeightedBiPredicationEnable = pps->flags.weighted_bipred_flag;
495       pic.WeightedPredicationEnable = pps->flags.weighted_pred_flag;
496       pic.FieldPic = 0;
497       pic.TopField = true;
498       pic.TransformSkipEnable = pps->flags.transform_skip_enabled_flag;
499       pic.AMPEnable = sps->flags.amp_enabled_flag;
500       pic.TransquantBypassEnable = pps->flags.transquant_bypass_enabled_flag;
501       pic.StrongIntraSmoothingEnable = sps->flags.strong_intra_smoothing_enabled_flag;
502       pic.CUPacketStructure = 0;
503 
504       pic.PictureCbQPOffset = pps->pps_cb_qp_offset;
505       pic.PictureCrQPOffset = pps->pps_cr_qp_offset;
506       pic.IntraMaxTransformHierarchyDepth = sps->max_transform_hierarchy_depth_intra;
507       pic.InterMaxTransformHierarchyDepth = sps->max_transform_hierarchy_depth_inter;
508       pic.ChromaPCMSampleBitDepth = sps->pcm_sample_bit_depth_chroma_minus1 & 0xf;
509       pic.LumaPCMSampleBitDepth = sps->pcm_sample_bit_depth_luma_minus1 & 0xf;
510 
511       pic.ChromaBitDepth = sps->bit_depth_chroma_minus8;
512       pic.LumaBitDepth = sps->bit_depth_luma_minus8;
513 
514 #if GFX_VER >= 11
515       pic.CbQPOffsetList0 = pps->cb_qp_offset_list[0];
516       pic.CbQPOffsetList1 = pps->cb_qp_offset_list[1];
517       pic.CbQPOffsetList2 = pps->cb_qp_offset_list[2];
518       pic.CbQPOffsetList3 = pps->cb_qp_offset_list[3];
519       pic.CbQPOffsetList4 = pps->cb_qp_offset_list[4];
520       pic.CbQPOffsetList5 = pps->cb_qp_offset_list[5];
521 
522       pic.CrQPOffsetList0 = pps->cr_qp_offset_list[0];
523       pic.CrQPOffsetList1 = pps->cr_qp_offset_list[1];
524       pic.CrQPOffsetList2 = pps->cr_qp_offset_list[2];
525       pic.CrQPOffsetList3 = pps->cr_qp_offset_list[3];
526       pic.CrQPOffsetList4 = pps->cr_qp_offset_list[4];
527       pic.CrQPOffsetList5 = pps->cr_qp_offset_list[5];
528 #endif
529    }
530 
531    if (pps->flags.tiles_enabled_flag) {
532       int cum = 0;
533       anv_batch_emit(&cmd_buffer->batch, GENX(HCP_TILE_STATE), tile) {
534          tile.NumberofTileColumns = pps->num_tile_columns_minus1;
535          tile.NumberofTileRows = pps->num_tile_rows_minus1;
536          for (unsigned i = 0; i < 5; i++) {
537             tile.ColumnPosition[i].CtbPos0i = cum;
538             if ((4 * i) == pps->num_tile_columns_minus1)
539                break;
540 
541             cum += pps->column_width_minus1[4 * i] + 1;
542             tile.ColumnPosition[i].CtbPos1i = cum;
543 
544             if ((4 * i + 1) == pps->num_tile_columns_minus1)
545                break;
546             cum += pps->column_width_minus1[4 * i + 1] + 1;
547             tile.ColumnPosition[i].CtbPos2i = cum;
548 
549             if ((4 * i + 2) == pps->num_tile_columns_minus1)
550                break;
551             cum += pps->column_width_minus1[4 * i + 2] + 1;
552             tile.ColumnPosition[i].CtbPos3i = cum;
553 
554             if ((4 * i + 3) >= MIN2(pps->num_tile_columns_minus1,
555                                     ARRAY_SIZE(pps->column_width_minus1)))
556                break;
557 
558             cum += pps->column_width_minus1[4 * i + 3] + 1;
559          }
560 
561          cum = 0;
562 
563          for (unsigned i = 0; i < 5; i++) {
564             tile.Rowposition[i].CtbPos0i = cum;
565             if ((4 * i) == pps->num_tile_rows_minus1)
566                break;
567 
568             cum += pps->row_height_minus1[4 * i] + 1;
569             tile.Rowposition[i].CtbPos1i = cum;
570 
571             if ((4 * i + 1) == pps->num_tile_rows_minus1)
572                break;
573             cum += pps->row_height_minus1[4 * i + 1] + 1;
574             tile.Rowposition[i].CtbPos2i = cum;
575 
576             if ((4 * i + 2) == pps->num_tile_rows_minus1)
577                break;
578             cum += pps->row_height_minus1[4 * i + 2] + 1;
579             tile.Rowposition[i].CtbPos3i = cum;
580 
581             if ((4 * i + 3) == pps->num_tile_rows_minus1)
582                break;
583 
584             cum += pps->row_height_minus1[4 * i + 3] + 1;
585          }
586 
587          if (pps->num_tile_rows_minus1 == 20) {
588             tile.Rowposition[5].CtbPos0i = cum;
589          }
590          if (pps->num_tile_rows_minus1 == 20) {
591             tile.Rowposition[5].CtbPos0i = cum;
592             cum += pps->row_height_minus1[20] + 1;
593             tile.Rowposition[5].CtbPos1i = cum;
594          }
595       }
596    }
597 
598    /* Slice parsing */
599    uint32_t last_slice = h265_pic_info->sliceSegmentCount - 1;
600    void *slice_map = anv_gem_mmap(cmd_buffer->device, src_buffer->address.bo,
601                                   src_buffer->address.offset, frame_info->srcBufferRange);
602 
603    struct vk_video_h265_slice_params slice_params[h265_pic_info->sliceSegmentCount];
604 
605    /* All slices should be parsed in advance to collect information necessary */
606    for (unsigned s = 0; s < h265_pic_info->sliceSegmentCount; s++) {
607       uint32_t current_offset = h265_pic_info->pSliceSegmentOffsets[s];
608       void *map = slice_map + current_offset;
609       uint32_t slice_size = 0;
610 
611       if (s == last_slice)
612          slice_size = frame_info->srcBufferRange - current_offset;
613       else
614          slice_size = h265_pic_info->pSliceSegmentOffsets[s + 1] - current_offset;
615 
616       vk_video_parse_h265_slice_header(frame_info, h265_pic_info, sps, pps, map, slice_size, &slice_params[s]);
617       vk_fill_video_h265_reference_info(frame_info, h265_pic_info, &slice_params[s], ref_slots);
618    }
619 
620    anv_gem_munmap(cmd_buffer->device, slice_map, frame_info->srcBufferRange);
621 
622    for (unsigned s = 0; s < h265_pic_info->sliceSegmentCount; s++) {
623       uint32_t ctb_size = 1 << (sps->log2_diff_max_min_luma_coding_block_size +
624           sps->log2_min_luma_coding_block_size_minus3 + 3);
625       uint32_t pic_width_in_min_cbs_y = sps->pic_width_in_luma_samples /
626          (1 << (sps->log2_min_luma_coding_block_size_minus3 + 3));
627       uint32_t width_in_pix = (1 << (sps->log2_min_luma_coding_block_size_minus3 + 3)) *
628          pic_width_in_min_cbs_y;
629       uint32_t ctb_w = DIV_ROUND_UP(width_in_pix, ctb_size);
630       bool is_last = (s == last_slice);
631       int slice_qp = (slice_params[s].slice_qp_delta + pps->init_qp_minus26 + 26) & 0x3f;
632 
633       anv_batch_emit(&cmd_buffer->batch, GENX(HCP_SLICE_STATE), slice) {
634          slice.SliceHorizontalPosition = slice_params[s].slice_segment_address % ctb_w;
635          slice.SliceVerticalPosition = slice_params[s].slice_segment_address / ctb_w;
636 
637          if (is_last) {
638             slice.NextSliceHorizontalPosition = 0;
639             slice.NextSliceVerticalPosition = 0;
640          } else {
641             slice.NextSliceHorizontalPosition = (slice_params[s + 1].slice_segment_address) % ctb_w;
642             slice.NextSliceVerticalPosition = (slice_params[s + 1].slice_segment_address) / ctb_w;
643          }
644 
645          slice.SliceType = slice_params[s].slice_type;
646          slice.LastSlice = is_last;
647          slice.DependentSlice = slice_params[s].dependent_slice_segment;
648          slice.SliceTemporalMVPEnable = slice_params[s].temporal_mvp_enable;
649          slice.SliceQP = abs(slice_qp);
650          slice.SliceQPSign = slice_qp >= 0 ? 0 : 1;
651          slice.SliceCbQPOffset = slice_params[s].slice_cb_qp_offset;
652          slice.SliceCrQPOffset = slice_params[s].slice_cr_qp_offset;
653          slice.SliceHeaderDisableDeblockingFilter = pps->flags.deblocking_filter_override_enabled_flag ?
654                slice_params[s].disable_deblocking_filter_idc : pps->flags.pps_deblocking_filter_disabled_flag;
655          slice.SliceTCOffsetDiv2 = slice_params[s].tc_offset_div2;
656          slice.SliceBetaOffsetDiv2 = slice_params[s].beta_offset_div2;
657          slice.SliceLoopFilterEnable = slice_params[s].loop_filter_across_slices_enable;
658          slice.SliceSAOChroma = slice_params[s].sao_chroma_flag;
659          slice.SliceSAOLuma = slice_params[s].sao_luma_flag;
660          slice.MVDL1Zero = slice_params[s].mvd_l1_zero_flag;
661 
662          uint8_t low_delay = true;
663 
664          if (slice_params[s].slice_type == STD_VIDEO_H265_SLICE_TYPE_I) {
665             low_delay = false;
666          } else {
667             for (unsigned i = 0; i < slice_params[s].num_ref_idx_l0_active; i++) {
668                int slot_idx = ref_slots[0][i].slot_index;
669 
670                if (vk_video_h265_poc_by_slot(frame_info, slot_idx) >
671                      h265_pic_info->pStdPictureInfo->PicOrderCntVal) {
672                   low_delay = false;
673                   break;
674                }
675             }
676 
677             for (unsigned i = 0; i < slice_params[s].num_ref_idx_l1_active; i++) {
678                int slot_idx = ref_slots[1][i].slot_index;
679                if (vk_video_h265_poc_by_slot(frame_info, slot_idx) >
680                      h265_pic_info->pStdPictureInfo->PicOrderCntVal) {
681                   low_delay = false;
682                   break;
683                }
684             }
685          }
686 
687          slice.LowDelay = low_delay;
688          slice.CollocatedFromL0 = slice_params[s].collocated_list == 0 ? true : false;
689          slice.Log2WeightDenominatorChroma = slice_params[s].luma_log2_weight_denom +
690             (slice_params[s].chroma_log2_weight_denom - slice_params[s].luma_log2_weight_denom);
691          slice.Log2WeightDenominatorLuma = slice_params[s].luma_log2_weight_denom;
692          slice.CABACInit = slice_params[s].cabac_init_idc;
693          slice.MaxMergeIndex = slice_params[s].max_num_merge_cand - 1;
694          slice.CollocatedMVTemporalBufferIndex =
695             dpb_idx[ref_slots[slice_params[s].collocated_list][slice_params[s].collocated_ref_idx].slot_index];
696          assert(slice.CollocatedMVTemporalBufferIndex < ANV_VIDEO_H265_HCP_NUM_REF_FRAME);
697 
698          slice.SliceHeaderLength = slice_params[s].slice_data_bytes_offset;
699          slice.CABACZeroWordInsertionEnable = false;
700          slice.EmulationByteSliceInsertEnable = false;
701          slice.TailInsertionPresent = false;
702          slice.SliceDataInsertionPresent = false;
703          slice.HeaderInsertionPresent = false;
704 
705          slice.IndirectPAKBSEDataStartOffset = 0;
706          slice.TransformSkipLambda = 0;
707          slice.TransformSkipNumberofNonZeroCoeffsFactor0 = 0;
708          slice.TransformSkipNumberofZeroCoeffsFactor0 = 0;
709          slice.TransformSkipNumberofNonZeroCoeffsFactor1 = 0;
710          slice.TransformSkipNumberofZeroCoeffsFactor1 = 0;
711 
712 #if GFX_VER >= 12
713          slice.OriginalSliceStartCtbX = slice_params[s].slice_segment_address % ctb_w;
714          slice.OriginalSliceStartCtbY = slice_params[s].slice_segment_address / ctb_w;
715 #endif
716       }
717 
718       if (slice_params[s].slice_type != STD_VIDEO_H265_SLICE_TYPE_I) {
719          anv_batch_emit(&cmd_buffer->batch, GENX(HCP_REF_IDX_STATE), ref) {
720             ref.ReferencePictureListSelect = 0;
721             ref.NumberofReferenceIndexesActive = slice_params[s].num_ref_idx_l0_active - 1;
722 
723             for (unsigned i = 0; i < ref.NumberofReferenceIndexesActive + 1; i++) {
724                int slot_idx = ref_slots[0][i].slot_index;
725                unsigned poc = ref_slots[0][i].pic_order_cnt;
726                int32_t diff_poc = h265_pic_info->pStdPictureInfo->PicOrderCntVal - poc;
727 
728                assert(dpb_idx[slot_idx] < ANV_VIDEO_H265_HCP_NUM_REF_FRAME);
729 
730                ref.ReferenceListEntry[i].ListEntry = dpb_idx[slot_idx];
731                ref.ReferenceListEntry[i].ReferencePicturetbValue = CLAMP(diff_poc, -128, 127) & 0xff;
732                ref.ReferenceListEntry[i].TopField = true;
733             }
734          }
735       }
736 
737       if (slice_params[s].slice_type == STD_VIDEO_H265_SLICE_TYPE_B) {
738          anv_batch_emit(&cmd_buffer->batch, GENX(HCP_REF_IDX_STATE), ref) {
739             ref.ReferencePictureListSelect = 1;
740             ref.NumberofReferenceIndexesActive = slice_params[s].num_ref_idx_l1_active - 1;
741 
742             for (unsigned i = 0; i < ref.NumberofReferenceIndexesActive + 1; i++) {
743                int slot_idx = ref_slots[1][i].slot_index;;
744                unsigned poc = ref_slots[1][i].pic_order_cnt;
745                int32_t diff_poc = h265_pic_info->pStdPictureInfo->PicOrderCntVal - poc;
746 
747                assert(dpb_idx[slot_idx] < ANV_VIDEO_H265_HCP_NUM_REF_FRAME);
748 
749                ref.ReferenceListEntry[i].ListEntry = dpb_idx[slot_idx];
750                ref.ReferenceListEntry[i].ReferencePicturetbValue = CLAMP(diff_poc, -128, 127) & 0xff;
751                ref.ReferenceListEntry[i].TopField = true;
752             }
753          }
754       }
755 
756       if ((pps->flags.weighted_pred_flag && (slice_params[s].slice_type == STD_VIDEO_H265_SLICE_TYPE_P)) ||
757             (pps->flags.weighted_bipred_flag && (slice_params[s].slice_type == STD_VIDEO_H265_SLICE_TYPE_B))) {
758          anv_batch_emit(&cmd_buffer->batch, GENX(HCP_WEIGHTOFFSET_STATE), w) {
759             w.ReferencePictureListSelect = 0;
760 
761             for (unsigned i = 0; i < ANV_VIDEO_H265_MAX_NUM_REF_FRAME; i++) {
762                w.LumaOffsets[i].DeltaLumaWeightLX = slice_params[s].delta_luma_weight_l0[i] & 0xff;
763                w.LumaOffsets[i].LumaOffsetLX = slice_params[s].luma_offset_l0[i] & 0xff;
764                w.ChromaOffsets[i].DeltaChromaWeightLX0 = slice_params[s].delta_chroma_weight_l0[i][0] & 0xff;
765                w.ChromaOffsets[i].ChromaOffsetLX0 = slice_params[s].chroma_offset_l0[i][0] & 0xff;
766                w.ChromaOffsets[i].DeltaChromaWeightLX1 = slice_params[s].delta_chroma_weight_l0[i][1] & 0xff;
767                w.ChromaOffsets[i].ChromaOffsetLX1 = slice_params[s].chroma_offset_l0[i][1] & 0xff;
768             }
769          }
770 
771          if (slice_params[s].slice_type == STD_VIDEO_H265_SLICE_TYPE_B) {
772             anv_batch_emit(&cmd_buffer->batch, GENX(HCP_WEIGHTOFFSET_STATE), w) {
773                w.ReferencePictureListSelect = 1;
774 
775                for (unsigned i = 0; i < ANV_VIDEO_H265_MAX_NUM_REF_FRAME; i++) {
776                   w.LumaOffsets[i].DeltaLumaWeightLX = slice_params[s].delta_luma_weight_l1[i] & 0xff;
777                   w.LumaOffsets[i].LumaOffsetLX = slice_params[s].luma_offset_l1[i] & 0xff;
778                   w.ChromaOffsets[i].DeltaChromaWeightLX0 = slice_params[s].delta_chroma_weight_l1[i][0] & 0xff;
779                   w.ChromaOffsets[i].DeltaChromaWeightLX1 = slice_params[s].delta_chroma_weight_l1[i][1] & 0xff;
780                   w.ChromaOffsets[i].ChromaOffsetLX0 = slice_params[s].chroma_offset_l1[i][0] & 0xff;
781                   w.ChromaOffsets[i].ChromaOffsetLX1 = slice_params[s].chroma_offset_l1[i][1] & 0xff;
782                }
783             }
784          }
785       }
786 
787       uint32_t buffer_offset = frame_info->srcBufferOffset & 4095;
788 
789       anv_batch_emit(&cmd_buffer->batch, GENX(HCP_BSD_OBJECT), bsd) {
790          bsd.IndirectBSDDataLength = slice_params[s].slice_size - 3;
791          bsd.IndirectBSDDataStartAddress = buffer_offset + h265_pic_info->pSliceSegmentOffsets[s] + 3;
792       }
793    }
794 
795 #if GFX_VER >= 12
796    anv_batch_emit(&cmd_buffer->batch, GENX(VD_CONTROL_STATE), cs) {
797       cs.MemoryImplicitFlush = true;
798    }
799 #endif
800 
801    anv_batch_emit(&cmd_buffer->batch, GENX(VD_PIPELINE_FLUSH), flush) {
802       flush.HEVCPipelineDone = true;
803       flush.HEVCPipelineCommandFlush = true;
804       flush.VDCommandMessageParserDone = true;
805    }
806 }
807 
808 static void
anv_h264_decode_video(struct anv_cmd_buffer * cmd_buffer,const VkVideoDecodeInfoKHR * frame_info)809 anv_h264_decode_video(struct anv_cmd_buffer *cmd_buffer,
810                       const VkVideoDecodeInfoKHR *frame_info)
811 {
812    ANV_FROM_HANDLE(anv_buffer, src_buffer, frame_info->srcBuffer);
813    struct anv_video_session *vid = cmd_buffer->video.vid;
814    struct anv_video_session_params *params = cmd_buffer->video.params;
815    const struct VkVideoDecodeH264PictureInfoKHR *h264_pic_info =
816       vk_find_struct_const(frame_info->pNext, VIDEO_DECODE_H264_PICTURE_INFO_KHR);
817    const StdVideoH264SequenceParameterSet *sps = vk_video_find_h264_dec_std_sps(&params->vk, h264_pic_info->pStdPictureInfo->seq_parameter_set_id);
818    const StdVideoH264PictureParameterSet *pps = vk_video_find_h264_dec_std_pps(&params->vk, h264_pic_info->pStdPictureInfo->pic_parameter_set_id);
819 
820    anv_batch_emit(&cmd_buffer->batch, GENX(MI_FLUSH_DW), flush) {
821       flush.DWordLength = 2;
822       flush.VideoPipelineCacheInvalidate = 1;
823    };
824 
825 #if GFX_VER >= 12
826    anv_batch_emit(&cmd_buffer->batch, GENX(MI_FORCE_WAKEUP), wake) {
827       wake.MFXPowerWellControl = 1;
828       wake.MaskBits = 768;
829    }
830 
831    anv_batch_emit(&cmd_buffer->batch, GENX(MFX_WAIT), mfx) {
832       mfx.MFXSyncControlFlag = 1;
833    }
834 #endif
835 
836    anv_batch_emit(&cmd_buffer->batch, GENX(MFX_PIPE_MODE_SELECT), sel) {
837       sel.StandardSelect = SS_AVC;
838       sel.CodecSelect = Decode;
839       sel.DecoderShortFormatMode = ShortFormatDriverInterface;
840       sel.DecoderModeSelect = VLDMode; // Hardcoded
841 
842       sel.PreDeblockingOutputEnable = 0;
843       sel.PostDeblockingOutputEnable = 1;
844    }
845 
846 #if GFX_VER >= 12
847    anv_batch_emit(&cmd_buffer->batch, GENX(MFX_WAIT), mfx) {
848       mfx.MFXSyncControlFlag = 1;
849    }
850 #endif
851 
852    const struct anv_image_view *iv = anv_image_view_from_handle(frame_info->dstPictureResource.imageViewBinding);
853    const struct anv_image *img = iv->image;
854    anv_batch_emit(&cmd_buffer->batch, GENX(MFX_SURFACE_STATE), ss) {
855       ss.Width = img->vk.extent.width - 1;
856       ss.Height = img->vk.extent.height - 1;
857       ss.SurfaceFormat = PLANAR_420_8; // assert on this?
858       ss.InterleaveChroma = 1;
859       ss.SurfacePitch = img->planes[0].primary_surface.isl.row_pitch_B - 1;
860       ss.TiledSurface = img->planes[0].primary_surface.isl.tiling != ISL_TILING_LINEAR;
861       ss.TileWalk = TW_YMAJOR;
862 
863       ss.YOffsetforUCb = ss.YOffsetforVCr =
864          img->planes[1].primary_surface.memory_range.offset / img->planes[0].primary_surface.isl.row_pitch_B;
865    }
866 
867    anv_batch_emit(&cmd_buffer->batch, GENX(MFX_PIPE_BUF_ADDR_STATE), buf) {
868       bool use_pre_deblock = false;
869       if (use_pre_deblock) {
870          buf.PreDeblockingDestinationAddress = anv_image_address(img,
871                                                                  &img->planes[0].primary_surface.memory_range);
872       } else {
873          buf.PostDeblockingDestinationAddress = anv_image_address(img,
874                                                                   &img->planes[0].primary_surface.memory_range);
875       }
876       buf.PreDeblockingDestinationAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
877          .MOCS = anv_mocs(cmd_buffer->device, buf.PreDeblockingDestinationAddress.bo, 0),
878       };
879       buf.PostDeblockingDestinationAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
880          .MOCS = anv_mocs(cmd_buffer->device, buf.PostDeblockingDestinationAddress.bo, 0),
881       };
882 
883       buf.IntraRowStoreScratchBufferAddress = (struct anv_address) { vid->vid_mem[ANV_VID_MEM_H264_INTRA_ROW_STORE].mem->bo, vid->vid_mem[ANV_VID_MEM_H264_INTRA_ROW_STORE].offset };
884       buf.IntraRowStoreScratchBufferAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
885          .MOCS = anv_mocs(cmd_buffer->device, buf.IntraRowStoreScratchBufferAddress.bo, 0),
886       };
887       buf.DeblockingFilterRowStoreScratchAddress = (struct anv_address) { vid->vid_mem[ANV_VID_MEM_H264_DEBLOCK_FILTER_ROW_STORE].mem->bo, vid->vid_mem[ANV_VID_MEM_H264_DEBLOCK_FILTER_ROW_STORE].offset };
888       buf.DeblockingFilterRowStoreScratchAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
889          .MOCS = anv_mocs(cmd_buffer->device, buf.DeblockingFilterRowStoreScratchAddress.bo, 0),
890       };
891       buf.MBStatusBufferAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
892          .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
893       };
894       buf.MBILDBStreamOutBufferAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
895          .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
896       };
897       buf.SecondMBILDBStreamOutBufferAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
898          .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
899       };
900       buf.ScaledReferenceSurfaceAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
901          .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
902       };
903       buf.OriginalUncompressedPictureSourceAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
904          .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
905       };
906       buf.StreamOutDataDestinationAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
907          .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
908       };
909 
910       struct anv_bo *ref_bo = NULL;
911       for (unsigned i = 0; i < frame_info->referenceSlotCount; i++) {
912          const struct anv_image_view *ref_iv = anv_image_view_from_handle(frame_info->pReferenceSlots[i].pPictureResource->imageViewBinding);
913          int idx = frame_info->pReferenceSlots[i].slotIndex;
914          buf.ReferencePictureAddress[idx] = anv_image_address(ref_iv->image,
915                                                               &ref_iv->image->planes[0].primary_surface.memory_range);
916 
917          if (i == 0) {
918             ref_bo = ref_iv->image->bindings[0].address.bo;
919          }
920       }
921       buf.ReferencePictureAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
922          .MOCS = anv_mocs(cmd_buffer->device, ref_bo, 0),
923       };
924    }
925 
926    anv_batch_emit(&cmd_buffer->batch, GENX(MFX_IND_OBJ_BASE_ADDR_STATE), index_obj) {
927       index_obj.MFXIndirectBitstreamObjectAddress = anv_address_add(src_buffer->address,
928                                                                     frame_info->srcBufferOffset & ~4095);
929       index_obj.MFXIndirectBitstreamObjectAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
930          .MOCS = anv_mocs(cmd_buffer->device, src_buffer->address.bo, 0),
931       };
932       index_obj.MFXIndirectMVObjectAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
933          .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
934       };
935       index_obj.MFDIndirectITCOEFFObjectAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
936          .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
937       };
938       index_obj.MFDIndirectITDBLKObjectAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
939          .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
940       };
941       index_obj.MFCIndirectPAKBSEObjectAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
942          .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
943       };
944    }
945 
946    anv_batch_emit(&cmd_buffer->batch, GENX(MFX_BSP_BUF_BASE_ADDR_STATE), bsp) {
947       bsp.BSDMPCRowStoreScratchBufferAddress = (struct anv_address) { vid->vid_mem[ANV_VID_MEM_H264_BSD_MPC_ROW_SCRATCH].mem->bo,
948          vid->vid_mem[ANV_VID_MEM_H264_BSD_MPC_ROW_SCRATCH].offset };
949 
950       bsp.BSDMPCRowStoreScratchBufferAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
951          .MOCS = anv_mocs(cmd_buffer->device, bsp.BSDMPCRowStoreScratchBufferAddress.bo, 0),
952       };
953       bsp.MPRRowStoreScratchBufferAddress = (struct anv_address) { vid->vid_mem[ANV_VID_MEM_H264_MPR_ROW_SCRATCH].mem->bo,
954          vid->vid_mem[ANV_VID_MEM_H264_BSD_MPC_ROW_SCRATCH].offset };
955 
956       bsp.MPRRowStoreScratchBufferAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
957          .MOCS = anv_mocs(cmd_buffer->device, bsp.MPRRowStoreScratchBufferAddress.bo, 0),
958       };
959       bsp.BitplaneReadBufferAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
960          .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
961       };
962    }
963 
964    anv_batch_emit(&cmd_buffer->batch, GENX(MFD_AVC_DPB_STATE), avc_dpb) {
965       for (unsigned i = 0; i < frame_info->referenceSlotCount; i++) {
966          const struct VkVideoDecodeH264DpbSlotInfoKHR *dpb_slot =
967             vk_find_struct_const(frame_info->pReferenceSlots[i].pNext, VIDEO_DECODE_H264_DPB_SLOT_INFO_KHR);
968          const StdVideoDecodeH264ReferenceInfo *ref_info = dpb_slot->pStdReferenceInfo;
969          int idx = frame_info->pReferenceSlots[i].slotIndex;
970          avc_dpb.NonExistingFrame[idx] = ref_info->flags.is_non_existing;
971          avc_dpb.LongTermFrame[idx] = ref_info->flags.used_for_long_term_reference;
972          if (!ref_info->flags.top_field_flag && !ref_info->flags.bottom_field_flag)
973             avc_dpb.UsedforReference[idx] = 3;
974          else
975             avc_dpb.UsedforReference[idx] = ref_info->flags.top_field_flag | (ref_info->flags.bottom_field_flag << 1);
976          avc_dpb.LTSTFrameNumberList[idx] = ref_info->FrameNum;
977       }
978    }
979 
980    anv_batch_emit(&cmd_buffer->batch, GENX(MFD_AVC_PICID_STATE), picid) {
981       picid.PictureIDRemappingDisable = true;
982    }
983 
984    uint32_t pic_height = sps->pic_height_in_map_units_minus1 + 1;
985    if (!sps->flags.frame_mbs_only_flag)
986       pic_height *= 2;
987    anv_batch_emit(&cmd_buffer->batch, GENX(MFX_AVC_IMG_STATE), avc_img) {
988       avc_img.FrameWidth = sps->pic_width_in_mbs_minus1;
989       avc_img.FrameHeight = pic_height - 1;
990       avc_img.FrameSize = (sps->pic_width_in_mbs_minus1 + 1) * pic_height;
991 
992       if (!h264_pic_info->pStdPictureInfo->flags.field_pic_flag)
993          avc_img.ImageStructure = FramePicture;
994       else if (h264_pic_info->pStdPictureInfo->flags.bottom_field_flag)
995          avc_img.ImageStructure = BottomFieldPicture;
996       else
997          avc_img.ImageStructure = TopFieldPicture;
998 
999       avc_img.WeightedBiPredictionIDC = pps->weighted_bipred_idc;
1000       avc_img.WeightedPredictionEnable = pps->flags.weighted_pred_flag;
1001       avc_img.FirstChromaQPOffset = pps->chroma_qp_index_offset;
1002       avc_img.SecondChromaQPOffset = pps->second_chroma_qp_index_offset;
1003       avc_img.FieldPicture = h264_pic_info->pStdPictureInfo->flags.field_pic_flag;
1004       avc_img.MBAFFMode = (sps->flags.mb_adaptive_frame_field_flag &&
1005                            !h264_pic_info->pStdPictureInfo->flags.field_pic_flag);
1006       avc_img.FrameMBOnly = sps->flags.frame_mbs_only_flag;
1007       avc_img._8x8IDCTTransformMode = pps->flags.transform_8x8_mode_flag;
1008       avc_img.Direct8x8Inference = sps->flags.direct_8x8_inference_flag;
1009       avc_img.ConstrainedIntraPrediction = pps->flags.constrained_intra_pred_flag;
1010       avc_img.NonReferencePicture = !h264_pic_info->pStdPictureInfo->flags.is_reference;
1011       avc_img.EntropyCodingSyncEnable = pps->flags.entropy_coding_mode_flag;
1012       avc_img.ChromaFormatIDC = sps->chroma_format_idc;
1013       avc_img.TrellisQuantizationChromaDisable = true;
1014       avc_img.NumberofReferenceFrames = frame_info->referenceSlotCount;
1015       avc_img.NumberofActiveReferencePicturesfromL0 = pps->num_ref_idx_l0_default_active_minus1 + 1;
1016       avc_img.NumberofActiveReferencePicturesfromL1 = pps->num_ref_idx_l1_default_active_minus1 + 1;
1017       avc_img.InitialQPValue = pps->pic_init_qp_minus26;
1018       avc_img.PicOrderPresent = pps->flags.bottom_field_pic_order_in_frame_present_flag;
1019       avc_img.DeltaPicOrderAlwaysZero = sps->flags.delta_pic_order_always_zero_flag;
1020       avc_img.PicOrderCountType = sps->pic_order_cnt_type;
1021       avc_img.DeblockingFilterControlPresent = pps->flags.deblocking_filter_control_present_flag;
1022       avc_img.RedundantPicCountPresent = pps->flags.redundant_pic_cnt_present_flag;
1023       avc_img.Log2MaxFrameNumber = sps->log2_max_frame_num_minus4;
1024       avc_img.Log2MaxPicOrderCountLSB = sps->log2_max_pic_order_cnt_lsb_minus4;
1025       avc_img.CurrentPictureFrameNumber = h264_pic_info->pStdPictureInfo->frame_num;
1026    }
1027 
1028    StdVideoH264ScalingLists scaling_lists;
1029    vk_video_derive_h264_scaling_list(sps, pps, &scaling_lists);
1030    anv_batch_emit(&cmd_buffer->batch, GENX(MFX_QM_STATE), qm) {
1031       qm.DWordLength = 16;
1032       qm.AVC = AVC_4x4_Intra_MATRIX;
1033       for (unsigned m = 0; m < 3; m++)
1034          for (unsigned q = 0; q < 16; q++)
1035             qm.ForwardQuantizerMatrix[m * 16 + vl_zscan_normal_16[q]] = scaling_lists.ScalingList4x4[m][q];
1036    }
1037    anv_batch_emit(&cmd_buffer->batch, GENX(MFX_QM_STATE), qm) {
1038       qm.DWordLength = 16;
1039       qm.AVC = AVC_4x4_Inter_MATRIX;
1040       for (unsigned m = 0; m < 3; m++)
1041          for (unsigned q = 0; q < 16; q++)
1042             qm.ForwardQuantizerMatrix[m * 16 + vl_zscan_normal_16[q]] = scaling_lists.ScalingList4x4[m + 3][q];
1043    }
1044    if (pps->flags.transform_8x8_mode_flag) {
1045       anv_batch_emit(&cmd_buffer->batch, GENX(MFX_QM_STATE), qm) {
1046          qm.DWordLength = 16;
1047          qm.AVC = AVC_8x8_Intra_MATRIX;
1048          for (unsigned q = 0; q < 64; q++)
1049             qm.ForwardQuantizerMatrix[vl_zscan_normal[q]] = scaling_lists.ScalingList8x8[0][q];
1050       }
1051       anv_batch_emit(&cmd_buffer->batch, GENX(MFX_QM_STATE), qm) {
1052          qm.DWordLength = 16;
1053          qm.AVC = AVC_8x8_Inter_MATRIX;
1054          for (unsigned q = 0; q < 64; q++)
1055             qm.ForwardQuantizerMatrix[vl_zscan_normal[q]] = scaling_lists.ScalingList8x8[1][q];
1056       }
1057    }
1058 
1059    anv_batch_emit(&cmd_buffer->batch, GENX(MFX_AVC_DIRECTMODE_STATE), avc_directmode) {
1060       /* bind reference frame DMV */
1061       struct anv_bo *dmv_bo = NULL;
1062       for (unsigned i = 0; i < frame_info->referenceSlotCount; i++) {
1063          int idx = frame_info->pReferenceSlots[i].slotIndex;
1064          const struct VkVideoDecodeH264DpbSlotInfoKHR *dpb_slot =
1065             vk_find_struct_const(frame_info->pReferenceSlots[i].pNext, VIDEO_DECODE_H264_DPB_SLOT_INFO_KHR);
1066          const struct anv_image_view *ref_iv = anv_image_view_from_handle(frame_info->pReferenceSlots[i].pPictureResource->imageViewBinding);
1067          const StdVideoDecodeH264ReferenceInfo *ref_info = dpb_slot->pStdReferenceInfo;
1068          avc_directmode.DirectMVBufferAddress[idx] = anv_image_address(ref_iv->image,
1069                                                                      &ref_iv->image->vid_dmv_top_surface);
1070          if (i == 0) {
1071             dmv_bo = ref_iv->image->bindings[0].address.bo;
1072          }
1073          avc_directmode.POCList[2 * idx] = ref_info->PicOrderCnt[0];
1074          avc_directmode.POCList[2 * idx + 1] = ref_info->PicOrderCnt[1];
1075       }
1076       avc_directmode.DirectMVBufferAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
1077          .MOCS = anv_mocs(cmd_buffer->device, dmv_bo, 0),
1078       };
1079 
1080       avc_directmode.DirectMVBufferWriteAddress = anv_image_address(img,
1081                                                                     &img->vid_dmv_top_surface);
1082       avc_directmode.DirectMVBufferWriteAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
1083          .MOCS = anv_mocs(cmd_buffer->device, img->bindings[0].address.bo, 0),
1084       };
1085       avc_directmode.POCList[32] = h264_pic_info->pStdPictureInfo->PicOrderCnt[0];
1086       avc_directmode.POCList[33] = h264_pic_info->pStdPictureInfo->PicOrderCnt[1];
1087    }
1088 
1089    uint32_t buffer_offset = frame_info->srcBufferOffset & 4095;
1090 #define HEADER_OFFSET 3
1091    for (unsigned s = 0; s < h264_pic_info->sliceCount; s++) {
1092       bool last_slice = s == (h264_pic_info->sliceCount - 1);
1093       uint32_t current_offset = h264_pic_info->pSliceOffsets[s];
1094       uint32_t this_end;
1095       if (!last_slice) {
1096          uint32_t next_offset = h264_pic_info->pSliceOffsets[s + 1];
1097          uint32_t next_end = h264_pic_info->pSliceOffsets[s + 2];
1098          if (s == h264_pic_info->sliceCount - 2)
1099             next_end = frame_info->srcBufferRange;
1100          anv_batch_emit(&cmd_buffer->batch, GENX(MFD_AVC_SLICEADDR), sliceaddr) {
1101             sliceaddr.IndirectBSDDataLength = next_end - next_offset - HEADER_OFFSET;
1102             /* start decoding after the 3-byte header. */
1103             sliceaddr.IndirectBSDDataStartAddress = buffer_offset + next_offset + HEADER_OFFSET;
1104          };
1105          this_end = next_offset;
1106       } else
1107          this_end = frame_info->srcBufferRange;
1108       anv_batch_emit(&cmd_buffer->batch, GENX(MFD_AVC_BSD_OBJECT), avc_bsd) {
1109          avc_bsd.IndirectBSDDataLength = this_end - current_offset - HEADER_OFFSET;
1110          /* start decoding after the 3-byte header. */
1111          avc_bsd.IndirectBSDDataStartAddress = buffer_offset + current_offset + HEADER_OFFSET;
1112          avc_bsd.InlineData.LastSlice = last_slice;
1113          avc_bsd.InlineData.FixPrevMBSkipped = 1;
1114          avc_bsd.InlineData.IntraPredictionErrorControl = 1;
1115          avc_bsd.InlineData.Intra8x84x4PredictionErrorConcealmentControl = 1;
1116          avc_bsd.InlineData.ISliceConcealmentMode = 1;
1117       };
1118    }
1119 }
1120 
1121 void
genX(CmdDecodeVideoKHR)1122 genX(CmdDecodeVideoKHR)(VkCommandBuffer commandBuffer,
1123                         const VkVideoDecodeInfoKHR *frame_info)
1124 {
1125    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
1126 
1127    switch (cmd_buffer->video.vid->vk.op) {
1128    case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR:
1129       anv_h264_decode_video(cmd_buffer, frame_info);
1130       break;
1131    case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR:
1132       anv_h265_decode_video(cmd_buffer, frame_info);
1133       break;
1134    default:
1135       assert(0);
1136    }
1137 }
1138 
1139 #ifdef VK_ENABLE_BETA_EXTENSIONS
1140 void
genX(CmdEncodeVideoKHR)1141 genX(CmdEncodeVideoKHR)(VkCommandBuffer commandBuffer,
1142                         const VkVideoEncodeInfoKHR *pEncodeInfo)
1143 {
1144 }
1145 #endif
1146