/*
 * Copyright © Microsoft Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
23 
24 #include "d3d12_video_dec.h"
25 #include "d3d12_video_dec_h264.h"
26 
27 #include <cmath>
28 
29 void
d3d12_video_decoder_refresh_dpb_active_references_h264(struct d3d12_video_decoder * pD3D12Dec)30 d3d12_video_decoder_refresh_dpb_active_references_h264(struct d3d12_video_decoder *pD3D12Dec)
31 {
32    // Method overview
33    // 1. Codec specific strategy in switch statement regarding reference frames eviction policy. Should only mark active
34    // DPB references, leaving evicted ones as unused
35    // 2. Call release_unused_references_texture_memory(); at the end of this method. Any references (and texture
36    // allocations associated)
37    //    that were left not marked as used in m_spDPBManager by step (2) are lost.
38 
39    // Assign DXVA original Index7Bits indices to current frame and references
40    DXVA_PicParams_H264 *pCurrPicParams = d3d12_video_decoder_get_current_dxva_picparams<DXVA_PicParams_H264>(pD3D12Dec);
41    for (uint8_t i = 0; i < 16; i++) {
42       // From H264 DXVA spec:
43       // Index7Bits
44       //     An index that identifies an uncompressed surface for the CurrPic or RefFrameList member of the picture
45       //     parameters structure(section 4.0) or the RefPicList member of the slice control data
46       //     structure(section 6.0) When Index7Bits is used in the CurrPic and RefFrameList members of the picture
47       //     parameters structure, the value directly specifies the DXVA index of an uncompressed surface. When
48       //     Index7Bits is used in the RefPicList member of the slice control data structure, the value identifies
49       //     the surface indirectly, as an index into the RefFrameList array of the associated picture parameters
50       //     structure.For more information, see section 6.2. In all cases, when Index7Bits does not contain a valid
51       //     index, the value is 127.
52       if (pCurrPicParams->RefFrameList[i].bPicEntry != DXVA_H264_INVALID_PICTURE_ENTRY_VALUE) {
53          pCurrPicParams->RefFrameList[i].Index7Bits =
54             pD3D12Dec->m_spDPBManager->get_index7bits(pD3D12Dec->m_pCurrentReferenceTargets[i]);
55       }
56    }
57 
58    pD3D12Dec->m_spDPBManager->mark_all_references_as_unused();
59    pD3D12Dec->m_spDPBManager->mark_references_in_use(pCurrPicParams->RefFrameList);
60 
61    // Releases the underlying reference picture texture objects of all references that were not marked as used in this
62    // method.
63    pD3D12Dec->m_spDPBManager->release_unused_references_texture_memory();
64 
65    pCurrPicParams->CurrPic.Index7Bits = pD3D12Dec->m_spDPBManager->get_index7bits(pD3D12Dec->m_pCurrentDecodeTarget);
66 
67    debug_printf("[d3d12_video_decoder_store_converted_dxva_picparams_from_pipe_input] DXVA_PicParams_H264 converted "
68                  "from pipe_h264_picture_desc (No reference index remapping)\n");
69    d3d12_video_decoder_log_pic_params_h264(pCurrPicParams);
70 }
71 
72 void
d3d12_video_decoder_get_frame_info_h264(struct d3d12_video_decoder * pD3D12Dec,uint32_t * pWidth,uint32_t * pHeight,uint16_t * pMaxDPB,bool & isInterlaced)73 d3d12_video_decoder_get_frame_info_h264(
74    struct d3d12_video_decoder *pD3D12Dec, uint32_t *pWidth, uint32_t *pHeight, uint16_t *pMaxDPB, bool &isInterlaced)
75 {
76    auto pPicParams = d3d12_video_decoder_get_current_dxva_picparams<DXVA_PicParams_H264>(pD3D12Dec);
77    // wFrameWidthInMbsMinus1 Width of the frame containing this picture, in units of macroblocks, minus 1. (The width in
78    // macroblocks is wFrameWidthInMbsMinus1 plus 1.) wFrameHeightInMbsMinus1 Height of the frame containing this
79    // picture, in units of macroblocks, minus 1. (The height in macroblocks is wFrameHeightInMbsMinus1 plus 1.) When the
80    // picture is a field, the height of the frame is twice the height of the picture and is an integer multiple of 2 in
81    // units of macroblocks.
82    *pWidth = (pPicParams->wFrameWidthInMbsMinus1 + 1) * 16;
83    *pHeight = (pPicParams->wFrameHeightInMbsMinus1 + 1) / (pPicParams->frame_mbs_only_flag ? 1 : 2);
84    *pHeight = (2 - pPicParams->frame_mbs_only_flag) * *pHeight;
85    *pHeight = *pHeight * 16;
86    *pMaxDPB = pPicParams->num_ref_frames + 1;
87    isInterlaced = !pPicParams->frame_mbs_only_flag;
88 }
89 
90 ///
91 /// Pushes the current frame as next reference, updates the DXVA H264 structure with the indices of the DPB and
92 /// transitions the references
93 ///
94 void
d3d12_video_decoder_prepare_current_frame_references_h264(struct d3d12_video_decoder * pD3D12Dec,ID3D12Resource * pTexture2D,uint32_t subresourceIndex)95 d3d12_video_decoder_prepare_current_frame_references_h264(struct d3d12_video_decoder *pD3D12Dec,
96                                                           ID3D12Resource *pTexture2D,
97                                                           uint32_t subresourceIndex)
98 {
99    DXVA_PicParams_H264 *pPicParams = d3d12_video_decoder_get_current_dxva_picparams<DXVA_PicParams_H264>(pD3D12Dec);
100    pPicParams->CurrPic.Index7Bits = pD3D12Dec->m_spDPBManager->store_future_reference(pPicParams->CurrPic.Index7Bits,
101                                                                                       pD3D12Dec->m_spVideoDecoderHeap,
102                                                                                       pTexture2D,
103                                                                                       subresourceIndex);
104 
105    // From H264 DXVA spec:
106    // Index7Bits
107    //     An index that identifies an uncompressed surface for the CurrPic or RefFrameList member of the picture
108    //     parameters structure(section 4.0) or the RefPicList member of the slice control data structure(section 6.0)
109    //     When Index7Bits is used in the CurrPic and RefFrameList members of the picture parameters structure, the value
110    //     directly specifies the DXVA index of an uncompressed surface. When Index7Bits is used in the RefPicList member
111    //     of the slice control data structure, the value identifies the surface indirectly, as an index into the
112    //     RefFrameList array of the associated picture parameters structure.For more information, see section 6.2. In
113    //     all cases, when Index7Bits does not contain a valid index, the value is 127.
114 
115    std::vector<D3D12_RESOURCE_BARRIER>
116       neededStateTransitions;   // Returned by update_entries to perform by the method caller
117    pD3D12Dec->m_spDPBManager->update_entries(
118       d3d12_video_decoder_get_current_dxva_picparams<DXVA_PicParams_H264>(pD3D12Dec)->RefFrameList,
119       neededStateTransitions);
120 
121    pD3D12Dec->m_spDecodeCommandList->ResourceBarrier(neededStateTransitions.size(), neededStateTransitions.data());
122 
123    // Schedule reverse (back to common) transitions before command list closes for current frame
124    for (auto BarrierDesc : neededStateTransitions) {
125       std::swap(BarrierDesc.Transition.StateBefore, BarrierDesc.Transition.StateAfter);
126       pD3D12Dec->m_transitionsBeforeCloseCmdList.push_back(BarrierDesc);
127    }
128 
129    debug_printf(
130       "[d3d12_video_decoder_prepare_current_frame_references_h264] DXVA_PicParams_H264 after index remapping)\n");
131    d3d12_video_decoder_log_pic_params_h264(
132       d3d12_video_decoder_get_current_dxva_picparams<DXVA_PicParams_H264>(pD3D12Dec));
133 }
134 
135 void
d3d12_video_decoder_prepare_dxva_slices_control_h264(struct d3d12_video_decoder * pD3D12Dec,std::vector<DXVA_Slice_H264_Short> & pOutSliceControlBuffers,struct pipe_h264_picture_desc * picture_h264)136 d3d12_video_decoder_prepare_dxva_slices_control_h264(struct d3d12_video_decoder *pD3D12Dec,
137                                                      std::vector<DXVA_Slice_H264_Short> &pOutSliceControlBuffers,
138                                                      struct pipe_h264_picture_desc *picture_h264)
139 {
140    debug_printf("[d3d12_video_decoder_h264] Upper layer reported %d slices for this frame, parsing them below...\n",
141                   picture_h264->slice_count);
142    size_t processedBitstreamBytes = 0u;
143    uint32_t sliceIdx = 0;
144    bool sliceFound = false;
145    do {
146       DXVA_Slice_H264_Short currentSliceEntry = {};
147       // From DXVA spec: All bits for the slice are located within the corresponding bitstream data buffer.
148       currentSliceEntry.wBadSliceChopping = 0u;
149       sliceFound = d3d12_video_decoder_get_next_slice_size_and_offset_h264(pD3D12Dec->m_stagingDecodeBitstream,
150                                                                            processedBitstreamBytes,
151                                                                            currentSliceEntry.SliceBytesInBuffer,
152                                                                            currentSliceEntry.BSNALunitDataLocation);
153 
154       if (sliceFound) {
155          d3d12_video_decoder_nal_unit_type_h264 naluType = (d3d12_video_decoder_nal_unit_type_h264)(
156             pD3D12Dec->m_stagingDecodeBitstream[currentSliceEntry.BSNALunitDataLocation +
157                                                 (DXVA_H264_START_CODE_LEN_BITS / 8)] &
158             0x1F);
159          debug_printf("[d3d12_video_decoder_h264] Detected slice (NALU Type %d) index %" PRIu32 " with size %d and offset %d "
160                         "for frame with "
161                         "fenceValue: %d\n",
162                         naluType,
163                         sliceIdx,
164                         currentSliceEntry.SliceBytesInBuffer,
165                         currentSliceEntry.BSNALunitDataLocation,
166                         pD3D12Dec->m_fenceValue);
167 
168          sliceIdx++;
169          processedBitstreamBytes += currentSliceEntry.SliceBytesInBuffer;
170          pOutSliceControlBuffers.push_back(currentSliceEntry);
171       }
172    } while (sliceFound && (sliceIdx < picture_h264->slice_count));
173    assert(pOutSliceControlBuffers.size() == picture_h264->slice_count);
174 }
175 
176 bool
d3d12_video_decoder_get_next_slice_size_and_offset_h264(std::vector<uint8_t> & buf,unsigned int bufferOffset,uint32_t & outSliceSize,uint32_t & outSliceOffset)177 d3d12_video_decoder_get_next_slice_size_and_offset_h264(std::vector<uint8_t> &buf,
178                                                         unsigned int bufferOffset,
179                                                         uint32_t &outSliceSize,
180                                                         uint32_t &outSliceOffset)
181 {
182    // Search the rest of the full frame buffer after the offset
183    uint numBitsToSearchIntoBuffer = buf.size() - bufferOffset;
184    int currentSlicePosition = d3d12_video_decoder_get_next_startcode_offset(buf,
185                                                                             bufferOffset,
186                                                                             DXVA_H264_START_CODE,
187                                                                             DXVA_H264_START_CODE_LEN_BITS,
188                                                                             numBitsToSearchIntoBuffer);
189 
190    // Return false now if we didn't find a next slice based on the bufferOffset parameter
191    if (currentSlicePosition < 0) {
192       return false;
193    } else {
194       // Save the absolute buffer offset until the next slice in the output param
195       outSliceOffset = currentSlicePosition + bufferOffset;
196 
197       // Found a next NALU, make sure it's a slice:
198       d3d12_video_decoder_nal_unit_type_h264 naluType =
199          (d3d12_video_decoder_nal_unit_type_h264)(buf[outSliceOffset + (DXVA_H264_START_CODE_LEN_BITS / 8)] & 0x1F);
200 
201       bool isNaluSliceType = (naluType == type_slice) || (naluType == type_slice_part_A) ||
202                              (naluType == type_slice_part_B) || (naluType == type_slice_part_C) ||
203                              (naluType == type_slice_IDR) || (naluType == type_slice_aux) ||
204                              (naluType == type_slice_layer_ext);
205 
206       if (!isNaluSliceType) {
207          // We found a NALU, but it's not a slice
208          return false;
209       } else {
210          // We did find a next slice based on the bufferOffset parameter
211 
212          // Skip current start code, to get the slice after this, to calculate its size
213          bufferOffset += (DXVA_H264_START_CODE_LEN_BITS / 8 /*convert bits to bytes*/);
214          numBitsToSearchIntoBuffer = buf.size() - bufferOffset;
215 
216          int c_signedStartCodeLen = (DXVA_H264_START_CODE_LEN_BITS / 8 /*convert bits to bytes*/);
217          int nextSlicePosition = c_signedStartCodeLen   // Takes into account the skipped start code
218                                  + d3d12_video_decoder_get_next_startcode_offset(buf,
219                                                                                  bufferOffset,
220                                                                                  DXVA_H264_START_CODE,
221                                                                                  DXVA_H264_START_CODE_LEN_BITS,
222                                                                                  numBitsToSearchIntoBuffer);
223 
224          if (nextSlicePosition <
225              c_signedStartCodeLen)   // if no slice found, d3d12_video_decoder_get_next_startcode_offset returns - 1
226          {
227             // This means currentSlicePosition points to the last slice in the buffer
228             outSliceSize = buf.size() - outSliceOffset;
229          } else {
230             // This means there are more slices after the one pointed by currentSlicePosition
231             outSliceSize = nextSlicePosition - currentSlicePosition;
232          }
233          return true;
234       }
235    }
236 }
237 
238 static void
d3d12_video_decoder_log_pic_entry_h264(DXVA_PicEntry_H264 & picEntry)239 d3d12_video_decoder_log_pic_entry_h264(DXVA_PicEntry_H264 &picEntry)
240 {
241    debug_printf("\t\tIndex7Bits: %d\n"
242                  "\t\tAssociatedFlag: %d\n"
243                  "\t\tbPicEntry: %d\n",
244                  picEntry.Index7Bits,
245                  picEntry.AssociatedFlag,
246                  picEntry.bPicEntry);
247 }
248 
249 void
d3d12_video_decoder_log_pic_params_h264(DXVA_PicParams_H264 * pPicParams)250 d3d12_video_decoder_log_pic_params_h264(DXVA_PicParams_H264 *pPicParams)
251 {
252    debug_printf("\n=============================================\n");
253    debug_printf("wFrameWidthInMbsMinus1 = %d\n", pPicParams->wFrameWidthInMbsMinus1);
254    debug_printf("wFrameHeightInMbsMinus1 = %d\n", pPicParams->wFrameHeightInMbsMinus1);
255    debug_printf("CurrPic.Index7Bits = %d\n", pPicParams->CurrPic.Index7Bits);
256    debug_printf("CurrPic.AssociatedFlag = %d\n", pPicParams->CurrPic.AssociatedFlag);
257    debug_printf("num_ref_frames = %d\n", pPicParams->num_ref_frames);
258    debug_printf("sp_for_switch_flag = %d\n", pPicParams->sp_for_switch_flag);
259    debug_printf("field_pic_flag = %d\n", pPicParams->field_pic_flag);
260    debug_printf("MbaffFrameFlag = %d\n", pPicParams->MbaffFrameFlag);
261    debug_printf("residual_colour_transform_flag = %d\n", pPicParams->residual_colour_transform_flag);
262    debug_printf("chroma_format_idc = %d\n", pPicParams->chroma_format_idc);
263    debug_printf("RefPicFlag = %d\n", pPicParams->RefPicFlag);
264    debug_printf("IntraPicFlag = %d\n", pPicParams->IntraPicFlag);
265    debug_printf("constrained_intra_pred_flag = %d\n", pPicParams->constrained_intra_pred_flag);
266    debug_printf("MinLumaBipredSize8x8Flag = %d\n", pPicParams->MinLumaBipredSize8x8Flag);
267    debug_printf("weighted_pred_flag = %d\n", pPicParams->weighted_pred_flag);
268    debug_printf("weighted_bipred_idc = %d\n", pPicParams->weighted_bipred_idc);
269    debug_printf("MbsConsecutiveFlag = %d\n", pPicParams->MbsConsecutiveFlag);
270    debug_printf("frame_mbs_only_flag = %d\n", pPicParams->frame_mbs_only_flag);
271    debug_printf("transform_8x8_mode_flag = %d\n", pPicParams->transform_8x8_mode_flag);
272    debug_printf("StatusReportFeedbackNumber = %d\n", pPicParams->StatusReportFeedbackNumber);
273    debug_printf("CurrFieldOrderCnt[0] = %d\n", pPicParams->CurrFieldOrderCnt[0]);
274    debug_printf("CurrFieldOrderCnt[1] = %d\n", pPicParams->CurrFieldOrderCnt[1]);
275    debug_printf("chroma_qp_index_offset = %d\n", pPicParams->chroma_qp_index_offset);
276    debug_printf("second_chroma_qp_index_offset = %d\n", pPicParams->second_chroma_qp_index_offset);
277    debug_printf("ContinuationFlag = %d\n", pPicParams->ContinuationFlag);
278    debug_printf("pic_init_qp_minus26 = %d\n", pPicParams->pic_init_qp_minus26);
279    debug_printf("pic_init_qs_minus26 = %d\n", pPicParams->pic_init_qs_minus26);
280    debug_printf("num_ref_idx_l0_active_minus1 = %d\n", pPicParams->num_ref_idx_l0_active_minus1);
281    debug_printf("num_ref_idx_l1_active_minus1 = %d\n", pPicParams->num_ref_idx_l1_active_minus1);
282    debug_printf("frame_num = %d\n", pPicParams->frame_num);
283    debug_printf("log2_max_frame_num_minus4 = %d\n", pPicParams->log2_max_frame_num_minus4);
284    debug_printf("pic_order_cnt_type = %d\n", pPicParams->pic_order_cnt_type);
285    debug_printf("log2_max_pic_order_cnt_lsb_minus4 = %d\n", pPicParams->log2_max_pic_order_cnt_lsb_minus4);
286    debug_printf("delta_pic_order_always_zero_flag = %d\n", pPicParams->delta_pic_order_always_zero_flag);
287    debug_printf("direct_8x8_inference_flag = %d\n", pPicParams->direct_8x8_inference_flag);
288    debug_printf("entropy_coding_mode_flag = %d\n", pPicParams->entropy_coding_mode_flag);
289    debug_printf("pic_order_present_flag = %d\n", pPicParams->pic_order_present_flag);
290    debug_printf("deblocking_filter_control_present_flag = %d\n", pPicParams->deblocking_filter_control_present_flag);
291    debug_printf("redundant_pic_cnt_present_flag = %d\n", pPicParams->redundant_pic_cnt_present_flag);
292    debug_printf("num_slice_groups_minus1 = %d\n", pPicParams->num_slice_groups_minus1);
293    debug_printf("slice_group_map_type = %d\n", pPicParams->slice_group_map_type);
294    debug_printf("slice_group_change_rate_minus1 = %d\n", pPicParams->slice_group_change_rate_minus1);
295    debug_printf("Reserved8BitsB = %d\n", pPicParams->Reserved8BitsB);
296    debug_printf("UsedForReferenceFlags 0x%08x\n", pPicParams->UsedForReferenceFlags);
297    debug_printf("NonExistingFrameFlags 0x%08x\n", pPicParams->NonExistingFrameFlags);
298 
299    const UINT16 RefPicListLength = _countof(DXVA_PicParams_H264::RefFrameList);
300 
301    debug_printf("[D3D12 Video Decoder H264 DXVA PicParams info]\n"
302                  "\t[Current Picture Entry]\n");
303    d3d12_video_decoder_log_pic_entry_h264(pPicParams->CurrPic);
304 
305    debug_printf("[Decode RefFrameList Pic_Entry list] Entries where bPicEntry == "
306                  "DXVA_H264_INVALID_PICTURE_ENTRY_VALUE are not printed\n");
307    for (uint32_t refIdx = 0; refIdx < RefPicListLength; refIdx++) {
308       if (DXVA_H264_INVALID_PICTURE_ENTRY_VALUE != pPicParams->RefFrameList[refIdx].bPicEntry) {
309          debug_printf("\t[Reference PicEntry %d]\n", refIdx);
310          d3d12_video_decoder_log_pic_entry_h264(pPicParams->RefFrameList[refIdx]);
311          debug_printf("\t\tFrameNumList: %d\n"
312                        "\t\tFieldOrderCntList[0]: %d\n"
313                        "\t\tFieldOrderCntList[1]: %d\n",
314                        pPicParams->FrameNumList[refIdx],
315                        pPicParams->FieldOrderCntList[refIdx][0],
316                        pPicParams->FieldOrderCntList[refIdx][1]);
317       }
318    }
319 }
320 
321 DXVA_PicParams_H264
d3d12_video_decoder_dxva_picparams_from_pipe_picparams_h264(uint32_t frameNum,pipe_video_profile profile,uint32_t decodeWidth,uint32_t decodeHeight,pipe_h264_picture_desc * pPipeDesc)322 d3d12_video_decoder_dxva_picparams_from_pipe_picparams_h264(
323    uint32_t frameNum,
324    pipe_video_profile profile,
325    uint32_t decodeWidth,    // pipe_h264_picture_desc doesn't have the size of the frame for H264, but it does for other
326                             // codecs.
327    uint32_t decodeHeight,   // pipe_h264_picture_desc doesn't have the size of the frame for H264, but it does for other
328                             // codecs.
329    pipe_h264_picture_desc *pPipeDesc)
330 {
331    DXVA_PicParams_H264 dxvaStructure = {};
332 
333    // uint16_t  wFrameWidthInMbsMinus1;
334    uint width_in_mb = decodeWidth / D3D12_VIDEO_H264_MB_IN_PIXELS;
335    dxvaStructure.wFrameWidthInMbsMinus1 = width_in_mb - 1;
336    // uint16_t  wFrameHeightInMbsMinus1;
337    uint height_in_mb = static_cast<uint>(std::ceil(decodeHeight / D3D12_VIDEO_H264_MB_IN_PIXELS));
338    dxvaStructure.wFrameHeightInMbsMinus1 = height_in_mb - 1;
339 
340    // CurrPic.Index7Bits is handled by d3d12_video_decoder_refresh_dpb_active_references_h264
341    // CurrPic.AssociatedFlag
342    // If field_pic_flag is 1, the AssociatedFlag field in CurrPic is interpreted as follows:
343    // 0 -> The current picture is the top field of the uncompressed destination frame surface.
344    // 1 -> The current picture is the bottom field of the uncompressed destination frame surface.
345    // If field_pic_flag is 0, AssociatedFlag has no meaning and shall be 0, and the accelerator shall ignore the value.
346    if (pPipeDesc->field_pic_flag) {
347       dxvaStructure.CurrPic.AssociatedFlag = (pPipeDesc->bottom_field_flag == 0) ? 0 : 1;
348    } else {
349       dxvaStructure.CurrPic.AssociatedFlag = 0;
350    }
351 
352    // uint8_t   num_ref_frames;
353    dxvaStructure.num_ref_frames = pPipeDesc->num_ref_frames;
354    // union {
355    // struct {
356    // uint16_t  field_pic_flag                 : 1;
357    dxvaStructure.field_pic_flag = pPipeDesc->field_pic_flag;
358    // From H264 codec spec
359    // The variable MbaffFrameFlag is derived as
360    // MbaffFrameFlag = ( mb_adaptive_frame_field_flag && !field_pic_flag )
361    dxvaStructure.MbaffFrameFlag = (pPipeDesc->pps->sps->mb_adaptive_frame_field_flag && !pPipeDesc->field_pic_flag);
362    // uint16_t  residual_colour_transform_flag :1
363    dxvaStructure.residual_colour_transform_flag = pPipeDesc->pps->sps->separate_colour_plane_flag;
364    // uint16_t sp_for_switch_flag // switch slices are not supported by VA
365    dxvaStructure.sp_for_switch_flag = 0;
366    // uint16_t  chroma_format_idc              : 2;
367    assert(pPipeDesc->pps->sps->chroma_format_idc == 1);   // Not supported otherwise
368    dxvaStructure.chroma_format_idc = 1;   // This is always 4:2:0 for D3D12 Video. NV12/P010 DXGI formats only.
369    // uint16_t  RefPicFlag                     : 1;
370    dxvaStructure.RefPicFlag = pPipeDesc->is_reference;
371 
372    // uint16_t  constrained_intra_pred_flag    : 1;
373    dxvaStructure.constrained_intra_pred_flag = pPipeDesc->pps->constrained_intra_pred_flag;
374    // uint16_t  weighted_pred_flag             : 1;
375    dxvaStructure.weighted_pred_flag = pPipeDesc->pps->weighted_pred_flag;
376    // uint16_t  weighted_bipred_idc            : 2;
377    dxvaStructure.weighted_bipred_idc = pPipeDesc->pps->weighted_bipred_idc;
378    // From DXVA spec:
379    // The value shall be 1 unless the restricted-mode profile in use explicitly supports the value 0.
380    // FMO is not supported by VAAPI
381    dxvaStructure.MbsConsecutiveFlag = 1;
382    // uint16_t  frame_mbs_only_flag            : 1;
383    dxvaStructure.frame_mbs_only_flag = pPipeDesc->pps->sps->frame_mbs_only_flag;
384    // uint16_t  transform_8x8_mode_flag        : 1;
385    dxvaStructure.transform_8x8_mode_flag = pPipeDesc->pps->transform_8x8_mode_flag;
386    // };
387    // uint16_t  wBitFields;
388    // };
389    // uint8_t  bit_depth_luma_minus8;
390    dxvaStructure.bit_depth_luma_minus8 = pPipeDesc->pps->sps->bit_depth_luma_minus8;
391    assert(dxvaStructure.bit_depth_luma_minus8 == 0);   // Only support for NV12 now
392    // uint8_t  bit_depth_chroma_minus8;
393    dxvaStructure.bit_depth_chroma_minus8 = pPipeDesc->pps->sps->bit_depth_chroma_minus8;
394    assert(dxvaStructure.bit_depth_chroma_minus8 == 0);   // Only support for NV12 now
395    // uint16_t MinLumaBipredSize8x8Flag
396    dxvaStructure.MinLumaBipredSize8x8Flag = pPipeDesc->pps->sps->MinLumaBiPredSize8x8;
397    // char pic_init_qs_minus26
398    dxvaStructure.pic_init_qs_minus26 = pPipeDesc->pps->pic_init_qs_minus26;
399    // uint8_t   chroma_qp_index_offset;   /* also used for QScb */
400    dxvaStructure.chroma_qp_index_offset = pPipeDesc->pps->chroma_qp_index_offset;
401    // uint8_t   second_chroma_qp_index_offset; /* also for QScr */
402    dxvaStructure.second_chroma_qp_index_offset = pPipeDesc->pps->second_chroma_qp_index_offset;
403 
404    /* remainder for parsing */
405    // uint8_t   pic_init_qp_minus26;
406    dxvaStructure.pic_init_qp_minus26 = pPipeDesc->pps->pic_init_qp_minus26;
407    // uint8_t  num_ref_idx_l0_active_minus1;
408    dxvaStructure.num_ref_idx_l0_active_minus1 = pPipeDesc->num_ref_idx_l0_active_minus1;
409    // uint8_t  num_ref_idx_l1_active_minus1;
410    dxvaStructure.num_ref_idx_l1_active_minus1 = pPipeDesc->num_ref_idx_l1_active_minus1;
411 
412    // uint16_t frame_num;
413    dxvaStructure.frame_num = pPipeDesc->frame_num;
414 
415    // uint8_t  log2_max_frame_num_minus4;
416    dxvaStructure.log2_max_frame_num_minus4 = pPipeDesc->pps->sps->log2_max_frame_num_minus4;
417    // uint8_t  pic_order_cnt_type;
418    dxvaStructure.pic_order_cnt_type = pPipeDesc->pps->sps->pic_order_cnt_type;
419    // uint8_t  log2_max_pic_order_cnt_lsb_minus4;
420    dxvaStructure.log2_max_pic_order_cnt_lsb_minus4 = pPipeDesc->pps->sps->log2_max_pic_order_cnt_lsb_minus4;
421    // uint8_t  delta_pic_order_always_zero_flag;
422    dxvaStructure.delta_pic_order_always_zero_flag = pPipeDesc->pps->sps->delta_pic_order_always_zero_flag;
423    // uint8_t  direct_8x8_inference_flag;
424    dxvaStructure.direct_8x8_inference_flag = pPipeDesc->pps->sps->direct_8x8_inference_flag;
425    // uint8_t  entropy_coding_mode_flag;
426    dxvaStructure.entropy_coding_mode_flag = pPipeDesc->pps->entropy_coding_mode_flag;
427    // uint8_t  num_slice_groups_minus1;
428    dxvaStructure.num_slice_groups_minus1 = pPipeDesc->pps->num_slice_groups_minus1;
429    assert(dxvaStructure.num_slice_groups_minus1 == 0);   // FMO Not supported by VA
430 
431    // uint8_t  slice_group_map_type;
432    dxvaStructure.slice_group_map_type = pPipeDesc->pps->slice_group_map_type;
433    // uint8_t  deblocking_filter_control_present_flag;
434    dxvaStructure.deblocking_filter_control_present_flag = pPipeDesc->pps->deblocking_filter_control_present_flag;
435    // uint8_t  redundant_pic_cnt_present_flag;
436    dxvaStructure.redundant_pic_cnt_present_flag = pPipeDesc->pps->redundant_pic_cnt_present_flag;
437    // uint16_t slice_group_change_rate_minus1;
438    dxvaStructure.slice_group_change_rate_minus1 = pPipeDesc->pps->slice_group_change_rate_minus1;
439 
440    // int32_t    CurrFieldOrderCnt[2];
441    dxvaStructure.CurrFieldOrderCnt[0] = pPipeDesc->field_order_cnt[0];
442    dxvaStructure.CurrFieldOrderCnt[1] = pPipeDesc->field_order_cnt[1];
443 
444    // DXVA_PicEntry_H264  RefFrameList[16]; /* DXVA_PicEntry_H264.AssociatedFlag 1 means LongTermRef */
445    // From DXVA spec:
446    // RefFrameList
447    // Contains a list of 16 uncompressed frame buffer surfaces.  All uncompressed surfaces that correspond to pictures
448    // currently marked as "used for reference" must appear in the RefFrameList array. Non-reference surfaces (those
449    // which only contain pictures for which the value of RefPicFlag was 0 when the picture was decoded) shall not appear
450    // in RefFrameList for a subsequent picture. In addition, surfaces that contain only pictures marked as "unused for
451    // reference" shall not appear in RefFrameList for a subsequent picture.
452 
453    dxvaStructure.UsedForReferenceFlags = 0;   // initialize to zero and set only the appropiate values below
454 
455    bool frameUsesAnyRefPicture = false;
456    for (uint i = 0; i < 16; i++) {
457       // Fix ad-hoc behaviour from the VA upper layer which always marks short term references as top_is_reference and
458       // bottom_is_reference as true and then differenciates using INT_MAX in field_order_cnt_list[i][0]/[1] to indicate
459       // not used convert to expected
460       if (pPipeDesc->field_order_cnt_list[i][0] == INT_MAX) {
461          pPipeDesc->top_is_reference[i] = false;
462          pPipeDesc->field_order_cnt_list[i][0] = 0;   // DXVA Spec says this has to be zero if unused
463       }
464 
465       if (pPipeDesc->field_order_cnt_list[i][1] == INT_MAX) {
466          pPipeDesc->bottom_is_reference[i] = false;
467          pPipeDesc->field_order_cnt_list[i][1] = 0;   // DXVA Spec says this has to be zero if unused
468       }
469 
470       // If both top and bottom reference flags are false, this is an invalid entry
471       bool validEntry = (pPipeDesc->top_is_reference[i] || pPipeDesc->bottom_is_reference[i] || pPipeDesc->is_long_term[i]);
472       if (!validEntry) {
473          // From DXVA spec:
474          // Entries that will not be used for decoding the current picture, or any subsequent pictures, are indicated by
475          // setting bPicEntry to 0xFF. If bPicEntry is not 0xFF, the entry may be used as a reference surface for
476          // decoding the current picture or a subsequent picture (in decoding order).
477          dxvaStructure.RefFrameList[i].bPicEntry = DXVA_H264_INVALID_PICTURE_ENTRY_VALUE;
478          dxvaStructure.FieldOrderCntList[i][0] = 0;
479          dxvaStructure.FieldOrderCntList[i][1] = 0;
480          dxvaStructure.FrameNumList[i] = 0;
481       } else {
482          frameUsesAnyRefPicture = true;
483          // From DXVA spec:
484          // For each entry whose value is not 0xFF, the value of AssociatedFlag is interpreted as follows:
485          // 0 - Not a long-term reference frame.
486          // 1 - Long-term reference frame. The uncompressed frame buffer contains a reference frame or one or more
487          // reference fields marked as "used for long-term reference." If field_pic_flag is 1, the current uncompressed
488          // frame surface may appear in the list for the purpose of decoding the second field of a complementary
489          // reference field pair.
490          dxvaStructure.RefFrameList[i].AssociatedFlag = pPipeDesc->is_long_term[i] ? 1u : 0u;
491 
492          // dxvaStructure.RefFrameList[i].Index7Bits is handled by d3d12_video_decoder_refresh_dpb_active_references_h264
493 
494          // uint16_t FrameNumList[16];
495          // 	 FrameNumList
496          // For each entry in RefFrameList, the corresponding entry in FrameNumList
497          // contains the value of FrameNum or LongTermFrameIdx, depending on the value of
498          // AssociatedFlag in the RefFrameList entry. (FrameNum is assigned to short-term
499          // reference pictures, and LongTermFrameIdx is assigned to long-term reference
500          // pictures.)
501          // If an element in the list of frames is not relevent (for example, if the corresponding
502          // entry in RefFrameList is empty or is marked as "not used for reference"), the value
503          // of the FrameNumList entry shall be 0. Accelerators can rely on this constraint being
504          // fulfilled.
505          dxvaStructure.FrameNumList[i] = pPipeDesc->frame_num_list[i];
506 
507          // int32_t    FieldOrderCntList[16][2];
508          // Contains the picture order counts for the reference frames listed in RefFrameList.
509          // For each entry i in the RefFrameList array, FieldOrderCntList[i][0] contains the
510          // value of TopFieldOrderCnt for entry i, and FieldOrderCntList[i][1] contains the
511          // value of BottomFieldOrderCnt for entry i.
512          //
513          // If an element of the list is not relevent (for example, if the corresponding entry in
514          // RefFrameList is empty or is marked as "not used for reference"), the value of
515          // TopFieldOrderCnt or BottomFieldOrderCnt in FieldOrderCntList shall be 0.
516          // Accelerators can rely on this constraint being fulfilled.
517 
518          dxvaStructure.FieldOrderCntList[i][0] = pPipeDesc->field_order_cnt_list[i][0];
519          dxvaStructure.FieldOrderCntList[i][1] = pPipeDesc->field_order_cnt_list[i][1];
520 
521          // From DXVA spec
522          // UsedForReferenceFlags
523          // Contains two 1-bit flags for each entry in RefFrameList. For the ith entry in RefFrameList, the two flags
524          // are accessed as follows:  Flag1i = (UsedForReferenceFlags >> (2 * i)) & 1  Flag2i = (UsedForReferenceFlags
525          // >> (2 * i + 1)) & 1 If Flag1i is 1, the top field of frame number i is marked as "used for reference," as
526          // defined by the H.264/AVC specification. If Flag2i is 1, the bottom field of frame number i is marked as
527          // "used for reference." (Otherwise, if either flag is 0, that field is not marked as "used for reference.") If
528          // an element in the list of frames is not relevent (for example, if the corresponding entry in RefFrameList is
529          // empty), the value of both flags for that entry shall be 0. Accelerators may rely on this constraint being
530          // fulfilled.
531 
532          if (pPipeDesc->top_is_reference[i] || pPipeDesc->is_long_term[i]) {
533             dxvaStructure.UsedForReferenceFlags |= (1 << (2 * i));
534          }
535 
536          if (pPipeDesc->bottom_is_reference[i] || pPipeDesc->is_long_term[i]) {
537             dxvaStructure.UsedForReferenceFlags |= (1 << (2 * i + 1));
538          }
539       }
540    }
541 
542    // frame type (I, P, B, etc) is not included in pipeDesc data, let's try to derive it
543    // from the reference list...if frame doesn't use any references, it should be an I frame.
544    dxvaStructure.IntraPicFlag = !frameUsesAnyRefPicture;
545 
546    // uint8_t  pic_order_present_flag; /* Renamed to bottom_field_pic_order_in_frame_present_flag in newer standard
547    // versions. */
548    dxvaStructure.pic_order_present_flag = pPipeDesc->pps->bottom_field_pic_order_in_frame_present_flag;
549 
550    // Software decoders should be implemented, as soon as feasible, to set the value of
551    // Reserved16Bits to 3. The value 0 was previously assigned for uses prior to July 20,
552    // 2007. The value 1 was previously assigned for uses prior to October 12, 2007. The
553    // value 2 was previously assigned for uses prior to January 15, 2009. Software
554    // decoders shall not set Reserved16Bits to any value other than those listed here.
555    // Note Software decoders that set Reserved16Bits to 3 should ensure that any aspects of software decoder operation
556    // that were previously not in conformance with this version of the specification have been corrected in the current
557    // implementation. One particular aspect of conformance that should be checked is the ordering of quantization
558    // scaling list data, as specified in section 5.2. In addition, the ReservedIntraBit flag in the macroblock control
559    // buffer must use the semantics described in section 7.2 (this flag was previously reserved). The semantics of
560    // Index7Bits and RefPicList have also been clarified in updates to this specification.
561    dxvaStructure.Reserved16Bits = 3;
562 
563    // DXVA spec: Arbitrary number set by the host decoder to use as a tag in the status report
564    // feedback data. The value should not equal 0, and should be different in each call to
565    // Execute. For more information, see section 12.0, Status Report Data Structure.
566    dxvaStructure.StatusReportFeedbackNumber = frameNum;
567    assert(dxvaStructure.StatusReportFeedbackNumber > 0);
568 
569    // from DXVA spec
570    // ContinuationFlag
571    // If this flag is 1, the remainder of this structure is present in the buffer and contains valid values. If this
572    // flag is 0, the structure might be truncated at this point in the buffer, or the remaining fields may be set to 0
573    // and shall be ignored by the accelerator. The remaining members of this structure are needed only for off-host
574    // bitstream parsing. If the host decoder parses the bitstream, the decoder can truncate the picture parameters data
575    // structure buffer after the ContinuationFlag or set the remaining members to zero. uint8_t  ContinuationFlag;
576    dxvaStructure.ContinuationFlag =
577       1;   // DXVA destination struct does contain members from the slice section of pipeDesc...
578 
579    return dxvaStructure;
580 }
581 
582 void
d3d12_video_decoder_dxva_qmatrix_from_pipe_picparams_h264(pipe_h264_picture_desc * pPipeDesc,DXVA_Qmatrix_H264 & outMatrixBuffer)583 d3d12_video_decoder_dxva_qmatrix_from_pipe_picparams_h264(pipe_h264_picture_desc *pPipeDesc,
584                                                           DXVA_Qmatrix_H264 &outMatrixBuffer)
585 {
586    // Please note here that the matrices coming from the gallium VA frontend are copied from VAIQMatrixBufferH264
587    // which are specified in VAAPI as being in raster scan order (different than zigzag needed by DXVA)
588    // also please note that VAIQMatrixBufferH264.ScalingList8x8 is copied into the first two rows of
589    // pipe_h264_pps.ScalingList8x8 leaving the upper 4 rows of  pipe_h264_pps.ScalingList8x8[6][64] unmodified
590    // Finally, please note that other gallium frontends might decide to copy the scaling lists in other order
591    // and this section might have to be extended to add support for them.
592 
593    // In DXVA each scaling list is ordered in zig-zag scan order, convert them from raster scan order.
594    unsigned i, j;
595    for (i = 0; i < 6; i++) {
596       for (j = 0; j < 16; j++) {
597          outMatrixBuffer.bScalingLists4x4[i][j] = pPipeDesc->pps->ScalingList4x4[i][d3d12_video_zigzag_scan[j]];
598       }
599    }
600    for (i = 0; i < 64; i++) {
601       outMatrixBuffer.bScalingLists8x8[0][i] = pPipeDesc->pps->ScalingList8x8[0][d3d12_video_zigzag_direct[i]];
602       outMatrixBuffer.bScalingLists8x8[1][i] = pPipeDesc->pps->ScalingList8x8[1][d3d12_video_zigzag_direct[i]];
603    }
604 }
605