1 /*
2 * Copyright © Microsoft Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "d3d12_context.h"
25 #include "d3d12_format.h"
26 #include "d3d12_resource.h"
27 #include "d3d12_screen.h"
28 #include "d3d12_surface.h"
29 #include "d3d12_video_dec.h"
30 #if VIDEO_CODEC_H264DEC
31 #include "d3d12_video_dec_h264.h"
32 #endif
33 #if VIDEO_CODEC_H265DEC
34 #include "d3d12_video_dec_hevc.h"
35 #endif
36 #if VIDEO_CODEC_AV1DEC
37 #include "d3d12_video_dec_av1.h"
38 #endif
39 #if VIDEO_CODEC_VP9DEC
40 #include "d3d12_video_dec_vp9.h"
41 #endif
42 #include "d3d12_video_buffer.h"
43 #include "d3d12_residency.h"
44
45 #include "vl/vl_video_buffer.h"
46 #include "util/format/u_format.h"
47 #include "util/u_inlines.h"
48 #include "util/u_memory.h"
49 #include "util/u_video.h"
50
51 uint64_t
d3d12_video_decoder_pool_current_index(struct d3d12_video_decoder * pD3D12Dec)52 d3d12_video_decoder_pool_current_index(struct d3d12_video_decoder *pD3D12Dec)
53 {
54 return pD3D12Dec->m_fenceValue % D3D12_VIDEO_DEC_ASYNC_DEPTH;
55 }
56
/**
 * Creates a d3d12_video_decoder and returns its embedded pipe_video_codec
 * base interface, or nullptr on failure.
 *
 * On any failure path, the partially initialized decoder is torn down via
 * d3d12_video_decoder_destroy (which tolerates nullptr nested members).
 */
struct pipe_video_codec *
d3d12_video_create_decoder(struct pipe_context *context, const struct pipe_video_codec *codec)
{
   ///
   /// Initialize d3d12_video_decoder
   ///


   // Must use operator new (not a C allocator) so the constructor runs and
   // the in-class member initializers (smart pointers, etc.) are applied.
   struct d3d12_video_decoder *pD3D12Dec = new d3d12_video_decoder;

   // One resource set per frame that can be in flight concurrently.
   pD3D12Dec->m_inflightResourcesPool.resize(D3D12_VIDEO_DEC_ASYNC_DEPTH, { 0 });

   pD3D12Dec->base = *codec;
   pD3D12Dec->m_screen = context->screen;

   pD3D12Dec->base.context = context;
   pD3D12Dec->base.width = codec->width;
   pD3D12Dec->base.height = codec->height;
   // Only fill methods that are supported by the d3d12 decoder, leaving null the rest (ie. encode_* / decode_macroblock
   // / get_feedback for encode)
   pD3D12Dec->base.destroy = d3d12_video_decoder_destroy;
   pD3D12Dec->base.begin_frame = d3d12_video_decoder_begin_frame;
   pD3D12Dec->base.decode_bitstream = d3d12_video_decoder_decode_bitstream;
   pD3D12Dec->base.end_frame = d3d12_video_decoder_end_frame;
   pD3D12Dec->base.flush = d3d12_video_decoder_flush;
   pD3D12Dec->base.get_decoder_fence = d3d12_video_decoder_get_decoder_fence;

   // Derive the DXGI decode format and D3D12 profile from the pipe profile.
   pD3D12Dec->m_decodeFormat = d3d12_convert_pipe_video_profile_to_dxgi_format(codec->profile);
   pD3D12Dec->m_d3d12DecProfileType = d3d12_video_decoder_convert_pipe_video_profile_to_profile_type(codec->profile);
   pD3D12Dec->m_d3d12DecProfile = d3d12_video_decoder_convert_pipe_video_profile_to_d3d12_profile(codec->profile);

   ///
   /// Try initializing D3D12 Video device and check for device caps
   ///

   struct d3d12_context *pD3D12Ctx = (struct d3d12_context *) context;
   pD3D12Dec->m_pD3D12Screen = d3d12_screen(pD3D12Ctx->base.screen);

   ///
   /// Create decode objects
   ///
   // hr is declared up-front so the goto paths below don't jump over an
   // initialization.
   HRESULT hr = S_OK;
   // QueryInterface for ID3D12VideoDevice; failure means the adapter has no
   // video decode support at all.
   if (FAILED(pD3D12Dec->m_pD3D12Screen->dev->QueryInterface(
          IID_PPV_ARGS(pD3D12Dec->m_spD3D12VideoDevice.GetAddressOf())))) {
      debug_printf("[d3d12_video_decoder] d3d12_video_create_decoder - D3D12 Device has no Video support\n");
      goto failed;
   }

   if (!d3d12_video_decoder_check_caps_and_create_decoder(pD3D12Dec->m_pD3D12Screen, pD3D12Dec)) {
      debug_printf("[d3d12_video_decoder] d3d12_video_create_decoder - Failure on "
                   "d3d12_video_decoder_check_caps_and_create_decoder\n");
      goto failed;
   }

   if (!d3d12_video_decoder_create_command_objects(pD3D12Dec->m_pD3D12Screen, pD3D12Dec)) {
      debug_printf(
         "[d3d12_video_decoder] d3d12_video_create_decoder - Failure on d3d12_video_decoder_create_command_objects\n");
      goto failed;
   }

   if (!d3d12_video_decoder_create_video_state_buffers(pD3D12Dec->m_pD3D12Screen, pD3D12Dec)) {
      debug_printf("[d3d12_video_decoder] d3d12_video_create_decoder - Failure on "
                   "d3d12_video_decoder_create_video_state_buffers\n");
      goto failed;
   }

   // Cache the plane count / format info of the decode format; used later
   // when recording per-plane resource transitions and copies.
   pD3D12Dec->m_decodeFormatInfo = { pD3D12Dec->m_decodeFormat };
   hr = pD3D12Dec->m_pD3D12Screen->dev->CheckFeatureSupport(D3D12_FEATURE_FORMAT_INFO,
                                                            &pD3D12Dec->m_decodeFormatInfo,
                                                            sizeof(pD3D12Dec->m_decodeFormatInfo));
   if (FAILED(hr)) {
      debug_printf("CheckFeatureSupport failed with HR %x\n", hr);
      goto failed;
   }

   return &pD3D12Dec->base;

failed:
   // Centralized cleanup; destroy handles partially-initialized members.
   if (pD3D12Dec != nullptr) {
      d3d12_video_decoder_destroy((struct pipe_video_codec *) pD3D12Dec);
   }

   return nullptr;
}
142
143 /**
144 * Destroys a d3d12_video_decoder
145 * Call destroy_XX for applicable XX nested member types before deallocating
146 * Destroy methods should check != nullptr on their input target argument as this method can be called as part of
147 * cleanup from failure on the creation method
148 */
149 void
d3d12_video_decoder_destroy(struct pipe_video_codec * codec)150 d3d12_video_decoder_destroy(struct pipe_video_codec *codec)
151 {
152 if (codec == nullptr) {
153 return;
154 }
155
156 struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec;
157 // Flush and wait for completion of any in-flight GPU work before destroying objects
158 d3d12_video_decoder_flush(codec);
159 if (pD3D12Dec->m_fenceValue > 1 /* Check we submitted at least one frame */) {
160 auto decode_queue_completion_fence = pD3D12Dec->m_inflightResourcesPool[(pD3D12Dec->m_fenceValue - 1u) % D3D12_VIDEO_DEC_ASYNC_DEPTH].m_FenceData;
161 d3d12_video_decoder_sync_completion(codec, decode_queue_completion_fence.cmdqueue_fence, decode_queue_completion_fence.value, OS_TIMEOUT_INFINITE);
162 struct pipe_fence_handle *context_queue_completion_fence = NULL;
163 pD3D12Dec->base.context->flush(pD3D12Dec->base.context, &context_queue_completion_fence, PIPE_FLUSH_ASYNC | PIPE_FLUSH_HINT_FINISH);
164 pD3D12Dec->m_pD3D12Screen->base.fence_finish(&pD3D12Dec->m_pD3D12Screen->base, NULL, context_queue_completion_fence, OS_TIMEOUT_INFINITE);
165 pD3D12Dec->m_pD3D12Screen->base.fence_reference(&pD3D12Dec->m_pD3D12Screen->base, &context_queue_completion_fence, NULL);
166 }
167
168 //
169 // Destroys a decoder
170 // Call destroy_XX for applicable XX nested member types before deallocating
171 // Destroy methods should check != nullptr on their input target argument as this method can be called as part of
172 // cleanup from failure on the creation method
173 //
174
175 // No need for d3d12_destroy_video_objects
176 // All the objects created here are smart pointer members of d3d12_video_decoder
177 // No need for d3d12_destroy_video_decoder_and_heap
178 // All the objects created here are smart pointer members of d3d12_video_decoder
179 // No need for d3d12_destroy_video_dpbmanagers
180 // All the objects created here are smart pointer members of d3d12_video_decoder
181
182 // No need for m_pD3D12Screen as it is not managed by d3d12_video_decoder
183
184 // Call dtor to make ComPtr work
185 delete pD3D12Dec;
186 }
187
188 /**
189 * start decoding of a new frame
190 */
191 void
d3d12_video_decoder_begin_frame(struct pipe_video_codec * codec,struct pipe_video_buffer * target,struct pipe_picture_desc * picture)192 d3d12_video_decoder_begin_frame(struct pipe_video_codec *codec,
193 struct pipe_video_buffer *target,
194 struct pipe_picture_desc *picture)
195 {
196 // Do nothing here. Initialize happens on decoder creation, re-config (if any) happens in
197 // d3d12_video_decoder_decode_bitstream
198 struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec;
199 assert(pD3D12Dec);
200
201 ///
202 /// Wait here to make sure the next in flight resource set is empty before using it
203 ///
204 uint64_t fenceValueToWaitOn = static_cast<uint64_t>(
205 std::max(static_cast<int64_t>(0l),
206 static_cast<int64_t>(pD3D12Dec->m_fenceValue) - static_cast<int64_t>(D3D12_VIDEO_DEC_ASYNC_DEPTH)));
207
208 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_begin_frame Waiting for completion of in flight resource "
209 "sets with previous work with fenceValue: %" PRIu64 "\n",
210 fenceValueToWaitOn);
211
212 ASSERTED bool wait_res =
213 d3d12_video_decoder_sync_completion(codec, pD3D12Dec->m_spFence.Get(), fenceValueToWaitOn, OS_TIMEOUT_INFINITE);
214 assert(wait_res);
215
216 HRESULT hr = pD3D12Dec->m_spDecodeCommandList->Reset(
217 pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_spCommandAllocator.Get());
218 if (FAILED(hr)) {
219 debug_printf("[d3d12_video_decoder] resetting ID3D12GraphicsCommandList failed with HR %x\n", hr);
220 assert(false);
221 }
222
223 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_begin_frame finalized for fenceValue: %d\n",
224 pD3D12Dec->m_fenceValue);
225 }
226
227 /**
228 * decode a bitstream
229 */
230 void
d3d12_video_decoder_decode_bitstream(struct pipe_video_codec * codec,struct pipe_video_buffer * target,struct pipe_picture_desc * picture,unsigned num_buffers,const void * const * buffers,const unsigned * sizes)231 d3d12_video_decoder_decode_bitstream(struct pipe_video_codec *codec,
232 struct pipe_video_buffer *target,
233 struct pipe_picture_desc *picture,
234 unsigned num_buffers,
235 const void *const *buffers,
236 const unsigned *sizes)
237 {
238 struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec;
239 assert(pD3D12Dec);
240 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_decode_bitstream started for fenceValue: %d\n",
241 pD3D12Dec->m_fenceValue);
242 assert(pD3D12Dec->m_spD3D12VideoDevice);
243 assert(pD3D12Dec->m_spDecodeCommandQueue);
244 assert(pD3D12Dec->m_pD3D12Screen);
245 ASSERTED struct d3d12_video_buffer *pD3D12VideoBuffer = (struct d3d12_video_buffer *) target;
246 assert(pD3D12VideoBuffer);
247
248 ///
249 /// Compressed bitstream buffers
250 ///
251
252 /// Mesa VA frontend Video buffer passing semantics for H264, HEVC, MPEG4, VC1 and PIPE_VIDEO_PROFILE_VC1_ADVANCED
253 /// are: If num_buffers == 1 -> buf[0] has the compressed bitstream WITH the starting code If num_buffers == 2 ->
254 /// buf[0] has the NALU starting code and buf[1] has the compressed bitstream WITHOUT any starting code. If
255 /// num_buffers = 3 -> It's JPEG, not supported in D3D12. num_buffers is at most 3.
256 /// Mesa VDPAU frontend passes the buffers as they get passed in VdpDecoderRender without fixing any start codes
257 /// except for PIPE_VIDEO_PROFILE_VC1_ADVANCED
258 // In https://http.download.nvidia.com/XFree86/vdpau/doxygen/html/index.html#video_mixer_usage it's mentioned that:
259 // It is recommended that applications pass solely the slice data to VDPAU; specifically that any header data
260 // structures be excluded from the portion of the bitstream passed to VDPAU. VDPAU implementations must operate
261 // correctly if non-slice data is included, at least for formats employing start codes to delimit slice data. For all
262 // codecs/profiles it's highly recommended (when the codec/profile has such codes...) that the start codes are passed
263 // to VDPAU, even when not included in the bitstream the VDPAU client is parsing. Let's assume we get all the start
264 // codes for VDPAU. The doc also says "VDPAU implementations must operate correctly if non-slice data is included, at
265 // least for formats employing start codes to delimit slice data" if we ever get an issue with VDPAU start codes we
266 // should consider adding the code that handles this in the VDPAU layer above the gallium driver like mesa VA does.
267
268 // To handle the multi-slice case end_frame already takes care of this by parsing the start codes from the
269 // combined bitstream of all decode_bitstream calls.
270
271 // VAAPI seems to send one decode_bitstream command per slice, but we should also support the VDPAU case where the
272 // buffers have multiple buffer array entry per slice {startCode (optional), slice1, slice2, ..., startCode
273 // (optional) , sliceN}
274
275 if (num_buffers > 2) // Assume this means multiple slices at once in a decode_bitstream call
276 {
277 // Based on VA frontend codebase, this never happens for video (no JPEG)
278 // Based on VDPAU frontends codebase, this only happens when sending more than one slice at once in decode bitstream
279
280 // To handle the case where VDPAU send all the slices at once in a single decode_bitstream call, let's pretend it
281 // was a series of different calls
282
283 // group by start codes and buffers and perform calls for the number of slices
284 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_decode_bitstream multiple slices on same call detected "
285 "for fenceValue: %d, breaking down the calls into one per slice\n",
286 pD3D12Dec->m_fenceValue);
287
288 size_t curBufferIdx = 0;
289
290 // Vars to be used for the delegation calls to decode_bitstream
291 unsigned call_num_buffers = 0;
292 const void *const *call_buffers = nullptr;
293 const unsigned *call_sizes = nullptr;
294
295 while (curBufferIdx < num_buffers) {
296 // Store the current buffer as the base array pointer for the delegated call, later decide if it'll be a
297 // startcode+slicedata or just slicedata call
298 call_buffers = &buffers[curBufferIdx];
299 call_sizes = &sizes[curBufferIdx];
300
301 // Usually start codes are less or equal than 4 bytes
302 // If the current buffer is a start code buffer, send it along with the next buffer. Otherwise, just send the
303 // current buffer.
304 call_num_buffers = (sizes[curBufferIdx] <= 4) ? 2 : 1;
305
306 // Delegate call with one or two buffers only
307 d3d12_video_decoder_decode_bitstream(codec, target, picture, call_num_buffers, call_buffers, call_sizes);
308
309 curBufferIdx += call_num_buffers; // Consume from the loop the buffers sent in the last call
310 }
311 } else {
312 ///
313 /// Handle single slice buffer path, maybe with an extra start code buffer at buffers[0].
314 ///
315
316 // Both the start codes being present at buffers[0] and the rest in buffers [1] or full buffer at [0] cases can be
317 // handled by flattening all the buffers into a single one and passing that to HW.
318
319 size_t totalReceivedBuffersSize = 0u; // Combined size of all sizes[]
320 for (size_t bufferIdx = 0; bufferIdx < num_buffers; bufferIdx++) {
321 totalReceivedBuffersSize += sizes[bufferIdx];
322 }
323
324 // Bytes of data pre-staged before this decode_frame call
325 auto &inFlightResources = pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)];
326 size_t preStagedDataSize = inFlightResources.m_stagingDecodeBitstream.size();
327
328 // Extend the staging buffer size, as decode_frame can be called several times before end_frame
329 inFlightResources.m_stagingDecodeBitstream.resize(preStagedDataSize + totalReceivedBuffersSize);
330
331 // Point newSliceDataPositionDstBase to the end of the pre-staged data in m_stagingDecodeBitstream, where the new
332 // buffers will be appended
333 uint8_t *newSliceDataPositionDstBase = inFlightResources.m_stagingDecodeBitstream.data() + preStagedDataSize;
334
335 // Append new data at the end.
336 size_t dstOffset = 0u;
337 for (size_t bufferIdx = 0; bufferIdx < num_buffers; bufferIdx++) {
338 memcpy(newSliceDataPositionDstBase + dstOffset, buffers[bufferIdx], sizes[bufferIdx]);
339 dstOffset += sizes[bufferIdx];
340 }
341
342 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_decode_bitstream finalized for fenceValue: %d\n",
343 pD3D12Dec->m_fenceValue);
344 }
345
346 if (pD3D12Dec->m_d3d12DecProfileType == d3d12_video_decode_profile_type_h264) {
347 struct pipe_h264_picture_desc *h264 = (pipe_h264_picture_desc*) picture;
348 target->interlaced = !h264->pps->sps->frame_mbs_only_flag;
349 }
350 }
351
/**
 * Caches, per codec profile, the upper layer's current decode target and its
 * reference-frame list so later DPB handling can map pipe buffers to D3D12
 * resources.
 */
void
d3d12_video_decoder_store_upper_layer_references(struct d3d12_video_decoder *pD3D12Dec,
                                                 struct pipe_video_buffer *target,
                                                 struct pipe_picture_desc *picture)
{
#if D3D12_VIDEO_ANY_DECODER_ENABLED
   pD3D12Dec->m_pCurrentDecodeTarget = target;
   // The picture desc's concrete type is determined by the decode profile;
   // each codec exposes its reference list through the same "ref" member.
   switch (pD3D12Dec->m_d3d12DecProfileType) {
#if VIDEO_CODEC_H264DEC
      case d3d12_video_decode_profile_type_h264:
         pD3D12Dec->m_pCurrentReferenceTargets = ((pipe_h264_picture_desc *) picture)->ref;
         break;
#endif
#if VIDEO_CODEC_H265DEC
      case d3d12_video_decode_profile_type_hevc:
         pD3D12Dec->m_pCurrentReferenceTargets = ((pipe_h265_picture_desc *) picture)->ref;
         break;
#endif
#if VIDEO_CODEC_AV1DEC
      case d3d12_video_decode_profile_type_av1:
         pD3D12Dec->m_pCurrentReferenceTargets = ((pipe_av1_picture_desc *) picture)->ref;
         break;
#endif
#if VIDEO_CODEC_VP9DEC
      case d3d12_video_decode_profile_type_vp9:
         pD3D12Dec->m_pCurrentReferenceTargets = ((pipe_vp9_picture_desc *) picture)->ref;
         break;
#endif
      default:
         unreachable("Unsupported d3d12_video_decode_profile_type");
         break;
   }
#endif   // D3D12_VIDEO_ANY_DECODER_ENABLED
}
395
396 /**
397 * end decoding of the current frame
398 */
399 void
d3d12_video_decoder_end_frame(struct pipe_video_codec * codec,struct pipe_video_buffer * target,struct pipe_picture_desc * picture)400 d3d12_video_decoder_end_frame(struct pipe_video_codec *codec,
401 struct pipe_video_buffer *target,
402 struct pipe_picture_desc *picture)
403 {
404 struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec;
405 assert(pD3D12Dec);
406 struct d3d12_screen *pD3D12Screen = (struct d3d12_screen *) pD3D12Dec->m_pD3D12Screen;
407 assert(pD3D12Screen);
408 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame started for fenceValue: %d\n",
409 pD3D12Dec->m_fenceValue);
410 assert(pD3D12Dec->m_spD3D12VideoDevice);
411 assert(pD3D12Dec->m_spDecodeCommandQueue);
412 struct d3d12_video_buffer *pD3D12VideoBuffer = (struct d3d12_video_buffer *) target;
413 assert(pD3D12VideoBuffer);
414
415 ///
416 /// Store current decode output target texture and reference textures from upper layer
417 ///
418 d3d12_video_decoder_store_upper_layer_references(pD3D12Dec, target, picture);
419
420 ///
421 /// Codec header picture parameters buffers
422 ///
423
424 auto &inFlightResources = pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)];
425
426 d3d12_video_decoder_store_converted_dxva_picparams_from_pipe_input(pD3D12Dec, picture, pD3D12VideoBuffer);
427 assert(inFlightResources.m_picParamsBuffer.size() > 0);
428
429 ///
430 /// Prepare Slice control buffers before clearing staging buffer
431 ///
432 assert(inFlightResources.m_stagingDecodeBitstream.size() >
433 0); // Make sure the staging wasn't cleared yet in end_frame
434 d3d12_video_decoder_prepare_dxva_slices_control(pD3D12Dec, picture);
435 assert(inFlightResources.m_SliceControlBuffer.size() > 0);
436
437 ///
438 /// Upload m_stagingDecodeBitstream to GPU memory now that end_frame is called and clear staging buffer
439 ///
440
441 uint64_t sliceDataStagingBufferSize = inFlightResources.m_stagingDecodeBitstream.size();
442 uint8_t *sliceDataStagingBufferPtr = inFlightResources.m_stagingDecodeBitstream.data();
443
444 // Reallocate if necessary to accomodate the current frame bitstream buffer in GPU memory
445 if (inFlightResources.m_curFrameCompressedBitstreamBufferAllocatedSize < sliceDataStagingBufferSize) {
446 if (!d3d12_video_decoder_create_staging_bitstream_buffer(pD3D12Screen, pD3D12Dec, sliceDataStagingBufferSize)) {
447 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame - Failure on "
448 "d3d12_video_decoder_create_staging_bitstream_buffer\n");
449 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame failed for fenceValue: %d\n",
450 pD3D12Dec->m_fenceValue);
451 assert(false);
452 return;
453 }
454 }
455
456 // Upload frame bitstream CPU data to ID3D12Resource buffer
457 inFlightResources.m_curFrameCompressedBitstreamBufferPayloadSize =
458 sliceDataStagingBufferSize; // This can be less than m_curFrameCompressedBitstreamBufferAllocatedSize.
459 assert(inFlightResources.m_curFrameCompressedBitstreamBufferPayloadSize <=
460 inFlightResources.m_curFrameCompressedBitstreamBufferAllocatedSize);
461
462 /* One-shot transfer operation with data supplied in a user
463 * pointer.
464 */
465 inFlightResources.pPipeCompressedBufferObj =
466 d3d12_resource_from_resource(&pD3D12Screen->base, inFlightResources.m_curFrameCompressedBitstreamBuffer.Get());
467 assert(inFlightResources.pPipeCompressedBufferObj);
468 pD3D12Dec->base.context->buffer_subdata(pD3D12Dec->base.context, // context
469 inFlightResources.pPipeCompressedBufferObj, // dst buffer
470 PIPE_MAP_WRITE, // usage PIPE_MAP_x
471 0, // offset
472 sizeof(*sliceDataStagingBufferPtr) * sliceDataStagingBufferSize, // size
473 sliceDataStagingBufferPtr // data
474 );
475
476 // Flush buffer_subdata batch
477 // before deleting the source CPU buffer below
478
479 pD3D12Dec->base.context->flush(pD3D12Dec->base.context,
480 &inFlightResources.m_pBitstreamUploadGPUCompletionFence,
481 PIPE_FLUSH_ASYNC | PIPE_FLUSH_HINT_FINISH);
482 assert(inFlightResources.m_pBitstreamUploadGPUCompletionFence);
483 // To be waited on GPU fence before flushing current frame DecodeFrame to GPU
484
485 ///
486 /// Proceed to record the GPU Decode commands
487 ///
488
489 // Requested conversions by caller upper layer (none for now)
490 d3d12_video_decode_output_conversion_arguments requestedConversionArguments = {};
491
492 ///
493 /// Record DecodeFrame operation and resource state transitions.
494 ///
495
496 // Translate input D3D12 structure
497 D3D12_VIDEO_DECODE_INPUT_STREAM_ARGUMENTS d3d12InputArguments = {};
498
499 d3d12InputArguments.CompressedBitstream.pBuffer = inFlightResources.m_curFrameCompressedBitstreamBuffer.Get();
500 d3d12InputArguments.CompressedBitstream.Offset = 0u;
501 ASSERTED constexpr uint64_t d3d12BitstreamOffsetAlignment =
502 128u; // specified in
503 // https://docs.microsoft.com/en-us/windows/win32/api/d3d12video/ne-d3d12video-d3d12_video_decode_tier
504 assert((d3d12InputArguments.CompressedBitstream.Offset == 0) ||
505 ((d3d12InputArguments.CompressedBitstream.Offset % d3d12BitstreamOffsetAlignment) == 0));
506 d3d12InputArguments.CompressedBitstream.Size = inFlightResources.m_curFrameCompressedBitstreamBufferPayloadSize;
507
508 D3D12_RESOURCE_BARRIER resourceBarrierCommonToDecode[1] = {
509 CD3DX12_RESOURCE_BARRIER::Transition(d3d12InputArguments.CompressedBitstream.pBuffer,
510 D3D12_RESOURCE_STATE_COMMON,
511 D3D12_RESOURCE_STATE_VIDEO_DECODE_READ),
512 };
513 pD3D12Dec->m_spDecodeCommandList->ResourceBarrier(1u, resourceBarrierCommonToDecode);
514
515 // Schedule reverse (back to common) transitions before command list closes for current frame
516 pD3D12Dec->m_transitionsBeforeCloseCmdList.push_back(
517 CD3DX12_RESOURCE_BARRIER::Transition(d3d12InputArguments.CompressedBitstream.pBuffer,
518 D3D12_RESOURCE_STATE_VIDEO_DECODE_READ,
519 D3D12_RESOURCE_STATE_COMMON));
520
521 ///
522 /// Clear texture (no reference only flags in resource allocation) to use as decode output to send downstream for
523 /// display/consumption
524 ///
525 ID3D12Resource *pOutputD3D12Texture;
526 uint outputD3D12Subresource = 0;
527
528 ///
529 /// Ref Only texture (with reference only flags in resource allocation) to use as reconstructed picture decode output
530 /// and to store as future reference in DPB
531 ///
532 ID3D12Resource *pRefOnlyOutputD3D12Texture;
533 uint refOnlyOutputD3D12Subresource = 0;
534
535 if (!d3d12_video_decoder_prepare_for_decode_frame(pD3D12Dec,
536 target,
537 pD3D12VideoBuffer,
538 &pOutputD3D12Texture, // output
539 &outputD3D12Subresource, // output
540 &pRefOnlyOutputD3D12Texture, // output
541 &refOnlyOutputD3D12Subresource, // output
542 requestedConversionArguments)) {
543 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame - Failure on "
544 "d3d12_video_decoder_prepare_for_decode_frame\n");
545 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame failed for fenceValue: %d\n",
546 pD3D12Dec->m_fenceValue);
547 assert(false);
548 return;
549 }
550
551 ///
552 /// Set codec picture parameters CPU buffer
553 ///
554
555 d3d12InputArguments.NumFrameArguments =
556 1u; // Only the codec data received from the above layer with picture params
557 d3d12InputArguments.FrameArguments[d3d12InputArguments.NumFrameArguments - 1] = {
558 D3D12_VIDEO_DECODE_ARGUMENT_TYPE_PICTURE_PARAMETERS,
559 static_cast<uint32_t>(inFlightResources.m_picParamsBuffer.size()),
560 inFlightResources.m_picParamsBuffer.data(),
561 };
562
563 if (inFlightResources.m_SliceControlBuffer.size() > 0) {
564 d3d12InputArguments.NumFrameArguments++;
565 d3d12InputArguments.FrameArguments[d3d12InputArguments.NumFrameArguments - 1] = {
566 D3D12_VIDEO_DECODE_ARGUMENT_TYPE_SLICE_CONTROL,
567 static_cast<uint32_t>(inFlightResources.m_SliceControlBuffer.size()),
568 inFlightResources.m_SliceControlBuffer.data(),
569 };
570 }
571
572 if (inFlightResources.qp_matrix_frame_argument_enabled &&
573 (inFlightResources.m_InverseQuantMatrixBuffer.size() > 0)) {
574 d3d12InputArguments.NumFrameArguments++;
575 d3d12InputArguments.FrameArguments[d3d12InputArguments.NumFrameArguments - 1] = {
576 D3D12_VIDEO_DECODE_ARGUMENT_TYPE_INVERSE_QUANTIZATION_MATRIX,
577 static_cast<uint32_t>(inFlightResources.m_InverseQuantMatrixBuffer.size()),
578 inFlightResources.m_InverseQuantMatrixBuffer.data(),
579 };
580 }
581
582 d3d12InputArguments.ReferenceFrames = pD3D12Dec->m_spDPBManager->get_current_reference_frames();
583 if (D3D12_DEBUG_VERBOSE & d3d12_debug) {
584 pD3D12Dec->m_spDPBManager->print_dpb();
585 }
586
587 d3d12InputArguments.pHeap = pD3D12Dec->m_spVideoDecoderHeap.Get();
588
589 // translate output D3D12 structure
590 D3D12_VIDEO_DECODE_OUTPUT_STREAM_ARGUMENTS1 d3d12OutputArguments = {};
591 d3d12OutputArguments.pOutputTexture2D = pOutputD3D12Texture;
592 d3d12OutputArguments.OutputSubresource = outputD3D12Subresource;
593
594 bool fReferenceOnly = (pD3D12Dec->m_ConfigDecoderSpecificFlags &
595 d3d12_video_decode_config_specific_flag_reference_only_textures_required) != 0;
596 if (fReferenceOnly) {
597 d3d12OutputArguments.ConversionArguments.Enable = true;
598
599 assert(pRefOnlyOutputD3D12Texture);
600 d3d12OutputArguments.ConversionArguments.pReferenceTexture2D = pRefOnlyOutputD3D12Texture;
601 d3d12OutputArguments.ConversionArguments.ReferenceSubresource = refOnlyOutputD3D12Subresource;
602
603 const D3D12_RESOURCE_DESC &descReference = GetDesc(d3d12OutputArguments.ConversionArguments.pReferenceTexture2D);
604 d3d12OutputArguments.ConversionArguments.DecodeColorSpace = d3d12_convert_from_legacy_color_space(
605 !util_format_is_yuv(d3d12_get_pipe_format(descReference.Format)),
606 util_format_get_blocksize(d3d12_get_pipe_format(descReference.Format)) * 8 /*bytes to bits conversion*/,
607 /* StudioRGB= */ false,
608 /* P709= */ true,
609 /* StudioYUV= */ true);
610
611 const D3D12_RESOURCE_DESC &descOutput = GetDesc(d3d12OutputArguments.pOutputTexture2D);
612 d3d12OutputArguments.ConversionArguments.OutputColorSpace = d3d12_convert_from_legacy_color_space(
613 !util_format_is_yuv(d3d12_get_pipe_format(descOutput.Format)),
614 util_format_get_blocksize(d3d12_get_pipe_format(descOutput.Format)) * 8 /*bytes to bits conversion*/,
615 /* StudioRGB= */ false,
616 /* P709= */ true,
617 /* StudioYUV= */ true);
618
619 const D3D12_VIDEO_DECODER_HEAP_DESC &HeapDesc = GetDesc(pD3D12Dec->m_spVideoDecoderHeap.Get());
620 d3d12OutputArguments.ConversionArguments.OutputWidth = HeapDesc.DecodeWidth;
621 d3d12OutputArguments.ConversionArguments.OutputHeight = HeapDesc.DecodeHeight;
622 } else {
623 d3d12OutputArguments.ConversionArguments.Enable = false;
624 }
625
626 CD3DX12_RESOURCE_DESC outputDesc(GetDesc(d3d12OutputArguments.pOutputTexture2D));
627 uint32_t MipLevel, PlaneSlice, ArraySlice;
628 D3D12DecomposeSubresource(d3d12OutputArguments.OutputSubresource,
629 outputDesc.MipLevels,
630 outputDesc.ArraySize(),
631 MipLevel,
632 ArraySlice,
633 PlaneSlice);
634
635 for (PlaneSlice = 0; PlaneSlice < pD3D12Dec->m_decodeFormatInfo.PlaneCount; PlaneSlice++) {
636 uint planeOutputSubresource = outputDesc.CalcSubresource(MipLevel, ArraySlice, PlaneSlice);
637
638 D3D12_RESOURCE_BARRIER resourceBarrierCommonToDecode[1] = {
639 CD3DX12_RESOURCE_BARRIER::Transition(d3d12OutputArguments.pOutputTexture2D,
640 D3D12_RESOURCE_STATE_COMMON,
641 D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE,
642 planeOutputSubresource),
643 };
644 pD3D12Dec->m_spDecodeCommandList->ResourceBarrier(1u, resourceBarrierCommonToDecode);
645 }
646
647 // Schedule reverse (back to common) transitions before command list closes for current frame
648 for (PlaneSlice = 0; PlaneSlice < pD3D12Dec->m_decodeFormatInfo.PlaneCount; PlaneSlice++) {
649 uint planeOutputSubresource = outputDesc.CalcSubresource(MipLevel, ArraySlice, PlaneSlice);
650 pD3D12Dec->m_transitionsBeforeCloseCmdList.push_back(
651 CD3DX12_RESOURCE_BARRIER::Transition(d3d12OutputArguments.pOutputTexture2D,
652 D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE,
653 D3D12_RESOURCE_STATE_COMMON,
654 planeOutputSubresource));
655 }
656
657 // Record DecodeFrame
658
659 pD3D12Dec->m_spDecodeCommandList->DecodeFrame1(pD3D12Dec->m_spVideoDecoder.Get(),
660 &d3d12OutputArguments,
661 &d3d12InputArguments);
662
663 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame finalized for fenceValue: %d\n",
664 pD3D12Dec->m_fenceValue);
665
666 // Save extra references of Decoder, DecoderHeap and DPB allocations in case
667 // there's a reconfiguration that trigers the construction of new objects
668 inFlightResources.m_spDecoder = pD3D12Dec->m_spVideoDecoder;
669 inFlightResources.m_spDecoderHeap = pD3D12Dec->m_spVideoDecoderHeap;
670 inFlightResources.m_References = pD3D12Dec->m_spDPBManager;
671
672 ///
673 /// Flush work to the GPU
674 ///
675 pD3D12Dec->m_needsGPUFlush = true;
676 d3d12_video_decoder_flush(codec);
677 // Call to d3d12_video_decoder_flush increases m_FenceValue
678 uint64_t inflightIndexBeforeFlush = (pD3D12Dec->m_fenceValue - 1u) % D3D12_VIDEO_DEC_ASYNC_DEPTH;
679
680 if (pD3D12Dec->m_spDPBManager->is_pipe_buffer_underlying_output_decode_allocation()) {
681 // No need to copy, the output surface fence is merely the decode queue fence
682 *picture->fence = (pipe_fence_handle *) &pD3D12Dec->m_inflightResourcesPool[inflightIndexBeforeFlush].m_FenceData;
683 } else {
684 ///
685 /// If !pD3D12Dec->m_spDPBManager->is_pipe_buffer_underlying_output_decode_allocation()
686 /// We cannot use the standalone video buffer allocation directly and we must use instead
687 /// either a ID3D12Resource with DECODE_REFERENCE only flag or a texture array within the same
688 /// allocation
689 /// Do GPU->GPU texture copy from decode output to pipe target decode texture sampler view planes
690 ///
691
692 // Get destination resource
693 struct pipe_sampler_view **pPipeDstViews = target->get_sampler_view_planes(target);
694
695 // Get source pipe_resource
696 pipe_resource *pPipeSrc =
697 d3d12_resource_from_resource(&pD3D12Screen->base, d3d12OutputArguments.pOutputTexture2D);
698 assert(pPipeSrc);
699
700 // GPU wait on the graphics context which will do the copy until the decode finishes
701 pD3D12Screen->cmdqueue->Wait(
702 pD3D12Dec->m_inflightResourcesPool[inflightIndexBeforeFlush].m_FenceData.cmdqueue_fence,
703 pD3D12Dec->m_inflightResourcesPool[inflightIndexBeforeFlush].m_FenceData.value);
704
705 // Copy all format subresources/texture planes
706 for (PlaneSlice = 0; PlaneSlice < pD3D12Dec->m_decodeFormatInfo.PlaneCount; PlaneSlice++) {
707 assert(d3d12OutputArguments.OutputSubresource < INT16_MAX);
708 struct pipe_box box = { 0,
709 0,
710 // src array slice, taken as Z for TEXTURE_2D_ARRAY
711 static_cast<int16_t>(d3d12OutputArguments.OutputSubresource),
712 static_cast<int>(pPipeDstViews[PlaneSlice]->texture->width0),
713 static_cast<int16_t>(pPipeDstViews[PlaneSlice]->texture->height0),
714 1 };
715
716 pD3D12Dec->base.context->resource_copy_region(pD3D12Dec->base.context,
717 pPipeDstViews[PlaneSlice]->texture, // dst
718 0, // dst level
719 0, // dstX
720 0, // dstY
721 0, // dstZ
722 (PlaneSlice == 0) ? pPipeSrc : pPipeSrc->next, // src
723 0, // src level
724 &box);
725 }
726 // Flush resource_copy_region batch
727 // The output surface fence is the graphics queue that will signal after the copy ends
728 pD3D12Dec->base.context->flush(pD3D12Dec->base.context, picture->fence, PIPE_FLUSH_ASYNC | PIPE_FLUSH_HINT_FINISH);
729 }
730 }
731
732 /**
733 * Get decoder fence.
734 */
735 int
d3d12_video_decoder_get_decoder_fence(struct pipe_video_codec * codec,struct pipe_fence_handle * fence,uint64_t timeout)736 d3d12_video_decoder_get_decoder_fence(struct pipe_video_codec *codec, struct pipe_fence_handle *fence, uint64_t timeout)
737 {
738 struct d3d12_fence *fenceValueToWaitOn = (struct d3d12_fence *) fence;
739 assert(fenceValueToWaitOn);
740
741 ASSERTED bool wait_res =
742 d3d12_video_decoder_sync_completion(codec, fenceValueToWaitOn->cmdqueue_fence, fenceValueToWaitOn->value, timeout);
743
744 // Return semantics based on p_video_codec interface
745 // ret == 0 -> Decode in progress
746 // ret != 0 -> Decode completed
747 return wait_res ? 1 : 0;
748 }
749
750 /**
751 * flush any outstanding command buffers to the hardware
752 * should be called before a video_buffer is acessed by the gallium frontend again
753 */
754 void
d3d12_video_decoder_flush(struct pipe_video_codec * codec)755 d3d12_video_decoder_flush(struct pipe_video_codec *codec)
756 {
757 struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec;
758 assert(pD3D12Dec);
759 assert(pD3D12Dec->m_spD3D12VideoDevice);
760 assert(pD3D12Dec->m_spDecodeCommandQueue);
761 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush started. Will flush video queue work and CPU wait on "
762 "fenceValue: %d\n",
763 pD3D12Dec->m_fenceValue);
764
765 if (!pD3D12Dec->m_needsGPUFlush) {
766 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush started. Nothing to flush, all up to date.\n");
767 } else {
768 HRESULT hr = pD3D12Dec->m_pD3D12Screen->dev->GetDeviceRemovedReason();
769 if (hr != S_OK) {
770 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush"
771 " - D3D12Device was removed BEFORE commandlist "
772 "execution with HR %x.\n",
773 hr);
774 goto flush_fail;
775 }
776
777 if (pD3D12Dec->m_transitionsBeforeCloseCmdList.size() > 0) {
778 pD3D12Dec->m_spDecodeCommandList->ResourceBarrier(pD3D12Dec->m_transitionsBeforeCloseCmdList.size(),
779 pD3D12Dec->m_transitionsBeforeCloseCmdList.data());
780 pD3D12Dec->m_transitionsBeforeCloseCmdList.clear();
781 }
782
783 hr = pD3D12Dec->m_spDecodeCommandList->Close();
784 if (FAILED(hr)) {
785 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush - Can't close command list with HR %x\n", hr);
786 goto flush_fail;
787 }
788
789 auto &inFlightResources = pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)];
790 ID3D12CommandList *ppCommandLists[1] = { pD3D12Dec->m_spDecodeCommandList.Get() };
791 struct d3d12_fence *pUploadBitstreamFence = d3d12_fence(inFlightResources.m_pBitstreamUploadGPUCompletionFence);
792 pD3D12Dec->m_spDecodeCommandQueue->Wait(pUploadBitstreamFence->cmdqueue_fence, pUploadBitstreamFence->value);
793 pD3D12Dec->m_spDecodeCommandQueue->ExecuteCommandLists(1, ppCommandLists);
794 pD3D12Dec->m_spDecodeCommandQueue->Signal(pD3D12Dec->m_spFence.Get(), pD3D12Dec->m_fenceValue);
795
796 // Validate device was not removed
797 hr = pD3D12Dec->m_pD3D12Screen->dev->GetDeviceRemovedReason();
798 if (hr != S_OK) {
799 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush"
800 " - D3D12Device was removed AFTER commandlist "
801 "execution with HR %x, but wasn't before.\n",
802 hr);
803 goto flush_fail;
804 }
805
806 // Set async fence info
807 memset(&inFlightResources.m_FenceData, 0, sizeof(inFlightResources.m_FenceData));
808
809 inFlightResources.m_FenceData.value = pD3D12Dec->m_fenceValue;
810 inFlightResources.m_FenceData.cmdqueue_fence = pD3D12Dec->m_spFence.Get();
811
812 pD3D12Dec->m_fenceValue++;
813 pD3D12Dec->m_needsGPUFlush = false;
814 }
815 return;
816
817 flush_fail:
818 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush failed for fenceValue: %d\n", pD3D12Dec->m_fenceValue);
819 assert(false);
820 }
821
822 bool
d3d12_video_decoder_create_command_objects(const struct d3d12_screen * pD3D12Screen,struct d3d12_video_decoder * pD3D12Dec)823 d3d12_video_decoder_create_command_objects(const struct d3d12_screen *pD3D12Screen,
824 struct d3d12_video_decoder *pD3D12Dec)
825 {
826 assert(pD3D12Dec->m_spD3D12VideoDevice);
827
828 D3D12_COMMAND_QUEUE_DESC commandQueueDesc = { D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE };
829 HRESULT hr = pD3D12Screen->dev->CreateCommandQueue(&commandQueueDesc,
830 IID_PPV_ARGS(pD3D12Dec->m_spDecodeCommandQueue.GetAddressOf()));
831 if (FAILED(hr)) {
832 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_command_objects - Call to CreateCommandQueue "
833 "failed with HR %x\n",
834 hr);
835 return false;
836 }
837
838 hr = pD3D12Screen->dev->CreateFence(0, D3D12_FENCE_FLAG_SHARED, IID_PPV_ARGS(&pD3D12Dec->m_spFence));
839 if (FAILED(hr)) {
840 debug_printf(
841 "[d3d12_video_decoder] d3d12_video_decoder_create_command_objects - Call to CreateFence failed with HR %x\n",
842 hr);
843 return false;
844 }
845
846 for (auto &inputResource : pD3D12Dec->m_inflightResourcesPool) {
847 hr = pD3D12Dec->m_pD3D12Screen->dev->CreateCommandAllocator(
848 D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE,
849 IID_PPV_ARGS(inputResource.m_spCommandAllocator.GetAddressOf()));
850 if (FAILED(hr)) {
851 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_command_objects - Call to "
852 "CreateCommandAllocator failed with HR %x\n",
853 hr);
854 return false;
855 }
856 }
857
858 ComPtr<ID3D12Device4> spD3D12Device4;
859 if (FAILED(pD3D12Dec->m_pD3D12Screen->dev->QueryInterface(IID_PPV_ARGS(spD3D12Device4.GetAddressOf())))) {
860 debug_printf(
861 "[d3d12_video_decoder] d3d12_video_decoder_create_decoder - D3D12 Device has no ID3D12Device4 support\n");
862 return false;
863 }
864
865 hr = spD3D12Device4->CreateCommandList1(0,
866 D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE,
867 D3D12_COMMAND_LIST_FLAG_NONE,
868 IID_PPV_ARGS(pD3D12Dec->m_spDecodeCommandList.GetAddressOf()));
869
870 if (FAILED(hr)) {
871 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_command_objects - Call to CreateCommandList "
872 "failed with HR %x\n",
873 hr);
874 return false;
875 }
876
877 return true;
878 }
879
880 bool
d3d12_video_decoder_check_caps_and_create_decoder(const struct d3d12_screen * pD3D12Screen,struct d3d12_video_decoder * pD3D12Dec)881 d3d12_video_decoder_check_caps_and_create_decoder(const struct d3d12_screen *pD3D12Screen,
882 struct d3d12_video_decoder *pD3D12Dec)
883 {
884 assert(pD3D12Dec->m_spD3D12VideoDevice);
885
886 pD3D12Dec->m_decoderDesc = {};
887
888 D3D12_VIDEO_DECODE_CONFIGURATION decodeConfiguration = { pD3D12Dec->m_d3d12DecProfile,
889 D3D12_BITSTREAM_ENCRYPTION_TYPE_NONE,
890 D3D12_VIDEO_FRAME_CODED_INTERLACE_TYPE_NONE };
891
892 D3D12_FEATURE_DATA_VIDEO_DECODE_SUPPORT decodeSupport = {};
893 decodeSupport.NodeIndex = pD3D12Dec->m_NodeIndex;
894 decodeSupport.Configuration = decodeConfiguration;
895 decodeSupport.Width = pD3D12Dec->base.width;
896 decodeSupport.Height = pD3D12Dec->base.height;
897 decodeSupport.DecodeFormat = pD3D12Dec->m_decodeFormat;
898 // no info from above layer on framerate/bitrate
899 decodeSupport.FrameRate.Numerator = 0;
900 decodeSupport.FrameRate.Denominator = 0;
901 decodeSupport.BitRate = 0;
902
903 HRESULT hr = pD3D12Dec->m_spD3D12VideoDevice->CheckFeatureSupport(D3D12_FEATURE_VIDEO_DECODE_SUPPORT,
904 &decodeSupport,
905 sizeof(decodeSupport));
906 if (FAILED(hr)) {
907 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_check_caps_and_create_decoder - CheckFeatureSupport "
908 "failed with HR %x\n",
909 hr);
910 return false;
911 }
912
913 if (!(decodeSupport.SupportFlags & D3D12_VIDEO_DECODE_SUPPORT_FLAG_SUPPORTED)) {
914 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_check_caps_and_create_decoder - "
915 "D3D12_VIDEO_DECODE_SUPPORT_FLAG_SUPPORTED was false when checking caps \n");
916 return false;
917 }
918
919 pD3D12Dec->m_configurationFlags = decodeSupport.ConfigurationFlags;
920 pD3D12Dec->m_tier = decodeSupport.DecodeTier;
921
922 if (d3d12_video_decoder_supports_aot_dpb(decodeSupport, pD3D12Dec->m_d3d12DecProfileType)) {
923 pD3D12Dec->m_ConfigDecoderSpecificFlags |= d3d12_video_decode_config_specific_flag_array_of_textures;
924 }
925
926 if (decodeSupport.ConfigurationFlags & D3D12_VIDEO_DECODE_CONFIGURATION_FLAG_HEIGHT_ALIGNMENT_MULTIPLE_32_REQUIRED) {
927 pD3D12Dec->m_ConfigDecoderSpecificFlags |= d3d12_video_decode_config_specific_flag_alignment_height;
928 }
929
930 if (decodeSupport.ConfigurationFlags & D3D12_VIDEO_DECODE_CONFIGURATION_FLAG_REFERENCE_ONLY_ALLOCATIONS_REQUIRED) {
931 pD3D12Dec->m_ConfigDecoderSpecificFlags |=
932 d3d12_video_decode_config_specific_flag_reference_only_textures_required;
933 }
934
935 pD3D12Dec->m_decoderDesc.NodeMask = pD3D12Dec->m_NodeMask;
936 pD3D12Dec->m_decoderDesc.Configuration = decodeConfiguration;
937
938 hr = pD3D12Dec->m_spD3D12VideoDevice->CreateVideoDecoder(&pD3D12Dec->m_decoderDesc,
939 IID_PPV_ARGS(pD3D12Dec->m_spVideoDecoder.GetAddressOf()));
940 if (FAILED(hr)) {
941 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_check_caps_and_create_decoder - CreateVideoDecoder "
942 "failed with HR %x\n",
943 hr);
944 return false;
945 }
946
947 return true;
948 }
949
950 bool
d3d12_video_decoder_create_video_state_buffers(const struct d3d12_screen * pD3D12Screen,struct d3d12_video_decoder * pD3D12Dec)951 d3d12_video_decoder_create_video_state_buffers(const struct d3d12_screen *pD3D12Screen,
952 struct d3d12_video_decoder *pD3D12Dec)
953 {
954 assert(pD3D12Dec->m_spD3D12VideoDevice);
955 if (!d3d12_video_decoder_create_staging_bitstream_buffer(pD3D12Screen,
956 pD3D12Dec,
957 pD3D12Dec->m_InitialCompBitstreamGPUBufferSize)) {
958 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_video_state_buffers - Failure on "
959 "d3d12_video_decoder_create_staging_bitstream_buffer\n");
960 return false;
961 }
962
963 return true;
964 }
965
966 bool
d3d12_video_decoder_create_staging_bitstream_buffer(const struct d3d12_screen * pD3D12Screen,struct d3d12_video_decoder * pD3D12Dec,uint64_t bufSize)967 d3d12_video_decoder_create_staging_bitstream_buffer(const struct d3d12_screen *pD3D12Screen,
968 struct d3d12_video_decoder *pD3D12Dec,
969 uint64_t bufSize)
970 {
971 assert(pD3D12Dec->m_spD3D12VideoDevice);
972 auto &inFlightResources = pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)];
973 if (inFlightResources.m_curFrameCompressedBitstreamBuffer.Get() != nullptr) {
974 inFlightResources.m_curFrameCompressedBitstreamBuffer.Reset();
975 }
976
977 auto descHeap = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT, pD3D12Dec->m_NodeMask, pD3D12Dec->m_NodeMask);
978 auto descResource = CD3DX12_RESOURCE_DESC::Buffer(bufSize);
979 HRESULT hr = pD3D12Screen->dev->CreateCommittedResource(
980 &descHeap,
981 D3D12_HEAP_FLAG_NONE,
982 &descResource,
983 D3D12_RESOURCE_STATE_COMMON,
984 nullptr,
985 IID_PPV_ARGS(inFlightResources.m_curFrameCompressedBitstreamBuffer.GetAddressOf()));
986 if (FAILED(hr)) {
987 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_staging_bitstream_buffer - "
988 "CreateCommittedResource failed with HR %x\n",
989 hr);
990 return false;
991 }
992
993 inFlightResources.m_curFrameCompressedBitstreamBufferAllocatedSize = bufSize;
994 return true;
995 }
996
/**
 * Prepares all per-frame state needed before recording DecodeFrame:
 * (re)configures the DPB, fetches the decode output texture (and the
 * reference-only texture when the driver requires reference-only
 * allocations), records the COMMON -> VIDEO_DECODE_WRITE transitions and
 * schedules the reverse transitions, then updates the per-codec reference
 * picture lists for the current frame.
 *
 * Output parameters: ppOutTexture2D/pOutSubresourceIndex receive the decode
 * output allocation; ppRefOnlyOutTexture2D/pRefOnlyOutSubresourceIndex are
 * only filled when reference-only textures are required.
 * Returns false if the DPB reconfiguration fails.
 */
bool
d3d12_video_decoder_prepare_for_decode_frame(struct d3d12_video_decoder *pD3D12Dec,
                                             struct pipe_video_buffer *pCurrentDecodeTarget,
                                             struct d3d12_video_buffer *pD3D12VideoBuffer,
                                             ID3D12Resource **ppOutTexture2D,
                                             uint32_t *pOutSubresourceIndex,
                                             ID3D12Resource **ppRefOnlyOutTexture2D,
                                             uint32_t *pRefOnlyOutSubresourceIndex,
                                             const d3d12_video_decode_output_conversion_arguments &conversionArgs)
{
   if (!d3d12_video_decoder_reconfigure_dpb(pD3D12Dec, pD3D12VideoBuffer, conversionArgs)) {
      debug_printf("d3d12_video_decoder_reconfigure_dpb failed!\n");
      return false;
   }

   // Refresh DPB active references for current frame, release memory for unused references.
   d3d12_video_decoder_refresh_dpb_active_references(pD3D12Dec);

   // Get the output texture for the current frame to be decoded
   pD3D12Dec->m_spDPBManager->get_current_frame_decode_output_texture(pCurrentDecodeTarget,
                                                                      ppOutTexture2D,
                                                                      pOutSubresourceIndex);

   auto vidBuffer = (struct d3d12_video_buffer *) (pCurrentDecodeTarget);
   // If is_pipe_buffer_underlying_output_decode_allocation is enabled,
   // we can just use the underlying allocation in pCurrentDecodeTarget
   // and avoid an extra copy after decoding the frame.
   // If this is the case, we need to handle the residency of this resource
   // (if not we're actually creating the resources with CreateCommitedResource with
   // residency by default)
   if (pD3D12Dec->m_spDPBManager->is_pipe_buffer_underlying_output_decode_allocation()) {
      assert(d3d12_resource_resource(vidBuffer->texture) == *ppOutTexture2D);
      // Make it permanently resident for video use
      d3d12_promote_to_permanent_residency(pD3D12Dec->m_pD3D12Screen, vidBuffer->texture);
   }

   // Get the reference only texture for the current frame to be decoded (if applicable)
   bool fReferenceOnly = (pD3D12Dec->m_ConfigDecoderSpecificFlags &
                          d3d12_video_decode_config_specific_flag_reference_only_textures_required) != 0;
   if (fReferenceOnly) {
      bool needsTransitionToDecodeWrite = false;
      pD3D12Dec->m_spDPBManager->get_reference_only_output(pCurrentDecodeTarget,
                                                           ppRefOnlyOutTexture2D,
                                                           pRefOnlyOutSubresourceIndex,
                                                           needsTransitionToDecodeWrite);
      assert(needsTransitionToDecodeWrite);

      // Decompose the subresource index to transition every format plane
      // (e.g. Y and UV for NV12) of the target array slice.
      CD3DX12_RESOURCE_DESC outputDesc(GetDesc(*ppRefOnlyOutTexture2D));
      uint32_t MipLevel, PlaneSlice, ArraySlice;
      D3D12DecomposeSubresource(*pRefOnlyOutSubresourceIndex,
                                outputDesc.MipLevels,
                                outputDesc.ArraySize(),
                                MipLevel,
                                ArraySlice,
                                PlaneSlice);

      // Transition each plane COMMON -> VIDEO_DECODE_WRITE for the decode pass.
      for (PlaneSlice = 0; PlaneSlice < pD3D12Dec->m_decodeFormatInfo.PlaneCount; PlaneSlice++) {
         uint planeOutputSubresource = outputDesc.CalcSubresource(MipLevel, ArraySlice, PlaneSlice);

         D3D12_RESOURCE_BARRIER resourceBarrierCommonToDecode[1] = {
            CD3DX12_RESOURCE_BARRIER::Transition(*ppRefOnlyOutTexture2D,
                                                 D3D12_RESOURCE_STATE_COMMON,
                                                 D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE,
                                                 planeOutputSubresource),
         };
         pD3D12Dec->m_spDecodeCommandList->ResourceBarrier(1u, resourceBarrierCommonToDecode);
      }

      // Schedule reverse (back to common) transitions before command list closes for current frame
      for (PlaneSlice = 0; PlaneSlice < pD3D12Dec->m_decodeFormatInfo.PlaneCount; PlaneSlice++) {
         uint planeOutputSubresource = outputDesc.CalcSubresource(MipLevel, ArraySlice, PlaneSlice);
         pD3D12Dec->m_transitionsBeforeCloseCmdList.push_back(
            CD3DX12_RESOURCE_BARRIER::Transition(*ppRefOnlyOutTexture2D,
                                                 D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE,
                                                 D3D12_RESOURCE_STATE_COMMON,
                                                 planeOutputSubresource));
      }
   }

   // If decoded needs reference_only entries in the dpb, use the reference_only allocation for current frame
   // otherwise, use the standard output resource
   [[maybe_unused]] ID3D12Resource *pCurrentFrameDPBEntry = fReferenceOnly ? *ppRefOnlyOutTexture2D : *ppOutTexture2D;
   [[maybe_unused]] uint32_t currentFrameDPBEntrySubresource = fReferenceOnly ? *pRefOnlyOutSubresourceIndex : *pOutSubresourceIndex;
#if D3D12_VIDEO_ANY_DECODER_ENABLED
   // Dispatch to the codec-specific reference management for the current frame.
   switch (pD3D12Dec->m_d3d12DecProfileType) {
#if VIDEO_CODEC_H264DEC
   case d3d12_video_decode_profile_type_h264:
   {
      d3d12_video_decoder_prepare_current_frame_references_h264(pD3D12Dec,
                                                                pCurrentFrameDPBEntry,
                                                                currentFrameDPBEntrySubresource);
   } break;
#endif
#if VIDEO_CODEC_H265DEC
   case d3d12_video_decode_profile_type_hevc:
   {
      d3d12_video_decoder_prepare_current_frame_references_hevc(pD3D12Dec,
                                                                pCurrentFrameDPBEntry,
                                                                currentFrameDPBEntrySubresource);
   } break;
#endif
#if VIDEO_CODEC_AV1DEC
   case d3d12_video_decode_profile_type_av1:
   {
      d3d12_video_decoder_prepare_current_frame_references_av1(pD3D12Dec,
                                                               pCurrentFrameDPBEntry,
                                                               currentFrameDPBEntrySubresource);
   } break;
#endif
#if VIDEO_CODEC_VP9DEC
   case d3d12_video_decode_profile_type_vp9:
   {
      d3d12_video_decoder_prepare_current_frame_references_vp9(pD3D12Dec,
                                                               pCurrentFrameDPBEntry,
                                                               currentFrameDPBEntrySubresource);
   } break;
#endif
   default:
   {
      unreachable("Unsupported d3d12_video_decode_profile_type");
   } break;
   }
#endif // D3D12_VIDEO_ANY_DECODER_ENABLED
   return true;
}
1122
/**
 * Reconfigures the decoder/DPB state when the incoming frame no longer
 * matches the current configuration:
 *  - Recreates the ID3D12VideoDecoder when the output format or interlace
 *    type changed.
 *  - (Re)creates the decoder heap and DPB manager when the DPB manager or
 *    heap is missing, or when format/width/height/max-DPB changed.
 * Cached descriptors (m_decoderDesc / m_decoderHeapDesc) are only updated
 * after the corresponding creation call succeeds.
 * Returns false on any creation failure.
 */
bool
d3d12_video_decoder_reconfigure_dpb(struct d3d12_video_decoder *pD3D12Dec,
                                    struct d3d12_video_buffer *pD3D12VideoBuffer,
                                    const d3d12_video_decode_output_conversion_arguments &conversionArguments)
{
   uint32_t width;
   uint32_t height;
   uint16_t maxDPB;
   d3d12_video_decoder_get_frame_info(pD3D12Dec, &width, &height, &maxDPB);

   ID3D12Resource *pPipeD3D12DstResource = d3d12_resource_resource(pD3D12VideoBuffer->texture);
   D3D12_RESOURCE_DESC outputResourceDesc = GetDesc(pPipeD3D12DstResource);

   D3D12_VIDEO_FRAME_CODED_INTERLACE_TYPE interlaceTypeRequested =
      pD3D12VideoBuffer->base.interlaced ? D3D12_VIDEO_FRAME_CODED_INTERLACE_TYPE_FIELD_BASED : D3D12_VIDEO_FRAME_CODED_INTERLACE_TYPE_NONE;
   if ((pD3D12Dec->m_decodeFormat != outputResourceDesc.Format) ||
       (pD3D12Dec->m_decoderDesc.Configuration.InterlaceType != interlaceTypeRequested)) {
      // Copy current pD3D12Dec->m_decoderDesc, modify decodeprofile and re-create decoder.
      D3D12_VIDEO_DECODER_DESC decoderDesc = pD3D12Dec->m_decoderDesc;
      decoderDesc.Configuration.InterlaceType = interlaceTypeRequested;
      decoderDesc.Configuration.DecodeProfile =
         d3d12_video_decoder_resolve_profile(pD3D12Dec->m_d3d12DecProfileType, pD3D12Dec->m_decodeFormat);
      pD3D12Dec->m_spVideoDecoder.Reset();
      HRESULT hr =
         pD3D12Dec->m_spD3D12VideoDevice->CreateVideoDecoder(&decoderDesc,
                                                             IID_PPV_ARGS(pD3D12Dec->m_spVideoDecoder.GetAddressOf()));
      if (FAILED(hr)) {
         debug_printf(
            "[d3d12_video_decoder] d3d12_video_decoder_reconfigure_dpb - CreateVideoDecoder failed with HR %x\n",
            hr);
         return false;
      }
      // Update state after CreateVideoDecoder succeeds only.
      pD3D12Dec->m_decoderDesc = decoderDesc;
   }

   if (!pD3D12Dec->m_spDPBManager || !pD3D12Dec->m_spVideoDecoderHeap ||
       pD3D12Dec->m_decodeFormat != outputResourceDesc.Format || pD3D12Dec->m_decoderHeapDesc.DecodeWidth != width ||
       pD3D12Dec->m_decoderHeapDesc.DecodeHeight != height ||
       pD3D12Dec->m_decoderHeapDesc.MaxDecodePictureBufferCount < maxDPB) {
      // Detect the combination of AOT/ReferenceOnly to configure the DPB manager
      uint16_t referenceCount = (conversionArguments.Enable) ? (uint16_t) conversionArguments.ReferenceFrameCount +
                                                                  1 /*extra slot for current picture*/ :
                                                               maxDPB;
      // When output conversion is enabled, the DPB dimensions/format come from
      // the conversion reference info rather than the output resource.
      d3d12_video_decode_dpb_descriptor dpbDesc = {};
      dpbDesc.Width = (conversionArguments.Enable) ? conversionArguments.ReferenceInfo.Width : width;
      dpbDesc.Height = (conversionArguments.Enable) ? conversionArguments.ReferenceInfo.Height : height;
      dpbDesc.Format =
         (conversionArguments.Enable) ? conversionArguments.ReferenceInfo.Format.Format : outputResourceDesc.Format;
      dpbDesc.fArrayOfTexture =
         ((pD3D12Dec->m_ConfigDecoderSpecificFlags & d3d12_video_decode_config_specific_flag_array_of_textures) != 0);
      dpbDesc.dpbSize = referenceCount;
      dpbDesc.m_NodeMask = pD3D12Dec->m_NodeMask;
      dpbDesc.fReferenceOnly = ((pD3D12Dec->m_ConfigDecoderSpecificFlags &
                                 d3d12_video_decode_config_specific_flag_reference_only_textures_required) != 0);

      // Create DPB manager
      if (pD3D12Dec->m_spDPBManager == nullptr) {
         pD3D12Dec->m_spDPBManager.reset(new d3d12_video_decoder_references_manager(pD3D12Dec->m_pD3D12Screen,
                                                                                    pD3D12Dec->m_NodeMask,
                                                                                    pD3D12Dec->m_d3d12DecProfileType,
                                                                                    dpbDesc));
      }

      //
      // (Re)-create decoder heap
      //
      D3D12_VIDEO_DECODER_HEAP_DESC decoderHeapDesc = {};
      decoderHeapDesc.NodeMask = pD3D12Dec->m_NodeMask;
      decoderHeapDesc.Configuration = pD3D12Dec->m_decoderDesc.Configuration;
      decoderHeapDesc.DecodeWidth = dpbDesc.Width;
      decoderHeapDesc.DecodeHeight = dpbDesc.Height;
      decoderHeapDesc.Format = dpbDesc.Format;
      decoderHeapDesc.MaxDecodePictureBufferCount = maxDPB;
      pD3D12Dec->m_spVideoDecoderHeap.Reset();
      HRESULT hr = pD3D12Dec->m_spD3D12VideoDevice->CreateVideoDecoderHeap(
         &decoderHeapDesc,
         IID_PPV_ARGS(pD3D12Dec->m_spVideoDecoderHeap.GetAddressOf()));
      if (FAILED(hr)) {
         debug_printf(
            "[d3d12_video_decoder] d3d12_video_decoder_reconfigure_dpb - CreateVideoDecoderHeap failed with HR %x\n",
            hr);
         return false;
      }
      // Update pD3D12Dec after CreateVideoDecoderHeap succeeds only.
      pD3D12Dec->m_decoderHeapDesc = decoderHeapDesc;
   }

   pD3D12Dec->m_decodeFormat = outputResourceDesc.Format;

   return true;
}
1215
/**
 * Dispatches to the codec-specific routine that refreshes the DPB active
 * reference set for the current frame (each compiled only when the matching
 * VIDEO_CODEC_*DEC build flag is enabled).
 */
void
d3d12_video_decoder_refresh_dpb_active_references(struct d3d12_video_decoder *pD3D12Dec)
{
#if D3D12_VIDEO_ANY_DECODER_ENABLED
   switch (pD3D12Dec->m_d3d12DecProfileType) {
#if VIDEO_CODEC_H264DEC
   case d3d12_video_decode_profile_type_h264:
   {
      d3d12_video_decoder_refresh_dpb_active_references_h264(pD3D12Dec);
   } break;
#endif
#if VIDEO_CODEC_H265DEC
   case d3d12_video_decode_profile_type_hevc:
   {
      d3d12_video_decoder_refresh_dpb_active_references_hevc(pD3D12Dec);
   } break;
#endif
#if VIDEO_CODEC_AV1DEC
   case d3d12_video_decode_profile_type_av1:
   {
      d3d12_video_decoder_refresh_dpb_active_references_av1(pD3D12Dec);
   } break;
#endif
#if VIDEO_CODEC_VP9DEC
   case d3d12_video_decode_profile_type_vp9:
   {
      d3d12_video_decoder_refresh_dpb_active_references_vp9(pD3D12Dec);
   } break;
#endif
   default:
   {
      unreachable("Unsupported d3d12_video_decode_profile_type");
   } break;
   }
#endif // D3D12_VIDEO_ANY_DECODER_ENABLED
}
1252
/**
 * Queries the codec-specific frame width/height and maximum DPB size for the
 * current stream. Outputs are zeroed first; when no decoder codec is
 * compiled in they remain zero. Height is rounded up to a multiple of 32
 * when the driver reported that alignment requirement.
 */
void
d3d12_video_decoder_get_frame_info(
   struct d3d12_video_decoder *pD3D12Dec, uint32_t *pWidth, uint32_t *pHeight, uint16_t *pMaxDPB)
{
   *pWidth = 0;
   *pHeight = 0;
   *pMaxDPB = 0;

#if D3D12_VIDEO_ANY_DECODER_ENABLED
   switch (pD3D12Dec->m_d3d12DecProfileType) {
#if VIDEO_CODEC_H264DEC
   case d3d12_video_decode_profile_type_h264:
   {
      d3d12_video_decoder_get_frame_info_h264(pD3D12Dec, pWidth, pHeight, pMaxDPB);
   } break;
#endif
#if VIDEO_CODEC_H265DEC
   case d3d12_video_decode_profile_type_hevc:
   {
      d3d12_video_decoder_get_frame_info_hevc(pD3D12Dec, pWidth, pHeight, pMaxDPB);
   } break;
#endif
#if VIDEO_CODEC_AV1DEC
   case d3d12_video_decode_profile_type_av1:
   {
      d3d12_video_decoder_get_frame_info_av1(pD3D12Dec, pWidth, pHeight, pMaxDPB);
   } break;
#endif
#if VIDEO_CODEC_VP9DEC
   case d3d12_video_decode_profile_type_vp9:
   {
      d3d12_video_decoder_get_frame_info_vp9(pD3D12Dec, pWidth, pHeight, pMaxDPB);
   } break;
#endif
   default:
   {
      unreachable("Unsupported d3d12_video_decode_profile_type");
   } break;
   }
#endif // D3D12_VIDEO_ANY_DECODER_ENABLED

   if (pD3D12Dec->m_ConfigDecoderSpecificFlags & d3d12_video_decode_config_specific_flag_alignment_height) {
      // Round height up to the next multiple of 32 (driver requirement).
      const uint32_t AlignmentMask = 31;
      *pHeight = (*pHeight + AlignmentMask) & ~AlignmentMask;
   }
}
1299
/**
 * Converts the gallium pipe_picture_desc of the current codec into the
 * corresponding DXVA picture-parameter (and, for H264/HEVC, quantization
 * matrix) structures and stores them in the current in-flight pool slot's
 * staging buffers for later submission.
 * Also sets qp_matrix_frame_argument_enabled per codec: always true for
 * H264, reported by the conversion helper for HEVC, false for AV1/VP9.
 */
void
d3d12_video_decoder_store_converted_dxva_picparams_from_pipe_input(
   struct d3d12_video_decoder *codec, // input argument, current decoder
   struct pipe_picture_desc
      *picture, // input argument, base structure of pipe_XXX_picture_desc where XXX is the codec name
   struct d3d12_video_buffer *pD3D12VideoBuffer // input argument, target video buffer
)
{
#if D3D12_VIDEO_ANY_DECODER_ENABLED
   assert(picture);
   assert(codec);
   struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec;

   d3d12_video_decode_profile_type profileType =
      d3d12_video_decoder_convert_pipe_video_profile_to_profile_type(codec->base.profile);
   ID3D12Resource *pPipeD3D12DstResource = d3d12_resource_resource(pD3D12VideoBuffer->texture);
   D3D12_RESOURCE_DESC outputResourceDesc = GetDesc(pPipeD3D12DstResource);
   auto &inFlightResources = pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)];
   inFlightResources.qp_matrix_frame_argument_enabled = false;
   switch (profileType) {
#if VIDEO_CODEC_H264DEC
   case d3d12_video_decode_profile_type_h264:
   {
      size_t dxvaPicParamsBufferSize = sizeof(DXVA_PicParams_H264);
      pipe_h264_picture_desc *pPicControlH264 = (pipe_h264_picture_desc *) picture;
      DXVA_PicParams_H264 dxvaPicParamsH264 =
         d3d12_video_decoder_dxva_picparams_from_pipe_picparams_h264(pD3D12Dec->m_fenceValue,
                                                                     codec->base.profile,
                                                                     outputResourceDesc.Width,
                                                                     outputResourceDesc.Height,
                                                                     pPicControlH264);

      d3d12_video_decoder_store_dxva_picparams_in_picparams_buffer(codec,
                                                                   &dxvaPicParamsH264,
                                                                   dxvaPicParamsBufferSize);

      size_t dxvaQMatrixBufferSize = sizeof(DXVA_Qmatrix_H264);
      DXVA_Qmatrix_H264 dxvaQmatrixH264 = {};
      d3d12_video_decoder_dxva_qmatrix_from_pipe_picparams_h264((pipe_h264_picture_desc *) picture, dxvaQmatrixH264);
      inFlightResources.qp_matrix_frame_argument_enabled =
         true; // We don't have a way of knowing from the pipe params so send always
      d3d12_video_decoder_store_dxva_qmatrix_in_qmatrix_buffer(codec, &dxvaQmatrixH264, dxvaQMatrixBufferSize);
   } break;
#endif
#if VIDEO_CODEC_H265DEC
   case d3d12_video_decode_profile_type_hevc:
   {
      size_t dxvaPicParamsBufferSize = sizeof(DXVA_PicParams_HEVC);
      pipe_h265_picture_desc *pPicControlHEVC = (pipe_h265_picture_desc *) picture;
      DXVA_PicParams_HEVC dxvaPicParamsHEVC =
         d3d12_video_decoder_dxva_picparams_from_pipe_picparams_hevc(pD3D12Dec, codec->base.profile, pPicControlHEVC);

      d3d12_video_decoder_store_dxva_picparams_in_picparams_buffer(codec,
                                                                   &dxvaPicParamsHEVC,
                                                                   dxvaPicParamsBufferSize);

      size_t dxvaQMatrixBufferSize = sizeof(DXVA_Qmatrix_HEVC);
      DXVA_Qmatrix_HEVC dxvaQmatrixHEVC = {};
      // For HEVC the conversion helper reports whether a QP matrix is present.
      inFlightResources.qp_matrix_frame_argument_enabled = false;
      d3d12_video_decoder_dxva_qmatrix_from_pipe_picparams_hevc((pipe_h265_picture_desc *) picture,
                                                                dxvaQmatrixHEVC,
                                                                inFlightResources.qp_matrix_frame_argument_enabled);
      d3d12_video_decoder_store_dxva_qmatrix_in_qmatrix_buffer(codec, &dxvaQmatrixHEVC, dxvaQMatrixBufferSize);
   } break;
#endif
#if VIDEO_CODEC_AV1DEC
   case d3d12_video_decode_profile_type_av1:
   {
      size_t dxvaPicParamsBufferSize = sizeof(DXVA_PicParams_AV1);
      pipe_av1_picture_desc *pPicControlAV1 = (pipe_av1_picture_desc *) picture;
      DXVA_PicParams_AV1 dxvaPicParamsAV1 =
         d3d12_video_decoder_dxva_picparams_from_pipe_picparams_av1(pD3D12Dec->m_fenceValue,
                                                                    codec->base.profile,
                                                                    pPicControlAV1);

      d3d12_video_decoder_store_dxva_picparams_in_picparams_buffer(codec, &dxvaPicParamsAV1, dxvaPicParamsBufferSize);
      inFlightResources.qp_matrix_frame_argument_enabled = false;
   } break;
#endif
#if VIDEO_CODEC_VP9DEC
   case d3d12_video_decode_profile_type_vp9:
   {
      size_t dxvaPicParamsBufferSize = sizeof(DXVA_PicParams_VP9);
      pipe_vp9_picture_desc *pPicControlVP9 = (pipe_vp9_picture_desc *) picture;
      DXVA_PicParams_VP9 dxvaPicParamsVP9 =
         d3d12_video_decoder_dxva_picparams_from_pipe_picparams_vp9(pD3D12Dec, codec->base.profile, pPicControlVP9);

      d3d12_video_decoder_store_dxva_picparams_in_picparams_buffer(codec, &dxvaPicParamsVP9, dxvaPicParamsBufferSize);
      inFlightResources.qp_matrix_frame_argument_enabled = false;
   } break;
#endif
   default:
   {
      unreachable("Unsupported d3d12_video_decode_profile_type");
   } break;
   }
#endif // D3D12_VIDEO_ANY_DECODER_ENABLED
}
1398
/**
 * Converts the codec-specific slice control information from the pipe
 * picture descriptor into DXVA slice control entries, stored in the current
 * in-flight pool slot's m_SliceControlBuffer.
 */
void
d3d12_video_decoder_prepare_dxva_slices_control(
   struct d3d12_video_decoder *pD3D12Dec, // input argument, current decoder
   struct pipe_picture_desc *picture)
{
#if D3D12_VIDEO_ANY_DECODER_ENABLED
   [[maybe_unused]] auto &inFlightResources = pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)];
   d3d12_video_decode_profile_type profileType =
      d3d12_video_decoder_convert_pipe_video_profile_to_profile_type(pD3D12Dec->base.profile);
   switch (profileType) {
#if VIDEO_CODEC_H264DEC
   case d3d12_video_decode_profile_type_h264:
   {
      d3d12_video_decoder_prepare_dxva_slices_control_h264(pD3D12Dec,
                                                           inFlightResources.m_SliceControlBuffer,
                                                           (struct pipe_h264_picture_desc *) picture);
   } break;
#endif
#if VIDEO_CODEC_H265DEC
   case d3d12_video_decode_profile_type_hevc:
   {
      d3d12_video_decoder_prepare_dxva_slices_control_hevc(pD3D12Dec,
                                                           inFlightResources.m_SliceControlBuffer,
                                                           (struct pipe_h265_picture_desc *) picture);
   } break;
#endif
#if VIDEO_CODEC_AV1DEC
   case d3d12_video_decode_profile_type_av1:
   {
      d3d12_video_decoder_prepare_dxva_slices_control_av1(pD3D12Dec,
                                                          inFlightResources.m_SliceControlBuffer,
                                                          (struct pipe_av1_picture_desc *) picture);
   } break;
#endif
#if VIDEO_CODEC_VP9DEC
   case d3d12_video_decode_profile_type_vp9:
   {
      d3d12_video_decoder_prepare_dxva_slices_control_vp9(pD3D12Dec,
                                                          inFlightResources.m_SliceControlBuffer,
                                                          (struct pipe_vp9_picture_desc *) picture);
   } break;
#endif
   default:
   {
      unreachable("Unsupported d3d12_video_decode_profile_type");
   } break;
   }
#endif // D3D12_VIDEO_ANY_DECODER_ENABLED
}
1448
1449 void
d3d12_video_decoder_store_dxva_qmatrix_in_qmatrix_buffer(struct d3d12_video_decoder * pD3D12Dec,void * pDXVAStruct,uint64_t DXVAStructSize)1450 d3d12_video_decoder_store_dxva_qmatrix_in_qmatrix_buffer(struct d3d12_video_decoder *pD3D12Dec,
1451 void *pDXVAStruct,
1452 uint64_t DXVAStructSize)
1453 {
1454 auto &inFlightResources = pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)];
1455 if (inFlightResources.m_InverseQuantMatrixBuffer.capacity() < DXVAStructSize) {
1456 inFlightResources.m_InverseQuantMatrixBuffer.reserve(DXVAStructSize);
1457 }
1458
1459 inFlightResources.m_InverseQuantMatrixBuffer.resize(DXVAStructSize);
1460 memcpy(inFlightResources.m_InverseQuantMatrixBuffer.data(), pDXVAStruct, DXVAStructSize);
1461 }
1462
1463 void
d3d12_video_decoder_store_dxva_picparams_in_picparams_buffer(struct d3d12_video_decoder * pD3D12Dec,void * pDXVAStruct,uint64_t DXVAStructSize)1464 d3d12_video_decoder_store_dxva_picparams_in_picparams_buffer(struct d3d12_video_decoder *pD3D12Dec,
1465 void *pDXVAStruct,
1466 uint64_t DXVAStructSize)
1467 {
1468 auto &inFlightResources = pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)];
1469 if (inFlightResources.m_picParamsBuffer.capacity() < DXVAStructSize) {
1470 inFlightResources.m_picParamsBuffer.reserve(DXVAStructSize);
1471 }
1472
1473 inFlightResources.m_picParamsBuffer.resize(DXVAStructSize);
1474 memcpy(inFlightResources.m_picParamsBuffer.data(), pDXVAStruct, DXVAStructSize);
1475 }
1476
1477 bool
d3d12_video_decoder_supports_aot_dpb(D3D12_FEATURE_DATA_VIDEO_DECODE_SUPPORT decodeSupport,d3d12_video_decode_profile_type profileType)1478 d3d12_video_decoder_supports_aot_dpb(D3D12_FEATURE_DATA_VIDEO_DECODE_SUPPORT decodeSupport,
1479 d3d12_video_decode_profile_type profileType)
1480 {
1481 bool supportedProfile = false;
1482 #if D3D12_VIDEO_ANY_DECODER_ENABLED
1483 switch (profileType) {
1484 #if VIDEO_CODEC_H264DEC
1485 case d3d12_video_decode_profile_type_h264:
1486 {
1487 supportedProfile = true;
1488 } break;
1489 #endif
1490 #if VIDEO_CODEC_H265DEC
1491 case d3d12_video_decode_profile_type_hevc:
1492 {
1493 supportedProfile = true;
1494 } break;
1495 #endif
1496 #if VIDEO_CODEC_AV1DEC
1497 case d3d12_video_decode_profile_type_av1:
1498 {
1499 supportedProfile = true;
1500 } break;
1501 #endif
1502 #if VIDEO_CODEC_VP9DEC
1503 case d3d12_video_decode_profile_type_vp9:
1504 {
1505 supportedProfile = true;
1506 } break;
1507 #endif
1508 default:
1509 supportedProfile = false;
1510 break;
1511 }
1512 #endif // D3D12_VIDEO_ANY_DECODER_ENABLED
1513
1514 return (decodeSupport.DecodeTier >= D3D12_VIDEO_DECODE_TIER_2) && supportedProfile;
1515 }
1516
1517 d3d12_video_decode_profile_type
d3d12_video_decoder_convert_pipe_video_profile_to_profile_type(enum pipe_video_profile profile)1518 d3d12_video_decoder_convert_pipe_video_profile_to_profile_type(enum pipe_video_profile profile)
1519 {
1520 switch (profile) {
1521 case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:
1522 case PIPE_VIDEO_PROFILE_MPEG4_AVC_CONSTRAINED_BASELINE:
1523 case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN:
1524 case PIPE_VIDEO_PROFILE_MPEG4_AVC_EXTENDED:
1525 case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:
1526 case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH10:
1527 return d3d12_video_decode_profile_type_h264;
1528 case PIPE_VIDEO_PROFILE_HEVC_MAIN:
1529 case PIPE_VIDEO_PROFILE_HEVC_MAIN_10:
1530 return d3d12_video_decode_profile_type_hevc;
1531 case PIPE_VIDEO_PROFILE_AV1_MAIN:
1532 return d3d12_video_decode_profile_type_av1;
1533 case PIPE_VIDEO_PROFILE_VP9_PROFILE0:
1534 case PIPE_VIDEO_PROFILE_VP9_PROFILE2:
1535 return d3d12_video_decode_profile_type_vp9;
1536 default:
1537 {
1538 unreachable("Unsupported pipe video profile");
1539 } break;
1540 }
1541 }
1542
1543 GUID
d3d12_video_decoder_convert_pipe_video_profile_to_d3d12_profile(enum pipe_video_profile profile)1544 d3d12_video_decoder_convert_pipe_video_profile_to_d3d12_profile(enum pipe_video_profile profile)
1545 {
1546 switch (profile) {
1547 case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:
1548 case PIPE_VIDEO_PROFILE_MPEG4_AVC_CONSTRAINED_BASELINE:
1549 case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN:
1550 case PIPE_VIDEO_PROFILE_MPEG4_AVC_EXTENDED:
1551 case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:
1552 case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH10:
1553 return D3D12_VIDEO_DECODE_PROFILE_H264;
1554 case PIPE_VIDEO_PROFILE_HEVC_MAIN:
1555 return D3D12_VIDEO_DECODE_PROFILE_HEVC_MAIN;
1556 case PIPE_VIDEO_PROFILE_HEVC_MAIN_10:
1557 return D3D12_VIDEO_DECODE_PROFILE_HEVC_MAIN10;
1558 case PIPE_VIDEO_PROFILE_AV1_MAIN:
1559 return D3D12_VIDEO_DECODE_PROFILE_AV1_PROFILE0;
1560 case PIPE_VIDEO_PROFILE_VP9_PROFILE0:
1561 return D3D12_VIDEO_DECODE_PROFILE_VP9;
1562 case PIPE_VIDEO_PROFILE_VP9_PROFILE2:
1563 return D3D12_VIDEO_DECODE_PROFILE_VP9_10BIT_PROFILE2;
1564 default:
1565 return {};
1566 }
1567 }
1568
1569 GUID
d3d12_video_decoder_resolve_profile(d3d12_video_decode_profile_type profileType,DXGI_FORMAT decode_format)1570 d3d12_video_decoder_resolve_profile(d3d12_video_decode_profile_type profileType, DXGI_FORMAT decode_format)
1571 {
1572 #if D3D12_VIDEO_ANY_DECODER_ENABLED
1573 switch (profileType) {
1574 #if VIDEO_CODEC_H264DEC
1575 case d3d12_video_decode_profile_type_h264:
1576 return D3D12_VIDEO_DECODE_PROFILE_H264;
1577 #endif
1578 #if VIDEO_CODEC_H265DEC
1579 case d3d12_video_decode_profile_type_hevc:
1580 {
1581 switch (decode_format) {
1582 case DXGI_FORMAT_NV12:
1583 return D3D12_VIDEO_DECODE_PROFILE_HEVC_MAIN;
1584 case DXGI_FORMAT_P010:
1585 return D3D12_VIDEO_DECODE_PROFILE_HEVC_MAIN10;
1586 default:
1587 {
1588 unreachable("Unsupported decode_format");
1589 } break;
1590 }
1591 } break;
1592 #endif
1593 #if VIDEO_CODEC_AV1DEC
1594 case d3d12_video_decode_profile_type_av1:
1595 return D3D12_VIDEO_DECODE_PROFILE_AV1_PROFILE0;
1596 break;
1597 #endif
1598 #if VIDEO_CODEC_VP9DEC
1599 case d3d12_video_decode_profile_type_vp9:
1600 {
1601 switch (decode_format) {
1602 case DXGI_FORMAT_NV12:
1603 return D3D12_VIDEO_DECODE_PROFILE_VP9;
1604 case DXGI_FORMAT_P010:
1605 return D3D12_VIDEO_DECODE_PROFILE_VP9_10BIT_PROFILE2;
1606 default:
1607 {
1608 unreachable("Unsupported decode_format");
1609 } break;
1610 }
1611 } break;
1612 #endif
1613 default:
1614 {
1615 unreachable("Unsupported d3d12_video_decode_profile_type");
1616 } break;
1617 }
1618 #else
1619 return {};
1620 #endif // D3D12_VIDEO_ANY_DECODER_ENABLED
1621 }
1622
1623 bool
d3d12_video_decoder_ensure_fence_finished(struct pipe_video_codec * codec,ID3D12Fence * fence,uint64_t fenceValueToWaitOn,uint64_t timeout_ns)1624 d3d12_video_decoder_ensure_fence_finished(struct pipe_video_codec *codec,
1625 ID3D12Fence *fence,
1626 uint64_t fenceValueToWaitOn,
1627 uint64_t timeout_ns)
1628 {
1629 bool wait_result = true;
1630 HRESULT hr = S_OK;
1631 uint64_t completedValue = fence->GetCompletedValue();
1632
1633 debug_printf(
1634 "[d3d12_video_decoder] d3d12_video_decoder_ensure_fence_finished - Waiting for fence (with timeout_ns %" PRIu64
1635 ") to finish with "
1636 "fenceValue: %" PRIu64 " - Current Fence Completed Value %" PRIu64 "\n",
1637 timeout_ns,
1638 fenceValueToWaitOn,
1639 completedValue);
1640
1641 if (completedValue < fenceValueToWaitOn) {
1642
1643 HANDLE event = {};
1644 int event_fd = 0;
1645 event = d3d12_fence_create_event(&event_fd);
1646
1647 hr = fence->SetEventOnCompletion(fenceValueToWaitOn, event);
1648 if (FAILED(hr)) {
1649 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_ensure_fence_finished - SetEventOnCompletion for "
1650 "fenceValue %" PRIu64 " failed with HR %x\n",
1651 fenceValueToWaitOn,
1652 hr);
1653 return false;
1654 }
1655
1656 wait_result = d3d12_fence_wait_event(event, event_fd, timeout_ns);
1657 d3d12_fence_close_event(event, event_fd);
1658
1659 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_ensure_fence_finished - Waiting on fence to be done with "
1660 "fenceValue: %" PRIu64 " - current CompletedValue: %" PRIu64 "\n",
1661 fenceValueToWaitOn,
1662 completedValue);
1663 } else {
1664 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_ensure_fence_finished - Fence already done with "
1665 "fenceValue: %" PRIu64 " - current CompletedValue: %" PRIu64 "\n",
1666 fenceValueToWaitOn,
1667 completedValue);
1668 }
1669 return wait_result;
1670 }
1671
/**
 * Blocks until the decode work tagged with fenceValueToWaitOn has completed
 * (or timeout_ns expires), then releases the references held by that frame's
 * in-flight resource pool slot and recycles its command allocator.
 * Returns the wait result; returns false (asserting in debug builds) if the
 * allocator reset fails or the device was removed.
 */
bool
d3d12_video_decoder_sync_completion(struct pipe_video_codec *codec,
                                    ID3D12Fence *fence,
                                    uint64_t fenceValueToWaitOn,
                                    uint64_t timeout_ns)
{
   struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec;
   assert(pD3D12Dec);
   assert(pD3D12Dec->m_spD3D12VideoDevice);
   assert(pD3D12Dec->m_spDecodeCommandQueue);
   HRESULT hr = S_OK;

   ASSERTED bool wait_result = d3d12_video_decoder_ensure_fence_finished(codec, fence, fenceValueToWaitOn, timeout_ns);
   assert(wait_result);

   // Release references granted on end_frame for this inflight operations
   // (the pool slot is addressed by the fence value being synced).
   pD3D12Dec->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_DEC_ASYNC_DEPTH].m_spDecoder.Reset();
   pD3D12Dec->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_DEC_ASYNC_DEPTH].m_spDecoderHeap.Reset();
   pD3D12Dec->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_DEC_ASYNC_DEPTH].m_References.reset();
   pD3D12Dec->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_DEC_ASYNC_DEPTH].m_stagingDecodeBitstream.resize(
      0);
   pipe_resource_reference(
      &pD3D12Dec->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_DEC_ASYNC_DEPTH].pPipeCompressedBufferObj,
      NULL);

   struct d3d12_screen *pD3D12Screen = (struct d3d12_screen *) pD3D12Dec->m_pD3D12Screen;
   assert(pD3D12Screen);

   // Drop the bitstream upload completion fence reference.
   // NOTE(review): unlike every other pool access in this function, this one
   // indexes with d3d12_video_decoder_pool_current_index() instead of
   // fenceValueToWaitOn % D3D12_VIDEO_DEC_ASYNC_DEPTH — confirm whether
   // releasing the *current* slot's fence (rather than the synced slot's)
   // is intentional.
   pD3D12Screen->base.fence_reference(
      &pD3D12Screen->base,
      &pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)]
         .m_pBitstreamUploadGPUCompletionFence,
      NULL);

   // Recycle the command allocator for reuse; safe now that the GPU is done.
   hr =
      pD3D12Dec->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_DEC_ASYNC_DEPTH].m_spCommandAllocator->Reset();
   if (FAILED(hr)) {
      debug_printf("failed with %x.\n", hr);
      goto sync_with_token_fail;
   }

   // Validate device was not removed
   hr = pD3D12Dec->m_pD3D12Screen->dev->GetDeviceRemovedReason();
   if (hr != S_OK) {
      debug_printf("[d3d12_video_decoder] d3d12_video_decoder_sync_completion"
                   " - D3D12Device was removed AFTER d3d12_video_decoder_ensure_fence_finished "
                   "execution with HR %x, but wasn't before.\n",
                   hr);
      goto sync_with_token_fail;
   }

   debug_printf(
      "[d3d12_video_decoder] d3d12_video_decoder_sync_completion - GPU execution finalized for fenceValue: %" PRIu64
      "\n",
      fenceValueToWaitOn);

   return wait_result;

sync_with_token_fail:
   debug_printf("[d3d12_video_decoder] d3d12_video_decoder_sync_completion failed for fenceValue: %" PRIu64 "\n",
                fenceValueToWaitOn);
   assert(false);
   return false;
}