/*
 * Copyright © Microsoft Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
23
24 #include <stdio.h>
25 #include <stdint.h>
26 #include <stdexcept>
27
28 #include <unknwn.h>
29 #include <directx/d3d12.h>
30 #include <dxgi1_4.h>
31 #include <gtest/gtest.h>
32 #include <wrl.h>
33 #include <dxguids/dxguids.h>
34
35 #include "util/u_debug.h"
36 #include "clc_compiler.h"
37 #include "compute_test.h"
38 #include "dxil_validator.h"
39
40 #include <spirv-tools/libspirv.hpp>
41
42 #if (defined(_WIN32) && defined(_MSC_VER)) || D3D12_SDK_VERSION < 606
43 inline D3D12_CPU_DESCRIPTOR_HANDLE
GetCPUDescriptorHandleForHeapStart(ID3D12DescriptorHeap * heap)44 GetCPUDescriptorHandleForHeapStart(ID3D12DescriptorHeap *heap)
45 {
46 return heap->GetCPUDescriptorHandleForHeapStart();
47 }
48 inline D3D12_GPU_DESCRIPTOR_HANDLE
GetGPUDescriptorHandleForHeapStart(ID3D12DescriptorHeap * heap)49 GetGPUDescriptorHandleForHeapStart(ID3D12DescriptorHeap *heap)
50 {
51 return heap->GetGPUDescriptorHandleForHeapStart();
52 }
53 inline D3D12_HEAP_PROPERTIES
GetCustomHeapProperties(ID3D12Device * dev,D3D12_HEAP_TYPE type)54 GetCustomHeapProperties(ID3D12Device *dev, D3D12_HEAP_TYPE type)
55 {
56 return dev->GetCustomHeapProperties(0, type);
57 }
58 #else
59 inline D3D12_CPU_DESCRIPTOR_HANDLE
GetCPUDescriptorHandleForHeapStart(ID3D12DescriptorHeap * heap)60 GetCPUDescriptorHandleForHeapStart(ID3D12DescriptorHeap *heap)
61 {
62 D3D12_CPU_DESCRIPTOR_HANDLE ret;
63 heap->GetCPUDescriptorHandleForHeapStart(&ret);
64 return ret;
65 }
66 inline D3D12_GPU_DESCRIPTOR_HANDLE
GetGPUDescriptorHandleForHeapStart(ID3D12DescriptorHeap * heap)67 GetGPUDescriptorHandleForHeapStart(ID3D12DescriptorHeap *heap)
68 {
69 D3D12_GPU_DESCRIPTOR_HANDLE ret;
70 heap->GetGPUDescriptorHandleForHeapStart(&ret);
71 return ret;
72 }
73 inline D3D12_HEAP_PROPERTIES
GetCustomHeapProperties(ID3D12Device * dev,D3D12_HEAP_TYPE type)74 GetCustomHeapProperties(ID3D12Device *dev, D3D12_HEAP_TYPE type)
75 {
76 D3D12_HEAP_PROPERTIES ret;
77 dev->GetCustomHeapProperties(&ret, 0, type);
78 return ret;
79 }
80 #endif
81
82 using std::runtime_error;
83 using Microsoft::WRL::ComPtr;
84
/* Bit flags that can be selected through the COMPUTE_TEST_DEBUG option
 * table below; each value occupies a distinct bit so they can be OR'ed. */
enum compute_test_debug_flags {
   COMPUTE_DEBUG_EXPERIMENTAL_SHADERS = 0x1, /* bit 0 */
   COMPUTE_DEBUG_USE_HW_D3D           = 0x2, /* bit 1 */
   COMPUTE_DEBUG_OPTIMIZE_LIBCLC      = 0x4, /* bit 2 */
   COMPUTE_DEBUG_SERIALIZE_LIBCLC     = 0x8, /* bit 3 */
};
91
92 static const struct debug_named_value compute_debug_options[] = {
93 { "experimental_shaders", COMPUTE_DEBUG_EXPERIMENTAL_SHADERS, "Enable experimental shaders" },
94 { "use_hw_d3d", COMPUTE_DEBUG_USE_HW_D3D, "Use a hardware D3D device" },
95 { "optimize_libclc", COMPUTE_DEBUG_OPTIMIZE_LIBCLC, "Optimize the clc_libclc before using it" },
96 { "serialize_libclc", COMPUTE_DEBUG_SERIALIZE_LIBCLC, "Serialize and deserialize the clc_libclc" },
97 DEBUG_NAMED_VALUE_END
98 };
99
100 DEBUG_GET_ONCE_FLAGS_OPTION(debug_compute, "COMPUTE_TEST_DEBUG", compute_debug_options, 0)
101
/* Logger hook: print a compiler warning to stderr. `priv` is not used. */
static void
warning_callback(void *priv, const char *msg)
{
   (void)priv;
   fprintf(stderr, "WARNING: %s\n", msg);
}
106
/* Logger hook: print a compiler error to stderr. `priv` is not used. */
static void
error_callback(void *priv, const char *msg)
{
   (void)priv;
   fprintf(stderr, "ERROR: %s\n", msg);
}
111
112 static const struct clc_logger logger = {
113 NULL,
114 error_callback,
115 warning_callback,
116 };
117
118 void
enable_d3d12_debug_layer()119 ComputeTest::enable_d3d12_debug_layer()
120 {
121 HMODULE hD3D12Mod = LoadLibrary("D3D12.DLL");
122 if (!hD3D12Mod) {
123 fprintf(stderr, "D3D12: failed to load D3D12.DLL\n");
124 return;
125 }
126
127 typedef HRESULT(WINAPI * PFN_D3D12_GET_DEBUG_INTERFACE)(REFIID riid,
128 void **ppFactory);
129 PFN_D3D12_GET_DEBUG_INTERFACE D3D12GetDebugInterface = (PFN_D3D12_GET_DEBUG_INTERFACE)GetProcAddress(hD3D12Mod, "D3D12GetDebugInterface");
130 if (!D3D12GetDebugInterface) {
131 fprintf(stderr, "D3D12: failed to load D3D12GetDebugInterface from D3D12.DLL\n");
132 return;
133 }
134
135 ID3D12Debug *debug;
136 if (FAILED(D3D12GetDebugInterface(__uuidof(ID3D12Debug), (void **)& debug))) {
137 fprintf(stderr, "D3D12: D3D12GetDebugInterface failed\n");
138 return;
139 }
140
141 debug->EnableDebugLayer();
142 }
143
144 IDXGIFactory4 *
get_dxgi_factory()145 ComputeTest::get_dxgi_factory()
146 {
147 static const GUID IID_IDXGIFactory4 = {
148 0x1bc6ea02, 0xef36, 0x464f,
149 { 0xbf, 0x0c, 0x21, 0xca, 0x39, 0xe5, 0x16, 0x8a }
150 };
151
152 typedef HRESULT(WINAPI * PFN_CREATE_DXGI_FACTORY)(REFIID riid,
153 void **ppFactory);
154 PFN_CREATE_DXGI_FACTORY CreateDXGIFactory;
155
156 HMODULE hDXGIMod = LoadLibrary("DXGI.DLL");
157 if (!hDXGIMod)
158 throw runtime_error("Failed to load DXGI.DLL");
159
160 CreateDXGIFactory = (PFN_CREATE_DXGI_FACTORY)GetProcAddress(hDXGIMod, "CreateDXGIFactory");
161 if (!CreateDXGIFactory)
162 throw runtime_error("Failed to load CreateDXGIFactory from DXGI.DLL");
163
164 IDXGIFactory4 *factory = NULL;
165 HRESULT hr = CreateDXGIFactory(IID_IDXGIFactory4, (void **)&factory);
166 if (FAILED(hr))
167 throw runtime_error("CreateDXGIFactory failed");
168
169 return factory;
170 }
171
172 IDXGIAdapter1 *
choose_adapter(IDXGIFactory4 * factory)173 ComputeTest::choose_adapter(IDXGIFactory4 *factory)
174 {
175 IDXGIAdapter1 *ret;
176
177 if (debug_get_option_debug_compute() & COMPUTE_DEBUG_USE_HW_D3D) {
178 for (unsigned i = 0; SUCCEEDED(factory->EnumAdapters1(i, &ret)); i++) {
179 DXGI_ADAPTER_DESC1 desc;
180 ret->GetDesc1(&desc);
181 if (!(desc.Flags & D3D_DRIVER_TYPE_SOFTWARE))
182 return ret;
183 }
184 throw runtime_error("Failed to enum hardware adapter");
185 } else {
186 if (FAILED(factory->EnumWarpAdapter(__uuidof(IDXGIAdapter1),
187 (void **)& ret)))
188 throw runtime_error("Failed to enum warp adapter");
189 return ret;
190 }
191 }
192
193 ID3D12Device *
create_device(IDXGIAdapter1 * adapter)194 ComputeTest::create_device(IDXGIAdapter1 *adapter)
195 {
196 typedef HRESULT(WINAPI *PFN_D3D12CREATEDEVICE)(IUnknown *, D3D_FEATURE_LEVEL, REFIID, void **);
197 PFN_D3D12CREATEDEVICE D3D12CreateDevice;
198
199 HMODULE hD3D12Mod = LoadLibrary("D3D12.DLL");
200 if (!hD3D12Mod)
201 throw runtime_error("failed to load D3D12.DLL");
202
203 if (debug_get_option_debug_compute() & COMPUTE_DEBUG_EXPERIMENTAL_SHADERS) {
204 typedef HRESULT(WINAPI *PFN_D3D12ENABLEEXPERIMENTALFEATURES)(UINT, const IID *, void *, UINT *);
205 PFN_D3D12ENABLEEXPERIMENTALFEATURES D3D12EnableExperimentalFeatures;
206 D3D12EnableExperimentalFeatures = (PFN_D3D12ENABLEEXPERIMENTALFEATURES)
207 GetProcAddress(hD3D12Mod, "D3D12EnableExperimentalFeatures");
208 if (FAILED(D3D12EnableExperimentalFeatures(1, &D3D12ExperimentalShaderModels, NULL, NULL)))
209 throw runtime_error("failed to enable experimental shader models");
210 }
211
212 D3D12CreateDevice = (PFN_D3D12CREATEDEVICE)GetProcAddress(hD3D12Mod, "D3D12CreateDevice");
213 if (!D3D12CreateDevice)
214 throw runtime_error("failed to load D3D12CreateDevice from D3D12.DLL");
215
216 ID3D12Device *dev;
217 if (FAILED(D3D12CreateDevice(adapter, D3D_FEATURE_LEVEL_12_0,
218 __uuidof(ID3D12Device), (void **)& dev)))
219 throw runtime_error("D3D12CreateDevice failed");
220
221 return dev;
222 }
223
224 ComPtr<ID3D12RootSignature>
create_root_signature(const ComputeTest::Resources & resources)225 ComputeTest::create_root_signature(const ComputeTest::Resources &resources)
226 {
227 D3D12_ROOT_PARAMETER1 root_param;
228 root_param.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
229 root_param.DescriptorTable.NumDescriptorRanges = resources.ranges.size();
230 root_param.DescriptorTable.pDescriptorRanges = resources.ranges.data();
231 root_param.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
232
233 D3D12_ROOT_SIGNATURE_DESC1 root_sig_desc;
234 root_sig_desc.NumParameters = 1;
235 root_sig_desc.pParameters = &root_param;
236 root_sig_desc.NumStaticSamplers = 0;
237 root_sig_desc.pStaticSamplers = NULL;
238 root_sig_desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE;
239
240 D3D12_VERSIONED_ROOT_SIGNATURE_DESC versioned_desc;
241 versioned_desc.Version = D3D_ROOT_SIGNATURE_VERSION_1_1;
242 versioned_desc.Desc_1_1 = root_sig_desc;
243
244 ID3DBlob *sig, *error;
245 if (FAILED(D3D12SerializeVersionedRootSignature(&versioned_desc,
246 &sig, &error)))
247 throw runtime_error("D3D12SerializeVersionedRootSignature failed");
248
249 ComPtr<ID3D12RootSignature> ret;
250 if (FAILED(dev->CreateRootSignature(0,
251 sig->GetBufferPointer(),
252 sig->GetBufferSize(),
253 __uuidof(ID3D12RootSignature),
254 (void **)& ret)))
255 throw runtime_error("CreateRootSignature failed");
256
257 return ret;
258 }
259
260 ComPtr<ID3D12PipelineState>
create_pipeline_state(ComPtr<ID3D12RootSignature> & root_sig,const struct clc_dxil_object & dxil)261 ComputeTest::create_pipeline_state(ComPtr<ID3D12RootSignature> &root_sig,
262 const struct clc_dxil_object &dxil)
263 {
264 D3D12_COMPUTE_PIPELINE_STATE_DESC pipeline_desc = { root_sig.Get() };
265 pipeline_desc.CS.pShaderBytecode = dxil.binary.data;
266 pipeline_desc.CS.BytecodeLength = dxil.binary.size;
267
268 ComPtr<ID3D12PipelineState> pipeline_state;
269 if (FAILED(dev->CreateComputePipelineState(&pipeline_desc,
270 __uuidof(ID3D12PipelineState),
271 (void **)& pipeline_state)))
272 throw runtime_error("Failed to create pipeline state");
273 return pipeline_state;
274 }
275
276 ComPtr<ID3D12Resource>
create_buffer(int size,D3D12_HEAP_TYPE heap_type)277 ComputeTest::create_buffer(int size, D3D12_HEAP_TYPE heap_type)
278 {
279 D3D12_RESOURCE_DESC desc;
280 desc.Format = DXGI_FORMAT_UNKNOWN;
281 desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
282 desc.Width = size;
283 desc.Height = 1;
284 desc.DepthOrArraySize = 1;
285 desc.MipLevels = 1;
286 desc.SampleDesc.Count = 1;
287 desc.SampleDesc.Quality = 0;
288 desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
289 desc.Flags = heap_type == D3D12_HEAP_TYPE_DEFAULT ? D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS : D3D12_RESOURCE_FLAG_NONE;
290 desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
291
292 D3D12_HEAP_PROPERTIES heap_pris = GetCustomHeapProperties(dev, heap_type);
293
294 ComPtr<ID3D12Resource> res;
295 if (FAILED(dev->CreateCommittedResource(&heap_pris,
296 D3D12_HEAP_FLAG_NONE, &desc, D3D12_RESOURCE_STATE_COMMON,
297 NULL, __uuidof(ID3D12Resource), (void **)&res)))
298 throw runtime_error("CreateCommittedResource failed");
299
300 return res;
301 }
302
303 ComPtr<ID3D12Resource>
create_upload_buffer_with_data(const void * data,size_t size)304 ComputeTest::create_upload_buffer_with_data(const void *data, size_t size)
305 {
306 auto upload_res = create_buffer(size, D3D12_HEAP_TYPE_UPLOAD);
307
308 void *ptr = NULL;
309 D3D12_RANGE res_range = { 0, (SIZE_T)size };
310 if (FAILED(upload_res->Map(0, &res_range, (void **)&ptr)))
311 throw runtime_error("Failed to map upload-buffer");
312 assert(ptr);
313 memcpy(ptr, data, size);
314 upload_res->Unmap(0, &res_range);
315 return upload_res;
316 }
317
318 ComPtr<ID3D12Resource>
create_sized_buffer_with_data(size_t buffer_size,const void * data,size_t data_size)319 ComputeTest::create_sized_buffer_with_data(size_t buffer_size,
320 const void *data,
321 size_t data_size)
322 {
323 auto upload_res = create_upload_buffer_with_data(data, data_size);
324
325 auto res = create_buffer(buffer_size, D3D12_HEAP_TYPE_DEFAULT);
326 resource_barrier(res, D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_COPY_DEST);
327 cmdlist->CopyBufferRegion(res.Get(), 0, upload_res.Get(), 0, data_size);
328 resource_barrier(res, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_COMMON);
329 execute_cmdlist();
330
331 return res;
332 }
333
334 void
get_buffer_data(ComPtr<ID3D12Resource> res,void * buf,size_t size)335 ComputeTest::get_buffer_data(ComPtr<ID3D12Resource> res,
336 void *buf, size_t size)
337 {
338 auto readback_res = create_buffer(align(size, 4), D3D12_HEAP_TYPE_READBACK);
339 resource_barrier(res, D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_COPY_SOURCE);
340 cmdlist->CopyResource(readback_res.Get(), res.Get());
341 resource_barrier(res, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_COMMON);
342 execute_cmdlist();
343
344 void *ptr = NULL;
345 D3D12_RANGE res_range = { 0, size };
346 if (FAILED(readback_res->Map(0, &res_range, &ptr)))
347 throw runtime_error("Failed to map readback-buffer");
348
349 memcpy(buf, ptr, size);
350
351 D3D12_RANGE empty_range = { 0, 0 };
352 readback_res->Unmap(0, &empty_range);
353 }
354
355 void
resource_barrier(ComPtr<ID3D12Resource> & res,D3D12_RESOURCE_STATES state_before,D3D12_RESOURCE_STATES state_after)356 ComputeTest::resource_barrier(ComPtr<ID3D12Resource> &res,
357 D3D12_RESOURCE_STATES state_before,
358 D3D12_RESOURCE_STATES state_after)
359 {
360 D3D12_RESOURCE_BARRIER barrier;
361 barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
362 barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
363 barrier.Transition.pResource = res.Get();
364 barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
365 barrier.Transition.StateBefore = state_before;
366 barrier.Transition.StateAfter = state_after;
367 cmdlist->ResourceBarrier(1, &barrier);
368 }
369
370 void
execute_cmdlist()371 ComputeTest::execute_cmdlist()
372 {
373 if (FAILED(cmdlist->Close()))
374 throw runtime_error("Closing ID3D12GraphicsCommandList failed");
375
376 ID3D12CommandList *cmdlists[] = { cmdlist };
377 cmdqueue->ExecuteCommandLists(1, cmdlists);
378 cmdqueue_fence->SetEventOnCompletion(fence_value, event);
379 cmdqueue->Signal(cmdqueue_fence, fence_value);
380 fence_value++;
381 WaitForSingleObject(event, INFINITE);
382
383 if (FAILED(cmdalloc->Reset()))
384 throw runtime_error("resetting ID3D12CommandAllocator failed");
385
386 if (FAILED(cmdlist->Reset(cmdalloc, NULL)))
387 throw runtime_error("resetting ID3D12GraphicsCommandList failed");
388 }
389
390 void
create_uav_buffer(ComPtr<ID3D12Resource> res,size_t width,size_t byte_stride,D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle)391 ComputeTest::create_uav_buffer(ComPtr<ID3D12Resource> res,
392 size_t width, size_t byte_stride,
393 D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle)
394 {
395 D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc;
396 uav_desc.Format = DXGI_FORMAT_R32_TYPELESS;
397 uav_desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
398 uav_desc.Buffer.FirstElement = 0;
399 uav_desc.Buffer.NumElements = DIV_ROUND_UP(width * byte_stride, 4);
400 uav_desc.Buffer.StructureByteStride = 0;
401 uav_desc.Buffer.CounterOffsetInBytes = 0;
402 uav_desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW;
403
404 dev->CreateUnorderedAccessView(res.Get(), NULL, &uav_desc, cpu_handle);
405 }
406
407 void
create_cbv(ComPtr<ID3D12Resource> res,size_t size,D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle)408 ComputeTest::create_cbv(ComPtr<ID3D12Resource> res, size_t size,
409 D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle)
410 {
411 D3D12_CONSTANT_BUFFER_VIEW_DESC cbv_desc;
412 cbv_desc.BufferLocation = res ? res->GetGPUVirtualAddress() : 0;
413 cbv_desc.SizeInBytes = size;
414
415 dev->CreateConstantBufferView(&cbv_desc, cpu_handle);
416 }
417
418 ComPtr<ID3D12Resource>
add_uav_resource(ComputeTest::Resources & resources,unsigned spaceid,unsigned resid,const void * data,size_t num_elems,size_t elem_size)419 ComputeTest::add_uav_resource(ComputeTest::Resources &resources,
420 unsigned spaceid, unsigned resid,
421 const void *data, size_t num_elems,
422 size_t elem_size)
423 {
424 size_t size = align(elem_size * num_elems, 4);
425 D3D12_CPU_DESCRIPTOR_HANDLE handle;
426 ComPtr<ID3D12Resource> res;
427 handle = GetCPUDescriptorHandleForHeapStart(uav_heap);
428 handle = offset_cpu_handle(handle, resources.descs.size() * uav_heap_incr);
429
430 if (size) {
431 if (data)
432 res = create_buffer_with_data(data, size);
433 else
434 res = create_buffer(size, D3D12_HEAP_TYPE_DEFAULT);
435
436 resource_barrier(res, D3D12_RESOURCE_STATE_COMMON,
437 D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
438 }
439 create_uav_buffer(res, num_elems, elem_size, handle);
440 resources.add(res, D3D12_DESCRIPTOR_RANGE_TYPE_UAV, spaceid, resid);
441 return res;
442 }
443
444 ComPtr<ID3D12Resource>
add_cbv_resource(ComputeTest::Resources & resources,unsigned spaceid,unsigned resid,const void * data,size_t size)445 ComputeTest::add_cbv_resource(ComputeTest::Resources &resources,
446 unsigned spaceid, unsigned resid,
447 const void *data, size_t size)
448 {
449 unsigned aligned_size = align(size, 256);
450 D3D12_CPU_DESCRIPTOR_HANDLE handle;
451 ComPtr<ID3D12Resource> res;
452 handle = GetCPUDescriptorHandleForHeapStart(uav_heap);
453 handle = offset_cpu_handle(handle, resources.descs.size() * uav_heap_incr);
454
455 if (size) {
456 assert(data);
457 res = create_sized_buffer_with_data(aligned_size, data, size);
458 }
459 create_cbv(res, aligned_size, handle);
460 resources.add(res, D3D12_DESCRIPTOR_RANGE_TYPE_CBV, spaceid, resid);
461 return res;
462 }
463
464 void
run_shader_with_raw_args(Shader shader,const CompileArgs & compile_args,const std::vector<RawShaderArg * > & args)465 ComputeTest::run_shader_with_raw_args(Shader shader,
466 const CompileArgs &compile_args,
467 const std::vector<RawShaderArg *> &args)
468 {
469 if (args.size() < 1)
470 throw runtime_error("no inputs");
471
472 static HMODULE hD3D12Mod = LoadLibrary("D3D12.DLL");
473 if (!hD3D12Mod)
474 throw runtime_error("Failed to load D3D12.DLL");
475
476 D3D12SerializeVersionedRootSignature = (PFN_D3D12_SERIALIZE_VERSIONED_ROOT_SIGNATURE)GetProcAddress(hD3D12Mod, "D3D12SerializeVersionedRootSignature");
477
478 if (args.size() != shader.dxil->kernel->num_args)
479 throw runtime_error("incorrect number of inputs");
480
481 struct clc_runtime_kernel_conf conf = { 0 };
482
483 // Older WARP and some hardware doesn't support int64, so for these tests, unconditionally lower away int64
484 // A more complex runtime can be smarter about detecting when this needs to be done
485 conf.lower_bit_size = 64;
486
487 if (!shader.dxil->metadata.local_size[0])
488 conf.local_size[0] = compile_args.x;
489 else
490 conf.local_size[0] = shader.dxil->metadata.local_size[0];
491
492 if (!shader.dxil->metadata.local_size[1])
493 conf.local_size[1] = compile_args.y;
494 else
495 conf.local_size[1] = shader.dxil->metadata.local_size[1];
496
497 if (!shader.dxil->metadata.local_size[2])
498 conf.local_size[2] = compile_args.z;
499 else
500 conf.local_size[2] = shader.dxil->metadata.local_size[2];
501
502 if (compile_args.x % conf.local_size[0] ||
503 compile_args.y % conf.local_size[1] ||
504 compile_args.z % conf.local_size[2])
505 throw runtime_error("invalid global size must be a multiple of local size");
506
507 std::vector<struct clc_runtime_arg_info> argsinfo(args.size());
508
509 conf.args = argsinfo.data();
510 conf.support_global_work_id_offsets =
511 compile_args.work_props.global_offset_x != 0 ||
512 compile_args.work_props.global_offset_y != 0 ||
513 compile_args.work_props.global_offset_z != 0;
514 conf.support_workgroup_id_offsets =
515 compile_args.work_props.group_id_offset_x != 0 ||
516 compile_args.work_props.group_id_offset_y != 0 ||
517 compile_args.work_props.group_id_offset_z != 0;
518
519 for (unsigned i = 0; i < shader.dxil->kernel->num_args; ++i) {
520 RawShaderArg *arg = args[i];
521 size_t size = arg->get_elem_size() * arg->get_num_elems();
522
523 switch (shader.dxil->kernel->args[i].address_qualifier) {
524 case CLC_KERNEL_ARG_ADDRESS_LOCAL:
525 argsinfo[i].localptr.size = size;
526 break;
527 default:
528 break;
529 }
530 }
531
532 configure(shader, &conf);
533 validate(shader);
534
535 std::shared_ptr<struct clc_dxil_object> &dxil = shader.dxil;
536
537 std::vector<uint8_t> argsbuf(dxil->metadata.kernel_inputs_buf_size);
538 std::vector<ComPtr<ID3D12Resource>> argres(shader.dxil->kernel->num_args);
539 clc_work_properties_data work_props = compile_args.work_props;
540 if (!conf.support_workgroup_id_offsets) {
541 work_props.group_count_total_x = compile_args.x / conf.local_size[0];
542 work_props.group_count_total_y = compile_args.y / conf.local_size[1];
543 work_props.group_count_total_z = compile_args.z / conf.local_size[2];
544 }
545 if (work_props.work_dim == 0)
546 work_props.work_dim = 3;
547 Resources resources;
548
549 for (unsigned i = 0; i < dxil->kernel->num_args; ++i) {
550 RawShaderArg *arg = args[i];
551 size_t size = arg->get_elem_size() * arg->get_num_elems();
552 void *slot = argsbuf.data() + dxil->metadata.args[i].offset;
553
554 switch (dxil->kernel->args[i].address_qualifier) {
555 case CLC_KERNEL_ARG_ADDRESS_CONSTANT:
556 case CLC_KERNEL_ARG_ADDRESS_GLOBAL: {
557 assert(dxil->metadata.args[i].size == sizeof(uint64_t));
558 uint64_t *ptr_slot = (uint64_t *)slot;
559 if (arg->get_data())
560 *ptr_slot = (uint64_t)dxil->metadata.args[i].globconstptr.buf_id << 32;
561 else
562 *ptr_slot = ~0ull;
563 break;
564 }
565 case CLC_KERNEL_ARG_ADDRESS_LOCAL: {
566 assert(dxil->metadata.args[i].size == sizeof(uint64_t));
567 uint64_t *ptr_slot = (uint64_t *)slot;
568 *ptr_slot = dxil->metadata.args[i].localptr.sharedmem_offset;
569 break;
570 }
571 case CLC_KERNEL_ARG_ADDRESS_PRIVATE: {
572 assert(size == dxil->metadata.args[i].size);
573 memcpy(slot, arg->get_data(), size);
574 break;
575 }
576 default:
577 assert(0);
578 }
579 }
580
581 for (unsigned i = 0; i < dxil->kernel->num_args; ++i) {
582 RawShaderArg *arg = args[i];
583
584 if (dxil->kernel->args[i].address_qualifier == CLC_KERNEL_ARG_ADDRESS_GLOBAL ||
585 dxil->kernel->args[i].address_qualifier == CLC_KERNEL_ARG_ADDRESS_CONSTANT) {
586 argres[i] = add_uav_resource(resources, 0,
587 dxil->metadata.args[i].globconstptr.buf_id,
588 arg->get_data(), arg->get_num_elems(),
589 arg->get_elem_size());
590 }
591 }
592
593 if (dxil->metadata.printf.uav_id > 0)
594 add_uav_resource(resources, 0, dxil->metadata.printf.uav_id, NULL, 1024 * 1024 / 4, 4);
595
596 for (unsigned i = 0; i < dxil->metadata.num_consts; ++i)
597 add_uav_resource(resources, 0, dxil->metadata.consts[i].uav_id,
598 dxil->metadata.consts[i].data,
599 dxil->metadata.consts[i].size / 4, 4);
600
601 if (argsbuf.size())
602 add_cbv_resource(resources, 0, dxil->metadata.kernel_inputs_cbv_id,
603 argsbuf.data(), argsbuf.size());
604
605 add_cbv_resource(resources, 0, dxil->metadata.work_properties_cbv_id,
606 &work_props, sizeof(work_props));
607
608 auto root_sig = create_root_signature(resources);
609 auto pipeline_state = create_pipeline_state(root_sig, *dxil);
610
611 cmdlist->SetDescriptorHeaps(1, &uav_heap);
612 cmdlist->SetComputeRootSignature(root_sig.Get());
613 cmdlist->SetComputeRootDescriptorTable(0, GetGPUDescriptorHandleForHeapStart(uav_heap));
614 cmdlist->SetPipelineState(pipeline_state.Get());
615
616 cmdlist->Dispatch(compile_args.x / conf.local_size[0],
617 compile_args.y / conf.local_size[1],
618 compile_args.z / conf.local_size[2]);
619
620 for (auto &range : resources.ranges) {
621 if (range.RangeType == D3D12_DESCRIPTOR_RANGE_TYPE_UAV) {
622 for (unsigned i = range.OffsetInDescriptorsFromTableStart;
623 i < range.NumDescriptors; i++) {
624 if (!resources.descs[i].Get())
625 continue;
626
627 resource_barrier(resources.descs[i],
628 D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
629 D3D12_RESOURCE_STATE_COMMON);
630 }
631 }
632 }
633
634 execute_cmdlist();
635
636 for (unsigned i = 0; i < args.size(); i++) {
637 if (!(args[i]->get_direction() & SHADER_ARG_OUTPUT))
638 continue;
639
640 assert(dxil->kernel->args[i].address_qualifier == CLC_KERNEL_ARG_ADDRESS_GLOBAL);
641 get_buffer_data(argres[i], args[i]->get_data(),
642 args[i]->get_elem_size() * args[i]->get_num_elems());
643 }
644
645 ComPtr<ID3D12InfoQueue> info_queue;
646 dev->QueryInterface(info_queue.ReleaseAndGetAddressOf());
647 if (info_queue)
648 {
649 EXPECT_EQ(0, info_queue->GetNumStoredMessages());
650 for (unsigned i = 0; i < info_queue->GetNumStoredMessages(); ++i) {
651 SIZE_T message_size = 0;
652 info_queue->GetMessageA(i, nullptr, &message_size);
653 D3D12_MESSAGE* message = (D3D12_MESSAGE*)malloc(message_size);
654 info_queue->GetMessageA(i, message, &message_size);
655 FAIL() << message->pDescription;
656 free(message);
657 }
658 }
659 }
660
661 void
SetUp()662 ComputeTest::SetUp()
663 {
664 static struct clc_libclc *compiler_ctx_g = nullptr;
665
666 if (!compiler_ctx_g) {
667 clc_libclc_dxil_options options = { };
668 options.optimize = (debug_get_option_debug_compute() & COMPUTE_DEBUG_OPTIMIZE_LIBCLC) != 0;
669
670 compiler_ctx_g = clc_libclc_new_dxil(&logger, &options);
671 if (!compiler_ctx_g)
672 throw runtime_error("failed to create CLC compiler context");
673
674 if (debug_get_option_debug_compute() & COMPUTE_DEBUG_SERIALIZE_LIBCLC) {
675 void *serialized = nullptr;
676 size_t serialized_size = 0;
677 clc_libclc_serialize(compiler_ctx_g, &serialized, &serialized_size);
678 if (!serialized)
679 throw runtime_error("failed to serialize CLC compiler context");
680
681 clc_free_libclc(compiler_ctx_g);
682 compiler_ctx_g = nullptr;
683
684 compiler_ctx_g = clc_libclc_deserialize(serialized, serialized_size);
685 if (!compiler_ctx_g)
686 throw runtime_error("failed to deserialize CLC compiler context");
687
688 clc_libclc_free_serialized(serialized);
689 }
690 }
691 compiler_ctx = compiler_ctx_g;
692
693 enable_d3d12_debug_layer();
694
695 factory = get_dxgi_factory();
696 if (!factory)
697 throw runtime_error("failed to create DXGI factory");
698
699 adapter = choose_adapter(factory);
700 if (!adapter)
701 throw runtime_error("failed to choose adapter");
702
703 dev = create_device(adapter);
704 if (!dev)
705 throw runtime_error("failed to create device");
706
707 if (FAILED(dev->CreateFence(0, D3D12_FENCE_FLAG_NONE,
708 __uuidof(cmdqueue_fence),
709 (void **)&cmdqueue_fence)))
710 throw runtime_error("failed to create fence\n");
711
712 D3D12_COMMAND_QUEUE_DESC queue_desc;
713 queue_desc.Type = D3D12_COMMAND_LIST_TYPE_COMPUTE;
714 queue_desc.Priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL;
715 queue_desc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE;
716 queue_desc.NodeMask = 0;
717 if (FAILED(dev->CreateCommandQueue(&queue_desc,
718 __uuidof(cmdqueue),
719 (void **)&cmdqueue)))
720 throw runtime_error("failed to create command queue");
721
722 if (FAILED(dev->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_COMPUTE,
723 __uuidof(cmdalloc), (void **)&cmdalloc)))
724 throw runtime_error("failed to create command allocator");
725
726 if (FAILED(dev->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_COMPUTE,
727 cmdalloc, NULL, __uuidof(cmdlist), (void **)&cmdlist)))
728 throw runtime_error("failed to create command list");
729
730 D3D12_DESCRIPTOR_HEAP_DESC heap_desc;
731 heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
732 heap_desc.NumDescriptors = 1000;
733 heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
734 heap_desc.NodeMask = 0;
735 if (FAILED(dev->CreateDescriptorHeap(&heap_desc,
736 __uuidof(uav_heap), (void **)&uav_heap)))
737 throw runtime_error("failed to create descriptor heap");
738
739 uav_heap_incr = dev->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
740
741 event = CreateEvent(NULL, FALSE, FALSE, NULL);
742 if (!event)
743 throw runtime_error("Failed to create event");
744 fence_value = 1;
745 }
746
747 void
TearDown()748 ComputeTest::TearDown()
749 {
750 CloseHandle(event);
751
752 uav_heap->Release();
753 cmdlist->Release();
754 cmdalloc->Release();
755 cmdqueue->Release();
756 cmdqueue_fence->Release();
757 dev->Release();
758 adapter->Release();
759 factory->Release();
760 }
761
762 PFN_D3D12_SERIALIZE_VERSIONED_ROOT_SIGNATURE ComputeTest::D3D12SerializeVersionedRootSignature;
763
764 bool
validate_module(const struct clc_dxil_object & dxil)765 validate_module(const struct clc_dxil_object &dxil)
766 {
767 struct dxil_validator *val = dxil_create_validator(NULL);
768 char *err;
769 bool res = dxil_validate_module(val, dxil.binary.data,
770 dxil.binary.size, &err);
771 if (!res && err)
772 fprintf(stderr, "D3D12: validation failed: %s", err);
773
774 dxil_destroy_validator(val);
775 return res;
776 }
777
778 static void
dump_blob(const char * path,const struct clc_dxil_object & dxil)779 dump_blob(const char *path, const struct clc_dxil_object &dxil)
780 {
781 FILE *fp = fopen(path, "wb");
782 if (fp) {
783 fwrite(dxil.binary.data, 1, dxil.binary.size, fp);
784 fclose(fp);
785 printf("D3D12: wrote '%s'...\n", path);
786 }
787 }
788
789 ComputeTest::Shader
compile(const std::vector<const char * > & sources,const std::vector<const char * > & compile_args,bool create_library)790 ComputeTest::compile(const std::vector<const char *> &sources,
791 const std::vector<const char *> &compile_args,
792 bool create_library)
793 {
794 struct clc_compile_args args = {
795 };
796 args.args = compile_args.data();
797 args.num_args = (unsigned)compile_args.size();
798 ComputeTest::Shader shader;
799
800 std::vector<Shader> shaders;
801
802 args.source.name = "obj.cl";
803
804 for (unsigned i = 0; i < sources.size(); i++) {
805 args.source.value = sources[i];
806
807 clc_binary spirv{};
808 if (!clc_compile_c_to_spirv(&args, &logger, &spirv))
809 throw runtime_error("failed to compile object!");
810
811 Shader shader;
812 shader.obj = std::shared_ptr<clc_binary>(new clc_binary(spirv), [](clc_binary *spirv)
813 {
814 clc_free_spirv(spirv);
815 delete spirv;
816 });
817 shaders.push_back(shader);
818 }
819
820 if (shaders.size() == 1 && create_library)
821 return shaders[0];
822
823 return link(shaders, create_library);
824 }
825
826 ComputeTest::Shader
link(const std::vector<Shader> & sources,bool create_library)827 ComputeTest::link(const std::vector<Shader> &sources,
828 bool create_library)
829 {
830 std::vector<const clc_binary*> objs;
831 for (auto& source : sources)
832 objs.push_back(&*source.obj);
833
834 struct clc_linker_args link_args = {};
835 link_args.in_objs = objs.data();
836 link_args.num_in_objs = (unsigned)objs.size();
837 link_args.create_library = create_library;
838 clc_binary spirv{};
839 if (!clc_link_spirv(&link_args, &logger, &spirv))
840 throw runtime_error("failed to link objects!");
841
842 ComputeTest::Shader shader;
843 shader.obj = std::shared_ptr<clc_binary>(new clc_binary(spirv), [](clc_binary *spirv)
844 {
845 clc_free_spirv(spirv);
846 delete spirv;
847 });
848 if (!link_args.create_library)
849 configure(shader, NULL);
850
851 return shader;
852 }
853
854 ComputeTest::Shader
assemble(const char * source)855 ComputeTest::assemble(const char *source)
856 {
857 spvtools::SpirvTools tools(SPV_ENV_UNIVERSAL_1_0);
858 std::vector<uint32_t> binary;
859 if (!tools.Assemble(source, strlen(source), &binary))
860 throw runtime_error("failed to assemble");
861
862 ComputeTest::Shader shader;
863 shader.obj = std::shared_ptr<clc_binary>(new clc_binary{}, [](clc_binary *spirv)
864 {
865 free(spirv->data);
866 delete spirv;
867 });
868 shader.obj->size = binary.size() * 4;
869 shader.obj->data = malloc(shader.obj->size);
870 memcpy(shader.obj->data, binary.data(), shader.obj->size);
871
872 configure(shader, NULL);
873
874 return shader;
875 }
876
877 void
configure(Shader & shader,const struct clc_runtime_kernel_conf * conf)878 ComputeTest::configure(Shader &shader,
879 const struct clc_runtime_kernel_conf *conf)
880 {
881 if (!shader.metadata) {
882 shader.metadata = std::shared_ptr<clc_parsed_spirv>(new clc_parsed_spirv{}, [](clc_parsed_spirv *metadata)
883 {
884 clc_free_parsed_spirv(metadata);
885 delete metadata;
886 });
887 if (!clc_parse_spirv(shader.obj.get(), NULL, shader.metadata.get()))
888 throw runtime_error("failed to parse spirv!");
889 }
890
891 std::unique_ptr<clc_dxil_object> dxil(new clc_dxil_object{});
892 if (!clc_spirv_to_dxil(compiler_ctx, shader.obj.get(), shader.metadata.get(), "main_test", conf, nullptr, &logger, dxil.get()))
893 throw runtime_error("failed to compile kernel!");
894 shader.dxil = std::shared_ptr<clc_dxil_object>(dxil.release(), [](clc_dxil_object *dxil)
895 {
896 clc_free_dxil_object(dxil);
897 delete dxil;
898 });
899 }
900
901 void
validate(ComputeTest::Shader & shader)902 ComputeTest::validate(ComputeTest::Shader &shader)
903 {
904 dump_blob("unsigned.cso", *shader.dxil);
905 if (!validate_module(*shader.dxil))
906 throw runtime_error("failed to validate module!");
907
908 dump_blob("signed.cso", *shader.dxil);
909 }
910