1 /*
2 * Copyright © 2020 Valve Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24 #include "helpers.h"
25 #include "vulkan/vk_format.h"
26 #include "common/amd_family.h"
27 #include <stdio.h>
28 #include <sstream>
29 #include <llvm-c/Target.h>
30 #include <mutex>
31
32 using namespace aco;
33
extern "C" {
/* ICD entry point exported by the driver itself; used below to bootstrap all
 * other Vulkan function pointers without going through a Vulkan loader. */
PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
   VkInstance instance,
   const char* pName);
}

/* Shared mutable state for the test shader currently being built. The helpers
 * operate on one program at a time. */
ac_shader_config config;
aco_shader_info info;
std::unique_ptr<Program> program;
Builder bld(NULL);
Temp inputs[16];

/* Per-family Vulkan instance/device cache, filled lazily by get_vk_device()
 * under create_device_mutex and torn down by the destroy_devices static. */
static VkInstance instance_cache[CHIP_LAST] = {VK_NULL_HANDLE};
static VkDevice device_cache[CHIP_LAST] = {VK_NULL_HANDLE};
static std::mutex create_device_mutex;
49
/* Vulkan entry points resolved at runtime through vk_icdGetInstanceProcAddr.
 * ITEM() is expanded twice: once right below to declare a PFN_vk* global per
 * function, and once in get_vk_device() to load the pointers. */
#define FUNCTION_LIST\
   ITEM(CreateInstance)\
   ITEM(DestroyInstance)\
   ITEM(EnumeratePhysicalDevices)\
   ITEM(GetPhysicalDeviceProperties2)\
   ITEM(CreateDevice)\
   ITEM(DestroyDevice)\
   ITEM(CreateShaderModule)\
   ITEM(DestroyShaderModule)\
   ITEM(CreateGraphicsPipelines)\
   ITEM(CreateComputePipelines)\
   ITEM(DestroyPipeline)\
   ITEM(CreateDescriptorSetLayout)\
   ITEM(DestroyDescriptorSetLayout)\
   ITEM(CreatePipelineLayout)\
   ITEM(DestroyPipelineLayout)\
   ITEM(CreateRenderPass)\
   ITEM(DestroyRenderPass)\
   ITEM(GetPipelineExecutablePropertiesKHR)\
   ITEM(GetPipelineExecutableInternalRepresentationsKHR)

#define ITEM(n) PFN_vk##n n;
FUNCTION_LIST
#undef ITEM
74
create_program(enum amd_gfx_level gfx_level,Stage stage,unsigned wave_size,enum radeon_family family)75 void create_program(enum amd_gfx_level gfx_level, Stage stage, unsigned wave_size, enum radeon_family family)
76 {
77 memset(&config, 0, sizeof(config));
78 info.wave_size = wave_size;
79
80 program.reset(new Program);
81 aco::init_program(program.get(), stage, &info, gfx_level, family, false, &config);
82 program->workgroup_size = UINT_MAX;
83 calc_min_waves(program.get());
84
85 program->debug.func = nullptr;
86 program->debug.private_data = nullptr;
87
88 program->debug.output = output;
89 program->debug.shorten_messages = true;
90 program->debug.func = nullptr;
91 program->debug.private_data = nullptr;
92
93 Block *block = program->create_and_insert_block();
94 block->kind = block_kind_top_level;
95
96 bld = Builder(program.get(), &program->blocks[0]);
97
98 config.float_mode = program->blocks[0].fp_mode.val;
99 }
100
setup_cs(const char * input_spec,enum amd_gfx_level gfx_level,enum radeon_family family,const char * subvariant,unsigned wave_size)101 bool setup_cs(const char *input_spec, enum amd_gfx_level gfx_level,
102 enum radeon_family family, const char* subvariant,
103 unsigned wave_size)
104 {
105 if (!set_variant(gfx_level, subvariant))
106 return false;
107
108 memset(&info, 0, sizeof(info));
109 create_program(gfx_level, compute_cs, wave_size, family);
110
111 if (input_spec) {
112 std::vector<RegClass> input_classes;
113 while (input_spec[0]) {
114 RegType type = input_spec[0] == 'v' ? RegType::vgpr : RegType::sgpr;
115 unsigned size = input_spec[1] - '0';
116 bool in_bytes = input_spec[2] == 'b';
117 input_classes.push_back(RegClass::get(type, size * (in_bytes ? 1 : 4)));
118
119 input_spec += 2 + in_bytes;
120 while (input_spec[0] == ' ') input_spec++;
121 }
122
123 aco_ptr<Instruction> startpgm{create_instruction<Pseudo_instruction>(
124 aco_opcode::p_startpgm, Format::PSEUDO, 0, input_classes.size())};
125 for (unsigned i = 0; i < input_classes.size(); i++) {
126 inputs[i] = bld.tmp(input_classes[i]);
127 startpgm->definitions[i] = Definition(inputs[i]);
128 }
129 bld.insert(std::move(startpgm));
130 }
131
132 return true;
133 }
134
finish_program(Program * prog)135 void finish_program(Program *prog)
136 {
137 for (Block& BB : prog->blocks) {
138 for (unsigned idx : BB.linear_preds)
139 prog->blocks[idx].linear_succs.emplace_back(BB.index);
140 for (unsigned idx : BB.logical_preds)
141 prog->blocks[idx].logical_succs.emplace_back(BB.index);
142 }
143
144 for (Block& block : prog->blocks) {
145 if (block.linear_succs.size() == 0) {
146 block.kind |= block_kind_uniform;
147 Builder(prog, &block).sopp(aco_opcode::s_endpgm);
148 }
149 }
150 }
151
finish_validator_test()152 void finish_validator_test()
153 {
154 finish_program(program.get());
155 aco_print_program(program.get(), output);
156 fprintf(output, "Validation results:\n");
157 if (aco::validate_ir(program.get()))
158 fprintf(output, "Validation passed\n");
159 else
160 fprintf(output, "Validation failed\n");
161 }
162
finish_opt_test()163 void finish_opt_test()
164 {
165 finish_program(program.get());
166 if (!aco::validate_ir(program.get())) {
167 fail_test("Validation before optimization failed");
168 return;
169 }
170 aco::optimize(program.get());
171 if (!aco::validate_ir(program.get())) {
172 fail_test("Validation after optimization failed");
173 return;
174 }
175 aco_print_program(program.get(), output);
176 }
177
/* Finalize the program, run liveness analysis and register allocation, then
 * print the result. When `lower` is set, also eliminate SSA and lower to
 * hardware instructions before printing. */
void finish_ra_test(ra_test_policy policy, bool lower)
{
   finish_program(program.get());
   if (!aco::validate_ir(program.get())) {
      fail_test("Validation before register allocation failed");
      return;
   }

   /* RA needs a concrete workgroup size; pin it to one wave. */
   program->workgroup_size = program->wave_size;
   aco::live live_vars = aco::live_var_analysis(program.get());
   aco::register_allocation(program.get(), live_vars.live_out, policy);

   /* NOTE(review): intentionally asymmetric with validate_ir() above
    * (true == valid there): a true return from validate_ra() is treated as
    * failure here — presumably validate_ra() returns true when an error was
    * found; confirm against aco's validator. */
   if (aco::validate_ra(program.get())) {
      fail_test("Validation after register allocation failed");
      return;
   }

   if (lower) {
      aco::ssa_elimination(program.get());
      aco::lower_to_hw_instr(program.get());
   }

   aco_print_program(program.get(), output);
}
202
/* Finalize the program, run the post-RA optimizer, and print the result. */
void finish_optimizer_postRA_test()
{
   finish_program(program.get());
   aco::optimize_postRA(program.get());
   aco_print_program(program.get(), output);
}
209
/* Finalize the program, lower pseudo instructions to hardware instructions,
 * and print the result. */
void finish_to_hw_instr_test()
{
   finish_program(program.get());
   aco::lower_to_hw_instr(program.get());
   aco_print_program(program.get(), output);
}
216
/* Finalize the program, run the hazard-mitigation NOP insertion pass, and
 * print the result. */
void finish_insert_nops_test()
{
   finish_program(program.get());
   aco::insert_NOPs(program.get());
   aco_print_program(program.get(), output);
}
223
/* Finalize the program, run the hard-clause formation pass, and print the
 * result. */
void finish_form_hard_clause_test()
{
   finish_program(program.get());
   aco::form_hard_clauses(program.get());
   aco_print_program(program.get(), output);
}
230
/* Finalize the program, assemble it to a binary, and print a disassembly
 * (GFX8+) or a raw dword dump (older targets) to the test output. */
void finish_assembler_test()
{
   finish_program(program.get());
   std::vector<uint32_t> binary;
   unsigned exec_size = emit_program(program.get(), binary);

   /* we could use CLRX for disassembly but that would require it to be
    * installed */
   if (program->gfx_level >= GFX8) {
      /* exec_size is in bytes; print_asm wants a dword count. */
      print_asm(program.get(), binary, exec_size / 4u, output);
   } else {
      //TODO: maybe we should use CLRX and skip this test if it's not available?
      for (uint32_t dword : binary)
         fprintf(output, "%.8x\n", dword);
   }
}
247
writeout(unsigned i,Temp tmp)248 void writeout(unsigned i, Temp tmp)
249 {
250 if (tmp.id())
251 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), tmp);
252 else
253 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i));
254 }
255
/* Emit a p_unit_test marker with index `i` for a builder result. */
void writeout(unsigned i, aco::Builder::Result res)
{
   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), res);
}
260
/* Emit a p_unit_test marker with index `i` and a single operand. */
void writeout(unsigned i, Operand op)
{
   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), op);
}
265
/* Emit a p_unit_test marker with index `i` and two operands. */
void writeout(unsigned i, Operand op0, Operand op1)
{
   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), op0, op1);
}
270
fneg(Temp src,Builder b)271 Temp fneg(Temp src, Builder b)
272 {
273 if (src.bytes() == 2)
274 return b.vop2(aco_opcode::v_mul_f16, b.def(v2b), Operand::c16(0xbc00u), src);
275 else
276 return b.vop2(aco_opcode::v_mul_f32, b.def(v1), Operand::c32(0xbf800000u), src);
277 }
278
/* Take the absolute value of a float by multiplying |src| with 1.0: emits a
 * VOP3-encoded multiply and sets the abs modifier on the second source. */
Temp fabs(Temp src, Builder b)
{
   if (src.bytes() == 2) {
      Builder::Result res = b.vop2_e64(aco_opcode::v_mul_f16, b.def(v2b), Operand::c16(0x3c00), src);
      res.instr->vop3().abs[1] = true;
      return res;
   } else {
      Builder::Result res = b.vop2_e64(aco_opcode::v_mul_f32, b.def(v1), Operand::c32(0x3f800000u), src);
      res.instr->vop3().abs[1] = true;
      return res;
   }
}
291
/* Convert a 16-bit float to 32 bits. */
Temp f2f32(Temp src, Builder b)
{
   return b.vop1(aco_opcode::v_cvt_f32_f16, b.def(v1), src);
}
296
/* Convert a 32-bit float to 16 bits. */
Temp f2f16(Temp src, Builder b)
{
   return b.vop1(aco_opcode::v_cvt_f16_f32, b.def(v2b), src);
}
301
/* Truncate to 16 bits by extracting the low half of the source. */
Temp u2u16(Temp src, Builder b)
{
   return b.pseudo(aco_opcode::p_extract_vector, b.def(v2b), src, Operand::zero());
}
306
fadd(Temp src0,Temp src1,Builder b)307 Temp fadd(Temp src0, Temp src1, Builder b)
308 {
309 if (src0.bytes() == 2)
310 return b.vop2(aco_opcode::v_add_f16, b.def(v2b), src0, src1);
311 else
312 return b.vop2(aco_opcode::v_add_f32, b.def(v1), src0, src1);
313 }
314
fmul(Temp src0,Temp src1,Builder b)315 Temp fmul(Temp src0, Temp src1, Builder b)
316 {
317 if (src0.bytes() == 2)
318 return b.vop2(aco_opcode::v_mul_f16, b.def(v2b), src0, src1);
319 else
320 return b.vop2(aco_opcode::v_mul_f32, b.def(v1), src0, src1);
321 }
322
fma(Temp src0,Temp src1,Temp src2,Builder b)323 Temp fma(Temp src0, Temp src1, Temp src2, Builder b)
324 {
325 if (src0.bytes() == 2)
326 return b.vop3(aco_opcode::v_fma_f16, b.def(v2b), src0, src1, src2);
327 else
328 return b.vop3(aco_opcode::v_fma_f32, b.def(v1), src0, src1, src2);
329 }
330
/* Saturate a float to [0.0, 1.0] using a med3 with the constants 0.0/1.0. */
Temp fsat(Temp src, Builder b)
{
   if (src.bytes() == 2)
      return b.vop3(aco_opcode::v_med3_f16, b.def(v2b), Operand::c16(0u),
                    Operand::c16(0x3c00u), src);
   else
      return b.vop3(aco_opcode::v_med3_f32, b.def(v1), Operand::zero(),
                    Operand::c32(0x3f800000u), src);
}
340
/* Zero-extend the idx-th 16-bit element of src into a value of the same
 * register class. */
Temp ext_ushort(Temp src, unsigned idx, Builder b)
{
   return b.pseudo(aco_opcode::p_extract, b.def(src.regClass()), src, Operand::c32(idx),
                   Operand::c32(16u), Operand::c32(false));
}
346
/* Zero-extend the idx-th byte of src into a value of the same register
 * class. */
Temp ext_ubyte(Temp src, unsigned idx, Builder b)
{
   return b.pseudo(aco_opcode::p_extract, b.def(src.regClass()), src, Operand::c32(idx),
                   Operand::c32(8u), Operand::c32(false));
}
352
get_vk_device(enum amd_gfx_level gfx_level)353 VkDevice get_vk_device(enum amd_gfx_level gfx_level)
354 {
355 enum radeon_family family;
356 switch (gfx_level) {
357 case GFX6:
358 family = CHIP_TAHITI;
359 break;
360 case GFX7:
361 family = CHIP_BONAIRE;
362 break;
363 case GFX8:
364 family = CHIP_POLARIS10;
365 break;
366 case GFX9:
367 family = CHIP_VEGA10;
368 break;
369 case GFX10:
370 family = CHIP_NAVI10;
371 break;
372 case GFX10_3:
373 family = CHIP_NAVI21;
374 break;
375 case GFX11:
376 family = CHIP_GFX1100;
377 break;
378 default:
379 family = CHIP_UNKNOWN;
380 break;
381 }
382 return get_vk_device(family);
383 }
384
get_vk_device(enum radeon_family family)385 VkDevice get_vk_device(enum radeon_family family)
386 {
387 assert(family != CHIP_UNKNOWN);
388
389 std::lock_guard<std::mutex> guard(create_device_mutex);
390
391 if (device_cache[family])
392 return device_cache[family];
393
394 setenv("RADV_FORCE_FAMILY", ac_get_family_name(family), 1);
395
396 VkApplicationInfo app_info = {};
397 app_info.pApplicationName = "aco_tests";
398 app_info.apiVersion = VK_API_VERSION_1_2;
399 VkInstanceCreateInfo instance_create_info = {};
400 instance_create_info.pApplicationInfo = &app_info;
401 instance_create_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
402 ASSERTED VkResult result = ((PFN_vkCreateInstance)vk_icdGetInstanceProcAddr(NULL, "vkCreateInstance"))(&instance_create_info, NULL, &instance_cache[family]);
403 assert(result == VK_SUCCESS);
404
405 #define ITEM(n) n = (PFN_vk##n)vk_icdGetInstanceProcAddr(instance_cache[family], "vk" #n);
406 FUNCTION_LIST
407 #undef ITEM
408
409 uint32_t device_count = 1;
410 VkPhysicalDevice device = VK_NULL_HANDLE;
411 result = EnumeratePhysicalDevices(instance_cache[family], &device_count, &device);
412 assert(result == VK_SUCCESS);
413 assert(device != VK_NULL_HANDLE);
414
415 VkDeviceCreateInfo device_create_info = {};
416 device_create_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
417 static const char *extensions[] = {"VK_KHR_pipeline_executable_properties"};
418 device_create_info.enabledExtensionCount = sizeof(extensions) / sizeof(extensions[0]);
419 device_create_info.ppEnabledExtensionNames = extensions;
420 result = CreateDevice(device, &device_create_info, NULL, &device_cache[family]);
421
422 return device_cache[family];
423 }
424
/* Static whose destructor runs at process exit and tears down every cached
 * device/instance pair created by get_vk_device(). */
static struct DestroyDevices {
   ~DestroyDevices() {
      for (unsigned i = 0; i < CHIP_LAST; i++) {
         if (!device_cache[i])
            continue;
         DestroyDevice(device_cache[i], NULL);
         DestroyInstance(instance_cache[i], NULL);
      }
   }
} destroy_devices;
435
/* Print the internal representation named `name` of the pipeline executable
 * matching `stages` to the test output. When `remove_encoding` is set,
 * everything from ';' to end-of-line (the disassembly's encoding column) is
 * blanked out so output is stable across binary encodings. */
void print_pipeline_ir(VkDevice device, VkPipeline pipeline, VkShaderStageFlagBits stages,
                       const char *name, bool remove_encoding)
{
   uint32_t executable_count = 16;
   VkPipelineExecutablePropertiesKHR executables[16];
   VkPipelineInfoKHR pipeline_info;
   pipeline_info.sType = VK_STRUCTURE_TYPE_PIPELINE_INFO_KHR;
   pipeline_info.pNext = NULL;
   pipeline_info.pipeline = pipeline;
   ASSERTED VkResult result = GetPipelineExecutablePropertiesKHR(device, &pipeline_info, &executable_count, executables);
   assert(result == VK_SUCCESS);

   /* Find the executable whose stage mask matches exactly. */
   uint32_t executable = 0;
   for (; executable < executable_count; executable++) {
      if (executables[executable].stages == stages)
         break;
   }
   assert(executable != executable_count);

   VkPipelineExecutableInfoKHR exec_info;
   exec_info.sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_INFO_KHR;
   exec_info.pNext = NULL;
   exec_info.pipeline = pipeline;
   exec_info.executableIndex = executable;

   /* First query: enumerate the IRs (pData == NULL yields sizes only). */
   uint32_t ir_count = 16;
   VkPipelineExecutableInternalRepresentationKHR ir[16];
   memset(ir, 0, sizeof(ir));
   result = GetPipelineExecutableInternalRepresentationsKHR(device, &exec_info, &ir_count, ir);
   assert(result == VK_SUCCESS);

   VkPipelineExecutableInternalRepresentationKHR* requested_ir = nullptr;
   for (unsigned i = 0; i < ir_count; ++i) {
      if (strcmp(ir[i].name, name) == 0) {
         requested_ir = &ir[i];
         break;
      }
   }
   assert(requested_ir && "Could not find requested IR");

   /* Second query: provide a buffer so the requested IR's text is filled. */
   char *data = (char*)malloc(requested_ir->dataSize);
   requested_ir->pData = data;
   result = GetPipelineExecutableInternalRepresentationsKHR(device, &exec_info, &ir_count, ir);
   assert(result == VK_SUCCESS);

   if (remove_encoding) {
      for (char *c = data; *c; c++) {
         if (*c == ';') {
            for (; *c && *c != '\n'; c++)
               *c = ' ';
         }
      }
   }

   fprintf(output, "%s", data);
   free(data);
}
493
__qoCreateShaderModule(VkDevice dev,const QoShaderModuleCreateInfo * module_info)494 VkShaderModule __qoCreateShaderModule(VkDevice dev, const QoShaderModuleCreateInfo *module_info)
495 {
496 VkShaderModuleCreateInfo vk_module_info;
497 vk_module_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
498 vk_module_info.pNext = NULL;
499 vk_module_info.flags = 0;
500 vk_module_info.codeSize = module_info->spirvSize;
501 vk_module_info.pCode = (const uint32_t*)module_info->pSpirv;
502
503 VkShaderModule module;
504 ASSERTED VkResult result = CreateShaderModule(dev, &vk_module_info, NULL, &module);
505 assert(result == VK_SUCCESS);
506
507 return module;
508 }
509
/* Zero all builder state and remember the device. NOTE(review): memset(this)
 * is only safe while PipelineBuilder stays free of virtual functions and
 * non-trivial members — confirm against the class definition in helpers.h. */
PipelineBuilder::PipelineBuilder(VkDevice dev) {
   memset(this, 0, sizeof(*this));
   topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
   device = dev;
}
515
/* Destroy every Vulkan object the builder created: pipeline, any shader
 * modules we own (tracked via owned_stages), layout, descriptor-set layouts
 * and the render pass. Destroying VK_NULL_HANDLE is legal, so unset members
 * are harmless. */
PipelineBuilder::~PipelineBuilder()
{
   DestroyPipeline(device, pipeline, NULL);

   /* Compute pipelines only ever use stages[0]. */
   for (unsigned i = 0; i < (is_compute() ? 1 : gfx_pipeline_info.stageCount); i++) {
      VkPipelineShaderStageCreateInfo *stage_info = &stages[i];
      if (owned_stages & stage_info->stage)
         DestroyShaderModule(device, stage_info->module, NULL);
   }

   DestroyPipelineLayout(device, pipeline_layout, NULL);

   /* desc_layouts is packed: one entry per set bit in desc_layouts_used. */
   for (unsigned i = 0; i < util_bitcount64(desc_layouts_used); i++)
      DestroyDescriptorSetLayout(device, desc_layouts[i], NULL);

   DestroyRenderPass(device, render_pass, NULL);
}
533
/* Record a descriptor binding for set `layout` and mark that set as used. */
void PipelineBuilder::add_desc_binding(VkShaderStageFlags stage_flags, uint32_t layout,
                                       uint32_t binding, VkDescriptorType type, uint32_t count)
{
   desc_layouts_used |= 1ull << layout;
   desc_bindings[layout][num_desc_bindings[layout]++] = {binding, type, count, stage_flags, NULL};
}
540
/* Append a vertex buffer binding description. */
void PipelineBuilder::add_vertex_binding(uint32_t binding, uint32_t stride, VkVertexInputRate rate)
{
   vs_bindings[vs_input.vertexBindingDescriptionCount++] = {binding, stride, rate};
}
545
/* Append a vertex attribute description. */
void PipelineBuilder::add_vertex_attribute(uint32_t location, uint32_t binding, VkFormat format, uint32_t offset)
{
   vs_attributes[vs_input.vertexAttributeDescriptionCount++] = {location, binding, format, offset};
}
550
/* Translate the shader module's resource declarations into descriptor
 * bindings of the corresponding Vulkan descriptor type. Unknown declaration
 * types (e.g. in/out, handled by add_io_decls) are skipped. */
void PipelineBuilder::add_resource_decls(QoShaderModuleCreateInfo *module)
{
   for (unsigned i = 0; i < module->declarationCount; i++) {
      const QoShaderDecl *decl = &module->pDeclarations[i];
      switch (decl->decl_type) {
      case QoShaderDeclType_ubo:
         add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
         break;
      case QoShaderDeclType_ssbo:
         add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
         break;
      case QoShaderDeclType_img_buf:
         add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER);
         break;
      case QoShaderDeclType_img:
         add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE);
         break;
      case QoShaderDeclType_tex_buf:
         add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER);
         break;
      case QoShaderDeclType_combined:
         add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);
         break;
      case QoShaderDeclType_tex:
         add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE);
         break;
      case QoShaderDeclType_samp:
         add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_SAMPLER);
         break;
      default:
         break;
      }
   }
}
585
/* Translate the module's in/out declarations: vertex-shader inputs become
 * vertex attributes (each packed into its own 16-byte vec4 slot of binding
 * 0), fragment-shader outputs select the color attachment format. The format
 * is picked from the declared type's first character ('u' = uint, 'i' = int,
 * otherwise float). */
void PipelineBuilder::add_io_decls(QoShaderModuleCreateInfo *module)
{
   unsigned next_vtx_offset = 0;
   for (unsigned i = 0; i < module->declarationCount; i++) {
      const QoShaderDecl *decl = &module->pDeclarations[i];
      switch (decl->decl_type) {
      case QoShaderDeclType_in:
         if (module->stage == VK_SHADER_STAGE_VERTEX_BIT) {
            if (!strcmp(decl->type, "float") || decl->type[0] == 'v')
               add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_SFLOAT, next_vtx_offset);
            else if (decl->type[0] == 'u')
               add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_UINT, next_vtx_offset);
            else if (decl->type[0] == 'i')
               add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_SINT, next_vtx_offset);
            next_vtx_offset += 16;
         }
         break;
      case QoShaderDeclType_out:
         if (module->stage == VK_SHADER_STAGE_FRAGMENT_BIT) {
            if (!strcmp(decl->type, "float") || decl->type[0] == 'v')
               color_outputs[decl->location] = VK_FORMAT_R32G32B32A32_SFLOAT;
            else if (decl->type[0] == 'u')
               color_outputs[decl->location] = VK_FORMAT_R32G32B32A32_UINT;
            else if (decl->type[0] == 'i')
               color_outputs[decl->location] = VK_FORMAT_R32G32B32A32_SINT;
         }
         break;
      default:
         break;
      }
   }
   /* One interleaved vertex buffer covering all attributes. */
   if (next_vtx_offset)
      add_vertex_binding(0, next_vtx_offset);
}
620
/* Register a shader stage. Compute always occupies stages[0]; graphics
 * stages append to the pipeline's stage list. The module is marked as owned
 * so the destructor destroys it. */
void PipelineBuilder::add_stage(VkShaderStageFlagBits stage, VkShaderModule module, const char *name)
{
   VkPipelineShaderStageCreateInfo *stage_info;
   if (stage == VK_SHADER_STAGE_COMPUTE_BIT)
      stage_info = &stages[0];
   else
      stage_info = &stages[gfx_pipeline_info.stageCount++];
   stage_info->sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
   stage_info->pNext = NULL;
   stage_info->flags = 0;
   stage_info->stage = stage;
   stage_info->module = module;
   stage_info->pName = name;
   stage_info->pSpecializationInfo = NULL;
   owned_stages |= stage;
}
637
/* Convenience overload: create the module from SPIR-V, then pick up its
 * resource and I/O declarations. */
void PipelineBuilder::add_stage(VkShaderStageFlagBits stage, QoShaderModuleCreateInfo module, const char *name)
{
   add_stage(stage, __qoCreateShaderModule(device, &module), name);
   add_resource_decls(&module);
   add_io_decls(&module);
}
644
/* Add a vertex + fragment stage pair from existing shader modules. */
void PipelineBuilder::add_vsfs(VkShaderModule vs, VkShaderModule fs)
{
   add_stage(VK_SHADER_STAGE_VERTEX_BIT, vs);
   add_stage(VK_SHADER_STAGE_FRAGMENT_BIT, fs);
}
650
/* Add a vertex + fragment stage pair from SPIR-V create infos. */
void PipelineBuilder::add_vsfs(QoShaderModuleCreateInfo vs, QoShaderModuleCreateInfo fs)
{
   add_stage(VK_SHADER_STAGE_VERTEX_BIT, vs);
   add_stage(VK_SHADER_STAGE_FRAGMENT_BIT, fs);
}
656
/* Add a compute stage from an existing shader module. */
void PipelineBuilder::add_cs(VkShaderModule cs)
{
   add_stage(VK_SHADER_STAGE_COMPUTE_BIT, cs);
}
661
/* Add a compute stage from a SPIR-V create info. */
void PipelineBuilder::add_cs(QoShaderModuleCreateInfo cs)
{
   add_stage(VK_SHADER_STAGE_COMPUTE_BIT, cs);
}
666
/* True when no graphics stages were added (compute uses stages[0] only and
 * never bumps gfx_pipeline_info.stageCount). */
bool PipelineBuilder::is_compute() {
   return gfx_pipeline_info.stageCount == 0;
}
670
/* Build the compute pipeline from stages[0], requesting internal-
 * representation capture so print_ir() can fetch the compiled IR later. */
void PipelineBuilder::create_compute_pipeline() {
   VkComputePipelineCreateInfo create_info;
   create_info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
   create_info.pNext = NULL;
   create_info.flags = VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR;
   create_info.stage = stages[0];
   create_info.layout = pipeline_layout;
   create_info.basePipelineHandle = VK_NULL_HANDLE;
   create_info.basePipelineIndex = 0;

   ASSERTED VkResult result = CreateComputePipelines(device, VK_NULL_HANDLE, 1, &create_info, NULL, &pipeline);
   assert(result == VK_SUCCESS);
}
684
create_graphics_pipeline()685 void PipelineBuilder::create_graphics_pipeline() {
686 /* create the create infos */
687 if (!samples)
688 samples = VK_SAMPLE_COUNT_1_BIT;
689
690 unsigned num_color_attachments = 0;
691 VkPipelineColorBlendAttachmentState blend_attachment_states[16];
692 VkAttachmentReference color_attachments[16];
693 VkAttachmentDescription attachment_descs[17];
694 for (unsigned i = 0; i < 16; i++) {
695 if (color_outputs[i] == VK_FORMAT_UNDEFINED)
696 continue;
697
698 VkAttachmentDescription *desc = &attachment_descs[num_color_attachments];
699 desc->flags = 0;
700 desc->format = color_outputs[i];
701 desc->samples = samples;
702 desc->loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
703 desc->storeOp = VK_ATTACHMENT_STORE_OP_STORE;
704 desc->stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
705 desc->stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
706 desc->initialLayout = VK_IMAGE_LAYOUT_GENERAL;
707 desc->finalLayout = VK_IMAGE_LAYOUT_GENERAL;
708
709 VkAttachmentReference *ref = &color_attachments[num_color_attachments];
710 ref->attachment = num_color_attachments;
711 ref->layout = VK_IMAGE_LAYOUT_GENERAL;
712
713 VkPipelineColorBlendAttachmentState *blend = &blend_attachment_states[num_color_attachments];
714 blend->blendEnable = false;
715 blend->colorWriteMask = VK_COLOR_COMPONENT_R_BIT |
716 VK_COLOR_COMPONENT_G_BIT |
717 VK_COLOR_COMPONENT_B_BIT |
718 VK_COLOR_COMPONENT_A_BIT;
719
720 num_color_attachments++;
721 }
722
723 unsigned num_attachments = num_color_attachments;
724 VkAttachmentReference ds_attachment;
725 if (ds_output != VK_FORMAT_UNDEFINED) {
726 VkAttachmentDescription *desc = &attachment_descs[num_attachments];
727 desc->flags = 0;
728 desc->format = ds_output;
729 desc->samples = samples;
730 desc->loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
731 desc->storeOp = VK_ATTACHMENT_STORE_OP_STORE;
732 desc->stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
733 desc->stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
734 desc->initialLayout = VK_IMAGE_LAYOUT_GENERAL;
735 desc->finalLayout = VK_IMAGE_LAYOUT_GENERAL;
736
737 ds_attachment.attachment = num_color_attachments;
738 ds_attachment.layout = VK_IMAGE_LAYOUT_GENERAL;
739
740 num_attachments++;
741 }
742
743 vs_input.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
744 vs_input.pNext = NULL;
745 vs_input.flags = 0;
746 vs_input.pVertexBindingDescriptions = vs_bindings;
747 vs_input.pVertexAttributeDescriptions = vs_attributes;
748
749 VkPipelineInputAssemblyStateCreateInfo assembly_state;
750 assembly_state.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
751 assembly_state.pNext = NULL;
752 assembly_state.flags = 0;
753 assembly_state.topology = topology;
754 assembly_state.primitiveRestartEnable = false;
755
756 VkPipelineTessellationStateCreateInfo tess_state;
757 tess_state.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO;
758 tess_state.pNext = NULL;
759 tess_state.flags = 0;
760 tess_state.patchControlPoints = patch_size;
761
762 VkPipelineViewportStateCreateInfo viewport_state;
763 viewport_state.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO;
764 viewport_state.pNext = NULL;
765 viewport_state.flags = 0;
766 viewport_state.viewportCount = 1;
767 viewport_state.pViewports = NULL;
768 viewport_state.scissorCount = 1;
769 viewport_state.pScissors = NULL;
770
771 VkPipelineRasterizationStateCreateInfo rasterization_state;
772 rasterization_state.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
773 rasterization_state.pNext = NULL;
774 rasterization_state.flags = 0;
775 rasterization_state.depthClampEnable = false;
776 rasterization_state.rasterizerDiscardEnable = false;
777 rasterization_state.polygonMode = VK_POLYGON_MODE_FILL;
778 rasterization_state.cullMode = VK_CULL_MODE_NONE;
779 rasterization_state.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE;
780 rasterization_state.depthBiasEnable = false;
781 rasterization_state.lineWidth = 1.0;
782
783 VkPipelineMultisampleStateCreateInfo ms_state;
784 ms_state.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
785 ms_state.pNext = NULL;
786 ms_state.flags = 0;
787 ms_state.rasterizationSamples = samples;
788 ms_state.sampleShadingEnable = sample_shading_enable;
789 ms_state.minSampleShading = min_sample_shading;
790 VkSampleMask sample_mask = 0xffffffff;
791 ms_state.pSampleMask = &sample_mask;
792 ms_state.alphaToCoverageEnable = false;
793 ms_state.alphaToOneEnable = false;
794
795 VkPipelineDepthStencilStateCreateInfo ds_state;
796 ds_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO;
797 ds_state.pNext = NULL;
798 ds_state.flags = 0;
799 ds_state.depthTestEnable = ds_output != VK_FORMAT_UNDEFINED;
800 ds_state.depthWriteEnable = true;
801 ds_state.depthCompareOp = VK_COMPARE_OP_ALWAYS;
802 ds_state.depthBoundsTestEnable = false;
803 ds_state.stencilTestEnable = true;
804 ds_state.front.failOp = VK_STENCIL_OP_KEEP;
805 ds_state.front.passOp = VK_STENCIL_OP_REPLACE;
806 ds_state.front.depthFailOp = VK_STENCIL_OP_REPLACE;
807 ds_state.front.compareOp = VK_COMPARE_OP_ALWAYS;
808 ds_state.front.compareMask = 0xffffffff,
809 ds_state.front.writeMask = 0;
810 ds_state.front.reference = 0;
811 ds_state.back = ds_state.front;
812
813 VkPipelineColorBlendStateCreateInfo color_blend_state;
814 color_blend_state.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO;
815 color_blend_state.pNext = NULL;
816 color_blend_state.flags = 0;
817 color_blend_state.logicOpEnable = false;
818 color_blend_state.attachmentCount = num_color_attachments;
819 color_blend_state.pAttachments = blend_attachment_states;
820
821 VkDynamicState dynamic_states[9] = {
822 VK_DYNAMIC_STATE_VIEWPORT,
823 VK_DYNAMIC_STATE_SCISSOR,
824 VK_DYNAMIC_STATE_LINE_WIDTH,
825 VK_DYNAMIC_STATE_DEPTH_BIAS,
826 VK_DYNAMIC_STATE_BLEND_CONSTANTS,
827 VK_DYNAMIC_STATE_DEPTH_BOUNDS,
828 VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
829 VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
830 VK_DYNAMIC_STATE_STENCIL_REFERENCE
831 };
832
833 VkPipelineDynamicStateCreateInfo dynamic_state;
834 dynamic_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO;
835 dynamic_state.pNext = NULL;
836 dynamic_state.flags = 0;
837 dynamic_state.dynamicStateCount = sizeof(dynamic_states) / sizeof(VkDynamicState);
838 dynamic_state.pDynamicStates = dynamic_states;
839
840 gfx_pipeline_info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
841 gfx_pipeline_info.pNext = NULL;
842 gfx_pipeline_info.flags = VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR;
843 gfx_pipeline_info.pVertexInputState = &vs_input;
844 gfx_pipeline_info.pInputAssemblyState = &assembly_state;
845 gfx_pipeline_info.pTessellationState = &tess_state;
846 gfx_pipeline_info.pViewportState = &viewport_state;
847 gfx_pipeline_info.pRasterizationState = &rasterization_state;
848 gfx_pipeline_info.pMultisampleState = &ms_state;
849 gfx_pipeline_info.pDepthStencilState = &ds_state;
850 gfx_pipeline_info.pColorBlendState = &color_blend_state;
851 gfx_pipeline_info.pDynamicState = &dynamic_state;
852 gfx_pipeline_info.subpass = 0;
853
854 /* create the objects used to create the pipeline */
855 VkSubpassDescription subpass;
856 subpass.flags = 0;
857 subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
858 subpass.inputAttachmentCount = 0;
859 subpass.pInputAttachments = NULL;
860 subpass.colorAttachmentCount = num_color_attachments;
861 subpass.pColorAttachments = color_attachments;
862 subpass.pResolveAttachments = NULL;
863 subpass.pDepthStencilAttachment = ds_output == VK_FORMAT_UNDEFINED ? NULL : &ds_attachment;
864 subpass.preserveAttachmentCount = 0;
865 subpass.pPreserveAttachments = NULL;
866
867 VkRenderPassCreateInfo renderpass_info;
868 renderpass_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
869 renderpass_info.pNext = NULL;
870 renderpass_info.flags = 0;
871 renderpass_info.attachmentCount = num_attachments;
872 renderpass_info.pAttachments = attachment_descs;
873 renderpass_info.subpassCount = 1;
874 renderpass_info.pSubpasses = &subpass;
875 renderpass_info.dependencyCount = 0;
876 renderpass_info.pDependencies = NULL;
877
878 ASSERTED VkResult result = CreateRenderPass(device, &renderpass_info, NULL, &render_pass);
879 assert(result == VK_SUCCESS);
880
881 gfx_pipeline_info.layout = pipeline_layout;
882 gfx_pipeline_info.renderPass = render_pass;
883
884 /* create the pipeline */
885 gfx_pipeline_info.pStages = stages;
886
887 result = CreateGraphicsPipelines(device, VK_NULL_HANDLE, 1, &gfx_pipeline_info, NULL, &pipeline);
888 assert(result == VK_SUCCESS);
889 }
890
/* Create the descriptor-set layouts and pipeline layout from the recorded
 * bindings, then dispatch to the compute or graphics pipeline path. */
void PipelineBuilder::create_pipeline() {
   unsigned num_desc_layouts = 0;
   for (unsigned i = 0; i < 64; i++) {
      if (!(desc_layouts_used & (1ull << i)))
         continue;

      VkDescriptorSetLayoutCreateInfo desc_layout_info;
      desc_layout_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
      desc_layout_info.pNext = NULL;
      desc_layout_info.flags = 0;
      desc_layout_info.bindingCount = num_desc_bindings[i];
      desc_layout_info.pBindings = desc_bindings[i];

      /* desc_layouts is packed by used-set order, matching the destructor. */
      ASSERTED VkResult result = CreateDescriptorSetLayout(device, &desc_layout_info, NULL, &desc_layouts[num_desc_layouts]);
      assert(result == VK_SUCCESS);
      num_desc_layouts++;
   }

   VkPipelineLayoutCreateInfo pipeline_layout_info;
   pipeline_layout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
   pipeline_layout_info.pNext = NULL;
   pipeline_layout_info.flags = 0;
   pipeline_layout_info.pushConstantRangeCount = 1;
   pipeline_layout_info.pPushConstantRanges = &push_constant_range;
   pipeline_layout_info.setLayoutCount = num_desc_layouts;
   pipeline_layout_info.pSetLayouts = desc_layouts;

   ASSERTED VkResult result = CreatePipelineLayout(device, &pipeline_layout_info, NULL, &pipeline_layout);
   assert(result == VK_SUCCESS);

   if (is_compute())
      create_compute_pipeline();
   else
      create_graphics_pipeline();
}
926
/* Lazily create the pipeline, then print the named IR of the stage's
 * executable (see print_pipeline_ir for the name/encoding semantics). */
void PipelineBuilder::print_ir(VkShaderStageFlagBits stage_flags, const char *name, bool remove_encoding)
{
   if (!pipeline)
      create_pipeline();
   print_pipeline_ir(device, pipeline, stage_flags, name, remove_encoding);
}
933