• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2020 Valve Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  */
24 #include "helpers.h"
25 #include "vulkan/vk_format.h"
26 #include "common/amd_family.h"
27 #include <stdio.h>
28 #include <sstream>
29 #include <llvm-c/Target.h>
30 #include <mutex>
31 
32 using namespace aco;
33 
34 extern "C" {
35 PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
36 	VkInstance                                  instance,
37 	const char*                                 pName);
38 }
39 
40 ac_shader_config config;
41 aco_shader_info info;
42 std::unique_ptr<Program> program;
43 Builder bld(NULL);
44 Temp inputs[16];
45 
46 static VkInstance instance_cache[CHIP_LAST] = {VK_NULL_HANDLE};
47 static VkDevice device_cache[CHIP_LAST] = {VK_NULL_HANDLE};
48 static std::mutex create_device_mutex;
49 
50 #define FUNCTION_LIST\
51    ITEM(CreateInstance)\
52    ITEM(DestroyInstance)\
53    ITEM(EnumeratePhysicalDevices)\
54    ITEM(GetPhysicalDeviceProperties2)\
55    ITEM(CreateDevice)\
56    ITEM(DestroyDevice)\
57    ITEM(CreateShaderModule)\
58    ITEM(DestroyShaderModule)\
59    ITEM(CreateGraphicsPipelines)\
60    ITEM(CreateComputePipelines)\
61    ITEM(DestroyPipeline)\
62    ITEM(CreateDescriptorSetLayout)\
63    ITEM(DestroyDescriptorSetLayout)\
64    ITEM(CreatePipelineLayout)\
65    ITEM(DestroyPipelineLayout)\
66    ITEM(CreateRenderPass)\
67    ITEM(DestroyRenderPass)\
68    ITEM(GetPipelineExecutablePropertiesKHR)\
69    ITEM(GetPipelineExecutableInternalRepresentationsKHR)
70 
71 #define ITEM(n) PFN_vk##n n;
72 FUNCTION_LIST
73 #undef ITEM
74 
create_program(enum amd_gfx_level gfx_level,Stage stage,unsigned wave_size,enum radeon_family family)75 void create_program(enum amd_gfx_level gfx_level, Stage stage, unsigned wave_size, enum radeon_family family)
76 {
77    memset(&config, 0, sizeof(config));
78    info.wave_size = wave_size;
79 
80    program.reset(new Program);
81    aco::init_program(program.get(), stage, &info, gfx_level, family, false, &config);
82    program->workgroup_size = UINT_MAX;
83    calc_min_waves(program.get());
84 
85    program->debug.func = nullptr;
86    program->debug.private_data = nullptr;
87 
88    program->debug.output = output;
89    program->debug.shorten_messages = true;
90    program->debug.func = nullptr;
91    program->debug.private_data = nullptr;
92 
93    Block *block = program->create_and_insert_block();
94    block->kind = block_kind_top_level;
95 
96    bld = Builder(program.get(), &program->blocks[0]);
97 
98    config.float_mode = program->blocks[0].fp_mode.val;
99 }
100 
setup_cs(const char * input_spec,enum amd_gfx_level gfx_level,enum radeon_family family,const char * subvariant,unsigned wave_size)101 bool setup_cs(const char *input_spec, enum amd_gfx_level gfx_level,
102               enum radeon_family family, const char* subvariant,
103               unsigned wave_size)
104 {
105    if (!set_variant(gfx_level, subvariant))
106       return false;
107 
108    memset(&info, 0, sizeof(info));
109    create_program(gfx_level, compute_cs, wave_size, family);
110 
111    if (input_spec) {
112       std::vector<RegClass> input_classes;
113       while (input_spec[0]) {
114          RegType type = input_spec[0] == 'v' ? RegType::vgpr : RegType::sgpr;
115          unsigned size = input_spec[1] - '0';
116          bool in_bytes = input_spec[2] == 'b';
117          input_classes.push_back(RegClass::get(type, size * (in_bytes ? 1 : 4)));
118 
119          input_spec += 2 + in_bytes;
120          while (input_spec[0] == ' ') input_spec++;
121       }
122 
123       aco_ptr<Instruction> startpgm{create_instruction<Pseudo_instruction>(
124          aco_opcode::p_startpgm, Format::PSEUDO, 0, input_classes.size())};
125       for (unsigned i = 0; i < input_classes.size(); i++) {
126          inputs[i] = bld.tmp(input_classes[i]);
127          startpgm->definitions[i] = Definition(inputs[i]);
128       }
129       bld.insert(std::move(startpgm));
130    }
131 
132    return true;
133 }
134 
finish_program(Program * prog)135 void finish_program(Program *prog)
136 {
137    for (Block& BB : prog->blocks) {
138       for (unsigned idx : BB.linear_preds)
139          prog->blocks[idx].linear_succs.emplace_back(BB.index);
140       for (unsigned idx : BB.logical_preds)
141          prog->blocks[idx].logical_succs.emplace_back(BB.index);
142    }
143 
144    for (Block& block : prog->blocks) {
145       if (block.linear_succs.size() == 0) {
146          block.kind |= block_kind_uniform;
147          Builder(prog, &block).sopp(aco_opcode::s_endpgm);
148       }
149    }
150 }
151 
finish_validator_test()152 void finish_validator_test()
153 {
154    finish_program(program.get());
155    aco_print_program(program.get(), output);
156    fprintf(output, "Validation results:\n");
157    if (aco::validate_ir(program.get()))
158       fprintf(output, "Validation passed\n");
159    else
160       fprintf(output, "Validation failed\n");
161 }
162 
finish_opt_test()163 void finish_opt_test()
164 {
165    finish_program(program.get());
166    if (!aco::validate_ir(program.get())) {
167       fail_test("Validation before optimization failed");
168       return;
169    }
170    aco::optimize(program.get());
171    if (!aco::validate_ir(program.get())) {
172       fail_test("Validation after optimization failed");
173       return;
174    }
175    aco_print_program(program.get(), output);
176 }
177 
finish_ra_test(ra_test_policy policy,bool lower)178 void finish_ra_test(ra_test_policy policy, bool lower)
179 {
180    finish_program(program.get());
181    if (!aco::validate_ir(program.get())) {
182       fail_test("Validation before register allocation failed");
183       return;
184    }
185 
186    program->workgroup_size = program->wave_size;
187    aco::live live_vars = aco::live_var_analysis(program.get());
188    aco::register_allocation(program.get(), live_vars.live_out, policy);
189 
190    if (aco::validate_ra(program.get())) {
191       fail_test("Validation after register allocation failed");
192       return;
193    }
194 
195    if (lower) {
196       aco::ssa_elimination(program.get());
197       aco::lower_to_hw_instr(program.get());
198    }
199 
200    aco_print_program(program.get(), output);
201 }
202 
finish_optimizer_postRA_test()203 void finish_optimizer_postRA_test()
204 {
205    finish_program(program.get());
206    aco::optimize_postRA(program.get());
207    aco_print_program(program.get(), output);
208 }
209 
finish_to_hw_instr_test()210 void finish_to_hw_instr_test()
211 {
212    finish_program(program.get());
213    aco::lower_to_hw_instr(program.get());
214    aco_print_program(program.get(), output);
215 }
216 
finish_insert_nops_test()217 void finish_insert_nops_test()
218 {
219    finish_program(program.get());
220    aco::insert_NOPs(program.get());
221    aco_print_program(program.get(), output);
222 }
223 
finish_form_hard_clause_test()224 void finish_form_hard_clause_test()
225 {
226    finish_program(program.get());
227    aco::form_hard_clauses(program.get());
228    aco_print_program(program.get(), output);
229 }
230 
finish_assembler_test()231 void finish_assembler_test()
232 {
233    finish_program(program.get());
234    std::vector<uint32_t> binary;
235    unsigned exec_size = emit_program(program.get(), binary);
236 
237    /* we could use CLRX for disassembly but that would require it to be
238     * installed */
239    if (program->gfx_level >= GFX8) {
240       print_asm(program.get(), binary, exec_size / 4u, output);
241    } else {
242       //TODO: maybe we should use CLRX and skip this test if it's not available?
243       for (uint32_t dword : binary)
244          fprintf(output, "%.8x\n", dword);
245    }
246 }
247 
writeout(unsigned i,Temp tmp)248 void writeout(unsigned i, Temp tmp)
249 {
250    if (tmp.id())
251       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), tmp);
252    else
253       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i));
254 }
255 
writeout(unsigned i,aco::Builder::Result res)256 void writeout(unsigned i, aco::Builder::Result res)
257 {
258    bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), res);
259 }
260 
writeout(unsigned i,Operand op)261 void writeout(unsigned i, Operand op)
262 {
263    bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), op);
264 }
265 
writeout(unsigned i,Operand op0,Operand op1)266 void writeout(unsigned i, Operand op0, Operand op1)
267 {
268    bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), op0, op1);
269 }
270 
fneg(Temp src,Builder b)271 Temp fneg(Temp src, Builder b)
272 {
273    if (src.bytes() == 2)
274       return b.vop2(aco_opcode::v_mul_f16, b.def(v2b), Operand::c16(0xbc00u), src);
275    else
276       return b.vop2(aco_opcode::v_mul_f32, b.def(v1), Operand::c32(0xbf800000u), src);
277 }
278 
fabs(Temp src,Builder b)279 Temp fabs(Temp src, Builder b)
280 {
281    if (src.bytes() == 2) {
282       Builder::Result res = b.vop2_e64(aco_opcode::v_mul_f16, b.def(v2b), Operand::c16(0x3c00), src);
283       res.instr->vop3().abs[1] = true;
284       return res;
285    } else {
286       Builder::Result res = b.vop2_e64(aco_opcode::v_mul_f32, b.def(v1), Operand::c32(0x3f800000u), src);
287       res.instr->vop3().abs[1] = true;
288       return res;
289    }
290 }
291 
f2f32(Temp src,Builder b)292 Temp f2f32(Temp src, Builder b)
293 {
294    return b.vop1(aco_opcode::v_cvt_f32_f16, b.def(v1), src);
295 }
296 
f2f16(Temp src,Builder b)297 Temp f2f16(Temp src, Builder b)
298 {
299    return b.vop1(aco_opcode::v_cvt_f16_f32, b.def(v2b), src);
300 }
301 
u2u16(Temp src,Builder b)302 Temp u2u16(Temp src, Builder b)
303 {
304    return b.pseudo(aco_opcode::p_extract_vector, b.def(v2b), src, Operand::zero());
305 }
306 
fadd(Temp src0,Temp src1,Builder b)307 Temp fadd(Temp src0, Temp src1, Builder b)
308 {
309    if (src0.bytes() == 2)
310       return b.vop2(aco_opcode::v_add_f16, b.def(v2b), src0, src1);
311    else
312       return b.vop2(aco_opcode::v_add_f32, b.def(v1), src0, src1);
313 }
314 
fmul(Temp src0,Temp src1,Builder b)315 Temp fmul(Temp src0, Temp src1, Builder b)
316 {
317    if (src0.bytes() == 2)
318       return b.vop2(aco_opcode::v_mul_f16, b.def(v2b), src0, src1);
319    else
320       return b.vop2(aco_opcode::v_mul_f32, b.def(v1), src0, src1);
321 }
322 
fma(Temp src0,Temp src1,Temp src2,Builder b)323 Temp fma(Temp src0, Temp src1, Temp src2, Builder b)
324 {
325    if (src0.bytes() == 2)
326       return b.vop3(aco_opcode::v_fma_f16, b.def(v2b), src0, src1, src2);
327    else
328       return b.vop3(aco_opcode::v_fma_f32, b.def(v1), src0, src1, src2);
329 }
330 
fsat(Temp src,Builder b)331 Temp fsat(Temp src, Builder b)
332 {
333    if (src.bytes() == 2)
334       return b.vop3(aco_opcode::v_med3_f16, b.def(v2b), Operand::c16(0u),
335                     Operand::c16(0x3c00u), src);
336    else
337       return b.vop3(aco_opcode::v_med3_f32, b.def(v1), Operand::zero(),
338                     Operand::c32(0x3f800000u), src);
339 }
340 
ext_ushort(Temp src,unsigned idx,Builder b)341 Temp ext_ushort(Temp src, unsigned idx, Builder b)
342 {
343    return b.pseudo(aco_opcode::p_extract, b.def(src.regClass()), src, Operand::c32(idx),
344                    Operand::c32(16u), Operand::c32(false));
345 }
346 
ext_ubyte(Temp src,unsigned idx,Builder b)347 Temp ext_ubyte(Temp src, unsigned idx, Builder b)
348 {
349    return b.pseudo(aco_opcode::p_extract, b.def(src.regClass()), src, Operand::c32(idx),
350                    Operand::c32(8u), Operand::c32(false));
351 }
352 
get_vk_device(enum amd_gfx_level gfx_level)353 VkDevice get_vk_device(enum amd_gfx_level gfx_level)
354 {
355    enum radeon_family family;
356    switch (gfx_level) {
357    case GFX6:
358       family = CHIP_TAHITI;
359       break;
360    case GFX7:
361       family = CHIP_BONAIRE;
362       break;
363    case GFX8:
364       family = CHIP_POLARIS10;
365       break;
366    case GFX9:
367       family = CHIP_VEGA10;
368       break;
369    case GFX10:
370       family = CHIP_NAVI10;
371       break;
372    case GFX10_3:
373       family = CHIP_NAVI21;
374       break;
375    case GFX11:
376       family = CHIP_GFX1100;
377       break;
378    default:
379       family = CHIP_UNKNOWN;
380       break;
381    }
382    return get_vk_device(family);
383 }
384 
get_vk_device(enum radeon_family family)385 VkDevice get_vk_device(enum radeon_family family)
386 {
387    assert(family != CHIP_UNKNOWN);
388 
389    std::lock_guard<std::mutex> guard(create_device_mutex);
390 
391    if (device_cache[family])
392       return device_cache[family];
393 
394    setenv("RADV_FORCE_FAMILY", ac_get_family_name(family), 1);
395 
396    VkApplicationInfo app_info = {};
397    app_info.pApplicationName = "aco_tests";
398    app_info.apiVersion = VK_API_VERSION_1_2;
399    VkInstanceCreateInfo instance_create_info = {};
400    instance_create_info.pApplicationInfo = &app_info;
401    instance_create_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
402    ASSERTED VkResult result = ((PFN_vkCreateInstance)vk_icdGetInstanceProcAddr(NULL, "vkCreateInstance"))(&instance_create_info, NULL, &instance_cache[family]);
403    assert(result == VK_SUCCESS);
404 
405    #define ITEM(n) n = (PFN_vk##n)vk_icdGetInstanceProcAddr(instance_cache[family], "vk" #n);
406    FUNCTION_LIST
407    #undef ITEM
408 
409    uint32_t device_count = 1;
410    VkPhysicalDevice device = VK_NULL_HANDLE;
411    result = EnumeratePhysicalDevices(instance_cache[family], &device_count, &device);
412    assert(result == VK_SUCCESS);
413    assert(device != VK_NULL_HANDLE);
414 
415    VkDeviceCreateInfo device_create_info = {};
416    device_create_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
417    static const char *extensions[] = {"VK_KHR_pipeline_executable_properties"};
418    device_create_info.enabledExtensionCount = sizeof(extensions) / sizeof(extensions[0]);
419    device_create_info.ppEnabledExtensionNames = extensions;
420    result = CreateDevice(device, &device_create_info, NULL, &device_cache[family]);
421 
422    return device_cache[family];
423 }
424 
425 static struct DestroyDevices {
~DestroyDevicesDestroyDevices426    ~DestroyDevices() {
427       for (unsigned i = 0; i < CHIP_LAST; i++) {
428          if (!device_cache[i])
429             continue;
430          DestroyDevice(device_cache[i], NULL);
431          DestroyInstance(instance_cache[i], NULL);
432       }
433    }
434 } destroy_devices;
435 
print_pipeline_ir(VkDevice device,VkPipeline pipeline,VkShaderStageFlagBits stages,const char * name,bool remove_encoding)436 void print_pipeline_ir(VkDevice device, VkPipeline pipeline, VkShaderStageFlagBits stages,
437                        const char *name, bool remove_encoding)
438 {
439    uint32_t executable_count = 16;
440    VkPipelineExecutablePropertiesKHR executables[16];
441    VkPipelineInfoKHR pipeline_info;
442    pipeline_info.sType = VK_STRUCTURE_TYPE_PIPELINE_INFO_KHR;
443    pipeline_info.pNext = NULL;
444    pipeline_info.pipeline = pipeline;
445    ASSERTED VkResult result = GetPipelineExecutablePropertiesKHR(device, &pipeline_info, &executable_count, executables);
446    assert(result == VK_SUCCESS);
447 
448    uint32_t executable = 0;
449    for (; executable < executable_count; executable++) {
450       if (executables[executable].stages == stages)
451          break;
452    }
453    assert(executable != executable_count);
454 
455    VkPipelineExecutableInfoKHR exec_info;
456    exec_info.sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_INFO_KHR;
457    exec_info.pNext = NULL;
458    exec_info.pipeline = pipeline;
459    exec_info.executableIndex = executable;
460 
461    uint32_t ir_count = 16;
462    VkPipelineExecutableInternalRepresentationKHR ir[16];
463    memset(ir, 0, sizeof(ir));
464    result = GetPipelineExecutableInternalRepresentationsKHR(device, &exec_info, &ir_count, ir);
465    assert(result == VK_SUCCESS);
466 
467    VkPipelineExecutableInternalRepresentationKHR* requested_ir = nullptr;
468    for (unsigned i = 0; i < ir_count; ++i) {
469       if (strcmp(ir[i].name, name) == 0) {
470          requested_ir = &ir[i];
471          break;
472       }
473    }
474    assert(requested_ir && "Could not find requested IR");
475 
476    char *data = (char*)malloc(requested_ir->dataSize);
477    requested_ir->pData = data;
478    result = GetPipelineExecutableInternalRepresentationsKHR(device, &exec_info, &ir_count, ir);
479    assert(result == VK_SUCCESS);
480 
481    if (remove_encoding) {
482       for (char *c = data; *c; c++) {
483          if (*c == ';') {
484             for (; *c && *c != '\n'; c++)
485                *c = ' ';
486          }
487       }
488    }
489 
490    fprintf(output, "%s", data);
491    free(data);
492 }
493 
__qoCreateShaderModule(VkDevice dev,const QoShaderModuleCreateInfo * module_info)494 VkShaderModule __qoCreateShaderModule(VkDevice dev, const QoShaderModuleCreateInfo *module_info)
495 {
496     VkShaderModuleCreateInfo vk_module_info;
497     vk_module_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
498     vk_module_info.pNext = NULL;
499     vk_module_info.flags = 0;
500     vk_module_info.codeSize = module_info->spirvSize;
501     vk_module_info.pCode = (const uint32_t*)module_info->pSpirv;
502 
503     VkShaderModule module;
504     ASSERTED VkResult result = CreateShaderModule(dev, &vk_module_info, NULL, &module);
505     assert(result == VK_SUCCESS);
506 
507     return module;
508 }
509 
PipelineBuilder(VkDevice dev)510 PipelineBuilder::PipelineBuilder(VkDevice dev) {
511    memset(this, 0, sizeof(*this));
512    topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
513    device = dev;
514 }
515 
~PipelineBuilder()516 PipelineBuilder::~PipelineBuilder()
517 {
518    DestroyPipeline(device, pipeline, NULL);
519 
520    for (unsigned i = 0; i < (is_compute() ? 1 : gfx_pipeline_info.stageCount); i++) {
521       VkPipelineShaderStageCreateInfo *stage_info = &stages[i];
522       if (owned_stages & stage_info->stage)
523          DestroyShaderModule(device, stage_info->module, NULL);
524    }
525 
526    DestroyPipelineLayout(device, pipeline_layout, NULL);
527 
528    for (unsigned i = 0; i < util_bitcount64(desc_layouts_used); i++)
529       DestroyDescriptorSetLayout(device, desc_layouts[i], NULL);
530 
531    DestroyRenderPass(device, render_pass, NULL);
532 }
533 
add_desc_binding(VkShaderStageFlags stage_flags,uint32_t layout,uint32_t binding,VkDescriptorType type,uint32_t count)534 void PipelineBuilder::add_desc_binding(VkShaderStageFlags stage_flags, uint32_t layout,
535                                        uint32_t binding, VkDescriptorType type, uint32_t count)
536 {
537    desc_layouts_used |= 1ull << layout;
538    desc_bindings[layout][num_desc_bindings[layout]++] = {binding, type, count, stage_flags, NULL};
539 }
540 
add_vertex_binding(uint32_t binding,uint32_t stride,VkVertexInputRate rate)541 void PipelineBuilder::add_vertex_binding(uint32_t binding, uint32_t stride, VkVertexInputRate rate)
542 {
543    vs_bindings[vs_input.vertexBindingDescriptionCount++] = {binding, stride, rate};
544 }
545 
add_vertex_attribute(uint32_t location,uint32_t binding,VkFormat format,uint32_t offset)546 void PipelineBuilder::add_vertex_attribute(uint32_t location, uint32_t binding, VkFormat format, uint32_t offset)
547 {
548    vs_attributes[vs_input.vertexAttributeDescriptionCount++] = {location, binding, format, offset};
549 }
550 
add_resource_decls(QoShaderModuleCreateInfo * module)551 void PipelineBuilder::add_resource_decls(QoShaderModuleCreateInfo *module)
552 {
553    for (unsigned i = 0; i < module->declarationCount; i++) {
554       const QoShaderDecl *decl = &module->pDeclarations[i];
555       switch (decl->decl_type) {
556       case QoShaderDeclType_ubo:
557          add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
558          break;
559       case QoShaderDeclType_ssbo:
560          add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
561          break;
562       case QoShaderDeclType_img_buf:
563          add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER);
564          break;
565       case QoShaderDeclType_img:
566          add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE);
567          break;
568       case QoShaderDeclType_tex_buf:
569          add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER);
570          break;
571       case QoShaderDeclType_combined:
572          add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);
573          break;
574       case QoShaderDeclType_tex:
575          add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE);
576          break;
577       case QoShaderDeclType_samp:
578          add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_SAMPLER);
579          break;
580       default:
581          break;
582       }
583    }
584 }
585 
add_io_decls(QoShaderModuleCreateInfo * module)586 void PipelineBuilder::add_io_decls(QoShaderModuleCreateInfo *module)
587 {
588    unsigned next_vtx_offset = 0;
589    for (unsigned i = 0; i < module->declarationCount; i++) {
590       const QoShaderDecl *decl = &module->pDeclarations[i];
591       switch (decl->decl_type) {
592       case QoShaderDeclType_in:
593          if (module->stage == VK_SHADER_STAGE_VERTEX_BIT) {
594             if (!strcmp(decl->type, "float") || decl->type[0] == 'v')
595                add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_SFLOAT, next_vtx_offset);
596             else if (decl->type[0] == 'u')
597                add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_UINT, next_vtx_offset);
598             else if (decl->type[0] == 'i')
599                add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_SINT, next_vtx_offset);
600             next_vtx_offset += 16;
601          }
602          break;
603       case QoShaderDeclType_out:
604          if (module->stage == VK_SHADER_STAGE_FRAGMENT_BIT) {
605             if (!strcmp(decl->type, "float") || decl->type[0] == 'v')
606                color_outputs[decl->location] = VK_FORMAT_R32G32B32A32_SFLOAT;
607             else if (decl->type[0] == 'u')
608                color_outputs[decl->location] = VK_FORMAT_R32G32B32A32_UINT;
609             else if (decl->type[0] == 'i')
610                color_outputs[decl->location] = VK_FORMAT_R32G32B32A32_SINT;
611          }
612          break;
613       default:
614          break;
615       }
616    }
617    if (next_vtx_offset)
618       add_vertex_binding(0, next_vtx_offset);
619 }
620 
add_stage(VkShaderStageFlagBits stage,VkShaderModule module,const char * name)621 void PipelineBuilder::add_stage(VkShaderStageFlagBits stage, VkShaderModule module, const char *name)
622 {
623    VkPipelineShaderStageCreateInfo *stage_info;
624    if (stage == VK_SHADER_STAGE_COMPUTE_BIT)
625       stage_info = &stages[0];
626    else
627       stage_info = &stages[gfx_pipeline_info.stageCount++];
628    stage_info->sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
629    stage_info->pNext = NULL;
630    stage_info->flags = 0;
631    stage_info->stage = stage;
632    stage_info->module = module;
633    stage_info->pName = name;
634    stage_info->pSpecializationInfo = NULL;
635    owned_stages |= stage;
636 }
637 
add_stage(VkShaderStageFlagBits stage,QoShaderModuleCreateInfo module,const char * name)638 void PipelineBuilder::add_stage(VkShaderStageFlagBits stage, QoShaderModuleCreateInfo module, const char *name)
639 {
640    add_stage(stage, __qoCreateShaderModule(device, &module), name);
641    add_resource_decls(&module);
642    add_io_decls(&module);
643 }
644 
add_vsfs(VkShaderModule vs,VkShaderModule fs)645 void PipelineBuilder::add_vsfs(VkShaderModule vs, VkShaderModule fs)
646 {
647    add_stage(VK_SHADER_STAGE_VERTEX_BIT, vs);
648    add_stage(VK_SHADER_STAGE_FRAGMENT_BIT, fs);
649 }
650 
add_vsfs(QoShaderModuleCreateInfo vs,QoShaderModuleCreateInfo fs)651 void PipelineBuilder::add_vsfs(QoShaderModuleCreateInfo vs, QoShaderModuleCreateInfo fs)
652 {
653    add_stage(VK_SHADER_STAGE_VERTEX_BIT, vs);
654    add_stage(VK_SHADER_STAGE_FRAGMENT_BIT, fs);
655 }
656 
add_cs(VkShaderModule cs)657 void PipelineBuilder::add_cs(VkShaderModule cs)
658 {
659    add_stage(VK_SHADER_STAGE_COMPUTE_BIT, cs);
660 }
661 
add_cs(QoShaderModuleCreateInfo cs)662 void PipelineBuilder::add_cs(QoShaderModuleCreateInfo cs)
663 {
664    add_stage(VK_SHADER_STAGE_COMPUTE_BIT, cs);
665 }
666 
is_compute()667 bool PipelineBuilder::is_compute() {
668    return gfx_pipeline_info.stageCount == 0;
669 }
670 
create_compute_pipeline()671 void PipelineBuilder::create_compute_pipeline() {
672    VkComputePipelineCreateInfo create_info;
673    create_info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
674    create_info.pNext = NULL;
675    create_info.flags = VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR;
676    create_info.stage = stages[0];
677    create_info.layout = pipeline_layout;
678    create_info.basePipelineHandle = VK_NULL_HANDLE;
679    create_info.basePipelineIndex = 0;
680 
681    ASSERTED VkResult result = CreateComputePipelines(device, VK_NULL_HANDLE, 1, &create_info, NULL, &pipeline);
682    assert(result == VK_SUCCESS);
683 }
684 
create_graphics_pipeline()685 void PipelineBuilder::create_graphics_pipeline() {
686    /* create the create infos */
687    if (!samples)
688       samples = VK_SAMPLE_COUNT_1_BIT;
689 
690    unsigned num_color_attachments = 0;
691    VkPipelineColorBlendAttachmentState blend_attachment_states[16];
692    VkAttachmentReference color_attachments[16];
693    VkAttachmentDescription attachment_descs[17];
694    for (unsigned i = 0; i < 16; i++) {
695       if (color_outputs[i] == VK_FORMAT_UNDEFINED)
696          continue;
697 
698       VkAttachmentDescription *desc = &attachment_descs[num_color_attachments];
699       desc->flags = 0;
700       desc->format = color_outputs[i];
701       desc->samples = samples;
702       desc->loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
703       desc->storeOp = VK_ATTACHMENT_STORE_OP_STORE;
704       desc->stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
705       desc->stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
706       desc->initialLayout = VK_IMAGE_LAYOUT_GENERAL;
707       desc->finalLayout = VK_IMAGE_LAYOUT_GENERAL;
708 
709       VkAttachmentReference *ref = &color_attachments[num_color_attachments];
710       ref->attachment = num_color_attachments;
711       ref->layout = VK_IMAGE_LAYOUT_GENERAL;
712 
713       VkPipelineColorBlendAttachmentState *blend = &blend_attachment_states[num_color_attachments];
714       blend->blendEnable = false;
715       blend->colorWriteMask = VK_COLOR_COMPONENT_R_BIT |
716                               VK_COLOR_COMPONENT_G_BIT |
717                               VK_COLOR_COMPONENT_B_BIT |
718                               VK_COLOR_COMPONENT_A_BIT;
719 
720       num_color_attachments++;
721    }
722 
723    unsigned num_attachments = num_color_attachments;
724    VkAttachmentReference ds_attachment;
725    if (ds_output != VK_FORMAT_UNDEFINED) {
726       VkAttachmentDescription *desc = &attachment_descs[num_attachments];
727       desc->flags = 0;
728       desc->format = ds_output;
729       desc->samples = samples;
730       desc->loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
731       desc->storeOp = VK_ATTACHMENT_STORE_OP_STORE;
732       desc->stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
733       desc->stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
734       desc->initialLayout = VK_IMAGE_LAYOUT_GENERAL;
735       desc->finalLayout = VK_IMAGE_LAYOUT_GENERAL;
736 
737       ds_attachment.attachment = num_color_attachments;
738       ds_attachment.layout = VK_IMAGE_LAYOUT_GENERAL;
739 
740       num_attachments++;
741    }
742 
743    vs_input.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
744    vs_input.pNext = NULL;
745    vs_input.flags = 0;
746    vs_input.pVertexBindingDescriptions = vs_bindings;
747    vs_input.pVertexAttributeDescriptions = vs_attributes;
748 
749    VkPipelineInputAssemblyStateCreateInfo assembly_state;
750    assembly_state.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
751    assembly_state.pNext = NULL;
752    assembly_state.flags = 0;
753    assembly_state.topology = topology;
754    assembly_state.primitiveRestartEnable = false;
755 
756    VkPipelineTessellationStateCreateInfo tess_state;
757    tess_state.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO;
758    tess_state.pNext = NULL;
759    tess_state.flags = 0;
760    tess_state.patchControlPoints = patch_size;
761 
762    VkPipelineViewportStateCreateInfo viewport_state;
763    viewport_state.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO;
764    viewport_state.pNext = NULL;
765    viewport_state.flags = 0;
766    viewport_state.viewportCount = 1;
767    viewport_state.pViewports = NULL;
768    viewport_state.scissorCount = 1;
769    viewport_state.pScissors = NULL;
770 
771    VkPipelineRasterizationStateCreateInfo rasterization_state;
772    rasterization_state.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
773    rasterization_state.pNext = NULL;
774    rasterization_state.flags = 0;
775    rasterization_state.depthClampEnable = false;
776    rasterization_state.rasterizerDiscardEnable = false;
777    rasterization_state.polygonMode = VK_POLYGON_MODE_FILL;
778    rasterization_state.cullMode = VK_CULL_MODE_NONE;
779    rasterization_state.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE;
780    rasterization_state.depthBiasEnable = false;
781    rasterization_state.lineWidth = 1.0;
782 
783    VkPipelineMultisampleStateCreateInfo ms_state;
784    ms_state.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
785    ms_state.pNext = NULL;
786    ms_state.flags = 0;
787    ms_state.rasterizationSamples = samples;
788    ms_state.sampleShadingEnable = sample_shading_enable;
789    ms_state.minSampleShading = min_sample_shading;
790    VkSampleMask sample_mask = 0xffffffff;
791    ms_state.pSampleMask = &sample_mask;
792    ms_state.alphaToCoverageEnable = false;
793    ms_state.alphaToOneEnable = false;
794 
795    VkPipelineDepthStencilStateCreateInfo ds_state;
796    ds_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO;
797    ds_state.pNext = NULL;
798    ds_state.flags = 0;
799    ds_state.depthTestEnable = ds_output != VK_FORMAT_UNDEFINED;
800    ds_state.depthWriteEnable = true;
801    ds_state.depthCompareOp = VK_COMPARE_OP_ALWAYS;
802    ds_state.depthBoundsTestEnable = false;
803    ds_state.stencilTestEnable = true;
804    ds_state.front.failOp = VK_STENCIL_OP_KEEP;
805    ds_state.front.passOp = VK_STENCIL_OP_REPLACE;
806    ds_state.front.depthFailOp = VK_STENCIL_OP_REPLACE;
807    ds_state.front.compareOp = VK_COMPARE_OP_ALWAYS;
808    ds_state.front.compareMask = 0xffffffff,
809    ds_state.front.writeMask = 0;
810    ds_state.front.reference = 0;
811    ds_state.back = ds_state.front;
812 
813    VkPipelineColorBlendStateCreateInfo color_blend_state;
814    color_blend_state.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO;
815    color_blend_state.pNext = NULL;
816    color_blend_state.flags = 0;
817    color_blend_state.logicOpEnable = false;
818    color_blend_state.attachmentCount = num_color_attachments;
819    color_blend_state.pAttachments = blend_attachment_states;
820 
821    VkDynamicState dynamic_states[9] = {
822       VK_DYNAMIC_STATE_VIEWPORT,
823       VK_DYNAMIC_STATE_SCISSOR,
824       VK_DYNAMIC_STATE_LINE_WIDTH,
825       VK_DYNAMIC_STATE_DEPTH_BIAS,
826       VK_DYNAMIC_STATE_BLEND_CONSTANTS,
827       VK_DYNAMIC_STATE_DEPTH_BOUNDS,
828       VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
829       VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
830       VK_DYNAMIC_STATE_STENCIL_REFERENCE
831    };
832 
833    VkPipelineDynamicStateCreateInfo dynamic_state;
834    dynamic_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO;
835    dynamic_state.pNext = NULL;
836    dynamic_state.flags = 0;
837    dynamic_state.dynamicStateCount = sizeof(dynamic_states) / sizeof(VkDynamicState);
838    dynamic_state.pDynamicStates = dynamic_states;
839 
840    gfx_pipeline_info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
841    gfx_pipeline_info.pNext = NULL;
842    gfx_pipeline_info.flags = VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR;
843    gfx_pipeline_info.pVertexInputState = &vs_input;
844    gfx_pipeline_info.pInputAssemblyState = &assembly_state;
845    gfx_pipeline_info.pTessellationState = &tess_state;
846    gfx_pipeline_info.pViewportState = &viewport_state;
847    gfx_pipeline_info.pRasterizationState = &rasterization_state;
848    gfx_pipeline_info.pMultisampleState = &ms_state;
849    gfx_pipeline_info.pDepthStencilState = &ds_state;
850    gfx_pipeline_info.pColorBlendState = &color_blend_state;
851    gfx_pipeline_info.pDynamicState = &dynamic_state;
852    gfx_pipeline_info.subpass = 0;
853 
854    /* create the objects used to create the pipeline */
855    VkSubpassDescription subpass;
856    subpass.flags = 0;
857    subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
858    subpass.inputAttachmentCount = 0;
859    subpass.pInputAttachments = NULL;
860    subpass.colorAttachmentCount = num_color_attachments;
861    subpass.pColorAttachments = color_attachments;
862    subpass.pResolveAttachments = NULL;
863    subpass.pDepthStencilAttachment = ds_output == VK_FORMAT_UNDEFINED ? NULL : &ds_attachment;
864    subpass.preserveAttachmentCount = 0;
865    subpass.pPreserveAttachments = NULL;
866 
867    VkRenderPassCreateInfo renderpass_info;
868    renderpass_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
869    renderpass_info.pNext = NULL;
870    renderpass_info.flags = 0;
871    renderpass_info.attachmentCount = num_attachments;
872    renderpass_info.pAttachments = attachment_descs;
873    renderpass_info.subpassCount = 1;
874    renderpass_info.pSubpasses = &subpass;
875    renderpass_info.dependencyCount = 0;
876    renderpass_info.pDependencies = NULL;
877 
878    ASSERTED VkResult result = CreateRenderPass(device, &renderpass_info, NULL, &render_pass);
879    assert(result == VK_SUCCESS);
880 
881    gfx_pipeline_info.layout = pipeline_layout;
882    gfx_pipeline_info.renderPass = render_pass;
883 
884    /* create the pipeline */
885    gfx_pipeline_info.pStages = stages;
886 
887    result = CreateGraphicsPipelines(device, VK_NULL_HANDLE, 1, &gfx_pipeline_info, NULL, &pipeline);
888    assert(result == VK_SUCCESS);
889 }
890 
create_pipeline()891 void PipelineBuilder::create_pipeline() {
892    unsigned num_desc_layouts = 0;
893    for (unsigned i = 0; i < 64; i++) {
894       if (!(desc_layouts_used & (1ull << i)))
895          continue;
896 
897       VkDescriptorSetLayoutCreateInfo desc_layout_info;
898       desc_layout_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
899       desc_layout_info.pNext = NULL;
900       desc_layout_info.flags = 0;
901       desc_layout_info.bindingCount = num_desc_bindings[i];
902       desc_layout_info.pBindings = desc_bindings[i];
903 
904       ASSERTED VkResult result = CreateDescriptorSetLayout(device, &desc_layout_info, NULL, &desc_layouts[num_desc_layouts]);
905       assert(result == VK_SUCCESS);
906       num_desc_layouts++;
907    }
908 
909    VkPipelineLayoutCreateInfo pipeline_layout_info;
910    pipeline_layout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
911    pipeline_layout_info.pNext = NULL;
912    pipeline_layout_info.flags = 0;
913    pipeline_layout_info.pushConstantRangeCount = 1;
914    pipeline_layout_info.pPushConstantRanges = &push_constant_range;
915    pipeline_layout_info.setLayoutCount = num_desc_layouts;
916    pipeline_layout_info.pSetLayouts = desc_layouts;
917 
918    ASSERTED VkResult result = CreatePipelineLayout(device, &pipeline_layout_info, NULL, &pipeline_layout);
919    assert(result == VK_SUCCESS);
920 
921    if (is_compute())
922       create_compute_pipeline();
923    else
924       create_graphics_pipeline();
925 }
926 
print_ir(VkShaderStageFlagBits stage_flags,const char * name,bool remove_encoding)927 void PipelineBuilder::print_ir(VkShaderStageFlagBits stage_flags, const char *name, bool remove_encoding)
928 {
929    if (!pipeline)
930       create_pipeline();
931    print_pipeline_ir(device, pipeline, stage_flags, name, remove_encoding);
932 }
933