1 /*
2  * Copyright © 2020 Valve Corporation
3  *
4  * SPDX-License-Identifier: MIT
5  */
6 #include "helpers.h"
7 
8 #include "common/amd_family.h"
9 #include "common/nir/ac_nir.h"
10 #include "vk_format.h"
11 
12 #include <llvm-c/Target.h>
13 
14 #include <mutex>
15 #include <sstream>
16 #include <stdio.h>
17 
18 using namespace aco;
19 
20 extern "C" {
21 PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(VkInstance instance, const char* pName);
22 }
23 
24 ac_shader_config config;
25 aco_shader_info info;
26 std::unique_ptr<Program> program;
27 Builder bld(NULL);
28 Temp inputs[16];
29 
30 static radeon_info rad_info;
31 static nir_shader_compiler_options nir_options;
32 static nir_builder _nb;
33 nir_builder *nb;
34 
35 static VkInstance instance_cache[CHIP_LAST] = {VK_NULL_HANDLE};
36 static VkDevice device_cache[CHIP_LAST] = {VK_NULL_HANDLE};
37 static std::mutex create_device_mutex;
38 
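/* Vulkan entry points used by the tests. The X-macro below expands once to
 * declare a PFN_vk* pointer for each name and again (in get_vk_device) to
 * load each pointer via vk_icdGetInstanceProcAddr. */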
39 #define FUNCTION_LIST                                                                              \
40    ITEM(CreateInstance)                                                                            \
41    ITEM(DestroyInstance)                                                                           \
42    ITEM(EnumeratePhysicalDevices)                                                                  \
43    ITEM(GetPhysicalDeviceProperties2)                                                              \
44    ITEM(CreateDevice)                                                                              \
45    ITEM(DestroyDevice)                                                                             \
46    ITEM(CreateShaderModule)                                                                        \
47    ITEM(DestroyShaderModule)                                                                       \
48    ITEM(CreateGraphicsPipelines)                                                                   \
49    ITEM(CreateComputePipelines)                                                                    \
50    ITEM(DestroyPipeline)                                                                           \
51    ITEM(CreateDescriptorSetLayout)                                                                 \
52    ITEM(DestroyDescriptorSetLayout)                                                                \
53    ITEM(CreatePipelineLayout)                                                                      \
54    ITEM(DestroyPipelineLayout)                                                                     \
55    ITEM(CreateRenderPass)                                                                          \
56    ITEM(DestroyRenderPass)                                                                         \
57    ITEM(GetPipelineExecutablePropertiesKHR)                                                        \
58    ITEM(GetPipelineExecutableInternalRepresentationsKHR)
59 
60 #define ITEM(n) PFN_vk##n n;
61 FUNCTION_LIST
62 #undef ITEM
63 
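/* Reset the global config/info state and create a fresh Program with a single
 * top-level block, pointing the global Builder "bld" at it. Debug messages are
 * routed to "output" in shortened form. */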
64 void
65 create_program(enum amd_gfx_level gfx_level, Stage stage, unsigned wave_size,
66                enum radeon_family family)
67 {
68    memset(&config, 0, sizeof(config));
69    info.wave_size = wave_size;
70 
71    program.reset(new Program);
72    aco::init_program(program.get(), stage, &info, gfx_level, family, false, &config);
73    program->workgroup_size = UINT_MAX;
74    calc_min_waves(program.get());
75 
76    program->debug.func = nullptr;
77    program->debug.private_data = nullptr;
78 
79    program->debug.output = output;
80    program->debug.shorten_messages = true;
81    program->debug.func = nullptr;
82    program->debug.private_data = nullptr;
83 
84    Block* block = program->create_and_insert_block();
85    block->kind = block_kind_top_level;
86 
87    bld = Builder(program.get(), &program->blocks[0]);
88 
89    config.float_mode = program->blocks[0].fp_mode.val;
90 }
91 
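/* Create a compute-shader program and (optionally) a p_startpgm that defines
 * the global inputs[] temporaries. input_spec is a space-separated list of
 * register classes: 'v' or 's' selects VGPR/SGPR, the digit gives the size in
 * dwords, and a trailing 'b' makes the size a byte count instead. As an
 * illustrative example (not from the original), "v1 s2 v3b" would yield a
 * 1-dword VGPR, a 2-dword SGPR and a 3-byte VGPR in inputs[0..2]. */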
92 bool
93 setup_cs(const char* input_spec, enum amd_gfx_level gfx_level, enum radeon_family family,
94          const char* subvariant, unsigned wave_size)
95 {
96    if (!set_variant(gfx_level, subvariant))
97       return false;
98 
99    memset(&info, 0, sizeof(info));
100    create_program(gfx_level, compute_cs, wave_size, family);
101 
102    if (input_spec) {
103       std::vector<RegClass> input_classes;
104       while (input_spec[0]) {
105          RegType type = input_spec[0] == 'v' ? RegType::vgpr : RegType::sgpr;
106          unsigned size = input_spec[1] - '0';
107          bool in_bytes = input_spec[2] == 'b';
108          input_classes.push_back(RegClass::get(type, size * (in_bytes ? 1 : 4)));
109 
110          input_spec += 2 + in_bytes;
111          while (input_spec[0] == ' ')
112             input_spec++;
113       }
114 
115       aco_ptr<Instruction> startpgm{
116          create_instruction(aco_opcode::p_startpgm, Format::PSEUDO, 0, input_classes.size())};
117       for (unsigned i = 0; i < input_classes.size(); i++) {
118          inputs[i] = bld.tmp(input_classes[i]);
119          startpgm->definitions[i] = Definition(inputs[i]);
120       }
121       bld.insert(std::move(startpgm));
122    }
123 
124    return true;
125 }
126 
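/* Set up a minimal NIR shader (via nir_builder) for instruction-selection
 * tests. If no specific family is requested, a representative chip for the
 * given gfx_level is chosen. The resulting builder is exposed through the
 * global "nb". */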
127 bool
128 setup_nir_cs(enum amd_gfx_level gfx_level, gl_shader_stage stage, enum radeon_family family, const char* subvariant)
129 {
130    if (!set_variant(gfx_level, subvariant))
131       return false;
132 
133    if (family == CHIP_UNKNOWN) {
134       switch (gfx_level) {
135       case GFX6: family = CHIP_TAHITI; break;
136       case GFX7: family = CHIP_BONAIRE; break;
137       case GFX8: family = CHIP_POLARIS10; break;
138       case GFX9: family = CHIP_VEGA10; break;
139       case GFX10: family = CHIP_NAVI10; break;
140       case GFX10_3: family = CHIP_NAVI21; break;
141       case GFX11: family = CHIP_NAVI31; break;
142       default: family = CHIP_UNKNOWN; break;
143       }
144    }
145 
146    memset(&rad_info, 0, sizeof(rad_info));
147    rad_info.gfx_level = gfx_level;
148    rad_info.family = family;
149 
150    memset(&nir_options, 0, sizeof(nir_options));
151    ac_nir_set_options(&rad_info, false, &nir_options);
152 
153    glsl_type_singleton_init_or_ref();
154 
155    _nb = nir_builder_init_simple_shader(stage, &nir_options, "aco_test");
156    nb = &_nb;
157 
158    return true;
159 }
160 
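/* Tests only fill in predecessor lists; derive the matching successor lists
 * here, mark exit blocks uniform and append s_endpgm to them (if requested),
 * and optionally build the dominator tree before the pass under test runs. */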
161 void
162 finish_program(Program* prog, bool endpgm, bool dominance)
163 {
164    for (Block& BB : prog->blocks) {
165       for (unsigned idx : BB.linear_preds)
166          prog->blocks[idx].linear_succs.emplace_back(BB.index);
167       for (unsigned idx : BB.logical_preds)
168          prog->blocks[idx].logical_succs.emplace_back(BB.index);
169    }
170 
171    for (Block& block : prog->blocks) {
172       if (block.linear_succs.size() == 0) {
173          block.kind |= block_kind_uniform;
174          if (endpgm)
175             Builder(prog, &block).sopp(aco_opcode::s_endpgm);
176       }
177    }
178 
179    if (dominance)
180       dominator_tree(program.get());
181 }
182 
183 void
184 finish_validator_test()
185 {
186    finish_program(program.get(), true, true);
187    aco_print_program(program.get(), output);
188    fprintf(output, "Validation results:\n");
189    if (aco::validate_ir(program.get()))
190       fprintf(output, "Validation passed\n");
191    else
192       fprintf(output, "Validation failed\n");
193 }
194 
195 void
196 finish_opt_test()
197 {
198    finish_program(program.get(), true, true);
199    if (!aco::validate_ir(program.get())) {
200       fail_test("Validation before optimization failed");
201       return;
202    }
203    aco::optimize(program.get());
204    if (!aco::validate_ir(program.get())) {
205       fail_test("Validation after optimization failed");
206       return;
207    }
208    aco_print_program(program.get(), output);
209 }
210 
211 void
212 finish_setup_reduce_temp_test()
213 {
214    finish_program(program.get(), true, true);
215    if (!aco::validate_ir(program.get())) {
216       fail_test("Validation before setup_reduce_temp failed");
217       return;
218    }
219    aco::setup_reduce_temp(program.get());
220    if (!aco::validate_ir(program.get())) {
221       fail_test("Validation after setup_reduce_temp failed");
222       return;
223    }
224    aco_print_program(program.get(), output);
225 }
226 
227 void
228 finish_lower_subdword_test()
229 {
230    finish_program(program.get(), true, true);
231    if (!aco::validate_ir(program.get())) {
232       fail_test("Validation before lower_subdword failed");
233       return;
234    }
235    aco::lower_subdword(program.get());
236    if (!aco::validate_ir(program.get())) {
237       fail_test("Validation after lower_subdword failed");
238       return;
239    }
240    aco_print_program(program.get(), output);
241 }
242 
243 void
244 finish_ra_test(ra_test_policy policy)
245 {
246    finish_program(program.get(), true, true);
247    if (!aco::validate_ir(program.get())) {
248       fail_test("Validation before register allocation failed");
249       return;
250    }
251 
252    program->workgroup_size = program->wave_size;
253    aco::live_var_analysis(program.get());
254    aco::register_allocation(program.get(), policy);
255 
256    if (aco::validate_ra(program.get())) {
257       fail_test("Validation after register allocation failed");
258       return;
259    }
260 
261    aco_print_program(program.get(), output);
262 }
263 
264 void
265 finish_optimizer_postRA_test()
266 {
267    finish_program(program.get(), true, true);
268 
269    if (!aco::validate_ir(program.get())) {
270       fail_test("Validation before optimize_postRA failed");
271       return;
272    }
273 
274    aco::optimize_postRA(program.get());
275 
276    if (!aco::validate_ir(program.get())) {
277       fail_test("Validation after optimize_postRA failed");
278       return;
279    }
280 
281    aco_print_program(program.get(), output);
282 }
283 
284 void
285 finish_to_hw_instr_test()
286 {
287    finish_program(program.get(), true, true);
288 
289    if (!aco::validate_ir(program.get())) {
290       fail_test("Validation before lower_to_hw_instr failed");
291       return;
292    }
293 
294    aco::lower_to_hw_instr(program.get());
295 
296    if (!aco::validate_ir(program.get())) {
297       fail_test("Validation after lower_to_hw_instr failed");
298       return;
299    }
300 
301    aco_print_program(program.get(), output);
302 }
303 
304 void
305 finish_schedule_vopd_test()
306 {
307    finish_program(program.get());
308    aco::schedule_vopd(program.get());
309    aco_print_program(program.get(), output);
310 }
311 
312 void
313 finish_waitcnt_test()
314 {
315    finish_program(program.get());
316    aco::insert_waitcnt(program.get());
317    aco_print_program(program.get(), output);
318 }
319 
320 void
321 finish_insert_nops_test(bool endpgm)
322 {
323    finish_program(program.get(), endpgm);
324    aco::insert_NOPs(program.get());
325    aco_print_program(program.get(), output);
326 }
327 
328 void
329 finish_form_hard_clause_test()
330 {
331    finish_program(program.get());
332    aco::form_hard_clauses(program.get());
333    aco_print_program(program.get(), output);
334 }
335 
336 void
337 finish_assembler_test()
338 {
339    finish_program(program.get());
340    std::vector<uint32_t> binary;
341    unsigned exec_size = emit_program(program.get(), binary);
342 
343    /* we could use CLRX for disassembly but that would require it to be
344     * installed */
345    if (program->gfx_level >= GFX8) {
346       print_asm(program.get(), binary, exec_size / 4u, output);
347    } else {
348       // TODO: maybe we should use CLRX and skip this test if it's not available?
349       for (uint32_t dword : binary)
350          fprintf(output, "%.8x\n", dword);
351    }
352 }
353 
354 void
355 live_var_analysis_debug_func(void* private_data, enum aco_compiler_debug_level level, const char* message)
356 {
357    if (level == ACO_COMPILER_DEBUG_LEVEL_ERROR)
358       *(bool *)private_data = true;
359 }
360 
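/* Run instruction selection on the NIR shader built by setup_nir_cs, then
 * repair/lower SSA, print the result and validate IR, CFG and live-var
 * analysis. Live-var errors are detected through the debug callback above. */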
361 void
362 finish_isel_test(enum ac_hw_stage hw_stage, unsigned wave_size)
363 {
364    nir_validate_shader(nb->shader, "in finish_isel_test");
365    nir_validate_ssa_dominance(nb->shader, "in finish_isel_test");
366 
367    program.reset(new Program);
368    program->debug.func = nullptr;
369    program->debug.private_data = nullptr;
370 
371    ac_shader_args args = {};
372 
373    aco_compiler_options options = {};
374    options.family = rad_info.family;
375    options.gfx_level = rad_info.gfx_level;
376 
377    memset(&info, 0, sizeof(info));
378    info.hw_stage = hw_stage;
379    info.wave_size = wave_size;
380    info.workgroup_size = nb->shader->info.workgroup_size[0] * nb->shader->info.workgroup_size[1] * nb->shader->info.workgroup_size[2];
381 
382    memset(&config, 0, sizeof(config));
383 
384    select_program(program.get(), 1, &nb->shader, &config, &options, &info, &args);
385    dominator_tree(program.get());
386    if (program->should_repair_ssa)
387       repair_ssa(program.get());
388    lower_phis(program.get());
389 
390    ralloc_free(nb->shader);
391    glsl_type_singleton_decref();
392 
393    aco_print_program(program.get(), output);
394 
395    if (!aco::validate_ir(program.get())) {
396       fail_test("Validation after instruction selection failed");
397       return;
398    }
399    if (!aco::validate_cfg(program.get())) {
400       fail_test("Invalid CFG");
401       return;
402    }
403 
404    bool live_var_fail = false;
405    program->debug.func = &live_var_analysis_debug_func;
406    program->debug.private_data = &live_var_fail;
407    aco::live_var_analysis(program.get());
408    if (live_var_fail) {
409       fail_test("Live var analysis failed");
410       return;
411    }
412 }
413 
414 void
415 writeout(unsigned i, Temp tmp)
416 {
417    if (tmp.id())
418       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), tmp);
419    else
420       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i));
421 }
422 
423 void
424 writeout(unsigned i, aco::Builder::Result res)
425 {
426    bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), res);
427 }
428 
429 void
430 writeout(unsigned i, Operand op)
431 {
432    bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), op);
433 }
434 
435 void
436 writeout(unsigned i, Operand op0, Operand op1)
437 {
438    bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), op0, op1);
439 }
440 
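/* Small ALU helpers for building test code: most pick the 16-bit or 32-bit
 * opcode based on the operand size (v2b vs. v1). */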
441 Temp
442 fneg(Temp src, Builder b)
443 {
444    if (src.bytes() == 2)
445       return b.vop2(aco_opcode::v_mul_f16, b.def(v2b), Operand::c16(0xbc00u), src);
446    else
447       return b.vop2(aco_opcode::v_mul_f32, b.def(v1), Operand::c32(0xbf800000u), src);
448 }
449 
450 Temp
451 fabs(Temp src, Builder b)
452 {
453    if (src.bytes() == 2) {
454       Builder::Result res =
455          b.vop2_e64(aco_opcode::v_mul_f16, b.def(v2b), Operand::c16(0x3c00), src);
456       res->valu().abs[1] = true;
457       return res;
458    } else {
459       Builder::Result res =
460          b.vop2_e64(aco_opcode::v_mul_f32, b.def(v1), Operand::c32(0x3f800000u), src);
461       res->valu().abs[1] = true;
462       return res;
463    }
464 }
465 
466 Temp
467 f2f32(Temp src, Builder b)
468 {
469    return b.vop1(aco_opcode::v_cvt_f32_f16, b.def(v1), src);
470 }
471 
472 Temp
473 f2f16(Temp src, Builder b)
474 {
475    return b.vop1(aco_opcode::v_cvt_f16_f32, b.def(v2b), src);
476 }
477 
478 Temp
479 u2u16(Temp src, Builder b)
480 {
481    return b.pseudo(aco_opcode::p_extract_vector, b.def(v2b), src, Operand::zero());
482 }
483 
484 Temp
485 fadd(Temp src0, Temp src1, Builder b)
486 {
487    if (src0.bytes() == 2)
488       return b.vop2(aco_opcode::v_add_f16, b.def(v2b), src0, src1);
489    else
490       return b.vop2(aco_opcode::v_add_f32, b.def(v1), src0, src1);
491 }
492 
493 Temp
494 fmul(Temp src0, Temp src1, Builder b)
495 {
496    if (src0.bytes() == 2)
497       return b.vop2(aco_opcode::v_mul_f16, b.def(v2b), src0, src1);
498    else
499       return b.vop2(aco_opcode::v_mul_f32, b.def(v1), src0, src1);
500 }
501 
502 Temp
503 fma(Temp src0, Temp src1, Temp src2, Builder b)
504 {
505    if (src0.bytes() == 2)
506       return b.vop3(aco_opcode::v_fma_f16, b.def(v2b), src0, src1, src2);
507    else
508       return b.vop3(aco_opcode::v_fma_f32, b.def(v1), src0, src1, src2);
509 }
510 
511 Temp
512 fsat(Temp src, Builder b)
513 {
514    if (src.bytes() == 2)
515       return b.vop3(aco_opcode::v_med3_f16, b.def(v2b), Operand::c16(0u), Operand::c16(0x3c00u),
516                     src);
517    else
518       return b.vop3(aco_opcode::v_med3_f32, b.def(v1), Operand::zero(), Operand::c32(0x3f800000u),
519                     src);
520 }
521 
522 Temp
523 fmin(Temp src0, Temp src1, Builder b)
524 {
525    return b.vop2(aco_opcode::v_min_f32, b.def(v1), src0, src1);
526 }
527 
528 Temp
529 fmax(Temp src0, Temp src1, Builder b)
530 {
531    return b.vop2(aco_opcode::v_max_f32, b.def(v1), src0, src1);
532 }
533 
534 static Temp
535 extract(Temp src, unsigned idx, unsigned size, bool sign_extend, Builder b)
536 {
537    if (src.type() == RegType::sgpr)
538       return b.pseudo(aco_opcode::p_extract, b.def(src.regClass()), bld.def(s1, scc), src,
539                       Operand::c32(idx), Operand::c32(size), Operand::c32(sign_extend));
540    else
541       return b.pseudo(aco_opcode::p_extract, b.def(src.regClass()), src, Operand::c32(idx),
542                       Operand::c32(size), Operand::c32(sign_extend));
543 }
544 
545 Temp
546 ext_ushort(Temp src, unsigned idx, Builder b)
547 {
548    return extract(src, idx, 16, false, b);
549 }
550 
551 Temp
552 ext_sshort(Temp src, unsigned idx, Builder b)
553 {
554    return extract(src, idx, 16, true, b);
555 }
556 
557 Temp
558 ext_ubyte(Temp src, unsigned idx, Builder b)
559 {
560    return extract(src, idx, 8, false, b);
561 }
562 
563 Temp
564 ext_sbyte(Temp src, unsigned idx, Builder b)
565 {
566    return extract(src, idx, 8, true, b);
567 }
568 
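/* Build the six-block CFG that ACO uses for a divergent if/else
 * (then_logical, then_linear, invert, else_logical, else_linear, endif),
 * including the logical/linear predecessor edges and the exec-mask save
 * (s_and_saveexec), invert (s_andn2) and restore (p_parallelcopy). The saved
 * exec mask is pinned to a fixed physical register (PhysReg 84) so the
 * generated code is deterministic. */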
569 void
570 emit_divergent_if_else(Program* prog, aco::Builder& b, Operand cond, std::function<void()> then,
571                        std::function<void()> els)
572 {
573    prog->blocks.reserve(prog->blocks.size() + 6);
574 
575    Block* if_block = &prog->blocks.back();
576    Block* then_logical = prog->create_and_insert_block();
577    Block* then_linear = prog->create_and_insert_block();
578    Block* invert = prog->create_and_insert_block();
579    Block* else_logical = prog->create_and_insert_block();
580    Block* else_linear = prog->create_and_insert_block();
581    Block* endif_block = prog->create_and_insert_block();
582 
583    if_block->kind |= block_kind_branch;
584    invert->kind |= block_kind_invert;
585    endif_block->kind |= block_kind_merge | (if_block->kind & block_kind_top_level);
586 
587    /* Set up logical CF */
588    then_logical->logical_preds.push_back(if_block->index);
589    else_logical->logical_preds.push_back(if_block->index);
590    endif_block->logical_preds.push_back(then_logical->index);
591    endif_block->logical_preds.push_back(else_logical->index);
592 
593    /* Set up linear CF */
594    then_logical->linear_preds.push_back(if_block->index);
595    then_linear->linear_preds.push_back(if_block->index);
596    invert->linear_preds.push_back(then_logical->index);
597    invert->linear_preds.push_back(then_linear->index);
598    else_logical->linear_preds.push_back(invert->index);
599    else_linear->linear_preds.push_back(invert->index);
600    endif_block->linear_preds.push_back(else_logical->index);
601    endif_block->linear_preds.push_back(else_linear->index);
602 
603    PhysReg saved_exec_reg(84);
604 
605    b.reset(if_block);
606    Temp saved_exec = b.sop1(Builder::s_and_saveexec, b.def(b.lm, saved_exec_reg),
607                             Definition(scc, s1), Definition(exec, b.lm), cond, Operand(exec, b.lm));
608    b.branch(aco_opcode::p_cbranch_nz, then_logical->index, then_linear->index);
609 
610    b.reset(then_logical);
611    b.pseudo(aco_opcode::p_logical_start);
612    then();
613    b.pseudo(aco_opcode::p_logical_end);
614    b.branch(aco_opcode::p_branch, invert->index);
615 
616    b.reset(then_linear);
617    b.branch(aco_opcode::p_branch, invert->index);
618 
619    b.reset(invert);
620    b.sop2(Builder::s_andn2, Definition(exec, bld.lm), Definition(scc, s1),
621           Operand(saved_exec, saved_exec_reg), Operand(exec, bld.lm));
622    b.branch(aco_opcode::p_cbranch_nz, else_logical->index, else_linear->index);
623 
624    b.reset(else_logical);
625    b.pseudo(aco_opcode::p_logical_start);
626    els();
627    b.pseudo(aco_opcode::p_logical_end);
628    b.branch(aco_opcode::p_branch, endif_block->index);
629 
630    b.reset(else_linear);
631    b.branch(aco_opcode::p_branch, endif_block->index);
632 
633    b.reset(endif_block);
634    b.pseudo(aco_opcode::p_parallelcopy, Definition(exec, bld.lm),
635             Operand(saved_exec, saved_exec_reg));
636 }
637 
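/* Map a gfx level to a representative chip and forward to the per-family
 * overload below. */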
638 VkDevice
639 get_vk_device(enum amd_gfx_level gfx_level)
640 {
641    enum radeon_family family;
642    switch (gfx_level) {
643    case GFX6: family = CHIP_TAHITI; break;
644    case GFX7: family = CHIP_BONAIRE; break;
645    case GFX8: family = CHIP_POLARIS10; break;
646    case GFX9: family = CHIP_VEGA10; break;
647    case GFX10: family = CHIP_NAVI10; break;
648    case GFX10_3: family = CHIP_NAVI21; break;
649    case GFX11: family = CHIP_NAVI31; break;
650    case GFX12: family = CHIP_GFX1200; break;
651    default: family = CHIP_UNKNOWN; break;
652    }
653    return get_vk_device(family);
654 }
655 
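/* Lazily create (and cache) one RADV instance and device per family. The
 * target GPU is selected by setting RADV_FORCE_FAMILY before instance
 * creation, and VK_KHR_pipeline_executable_properties is enabled so the tests
 * can read back the compiled shader IR. Everything is destroyed at exit by
 * the destroy_devices static below. */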
656 VkDevice
657 get_vk_device(enum radeon_family family)
658 {
659    assert(family != CHIP_UNKNOWN);
660 
661    std::lock_guard<std::mutex> guard(create_device_mutex);
662 
663    if (device_cache[family])
664       return device_cache[family];
665 
666    setenv("RADV_FORCE_FAMILY", ac_get_family_name(family), 1);
667 
668    VkApplicationInfo app_info = {};
669    app_info.pApplicationName = "aco_tests";
670    app_info.apiVersion = VK_API_VERSION_1_2;
671    VkInstanceCreateInfo instance_create_info = {};
672    instance_create_info.pApplicationInfo = &app_info;
673    instance_create_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
674    ASSERTED VkResult result = ((PFN_vkCreateInstance)vk_icdGetInstanceProcAddr(
675       NULL, "vkCreateInstance"))(&instance_create_info, NULL, &instance_cache[family]);
676    assert(result == VK_SUCCESS);
677 
678 #define ITEM(n) n = (PFN_vk##n)vk_icdGetInstanceProcAddr(instance_cache[family], "vk" #n);
679    FUNCTION_LIST
680 #undef ITEM
681 
682    uint32_t device_count = 1;
683    VkPhysicalDevice device = VK_NULL_HANDLE;
684    result = EnumeratePhysicalDevices(instance_cache[family], &device_count, &device);
685    assert(result == VK_SUCCESS);
686    assert(device != VK_NULL_HANDLE);
687 
688    VkDeviceCreateInfo device_create_info = {};
689    device_create_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
690    static const char* extensions[] = {"VK_KHR_pipeline_executable_properties"};
691    device_create_info.enabledExtensionCount = sizeof(extensions) / sizeof(extensions[0]);
692    device_create_info.ppEnabledExtensionNames = extensions;
693    result = CreateDevice(device, &device_create_info, NULL, &device_cache[family]);
694 
695    return device_cache[family];
696 }
697 
698 static struct DestroyDevices {
699    ~DestroyDevices()
700    {
701       for (unsigned i = 0; i < CHIP_LAST; i++) {
702          if (!device_cache[i])
703             continue;
704          DestroyDevice(device_cache[i], NULL);
705          DestroyInstance(instance_cache[i], NULL);
706       }
707    }
708 } destroy_devices;
709 
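/* Fetch the internal representation called "name" for the given stage via
 * VK_KHR_pipeline_executable_properties and print it to "output". With
 * remove_encoding, the ';'-prefixed encoding annotations in the disassembly
 * are blanked out so the printed IR stays stable across encodings. */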
710 void
711 print_pipeline_ir(VkDevice device, VkPipeline pipeline, VkShaderStageFlagBits stages,
712                   const char* name, bool remove_encoding)
713 {
714    uint32_t executable_count = 16;
715    VkPipelineExecutablePropertiesKHR executables[16];
716    VkPipelineInfoKHR pipeline_info;
717    pipeline_info.sType = VK_STRUCTURE_TYPE_PIPELINE_INFO_KHR;
718    pipeline_info.pNext = NULL;
719    pipeline_info.pipeline = pipeline;
720    ASSERTED VkResult result =
721       GetPipelineExecutablePropertiesKHR(device, &pipeline_info, &executable_count, executables);
722    assert(result == VK_SUCCESS);
723 
724    uint32_t executable = 0;
725    for (; executable < executable_count; executable++) {
726       if (executables[executable].stages == stages)
727          break;
728    }
729    assert(executable != executable_count);
730 
731    VkPipelineExecutableInfoKHR exec_info;
732    exec_info.sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_INFO_KHR;
733    exec_info.pNext = NULL;
734    exec_info.pipeline = pipeline;
735    exec_info.executableIndex = executable;
736 
737    uint32_t ir_count = 16;
738    VkPipelineExecutableInternalRepresentationKHR ir[16];
739    memset(ir, 0, sizeof(ir));
740    result = GetPipelineExecutableInternalRepresentationsKHR(device, &exec_info, &ir_count, ir);
741    assert(result == VK_SUCCESS);
742 
743    VkPipelineExecutableInternalRepresentationKHR* requested_ir = nullptr;
744    for (unsigned i = 0; i < ir_count; ++i) {
745       if (strcmp(ir[i].name, name) == 0) {
746          requested_ir = &ir[i];
747          break;
748       }
749    }
750    assert(requested_ir && "Could not find requested IR");
751 
752    char* data = (char*)malloc(requested_ir->dataSize);
753    requested_ir->pData = data;
754    result = GetPipelineExecutableInternalRepresentationsKHR(device, &exec_info, &ir_count, ir);
755    assert(result == VK_SUCCESS);
756 
757    if (remove_encoding) {
758       for (char* c = data; *c; c++) {
759          if (*c == ';') {
760             for (; *c && *c != '\n'; c++)
761                *c = ' ';
762          }
763       }
764    }
765 
766    fprintf(output, "%s", data);
767    free(data);
768 }
769 
770 VkShaderModule
771 __qoCreateShaderModule(VkDevice dev, const QoShaderModuleCreateInfo* module_info)
772 {
773    VkShaderModuleCreateInfo vk_module_info;
774    vk_module_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
775    vk_module_info.pNext = NULL;
776    vk_module_info.flags = 0;
777    vk_module_info.codeSize = module_info->spirvSize;
778    vk_module_info.pCode = (const uint32_t*)module_info->pSpirv;
779 
780    VkShaderModule module;
781    ASSERTED VkResult result = CreateShaderModule(dev, &vk_module_info, NULL, &module);
782    assert(result == VK_SUCCESS);
783 
784    return module;
785 }
786 
787 PipelineBuilder::PipelineBuilder(VkDevice dev)
788 {
789    memset(this, 0, sizeof(*this));
790    topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
791    device = dev;
792 }
793 
794 PipelineBuilder::~PipelineBuilder()
795 {
796    DestroyPipeline(device, pipeline, NULL);
797 
798    for (unsigned i = 0; i < (is_compute() ? 1 : gfx_pipeline_info.stageCount); i++) {
799       VkPipelineShaderStageCreateInfo* stage_info = &stages[i];
800       if (owned_stages & stage_info->stage)
801          DestroyShaderModule(device, stage_info->module, NULL);
802    }
803 
804    DestroyPipelineLayout(device, pipeline_layout, NULL);
805 
806    for (unsigned i = 0; i < util_bitcount64(desc_layouts_used); i++)
807       DestroyDescriptorSetLayout(device, desc_layouts[i], NULL);
808 
809    DestroyRenderPass(device, render_pass, NULL);
810 }
811 
812 void
813 PipelineBuilder::add_desc_binding(VkShaderStageFlags stage_flags, uint32_t layout, uint32_t binding,
814                                   VkDescriptorType type, uint32_t count)
815 {
816    desc_layouts_used |= 1ull << layout;
817    desc_bindings[layout][num_desc_bindings[layout]++] = {binding, type, count, stage_flags, NULL};
818 }
819 
820 void
821 PipelineBuilder::add_vertex_binding(uint32_t binding, uint32_t stride, VkVertexInputRate rate)
822 {
823    vs_bindings[vs_input.vertexBindingDescriptionCount++] = {binding, stride, rate};
824 }
825 
826 void
827 PipelineBuilder::add_vertex_attribute(uint32_t location, uint32_t binding, VkFormat format,
828                                       uint32_t offset)
829 {
830    vs_attributes[vs_input.vertexAttributeDescriptionCount++] = {location, binding, format, offset};
831 }
832 
833 void
834 PipelineBuilder::add_resource_decls(QoShaderModuleCreateInfo* module)
835 {
836    for (unsigned i = 0; i < module->declarationCount; i++) {
837       const QoShaderDecl* decl = &module->pDeclarations[i];
838       switch (decl->decl_type) {
839       case QoShaderDeclType_ubo:
840          add_desc_binding(module->stage, decl->set, decl->binding,
841                           VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
842          break;
843       case QoShaderDeclType_ssbo:
844          add_desc_binding(module->stage, decl->set, decl->binding,
845                           VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
846          break;
847       case QoShaderDeclType_img_buf:
848          add_desc_binding(module->stage, decl->set, decl->binding,
849                           VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER);
850          break;
851       case QoShaderDeclType_img:
852          add_desc_binding(module->stage, decl->set, decl->binding,
853                           VK_DESCRIPTOR_TYPE_STORAGE_IMAGE);
854          break;
855       case QoShaderDeclType_tex_buf:
856          add_desc_binding(module->stage, decl->set, decl->binding,
857                           VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER);
858          break;
859       case QoShaderDeclType_combined:
860          add_desc_binding(module->stage, decl->set, decl->binding,
861                           VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);
862          break;
863       case QoShaderDeclType_tex:
864          add_desc_binding(module->stage, decl->set, decl->binding,
865                           VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE);
866          break;
867       case QoShaderDeclType_samp:
868          add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_SAMPLER);
869          break;
870       default: break;
871       }
872    }
873 }
874 
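/* Derive vertex attributes (for VS inputs) and color attachment formats (for
 * FS outputs) from the shader's declared I/O. Every input/output is treated
 * as a vec4-sized slot of float/uint/int type. */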
875 void
876 PipelineBuilder::add_io_decls(QoShaderModuleCreateInfo* module)
877 {
878    unsigned next_vtx_offset = 0;
879    for (unsigned i = 0; i < module->declarationCount; i++) {
880       const QoShaderDecl* decl = &module->pDeclarations[i];
881       switch (decl->decl_type) {
882       case QoShaderDeclType_in:
883          if (module->stage == VK_SHADER_STAGE_VERTEX_BIT) {
884             if (!strcmp(decl->type, "float") || decl->type[0] == 'v')
885                add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_SFLOAT,
886                                     next_vtx_offset);
887             else if (decl->type[0] == 'u')
888                add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_UINT,
889                                     next_vtx_offset);
890             else if (decl->type[0] == 'i')
891                add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_SINT,
892                                     next_vtx_offset);
893             next_vtx_offset += 16;
894          }
895          break;
896       case QoShaderDeclType_out:
897          if (module->stage == VK_SHADER_STAGE_FRAGMENT_BIT) {
898             if (!strcmp(decl->type, "float") || decl->type[0] == 'v')
899                color_outputs[decl->location] = VK_FORMAT_R32G32B32A32_SFLOAT;
900             else if (decl->type[0] == 'u')
901                color_outputs[decl->location] = VK_FORMAT_R32G32B32A32_UINT;
902             else if (decl->type[0] == 'i')
903                color_outputs[decl->location] = VK_FORMAT_R32G32B32A32_SINT;
904          }
905          break;
906       default: break;
907       }
908    }
909    if (next_vtx_offset)
910       add_vertex_binding(0, next_vtx_offset);
911 }
912 
913 void
914 PipelineBuilder::add_stage(VkShaderStageFlagBits stage, VkShaderModule module, const char* name)
915 {
916    VkPipelineShaderStageCreateInfo* stage_info;
917    if (stage == VK_SHADER_STAGE_COMPUTE_BIT)
918       stage_info = &stages[0];
919    else
920       stage_info = &stages[gfx_pipeline_info.stageCount++];
921    stage_info->sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
922    stage_info->pNext = NULL;
923    stage_info->flags = 0;
924    stage_info->stage = stage;
925    stage_info->module = module;
926    stage_info->pName = name;
927    stage_info->pSpecializationInfo = NULL;
928    owned_stages |= stage;
929 }
930 
931 void
932 PipelineBuilder::add_stage(VkShaderStageFlagBits stage, QoShaderModuleCreateInfo module,
933                            const char* name)
934 {
935    add_stage(stage, __qoCreateShaderModule(device, &module), name);
936    add_resource_decls(&module);
937    add_io_decls(&module);
938 }
939 
940 void
941 PipelineBuilder::add_vsfs(VkShaderModule vs, VkShaderModule fs)
942 {
943    add_stage(VK_SHADER_STAGE_VERTEX_BIT, vs);
944    add_stage(VK_SHADER_STAGE_FRAGMENT_BIT, fs);
945 }
946 
947 void
948 PipelineBuilder::add_vsfs(QoShaderModuleCreateInfo vs, QoShaderModuleCreateInfo fs)
949 {
950    add_stage(VK_SHADER_STAGE_VERTEX_BIT, vs);
951    add_stage(VK_SHADER_STAGE_FRAGMENT_BIT, fs);
952 }
953 
954 void
955 PipelineBuilder::add_cs(VkShaderModule cs)
956 {
957    add_stage(VK_SHADER_STAGE_COMPUTE_BIT, cs);
958 }
959 
960 void
961 PipelineBuilder::add_cs(QoShaderModuleCreateInfo cs)
962 {
963    add_stage(VK_SHADER_STAGE_COMPUTE_BIT, cs);
964 }
965 
966 bool
967 PipelineBuilder::is_compute()
968 {
969    return gfx_pipeline_info.stageCount == 0;
970 }
971 
972 void
973 PipelineBuilder::create_compute_pipeline()
974 {
975    VkComputePipelineCreateInfo create_info;
976    create_info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
977    create_info.pNext = NULL;
978    create_info.flags = VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR;
979    create_info.stage = stages[0];
980    create_info.layout = pipeline_layout;
981    create_info.basePipelineHandle = VK_NULL_HANDLE;
982    create_info.basePipelineIndex = 0;
983 
984    ASSERTED VkResult result =
985       CreateComputePipelines(device, VK_NULL_HANDLE, 1, &create_info, NULL, &pipeline);
986    assert(result == VK_SUCCESS);
987 }
988 
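/* Build a single-subpass render pass from the declared color/depth-stencil
 * outputs, fill in mostly-default fixed-function state (no blending, no
 * culling, most state dynamic), and create the graphics pipeline with
 * CAPTURE_INTERNAL_REPRESENTATIONS so its IR can be queried later. */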
989 void
990 PipelineBuilder::create_graphics_pipeline()
991 {
992    /* create the create infos */
993    if (!samples)
994       samples = VK_SAMPLE_COUNT_1_BIT;
995 
996    unsigned num_color_attachments = 0;
997    VkPipelineColorBlendAttachmentState blend_attachment_states[16];
998    VkAttachmentReference color_attachments[16];
999    VkAttachmentDescription attachment_descs[17];
1000    for (unsigned i = 0; i < 16; i++) {
1001       if (color_outputs[i] == VK_FORMAT_UNDEFINED)
1002          continue;
1003 
1004       VkAttachmentDescription* desc = &attachment_descs[num_color_attachments];
1005       desc->flags = 0;
1006       desc->format = color_outputs[i];
1007       desc->samples = samples;
1008       desc->loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
1009       desc->storeOp = VK_ATTACHMENT_STORE_OP_STORE;
1010       desc->stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
1011       desc->stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
1012       desc->initialLayout = VK_IMAGE_LAYOUT_GENERAL;
1013       desc->finalLayout = VK_IMAGE_LAYOUT_GENERAL;
1014 
1015       VkAttachmentReference* ref = &color_attachments[num_color_attachments];
1016       ref->attachment = num_color_attachments;
1017       ref->layout = VK_IMAGE_LAYOUT_GENERAL;
1018 
1019       VkPipelineColorBlendAttachmentState* blend = &blend_attachment_states[num_color_attachments];
1020       blend->blendEnable = false;
1021       blend->colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
1022                               VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
1023 
1024       num_color_attachments++;
1025    }
1026 
1027    unsigned num_attachments = num_color_attachments;
1028    VkAttachmentReference ds_attachment;
1029    if (ds_output != VK_FORMAT_UNDEFINED) {
1030       VkAttachmentDescription* desc = &attachment_descs[num_attachments];
1031       desc->flags = 0;
1032       desc->format = ds_output;
1033       desc->samples = samples;
1034       desc->loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
1035       desc->storeOp = VK_ATTACHMENT_STORE_OP_STORE;
1036       desc->stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
1037       desc->stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
1038       desc->initialLayout = VK_IMAGE_LAYOUT_GENERAL;
1039       desc->finalLayout = VK_IMAGE_LAYOUT_GENERAL;
1040 
1041       ds_attachment.attachment = num_color_attachments;
1042       ds_attachment.layout = VK_IMAGE_LAYOUT_GENERAL;
1043 
1044       num_attachments++;
1045    }
1046 
1047    vs_input.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
1048    vs_input.pNext = NULL;
1049    vs_input.flags = 0;
1050    vs_input.pVertexBindingDescriptions = vs_bindings;
1051    vs_input.pVertexAttributeDescriptions = vs_attributes;
1052 
1053    VkPipelineInputAssemblyStateCreateInfo assembly_state;
1054    assembly_state.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
1055    assembly_state.pNext = NULL;
1056    assembly_state.flags = 0;
1057    assembly_state.topology = topology;
1058    assembly_state.primitiveRestartEnable = false;
1059 
1060    VkPipelineTessellationStateCreateInfo tess_state;
1061    tess_state.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO;
1062    tess_state.pNext = NULL;
1063    tess_state.flags = 0;
1064    tess_state.patchControlPoints = patch_size;
1065 
1066    VkPipelineViewportStateCreateInfo viewport_state;
1067    viewport_state.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO;
1068    viewport_state.pNext = NULL;
1069    viewport_state.flags = 0;
1070    viewport_state.viewportCount = 1;
1071    viewport_state.pViewports = NULL;
1072    viewport_state.scissorCount = 1;
1073    viewport_state.pScissors = NULL;
1074 
1075    VkPipelineRasterizationStateCreateInfo rasterization_state;
1076    rasterization_state.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
1077    rasterization_state.pNext = NULL;
1078    rasterization_state.flags = 0;
1079    rasterization_state.depthClampEnable = false;
1080    rasterization_state.rasterizerDiscardEnable = false;
1081    rasterization_state.polygonMode = VK_POLYGON_MODE_FILL;
1082    rasterization_state.cullMode = VK_CULL_MODE_NONE;
1083    rasterization_state.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE;
1084    rasterization_state.depthBiasEnable = false;
1085    rasterization_state.lineWidth = 1.0;
1086 
1087    VkPipelineMultisampleStateCreateInfo ms_state;
1088    ms_state.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
1089    ms_state.pNext = NULL;
1090    ms_state.flags = 0;
1091    ms_state.rasterizationSamples = samples;
1092    ms_state.sampleShadingEnable = sample_shading_enable;
1093    ms_state.minSampleShading = min_sample_shading;
1094    VkSampleMask sample_mask = 0xffffffff;
1095    ms_state.pSampleMask = &sample_mask;
1096    ms_state.alphaToCoverageEnable = false;
1097    ms_state.alphaToOneEnable = false;
1098 
1099    VkPipelineDepthStencilStateCreateInfo ds_state;
1100    ds_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO;
1101    ds_state.pNext = NULL;
1102    ds_state.flags = 0;
1103    ds_state.depthTestEnable = ds_output != VK_FORMAT_UNDEFINED;
1104    ds_state.depthWriteEnable = true;
1105    ds_state.depthCompareOp = VK_COMPARE_OP_ALWAYS;
1106    ds_state.depthBoundsTestEnable = false;
1107    ds_state.stencilTestEnable = true;
1108    ds_state.front.failOp = VK_STENCIL_OP_KEEP;
1109    ds_state.front.passOp = VK_STENCIL_OP_REPLACE;
1110    ds_state.front.depthFailOp = VK_STENCIL_OP_REPLACE;
1111    ds_state.front.compareOp = VK_COMPARE_OP_ALWAYS;
1112    ds_state.front.compareMask = 0xffffffff, ds_state.front.writeMask = 0;
1113    ds_state.front.reference = 0;
1114    ds_state.back = ds_state.front;
1115 
1116    VkPipelineColorBlendStateCreateInfo color_blend_state;
1117    color_blend_state.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO;
1118    color_blend_state.pNext = NULL;
1119    color_blend_state.flags = 0;
1120    color_blend_state.logicOpEnable = false;
1121    color_blend_state.attachmentCount = num_color_attachments;
1122    color_blend_state.pAttachments = blend_attachment_states;
1123 
1124    VkDynamicState dynamic_states[9] = {VK_DYNAMIC_STATE_VIEWPORT,
1125                                        VK_DYNAMIC_STATE_SCISSOR,
1126                                        VK_DYNAMIC_STATE_LINE_WIDTH,
1127                                        VK_DYNAMIC_STATE_DEPTH_BIAS,
1128                                        VK_DYNAMIC_STATE_BLEND_CONSTANTS,
1129                                        VK_DYNAMIC_STATE_DEPTH_BOUNDS,
1130                                        VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
1131                                        VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
1132                                        VK_DYNAMIC_STATE_STENCIL_REFERENCE};
1133 
1134    VkPipelineDynamicStateCreateInfo dynamic_state;
1135    dynamic_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO;
1136    dynamic_state.pNext = NULL;
1137    dynamic_state.flags = 0;
1138    dynamic_state.dynamicStateCount = sizeof(dynamic_states) / sizeof(VkDynamicState);
1139    dynamic_state.pDynamicStates = dynamic_states;
1140 
1141    gfx_pipeline_info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
1142    gfx_pipeline_info.pNext = NULL;
1143    gfx_pipeline_info.flags = VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR;
1144    gfx_pipeline_info.pVertexInputState = &vs_input;
1145    gfx_pipeline_info.pInputAssemblyState = &assembly_state;
1146    gfx_pipeline_info.pTessellationState = &tess_state;
1147    gfx_pipeline_info.pViewportState = &viewport_state;
1148    gfx_pipeline_info.pRasterizationState = &rasterization_state;
1149    gfx_pipeline_info.pMultisampleState = &ms_state;
1150    gfx_pipeline_info.pDepthStencilState = &ds_state;
1151    gfx_pipeline_info.pColorBlendState = &color_blend_state;
1152    gfx_pipeline_info.pDynamicState = &dynamic_state;
1153    gfx_pipeline_info.subpass = 0;
1154 
1155    /* create the objects used to create the pipeline */
1156    VkSubpassDescription subpass;
1157    subpass.flags = 0;
1158    subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
1159    subpass.inputAttachmentCount = 0;
1160    subpass.pInputAttachments = NULL;
1161    subpass.colorAttachmentCount = num_color_attachments;
1162    subpass.pColorAttachments = color_attachments;
1163    subpass.pResolveAttachments = NULL;
1164    subpass.pDepthStencilAttachment = ds_output == VK_FORMAT_UNDEFINED ? NULL : &ds_attachment;
1165    subpass.preserveAttachmentCount = 0;
1166    subpass.pPreserveAttachments = NULL;
1167 
1168    VkRenderPassCreateInfo renderpass_info;
1169    renderpass_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
1170    renderpass_info.pNext = NULL;
1171    renderpass_info.flags = 0;
1172    renderpass_info.attachmentCount = num_attachments;
1173    renderpass_info.pAttachments = attachment_descs;
1174    renderpass_info.subpassCount = 1;
1175    renderpass_info.pSubpasses = &subpass;
1176    renderpass_info.dependencyCount = 0;
1177    renderpass_info.pDependencies = NULL;
1178 
1179    ASSERTED VkResult result = CreateRenderPass(device, &renderpass_info, NULL, &render_pass);
1180    assert(result == VK_SUCCESS);
1181 
1182    gfx_pipeline_info.layout = pipeline_layout;
1183    gfx_pipeline_info.renderPass = render_pass;
1184 
1185    /* create the pipeline */
1186    gfx_pipeline_info.pStages = stages;
1187 
1188    result = CreateGraphicsPipelines(device, VK_NULL_HANDLE, 1, &gfx_pipeline_info, NULL, &pipeline);
1189    assert(result == VK_SUCCESS);
1190 }
1191 
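/* Create the descriptor set layouts and pipeline layout from the bindings
 * recorded by add_desc_binding()/add_resource_decls(), then build either a
 * compute or a graphics pipeline depending on which stages were added. */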
1192 void
1193 PipelineBuilder::create_pipeline()
1194 {
1195    unsigned num_desc_layouts = 0;
1196    for (unsigned i = 0; i < 64; i++) {
1197       if (!(desc_layouts_used & (1ull << i)))
1198          continue;
1199 
1200       VkDescriptorSetLayoutCreateInfo desc_layout_info;
1201       desc_layout_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
1202       desc_layout_info.pNext = NULL;
1203       desc_layout_info.flags = 0;
1204       desc_layout_info.bindingCount = num_desc_bindings[i];
1205       desc_layout_info.pBindings = desc_bindings[i];
1206 
1207       ASSERTED VkResult result = CreateDescriptorSetLayout(device, &desc_layout_info, NULL,
1208                                                            &desc_layouts[num_desc_layouts]);
1209       assert(result == VK_SUCCESS);
1210       num_desc_layouts++;
1211    }
1212 
1213    VkPipelineLayoutCreateInfo pipeline_layout_info;
1214    pipeline_layout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
1215    pipeline_layout_info.pNext = NULL;
1216    pipeline_layout_info.flags = 0;
1217    pipeline_layout_info.pushConstantRangeCount = 1;
1218    pipeline_layout_info.pPushConstantRanges = &push_constant_range;
1219    pipeline_layout_info.setLayoutCount = num_desc_layouts;
1220    pipeline_layout_info.pSetLayouts = desc_layouts;
1221 
1222    ASSERTED VkResult result =
1223       CreatePipelineLayout(device, &pipeline_layout_info, NULL, &pipeline_layout);
1224    assert(result == VK_SUCCESS);
1225 
1226    if (is_compute())
1227       create_compute_pipeline();
1228    else
1229       create_graphics_pipeline();
1230 }
1231 
1232 void
1233 PipelineBuilder::print_ir(VkShaderStageFlagBits stage_flags, const char* name, bool remove_encoding)
1234 {
1235    if (!pipeline)
1236       create_pipeline();
1237    print_pipeline_ir(device, pipeline, stage_flags, name, remove_encoding);
1238 }
1239