1 /*
2 * Copyright © 2020 Valve Corporation
3 *
4 * SPDX-License-Identifier: MIT
5 */
6 #include "helpers.h"
7
8 #include "common/amd_family.h"
9 #include "common/nir/ac_nir.h"
10 #include "vk_format.h"
11
12 #include <llvm-c/Target.h>
13
14 #include <mutex>
15 #include <sstream>
16 #include <stdio.h>
17
18 using namespace aco;
19
20 extern "C" {
21 PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(VkInstance instance, const char* pName);
22 }
23
/* Global state shared by all ACO unit tests; (re)initialized by the
 * setup_* helpers below. */
ac_shader_config config;
aco_shader_info info;
std::unique_ptr<Program> program;
Builder bld(NULL);
/* Temps defined by the p_startpgm emitted in setup_cs(). */
Temp inputs[16];

static radeon_info rad_info;
static nir_shader_compiler_options nir_options;
static nir_builder _nb;
/* Points at _nb once setup_nir_cs() has run. */
nir_builder *nb;

/* Lazily-created, per-family Vulkan objects; see get_vk_device() and the
 * DestroyDevices cleanup object at the bottom of this file. */
static VkInstance instance_cache[CHIP_LAST] = {VK_NULL_HANDLE};
static VkDevice device_cache[CHIP_LAST] = {VK_NULL_HANDLE};
static std::mutex create_device_mutex;
38
/* Vulkan entry points the tests need; each is resolved at runtime through
 * vk_icdGetInstanceProcAddr() in get_vk_device(). The list is expanded with
 * different ITEM() definitions below and in get_vk_device(). */
#define FUNCTION_LIST \
   ITEM(CreateInstance) \
   ITEM(DestroyInstance) \
   ITEM(EnumeratePhysicalDevices) \
   ITEM(GetPhysicalDeviceProperties2) \
   ITEM(CreateDevice) \
   ITEM(DestroyDevice) \
   ITEM(CreateShaderModule) \
   ITEM(DestroyShaderModule) \
   ITEM(CreateGraphicsPipelines) \
   ITEM(CreateComputePipelines) \
   ITEM(DestroyPipeline) \
   ITEM(CreateDescriptorSetLayout) \
   ITEM(DestroyDescriptorSetLayout) \
   ITEM(CreatePipelineLayout) \
   ITEM(DestroyPipelineLayout) \
   ITEM(CreateRenderPass) \
   ITEM(DestroyRenderPass) \
   ITEM(GetPipelineExecutablePropertiesKHR) \
   ITEM(GetPipelineExecutableInternalRepresentationsKHR)

/* Declare one global PFN_vk* function pointer per listed entry point. */
#define ITEM(n) PFN_vk##n n;
FUNCTION_LIST
#undef ITEM
63
64 void
create_program(enum amd_gfx_level gfx_level,Stage stage,unsigned wave_size,enum radeon_family family)65 create_program(enum amd_gfx_level gfx_level, Stage stage, unsigned wave_size,
66 enum radeon_family family)
67 {
68 memset(&config, 0, sizeof(config));
69 info.wave_size = wave_size;
70
71 program.reset(new Program);
72 aco::init_program(program.get(), stage, &info, gfx_level, family, false, &config);
73 program->workgroup_size = UINT_MAX;
74 calc_min_waves(program.get());
75
76 program->debug.func = nullptr;
77 program->debug.private_data = nullptr;
78
79 program->debug.output = output;
80 program->debug.shorten_messages = true;
81 program->debug.func = nullptr;
82 program->debug.private_data = nullptr;
83
84 Block* block = program->create_and_insert_block();
85 block->kind = block_kind_top_level;
86
87 bld = Builder(program.get(), &program->blocks[0]);
88
89 config.float_mode = program->blocks[0].fp_mode.val;
90 }
91
92 bool
setup_cs(const char * input_spec,enum amd_gfx_level gfx_level,enum radeon_family family,const char * subvariant,unsigned wave_size)93 setup_cs(const char* input_spec, enum amd_gfx_level gfx_level, enum radeon_family family,
94 const char* subvariant, unsigned wave_size)
95 {
96 if (!set_variant(gfx_level, subvariant))
97 return false;
98
99 memset(&info, 0, sizeof(info));
100 create_program(gfx_level, compute_cs, wave_size, family);
101
102 if (input_spec) {
103 std::vector<RegClass> input_classes;
104 while (input_spec[0]) {
105 RegType type = input_spec[0] == 'v' ? RegType::vgpr : RegType::sgpr;
106 unsigned size = input_spec[1] - '0';
107 bool in_bytes = input_spec[2] == 'b';
108 input_classes.push_back(RegClass::get(type, size * (in_bytes ? 1 : 4)));
109
110 input_spec += 2 + in_bytes;
111 while (input_spec[0] == ' ')
112 input_spec++;
113 }
114
115 aco_ptr<Instruction> startpgm{
116 create_instruction(aco_opcode::p_startpgm, Format::PSEUDO, 0, input_classes.size())};
117 for (unsigned i = 0; i < input_classes.size(); i++) {
118 inputs[i] = bld.tmp(input_classes[i]);
119 startpgm->definitions[i] = Definition(inputs[i]);
120 }
121 bld.insert(std::move(startpgm));
122 }
123
124 return true;
125 }
126
127 bool
setup_nir_cs(enum amd_gfx_level gfx_level,gl_shader_stage stage,enum radeon_family family,const char * subvariant)128 setup_nir_cs(enum amd_gfx_level gfx_level, gl_shader_stage stage, enum radeon_family family, const char* subvariant)
129 {
130 if (!set_variant(gfx_level, subvariant))
131 return false;
132
133 if (family == CHIP_UNKNOWN) {
134 switch (gfx_level) {
135 case GFX6: family = CHIP_TAHITI; break;
136 case GFX7: family = CHIP_BONAIRE; break;
137 case GFX8: family = CHIP_POLARIS10; break;
138 case GFX9: family = CHIP_VEGA10; break;
139 case GFX10: family = CHIP_NAVI10; break;
140 case GFX10_3: family = CHIP_NAVI21; break;
141 case GFX11: family = CHIP_NAVI31; break;
142 default: family = CHIP_UNKNOWN; break;
143 }
144 }
145
146 memset(&rad_info, 0, sizeof(rad_info));
147 rad_info.gfx_level = gfx_level;
148 rad_info.family = family;
149
150 memset(&nir_options, 0, sizeof(nir_options));
151 ac_nir_set_options(&rad_info, false, &nir_options);
152
153 glsl_type_singleton_init_or_ref();
154
155 _nb = nir_builder_init_simple_shader(stage, &nir_options, "aco_test");
156 nb = &_nb;
157
158 return true;
159 }
160
161 void
finish_program(Program * prog,bool endpgm,bool dominance)162 finish_program(Program* prog, bool endpgm, bool dominance)
163 {
164 for (Block& BB : prog->blocks) {
165 for (unsigned idx : BB.linear_preds)
166 prog->blocks[idx].linear_succs.emplace_back(BB.index);
167 for (unsigned idx : BB.logical_preds)
168 prog->blocks[idx].logical_succs.emplace_back(BB.index);
169 }
170
171 for (Block& block : prog->blocks) {
172 if (block.linear_succs.size() == 0) {
173 block.kind |= block_kind_uniform;
174 if (endpgm)
175 Builder(prog, &block).sopp(aco_opcode::s_endpgm);
176 }
177 }
178
179 if (dominance)
180 dominator_tree(program.get());
181 }
182
183 void
finish_validator_test()184 finish_validator_test()
185 {
186 finish_program(program.get(), true, true);
187 aco_print_program(program.get(), output);
188 fprintf(output, "Validation results:\n");
189 if (aco::validate_ir(program.get()))
190 fprintf(output, "Validation passed\n");
191 else
192 fprintf(output, "Validation failed\n");
193 }
194
195 void
finish_opt_test()196 finish_opt_test()
197 {
198 finish_program(program.get(), true, true);
199 if (!aco::validate_ir(program.get())) {
200 fail_test("Validation before optimization failed");
201 return;
202 }
203 aco::optimize(program.get());
204 if (!aco::validate_ir(program.get())) {
205 fail_test("Validation after optimization failed");
206 return;
207 }
208 aco_print_program(program.get(), output);
209 }
210
211 void
finish_setup_reduce_temp_test()212 finish_setup_reduce_temp_test()
213 {
214 finish_program(program.get(), true, true);
215 if (!aco::validate_ir(program.get())) {
216 fail_test("Validation before setup_reduce_temp failed");
217 return;
218 }
219 aco::setup_reduce_temp(program.get());
220 if (!aco::validate_ir(program.get())) {
221 fail_test("Validation after setup_reduce_temp failed");
222 return;
223 }
224 aco_print_program(program.get(), output);
225 }
226
227 void
finish_lower_subdword_test()228 finish_lower_subdword_test()
229 {
230 finish_program(program.get(), true, true);
231 if (!aco::validate_ir(program.get())) {
232 fail_test("Validation before lower_subdword failed");
233 return;
234 }
235 aco::lower_subdword(program.get());
236 if (!aco::validate_ir(program.get())) {
237 fail_test("Validation after lower_subdword failed");
238 return;
239 }
240 aco_print_program(program.get(), output);
241 }
242
/* Validate the IR, run live-var analysis and register allocation with the
 * given test policy, check the RA result, then print the program. */
void
finish_ra_test(ra_test_policy policy)
{
   finish_program(program.get(), true, true);
   if (!aco::validate_ir(program.get())) {
      fail_test("Validation before register allocation failed");
      return;
   }

   /* RA needs a concrete workgroup size; use one wave. */
   program->workgroup_size = program->wave_size;
   aco::live_var_analysis(program.get());
   aco::register_allocation(program.get(), policy);

   /* NOTE(review): unlike validate_ir (true == OK), validate_ra appears to
    * return true when errors were found — confirm against the validator
    * before "fixing" this condition. */
   if (aco::validate_ra(program.get())) {
      fail_test("Validation after register allocation failed");
      return;
   }

   aco_print_program(program.get(), output);
}
263
264 void
finish_optimizer_postRA_test()265 finish_optimizer_postRA_test()
266 {
267 finish_program(program.get(), true, true);
268
269 if (!aco::validate_ir(program.get())) {
270 fail_test("Validation before optimize_postRA failed");
271 return;
272 }
273
274 aco::optimize_postRA(program.get());
275
276 if (!aco::validate_ir(program.get())) {
277 fail_test("Validation after optimize_postRA failed");
278 return;
279 }
280
281 aco_print_program(program.get(), output);
282 }
283
284 void
finish_to_hw_instr_test()285 finish_to_hw_instr_test()
286 {
287 finish_program(program.get(), true, true);
288
289 if (!aco::validate_ir(program.get())) {
290 fail_test("Validation before lower_to_hw_instr failed");
291 return;
292 }
293
294 aco::lower_to_hw_instr(program.get());
295
296 if (!aco::validate_ir(program.get())) {
297 fail_test("Validation after lower_to_hw_instr failed");
298 return;
299 }
300
301 aco_print_program(program.get(), output);
302 }
303
304 void
finish_schedule_vopd_test()305 finish_schedule_vopd_test()
306 {
307 finish_program(program.get());
308 aco::schedule_vopd(program.get());
309 aco_print_program(program.get(), output);
310 }
311
312 void
finish_waitcnt_test()313 finish_waitcnt_test()
314 {
315 finish_program(program.get());
316 aco::insert_waitcnt(program.get());
317 aco_print_program(program.get(), output);
318 }
319
320 void
finish_insert_nops_test(bool endpgm)321 finish_insert_nops_test(bool endpgm)
322 {
323 finish_program(program.get(), endpgm);
324 aco::insert_NOPs(program.get());
325 aco_print_program(program.get(), output);
326 }
327
328 void
finish_form_hard_clause_test()329 finish_form_hard_clause_test()
330 {
331 finish_program(program.get());
332 aco::form_hard_clauses(program.get());
333 aco_print_program(program.get(), output);
334 }
335
336 void
finish_assembler_test()337 finish_assembler_test()
338 {
339 finish_program(program.get());
340 std::vector<uint32_t> binary;
341 unsigned exec_size = emit_program(program.get(), binary);
342
343 /* we could use CLRX for disassembly but that would require it to be
344 * installed */
345 if (program->gfx_level >= GFX8) {
346 print_asm(program.get(), binary, exec_size / 4u, output);
347 } else {
348 // TODO: maybe we should use CLRX and skip this test if it's not available?
349 for (uint32_t dword : binary)
350 fprintf(output, "%.8x\n", dword);
351 }
352 }
353
354 void
live_var_analysis_debug_func(void * private_data,enum aco_compiler_debug_level level,const char * message)355 live_var_analysis_debug_func(void* private_data, enum aco_compiler_debug_level level, const char* message)
356 {
357 if (level == ACO_COMPILER_DEBUG_LEVEL_ERROR)
358 *(bool *)private_data = true;
359 }
360
/* Run instruction selection on the NIR shader built via setup_nir_cs() and
 * validate the resulting ACO program (IR, CFG, and live-var analysis). */
void
finish_isel_test(enum ac_hw_stage hw_stage, unsigned wave_size)
{
   nir_validate_shader(nb->shader, "in finish_isel_test");
   nir_validate_ssa_dominance(nb->shader, "in finish_isel_test");

   program.reset(new Program);
   program->debug.func = nullptr;
   program->debug.private_data = nullptr;

   ac_shader_args args = {};

   aco_compiler_options options = {};
   options.family = rad_info.family;
   options.gfx_level = rad_info.gfx_level;

   memset(&info, 0, sizeof(info));
   info.hw_stage = hw_stage;
   info.wave_size = wave_size;
   /* Total invocations per workgroup across all three dimensions. */
   info.workgroup_size = nb->shader->info.workgroup_size[0] * nb->shader->info.workgroup_size[1] * nb->shader->info.workgroup_size[2];

   memset(&config, 0, sizeof(config));

   select_program(program.get(), 1, &nb->shader, &config, &options, &info, &args);
   /* Dominator info must exist before SSA repair and phi lowering. */
   dominator_tree(program.get());
   if (program->should_repair_ssa)
      repair_ssa(program.get());
   lower_phis(program.get());

   /* The NIR shader is no longer needed; also drop the glsl_type ref taken
    * in setup_nir_cs(). */
   ralloc_free(nb->shader);
   glsl_type_singleton_decref();

   aco_print_program(program.get(), output);

   if (!aco::validate_ir(program.get())) {
      fail_test("Validation after instruction selection failed");
      return;
   }
   if (!aco::validate_cfg(program.get())) {
      fail_test("Invalidate CFG");
      return;
   }

   /* Run live-var analysis with a callback that records whether any
    * error-level message was emitted. */
   bool live_var_fail = false;
   program->debug.func = &live_var_analysis_debug_func;
   program->debug.private_data = &live_var_fail;
   aco::live_var_analysis(program.get());
   if (live_var_fail) {
      fail_test("Live var analysis failed");
      return;
   }
}
413
414 void
writeout(unsigned i,Temp tmp)415 writeout(unsigned i, Temp tmp)
416 {
417 if (tmp.id())
418 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), tmp);
419 else
420 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i));
421 }
422
423 void
writeout(unsigned i,aco::Builder::Result res)424 writeout(unsigned i, aco::Builder::Result res)
425 {
426 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), res);
427 }
428
429 void
writeout(unsigned i,Operand op)430 writeout(unsigned i, Operand op)
431 {
432 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), op);
433 }
434
435 void
writeout(unsigned i,Operand op0,Operand op1)436 writeout(unsigned i, Operand op0, Operand op1)
437 {
438 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), op0, op1);
439 }
440
441 Temp
fneg(Temp src,Builder b)442 fneg(Temp src, Builder b)
443 {
444 if (src.bytes() == 2)
445 return b.vop2(aco_opcode::v_mul_f16, b.def(v2b), Operand::c16(0xbc00u), src);
446 else
447 return b.vop2(aco_opcode::v_mul_f32, b.def(v1), Operand::c32(0xbf800000u), src);
448 }
449
450 Temp
fabs(Temp src,Builder b)451 fabs(Temp src, Builder b)
452 {
453 if (src.bytes() == 2) {
454 Builder::Result res =
455 b.vop2_e64(aco_opcode::v_mul_f16, b.def(v2b), Operand::c16(0x3c00), src);
456 res->valu().abs[1] = true;
457 return res;
458 } else {
459 Builder::Result res =
460 b.vop2_e64(aco_opcode::v_mul_f32, b.def(v1), Operand::c32(0x3f800000u), src);
461 res->valu().abs[1] = true;
462 return res;
463 }
464 }
465
466 Temp
f2f32(Temp src,Builder b)467 f2f32(Temp src, Builder b)
468 {
469 return b.vop1(aco_opcode::v_cvt_f32_f16, b.def(v1), src);
470 }
471
472 Temp
f2f16(Temp src,Builder b)473 f2f16(Temp src, Builder b)
474 {
475 return b.vop1(aco_opcode::v_cvt_f16_f32, b.def(v2b), src);
476 }
477
478 Temp
u2u16(Temp src,Builder b)479 u2u16(Temp src, Builder b)
480 {
481 return b.pseudo(aco_opcode::p_extract_vector, b.def(v2b), src, Operand::zero());
482 }
483
484 Temp
fadd(Temp src0,Temp src1,Builder b)485 fadd(Temp src0, Temp src1, Builder b)
486 {
487 if (src0.bytes() == 2)
488 return b.vop2(aco_opcode::v_add_f16, b.def(v2b), src0, src1);
489 else
490 return b.vop2(aco_opcode::v_add_f32, b.def(v1), src0, src1);
491 }
492
493 Temp
fmul(Temp src0,Temp src1,Builder b)494 fmul(Temp src0, Temp src1, Builder b)
495 {
496 if (src0.bytes() == 2)
497 return b.vop2(aco_opcode::v_mul_f16, b.def(v2b), src0, src1);
498 else
499 return b.vop2(aco_opcode::v_mul_f32, b.def(v1), src0, src1);
500 }
501
502 Temp
fma(Temp src0,Temp src1,Temp src2,Builder b)503 fma(Temp src0, Temp src1, Temp src2, Builder b)
504 {
505 if (src0.bytes() == 2)
506 return b.vop3(aco_opcode::v_fma_f16, b.def(v2b), src0, src1, src2);
507 else
508 return b.vop3(aco_opcode::v_fma_f32, b.def(v1), src0, src1, src2);
509 }
510
511 Temp
fsat(Temp src,Builder b)512 fsat(Temp src, Builder b)
513 {
514 if (src.bytes() == 2)
515 return b.vop3(aco_opcode::v_med3_f16, b.def(v2b), Operand::c16(0u), Operand::c16(0x3c00u),
516 src);
517 else
518 return b.vop3(aco_opcode::v_med3_f32, b.def(v1), Operand::zero(), Operand::c32(0x3f800000u),
519 src);
520 }
521
522 Temp
fmin(Temp src0,Temp src1,Builder b)523 fmin(Temp src0, Temp src1, Builder b)
524 {
525 return b.vop2(aco_opcode::v_min_f32, b.def(v1), src0, src1);
526 }
527
528 Temp
fmax(Temp src0,Temp src1,Builder b)529 fmax(Temp src0, Temp src1, Builder b)
530 {
531 return b.vop2(aco_opcode::v_max_f32, b.def(v1), src0, src1);
532 }
533
534 static Temp
extract(Temp src,unsigned idx,unsigned size,bool sign_extend,Builder b)535 extract(Temp src, unsigned idx, unsigned size, bool sign_extend, Builder b)
536 {
537 if (src.type() == RegType::sgpr)
538 return b.pseudo(aco_opcode::p_extract, b.def(src.regClass()), bld.def(s1, scc), src,
539 Operand::c32(idx), Operand::c32(size), Operand::c32(sign_extend));
540 else
541 return b.pseudo(aco_opcode::p_extract, b.def(src.regClass()), src, Operand::c32(idx),
542 Operand::c32(size), Operand::c32(sign_extend));
543 }
544
545 Temp
ext_ushort(Temp src,unsigned idx,Builder b)546 ext_ushort(Temp src, unsigned idx, Builder b)
547 {
548 return extract(src, idx, 16, false, b);
549 }
550
551 Temp
ext_sshort(Temp src,unsigned idx,Builder b)552 ext_sshort(Temp src, unsigned idx, Builder b)
553 {
554 return extract(src, idx, 16, true, b);
555 }
556
557 Temp
ext_ubyte(Temp src,unsigned idx,Builder b)558 ext_ubyte(Temp src, unsigned idx, Builder b)
559 {
560 return extract(src, idx, 8, false, b);
561 }
562
563 Temp
ext_sbyte(Temp src,unsigned idx,Builder b)564 ext_sbyte(Temp src, unsigned idx, Builder b)
565 {
566 return extract(src, idx, 8, true, b);
567 }
568
569 void
emit_divergent_if_else(Program * prog,aco::Builder & b,Operand cond,std::function<void ()> then,std::function<void ()> els)570 emit_divergent_if_else(Program* prog, aco::Builder& b, Operand cond, std::function<void()> then,
571 std::function<void()> els)
572 {
573 prog->blocks.reserve(prog->blocks.size() + 6);
574
575 Block* if_block = &prog->blocks.back();
576 Block* then_logical = prog->create_and_insert_block();
577 Block* then_linear = prog->create_and_insert_block();
578 Block* invert = prog->create_and_insert_block();
579 Block* else_logical = prog->create_and_insert_block();
580 Block* else_linear = prog->create_and_insert_block();
581 Block* endif_block = prog->create_and_insert_block();
582
583 if_block->kind |= block_kind_branch;
584 invert->kind |= block_kind_invert;
585 endif_block->kind |= block_kind_merge | (if_block->kind & block_kind_top_level);
586
587 /* Set up logical CF */
588 then_logical->logical_preds.push_back(if_block->index);
589 else_logical->logical_preds.push_back(if_block->index);
590 endif_block->logical_preds.push_back(then_logical->index);
591 endif_block->logical_preds.push_back(else_logical->index);
592
593 /* Set up linear CF */
594 then_logical->linear_preds.push_back(if_block->index);
595 then_linear->linear_preds.push_back(if_block->index);
596 invert->linear_preds.push_back(then_logical->index);
597 invert->linear_preds.push_back(then_linear->index);
598 else_logical->linear_preds.push_back(invert->index);
599 else_linear->linear_preds.push_back(invert->index);
600 endif_block->linear_preds.push_back(else_logical->index);
601 endif_block->linear_preds.push_back(else_linear->index);
602
603 PhysReg saved_exec_reg(84);
604
605 b.reset(if_block);
606 Temp saved_exec = b.sop1(Builder::s_and_saveexec, b.def(b.lm, saved_exec_reg),
607 Definition(scc, s1), Definition(exec, b.lm), cond, Operand(exec, b.lm));
608 b.branch(aco_opcode::p_cbranch_nz, then_logical->index, then_linear->index);
609
610 b.reset(then_logical);
611 b.pseudo(aco_opcode::p_logical_start);
612 then();
613 b.pseudo(aco_opcode::p_logical_end);
614 b.branch(aco_opcode::p_branch, invert->index);
615
616 b.reset(then_linear);
617 b.branch(aco_opcode::p_branch, invert->index);
618
619 b.reset(invert);
620 b.sop2(Builder::s_andn2, Definition(exec, bld.lm), Definition(scc, s1),
621 Operand(saved_exec, saved_exec_reg), Operand(exec, bld.lm));
622 b.branch(aco_opcode::p_cbranch_nz, else_logical->index, else_linear->index);
623
624 b.reset(else_logical);
625 b.pseudo(aco_opcode::p_logical_start);
626 els();
627 b.pseudo(aco_opcode::p_logical_end);
628 b.branch(aco_opcode::p_branch, endif_block->index);
629
630 b.reset(else_linear);
631 b.branch(aco_opcode::p_branch, endif_block->index);
632
633 b.reset(endif_block);
634 b.pseudo(aco_opcode::p_parallelcopy, Definition(exec, bld.lm),
635 Operand(saved_exec, saved_exec_reg));
636 }
637
638 VkDevice
get_vk_device(enum amd_gfx_level gfx_level)639 get_vk_device(enum amd_gfx_level gfx_level)
640 {
641 enum radeon_family family;
642 switch (gfx_level) {
643 case GFX6: family = CHIP_TAHITI; break;
644 case GFX7: family = CHIP_BONAIRE; break;
645 case GFX8: family = CHIP_POLARIS10; break;
646 case GFX9: family = CHIP_VEGA10; break;
647 case GFX10: family = CHIP_NAVI10; break;
648 case GFX10_3: family = CHIP_NAVI21; break;
649 case GFX11: family = CHIP_NAVI31; break;
650 case GFX12: family = CHIP_GFX1200; break;
651 default: family = CHIP_UNKNOWN; break;
652 }
653 return get_vk_device(family);
654 }
655
656 VkDevice
get_vk_device(enum radeon_family family)657 get_vk_device(enum radeon_family family)
658 {
659 assert(family != CHIP_UNKNOWN);
660
661 std::lock_guard<std::mutex> guard(create_device_mutex);
662
663 if (device_cache[family])
664 return device_cache[family];
665
666 setenv("RADV_FORCE_FAMILY", ac_get_family_name(family), 1);
667
668 VkApplicationInfo app_info = {};
669 app_info.pApplicationName = "aco_tests";
670 app_info.apiVersion = VK_API_VERSION_1_2;
671 VkInstanceCreateInfo instance_create_info = {};
672 instance_create_info.pApplicationInfo = &app_info;
673 instance_create_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
674 ASSERTED VkResult result = ((PFN_vkCreateInstance)vk_icdGetInstanceProcAddr(
675 NULL, "vkCreateInstance"))(&instance_create_info, NULL, &instance_cache[family]);
676 assert(result == VK_SUCCESS);
677
678 #define ITEM(n) n = (PFN_vk##n)vk_icdGetInstanceProcAddr(instance_cache[family], "vk" #n);
679 FUNCTION_LIST
680 #undef ITEM
681
682 uint32_t device_count = 1;
683 VkPhysicalDevice device = VK_NULL_HANDLE;
684 result = EnumeratePhysicalDevices(instance_cache[family], &device_count, &device);
685 assert(result == VK_SUCCESS);
686 assert(device != VK_NULL_HANDLE);
687
688 VkDeviceCreateInfo device_create_info = {};
689 device_create_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
690 static const char* extensions[] = {"VK_KHR_pipeline_executable_properties"};
691 device_create_info.enabledExtensionCount = sizeof(extensions) / sizeof(extensions[0]);
692 device_create_info.ppEnabledExtensionNames = extensions;
693 result = CreateDevice(device, &device_create_info, NULL, &device_cache[family]);
694
695 return device_cache[family];
696 }
697
698 static struct DestroyDevices {
~DestroyDevicesDestroyDevices699 ~DestroyDevices()
700 {
701 for (unsigned i = 0; i < CHIP_LAST; i++) {
702 if (!device_cache[i])
703 continue;
704 DestroyDevice(device_cache[i], NULL);
705 DestroyInstance(instance_cache[i], NULL);
706 }
707 }
708 } destroy_devices;
709
/* Find the pipeline executable matching `stages`, query the internal
 * representation named `name` (e.g. "ACO IR" or the assembly), and print it
 * to `output`. When remove_encoding is set, everything from ';' to the end of
 * each line (the encoded instruction bytes in disassembly) is blanked out. */
void
print_pipeline_ir(VkDevice device, VkPipeline pipeline, VkShaderStageFlagBits stages,
                  const char* name, bool remove_encoding)
{
   uint32_t executable_count = 16;
   VkPipelineExecutablePropertiesKHR executables[16];
   VkPipelineInfoKHR pipeline_info;
   pipeline_info.sType = VK_STRUCTURE_TYPE_PIPELINE_INFO_KHR;
   pipeline_info.pNext = NULL;
   pipeline_info.pipeline = pipeline;
   ASSERTED VkResult result =
      GetPipelineExecutablePropertiesKHR(device, &pipeline_info, &executable_count, executables);
   assert(result == VK_SUCCESS);

   /* Locate the executable whose stage mask matches the request. */
   uint32_t executable = 0;
   for (; executable < executable_count; executable++) {
      if (executables[executable].stages == stages)
         break;
   }
   assert(executable != executable_count);

   VkPipelineExecutableInfoKHR exec_info;
   exec_info.sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_INFO_KHR;
   exec_info.pNext = NULL;
   exec_info.pipeline = pipeline;
   exec_info.executableIndex = executable;

   /* First call with pData == NULL: fills in name and dataSize only. */
   uint32_t ir_count = 16;
   VkPipelineExecutableInternalRepresentationKHR ir[16];
   memset(ir, 0, sizeof(ir));
   result = GetPipelineExecutableInternalRepresentationsKHR(device, &exec_info, &ir_count, ir);
   assert(result == VK_SUCCESS);

   VkPipelineExecutableInternalRepresentationKHR* requested_ir = nullptr;
   for (unsigned i = 0; i < ir_count; ++i) {
      if (strcmp(ir[i].name, name) == 0) {
         requested_ir = &ir[i];
         break;
      }
   }
   assert(requested_ir && "Could not find requested IR");

   /* Second call with a caller-allocated buffer to receive the IR text. */
   char* data = (char*)malloc(requested_ir->dataSize);
   requested_ir->pData = data;
   result = GetPipelineExecutableInternalRepresentationsKHR(device, &exec_info, &ir_count, ir);
   assert(result == VK_SUCCESS);

   if (remove_encoding) {
      /* Blank out ';'-prefixed tails so output is stable across encodings. */
      for (char* c = data; *c; c++) {
         if (*c == ';') {
            for (; *c && *c != '\n'; c++)
               *c = ' ';
         }
      }
   }

   fprintf(output, "%s", data);
   free(data);
}
769
770 VkShaderModule
__qoCreateShaderModule(VkDevice dev,const QoShaderModuleCreateInfo * module_info)771 __qoCreateShaderModule(VkDevice dev, const QoShaderModuleCreateInfo* module_info)
772 {
773 VkShaderModuleCreateInfo vk_module_info;
774 vk_module_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
775 vk_module_info.pNext = NULL;
776 vk_module_info.flags = 0;
777 vk_module_info.codeSize = module_info->spirvSize;
778 vk_module_info.pCode = (const uint32_t*)module_info->pSpirv;
779
780 VkShaderModule module;
781 ASSERTED VkResult result = CreateShaderModule(dev, &vk_module_info, NULL, &module);
782 assert(result == VK_SUCCESS);
783
784 return module;
785 }
786
/* Zero all members C-style, then set the few non-zero defaults.
 * NOTE(review): memset(this, ...) is only safe while the class stays
 * trivially copyable (plain Vulkan structs/handles) — confirm if members
 * are ever changed. */
PipelineBuilder::PipelineBuilder(VkDevice dev)
{
   memset(this, 0, sizeof(*this));
   topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
   device = dev;
}
793
~PipelineBuilder()794 PipelineBuilder::~PipelineBuilder()
795 {
796 DestroyPipeline(device, pipeline, NULL);
797
798 for (unsigned i = 0; i < (is_compute() ? 1 : gfx_pipeline_info.stageCount); i++) {
799 VkPipelineShaderStageCreateInfo* stage_info = &stages[i];
800 if (owned_stages & stage_info->stage)
801 DestroyShaderModule(device, stage_info->module, NULL);
802 }
803
804 DestroyPipelineLayout(device, pipeline_layout, NULL);
805
806 for (unsigned i = 0; i < util_bitcount64(desc_layouts_used); i++)
807 DestroyDescriptorSetLayout(device, desc_layouts[i], NULL);
808
809 DestroyRenderPass(device, render_pass, NULL);
810 }
811
812 void
add_desc_binding(VkShaderStageFlags stage_flags,uint32_t layout,uint32_t binding,VkDescriptorType type,uint32_t count)813 PipelineBuilder::add_desc_binding(VkShaderStageFlags stage_flags, uint32_t layout, uint32_t binding,
814 VkDescriptorType type, uint32_t count)
815 {
816 desc_layouts_used |= 1ull << layout;
817 desc_bindings[layout][num_desc_bindings[layout]++] = {binding, type, count, stage_flags, NULL};
818 }
819
820 void
add_vertex_binding(uint32_t binding,uint32_t stride,VkVertexInputRate rate)821 PipelineBuilder::add_vertex_binding(uint32_t binding, uint32_t stride, VkVertexInputRate rate)
822 {
823 vs_bindings[vs_input.vertexBindingDescriptionCount++] = {binding, stride, rate};
824 }
825
826 void
add_vertex_attribute(uint32_t location,uint32_t binding,VkFormat format,uint32_t offset)827 PipelineBuilder::add_vertex_attribute(uint32_t location, uint32_t binding, VkFormat format,
828 uint32_t offset)
829 {
830 vs_attributes[vs_input.vertexAttributeDescriptionCount++] = {location, binding, format, offset};
831 }
832
833 void
add_resource_decls(QoShaderModuleCreateInfo * module)834 PipelineBuilder::add_resource_decls(QoShaderModuleCreateInfo* module)
835 {
836 for (unsigned i = 0; i < module->declarationCount; i++) {
837 const QoShaderDecl* decl = &module->pDeclarations[i];
838 switch (decl->decl_type) {
839 case QoShaderDeclType_ubo:
840 add_desc_binding(module->stage, decl->set, decl->binding,
841 VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
842 break;
843 case QoShaderDeclType_ssbo:
844 add_desc_binding(module->stage, decl->set, decl->binding,
845 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
846 break;
847 case QoShaderDeclType_img_buf:
848 add_desc_binding(module->stage, decl->set, decl->binding,
849 VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER);
850 break;
851 case QoShaderDeclType_img:
852 add_desc_binding(module->stage, decl->set, decl->binding,
853 VK_DESCRIPTOR_TYPE_STORAGE_IMAGE);
854 break;
855 case QoShaderDeclType_tex_buf:
856 add_desc_binding(module->stage, decl->set, decl->binding,
857 VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER);
858 break;
859 case QoShaderDeclType_combined:
860 add_desc_binding(module->stage, decl->set, decl->binding,
861 VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);
862 break;
863 case QoShaderDeclType_tex:
864 add_desc_binding(module->stage, decl->set, decl->binding,
865 VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE);
866 break;
867 case QoShaderDeclType_samp:
868 add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_SAMPLER);
869 break;
870 default: break;
871 }
872 }
873 }
874
875 void
add_io_decls(QoShaderModuleCreateInfo * module)876 PipelineBuilder::add_io_decls(QoShaderModuleCreateInfo* module)
877 {
878 unsigned next_vtx_offset = 0;
879 for (unsigned i = 0; i < module->declarationCount; i++) {
880 const QoShaderDecl* decl = &module->pDeclarations[i];
881 switch (decl->decl_type) {
882 case QoShaderDeclType_in:
883 if (module->stage == VK_SHADER_STAGE_VERTEX_BIT) {
884 if (!strcmp(decl->type, "float") || decl->type[0] == 'v')
885 add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_SFLOAT,
886 next_vtx_offset);
887 else if (decl->type[0] == 'u')
888 add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_UINT,
889 next_vtx_offset);
890 else if (decl->type[0] == 'i')
891 add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_SINT,
892 next_vtx_offset);
893 next_vtx_offset += 16;
894 }
895 break;
896 case QoShaderDeclType_out:
897 if (module->stage == VK_SHADER_STAGE_FRAGMENT_BIT) {
898 if (!strcmp(decl->type, "float") || decl->type[0] == 'v')
899 color_outputs[decl->location] = VK_FORMAT_R32G32B32A32_SFLOAT;
900 else if (decl->type[0] == 'u')
901 color_outputs[decl->location] = VK_FORMAT_R32G32B32A32_UINT;
902 else if (decl->type[0] == 'i')
903 color_outputs[decl->location] = VK_FORMAT_R32G32B32A32_SINT;
904 }
905 break;
906 default: break;
907 }
908 }
909 if (next_vtx_offset)
910 add_vertex_binding(0, next_vtx_offset);
911 }
912
913 void
add_stage(VkShaderStageFlagBits stage,VkShaderModule module,const char * name)914 PipelineBuilder::add_stage(VkShaderStageFlagBits stage, VkShaderModule module, const char* name)
915 {
916 VkPipelineShaderStageCreateInfo* stage_info;
917 if (stage == VK_SHADER_STAGE_COMPUTE_BIT)
918 stage_info = &stages[0];
919 else
920 stage_info = &stages[gfx_pipeline_info.stageCount++];
921 stage_info->sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
922 stage_info->pNext = NULL;
923 stage_info->flags = 0;
924 stage_info->stage = stage;
925 stage_info->module = module;
926 stage_info->pName = name;
927 stage_info->pSpecializationInfo = NULL;
928 owned_stages |= stage;
929 }
930
931 void
add_stage(VkShaderStageFlagBits stage,QoShaderModuleCreateInfo module,const char * name)932 PipelineBuilder::add_stage(VkShaderStageFlagBits stage, QoShaderModuleCreateInfo module,
933 const char* name)
934 {
935 add_stage(stage, __qoCreateShaderModule(device, &module), name);
936 add_resource_decls(&module);
937 add_io_decls(&module);
938 }
939
940 void
add_vsfs(VkShaderModule vs,VkShaderModule fs)941 PipelineBuilder::add_vsfs(VkShaderModule vs, VkShaderModule fs)
942 {
943 add_stage(VK_SHADER_STAGE_VERTEX_BIT, vs);
944 add_stage(VK_SHADER_STAGE_FRAGMENT_BIT, fs);
945 }
946
947 void
add_vsfs(QoShaderModuleCreateInfo vs,QoShaderModuleCreateInfo fs)948 PipelineBuilder::add_vsfs(QoShaderModuleCreateInfo vs, QoShaderModuleCreateInfo fs)
949 {
950 add_stage(VK_SHADER_STAGE_VERTEX_BIT, vs);
951 add_stage(VK_SHADER_STAGE_FRAGMENT_BIT, fs);
952 }
953
954 void
add_cs(VkShaderModule cs)955 PipelineBuilder::add_cs(VkShaderModule cs)
956 {
957 add_stage(VK_SHADER_STAGE_COMPUTE_BIT, cs);
958 }
959
960 void
add_cs(QoShaderModuleCreateInfo cs)961 PipelineBuilder::add_cs(QoShaderModuleCreateInfo cs)
962 {
963 add_stage(VK_SHADER_STAGE_COMPUTE_BIT, cs);
964 }
965
966 bool
is_compute()967 PipelineBuilder::is_compute()
968 {
969 return gfx_pipeline_info.stageCount == 0;
970 }
971
972 void
create_compute_pipeline()973 PipelineBuilder::create_compute_pipeline()
974 {
975 VkComputePipelineCreateInfo create_info;
976 create_info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
977 create_info.pNext = NULL;
978 create_info.flags = VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR;
979 create_info.stage = stages[0];
980 create_info.layout = pipeline_layout;
981 create_info.basePipelineHandle = VK_NULL_HANDLE;
982 create_info.basePipelineIndex = 0;
983
984 ASSERTED VkResult result =
985 CreateComputePipelines(device, VK_NULL_HANDLE, 1, &create_info, NULL, &pipeline);
986 assert(result == VK_SUCCESS);
987 }
988
989 void
create_graphics_pipeline()990 PipelineBuilder::create_graphics_pipeline()
991 {
992 /* create the create infos */
993 if (!samples)
994 samples = VK_SAMPLE_COUNT_1_BIT;
995
996 unsigned num_color_attachments = 0;
997 VkPipelineColorBlendAttachmentState blend_attachment_states[16];
998 VkAttachmentReference color_attachments[16];
999 VkAttachmentDescription attachment_descs[17];
1000 for (unsigned i = 0; i < 16; i++) {
1001 if (color_outputs[i] == VK_FORMAT_UNDEFINED)
1002 continue;
1003
1004 VkAttachmentDescription* desc = &attachment_descs[num_color_attachments];
1005 desc->flags = 0;
1006 desc->format = color_outputs[i];
1007 desc->samples = samples;
1008 desc->loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
1009 desc->storeOp = VK_ATTACHMENT_STORE_OP_STORE;
1010 desc->stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
1011 desc->stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
1012 desc->initialLayout = VK_IMAGE_LAYOUT_GENERAL;
1013 desc->finalLayout = VK_IMAGE_LAYOUT_GENERAL;
1014
1015 VkAttachmentReference* ref = &color_attachments[num_color_attachments];
1016 ref->attachment = num_color_attachments;
1017 ref->layout = VK_IMAGE_LAYOUT_GENERAL;
1018
1019 VkPipelineColorBlendAttachmentState* blend = &blend_attachment_states[num_color_attachments];
1020 blend->blendEnable = false;
1021 blend->colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
1022 VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
1023
1024 num_color_attachments++;
1025 }
1026
1027 unsigned num_attachments = num_color_attachments;
1028 VkAttachmentReference ds_attachment;
1029 if (ds_output != VK_FORMAT_UNDEFINED) {
1030 VkAttachmentDescription* desc = &attachment_descs[num_attachments];
1031 desc->flags = 0;
1032 desc->format = ds_output;
1033 desc->samples = samples;
1034 desc->loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
1035 desc->storeOp = VK_ATTACHMENT_STORE_OP_STORE;
1036 desc->stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
1037 desc->stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
1038 desc->initialLayout = VK_IMAGE_LAYOUT_GENERAL;
1039 desc->finalLayout = VK_IMAGE_LAYOUT_GENERAL;
1040
1041 ds_attachment.attachment = num_color_attachments;
1042 ds_attachment.layout = VK_IMAGE_LAYOUT_GENERAL;
1043
1044 num_attachments++;
1045 }
1046
1047 vs_input.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
1048 vs_input.pNext = NULL;
1049 vs_input.flags = 0;
1050 vs_input.pVertexBindingDescriptions = vs_bindings;
1051 vs_input.pVertexAttributeDescriptions = vs_attributes;
1052
1053 VkPipelineInputAssemblyStateCreateInfo assembly_state;
1054 assembly_state.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
1055 assembly_state.pNext = NULL;
1056 assembly_state.flags = 0;
1057 assembly_state.topology = topology;
1058 assembly_state.primitiveRestartEnable = false;
1059
1060 VkPipelineTessellationStateCreateInfo tess_state;
1061 tess_state.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO;
1062 tess_state.pNext = NULL;
1063 tess_state.flags = 0;
1064 tess_state.patchControlPoints = patch_size;
1065
1066 VkPipelineViewportStateCreateInfo viewport_state;
1067 viewport_state.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO;
1068 viewport_state.pNext = NULL;
1069 viewport_state.flags = 0;
1070 viewport_state.viewportCount = 1;
1071 viewport_state.pViewports = NULL;
1072 viewport_state.scissorCount = 1;
1073 viewport_state.pScissors = NULL;
1074
1075 VkPipelineRasterizationStateCreateInfo rasterization_state;
1076 rasterization_state.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
1077 rasterization_state.pNext = NULL;
1078 rasterization_state.flags = 0;
1079 rasterization_state.depthClampEnable = false;
1080 rasterization_state.rasterizerDiscardEnable = false;
1081 rasterization_state.polygonMode = VK_POLYGON_MODE_FILL;
1082 rasterization_state.cullMode = VK_CULL_MODE_NONE;
1083 rasterization_state.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE;
1084 rasterization_state.depthBiasEnable = false;
1085 rasterization_state.lineWidth = 1.0;
1086
1087 VkPipelineMultisampleStateCreateInfo ms_state;
1088 ms_state.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
1089 ms_state.pNext = NULL;
1090 ms_state.flags = 0;
1091 ms_state.rasterizationSamples = samples;
1092 ms_state.sampleShadingEnable = sample_shading_enable;
1093 ms_state.minSampleShading = min_sample_shading;
1094 VkSampleMask sample_mask = 0xffffffff;
1095 ms_state.pSampleMask = &sample_mask;
1096 ms_state.alphaToCoverageEnable = false;
1097 ms_state.alphaToOneEnable = false;
1098
1099 VkPipelineDepthStencilStateCreateInfo ds_state;
1100 ds_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO;
1101 ds_state.pNext = NULL;
1102 ds_state.flags = 0;
1103 ds_state.depthTestEnable = ds_output != VK_FORMAT_UNDEFINED;
1104 ds_state.depthWriteEnable = true;
1105 ds_state.depthCompareOp = VK_COMPARE_OP_ALWAYS;
1106 ds_state.depthBoundsTestEnable = false;
1107 ds_state.stencilTestEnable = true;
1108 ds_state.front.failOp = VK_STENCIL_OP_KEEP;
1109 ds_state.front.passOp = VK_STENCIL_OP_REPLACE;
1110 ds_state.front.depthFailOp = VK_STENCIL_OP_REPLACE;
1111 ds_state.front.compareOp = VK_COMPARE_OP_ALWAYS;
1112 ds_state.front.compareMask = 0xffffffff, ds_state.front.writeMask = 0;
1113 ds_state.front.reference = 0;
1114 ds_state.back = ds_state.front;
1115
1116 VkPipelineColorBlendStateCreateInfo color_blend_state;
1117 color_blend_state.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO;
1118 color_blend_state.pNext = NULL;
1119 color_blend_state.flags = 0;
1120 color_blend_state.logicOpEnable = false;
1121 color_blend_state.attachmentCount = num_color_attachments;
1122 color_blend_state.pAttachments = blend_attachment_states;
1123
1124 VkDynamicState dynamic_states[9] = {VK_DYNAMIC_STATE_VIEWPORT,
1125 VK_DYNAMIC_STATE_SCISSOR,
1126 VK_DYNAMIC_STATE_LINE_WIDTH,
1127 VK_DYNAMIC_STATE_DEPTH_BIAS,
1128 VK_DYNAMIC_STATE_BLEND_CONSTANTS,
1129 VK_DYNAMIC_STATE_DEPTH_BOUNDS,
1130 VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
1131 VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
1132 VK_DYNAMIC_STATE_STENCIL_REFERENCE};
1133
1134 VkPipelineDynamicStateCreateInfo dynamic_state;
1135 dynamic_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO;
1136 dynamic_state.pNext = NULL;
1137 dynamic_state.flags = 0;
1138 dynamic_state.dynamicStateCount = sizeof(dynamic_states) / sizeof(VkDynamicState);
1139 dynamic_state.pDynamicStates = dynamic_states;
1140
1141 gfx_pipeline_info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
1142 gfx_pipeline_info.pNext = NULL;
1143 gfx_pipeline_info.flags = VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR;
1144 gfx_pipeline_info.pVertexInputState = &vs_input;
1145 gfx_pipeline_info.pInputAssemblyState = &assembly_state;
1146 gfx_pipeline_info.pTessellationState = &tess_state;
1147 gfx_pipeline_info.pViewportState = &viewport_state;
1148 gfx_pipeline_info.pRasterizationState = &rasterization_state;
1149 gfx_pipeline_info.pMultisampleState = &ms_state;
1150 gfx_pipeline_info.pDepthStencilState = &ds_state;
1151 gfx_pipeline_info.pColorBlendState = &color_blend_state;
1152 gfx_pipeline_info.pDynamicState = &dynamic_state;
1153 gfx_pipeline_info.subpass = 0;
1154
1155 /* create the objects used to create the pipeline */
1156 VkSubpassDescription subpass;
1157 subpass.flags = 0;
1158 subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
1159 subpass.inputAttachmentCount = 0;
1160 subpass.pInputAttachments = NULL;
1161 subpass.colorAttachmentCount = num_color_attachments;
1162 subpass.pColorAttachments = color_attachments;
1163 subpass.pResolveAttachments = NULL;
1164 subpass.pDepthStencilAttachment = ds_output == VK_FORMAT_UNDEFINED ? NULL : &ds_attachment;
1165 subpass.preserveAttachmentCount = 0;
1166 subpass.pPreserveAttachments = NULL;
1167
1168 VkRenderPassCreateInfo renderpass_info;
1169 renderpass_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
1170 renderpass_info.pNext = NULL;
1171 renderpass_info.flags = 0;
1172 renderpass_info.attachmentCount = num_attachments;
1173 renderpass_info.pAttachments = attachment_descs;
1174 renderpass_info.subpassCount = 1;
1175 renderpass_info.pSubpasses = &subpass;
1176 renderpass_info.dependencyCount = 0;
1177 renderpass_info.pDependencies = NULL;
1178
1179 ASSERTED VkResult result = CreateRenderPass(device, &renderpass_info, NULL, &render_pass);
1180 assert(result == VK_SUCCESS);
1181
1182 gfx_pipeline_info.layout = pipeline_layout;
1183 gfx_pipeline_info.renderPass = render_pass;
1184
1185 /* create the pipeline */
1186 gfx_pipeline_info.pStages = stages;
1187
1188 result = CreateGraphicsPipelines(device, VK_NULL_HANDLE, 1, &gfx_pipeline_info, NULL, &pipeline);
1189 assert(result == VK_SUCCESS);
1190 }
1191
1192 void
create_pipeline()1193 PipelineBuilder::create_pipeline()
1194 {
1195 unsigned num_desc_layouts = 0;
1196 for (unsigned i = 0; i < 64; i++) {
1197 if (!(desc_layouts_used & (1ull << i)))
1198 continue;
1199
1200 VkDescriptorSetLayoutCreateInfo desc_layout_info;
1201 desc_layout_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
1202 desc_layout_info.pNext = NULL;
1203 desc_layout_info.flags = 0;
1204 desc_layout_info.bindingCount = num_desc_bindings[i];
1205 desc_layout_info.pBindings = desc_bindings[i];
1206
1207 ASSERTED VkResult result = CreateDescriptorSetLayout(device, &desc_layout_info, NULL,
1208 &desc_layouts[num_desc_layouts]);
1209 assert(result == VK_SUCCESS);
1210 num_desc_layouts++;
1211 }
1212
1213 VkPipelineLayoutCreateInfo pipeline_layout_info;
1214 pipeline_layout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
1215 pipeline_layout_info.pNext = NULL;
1216 pipeline_layout_info.flags = 0;
1217 pipeline_layout_info.pushConstantRangeCount = 1;
1218 pipeline_layout_info.pPushConstantRanges = &push_constant_range;
1219 pipeline_layout_info.setLayoutCount = num_desc_layouts;
1220 pipeline_layout_info.pSetLayouts = desc_layouts;
1221
1222 ASSERTED VkResult result =
1223 CreatePipelineLayout(device, &pipeline_layout_info, NULL, &pipeline_layout);
1224 assert(result == VK_SUCCESS);
1225
1226 if (is_compute())
1227 create_compute_pipeline();
1228 else
1229 create_graphics_pipeline();
1230 }
1231
1232 void
print_ir(VkShaderStageFlagBits stage_flags,const char * name,bool remove_encoding)1233 PipelineBuilder::print_ir(VkShaderStageFlagBits stage_flags, const char* name, bool remove_encoding)
1234 {
1235 if (!pipeline)
1236 create_pipeline();
1237 print_pipeline_ir(device, pipeline, stage_flags, name, remove_encoding);
1238 }
1239