1 /*
2 * Copyright 2018 Collabora Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "zink_context.h"
25 #include "zink_compiler.h"
26 #include "zink_program.h"
27 #include "zink_screen.h"
28 #include "nir_to_spirv/nir_to_spirv.h"
29
30 #include "pipe/p_state.h"
31
32 #include "nir.h"
33 #include "compiler/nir/nir_builder.h"
34
35 #include "nir/tgsi_to_nir.h"
36 #include "tgsi/tgsi_dump.h"
37 #include "tgsi/tgsi_from_mesa.h"
38
39 #include "util/u_memory.h"
40
41 static bool
lower_discard_if_instr(nir_intrinsic_instr * instr,nir_builder * b)42 lower_discard_if_instr(nir_intrinsic_instr *instr, nir_builder *b)
43 {
44 if (instr->intrinsic == nir_intrinsic_discard_if) {
45 b->cursor = nir_before_instr(&instr->instr);
46
47 nir_if *if_stmt = nir_push_if(b, nir_ssa_for_src(b, instr->src[0], 1));
48 nir_intrinsic_instr *discard =
49 nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard);
50 nir_builder_instr_insert(b, &discard->instr);
51 nir_pop_if(b, if_stmt);
52 nir_instr_remove(&instr->instr);
53 return true;
54 }
55 /* a shader like this (shaders@glsl-fs-discard-04):
56
57 uniform int j, k;
58
59 void main()
60 {
61 for (int i = 0; i < j; i++) {
62 if (i > k)
63 continue;
64 discard;
65 }
66 gl_FragColor = vec4(0.0, 1.0, 0.0, 0.0);
67 }
68
69
70
71 will generate nir like:
72
73 loop {
74 //snip
75 if ssa_11 {
76 block block_5:
77 / preds: block_4 /
78 vec1 32 ssa_17 = iadd ssa_50, ssa_31
79 / succs: block_7 /
80 } else {
81 block block_6:
82 / preds: block_4 /
83 intrinsic discard () () <-- not last instruction
84 vec1 32 ssa_23 = iadd ssa_50, ssa_31 <-- dead code loop itr increment
85 / succs: block_7 /
86 }
87 //snip
88 }
89
90 which means that we can't assert like this:
91
92 assert(instr->intrinsic != nir_intrinsic_discard ||
93 nir_block_last_instr(instr->instr.block) == &instr->instr);
94
95
96 and it's unnecessary anyway since post-vtn optimizing will dce the instructions following the discard
97 */
98
99 return false;
100 }
101
102 static bool
lower_discard_if(nir_shader * shader)103 lower_discard_if(nir_shader *shader)
104 {
105 bool progress = false;
106
107 nir_foreach_function(function, shader) {
108 if (function->impl) {
109 nir_builder builder;
110 nir_builder_init(&builder, function->impl);
111 nir_foreach_block(block, function->impl) {
112 nir_foreach_instr_safe(instr, block) {
113 if (instr->type == nir_instr_type_intrinsic)
114 progress |= lower_discard_if_instr(
115 nir_instr_as_intrinsic(instr),
116 &builder);
117 }
118 }
119
120 nir_metadata_preserve(function->impl, nir_metadata_dominance);
121 }
122 }
123
124 return progress;
125 }
126
/* NIR compiler options handed to the state tracker for all shader stages:
 * request lowering of operations that either have no direct SPIR-V
 * equivalent or that zink's nir_to_spirv backend does not emit itself
 * (ffma variants, fdph, flrp32, fpow, fsat, byte/word extracts,
 * high multiplies, rotates, and add-with-carry).
 */
static const struct nir_shader_compiler_options nir_options = {
   .lower_all_io_to_temps = true,
   .lower_ffma16 = true,
   .lower_ffma32 = true,
   .lower_ffma64 = true,
   .lower_fdph = true,
   .lower_flrp32 = true,
   .lower_fpow = true,
   .lower_fsat = true,
   .lower_extract_byte = true,
   .lower_extract_word = true,
   .lower_mul_high = true,
   .lower_rotate = true,
   .lower_uadd_carry = true,
};
142
143 const void *
zink_get_compiler_options(struct pipe_screen * screen,enum pipe_shader_ir ir,enum pipe_shader_type shader)144 zink_get_compiler_options(struct pipe_screen *screen,
145 enum pipe_shader_ir ir,
146 enum pipe_shader_type shader)
147 {
148 assert(ir == PIPE_SHADER_IR_NIR);
149 return &nir_options;
150 }
151
152 struct nir_shader *
zink_tgsi_to_nir(struct pipe_screen * screen,const struct tgsi_token * tokens)153 zink_tgsi_to_nir(struct pipe_screen *screen, const struct tgsi_token *tokens)
154 {
155 if (zink_debug & ZINK_DEBUG_TGSI) {
156 fprintf(stderr, "TGSI shader:\n---8<---\n");
157 tgsi_dump_to_file(tokens, 0, stderr);
158 fprintf(stderr, "---8<---\n\n");
159 }
160
161 return tgsi_to_nir(tokens, screen, false);
162 }
163
164 static void
optimize_nir(struct nir_shader * s)165 optimize_nir(struct nir_shader *s)
166 {
167 bool progress;
168 do {
169 progress = false;
170 NIR_PASS_V(s, nir_lower_vars_to_ssa);
171 NIR_PASS(progress, s, nir_copy_prop);
172 NIR_PASS(progress, s, nir_opt_remove_phis);
173 NIR_PASS(progress, s, nir_opt_dce);
174 NIR_PASS(progress, s, nir_opt_dead_cf);
175 NIR_PASS(progress, s, nir_opt_cse);
176 NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
177 NIR_PASS(progress, s, nir_opt_algebraic);
178 NIR_PASS(progress, s, nir_opt_constant_folding);
179 NIR_PASS(progress, s, nir_opt_undef);
180 NIR_PASS(progress, s, zink_nir_lower_b2b);
181 } while (progress);
182 }
183
184 /* check for a genuine gl_PointSize output vs one from nir_lower_point_size_mov */
185 static bool
check_psiz(struct nir_shader * s)186 check_psiz(struct nir_shader *s)
187 {
188 nir_foreach_shader_out_variable(var, s) {
189 if (var->data.location == VARYING_SLOT_PSIZ) {
190 /* genuine PSIZ outputs will have this set */
191 return !!var->data.explicit_location;
192 }
193 }
194 return false;
195 }
196
197 /* semi-copied from iris */
198 static void
update_so_info(struct zink_shader * sh,uint64_t outputs_written,bool have_psiz)199 update_so_info(struct zink_shader *sh,
200 uint64_t outputs_written, bool have_psiz)
201 {
202 uint8_t reverse_map[64] = {};
203 unsigned slot = 0;
204 while (outputs_written) {
205 int bit = u_bit_scan64(&outputs_written);
206 /* PSIZ from nir_lower_point_size_mov breaks stream output, so always skip it */
207 if (bit == VARYING_SLOT_PSIZ && !have_psiz)
208 continue;
209 reverse_map[slot++] = bit;
210 }
211
212 for (unsigned i = 0; i < sh->streamout.so_info.num_outputs; i++) {
213 struct pipe_stream_output *output = &sh->streamout.so_info.output[i];
214 /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
215 sh->streamout.so_info_slots[i] = reverse_map[output->register_index];
216 }
217 }
218
219 VkShaderModule
zink_shader_compile(struct zink_screen * screen,struct zink_shader * zs,unsigned char * shader_slot_map,unsigned char * shader_slots_reserved)220 zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs,
221 unsigned char *shader_slot_map, unsigned char *shader_slots_reserved)
222 {
223 VkShaderModule mod = VK_NULL_HANDLE;
224 void *streamout = NULL;
225 if (zs->streamout.so_info_slots && (zs->nir->info.stage != MESA_SHADER_VERTEX || !zs->has_geometry_shader))
226 streamout = &zs->streamout;
227 struct spirv_shader *spirv = nir_to_spirv(zs->nir, streamout, shader_slot_map, shader_slots_reserved);
228 assert(spirv);
229
230 if (zink_debug & ZINK_DEBUG_SPIRV) {
231 char buf[256];
232 static int i;
233 snprintf(buf, sizeof(buf), "dump%02d.spv", i++);
234 FILE *fp = fopen(buf, "wb");
235 if (fp) {
236 fwrite(spirv->words, sizeof(uint32_t), spirv->num_words, fp);
237 fclose(fp);
238 fprintf(stderr, "wrote '%s'...\n", buf);
239 }
240 }
241
242 VkShaderModuleCreateInfo smci = {};
243 smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
244 smci.codeSize = spirv->num_words * sizeof(uint32_t);
245 smci.pCode = spirv->words;
246
247 if (vkCreateShaderModule(screen->dev, &smci, NULL, &mod) != VK_SUCCESS)
248 mod = VK_NULL_HANDLE;
249
250 /* TODO: determine if there's any reason to cache spirv output? */
251 free(spirv->words);
252 free(spirv);
253 return mod;
254 }
255
/* Build a zink_shader from a NIR shader: runs the zink-specific lowering
 * pass pipeline, assigns Vulkan descriptor bindings for UBOs and
 * samplers, and captures the stream-output info (if any).
 *
 * Takes ownership of `nir` (stored in ret->nir, freed in zink_shader_free).
 * `so_info` may be NULL when the shader has no stream output.
 */
struct zink_shader *
zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
                   const struct pipe_stream_output_info *so_info)
{
   /* NOTE(review): CALLOC_STRUCT result is not NULL-checked before use —
    * presumably the project treats OOM as fatal; confirm convention. */
   struct zink_shader *ret = CALLOC_STRUCT(zink_shader);
   bool have_psiz = false;

   /* Set of zink_gfx_programs referencing this shader; torn down in
    * zink_shader_free. */
   ret->programs = _mesa_pointer_set_create(NULL);

   /* only do uniforms -> ubo if we have uniforms, otherwise we're just
    * screwing with the bindings for no reason
    */
   if (nir->num_uniforms)
      NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, 16);
   NIR_PASS_V(nir, nir_lower_ubo_vec4);
   NIR_PASS_V(nir, nir_lower_clip_halfz);
   /* detect genuine PSIZ before later lowering can obscure it; only
    * pre-rasterization stages can write PSIZ */
   if (nir->info.stage < MESA_SHADER_FRAGMENT)
      have_psiz = check_psiz(nir);
   if (nir->info.stage == MESA_SHADER_GEOMETRY)
      NIR_PASS_V(nir, nir_lower_gs_intrinsics, nir_lower_gs_intrinsics_per_stream);
   NIR_PASS_V(nir, nir_lower_regs_to_ssa);
   optimize_nir(nir);
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
   NIR_PASS_V(nir, lower_discard_if);
   NIR_PASS_V(nir, nir_lower_fragcolor);
   NIR_PASS_V(nir, nir_convert_from_ssa, true);

   if (zink_debug & ZINK_DEBUG_NIR) {
      fprintf(stderr, "NIR shader:\n---8<---\n");
      nir_print_shader(nir, stderr);
      fprintf(stderr, "---8<---\n");
   }

   ret->num_bindings = 0;
   uint32_t cur_ubo = 0;
   /* UBO buffers are zero-indexed, but buffer 0 is always the one created by nir_lower_uniforms_to_ubo,
    * which means there is no buffer 0 if there are no uniforms
    */
   int ubo_index = !nir->num_uniforms;
   /* need to set up var->data.binding for UBOs, which means we need to start at
    * the "first" UBO, which is at the end of the list
    */
   foreach_list_typed_reverse(nir_variable, var, node, &nir->variables) {
      if (_nir_shader_variable_has_mode(var, nir_var_uniform |
                                             nir_var_mem_ubo |
                                             nir_var_mem_ssbo)) {
         if (var->data.mode == nir_var_mem_ubo) {
            /* ignore variables being accessed if they aren't the base of the UBO */
            if (var->data.location)
               continue;
            var->data.binding = cur_ubo++;

            int binding = zink_binding(nir->info.stage,
                                       VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
                                       var->data.binding);
            ret->bindings[ret->num_bindings].index = ubo_index++;
            ret->bindings[ret->num_bindings].binding = binding;
            ret->bindings[ret->num_bindings].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
            ret->num_bindings++;
         } else {
            assert(var->data.mode == nir_var_uniform);
            if (glsl_type_is_sampler(var->type)) {
               /* single sampler: one descriptor binding */
               VkDescriptorType vktype = zink_sampler_type(var->type);
               int binding = zink_binding(nir->info.stage,
                                          vktype,
                                          var->data.binding);
               ret->bindings[ret->num_bindings].index = var->data.binding;
               ret->bindings[ret->num_bindings].binding = binding;
               ret->bindings[ret->num_bindings].type = vktype;
               ret->num_bindings++;
            } else if (glsl_type_is_array(var->type)) {
               /* need to unroll possible arrays of arrays before checking type
                * in order to handle ARB_arrays_of_arrays extension
                */
               const struct glsl_type *type = glsl_without_array(var->type);
               if (!glsl_type_is_sampler(type))
                  continue;
               VkDescriptorType vktype = zink_sampler_type(type);

               /* one descriptor binding per array-of-arrays element */
               unsigned size = glsl_get_aoa_size(var->type);
               for (int i = 0; i < size; ++i) {
                  int binding = zink_binding(nir->info.stage,
                                             vktype,
                                             var->data.binding + i);
                  ret->bindings[ret->num_bindings].index = var->data.binding + i;
                  ret->bindings[ret->num_bindings].binding = binding;
                  ret->bindings[ret->num_bindings].type = vktype;
                  ret->num_bindings++;
               }
            }
         }
      }
   }

   ret->nir = nir;
   if (so_info) {
      memcpy(&ret->streamout.so_info, so_info, sizeof(struct pipe_stream_output_info));
      /* so_info_slots is filled by update_so_info and freed in zink_shader_free */
      ret->streamout.so_info_slots = malloc(so_info->num_outputs * sizeof(unsigned int));
      assert(ret->streamout.so_info_slots);
      update_so_info(ret, nir->info.outputs_written, have_psiz);
   }

   return ret;
}
360
361 void
zink_shader_free(struct zink_context * ctx,struct zink_shader * shader)362 zink_shader_free(struct zink_context *ctx, struct zink_shader *shader)
363 {
364 struct zink_screen *screen = zink_screen(ctx->base.screen);
365 set_foreach(shader->programs, entry) {
366 struct zink_gfx_program *prog = (void*)entry->key;
367 _mesa_hash_table_remove_key(ctx->program_cache, prog->shaders);
368 prog->shaders[pipe_shader_type_from_mesa(shader->nir->info.stage)] = NULL;
369 zink_gfx_program_reference(screen, &prog, NULL);
370 }
371 _mesa_set_destroy(shader->programs, NULL);
372 free(shader->streamout.so_info_slots);
373 ralloc_free(shader->nir);
374 FREE(shader);
375 }
376