1 /*
2 * Copyright © 2024 Collabora Ltd.
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include "util/u_dynarray.h"
7
8 #include "nir_builder.h"
9
10 #include "vk_blend.h"
11 #include "vk_format.h"
12 #include "vk_graphics_state.h"
13 #include "vk_log.h"
14
15 #include "pan_shader.h"
16
17 #include "panvk_blend.h"
18 #include "panvk_cmd_buffer.h"
19 #include "panvk_device.h"
20 #include "panvk_shader.h"
21
22 struct panvk_blend_shader_key {
23 enum panvk_meta_object_key_type type;
24 struct pan_blend_shader_key info;
25 };
26
27 static bool
lower_load_blend_const(nir_builder * b,nir_instr * instr,UNUSED void * data)28 lower_load_blend_const(nir_builder *b, nir_instr *instr, UNUSED void *data)
29 {
30 if (instr->type != nir_instr_type_intrinsic)
31 return false;
32
33 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
34
35 if (intr->intrinsic != nir_intrinsic_load_blend_const_color_rgba)
36 return false;
37
38 b->cursor = nir_before_instr(instr);
39
40 /* Blend constants are always passed through FAU words 0:3. */
41 nir_def *blend_consts = nir_load_push_constant(
42 b, intr->def.num_components, intr->def.bit_size, nir_imm_int(b, 0));
43
44 nir_def_rewrite_uses(&intr->def, blend_consts);
45 return true;
46 }
47
48 static VkResult
get_blend_shader(struct panvk_device * dev,const struct pan_blend_state * state,nir_alu_type src0_type,nir_alu_type src1_type,unsigned rt,uint64_t * shader_addr)49 get_blend_shader(struct panvk_device *dev,
50 const struct pan_blend_state *state,
51 nir_alu_type src0_type, nir_alu_type src1_type,
52 unsigned rt, uint64_t *shader_addr)
53 {
54 struct panvk_physical_device *pdev =
55 to_panvk_physical_device(dev->vk.physical);
56 struct panvk_blend_shader_key key = {
57 .type = PANVK_META_OBJECT_KEY_BLEND_SHADER,
58 .info = {
59 .format = state->rts[rt].format,
60 .src0_type = src0_type,
61 .src1_type = src1_type,
62 .rt = rt,
63 .has_constants =
64 pan_blend_constant_mask(state->rts[rt].equation) != 0,
65 .logicop_enable = state->logicop_enable,
66 .logicop_func = state->logicop_func,
67 .nr_samples = state->rts[rt].nr_samples,
68 .equation = state->rts[rt].equation,
69 .alpha_to_one = state->alpha_to_one,
70 },
71 };
72 struct panvk_internal_shader *shader;
73
74 assert(state->logicop_enable || state->alpha_to_one ||
75 !pan_blend_is_opaque(state->rts[rt].equation));
76 assert(state->rts[rt].equation.color_mask != 0);
77
78 VkShaderEXT shader_handle = (VkShaderEXT)vk_meta_lookup_object(
79 &dev->meta, VK_OBJECT_TYPE_SHADER_EXT, &key, sizeof(key));
80 if (shader_handle != VK_NULL_HANDLE)
81 goto out;
82
83 nir_shader *nir =
84 GENX(pan_blend_create_shader)(state, src0_type, src1_type, rt);
85
86 NIR_PASS(_, nir, nir_shader_instructions_pass, lower_load_blend_const,
87 nir_metadata_control_flow, NULL);
88
89 /* Compile the NIR shader */
90 struct panfrost_compile_inputs inputs = {
91 .gpu_id = pdev->kmod.props.gpu_prod_id,
92 .no_ubo_to_push = true,
93 .is_blend = true,
94 .blend = {
95 .nr_samples = key.info.nr_samples,
96 .bifrost_blend_desc =
97 GENX(pan_blend_get_internal_desc)(key.info.format, key.info.rt, 0,
98 false),
99 },
100 };
101
102 pan_shader_preprocess(nir, inputs.gpu_id);
103
104 enum pipe_format rt_formats[8] = {0};
105 rt_formats[rt] = key.info.format;
106 NIR_PASS(_, nir, GENX(pan_inline_rt_conversion), rt_formats);
107
108 VkResult result =
109 panvk_per_arch(create_internal_shader)(dev, nir, &inputs, &shader);
110
111 ralloc_free(nir);
112
113 if (result != VK_SUCCESS)
114 return result;
115
116 shader_handle = (VkShaderEXT)vk_meta_cache_object(
117 &dev->vk, &dev->meta, &key, sizeof(key), VK_OBJECT_TYPE_SHADER_EXT,
118 (uint64_t)panvk_internal_shader_to_handle(shader));
119
120 out:
121 shader = panvk_internal_shader_from_handle(shader_handle);
122 *shader_addr = panvk_priv_mem_dev_addr(shader->code_mem);
123 return VK_SUCCESS;
124 }
125
126 static void
emit_blend_desc(const struct pan_shader_info * fs_info,uint64_t fs_code,const struct pan_blend_state * state,unsigned rt_idx,uint64_t blend_shader,uint16_t constant,struct mali_blend_packed * bd)127 emit_blend_desc(const struct pan_shader_info *fs_info, uint64_t fs_code,
128 const struct pan_blend_state *state, unsigned rt_idx,
129 uint64_t blend_shader, uint16_t constant,
130 struct mali_blend_packed *bd)
131 {
132 const struct pan_blend_rt_state *rt = &state->rts[rt_idx];
133
134 pan_pack(bd, BLEND, cfg) {
135 if (!state->rt_count || !rt->equation.color_mask) {
136 cfg.enable = false;
137 cfg.internal.mode = MALI_BLEND_MODE_OFF;
138 continue;
139 }
140
141 cfg.srgb = util_format_is_srgb(rt->format);
142 cfg.load_destination = pan_blend_reads_dest(rt->equation);
143 cfg.round_to_fb_precision = true;
144 cfg.constant = constant;
145
146 if (blend_shader) {
147 /* Blend and fragment shaders must be in the same 4G region. */
148 assert((blend_shader >> 32) == (fs_code >> 32));
149 /* Blend shader must be 16-byte aligned. */
150 assert((blend_shader & 15) == 0);
151 /* Fragment shader return address must be 8-byte aligned. */
152 assert((fs_code & 7) == 0);
153
154 cfg.internal.mode = MALI_BLEND_MODE_SHADER;
155 cfg.internal.shader.pc = (uint32_t)blend_shader;
156
157 #if PAN_ARCH <= 7
158 uint32_t ret_offset = fs_info->bifrost.blend[rt_idx].return_offset;
159
160 /* If ret_offset is zero, we assume the BLEND is a terminal
161 * instruction and set return_value to zero, to let the
162 * blend shader jump to address zero, which terminates the
163 * thread.
164 */
165 cfg.internal.shader.return_value =
166 ret_offset ? fs_code + ret_offset : 0;
167 #endif
168 } else {
169 bool opaque = pan_blend_is_opaque(rt->equation);
170
171 cfg.internal.mode =
172 opaque ? MALI_BLEND_MODE_OPAQUE : MALI_BLEND_MODE_FIXED_FUNCTION;
173
174 pan_blend_to_fixed_function_equation(rt->equation, &cfg.equation);
175
176 /* If we want the conversion to work properly, num_comps must be set to
177 * 4.
178 */
179 cfg.internal.fixed_function.num_comps = 4;
180 cfg.internal.fixed_function.conversion.memory_format =
181 GENX(panfrost_dithered_format_from_pipe_format)(rt->format, false);
182
183 #if PAN_ARCH >= 7
184 if (cfg.internal.mode == MALI_BLEND_MODE_FIXED_FUNCTION &&
185 (cfg.internal.fixed_function.conversion.memory_format & 0xff) ==
186 MALI_RGB_COMPONENT_ORDER_RGB1) {
187 /* fixed function does not like RGB1 as the component order */
188 /* force this field to be the RGBA. */
189 cfg.internal.fixed_function.conversion.memory_format &= ~0xff;
190 cfg.internal.fixed_function.conversion.memory_format |=
191 MALI_RGB_COMPONENT_ORDER_RGBA;
192 }
193 #endif
194
195 cfg.internal.fixed_function.rt = rt_idx;
196
197 #if PAN_ARCH <= 7
198 if (fs_info->fs.untyped_color_outputs) {
199 nir_alu_type type = fs_info->bifrost.blend[rt_idx].type;
200
201 cfg.internal.fixed_function.conversion.register_format =
202 GENX(pan_fixup_blend_type)(type, rt->format);
203 } else {
204 cfg.internal.fixed_function.conversion.register_format =
205 fs_info->bifrost.blend[rt_idx].format;
206 }
207
208 if (!opaque) {
209 cfg.internal.fixed_function.alpha_zero_nop =
210 pan_blend_alpha_zero_nop(rt->equation);
211 cfg.internal.fixed_function.alpha_one_store =
212 pan_blend_alpha_one_store(rt->equation);
213 }
214 #endif
215 }
216 }
217 }
218
219 static uint16_t
get_ff_blend_constant(const struct pan_blend_state * state,unsigned rt_idx,unsigned const_idx)220 get_ff_blend_constant(const struct pan_blend_state *state, unsigned rt_idx,
221 unsigned const_idx)
222 {
223 const struct pan_blend_rt_state *rt = &state->rts[rt_idx];
224
225 /* On Bifrost, the blend constant is expressed with a UNORM of the
226 * size of the target format. The value is then shifted such that
227 * used bits are in the MSB.
228 */
229 const struct util_format_description *format_desc =
230 util_format_description(rt->format);
231 unsigned chan_size = 0;
232 for (unsigned c = 0; c < format_desc->nr_channels; c++)
233 chan_size = MAX2(format_desc->channel[c].size, chan_size);
234 float factor = ((1 << chan_size) - 1) << (16 - chan_size);
235
236 return (uint16_t)(state->constants[const_idx] * factor);
237 }
238
239 static bool
blend_needs_shader(const struct pan_blend_state * state,unsigned rt_idx,unsigned * ff_blend_constant)240 blend_needs_shader(const struct pan_blend_state *state, unsigned rt_idx,
241 unsigned *ff_blend_constant)
242 {
243 const struct pan_blend_rt_state *rt = &state->rts[rt_idx];
244
245 /* LogicOp requires a blend shader, unless it's a NOOP, in which case we just
246 * disable blending.
247 */
248 if (state->logicop_enable)
249 return state->logicop_func != PIPE_LOGICOP_NOOP;
250
251 /* alpha-to-one always requires a blend shader */
252 if (state->alpha_to_one)
253 return true;
254
255 /* If the output is opaque, we don't need a blend shader, no matter the
256 * format.
257 */
258 if (pan_blend_is_opaque(rt->equation))
259 return false;
260
261 /* Not all formats can be blended by fixed-function hardware */
262 if (!GENX(panfrost_blendable_format_from_pipe_format)(rt->format)->internal)
263 return true;
264
265 unsigned constant_mask = pan_blend_constant_mask(rt->equation);
266
267 /* v6 doesn't support blend constants in FF blend equations. */
268 if (constant_mask && PAN_ARCH == 6)
269 return true;
270
271 if (!pan_blend_is_homogenous_constant(constant_mask, state->constants))
272 return true;
273
274 /* v7+ only uses the constant from RT 0. If we're not RT0, all previous
275 * RTs using FF with a blend constant need to have the same constant,
276 * otherwise we need a blend shader.
277 */
278 unsigned blend_const = ~0;
279 if (constant_mask) {
280 blend_const =
281 get_ff_blend_constant(state, rt_idx, ffs(constant_mask) - 1);
282
283 if (*ff_blend_constant != ~0 && blend_const != *ff_blend_constant)
284 return true;
285 }
286
287 bool supports_2src = pan_blend_supports_2src(PAN_ARCH);
288 if (!pan_blend_can_fixed_function(rt->equation, supports_2src))
289 return true;
290
291 /* Update the fixed function blend constant, if we use it. */
292 if (blend_const != ~0)
293 *ff_blend_constant = blend_const;
294
295 return false;
296 }
297
298 VkResult
panvk_per_arch(blend_emit_descs)299 panvk_per_arch(blend_emit_descs)(struct panvk_cmd_buffer *cmdbuf,
300 struct mali_blend_packed *bds)
301 {
302 struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
303 const struct vk_dynamic_graphics_state *dyns =
304 &cmdbuf->vk.dynamic_graphics_state;
305 const struct vk_color_blend_state *cb = &dyns->cb;
306 const struct panvk_shader *fs = cmdbuf->state.gfx.fs.shader;
307 const struct pan_shader_info *fs_info = fs ? &fs->info : NULL;
308 uint64_t fs_code = panvk_shader_get_dev_addr(fs);
309 const struct panvk_rendering_state *render = &cmdbuf->state.gfx.render;
310 const VkFormat *color_attachment_formats = render->color_attachments.fmts;
311 const uint8_t *color_attachment_samples = render->color_attachments.samples;
312 struct panvk_blend_info *blend_info = &cmdbuf->state.gfx.cb.info;
313 struct pan_blend_state bs = {
314 .alpha_to_one = dyns->ms.alpha_to_one_enable,
315 .logicop_enable = cb->logic_op_enable,
316 .logicop_func = vk_logic_op_to_pipe(cb->logic_op),
317 .rt_count = cb->attachment_count,
318 .constants =
319 {
320 cb->blend_constants[0],
321 cb->blend_constants[1],
322 cb->blend_constants[2],
323 cb->blend_constants[3],
324 },
325 };
326 uint64_t blend_shaders[8] = {};
327 /* All bits set to one encodes unused fixed-function blend constant. */
328 unsigned ff_blend_constant = ~0;
329
330 memset(blend_info, 0, sizeof(*blend_info));
331 for (uint8_t i = 0; i < cb->attachment_count; i++) {
332 struct pan_blend_rt_state *rt = &bs.rts[i];
333
334 if (!(cb->color_write_enables & BITFIELD_BIT(i))) {
335 rt->equation.color_mask = 0;
336 continue;
337 }
338
339 if (bs.logicop_enable && bs.logicop_func == PIPE_LOGICOP_NOOP) {
340 rt->equation.color_mask = 0;
341 continue;
342 }
343
344 if (color_attachment_formats[i] == VK_FORMAT_UNDEFINED) {
345 rt->equation.color_mask = 0;
346 continue;
347 }
348
349 if (!cb->attachments[i].write_mask) {
350 rt->equation.color_mask = 0;
351 continue;
352 }
353
354 rt->format = vk_format_to_pipe_format(color_attachment_formats[i]);
355
356 rt->nr_samples = color_attachment_samples[i];
357 rt->equation.blend_enable = cb->attachments[i].blend_enable;
358 rt->equation.color_mask = cb->attachments[i].write_mask;
359 rt->equation.rgb_func =
360 vk_blend_op_to_pipe(cb->attachments[i].color_blend_op);
361 rt->equation.rgb_src_factor =
362 vk_blend_factor_to_pipe(cb->attachments[i].src_color_blend_factor);
363 rt->equation.rgb_dst_factor =
364 vk_blend_factor_to_pipe(cb->attachments[i].dst_color_blend_factor);
365 rt->equation.alpha_func =
366 vk_blend_op_to_pipe(cb->attachments[i].alpha_blend_op);
367 rt->equation.alpha_src_factor =
368 vk_blend_factor_to_pipe(cb->attachments[i].src_alpha_blend_factor);
369 rt->equation.alpha_dst_factor =
370 vk_blend_factor_to_pipe(cb->attachments[i].dst_alpha_blend_factor);
371
372 bool dest_has_alpha = util_format_has_alpha(rt->format);
373 if (!dest_has_alpha) {
374 rt->equation.rgb_src_factor =
375 util_blend_dst_alpha_to_one(rt->equation.rgb_src_factor);
376 rt->equation.rgb_dst_factor =
377 util_blend_dst_alpha_to_one(rt->equation.rgb_dst_factor);
378
379 rt->equation.alpha_src_factor =
380 util_blend_dst_alpha_to_one(rt->equation.alpha_src_factor);
381 rt->equation.alpha_dst_factor =
382 util_blend_dst_alpha_to_one(rt->equation.alpha_dst_factor);
383 }
384
385 blend_info->any_dest_read |= pan_blend_reads_dest(rt->equation);
386
387 if (blend_needs_shader(&bs, i, &ff_blend_constant)) {
388 nir_alu_type src0_type = fs_info->bifrost.blend[i].type;
389 nir_alu_type src1_type = fs_info->bifrost.blend_src1_type;
390
391 VkResult result = get_blend_shader(dev, &bs, src0_type, src1_type, i,
392 &blend_shaders[i]);
393 if (result != VK_SUCCESS)
394 return result;
395
396 blend_info->shader_loads_blend_const |=
397 pan_blend_constant_mask(rt->equation) != 0;
398 blend_info->needs_shader = true;
399 }
400 }
401
402 /* Set the blend constant to zero if it's not used by any of the blend ops. */
403 if (ff_blend_constant == ~0)
404 ff_blend_constant = 0;
405
406 /* Now that we've collected all the information, we can emit. */
407 for (uint8_t i = 0; i < MAX2(cb->attachment_count, 1); i++) {
408 emit_blend_desc(fs_info, fs_code, &bs, i, blend_shaders[i],
409 ff_blend_constant, &bds[i]);
410 }
411
412 if (blend_info->shader_loads_blend_const)
413 gfx_state_set_dirty(cmdbuf, FS_PUSH_UNIFORMS);
414
415 return VK_SUCCESS;
416 }
417