1 /*
2 * Copyright © 2014-2017 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "util/format/u_format.h"
25 #include "util/half_float.h"
26 #include "v3d_context.h"
27 #include "broadcom/common/v3d_macros.h"
28 #include "broadcom/cle/v3dx_pack.h"
29 #include "broadcom/common/v3d_util.h"
30 #include "broadcom/compiler/v3d_compiler.h"
31
32 static uint8_t
v3d_factor(enum pipe_blendfactor factor,bool dst_alpha_one)33 v3d_factor(enum pipe_blendfactor factor, bool dst_alpha_one)
34 {
35 /* We may get a bad blendfactor when blending is disabled. */
36 if (factor == 0)
37 return V3D_BLEND_FACTOR_ZERO;
38
39 switch (factor) {
40 case PIPE_BLENDFACTOR_ZERO:
41 return V3D_BLEND_FACTOR_ZERO;
42 case PIPE_BLENDFACTOR_ONE:
43 return V3D_BLEND_FACTOR_ONE;
44 case PIPE_BLENDFACTOR_SRC_COLOR:
45 return V3D_BLEND_FACTOR_SRC_COLOR;
46 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
47 return V3D_BLEND_FACTOR_INV_SRC_COLOR;
48 case PIPE_BLENDFACTOR_DST_COLOR:
49 return V3D_BLEND_FACTOR_DST_COLOR;
50 case PIPE_BLENDFACTOR_INV_DST_COLOR:
51 return V3D_BLEND_FACTOR_INV_DST_COLOR;
52 case PIPE_BLENDFACTOR_SRC_ALPHA:
53 return V3D_BLEND_FACTOR_SRC_ALPHA;
54 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
55 return V3D_BLEND_FACTOR_INV_SRC_ALPHA;
56 case PIPE_BLENDFACTOR_DST_ALPHA:
57 return (dst_alpha_one ?
58 V3D_BLEND_FACTOR_ONE :
59 V3D_BLEND_FACTOR_DST_ALPHA);
60 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
61 return (dst_alpha_one ?
62 V3D_BLEND_FACTOR_ZERO :
63 V3D_BLEND_FACTOR_INV_DST_ALPHA);
64 case PIPE_BLENDFACTOR_CONST_COLOR:
65 return V3D_BLEND_FACTOR_CONST_COLOR;
66 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
67 return V3D_BLEND_FACTOR_INV_CONST_COLOR;
68 case PIPE_BLENDFACTOR_CONST_ALPHA:
69 return V3D_BLEND_FACTOR_CONST_ALPHA;
70 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
71 return V3D_BLEND_FACTOR_INV_CONST_ALPHA;
72 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
73 return (dst_alpha_one ?
74 V3D_BLEND_FACTOR_ZERO :
75 V3D_BLEND_FACTOR_SRC_ALPHA_SATURATE);
76 default:
77 unreachable("Bad blend factor");
78 }
79 }
80
81 #if V3D_VERSION < 40
82 static inline uint16_t
swizzled_border_color(const struct v3d_device_info * devinfo,struct pipe_sampler_state * sampler,struct v3d_sampler_view * sview,int chan)83 swizzled_border_color(const struct v3d_device_info *devinfo,
84 struct pipe_sampler_state *sampler,
85 struct v3d_sampler_view *sview,
86 int chan)
87 {
88 const struct util_format_description *desc =
89 util_format_description(sview->base.format);
90 uint8_t swiz = chan;
91
92 /* If we're doing swizzling in the sampler, then only rearrange the
93 * border color for the mismatch between the V3D texture format and
94 * the PIPE_FORMAT, since GL_ARB_texture_swizzle will be handled by
95 * the sampler's swizzle.
96 *
97 * For swizzling in the shader, we don't do any pre-swizzling of the
98 * border color.
99 */
100 if (v3d_get_tex_return_size(devinfo, sview->base.format,
101 sampler->compare_mode) != 32)
102 swiz = desc->swizzle[swiz];
103
104 switch (swiz) {
105 case PIPE_SWIZZLE_0:
106 return _mesa_float_to_half(0.0);
107 case PIPE_SWIZZLE_1:
108 return _mesa_float_to_half(1.0);
109 default:
110 return _mesa_float_to_half(sampler->border_color.f[swiz]);
111 }
112 }
113
114 static void
emit_one_texture(struct v3d_context * v3d,struct v3d_texture_stateobj * stage_tex,int i)115 emit_one_texture(struct v3d_context *v3d, struct v3d_texture_stateobj *stage_tex,
116 int i)
117 {
118 struct v3d_job *job = v3d->job;
119 struct pipe_sampler_state *psampler = stage_tex->samplers[i];
120 struct v3d_sampler_state *sampler = v3d_sampler_state(psampler);
121 struct pipe_sampler_view *psview = stage_tex->textures[i];
122 struct v3d_sampler_view *sview = v3d_sampler_view(psview);
123 struct pipe_resource *prsc = psview->texture;
124 struct v3d_resource *rsc = v3d_resource(prsc);
125 const struct v3d_device_info *devinfo = &v3d->screen->devinfo;
126
127 stage_tex->texture_state[i].offset =
128 v3d_cl_ensure_space(&job->indirect,
129 cl_packet_length(TEXTURE_SHADER_STATE),
130 32);
131 v3d_bo_set_reference(&stage_tex->texture_state[i].bo,
132 job->indirect.bo);
133
134 uint32_t return_size = v3d_get_tex_return_size(devinfo, psview->format,
135 psampler->compare_mode);
136
137 struct V3D33_TEXTURE_SHADER_STATE unpacked = {
138 /* XXX */
139 .border_color_red = swizzled_border_color(devinfo, psampler,
140 sview, 0),
141 .border_color_green = swizzled_border_color(devinfo, psampler,
142 sview, 1),
143 .border_color_blue = swizzled_border_color(devinfo, psampler,
144 sview, 2),
145 .border_color_alpha = swizzled_border_color(devinfo, psampler,
146 sview, 3),
147
148 /* In the normal texturing path, the LOD gets clamped between
149 * min/max, and the base_level field (set in the sampler view
150 * from first_level) only decides where the min/mag switch
151 * happens, so we need to use the LOD clamps to keep us
152 * between min and max.
153 *
154 * For txf, the LOD clamp is still used, despite GL not
155 * wanting that. We will need to have a separate
156 * TEXTURE_SHADER_STATE that ignores psview->min/max_lod to
157 * support txf properly.
158 */
159 .min_level_of_detail = MIN2(psview->u.tex.first_level +
160 MAX2(psampler->min_lod, 0),
161 psview->u.tex.last_level),
162 .max_level_of_detail = MIN2(psview->u.tex.first_level +
163 MAX2(psampler->max_lod,
164 psampler->min_lod),
165 psview->u.tex.last_level),
166
167 .texture_base_pointer = cl_address(rsc->bo,
168 rsc->slices[0].offset),
169
170 .output_32_bit = return_size == 32,
171 };
172
173 /* Set up the sampler swizzle if we're doing 16-bit sampling. For
174 * 32-bit, we leave swizzling up to the shader compiler.
175 *
176 * Note: Contrary to the docs, the swizzle still applies even if the
177 * return size is 32. It's just that you probably want to swizzle in
178 * the shader, because you need the Y/Z/W channels to be defined.
179 */
180 if (return_size == 32) {
181 unpacked.swizzle_r = v3d_translate_pipe_swizzle(PIPE_SWIZZLE_X);
182 unpacked.swizzle_g = v3d_translate_pipe_swizzle(PIPE_SWIZZLE_Y);
183 unpacked.swizzle_b = v3d_translate_pipe_swizzle(PIPE_SWIZZLE_Z);
184 unpacked.swizzle_a = v3d_translate_pipe_swizzle(PIPE_SWIZZLE_W);
185 } else {
186 unpacked.swizzle_r = v3d_translate_pipe_swizzle(sview->swizzle[0]);
187 unpacked.swizzle_g = v3d_translate_pipe_swizzle(sview->swizzle[1]);
188 unpacked.swizzle_b = v3d_translate_pipe_swizzle(sview->swizzle[2]);
189 unpacked.swizzle_a = v3d_translate_pipe_swizzle(sview->swizzle[3]);
190 }
191
192 int min_img_filter = psampler->min_img_filter;
193 int min_mip_filter = psampler->min_mip_filter;
194 int mag_img_filter = psampler->mag_img_filter;
195
196 if (return_size == 32) {
197 min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST;
198 min_img_filter = PIPE_TEX_FILTER_NEAREST;
199 mag_img_filter = PIPE_TEX_FILTER_NEAREST;
200 }
201
202 bool min_nearest = min_img_filter == PIPE_TEX_FILTER_NEAREST;
203 switch (min_mip_filter) {
204 case PIPE_TEX_MIPFILTER_NONE:
205 unpacked.filter += min_nearest ? 2 : 0;
206 break;
207 case PIPE_TEX_MIPFILTER_NEAREST:
208 unpacked.filter += min_nearest ? 4 : 8;
209 break;
210 case PIPE_TEX_MIPFILTER_LINEAR:
211 unpacked.filter += min_nearest ? 4 : 8;
212 unpacked.filter += 2;
213 break;
214 }
215
216 if (mag_img_filter == PIPE_TEX_FILTER_NEAREST)
217 unpacked.filter++;
218
219 if (psampler->max_anisotropy > 8)
220 unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_16_1;
221 else if (psampler->max_anisotropy > 4)
222 unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_8_1;
223 else if (psampler->max_anisotropy > 2)
224 unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_4_1;
225 else if (psampler->max_anisotropy)
226 unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_2_1;
227
228 uint8_t packed[cl_packet_length(TEXTURE_SHADER_STATE)];
229 cl_packet_pack(TEXTURE_SHADER_STATE)(&job->indirect, packed, &unpacked);
230
231 for (int i = 0; i < ARRAY_SIZE(packed); i++)
232 packed[i] |= sview->texture_shader_state[i] | sampler->texture_shader_state[i];
233
234 /* TMU indirect structs need to be 32b aligned. */
235 v3d_cl_ensure_space(&job->indirect, ARRAY_SIZE(packed), 32);
236 cl_emit_prepacked(&job->indirect, &packed);
237 }
238
239 static void
emit_textures(struct v3d_context * v3d,struct v3d_texture_stateobj * stage_tex)240 emit_textures(struct v3d_context *v3d, struct v3d_texture_stateobj *stage_tex)
241 {
242 for (int i = 0; i < stage_tex->num_textures; i++) {
243 if (stage_tex->textures[i])
244 emit_one_texture(v3d, stage_tex, i);
245 }
246 }
247 #endif /* V3D_VERSION < 40 */
248
249 static uint32_t
translate_colormask(struct v3d_context * v3d,uint32_t colormask,int rt)250 translate_colormask(struct v3d_context *v3d, uint32_t colormask, int rt)
251 {
252 if (v3d->swap_color_rb & (1 << rt)) {
253 colormask = ((colormask & (2 | 8)) |
254 ((colormask & 1) << 2) |
255 ((colormask & 4) >> 2));
256 }
257
258 return (~colormask) & 0xf;
259 }
260
261 static void
emit_rt_blend(struct v3d_context * v3d,struct v3d_job * job,struct pipe_blend_state * blend,int rt,uint8_t rt_mask,bool blend_dst_alpha_one)262 emit_rt_blend(struct v3d_context *v3d, struct v3d_job *job,
263 struct pipe_blend_state *blend, int rt, uint8_t rt_mask,
264 bool blend_dst_alpha_one)
265 {
266 struct pipe_rt_blend_state *rtblend = &blend->rt[rt];
267
268 #if V3D_VERSION >= 40
269 /* We don't need to emit blend state for disabled RTs. */
270 if (!rtblend->blend_enable)
271 return;
272 #endif
273
274 cl_emit(&job->bcl, BLEND_CFG, config) {
275 #if V3D_VERSION >= 40
276 config.render_target_mask = rt_mask;
277 #else
278 assert(rt == 0);
279 #endif
280
281 config.color_blend_mode = rtblend->rgb_func;
282 config.color_blend_dst_factor =
283 v3d_factor(rtblend->rgb_dst_factor,
284 blend_dst_alpha_one);
285 config.color_blend_src_factor =
286 v3d_factor(rtblend->rgb_src_factor,
287 blend_dst_alpha_one);
288
289 config.alpha_blend_mode = rtblend->alpha_func;
290 config.alpha_blend_dst_factor =
291 v3d_factor(rtblend->alpha_dst_factor,
292 blend_dst_alpha_one);
293 config.alpha_blend_src_factor =
294 v3d_factor(rtblend->alpha_src_factor,
295 blend_dst_alpha_one);
296 }
297 }
298
299 static void
emit_flat_shade_flags(struct v3d_job * job,int varying_offset,uint32_t varyings,enum V3DX (Varying_Flags_Action)lower,enum V3DX (Varying_Flags_Action)higher)300 emit_flat_shade_flags(struct v3d_job *job,
301 int varying_offset,
302 uint32_t varyings,
303 enum V3DX(Varying_Flags_Action) lower,
304 enum V3DX(Varying_Flags_Action) higher)
305 {
306 cl_emit(&job->bcl, FLAT_SHADE_FLAGS, flags) {
307 flags.varying_offset_v0 = varying_offset;
308 flags.flat_shade_flags_for_varyings_v024 = varyings;
309 flags.action_for_flat_shade_flags_of_lower_numbered_varyings =
310 lower;
311 flags.action_for_flat_shade_flags_of_higher_numbered_varyings =
312 higher;
313 }
314 }
315
316 #if V3D_VERSION >= 40
317 static void
emit_noperspective_flags(struct v3d_job * job,int varying_offset,uint32_t varyings,enum V3DX (Varying_Flags_Action)lower,enum V3DX (Varying_Flags_Action)higher)318 emit_noperspective_flags(struct v3d_job *job,
319 int varying_offset,
320 uint32_t varyings,
321 enum V3DX(Varying_Flags_Action) lower,
322 enum V3DX(Varying_Flags_Action) higher)
323 {
324 cl_emit(&job->bcl, NON_PERSPECTIVE_FLAGS, flags) {
325 flags.varying_offset_v0 = varying_offset;
326 flags.non_perspective_flags_for_varyings_v024 = varyings;
327 flags.action_for_non_perspective_flags_of_lower_numbered_varyings =
328 lower;
329 flags.action_for_non_perspective_flags_of_higher_numbered_varyings =
330 higher;
331 }
332 }
333
334 static void
emit_centroid_flags(struct v3d_job * job,int varying_offset,uint32_t varyings,enum V3DX (Varying_Flags_Action)lower,enum V3DX (Varying_Flags_Action)higher)335 emit_centroid_flags(struct v3d_job *job,
336 int varying_offset,
337 uint32_t varyings,
338 enum V3DX(Varying_Flags_Action) lower,
339 enum V3DX(Varying_Flags_Action) higher)
340 {
341 cl_emit(&job->bcl, CENTROID_FLAGS, flags) {
342 flags.varying_offset_v0 = varying_offset;
343 flags.centroid_flags_for_varyings_v024 = varyings;
344 flags.action_for_centroid_flags_of_lower_numbered_varyings =
345 lower;
346 flags.action_for_centroid_flags_of_higher_numbered_varyings =
347 higher;
348 }
349 }
350 #endif /* V3D_VERSION >= 40 */
351
352 static bool
emit_varying_flags(struct v3d_job * job,uint32_t * flags,void (* flag_emit_callback)(struct v3d_job * job,int varying_offset,uint32_t flags,enum V3DX (Varying_Flags_Action)lower,enum V3DX (Varying_Flags_Action)higher))353 emit_varying_flags(struct v3d_job *job, uint32_t *flags,
354 void (*flag_emit_callback)(struct v3d_job *job,
355 int varying_offset,
356 uint32_t flags,
357 enum V3DX(Varying_Flags_Action) lower,
358 enum V3DX(Varying_Flags_Action) higher))
359 {
360 struct v3d_context *v3d = job->v3d;
361 bool emitted_any = false;
362
363 for (int i = 0; i < ARRAY_SIZE(v3d->prog.fs->prog_data.fs->flat_shade_flags); i++) {
364 if (!flags[i])
365 continue;
366
367 if (emitted_any) {
368 flag_emit_callback(job, i, flags[i],
369 V3D_VARYING_FLAGS_ACTION_UNCHANGED,
370 V3D_VARYING_FLAGS_ACTION_UNCHANGED);
371 } else if (i == 0) {
372 flag_emit_callback(job, i, flags[i],
373 V3D_VARYING_FLAGS_ACTION_UNCHANGED,
374 V3D_VARYING_FLAGS_ACTION_ZEROED);
375 } else {
376 flag_emit_callback(job, i, flags[i],
377 V3D_VARYING_FLAGS_ACTION_ZEROED,
378 V3D_VARYING_FLAGS_ACTION_ZEROED);
379 }
380 emitted_any = true;
381 }
382
383 return emitted_any;
384 }
385
386 static inline struct v3d_uncompiled_shader *
get_tf_shader(struct v3d_context * v3d)387 get_tf_shader(struct v3d_context *v3d)
388 {
389 if (v3d->prog.bind_gs)
390 return v3d->prog.bind_gs;
391 else
392 return v3d->prog.bind_vs;
393 }
394
395 void
v3dX(emit_state)396 v3dX(emit_state)(struct pipe_context *pctx)
397 {
398 struct v3d_context *v3d = v3d_context(pctx);
399 struct v3d_job *job = v3d->job;
400 bool rasterizer_discard = v3d->rasterizer->base.rasterizer_discard;
401
402 if (v3d->dirty & (V3D_DIRTY_SCISSOR | V3D_DIRTY_VIEWPORT |
403 V3D_DIRTY_RASTERIZER)) {
404 float *vpscale = v3d->viewport.scale;
405 float *vptranslate = v3d->viewport.translate;
406 float vp_minx = -fabsf(vpscale[0]) + vptranslate[0];
407 float vp_maxx = fabsf(vpscale[0]) + vptranslate[0];
408 float vp_miny = -fabsf(vpscale[1]) + vptranslate[1];
409 float vp_maxy = fabsf(vpscale[1]) + vptranslate[1];
410
411 /* Clip to the scissor if it's enabled, but still clip to the
412 * drawable regardless since that controls where the binner
413 * tries to put things.
414 *
415 * Additionally, always clip the rendering to the viewport,
416 * since the hardware does guardband clipping, meaning
417 * primitives would rasterize outside of the view volume.
418 */
419 uint32_t minx, miny, maxx, maxy;
420 if (!v3d->rasterizer->base.scissor) {
421 minx = MAX2(vp_minx, 0);
422 miny = MAX2(vp_miny, 0);
423 maxx = MIN2(vp_maxx, job->draw_width);
424 maxy = MIN2(vp_maxy, job->draw_height);
425 } else {
426 minx = MAX2(vp_minx, v3d->scissor.minx);
427 miny = MAX2(vp_miny, v3d->scissor.miny);
428 maxx = MIN2(vp_maxx, v3d->scissor.maxx);
429 maxy = MIN2(vp_maxy, v3d->scissor.maxy);
430 }
431
432 cl_emit(&job->bcl, CLIP_WINDOW, clip) {
433 clip.clip_window_left_pixel_coordinate = minx;
434 clip.clip_window_bottom_pixel_coordinate = miny;
435 if (maxx > minx && maxy > miny) {
436 clip.clip_window_width_in_pixels = maxx - minx;
437 clip.clip_window_height_in_pixels = maxy - miny;
438 } else if (V3D_VERSION < 41) {
439 /* The HW won't entirely clip out when scissor
440 * w/h is 0. Just treat it the same as
441 * rasterizer discard.
442 */
443 rasterizer_discard = true;
444 clip.clip_window_width_in_pixels = 1;
445 clip.clip_window_height_in_pixels = 1;
446 }
447 }
448
449 job->draw_min_x = MIN2(job->draw_min_x, minx);
450 job->draw_min_y = MIN2(job->draw_min_y, miny);
451 job->draw_max_x = MAX2(job->draw_max_x, maxx);
452 job->draw_max_y = MAX2(job->draw_max_y, maxy);
453
454 if (!v3d->rasterizer->base.scissor) {
455 job->scissor.disabled = true;
456 } else if (!job->scissor.disabled &&
457 (v3d->dirty & V3D_DIRTY_SCISSOR)) {
458 if (job->scissor.count < MAX_JOB_SCISSORS) {
459 job->scissor.rects[job->scissor.count].min_x =
460 v3d->scissor.minx;
461 job->scissor.rects[job->scissor.count].min_y =
462 v3d->scissor.miny;
463 job->scissor.rects[job->scissor.count].max_x =
464 v3d->scissor.maxx - 1;
465 job->scissor.rects[job->scissor.count].max_y =
466 v3d->scissor.maxy - 1;
467 job->scissor.count++;
468 } else {
469 job->scissor.disabled = true;
470 perf_debug("Too many scissor rects.");
471 }
472 }
473 }
474
475 if (v3d->dirty & (V3D_DIRTY_RASTERIZER |
476 V3D_DIRTY_ZSA |
477 V3D_DIRTY_BLEND |
478 V3D_DIRTY_COMPILED_FS)) {
479 cl_emit(&job->bcl, CFG_BITS, config) {
480 config.enable_forward_facing_primitive =
481 !rasterizer_discard &&
482 !(v3d->rasterizer->base.cull_face &
483 PIPE_FACE_FRONT);
484 config.enable_reverse_facing_primitive =
485 !rasterizer_discard &&
486 !(v3d->rasterizer->base.cull_face &
487 PIPE_FACE_BACK);
488 /* This seems backwards, but it's what gets the
489 * clipflat test to pass.
490 */
491 config.clockwise_primitives =
492 v3d->rasterizer->base.front_ccw;
493
494 config.enable_depth_offset =
495 v3d->rasterizer->base.offset_tri;
496
497 /* V3D follows GL behavior where the sample mask only
498 * applies when MSAA is enabled. Gallium has sample
499 * mask apply anyway, and the MSAA blit shaders will
500 * set sample mask without explicitly setting
501 * rasterizer oversample. Just force it on here,
502 * since the blit shaders are the only way to have
503 * !multisample && samplemask != 0xf.
504 */
505 config.rasterizer_oversample_mode =
506 v3d->rasterizer->base.multisample ||
507 v3d->sample_mask != 0xf;
508
509 config.direct3d_provoking_vertex =
510 v3d->rasterizer->base.flatshade_first;
511
512 config.blend_enable = v3d->blend->blend_enables;
513
514 /* Note: EZ state may update based on the compiled FS,
515 * along with ZSA
516 */
517 config.early_z_updates_enable =
518 (job->ez_state != V3D_EZ_DISABLED);
519 if (v3d->zsa->base.depth_enabled) {
520 config.z_updates_enable =
521 v3d->zsa->base.depth_writemask;
522 config.early_z_enable =
523 config.early_z_updates_enable;
524 config.depth_test_function =
525 v3d->zsa->base.depth_func;
526 } else {
527 config.depth_test_function = PIPE_FUNC_ALWAYS;
528 }
529
530 config.stencil_enable =
531 v3d->zsa->base.stencil[0].enabled;
532
533 /* Use nicer line caps when line smoothing is
534 * enabled
535 */
536 config.line_rasterization =
537 v3d_line_smoothing_enabled(v3d) ?
538 V3D_LINE_RASTERIZATION_PERP_END_CAPS :
539 V3D_LINE_RASTERIZATION_DIAMOND_EXIT;
540 }
541
542 }
543
544 if (v3d->dirty & V3D_DIRTY_RASTERIZER &&
545 v3d->rasterizer->base.offset_tri) {
546 if (job->zsbuf &&
547 job->zsbuf->format == PIPE_FORMAT_Z16_UNORM) {
548 cl_emit_prepacked_sized(&job->bcl,
549 v3d->rasterizer->depth_offset_z16,
550 cl_packet_length(DEPTH_OFFSET));
551 } else {
552 cl_emit_prepacked_sized(&job->bcl,
553 v3d->rasterizer->depth_offset,
554 cl_packet_length(DEPTH_OFFSET));
555 }
556 }
557
558 if (v3d->dirty & V3D_DIRTY_RASTERIZER) {
559 cl_emit(&job->bcl, POINT_SIZE, point_size) {
560 point_size.point_size = v3d->rasterizer->point_size;
561 }
562
563 cl_emit(&job->bcl, LINE_WIDTH, line_width) {
564 line_width.line_width = v3d_get_real_line_width(v3d);
565 }
566 }
567
568 if (v3d->dirty & V3D_DIRTY_VIEWPORT) {
569 cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) {
570 clip.viewport_half_width_in_1_256th_of_pixel =
571 v3d->viewport.scale[0] * 256.0f;
572 clip.viewport_half_height_in_1_256th_of_pixel =
573 v3d->viewport.scale[1] * 256.0f;
574 }
575
576 cl_emit(&job->bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) {
577 clip.viewport_z_offset_zc_to_zs =
578 v3d->viewport.translate[2];
579 clip.viewport_z_scale_zc_to_zs =
580 v3d->viewport.scale[2];
581 }
582 cl_emit(&job->bcl, CLIPPER_Z_MIN_MAX_CLIPPING_PLANES, clip) {
583 float z1 = (v3d->viewport.translate[2] -
584 v3d->viewport.scale[2]);
585 float z2 = (v3d->viewport.translate[2] +
586 v3d->viewport.scale[2]);
587 clip.minimum_zw = MIN2(z1, z2);
588 clip.maximum_zw = MAX2(z1, z2);
589 }
590
591 cl_emit(&job->bcl, VIEWPORT_OFFSET, vp) {
592 vp.viewport_centre_x_coordinate =
593 v3d->viewport.translate[0];
594 vp.viewport_centre_y_coordinate =
595 v3d->viewport.translate[1];
596 }
597 }
598
599 if (v3d->dirty & V3D_DIRTY_BLEND) {
600 struct v3d_blend_state *blend = v3d->blend;
601
602 if (blend->blend_enables) {
603 #if V3D_VERSION >= 40
604 cl_emit(&job->bcl, BLEND_ENABLES, enables) {
605 enables.mask = blend->blend_enables;
606 }
607 #endif
608
609 if (blend->base.independent_blend_enable) {
610 for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++)
611 emit_rt_blend(v3d, job, &blend->base, i,
612 (1 << i),
613 v3d->blend_dst_alpha_one & (1 << i));
614 } else if (v3d->blend_dst_alpha_one &&
615 util_bitcount(v3d->blend_dst_alpha_one) < job->nr_cbufs) {
616 /* Even if we don't have independent per-RT
617 * blending, we may have a combination of RT
618 * formats were some RTs have an alpha channel
619 * and others don't. Since this affects how
620 * blending is performed, we also need to emit
621 * independent blend configurations in this
622 * case: one for RTs with alpha and one for
623 * RTs without.
624 */
625 emit_rt_blend(v3d, job, &blend->base, 0,
626 ((1 << V3D_MAX_DRAW_BUFFERS) - 1) &
627 v3d->blend_dst_alpha_one,
628 true);
629 emit_rt_blend(v3d, job, &blend->base, 0,
630 ((1 << V3D_MAX_DRAW_BUFFERS) - 1) &
631 ~v3d->blend_dst_alpha_one,
632 false);
633 } else {
634 emit_rt_blend(v3d, job, &blend->base, 0,
635 (1 << V3D_MAX_DRAW_BUFFERS) - 1,
636 v3d->blend_dst_alpha_one);
637 }
638 }
639 }
640
641 if (v3d->dirty & V3D_DIRTY_BLEND) {
642 struct pipe_blend_state *blend = &v3d->blend->base;
643
644 cl_emit(&job->bcl, COLOR_WRITE_MASKS, mask) {
645 for (int i = 0; i < 4; i++) {
646 int rt = blend->independent_blend_enable ? i : 0;
647 int rt_mask = blend->rt[rt].colormask;
648
649 mask.mask |= translate_colormask(v3d, rt_mask,
650 i) << (4 * i);
651 }
652 }
653 }
654
655 /* GFXH-1431: On V3D 3.x, writing BLEND_CONFIG resets the constant
656 * color.
657 */
658 if (v3d->dirty & V3D_DIRTY_BLEND_COLOR ||
659 (V3D_VERSION < 41 && (v3d->dirty & V3D_DIRTY_BLEND))) {
660 cl_emit(&job->bcl, BLEND_CONSTANT_COLOR, color) {
661 color.red_f16 = (v3d->swap_color_rb ?
662 v3d->blend_color.hf[2] :
663 v3d->blend_color.hf[0]);
664 color.green_f16 = v3d->blend_color.hf[1];
665 color.blue_f16 = (v3d->swap_color_rb ?
666 v3d->blend_color.hf[0] :
667 v3d->blend_color.hf[2]);
668 color.alpha_f16 = v3d->blend_color.hf[3];
669 }
670 }
671
672 if (v3d->dirty & (V3D_DIRTY_ZSA | V3D_DIRTY_STENCIL_REF)) {
673 struct pipe_stencil_state *front = &v3d->zsa->base.stencil[0];
674 struct pipe_stencil_state *back = &v3d->zsa->base.stencil[1];
675
676 if (front->enabled) {
677 cl_emit_with_prepacked(&job->bcl, STENCIL_CFG,
678 v3d->zsa->stencil_front, config) {
679 config.stencil_ref_value =
680 v3d->stencil_ref.ref_value[0];
681 }
682 }
683
684 if (back->enabled) {
685 cl_emit_with_prepacked(&job->bcl, STENCIL_CFG,
686 v3d->zsa->stencil_back, config) {
687 config.stencil_ref_value =
688 v3d->stencil_ref.ref_value[1];
689 }
690 }
691 }
692
693 #if V3D_VERSION < 40
694 /* Pre-4.x, we have texture state that depends on both the sampler and
695 * the view, so we merge them together at draw time.
696 */
697 if (v3d->dirty & V3D_DIRTY_FRAGTEX)
698 emit_textures(v3d, &v3d->tex[PIPE_SHADER_FRAGMENT]);
699
700 if (v3d->dirty & V3D_DIRTY_GEOMTEX)
701 emit_textures(v3d, &v3d->tex[PIPE_SHADER_GEOMETRY]);
702
703 if (v3d->dirty & V3D_DIRTY_VERTTEX)
704 emit_textures(v3d, &v3d->tex[PIPE_SHADER_VERTEX]);
705 #endif
706
707 if (v3d->dirty & V3D_DIRTY_FLAT_SHADE_FLAGS) {
708 if (!emit_varying_flags(job,
709 v3d->prog.fs->prog_data.fs->flat_shade_flags,
710 emit_flat_shade_flags)) {
711 cl_emit(&job->bcl, ZERO_ALL_FLAT_SHADE_FLAGS, flags);
712 }
713 }
714
715 #if V3D_VERSION >= 40
716 if (v3d->dirty & V3D_DIRTY_NOPERSPECTIVE_FLAGS) {
717 if (!emit_varying_flags(job,
718 v3d->prog.fs->prog_data.fs->noperspective_flags,
719 emit_noperspective_flags)) {
720 cl_emit(&job->bcl, ZERO_ALL_NON_PERSPECTIVE_FLAGS, flags);
721 }
722 }
723
724 if (v3d->dirty & V3D_DIRTY_CENTROID_FLAGS) {
725 if (!emit_varying_flags(job,
726 v3d->prog.fs->prog_data.fs->centroid_flags,
727 emit_centroid_flags)) {
728 cl_emit(&job->bcl, ZERO_ALL_CENTROID_FLAGS, flags);
729 }
730 }
731 #endif
732
733 /* Set up the transform feedback data specs (which VPM entries to
734 * output to which buffers).
735 */
736 if (v3d->dirty & (V3D_DIRTY_STREAMOUT |
737 V3D_DIRTY_RASTERIZER |
738 V3D_DIRTY_PRIM_MODE)) {
739 struct v3d_streamout_stateobj *so = &v3d->streamout;
740 if (so->num_targets) {
741 bool psiz_per_vertex = (v3d->prim_mode == PIPE_PRIM_POINTS &&
742 v3d->rasterizer->base.point_size_per_vertex);
743 struct v3d_uncompiled_shader *tf_shader =
744 get_tf_shader(v3d);
745 uint16_t *tf_specs = (psiz_per_vertex ?
746 tf_shader->tf_specs_psiz :
747 tf_shader->tf_specs);
748
749 #if V3D_VERSION >= 40
750 bool tf_enabled = v3d_transform_feedback_enabled(v3d);
751 job->tf_enabled |= tf_enabled;
752
753 cl_emit(&job->bcl, TRANSFORM_FEEDBACK_SPECS, tfe) {
754 tfe.number_of_16_bit_output_data_specs_following =
755 tf_shader->num_tf_specs;
756 tfe.enable = tf_enabled;
757 };
758 #else /* V3D_VERSION < 40 */
759 cl_emit(&job->bcl, TRANSFORM_FEEDBACK_ENABLE, tfe) {
760 tfe.number_of_32_bit_output_buffer_address_following =
761 so->num_targets;
762 tfe.number_of_16_bit_output_data_specs_following =
763 tf_shader->num_tf_specs;
764 };
765 #endif /* V3D_VERSION < 40 */
766 for (int i = 0; i < tf_shader->num_tf_specs; i++) {
767 cl_emit_prepacked(&job->bcl, &tf_specs[i]);
768 }
769 } else {
770 #if V3D_VERSION >= 40
771 cl_emit(&job->bcl, TRANSFORM_FEEDBACK_SPECS, tfe) {
772 tfe.enable = false;
773 };
774 #endif /* V3D_VERSION >= 40 */
775 }
776 }
777
778 /* Set up the transform feedback buffers. */
779 if (v3d->dirty & V3D_DIRTY_STREAMOUT) {
780 struct v3d_uncompiled_shader *tf_shader = get_tf_shader(v3d);
781 struct v3d_streamout_stateobj *so = &v3d->streamout;
782 for (int i = 0; i < so->num_targets; i++) {
783 const struct pipe_stream_output_target *target =
784 so->targets[i];
785 struct v3d_resource *rsc = target ?
786 v3d_resource(target->buffer) : NULL;
787 struct pipe_shader_state *ss = &tf_shader->base;
788 struct pipe_stream_output_info *info = &ss->stream_output;
789 uint32_t offset = (v3d->streamout.offsets[i] *
790 info->stride[i] * 4);
791
792 #if V3D_VERSION >= 40
793 if (!target)
794 continue;
795
796 cl_emit(&job->bcl, TRANSFORM_FEEDBACK_BUFFER, output) {
797 output.buffer_address =
798 cl_address(rsc->bo,
799 target->buffer_offset +
800 offset);
801 output.buffer_size_in_32_bit_words =
802 (target->buffer_size - offset) >> 2;
803 output.buffer_number = i;
804 }
805 #else /* V3D_VERSION < 40 */
806 cl_emit(&job->bcl, TRANSFORM_FEEDBACK_OUTPUT_ADDRESS, output) {
807 if (target) {
808 output.address =
809 cl_address(rsc->bo,
810 target->buffer_offset +
811 offset);
812 }
813 };
814 #endif /* V3D_VERSION < 40 */
815 if (target) {
816 v3d_job_add_tf_write_resource(v3d->job,
817 target->buffer);
818 }
819 /* XXX: buffer_size? */
820 }
821 }
822
823 if (v3d->dirty & V3D_DIRTY_OQ) {
824 cl_emit(&job->bcl, OCCLUSION_QUERY_COUNTER, counter) {
825 if (v3d->active_queries && v3d->current_oq) {
826 counter.address = cl_address(v3d->current_oq, 0);
827 }
828 }
829 }
830
831 #if V3D_VERSION >= 40
832 if (v3d->dirty & V3D_DIRTY_SAMPLE_STATE) {
833 cl_emit(&job->bcl, SAMPLE_STATE, state) {
834 /* Note: SampleCoverage was handled at the
835 * frontend level by converting to sample_mask.
836 */
837 state.coverage = 1.0;
838 state.mask = job->msaa ? v3d->sample_mask : 0xf;
839 }
840 }
841 #endif
842 }
843