• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2014-2017 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "util/format/u_format.h"
25 #include "util/half_float.h"
26 #include "v3d_context.h"
27 #include "broadcom/common/v3d_macros.h"
28 #include "broadcom/cle/v3dx_pack.h"
29 #include "broadcom/common/v3d_util.h"
30 #include "broadcom/compiler/v3d_compiler.h"
31 
32 static uint8_t
v3d_factor(enum pipe_blendfactor factor,bool dst_alpha_one)33 v3d_factor(enum pipe_blendfactor factor, bool dst_alpha_one)
34 {
35         /* We may get a bad blendfactor when blending is disabled. */
36         if (factor == 0)
37                 return V3D_BLEND_FACTOR_ZERO;
38 
39         switch (factor) {
40         case PIPE_BLENDFACTOR_ZERO:
41                 return V3D_BLEND_FACTOR_ZERO;
42         case PIPE_BLENDFACTOR_ONE:
43                 return V3D_BLEND_FACTOR_ONE;
44         case PIPE_BLENDFACTOR_SRC_COLOR:
45                 return V3D_BLEND_FACTOR_SRC_COLOR;
46         case PIPE_BLENDFACTOR_INV_SRC_COLOR:
47                 return V3D_BLEND_FACTOR_INV_SRC_COLOR;
48         case PIPE_BLENDFACTOR_DST_COLOR:
49                 return V3D_BLEND_FACTOR_DST_COLOR;
50         case PIPE_BLENDFACTOR_INV_DST_COLOR:
51                 return V3D_BLEND_FACTOR_INV_DST_COLOR;
52         case PIPE_BLENDFACTOR_SRC_ALPHA:
53                 return V3D_BLEND_FACTOR_SRC_ALPHA;
54         case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
55                 return V3D_BLEND_FACTOR_INV_SRC_ALPHA;
56         case PIPE_BLENDFACTOR_DST_ALPHA:
57                 return (dst_alpha_one ?
58                         V3D_BLEND_FACTOR_ONE :
59                         V3D_BLEND_FACTOR_DST_ALPHA);
60         case PIPE_BLENDFACTOR_INV_DST_ALPHA:
61                 return (dst_alpha_one ?
62                         V3D_BLEND_FACTOR_ZERO :
63                         V3D_BLEND_FACTOR_INV_DST_ALPHA);
64         case PIPE_BLENDFACTOR_CONST_COLOR:
65                 return V3D_BLEND_FACTOR_CONST_COLOR;
66         case PIPE_BLENDFACTOR_INV_CONST_COLOR:
67                 return V3D_BLEND_FACTOR_INV_CONST_COLOR;
68         case PIPE_BLENDFACTOR_CONST_ALPHA:
69                 return V3D_BLEND_FACTOR_CONST_ALPHA;
70         case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
71                 return V3D_BLEND_FACTOR_INV_CONST_ALPHA;
72         case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
73                 return (dst_alpha_one ?
74                         V3D_BLEND_FACTOR_ZERO :
75                         V3D_BLEND_FACTOR_SRC_ALPHA_SATURATE);
76         default:
77                 unreachable("Bad blend factor");
78         }
79 }
80 
81 static uint32_t
translate_colormask(struct v3d_context * v3d,uint32_t colormask,int rt)82 translate_colormask(struct v3d_context *v3d, uint32_t colormask, int rt)
83 {
84         if (v3d->swap_color_rb & (1 << rt)) {
85                 colormask = ((colormask & (2 | 8)) |
86                              ((colormask & 1) << 2) |
87                              ((colormask & 4) >> 2));
88         }
89 
90         return (~colormask) & 0xf;
91 }
92 
93 static void
emit_rt_blend(struct v3d_context * v3d,struct v3d_job * job,struct pipe_blend_state * blend,int rt,uint8_t rt_mask,bool blend_dst_alpha_one)94 emit_rt_blend(struct v3d_context *v3d, struct v3d_job *job,
95               struct pipe_blend_state *blend, int rt, uint8_t rt_mask,
96               bool blend_dst_alpha_one)
97 {
98         struct pipe_rt_blend_state *rtblend = &blend->rt[rt];
99 
100         /* We don't need to emit blend state for disabled RTs. */
101         if (!rtblend->blend_enable)
102                 return;
103 
104         cl_emit(&job->bcl, BLEND_CFG, config) {
105                 config.render_target_mask = rt_mask;
106 
107                 config.color_blend_mode = rtblend->rgb_func;
108                 config.color_blend_dst_factor =
109                         v3d_factor(rtblend->rgb_dst_factor,
110                                    blend_dst_alpha_one);
111                 config.color_blend_src_factor =
112                         v3d_factor(rtblend->rgb_src_factor,
113                                    blend_dst_alpha_one);
114 
115                 config.alpha_blend_mode = rtblend->alpha_func;
116                 config.alpha_blend_dst_factor =
117                         v3d_factor(rtblend->alpha_dst_factor,
118                                    blend_dst_alpha_one);
119                 config.alpha_blend_src_factor =
120                         v3d_factor(rtblend->alpha_src_factor,
121                                    blend_dst_alpha_one);
122         }
123 }
124 
125 static void
emit_flat_shade_flags(struct v3d_job * job,int varying_offset,uint32_t varyings,enum V3DX (Varying_Flags_Action)lower,enum V3DX (Varying_Flags_Action)higher)126 emit_flat_shade_flags(struct v3d_job *job,
127                       int varying_offset,
128                       uint32_t varyings,
129                       enum V3DX(Varying_Flags_Action) lower,
130                       enum V3DX(Varying_Flags_Action) higher)
131 {
132         cl_emit(&job->bcl, FLAT_SHADE_FLAGS, flags) {
133                 flags.varying_offset_v0 = varying_offset;
134                 flags.flat_shade_flags_for_varyings_v024 = varyings;
135                 flags.action_for_flat_shade_flags_of_lower_numbered_varyings =
136                         lower;
137                 flags.action_for_flat_shade_flags_of_higher_numbered_varyings =
138                         higher;
139         }
140 }
141 
142 static void
emit_noperspective_flags(struct v3d_job * job,int varying_offset,uint32_t varyings,enum V3DX (Varying_Flags_Action)lower,enum V3DX (Varying_Flags_Action)higher)143 emit_noperspective_flags(struct v3d_job *job,
144                          int varying_offset,
145                          uint32_t varyings,
146                          enum V3DX(Varying_Flags_Action) lower,
147                          enum V3DX(Varying_Flags_Action) higher)
148 {
149         cl_emit(&job->bcl, NON_PERSPECTIVE_FLAGS, flags) {
150                 flags.varying_offset_v0 = varying_offset;
151                 flags.non_perspective_flags_for_varyings_v024 = varyings;
152                 flags.action_for_non_perspective_flags_of_lower_numbered_varyings =
153                         lower;
154                 flags.action_for_non_perspective_flags_of_higher_numbered_varyings =
155                         higher;
156         }
157 }
158 
159 static void
emit_centroid_flags(struct v3d_job * job,int varying_offset,uint32_t varyings,enum V3DX (Varying_Flags_Action)lower,enum V3DX (Varying_Flags_Action)higher)160 emit_centroid_flags(struct v3d_job *job,
161                     int varying_offset,
162                     uint32_t varyings,
163                     enum V3DX(Varying_Flags_Action) lower,
164                     enum V3DX(Varying_Flags_Action) higher)
165 {
166         cl_emit(&job->bcl, CENTROID_FLAGS, flags) {
167                 flags.varying_offset_v0 = varying_offset;
168                 flags.centroid_flags_for_varyings_v024 = varyings;
169                 flags.action_for_centroid_flags_of_lower_numbered_varyings =
170                         lower;
171                 flags.action_for_centroid_flags_of_higher_numbered_varyings =
172                         higher;
173         }
174 }
175 
176 static bool
emit_varying_flags(struct v3d_job * job,uint32_t * flags,void (* flag_emit_callback)(struct v3d_job * job,int varying_offset,uint32_t flags,enum V3DX (Varying_Flags_Action)lower,enum V3DX (Varying_Flags_Action)higher))177 emit_varying_flags(struct v3d_job *job, uint32_t *flags,
178                    void (*flag_emit_callback)(struct v3d_job *job,
179                                               int varying_offset,
180                                               uint32_t flags,
181                                               enum V3DX(Varying_Flags_Action) lower,
182                                               enum V3DX(Varying_Flags_Action) higher))
183 {
184         struct v3d_context *v3d = job->v3d;
185         bool emitted_any = false;
186 
187         for (int i = 0; i < ARRAY_SIZE(v3d->prog.fs->prog_data.fs->flat_shade_flags); i++) {
188                 if (!flags[i])
189                         continue;
190 
191                 if (emitted_any) {
192                         flag_emit_callback(job, i, flags[i],
193                                            V3D_VARYING_FLAGS_ACTION_UNCHANGED,
194                                            V3D_VARYING_FLAGS_ACTION_UNCHANGED);
195                 } else if (i == 0) {
196                         flag_emit_callback(job, i, flags[i],
197                                            V3D_VARYING_FLAGS_ACTION_UNCHANGED,
198                                            V3D_VARYING_FLAGS_ACTION_ZEROED);
199                 } else {
200                         flag_emit_callback(job, i, flags[i],
201                                            V3D_VARYING_FLAGS_ACTION_ZEROED,
202                                            V3D_VARYING_FLAGS_ACTION_ZEROED);
203                 }
204                 emitted_any = true;
205         }
206 
207         return emitted_any;
208 }
209 
210 static inline struct v3d_uncompiled_shader *
get_tf_shader(struct v3d_context * v3d)211 get_tf_shader(struct v3d_context *v3d)
212 {
213         if (v3d->prog.bind_gs)
214                 return v3d->prog.bind_gs;
215         else
216                 return v3d->prog.bind_vs;
217 }
218 
219 void
v3dX(emit_state)220 v3dX(emit_state)(struct pipe_context *pctx)
221 {
222         struct v3d_context *v3d = v3d_context(pctx);
223         struct v3d_job *job = v3d->job;
224         bool rasterizer_discard = v3d->rasterizer->base.rasterizer_discard;
225 
226         if (v3d->dirty & (V3D_DIRTY_SCISSOR | V3D_DIRTY_VIEWPORT |
227                           V3D_DIRTY_RASTERIZER)) {
228                 float *vpscale = v3d->viewport.scale;
229                 float *vptranslate = v3d->viewport.translate;
230                 float vp_minx = -fabsf(vpscale[0]) + vptranslate[0];
231                 float vp_maxx = fabsf(vpscale[0]) + vptranslate[0];
232                 float vp_miny = -fabsf(vpscale[1]) + vptranslate[1];
233                 float vp_maxy = fabsf(vpscale[1]) + vptranslate[1];
234 
235                 /* Clip to the scissor if it's enabled, but still clip to the
236                  * drawable regardless since that controls where the binner
237                  * tries to put things.
238                  *
239                  * Additionally, always clip the rendering to the viewport,
240                  * since the hardware does guardband clipping, meaning
241                  * primitives would rasterize outside of the view volume.
242                  */
243                 uint32_t minx, miny, maxx, maxy;
244                 if (!v3d->rasterizer->base.scissor) {
245                         minx = MAX2(vp_minx, 0);
246                         miny = MAX2(vp_miny, 0);
247                         maxx = MIN2(vp_maxx, job->draw_width);
248                         maxy = MIN2(vp_maxy, job->draw_height);
249                 } else {
250                         minx = MAX2(vp_minx, v3d->scissor.minx);
251                         miny = MAX2(vp_miny, v3d->scissor.miny);
252                         maxx = MIN2(vp_maxx, v3d->scissor.maxx);
253                         maxy = MIN2(vp_maxy, v3d->scissor.maxy);
254                 }
255 
256                 cl_emit(&job->bcl, CLIP_WINDOW, clip) {
257                         clip.clip_window_left_pixel_coordinate = minx;
258                         clip.clip_window_bottom_pixel_coordinate = miny;
259                         if (maxx > minx && maxy > miny) {
260                                 clip.clip_window_width_in_pixels = maxx - minx;
261                                 clip.clip_window_height_in_pixels = maxy - miny;
262                         }
263                 }
264 
265                 job->draw_min_x = MIN2(job->draw_min_x, minx);
266                 job->draw_min_y = MIN2(job->draw_min_y, miny);
267                 job->draw_max_x = MAX2(job->draw_max_x, maxx);
268                 job->draw_max_y = MAX2(job->draw_max_y, maxy);
269 
270                 if (!v3d->rasterizer->base.scissor) {
271                     job->scissor.disabled = true;
272                 } else if (!job->scissor.disabled &&
273                            (v3d->dirty & V3D_DIRTY_SCISSOR)) {
274                         if (job->scissor.count < MAX_JOB_SCISSORS) {
275                                 job->scissor.rects[job->scissor.count].min_x =
276                                         v3d->scissor.minx;
277                                 job->scissor.rects[job->scissor.count].min_y =
278                                         v3d->scissor.miny;
279                                 job->scissor.rects[job->scissor.count].max_x =
280                                         v3d->scissor.maxx - 1;
281                                 job->scissor.rects[job->scissor.count].max_y =
282                                         v3d->scissor.maxy - 1;
283                                 job->scissor.count++;
284                         } else {
285                                 job->scissor.disabled = true;
286                                 perf_debug("Too many scissor rects.");
287                         }
288                 }
289         }
290 
291         if (v3d->dirty & (V3D_DIRTY_RASTERIZER |
292                           V3D_DIRTY_ZSA |
293                           V3D_DIRTY_BLEND |
294                           V3D_DIRTY_COMPILED_FS)) {
295                 cl_emit(&job->bcl, CFG_BITS, config) {
296                         config.enable_forward_facing_primitive =
297                                 !rasterizer_discard &&
298                                 !(v3d->rasterizer->base.cull_face &
299                                   PIPE_FACE_FRONT);
300                         config.enable_reverse_facing_primitive =
301                                 !rasterizer_discard &&
302                                 !(v3d->rasterizer->base.cull_face &
303                                   PIPE_FACE_BACK);
304                         /* This seems backwards, but it's what gets the
305                          * clipflat test to pass.
306                          */
307                         config.clockwise_primitives =
308                                 v3d->rasterizer->base.front_ccw;
309 
310                         config.enable_depth_offset =
311                                 v3d->rasterizer->base.offset_tri;
312 
313                         /* V3D follows GL behavior where the sample mask only
314                          * applies when MSAA is enabled.  Gallium has sample
315                          * mask apply anyway, and the MSAA blit shaders will
316                          * set sample mask without explicitly setting
317                          * rasterizer oversample.  Just force it on here,
318                          * since the blit shaders are the only way to have
319                          * !multisample && samplemask != 0xf.
320                          */
321                         config.rasterizer_oversample_mode =
322                                 v3d->rasterizer->base.multisample ||
323                                 v3d->sample_mask != 0xf;
324 
325                         config.direct3d_provoking_vertex =
326                                 v3d->rasterizer->base.flatshade_first;
327 
328                         config.blend_enable = v3d->blend->blend_enables;
329 
330                         /* Note: EZ state may update based on the compiled FS,
331                          * along with ZSA
332                          */
333 #if V3D_VERSION == 42
334                         config.early_z_updates_enable =
335                                 (job->ez_state != V3D_EZ_DISABLED);
336 #endif
337                         if (v3d->zsa->base.depth_enabled) {
338                                 config.z_updates_enable =
339                                         v3d->zsa->base.depth_writemask;
340 #if V3D_VERSION == 42
341                                 config.early_z_enable =
342                                         config.early_z_updates_enable;
343 #endif
344                                 config.depth_test_function =
345                                         v3d->zsa->base.depth_func;
346                         } else {
347                                 config.depth_test_function = PIPE_FUNC_ALWAYS;
348                         }
349 
350                         config.stencil_enable =
351                                 v3d->zsa->base.stencil[0].enabled;
352 
353                         /* Use nicer line caps when line smoothing is
354                          * enabled
355                          */
356                         config.line_rasterization =
357                                 v3d_line_smoothing_enabled(v3d) ?
358                                 V3D_LINE_RASTERIZATION_PERP_END_CAPS :
359                                 V3D_LINE_RASTERIZATION_DIAMOND_EXIT;
360 
361 #if V3D_VERSION >= 71
362                         /* The following follows the logic implemented in v3dv
363                          * plus the definition of depth_clip_near/far and
364                          * depth_clamp.
365                          *
366                          * Note: some extensions are not supported by v3d
367                          * (like ARB_depth_clamp) that would affect this, but
368                          * the values on rasterizer are taking that into
369                          * account.
370                          */
371                         config.z_clipping_mode = v3d->rasterizer->base.depth_clip_near ||
372                            v3d->rasterizer->base.depth_clip_far ?
373                            V3D_Z_CLIP_MODE_MIN_ONE_TO_ONE : V3D_Z_CLIP_MODE_NONE;
374 #endif
375                 }
376         }
377 
378         if (v3d->dirty & V3D_DIRTY_RASTERIZER &&
379             v3d->rasterizer->base.offset_tri) {
380                 if (v3d->screen->devinfo.ver == 42 &&
381                     job->zsbuf &&
382                     job->zsbuf->format == PIPE_FORMAT_Z16_UNORM) {
383                         cl_emit_prepacked_sized(&job->bcl,
384                                                 v3d->rasterizer->depth_offset_z16,
385                                                 cl_packet_length(DEPTH_OFFSET));
386                 } else {
387                         cl_emit_prepacked_sized(&job->bcl,
388                                                 v3d->rasterizer->depth_offset,
389                                                 cl_packet_length(DEPTH_OFFSET));
390                 }
391         }
392 
393         if (v3d->dirty & V3D_DIRTY_RASTERIZER) {
394                 cl_emit(&job->bcl, POINT_SIZE, point_size) {
395                         point_size.point_size = v3d->rasterizer->point_size;
396                 }
397 
398                 cl_emit(&job->bcl, LINE_WIDTH, line_width) {
399                         line_width.line_width = v3d_get_real_line_width(v3d);
400                 }
401         }
402 
403         if (v3d->dirty & V3D_DIRTY_VIEWPORT) {
404 #if V3D_VERSION == 42
405                 cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) {
406                         clip.viewport_half_width_in_1_256th_of_pixel =
407                                 v3d->viewport.scale[0] * 256.0f;
408                         clip.viewport_half_height_in_1_256th_of_pixel =
409                                 v3d->viewport.scale[1] * 256.0f;
410                 }
411 #endif
412 #if V3D_VERSION >= 71
413                 cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) {
414                         clip.viewport_half_width_in_1_64th_of_pixel =
415                                 v3d->viewport.scale[0] * 64.0f;
416                         clip.viewport_half_height_in_1_64th_of_pixel =
417                                 v3d->viewport.scale[1] * 64.0f;
418                 }
419 #endif
420 
421 
422                 cl_emit(&job->bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) {
423                         clip.viewport_z_offset_zc_to_zs =
424                                 v3d->viewport.translate[2];
425                         clip.viewport_z_scale_zc_to_zs =
426                                 v3d->viewport.scale[2];
427                 }
428                 cl_emit(&job->bcl, CLIPPER_Z_MIN_MAX_CLIPPING_PLANES, clip) {
429                         float z1 = (v3d->viewport.translate[2] -
430                                     v3d->viewport.scale[2]);
431                         float z2 = (v3d->viewport.translate[2] +
432                                     v3d->viewport.scale[2]);
433                         clip.minimum_zw = MIN2(z1, z2);
434                         clip.maximum_zw = MAX2(z1, z2);
435                 }
436 
437                 cl_emit(&job->bcl, VIEWPORT_OFFSET, vp) {
438                         float vp_fine_x = v3d->viewport.translate[0];
439                         float vp_fine_y = v3d->viewport.translate[1];
440                         int32_t vp_coarse_x = 0;
441                         int32_t vp_coarse_y = 0;
442 
443                         /* The fine coordinates must be unsigned, but coarse
444                          * can be signed.
445                          */
446                         if (unlikely(vp_fine_x < 0)) {
447                                 int32_t blocks_64 =
448                                         DIV_ROUND_UP(fabsf(vp_fine_x), 64);
449                                 vp_fine_x += 64.0f * blocks_64;
450                                 vp_coarse_x -= blocks_64;
451                         }
452 
453                         if (unlikely(vp_fine_y < 0)) {
454                                 int32_t blocks_64 =
455                                         DIV_ROUND_UP(fabsf(vp_fine_y), 64);
456                                 vp_fine_y += 64.0f * blocks_64;
457                                 vp_coarse_y -= blocks_64;
458                         }
459 
460                         vp.fine_x = vp_fine_x;
461                         vp.fine_y = vp_fine_y;
462                         vp.coarse_x = vp_coarse_x;
463                         vp.coarse_y = vp_coarse_y;
464                 }
465         }
466 
467         if (v3d->dirty & V3D_DIRTY_BLEND) {
468                 struct v3d_blend_state *blend = v3d->blend;
469 
470                 if (blend->blend_enables) {
471                         cl_emit(&job->bcl, BLEND_ENABLES, enables) {
472                                 enables.mask = blend->blend_enables;
473                         }
474 
475                         const uint32_t max_rts =
476                                 V3D_MAX_RENDER_TARGETS(v3d->screen->devinfo.ver);
477                         if (blend->base.independent_blend_enable) {
478                                 for (int i = 0; i < max_rts; i++)
479                                         emit_rt_blend(v3d, job, &blend->base, i,
480                                                       (1 << i),
481                                                       v3d->blend_dst_alpha_one & (1 << i));
482                         } else if (v3d->blend_dst_alpha_one &&
483                                    util_bitcount(v3d->blend_dst_alpha_one) < job->nr_cbufs) {
484                                 /* Even if we don't have independent per-RT
485                                  * blending, we may have a combination of RT
486                                  * formats were some RTs have an alpha channel
487                                  * and others don't. Since this affects how
488                                  * blending is performed, we also need to emit
489                                  * independent blend configurations in this
490                                  * case: one for RTs with alpha and one for
491                                  * RTs without.
492                                  */
493                                 emit_rt_blend(v3d, job, &blend->base, 0,
494                                               ((1 << max_rts) - 1) &
495                                                    v3d->blend_dst_alpha_one,
496                                               true);
497                                 emit_rt_blend(v3d, job, &blend->base, 0,
498                                               ((1 << max_rts) - 1) &
499                                                    ~v3d->blend_dst_alpha_one,
500                                               false);
501                         } else {
502                                 emit_rt_blend(v3d, job, &blend->base, 0,
503                                               (1 << max_rts) - 1,
504                                               v3d->blend_dst_alpha_one);
505                         }
506                 }
507         }
508 
509         if (v3d->dirty & V3D_DIRTY_BLEND) {
510                 struct pipe_blend_state *blend = &v3d->blend->base;
511 
512                 const uint32_t max_rts =
513                         V3D_MAX_RENDER_TARGETS(v3d->screen->devinfo.ver);
514                 cl_emit(&job->bcl, COLOR_WRITE_MASKS, mask) {
515                         for (int i = 0; i < max_rts; i++) {
516                                 int rt = blend->independent_blend_enable ? i : 0;
517                                 int rt_mask = blend->rt[rt].colormask;
518 
519                                 mask.mask |= translate_colormask(v3d, rt_mask,
520                                                                  i) << (4 * i);
521                         }
522                 }
523         }
524 
525         /* GFXH-1431: On V3D 3.x, writing BLEND_CONFIG resets the constant
526          * color.
527          */
528         if (v3d->dirty & V3D_DIRTY_BLEND_COLOR) {
529                 cl_emit(&job->bcl, BLEND_CONSTANT_COLOR, color) {
530                         color.red_f16 = (v3d->swap_color_rb ?
531                                           v3d->blend_color.hf[2] :
532                                           v3d->blend_color.hf[0]);
533                         color.green_f16 = v3d->blend_color.hf[1];
534                         color.blue_f16 = (v3d->swap_color_rb ?
535                                            v3d->blend_color.hf[0] :
536                                            v3d->blend_color.hf[2]);
537                         color.alpha_f16 = v3d->blend_color.hf[3];
538                 }
539         }
540 
541         if (v3d->dirty & (V3D_DIRTY_ZSA | V3D_DIRTY_STENCIL_REF)) {
542                 struct pipe_stencil_state *front = &v3d->zsa->base.stencil[0];
543                 struct pipe_stencil_state *back = &v3d->zsa->base.stencil[1];
544 
545                 if (front->enabled) {
546                         cl_emit_with_prepacked(&job->bcl, STENCIL_CFG,
547                                                v3d->zsa->stencil_front, config) {
548                                 config.stencil_ref_value =
549                                         v3d->stencil_ref.ref_value[0];
550                         }
551                 }
552 
553                 if (back->enabled) {
554                         cl_emit_with_prepacked(&job->bcl, STENCIL_CFG,
555                                                v3d->zsa->stencil_back, config) {
556                                 config.stencil_ref_value =
557                                         v3d->stencil_ref.ref_value[1];
558                         }
559                 }
560         }
561 
562         if (v3d->dirty & V3D_DIRTY_FLAT_SHADE_FLAGS) {
563                 if (!emit_varying_flags(job,
564                                         v3d->prog.fs->prog_data.fs->flat_shade_flags,
565                                         emit_flat_shade_flags)) {
566                         cl_emit(&job->bcl, ZERO_ALL_FLAT_SHADE_FLAGS, flags);
567                 }
568         }
569 
570         if (v3d->dirty & V3D_DIRTY_NOPERSPECTIVE_FLAGS) {
571                 if (!emit_varying_flags(job,
572                                         v3d->prog.fs->prog_data.fs->noperspective_flags,
573                                         emit_noperspective_flags)) {
574                         cl_emit(&job->bcl, ZERO_ALL_NON_PERSPECTIVE_FLAGS, flags);
575                 }
576         }
577 
578         if (v3d->dirty & V3D_DIRTY_CENTROID_FLAGS) {
579                 if (!emit_varying_flags(job,
580                                         v3d->prog.fs->prog_data.fs->centroid_flags,
581                                         emit_centroid_flags)) {
582                         cl_emit(&job->bcl, ZERO_ALL_CENTROID_FLAGS, flags);
583                 }
584         }
585 
586         /* Set up the transform feedback data specs (which VPM entries to
587          * output to which buffers).
588          */
589         if (v3d->dirty & (V3D_DIRTY_STREAMOUT |
590                           V3D_DIRTY_RASTERIZER |
591                           V3D_DIRTY_PRIM_MODE)) {
592                 struct v3d_streamout_stateobj *so = &v3d->streamout;
593                 if (so->num_targets) {
594                         bool psiz_per_vertex = (v3d->prim_mode == MESA_PRIM_POINTS &&
595                                                 v3d->rasterizer->base.point_size_per_vertex);
596                         struct v3d_uncompiled_shader *tf_shader =
597                                 get_tf_shader(v3d);
598                         uint16_t *tf_specs = (psiz_per_vertex ?
599                                               tf_shader->tf_specs_psiz :
600                                               tf_shader->tf_specs);
601 
602                         bool tf_enabled = v3d_transform_feedback_enabled(v3d);
603                         job->tf_enabled |= tf_enabled;
604 
605                         cl_emit(&job->bcl, TRANSFORM_FEEDBACK_SPECS, tfe) {
606                                 tfe.number_of_16_bit_output_data_specs_following =
607                                         tf_shader->num_tf_specs;
608                                 tfe.enable = tf_enabled;
609                         };
610                         for (int i = 0; i < tf_shader->num_tf_specs; i++) {
611                                 cl_emit_prepacked(&job->bcl, &tf_specs[i]);
612                         }
613                 } else {
614                         cl_emit(&job->bcl, TRANSFORM_FEEDBACK_SPECS, tfe) {
615                                 tfe.enable = false;
616                         };
617                 }
618         }
619 
620         /* Set up the transform feedback buffers. */
621         if (v3d->dirty & V3D_DIRTY_STREAMOUT) {
622                 struct v3d_uncompiled_shader *tf_shader = get_tf_shader(v3d);
623                 struct v3d_streamout_stateobj *so = &v3d->streamout;
624                 for (int i = 0; i < so->num_targets; i++) {
625                         struct pipe_stream_output_target *target =
626                                 so->targets[i];
627                         struct v3d_resource *rsc = target ?
628                                 v3d_resource(target->buffer) : NULL;
629                         struct pipe_shader_state *ss = &tf_shader->base;
630                         struct pipe_stream_output_info *info = &ss->stream_output;
631                         uint32_t offset = target ?
632                                 v3d_stream_output_target(target)->offset * info->stride[i] * 4 : 0;
633 
634                         if (!target)
635                                 continue;
636 
637                         cl_emit(&job->bcl, TRANSFORM_FEEDBACK_BUFFER, output) {
638                                 output.buffer_address =
639                                         cl_address(rsc->bo,
640                                                    target->buffer_offset +
641                                                    offset);
642                                 output.buffer_size_in_32_bit_words =
643                                         (target->buffer_size - offset) >> 2;
644                                 output.buffer_number = i;
645                         }
646                         if (target) {
647                                 v3d_job_add_tf_write_resource(v3d->job,
648                                                               target->buffer);
649                         }
650                         /* XXX: buffer_size? */
651                 }
652         }
653 
654         if (v3d->dirty & V3D_DIRTY_OQ) {
655                 cl_emit(&job->bcl, OCCLUSION_QUERY_COUNTER, counter) {
656                         if (v3d->active_queries && v3d->current_oq) {
657                                 counter.address = cl_address(v3d->current_oq, 0);
658                         }
659                 }
660         }
661 
662         if (v3d->dirty & V3D_DIRTY_SAMPLE_STATE) {
663                 cl_emit(&job->bcl, SAMPLE_STATE, state) {
664                         /* Note: SampleCoverage was handled at the
665                          * frontend level by converting to sample_mask.
666                          */
667                         state.coverage = 1.0;
668                         state.mask = job->msaa ? v3d->sample_mask : 0xf;
669                 }
670         }
671 }
672