• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2021 Collabora Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 #include "genxml/gen_macros.h"
25 
26 #include "compiler/shader_enums.h"
27 #include "util/macros.h"
28 
29 #include "vk_util.h"
30 
31 #include "pan_desc.h"
32 #include "pan_earlyzs.h"
33 #include "pan_encoder.h"
34 #include "pan_pool.h"
35 #include "pan_shader.h"
36 
37 #include "panvk_cs.h"
38 #include "panvk_private.h"
39 #include "panvk_varyings.h"
40 
41 #include "vk_sampler.h"
42 
43 static enum mali_mipmap_mode
panvk_translate_sampler_mipmap_mode(VkSamplerMipmapMode mode)44 panvk_translate_sampler_mipmap_mode(VkSamplerMipmapMode mode)
45 {
46    switch (mode) {
47    case VK_SAMPLER_MIPMAP_MODE_NEAREST:
48       return MALI_MIPMAP_MODE_NEAREST;
49    case VK_SAMPLER_MIPMAP_MODE_LINEAR:
50       return MALI_MIPMAP_MODE_TRILINEAR;
51    default:
52       unreachable("Invalid mipmap mode");
53    }
54 }
55 
56 static unsigned
panvk_translate_sampler_address_mode(VkSamplerAddressMode mode)57 panvk_translate_sampler_address_mode(VkSamplerAddressMode mode)
58 {
59    switch (mode) {
60    case VK_SAMPLER_ADDRESS_MODE_REPEAT:
61       return MALI_WRAP_MODE_REPEAT;
62    case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
63       return MALI_WRAP_MODE_MIRRORED_REPEAT;
64    case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
65       return MALI_WRAP_MODE_CLAMP_TO_EDGE;
66    case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
67       return MALI_WRAP_MODE_CLAMP_TO_BORDER;
68    case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
69       return MALI_WRAP_MODE_MIRRORED_CLAMP_TO_EDGE;
70    default:
71       unreachable("Invalid wrap");
72    }
73 }
74 
/*
 * Return the hardware pixel format word for varying slot @idx of @stage.
 *
 * Point coord/size and position get fixed formats; other varyings use the
 * pipe format recorded in @varyings. The word packs the format index above
 * bit 12 and a swizzle/component order in the low bits, with the swizzle
 * encoding differing between v6 (panfrost_get_default_swizzle) and v7+
 * (MALI_RGB_COMPONENT_ORDER_*).
 */
static mali_pixel_format
panvk_varying_hw_format(const struct panvk_device *dev,
                        const struct panvk_varyings_info *varyings,
                        gl_shader_stage stage, unsigned idx)
{
   gl_varying_slot loc = varyings->stage[stage].loc[idx];

   switch (loc) {
   case VARYING_SLOT_PNTC:
   case VARYING_SLOT_PSIZ:
#if PAN_ARCH <= 6
      return (MALI_R16F << 12) | panfrost_get_default_swizzle(1);
#else
      return (MALI_R16F << 12) | MALI_RGB_COMPONENT_ORDER_R000;
#endif
   case VARYING_SLOT_POS:
#if PAN_ARCH <= 6
      return (MALI_SNAP_4 << 12) | panfrost_get_default_swizzle(4);
#else
      return (MALI_SNAP_4 << 12) | MALI_RGB_COMPONENT_ORDER_RGBA;
#endif
   default:
      if (varyings->varying[loc].format != PIPE_FORMAT_NONE) {
         enum pipe_format f = varyings->varying[loc].format;

         return GENX(panfrost_format_from_pipe_format)(f)->hw;
      }
      /* Slot never written by the producing stage: read back constant 0. */
#if PAN_ARCH >= 7
      return (MALI_CONSTANT << 12) | MALI_RGB_COMPONENT_ORDER_0000;
#else
      return (MALI_CONSTANT << 12) | PAN_V6_SWIZZLE(0, 0, 0, 0);
#endif
   }
}
109 
/*
 * Pack one ATTRIBUTE descriptor for varying @idx of @stage into @attrib,
 * pointing it at the varying buffer/offset recorded in @varyings.
 */
static void
panvk_emit_varying(const struct panvk_device *dev,
                   const struct panvk_varyings_info *varyings,
                   gl_shader_stage stage, unsigned idx, void *attrib)
{
   gl_varying_slot loc = varyings->stage[stage].loc[idx];

   pan_pack(attrib, ATTRIBUTE, cfg) {
      cfg.buffer_index = varyings->varying[loc].buf;
      cfg.offset = varyings->varying[loc].offset;
      cfg.format = panvk_varying_hw_format(dev, varyings, stage, idx);
   }
}
123 
124 void
panvk_per_arch(emit_varyings)125 panvk_per_arch(emit_varyings)(const struct panvk_device *dev,
126                               const struct panvk_varyings_info *varyings,
127                               gl_shader_stage stage, void *descs)
128 {
129    struct mali_attribute_packed *attrib = descs;
130 
131    for (unsigned i = 0; i < varyings->stage[stage].count; i++)
132       panvk_emit_varying(dev, varyings, stage, i, attrib++);
133 }
134 
/*
 * Pack the ATTRIBUTE_BUFFER descriptor for varying buffer @id.
 *
 * The pointer is aligned down to 64 bytes; the bytes dropped by that
 * alignment are added back to the size so the buffer still covers the
 * original range (per-varying offsets account for the misalignment).
 */
static void
panvk_emit_varying_buf(const struct panvk_varyings_info *varyings,
                       enum panvk_varying_buf_id id, void *buf)
{
   unsigned buf_idx = panvk_varying_buf_index(varyings, id);

   pan_pack(buf, ATTRIBUTE_BUFFER, cfg) {
      /* Low 6 bits of the address, stripped from the pointer below. */
      unsigned offset = varyings->buf[buf_idx].address & 63;

      cfg.stride = varyings->buf[buf_idx].stride;
      cfg.size = varyings->buf[buf_idx].size + offset;
      cfg.pointer = varyings->buf[buf_idx].address & ~63ULL;
   }
}
149 
150 void
panvk_per_arch(emit_varying_bufs)151 panvk_per_arch(emit_varying_bufs)(const struct panvk_varyings_info *varyings,
152                                   void *descs)
153 {
154    struct mali_attribute_buffer_packed *buf = descs;
155 
156    for (unsigned i = 0; i < PANVK_VARY_BUF_MAX; i++) {
157       if (varyings->buf_mask & (1 << i))
158          panvk_emit_varying_buf(varyings, i, buf++);
159    }
160 }
161 
/*
 * Pack the ATTRIBUTE_BUFFER descriptor(s) for vertex attribute buffer @idx.
 *
 * The base address is aligned down to 64 bytes; the dropped low bits are
 * added back to the size here and to the per-attribute offsets in
 * panvk_emit_attrib(). Instanced buffers encode their divisor differently
 * depending on whether it is zero, a power of two, or arbitrary — the
 * arbitrary case needs a second (continuation) descriptor, which is why
 * callers reserve two descriptor slots per buffer.
 */
static void
panvk_emit_attrib_buf(const struct panvk_attribs_info *info,
                      const struct panvk_draw_info *draw,
                      const struct panvk_attrib_buf *bufs, unsigned buf_count,
                      unsigned idx, void *desc)
{
   const struct panvk_attrib_buf_info *buf_info = &info->buf[idx];

   assert(idx < buf_count);
   const struct panvk_attrib_buf *buf = &bufs[idx];
   mali_ptr addr = buf->address & ~63ULL;
   unsigned size = buf->size + (buf->address & 63);
   unsigned divisor = draw->padded_vertex_count * buf_info->instance_divisor;

   /* TODO: support instanced arrays */
   if (draw->instance_count <= 1) {
      /* Non-instanced draw: plain 1D buffer. A per-instance buffer gets a
       * zero stride so every vertex reads the same (instance 0) data. */
      pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
         cfg.type = MALI_ATTRIBUTE_TYPE_1D;
         cfg.stride = buf_info->per_instance ? 0 : buf_info->stride;
         cfg.pointer = addr;
         cfg.size = size;
      }
   } else if (!buf_info->per_instance) {
      /* Per-vertex data in an instanced draw: index modulo the padded
       * vertex count so each instance restarts at the buffer base. */
      pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
         cfg.type = MALI_ATTRIBUTE_TYPE_1D_MODULUS;
         cfg.divisor = draw->padded_vertex_count;
         cfg.stride = buf_info->stride;
         cfg.pointer = addr;
         cfg.size = size;
      }
   } else if (!divisor) {
      /* instance_divisor == 0 means all instances share the same value.
       * Make it a 1D array with a zero stride.
       */
      pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
         cfg.type = MALI_ATTRIBUTE_TYPE_1D;
         cfg.stride = 0;
         cfg.pointer = addr;
         cfg.size = size;
      }
   } else if (util_is_power_of_two_or_zero(divisor)) {
      /* Power-of-two divisor: the division is a simple right shift. */
      pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
         cfg.type = MALI_ATTRIBUTE_TYPE_1D_POT_DIVISOR;
         cfg.stride = buf_info->stride;
         cfg.pointer = addr;
         cfg.size = size;
         cfg.divisor_r = __builtin_ctz(divisor);
      }
   } else {
      /* Arbitrary (NPOT) divisor: use magic-number division parameters
       * computed by panfrost_compute_magic_divisor(), with the numerator
       * and original divisor in a continuation descriptor. */
      unsigned divisor_r = 0, divisor_e = 0;
      unsigned divisor_num =
         panfrost_compute_magic_divisor(divisor, &divisor_r, &divisor_e);
      pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
         cfg.type = MALI_ATTRIBUTE_TYPE_1D_NPOT_DIVISOR;
         cfg.stride = buf_info->stride;
         cfg.pointer = addr;
         cfg.size = size;
         cfg.divisor_r = divisor_r;
         cfg.divisor_e = divisor_e;
      }

      /* Second of the two slots reserved for this buffer. */
      desc += pan_size(ATTRIBUTE_BUFFER);
      pan_pack(desc, ATTRIBUTE_BUFFER_CONTINUATION_NPOT, cfg) {
         cfg.divisor_numerator = divisor_num;
         cfg.divisor = buf_info->instance_divisor;
      }
   }
}
230 
231 void
panvk_per_arch(emit_attrib_bufs)232 panvk_per_arch(emit_attrib_bufs)(const struct panvk_attribs_info *info,
233                                  const struct panvk_attrib_buf *bufs,
234                                  unsigned buf_count,
235                                  const struct panvk_draw_info *draw,
236                                  void *descs)
237 {
238    struct mali_attribute_buffer_packed *buf = descs;
239 
240    for (unsigned i = 0; i < info->buf_count; i++) {
241       panvk_emit_attrib_buf(info, draw, bufs, buf_count, i, buf);
242       buf += 2;
243    }
244 }
245 
/*
 * Translate a VkSamplerCreateInfo into a Mali SAMPLER descriptor.
 * The border color is resolved through the common vk_sampler helper
 * (no custom border-color state is passed — NULL second argument).
 */
void
panvk_per_arch(emit_sampler)(const VkSamplerCreateInfo *pCreateInfo, void *desc)
{
   VkClearColorValue border_color =
      vk_sampler_border_color_value(pCreateInfo, NULL);

   pan_pack(desc, SAMPLER, cfg) {
      cfg.magnify_nearest = pCreateInfo->magFilter == VK_FILTER_NEAREST;
      cfg.minify_nearest = pCreateInfo->minFilter == VK_FILTER_NEAREST;
      cfg.mipmap_mode =
         panvk_translate_sampler_mipmap_mode(pCreateInfo->mipmapMode);
      cfg.normalized_coordinates = !pCreateInfo->unnormalizedCoordinates;

      cfg.lod_bias = pCreateInfo->mipLodBias;
      cfg.minimum_lod = pCreateInfo->minLod;
      cfg.maximum_lod = pCreateInfo->maxLod;
      cfg.wrap_mode_s =
         panvk_translate_sampler_address_mode(pCreateInfo->addressModeU);
      cfg.wrap_mode_t =
         panvk_translate_sampler_address_mode(pCreateInfo->addressModeV);
      cfg.wrap_mode_r =
         panvk_translate_sampler_address_mode(pCreateInfo->addressModeW);
      cfg.compare_function =
         panvk_per_arch(translate_sampler_compare_func)(pCreateInfo);
      cfg.border_color_r = border_color.uint32[0];
      cfg.border_color_g = border_color.uint32[1];
      cfg.border_color_b = border_color.uint32[2];
      cfg.border_color_a = border_color.uint32[3];
   }
}
276 
/*
 * Pack the ATTRIBUTE descriptor for vertex attribute @idx.
 *
 * Attribute buffers are emitted in pairs (see emit_attrib_bufs()), hence
 * the buffer_index multiplication by 2. The low 6 bits of the buffer
 * address were stripped from the ATTRIBUTE_BUFFER pointer, so they are
 * re-added to the attribute offset here.
 */
static void
panvk_emit_attrib(const struct panvk_device *dev,
                  const struct panvk_draw_info *draw,
                  const struct panvk_attribs_info *attribs,
                  const struct panvk_attrib_buf *bufs, unsigned buf_count,
                  unsigned idx, void *attrib)
{
   enum pipe_format f = attribs->attrib[idx].format;
   unsigned buf_idx = attribs->attrib[idx].buf;
   const struct panvk_attrib_buf_info *buf_info = &attribs->buf[buf_idx];

   pan_pack(attrib, ATTRIBUTE, cfg) {
      cfg.buffer_index = buf_idx * 2;
      cfg.offset = attribs->attrib[idx].offset + (bufs[buf_idx].address & 63);

      /* Per-instance attributes start at firstInstance. */
      if (buf_info->per_instance)
         cfg.offset += draw->first_instance * buf_info->stride;

      cfg.format = GENX(panfrost_format_from_pipe_format)(f)->hw;
   }
}
298 
299 void
panvk_per_arch(emit_attribs)300 panvk_per_arch(emit_attribs)(const struct panvk_device *dev,
301                              const struct panvk_draw_info *draw,
302                              const struct panvk_attribs_info *attribs,
303                              const struct panvk_attrib_buf *bufs,
304                              unsigned buf_count, void *descs)
305 {
306    struct mali_attribute_packed *attrib = descs;
307 
308    for (unsigned i = 0; i < attribs->attrib_count; i++)
309       panvk_emit_attrib(dev, draw, attribs, bufs, buf_count, i, attrib++);
310 }
311 
/*
 * Pack a UNIFORM_BUFFER descriptor. @size is in bytes; the hardware
 * entry count is expressed in 16-byte units, rounded up.
 */
void
panvk_per_arch(emit_ubo)(mali_ptr address, size_t size, void *desc)
{
   pan_pack(desc, UNIFORM_BUFFER, cfg) {
      cfg.pointer = address;
      cfg.entries = DIV_ROUND_UP(size, 16);
   }
}
320 
/*
 * Fill the UNIFORM_BUFFER descriptor table for a draw/dispatch.
 *
 * Fixed slots come first (sysvals, then push constants), followed by each
 * descriptor set's static UBOs and its dynamic UBOs at the offsets
 * computed by panvk_pipeline_layout_ubo_start(). Unused slots are zeroed
 * so the hardware never sees stale descriptors.
 */
void
panvk_per_arch(emit_ubos)(const struct panvk_pipeline *pipeline,
                          const struct panvk_descriptor_state *state,
                          void *descs)
{
   struct mali_uniform_buffer_packed *ubos = descs;

   panvk_per_arch(emit_ubo)(state->sysvals_ptr, sizeof(state->sysvals),
                            &ubos[PANVK_SYSVAL_UBO_INDEX]);

   if (pipeline->layout->push_constants.size) {
      panvk_per_arch(emit_ubo)(
         state->push_constants,
         ALIGN_POT(pipeline->layout->push_constants.size, 16),
         &ubos[PANVK_PUSH_CONST_UBO_INDEX]);
   } else {
      memset(&ubos[PANVK_PUSH_CONST_UBO_INDEX], 0, sizeof(*ubos));
   }

   for (unsigned s = 0; s < pipeline->layout->vk.set_count; s++) {
      const struct panvk_descriptor_set_layout *set_layout =
         vk_to_panvk_descriptor_set_layout(pipeline->layout->vk.set_layouts[s]);
      const struct panvk_descriptor_set *set = state->sets[s];

      unsigned ubo_start =
         panvk_pipeline_layout_ubo_start(pipeline->layout, s, false);

      if (!set) {
         /* Set not bound: zero its whole UBO range (static + dynamic). */
         unsigned all_ubos = set_layout->num_ubos + set_layout->num_dyn_ubos;
         memset(&ubos[ubo_start], 0, all_ubos * sizeof(*ubos));
      } else {
         /* Static UBO descriptors were pre-packed at set-update time. */
         memcpy(&ubos[ubo_start], set->ubos,
                set_layout->num_ubos * sizeof(*ubos));

         unsigned dyn_ubo_start =
            panvk_pipeline_layout_ubo_start(pipeline->layout, s, true);

         /* Dynamic UBOs get their offset applied at bind time, so they
          * must be re-emitted here from the saved buffer descriptors. */
         for (unsigned i = 0; i < set_layout->num_dyn_ubos; i++) {
            const struct panvk_buffer_desc *bdesc =
               &state->dyn.ubos[pipeline->layout->sets[s].dyn_ubo_offset + i];

            mali_ptr address =
               panvk_buffer_gpu_ptr(bdesc->buffer, bdesc->offset);
            size_t size =
               panvk_buffer_range(bdesc->buffer, bdesc->offset, bdesc->size);
            if (size) {
               panvk_per_arch(emit_ubo)(address, size,
                                        &ubos[dyn_ubo_start + i]);
            } else {
               memset(&ubos[dyn_ubo_start + i], 0, sizeof(*ubos));
            }
         }
      }
   }
}
376 
377 void
panvk_per_arch(emit_vertex_job)378 panvk_per_arch(emit_vertex_job)(const struct panvk_pipeline *pipeline,
379                                 const struct panvk_draw_info *draw, void *job)
380 {
381    void *section = pan_section_ptr(job, COMPUTE_JOB, INVOCATION);
382 
383    memcpy(section, &draw->invocation, pan_size(INVOCATION));
384 
385    pan_section_pack(job, COMPUTE_JOB, PARAMETERS, cfg) {
386       cfg.job_task_split = 5;
387    }
388 
389    pan_section_pack(job, COMPUTE_JOB, DRAW, cfg) {
390       cfg.state = pipeline->rsds[MESA_SHADER_VERTEX];
391       cfg.attributes = draw->stages[MESA_SHADER_VERTEX].attributes;
392       cfg.attribute_buffers = draw->stages[MESA_SHADER_VERTEX].attribute_bufs;
393       cfg.varyings = draw->stages[MESA_SHADER_VERTEX].varyings;
394       cfg.varying_buffers = draw->varying_bufs;
395       cfg.thread_storage = draw->tls;
396       cfg.offset_start = draw->offset_start;
397       cfg.instance_size =
398          draw->instance_count > 1 ? draw->padded_vertex_count : 1;
399       cfg.uniform_buffers = draw->ubos;
400       cfg.push_uniforms = draw->stages[PIPE_SHADER_VERTEX].push_constants;
401       cfg.textures = draw->textures;
402       cfg.samplers = draw->samplers;
403    }
404 }
405 
/*
 * Emit a COMPUTE_JOB for a dispatch: the invocation section is packed
 * from workgroup counts/sizes, and the DRAW section is pointed at the
 * compute-stage resources.
 */
void
panvk_per_arch(emit_compute_job)(const struct panvk_pipeline *pipeline,
                                 const struct panvk_dispatch_info *dispatch,
                                 void *job)
{
   panfrost_pack_work_groups_compute(
      pan_section_ptr(job, COMPUTE_JOB, INVOCATION), dispatch->wg_count.x,
      dispatch->wg_count.y, dispatch->wg_count.z, pipeline->cs.local_size.x,
      pipeline->cs.local_size.y, pipeline->cs.local_size.z, false, false);

   pan_section_pack(job, COMPUTE_JOB, PARAMETERS, cfg) {
      /* Split derived from the workgroup dimensions (+1 keeps each term
       * nonzero for size-1 dimensions). */
      cfg.job_task_split = util_logbase2_ceil(pipeline->cs.local_size.x + 1) +
                           util_logbase2_ceil(pipeline->cs.local_size.y + 1) +
                           util_logbase2_ceil(pipeline->cs.local_size.z + 1);
   }

   pan_section_pack(job, COMPUTE_JOB, DRAW, cfg) {
      cfg.state = pipeline->rsds[MESA_SHADER_COMPUTE];
      cfg.attributes = dispatch->attributes;
      cfg.attribute_buffers = dispatch->attribute_bufs;
      cfg.thread_storage = dispatch->tsd;
      cfg.uniform_buffers = dispatch->ubos;
      cfg.push_uniforms = dispatch->push_uniforms;
      cfg.textures = dispatch->textures;
      cfg.samplers = dispatch->samplers;
   }
}
433 
/*
 * Pack the PRIMITIVE section of a tiler job: topology, provoking vertex,
 * primitive restart, and either the index buffer setup (indexed draws)
 * or a plain vertex count (non-indexed draws).
 */
static void
panvk_emit_tiler_primitive(const struct panvk_pipeline *pipeline,
                           const struct panvk_draw_info *draw, void *prim)
{
   pan_pack(prim, PRIMITIVE, cfg) {
      cfg.draw_mode = pipeline->ia.topology;
      if (pipeline->ia.writes_point_size)
         cfg.point_size_array_format = MALI_POINT_SIZE_ARRAY_FORMAT_FP16;

      cfg.first_provoking_vertex = true;
      if (pipeline->ia.primitive_restart)
         cfg.primitive_restart = MALI_PRIMITIVE_RESTART_IMPLICIT;
      cfg.job_task_split = 6;

      if (draw->index_size) {
         cfg.index_count = draw->index_count;
         cfg.indices = draw->indices;
         /* offset_start is already applied by the vertex job, so remove
          * it from the base vertex bias. */
         cfg.base_vertex_offset = draw->vertex_offset - draw->offset_start;

         switch (draw->index_size) {
         case 32:
            cfg.index_type = MALI_INDEX_TYPE_UINT32;
            break;
         case 16:
            cfg.index_type = MALI_INDEX_TYPE_UINT16;
            break;
         case 8:
            cfg.index_type = MALI_INDEX_TYPE_UINT8;
            break;
         default:
            unreachable("Invalid index size");
         }
      } else {
         /* Non-indexed draw: index_count carries the vertex count. */
         cfg.index_count = draw->vertex_count;
         cfg.index_type = MALI_INDEX_TYPE_NONE;
      }
   }
}
472 
473 static void
panvk_emit_tiler_primitive_size(const struct panvk_pipeline * pipeline,const struct panvk_draw_info * draw,void * primsz)474 panvk_emit_tiler_primitive_size(const struct panvk_pipeline *pipeline,
475                                 const struct panvk_draw_info *draw,
476                                 void *primsz)
477 {
478    pan_pack(primsz, PRIMITIVE_SIZE, cfg) {
479       if (pipeline->ia.writes_point_size) {
480          cfg.size_array = draw->psiz;
481       } else {
482          cfg.constant = draw->line_width;
483       }
484    }
485 }
486 
487 static void
panvk_emit_tiler_dcd(const struct panvk_pipeline * pipeline,const struct panvk_draw_info * draw,void * dcd)488 panvk_emit_tiler_dcd(const struct panvk_pipeline *pipeline,
489                      const struct panvk_draw_info *draw, void *dcd)
490 {
491    pan_pack(dcd, DRAW, cfg) {
492       cfg.front_face_ccw = pipeline->rast.front_ccw;
493       cfg.cull_front_face = pipeline->rast.cull_front_face;
494       cfg.cull_back_face = pipeline->rast.cull_back_face;
495       cfg.position = draw->position;
496       cfg.state = draw->fs_rsd;
497       cfg.attributes = draw->stages[MESA_SHADER_FRAGMENT].attributes;
498       cfg.attribute_buffers = draw->stages[MESA_SHADER_FRAGMENT].attribute_bufs;
499       cfg.viewport = draw->viewport;
500       cfg.varyings = draw->stages[MESA_SHADER_FRAGMENT].varyings;
501       cfg.varying_buffers = cfg.varyings ? draw->varying_bufs : 0;
502       cfg.thread_storage = draw->tls;
503 
504       /* For all primitives but lines DRAW.flat_shading_vertex must
505        * be set to 0 and the provoking vertex is selected with the
506        * PRIMITIVE.first_provoking_vertex field.
507        */
508       if (pipeline->ia.topology == MALI_DRAW_MODE_LINES ||
509           pipeline->ia.topology == MALI_DRAW_MODE_LINE_STRIP ||
510           pipeline->ia.topology == MALI_DRAW_MODE_LINE_LOOP) {
511          cfg.flat_shading_vertex = true;
512       }
513 
514       cfg.offset_start = draw->offset_start;
515       cfg.instance_size =
516          draw->instance_count > 1 ? draw->padded_vertex_count : 1;
517       cfg.uniform_buffers = draw->ubos;
518       cfg.push_uniforms = draw->stages[PIPE_SHADER_FRAGMENT].push_constants;
519       cfg.textures = draw->textures;
520       cfg.samplers = draw->samplers;
521 
522       /* TODO: occlusion queries */
523    }
524 }
525 
/*
 * Emit a full TILER_JOB for a draw: invocation, primitive, primitive-size
 * and DCD sections are filled from the draw/pipeline state, then the job
 * is tied to the tiler context.
 */
void
panvk_per_arch(emit_tiler_job)(const struct panvk_pipeline *pipeline,
                               const struct panvk_draw_info *draw, void *job)
{
   void *section;

   section = pan_section_ptr(job, TILER_JOB, INVOCATION);
   memcpy(section, &draw->invocation, pan_size(INVOCATION));

   section = pan_section_ptr(job, TILER_JOB, PRIMITIVE);
   panvk_emit_tiler_primitive(pipeline, draw, section);

   section = pan_section_ptr(job, TILER_JOB, PRIMITIVE_SIZE);
   panvk_emit_tiler_primitive_size(pipeline, draw, section);

   section = pan_section_ptr(job, TILER_JOB, DRAW);
   panvk_emit_tiler_dcd(pipeline, draw, section);

   pan_section_pack(job, TILER_JOB, TILER, cfg) {
      cfg.address = draw->tiler_ctx->bifrost;
   }
   /* The PADDING section has no fields but still needs packing so the
    * descriptor memory is fully initialized. */
   pan_section_pack(job, TILER_JOB, PADDING, padding)
      ;
}
550 
551 void
panvk_per_arch(emit_viewport)552 panvk_per_arch(emit_viewport)(const VkViewport *viewport,
553                               const VkRect2D *scissor, void *vpd)
554 {
555    /* The spec says "width must be greater than 0.0" */
556    assert(viewport->x >= 0);
557    int minx = (int)viewport->x;
558    int maxx = (int)(viewport->x + viewport->width);
559 
560    /* Viewport height can be negative */
561    int miny = MIN2((int)viewport->y, (int)(viewport->y + viewport->height));
562    int maxy = MAX2((int)viewport->y, (int)(viewport->y + viewport->height));
563 
564    assert(scissor->offset.x >= 0 && scissor->offset.y >= 0);
565    miny = MAX2(scissor->offset.x, minx);
566    miny = MAX2(scissor->offset.y, miny);
567    maxx = MIN2(scissor->offset.x + scissor->extent.width, maxx);
568    maxy = MIN2(scissor->offset.y + scissor->extent.height, maxy);
569 
570    /* Make sure we don't end up with a max < min when width/height is 0 */
571    maxx = maxx > minx ? maxx - 1 : maxx;
572    maxy = maxy > miny ? maxy - 1 : maxy;
573 
574    assert(viewport->minDepth >= 0.0f && viewport->minDepth <= 1.0f);
575    assert(viewport->maxDepth >= 0.0f && viewport->maxDepth <= 1.0f);
576 
577    pan_pack(vpd, VIEWPORT, cfg) {
578       cfg.scissor_minimum_x = minx;
579       cfg.scissor_minimum_y = miny;
580       cfg.scissor_maximum_x = maxx;
581       cfg.scissor_maximum_y = maxy;
582       cfg.minimum_z = MIN2(viewport->minDepth, viewport->maxDepth);
583       cfg.maximum_z = MAX2(viewport->minDepth, viewport->maxDepth);
584    }
585 }
586 
587 static enum mali_register_file_format
bifrost_blend_type_from_nir(nir_alu_type nir_type)588 bifrost_blend_type_from_nir(nir_alu_type nir_type)
589 {
590    switch (nir_type) {
591    case 0: /* Render target not in use */
592       return 0;
593    case nir_type_float16:
594       return MALI_REGISTER_FILE_FORMAT_F16;
595    case nir_type_float32:
596       return MALI_REGISTER_FILE_FORMAT_F32;
597    case nir_type_int32:
598       return MALI_REGISTER_FILE_FORMAT_I32;
599    case nir_type_uint32:
600       return MALI_REGISTER_FILE_FORMAT_U32;
601    case nir_type_int16:
602       return MALI_REGISTER_FILE_FORMAT_I16;
603    case nir_type_uint16:
604       return MALI_REGISTER_FILE_FORMAT_U16;
605    default:
606       unreachable("Unsupported blend shader type for NIR alu type");
607    }
608 }
609 
/*
 * Pack the BLEND descriptor for render target @rt of a pipeline.
 * Dithering is currently always disabled (dithered = false).
 */
void
panvk_per_arch(emit_blend)(const struct panvk_device *dev,
                           const struct panvk_pipeline *pipeline, unsigned rt,
                           void *bd)
{
   const struct pan_blend_state *blend = &pipeline->blend.state;
   const struct pan_blend_rt_state *rts = &blend->rts[rt];
   bool dithered = false;

   pan_pack(bd, BLEND, cfg) {
      if (!blend->rt_count || !rts->equation.color_mask) {
         /* No color targets, or writes fully masked: disable blending.
          * The continue skips the rest of the pack body. */
         cfg.enable = false;
         cfg.internal.mode = MALI_BLEND_MODE_OFF;
         continue;
      }

      cfg.srgb = util_format_is_srgb(rts->format);
      cfg.load_destination = pan_blend_reads_dest(blend->rts[rt].equation);
      cfg.round_to_fb_precision = !dithered;

      /* Widest channel of the RT format, used to scale the fixed-point
       * blend constant below. */
      const struct util_format_description *format_desc =
         util_format_description(rts->format);
      unsigned chan_size = 0;
      for (unsigned i = 0; i < format_desc->nr_channels; i++)
         chan_size = MAX2(format_desc->channel[i].size, chan_size);

      pan_blend_to_fixed_function_equation(blend->rts[rt].equation,
                                           &cfg.equation);

      /* Fixed point constant */
      float fconst = pan_blend_get_constant(
         pan_blend_constant_mask(blend->rts[rt].equation), blend->constants);
      u16 constant = fconst * ((1 << chan_size) - 1);
      constant <<= 16 - chan_size;
      cfg.constant = constant;

      if (pan_blend_is_opaque(blend->rts[rt].equation)) {
         cfg.internal.mode = MALI_BLEND_MODE_OPAQUE;
      } else {
         cfg.internal.mode = MALI_BLEND_MODE_FIXED_FUNCTION;

         cfg.internal.fixed_function.alpha_zero_nop =
            pan_blend_alpha_zero_nop(blend->rts[rt].equation);
         cfg.internal.fixed_function.alpha_one_store =
            pan_blend_alpha_one_store(blend->rts[rt].equation);
      }

      /* If we want the conversion to work properly,
       * num_comps must be set to 4
       */
      cfg.internal.fixed_function.num_comps = 4;
      cfg.internal.fixed_function.conversion.memory_format =
         GENX(panfrost_dithered_format_from_pipe_format)(rts->format, dithered);
      cfg.internal.fixed_function.conversion.register_format =
         bifrost_blend_type_from_nir(pipeline->fs.info.bifrost.blend[rt].type);
      cfg.internal.fixed_function.rt = rt;
   }
}
668 
/*
 * Pack a partial BLEND descriptor holding only the dynamic blend
 * constant, pre-scaled by the factor recorded at pipeline-creation time.
 * NOTE(review): cfg.enable is false here — presumably this partial word
 * is merged with the pipeline's static BLEND descriptor by the caller;
 * confirm against the command-buffer code.
 */
void
panvk_per_arch(emit_blend_constant)(const struct panvk_device *dev,
                                    const struct panvk_pipeline *pipeline,
                                    unsigned rt, const float *constants,
                                    void *bd)
{
   /* Only one of the four constants is actually used by this RT. */
   float constant = constants[pipeline->blend.constant[rt].index];

   pan_pack(bd, BLEND, cfg) {
      cfg.enable = false;
      cfg.constant = constant * pipeline->blend.constant[rt].bifrost_factor;
   }
}
682 
/*
 * Pack a partial fragment RENDERER_STATE containing only the fields that
 * are backed by Vulkan dynamic state on this pipeline.
 * NOTE(review): emit_base_fs_rsd() skips exactly these fields when they
 * are dynamic, so this word is presumably combined with the base RSD by
 * the caller — confirm how the merge is done.
 */
void
panvk_per_arch(emit_dyn_fs_rsd)(const struct panvk_pipeline *pipeline,
                                const struct panvk_cmd_state *state, void *rsd)
{
   pan_pack(rsd, RENDERER_STATE, cfg) {
      if (pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS)) {
         /* Hardware depth units are half of Vulkan's, hence the 2x. */
         cfg.depth_units = state->rast.depth_bias.constant_factor * 2.0f;
         cfg.depth_factor = state->rast.depth_bias.slope_factor;
         cfg.depth_bias_clamp = state->rast.depth_bias.clamp;
      }

      if (pipeline->dynamic_state_mask &
          (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) {
         cfg.stencil_front.mask = state->zs.s_front.compare_mask;
         cfg.stencil_back.mask = state->zs.s_back.compare_mask;
      }

      if (pipeline->dynamic_state_mask &
          (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) {
         cfg.stencil_mask_misc.stencil_mask_front =
            state->zs.s_front.write_mask;
         cfg.stencil_mask_misc.stencil_mask_back = state->zs.s_back.write_mask;
      }

      if (pipeline->dynamic_state_mask &
          (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) {
         cfg.stencil_front.reference_value = state->zs.s_front.ref;
         cfg.stencil_back.reference_value = state->zs.s_back.ref;
      }
   }
}
714 
715 void
panvk_per_arch(emit_base_fs_rsd)716 panvk_per_arch(emit_base_fs_rsd)(const struct panvk_device *dev,
717                                  const struct panvk_pipeline *pipeline,
718                                  void *rsd)
719 {
720    const struct pan_shader_info *info = &pipeline->fs.info;
721 
722    pan_pack(rsd, RENDERER_STATE, cfg) {
723       if (pipeline->fs.required) {
724          pan_shader_prepare_rsd(info, pipeline->fs.address, &cfg);
725 
726          uint8_t rt_written =
727             pipeline->fs.info.outputs_written >> FRAG_RESULT_DATA0;
728          uint8_t rt_mask = pipeline->fs.rt_mask;
729          cfg.properties.allow_forward_pixel_to_kill =
730             pipeline->fs.info.fs.can_fpk && !(rt_mask & ~rt_written) &&
731             !pipeline->ms.alpha_to_coverage && !pipeline->blend.reads_dest;
732 
733          bool writes_zs = pipeline->zs.z_write || pipeline->zs.s_test;
734          bool zs_always_passes = !pipeline->zs.z_test && !pipeline->zs.s_test;
735          bool oq = false; /* TODO: Occlusion queries */
736 
737          struct pan_earlyzs_state earlyzs =
738             pan_earlyzs_get(pan_earlyzs_analyze(info), writes_zs || oq,
739                             pipeline->ms.alpha_to_coverage, zs_always_passes);
740 
741          cfg.properties.pixel_kill_operation = earlyzs.kill;
742          cfg.properties.zs_update_operation = earlyzs.update;
743       } else {
744          cfg.properties.depth_source = MALI_DEPTH_SOURCE_FIXED_FUNCTION;
745          cfg.properties.allow_forward_pixel_to_kill = true;
746          cfg.properties.allow_forward_pixel_to_be_killed = true;
747          cfg.properties.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY;
748       }
749 
750       bool msaa = pipeline->ms.rast_samples > 1;
751       cfg.multisample_misc.multisample_enable = msaa;
752       cfg.multisample_misc.sample_mask =
753          msaa ? pipeline->ms.sample_mask : UINT16_MAX;
754 
755       cfg.multisample_misc.depth_function =
756          pipeline->zs.z_test ? pipeline->zs.z_compare_func : MALI_FUNC_ALWAYS;
757 
758       cfg.multisample_misc.depth_write_mask = pipeline->zs.z_write;
759       cfg.multisample_misc.fixed_function_near_discard =
760          !pipeline->rast.clamp_depth;
761       cfg.multisample_misc.fixed_function_far_discard =
762          !pipeline->rast.clamp_depth;
763       cfg.multisample_misc.shader_depth_range_fixed = true;
764 
765       cfg.stencil_mask_misc.stencil_enable = pipeline->zs.s_test;
766       cfg.stencil_mask_misc.alpha_to_coverage = pipeline->ms.alpha_to_coverage;
767       cfg.stencil_mask_misc.alpha_test_compare_function = MALI_FUNC_ALWAYS;
768       cfg.stencil_mask_misc.front_facing_depth_bias =
769          pipeline->rast.depth_bias.enable;
770       cfg.stencil_mask_misc.back_facing_depth_bias =
771          pipeline->rast.depth_bias.enable;
772       cfg.stencil_mask_misc.single_sampled_lines =
773          pipeline->ms.rast_samples <= 1;
774 
775       if (!(pipeline->dynamic_state_mask &
776             (1 << VK_DYNAMIC_STATE_DEPTH_BIAS))) {
777          cfg.depth_units = pipeline->rast.depth_bias.constant_factor * 2.0f;
778          cfg.depth_factor = pipeline->rast.depth_bias.slope_factor;
779          cfg.depth_bias_clamp = pipeline->rast.depth_bias.clamp;
780       }
781 
782       if (!(pipeline->dynamic_state_mask &
783             (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK))) {
784          cfg.stencil_front.mask = pipeline->zs.s_front.compare_mask;
785          cfg.stencil_back.mask = pipeline->zs.s_back.compare_mask;
786       }
787 
788       if (!(pipeline->dynamic_state_mask &
789             (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK))) {
790          cfg.stencil_mask_misc.stencil_mask_front =
791             pipeline->zs.s_front.write_mask;
792          cfg.stencil_mask_misc.stencil_mask_back =
793             pipeline->zs.s_back.write_mask;
794       }
795 
796       if (!(pipeline->dynamic_state_mask &
797             (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE))) {
798          cfg.stencil_front.reference_value = pipeline->zs.s_front.ref;
799          cfg.stencil_back.reference_value = pipeline->zs.s_back.ref;
800       }
801 
802       cfg.stencil_front.compare_function = pipeline->zs.s_front.compare_func;
803       cfg.stencil_front.stencil_fail = pipeline->zs.s_front.fail_op;
804       cfg.stencil_front.depth_fail = pipeline->zs.s_front.z_fail_op;
805       cfg.stencil_front.depth_pass = pipeline->zs.s_front.pass_op;
806       cfg.stencil_back.compare_function = pipeline->zs.s_back.compare_func;
807       cfg.stencil_back.stencil_fail = pipeline->zs.s_back.fail_op;
808       cfg.stencil_back.depth_fail = pipeline->zs.s_back.z_fail_op;
809       cfg.stencil_back.depth_pass = pipeline->zs.s_back.pass_op;
810    }
811 }
812 
813 void
panvk_per_arch(emit_non_fs_rsd)814 panvk_per_arch(emit_non_fs_rsd)(const struct panvk_device *dev,
815                                 const struct pan_shader_info *shader_info,
816                                 mali_ptr shader_ptr, void *rsd)
817 {
818    assert(shader_info->stage != MESA_SHADER_FRAGMENT);
819 
820    pan_pack(rsd, RENDERER_STATE, cfg) {
821       pan_shader_prepare_rsd(shader_info, shader_ptr, &cfg);
822    }
823 }
824 
825 void
panvk_per_arch(emit_tiler_context)826 panvk_per_arch(emit_tiler_context)(const struct panvk_device *dev,
827                                    unsigned width, unsigned height,
828                                    const struct panfrost_ptr *descs)
829 {
830    pan_pack(descs->cpu + pan_size(TILER_CONTEXT), TILER_HEAP, cfg) {
831       cfg.size = pan_kmod_bo_size(dev->tiler_heap->bo);
832       cfg.base = dev->tiler_heap->addr.dev;
833       cfg.bottom = dev->tiler_heap->addr.dev;
834       cfg.top = cfg.base + cfg.size;
835    }
836 
837    pan_pack(descs->cpu, TILER_CONTEXT, cfg) {
838       cfg.hierarchy_mask = 0x28;
839       cfg.fb_width = width;
840       cfg.fb_height = height;
841       cfg.heap = descs->gpu + pan_size(TILER_CONTEXT);
842    }
843 }
844