/*
 * Copyright (C) 2021 Collabora Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "genxml/gen_macros.h"

#include "util/macros.h"
#include "compiler/shader_enums.h"

#include "vk_util.h"

#include "pan_cs.h"
#include "pan_encoder.h"
#include "pan_pool.h"
#include "pan_shader.h"
#include "pan_earlyzs.h"

#include "panvk_cs.h"
#include "panvk_private.h"
#include "panvk_varyings.h"

#include "vk_sampler.h"

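/* Translation helpers from Vulkan sampler enums to their Mali equivalents.
 * Note that VK_SAMPLER_MIPMAP_MODE_LINEAR maps to MALI_MIPMAP_MODE_TRILINEAR,
 * the hardware's name for linear filtering between mip levels.
 */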
static enum mali_mipmap_mode
panvk_translate_sampler_mipmap_mode(VkSamplerMipmapMode mode)
{
   switch (mode) {
   case VK_SAMPLER_MIPMAP_MODE_NEAREST: return MALI_MIPMAP_MODE_NEAREST;
   case VK_SAMPLER_MIPMAP_MODE_LINEAR: return MALI_MIPMAP_MODE_TRILINEAR;
   default: unreachable("Invalid mipmap mode");
   }
}

static unsigned
panvk_translate_sampler_address_mode(VkSamplerAddressMode mode)
{
   switch (mode) {
   case VK_SAMPLER_ADDRESS_MODE_REPEAT: return MALI_WRAP_MODE_REPEAT;
   case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT: return MALI_WRAP_MODE_MIRRORED_REPEAT;
   case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE: return MALI_WRAP_MODE_CLAMP_TO_EDGE;
   case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER: return MALI_WRAP_MODE_CLAMP_TO_BORDER;
   case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE: return MALI_WRAP_MODE_MIRRORED_CLAMP_TO_EDGE;
   default: unreachable("Invalid wrap");
   }
}

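/* Varying formats are packed with the hardware format in the upper bits and
 * the swizzle/component order in the low 12 bits. The swizzle encoding
 * changed between v6 and v7, hence the PAN_ARCH conditionals below.
 */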
static mali_pixel_format
panvk_varying_hw_format(const struct panvk_device *dev,
                        const struct panvk_varyings_info *varyings,
                        gl_shader_stage stage, unsigned idx)
{
   const struct panfrost_device *pdev = &dev->physical_device->pdev;
   gl_varying_slot loc = varyings->stage[stage].loc[idx];

   switch (loc) {
   case VARYING_SLOT_PNTC:
   case VARYING_SLOT_PSIZ:
#if PAN_ARCH <= 6
      return (MALI_R16F << 12) | panfrost_get_default_swizzle(1);
#else
      return (MALI_R16F << 12) | MALI_RGB_COMPONENT_ORDER_R000;
#endif
   case VARYING_SLOT_POS:
#if PAN_ARCH <= 6
      return (MALI_SNAP_4 << 12) | panfrost_get_default_swizzle(4);
#else
      return (MALI_SNAP_4 << 12) | MALI_RGB_COMPONENT_ORDER_RGBA;
#endif
   default:
      if (varyings->varying[loc].format != PIPE_FORMAT_NONE)
         return pdev->formats[varyings->varying[loc].format].hw;
#if PAN_ARCH >= 7
      return (MALI_CONSTANT << 12) | MALI_RGB_COMPONENT_ORDER_0000;
#else
      return (MALI_CONSTANT << 12) | PAN_V6_SWIZZLE(0, 0, 0, 0);
#endif
   }
}

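/* Emit one ATTRIBUTE descriptor for a varying: the varying buffer it lives
 * in, its offset within that buffer, and its hardware format.
 */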
static void
panvk_emit_varying(const struct panvk_device *dev,
                   const struct panvk_varyings_info *varyings,
                   gl_shader_stage stage, unsigned idx,
                   void *attrib)
{
   gl_varying_slot loc = varyings->stage[stage].loc[idx];

   pan_pack(attrib, ATTRIBUTE, cfg) {
      cfg.buffer_index = varyings->varying[loc].buf;
      cfg.offset = varyings->varying[loc].offset;
      cfg.format = panvk_varying_hw_format(dev, varyings, stage, idx);
   }
}

void
panvk_per_arch(emit_varyings)(const struct panvk_device *dev,
                              const struct panvk_varyings_info *varyings,
                              gl_shader_stage stage,
                              void *descs)
{
   struct mali_attribute_packed *attrib = descs;

   for (unsigned i = 0; i < varyings->stage[stage].count; i++)
      panvk_emit_varying(dev, varyings, stage, i, attrib++);
}

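/* Varying buffer pointers are aligned down to 64 bytes; the dropped low bits
 * are added back to the size so the full range remains covered.
 */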
static void
panvk_emit_varying_buf(const struct panvk_varyings_info *varyings,
                       enum panvk_varying_buf_id id, void *buf)
{
   unsigned buf_idx = panvk_varying_buf_index(varyings, id);

   pan_pack(buf, ATTRIBUTE_BUFFER, cfg) {
      unsigned offset = varyings->buf[buf_idx].address & 63;

      cfg.stride = varyings->buf[buf_idx].stride;
      cfg.size = varyings->buf[buf_idx].size + offset;
      cfg.pointer = varyings->buf[buf_idx].address & ~63ULL;
   }
}

void
panvk_per_arch(emit_varying_bufs)(const struct panvk_varyings_info *varyings,
                                  void *descs)
{
   struct mali_attribute_buffer_packed *buf = descs;

   for (unsigned i = 0; i < PANVK_VARY_BUF_MAX; i++) {
      if (varyings->buf_mask & (1 << i))
         panvk_emit_varying_buf(varyings, i, buf++);
   }
}

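/* Pick the attribute-buffer addressing mode based on instancing state:
 * plain 1D for non-instanced draws, 1D_MODULUS for per-vertex data in
 * instanced draws, and the POT/NPOT divisor modes for per-instance data.
 * The NPOT case needs an extra continuation descriptor for the magic
 * divisor, which is why two descriptor slots are reserved per buffer.
 */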
static void
panvk_emit_attrib_buf(const struct panvk_attribs_info *info,
                      const struct panvk_draw_info *draw,
                      const struct panvk_attrib_buf *bufs,
                      unsigned buf_count,
                      unsigned idx, void *desc)
{
   const struct panvk_attrib_buf_info *buf_info = &info->buf[idx];

   assert(idx < buf_count);
   const struct panvk_attrib_buf *buf = &bufs[idx];
   mali_ptr addr = buf->address & ~63ULL;
   unsigned size = buf->size + (buf->address & 63);
   unsigned divisor =
      draw->padded_vertex_count * buf_info->instance_divisor;

   /* TODO: support instanced arrays */
   if (draw->instance_count <= 1) {
      pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
         cfg.type = MALI_ATTRIBUTE_TYPE_1D;
         cfg.stride = buf_info->per_instance ? 0 : buf_info->stride;
         cfg.pointer = addr;
         cfg.size = size;
      }
   } else if (!buf_info->per_instance) {
      pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
         cfg.type = MALI_ATTRIBUTE_TYPE_1D_MODULUS;
         cfg.divisor = draw->padded_vertex_count;
         cfg.stride = buf_info->stride;
         cfg.pointer = addr;
         cfg.size = size;
      }
   } else if (!divisor) {
      /* instance_divisor == 0 means all instances share the same value.
       * Make it a 1D array with a zero stride.
       */
      pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
         cfg.type = MALI_ATTRIBUTE_TYPE_1D;
         cfg.stride = 0;
         cfg.pointer = addr;
         cfg.size = size;
      }
   } else if (util_is_power_of_two_or_zero(divisor)) {
      pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
         cfg.type = MALI_ATTRIBUTE_TYPE_1D_POT_DIVISOR;
         cfg.stride = buf_info->stride;
         cfg.pointer = addr;
         cfg.size = size;
         cfg.divisor_r = __builtin_ctz(divisor);
      }
   } else {
      unsigned divisor_r = 0, divisor_e = 0;
      unsigned divisor_num =
         panfrost_compute_magic_divisor(divisor, &divisor_r, &divisor_e);
      pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
         cfg.type = MALI_ATTRIBUTE_TYPE_1D_NPOT_DIVISOR;
         cfg.stride = buf_info->stride;
         cfg.pointer = addr;
         cfg.size = size;
         cfg.divisor_r = divisor_r;
         cfg.divisor_e = divisor_e;
      }

      desc += pan_size(ATTRIBUTE_BUFFER);
      pan_pack(desc, ATTRIBUTE_BUFFER_CONTINUATION_NPOT, cfg) {
         cfg.divisor_numerator = divisor_num;
         cfg.divisor = buf_info->instance_divisor;
      }
   }
}

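/* Each attribute buffer gets two ATTRIBUTE_BUFFER slots so the NPOT-divisor
 * continuation descriptor always has room right after its buffer.
 */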
void
panvk_per_arch(emit_attrib_bufs)(const struct panvk_attribs_info *info,
                                 const struct panvk_attrib_buf *bufs,
                                 unsigned buf_count,
                                 const struct panvk_draw_info *draw,
                                 void *descs)
{
   struct mali_attribute_buffer_packed *buf = descs;

   for (unsigned i = 0; i < info->buf_count; i++) {
      panvk_emit_attrib_buf(info, draw, bufs, buf_count, i, buf);
      buf += 2;
   }
}

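/* Pack a Mali SAMPLER descriptor directly from the VkSamplerCreateInfo.
 * LOD bias and clamps are converted to the hardware's fixed-point
 * representation by FIXED_16().
 */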
void
panvk_per_arch(emit_sampler)(const VkSamplerCreateInfo *pCreateInfo,
                             void *desc)
{
   VkClearColorValue border_color =
      vk_sampler_border_color_value(pCreateInfo, NULL);

   pan_pack(desc, SAMPLER, cfg) {
      cfg.magnify_nearest = pCreateInfo->magFilter == VK_FILTER_NEAREST;
      cfg.minify_nearest = pCreateInfo->minFilter == VK_FILTER_NEAREST;
      cfg.mipmap_mode = panvk_translate_sampler_mipmap_mode(pCreateInfo->mipmapMode);
      cfg.normalized_coordinates = !pCreateInfo->unnormalizedCoordinates;

      cfg.lod_bias = FIXED_16(pCreateInfo->mipLodBias, true);
      cfg.minimum_lod = FIXED_16(pCreateInfo->minLod, false);
      cfg.maximum_lod = FIXED_16(pCreateInfo->maxLod, false);
      cfg.wrap_mode_s = panvk_translate_sampler_address_mode(pCreateInfo->addressModeU);
      cfg.wrap_mode_t = panvk_translate_sampler_address_mode(pCreateInfo->addressModeV);
      cfg.wrap_mode_r = panvk_translate_sampler_address_mode(pCreateInfo->addressModeW);
      cfg.compare_function = panvk_per_arch(translate_sampler_compare_func)(pCreateInfo);
      cfg.border_color_r = border_color.uint32[0];
      cfg.border_color_g = border_color.uint32[1];
      cfg.border_color_b = border_color.uint32[2];
      cfg.border_color_a = border_color.uint32[3];
   }
}

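/* Emit one vertex attribute. The buffer index is doubled because attribute
 * buffers occupy two descriptor slots each (see emit_attrib_bufs), and the
 * low bits of the unaligned buffer address are re-applied as an offset.
 */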
static void
panvk_emit_attrib(const struct panvk_device *dev,
                  const struct panvk_draw_info *draw,
                  const struct panvk_attribs_info *attribs,
                  const struct panvk_attrib_buf *bufs,
                  unsigned buf_count,
                  unsigned idx, void *attrib)
{
   const struct panfrost_device *pdev = &dev->physical_device->pdev;
   unsigned buf_idx = attribs->attrib[idx].buf;
   const struct panvk_attrib_buf_info *buf_info = &attribs->buf[buf_idx];

   pan_pack(attrib, ATTRIBUTE, cfg) {
      cfg.buffer_index = buf_idx * 2;
      cfg.offset = attribs->attrib[idx].offset +
                   (bufs[buf_idx].address & 63);

      if (buf_info->per_instance)
         cfg.offset += draw->first_instance * buf_info->stride;

      cfg.format = pdev->formats[attribs->attrib[idx].format].hw;
   }
}

void
panvk_per_arch(emit_attribs)(const struct panvk_device *dev,
                             const struct panvk_draw_info *draw,
                             const struct panvk_attribs_info *attribs,
                             const struct panvk_attrib_buf *bufs,
                             unsigned buf_count,
                             void *descs)
{
   struct mali_attribute_packed *attrib = descs;

   for (unsigned i = 0; i < attribs->attrib_count; i++)
      panvk_emit_attrib(dev, draw, attribs, bufs, buf_count, i, attrib++);
}

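/* UNIFORM_BUFFER entries are counted in 16-byte units, hence the
 * DIV_ROUND_UP() below.
 */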
void
panvk_per_arch(emit_ubo)(mali_ptr address, size_t size, void *desc)
{
   pan_pack(desc, UNIFORM_BUFFER, cfg) {
      cfg.pointer = address;
      cfg.entries = DIV_ROUND_UP(size, 16);
   }
}

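/* Fill the UBO table: the sysval and push-constant UBOs sit at fixed
 * indices, followed by each descriptor set's static UBOs and then its
 * dynamic UBOs. Unused slots are zero-filled so the hardware never sees
 * stale descriptors.
 */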
void
panvk_per_arch(emit_ubos)(const struct panvk_pipeline *pipeline,
                          const struct panvk_descriptor_state *state,
                          void *descs)
{
   struct mali_uniform_buffer_packed *ubos = descs;

   panvk_per_arch(emit_ubo)(state->sysvals_ptr,
                            sizeof(state->sysvals),
                            &ubos[PANVK_SYSVAL_UBO_INDEX]);

   if (pipeline->layout->push_constants.size) {
      panvk_per_arch(emit_ubo)(state->push_constants,
                               ALIGN_POT(pipeline->layout->push_constants.size, 16),
                               &ubos[PANVK_PUSH_CONST_UBO_INDEX]);
   } else {
      memset(&ubos[PANVK_PUSH_CONST_UBO_INDEX], 0, sizeof(*ubos));
   }

   for (unsigned s = 0; s < pipeline->layout->vk.set_count; s++) {
      const struct panvk_descriptor_set_layout *set_layout =
         vk_to_panvk_descriptor_set_layout(pipeline->layout->vk.set_layouts[s]);
      const struct panvk_descriptor_set *set = state->sets[s];

      unsigned ubo_start =
         panvk_pipeline_layout_ubo_start(pipeline->layout, s, false);

      if (!set) {
         unsigned all_ubos = set_layout->num_ubos + set_layout->num_dyn_ubos;
         memset(&ubos[ubo_start], 0, all_ubos * sizeof(*ubos));
      } else {
         memcpy(&ubos[ubo_start], set->ubos,
                set_layout->num_ubos * sizeof(*ubos));

         unsigned dyn_ubo_start =
            panvk_pipeline_layout_ubo_start(pipeline->layout, s, true);

         for (unsigned i = 0; i < set_layout->num_dyn_ubos; i++) {
            const struct panvk_buffer_desc *bdesc =
               &state->dyn.ubos[pipeline->layout->sets[s].dyn_ubo_offset + i];

            mali_ptr address = panvk_buffer_gpu_ptr(bdesc->buffer,
                                                    bdesc->offset);
            size_t size = panvk_buffer_range(bdesc->buffer,
                                             bdesc->offset, bdesc->size);
            if (size) {
               panvk_per_arch(emit_ubo)(address, size,
                                        &ubos[dyn_ubo_start + i]);
            } else {
               memset(&ubos[dyn_ubo_start + i], 0, sizeof(*ubos));
            }
         }
      }
   }
}

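/* Vertex shading is submitted as a compute-style job: the invocation comes
 * straight from the draw, and the DRAW section points at the vertex RSD and
 * the per-draw resource tables (attributes, varyings, UBOs, textures,
 * samplers).
 */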
void
panvk_per_arch(emit_vertex_job)(const struct panvk_pipeline *pipeline,
                                const struct panvk_draw_info *draw,
                                void *job)
{
   void *section = pan_section_ptr(job, COMPUTE_JOB, INVOCATION);

   memcpy(section, &draw->invocation, pan_size(INVOCATION));

   pan_section_pack(job, COMPUTE_JOB, PARAMETERS, cfg) {
      cfg.job_task_split = 5;
   }

   pan_section_pack(job, COMPUTE_JOB, DRAW, cfg) {
      cfg.state = pipeline->rsds[MESA_SHADER_VERTEX];
      cfg.attributes = draw->stages[MESA_SHADER_VERTEX].attributes;
      cfg.attribute_buffers = draw->stages[MESA_SHADER_VERTEX].attribute_bufs;
      cfg.varyings = draw->stages[MESA_SHADER_VERTEX].varyings;
      cfg.varying_buffers = draw->varying_bufs;
      cfg.thread_storage = draw->tls;
      cfg.offset_start = draw->offset_start;
      cfg.instance_size = draw->instance_count > 1 ?
                          draw->padded_vertex_count : 1;
      cfg.uniform_buffers = draw->ubos;
      cfg.push_uniforms = draw->stages[PIPE_SHADER_VERTEX].push_constants;
      cfg.textures = draw->textures;
      cfg.samplers = draw->samplers;
   }
}

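/* Pack the workgroup counts and local size into the invocation words, and
 * derive job_task_split from the number of bits used by the local
 * dimensions.
 */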
void
panvk_per_arch(emit_compute_job)(const struct panvk_pipeline *pipeline,
                                 const struct panvk_dispatch_info *dispatch,
                                 void *job)
{
   panfrost_pack_work_groups_compute(pan_section_ptr(job, COMPUTE_JOB, INVOCATION),
                                     dispatch->wg_count.x,
                                     dispatch->wg_count.y,
                                     dispatch->wg_count.z,
                                     pipeline->cs.local_size.x,
                                     pipeline->cs.local_size.y,
                                     pipeline->cs.local_size.z,
                                     false, false);

   pan_section_pack(job, COMPUTE_JOB, PARAMETERS, cfg) {
      cfg.job_task_split =
         util_logbase2_ceil(pipeline->cs.local_size.x + 1) +
         util_logbase2_ceil(pipeline->cs.local_size.y + 1) +
         util_logbase2_ceil(pipeline->cs.local_size.z + 1);
   }

   pan_section_pack(job, COMPUTE_JOB, DRAW, cfg) {
      cfg.state = pipeline->rsds[MESA_SHADER_COMPUTE];
      cfg.attributes = dispatch->attributes;
      cfg.attribute_buffers = dispatch->attribute_bufs;
      cfg.thread_storage = dispatch->tsd;
      cfg.uniform_buffers = dispatch->ubos;
      cfg.push_uniforms = dispatch->push_uniforms;
      cfg.textures = dispatch->textures;
      cfg.samplers = dispatch->samplers;
   }
}

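/* Pack the PRIMITIVE section of a tiler job: topology, point-size source,
 * primitive restart, and either the index buffer state (indexed draws) or a
 * plain vertex count.
 */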
static void
panvk_emit_tiler_primitive(const struct panvk_pipeline *pipeline,
                           const struct panvk_draw_info *draw,
                           void *prim)
{
   pan_pack(prim, PRIMITIVE, cfg) {
      cfg.draw_mode = pipeline->ia.topology;
      if (pipeline->ia.writes_point_size)
         cfg.point_size_array_format = MALI_POINT_SIZE_ARRAY_FORMAT_FP16;

      cfg.first_provoking_vertex = true;
      if (pipeline->ia.primitive_restart)
         cfg.primitive_restart = MALI_PRIMITIVE_RESTART_IMPLICIT;
      cfg.job_task_split = 6;

      if (draw->index_size) {
         cfg.index_count = draw->index_count;
         cfg.indices = draw->indices;
         cfg.base_vertex_offset = draw->vertex_offset - draw->offset_start;

         switch (draw->index_size) {
         case 32: cfg.index_type = MALI_INDEX_TYPE_UINT32; break;
         case 16: cfg.index_type = MALI_INDEX_TYPE_UINT16; break;
         case 8: cfg.index_type = MALI_INDEX_TYPE_UINT8; break;
         default: unreachable("Invalid index size");
         }
      } else {
         cfg.index_count = draw->vertex_count;
         cfg.index_type = MALI_INDEX_TYPE_NONE;
      }
   }
}

static void
panvk_emit_tiler_primitive_size(const struct panvk_pipeline *pipeline,
                                const struct panvk_draw_info *draw,
                                void *primsz)
{
   pan_pack(primsz, PRIMITIVE_SIZE, cfg) {
      if (pipeline->ia.writes_point_size) {
         cfg.size_array = draw->psiz;
      } else {
         cfg.constant = draw->line_width;
      }
   }
}

static void
panvk_emit_tiler_dcd(const struct panvk_pipeline *pipeline,
                     const struct panvk_draw_info *draw,
                     void *dcd)
{
   pan_pack(dcd, DRAW, cfg) {
      cfg.front_face_ccw = pipeline->rast.front_ccw;
      cfg.cull_front_face = pipeline->rast.cull_front_face;
      cfg.cull_back_face = pipeline->rast.cull_back_face;
      cfg.position = draw->position;
      cfg.state = draw->fs_rsd;
      cfg.attributes = draw->stages[MESA_SHADER_FRAGMENT].attributes;
      cfg.attribute_buffers = draw->stages[MESA_SHADER_FRAGMENT].attribute_bufs;
      cfg.viewport = draw->viewport;
      cfg.varyings = draw->stages[MESA_SHADER_FRAGMENT].varyings;
      cfg.varying_buffers = cfg.varyings ? draw->varying_bufs : 0;
      cfg.thread_storage = draw->tls;

      /* For all primitives but lines DRAW.flat_shading_vertex must
       * be set to 0 and the provoking vertex is selected with the
       * PRIMITIVE.first_provoking_vertex field.
       */
      if (pipeline->ia.topology == MALI_DRAW_MODE_LINES ||
          pipeline->ia.topology == MALI_DRAW_MODE_LINE_STRIP ||
          pipeline->ia.topology == MALI_DRAW_MODE_LINE_LOOP) {
         cfg.flat_shading_vertex = true;
      }

      cfg.offset_start = draw->offset_start;
      cfg.instance_size = draw->instance_count > 1 ?
                          draw->padded_vertex_count : 1;
      cfg.uniform_buffers = draw->ubos;
      cfg.push_uniforms = draw->stages[PIPE_SHADER_FRAGMENT].push_constants;
      cfg.textures = draw->textures;
      cfg.samplers = draw->samplers;

      /* TODO: occlusion queries */
   }
}

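/* Assemble a complete TILER_JOB from the helpers above and point it at the
 * Bifrost tiler context for this render pass.
 */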
void
panvk_per_arch(emit_tiler_job)(const struct panvk_pipeline *pipeline,
                               const struct panvk_draw_info *draw,
                               void *job)
{
   void *section;

   section = pan_section_ptr(job, TILER_JOB, INVOCATION);
   memcpy(section, &draw->invocation, pan_size(INVOCATION));

   section = pan_section_ptr(job, TILER_JOB, PRIMITIVE);
   panvk_emit_tiler_primitive(pipeline, draw, section);

   section = pan_section_ptr(job, TILER_JOB, PRIMITIVE_SIZE);
   panvk_emit_tiler_primitive_size(pipeline, draw, section);

   section = pan_section_ptr(job, TILER_JOB, DRAW);
   panvk_emit_tiler_dcd(pipeline, draw, section);

   pan_section_pack(job, TILER_JOB, TILER, cfg) {
      cfg.address = draw->tiler_ctx->bifrost;
   }
   pan_section_pack(job, TILER_JOB, PADDING, padding);
}

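/* Convert a Vulkan viewport/scissor pair into a Mali VIEWPORT descriptor.
 * The hardware scissor bounds are inclusive, so the maximums are stored as
 * the last covered pixel rather than one-past-the-end.
 */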
void
panvk_per_arch(emit_viewport)(const VkViewport *viewport,
                              const VkRect2D *scissor,
                              void *vpd)
{
   /* The spec says "width must be greater than 0.0" */
   assert(viewport->x >= 0);
   int minx = (int)viewport->x;
   int maxx = (int)(viewport->x + viewport->width);

   /* Viewport height can be negative */
   int miny = MIN2((int)viewport->y, (int)(viewport->y + viewport->height));
   int maxy = MAX2((int)viewport->y, (int)(viewport->y + viewport->height));

   assert(scissor->offset.x >= 0 && scissor->offset.y >= 0);
   minx = MAX2(scissor->offset.x, minx);
   miny = MAX2(scissor->offset.y, miny);
   maxx = MIN2(scissor->offset.x + scissor->extent.width, maxx);
   maxy = MIN2(scissor->offset.y + scissor->extent.height, maxy);

   /* Make sure we don't end up with a max < min when width/height is 0 */
   maxx = maxx > minx ? maxx - 1 : maxx;
   maxy = maxy > miny ? maxy - 1 : maxy;

   assert(viewport->minDepth >= 0.0f && viewport->minDepth <= 1.0f);
   assert(viewport->maxDepth >= 0.0f && viewport->maxDepth <= 1.0f);

   pan_pack(vpd, VIEWPORT, cfg) {
      cfg.scissor_minimum_x = minx;
      cfg.scissor_minimum_y = miny;
      cfg.scissor_maximum_x = maxx;
      cfg.scissor_maximum_y = maxy;
      cfg.minimum_z = MIN2(viewport->minDepth, viewport->maxDepth);
      cfg.maximum_z = MAX2(viewport->minDepth, viewport->maxDepth);
   }
}

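/* Map the NIR ALU type of a fragment output to the Bifrost register file
 * format consumed by the blend unit. A type of 0 means the render target is
 * unused.
 */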
static enum mali_register_file_format
bifrost_blend_type_from_nir(nir_alu_type nir_type)
{
   switch (nir_type) {
   case 0: /* Render target not in use */
      return 0;
   case nir_type_float16:
      return MALI_REGISTER_FILE_FORMAT_F16;
   case nir_type_float32:
      return MALI_REGISTER_FILE_FORMAT_F32;
   case nir_type_int32:
      return MALI_REGISTER_FILE_FORMAT_I32;
   case nir_type_uint32:
      return MALI_REGISTER_FILE_FORMAT_U32;
   case nir_type_int16:
      return MALI_REGISTER_FILE_FORMAT_I16;
   case nir_type_uint16:
      return MALI_REGISTER_FILE_FORMAT_U16;
   default:
      unreachable("Unsupported blend shader type for NIR alu type");
   }
}

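/* Pack a BLEND descriptor for one render target. The blend constant is
 * converted to unsigned fixed point sized to the widest channel of the
 * render-target format, and equations that ignore the destination are
 * switched to the cheaper OPAQUE mode.
 */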
void
panvk_per_arch(emit_blend)(const struct panvk_device *dev,
                           const struct panvk_pipeline *pipeline,
                           unsigned rt, void *bd)
{
   const struct pan_blend_state *blend = &pipeline->blend.state;
   const struct pan_blend_rt_state *rts = &blend->rts[rt];
   bool dithered = false;

   pan_pack(bd, BLEND, cfg) {
      if (!blend->rt_count || !rts->equation.color_mask) {
         cfg.enable = false;
         cfg.internal.mode = MALI_BLEND_MODE_OFF;
         continue;
      }

      cfg.srgb = util_format_is_srgb(rts->format);
      cfg.load_destination = pan_blend_reads_dest(blend->rts[rt].equation);
      cfg.round_to_fb_precision = !dithered;

      const struct panfrost_device *pdev = &dev->physical_device->pdev;
      const struct util_format_description *format_desc =
         util_format_description(rts->format);
      unsigned chan_size = 0;
      for (unsigned i = 0; i < format_desc->nr_channels; i++)
         chan_size = MAX2(format_desc->channel[i].size, chan_size);

      pan_blend_to_fixed_function_equation(blend->rts[rt].equation,
                                           &cfg.equation);

      /* Fixed point constant */
      float fconst =
         pan_blend_get_constant(pan_blend_constant_mask(blend->rts[rt].equation),
                                blend->constants);
      uint16_t constant = fconst * ((1 << chan_size) - 1);
      constant <<= 16 - chan_size;
      cfg.constant = constant;

      if (pan_blend_is_opaque(blend->rts[rt].equation)) {
         cfg.internal.mode = MALI_BLEND_MODE_OPAQUE;
      } else {
         cfg.internal.mode = MALI_BLEND_MODE_FIXED_FUNCTION;

         cfg.internal.fixed_function.alpha_zero_nop =
            pan_blend_alpha_zero_nop(blend->rts[rt].equation);
         cfg.internal.fixed_function.alpha_one_store =
            pan_blend_alpha_one_store(blend->rts[rt].equation);
      }

      /* If we want the conversion to work properly,
       * num_comps must be set to 4
       */
      cfg.internal.fixed_function.num_comps = 4;
      cfg.internal.fixed_function.conversion.memory_format =
         panfrost_format_to_bifrost_blend(pdev, rts->format, dithered);
      cfg.internal.fixed_function.conversion.register_format =
         bifrost_blend_type_from_nir(pipeline->fs.info.bifrost.blend[rt].type);
      cfg.internal.fixed_function.rt = rt;
   }
}

void
panvk_per_arch(emit_blend_constant)(const struct panvk_device *dev,
                                    const struct panvk_pipeline *pipeline,
                                    unsigned rt, const float *constants,
                                    void *bd)
{
   float constant = constants[pipeline->blend.constant[rt].index];

   pan_pack(bd, BLEND, cfg) {
      cfg.enable = false;
      cfg.constant = constant * pipeline->blend.constant[rt].bifrost_factor;
   }
}

void
panvk_per_arch(emit_dyn_fs_rsd)(const struct panvk_pipeline *pipeline,
                                const struct panvk_cmd_state *state,
                                void *rsd)
{
   pan_pack(rsd, RENDERER_STATE, cfg) {
      if (pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS)) {
         cfg.depth_units = state->rast.depth_bias.constant_factor * 2.0f;
         cfg.depth_factor = state->rast.depth_bias.slope_factor;
         cfg.depth_bias_clamp = state->rast.depth_bias.clamp;
      }

      if (pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) {
         cfg.stencil_front.mask = state->zs.s_front.compare_mask;
         cfg.stencil_back.mask = state->zs.s_back.compare_mask;
      }

      if (pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) {
         cfg.stencil_mask_misc.stencil_mask_front = state->zs.s_front.write_mask;
         cfg.stencil_mask_misc.stencil_mask_back = state->zs.s_back.write_mask;
      }

      if (pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) {
         cfg.stencil_front.reference_value = state->zs.s_front.ref;
         cfg.stencil_back.reference_value = state->zs.s_back.ref;
      }
   }
}

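/* Static part of the fragment renderer state. Forward pixel kill is only
 * allowed when the shader supports it, every enabled render target is
 * written, and neither alpha-to-coverage nor destination reads are in use;
 * the early-ZS analysis then picks the pixel-kill and ZS-update operations.
 */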
void
panvk_per_arch(emit_base_fs_rsd)(const struct panvk_device *dev,
                                 const struct panvk_pipeline *pipeline,
                                 void *rsd)
{
   const struct pan_shader_info *info = &pipeline->fs.info;

   pan_pack(rsd, RENDERER_STATE, cfg) {
      if (pipeline->fs.required) {
         pan_shader_prepare_rsd(info, pipeline->fs.address, &cfg);

         uint8_t rt_written = pipeline->fs.info.outputs_written >> FRAG_RESULT_DATA0;
         uint8_t rt_mask = pipeline->fs.rt_mask;
         cfg.properties.allow_forward_pixel_to_kill =
            pipeline->fs.info.fs.can_fpk &&
            !(rt_mask & ~rt_written) &&
            !pipeline->ms.alpha_to_coverage &&
            !pipeline->blend.reads_dest;

         bool writes_zs = pipeline->zs.z_write || pipeline->zs.s_test;
         bool zs_always_passes = !pipeline->zs.z_test && !pipeline->zs.s_test;
         bool oq = false; /* TODO: Occlusion queries */

         struct pan_earlyzs_state earlyzs =
            pan_earlyzs_get(pan_earlyzs_analyze(info), writes_zs || oq,
                            pipeline->ms.alpha_to_coverage, zs_always_passes);

         cfg.properties.pixel_kill_operation = earlyzs.kill;
         cfg.properties.zs_update_operation = earlyzs.update;
      } else {
         cfg.properties.depth_source = MALI_DEPTH_SOURCE_FIXED_FUNCTION;
         cfg.properties.allow_forward_pixel_to_kill = true;
         cfg.properties.allow_forward_pixel_to_be_killed = true;
         cfg.properties.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY;
      }

      bool msaa = pipeline->ms.rast_samples > 1;
      cfg.multisample_misc.multisample_enable = msaa;
      cfg.multisample_misc.sample_mask =
         msaa ? pipeline->ms.sample_mask : UINT16_MAX;

      cfg.multisample_misc.depth_function =
         pipeline->zs.z_test ? pipeline->zs.z_compare_func : MALI_FUNC_ALWAYS;

      cfg.multisample_misc.depth_write_mask = pipeline->zs.z_write;
      cfg.multisample_misc.fixed_function_near_discard = !pipeline->rast.clamp_depth;
      cfg.multisample_misc.fixed_function_far_discard = !pipeline->rast.clamp_depth;
      cfg.multisample_misc.shader_depth_range_fixed = true;

      cfg.stencil_mask_misc.stencil_enable = pipeline->zs.s_test;
      cfg.stencil_mask_misc.alpha_to_coverage = pipeline->ms.alpha_to_coverage;
      cfg.stencil_mask_misc.alpha_test_compare_function = MALI_FUNC_ALWAYS;
      cfg.stencil_mask_misc.front_facing_depth_bias = pipeline->rast.depth_bias.enable;
      cfg.stencil_mask_misc.back_facing_depth_bias = pipeline->rast.depth_bias.enable;
      cfg.stencil_mask_misc.single_sampled_lines = pipeline->ms.rast_samples <= 1;

      if (!(pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS))) {
         cfg.depth_units = pipeline->rast.depth_bias.constant_factor * 2.0f;
         cfg.depth_factor = pipeline->rast.depth_bias.slope_factor;
         cfg.depth_bias_clamp = pipeline->rast.depth_bias.clamp;
      }

      if (!(pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK))) {
         cfg.stencil_front.mask = pipeline->zs.s_front.compare_mask;
         cfg.stencil_back.mask = pipeline->zs.s_back.compare_mask;
      }

      if (!(pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK))) {
         cfg.stencil_mask_misc.stencil_mask_front = pipeline->zs.s_front.write_mask;
         cfg.stencil_mask_misc.stencil_mask_back = pipeline->zs.s_back.write_mask;
      }

      if (!(pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE))) {
         cfg.stencil_front.reference_value = pipeline->zs.s_front.ref;
         cfg.stencil_back.reference_value = pipeline->zs.s_back.ref;
      }

      cfg.stencil_front.compare_function = pipeline->zs.s_front.compare_func;
      cfg.stencil_front.stencil_fail = pipeline->zs.s_front.fail_op;
      cfg.stencil_front.depth_fail = pipeline->zs.s_front.z_fail_op;
      cfg.stencil_front.depth_pass = pipeline->zs.s_front.pass_op;
      cfg.stencil_back.compare_function = pipeline->zs.s_back.compare_func;
      cfg.stencil_back.stencil_fail = pipeline->zs.s_back.fail_op;
      cfg.stencil_back.depth_fail = pipeline->zs.s_back.z_fail_op;
      cfg.stencil_back.depth_pass = pipeline->zs.s_back.pass_op;
   }
}

void
panvk_per_arch(emit_non_fs_rsd)(const struct panvk_device *dev,
                                const struct pan_shader_info *shader_info,
                                mali_ptr shader_ptr,
                                void *rsd)
{
   assert(shader_info->stage != MESA_SHADER_FRAGMENT);

   pan_pack(rsd, RENDERER_STATE, cfg) {
      pan_shader_prepare_rsd(shader_info, shader_ptr, &cfg);
   }
}

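/* The tiler heap descriptor lives right after the tiler context in the same
 * allocation, which is why both the CPU and GPU pointers are offset by
 * pan_size(TILER_CONTEXT).
 */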
void
panvk_per_arch(emit_tiler_context)(const struct panvk_device *dev,
                                   unsigned width, unsigned height,
                                   const struct panfrost_ptr *descs)
{
   const struct panfrost_device *pdev = &dev->physical_device->pdev;

   pan_pack(descs->cpu + pan_size(TILER_CONTEXT), TILER_HEAP, cfg) {
      cfg.size = pdev->tiler_heap->size;
      cfg.base = pdev->tiler_heap->ptr.gpu;
      cfg.bottom = pdev->tiler_heap->ptr.gpu;
      cfg.top = pdev->tiler_heap->ptr.gpu + pdev->tiler_heap->size;
   }

   pan_pack(descs->cpu, TILER_CONTEXT, cfg) {
      cfg.hierarchy_mask = 0x28;
      cfg.fb_width = width;
      cfg.fb_height = height;
      cfg.heap = descs->gpu + pan_size(TILER_CONTEXT);
   }
}
830