/*
 * Copyright © 2019 Raspberry Pi Ltd
 *
 * Based in part on v3d driver which is:
 *
 * Copyright © 2014-2017 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "v3dv_private.h"

/* The only version specific structure that we need is
 * TMU_CONFIG_PARAMETER_1. This didn't seem to change significantly from
 * previous V3D versions and we don't expect that to change, so for now let's
 * just hardcode the V3D version here.
 */
#define V3D_VERSION 41
#include "broadcom/common/v3d_macros.h"
#include "broadcom/cle/v3dx_pack.h"

/* Our Vulkan resource indices represent indices in descriptor maps which
 * include all shader stages, so we need to size the arrays below
 * accordingly. For now we only support a maximum of 3 stages: VS, GS, FS.
 */
#define MAX_STAGES 3

#define MAX_TOTAL_TEXTURE_SAMPLERS (V3D_MAX_TEXTURE_SAMPLERS * MAX_STAGES)
struct texture_bo_list {
   struct v3dv_bo *tex[MAX_TOTAL_TEXTURE_SAMPLERS];
};

/* This tracks state BOs for both textures and samplers, so we
 * multiply by 2.
 */
#define MAX_TOTAL_STATES (2 * V3D_MAX_TEXTURE_SAMPLERS * MAX_STAGES)
struct state_bo_list {
   uint32_t count;
   struct v3dv_bo *states[MAX_TOTAL_STATES];
};

#define MAX_TOTAL_UNIFORM_BUFFERS ((MAX_UNIFORM_BUFFERS + \
                                    MAX_INLINE_UNIFORM_BUFFERS) * MAX_STAGES)
#define MAX_TOTAL_STORAGE_BUFFERS (MAX_STORAGE_BUFFERS * MAX_STAGES)
struct buffer_bo_list {
   struct v3dv_bo *ubo[MAX_TOTAL_UNIFORM_BUFFERS];
   struct v3dv_bo *ssbo[MAX_TOTAL_STORAGE_BUFFERS];
};

static bool
state_bo_in_list(struct state_bo_list *list, struct v3dv_bo *bo)
{
   for (int i = 0; i < list->count; i++) {
      if (list->states[i] == bo)
         return true;
   }
   return false;
}

static void
push_constants_bo_free(VkDevice _device,
                       uint64_t bo_ptr,
                       VkAllocationCallbacks *alloc)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   v3dv_bo_free(device, (struct v3dv_bo *)(uintptr_t) bo_ptr);
}
/*
 * This method checks whether the UBO used for push constants needs to be
 * updated.
 *
 * The push constants UBO is only used for push constants accessed with a
 * non-constant index.
 */
static void
check_push_constants_ubo(struct v3dv_cmd_buffer *cmd_buffer,
                         struct v3dv_pipeline *pipeline)
{
   if (!(cmd_buffer->state.dirty & V3DV_CMD_DIRTY_PUSH_CONSTANTS_UBO) ||
       pipeline->layout->push_constant_size == 0)
      return;

   if (cmd_buffer->push_constants_resource.bo == NULL) {
      cmd_buffer->push_constants_resource.bo =
         v3dv_bo_alloc(cmd_buffer->device, 4096, "push constants", true);

      v3dv_job_add_bo(cmd_buffer->state.job,
                      cmd_buffer->push_constants_resource.bo);

      if (!cmd_buffer->push_constants_resource.bo) {
         fprintf(stderr, "Failed to allocate memory for push constants\n");
         abort();
      }

      bool ok = v3dv_bo_map(cmd_buffer->device,
                            cmd_buffer->push_constants_resource.bo,
                            cmd_buffer->push_constants_resource.bo->size);
      if (!ok) {
         fprintf(stderr, "failed to map push constants buffer\n");
         abort();
      }
   } else {
      if (cmd_buffer->push_constants_resource.offset +
          cmd_buffer->state.push_constants_size <=
          cmd_buffer->push_constants_resource.bo->size) {
         cmd_buffer->push_constants_resource.offset +=
            cmd_buffer->state.push_constants_size;
      } else {
         /* We ran out of space, so we'll have to allocate a new buffer, but
          * we need to ensure the old one is preserved until the end of the
          * command buffer's lifetime and that it is eventually freed. We use
          * the private object machinery in the command buffer for this.
          */
         v3dv_cmd_buffer_add_private_obj(
            cmd_buffer, (uintptr_t) cmd_buffer->push_constants_resource.bo,
            (v3dv_cmd_buffer_private_obj_destroy_cb) push_constants_bo_free);

         /* Now call back so we create a new BO */
         cmd_buffer->push_constants_resource.bo = NULL;
         check_push_constants_ubo(cmd_buffer, pipeline);
         return;
      }
   }

   assert(cmd_buffer->state.push_constants_size <= MAX_PUSH_CONSTANTS_SIZE);
   memcpy(cmd_buffer->push_constants_resource.bo->map +
          cmd_buffer->push_constants_resource.offset,
          cmd_buffer->state.push_constants_data,
          cmd_buffer->state.push_constants_size);

   cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_PUSH_CONSTANTS_UBO;
}

/** V3D 4.x TMU configuration parameter 0 (texture) */
static void
write_tmu_p0(struct v3dv_cmd_buffer *cmd_buffer,
             struct v3dv_pipeline *pipeline,
             enum broadcom_shader_stage stage,
             struct v3dv_cl_out **uniforms,
             uint32_t data,
             struct texture_bo_list *tex_bos,
             struct state_bo_list *state_bos)
{
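   /* 'data' packs the texture unit index together with the P0 offset
    * produced by the compiler (see v3d_unit_data_get_unit/get_offset).
    */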
   uint32_t texture_idx = v3d_unit_data_get_unit(data);

   struct v3dv_descriptor_state *descriptor_state =
      v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline);

   /* We need to ensure that the texture bo is added to the job */
   struct v3dv_bo *texture_bo =
      v3dv_descriptor_map_get_texture_bo(descriptor_state,
                                         &pipeline->shared_data->maps[stage]->texture_map,
                                         pipeline->layout, texture_idx);
   assert(texture_bo);
   assert(texture_idx < V3D_MAX_TEXTURE_SAMPLERS);
   tex_bos->tex[texture_idx] = texture_bo;

   struct v3dv_cl_reloc state_reloc =
      v3dv_descriptor_map_get_texture_shader_state(cmd_buffer->device, descriptor_state,
                                                   &pipeline->shared_data->maps[stage]->texture_map,
                                                   pipeline->layout,
                                                   texture_idx);

   cl_aligned_u32(uniforms, state_reloc.bo->offset +
                            state_reloc.offset +
                            v3d_unit_data_get_offset(data));

   /* Texture and Sampler states are typically suballocated, so they are
    * usually the same BO: only flag them once to avoid trying to add them
    * multiple times to the job later.
    */
   if (!state_bo_in_list(state_bos, state_reloc.bo)) {
      assert(state_bos->count < 2 * V3D_MAX_TEXTURE_SAMPLERS);
      state_bos->states[state_bos->count++] = state_reloc.bo;
   }
}

/** V3D 4.x TMU configuration parameter 1 (sampler) */
static void
write_tmu_p1(struct v3dv_cmd_buffer *cmd_buffer,
             struct v3dv_pipeline *pipeline,
             enum broadcom_shader_stage stage,
             struct v3dv_cl_out **uniforms,
             uint32_t data,
             struct state_bo_list *state_bos)
{
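   /* 'data' packs the sampler unit index together with the pre-packed
    * TMU_CONFIG_PARAMETER_1 bits from the compiler; below we only patch
    * the unnormalized coordinates flag from the sampler object.
    */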
   uint32_t sampler_idx = v3d_unit_data_get_unit(data);
   struct v3dv_descriptor_state *descriptor_state =
      v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline);

   assert(sampler_idx != V3DV_NO_SAMPLER_16BIT_IDX &&
          sampler_idx != V3DV_NO_SAMPLER_32BIT_IDX);

   struct v3dv_cl_reloc sampler_state_reloc =
      v3dv_descriptor_map_get_sampler_state(cmd_buffer->device, descriptor_state,
                                            &pipeline->shared_data->maps[stage]->sampler_map,
                                            pipeline->layout, sampler_idx);

   const struct v3dv_sampler *sampler =
      v3dv_descriptor_map_get_sampler(descriptor_state,
                                      &pipeline->shared_data->maps[stage]->sampler_map,
                                      pipeline->layout, sampler_idx);
   assert(sampler);

   /* Set unnormalized coordinates flag from sampler object */
   uint32_t p1_packed = v3d_unit_data_get_offset(data);
   if (sampler->unnormalized_coordinates) {
      struct V3DX(TMU_CONFIG_PARAMETER_1) p1_unpacked;
      V3DX(TMU_CONFIG_PARAMETER_1_unpack)((uint8_t *)&p1_packed, &p1_unpacked);
      p1_unpacked.unnormalized_coordinates = true;
      V3DX(TMU_CONFIG_PARAMETER_1_pack)(NULL, (uint8_t *)&p1_packed,
                                        &p1_unpacked);
   }

   cl_aligned_u32(uniforms, sampler_state_reloc.bo->offset +
                            sampler_state_reloc.offset +
                            p1_packed);

   /* Texture and Sampler states are typically suballocated, so they are
    * usually the same BO: only flag them once to avoid trying to add them
    * multiple times to the job later.
    */
   if (!state_bo_in_list(state_bos, sampler_state_reloc.bo)) {
      assert(state_bos->count < 2 * V3D_MAX_TEXTURE_SAMPLERS);
      state_bos->states[state_bos->count++] = sampler_state_reloc.bo;
   }
}

static void
write_ubo_ssbo_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
                        struct v3dv_pipeline *pipeline,
                        enum broadcom_shader_stage stage,
                        struct v3dv_cl_out **uniforms,
                        enum quniform_contents content,
                        uint32_t data,
                        struct buffer_bo_list *buffer_bos)
{
   struct v3dv_descriptor_state *descriptor_state =
      v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline);

   struct v3dv_descriptor_map *map =
      content == QUNIFORM_UBO_ADDR || content == QUNIFORM_GET_UBO_SIZE ?
      &pipeline->shared_data->maps[stage]->ubo_map :
      &pipeline->shared_data->maps[stage]->ssbo_map;

   uint32_t offset =
      content == QUNIFORM_UBO_ADDR ?
      v3d_unit_data_get_offset(data) :
      0;

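   /* Updated by v3dv_descriptor_map_get_descriptor() below for descriptors
    * that carry a dynamic offset (dynamic UBOs/SSBOs).
    */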
   uint32_t dynamic_offset = 0;

   /* For ubos, index is shifted, as 0 is reserved for push constants
    * and 1..MAX_INLINE_UNIFORM_BUFFERS are reserved for inline uniform
    * buffers.
    */
   uint32_t index = v3d_unit_data_get_unit(data);
   if (content == QUNIFORM_UBO_ADDR && index == 0) {
      /* Ensure the push constants UBO is created and updated. This also
       * adds the BO to the job so we don't need to track it in buffer_bos.
       */
      check_push_constants_ubo(cmd_buffer, pipeline);

      struct v3dv_cl_reloc *resource =
         &cmd_buffer->push_constants_resource;
      assert(resource->bo);

      cl_aligned_u32(uniforms, resource->bo->offset +
                               resource->offset +
                               offset + dynamic_offset);
   } else {
      if (content == QUNIFORM_UBO_ADDR) {
         /* We reserve index 0 for push constants and artificially increase our
          * indices by one for that reason, fix that now before accessing the
          * descriptor map.
          */
         assert(index > 0);
         index--;
      } else {
         index = data;
      }

      struct v3dv_descriptor *descriptor =
         v3dv_descriptor_map_get_descriptor(descriptor_state, map,
                                            pipeline->layout,
                                            index, &dynamic_offset);

      /* Inline UBO descriptors store UBO data in descriptor pool memory,
       * instead of an external buffer.
       */
      assert(descriptor);

      if (content == QUNIFORM_GET_SSBO_SIZE ||
          content == QUNIFORM_GET_UBO_SIZE) {
         cl_aligned_u32(uniforms, descriptor->range);
      } else {
         /* Inline uniform buffers store their contents in pool memory instead
          * of an external buffer.
          */
         struct v3dv_bo *bo;
         uint32_t addr;
         if (descriptor->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
            assert(dynamic_offset == 0);
            struct v3dv_cl_reloc reloc =
               v3dv_descriptor_map_get_descriptor_bo(cmd_buffer->device,
                                                     descriptor_state, map,
                                                     pipeline->layout, index,
                                                     NULL);
            bo = reloc.bo;
            addr = reloc.bo->offset + reloc.offset + offset;
         } else {
            assert(descriptor->buffer);
            assert(descriptor->buffer->mem);
            assert(descriptor->buffer->mem->bo);

            bo = descriptor->buffer->mem->bo;
            addr = bo->offset +
                   descriptor->buffer->mem_offset +
                   descriptor->offset +
                   offset + dynamic_offset;
         }

         cl_aligned_u32(uniforms, addr);

         if (content == QUNIFORM_UBO_ADDR) {
            assert(index < MAX_TOTAL_UNIFORM_BUFFERS);
            buffer_bos->ubo[index] = bo;
         } else {
            assert(index < MAX_TOTAL_STORAGE_BUFFERS);
            buffer_bos->ssbo[index] = bo;
         }
      }
   }
}

static void
write_inline_uniform(struct v3dv_cl_out **uniforms,
                     uint32_t index,
                     uint32_t offset,
                     struct v3dv_cmd_buffer *cmd_buffer,
                     struct v3dv_pipeline *pipeline,
                     enum broadcom_shader_stage stage)
{
   assert(index < MAX_INLINE_UNIFORM_BUFFERS);

   struct v3dv_descriptor_state *descriptor_state =
      v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline);

   struct v3dv_descriptor_map *map =
      &pipeline->shared_data->maps[stage]->ubo_map;

   struct v3dv_cl_reloc reloc =
      v3dv_descriptor_map_get_descriptor_bo(cmd_buffer->device,
                                            descriptor_state, map,
                                            pipeline->layout, index,
                                            NULL);

   /* Offset comes in 32-bit units */
   uint32_t *addr = reloc.bo->map + reloc.offset + 4 * offset;
   cl_aligned_u32(uniforms, *addr);
}

static uint32_t
get_texture_size_from_image_view(struct v3dv_image_view *image_view,
                                 enum quniform_contents contents,
                                 uint32_t data)
{
   switch(contents) {
   case QUNIFORM_IMAGE_WIDTH:
   case QUNIFORM_TEXTURE_WIDTH:
      /* We don't u_minify the values, as we are using the image_view
       * extents
       */
      return image_view->vk.extent.width;
   case QUNIFORM_IMAGE_HEIGHT:
   case QUNIFORM_TEXTURE_HEIGHT:
      return image_view->vk.extent.height;
   case QUNIFORM_IMAGE_DEPTH:
   case QUNIFORM_TEXTURE_DEPTH:
      return image_view->vk.extent.depth;
   case QUNIFORM_IMAGE_ARRAY_SIZE:
   case QUNIFORM_TEXTURE_ARRAY_SIZE:
      if (image_view->vk.view_type != VK_IMAGE_VIEW_TYPE_CUBE_ARRAY) {
         return image_view->vk.layer_count;
      } else {
         assert(image_view->vk.layer_count % 6 == 0);
         return image_view->vk.layer_count / 6;
      }
   case QUNIFORM_TEXTURE_LEVELS:
      return image_view->vk.level_count;
   case QUNIFORM_TEXTURE_SAMPLES:
      assert(image_view->vk.image);
      return image_view->vk.image->samples;
   default:
      unreachable("Bad texture size field");
   }
}


static uint32_t
get_texture_size_from_buffer_view(struct v3dv_buffer_view *buffer_view,
                                  enum quniform_contents contents,
                                  uint32_t data)
{
   switch(contents) {
   case QUNIFORM_IMAGE_WIDTH:
   case QUNIFORM_TEXTURE_WIDTH:
      return buffer_view->num_elements;
   /* Only size can be queried for texel buffers */
   default:
      unreachable("Bad texture size field for texel buffers");
   }
}

static uint32_t
get_texture_size(struct v3dv_cmd_buffer *cmd_buffer,
                 struct v3dv_pipeline *pipeline,
                 enum broadcom_shader_stage stage,
                 enum quniform_contents contents,
                 uint32_t data)
{
   uint32_t texture_idx = data;

   struct v3dv_descriptor_state *descriptor_state =
      v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline);

   struct v3dv_descriptor *descriptor =
      v3dv_descriptor_map_get_descriptor(descriptor_state,
                                         &pipeline->shared_data->maps[stage]->texture_map,
                                         pipeline->layout,
                                         texture_idx, NULL);

   assert(descriptor);

   switch (descriptor->type) {
   case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
   case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
   case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
   case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
      return get_texture_size_from_image_view(descriptor->image_view,
                                              contents, data);
   case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
   case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
      return get_texture_size_from_buffer_view(descriptor->buffer_view,
                                               contents, data);
   default:
      unreachable("Wrong descriptor for getting texture size");
   }
}

struct v3dv_cl_reloc
v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer,
                               struct v3dv_pipeline *pipeline,
                               struct v3dv_shader_variant *variant,
                               uint32_t **wg_count_offsets)
{
   struct v3d_uniform_list *uinfo =
      &variant->prog_data.base->uniforms;
   struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic;

   struct v3dv_job *job = cmd_buffer->state.job;
   assert(job);
   assert(job->cmd_buffer == cmd_buffer);

   struct texture_bo_list tex_bos = { 0 };
   struct state_bo_list state_bos = { 0 };
   struct buffer_bo_list buffer_bos = { 0 };

   /* The hardware always pre-fetches the next uniform (also when there
    * aren't any), so we always allocate space for an extra slot. This
    * fixes MMU exceptions reported since Linux kernel 5.4 when the
    * uniforms fill up the tail bytes of a page in the indirect
    * BO. In that scenario, when the hardware pre-fetches after reading
    * the last uniform it will read beyond the end of the page and trigger
    * the MMU exception.
    */
   v3dv_cl_ensure_space(&job->indirect, (uinfo->count + 1) * 4, 4);

   struct v3dv_cl_reloc uniform_stream = v3dv_cl_get_address(&job->indirect);

   struct v3dv_cl_out *uniforms = cl_start(&job->indirect);

   for (int i = 0; i < uinfo->count; i++) {
      uint32_t data = uinfo->data[i];

      switch (uinfo->contents[i]) {
      case QUNIFORM_CONSTANT:
         cl_aligned_u32(&uniforms, data);
         break;

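      /* Push constants accessed with a constant index are read directly from
       * the CPU-side push constant data; 'data' is the 32-bit word index.
       */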
      case QUNIFORM_UNIFORM:
         cl_aligned_u32(&uniforms, cmd_buffer->state.push_constants_data[data]);
         break;

      case QUNIFORM_INLINE_UBO_0:
      case QUNIFORM_INLINE_UBO_1:
      case QUNIFORM_INLINE_UBO_2:
      case QUNIFORM_INLINE_UBO_3:
         write_inline_uniform(&uniforms,
                              uinfo->contents[i] - QUNIFORM_INLINE_UBO_0, data,
                              cmd_buffer, pipeline, variant->stage);
         break;

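      /* The viewport X/Y scales are programmed in 1/256ths of a pixel to
       * match the precision of the hardware's clipper coordinates, hence
       * the 256.0f factor.
       */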
      case QUNIFORM_VIEWPORT_X_SCALE:
         cl_aligned_f(&uniforms, dynamic->viewport.scale[0][0] * 256.0f);
         break;

      case QUNIFORM_VIEWPORT_Y_SCALE:
         cl_aligned_f(&uniforms, dynamic->viewport.scale[0][1] * 256.0f);
         break;

      case QUNIFORM_VIEWPORT_Z_OFFSET:
         cl_aligned_f(&uniforms, dynamic->viewport.translate[0][2]);
         break;

      case QUNIFORM_VIEWPORT_Z_SCALE:
         cl_aligned_f(&uniforms, dynamic->viewport.scale[0][2]);
         break;

      case QUNIFORM_SSBO_OFFSET:
      case QUNIFORM_UBO_ADDR:
      case QUNIFORM_GET_SSBO_SIZE:
      case QUNIFORM_GET_UBO_SIZE:
         write_ubo_ssbo_uniforms(cmd_buffer, pipeline, variant->stage, &uniforms,
                                 uinfo->contents[i], data, &buffer_bos);
         break;

      case QUNIFORM_IMAGE_TMU_CONFIG_P0:
      case QUNIFORM_TMU_CONFIG_P0:
         write_tmu_p0(cmd_buffer, pipeline, variant->stage,
                      &uniforms, data, &tex_bos, &state_bos);
         break;

      case QUNIFORM_TMU_CONFIG_P1:
         write_tmu_p1(cmd_buffer, pipeline, variant->stage,
                      &uniforms, data, &state_bos);
         break;

      case QUNIFORM_IMAGE_WIDTH:
      case QUNIFORM_IMAGE_HEIGHT:
      case QUNIFORM_IMAGE_DEPTH:
      case QUNIFORM_IMAGE_ARRAY_SIZE:
      case QUNIFORM_TEXTURE_WIDTH:
      case QUNIFORM_TEXTURE_HEIGHT:
      case QUNIFORM_TEXTURE_DEPTH:
      case QUNIFORM_TEXTURE_ARRAY_SIZE:
      case QUNIFORM_TEXTURE_LEVELS:
      case QUNIFORM_TEXTURE_SAMPLES:
         cl_aligned_u32(&uniforms,
                        get_texture_size(cmd_buffer,
                                         pipeline,
                                         variant->stage,
                                         uinfo->contents[i],
                                         data));
         break;

      /* We generate this from geometry shaders to cap the generated gl_Layer
       * to be within the number of layers of the framebuffer so we prevent the
       * binner from trying to access tile state memory out of bounds (for
       * layers that don't exist).
       *
       * Unfortunately, for secondary command buffers we may not know the
       * number of layers in the framebuffer at this stage. Since we are
       * only using this to sanitize the shader and it should not have any
       * impact on correct shaders that emit valid values for gl_Layer,
       * we just work around it by using the largest number of layers we
       * support.
       *
       * FIXME: we could do better than this by recording in the job that
       * the value at this uniform offset is not correct, and patching it when
       * we execute the secondary command buffer into a primary, since we do
       * have the correct number of layers at that point. But since this is
       * only for sanitizing the shader and it only affects the specific case
       * of secondary command buffers without framebuffer info available, it
       * might not be worth the trouble.
       *
       * With multiview the number of layers is dictated by the view mask
       * and not by the framebuffer layers. We do set the job's frame tiling
       * information correctly from the view mask in that case, however,
       * secondary command buffers may not have valid frame tiling data,
       * so when multiview is enabled, we always set the number of layers
       * from the subpass view mask.
       */
      case QUNIFORM_FB_LAYERS: {
         const struct v3dv_cmd_buffer_state *state = &job->cmd_buffer->state;
         const uint32_t view_mask =
            state->pass->subpasses[state->subpass_idx].view_mask;

         uint32_t num_layers;
         if (view_mask != 0) {
            num_layers = util_last_bit(view_mask);
         } else if (job->frame_tiling.layers != 0) {
            num_layers = job->frame_tiling.layers;
         } else if (cmd_buffer->state.framebuffer) {
            num_layers = cmd_buffer->state.framebuffer->layers;
         } else {
            assert(cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY);
            num_layers = 2048;
#if DEBUG
            fprintf(stderr, "Skipping gl_LayerID shader sanity check for "
                            "secondary command buffer\n");
#endif
         }
         cl_aligned_u32(&uniforms, num_layers);
         break;
      }

      case QUNIFORM_VIEW_INDEX:
         cl_aligned_u32(&uniforms, job->cmd_buffer->state.view_index);
         break;

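      /* Record where this uniform lives in the stream so the caller can
       * patch the workgroup counts later (e.g. for indirect compute
       * dispatches, where the counts are not known at recording time).
       */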
      case QUNIFORM_NUM_WORK_GROUPS:
         assert(job->type == V3DV_JOB_TYPE_GPU_CSD);
         assert(job->csd.wg_count[data] > 0);
         if (wg_count_offsets)
            wg_count_offsets[data] = (uint32_t *) uniforms;
         cl_aligned_u32(&uniforms, job->csd.wg_count[data]);
         break;

      case QUNIFORM_WORK_GROUP_BASE:
         assert(job->type == V3DV_JOB_TYPE_GPU_CSD);
         cl_aligned_u32(&uniforms, job->csd.wg_base[data]);
         break;

      case QUNIFORM_SHARED_OFFSET:
         assert(job->type == V3DV_JOB_TYPE_GPU_CSD);
         assert(job->csd.shared_memory);
         cl_aligned_u32(&uniforms, job->csd.shared_memory->offset);
         break;

      case QUNIFORM_SPILL_OFFSET:
         assert(pipeline->spill.bo);
         cl_aligned_u32(&uniforms, pipeline->spill.bo->offset);
         break;

      case QUNIFORM_SPILL_SIZE_PER_THREAD:
         assert(pipeline->spill.size_per_thread > 0);
         cl_aligned_u32(&uniforms, pipeline->spill.size_per_thread);
         break;

      default:
         unreachable("unsupported quniform_contents uniform type\n");
      }
   }

   cl_end(&job->indirect, uniforms);

   for (int i = 0; i < MAX_TOTAL_TEXTURE_SAMPLERS; i++) {
      if (tex_bos.tex[i])
         v3dv_job_add_bo(job, tex_bos.tex[i]);
   }

   for (int i = 0; i < state_bos.count; i++)
      v3dv_job_add_bo(job, state_bos.states[i]);

   for (int i = 0; i < MAX_TOTAL_UNIFORM_BUFFERS; i++) {
      if (buffer_bos.ubo[i])
         v3dv_job_add_bo(job, buffer_bos.ubo[i]);
   }

   for (int i = 0; i < MAX_TOTAL_STORAGE_BUFFERS; i++) {
      if (buffer_bos.ssbo[i])
         v3dv_job_add_bo(job, buffer_bos.ssbo[i]);
   }

   if (job->csd.shared_memory)
      v3dv_job_add_bo(job, job->csd.shared_memory);

   if (pipeline->spill.bo)
      v3dv_job_add_bo(job, pipeline->spill.bo);

   return uniform_stream;
}

struct v3dv_cl_reloc
v3dv_write_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
                    struct v3dv_pipeline *pipeline,
                    struct v3dv_shader_variant *variant)
{
   return v3dv_write_uniforms_wg_offsets(cmd_buffer, pipeline, variant, NULL);
}