/*
 * Copyright © 2019 Raspberry Pi
 *
 * Based in part on v3d driver which is:
 *
 * Copyright © 2014-2017 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "v3dv_private.h"
#include "vk_format_info.h"

/* The only version-specific structure that we need is
 * TMU_CONFIG_PARAMETER_1. It hasn't changed significantly from previous
 * V3D versions and we don't expect it to, so for now let's just hardcode
 * the V3D version here.
 */
#define V3D_VERSION 41
#include "broadcom/common/v3d_macros.h"
#include "broadcom/cle/v3dx_pack.h"

/* Our Vulkan resource indices represent indices in descriptor maps which
 * include all shader stages, so we need to size the arrays below
 * accordingly. For now we only support a maximum of 3 stages: VS, GS, FS.
 */
#define MAX_STAGES 3

#define MAX_TOTAL_TEXTURE_SAMPLERS (V3D_MAX_TEXTURE_SAMPLERS * MAX_STAGES)
struct texture_bo_list {
   struct v3dv_bo *tex[MAX_TOTAL_TEXTURE_SAMPLERS];
};

/* This tracks state BOs for both textures and samplers, so we
 * multiply by 2.
 */
#define MAX_TOTAL_STATES (2 * V3D_MAX_TEXTURE_SAMPLERS * MAX_STAGES)
struct state_bo_list {
   uint32_t count;
   struct v3dv_bo *states[MAX_TOTAL_STATES];
};

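/* UBO slot 0 is reserved for the push constants UBO (see
 * write_ubo_ssbo_uniforms()), hence the extra entry in the UBO array.
 */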
#define MAX_TOTAL_UNIFORM_BUFFERS (1 + MAX_UNIFORM_BUFFERS * MAX_STAGES)
#define MAX_TOTAL_STORAGE_BUFFERS (MAX_STORAGE_BUFFERS * MAX_STAGES)
struct buffer_bo_list {
   struct v3dv_bo *ubo[MAX_TOTAL_UNIFORM_BUFFERS];
   struct v3dv_bo *ssbo[MAX_TOTAL_STORAGE_BUFFERS];
};

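/* Returns true if @bo is already in @list. Used below to avoid flagging the
 * same texture/sampler state BO more than once.
 */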
static bool
state_bo_in_list(struct state_bo_list *list, struct v3dv_bo *bo)
{
   for (int i = 0; i < list->count; i++) {
      if (list->states[i] == bo)
         return true;
   }
   return false;
}

/*
 * This method checks whether the UBO used for push constants needs to be
 * updated, and updates it if so.
 *
 * The push constants UBO is only used for push constants accessed with a
 * non-const index.
 *
 * FIXME: right now for these cases we are uploading the full
 * push_constants_data. An improvement would be to upload only the data that
 * actually needs to be accessed through the UBO.
 */
static void
check_push_constants_ubo(struct v3dv_cmd_buffer *cmd_buffer,
                         struct v3dv_pipeline *pipeline)
{
   if (!(cmd_buffer->state.dirty & V3DV_CMD_DIRTY_PUSH_CONSTANTS) ||
       pipeline->layout->push_constant_size == 0)
      return;

   if (cmd_buffer->push_constants_resource.bo == NULL) {
      cmd_buffer->push_constants_resource.bo =
         v3dv_bo_alloc(cmd_buffer->device, MAX_PUSH_CONSTANTS_SIZE,
                       "push constants", true);

      if (!cmd_buffer->push_constants_resource.bo) {
         fprintf(stderr, "Failed to allocate memory for push constants\n");
         abort();
      }

      bool ok = v3dv_bo_map(cmd_buffer->device,
                            cmd_buffer->push_constants_resource.bo,
                            MAX_PUSH_CONSTANTS_SIZE);
      if (!ok) {
         fprintf(stderr, "failed to map push constants buffer\n");
         abort();
      }
   } else {
      if (cmd_buffer->push_constants_resource.offset + MAX_PUSH_CONSTANTS_SIZE <=
          cmd_buffer->push_constants_resource.bo->size) {
         cmd_buffer->push_constants_resource.offset += MAX_PUSH_CONSTANTS_SIZE;
      } else {
         /* FIXME: we ran out of space for push constants. Should we create
          * a new BO? This would be easier with an uploader.
          */
      }
   }

   memcpy(cmd_buffer->push_constants_resource.bo->map +
          cmd_buffer->push_constants_resource.offset,
          cmd_buffer->push_constants_data,
          MAX_PUSH_CONSTANTS_SIZE);

   cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_PUSH_CONSTANTS;
}

/** V3D 4.x TMU configuration parameter 0 (texture) */
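/* Writes the P0 uniform for a texture lookup: the address of the texture
 * shader state record in the descriptor state BO plus the per-lookup offset
 * packed by the compiler. The texture BO and the shader state BO are recorded
 * so the caller can add them to the job.
 */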
static void
write_tmu_p0(struct v3dv_cmd_buffer *cmd_buffer,
             struct v3dv_pipeline *pipeline,
             enum broadcom_shader_stage stage,
             struct v3dv_cl_out **uniforms,
             uint32_t data,
             struct texture_bo_list *tex_bos,
             struct state_bo_list *state_bos)
{
   uint32_t texture_idx = v3d_unit_data_get_unit(data);

   struct v3dv_descriptor_state *descriptor_state =
      v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline);

   /* We need to ensure that the texture bo is added to the job */
   struct v3dv_bo *texture_bo =
      v3dv_descriptor_map_get_texture_bo(descriptor_state,
                                         &pipeline->shared_data->maps[stage]->texture_map,
                                         pipeline->layout, texture_idx);
   assert(texture_bo);
   assert(texture_idx < V3D_MAX_TEXTURE_SAMPLERS);
   tex_bos->tex[texture_idx] = texture_bo;

   struct v3dv_cl_reloc state_reloc =
      v3dv_descriptor_map_get_texture_shader_state(cmd_buffer->device, descriptor_state,
                                                   &pipeline->shared_data->maps[stage]->texture_map,
                                                   pipeline->layout,
                                                   texture_idx);

   cl_aligned_u32(uniforms, state_reloc.bo->offset +
                            state_reloc.offset +
                            v3d_unit_data_get_offset(data));

   /* Texture and Sampler states are typically suballocated, so they are
    * usually the same BO: only flag them once to avoid trying to add them
    * multiple times to the job later.
    */
   if (!state_bo_in_list(state_bos, state_reloc.bo)) {
      assert(state_bos->count < 2 * V3D_MAX_TEXTURE_SAMPLERS);
      state_bos->states[state_bos->count++] = state_reloc.bo;
   }
}

/** V3D 4.x TMU configuration parameter 1 (sampler) */
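/* Writes the P1 uniform for a texture lookup: the address of the sampler
 * state in the descriptor state BO plus the P1 bits packed by the compiler,
 * with the unnormalized-coordinates flag patched in from the sampler object.
 * The sampler state BO is recorded so the caller can add it to the job.
 */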
static void
write_tmu_p1(struct v3dv_cmd_buffer *cmd_buffer,
             struct v3dv_pipeline *pipeline,
             enum broadcom_shader_stage stage,
             struct v3dv_cl_out **uniforms,
             uint32_t data,
             struct state_bo_list *state_bos)
{
   uint32_t sampler_idx = v3d_unit_data_get_unit(data);
   struct v3dv_descriptor_state *descriptor_state =
      v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline);

   assert(sampler_idx != V3DV_NO_SAMPLER_16BIT_IDX &&
          sampler_idx != V3DV_NO_SAMPLER_32BIT_IDX);

   struct v3dv_cl_reloc sampler_state_reloc =
      v3dv_descriptor_map_get_sampler_state(cmd_buffer->device, descriptor_state,
                                            &pipeline->shared_data->maps[stage]->sampler_map,
                                            pipeline->layout, sampler_idx);

   const struct v3dv_sampler *sampler =
      v3dv_descriptor_map_get_sampler(descriptor_state,
                                      &pipeline->shared_data->maps[stage]->sampler_map,
                                      pipeline->layout, sampler_idx);
   assert(sampler);

   /* Set unnormalized coordinates flag from sampler object */
   uint32_t p1_packed = v3d_unit_data_get_offset(data);
   if (sampler->unnormalized_coordinates) {
      struct V3DX(TMU_CONFIG_PARAMETER_1) p1_unpacked;
      V3DX(TMU_CONFIG_PARAMETER_1_unpack)((uint8_t *)&p1_packed, &p1_unpacked);
      p1_unpacked.unnormalized_coordinates = true;
      V3DX(TMU_CONFIG_PARAMETER_1_pack)(NULL, (uint8_t *)&p1_packed,
                                        &p1_unpacked);
   }

   cl_aligned_u32(uniforms, sampler_state_reloc.bo->offset +
                            sampler_state_reloc.offset +
                            p1_packed);

   /* Texture and Sampler states are typically suballocated, so they are
    * usually the same BO: only flag them once to avoid trying to add them
    * multiple times to the job later.
    */
   if (!state_bo_in_list(state_bos, sampler_state_reloc.bo)) {
      assert(state_bos->count < 2 * V3D_MAX_TEXTURE_SAMPLERS);
      state_bos->states[state_bos->count++] = sampler_state_reloc.bo;
   }
}

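/* Writes the address (or size, for QUNIFORM_GET_*_SIZE) uniform for a UBO or
 * SSBO. UBO index 0 is reserved for the push constants UBO; other UBO indices
 * are shifted by one before looking up the descriptor. The buffer BO backing
 * the descriptor is recorded in @buffer_bos so it can be added to the job.
 */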
static void
write_ubo_ssbo_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
                        struct v3dv_pipeline *pipeline,
                        enum broadcom_shader_stage stage,
                        struct v3dv_cl_out **uniforms,
                        enum quniform_contents content,
                        uint32_t data,
                        struct buffer_bo_list *buffer_bos)
{
   struct v3dv_descriptor_state *descriptor_state =
      v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline);

   struct v3dv_descriptor_map *map =
      content == QUNIFORM_UBO_ADDR || content == QUNIFORM_GET_UBO_SIZE ?
      &pipeline->shared_data->maps[stage]->ubo_map :
      &pipeline->shared_data->maps[stage]->ssbo_map;

   uint32_t offset =
      content == QUNIFORM_UBO_ADDR ?
      v3d_unit_data_get_offset(data) :
      0;

   uint32_t dynamic_offset = 0;

   /* For UBOs, the index is shifted, as index 0 is reserved for push
    * constants.
    */
   if (content == QUNIFORM_UBO_ADDR &&
       v3d_unit_data_get_unit(data) == 0) {
      /* This call ensures that the push constants UBO is updated. It
       * already takes into account whether the update is actually
       * needed.
       */
      check_push_constants_ubo(cmd_buffer, pipeline);

      struct v3dv_cl_reloc *resource =
         &cmd_buffer->push_constants_resource;
      assert(resource->bo);

      cl_aligned_u32(uniforms, resource->bo->offset +
                               resource->offset +
                               offset + dynamic_offset);
      buffer_bos->ubo[0] = resource->bo;
   } else {
      uint32_t index =
         content == QUNIFORM_UBO_ADDR ?
         v3d_unit_data_get_unit(data) - 1 :
         data;

      struct v3dv_descriptor *descriptor =
         v3dv_descriptor_map_get_descriptor(descriptor_state, map,
                                            pipeline->layout,
                                            index, &dynamic_offset);
      assert(descriptor);
      assert(descriptor->buffer);
      assert(descriptor->buffer->mem);
      assert(descriptor->buffer->mem->bo);

      if (content == QUNIFORM_GET_SSBO_SIZE ||
          content == QUNIFORM_GET_UBO_SIZE) {
         cl_aligned_u32(uniforms, descriptor->range);
      } else {
         cl_aligned_u32(uniforms, descriptor->buffer->mem->bo->offset +
                                  descriptor->buffer->mem_offset +
                                  descriptor->offset +
                                  offset + dynamic_offset);

         if (content == QUNIFORM_UBO_ADDR) {
            assert(index + 1 < MAX_TOTAL_UNIFORM_BUFFERS);
            buffer_bos->ubo[index + 1] = descriptor->buffer->mem->bo;
         } else {
            assert(index < MAX_TOTAL_STORAGE_BUFFERS);
            buffer_bos->ssbo[index] = descriptor->buffer->mem->bo;
         }
      }
   }
}

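/* Returns the size field requested by @contents from the image view's Vulkan
 * metadata. For cube array views the layer count is divided by 6 so the
 * shader sees the number of cubes rather than the number of faces.
 */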
static uint32_t
get_texture_size_from_image_view(struct v3dv_image_view *image_view,
                                 enum quniform_contents contents,
                                 uint32_t data)
{
   switch(contents) {
   case QUNIFORM_IMAGE_WIDTH:
   case QUNIFORM_TEXTURE_WIDTH:
      /* We don't u_minify the values, as we are using the image_view
       * extents
       */
      return image_view->vk.extent.width;
   case QUNIFORM_IMAGE_HEIGHT:
   case QUNIFORM_TEXTURE_HEIGHT:
      return image_view->vk.extent.height;
   case QUNIFORM_IMAGE_DEPTH:
   case QUNIFORM_TEXTURE_DEPTH:
      return image_view->vk.extent.depth;
   case QUNIFORM_IMAGE_ARRAY_SIZE:
   case QUNIFORM_TEXTURE_ARRAY_SIZE:
      if (image_view->vk.view_type != VK_IMAGE_VIEW_TYPE_CUBE_ARRAY) {
         return image_view->vk.layer_count;
      } else {
         assert(image_view->vk.layer_count % 6 == 0);
         return image_view->vk.layer_count / 6;
      }
   case QUNIFORM_TEXTURE_LEVELS:
      return image_view->vk.level_count;
   case QUNIFORM_TEXTURE_SAMPLES:
      assert(image_view->vk.image);
      return image_view->vk.image->samples;
   default:
      unreachable("Bad texture size field");
   }
}

static uint32_t
get_texture_size_from_buffer_view(struct v3dv_buffer_view *buffer_view,
                                  enum quniform_contents contents,
                                  uint32_t data)
{
   switch(contents) {
   case QUNIFORM_IMAGE_WIDTH:
   case QUNIFORM_TEXTURE_WIDTH:
      return buffer_view->num_elements;
   /* Only size can be queried for texel buffers */
   default:
      unreachable("Bad texture size field for texel buffers");
   }
}

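/* Looks up the descriptor for texture index @data in the stage's texture map
 * and returns the size field requested by @contents, dispatching to the
 * image view or buffer view helpers above depending on the descriptor type.
 */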
static uint32_t
get_texture_size(struct v3dv_cmd_buffer *cmd_buffer,
                 struct v3dv_pipeline *pipeline,
                 enum broadcom_shader_stage stage,
                 enum quniform_contents contents,
                 uint32_t data)
{
   uint32_t texture_idx = data;

   struct v3dv_descriptor_state *descriptor_state =
      v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline);

   struct v3dv_descriptor *descriptor =
      v3dv_descriptor_map_get_descriptor(descriptor_state,
                                         &pipeline->shared_data->maps[stage]->texture_map,
                                         pipeline->layout,
                                         texture_idx, NULL);

   assert(descriptor);

   switch (descriptor->type) {
   case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
   case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
   case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
   case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
      return get_texture_size_from_image_view(descriptor->image_view,
                                              contents, data);
   case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
   case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
      return get_texture_size_from_buffer_view(descriptor->buffer_view,
                                               contents, data);
   default:
      unreachable("Wrong descriptor for getting texture size");
   }
}

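/* Writes the uniform stream for @variant into the job's indirect CL and
 * returns a relocation pointing at its start. All BOs referenced by the
 * uniforms (texture BOs, texture/sampler state BOs, UBOs/SSBOs, CSD shared
 * memory and the spill BO) are added to the job. If @wg_count_offsets is
 * non-NULL it receives pointers to the workgroup-count uniform slots,
 * presumably so the caller can patch them later (e.g. for indirect compute
 * dispatches).
 */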
struct v3dv_cl_reloc
v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer,
                               struct v3dv_pipeline *pipeline,
                               struct v3dv_shader_variant *variant,
                               uint32_t **wg_count_offsets)
{
   struct v3d_uniform_list *uinfo =
      &variant->prog_data.base->uniforms;
   struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic;

   struct v3dv_job *job = cmd_buffer->state.job;
   assert(job);
   assert(job->cmd_buffer == cmd_buffer);

   struct texture_bo_list tex_bos = { 0 };
   struct state_bo_list state_bos = { 0 };
   struct buffer_bo_list buffer_bos = { 0 };

   /* The hardware always pre-fetches the next uniform (also when there
    * aren't any), so we always allocate space for an extra slot. This
    * fixes MMU exceptions reported since Linux kernel 5.4 when the
    * uniforms fill up the tail bytes of a page in the indirect
    * BO. In that scenario, when the hardware pre-fetches after reading
    * the last uniform it will read beyond the end of the page and trigger
    * the MMU exception.
    */
   v3dv_cl_ensure_space(&job->indirect, (uinfo->count + 1) * 4, 4);

   struct v3dv_cl_reloc uniform_stream = v3dv_cl_get_address(&job->indirect);

   struct v3dv_cl_out *uniforms = cl_start(&job->indirect);

   for (int i = 0; i < uinfo->count; i++) {
      uint32_t data = uinfo->data[i];

      switch (uinfo->contents[i]) {
      case QUNIFORM_CONSTANT:
         cl_aligned_u32(&uniforms, data);
         break;

      case QUNIFORM_UNIFORM:
         cl_aligned_u32(&uniforms, cmd_buffer->push_constants_data[data]);
         break;

      case QUNIFORM_VIEWPORT_X_SCALE:
         cl_aligned_f(&uniforms, dynamic->viewport.scale[0][0] * 256.0f);
         break;

      case QUNIFORM_VIEWPORT_Y_SCALE:
         cl_aligned_f(&uniforms, dynamic->viewport.scale[0][1] * 256.0f);
         break;

      case QUNIFORM_VIEWPORT_Z_OFFSET:
         cl_aligned_f(&uniforms, dynamic->viewport.translate[0][2]);
         break;

      case QUNIFORM_VIEWPORT_Z_SCALE:
         cl_aligned_f(&uniforms, dynamic->viewport.scale[0][2]);
         break;

      case QUNIFORM_SSBO_OFFSET:
      case QUNIFORM_UBO_ADDR:
      case QUNIFORM_GET_SSBO_SIZE:
      case QUNIFORM_GET_UBO_SIZE:
         write_ubo_ssbo_uniforms(cmd_buffer, pipeline, variant->stage, &uniforms,
                                 uinfo->contents[i], data, &buffer_bos);
         break;

      case QUNIFORM_IMAGE_TMU_CONFIG_P0:
      case QUNIFORM_TMU_CONFIG_P0:
         write_tmu_p0(cmd_buffer, pipeline, variant->stage,
                      &uniforms, data, &tex_bos, &state_bos);
         break;

      case QUNIFORM_TMU_CONFIG_P1:
         write_tmu_p1(cmd_buffer, pipeline, variant->stage,
                      &uniforms, data, &state_bos);
         break;

      case QUNIFORM_IMAGE_WIDTH:
      case QUNIFORM_IMAGE_HEIGHT:
      case QUNIFORM_IMAGE_DEPTH:
      case QUNIFORM_IMAGE_ARRAY_SIZE:
      case QUNIFORM_TEXTURE_WIDTH:
      case QUNIFORM_TEXTURE_HEIGHT:
      case QUNIFORM_TEXTURE_DEPTH:
      case QUNIFORM_TEXTURE_ARRAY_SIZE:
      case QUNIFORM_TEXTURE_LEVELS:
      case QUNIFORM_TEXTURE_SAMPLES:
         cl_aligned_u32(&uniforms,
                        get_texture_size(cmd_buffer,
                                         pipeline,
                                         variant->stage,
                                         uinfo->contents[i],
                                         data));
         break;

      /* We generate this from geometry shaders to cap the generated gl_Layer
       * to be within the number of layers of the framebuffer, so we prevent
       * the binner from trying to access tile state memory out of bounds (for
       * layers that don't exist).
       *
       * Unfortunately, for secondary command buffers we may not know the
       * number of layers in the framebuffer at this stage. Since we are
       * only using this to sanitize the shader and it should not have any
       * impact on correct shaders that emit valid values for gl_Layer,
       * we just work around it by using the largest number of layers we
       * support.
       *
       * FIXME: we could do better than this by recording in the job that
       * the value at this uniform offset is not correct, and patch it when
       * we execute the secondary command buffer into a primary, since we do
       * have the correct number of layers at that point, but again, since this
       * is only for sanitizing the shader and it only affects the specific case
       * of secondary command buffers without framebuffer info available, it
       * might not be worth the trouble.
       *
       * With multiview the number of layers is dictated by the view mask
       * and not by the framebuffer layers. We do set the job's frame tiling
       * information correctly from the view mask in that case, however,
       * secondary command buffers may not have valid frame tiling data,
       * so when multiview is enabled, we always set the number of layers
       * from the subpass view mask.
       */
      case QUNIFORM_FB_LAYERS: {
         const struct v3dv_cmd_buffer_state *state = &job->cmd_buffer->state;
         const uint32_t view_mask =
            state->pass->subpasses[state->subpass_idx].view_mask;

         uint32_t num_layers;
         if (view_mask != 0) {
            num_layers = util_last_bit(view_mask);
         } else if (job->frame_tiling.layers != 0) {
            num_layers = job->frame_tiling.layers;
         } else if (cmd_buffer->state.framebuffer) {
            num_layers = cmd_buffer->state.framebuffer->layers;
         } else {
            assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY);
            num_layers = 2048;
#if DEBUG
            fprintf(stderr, "Skipping gl_LayerID shader sanity check for "
                            "secondary command buffer\n");
#endif
         }
         cl_aligned_u32(&uniforms, num_layers);
         break;
      }

      case QUNIFORM_VIEW_INDEX:
         cl_aligned_u32(&uniforms, job->cmd_buffer->state.view_index);
         break;

      case QUNIFORM_NUM_WORK_GROUPS:
         assert(job->type == V3DV_JOB_TYPE_GPU_CSD);
         assert(job->csd.wg_count[data] > 0);
         if (wg_count_offsets)
            wg_count_offsets[data] = (uint32_t *) uniforms;
         cl_aligned_u32(&uniforms, job->csd.wg_count[data]);
         break;

      case QUNIFORM_WORK_GROUP_BASE:
         assert(job->type == V3DV_JOB_TYPE_GPU_CSD);
         cl_aligned_u32(&uniforms, job->csd.wg_base[data]);
         break;

      case QUNIFORM_SHARED_OFFSET:
         assert(job->type == V3DV_JOB_TYPE_GPU_CSD);
         assert(job->csd.shared_memory);
         cl_aligned_u32(&uniforms, job->csd.shared_memory->offset);
         break;

      case QUNIFORM_SPILL_OFFSET:
         assert(pipeline->spill.bo);
         cl_aligned_u32(&uniforms, pipeline->spill.bo->offset);
         break;

      case QUNIFORM_SPILL_SIZE_PER_THREAD:
         assert(pipeline->spill.size_per_thread > 0);
         cl_aligned_u32(&uniforms, pipeline->spill.size_per_thread);
         break;

      default:
         unreachable("unsupported quniform_contents uniform type\n");
      }
   }

   cl_end(&job->indirect, uniforms);

   for (int i = 0; i < MAX_TOTAL_TEXTURE_SAMPLERS; i++) {
      if (tex_bos.tex[i])
         v3dv_job_add_bo(job, tex_bos.tex[i]);
   }

   for (int i = 0; i < state_bos.count; i++)
      v3dv_job_add_bo(job, state_bos.states[i]);

   for (int i = 0; i < MAX_TOTAL_UNIFORM_BUFFERS; i++) {
      if (buffer_bos.ubo[i])
         v3dv_job_add_bo(job, buffer_bos.ubo[i]);
   }

   for (int i = 0; i < MAX_TOTAL_STORAGE_BUFFERS; i++) {
      if (buffer_bos.ssbo[i])
         v3dv_job_add_bo(job, buffer_bos.ssbo[i]);
   }

   if (job->csd.shared_memory)
      v3dv_job_add_bo(job, job->csd.shared_memory);

   if (pipeline->spill.bo)
      v3dv_job_add_bo(job, pipeline->spill.bo);

   return uniform_stream;
}

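/* Convenience wrapper around v3dv_write_uniforms_wg_offsets() for callers
 * that don't need the workgroup-count uniform offsets.
 */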
struct v3dv_cl_reloc
v3dv_write_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
                    struct v3dv_pipeline *pipeline,
                    struct v3dv_shader_variant *variant)
{
   return v3dv_write_uniforms_wg_offsets(cmd_buffer, pipeline, variant, NULL);
}