• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2019 Raspberry Pi
3  *
4  * Based in part on v3d driver which is:
5  *
6  * Copyright © 2014-2017 Broadcom
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining a
9  * copy of this software and associated documentation files (the "Software"),
10  * to deal in the Software without restriction, including without limitation
11  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12  * and/or sell copies of the Software, and to permit persons to whom the
13  * Software is furnished to do so, subject to the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the next
16  * paragraph) shall be included in all copies or substantial portions of the
17  * Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
22  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25  * IN THE SOFTWARE.
26  */
27 
28 #include "v3dv_private.h"
29 #include "vk_format_info.h"
30 
/*
 * This method checks whether the UBO used for push constants needs to be
 * updated or not.
 *
 * The push constants UBO is only used for push constants accessed by a
 * non-constant index.
 *
 * FIXME: right now in these cases we upload the full push_constants_data.
 * An improvement would be to upload only the data that actually needs to be
 * accessed through the UBO.
 */
static void
check_push_constants_ubo(struct v3dv_cmd_buffer *cmd_buffer)
{
   /* Nothing to do if the push constants are clean or the bound pipeline
    * declares no push-constant range at all.
    */
   if (!(cmd_buffer->state.dirty & V3DV_CMD_DIRTY_PUSH_CONSTANTS) ||
       cmd_buffer->state.pipeline->layout->push_constant_size == 0)
      return;

   if (cmd_buffer->push_constants_resource.bo == NULL) {
      /* First use in this command buffer: allocate and CPU-map a BO large
       * enough for one full push-constant block.
       */
      cmd_buffer->push_constants_resource.bo =
         v3dv_bo_alloc(cmd_buffer->device, MAX_PUSH_CONSTANTS_SIZE,
                       "push constants", true);

      if (!cmd_buffer->push_constants_resource.bo) {
         fprintf(stderr, "Failed to allocate memory for push constants\n");
         abort();
      }

      bool ok = v3dv_bo_map(cmd_buffer->device,
                            cmd_buffer->push_constants_resource.bo,
                            MAX_PUSH_CONSTANTS_SIZE);
      if (!ok) {
         fprintf(stderr, "failed to map push constants buffer\n");
         abort();
      }
   } else {
      /* The BO already exists: advance to a fresh slot so that draws
       * recorded earlier keep referencing their own copy of the data.
       */
      if (cmd_buffer->push_constants_resource.offset + MAX_PUSH_CONSTANTS_SIZE <=
          cmd_buffer->push_constants_resource.bo->size) {
         cmd_buffer->push_constants_resource.offset += MAX_PUSH_CONSTANTS_SIZE;
      } else {
         /* FIXME: we got out of space for push descriptors. Should we create
          * a new bo? This could be easier with a uploader
          */
         /* NOTE(review): when this branch is taken the offset is NOT
          * advanced, so the memcpy below overwrites the slot referenced by
          * earlier draws — confirm whether the BO is always sized so this
          * cannot happen in practice.
          */
      }
   }

   /* Copy the whole CPU-side push constant block into the current slot. */
   memcpy(cmd_buffer->push_constants_resource.bo->map +
          cmd_buffer->push_constants_resource.offset,
          cmd_buffer->push_constants_data,
          MAX_PUSH_CONSTANTS_SIZE);

   cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_PUSH_CONSTANTS;
}
84 
85 /** V3D 4.x TMU configuration parameter 0 (texture) */
86 static void
write_tmu_p0(struct v3dv_cmd_buffer * cmd_buffer,struct v3dv_pipeline * pipeline,struct v3dv_cl_out ** uniforms,uint32_t data)87 write_tmu_p0(struct v3dv_cmd_buffer *cmd_buffer,
88              struct v3dv_pipeline *pipeline,
89              struct v3dv_cl_out **uniforms,
90              uint32_t data)
91 {
92    int unit = v3d_unit_data_get_unit(data);
93    uint32_t texture_idx;
94    struct v3dv_job *job = cmd_buffer->state.job;
95    struct v3dv_descriptor_state *descriptor_state =
96       &cmd_buffer->state.descriptor_state[v3dv_pipeline_get_binding_point(pipeline)];
97 
98    v3dv_pipeline_combined_index_key_unpack(pipeline->combined_index_to_key_map[unit],
99                                            &texture_idx,
100                                            NULL);
101 
102    /* We need to ensure that the texture bo is added to the job */
103    struct v3dv_bo *texture_bo =
104       v3dv_descriptor_map_get_texture_bo(descriptor_state, &pipeline->texture_map,
105                                          pipeline->layout, texture_idx);
106    assert(texture_bo);
107    v3dv_job_add_bo(job, texture_bo);
108 
109    struct v3dv_cl_reloc state_reloc =
110       v3dv_descriptor_map_get_texture_shader_state(descriptor_state,
111                                                    &pipeline->texture_map,
112                                                    pipeline->layout,
113                                                    texture_idx);
114 
115    cl_aligned_reloc(&job->indirect, uniforms,
116                     state_reloc.bo,
117                     state_reloc.offset +
118                     v3d_unit_data_get_offset(data));
119 }
120 
121 /** V3D 4.x TMU configuration parameter 1 (sampler) */
122 static void
write_tmu_p1(struct v3dv_cmd_buffer * cmd_buffer,struct v3dv_pipeline * pipeline,struct v3dv_cl_out ** uniforms,uint32_t data)123 write_tmu_p1(struct v3dv_cmd_buffer *cmd_buffer,
124              struct v3dv_pipeline *pipeline,
125              struct v3dv_cl_out **uniforms,
126              uint32_t data)
127 {
128    uint32_t unit = v3d_unit_data_get_unit(data);
129    uint32_t sampler_idx;
130    struct v3dv_job *job = cmd_buffer->state.job;
131    struct v3dv_descriptor_state *descriptor_state =
132       &cmd_buffer->state.descriptor_state[v3dv_pipeline_get_binding_point(pipeline)];
133 
134    v3dv_pipeline_combined_index_key_unpack(pipeline->combined_index_to_key_map[unit],
135                                            NULL, &sampler_idx);
136    assert(sampler_idx != V3DV_NO_SAMPLER_IDX);
137 
138    struct v3dv_cl_reloc sampler_state_reloc =
139       v3dv_descriptor_map_get_sampler_state(descriptor_state, &pipeline->sampler_map,
140                                             pipeline->layout, sampler_idx);
141 
142    const struct v3dv_sampler *sampler =
143       v3dv_descriptor_map_get_sampler(descriptor_state, &pipeline->sampler_map,
144                                          pipeline->layout, sampler_idx);
145    assert(sampler);
146 
147    /* Set unnormalized coordinates flag from sampler object */
148    uint32_t p1_packed = v3d_unit_data_get_offset(data);
149    if (sampler->unnormalized_coordinates) {
150       struct V3DX(TMU_CONFIG_PARAMETER_1) p1_unpacked;
151       V3DX(TMU_CONFIG_PARAMETER_1_unpack)((uint8_t *)&p1_packed, &p1_unpacked);
152       p1_unpacked.unnormalized_coordinates = true;
153       V3DX(TMU_CONFIG_PARAMETER_1_pack)(NULL, (uint8_t *)&p1_packed,
154                                         &p1_unpacked);
155    }
156 
157    cl_aligned_reloc(&job->indirect, uniforms,
158                     sampler_state_reloc.bo,
159                     sampler_state_reloc.offset +
160                     p1_packed);
161 }
162 
/* Emits the uniform stream entry for a UBO/SSBO address or size lookup.
 *
 * QUNIFORM_UBO_ADDR with unit 0 refers to the push constants UBO held in
 * the command buffer. Any other case resolves the descriptor for the
 * given index and emits either the buffer range (for GET_*_SIZE) or a
 * relocation to the buffer memory.
 */
static void
write_ubo_ssbo_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
                        struct v3dv_pipeline *pipeline,
                        struct v3dv_cl_out **uniforms,
                        enum quniform_contents content,
                        uint32_t data)
{
   struct v3dv_job *job = cmd_buffer->state.job;
   struct v3dv_descriptor_state *descriptor_state =
      &cmd_buffer->state.descriptor_state[v3dv_pipeline_get_binding_point(pipeline)];

   /* UBO contents use the ubo_map; everything else here is an SSBO. */
   struct v3dv_descriptor_map *map =
      content == QUNIFORM_UBO_ADDR || content == QUNIFORM_GET_UBO_SIZE ?
      &pipeline->ubo_map : &pipeline->ssbo_map;

   /* Only UBO addresses carry an extra byte offset packed in the data. */
   uint32_t offset =
      content == QUNIFORM_UBO_ADDR ?
      v3d_unit_data_get_offset(data) :
      0;

   /* Filled in by the descriptor lookup for dynamic UBO/SSBO bindings. */
   uint32_t dynamic_offset = 0;

   /* For ubos, index is shifted, as 0 is reserved for push constants.
    */
   if (content == QUNIFORM_UBO_ADDR &&
       v3d_unit_data_get_unit(data) == 0) {
      /* Ensure the push constants UBO is up to date; the call itself
       * decides whether an upload is actually needed.
       */
      check_push_constants_ubo(cmd_buffer);

      struct v3dv_resource *resource =
         &cmd_buffer->push_constants_resource;
      assert(resource->bo);

      cl_aligned_reloc(&job->indirect, uniforms,
                       resource->bo,
                       resource->offset + offset + dynamic_offset);

   } else {
      /* Undo the unit shift for UBOs (unit 0 is the push constants UBO);
       * for SSBOs the data is already the descriptor-map index.
       */
      uint32_t index =
         content == QUNIFORM_UBO_ADDR ?
         v3d_unit_data_get_unit(data) - 1 :
         data;

      struct v3dv_descriptor *descriptor =
         v3dv_descriptor_map_get_descriptor(descriptor_state, map,
                                            pipeline->layout,
                                            index, &dynamic_offset);
      assert(descriptor);
      assert(descriptor->buffer);
      assert(descriptor->buffer->mem);
      assert(descriptor->buffer->mem->bo);

      if (content == QUNIFORM_GET_SSBO_SIZE ||
          content == QUNIFORM_GET_UBO_SIZE) {
         /* Size queries emit a plain value, not a relocation. */
         cl_aligned_u32(uniforms, descriptor->range);
      } else {
         cl_aligned_reloc(&job->indirect, uniforms,
                          descriptor->buffer->mem->bo,
                          descriptor->buffer->mem_offset +
                          descriptor->offset + offset + dynamic_offset);
      }
   }
}
229 
230 static uint32_t
get_texture_size_from_image_view(struct v3dv_image_view * image_view,enum quniform_contents contents,uint32_t data)231 get_texture_size_from_image_view(struct v3dv_image_view *image_view,
232                                  enum quniform_contents contents,
233                                  uint32_t data)
234 {
235    switch(contents) {
236    case QUNIFORM_IMAGE_WIDTH:
237    case QUNIFORM_TEXTURE_WIDTH:
238       /* We don't u_minify the values, as we are using the image_view
239        * extents
240        */
241       return image_view->extent.width;
242    case QUNIFORM_IMAGE_HEIGHT:
243    case QUNIFORM_TEXTURE_HEIGHT:
244       return image_view->extent.height;
245    case QUNIFORM_IMAGE_DEPTH:
246    case QUNIFORM_TEXTURE_DEPTH:
247       return image_view->extent.depth;
248    case QUNIFORM_IMAGE_ARRAY_SIZE:
249    case QUNIFORM_TEXTURE_ARRAY_SIZE:
250       if (image_view->type != VK_IMAGE_VIEW_TYPE_CUBE_ARRAY) {
251          return image_view->last_layer - image_view->first_layer + 1;
252       } else {
253          assert((image_view->last_layer - image_view->first_layer + 1) % 6 == 0);
254          return (image_view->last_layer - image_view->first_layer + 1) / 6;
255       }
256    case QUNIFORM_TEXTURE_LEVELS:
257       return image_view->max_level - image_view->base_level + 1;
258    case QUNIFORM_TEXTURE_SAMPLES:
259       assert(image_view->image);
260       return image_view->image->samples;
261    default:
262       unreachable("Bad texture size field");
263    }
264 }
265 
266 
267 static uint32_t
get_texture_size_from_buffer_view(struct v3dv_buffer_view * buffer_view,enum quniform_contents contents,uint32_t data)268 get_texture_size_from_buffer_view(struct v3dv_buffer_view *buffer_view,
269                                   enum quniform_contents contents,
270                                   uint32_t data)
271 {
272    switch(contents) {
273    case QUNIFORM_IMAGE_WIDTH:
274    case QUNIFORM_TEXTURE_WIDTH:
275       return buffer_view->num_elements;
276    /* Only size can be queried for texel buffers  */
277    default:
278       unreachable("Bad texture size field for texel buffers");
279    }
280 }
281 
282 static uint32_t
get_texture_size(struct v3dv_cmd_buffer * cmd_buffer,struct v3dv_pipeline * pipeline,enum quniform_contents contents,uint32_t data)283 get_texture_size(struct v3dv_cmd_buffer *cmd_buffer,
284                  struct v3dv_pipeline *pipeline,
285                  enum quniform_contents contents,
286                  uint32_t data)
287 {
288    int unit = v3d_unit_data_get_unit(data);
289    uint32_t texture_idx;
290    struct v3dv_descriptor_state *descriptor_state =
291       &cmd_buffer->state.descriptor_state[v3dv_pipeline_get_binding_point(pipeline)];
292 
293    v3dv_pipeline_combined_index_key_unpack(pipeline->combined_index_to_key_map[unit],
294                                            &texture_idx,
295                                            NULL);
296 
297    struct v3dv_descriptor *descriptor =
298       v3dv_descriptor_map_get_descriptor(descriptor_state,
299                                          &pipeline->texture_map,
300                                          pipeline->layout,
301                                          texture_idx, NULL);
302 
303    assert(descriptor);
304 
305    switch (descriptor->type) {
306    case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
307    case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
308    case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
309    case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
310       return get_texture_size_from_image_view(descriptor->image_view,
311                                               contents, data);
312    case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
313    case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
314       return get_texture_size_from_buffer_view(descriptor->buffer_view,
315                                                contents, data);
316    default:
317       unreachable("Wrong descriptor for getting texture size");
318    }
319 }
320 
/* Walks the compiled stage's uniform list and writes one 32-bit slot per
 * entry into the job's indirect CL, in list order. Returns a relocation
 * to the start of the emitted uniform stream.
 *
 * For QUNIFORM_NUM_WORK_GROUPS entries, if wg_count_offsets is non-NULL
 * it is filled with pointers into the stream so the caller can patch the
 * workgroup counts later (compute dispatch).
 */
struct v3dv_cl_reloc
v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer,
                               struct v3dv_pipeline_stage *p_stage,
                               uint32_t **wg_count_offsets)
{
   struct v3d_uniform_list *uinfo =
      &p_stage->current_variant->prog_data.base->uniforms;
   struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic;
   struct v3dv_pipeline *pipeline = p_stage->pipeline;

   struct v3dv_job *job = cmd_buffer->state.job;
   assert(job);

   /* The hardware always pre-fetches the next uniform (also when there
    * aren't any), so we always allocate space for an extra slot. This
    * fixes MMU exceptions reported since Linux kernel 5.4 when the
    * uniforms fill up the tail bytes of a page in the indirect
    * BO. In that scenario, when the hardware pre-fetches after reading
    * the last uniform it will read beyond the end of the page and trigger
    * the MMU exception.
    */
   v3dv_cl_ensure_space(&job->indirect, (uinfo->count + 1) * 4, 4);

   struct v3dv_cl_reloc uniform_stream = v3dv_cl_get_address(&job->indirect);

   struct v3dv_cl_out *uniforms = cl_start(&job->indirect);

   for (int i = 0; i < uinfo->count; i++) {
      uint32_t data = uinfo->data[i];

      switch (uinfo->contents[i]) {
      case QUNIFORM_CONSTANT:
         cl_aligned_u32(&uniforms, data);
         break;

      /* Push constant accessed with a constant index: data is the index
       * into push_constants_data, emitted inline.
       */
      case QUNIFORM_UNIFORM:
         assert(pipeline->use_push_constants);
         cl_aligned_u32(&uniforms, cmd_buffer->push_constants_data[data]);
         break;

      /* Viewport X/Y scales are emitted in 1/256th-of-pixel units,
       * hence the 256.0f factor.
       */
      case QUNIFORM_VIEWPORT_X_SCALE:
         cl_aligned_f(&uniforms, dynamic->viewport.scale[0][0] * 256.0f);
         break;

      case QUNIFORM_VIEWPORT_Y_SCALE:
         cl_aligned_f(&uniforms, dynamic->viewport.scale[0][1] * 256.0f);
         break;

      case QUNIFORM_VIEWPORT_Z_OFFSET:
         cl_aligned_f(&uniforms, dynamic->viewport.translate[0][2]);
         break;

      case QUNIFORM_VIEWPORT_Z_SCALE:
         cl_aligned_f(&uniforms, dynamic->viewport.scale[0][2]);
         break;

      case QUNIFORM_SSBO_OFFSET:
      case QUNIFORM_UBO_ADDR:
      case QUNIFORM_GET_SSBO_SIZE:
      case QUNIFORM_GET_UBO_SIZE:
         write_ubo_ssbo_uniforms(cmd_buffer, pipeline, &uniforms,
                                 uinfo->contents[i], data);
        break;

      case QUNIFORM_IMAGE_TMU_CONFIG_P0:
      case QUNIFORM_TMU_CONFIG_P0:
         write_tmu_p0(cmd_buffer, pipeline, &uniforms, data);
         break;

      case QUNIFORM_TMU_CONFIG_P1:
         write_tmu_p1(cmd_buffer, pipeline, &uniforms, data);
         break;

      case QUNIFORM_IMAGE_WIDTH:
      case QUNIFORM_IMAGE_HEIGHT:
      case QUNIFORM_IMAGE_DEPTH:
      case QUNIFORM_IMAGE_ARRAY_SIZE:
      case QUNIFORM_TEXTURE_WIDTH:
      case QUNIFORM_TEXTURE_HEIGHT:
      case QUNIFORM_TEXTURE_DEPTH:
      case QUNIFORM_TEXTURE_ARRAY_SIZE:
      case QUNIFORM_TEXTURE_LEVELS:
      case QUNIFORM_TEXTURE_SAMPLES:
         cl_aligned_u32(&uniforms,
                        get_texture_size(cmd_buffer,
                                         pipeline,
                                         uinfo->contents[i],
                                         data));
         break;

      /* data selects the dispatch dimension (0..2). Record where the
       * count was written so the caller can patch it if needed.
       */
      case QUNIFORM_NUM_WORK_GROUPS:
         assert(job->type == V3DV_JOB_TYPE_GPU_CSD);
         assert(job->csd.wg_count[data] > 0);
         if (wg_count_offsets)
            wg_count_offsets[data] = (uint32_t *) uniforms;
         cl_aligned_u32(&uniforms, job->csd.wg_count[data]);
         break;

      case QUNIFORM_SHARED_OFFSET:
         assert(job->type == V3DV_JOB_TYPE_GPU_CSD);
         assert(job->csd.shared_memory);
         cl_aligned_reloc(&job->indirect, &uniforms, job->csd.shared_memory, 0);
         break;

      case QUNIFORM_SPILL_OFFSET:
         assert(pipeline->spill.bo);
         cl_aligned_reloc(&job->indirect, &uniforms, pipeline->spill.bo, 0);
         break;

      case QUNIFORM_SPILL_SIZE_PER_THREAD:
         assert(pipeline->spill.size_per_thread > 0);
         cl_aligned_u32(&uniforms, pipeline->spill.size_per_thread);
         break;

      default:
         unreachable("unsupported quniform_contents uniform type\n");
      }
   }

   cl_end(&job->indirect, uniforms);

   return uniform_stream;
}
444 
445 struct v3dv_cl_reloc
v3dv_write_uniforms(struct v3dv_cmd_buffer * cmd_buffer,struct v3dv_pipeline_stage * p_stage)446 v3dv_write_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
447                     struct v3dv_pipeline_stage *p_stage)
448 {
449    return v3dv_write_uniforms_wg_offsets(cmd_buffer, p_stage, NULL);
450 }
451