/*
 * Copyright © 2019 Raspberry Pi
 *
 * Based in part on v3d driver which is:
 *
 * Copyright © 2014-2017 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "v3dv_private.h"
#include "vk_format_info.h"

/*
 * This method checks whether the UBO used for push constants needs to be
 * updated.
 *
 * The push constants UBO is only used for push constants accessed with a
 * non-const index.
 *
 * FIXME: right now for these cases we are uploading the full
 * push_constants_data. An improvement would be to upload only the data that
 * actually needs to go through the UBO.
 */
static void
check_push_constants_ubo(struct v3dv_cmd_buffer *cmd_buffer)
{
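   /* Nothing to do if push constants are not dirty or the pipeline does not
    * use any.
    */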
   if (!(cmd_buffer->state.dirty & V3DV_CMD_DIRTY_PUSH_CONSTANTS) ||
       cmd_buffer->state.pipeline->layout->push_constant_size == 0)
      return;

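   /* On first use, allocate and map a BO large enough to hold one full copy
    * of the push constants data.
    */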
   if (cmd_buffer->push_constants_resource.bo == NULL) {
      cmd_buffer->push_constants_resource.bo =
         v3dv_bo_alloc(cmd_buffer->device, MAX_PUSH_CONSTANTS_SIZE,
                       "push constants", true);

      if (!cmd_buffer->push_constants_resource.bo) {
         fprintf(stderr, "Failed to allocate memory for push constants\n");
         abort();
      }

      bool ok = v3dv_bo_map(cmd_buffer->device,
                            cmd_buffer->push_constants_resource.bo,
                            MAX_PUSH_CONSTANTS_SIZE);
      if (!ok) {
         fprintf(stderr, "failed to map push constants buffer\n");
         abort();
      }
   } else {
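      /* The BO already holds data from a previous update: advance to the
       * next MAX_PUSH_CONSTANTS_SIZE slot rather than overwriting it.
       */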
      if (cmd_buffer->push_constants_resource.offset + MAX_PUSH_CONSTANTS_SIZE <=
          cmd_buffer->push_constants_resource.bo->size) {
         cmd_buffer->push_constants_resource.offset += MAX_PUSH_CONSTANTS_SIZE;
      } else {
         /* FIXME: we ran out of space for push constants. Should we allocate
          * a new BO? This could be easier with an uploader.
          */
      }
   }

   memcpy(cmd_buffer->push_constants_resource.bo->map +
          cmd_buffer->push_constants_resource.offset,
          cmd_buffer->push_constants_data,
          MAX_PUSH_CONSTANTS_SIZE);

   cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_PUSH_CONSTANTS;
}

/** V3D 4.x TMU configuration parameter 0 (texture) */
static void
write_tmu_p0(struct v3dv_cmd_buffer *cmd_buffer,
             struct v3dv_pipeline *pipeline,
             struct v3dv_cl_out **uniforms,
             uint32_t data)
{
   int unit = v3d_unit_data_get_unit(data);
   uint32_t texture_idx;
   struct v3dv_job *job = cmd_buffer->state.job;
   struct v3dv_descriptor_state *descriptor_state =
      &cmd_buffer->state.descriptor_state[v3dv_pipeline_get_binding_point(pipeline)];

   v3dv_pipeline_combined_index_key_unpack(pipeline->combined_index_to_key_map[unit],
                                           &texture_idx,
                                           NULL);

   /* We need to ensure that the texture bo is added to the job */
   struct v3dv_bo *texture_bo =
      v3dv_descriptor_map_get_texture_bo(descriptor_state, &pipeline->texture_map,
                                         pipeline->layout, texture_idx);
   assert(texture_bo);
   v3dv_job_add_bo(job, texture_bo);

   struct v3dv_cl_reloc state_reloc =
      v3dv_descriptor_map_get_texture_shader_state(descriptor_state,
                                                   &pipeline->texture_map,
                                                   pipeline->layout,
                                                   texture_idx);

   cl_aligned_reloc(&job->indirect, uniforms,
                    state_reloc.bo,
                    state_reloc.offset +
                    v3d_unit_data_get_offset(data));
}

/** V3D 4.x TMU configuration parameter 1 (sampler) */
static void
write_tmu_p1(struct v3dv_cmd_buffer *cmd_buffer,
             struct v3dv_pipeline *pipeline,
             struct v3dv_cl_out **uniforms,
             uint32_t data)
{
   uint32_t unit = v3d_unit_data_get_unit(data);
   uint32_t sampler_idx;
   struct v3dv_job *job = cmd_buffer->state.job;
   struct v3dv_descriptor_state *descriptor_state =
      &cmd_buffer->state.descriptor_state[v3dv_pipeline_get_binding_point(pipeline)];

   v3dv_pipeline_combined_index_key_unpack(pipeline->combined_index_to_key_map[unit],
                                           NULL, &sampler_idx);
   assert(sampler_idx != V3DV_NO_SAMPLER_IDX);

   struct v3dv_cl_reloc sampler_state_reloc =
      v3dv_descriptor_map_get_sampler_state(descriptor_state, &pipeline->sampler_map,
                                            pipeline->layout, sampler_idx);

   const struct v3dv_sampler *sampler =
      v3dv_descriptor_map_get_sampler(descriptor_state, &pipeline->sampler_map,
                                      pipeline->layout, sampler_idx);
   assert(sampler);

   /* Set unnormalized coordinates flag from sampler object */
   uint32_t p1_packed = v3d_unit_data_get_offset(data);
   if (sampler->unnormalized_coordinates) {
      struct V3DX(TMU_CONFIG_PARAMETER_1) p1_unpacked;
      V3DX(TMU_CONFIG_PARAMETER_1_unpack)((uint8_t *)&p1_packed, &p1_unpacked);
      p1_unpacked.unnormalized_coordinates = true;
      V3DX(TMU_CONFIG_PARAMETER_1_pack)(NULL, (uint8_t *)&p1_packed,
                                        &p1_unpacked);
   }

   cl_aligned_reloc(&job->indirect, uniforms,
                    sampler_state_reloc.bo,
                    sampler_state_reloc.offset +
                    p1_packed);
}

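/* Emits the address (or, for size queries, the size) of a UBO or SSBO. For
 * UBOs, unit 0 is reserved for the push constants UBO.
 */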
static void
write_ubo_ssbo_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
                        struct v3dv_pipeline *pipeline,
                        struct v3dv_cl_out **uniforms,
                        enum quniform_contents content,
                        uint32_t data)
{
   struct v3dv_job *job = cmd_buffer->state.job;
   struct v3dv_descriptor_state *descriptor_state =
      &cmd_buffer->state.descriptor_state[v3dv_pipeline_get_binding_point(pipeline)];

   struct v3dv_descriptor_map *map =
      content == QUNIFORM_UBO_ADDR || content == QUNIFORM_GET_UBO_SIZE ?
      &pipeline->ubo_map : &pipeline->ssbo_map;

   uint32_t offset =
      content == QUNIFORM_UBO_ADDR ?
      v3d_unit_data_get_offset(data) :
      0;

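   /* Dynamic offset for dynamic UBO/SSBO descriptors; filled in by
    * v3dv_descriptor_map_get_descriptor below.
    */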
   uint32_t dynamic_offset = 0;

   /* For UBOs, the unit index is shifted up by one, as unit 0 is reserved
    * for push constants.
    */
   if (content == QUNIFORM_UBO_ADDR &&
       v3d_unit_data_get_unit(data) == 0) {
      /* This call ensures that the push constants UBO is updated. It
       * already takes into account whether the update is needed or not.
       */
      check_push_constants_ubo(cmd_buffer);

      struct v3dv_resource *resource =
         &cmd_buffer->push_constants_resource;
      assert(resource->bo);

      cl_aligned_reloc(&job->indirect, uniforms,
                       resource->bo,
                       resource->offset + offset + dynamic_offset);

   } else {
      uint32_t index =
         content == QUNIFORM_UBO_ADDR ?
         v3d_unit_data_get_unit(data) - 1 :
         data;

      struct v3dv_descriptor *descriptor =
         v3dv_descriptor_map_get_descriptor(descriptor_state, map,
                                            pipeline->layout,
                                            index, &dynamic_offset);
      assert(descriptor);
      assert(descriptor->buffer);
      assert(descriptor->buffer->mem);
      assert(descriptor->buffer->mem->bo);

      if (content == QUNIFORM_GET_SSBO_SIZE ||
          content == QUNIFORM_GET_UBO_SIZE) {
         cl_aligned_u32(uniforms, descriptor->range);
      } else {
         cl_aligned_reloc(&job->indirect, uniforms,
                          descriptor->buffer->mem->bo,
                          descriptor->buffer->mem_offset +
                          descriptor->offset + offset + dynamic_offset);
      }
   }
}

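/* Resolves a texture size query (width/height/depth/layers/levels/samples)
 * against the extents of the bound image view.
 */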
static uint32_t
get_texture_size_from_image_view(struct v3dv_image_view *image_view,
                                 enum quniform_contents contents,
                                 uint32_t data)
{
   switch(contents) {
   case QUNIFORM_IMAGE_WIDTH:
   case QUNIFORM_TEXTURE_WIDTH:
      /* We don't u_minify the values here because we are already using the
       * image_view extents.
       */
      return image_view->extent.width;
   case QUNIFORM_IMAGE_HEIGHT:
   case QUNIFORM_TEXTURE_HEIGHT:
      return image_view->extent.height;
   case QUNIFORM_IMAGE_DEPTH:
   case QUNIFORM_TEXTURE_DEPTH:
      return image_view->extent.depth;
   case QUNIFORM_IMAGE_ARRAY_SIZE:
   case QUNIFORM_TEXTURE_ARRAY_SIZE:
      if (image_view->type != VK_IMAGE_VIEW_TYPE_CUBE_ARRAY) {
         return image_view->last_layer - image_view->first_layer + 1;
      } else {
         assert((image_view->last_layer - image_view->first_layer + 1) % 6 == 0);
         return (image_view->last_layer - image_view->first_layer + 1) / 6;
      }
   case QUNIFORM_TEXTURE_LEVELS:
      return image_view->max_level - image_view->base_level + 1;
   case QUNIFORM_TEXTURE_SAMPLES:
      assert(image_view->image);
      return image_view->image->samples;
   default:
      unreachable("Bad texture size field");
   }
}

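/* Texture size queries on texel buffers: only the number of elements
 * (width) is available.
 */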
static uint32_t
get_texture_size_from_buffer_view(struct v3dv_buffer_view *buffer_view,
                                  enum quniform_contents contents,
                                  uint32_t data)
{
   switch(contents) {
   case QUNIFORM_IMAGE_WIDTH:
   case QUNIFORM_TEXTURE_WIDTH:
      return buffer_view->num_elements;
   /* Only size can be queried for texel buffers */
   default:
      unreachable("Bad texture size field for texel buffers");
   }
}

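/* Resolves a texture size query by fetching the descriptor bound at the
 * texture unit encoded in 'data' and dispatching on its descriptor type.
 */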
static uint32_t
get_texture_size(struct v3dv_cmd_buffer *cmd_buffer,
                 struct v3dv_pipeline *pipeline,
                 enum quniform_contents contents,
                 uint32_t data)
{
   int unit = v3d_unit_data_get_unit(data);
   uint32_t texture_idx;
   struct v3dv_descriptor_state *descriptor_state =
      &cmd_buffer->state.descriptor_state[v3dv_pipeline_get_binding_point(pipeline)];

   v3dv_pipeline_combined_index_key_unpack(pipeline->combined_index_to_key_map[unit],
                                           &texture_idx,
                                           NULL);

   struct v3dv_descriptor *descriptor =
      v3dv_descriptor_map_get_descriptor(descriptor_state,
                                         &pipeline->texture_map,
                                         pipeline->layout,
                                         texture_idx, NULL);

   assert(descriptor);

   switch (descriptor->type) {
   case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
   case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
   case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
   case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
      return get_texture_size_from_image_view(descriptor->image_view,
                                              contents, data);
   case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
   case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
      return get_texture_size_from_buffer_view(descriptor->buffer_view,
                                               contents, data);
   default:
      unreachable("Wrong descriptor for getting texture size");
   }
}

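/* Writes the uniform stream for the current variant of the given pipeline
 * stage into the job's indirect BO and returns its address. If
 * wg_count_offsets is non-NULL, it is filled with the locations of the
 * workgroup-count uniforms within the stream so that callers can locate
 * and patch them afterwards if needed.
 */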
struct v3dv_cl_reloc
v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer,
                               struct v3dv_pipeline_stage *p_stage,
                               uint32_t **wg_count_offsets)
{
   struct v3d_uniform_list *uinfo =
      &p_stage->current_variant->prog_data.base->uniforms;
   struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic;
   struct v3dv_pipeline *pipeline = p_stage->pipeline;

   struct v3dv_job *job = cmd_buffer->state.job;
   assert(job);

   /* The hardware always pre-fetches the next uniform (even when there
    * aren't any left), so we always allocate space for an extra slot. This
    * fixes MMU exceptions reported since Linux kernel 5.4 when the uniforms
    * fill up the tail bytes of a page in the indirect BO: in that scenario,
    * when the hardware pre-fetches after reading the last uniform it reads
    * beyond the end of the page and triggers the MMU exception.
    */
   v3dv_cl_ensure_space(&job->indirect, (uinfo->count + 1) * 4, 4);

   struct v3dv_cl_reloc uniform_stream = v3dv_cl_get_address(&job->indirect);

   struct v3dv_cl_out *uniforms = cl_start(&job->indirect);

   for (int i = 0; i < uinfo->count; i++) {
      uint32_t data = uinfo->data[i];

      switch (uinfo->contents[i]) {
      case QUNIFORM_CONSTANT:
         cl_aligned_u32(&uniforms, data);
         break;

      case QUNIFORM_UNIFORM:
         assert(pipeline->use_push_constants);
         cl_aligned_u32(&uniforms, cmd_buffer->push_constants_data[data]);
         break;

      case QUNIFORM_VIEWPORT_X_SCALE:
         cl_aligned_f(&uniforms, dynamic->viewport.scale[0][0] * 256.0f);
         break;

      case QUNIFORM_VIEWPORT_Y_SCALE:
         cl_aligned_f(&uniforms, dynamic->viewport.scale[0][1] * 256.0f);
         break;

      case QUNIFORM_VIEWPORT_Z_OFFSET:
         cl_aligned_f(&uniforms, dynamic->viewport.translate[0][2]);
         break;

      case QUNIFORM_VIEWPORT_Z_SCALE:
         cl_aligned_f(&uniforms, dynamic->viewport.scale[0][2]);
         break;

      case QUNIFORM_SSBO_OFFSET:
      case QUNIFORM_UBO_ADDR:
      case QUNIFORM_GET_SSBO_SIZE:
      case QUNIFORM_GET_UBO_SIZE:
         write_ubo_ssbo_uniforms(cmd_buffer, pipeline, &uniforms,
                                 uinfo->contents[i], data);
         break;

      case QUNIFORM_IMAGE_TMU_CONFIG_P0:
      case QUNIFORM_TMU_CONFIG_P0:
         write_tmu_p0(cmd_buffer, pipeline, &uniforms, data);
         break;

      case QUNIFORM_TMU_CONFIG_P1:
         write_tmu_p1(cmd_buffer, pipeline, &uniforms, data);
         break;

      case QUNIFORM_IMAGE_WIDTH:
      case QUNIFORM_IMAGE_HEIGHT:
      case QUNIFORM_IMAGE_DEPTH:
      case QUNIFORM_IMAGE_ARRAY_SIZE:
      case QUNIFORM_TEXTURE_WIDTH:
      case QUNIFORM_TEXTURE_HEIGHT:
      case QUNIFORM_TEXTURE_DEPTH:
      case QUNIFORM_TEXTURE_ARRAY_SIZE:
      case QUNIFORM_TEXTURE_LEVELS:
      case QUNIFORM_TEXTURE_SAMPLES:
         cl_aligned_u32(&uniforms,
                        get_texture_size(cmd_buffer,
                                         pipeline,
                                         uinfo->contents[i],
                                         data));
         break;

      case QUNIFORM_NUM_WORK_GROUPS:
         assert(job->type == V3DV_JOB_TYPE_GPU_CSD);
         assert(job->csd.wg_count[data] > 0);
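         /* Record where this workgroup count lands in the uniform stream so
          * the caller can patch it later if needed.
          */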
         if (wg_count_offsets)
            wg_count_offsets[data] = (uint32_t *) uniforms;
         cl_aligned_u32(&uniforms, job->csd.wg_count[data]);
         break;

      case QUNIFORM_SHARED_OFFSET:
         assert(job->type == V3DV_JOB_TYPE_GPU_CSD);
         assert(job->csd.shared_memory);
         cl_aligned_reloc(&job->indirect, &uniforms, job->csd.shared_memory, 0);
         break;

      case QUNIFORM_SPILL_OFFSET:
         assert(pipeline->spill.bo);
         cl_aligned_reloc(&job->indirect, &uniforms, pipeline->spill.bo, 0);
         break;

      case QUNIFORM_SPILL_SIZE_PER_THREAD:
         assert(pipeline->spill.size_per_thread > 0);
         cl_aligned_u32(&uniforms, pipeline->spill.size_per_thread);
         break;

      default:
         unreachable("unsupported quniform_contents uniform type\n");
      }
   }

   cl_end(&job->indirect, uniforms);

   return uniform_stream;
}

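/* Convenience wrapper around v3dv_write_uniforms_wg_offsets for callers that
 * don't need the workgroup-count offsets.
 */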
struct v3dv_cl_reloc
v3dv_write_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
                    struct v3dv_pipeline_stage *p_stage)
{
   return v3dv_write_uniforms_wg_offsets(cmd_buffer, p_stage, NULL);
}