1 /*
2 * Copyright © 2022 Imagination Technologies Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <stddef.h>
27 #include <stdint.h>
28 #include <vulkan/vulkan.h>
29
30 #include "hwdef/rogue_hw_utils.h"
31 #include "pvr_bo.h"
32 #include "pvr_cdm_load_sr.h"
33 #include "pvr_csb.h"
34 #include "pvr_job_context.h"
35 #include "pvr_pds.h"
36 #include "pvr_private.h"
37 #include "pvr_transfer_eot.h"
38 #include "pvr_types.h"
39 #include "pvr_vdm_load_sr.h"
40 #include "pvr_vdm_store_sr.h"
41 #include "pvr_winsys.h"
42 #include "util/macros.h"
43 #include "vk_alloc.h"
44 #include "vk_log.h"
45
46 /* TODO: Is there some way to ensure the Vulkan driver doesn't exceed this
47 * value when constructing the control stream?
48 */
49 /* The VDM callstack is used by the hardware to implement control stream links
50 * with a return, i.e. sub-control streams/subroutines. This value specifies the
51 * maximum callstack depth.
52 */
53 #define PVR_VDM_CALLSTACK_MAX_DEPTH 1U
54
55 #define ROGUE_PDS_TASK_PROGRAM_SIZE 256U
56
pvr_ctx_reset_cmd_init(struct pvr_device * device,struct pvr_reset_cmd * const reset_cmd)57 static VkResult pvr_ctx_reset_cmd_init(struct pvr_device *device,
58 struct pvr_reset_cmd *const reset_cmd)
59 {
60 const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
61
62 /* The reset framework depends on compute support in the hw. */
63 assert(PVR_HAS_FEATURE(dev_info, compute));
64
65 if (PVR_HAS_QUIRK(dev_info, 51764))
66 pvr_finishme("Missing reset support for brn51764");
67
68 if (PVR_HAS_QUIRK(dev_info, 58839))
69 pvr_finishme("Missing reset support for brn58839");
70
71 return VK_SUCCESS;
72 }
73
/* Tears down state created by pvr_ctx_reset_cmd_init().
 * Currently a no-op since no reset command state is allocated yet.
 */
static void pvr_ctx_reset_cmd_fini(struct pvr_device *device,
                                   struct pvr_reset_cmd *reset_cmd)

{
   /* TODO: reset command cleanup. */
}
80
pvr_pds_pt_store_program_create_and_upload(struct pvr_device * device,struct pvr_bo * pt_bo,uint32_t pt_bo_size,struct pvr_pds_upload * const pds_upload_out)81 static VkResult pvr_pds_pt_store_program_create_and_upload(
82 struct pvr_device *device,
83 struct pvr_bo *pt_bo,
84 uint32_t pt_bo_size,
85 struct pvr_pds_upload *const pds_upload_out)
86 {
87 struct pvr_pds_stream_out_terminate_program program = { 0 };
88 const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
89 const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
90 size_t staging_buffer_size;
91 uint32_t *staging_buffer;
92 uint32_t *data_buffer;
93 uint32_t *code_buffer;
94 VkResult result;
95
96 /* Check the bo size can be converted to dwords without any rounding. */
97 assert(pt_bo_size % 4 == 0);
98
99 program.pds_persistent_temp_size_to_store = pt_bo_size / 4;
100 program.dev_address_for_storing_persistent_temp = pt_bo->vma->dev_addr.addr;
101
102 pvr_pds_generate_stream_out_terminate_program(&program,
103 NULL,
104 PDS_GENERATE_SIZES,
105 dev_info);
106
107 staging_buffer_size = (program.stream_out_terminate_pds_data_size +
108 program.stream_out_terminate_pds_code_size) *
109 sizeof(*staging_buffer);
110
111 staging_buffer = vk_zalloc(&device->vk.alloc,
112 staging_buffer_size,
113 8,
114 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
115 if (!staging_buffer)
116 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
117
118 data_buffer = staging_buffer;
119 code_buffer =
120 pvr_pds_generate_stream_out_terminate_program(&program,
121 data_buffer,
122 PDS_GENERATE_DATA_SEGMENT,
123 dev_info);
124 pvr_pds_generate_stream_out_terminate_program(&program,
125 code_buffer,
126 PDS_GENERATE_CODE_SEGMENT,
127 dev_info);
128
129 /* This PDS program is passed to the HW via the PPP state words. These only
130 * allow the data segment address to be specified and expect the code
131 * segment to immediately follow. Assume the code alignment is the same as
132 * the data.
133 */
134 result =
135 pvr_gpu_upload_pds(device,
136 data_buffer,
137 program.stream_out_terminate_pds_data_size,
138 PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE),
139 code_buffer,
140 program.stream_out_terminate_pds_code_size,
141 PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE),
142 cache_line_size,
143 pds_upload_out);
144
145 vk_free(&device->vk.alloc, staging_buffer);
146
147 return result;
148 }
149
pvr_pds_pt_resume_program_create_and_upload(struct pvr_device * device,struct pvr_bo * pt_bo,uint32_t pt_bo_size,struct pvr_pds_upload * const pds_upload_out)150 static VkResult pvr_pds_pt_resume_program_create_and_upload(
151 struct pvr_device *device,
152 struct pvr_bo *pt_bo,
153 uint32_t pt_bo_size,
154 struct pvr_pds_upload *const pds_upload_out)
155 {
156 struct pvr_pds_stream_out_init_program program = { 0 };
157 const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
158 const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
159 size_t staging_buffer_size;
160 uint32_t *staging_buffer;
161 uint32_t *data_buffer;
162 uint32_t *code_buffer;
163 VkResult result;
164
165 /* Check the bo size can be converted to dwords without any rounding. */
166 assert(pt_bo_size % 4 == 0);
167
168 program.num_buffers = 1;
169 program.pds_buffer_data_size[0] = pt_bo_size / 4;
170 program.dev_address_for_buffer_data[0] = pt_bo->vma->dev_addr.addr;
171
172 pvr_pds_generate_stream_out_init_program(&program,
173 NULL,
174 false,
175 PDS_GENERATE_SIZES,
176 dev_info);
177
178 staging_buffer_size = (program.stream_out_init_pds_data_size +
179 program.stream_out_init_pds_code_size) *
180 sizeof(*staging_buffer);
181
182 staging_buffer = vk_zalloc(&device->vk.alloc,
183 staging_buffer_size,
184 8,
185 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
186 if (!staging_buffer)
187 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
188
189 data_buffer = staging_buffer;
190 code_buffer =
191 pvr_pds_generate_stream_out_init_program(&program,
192 data_buffer,
193 false,
194 PDS_GENERATE_DATA_SEGMENT,
195 dev_info);
196 pvr_pds_generate_stream_out_init_program(&program,
197 code_buffer,
198 false,
199 PDS_GENERATE_CODE_SEGMENT,
200 dev_info);
201
202 /* This PDS program is passed to the HW via the PPP state words. These only
203 * allow the data segment address to be specified and expect the code
204 * segment to immediately follow. Assume the code alignment is the same as
205 * the data.
206 */
207 result =
208 pvr_gpu_upload_pds(device,
209 data_buffer,
210 program.stream_out_init_pds_data_size,
211 PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE),
212 code_buffer,
213 program.stream_out_init_pds_code_size,
214 PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE),
215 cache_line_size,
216 pds_upload_out);
217
218 vk_free(&device->vk.alloc, staging_buffer);
219
220 return result;
221 }
222
223 static VkResult
pvr_render_job_pt_programs_setup(struct pvr_device * device,struct rogue_pt_programs * pt_programs)224 pvr_render_job_pt_programs_setup(struct pvr_device *device,
225 struct rogue_pt_programs *pt_programs)
226 {
227 VkResult result;
228
229 result = pvr_bo_alloc(device,
230 device->heaps.pds_heap,
231 ROGUE_LLS_PDS_PERSISTENT_TEMPS_BUFFER_SIZE,
232 ROGUE_LLS_PDS_PERSISTENT_TEMPS_BUFFER_ALIGNMENT,
233 PVR_BO_ALLOC_FLAG_CPU_ACCESS,
234 &pt_programs->store_resume_state_bo);
235 if (result != VK_SUCCESS)
236 return result;
237
238 result = pvr_pds_pt_store_program_create_and_upload(
239 device,
240 pt_programs->store_resume_state_bo,
241 ROGUE_LLS_PDS_PERSISTENT_TEMPS_BUFFER_SIZE,
242 &pt_programs->pds_store_program);
243 if (result != VK_SUCCESS)
244 goto err_free_store_resume_state_bo;
245
246 result = pvr_pds_pt_resume_program_create_and_upload(
247 device,
248 pt_programs->store_resume_state_bo,
249 ROGUE_LLS_PDS_PERSISTENT_TEMPS_BUFFER_SIZE,
250 &pt_programs->pds_resume_program);
251 if (result != VK_SUCCESS)
252 goto err_free_pds_store_program;
253
254 return VK_SUCCESS;
255
256 err_free_pds_store_program:
257 pvr_bo_free(device, pt_programs->pds_store_program.pvr_bo);
258
259 err_free_store_resume_state_bo:
260 pvr_bo_free(device, pt_programs->store_resume_state_bo);
261
262 return result;
263 }
264
/* Releases everything created by pvr_render_job_pt_programs_setup(), in
 * reverse setup order.
 */
static void
pvr_render_job_pt_programs_cleanup(struct pvr_device *device,
                                   struct rogue_pt_programs *pt_programs)
{
   pvr_bo_free(device, pt_programs->pds_resume_program.pvr_bo);
   pvr_bo_free(device, pt_programs->pds_store_program.pvr_bo);
   pvr_bo_free(device, pt_programs->store_resume_state_bo);
}
273
/* Fills in the shared-register store/load PDS program description shared by
 * the render and compute context-switch paths. sr_addr is the device
 * address of the SR save/restore buffer; the DOUTW writes two const64
 * addresses (the buffer start and the slot past the reserved region) to the
 * unified store, and the DOUTU kicks the USC task at
 * usc_program_upload_offset with usc_temps temporaries.
 */
static void pvr_pds_ctx_sr_program_setup(
   bool cc_enable,
   uint64_t usc_program_upload_offset,
   uint8_t usc_temps,
   pvr_dev_addr_t sr_addr,
   struct pvr_pds_shared_storing_program *const program_out)
{
   /* The PDS task is the same for stores and loads. */
   struct pvr_pds_shared_storing_program program = { 0 };

   program.cc_enable = cc_enable;

   program.doutw_control.dest_store = PDS_UNIFIED_STORE;
   program.doutw_control.num_const64 = 2;
   program.doutw_control.doutw_data[0] = sr_addr.addr;
   program.doutw_control.doutw_data[1] =
      sr_addr.addr + ROGUE_LLS_SHARED_REGS_RESERVE_SIZE;
   program.doutw_control.last_instruction = false;

   pvr_pds_setup_doutu(&program.usc_task.usc_task_control,
                       usc_program_upload_offset,
                       usc_temps,
                       PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE),
                       false);

   *program_out = program;
}
301
302 /* Note: pvr_pds_compute_ctx_sr_program_create_and_upload() is very similar to
303 * this. If there is a problem here it's likely that the same problem exists
304 * there so don't forget to update the compute function.
305 */
pvr_pds_render_ctx_sr_program_create_and_upload(struct pvr_device * device,uint64_t usc_program_upload_offset,uint8_t usc_temps,pvr_dev_addr_t sr_addr,struct pvr_pds_upload * const pds_upload_out)306 static VkResult pvr_pds_render_ctx_sr_program_create_and_upload(
307 struct pvr_device *device,
308 uint64_t usc_program_upload_offset,
309 uint8_t usc_temps,
310 pvr_dev_addr_t sr_addr,
311 struct pvr_pds_upload *const pds_upload_out)
312 {
313 const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
314 const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
315 const uint32_t pds_data_alignment =
316 PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) / 4U;
317
318 /* FIXME: pvr_pds_generate_shared_storing_program() doesn't return the data
319 * and code size when using the PDS_GENERATE_SIZES mode.
320 */
321 STATIC_ASSERT(ROGUE_PDS_TASK_PROGRAM_SIZE % 4 == 0);
322 uint32_t staging_buffer[ROGUE_PDS_TASK_PROGRAM_SIZE / 4U] = { 0 };
323 struct pvr_pds_shared_storing_program program;
324 ASSERTED uint32_t *buffer_end;
325 uint32_t code_offset;
326
327 pvr_pds_ctx_sr_program_setup(false,
328 usc_program_upload_offset,
329 usc_temps,
330 sr_addr,
331 &program);
332
333 pvr_pds_generate_shared_storing_program(&program,
334 &staging_buffer[0],
335 PDS_GENERATE_DATA_SEGMENT,
336 dev_info);
337
338 code_offset = ALIGN_POT(program.data_size, pds_data_alignment);
339
340 buffer_end =
341 pvr_pds_generate_shared_storing_program(&program,
342 &staging_buffer[code_offset],
343 PDS_GENERATE_CODE_SEGMENT,
344 dev_info);
345
346 assert((uint32_t)(buffer_end - staging_buffer) * 4 <
347 ROGUE_PDS_TASK_PROGRAM_SIZE);
348
349 return pvr_gpu_upload_pds(device,
350 &staging_buffer[0],
351 program.data_size,
352 PVRX(VDMCTRL_PDS_STATE1_PDS_DATA_ADDR_ALIGNMENT),
353 &staging_buffer[code_offset],
354 program.code_size,
355 PVRX(VDMCTRL_PDS_STATE2_PDS_CODE_ADDR_ALIGNMENT),
356 cache_line_size,
357 pds_upload_out);
358 }
359
360 /* Note: pvr_pds_render_ctx_sr_program_create_and_upload() is very similar to
361 * this. If there is a problem here it's likely that the same problem exists
362 * there so don't forget to update the render_ctx function.
363 */
/* Creates and uploads the compute (CDM) context-switch shared-register PDS
 * program. is_loading_program selects the load direction; on devices that
 * need the SW compute PDS barrier the load variant uses the dedicated
 * shared-loading generator instead of the generic shared-storing one.
 */
static VkResult pvr_pds_compute_ctx_sr_program_create_and_upload(
   struct pvr_device *device,
   bool is_loading_program,
   uint64_t usc_program_upload_offset,
   uint8_t usc_temps,
   pvr_dev_addr_t sr_addr,
   struct pvr_pds_upload *const pds_upload_out)
{
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
   const uint32_t pds_data_alignment =
      PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) / 4U;

   /* FIXME: pvr_pds_generate_shared_storing_program() doesn't return the data
    * and code size when using the PDS_GENERATE_SIZES mode.
    */
   STATIC_ASSERT(ROGUE_PDS_TASK_PROGRAM_SIZE % 4 == 0);
   uint32_t staging_buffer[ROGUE_PDS_TASK_PROGRAM_SIZE / 4U] = { 0 };
   struct pvr_pds_shared_storing_program program;
   uint32_t *buffer_ptr;
   uint32_t code_offset;

   /* cc_enable follows ERN 35421 on the compute path. */
   pvr_pds_ctx_sr_program_setup(PVR_HAS_ERN(dev_info, 35421),
                                usc_program_upload_offset,
                                usc_temps,
                                sr_addr,
                                &program);

   /* Emit the data segment at the start of the staging buffer. */
   if (is_loading_program && PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)) {
      pvr_pds_generate_compute_shared_loading_program(&program,
                                                      &staging_buffer[0],
                                                      PDS_GENERATE_DATA_SEGMENT,
                                                      dev_info);
   } else {
      pvr_pds_generate_shared_storing_program(&program,
                                              &staging_buffer[0],
                                              PDS_GENERATE_DATA_SEGMENT,
                                              dev_info);
   }

   /* The code segment starts at the next aligned dword after the data. */
   code_offset = ALIGN_POT(program.data_size, pds_data_alignment);

   /* The barrier prologue is emitted first; the store/load code follows at
    * the pointer it returns.
    */
   buffer_ptr =
      pvr_pds_generate_compute_barrier_conditional(&staging_buffer[code_offset],
                                                   PDS_GENERATE_CODE_SEGMENT);

   if (is_loading_program && PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)) {
      buffer_ptr = pvr_pds_generate_compute_shared_loading_program(
         &program,
         buffer_ptr,
         PDS_GENERATE_CODE_SEGMENT,
         dev_info);
   } else {
      buffer_ptr =
         pvr_pds_generate_shared_storing_program(&program,
                                                 buffer_ptr,
                                                 PDS_GENERATE_CODE_SEGMENT,
                                                 dev_info);
   }

   assert((uint32_t)(buffer_ptr - staging_buffer) * 4 <
          ROGUE_PDS_TASK_PROGRAM_SIZE);

   /* The store and load context PDS words must share alignment since the
    * same upload alignment is used for both below.
    */
   STATIC_ASSERT(PVRX(CR_CDM_CONTEXT_PDS0_DATA_ADDR_ALIGNMENT) ==
                 PVRX(CR_CDM_CONTEXT_LOAD_PDS0_DATA_ADDR_ALIGNMENT));

   STATIC_ASSERT(PVRX(CR_CDM_CONTEXT_PDS0_CODE_ADDR_ALIGNMENT) ==
                 PVRX(CR_CDM_CONTEXT_LOAD_PDS0_CODE_ADDR_ALIGNMENT));

   return pvr_gpu_upload_pds(
      device,
      &staging_buffer[0],
      program.data_size,
      PVRX(CR_CDM_CONTEXT_PDS0_DATA_ADDR_ALIGNMENT),
      &staging_buffer[code_offset],
      (uint32_t)(buffer_ptr - &staging_buffer[code_offset]),
      PVRX(CR_CDM_CONTEXT_PDS0_CODE_ADDR_ALIGNMENT),
      cache_line_size,
      pds_upload_out);
}
444
/* Selects which data master the shared-register (SR) context-switch
 * programs are being built for.
 */
enum pvr_ctx_sr_program_target {
   PVR_CTX_SR_RENDER_TARGET,
   PVR_CTX_SR_COMPUTE_TARGET,
};
449
pvr_ctx_sr_programs_setup(struct pvr_device * device,enum pvr_ctx_sr_program_target target,struct rogue_sr_programs * sr_programs)450 static VkResult pvr_ctx_sr_programs_setup(struct pvr_device *device,
451 enum pvr_ctx_sr_program_target target,
452 struct rogue_sr_programs *sr_programs)
453 {
454 const uint64_t store_load_state_bo_size =
455 PVRX(LLS_USC_SHARED_REGS_BUFFER_SIZE) +
456 ROGUE_LLS_SHARED_REGS_RESERVE_SIZE;
457 const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
458 const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
459 uint64_t usc_store_program_upload_offset;
460 uint64_t usc_load_program_upload_offset;
461 const uint8_t *usc_load_sr_code;
462 uint32_t usc_load_sr_code_size;
463 VkResult result;
464
465 /* Note that this is being used for both compute and render ctx. There is no
466 * compute equivalent define for the VDMCTRL unit size.
467 */
468 /* 4 blocks (16 dwords / 64 bytes) in USC to prevent fragmentation. */
469 sr_programs->usc.unified_size =
470 DIV_ROUND_UP(64, PVRX(VDMCTRL_PDS_STATE0_USC_UNIFIED_SIZE_UNIT_SIZE));
471
472 result = pvr_bo_alloc(device,
473 device->heaps.pds_heap,
474 store_load_state_bo_size,
475 cache_line_size,
476 PVR_WINSYS_BO_FLAG_CPU_ACCESS,
477 &sr_programs->store_load_state_bo);
478 if (result != VK_SUCCESS)
479 return result;
480
481 /* USC state update: SR state store. */
482
483 assert(sizeof(pvr_vdm_store_sr_code) < ROGUE_USC_TASK_PROGRAM_SIZE);
484
485 result = pvr_gpu_upload_usc(device,
486 pvr_vdm_store_sr_code,
487 sizeof(pvr_vdm_store_sr_code),
488 cache_line_size,
489 &sr_programs->usc.store_program_bo);
490 if (result != VK_SUCCESS)
491 goto err_free_store_load_state_bo;
492
493 usc_store_program_upload_offset =
494 sr_programs->usc.store_program_bo->vma->dev_addr.addr -
495 device->heaps.usc_heap->base_addr.addr;
496
497 /* USC state update: SR state load. */
498
499 if (target == PVR_CTX_SR_COMPUTE_TARGET && PVR_HAS_QUIRK(dev_info, 62269)) {
500 STATIC_ASSERT(sizeof(pvr_cdm_load_sr_code) < ROGUE_USC_TASK_PROGRAM_SIZE);
501
502 usc_load_sr_code = pvr_cdm_load_sr_code;
503 usc_load_sr_code_size = sizeof(pvr_cdm_load_sr_code);
504 } else {
505 STATIC_ASSERT(sizeof(pvr_vdm_load_sr_code) < ROGUE_USC_TASK_PROGRAM_SIZE);
506
507 usc_load_sr_code = pvr_vdm_load_sr_code;
508 usc_load_sr_code_size = sizeof(pvr_vdm_load_sr_code);
509 }
510
511 result = pvr_gpu_upload_usc(device,
512 usc_load_sr_code,
513 usc_load_sr_code_size,
514 cache_line_size,
515 &sr_programs->usc.load_program_bo);
516 if (result != VK_SUCCESS)
517 goto err_free_usc_store_program_bo;
518
519 usc_load_program_upload_offset =
520 sr_programs->usc.load_program_bo->vma->dev_addr.addr -
521 device->heaps.usc_heap->base_addr.addr;
522
523 /* FIXME: The number of USC temps should be output alongside
524 * pvr_vdm_store_sr_code rather than hard coded.
525 */
526 /* Create and upload the PDS load and store programs. Point them to the
527 * appropriate USC load and store programs.
528 */
529 switch (target) {
530 case PVR_CTX_SR_RENDER_TARGET:
531 /* PDS state update: SR state store. */
532 result = pvr_pds_render_ctx_sr_program_create_and_upload(
533 device,
534 usc_store_program_upload_offset,
535 8,
536 sr_programs->store_load_state_bo->vma->dev_addr,
537 &sr_programs->pds.store_program);
538 if (result != VK_SUCCESS)
539 goto err_free_usc_load_program_bo;
540
541 /* PDS state update: SR state load. */
542 result = pvr_pds_render_ctx_sr_program_create_and_upload(
543 device,
544 usc_load_program_upload_offset,
545 20,
546 sr_programs->store_load_state_bo->vma->dev_addr,
547 &sr_programs->pds.load_program);
548 if (result != VK_SUCCESS)
549 goto err_free_pds_store_program_bo;
550
551 break;
552
553 case PVR_CTX_SR_COMPUTE_TARGET:
554 /* PDS state update: SR state store. */
555 result = pvr_pds_compute_ctx_sr_program_create_and_upload(
556 device,
557 false,
558 usc_store_program_upload_offset,
559 8,
560 sr_programs->store_load_state_bo->vma->dev_addr,
561 &sr_programs->pds.store_program);
562 if (result != VK_SUCCESS)
563 goto err_free_usc_load_program_bo;
564
565 /* PDS state update: SR state load. */
566 result = pvr_pds_compute_ctx_sr_program_create_and_upload(
567 device,
568 true,
569 usc_load_program_upload_offset,
570 20,
571 sr_programs->store_load_state_bo->vma->dev_addr,
572 &sr_programs->pds.load_program);
573 if (result != VK_SUCCESS)
574 goto err_free_pds_store_program_bo;
575
576 break;
577
578 default:
579 unreachable("Invalid target.");
580 break;
581 }
582
583 return VK_SUCCESS;
584
585 err_free_pds_store_program_bo:
586 pvr_bo_free(device, sr_programs->pds.store_program.pvr_bo);
587
588 err_free_usc_load_program_bo:
589 pvr_bo_free(device, sr_programs->usc.load_program_bo);
590
591 err_free_usc_store_program_bo:
592 pvr_bo_free(device, sr_programs->usc.store_program_bo);
593
594 err_free_store_load_state_bo:
595 pvr_bo_free(device, sr_programs->store_load_state_bo);
596
597 return VK_SUCCESS;
598 }
599
/* Releases everything created by pvr_ctx_sr_programs_setup(), in reverse
 * setup order.
 */
static void pvr_ctx_sr_programs_cleanup(struct pvr_device *device,
                                        struct rogue_sr_programs *sr_programs)
{
   pvr_bo_free(device, sr_programs->pds.load_program.pvr_bo);
   pvr_bo_free(device, sr_programs->pds.store_program.pvr_bo);
   pvr_bo_free(device, sr_programs->usc.load_program_bo);
   pvr_bo_free(device, sr_programs->usc.store_program_bo);
   pvr_bo_free(device, sr_programs->store_load_state_bo);
}
609
610 static VkResult
pvr_render_ctx_switch_programs_setup(struct pvr_device * device,struct pvr_render_ctx_programs * programs)611 pvr_render_ctx_switch_programs_setup(struct pvr_device *device,
612 struct pvr_render_ctx_programs *programs)
613 {
614 VkResult result;
615
616 result = pvr_render_job_pt_programs_setup(device, &programs->pt);
617 if (result != VK_SUCCESS)
618 return result;
619
620 result = pvr_ctx_sr_programs_setup(device,
621 PVR_CTX_SR_RENDER_TARGET,
622 &programs->sr);
623 if (result != VK_SUCCESS)
624 goto err_pt_programs_cleanup;
625
626 return VK_SUCCESS;
627
628 err_pt_programs_cleanup:
629 pvr_render_job_pt_programs_cleanup(device, &programs->pt);
630
631 return result;
632 }
633
/* Releases one slot of render context-switch programs, in reverse setup
 * order (SR programs first, then PT programs).
 */
static void
pvr_render_ctx_switch_programs_cleanup(struct pvr_device *device,
                                       struct pvr_render_ctx_programs *programs)
{
   pvr_ctx_sr_programs_cleanup(device, &programs->sr);
   pvr_render_job_pt_programs_cleanup(device, &programs->pt);
}
641
pvr_render_ctx_switch_init(struct pvr_device * device,struct pvr_render_ctx * ctx)642 static VkResult pvr_render_ctx_switch_init(struct pvr_device *device,
643 struct pvr_render_ctx *ctx)
644 {
645 struct pvr_render_ctx_switch *ctx_switch = &ctx->ctx_switch;
646 const uint64_t vdm_state_bo_flags = PVR_BO_ALLOC_FLAG_GPU_UNCACHED |
647 PVR_BO_ALLOC_FLAG_CPU_ACCESS;
648 const uint64_t geom_state_bo_flags = PVR_BO_ALLOC_FLAG_GPU_UNCACHED |
649 PVR_BO_ALLOC_FLAG_CPU_ACCESS;
650 VkResult result;
651
652 result = pvr_bo_alloc(device,
653 device->heaps.general_heap,
654 ROGUE_LLS_VDM_CONTEXT_RESUME_BUFFER_SIZE,
655 ROGUE_LLS_VDM_CONTEXT_RESUME_BUFFER_ALIGNMENT,
656 vdm_state_bo_flags,
657 &ctx_switch->vdm_state_bo);
658 if (result != VK_SUCCESS)
659 return result;
660
661 result = pvr_bo_alloc(device,
662 device->heaps.general_heap,
663 ROGUE_LLS_TA_STATE_BUFFER_SIZE,
664 ROGUE_LLS_TA_STATE_BUFFER_ALIGNMENT,
665 geom_state_bo_flags,
666 &ctx_switch->geom_state_bo);
667 if (result != VK_SUCCESS)
668 goto err_pvr_bo_free_vdm_state_bo;
669
670 for (uint32_t i = 0; i < ARRAY_SIZE(ctx_switch->programs); i++) {
671 result =
672 pvr_render_ctx_switch_programs_setup(device, &ctx_switch->programs[i]);
673 if (result)
674 goto err_programs_cleanup;
675 }
676
677 return result;
678
679 err_programs_cleanup:
680 for (uint32_t i = 0; i < ARRAY_SIZE(ctx_switch->programs); i++) {
681 pvr_render_ctx_switch_programs_cleanup(device, &ctx_switch->programs[i]);
682 }
683
684 pvr_bo_free(device, ctx_switch->geom_state_bo);
685
686 err_pvr_bo_free_vdm_state_bo:
687 pvr_bo_free(device, ctx_switch->vdm_state_bo);
688
689 return result;
690 }
691
/* Releases everything created by pvr_render_ctx_switch_init(), in reverse
 * setup order.
 */
static void pvr_render_ctx_switch_fini(struct pvr_device *device,
                                       struct pvr_render_ctx *ctx)
{
   struct pvr_render_ctx_switch *ctx_switch = &ctx->ctx_switch;

   for (uint32_t i = 0; i < ARRAY_SIZE(ctx_switch->programs); i++) {
      pvr_render_ctx_switch_programs_cleanup(device, &ctx_switch->programs[i]);
   }

   pvr_bo_free(device, ctx_switch->geom_state_bo);
   pvr_bo_free(device, ctx_switch->vdm_state_bo);
}
704
/* Packs the VDMCTRL_PDS_STATE0/1 words describing pds_program for the VDM
 * context store/resume tasks. State0 carries the sizing fields; state1
 * carries the data segment address and stream descriptor types.
 */
static void
pvr_rogue_get_vdmctrl_pds_state_words(struct pvr_pds_upload *pds_program,
                                      enum PVRX(VDMCTRL_USC_TARGET) usc_target,
                                      uint8_t usc_unified_size,
                                      uint32_t *const state0_out,
                                      uint32_t *const state1_out)
{
   pvr_csb_pack (state0_out, VDMCTRL_PDS_STATE0, state) {
      /* Convert the data size from dwords to bytes. */
      const uint32_t pds_data_size = pds_program->data_size * 4;

      state.dm_target = PVRX(VDMCTRL_DM_TARGET_VDM);
      state.usc_target = usc_target;
      state.usc_common_size = 0;
      state.usc_unified_size = usc_unified_size;
      state.pds_temp_size = 0;

      /* The HW field is in unit-size granules, so the byte size must divide
       * evenly.
       */
      assert(pds_data_size % PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) ==
             0);
      state.pds_data_size =
         pds_data_size / PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE);
   };

   pvr_csb_pack (state1_out, VDMCTRL_PDS_STATE1, state) {
      state.pds_data_addr = PVR_DEV_ADDR(pds_program->data_offset);
      state.sd_type = PVRX(VDMCTRL_SD_TYPE_PDS);
      state.sd_next_type = PVRX(VDMCTRL_SD_TYPE_PDS);
   }
}
734
/* Packs the TA_STATE_STREAM_OUT1/2 words describing the given PT PDS
 * program for the geometry context store/resume tasks.
 */
static void
pvr_rogue_get_geom_state_stream_out_words(struct pvr_pds_upload *pds_program,
                                          uint32_t *const stream_out1_out,
                                          uint32_t *const stream_out2_out)
{
   pvr_csb_pack (stream_out1_out, TA_STATE_STREAM_OUT1, state) {
      /* Convert the data size from dwords to bytes. */
      const uint32_t pds_data_size = pds_program->data_size * 4;

      state.sync = true;

      /* The HW field is in unit-size granules, so the byte size must divide
       * evenly.
       */
      assert(pds_data_size %
                PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE) ==
             0);
      state.pds_data_size =
         pds_data_size / PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE);

      state.pds_temp_size = 0;
   }

   pvr_csb_pack (stream_out2_out, TA_STATE_STREAM_OUT2, state) {
      state.pds_data_addr = PVR_DEV_ADDR(pds_program->data_offset);
   }
}
759
/* Fills in the winsys static state for a render context: the VDM/geometry
 * context state base addresses plus, per program slot, the packed store and
 * resume task words referencing the SR and PT programs built earlier.
 * ctx must be fully initialized before this is called.
 */
static void pvr_render_ctx_ws_static_state_init(
   struct pvr_render_ctx *ctx,
   struct pvr_winsys_render_ctx_static_state *static_state)
{
   /* Destination cursors for 64-bit and 32-bit packed words respectively. */
   uint64_t *q_dst;
   uint32_t *d_dst;

   q_dst = &static_state->vdm_ctx_state_base_addr;
   pvr_csb_pack (q_dst, CR_VDM_CONTEXT_STATE_BASE, base) {
      base.addr = ctx->ctx_switch.vdm_state_bo->vma->dev_addr;
   }

   q_dst = &static_state->geom_ctx_state_base_addr;
   pvr_csb_pack (q_dst, CR_TA_CONTEXT_STATE_BASE, base) {
      base.addr = ctx->ctx_switch.geom_state_bo->vma->dev_addr;
   }

   for (uint32_t i = 0; i < ARRAY_SIZE(ctx->ctx_switch.programs); i++) {
      struct rogue_pt_programs *pt_prog = &ctx->ctx_switch.programs[i].pt;
      struct rogue_sr_programs *sr_prog = &ctx->ctx_switch.programs[i].sr;

      /* Context store state. */
      q_dst = &static_state->geom_state[i].vdm_ctx_store_task0;
      pvr_csb_pack (q_dst, CR_VDM_CONTEXT_STORE_TASK0, task0) {
         pvr_rogue_get_vdmctrl_pds_state_words(&sr_prog->pds.store_program,
                                               PVRX(VDMCTRL_USC_TARGET_ANY),
                                               sr_prog->usc.unified_size,
                                               &task0.pds_state0,
                                               &task0.pds_state1);
      }

      d_dst = &static_state->geom_state[i].vdm_ctx_store_task1;
      pvr_csb_pack (d_dst, CR_VDM_CONTEXT_STORE_TASK1, task1) {
         pvr_csb_pack (&task1.pds_state2, VDMCTRL_PDS_STATE2, state) {
            state.pds_code_addr =
               PVR_DEV_ADDR(sr_prog->pds.store_program.code_offset);
         }
      }

      q_dst = &static_state->geom_state[i].vdm_ctx_store_task2;
      pvr_csb_pack (q_dst, CR_VDM_CONTEXT_STORE_TASK2, task2) {
         pvr_rogue_get_geom_state_stream_out_words(&pt_prog->pds_store_program,
                                                   &task2.stream_out1,
                                                   &task2.stream_out2);
      }

      /* Context resume state. */
      q_dst = &static_state->geom_state[i].vdm_ctx_resume_task0;
      pvr_csb_pack (q_dst, CR_VDM_CONTEXT_RESUME_TASK0, task0) {
         pvr_rogue_get_vdmctrl_pds_state_words(&sr_prog->pds.load_program,
                                               PVRX(VDMCTRL_USC_TARGET_ALL),
                                               sr_prog->usc.unified_size,
                                               &task0.pds_state0,
                                               &task0.pds_state1);
      }

      d_dst = &static_state->geom_state[i].vdm_ctx_resume_task1;
      pvr_csb_pack (d_dst, CR_VDM_CONTEXT_RESUME_TASK1, task1) {
         pvr_csb_pack (&task1.pds_state2, VDMCTRL_PDS_STATE2, state) {
            state.pds_code_addr =
               PVR_DEV_ADDR(sr_prog->pds.load_program.code_offset);
         }
      }

      q_dst = &static_state->geom_state[i].vdm_ctx_resume_task2;
      pvr_csb_pack (q_dst, CR_VDM_CONTEXT_RESUME_TASK2, task2) {
         pvr_rogue_get_geom_state_stream_out_words(&pt_prog->pds_resume_program,
                                                   &task2.stream_out1,
                                                   &task2.stream_out2);
      }
   }
}
832
/* Fills in the winsys create info for a render context from an already
 * fully initialized ctx.
 */
static void pvr_render_ctx_ws_create_info_init(
   struct pvr_render_ctx *ctx,
   enum pvr_winsys_ctx_priority priority,
   struct pvr_winsys_render_ctx_create_info *create_info)
{
   create_info->priority = priority;
   create_info->vdm_callstack_addr = ctx->vdm_callstack_bo->vma->dev_addr;

   pvr_render_ctx_ws_static_state_init(ctx, &create_info->static_state);
}
843
pvr_render_ctx_create(struct pvr_device * device,enum pvr_winsys_ctx_priority priority,struct pvr_render_ctx ** const ctx_out)844 VkResult pvr_render_ctx_create(struct pvr_device *device,
845 enum pvr_winsys_ctx_priority priority,
846 struct pvr_render_ctx **const ctx_out)
847 {
848 const uint64_t vdm_callstack_size =
849 sizeof(uint64_t) * PVR_VDM_CALLSTACK_MAX_DEPTH;
850 struct pvr_winsys_render_ctx_create_info create_info;
851 struct pvr_render_ctx *ctx;
852 VkResult result;
853
854 ctx = vk_alloc(&device->vk.alloc,
855 sizeof(*ctx),
856 8,
857 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
858 if (!ctx)
859 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
860
861 ctx->device = device;
862
863 result = pvr_bo_alloc(device,
864 device->heaps.general_heap,
865 vdm_callstack_size,
866 PVRX(CR_VDM_CALL_STACK_POINTER_ADDR_ALIGNMENT),
867 0,
868 &ctx->vdm_callstack_bo);
869 if (result != VK_SUCCESS)
870 goto err_vk_free_ctx;
871
872 result = pvr_render_ctx_switch_init(device, ctx);
873 if (result != VK_SUCCESS)
874 goto err_free_vdm_callstack_bo;
875
876 result = pvr_ctx_reset_cmd_init(device, &ctx->reset_cmd);
877 if (result != VK_SUCCESS)
878 goto err_render_ctx_switch_fini;
879
880 /* ctx must be fully initialized by this point since
881 * pvr_render_ctx_ws_create_info_init() depends on this.
882 */
883 pvr_render_ctx_ws_create_info_init(ctx, priority, &create_info);
884
885 result = device->ws->ops->render_ctx_create(device->ws,
886 &create_info,
887 &ctx->ws_ctx);
888 if (result != VK_SUCCESS)
889 goto err_render_ctx_reset_cmd_fini;
890
891 *ctx_out = ctx;
892
893 return VK_SUCCESS;
894
895 err_render_ctx_reset_cmd_fini:
896 pvr_ctx_reset_cmd_fini(device, &ctx->reset_cmd);
897
898 err_render_ctx_switch_fini:
899 pvr_render_ctx_switch_fini(device, ctx);
900
901 err_free_vdm_callstack_bo:
902 pvr_bo_free(device, ctx->vdm_callstack_bo);
903
904 err_vk_free_ctx:
905 vk_free(&device->vk.alloc, ctx);
906
907 return result;
908 }
909
pvr_render_ctx_destroy(struct pvr_render_ctx * ctx)910 void pvr_render_ctx_destroy(struct pvr_render_ctx *ctx)
911 {
912 struct pvr_device *device = ctx->device;
913
914 device->ws->ops->render_ctx_destroy(ctx->ws_ctx);
915
916 pvr_ctx_reset_cmd_fini(device, &ctx->reset_cmd);
917 pvr_render_ctx_switch_fini(device, ctx);
918 pvr_bo_free(device, ctx->vdm_callstack_bo);
919 vk_free(&device->vk.alloc, ctx);
920 }
921
/* Creates and uploads the PDS fence/terminate program used when a compute
 * context is torn down (referenced by the CR_CDM_TERMINATE_... state packed
 * in pvr_compute_ctx_ws_static_state_init()).
 *
 * The program is generated in two passes into one staging buffer — data
 * segment first, then the code segment at a unit-aligned offset — and
 * uploaded as a single PDS allocation. On success the upload info is
 * returned through pds_upload_out; the caller owns the backing BO.
 */
static VkResult pvr_pds_sr_fence_terminate_program_create_and_upload(
   struct pvr_device *device,
   struct pvr_pds_upload *const pds_upload_out)
{
   /* PDS data size unit converted from bytes to dwords, matching the
    * staging buffer's element size.
    */
   const uint32_t pds_data_alignment =
      PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) / 4U;
   const struct pvr_device_runtime_info *dev_runtime_info =
      &device->pdevice->dev_runtime_info;
   ASSERTED const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   /* Staging buffer sized in dwords (PDS_TASK_PROGRAM_SIZE is in bytes). */
   uint32_t staging_buffer[PVRX(PDS_TASK_PROGRAM_SIZE) >> 2U];
   struct pvr_pds_fence_program program = { 0 };
   ASSERTED uint32_t *buffer_end;
   uint32_t code_offset;
   uint32_t data_size;

   /* SW_COMPUTE_PDS_BARRIER is not supported with 2 or more phantoms. */
   assert(!(PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info) &&
            dev_runtime_info->num_phantoms >= 2));

   /* First pass: emit the data segment at the start of the staging buffer.
    * This also fills in program.data_size.
    */
   pvr_pds_generate_fence_terminate_program(&program,
                                            staging_buffer,
                                            PDS_GENERATE_DATA_SEGMENT,
                                            &device->pdevice->dev_info);

   /* FIXME: pvr_pds_generate_fence_terminate_program() zeros out the data_size
    * when we generate the code segment. Implement
    * PDS_GENERATE_CODEDATA_SEGMENTS? Or wait for the pds gen api to change?
    * This behavior doesn't seem consistent with the rest of the api. For now
    * we store the size in a variable.
    */
   data_size = program.data_size;
   code_offset = ALIGN_POT(program.data_size, pds_data_alignment);

   /* Second pass: emit the code segment immediately after the (aligned)
    * data segment.
    */
   buffer_end =
      pvr_pds_generate_fence_terminate_program(&program,
                                               &staging_buffer[code_offset],
                                               PDS_GENERATE_CODE_SEGMENT,
                                               &device->pdevice->dev_info);

   /* buffer_end - staging_buffer is in dwords; * 4 converts to bytes. */
   assert((uint64_t)(buffer_end - staging_buffer) * 4U <
          ROGUE_PDS_TASK_PROGRAM_SIZE);

   return pvr_gpu_upload_pds(device,
                             staging_buffer,
                             data_size,
                             PVRX(CR_CDM_TERMINATE_PDS_DATA_ADDR_ALIGNMENT),
                             &staging_buffer[code_offset],
                             program.code_size,
                             PVRX(CR_CDM_TERMINATE_PDS_CODE_ADDR_ALIGNMENT),
                             0,
                             pds_upload_out);
}
974
/* Packs the static (create-time) CDM context-switch register state for a
 * compute context: the context store PDS programs for both SR buffers
 * (A and B), the fence terminate program, and the context load (resume)
 * PDS programs. The packed words are handed to the winsys via
 * pvr_compute_ctx_ws_create_info_init().
 */
static void pvr_compute_ctx_ws_static_state_init(
   const struct pvr_device_info *const dev_info,
   const struct pvr_compute_ctx *const ctx,
   struct pvr_winsys_compute_ctx_static_state *const static_state)
{
   const struct pvr_compute_ctx_switch *const ctx_switch = &ctx->ctx_switch;

   /* CR_CDM_CONTEXT_... use state store program info. */

   /* Store program addresses for SR buffer A. */
   pvr_csb_pack (&static_state->cdm_ctx_store_pds0,
                 CR_CDM_CONTEXT_PDS0,
                 state) {
      state.data_addr =
         PVR_DEV_ADDR(ctx_switch->sr[0].pds.store_program.data_offset);
      state.code_addr =
         PVR_DEV_ADDR(ctx_switch->sr[0].pds.store_program.code_offset);
   }

   /* Store program addresses for SR buffer B. */
   pvr_csb_pack (&static_state->cdm_ctx_store_pds0_b,
                 CR_CDM_CONTEXT_PDS0,
                 state) {
      state.data_addr =
         PVR_DEV_ADDR(ctx_switch->sr[1].pds.store_program.data_offset);
      state.code_addr =
         PVR_DEV_ADDR(ctx_switch->sr[1].pds.store_program.code_offset);
   }

   pvr_csb_pack (&static_state->cdm_ctx_store_pds1,
                 CR_CDM_CONTEXT_PDS1,
                 state) {
      /* Convert the data size from dwords to bytes. */
      const uint32_t store_program_data_size =
         ctx_switch->sr[0].pds.store_program.data_size * 4U;

      state.pds_seq_dep = true;
      state.usc_seq_dep = false;
      state.target = true;
      state.unified_size = ctx_switch->sr[0].usc.unified_size;
      state.common_shared = false;
      state.common_size = 0;
      state.temp_size = 0;

      /* data_size is programmed in PDS data size units, so the byte size
       * must be an exact multiple of the unit size.
       */
      assert(store_program_data_size %
                PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) ==
             0);
      state.data_size = store_program_data_size /
                        PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE);

      state.fence = true;
   }

   /* CR_CDM_TERMINATE_... use fence terminate info. */

   pvr_csb_pack (&static_state->cdm_ctx_terminate_pds,
                 CR_CDM_TERMINATE_PDS,
                 state) {
      state.data_addr =
         PVR_DEV_ADDR(ctx_switch->sr_fence_terminate_program.data_offset);
      state.code_addr =
         PVR_DEV_ADDR(ctx_switch->sr_fence_terminate_program.code_offset);
   }

   pvr_csb_pack (&static_state->cdm_ctx_terminate_pds1,
                 CR_CDM_TERMINATE_PDS1,
                 state) {
      /* Convert the data size from dwords to bytes. */
      const uint32_t fence_terminate_program_data_size =
         ctx_switch->sr_fence_terminate_program.data_size * 4U;

      state.pds_seq_dep = true;
      state.usc_seq_dep = false;
      state.target = !PVR_HAS_FEATURE(dev_info, compute_morton_capable);
      state.unified_size = 0;
      /* Common store is for shareds -- this will free the partitions. */
      state.common_shared = true;
      state.common_size = 0;
      state.temp_size = 0;

      assert(fence_terminate_program_data_size %
                PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) ==
             0);
      state.data_size = fence_terminate_program_data_size /
                        PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE);
      state.fence = true;
   }

   /* CR_CDM_RESUME_... use state load program info. */

   /* Resume (load) program addresses for SR buffer A. */
   pvr_csb_pack (&static_state->cdm_ctx_resume_pds0,
                 CR_CDM_CONTEXT_LOAD_PDS0,
                 state) {
      state.data_addr =
         PVR_DEV_ADDR(ctx_switch->sr[0].pds.load_program.data_offset);
      state.code_addr =
         PVR_DEV_ADDR(ctx_switch->sr[0].pds.load_program.code_offset);
   }

   /* Resume (load) program addresses for SR buffer B. */
   pvr_csb_pack (&static_state->cdm_ctx_resume_pds0_b,
                 CR_CDM_CONTEXT_LOAD_PDS0,
                 state) {
      state.data_addr =
         PVR_DEV_ADDR(ctx_switch->sr[1].pds.load_program.data_offset);
      state.code_addr =
         PVR_DEV_ADDR(ctx_switch->sr[1].pds.load_program.code_offset);
   }
}
1081
pvr_compute_ctx_ws_create_info_init(const struct pvr_compute_ctx * const ctx,enum pvr_winsys_ctx_priority priority,struct pvr_winsys_compute_ctx_create_info * const create_info)1082 static void pvr_compute_ctx_ws_create_info_init(
1083 const struct pvr_compute_ctx *const ctx,
1084 enum pvr_winsys_ctx_priority priority,
1085 struct pvr_winsys_compute_ctx_create_info *const create_info)
1086 {
1087 create_info->priority = priority;
1088
1089 pvr_compute_ctx_ws_static_state_init(&ctx->device->pdevice->dev_info,
1090 ctx,
1091 &create_info->static_state);
1092 }
1093
/* Creates a compute context.
 *
 * Allocates the CDM context resume buffer, sets up the context-switch SR
 * (store/resume) programs for both SR buffers, the PDS fence terminate
 * program and the reset command, then creates the winsys compute context.
 * On failure every partially-created resource is released in reverse order.
 */
VkResult pvr_compute_ctx_create(struct pvr_device *const device,
                                enum pvr_winsys_ctx_priority priority,
                                struct pvr_compute_ctx **const ctx_out)
{
   struct pvr_winsys_compute_ctx_create_info create_info;
   struct pvr_compute_ctx *ctx;
   VkResult result;

   ctx = vk_alloc(&device->vk.alloc,
                  sizeof(*ctx),
                  8,
                  VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!ctx)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   ctx->device = device;

   /* Buffer the hardware uses to save/restore compute state across a
    * context switch; size and alignment are device dependent.
    */
   result = pvr_bo_alloc(
      device,
      device->heaps.general_heap,
      rogue_get_cdm_context_resume_buffer_size(&device->pdevice->dev_info),
      rogue_get_cdm_context_resume_buffer_alignment(&device->pdevice->dev_info),
      PVR_WINSYS_BO_FLAG_CPU_ACCESS | PVR_WINSYS_BO_FLAG_GPU_UNCACHED,
      &ctx->ctx_switch.compute_state_bo);
   if (result != VK_SUCCESS)
      goto err_free_ctx;

   /* TODO: Change this so that enabling storage to B doesn't change the array
    * size. Instead of looping we could unroll this and have the second
    * programs setup depending on the B enable. Doing it that way would make
    * things more obvious.
    */
   for (uint32_t i = 0; i < ARRAY_SIZE(ctx->ctx_switch.sr); i++) {
      result = pvr_ctx_sr_programs_setup(device,
                                         PVR_CTX_SR_COMPUTE_TARGET,
                                         &ctx->ctx_switch.sr[i]);
      if (result != VK_SUCCESS) {
         /* Only clean up the entries set up so far; err_free_sr_programs
          * below cleans all of them and is only reached once the whole
          * array is initialized.
          */
         for (uint32_t j = 0; j < i; j++)
            pvr_ctx_sr_programs_cleanup(device, &ctx->ctx_switch.sr[j]);

         goto err_free_state_buffer;
      }
   }

   result = pvr_pds_sr_fence_terminate_program_create_and_upload(
      device,
      &ctx->ctx_switch.sr_fence_terminate_program);
   if (result != VK_SUCCESS)
      goto err_free_sr_programs;

   /* ctx->ctx_switch must be fully set up by this point; the create info
    * packs register state from it.
    */
   pvr_compute_ctx_ws_create_info_init(ctx, priority, &create_info);

   result = pvr_ctx_reset_cmd_init(device, &ctx->reset_cmd);
   if (result != VK_SUCCESS)
      goto err_free_pds_fence_terminate_program;

   result = device->ws->ops->compute_ctx_create(device->ws,
                                                &create_info,
                                                &ctx->ws_ctx);
   if (result != VK_SUCCESS)
      goto err_fini_reset_cmd;

   *ctx_out = ctx;

   return VK_SUCCESS;

err_fini_reset_cmd:
   pvr_ctx_reset_cmd_fini(device, &ctx->reset_cmd);

err_free_pds_fence_terminate_program:
   pvr_bo_free(device, ctx->ctx_switch.sr_fence_terminate_program.pvr_bo);

err_free_sr_programs:
   for (uint32_t i = 0; i < ARRAY_SIZE(ctx->ctx_switch.sr); ++i)
      pvr_ctx_sr_programs_cleanup(device, &ctx->ctx_switch.sr[i]);

err_free_state_buffer:
   pvr_bo_free(device, ctx->ctx_switch.compute_state_bo);

err_free_ctx:
   vk_free(&device->vk.alloc, ctx);

   return result;
}
1178
pvr_compute_ctx_destroy(struct pvr_compute_ctx * const ctx)1179 void pvr_compute_ctx_destroy(struct pvr_compute_ctx *const ctx)
1180 {
1181 struct pvr_device *device = ctx->device;
1182
1183 device->ws->ops->compute_ctx_destroy(ctx->ws_ctx);
1184
1185 pvr_ctx_reset_cmd_fini(device, &ctx->reset_cmd);
1186
1187 pvr_bo_free(device, ctx->ctx_switch.sr_fence_terminate_program.pvr_bo);
1188 for (uint32_t i = 0; i < ARRAY_SIZE(ctx->ctx_switch.sr); ++i)
1189 pvr_ctx_sr_programs_cleanup(device, &ctx->ctx_switch.sr[i]);
1190
1191 pvr_bo_free(device, ctx->ctx_switch.compute_state_bo);
1192
1193 vk_free(&device->vk.alloc, ctx);
1194 }
1195
pvr_transfer_ctx_ws_create_info_init(enum pvr_winsys_ctx_priority priority,struct pvr_winsys_transfer_ctx_create_info * const create_info)1196 static void pvr_transfer_ctx_ws_create_info_init(
1197 enum pvr_winsys_ctx_priority priority,
1198 struct pvr_winsys_transfer_ctx_create_info *const create_info)
1199 {
1200 create_info->priority = priority;
1201 }
1202
pvr_transfer_ctx_setup_shaders(struct pvr_device * device,struct pvr_transfer_ctx * ctx)1203 static VkResult pvr_transfer_ctx_setup_shaders(struct pvr_device *device,
1204 struct pvr_transfer_ctx *ctx)
1205 {
1206 const uint32_t cache_line_size =
1207 rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
1208 VkResult result;
1209
1210 /* TODO: Setup USC fragments. */
1211
1212 /* Setup EOT program. */
1213 result = pvr_gpu_upload_usc(device,
1214 pvr_transfer_eot_usc_code,
1215 sizeof(pvr_transfer_eot_usc_code),
1216 cache_line_size,
1217 &ctx->usc_eot_bo);
1218 if (result != VK_SUCCESS)
1219 return result;
1220
1221 STATIC_ASSERT(ARRAY_SIZE(pvr_transfer_eot_usc_offsets) ==
1222 ARRAY_SIZE(ctx->transfer_mrts));
1223 for (uint32_t i = 0U; i < ARRAY_SIZE(pvr_transfer_eot_usc_offsets); i++) {
1224 ctx->transfer_mrts[i] =
1225 PVR_DEV_ADDR_OFFSET(ctx->usc_eot_bo->vma->dev_addr,
1226 pvr_transfer_eot_usc_offsets[i]);
1227 }
1228
1229 return VK_SUCCESS;
1230 }
1231
pvr_transfer_ctx_fini_shaders(struct pvr_device * device,struct pvr_transfer_ctx * ctx)1232 static void pvr_transfer_ctx_fini_shaders(struct pvr_device *device,
1233 struct pvr_transfer_ctx *ctx)
1234 {
1235 pvr_bo_free(device, ctx->usc_eot_bo);
1236 }
1237
/* Creates a transfer context.
 *
 * Sets up the reset command, creates the winsys transfer context, uploads
 * the transfer USC shaders, and uploads the PDS uniform/texture state code
 * segment variants. On failure all partially-created resources are released
 * in reverse order.
 */
VkResult pvr_transfer_ctx_create(struct pvr_device *const device,
                                 enum pvr_winsys_ctx_priority priority,
                                 struct pvr_transfer_ctx **const ctx_out)
{
   struct pvr_winsys_transfer_ctx_create_info create_info;
   struct pvr_transfer_ctx *ctx;
   VkResult result;

   /* vk_zalloc (not vk_alloc): the cleanup paths below rely on unallocated
    * pds_unitex_code entries having a NULL pvr_bo.
    */
   ctx = vk_zalloc(&device->vk.alloc,
                   sizeof(*ctx),
                   8U,
                   VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!ctx)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   ctx->device = device;

   result = pvr_ctx_reset_cmd_init(device, &ctx->reset_cmd);
   if (result != VK_SUCCESS)
      goto err_free_ctx;

   pvr_transfer_ctx_ws_create_info_init(priority, &create_info);

   result = device->ws->ops->transfer_ctx_create(device->ws,
                                                 &create_info,
                                                 &ctx->ws_ctx);
   if (result != VK_SUCCESS)
      goto err_fini_reset_cmd;

   result = pvr_transfer_ctx_setup_shaders(device, ctx);
   if (result != VK_SUCCESS)
      goto err_destroy_transfer_ctx;

   /* Create the PDS Uniform/Tex state code segment array. */
   for (uint32_t i = 0U; i < ARRAY_SIZE(ctx->pds_unitex_code); i++) {
      for (uint32_t j = 0U; j < ARRAY_SIZE(ctx->pds_unitex_code[0U]); j++) {
         /* The (0, 0) slot is deliberately left unpopulated; its pvr_bo
          * stays NULL (see vk_zalloc above) so the cleanup loops skip it.
          */
         if (i == 0U && j == 0U)
            continue;

         result = pvr_pds_unitex_state_program_create_and_upload(
            device,
            NULL,
            i,
            j,
            &ctx->pds_unitex_code[i][j]);
         if (result != VK_SUCCESS) {
            goto err_free_pds_unitex_bos;
         }
      }
   }

   *ctx_out = ctx;

   return VK_SUCCESS;

err_free_pds_unitex_bos:
   /* Free only the entries that were uploaded (NULL pvr_bo == not yet
    * created, or the skipped (0, 0) slot).
    */
   for (uint32_t i = 0U; i < ARRAY_SIZE(ctx->pds_unitex_code); i++) {
      for (uint32_t j = 0U; j < ARRAY_SIZE(ctx->pds_unitex_code[0U]); j++) {
         if (!ctx->pds_unitex_code[i][j].pvr_bo)
            continue;

         pvr_bo_free(device, ctx->pds_unitex_code[i][j].pvr_bo);
      }
   }

   pvr_transfer_ctx_fini_shaders(device, ctx);

err_destroy_transfer_ctx:
   device->ws->ops->transfer_ctx_destroy(ctx->ws_ctx);

err_fini_reset_cmd:
   pvr_ctx_reset_cmd_fini(device, &ctx->reset_cmd);

err_free_ctx:
   vk_free(&device->vk.alloc, ctx);

   return result;
}
1316
pvr_transfer_ctx_destroy(struct pvr_transfer_ctx * const ctx)1317 void pvr_transfer_ctx_destroy(struct pvr_transfer_ctx *const ctx)
1318 {
1319 struct pvr_device *device = ctx->device;
1320
1321 for (uint32_t i = 0U; i < ARRAY_SIZE(ctx->pds_unitex_code); i++) {
1322 for (uint32_t j = 0U; j < ARRAY_SIZE(ctx->pds_unitex_code[0U]); j++) {
1323 if (!ctx->pds_unitex_code[i][j].pvr_bo)
1324 continue;
1325
1326 pvr_bo_free(device, ctx->pds_unitex_code[i][j].pvr_bo);
1327 }
1328 }
1329
1330 pvr_transfer_ctx_fini_shaders(device, ctx);
1331 device->ws->ops->transfer_ctx_destroy(ctx->ws_ctx);
1332 pvr_ctx_reset_cmd_fini(device, &ctx->reset_cmd);
1333 vk_free(&device->vk.alloc, ctx);
1334 }
1335