/*
 * Copyright © 2022 Imagination Technologies Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <vulkan/vulkan.h>

#include "hwdef/rogue_hw_utils.h"
#include "pvr_bo.h"
#include "pvr_cdm_load_sr.h"
#include "pvr_csb.h"
#include "pvr_job_context.h"
#include "pvr_pds.h"
#include "pvr_private.h"
#include "pvr_transfer_eot.h"
#include "pvr_types.h"
#include "pvr_vdm_load_sr.h"
#include "pvr_vdm_store_sr.h"
#include "pvr_winsys.h"
#include "util/macros.h"
#include "vk_alloc.h"
#include "vk_log.h"

/* TODO: Is there some way to ensure the Vulkan driver doesn't exceed this
 * value when constructing the control stream?
 */
/* The VDM callstack is used by the hardware to implement control stream links
 * with a return, i.e. sub-control streams/subroutines. This value specifies the
 * maximum callstack depth.
 */
#define PVR_VDM_CALLSTACK_MAX_DEPTH 1U

#define ROGUE_PDS_TASK_PROGRAM_SIZE 256U

static VkResult pvr_ctx_reset_cmd_init(struct pvr_device *device,
                                       struct pvr_reset_cmd *const reset_cmd)
{
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;

   /* The reset framework depends on compute support in the hw. */
   assert(PVR_HAS_FEATURE(dev_info, compute));

   if (PVR_HAS_QUIRK(dev_info, 51764))
      pvr_finishme("Missing reset support for brn51764");

   if (PVR_HAS_QUIRK(dev_info, 58839))
      pvr_finishme("Missing reset support for brn58839");

   return VK_SUCCESS;
}

static void pvr_ctx_reset_cmd_fini(struct pvr_device *device,
                                   struct pvr_reset_cmd *reset_cmd)
{
   /* TODO: reset command cleanup. */
}

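/* Create and upload the PDS program used to store the stream-out persistent
 * temps into pt_bo when the geometry context is stored.
 */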
static VkResult pvr_pds_pt_store_program_create_and_upload(
   struct pvr_device *device,
   struct pvr_bo *pt_bo,
   uint32_t pt_bo_size,
   struct pvr_pds_upload *const pds_upload_out)
{
   struct pvr_pds_stream_out_terminate_program program = { 0 };
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
   size_t staging_buffer_size;
   uint32_t *staging_buffer;
   uint32_t *data_buffer;
   uint32_t *code_buffer;
   VkResult result;

   /* Check the bo size can be converted to dwords without any rounding. */
   assert(pt_bo_size % 4 == 0);

   program.pds_persistent_temp_size_to_store = pt_bo_size / 4;
   program.dev_address_for_storing_persistent_temp = pt_bo->vma->dev_addr.addr;

   pvr_pds_generate_stream_out_terminate_program(&program,
                                                 NULL,
                                                 PDS_GENERATE_SIZES,
                                                 dev_info);

   staging_buffer_size = (program.stream_out_terminate_pds_data_size +
                          program.stream_out_terminate_pds_code_size) *
                         sizeof(*staging_buffer);

   staging_buffer = vk_zalloc(&device->vk.alloc,
                              staging_buffer_size,
                              8,
                              VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!staging_buffer)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   data_buffer = staging_buffer;
   code_buffer =
      pvr_pds_generate_stream_out_terminate_program(&program,
                                                    data_buffer,
                                                    PDS_GENERATE_DATA_SEGMENT,
                                                    dev_info);
   pvr_pds_generate_stream_out_terminate_program(&program,
                                                 code_buffer,
                                                 PDS_GENERATE_CODE_SEGMENT,
                                                 dev_info);

   /* This PDS program is passed to the HW via the PPP state words. These only
    * allow the data segment address to be specified and expect the code
    * segment to immediately follow. Assume the code alignment is the same as
    * the data.
    */
   result =
      pvr_gpu_upload_pds(device,
                         data_buffer,
                         program.stream_out_terminate_pds_data_size,
                         PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE),
                         code_buffer,
                         program.stream_out_terminate_pds_code_size,
                         PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE),
                         cache_line_size,
                         pds_upload_out);

   vk_free(&device->vk.alloc, staging_buffer);

   return result;
}

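/* Create and upload the PDS program used to reload the stream-out persistent
 * temps from pt_bo when the geometry context is resumed.
 */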
static VkResult pvr_pds_pt_resume_program_create_and_upload(
   struct pvr_device *device,
   struct pvr_bo *pt_bo,
   uint32_t pt_bo_size,
   struct pvr_pds_upload *const pds_upload_out)
{
   struct pvr_pds_stream_out_init_program program = { 0 };
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
   size_t staging_buffer_size;
   uint32_t *staging_buffer;
   uint32_t *data_buffer;
   uint32_t *code_buffer;
   VkResult result;

   /* Check the bo size can be converted to dwords without any rounding. */
   assert(pt_bo_size % 4 == 0);

   program.num_buffers = 1;
   program.pds_buffer_data_size[0] = pt_bo_size / 4;
   program.dev_address_for_buffer_data[0] = pt_bo->vma->dev_addr.addr;

   pvr_pds_generate_stream_out_init_program(&program,
                                            NULL,
                                            false,
                                            PDS_GENERATE_SIZES,
                                            dev_info);

   staging_buffer_size = (program.stream_out_init_pds_data_size +
                          program.stream_out_init_pds_code_size) *
                         sizeof(*staging_buffer);

   staging_buffer = vk_zalloc(&device->vk.alloc,
                              staging_buffer_size,
                              8,
                              VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!staging_buffer)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   data_buffer = staging_buffer;
   code_buffer =
      pvr_pds_generate_stream_out_init_program(&program,
                                               data_buffer,
                                               false,
                                               PDS_GENERATE_DATA_SEGMENT,
                                               dev_info);
   pvr_pds_generate_stream_out_init_program(&program,
                                            code_buffer,
                                            false,
                                            PDS_GENERATE_CODE_SEGMENT,
                                            dev_info);

   /* This PDS program is passed to the HW via the PPP state words. These only
    * allow the data segment address to be specified and expect the code
    * segment to immediately follow. Assume the code alignment is the same as
    * the data.
    */
   result =
      pvr_gpu_upload_pds(device,
                         data_buffer,
                         program.stream_out_init_pds_data_size,
                         PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE),
                         code_buffer,
                         program.stream_out_init_pds_code_size,
                         PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE),
                         cache_line_size,
                         pds_upload_out);

   vk_free(&device->vk.alloc, staging_buffer);

   return result;
}

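/* Allocate the persistent temps buffer and create the PDS store/resume
 * programs that reference it.
 */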
static VkResult
pvr_render_job_pt_programs_setup(struct pvr_device *device,
                                 struct rogue_pt_programs *pt_programs)
{
   VkResult result;

   result = pvr_bo_alloc(device,
                         device->heaps.pds_heap,
                         ROGUE_LLS_PDS_PERSISTENT_TEMPS_BUFFER_SIZE,
                         ROGUE_LLS_PDS_PERSISTENT_TEMPS_BUFFER_ALIGNMENT,
                         PVR_BO_ALLOC_FLAG_CPU_ACCESS,
                         &pt_programs->store_resume_state_bo);
   if (result != VK_SUCCESS)
      return result;

   result = pvr_pds_pt_store_program_create_and_upload(
      device,
      pt_programs->store_resume_state_bo,
      ROGUE_LLS_PDS_PERSISTENT_TEMPS_BUFFER_SIZE,
      &pt_programs->pds_store_program);
   if (result != VK_SUCCESS)
      goto err_free_store_resume_state_bo;

   result = pvr_pds_pt_resume_program_create_and_upload(
      device,
      pt_programs->store_resume_state_bo,
      ROGUE_LLS_PDS_PERSISTENT_TEMPS_BUFFER_SIZE,
      &pt_programs->pds_resume_program);
   if (result != VK_SUCCESS)
      goto err_free_pds_store_program;

   return VK_SUCCESS;

err_free_pds_store_program:
   pvr_bo_free(device, pt_programs->pds_store_program.pvr_bo);

err_free_store_resume_state_bo:
   pvr_bo_free(device, pt_programs->store_resume_state_bo);

   return result;
}

static void
pvr_render_job_pt_programs_cleanup(struct pvr_device *device,
                                   struct rogue_pt_programs *pt_programs)
{
   pvr_bo_free(device, pt_programs->pds_resume_program.pvr_bo);
   pvr_bo_free(device, pt_programs->pds_store_program.pvr_bo);
   pvr_bo_free(device, pt_programs->store_resume_state_bo);
}

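/* Common setup for the context switch shared register store/load PDS
 * programs: a DOUTW that writes the SR buffer addresses to the unified store,
 * followed by a DOUTU that kicks the USC store/load program.
 */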
static void pvr_pds_ctx_sr_program_setup(
   bool cc_enable,
   uint64_t usc_program_upload_offset,
   uint8_t usc_temps,
   pvr_dev_addr_t sr_addr,
   struct pvr_pds_shared_storing_program *const program_out)
{
   /* The PDS task is the same for stores and loads. */
   *program_out = (struct pvr_pds_shared_storing_program){
      .cc_enable = cc_enable,
      .doutw_control = {
         .dest_store = PDS_UNIFIED_STORE,
         .num_const64 = 2,
         .doutw_data = {
            [0] = sr_addr.addr,
            [1] = sr_addr.addr + ROGUE_LLS_SHARED_REGS_RESERVE_SIZE,
         },
         .last_instruction = false,
      },
   };

   pvr_pds_setup_doutu(&program_out->usc_task.usc_task_control,
                       usc_program_upload_offset,
                       usc_temps,
                       PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE),
                       false);
}

/* Note: pvr_pds_compute_ctx_sr_program_create_and_upload() is very similar to
 * this. If there is a problem here it's likely that the same problem exists
 * there so don't forget to update the compute function.
 */
static VkResult pvr_pds_render_ctx_sr_program_create_and_upload(
   struct pvr_device *device,
   uint64_t usc_program_upload_offset,
   uint8_t usc_temps,
   pvr_dev_addr_t sr_addr,
   struct pvr_pds_upload *const pds_upload_out)
{
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
   const uint32_t pds_data_alignment =
      PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) / 4U;

   /* FIXME: pvr_pds_generate_shared_storing_program() doesn't return the data
    * and code size when using the PDS_GENERATE_SIZES mode.
    */
   STATIC_ASSERT(ROGUE_PDS_TASK_PROGRAM_SIZE % 4 == 0);
   uint32_t staging_buffer[ROGUE_PDS_TASK_PROGRAM_SIZE / 4U] = { 0 };
   struct pvr_pds_shared_storing_program program;
   ASSERTED uint32_t *buffer_end;
   uint32_t code_offset;

   pvr_pds_ctx_sr_program_setup(false,
                                usc_program_upload_offset,
                                usc_temps,
                                sr_addr,
                                &program);

   pvr_pds_generate_shared_storing_program(&program,
                                           &staging_buffer[0],
                                           PDS_GENERATE_DATA_SEGMENT,
                                           dev_info);

   code_offset = ALIGN_POT(program.data_size, pds_data_alignment);

   buffer_end =
      pvr_pds_generate_shared_storing_program(&program,
                                              &staging_buffer[code_offset],
                                              PDS_GENERATE_CODE_SEGMENT,
                                              dev_info);

   assert((uint32_t)(buffer_end - staging_buffer) * 4 <
          ROGUE_PDS_TASK_PROGRAM_SIZE);

   return pvr_gpu_upload_pds(device,
                             &staging_buffer[0],
                             program.data_size,
                             PVRX(VDMCTRL_PDS_STATE1_PDS_DATA_ADDR_ALIGNMENT),
                             &staging_buffer[code_offset],
                             program.code_size,
                             PVRX(VDMCTRL_PDS_STATE2_PDS_CODE_ADDR_ALIGNMENT),
                             cache_line_size,
                             pds_upload_out);
}

/* Note: pvr_pds_render_ctx_sr_program_create_and_upload() is very similar to
 * this. If there is a problem here it's likely that the same problem exists
 * there so don't forget to update the render_ctx function.
 */
static VkResult pvr_pds_compute_ctx_sr_program_create_and_upload(
   struct pvr_device *device,
   bool is_loading_program,
   uint64_t usc_program_upload_offset,
   uint8_t usc_temps,
   pvr_dev_addr_t sr_addr,
   struct pvr_pds_upload *const pds_upload_out)
{
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
   const uint32_t pds_data_alignment =
      PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) / 4U;

   /* FIXME: pvr_pds_generate_shared_storing_program() doesn't return the data
    * and code size when using the PDS_GENERATE_SIZES mode.
    */
   STATIC_ASSERT(ROGUE_PDS_TASK_PROGRAM_SIZE % 4 == 0);
   uint32_t staging_buffer[ROGUE_PDS_TASK_PROGRAM_SIZE / 4U] = { 0 };
   struct pvr_pds_shared_storing_program program;
   uint32_t *buffer_ptr;
   uint32_t code_offset;

   pvr_pds_ctx_sr_program_setup(PVR_HAS_ERN(dev_info, 35421),
                                usc_program_upload_offset,
                                usc_temps,
                                sr_addr,
                                &program);

   if (is_loading_program && PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)) {
      pvr_pds_generate_compute_shared_loading_program(&program,
                                                      &staging_buffer[0],
                                                      PDS_GENERATE_DATA_SEGMENT,
                                                      dev_info);
   } else {
      pvr_pds_generate_shared_storing_program(&program,
                                              &staging_buffer[0],
                                              PDS_GENERATE_DATA_SEGMENT,
                                              dev_info);
   }

   code_offset = ALIGN_POT(program.data_size, pds_data_alignment);

   buffer_ptr =
      pvr_pds_generate_compute_barrier_conditional(&staging_buffer[code_offset],
                                                   PDS_GENERATE_CODE_SEGMENT);

   if (is_loading_program && PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)) {
      buffer_ptr = pvr_pds_generate_compute_shared_loading_program(
         &program,
         buffer_ptr,
         PDS_GENERATE_CODE_SEGMENT,
         dev_info);
   } else {
      buffer_ptr =
         pvr_pds_generate_shared_storing_program(&program,
                                                 buffer_ptr,
                                                 PDS_GENERATE_CODE_SEGMENT,
                                                 dev_info);
   }

   assert((uint32_t)(buffer_ptr - staging_buffer) * 4 <
          ROGUE_PDS_TASK_PROGRAM_SIZE);

   STATIC_ASSERT(PVRX(CR_CDM_CONTEXT_PDS0_DATA_ADDR_ALIGNMENT) ==
                 PVRX(CR_CDM_CONTEXT_LOAD_PDS0_DATA_ADDR_ALIGNMENT));

   STATIC_ASSERT(PVRX(CR_CDM_CONTEXT_PDS0_CODE_ADDR_ALIGNMENT) ==
                 PVRX(CR_CDM_CONTEXT_LOAD_PDS0_CODE_ADDR_ALIGNMENT));

   return pvr_gpu_upload_pds(
      device,
      &staging_buffer[0],
      program.data_size,
      PVRX(CR_CDM_CONTEXT_PDS0_DATA_ADDR_ALIGNMENT),
      &staging_buffer[code_offset],
      (uint32_t)(buffer_ptr - &staging_buffer[code_offset]),
      PVRX(CR_CDM_CONTEXT_PDS0_CODE_ADDR_ALIGNMENT),
      cache_line_size,
      pds_upload_out);
}

enum pvr_ctx_sr_program_target {
   PVR_CTX_SR_RENDER_TARGET,
   PVR_CTX_SR_COMPUTE_TARGET,
};

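/* Create and upload the USC and PDS programs used to store and load the
 * shared registers on a context switch, for either a render or a compute
 * context, along with the buffer they save to and restore from.
 */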
static VkResult pvr_ctx_sr_programs_setup(struct pvr_device *device,
                                          enum pvr_ctx_sr_program_target target,
                                          struct rogue_sr_programs *sr_programs)
{
   const uint64_t store_load_state_bo_size =
      PVRX(LLS_USC_SHARED_REGS_BUFFER_SIZE) +
      ROGUE_LLS_SHARED_REGS_RESERVE_SIZE;
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
   uint64_t usc_store_program_upload_offset;
   uint64_t usc_load_program_upload_offset;
   const uint8_t *usc_load_sr_code;
   uint32_t usc_load_sr_code_size;
   VkResult result;

   /* Note that this is being used for both compute and render ctx. There is no
    * compute equivalent define for the VDMCTRL unit size.
    */
   /* 4 blocks (16 dwords / 64 bytes) in USC to prevent fragmentation. */
   sr_programs->usc.unified_size =
      DIV_ROUND_UP(64, PVRX(VDMCTRL_PDS_STATE0_USC_UNIFIED_SIZE_UNIT_SIZE));

   result = pvr_bo_alloc(device,
                         device->heaps.pds_heap,
                         store_load_state_bo_size,
                         cache_line_size,
                         PVR_WINSYS_BO_FLAG_CPU_ACCESS,
                         &sr_programs->store_load_state_bo);
   if (result != VK_SUCCESS)
      return result;

   /* USC state update: SR state store. */

   assert(sizeof(pvr_vdm_store_sr_code) < ROGUE_USC_TASK_PROGRAM_SIZE);

   result = pvr_gpu_upload_usc(device,
                               pvr_vdm_store_sr_code,
                               sizeof(pvr_vdm_store_sr_code),
                               cache_line_size,
                               &sr_programs->usc.store_program_bo);
   if (result != VK_SUCCESS)
      goto err_free_store_load_state_bo;

   usc_store_program_upload_offset =
      sr_programs->usc.store_program_bo->vma->dev_addr.addr -
      device->heaps.usc_heap->base_addr.addr;

   /* USC state update: SR state load. */

   if (target == PVR_CTX_SR_COMPUTE_TARGET && PVR_HAS_QUIRK(dev_info, 62269)) {
      STATIC_ASSERT(sizeof(pvr_cdm_load_sr_code) < ROGUE_USC_TASK_PROGRAM_SIZE);

      usc_load_sr_code = pvr_cdm_load_sr_code;
      usc_load_sr_code_size = sizeof(pvr_cdm_load_sr_code);
   } else {
      STATIC_ASSERT(sizeof(pvr_vdm_load_sr_code) < ROGUE_USC_TASK_PROGRAM_SIZE);

      usc_load_sr_code = pvr_vdm_load_sr_code;
      usc_load_sr_code_size = sizeof(pvr_vdm_load_sr_code);
   }

   result = pvr_gpu_upload_usc(device,
                               usc_load_sr_code,
                               usc_load_sr_code_size,
                               cache_line_size,
                               &sr_programs->usc.load_program_bo);
   if (result != VK_SUCCESS)
      goto err_free_usc_store_program_bo;

   usc_load_program_upload_offset =
      sr_programs->usc.load_program_bo->vma->dev_addr.addr -
      device->heaps.usc_heap->base_addr.addr;

   /* FIXME: The number of USC temps should be output alongside
    * pvr_vdm_store_sr_code rather than hard coded.
    */
   /* Create and upload the PDS load and store programs. Point them to the
    * appropriate USC load and store programs.
    */
   switch (target) {
   case PVR_CTX_SR_RENDER_TARGET:
      /* PDS state update: SR state store. */
      result = pvr_pds_render_ctx_sr_program_create_and_upload(
         device,
         usc_store_program_upload_offset,
         8,
         sr_programs->store_load_state_bo->vma->dev_addr,
         &sr_programs->pds.store_program);
      if (result != VK_SUCCESS)
         goto err_free_usc_load_program_bo;

      /* PDS state update: SR state load. */
      result = pvr_pds_render_ctx_sr_program_create_and_upload(
         device,
         usc_load_program_upload_offset,
         20,
         sr_programs->store_load_state_bo->vma->dev_addr,
         &sr_programs->pds.load_program);
      if (result != VK_SUCCESS)
         goto err_free_pds_store_program_bo;

      break;

   case PVR_CTX_SR_COMPUTE_TARGET:
      /* PDS state update: SR state store. */
      result = pvr_pds_compute_ctx_sr_program_create_and_upload(
         device,
         false,
         usc_store_program_upload_offset,
         8,
         sr_programs->store_load_state_bo->vma->dev_addr,
         &sr_programs->pds.store_program);
      if (result != VK_SUCCESS)
         goto err_free_usc_load_program_bo;

      /* PDS state update: SR state load. */
      result = pvr_pds_compute_ctx_sr_program_create_and_upload(
         device,
         true,
         usc_load_program_upload_offset,
         20,
         sr_programs->store_load_state_bo->vma->dev_addr,
         &sr_programs->pds.load_program);
      if (result != VK_SUCCESS)
         goto err_free_pds_store_program_bo;

      break;

   default:
      unreachable("Invalid target.");
      break;
   }

   return VK_SUCCESS;

err_free_pds_store_program_bo:
   pvr_bo_free(device, sr_programs->pds.store_program.pvr_bo);

err_free_usc_load_program_bo:
   pvr_bo_free(device, sr_programs->usc.load_program_bo);

err_free_usc_store_program_bo:
   pvr_bo_free(device, sr_programs->usc.store_program_bo);

err_free_store_load_state_bo:
   pvr_bo_free(device, sr_programs->store_load_state_bo);

   return result;
}

static void pvr_ctx_sr_programs_cleanup(struct pvr_device *device,
                                        struct rogue_sr_programs *sr_programs)
{
   pvr_bo_free(device, sr_programs->pds.load_program.pvr_bo);
   pvr_bo_free(device, sr_programs->pds.store_program.pvr_bo);
   pvr_bo_free(device, sr_programs->usc.load_program_bo);
   pvr_bo_free(device, sr_programs->usc.store_program_bo);
   pvr_bo_free(device, sr_programs->store_load_state_bo);
}

static VkResult
pvr_render_ctx_switch_programs_setup(struct pvr_device *device,
                                     struct pvr_render_ctx_programs *programs)
{
   VkResult result;

   result = pvr_render_job_pt_programs_setup(device, &programs->pt);
   if (result != VK_SUCCESS)
      return result;

   result = pvr_ctx_sr_programs_setup(device,
                                      PVR_CTX_SR_RENDER_TARGET,
                                      &programs->sr);
   if (result != VK_SUCCESS)
      goto err_pt_programs_cleanup;

   return VK_SUCCESS;

err_pt_programs_cleanup:
   pvr_render_job_pt_programs_cleanup(device, &programs->pt);

   return result;
}

static void
pvr_render_ctx_switch_programs_cleanup(struct pvr_device *device,
                                       struct pvr_render_ctx_programs *programs)
{
   pvr_ctx_sr_programs_cleanup(device, &programs->sr);
   pvr_render_job_pt_programs_cleanup(device, &programs->pt);
}

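/* Allocate the VDM and geometry (TA) state buffers used for context
 * store/resume and create the associated context switch programs.
 */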
static VkResult pvr_render_ctx_switch_init(struct pvr_device *device,
                                           struct pvr_render_ctx *ctx)
{
   struct pvr_render_ctx_switch *ctx_switch = &ctx->ctx_switch;
   const uint64_t vdm_state_bo_flags = PVR_BO_ALLOC_FLAG_GPU_UNCACHED |
                                       PVR_BO_ALLOC_FLAG_CPU_ACCESS;
   const uint64_t geom_state_bo_flags = PVR_BO_ALLOC_FLAG_GPU_UNCACHED |
                                        PVR_BO_ALLOC_FLAG_CPU_ACCESS;
   VkResult result;

   result = pvr_bo_alloc(device,
                         device->heaps.general_heap,
                         ROGUE_LLS_VDM_CONTEXT_RESUME_BUFFER_SIZE,
                         ROGUE_LLS_VDM_CONTEXT_RESUME_BUFFER_ALIGNMENT,
                         vdm_state_bo_flags,
                         &ctx_switch->vdm_state_bo);
   if (result != VK_SUCCESS)
      return result;

   result = pvr_bo_alloc(device,
                         device->heaps.general_heap,
                         ROGUE_LLS_TA_STATE_BUFFER_SIZE,
                         ROGUE_LLS_TA_STATE_BUFFER_ALIGNMENT,
                         geom_state_bo_flags,
                         &ctx_switch->geom_state_bo);
   if (result != VK_SUCCESS)
      goto err_pvr_bo_free_vdm_state_bo;

   for (uint32_t i = 0; i < ARRAY_SIZE(ctx_switch->programs); i++) {
      result =
         pvr_render_ctx_switch_programs_setup(device, &ctx_switch->programs[i]);
      if (result != VK_SUCCESS)
         goto err_programs_cleanup;
   }

   return result;

err_programs_cleanup:
   for (uint32_t i = 0; i < ARRAY_SIZE(ctx_switch->programs); i++) {
      pvr_render_ctx_switch_programs_cleanup(device, &ctx_switch->programs[i]);
   }

   pvr_bo_free(device, ctx_switch->geom_state_bo);

err_pvr_bo_free_vdm_state_bo:
   pvr_bo_free(device, ctx_switch->vdm_state_bo);

   return result;
}

static void pvr_render_ctx_switch_fini(struct pvr_device *device,
                                       struct pvr_render_ctx *ctx)
{
   struct pvr_render_ctx_switch *ctx_switch = &ctx->ctx_switch;

   for (uint32_t i = 0; i < ARRAY_SIZE(ctx_switch->programs); i++) {
      pvr_render_ctx_switch_programs_cleanup(device, &ctx_switch->programs[i]);
   }

   pvr_bo_free(device, ctx_switch->geom_state_bo);
   pvr_bo_free(device, ctx_switch->vdm_state_bo);
}

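/* Pack the VDMCTRL_PDS_STATE0/1 words that describe how the given PDS program
 * is kicked by the VDM during a context store or resume task.
 */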
static void
pvr_rogue_get_vdmctrl_pds_state_words(struct pvr_pds_upload *pds_program,
                                      enum PVRX(VDMCTRL_USC_TARGET) usc_target,
                                      uint8_t usc_unified_size,
                                      uint32_t *const state0_out,
                                      uint32_t *const state1_out)
{
   pvr_csb_pack (state0_out, VDMCTRL_PDS_STATE0, state) {
      /* Convert the data size from dwords to bytes. */
      const uint32_t pds_data_size = pds_program->data_size * 4;

      state.dm_target = PVRX(VDMCTRL_DM_TARGET_VDM);
      state.usc_target = usc_target;
      state.usc_common_size = 0;
      state.usc_unified_size = usc_unified_size;
      state.pds_temp_size = 0;

      assert(pds_data_size % PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) ==
             0);
      state.pds_data_size =
         pds_data_size / PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE);
   };

   pvr_csb_pack (state1_out, VDMCTRL_PDS_STATE1, state) {
      state.pds_data_addr = PVR_DEV_ADDR(pds_program->data_offset);
      state.sd_type = PVRX(VDMCTRL_SD_TYPE_PDS);
      state.sd_next_type = PVRX(VDMCTRL_SD_TYPE_PDS);
   }
}

static void
pvr_rogue_get_geom_state_stream_out_words(struct pvr_pds_upload *pds_program,
                                          uint32_t *const stream_out1_out,
                                          uint32_t *const stream_out2_out)
{
   pvr_csb_pack (stream_out1_out, TA_STATE_STREAM_OUT1, state) {
      /* Convert the data size from dwords to bytes. */
      const uint32_t pds_data_size = pds_program->data_size * 4;

      state.sync = true;

      assert(pds_data_size %
                PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE) ==
             0);
      state.pds_data_size =
         pds_data_size / PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE);

      state.pds_temp_size = 0;
   }

   pvr_csb_pack (stream_out2_out, TA_STATE_STREAM_OUT2, state) {
      state.pds_data_addr = PVR_DEV_ADDR(pds_program->data_offset);
   }
}

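/* Fill in the static register state consumed by the winsys when creating the
 * render context: the VDM/TA context state base addresses and the per-program
 * context store and resume task words.
 */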
static void pvr_render_ctx_ws_static_state_init(
   struct pvr_render_ctx *ctx,
   struct pvr_winsys_render_ctx_static_state *static_state)
{
   uint64_t *q_dst;
   uint32_t *d_dst;

   q_dst = &static_state->vdm_ctx_state_base_addr;
   pvr_csb_pack (q_dst, CR_VDM_CONTEXT_STATE_BASE, base) {
      base.addr = ctx->ctx_switch.vdm_state_bo->vma->dev_addr;
   }

   q_dst = &static_state->geom_ctx_state_base_addr;
   pvr_csb_pack (q_dst, CR_TA_CONTEXT_STATE_BASE, base) {
      base.addr = ctx->ctx_switch.geom_state_bo->vma->dev_addr;
   }

   for (uint32_t i = 0; i < ARRAY_SIZE(ctx->ctx_switch.programs); i++) {
      struct rogue_pt_programs *pt_prog = &ctx->ctx_switch.programs[i].pt;
      struct rogue_sr_programs *sr_prog = &ctx->ctx_switch.programs[i].sr;

      /* Context store state. */
      q_dst = &static_state->geom_state[i].vdm_ctx_store_task0;
      pvr_csb_pack (q_dst, CR_VDM_CONTEXT_STORE_TASK0, task0) {
         pvr_rogue_get_vdmctrl_pds_state_words(&sr_prog->pds.store_program,
                                               PVRX(VDMCTRL_USC_TARGET_ANY),
                                               sr_prog->usc.unified_size,
                                               &task0.pds_state0,
                                               &task0.pds_state1);
      }

      d_dst = &static_state->geom_state[i].vdm_ctx_store_task1;
      pvr_csb_pack (d_dst, CR_VDM_CONTEXT_STORE_TASK1, task1) {
         pvr_csb_pack (&task1.pds_state2, VDMCTRL_PDS_STATE2, state) {
            state.pds_code_addr =
               PVR_DEV_ADDR(sr_prog->pds.store_program.code_offset);
         }
      }

      q_dst = &static_state->geom_state[i].vdm_ctx_store_task2;
      pvr_csb_pack (q_dst, CR_VDM_CONTEXT_STORE_TASK2, task2) {
         pvr_rogue_get_geom_state_stream_out_words(&pt_prog->pds_store_program,
                                                   &task2.stream_out1,
                                                   &task2.stream_out2);
      }

      /* Context resume state. */
      q_dst = &static_state->geom_state[i].vdm_ctx_resume_task0;
      pvr_csb_pack (q_dst, CR_VDM_CONTEXT_RESUME_TASK0, task0) {
         pvr_rogue_get_vdmctrl_pds_state_words(&sr_prog->pds.load_program,
                                               PVRX(VDMCTRL_USC_TARGET_ALL),
                                               sr_prog->usc.unified_size,
                                               &task0.pds_state0,
                                               &task0.pds_state1);
      }

      d_dst = &static_state->geom_state[i].vdm_ctx_resume_task1;
      pvr_csb_pack (d_dst, CR_VDM_CONTEXT_RESUME_TASK1, task1) {
         pvr_csb_pack (&task1.pds_state2, VDMCTRL_PDS_STATE2, state) {
            state.pds_code_addr =
               PVR_DEV_ADDR(sr_prog->pds.load_program.code_offset);
         }
      }

      q_dst = &static_state->geom_state[i].vdm_ctx_resume_task2;
      pvr_csb_pack (q_dst, CR_VDM_CONTEXT_RESUME_TASK2, task2) {
         pvr_rogue_get_geom_state_stream_out_words(&pt_prog->pds_resume_program,
                                                   &task2.stream_out1,
                                                   &task2.stream_out2);
      }
   }
}

static void pvr_render_ctx_ws_create_info_init(
   struct pvr_render_ctx *ctx,
   enum pvr_winsys_ctx_priority priority,
   struct pvr_winsys_render_ctx_create_info *create_info)
{
   create_info->priority = priority;
   create_info->vdm_callstack_addr = ctx->vdm_callstack_bo->vma->dev_addr;

   pvr_render_ctx_ws_static_state_init(ctx, &create_info->static_state);
}

VkResult pvr_render_ctx_create(struct pvr_device *device,
                               enum pvr_winsys_ctx_priority priority,
                               struct pvr_render_ctx **const ctx_out)
{
   const uint64_t vdm_callstack_size =
      sizeof(uint64_t) * PVR_VDM_CALLSTACK_MAX_DEPTH;
   struct pvr_winsys_render_ctx_create_info create_info;
   struct pvr_render_ctx *ctx;
   VkResult result;

   ctx = vk_alloc(&device->vk.alloc,
                  sizeof(*ctx),
                  8,
                  VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!ctx)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   ctx->device = device;

   result = pvr_bo_alloc(device,
                         device->heaps.general_heap,
                         vdm_callstack_size,
                         PVRX(CR_VDM_CALL_STACK_POINTER_ADDR_ALIGNMENT),
                         0,
                         &ctx->vdm_callstack_bo);
   if (result != VK_SUCCESS)
      goto err_vk_free_ctx;

   result = pvr_render_ctx_switch_init(device, ctx);
   if (result != VK_SUCCESS)
      goto err_free_vdm_callstack_bo;

   result = pvr_ctx_reset_cmd_init(device, &ctx->reset_cmd);
   if (result != VK_SUCCESS)
      goto err_render_ctx_switch_fini;

   /* ctx must be fully initialized by this point since
    * pvr_render_ctx_ws_create_info_init() depends on this.
    */
   pvr_render_ctx_ws_create_info_init(ctx, priority, &create_info);

   result = device->ws->ops->render_ctx_create(device->ws,
                                               &create_info,
                                               &ctx->ws_ctx);
   if (result != VK_SUCCESS)
      goto err_render_ctx_reset_cmd_fini;

   *ctx_out = ctx;

   return VK_SUCCESS;

err_render_ctx_reset_cmd_fini:
   pvr_ctx_reset_cmd_fini(device, &ctx->reset_cmd);

err_render_ctx_switch_fini:
   pvr_render_ctx_switch_fini(device, ctx);

err_free_vdm_callstack_bo:
   pvr_bo_free(device, ctx->vdm_callstack_bo);

err_vk_free_ctx:
   vk_free(&device->vk.alloc, ctx);

   return result;
}

void pvr_render_ctx_destroy(struct pvr_render_ctx *ctx)
{
   struct pvr_device *device = ctx->device;

   device->ws->ops->render_ctx_destroy(ctx->ws_ctx);

   pvr_ctx_reset_cmd_fini(device, &ctx->reset_cmd);
   pvr_render_ctx_switch_fini(device, ctx);
   pvr_bo_free(device, ctx->vdm_callstack_bo);
   vk_free(&device->vk.alloc, ctx);
}

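/* Create and upload the PDS fence/terminate program referenced by the CDM
 * terminate task words.
 */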
static VkResult pvr_pds_sr_fence_terminate_program_create_and_upload(
   struct pvr_device *device,
   struct pvr_pds_upload *const pds_upload_out)
{
   const uint32_t pds_data_alignment =
      PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) / 4U;
   const struct pvr_device_runtime_info *dev_runtime_info =
      &device->pdevice->dev_runtime_info;
   ASSERTED const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   uint32_t staging_buffer[PVRX(PDS_TASK_PROGRAM_SIZE) >> 2U];
   struct pvr_pds_fence_program program = { 0 };
   ASSERTED uint32_t *buffer_end;
   uint32_t code_offset;
   uint32_t data_size;

   /* SW_COMPUTE_PDS_BARRIER is not supported with 2 or more phantoms. */
   assert(!(PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info) &&
            dev_runtime_info->num_phantoms >= 2));

   pvr_pds_generate_fence_terminate_program(&program,
                                            staging_buffer,
                                            PDS_GENERATE_DATA_SEGMENT,
                                            &device->pdevice->dev_info);

   /* FIXME: pvr_pds_generate_fence_terminate_program() zeros out the data_size
    * when we generate the code segment. Implement
    * PDS_GENERATE_CODEDATA_SEGMENTS? Or wait for the pds gen api to change?
    * This behavior doesn't seem consistent with the rest of the api. For now
    * we store the size in a variable.
    */
   data_size = program.data_size;
   code_offset = ALIGN_POT(program.data_size, pds_data_alignment);

   buffer_end =
      pvr_pds_generate_fence_terminate_program(&program,
                                               &staging_buffer[code_offset],
                                               PDS_GENERATE_CODE_SEGMENT,
                                               &device->pdevice->dev_info);

   assert((uint64_t)(buffer_end - staging_buffer) * 4U <
          ROGUE_PDS_TASK_PROGRAM_SIZE);

   return pvr_gpu_upload_pds(device,
                             staging_buffer,
                             data_size,
                             PVRX(CR_CDM_TERMINATE_PDS_DATA_ADDR_ALIGNMENT),
                             &staging_buffer[code_offset],
                             program.code_size,
                             PVRX(CR_CDM_TERMINATE_PDS_CODE_ADDR_ALIGNMENT),
                             0,
                             pds_upload_out);
}

static void pvr_compute_ctx_ws_static_state_init(
   const struct pvr_device_info *const dev_info,
   const struct pvr_compute_ctx *const ctx,
   struct pvr_winsys_compute_ctx_static_state *const static_state)
{
   const struct pvr_compute_ctx_switch *const ctx_switch = &ctx->ctx_switch;

   /* CR_CDM_CONTEXT_... use state store program info. */

   pvr_csb_pack (&static_state->cdm_ctx_store_pds0,
                 CR_CDM_CONTEXT_PDS0,
                 state) {
      state.data_addr =
         PVR_DEV_ADDR(ctx_switch->sr[0].pds.store_program.data_offset);
      state.code_addr =
         PVR_DEV_ADDR(ctx_switch->sr[0].pds.store_program.code_offset);
   }

   pvr_csb_pack (&static_state->cdm_ctx_store_pds0_b,
                 CR_CDM_CONTEXT_PDS0,
                 state) {
      state.data_addr =
         PVR_DEV_ADDR(ctx_switch->sr[1].pds.store_program.data_offset);
      state.code_addr =
         PVR_DEV_ADDR(ctx_switch->sr[1].pds.store_program.code_offset);
   }

   pvr_csb_pack (&static_state->cdm_ctx_store_pds1,
                 CR_CDM_CONTEXT_PDS1,
                 state) {
      /* Convert the data size from dwords to bytes. */
      const uint32_t store_program_data_size =
         ctx_switch->sr[0].pds.store_program.data_size * 4U;

      state.pds_seq_dep = true;
      state.usc_seq_dep = false;
      state.target = true;
      state.unified_size = ctx_switch->sr[0].usc.unified_size;
      state.common_shared = false;
      state.common_size = 0;
      state.temp_size = 0;

      assert(store_program_data_size %
                PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) ==
             0);
      state.data_size = store_program_data_size /
                        PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE);

      state.fence = true;
   }

   /* CR_CDM_TERMINATE_... use fence terminate info. */

   pvr_csb_pack (&static_state->cdm_ctx_terminate_pds,
                 CR_CDM_TERMINATE_PDS,
                 state) {
      state.data_addr =
         PVR_DEV_ADDR(ctx_switch->sr_fence_terminate_program.data_offset);
      state.code_addr =
         PVR_DEV_ADDR(ctx_switch->sr_fence_terminate_program.code_offset);
   }

   pvr_csb_pack (&static_state->cdm_ctx_terminate_pds1,
                 CR_CDM_TERMINATE_PDS1,
                 state) {
      /* Convert the data size from dwords to bytes. */
      const uint32_t fence_terminate_program_data_size =
         ctx_switch->sr_fence_terminate_program.data_size * 4U;

      state.pds_seq_dep = true;
      state.usc_seq_dep = false;
      state.target = !PVR_HAS_FEATURE(dev_info, compute_morton_capable);
      state.unified_size = 0;
      /* Common store is for shareds -- this will free the partitions. */
      state.common_shared = true;
      state.common_size = 0;
      state.temp_size = 0;

      assert(fence_terminate_program_data_size %
                PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) ==
             0);
      state.data_size = fence_terminate_program_data_size /
                        PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE);
      state.fence = true;
   }

   /* CR_CDM_RESUME_... use state load program info. */

   pvr_csb_pack (&static_state->cdm_ctx_resume_pds0,
                 CR_CDM_CONTEXT_LOAD_PDS0,
                 state) {
      state.data_addr =
         PVR_DEV_ADDR(ctx_switch->sr[0].pds.load_program.data_offset);
      state.code_addr =
         PVR_DEV_ADDR(ctx_switch->sr[0].pds.load_program.code_offset);
   }

   pvr_csb_pack (&static_state->cdm_ctx_resume_pds0_b,
                 CR_CDM_CONTEXT_LOAD_PDS0,
                 state) {
      state.data_addr =
         PVR_DEV_ADDR(ctx_switch->sr[1].pds.load_program.data_offset);
      state.code_addr =
         PVR_DEV_ADDR(ctx_switch->sr[1].pds.load_program.code_offset);
   }
}

static void pvr_compute_ctx_ws_create_info_init(
   const struct pvr_compute_ctx *const ctx,
   enum pvr_winsys_ctx_priority priority,
   struct pvr_winsys_compute_ctx_create_info *const create_info)
{
   create_info->priority = priority;

   pvr_compute_ctx_ws_static_state_init(&ctx->device->pdevice->dev_info,
                                        ctx,
                                        &create_info->static_state);
}

VkResult pvr_compute_ctx_create(struct pvr_device *const device,
                                enum pvr_winsys_ctx_priority priority,
                                struct pvr_compute_ctx **const ctx_out)
{
   struct pvr_winsys_compute_ctx_create_info create_info;
   struct pvr_compute_ctx *ctx;
   VkResult result;

   ctx = vk_alloc(&device->vk.alloc,
                  sizeof(*ctx),
                  8,
                  VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!ctx)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   ctx->device = device;

   result = pvr_bo_alloc(
      device,
      device->heaps.general_heap,
      rogue_get_cdm_context_resume_buffer_size(&device->pdevice->dev_info),
      rogue_get_cdm_context_resume_buffer_alignment(&device->pdevice->dev_info),
      PVR_WINSYS_BO_FLAG_CPU_ACCESS | PVR_WINSYS_BO_FLAG_GPU_UNCACHED,
      &ctx->ctx_switch.compute_state_bo);
   if (result != VK_SUCCESS)
      goto err_free_ctx;

   /* TODO: Change this so that enabling storage to B doesn't change the array
    * size. Instead of looping we could unroll this and have the second
    * programs setup depending on the B enable. Doing it that way would make
    * things more obvious.
    */
   for (uint32_t i = 0; i < ARRAY_SIZE(ctx->ctx_switch.sr); i++) {
      result = pvr_ctx_sr_programs_setup(device,
                                         PVR_CTX_SR_COMPUTE_TARGET,
                                         &ctx->ctx_switch.sr[i]);
      if (result != VK_SUCCESS) {
         for (uint32_t j = 0; j < i; j++)
            pvr_ctx_sr_programs_cleanup(device, &ctx->ctx_switch.sr[j]);

         goto err_free_state_buffer;
      }
   }

   result = pvr_pds_sr_fence_terminate_program_create_and_upload(
      device,
      &ctx->ctx_switch.sr_fence_terminate_program);
   if (result != VK_SUCCESS)
      goto err_free_sr_programs;

   pvr_compute_ctx_ws_create_info_init(ctx, priority, &create_info);

   result = pvr_ctx_reset_cmd_init(device, &ctx->reset_cmd);
   if (result != VK_SUCCESS)
      goto err_free_pds_fence_terminate_program;

   result = device->ws->ops->compute_ctx_create(device->ws,
                                                &create_info,
                                                &ctx->ws_ctx);
   if (result != VK_SUCCESS)
      goto err_fini_reset_cmd;

   *ctx_out = ctx;

   return VK_SUCCESS;

err_fini_reset_cmd:
   pvr_ctx_reset_cmd_fini(device, &ctx->reset_cmd);

err_free_pds_fence_terminate_program:
   pvr_bo_free(device, ctx->ctx_switch.sr_fence_terminate_program.pvr_bo);

err_free_sr_programs:
   for (uint32_t i = 0; i < ARRAY_SIZE(ctx->ctx_switch.sr); ++i)
      pvr_ctx_sr_programs_cleanup(device, &ctx->ctx_switch.sr[i]);

err_free_state_buffer:
   pvr_bo_free(device, ctx->ctx_switch.compute_state_bo);

err_free_ctx:
   vk_free(&device->vk.alloc, ctx);

   return result;
}

void pvr_compute_ctx_destroy(struct pvr_compute_ctx *const ctx)
{
   struct pvr_device *device = ctx->device;

   device->ws->ops->compute_ctx_destroy(ctx->ws_ctx);

   pvr_ctx_reset_cmd_fini(device, &ctx->reset_cmd);

   pvr_bo_free(device, ctx->ctx_switch.sr_fence_terminate_program.pvr_bo);
   for (uint32_t i = 0; i < ARRAY_SIZE(ctx->ctx_switch.sr); ++i)
      pvr_ctx_sr_programs_cleanup(device, &ctx->ctx_switch.sr[i]);

   pvr_bo_free(device, ctx->ctx_switch.compute_state_bo);

   vk_free(&device->vk.alloc, ctx);
}

static void pvr_transfer_ctx_ws_create_info_init(
   enum pvr_winsys_ctx_priority priority,
   struct pvr_winsys_transfer_ctx_create_info *const create_info)
{
   create_info->priority = priority;
}

static VkResult pvr_transfer_ctx_setup_shaders(struct pvr_device *device,
                                               struct pvr_transfer_ctx *ctx)
{
   const uint32_t cache_line_size =
      rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
   VkResult result;

   /* TODO: Setup USC fragments. */

   /* Setup EOT program. */
   result = pvr_gpu_upload_usc(device,
                               pvr_transfer_eot_usc_code,
                               sizeof(pvr_transfer_eot_usc_code),
                               cache_line_size,
                               &ctx->usc_eot_bo);
   if (result != VK_SUCCESS)
      return result;

   STATIC_ASSERT(ARRAY_SIZE(pvr_transfer_eot_usc_offsets) ==
                 ARRAY_SIZE(ctx->transfer_mrts));
   for (uint32_t i = 0U; i < ARRAY_SIZE(pvr_transfer_eot_usc_offsets); i++) {
      ctx->transfer_mrts[i] =
         PVR_DEV_ADDR_OFFSET(ctx->usc_eot_bo->vma->dev_addr,
                             pvr_transfer_eot_usc_offsets[i]);
   }

   return VK_SUCCESS;
}

static void pvr_transfer_ctx_fini_shaders(struct pvr_device *device,
                                          struct pvr_transfer_ctx *ctx)
{
   pvr_bo_free(device, ctx->usc_eot_bo);
}

VkResult pvr_transfer_ctx_create(struct pvr_device *const device,
                                 enum pvr_winsys_ctx_priority priority,
                                 struct pvr_transfer_ctx **const ctx_out)
{
   struct pvr_winsys_transfer_ctx_create_info create_info;
   struct pvr_transfer_ctx *ctx;
   VkResult result;

   ctx = vk_zalloc(&device->vk.alloc,
                   sizeof(*ctx),
                   8U,
                   VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!ctx)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   ctx->device = device;

   result = pvr_ctx_reset_cmd_init(device, &ctx->reset_cmd);
   if (result != VK_SUCCESS)
      goto err_free_ctx;

   pvr_transfer_ctx_ws_create_info_init(priority, &create_info);

   result = device->ws->ops->transfer_ctx_create(device->ws,
                                                 &create_info,
                                                 &ctx->ws_ctx);
   if (result != VK_SUCCESS)
      goto err_fini_reset_cmd;

   result = pvr_transfer_ctx_setup_shaders(device, ctx);
   if (result != VK_SUCCESS)
      goto err_destroy_transfer_ctx;

   /* Create the PDS Uniform/Tex state code segment array. */
   for (uint32_t i = 0U; i < ARRAY_SIZE(ctx->pds_unitex_code); i++) {
      for (uint32_t j = 0U; j < ARRAY_SIZE(ctx->pds_unitex_code[0U]); j++) {
         if (i == 0U && j == 0U)
            continue;

         result = pvr_pds_unitex_state_program_create_and_upload(
            device,
            NULL,
            i,
            j,
            &ctx->pds_unitex_code[i][j]);
         if (result != VK_SUCCESS) {
            goto err_free_pds_unitex_bos;
         }
      }
   }

   *ctx_out = ctx;

   return VK_SUCCESS;

err_free_pds_unitex_bos:
   for (uint32_t i = 0U; i < ARRAY_SIZE(ctx->pds_unitex_code); i++) {
      for (uint32_t j = 0U; j < ARRAY_SIZE(ctx->pds_unitex_code[0U]); j++) {
         if (!ctx->pds_unitex_code[i][j].pvr_bo)
            continue;

         pvr_bo_free(device, ctx->pds_unitex_code[i][j].pvr_bo);
      }
   }

   pvr_transfer_ctx_fini_shaders(device, ctx);

err_destroy_transfer_ctx:
   device->ws->ops->transfer_ctx_destroy(ctx->ws_ctx);

err_fini_reset_cmd:
   pvr_ctx_reset_cmd_fini(device, &ctx->reset_cmd);

err_free_ctx:
   vk_free(&device->vk.alloc, ctx);

   return result;
}

void pvr_transfer_ctx_destroy(struct pvr_transfer_ctx *const ctx)
{
   struct pvr_device *device = ctx->device;

   for (uint32_t i = 0U; i < ARRAY_SIZE(ctx->pds_unitex_code); i++) {
      for (uint32_t j = 0U; j < ARRAY_SIZE(ctx->pds_unitex_code[0U]); j++) {
         if (!ctx->pds_unitex_code[i][j].pvr_bo)
            continue;

         pvr_bo_free(device, ctx->pds_unitex_code[i][j].pvr_bo);
      }
   }

   pvr_transfer_ctx_fini_shaders(device, ctx);
   device->ws->ops->transfer_ctx_destroy(ctx->ws_ctx);
   pvr_ctx_reset_cmd_fini(device, &ctx->reset_cmd);
   vk_free(&device->vk.alloc, ctx);
}