/*
 * Copyright © 2022 Imagination Technologies Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <vulkan/vulkan.h>

#include "fw-api/pvr_rogue_fwif.h"
#include "fw-api/pvr_rogue_fwif_rf.h"
#include "pvr_csb.h"
#include "pvr_job_render.h"
#include "pvr_private.h"
#include "pvr_srv.h"
#include "pvr_srv_bo.h"
#include "pvr_srv_bridge.h"
#include "pvr_srv_job_common.h"
#include "pvr_srv_job_render.h"
#include "pvr_srv_sync.h"
#include "pvr_srv_sync_prim.h"
#include "pvr_types.h"
#include "pvr_winsys.h"
#include "util/compiler.h"
#include "util/log.h"
#include "util/macros.h"
#include "util/u_math.h"
#include "vk_alloc.h"
#include "vk_log.h"
#include "vk_util.h"

struct pvr_srv_winsys_free_list {
   struct pvr_winsys_free_list base;

   void *handle;

   struct pvr_srv_winsys_free_list *parent;
};

#define to_pvr_srv_winsys_free_list(free_list) \
   container_of(free_list, struct pvr_srv_winsys_free_list, base)

struct pvr_srv_winsys_rt_dataset {
   struct pvr_winsys_rt_dataset base;

   struct {
      void *handle;
      struct pvr_srv_sync_prim *sync_prim;
   } rt_datas[ROGUE_FWIF_NUM_RTDATAS];
};

#define to_pvr_srv_winsys_rt_dataset(rt_dataset) \
   container_of(rt_dataset, struct pvr_srv_winsys_rt_dataset, base)

struct pvr_srv_winsys_render_ctx {
   struct pvr_winsys_render_ctx base;

   /* Handle to kernel context. */
   void *handle;

   int timeline_geom;
   int timeline_frag;
};

#define to_pvr_srv_winsys_render_ctx(ctx) \
   container_of(ctx, struct pvr_srv_winsys_render_ctx, base)

VkResult pvr_srv_winsys_free_list_create(
   struct pvr_winsys *ws,
   struct pvr_winsys_vma *free_list_vma,
   uint32_t initial_num_pages,
   uint32_t max_num_pages,
   uint32_t grow_num_pages,
   uint32_t grow_threshold,
   struct pvr_winsys_free_list *parent_free_list,
   struct pvr_winsys_free_list **const free_list_out)
{
   struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ws);
   struct pvr_srv_winsys_bo *srv_free_list_bo =
      to_pvr_srv_winsys_bo(free_list_vma->bo);
   struct pvr_srv_winsys_free_list *srv_free_list;
   void *parent_handle;
   VkResult result;

   srv_free_list = vk_zalloc(ws->alloc,
                             sizeof(*srv_free_list),
                             8,
                             VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!srv_free_list)
      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

   if (parent_free_list) {
      srv_free_list->parent = to_pvr_srv_winsys_free_list(parent_free_list);
      parent_handle = srv_free_list->parent->handle;
   } else {
      srv_free_list->parent = NULL;
      parent_handle = NULL;
   }

   result = pvr_srv_rgx_create_free_list(ws->render_fd,
                                         srv_ws->server_memctx_data,
                                         max_num_pages,
                                         initial_num_pages,
                                         grow_num_pages,
                                         grow_threshold,
                                         parent_handle,
#if MESA_DEBUG
                                         PVR_SRV_TRUE /* free_list_check */,
#else
                                         PVR_SRV_FALSE /* free_list_check */,
#endif
                                         free_list_vma->dev_addr,
                                         srv_free_list_bo->pmr,
                                         0 /* pmr_offset */,
                                         &srv_free_list->handle);
   if (result != VK_SUCCESS)
      goto err_vk_free_srv_free_list;

   srv_free_list->base.ws = ws;

   *free_list_out = &srv_free_list->base;

   return VK_SUCCESS;

err_vk_free_srv_free_list:
   vk_free(ws->alloc, srv_free_list);

   return result;
}

void pvr_srv_winsys_free_list_destroy(struct pvr_winsys_free_list *free_list)
{
   struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(free_list->ws);
   struct pvr_srv_winsys_free_list *srv_free_list =
      to_pvr_srv_winsys_free_list(free_list);

   pvr_srv_rgx_destroy_free_list(srv_ws->base.render_fd, srv_free_list->handle);
   vk_free(srv_ws->base.alloc, srv_free_list);
}
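/* Packs the PPP multisample control register from the per-sample-count
 * position tables below. Each x/y entry is a sub-pixel offset in 1/16th
 * pixel units (8 being the pixel centre); when y_flip is set the vertical
 * offsets are mirrored as (16 - y). Only the fields for the requested
 * sample count are written, which is why the switch falls through from
 * the higher sample counts to the lower ones.
 */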
static uint64_t pvr_rogue_get_cr_multisamplectl_val(uint32_t samples,
                                                    bool y_flip)
{
   static const struct {
      uint8_t x[8];
      uint8_t y[8];
   } sample_positions[4] = {
      /* 1 sample */
      {
         .x = { 8 },
         .y = { 8 },
      },
      /* 2 samples */
      {
         .x = { 12, 4 },
         .y = { 12, 4 },
      },
      /* 4 samples */
      {
         .x = { 6, 14, 2, 10 },
         .y = { 2, 6, 10, 14 },
      },
      /* 8 samples */
      {
         .x = { 9, 7, 13, 5, 3, 1, 11, 15 },
         .y = { 5, 11, 9, 3, 13, 7, 15, 1 },
      },
   };
   uint64_t multisamplectl;
   uint8_t idx;

   idx = util_fast_log2(samples);
   assert(idx < ARRAY_SIZE(sample_positions));

   pvr_csb_pack (&multisamplectl, CR_PPP_MULTISAMPLECTL, value) {
      switch (samples) {
      case 8:
         value.msaa_x7 = sample_positions[idx].x[7];
         value.msaa_x6 = sample_positions[idx].x[6];
         value.msaa_x5 = sample_positions[idx].x[5];
         value.msaa_x4 = sample_positions[idx].x[4];

         if (y_flip) {
            value.msaa_y7 = 16U - sample_positions[idx].y[7];
            value.msaa_y6 = 16U - sample_positions[idx].y[6];
            value.msaa_y5 = 16U - sample_positions[idx].y[5];
            value.msaa_y4 = 16U - sample_positions[idx].y[4];
         } else {
            value.msaa_y7 = sample_positions[idx].y[7];
            value.msaa_y6 = sample_positions[idx].y[6];
            value.msaa_y5 = sample_positions[idx].y[5];
            value.msaa_y4 = sample_positions[idx].y[4];
         }
         FALLTHROUGH;

      case 4:
         value.msaa_x3 = sample_positions[idx].x[3];
         value.msaa_x2 = sample_positions[idx].x[2];

         if (y_flip) {
            value.msaa_y3 = 16U - sample_positions[idx].y[3];
            value.msaa_y2 = 16U - sample_positions[idx].y[2];
         } else {
            value.msaa_y3 = sample_positions[idx].y[3];
            value.msaa_y2 = sample_positions[idx].y[2];
         }
         FALLTHROUGH;

      case 2:
         value.msaa_x1 = sample_positions[idx].x[1];

         if (y_flip) {
            value.msaa_y1 = 16U - sample_positions[idx].y[1];
         } else {
            value.msaa_y1 = sample_positions[idx].y[1];
         }
         FALLTHROUGH;

      case 1:
         value.msaa_x0 = sample_positions[idx].x[0];

         if (y_flip) {
            value.msaa_y0 = 16U - sample_positions[idx].y[0];
         } else {
            value.msaa_y0 = sample_positions[idx].y[0];
         }
         break;

      default:
         unreachable("Unsupported number of samples");
      }
   }

   return multisamplectl;
}

static uint32_t
pvr_rogue_get_cr_isp_mtile_size_val(const struct pvr_device_info *dev_info,
                                    const struct pvr_rt_mtile_info *mtile_info,
                                    uint32_t samples)
{
   uint32_t samples_per_pixel =
      PVR_GET_FEATURE_VALUE(dev_info, isp_samples_per_pixel, 0);
   uint32_t isp_mtile_size;

   pvr_csb_pack (&isp_mtile_size, CR_ISP_MTILE_SIZE, value) {
      value.x = mtile_info->mtile_x1;
      value.y = mtile_info->mtile_y1;

      if (samples_per_pixel == 1) {
         if (samples >= 4)
            value.x <<= 1;

         if (samples >= 2)
            value.y <<= 1;
      } else if (samples_per_pixel == 2) {
         if (samples >= 8)
            value.x <<= 1;

         if (samples >= 4)
            value.y <<= 1;
      } else if (samples_per_pixel == 4) {
         if (samples >= 8)
            value.y <<= 1;
      } else {
         assert(!"Unsupported ISP samples per pixel value");
      }
   }

   return isp_mtile_size;
}

static uint32_t pvr_rogue_get_ppp_screen_val(uint32_t width, uint32_t height)
{
   uint32_t val;

   pvr_csb_pack (&val, CR_PPP_SCREEN, state) {
      state.pixxmax = width - 1;
      state.pixymax = height - 1;
   }

   return val;
}

struct pvr_rogue_cr_te {
   uint32_t aa;
   uint32_t mtile1;
   uint32_t mtile2;
   uint32_t screen;
   uint32_t mtile_stride;
};

static void pvr_rogue_ct_te_init(const struct pvr_device_info *dev_info,
                                 const struct pvr_rt_mtile_info *mtile_info,
                                 uint32_t samples,
                                 struct pvr_rogue_cr_te *const te_regs)
{
   uint32_t samples_per_pixel =
      PVR_GET_FEATURE_VALUE(dev_info, isp_samples_per_pixel, 0);

   pvr_csb_pack (&te_regs->aa, CR_TE_AA, value) {
      if (samples_per_pixel == 1) {
         if (samples >= 2)
            value.y = true;

         if (samples >= 4)
            value.x = true;
      } else if (samples_per_pixel == 2) {
         if (samples >= 2)
            value.x2 = true;

         if (samples >= 4)
            value.y = true;

         if (samples >= 8)
            value.x = true;
      } else if (samples_per_pixel == 4) {
         if (samples >= 2)
            value.x2 = true;

         if (samples >= 4)
            value.y2 = true;

         if (samples >= 8)
            value.y = true;
      } else {
         assert(!"Unsupported ISP samples per pixel value");
      }
   }

   pvr_csb_pack (&te_regs->mtile1, CR_TE_MTILE1, value) {
      value.x1 = mtile_info->mtile_x1;
      if (!PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
         value.x2 = mtile_info->mtile_x2;
         value.x3 = mtile_info->mtile_x3;
      }
   }

   pvr_csb_pack (&te_regs->mtile2, CR_TE_MTILE2, value) {
      value.y1 = mtile_info->mtile_y1;
      if (!PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
         value.y2 = mtile_info->mtile_y2;
         value.y3 = mtile_info->mtile_y3;
      }
   }

   pvr_csb_pack (&te_regs->screen, CR_TE_SCREEN, value) {
      value.xmax = mtile_info->x_tile_max;
      value.ymax = mtile_info->y_tile_max;
   }

   te_regs->mtile_stride = mtile_info->mtile_x1 * mtile_info->mtile_y1;
}
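/* A HWRT dataset bundles everything the firmware needs for rendering to a
 * render target: per-RT-data macrotile array, PM MLIST and region header
 * addresses, the free lists to allocate parameter memory from, and the
 * precomputed ISP/TE register values. Each of the ROGUE_FWIF_NUM_RTDATAS
 * RT datas also gets a sync prim here, which render submission later uses
 * to order geometry and fragment work targeting the same RT data.
 */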
VkResult pvr_srv_render_target_dataset_create(
   struct pvr_winsys *ws,
   const struct pvr_winsys_rt_dataset_create_info *create_info,
   const struct pvr_device_info *dev_info,
   struct pvr_winsys_rt_dataset **const rt_dataset_out)
{
   const pvr_dev_addr_t macrotile_addrs[ROGUE_FWIF_NUM_RTDATAS] = {
      [0] = create_info->rt_datas[0].macrotile_array_dev_addr,
      [1] = create_info->rt_datas[1].macrotile_array_dev_addr,
   };
   const pvr_dev_addr_t pm_mlist_addrs[ROGUE_FWIF_NUM_RTDATAS] = {
      [0] = create_info->rt_datas[0].pm_mlist_dev_addr,
      [1] = create_info->rt_datas[1].pm_mlist_dev_addr,
   };
   const pvr_dev_addr_t rgn_header_addrs[ROGUE_FWIF_NUM_RTDATAS] = {
      [0] = create_info->rt_datas[0].rgn_header_dev_addr,
      [1] = create_info->rt_datas[1].rgn_header_dev_addr,
   };

   struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ws);
   struct pvr_srv_winsys_free_list *srv_local_free_list =
      to_pvr_srv_winsys_free_list(create_info->local_free_list);
   void *free_lists[ROGUE_FW_MAX_FREELISTS] = { NULL };
   struct pvr_srv_winsys_rt_dataset *srv_rt_dataset;
   void *handles[ROGUE_FWIF_NUM_RTDATAS];
   struct pvr_rogue_cr_te rogue_te_regs;
   struct pvr_rt_mtile_info mtile_info;
   uint32_t isp_mtile_size;
   VkResult result;

   free_lists[ROGUE_FW_LOCAL_FREELIST] = srv_local_free_list->handle;

   if (srv_local_free_list->parent) {
      free_lists[ROGUE_FW_GLOBAL_FREELIST] =
         srv_local_free_list->parent->handle;
   }

   srv_rt_dataset = vk_zalloc(ws->alloc,
                              sizeof(*srv_rt_dataset),
                              8,
                              VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!srv_rt_dataset)
      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* If greater than 1 we'll have to pass in an array. For now just passing in
    * the reference.
    */
   STATIC_ASSERT(ROGUE_FWIF_NUM_GEOMDATAS == 1);
   /* If not 2 the arrays used in the bridge call will require updating. */
   STATIC_ASSERT(ROGUE_FWIF_NUM_RTDATAS == 2);

   pvr_rt_mtile_info_init(dev_info,
                          &mtile_info,
                          create_info->width,
                          create_info->height,
                          create_info->samples);

   isp_mtile_size = pvr_rogue_get_cr_isp_mtile_size_val(dev_info,
                                                        &mtile_info,
                                                        create_info->samples);

   pvr_rogue_ct_te_init(dev_info,
                        &mtile_info,
                        create_info->samples,
                        &rogue_te_regs);

   result = pvr_srv_rgx_create_hwrt_dataset(
      ws->render_fd,
      pvr_rogue_get_cr_multisamplectl_val(create_info->samples, true),
      pvr_rogue_get_cr_multisamplectl_val(create_info->samples, false),
      macrotile_addrs,
      pm_mlist_addrs,
      &create_info->rtc_dev_addr,
      rgn_header_addrs,
      &create_info->tpc_dev_addr,
      &create_info->vheap_table_dev_addr,
      free_lists,
      create_info->isp_merge_lower_x,
      create_info->isp_merge_lower_y,
      create_info->isp_merge_scale_x,
      create_info->isp_merge_scale_y,
      create_info->isp_merge_upper_x,
      create_info->isp_merge_upper_y,
      isp_mtile_size,
      rogue_te_regs.mtile_stride,
      pvr_rogue_get_ppp_screen_val(create_info->width, create_info->height),
      create_info->rgn_header_size,
      rogue_te_regs.aa,
      rogue_te_regs.mtile1,
      rogue_te_regs.mtile2,
      rogue_te_regs.screen,
      create_info->tpc_size,
      create_info->tpc_stride,
      create_info->layers,
      handles);
   if (result != VK_SUCCESS)
      goto err_vk_free_srv_rt_dataset;

   srv_rt_dataset->rt_datas[0].handle = handles[0];
   srv_rt_dataset->rt_datas[1].handle = handles[1];

   for (uint32_t i = 0; i < ARRAY_SIZE(srv_rt_dataset->rt_datas); i++) {
      srv_rt_dataset->rt_datas[i].sync_prim = pvr_srv_sync_prim_alloc(srv_ws);
      if (!srv_rt_dataset->rt_datas[i].sync_prim) {
         result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
         goto err_srv_sync_prim_free;
      }
   }

   srv_rt_dataset->base.ws = ws;

   *rt_dataset_out = &srv_rt_dataset->base;

   return VK_SUCCESS;

err_srv_sync_prim_free:
   for (uint32_t i = 0; i < ARRAY_SIZE(srv_rt_dataset->rt_datas); i++) {
      pvr_srv_sync_prim_free(srv_ws, srv_rt_dataset->rt_datas[i].sync_prim);

      if (srv_rt_dataset->rt_datas[i].handle) {
         pvr_srv_rgx_destroy_hwrt_dataset(ws->render_fd,
                                          srv_rt_dataset->rt_datas[i].handle);
      }
   }

err_vk_free_srv_rt_dataset:
   vk_free(ws->alloc, srv_rt_dataset);

   return result;
}
void pvr_srv_render_target_dataset_destroy(
   struct pvr_winsys_rt_dataset *rt_dataset)
{
   struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(rt_dataset->ws);
   struct pvr_srv_winsys_rt_dataset *srv_rt_dataset =
      to_pvr_srv_winsys_rt_dataset(rt_dataset);

   for (uint32_t i = 0; i < ARRAY_SIZE(srv_rt_dataset->rt_datas); i++) {
      pvr_srv_sync_prim_free(srv_ws, srv_rt_dataset->rt_datas[i].sync_prim);

      if (srv_rt_dataset->rt_datas[i].handle) {
         pvr_srv_rgx_destroy_hwrt_dataset(srv_ws->base.render_fd,
                                          srv_rt_dataset->rt_datas[i].handle);
      }
   }

   vk_free(srv_ws->base.alloc, srv_rt_dataset);
}

static void pvr_srv_render_ctx_fw_static_state_init(
   struct pvr_winsys_render_ctx_create_info *create_info,
   struct rogue_fwif_static_rendercontext_state *static_state)
{
   struct pvr_winsys_render_ctx_static_state *ws_static_state =
      &create_info->static_state;
   struct rogue_fwif_ta_regs_cswitch *regs =
      &static_state->ctx_switch_geom_regs[0];

   memset(static_state, 0, sizeof(*static_state));

   regs->vdm_context_state_base_addr = ws_static_state->vdm_ctx_state_base_addr;
   regs->ta_context_state_base_addr = ws_static_state->geom_ctx_state_base_addr;

   STATIC_ASSERT(ARRAY_SIZE(regs->ta_state) ==
                 ARRAY_SIZE(ws_static_state->geom_state));
   for (uint32_t i = 0; i < ARRAY_SIZE(ws_static_state->geom_state); i++) {
      regs->ta_state[i].vdm_context_store_task0 =
         ws_static_state->geom_state[i].vdm_ctx_store_task0;
      regs->ta_state[i].vdm_context_store_task1 =
         ws_static_state->geom_state[i].vdm_ctx_store_task1;
      regs->ta_state[i].vdm_context_store_task2 =
         ws_static_state->geom_state[i].vdm_ctx_store_task2;

      regs->ta_state[i].vdm_context_resume_task0 =
         ws_static_state->geom_state[i].vdm_ctx_resume_task0;
      regs->ta_state[i].vdm_context_resume_task1 =
         ws_static_state->geom_state[i].vdm_ctx_resume_task1;
      regs->ta_state[i].vdm_context_resume_task2 =
         ws_static_state->geom_state[i].vdm_ctx_resume_task2;
   }
}
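/* Creating a render context also creates one sync timeline for the geometry
 * kick and one for the fragment kick; pvr_srv_winsys_render_submit() hands
 * these to the kernel so it can return a distinct output fence for each
 * phase. The static state initialised above is passed to the firmware so it
 * can store and resume the geometry (VDM) state when the context is switched
 * out mid-render.
 */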
VkResult pvr_srv_winsys_render_ctx_create(
   struct pvr_winsys *ws,
   struct pvr_winsys_render_ctx_create_info *create_info,
   struct pvr_winsys_render_ctx **const ctx_out)
{
   struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ws);
   struct rogue_fwif_rf_cmd reset_cmd = { 0 };

   struct rogue_fwif_static_rendercontext_state static_state;
   struct pvr_srv_winsys_render_ctx *srv_ctx;
   const uint32_t call_stack_depth = 1U;
   VkResult result;

   srv_ctx = vk_zalloc(ws->alloc,
                       sizeof(*srv_ctx),
                       8,
                       VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!srv_ctx)
      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

   result = pvr_srv_create_timeline(ws->render_fd, &srv_ctx->timeline_geom);
   if (result != VK_SUCCESS)
      goto err_free_srv_ctx;

   result = pvr_srv_create_timeline(ws->render_fd, &srv_ctx->timeline_frag);
   if (result != VK_SUCCESS)
      goto err_close_timeline_geom;

   pvr_srv_render_ctx_fw_static_state_init(create_info, &static_state);

   /* TODO: Add support for reset framework. Currently we subtract
    * reset_cmd.regs size from reset_cmd size to only pass empty flags field.
    */
   result = pvr_srv_rgx_create_render_context(
      ws->render_fd,
      pvr_srv_from_winsys_priority(create_info->priority),
      create_info->vdm_callstack_addr,
      call_stack_depth,
      sizeof(reset_cmd) - sizeof(reset_cmd.regs),
      (uint8_t *)&reset_cmd,
      srv_ws->server_memctx_data,
      sizeof(static_state),
      (uint8_t *)&static_state,
      0,
      RGX_CONTEXT_FLAG_DISABLESLR,
      0,
      UINT_MAX,
      UINT_MAX,
      &srv_ctx->handle);
   if (result != VK_SUCCESS)
      goto err_close_timeline_frag;

   srv_ctx->base.ws = ws;

   *ctx_out = &srv_ctx->base;

   return VK_SUCCESS;

err_close_timeline_frag:
   close(srv_ctx->timeline_frag);

err_close_timeline_geom:
   close(srv_ctx->timeline_geom);

err_free_srv_ctx:
   vk_free(ws->alloc, srv_ctx);

   return vk_error(NULL, VK_ERROR_INITIALIZATION_FAILED);
}

void pvr_srv_winsys_render_ctx_destroy(struct pvr_winsys_render_ctx *ctx)
{
   struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ctx->ws);
   struct pvr_srv_winsys_render_ctx *srv_ctx =
      to_pvr_srv_winsys_render_ctx(ctx);

   pvr_srv_rgx_destroy_render_context(srv_ws->base.render_fd, srv_ctx->handle);
   close(srv_ctx->timeline_frag);
   close(srv_ctx->timeline_geom);
   vk_free(srv_ws->base.alloc, srv_ctx);
}
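/* The firmware command stream passed in via the winsys submit info starts
 * with a KMD_STREAM_HDR word giving the length of the main stream. The
 * loaders below walk that main stream in a fixed order, copying each value
 * into the corresponding field of the firmware register struct, and return
 * the main stream length. If the overall stream is longer than the main
 * stream, the remainder is an extension stream introduced by an EXTHDR
 * word, which here only carries the TPU register needed for the BRN 49927
 * workaround.
 */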
static uint32_t
pvr_srv_geometry_cmd_stream_load(struct rogue_fwif_cmd_ta *const cmd,
                                 const uint8_t *const stream,
                                 const uint32_t stream_len,
                                 const struct pvr_device_info *const dev_info)
{
   const uint32_t *stream_ptr = (const uint32_t *)stream;
   struct rogue_fwif_ta_regs *const regs = &cmd->regs;
   uint32_t main_stream_len =
      pvr_csb_unpack((const uint64_t *)stream_ptr, KMD_STREAM_HDR).length;

   stream_ptr += pvr_cmd_length(KMD_STREAM_HDR);

   regs->vdm_ctrl_stream_base = *(const uint64_t *)stream_ptr;
   stream_ptr += pvr_cmd_length(CR_VDM_CTRL_STREAM_BASE);

   regs->tpu_border_colour_table = *(const uint64_t *)stream_ptr;
   stream_ptr += pvr_cmd_length(CR_TPU_BORDER_COLOUR_TABLE_VDM);

   regs->ppp_ctrl = *stream_ptr;
   stream_ptr += pvr_cmd_length(CR_PPP_CTRL);

   regs->te_psg = *stream_ptr;
   stream_ptr += pvr_cmd_length(CR_TE_PSG);

   regs->vdm_context_resume_task0_size = *stream_ptr;
   stream_ptr += pvr_cmd_length(VDMCTRL_PDS_STATE0);

   regs->view_idx = *stream_ptr;
   stream_ptr++;

   assert((const uint8_t *)stream_ptr - stream <= stream_len);
   assert((const uint8_t *)stream_ptr - stream == main_stream_len);

   return main_stream_len;
}

static void pvr_srv_geometry_cmd_ext_stream_load(
   struct rogue_fwif_cmd_ta *const cmd,
   const uint8_t *const stream,
   const uint32_t stream_len,
   const uint32_t ext_stream_offset,
   const struct pvr_device_info *const dev_info)
{
   const uint32_t *ext_stream_ptr =
      (const uint32_t *)((uint8_t *)stream + ext_stream_offset);
   struct rogue_fwif_ta_regs *const regs = &cmd->regs;

   struct ROGUE_KMD_STREAM_EXTHDR_GEOM0 header0;

   header0 = pvr_csb_unpack(ext_stream_ptr, KMD_STREAM_EXTHDR_GEOM0);
   ext_stream_ptr += pvr_cmd_length(KMD_STREAM_EXTHDR_GEOM0);

   assert(PVR_HAS_QUIRK(dev_info, 49927) == header0.has_brn49927);
   if (header0.has_brn49927) {
      regs->tpu = *ext_stream_ptr;
      ext_stream_ptr += pvr_cmd_length(CR_TPU);
   }

   assert((const uint8_t *)ext_stream_ptr - stream == stream_len);
}

static void pvr_srv_geometry_cmd_init(
   const struct pvr_winsys_render_submit_info *submit_info,
   const struct pvr_srv_sync_prim *sync_prim,
   struct rogue_fwif_cmd_ta *cmd,
   const struct pvr_device_info *const dev_info)
{
   const struct pvr_winsys_geometry_state *state = &submit_info->geometry;
   uint32_t ext_stream_offset;

   memset(cmd, 0, sizeof(*cmd));

   cmd->cmd_shared.cmn.frame_num = submit_info->frame_num;

   ext_stream_offset = pvr_srv_geometry_cmd_stream_load(cmd,
                                                        state->fw_stream,
                                                        state->fw_stream_len,
                                                        dev_info);

   if (ext_stream_offset < state->fw_stream_len) {
      pvr_srv_geometry_cmd_ext_stream_load(cmd,
                                           state->fw_stream,
                                           state->fw_stream_len,
                                           ext_stream_offset,
                                           dev_info);
   }

   if (state->flags.is_first_geometry)
      cmd->flags |= ROGUE_FWIF_TAFLAGS_FIRSTKICK;

   if (state->flags.is_last_geometry)
      cmd->flags |= ROGUE_FWIF_TAFLAGS_LASTKICK;

   if (state->flags.use_single_core)
      cmd->flags |= ROGUE_FWIF_TAFLAGS_SINGLE_CORE;

   cmd->partial_render_ta_3d_fence.ufo_addr.addr =
      pvr_srv_sync_prim_get_fw_addr(sync_prim);
   cmd->partial_render_ta_3d_fence.value = sync_prim->value;
}

static uint32_t
pvr_srv_fragment_cmd_stream_load(struct rogue_fwif_cmd_3d *const cmd,
                                 const uint8_t *const stream,
                                 const uint32_t stream_len,
                                 const struct pvr_device_info *const dev_info)
{
   const uint32_t *stream_ptr = (const uint32_t *)stream;
   struct rogue_fwif_3d_regs *const regs = &cmd->regs;
   uint32_t main_stream_len =
      pvr_csb_unpack((const uint64_t *)stream_ptr, KMD_STREAM_HDR).length;

   stream_ptr += pvr_cmd_length(KMD_STREAM_HDR);

   regs->isp_scissor_base = *(const uint64_t *)stream_ptr;
   stream_ptr += pvr_cmd_length(CR_ISP_SCISSOR_BASE);

   regs->isp_dbias_base = *(const uint64_t *)stream_ptr;
   stream_ptr += pvr_cmd_length(CR_ISP_DBIAS_BASE);

   regs->isp_oclqry_base = *(const uint64_t *)stream_ptr;
   stream_ptr += pvr_cmd_length(CR_ISP_OCLQRY_BASE);

   regs->isp_zlsctl = *(const uint64_t *)stream_ptr;
   stream_ptr += pvr_cmd_length(CR_ISP_ZLSCTL);

   regs->isp_zload_store_base = *(const uint64_t *)stream_ptr;
   stream_ptr += pvr_cmd_length(CR_ISP_ZLOAD_BASE);

   regs->isp_stencil_load_store_base = *(const uint64_t *)stream_ptr;
   stream_ptr += pvr_cmd_length(CR_ISP_STENCIL_LOAD_BASE);

   if (PVR_HAS_FEATURE(dev_info, requires_fb_cdc_zls_setup)) {
      regs->fb_cdc_zls = *(const uint64_t *)stream_ptr;
      stream_ptr += 2U;
   }

   STATIC_ASSERT(ARRAY_SIZE(regs->pbe_word) == 8U);
   STATIC_ASSERT(ARRAY_SIZE(regs->pbe_word[0]) == 3U);
   STATIC_ASSERT(sizeof(regs->pbe_word[0][0]) == sizeof(uint64_t));
   memcpy(regs->pbe_word, stream_ptr, sizeof(regs->pbe_word));
   stream_ptr += 8U * 3U * 2U;

   regs->tpu_border_colour_table = *(const uint64_t *)stream_ptr;
   stream_ptr += pvr_cmd_length(CR_TPU_BORDER_COLOUR_TABLE_PDM);

   STATIC_ASSERT(ARRAY_SIZE(regs->pds_bgnd) == 3U);
   STATIC_ASSERT(sizeof(regs->pds_bgnd[0]) == sizeof(uint64_t));
   memcpy(regs->pds_bgnd, stream_ptr, sizeof(regs->pds_bgnd));
   stream_ptr += 3U * 2U;

   STATIC_ASSERT(ARRAY_SIZE(regs->pds_pr_bgnd) == 3U);
   STATIC_ASSERT(sizeof(regs->pds_pr_bgnd[0]) == sizeof(uint64_t));
   memcpy(regs->pds_pr_bgnd, stream_ptr, sizeof(regs->pds_pr_bgnd));
   stream_ptr += 3U * 2U;

   STATIC_ASSERT(ARRAY_SIZE(regs->usc_clear_register) == 8U);
   STATIC_ASSERT(sizeof(regs->usc_clear_register[0]) == sizeof(uint32_t));
   memcpy(regs->usc_clear_register,
          stream_ptr,
          sizeof(regs->usc_clear_register));
   stream_ptr += 8U;

   regs->usc_pixel_output_ctrl = *stream_ptr;
   stream_ptr += pvr_cmd_length(CR_USC_PIXEL_OUTPUT_CTRL);

   regs->isp_bgobjdepth = *stream_ptr;
   stream_ptr += pvr_cmd_length(CR_ISP_BGOBJDEPTH);

   regs->isp_bgobjvals = *stream_ptr;
   stream_ptr += pvr_cmd_length(CR_ISP_BGOBJVALS);

   regs->isp_aa = *stream_ptr;
   stream_ptr += pvr_cmd_length(CR_ISP_AA);

   regs->isp_ctl = *stream_ptr;
   stream_ptr += pvr_cmd_length(CR_ISP_CTL);

   regs->event_pixel_pds_info = *stream_ptr;
   stream_ptr += pvr_cmd_length(CR_EVENT_PIXEL_PDS_INFO);

   if (PVR_HAS_FEATURE(dev_info, cluster_grouping)) {
      regs->pixel_phantom = *stream_ptr;
      stream_ptr++;
   }

   regs->view_idx = *stream_ptr;
   stream_ptr++;

   regs->event_pixel_pds_data = *stream_ptr;
   stream_ptr += pvr_cmd_length(CR_EVENT_PIXEL_PDS_DATA);

   if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) {
      regs->isp_oclqry_stride = *stream_ptr;
      stream_ptr++;
   }

   if (PVR_HAS_FEATURE(dev_info, zls_subtile)) {
      regs->isp_zls_pixels = *stream_ptr;
      stream_ptr += pvr_cmd_length(CR_ISP_ZLS_PIXELS);
   }

   cmd->zls_stride = *stream_ptr;
   stream_ptr++;

   cmd->sls_stride = *stream_ptr;
   stream_ptr++;

   if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) {
      cmd->execute_count = *stream_ptr;
      stream_ptr++;
   }

   assert((const uint8_t *)stream_ptr - stream <= stream_len);
   assert((const uint8_t *)stream_ptr - stream == main_stream_len);

   return main_stream_len;
}
static void pvr_srv_fragment_cmd_ext_stream_load(
   struct rogue_fwif_cmd_3d *const cmd,
   const uint8_t *const stream,
   const uint32_t stream_len,
   const uint32_t ext_stream_offset,
   const struct pvr_device_info *const dev_info)
{
   const uint32_t *ext_stream_ptr =
      (const uint32_t *)((uint8_t *)stream + ext_stream_offset);
   struct rogue_fwif_3d_regs *const regs = &cmd->regs;

   struct ROGUE_KMD_STREAM_EXTHDR_FRAG0 header0;

   header0 = pvr_csb_unpack(ext_stream_ptr, KMD_STREAM_EXTHDR_FRAG0);
   ext_stream_ptr += pvr_cmd_length(KMD_STREAM_EXTHDR_FRAG0);

   assert(PVR_HAS_QUIRK(dev_info, 49927) == header0.has_brn49927);
   if (header0.has_brn49927) {
      regs->tpu = *ext_stream_ptr;
      ext_stream_ptr += pvr_cmd_length(CR_TPU);
   }

   assert((const uint8_t *)ext_stream_ptr - stream == stream_len);
}

static void
pvr_srv_fragment_cmd_init(struct rogue_fwif_cmd_3d *cmd,
                          const struct pvr_winsys_fragment_state *state,
                          const struct pvr_device_info *dev_info,
                          uint32_t frame_num)
{
   uint32_t ext_stream_offset;

   memset(cmd, 0, sizeof(*cmd));

   cmd->cmd_shared.cmn.frame_num = frame_num;

   ext_stream_offset = pvr_srv_fragment_cmd_stream_load(cmd,
                                                        state->fw_stream,
                                                        state->fw_stream_len,
                                                        dev_info);

   if (ext_stream_offset < state->fw_stream_len) {
      pvr_srv_fragment_cmd_ext_stream_load(cmd,
                                           state->fw_stream,
                                           state->fw_stream_len,
                                           ext_stream_offset,
                                           dev_info);
   }

   if (state->flags.has_depth_buffer)
      cmd->flags |= ROGUE_FWIF_RENDERFLAGS_DEPTHBUFFER;

   if (state->flags.has_stencil_buffer)
      cmd->flags |= ROGUE_FWIF_RENDERFLAGS_STENCILBUFFER;

   if (state->flags.prevent_cdm_overlap)
      cmd->flags |= ROGUE_FWIF_RENDERFLAGS_PREVENT_CDM_OVERLAP;

   if (state->flags.use_single_core)
      cmd->flags |= ROGUE_FWIF_RENDERFLAGS_SINGLE_CORE;

   if (state->flags.get_vis_results)
      cmd->flags |= ROGUE_FWIF_RENDERFLAGS_GETVISRESULTS;

   if (state->flags.has_spm_scratch_buffer)
      cmd->flags |= ROGUE_FWIF_RENDERFLAGS_SPMSCRATCHBUFFER;

   if (state->flags.disable_pixel_merging)
      cmd->flags |= ROGUE_FWIF_RENDERFLAGS_DISABLE_PIXELMERGE;
}
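/* A single submit always kicks geometry and a partial-render (PR) fragment
 * command, and optionally the real fragment command. The RT data's sync prim
 * chains the work together: on a first-geometry kick the geometry fences on
 * the value signalled by the previous submit, the geometry kick signals an
 * incremented value, and the fragment kick, when present, signals the next
 * increment. The updated value is stored back in sync_prim->value only after
 * a successful kick. The kernel returns separate geometry and fragment
 * output fences on the context's timelines, which are handed back via the
 * signal syncs.
 */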
VkResult pvr_srv_winsys_render_submit(
   const struct pvr_winsys_render_ctx *ctx,
   const struct pvr_winsys_render_submit_info *submit_info,
   const struct pvr_device_info *dev_info,
   struct vk_sync *signal_sync_geom,
   struct vk_sync *signal_sync_frag)
{
   const struct pvr_srv_winsys_rt_dataset *srv_rt_dataset =
      to_pvr_srv_winsys_rt_dataset(submit_info->rt_dataset);
   struct pvr_srv_sync_prim *sync_prim =
      srv_rt_dataset->rt_datas[submit_info->rt_data_idx].sync_prim;
   void *rt_data_handle =
      srv_rt_dataset->rt_datas[submit_info->rt_data_idx].handle;
   const struct pvr_srv_winsys_render_ctx *srv_ctx =
      to_pvr_srv_winsys_render_ctx(ctx);
   const struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ctx->ws);

   struct pvr_srv_sync *srv_signal_sync_geom;
   struct pvr_srv_sync *srv_signal_sync_frag;

   struct rogue_fwif_cmd_ta geom_cmd;
   struct rogue_fwif_cmd_3d frag_cmd = { 0 };
   struct rogue_fwif_cmd_3d pr_cmd = { 0 };

   uint8_t *frag_cmd_ptr = NULL;
   uint32_t frag_cmd_size = 0;

   uint32_t current_sync_value = sync_prim->value;
   uint32_t geom_sync_update_value;

   uint32_t frag_to_geom_fence_count = 0;
   uint32_t frag_to_geom_fence_value;

   uint32_t frag_sync_update_count = 0;
   uint32_t frag_sync_update_value;

   int in_frag_fd = -1;
   int in_geom_fd = -1;
   int fence_frag;
   int fence_geom;

   VkResult result;

   pvr_srv_geometry_cmd_init(submit_info, sync_prim, &geom_cmd, dev_info);
   pvr_srv_fragment_cmd_init(&pr_cmd,
                             &submit_info->fragment_pr,
                             dev_info,
                             submit_info->frame_num);

   if (submit_info->has_fragment_job) {
      pvr_srv_fragment_cmd_init(&frag_cmd,
                                &submit_info->fragment,
                                dev_info,
                                submit_info->frame_num);
      frag_cmd_ptr = (uint8_t *)&frag_cmd;
      frag_cmd_size = sizeof(frag_cmd);
   }

   if (submit_info->geometry.wait) {
      struct pvr_srv_sync *srv_wait_sync =
         to_srv_sync(submit_info->geometry.wait);

      if (srv_wait_sync->fd >= 0) {
         in_geom_fd = dup(srv_wait_sync->fd);
         if (in_geom_fd == -1) {
            return vk_errorf(NULL,
                             VK_ERROR_OUT_OF_HOST_MEMORY,
                             "dup called on wait sync failed, Errno: %s",
                             strerror(errno));
         }
      }
   }

   if (submit_info->fragment.wait) {
      struct pvr_srv_sync *srv_wait_sync =
         to_srv_sync(submit_info->fragment.wait);

      if (srv_wait_sync->fd >= 0) {
         in_frag_fd = dup(srv_wait_sync->fd);
         if (in_frag_fd == -1) {
            result = vk_errorf(NULL,
                               VK_ERROR_OUT_OF_HOST_MEMORY,
                               "dup called on wait sync failed, Errno: %s",
                               strerror(errno));
            goto end_close_in_fds;
         }
      }
   }

   if (submit_info->geometry.flags.is_first_geometry) {
      frag_to_geom_fence_count = 1;
      frag_to_geom_fence_value = current_sync_value;
   }

   /* Geometry is always kicked. */
   geom_sync_update_value = ++current_sync_value;

   if (submit_info->has_fragment_job) {
      frag_sync_update_count = 1;
      frag_sync_update_value = ++current_sync_value;
   }

   do {
      /* The fw allows the ZS and MSAA scratch buffers to be lazily allocated
       * in which case we need to provide a status update (i.e. if they are
       * physically backed or not) to the fw. In our case they will always be
       * physically backed so no need to inform the fw about their status and
       * pass in anything. We'll just pass in NULL.
       */
      result = pvr_srv_rgx_kick_render2(srv_ws->base.render_fd,
                                        srv_ctx->handle,
                                        frag_to_geom_fence_count,
                                        &sync_prim->ctx->block_handle,
                                        &sync_prim->offset,
                                        &frag_to_geom_fence_value,
                                        1,
                                        &sync_prim->ctx->block_handle,
                                        &sync_prim->offset,
                                        &geom_sync_update_value,
                                        frag_sync_update_count,
                                        &sync_prim->ctx->block_handle,
                                        &sync_prim->offset,
                                        &frag_sync_update_value,
                                        sync_prim->ctx->block_handle,
                                        sync_prim->offset,
                                        geom_sync_update_value,
                                        in_geom_fd,
                                        srv_ctx->timeline_geom,
                                        &fence_geom,
                                        "GEOM",
                                        in_frag_fd,
                                        srv_ctx->timeline_frag,
                                        &fence_frag,
                                        "FRAG",
                                        sizeof(geom_cmd),
                                        (uint8_t *)&geom_cmd,
                                        sizeof(pr_cmd),
                                        (uint8_t *)&pr_cmd,
                                        frag_cmd_size,
                                        frag_cmd_ptr,
                                        submit_info->job_num,
                                        /* Always kick the TA. */
                                        true,
                                        /* Always kick a PR. */
                                        true,
                                        submit_info->has_fragment_job,
                                        false,
                                        0,
                                        rt_data_handle,
                                        NULL,
                                        NULL,
                                        0,
                                        NULL,
                                        NULL,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0);
   } while (result == VK_NOT_READY);

   if (result != VK_SUCCESS)
      goto end_close_in_fds;

   /* The job submission was successful, update the sync prim value. */
   sync_prim->value = current_sync_value;

   if (signal_sync_geom) {
      srv_signal_sync_geom = to_srv_sync(signal_sync_geom);
      pvr_srv_set_sync_payload(srv_signal_sync_geom, fence_geom);
   } else if (fence_geom != -1) {
      close(fence_geom);
   }

   if (signal_sync_frag) {
      srv_signal_sync_frag = to_srv_sync(signal_sync_frag);
      pvr_srv_set_sync_payload(srv_signal_sync_frag, fence_frag);
   } else if (fence_frag != -1) {
      close(fence_frag);
   }

end_close_in_fds:
   if (in_geom_fd >= 0)
      close(in_geom_fd);

   if (in_frag_fd >= 0)
      close(in_frag_fd);

   return result;
}