/* * Copyright © 2022 Imagination Technologies Ltd. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include #include #include "hwdef/rogue_hw_defs.h" #include "hwdef/rogue_hw_utils.h" #include "pvr_device_info.h" #include "pvr_job_common.h" #include "pvr_private.h" #include "util/macros.h" #include "util/u_math.h" #include "vk_alloc.h" #include "vk_format.h" #include "vk_object.h" /* clang-format off */ static enum PVRX(PBESTATE_SWIZ) pvr_get_pbe_hw_swizzle(VkComponentSwizzle comp, enum pipe_swizzle swz) /* clang-format on */ { switch (swz) { case PIPE_SWIZZLE_0: return ROGUE_PBESTATE_SWIZ_ZERO; case PIPE_SWIZZLE_1: return ROGUE_PBESTATE_SWIZ_ONE; case PIPE_SWIZZLE_X: return ROGUE_PBESTATE_SWIZ_SOURCE_CHAN0; case PIPE_SWIZZLE_Y: return ROGUE_PBESTATE_SWIZ_SOURCE_CHAN1; case PIPE_SWIZZLE_Z: return ROGUE_PBESTATE_SWIZ_SOURCE_CHAN2; case PIPE_SWIZZLE_W: return ROGUE_PBESTATE_SWIZ_SOURCE_CHAN3; case PIPE_SWIZZLE_NONE: if (comp == VK_COMPONENT_SWIZZLE_A) return ROGUE_PBESTATE_SWIZ_ONE; else return ROGUE_PBESTATE_SWIZ_ZERO; default: unreachable("Unknown enum pipe_swizzle"); }; } void pvr_pbe_get_src_format_and_gamma(VkFormat vk_format, enum pvr_pbe_gamma default_gamma, bool with_packed_usc_channel, uint32_t *const src_format_out, enum pvr_pbe_gamma *const gamma_out) { uint32_t chan_0_width = vk_format_get_channel_width(vk_format, 0); *gamma_out = default_gamma; if (vk_format_has_32bit_component(vk_format) || vk_format_is_int(vk_format)) { *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_8_PER_CHANNEL); } else if (vk_format_is_float(vk_format)) { *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_F16_PER_CHANNEL); } else if (vk_format_is_srgb(vk_format)) { *gamma_out = PVR_PBE_GAMMA_ENABLED; /* F16 source for gamma'd formats. */ *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_F16_PER_CHANNEL); } else if (vk_format_has_depth(vk_format) && vk_format_get_component_bits(vk_format, UTIL_FORMAT_COLORSPACE_ZS, 0) > 16) { *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_8_PER_CHANNEL); } else if (vk_format_has_stencil(vk_format) && vk_format_get_component_bits(vk_format, UTIL_FORMAT_COLORSPACE_ZS, 1) > 0) { *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_8_PER_CHANNEL); } else if (chan_0_width > 16) { *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_8_PER_CHANNEL); } else if (chan_0_width > 8) { *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_F16_PER_CHANNEL); } else if (!with_packed_usc_channel) { *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_F16_PER_CHANNEL); } else { *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_8_PER_CHANNEL); } } static void pvr_pbe_get_src_pos(const struct pvr_device_info *dev_info, enum pvr_pbe_source_start_pos source_start, uint32_t *const src_pos_out, bool *const src_pos_offset_128_out) { *src_pos_offset_128_out = false; switch (source_start) { case PVR_PBE_STARTPOS_BIT32: *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT32); break; case PVR_PBE_STARTPOS_BIT64: *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT64); break; case PVR_PBE_STARTPOS_BIT96: *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT96); break; case PVR_PBE_STARTPOS_BIT0: default: if (PVR_HAS_FEATURE(dev_info, eight_output_registers)) { switch (source_start) { case PVR_PBE_STARTPOS_BIT128: *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT0); *src_pos_offset_128_out = true; break; case PVR_PBE_STARTPOS_BIT160: *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT32); *src_pos_offset_128_out = true; break; case PVR_PBE_STARTPOS_BIT192: *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT64); *src_pos_offset_128_out = true; break; case PVR_PBE_STARTPOS_BIT224: *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT96); *src_pos_offset_128_out = true; break; default: *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT0); break; } } else { *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT0); } break; } } void pvr_pbe_pack_state( const struct pvr_device_info *dev_info, const struct pvr_pbe_surf_params *surface_params, const struct pvr_pbe_render_params *render_params, uint32_t pbe_cs_words[static const ROGUE_NUM_PBESTATE_STATE_WORDS], uint64_t pbe_reg_words[static const ROGUE_NUM_PBESTATE_REG_WORDS]) { /* This function needs updating if the value of * ROGUE_NUM_PBESTATE_STATE_WORDS changes, so check that it's the expected * value. */ STATIC_ASSERT(ROGUE_NUM_PBESTATE_STATE_WORDS == 2); /* This function needs updating if the value of ROGUE_NUM_PBESTATE_REG_WORDS * changes, so check that it's the expected value. */ STATIC_ASSERT(ROGUE_NUM_PBESTATE_REG_WORDS == 3); pbe_reg_words[2] = 0; if (surface_params->z_only_render) { pbe_cs_words[0] = 0; pvr_csb_pack (&pbe_cs_words[1], PBESTATE_STATE_WORD1, state) { state.emptytile = true; } pbe_reg_words[0] = 0; pbe_reg_words[1] = 0; return; } pvr_csb_pack (&pbe_cs_words[0], PBESTATE_STATE_WORD0, state) { state.address_low = surface_params->addr; } pvr_csb_pack (&pbe_cs_words[1], PBESTATE_STATE_WORD1, state) { state.address_high = surface_params->addr; state.source_format = surface_params->source_format; pvr_pbe_get_src_pos(dev_info, render_params->source_start, &state.source_pos, &state.source_pos_offset_128); /* MRT index (Use 0 for a single render target)/ */ state.mrt_index = render_params->mrt_index; /* Normalization flag based on output format. */ state.norm = surface_params->is_normalized; state.packmode = surface_params->pbe_packmode; } pvr_csb_pack (&pbe_reg_words[0], PBESTATE_REG_WORD0, reg) { reg.tilerelative = true; switch (surface_params->mem_layout) { case PVR_MEMLAYOUT_TWIDDLED: reg.memlayout = PVRX(PBESTATE_MEMLAYOUT_TWIDDLE_2D); break; case PVR_MEMLAYOUT_3DTWIDDLED: reg.memlayout = PVRX(PBESTATE_MEMLAYOUT_TWIDDLE_3D); break; case PVR_MEMLAYOUT_LINEAR: default: reg.memlayout = PVRX(PBESTATE_MEMLAYOUT_LINEAR); break; } /* FIXME: Remove rotation and y_flip hardcoding if needed. */ reg.rotation = PVRX(PBESTATE_ROTATION_TYPE_0_DEG); reg.y_flip = false; /* Note: Due to gamma being overridden above, anything other than * ENABLED/NONE is ignored. */ if (surface_params->gamma == PVR_PBE_GAMMA_ENABLED) { reg.gamma = true; if (surface_params->nr_components == 2) reg.twocomp_gamma = PVRX(PBESTATE_TWOCOMP_GAMMA_GAMMA_BOTH_CHANNELS); } reg.linestride = (surface_params->stride - 1) / PVRX(PBESTATE_REG_WORD0_LINESTRIDE_UNIT_SIZE); reg.minclip_x = render_params->min_x_clip; reg.swiz_chan0 = pvr_get_pbe_hw_swizzle(VK_COMPONENT_SWIZZLE_R, surface_params->swizzle[0]); reg.swiz_chan1 = pvr_get_pbe_hw_swizzle(VK_COMPONENT_SWIZZLE_G, surface_params->swizzle[1]); reg.swiz_chan2 = pvr_get_pbe_hw_swizzle(VK_COMPONENT_SWIZZLE_B, surface_params->swizzle[2]); reg.swiz_chan3 = pvr_get_pbe_hw_swizzle(VK_COMPONENT_SWIZZLE_A, surface_params->swizzle[3]); if (surface_params->mem_layout == PVR_MEMLAYOUT_3DTWIDDLED) reg.size_z = util_logbase2_ceil(surface_params->depth); reg.downscale = surface_params->down_scale; } pvr_csb_pack (&pbe_reg_words[1], PBESTATE_REG_WORD1, reg) { if (surface_params->mem_layout == PVR_MEMLAYOUT_TWIDDLED || surface_params->mem_layout == PVR_MEMLAYOUT_3DTWIDDLED) { reg.size_x = util_logbase2_ceil(surface_params->width); reg.size_y = util_logbase2_ceil(surface_params->height); } reg.minclip_y = render_params->min_y_clip; reg.maxclip_x = render_params->max_x_clip; reg.zslice = render_params->slice; reg.maxclip_y = render_params->max_y_clip; } } /* TODO: Split this into smaller functions to make it easier to follow. When * doing this, it would be nice to have a function that returns * total_tiles_in_flight so that CR_ISP_CTL can be fully packed in * pvr_render_job_ws_fragment_state_init(). */ void pvr_setup_tiles_in_flight( const struct pvr_device_info *dev_info, const struct pvr_device_runtime_info *dev_runtime_info, uint32_t msaa_mode, uint32_t pixel_width, bool paired_tiles, uint32_t max_tiles_in_flight, uint32_t *const isp_ctl_out, uint32_t *const pixel_ctl_out) { uint32_t total_tiles_in_flight = 0; uint32_t usable_partition_size; uint32_t partitions_available; uint32_t usc_min_output_regs; uint32_t max_partitions; uint32_t partition_size; uint32_t max_phantoms; uint32_t tile_size_x; uint32_t tile_size_y; uint32_t isp_samples; /* Round up the pixel width to the next allocation granularity. */ usc_min_output_regs = PVR_GET_FEATURE_VALUE(dev_info, usc_min_output_registers_per_pix, 0); pixel_width = MAX2(pixel_width, usc_min_output_regs); pixel_width = util_next_power_of_two(pixel_width); assert(pixel_width <= rogue_get_max_output_regs_per_pixel(dev_info)); partition_size = pixel_width; isp_samples = PVR_GET_FEATURE_VALUE(dev_info, isp_samples_per_pixel, 1); if (isp_samples == 2) { if (msaa_mode != PVRX(CR_ISP_AA_MODE_TYPE_AA_NONE)) partition_size *= 2U; } else if (isp_samples == 4) { if (msaa_mode == PVRX(CR_ISP_AA_MODE_TYPE_AA_4X) || msaa_mode == PVRX(CR_ISP_AA_MODE_TYPE_AA_8X)) partition_size *= 4U; else if (msaa_mode == PVRX(CR_ISP_AA_MODE_TYPE_AA_2X)) partition_size *= 2U; } /* Cores with a tile size of 16x16 don't have quadrant affinity. Hence the * partition size is the same as for a 32x32 tile quadrant (with no MSAA). * When MSAA is enabled, the USC has to process half the tile (16x8 pixels). */ tile_size_x = PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 0); tile_size_y = PVR_GET_FEATURE_VALUE(dev_info, tile_size_y, 0); /* We only support square tiles. */ assert(tile_size_x == tile_size_y); if (tile_size_x == 16U) { /* Cores with 16x16 tiles does not use tile quadrants. */ partition_size *= tile_size_x * tile_size_y; } else { /* Size of a tile quadrant (in dwords). */ partition_size *= (tile_size_x * tile_size_y / 4U); } /* Maximum available partition space for partitions of this size. */ max_partitions = PVR_GET_FEATURE_VALUE(dev_info, max_partitions, 0); usable_partition_size = MIN2(dev_runtime_info->total_reserved_partition_size, partition_size * max_partitions); if (PVR_GET_FEATURE_VALUE(dev_info, common_store_size_in_dwords, 0) < (1024 * 4 * 4)) { /* Do not apply the limit for cores with 16x16 tile size (no quadrant * affinity). */ if (tile_size_x != 16) { /* This is to counter the extremely limited CS size on some cores. */ /* Available partition space is limited to 8 tile quadrants. */ usable_partition_size = MIN2((tile_size_x * tile_size_y / 4U) * 8U, usable_partition_size); } } /* Ensure that maximum number of partitions in use is not greater * than the total number of partitions available. */ partitions_available = MIN2(max_partitions, usable_partition_size / partition_size); if (PVR_HAS_FEATURE(dev_info, xt_top_infrastructure)) max_phantoms = dev_runtime_info->num_phantoms; else if (PVR_HAS_FEATURE(dev_info, roguexe)) max_phantoms = PVR_GET_FEATURE_VALUE(dev_info, num_raster_pipes, 0); else max_phantoms = 1; for (uint32_t i = 0; i < max_phantoms; i++) { uint32_t usc_tiles_in_flight = partitions_available; uint32_t isp_tiles_in_flight; /* Cores with tiles size other than 16x16 use tile quadrants. */ if (tile_size_x != 16) { uint32_t num_clusters = PVR_GET_FEATURE_VALUE(dev_info, num_clusters, 0U); usc_tiles_in_flight = (usc_tiles_in_flight * MIN2(4U, num_clusters - (4U * i))) / 4U; } assert(usc_tiles_in_flight > 0); isp_tiles_in_flight = PVR_GET_FEATURE_VALUE(dev_info, isp_max_tiles_in_flight, 0); /* Ensure that maximum number of ISP tiles in flight is not greater * than the maximum number of USC tiles in flight. */ if (!PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) || PVR_GET_FEATURE_VALUE(dev_info, simple_parameter_format_version, 0) != 2) { isp_tiles_in_flight /= dev_runtime_info->num_phantoms; } isp_tiles_in_flight = MIN2(usc_tiles_in_flight, isp_tiles_in_flight); /* Limit the number of tiles in flight if the shaders have * requested a large allocation of local memory. */ if (max_tiles_in_flight > 0U) { isp_tiles_in_flight = MIN2(usc_tiles_in_flight, max_tiles_in_flight); if (PVR_HAS_FEATURE(dev_info, roguexe)) { if (tile_size_x == 16) { /* The FW infers the tiles in flight value from the * partitions setting. */ /* Partitions per tile. */ partitions_available = isp_tiles_in_flight; } else { /* Partitions per tile quadrant. */ partitions_available = isp_tiles_in_flight * 4U; } } } /* Due to limitations of ISP_CTL_PIPE there can only be a difference of * 1 between Phantoms. */ if (total_tiles_in_flight > (isp_tiles_in_flight + 1U)) total_tiles_in_flight = isp_tiles_in_flight + 1U; total_tiles_in_flight += isp_tiles_in_flight; } if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) && PVR_GET_FEATURE_VALUE(dev_info, simple_parameter_format_version, 0) == 2) { /* Limit the ISP tiles in flight to fit into the available USC partition * store. */ total_tiles_in_flight = MIN2(total_tiles_in_flight, partitions_available); } if (PVR_HAS_FEATURE(dev_info, paired_tiles) && paired_tiles) { total_tiles_in_flight = MIN2(total_tiles_in_flight, partitions_available / 2); } pvr_csb_pack (pixel_ctl_out, CR_USC_PIXEL_OUTPUT_CTRL, reg) { if (pixel_width == 1 && usc_min_output_regs == 1) { reg.width = PVRX(CR_PIXEL_WIDTH_1REGISTER); } else if (pixel_width == 2) { reg.width = PVRX(CR_PIXEL_WIDTH_2REGISTERS); } else if (pixel_width == 4) { reg.width = PVRX(CR_PIXEL_WIDTH_4REGISTERS); } else if (pixel_width == 8 && PVR_HAS_FEATURE(dev_info, eight_output_registers)) { reg.width = PVRX(CR_PIXEL_WIDTH_8REGISTERS); } else if (usc_min_output_regs == 1) { reg.width = PVRX(CR_PIXEL_WIDTH_1REGISTER); } else { reg.width = PVRX(CR_PIXEL_WIDTH_2REGISTERS); } if (PVR_HAS_FEATURE(dev_info, usc_pixel_partition_mask)) { /* Setup the partition mask based on the maximum number of * partitions available. */ reg.partition_mask = (1 << max_partitions) - 1; } else { reg.enable_4th_partition = true; /* Setup the partition mask based on the number of partitions * available. */ reg.partition_mask = (1U << partitions_available) - 1U; } } pvr_csb_pack (isp_ctl_out, CR_ISP_CTL, reg) { if (PVR_HAS_FEATURE(dev_info, xt_top_infrastructure)) reg.pipe_enable = (2 * total_tiles_in_flight) - 1; else reg.pipe_enable = total_tiles_in_flight - 1; } }