/*
 * Copyright © 2022 Imagination Technologies Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
23 
24 #include <stdbool.h>
25 #include <stdint.h>
26 
27 #include "hwdef/rogue_hw_defs.h"
28 #include "hwdef/rogue_hw_utils.h"
29 #include "pvr_device_info.h"
30 #include "pvr_job_common.h"
31 #include "pvr_private.h"
32 #include "util/macros.h"
33 #include "util/u_math.h"
34 #include "vk_alloc.h"
35 #include "vk_format.h"
36 #include "vk_object.h"
37 
38 /* clang-format off */
PVRX(PBESTATE_SWIZ)39 static enum PVRX(PBESTATE_SWIZ)
40 pvr_get_pbe_hw_swizzle(VkComponentSwizzle comp, enum pipe_swizzle swz)
41 /* clang-format on */
42 {
43    switch (swz) {
44    case PIPE_SWIZZLE_0:
45       return ROGUE_PBESTATE_SWIZ_ZERO;
46    case PIPE_SWIZZLE_1:
47       return ROGUE_PBESTATE_SWIZ_ONE;
48    case PIPE_SWIZZLE_X:
49       return ROGUE_PBESTATE_SWIZ_SOURCE_CHAN0;
50    case PIPE_SWIZZLE_Y:
51       return ROGUE_PBESTATE_SWIZ_SOURCE_CHAN1;
52    case PIPE_SWIZZLE_Z:
53       return ROGUE_PBESTATE_SWIZ_SOURCE_CHAN2;
54    case PIPE_SWIZZLE_W:
55       return ROGUE_PBESTATE_SWIZ_SOURCE_CHAN3;
56    case PIPE_SWIZZLE_NONE:
57       if (comp == VK_COMPONENT_SWIZZLE_A)
58          return ROGUE_PBESTATE_SWIZ_ONE;
59       else
60          return ROGUE_PBESTATE_SWIZ_ZERO;
61    default:
62       unreachable("Unknown enum pipe_swizzle");
63    };
64 }
65 
/* Chooses the PBE source format (F16 per channel or 8 per channel) and the
 * gamma setting for vk_format.
 *
 * gamma_out receives default_gamma unless the format is sRGB (and was not
 * already classified as 32-bit, integer or float), in which case it receives
 * PVR_PBE_GAMMA_ENABLED.
 *
 * with_packed_usc_channel only matters for formats that reach the final
 * fallback (first channel no wider than 8 bits): packed selects
 * 8 per channel, unpacked selects F16 per channel.
 */
void pvr_pbe_get_src_format_and_gamma(VkFormat vk_format,
                                      enum pvr_pbe_gamma default_gamma,
                                      bool with_packed_usc_channel,
                                      uint32_t *const src_format_out,
                                      enum pvr_pbe_gamma *const gamma_out)
{
   const uint32_t first_chan_bits = vk_format_get_channel_width(vk_format, 0);
   bool use_f16;

   *gamma_out = default_gamma;

   /* The checks are ordered; the first one that matches decides. */
   if (vk_format_has_32bit_component(vk_format) ||
       vk_format_is_int(vk_format)) {
      use_f16 = false;
   } else if (vk_format_is_float(vk_format)) {
      use_f16 = true;
   } else if (vk_format_is_srgb(vk_format)) {
      /* Gamma'd formats take an F16 source and turn gamma on. */
      *gamma_out = PVR_PBE_GAMMA_ENABLED;
      use_f16 = true;
   } else if ((vk_format_has_depth(vk_format) &&
               vk_format_get_component_bits(vk_format,
                                            UTIL_FORMAT_COLORSPACE_ZS,
                                            0) > 16) ||
              (vk_format_has_stencil(vk_format) &&
               vk_format_get_component_bits(vk_format,
                                            UTIL_FORMAT_COLORSPACE_ZS,
                                            1) > 0) ||
              first_chan_bits > 16) {
      /* ZS component 0 wider than 16 bits, any bits in ZS component 1, or a
       * first channel wider than 16 bits.
       */
      use_f16 = false;
   } else if (first_chan_bits > 8) {
      use_f16 = true;
   } else {
      use_f16 = !with_packed_usc_channel;
   }

   *src_format_out = use_f16 ? PVRX(PBESTATE_SOURCE_FORMAT_F16_PER_CHANNEL)
                             : PVRX(PBESTATE_SOURCE_FORMAT_8_PER_CHANNEL);
}
106 
pvr_pbe_get_src_pos(const struct pvr_device_info * dev_info,enum pvr_pbe_source_start_pos source_start,uint32_t * const src_pos_out,bool * const src_pos_offset_128_out)107 static void pvr_pbe_get_src_pos(const struct pvr_device_info *dev_info,
108                                 enum pvr_pbe_source_start_pos source_start,
109                                 uint32_t *const src_pos_out,
110                                 bool *const src_pos_offset_128_out)
111 {
112    *src_pos_offset_128_out = false;
113 
114    switch (source_start) {
115    case PVR_PBE_STARTPOS_BIT32:
116       *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT32);
117       break;
118 
119    case PVR_PBE_STARTPOS_BIT64:
120       *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT64);
121       break;
122 
123    case PVR_PBE_STARTPOS_BIT96:
124       *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT96);
125       break;
126 
127    case PVR_PBE_STARTPOS_BIT0:
128    default:
129       if (PVR_HAS_FEATURE(dev_info, eight_output_registers)) {
130          switch (source_start) {
131          case PVR_PBE_STARTPOS_BIT128:
132             *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT0);
133             *src_pos_offset_128_out = true;
134             break;
135 
136          case PVR_PBE_STARTPOS_BIT160:
137             *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT32);
138             *src_pos_offset_128_out = true;
139             break;
140 
141          case PVR_PBE_STARTPOS_BIT192:
142             *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT64);
143             *src_pos_offset_128_out = true;
144             break;
145 
146          case PVR_PBE_STARTPOS_BIT224:
147             *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT96);
148             *src_pos_offset_128_out = true;
149             break;
150 
151          default:
152             *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT0);
153             break;
154          }
155       } else {
156          *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT0);
157       }
158       break;
159    }
160 }
161 
pvr_pbe_pack_state(const struct pvr_device_info * dev_info,const struct pvr_pbe_surf_params * surface_params,const struct pvr_pbe_render_params * render_params,uint32_t pbe_cs_words[static const ROGUE_NUM_PBESTATE_STATE_WORDS],uint64_t pbe_reg_words[static const ROGUE_NUM_PBESTATE_REG_WORDS])162 void pvr_pbe_pack_state(
163    const struct pvr_device_info *dev_info,
164    const struct pvr_pbe_surf_params *surface_params,
165    const struct pvr_pbe_render_params *render_params,
166    uint32_t pbe_cs_words[static const ROGUE_NUM_PBESTATE_STATE_WORDS],
167    uint64_t pbe_reg_words[static const ROGUE_NUM_PBESTATE_REG_WORDS])
168 {
169    /* This function needs updating if the value of
170     * ROGUE_NUM_PBESTATE_STATE_WORDS changes, so check that it's the expected
171     * value.
172     */
173    STATIC_ASSERT(ROGUE_NUM_PBESTATE_STATE_WORDS == 2);
174 
175    /* This function needs updating if the value of ROGUE_NUM_PBESTATE_REG_WORDS
176     * changes, so check that it's the expected value.
177     */
178    STATIC_ASSERT(ROGUE_NUM_PBESTATE_REG_WORDS == 3);
179 
180    pbe_reg_words[2] = 0;
181 
182    if (surface_params->z_only_render) {
183       pbe_cs_words[0] = 0;
184 
185       pvr_csb_pack (&pbe_cs_words[1], PBESTATE_STATE_WORD1, state) {
186          state.emptytile = true;
187       }
188 
189       pbe_reg_words[0] = 0;
190       pbe_reg_words[1] = 0;
191 
192       return;
193    }
194 
195    pvr_csb_pack (&pbe_cs_words[0], PBESTATE_STATE_WORD0, state) {
196       state.address_low = surface_params->addr;
197    }
198 
199    pvr_csb_pack (&pbe_cs_words[1], PBESTATE_STATE_WORD1, state) {
200       state.address_high = surface_params->addr;
201 
202       state.source_format = surface_params->source_format;
203 
204       pvr_pbe_get_src_pos(dev_info,
205                           render_params->source_start,
206                           &state.source_pos,
207                           &state.source_pos_offset_128);
208 
209       /* MRT index (Use 0 for a single render target)/ */
210       state.mrt_index = render_params->mrt_index;
211 
212       /* Normalization flag based on output format. */
213       state.norm = surface_params->is_normalized;
214 
215       state.packmode = surface_params->pbe_packmode;
216    }
217 
218    pvr_csb_pack (&pbe_reg_words[0], PBESTATE_REG_WORD0, reg) {
219       reg.tilerelative = true;
220 
221       switch (surface_params->mem_layout) {
222       case PVR_MEMLAYOUT_TWIDDLED:
223          reg.memlayout = PVRX(PBESTATE_MEMLAYOUT_TWIDDLE_2D);
224          break;
225 
226       case PVR_MEMLAYOUT_3DTWIDDLED:
227          reg.memlayout = PVRX(PBESTATE_MEMLAYOUT_TWIDDLE_3D);
228          break;
229 
230       case PVR_MEMLAYOUT_LINEAR:
231       default:
232          reg.memlayout = PVRX(PBESTATE_MEMLAYOUT_LINEAR);
233          break;
234       }
235 
236       /* FIXME: Remove rotation and y_flip hardcoding if needed. */
237       reg.rotation = PVRX(PBESTATE_ROTATION_TYPE_0_DEG);
238       reg.y_flip = false;
239 
240       /* Note: Due to gamma being overridden above, anything other than
241        * ENABLED/NONE is ignored.
242        */
243       if (surface_params->gamma == PVR_PBE_GAMMA_ENABLED) {
244          reg.gamma = true;
245 
246          if (surface_params->nr_components == 2)
247             reg.twocomp_gamma =
248                PVRX(PBESTATE_TWOCOMP_GAMMA_GAMMA_BOTH_CHANNELS);
249       }
250 
251       reg.linestride = (surface_params->stride - 1) /
252                        PVRX(PBESTATE_REG_WORD0_LINESTRIDE_UNIT_SIZE);
253       reg.minclip_x = render_params->min_x_clip;
254 
255       reg.swiz_chan0 = pvr_get_pbe_hw_swizzle(VK_COMPONENT_SWIZZLE_R,
256                                               surface_params->swizzle[0]);
257       reg.swiz_chan1 = pvr_get_pbe_hw_swizzle(VK_COMPONENT_SWIZZLE_G,
258                                               surface_params->swizzle[1]);
259       reg.swiz_chan2 = pvr_get_pbe_hw_swizzle(VK_COMPONENT_SWIZZLE_B,
260                                               surface_params->swizzle[2]);
261       reg.swiz_chan3 = pvr_get_pbe_hw_swizzle(VK_COMPONENT_SWIZZLE_A,
262                                               surface_params->swizzle[3]);
263 
264       if (surface_params->mem_layout == PVR_MEMLAYOUT_3DTWIDDLED)
265          reg.size_z = util_logbase2_ceil(surface_params->depth);
266 
267       reg.downscale = surface_params->down_scale;
268    }
269 
270    pvr_csb_pack (&pbe_reg_words[1], PBESTATE_REG_WORD1, reg) {
271       if (surface_params->mem_layout == PVR_MEMLAYOUT_TWIDDLED ||
272           surface_params->mem_layout == PVR_MEMLAYOUT_3DTWIDDLED) {
273          reg.size_x = util_logbase2_ceil(surface_params->width);
274          reg.size_y = util_logbase2_ceil(surface_params->height);
275       }
276 
277       reg.minclip_y = render_params->min_y_clip;
278       reg.maxclip_x = render_params->max_x_clip;
279       reg.zslice = render_params->slice;
280       reg.maxclip_y = render_params->max_y_clip;
281    }
282 }
283 
284 /* TODO: Split this into smaller functions to make it easier to follow. When
285  * doing this, it would be nice to have a function that returns
286  * total_tiles_in_flight so that CR_ISP_CTL can be fully packed in
287  * pvr_render_job_ws_fragment_state_init().
288  */
pvr_setup_tiles_in_flight(const struct pvr_device_info * dev_info,const struct pvr_device_runtime_info * dev_runtime_info,uint32_t msaa_mode,uint32_t pixel_width,bool paired_tiles,uint32_t max_tiles_in_flight,uint32_t * const isp_ctl_out,uint32_t * const pixel_ctl_out)289 void pvr_setup_tiles_in_flight(
290    const struct pvr_device_info *dev_info,
291    const struct pvr_device_runtime_info *dev_runtime_info,
292    uint32_t msaa_mode,
293    uint32_t pixel_width,
294    bool paired_tiles,
295    uint32_t max_tiles_in_flight,
296    uint32_t *const isp_ctl_out,
297    uint32_t *const pixel_ctl_out)
298 {
299    uint32_t total_tiles_in_flight = 0;
300    uint32_t usable_partition_size;
301    uint32_t partitions_available;
302    uint32_t usc_min_output_regs;
303    uint32_t max_partitions;
304    uint32_t partition_size;
305    uint32_t max_phantoms;
306    uint32_t tile_size_x;
307    uint32_t tile_size_y;
308    uint32_t isp_samples;
309 
310    /* Round up the pixel width to the next allocation granularity. */
311    usc_min_output_regs =
312       PVR_GET_FEATURE_VALUE(dev_info, usc_min_output_registers_per_pix, 0);
313    pixel_width = MAX2(pixel_width, usc_min_output_regs);
314    pixel_width = util_next_power_of_two(pixel_width);
315 
316    assert(pixel_width <= rogue_get_max_output_regs_per_pixel(dev_info));
317 
318    partition_size = pixel_width;
319 
320    isp_samples = PVR_GET_FEATURE_VALUE(dev_info, isp_samples_per_pixel, 1);
321    if (isp_samples == 2) {
322       if (msaa_mode != PVRX(CR_ISP_AA_MODE_TYPE_AA_NONE))
323          partition_size *= 2U;
324    } else if (isp_samples == 4) {
325       if (msaa_mode == PVRX(CR_ISP_AA_MODE_TYPE_AA_4X) ||
326           msaa_mode == PVRX(CR_ISP_AA_MODE_TYPE_AA_8X))
327          partition_size *= 4U;
328       else if (msaa_mode == PVRX(CR_ISP_AA_MODE_TYPE_AA_2X))
329          partition_size *= 2U;
330    }
331 
332    /* Cores with a tile size of 16x16 don't have quadrant affinity. Hence the
333     * partition size is the same as for a 32x32 tile quadrant (with no MSAA).
334     * When MSAA is enabled, the USC has to process half the tile (16x8 pixels).
335     */
336    tile_size_x = PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 0);
337    tile_size_y = PVR_GET_FEATURE_VALUE(dev_info, tile_size_y, 0);
338 
339    /* We only support square tiles. */
340    assert(tile_size_x == tile_size_y);
341 
342    if (tile_size_x == 16U) {
343       /* Cores with 16x16 tiles does not use tile quadrants. */
344       partition_size *= tile_size_x * tile_size_y;
345    } else {
346       /* Size of a tile quadrant (in dwords). */
347       partition_size *= (tile_size_x * tile_size_y / 4U);
348    }
349 
350    /* Maximum available partition space for partitions of this size. */
351    max_partitions = PVR_GET_FEATURE_VALUE(dev_info, max_partitions, 0);
352    usable_partition_size = MIN2(dev_runtime_info->total_reserved_partition_size,
353                                 partition_size * max_partitions);
354 
355    if (PVR_GET_FEATURE_VALUE(dev_info, common_store_size_in_dwords, 0) <
356        (1024 * 4 * 4)) {
357       /* Do not apply the limit for cores with 16x16 tile size (no quadrant
358        * affinity). */
359       if (tile_size_x != 16) {
360          /* This is to counter the extremely limited CS size on some cores.
361           */
362          /* Available partition space is limited to 8 tile quadrants. */
363          usable_partition_size =
364             MIN2((tile_size_x * tile_size_y / 4U) * 8U, usable_partition_size);
365       }
366    }
367 
368    /* Ensure that maximum number of partitions in use is not greater
369     * than the total number of partitions available.
370     */
371    partitions_available =
372       MIN2(max_partitions, usable_partition_size / partition_size);
373 
374    if (PVR_HAS_FEATURE(dev_info, xt_top_infrastructure))
375       max_phantoms = dev_runtime_info->num_phantoms;
376    else if (PVR_HAS_FEATURE(dev_info, roguexe))
377       max_phantoms = PVR_GET_FEATURE_VALUE(dev_info, num_raster_pipes, 0);
378    else
379       max_phantoms = 1;
380 
381    for (uint32_t i = 0; i < max_phantoms; i++) {
382       uint32_t usc_tiles_in_flight = partitions_available;
383       uint32_t isp_tiles_in_flight;
384 
385       /* Cores with tiles size other than 16x16 use tile quadrants. */
386       if (tile_size_x != 16) {
387          uint32_t num_clusters =
388             PVR_GET_FEATURE_VALUE(dev_info, num_clusters, 0U);
389          usc_tiles_in_flight =
390             (usc_tiles_in_flight * MIN2(4U, num_clusters - (4U * i))) / 4U;
391       }
392 
393       assert(usc_tiles_in_flight > 0);
394 
395       isp_tiles_in_flight =
396          PVR_GET_FEATURE_VALUE(dev_info, isp_max_tiles_in_flight, 0);
397       /* Ensure that maximum number of ISP tiles in flight is not greater
398        * than the maximum number of USC tiles in flight.
399        */
400       if (!PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) ||
401           PVR_GET_FEATURE_VALUE(dev_info, simple_parameter_format_version, 0) !=
402              2) {
403          isp_tiles_in_flight /= dev_runtime_info->num_phantoms;
404       }
405 
406       isp_tiles_in_flight = MIN2(usc_tiles_in_flight, isp_tiles_in_flight);
407 
408       /* Limit the number of tiles in flight if the shaders have
409        * requested a large allocation of local memory.
410        */
411       if (max_tiles_in_flight > 0U) {
412          isp_tiles_in_flight = MIN2(usc_tiles_in_flight, max_tiles_in_flight);
413 
414          if (PVR_HAS_FEATURE(dev_info, roguexe)) {
415             if (tile_size_x == 16) {
416                /* The FW infers the tiles in flight value from the
417                 * partitions setting.
418                 */
419                /* Partitions per tile. */
420                partitions_available = isp_tiles_in_flight;
421             } else {
422                /* Partitions per tile quadrant. */
423                partitions_available = isp_tiles_in_flight * 4U;
424             }
425          }
426       }
427 
428       /* Due to limitations of ISP_CTL_PIPE there can only be a difference of
429        * 1 between Phantoms.
430        */
431       if (total_tiles_in_flight > (isp_tiles_in_flight + 1U))
432          total_tiles_in_flight = isp_tiles_in_flight + 1U;
433 
434       total_tiles_in_flight += isp_tiles_in_flight;
435    }
436 
437    if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) &&
438        PVR_GET_FEATURE_VALUE(dev_info, simple_parameter_format_version, 0) ==
439           2) {
440       /* Limit the ISP tiles in flight to fit into the available USC partition
441        * store.
442        */
443       total_tiles_in_flight = MIN2(total_tiles_in_flight, partitions_available);
444    }
445 
446    if (PVR_HAS_FEATURE(dev_info, paired_tiles) && paired_tiles) {
447       total_tiles_in_flight =
448          MIN2(total_tiles_in_flight, partitions_available / 2);
449    }
450 
451    pvr_csb_pack (pixel_ctl_out, CR_USC_PIXEL_OUTPUT_CTRL, reg) {
452       if (pixel_width == 1 && usc_min_output_regs == 1) {
453          reg.width = PVRX(CR_PIXEL_WIDTH_1REGISTER);
454       } else if (pixel_width == 2) {
455          reg.width = PVRX(CR_PIXEL_WIDTH_2REGISTERS);
456       } else if (pixel_width == 4) {
457          reg.width = PVRX(CR_PIXEL_WIDTH_4REGISTERS);
458       } else if (pixel_width == 8 &&
459                  PVR_HAS_FEATURE(dev_info, eight_output_registers)) {
460          reg.width = PVRX(CR_PIXEL_WIDTH_8REGISTERS);
461       } else if (usc_min_output_regs == 1) {
462          reg.width = PVRX(CR_PIXEL_WIDTH_1REGISTER);
463       } else {
464          reg.width = PVRX(CR_PIXEL_WIDTH_2REGISTERS);
465       }
466 
467       if (PVR_HAS_FEATURE(dev_info, usc_pixel_partition_mask)) {
468          /* Setup the partition mask based on the maximum number of
469           * partitions available.
470           */
471          reg.partition_mask = (1 << max_partitions) - 1;
472       } else {
473          reg.enable_4th_partition = true;
474 
475          /* Setup the partition mask based on the number of partitions
476           * available.
477           */
478          reg.partition_mask = (1U << partitions_available) - 1U;
479       }
480    }
481 
482    pvr_csb_pack (isp_ctl_out, CR_ISP_CTL, reg) {
483       if (PVR_HAS_FEATURE(dev_info, xt_top_infrastructure))
484          reg.pipe_enable = (2 * total_tiles_in_flight) - 1;
485       else
486          reg.pipe_enable = total_tiles_in_flight - 1;
487    }
488 }
489