• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2022 Imagination Technologies Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a copy
5  * of this software and associated documentation files (the "Software"), to deal
6  * in the Software without restriction, including without limitation the rights
7  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8  * copies of the Software, and to permit persons to whom the Software is
9  * furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <stddef.h>
27 #include <stdint.h>
28 #include <string.h>
29 #include <vulkan/vulkan.h>
30 
31 #include "pvr_csb.h"
32 #include "pvr_csb_enum_helpers.h"
33 #include "pvr_formats.h"
34 #include "pvr_job_common.h"
35 #include "pvr_job_context.h"
36 #include "pvr_job_transfer.h"
37 #include "pvr_private.h"
38 #include "pvr_tex_state.h"
39 #include "pvr_transfer_frag_store.h"
40 #include "pvr_types.h"
41 #include "usc/pvr_uscgen.h"
42 #include "pvr_util.h"
43 #include "pvr_winsys.h"
44 #include "util/bitscan.h"
45 #include "util/list.h"
46 #include "util/macros.h"
47 #include "util/u_math.h"
48 #define XXH_INLINE_ALL
49 #include "util/xxhash.h"
50 #include "vk_format.h"
51 #include "vk_log.h"
52 #include "vk_sync.h"
53 
/* Max number of passes a transfer may be split into when a custom mapping
 * is required (see struct pvr_transfer_custom_mapping).
 */
#define PVR_TRANSFER_MAX_PASSES 10U
/* Max clip rectangles carried per pass. */
#define PVR_TRANSFER_MAX_CLIP_RECTS 4U
/* Upper bound on prepares batched into a single transfer submit. */
#define PVR_TRANSFER_MAX_PREPARES_PER_SUBMIT 16U
#define PVR_TRANSFER_MAX_CUSTOM_RECTS 3U

/* Number of triangles sent to the TSP per raster. */
#define PVR_TRANSFER_NUM_LAYERS 1U

#define PVR_MAX_WIDTH 16384
#define PVR_MAX_HEIGHT 16384

/* Largest clip extent: 8K when the screen_size8K feature is present,
 * 16K otherwise.
 */
#define PVR_MAX_CLIP_SIZE(dev_info) \
   (PVR_HAS_FEATURE(dev_info, screen_size8K) ? 8192U : 16384U)
67 
/* Tile pairing mode for a render: none, or pairing along X or Y.
 * NOTE(review): exact ISP pairing semantics are not evident from this
 * file — confirm against the code that consumes state->pair_tiles.
 */
enum pvr_paired_tiles {
   PVR_PAIRED_TILES_NONE,
   PVR_PAIRED_TILES_X,
   PVR_PAIRED_TILES_Y
};
73 
/* One source of a workaround ("wa") transfer pass. */
struct pvr_transfer_wa_source {
   /* Offset applied to the source for this pass.
    * NOTE(review): unit (texels vs. bytes) not evident here — confirm.
    */
   uint32_t src_offset;
   /* Number of valid entries in mappings[]. */
   uint32_t mapping_count;
   struct pvr_rect_mapping mappings[PVR_TRANSFER_MAX_CUSTOM_MAPPINGS];
   bool extend_height;
};
80 
/* One pass of a custom-mapped transfer: destination offset, the sources
 * feeding it and the clip rectangles bounding the render.
 */
struct pvr_transfer_pass {
   uint32_t dst_offset;

   /* Number of valid entries in sources[]. */
   uint32_t source_count;
   struct pvr_transfer_wa_source sources[PVR_TRANSFER_MAX_SOURCES];

   /* Number of valid entries in clip_rects[]. */
   uint32_t clip_rects_count;
   VkRect2D clip_rects[PVR_TRANSFER_MAX_CLIP_RECTS];
};
90 
/* Custom mapping of a blit into one or more passes. When pass_count > 0
 * the per-pass clip rects override the command scissor (see
 * pvr_pbe_setup_codegen_defaults). NOTE(review): the texel unwind/extend
 * fields are consumed outside this view — confirm their units.
 */
struct pvr_transfer_custom_mapping {
   bool double_stride;
   uint32_t texel_unwind_src;
   uint32_t texel_unwind_dst;
   uint32_t texel_extend_src;
   uint32_t texel_extend_dst;
   /* Number of valid entries in passes[]. */
   uint32_t pass_count;
   struct pvr_transfer_pass passes[PVR_TRANSFER_MAX_PASSES];
   uint32_t max_clip_rects;
   int32_t max_clip_size;
};
103 
/* Structure representing a layer iteration. */
struct pvr_transfer_3d_iteration {
   /* 12 texture-coordinate words for one iteration.
    * NOTE(review): exact layout (vertices x components) is not evident
    * from this file — confirm against the code filling this in.
    */
   uint32_t texture_coords[12];
};
107 
/* Working state for preparing one transfer 3D blit: the packed HW
 * registers plus the intermediate values used while building them.
 */
struct pvr_transfer_3d_state {
   struct pvr_winsys_transfer_regs regs;

   /* True when the destination rect is empty / off-screen; the render is
    * then fully clipped away (see pvr_pbe_setup_codegen_defaults).
    */
   bool empty_dst;
   /* True when blend-resolving a multisampled source into a
    * single-sampled destination (see pvr_pbe_src_format).
    */
   bool down_scale;
   /* Write all channels present in the dst from the USC even if those are
    * constants.
    */
   bool dont_force_pbe;

   /* The rate of the shader. */
   uint32_t msaa_multiplier;
   /* Top left corner of the render in ISP tiles. */
   uint32_t origin_x_in_tiles;
   /* Top left corner of the render in ISP tiles. */
   uint32_t origin_y_in_tiles;
   /* Width of the render in ISP tiles. */
   uint32_t width_in_tiles;
   /* Height of the render in ISP tiles. */
   uint32_t height_in_tiles;

   /* Width of a sample in registers (pixel partition width). */
   uint32_t usc_pixel_width;

   /* Properties of the USC shader. */
   struct pvr_tq_shader_properties shader_props;

   /* TODO: Use pvr_dev_addr_t of an offset type for these. */
   uint32_t pds_shader_task_offset;
   uint32_t tex_state_data_offset;
   uint32_t uni_tex_code_offset;

   uint32_t uniform_data_size;
   uint32_t tex_state_data_size;
   uint32_t usc_coeff_regs;

   /* Pointer into the common store. */
   uint32_t common_ptr;
   /* Pointer into the dynamic constant reg buffer. */
   uint32_t dynamic_const_reg_ptr;
   /* Pointer into the USC constant reg buffer. */
   uint32_t usc_const_reg_ptr;

   uint32_t pds_coeff_task_offset;
   uint32_t coeff_data_size;

   /* Number of temporary 32bit registers used by PDS. */
   uint32_t pds_temps;

   /* Pass splitting applied to this blit, if any. */
   struct pvr_transfer_custom_mapping custom_mapping;
   /* Index of the custom-mapping pass currently being prepared. */
   uint32_t pass_idx;

   /* Filter mode per source. */
   enum pvr_filter filter[PVR_TRANSFER_MAX_SOURCES];
   bool custom_filter;

   /* Tile pairing mode for this render. */
   enum pvr_paired_tiles pair_tiles;
};
165 
/* Everything prepared for one transfer command within a submit. */
struct pvr_transfer_prep_data {
   struct pvr_winsys_transfer_cmd_flags flags;
   struct pvr_transfer_3d_state state;
};
170 
/* A batch of prepares handed to the transfer context as one submit. */
struct pvr_transfer_submit {
   /* Number of valid entries in prep_array[]. */
   uint32_t prep_count;
   struct pvr_transfer_prep_data
      prep_array[PVR_TRANSFER_MAX_PREPARES_PER_SUBMIT];
};
176 
/* Map a format's block size in bits to the raw PBE pixel source wide
 * enough to hold it: RAW32 up to 32 bpp, RAW64 up to 64 bpp, RAW128
 * beyond that.
 */
static enum pvr_transfer_pbe_pixel_src pvr_pbe_src_format_raw(VkFormat format)
{
   const uint32_t bits_per_block = vk_format_get_blocksizebits(format);

   if (bits_per_block > 64U)
      return PVR_TRANSFER_PBE_PIXEL_SRC_RAW128;

   if (bits_per_block > 32U)
      return PVR_TRANSFER_PBE_PIXEL_SRC_RAW64;

   return PVR_TRANSFER_PBE_PIXEL_SRC_RAW32;
}
188 
pvr_pbe_src_format_pick_depth(const VkFormat src_format,const VkFormat dst_format,enum pvr_transfer_pbe_pixel_src * const src_format_out)189 static VkResult pvr_pbe_src_format_pick_depth(
190    const VkFormat src_format,
191    const VkFormat dst_format,
192    enum pvr_transfer_pbe_pixel_src *const src_format_out)
193 {
194    if (dst_format != VK_FORMAT_D24_UNORM_S8_UINT)
195       return VK_ERROR_FORMAT_NOT_SUPPORTED;
196 
197    switch (src_format) {
198    case VK_FORMAT_D24_UNORM_S8_UINT:
199    case VK_FORMAT_X8_D24_UNORM_PACK32:
200       *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D24S8_D24S8;
201       break;
202 
203    case VK_FORMAT_D32_SFLOAT:
204       *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32_D24S8;
205       break;
206 
207    default:
208       return VK_ERROR_FORMAT_NOT_SUPPORTED;
209    }
210 
211    return VK_SUCCESS;
212 }
213 
pvr_pbe_src_format_pick_stencil(const VkFormat src_format,const VkFormat dst_format,enum pvr_transfer_pbe_pixel_src * const src_format_out)214 static VkResult pvr_pbe_src_format_pick_stencil(
215    const VkFormat src_format,
216    const VkFormat dst_format,
217    enum pvr_transfer_pbe_pixel_src *const src_format_out)
218 {
219    if ((src_format != VK_FORMAT_D24_UNORM_S8_UINT &&
220         src_format != VK_FORMAT_S8_UINT) ||
221        dst_format != VK_FORMAT_D24_UNORM_S8_UINT) {
222       return VK_ERROR_FORMAT_NOT_SUPPORTED;
223    }
224 
225    if (src_format == VK_FORMAT_S8_UINT)
226       *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D24S8;
227    else
228       *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D24S8;
229 
230    return VK_SUCCESS;
231 }
232 
/* Select the PBE pixel source format for a depth/stencil transfer.
 *
 * DS-merge transfers are dispatched to the depth-pick or stencil-pick
 * helper; plain copies select a raw or converting path based on the
 * src/dst format pair. Returns VK_ERROR_FORMAT_NOT_SUPPORTED for
 * combinations the transfer path cannot produce.
 */
static VkResult
pvr_pbe_src_format_ds(const struct pvr_transfer_cmd_surface *src,
                      const enum pvr_filter filter,
                      const VkFormat dst_format,
                      const uint32_t flags,
                      const bool down_scale,
                      enum pvr_transfer_pbe_pixel_src *src_format_out)
{
   const VkFormat src_format = src->vk_format;

   const bool src_depth = vk_format_has_depth(src_format);
   const bool dst_depth = vk_format_has_depth(dst_format);
   const bool src_stencil = vk_format_has_stencil(src_format);
   const bool dst_stencil = vk_format_has_stencil(dst_format);

   if (flags & PVR_TRANSFER_CMD_FLAGS_DSMERGE) {
      /* Merging, so destination should always have both. */
      if (!dst_depth || !dst_stencil)
         return VK_ERROR_FORMAT_NOT_SUPPORTED;

      if (flags & PVR_TRANSFER_CMD_FLAGS_PICKD) {
         return pvr_pbe_src_format_pick_depth(src_format,
                                              dst_format,
                                              src_format_out);
      } else {
         return pvr_pbe_src_format_pick_stencil(src_format,
                                                dst_format,
                                                src_format_out);
      }
   }

   /* We can't invent channels out of nowhere. */
   if ((dst_depth && !src_depth) || (dst_stencil && !src_stencil))
      return VK_ERROR_FORMAT_NOT_SUPPORTED;

   switch (dst_format) {
   case VK_FORMAT_D16_UNORM:
      /* D24S8 -> D16 would drop stencil; not supported here. */
      if (src_format == VK_FORMAT_D24_UNORM_S8_UINT)
         return VK_ERROR_FORMAT_NOT_SUPPORTED;

      if (!down_scale)
         *src_format_out = pvr_pbe_src_format_raw(dst_format);
      else
         *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_U16NORM;

      break;
   case VK_FORMAT_D24_UNORM_S8_UINT:
      switch (src_format) {
      case VK_FORMAT_D24_UNORM_S8_UINT:
         /* Linear filtering needs the unpacked D24S8 path; point
          * sampling can move the packed 32-bit word raw.
          */
         if (filter == PVR_FILTER_LINEAR)
            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_D24S8;
         else
            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_RAW32;

         break;

      /* D16_UNORM results in a 0.0->1.0 float from the TPU, the same as D32 */
      case VK_FORMAT_D16_UNORM:
      case VK_FORMAT_D32_SFLOAT:
         *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D32_D24S8;
         break;

      default:
         if (filter == PVR_FILTER_LINEAR)
            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_D32S8;
         else
            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_RAW64;
      }

      break;

   case VK_FORMAT_D32_SFLOAT:
      if (src_format == VK_FORMAT_D24_UNORM_S8_UINT)
         *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D24_D32;
      else
         *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_F32;

      break;

   default:
      if (src_format == VK_FORMAT_D24_UNORM_S8_UINT)
         *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_SWAP_LMSB;
      else
         *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_RAW32;
   }

   return VK_SUCCESS;
}
321 
/**
 * How the PBE expects the output buffer for an RGBA space conversion.
 *
 * Selects the PBE pixel source format for color (non-depth/stencil)
 * transfers by destination class: integer, float/normalized, or
 * unsupported. When \p dont_force_pbe is set, the channel count is
 * derived from the destination block size instead of the channels the
 * source and destination have in common.
 */
static VkResult
pvr_pbe_src_format_normal(VkFormat src_format,
                          VkFormat dst_format,
                          bool down_scale,
                          bool dont_force_pbe,
                          enum pvr_transfer_pbe_pixel_src *src_format_out)
{
   bool dst_signed = vk_format_is_sint(dst_format) ||
                     vk_format_is_snorm(dst_format);

   if (vk_format_is_int(dst_format)) {
      uint32_t red_width;
      bool src_signed;
      uint32_t count;

      /* Integer destinations require integer sources. */
      if (!vk_format_is_int(src_format))
         return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);

      src_signed = vk_format_is_sint(src_format);

      red_width = vk_format_get_component_bits(dst_format,
                                               UTIL_FORMAT_COLORSPACE_RGB,
                                               0);

      /* Pick by destination channel width and src/dst signedness. */
      switch (red_width) {
      case 8:
         if (!src_signed && !dst_signed)
            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_UU8888;
         else if (src_signed && !dst_signed)
            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_SU8888;
         else if (!src_signed && dst_signed)
            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_US8888;
         else
            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_SS8888;

         break;

      case 10:
         /* Only the 1010102 UINT variants are supported; the ARGB order
          * additionally needs a red/blue swap.
          */
         switch (dst_format) {
         case VK_FORMAT_A2B10G10R10_UINT_PACK32:
            *src_format_out = src_signed ? PVR_TRANSFER_PBE_PIXEL_SRC_SU1010102
                                         : PVR_TRANSFER_PBE_PIXEL_SRC_UU1010102;
            break;

         case VK_FORMAT_A2R10G10B10_UINT_PACK32:
            *src_format_out = src_signed
                                 ? PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_SU1010102
                                 : PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_UU1010102;
            break;

         default:
            return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);
         }
         break;

      case 16:
         if (!src_signed && !dst_signed)
            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_UU16U16;
         else if (src_signed && !dst_signed)
            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_SU16U16;
         else if (!src_signed && dst_signed)
            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_US16S16;
         else
            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_SS16S16;

         break;

      case 32:
         /* Channel count decides between the 2- and 4-channel paths. */
         if (dont_force_pbe) {
            count = vk_format_get_blocksizebits(dst_format) / 32U;
         } else {
            count = pvr_vk_format_get_common_color_channel_count(src_format,
                                                                 dst_format);
         }

         if (!src_signed && !dst_signed) {
            *src_format_out = (count > 2U) ? PVR_TRANSFER_PBE_PIXEL_SRC_RAW128
                                           : PVR_TRANSFER_PBE_PIXEL_SRC_RAW64;
         } else if (src_signed && !dst_signed) {
            *src_format_out = (count > 2U) ? PVR_TRANSFER_PBE_PIXEL_SRC_S4XU32
                                           : PVR_TRANSFER_PBE_PIXEL_SRC_SU32U32;
         } else if (!src_signed && dst_signed) {
            *src_format_out = (count > 2U) ? PVR_TRANSFER_PBE_PIXEL_SRC_U4XS32
                                           : PVR_TRANSFER_PBE_PIXEL_SRC_US32S32;
         } else {
            /* Same signedness on both sides needs no conversion. */
            *src_format_out = (count > 2U) ? PVR_TRANSFER_PBE_PIXEL_SRC_RAW128
                                           : PVR_TRANSFER_PBE_PIXEL_SRC_RAW64;
         }
         break;

      default:
         return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);
      }

   } else if (vk_format_is_float(dst_format) ||
              pvr_vk_format_is_fully_normalized(dst_format)) {
      bool is_float = true;

      /* Float/normalized destinations need a float, normalized or
       * block-compressed source.
       */
      if (!vk_format_is_float(src_format) &&
          !pvr_vk_format_is_fully_normalized(src_format) &&
          !vk_format_is_block_compressed(src_format)) {
         return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);
      }

      if (pvr_vk_format_is_fully_normalized(dst_format)) {
         uint32_t chan_width;

         is_float = false;

         switch (dst_format) {
         case VK_FORMAT_D16_UNORM:
            /* D16 has no RGB component, so pick the depth width directly. */
            chan_width = 16;
            break;

         default:
            chan_width =
               vk_format_get_component_bits(dst_format,
                                            UTIL_FORMAT_COLORSPACE_RGB,
                                            0U);
            break;
         }

         if (src_format == dst_format) {
            switch (chan_width) {
            case 16U:
               /* Down-scale needs the normalizing path; otherwise a
                * straight 16-bit copy suffices.
                */
               if (down_scale) {
                  *src_format_out = dst_signed
                                       ? PVR_TRANSFER_PBE_PIXEL_SRC_S16NORM
                                       : PVR_TRANSFER_PBE_PIXEL_SRC_U16NORM;
               } else {
                  *src_format_out = dst_signed
                                       ? PVR_TRANSFER_PBE_PIXEL_SRC_SS16S16
                                       : PVR_TRANSFER_PBE_PIXEL_SRC_UU16U16;
               }
               break;

            case 32U:
               *src_format_out = pvr_pbe_src_format_raw(dst_format);
               break;
            default:
               /* Narrower channels fall through to the float path. */
               is_float = true;
               break;
            }
         } else {
            switch (chan_width) {
            case 16U:
               *src_format_out = dst_signed
                                    ? PVR_TRANSFER_PBE_PIXEL_SRC_S16NORM
                                    : PVR_TRANSFER_PBE_PIXEL_SRC_U16NORM;
               break;
            default:
               is_float = true;
               break;
            }
         }
      }

      if (is_float) {
         if (pvr_vk_format_has_32bit_component(dst_format)) {
            uint32_t count;

            if (dont_force_pbe) {
               count = vk_format_get_blocksizebits(dst_format) / 32U;
            } else {
               count = pvr_vk_format_get_common_color_channel_count(src_format,
                                                                    dst_format);
            }

            switch (count) {
            case 1U:
               *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_F32;
               break;
            case 2U:
               *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_F32X2;
               break;
            default:
               *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_F32X4;
               break;
            }
         } else {
            /* 8-bit UNORM RGBA destinations use the F16->U8 pack. */
            if (dst_format == VK_FORMAT_B8G8R8A8_UNORM ||
                dst_format == VK_FORMAT_R8G8B8A8_UNORM ||
                dst_format == VK_FORMAT_A8B8G8R8_UNORM_PACK32) {
               *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_F16_U8;
            } else {
               *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_F16F16;
            }
         }
      }
   } else {
      return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);
   }

   return VK_SUCCESS;
}
521 
522 static inline uint32_t
pvr_get_blit_flags(const struct pvr_transfer_cmd * transfer_cmd)523 pvr_get_blit_flags(const struct pvr_transfer_cmd *transfer_cmd)
524 {
525    return transfer_cmd->flags & PVR_TRANSFER_CMD_FLAGS_FAST2D
526              ? 0
527              : transfer_cmd->flags;
528 }
529 
/* Pick the PBE pixel source format for a transfer command, dispatching
 * to the depth/stencil or normal (color) selection helper. The result
 * is written into the layer properties of @prop.
 */
static VkResult pvr_pbe_src_format(struct pvr_transfer_cmd *transfer_cmd,
                                   struct pvr_transfer_3d_state *state,
                                   struct pvr_tq_shader_properties *prop)
{
   struct pvr_tq_layer_properties *layer = &prop->layer_props;
   /* With no sources the filter is irrelevant; default to point. */
   const enum pvr_filter filter = transfer_cmd->source_count
                                     ? transfer_cmd->sources[0].filter
                                     : PVR_FILTER_POINT;
   const uint32_t flags = transfer_cmd->flags;
   VkFormat dst_format = transfer_cmd->dst.vk_format;
   const struct pvr_transfer_cmd_surface *src;
   VkFormat src_format;
   bool down_scale;

   if (transfer_cmd->source_count > 0) {
      src = &transfer_cmd->sources[0].surface;
      /* Down-scale only for a blend resolve of a multisampled source
       * into a single-sampled destination.
       */
      down_scale = transfer_cmd->sources[0].resolve_op == PVR_RESOLVE_BLEND &&
                   transfer_cmd->sources[0].surface.sample_count > 1U &&
                   transfer_cmd->dst.sample_count <= 1U;
   } else {
      /* No source: treat the destination as its own source. */
      src = &transfer_cmd->dst;
      down_scale = false;
   }

   src_format = src->vk_format;

   /* This has to come before the rest as S8 for instance is integer and
    * signedness check fails on D24S8.
    */
   if (vk_format_is_depth_or_stencil(src_format) ||
       vk_format_is_depth_or_stencil(dst_format) ||
       flags & PVR_TRANSFER_CMD_FLAGS_DSMERGE) {
      return pvr_pbe_src_format_ds(src,
                                   filter,
                                   dst_format,
                                   flags,
                                   down_scale,
                                   &layer->pbe_format);
   }

   return pvr_pbe_src_format_normal(src_format,
                                    dst_format,
                                    down_scale,
                                    state->dont_force_pbe,
                                    &layer->pbe_format);
}
576 
/* Pack the background-object PDS registers (BGRND0/1/3) from the
 * prepared transfer state: PDS shader and texture-state addresses plus
 * the shared/uniform/texture-state/temp size fields.
 *
 * NOTE(review): dev_info is currently unused in this body — confirm
 * whether it is kept for interface symmetry or can be dropped.
 */
static inline void pvr_setup_hwbg_object(const struct pvr_device_info *dev_info,
                                         struct pvr_transfer_3d_state *state)
{
   struct pvr_winsys_transfer_regs *regs = &state->regs;

   pvr_csb_pack (&regs->pds_bgnd0_base, CR_PDS_BGRND0_BASE, reg) {
      reg.shader_addr = PVR_DEV_ADDR(state->pds_shader_task_offset);
      assert(pvr_dev_addr_is_aligned(
         reg.shader_addr,
         ROGUE_CR_PDS_BGRND0_BASE_SHADER_ADDR_ALIGNMENT));
      reg.texunicode_addr = PVR_DEV_ADDR(state->uni_tex_code_offset);
      assert(pvr_dev_addr_is_aligned(
         reg.texunicode_addr,
         ROGUE_CR_PDS_BGRND0_BASE_TEXUNICODE_ADDR_ALIGNMENT));
   }

   pvr_csb_pack (&regs->pds_bgnd1_base, CR_PDS_BGRND1_BASE, reg) {
      reg.texturedata_addr = PVR_DEV_ADDR(state->tex_state_data_offset);
      assert(pvr_dev_addr_is_aligned(
         reg.texturedata_addr,
         ROGUE_CR_PDS_BGRND1_BASE_TEXTUREDATA_ADDR_ALIGNMENT));
   }

   /* BGRND 2 not needed, background object PDS doesn't use uniform program. */

   pvr_csb_pack (&regs->pds_bgnd3_sizeinfo, CR_PDS_BGRND3_SIZEINFO, reg) {
      reg.usc_sharedsize =
         DIV_ROUND_UP(state->common_ptr,
                      ROGUE_CR_PDS_BGRND3_SIZEINFO_USC_SHAREDSIZE_UNIT_SIZE);

      /* Sizes must already be whole multiples of the HW unit sizes. */
      assert(!(state->uniform_data_size &
               (ROGUE_CR_PDS_BGRND3_SIZEINFO_PDS_UNIFORMSIZE_UNIT_SIZE - 1)));
      reg.pds_uniformsize =
         state->uniform_data_size /
         ROGUE_CR_PDS_BGRND3_SIZEINFO_PDS_UNIFORMSIZE_UNIT_SIZE;

      assert(
         !(state->tex_state_data_size &
           (ROGUE_CR_PDS_BGRND3_SIZEINFO_PDS_TEXTURESTATESIZE_UNIT_SIZE - 1)));
      reg.pds_texturestatesize =
         state->tex_state_data_size /
         ROGUE_CR_PDS_BGRND3_SIZEINFO_PDS_TEXTURESTATESIZE_UNIT_SIZE;

      reg.pds_tempsize =
         DIV_ROUND_UP(state->pds_temps,
                      ROGUE_CR_PDS_BGRND3_SIZEINFO_PDS_TEMPSIZE_UNIT_SIZE);
   }
}
625 
626 static inline bool
pvr_is_surface_aligned(pvr_dev_addr_t dev_addr,bool is_input,uint32_t bpp)627 pvr_is_surface_aligned(pvr_dev_addr_t dev_addr, bool is_input, uint32_t bpp)
628 {
629    /* 96 bpp is 32 bit granular. */
630    if (bpp == 64U || bpp == 128U) {
631       uint64_t mask = (uint64_t)((bpp >> 3U) - 1U);
632 
633       if ((dev_addr.addr & mask) != 0ULL)
634          return false;
635    }
636 
637    if (is_input) {
638       if ((dev_addr.addr &
639            (ROGUE_TEXSTATE_STRIDE_IMAGE_WORD1_TEXADDR_ALIGNMENT - 1U)) !=
640           0ULL) {
641          return false;
642       }
643    } else {
644       if ((dev_addr.addr &
645            (ROGUE_PBESTATE_STATE_WORD0_ADDRESS_LOW_ALIGNMENT - 1U)) != 0ULL) {
646          return false;
647       }
648    }
649 
650    return true;
651 }
652 
/* Validate a transfer surface's memory layout and extract its geometry.
 *
 * On success the out parameters describe the surface as the setup code
 * should see it; a linear surface with a 1px stride is rewritten as
 * twiddled to dodge the stride limit (twiddled == strided at 1px).
 * Returns VK_ERROR_FORMAT_NOT_SUPPORTED for unaligned addresses, a zero
 * or too-short stride, or an unknown layout.
 *
 * NOTE(review): the 'load' parameter is unused in this body — confirm
 * whether callers still need it.
 */
static inline VkResult
pvr_mem_layout_spec(const struct pvr_transfer_cmd_surface *surface,
                    uint32_t load,
                    bool is_input,
                    uint32_t *width_out,
                    uint32_t *height_out,
                    uint32_t *stride_out,
                    enum pvr_memlayout *mem_layout_out,
                    pvr_dev_addr_t *dev_addr_out)
{
   const uint32_t bpp = vk_format_get_blocksizebits(surface->vk_format);
   uint32_t unsigned_stride;

   *mem_layout_out = surface->mem_layout;
   *height_out = surface->height;
   *width_out = surface->width;
   *stride_out = surface->stride;
   *dev_addr_out = surface->dev_addr;

   /* Non-linear layouts are alignment-checked up front; linear surfaces
    * are checked inside their switch case below.
    */
   if (surface->mem_layout != PVR_MEMLAYOUT_LINEAR &&
       !pvr_is_surface_aligned(*dev_addr_out, is_input, bpp)) {
      return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);
   }

   switch (surface->mem_layout) {
   case PVR_MEMLAYOUT_LINEAR:
      if (surface->stride == 0U)
         return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);

      unsigned_stride = *stride_out;

      if (!pvr_is_surface_aligned(*dev_addr_out, is_input, bpp))
         return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);

      if (unsigned_stride < *width_out)
         return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);

      if (!is_input) {
         if (unsigned_stride == 1U) {
            /* Change the setup to twiddling as that doesn't hit the stride
             * limit and twiddled == strided when 1px stride.
             */
            *mem_layout_out = PVR_MEMLAYOUT_TWIDDLED;
         }
      }

      *stride_out = unsigned_stride;
      break;

   case PVR_MEMLAYOUT_TWIDDLED:
   case PVR_MEMLAYOUT_3DTWIDDLED:
      /* Ignoring stride value for twiddled/tiled surface. */
      *stride_out = *width_out;
      break;

   default:
      return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);
   }

   return VK_SUCCESS;
}
714 
/* Fill in default PBE surface and render parameters for the transfer
 * destination: format/swizzle/packmode, memory layout and address, the
 * clip rectangle, and the render origin/extent in ISP tiles.
 *
 * Returns the error from pvr_mem_layout_spec when the destination
 * surface layout cannot be expressed.
 */
static VkResult
pvr_pbe_setup_codegen_defaults(const struct pvr_device_info *dev_info,
                               const struct pvr_transfer_cmd *transfer_cmd,
                               struct pvr_transfer_3d_state *state,
                               struct pvr_pbe_surf_params *surface_params,
                               struct pvr_pbe_render_params *render_params)
{
   const struct pvr_transfer_cmd_surface *dst = &transfer_cmd->dst;
   const uint8_t *swizzle;
   VkFormat format;
   VkResult result;

   /* D24S8/X8_D24 destinations are written via the PBE as raw 32-bit
    * words.
    */
   switch (dst->vk_format) {
   case VK_FORMAT_D24_UNORM_S8_UINT:
   case VK_FORMAT_X8_D24_UNORM_PACK32:
      format = VK_FORMAT_R32_UINT;
      break;

   default:
      format = dst->vk_format;
      break;
   }

   swizzle = pvr_get_format_swizzle(format);
   memcpy(surface_params->swizzle, swizzle, sizeof(surface_params->swizzle));

   pvr_pbe_get_src_format_and_gamma(format,
                                    PVR_PBE_GAMMA_NONE,
                                    false,
                                    &surface_params->source_format,
                                    &surface_params->gamma);

   surface_params->is_normalized = pvr_vk_format_is_fully_normalized(format);
   surface_params->pbe_packmode = pvr_get_pbe_packmode(format);
   surface_params->nr_components = vk_format_get_nr_components(format);

   result = pvr_mem_layout_spec(dst,
                                0U,
                                false,
                                &surface_params->width,
                                &surface_params->height,
                                &surface_params->stride,
                                &surface_params->mem_layout,
                                &surface_params->addr);
   if (result != VK_SUCCESS)
      return result;

   surface_params->z_only_render = false;
   surface_params->depth = dst->depth;
   surface_params->down_scale = state->down_scale;

   /* Only 3D-twiddled surfaces have a meaningful slice to render to. */
   if (surface_params->mem_layout == PVR_MEMLAYOUT_3DTWIDDLED)
      render_params->slice = (uint32_t)MAX2(dst->z_position, 0.0f);
   else
      render_params->slice = 0U;

   uint32_t tile_size_x = PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 0U);
   uint32_t tile_size_y = PVR_GET_FEATURE_VALUE(dev_info, tile_size_y, 0U);

   /* If the rectangle happens to be empty / off-screen we clip away
    * everything.
    */
   if (state->empty_dst) {
      /* Clip region placed wholly outside the 1x1-tile render below. */
      render_params->min_x_clip = 2U * tile_size_x;
      render_params->max_x_clip = 3U * tile_size_x;
      render_params->min_y_clip = 2U * tile_size_y;
      render_params->max_y_clip = 3U * tile_size_y;
      state->origin_x_in_tiles = 0U;
      state->origin_y_in_tiles = 0U;
      state->height_in_tiles = 1U;
      state->width_in_tiles = 1U;
   } else {
      const VkRect2D *scissor = &transfer_cmd->scissor;

      /* Clamp */
      render_params->min_x_clip =
         MAX2(MIN2(scissor->offset.x, (int32_t)surface_params->width), 0U);
      render_params->max_x_clip =
         MAX2(MIN2(scissor->offset.x + scissor->extent.width,
                   (int32_t)surface_params->width),
              0U) -
         1U;

      render_params->min_y_clip =
         MAX2(MIN2(scissor->offset.y, surface_params->height), 0U);
      render_params->max_y_clip =
         MAX2(MIN2(scissor->offset.y + scissor->extent.height,
                   surface_params->height),
              0U) -
         1U;

      /* A custom mapping's first clip rect overrides the scissor. */
      if (state->custom_mapping.pass_count > 0U) {
         struct pvr_transfer_pass *pass =
            &state->custom_mapping.passes[state->pass_idx];

         render_params->min_x_clip = (uint32_t)pass->clip_rects[0U].offset.x;
         render_params->max_x_clip =
            (uint32_t)(pass->clip_rects[0U].offset.x +
                       pass->clip_rects[0U].extent.width) -
            1U;
         render_params->min_y_clip = (uint32_t)pass->clip_rects[0U].offset.y;
         render_params->max_y_clip =
            (uint32_t)(pass->clip_rects[0U].offset.y +
                       pass->clip_rects[0U].extent.height) -
            1U;
      }

      state->origin_x_in_tiles = render_params->min_x_clip / tile_size_x;
      state->origin_y_in_tiles = render_params->min_y_clip / tile_size_y;
      state->width_in_tiles =
         (render_params->max_x_clip + tile_size_x) / tile_size_x;
      state->height_in_tiles =
         (render_params->max_y_clip + tile_size_y) / tile_size_y;

      /* Be careful here as this isn't the same as ((max_x_clip -
       * min_x_clip) + tile_size_x) >> tile_size_x.
       */
      state->width_in_tiles -= state->origin_x_in_tiles;
      state->height_in_tiles -= state->origin_y_in_tiles;
   }

   render_params->source_start = PVR_PBE_STARTPOS_BIT0;
   render_params->mrt_index = 0U;

   return VK_SUCCESS;
}
841 
842 static VkResult
pvr_pbe_setup_modify_defaults(const struct pvr_transfer_cmd_surface * dst,struct pvr_transfer_3d_state * state,uint32_t rt_idx,struct pvr_pbe_surf_params * surf_params,struct pvr_pbe_render_params * render_params)843 pvr_pbe_setup_modify_defaults(const struct pvr_transfer_cmd_surface *dst,
844                               struct pvr_transfer_3d_state *state,
845                               uint32_t rt_idx,
846                               struct pvr_pbe_surf_params *surf_params,
847                               struct pvr_pbe_render_params *render_params)
848 {
849    struct pvr_transfer_pass *pass;
850    VkRect2D *clip_rect;
851 
852    render_params->mrt_index = rt_idx;
853 
854    assert(rt_idx > 0 && rt_idx <= PVR_TRANSFER_MAX_RENDER_TARGETS);
855 
856    if (state->custom_mapping.pass_count == 0)
857       return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);
858 
859    pass = &state->custom_mapping.passes[state->pass_idx];
860 
861    assert(rt_idx < PVR_TRANSFER_MAX_CUSTOM_RECTS);
862 
863    clip_rect = &pass->clip_rects[rt_idx];
864 
865    render_params->min_x_clip = (uint32_t)clip_rect->offset.x;
866    render_params->max_x_clip =
867       (uint32_t)clip_rect->offset.x + clip_rect->extent.width - 1U;
868    render_params->min_y_clip = (uint32_t)clip_rect->offset.y;
869    render_params->max_y_clip =
870       (uint32_t)clip_rect->offset.y + clip_rect->extent.height - 1U;
871 
872    return VK_SUCCESS;
873 }
874 
875 static uint32_t
pvr_pbe_get_pixel_size(enum pvr_transfer_pbe_pixel_src pixel_format)876 pvr_pbe_get_pixel_size(enum pvr_transfer_pbe_pixel_src pixel_format)
877 {
878    switch (pixel_format) {
879    case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D24_D32:
880    case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D32_D24S8:
881    case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_S8D24_D24S8:
882    case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D24S8_D24S8:
883    case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32_D24S8:
884    case PVR_TRANSFER_PBE_PIXEL_SRC_F16_U8:
885    case PVR_TRANSFER_PBE_PIXEL_SRC_F32:
886    case PVR_TRANSFER_PBE_PIXEL_SRC_RAW32:
887    case PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_SU1010102:
888    case PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_UU1010102:
889    case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D24S8:
890    case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D24S8:
891    case PVR_TRANSFER_PBE_PIXEL_SRC_SS8888:
892    case PVR_TRANSFER_PBE_PIXEL_SRC_SU1010102:
893    case PVR_TRANSFER_PBE_PIXEL_SRC_SU8888:
894    case PVR_TRANSFER_PBE_PIXEL_SRC_SWAP_LMSB:
895    case PVR_TRANSFER_PBE_PIXEL_SRC_US8888:
896    case PVR_TRANSFER_PBE_PIXEL_SRC_UU1010102:
897    case PVR_TRANSFER_PBE_PIXEL_SRC_UU8888:
898       return 1U;
899 
900    case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32S8_D32S8:
901    case PVR_TRANSFER_PBE_PIXEL_SRC_F16F16:
902    case PVR_TRANSFER_PBE_PIXEL_SRC_F32X2:
903    case PVR_TRANSFER_PBE_PIXEL_SRC_MOV_BY45:
904    case PVR_TRANSFER_PBE_PIXEL_SRC_RAW64:
905    case PVR_TRANSFER_PBE_PIXEL_SRC_S16NORM:
906    case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D32S8:
907    case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D32S8_D32S8:
908    case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D32S8:
909    case PVR_TRANSFER_PBE_PIXEL_SRC_SS16S16:
910    case PVR_TRANSFER_PBE_PIXEL_SRC_SU16U16:
911    case PVR_TRANSFER_PBE_PIXEL_SRC_SU32U32:
912    case PVR_TRANSFER_PBE_PIXEL_SRC_U16NORM:
913    case PVR_TRANSFER_PBE_PIXEL_SRC_US16S16:
914    case PVR_TRANSFER_PBE_PIXEL_SRC_US32S32:
915    case PVR_TRANSFER_PBE_PIXEL_SRC_UU16U16:
916       return 2U;
917 
918    case PVR_TRANSFER_PBE_PIXEL_SRC_F32X4:
919    case PVR_TRANSFER_PBE_PIXEL_SRC_RAW128:
920    case PVR_TRANSFER_PBE_PIXEL_SRC_S4XU32:
921    case PVR_TRANSFER_PBE_PIXEL_SRC_U4XS32:
922       return 4U;
923 
924    case PVR_TRANSFER_PBE_PIXEL_SRC_NUM:
925    default:
926       break;
927    }
928 
929    return 0U;
930 }
931 
/* Chooses the PBE channel swizzle and source format for the destination
 * surface, and records the USC pixel width derived from the PBE pixel
 * format.
 *
 * Depth/stencil destinations route only the X channel; alpha-only
 * destinations pull from the source's alpha when it has one; otherwise
 * normalized and 32-bit-red formats may override the default source format
 * or zero out unused channels.
 */
static void pvr_pbe_setup_swizzle(const struct pvr_transfer_cmd *transfer_cmd,
                                  struct pvr_transfer_3d_state *state,
                                  struct pvr_pbe_surf_params *surf_params)
{
   bool color_fill = !!(transfer_cmd->flags & PVR_TRANSFER_CMD_FLAGS_FILL);
   const struct pvr_transfer_cmd_surface *dst = &transfer_cmd->dst;

   const uint32_t pixel_size =
      pvr_pbe_get_pixel_size(state->shader_props.layer_props.pbe_format);

   /* Unknown formats report 0; clamp so the pixel width is at least 1. */
   state->usc_pixel_width = MAX2(pixel_size, 1U);

   switch (dst->vk_format) {
   /* Depth/stencil: everything comes from the X channel only. */
   case VK_FORMAT_X8_D24_UNORM_PACK32:
   case VK_FORMAT_D24_UNORM_S8_UINT:
   case VK_FORMAT_S8_UINT:
      surf_params->swizzle[0U] = PIPE_SWIZZLE_X;
      surf_params->swizzle[1U] = PIPE_SWIZZLE_0;
      surf_params->swizzle[2U] = PIPE_SWIZZLE_0;
      surf_params->swizzle[3U] = PIPE_SWIZZLE_0;
      break;

   default: {
      const uint32_t red_width =
         vk_format_get_component_bits(dst->vk_format,
                                      UTIL_FORMAT_COLORSPACE_RGB,
                                      0U);

      if (transfer_cmd->source_count > 0 &&
          vk_format_is_alpha(dst->vk_format)) {
         if (vk_format_has_alpha(transfer_cmd->sources[0].surface.vk_format)) {
            /* Modify the destination format swizzle to always source from
             * src0.
             */
            surf_params->swizzle[0U] = PIPE_SWIZZLE_X;
            surf_params->swizzle[1U] = PIPE_SWIZZLE_0;
            surf_params->swizzle[2U] = PIPE_SWIZZLE_0;
            surf_params->swizzle[3U] = PIPE_SWIZZLE_1;
            break;
         }

         /* Source format having no alpha channel still allocates 4 output
          * buffer registers.
          */
      }

      if (pvr_vk_format_is_fully_normalized(dst->vk_format)) {
         /* 8-bit-per-channel fills (and F16->U8 conversions) can use the
          * narrower 8-per-channel source format; other <=8-bit normalized
          * reds go through F16 per channel.
          */
         if (color_fill &&
             (dst->vk_format == VK_FORMAT_B8G8R8A8_UNORM ||
              dst->vk_format == VK_FORMAT_R8G8B8A8_UNORM ||
              dst->vk_format == VK_FORMAT_A8B8G8R8_UNORM_PACK32)) {
            surf_params->source_format =
               ROGUE_PBESTATE_SOURCE_FORMAT_8_PER_CHANNEL;
         } else if (state->shader_props.layer_props.pbe_format ==
                    PVR_TRANSFER_PBE_PIXEL_SRC_F16_U8) {
            surf_params->source_format =
               ROGUE_PBESTATE_SOURCE_FORMAT_8_PER_CHANNEL;
         } else if (red_width <= 8U) {
            surf_params->source_format =
               ROGUE_PBESTATE_SOURCE_FORMAT_F16_PER_CHANNEL;
         }
      } else if (red_width == 32U && !state->dont_force_pbe) {
         uint32_t count = 0U;

         /* Widest channel count shared between any source and the dst. */
         for (uint32_t i = 0; i < transfer_cmd->source_count; i++) {
            VkFormat src_format = transfer_cmd->sources[i].surface.vk_format;
            uint32_t tmp;

            tmp = pvr_vk_format_get_common_color_channel_count(src_format,
                                                               dst->vk_format);

            count = MAX2(count, tmp);
         }

         /* Zero out channels not covered by any source; deliberate
          * fallthrough accumulates the overrides.
          */
         switch (count) {
         case 1U:
            surf_params->swizzle[1U] = PIPE_SWIZZLE_0;
            FALLTHROUGH;
         case 2U:
            surf_params->swizzle[2U] = PIPE_SWIZZLE_0;
            FALLTHROUGH;
         case 3U:
            surf_params->swizzle[3U] = PIPE_SWIZZLE_1;
            break;

         case 4U:
         default:
            break;
         }
      }
      break;
   }
   }
}
1026 
1027 /**
1028  * Calculates the required PBE byte mask based on the incoming transfer command.
1029  *
1030  * @param transfer_cmd  the transfer command
1031  * @return the bytemask (active high disable mask)
1032  */
1033 
pvr_pbe_byte_mask(const struct pvr_device_info * dev_info,const struct pvr_transfer_cmd * transfer_cmd)1034 static uint64_t pvr_pbe_byte_mask(const struct pvr_device_info *dev_info,
1035                                   const struct pvr_transfer_cmd *transfer_cmd)
1036 {
1037    uint32_t flags = pvr_get_blit_flags(transfer_cmd);
1038 
1039    assert(PVR_HAS_ERN(dev_info, 42064));
1040 
1041    if (flags & PVR_TRANSFER_CMD_FLAGS_DSMERGE) {
1042       uint32_t mask = 0U;
1043 
1044       switch (transfer_cmd->dst.vk_format) {
1045       case VK_FORMAT_D32_SFLOAT_S8_UINT:
1046          mask = 0xF0F0F0F0U;
1047          break;
1048       case VK_FORMAT_D24_UNORM_S8_UINT:
1049          mask = 0x88888888U;
1050          break;
1051       default:
1052          break;
1053       }
1054 
1055       if ((flags & PVR_TRANSFER_CMD_FLAGS_PICKD) == 0U)
1056          mask = ~mask;
1057 
1058       return mask;
1059    }
1060 
1061    /* The mask is as it was inactive on cores without the ERN. This keeps the
1062     * firmware agnostic to the feature.
1063     */
1064    return 0U;
1065 }
1066 
/* Uploads the PDS pixel-event program which drives the end-of-tile (EOT)
 * USC emits, then packs the CR_EVENT_PIXEL_PDS_{INFO,DATA,CODE} registers
 * into state->regs.
 *
 * The pre-built EOT USC programs live in ctx->usc_eot_bos, indexed by
 * rt_count - 1. Returns VK_ERROR_OUT_OF_HOST_MEMORY if the staging buffer
 * cannot be allocated, or the error from the PDS upload.
 */
static VkResult pvr_pbe_setup_emit(const struct pvr_transfer_cmd *transfer_cmd,
                                   struct pvr_transfer_ctx *ctx,
                                   struct pvr_transfer_3d_state *state,
                                   uint32_t rt_count,
                                   uint32_t *pbe_setup_words)
{
   struct pvr_device *const device = ctx->device;
   const struct pvr_device_info *const dev_info = &device->pdevice->dev_info;

   struct pvr_winsys_transfer_regs *regs = &state->regs;
   struct pvr_pds_event_program program = {
      .emit_words = pbe_setup_words,
      .num_emit_word_pairs = rt_count,
   };
   struct pvr_pds_upload pds_upload;
   uint32_t staging_buffer_size;
   uint32_t *staging_buffer;
   pvr_dev_addr_t addr;
   VkResult result;

   /* Precondition, make sure to use a valid index for ctx->usc_eot_bos. */
   assert(rt_count <= ARRAY_SIZE(ctx->usc_eot_bos));
   assert(rt_count > 0U);

   /* The doutu takes a USC-heap-relative address. */
   addr.addr = ctx->usc_eot_bos[rt_count - 1U]->dev_addr.addr -
               device->heaps.usc_heap->base_addr.addr;

   pvr_pds_setup_doutu(&program.task_control,
                       addr.addr,
                       0U,
                       ROGUE_PDSINST_DOUTU_SAMPLE_RATE_INSTANCE,
                       false);

   pvr_pds_set_sizes_pixel_event(&program, dev_info);

   staging_buffer_size = PVR_DW_TO_BYTES(program.code_size + program.data_size);

   staging_buffer = vk_alloc(&device->vk.alloc,
                             staging_buffer_size,
                             8U,
                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!staging_buffer)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* Data segment first, code segment immediately after it. */
   pvr_pds_generate_pixel_event_data_segment(&program,
                                             staging_buffer,
                                             dev_info);

   /* TODO: We can save some memory by generating a code segment for each
    * rt_count, which at the time of writing is a maximum of 3, in
    * pvr_setup_transfer_eot_shaders() when we setup the corresponding EOT
    * USC programs.
    */
   pvr_pds_generate_pixel_event_code_segment(&program,
                                             staging_buffer + program.data_size,
                                             dev_info);

   result =
      pvr_cmd_buffer_upload_pds(transfer_cmd->cmd_buffer,
                                staging_buffer,
                                program.data_size,
                                ROGUE_CR_EVENT_PIXEL_PDS_DATA_ADDR_ALIGNMENT,
                                staging_buffer + program.data_size,
                                program.code_size,
                                ROGUE_CR_EVENT_PIXEL_PDS_CODE_ADDR_ALIGNMENT,
                                ROGUE_CR_EVENT_PIXEL_PDS_DATA_ADDR_ALIGNMENT,
                                &pds_upload);
   /* Staging copy is no longer needed once uploaded (even on failure). */
   vk_free(&device->vk.alloc, staging_buffer);
   if (result != VK_SUCCESS)
      return result;

   pvr_csb_pack (&regs->event_pixel_pds_info, CR_EVENT_PIXEL_PDS_INFO, reg) {
      reg.temp_stride = 0U;
      reg.const_size =
         DIV_ROUND_UP(program.data_size,
                      ROGUE_CR_EVENT_PIXEL_PDS_INFO_CONST_SIZE_UNIT_SIZE);
      reg.usc_sr_size =
         DIV_ROUND_UP(rt_count * PVR_STATE_PBE_DWORDS,
                      ROGUE_CR_EVENT_PIXEL_PDS_INFO_USC_SR_SIZE_UNIT_SIZE);
   }

   pvr_csb_pack (&regs->event_pixel_pds_data, CR_EVENT_PIXEL_PDS_DATA, reg) {
      reg.addr = PVR_DEV_ADDR(pds_upload.data_offset);
   }

   pvr_csb_pack (&regs->event_pixel_pds_code, CR_EVENT_PIXEL_PDS_CODE, reg) {
      reg.addr = PVR_DEV_ADDR(pds_upload.code_offset);
   }

   return VK_SUCCESS;
}
1158 
/* Packs PBE state for every render target of the transfer and emits the
 * pixel-event program for them via pvr_pbe_setup_emit().
 *
 * One RT per format plane by default; an active custom mapping instead uses
 * one RT per clip rect of the current pass, and afterwards grows the tile
 * origin/extent so all emits are covered.
 */
static VkResult pvr_pbe_setup(const struct pvr_transfer_cmd *transfer_cmd,
                              struct pvr_transfer_ctx *ctx,
                              struct pvr_transfer_3d_state *state)
{
   struct pvr_device *const device = ctx->device;
   const struct pvr_device_info *const dev_info = &device->pdevice->dev_info;

   const struct pvr_transfer_cmd_surface *dst = &transfer_cmd->dst;
   uint32_t num_rts = vk_format_get_plane_count(dst->vk_format);
   uint32_t pbe_setup_words[PVR_TRANSFER_MAX_RENDER_TARGETS *
                            ROGUE_NUM_PBESTATE_STATE_WORDS];
   struct pvr_pbe_render_params render_params;
   struct pvr_pbe_surf_params surf_params;
   VkResult result;

   if (state->custom_mapping.pass_count > 0U)
      num_rts = state->custom_mapping.passes[state->pass_idx].clip_rects_count;

   if (PVR_HAS_FEATURE(dev_info, paired_tiles))
      state->pair_tiles = PVR_PAIRED_TILES_NONE;

   for (uint32_t i = 0U; i < num_rts; i++) {
      uint64_t *pbe_regs;
      uint32_t *pbe_words;

      /* Ensure the access into the pbe_wordx_mrty is made within its bounds. */
      assert(i * ROGUE_NUM_PBESTATE_REG_WORDS_FOR_TRANSFER <
             ARRAY_SIZE(state->regs.pbe_wordx_mrty));
      /* Ensure the access into pbe_setup_words is made within its bounds. */
      assert(i * ROGUE_NUM_PBESTATE_STATE_WORDS < ARRAY_SIZE(pbe_setup_words));

      pbe_regs =
         &state->regs
             .pbe_wordx_mrty[i * ROGUE_NUM_PBESTATE_REG_WORDS_FOR_TRANSFER];
      pbe_words = &pbe_setup_words[i * ROGUE_NUM_PBESTATE_STATE_WORDS];

      /* Word 2 is ORed into below, so start it off clean. */
      if (PVR_HAS_ERN(dev_info, 42064))
         pbe_regs[2U] = 0UL;

      /* RT 0 builds the full defaults; later RTs only patch the clip rect
       * and MRT index on top of them.
       */
      if (i == 0U) {
         result = pvr_pbe_setup_codegen_defaults(dev_info,
                                                 transfer_cmd,
                                                 state,
                                                 &surf_params,
                                                 &render_params);
         if (result != VK_SUCCESS)
            return result;
      } else {
         result = pvr_pbe_setup_modify_defaults(dst,
                                                state,
                                                i,
                                                &surf_params,
                                                &render_params);
         if (result != VK_SUCCESS)
            return result;
      }

      pvr_pbe_setup_swizzle(transfer_cmd, state, &surf_params);

      pvr_pbe_pack_state(dev_info,
                         &surf_params,
                         &render_params,
                         pbe_words,
                         pbe_regs);

      if (PVR_HAS_ERN(dev_info, 42064)) {
         uint64_t temp_reg;

         pvr_csb_pack (&temp_reg, PBESTATE_REG_WORD2, reg) {
            reg.sw_bytemask = pvr_pbe_byte_mask(dev_info, transfer_cmd);
         }

         pbe_regs[2U] |= temp_reg;
      }

      /* Mirror the packed pair-tiles bit back into the state so later
       * stages (e.g. pvr_isp_tiles()) can align to it.
       */
      if (PVR_HAS_FEATURE(dev_info, paired_tiles)) {
         if (pbe_regs[2U] &
             (1ULL << ROGUE_PBESTATE_REG_WORD2_PAIR_TILES_SHIFT)) {
            if (transfer_cmd->dst.mem_layout == PVR_MEMLAYOUT_TWIDDLED)
               state->pair_tiles = PVR_PAIRED_TILES_Y;
            else
               state->pair_tiles = PVR_PAIRED_TILES_X;
         }
      }
   }

   result =
      pvr_pbe_setup_emit(transfer_cmd, ctx, state, num_rts, pbe_setup_words);
   if (result != VK_SUCCESS)
      return result;

   /* Adjust tile origin and width to include all emits. */
   if (state->custom_mapping.pass_count > 0U) {
      const uint32_t tile_size_x =
         PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 0U);
      const uint32_t tile_size_y =
         PVR_GET_FEATURE_VALUE(dev_info, tile_size_y, 0U);
      struct pvr_transfer_pass *pass =
         &state->custom_mapping.passes[state->pass_idx];
      VkOffset2D offset = { 0U, 0U };
      VkOffset2D end = { 0U, 0U };

      /* Bounding box over all clip rects of the pass. */
      for (uint32_t i = 0U; i < pass->clip_rects_count; i++) {
         VkRect2D *rect = &pass->clip_rects[i];

         offset.x = MIN2(offset.x, rect->offset.x);
         offset.y = MIN2(offset.y, rect->offset.y);
         end.x = MAX2(end.x, rect->offset.x + rect->extent.width);
         end.y = MAX2(end.y, rect->offset.y + rect->extent.height);
      }

      state->origin_x_in_tiles = (uint32_t)offset.x / tile_size_x;
      state->origin_y_in_tiles = (uint32_t)offset.y / tile_size_y;
      state->width_in_tiles =
         DIV_ROUND_UP((uint32_t)end.x, tile_size_x) - state->origin_x_in_tiles;
      state->height_in_tiles =
         DIV_ROUND_UP((uint32_t)end.y, tile_size_y) - state->origin_y_in_tiles;
   }

   return VK_SUCCESS;
}
1280 
1281 /**
1282  * Writes the ISP tile registers according to the MSAA state. Sets up the USC
1283  * pixel partition allocations and the number of tiles in flight.
1284  */
static VkResult pvr_isp_tiles(const struct pvr_device *device,
                              struct pvr_transfer_3d_state *state)
{
   const struct pvr_device_runtime_info *dev_runtime_info =
      &device->pdevice->dev_runtime_info;
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   const uint32_t isp_samples =
      PVR_GET_FEATURE_VALUE(dev_info, isp_samples_per_pixel, 1U);
   uint32_t origin_x = state->origin_x_in_tiles;
   uint32_t origin_y = state->origin_y_in_tiles;
   uint32_t width = state->width_in_tiles;
   uint32_t height = state->height_in_tiles;
   uint32_t isp_tiles_in_flight;

   /* msaa_multiplier is calculated by sample_count & ~1U. Given sample
    * count is always in powers of two, we can get the sample count from
    * msaa_multiplier using the following logic.
    */
   const uint32_t samples = MAX2(state->msaa_multiplier, 1U);

   /* isp_samples_per_pixel feature is also know as "2x/4x for free", when
    * this is present SAMPLES_PER_PIXEL is 2/4, otherwise 1. The following
    * logic should end up with these numbers:
    *
    * |---------------------------------|
    * | 4 SAMPLES / ISP PIXEL           |
    * |-----------------------+----+----|
    * |                  MSAA | X* | Y* |
    * |                    2X |  1 |  1 |
    * |                    4X |  1 |  1 |
    * |---------------------------------|
    * | 2 SAMPLES / ISP PIXEL           |
    * |-----------------------+----+----|
    * |                  MSAA | X* | Y* |
    * |                    2X |  1 |  1 |
    * |                    4X |  1 |  2 |
    * |                    8X |  2 |  2 |
    * |-----------------------+----+----|
    * |  1 SAMPLE / ISP PIXEL           |
    * |-----------------------+----+----|
    * |                  MSAA | X* | Y* |
    * |                    2X |  1 |  2 |
    * |                    4X |  2 |  2 |
    * |-----------------------+----+----|
    */

   /* Scale origin and extent per the table above: each shift expression
    * evaluates to 0 or 1, i.e. a x1 or x2 multiplier.
    */
   origin_x <<= (state->msaa_multiplier >> (isp_samples + 1U)) & 1U;
   origin_y <<= ((state->msaa_multiplier >> (isp_samples + 1U)) |
                 (state->msaa_multiplier >> isp_samples)) &
                1U;
   width <<= (state->msaa_multiplier >> (isp_samples + 1U)) & 1U;
   height <<= ((state->msaa_multiplier >> (isp_samples + 1U)) |
               (state->msaa_multiplier >> isp_samples)) &
              1U;

   /* Paired tiles are processed two at a time, so round up to a pair. */
   if (PVR_HAS_FEATURE(dev_info, paired_tiles) &&
       state->pair_tiles != PVR_PAIRED_TILES_NONE) {
      width = ALIGN_POT(width, 2U);
      height = ALIGN_POT(height, 2U);
   }

   pvr_csb_pack (&state->regs.isp_mtile_size, CR_ISP_MTILE_SIZE, reg) {
      reg.x = width;
      reg.y = height;
   }

   pvr_csb_pack (&state->regs.isp_render_origin, CR_ISP_RENDER_ORIGIN, reg) {
      reg.x = origin_x;
      reg.y = origin_y;
   }

   /* Computes the USC pixel partition allocation and packs
    * usc_pixel_output_ctrl; the tiles-in-flight value comes back separately.
    */
   pvr_setup_tiles_in_flight(dev_info,
                             dev_runtime_info,
                             pvr_cr_isp_aa_mode_type(samples),
                             state->usc_pixel_width,
                             state->pair_tiles != PVR_PAIRED_TILES_NONE,
                             0,
                             &isp_tiles_in_flight,
                             &state->regs.usc_pixel_output_ctrl);

   pvr_csb_pack (&state->regs.isp_ctl, CR_ISP_CTL, reg) {
      reg.process_empty_tiles = true;

      if (PVR_HAS_FEATURE(dev_info, paired_tiles)) {
         if (state->pair_tiles == PVR_PAIRED_TILES_X) {
            reg.pair_tiles = true;
         } else if (state->pair_tiles == PVR_PAIRED_TILES_Y) {
            reg.pair_tiles = true;
            reg.pair_tiles_vert = true;
         }
      }
   }

   /* The tiles-in-flight field is pre-shifted; OR it into the packed reg. */
   state->regs.isp_ctl |= isp_tiles_in_flight;

   return VK_SUCCESS;
}
1382 
1383 static bool
pvr_int_pbe_pixel_changes_dst_rate(const struct pvr_device_info * dev_info,enum pvr_transfer_pbe_pixel_src pbe_format)1384 pvr_int_pbe_pixel_changes_dst_rate(const struct pvr_device_info *dev_info,
1385                                    enum pvr_transfer_pbe_pixel_src pbe_format)
1386 {
1387    /* We don't emulate rate change from the USC with the pbe_yuv feature. */
1388    if (!PVR_HAS_FEATURE(dev_info, pbe_yuv) &&
1389        (pbe_format == PVR_TRANSFER_PBE_PIXEL_SRC_Y_UV_INTERLEAVED ||
1390         pbe_format == PVR_TRANSFER_PBE_PIXEL_SRC_Y_U_V)) {
1391       return true;
1392    }
1393 
1394    return false;
1395 }
1396 
1397 /**
1398  * Number of DWORDs from the unified store that floating texture coefficients
1399  * take up.
1400  */
static void pvr_uv_space(const struct pvr_device_info *dev_info,
                         const struct pvr_transfer_cmd *transfer_cmd,
                         struct pvr_transfer_3d_state *state)
{
   const struct pvr_transfer_cmd_surface *dst = &transfer_cmd->dst;
   const VkRect2D *dst_rect = &transfer_cmd->scissor;

   /* This also avoids division by 0 in pvr_dma_texture_floats(). */
   if (state->custom_mapping.pass_count == 0U &&
       (dst_rect->extent.width == 0U || dst_rect->extent.height == 0U ||
        MAX2(dst_rect->offset.x, dst_rect->offset.x + dst_rect->extent.width) <
           0U ||
        MIN2(dst_rect->offset.x, dst_rect->offset.x + dst_rect->extent.width) >
           (int32_t)dst->width ||
        MAX2(dst_rect->offset.y, dst_rect->offset.y + dst_rect->extent.height) <
           0U ||
        MIN2(dst_rect->offset.y, dst_rect->offset.y + dst_rect->extent.height) >
           (int32_t)dst->height)) {
      /* Degenerate or fully off-surface destination rect. */
      state->empty_dst = true;
   } else {
      state->empty_dst = false;

      if (transfer_cmd->source_count > 0) {
         struct pvr_tq_layer_properties *layer =
            &state->shader_props.layer_props;

         const VkRect2D *src_rect =
            &transfer_cmd->sources[0U].mappings[0U].src_rect;
         const VkRect2D *dst_rect =
            &transfer_cmd->sources[0U].mappings[0U].dst_rect;
         int32_t dst_x1 = dst_rect->offset.x + dst_rect->extent.width;
         int32_t dst_y1 = dst_rect->offset.y + dst_rect->extent.height;
         int32_t src_x1 = src_rect->offset.x + src_rect->extent.width;
         int32_t src_y1 = src_rect->offset.y + src_rect->extent.height;

         assert(transfer_cmd->source_count == 1);

         /* Decide how many texture-coordinate floats the layer needs:
          * filtering or any scale/offset/flip between src and dst needs
          * explicit coords (4 floats); a 1:1 mapping needs none. The
          * cross-multiplication tests check that src and dst rect edges are
          * proportional without dividing.
          */
         if (state->filter[0U] > PVR_FILTER_POINT) {
            layer->layer_floats = PVR_INT_COORD_SET_FLOATS_4;
         } else if (src_rect->extent.width == 0U ||
                    src_rect->extent.height == 0U) {
            layer->layer_floats = PVR_INT_COORD_SET_FLOATS_0;
         } else if ((src_rect->offset.x * dst_x1 !=
                     src_x1 * dst_rect->offset.x) ||
                    (src_rect->offset.y * dst_y1 !=
                     src_y1 * dst_rect->offset.y) ||
                    (src_rect->extent.width != dst_rect->extent.width) ||
                    (src_rect->extent.height != dst_rect->extent.height) ||
                    transfer_cmd->sources[0U].mappings[0U].flip_x ||
                    transfer_cmd->sources[0U].mappings[0U].flip_y) {
            layer->layer_floats = PVR_INT_COORD_SET_FLOATS_4;
         } else {
            layer->layer_floats = PVR_INT_COORD_SET_FLOATS_0;
         }

         /* We have to adjust the rate. */
         if (layer->layer_floats != PVR_INT_COORD_SET_FLOATS_0 &&
             pvr_int_pbe_pixel_changes_dst_rate(dev_info, layer->pbe_format)) {
            layer->layer_floats = PVR_INT_COORD_SET_FLOATS_6;
         }
      }
   }
}
1464 
pvr_int_pbe_pixel_num_sampler_and_image_states(enum pvr_transfer_pbe_pixel_src pbe_format)1465 static uint32_t pvr_int_pbe_pixel_num_sampler_and_image_states(
1466    enum pvr_transfer_pbe_pixel_src pbe_format)
1467 {
1468    switch (pbe_format) {
1469    case PVR_TRANSFER_PBE_PIXEL_SRC_Y_UV_INTERLEAVED:
1470    case PVR_TRANSFER_PBE_PIXEL_SRC_Y_U_V:
1471       return 1U;
1472    default:
1473       return pvr_pbe_pixel_num_loads(pbe_format);
1474    }
1475 }
1476 
/* Packs the TEXSTATE_SAMPLER words for one transfer source surface and
 * writes them into the shader constant memory at the slot the register
 * layout assigns to this combined image/sampler.
 *
 * Filtering is point/linear/bicubic per the requested filter (bicubic only
 * on cores with tf_bicubic_filter); addressing always clamps to edge.
 * Always returns VK_SUCCESS.
 */
static VkResult pvr_sampler_state_for_surface(
   const struct pvr_device_info *dev_info,
   const struct pvr_transfer_cmd_surface *surface,
   enum pvr_filter filter,
   const struct pvr_tq_frag_sh_reg_layout *sh_reg_layout,
   uint32_t sampler,
   uint32_t *mem_ptr)
{
   uint64_t sampler_state[2U] = { 0UL, 0UL };

   pvr_csb_pack (&sampler_state[0U], TEXSTATE_SAMPLER, reg) {
      reg.anisoctl = ROGUE_TEXSTATE_ANISOCTL_DISABLED;
      reg.minlod = ROGUE_TEXSTATE_CLAMP_MIN;
      reg.maxlod = ROGUE_TEXSTATE_CLAMP_MIN;
      reg.dadjust = ROGUE_TEXSTATE_DADJUST_MIN_UINT;

      if (filter == PVR_FILTER_DONTCARE || filter == PVR_FILTER_POINT) {
         reg.minfilter = ROGUE_TEXSTATE_FILTER_POINT;
         reg.magfilter = ROGUE_TEXSTATE_FILTER_POINT;
      } else if (filter == PVR_FILTER_LINEAR) {
         reg.minfilter = ROGUE_TEXSTATE_FILTER_LINEAR;
         reg.magfilter = ROGUE_TEXSTATE_FILTER_LINEAR;
      } else {
         assert(PVR_HAS_FEATURE(dev_info, tf_bicubic_filter));
         reg.minfilter = ROGUE_TEXSTATE_FILTER_BICUBIC;
         reg.magfilter = ROGUE_TEXSTATE_FILTER_BICUBIC;
      }

      reg.addrmode_u = ROGUE_TEXSTATE_ADDRMODE_CLAMP_TO_EDGE;
      reg.addrmode_v = ROGUE_TEXSTATE_ADDRMODE_CLAMP_TO_EDGE;

      /* Only 3D-twiddled surfaces address along W. */
      if (surface->mem_layout == PVR_MEMLAYOUT_3DTWIDDLED)
         reg.addrmode_w = ROGUE_TEXSTATE_ADDRMODE_CLAMP_TO_EDGE;
   }

   assert(sampler < PVR_TRANSFER_MAX_IMAGES);

   /* Advance to this sampler's slot in the shader register layout. */
   assert(sampler <= sh_reg_layout->combined_image_samplers.count);
   mem_ptr += sh_reg_layout->combined_image_samplers.offsets[sampler].sampler;

   memcpy(mem_ptr, sampler_state, sizeof(sampler_state));

   return VK_SUCCESS;
}
1521 
pvr_image_state_set_codegen_defaults(struct pvr_device * device,struct pvr_transfer_3d_state * state,const struct pvr_transfer_cmd_surface * surface,uint32_t load,uint64_t * mem_ptr)1522 static inline VkResult pvr_image_state_set_codegen_defaults(
1523    struct pvr_device *device,
1524    struct pvr_transfer_3d_state *state,
1525    const struct pvr_transfer_cmd_surface *surface,
1526    uint32_t load,
1527    uint64_t *mem_ptr)
1528 {
1529    struct pvr_tq_layer_properties *layer = &state->shader_props.layer_props;
1530    struct pvr_texture_state_info info = { 0U };
1531    VkResult result;
1532 
1533    switch (surface->vk_format) {
1534    /* ERN 46863 */
1535    case VK_FORMAT_D32_SFLOAT_S8_UINT:
1536       switch (layer->pbe_format) {
1537       case PVR_TRANSFER_PBE_PIXEL_SRC_RAW32:
1538       case PVR_TRANSFER_PBE_PIXEL_SRC_RAW64:
1539       case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D24S8:
1540       case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D32S8_D32S8:
1541       case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32S8_D32S8:
1542       case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D32_D24S8:
1543       case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32_D24S8:
1544          info.format = VK_FORMAT_R32G32_UINT;
1545          break;
1546       default:
1547          break;
1548       }
1549       break;
1550 
1551    case VK_FORMAT_D24_UNORM_S8_UINT:
1552    case VK_FORMAT_X8_D24_UNORM_PACK32:
1553       info.format = VK_FORMAT_R32_UINT;
1554       break;
1555 
1556    default:
1557       info.format = surface->vk_format;
1558       break;
1559    }
1560 
1561    info.flags = 0U;
1562    info.base_level = 0U;
1563    info.mip_levels = 1U;
1564    info.mipmaps_present = false;
1565    info.sample_count = MAX2(surface->sample_count, 1U);
1566 
1567    if (surface->mem_layout == PVR_MEMLAYOUT_3DTWIDDLED)
1568       info.extent.depth = surface->depth;
1569    else
1570       info.extent.depth = 0U;
1571 
1572    if (PVR_HAS_FEATURE(&device->pdevice->dev_info, tpu_array_textures))
1573       info.array_size = 0U;
1574 
1575    result = pvr_mem_layout_spec(surface,
1576                                 load,
1577                                 true,
1578                                 &info.extent.width,
1579                                 &info.extent.height,
1580                                 &info.stride,
1581                                 &info.mem_layout,
1582                                 &info.addr);
1583    if (result != VK_SUCCESS)
1584       return result;
1585 
1586    if (state->custom_mapping.texel_extend_dst > 1U) {
1587       info.extent.width /= state->custom_mapping.texel_extend_dst;
1588       info.stride /= state->custom_mapping.texel_extend_dst;
1589    }
1590 
1591    info.tex_state_type = PVR_TEXTURE_STATE_SAMPLE;
1592    memcpy(info.swizzle,
1593           pvr_get_format_swizzle(info.format),
1594           sizeof(info.swizzle));
1595 
1596    if (surface->vk_format == VK_FORMAT_S8_UINT) {
1597       info.swizzle[0U] = PIPE_SWIZZLE_X;
1598       info.swizzle[1U] = PIPE_SWIZZLE_0;
1599       info.swizzle[2U] = PIPE_SWIZZLE_0;
1600       info.swizzle[3U] = PIPE_SWIZZLE_0;
1601    }
1602 
1603    if (info.extent.depth > 0U)
1604       info.type = VK_IMAGE_VIEW_TYPE_3D;
1605    else if (info.extent.height > 1U)
1606       info.type = VK_IMAGE_VIEW_TYPE_2D;
1607    else
1608       info.type = VK_IMAGE_VIEW_TYPE_1D;
1609 
1610    result = pvr_pack_tex_state(device, &info, mem_ptr);
1611    if (result != VK_SUCCESS)
1612       return result;
1613 
1614    return VK_SUCCESS;
1615 }
1616 
pvr_image_state_for_surface(const struct pvr_transfer_ctx * ctx,const struct pvr_transfer_cmd * transfer_cmd,const struct pvr_transfer_cmd_surface * surface,uint32_t load,uint32_t source,const struct pvr_tq_frag_sh_reg_layout * sh_reg_layout,struct pvr_transfer_3d_state * state,uint32_t uf_image,uint32_t * mem_ptr)1617 static VkResult pvr_image_state_for_surface(
1618    const struct pvr_transfer_ctx *ctx,
1619    const struct pvr_transfer_cmd *transfer_cmd,
1620    const struct pvr_transfer_cmd_surface *surface,
1621    uint32_t load,
1622    uint32_t source,
1623    const struct pvr_tq_frag_sh_reg_layout *sh_reg_layout,
1624    struct pvr_transfer_3d_state *state,
1625    uint32_t uf_image,
1626    uint32_t *mem_ptr)
1627 {
1628    uint32_t tex_state[ROGUE_MAXIMUM_IMAGE_STATE_SIZE] = { 0U };
1629    VkResult result;
1630    uint8_t offset;
1631 
1632    result = pvr_image_state_set_codegen_defaults(ctx->device,
1633                                                  state,
1634                                                  surface,
1635                                                  load,
1636                                                  (uint64_t *)tex_state);
1637    if (result != VK_SUCCESS)
1638       return result;
1639 
1640    assert(uf_image < PVR_TRANSFER_MAX_IMAGES);
1641 
1642    /* Offset of the shared registers containing the hardware image state. */
1643    assert(uf_image < sh_reg_layout->combined_image_samplers.count);
1644    offset = sh_reg_layout->combined_image_samplers.offsets[uf_image].image;
1645 
1646    /* Copy the image state to the buffer which is loaded into the shared
1647     * registers.
1648     */
1649    memcpy(mem_ptr + offset, tex_state, sizeof(tex_state));
1650 
1651    return VK_SUCCESS;
1652 }
1653 
1654 /* Writes the texture state/sampler state into DMAed memory. */
1655 static VkResult
pvr_sampler_image_state(struct pvr_transfer_ctx * ctx,const struct pvr_transfer_cmd * transfer_cmd,const struct pvr_tq_frag_sh_reg_layout * sh_reg_layout,struct pvr_transfer_3d_state * state,uint32_t * mem_ptr)1656 pvr_sampler_image_state(struct pvr_transfer_ctx *ctx,
1657                         const struct pvr_transfer_cmd *transfer_cmd,
1658                         const struct pvr_tq_frag_sh_reg_layout *sh_reg_layout,
1659                         struct pvr_transfer_3d_state *state,
1660                         uint32_t *mem_ptr)
1661 {
1662    if (!state->empty_dst) {
1663       uint32_t uf_sampler = 0U;
1664       uint32_t uf_image = 0U;
1665 
1666       for (uint32_t source = 0; source < transfer_cmd->source_count; source++) {
1667          struct pvr_tq_layer_properties *layer =
1668             &state->shader_props.layer_props;
1669          uint32_t max_load = pvr_pbe_pixel_num_loads(layer->pbe_format);
1670 
1671          for (uint32_t load = 0U; load < max_load; load++) {
1672             const struct pvr_transfer_cmd_surface *surface;
1673             enum pvr_filter filter;
1674             VkResult result;
1675 
1676             switch (layer->pbe_format) {
1677             case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D32S8:
1678             case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D32S8:
1679             case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D32S8_D32S8:
1680             case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32S8_D32S8:
1681             case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D24S8:
1682             case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D24S8:
1683             case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D24S8_D24S8:
1684             case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32_D24S8:
1685             case PVR_TRANSFER_PBE_PIXEL_SRC_F16F16:
1686             case PVR_TRANSFER_PBE_PIXEL_SRC_F16_U8:
1687                if (load > 0U) {
1688                   surface = &transfer_cmd->dst;
1689                   filter = transfer_cmd->sources[source].filter;
1690                } else {
1691                   surface = &transfer_cmd->sources[source].surface;
1692                   filter = state->filter[source];
1693                }
1694                break;
1695 
1696             case PVR_TRANSFER_PBE_PIXEL_SRC_Y_UV_INTERLEAVED:
1697             case PVR_TRANSFER_PBE_PIXEL_SRC_Y_U_V:
1698                surface = &transfer_cmd->sources[source].surface;
1699                filter = state->filter[source];
1700                break;
1701 
1702             default:
1703                surface = &transfer_cmd->sources[source + load].surface;
1704                filter = state->filter[source + load];
1705                break;
1706             }
1707 
1708             if (load < pvr_int_pbe_pixel_num_sampler_and_image_states(
1709                           layer->pbe_format)) {
1710                const struct pvr_device_info *dev_info =
1711                   &transfer_cmd->cmd_buffer->device->pdevice->dev_info;
1712 
1713                result = pvr_sampler_state_for_surface(dev_info,
1714                                                       surface,
1715                                                       filter,
1716                                                       sh_reg_layout,
1717                                                       uf_sampler,
1718                                                       mem_ptr);
1719                if (result != VK_SUCCESS)
1720                   return result;
1721 
1722                uf_sampler++;
1723 
1724                result = pvr_image_state_for_surface(ctx,
1725                                                     transfer_cmd,
1726                                                     surface,
1727                                                     load,
1728                                                     source,
1729                                                     sh_reg_layout,
1730                                                     state,
1731                                                     uf_image,
1732                                                     mem_ptr);
1733                if (result != VK_SUCCESS)
1734                   return result;
1735 
1736                uf_image++;
1737             }
1738          }
1739       }
1740    }
1741 
1742    return VK_SUCCESS;
1743 }
1744 
1745 /* The returned offset is in dwords. */
pvr_dynamic_const_reg_advance(const struct pvr_tq_frag_sh_reg_layout * sh_reg_layout,struct pvr_transfer_3d_state * state)1746 static inline uint32_t pvr_dynamic_const_reg_advance(
1747    const struct pvr_tq_frag_sh_reg_layout *sh_reg_layout,
1748    struct pvr_transfer_3d_state *state)
1749 {
1750    const uint32_t offset = sh_reg_layout->dynamic_consts.offset;
1751 
1752    assert(state->dynamic_const_reg_ptr < sh_reg_layout->dynamic_consts.count);
1753 
1754    return offset + state->dynamic_const_reg_ptr++;
1755 }
1756 
1757 /** Scales coefficients for sampling. (non normalized). */
static inline void
pvr_dma_texture_floats(const struct pvr_transfer_cmd *transfer_cmd,
                       struct pvr_transfer_3d_state *state,
                       const struct pvr_tq_frag_sh_reg_layout *sh_reg_layout,
                       uint32_t *mem_ptr)

{
   if (transfer_cmd->source_count > 0) {
      struct pvr_tq_layer_properties *layer = &state->shader_props.layer_props;
      /* Only the first mapping of the first source drives the coefficients;
       * layer_floats decides how many of them the shader expects.
       */
      const struct pvr_rect_mapping *mapping =
         &transfer_cmd->sources[0].mappings[0U];
      VkRect2D src_rect = mapping->src_rect;
      VkRect2D dst_rect = mapping->dst_rect;

      switch (layer->layer_floats) {
      case PVR_INT_COORD_SET_FLOATS_0:
         /* Shader takes no coordinate floats; nothing to DMA. */
         break;

      case PVR_INT_COORD_SET_FLOATS_6:
      case PVR_INT_COORD_SET_FLOATS_4: {
         int32_t consts[2U] = { 0U, 0U };
         int32_t denom[2U] = { 0U, 0U };
         int32_t nums[2U] = { 0U, 0U };
         int32_t src_x, dst_x;
         int32_t src_y, dst_y;
         float offset = 0.0f;
         float tmp;

         /* A flip negates the destination extent so the per-axis scale
          * (num/denom) and offset below mirror the source coordinate.
          */
         dst_x = mapping->flip_x ? -(int32_t)dst_rect.extent.width
                                 : dst_rect.extent.width;
         dst_y = mapping->flip_y ? -(int32_t)dst_rect.extent.height
                                 : dst_rect.extent.height;
         src_x = src_rect.extent.width;
         src_y = src_rect.extent.height;

         /* Per axis: src = dst * (num/denom) + const/denom, i.e. a linear
          * map from destination pixels to (non-normalized) source texels.
          */
         nums[0U] = src_x;
         denom[0U] = dst_x;
         consts[0U] =
            mapping->flip_x
               ? src_rect.offset.x * dst_x -
                    src_x * (dst_rect.offset.x + dst_rect.extent.width)
               : src_rect.offset.x * dst_x - src_x * dst_rect.offset.x;
         nums[1U] = src_y;
         denom[1U] = dst_y;
         consts[1U] =
            mapping->flip_y
               ? src_rect.offset.y * dst_y -
                    src_y * (dst_rect.offset.y + dst_rect.extent.height)
               : src_rect.offset.y * dst_y - src_y * dst_rect.offset.y;

         /* DMA order matters: scale then offset for X, then for Y — the
          * shader reads the dynamic constants in this exact sequence.
          */
         for (uint32_t i = 0U; i < 2U; i++) {
            tmp = (float)(nums[i]) / (float)(denom[i]);
            mem_ptr[pvr_dynamic_const_reg_advance(sh_reg_layout, state)] =
               fui(tmp);

            tmp = ((float)(consts[i]) + (i == 1U ? offset : 0.0f)) /
                  (float)(denom[i]);
            mem_ptr[pvr_dynamic_const_reg_advance(sh_reg_layout, state)] =
               fui(tmp);
         }

         /* The 6-float variant additionally gets the destination rect
          * origin (taking flips into account).
          */
         if (layer->layer_floats == PVR_INT_COORD_SET_FLOATS_6) {
            tmp = (float)MIN2(dst_rect.offset.x, dst_rect.offset.x + dst_x);
            mem_ptr[pvr_dynamic_const_reg_advance(sh_reg_layout, state)] =
               fui(tmp);

            tmp = (float)MIN2(dst_rect.offset.y, dst_rect.offset.y + dst_y);
            mem_ptr[pvr_dynamic_const_reg_advance(sh_reg_layout, state)] =
               fui(tmp);
         }
         break;
      }

      default:
         unreachable("Unknown COORD_SET_FLOATS.");
         break;
      }
   }
}
1837 
pvr_int_pbe_pixel_requires_usc_filter(const struct pvr_device_info * dev_info,enum pvr_transfer_pbe_pixel_src pixel_format)1838 static bool pvr_int_pbe_pixel_requires_usc_filter(
1839    const struct pvr_device_info *dev_info,
1840    enum pvr_transfer_pbe_pixel_src pixel_format)
1841 {
1842    switch (pixel_format) {
1843    case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D24S8:
1844    case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D24S8_D24S8:
1845    case PVR_TRANSFER_PBE_PIXEL_SRC_U16NORM:
1846    case PVR_TRANSFER_PBE_PIXEL_SRC_S16NORM:
1847    case PVR_TRANSFER_PBE_PIXEL_SRC_F32:
1848    case PVR_TRANSFER_PBE_PIXEL_SRC_F32X2:
1849    case PVR_TRANSFER_PBE_PIXEL_SRC_F32X4:
1850       return true;
1851    case PVR_TRANSFER_PBE_PIXEL_SRC_F16F16:
1852       return !PVR_HAS_FEATURE(dev_info, pbe_filterable_f16);
1853    default:
1854       return false;
1855    }
1856 }
1857 
1858 /**
1859  * Sets up the MSAA related bits in the operation
1860  *
1861  * TPU sample count is read directly from transfer_cmd in the TPU code. An MSAA
1862  * src can be read from sample rate or instance rate shaders as long as the
1863  * sample count is set on the TPU. If a layer is single sample we expect the
1864  * same sample replicated in full rate shaders. If the layer is multi sample,
1865  * instance rate shaders are used to emulate the filter or to select the
1866  * specified sample. The sample number is static in the programs.
1867  */
static VkResult pvr_msaa_state(const struct pvr_device_info *dev_info,
                               const struct pvr_transfer_cmd *transfer_cmd,
                               struct pvr_transfer_3d_state *state,
                               uint32_t source)
{
   struct pvr_tq_shader_properties *shader_props = &state->shader_props;
   struct pvr_tq_layer_properties *layer = &shader_props->layer_props;
   struct pvr_winsys_transfer_regs *const regs = &state->regs;
   /* Masking off bit 0 maps a sample count of 1 to 0, so 0 uniformly means
    * "single sampled" in the comparisons below.
    */
   uint32_t src_sample_count =
      transfer_cmd->sources[source].surface.sample_count & ~1U;
   uint32_t dst_sample_count = transfer_cmd->dst.sample_count & ~1U;
   uint32_t bsample_count = 0U;

   /* Defaults for the single-sample case; overridden per path below. */
   shader_props->full_rate = false;
   state->msaa_multiplier = 1U;
   state->down_scale = false;

   /* Pack the default (no-AA) ISP state. */
   /* clang-format off */
   pvr_csb_pack (&regs->isp_aa, CR_ISP_AA, reg);
   /* clang-format on */

   layer->sample_count = 1U;
   layer->resolve_op = PVR_RESOLVE_BLEND;

   bsample_count |= src_sample_count | dst_sample_count;

   if (bsample_count > PVR_GET_FEATURE_VALUE(dev_info, max_multisample, 0U))
      return vk_error(transfer_cmd->cmd_buffer, VK_ERROR_FORMAT_NOT_SUPPORTED);

   /* Shouldn't get two distinct bits set (implies different sample counts).
    * The reason being the rate at which the shader runs has to match.
    */
   if ((bsample_count & (bsample_count - 1U)) != 0U)
      return vk_error(transfer_cmd->cmd_buffer, VK_ERROR_FORMAT_NOT_SUPPORTED);

   if (src_sample_count == 0U && dst_sample_count == 0U) {
      /* S -> S (no MSAA involved). */
      layer->msaa = false;
   } else if (src_sample_count != 0U && dst_sample_count == 0U) {
      /* M -> S (resolve). */
      layer->resolve_op = transfer_cmd->sources[source].resolve_op;

      /* PVR_RESOLVE_SAMPLE<n> ops select a specific sample; reject a
       * sample index beyond the source's sample count.
       */
      if ((uint32_t)layer->resolve_op >=
          (src_sample_count + (uint32_t)PVR_RESOLVE_SAMPLE0)) {
         return vk_error(transfer_cmd->cmd_buffer,
                         VK_ERROR_FORMAT_NOT_SUPPORTED);
      }

      layer->msaa = true;

      switch (layer->resolve_op) {
      case PVR_RESOLVE_MIN:
      case PVR_RESOLVE_MAX:
         /* Min/max resolve is only supported for matching depth/stencil
          * formats on source and destination.
          */
         switch (transfer_cmd->sources[source].surface.vk_format) {
         case VK_FORMAT_D32_SFLOAT:
         case VK_FORMAT_D16_UNORM:
         case VK_FORMAT_S8_UINT:
         case VK_FORMAT_D24_UNORM_S8_UINT:
         case VK_FORMAT_X8_D24_UNORM_PACK32:
            if (transfer_cmd->sources[source].surface.vk_format !=
                transfer_cmd->dst.vk_format) {
               return vk_error(transfer_cmd->cmd_buffer,
                               VK_ERROR_FORMAT_NOT_SUPPORTED);
            }
            break;

         default:
            return vk_error(transfer_cmd->cmd_buffer,
                            VK_ERROR_FORMAT_NOT_SUPPORTED);
         }

         /* Instance rate. */
         layer->sample_count = src_sample_count;
         state->shader_props.full_rate = false;
         break;

      case PVR_RESOLVE_BLEND:
         if (pvr_int_pbe_pixel_requires_usc_filter(dev_info,
                                                   layer->pbe_format)) {
            /* Instance rate. */
            layer->sample_count = src_sample_count;
            state->shader_props.full_rate = false;
         } else {
            /* Sample rate. */
            state->shader_props.full_rate = true;
            state->msaa_multiplier = src_sample_count;
            state->down_scale = true;

            pvr_csb_pack (&regs->isp_aa, CR_ISP_AA, reg) {
               reg.mode = pvr_cr_isp_aa_mode_type(src_sample_count);
            }
         }
         break;

      default:
         /* Shader doesn't have to know the number of samples. It's enough
          * if the TPU knows, and the shader sets the right sno (given to the
          * shader in resolve_op).
          */
         state->shader_props.full_rate = false;
         break;
      }
   } else {
      /* Destination is multisampled: S -> M or M -> M. */
      state->msaa_multiplier = dst_sample_count;

      pvr_csb_pack (&regs->isp_aa, CR_ISP_AA, reg) {
         reg.mode = pvr_cr_isp_aa_mode_type(dst_sample_count);
      }

      if (src_sample_count == 0U && dst_sample_count != 0U) {
         /* S -> M (replicate samples) */
         layer->msaa = false;
         state->shader_props.full_rate = !state->shader_props.iterated;
      } else {
         /* M -> M (sample to sample) */
         layer->msaa = true;
         state->shader_props.full_rate = true;
      }
   }

   return VK_SUCCESS;
}
1990 
/* Formats whose linear filtering has to be emulated in the shader.
 * Presumably these are the 32-bit float and D24-packed formats the TPU
 * can't filter natively — TODO confirm against the TPU capabilities.
 */
static bool pvr_requires_usc_linear_filter(VkFormat format)
{
   switch (format) {
   case VK_FORMAT_D24_UNORM_S8_UINT:
   case VK_FORMAT_D32_SFLOAT:
   case VK_FORMAT_R32_SFLOAT:
   case VK_FORMAT_R32G32_SFLOAT:
   case VK_FORMAT_R32G32B32_SFLOAT:
   case VK_FORMAT_R32G32B32A32_SFLOAT:
   case VK_FORMAT_X8_D24_UNORM_PACK32:
      return true;

   default:
      return false;
   }
}
2006 
2007 static inline bool
pvr_int_pbe_usc_linear_filter(enum pvr_transfer_pbe_pixel_src pbe_format,bool sample,bool msaa,bool full_rate)2008 pvr_int_pbe_usc_linear_filter(enum pvr_transfer_pbe_pixel_src pbe_format,
2009                               bool sample,
2010                               bool msaa,
2011                               bool full_rate)
2012 {
2013    if (sample || msaa || full_rate)
2014       return false;
2015 
2016    switch (pbe_format) {
2017    case PVR_TRANSFER_PBE_PIXEL_SRC_D24S8:
2018    case PVR_TRANSFER_PBE_PIXEL_SRC_S8D24:
2019    case PVR_TRANSFER_PBE_PIXEL_SRC_D32S8:
2020    case PVR_TRANSFER_PBE_PIXEL_SRC_F32:
2021    case PVR_TRANSFER_PBE_PIXEL_SRC_F32X2:
2022    case PVR_TRANSFER_PBE_PIXEL_SRC_F32X4:
2023       return true;
2024    default:
2025       return false;
2026    }
2027 }
2028 
pvr_pick_component_needed(const struct pvr_transfer_custom_mapping * custom_mapping)2029 static inline bool pvr_pick_component_needed(
2030    const struct pvr_transfer_custom_mapping *custom_mapping)
2031 {
2032    return custom_mapping->pass_count > 0U &&
2033           custom_mapping->texel_extend_dst > 1U &&
2034           custom_mapping->texel_extend_src <= 1U;
2035 }
2036 
2037 /** Writes the shader related constants into the DMA space. */
2038 static void
pvr_write_usc_constants(const struct pvr_tq_frag_sh_reg_layout * sh_reg_layout,uint32_t * dma_space)2039 pvr_write_usc_constants(const struct pvr_tq_frag_sh_reg_layout *sh_reg_layout,
2040                         uint32_t *dma_space)
2041 {
2042    const uint32_t reg = sh_reg_layout->driver_total;
2043    const uint32_t consts_count =
2044       sh_reg_layout->compiler_out.usc_constants.count;
2045 
2046    /* If not we likely need to write more consts. */
2047    assert(consts_count == sh_reg_layout->compiler_out_total);
2048 
2049    /* Append the usc consts after the driver allocated regs. */
2050    for (uint32_t i = 0U; i < consts_count; i++)
2051       dma_space[reg + i] = sh_reg_layout->compiler_out.usc_constants.values[i];
2052 }
2053 
2054 static inline void
pvr_dma_texel_unwind(struct pvr_transfer_3d_state * state,const struct pvr_tq_frag_sh_reg_layout * sh_reg_layout,uint32_t * mem_ptr)2055 pvr_dma_texel_unwind(struct pvr_transfer_3d_state *state,
2056                      const struct pvr_tq_frag_sh_reg_layout *sh_reg_layout,
2057                      uint32_t *mem_ptr)
2058 
2059 {
2060    const uint32_t coord_sample_mask =
2061       state->custom_mapping.texel_extend_dst - 1U;
2062 
2063    mem_ptr[pvr_dynamic_const_reg_advance(sh_reg_layout, state)] =
2064       coord_sample_mask;
2065    mem_ptr[pvr_dynamic_const_reg_advance(sh_reg_layout, state)] =
2066       state->custom_mapping.texel_unwind_dst;
2067 }
2068 
2069 /** Writes the Uniform/Texture state data segments + the UniTex code. */
static inline VkResult
pvr_pds_unitex(const struct pvr_device_info *dev_info,
               struct pvr_transfer_ctx *ctx,
               const struct pvr_transfer_cmd *transfer_cmd,
               struct pvr_pds_pixel_shader_sa_program *program,
               struct pvr_transfer_prep_data *prep_data)
{
   /* Pre-built PDS code variant selected by the number of texture/uniform
    * DMA kicks the program performs.
    */
   struct pvr_pds_upload *unitex_code =
      &ctx->pds_unitex_code[program->num_texture_dma_kicks]
                           [program->num_uniform_dma_kicks];
   struct pvr_transfer_3d_state *state = &prep_data->state;
   struct pvr_suballoc_bo *pvr_bo;
   VkResult result;
   void *map;

   /* Uniform program is not used. */
   assert(program->num_uniform_dma_kicks == 0U);

   /* No texture DMA either: nothing to generate or upload. */
   if (program->num_texture_dma_kicks == 0U) {
      state->uniform_data_size = 0U;
      state->tex_state_data_size = 0U;
      state->tex_state_data_offset = 0U;
      state->uni_tex_code_offset = 0U;

      return VK_SUCCESS;
   }

   /* Size the uniform data segment first; it must come out empty since the
    * uniform program isn't used (see assert above).
    */
   pvr_pds_set_sizes_pixel_shader_sa_uniform_data(program, dev_info);
   assert(program->data_size == 0U);
   state->uniform_data_size = 0U;

   /* Size the texture-state data segment and round it up to the unit the
    * TA state words express the size in.
    */
   pvr_pds_set_sizes_pixel_shader_sa_texture_data(program, dev_info);
   state->tex_state_data_size =
      ALIGN_POT(program->data_size,
                ROGUE_TA_STATE_PDS_SIZEINFO1_PDS_TEXTURESTATESIZE_UNIT_SIZE);

   /* Allocate the data segment from the PDS heap. */
   result =
      pvr_cmd_buffer_alloc_mem(transfer_cmd->cmd_buffer,
                               ctx->device->heaps.pds_heap,
                               PVR_DW_TO_BYTES(state->tex_state_data_size),
                               &pvr_bo);
   if (result != VK_SUCCESS)
      return result;

   /* Offsets are recorded relative to the PDS heap base. */
   state->tex_state_data_offset =
      pvr_bo->dev_addr.addr - ctx->device->heaps.pds_heap->base_addr.addr;

   /* Generate the texture-state data segment directly into the mapping. */
   map = pvr_bo_suballoc_get_map_addr(pvr_bo);
   pvr_pds_generate_pixel_shader_sa_texture_state_data(program, map, dev_info);

   /* Save the dev_addr and size in the 3D state. */
   state->uni_tex_code_offset = unitex_code->code_offset;
   state->pds_temps = program->temps_used;

   return VK_SUCCESS;
}
2126 
2127 /** Converts a float in range 0 to 1 to an N-bit fixed-point integer. */
static uint32_t pvr_float_to_ufixed(float value, uint32_t bits)
{
   const uint32_t max = (1U << bits) - 1U;

   /* NaN, Inf and values >= 1.0 saturate to the maximum. */
   if (util_is_inf_or_nan(value) || value >= 1.0f)
      return max;

   /* Negative inputs clamp to zero. */
   if (value < 0.0f)
      return 0U;

   /* Scale into [0, max], then round to nearest. The sum is formed in
    * double so it stays exact for bit widths above the float mantissa.
    */
   return (uint32_t)floor((double)(value * (float)max) + 0.5);
}
2144 
2145 /** Converts a float in range -1 to 1 to a signed N-bit fixed-point integer. */
pvr_float_to_sfixed(float value,uint32_t N)2146 static uint32_t pvr_float_to_sfixed(float value, uint32_t N)
2147 {
2148    int32_t max = (1 << (N - 1)) - 1;
2149    int32_t min = 0 - (1 << (N - 1));
2150    union fi x;
2151 
2152    /* NaN and Inf and overflow. */
2153    if (util_is_inf_or_nan(value) || value >= 1.0f)
2154       return (uint32_t)max;
2155    else if (value == 0.0f)
2156       return 0U;
2157    else if (value <= -1.0f)
2158       return (uint32_t)min;
2159 
2160    /* Normalise. */
2161    value *= (float)max;
2162 
2163    /* Cast to double so that we can accurately represent the sum for N > 23. */
2164    if (value > 0.0f)
2165       x.i = (int32_t)floor((double)value + 0.5f);
2166    else
2167       x.i = (int32_t)floor((double)value - 0.5f);
2168 
2169    return x.ui;
2170 }
2171 
2172 /** Convert a value in IEEE single precision format to 16-bit floating point
2173  * format.
2174  */
2175 /* TODO: See if we can use _mesa_float_to_float16_rtz_slow() instead. */
static uint16_t pvr_float_to_f16(float value, bool round_to_even)
{
   uint32_t input_value;
   uint32_t exponent;
   uint32_t mantissa;
   uint16_t output;

   /* 0.0f can be exactly expressed in binary using IEEE float format. */
   if (value == 0.0f)
      return 0U;

   /* Extract the sign bit up front and continue with the magnitude. */
   if (value < 0U) {
      output = 0x8000;
      value = -value;
   } else {
      output = 0U;
   }

   /* 2^16 * (2 - 1/1024) = highest f16 representable value. */
   value = MIN2(value, 131008);
   input_value = fui(value);

   /* Extract the exponent and mantissa. */
   exponent = util_get_float32_exponent(value) + 15;
   mantissa = input_value & ((1 << 23) - 1);

   /* If the exponent is outside the supported range then denormalise the
    * mantissa.
    */
   if ((int32_t)exponent <= 0) {
      uint32_t shift;

      /* Make the implicit leading one explicit before shifting it out. */
      mantissa |= (1 << 23);
      exponent = input_value >> 23;
      shift = -14 + 127 - exponent;

      if (shift < 24)
         mantissa >>= shift;
      else
         mantissa = 0;
   } else {
      /* Normal range: place the biased exponent into bits 14:10. */
      output = (uint16_t)(output | ((exponent << 10) & 0x7C00));
   }

   /* Keep the top 10 mantissa bits (truncate the lower 13). */
   output = (uint16_t)(output | (((mantissa >> 13) << 0) & 0x03FF));

   if (round_to_even) {
      /* Round to nearest even. */
      /* NOTE(review): parity is tested on the truncated float value, not
       * on the assembled f16 bit pattern — confirm this matches the
       * intended round-to-nearest-even semantics.
       */
      if ((((int)value) % 2 != 0) && (((1 << 13) - 1) & mantissa))
         output++;
   } else {
      /* Round to nearest. */
      if (mantissa & (1 << 12))
         output++;
   }

   return output;
}
2234 
pvr_pack_clear_color(VkFormat format,const union fi color[static4],uint32_t pkd_color[static4])2235 static VkResult pvr_pack_clear_color(VkFormat format,
2236                                      const union fi color[static 4],
2237                                      uint32_t pkd_color[static 4])
2238 {
2239    const uint32_t red_width =
2240       vk_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 0U);
2241    uint32_t pbe_pack_mode = pvr_get_pbe_packmode(format);
2242    const bool pbe_norm = pvr_vk_format_is_fully_normalized(format);
2243 
2244    if (pbe_pack_mode == ROGUE_PBESTATE_PACKMODE_INVALID)
2245       return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);
2246 
2247    /* Set packed color based on PBE pack mode and PBE norm. */
2248    switch (pbe_pack_mode) {
2249    case ROGUE_PBESTATE_PACKMODE_U8U8U8U8:
2250    case ROGUE_PBESTATE_PACKMODE_A8R3G3B2:
2251       if (pbe_norm) {
2252          pkd_color[0] = pvr_float_to_ufixed(color[0].f, 8) & 0xFFU;
2253          pkd_color[0] |= (pvr_float_to_ufixed(color[1].f, 8) & 0xFFU) << 8;
2254          pkd_color[0] |= (pvr_float_to_ufixed(color[2].f, 8) & 0xFFU) << 16;
2255          pkd_color[0] |= (pvr_float_to_ufixed(color[3].f, 8) & 0xFFU) << 24;
2256       } else {
2257          pkd_color[0] = color[0].ui & 0xFFU;
2258          pkd_color[0] |= (color[1].ui & 0xFFU) << 8;
2259          pkd_color[0] |= (color[2].ui & 0xFFU) << 16;
2260          pkd_color[0] |= (color[3].ui & 0xFFU) << 24;
2261       }
2262       break;
2263 
2264    case ROGUE_PBESTATE_PACKMODE_S8S8S8S8:
2265    case ROGUE_PBESTATE_PACKMODE_X8U8S8S8:
2266    case ROGUE_PBESTATE_PACKMODE_X8S8S8U8:
2267       if (pbe_norm) {
2268          pkd_color[0] = (uint32_t)pvr_float_to_f16(color[0].f, false);
2269          pkd_color[0] |= (uint32_t)pvr_float_to_f16(color[1].f, false) << 16;
2270          pkd_color[1] = (uint32_t)pvr_float_to_f16(color[2].f, false);
2271          pkd_color[1] |= (uint32_t)pvr_float_to_f16(color[3].f, false) << 16;
2272       } else {
2273          pkd_color[0] = color[0].ui & 0xFFU;
2274          pkd_color[0] |= (color[1].ui & 0xFFU) << 8;
2275          pkd_color[0] |= (color[2].ui & 0xFFU) << 16;
2276          pkd_color[0] |= (color[3].ui & 0xFFU) << 24;
2277       }
2278       break;
2279 
2280    case ROGUE_PBESTATE_PACKMODE_U16U16U16U16:
2281       if (pbe_norm) {
2282          pkd_color[0] = pvr_float_to_ufixed(color[0].f, 16) & 0xFFFFU;
2283          pkd_color[0] |= (pvr_float_to_ufixed(color[1].f, 16) & 0xFFFFU) << 16;
2284          pkd_color[1] = pvr_float_to_ufixed(color[2].f, 16) & 0xFFFFU;
2285          pkd_color[1] |= (pvr_float_to_ufixed(color[3].f, 16) & 0xFFFFU) << 16;
2286       } else {
2287          pkd_color[0] = color[0].ui & 0xFFFFU;
2288          pkd_color[0] |= (color[1].ui & 0xFFFFU) << 16;
2289          pkd_color[1] = color[2].ui & 0xFFFFU;
2290          pkd_color[1] |= (color[3].ui & 0xFFFFU) << 16;
2291       }
2292       break;
2293 
2294    case ROGUE_PBESTATE_PACKMODE_S16S16S16S16:
2295       if (pbe_norm) {
2296          pkd_color[0] = pvr_float_to_sfixed(color[0].f, 16) & 0xFFFFU;
2297          pkd_color[0] |= (pvr_float_to_sfixed(color[1].f, 16) & 0xFFFFU) << 16;
2298          pkd_color[1] = (pvr_float_to_sfixed(color[2].f, 16) & 0xFFFFU);
2299          pkd_color[1] |= (pvr_float_to_sfixed(color[3].f, 16) & 0xFFFFU) << 16;
2300       } else {
2301          pkd_color[0] = color[0].ui & 0xFFFFU;
2302          pkd_color[0] |= (color[1].ui & 0xFFFFU) << 16;
2303          pkd_color[1] = color[2].ui & 0xFFFFU;
2304          pkd_color[1] |= (color[3].ui & 0xFFFFU) << 16;
2305       }
2306       break;
2307 
2308    case ROGUE_PBESTATE_PACKMODE_A2_XRBIAS_U10U10U10:
2309    case ROGUE_PBESTATE_PACKMODE_ARGBV16_XR10:
2310    case ROGUE_PBESTATE_PACKMODE_F16F16F16F16:
2311    case ROGUE_PBESTATE_PACKMODE_A2R10B10G10:
2312    case ROGUE_PBESTATE_PACKMODE_A4R4G4B4:
2313    case ROGUE_PBESTATE_PACKMODE_A1R5G5B5:
2314    case ROGUE_PBESTATE_PACKMODE_R5G5B5A1:
2315    case ROGUE_PBESTATE_PACKMODE_R5G6B5:
2316       if (red_width > 0) {
2317          pkd_color[0] = (uint32_t)pvr_float_to_f16(color[0].f, false);
2318          pkd_color[0] |= (uint32_t)pvr_float_to_f16(color[1].f, false) << 16;
2319          pkd_color[1] = (uint32_t)pvr_float_to_f16(color[2].f, false);
2320          pkd_color[1] |= (uint32_t)pvr_float_to_f16(color[3].f, false) << 16;
2321       } else {
2322          /* Swizzle only uses first channel for alpha formats. */
2323          pkd_color[0] = (uint32_t)pvr_float_to_f16(color[3].f, false);
2324       }
2325       break;
2326 
2327    case ROGUE_PBESTATE_PACKMODE_U32U32U32U32:
2328       pkd_color[0] = color[0].ui;
2329       pkd_color[1] = color[1].ui;
2330       pkd_color[2] = color[2].ui;
2331       pkd_color[3] = color[3].ui;
2332       break;
2333 
2334    case ROGUE_PBESTATE_PACKMODE_S32S32S32S32:
2335       pkd_color[0] = (uint32_t)color[0].i;
2336       pkd_color[1] = (uint32_t)color[1].i;
2337       pkd_color[2] = (uint32_t)color[2].i;
2338       pkd_color[3] = (uint32_t)color[3].i;
2339       break;
2340 
2341    case ROGUE_PBESTATE_PACKMODE_F32F32F32F32:
2342       memcpy(pkd_color, &color[0].f, 4U * sizeof(float));
2343       break;
2344 
2345    case ROGUE_PBESTATE_PACKMODE_R10B10G10A2:
2346       if (pbe_norm) {
2347          pkd_color[0] = pvr_float_to_ufixed(color[0].f, 10) & 0xFFU;
2348          pkd_color[0] |= (pvr_float_to_ufixed(color[1].f, 10) & 0xFFU) << 10;
2349          pkd_color[0] |= (pvr_float_to_ufixed(color[2].f, 10) & 0xFFU) << 20;
2350          pkd_color[0] |= (pvr_float_to_ufixed(color[3].f, 2) & 0xFFU) << 30;
2351       } else if (format == VK_FORMAT_A2R10G10B10_UINT_PACK32) {
2352          pkd_color[0] = color[2].ui & 0x3FFU;
2353          pkd_color[0] |= (color[1].ui & 0x3FFU) << 10;
2354          pkd_color[0] |= (color[0].ui & 0x3FFU) << 20;
2355          pkd_color[0] |= (color[3].ui & 0x3U) << 30;
2356       } else {
2357          pkd_color[0] = color[0].ui & 0x3FFU;
2358          pkd_color[0] |= (color[1].ui & 0x3FFU) << 10;
2359          pkd_color[0] |= (color[2].ui & 0x3FFU) << 20;
2360          pkd_color[0] |= (color[3].ui & 0x3U) << 30;
2361       }
2362 
2363       break;
2364 
2365    case ROGUE_PBESTATE_PACKMODE_A2F10F10F10:
2366    case ROGUE_PBESTATE_PACKMODE_F10F10F10A2:
2367       pkd_color[0] = pvr_float_to_sfixed(color[0].f, 10) & 0xFFU;
2368       pkd_color[0] |= (pvr_float_to_sfixed(color[1].f, 10) & 0xFFU) << 10;
2369       pkd_color[0] |= (pvr_float_to_sfixed(color[2].f, 10) & 0xFFU) << 20;
2370       pkd_color[0] |= (pvr_float_to_sfixed(color[3].f, 2) & 0xFFU) << 30;
2371       break;
2372 
2373    case ROGUE_PBESTATE_PACKMODE_U8U8U8:
2374    case ROGUE_PBESTATE_PACKMODE_R5SG5SB6:
2375       if (pbe_norm) {
2376          pkd_color[0] = pvr_float_to_ufixed(color[0].f, 8) & 0xFFU;
2377          pkd_color[0] |= (pvr_float_to_ufixed(color[1].f, 8) & 0xFFU) << 8;
2378          pkd_color[0] |= (pvr_float_to_ufixed(color[2].f, 8) & 0xFFU) << 16;
2379       } else {
2380          pkd_color[0] = color[0].ui & 0xFFU;
2381          pkd_color[0] |= (color[1].ui & 0xFFU) << 8;
2382          pkd_color[0] |= (color[2].ui & 0xFFU) << 16;
2383       }
2384       break;
2385 
2386    case ROGUE_PBESTATE_PACKMODE_S8S8S8:
2387    case ROGUE_PBESTATE_PACKMODE_B6G5SR5S:
2388       if (pbe_norm) {
2389          pkd_color[0] = pvr_float_to_sfixed(color[0].f, 8) & 0xFFU;
2390          pkd_color[0] |= (pvr_float_to_sfixed(color[1].f, 8) & 0xFFU) << 8;
2391          pkd_color[0] |= (pvr_float_to_sfixed(color[2].f, 8) & 0xFFU) << 16;
2392       } else {
2393          pkd_color[0] = color[0].ui & 0xFFU;
2394          pkd_color[0] |= (color[1].ui & 0xFFU) << 8;
2395          pkd_color[0] |= (color[2].ui & 0xFFU) << 16;
2396       }
2397       break;
2398 
2399    case ROGUE_PBESTATE_PACKMODE_U16U16U16:
2400       if (pbe_norm) {
2401          pkd_color[0] = pvr_float_to_ufixed(color[0].f, 16) & 0xFFFFU;
2402          pkd_color[0] |= (pvr_float_to_ufixed(color[1].f, 16) & 0xFFFFU) << 16;
2403          pkd_color[1] = (pvr_float_to_ufixed(color[2].f, 16) & 0xFFFFU);
2404       } else {
2405          pkd_color[0] = color[0].ui & 0xFFFFU;
2406          pkd_color[0] |= (color[1].ui & 0xFFFFU) << 16;
2407          pkd_color[1] = color[2].ui & 0xFFFFU;
2408       }
2409       break;
2410 
2411    case ROGUE_PBESTATE_PACKMODE_S16S16S16:
2412       if (pbe_norm) {
2413          pkd_color[0] = pvr_float_to_sfixed(color[0].f, 16) & 0xFFFFU;
2414          pkd_color[0] |= (pvr_float_to_sfixed(color[1].f, 16) & 0xFFFFU) << 16;
2415          pkd_color[1] = pvr_float_to_sfixed(color[2].f, 16) & 0xFFFFU;
2416       } else {
2417          pkd_color[0] = color[0].ui & 0xFFFFU;
2418          pkd_color[0] |= (color[1].ui & 0xFFFFU) << 16;
2419          pkd_color[1] = color[2].ui & 0xFFFFU;
2420       }
2421       break;
2422 
2423    case ROGUE_PBESTATE_PACKMODE_F16F16F16:
2424    case ROGUE_PBESTATE_PACKMODE_F11F11F10:
2425    case ROGUE_PBESTATE_PACKMODE_F10F11F11:
2426    case ROGUE_PBESTATE_PACKMODE_SE9995:
2427       pkd_color[0] = (uint32_t)pvr_float_to_f16(color[0].f, true);
2428       pkd_color[0] |= (uint32_t)pvr_float_to_f16(color[1].f, true) << 16;
2429       pkd_color[1] = (uint32_t)pvr_float_to_f16(color[2].f, true);
2430       break;
2431 
2432    case ROGUE_PBESTATE_PACKMODE_U32U32U32:
2433       pkd_color[0] = color[0].ui;
2434       pkd_color[1] = color[1].ui;
2435       pkd_color[2] = color[2].ui;
2436       break;
2437 
2438    case ROGUE_PBESTATE_PACKMODE_S32S32S32:
2439       pkd_color[0] = (uint32_t)color[0].i;
2440       pkd_color[1] = (uint32_t)color[1].i;
2441       pkd_color[2] = (uint32_t)color[2].i;
2442       break;
2443 
2444    case ROGUE_PBESTATE_PACKMODE_X24G8X32:
2445    case ROGUE_PBESTATE_PACKMODE_U8X24:
2446       pkd_color[1] = (color[1].ui & 0xFFU) << 24;
2447       break;
2448 
2449    case ROGUE_PBESTATE_PACKMODE_F32F32F32:
2450       memcpy(pkd_color, &color[0].f, 3U * sizeof(float));
2451       break;
2452 
2453    case ROGUE_PBESTATE_PACKMODE_U8U8:
2454       if (pbe_norm) {
2455          pkd_color[0] = (uint32_t)pvr_float_to_f16(color[0].f, false);
2456          pkd_color[0] |= (uint32_t)pvr_float_to_f16(color[1].f, false) << 16;
2457       } else {
2458          pkd_color[0] = color[0].ui & 0xFFU;
2459          pkd_color[0] |= (color[1].ui & 0xFFU) << 8;
2460       }
2461       break;
2462 
2463    case ROGUE_PBESTATE_PACKMODE_S8S8:
2464       if (pbe_norm) {
2465          pkd_color[0] = (uint32_t)pvr_float_to_f16(color[0].f, false);
2466          pkd_color[0] |= (uint32_t)pvr_float_to_f16(color[1].f, false) << 16;
2467       } else {
2468          pkd_color[0] = color[0].ui & 0xFFU;
2469          pkd_color[0] |= (color[1].ui & 0xFFU) << 8;
2470          pkd_color[0] |= (color[2].ui & 0xFFU) << 16;
2471          pkd_color[0] |= (color[3].ui & 0xFFU) << 24;
2472       }
2473       break;
2474 
2475    case ROGUE_PBESTATE_PACKMODE_U16U16:
2476       if (pbe_norm) {
2477          pkd_color[0] = pvr_float_to_ufixed(color[0].f, 16) & 0xFFFFU;
2478          pkd_color[0] |= (pvr_float_to_ufixed(color[1].f, 16) & 0xFFFFU) << 16;
2479       } else {
2480          pkd_color[0] = color[0].ui & 0xFFFFU;
2481          pkd_color[0] |= (color[1].ui & 0xFFFFU) << 16;
2482       }
2483       break;
2484 
2485    case ROGUE_PBESTATE_PACKMODE_S16S16:
2486       if (pbe_norm) {
2487          pkd_color[0] = pvr_float_to_sfixed(color[0].f, 16) & 0xFFFFU;
2488          pkd_color[0] |= (pvr_float_to_sfixed(color[1].f, 16) & 0xFFFFU) << 16;
2489       } else {
2490          pkd_color[0] = color[0].ui & 0xFFFFU;
2491          pkd_color[0] |= (color[1].ui & 0xFFFFU) << 16;
2492       }
2493       break;
2494 
2495    case ROGUE_PBESTATE_PACKMODE_F16F16:
2496       pkd_color[0] = (uint32_t)pvr_float_to_f16(color[0].f, true);
2497       pkd_color[0] |= (uint32_t)pvr_float_to_f16(color[1].f, true) << 16;
2498       break;
2499 
2500    case ROGUE_PBESTATE_PACKMODE_U32U32:
2501       pkd_color[0] = color[0].ui;
2502       pkd_color[1] = color[1].ui;
2503       break;
2504 
2505    case ROGUE_PBESTATE_PACKMODE_S32S32:
2506       pkd_color[0] = (uint32_t)color[0].i;
2507       pkd_color[1] = (uint32_t)color[1].i;
2508       break;
2509 
2510    case ROGUE_PBESTATE_PACKMODE_X24U8F32:
2511    case ROGUE_PBESTATE_PACKMODE_X24X8F32:
2512       memcpy(pkd_color, &color[0].f, 1U * sizeof(float));
2513       pkd_color[1] = color[1].ui & 0xFFU;
2514       break;
2515 
2516    case ROGUE_PBESTATE_PACKMODE_F32F32:
2517       memcpy(pkd_color, &color[0].f, 2U * sizeof(float));
2518       break;
2519 
2520    case ROGUE_PBESTATE_PACKMODE_ST8U24:
2521       pkd_color[0] = pvr_float_to_ufixed(color[0].f, 24) & 0xFFFFFFU;
2522       pkd_color[0] |= color[1].ui << 24;
2523       break;
2524 
2525    case ROGUE_PBESTATE_PACKMODE_U8:
2526       if (format == VK_FORMAT_S8_UINT)
2527          pkd_color[0] = color[1].ui & 0xFFU;
2528       else if (pbe_norm)
2529          pkd_color[0] = (uint32_t)pvr_float_to_f16(color[0].f, false);
2530       else
2531          pkd_color[0] = color[0].ui & 0xFFU;
2532 
2533       break;
2534 
2535    case ROGUE_PBESTATE_PACKMODE_S8:
2536       if (pbe_norm)
2537          pkd_color[0] = (uint32_t)pvr_float_to_f16(color[0].f, false);
2538       else
2539          pkd_color[0] = color[0].ui & 0xFFU;
2540       break;
2541 
2542    case ROGUE_PBESTATE_PACKMODE_U16:
2543       if (pbe_norm)
2544          pkd_color[0] = pvr_float_to_ufixed(color[0].f, 16) & 0xFFFFU;
2545       else
2546          pkd_color[0] = color[0].ui & 0xFFFFU;
2547       break;
2548 
2549    case ROGUE_PBESTATE_PACKMODE_S16:
2550       if (pbe_norm)
2551          pkd_color[0] = pvr_float_to_sfixed(color[0].f, 16) & 0xFFFFU;
2552       else
2553          pkd_color[0] = color[0].ui & 0xFFFFU;
2554       break;
2555 
2556    case ROGUE_PBESTATE_PACKMODE_F16:
2557       pkd_color[0] = (uint32_t)pvr_float_to_f16(color[0].f, true);
2558       break;
2559 
2560    /* U32 */
2561    case ROGUE_PBESTATE_PACKMODE_U32:
2562       if (format == VK_FORMAT_X8_D24_UNORM_PACK32) {
2563          pkd_color[0] = pvr_float_to_ufixed(color[0].f, 24) & 0xFFFFFFU;
2564       } else if (format == VK_FORMAT_D24_UNORM_S8_UINT) {
2565          pkd_color[0] = pvr_float_to_ufixed(color[0].f, 24) & 0xFFFFFFU;
2566          pkd_color[0] |= (color[1].ui & 0xFFU) << 24;
2567       } else if (format == VK_FORMAT_A2B10G10R10_UINT_PACK32) {
2568          pkd_color[0] = color[0].ui & 0x3FFU;
2569          pkd_color[0] |= (color[1].ui & 0x3FFU) << 10;
2570          pkd_color[0] |= (color[2].ui & 0x3FFU) << 20;
2571          pkd_color[0] |= (color[3].ui & 0x3U) << 30;
2572       } else {
2573          pkd_color[0] = color[0].ui;
2574       }
2575       break;
2576 
2577    /* U24ST8 */
2578    case ROGUE_PBESTATE_PACKMODE_U24ST8:
2579       pkd_color[1] = (color[1].ui & 0xFFU) << 24;
2580       pkd_color[1] |= pvr_float_to_ufixed(color[0].f, 24) & 0xFFFFFFU;
2581       break;
2582 
2583    /* S32 */
2584    case ROGUE_PBESTATE_PACKMODE_S32:
2585       pkd_color[0] = (uint32_t)color[0].i;
2586       break;
2587 
2588    /* F32 */
2589    case ROGUE_PBESTATE_PACKMODE_F32:
2590       memcpy(pkd_color, &color[0].f, sizeof(float));
2591       break;
2592 
2593    /* X8U24 */
2594    case ROGUE_PBESTATE_PACKMODE_X8U24:
2595       pkd_color[0] = pvr_float_to_ufixed(color[0].f, 24) & 0xFFFFFFU;
2596       break;
2597 
2598    default:
2599       break;
2600    }
2601 
2602    return VK_SUCCESS;
2603 }
2604 
2605 static VkResult
pvr_isp_scan_direction(struct pvr_transfer_cmd * transfer_cmd,bool custom_mapping,enum ROGUE_CR_DIR_TYPE * const dir_type_out)2606 pvr_isp_scan_direction(struct pvr_transfer_cmd *transfer_cmd,
2607                        bool custom_mapping,
2608                        enum ROGUE_CR_DIR_TYPE *const dir_type_out)
2609 {
2610    pvr_dev_addr_t dst_dev_addr = transfer_cmd->dst.dev_addr;
2611    bool backwards_in_x = false;
2612    bool backwards_in_y = false;
2613    bool done_dest_rect = false;
2614    VkRect2D dst_rect;
2615    int32_t dst_x1;
2616    int32_t dst_y1;
2617 
2618    for (uint32_t i = 0; i < transfer_cmd->source_count; i++) {
2619       struct pvr_transfer_cmd_source *src = &transfer_cmd->sources[i];
2620       pvr_dev_addr_t src_dev_addr = src->surface.dev_addr;
2621 
2622       if (src_dev_addr.addr == dst_dev_addr.addr && !custom_mapping) {
2623          VkRect2D *src_rect = &src->mappings[0].src_rect;
2624          int32_t src_x1 = src_rect->offset.x + src_rect->extent.width;
2625          int32_t src_y1 = src_rect->offset.y + src_rect->extent.height;
2626 
2627          if (!done_dest_rect) {
2628             dst_rect = src->mappings[0].dst_rect;
2629 
2630             dst_x1 = dst_rect.offset.x + dst_rect.extent.width;
2631             dst_y1 = dst_rect.offset.y + dst_rect.extent.height;
2632 
2633             done_dest_rect = true;
2634          }
2635 
2636          if ((dst_rect.offset.x < src_x1 && dst_x1 > src_rect->offset.x) &&
2637              (dst_rect.offset.y < src_y1 && dst_y1 > src_rect->offset.y)) {
2638             if (src_rect->extent.width != dst_rect.extent.width ||
2639                 src_rect->extent.height != dst_rect.extent.height) {
2640                /* Scaling is not possible. */
2641                return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);
2642             }
2643 
2644             /* Direction is to the right. */
2645             backwards_in_x = dst_rect.offset.x > src_rect->offset.x;
2646 
2647             /* Direction is to the bottom. */
2648             backwards_in_y = dst_rect.offset.y > src_rect->offset.y;
2649          }
2650       }
2651    }
2652 
2653    if (backwards_in_x) {
2654       if (backwards_in_y)
2655          *dir_type_out = ROGUE_CR_DIR_TYPE_BR2TL;
2656       else
2657          *dir_type_out = ROGUE_CR_DIR_TYPE_TR2BL;
2658    } else {
2659       if (backwards_in_y)
2660          *dir_type_out = ROGUE_CR_DIR_TYPE_BL2TR;
2661       else
2662          *dir_type_out = ROGUE_CR_DIR_TYPE_TL2BR;
2663    }
2664 
2665    return VK_SUCCESS;
2666 }
2667 
/* Core setup for a 3D-path copy/blit: programs the transfer register state
 * and PDS/USC programs for one pass of the command.
 *
 * Three mutually exclusive paths are taken based on the command:
 *  - FILL: no shader, clear color packed into the USC clear registers;
 *  - source_count > 0: a fragment shader samples the source(s) and the
 *    texture/sampler state is DMA'd in via a PDS uniform/texture program;
 *  - otherwise: no shader and no background object (e.g. empty blit).
 *
 * pass_idx selects the current custom-mapping pass; *finished_out is set to
 * false when further passes remain.
 */
static VkResult pvr_3d_copy_blit_core(struct pvr_transfer_ctx *ctx,
                                      struct pvr_transfer_cmd *transfer_cmd,
                                      struct pvr_transfer_prep_data *prep_data,
                                      uint32_t pass_idx,
                                      bool *finished_out)
{
   struct pvr_transfer_3d_state *const state = &prep_data->state;
   struct pvr_winsys_transfer_regs *const regs = &state->regs;
   struct pvr_device *const device = ctx->device;
   const struct pvr_device_info *const dev_info = &device->pdevice->dev_info;

   VkResult result;

   *finished_out = true;

   /* Reset the register allocation cursors for this prep. */
   state->common_ptr = 0U;
   state->dynamic_const_reg_ptr = 0U;
   state->usc_const_reg_ptr = 0U;

   if ((transfer_cmd->flags & PVR_TRANSFER_CMD_FLAGS_FILL) != 0U) {
      uint32_t packed_color[4U] = { 0U };

      /* A fill must not carry sources and cannot target compressed formats. */
      if (transfer_cmd->source_count != 0U)
         return vk_error(device, VK_ERROR_FORMAT_NOT_SUPPORTED);

      if (vk_format_is_compressed(transfer_cmd->dst.vk_format))
         return vk_error(device, VK_ERROR_FORMAT_NOT_SUPPORTED);

      /* No shader. */
      state->pds_temps = 0U;
      state->uniform_data_size = 0U;
      state->tex_state_data_size = 0U;

      /* No background enabled. */
      /* clang-format off */
      pvr_csb_pack (&regs->isp_bgobjvals, CR_ISP_BGOBJVALS, reg);
      /* clang-format on */
      pvr_csb_pack (&regs->isp_aa, CR_ISP_AA, reg) {
         reg.mode = pvr_cr_isp_aa_mode_type(transfer_cmd->dst.sample_count);
      }

      /* Pack the clear color to the destination format's in-register
       * layout; the four dwords feed the USC clear registers below.
       */
      result = pvr_pack_clear_color(transfer_cmd->dst.vk_format,
                                    transfer_cmd->clear_color,
                                    packed_color);
      if (result != VK_SUCCESS)
         return result;

      pvr_csb_pack (&regs->usc_clear_register0, CR_USC_CLEAR_REGISTER, reg) {
         reg.val = packed_color[0U];
      }

      pvr_csb_pack (&regs->usc_clear_register1, CR_USC_CLEAR_REGISTER, reg) {
         reg.val = packed_color[1U];
      }

      pvr_csb_pack (&regs->usc_clear_register2, CR_USC_CLEAR_REGISTER, reg) {
         reg.val = packed_color[2U];
      }

      pvr_csb_pack (&regs->usc_clear_register3, CR_USC_CLEAR_REGISTER, reg) {
         reg.val = packed_color[3U];
      }

      /* Clears the LSB so a sample count of 1 yields multiplier 0. */
      state->msaa_multiplier = transfer_cmd->dst.sample_count & ~1U;
      state->pds_shader_task_offset = 0U;
      state->uni_tex_code_offset = 0U;
      state->tex_state_data_offset = 0U;
   } else if (transfer_cmd->source_count > 0U) {
      const struct pvr_tq_frag_sh_reg_layout nop_sh_reg_layout = {
         /* TODO: Setting this to 1 so that we don't try to pvr_bo_alloc() with
          * zero size. The device will ignore the PDS program if USC_SHAREDSIZE
          * is zero and in the case of the nop shader we're expecting it to be
          * zero. See if we can safely pass PVR_DEV_ADDR_INVALID for the unitex
          * program.
          */
         .driver_total = 1,
      };
      const struct pvr_tq_frag_sh_reg_layout *sh_reg_layout;
      struct pvr_pds_pixel_shader_sa_program unitex_prog = { 0U };
      uint32_t tex_state_dma_size_dw;
      struct pvr_suballoc_bo *pvr_bo;
      uint32_t *dma_space;

      result = pvr_pbe_src_format(transfer_cmd, state, &state->shader_props);
      if (result != VK_SUCCESS)
         return result;

      pvr_uv_space(dev_info, transfer_cmd, state);

      state->shader_props.iterated = false;

      /* 3D-twiddled sources need a per-sample Z coordinate. */
      state->shader_props.layer_props.sample =
         transfer_cmd->sources[0].surface.mem_layout ==
         PVR_MEMLAYOUT_3DTWIDDLED;

      result = pvr_msaa_state(dev_info, transfer_cmd, state, 0);
      if (result != VK_SUCCESS)
         return result;

      state->shader_props.pick_component =
         pvr_pick_component_needed(&state->custom_mapping);

      /* Some formats cannot be linearly filtered by the TPU; fall back to
       * a USC (shader-side) linear filter where the hardware supports it.
       */
      if (state->filter[0] == PVR_FILTER_LINEAR &&
          pvr_requires_usc_linear_filter(
             transfer_cmd->sources[0].surface.vk_format)) {
         if (pvr_int_pbe_usc_linear_filter(
                state->shader_props.layer_props.pbe_format,
                state->shader_props.layer_props.sample,
                state->shader_props.layer_props.msaa,
                state->shader_props.full_rate)) {
            state->shader_props.layer_props.linear = true;
         } else {
            mesa_logw("Transfer: F32 linear filter not supported.");
         }
      }

      if (state->empty_dst) {
         /* Nothing visible to shade: use the nop program's layout. */
         sh_reg_layout = &nop_sh_reg_layout;
         state->pds_shader_task_offset = device->nop_program.pds.data_offset;
      } else {
         pvr_dev_addr_t kick_usc_pds_dev_addr;

         /* Fetch (or build and cache) the fragment shader matching
          * shader_props, plus its shared-register layout.
          */
         result =
            pvr_transfer_frag_store_get_shader_info(device,
                                                    &ctx->frag_store,
                                                    &state->shader_props,
                                                    &kick_usc_pds_dev_addr,
                                                    &sh_reg_layout);
         if (result != VK_SUCCESS)
            return result;

         assert(kick_usc_pds_dev_addr.addr <= UINT32_MAX);
         state->pds_shader_task_offset = (uint32_t)kick_usc_pds_dev_addr.addr;
      }

      unitex_prog.kick_usc = false;
      unitex_prog.clear = false;

      /* Total dwords of texture/sampler state + constants to DMA to the
       * USC shared registers.
       */
      tex_state_dma_size_dw =
         sh_reg_layout->driver_total + sh_reg_layout->compiler_out_total;

      unitex_prog.num_texture_dma_kicks = 1U;
      unitex_prog.num_uniform_dma_kicks = 0U;

      result = pvr_cmd_buffer_alloc_mem(transfer_cmd->cmd_buffer,
                                        device->heaps.general_heap,
                                        PVR_DW_TO_BYTES(tex_state_dma_size_dw),
                                        &pvr_bo);
      if (result != VK_SUCCESS)
         return result;

      dma_space = (uint32_t *)pvr_bo_suballoc_get_map_addr(pvr_bo);

      result = pvr_sampler_image_state(ctx,
                                       transfer_cmd,
                                       sh_reg_layout,
                                       state,
                                       dma_space);
      if (result != VK_SUCCESS)
         return result;

      pvr_dma_texture_floats(transfer_cmd, state, sh_reg_layout, dma_space);

      /* 3D sources also need the Z slice as a dynamic constant. */
      if (transfer_cmd->sources[0].surface.mem_layout ==
          PVR_MEMLAYOUT_3DTWIDDLED) {
         dma_space[pvr_dynamic_const_reg_advance(sh_reg_layout, state)] =
            fui(transfer_cmd->sources[0].surface.z_position);
      }

      pvr_write_usc_constants(sh_reg_layout, dma_space);

      if (pvr_pick_component_needed(&state->custom_mapping))
         pvr_dma_texel_unwind(state, sh_reg_layout, dma_space);

      pvr_pds_encode_dma_burst(unitex_prog.texture_dma_control,
                               unitex_prog.texture_dma_address,
                               state->common_ptr,
                               tex_state_dma_size_dw,
                               pvr_bo->dev_addr.addr,
                               true,
                               dev_info);

      state->common_ptr += tex_state_dma_size_dw;

      result =
         pvr_pds_unitex(dev_info, ctx, transfer_cmd, &unitex_prog, prep_data);
      if (result != VK_SUCCESS)
         return result;

      pvr_csb_pack (&regs->isp_bgobjvals, CR_ISP_BGOBJVALS, reg) {
         reg.enablebgtag = true;
      }
   } else {
      /* No shader. */
      state->pds_temps = 0U;
      state->uniform_data_size = 0U;
      state->tex_state_data_size = 0U;

      /* No background enabled. */
      /* clang-format off */
      pvr_csb_pack (&regs->isp_bgobjvals, CR_ISP_BGOBJVALS, reg);
      /* clang-format on */
      pvr_csb_pack (&regs->isp_aa, CR_ISP_AA, reg) {
         reg.mode = pvr_cr_isp_aa_mode_type(transfer_cmd->dst.sample_count);
      }
      state->msaa_multiplier = transfer_cmd->dst.sample_count & ~1U;
      state->pds_shader_task_offset = 0U;
      state->uni_tex_code_offset = 0U;
      state->tex_state_data_offset = 0U;

      result = pvr_pbe_src_format(transfer_cmd, state, &state->shader_props);
      if (result != VK_SUCCESS)
         return result;
   }

   pvr_setup_hwbg_object(dev_info, state);

   pvr_csb_pack (&regs->isp_render, CR_ISP_RENDER, reg) {
      reg.mode_type = ROGUE_CR_ISP_RENDER_MODE_TYPE_FAST_SCALE;

      /* Overlapping same-surface blits constrain the ISP scan order. */
      result = pvr_isp_scan_direction(transfer_cmd,
                                      state->custom_mapping.pass_count,
                                      &reg.dir_type);
      if (result != VK_SUCCESS)
         return result;
   }

   /* Set up pixel event handling. */
   result = pvr_pbe_setup(transfer_cmd, ctx, state);
   if (result != VK_SUCCESS)
      return result;

   result = pvr_isp_tiles(device, state);
   if (result != VK_SUCCESS)
      return result;

   if (PVR_HAS_FEATURE(&device->pdevice->dev_info, gpu_multicore_support)) {
      pvr_csb_pack (&regs->frag_screen, CR_FRAG_SCREEN, reg) {
         reg.xmax = transfer_cmd->dst.width - 1;
         reg.ymax = transfer_cmd->dst.height - 1;
      }
   }

   /* More custom-mapping passes to run? Tell the caller to come back. */
   if ((pass_idx + 1U) < state->custom_mapping.pass_count)
      *finished_out = false;

   return VK_SUCCESS;
}
2916 
2917 static VkResult
pvr_pbe_src_format_f2d(uint32_t merge_flags,struct pvr_transfer_cmd_source * src,VkFormat dst_format,bool down_scale,bool dont_force_pbe,enum pvr_transfer_pbe_pixel_src * pixel_format_out)2918 pvr_pbe_src_format_f2d(uint32_t merge_flags,
2919                        struct pvr_transfer_cmd_source *src,
2920                        VkFormat dst_format,
2921                        bool down_scale,
2922                        bool dont_force_pbe,
2923                        enum pvr_transfer_pbe_pixel_src *pixel_format_out)
2924 {
2925    VkFormat src_format = src->surface.vk_format;
2926 
2927    /* This has to come before the rest as S8 for instance is integer and
2928     * signedsess check fails on D24S8.
2929     */
2930    if (vk_format_is_depth_or_stencil(src_format) ||
2931        vk_format_is_depth_or_stencil(dst_format) ||
2932        merge_flags & PVR_TRANSFER_CMD_FLAGS_DSMERGE) {
2933       return pvr_pbe_src_format_ds(&src->surface,
2934                                    src->filter,
2935                                    dst_format,
2936                                    merge_flags,
2937                                    down_scale,
2938                                    pixel_format_out);
2939    }
2940 
2941    return pvr_pbe_src_format_normal(src_format,
2942                                     dst_format,
2943                                     down_scale,
2944                                     dont_force_pbe,
2945                                     pixel_format_out);
2946 }
2947 
/** Writes the coefficient loading PDS task.
 *
 * Builds a single-iterator DOUTI program that makes the TSP iterate the
 * (non-perspective) texture coordinates into USC coefficient registers,
 * allocates PDS heap space for it, and records the resulting offsets and
 * sizes in prep_data->state.
 *
 * sample_3d selects 3D (u,v,w) iteration and the larger coefficient
 * register budget.
 */
static inline VkResult
pvr_pds_coeff_task(struct pvr_transfer_ctx *ctx,
                   const struct pvr_transfer_cmd *transfer_cmd,
                   const bool sample_3d,
                   struct pvr_transfer_prep_data *prep_data)
{
   struct pvr_transfer_3d_state *state = &prep_data->state;
   struct pvr_pds_coeff_loading_program program = { 0U };
   struct pvr_suballoc_bo *pvr_bo;
   VkResult result;

   program.num_fpu_iterators = 1U;

   pvr_csb_pack (&program.FPU_iterators[0U],
                 PDSINST_DOUT_FIELDS_DOUTI_SRC,
                 reg) {
      if (sample_3d)
         reg.size = ROGUE_PDSINST_DOUTI_SIZE_3D;
      else
         reg.size = ROGUE_PDSINST_DOUTI_SIZE_2D;

      reg.perspective = false;

      /* Varying wrap on the TSP means that the TSP chooses the shorter path
       * out of the normal and the wrapping path i.e. chooses between u0->u1
       * and u1->1.0 == 0.0 -> u0. We don't need this behavior.
       */
      /*
       * if RHW ever needed offset SRC_F32 to the first U in 16 bit units
       * l0 U    <= offs 0
       * l0 V
       * l1 U    <= offs 4
       * ...
       */
      reg.shademodel = ROGUE_PDSINST_DOUTI_SHADEMODEL_GOURUAD;
      reg.f32_offset = 0U;
   }

   /* Coefficient register usage: 4 regs per iterated component pair;
    * 3D sampling iterates an extra coordinate.
    */
   if (sample_3d)
      state->usc_coeff_regs = 12U;
   else
      state->usc_coeff_regs = 8U;

   /* Sizing pass fills in program.data_size/code_size before allocation. */
   pvr_pds_set_sizes_coeff_loading(&program);

   result = pvr_cmd_buffer_alloc_mem(
      transfer_cmd->cmd_buffer,
      ctx->device->heaps.pds_heap,
      PVR_DW_TO_BYTES(program.data_size + program.code_size),
      &pvr_bo);
   if (result != VK_SUCCESS)
      return result;

   /* The hardware takes the task offset relative to the PDS heap base. */
   state->pds_coeff_task_offset =
      pvr_bo->dev_addr.addr - ctx->device->heaps.pds_heap->base_addr.addr;

   pvr_pds_generate_coeff_loading_program(&program,
                                          pvr_bo_suballoc_get_map_addr(pvr_bo));

   state->coeff_data_size = program.data_size;
   state->pds_temps = program.temps_used;

   return VK_SUCCESS;
}
3013 
3014 #define X 0U
3015 #define Y 1U
3016 #define Z 2U
3017 
/* Converts a source rect (and optional Z slice) into the TSP float layout
 * for one layer: normalizes the corner coordinates by the reciprocal
 * surface dimensions, optionally biases them for custom filtering, and
 * stores the raw float bits (rotated left by one on cores without the
 * simple internal parameter format) into layer->texture_coords.
 */
static void pvr_tsp_floats(const struct pvr_device_info *dev_info,
                           VkRect2D *rect,
                           const float recips[3U],
                           bool custom_filter,
                           bool z_present,
                           float z_value,
                           struct pvr_transfer_3d_iteration *layer)
{
#define U0 0U
#define U1 1U
#define V0 2U
#define V1 3U

   /* Corner emission order: (U0,V0) (U0,V1) (U1,V1) (U1,V0). */
   const uint32_t corner_order[8U] = { U0, V0, U0, V1, U1, V1, U1, V0 };
   float bias[2U] = { 0.0f, 0.0f };
   int32_t unnormalized[4U];
   uint32_t flipped[2U];
   uint32_t normalized[4U];
   int32_t span[2U];

   unnormalized[U0] = rect->offset.x;
   unnormalized[U1] = rect->offset.x + rect->extent.width;
   unnormalized[V0] = rect->offset.y;
   unnormalized[V1] = rect->offset.y + rect->extent.height;

   /* Filter adjust. */
   span[X] = rect->extent.width;
   flipped[X] = span[X] > 0U ? 0U : 1U;
   span[Y] = rect->extent.height;
   flipped[Y] = span[Y] > 0U ? 0U : 1U;
   /*
    * | X  | Y  | srcFlipX | srcFlipY |
    * +----+----+----------+----------|
    * | X  | Y  | 0        | 0        |
    * | -X | Y  | 1        | 0        |
    * | X  | -Y | 0        | 1        |
    * | -X | -Y | 1        | 1        |
    */
   if (custom_filter) {
      for (uint32_t axis = X; axis <= Y; axis++) {
         if (flipped[axis] != 0U)
            bias[axis] += 0.25;
         else
            bias[axis] -= 0.25;
      }
   }

   /* Normalize each corner coordinate and capture its float bit pattern. */
   for (uint32_t i = 0U; i < ARRAY_SIZE(normalized); i++) {
      float coord = (float)unnormalized[i] + bias[i >> 1U];
      uint32_t bits;

      coord *= recips[i >> 1U];

      bits = fui(coord);
      /* Older parameter formats expect the sign bit rotated to the LSB. */
      if (!PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format))
         bits = XXH_rotl32(bits, 1U);

      normalized[i] = bits;
   }

   /* Scatter the normalized pairs into the per-vertex coordinate slots. */
   for (uint32_t i = 0U; i < 8U; i++)
      layer->texture_coords[i] = normalized[corner_order[i]];

   /* 3D sources carry the same normalized Z for all four vertices. */
   if (z_present) {
      uint32_t z_bits = fui(z_value * recips[2U]);

      if (!PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format))
         z_bits = XXH_rotl32(z_bits, 1U);

      for (uint32_t i = 8U; i < 12U; i++)
         layer->texture_coords[i] = z_bits;
   }

#undef U0
#undef U1
#undef V0
#undef V1
}
3099 
/* Emits the TSP (texture coordinate) vertex data of a primitive block.
 *
 * For each rect mapping, normalized U/V (plus Z for 3D twiddled sources)
 * coordinates are computed with pvr_tsp_floats() and written out for each of
 * the four ISP vertices, per layer. On non-SIPF cores the (skipped) TSP
 * compression format words are appended afterwards; on SIPF cores the stream
 * is simply re-aligned to a 64-bit boundary.
 */
static void
pvr_isp_prim_block_tsp_vertex_block(const struct pvr_device_info *dev_info,
                                    const struct pvr_transfer_cmd_source *src,
                                    struct pvr_rect_mapping *mappings,
                                    bool custom_filter,
                                    uint32_t num_mappings,
                                    uint32_t mapping_offset,
                                    uint32_t tsp_comp_format_in_dw,
                                    uint32_t **const cs_ptr_out)
{
   struct pvr_transfer_3d_iteration layer;
   uint32_t *cs_ptr = *cs_ptr_out;

   /*  |<-32b->|
    *  +-------+-----
    *  |  RHW  |    | X num_isp_vertices
    *  +-------+--  |
    *  |  U    | |  |
    *  |  V    | | X PVR_TRANSFER_NUM_LAYERS
    *  +-------+-----
    *
    * RHW is not there any more in the Transfer. The comment still explains
    * where it should go if ever needed.
    */
   for (uint32_t i = mapping_offset; i < mapping_offset + num_mappings; i++) {
      bool z_present = src->surface.mem_layout == PVR_MEMLAYOUT_3DTWIDDLED;
      /* Reciprocals used to normalize integer rect coords to [0.0, 1.0]. */
      const float recips[3U] = {
         [X] = 1.0f / (float)src->surface.width,
         [Y] = 1.0f / (float)src->surface.height,
         [Z] = z_present ? 1.0f / (float)src->surface.depth : 0.0f,
      };
      /* Nearest filtering samples from the center of the selected Z slice. */
      float z_pos = (src->filter < PVR_FILTER_LINEAR)
                       ? floor(src->surface.z_position) + 0.5f
                       : src->surface.z_position;

      pvr_tsp_floats(dev_info,
                     &mappings[i].src_rect,
                     recips,
                     custom_filter,
                     z_present,
                     z_pos,
                     &layer);

      /* We request UVs from TSP for ISP triangle:
       *  0 u 1
       *  +---,
       * v|  /|
       *  | / |
       * 2'/--'3
       */
      /* Vertex 0: (U0, V0). */
      for (uint32_t j = 0U; j < PVR_TRANSFER_NUM_LAYERS; j++) {
         *cs_ptr++ = layer.texture_coords[0U];
         *cs_ptr++ = layer.texture_coords[1U];
      }

      /* texture_coords[8..11] all hold the same normalized Z, see
       * pvr_tsp_floats().
       */
      if (z_present) {
         *cs_ptr++ = layer.texture_coords[8U];
         *cs_ptr++ = 0U;
      }

      /* Vertex 1: (U1, V0). */
      for (uint32_t j = 0U; j < PVR_TRANSFER_NUM_LAYERS; j++) {
         *cs_ptr++ = layer.texture_coords[6U];
         *cs_ptr++ = layer.texture_coords[7U];
      }

      if (z_present) {
         *cs_ptr++ = layer.texture_coords[11U];
         *cs_ptr++ = 0U;
      }

      /* Vertex 2: (U0, V1). */
      for (uint32_t j = 0U; j < PVR_TRANSFER_NUM_LAYERS; j++) {
         *cs_ptr++ = layer.texture_coords[2U];
         *cs_ptr++ = layer.texture_coords[3U];
      }

      if (z_present) {
         *cs_ptr++ = layer.texture_coords[9U];
         *cs_ptr++ = 0U;
      }

      /* Vertex 3: (U1, V1). */
      for (uint32_t j = 0U; j < PVR_TRANSFER_NUM_LAYERS; j++) {
         *cs_ptr++ = layer.texture_coords[4U];
         *cs_ptr++ = layer.texture_coords[5U];
      }

      if (z_present) {
         *cs_ptr++ = layer.texture_coords[10U];
         *cs_ptr++ = 0U;
      }
   }

   if (!PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
      /* Skipped optional primitive id. */
      for (uint32_t i = 0U; i < tsp_comp_format_in_dw; i++)
         *cs_ptr++ = 0x88888888U;
   } else {
      /* Align back to 64 bits. */
      if (((uintptr_t)cs_ptr & 7U) != 0U)
         cs_ptr++;
   }

   *cs_ptr_out = cs_ptr;
}
3203 
3204 #undef X
3205 #undef Y
3206 #undef Z
3207 
/* Emits the TA PDS state words of a primitive block: the PDS shader/texture
 * code and data base addresses plus the PDS/USC allocation size info, all
 * taken from the prepared transfer state.
 *
 * NOTE(review): "ctx" is not referenced in this function at present.
 */
static void pvr_isp_prim_block_pds_state(const struct pvr_device_info *dev_info,
                                         struct pvr_transfer_ctx *ctx,
                                         struct pvr_transfer_3d_state *state,
                                         uint32_t **const cs_ptr_out)
{
   uint32_t *cs_ptr = *cs_ptr_out;

   pvr_csb_pack (cs_ptr, TA_STATE_PDS_SHADERBASE, shader_base) {
      shader_base.addr = PVR_DEV_ADDR(state->pds_shader_task_offset);
   }
   cs_ptr++;

   pvr_csb_pack (cs_ptr, TA_STATE_PDS_TEXUNICODEBASE, tex_base) {
      tex_base.addr = PVR_DEV_ADDR(state->uni_tex_code_offset);
   }
   cs_ptr++;

   /* All sizes below are converted from raw register/dword counts into the
    * hardware's allocation granules via the *_UNIT_SIZE constants.
    */
   pvr_csb_pack (cs_ptr, TA_STATE_PDS_SIZEINFO1, info1) {
      info1.pds_uniformsize =
         state->uniform_data_size /
         ROGUE_TA_STATE_PDS_SIZEINFO1_PDS_UNIFORMSIZE_UNIT_SIZE;

      info1.pds_texturestatesize =
         state->tex_state_data_size /
         ROGUE_TA_STATE_PDS_SIZEINFO1_PDS_TEXTURESTATESIZE_UNIT_SIZE;

      info1.pds_varyingsize =
         state->coeff_data_size /
         ROGUE_TA_STATE_PDS_SIZEINFO1_PDS_VARYINGSIZE_UNIT_SIZE;

      info1.usc_varyingsize =
         ALIGN_POT(state->usc_coeff_regs,
                   ROGUE_TA_STATE_PDS_SIZEINFO1_USC_VARYINGSIZE_UNIT_SIZE) /
         ROGUE_TA_STATE_PDS_SIZEINFO1_USC_VARYINGSIZE_UNIT_SIZE;

      info1.pds_tempsize =
         ALIGN_POT(state->pds_temps,
                   ROGUE_TA_STATE_PDS_SIZEINFO1_PDS_TEMPSIZE_UNIT_SIZE) /
         ROGUE_TA_STATE_PDS_SIZEINFO1_PDS_TEMPSIZE_UNIT_SIZE;
   }
   cs_ptr++;

   pvr_csb_pack (cs_ptr, TA_STATE_PDS_VARYINGBASE, base) {
      base.addr = PVR_DEV_ADDR(state->pds_coeff_task_offset);
   }
   cs_ptr++;

   pvr_csb_pack (cs_ptr, TA_STATE_PDS_TEXTUREDATABASE, base) {
      base.addr = PVR_DEV_ADDR(state->tex_state_data_offset);
   }
   cs_ptr++;

   /* PDS uniform program not used. */
   pvr_csb_pack (cs_ptr, TA_STATE_PDS_UNIFORMDATABASE, base) {
      base.addr = PVR_DEV_ADDR(0U);
   }
   cs_ptr++;

   pvr_csb_pack (cs_ptr, TA_STATE_PDS_SIZEINFO2, info) {
      info.usc_sharedsize =
         ALIGN_POT(state->common_ptr,
                   ROGUE_TA_STATE_PDS_SIZEINFO2_USC_SHAREDSIZE_UNIT_SIZE) /
         ROGUE_TA_STATE_PDS_SIZEINFO2_USC_SHAREDSIZE_UNIT_SIZE;
      info.pds_tri_merge_disable = !PVR_HAS_ERN(dev_info, 42307);
      info.pds_batchnum = 0U;
   }
   cs_ptr++;

   /* Get back to 64 bits boundary. */
   if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format))
      cs_ptr++;

   *cs_ptr_out = cs_ptr;
}
3282 
/* Emits the ISP state words of a primitive block.
 *
 * On simple_internal_parameter_format_v2 cores a vertex format word precedes
 * the ISPCTL/ISPA state and no compression words are emitted. On other cores
 * the state words are followed by the TSP compression size word and the ISP
 * (un)compression format words.
 *
 * NOTE(review): tsp_comp_format_in_dw is marked UNUSED but is consumed on the
 * non-SIPF-v2 path below; the marker only suppresses the warning for builds
 * that don't take that path.
 */
static void pvr_isp_prim_block_isp_state(const struct pvr_device_info *dev_info,
                                         UNUSED uint32_t tsp_comp_format_in_dw,
                                         uint32_t tsp_data_size_in_bytes,
                                         uint32_t num_isp_vertices,
                                         bool read_bgnd,
                                         uint32_t **const cs_ptr_out)
{
   const bool has_simple_internal_parameter_format_v2 =
      PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format_v2);
   uint32_t *cs_ptr = *cs_ptr_out;

   if (has_simple_internal_parameter_format_v2) {
      const uint32_t tsp_data_per_vrx_in_bytes =
         tsp_data_size_in_bytes / num_isp_vertices;

      pvr_csb_pack ((uint64_t *)cs_ptr,
                    IPF_VERTEX_FORMAT_WORD_SIPF2,
                    vert_fmt) {
         vert_fmt.vf_isp_state_size =
            pvr_cmd_length(TA_STATE_ISPCTL) + pvr_cmd_length(TA_STATE_ISPA);

         vert_fmt.vf_tsp_vtx_raw = true;
         vert_fmt.vf_isp_vtx_raw = true;

         vert_fmt.vf_varying_vertex_bits = tsp_data_per_vrx_in_bytes * 8U;
         /* Two vertices per primitive (triangle strip style counts). */
         vert_fmt.vf_primitive_total = (num_isp_vertices / 2U) - 1U;
         vert_fmt.vf_vertex_total = num_isp_vertices - 1U;
      }
      cs_ptr += pvr_cmd_length(IPF_VERTEX_FORMAT_WORD_SIPF2);
   }

   /* ISP state words. */

   /* clang-format off */
   pvr_csb_pack (cs_ptr, TA_STATE_ISPCTL, ispctl);
   /* clang-format on */
   cs_ptr += pvr_cmd_length(TA_STATE_ISPCTL);

   pvr_csb_pack (cs_ptr, TA_STATE_ISPA, ispa) {
      ispa.objtype = ROGUE_TA_OBJTYPE_TRIANGLE;
      /* Reading the background means the fragment must blend with what's
       * already there, so it can't be treated as opaque.
       */
      ispa.passtype = read_bgnd ? ROGUE_TA_PASSTYPE_TRANSLUCENT
                                : ROGUE_TA_PASSTYPE_OPAQUE;
      ispa.dcmpmode = ROGUE_TA_CMPMODE_ALWAYS;
      ispa.dwritedisable = true;
   }
   cs_ptr += pvr_cmd_length(TA_STATE_ISPA);

   if (has_simple_internal_parameter_format_v2) {
      *cs_ptr_out = cs_ptr;
      return;
   }

   /* How many bytes the TSP compression format needs? */
   pvr_csb_pack (cs_ptr, IPF_COMPRESSION_SIZE_WORD, word) {
      word.cs_isp_comp_table_size = 0U;
      word.cs_tsp_comp_format_size = tsp_comp_format_in_dw;
      word.cs_tsp_comp_table_size = 0U;
      word.cs_tsp_comp_vertex_size = tsp_data_size_in_bytes / num_isp_vertices;
   }
   cs_ptr += pvr_cmd_length(IPF_COMPRESSION_SIZE_WORD);

   /* ISP vertex compression. All components raw, i.e. uncompressed. */
   pvr_csb_pack (cs_ptr, IPF_ISP_COMPRESSION_WORD_0, word0) {
      word0.cf_isp_comp_fmt_x0 = ROGUE_IPF_COMPRESSION_FORMAT_RAW_BYTE;
      word0.cf_isp_comp_fmt_x1 = ROGUE_IPF_COMPRESSION_FORMAT_RAW_BYTE;
      word0.cf_isp_comp_fmt_x2 = ROGUE_IPF_COMPRESSION_FORMAT_RAW_BYTE;
      word0.cf_isp_comp_fmt_y0 = ROGUE_IPF_COMPRESSION_FORMAT_RAW_BYTE;
      word0.cf_isp_comp_fmt_y1 = ROGUE_IPF_COMPRESSION_FORMAT_RAW_BYTE;
      word0.cf_isp_comp_fmt_y2 = ROGUE_IPF_COMPRESSION_FORMAT_RAW_BYTE;
      word0.cf_isp_comp_fmt_z0 = ROGUE_IPF_COMPRESSION_FORMAT_RAW_BYTE;
      word0.cf_isp_comp_fmt_z1 = ROGUE_IPF_COMPRESSION_FORMAT_RAW_BYTE;
   }
   cs_ptr += pvr_cmd_length(IPF_ISP_COMPRESSION_WORD_0);

   pvr_csb_pack (cs_ptr, IPF_ISP_COMPRESSION_WORD_1, word1) {
      word1.vf_prim_msaa = 0U;
      word1.vf_prim_id_pres = 0U;
      word1.vf_vertex_clipped = 0U;
      word1.vf_vertex_total = num_isp_vertices - 1U;
      word1.cf_isp_comp_fmt_z3 = ROGUE_IPF_COMPRESSION_FORMAT_RAW_BYTE;
      word1.cf_isp_comp_fmt_z2 = ROGUE_IPF_COMPRESSION_FORMAT_RAW_BYTE;
   }
   cs_ptr += pvr_cmd_length(IPF_ISP_COMPRESSION_WORD_1);

   *cs_ptr_out = cs_ptr;
}
3369 
/* Emits the index data of a primitive block.
 *
 * Each mapping is a quad drawn as two triangles over four vertices. On SIPF
 * cores two quads (four triangles) are packed per 64-bit index word. On other
 * cores four triangles are packed into three dwords, so consecutive even/odd
 * iterations share the middle dword.
 */
static void
pvr_isp_prim_block_index_block(const struct pvr_device_info *dev_info,
                               uint32_t num_mappings,
                               uint32_t **const cs_ptr_out)
{
   uint32_t *cs_ptr = *cs_ptr_out;

   if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
      for (uint32_t i = 0U; i < DIV_ROUND_UP(num_mappings, 2U); i++) {
         /* idx is the first vertex index of this pair of quads. */
         const uint32_t idx = i * 8U;

         pvr_csb_pack ((uint64_t *)cs_ptr,
                       IPF_INDEX_DATA_WORDS_SIPF,
                       idx_data_word) {
            idx_data_word.ix_triangle3_index_2 = idx + 5U;
            idx_data_word.ix_triangle3_index_1 = idx + 6U;
            idx_data_word.ix_triangle3_index_0 = idx + 7U;

            idx_data_word.ix_triangle2_index_2 = idx + 6U;
            idx_data_word.ix_triangle2_index_1 = idx + 5U;
            idx_data_word.ix_triangle2_index_0 = idx + 4U;

            idx_data_word.ix_triangle1_index_2 = idx + 1U;
            idx_data_word.ix_triangle1_index_1 = idx + 2U;
            idx_data_word.ix_triangle1_index_0 = idx + 3U;

            idx_data_word.ix_triangle0_index_2 = idx + 2U;
            idx_data_word.ix_triangle0_index_1 = idx + 1U;
            idx_data_word.ix_triangle0_index_0 = idx + 0U;
         }
         cs_ptr += pvr_cmd_length(IPF_INDEX_DATA_WORDS_SIPF);
      }

      *cs_ptr_out = cs_ptr;
      return;
   }

   /* j is the first vertex index of mapping i. */
   for (uint32_t i = 0U, j = 0U; i < num_mappings; i++, j += 4U) {
      if ((i & 1U) == 0U) {
         pvr_csb_pack (cs_ptr, IPF_INDEX_DATA, word) {
            word.ix_index0_0 = j;
            word.ix_index0_1 = j + 1U;
            word.ix_index0_2 = j + 2U;
            word.ix_index1_0 = j + 3U;
         }
         cs_ptr += pvr_cmd_length(IPF_INDEX_DATA);

         /* Don't increment cs_ptr here. IPF_INDEX_DATA is patched in the
          * else part and then cs_ptr is incremented.
          */
         pvr_csb_pack (cs_ptr, IPF_INDEX_DATA, word) {
            word.ix_index0_0 = j + 2U;
            word.ix_index0_1 = j + 1U;
         }
      } else {
         uint32_t tmp;

         /* OR the remaining two indices into the shared dword written by the
          * previous (even) iteration.
          */
         pvr_csb_pack (&tmp, IPF_INDEX_DATA, word) {
            word.ix_index0_2 = j;
            word.ix_index1_0 = j + 1U;
         }
         *cs_ptr |= tmp;
         cs_ptr += pvr_cmd_length(IPF_INDEX_DATA);

         pvr_csb_pack (cs_ptr, IPF_INDEX_DATA, word) {
            word.ix_index0_0 = j + 2U;
            word.ix_index0_1 = j + 3U;
            word.ix_index0_2 = j + 2U;
            word.ix_index1_0 = j + 1U;
         }
         cs_ptr += pvr_cmd_length(IPF_INDEX_DATA);
      }
   }

   /* The last pass didn't ++. */
   if ((num_mappings & 1U) != 0U)
      cs_ptr++;

   *cs_ptr_out = cs_ptr;
}
3450 
3451 /* Calculates a 24 bit fixed point (biased) representation of a signed integer.
3452  */
3453 static inline VkResult
pvr_int32_to_isp_xy_vtx(const struct pvr_device_info * dev_info,int32_t val,bool bias,uint32_t * word_out)3454 pvr_int32_to_isp_xy_vtx(const struct pvr_device_info *dev_info,
3455                         int32_t val,
3456                         bool bias,
3457                         uint32_t *word_out)
3458 {
3459    if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
3460       const uint32_t max_fractional = ROGUE_IPF_ISP_VERTEX_XY_SIPF_FRAC_MAX_VAL;
3461       const uint32_t max_integer = ROGUE_IPF_ISP_VERTEX_XY_SIPF_INTEGER_MAX_VAL;
3462 
3463       uint32_t fractional;
3464       uint32_t integer;
3465 
3466       if (bias)
3467          val += ROGUE_IPF_ISP_VERTEX_XY_BIAS_VALUE_SIPF;
3468 
3469       if (val < 0 || val > max_integer + 1) {
3470          mesa_loge("ISP vertex xy value out of range.");
3471          return vk_error(NULL, VK_ERROR_UNKNOWN);
3472       }
3473 
3474       if (val <= max_integer) {
3475          integer = val;
3476          fractional = 0;
3477       } else if (val == max_integer + 1) {
3478          /* The integer field is 13 bits long so the max value is
3479           * 2 ^ 13 - 1 = 8191. For 8k support we need to handle 8192 so we set
3480           * all fractional bits to get as close as possible. The best we can do
3481           * is: 0x1FFF.F = 8191.9375 ≈ 8192 .
3482           */
3483          integer = max_integer;
3484          fractional = max_fractional;
3485       }
3486 
3487       pvr_csb_pack (word_out, IPF_ISP_VERTEX_XY_SIPF, word) {
3488          word.integer = integer;
3489          word.frac = fractional;
3490       }
3491 
3492       return VK_SUCCESS;
3493    }
3494 
3495    val += ROGUE_IPF_ISP_VERTEX_XY_BIAS_VALUE;
3496 
3497    if (((uint32_t)val & 0x7fff8000U) != 0U)
3498       return vk_error(NULL, VK_ERROR_UNKNOWN);
3499 
3500    pvr_csb_pack (word_out, IPF_ISP_VERTEX_XY, word) {
3501       word.sign = val < 0;
3502       word.integer = val;
3503    }
3504 
3505    return VK_SUCCESS;
3506 }
3507 
/* Emits the ISP vertex data (screen-space XY positions, Z forced to 0) of a
 * primitive block. Each mapping's destination rect produces four vertices:
 * top-left, top-right, bottom-left, bottom-right.
 *
 * \return VK_SUCCESS, or an error if a coordinate is out of encodable range.
 */
static VkResult
pvr_isp_prim_block_isp_vertices(const struct pvr_device_info *dev_info,
                                struct pvr_transfer_3d_state *state,
                                struct pvr_rect_mapping *mappings,
                                uint32_t num_mappings,
                                uint32_t mapping_offset,
                                uint32_t **const cs_ptr_out)
{
   uint32_t *cs_ptr = *cs_ptr_out;
   bool bias = true;
   uint32_t i;

   /* On 8K-capable cores only bias when the render area fits in the smaller
    * coordinate range.
    */
   if (PVR_HAS_FEATURE(dev_info, screen_size8K))
      bias = state->width_in_tiles <= 256U && state->height_in_tiles <= 256U;

   for (i = mapping_offset; i < mapping_offset + num_mappings; i++) {
      uint32_t bottom = 0U;
      uint32_t right = 0U;
      uint32_t left = 0U;
      uint32_t top = 0U;
      VkResult result;

      /* ISP vertex data (X, Y, Z). */
      result = pvr_int32_to_isp_xy_vtx(dev_info,
                                       mappings[i].dst_rect.offset.y,
                                       bias,
                                       &top);
      if (result != VK_SUCCESS)
         return result;

      result = pvr_int32_to_isp_xy_vtx(dev_info,
                                       mappings[i].dst_rect.offset.y +
                                          mappings[i].dst_rect.extent.height,
                                       bias,
                                       &bottom);
      if (result != VK_SUCCESS)
         return result;

      result = pvr_int32_to_isp_xy_vtx(dev_info,
                                       mappings[i].dst_rect.offset.x,
                                       bias,
                                       &left);
      if (result != VK_SUCCESS)
         return result;

      result = pvr_int32_to_isp_xy_vtx(dev_info,
                                       mappings[i].dst_rect.offset.x +
                                          mappings[i].dst_rect.extent.width,
                                       bias,
                                       &right);
      if (result != VK_SUCCESS)
         return result;

      if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
         /* SIPF: one 64-bit word per vertex, X/Y already packed. */
         pvr_csb_pack ((uint64_t *)cs_ptr, IPF_ISP_VERTEX_WORD_SIPF, word) {
            word.y = top;
            word.x = left;
         }
         cs_ptr += pvr_cmd_length(IPF_ISP_VERTEX_WORD_SIPF);

         pvr_csb_pack ((uint64_t *)cs_ptr, IPF_ISP_VERTEX_WORD_SIPF, word) {
            word.y = top;
            word.x = right;
         }
         cs_ptr += pvr_cmd_length(IPF_ISP_VERTEX_WORD_SIPF);

         pvr_csb_pack ((uint64_t *)cs_ptr, IPF_ISP_VERTEX_WORD_SIPF, word) {
            word.y = bottom;
            word.x = left;
         }
         cs_ptr += pvr_cmd_length(IPF_ISP_VERTEX_WORD_SIPF);

         pvr_csb_pack ((uint64_t *)cs_ptr, IPF_ISP_VERTEX_WORD_SIPF, word) {
            word.y = bottom;
            word.x = right;
         }
         cs_ptr += pvr_cmd_length(IPF_ISP_VERTEX_WORD_SIPF);

         continue;
      }

      /* Non-SIPF: vertex pairs span five dwords with fields split across
       * word boundaries, so low/high parts are masked/shifted manually.
       */

      /* ISP vertices 0 and 1. */
      pvr_csb_pack (cs_ptr, IPF_ISP_VERTEX_WORD_0, word0) {
         word0.x0 = left;
         word0.y0 = top & 0xFF;
      }
      cs_ptr++;

      pvr_csb_pack (cs_ptr, IPF_ISP_VERTEX_WORD_1, word1) {
         word1.y0 = top >> ROGUE_IPF_ISP_VERTEX_WORD_1_Y0_SHIFT;
      }
      cs_ptr++;

      pvr_csb_pack (cs_ptr, IPF_ISP_VERTEX_WORD_2, word2) {
         word2.x1 = right & 0xFFFF;
         word2.z0 = 0U;
      }
      cs_ptr++;

      pvr_csb_pack (cs_ptr, IPF_ISP_VERTEX_WORD_3, word3) {
         word3.x1 = right >> ROGUE_IPF_ISP_VERTEX_WORD_3_X1_SHIFT;
         word3.y1 = top;
      }
      cs_ptr++;

      pvr_csb_pack (cs_ptr, IPF_ISP_VERTEX_WORD_4, word4) {
         word4.z1 = 0U;
      }
      cs_ptr++;

      /* ISP vertices 2 and 3. */
      pvr_csb_pack (cs_ptr, IPF_ISP_VERTEX_WORD_0, word0) {
         word0.x0 = left;
         word0.y0 = bottom & 0xFF;
      }
      cs_ptr++;

      pvr_csb_pack (cs_ptr, IPF_ISP_VERTEX_WORD_1, word1) {
         word1.y0 = bottom >> ROGUE_IPF_ISP_VERTEX_WORD_1_Y0_SHIFT;
      }
      cs_ptr++;

      pvr_csb_pack (cs_ptr, IPF_ISP_VERTEX_WORD_2, word2) {
         word2.x1 = right & 0xFFFF;
         word2.z0 = 0U;
      }
      cs_ptr++;

      pvr_csb_pack (cs_ptr, IPF_ISP_VERTEX_WORD_3, word3) {
         word3.x1 = right >> ROGUE_IPF_ISP_VERTEX_WORD_3_X1_SHIFT;
         word3.y1 = bottom;
      }
      cs_ptr++;

      pvr_csb_pack (cs_ptr, IPF_ISP_VERTEX_WORD_4, word4) {
         word4.z1 = 0U;
      }
      cs_ptr++;
   }
   *cs_ptr_out = cs_ptr;

   return VK_SUCCESS;
}
3651 
3652 static uint32_t
pvr_isp_primitive_block_size(const struct pvr_device_info * dev_info,const struct pvr_transfer_cmd_source * src,uint32_t num_mappings)3653 pvr_isp_primitive_block_size(const struct pvr_device_info *dev_info,
3654                              const struct pvr_transfer_cmd_source *src,
3655                              uint32_t num_mappings)
3656 {
3657    uint32_t num_isp_vertices = num_mappings * 4U;
3658    uint32_t num_tsp_vertices_per_isp_vertex;
3659    uint32_t isp_vertex_data_size_dw;
3660    bool color_fill = (src == NULL);
3661    uint32_t tsp_comp_format_dw;
3662    uint32_t isp_state_size_dw;
3663    uint32_t pds_state_size_dw;
3664    uint32_t idx_data_size_dw;
3665    uint32_t tsp_data_size;
3666    uint32_t stream_size;
3667 
3668    if (color_fill) {
3669       num_tsp_vertices_per_isp_vertex = 0U;
3670    } else {
3671       num_tsp_vertices_per_isp_vertex =
3672          src->surface.mem_layout == PVR_MEMLAYOUT_3DTWIDDLED ? 4U : 2U;
3673    }
3674 
3675    tsp_data_size = PVR_DW_TO_BYTES(num_isp_vertices * PVR_TRANSFER_NUM_LAYERS *
3676                                    num_tsp_vertices_per_isp_vertex);
3677 
3678    if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
3679       /* An XYZ vertex is 16/16/32 bits => 8 bytes. */
3680       isp_vertex_data_size_dw = num_isp_vertices * 2U;
3681 
3682       /* Round to even for 64 bit boundary. */
3683       idx_data_size_dw = ALIGN_POT(num_mappings, 2U);
3684       tsp_comp_format_dw = 0U;
3685       isp_state_size_dw = 4U;
3686       pds_state_size_dw = 8U;
3687    } else {
3688       tsp_comp_format_dw = color_fill ? 0U : PVR_TRANSFER_NUM_LAYERS;
3689 
3690       if (!color_fill) {
3691          if (src->surface.mem_layout == PVR_MEMLAYOUT_3DTWIDDLED)
3692             tsp_comp_format_dw *= 2U;
3693       }
3694 
3695       /* An XYZ vertex is 24/24/32 bits => 10 bytes with last padded to 4 byte
3696        * burst align.
3697        */
3698       isp_vertex_data_size_dw = DIV_ROUND_UP(num_isp_vertices * 10U, 4U);
3699 
3700       /* 4 triangles fit in 3 dw: t0t0t0t1_t1t1t2t2_t2t3t3t3. */
3701       idx_data_size_dw = num_mappings + DIV_ROUND_UP(num_mappings, 2U);
3702       isp_state_size_dw = 5U;
3703       pds_state_size_dw = 7U;
3704    }
3705 
3706    stream_size =
3707       tsp_data_size + PVR_DW_TO_BYTES(idx_data_size_dw + tsp_comp_format_dw +
3708                                       isp_vertex_data_size_dw +
3709                                       isp_state_size_dw + pds_state_size_dw);
3710 
3711    return stream_size;
3712 }
3713 
/* Emits one complete ISP primitive block into the control stream.
 *
 * The sub-block ordering differs per core family:
 *  - SIPF cores: ISP state, index data, ISP vertices, PDS state, then TSP
 *    vertex data; cs_start_offset is always 0.
 *  - Other cores: TSP vertex data, PDS state, then ISP state, index data and
 *    ISP vertices; cs_start_offset is set to the byte offset of the ISP state
 *    words so CS_PRIM_BASE can point at them.
 *
 * src == NULL means color fill, in which case no TSP vertex data is emitted.
 *
 * \return VK_SUCCESS, or an error propagated from the vertex emission.
 */
static VkResult
pvr_isp_primitive_block(const struct pvr_device_info *dev_info,
                        struct pvr_transfer_ctx *ctx,
                        const struct pvr_transfer_cmd *transfer_cmd,
                        struct pvr_transfer_prep_data *prep_data,
                        const struct pvr_transfer_cmd_source *src,
                        bool custom_filter,
                        struct pvr_rect_mapping *mappings,
                        uint32_t num_mappings,
                        uint32_t mapping_offset,
                        bool read_bgnd,
                        uint32_t *cs_start_offset,
                        uint32_t **cs_ptr_out)
{
   struct pvr_transfer_3d_state *state = &prep_data->state;
   uint32_t num_isp_vertices = num_mappings * 4U;
   uint32_t num_tsp_vertices_per_isp_vert;
   uint32_t tsp_data_size_in_bytes;
   uint32_t tsp_comp_format_in_dw;
   bool color_fill = src == NULL;
   uint32_t stream_size_in_bytes;
   uint32_t *cs_ptr_start;
   VkResult result;

   if (color_fill) {
      num_tsp_vertices_per_isp_vert = 0U;
   } else {
      num_tsp_vertices_per_isp_vert =
         src->surface.mem_layout == PVR_MEMLAYOUT_3DTWIDDLED ? 4U : 2U;
   }

   tsp_data_size_in_bytes =
      PVR_DW_TO_BYTES(num_isp_vertices * PVR_TRANSFER_NUM_LAYERS *
                      num_tsp_vertices_per_isp_vert);

   /* These sizes must stay in sync with pvr_isp_primitive_block_size(). */
   if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
      tsp_comp_format_in_dw = 0U;
   } else {
      tsp_comp_format_in_dw = color_fill ? 0U : PVR_TRANSFER_NUM_LAYERS;

      if (!color_fill && src->surface.mem_layout == PVR_MEMLAYOUT_3DTWIDDLED)
         tsp_comp_format_in_dw *= 2U;
   }

   stream_size_in_bytes =
      pvr_isp_primitive_block_size(dev_info, src, num_mappings);

   cs_ptr_start = *cs_ptr_out;

   if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
      /* This includes:
       *    Vertex formats.
       *    ISP state words.
       */
      pvr_isp_prim_block_isp_state(dev_info,
                                   tsp_comp_format_in_dw,
                                   tsp_data_size_in_bytes,
                                   num_isp_vertices,
                                   read_bgnd,
                                   cs_ptr_out);

      /* This include:
       *    Index data / point pitch.
       */
      pvr_isp_prim_block_index_block(dev_info, num_mappings, cs_ptr_out);

      result = pvr_isp_prim_block_isp_vertices(dev_info,
                                               state,
                                               mappings,
                                               num_mappings,
                                               mapping_offset,
                                               cs_ptr_out);
      if (result != VK_SUCCESS)
         return result;

      pvr_isp_prim_block_pds_state(dev_info, ctx, state, cs_ptr_out);

      if (!color_fill) {
         /* This includes:
          *    TSP vertex formats.
          */
         pvr_isp_prim_block_tsp_vertex_block(dev_info,
                                             src,
                                             mappings,
                                             custom_filter,
                                             num_mappings,
                                             mapping_offset,
                                             tsp_comp_format_in_dw,
                                             cs_ptr_out);
      }

      *cs_start_offset = 0;
   } else {
      if (!color_fill) {
         /* This includes:
          *    Compressed TSP vertex data & tables.
          *    Primitive id.
          *    TSP compression formats.
          */
         pvr_isp_prim_block_tsp_vertex_block(dev_info,
                                             src,
                                             mappings,
                                             custom_filter,
                                             num_mappings,
                                             mapping_offset,
                                             tsp_comp_format_in_dw,
                                             cs_ptr_out);
      }

      pvr_isp_prim_block_pds_state(dev_info, ctx, state, cs_ptr_out);

      /* Point the CS_PRIM_BASE here. */
      *cs_start_offset = (*cs_ptr_out - cs_ptr_start) * sizeof(cs_ptr_start[0]);

      /* This includes:
       *    ISP state words.
       *    Compression size word.
       *    ISP compression and vertex formats.
       */
      pvr_isp_prim_block_isp_state(dev_info,
                                   tsp_comp_format_in_dw,
                                   tsp_data_size_in_bytes,
                                   num_isp_vertices,
                                   read_bgnd,
                                   cs_ptr_out);

      pvr_isp_prim_block_index_block(dev_info, num_mappings, cs_ptr_out);

      result = pvr_isp_prim_block_isp_vertices(dev_info,
                                               state,
                                               mappings,
                                               num_mappings,
                                               mapping_offset,
                                               cs_ptr_out);
      if (result != VK_SUCCESS)
         return result;
   }

   /* The emitted size must match the precomputed allocation size. */
   assert((*cs_ptr_out - cs_ptr_start) * sizeof(cs_ptr_start[0]) ==
          stream_size_in_bytes);

   return VK_SUCCESS;
}
3857 
3858 static inline uint32_t
pvr_transfer_prim_blocks_per_alloc(const struct pvr_device_info * dev_info)3859 pvr_transfer_prim_blocks_per_alloc(const struct pvr_device_info *dev_info)
3860 {
3861    uint32_t ret = PVR_DW_TO_BYTES(ROGUE_IPF_CONTROL_STREAM_SIZE_DWORDS);
3862 
3863    if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format))
3864       return ret / sizeof(uint64_t) / 2U;
3865 
3866    return ret / sizeof(uint32_t) / 2U - 1U;
3867 }
3868 
3869 static inline uint32_t
pvr_transfer_max_quads_per_pb(const struct pvr_device_info * dev_info)3870 pvr_transfer_max_quads_per_pb(const struct pvr_device_info *dev_info)
3871 {
3872    return PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) ? 4U
3873                                                                       : 16U;
3874 }
3875 
static inline uint8_t *pvr_isp_ctrl_stream_sipf_write_aligned(uint8_t *stream,
                                                              uint32_t data,
                                                              uint32_t size)
{
   /* Append `size` bytes of `data` to a byte-granular control stream while
    * only ever issuing naturally aligned 32-bit accesses. Bytes already
    * emitted below the write position in the current word are preserved;
    * bytes above it are overwritten (subsequent writes fill them in).
    * Returns the advanced stream pointer.
    */
   const uint32_t byte_shift = ((uintptr_t)stream & 0x3U) * 8U;
   uint32_t *word_ptr = (uint32_t *)(stream - ((uintptr_t)stream & 0x3U));
   const uint32_t keep_mask = (1U << byte_shift) - 1U;

   assert(size > 0 && size <= 4U);

   /* Merge the new data above the bytes already written in this word. */
   *word_ptr = (*word_ptr & keep_mask) | (data << byte_shift);

   /* Spill whatever did not fit into the following word. The guard also
    * keeps the shift below 32 (shifting by the type width would be UB).
    */
   if (byte_shift / 8U + size > 4U)
      word_ptr[1] = data >> (32U - byte_shift);

   return stream + size;
}
3895 
3896 /**
3897  * Writes ISP ctrl stream.
3898  *
3899  * We change sampler/texture state when we process a new TQ source. The
3900  * primitive block contains the shader pointers, but we supply the primitive
3901  * blocks with shaders from here.
3902  */
static VkResult pvr_isp_ctrl_stream(const struct pvr_device_info *dev_info,
                                    struct pvr_transfer_ctx *ctx,
                                    struct pvr_transfer_cmd *transfer_cmd,
                                    struct pvr_transfer_prep_data *prep_data)
{
   const uint32_t max_mappings_per_pb = pvr_transfer_max_quads_per_pb(dev_info);
   bool fill_blit = (transfer_cmd->flags & PVR_TRANSFER_CMD_FLAGS_FILL) != 0U;
   uint32_t free_ctrl_stream_words = ROGUE_IPF_CONTROL_STREAM_SIZE_DWORDS;
   struct pvr_transfer_3d_state *const state = &prep_data->state;
   struct pvr_winsys_transfer_regs *const regs = &state->regs;
   struct pvr_transfer_pass *pass = NULL;
   uint32_t flags = transfer_cmd->flags;
   struct pvr_suballoc_bo *pvr_cs_bo;
   pvr_dev_addr_t stream_base_vaddr;
   uint32_t num_prim_blks = 0U;
   uint32_t prim_blk_size = 0U;
   uint32_t region_arrays_size;
   uint32_t num_region_arrays;
   uint32_t total_stream_size;
   bool was_linked = false;
   uint32_t rem_mappings;
   uint32_t num_sources;
   uint32_t *blk_cs_ptr;
   uint32_t *cs_ptr;
   uint32_t source;
   VkResult result;

   /* Phase 1: walk the sources once to count primitive blocks and sum their
    * sizes, so the whole stream can be allocated in a single buffer. The
    * custom-mapping path sizes from the pass's per-source mapping lists; the
    * default path sizes from the command's own sources (plus one block for a
    * fill blit).
    */
   if (state->custom_mapping.pass_count > 0U) {
      pass = &state->custom_mapping.passes[state->pass_idx];

      num_sources = pass->source_count;

      for (source = 0; source < num_sources; source++) {
         uint32_t num_mappings = pass->sources[source].mapping_count;

         /* Each primitive block holds at most max_mappings_per_pb quads. */
         while (num_mappings > 0U) {
            if (fill_blit) {
               prim_blk_size += pvr_isp_primitive_block_size(
                  dev_info,
                  NULL,
                  MIN2(max_mappings_per_pb, num_mappings));
            }

            if (transfer_cmd->source_count > 0) {
               prim_blk_size += pvr_isp_primitive_block_size(
                  dev_info,
                  &transfer_cmd->sources[source],
                  MIN2(max_mappings_per_pb, num_mappings));
            }

            num_mappings -= MIN2(max_mappings_per_pb, num_mappings);
            num_prim_blks++;
         }
      }
   } else {
      num_sources = fill_blit ? 1U : transfer_cmd->source_count;

      if (fill_blit) {
         num_prim_blks = 1U;
         prim_blk_size +=
            pvr_isp_primitive_block_size(dev_info,
                                         NULL,
                                         MIN2(max_mappings_per_pb, 1U));

         /* Fill blits can also have a source; fallthrough to handle. */
      }

      for (source = 0; source < transfer_cmd->source_count; source++) {
         uint32_t num_mappings = transfer_cmd->sources[source].mapping_count;

         while (num_mappings > 0U) {
            prim_blk_size += pvr_isp_primitive_block_size(
               dev_info,
               &transfer_cmd->sources[source],
               MIN2(max_mappings_per_pb, num_mappings));

            num_mappings -= MIN2(max_mappings_per_pb, num_mappings);
            num_prim_blks++;
         }
      }
   }

   /* Region arrays (the ctrl-stream proper) come first in the buffer; each
    * region array can reference a limited number of primitive blocks, so
    * round up.
    */
   num_region_arrays =
      (num_prim_blks + (pvr_transfer_prim_blocks_per_alloc(dev_info) - 1U)) /
      pvr_transfer_prim_blocks_per_alloc(dev_info);
   region_arrays_size = ROGUE_IPF_CONTROL_STREAM_SIZE_DWORDS *
                        sizeof(uint32_t) * num_region_arrays;
   total_stream_size = region_arrays_size + prim_blk_size;

   /* Allocate space for IPF control stream. */
   result = pvr_cmd_buffer_alloc_mem(transfer_cmd->cmd_buffer,
                                     ctx->device->heaps.transfer_frag_heap,
                                     total_stream_size,
                                     &pvr_cs_bo);
   if (result != VK_SUCCESS)
      return result;

   /* Device-virtual base of the stream, expressed as an offset within the
    * transfer-frag heap (the HW adds the heap base).
    */
   stream_base_vaddr =
      PVR_DEV_ADDR(pvr_cs_bo->dev_addr.addr -
                   ctx->device->heaps.transfer_frag_heap->base_addr.addr);

   /* cs_ptr walks the region arrays; blk_cs_ptr walks the primitive blocks
    * placed after them.
    */
   cs_ptr = pvr_bo_suballoc_get_map_addr(pvr_cs_bo);
   blk_cs_ptr = cs_ptr + region_arrays_size / sizeof(uint32_t);

   /* Phase 2: emit per-source shader/texture state and primitive blocks,
    * then reference each block from the control stream.
    */
   source = 0;
   while (source < num_sources) {
      if (fill_blit)
         rem_mappings = pass ? pass->sources[source].mapping_count : 1U;
      else
         rem_mappings = transfer_cmd->sources[source].mapping_count;

      if ((transfer_cmd->source_count > 0 || fill_blit) && rem_mappings != 0U) {
         struct pvr_pds_pixel_shader_sa_program unitex_pds_prog = { 0U };
         struct pvr_transfer_cmd_source *src = &transfer_cmd->sources[source];
         struct pvr_rect_mapping fill_mapping;
         uint32_t mapping_offset = 0U;
         bool read_bgnd = false;

         if (fill_blit) {
            /* Fill path: pack the clear color into the USC clear registers
             * and point the PDS task at the device's NOP program.
             */
            uint32_t packed_color[4U] = { 0U };

            if (vk_format_is_compressed(transfer_cmd->dst.vk_format)) {
               return vk_error(transfer_cmd->cmd_buffer,
                               VK_ERROR_FORMAT_NOT_SUPPORTED);
            }

            state->pds_shader_task_offset = 0U;
            state->uni_tex_code_offset = 0U;
            state->tex_state_data_offset = 0U;
            state->common_ptr = 0U;

            result = pvr_pack_clear_color(transfer_cmd->dst.vk_format,
                                          transfer_cmd->clear_color,
                                          packed_color);
            if (result != VK_SUCCESS)
               return result;

            fill_mapping.dst_rect = transfer_cmd->scissor;

            pvr_csb_pack (&regs->usc_clear_register0,
                          CR_USC_CLEAR_REGISTER,
                          reg) {
               reg.val = packed_color[0U];
            }

            pvr_csb_pack (&regs->usc_clear_register1,
                          CR_USC_CLEAR_REGISTER,
                          reg) {
               reg.val = packed_color[1U];
            }

            pvr_csb_pack (&regs->usc_clear_register2,
                          CR_USC_CLEAR_REGISTER,
                          reg) {
               reg.val = packed_color[2U];
            }

            pvr_csb_pack (&regs->usc_clear_register3,
                          CR_USC_CLEAR_REGISTER,
                          reg) {
               reg.val = packed_color[3U];
            }

            state->pds_shader_task_offset =
               transfer_cmd->cmd_buffer->device->nop_program.pds.data_offset;

            unitex_pds_prog.kick_usc = false;
            unitex_pds_prog.clear = false;
         } else {
            /* Copy path: select the fragment shader variant for this source,
             * then build sampler/image state and the PDS DMA that uploads it.
             */
            const bool down_scale = transfer_cmd->sources[source].resolve_op ==
                                       PVR_RESOLVE_BLEND &&
                                    src->surface.sample_count > 1U &&
                                    transfer_cmd->dst.sample_count <= 1U;
            struct pvr_tq_shader_properties *shader_props =
               &state->shader_props;
            struct pvr_tq_layer_properties *layer = &shader_props->layer_props;
            const struct pvr_tq_frag_sh_reg_layout *sh_reg_layout;
            enum pvr_transfer_pbe_pixel_src pbe_src_format;
            struct pvr_suballoc_bo *pvr_bo;
            uint32_t tex_state_dma_size;
            pvr_dev_addr_t dev_offset;

            /* Reset the shared register bank ptrs each src implies new texture
             * state (Note that we don't change texture state per prim block).
             */
            state->common_ptr = 0U;
            state->usc_const_reg_ptr = 0U;
            /* We don't use state->dynamic_const_reg_ptr here. */

            if (flags & PVR_TRANSFER_CMD_FLAGS_DSMERGE)
               read_bgnd = true;

            result = pvr_pbe_src_format_f2d(flags,
                                            src,
                                            transfer_cmd->dst.vk_format,
                                            down_scale,
                                            state->dont_force_pbe,
                                            &pbe_src_format);
            if (result != VK_SUCCESS)
               return result;

            memset(shader_props, 0U, sizeof(*shader_props));

            layer->pbe_format = pbe_src_format;
            layer->sample =
               (src->surface.mem_layout == PVR_MEMLAYOUT_3DTWIDDLED);
            shader_props->iterated = true;

            shader_props->pick_component =
               pvr_pick_component_needed(&state->custom_mapping);

            result = pvr_msaa_state(dev_info, transfer_cmd, state, source);
            if (result != VK_SUCCESS)
               return result;

            /* Some formats need linear filtering emulated in the USC; fall
             * back to what the core supports otherwise.
             */
            if (state->filter[source] == PVR_FILTER_LINEAR &&
                pvr_requires_usc_linear_filter(src->surface.vk_format)) {
               if (pvr_int_pbe_usc_linear_filter(layer->pbe_format,
                                                 layer->sample,
                                                 layer->msaa,
                                                 shader_props->full_rate)) {
                  layer->linear = true;
               } else {
                  mesa_logw("Transfer: F32 linear filter not supported.");
               }
            }

            result = pvr_transfer_frag_store_get_shader_info(
               transfer_cmd->cmd_buffer->device,
               &ctx->frag_store,
               shader_props,
               &dev_offset,
               &sh_reg_layout);
            if (result != VK_SUCCESS)
               return result;

            assert(dev_offset.addr <= UINT32_MAX);
            prep_data->state.pds_shader_task_offset = (uint32_t)dev_offset.addr;

            result =
               pvr_pds_coeff_task(ctx, transfer_cmd, layer->sample, prep_data);
            if (result != VK_SUCCESS)
               return result;

            unitex_pds_prog.kick_usc = false;
            unitex_pds_prog.clear = false;

            tex_state_dma_size =
               sh_reg_layout->driver_total + sh_reg_layout->compiler_out_total;

            unitex_pds_prog.num_texture_dma_kicks = 1U;
            unitex_pds_prog.num_uniform_dma_kicks = 0U;

            /* Allocate memory for DMA. */
            result = pvr_cmd_buffer_alloc_mem(transfer_cmd->cmd_buffer,
                                              ctx->device->heaps.general_heap,
                                              tex_state_dma_size << 2U,
                                              &pvr_bo);
            if (result != VK_SUCCESS)
               return result;

            result = pvr_sampler_state_for_surface(
               dev_info,
               &transfer_cmd->sources[source].surface,
               state->filter[source],
               sh_reg_layout,
               0U,
               pvr_bo_suballoc_get_map_addr(pvr_bo));
            if (result != VK_SUCCESS)
               return result;

            result = pvr_image_state_for_surface(
               ctx,
               transfer_cmd,
               &transfer_cmd->sources[source].surface,
               0U,
               source,
               sh_reg_layout,
               state,
               0U,
               pvr_bo_suballoc_get_map_addr(pvr_bo));
            if (result != VK_SUCCESS)
               return result;

            pvr_pds_encode_dma_burst(unitex_pds_prog.texture_dma_control,
                                     unitex_pds_prog.texture_dma_address,
                                     state->common_ptr,
                                     tex_state_dma_size,
                                     pvr_bo->dev_addr.addr,
                                     true,
                                     dev_info);

            state->common_ptr += tex_state_dma_size;

            pvr_write_usc_constants(sh_reg_layout,
                                    pvr_bo_suballoc_get_map_addr(pvr_bo));

            if (pvr_pick_component_needed(&state->custom_mapping)) {
               pvr_dma_texel_unwind(state,
                                    sh_reg_layout,
                                    pvr_bo_suballoc_get_map_addr(pvr_bo));
            }
         }

         result = pvr_pds_unitex(dev_info,
                                 ctx,
                                 transfer_cmd,
                                 &unitex_pds_prog,
                                 prep_data);
         if (result != VK_SUCCESS)
            return result;

         /* Emit one primitive block per batch of mappings and reference it
          * from the control stream, linking in a new region array when the
          * current one runs out of words.
          */
         while (rem_mappings > 0U) {
            const uint32_t min_free_ctrl_stream_words =
               PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) ? 2
                                                                           : 3;
            const uint32_t num_mappings =
               MIN2(max_mappings_per_pb, rem_mappings);
            struct pvr_rect_mapping *mappings = NULL;
            uint32_t stream_start_offset = 0U;
            pvr_dev_addr_t prim_blk_addr;

            if (free_ctrl_stream_words < min_free_ctrl_stream_words) {
               /* Current region array is (nearly) full: emit a link entry to
                * the next one and continue there.
                */
               pvr_dev_addr_t next_region_array_vaddr = stream_base_vaddr;

               num_region_arrays++;
               next_region_array_vaddr.addr +=
                  num_region_arrays *
                  PVR_DW_TO_BYTES(ROGUE_IPF_CONTROL_STREAM_SIZE_DWORDS);

               if (PVR_HAS_FEATURE(dev_info,
                                   simple_internal_parameter_format_v2)) {
                  uint32_t link_addr;

                  pvr_csb_pack (&link_addr,
                                IPF_CONTROL_STREAM_LINK_SIPF2,
                                control_stream) {
                     control_stream.cs_ctrl_type =
                        ROGUE_IPF_CS_CTRL_TYPE_SIPF2_LINK;
                     control_stream.cs_link.addr = next_region_array_vaddr.addr;
                  }

                  pvr_isp_ctrl_stream_sipf_write_aligned(
                     (uint8_t *)cs_ptr,
                     link_addr,
                     PVR_DW_TO_BYTES(
                        pvr_cmd_length(IPF_CONTROL_STREAM_LINK_SIPF2)));
               } else {
                  pvr_csb_pack (cs_ptr, IPF_CONTROL_STREAM, control_stream) {
                     control_stream.cs_type = ROGUE_IPF_CS_TYPE_LINK;
                     control_stream.cs_link.addr = next_region_array_vaddr.addr;
                  }
               }

               cs_ptr =
                  (uint32_t *)pvr_bo_suballoc_get_map_addr(pvr_cs_bo) +
                  num_region_arrays * ROGUE_IPF_CONTROL_STREAM_SIZE_DWORDS;
               free_ctrl_stream_words = ROGUE_IPF_CONTROL_STREAM_SIZE_DWORDS;

               was_linked = PVR_HAS_FEATURE(dev_info, ipf_creq_pf);
            }

            if (fill_blit)
               mappings = pass ? pass->sources[source].mappings : &fill_mapping;
            else
               mappings = transfer_cmd->sources[source].mappings;

            /* Heap-relative device address of the next primitive block. */
            prim_blk_addr = stream_base_vaddr;
            prim_blk_addr.addr +=
               (uintptr_t)blk_cs_ptr -
               (uintptr_t)pvr_bo_suballoc_get_map_addr(pvr_cs_bo);

            result = pvr_isp_primitive_block(dev_info,
                                             ctx,
                                             transfer_cmd,
                                             prep_data,
                                             fill_blit ? NULL : src,
                                             state->custom_filter,
                                             mappings,
                                             num_mappings,
                                             mapping_offset,
                                             read_bgnd,
                                             &stream_start_offset,
                                             &blk_cs_ptr);
            if (result != VK_SUCCESS)
               return result;

            prim_blk_addr.addr += stream_start_offset;

            if (PVR_HAS_FEATURE(dev_info,
                                simple_internal_parameter_format_v2)) {
               uint8_t *cs_byte_ptr = (uint8_t *)cs_ptr;
               uint32_t tmp;

               /* This part of the control stream is byte granular. */

               pvr_csb_pack (&tmp, IPF_PRIMITIVE_HEADER_SIPF2, prim_header) {
                  prim_header.cs_prim_base_size = 1;
                  prim_header.cs_mask_num_bytes = 1;
                  prim_header.cs_valid_tile0 = true;
               }
               cs_byte_ptr =
                  pvr_isp_ctrl_stream_sipf_write_aligned(cs_byte_ptr, tmp, 1);

               pvr_csb_pack (&tmp, IPF_PRIMITIVE_BASE_SIPF2, word) {
                  word.cs_prim_base = prim_blk_addr;
               }
               cs_byte_ptr =
                  pvr_isp_ctrl_stream_sipf_write_aligned(cs_byte_ptr, tmp, 4);

               /* IPF_BYTE_BASED_MASK_ONE_BYTE_WORD_0_SIPF2 since
                * IPF_PRIMITIVE_HEADER_SIPF2.cs_mask_num_bytes == 1.
                */
               pvr_csb_pack (&tmp,
                             IPF_BYTE_BASED_MASK_ONE_BYTE_WORD_0_SIPF2,
                             mask) {
                  /* Two mask bits (primitives) per quad mapping. */
                  switch (num_mappings) {
                  case 4:
                     mask.cs_mask_one_byte_tile0_7 = true;
                     mask.cs_mask_one_byte_tile0_6 = true;
                     FALLTHROUGH;
                  case 3:
                     mask.cs_mask_one_byte_tile0_5 = true;
                     mask.cs_mask_one_byte_tile0_4 = true;
                     FALLTHROUGH;
                  case 2:
                     mask.cs_mask_one_byte_tile0_3 = true;
                     mask.cs_mask_one_byte_tile0_2 = true;
                     FALLTHROUGH;
                  case 1:
                     mask.cs_mask_one_byte_tile0_1 = true;
                     mask.cs_mask_one_byte_tile0_0 = true;
                     break;
                  default:
                     /* Unreachable since we clamped the value earlier so
                      * reaching this is an implementation error.
                      */
                     unreachable("num_mapping exceeded max_mappings_per_pb");
                     break;
                  }
               }
               /* Only 1 byte since there's only 1 valid tile within the single
                * IPF_BYTE_BASED_MASK_ONE_BYTE_WORD_0_SIPF2 mask.
                * ROGUE_IPF_PRIMITIVE_HEADER_SIPF2.cs_valid_tile0 == true.
                */
               cs_byte_ptr =
                  pvr_isp_ctrl_stream_sipf_write_aligned(cs_byte_ptr, tmp, 1);

               cs_ptr = (uint32_t *)cs_byte_ptr;

               free_ctrl_stream_words -= 2;
            } else {
               pvr_csb_pack (cs_ptr, IPF_PRIMITIVE_FORMAT, word) {
                  word.cs_type = ROGUE_IPF_CS_TYPE_PRIM;
                  word.cs_isp_state_read = true;
                  word.cs_isp_state_size = 2U;
                  /* Two triangles per quad mapping. */
                  word.cs_prim_total = 2U * num_mappings - 1U;
                  word.cs_mask_fmt = ROGUE_IPF_CS_MASK_FMT_FULL;
                  word.cs_prim_base_pres = true;
               }
               cs_ptr += pvr_cmd_length(IPF_PRIMITIVE_FORMAT);

               pvr_csb_pack (cs_ptr, IPF_PRIMITIVE_BASE, word) {
                  word.cs_prim_base = prim_blk_addr;
               }
               cs_ptr += pvr_cmd_length(IPF_PRIMITIVE_BASE);

               free_ctrl_stream_words -= 2;
            }

            rem_mappings -= num_mappings;
            mapping_offset += num_mappings;
         }
      }

      source++;

      /* A fill blit may also have sources for normal blits. */
      if (fill_blit && transfer_cmd->source_count > 0) {
         /* Fill blit count for custom mapping equals source blit count. While
          * normal blits use only one fill blit.
          */
         /* NOTE(review): with num_sources == 1 here, `source > num_sources`
          * cannot be true when source first reaches 1, so the fill_blit
          * reset looks unreachable on this path — confirm whether `>=` was
          * intended.
          */
         if (state->custom_mapping.pass_count == 0 && source > num_sources) {
            fill_blit = false;
            source = 0;
         }
      }
   }

   /* On ipf_creq_pf cores a second region array implies a link was emitted. */
   if (PVR_HAS_FEATURE(dev_info, ipf_creq_pf))
      assert((num_region_arrays > 1) == was_linked);

   /* Phase 3: terminate the control stream. */
   if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format_v2)) {
      uint8_t *cs_byte_ptr = (uint8_t *)cs_ptr;
      uint32_t tmp;

      /* clang-format off */
      pvr_csb_pack (&tmp, IPF_CONTROL_STREAM_TERMINATE_SIPF2, term);
      /* clang-format on */

      cs_byte_ptr = pvr_isp_ctrl_stream_sipf_write_aligned(cs_byte_ptr, tmp, 1);

      cs_ptr = (uint32_t *)cs_byte_ptr;
   } else {
      pvr_csb_pack (cs_ptr, IPF_CONTROL_STREAM, word) {
         word.cs_type = ROGUE_IPF_CS_TYPE_TERM;
      }
      cs_ptr += pvr_cmd_length(IPF_CONTROL_STREAM);
   }

   /* Phase 4: point the ISP registers at the stream. */
   pvr_csb_pack (&regs->isp_mtile_base, CR_ISP_MTILE_BASE, reg) {
      reg.addr =
         PVR_DEV_ADDR(pvr_cs_bo->dev_addr.addr -
                      ctx->device->heaps.transfer_frag_heap->base_addr.addr);
   }

   pvr_csb_pack (&regs->isp_render, CR_ISP_RENDER, reg) {
      reg.mode_type = ROGUE_CR_ISP_RENDER_MODE_TYPE_FAST_2D;
   }

   if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format_v2) &&
       PVR_HAS_FEATURE(dev_info, ipf_creq_pf)) {
      pvr_csb_pack (&regs->isp_rgn, CR_ISP_RGN_SIPF, isp_rgn) {
         /* Bit 0 in CR_ISP_RGN.cs_size_ipf_creq_pf is used to indicate the
          * presence of a link.
          */
         isp_rgn.cs_size_ipf_creq_pf = was_linked;
      }
   } else {
      /* clang-format off */
      pvr_csb_pack(&regs->isp_rgn, CR_ISP_RGN, isp_rgn);
      /* clang-format on */
   }

   return VK_SUCCESS;
}
4439 
pvr_transfer_set_filter(struct pvr_transfer_cmd * transfer_cmd,struct pvr_transfer_3d_state * state)4440 static void pvr_transfer_set_filter(struct pvr_transfer_cmd *transfer_cmd,
4441                                     struct pvr_transfer_3d_state *state)
4442 {
4443    for (uint32_t i = 0; i < transfer_cmd->source_count; i++) {
4444       VkRect2D *src = &transfer_cmd->sources[i].mappings[0U].src_rect;
4445       VkRect2D *dst = &transfer_cmd->sources[i].mappings[0U].dst_rect;
4446 
4447       /* If no scaling is applied to the copy region, we can use point
4448        * filtering.
4449        */
4450       if (!state->custom_filter && (src->extent.width == dst->extent.width) &&
4451           (src->extent.height == dst->extent.height))
4452          state->filter[i] = PVR_FILTER_POINT;
4453       else
4454          state->filter[i] = transfer_cmd->sources[i].filter;
4455    }
4456 }
4457 
4458 /** Generates hw resources to kick a 3D clip blit. */
static VkResult pvr_3d_clip_blit(struct pvr_transfer_ctx *ctx,
                                 struct pvr_transfer_cmd *transfer_cmd,
                                 struct pvr_transfer_prep_data *prep_data,
                                 uint32_t pass_idx,
                                 bool *finished_out)
{
   struct pvr_transfer_3d_state *state = &prep_data->state;
   /* Saved so the background pass below can temporarily substitute the dst
    * unwind; restored before preparing the real blit.
    */
   uint32_t texel_unwind_src = state->custom_mapping.texel_unwind_src;
   struct pvr_transfer_cmd bg_cmd = { 0U };
   uint32_t control_reg;
   VkResult result;

   /* Build a synthetic "background" command that loads the current dst
    * contents (dst blended onto itself over the scissor), so pixels the
    * clipped blit does not cover keep their value.
    */
   state->dont_force_pbe = false;
   bg_cmd.scissor = transfer_cmd->scissor;
   bg_cmd.cmd_buffer = transfer_cmd->cmd_buffer;
   bg_cmd.flags = transfer_cmd->flags;
   bg_cmd.flags &=
      ~(PVR_TRANSFER_CMD_FLAGS_FAST2D | PVR_TRANSFER_CMD_FLAGS_FILL |
        PVR_TRANSFER_CMD_FLAGS_DSMERGE | PVR_TRANSFER_CMD_FLAGS_PICKD);

   /* With custom mapping passes the background source is omitted. */
   bg_cmd.source_count = state->custom_mapping.pass_count > 0U ? 0 : 1;
   if (bg_cmd.source_count > 0) {
      struct pvr_transfer_cmd_source *src = &bg_cmd.sources[0];

      /* Identity mapping over the scissor: dst sampled back into itself. */
      src->mappings[0U].src_rect = transfer_cmd->scissor;
      src->mappings[0U].dst_rect = transfer_cmd->scissor;
      src->resolve_op = PVR_RESOLVE_BLEND;
      src->surface = transfer_cmd->dst;
   }

   state->filter[0] = PVR_FILTER_DONTCARE;
   bg_cmd.dst = transfer_cmd->dst;
   /* The background reads the destination, so its source unwind is the
    * destination's unwind.
    */
   state->custom_mapping.texel_unwind_src =
      state->custom_mapping.texel_unwind_dst;

   result =
      pvr_3d_copy_blit_core(ctx, &bg_cmd, prep_data, pass_idx, finished_out);
   if (result != VK_SUCCESS)
      return result;

   /* If the destination has 4 channels and the source has at most 2, we still
    * need all 4 channels from the USC into the PBE.
    */
   state->dont_force_pbe = true;
   state->custom_mapping.texel_unwind_src = texel_unwind_src;

   /* We need the viewport mask, otherwise all pixels would be disabled. */
   pvr_csb_pack (&control_reg, CR_ISP_BGOBJVALS, reg) {
      reg.mask = true;
   }
   /* OR-merged: preserves whatever pvr_3d_copy_blit_core already set. */
   state->regs.isp_bgobjvals |= control_reg;

   /* Emit the ISP control stream for the actual (clipped) blit. */
   pvr_transfer_set_filter(transfer_cmd, state);
   result = pvr_isp_ctrl_stream(&ctx->device->pdevice->dev_info,
                                ctx,
                                transfer_cmd,
                                prep_data);
   if (result != VK_SUCCESS)
      return result;

   /* In case of resolve M -> S, the accumulation is read from and written to a
    * single sampled surface. Make sure that we are resolving and we have the
    * right number of tiles.
    */
   if (state->down_scale) {
      uint64_t tmp;

      pvr_csb_pack (&tmp, CR_PBE_WORD0_MRT0, reg) {
         reg.downscale = true;
      }
      state->regs.pbe_wordx_mrty[0U] |= tmp;

      result = pvr_isp_tiles(ctx->device, state);
      if (result != VK_SUCCESS)
         return result;
   }

   return VK_SUCCESS;
}
4538 
static bool pvr_texel_unwind(uint32_t bpp,
                             pvr_dev_addr_t dev_addr,
                             bool is_input,
                             uint32_t texel_extend,
                             uint32_t *texel_unwind_out)
{
   /* Step the address back one texel at a time until it satisfies the
    * surface alignment requirement, counting the steps. Gives up (returns
    * false) after 15 unwinds; on success writes the count to
    * texel_unwind_out.
    */
   uint32_t unwind = 0U;

   while (!pvr_is_surface_aligned(dev_addr, is_input, bpp)) {
      if (unwind == 15U)
         return false;

      dev_addr.addr -= (bpp / texel_extend) / 8U;
      unwind++;
   }

   *texel_unwind_out = unwind;

   return true;
}
4564 
pvr_is_identity_mapping(const struct pvr_rect_mapping * mapping)4565 static bool pvr_is_identity_mapping(const struct pvr_rect_mapping *mapping)
4566 {
4567    return (mapping->src_rect.offset.x == mapping->dst_rect.offset.x &&
4568            mapping->src_rect.offset.y == mapping->dst_rect.offset.y &&
4569            mapping->src_rect.extent.width == mapping->dst_rect.extent.width &&
4570            mapping->src_rect.extent.height == mapping->dst_rect.extent.height);
4571 }
4572 
pvr_is_pbe_stride_aligned(const uint32_t stride)4573 static inline bool pvr_is_pbe_stride_aligned(const uint32_t stride)
4574 {
4575    if (stride == 1U)
4576       return true;
4577 
4578    return ((stride & (ROGUE_PBESTATE_REG_WORD0_LINESTRIDE_UNIT_SIZE - 1U)) ==
4579            0x0U);
4580 }
4581 
4582 static struct pvr_transfer_pass *
pvr_create_pass(struct pvr_transfer_custom_mapping * custom_mapping,uint32_t dst_offset)4583 pvr_create_pass(struct pvr_transfer_custom_mapping *custom_mapping,
4584                 uint32_t dst_offset)
4585 {
4586    struct pvr_transfer_pass *pass;
4587 
4588    assert(custom_mapping->pass_count < PVR_TRANSFER_MAX_PASSES);
4589 
4590    pass = &custom_mapping->passes[custom_mapping->pass_count];
4591    pass->clip_rects_count = 0U;
4592    pass->dst_offset = dst_offset;
4593    pass->source_count = 0U;
4594 
4595    custom_mapping->pass_count++;
4596 
4597    return pass;
4598 }
4599 
4600 /* Acquire pass with given offset. If one doesn't exist, create new. */
4601 static struct pvr_transfer_pass *
pvr_acquire_pass(struct pvr_transfer_custom_mapping * custom_mapping,uint32_t dst_offset)4602 pvr_acquire_pass(struct pvr_transfer_custom_mapping *custom_mapping,
4603                  uint32_t dst_offset)
4604 {
4605    for (uint32_t i = 0U; i < custom_mapping->pass_count; i++) {
4606       if (custom_mapping->passes[i].dst_offset == dst_offset)
4607          return &custom_mapping->passes[i];
4608    }
4609 
4610    return pvr_create_pass(custom_mapping, dst_offset);
4611 }
4612 
4613 static struct pvr_transfer_wa_source *
pvr_create_source(struct pvr_transfer_pass * pass,uint32_t src_offset,bool extend_height)4614 pvr_create_source(struct pvr_transfer_pass *pass,
4615                   uint32_t src_offset,
4616                   bool extend_height)
4617 {
4618    struct pvr_transfer_wa_source *src;
4619 
4620    assert(pass->source_count < ARRAY_SIZE(pass->sources));
4621 
4622    src = &pass->sources[pass->source_count];
4623    src->mapping_count = 0U;
4624    src->extend_height = extend_height;
4625 
4626    pass->source_count++;
4627 
4628    return src;
4629 }
4630 
4631 /* Acquire source with given offset. If one doesn't exist, create new. */
4632 static struct pvr_transfer_wa_source *
pvr_acquire_source(struct pvr_transfer_pass * pass,uint32_t src_offset,bool extend_height)4633 pvr_acquire_source(struct pvr_transfer_pass *pass,
4634                    uint32_t src_offset,
4635                    bool extend_height)
4636 {
4637    for (uint32_t i = 0U; i < pass->source_count; i++) {
4638       if (pass->sources[i].src_offset == src_offset &&
4639           pass->sources[i].extend_height == extend_height)
4640          return &pass->sources[i];
4641    }
4642 
4643    return pvr_create_source(pass, src_offset, extend_height);
4644 }
4645 
pvr_remove_source(struct pvr_transfer_pass * pass,uint32_t idx)4646 static void pvr_remove_source(struct pvr_transfer_pass *pass, uint32_t idx)
4647 {
4648    assert(idx < pass->source_count);
4649 
4650    for (uint32_t i = idx; i < (pass->source_count - 1U); i++)
4651       pass->sources[i] = pass->sources[i + 1U];
4652 
4653    pass->source_count--;
4654 }
4655 
pvr_remove_mapping(struct pvr_transfer_wa_source * src,uint32_t idx)4656 static void pvr_remove_mapping(struct pvr_transfer_wa_source *src, uint32_t idx)
4657 {
4658    assert(idx < src->mapping_count);
4659 
4660    for (uint32_t i = idx; i < (src->mapping_count - 1U); i++)
4661       src->mappings[i] = src->mappings[i + 1U];
4662 
4663    src->mapping_count--;
4664 }
4665 
4666 static struct pvr_rect_mapping *
pvr_create_mapping(struct pvr_transfer_wa_source * src)4667 pvr_create_mapping(struct pvr_transfer_wa_source *src)
4668 {
4669    assert(src->mapping_count < ARRAY_SIZE(src->mappings));
4670 
4671    return &src->mappings[src->mapping_count++];
4672 }
4673 
/**
 * If PBE can't write to surfaces with odd stride, the stride of
 * destination surface is doubled to make it even. Height of the surface is
 * halved. The source surface is not resized. Each half of the modified
 * destination surface samples every second row from the source surface. This
 * only works with nearest filtering.
 *
 * Returns true when the doubling transform was applied to the mappings.
 */
static bool pvr_double_stride(struct pvr_transfer_pass *pass, uint32_t stride)
{
   struct pvr_rect_mapping *mappings = pass->sources[0].mappings;
   uint32_t new_mapping = 0;

   /* A one-texel stride can't be made even by doubling. */
   if (stride == 1U)
      return false;

   if (mappings[0U].dst_rect.extent.height == 1U &&
       pass->sources[0].mapping_count == 1U) {
      /* Only one mapping required if height is 1. */
      if ((mappings[0U].dst_rect.offset.y & 1U) != 0U) {
         /* Odd destination row: lands in the second half of the doubled
          * stride, i.e. shifted right by the original stride.
          */
         mappings[0U].dst_rect.offset.x += (int32_t)stride;
         mappings[0U].dst_rect.offset.y /= 2U;
         mappings[0U].dst_rect.extent.height =
            (mappings[0U].dst_rect.extent.height + 1U) / 2U;
      } else {
         /* Even destination row: rescale height so the mapping still ends
          * on the same (halved) row, then halve the row index.
          */
         mappings[0U].dst_rect.extent.height =
            (mappings[0U].dst_rect.offset.y +
             mappings[0U].dst_rect.extent.height + 1U) /
               2U -
            mappings[0U].dst_rect.offset.y;
         mappings[0U].dst_rect.offset.y /= 2U;
      }

      return true;
   }

   /* General case: each mapping is split by destination-row parity into
    * mapping A (kept in place) and mapping B (appended after the existing
    * mappings and shifted right by the original stride). Source rows are
    * re-aligned to match the parity of the destination rows they feed.
    */
   for (uint32_t i = 0; i < pass->sources[0].mapping_count; i++) {
      struct pvr_rect_mapping *mapping_a = &mappings[i];
      struct pvr_rect_mapping *mapping_b =
         &mappings[pass->sources[0].mapping_count + new_mapping];
      int32_t mapping_a_src_rect_y1 =
         mapping_a->src_rect.offset.y + mapping_a->src_rect.extent.height;
      int32_t mapping_b_src_rect_y1 = mapping_a_src_rect_y1;
      const bool dst_starts_odd_row = !!(mapping_a->dst_rect.offset.y & 1);
      const bool dst_ends_odd_row =
         !!((mapping_a->dst_rect.offset.y + mapping_a->dst_rect.extent.height) &
            1);
      const bool src_starts_odd_row = !!(mapping_a->src_rect.offset.y & 1);
      const bool src_ends_odd_row =
         !!((mapping_a->src_rect.offset.y + mapping_a->src_rect.extent.height) &
            1);

      assert(pass->sources[0].mapping_count + new_mapping <
             ARRAY_SIZE(pass->sources[0].mappings));
      *mapping_b = *mapping_a;

      /* Align mapping A's source start to even, then nudge by one row if the
       * source/destination start parities disagree.
       */
      mapping_a->src_rect.offset.y = ALIGN_POT(mapping_a->src_rect.offset.y, 2);
      if (dst_starts_odd_row && !src_starts_odd_row)
         mapping_a->src_rect.offset.y++;
      else if (!dst_starts_odd_row && src_starts_odd_row)
         mapping_a->src_rect.offset.y--;

      /* Same parity correction for mapping A's source end row. */
      mapping_a_src_rect_y1 = ALIGN_POT(mapping_a_src_rect_y1, 2);
      if (dst_ends_odd_row && !src_ends_odd_row)
         mapping_a_src_rect_y1++;
      else if (!dst_ends_odd_row && src_ends_odd_row)
         mapping_a_src_rect_y1--;

      mapping_a->src_rect.extent.height =
         mapping_a_src_rect_y1 - mapping_a->src_rect.offset.y;

      /* Mapping B takes the opposite parity: note the nudge directions are
       * mirrored relative to mapping A.
       */
      mapping_b->src_rect.offset.y = ALIGN_POT(mapping_b->src_rect.offset.y, 2);
      if (dst_starts_odd_row && src_starts_odd_row)
         mapping_b->src_rect.offset.y--;
      else if (!dst_starts_odd_row && !src_starts_odd_row)
         mapping_b->src_rect.offset.y++;

      mapping_b_src_rect_y1 = ALIGN_POT(mapping_b_src_rect_y1, 2);
      if (dst_ends_odd_row && src_ends_odd_row)
         mapping_b_src_rect_y1--;
      else if (!dst_ends_odd_row && !src_ends_odd_row)
         mapping_b_src_rect_y1++;

      mapping_b->src_rect.extent.height =
         mapping_b_src_rect_y1 - mapping_b->src_rect.offset.y;

      /* Destination rectangles. */
      mapping_a->dst_rect.offset.y = mapping_a->dst_rect.offset.y / 2;

      if (dst_starts_odd_row)
         mapping_a->dst_rect.offset.y++;

      /* Mapping B writes to the right half of the doubled stride. */
      mapping_b->dst_rect.offset.x += stride;
      mapping_b->dst_rect.offset.y /= 2;
      mapping_b->dst_rect.extent.height /= 2;
      mapping_a->dst_rect.extent.height -= mapping_b->dst_rect.extent.height;

      /* Drop empty halves: if A became empty, B replaces it in place; B is
       * only committed (new_mapping++) when it is non-empty.
       */
      if (!mapping_a->src_rect.extent.width ||
          !mapping_a->src_rect.extent.height) {
         *mapping_a = *mapping_b;
      } else if (mapping_b->src_rect.extent.width &&
                 mapping_b->src_rect.extent.height) {
         new_mapping++;
      }
   }

   /* NOTE(review): only one new mapping is accounted for here even though
    * the loop may commit several via new_mapping — presumably callers only
    * hit this path with mapping_count == 1; verify against call sites.
    */
   pass->sources[0].mapping_count++;

   return true;
}
4783 
/* Splits a stride x height surface vertically at the texel-unwind boundary:
 * rect_a covers the first (stride - texel_unwind) columns, rect_b the last
 * texel_unwind columns. Both span the full height.
 */
static void pvr_split_rect(uint32_t stride,
                           uint32_t height,
                           uint32_t texel_unwind,
                           VkRect2D *rect_a,
                           VkRect2D *rect_b)
{
   const uint32_t left_width = stride - texel_unwind;

   *rect_a = (VkRect2D){
      .offset = { .x = 0, .y = 0 },
      .extent = { .width = left_width, .height = height },
   };

   *rect_b = (VkRect2D){
      .offset = { .x = (int32_t)left_width, .y = 0 },
      .extent = { .width = texel_unwind, .height = height },
   };
}
4800 
pvr_rect_width_covered_by(const VkRect2D * rect_a,const VkRect2D * rect_b)4801 static bool pvr_rect_width_covered_by(const VkRect2D *rect_a,
4802                                       const VkRect2D *rect_b)
4803 {
4804    return (rect_b->offset.x <= rect_a->offset.x &&
4805            (rect_b->offset.x + rect_b->extent.width) >=
4806               (rect_a->offset.x + rect_a->extent.width));
4807 }
4808 
/* Shifts the mappings of pass->sources[0] by texel_unwind texels to
 * compensate for an unaligned surface base address. A rect that stays within
 * one row after the shift is simply moved; a rect that crosses the wrap
 * boundary (width - texel_unwind) is split into two mappings. \p input
 * selects whether the src_rect or dst_rect side of each mapping is shifted.
 */
static void pvr_unwind_rects(uint32_t width,
                             uint32_t height,
                             uint32_t texel_unwind,
                             bool input,
                             struct pvr_transfer_pass *pass)
{
   struct pvr_transfer_wa_source *const source = &pass->sources[0];
   struct pvr_rect_mapping *const mappings = source->mappings;
   /* Snapshot the count: splits below append to the array, and the new
    * mappings must not be revisited by this loop.
    */
   const uint32_t num_mappings = source->mapping_count;
   VkRect2D rect_a, rect_b;

   if (texel_unwind == 0)
      return;

   pvr_split_rect(width, height, texel_unwind, &rect_a, &rect_b);

   for (uint32_t i = 0; i < num_mappings; i++) {
      VkRect2D *const old_rect = input ? &mappings[i].src_rect
                                       : &mappings[i].dst_rect;

      if (height == 1) {
         /* 1D surface: unwind is a plain horizontal shift. */
         old_rect->offset.x += texel_unwind;
      } else if (width == 1) {
         /* One-texel-wide surface: each unwound texel is a whole row. */
         old_rect->offset.y += texel_unwind;
      } else if (pvr_rect_width_covered_by(old_rect, &rect_a)) {
         /* Entirely left of the wrap boundary: shift right in-row. */
         old_rect->offset.x += texel_unwind;
      } else if (pvr_rect_width_covered_by(old_rect, &rect_b)) {
         /* Entirely right of the boundary: wraps to the start of the next
          * row.
          */
         old_rect->offset.x = texel_unwind - width + old_rect->offset.x;
         old_rect->offset.y++;
      } else {
         /* Mapping requires split. */
         const uint32_t new_mapping = source->mapping_count++;

         VkRect2D *const new_rect = input ? &mappings[new_mapping].src_rect
                                          : &mappings[new_mapping].dst_rect;

         /* The opposite (non-shifted) side of each mapping must be split at
          * the same proportion so src and dst stay in correspondence.
          */
         VkRect2D *const new_rect_opp = input ? &mappings[new_mapping].dst_rect
                                              : &mappings[new_mapping].src_rect;
         VkRect2D *const old_rect_opp = input ? &mappings[i].dst_rect
                                              : &mappings[i].src_rect;

         const uint32_t split_point = width - texel_unwind;
         const uint32_t split_width =
            old_rect->offset.x + old_rect->extent.width - split_point;

         assert(new_mapping < ARRAY_SIZE(source->mappings));
         mappings[new_mapping] = mappings[i];

         old_rect_opp->extent.width -= split_width;
         new_rect_opp->extent.width = split_width;
         new_rect_opp->offset.x =
            old_rect_opp->offset.x + old_rect_opp->extent.width;

         /* Left part: shifted right, clipped at the row end. */
         old_rect->offset.x += texel_unwind;
         old_rect->extent.width = width - old_rect->offset.x;

         /* Right part: wraps to column 0 of the next row. */
         new_rect->offset.x = 0;
         new_rect->offset.y++;
         new_rect->extent.width = split_width;
      }
   }
}
4871 
/**
 * Assign clip rects to rectangle mappings. TDM can only do two PBE clip
 * rects per screen.
 *
 * Adjacent destination rects are merged into existing clip rects where
 * possible; when a pass runs out of clip-rect slots, the offending mapping
 * is moved into a newly created pass with the same destination offset.
 */
static void
pvr_map_clip_rects(struct pvr_transfer_custom_mapping *custom_mapping)
{
   /* Note: pass_count may grow while iterating (pvr_create_pass below);
    * newly created passes are then visited by this same loop.
    */
   for (uint32_t i = 0U; i < custom_mapping->pass_count; i++) {
      struct pvr_transfer_pass *pass = &custom_mapping->passes[i];

      pass->clip_rects_count = 0U;

      for (uint32_t s = 0U; s < pass->source_count; s++) {
         struct pvr_transfer_wa_source *src = &pass->sources[s];

         for (uint32_t j = 0U; j < src->mapping_count; j++) {
            struct pvr_rect_mapping *mappings = src->mappings;
            VkRect2D *clip_rects = pass->clip_rects;
            bool merged = false;

            /* Try merge adjacent clip rects. */
            for (uint32_t k = 0U; k < pass->clip_rects_count; k++) {
               /* Same row band, mapping abuts the clip rect's right edge:
                * extend right.
                */
               if (clip_rects[k].offset.y == mappings[j].dst_rect.offset.y &&
                   clip_rects[k].extent.height ==
                      mappings[j].dst_rect.extent.height &&
                   clip_rects[k].offset.x + clip_rects[k].extent.width ==
                      mappings[j].dst_rect.offset.x) {
                  clip_rects[k].extent.width +=
                     mappings[j].dst_rect.extent.width;
                  merged = true;
                  break;
               }

               /* Same row band, mapping abuts the left edge: extend left. */
               if (clip_rects[k].offset.y == mappings[j].dst_rect.offset.y &&
                   clip_rects[k].extent.height ==
                      mappings[j].dst_rect.extent.height &&
                   clip_rects[k].offset.x ==
                      mappings[j].dst_rect.offset.x +
                         mappings[j].dst_rect.extent.width) {
                  clip_rects[k].offset.x = mappings[j].dst_rect.offset.x;
                  clip_rects[k].extent.width +=
                     mappings[j].dst_rect.extent.width;
                  merged = true;
                  break;
               }

               /* Same column band, mapping abuts the bottom edge: extend
                * down.
                */
               if (clip_rects[k].offset.x == mappings[j].dst_rect.offset.x &&
                   clip_rects[k].extent.width ==
                      mappings[j].dst_rect.extent.width &&
                   clip_rects[k].offset.y + clip_rects[k].extent.height ==
                      mappings[j].dst_rect.offset.y) {
                  clip_rects[k].extent.height +=
                     mappings[j].dst_rect.extent.height;
                  merged = true;
                  break;
               }

               /* Same column band, mapping abuts the top edge: extend up. */
               if (clip_rects[k].offset.x == mappings[j].dst_rect.offset.x &&
                   clip_rects[k].extent.width ==
                      mappings[j].dst_rect.extent.width &&
                   clip_rects[k].offset.y ==
                      mappings[j].dst_rect.offset.y +
                         mappings[j].dst_rect.extent.height) {
                  clip_rects[k].extent.height +=
                     mappings[j].dst_rect.extent.height;
                  clip_rects[k].offset.y = mappings[j].dst_rect.offset.y;
                  merged = true;
                  break;
               }
            }

            if (merged)
               continue;

            /* Create new pass if needed, TDM can only have 2 clip rects. */
            if (pass->clip_rects_count >= custom_mapping->max_clip_rects) {
               struct pvr_transfer_pass *new_pass =
                  pvr_create_pass(custom_mapping, pass->dst_offset);
               struct pvr_transfer_wa_source *new_source =
                  pvr_create_source(new_pass,
                                    src->src_offset,
                                    src->extend_height);
               struct pvr_rect_mapping *new_mapping =
                  pvr_create_mapping(new_source);

               new_pass->clip_rects_count = 1U;
               *new_mapping = src->mappings[j];

               pvr_remove_mapping(src, j);

               if (src->mapping_count == 0) {
                  /* Source became empty: drop it and re-test this index. */
                  pvr_remove_source(pass, s);
                  s--;
               } else {
                  /* Redo - mapping was replaced. */
                  j--;
               }
            } else {
               pass->clip_rects[pass->clip_rects_count] =
                  src->mappings[j].dst_rect;

               pass->clip_rects_count++;

               assert(pass->clip_rects_count <= ARRAY_SIZE(pass->clip_rects));
            }
         }
      }
   }
}
4981 
pvr_extend_height(const VkRect2D * rect,const uint32_t height,const uint32_t unwind_src)4982 static bool pvr_extend_height(const VkRect2D *rect,
4983                               const uint32_t height,
4984                               const uint32_t unwind_src)
4985 {
4986    if (rect->offset.x >= (int32_t)unwind_src)
4987       return false;
4988 
4989    return (rect->offset.y > (int32_t)height) ||
4990           ((rect->offset.y + rect->extent.height) > (int32_t)height);
4991 }
4992 
/* Applies the custom-mapping transforms, in order, to the mappings that
 * pvr_get_custom_mapping() seeded into passes[0]: source texel unwind,
 * destination stride doubling, destination texel unwind, and finally
 * clip-rect assignment (which may create extra passes/sources). As a last
 * step, sources whose rects sample past the source height are marked or
 * split out with extend_height set.
 */
static void
pvr_generate_custom_mapping(uint32_t src_stride,
                            uint32_t src_width,
                            uint32_t src_height,
                            uint32_t dst_stride,
                            uint32_t dst_width,
                            uint32_t dst_height,
                            enum pvr_memlayout dst_mem_layout,
                            struct pvr_transfer_custom_mapping *custom_mapping)
{
   /* Work in byte-texel units when texel extend is in play. */
   src_stride *= custom_mapping->texel_extend_src;
   src_width *= custom_mapping->texel_extend_src;
   dst_stride *= custom_mapping->texel_extend_dst;
   dst_width *= custom_mapping->texel_extend_dst;

   if (custom_mapping->texel_unwind_src > 0U) {
      pvr_unwind_rects(src_stride,
                       src_height,
                       custom_mapping->texel_unwind_src,
                       true,
                       &custom_mapping->passes[0U]);
   }

   if (custom_mapping->double_stride) {
      custom_mapping->double_stride =
         pvr_double_stride(&custom_mapping->passes[0U], dst_stride);

      dst_stride *= 2U;
   }

   /* Destination unwind runs unconditionally; it is a no-op when
    * texel_unwind_dst is 0.
    */
   pvr_unwind_rects(dst_stride,
                    dst_height,
                    custom_mapping->texel_unwind_dst,
                    false,
                    &custom_mapping->passes[0U]);

   pvr_map_clip_rects(custom_mapping);

   /* If the last row of the source mapping is sampled, height of the surface
    * can only be increased if the new area contains a valid region. Some blits
    * are split to two sources.
    */
   if (custom_mapping->texel_unwind_src > 0U) {
      for (uint32_t i = 0; i < custom_mapping->pass_count; i++) {
         struct pvr_transfer_pass *pass = &custom_mapping->passes[i];

         for (uint32_t j = 0; j < pass->source_count; j++) {
            struct pvr_transfer_wa_source *src = &pass->sources[j];

            for (uint32_t k = 0; k < src->mapping_count; k++) {
               VkRect2D *src_rect = &src->mappings[k].src_rect;
               bool extend_height =
                  pvr_extend_height(src_rect,
                                    src_height,
                                    custom_mapping->texel_unwind_src);

               if (src->mapping_count == 1) {
                  /* Single mapping: just flag the source. */
                  src->extend_height = extend_height;
               } else if (!src->extend_height && extend_height) {
                  /* Mixed requirements: move this mapping to a source that
                   * has extend_height set (created on demand).
                   */
                  struct pvr_transfer_wa_source *new_src =
                     pvr_acquire_source(pass, src->src_offset, extend_height);

                  new_src->mappings[new_src->mapping_count] = src->mappings[k];
                  new_src->src_offset = src->src_offset;

                  /* Compact the remaining mappings of the old source. */
                  for (uint32_t l = k + 1; l < src->mapping_count; l++)
                     src->mappings[l - 1] = src->mappings[l];

                  new_src->mapping_count++;
                  src->mapping_count--;
                  /* Re-test the mapping now occupying slot k. */
                  k--;
               }
            }
         }
      }
   }
}
5070 
/* Decides whether a transfer command needs the custom-mapping workaround
 * (texel unwind for unaligned base addresses, texel extend for formats the
 * hardware can't address directly, and/or destination stride doubling), and
 * if so seeds custom_mapping with an initial pass/source/mapping and runs
 * pvr_generate_custom_mapping().
 *
 * Returns true when a custom mapping was generated; false when the command
 * can be submitted unmodified or the workaround cannot be applied.
 */
static bool
pvr_get_custom_mapping(const struct pvr_device_info *dev_info,
                       const struct pvr_transfer_cmd *transfer_cmd,
                       uint32_t max_clip_rects,
                       struct pvr_transfer_custom_mapping *custom_mapping)
{
   const uint32_t dst_bpp =
      vk_format_get_blocksizebits(transfer_cmd->dst.vk_format);
   const struct pvr_transfer_cmd_source *src = NULL;
   struct pvr_transfer_pass *pass;
   bool ret;

   custom_mapping->max_clip_rects = max_clip_rects;
   custom_mapping->texel_unwind_src = 0U;
   custom_mapping->texel_unwind_dst = 0U;
   custom_mapping->texel_extend_src = 1U;
   custom_mapping->texel_extend_dst = 1U;
   custom_mapping->pass_count = 0U;

   /* The workaround only handles single-source transfers. */
   if (transfer_cmd->source_count > 1)
      return false;

   custom_mapping->max_clip_size = PVR_MAX_CLIP_SIZE(dev_info);

   /* First try to unwind the destination at its native bpp. */
   ret = pvr_texel_unwind(dst_bpp,
                          transfer_cmd->dst.dev_addr,
                          false,
                          1U,
                          &custom_mapping->texel_unwind_dst);
   if (!ret) {
      /* Fall back to treating each byte as a texel (texel extend), which
       * makes more addresses reachable by the unwind.
       */
      custom_mapping->texel_extend_dst = dst_bpp / 8U;
      if (transfer_cmd->source_count > 0) {
         /* The source must be extended by the same factor when it is
          * linear, or twiddled with height 1 (effectively 1D).
          */
         if (transfer_cmd->sources[0].surface.mem_layout ==
             PVR_MEMLAYOUT_LINEAR) {
            custom_mapping->texel_extend_src = custom_mapping->texel_extend_dst;
         } else if (transfer_cmd->sources[0].surface.mem_layout ==
                       PVR_MEMLAYOUT_TWIDDLED &&
                    transfer_cmd->sources[0].surface.height == 1U) {
            custom_mapping->texel_extend_src = custom_mapping->texel_extend_dst;
         }
      }

      ret = pvr_texel_unwind(dst_bpp,
                             transfer_cmd->dst.dev_addr,
                             false,
                             custom_mapping->texel_extend_dst,
                             &custom_mapping->texel_unwind_dst);
      if (!ret)
         return false;
   }

   if (transfer_cmd->source_count > 0) {
      src = &transfer_cmd->sources[0];
      const uint32_t src_bpp =
         vk_format_get_blocksizebits(src->surface.vk_format);

      ret = pvr_is_surface_aligned(src->surface.dev_addr, true, src_bpp);

      /* Unaligned source: only linear (or 1-row) surfaces can be unwound. */
      if (!ret && (src->surface.mem_layout == PVR_MEMLAYOUT_LINEAR ||
                   src->surface.height == 1U)) {
         ret = pvr_texel_unwind(src_bpp,
                                src->surface.dev_addr,
                                true,
                                custom_mapping->texel_extend_src,
                                &custom_mapping->texel_unwind_src);
      }

      if (!ret) {
         /* Last resort: extend both source and destination by the
          * destination's byte-per-texel factor and retry the unwind.
          */
         custom_mapping->texel_extend_src = dst_bpp / 8U;
         custom_mapping->texel_extend_dst = custom_mapping->texel_extend_src;

         ret = pvr_texel_unwind(src_bpp,
                                src->surface.dev_addr,
                                true,
                                custom_mapping->texel_extend_src,
                                &custom_mapping->texel_unwind_src);
      }

      if (!ret)
         return false;
   }

   VkRect2D rect = transfer_cmd->scissor;
   assert(
      (rect.offset.x + rect.extent.width) <= custom_mapping->max_clip_size &&
      (rect.offset.y + rect.extent.height) <= custom_mapping->max_clip_size);

   /* Texel extend only works with strided memory layout, because pixel width is
    * changed. Texel unwind only works with strided memory layout. 1D blits are
    * allowed.
    */
   if (src && src->surface.height > 1U &&
       (custom_mapping->texel_extend_src > 1U ||
        custom_mapping->texel_unwind_src > 0U) &&
       src->surface.mem_layout != PVR_MEMLAYOUT_LINEAR) {
      return false;
   }

   /* Texel extend only works with strided memory layout, because pixel width is
    * changed. Texel unwind only works with strided memory layout. 1D blits are
    * allowed.
    */
   if ((custom_mapping->texel_extend_dst > 1U ||
        custom_mapping->texel_unwind_dst > 0U) &&
       transfer_cmd->dst.mem_layout != PVR_MEMLAYOUT_LINEAR &&
       transfer_cmd->dst.height > 1U) {
      return false;
   }

   /* PBE stride restriction: an unaligned linear destination stride forces
    * the double-stride workaround.
    */
   if (transfer_cmd->dst.mem_layout == PVR_MEMLAYOUT_LINEAR) {
      custom_mapping->double_stride = !pvr_is_pbe_stride_aligned(
         transfer_cmd->dst.stride * custom_mapping->texel_extend_dst);
   }

   if (custom_mapping->texel_unwind_src > 0U ||
       custom_mapping->texel_unwind_dst > 0U || custom_mapping->double_stride) {
      struct pvr_transfer_wa_source *wa_src;
      struct pvr_rect_mapping *mapping;

      /* Seed the first pass with a single mapping covering the blit. */
      pass = pvr_acquire_pass(custom_mapping, 0U);
      wa_src = pvr_create_source(pass, 0U, false);
      mapping = pvr_create_mapping(wa_src);

      if (transfer_cmd->source_count > 0) {
         *mapping = src->mappings[0U];
      } else {
         /* Fill-style transfer: src and dst rects both cover the scissor. */
         mapping->src_rect = transfer_cmd->scissor;
         mapping->dst_rect = transfer_cmd->scissor;
      }
   } else {
      /* No workaround required. */
      return false;
   }

   /* Scale the seeded mapping into extended-texel coordinates. */
   if (custom_mapping->texel_extend_src > 1U ||
       custom_mapping->texel_extend_dst > 1U) {
      pass->sources[0].mappings[0U].src_rect.offset.x *=
         (int32_t)custom_mapping->texel_extend_dst;
      pass->sources[0].mappings[0U].src_rect.extent.width *=
         (int32_t)custom_mapping->texel_extend_dst;
      pass->sources[0].mappings[0U].dst_rect.offset.x *=
         (int32_t)custom_mapping->texel_extend_dst;
      pass->sources[0].mappings[0U].dst_rect.extent.width *=
         (int32_t)custom_mapping->texel_extend_dst;
   }

   if (transfer_cmd->source_count > 0) {
      pvr_generate_custom_mapping(transfer_cmd->sources[0].surface.stride,
                                  transfer_cmd->sources[0].surface.width,
                                  transfer_cmd->sources[0].surface.height,
                                  transfer_cmd->dst.stride,
                                  transfer_cmd->dst.width,
                                  transfer_cmd->dst.height,
                                  transfer_cmd->dst.mem_layout,
                                  custom_mapping);
   } else {
      pvr_generate_custom_mapping(0U,
                                  0U,
                                  0U,
                                  transfer_cmd->dst.stride,
                                  transfer_cmd->dst.width,
                                  transfer_cmd->dst.height,
                                  transfer_cmd->dst.mem_layout,
                                  custom_mapping);
   }

   return true;
}
5238 
/* Scales a rect's horizontal position and width by the texel-extend factor
 * (vertical coordinates are untouched).
 */
static void pvr_pbe_extend_rect(uint32_t texel_extend, VkRect2D *rect)
{
   rect->offset.x *= (int32_t)texel_extend;
   rect->extent.width *= texel_extend;
}
5244 
/* Clips rect_a in place to its intersection with rect_b. The new bounds are
 * computed into temporaries first so the original offsets are still
 * available while deriving the extents.
 */
static void pvr_pbe_rect_intersect(VkRect2D *rect_a, VkRect2D *rect_b)
{
   const int32_t x0 = MAX2(rect_a->offset.x, rect_b->offset.x);
   const int32_t x1 = MIN2(rect_a->offset.x + rect_a->extent.width,
                           rect_b->offset.x + rect_b->extent.width);
   const int32_t y0 = MAX2(rect_a->offset.y, rect_b->offset.y);
   const int32_t y1 = MIN2(rect_a->offset.y + rect_a->extent.height,
                           rect_b->offset.y + rect_b->extent.height);

   rect_a->offset.x = x0;
   rect_a->offset.y = y0;
   rect_a->extent.width = x1 - x0;
   rect_a->extent.height = y1 - y0;
}
5256 
/* Picks the plain uint format to reinterpret a source as when destination
 * texel extend is applied, keyed purely on the source format's bpp.
 */
static VkFormat pvr_texel_extend_src_format(VkFormat vk_format)
{
   const uint32_t bpp = vk_format_get_blocksizebits(vk_format);

   switch (bpp) {
   case 16:
      return VK_FORMAT_R8G8_UINT;
   case 32:
      return VK_FORMAT_R8G8B8A8_UINT;
   case 48:
      return VK_FORMAT_R16G16B16_UINT;
   default:
      return VK_FORMAT_R8_UINT;
   }
}
5279 
5280 static void
pvr_modify_command(struct pvr_transfer_custom_mapping *custom_mapping,
                   uint32_t pass_idx,
                   struct pvr_transfer_cmd *transfer_cmd)
{
   /* Rewrites the transfer command in place for one pass of a custom-mapping
    * workaround: scales formats/widths/strides for texel extend, grows the
    * source surface for texel unwind, installs the per-pass rectangle
    * mappings, and rebases the source/destination device addresses.
    */
   struct pvr_transfer_pass *pass = &custom_mapping->passes[pass_idx];
   uint32_t bpp;

   if (custom_mapping->texel_extend_src > 1U) {
      struct pvr_rect_mapping *mapping = &transfer_cmd->sources[0].mappings[0];

      /* Treat each original texel as texel_extend_src R8 texels: widen both
       * rects and scale the surface widths/strides to match.
       */
      pvr_pbe_extend_rect(custom_mapping->texel_extend_src, &mapping->dst_rect);
      pvr_pbe_extend_rect(custom_mapping->texel_extend_src, &mapping->src_rect);

      transfer_cmd->dst.vk_format = VK_FORMAT_R8_UINT;
      transfer_cmd->dst.width *= custom_mapping->texel_extend_src;
      transfer_cmd->dst.stride *= custom_mapping->texel_extend_src;
      transfer_cmd->sources[0].surface.vk_format = VK_FORMAT_R8_UINT;
      transfer_cmd->sources[0].surface.width *=
         custom_mapping->texel_extend_src;
      transfer_cmd->sources[0].surface.stride *=
         custom_mapping->texel_extend_src;
   } else if (custom_mapping->texel_extend_dst > 1U) {
      VkRect2D max_clip = {
         .offset = { 0, 0 },
         .extent = { custom_mapping->max_clip_size,
                     custom_mapping->max_clip_size },
      };

      /* Widen the scissor by the destination extend factor, then clamp it to
       * the maximum clip size.
       */
      pvr_pbe_extend_rect(custom_mapping->texel_extend_dst,
                          &transfer_cmd->scissor);

      pvr_pbe_rect_intersect(&transfer_cmd->scissor, &max_clip);

      if (transfer_cmd->source_count > 0) {
         transfer_cmd->sources[0].surface.width *=
            custom_mapping->texel_extend_dst;
         transfer_cmd->sources[0].surface.stride *=
            custom_mapping->texel_extend_dst;

         transfer_cmd->sources[0].surface.vk_format =
            pvr_texel_extend_src_format(
               transfer_cmd->sources[0].surface.vk_format);
      }

      transfer_cmd->dst.vk_format = VK_FORMAT_R8_UINT;
      transfer_cmd->dst.width *= custom_mapping->texel_extend_dst;
      transfer_cmd->dst.stride *= custom_mapping->texel_extend_dst;
   }

   if (custom_mapping->double_stride) {
      transfer_cmd->dst.width *= 2U;
      transfer_cmd->dst.stride *= 2U;
   }

   if (custom_mapping->texel_unwind_src > 0U) {
      if (transfer_cmd->sources[0].surface.height == 1U) {
         /* Single-row surface: absorb the unwind into width and stride. */
         transfer_cmd->sources[0].surface.width +=
            custom_mapping->texel_unwind_src;
         transfer_cmd->sources[0].surface.stride +=
            custom_mapping->texel_unwind_src;
      } else if (transfer_cmd->sources[0].surface.stride == 1U) {
         /* Unit stride: absorb the unwind into the height instead. */
         transfer_cmd->sources[0].surface.height +=
            custom_mapping->texel_unwind_src;
      } else {
         /* Increase source width by texel unwind. If texel unwind is less than
          * the distance between width and stride. The blit can be done with one
          * rectangle mapping, but the width of the surface needs be to
          * increased in case we sample from the area between width and stride.
          */
         transfer_cmd->sources[0].surface.width =
            MIN2(transfer_cmd->sources[0].surface.width +
                    custom_mapping->texel_unwind_src,
                 transfer_cmd->sources[0].surface.stride);
      }
   }

   /* Install the per-pass sources: source 0 is modified in place, additional
    * sources start out as copies of it, and each source then receives its
    * own rectangle mappings.
    */
   for (uint32_t i = 0U; i < pass->source_count; i++) {
      struct pvr_transfer_wa_source *src = &pass->sources[i];

      if (i > 0)
         transfer_cmd->sources[i] = transfer_cmd->sources[0];

      transfer_cmd->sources[i].mapping_count = src->mapping_count;
      for (uint32_t j = 0U; j < transfer_cmd->sources[i].mapping_count; j++)
         transfer_cmd->sources[i].mappings[j] = src->mappings[j];

      if (src->extend_height)
         transfer_cmd->sources[i].surface.height += 1U;

      /* Clamp surface dimensions to PVR_MAX_WIDTH. */
      transfer_cmd->sources[i].surface.width =
         MIN2(PVR_MAX_WIDTH, transfer_cmd->sources[i].surface.width);
      transfer_cmd->sources[i].surface.height =
         MIN2(PVR_MAX_WIDTH, transfer_cmd->sources[i].surface.height);
      transfer_cmd->sources[i].surface.stride =
         MIN2(PVR_MAX_WIDTH, transfer_cmd->sources[i].surface.stride);
   }

   if (transfer_cmd->dst.height == 1U) {
      /* Single-row destination: widen to cover the stride plus the unwind
       * and switch to a twiddled layout.
       */
      transfer_cmd->dst.width =
         transfer_cmd->dst.stride + custom_mapping->texel_unwind_dst;
      transfer_cmd->dst.mem_layout = PVR_MEMLAYOUT_TWIDDLED;
   }

   if (transfer_cmd->dst.mem_layout == PVR_MEMLAYOUT_TWIDDLED) {
      transfer_cmd->dst.width =
         MIN2((uint32_t)custom_mapping->max_clip_size, transfer_cmd->dst.width);
      transfer_cmd->dst.height = MIN2((uint32_t)custom_mapping->max_clip_size,
                                      transfer_cmd->dst.height);
   } else {
      transfer_cmd->dst.width = MIN2(PVR_MAX_WIDTH, transfer_cmd->dst.width);
   }

   if (transfer_cmd->source_count > 0) {
      for (uint32_t i = 0; i < pass->source_count; i++) {
         struct pvr_transfer_cmd_source *src = &transfer_cmd->sources[i];

         bpp = vk_format_get_blocksizebits(src->surface.vk_format);

         /* Rebase the source address: back off by the unwind amount and
          * advance by this pass' per-source offset (scaled by sample count).
          */
         src->surface.dev_addr.addr -=
            custom_mapping->texel_unwind_src * bpp / 8U;
         src->surface.dev_addr.addr += MAX2(src->surface.sample_count, 1U) *
                                       pass->sources[i].src_offset * bpp / 8U;
      }
   }

   /* Same address rebasing for the destination. */
   bpp = vk_format_get_blocksizebits(transfer_cmd->dst.vk_format);
   transfer_cmd->dst.dev_addr.addr -=
      custom_mapping->texel_unwind_dst * bpp / 8U;
   transfer_cmd->dst.dev_addr.addr +=
      MAX2(transfer_cmd->dst.sample_count, 1U) * pass->dst_offset * bpp / 8U;

   if (transfer_cmd->source_count > 0)
      transfer_cmd->source_count = pass->source_count;
}
5415 
5416 /* Route a copy_blit (FastScale HW) to a clip_blit (Fast2D HW).
5417  * Destination rectangle can be specified in dst_rect, or NULL to use existing.
5418  */
pvr_reroute_to_clip(struct pvr_transfer_ctx * ctx,const struct pvr_transfer_cmd * transfer_cmd,const struct VkRect2D * dst_rect,struct pvr_transfer_prep_data * prep_data,uint32_t pass_idx,bool * finished_out)5419 static VkResult pvr_reroute_to_clip(struct pvr_transfer_ctx *ctx,
5420                                     const struct pvr_transfer_cmd *transfer_cmd,
5421                                     const struct VkRect2D *dst_rect,
5422                                     struct pvr_transfer_prep_data *prep_data,
5423                                     uint32_t pass_idx,
5424                                     bool *finished_out)
5425 {
5426    struct pvr_transfer_cmd clip_transfer_cmd;
5427 
5428    clip_transfer_cmd = *transfer_cmd;
5429    clip_transfer_cmd.flags |= PVR_TRANSFER_CMD_FLAGS_FAST2D;
5430 
5431    if (transfer_cmd->source_count <= 1U) {
5432       if (dst_rect)
5433          clip_transfer_cmd.scissor = *dst_rect;
5434 
5435       return pvr_3d_clip_blit(ctx,
5436                               &clip_transfer_cmd,
5437                               prep_data,
5438                               pass_idx,
5439                               finished_out);
5440    }
5441 
5442    return vk_error(ctx->device, VK_ERROR_FORMAT_NOT_SUPPORTED);
5443 }
5444 
static VkResult pvr_3d_copy_blit(struct pvr_transfer_ctx *ctx,
                                 struct pvr_transfer_cmd *transfer_cmd,
                                 struct pvr_transfer_prep_data *prep_data,
                                 uint32_t pass_idx,
                                 bool *finished_out)
{
   /* Prepares one pass of a copy blit on the 3D (FastScale) path. May retype
    * single-source point-sampled copies to RAW uint formats, apply a custom
    * mapping workaround pass, or reroute DS-merge blits to the clip path.
    */
   const struct pvr_device_info *const dev_info =
      &ctx->device->pdevice->dev_info;

   struct pvr_transfer_3d_state *state = &prep_data->state;
   /* active_cmd aliases either the caller's command or the local copy
    * int_cmd, depending on whether we needed to modify it.
    */
   struct pvr_transfer_cmd *active_cmd = transfer_cmd;
   struct pvr_transfer_cmd int_cmd;
   VkResult result;

   state->dont_force_pbe = false;
   state->pass_idx = pass_idx;

   pvr_transfer_set_filter(transfer_cmd, state);

   if (transfer_cmd->source_count == 1U) {
      struct pvr_transfer_cmd_source *src = &transfer_cmd->sources[0];

      /* Try to work out a condition to map pixel formats to RAW. That is only
       * possible if we don't perform any kind of 2D operation on the blit as we
       * don't know the actual pixel values - i.e. it has to be point sampled -
       * scaling doesn't matter as long as point sampled.
       */
      if (src->surface.vk_format == transfer_cmd->dst.vk_format &&
          state->filter[0] == PVR_FILTER_POINT &&
          src->surface.sample_count <= transfer_cmd->dst.sample_count &&
          (transfer_cmd->flags & PVR_TRANSFER_CMD_FLAGS_DSMERGE) == 0U) {
         uint32_t bpp;

         /* Work on a local copy so the caller's command stays untouched. */
         int_cmd = *transfer_cmd;
         active_cmd = &int_cmd;
         bpp = vk_format_get_blocksizebits(int_cmd.dst.vk_format);

         if (bpp > 0U) {
            /* Pick the RAW uint format with the same bits per pixel. */
            switch (bpp) {
            case 8U:
               int_cmd.sources[0].surface.vk_format = VK_FORMAT_R8_UINT;
               break;
            case 16U:
               int_cmd.sources[0].surface.vk_format = VK_FORMAT_R8G8_UINT;
               break;
            case 24U:
               int_cmd.sources[0].surface.vk_format = VK_FORMAT_R8G8B8_UINT;
               break;
            case 32U:
               int_cmd.sources[0].surface.vk_format = VK_FORMAT_R32_UINT;
               break;
            case 48U:
               int_cmd.sources[0].surface.vk_format = VK_FORMAT_R16G16B16_UINT;
               break;
            case 64U:
               int_cmd.sources[0].surface.vk_format = VK_FORMAT_R32G32_UINT;
               break;
            case 96U:
               int_cmd.sources[0].surface.vk_format = VK_FORMAT_R32G32B32_UINT;
               break;
            case 128U:
               int_cmd.sources[0].surface.vk_format =
                  VK_FORMAT_R32G32B32A32_UINT;
               break;
            default:
               /* No RAW equivalent - fall back to the original command. */
               active_cmd = transfer_cmd;
               break;
            }
         }

         /* Harmless if we fell back above: int_cmd is then unused. */
         int_cmd.dst.vk_format = int_cmd.sources[0].surface.vk_format;
      }
   }

   /* The custom mapping is computed once, on the first pass only. */
   if (pass_idx == 0U) {
      pvr_get_custom_mapping(dev_info, active_cmd, 3U, &state->custom_mapping);

      if (state->custom_mapping.texel_extend_src > 1U)
         state->custom_mapping.texel_extend_dst = 1U;
   }

   if (state->custom_mapping.pass_count > 0U) {
      struct pvr_transfer_pass *pass = &state->custom_mapping.passes[pass_idx];

      /* pvr_modify_command() mutates the command, so make sure we are
       * working on the local copy.
       */
      if (active_cmd != &int_cmd) {
         int_cmd = *active_cmd;
         active_cmd = &int_cmd;
      }

      state->custom_filter = true;

      pvr_modify_command(&state->custom_mapping, pass_idx, active_cmd);

      if (state->custom_mapping.double_stride ||
          pass->sources[0].mapping_count > 1U || pass->source_count > 1U) {
         result =
            pvr_3d_clip_blit(ctx, active_cmd, prep_data, pass_idx, finished_out);
      } else {
         struct pvr_rect_mapping *mappings = &pass->sources[0].mappings[0U];

         /* Undo the destination texel extend on the source rect. */
         mappings[0U].src_rect.offset.x /=
            MAX2(1U, state->custom_mapping.texel_extend_dst);
         mappings[0U].src_rect.extent.width /=
            MAX2(1U, state->custom_mapping.texel_extend_dst);

         if (int_cmd.source_count > 0) {
            for (uint32_t i = 0U; i < pass->sources[0].mapping_count; i++)
               active_cmd->sources[0].mappings[i] = mappings[i];
         }

         active_cmd->scissor = mappings[0U].dst_rect;

         result = pvr_3d_copy_blit_core(ctx,
                                        active_cmd,
                                        prep_data,
                                        pass_idx,
                                        finished_out);
      }

      return result;
   }

   /* Route DS merge blits to Clip blit. Background object is used to preserve
    * the unmerged channel.
    */
   if ((transfer_cmd->flags & PVR_TRANSFER_CMD_FLAGS_DSMERGE) != 0U) {
      /* PBE byte mask could be used for DS merge with FastScale. Clearing the
       * other channel on a DS merge requires Clip blit.
       */
      if (!PVR_HAS_ERN(dev_info, 42064) ||
          ((transfer_cmd->flags & PVR_TRANSFER_CMD_FLAGS_FILL) != 0U)) {
         return pvr_reroute_to_clip(ctx,
                                    active_cmd,
                                    &active_cmd->scissor,
                                    prep_data,
                                    pass_idx,
                                    finished_out);
      }
   }

   return pvr_3d_copy_blit_core(ctx,
                                active_cmd,
                                prep_data,
                                pass_idx,
                                finished_out);
}
5591 
5592 /* TODO: This should be generated in csbgen. */
5593 #define TEXSTATE_STRIDE_IMAGE_WORD1_TEXADDR_MASK \
5594    BITFIELD64_RANGE(2, (53 - 16) + 1)
5595 
pvr_validate_source_addr(pvr_dev_addr_t addr)5596 static bool pvr_validate_source_addr(pvr_dev_addr_t addr)
5597 {
5598    if (!pvr_dev_addr_is_aligned(
5599           addr,
5600           ROGUE_TEXSTATE_STRIDE_IMAGE_WORD1_TEXADDR_ALIGNMENT)) {
5601       return false;
5602    }
5603 
5604    if (addr.addr & ~TEXSTATE_STRIDE_IMAGE_WORD1_TEXADDR_MASK)
5605       return false;
5606 
5607    return true;
5608 }
5609 
pvr_supports_texel_unwind(struct pvr_transfer_cmd * transfer_cmd)5610 static bool pvr_supports_texel_unwind(struct pvr_transfer_cmd *transfer_cmd)
5611 {
5612    struct pvr_transfer_cmd_surface *dst = &transfer_cmd->dst;
5613 
5614    if (transfer_cmd->source_count > 1)
5615       return false;
5616 
5617    if (transfer_cmd->source_count) {
5618       struct pvr_transfer_cmd_surface *src = &transfer_cmd->sources[0].surface;
5619 
5620       if (src->height == 1) {
5621          if (src->mem_layout != PVR_MEMLAYOUT_LINEAR &&
5622              src->mem_layout != PVR_MEMLAYOUT_TWIDDLED &&
5623              src->mem_layout != PVR_MEMLAYOUT_3DTWIDDLED) {
5624             return false;
5625          }
5626       } else if (src->mem_layout == PVR_MEMLAYOUT_TWIDDLED ||
5627                  src->mem_layout == PVR_MEMLAYOUT_3DTWIDDLED) {
5628          if (!pvr_validate_source_addr(src->dev_addr))
5629             return false;
5630       } else {
5631          if (src->mem_layout != PVR_MEMLAYOUT_LINEAR)
5632             return false;
5633       }
5634    }
5635 
5636    if (dst->mem_layout != PVR_MEMLAYOUT_LINEAR &&
5637        dst->mem_layout != PVR_MEMLAYOUT_TWIDDLED) {
5638       return false;
5639    }
5640 
5641    return true;
5642 }
5643 
pvr_3d_validate_addr(struct pvr_transfer_cmd * transfer_cmd)5644 static bool pvr_3d_validate_addr(struct pvr_transfer_cmd *transfer_cmd)
5645 {
5646    if (!pvr_supports_texel_unwind(transfer_cmd)) {
5647       return pvr_dev_addr_is_aligned(
5648          transfer_cmd->dst.dev_addr,
5649          ROGUE_PBESTATE_STATE_WORD0_ADDRESS_LOW_ALIGNMENT);
5650    }
5651 
5652    return true;
5653 }
5654 
static void
pvr_submit_info_stream_init(struct pvr_transfer_ctx *ctx,
                            struct pvr_transfer_prep_data *prep_data,
                            struct pvr_winsys_transfer_cmd *cmd)
{
   /* Serializes the prepared transfer register state into the firmware
    * command stream. NOTE: the write order below defines the stream layout -
    * do not reorder the writes.
    */
   const struct pvr_winsys_transfer_regs *const regs = &prep_data->state.regs;
   const struct pvr_physical_device *const pdevice = ctx->device->pdevice;
   const struct pvr_device_info *const dev_info = &pdevice->dev_info;

   uint32_t *stream_ptr = (uint32_t *)cmd->fw_stream;
   /* The header is written last, once the final length is known. */
   uint32_t *stream_len_ptr = stream_ptr;

   /* Leave space for stream header. */
   stream_ptr += pvr_cmd_length(KMD_STREAM_HDR);

   /* 64-bit register values. */
   *(uint64_t *)stream_ptr = regs->pds_bgnd0_base;
   stream_ptr += pvr_cmd_length(CR_PDS_BGRND0_BASE);

   *(uint64_t *)stream_ptr = regs->pds_bgnd1_base;
   stream_ptr += pvr_cmd_length(CR_PDS_BGRND1_BASE);

   *(uint64_t *)stream_ptr = regs->pds_bgnd3_sizeinfo;
   stream_ptr += pvr_cmd_length(CR_PDS_BGRND3_SIZEINFO);

   *(uint64_t *)stream_ptr = regs->isp_mtile_base;
   stream_ptr += pvr_cmd_length(CR_ISP_MTILE_BASE);

   /* All nine 64-bit PBE words are copied in one go. */
   STATIC_ASSERT(ARRAY_SIZE(regs->pbe_wordx_mrty) == 9U);
   STATIC_ASSERT(sizeof(regs->pbe_wordx_mrty[0]) == sizeof(uint64_t));
   memcpy(stream_ptr, regs->pbe_wordx_mrty, sizeof(regs->pbe_wordx_mrty));
   stream_ptr += 9U * 2U;

   /* 32-bit register values. */
   *stream_ptr = regs->isp_bgobjvals;
   stream_ptr += pvr_cmd_length(CR_ISP_BGOBJVALS);

   *stream_ptr = regs->usc_pixel_output_ctrl;
   stream_ptr += pvr_cmd_length(CR_USC_PIXEL_OUTPUT_CTRL);

   *stream_ptr = regs->usc_clear_register0;
   stream_ptr += pvr_cmd_length(CR_USC_CLEAR_REGISTER);

   *stream_ptr = regs->usc_clear_register1;
   stream_ptr += pvr_cmd_length(CR_USC_CLEAR_REGISTER);

   *stream_ptr = regs->usc_clear_register2;
   stream_ptr += pvr_cmd_length(CR_USC_CLEAR_REGISTER);

   *stream_ptr = regs->usc_clear_register3;
   stream_ptr += pvr_cmd_length(CR_USC_CLEAR_REGISTER);

   *stream_ptr = regs->isp_mtile_size;
   stream_ptr += pvr_cmd_length(CR_ISP_MTILE_SIZE);

   *stream_ptr = regs->isp_render_origin;
   stream_ptr += pvr_cmd_length(CR_ISP_RENDER_ORIGIN);

   *stream_ptr = regs->isp_ctl;
   stream_ptr += pvr_cmd_length(CR_ISP_CTL);

   *stream_ptr = regs->isp_aa;
   stream_ptr += pvr_cmd_length(CR_ISP_AA);

   *stream_ptr = regs->event_pixel_pds_info;
   stream_ptr += pvr_cmd_length(CR_EVENT_PIXEL_PDS_INFO);

   *stream_ptr = regs->event_pixel_pds_code;
   stream_ptr += pvr_cmd_length(CR_EVENT_PIXEL_PDS_CODE);

   *stream_ptr = regs->event_pixel_pds_data;
   stream_ptr += pvr_cmd_length(CR_EVENT_PIXEL_PDS_DATA);

   *stream_ptr = regs->isp_render;
   stream_ptr += pvr_cmd_length(CR_ISP_RENDER);

   *stream_ptr = regs->isp_rgn;
   stream_ptr++;

   /* Only emitted on multicore-capable GPUs. */
   if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) {
      *stream_ptr = regs->frag_screen;
      stream_ptr++;
   }

   cmd->fw_stream_len = (uint8_t *)stream_ptr - (uint8_t *)cmd->fw_stream;
   assert(cmd->fw_stream_len <= ARRAY_SIZE(cmd->fw_stream));

   /* Now fill in the header with the computed stream length. */
   pvr_csb_pack ((uint64_t *)stream_len_ptr, KMD_STREAM_HDR, value) {
      value.length = cmd->fw_stream_len;
   }
}
5744 
5745 static void
pvr_submit_info_flags_init(const struct pvr_device_info * const dev_info,const struct pvr_transfer_prep_data * const prep_data,struct pvr_winsys_transfer_cmd_flags * flags)5746 pvr_submit_info_flags_init(const struct pvr_device_info *const dev_info,
5747                            const struct pvr_transfer_prep_data *const prep_data,
5748                            struct pvr_winsys_transfer_cmd_flags *flags)
5749 {
5750    *flags = prep_data->flags;
5751    flags->use_single_core = PVR_HAS_FEATURE(dev_info, gpu_multicore_support);
5752 }
5753 
pvr_transfer_job_ws_submit_info_init(struct pvr_transfer_ctx * ctx,struct pvr_transfer_submit * submit,struct vk_sync * wait,struct pvr_winsys_transfer_submit_info * submit_info)5754 static void pvr_transfer_job_ws_submit_info_init(
5755    struct pvr_transfer_ctx *ctx,
5756    struct pvr_transfer_submit *submit,
5757    struct vk_sync *wait,
5758    struct pvr_winsys_transfer_submit_info *submit_info)
5759 {
5760    const struct pvr_device *const device = ctx->device;
5761    const struct pvr_device_info *const dev_info = &device->pdevice->dev_info;
5762 
5763    submit_info->frame_num = device->global_queue_present_count;
5764    submit_info->job_num = device->global_cmd_buffer_submit_count;
5765    submit_info->wait = wait;
5766    submit_info->cmd_count = submit->prep_count;
5767 
5768    for (uint32_t i = 0U; i < submit->prep_count; i++) {
5769       struct pvr_winsys_transfer_cmd *const cmd = &submit_info->cmds[i];
5770       struct pvr_transfer_prep_data *prep_data = &submit->prep_array[i];
5771 
5772       pvr_submit_info_stream_init(ctx, prep_data, cmd);
5773       pvr_submit_info_flags_init(dev_info, prep_data, &cmd->flags);
5774    }
5775 }
5776 
pvr_submit_transfer(struct pvr_transfer_ctx * ctx,struct pvr_transfer_submit * submit,struct vk_sync * wait,struct vk_sync * signal_sync)5777 static VkResult pvr_submit_transfer(struct pvr_transfer_ctx *ctx,
5778                                     struct pvr_transfer_submit *submit,
5779                                     struct vk_sync *wait,
5780                                     struct vk_sync *signal_sync)
5781 {
5782    struct pvr_winsys_transfer_submit_info submit_info;
5783 
5784    pvr_transfer_job_ws_submit_info_init(ctx, submit, wait, &submit_info);
5785 
5786    return ctx->device->ws->ops->transfer_submit(ctx->ws_ctx,
5787                                                 &submit_info,
5788                                                 &ctx->device->pdevice->dev_info,
5789                                                 signal_sync);
5790 }
5791 
static VkResult pvr_queue_transfer(struct pvr_transfer_ctx *ctx,
                                   struct pvr_transfer_cmd *transfer_cmd,
                                   struct vk_sync *wait,
                                   struct vk_sync *signal_sync)
{
   /* Prepares a single transfer command, looping over passes until a prepare
    * reports it has finished, submitting whenever the prepare array fills up
    * or the blit completes.
    */
   struct pvr_transfer_prep_data *prep_data = NULL;
   struct pvr_transfer_prep_data *prev_prep_data;
   struct pvr_transfer_submit submit = { 0U };
   bool finished = false;
   uint32_t pass = 0U;
   VkResult result;

   /* Transfer queue might decide to do a blit in multiple passes. When the
    * prepare doesn't set the finished flag this code will keep calling the
    * prepare with increasing pass. If queued transfers are submitted from
    * here we submit them straight away. That's why we only need a single
    * prepare for the blit rather then one for each pass. Otherwise we insert
    * each prepare into the prepare array. When the client does blit batching
    * and we split the blit into multiple passes each pass in each queued
    * transfer adds one more prepare. Thus the prepare array after 2
    * pvr_queue_transfer calls might look like:
    *
    * +------+------++-------+-------+-------+
    * |B0/P0 |B0/P1 || B1/P0 | B1/P1 | B1/P2 |
    * +------+------++-------+-------+-------+
    * F           S/U F                    S/U
    *
    * Bn/Pm : nth blit (queue transfer call) / mth prepare
    * F     : fence point
    * S/U   : update / server sync update point
    */

   while (!finished) {
      /* Remember the previous prepare: later passes copy its 3D state. On
       * the first iteration prev_prep_data is NULL but is never read since
       * pass == 0.
       */
      prev_prep_data = prep_data;
      prep_data = &submit.prep_array[submit.prep_count++];

      /* Clear down the memory before we write to this prep. */
      memset(prep_data, 0U, sizeof(*prep_data));

      if (pass == 0U) {
         if (!pvr_3d_validate_addr(transfer_cmd))
            return vk_error(ctx->device, VK_ERROR_FEATURE_NOT_PRESENT);
      } else {
         /* Transfer queue workarounds could use more than one pass with 3D
          * path.
          */
         prep_data->state = prev_prep_data->state;
      }

      if (transfer_cmd->flags & PVR_TRANSFER_CMD_FLAGS_FAST2D) {
         result =
            pvr_3d_clip_blit(ctx, transfer_cmd, prep_data, pass, &finished);
      } else {
         result =
            pvr_3d_copy_blit(ctx, transfer_cmd, prep_data, pass, &finished);
      }
      if (result != VK_SUCCESS)
         return result;

      /* Submit if we have finished the blit or if we are out of prepares. */
      if (finished || submit.prep_count == ARRAY_SIZE(submit.prep_array)) {
         /* Only the final submission carries the caller's signal sync. */
         result = pvr_submit_transfer(ctx,
                                      &submit,
                                      wait,
                                      finished ? signal_sync : NULL);
         if (result != VK_SUCCESS)
            return result;

         /* Check if we need to reset prep_count. */
         if (submit.prep_count == ARRAY_SIZE(submit.prep_array))
            submit.prep_count = 0U;
      }

      pass++;
   }

   return VK_SUCCESS;
}
5870 
pvr_transfer_job_submit(struct pvr_transfer_ctx * ctx,struct pvr_sub_cmd_transfer * sub_cmd,struct vk_sync * wait_sync,struct vk_sync * signal_sync)5871 VkResult pvr_transfer_job_submit(struct pvr_transfer_ctx *ctx,
5872                                  struct pvr_sub_cmd_transfer *sub_cmd,
5873                                  struct vk_sync *wait_sync,
5874                                  struct vk_sync *signal_sync)
5875 {
5876    list_for_each_entry_safe (struct pvr_transfer_cmd,
5877                              transfer_cmd,
5878                              sub_cmd->transfer_cmds,
5879                              link) {
5880       /* The fw guarantees that any kick on the same context will be
5881        * synchronized in submission order. This means only the first kick must
5882        * wait, and only the last kick need signal.
5883        */
5884       struct vk_sync *first_cmd_wait_sync = NULL;
5885       struct vk_sync *last_cmd_signal_sync = NULL;
5886       VkResult result;
5887 
5888       if (list_first_entry(sub_cmd->transfer_cmds,
5889                            struct pvr_transfer_cmd,
5890                            link) == transfer_cmd) {
5891          first_cmd_wait_sync = wait_sync;
5892       }
5893 
5894       if (list_last_entry(sub_cmd->transfer_cmds,
5895                           struct pvr_transfer_cmd,
5896                           link) == transfer_cmd) {
5897          last_cmd_signal_sync = signal_sync;
5898       }
5899 
5900       result = pvr_queue_transfer(ctx,
5901                                   transfer_cmd,
5902                                   first_cmd_wait_sync,
5903                                   last_cmd_signal_sync);
5904       if (result != VK_SUCCESS)
5905          return result;
5906    }
5907 
5908    return VK_SUCCESS;
5909 }
5910