/*
 * Copyright © 2022 Imagination Technologies Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <vulkan/vulkan.h>

#include "pvr_csb.h"
#include "pvr_csb_enum_helpers.h"
#include "pvr_formats.h"
#include "pvr_job_common.h"
#include "pvr_job_context.h"
#include "pvr_job_transfer.h"
#include "pvr_private.h"
#include "pvr_tex_state.h"
#include "pvr_transfer_frag_store.h"
#include "pvr_types.h"
#include "usc/pvr_uscgen.h"
#include "pvr_util.h"
#include "pvr_winsys.h"
#include "util/bitscan.h"
#include "util/list.h"
#include "util/macros.h"
#include "util/u_math.h"
#define XXH_INLINE_ALL
#include "util/xxhash.h"
#include "vk_format.h"
#include "vk_log.h"
#include "vk_sync.h"

#define PVR_TRANSFER_MAX_PASSES 10U
#define PVR_TRANSFER_MAX_CLIP_RECTS 4U
#define PVR_TRANSFER_MAX_PREPARES_PER_SUBMIT 16U
#define PVR_TRANSFER_MAX_CUSTOM_RECTS 3U

/* Number of triangles sent to the TSP per raster. */
#define PVR_TRANSFER_NUM_LAYERS 1U

#define PVR_MAX_WIDTH 16384
#define PVR_MAX_HEIGHT 16384

#define PVR_MAX_CLIP_SIZE(dev_info) \
   (PVR_HAS_FEATURE(dev_info, screen_size8K) ? 8192U : 16384U)

enum pvr_paired_tiles {
   PVR_PAIRED_TILES_NONE,
   PVR_PAIRED_TILES_X,
   PVR_PAIRED_TILES_Y
};

struct pvr_transfer_wa_source {
   uint32_t src_offset;
   uint32_t mapping_count;
   struct pvr_rect_mapping mappings[PVR_TRANSFER_MAX_CUSTOM_MAPPINGS];
   bool extend_height;
};

struct pvr_transfer_pass {
   uint32_t dst_offset;

   uint32_t source_count;
   struct pvr_transfer_wa_source sources[PVR_TRANSFER_MAX_SOURCES];

   uint32_t clip_rects_count;
   VkRect2D clip_rects[PVR_TRANSFER_MAX_CLIP_RECTS];
};

struct pvr_transfer_custom_mapping {
   bool double_stride;
   uint32_t texel_unwind_src;
   uint32_t texel_unwind_dst;
   uint32_t texel_extend_src;
   uint32_t texel_extend_dst;
   uint32_t pass_count;
   struct pvr_transfer_pass passes[PVR_TRANSFER_MAX_PASSES];
   uint32_t max_clip_rects;
   int32_t max_clip_size;
};

/* Structure representing a layer iteration. */
struct pvr_transfer_3d_iteration {
   uint32_t texture_coords[12];
};

struct pvr_transfer_3d_state {
   struct pvr_winsys_transfer_regs regs;

   bool empty_dst;
   bool down_scale;
   /* Write all channels present in the dst from the USC even if those are
    * constants.
    */
   bool dont_force_pbe;

   /* The rate of the shader. */
   uint32_t msaa_multiplier;
   /* Top left corner of the render in ISP tiles. */
   uint32_t origin_x_in_tiles;
   /* Top left corner of the render in ISP tiles. */
   uint32_t origin_y_in_tiles;
   /* Width of the render in ISP tiles. */
   uint32_t width_in_tiles;
   /* Height of the render in ISP tiles. */
   uint32_t height_in_tiles;

   /* Width of a sample in registers (pixel partition width). */
   uint32_t usc_pixel_width;

   /* Properties of the USC shader. */
   struct pvr_tq_shader_properties shader_props;

   /* TODO: Use pvr_dev_addr_t or an offset type for these. */
   uint32_t pds_shader_task_offset;
   uint32_t tex_state_data_offset;
   uint32_t uni_tex_code_offset;

   uint32_t uniform_data_size;
   uint32_t tex_state_data_size;
   uint32_t usc_coeff_regs;

   /* Pointer into the common store. */
   uint32_t common_ptr;
   /* Pointer into the dynamic constant reg buffer. */
   uint32_t dynamic_const_reg_ptr;
   /* Pointer into the USC constant reg buffer. */
   uint32_t usc_const_reg_ptr;

   uint32_t pds_coeff_task_offset;
   uint32_t coeff_data_size;

   /* Number of temporary 32bit registers used by PDS. */
   uint32_t pds_temps;

   struct pvr_transfer_custom_mapping custom_mapping;
   uint32_t pass_idx;

   enum pvr_filter filter[PVR_TRANSFER_MAX_SOURCES];
   bool custom_filter;

   enum pvr_paired_tiles pair_tiles;
};

struct pvr_transfer_prep_data {
   struct pvr_winsys_transfer_cmd_flags flags;
   struct pvr_transfer_3d_state state;
};

struct pvr_transfer_submit {
   uint32_t prep_count;
   struct pvr_transfer_prep_data
      prep_array[PVR_TRANSFER_MAX_PREPARES_PER_SUBMIT];
};

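/* Pick a raw (bit-exact copy) PBE pixel format just wide enough to carry
 * every bit of a texel of the given format.
 */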
static enum pvr_transfer_pbe_pixel_src pvr_pbe_src_format_raw(VkFormat format)
{
   uint32_t bpp = vk_format_get_blocksizebits(format);

   if (bpp <= 32U)
      return PVR_TRANSFER_PBE_PIXEL_SRC_RAW32;
   else if (bpp <= 64U)
      return PVR_TRANSFER_PBE_PIXEL_SRC_RAW64;

   return PVR_TRANSFER_PBE_PIXEL_SRC_RAW128;
}

static VkResult pvr_pbe_src_format_pick_depth(
   const VkFormat src_format,
   const VkFormat dst_format,
   enum pvr_transfer_pbe_pixel_src *const src_format_out)
{
   if (dst_format != VK_FORMAT_D24_UNORM_S8_UINT)
      return VK_ERROR_FORMAT_NOT_SUPPORTED;

   switch (src_format) {
   case VK_FORMAT_D24_UNORM_S8_UINT:
   case VK_FORMAT_X8_D24_UNORM_PACK32:
      *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D24S8_D24S8;
      break;

   case VK_FORMAT_D32_SFLOAT:
      *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32_D24S8;
      break;

   default:
      return VK_ERROR_FORMAT_NOT_SUPPORTED;
   }

   return VK_SUCCESS;
}

static VkResult pvr_pbe_src_format_pick_stencil(
   const VkFormat src_format,
   const VkFormat dst_format,
   enum pvr_transfer_pbe_pixel_src *const src_format_out)
{
   if ((src_format != VK_FORMAT_D24_UNORM_S8_UINT &&
        src_format != VK_FORMAT_S8_UINT) ||
       dst_format != VK_FORMAT_D24_UNORM_S8_UINT) {
      return VK_ERROR_FORMAT_NOT_SUPPORTED;
   }

   if (src_format == VK_FORMAT_S8_UINT)
      *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D24S8;
   else
      *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D24S8;

   return VK_SUCCESS;
}

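/* Pick the PBE pixel format for a depth/stencil blit: either a depth/stencil
 * merge into the destination, or a plain copy with optional conversion and
 * downscale.
 */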
static VkResult
pvr_pbe_src_format_ds(const struct pvr_transfer_cmd_surface *src,
                      const enum pvr_filter filter,
                      const VkFormat dst_format,
                      const uint32_t flags,
                      const bool down_scale,
                      enum pvr_transfer_pbe_pixel_src *src_format_out)
{
   const VkFormat src_format = src->vk_format;

   const bool src_depth = vk_format_has_depth(src_format);
   const bool dst_depth = vk_format_has_depth(dst_format);
   const bool src_stencil = vk_format_has_stencil(src_format);
   const bool dst_stencil = vk_format_has_stencil(dst_format);

   if (flags & PVR_TRANSFER_CMD_FLAGS_DSMERGE) {
      /* Merging, so destination should always have both. */
      if (!dst_depth || !dst_stencil)
         return VK_ERROR_FORMAT_NOT_SUPPORTED;

      if (flags & PVR_TRANSFER_CMD_FLAGS_PICKD) {
         return pvr_pbe_src_format_pick_depth(src_format,
                                              dst_format,
                                              src_format_out);
      } else {
         return pvr_pbe_src_format_pick_stencil(src_format,
                                                dst_format,
                                                src_format_out);
      }
   }

   /* We can't invent channels out of nowhere. */
   if ((dst_depth && !src_depth) || (dst_stencil && !src_stencil))
      return VK_ERROR_FORMAT_NOT_SUPPORTED;

   switch (dst_format) {
   case VK_FORMAT_D16_UNORM:
      if (src_format == VK_FORMAT_D24_UNORM_S8_UINT)
         return VK_ERROR_FORMAT_NOT_SUPPORTED;

      if (!down_scale)
         *src_format_out = pvr_pbe_src_format_raw(dst_format);
      else
         *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_U16NORM;

      break;
   case VK_FORMAT_D24_UNORM_S8_UINT:
      switch (src_format) {
      case VK_FORMAT_D24_UNORM_S8_UINT:
         if (filter == PVR_FILTER_LINEAR)
            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_D24S8;
         else
            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_RAW32;

         break;

      /* D16_UNORM results in a 0.0->1.0 float from the TPU, the same as D32. */
      case VK_FORMAT_D16_UNORM:
      case VK_FORMAT_D32_SFLOAT:
         *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D32_D24S8;
         break;

      default:
         if (filter == PVR_FILTER_LINEAR)
            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_D32S8;
         else
            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_RAW64;
      }

      break;

   case VK_FORMAT_D32_SFLOAT:
      if (src_format == VK_FORMAT_D24_UNORM_S8_UINT)
         *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D24_D32;
      else
         *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_F32;

      break;

   default:
      if (src_format == VK_FORMAT_D24_UNORM_S8_UINT)
         *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_SWAP_LMSB;
      else
         *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_RAW32;
   }

   return VK_SUCCESS;
}

/**
 * How the PBE expects the output buffer to be formatted for an RGBA space
 * conversion.
 */
static VkResult
pvr_pbe_src_format_normal(VkFormat src_format,
                          VkFormat dst_format,
                          bool down_scale,
                          bool dont_force_pbe,
                          enum pvr_transfer_pbe_pixel_src *src_format_out)
{
   bool dst_signed = vk_format_is_sint(dst_format) ||
                     vk_format_is_snorm(dst_format);

   if (vk_format_is_int(dst_format)) {
      uint32_t red_width;
      bool src_signed;
      uint32_t count;

      if (!vk_format_is_int(src_format))
         return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);

      src_signed = vk_format_is_sint(src_format);

      red_width = vk_format_get_component_bits(dst_format,
                                               UTIL_FORMAT_COLORSPACE_RGB,
                                               0);

      switch (red_width) {
      case 8:
         if (!src_signed && !dst_signed)
            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_UU8888;
         else if (src_signed && !dst_signed)
            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_SU8888;
         else if (!src_signed && dst_signed)
            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_US8888;
         else
            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_SS8888;

         break;

      case 10:
         switch (dst_format) {
         case VK_FORMAT_A2B10G10R10_UINT_PACK32:
            *src_format_out = src_signed ? PVR_TRANSFER_PBE_PIXEL_SRC_SU1010102
                                         : PVR_TRANSFER_PBE_PIXEL_SRC_UU1010102;
            break;

         case VK_FORMAT_A2R10G10B10_UINT_PACK32:
            *src_format_out = src_signed
                                 ? PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_SU1010102
                                 : PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_UU1010102;
            break;

         default:
            return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);
         }
         break;

      case 16:
         if (!src_signed && !dst_signed)
            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_UU16U16;
         else if (src_signed && !dst_signed)
            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_SU16U16;
         else if (!src_signed && dst_signed)
            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_US16S16;
         else
            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_SS16S16;

         break;

      case 32:
         if (dont_force_pbe) {
            count = vk_format_get_blocksizebits(dst_format) / 32U;
         } else {
            count = pvr_vk_format_get_common_color_channel_count(src_format,
                                                                 dst_format);
         }

         if (!src_signed && !dst_signed) {
            *src_format_out = (count > 2U) ? PVR_TRANSFER_PBE_PIXEL_SRC_RAW128
                                           : PVR_TRANSFER_PBE_PIXEL_SRC_RAW64;
         } else if (src_signed && !dst_signed) {
            *src_format_out = (count > 2U) ? PVR_TRANSFER_PBE_PIXEL_SRC_S4XU32
                                           : PVR_TRANSFER_PBE_PIXEL_SRC_SU32U32;
         } else if (!src_signed && dst_signed) {
            *src_format_out = (count > 2U) ? PVR_TRANSFER_PBE_PIXEL_SRC_U4XS32
                                           : PVR_TRANSFER_PBE_PIXEL_SRC_US32S32;
         } else {
            *src_format_out = (count > 2U) ? PVR_TRANSFER_PBE_PIXEL_SRC_RAW128
                                           : PVR_TRANSFER_PBE_PIXEL_SRC_RAW64;
         }
         break;

      default:
         return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);
      }
   } else if (vk_format_is_float(dst_format) ||
              pvr_vk_format_is_fully_normalized(dst_format)) {
      bool is_float = true;

      if (!vk_format_is_float(src_format) &&
          !pvr_vk_format_is_fully_normalized(src_format) &&
          !vk_format_is_block_compressed(src_format)) {
         return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);
      }

      if (pvr_vk_format_is_fully_normalized(dst_format)) {
         uint32_t chan_width;

         is_float = false;

         /* Alpha only. */
         switch (dst_format) {
         case VK_FORMAT_D16_UNORM:
            chan_width = 16;
            break;

         default:
            chan_width =
               vk_format_get_component_bits(dst_format,
                                            UTIL_FORMAT_COLORSPACE_RGB,
                                            0U);
            break;
         }

         if (src_format == dst_format) {
            switch (chan_width) {
            case 16U:
               if (down_scale) {
                  *src_format_out = dst_signed
                                       ? PVR_TRANSFER_PBE_PIXEL_SRC_S16NORM
                                       : PVR_TRANSFER_PBE_PIXEL_SRC_U16NORM;
               } else {
                  *src_format_out = dst_signed
                                       ? PVR_TRANSFER_PBE_PIXEL_SRC_SS16S16
                                       : PVR_TRANSFER_PBE_PIXEL_SRC_UU16U16;
               }
               break;

            case 32U:
               *src_format_out = pvr_pbe_src_format_raw(dst_format);
               break;
            default:
               is_float = true;
               break;
            }
         } else {
            switch (chan_width) {
            case 16U:
               *src_format_out = dst_signed
                                    ? PVR_TRANSFER_PBE_PIXEL_SRC_S16NORM
                                    : PVR_TRANSFER_PBE_PIXEL_SRC_U16NORM;
               break;
            default:
               is_float = true;
               break;
            }
         }
      }

      if (is_float) {
         if (pvr_vk_format_has_32bit_component(dst_format)) {
            uint32_t count;

            if (dont_force_pbe) {
               count = vk_format_get_blocksizebits(dst_format) / 32U;
            } else {
               count = pvr_vk_format_get_common_color_channel_count(src_format,
                                                                    dst_format);
            }

            switch (count) {
            case 1U:
               *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_F32;
               break;
            case 2U:
               *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_F32X2;
               break;
            default:
               *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_F32X4;
               break;
            }
         } else {
            if (dst_format == VK_FORMAT_B8G8R8A8_UNORM ||
                dst_format == VK_FORMAT_R8G8B8A8_UNORM ||
                dst_format == VK_FORMAT_A8B8G8R8_UNORM_PACK32) {
               *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_F16_U8;
            } else {
               *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_F16F16;
            }
         }
      }
   } else {
      return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);
   }

   return VK_SUCCESS;
}

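/* Fast 2D blits ignore the normal transfer flags. */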
static inline uint32_t
pvr_get_blit_flags(const struct pvr_transfer_cmd *transfer_cmd)
{
   return transfer_cmd->flags & PVR_TRANSFER_CMD_FLAGS_FAST2D
             ? 0
             : transfer_cmd->flags;
}

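/* Select the PBE pixel format for the first source layer, falling back to the
 * destination surface when the command has no sources (e.g. fills).
 */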
static VkResult pvr_pbe_src_format(struct pvr_transfer_cmd *transfer_cmd,
                                   struct pvr_transfer_3d_state *state,
                                   struct pvr_tq_shader_properties *prop)
{
   struct pvr_tq_layer_properties *layer = &prop->layer_props;
   const enum pvr_filter filter = transfer_cmd->source_count
                                     ? transfer_cmd->sources[0].filter
                                     : PVR_FILTER_POINT;
   const uint32_t flags = transfer_cmd->flags;
   VkFormat dst_format = transfer_cmd->dst.vk_format;
   const struct pvr_transfer_cmd_surface *src;
   VkFormat src_format;
   bool down_scale;

   if (transfer_cmd->source_count > 0) {
      src = &transfer_cmd->sources[0].surface;
      down_scale = transfer_cmd->sources[0].resolve_op == PVR_RESOLVE_BLEND &&
                   transfer_cmd->sources[0].surface.sample_count > 1U &&
                   transfer_cmd->dst.sample_count <= 1U;
   } else {
      src = &transfer_cmd->dst;
      down_scale = false;
   }

   src_format = src->vk_format;

   /* This has to come before the rest since S8, for instance, is integer and
    * the signedness check fails on D24S8.
    */
   if (vk_format_is_depth_or_stencil(src_format) ||
       vk_format_is_depth_or_stencil(dst_format) ||
       flags & PVR_TRANSFER_CMD_FLAGS_DSMERGE) {
      return pvr_pbe_src_format_ds(src,
                                   filter,
                                   dst_format,
                                   flags,
                                   down_scale,
                                   &layer->pbe_format);
   }

   return pvr_pbe_src_format_normal(src_format,
                                    dst_format,
                                    down_scale,
                                    state->dont_force_pbe,
                                    &layer->pbe_format);
}

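/* Program the PDS background object registers: the PDS program base addresses
 * and the size info for the shared, uniform, texture state and temp
 * allocations.
 */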
static inline void pvr_setup_hwbg_object(const struct pvr_device_info *dev_info,
                                         struct pvr_transfer_3d_state *state)
{
   struct pvr_winsys_transfer_regs *regs = &state->regs;

   pvr_csb_pack (&regs->pds_bgnd0_base, CR_PDS_BGRND0_BASE, reg) {
      reg.shader_addr = PVR_DEV_ADDR(state->pds_shader_task_offset);
      assert(pvr_dev_addr_is_aligned(
         reg.shader_addr,
         ROGUE_CR_PDS_BGRND0_BASE_SHADER_ADDR_ALIGNMENT));
      reg.texunicode_addr = PVR_DEV_ADDR(state->uni_tex_code_offset);
      assert(pvr_dev_addr_is_aligned(
         reg.texunicode_addr,
         ROGUE_CR_PDS_BGRND0_BASE_TEXUNICODE_ADDR_ALIGNMENT));
   }

   pvr_csb_pack (&regs->pds_bgnd1_base, CR_PDS_BGRND1_BASE, reg) {
      reg.texturedata_addr = PVR_DEV_ADDR(state->tex_state_data_offset);
      assert(pvr_dev_addr_is_aligned(
         reg.texturedata_addr,
         ROGUE_CR_PDS_BGRND1_BASE_TEXTUREDATA_ADDR_ALIGNMENT));
   }

   /* BGRND 2 not needed, background object PDS doesn't use uniform program. */

   pvr_csb_pack (&regs->pds_bgnd3_sizeinfo, CR_PDS_BGRND3_SIZEINFO, reg) {
      reg.usc_sharedsize =
         DIV_ROUND_UP(state->common_ptr,
                      ROGUE_CR_PDS_BGRND3_SIZEINFO_USC_SHAREDSIZE_UNIT_SIZE);

      assert(!(state->uniform_data_size &
               (ROGUE_CR_PDS_BGRND3_SIZEINFO_PDS_UNIFORMSIZE_UNIT_SIZE - 1)));
      reg.pds_uniformsize =
         state->uniform_data_size /
         ROGUE_CR_PDS_BGRND3_SIZEINFO_PDS_UNIFORMSIZE_UNIT_SIZE;

      assert(
         !(state->tex_state_data_size &
           (ROGUE_CR_PDS_BGRND3_SIZEINFO_PDS_TEXTURESTATESIZE_UNIT_SIZE - 1)));
      reg.pds_texturestatesize =
         state->tex_state_data_size /
         ROGUE_CR_PDS_BGRND3_SIZEINFO_PDS_TEXTURESTATESIZE_UNIT_SIZE;

      reg.pds_tempsize =
         DIV_ROUND_UP(state->pds_temps,
                      ROGUE_CR_PDS_BGRND3_SIZEINFO_PDS_TEMPSIZE_UNIT_SIZE);
   }
}

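/* Check that a surface base address meets the TPU (input) or PBE (output)
 * alignment requirements for the given bits per pixel.
 */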
static inline bool
pvr_is_surface_aligned(pvr_dev_addr_t dev_addr, bool is_input, uint32_t bpp)
{
   /* 96 bpp is 32 bit granular. */
   if (bpp == 64U || bpp == 128U) {
      uint64_t mask = (uint64_t)((bpp >> 3U) - 1U);

      if ((dev_addr.addr & mask) != 0ULL)
         return false;
   }

   if (is_input) {
      if ((dev_addr.addr &
           (ROGUE_TEXSTATE_STRIDE_IMAGE_WORD1_TEXADDR_ALIGNMENT - 1U)) !=
          0ULL) {
         return false;
      }
   } else {
      if ((dev_addr.addr &
           (ROGUE_PBESTATE_STATE_WORD0_ADDRESS_LOW_ALIGNMENT - 1U)) != 0ULL) {
         return false;
      }
   }

   return true;
}

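/* Validate a surface's memory layout and return the effective width, height,
 * stride, layout and device address to program into the TPU/PBE.
 */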
static inline VkResult
pvr_mem_layout_spec(const struct pvr_transfer_cmd_surface *surface,
                    uint32_t load,
                    bool is_input,
                    uint32_t *width_out,
                    uint32_t *height_out,
                    uint32_t *stride_out,
                    enum pvr_memlayout *mem_layout_out,
                    pvr_dev_addr_t *dev_addr_out)
{
   const uint32_t bpp = vk_format_get_blocksizebits(surface->vk_format);
   uint32_t unsigned_stride;

   *mem_layout_out = surface->mem_layout;
   *height_out = surface->height;
   *width_out = surface->width;
   *stride_out = surface->stride;
   *dev_addr_out = surface->dev_addr;

   if (surface->mem_layout != PVR_MEMLAYOUT_LINEAR &&
       !pvr_is_surface_aligned(*dev_addr_out, is_input, bpp)) {
      return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);
   }

   switch (surface->mem_layout) {
   case PVR_MEMLAYOUT_LINEAR:
      if (surface->stride == 0U)
         return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);

      unsigned_stride = *stride_out;

      if (!pvr_is_surface_aligned(*dev_addr_out, is_input, bpp))
         return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);

      if (unsigned_stride < *width_out)
         return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);

      if (!is_input) {
         if (unsigned_stride == 1U) {
            /* Change the setup to twiddling as that doesn't hit the stride
             * limit and twiddled == strided when 1px stride.
             */
            *mem_layout_out = PVR_MEMLAYOUT_TWIDDLED;
         }
      }

      *stride_out = unsigned_stride;
      break;

   case PVR_MEMLAYOUT_TWIDDLED:
   case PVR_MEMLAYOUT_3DTWIDDLED:
      /* Ignoring stride value for twiddled/tiled surface. */
      *stride_out = *width_out;
      break;

   default:
      return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);
   }

   return VK_SUCCESS;
}

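/* Derive the default PBE surface and render parameters for the first render
 * target: format, swizzle, memory layout, clip rectangle and tile extents.
 */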
static VkResult
pvr_pbe_setup_codegen_defaults(const struct pvr_device_info *dev_info,
                               const struct pvr_transfer_cmd *transfer_cmd,
                               struct pvr_transfer_3d_state *state,
                               struct pvr_pbe_surf_params *surface_params,
                               struct pvr_pbe_render_params *render_params)
{
   const struct pvr_transfer_cmd_surface *dst = &transfer_cmd->dst;
   const uint8_t *swizzle;
   VkFormat format;
   VkResult result;

   switch (dst->vk_format) {
   case VK_FORMAT_D24_UNORM_S8_UINT:
   case VK_FORMAT_X8_D24_UNORM_PACK32:
      format = VK_FORMAT_R32_UINT;
      break;

   default:
      format = dst->vk_format;
      break;
   }

   swizzle = pvr_get_format_swizzle(format);
   memcpy(surface_params->swizzle, swizzle, sizeof(surface_params->swizzle));

   pvr_pbe_get_src_format_and_gamma(format,
                                    PVR_PBE_GAMMA_NONE,
                                    false,
                                    &surface_params->source_format,
                                    &surface_params->gamma);

   surface_params->is_normalized = pvr_vk_format_is_fully_normalized(format);
   surface_params->pbe_packmode = pvr_get_pbe_packmode(format);
   surface_params->nr_components = vk_format_get_nr_components(format);

   result = pvr_mem_layout_spec(dst,
                                0U,
                                false,
                                &surface_params->width,
                                &surface_params->height,
                                &surface_params->stride,
                                &surface_params->mem_layout,
                                &surface_params->addr);
   if (result != VK_SUCCESS)
      return result;

   surface_params->z_only_render = false;
   surface_params->depth = dst->depth;
   surface_params->down_scale = state->down_scale;

   if (surface_params->mem_layout == PVR_MEMLAYOUT_3DTWIDDLED)
      render_params->slice = (uint32_t)MAX2(dst->z_position, 0.0f);
   else
      render_params->slice = 0U;

   uint32_t tile_size_x = PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 0U);
   uint32_t tile_size_y = PVR_GET_FEATURE_VALUE(dev_info, tile_size_y, 0U);

   /* If the rectangle happens to be empty / off-screen we clip away
    * everything.
    */
   if (state->empty_dst) {
      render_params->min_x_clip = 2U * tile_size_x;
      render_params->max_x_clip = 3U * tile_size_x;
      render_params->min_y_clip = 2U * tile_size_y;
      render_params->max_y_clip = 3U * tile_size_y;
      state->origin_x_in_tiles = 0U;
      state->origin_y_in_tiles = 0U;
      state->height_in_tiles = 1U;
      state->width_in_tiles = 1U;
   } else {
      const VkRect2D *scissor = &transfer_cmd->scissor;

      /* Clamp */
      render_params->min_x_clip =
         MAX2(MIN2(scissor->offset.x, (int32_t)surface_params->width), 0U);
      render_params->max_x_clip =
         MAX2(MIN2(scissor->offset.x + scissor->extent.width,
                   (int32_t)surface_params->width),
              0U) -
         1U;

      render_params->min_y_clip =
         MAX2(MIN2(scissor->offset.y, surface_params->height), 0U);
      render_params->max_y_clip =
         MAX2(MIN2(scissor->offset.y + scissor->extent.height,
                   surface_params->height),
              0U) -
         1U;

      if (state->custom_mapping.pass_count > 0U) {
         struct pvr_transfer_pass *pass =
            &state->custom_mapping.passes[state->pass_idx];

         render_params->min_x_clip = (uint32_t)pass->clip_rects[0U].offset.x;
         render_params->max_x_clip =
            (uint32_t)(pass->clip_rects[0U].offset.x +
                       pass->clip_rects[0U].extent.width) -
            1U;
         render_params->min_y_clip = (uint32_t)pass->clip_rects[0U].offset.y;
         render_params->max_y_clip =
            (uint32_t)(pass->clip_rects[0U].offset.y +
                       pass->clip_rects[0U].extent.height) -
            1U;
      }

      state->origin_x_in_tiles = render_params->min_x_clip / tile_size_x;
      state->origin_y_in_tiles = render_params->min_y_clip / tile_size_y;
      state->width_in_tiles =
         (render_params->max_x_clip + tile_size_x) / tile_size_x;
      state->height_in_tiles =
         (render_params->max_y_clip + tile_size_y) / tile_size_y;

      /* Be careful here as this isn't the same as ((max_x_clip -
       * min_x_clip) + tile_size_x) / tile_size_x.
       */
      state->width_in_tiles -= state->origin_x_in_tiles;
      state->height_in_tiles -= state->origin_y_in_tiles;
   }

   render_params->source_start = PVR_PBE_STARTPOS_BIT0;
   render_params->mrt_index = 0U;

   return VK_SUCCESS;
}

static VkResult
pvr_pbe_setup_modify_defaults(const struct pvr_transfer_cmd_surface *dst,
                              struct pvr_transfer_3d_state *state,
                              uint32_t rt_idx,
                              struct pvr_pbe_surf_params *surf_params,
                              struct pvr_pbe_render_params *render_params)
{
   struct pvr_transfer_pass *pass;
   VkRect2D *clip_rect;

   render_params->mrt_index = rt_idx;

   assert(rt_idx > 0 && rt_idx <= PVR_TRANSFER_MAX_RENDER_TARGETS);

   if (state->custom_mapping.pass_count == 0)
      return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);

   pass = &state->custom_mapping.passes[state->pass_idx];

   assert(rt_idx < PVR_TRANSFER_MAX_CUSTOM_RECTS);

   clip_rect = &pass->clip_rects[rt_idx];

   render_params->min_x_clip = (uint32_t)clip_rect->offset.x;
   render_params->max_x_clip =
      (uint32_t)clip_rect->offset.x + clip_rect->extent.width - 1U;
   render_params->min_y_clip = (uint32_t)clip_rect->offset.y;
   render_params->max_y_clip =
      (uint32_t)clip_rect->offset.y + clip_rect->extent.height - 1U;

   return VK_SUCCESS;
}

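/* Size of one pixel of the given PBE pixel format in 32 bit USC output
 * buffer registers; 0 indicates an invalid format.
 */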
static uint32_t
pvr_pbe_get_pixel_size(enum pvr_transfer_pbe_pixel_src pixel_format)
{
   switch (pixel_format) {
   case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D24_D32:
   case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D32_D24S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_S8D24_D24S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D24S8_D24S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32_D24S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_F16_U8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_F32:
   case PVR_TRANSFER_PBE_PIXEL_SRC_RAW32:
   case PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_SU1010102:
   case PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_UU1010102:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D24S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D24S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SS8888:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SU1010102:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SU8888:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SWAP_LMSB:
   case PVR_TRANSFER_PBE_PIXEL_SRC_US8888:
   case PVR_TRANSFER_PBE_PIXEL_SRC_UU1010102:
   case PVR_TRANSFER_PBE_PIXEL_SRC_UU8888:
      return 1U;

   case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32S8_D32S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_F16F16:
   case PVR_TRANSFER_PBE_PIXEL_SRC_F32X2:
   case PVR_TRANSFER_PBE_PIXEL_SRC_MOV_BY45:
   case PVR_TRANSFER_PBE_PIXEL_SRC_RAW64:
   case PVR_TRANSFER_PBE_PIXEL_SRC_S16NORM:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D32S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D32S8_D32S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D32S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SS16S16:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SU16U16:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SU32U32:
   case PVR_TRANSFER_PBE_PIXEL_SRC_U16NORM:
   case PVR_TRANSFER_PBE_PIXEL_SRC_US16S16:
   case PVR_TRANSFER_PBE_PIXEL_SRC_US32S32:
   case PVR_TRANSFER_PBE_PIXEL_SRC_UU16U16:
      return 2U;

   case PVR_TRANSFER_PBE_PIXEL_SRC_F32X4:
   case PVR_TRANSFER_PBE_PIXEL_SRC_RAW128:
   case PVR_TRANSFER_PBE_PIXEL_SRC_S4XU32:
   case PVR_TRANSFER_PBE_PIXEL_SRC_U4XS32:
      return 4U;

   case PVR_TRANSFER_PBE_PIXEL_SRC_NUM:
   default:
      break;
   }

   return 0U;
}

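/* Adjust the PBE swizzle and source format for depth/stencil, alpha-only and
 * 32 bit per channel destinations, and record the USC pixel partition width.
 */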
static void pvr_pbe_setup_swizzle(const struct pvr_transfer_cmd *transfer_cmd,
                                  struct pvr_transfer_3d_state *state,
                                  struct pvr_pbe_surf_params *surf_params)
{
   bool color_fill = !!(transfer_cmd->flags & PVR_TRANSFER_CMD_FLAGS_FILL);
   const struct pvr_transfer_cmd_surface *dst = &transfer_cmd->dst;

   const uint32_t pixel_size =
      pvr_pbe_get_pixel_size(state->shader_props.layer_props.pbe_format);

   state->usc_pixel_width = MAX2(pixel_size, 1U);

   switch (dst->vk_format) {
   case VK_FORMAT_X8_D24_UNORM_PACK32:
   case VK_FORMAT_D24_UNORM_S8_UINT:
   case VK_FORMAT_S8_UINT:
      surf_params->swizzle[0U] = PIPE_SWIZZLE_X;
      surf_params->swizzle[1U] = PIPE_SWIZZLE_0;
      surf_params->swizzle[2U] = PIPE_SWIZZLE_0;
      surf_params->swizzle[3U] = PIPE_SWIZZLE_0;
      break;

   default: {
      const uint32_t red_width =
         vk_format_get_component_bits(dst->vk_format,
                                      UTIL_FORMAT_COLORSPACE_RGB,
                                      0U);

      if (transfer_cmd->source_count > 0 &&
          vk_format_is_alpha(dst->vk_format)) {
         if (vk_format_has_alpha(transfer_cmd->sources[0].surface.vk_format)) {
            /* Modify the destination format swizzle to always source from
             * src0.
             */
            surf_params->swizzle[0U] = PIPE_SWIZZLE_X;
            surf_params->swizzle[1U] = PIPE_SWIZZLE_0;
            surf_params->swizzle[2U] = PIPE_SWIZZLE_0;
            surf_params->swizzle[3U] = PIPE_SWIZZLE_1;
            break;
         }

         /* A source format with no alpha channel still allocates 4 output
          * buffer registers.
          */
      }

      if (pvr_vk_format_is_fully_normalized(dst->vk_format)) {
         if (color_fill &&
             (dst->vk_format == VK_FORMAT_B8G8R8A8_UNORM ||
              dst->vk_format == VK_FORMAT_R8G8B8A8_UNORM ||
              dst->vk_format == VK_FORMAT_A8B8G8R8_UNORM_PACK32)) {
            surf_params->source_format =
               ROGUE_PBESTATE_SOURCE_FORMAT_8_PER_CHANNEL;
         } else if (state->shader_props.layer_props.pbe_format ==
                    PVR_TRANSFER_PBE_PIXEL_SRC_F16_U8) {
            surf_params->source_format =
               ROGUE_PBESTATE_SOURCE_FORMAT_8_PER_CHANNEL;
         } else if (red_width <= 8U) {
            surf_params->source_format =
               ROGUE_PBESTATE_SOURCE_FORMAT_F16_PER_CHANNEL;
         }
      } else if (red_width == 32U && !state->dont_force_pbe) {
         uint32_t count = 0U;

         for (uint32_t i = 0; i < transfer_cmd->source_count; i++) {
            VkFormat src_format = transfer_cmd->sources[i].surface.vk_format;
            uint32_t tmp;

            tmp = pvr_vk_format_get_common_color_channel_count(src_format,
                                                               dst->vk_format);

            count = MAX2(count, tmp);
         }

         switch (count) {
         case 1U:
            surf_params->swizzle[1U] = PIPE_SWIZZLE_0;
            FALLTHROUGH;
         case 2U:
            surf_params->swizzle[2U] = PIPE_SWIZZLE_0;
            FALLTHROUGH;
         case 3U:
            surf_params->swizzle[3U] = PIPE_SWIZZLE_1;
            break;

         case 4U:
         default:
            break;
         }
      }
      break;
   }
   }
}

/**
 * Calculates the required PBE byte mask based on the incoming transfer command.
 *
 * @param transfer_cmd the transfer command
 * @return the bytemask (active high disable mask)
 */
static uint64_t pvr_pbe_byte_mask(const struct pvr_device_info *dev_info,
                                  const struct pvr_transfer_cmd *transfer_cmd)
{
   uint32_t flags = pvr_get_blit_flags(transfer_cmd);

   assert(PVR_HAS_ERN(dev_info, 42064));

   if (flags & PVR_TRANSFER_CMD_FLAGS_DSMERGE) {
      uint32_t mask = 0U;

      switch (transfer_cmd->dst.vk_format) {
      case VK_FORMAT_D32_SFLOAT_S8_UINT:
         mask = 0xF0F0F0F0U;
         break;
      case VK_FORMAT_D24_UNORM_S8_UINT:
         mask = 0x88888888U;
         break;
      default:
         break;
      }

      if ((flags & PVR_TRANSFER_CMD_FLAGS_PICKD) == 0U)
         mask = ~mask;

      return mask;
   }

   /* Return the mask as if the feature were inactive, as on cores without
    * the ERN. This keeps the firmware agnostic to the feature.
    */
   return 0U;
}

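/* Build and upload the PDS pixel event program that hands the PBE state words
 * to the USC end-of-tile program for the given render target count, then
 * point the event pixel PDS registers at it.
 */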
static VkResult pvr_pbe_setup_emit(const struct pvr_transfer_cmd *transfer_cmd,
                                   struct pvr_transfer_ctx *ctx,
                                   struct pvr_transfer_3d_state *state,
                                   uint32_t rt_count,
                                   uint32_t *pbe_setup_words)
{
   struct pvr_device *const device = ctx->device;
   const struct pvr_device_info *const dev_info = &device->pdevice->dev_info;

   struct pvr_winsys_transfer_regs *regs = &state->regs;
   struct pvr_pds_event_program program = {
      .emit_words = pbe_setup_words,
      .num_emit_word_pairs = rt_count,
   };
   struct pvr_pds_upload pds_upload;
   uint32_t staging_buffer_size;
   uint32_t *staging_buffer;
   pvr_dev_addr_t addr;
   VkResult result;

   /* Precondition: make sure to use a valid index for ctx->usc_eot_bos. */
   assert(rt_count <= ARRAY_SIZE(ctx->usc_eot_bos));
   assert(rt_count > 0U);

   addr.addr = ctx->usc_eot_bos[rt_count - 1U]->dev_addr.addr -
               device->heaps.usc_heap->base_addr.addr;

   pvr_pds_setup_doutu(&program.task_control,
                       addr.addr,
                       0U,
                       ROGUE_PDSINST_DOUTU_SAMPLE_RATE_INSTANCE,
                       false);

   pvr_pds_set_sizes_pixel_event(&program, dev_info);

   staging_buffer_size = PVR_DW_TO_BYTES(program.code_size + program.data_size);

   staging_buffer = vk_alloc(&device->vk.alloc,
                             staging_buffer_size,
                             8U,
                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!staging_buffer)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   pvr_pds_generate_pixel_event_data_segment(&program,
                                             staging_buffer,
                                             dev_info);

   /* TODO: We can save some memory by generating a code segment for each
    * rt_count, which at the time of writing is a maximum of 3, in
    * pvr_setup_transfer_eot_shaders() when we setup the corresponding EOT
    * USC programs.
    */
   pvr_pds_generate_pixel_event_code_segment(&program,
                                             staging_buffer + program.data_size,
                                             dev_info);

   result =
      pvr_cmd_buffer_upload_pds(transfer_cmd->cmd_buffer,
                                staging_buffer,
                                program.data_size,
                                ROGUE_CR_EVENT_PIXEL_PDS_DATA_ADDR_ALIGNMENT,
                                staging_buffer + program.data_size,
                                program.code_size,
                                ROGUE_CR_EVENT_PIXEL_PDS_CODE_ADDR_ALIGNMENT,
                                ROGUE_CR_EVENT_PIXEL_PDS_DATA_ADDR_ALIGNMENT,
                                &pds_upload);
   vk_free(&device->vk.alloc, staging_buffer);
   if (result != VK_SUCCESS)
      return result;

   pvr_csb_pack (&regs->event_pixel_pds_info, CR_EVENT_PIXEL_PDS_INFO, reg) {
      reg.temp_stride = 0U;
      reg.const_size =
         DIV_ROUND_UP(program.data_size,
                      ROGUE_CR_EVENT_PIXEL_PDS_INFO_CONST_SIZE_UNIT_SIZE);
      reg.usc_sr_size =
         DIV_ROUND_UP(rt_count * PVR_STATE_PBE_DWORDS,
                      ROGUE_CR_EVENT_PIXEL_PDS_INFO_USC_SR_SIZE_UNIT_SIZE);
   }

   pvr_csb_pack (&regs->event_pixel_pds_data, CR_EVENT_PIXEL_PDS_DATA, reg) {
      reg.addr = PVR_DEV_ADDR(pds_upload.data_offset);
   }

   pvr_csb_pack (&regs->event_pixel_pds_code, CR_EVENT_PIXEL_PDS_CODE, reg) {
      reg.addr = PVR_DEV_ADDR(pds_upload.code_offset);
   }

   return VK_SUCCESS;
}

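/* Pack the PBE state words and registers for every render target emit,
 * generate the matching pixel event program, and, for custom mappings, widen
 * the tile origin and extents to cover all emits.
 */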
static VkResult pvr_pbe_setup(const struct pvr_transfer_cmd *transfer_cmd,
                              struct pvr_transfer_ctx *ctx,
                              struct pvr_transfer_3d_state *state)
{
   struct pvr_device *const device = ctx->device;
   const struct pvr_device_info *const dev_info = &device->pdevice->dev_info;

   const struct pvr_transfer_cmd_surface *dst = &transfer_cmd->dst;
   uint32_t num_rts = vk_format_get_plane_count(dst->vk_format);
   uint32_t pbe_setup_words[PVR_TRANSFER_MAX_RENDER_TARGETS *
                            ROGUE_NUM_PBESTATE_STATE_WORDS];
   struct pvr_pbe_render_params render_params;
   struct pvr_pbe_surf_params surf_params;
   VkResult result;

   if (state->custom_mapping.pass_count > 0U)
      num_rts = state->custom_mapping.passes[state->pass_idx].clip_rects_count;

   if (PVR_HAS_FEATURE(dev_info, paired_tiles))
      state->pair_tiles = PVR_PAIRED_TILES_NONE;

   for (uint32_t i = 0U; i < num_rts; i++) {
      uint64_t *pbe_regs;
      uint32_t *pbe_words;

      /* Ensure the access into the pbe_wordx_mrty is made within its bounds. */
      assert(i * ROGUE_NUM_PBESTATE_REG_WORDS_FOR_TRANSFER <
             ARRAY_SIZE(state->regs.pbe_wordx_mrty));
      /* Ensure the access into pbe_setup_words is made within its bounds. */
      assert(i * ROGUE_NUM_PBESTATE_STATE_WORDS < ARRAY_SIZE(pbe_setup_words));

      pbe_regs =
         &state->regs
             .pbe_wordx_mrty[i * ROGUE_NUM_PBESTATE_REG_WORDS_FOR_TRANSFER];
      pbe_words = &pbe_setup_words[i * ROGUE_NUM_PBESTATE_STATE_WORDS];

      if (PVR_HAS_ERN(dev_info, 42064))
         pbe_regs[2U] = 0UL;

      if (i == 0U) {
         result = pvr_pbe_setup_codegen_defaults(dev_info,
                                                 transfer_cmd,
                                                 state,
                                                 &surf_params,
                                                 &render_params);
         if (result != VK_SUCCESS)
            return result;
      } else {
         result = pvr_pbe_setup_modify_defaults(dst,
                                                state,
                                                i,
                                                &surf_params,
                                                &render_params);
         if (result != VK_SUCCESS)
            return result;
      }

      pvr_pbe_setup_swizzle(transfer_cmd, state, &surf_params);

      pvr_pbe_pack_state(dev_info,
                         &surf_params,
                         &render_params,
                         pbe_words,
                         pbe_regs);

      if (PVR_HAS_ERN(dev_info, 42064)) {
         uint64_t temp_reg;

         pvr_csb_pack (&temp_reg, PBESTATE_REG_WORD2, reg) {
            reg.sw_bytemask = pvr_pbe_byte_mask(dev_info, transfer_cmd);
         }

         pbe_regs[2U] |= temp_reg;
      }

      if (PVR_HAS_FEATURE(dev_info, paired_tiles)) {
         if (pbe_regs[2U] &
             (1ULL << ROGUE_PBESTATE_REG_WORD2_PAIR_TILES_SHIFT)) {
            if (transfer_cmd->dst.mem_layout == PVR_MEMLAYOUT_TWIDDLED)
               state->pair_tiles = PVR_PAIRED_TILES_Y;
            else
               state->pair_tiles = PVR_PAIRED_TILES_X;
         }
      }
   }

   result =
      pvr_pbe_setup_emit(transfer_cmd, ctx, state, num_rts, pbe_setup_words);
   if (result != VK_SUCCESS)
      return result;

   /* Adjust tile origin and width to include all emits. */
   if (state->custom_mapping.pass_count > 0U) {
      const uint32_t tile_size_x =
         PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 0U);
      const uint32_t tile_size_y =
         PVR_GET_FEATURE_VALUE(dev_info, tile_size_y, 0U);
      struct pvr_transfer_pass *pass =
         &state->custom_mapping.passes[state->pass_idx];
      VkOffset2D offset = { 0U, 0U };
      VkOffset2D end = { 0U, 0U };

      for (uint32_t i = 0U; i < pass->clip_rects_count; i++) {
         VkRect2D *rect = &pass->clip_rects[i];

         offset.x = MIN2(offset.x, rect->offset.x);
         offset.y = MIN2(offset.y, rect->offset.y);
         end.x = MAX2(end.x, rect->offset.x + rect->extent.width);
         end.y = MAX2(end.y, rect->offset.y + rect->extent.height);
      }

      state->origin_x_in_tiles = (uint32_t)offset.x / tile_size_x;
      state->origin_y_in_tiles = (uint32_t)offset.y / tile_size_y;
      state->width_in_tiles =
         DIV_ROUND_UP((uint32_t)end.x, tile_size_x) - state->origin_x_in_tiles;
      state->height_in_tiles =
         DIV_ROUND_UP((uint32_t)end.y, tile_size_y) - state->origin_y_in_tiles;
   }

   return VK_SUCCESS;
}

/**
 * Writes the ISP tile registers according to the MSAA state. Sets up the USC
 * pixel partition allocations and the number of tiles in flight.
 */
static VkResult pvr_isp_tiles(const struct pvr_device *device,
                              struct pvr_transfer_3d_state *state)
{
   const struct pvr_device_runtime_info *dev_runtime_info =
      &device->pdevice->dev_runtime_info;
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   const uint32_t isp_samples =
      PVR_GET_FEATURE_VALUE(dev_info, isp_samples_per_pixel, 1U);
   uint32_t origin_x = state->origin_x_in_tiles;
   uint32_t origin_y = state->origin_y_in_tiles;
   uint32_t width = state->width_in_tiles;
   uint32_t height = state->height_in_tiles;
   uint32_t isp_tiles_in_flight;

   /* msaa_multiplier is calculated as sample_count & ~1U. Given that the
    * sample count is always a power of two, we can get the sample count back
    * from msaa_multiplier using the following logic.
    */
   const uint32_t samples = MAX2(state->msaa_multiplier, 1U);

   /* The isp_samples_per_pixel feature is also known as "2x/4x for free";
    * when this is present SAMPLES_PER_PIXEL is 2/4, otherwise 1. The
    * following logic should end up with these numbers:
    *
    * |---------------------------------|
    * |      4 SAMPLES / ISP PIXEL      |
    * |-----------------------+----+----|
    * |         MSAA          | X* | Y* |
    * |          2X           | 1  | 1  |
    * |          4X           | 1  | 1  |
    * |---------------------------------|
    * |      2 SAMPLES / ISP PIXEL      |
    * |-----------------------+----+----|
    * |         MSAA          | X* | Y* |
    * |          2X           | 1  | 1  |
    * |          4X           | 1  | 2  |
    * |          8X           | 2  | 2  |
    * |-----------------------+----+----|
    * |      1 SAMPLE / ISP PIXEL       |
    * |-----------------------+----+----|
    * |         MSAA          | X* | Y* |
    * |          2X           | 1  | 2  |
    * |          4X           | 2  | 2  |
    * |-----------------------+----+----|
    */

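   /* Worked example: with 1 sample per ISP pixel (isp_samples == 1) and 4X
    * MSAA (msaa_multiplier == 4), the X shift below is (4 >> 2) & 1 = 1 and
    * the Y shift is ((4 >> 2) | (4 >> 1)) & 1 = 1, so both dimensions double,
    * matching the 4X row of the 1 SAMPLE / ISP PIXEL table above.
    */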
   origin_x <<= (state->msaa_multiplier >> (isp_samples + 1U)) & 1U;
   origin_y <<= ((state->msaa_multiplier >> (isp_samples + 1U)) |
                 (state->msaa_multiplier >> isp_samples)) &
                1U;
   width <<= (state->msaa_multiplier >> (isp_samples + 1U)) & 1U;
   height <<= ((state->msaa_multiplier >> (isp_samples + 1U)) |
               (state->msaa_multiplier >> isp_samples)) &
              1U;

   if (PVR_HAS_FEATURE(dev_info, paired_tiles) &&
       state->pair_tiles != PVR_PAIRED_TILES_NONE) {
      width = ALIGN_POT(width, 2U);
      height = ALIGN_POT(height, 2U);
   }

   pvr_csb_pack (&state->regs.isp_mtile_size, CR_ISP_MTILE_SIZE, reg) {
      reg.x = width;
      reg.y = height;
   }

   pvr_csb_pack (&state->regs.isp_render_origin, CR_ISP_RENDER_ORIGIN, reg) {
      reg.x = origin_x;
      reg.y = origin_y;
   }

   pvr_setup_tiles_in_flight(dev_info,
                             dev_runtime_info,
                             pvr_cr_isp_aa_mode_type(samples),
                             state->usc_pixel_width,
                             state->pair_tiles != PVR_PAIRED_TILES_NONE,
                             0,
                             &isp_tiles_in_flight,
                             &state->regs.usc_pixel_output_ctrl);

   pvr_csb_pack (&state->regs.isp_ctl, CR_ISP_CTL, reg) {
      reg.process_empty_tiles = true;

      if (PVR_HAS_FEATURE(dev_info, paired_tiles)) {
         if (state->pair_tiles == PVR_PAIRED_TILES_X) {
            reg.pair_tiles = true;
         } else if (state->pair_tiles == PVR_PAIRED_TILES_Y) {
            reg.pair_tiles = true;
            reg.pair_tiles_vert = true;
         }
      }
   }

   state->regs.isp_ctl |= isp_tiles_in_flight;

   return VK_SUCCESS;
}

static bool
pvr_int_pbe_pixel_changes_dst_rate(const struct pvr_device_info *dev_info,
                                   enum pvr_transfer_pbe_pixel_src pbe_format)
{
   /* We don't emulate rate change from the USC with the pbe_yuv feature. */
   if (!PVR_HAS_FEATURE(dev_info, pbe_yuv) &&
       (pbe_format == PVR_TRANSFER_PBE_PIXEL_SRC_Y_UV_INTERLEAVED ||
        pbe_format == PVR_TRANSFER_PBE_PIXEL_SRC_Y_U_V)) {
      return true;
   }

   return false;
}

/**
 * Number of DWORDs from the unified store that floating texture coefficients
 * take up.
 */
static void pvr_uv_space(const struct pvr_device_info *dev_info,
                         const struct pvr_transfer_cmd *transfer_cmd,
                         struct pvr_transfer_3d_state *state)
{
   const struct pvr_transfer_cmd_surface *dst = &transfer_cmd->dst;
   const VkRect2D *dst_rect = &transfer_cmd->scissor;

   /* This also avoids division by 0 in pvr_dma_texture_floats(). */
   if (state->custom_mapping.pass_count == 0U &&
       (dst_rect->extent.width == 0U || dst_rect->extent.height == 0U ||
        MAX2(dst_rect->offset.x, dst_rect->offset.x + dst_rect->extent.width) <
           0U ||
        MIN2(dst_rect->offset.x, dst_rect->offset.x + dst_rect->extent.width) >
           (int32_t)dst->width ||
        MAX2(dst_rect->offset.y, dst_rect->offset.y + dst_rect->extent.height) <
           0U ||
        MIN2(dst_rect->offset.y, dst_rect->offset.y + dst_rect->extent.height) >
           (int32_t)dst->height)) {
      state->empty_dst = true;
   } else {
      state->empty_dst = false;

      if (transfer_cmd->source_count > 0) {
         struct pvr_tq_layer_properties *layer =
            &state->shader_props.layer_props;

         const VkRect2D *src_rect =
            &transfer_cmd->sources[0U].mappings[0U].src_rect;
         const VkRect2D *dst_rect =
            &transfer_cmd->sources[0U].mappings[0U].dst_rect;
         int32_t dst_x1 = dst_rect->offset.x + dst_rect->extent.width;
         int32_t dst_y1 = dst_rect->offset.y + dst_rect->extent.height;
         int32_t src_x1 = src_rect->offset.x + src_rect->extent.width;
         int32_t src_y1 = src_rect->offset.y + src_rect->extent.height;

         assert(transfer_cmd->source_count == 1);

         if (state->filter[0U] > PVR_FILTER_POINT) {
            layer->layer_floats = PVR_INT_COORD_SET_FLOATS_4;
         } else if (src_rect->extent.width == 0U ||
                    src_rect->extent.height == 0U) {
            layer->layer_floats = PVR_INT_COORD_SET_FLOATS_0;
         } else if ((src_rect->offset.x * dst_x1 !=
                     src_x1 * dst_rect->offset.x) ||
                    (src_rect->offset.y * dst_y1 !=
                     src_y1 * dst_rect->offset.y) ||
                    (src_rect->extent.width != dst_rect->extent.width) ||
                    (src_rect->extent.height != dst_rect->extent.height) ||
                    transfer_cmd->sources[0U].mappings[0U].flip_x ||
                    transfer_cmd->sources[0U].mappings[0U].flip_y) {
            layer->layer_floats = PVR_INT_COORD_SET_FLOATS_4;
         } else {
            layer->layer_floats = PVR_INT_COORD_SET_FLOATS_0;
         }

         /* We have to adjust the rate. */
         if (layer->layer_floats != PVR_INT_COORD_SET_FLOATS_0 &&
             pvr_int_pbe_pixel_changes_dst_rate(dev_info, layer->pbe_format)) {
            layer->layer_floats = PVR_INT_COORD_SET_FLOATS_6;
         }
      }
   }
}

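/* A YUV source uses a single combined image/sampler state regardless of how
 * many loads its PBE pixel format performs; every other format needs one
 * state per load.
 */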
static uint32_t pvr_int_pbe_pixel_num_sampler_and_image_states(
   enum pvr_transfer_pbe_pixel_src pbe_format)
{
   switch (pbe_format) {
   case PVR_TRANSFER_PBE_PIXEL_SRC_Y_UV_INTERLEAVED:
   case PVR_TRANSFER_PBE_PIXEL_SRC_Y_U_V:
      return 1U;
   default:
      return pvr_pbe_pixel_num_loads(pbe_format);
   }
}

static VkResult pvr_sampler_state_for_surface(
   const struct pvr_device_info *dev_info,
   const struct pvr_transfer_cmd_surface *surface,
   enum pvr_filter filter,
   const struct pvr_tq_frag_sh_reg_layout *sh_reg_layout,
   uint32_t sampler,
   uint32_t *mem_ptr)
{
   uint64_t sampler_state[2U] = { 0UL, 0UL };

   pvr_csb_pack (&sampler_state[0U], TEXSTATE_SAMPLER, reg) {
      reg.anisoctl = ROGUE_TEXSTATE_ANISOCTL_DISABLED;
      reg.minlod = ROGUE_TEXSTATE_CLAMP_MIN;
      reg.maxlod = ROGUE_TEXSTATE_CLAMP_MIN;
      reg.dadjust = ROGUE_TEXSTATE_DADJUST_MIN_UINT;

      if (filter == PVR_FILTER_DONTCARE || filter == PVR_FILTER_POINT) {
         reg.minfilter = ROGUE_TEXSTATE_FILTER_POINT;
         reg.magfilter = ROGUE_TEXSTATE_FILTER_POINT;
      } else if (filter == PVR_FILTER_LINEAR) {
         reg.minfilter = ROGUE_TEXSTATE_FILTER_LINEAR;
         reg.magfilter = ROGUE_TEXSTATE_FILTER_LINEAR;
      } else {
         assert(PVR_HAS_FEATURE(dev_info, tf_bicubic_filter));
         reg.minfilter = ROGUE_TEXSTATE_FILTER_BICUBIC;
         reg.magfilter = ROGUE_TEXSTATE_FILTER_BICUBIC;
      }

      reg.addrmode_u = ROGUE_TEXSTATE_ADDRMODE_CLAMP_TO_EDGE;
      reg.addrmode_v = ROGUE_TEXSTATE_ADDRMODE_CLAMP_TO_EDGE;

      if (surface->mem_layout == PVR_MEMLAYOUT_3DTWIDDLED)
         reg.addrmode_w = ROGUE_TEXSTATE_ADDRMODE_CLAMP_TO_EDGE;
   }

   assert(sampler < PVR_TRANSFER_MAX_IMAGES);

   assert(sampler <= sh_reg_layout->combined_image_samplers.count);
   mem_ptr += sh_reg_layout->combined_image_samplers.offsets[sampler].sampler;

   memcpy(mem_ptr, sampler_state, sizeof(sampler_state));

   return VK_SUCCESS;
}

static inline VkResult pvr_image_state_set_codegen_defaults(
   struct pvr_device *device,
   struct pvr_transfer_3d_state *state,
   const struct pvr_transfer_cmd_surface *surface,
   uint32_t load,
   uint64_t *mem_ptr)
{
   struct pvr_tq_layer_properties *layer = &state->shader_props.layer_props;
   struct pvr_texture_state_info info = { 0U };
   VkResult result;

   switch (surface->vk_format) {
   /* ERN 46863 */
   case VK_FORMAT_D32_SFLOAT_S8_UINT:
      switch (layer->pbe_format) {
      case PVR_TRANSFER_PBE_PIXEL_SRC_RAW32:
      case PVR_TRANSFER_PBE_PIXEL_SRC_RAW64:
      case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D24S8:
      case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D32S8_D32S8:
      case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32S8_D32S8:
      case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D32_D24S8:
      case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32_D24S8:
         info.format = VK_FORMAT_R32G32_UINT;
         break;
      default:
         break;
      }
      break;

   case VK_FORMAT_D24_UNORM_S8_UINT:
   case VK_FORMAT_X8_D24_UNORM_PACK32:
      info.format = VK_FORMAT_R32_UINT;
      break;

   default:
      info.format = surface->vk_format;
      break;
   }

   info.flags = 0U;
   info.base_level = 0U;
   info.mip_levels = 1U;
   info.mipmaps_present = false;
   info.sample_count = MAX2(surface->sample_count, 1U);

   if (surface->mem_layout == PVR_MEMLAYOUT_3DTWIDDLED)
      info.extent.depth = surface->depth;
   else
      info.extent.depth = 0U;

   if (PVR_HAS_FEATURE(&device->pdevice->dev_info, tpu_array_textures))
      info.array_size = 0U;

   result = pvr_mem_layout_spec(surface,
                                load,
                                true,
                                &info.extent.width,
                                &info.extent.height,
                                &info.stride,
                                &info.mem_layout,
                                &info.addr);
   if (result != VK_SUCCESS)
      return result;

   if (state->custom_mapping.texel_extend_dst > 1U) {
      info.extent.width /= state->custom_mapping.texel_extend_dst;
      info.stride /= state->custom_mapping.texel_extend_dst;
   }

   info.tex_state_type = PVR_TEXTURE_STATE_SAMPLE;
   memcpy(info.swizzle,
          pvr_get_format_swizzle(info.format),
          sizeof(info.swizzle));

   if (surface->vk_format == VK_FORMAT_S8_UINT) {
      info.swizzle[0U] = PIPE_SWIZZLE_X;
      info.swizzle[1U] = PIPE_SWIZZLE_0;
      info.swizzle[2U] = PIPE_SWIZZLE_0;
      info.swizzle[3U] = PIPE_SWIZZLE_0;
   }

   if (info.extent.depth > 0U)
      info.type = VK_IMAGE_VIEW_TYPE_3D;
   else if (info.extent.height > 1U)
      info.type = VK_IMAGE_VIEW_TYPE_2D;
   else
      info.type = VK_IMAGE_VIEW_TYPE_1D;

   result = pvr_pack_tex_state(device, &info, mem_ptr);
   if (result != VK_SUCCESS)
      return result;

   return VK_SUCCESS;
}

static VkResult pvr_image_state_for_surface(
   const struct pvr_transfer_ctx *ctx,
   const struct pvr_transfer_cmd *transfer_cmd,
   const struct pvr_transfer_cmd_surface *surface,
   uint32_t load,
   uint32_t source,
   const struct pvr_tq_frag_sh_reg_layout *sh_reg_layout,
   struct pvr_transfer_3d_state *state,
   uint32_t uf_image,
   uint32_t *mem_ptr)
{
   uint32_t tex_state[ROGUE_MAXIMUM_IMAGE_STATE_SIZE] = { 0U };
   VkResult result;
   uint8_t offset;

   result = pvr_image_state_set_codegen_defaults(ctx->device,
                                                 state,
                                                 surface,
                                                 load,
                                                 (uint64_t *)tex_state);
   if (result != VK_SUCCESS)
      return result;

   assert(uf_image < PVR_TRANSFER_MAX_IMAGES);

   /* Offset of the shared registers containing the hardware image state. */
   assert(uf_image < sh_reg_layout->combined_image_samplers.count);
   offset = sh_reg_layout->combined_image_samplers.offsets[uf_image].image;

   /* Copy the image state to the buffer which is loaded into the shared
    * registers.
    */
   memcpy(mem_ptr + offset, tex_state, sizeof(tex_state));

   return VK_SUCCESS;
}

/* Writes the texture state/sampler state into DMAed memory. */
static VkResult
pvr_sampler_image_state(struct pvr_transfer_ctx *ctx,
                        const struct pvr_transfer_cmd *transfer_cmd,
                        const struct pvr_tq_frag_sh_reg_layout *sh_reg_layout,
                        struct pvr_transfer_3d_state *state,
                        uint32_t *mem_ptr)
{
   if (!state->empty_dst) {
      uint32_t uf_sampler = 0U;
      uint32_t uf_image = 0U;

      for (uint32_t source = 0; source < transfer_cmd->source_count; source++) {
         struct pvr_tq_layer_properties *layer =
            &state->shader_props.layer_props;
         uint32_t max_load = pvr_pbe_pixel_num_loads(layer->pbe_format);

         for (uint32_t load = 0U; load < max_load; load++) {
            const struct pvr_transfer_cmd_surface *surface;
            enum pvr_filter filter;
            VkResult result;

            switch (layer->pbe_format) {
            case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D32S8:
            case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D32S8:
            case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D32S8_D32S8:
            case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32S8_D32S8:
            case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D24S8:
            case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D24S8:
            case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D24S8_D24S8:
            case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32_D24S8:
            case PVR_TRANSFER_PBE_PIXEL_SRC_F16F16:
            case PVR_TRANSFER_PBE_PIXEL_SRC_F16_U8:
               if (load > 0U) {
                  surface = &transfer_cmd->dst;
                  filter = transfer_cmd->sources[source].filter;
               } else {
                  surface = &transfer_cmd->sources[source].surface;
                  filter = state->filter[source];
               }
               break;

            case PVR_TRANSFER_PBE_PIXEL_SRC_Y_UV_INTERLEAVED:
            case PVR_TRANSFER_PBE_PIXEL_SRC_Y_U_V:
               surface = &transfer_cmd->sources[source].surface;
               filter = state->filter[source];
               break;

            default:
               surface = &transfer_cmd->sources[source + load].surface;
               filter = state->filter[source + load];
               break;
            }

            if (load < pvr_int_pbe_pixel_num_sampler_and_image_states(
                          layer->pbe_format)) {
               const struct pvr_device_info *dev_info =
                  &transfer_cmd->cmd_buffer->device->pdevice->dev_info;

               result = pvr_sampler_state_for_surface(dev_info,
                                                      surface,
                                                      filter,
                                                      sh_reg_layout,
                                                      uf_sampler,
                                                      mem_ptr);
               if (result != VK_SUCCESS)
                  return result;

               uf_sampler++;

               result = pvr_image_state_for_surface(ctx,
                                                    transfer_cmd,
                                                    surface,
                                                    load,
                                                    source,
                                                    sh_reg_layout,
                                                    state,
                                                    uf_image,
                                                    mem_ptr);
               if (result != VK_SUCCESS)
                  return result;

               uf_image++;
            }
         }
      }
   }

   return VK_SUCCESS;
}

1745 /* The returned offset is in dwords. */
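/* Each call hands out the next dynamic const dword, so callers typically do
 * mem_ptr[pvr_dynamic_const_reg_advance(sh_reg_layout, state)] = value;
 */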
1746 static inline uint32_t pvr_dynamic_const_reg_advance(
1747 const struct pvr_tq_frag_sh_reg_layout *sh_reg_layout,
1748 struct pvr_transfer_3d_state *state)
1749 {
1750 const uint32_t offset = sh_reg_layout->dynamic_consts.offset;
1751
1752 assert(state->dynamic_const_reg_ptr < sh_reg_layout->dynamic_consts.count);
1753
1754 return offset + state->dynamic_const_reg_ptr++;
1755 }
1756
1757 /** Scales coefficients for sampling (non-normalized). */
1758 static inline void
1759 pvr_dma_texture_floats(const struct pvr_transfer_cmd *transfer_cmd,
1760 struct pvr_transfer_3d_state *state,
1761 const struct pvr_tq_frag_sh_reg_layout *sh_reg_layout,
1762 uint32_t *mem_ptr)
1763
1764 {
1765 if (transfer_cmd->source_count > 0) {
1766 struct pvr_tq_layer_properties *layer = &state->shader_props.layer_props;
1767 const struct pvr_rect_mapping *mapping =
1768 &transfer_cmd->sources[0].mappings[0U];
1769 VkRect2D src_rect = mapping->src_rect;
1770 VkRect2D dst_rect = mapping->dst_rect;
1771
1772 switch (layer->layer_floats) {
1773 case PVR_INT_COORD_SET_FLOATS_0:
1774 break;
1775
1776 case PVR_INT_COORD_SET_FLOATS_6:
1777 case PVR_INT_COORD_SET_FLOATS_4: {
1778 int32_t consts[2U] = { 0U, 0U };
1779 int32_t denom[2U] = { 0U, 0U };
1780 int32_t nums[2U] = { 0U, 0U };
1781 int32_t src_x, dst_x;
1782 int32_t src_y, dst_y;
1783 float offset = 0.0f;
1784 float tmp;
1785
1786 dst_x = mapping->flip_x ? -(int32_t)dst_rect.extent.width
1787 : dst_rect.extent.width;
1788 dst_y = mapping->flip_y ? -(int32_t)dst_rect.extent.height
1789 : dst_rect.extent.height;
1790 src_x = src_rect.extent.width;
1791 src_y = src_rect.extent.height;
1792
1793 nums[0U] = src_x;
1794 denom[0U] = dst_x;
1795 consts[0U] =
1796 mapping->flip_x
1797 ? src_rect.offset.x * dst_x -
1798 src_x * (dst_rect.offset.x + dst_rect.extent.width)
1799 : src_rect.offset.x * dst_x - src_x * dst_rect.offset.x;
1800 nums[1U] = src_y;
1801 denom[1U] = dst_y;
1802 consts[1U] =
1803 mapping->flip_y
1804 ? src_rect.offset.y * dst_y -
1805 src_y * (dst_rect.offset.y + dst_rect.extent.height)
1806 : src_rect.offset.y * dst_y - src_y * dst_rect.offset.y;
1807
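/* Each axis now has a linear dst -> src mapping; the loop below writes the
 * scale (nums / denom) followed by the offset (consts / denom), for U then
 * V.
 */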
1808 for (uint32_t i = 0U; i < 2U; i++) {
1809 tmp = (float)(nums[i]) / (float)(denom[i]);
1810 mem_ptr[pvr_dynamic_const_reg_advance(sh_reg_layout, state)] =
1811 fui(tmp);
1812
1813 tmp = ((float)(consts[i]) + (i == 1U ? offset : 0.0f)) /
1814 (float)(denom[i]);
1815 mem_ptr[pvr_dynamic_const_reg_advance(sh_reg_layout, state)] =
1816 fui(tmp);
1817 }
1818
1819 if (layer->layer_floats == PVR_INT_COORD_SET_FLOATS_6) {
1820 tmp = (float)MIN2(dst_rect.offset.x, dst_rect.offset.x + dst_x);
1821 mem_ptr[pvr_dynamic_const_reg_advance(sh_reg_layout, state)] =
1822 fui(tmp);
1823
1824 tmp = (float)MIN2(dst_rect.offset.y, dst_rect.offset.y + dst_y);
1825 mem_ptr[pvr_dynamic_const_reg_advance(sh_reg_layout, state)] =
1826 fui(tmp);
1827 }
1828 break;
1829 }
1830
1831 default:
1832 unreachable("Unknown COORD_SET_FLOATS.");
1833 break;
1834 }
1835 }
1836 }
1837
1838 static bool pvr_int_pbe_pixel_requires_usc_filter(
1839 const struct pvr_device_info *dev_info,
1840 enum pvr_transfer_pbe_pixel_src pixel_format)
1841 {
1842 switch (pixel_format) {
1843 case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D24S8:
1844 case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D24S8_D24S8:
1845 case PVR_TRANSFER_PBE_PIXEL_SRC_U16NORM:
1846 case PVR_TRANSFER_PBE_PIXEL_SRC_S16NORM:
1847 case PVR_TRANSFER_PBE_PIXEL_SRC_F32:
1848 case PVR_TRANSFER_PBE_PIXEL_SRC_F32X2:
1849 case PVR_TRANSFER_PBE_PIXEL_SRC_F32X4:
1850 return true;
1851 case PVR_TRANSFER_PBE_PIXEL_SRC_F16F16:
1852 return !PVR_HAS_FEATURE(dev_info, pbe_filterable_f16);
1853 default:
1854 return false;
1855 }
1856 }
1857
1858 /**
1859 * Sets up the MSAA related bits in the operation
1860 *
1861 * TPU sample count is read directly from transfer_cmd in the TPU code. An MSAA
1862 * src can be read from sample rate or instance rate shaders as long as the
1863 * sample count is set on the TPU. If a layer is single sample we expect the
1864 * same sample replicated in full rate shaders. If the layer is multi sample,
1865 * instance rate shaders are used to emulate the filter or to select the
1866 * specified sample. The sample number is static in the programs.
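*
* Quick reference for the combinations handled below: S -> S no MSAA,
* M -> S resolve (blend, min/max or sample select), S -> M sample
* replication, M -> M sample-to-sample copy.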
1867 */
1868 static VkResult pvr_msaa_state(const struct pvr_device_info *dev_info,
1869 const struct pvr_transfer_cmd *transfer_cmd,
1870 struct pvr_transfer_3d_state *state,
1871 uint32_t source)
1872 {
1873 struct pvr_tq_shader_properties *shader_props = &state->shader_props;
1874 struct pvr_tq_layer_properties *layer = &shader_props->layer_props;
1875 struct pvr_winsys_transfer_regs *const regs = &state->regs;
1876 uint32_t src_sample_count =
1877 transfer_cmd->sources[source].surface.sample_count & ~1U;
1878 uint32_t dst_sample_count = transfer_cmd->dst.sample_count & ~1U;
1879 uint32_t bsample_count = 0U;
1880
1881 shader_props->full_rate = false;
1882 state->msaa_multiplier = 1U;
1883 state->down_scale = false;
1884
1885 /* clang-format off */
1886 pvr_csb_pack (&regs->isp_aa, CR_ISP_AA, reg);
1887 /* clang-format on */
1888
1889 layer->sample_count = 1U;
1890 layer->resolve_op = PVR_RESOLVE_BLEND;
1891
1892 bsample_count |= src_sample_count | dst_sample_count;
1893
1894 if (bsample_count > PVR_GET_FEATURE_VALUE(dev_info, max_multisample, 0U))
1895 return vk_error(transfer_cmd->cmd_buffer, VK_ERROR_FORMAT_NOT_SUPPORTED);
1896
1897 /* We shouldn't get two distinct bits set, as that would imply different
1898 * sample counts; the rate at which the shader runs has to match.
1899 */
1900 if ((bsample_count & (bsample_count - 1U)) != 0U)
1901 return vk_error(transfer_cmd->cmd_buffer, VK_ERROR_FORMAT_NOT_SUPPORTED);
1902
1903 if (src_sample_count == 0U && dst_sample_count == 0U) {
1904 /* S -> S (no MSAA involved). */
1905 layer->msaa = false;
1906 } else if (src_sample_count != 0U && dst_sample_count == 0U) {
1907 /* M -> S (resolve). */
1908 layer->resolve_op = transfer_cmd->sources[source].resolve_op;
1909
1910 if ((uint32_t)layer->resolve_op >=
1911 (src_sample_count + (uint32_t)PVR_RESOLVE_SAMPLE0)) {
1912 return vk_error(transfer_cmd->cmd_buffer,
1913 VK_ERROR_FORMAT_NOT_SUPPORTED);
1914 }
1915
1916 layer->msaa = true;
1917
1918 switch (layer->resolve_op) {
1919 case PVR_RESOLVE_MIN:
1920 case PVR_RESOLVE_MAX:
1921 switch (transfer_cmd->sources[source].surface.vk_format) {
1922 case VK_FORMAT_D32_SFLOAT:
1923 case VK_FORMAT_D16_UNORM:
1924 case VK_FORMAT_S8_UINT:
1925 case VK_FORMAT_D24_UNORM_S8_UINT:
1926 case VK_FORMAT_X8_D24_UNORM_PACK32:
1927 if (transfer_cmd->sources[source].surface.vk_format !=
1928 transfer_cmd->dst.vk_format) {
1929 return vk_error(transfer_cmd->cmd_buffer,
1930 VK_ERROR_FORMAT_NOT_SUPPORTED);
1931 }
1932 break;
1933
1934 default:
1935 return vk_error(transfer_cmd->cmd_buffer,
1936 VK_ERROR_FORMAT_NOT_SUPPORTED);
1937 }
1938
1939 /* Instance rate. */
1940 layer->sample_count = src_sample_count;
1941 state->shader_props.full_rate = false;
1942 break;
1943
1944 case PVR_RESOLVE_BLEND:
1945 if (pvr_int_pbe_pixel_requires_usc_filter(dev_info,
1946 layer->pbe_format)) {
1947 /* Instance rate. */
1948 layer->sample_count = src_sample_count;
1949 state->shader_props.full_rate = false;
1950 } else {
1951 /* Sample rate. */
1952 state->shader_props.full_rate = true;
1953 state->msaa_multiplier = src_sample_count;
1954 state->down_scale = true;
1955
1956 pvr_csb_pack (&regs->isp_aa, CR_ISP_AA, reg) {
1957 reg.mode = pvr_cr_isp_aa_mode_type(src_sample_count);
1958 }
1959 }
1960 break;
1961
1962 default:
1963 /* Shader doesn't have to know the number of samples. It's enough
1964 * if the TPU knows, and the shader sets the right sno (given to the
1965 * shader in resolve_op).
1966 */
1967 state->shader_props.full_rate = false;
1968 break;
1969 }
1970 } else {
1971 state->msaa_multiplier = dst_sample_count;
1972
1973 pvr_csb_pack (&regs->isp_aa, CR_ISP_AA, reg) {
1974 reg.mode = pvr_cr_isp_aa_mode_type(dst_sample_count);
1975 }
1976
1977 if (src_sample_count == 0U && dst_sample_count != 0U) {
1978 /* S -> M (replicate samples) */
1979 layer->msaa = false;
1980 state->shader_props.full_rate = !state->shader_props.iterated;
1981 } else {
1982 /* M -> M (sample to sample) */
1983 layer->msaa = true;
1984 state->shader_props.full_rate = true;
1985 }
1986 }
1987
1988 return VK_SUCCESS;
1989 }
1990
1991 static bool pvr_requires_usc_linear_filter(VkFormat format)
1992 {
1993 switch (format) {
1994 case VK_FORMAT_R32_SFLOAT:
1995 case VK_FORMAT_R32G32_SFLOAT:
1996 case VK_FORMAT_R32G32B32_SFLOAT:
1997 case VK_FORMAT_R32G32B32A32_SFLOAT:
1998 case VK_FORMAT_D32_SFLOAT:
1999 case VK_FORMAT_D24_UNORM_S8_UINT:
2000 case VK_FORMAT_X8_D24_UNORM_PACK32:
2001 return true;
2002 default:
2003 return false;
2004 }
2005 }
2006
2007 static inline bool
2008 pvr_int_pbe_usc_linear_filter(enum pvr_transfer_pbe_pixel_src pbe_format,
2009 bool sample,
2010 bool msaa,
2011 bool full_rate)
2012 {
2013 if (sample || msaa || full_rate)
2014 return false;
2015
2016 switch (pbe_format) {
2017 case PVR_TRANSFER_PBE_PIXEL_SRC_D24S8:
2018 case PVR_TRANSFER_PBE_PIXEL_SRC_S8D24:
2019 case PVR_TRANSFER_PBE_PIXEL_SRC_D32S8:
2020 case PVR_TRANSFER_PBE_PIXEL_SRC_F32:
2021 case PVR_TRANSFER_PBE_PIXEL_SRC_F32X2:
2022 case PVR_TRANSFER_PBE_PIXEL_SRC_F32X4:
2023 return true;
2024 default:
2025 return false;
2026 }
2027 }
2028
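/* A pick-component pass is only needed when a custom mapping widens
 * destination texels (texel_extend_dst > 1) without a matching source
 * extension.
 */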
2029 static inline bool pvr_pick_component_needed(
2030 const struct pvr_transfer_custom_mapping *custom_mapping)
2031 {
2032 return custom_mapping->pass_count > 0U &&
2033 custom_mapping->texel_extend_dst > 1U &&
2034 custom_mapping->texel_extend_src <= 1U;
2035 }
2036
2037 /** Writes the shader related constants into the DMA space. */
2038 static void
2039 pvr_write_usc_constants(const struct pvr_tq_frag_sh_reg_layout *sh_reg_layout,
2040 uint32_t *dma_space)
2041 {
2042 const uint32_t reg = sh_reg_layout->driver_total;
2043 const uint32_t consts_count =
2044 sh_reg_layout->compiler_out.usc_constants.count;
2045
2046 /* If this doesn't hold, we likely need to write more consts. */
2047 assert(consts_count == sh_reg_layout->compiler_out_total);
2048
2049 /* Append the usc consts after the driver allocated regs. */
2050 for (uint32_t i = 0U; i < consts_count; i++)
2051 dma_space[reg + i] = sh_reg_layout->compiler_out.usc_constants.values[i];
2052 }
2053
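/** Writes the texel unwind constants (destination coordinate mask and texel
 * unwind) used by the pick-component shader path into the DMA space.
 */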
2054 static inline void
2055 pvr_dma_texel_unwind(struct pvr_transfer_3d_state *state,
2056 const struct pvr_tq_frag_sh_reg_layout *sh_reg_layout,
2057 uint32_t *mem_ptr)
2058
2059 {
2060 const uint32_t coord_sample_mask =
2061 state->custom_mapping.texel_extend_dst - 1U;
2062
2063 mem_ptr[pvr_dynamic_const_reg_advance(sh_reg_layout, state)] =
2064 coord_sample_mask;
2065 mem_ptr[pvr_dynamic_const_reg_advance(sh_reg_layout, state)] =
2066 state->custom_mapping.texel_unwind_dst;
2067 }
2068
2069 /** Writes the Uniform/Texture state data segments + the UniTex code. */
2070 static inline VkResult
2071 pvr_pds_unitex(const struct pvr_device_info *dev_info,
2072 struct pvr_transfer_ctx *ctx,
2073 const struct pvr_transfer_cmd *transfer_cmd,
2074 struct pvr_pds_pixel_shader_sa_program *program,
2075 struct pvr_transfer_prep_data *prep_data)
2076 {
2077 struct pvr_pds_upload *unitex_code =
2078 &ctx->pds_unitex_code[program->num_texture_dma_kicks]
2079 [program->num_uniform_dma_kicks];
2080 struct pvr_transfer_3d_state *state = &prep_data->state;
2081 struct pvr_suballoc_bo *pvr_bo;
2082 VkResult result;
2083 void *map;
2084
2085 /* Uniform program is not used. */
2086 assert(program->num_uniform_dma_kicks == 0U);
2087
2088 if (program->num_texture_dma_kicks == 0U) {
2089 state->uniform_data_size = 0U;
2090 state->tex_state_data_size = 0U;
2091 state->tex_state_data_offset = 0U;
2092 state->uni_tex_code_offset = 0U;
2093
2094 return VK_SUCCESS;
2095 }
2096
2097 pvr_pds_set_sizes_pixel_shader_sa_uniform_data(program, dev_info);
2098 assert(program->data_size == 0U);
2099 state->uniform_data_size = 0U;
2100
2101 pvr_pds_set_sizes_pixel_shader_sa_texture_data(program, dev_info);
2102 state->tex_state_data_size =
2103 ALIGN_POT(program->data_size,
2104 ROGUE_TA_STATE_PDS_SIZEINFO1_PDS_TEXTURESTATESIZE_UNIT_SIZE);
2105
2106 result =
2107 pvr_cmd_buffer_alloc_mem(transfer_cmd->cmd_buffer,
2108 ctx->device->heaps.pds_heap,
2109 PVR_DW_TO_BYTES(state->tex_state_data_size),
2110 &pvr_bo);
2111 if (result != VK_SUCCESS)
2112 return result;
2113
2114 state->tex_state_data_offset =
2115 pvr_bo->dev_addr.addr - ctx->device->heaps.pds_heap->base_addr.addr;
2116
2117 map = pvr_bo_suballoc_get_map_addr(pvr_bo);
2118 pvr_pds_generate_pixel_shader_sa_texture_state_data(program, map, dev_info);
2119
2120 /* Save the dev_addr and size in the 3D state. */
2121 state->uni_tex_code_offset = unitex_code->code_offset;
2122 state->pds_temps = program->temps_used;
2123
2124 return VK_SUCCESS;
2125 }
2126
2127 /** Converts a float in range 0 to 1 to an N-bit fixed-point integer. */
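/* E.g. pvr_float_to_ufixed(0.5f, 8) scales by max = 255 and rounds to
 * nearest: floor(127.5 + 0.5) = 128 (0x80). Inputs below 0 clamp to 0 and
 * inputs at or above 1 clamp to max.
 */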
2128 static uint32_t pvr_float_to_ufixed(float value, uint32_t bits)
2129 {
2130 uint32_t max = (1U << bits) - 1U;
2131
2132 /* NaN and Inf and overflow. */
2133 if (util_is_inf_or_nan(value) || value >= 1.0f)
2134 return max;
2135 else if (value < 0.0f)
2136 return 0U;
2137
2138 /* Normalise. */
2139 value = value * (float)max;
2140
2141 /* Cast to double so that we can accurately represent the sum for bits > 23. */
2142 return (uint32_t)floor((double)value + 0.5f);
2143 }
2144
2145 /** Converts a float in range -1 to 1 to a signed N-bit fixed-point integer. */
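/* E.g. pvr_float_to_sfixed(-0.5f, 16) scales by max = 32767 and rounds away
 * from zero: floor(-16383.5 - 0.5) = -16384, i.e. 0xFFFFC000 before the
 * caller masks it to the field width.
 */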
2146 static uint32_t pvr_float_to_sfixed(float value, uint32_t N)
2147 {
2148 int32_t max = (1 << (N - 1)) - 1;
2149 int32_t min = 0 - (1 << (N - 1));
2150 union fi x;
2151
2152 /* NaN and Inf and overflow. */
2153 if (util_is_inf_or_nan(value) || value >= 1.0f)
2154 return (uint32_t)max;
2155 else if (value == 0.0f)
2156 return 0U;
2157 else if (value <= -1.0f)
2158 return (uint32_t)min;
2159
2160 /* Normalise. */
2161 value *= (float)max;
2162
2163 /* Cast to double so that we can accurately represent the sum for N > 23. */
2164 if (value > 0.0f)
2165 x.i = (int32_t)floor((double)value + 0.5f);
2166 else
2167 x.i = (int32_t)floor((double)value - 0.5f);
2168
2169 return x.ui;
2170 }
2171
2172 /** Convert a value in IEEE single precision format to 16-bit floating point
2173 * format.
2174 */
2175 /* TODO: See if we can use _mesa_float_to_float16_rtz_slow() instead. */
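/* E.g. pvr_float_to_f16(1.0f, false) yields a biased exponent of 15 and a
 * zero mantissa, i.e. the half-float encoding 0x3C00.
 */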
2176 static uint16_t pvr_float_to_f16(float value, bool round_to_even)
2177 {
2178 uint32_t input_value;
2179 uint32_t exponent;
2180 uint32_t mantissa;
2181 uint16_t output;
2182
2183 /* 0.0f can be exactly expressed in binary using IEEE float format. */
2184 if (value == 0.0f)
2185 return 0U;
2186
2187 if (value < 0.0f) {
2188 output = 0x8000;
2189 value = -value;
2190 } else {
2191 output = 0U;
2192 }
2193
2194 /* 2^16 * (2 - 1/1024) = highest f16 representable value. */
2195 value = MIN2(value, 131008);
2196 input_value = fui(value);
2197
2198 /* Extract the exponent and mantissa. */
2199 exponent = util_get_float32_exponent(value) + 15;
2200 mantissa = input_value & ((1 << 23) - 1);
2201
2202 /* If the exponent is outside the supported range then denormalise the
2203 * mantissa.
2204 */
2205 if ((int32_t)exponent <= 0) {
2206 uint32_t shift;
2207
2208 mantissa |= (1 << 23);
2209 exponent = input_value >> 23;
2210 shift = -14 + 127 - exponent;
2211
2212 if (shift < 24)
2213 mantissa >>= shift;
2214 else
2215 mantissa = 0;
2216 } else {
2217 output = (uint16_t)(output | ((exponent << 10) & 0x7C00));
2218 }
2219
2220 output = (uint16_t)(output | (((mantissa >> 13) << 0) & 0x03FF));
2221
2222 if (round_to_even) {
2223 /* Round to nearest even. */
2224 if ((((int)value) % 2 != 0) && (((1 << 13) - 1) & mantissa))
2225 output++;
2226 } else {
2227 /* Round to nearest. */
2228 if (mantissa & (1 << 12))
2229 output++;
2230 }
2231
2232 return output;
2233 }
2234
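/** Packs a clear color into up to four 32-bit words according to the
 * destination format's PBE pack mode (fixed point, half float or raw bits).
 */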
2235 static VkResult pvr_pack_clear_color(VkFormat format,
2236 const union fi color[static 4],
2237 uint32_t pkd_color[static 4])
2238 {
2239 const uint32_t red_width =
2240 vk_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 0U);
2241 uint32_t pbe_pack_mode = pvr_get_pbe_packmode(format);
2242 const bool pbe_norm = pvr_vk_format_is_fully_normalized(format);
2243
2244 if (pbe_pack_mode == ROGUE_PBESTATE_PACKMODE_INVALID)
2245 return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);
2246
2247 /* Set packed color based on PBE pack mode and PBE norm. */
2248 switch (pbe_pack_mode) {
2249 case ROGUE_PBESTATE_PACKMODE_U8U8U8U8:
2250 case ROGUE_PBESTATE_PACKMODE_A8R3G3B2:
2251 if (pbe_norm) {
2252 pkd_color[0] = pvr_float_to_ufixed(color[0].f, 8) & 0xFFU;
2253 pkd_color[0] |= (pvr_float_to_ufixed(color[1].f, 8) & 0xFFU) << 8;
2254 pkd_color[0] |= (pvr_float_to_ufixed(color[2].f, 8) & 0xFFU) << 16;
2255 pkd_color[0] |= (pvr_float_to_ufixed(color[3].f, 8) & 0xFFU) << 24;
2256 } else {
2257 pkd_color[0] = color[0].ui & 0xFFU;
2258 pkd_color[0] |= (color[1].ui & 0xFFU) << 8;
2259 pkd_color[0] |= (color[2].ui & 0xFFU) << 16;
2260 pkd_color[0] |= (color[3].ui & 0xFFU) << 24;
2261 }
2262 break;
2263
2264 case ROGUE_PBESTATE_PACKMODE_S8S8S8S8:
2265 case ROGUE_PBESTATE_PACKMODE_X8U8S8S8:
2266 case ROGUE_PBESTATE_PACKMODE_X8S8S8U8:
2267 if (pbe_norm) {
2268 pkd_color[0] = (uint32_t)pvr_float_to_f16(color[0].f, false);
2269 pkd_color[0] |= (uint32_t)pvr_float_to_f16(color[1].f, false) << 16;
2270 pkd_color[1] = (uint32_t)pvr_float_to_f16(color[2].f, false);
2271 pkd_color[1] |= (uint32_t)pvr_float_to_f16(color[3].f, false) << 16;
2272 } else {
2273 pkd_color[0] = color[0].ui & 0xFFU;
2274 pkd_color[0] |= (color[1].ui & 0xFFU) << 8;
2275 pkd_color[0] |= (color[2].ui & 0xFFU) << 16;
2276 pkd_color[0] |= (color[3].ui & 0xFFU) << 24;
2277 }
2278 break;
2279
2280 case ROGUE_PBESTATE_PACKMODE_U16U16U16U16:
2281 if (pbe_norm) {
2282 pkd_color[0] = pvr_float_to_ufixed(color[0].f, 16) & 0xFFFFU;
2283 pkd_color[0] |= (pvr_float_to_ufixed(color[1].f, 16) & 0xFFFFU) << 16;
2284 pkd_color[1] = pvr_float_to_ufixed(color[2].f, 16) & 0xFFFFU;
2285 pkd_color[1] |= (pvr_float_to_ufixed(color[3].f, 16) & 0xFFFFU) << 16;
2286 } else {
2287 pkd_color[0] = color[0].ui & 0xFFFFU;
2288 pkd_color[0] |= (color[1].ui & 0xFFFFU) << 16;
2289 pkd_color[1] = color[2].ui & 0xFFFFU;
2290 pkd_color[1] |= (color[3].ui & 0xFFFFU) << 16;
2291 }
2292 break;
2293
2294 case ROGUE_PBESTATE_PACKMODE_S16S16S16S16:
2295 if (pbe_norm) {
2296 pkd_color[0] = pvr_float_to_sfixed(color[0].f, 16) & 0xFFFFU;
2297 pkd_color[0] |= (pvr_float_to_sfixed(color[1].f, 16) & 0xFFFFU) << 16;
2298 pkd_color[1] = (pvr_float_to_sfixed(color[2].f, 16) & 0xFFFFU);
2299 pkd_color[1] |= (pvr_float_to_sfixed(color[3].f, 16) & 0xFFFFU) << 16;
2300 } else {
2301 pkd_color[0] = color[0].ui & 0xFFFFU;
2302 pkd_color[0] |= (color[1].ui & 0xFFFFU) << 16;
2303 pkd_color[1] = color[2].ui & 0xFFFFU;
2304 pkd_color[1] |= (color[3].ui & 0xFFFFU) << 16;
2305 }
2306 break;
2307
2308 case ROGUE_PBESTATE_PACKMODE_A2_XRBIAS_U10U10U10:
2309 case ROGUE_PBESTATE_PACKMODE_ARGBV16_XR10:
2310 case ROGUE_PBESTATE_PACKMODE_F16F16F16F16:
2311 case ROGUE_PBESTATE_PACKMODE_A2R10B10G10:
2312 case ROGUE_PBESTATE_PACKMODE_A4R4G4B4:
2313 case ROGUE_PBESTATE_PACKMODE_A1R5G5B5:
2314 case ROGUE_PBESTATE_PACKMODE_R5G5B5A1:
2315 case ROGUE_PBESTATE_PACKMODE_R5G6B5:
2316 if (red_width > 0) {
2317 pkd_color[0] = (uint32_t)pvr_float_to_f16(color[0].f, false);
2318 pkd_color[0] |= (uint32_t)pvr_float_to_f16(color[1].f, false) << 16;
2319 pkd_color[1] = (uint32_t)pvr_float_to_f16(color[2].f, false);
2320 pkd_color[1] |= (uint32_t)pvr_float_to_f16(color[3].f, false) << 16;
2321 } else {
2322 /* Swizzle only uses first channel for alpha formats. */
2323 pkd_color[0] = (uint32_t)pvr_float_to_f16(color[3].f, false);
2324 }
2325 break;
2326
2327 case ROGUE_PBESTATE_PACKMODE_U32U32U32U32:
2328 pkd_color[0] = color[0].ui;
2329 pkd_color[1] = color[1].ui;
2330 pkd_color[2] = color[2].ui;
2331 pkd_color[3] = color[3].ui;
2332 break;
2333
2334 case ROGUE_PBESTATE_PACKMODE_S32S32S32S32:
2335 pkd_color[0] = (uint32_t)color[0].i;
2336 pkd_color[1] = (uint32_t)color[1].i;
2337 pkd_color[2] = (uint32_t)color[2].i;
2338 pkd_color[3] = (uint32_t)color[3].i;
2339 break;
2340
2341 case ROGUE_PBESTATE_PACKMODE_F32F32F32F32:
2342 memcpy(pkd_color, &color[0].f, 4U * sizeof(float));
2343 break;
2344
2345 case ROGUE_PBESTATE_PACKMODE_R10B10G10A2:
2346 if (pbe_norm) {
2347 pkd_color[0] = pvr_float_to_ufixed(color[0].f, 10) & 0x3FFU;
2348 pkd_color[0] |= (pvr_float_to_ufixed(color[1].f, 10) & 0x3FFU) << 10;
2349 pkd_color[0] |= (pvr_float_to_ufixed(color[2].f, 10) & 0x3FFU) << 20;
2350 pkd_color[0] |= (pvr_float_to_ufixed(color[3].f, 2) & 0x3U) << 30;
2351 } else if (format == VK_FORMAT_A2R10G10B10_UINT_PACK32) {
2352 pkd_color[0] = color[2].ui & 0x3FFU;
2353 pkd_color[0] |= (color[1].ui & 0x3FFU) << 10;
2354 pkd_color[0] |= (color[0].ui & 0x3FFU) << 20;
2355 pkd_color[0] |= (color[3].ui & 0x3U) << 30;
2356 } else {
2357 pkd_color[0] = color[0].ui & 0x3FFU;
2358 pkd_color[0] |= (color[1].ui & 0x3FFU) << 10;
2359 pkd_color[0] |= (color[2].ui & 0x3FFU) << 20;
2360 pkd_color[0] |= (color[3].ui & 0x3U) << 30;
2361 }
2362
2363 break;
2364
2365 case ROGUE_PBESTATE_PACKMODE_A2F10F10F10:
2366 case ROGUE_PBESTATE_PACKMODE_F10F10F10A2:
2367 pkd_color[0] = pvr_float_to_sfixed(color[0].f, 10) & 0x3FFU;
2368 pkd_color[0] |= (pvr_float_to_sfixed(color[1].f, 10) & 0x3FFU) << 10;
2369 pkd_color[0] |= (pvr_float_to_sfixed(color[2].f, 10) & 0x3FFU) << 20;
2370 pkd_color[0] |= (pvr_float_to_sfixed(color[3].f, 2) & 0x3U) << 30;
2371 break;
2372
2373 case ROGUE_PBESTATE_PACKMODE_U8U8U8:
2374 case ROGUE_PBESTATE_PACKMODE_R5SG5SB6:
2375 if (pbe_norm) {
2376 pkd_color[0] = pvr_float_to_ufixed(color[0].f, 8) & 0xFFU;
2377 pkd_color[0] |= (pvr_float_to_ufixed(color[1].f, 8) & 0xFFU) << 8;
2378 pkd_color[0] |= (pvr_float_to_ufixed(color[2].f, 8) & 0xFFU) << 16;
2379 } else {
2380 pkd_color[0] = color[0].ui & 0xFFU;
2381 pkd_color[0] |= (color[1].ui & 0xFFU) << 8;
2382 pkd_color[0] |= (color[2].ui & 0xFFU) << 16;
2383 }
2384 break;
2385
2386 case ROGUE_PBESTATE_PACKMODE_S8S8S8:
2387 case ROGUE_PBESTATE_PACKMODE_B6G5SR5S:
2388 if (pbe_norm) {
2389 pkd_color[0] = pvr_float_to_sfixed(color[0].f, 8) & 0xFFU;
2390 pkd_color[0] |= (pvr_float_to_sfixed(color[1].f, 8) & 0xFFU) << 8;
2391 pkd_color[0] |= (pvr_float_to_sfixed(color[2].f, 8) & 0xFFU) << 16;
2392 } else {
2393 pkd_color[0] = color[0].ui & 0xFFU;
2394 pkd_color[0] |= (color[1].ui & 0xFFU) << 8;
2395 pkd_color[0] |= (color[2].ui & 0xFFU) << 16;
2396 }
2397 break;
2398
2399 case ROGUE_PBESTATE_PACKMODE_U16U16U16:
2400 if (pbe_norm) {
2401 pkd_color[0] = pvr_float_to_ufixed(color[0].f, 16) & 0xFFFFU;
2402 pkd_color[0] |= (pvr_float_to_ufixed(color[1].f, 16) & 0xFFFFU) << 16;
2403 pkd_color[1] = (pvr_float_to_ufixed(color[2].f, 16) & 0xFFFFU);
2404 } else {
2405 pkd_color[0] = color[0].ui & 0xFFFFU;
2406 pkd_color[0] |= (color[1].ui & 0xFFFFU) << 16;
2407 pkd_color[1] = color[2].ui & 0xFFFFU;
2408 }
2409 break;
2410
2411 case ROGUE_PBESTATE_PACKMODE_S16S16S16:
2412 if (pbe_norm) {
2413 pkd_color[0] = pvr_float_to_sfixed(color[0].f, 16) & 0xFFFFU;
2414 pkd_color[0] |= (pvr_float_to_sfixed(color[1].f, 16) & 0xFFFFU) << 16;
2415 pkd_color[1] = pvr_float_to_sfixed(color[2].f, 16) & 0xFFFFU;
2416 } else {
2417 pkd_color[0] = color[0].ui & 0xFFFFU;
2418 pkd_color[0] |= (color[1].ui & 0xFFFFU) << 16;
2419 pkd_color[1] = color[2].ui & 0xFFFFU;
2420 }
2421 break;
2422
2423 case ROGUE_PBESTATE_PACKMODE_F16F16F16:
2424 case ROGUE_PBESTATE_PACKMODE_F11F11F10:
2425 case ROGUE_PBESTATE_PACKMODE_F10F11F11:
2426 case ROGUE_PBESTATE_PACKMODE_SE9995:
2427 pkd_color[0] = (uint32_t)pvr_float_to_f16(color[0].f, true);
2428 pkd_color[0] |= (uint32_t)pvr_float_to_f16(color[1].f, true) << 16;
2429 pkd_color[1] = (uint32_t)pvr_float_to_f16(color[2].f, true);
2430 break;
2431
2432 case ROGUE_PBESTATE_PACKMODE_U32U32U32:
2433 pkd_color[0] = color[0].ui;
2434 pkd_color[1] = color[1].ui;
2435 pkd_color[2] = color[2].ui;
2436 break;
2437
2438 case ROGUE_PBESTATE_PACKMODE_S32S32S32:
2439 pkd_color[0] = (uint32_t)color[0].i;
2440 pkd_color[1] = (uint32_t)color[1].i;
2441 pkd_color[2] = (uint32_t)color[2].i;
2442 break;
2443
2444 case ROGUE_PBESTATE_PACKMODE_X24G8X32:
2445 case ROGUE_PBESTATE_PACKMODE_U8X24:
2446 pkd_color[1] = (color[1].ui & 0xFFU) << 24;
2447 break;
2448
2449 case ROGUE_PBESTATE_PACKMODE_F32F32F32:
2450 memcpy(pkd_color, &color[0].f, 3U * sizeof(float));
2451 break;
2452
2453 case ROGUE_PBESTATE_PACKMODE_U8U8:
2454 if (pbe_norm) {
2455 pkd_color[0] = (uint32_t)pvr_float_to_f16(color[0].f, false);
2456 pkd_color[0] |= (uint32_t)pvr_float_to_f16(color[1].f, false) << 16;
2457 } else {
2458 pkd_color[0] = color[0].ui & 0xFFU;
2459 pkd_color[0] |= (color[1].ui & 0xFFU) << 8;
2460 }
2461 break;
2462
2463 case ROGUE_PBESTATE_PACKMODE_S8S8:
2464 if (pbe_norm) {
2465 pkd_color[0] = (uint32_t)pvr_float_to_f16(color[0].f, false);
2466 pkd_color[0] |= (uint32_t)pvr_float_to_f16(color[1].f, false) << 16;
2467 } else {
2468 pkd_color[0] = color[0].ui & 0xFFU;
2469 pkd_color[0] |= (color[1].ui & 0xFFU) << 8;
2470 pkd_color[0] |= (color[2].ui & 0xFFU) << 16;
2471 pkd_color[0] |= (color[3].ui & 0xFFU) << 24;
2472 }
2473 break;
2474
2475 case ROGUE_PBESTATE_PACKMODE_U16U16:
2476 if (pbe_norm) {
2477 pkd_color[0] = pvr_float_to_ufixed(color[0].f, 16) & 0xFFFFU;
2478 pkd_color[0] |= (pvr_float_to_ufixed(color[1].f, 16) & 0xFFFFU) << 16;
2479 } else {
2480 pkd_color[0] = color[0].ui & 0xFFFFU;
2481 pkd_color[0] |= (color[1].ui & 0xFFFFU) << 16;
2482 }
2483 break;
2484
2485 case ROGUE_PBESTATE_PACKMODE_S16S16:
2486 if (pbe_norm) {
2487 pkd_color[0] = pvr_float_to_sfixed(color[0].f, 16) & 0xFFFFU;
2488 pkd_color[0] |= (pvr_float_to_sfixed(color[1].f, 16) & 0xFFFFU) << 16;
2489 } else {
2490 pkd_color[0] = color[0].ui & 0xFFFFU;
2491 pkd_color[0] |= (color[1].ui & 0xFFFFU) << 16;
2492 }
2493 break;
2494
2495 case ROGUE_PBESTATE_PACKMODE_F16F16:
2496 pkd_color[0] = (uint32_t)pvr_float_to_f16(color[0].f, true);
2497 pkd_color[0] |= (uint32_t)pvr_float_to_f16(color[1].f, true) << 16;
2498 break;
2499
2500 case ROGUE_PBESTATE_PACKMODE_U32U32:
2501 pkd_color[0] = color[0].ui;
2502 pkd_color[1] = color[1].ui;
2503 break;
2504
2505 case ROGUE_PBESTATE_PACKMODE_S32S32:
2506 pkd_color[0] = (uint32_t)color[0].i;
2507 pkd_color[1] = (uint32_t)color[1].i;
2508 break;
2509
2510 case ROGUE_PBESTATE_PACKMODE_X24U8F32:
2511 case ROGUE_PBESTATE_PACKMODE_X24X8F32:
2512 memcpy(pkd_color, &color[0].f, 1U * sizeof(float));
2513 pkd_color[1] = color[1].ui & 0xFFU;
2514 break;
2515
2516 case ROGUE_PBESTATE_PACKMODE_F32F32:
2517 memcpy(pkd_color, &color[0].f, 2U * sizeof(float));
2518 break;
2519
2520 case ROGUE_PBESTATE_PACKMODE_ST8U24:
2521 pkd_color[0] = pvr_float_to_ufixed(color[0].f, 24) & 0xFFFFFFU;
2522 pkd_color[0] |= color[1].ui << 24;
2523 break;
2524
2525 case ROGUE_PBESTATE_PACKMODE_U8:
2526 if (format == VK_FORMAT_S8_UINT)
2527 pkd_color[0] = color[1].ui & 0xFFU;
2528 else if (pbe_norm)
2529 pkd_color[0] = (uint32_t)pvr_float_to_f16(color[0].f, false);
2530 else
2531 pkd_color[0] = color[0].ui & 0xFFU;
2532
2533 break;
2534
2535 case ROGUE_PBESTATE_PACKMODE_S8:
2536 if (pbe_norm)
2537 pkd_color[0] = (uint32_t)pvr_float_to_f16(color[0].f, false);
2538 else
2539 pkd_color[0] = color[0].ui & 0xFFU;
2540 break;
2541
2542 case ROGUE_PBESTATE_PACKMODE_U16:
2543 if (pbe_norm)
2544 pkd_color[0] = pvr_float_to_ufixed(color[0].f, 16) & 0xFFFFU;
2545 else
2546 pkd_color[0] = color[0].ui & 0xFFFFU;
2547 break;
2548
2549 case ROGUE_PBESTATE_PACKMODE_S16:
2550 if (pbe_norm)
2551 pkd_color[0] = pvr_float_to_sfixed(color[0].f, 16) & 0xFFFFU;
2552 else
2553 pkd_color[0] = color[0].ui & 0xFFFFU;
2554 break;
2555
2556 case ROGUE_PBESTATE_PACKMODE_F16:
2557 pkd_color[0] = (uint32_t)pvr_float_to_f16(color[0].f, true);
2558 break;
2559
2560 /* U32 */
2561 case ROGUE_PBESTATE_PACKMODE_U32:
2562 if (format == VK_FORMAT_X8_D24_UNORM_PACK32) {
2563 pkd_color[0] = pvr_float_to_ufixed(color[0].f, 24) & 0xFFFFFFU;
2564 } else if (format == VK_FORMAT_D24_UNORM_S8_UINT) {
2565 pkd_color[0] = pvr_float_to_ufixed(color[0].f, 24) & 0xFFFFFFU;
2566 pkd_color[0] |= (color[1].ui & 0xFFU) << 24;
2567 } else if (format == VK_FORMAT_A2B10G10R10_UINT_PACK32) {
2568 pkd_color[0] = color[0].ui & 0x3FFU;
2569 pkd_color[0] |= (color[1].ui & 0x3FFU) << 10;
2570 pkd_color[0] |= (color[2].ui & 0x3FFU) << 20;
2571 pkd_color[0] |= (color[3].ui & 0x3U) << 30;
2572 } else {
2573 pkd_color[0] = color[0].ui;
2574 }
2575 break;
2576
2577 /* U24ST8 */
2578 case ROGUE_PBESTATE_PACKMODE_U24ST8:
2579 pkd_color[1] = (color[1].ui & 0xFFU) << 24;
2580 pkd_color[1] |= pvr_float_to_ufixed(color[0].f, 24) & 0xFFFFFFU;
2581 break;
2582
2583 /* S32 */
2584 case ROGUE_PBESTATE_PACKMODE_S32:
2585 pkd_color[0] = (uint32_t)color[0].i;
2586 break;
2587
2588 /* F32 */
2589 case ROGUE_PBESTATE_PACKMODE_F32:
2590 memcpy(pkd_color, &color[0].f, sizeof(float));
2591 break;
2592
2593 /* X8U24 */
2594 case ROGUE_PBESTATE_PACKMODE_X8U24:
2595 pkd_color[0] = pvr_float_to_ufixed(color[0].f, 24) & 0xFFFFFFU;
2596 break;
2597
2598 default:
2599 break;
2600 }
2601
2602 return VK_SUCCESS;
2603 }
2604
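/* Picks the ISP scan direction. For an overlapping copy within the same
 * surface the scan has to start from the non-overlapped corner, e.g. when
 * copying right and down it must run bottom-right to top-left so that
 * source texels are read before they are overwritten.
 */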
2605 static VkResult
2606 pvr_isp_scan_direction(struct pvr_transfer_cmd *transfer_cmd,
2607 bool custom_mapping,
2608 enum ROGUE_CR_DIR_TYPE *const dir_type_out)
2609 {
2610 pvr_dev_addr_t dst_dev_addr = transfer_cmd->dst.dev_addr;
2611 bool backwards_in_x = false;
2612 bool backwards_in_y = false;
2613 bool done_dest_rect = false;
2614 VkRect2D dst_rect;
2615 int32_t dst_x1;
2616 int32_t dst_y1;
2617
2618 for (uint32_t i = 0; i < transfer_cmd->source_count; i++) {
2619 struct pvr_transfer_cmd_source *src = &transfer_cmd->sources[i];
2620 pvr_dev_addr_t src_dev_addr = src->surface.dev_addr;
2621
2622 if (src_dev_addr.addr == dst_dev_addr.addr && !custom_mapping) {
2623 VkRect2D *src_rect = &src->mappings[0].src_rect;
2624 int32_t src_x1 = src_rect->offset.x + src_rect->extent.width;
2625 int32_t src_y1 = src_rect->offset.y + src_rect->extent.height;
2626
2627 if (!done_dest_rect) {
2628 dst_rect = src->mappings[0].dst_rect;
2629
2630 dst_x1 = dst_rect.offset.x + dst_rect.extent.width;
2631 dst_y1 = dst_rect.offset.y + dst_rect.extent.height;
2632
2633 done_dest_rect = true;
2634 }
2635
2636 if ((dst_rect.offset.x < src_x1 && dst_x1 > src_rect->offset.x) &&
2637 (dst_rect.offset.y < src_y1 && dst_y1 > src_rect->offset.y)) {
2638 if (src_rect->extent.width != dst_rect.extent.width ||
2639 src_rect->extent.height != dst_rect.extent.height) {
2640 /* Scaling is not possible. */
2641 return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);
2642 }
2643
2644 /* The copy moves to the right, so scan backwards in X. */
2645 backwards_in_x = dst_rect.offset.x > src_rect->offset.x;
2646
2647 /* The copy moves downwards, so scan backwards in Y. */
2648 backwards_in_y = dst_rect.offset.y > src_rect->offset.y;
2649 }
2650 }
2651 }
2652
2653 if (backwards_in_x) {
2654 if (backwards_in_y)
2655 *dir_type_out = ROGUE_CR_DIR_TYPE_BR2TL;
2656 else
2657 *dir_type_out = ROGUE_CR_DIR_TYPE_TR2BL;
2658 } else {
2659 if (backwards_in_y)
2660 *dir_type_out = ROGUE_CR_DIR_TYPE_BL2TR;
2661 else
2662 *dir_type_out = ROGUE_CR_DIR_TYPE_TL2BR;
2663 }
2664
2665 return VK_SUCCESS;
2666 }
2667
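/* Prepares one 3D-path transfer pass: a solid fill (FILL flag), a blit with
 * one or more sources, or a source-less pass, then programs the ISP/PBE
 * state common to all three. *finished_out is cleared while custom mapping
 * passes remain.
 */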
2668 static VkResult pvr_3d_copy_blit_core(struct pvr_transfer_ctx *ctx,
2669 struct pvr_transfer_cmd *transfer_cmd,
2670 struct pvr_transfer_prep_data *prep_data,
2671 uint32_t pass_idx,
2672 bool *finished_out)
2673 {
2674 struct pvr_transfer_3d_state *const state = &prep_data->state;
2675 struct pvr_winsys_transfer_regs *const regs = &state->regs;
2676 struct pvr_device *const device = ctx->device;
2677 const struct pvr_device_info *const dev_info = &device->pdevice->dev_info;
2678
2679 VkResult result;
2680
2681 *finished_out = true;
2682
2683 state->common_ptr = 0U;
2684 state->dynamic_const_reg_ptr = 0U;
2685 state->usc_const_reg_ptr = 0U;
2686
2687 if ((transfer_cmd->flags & PVR_TRANSFER_CMD_FLAGS_FILL) != 0U) {
2688 uint32_t packed_color[4U] = { 0U };
2689
2690 if (transfer_cmd->source_count != 0U)
2691 return vk_error(device, VK_ERROR_FORMAT_NOT_SUPPORTED);
2692
2693 if (vk_format_is_compressed(transfer_cmd->dst.vk_format))
2694 return vk_error(device, VK_ERROR_FORMAT_NOT_SUPPORTED);
2695
2696 /* No shader. */
2697 state->pds_temps = 0U;
2698 state->uniform_data_size = 0U;
2699 state->tex_state_data_size = 0U;
2700
2701 /* No background enabled. */
2702 /* clang-format off */
2703 pvr_csb_pack (&regs->isp_bgobjvals, CR_ISP_BGOBJVALS, reg);
2704 /* clang-format on */
2705 pvr_csb_pack (&regs->isp_aa, CR_ISP_AA, reg) {
2706 reg.mode = pvr_cr_isp_aa_mode_type(transfer_cmd->dst.sample_count);
2707 }
2708
2709 result = pvr_pack_clear_color(transfer_cmd->dst.vk_format,
2710 transfer_cmd->clear_color,
2711 packed_color);
2712 if (result != VK_SUCCESS)
2713 return result;
2714
2715 pvr_csb_pack (&regs->usc_clear_register0, CR_USC_CLEAR_REGISTER, reg) {
2716 reg.val = packed_color[0U];
2717 }
2718
2719 pvr_csb_pack (&regs->usc_clear_register1, CR_USC_CLEAR_REGISTER, reg) {
2720 reg.val = packed_color[1U];
2721 }
2722
2723 pvr_csb_pack (&regs->usc_clear_register2, CR_USC_CLEAR_REGISTER, reg) {
2724 reg.val = packed_color[2U];
2725 }
2726
2727 pvr_csb_pack (&regs->usc_clear_register3, CR_USC_CLEAR_REGISTER, reg) {
2728 reg.val = packed_color[3U];
2729 }
2730
2731 state->msaa_multiplier = transfer_cmd->dst.sample_count & ~1U;
2732 state->pds_shader_task_offset = 0U;
2733 state->uni_tex_code_offset = 0U;
2734 state->tex_state_data_offset = 0U;
2735 } else if (transfer_cmd->source_count > 0U) {
2736 const struct pvr_tq_frag_sh_reg_layout nop_sh_reg_layout = {
2737 /* TODO: Setting this to 1 so that we don't try to pvr_bo_alloc() with
2738 * zero size. The device will ignore the PDS program if USC_SHAREDSIZE
2739 * is zero and in the case of the nop shader we're expecting it to be
2740 * zero. See if we can safely pass PVR_DEV_ADDR_INVALID for the unitex
2741 * program.
2742 */
2743 .driver_total = 1,
2744 };
2745 const struct pvr_tq_frag_sh_reg_layout *sh_reg_layout;
2746 struct pvr_pds_pixel_shader_sa_program unitex_prog = { 0U };
2747 uint32_t tex_state_dma_size_dw;
2748 struct pvr_suballoc_bo *pvr_bo;
2749 uint32_t *dma_space;
2750
2751 result = pvr_pbe_src_format(transfer_cmd, state, &state->shader_props);
2752 if (result != VK_SUCCESS)
2753 return result;
2754
2755 pvr_uv_space(dev_info, transfer_cmd, state);
2756
2757 state->shader_props.iterated = false;
2758
2759 state->shader_props.layer_props.sample =
2760 transfer_cmd->sources[0].surface.mem_layout ==
2761 PVR_MEMLAYOUT_3DTWIDDLED;
2762
2763 result = pvr_msaa_state(dev_info, transfer_cmd, state, 0);
2764 if (result != VK_SUCCESS)
2765 return result;
2766
2767 state->shader_props.pick_component =
2768 pvr_pick_component_needed(&state->custom_mapping);
2769
2770 if (state->filter[0] == PVR_FILTER_LINEAR &&
2771 pvr_requires_usc_linear_filter(
2772 transfer_cmd->sources[0].surface.vk_format)) {
2773 if (pvr_int_pbe_usc_linear_filter(
2774 state->shader_props.layer_props.pbe_format,
2775 state->shader_props.layer_props.sample,
2776 state->shader_props.layer_props.msaa,
2777 state->shader_props.full_rate)) {
2778 state->shader_props.layer_props.linear = true;
2779 } else {
2780 mesa_logw("Transfer: F32 linear filter not supported.");
2781 }
2782 }
2783
2784 if (state->empty_dst) {
2785 sh_reg_layout = &nop_sh_reg_layout;
2786 state->pds_shader_task_offset = device->nop_program.pds.data_offset;
2787 } else {
2788 pvr_dev_addr_t kick_usc_pds_dev_addr;
2789
2790 result =
2791 pvr_transfer_frag_store_get_shader_info(device,
2792 &ctx->frag_store,
2793 &state->shader_props,
2794 &kick_usc_pds_dev_addr,
2795 &sh_reg_layout);
2796 if (result != VK_SUCCESS)
2797 return result;
2798
2799 assert(kick_usc_pds_dev_addr.addr <= UINT32_MAX);
2800 state->pds_shader_task_offset = (uint32_t)kick_usc_pds_dev_addr.addr;
2801 }
2802
2803 unitex_prog.kick_usc = false;
2804 unitex_prog.clear = false;
2805
2806 tex_state_dma_size_dw =
2807 sh_reg_layout->driver_total + sh_reg_layout->compiler_out_total;
2808
2809 unitex_prog.num_texture_dma_kicks = 1U;
2810 unitex_prog.num_uniform_dma_kicks = 0U;
2811
2812 result = pvr_cmd_buffer_alloc_mem(transfer_cmd->cmd_buffer,
2813 device->heaps.general_heap,
2814 PVR_DW_TO_BYTES(tex_state_dma_size_dw),
2815 &pvr_bo);
2816 if (result != VK_SUCCESS)
2817 return result;
2818
2819 dma_space = (uint32_t *)pvr_bo_suballoc_get_map_addr(pvr_bo);
2820
2821 result = pvr_sampler_image_state(ctx,
2822 transfer_cmd,
2823 sh_reg_layout,
2824 state,
2825 dma_space);
2826 if (result != VK_SUCCESS)
2827 return result;
2828
2829 pvr_dma_texture_floats(transfer_cmd, state, sh_reg_layout, dma_space);
2830
2831 if (transfer_cmd->sources[0].surface.mem_layout ==
2832 PVR_MEMLAYOUT_3DTWIDDLED) {
2833 dma_space[pvr_dynamic_const_reg_advance(sh_reg_layout, state)] =
2834 fui(transfer_cmd->sources[0].surface.z_position);
2835 }
2836
2837 pvr_write_usc_constants(sh_reg_layout, dma_space);
2838
2839 if (pvr_pick_component_needed(&state->custom_mapping))
2840 pvr_dma_texel_unwind(state, sh_reg_layout, dma_space);
2841
2842 pvr_pds_encode_dma_burst(unitex_prog.texture_dma_control,
2843 unitex_prog.texture_dma_address,
2844 state->common_ptr,
2845 tex_state_dma_size_dw,
2846 pvr_bo->dev_addr.addr,
2847 true,
2848 dev_info);
2849
2850 state->common_ptr += tex_state_dma_size_dw;
2851
2852 result =
2853 pvr_pds_unitex(dev_info, ctx, transfer_cmd, &unitex_prog, prep_data);
2854 if (result != VK_SUCCESS)
2855 return result;
2856
2857 pvr_csb_pack (&regs->isp_bgobjvals, CR_ISP_BGOBJVALS, reg) {
2858 reg.enablebgtag = true;
2859 }
2860 } else {
2861 /* No shader. */
2862 state->pds_temps = 0U;
2863 state->uniform_data_size = 0U;
2864 state->tex_state_data_size = 0U;
2865
2866 /* No background enabled. */
2867 /* clang-format off */
2868 pvr_csb_pack (&regs->isp_bgobjvals, CR_ISP_BGOBJVALS, reg);
2869 /* clang-format on */
2870 pvr_csb_pack (&regs->isp_aa, CR_ISP_AA, reg) {
2871 reg.mode = pvr_cr_isp_aa_mode_type(transfer_cmd->dst.sample_count);
2872 }
2873 state->msaa_multiplier = transfer_cmd->dst.sample_count & ~1U;
2874 state->pds_shader_task_offset = 0U;
2875 state->uni_tex_code_offset = 0U;
2876 state->tex_state_data_offset = 0U;
2877
2878 result = pvr_pbe_src_format(transfer_cmd, state, &state->shader_props);
2879 if (result != VK_SUCCESS)
2880 return result;
2881 }
2882
2883 pvr_setup_hwbg_object(dev_info, state);
2884
2885 pvr_csb_pack (&regs->isp_render, CR_ISP_RENDER, reg) {
2886 reg.mode_type = ROGUE_CR_ISP_RENDER_MODE_TYPE_FAST_SCALE;
2887
2888 result = pvr_isp_scan_direction(transfer_cmd,
2889 state->custom_mapping.pass_count,
2890 &reg.dir_type);
2891 if (result != VK_SUCCESS)
2892 return result;
2893 }
2894
2895 /* Set up pixel event handling. */
2896 result = pvr_pbe_setup(transfer_cmd, ctx, state);
2897 if (result != VK_SUCCESS)
2898 return result;
2899
2900 result = pvr_isp_tiles(device, state);
2901 if (result != VK_SUCCESS)
2902 return result;
2903
2904 if (PVR_HAS_FEATURE(&device->pdevice->dev_info, gpu_multicore_support)) {
2905 pvr_csb_pack (&regs->frag_screen, CR_FRAG_SCREEN, reg) {
2906 reg.xmax = transfer_cmd->dst.width - 1;
2907 reg.ymax = transfer_cmd->dst.height - 1;
2908 }
2909 }
2910
2911 if ((pass_idx + 1U) < state->custom_mapping.pass_count)
2912 *finished_out = false;
2913
2914 return VK_SUCCESS;
2915 }
2916
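/* Resolves the PBE source pixel format: transfers involving depth/stencil
 * (or a DS merge) go through the depth/stencil helper, everything else
 * through the normal path.
 */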
2917 static VkResult
2918 pvr_pbe_src_format_f2d(uint32_t merge_flags,
2919 struct pvr_transfer_cmd_source *src,
2920 VkFormat dst_format,
2921 bool down_scale,
2922 bool dont_force_pbe,
2923 enum pvr_transfer_pbe_pixel_src *pixel_format_out)
2924 {
2925 VkFormat src_format = src->surface.vk_format;
2926
2927 /* This has to come before the rest, as S8 for instance is integer and
2928 * the signedness check fails on D24S8.
2929 */
2930 if (vk_format_is_depth_or_stencil(src_format) ||
2931 vk_format_is_depth_or_stencil(dst_format) ||
2932 merge_flags & PVR_TRANSFER_CMD_FLAGS_DSMERGE) {
2933 return pvr_pbe_src_format_ds(&src->surface,
2934 src->filter,
2935 dst_format,
2936 merge_flags,
2937 down_scale,
2938 pixel_format_out);
2939 }
2940
2941 return pvr_pbe_src_format_normal(src_format,
2942 dst_format,
2943 down_scale,
2944 dont_force_pbe,
2945 pixel_format_out);
2946 }
2947
2948 /** Writes the coefficient loading PDS task. */
2949 static inline VkResult
2950 pvr_pds_coeff_task(struct pvr_transfer_ctx *ctx,
2951 const struct pvr_transfer_cmd *transfer_cmd,
2952 const bool sample_3d,
2953 struct pvr_transfer_prep_data *prep_data)
2954 {
2955 struct pvr_transfer_3d_state *state = &prep_data->state;
2956 struct pvr_pds_coeff_loading_program program = { 0U };
2957 struct pvr_suballoc_bo *pvr_bo;
2958 VkResult result;
2959
2960 program.num_fpu_iterators = 1U;
2961
2962 pvr_csb_pack (&program.FPU_iterators[0U],
2963 PDSINST_DOUT_FIELDS_DOUTI_SRC,
2964 reg) {
2965 if (sample_3d)
2966 reg.size = ROGUE_PDSINST_DOUTI_SIZE_3D;
2967 else
2968 reg.size = ROGUE_PDSINST_DOUTI_SIZE_2D;
2969
2970 reg.perspective = false;
2971
2972 /* Varying wrap on the TSP means that the TSP chooses the shorter path
2973 * out of the normal and the wrapping path, i.e. it chooses between u0->u1
2974 * and u1->1.0 == 0.0 -> u0. We don't need this behavior.
2975 */
2976 /*
2977 * if RHW ever needed offset SRC_F32 to the first U in 16 bit units
2978 * l0 U <= offs 0
2979 * l0 V
2980 * l1 U <= offs 4
2981 * ...
2982 */
2983 reg.shademodel = ROGUE_PDSINST_DOUTI_SHADEMODEL_GOURUAD;
2984 reg.f32_offset = 0U;
2985 }
2986
2987 if (sample_3d)
2988 state->usc_coeff_regs = 12U;
2989 else
2990 state->usc_coeff_regs = 8U;
2991
2992 pvr_pds_set_sizes_coeff_loading(&program);
2993
2994 result = pvr_cmd_buffer_alloc_mem(
2995 transfer_cmd->cmd_buffer,
2996 ctx->device->heaps.pds_heap,
2997 PVR_DW_TO_BYTES(program.data_size + program.code_size),
2998 &pvr_bo);
2999 if (result != VK_SUCCESS)
3000 return result;
3001
3002 state->pds_coeff_task_offset =
3003 pvr_bo->dev_addr.addr - ctx->device->heaps.pds_heap->base_addr.addr;
3004
3005 pvr_pds_generate_coeff_loading_program(&program,
3006 pvr_bo_suballoc_get_map_addr(pvr_bo));
3007
3008 state->coeff_data_size = program.data_size;
3009 state->pds_temps = program.temps_used;
3010
3011 return VK_SUCCESS;
3012 }
3013
3014 #define X 0U
3015 #define Y 1U
3016 #define Z 2U
3017
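/* Emits the normalized TSP texture coordinates for one source rectangle;
 * recips holds the 1/width, 1/height and (for 3D sources) 1/depth factors.
 */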
3018 static void pvr_tsp_floats(const struct pvr_device_info *dev_info,
3019 VkRect2D *rect,
3020 const float recips[3U],
3021 bool custom_filter,
3022 bool z_present,
3023 float z_value,
3024 struct pvr_transfer_3d_iteration *layer)
3025 {
3026 #define U0 0U
3027 #define U1 1U
3028 #define V0 2U
3029 #define V1 3U
3030
3031 const uint32_t indices[8U] = { U0, V0, U0, V1, U1, V1, U1, V0 };
3032 float delta[2U] = { 0.0f, 0.0f };
3033 int32_t non_normalized[4U];
3034 uint32_t src_flipped[2U];
3035 uint32_t normalized[4U];
3036 int32_t src_span[2U];
3037
3038 non_normalized[U0] = rect->offset.x;
3039 non_normalized[U1] = rect->offset.x + rect->extent.width;
3040 non_normalized[V0] = rect->offset.y;
3041 non_normalized[V1] = rect->offset.y + rect->extent.height;
3042
3043 /* Filter adjust. */
3044 src_span[X] = rect->extent.width;
3045 src_flipped[X] = src_span[X] > 0U ? 0U : 1U;
3046 src_span[Y] = rect->extent.height;
3047 src_flipped[Y] = src_span[Y] > 0U ? 0U : 1U;
3048 /*
3049 * | X | Y | srcFlipX | srcFlipY |
3050 * +----+----+----------+----------|
3051 * | X | Y | 0 | 0 |
3052 * | -X | Y | 1 | 0 |
3053 * | X | -Y | 0 | 1 |
3054 * | -X | -Y | 1 | 1 |
3055 */
3056 for (uint32_t i = X; i <= Y; i++) {
3057 if (custom_filter) {
3058 if (src_flipped[i] != 0U)
3059 delta[i] += 0.25;
3060 else
3061 delta[i] -= 0.25;
3062 }
3063 }
3064
3065 /* Normalize. */
3066 for (uint32_t i = 0U; i < ARRAY_SIZE(normalized); i++) {
3067 uint32_t tmp;
3068 float ftmp;
3069
3070 ftmp = (float)non_normalized[i] + delta[i >> 1U];
3071 ftmp *= recips[i >> 1U];
3072
3073 tmp = fui(ftmp);
3074 if (!PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format))
3075 tmp = XXH_rotl32(tmp, 1U);
3076
3077 normalized[i] = tmp;
3078 }
3079
3080 /* Apply indices. */
3081 for (uint32_t i = 0U; i < 8U; i++)
3082 layer->texture_coords[i] = normalized[indices[i]];
3083
3084 if (z_present) {
3085 uint32_t tmp = fui(z_value * recips[2U]);
3086
3087 if (!PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format))
3088 tmp = XXH_rotl32(tmp, 1U);
3089
3090 for (uint32_t i = 8U; i < 12U; i++)
3091 layer->texture_coords[i] = tmp;
3092 }
3093
3094 #undef U0
3095 #undef U1
3096 #undef V0
3097 #undef V1
3098 }
3099
3100 static void
3101 pvr_isp_prim_block_tsp_vertex_block(const struct pvr_device_info *dev_info,
3102 const struct pvr_transfer_cmd_source *src,
3103 struct pvr_rect_mapping *mappings,
3104 bool custom_filter,
3105 uint32_t num_mappings,
3106 uint32_t mapping_offset,
3107 uint32_t tsp_comp_format_in_dw,
3108 uint32_t **const cs_ptr_out)
3109 {
3110 struct pvr_transfer_3d_iteration layer;
3111 uint32_t *cs_ptr = *cs_ptr_out;
3112
3113 /* |<-32b->|
3114 * +-------+-----
3115 * | RHW | | X num_isp_vertices
3116 * +-------+-- |
3117 * | U | | |
3118 * | V | | X PVR_TRANSFER_NUM_LAYERS
3119 * +-------+-----
3120 *
3121 * RHW is no longer present in the transfer path. The diagram above still
3122 * shows where it would go if it were ever needed.
3123 */
3124 for (uint32_t i = mapping_offset; i < mapping_offset + num_mappings; i++) {
3125 bool z_present = src->surface.mem_layout == PVR_MEMLAYOUT_3DTWIDDLED;
3126 const float recips[3U] = {
3127 [X] = 1.0f / (float)src->surface.width,
3128 [Y] = 1.0f / (float)src->surface.height,
3129 [Z] = z_present ? 1.0f / (float)src->surface.depth : 0.0f,
3130 };
3131 float z_pos = (src->filter < PVR_FILTER_LINEAR)
3132 ? floor(src->surface.z_position) + 0.5f
3133 : src->surface.z_position;
3134
3135 pvr_tsp_floats(dev_info,
3136 &mappings[i].src_rect,
3137 recips,
3138 custom_filter,
3139 z_present,
3140 z_pos,
3141 &layer);
3142
3143 /* We request UVs from TSP for ISP triangle:
3144 * 0 u 1
3145 * +---,
3146 * v| /|
3147 * | / |
3148 * 2'/--'3
3149 */
3150 for (uint32_t j = 0U; j < PVR_TRANSFER_NUM_LAYERS; j++) {
3151 *cs_ptr++ = layer.texture_coords[0U];
3152 *cs_ptr++ = layer.texture_coords[1U];
3153 }
3154
3155 if (z_present) {
3156 *cs_ptr++ = layer.texture_coords[8U];
3157 *cs_ptr++ = 0U;
3158 }
3159
3160 for (uint32_t j = 0U; j < PVR_TRANSFER_NUM_LAYERS; j++) {
3161 *cs_ptr++ = layer.texture_coords[6U];
3162 *cs_ptr++ = layer.texture_coords[7U];
3163 }
3164
3165 if (z_present) {
3166 *cs_ptr++ = layer.texture_coords[11U];
3167 *cs_ptr++ = 0U;
3168 }
3169
3170 for (uint32_t j = 0U; j < PVR_TRANSFER_NUM_LAYERS; j++) {
3171 *cs_ptr++ = layer.texture_coords[2U];
3172 *cs_ptr++ = layer.texture_coords[3U];
3173 }
3174
3175 if (z_present) {
3176 *cs_ptr++ = layer.texture_coords[9U];
3177 *cs_ptr++ = 0U;
3178 }
3179
3180 for (uint32_t j = 0U; j < PVR_TRANSFER_NUM_LAYERS; j++) {
3181 *cs_ptr++ = layer.texture_coords[4U];
3182 *cs_ptr++ = layer.texture_coords[5U];
3183 }
3184
3185 if (z_present) {
3186 *cs_ptr++ = layer.texture_coords[10U];
3187 *cs_ptr++ = 0U;
3188 }
3189 }
3190
3191 if (!PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
3192 /* Skipped optional primitive id. */
3193 for (uint32_t i = 0U; i < tsp_comp_format_in_dw; i++)
3194 *cs_ptr++ = 0x88888888U;
3195 } else {
3196 /* Align back to 64 bits. */
3197 if (((uintptr_t)cs_ptr & 7U) != 0U)
3198 cs_ptr++;
3199 }
3200
3201 *cs_ptr_out = cs_ptr;
3202 }
3203
3204 #undef X
3205 #undef Y
3206 #undef Z
3207
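/* Emits the TA_STATE_PDS_* words (shader/texture code bases, size infos and
 * data bases) that describe the PDS programs of the primitive block.
 */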
3208 static void pvr_isp_prim_block_pds_state(const struct pvr_device_info *dev_info,
3209 struct pvr_transfer_ctx *ctx,
3210 struct pvr_transfer_3d_state *state,
3211 uint32_t **const cs_ptr_out)
3212 {
3213 uint32_t *cs_ptr = *cs_ptr_out;
3214
3215 pvr_csb_pack (cs_ptr, TA_STATE_PDS_SHADERBASE, shader_base) {
3216 shader_base.addr = PVR_DEV_ADDR(state->pds_shader_task_offset);
3217 }
3218 cs_ptr++;
3219
3220 pvr_csb_pack (cs_ptr, TA_STATE_PDS_TEXUNICODEBASE, tex_base) {
3221 tex_base.addr = PVR_DEV_ADDR(state->uni_tex_code_offset);
3222 }
3223 cs_ptr++;
3224
3225 pvr_csb_pack (cs_ptr, TA_STATE_PDS_SIZEINFO1, info1) {
3226 info1.pds_uniformsize =
3227 state->uniform_data_size /
3228 ROGUE_TA_STATE_PDS_SIZEINFO1_PDS_UNIFORMSIZE_UNIT_SIZE;
3229
3230 info1.pds_texturestatesize =
3231 state->tex_state_data_size /
3232 ROGUE_TA_STATE_PDS_SIZEINFO1_PDS_TEXTURESTATESIZE_UNIT_SIZE;
3233
3234 info1.pds_varyingsize =
3235 state->coeff_data_size /
3236 ROGUE_TA_STATE_PDS_SIZEINFO1_PDS_VARYINGSIZE_UNIT_SIZE;
3237
3238 info1.usc_varyingsize =
3239 ALIGN_POT(state->usc_coeff_regs,
3240 ROGUE_TA_STATE_PDS_SIZEINFO1_USC_VARYINGSIZE_UNIT_SIZE) /
3241 ROGUE_TA_STATE_PDS_SIZEINFO1_USC_VARYINGSIZE_UNIT_SIZE;
3242
3243 info1.pds_tempsize =
3244 ALIGN_POT(state->pds_temps,
3245 ROGUE_TA_STATE_PDS_SIZEINFO1_PDS_TEMPSIZE_UNIT_SIZE) /
3246 ROGUE_TA_STATE_PDS_SIZEINFO1_PDS_TEMPSIZE_UNIT_SIZE;
3247 }
3248 cs_ptr++;
3249
3250 pvr_csb_pack (cs_ptr, TA_STATE_PDS_VARYINGBASE, base) {
3251 base.addr = PVR_DEV_ADDR(state->pds_coeff_task_offset);
3252 }
3253 cs_ptr++;
3254
3255 pvr_csb_pack (cs_ptr, TA_STATE_PDS_TEXTUREDATABASE, base) {
3256 base.addr = PVR_DEV_ADDR(state->tex_state_data_offset);
3257 }
3258 cs_ptr++;
3259
3260 /* PDS uniform program not used. */
3261 pvr_csb_pack (cs_ptr, TA_STATE_PDS_UNIFORMDATABASE, base) {
3262 base.addr = PVR_DEV_ADDR(0U);
3263 }
3264 cs_ptr++;
3265
3266 pvr_csb_pack (cs_ptr, TA_STATE_PDS_SIZEINFO2, info) {
3267 info.usc_sharedsize =
3268 ALIGN_POT(state->common_ptr,
3269 ROGUE_TA_STATE_PDS_SIZEINFO2_USC_SHAREDSIZE_UNIT_SIZE) /
3270 ROGUE_TA_STATE_PDS_SIZEINFO2_USC_SHAREDSIZE_UNIT_SIZE;
3271 info.pds_tri_merge_disable = !PVR_HAS_ERN(dev_info, 42307);
3272 info.pds_batchnum = 0U;
3273 }
3274 cs_ptr++;
3275
3276 /* Get back to 64 bits boundary. */
3277 if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format))
3278 cs_ptr++;
3279
3280 *cs_ptr_out = cs_ptr;
3281 }
3282
3283 static void pvr_isp_prim_block_isp_state(const struct pvr_device_info *dev_info,
3284 UNUSED uint32_t tsp_comp_format_in_dw,
3285 uint32_t tsp_data_size_in_bytes,
3286 uint32_t num_isp_vertices,
3287 bool read_bgnd,
3288 uint32_t **const cs_ptr_out)
3289 {
3290 const bool has_simple_internal_parameter_format_v2 =
3291 PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format_v2);
3292 uint32_t *cs_ptr = *cs_ptr_out;
3293
3294 if (has_simple_internal_parameter_format_v2) {
3295 const uint32_t tsp_data_per_vrx_in_bytes =
3296 tsp_data_size_in_bytes / num_isp_vertices;
3297
3298 pvr_csb_pack ((uint64_t *)cs_ptr,
3299 IPF_VERTEX_FORMAT_WORD_SIPF2,
3300 vert_fmt) {
3301 vert_fmt.vf_isp_state_size =
3302 pvr_cmd_length(TA_STATE_ISPCTL) + pvr_cmd_length(TA_STATE_ISPA);
3303
3304 vert_fmt.vf_tsp_vtx_raw = true;
3305 vert_fmt.vf_isp_vtx_raw = true;
3306
3307 vert_fmt.vf_varying_vertex_bits = tsp_data_per_vrx_in_bytes * 8U;
3308 vert_fmt.vf_primitive_total = (num_isp_vertices / 2U) - 1U;
3309 vert_fmt.vf_vertex_total = num_isp_vertices - 1U;
3310 }
3311 cs_ptr += pvr_cmd_length(IPF_VERTEX_FORMAT_WORD_SIPF2);
3312 }
3313
3314 /* ISP state words. */
3315
3316 /* clang-format off */
3317 pvr_csb_pack (cs_ptr, TA_STATE_ISPCTL, ispctl);
3318 /* clang-format on */
3319 cs_ptr += pvr_cmd_length(TA_STATE_ISPCTL);
3320
3321 pvr_csb_pack (cs_ptr, TA_STATE_ISPA, ispa) {
3322 ispa.objtype = ROGUE_TA_OBJTYPE_TRIANGLE;
3323 ispa.passtype = read_bgnd ? ROGUE_TA_PASSTYPE_TRANSLUCENT
3324 : ROGUE_TA_PASSTYPE_OPAQUE;
3325 ispa.dcmpmode = ROGUE_TA_CMPMODE_ALWAYS;
3326 ispa.dwritedisable = true;
3327 }
3328 cs_ptr += pvr_cmd_length(TA_STATE_ISPA);
3329
3330 if (has_simple_internal_parameter_format_v2) {
3331 *cs_ptr_out = cs_ptr;
3332 return;
3333 }
3334
3335 /* Number of bytes the TSP compression format needs. */
3336 pvr_csb_pack (cs_ptr, IPF_COMPRESSION_SIZE_WORD, word) {
3337 word.cs_isp_comp_table_size = 0U;
3338 word.cs_tsp_comp_format_size = tsp_comp_format_in_dw;
3339 word.cs_tsp_comp_table_size = 0U;
3340 word.cs_tsp_comp_vertex_size = tsp_data_size_in_bytes / num_isp_vertices;
3341 }
3342 cs_ptr += pvr_cmd_length(IPF_COMPRESSION_SIZE_WORD);
3343
3344 /* ISP vertex compression. */
3345 pvr_csb_pack (cs_ptr, IPF_ISP_COMPRESSION_WORD_0, word0) {
3346 word0.cf_isp_comp_fmt_x0 = ROGUE_IPF_COMPRESSION_FORMAT_RAW_BYTE;
3347 word0.cf_isp_comp_fmt_x1 = ROGUE_IPF_COMPRESSION_FORMAT_RAW_BYTE;
3348 word0.cf_isp_comp_fmt_x2 = ROGUE_IPF_COMPRESSION_FORMAT_RAW_BYTE;
3349 word0.cf_isp_comp_fmt_y0 = ROGUE_IPF_COMPRESSION_FORMAT_RAW_BYTE;
3350 word0.cf_isp_comp_fmt_y1 = ROGUE_IPF_COMPRESSION_FORMAT_RAW_BYTE;
3351 word0.cf_isp_comp_fmt_y2 = ROGUE_IPF_COMPRESSION_FORMAT_RAW_BYTE;
3352 word0.cf_isp_comp_fmt_z0 = ROGUE_IPF_COMPRESSION_FORMAT_RAW_BYTE;
3353 word0.cf_isp_comp_fmt_z1 = ROGUE_IPF_COMPRESSION_FORMAT_RAW_BYTE;
3354 }
3355 cs_ptr += pvr_cmd_length(IPF_ISP_COMPRESSION_WORD_0);
3356
3357 pvr_csb_pack (cs_ptr, IPF_ISP_COMPRESSION_WORD_1, word1) {
3358 word1.vf_prim_msaa = 0U;
3359 word1.vf_prim_id_pres = 0U;
3360 word1.vf_vertex_clipped = 0U;
3361 word1.vf_vertex_total = num_isp_vertices - 1U;
3362 word1.cf_isp_comp_fmt_z3 = ROGUE_IPF_COMPRESSION_FORMAT_RAW_BYTE;
3363 word1.cf_isp_comp_fmt_z2 = ROGUE_IPF_COMPRESSION_FORMAT_RAW_BYTE;
3364 }
3365 cs_ptr += pvr_cmd_length(IPF_ISP_COMPRESSION_WORD_1);
3366
3367 *cs_ptr_out = cs_ptr;
3368 }
3369
3370 static void
3371 pvr_isp_prim_block_index_block(const struct pvr_device_info *dev_info,
3372 uint32_t num_mappings,
3373 uint32_t **const cs_ptr_out)
3374 {
3375 uint32_t *cs_ptr = *cs_ptr_out;
3376
3377 if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
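      /* Each 64-bit index word holds two quads (eight vertices) as four
       * triangles; the second triangle of each quad reverses the winding:
       * (0,1,2)/(3,2,1) and (4,5,6)/(7,6,5).
       */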
3378 for (uint32_t i = 0U; i < DIV_ROUND_UP(num_mappings, 2U); i++) {
3379 const uint32_t idx = i * 8U;
3380
3381 pvr_csb_pack ((uint64_t *)cs_ptr,
3382 IPF_INDEX_DATA_WORDS_SIPF,
3383 idx_data_word) {
3384 idx_data_word.ix_triangle3_index_2 = idx + 5U;
3385 idx_data_word.ix_triangle3_index_1 = idx + 6U;
3386 idx_data_word.ix_triangle3_index_0 = idx + 7U;
3387
3388 idx_data_word.ix_triangle2_index_2 = idx + 6U;
3389 idx_data_word.ix_triangle2_index_1 = idx + 5U;
3390 idx_data_word.ix_triangle2_index_0 = idx + 4U;
3391
3392 idx_data_word.ix_triangle1_index_2 = idx + 1U;
3393 idx_data_word.ix_triangle1_index_1 = idx + 2U;
3394 idx_data_word.ix_triangle1_index_0 = idx + 3U;
3395
3396 idx_data_word.ix_triangle0_index_2 = idx + 2U;
3397 idx_data_word.ix_triangle0_index_1 = idx + 1U;
3398 idx_data_word.ix_triangle0_index_0 = idx + 0U;
3399 }
3400 cs_ptr += pvr_cmd_length(IPF_INDEX_DATA_WORDS_SIPF);
3401 }
3402
3403 *cs_ptr_out = cs_ptr;
3404 return;
3405 }
3406
3407 for (uint32_t i = 0U, j = 0U; i < num_mappings; i++, j += 4U) {
3408 if ((i & 1U) == 0U) {
3409 pvr_csb_pack (cs_ptr, IPF_INDEX_DATA, word) {
3410 word.ix_index0_0 = j;
3411 word.ix_index0_1 = j + 1U;
3412 word.ix_index0_2 = j + 2U;
3413 word.ix_index1_0 = j + 3U;
3414 }
3415 cs_ptr += pvr_cmd_length(IPF_INDEX_DATA);
3416
3417 /* Don't increment cs_ptr here. IPF_INDEX_DATA is patched in the
3418 * else part and then cs_ptr is incremented.
3419 */
3420 pvr_csb_pack (cs_ptr, IPF_INDEX_DATA, word) {
3421 word.ix_index0_0 = j + 2U;
3422 word.ix_index0_1 = j + 1U;
3423 }
3424 } else {
3425 uint32_t tmp;
3426
3427 pvr_csb_pack (&tmp, IPF_INDEX_DATA, word) {
3428 word.ix_index0_2 = j;
3429 word.ix_index1_0 = j + 1U;
3430 }
3431 *cs_ptr |= tmp;
3432 cs_ptr += pvr_cmd_length(IPF_INDEX_DATA);
3433
3434 pvr_csb_pack (cs_ptr, IPF_INDEX_DATA, word) {
3435 word.ix_index0_0 = j + 2U;
3436 word.ix_index0_1 = j + 3U;
3437 word.ix_index0_2 = j + 2U;
3438 word.ix_index1_0 = j + 1U;
3439 }
3440 cs_ptr += pvr_cmd_length(IPF_INDEX_DATA);
3441 }
3442 }
3443
3444    /* With an odd mapping count the final word is only half-filled and
3445     * cs_ptr wasn't advanced past it; do that now.
3446     */
3445 if ((num_mappings & 1U) != 0U)
3446 cs_ptr++;
3447
3448 *cs_ptr_out = cs_ptr;
3449 }
3450
3451 /* Calculates a 24 bit fixed point (biased) representation of a signed integer.
3452 */
3453 static inline VkResult
3454 pvr_int32_to_isp_xy_vtx(const struct pvr_device_info *dev_info,
3455 int32_t val,
3456 bool bias,
3457 uint32_t *word_out)
3458 {
3459 if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
3460 const uint32_t max_fractional = ROGUE_IPF_ISP_VERTEX_XY_SIPF_FRAC_MAX_VAL;
3461 const uint32_t max_integer = ROGUE_IPF_ISP_VERTEX_XY_SIPF_INTEGER_MAX_VAL;
3462
3463 uint32_t fractional;
3464 uint32_t integer;
3465
3466 if (bias)
3467 val += ROGUE_IPF_ISP_VERTEX_XY_BIAS_VALUE_SIPF;
3468
3469 if (val < 0 || val > max_integer + 1) {
3470 mesa_loge("ISP vertex xy value out of range.");
3471 return vk_error(NULL, VK_ERROR_UNKNOWN);
3472 }
3473
3474 if (val <= max_integer) {
3475 integer = val;
3476 fractional = 0;
3477 } else if (val == max_integer + 1) {
3478 /* The integer field is 13 bits long so the max value is
3479 * 2 ^ 13 - 1 = 8191. For 8k support we need to handle 8192 so we set
3480 * all fractional bits to get as close as possible. The best we can do
3481        * is: 0x1FFF.F = 8191.9375 ≈ 8192.
3482 */
3483 integer = max_integer;
3484 fractional = max_fractional;
3485 }
3486
3487 pvr_csb_pack (word_out, IPF_ISP_VERTEX_XY_SIPF, word) {
3488 word.integer = integer;
3489 word.frac = fractional;
3490 }
3491
3492 return VK_SUCCESS;
3493 }
3494
3495 val += ROGUE_IPF_ISP_VERTEX_XY_BIAS_VALUE;
3496
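   /* The biased value must fit in the vertex word's integer field; any bit
    * set in the 0x7fff8000 range can't be represented, so fail the blit.
    */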
3497 if (((uint32_t)val & 0x7fff8000U) != 0U)
3498 return vk_error(NULL, VK_ERROR_UNKNOWN);
3499
3500 pvr_csb_pack (word_out, IPF_ISP_VERTEX_XY, word) {
3501 word.sign = val < 0;
3502 word.integer = val;
3503 }
3504
3505 return VK_SUCCESS;
3506 }
3507
3508 static VkResult
3509 pvr_isp_prim_block_isp_vertices(const struct pvr_device_info *dev_info,
3510 struct pvr_transfer_3d_state *state,
3511 struct pvr_rect_mapping *mappings,
3512 uint32_t num_mappings,
3513 uint32_t mapping_offset,
3514 uint32_t **const cs_ptr_out)
3515 {
3516 uint32_t *cs_ptr = *cs_ptr_out;
3517 bool bias = true;
3518 uint32_t i;
3519
3520 if (PVR_HAS_FEATURE(dev_info, screen_size8K))
3521 bias = state->width_in_tiles <= 256U && state->height_in_tiles <= 256U;
3522
3523 for (i = mapping_offset; i < mapping_offset + num_mappings; i++) {
3524 uint32_t bottom = 0U;
3525 uint32_t right = 0U;
3526 uint32_t left = 0U;
3527 uint32_t top = 0U;
3528 VkResult result;
3529
3530 /* ISP vertex data (X, Y, Z). */
3531 result = pvr_int32_to_isp_xy_vtx(dev_info,
3532 mappings[i].dst_rect.offset.y,
3533 bias,
3534 &top);
3535 if (result != VK_SUCCESS)
3536 return result;
3537
3538 result = pvr_int32_to_isp_xy_vtx(dev_info,
3539 mappings[i].dst_rect.offset.y +
3540 mappings[i].dst_rect.extent.height,
3541 bias,
3542 &bottom);
3543 if (result != VK_SUCCESS)
3544 return result;
3545
3546 result = pvr_int32_to_isp_xy_vtx(dev_info,
3547 mappings[i].dst_rect.offset.x,
3548 bias,
3549 &left);
3550 if (result != VK_SUCCESS)
3551 return result;
3552
3553 result = pvr_int32_to_isp_xy_vtx(dev_info,
3554 mappings[i].dst_rect.offset.x +
3555 mappings[i].dst_rect.extent.width,
3556 bias,
3557 &right);
3558 if (result != VK_SUCCESS)
3559 return result;
3560
3561 if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
3562 pvr_csb_pack ((uint64_t *)cs_ptr, IPF_ISP_VERTEX_WORD_SIPF, word) {
3563 word.y = top;
3564 word.x = left;
3565 }
3566 cs_ptr += pvr_cmd_length(IPF_ISP_VERTEX_WORD_SIPF);
3567
3568 pvr_csb_pack ((uint64_t *)cs_ptr, IPF_ISP_VERTEX_WORD_SIPF, word) {
3569 word.y = top;
3570 word.x = right;
3571 }
3572 cs_ptr += pvr_cmd_length(IPF_ISP_VERTEX_WORD_SIPF);
3573
3574 pvr_csb_pack ((uint64_t *)cs_ptr, IPF_ISP_VERTEX_WORD_SIPF, word) {
3575 word.y = bottom;
3576 word.x = left;
3577 }
3578 cs_ptr += pvr_cmd_length(IPF_ISP_VERTEX_WORD_SIPF);
3579
3580 pvr_csb_pack ((uint64_t *)cs_ptr, IPF_ISP_VERTEX_WORD_SIPF, word) {
3581 word.y = bottom;
3582 word.x = right;
3583 }
3584 cs_ptr += pvr_cmd_length(IPF_ISP_VERTEX_WORD_SIPF);
3585
3586 continue;
3587 }
3588
3589 /* ISP vertices 0 and 1. */
3590 pvr_csb_pack (cs_ptr, IPF_ISP_VERTEX_WORD_0, word0) {
3591 word0.x0 = left;
3592 word0.y0 = top & 0xFF;
3593 }
3594 cs_ptr++;
3595
3596 pvr_csb_pack (cs_ptr, IPF_ISP_VERTEX_WORD_1, word1) {
3597 word1.y0 = top >> ROGUE_IPF_ISP_VERTEX_WORD_1_Y0_SHIFT;
3598 }
3599 cs_ptr++;
3600
3601 pvr_csb_pack (cs_ptr, IPF_ISP_VERTEX_WORD_2, word2) {
3602 word2.x1 = right & 0xFFFF;
3603 word2.z0 = 0U;
3604 }
3605 cs_ptr++;
3606
3607 pvr_csb_pack (cs_ptr, IPF_ISP_VERTEX_WORD_3, word3) {
3608 word3.x1 = right >> ROGUE_IPF_ISP_VERTEX_WORD_3_X1_SHIFT;
3609 word3.y1 = top;
3610 }
3611 cs_ptr++;
3612
3613 pvr_csb_pack (cs_ptr, IPF_ISP_VERTEX_WORD_4, word4) {
3614 word4.z1 = 0U;
3615 }
3616 cs_ptr++;
3617
3618 /* ISP vertices 2 and 3. */
3619 pvr_csb_pack (cs_ptr, IPF_ISP_VERTEX_WORD_0, word0) {
3620 word0.x0 = left;
3621 word0.y0 = bottom & 0xFF;
3622 }
3623 cs_ptr++;
3624
3625 pvr_csb_pack (cs_ptr, IPF_ISP_VERTEX_WORD_1, word1) {
3626 word1.y0 = bottom >> ROGUE_IPF_ISP_VERTEX_WORD_1_Y0_SHIFT;
3627 }
3628 cs_ptr++;
3629
3630 pvr_csb_pack (cs_ptr, IPF_ISP_VERTEX_WORD_2, word2) {
3631 word2.x1 = right & 0xFFFF;
3632 word2.z0 = 0U;
3633 }
3634 cs_ptr++;
3635
3636 pvr_csb_pack (cs_ptr, IPF_ISP_VERTEX_WORD_3, word3) {
3637 word3.x1 = right >> ROGUE_IPF_ISP_VERTEX_WORD_3_X1_SHIFT;
3638 word3.y1 = bottom;
3639 }
3640 cs_ptr++;
3641
3642 pvr_csb_pack (cs_ptr, IPF_ISP_VERTEX_WORD_4, word4) {
3643 word4.z1 = 0U;
3644 }
3645 cs_ptr++;
3646 }
3647 *cs_ptr_out = cs_ptr;
3648
3649 return VK_SUCCESS;
3650 }
3651
3652 static uint32_t
3653 pvr_isp_primitive_block_size(const struct pvr_device_info *dev_info,
3654 const struct pvr_transfer_cmd_source *src,
3655 uint32_t num_mappings)
3656 {
3657 uint32_t num_isp_vertices = num_mappings * 4U;
3658 uint32_t num_tsp_vertices_per_isp_vertex;
3659 uint32_t isp_vertex_data_size_dw;
3660 bool color_fill = (src == NULL);
3661 uint32_t tsp_comp_format_dw;
3662 uint32_t isp_state_size_dw;
3663 uint32_t pds_state_size_dw;
3664 uint32_t idx_data_size_dw;
3665 uint32_t tsp_data_size;
3666 uint32_t stream_size;
3667
3668 if (color_fill) {
3669 num_tsp_vertices_per_isp_vertex = 0U;
3670 } else {
3671 num_tsp_vertices_per_isp_vertex =
3672 src->surface.mem_layout == PVR_MEMLAYOUT_3DTWIDDLED ? 4U : 2U;
3673 }
3674
3675 tsp_data_size = PVR_DW_TO_BYTES(num_isp_vertices * PVR_TRANSFER_NUM_LAYERS *
3676 num_tsp_vertices_per_isp_vertex);
3677
3678 if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
3679 /* An XYZ vertex is 16/16/32 bits => 8 bytes. */
3680 isp_vertex_data_size_dw = num_isp_vertices * 2U;
3681
3682 /* Round to even for 64 bit boundary. */
3683 idx_data_size_dw = ALIGN_POT(num_mappings, 2U);
3684 tsp_comp_format_dw = 0U;
3685 isp_state_size_dw = 4U;
3686 pds_state_size_dw = 8U;
3687 } else {
3688 tsp_comp_format_dw = color_fill ? 0U : PVR_TRANSFER_NUM_LAYERS;
3689
3690 if (!color_fill) {
3691 if (src->surface.mem_layout == PVR_MEMLAYOUT_3DTWIDDLED)
3692 tsp_comp_format_dw *= 2U;
3693 }
3694
3695       /* An XYZ vertex is 24/24/32 bits => 10 bytes, with the total rounded
3696        * up to 4-byte burst alignment.
3697        */
3698 isp_vertex_data_size_dw = DIV_ROUND_UP(num_isp_vertices * 10U, 4U);
3699
3700 /* 4 triangles fit in 3 dw: t0t0t0t1_t1t1t2t2_t2t3t3t3. */
3701 idx_data_size_dw = num_mappings + DIV_ROUND_UP(num_mappings, 2U);
3702 isp_state_size_dw = 5U;
3703 pds_state_size_dw = 7U;
3704 }
3705
3706 stream_size =
3707 tsp_data_size + PVR_DW_TO_BYTES(idx_data_size_dw + tsp_comp_format_dw +
3708 isp_vertex_data_size_dw +
3709 isp_state_size_dw + pds_state_size_dw);
3710
3711 return stream_size;
3712 }
3713
3714 static VkResult
3715 pvr_isp_primitive_block(const struct pvr_device_info *dev_info,
3716 struct pvr_transfer_ctx *ctx,
3717 const struct pvr_transfer_cmd *transfer_cmd,
3718 struct pvr_transfer_prep_data *prep_data,
3719 const struct pvr_transfer_cmd_source *src,
3720 bool custom_filter,
3721 struct pvr_rect_mapping *mappings,
3722 uint32_t num_mappings,
3723 uint32_t mapping_offset,
3724 bool read_bgnd,
3725 uint32_t *cs_start_offset,
3726 uint32_t **cs_ptr_out)
3727 {
3728 struct pvr_transfer_3d_state *state = &prep_data->state;
3729 uint32_t num_isp_vertices = num_mappings * 4U;
3730 uint32_t num_tsp_vertices_per_isp_vert;
3731 uint32_t tsp_data_size_in_bytes;
3732 uint32_t tsp_comp_format_in_dw;
3733 bool color_fill = src == NULL;
3734 uint32_t stream_size_in_bytes;
3735 uint32_t *cs_ptr_start;
3736 VkResult result;
3737
3738 if (color_fill) {
3739 num_tsp_vertices_per_isp_vert = 0U;
3740 } else {
3741 num_tsp_vertices_per_isp_vert =
3742 src->surface.mem_layout == PVR_MEMLAYOUT_3DTWIDDLED ? 4U : 2U;
3743 }
3744
3745 tsp_data_size_in_bytes =
3746 PVR_DW_TO_BYTES(num_isp_vertices * PVR_TRANSFER_NUM_LAYERS *
3747 num_tsp_vertices_per_isp_vert);
3748
3749 if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
3750 tsp_comp_format_in_dw = 0U;
3751 } else {
3752 tsp_comp_format_in_dw = color_fill ? 0U : PVR_TRANSFER_NUM_LAYERS;
3753
3754 if (!color_fill && src->surface.mem_layout == PVR_MEMLAYOUT_3DTWIDDLED)
3755 tsp_comp_format_in_dw *= 2U;
3756 }
3757
3758 stream_size_in_bytes =
3759 pvr_isp_primitive_block_size(dev_info, src, num_mappings);
3760
3761 cs_ptr_start = *cs_ptr_out;
3762
3763 if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
3764 /* This includes:
3765 * Vertex formats.
3766 * ISP state words.
3767 */
3768 pvr_isp_prim_block_isp_state(dev_info,
3769 tsp_comp_format_in_dw,
3770 tsp_data_size_in_bytes,
3771 num_isp_vertices,
3772 read_bgnd,
3773 cs_ptr_out);
3774
3775       /* This includes:
3776 * Index data / point pitch.
3777 */
3778 pvr_isp_prim_block_index_block(dev_info, num_mappings, cs_ptr_out);
3779
3780 result = pvr_isp_prim_block_isp_vertices(dev_info,
3781 state,
3782 mappings,
3783 num_mappings,
3784 mapping_offset,
3785 cs_ptr_out);
3786 if (result != VK_SUCCESS)
3787 return result;
3788
3789 pvr_isp_prim_block_pds_state(dev_info, ctx, state, cs_ptr_out);
3790
3791 if (!color_fill) {
3792 /* This includes:
3793 * TSP vertex formats.
3794 */
3795 pvr_isp_prim_block_tsp_vertex_block(dev_info,
3796 src,
3797 mappings,
3798 custom_filter,
3799 num_mappings,
3800 mapping_offset,
3801 tsp_comp_format_in_dw,
3802 cs_ptr_out);
3803 }
3804
3805 *cs_start_offset = 0;
3806 } else {
3807 if (!color_fill) {
3808 /* This includes:
3809 * Compressed TSP vertex data & tables.
3810 * Primitive id.
3811 * TSP compression formats.
3812 */
3813 pvr_isp_prim_block_tsp_vertex_block(dev_info,
3814 src,
3815 mappings,
3816 custom_filter,
3817 num_mappings,
3818 mapping_offset,
3819 tsp_comp_format_in_dw,
3820 cs_ptr_out);
3821 }
3822
3823 pvr_isp_prim_block_pds_state(dev_info, ctx, state, cs_ptr_out);
3824
3825 /* Point the CS_PRIM_BASE here. */
3826 *cs_start_offset = (*cs_ptr_out - cs_ptr_start) * sizeof(cs_ptr_start[0]);
3827
3828 /* This includes:
3829 * ISP state words.
3830 * Compression size word.
3831 * ISP compression and vertex formats.
3832 */
3833 pvr_isp_prim_block_isp_state(dev_info,
3834 tsp_comp_format_in_dw,
3835 tsp_data_size_in_bytes,
3836 num_isp_vertices,
3837 read_bgnd,
3838 cs_ptr_out);
3839
3840 pvr_isp_prim_block_index_block(dev_info, num_mappings, cs_ptr_out);
3841
3842 result = pvr_isp_prim_block_isp_vertices(dev_info,
3843 state,
3844 mappings,
3845 num_mappings,
3846 mapping_offset,
3847 cs_ptr_out);
3848 if (result != VK_SUCCESS)
3849 return result;
3850 }
3851
3852 assert((*cs_ptr_out - cs_ptr_start) * sizeof(cs_ptr_start[0]) ==
3853 stream_size_in_bytes);
3854
3855 return VK_SUCCESS;
3856 }
3857
3858 static inline uint32_t
3859 pvr_transfer_prim_blocks_per_alloc(const struct pvr_device_info *dev_info)
3860 {
3861 uint32_t ret = PVR_DW_TO_BYTES(ROGUE_IPF_CONTROL_STREAM_SIZE_DWORDS);
3862
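   /* Conservative estimate: each primitive block reference costs a
    * header/format entry plus a base pointer, and room must remain for a
    * link to the next region array.
    */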
3863 if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format))
3864 return ret / sizeof(uint64_t) / 2U;
3865
3866 return ret / sizeof(uint32_t) / 2U - 1U;
3867 }
3868
3869 static inline uint32_t
3870 pvr_transfer_max_quads_per_pb(const struct pvr_device_info *dev_info)
3871 {
3872 return PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) ? 4U
3873 : 16U;
3874 }
3875
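/* Writes up to 4 bytes of 'data' into the byte-granular SIPF control
 * stream. The backing memory is written a 32-bit word at a time, so an
 * unaligned write is done as a read-modify-write of the containing word,
 * spilling into the following word when it crosses a word boundary.
 */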
3876 static inline uint8_t *pvr_isp_ctrl_stream_sipf_write_aligned(uint8_t *stream,
3877 uint32_t data,
3878 uint32_t size)
3879 {
3880 const uint32_t offset = (uintptr_t)stream & 0x3U;
3881 uint32_t *aligned_stream = (uint32_t *)(stream - offset);
3882 const uint32_t current_data = *aligned_stream & ((1U << (offset * 8U)) - 1U);
3883
3884 assert(size > 0 && size <= 4U);
3885
3886 *aligned_stream = current_data | data << (offset * 8U);
3887
3888 if (offset + size > 4U) {
3889 aligned_stream++;
3890 *aligned_stream = data >> ((4U - offset) * 8);
3891 }
3892
3893 return stream + size;
3894 }
3895
3896 /**
3897  * Writes the ISP control stream.
3898  *
3899  * We change the sampler/texture state whenever we process a new TQ source.
3900  * The primitive blocks contain the shader pointers, but we supply the
3901  * shaders to the primitive blocks from here.
3902 */
3903 static VkResult pvr_isp_ctrl_stream(const struct pvr_device_info *dev_info,
3904 struct pvr_transfer_ctx *ctx,
3905 struct pvr_transfer_cmd *transfer_cmd,
3906 struct pvr_transfer_prep_data *prep_data)
3907 {
3908 const uint32_t max_mappings_per_pb = pvr_transfer_max_quads_per_pb(dev_info);
3909 bool fill_blit = (transfer_cmd->flags & PVR_TRANSFER_CMD_FLAGS_FILL) != 0U;
3910 uint32_t free_ctrl_stream_words = ROGUE_IPF_CONTROL_STREAM_SIZE_DWORDS;
3911 struct pvr_transfer_3d_state *const state = &prep_data->state;
3912 struct pvr_winsys_transfer_regs *const regs = &state->regs;
3913 struct pvr_transfer_pass *pass = NULL;
3914 uint32_t flags = transfer_cmd->flags;
3915 struct pvr_suballoc_bo *pvr_cs_bo;
3916 pvr_dev_addr_t stream_base_vaddr;
3917 uint32_t num_prim_blks = 0U;
3918 uint32_t prim_blk_size = 0U;
3919 uint32_t region_arrays_size;
3920 uint32_t num_region_arrays;
3921 uint32_t total_stream_size;
3922 bool was_linked = false;
3923 uint32_t rem_mappings;
3924 uint32_t num_sources;
3925 uint32_t *blk_cs_ptr;
3926 uint32_t *cs_ptr;
3927 uint32_t source;
3928 VkResult result;
3929
3930 if (state->custom_mapping.pass_count > 0U) {
3931 pass = &state->custom_mapping.passes[state->pass_idx];
3932
3933 num_sources = pass->source_count;
3934
3935 for (source = 0; source < num_sources; source++) {
3936 uint32_t num_mappings = pass->sources[source].mapping_count;
3937
3938 while (num_mappings > 0U) {
3939 if (fill_blit) {
3940 prim_blk_size += pvr_isp_primitive_block_size(
3941 dev_info,
3942 NULL,
3943 MIN2(max_mappings_per_pb, num_mappings));
3944 }
3945
3946 if (transfer_cmd->source_count > 0) {
3947 prim_blk_size += pvr_isp_primitive_block_size(
3948 dev_info,
3949 &transfer_cmd->sources[source],
3950 MIN2(max_mappings_per_pb, num_mappings));
3951 }
3952
3953 num_mappings -= MIN2(max_mappings_per_pb, num_mappings);
3954 num_prim_blks++;
3955 }
3956 }
3957 } else {
3958 num_sources = fill_blit ? 1U : transfer_cmd->source_count;
3959
3960 if (fill_blit) {
3961 num_prim_blks = 1U;
3962 prim_blk_size +=
3963 pvr_isp_primitive_block_size(dev_info,
3964 NULL,
3965 MIN2(max_mappings_per_pb, 1U));
3966
3967 /* Fill blits can also have a source; fallthrough to handle. */
3968 }
3969
3970 for (source = 0; source < transfer_cmd->source_count; source++) {
3971 uint32_t num_mappings = transfer_cmd->sources[source].mapping_count;
3972
3973 while (num_mappings > 0U) {
3974 prim_blk_size += pvr_isp_primitive_block_size(
3975 dev_info,
3976 &transfer_cmd->sources[source],
3977 MIN2(max_mappings_per_pb, num_mappings));
3978
3979 num_mappings -= MIN2(max_mappings_per_pb, num_mappings);
3980 num_prim_blks++;
3981 }
3982 }
3983 }
3984
3985 num_region_arrays =
3986 (num_prim_blks + (pvr_transfer_prim_blocks_per_alloc(dev_info) - 1U)) /
3987 pvr_transfer_prim_blocks_per_alloc(dev_info);
3988 region_arrays_size = ROGUE_IPF_CONTROL_STREAM_SIZE_DWORDS *
3989 sizeof(uint32_t) * num_region_arrays;
3990 total_stream_size = region_arrays_size + prim_blk_size;
3991
3992 /* Allocate space for IPF control stream. */
3993 result = pvr_cmd_buffer_alloc_mem(transfer_cmd->cmd_buffer,
3994 ctx->device->heaps.transfer_frag_heap,
3995 total_stream_size,
3996 &pvr_cs_bo);
3997 if (result != VK_SUCCESS)
3998 return result;
3999
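   /* stream_base_vaddr is the control stream's offset within the transfer
    * fragment heap; all primitive block and link addresses below are
    * expressed relative to the heap base.
    */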
4000 stream_base_vaddr =
4001 PVR_DEV_ADDR(pvr_cs_bo->dev_addr.addr -
4002 ctx->device->heaps.transfer_frag_heap->base_addr.addr);
4003
4004 cs_ptr = pvr_bo_suballoc_get_map_addr(pvr_cs_bo);
4005 blk_cs_ptr = cs_ptr + region_arrays_size / sizeof(uint32_t);
4006
4007 source = 0;
4008 while (source < num_sources) {
4009 if (fill_blit)
4010 rem_mappings = pass ? pass->sources[source].mapping_count : 1U;
4011 else
4012 rem_mappings = transfer_cmd->sources[source].mapping_count;
4013
4014 if ((transfer_cmd->source_count > 0 || fill_blit) && rem_mappings != 0U) {
4015 struct pvr_pds_pixel_shader_sa_program unitex_pds_prog = { 0U };
4016 struct pvr_transfer_cmd_source *src = &transfer_cmd->sources[source];
4017 struct pvr_rect_mapping fill_mapping;
4018 uint32_t mapping_offset = 0U;
4019 bool read_bgnd = false;
4020
4021 if (fill_blit) {
4022 uint32_t packed_color[4U] = { 0U };
4023
4024 if (vk_format_is_compressed(transfer_cmd->dst.vk_format)) {
4025 return vk_error(transfer_cmd->cmd_buffer,
4026 VK_ERROR_FORMAT_NOT_SUPPORTED);
4027 }
4028
4029 state->pds_shader_task_offset = 0U;
4030 state->uni_tex_code_offset = 0U;
4031 state->tex_state_data_offset = 0U;
4032 state->common_ptr = 0U;
4033
4034 result = pvr_pack_clear_color(transfer_cmd->dst.vk_format,
4035 transfer_cmd->clear_color,
4036 packed_color);
4037 if (result != VK_SUCCESS)
4038 return result;
4039
4040 fill_mapping.dst_rect = transfer_cmd->scissor;
4041
4042 pvr_csb_pack (®s->usc_clear_register0,
4043 CR_USC_CLEAR_REGISTER,
4044 reg) {
4045 reg.val = packed_color[0U];
4046 }
4047
4048 pvr_csb_pack (®s->usc_clear_register1,
4049 CR_USC_CLEAR_REGISTER,
4050 reg) {
4051 reg.val = packed_color[1U];
4052 }
4053
4054 pvr_csb_pack (®s->usc_clear_register2,
4055 CR_USC_CLEAR_REGISTER,
4056 reg) {
4057 reg.val = packed_color[2U];
4058 }
4059
4060 pvr_csb_pack (®s->usc_clear_register3,
4061 CR_USC_CLEAR_REGISTER,
4062 reg) {
4063 reg.val = packed_color[3U];
4064 }
4065
4066 state->pds_shader_task_offset =
4067 transfer_cmd->cmd_buffer->device->nop_program.pds.data_offset;
4068
4069 unitex_pds_prog.kick_usc = false;
4070 unitex_pds_prog.clear = false;
4071 } else {
4072 const bool down_scale = transfer_cmd->sources[source].resolve_op ==
4073 PVR_RESOLVE_BLEND &&
4074 src->surface.sample_count > 1U &&
4075 transfer_cmd->dst.sample_count <= 1U;
4076 struct pvr_tq_shader_properties *shader_props =
4077 &state->shader_props;
4078 struct pvr_tq_layer_properties *layer = &shader_props->layer_props;
4079 const struct pvr_tq_frag_sh_reg_layout *sh_reg_layout;
4080 enum pvr_transfer_pbe_pixel_src pbe_src_format;
4081 struct pvr_suballoc_bo *pvr_bo;
4082 uint32_t tex_state_dma_size;
4083 pvr_dev_addr_t dev_offset;
4084
4085          /* Reset the shared register bank ptrs; each src implies new texture
4086           * state. (Note that we don't change texture state per prim block.)
4087           */
4088 state->common_ptr = 0U;
4089 state->usc_const_reg_ptr = 0U;
4090 /* We don't use state->dynamic_const_reg_ptr here. */
4091
4092 if (flags & PVR_TRANSFER_CMD_FLAGS_DSMERGE)
4093 read_bgnd = true;
4094
4095 result = pvr_pbe_src_format_f2d(flags,
4096 src,
4097 transfer_cmd->dst.vk_format,
4098 down_scale,
4099 state->dont_force_pbe,
4100 &pbe_src_format);
4101 if (result != VK_SUCCESS)
4102 return result;
4103
4104 memset(shader_props, 0U, sizeof(*shader_props));
4105
4106 layer->pbe_format = pbe_src_format;
4107 layer->sample =
4108 (src->surface.mem_layout == PVR_MEMLAYOUT_3DTWIDDLED);
4109 shader_props->iterated = true;
4110
4111 shader_props->pick_component =
4112 pvr_pick_component_needed(&state->custom_mapping);
4113
4114 result = pvr_msaa_state(dev_info, transfer_cmd, state, source);
4115 if (result != VK_SUCCESS)
4116 return result;
4117
4118 if (state->filter[source] == PVR_FILTER_LINEAR &&
4119 pvr_requires_usc_linear_filter(src->surface.vk_format)) {
4120 if (pvr_int_pbe_usc_linear_filter(layer->pbe_format,
4121 layer->sample,
4122 layer->msaa,
4123 shader_props->full_rate)) {
4124 layer->linear = true;
4125 } else {
4126 mesa_logw("Transfer: F32 linear filter not supported.");
4127 }
4128 }
4129
4130 result = pvr_transfer_frag_store_get_shader_info(
4131 transfer_cmd->cmd_buffer->device,
4132 &ctx->frag_store,
4133 shader_props,
4134 &dev_offset,
4135 &sh_reg_layout);
4136 if (result != VK_SUCCESS)
4137 return result;
4138
4139 assert(dev_offset.addr <= UINT32_MAX);
4140 prep_data->state.pds_shader_task_offset = (uint32_t)dev_offset.addr;
4141
4142 result =
4143 pvr_pds_coeff_task(ctx, transfer_cmd, layer->sample, prep_data);
4144 if (result != VK_SUCCESS)
4145 return result;
4146
4147 unitex_pds_prog.kick_usc = false;
4148 unitex_pds_prog.clear = false;
4149
4150 tex_state_dma_size =
4151 sh_reg_layout->driver_total + sh_reg_layout->compiler_out_total;
4152
4153 unitex_pds_prog.num_texture_dma_kicks = 1U;
4154 unitex_pds_prog.num_uniform_dma_kicks = 0U;
4155
4156 /* Allocate memory for DMA. */
4157 result = pvr_cmd_buffer_alloc_mem(transfer_cmd->cmd_buffer,
4158 ctx->device->heaps.general_heap,
4159 tex_state_dma_size << 2U,
4160 &pvr_bo);
4161 if (result != VK_SUCCESS)
4162 return result;
4163
4164 result = pvr_sampler_state_for_surface(
4165 dev_info,
4166 &transfer_cmd->sources[source].surface,
4167 state->filter[source],
4168 sh_reg_layout,
4169 0U,
4170 pvr_bo_suballoc_get_map_addr(pvr_bo));
4171 if (result != VK_SUCCESS)
4172 return result;
4173
4174 result = pvr_image_state_for_surface(
4175 ctx,
4176 transfer_cmd,
4177 &transfer_cmd->sources[source].surface,
4178 0U,
4179 source,
4180 sh_reg_layout,
4181 state,
4182 0U,
4183 pvr_bo_suballoc_get_map_addr(pvr_bo));
4184 if (result != VK_SUCCESS)
4185 return result;
4186
4187 pvr_pds_encode_dma_burst(unitex_pds_prog.texture_dma_control,
4188 unitex_pds_prog.texture_dma_address,
4189 state->common_ptr,
4190 tex_state_dma_size,
4191 pvr_bo->dev_addr.addr,
4192 true,
4193 dev_info);
4194
4195 state->common_ptr += tex_state_dma_size;
4196
4197 pvr_write_usc_constants(sh_reg_layout,
4198 pvr_bo_suballoc_get_map_addr(pvr_bo));
4199
4200 if (pvr_pick_component_needed(&state->custom_mapping)) {
4201 pvr_dma_texel_unwind(state,
4202 sh_reg_layout,
4203 pvr_bo_suballoc_get_map_addr(pvr_bo));
4204 }
4205 }
4206
4207 result = pvr_pds_unitex(dev_info,
4208 ctx,
4209 transfer_cmd,
4210 &unitex_pds_prog,
4211 prep_data);
4212 if (result != VK_SUCCESS)
4213 return result;
4214
4215 while (rem_mappings > 0U) {
4216 const uint32_t min_free_ctrl_stream_words =
4217 PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) ? 2
4218 : 3;
4219 const uint32_t num_mappings =
4220 MIN2(max_mappings_per_pb, rem_mappings);
4221 struct pvr_rect_mapping *mappings = NULL;
4222 uint32_t stream_start_offset = 0U;
4223 pvr_dev_addr_t prim_blk_addr;
4224
4225 if (free_ctrl_stream_words < min_free_ctrl_stream_words) {
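            /* Not enough room left in the current region array for another
             * primitive reference plus a terminate; emit a link entry and
             * continue in the next preallocated region array.
             */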
4226 pvr_dev_addr_t next_region_array_vaddr = stream_base_vaddr;
4227
4228 num_region_arrays++;
4229 next_region_array_vaddr.addr +=
4230 num_region_arrays *
4231 PVR_DW_TO_BYTES(ROGUE_IPF_CONTROL_STREAM_SIZE_DWORDS);
4232
4233 if (PVR_HAS_FEATURE(dev_info,
4234 simple_internal_parameter_format_v2)) {
4235 uint32_t link_addr;
4236
4237 pvr_csb_pack (&link_addr,
4238 IPF_CONTROL_STREAM_LINK_SIPF2,
4239 control_stream) {
4240 control_stream.cs_ctrl_type =
4241 ROGUE_IPF_CS_CTRL_TYPE_SIPF2_LINK;
4242 control_stream.cs_link.addr = next_region_array_vaddr.addr;
4243 }
4244
4245 pvr_isp_ctrl_stream_sipf_write_aligned(
4246 (uint8_t *)cs_ptr,
4247 link_addr,
4248 PVR_DW_TO_BYTES(
4249 pvr_cmd_length(IPF_CONTROL_STREAM_LINK_SIPF2)));
4250 } else {
4251 pvr_csb_pack (cs_ptr, IPF_CONTROL_STREAM, control_stream) {
4252 control_stream.cs_type = ROGUE_IPF_CS_TYPE_LINK;
4253 control_stream.cs_link.addr = next_region_array_vaddr.addr;
4254 }
4255 }
4256
4257 cs_ptr =
4258 (uint32_t *)pvr_bo_suballoc_get_map_addr(pvr_cs_bo) +
4259 num_region_arrays * ROGUE_IPF_CONTROL_STREAM_SIZE_DWORDS;
4260 free_ctrl_stream_words = ROGUE_IPF_CONTROL_STREAM_SIZE_DWORDS;
4261
4262 was_linked = PVR_HAS_FEATURE(dev_info, ipf_creq_pf);
4263 }
4264
4265 if (fill_blit)
4266 mappings = pass ? pass->sources[source].mappings : &fill_mapping;
4267 else
4268 mappings = transfer_cmd->sources[source].mappings;
4269
4270 prim_blk_addr = stream_base_vaddr;
4271 prim_blk_addr.addr +=
4272 (uintptr_t)blk_cs_ptr -
4273 (uintptr_t)pvr_bo_suballoc_get_map_addr(pvr_cs_bo);
4274
4275 result = pvr_isp_primitive_block(dev_info,
4276 ctx,
4277 transfer_cmd,
4278 prep_data,
4279 fill_blit ? NULL : src,
4280 state->custom_filter,
4281 mappings,
4282 num_mappings,
4283 mapping_offset,
4284 read_bgnd,
4285 &stream_start_offset,
4286 &blk_cs_ptr);
4287 if (result != VK_SUCCESS)
4288 return result;
4289
4290 prim_blk_addr.addr += stream_start_offset;
4291
4292 if (PVR_HAS_FEATURE(dev_info,
4293 simple_internal_parameter_format_v2)) {
4294 uint8_t *cs_byte_ptr = (uint8_t *)cs_ptr;
4295 uint32_t tmp;
4296
4297 /* This part of the control stream is byte granular. */
4298
4299 pvr_csb_pack (&tmp, IPF_PRIMITIVE_HEADER_SIPF2, prim_header) {
4300 prim_header.cs_prim_base_size = 1;
4301 prim_header.cs_mask_num_bytes = 1;
4302 prim_header.cs_valid_tile0 = true;
4303 }
4304 cs_byte_ptr =
4305 pvr_isp_ctrl_stream_sipf_write_aligned(cs_byte_ptr, tmp, 1);
4306
4307 pvr_csb_pack (&tmp, IPF_PRIMITIVE_BASE_SIPF2, word) {
4308 word.cs_prim_base = prim_blk_addr;
4309 }
4310 cs_byte_ptr =
4311 pvr_isp_ctrl_stream_sipf_write_aligned(cs_byte_ptr, tmp, 4);
4312
4313 /* IPF_BYTE_BASED_MASK_ONE_BYTE_WORD_0_SIPF2 since
4314 * IPF_PRIMITIVE_HEADER_SIPF2.cs_mask_num_bytes == 1.
4315 */
4316 pvr_csb_pack (&tmp,
4317 IPF_BYTE_BASED_MASK_ONE_BYTE_WORD_0_SIPF2,
4318 mask) {
4319 switch (num_mappings) {
4320 case 4:
4321 mask.cs_mask_one_byte_tile0_7 = true;
4322 mask.cs_mask_one_byte_tile0_6 = true;
4323 FALLTHROUGH;
4324 case 3:
4325 mask.cs_mask_one_byte_tile0_5 = true;
4326 mask.cs_mask_one_byte_tile0_4 = true;
4327 FALLTHROUGH;
4328 case 2:
4329 mask.cs_mask_one_byte_tile0_3 = true;
4330 mask.cs_mask_one_byte_tile0_2 = true;
4331 FALLTHROUGH;
4332 case 1:
4333 mask.cs_mask_one_byte_tile0_1 = true;
4334 mask.cs_mask_one_byte_tile0_0 = true;
4335 break;
4336 default:
4337 /* Unreachable since we clamped the value earlier so
4338 * reaching this is an implementation error.
4339 */
4340 unreachable("num_mapping exceeded max_mappings_per_pb");
4341 break;
4342 }
4343 }
4344 /* Only 1 byte since there's only 1 valid tile within the single
4345 * IPF_BYTE_BASED_MASK_ONE_BYTE_WORD_0_SIPF2 mask.
4346 * ROGUE_IPF_PRIMITIVE_HEADER_SIPF2.cs_valid_tile0 == true.
4347 */
4348 cs_byte_ptr =
4349 pvr_isp_ctrl_stream_sipf_write_aligned(cs_byte_ptr, tmp, 1);
4350
4351 cs_ptr = (uint32_t *)cs_byte_ptr;
4352
4353 free_ctrl_stream_words -= 2;
4354 } else {
4355 pvr_csb_pack (cs_ptr, IPF_PRIMITIVE_FORMAT, word) {
4356 word.cs_type = ROGUE_IPF_CS_TYPE_PRIM;
4357 word.cs_isp_state_read = true;
4358 word.cs_isp_state_size = 2U;
4359 word.cs_prim_total = 2U * num_mappings - 1U;
4360 word.cs_mask_fmt = ROGUE_IPF_CS_MASK_FMT_FULL;
4361 word.cs_prim_base_pres = true;
4362 }
4363 cs_ptr += pvr_cmd_length(IPF_PRIMITIVE_FORMAT);
4364
4365 pvr_csb_pack (cs_ptr, IPF_PRIMITIVE_BASE, word) {
4366 word.cs_prim_base = prim_blk_addr;
4367 }
4368 cs_ptr += pvr_cmd_length(IPF_PRIMITIVE_BASE);
4369
4370 free_ctrl_stream_words -= 2;
4371 }
4372
4373 rem_mappings -= num_mappings;
4374 mapping_offset += num_mappings;
4375 }
4376 }
4377
4378 source++;
4379
4380 /* A fill blit may also have sources for normal blits. */
4381 if (fill_blit && transfer_cmd->source_count > 0) {
4382       /* For custom mappings the fill blit count equals the source blit
4383        * count, while normal blits use only one fill blit.
4384        */
4385 if (state->custom_mapping.pass_count == 0 && source > num_sources) {
4386 fill_blit = false;
4387 source = 0;
4388 }
4389 }
4390 }
4391
4392 if (PVR_HAS_FEATURE(dev_info, ipf_creq_pf))
4393 assert((num_region_arrays > 1) == was_linked);
4394
4395 if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format_v2)) {
4396 uint8_t *cs_byte_ptr = (uint8_t *)cs_ptr;
4397 uint32_t tmp;
4398
4399 /* clang-format off */
4400 pvr_csb_pack (&tmp, IPF_CONTROL_STREAM_TERMINATE_SIPF2, term);
4401 /* clang-format on */
4402
4403 cs_byte_ptr = pvr_isp_ctrl_stream_sipf_write_aligned(cs_byte_ptr, tmp, 1);
4404
4405 cs_ptr = (uint32_t *)cs_byte_ptr;
4406 } else {
4407 pvr_csb_pack (cs_ptr, IPF_CONTROL_STREAM, word) {
4408 word.cs_type = ROGUE_IPF_CS_TYPE_TERM;
4409 }
4410 cs_ptr += pvr_cmd_length(IPF_CONTROL_STREAM);
4411 }
4412
4413 pvr_csb_pack (®s->isp_mtile_base, CR_ISP_MTILE_BASE, reg) {
4414 reg.addr =
4415 PVR_DEV_ADDR(pvr_cs_bo->dev_addr.addr -
4416 ctx->device->heaps.transfer_frag_heap->base_addr.addr);
4417 }
4418
4419 pvr_csb_pack (®s->isp_render, CR_ISP_RENDER, reg) {
4420 reg.mode_type = ROGUE_CR_ISP_RENDER_MODE_TYPE_FAST_2D;
4421 }
4422
4423 if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format_v2) &&
4424 PVR_HAS_FEATURE(dev_info, ipf_creq_pf)) {
4425 pvr_csb_pack (®s->isp_rgn, CR_ISP_RGN_SIPF, isp_rgn) {
4426 /* Bit 0 in CR_ISP_RGN.cs_size_ipf_creq_pf is used to indicate the
4427 * presence of a link.
4428 */
4429 isp_rgn.cs_size_ipf_creq_pf = was_linked;
4430 }
4431 } else {
4432 /* clang-format off */
4433 pvr_csb_pack(®s->isp_rgn, CR_ISP_RGN, isp_rgn);
4434 /* clang-format on */
4435 }
4436
4437 return VK_SUCCESS;
4438 }
4439
4440 static void pvr_transfer_set_filter(struct pvr_transfer_cmd *transfer_cmd,
4441 struct pvr_transfer_3d_state *state)
4442 {
4443 for (uint32_t i = 0; i < transfer_cmd->source_count; i++) {
4444 VkRect2D *src = &transfer_cmd->sources[i].mappings[0U].src_rect;
4445 VkRect2D *dst = &transfer_cmd->sources[i].mappings[0U].dst_rect;
4446
4447 /* If no scaling is applied to the copy region, we can use point
4448 * filtering.
4449 */
4450 if (!state->custom_filter && (src->extent.width == dst->extent.width) &&
4451 (src->extent.height == dst->extent.height))
4452 state->filter[i] = PVR_FILTER_POINT;
4453 else
4454 state->filter[i] = transfer_cmd->sources[i].filter;
4455 }
4456 }
4457
4458 /** Generates hw resources to kick a 3D clip blit. */
4459 static VkResult pvr_3d_clip_blit(struct pvr_transfer_ctx *ctx,
4460 struct pvr_transfer_cmd *transfer_cmd,
4461 struct pvr_transfer_prep_data *prep_data,
4462 uint32_t pass_idx,
4463 bool *finished_out)
4464 {
4465 struct pvr_transfer_3d_state *state = &prep_data->state;
4466 uint32_t texel_unwind_src = state->custom_mapping.texel_unwind_src;
4467 struct pvr_transfer_cmd bg_cmd = { 0U };
4468 uint32_t control_reg;
4469 VkResult result;
4470
4471 state->dont_force_pbe = false;
4472 bg_cmd.scissor = transfer_cmd->scissor;
4473 bg_cmd.cmd_buffer = transfer_cmd->cmd_buffer;
4474 bg_cmd.flags = transfer_cmd->flags;
4475 bg_cmd.flags &=
4476 ~(PVR_TRANSFER_CMD_FLAGS_FAST2D | PVR_TRANSFER_CMD_FLAGS_FILL |
4477 PVR_TRANSFER_CMD_FLAGS_DSMERGE | PVR_TRANSFER_CMD_FLAGS_PICKD);
4478
4479 bg_cmd.source_count = state->custom_mapping.pass_count > 0U ? 0 : 1;
4480 if (bg_cmd.source_count > 0) {
4481 struct pvr_transfer_cmd_source *src = &bg_cmd.sources[0];
4482
4483 src->mappings[0U].src_rect = transfer_cmd->scissor;
4484 src->mappings[0U].dst_rect = transfer_cmd->scissor;
4485 src->resolve_op = PVR_RESOLVE_BLEND;
4486 src->surface = transfer_cmd->dst;
4487 }
4488
4489 state->filter[0] = PVR_FILTER_DONTCARE;
4490 bg_cmd.dst = transfer_cmd->dst;
4491 state->custom_mapping.texel_unwind_src =
4492 state->custom_mapping.texel_unwind_dst;
4493
4494 result =
4495 pvr_3d_copy_blit_core(ctx, &bg_cmd, prep_data, pass_idx, finished_out);
4496 if (result != VK_SUCCESS)
4497 return result;
4498
4499 /* If the destination has 4 channels and the source has at most 2, we still
4500 * need all 4 channels from the USC into the PBE.
4501 */
4502 state->dont_force_pbe = true;
4503 state->custom_mapping.texel_unwind_src = texel_unwind_src;
4504
4505 /* We need the viewport mask, otherwise all pixels would be disabled. */
4506 pvr_csb_pack (&control_reg, CR_ISP_BGOBJVALS, reg) {
4507 reg.mask = true;
4508 }
4509 state->regs.isp_bgobjvals |= control_reg;
4510
4511 pvr_transfer_set_filter(transfer_cmd, state);
4512 result = pvr_isp_ctrl_stream(&ctx->device->pdevice->dev_info,
4513 ctx,
4514 transfer_cmd,
4515 prep_data);
4516 if (result != VK_SUCCESS)
4517 return result;
4518
4519    /* In the resolve M -> S case, the accumulation is read from and written
4520     * to a single-sampled surface. Make sure that we are resolving and that
4521     * we have the right number of tiles.
4522     */
4523 if (state->down_scale) {
4524 uint64_t tmp;
4525
4526 pvr_csb_pack (&tmp, CR_PBE_WORD0_MRT0, reg) {
4527 reg.downscale = true;
4528 }
4529 state->regs.pbe_wordx_mrty[0U] |= tmp;
4530
4531 result = pvr_isp_tiles(ctx->device, state);
4532 if (result != VK_SUCCESS)
4533 return result;
4534 }
4535
4536 return VK_SUCCESS;
4537 }
4538
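/* Steps the surface address back one texel at a time (at most 15 texels)
 * until it is suitably aligned for the hardware. The number of texels
 * stepped back is the "texel unwind" which the rect mappings compensate
 * for later; returns false if no aligned address is found.
 */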
4539 static bool pvr_texel_unwind(uint32_t bpp,
4540 pvr_dev_addr_t dev_addr,
4541 bool is_input,
4542 uint32_t texel_extend,
4543 uint32_t *texel_unwind_out)
4544 {
4545 uint32_t texel_unwind = 0U;
4546
4547 for (uint32_t i = 0U; i < 16U; i++) {
4548 if (pvr_is_surface_aligned(dev_addr, is_input, bpp)) {
4549 break;
4550 } else {
4551 if (i == 15U) {
4552 return false;
4553 } else {
4554 dev_addr.addr -= (bpp / texel_extend) / 8U;
4555 texel_unwind++;
4556 }
4557 }
4558 }
4559
4560 *texel_unwind_out = texel_unwind;
4561
4562 return true;
4563 }
4564
4565 static bool pvr_is_identity_mapping(const struct pvr_rect_mapping *mapping)
4566 {
4567 return (mapping->src_rect.offset.x == mapping->dst_rect.offset.x &&
4568 mapping->src_rect.offset.y == mapping->dst_rect.offset.y &&
4569 mapping->src_rect.extent.width == mapping->dst_rect.extent.width &&
4570 mapping->src_rect.extent.height == mapping->dst_rect.extent.height);
4571 }
4572
4573 static inline bool pvr_is_pbe_stride_aligned(const uint32_t stride)
4574 {
4575 if (stride == 1U)
4576 return true;
4577
4578 return ((stride & (ROGUE_PBESTATE_REG_WORD0_LINESTRIDE_UNIT_SIZE - 1U)) ==
4579 0x0U);
4580 }
4581
4582 static struct pvr_transfer_pass *
4583 pvr_create_pass(struct pvr_transfer_custom_mapping *custom_mapping,
4584 uint32_t dst_offset)
4585 {
4586 struct pvr_transfer_pass *pass;
4587
4588 assert(custom_mapping->pass_count < PVR_TRANSFER_MAX_PASSES);
4589
4590 pass = &custom_mapping->passes[custom_mapping->pass_count];
4591 pass->clip_rects_count = 0U;
4592 pass->dst_offset = dst_offset;
4593 pass->source_count = 0U;
4594
4595 custom_mapping->pass_count++;
4596
4597 return pass;
4598 }
4599
4600 /* Acquire the pass with the given offset. If one doesn't exist, create it. */
4601 static struct pvr_transfer_pass *
4602 pvr_acquire_pass(struct pvr_transfer_custom_mapping *custom_mapping,
4603 uint32_t dst_offset)
4604 {
4605 for (uint32_t i = 0U; i < custom_mapping->pass_count; i++) {
4606 if (custom_mapping->passes[i].dst_offset == dst_offset)
4607 return &custom_mapping->passes[i];
4608 }
4609
4610 return pvr_create_pass(custom_mapping, dst_offset);
4611 }
4612
4613 static struct pvr_transfer_wa_source *
4614 pvr_create_source(struct pvr_transfer_pass *pass,
4615 uint32_t src_offset,
4616 bool extend_height)
4617 {
4618 struct pvr_transfer_wa_source *src;
4619
4620 assert(pass->source_count < ARRAY_SIZE(pass->sources));
4621
4622 src = &pass->sources[pass->source_count];
4623 src->mapping_count = 0U;
4624 src->extend_height = extend_height;
4625
4626 pass->source_count++;
4627
4628 return src;
4629 }
4630
4631 /* Acquire the source with the given offset. If one doesn't exist, create it. */
4632 static struct pvr_transfer_wa_source *
4633 pvr_acquire_source(struct pvr_transfer_pass *pass,
4634 uint32_t src_offset,
4635 bool extend_height)
4636 {
4637 for (uint32_t i = 0U; i < pass->source_count; i++) {
4638 if (pass->sources[i].src_offset == src_offset &&
4639 pass->sources[i].extend_height == extend_height)
4640 return &pass->sources[i];
4641 }
4642
4643 return pvr_create_source(pass, src_offset, extend_height);
4644 }
4645
4646 static void pvr_remove_source(struct pvr_transfer_pass *pass, uint32_t idx)
4647 {
4648 assert(idx < pass->source_count);
4649
4650 for (uint32_t i = idx; i < (pass->source_count - 1U); i++)
4651 pass->sources[i] = pass->sources[i + 1U];
4652
4653 pass->source_count--;
4654 }
4655
4656 static void pvr_remove_mapping(struct pvr_transfer_wa_source *src, uint32_t idx)
4657 {
4658 assert(idx < src->mapping_count);
4659
4660 for (uint32_t i = idx; i < (src->mapping_count - 1U); i++)
4661 src->mappings[i] = src->mappings[i + 1U];
4662
4663 src->mapping_count--;
4664 }
4665
4666 static struct pvr_rect_mapping *
4667 pvr_create_mapping(struct pvr_transfer_wa_source *src)
4668 {
4669 assert(src->mapping_count < ARRAY_SIZE(src->mappings));
4670
4671 return &src->mappings[src->mapping_count++];
4672 }
4673
4674 /**
4675  * If the PBE can't write to surfaces with an odd stride, the stride of the
4676  * destination surface is doubled to make it even and the height of the
4677  * surface is halved. The source surface is not resized. Each half of the
4678  * modified destination surface samples every second row from the source
4679  * surface. This only works with nearest filtering.
4680  */
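/* E.g. with a stride of 3 doubled to 6, destination row y maps to row
 * y / 2 of the widened surface: even rows keep their x offset and odd
 * rows shift right by the original stride.
 */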
4681 static bool pvr_double_stride(struct pvr_transfer_pass *pass, uint32_t stride)
4682 {
4683 struct pvr_rect_mapping *mappings = pass->sources[0].mappings;
4684 uint32_t new_mapping = 0;
4685
4686 if (stride == 1U)
4687 return false;
4688
4689 if (mappings[0U].dst_rect.extent.height == 1U &&
4690 pass->sources[0].mapping_count == 1U) {
4691 /* Only one mapping required if height is 1. */
4692 if ((mappings[0U].dst_rect.offset.y & 1U) != 0U) {
4693 mappings[0U].dst_rect.offset.x += (int32_t)stride;
4694 mappings[0U].dst_rect.offset.y /= 2U;
4695 mappings[0U].dst_rect.extent.height =
4696 (mappings[0U].dst_rect.extent.height + 1U) / 2U;
4697 } else {
4698 mappings[0U].dst_rect.extent.height =
4699 (mappings[0U].dst_rect.offset.y +
4700 mappings[0U].dst_rect.extent.height + 1U) /
4701 2U -
4702 mappings[0U].dst_rect.offset.y;
4703 mappings[0U].dst_rect.offset.y /= 2U;
4704 }
4705
4706 return true;
4707 }
4708
4709 for (uint32_t i = 0; i < pass->sources[0].mapping_count; i++) {
4710 struct pvr_rect_mapping *mapping_a = &mappings[i];
4711 struct pvr_rect_mapping *mapping_b =
4712 &mappings[pass->sources[0].mapping_count + new_mapping];
4713 int32_t mapping_a_src_rect_y1 =
4714 mapping_a->src_rect.offset.y + mapping_a->src_rect.extent.height;
4715 int32_t mapping_b_src_rect_y1 = mapping_a_src_rect_y1;
4716 const bool dst_starts_odd_row = !!(mapping_a->dst_rect.offset.y & 1);
4717 const bool dst_ends_odd_row =
4718 !!((mapping_a->dst_rect.offset.y + mapping_a->dst_rect.extent.height) &
4719 1);
4720 const bool src_starts_odd_row = !!(mapping_a->src_rect.offset.y & 1);
4721 const bool src_ends_odd_row =
4722 !!((mapping_a->src_rect.offset.y + mapping_a->src_rect.extent.height) &
4723 1);
4724
4725 assert(pass->sources[0].mapping_count + new_mapping <
4726 ARRAY_SIZE(pass->sources[0].mappings));
4727 *mapping_b = *mapping_a;
4728
4729 mapping_a->src_rect.offset.y = ALIGN_POT(mapping_a->src_rect.offset.y, 2);
4730 if (dst_starts_odd_row && !src_starts_odd_row)
4731 mapping_a->src_rect.offset.y++;
4732 else if (!dst_starts_odd_row && src_starts_odd_row)
4733 mapping_a->src_rect.offset.y--;
4734
4735 mapping_a_src_rect_y1 = ALIGN_POT(mapping_a_src_rect_y1, 2);
4736 if (dst_ends_odd_row && !src_ends_odd_row)
4737 mapping_a_src_rect_y1++;
4738 else if (!dst_ends_odd_row && src_ends_odd_row)
4739 mapping_a_src_rect_y1--;
4740
4741 mapping_a->src_rect.extent.height =
4742 mapping_a_src_rect_y1 - mapping_a->src_rect.offset.y;
4743
4744 mapping_b->src_rect.offset.y = ALIGN_POT(mapping_b->src_rect.offset.y, 2);
4745 if (dst_starts_odd_row && src_starts_odd_row)
4746 mapping_b->src_rect.offset.y--;
4747 else if (!dst_starts_odd_row && !src_starts_odd_row)
4748 mapping_b->src_rect.offset.y++;
4749
4750 mapping_b_src_rect_y1 = ALIGN_POT(mapping_b_src_rect_y1, 2);
4751 if (dst_ends_odd_row && src_ends_odd_row)
4752 mapping_b_src_rect_y1--;
4753 else if (!dst_ends_odd_row && !src_ends_odd_row)
4754 mapping_b_src_rect_y1++;
4755
4756 mapping_b->src_rect.extent.height =
4757 mapping_b_src_rect_y1 - mapping_b->src_rect.offset.y;
4758
4759 /* Destination rectangles. */
4760 mapping_a->dst_rect.offset.y = mapping_a->dst_rect.offset.y / 2;
4761
4762 if (dst_starts_odd_row)
4763 mapping_a->dst_rect.offset.y++;
4764
4765 mapping_b->dst_rect.offset.x += stride;
4766 mapping_b->dst_rect.offset.y /= 2;
4767 mapping_b->dst_rect.extent.height /= 2;
4768 mapping_a->dst_rect.extent.height -= mapping_b->dst_rect.extent.height;
4769
4770 if (!mapping_a->src_rect.extent.width ||
4771 !mapping_a->src_rect.extent.height) {
4772 *mapping_a = *mapping_b;
4773 } else if (mapping_b->src_rect.extent.width &&
4774 mapping_b->src_rect.extent.height) {
4775 new_mapping++;
4776 }
4777 }
4778
4779 pass->sources[0].mapping_count++;
4780
4781 return true;
4782 }
4783
4784 static void pvr_split_rect(uint32_t stride,
4785 uint32_t height,
4786 uint32_t texel_unwind,
4787 VkRect2D *rect_a,
4788 VkRect2D *rect_b)
4789 {
4790 rect_a->offset.x = 0;
4791 rect_a->extent.width = stride - texel_unwind;
4792 rect_a->offset.y = 0;
4793 rect_a->extent.height = height;
4794
4795 rect_b->offset.x = (int32_t)stride - texel_unwind;
4796 rect_b->extent.width = texel_unwind;
4797 rect_b->offset.y = 0;
4798 rect_b->extent.height = height;
4799 }
4800
4801 static bool pvr_rect_width_covered_by(const VkRect2D *rect_a,
4802 const VkRect2D *rect_b)
4803 {
4804 return (rect_b->offset.x <= rect_a->offset.x &&
4805 (rect_b->offset.x + rect_b->extent.width) >=
4806 (rect_a->offset.x + rect_a->extent.width));
4807 }
4808
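/* Remaps rects on a surface whose base address was unwound by
 * 'texel_unwind' texels: every texel moves 'texel_unwind' positions to
 * the right in row-major order, so a rect either shifts right, wraps to
 * the next row, or has to be split in two at the wrap point.
 */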
4809 static void pvr_unwind_rects(uint32_t width,
4810 uint32_t height,
4811 uint32_t texel_unwind,
4812 bool input,
4813 struct pvr_transfer_pass *pass)
4814 {
4815 struct pvr_transfer_wa_source *const source = &pass->sources[0];
4816 struct pvr_rect_mapping *const mappings = source->mappings;
4817 const uint32_t num_mappings = source->mapping_count;
4818 VkRect2D rect_a, rect_b;
4819
4820 if (texel_unwind == 0)
4821 return;
4822
4823 pvr_split_rect(width, height, texel_unwind, &rect_a, &rect_b);
4824
4825 for (uint32_t i = 0; i < num_mappings; i++) {
4826 VkRect2D *const old_rect = input ? &mappings[i].src_rect
4827 : &mappings[i].dst_rect;
4828
4829 if (height == 1) {
4830 old_rect->offset.x += texel_unwind;
4831 } else if (width == 1) {
4832 old_rect->offset.y += texel_unwind;
4833 } else if (pvr_rect_width_covered_by(old_rect, &rect_a)) {
4834 old_rect->offset.x += texel_unwind;
4835 } else if (pvr_rect_width_covered_by(old_rect, &rect_b)) {
4836 old_rect->offset.x = texel_unwind - width + old_rect->offset.x;
4837 old_rect->offset.y++;
4838 } else {
4839 /* Mapping requires split. */
4840 const uint32_t new_mapping = source->mapping_count++;
4841
4842 VkRect2D *const new_rect = input ? &mappings[new_mapping].src_rect
4843 : &mappings[new_mapping].dst_rect;
4844
4845 VkRect2D *const new_rect_opp = input ? &mappings[new_mapping].dst_rect
4846 : &mappings[new_mapping].src_rect;
4847 VkRect2D *const old_rect_opp = input ? &mappings[i].dst_rect
4848 : &mappings[i].src_rect;
4849
4850 const uint32_t split_point = width - texel_unwind;
4851 const uint32_t split_width =
4852 old_rect->offset.x + old_rect->extent.width - split_point;
4853
4854 assert(new_mapping < ARRAY_SIZE(source->mappings));
4855 mappings[new_mapping] = mappings[i];
4856
4857 old_rect_opp->extent.width -= split_width;
4858 new_rect_opp->extent.width = split_width;
4859 new_rect_opp->offset.x =
4860 old_rect_opp->offset.x + old_rect_opp->extent.width;
4861
4862 old_rect->offset.x += texel_unwind;
4863 old_rect->extent.width = width - old_rect->offset.x;
4864
4865 new_rect->offset.x = 0;
4866 new_rect->offset.y++;
4867 new_rect->extent.width = split_width;
4868 }
4869 }
4870 }
4871
4872 /**
4873 * Assign clip rects to rectangle mappings. TDM can only do two PBE clip
4874 * rects per screen.
4875 */
4876 static void
4877 pvr_map_clip_rects(struct pvr_transfer_custom_mapping *custom_mapping)
4878 {
4879 for (uint32_t i = 0U; i < custom_mapping->pass_count; i++) {
4880 struct pvr_transfer_pass *pass = &custom_mapping->passes[i];
4881
4882 pass->clip_rects_count = 0U;
4883
4884 for (uint32_t s = 0U; s < pass->source_count; s++) {
4885 struct pvr_transfer_wa_source *src = &pass->sources[s];
4886
4887 for (uint32_t j = 0U; j < src->mapping_count; j++) {
4888 struct pvr_rect_mapping *mappings = src->mappings;
4889 VkRect2D *clip_rects = pass->clip_rects;
4890 bool merged = false;
4891
4892             /* Try to merge adjacent clip rects. */
4893 for (uint32_t k = 0U; k < pass->clip_rects_count; k++) {
4894 if (clip_rects[k].offset.y == mappings[j].dst_rect.offset.y &&
4895 clip_rects[k].extent.height ==
4896 mappings[j].dst_rect.extent.height &&
4897 clip_rects[k].offset.x + clip_rects[k].extent.width ==
4898 mappings[j].dst_rect.offset.x) {
4899 clip_rects[k].extent.width +=
4900 mappings[j].dst_rect.extent.width;
4901 merged = true;
4902 break;
4903 }
4904
4905 if (clip_rects[k].offset.y == mappings[j].dst_rect.offset.y &&
4906 clip_rects[k].extent.height ==
4907 mappings[j].dst_rect.extent.height &&
4908 clip_rects[k].offset.x ==
4909 mappings[j].dst_rect.offset.x +
4910 mappings[j].dst_rect.extent.width) {
4911 clip_rects[k].offset.x = mappings[j].dst_rect.offset.x;
4912 clip_rects[k].extent.width +=
4913 mappings[j].dst_rect.extent.width;
4914 merged = true;
4915 break;
4916 }
4917
4918 if (clip_rects[k].offset.x == mappings[j].dst_rect.offset.x &&
4919 clip_rects[k].extent.width ==
4920 mappings[j].dst_rect.extent.width &&
4921 clip_rects[k].offset.y + clip_rects[k].extent.height ==
4922 mappings[j].dst_rect.offset.y) {
4923 clip_rects[k].extent.height +=
4924 mappings[j].dst_rect.extent.height;
4925 merged = true;
4926 break;
4927 }
4928
4929 if (clip_rects[k].offset.x == mappings[j].dst_rect.offset.x &&
4930 clip_rects[k].extent.width ==
4931 mappings[j].dst_rect.extent.width &&
4932 clip_rects[k].offset.y ==
4933 mappings[j].dst_rect.offset.y +
4934 mappings[j].dst_rect.extent.height) {
4935 clip_rects[k].extent.height +=
4936 mappings[j].dst_rect.extent.height;
4937 clip_rects[k].offset.y = mappings[j].dst_rect.offset.y;
4938 merged = true;
4939 break;
4940 }
4941 }
4942
4943 if (merged)
4944 continue;
4945
4946             /* Create a new pass if needed; TDM can only have 2 clip rects. */
4947 if (pass->clip_rects_count >= custom_mapping->max_clip_rects) {
4948 struct pvr_transfer_pass *new_pass =
4949 pvr_create_pass(custom_mapping, pass->dst_offset);
4950 struct pvr_transfer_wa_source *new_source =
4951 pvr_create_source(new_pass,
4952 src->src_offset,
4953 src->extend_height);
4954 struct pvr_rect_mapping *new_mapping =
4955 pvr_create_mapping(new_source);
4956
4957 new_pass->clip_rects_count = 1U;
4958 *new_mapping = src->mappings[j];
4959
4960 pvr_remove_mapping(src, j);
4961
4962 if (src->mapping_count == 0) {
4963 pvr_remove_source(pass, s);
4964 s--;
4965 } else {
4966 /* Redo - mapping was replaced. */
4967 j--;
4968 }
4969 } else {
4970 pass->clip_rects[pass->clip_rects_count] =
4971 src->mappings[j].dst_rect;
4972
4973 pass->clip_rects_count++;
4974
4975 assert(pass->clip_rects_count <= ARRAY_SIZE(pass->clip_rects));
4976 }
4977 }
4978 }
4979 }
4980 }
4981
4982 static bool pvr_extend_height(const VkRect2D *rect,
4983 const uint32_t height,
4984 const uint32_t unwind_src)
4985 {
4986 if (rect->offset.x >= (int32_t)unwind_src)
4987 return false;
4988
4989 return (rect->offset.y > (int32_t)height) ||
4990 ((rect->offset.y + rect->extent.height) > (int32_t)height);
4991 }
4992
4993 static void
4994 pvr_generate_custom_mapping(uint32_t src_stride,
4995 uint32_t src_width,
4996 uint32_t src_height,
4997 uint32_t dst_stride,
4998 uint32_t dst_width,
4999 uint32_t dst_height,
5000 enum pvr_memlayout dst_mem_layout,
5001 struct pvr_transfer_custom_mapping *custom_mapping)
5002 {
5003 src_stride *= custom_mapping->texel_extend_src;
5004 src_width *= custom_mapping->texel_extend_src;
5005 dst_stride *= custom_mapping->texel_extend_dst;
5006 dst_width *= custom_mapping->texel_extend_dst;
5007
5008 if (custom_mapping->texel_unwind_src > 0U) {
5009 pvr_unwind_rects(src_stride,
5010 src_height,
5011 custom_mapping->texel_unwind_src,
5012 true,
5013 &custom_mapping->passes[0U]);
5014 }
5015
5016 if (custom_mapping->double_stride) {
5017 custom_mapping->double_stride =
5018 pvr_double_stride(&custom_mapping->passes[0U], dst_stride);
5019
5020 dst_stride *= 2U;
5021 }
5022
5023 pvr_unwind_rects(dst_stride,
5024 dst_height,
5025 custom_mapping->texel_unwind_dst,
5026 false,
5027 &custom_mapping->passes[0U]);
5028
5029 pvr_map_clip_rects(custom_mapping);
5030
5031    /* If the last row of the source mapping is sampled, the height of the
5032     * surface can only be increased if the new area contains a valid region.
5033     * Some blits are split into two sources.
5034     */
5035 if (custom_mapping->texel_unwind_src > 0U) {
5036 for (uint32_t i = 0; i < custom_mapping->pass_count; i++) {
5037 struct pvr_transfer_pass *pass = &custom_mapping->passes[i];
5038
5039 for (uint32_t j = 0; j < pass->source_count; j++) {
5040 struct pvr_transfer_wa_source *src = &pass->sources[j];
5041
5042 for (uint32_t k = 0; k < src->mapping_count; k++) {
5043 VkRect2D *src_rect = &src->mappings[k].src_rect;
5044 bool extend_height =
5045 pvr_extend_height(src_rect,
5046 src_height,
5047 custom_mapping->texel_unwind_src);
5048
5049 if (src->mapping_count == 1) {
5050 src->extend_height = extend_height;
5051 } else if (!src->extend_height && extend_height) {
5052 struct pvr_transfer_wa_source *new_src =
5053 pvr_acquire_source(pass, src->src_offset, extend_height);
5054
5055 new_src->mappings[new_src->mapping_count] = src->mappings[k];
5056 new_src->src_offset = src->src_offset;
5057
5058 for (uint32_t l = k + 1; l < src->mapping_count; l++)
5059 src->mappings[l - 1] = src->mappings[l];
5060
5061 new_src->mapping_count++;
5062 src->mapping_count--;
5063 k--;
5064 }
5065 }
5066 }
5067 }
5068 }
5069 }
5070
5071 static bool
5072 pvr_get_custom_mapping(const struct pvr_device_info *dev_info,
5073 const struct pvr_transfer_cmd *transfer_cmd,
5074 uint32_t max_clip_rects,
5075 struct pvr_transfer_custom_mapping *custom_mapping)
5076 {
5077 const uint32_t dst_bpp =
5078 vk_format_get_blocksizebits(transfer_cmd->dst.vk_format);
5079 const struct pvr_transfer_cmd_source *src = NULL;
5080 struct pvr_transfer_pass *pass;
5081 bool ret;
5082
5083 custom_mapping->max_clip_rects = max_clip_rects;
5084 custom_mapping->texel_unwind_src = 0U;
5085 custom_mapping->texel_unwind_dst = 0U;
5086 custom_mapping->texel_extend_src = 1U;
5087 custom_mapping->texel_extend_dst = 1U;
5088 custom_mapping->pass_count = 0U;
5089
5090 if (transfer_cmd->source_count > 1)
5091 return false;
5092
5093 custom_mapping->max_clip_size = PVR_MAX_CLIP_SIZE(dev_info);
5094
5095 ret = pvr_texel_unwind(dst_bpp,
5096 transfer_cmd->dst.dev_addr,
5097 false,
5098 1U,
5099 &custom_mapping->texel_unwind_dst);
5100 if (!ret) {
5101 custom_mapping->texel_extend_dst = dst_bpp / 8U;
5102 if (transfer_cmd->source_count > 0) {
5103 if (transfer_cmd->sources[0].surface.mem_layout ==
5104 PVR_MEMLAYOUT_LINEAR) {
5105 custom_mapping->texel_extend_src = custom_mapping->texel_extend_dst;
5106 } else if (transfer_cmd->sources[0].surface.mem_layout ==
5107 PVR_MEMLAYOUT_TWIDDLED &&
5108 transfer_cmd->sources[0].surface.height == 1U) {
5109 custom_mapping->texel_extend_src = custom_mapping->texel_extend_dst;
5110 }
5111 }
5112
5113 ret = pvr_texel_unwind(dst_bpp,
5114 transfer_cmd->dst.dev_addr,
5115 false,
5116 custom_mapping->texel_extend_dst,
5117 &custom_mapping->texel_unwind_dst);
5118 if (!ret)
5119 return false;
5120 }
5121
5122 if (transfer_cmd->source_count > 0) {
5123 src = &transfer_cmd->sources[0];
5124 const uint32_t src_bpp =
5125 vk_format_get_blocksizebits(src->surface.vk_format);
5126
5127 ret = pvr_is_surface_aligned(src->surface.dev_addr, true, src_bpp);
5128
5129 if (!ret && (src->surface.mem_layout == PVR_MEMLAYOUT_LINEAR ||
5130 src->surface.height == 1U)) {
5131 ret = pvr_texel_unwind(src_bpp,
5132 src->surface.dev_addr,
5133 true,
5134 custom_mapping->texel_extend_src,
5135 &custom_mapping->texel_unwind_src);
5136 }
5137
5138 if (!ret) {
5139 custom_mapping->texel_extend_src = dst_bpp / 8U;
5140 custom_mapping->texel_extend_dst = custom_mapping->texel_extend_src;
5141
5142 ret = pvr_texel_unwind(src_bpp,
5143 src->surface.dev_addr,
5144 true,
5145 custom_mapping->texel_extend_src,
5146 &custom_mapping->texel_unwind_src);
5147 }
5148
5149 if (!ret)
5150 return false;
5151 }
5152
5153 VkRect2D rect = transfer_cmd->scissor;
5154 assert(
5155 (rect.offset.x + rect.extent.width) <= custom_mapping->max_clip_size &&
5156 (rect.offset.y + rect.extent.height) <= custom_mapping->max_clip_size);
5157
5158 /* Texel extend only works with a strided memory layout, because the pixel
5159 * width is changed. Texel unwind likewise only works with a strided memory
5160 * layout. 1D blits are the allowed exception.
5161 */
5162 if (src && src->surface.height > 1U &&
5163 (custom_mapping->texel_extend_src > 1U ||
5164 custom_mapping->texel_unwind_src > 0U) &&
5165 src->surface.mem_layout != PVR_MEMLAYOUT_LINEAR) {
5166 return false;
5167 }
5168
5169 /* The same restrictions apply to the destination: texel extend and texel
5170 * unwind again require a strided memory layout, with 1D blits being the
5171 * allowed exception.
5172 */
5173 if ((custom_mapping->texel_extend_dst > 1U ||
5174 custom_mapping->texel_unwind_dst > 0U) &&
5175 transfer_cmd->dst.mem_layout != PVR_MEMLAYOUT_LINEAR &&
5176 transfer_cmd->dst.height > 1U) {
5177 return false;
5178 }
5179
5180 if (transfer_cmd->dst.mem_layout == PVR_MEMLAYOUT_LINEAR) {
5181 custom_mapping->double_stride = !pvr_is_pbe_stride_aligned(
5182 transfer_cmd->dst.stride * custom_mapping->texel_extend_dst);
5183 }
5184
5185 if (custom_mapping->texel_unwind_src > 0U ||
5186 custom_mapping->texel_unwind_dst > 0U || custom_mapping->double_stride) {
5187 struct pvr_transfer_wa_source *wa_src;
5188 struct pvr_rect_mapping *mapping;
5189
5190 pass = pvr_acquire_pass(custom_mapping, 0U);
5191 wa_src = pvr_create_source(pass, 0U, false);
5192 mapping = pvr_create_mapping(wa_src);
5193
5194 if (transfer_cmd->source_count > 0) {
5195 *mapping = src->mappings[0U];
5196 } else {
5197 mapping->src_rect = transfer_cmd->scissor;
5198 mapping->dst_rect = transfer_cmd->scissor;
5199 }
5200 } else {
5201 return false;
5202 }
5203
5204 if (custom_mapping->texel_extend_src > 1U ||
5205 custom_mapping->texel_extend_dst > 1U) {
5206 pass->sources[0].mappings[0U].src_rect.offset.x *=
5207 (int32_t)custom_mapping->texel_extend_dst;
5208 pass->sources[0].mappings[0U].src_rect.extent.width *=
5209 (int32_t)custom_mapping->texel_extend_dst;
5210 pass->sources[0].mappings[0U].dst_rect.offset.x *=
5211 (int32_t)custom_mapping->texel_extend_dst;
5212 pass->sources[0].mappings[0U].dst_rect.extent.width *=
5213 (int32_t)custom_mapping->texel_extend_dst;
5214 }
5215
5216 if (transfer_cmd->source_count > 0) {
5217 pvr_generate_custom_mapping(transfer_cmd->sources[0].surface.stride,
5218 transfer_cmd->sources[0].surface.width,
5219 transfer_cmd->sources[0].surface.height,
5220 transfer_cmd->dst.stride,
5221 transfer_cmd->dst.width,
5222 transfer_cmd->dst.height,
5223 transfer_cmd->dst.mem_layout,
5224 custom_mapping);
5225 } else {
5226 pvr_generate_custom_mapping(0U,
5227 0U,
5228 0U,
5229 transfer_cmd->dst.stride,
5230 transfer_cmd->dst.width,
5231 transfer_cmd->dst.height,
5232 transfer_cmd->dst.mem_layout,
5233 custom_mapping);
5234 }
5235
5236 return true;
5237 }
5238
5239 static void pvr_pbe_extend_rect(uint32_t texel_extend, VkRect2D *rect)
5240 {
5241 rect->offset.x *= texel_extend;
5242 rect->extent.width *= texel_extend;
5243 }
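/* Worked example (editor's note): with texel_extend = 4, a rect of
 * { .offset.x = 2, .extent.width = 8 } becomes { .offset.x = 8,
 * .extent.width = 32 }, i.e. x coordinates are rescaled into the widened
 * R8 pixel space.
 */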
5244
5245 static void pvr_pbe_rect_intersect(VkRect2D *rect_a, VkRect2D *rect_b)
5246 {
5247 rect_a->extent.width = MIN2(rect_a->offset.x + rect_a->extent.width,
5248 rect_b->offset.x + rect_b->extent.width) -
5249 MAX2(rect_a->offset.x, rect_b->offset.x);
5250 rect_a->offset.x = MAX2(rect_a->offset.x, rect_b->offset.x);
5251 rect_a->extent.height = MIN2(rect_a->offset.y + rect_a->extent.height,
5252 rect_b->offset.y + rect_b->extent.height) -
5253 MAX2(rect_a->offset.y, rect_b->offset.y);
5254 rect_a->offset.y = MAX2(rect_a->offset.y, rect_b->offset.y);
5255 }
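/* rect_a is intersected in place; note the extents are computed before the
 * offsets are clamped, so both expressions still see the original offsets.
 * Editor's example: A = { x = 0, w = 10 } intersected with
 * B = { x = 4, w = 10 } yields A = { x = 4, w = 6 }.
 */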
5256
5257 static VkFormat pvr_texel_extend_src_format(VkFormat vk_format)
5258 {
5259 uint32_t bpp = vk_format_get_blocksizebits(vk_format);
5260 VkFormat ext_format;
5261
5262 switch (bpp) {
5263 case 16:
5264 ext_format = VK_FORMAT_R8G8_UINT;
5265 break;
5266 case 32:
5267 ext_format = VK_FORMAT_R8G8B8A8_UINT;
5268 break;
5269 case 48:
5270 ext_format = VK_FORMAT_R16G16B16_UINT;
5271 break;
5272 default:
5273 ext_format = VK_FORMAT_R8_UINT;
5274 break;
5275 }
5276
5277 return ext_format;
5278 }
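/* E.g. a 32 bpp source is remapped to VK_FORMAT_R8G8B8A8_UINT: the texel is
 * reinterpreted as four 8-bit channels, so the raw bytes pass through the
 * texel extend unchanged (editor's note).
 */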
5279
5280 static void
5281 pvr_modify_command(struct pvr_transfer_custom_mapping *custom_mapping,
5282 uint32_t pass_idx,
5283 struct pvr_transfer_cmd *transfer_cmd)
5284 {
5285 struct pvr_transfer_pass *pass = &custom_mapping->passes[pass_idx];
5286 uint32_t bpp;
5287
5288 if (custom_mapping->texel_extend_src > 1U) {
5289 struct pvr_rect_mapping *mapping = &transfer_cmd->sources[0].mappings[0];
5290
5291 pvr_pbe_extend_rect(custom_mapping->texel_extend_src, &mapping->dst_rect);
5292 pvr_pbe_extend_rect(custom_mapping->texel_extend_src, &mapping->src_rect);
5293
5294 transfer_cmd->dst.vk_format = VK_FORMAT_R8_UINT;
5295 transfer_cmd->dst.width *= custom_mapping->texel_extend_src;
5296 transfer_cmd->dst.stride *= custom_mapping->texel_extend_src;
5297 transfer_cmd->sources[0].surface.vk_format = VK_FORMAT_R8_UINT;
5298 transfer_cmd->sources[0].surface.width *=
5299 custom_mapping->texel_extend_src;
5300 transfer_cmd->sources[0].surface.stride *=
5301 custom_mapping->texel_extend_src;
5302 } else if (custom_mapping->texel_extend_dst > 1U) {
5303 VkRect2D max_clip = {
5304 .offset = { 0, 0 },
5305 .extent = { custom_mapping->max_clip_size,
5306 custom_mapping->max_clip_size },
5307 };
5308
5309 pvr_pbe_extend_rect(custom_mapping->texel_extend_dst,
5310 &transfer_cmd->scissor);
5311
5312 pvr_pbe_rect_intersect(&transfer_cmd->scissor, &max_clip);
5313
5314 if (transfer_cmd->source_count > 0) {
5315 transfer_cmd->sources[0].surface.width *=
5316 custom_mapping->texel_extend_dst;
5317 transfer_cmd->sources[0].surface.stride *=
5318 custom_mapping->texel_extend_dst;
5319
5320 transfer_cmd->sources[0].surface.vk_format =
5321 pvr_texel_extend_src_format(
5322 transfer_cmd->sources[0].surface.vk_format);
5323 }
5324
5325 transfer_cmd->dst.vk_format = VK_FORMAT_R8_UINT;
5326 transfer_cmd->dst.width *= custom_mapping->texel_extend_dst;
5327 transfer_cmd->dst.stride *= custom_mapping->texel_extend_dst;
5328 }
5329
5330 if (custom_mapping->double_stride) {
5331 transfer_cmd->dst.width *= 2U;
5332 transfer_cmd->dst.stride *= 2U;
5333 }
5334
5335 if (custom_mapping->texel_unwind_src > 0U) {
5336 if (transfer_cmd->sources[0].surface.height == 1U) {
5337 transfer_cmd->sources[0].surface.width +=
5338 custom_mapping->texel_unwind_src;
5339 transfer_cmd->sources[0].surface.stride +=
5340 custom_mapping->texel_unwind_src;
5341 } else if (transfer_cmd->sources[0].surface.stride == 1U) {
5342 transfer_cmd->sources[0].surface.height +=
5343 custom_mapping->texel_unwind_src;
5344 } else {
5345 /* Increase the source width by the texel unwind. If the texel unwind is
5346 * less than the distance between width and stride, the blit can be done
5347 * with a single rectangle mapping, but the width of the surface needs to
5348 * be increased in case we sample from the area between width and stride.
5349 */
5350 transfer_cmd->sources[0].surface.width =
5351 MIN2(transfer_cmd->sources[0].surface.width +
5352 custom_mapping->texel_unwind_src,
5353 transfer_cmd->sources[0].surface.stride);
5354 }
5355 }
5356
5357 for (uint32_t i = 0U; i < pass->source_count; i++) {
5358 struct pvr_transfer_wa_source *src = &pass->sources[i];
5359
5360 if (i > 0)
5361 transfer_cmd->sources[i] = transfer_cmd->sources[0];
5362
5363 transfer_cmd->sources[i].mapping_count = src->mapping_count;
5364 for (uint32_t j = 0U; j < transfer_cmd->sources[i].mapping_count; j++)
5365 transfer_cmd->sources[i].mappings[j] = src->mappings[j];
5366
5367 if (src->extend_height)
5368 transfer_cmd->sources[i].surface.height += 1U;
5369
5370 transfer_cmd->sources[i].surface.width =
5371 MIN2(PVR_MAX_WIDTH, transfer_cmd->sources[i].surface.width);
5372 transfer_cmd->sources[i].surface.height =
5373 MIN2(PVR_MAX_HEIGHT, transfer_cmd->sources[i].surface.height);
5374 transfer_cmd->sources[i].surface.stride =
5375 MIN2(PVR_MAX_WIDTH, transfer_cmd->sources[i].surface.stride);
5376 }
5377
5378 if (transfer_cmd->dst.height == 1U) {
5379 transfer_cmd->dst.width =
5380 transfer_cmd->dst.stride + custom_mapping->texel_unwind_dst;
5381 transfer_cmd->dst.mem_layout = PVR_MEMLAYOUT_TWIDDLED;
5382 }
5383
5384 if (transfer_cmd->dst.mem_layout == PVR_MEMLAYOUT_TWIDDLED) {
5385 transfer_cmd->dst.width =
5386 MIN2((uint32_t)custom_mapping->max_clip_size, transfer_cmd->dst.width);
5387 transfer_cmd->dst.height = MIN2((uint32_t)custom_mapping->max_clip_size,
5388 transfer_cmd->dst.height);
5389 } else {
5390 transfer_cmd->dst.width = MIN2(PVR_MAX_WIDTH, transfer_cmd->dst.width);
5391 }
5392
5393 if (transfer_cmd->source_count > 0) {
5394 for (uint32_t i = 0; i < pass->source_count; i++) {
5395 struct pvr_transfer_cmd_source *src = &transfer_cmd->sources[i];
5396
5397 bpp = vk_format_get_blocksizebits(src->surface.vk_format);
5398
5399 src->surface.dev_addr.addr -=
5400 custom_mapping->texel_unwind_src * bpp / 8U;
5401 src->surface.dev_addr.addr += MAX2(src->surface.sample_count, 1U) *
5402 pass->sources[i].src_offset * bpp / 8U;
5403 }
5404 }
5405
5406 bpp = vk_format_get_blocksizebits(transfer_cmd->dst.vk_format);
5407 transfer_cmd->dst.dev_addr.addr -=
5408 custom_mapping->texel_unwind_dst * bpp / 8U;
5409 transfer_cmd->dst.dev_addr.addr +=
5410 MAX2(transfer_cmd->dst.sample_count, 1U) * pass->dst_offset * bpp / 8U;
5411
5412 if (transfer_cmd->source_count > 0)
5413 transfer_cmd->source_count = pass->source_count;
5414 }
5415
5416 /* Route a copy_blit (FastScale HW) to a clip_blit (Fast2D HW). The
5417 * destination rectangle can be overridden in dst_rect, or NULL to keep the
5418 * scissor already set in the transfer command. */
5419 static VkResult pvr_reroute_to_clip(struct pvr_transfer_ctx *ctx,
5420 const struct pvr_transfer_cmd *transfer_cmd,
5421 const struct VkRect2D *dst_rect,
5422 struct pvr_transfer_prep_data *prep_data,
5423 uint32_t pass_idx,
5424 bool *finished_out)
5425 {
5426 struct pvr_transfer_cmd clip_transfer_cmd;
5427
5428 clip_transfer_cmd = *transfer_cmd;
5429 clip_transfer_cmd.flags |= PVR_TRANSFER_CMD_FLAGS_FAST2D;
5430
5431 if (transfer_cmd->source_count <= 1U) {
5432 if (dst_rect)
5433 clip_transfer_cmd.scissor = *dst_rect;
5434
5435 return pvr_3d_clip_blit(ctx,
5436 &clip_transfer_cmd,
5437 prep_data,
5438 pass_idx,
5439 finished_out);
5440 }
5441
5442 return vk_error(ctx->device, VK_ERROR_FORMAT_NOT_SUPPORTED);
5443 }
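/* Multi-source commands cannot be rerouted: the Fast2D clip path above only
 * takes a single source, so anything else is reported as unsupported.
 */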
5444
5445 static VkResult pvr_3d_copy_blit(struct pvr_transfer_ctx *ctx,
5446 struct pvr_transfer_cmd *transfer_cmd,
5447 struct pvr_transfer_prep_data *prep_data,
5448 uint32_t pass_idx,
5449 bool *finished_out)
5450 {
5451 const struct pvr_device_info *const dev_info =
5452 &ctx->device->pdevice->dev_info;
5453
5454 struct pvr_transfer_3d_state *state = &prep_data->state;
5455 struct pvr_transfer_cmd *active_cmd = transfer_cmd;
5456 struct pvr_transfer_cmd int_cmd;
5457 VkResult result;
5458
5459 state->dont_force_pbe = false;
5460 state->pass_idx = pass_idx;
5461
5462 pvr_transfer_set_filter(transfer_cmd, state);
5463
5464 if (transfer_cmd->source_count == 1U) {
5465 struct pvr_transfer_cmd_source *src = &transfer_cmd->sources[0];
5466
5467 /* Try to work out a condition to map pixel formats to RAW. That is only
5468 * possible if we don't perform any kind of 2D operation on the blit, as we
5469 * don't know the actual pixel values - i.e. it has to be point sampled;
5470 * scaling doesn't matter as long as the blit stays point sampled.
5471 */
5472 if (src->surface.vk_format == transfer_cmd->dst.vk_format &&
5473 state->filter[0] == PVR_FILTER_POINT &&
5474 src->surface.sample_count <= transfer_cmd->dst.sample_count &&
5475 (transfer_cmd->flags & PVR_TRANSFER_CMD_FLAGS_DSMERGE) == 0U) {
5476 uint32_t bpp;
5477
5478 int_cmd = *transfer_cmd;
5479 active_cmd = &int_cmd;
5480 bpp = vk_format_get_blocksizebits(int_cmd.dst.vk_format);
5481
5482 if (bpp > 0U) {
5483 switch (bpp) {
5484 case 8U:
5485 int_cmd.sources[0].surface.vk_format = VK_FORMAT_R8_UINT;
5486 break;
5487 case 16U:
5488 int_cmd.sources[0].surface.vk_format = VK_FORMAT_R8G8_UINT;
5489 break;
5490 case 24U:
5491 int_cmd.sources[0].surface.vk_format = VK_FORMAT_R8G8B8_UINT;
5492 break;
5493 case 32U:
5494 int_cmd.sources[0].surface.vk_format = VK_FORMAT_R32_UINT;
5495 break;
5496 case 48U:
5497 int_cmd.sources[0].surface.vk_format = VK_FORMAT_R16G16B16_UINT;
5498 break;
5499 case 64U:
5500 int_cmd.sources[0].surface.vk_format = VK_FORMAT_R32G32_UINT;
5501 break;
5502 case 96U:
5503 int_cmd.sources[0].surface.vk_format = VK_FORMAT_R32G32B32_UINT;
5504 break;
5505 case 128U:
5506 int_cmd.sources[0].surface.vk_format =
5507 VK_FORMAT_R32G32B32A32_UINT;
5508 break;
5509 default:
5510 active_cmd = transfer_cmd;
5511 break;
5512 }
5513 }
5514
5515 int_cmd.dst.vk_format = int_cmd.sources[0].surface.vk_format;
5516 }
5517 }
5518
5519 if (pass_idx == 0U) {
5520 pvr_get_custom_mapping(dev_info, active_cmd, 3U, &state->custom_mapping);
5521
5522 if (state->custom_mapping.texel_extend_src > 1U)
5523 state->custom_mapping.texel_extend_dst = 1U;
5524 }
5525
5526 if (state->custom_mapping.pass_count > 0U) {
5527 struct pvr_transfer_pass *pass = &state->custom_mapping.passes[pass_idx];
5528
5529 if (active_cmd != &int_cmd) {
5530 int_cmd = *active_cmd;
5531 active_cmd = &int_cmd;
5532 }
5533
5534 state->custom_filter = true;
5535
5536 pvr_modify_command(&state->custom_mapping, pass_idx, active_cmd);
5537
5538 if (state->custom_mapping.double_stride ||
5539 pass->sources[0].mapping_count > 1U || pass->source_count > 1U) {
5540 result =
5541 pvr_3d_clip_blit(ctx, active_cmd, prep_data, pass_idx, finished_out);
5542 } else {
5543 struct pvr_rect_mapping *mappings = &pass->sources[0].mappings[0U];
5544
5545 mappings[0U].src_rect.offset.x /=
5546 MAX2(1U, state->custom_mapping.texel_extend_dst);
5547 mappings[0U].src_rect.extent.width /=
5548 MAX2(1U, state->custom_mapping.texel_extend_dst);
5549
5550 if (int_cmd.source_count > 0) {
5551 for (uint32_t i = 0U; i < pass->sources[0].mapping_count; i++)
5552 active_cmd->sources[0].mappings[i] = mappings[i];
5553 }
5554
5555 active_cmd->scissor = mappings[0U].dst_rect;
5556
5557 result = pvr_3d_copy_blit_core(ctx,
5558 active_cmd,
5559 prep_data,
5560 pass_idx,
5561 finished_out);
5562 }
5563
5564 return result;
5565 }
5566
5567 /* Route DS merge blits to the Clip blit path. The background object is
5568 * used to preserve the unmerged channel.
5569 */
5570 if ((transfer_cmd->flags & PVR_TRANSFER_CMD_FLAGS_DSMERGE) != 0U) {
5571 /* The PBE byte mask could be used for a DS merge with FastScale, but
5572 * clearing the other channel on a DS merge requires a Clip blit.
5573 */
5574 if (!PVR_HAS_ERN(dev_info, 42064) ||
5575 ((transfer_cmd->flags & PVR_TRANSFER_CMD_FLAGS_FILL) != 0U)) {
5576 return pvr_reroute_to_clip(ctx,
5577 active_cmd,
5578 &active_cmd->scissor,
5579 prep_data,
5580 pass_idx,
5581 finished_out);
5582 }
5583 }
5584
5585 return pvr_3d_copy_blit_core(ctx,
5586 active_cmd,
5587 prep_data,
5588 pass_idx,
5589 finished_out);
5590 }
5591
5592 /* TODO: This should be generated in csbgen. */
5593 #define TEXSTATE_STRIDE_IMAGE_WORD1_TEXADDR_MASK \
5594 BITFIELD64_RANGE(2, (53 - 16) + 1)
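/* Editor's note: the mask mirrors the bit range the TEXADDR field can
 * encode; together with the alignment check below it rejects any device
 * address that cannot be represented in the texture state words.
 */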
5595
5596 static bool pvr_validate_source_addr(pvr_dev_addr_t addr)
5597 {
5598 if (!pvr_dev_addr_is_aligned(
5599 addr,
5600 ROGUE_TEXSTATE_STRIDE_IMAGE_WORD1_TEXADDR_ALIGNMENT)) {
5601 return false;
5602 }
5603
5604 if (addr.addr & ~TEXSTATE_STRIDE_IMAGE_WORD1_TEXADDR_MASK)
5605 return false;
5606
5607 return true;
5608 }
5609
5610 static bool pvr_supports_texel_unwind(struct pvr_transfer_cmd *transfer_cmd)
5611 {
5612 struct pvr_transfer_cmd_surface *dst = &transfer_cmd->dst;
5613
5614 if (transfer_cmd->source_count > 1)
5615 return false;
5616
5617 if (transfer_cmd->source_count) {
5618 struct pvr_transfer_cmd_surface *src = &transfer_cmd->sources[0].surface;
5619
5620 if (src->height == 1) {
5621 if (src->mem_layout != PVR_MEMLAYOUT_LINEAR &&
5622 src->mem_layout != PVR_MEMLAYOUT_TWIDDLED &&
5623 src->mem_layout != PVR_MEMLAYOUT_3DTWIDDLED) {
5624 return false;
5625 }
5626 } else if (src->mem_layout == PVR_MEMLAYOUT_TWIDDLED ||
5627 src->mem_layout == PVR_MEMLAYOUT_3DTWIDDLED) {
5628 if (!pvr_validate_source_addr(src->dev_addr))
5629 return false;
5630 } else {
5631 if (src->mem_layout != PVR_MEMLAYOUT_LINEAR)
5632 return false;
5633 }
5634 }
5635
5636 if (dst->mem_layout != PVR_MEMLAYOUT_LINEAR &&
5637 dst->mem_layout != PVR_MEMLAYOUT_TWIDDLED) {
5638 return false;
5639 }
5640
5641 return true;
5642 }
5643
5644 static bool pvr_3d_validate_addr(struct pvr_transfer_cmd *transfer_cmd)
5645 {
5646 if (!pvr_supports_texel_unwind(transfer_cmd)) {
5647 return pvr_dev_addr_is_aligned(
5648 transfer_cmd->dst.dev_addr,
5649 ROGUE_PBESTATE_STATE_WORD0_ADDRESS_LOW_ALIGNMENT);
5650 }
5651
5652 return true;
5653 }
5654
5655 static void
5656 pvr_submit_info_stream_init(struct pvr_transfer_ctx *ctx,
5657 struct pvr_transfer_prep_data *prep_data,
5658 struct pvr_winsys_transfer_cmd *cmd)
5659 {
5660 const struct pvr_winsys_transfer_regs *const regs = &prep_data->state.regs;
5661 const struct pvr_physical_device *const pdevice = ctx->device->pdevice;
5662 const struct pvr_device_info *const dev_info = &pdevice->dev_info;
5663
5664 uint32_t *stream_ptr = (uint32_t *)cmd->fw_stream;
5665 uint32_t *stream_len_ptr = stream_ptr;
5666
5667 /* Leave space for stream header. */
5668 stream_ptr += pvr_cmd_length(KMD_STREAM_HDR);
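/* The header itself is packed at the end of this function, once
 * fw_stream_len is known (see the pvr_csb_pack() call below).
 */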
5669
5670 *(uint64_t *)stream_ptr = regs->pds_bgnd0_base;
5671 stream_ptr += pvr_cmd_length(CR_PDS_BGRND0_BASE);
5672
5673 *(uint64_t *)stream_ptr = regs->pds_bgnd1_base;
5674 stream_ptr += pvr_cmd_length(CR_PDS_BGRND1_BASE);
5675
5676 *(uint64_t *)stream_ptr = regs->pds_bgnd3_sizeinfo;
5677 stream_ptr += pvr_cmd_length(CR_PDS_BGRND3_SIZEINFO);
5678
5679 *(uint64_t *)stream_ptr = regs->isp_mtile_base;
5680 stream_ptr += pvr_cmd_length(CR_ISP_MTILE_BASE);
5681
5682 STATIC_ASSERT(ARRAY_SIZE(regs->pbe_wordx_mrty) == 9U);
5683 STATIC_ASSERT(sizeof(regs->pbe_wordx_mrty[0]) == sizeof(uint64_t));
5684 memcpy(stream_ptr, regs->pbe_wordx_mrty, sizeof(regs->pbe_wordx_mrty));
5685 stream_ptr += 9U * 2U;
5686
5687 *stream_ptr = regs->isp_bgobjvals;
5688 stream_ptr += pvr_cmd_length(CR_ISP_BGOBJVALS);
5689
5690 *stream_ptr = regs->usc_pixel_output_ctrl;
5691 stream_ptr += pvr_cmd_length(CR_USC_PIXEL_OUTPUT_CTRL);
5692
5693 *stream_ptr = regs->usc_clear_register0;
5694 stream_ptr += pvr_cmd_length(CR_USC_CLEAR_REGISTER);
5695
5696 *stream_ptr = regs->usc_clear_register1;
5697 stream_ptr += pvr_cmd_length(CR_USC_CLEAR_REGISTER);
5698
5699 *stream_ptr = regs->usc_clear_register2;
5700 stream_ptr += pvr_cmd_length(CR_USC_CLEAR_REGISTER);
5701
5702 *stream_ptr = regs->usc_clear_register3;
5703 stream_ptr += pvr_cmd_length(CR_USC_CLEAR_REGISTER);
5704
5705 *stream_ptr = regs->isp_mtile_size;
5706 stream_ptr += pvr_cmd_length(CR_ISP_MTILE_SIZE);
5707
5708 *stream_ptr = regs->isp_render_origin;
5709 stream_ptr += pvr_cmd_length(CR_ISP_RENDER_ORIGIN);
5710
5711 *stream_ptr = regs->isp_ctl;
5712 stream_ptr += pvr_cmd_length(CR_ISP_CTL);
5713
5714 *stream_ptr = regs->isp_aa;
5715 stream_ptr += pvr_cmd_length(CR_ISP_AA);
5716
5717 *stream_ptr = regs->event_pixel_pds_info;
5718 stream_ptr += pvr_cmd_length(CR_EVENT_PIXEL_PDS_INFO);
5719
5720 *stream_ptr = regs->event_pixel_pds_code;
5721 stream_ptr += pvr_cmd_length(CR_EVENT_PIXEL_PDS_CODE);
5722
5723 *stream_ptr = regs->event_pixel_pds_data;
5724 stream_ptr += pvr_cmd_length(CR_EVENT_PIXEL_PDS_DATA);
5725
5726 *stream_ptr = regs->isp_render;
5727 stream_ptr += pvr_cmd_length(CR_ISP_RENDER);
5728
5729 *stream_ptr = regs->isp_rgn;
5730 stream_ptr++;
5731
5732 if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) {
5733 *stream_ptr = regs->frag_screen;
5734 stream_ptr++;
5735 }
5736
5737 cmd->fw_stream_len = (uint8_t *)stream_ptr - (uint8_t *)cmd->fw_stream;
5738 assert(cmd->fw_stream_len <= ARRAY_SIZE(cmd->fw_stream));
5739
5740 pvr_csb_pack ((uint64_t *)stream_len_ptr, KMD_STREAM_HDR, value) {
5741 value.length = cmd->fw_stream_len;
5742 }
5743 }
5744
5745 static void
5746 pvr_submit_info_flags_init(const struct pvr_device_info *const dev_info,
5747 const struct pvr_transfer_prep_data *const prep_data,
5748 struct pvr_winsys_transfer_cmd_flags *flags)
5749 {
5750 *flags = prep_data->flags;
5751 flags->use_single_core = PVR_HAS_FEATURE(dev_info, gpu_multicore_support);
5752 }
5753
5754 static void pvr_transfer_job_ws_submit_info_init(
5755 struct pvr_transfer_ctx *ctx,
5756 struct pvr_transfer_submit *submit,
5757 struct vk_sync *wait,
5758 struct pvr_winsys_transfer_submit_info *submit_info)
5759 {
5760 const struct pvr_device *const device = ctx->device;
5761 const struct pvr_device_info *const dev_info = &device->pdevice->dev_info;
5762
5763 submit_info->frame_num = device->global_queue_present_count;
5764 submit_info->job_num = device->global_cmd_buffer_submit_count;
5765 submit_info->wait = wait;
5766 submit_info->cmd_count = submit->prep_count;
5767
5768 for (uint32_t i = 0U; i < submit->prep_count; i++) {
5769 struct pvr_winsys_transfer_cmd *const cmd = &submit_info->cmds[i];
5770 struct pvr_transfer_prep_data *prep_data = &submit->prep_array[i];
5771
5772 pvr_submit_info_stream_init(ctx, prep_data, cmd);
5773 pvr_submit_info_flags_init(dev_info, prep_data, &cmd->flags);
5774 }
5775 }
5776
5777 static VkResult pvr_submit_transfer(struct pvr_transfer_ctx *ctx,
5778 struct pvr_transfer_submit *submit,
5779 struct vk_sync *wait,
5780 struct vk_sync *signal_sync)
5781 {
5782 struct pvr_winsys_transfer_submit_info submit_info;
5783
5784 pvr_transfer_job_ws_submit_info_init(ctx, submit, wait, &submit_info);
5785
5786 return ctx->device->ws->ops->transfer_submit(ctx->ws_ctx,
5787 &submit_info,
5788 &ctx->device->pdevice->dev_info,
5789 signal_sync);
5790 }
5791
5792 static VkResult pvr_queue_transfer(struct pvr_transfer_ctx *ctx,
5793 struct pvr_transfer_cmd *transfer_cmd,
5794 struct vk_sync *wait,
5795 struct vk_sync *signal_sync)
5796 {
5797 struct pvr_transfer_prep_data *prep_data = NULL;
5798 struct pvr_transfer_prep_data *prev_prep_data;
5799 struct pvr_transfer_submit submit = { 0U };
5800 bool finished = false;
5801 uint32_t pass = 0U;
5802 VkResult result;
5803
5804 /* The transfer queue might decide to do a blit in multiple passes. When a
5805 * prepare doesn't set the finished flag, this code keeps calling the
5806 * prepare with an increasing pass index. Transfers queued from here are
5807 * submitted straight away, which is why we only need a single prepare for
5808 * the blit rather than one for each pass. Otherwise we insert each prepare
5809 * into the prepare array. When the client does blit batching and we split
5810 * the blit into multiple passes, each pass in each queued transfer adds
5811 * one more prepare. Thus the prepare array after 2 pvr_queue_transfer
5812 * calls might look like:
5813 *
5814 * +------+------++-------+-------+-------+
5815 * |B0/P0 |B0/P1 || B1/P0 | B1/P1 | B1/P2 |
5816 * +------+------++-------+-------+-------+
5817 * F S/U F S/U
5818 *
5819 * Bn/Pm : nth blit (queue transfer call) / mth prepare
5820 * F : fence point
5821 * S/U : update / server sync update point
5822 */
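/* Editor's note: if a blit needs more passes than there are free slots in
 * submit.prep_array, the loop below flushes an intermediate submit without
 * the signal sync and carries on; only the final submit signals signal_sync.
 */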
5823
5824 while (!finished) {
5825 prev_prep_data = prep_data;
5826 prep_data = &submit.prep_array[submit.prep_count++];
5827
5828 /* Clear down the memory before we write to this prep. */
5829 memset(prep_data, 0U, sizeof(*prep_data));
5830
5831 if (pass == 0U) {
5832 if (!pvr_3d_validate_addr(transfer_cmd))
5833 return vk_error(ctx->device, VK_ERROR_FEATURE_NOT_PRESENT);
5834 } else {
5835 /* Transfer queue workarounds could use more than one pass with 3D
5836 * path.
5837 */
5838 prep_data->state = prev_prep_data->state;
5839 }
5840
5841 if (transfer_cmd->flags & PVR_TRANSFER_CMD_FLAGS_FAST2D) {
5842 result =
5843 pvr_3d_clip_blit(ctx, transfer_cmd, prep_data, pass, &finished);
5844 } else {
5845 result =
5846 pvr_3d_copy_blit(ctx, transfer_cmd, prep_data, pass, &finished);
5847 }
5848 if (result != VK_SUCCESS)
5849 return result;
5850
5851 /* Submit if we have finished the blit or if we are out of prepares. */
5852 if (finished || submit.prep_count == ARRAY_SIZE(submit.prep_array)) {
5853 result = pvr_submit_transfer(ctx,
5854 &submit,
5855 wait,
5856 finished ? signal_sync : NULL);
5857 if (result != VK_SUCCESS)
5858 return result;
5859
5860 /* Check if we need to reset prep_count. */
5861 if (submit.prep_count == ARRAY_SIZE(submit.prep_array))
5862 submit.prep_count = 0U;
5863 }
5864
5865 pass++;
5866 }
5867
5868 return VK_SUCCESS;
5869 }
5870
5871 VkResult pvr_transfer_job_submit(struct pvr_transfer_ctx *ctx,
5872 struct pvr_sub_cmd_transfer *sub_cmd,
5873 struct vk_sync *wait_sync,
5874 struct vk_sync *signal_sync)
5875 {
5876 list_for_each_entry_safe (struct pvr_transfer_cmd,
5877 transfer_cmd,
5878 sub_cmd->transfer_cmds,
5879 link) {
5880 /* The fw guarantees that any kick on the same context will be
5881 * synchronized in submission order. This means only the first kick must
5882 * wait, and only the last kick needs to signal.
5883 */
5884 struct vk_sync *first_cmd_wait_sync = NULL;
5885 struct vk_sync *last_cmd_signal_sync = NULL;
5886 VkResult result;
5887
5888 if (list_first_entry(sub_cmd->transfer_cmds,
5889 struct pvr_transfer_cmd,
5890 link) == transfer_cmd) {
5891 first_cmd_wait_sync = wait_sync;
5892 }
5893
5894 if (list_last_entry(sub_cmd->transfer_cmds,
5895 struct pvr_transfer_cmd,
5896 link) == transfer_cmd) {
5897 last_cmd_signal_sync = signal_sync;
5898 }
5899
5900 result = pvr_queue_transfer(ctx,
5901 transfer_cmd,
5902 first_cmd_wait_sync,
5903 last_cmd_signal_sync);
5904 if (result != VK_SUCCESS)
5905 return result;
5906 }
5907
5908 return VK_SUCCESS;
5909 }
5910