/*
 * Copyright 2020 Valve Corporation
 * SPDX-License-Identifier: MIT
 *
 * Authors:
 *    Jonathan Marek <jonathan@marek.ca>
 */
8
9 #ifndef TU_UTIL_H
10 #define TU_UTIL_H
11
12 #include "tu_common.h"
13
14 #include "util/macros.h"
15 #include "util/u_math.h"
16 #include "util/format/u_format_pack.h"
17 #include "util/format/u_format_zs.h"
18 #include "compiler/shader_enums.h"
19
20 #include "vk_util.h"
21
/* Test one debug flag.  `name` is the part of the enumerator after the
 * TU_DEBUG_ prefix, e.g. TU_DEBUG(PERF) checks TU_DEBUG_PERF in tu_env.debug.
 * The whole test is wrapped in unlikely().
 */
#define TU_DEBUG(name) unlikely(tu_env.debug & TU_DEBUG_##name)
23
/* Bit flags kept in tu_env.debug and queried through TU_DEBUG().
 *
 * Each flag owns exactly one bit.  Bit 2 is not assigned to any flag here;
 * the numbering goes from bit 1 straight to bit 3.
 */
enum tu_debug_flags
{
   TU_DEBUG_STARTUP                  = 1 << 0,
   TU_DEBUG_NIR                      = 1 << 1,
   TU_DEBUG_NOBIN                    = 1 << 3,
   TU_DEBUG_SYSMEM                   = 1 << 4,
   TU_DEBUG_FORCEBIN                 = 1 << 5,
   TU_DEBUG_NOUBWC                   = 1 << 6,
   TU_DEBUG_NOMULTIPOS               = 1 << 7,
   TU_DEBUG_NOLRZ                    = 1 << 8,
   TU_DEBUG_PERFC                    = 1 << 9,
   TU_DEBUG_FLUSHALL                 = 1 << 10,
   TU_DEBUG_SYNCDRAW                 = 1 << 11,
   TU_DEBUG_PUSH_CONSTS_PER_STAGE    = 1 << 12,
   TU_DEBUG_GMEM                     = 1 << 13,
   TU_DEBUG_RAST_ORDER               = 1 << 14,
   TU_DEBUG_UNALIGNED_STORE          = 1 << 15,
   TU_DEBUG_LAYOUT                   = 1 << 16,
   TU_DEBUG_LOG_SKIP_GMEM_OPS        = 1 << 17,
   TU_DEBUG_PERF                     = 1 << 18,
   TU_DEBUG_NOLRZFC                  = 1 << 19,
   TU_DEBUG_DYNAMIC                  = 1 << 20,
   TU_DEBUG_BOS                      = 1 << 21,
   TU_DEBUG_3D_LOAD                  = 1 << 22,
   TU_DEBUG_FDM                      = 1 << 23,
   TU_DEBUG_NOCONFORM                = 1 << 24,
   TU_DEBUG_RD                       = 1 << 25,
   TU_DEBUG_HIPRIO                   = 1 << 26,
   TU_DEBUG_NO_CONCURRENT_RESOLVES   = 1 << 27,
   TU_DEBUG_NO_CONCURRENT_UNRESOLVES = 1 << 28,
};
55
/* Process-wide driver environment.  `debug` holds a mask of
 * enum tu_debug_flags bits and is what TU_DEBUG() reads.
 */
struct tu_env {
   uint32_t debug;
};

/* The single shared instance; only declared here, defined elsewhere. */
extern struct tu_env tu_env;

/* Initializes the driver environment.  Only the prototype is visible in this
 * header, so the details of what gets populated live with the definition.
 */
void
tu_env_init(void);
64
65 /* Whenever we generate an error, pass it through this function. Useful for
66 * debugging, where we can break on it. Only call at error site, not when
67 * propagating errors. Might be useful to plug in a stack trace here.
68 */
69
70 VkResult
71 __vk_startup_errorf(struct tu_instance *instance,
72 VkResult error,
73 const char *file,
74 int line,
75 const char *format,
76 ...) PRINTFLIKE(5, 6);
77
/* Prints startup errors if TU_DEBUG=startup is set or on a debug driver
 * build.
 *
 * Convenience wrapper around __vk_startup_errorf() that fills in the source
 * location of the caller.
 */
#define vk_startup_errorf(instance, error, format, ...)                      \
   __vk_startup_errorf(instance, error,                                      \
                       __FILE__, __LINE__, format, ##__VA_ARGS__)
84
85 void
86 __tu_finishme(const char *file, int line, const char *format, ...)
87 PRINTFLIKE(3, 4);
88
/**
 * Print a FINISHME message, including its source location.
 *
 * Every expansion carries its own function-local static flag, so a given
 * call site reports at most once no matter how often it is executed.
 */
#define tu_finishme(format, ...)                                             \
   do {                                                                      \
      static bool reported = false;                                          \
      if (!reported) {                                                       \
         __tu_finishme(__FILE__, __LINE__, format, ##__VA_ARGS__);           \
         reported = true;                                                    \
      }                                                                      \
   } while (0)
100
/* Marks the enclosing function as a stub by emitting a one-time FINISHME
 * message that names it via __func__.
 */
#define tu_stub()                                                            \
   do {                                                                      \
      tu_finishme("stub %s", __func__);                                      \
   } while (0)
105
/* Computes the tiling configuration for `fb`.  `fb` is the only non-const
 * argument; `device` and `pass` are read-only inputs.  Only the prototype is
 * visible here; see the definition for the details of what is written.
 */
void
tu_framebuffer_tiling_config(struct tu_framebuffer *fb,
                             const struct tu_device *device,
                             const struct tu_render_pass *pass);
110
/* Mask with one bit per shader stage (bits 0 .. MESA_SHADER_STAGES - 1). */
#define TU_STAGE_MASK ((1 << MESA_SHADER_STAGES) - 1)

/* Iterate over every stage whose bit is set in `stage_bits`, from the lowest
 * bit upwards.  Bits outside TU_STAGE_MASK are ignored.
 *
 * `stage` is declared by the macro as the loop variable.  The remaining bits
 * live in the hidden variable __tmp; the loop condition first assigns
 * `stage` the index of the lowest set bit (via __builtin_ffs) and then tests
 * __tmp, so the loop stops once all bits have been consumed.  The increment
 * clears the bit that was just visited.
 */
#define tu_foreach_stage(stage, stage_bits)                                  \
   for (gl_shader_stage stage,                                               \
        __tmp = (gl_shader_stage) ((stage_bits) & TU_STAGE_MASK);            \
        stage = (gl_shader_stage) (__builtin_ffs(__tmp) - 1), __tmp;         \
        __tmp = (gl_shader_stage) (__tmp & ~(1 << (stage))))
118
119 static inline enum a3xx_msaa_samples
tu_msaa_samples(uint32_t samples)120 tu_msaa_samples(uint32_t samples)
121 {
122 assert(__builtin_popcount(samples) == 1);
123 return (enum a3xx_msaa_samples) util_logbase2(samples);
124 }
125
126 static inline uint32_t
tu6_stage2opcode(gl_shader_stage stage)127 tu6_stage2opcode(gl_shader_stage stage)
128 {
129 if (stage == MESA_SHADER_FRAGMENT || stage == MESA_SHADER_COMPUTE)
130 return CP_LOAD_STATE6_FRAG;
131 return CP_LOAD_STATE6_GEOM;
132 }
133
134 static inline enum a6xx_state_block
tu6_stage2texsb(gl_shader_stage stage)135 tu6_stage2texsb(gl_shader_stage stage)
136 {
137 return (enum a6xx_state_block) (SB6_VS_TEX + stage);
138 }
139
140 static inline enum a6xx_state_block
tu6_stage2shadersb(gl_shader_stage stage)141 tu6_stage2shadersb(gl_shader_stage stage)
142 {
143 return (enum a6xx_state_block) (SB6_VS_SHADER + stage);
144 }
145
146 static inline enum a3xx_rop_code
tu6_rop(VkLogicOp op)147 tu6_rop(VkLogicOp op)
148 {
149 /* note: hw enum matches the VK enum, but with the 4 bits reversed */
150 static const enum a3xx_rop_code lookup[] = {
151 [VK_LOGIC_OP_CLEAR] = ROP_CLEAR,
152 [VK_LOGIC_OP_AND] = ROP_AND,
153 [VK_LOGIC_OP_AND_REVERSE] = ROP_AND_REVERSE,
154 [VK_LOGIC_OP_COPY] = ROP_COPY,
155 [VK_LOGIC_OP_AND_INVERTED] = ROP_AND_INVERTED,
156 [VK_LOGIC_OP_NO_OP] = ROP_NOOP,
157 [VK_LOGIC_OP_XOR] = ROP_XOR,
158 [VK_LOGIC_OP_OR] = ROP_OR,
159 [VK_LOGIC_OP_NOR] = ROP_NOR,
160 [VK_LOGIC_OP_EQUIVALENT] = ROP_EQUIV,
161 [VK_LOGIC_OP_INVERT] = ROP_INVERT,
162 [VK_LOGIC_OP_OR_REVERSE] = ROP_OR_REVERSE,
163 [VK_LOGIC_OP_COPY_INVERTED] = ROP_COPY_INVERTED,
164 [VK_LOGIC_OP_OR_INVERTED] = ROP_OR_INVERTED,
165 [VK_LOGIC_OP_NAND] = ROP_NAND,
166 [VK_LOGIC_OP_SET] = ROP_SET,
167 };
168 assert(op < ARRAY_SIZE(lookup));
169 return lookup[op];
170 }
171
172 static inline bool
tu6_primtype_line(enum pc_di_primtype type)173 tu6_primtype_line(enum pc_di_primtype type)
174 {
175 switch(type) {
176 case DI_PT_LINELIST:
177 case DI_PT_LINESTRIP:
178 case DI_PT_LINE_ADJ:
179 case DI_PT_LINESTRIP_ADJ:
180 return true;
181 default:
182 return false;
183 }
184 }
185
186 static inline bool
tu6_primtype_patches(enum pc_di_primtype type)187 tu6_primtype_patches(enum pc_di_primtype type)
188 {
189 return type >= DI_PT_PATCHES0 && type <= DI_PT_PATCHES31;
190 }
191
192 static inline enum pc_di_primtype
tu6_primtype(VkPrimitiveTopology topology)193 tu6_primtype(VkPrimitiveTopology topology)
194 {
195 static const enum pc_di_primtype lookup[] = {
196 [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = DI_PT_POINTLIST,
197 [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = DI_PT_LINELIST,
198 [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = DI_PT_LINESTRIP,
199 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = DI_PT_TRILIST,
200 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = DI_PT_TRISTRIP,
201 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = DI_PT_TRIFAN,
202 [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = DI_PT_LINE_ADJ,
203 [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = DI_PT_LINESTRIP_ADJ,
204 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = DI_PT_TRI_ADJ,
205 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = DI_PT_TRISTRIP_ADJ,
206 /* Return PATCH0 and update in tu_pipeline_builder_parse_tessellation */
207 [VK_PRIMITIVE_TOPOLOGY_PATCH_LIST] = DI_PT_PATCHES0,
208 };
209 assert(topology < ARRAY_SIZE(lookup));
210 return lookup[topology];
211 }
212
213 static inline enum adreno_compare_func
tu6_compare_func(VkCompareOp op)214 tu6_compare_func(VkCompareOp op)
215 {
216 return (enum adreno_compare_func) op;
217 }
218
219 static inline enum adreno_stencil_op
tu6_stencil_op(VkStencilOp op)220 tu6_stencil_op(VkStencilOp op)
221 {
222 return (enum adreno_stencil_op) op;
223 }
224
225 static inline enum adreno_rb_blend_factor
tu6_blend_factor(VkBlendFactor factor)226 tu6_blend_factor(VkBlendFactor factor)
227 {
228 static const enum adreno_rb_blend_factor lookup[] = {
229 [VK_BLEND_FACTOR_ZERO] = FACTOR_ZERO,
230 [VK_BLEND_FACTOR_ONE] = FACTOR_ONE,
231 [VK_BLEND_FACTOR_SRC_COLOR] = FACTOR_SRC_COLOR,
232 [VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR] = FACTOR_ONE_MINUS_SRC_COLOR,
233 [VK_BLEND_FACTOR_DST_COLOR] = FACTOR_DST_COLOR,
234 [VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR] = FACTOR_ONE_MINUS_DST_COLOR,
235 [VK_BLEND_FACTOR_SRC_ALPHA] = FACTOR_SRC_ALPHA,
236 [VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA] = FACTOR_ONE_MINUS_SRC_ALPHA,
237 [VK_BLEND_FACTOR_DST_ALPHA] = FACTOR_DST_ALPHA,
238 [VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA] = FACTOR_ONE_MINUS_DST_ALPHA,
239 [VK_BLEND_FACTOR_CONSTANT_COLOR] = FACTOR_CONSTANT_COLOR,
240 [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR]= FACTOR_ONE_MINUS_CONSTANT_COLOR,
241 [VK_BLEND_FACTOR_CONSTANT_ALPHA] = FACTOR_CONSTANT_ALPHA,
242 [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA]= FACTOR_ONE_MINUS_CONSTANT_ALPHA,
243 [VK_BLEND_FACTOR_SRC_ALPHA_SATURATE] = FACTOR_SRC_ALPHA_SATURATE,
244 [VK_BLEND_FACTOR_SRC1_COLOR] = FACTOR_SRC1_COLOR,
245 [VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR] = FACTOR_ONE_MINUS_SRC1_COLOR,
246 [VK_BLEND_FACTOR_SRC1_ALPHA] = FACTOR_SRC1_ALPHA,
247 [VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA] = FACTOR_ONE_MINUS_SRC1_ALPHA,
248 };
249 assert(factor < ARRAY_SIZE(lookup));
250 return lookup[factor];
251 }
252
253 static inline bool
tu_blend_factor_is_dual_src(VkBlendFactor factor)254 tu_blend_factor_is_dual_src(VkBlendFactor factor)
255 {
256 switch (factor) {
257 case VK_BLEND_FACTOR_SRC1_COLOR:
258 case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
259 case VK_BLEND_FACTOR_SRC1_ALPHA:
260 case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
261 return true;
262 default:
263 return false;
264 }
265 }
266
267 static inline enum a3xx_rb_blend_opcode
tu6_blend_op(VkBlendOp op)268 tu6_blend_op(VkBlendOp op)
269 {
270 return (enum a3xx_rb_blend_opcode) op;
271 }
272
273 static inline enum a6xx_tex_type
tu6_tex_type(VkImageViewType type,bool storage)274 tu6_tex_type(VkImageViewType type, bool storage)
275 {
276 switch (type) {
277 default:
278 case VK_IMAGE_VIEW_TYPE_1D:
279 case VK_IMAGE_VIEW_TYPE_1D_ARRAY:
280 return A6XX_TEX_1D;
281 case VK_IMAGE_VIEW_TYPE_2D:
282 case VK_IMAGE_VIEW_TYPE_2D_ARRAY:
283 return A6XX_TEX_2D;
284 case VK_IMAGE_VIEW_TYPE_3D:
285 return A6XX_TEX_3D;
286 case VK_IMAGE_VIEW_TYPE_CUBE:
287 case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY:
288 return storage ? A6XX_TEX_2D : A6XX_TEX_CUBE;
289 }
290 }
291
292 static inline enum a6xx_tex_clamp
tu6_tex_wrap(VkSamplerAddressMode address_mode)293 tu6_tex_wrap(VkSamplerAddressMode address_mode)
294 {
295 static const enum a6xx_tex_clamp lookup[] = {
296 [VK_SAMPLER_ADDRESS_MODE_REPEAT] = A6XX_TEX_REPEAT,
297 [VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT] = A6XX_TEX_MIRROR_REPEAT,
298 [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE] = A6XX_TEX_CLAMP_TO_EDGE,
299 [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER] = A6XX_TEX_CLAMP_TO_BORDER,
300 [VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE] = A6XX_TEX_MIRROR_CLAMP,
301 };
302 assert(address_mode < ARRAY_SIZE(lookup));
303 return lookup[address_mode];
304 }
305
306 static inline enum a6xx_tex_filter
tu6_tex_filter(VkFilter filter,unsigned aniso)307 tu6_tex_filter(VkFilter filter, unsigned aniso)
308 {
309 switch (filter) {
310 case VK_FILTER_NEAREST:
311 return A6XX_TEX_NEAREST;
312 case VK_FILTER_LINEAR:
313 return aniso ? A6XX_TEX_ANISO : A6XX_TEX_LINEAR;
314 case VK_FILTER_CUBIC_EXT:
315 return A6XX_TEX_CUBIC;
316 default:
317 unreachable("illegal texture filter");
318 break;
319 }
320 }
321
322 static inline enum a6xx_reduction_mode
tu6_reduction_mode(VkSamplerReductionMode reduction_mode)323 tu6_reduction_mode(VkSamplerReductionMode reduction_mode)
324 {
325 return (enum a6xx_reduction_mode) reduction_mode;
326 }
327
328 static inline enum a6xx_depth_format
tu6_pipe2depth(VkFormat format)329 tu6_pipe2depth(VkFormat format)
330 {
331 switch (format) {
332 case VK_FORMAT_D16_UNORM:
333 return DEPTH6_16;
334 case VK_FORMAT_X8_D24_UNORM_PACK32:
335 case VK_FORMAT_D24_UNORM_S8_UINT:
336 return DEPTH6_24_8;
337 case VK_FORMAT_D32_SFLOAT:
338 case VK_FORMAT_D32_SFLOAT_S8_UINT:
339 case VK_FORMAT_S8_UINT:
340 return DEPTH6_32;
341 default:
342 return DEPTH6_NONE;
343 }
344 }
345
346 static inline enum a6xx_polygon_mode
tu6_polygon_mode(VkPolygonMode mode)347 tu6_polygon_mode(VkPolygonMode mode)
348 {
349 switch (mode) {
350 case VK_POLYGON_MODE_POINT:
351 return POLYMODE6_POINTS;
352 case VK_POLYGON_MODE_LINE:
353 return POLYMODE6_LINES;
354 case VK_POLYGON_MODE_FILL:
355 return POLYMODE6_TRIANGLES;
356 default:
357 unreachable("bad polygon mode");
358 }
359 }
360
/* One border-color record: the same color pre-packed into several
 * representations, filled in by tu6_pack_border_color().
 *
 * The record is 128-byte aligned and the explicit padding brings the declared
 * members to exactly 128 bytes.  Byte offsets are noted next to each member.
 */
struct bcolor_entry {
   alignas(128) uint32_t fp32[4];   /*   0: raw 32-bit channels */
   uint64_t ui16;                   /*  16 */
   uint64_t si16;                   /*  24 */
   uint64_t fp16;                   /*  32 */
   uint16_t rgb565;                 /*  40 */
   uint16_t rgb5a1;                 /*  42 */
   uint16_t rgba4;                  /*  44 */
   uint8_t __pad0[2];               /*  46 */
   uint32_t ui8;                    /*  48 */
   uint32_t si8;                    /*  52 */
   uint32_t rgb10a2;                /*  56 */
   uint32_t z24;                    /*  60: also s8? */
   uint64_t srgb;                   /*  64 */
   uint8_t __pad1[56];              /*  72 */
};
static_assert(alignof(struct bcolor_entry) == 128, "");
378
/* vulkan does not want clamping of integer clear values, differs from u_format
 * see spec for VkClearColorValue
 *
 * Pack four channels into one 32-bit word, 8 bits each, channel 0 in the low
 * byte.  Each channel is truncated to its low 8 bits (no clamping).
 */
static inline void
pack_int8(uint32_t *dst, const uint32_t *val)
{
   *dst = (val[0] & 0xff) |
          (val[1] & 0xff) << 8 |
          (val[2] & 0xff) << 16 |
          (val[3] & 0xff) << 24;
}
390
/* Pack four channels into one 32-bit word in 10:10:10:2 layout, channel 0 in
 * the low bits.  Channels 0-2 are truncated to 10 bits and channel 3 to
 * 2 bits (no clamping).
 */
static inline void
pack_int10_2(uint32_t *dst, const uint32_t *val)
{
   *dst = (val[0] & 0x3ff) |
          (val[1] & 0x3ff) << 10 |
          (val[2] & 0x3ff) << 20 |
          (val[3] & 0x3) << 30;
}
399
/* Pack four channels into two 32-bit words, 16 bits per channel: dst[0]
 * holds channels 0 (low half) and 1 (high half), dst[1] holds channels 2 and
 * 3.  Each channel is truncated to its low 16 bits (no clamping).
 */
static inline void
pack_int16(uint32_t *dst, const uint32_t *val)
{
   dst[0] = (val[0] & 0xffff) |
            (val[1] & 0xffff) << 16;
   dst[1] = (val[2] & 0xffff) |
            (val[3] & 0xffff) << 16;
}
408
409 static inline void
tu6_pack_border_color(struct bcolor_entry * bcolor,const VkClearColorValue * val,bool is_int)410 tu6_pack_border_color(struct bcolor_entry *bcolor, const VkClearColorValue *val, bool is_int)
411 {
412 memcpy(bcolor->fp32, val, 4 * sizeof(float));
413 if (is_int) {
414 pack_int16((uint32_t*) &bcolor->fp16, val->uint32);
415 return;
416 }
417 #define PACK_F(x, type) util_format_##type##_pack_rgba_float \
418 ( (uint8_t*) (&bcolor->x), 0, val->float32, 0, 1, 1)
419 PACK_F(ui16, r16g16b16a16_unorm);
420 PACK_F(si16, r16g16b16a16_snorm);
421 PACK_F(fp16, r16g16b16a16_float);
422 PACK_F(rgb565, r5g6b5_unorm);
423 PACK_F(rgb5a1, r5g5b5a1_unorm);
424 PACK_F(rgba4, r4g4b4a4_unorm);
425 PACK_F(ui8, r8g8b8a8_unorm);
426 PACK_F(si8, r8g8b8a8_snorm);
427 PACK_F(rgb10a2, r10g10b10a2_unorm);
428 util_format_z24x8_unorm_pack_z_float((uint8_t*) &bcolor->z24,
429 0, val->float32, 0, 1, 1);
430 PACK_F(srgb, r16g16b16a16_float); /* TODO: clamp? */
431 #undef PACK_F
432 }
433
/* Debug helper that logs information about skipped GMEM loads/stores for
 * `device`.  Only the prototype is visible in this header.
 * NOTE(review): presumably tied to TU_DEBUG_LOG_SKIP_GMEM_OPS; confirm
 * against the definition.
 */
void
tu_dbg_log_gmem_load_store_skips(struct tu_device *device);
436
/* Emit a printf-style warning through mesa_log() when TU_DEBUG(PERF) is
 * set; otherwise do nothing.  The `device` argument is accepted but not used
 * (nor evaluated) by the expansion.
 */
#define perf_debug(device, fmt, ...) do {                                    \
   if (TU_DEBUG(PERF))                                                       \
      mesa_log(MESA_LOG_WARN, (MESA_LOG_TAG), (fmt), ##__VA_ARGS__);         \
} while(0)
441
/* Size in bytes of member `field` of type `s`.  The null pointer is only used
 * as an operand of sizeof and is never dereferenced.
 */
#define sizeof_field(s, field) sizeof(((s *) NULL)->field)

/* Byte offset of element `idx` of the array member `field` within type `s`:
 * the offset of the array plus `idx` times the size of one element.
 */
#define offsetof_arr(s, field, idx)                                          \
   (offsetof(s, field) + sizeof_field(s, field[0]) * (idx))
446
447 #endif /* TU_UTIL_H */
448