1 /*
2 * Copyright 2020 Valve Corporation
3 * SPDX-License-Identifier: MIT
4 *
5 * Authors:
6 * Jonathan Marek <jonathan@marek.ca>
7 */
8
9 #ifndef TU_UTIL_H
10 #define TU_UTIL_H
11
12 #include <atomic>
13
14 #include "tu_common.h"
15
16 #include "util/macros.h"
17 #include "util/u_math.h"
18 #include "util/format/u_format_pack.h"
19 #include "util/format/u_format_zs.h"
20 #include "compiler/shader_enums.h"
21
22 #include "vk_util.h"
23
/*
 * Returns if the specified TU_DEBUG flag is set. The value returned by this macro
 * can change at runtime if TU_DEBUG_FILE is used. Therefore, the value should
 * be cached in a local scope if it needs to be coherent across multiple usages.
 * (The acquire load pairs with whatever store updates tu_env.debug — the
 * writer is not visible in this header.)
 */
#define TU_DEBUG(name) unlikely(tu_env.debug.load(std::memory_order_acquire) & TU_DEBUG_##name)

/*
 * Same as TU_DEBUG, but only uses the environment variable's value rather
 * than TU_DEBUG_FILE. This is useful for flags that should not be changed
 * at runtime or when a flag has different behavior depending on whether it
 * is set in TU_DEBUG or TU_DEBUG_FILE.
 */
#define TU_DEBUG_ENV(name) unlikely(tu_env.env_debug & TU_DEBUG_##name)
38
/* Debug flags, tested with the TU_DEBUG()/TU_DEBUG_ENV() macros above.
 * Note: bit 2 (1 << 2) is unassigned — presumably a removed flag whose slot
 * was left open so the remaining values stayed stable.
 */
enum tu_debug_flags
{
   TU_DEBUG_STARTUP = 1 << 0,
   TU_DEBUG_NIR = 1 << 1,
   TU_DEBUG_NOBIN = 1 << 3,
   TU_DEBUG_SYSMEM = 1 << 4,
   TU_DEBUG_FORCEBIN = 1 << 5,
   TU_DEBUG_NOUBWC = 1 << 6,
   TU_DEBUG_NOMULTIPOS = 1 << 7,
   TU_DEBUG_NOLRZ = 1 << 8,
   TU_DEBUG_PERFC = 1 << 9,
   TU_DEBUG_FLUSHALL = 1 << 10,
   TU_DEBUG_SYNCDRAW = 1 << 11,
   TU_DEBUG_PUSH_CONSTS_PER_STAGE = 1 << 12,
   TU_DEBUG_GMEM = 1 << 13,
   TU_DEBUG_RAST_ORDER = 1 << 14,
   TU_DEBUG_UNALIGNED_STORE = 1 << 15,
   TU_DEBUG_LAYOUT = 1 << 16,
   TU_DEBUG_LOG_SKIP_GMEM_OPS = 1 << 17,
   TU_DEBUG_PERF = 1 << 18,
   TU_DEBUG_NOLRZFC = 1 << 19,
   TU_DEBUG_DYNAMIC = 1 << 20,
   TU_DEBUG_BOS = 1 << 21,
   TU_DEBUG_3D_LOAD = 1 << 22,
   TU_DEBUG_FDM = 1 << 23,
   TU_DEBUG_NOCONFORM = 1 << 24,
   TU_DEBUG_RD = 1 << 25,
   TU_DEBUG_HIPRIO = 1 << 26,
   TU_DEBUG_NO_CONCURRENT_RESOLVES = 1 << 27,
   TU_DEBUG_NO_CONCURRENT_UNRESOLVES = 1 << 28,
   TU_DEBUG_DUMPAS = 1 << 29,
};
71
/* Global driver debug state.
 * 'debug' is the currently-active flag set; it is acquire-loaded by
 * TU_DEBUG() and may change at runtime when TU_DEBUG_FILE is in use.
 * 'env_debug' is the flag set parsed from the TU_DEBUG environment
 * variable only (see TU_DEBUG_ENV()).
 */
struct tu_env {
   std::atomic<uint32_t> debug;
   uint32_t env_debug;
};

extern struct tu_env tu_env;

/*
 * Note: tu_env_init() must be called before using the TU_DEBUG* macro.
 */
void
tu_env_init(void);
84
/* Whenever we generate an error, pass it through this function. Useful for
 * debugging, where we can break on it. Only call at error site, not when
 * propagating errors. Might be useful to plug in a stack trace here.
 * Returns 'error' unchanged so it can be used in a return expression.
 */

VkResult
__vk_startup_errorf(struct tu_instance *instance,
                    VkResult error,
                    const char *file,
                    int line,
                    const char *format,
                    ...) PRINTFLIKE(5, 6);

/* Prints startup errors if TU_DEBUG=startup is set or on a debug driver
 * build. Captures the caller's source location automatically.
 */
#define vk_startup_errorf(instance, error, format, ...) \
   __vk_startup_errorf(instance, error, \
                       __FILE__, __LINE__, format, ##__VA_ARGS__)
104
void
__tu_finishme(const char *file, int line, const char *format, ...)
   PRINTFLIKE(3, 4);

/**
 * Print a FINISHME message, including its source location.
 * The static 'reported' flag ensures each call site only logs once.
 */
#define tu_finishme(format, ...) \
   do { \
      static bool reported = false; \
      if (!reported) { \
         __tu_finishme(__FILE__, __LINE__, format, ##__VA_ARGS__); \
         reported = true; \
      } \
   } while (0)

/* Mark the calling function as an unimplemented stub (logged once). */
#define tu_stub() \
   do { \
      tu_finishme("stub %s", __func__); \
   } while (0)
125
void
tu_framebuffer_tiling_config(struct tu_framebuffer *fb,
                             const struct tu_device *device,
                             const struct tu_render_pass *pass);

/* Bitmask covering every gl_shader_stage bit. */
#define TU_STAGE_MASK ((1 << MESA_SHADER_STAGES) - 1)

/* Iterate 'stage' over each stage whose bit is set in 'stage_bits',
 * lowest stage first. '__tmp' holds the bits not yet visited; the loop
 * terminates when it reaches zero ('stage' is assigned before the __tmp
 * test, so it is only meaningful while bits remain). */
#define tu_foreach_stage(stage, stage_bits) \
   for (gl_shader_stage stage, \
        __tmp = (gl_shader_stage) ((stage_bits) &TU_STAGE_MASK); \
        stage = (gl_shader_stage) (__builtin_ffs(__tmp) - 1), __tmp; \
        __tmp = (gl_shader_stage) (__tmp & ~(1 << (stage))))
138
/* Convert an API sample count to the hw enum. 'samples' must be a power of
 * two; the a3xx_msaa_samples values appear to be log2 of the sample count
 * (1x -> 0, 2x -> 1, ...) — hence the direct cast of util_logbase2().
 */
static inline enum a3xx_msaa_samples
tu_msaa_samples(uint32_t samples)
{
   assert(__builtin_popcount(samples) == 1);
   return (enum a3xx_msaa_samples) util_logbase2(samples);
}
145
146 static inline uint32_t
tu6_stage2opcode(gl_shader_stage stage)147 tu6_stage2opcode(gl_shader_stage stage)
148 {
149 if (stage == MESA_SHADER_FRAGMENT || stage == MESA_SHADER_COMPUTE)
150 return CP_LOAD_STATE6_FRAG;
151 return CP_LOAD_STATE6_GEOM;
152 }
153
/* State block for a stage's texture/sampler state. Relies on the SB6_*_TEX
 * enum values being contiguous in gl_shader_stage order starting at
 * SB6_VS_TEX — TODO confirm against the a6xx enum definitions.
 */
static inline enum a6xx_state_block
tu6_stage2texsb(gl_shader_stage stage)
{
   return (enum a6xx_state_block) (SB6_VS_TEX + stage);
}
159
/* State block for a stage's shader state. Like tu6_stage2texsb(), assumes
 * the SB6_*_SHADER values are contiguous in gl_shader_stage order starting
 * at SB6_VS_SHADER — TODO confirm against the a6xx enum definitions.
 */
static inline enum a6xx_state_block
tu6_stage2shadersb(gl_shader_stage stage)
{
   return (enum a6xx_state_block) (SB6_VS_SHADER + stage);
}
165
/* Translate a VkLogicOp to the hw ROP code via table lookup.
 * note: hw enum matches the VK enum, but with the 4 bits reversed */
static inline enum a3xx_rop_code
tu6_rop(VkLogicOp op)
{
   static const enum a3xx_rop_code lookup[] = {
      [VK_LOGIC_OP_CLEAR] = ROP_CLEAR,
      [VK_LOGIC_OP_AND] = ROP_AND,
      [VK_LOGIC_OP_AND_REVERSE] = ROP_AND_REVERSE,
      [VK_LOGIC_OP_COPY] = ROP_COPY,
      [VK_LOGIC_OP_AND_INVERTED] = ROP_AND_INVERTED,
      [VK_LOGIC_OP_NO_OP] = ROP_NOOP,
      [VK_LOGIC_OP_XOR] = ROP_XOR,
      [VK_LOGIC_OP_OR] = ROP_OR,
      [VK_LOGIC_OP_NOR] = ROP_NOR,
      [VK_LOGIC_OP_EQUIVALENT] = ROP_EQUIV,
      [VK_LOGIC_OP_INVERT] = ROP_INVERT,
      [VK_LOGIC_OP_OR_REVERSE] = ROP_OR_REVERSE,
      [VK_LOGIC_OP_COPY_INVERTED] = ROP_COPY_INVERTED,
      [VK_LOGIC_OP_OR_INVERTED] = ROP_OR_INVERTED,
      [VK_LOGIC_OP_NAND] = ROP_NAND,
      [VK_LOGIC_OP_SET] = ROP_SET,
   };
   assert(op < ARRAY_SIZE(lookup));
   return lookup[op];
}
191
192 static inline bool
tu6_primtype_line(enum pc_di_primtype type)193 tu6_primtype_line(enum pc_di_primtype type)
194 {
195 switch(type) {
196 case DI_PT_LINELIST:
197 case DI_PT_LINESTRIP:
198 case DI_PT_LINE_ADJ:
199 case DI_PT_LINESTRIP_ADJ:
200 return true;
201 default:
202 return false;
203 }
204 }
205
/* Return whether the hw primitive type is a tessellation patch type.
 * DI_PT_PATCHES0..DI_PT_PATCHES31 presumably encode the patch control-point
 * count in the enum value (see tu6_primtype(), which starts at PATCHES0).
 */
static inline bool
tu6_primtype_patches(enum pc_di_primtype type)
{
   return type >= DI_PT_PATCHES0 && type <= DI_PT_PATCHES31;
}
211
/* Translate a VkPrimitiveTopology to the hw primitive type via table
 * lookup. Patch lists need a later fixup for the control-point count. */
static inline enum pc_di_primtype
tu6_primtype(VkPrimitiveTopology topology)
{
   static const enum pc_di_primtype lookup[] = {
      [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = DI_PT_POINTLIST,
      [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = DI_PT_LINELIST,
      [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = DI_PT_LINESTRIP,
      [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = DI_PT_TRILIST,
      [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = DI_PT_TRISTRIP,
      [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = DI_PT_TRIFAN,
      [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = DI_PT_LINE_ADJ,
      [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = DI_PT_LINESTRIP_ADJ,
      [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = DI_PT_TRI_ADJ,
      [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = DI_PT_TRISTRIP_ADJ,
      /* Return PATCH0 and update in tu_pipeline_builder_parse_tessellation */
      [VK_PRIMITIVE_TOPOLOGY_PATCH_LIST] = DI_PT_PATCHES0,
   };
   assert(topology < ARRAY_SIZE(lookup));
   return lookup[topology];
}
232
/* VkCompareOp maps 1:1 onto the hw compare func, so this is a plain cast. */
static inline enum adreno_compare_func
tu6_compare_func(VkCompareOp op)
{
   return (enum adreno_compare_func) op;
}
238
/* VkStencilOp maps 1:1 onto the hw stencil op, so this is a plain cast. */
static inline enum adreno_stencil_op
tu6_stencil_op(VkStencilOp op)
{
   return (enum adreno_stencil_op) op;
}
244
/* Translate a VkBlendFactor to the hw blend factor via table lookup. */
static inline enum adreno_rb_blend_factor
tu6_blend_factor(VkBlendFactor factor)
{
   static const enum adreno_rb_blend_factor lookup[] = {
      [VK_BLEND_FACTOR_ZERO] = FACTOR_ZERO,
      [VK_BLEND_FACTOR_ONE] = FACTOR_ONE,
      [VK_BLEND_FACTOR_SRC_COLOR] = FACTOR_SRC_COLOR,
      [VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR] = FACTOR_ONE_MINUS_SRC_COLOR,
      [VK_BLEND_FACTOR_DST_COLOR] = FACTOR_DST_COLOR,
      [VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR] = FACTOR_ONE_MINUS_DST_COLOR,
      [VK_BLEND_FACTOR_SRC_ALPHA] = FACTOR_SRC_ALPHA,
      [VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA] = FACTOR_ONE_MINUS_SRC_ALPHA,
      [VK_BLEND_FACTOR_DST_ALPHA] = FACTOR_DST_ALPHA,
      [VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA] = FACTOR_ONE_MINUS_DST_ALPHA,
      [VK_BLEND_FACTOR_CONSTANT_COLOR] = FACTOR_CONSTANT_COLOR,
      [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR]= FACTOR_ONE_MINUS_CONSTANT_COLOR,
      [VK_BLEND_FACTOR_CONSTANT_ALPHA] = FACTOR_CONSTANT_ALPHA,
      [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA]= FACTOR_ONE_MINUS_CONSTANT_ALPHA,
      [VK_BLEND_FACTOR_SRC_ALPHA_SATURATE] = FACTOR_SRC_ALPHA_SATURATE,
      [VK_BLEND_FACTOR_SRC1_COLOR] = FACTOR_SRC1_COLOR,
      [VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR] = FACTOR_ONE_MINUS_SRC1_COLOR,
      [VK_BLEND_FACTOR_SRC1_ALPHA] = FACTOR_SRC1_ALPHA,
      [VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA] = FACTOR_ONE_MINUS_SRC1_ALPHA,
   };
   assert(factor < ARRAY_SIZE(lookup));
   return lookup[factor];
}
272
273 static inline bool
tu_blend_factor_is_dual_src(VkBlendFactor factor)274 tu_blend_factor_is_dual_src(VkBlendFactor factor)
275 {
276 switch (factor) {
277 case VK_BLEND_FACTOR_SRC1_COLOR:
278 case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
279 case VK_BLEND_FACTOR_SRC1_ALPHA:
280 case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
281 return true;
282 default:
283 return false;
284 }
285 }
286
/* VkBlendOp maps 1:1 onto the hw blend opcode, so this is a plain cast. */
static inline enum a3xx_rb_blend_opcode
tu6_blend_op(VkBlendOp op)
{
   return (enum a3xx_rb_blend_opcode) op;
}
292
/* Translate a VkImageViewType to the hw texture type. Array-ness is not
 * encoded here. Cube (and cube-array) views used as storage images are
 * programmed as 2D — presumably because image load/store addresses cube
 * faces as 2D array layers. The 'default' falls through to 1D.
 */
static inline enum a6xx_tex_type
tu6_tex_type(VkImageViewType type, bool storage)
{
   switch (type) {
   default:
   case VK_IMAGE_VIEW_TYPE_1D:
   case VK_IMAGE_VIEW_TYPE_1D_ARRAY:
      return A6XX_TEX_1D;
   case VK_IMAGE_VIEW_TYPE_2D:
   case VK_IMAGE_VIEW_TYPE_2D_ARRAY:
      return A6XX_TEX_2D;
   case VK_IMAGE_VIEW_TYPE_3D:
      return A6XX_TEX_3D;
   case VK_IMAGE_VIEW_TYPE_CUBE:
   case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY:
      return storage ? A6XX_TEX_2D : A6XX_TEX_CUBE;
   }
}
311
/* Translate a VkSamplerAddressMode to the hw wrap/clamp mode via table
 * lookup. */
static inline enum a6xx_tex_clamp
tu6_tex_wrap(VkSamplerAddressMode address_mode)
{
   static const enum a6xx_tex_clamp lookup[] = {
      [VK_SAMPLER_ADDRESS_MODE_REPEAT] = A6XX_TEX_REPEAT,
      [VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT] = A6XX_TEX_MIRROR_REPEAT,
      [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE] = A6XX_TEX_CLAMP_TO_EDGE,
      [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER] = A6XX_TEX_CLAMP_TO_BORDER,
      [VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE] = A6XX_TEX_MIRROR_CLAMP,
   };
   assert(address_mode < ARRAY_SIZE(lookup));
   return lookup[address_mode];
}
325
326 static inline enum a6xx_tex_filter
tu6_tex_filter(VkFilter filter,unsigned aniso)327 tu6_tex_filter(VkFilter filter, unsigned aniso)
328 {
329 switch (filter) {
330 case VK_FILTER_NEAREST:
331 return A6XX_TEX_NEAREST;
332 case VK_FILTER_LINEAR:
333 return aniso ? A6XX_TEX_ANISO : A6XX_TEX_LINEAR;
334 case VK_FILTER_CUBIC_EXT:
335 return A6XX_TEX_CUBIC;
336 default:
337 unreachable("illegal texture filter");
338 break;
339 }
340 }
341
/* VkSamplerReductionMode maps 1:1 onto the hw reduction mode, so this is a
 * plain cast. */
static inline enum a6xx_reduction_mode
tu6_reduction_mode(VkSamplerReductionMode reduction_mode)
{
   return (enum a6xx_reduction_mode) reduction_mode;
}
347
/* Translate a depth/stencil VkFormat to the hw depth buffer format.
 * Stencil-only (S8_UINT) takes the 32-bit path alongside D32; any
 * non-depth/stencil format yields DEPTH6_NONE.
 */
static inline enum a6xx_depth_format
tu6_pipe2depth(VkFormat format)
{
   switch (format) {
   case VK_FORMAT_D16_UNORM:
      return DEPTH6_16;
   case VK_FORMAT_X8_D24_UNORM_PACK32:
   case VK_FORMAT_D24_UNORM_S8_UINT:
      return DEPTH6_24_8;
   case VK_FORMAT_D32_SFLOAT:
   case VK_FORMAT_D32_SFLOAT_S8_UINT:
   case VK_FORMAT_S8_UINT:
      return DEPTH6_32;
   default:
      return DEPTH6_NONE;
   }
}
365
/* Translate a VkPolygonMode to the hw polygon (rasterization) mode. */
static inline enum a6xx_polygon_mode
tu6_polygon_mode(VkPolygonMode mode)
{
   switch (mode) {
   case VK_POLYGON_MODE_POINT:
      return POLYMODE6_POINTS;
   case VK_POLYGON_MODE_LINE:
      return POLYMODE6_LINES;
   case VK_POLYGON_MODE_FILL:
      return POLYMODE6_TRIANGLES;
   default:
      unreachable("bad polygon mode");
   }
}
380
/* One border-color table entry: the border color pre-packed in every format
 * the hw may sample it as (filled by tu6_pack_border_color()). The fields
 * plus trailing padding total exactly 128 bytes, and the 128-byte alignment
 * presumably matches the hw table stride — TODO confirm.
 */
struct bcolor_entry {
   alignas(128) uint32_t fp32[4];
   uint64_t ui16;
   uint64_t si16;
   uint64_t fp16;
   uint16_t rgb565;
   uint16_t rgb5a1;
   uint16_t rgba4;
   uint8_t __pad0[2];
   uint32_t ui8;
   uint32_t si8;
   uint32_t rgb10a2;
   uint32_t z24; /* also s8? */
   uint64_t srgb;
   uint8_t __pad1[56]; /* pad the entry out to 128 bytes */
};
static_assert(alignof(struct bcolor_entry) == 128, "");
398
399 /* vulkan does not want clamping of integer clear values, differs from u_format
400 * see spec for VkClearColorValue
401 */
/* Pack the low byte of each of the four channel values into a single
 * 32-bit word, channel 0 in the least significant byte. Values are
 * truncated, not clamped (see the comment above on integer clear values).
 */
static inline void
pack_int8(uint32_t *dst, const uint32_t *val)
{
   uint32_t packed = 0;
   for (unsigned chan = 0; chan < 4; chan++)
      packed |= (val[chan] & 0xff) << (8 * chan);
   *dst = packed;
}
410
/* Pack four channel values as 10:10:10:2 into a single 32-bit word,
 * channel 0 in the low bits. Values are truncated, not clamped (see the
 * comment above on integer clear values).
 */
static inline void
pack_int10_2(uint32_t *dst, const uint32_t *val)
{
   uint32_t packed = val[0] & 0x3ff;
   packed |= (val[1] & 0x3ff) << 10;
   packed |= (val[2] & 0x3ff) << 20;
   packed |= (val[3] & 0x3) << 30;
   *dst = packed;
}
419
/* Pack the low 16 bits of each of the four channel values into two 32-bit
 * words: channels 0/1 into dst[0], channels 2/3 into dst[1] (even channel
 * in the low half). Values are truncated, not clamped (see the comment
 * above on integer clear values).
 */
static inline void
pack_int16(uint32_t *dst, const uint32_t *val)
{
   for (unsigned i = 0; i < 2; i++) {
      uint32_t lo = val[2 * i + 0] & 0xffff;
      uint32_t hi = val[2 * i + 1] & 0xffff;
      dst[i] = lo | (hi << 16);
   }
}
428
/* Fill a bcolor_entry with 'val' packed in each hw border-color format
 * slot. The raw fp32 value is always stored first. For integer formats
 * only the fp16 slot is filled, with the raw (truncated, unclamped)
 * integer channels — presumably where the hw reads integer border colors
 * from; TODO confirm.
 */
static inline void
tu6_pack_border_color(struct bcolor_entry *bcolor, const VkClearColorValue *val, bool is_int)
{
   memcpy(bcolor->fp32, val, 4 * sizeof(float));
   if (is_int) {
      pack_int16((uint32_t*) &bcolor->fp16, val->uint32);
      return;
   }
/* Pack val->float32 into slot 'x' of the entry using the generated
 * u_format packing helper for 'type' (a single 1x1 pixel). */
#define PACK_F(x, type) util_format_##type##_pack_rgba_float \
   ( (uint8_t*) (&bcolor->x), 0, val->float32, 0, 1, 1)
   PACK_F(ui16, r16g16b16a16_unorm);
   PACK_F(si16, r16g16b16a16_snorm);
   PACK_F(fp16, r16g16b16a16_float);
   PACK_F(rgb565, r5g6b5_unorm);
   PACK_F(rgb5a1, r5g5b5a1_unorm);
   PACK_F(rgba4, r4g4b4a4_unorm);
   PACK_F(ui8, r8g8b8a8_unorm);
   PACK_F(si8, r8g8b8a8_snorm);
   PACK_F(rgb10a2, r10g10b10a2_unorm);
   /* z24 has no PACK_F-compatible helper; pack the depth component only. */
   util_format_z24x8_unorm_pack_z_float((uint8_t*) &bcolor->z24,
                                        0, val->float32, 0, 1, 1);
   PACK_F(srgb, r16g16b16a16_float); /* TODO: clamp? */
#undef PACK_F
}
453
void
tu_dbg_log_gmem_load_store_skips(struct tu_device *device);

/* Emit a performance warning to the mesa log when TU_DEBUG=perf is set.
 * 'device' is currently unused by the macro body. */
#define perf_debug(device, fmt, ...) do {                               \
   if (TU_DEBUG(PERF))                                                  \
      mesa_log(MESA_LOG_WARN, (MESA_LOG_TAG), (fmt), ##__VA_ARGS__);    \
} while(0)
461
/* sizeof a struct member without needing an instance of the struct. */
#define sizeof_field(s, field) sizeof(((s *) NULL)->field)

/* Byte offset of array element field[idx] within struct 's'. */
#define offsetof_arr(s, field, idx) \
   (offsetof(s, field) + sizeof_field(s, field[0]) * (idx))
466
467 #endif /* TU_UTIL_H */
468