1 /*
2 * Copyright 2017 Advanced Micro Devices, Inc.
3 *
4 * SPDX-License-Identifier: MIT
5 */
6
7 #include "compiler/nir/nir.h"
8 #include "ac_nir.h"
9 #include "ac_shader_util.h"
10 #include "radeon_uvd_enc.h"
11 #include "radeon_vce.h"
12 #include "radeon_video.h"
13 #include "si_pipe.h"
14 #include "util/u_cpu_detect.h"
15 #include "util/u_screen.h"
16 #include "util/u_video.h"
17 #include "vl/vl_decoder.h"
18 #include "vl/vl_video_buffer.h"
19 #include <sys/utsname.h>
20
/* The capabilities reported by the kernel have priority
 * over the existing logic in si_get_video_param. */
23 #define QUERYABLE_KERNEL (sscreen->info.is_amdgpu && \
24 !!(sscreen->info.drm_minor >= 41))
25 #define KERNEL_DEC_CAP(codec, attrib) \
26 (codec > PIPE_VIDEO_FORMAT_UNKNOWN && codec <= PIPE_VIDEO_FORMAT_AV1) ? \
27 (sscreen->info.dec_caps.codec_info[codec - 1].valid ? \
28 sscreen->info.dec_caps.codec_info[codec - 1].attrib : 0) : 0
29 #define KERNEL_ENC_CAP(codec, attrib) \
30 (codec > PIPE_VIDEO_FORMAT_UNKNOWN && codec <= PIPE_VIDEO_FORMAT_AV1) ? \
31 (sscreen->info.enc_caps.codec_info[codec - 1].valid ? \
32 sscreen->info.enc_caps.codec_info[codec - 1].attrib : 0) : 0
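
/* Example (illustrative): KERNEL_DEC_CAP(PIPE_VIDEO_FORMAT_MPEG4_AVC, max_width)
 * evaluates to the kernel-reported maximum decode width for H.264, or 0 when
 * the kernel did not mark that codec as valid or the codec index falls outside
 * the queryable range.
 */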
33
static const char *si_get_vendor(struct pipe_screen *pscreen)
35 {
36 return "AMD";
37 }
38
static const char *si_get_device_vendor(struct pipe_screen *pscreen)
40 {
41 return "AMD";
42 }
43
44 static bool
si_is_compute_copy_faster(struct pipe_screen *pscreen,
                          enum pipe_format src_format,
                          enum pipe_format dst_format,
                          unsigned width,
                          unsigned height,
                          unsigned depth,
                          bool cpu)
52 {
53 if (cpu)
54 /* very basic for now */
55 return width * height * depth > 64 * 64;
56 return false;
57 }
58
static int si_get_shader_param(struct pipe_screen *pscreen, enum pipe_shader_type shader,
                               enum pipe_shader_cap param)
61 {
62 struct si_screen *sscreen = (struct si_screen *)pscreen;
63
64 if (shader == PIPE_SHADER_MESH ||
65 shader == PIPE_SHADER_TASK)
66 return 0;
67
68 switch (param) {
69 /* Shader limits. */
70 case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
71 case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
72 case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
73 case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
74 case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
75 return 16384;
76 case PIPE_SHADER_CAP_MAX_INPUTS:
77 return shader == PIPE_SHADER_VERTEX ? SI_MAX_ATTRIBS : 32;
78 case PIPE_SHADER_CAP_MAX_OUTPUTS:
79 return shader == PIPE_SHADER_FRAGMENT ? 8 : 32;
80 case PIPE_SHADER_CAP_MAX_TEMPS:
81 return 256; /* Max native temporaries. */
82 case PIPE_SHADER_CAP_MAX_CONST_BUFFER0_SIZE:
83 return 1 << 26; /* 64 MB */
84 case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
85 return SI_NUM_CONST_BUFFERS;
86 case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
87 case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
88 return SI_NUM_SAMPLERS;
89 case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
90 return SI_NUM_SHADER_BUFFERS;
91 case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
92 return SI_NUM_IMAGES;
93
94 case PIPE_SHADER_CAP_SUPPORTED_IRS:
95 if (shader == PIPE_SHADER_COMPUTE) {
96 return (1 << PIPE_SHADER_IR_NATIVE) |
97 (1 << PIPE_SHADER_IR_NIR) |
98 (1 << PIPE_SHADER_IR_TGSI);
99 }
100 return (1 << PIPE_SHADER_IR_TGSI) |
101 (1 << PIPE_SHADER_IR_NIR);
102
103 /* Supported boolean features. */
104 case PIPE_SHADER_CAP_CONT_SUPPORTED:
105 case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
106 case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
107 case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
108 case PIPE_SHADER_CAP_INTEGERS:
109 case PIPE_SHADER_CAP_INT64_ATOMICS:
110 case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
111 return 1;
112
113 case PIPE_SHADER_CAP_FP16_CONST_BUFFERS:
114 /* We need f16c for fast FP16 conversions in glUniform. */
115 if (!util_get_cpu_caps()->has_f16c)
116 return 0;
117 FALLTHROUGH;
118 case PIPE_SHADER_CAP_FP16:
119 case PIPE_SHADER_CAP_FP16_DERIVATIVES:
120 case PIPE_SHADER_CAP_GLSL_16BIT_CONSTS:
121 case PIPE_SHADER_CAP_INT16:
122 return sscreen->nir_options->lower_mediump_io != NULL;
123
124 /* Unsupported boolean features. */
125 case PIPE_SHADER_CAP_SUBROUTINES:
126 case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
127 case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:
128 return 0;
129 }
130 return 0;
131 }
132
static const void *si_get_compiler_options(struct pipe_screen *screen, enum pipe_shader_ir ir,
                                           enum pipe_shader_type shader)
135 {
136 struct si_screen *sscreen = (struct si_screen *)screen;
137
138 assert(ir == PIPE_SHADER_IR_NIR);
139 return sscreen->nir_options;
140 }
141
static void si_get_driver_uuid(struct pipe_screen *pscreen, char *uuid)
143 {
144 ac_compute_driver_uuid(uuid, PIPE_UUID_SIZE);
145 }
146
static void si_get_device_uuid(struct pipe_screen *pscreen, char *uuid)
148 {
149 struct si_screen *sscreen = (struct si_screen *)pscreen;
150
151 ac_compute_device_uuid(&sscreen->info, uuid, PIPE_UUID_SIZE);
152 }
153
static const char *si_get_name(struct pipe_screen *pscreen)
155 {
156 struct si_screen *sscreen = (struct si_screen *)pscreen;
157
158 return sscreen->renderer_string;
159 }
160
static int si_get_video_param_no_video_hw(struct pipe_screen *screen, enum pipe_video_profile profile,
                                          enum pipe_video_entrypoint entrypoint,
                                          enum pipe_video_cap param)
164 {
165 switch (param) {
166 case PIPE_VIDEO_CAP_SUPPORTED:
167 return vl_profile_supported(screen, profile, entrypoint);
168 case PIPE_VIDEO_CAP_NPOT_TEXTURES:
169 return 1;
170 case PIPE_VIDEO_CAP_MAX_WIDTH:
171 case PIPE_VIDEO_CAP_MAX_HEIGHT:
172 return vl_video_buffer_max_size(screen);
173 case PIPE_VIDEO_CAP_PREFERED_FORMAT:
174 return PIPE_FORMAT_NV12;
175 case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
176 return false;
177 case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
178 return false;
179 case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
180 return true;
181 case PIPE_VIDEO_CAP_MAX_LEVEL:
182 return vl_level_supported(screen, profile);
183 default:
184 return 0;
185 }
186 }
187
static int si_get_video_param(struct pipe_screen *screen, enum pipe_video_profile profile,
                              enum pipe_video_entrypoint entrypoint, enum pipe_video_cap param)
190 {
191 struct si_screen *sscreen = (struct si_screen *)screen;
192 enum pipe_video_format codec = u_reduce_video_profile(profile);
193 bool fully_supported_profile = ((profile >= PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE) &&
194 (profile <= PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH)) ||
195 (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN) ||
196 (profile == PIPE_VIDEO_PROFILE_AV1_MAIN);
197
   /* Return the capabilities of the Video Post Processor.
    * The VPE hardware version and its limitations still have to be
    * determined; for now only check that a valid VPE IP block exists.
    */
203 if (sscreen->info.ip[AMD_IP_VPE].num_queues && entrypoint == PIPE_VIDEO_ENTRYPOINT_PROCESSING) {
204
205 switch(param) {
206 case PIPE_VIDEO_CAP_SUPPORTED:
207 return true;
208 case PIPE_VIDEO_CAP_MAX_WIDTH:
209 return 10240;
210 case PIPE_VIDEO_CAP_MAX_HEIGHT:
211 return 10240;
212 case PIPE_VIDEO_CAP_VPP_MAX_INPUT_WIDTH:
213 return 10240;
214 case PIPE_VIDEO_CAP_VPP_MAX_INPUT_HEIGHT:
215 return 10240;
216 case PIPE_VIDEO_CAP_VPP_MIN_INPUT_WIDTH:
217 return 16;
218 case PIPE_VIDEO_CAP_VPP_MIN_INPUT_HEIGHT:
219 return 16;
220 case PIPE_VIDEO_CAP_VPP_MAX_OUTPUT_WIDTH:
221 return 10240;
222 case PIPE_VIDEO_CAP_VPP_MAX_OUTPUT_HEIGHT:
223 return 10240;
224 case PIPE_VIDEO_CAP_VPP_MIN_OUTPUT_WIDTH:
225 return 16;
226 case PIPE_VIDEO_CAP_VPP_MIN_OUTPUT_HEIGHT:
227 return 16;
228 case PIPE_VIDEO_CAP_VPP_ORIENTATION_MODES:
         /* The first VPE generation does not support orientation changes.
          * The VPE version and feature set will have to be queried in the future.
          */
232 return PIPE_VIDEO_VPP_ORIENTATION_DEFAULT;
233 case PIPE_VIDEO_CAP_VPP_BLEND_MODES:
         /* The first VPE generation does not support blending.
          * The VPE version and feature set will have to be queried in the future.
          */
237 return PIPE_VIDEO_VPP_BLEND_MODE_NONE;
238 case PIPE_VIDEO_CAP_PREFERED_FORMAT:
239 return PIPE_FORMAT_NV12;
240 case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
241 return false;
242 case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
243 return true;
244 case PIPE_VIDEO_CAP_REQUIRES_FLUSH_ON_END_FRAME:
245 /* true: VPP flush function will be called within vaEndPicture() */
246 /* false: VPP flush function will be skipped */
247 return false;
248 case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
249 /* for VPE we prefer non-interlaced buffer */
250 return false;
251 default:
252 return 0;
253 }
254 }
255
256 if (entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE) {
257 if (!(sscreen->info.ip[AMD_IP_VCE].num_queues ||
258 sscreen->info.ip[AMD_IP_UVD_ENC].num_queues ||
259 sscreen->info.ip[AMD_IP_VCN_ENC].num_queues))
260 return 0;
261
262 if (sscreen->info.vcn_ip_version == VCN_4_0_3 ||
263 sscreen->info.vcn_ip_version == VCN_5_0_1)
264 return 0;
265
266 switch (param) {
267 case PIPE_VIDEO_CAP_SUPPORTED:
268 return (
269 /* in case it is explicitly marked as not supported by the kernel */
270 ((QUERYABLE_KERNEL && fully_supported_profile) ? KERNEL_ENC_CAP(codec, valid) : 1) &&
271 ((codec == PIPE_VIDEO_FORMAT_MPEG4_AVC && profile != PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH10 &&
272 (sscreen->info.vcn_ip_version >= VCN_1_0_0 || si_vce_is_fw_version_supported(sscreen))) ||
273 (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN &&
274 (sscreen->info.vcn_ip_version >= VCN_1_0_0 || si_radeon_uvd_enc_supported(sscreen))) ||
275 (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10 && sscreen->info.vcn_ip_version >= VCN_2_0_0) ||
276 (profile == PIPE_VIDEO_PROFILE_AV1_MAIN &&
277 (sscreen->info.vcn_ip_version >= VCN_4_0_0 && sscreen->info.vcn_ip_version != VCN_4_0_3))));
278 case PIPE_VIDEO_CAP_NPOT_TEXTURES:
279 return 1;
280 case PIPE_VIDEO_CAP_MIN_WIDTH:
281 if (sscreen->info.vcn_ip_version >= VCN_5_0_0) {
282 if (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC)
283 return 96;
284 else if (codec == PIPE_VIDEO_FORMAT_HEVC)
285 return 384;
286 else if (codec == PIPE_VIDEO_FORMAT_AV1)
287 return 320;
288 }
289 return (codec == PIPE_VIDEO_FORMAT_HEVC) ? 130 : 128;
290 case PIPE_VIDEO_CAP_MIN_HEIGHT:
291 if (sscreen->info.vcn_ip_version >= VCN_5_0_0 && codec == PIPE_VIDEO_FORMAT_MPEG4_AVC)
292 return 32;
293 return 128;
294 case PIPE_VIDEO_CAP_MAX_WIDTH:
295 if (codec != PIPE_VIDEO_FORMAT_UNKNOWN && QUERYABLE_KERNEL)
296 return KERNEL_ENC_CAP(codec, max_width);
297 else
298 return (sscreen->info.family < CHIP_TONGA) ? 2048 : 4096;
299 case PIPE_VIDEO_CAP_MAX_HEIGHT:
300 if (codec != PIPE_VIDEO_FORMAT_UNKNOWN && QUERYABLE_KERNEL)
301 return KERNEL_ENC_CAP(codec, max_height);
302 else
303 return (sscreen->info.family < CHIP_TONGA) ? 1152 : 2304;
304 case PIPE_VIDEO_CAP_PREFERED_FORMAT:
305 if (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)
306 return PIPE_FORMAT_P010;
307 else
308 return PIPE_FORMAT_NV12;
309 case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
310 return false;
311 case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
312 return false;
313 case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
314 return true;
315 case PIPE_VIDEO_CAP_STACKED_FRAMES:
316 return (sscreen->info.family < CHIP_TONGA) ? 1 : 2;
317 case PIPE_VIDEO_CAP_MAX_TEMPORAL_LAYERS:
318 return (sscreen->info.ip[AMD_IP_UVD_ENC].num_queues ||
319 sscreen->info.vcn_ip_version >= VCN_1_0_0) ? 4 : 0;
320 case PIPE_VIDEO_CAP_ENC_QUALITY_LEVEL:
321 return 32;
322 case PIPE_VIDEO_CAP_ENC_SUPPORTS_MAX_FRAME_SIZE:
323 return 1;
324
325 case PIPE_VIDEO_CAP_ENC_HEVC_FEATURE_FLAGS:
326 if (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN ||
327 profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) {
328 union pipe_h265_enc_cap_features pipe_features;
329 pipe_features.value = 0;
330
331 pipe_features.bits.amp = PIPE_ENC_FEATURE_SUPPORTED;
332 pipe_features.bits.strong_intra_smoothing = PIPE_ENC_FEATURE_SUPPORTED;
333 pipe_features.bits.constrained_intra_pred = PIPE_ENC_FEATURE_SUPPORTED;
334 pipe_features.bits.deblocking_filter_disable
335 = PIPE_ENC_FEATURE_SUPPORTED;
336 if (sscreen->info.vcn_ip_version >= VCN_2_0_0) {
337 pipe_features.bits.sao = PIPE_ENC_FEATURE_SUPPORTED;
338 pipe_features.bits.cu_qp_delta = PIPE_ENC_FEATURE_SUPPORTED;
339 }
340 if (sscreen->info.vcn_ip_version >= VCN_3_0_0)
341 pipe_features.bits.transform_skip = PIPE_ENC_FEATURE_SUPPORTED;
342
343 return pipe_features.value;
344 } else
345 return 0;
346
347 case PIPE_VIDEO_CAP_ENC_HEVC_BLOCK_SIZES:
348 if (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN ||
349 profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) {
350 union pipe_h265_enc_cap_block_sizes pipe_block_sizes;
351 pipe_block_sizes.value = 0;
352
353 pipe_block_sizes.bits.log2_max_coding_tree_block_size_minus3 = 3;
354 pipe_block_sizes.bits.log2_min_coding_tree_block_size_minus3 = 3;
355 pipe_block_sizes.bits.log2_min_luma_coding_block_size_minus3 = 0;
356 pipe_block_sizes.bits.log2_max_luma_transform_block_size_minus2 = 3;
357 pipe_block_sizes.bits.log2_min_luma_transform_block_size_minus2 = 0;
358
359 if (sscreen->info.ip[AMD_IP_UVD_ENC].num_queues) {
360 pipe_block_sizes.bits.max_max_transform_hierarchy_depth_inter = 3;
361 pipe_block_sizes.bits.min_max_transform_hierarchy_depth_inter = 3;
362 pipe_block_sizes.bits.max_max_transform_hierarchy_depth_intra = 3;
363 pipe_block_sizes.bits.min_max_transform_hierarchy_depth_intra = 3;
364 }
365
366 return pipe_block_sizes.value;
367 } else
368 return 0;
369
370 case PIPE_VIDEO_CAP_ENC_MAX_SLICES_PER_FRAME:
371 return 128;
372
373 case PIPE_VIDEO_CAP_ENC_SLICES_STRUCTURE:
374 return PIPE_VIDEO_CAP_SLICE_STRUCTURE_ARBITRARY_MACROBLOCKS |
375 PIPE_VIDEO_CAP_SLICE_STRUCTURE_EQUAL_ROWS |
376 PIPE_VIDEO_CAP_SLICE_STRUCTURE_EQUAL_MULTI_ROWS;
377
378 case PIPE_VIDEO_CAP_ENC_AV1_FEATURE:
379 if (sscreen->info.vcn_ip_version >= VCN_4_0_0 && sscreen->info.vcn_ip_version != VCN_4_0_3) {
380 union pipe_av1_enc_cap_features attrib;
381 attrib.value = 0;
382
383 attrib.bits.support_128x128_superblock = PIPE_ENC_FEATURE_NOT_SUPPORTED;
384 attrib.bits.support_filter_intra = PIPE_ENC_FEATURE_NOT_SUPPORTED;
385 attrib.bits.support_intra_edge_filter = PIPE_ENC_FEATURE_NOT_SUPPORTED;
386 attrib.bits.support_interintra_compound = PIPE_ENC_FEATURE_NOT_SUPPORTED;
387 attrib.bits.support_masked_compound = PIPE_ENC_FEATURE_NOT_SUPPORTED;
388 attrib.bits.support_warped_motion = PIPE_ENC_FEATURE_NOT_SUPPORTED;
389 attrib.bits.support_palette_mode = PIPE_ENC_FEATURE_SUPPORTED;
390 attrib.bits.support_dual_filter = PIPE_ENC_FEATURE_NOT_SUPPORTED;
391 attrib.bits.support_jnt_comp = PIPE_ENC_FEATURE_NOT_SUPPORTED;
392 attrib.bits.support_ref_frame_mvs = PIPE_ENC_FEATURE_NOT_SUPPORTED;
393 attrib.bits.support_superres = PIPE_ENC_FEATURE_NOT_SUPPORTED;
394 attrib.bits.support_restoration = PIPE_ENC_FEATURE_NOT_SUPPORTED;
395 attrib.bits.support_allow_intrabc = PIPE_ENC_FEATURE_NOT_SUPPORTED;
396 attrib.bits.support_cdef_channel_strength = PIPE_ENC_FEATURE_SUPPORTED;
397
398 return attrib.value;
399 } else
400 return 0;
401
402 case PIPE_VIDEO_CAP_ENC_AV1_FEATURE_EXT1:
403 if (sscreen->info.vcn_ip_version >= VCN_4_0_0 && sscreen->info.vcn_ip_version != VCN_4_0_3) {
404 union pipe_av1_enc_cap_features_ext1 attrib_ext1;
405 attrib_ext1.value = 0;
406 attrib_ext1.bits.interpolation_filter = PIPE_VIDEO_CAP_ENC_AV1_INTERPOLATION_FILTER_EIGHT_TAP |
407 PIPE_VIDEO_CAP_ENC_AV1_INTERPOLATION_FILTER_EIGHT_TAP_SMOOTH |
408 PIPE_VIDEO_CAP_ENC_AV1_INTERPOLATION_FILTER_EIGHT_TAP_SHARP |
409 PIPE_VIDEO_CAP_ENC_AV1_INTERPOLATION_FILTER_BILINEAR |
410 PIPE_VIDEO_CAP_ENC_AV1_INTERPOLATION_FILTER_SWITCHABLE;
411 attrib_ext1.bits.min_segid_block_size_accepted = 0;
412 attrib_ext1.bits.segment_feature_support = 0;
413
414 return attrib_ext1.value;
415 } else
416 return 0;
417
418 case PIPE_VIDEO_CAP_ENC_AV1_FEATURE_EXT2:
419 if (sscreen->info.vcn_ip_version >= VCN_4_0_0 && sscreen->info.vcn_ip_version != VCN_4_0_3) {
420 union pipe_av1_enc_cap_features_ext2 attrib_ext2;
421 attrib_ext2.value = 0;
422
423 attrib_ext2.bits.tile_size_bytes_minus1 = 3;
424 attrib_ext2.bits.obu_size_bytes_minus1 = 1;
425 /**
426 * tx_mode supported.
427 * (tx_mode_support & 0x01) == 1: ONLY_4X4 is supported, 0: not.
428 * (tx_mode_support & 0x02) == 1: TX_MODE_LARGEST is supported, 0: not.
429 * (tx_mode_support & 0x04) == 1: TX_MODE_SELECT is supported, 0: not.
430 */
431 attrib_ext2.bits.tx_mode_support = PIPE_VIDEO_CAP_ENC_AV1_TX_MODE_SELECT;
432 attrib_ext2.bits.max_tile_num_minus1 = 31;
433
434 return attrib_ext2.value;
435 } else
436 return 0;
437 case PIPE_VIDEO_CAP_ENC_SUPPORTS_TILE:
438 if ((sscreen->info.vcn_ip_version >= VCN_4_0_0 && sscreen->info.vcn_ip_version != VCN_4_0_3) &&
439 profile == PIPE_VIDEO_PROFILE_AV1_MAIN)
440 return 1;
441 else
442 return 0;
443
444 case PIPE_VIDEO_CAP_ENC_MAX_REFERENCES_PER_FRAME:
445 if (sscreen->info.vcn_ip_version >= VCN_3_0_0) {
446 int refPicList0 = 1;
447 int refPicList1 = codec == PIPE_VIDEO_FORMAT_MPEG4_AVC ? 1 : 0;
448 if (sscreen->info.vcn_ip_version >= VCN_5_0_0 && codec == PIPE_VIDEO_FORMAT_AV1) {
449 refPicList0 = 2;
450 refPicList1 = 1;
451 }
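         /* Pack the result as: bits [15:0] = max L0 references,
          * bits [31:16] = max L1 references. */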
452 return refPicList0 | (refPicList1 << 16);
453 } else
454 return 1;
455
456 case PIPE_VIDEO_CAP_ENC_INTRA_REFRESH:
457 return PIPE_VIDEO_ENC_INTRA_REFRESH_ROW |
458 PIPE_VIDEO_ENC_INTRA_REFRESH_COLUMN |
459 PIPE_VIDEO_ENC_INTRA_REFRESH_P_FRAME;
460
461 case PIPE_VIDEO_CAP_ENC_ROI:
462 if (sscreen->info.vcn_ip_version >= VCN_1_0_0) {
463 union pipe_enc_cap_roi attrib;
464 attrib.value = 0;
465
466 attrib.bits.num_roi_regions = PIPE_ENC_ROI_REGION_NUM_MAX;
467 attrib.bits.roi_rc_priority_support = PIPE_ENC_FEATURE_NOT_SUPPORTED;
468 attrib.bits.roi_rc_qp_delta_support = PIPE_ENC_FEATURE_SUPPORTED;
469 return attrib.value;
470 }
471 else
472 return 0;
473 case PIPE_VIDEO_CAP_ENC_SURFACE_ALIGNMENT:
474 if (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN ||
475 profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) {
476 union pipe_enc_cap_surface_alignment attrib;
477 attrib.value = 0;
478
479 attrib.bits.log2_width_alignment = RADEON_ENC_HEVC_SURFACE_LOG2_WIDTH_ALIGNMENT;
480 attrib.bits.log2_height_alignment = RADEON_ENC_HEVC_SURFACE_LOG2_HEIGHT_ALIGNMENT;
481 return attrib.value;
482 }
483 else
484 return 0;
485
486 case PIPE_VIDEO_CAP_ENC_RATE_CONTROL_QVBR:
487 if (sscreen->info.vcn_ip_version >= VCN_3_0_0 &&
488 sscreen->info.vcn_ip_version < VCN_4_0_0)
489 return sscreen->info.vcn_enc_minor_version >= 30;
490
491 if (sscreen->info.vcn_ip_version >= VCN_4_0_0 &&
492 sscreen->info.vcn_ip_version < VCN_5_0_0)
493 return sscreen->info.vcn_enc_minor_version >= 15;
494
495 if (sscreen->info.vcn_ip_version >= VCN_5_0_0)
496 return sscreen->info.vcn_enc_minor_version >= 3;
497
498 return 0;
499
500 default:
501 return 0;
502 }
503 }
504
505 switch (param) {
506 case PIPE_VIDEO_CAP_SUPPORTED:
507 if (codec != PIPE_VIDEO_FORMAT_JPEG &&
508 !(sscreen->info.ip[AMD_IP_UVD].num_queues ||
509 ((sscreen->info.vcn_ip_version >= VCN_4_0_0) ?
510 sscreen->info.ip[AMD_IP_VCN_UNIFIED].num_queues :
511 sscreen->info.ip[AMD_IP_VCN_DEC].num_queues)))
512 return false;
513 if (QUERYABLE_KERNEL && fully_supported_profile &&
514 sscreen->info.vcn_ip_version >= VCN_1_0_0)
515 return KERNEL_DEC_CAP(codec, valid);
516 if (codec < PIPE_VIDEO_FORMAT_MPEG4_AVC &&
517 sscreen->info.vcn_ip_version >= VCN_3_0_33)
518 return false;
519
520 switch (codec) {
521 case PIPE_VIDEO_FORMAT_MPEG12:
522 return !(sscreen->info.vcn_ip_version >= VCN_3_0_33 || profile == PIPE_VIDEO_PROFILE_MPEG1);
523 case PIPE_VIDEO_FORMAT_MPEG4:
524 return !(sscreen->info.vcn_ip_version >= VCN_3_0_33);
525 case PIPE_VIDEO_FORMAT_MPEG4_AVC:
526 if ((sscreen->info.family == CHIP_POLARIS10 || sscreen->info.family == CHIP_POLARIS11) &&
527 sscreen->info.uvd_fw_version < UVD_FW_1_66_16) {
            RVID_ERR("POLARIS10/11 firmware version needs to be updated.\n");
529 return false;
530 }
531 return (profile != PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH10);
532 case PIPE_VIDEO_FORMAT_VC1:
533 return !(sscreen->info.vcn_ip_version >= VCN_3_0_33);
534 case PIPE_VIDEO_FORMAT_HEVC:
535 /* Carrizo only supports HEVC Main */
536 if (sscreen->info.family >= CHIP_STONEY)
537 return (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN ||
538 profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10);
539 else if (sscreen->info.family >= CHIP_CARRIZO)
540 return profile == PIPE_VIDEO_PROFILE_HEVC_MAIN;
541 return false;
542 case PIPE_VIDEO_FORMAT_JPEG:
543 if (sscreen->info.vcn_ip_version >= VCN_1_0_0) {
544 if (!sscreen->info.ip[AMD_IP_VCN_JPEG].num_queues)
545 return false;
546 else
547 return true;
548 }
549 if (sscreen->info.family < CHIP_CARRIZO || sscreen->info.family >= CHIP_VEGA10)
550 return false;
551 if (!sscreen->info.is_amdgpu) {
         RVID_ERR("MJPEG is not supported by this kernel version.\n");
553 return false;
554 }
555 return true;
556 case PIPE_VIDEO_FORMAT_VP9:
557 return sscreen->info.vcn_ip_version >= VCN_1_0_0;
558 case PIPE_VIDEO_FORMAT_AV1:
559 return sscreen->info.vcn_ip_version >= VCN_3_0_0 && sscreen->info.vcn_ip_version != VCN_3_0_33;
560 default:
561 return false;
562 }
563 case PIPE_VIDEO_CAP_NPOT_TEXTURES:
564 return 1;
565 case PIPE_VIDEO_CAP_MIN_WIDTH:
566 case PIPE_VIDEO_CAP_MIN_HEIGHT:
567 return (codec == PIPE_VIDEO_FORMAT_AV1) ? 16 : 64;
568 case PIPE_VIDEO_CAP_MAX_WIDTH:
569 if (codec != PIPE_VIDEO_FORMAT_UNKNOWN && QUERYABLE_KERNEL)
570 return KERNEL_DEC_CAP(codec, max_width);
571 else {
572 switch (codec) {
573 case PIPE_VIDEO_FORMAT_HEVC:
574 case PIPE_VIDEO_FORMAT_VP9:
575 case PIPE_VIDEO_FORMAT_AV1:
576 return (sscreen->info.vcn_ip_version < VCN_2_0_0) ?
577 ((sscreen->info.family < CHIP_TONGA) ? 2048 : 4096) : 8192;
578 default:
579 return (sscreen->info.family < CHIP_TONGA) ? 2048 : 4096;
580 }
581 }
582 case PIPE_VIDEO_CAP_MAX_HEIGHT:
583 if (codec != PIPE_VIDEO_FORMAT_UNKNOWN && QUERYABLE_KERNEL)
584 return KERNEL_DEC_CAP(codec, max_height);
585 else {
586 switch (codec) {
587 case PIPE_VIDEO_FORMAT_HEVC:
588 case PIPE_VIDEO_FORMAT_VP9:
589 case PIPE_VIDEO_FORMAT_AV1:
590 return (sscreen->info.vcn_ip_version < VCN_2_0_0) ?
591 ((sscreen->info.family < CHIP_TONGA) ? 1152 : 4096) : 4352;
592 default:
593 return (sscreen->info.family < CHIP_TONGA) ? 1152 : 4096;
594 }
595 }
596 case PIPE_VIDEO_CAP_PREFERED_FORMAT:
597 if (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)
598 return PIPE_FORMAT_P010;
599 else if (profile == PIPE_VIDEO_PROFILE_VP9_PROFILE2)
600 return PIPE_FORMAT_P010;
601 else
602 return PIPE_FORMAT_NV12;
603
604 case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
605 return false;
606 case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED: {
607 enum pipe_video_format format = u_reduce_video_profile(profile);
608
609 if (format >= PIPE_VIDEO_FORMAT_HEVC)
610 return false;
611
612 return true;
613 }
614 case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
615 return true;
616 case PIPE_VIDEO_CAP_MAX_LEVEL:
617 if ((profile == PIPE_VIDEO_PROFILE_MPEG2_SIMPLE ||
618 profile == PIPE_VIDEO_PROFILE_MPEG2_MAIN ||
619 profile == PIPE_VIDEO_PROFILE_MPEG4_ADVANCED_SIMPLE ||
620 profile == PIPE_VIDEO_PROFILE_VC1_ADVANCED) &&
621 sscreen->info.dec_caps.codec_info[codec - 1].valid) {
622 return sscreen->info.dec_caps.codec_info[codec - 1].max_level;
623 } else {
624 switch (profile) {
625 case PIPE_VIDEO_PROFILE_MPEG1:
626 return 0;
627 case PIPE_VIDEO_PROFILE_MPEG2_SIMPLE:
628 case PIPE_VIDEO_PROFILE_MPEG2_MAIN:
629 return 3;
630 case PIPE_VIDEO_PROFILE_MPEG4_SIMPLE:
631 return 3;
632 case PIPE_VIDEO_PROFILE_MPEG4_ADVANCED_SIMPLE:
633 return 5;
634 case PIPE_VIDEO_PROFILE_VC1_SIMPLE:
635 return 1;
636 case PIPE_VIDEO_PROFILE_VC1_MAIN:
637 return 2;
638 case PIPE_VIDEO_PROFILE_VC1_ADVANCED:
639 return 4;
640 case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:
641 case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN:
642 case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:
643 return (sscreen->info.family < CHIP_TONGA) ? 41 : 52;
644 case PIPE_VIDEO_PROFILE_HEVC_MAIN:
645 case PIPE_VIDEO_PROFILE_HEVC_MAIN_10:
646 return 186;
647 default:
648 return 0;
649 }
650 }
651 case PIPE_VIDEO_CAP_SUPPORTS_CONTIGUOUS_PLANES_MAP:
652 return true;
653 case PIPE_VIDEO_CAP_ROI_CROP_DEC:
654 if (codec == PIPE_VIDEO_FORMAT_JPEG &&
655 (sscreen->info.vcn_ip_version == VCN_4_0_3 ||
656 sscreen->info.vcn_ip_version == VCN_5_0_1))
657 return true;
658 return false;
659 case PIPE_VIDEO_CAP_SKIP_CLEAR_SURFACE:
660 return sscreen->info.is_amdgpu && sscreen->info.drm_minor >= 59;
661 default:
662 return 0;
663 }
664 }
665
static bool si_vid_is_format_supported(struct pipe_screen *screen, enum pipe_format format,
                                       enum pipe_video_profile profile,
                                       enum pipe_video_entrypoint entrypoint)
669 {
670 struct si_screen *sscreen = (struct si_screen *)screen;
671
672 if (sscreen->info.ip[AMD_IP_VPE].num_queues && entrypoint == PIPE_VIDEO_ENTRYPOINT_PROCESSING) {
      /* TODO:
       * We cannot tell whether the caller is asking about an input or an
       * output format; the VA frontend would have to be changed to make
       * that distinction.
       */
677 /* VPE Supported input type */
678 if ((format == PIPE_FORMAT_NV12) || (format == PIPE_FORMAT_NV21) || (format == PIPE_FORMAT_P010))
679 return true;
680
681 /* VPE Supported output type */
682 if ((format == PIPE_FORMAT_A8R8G8B8_UNORM) || (format == PIPE_FORMAT_A8B8G8R8_UNORM) || (format == PIPE_FORMAT_R8G8B8A8_UNORM) ||
683 (format == PIPE_FORMAT_B8G8R8A8_UNORM) || (format == PIPE_FORMAT_X8R8G8B8_UNORM) || (format == PIPE_FORMAT_X8B8G8R8_UNORM) ||
684 (format == PIPE_FORMAT_R8G8B8X8_UNORM) || (format == PIPE_FORMAT_B8G8R8X8_UNORM) || (format == PIPE_FORMAT_A2R10G10B10_UNORM) ||
685 (format == PIPE_FORMAT_A2B10G10R10_UNORM) || (format == PIPE_FORMAT_B10G10R10A2_UNORM) || (format == PIPE_FORMAT_R10G10B10A2_UNORM))
686 return true;
687 }
688
689 /* HEVC 10 bit decoding should use P010 instead of NV12 if possible */
690 if (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)
691 return (format == PIPE_FORMAT_NV12) || (format == PIPE_FORMAT_P010) ||
692 (format == PIPE_FORMAT_P016);
693
   /* VP9 profile 2 supports 10-bit decoding using P016 */
695 if (profile == PIPE_VIDEO_PROFILE_VP9_PROFILE2)
696 return (format == PIPE_FORMAT_P010) || (format == PIPE_FORMAT_P016);
697
698 if (profile == PIPE_VIDEO_PROFILE_AV1_MAIN && entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM)
699 return (format == PIPE_FORMAT_P010) || (format == PIPE_FORMAT_P016) ||
700 (format == PIPE_FORMAT_NV12);
701
702 if (profile == PIPE_VIDEO_PROFILE_AV1_PROFILE2 && entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM) {
703 if (sscreen->info.vcn_ip_version < VCN_5_0_0 && sscreen->info.vcn_ip_version != VCN_4_0_0)
704 return false;
705 return (format == PIPE_FORMAT_P010) || (format == PIPE_FORMAT_P016) ||
706 (format == PIPE_FORMAT_P012) || (format == PIPE_FORMAT_NV12);
707 }
708
709 /* JPEG supports YUV400 and YUV444 */
710 if (profile == PIPE_VIDEO_PROFILE_JPEG_BASELINE) {
711 switch (format) {
712 case PIPE_FORMAT_NV12:
713 case PIPE_FORMAT_YUYV:
714 case PIPE_FORMAT_Y8_400_UNORM:
715 return true;
716 case PIPE_FORMAT_Y8_U8_V8_444_UNORM:
717 case PIPE_FORMAT_Y8_U8_V8_440_UNORM:
718 if (sscreen->info.vcn_ip_version >= VCN_2_0_0)
719 return true;
720 else
721 return false;
722 case PIPE_FORMAT_R8G8B8A8_UNORM:
723 case PIPE_FORMAT_A8R8G8B8_UNORM:
724 case PIPE_FORMAT_R8_G8_B8_UNORM:
725 if (sscreen->info.vcn_ip_version == VCN_4_0_3 ||
726 sscreen->info.vcn_ip_version == VCN_5_0_1)
727 return true;
728 else
729 return false;
730 default:
731 return false;
732 }
733 }
734
735 if ((entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE) &&
736 (((profile == PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH) &&
737 (sscreen->info.vcn_ip_version >= VCN_2_0_0)) ||
738 ((profile == PIPE_VIDEO_PROFILE_AV1_MAIN) &&
739 (sscreen->info.vcn_ip_version >= VCN_4_0_0 &&
740 sscreen->info.vcn_ip_version != VCN_4_0_3 &&
741 sscreen->info.vcn_ip_version != VCN_5_0_1))))
742 return (format == PIPE_FORMAT_P010 || format == PIPE_FORMAT_NV12);
743
744
745 /* we can only handle this one with UVD */
746 if (profile != PIPE_VIDEO_PROFILE_UNKNOWN)
747 return format == PIPE_FORMAT_NV12;
748
749 return vl_video_buffer_is_format_supported(screen, format, profile, entrypoint);
750 }
751
static bool si_vid_is_target_buffer_supported(struct pipe_screen *screen,
                                              enum pipe_format format,
                                              struct pipe_video_buffer *target,
                                              enum pipe_video_profile profile,
                                              enum pipe_video_entrypoint entrypoint)
757 {
758 struct si_screen *sscreen = (struct si_screen *)screen;
759 struct si_texture *tex = (struct si_texture *)((struct vl_video_buffer *)target)->resources[0];
760 const bool is_dcc = tex->surface.meta_offset;
761 const bool is_format_conversion = format != target->buffer_format;
762
763 switch (entrypoint) {
764 case PIPE_VIDEO_ENTRYPOINT_BITSTREAM:
765 if (is_dcc || is_format_conversion)
766 return false;
767 break;
768
769 case PIPE_VIDEO_ENTRYPOINT_ENCODE:
770 if (is_dcc)
771 return false;
772
773 /* EFC */
774 if (is_format_conversion) {
775 const bool input_8bit =
776 target->buffer_format == PIPE_FORMAT_B8G8R8A8_UNORM ||
777 target->buffer_format == PIPE_FORMAT_B8G8R8X8_UNORM ||
778 target->buffer_format == PIPE_FORMAT_R8G8B8A8_UNORM ||
779 target->buffer_format == PIPE_FORMAT_R8G8B8X8_UNORM;
780 const bool input_10bit =
781 target->buffer_format == PIPE_FORMAT_B10G10R10A2_UNORM ||
782 target->buffer_format == PIPE_FORMAT_B10G10R10X2_UNORM ||
783 target->buffer_format == PIPE_FORMAT_R10G10B10A2_UNORM ||
784 target->buffer_format == PIPE_FORMAT_R10G10B10X2_UNORM;
785
786 if (sscreen->info.vcn_ip_version < VCN_2_0_0 ||
787 sscreen->info.vcn_ip_version == VCN_2_2_0 ||
788 sscreen->debug_flags & DBG(NO_EFC))
789 return false;
790
791 if (input_8bit && format != PIPE_FORMAT_NV12)
792 return false;
793 if (input_10bit && format != PIPE_FORMAT_NV12 && format != PIPE_FORMAT_P010)
794 return false;
795 }
796 break;
797
798 default:
799 if (is_format_conversion)
800 return false;
801 break;
802 }
803
804 return si_vid_is_format_supported(screen, format, profile, entrypoint);
805 }
806
static unsigned get_max_threads_per_block(struct si_screen *screen, enum pipe_shader_ir ir_type)
808 {
809 if (ir_type == PIPE_SHADER_IR_NATIVE)
810 return 256;
811
812 /* LLVM only supports 1024 threads per block. */
813 return 1024;
814 }
815
static int si_get_compute_param(struct pipe_screen *screen, enum pipe_shader_ir ir_type,
                                enum pipe_compute_cap param, void *ret)
818 {
819 struct si_screen *sscreen = (struct si_screen *)screen;
820
821 // TODO: select these params by asic
822 switch (param) {
823 case PIPE_COMPUTE_CAP_IR_TARGET: {
824 const char *gpu, *triple;
825
826 triple = "amdgcn-mesa-mesa3d";
827 gpu = ac_get_llvm_processor_name(sscreen->info.family);
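      /* The returned target string has the form "<gpu>-<triple>", e.g.
       * "gfx900-amdgcn-mesa-mesa3d" (example name only; the actual GPU
       * name comes from ac_get_llvm_processor_name()). */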
828 if (ret) {
829 sprintf(ret, "%s-%s", gpu, triple);
830 }
      /* +2 for the dash and the terminating NUL byte */
832 return (strlen(triple) + strlen(gpu) + 2) * sizeof(char);
833 }
834 case PIPE_COMPUTE_CAP_GRID_DIMENSION:
835 if (ret) {
836 uint64_t *grid_dimension = ret;
837 grid_dimension[0] = 3;
838 }
839 return 1 * sizeof(uint64_t);
840
841 case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
842 if (ret) {
843 uint64_t *grid_size = ret;
844 /* Use this size, so that internal counters don't overflow 64 bits. */
845 grid_size[0] = UINT32_MAX;
846 grid_size[1] = UINT16_MAX;
847 grid_size[2] = UINT16_MAX;
848 }
849 return 3 * sizeof(uint64_t);
850
851 case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
852 if (ret) {
853 uint64_t *block_size = ret;
854 unsigned threads_per_block = get_max_threads_per_block(sscreen, ir_type);
855 block_size[0] = threads_per_block;
856 block_size[1] = threads_per_block;
857 block_size[2] = threads_per_block;
858 }
859 return 3 * sizeof(uint64_t);
860
861 case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
862 if (ret) {
863 uint64_t *max_threads_per_block = ret;
864 *max_threads_per_block = get_max_threads_per_block(sscreen, ir_type);
865 }
866 return sizeof(uint64_t);
867 case PIPE_COMPUTE_CAP_ADDRESS_BITS:
868 if (ret) {
869 uint32_t *address_bits = ret;
870 address_bits[0] = 64;
871 }
872 return 1 * sizeof(uint32_t);
873
874 case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
875 if (ret) {
876 uint64_t *max_global_size = ret;
877 uint64_t max_mem_alloc_size;
878
879 si_get_compute_param(screen, ir_type, PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
880 &max_mem_alloc_size);
881
882 /* In OpenCL, the MAX_MEM_ALLOC_SIZE must be at least
883 * 1/4 of the MAX_GLOBAL_SIZE. Since the
884 * MAX_MEM_ALLOC_SIZE is fixed for older kernels,
885 * make sure we never report more than
886 * 4 * MAX_MEM_ALLOC_SIZE.
887 */
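         /* Worked example (assuming the 1/4-heap MAX_MEM_ALLOC_SIZE reported
          * below): a 16 GiB heap yields a 4 GiB max allocation, so this
          * evaluates to MIN2(4 * 4 GiB, 16 GiB) = 16 GiB. */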
888 *max_global_size =
889 MIN2(4 * max_mem_alloc_size, sscreen->info.max_heap_size_kb * 1024ull);
890 }
891 return sizeof(uint64_t);
892
893 case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
894 if (ret) {
895 uint64_t *max_local_size = ret;
896 /* Value reported by the closed source driver. */
897 if (sscreen->info.gfx_level == GFX6)
898 *max_local_size = 32 * 1024;
899 else
900 *max_local_size = 64 * 1024;
901 }
902 return sizeof(uint64_t);
903
904 case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
905 if (ret) {
906 uint64_t *max_input_size = ret;
907 /* Value reported by the closed source driver. */
908 *max_input_size = 1024;
909 }
910 return sizeof(uint64_t);
911
912 case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
913 if (ret) {
914 uint64_t *max_mem_alloc_size = ret;
915
916 /* Return 1/4 of the heap size as the maximum because the max size is not practically
917 * allocatable.
918 */
919 *max_mem_alloc_size = (sscreen->info.max_heap_size_kb / 4) * 1024ull;
920 }
921 return sizeof(uint64_t);
922
923 case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
924 if (ret) {
925 uint32_t *max_clock_frequency = ret;
926 *max_clock_frequency = sscreen->info.max_gpu_freq_mhz;
927 }
928 return sizeof(uint32_t);
929
930 case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
931 if (ret) {
932 uint32_t *max_compute_units = ret;
933 *max_compute_units = sscreen->info.num_cu;
934 }
935 return sizeof(uint32_t);
936
937 case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
938 if (ret) {
939 uint32_t *images_supported = ret;
940 *images_supported = 0;
941 }
942 return sizeof(uint32_t);
943 case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
944 break; /* unused */
945 case PIPE_COMPUTE_CAP_MAX_SUBGROUPS: {
946 if (ret) {
947 uint32_t *max_subgroups = ret;
948 unsigned threads = get_max_threads_per_block(sscreen, ir_type);
949 unsigned subgroup_size;
950
951 if (sscreen->debug_flags & DBG(W64_CS) || sscreen->info.gfx_level < GFX10)
952 subgroup_size = 64;
953 else
954 subgroup_size = 32;
955
956 *max_subgroups = threads / subgroup_size;
957 }
958 return sizeof(uint32_t);
959 }
960 case PIPE_COMPUTE_CAP_SUBGROUP_SIZES:
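      /* This cap is a bitmask of the wave (subgroup) sizes the compute stage
       * can use: Wave64 only on gfx9 and older, both Wave32 and Wave64
       * (64 | 32) on gfx10+, unless a debug flag forces a single size. */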
961 if (ret) {
962 uint32_t *subgroup_size = ret;
963 if (sscreen->debug_flags & DBG(W32_CS))
964 *subgroup_size = 32;
965 else if (sscreen->debug_flags & DBG(W64_CS))
966 *subgroup_size = 64;
967 else
968 *subgroup_size = sscreen->info.gfx_level < GFX10 ? 64 : 64 | 32;
969 }
970 return sizeof(uint32_t);
971 case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
972 if (ret) {
973 uint64_t *max_variable_threads_per_block = ret;
974 if (ir_type == PIPE_SHADER_IR_NATIVE)
975 *max_variable_threads_per_block = 0;
976 else
977 *max_variable_threads_per_block = SI_MAX_VARIABLE_THREADS_PER_BLOCK;
978 }
979 return sizeof(uint64_t);
980 }
981
982 fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param);
983 return 0;
984 }
985
static uint64_t si_get_timestamp(struct pipe_screen *screen)
987 {
988 struct si_screen *sscreen = (struct si_screen *)screen;
989
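   /* clock_crystal_freq is in kHz, so this converts raw timestamp ticks to
    * nanoseconds: ticks * 1000000 / kHz == ticks * 1e9 / Hz. */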
990 return 1000000 * sscreen->ws->query_value(sscreen->ws, RADEON_TIMESTAMP) /
991 sscreen->info.clock_crystal_freq;
992 }
993
static void si_query_memory_info(struct pipe_screen *screen, struct pipe_memory_info *info)
995 {
996 struct si_screen *sscreen = (struct si_screen *)screen;
997 struct radeon_winsys *ws = sscreen->ws;
998 unsigned vram_usage, gtt_usage;
999
1000 info->total_device_memory = sscreen->info.vram_size_kb;
1001 info->total_staging_memory = sscreen->info.gart_size_kb;
1002
1003 /* The real TTM memory usage is somewhat random, because:
1004 *
1005 * 1) TTM delays freeing memory, because it can only free it after
1006 * fences expire.
1007 *
1008 * 2) The memory usage can be really low if big VRAM evictions are
1009 * taking place, but the real usage is well above the size of VRAM.
1010 *
1011 * Instead, return statistics of this process.
1012 */
1013 vram_usage = ws->query_value(ws, RADEON_VRAM_USAGE) / 1024;
1014 gtt_usage = ws->query_value(ws, RADEON_GTT_USAGE) / 1024;
1015
1016 info->avail_device_memory =
1017 vram_usage <= info->total_device_memory ? info->total_device_memory - vram_usage : 0;
1018 info->avail_staging_memory =
1019 gtt_usage <= info->total_staging_memory ? info->total_staging_memory - gtt_usage : 0;
1020
1021 info->device_memory_evicted = ws->query_value(ws, RADEON_NUM_BYTES_MOVED) / 1024;
1022
1023 if (sscreen->info.is_amdgpu)
1024 info->nr_device_memory_evictions = ws->query_value(ws, RADEON_NUM_EVICTIONS);
1025 else
1026 /* Just return the number of evicted 64KB pages. */
1027 info->nr_device_memory_evictions = info->device_memory_evicted / 64;
1028 }
1029
static struct disk_cache *si_get_disk_shader_cache(struct pipe_screen *pscreen)
1031 {
1032 struct si_screen *sscreen = (struct si_screen *)pscreen;
1033
1034 return sscreen->disk_shader_cache;
1035 }
1036
static void si_init_renderer_string(struct si_screen *sscreen)
1038 {
1039 char first_name[256], second_name[32] = {}, kernel_version[128] = {};
1040 struct utsname uname_data;
1041
1042 snprintf(first_name, sizeof(first_name), "%s",
1043 sscreen->info.marketing_name ? sscreen->info.marketing_name : sscreen->info.name);
1044 snprintf(second_name, sizeof(second_name), "%s, ", sscreen->info.lowercase_name);
1045
1046 if (uname(&uname_data) == 0)
1047 snprintf(kernel_version, sizeof(kernel_version), ", %s", uname_data.release);
1048
1049 const char *compiler_name =
1050 #if AMD_LLVM_AVAILABLE
1051 !sscreen->use_aco ? "LLVM " MESA_LLVM_VERSION_STRING :
1052 #endif
1053 "ACO";
1054
1055 snprintf(sscreen->renderer_string, sizeof(sscreen->renderer_string),
1056 "%s (radeonsi, %s%s, DRM %i.%i%s)", first_name, second_name, compiler_name,
1057 sscreen->info.drm_major, sscreen->info.drm_minor, kernel_version);
1058 }
1059
static int si_get_screen_fd(struct pipe_screen *screen)
1061 {
1062 struct si_screen *sscreen = (struct si_screen *)screen;
1063 struct radeon_winsys *ws = sscreen->ws;
1064
1065 return ws->get_fd(ws);
1066 }
1067
static unsigned si_varying_expression_max_cost(nir_shader *producer, nir_shader *consumer)
1069 {
1070 unsigned num_profiles = si_get_num_shader_profiles();
1071
1072 for (unsigned i = 0; i < num_profiles; i++) {
1073 if (_mesa_printed_blake3_equal(consumer->info.source_blake3, si_shader_profiles[i].blake3)) {
1074 if (si_shader_profiles[i].options & SI_PROFILE_NO_OPT_UNIFORM_VARYINGS)
1075 return 0; /* only propagate constants */
1076 break;
1077 }
1078 }
1079
1080 return ac_nir_varying_expression_max_cost(producer, consumer);
1081 }
1082
1083
1084 static void
si_driver_thread_add_job(struct pipe_screen *screen, void *data,
                         struct util_queue_fence *fence,
                         pipe_driver_thread_func execute,
                         pipe_driver_thread_func cleanup,
                         const size_t job_size)
1090 {
1091 struct si_screen *sscreen = (struct si_screen *)screen;
1092 util_queue_add_job(&sscreen->shader_compiler_queue, data, fence, execute, cleanup, job_size);
1093 }
1094
1095
void si_init_screen_get_functions(struct si_screen *sscreen)
1097 {
1098 sscreen->b.get_name = si_get_name;
1099 sscreen->b.get_vendor = si_get_vendor;
1100 sscreen->b.get_device_vendor = si_get_device_vendor;
1101 sscreen->b.get_screen_fd = si_get_screen_fd;
1102 sscreen->b.is_compute_copy_faster = si_is_compute_copy_faster;
1103 sscreen->b.driver_thread_add_job = si_driver_thread_add_job;
1104 sscreen->b.get_compute_param = si_get_compute_param;
1105 sscreen->b.get_timestamp = si_get_timestamp;
1106 sscreen->b.get_shader_param = si_get_shader_param;
1107 sscreen->b.get_compiler_options = si_get_compiler_options;
1108 sscreen->b.get_device_uuid = si_get_device_uuid;
1109 sscreen->b.get_driver_uuid = si_get_driver_uuid;
1110 sscreen->b.query_memory_info = si_query_memory_info;
1111 sscreen->b.get_disk_shader_cache = si_get_disk_shader_cache;
1112
1113 if (sscreen->info.ip[AMD_IP_UVD].num_queues ||
1114 ((sscreen->info.vcn_ip_version >= VCN_4_0_0) ?
1115 sscreen->info.ip[AMD_IP_VCN_UNIFIED].num_queues : sscreen->info.ip[AMD_IP_VCN_DEC].num_queues) ||
1116 sscreen->info.ip[AMD_IP_VCN_JPEG].num_queues || sscreen->info.ip[AMD_IP_VCE].num_queues ||
1117 sscreen->info.ip[AMD_IP_UVD_ENC].num_queues || sscreen->info.ip[AMD_IP_VCN_ENC].num_queues ||
1118 sscreen->info.ip[AMD_IP_VPE].num_queues) {
1119 sscreen->b.get_video_param = si_get_video_param;
1120 sscreen->b.is_video_format_supported = si_vid_is_format_supported;
1121 sscreen->b.is_video_target_buffer_supported = si_vid_is_target_buffer_supported;
1122 } else {
1123 sscreen->b.get_video_param = si_get_video_param_no_video_hw;
1124 sscreen->b.is_video_format_supported = vl_video_buffer_is_format_supported;
1125 }
1126
1127 si_init_renderer_string(sscreen);
1128
1129 /* |---------------------------------- Performance & Availability --------------------------------|
1130 * |MAD/MAC/MADAK/MADMK|MAD_LEGACY|MAC_LEGACY| FMA |FMAC/FMAAK/FMAMK|FMA_LEGACY|PK_FMA_F16,|Best choice
1131 * Arch | F32,F16,F64 | F32,F16 | F32,F16 |F32,F16,F64 | F32,F16 | F32 |PK_FMAC_F16|F16,F32,F64
1132 * ------------------------------------------------------------------------------------------------------------------
1133 * gfx6,7 | 1 , - , - | 1 , - | 1 , - |1/4, - ,1/16| - , - | - | - , - | - ,MAD,FMA
1134 * gfx8 | 1 , 1 , - | 1 , - | - , - |1/4, 1 ,1/16| - , - | - | - , - |MAD,MAD,FMA
1135 * gfx9 | 1 ,1|0, - | 1 , - | - , - | 1 , 1 ,1/16| 0|1, - | - | 2 , - |FMA,MAD,FMA
1136 * gfx10 | 1 , - , - | 1 , - | 1 , - | 1 , 1 ,1/16| 1 , 1 | - | 2 , 2 |FMA,MAD,FMA
1137 * gfx10.3| - , - , - | - , - | - , - | 1 , 1 ,1/16| 1 , 1 | 1 | 2 , 2 | all FMA
1138 * gfx11 | - , - , - | - , - | - , - | 2 , 2 ,1/16| 2 , 2 | 2 | 2 , 2 | all FMA
1139 *
1140 * Tahiti, Hawaii, Carrizo, Vega20: FMA_F32 is full rate, FMA_F64 is 1/4
1141 * gfx9 supports MAD_F16 only on Vega10, Raven, Raven2, Renoir.
1142 * gfx9 supports FMAC_F32 only on Vega20, but doesn't support FMAAK and FMAMK.
1143 *
1144 * gfx8 prefers MAD for F16 because of MAC/MADAK/MADMK.
1145 * gfx9 and newer prefer FMA for F16 because of the packed instruction.
1146 * gfx10 and older prefer MAD for F32 because of the legacy instruction.
1147 */
1148 bool use_fma32 =
1149 sscreen->info.gfx_level >= GFX10_3 ||
1150 (sscreen->info.family >= CHIP_GFX940 && !sscreen->info.has_graphics) ||
1151 /* fma32 is too slow for gpu < gfx9, so apply the option only for gpu >= gfx9 */
1152 (sscreen->info.gfx_level >= GFX9 && sscreen->options.force_use_fma32);
1153 bool has_mediump = sscreen->info.gfx_level >= GFX8 && sscreen->options.fp16;
1154
1155 nir_shader_compiler_options *options = sscreen->nir_options;
1156 ac_nir_set_options(&sscreen->info, !sscreen->use_aco, options);
1157
1158 options->lower_ffma16 = sscreen->info.gfx_level < GFX9;
1159 options->lower_ffma32 = !use_fma32;
1160 options->lower_ffma64 = false;
1161 options->fuse_ffma16 = sscreen->info.gfx_level >= GFX9;
1162 options->fuse_ffma32 = use_fma32;
1163 options->fuse_ffma64 = true;
1164 options->lower_uniforms_to_ubo = true;
1165 options->lower_to_scalar = true;
1166 options->lower_to_scalar_filter =
1167 sscreen->info.has_packed_math_16bit ? si_alu_to_scalar_packed_math_filter : NULL;
1168 options->max_unroll_iterations = 128;
1169 options->max_unroll_iterations_aggressive = 128;
1170 /* For OpenGL, rounding mode is undefined. We want fast packing with v_cvt_pkrtz_f16,
1171 * but if we use it, all f32->f16 conversions have to round towards zero,
1172 * because both scalar and vec2 down-conversions have to round equally.
1173 *
1174 * For OpenCL, rounding mode is explicit. This will only lower f2f16 to f2f16_rtz
1175 * when execution mode is rtz instead of rtne.
1176 */
1177 options->force_f2f16_rtz = true;
1178 options->io_options |= (!has_mediump ? nir_io_mediump_is_32bit : 0) | nir_io_has_intrinsics;
1179 options->lower_mediump_io = has_mediump ? si_lower_mediump_io : NULL;
1180 /* HW supports indirect indexing for: | Enabled in driver
1181 * -------------------------------------------------------
1182 * TCS inputs | Yes
1183 * TES inputs | Yes
1184 * GS inputs | No
1185 * -------------------------------------------------------
1186 * VS outputs before TCS | No
1187 * TCS outputs | Yes
1188 * VS/TES outputs before GS | No
1189 */
1190 options->support_indirect_inputs = BITFIELD_BIT(MESA_SHADER_TESS_CTRL) |
1191 BITFIELD_BIT(MESA_SHADER_TESS_EVAL);
1192 options->support_indirect_outputs = BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
1193 options->varying_expression_max_cost = si_varying_expression_max_cost;
1194 }
1195
void si_init_screen_caps(struct si_screen *sscreen)
1197 {
1198 struct pipe_caps *caps = (struct pipe_caps *)&sscreen->b.caps;
1199
1200 u_init_pipe_screen_caps(&sscreen->b, 1);
1201
1202 /* Gfx8 (Polaris11) hangs, so don't enable this on Gfx8 and older chips. */
1203 bool enable_sparse =
1204 sscreen->info.gfx_level >= GFX9 && sscreen->info.gfx_level < GFX12 &&
1205 sscreen->info.has_sparse_vm_mappings;
1206
1207 /* Supported features (boolean caps). */
1208 caps->max_dual_source_render_targets = true;
1209 caps->anisotropic_filter = true;
1210 caps->occlusion_query = true;
1211 caps->texture_mirror_clamp = true;
1212 caps->texture_shadow_lod = true;
1213 caps->texture_mirror_clamp_to_edge = true;
1214 caps->blend_equation_separate = true;
1215 caps->texture_swizzle = true;
1216 caps->depth_clip_disable = true;
1217 caps->depth_clip_disable_separate = true;
1218 caps->shader_stencil_export = true;
1219 caps->vertex_element_instance_divisor = true;
1220 caps->fs_coord_origin_upper_left = true;
1221 caps->fs_coord_pixel_center_half_integer = true;
1222 caps->fs_coord_pixel_center_integer = true;
1223 caps->fragment_shader_texture_lod = true;
1224 caps->fragment_shader_derivatives = true;
1225 caps->primitive_restart = true;
1226 caps->primitive_restart_fixed_index = true;
1227 caps->conditional_render = true;
1228 caps->texture_barrier = true;
1229 caps->indep_blend_enable = true;
1230 caps->indep_blend_func = true;
1231 caps->vertex_color_unclamped = true;
1232 caps->start_instance = true;
1233 caps->npot_textures = true;
1234 caps->mixed_framebuffer_sizes = true;
1235 caps->mixed_color_depth_bits = true;
1236 caps->vertex_color_clamped = true;
1237 caps->fragment_color_clamped = true;
1238 caps->vs_instanceid = true;
1239 caps->compute = true;
1240 caps->texture_buffer_objects = true;
1241 caps->vs_layer_viewport = true;
1242 caps->query_pipeline_statistics = true;
1243 caps->sample_shading = true;
1244 caps->draw_indirect = true;
1245 caps->clip_halfz = true;
1246 caps->vs_window_space_position = true;
1247 caps->polygon_offset_clamp = true;
1248 caps->multisample_z_resolve = true;
1249 caps->quads_follow_provoking_vertex_convention = true;
1250 caps->tgsi_texcoord = true;
1251 caps->fs_fine_derivative = true;
1252 caps->conditional_render_inverted = true;
1253 caps->texture_float_linear = true;
1254 caps->texture_half_float_linear = true;
1255 caps->depth_bounds_test = true;
1256 caps->sampler_view_target = true;
1257 caps->texture_query_lod = true;
1258 caps->texture_gather_sm5 = true;
1259 caps->texture_query_samples = true;
1260 caps->force_persample_interp = true;
1261 caps->copy_between_compressed_and_plain_formats = true;
1262 caps->fs_position_is_sysval = true;
1263 caps->fs_face_is_integer_sysval = true;
1264 caps->invalidate_buffer = true;
1265 caps->surface_reinterpret_blocks = true;
1266 caps->query_buffer_object = true;
1267 caps->query_memory_info = true;
1268 caps->shader_pack_half_float = true;
1269 caps->framebuffer_no_attachment = true;
1270 caps->robust_buffer_access_behavior = true;
1271 caps->polygon_offset_units_unscaled = true;
1272 caps->string_marker = true;
1273 caps->cull_distance = true;
1274 caps->shader_array_components = true;
1275 caps->stream_output_pause_resume = true;
1276 caps->stream_output_interleave_buffers = true;
1277 caps->doubles = true;
1278 caps->tgsi_tex_txf_lz = true;
1279 caps->tes_layer_viewport = true;
1280 caps->bindless_texture = true;
1281 caps->query_timestamp = true;
1282 caps->query_time_elapsed = true;
1283 caps->nir_samplers_as_deref = true;
1284 caps->memobj = true;
1285 caps->load_constbuf = true;
1286 caps->int64 = true;
1287 caps->shader_clock = true;
1288 caps->can_bind_const_buffer_as_vertex = true;
1289 caps->allow_mapped_buffers_during_execution = true;
1290 caps->signed_vertex_buffer_offset = true;
1291 caps->shader_ballot = true;
1292 caps->shader_group_vote = true;
1293 caps->compute_grid_info_last_block = true;
1294 caps->image_load_formatted = true;
1295 caps->prefer_compute_for_multimedia = true;
1296 caps->tgsi_div = true;
1297 caps->packed_uniforms = true;
1298 caps->gl_spirv = true;
1299 caps->alpha_to_coverage_dither_control = true;
1300 caps->map_unsynchronized_thread_safe = true;
1301 caps->no_clip_on_copy_tex = true;
1302 caps->shader_atomic_int64 = true;
1303 caps->frontend_noop = true;
1304 caps->demote_to_helper_invocation = true;
1305 caps->prefer_real_buffer_in_constbuf0 = true;
1306 caps->compute_shader_derivatives = true;
1307 caps->image_atomic_inc_wrap = true;
1308 caps->image_store_formatted = true;
1309 caps->allow_draw_out_of_order = true;
1310 caps->query_so_overflow = true;
1311 caps->glsl_tess_levels_as_inputs = true;
1312 caps->device_reset_status_query = true;
1313 caps->texture_multisample = true;
1314 caps->allow_glthread_buffer_subdata_opt = true; /* TODO: remove if it's slow */
1315 caps->null_textures = true;
1316 caps->has_const_bw = true;
1317 caps->cl_gl_sharing = true;
1318 caps->call_finalize_nir_in_linker = true;
1319
1320 caps->fbfetch = 1;
1321
1322 /* Tahiti and Verde only: reduction mode is unsupported due to a bug
1323 * (it might work sometimes, but that's not enough)
1324 */
1325 caps->sampler_reduction_minmax =
1326 caps->sampler_reduction_minmax_arb =
1327 !(sscreen->info.family == CHIP_TAHITI || sscreen->info.family == CHIP_VERDE);
1328
1329 caps->texture_transfer_modes =
1330 PIPE_TEXTURE_TRANSFER_BLIT | PIPE_TEXTURE_TRANSFER_COMPUTE;
1331
1332 caps->draw_vertex_state = !(sscreen->debug_flags & DBG(NO_FAST_DISPLAY_LIST));
1333
1334 caps->shader_samples_identical =
1335 sscreen->info.gfx_level < GFX11 && !(sscreen->debug_flags & DBG(NO_FMASK));
1336
1337 caps->glsl_zero_init = 2;
1338
1339 caps->generate_mipmap =
1340 caps->seamless_cube_map =
1341 caps->seamless_cube_map_per_texture =
1342 caps->cube_map_array =
1343 sscreen->info.has_3d_cube_border_color_mipmap;
1344
1345 caps->post_depth_coverage = sscreen->info.gfx_level >= GFX10;
1346
1347 caps->graphics = sscreen->info.has_graphics;
1348
1349 caps->resource_from_user_memory = !UTIL_ARCH_BIG_ENDIAN && sscreen->info.has_userptr;
1350
1351 caps->device_protected_surface = sscreen->info.has_tmz_support;
1352
1353 caps->min_map_buffer_alignment = SI_MAP_BUFFER_ALIGNMENT;
1354
1355 caps->max_vertex_buffers = SI_MAX_ATTRIBS;
1356
1357 caps->constant_buffer_offset_alignment =
1358 caps->texture_buffer_offset_alignment =
1359 caps->max_texture_gather_components =
1360 caps->max_stream_output_buffers =
1361 caps->max_vertex_streams =
1362 caps->shader_buffer_offset_alignment =
1363 caps->max_window_rectangles = 4;
1364
1365 caps->glsl_feature_level =
1366 caps->glsl_feature_level_compatibility = 460;
1367
1368 /* Optimal number for good TexSubImage performance on Polaris10. */
1369 caps->max_texture_upload_memory_budget = 64 * 1024 * 1024;
1370
1371 caps->gl_begin_end_buffer_size = 4096 * 1024;
1372
1373 /* Return 1/4th of the heap size as the maximum because the max size is not practically
1374 * allocatable. Also, this can only return UINT32_MAX at most.
1375 */
1376 unsigned max_size = MIN2((sscreen->info.max_heap_size_kb * 1024ull) / 4, UINT32_MAX);
1377
1378 /* Allow max 512 MB to pass CTS with a 32-bit build. */
1379 if (sizeof(void*) == 4)
1380 max_size = MIN2(max_size, 512 * 1024 * 1024);
1381
1382 caps->max_constant_buffer_size =
1383 caps->max_shader_buffer_size = max_size;
1384
1385 unsigned max_texels = caps->max_shader_buffer_size;
1386
1387 /* FYI, BUF_RSRC_WORD2.NUM_RECORDS field limit is UINT32_MAX. */
1388
1389 /* Gfx8 and older use the size in bytes for bounds checking, and the max element size
1390 * is 16B. Gfx9 and newer use the VGPR index for bounds checking.
1391 */
1392 if (sscreen->info.gfx_level <= GFX8)
1393 max_texels = MIN2(max_texels, UINT32_MAX / 16);
1394 else
1395 /* Gallium has a limitation that it can only bind UINT32_MAX bytes, not texels.
1396 * TODO: Remove this after the gallium interface is changed. */
1397 max_texels = MIN2(max_texels, UINT32_MAX / 16);
1398
1399 caps->max_texel_buffer_elements = max_texels;
1400
1401 /* Allow 1/4th of the heap size. */
1402 caps->max_texture_mb = sscreen->info.max_heap_size_kb / 1024 / 4;
1403
1404 caps->prefer_back_buffer_reuse = false;
1405 caps->uma = false;
1406 caps->prefer_imm_arrays_as_constbuf = false;
1407
1408 caps->performance_monitor =
1409 sscreen->info.gfx_level >= GFX7 && sscreen->info.gfx_level <= GFX10_3;
1410
1411 caps->sparse_buffer_page_size = enable_sparse ? RADEON_SPARSE_PAGE_SIZE : 0;
1412
1413 caps->context_priority_mask = sscreen->info.is_amdgpu ?
1414 PIPE_CONTEXT_PRIORITY_LOW | PIPE_CONTEXT_PRIORITY_MEDIUM | PIPE_CONTEXT_PRIORITY_HIGH : 0;
1415
1416 caps->fence_signal = sscreen->info.has_syncobj;
1417
1418 caps->constbuf0_flags = SI_RESOURCE_FLAG_32BIT;
1419
1420 caps->native_fence_fd = sscreen->info.has_fence_to_handle;
1421
1422 caps->draw_parameters =
1423 caps->multi_draw_indirect =
1424 caps->multi_draw_indirect_params = sscreen->has_draw_indirect_multi;
1425
1426 caps->max_shader_patch_varyings = 30;
1427
1428 caps->max_varyings =
1429 caps->max_gs_invocations = 32;
1430
1431 caps->texture_border_color_quirk =
1432 sscreen->info.gfx_level <= GFX8 ? PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_R600 : 0;
1433
1434 /* Stream output. */
1435 caps->max_stream_output_separate_components =
1436 caps->max_stream_output_interleaved_components = 32 * 4;
1437
1438 /* gfx9 has to report 256 to make piglit/gs-max-output pass.
1439 * gfx8 and earlier can do 1024.
1440 */
1441 caps->max_geometry_output_vertices = 256;
1442 caps->max_geometry_total_output_components = 4095;
1443
1444 caps->max_vertex_attrib_stride = 2048;
1445
1446 /* TODO: Gfx12 supports 64K textures, but Gallium can't represent them at the moment. */
1447 caps->max_texture_2d_size = sscreen->info.gfx_level >= GFX12 ? 32768 : 16384;
1448 caps->max_texture_cube_levels = sscreen->info.has_3d_cube_border_color_mipmap ?
1449 (sscreen->info.gfx_level >= GFX12 ? 16 : 15) /* 32K : 16K */ : 0;
1450 caps->max_texture_3d_levels = sscreen->info.has_3d_cube_border_color_mipmap ?
1451 /* This is limited by maximums that both the texture unit and layered rendering support. */
1452 (sscreen->info.gfx_level >= GFX12 ? 15 : /* 16K */
1453 (sscreen->info.gfx_level >= GFX10 ? 14 : 12)) /* 8K : 2K */ : 0;
1454 /* This is limited by maximums that both the texture unit and layered rendering support. */
1455 caps->max_texture_array_layers = sscreen->info.gfx_level >= GFX10 ? 8192 : 2048;
1456
1457 /* Sparse texture */
1458 caps->max_sparse_texture_size = enable_sparse ? caps->max_texture_2d_size : 0;
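   /* The maximum sparse 3D texture dimension derived from the level count,
    * e.g. 1 << (12 - 1) = 2048 when max_texture_3d_levels is 12. */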
1459 caps->max_sparse_3d_texture_size = enable_sparse ? (1 << (caps->max_texture_3d_levels - 1)) : 0;
1460 caps->max_sparse_array_texture_layers = enable_sparse ? caps->max_texture_array_layers : 0;
1461 caps->sparse_texture_full_array_cube_mipmaps =
1462 caps->query_sparse_texture_residency =
1463 caps->clamp_sparse_texture_lod = enable_sparse;
1464
1465 /* Viewports and render targets. */
1466 caps->max_viewports = SI_MAX_VIEWPORTS;
1467 caps->viewport_subpixel_bits =
1468 caps->rasterizer_subpixel_bits =
1469 caps->max_render_targets = 8;
1470 caps->framebuffer_msaa_constraints = sscreen->info.has_eqaa_surface_allocator ? 2 : 0;
1471
1472 caps->min_texture_gather_offset =
1473 caps->min_texel_offset = -32;
1474
1475 caps->max_texture_gather_offset =
1476 caps->max_texel_offset = 31;
1477
1478 caps->endianness = PIPE_ENDIAN_LITTLE;
1479
1480 caps->vendor_id = ATI_VENDOR_ID;
1481 caps->device_id = sscreen->info.pci_id;
1482 caps->video_memory = sscreen->info.vram_size_kb >> 10;
1483 caps->pci_group = sscreen->info.pci.domain;
1484 caps->pci_bus = sscreen->info.pci.bus;
1485 caps->pci_device = sscreen->info.pci.dev;
1486 caps->pci_function = sscreen->info.pci.func;
1487
1488 /* Conversion to nanos from cycles per millisecond */
1489 caps->timer_resolution = DIV_ROUND_UP(1000000, sscreen->info.clock_crystal_freq);
1490
1491 caps->shader_subgroup_size = 64;
1492 caps->shader_subgroup_supported_stages = BITFIELD_MASK(PIPE_SHADER_TYPES);
1493 caps->shader_subgroup_supported_features = BITFIELD_MASK(PIPE_SHADER_SUBGROUP_NUM_FEATURES);
1494 caps->shader_subgroup_quad_all_stages = true;
1495
1496 caps->min_line_width =
1497 caps->min_line_width_aa = 1; /* due to axis-aligned end caps at line width 1 */
1498
1499 caps->min_point_size =
1500 caps->min_point_size_aa =
1501 caps->point_size_granularity =
1502 caps->line_width_granularity = 1.0 / 8.0; /* due to the register field precision */
1503
1504 /* This depends on the quant mode, though the precise interactions are unknown. */
1505 caps->max_line_width =
1506 caps->max_line_width_aa = 2048;
1507
1508 caps->max_point_size =
1509 caps->max_point_size_aa = SI_MAX_POINT_SIZE;
1510
1511 caps->max_texture_anisotropy = 16.0f;
1512
1513 /* The hw can do 31, but this test fails if we use that:
1514 * KHR-GL46.texture_lod_bias.texture_lod_bias_all
1515 */
1516 caps->max_texture_lod_bias = 16;
1517 }
1518