• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2017 Advanced Micro Devices, Inc.
3  *
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include "compiler/nir/nir.h"
8 #include "ac_nir.h"
9 #include "ac_shader_util.h"
10 #include "radeon_uvd_enc.h"
11 #include "radeon_vce.h"
12 #include "radeon_video.h"
13 #include "si_pipe.h"
14 #include "util/u_cpu_detect.h"
15 #include "util/u_screen.h"
16 #include "util/u_video.h"
17 #include "vl/vl_decoder.h"
18 #include "vl/vl_video_buffer.h"
19 #include <sys/utsname.h>
20 
/* The capabilities reported by the kernel have priority
   over the existing logic in si_get_video_param.

   All three macros read a variable named `sscreen` (pointer to the screen)
   that must be in scope at the point of use. */
#define QUERYABLE_KERNEL   (sscreen->info.is_amdgpu && \
   (sscreen->info.drm_minor >= 41))

/* Decode capability field `attrib` of `codec`, or 0 when `codec` lies outside
   (PIPE_VIDEO_FORMAT_UNKNOWN, PIPE_VIDEO_FORMAT_AV1] or its table entry is not
   marked valid. The expansion and the `codec` argument are fully
   parenthesized so the macro can be embedded in larger expressions. */
#define KERNEL_DEC_CAP(codec, attrib)    \
   (((codec) > PIPE_VIDEO_FORMAT_UNKNOWN && (codec) <= PIPE_VIDEO_FORMAT_AV1) ? \
    (sscreen->info.dec_caps.codec_info[(codec) - 1].valid ? \
     sscreen->info.dec_caps.codec_info[(codec) - 1].attrib : 0) : 0)

/* Encode counterpart of KERNEL_DEC_CAP; same range check and fallback to 0. */
#define KERNEL_ENC_CAP(codec, attrib)    \
   (((codec) > PIPE_VIDEO_FORMAT_UNKNOWN && (codec) <= PIPE_VIDEO_FORMAT_AV1) ? \
    (sscreen->info.enc_caps.codec_info[(codec) - 1].valid ? \
     sscreen->info.enc_caps.codec_info[(codec) - 1].attrib : 0) : 0)
33 
/* Vendor string of this driver; the screen argument is not consulted. */
static const char *si_get_vendor(struct pipe_screen *pscreen)
{
   static const char vendor_name[] = "AMD";

   (void)pscreen;
   return vendor_name;
}
38 
/* Vendor string of the device; the screen argument is not consulted. */
static const char *si_get_device_vendor(struct pipe_screen *pscreen)
{
   static const char device_vendor_name[] = "AMD";

   (void)pscreen;
   return device_vendor_name;
}
43 
44 static bool
si_is_compute_copy_faster(struct pipe_screen * pscreen,enum pipe_format src_format,enum pipe_format dst_format,unsigned width,unsigned height,unsigned depth,bool cpu)45 si_is_compute_copy_faster(struct pipe_screen *pscreen,
46                           enum pipe_format src_format,
47                           enum pipe_format dst_format,
48                           unsigned width,
49                           unsigned height,
50                           unsigned depth,
51                           bool cpu)
52 {
53    if (cpu)
54       /* very basic for now */
55       return width * height * depth > 64 * 64;
56    return false;
57 }
58 
si_get_shader_param(struct pipe_screen * pscreen,enum pipe_shader_type shader,enum pipe_shader_cap param)59 static int si_get_shader_param(struct pipe_screen *pscreen, enum pipe_shader_type shader,
60                                enum pipe_shader_cap param)
61 {
62    struct si_screen *sscreen = (struct si_screen *)pscreen;
63 
64    if (shader == PIPE_SHADER_MESH ||
65        shader == PIPE_SHADER_TASK)
66       return 0;
67 
68    switch (param) {
69    /* Shader limits. */
70    case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
71    case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
72    case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
73    case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
74    case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
75       return 16384;
76    case PIPE_SHADER_CAP_MAX_INPUTS:
77       return shader == PIPE_SHADER_VERTEX ? SI_MAX_ATTRIBS : 32;
78    case PIPE_SHADER_CAP_MAX_OUTPUTS:
79       return shader == PIPE_SHADER_FRAGMENT ? 8 : 32;
80    case PIPE_SHADER_CAP_MAX_TEMPS:
81       return 256; /* Max native temporaries. */
82    case PIPE_SHADER_CAP_MAX_CONST_BUFFER0_SIZE:
83       return 1 << 26; /* 64 MB */
84    case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
85       return SI_NUM_CONST_BUFFERS;
86    case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
87    case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
88       return SI_NUM_SAMPLERS;
89    case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
90       return SI_NUM_SHADER_BUFFERS;
91    case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
92       return SI_NUM_IMAGES;
93 
94    case PIPE_SHADER_CAP_SUPPORTED_IRS:
95       if (shader == PIPE_SHADER_COMPUTE) {
96          return (1 << PIPE_SHADER_IR_NATIVE) |
97                 (1 << PIPE_SHADER_IR_NIR) |
98                 (1 << PIPE_SHADER_IR_TGSI);
99       }
100       return (1 << PIPE_SHADER_IR_TGSI) |
101              (1 << PIPE_SHADER_IR_NIR);
102 
103    /* Supported boolean features. */
104    case PIPE_SHADER_CAP_CONT_SUPPORTED:
105    case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
106    case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
107    case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
108    case PIPE_SHADER_CAP_INTEGERS:
109    case PIPE_SHADER_CAP_INT64_ATOMICS:
110    case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
111       return 1;
112 
113    case PIPE_SHADER_CAP_FP16_CONST_BUFFERS:
114       /* We need f16c for fast FP16 conversions in glUniform. */
115       if (!util_get_cpu_caps()->has_f16c)
116          return 0;
117       FALLTHROUGH;
118    case PIPE_SHADER_CAP_FP16:
119    case PIPE_SHADER_CAP_FP16_DERIVATIVES:
120    case PIPE_SHADER_CAP_GLSL_16BIT_CONSTS:
121    case PIPE_SHADER_CAP_INT16:
122       return sscreen->nir_options->lower_mediump_io != NULL;
123 
124    /* Unsupported boolean features. */
125    case PIPE_SHADER_CAP_SUBROUTINES:
126    case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
127    case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:
128       return 0;
129    }
130    return 0;
131 }
132 
si_get_compiler_options(struct pipe_screen * screen,enum pipe_shader_ir ir,enum pipe_shader_type shader)133 static const void *si_get_compiler_options(struct pipe_screen *screen, enum pipe_shader_ir ir,
134                                            enum pipe_shader_type shader)
135 {
136    struct si_screen *sscreen = (struct si_screen *)screen;
137 
138    assert(ir == PIPE_SHADER_IR_NIR);
139    return sscreen->nir_options;
140 }
141 
si_get_driver_uuid(struct pipe_screen * pscreen,char * uuid)142 static void si_get_driver_uuid(struct pipe_screen *pscreen, char *uuid)
143 {
144    ac_compute_driver_uuid(uuid, PIPE_UUID_SIZE);
145 }
146 
si_get_device_uuid(struct pipe_screen * pscreen,char * uuid)147 static void si_get_device_uuid(struct pipe_screen *pscreen, char *uuid)
148 {
149    struct si_screen *sscreen = (struct si_screen *)pscreen;
150 
151    ac_compute_device_uuid(&sscreen->info, uuid, PIPE_UUID_SIZE);
152 }
153 
si_get_name(struct pipe_screen * pscreen)154 static const char *si_get_name(struct pipe_screen *pscreen)
155 {
156    struct si_screen *sscreen = (struct si_screen *)pscreen;
157 
158    return sscreen->renderer_string;
159 }
160 
si_get_video_param_no_video_hw(struct pipe_screen * screen,enum pipe_video_profile profile,enum pipe_video_entrypoint entrypoint,enum pipe_video_cap param)161 static int si_get_video_param_no_video_hw(struct pipe_screen *screen, enum pipe_video_profile profile,
162                                           enum pipe_video_entrypoint entrypoint,
163                                           enum pipe_video_cap param)
164 {
165    switch (param) {
166    case PIPE_VIDEO_CAP_SUPPORTED:
167       return vl_profile_supported(screen, profile, entrypoint);
168    case PIPE_VIDEO_CAP_NPOT_TEXTURES:
169       return 1;
170    case PIPE_VIDEO_CAP_MAX_WIDTH:
171    case PIPE_VIDEO_CAP_MAX_HEIGHT:
172       return vl_video_buffer_max_size(screen);
173    case PIPE_VIDEO_CAP_PREFERED_FORMAT:
174       return PIPE_FORMAT_NV12;
175    case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
176       return false;
177    case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
178       return false;
179    case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
180       return true;
181    case PIPE_VIDEO_CAP_MAX_LEVEL:
182       return vl_level_supported(screen, profile);
183    default:
184       return 0;
185    }
186 }
187 
si_get_video_param(struct pipe_screen * screen,enum pipe_video_profile profile,enum pipe_video_entrypoint entrypoint,enum pipe_video_cap param)188 static int si_get_video_param(struct pipe_screen *screen, enum pipe_video_profile profile,
189                               enum pipe_video_entrypoint entrypoint, enum pipe_video_cap param)
190 {
191    struct si_screen *sscreen = (struct si_screen *)screen;
192    enum pipe_video_format codec = u_reduce_video_profile(profile);
193    bool fully_supported_profile = ((profile >= PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE) &&
194                                    (profile <= PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH)) ||
195                                   (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN) ||
196                                   (profile == PIPE_VIDEO_PROFILE_AV1_MAIN);
197 
198    /* Return the capability of Video Post Processor.
199     * Have to determine the HW version of VPE.
200     * Have to check the HW limitation and
201     * Check if the VPE exists and is valid
202     */
203    if (sscreen->info.ip[AMD_IP_VPE].num_queues && entrypoint == PIPE_VIDEO_ENTRYPOINT_PROCESSING) {
204 
205       switch(param) {
206       case PIPE_VIDEO_CAP_SUPPORTED:
207          return true;
208       case PIPE_VIDEO_CAP_MAX_WIDTH:
209          return 10240;
210       case PIPE_VIDEO_CAP_MAX_HEIGHT:
211          return 10240;
212       case PIPE_VIDEO_CAP_VPP_MAX_INPUT_WIDTH:
213          return 10240;
214       case PIPE_VIDEO_CAP_VPP_MAX_INPUT_HEIGHT:
215          return 10240;
216       case PIPE_VIDEO_CAP_VPP_MIN_INPUT_WIDTH:
217          return 16;
218       case PIPE_VIDEO_CAP_VPP_MIN_INPUT_HEIGHT:
219          return 16;
220       case PIPE_VIDEO_CAP_VPP_MAX_OUTPUT_WIDTH:
221          return 10240;
222       case PIPE_VIDEO_CAP_VPP_MAX_OUTPUT_HEIGHT:
223          return 10240;
224       case PIPE_VIDEO_CAP_VPP_MIN_OUTPUT_WIDTH:
225          return 16;
226       case PIPE_VIDEO_CAP_VPP_MIN_OUTPUT_HEIGHT:
227          return 16;
228       case PIPE_VIDEO_CAP_VPP_ORIENTATION_MODES:
229          /* VPE 1st generation does not support orientation
230           * Have to determine the version and features of VPE in future.
231           */
232          return PIPE_VIDEO_VPP_ORIENTATION_DEFAULT;
233       case PIPE_VIDEO_CAP_VPP_BLEND_MODES:
234          /* VPE 1st generation does not support blending.
235           * Have to determine the version and features of VPE in future.
236           */
237          return PIPE_VIDEO_VPP_BLEND_MODE_NONE;
238       case PIPE_VIDEO_CAP_PREFERED_FORMAT:
239          return PIPE_FORMAT_NV12;
240       case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
241          return false;
242       case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
243          return true;
244       case PIPE_VIDEO_CAP_REQUIRES_FLUSH_ON_END_FRAME:
245          /* true: VPP flush function will be called within vaEndPicture() */
246          /* false: VPP flush function will be skipped */
247          return false;
248       case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
249          /* for VPE we prefer non-interlaced buffer */
250          return false;
251       default:
252          return 0;
253       }
254    }
255 
256    if (entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE) {
257       if (!(sscreen->info.ip[AMD_IP_VCE].num_queues ||
258             sscreen->info.ip[AMD_IP_UVD_ENC].num_queues ||
259             sscreen->info.ip[AMD_IP_VCN_ENC].num_queues))
260          return 0;
261 
262       if (sscreen->info.vcn_ip_version == VCN_4_0_3 ||
263 	  sscreen->info.vcn_ip_version == VCN_5_0_1)
264 	 return 0;
265 
266       switch (param) {
267       case PIPE_VIDEO_CAP_SUPPORTED:
268          return (
269              /* in case it is explicitly marked as not supported by the kernel */
270             ((QUERYABLE_KERNEL && fully_supported_profile) ? KERNEL_ENC_CAP(codec, valid) : 1) &&
271             ((codec == PIPE_VIDEO_FORMAT_MPEG4_AVC && profile != PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH10 &&
272              (sscreen->info.vcn_ip_version >= VCN_1_0_0 || si_vce_is_fw_version_supported(sscreen))) ||
273             (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN &&
274              (sscreen->info.vcn_ip_version >= VCN_1_0_0 || si_radeon_uvd_enc_supported(sscreen))) ||
275             (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10 && sscreen->info.vcn_ip_version >= VCN_2_0_0) ||
276             (profile == PIPE_VIDEO_PROFILE_AV1_MAIN &&
277 	     (sscreen->info.vcn_ip_version >= VCN_4_0_0 && sscreen->info.vcn_ip_version != VCN_4_0_3))));
278       case PIPE_VIDEO_CAP_NPOT_TEXTURES:
279          return 1;
280       case PIPE_VIDEO_CAP_MIN_WIDTH:
281          if (sscreen->info.vcn_ip_version >= VCN_5_0_0) {
282             if (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC)
283                return 96;
284             else if (codec == PIPE_VIDEO_FORMAT_HEVC)
285                return 384;
286             else if (codec == PIPE_VIDEO_FORMAT_AV1)
287                return 320;
288          }
289          return (codec == PIPE_VIDEO_FORMAT_HEVC) ? 130 : 128;
290       case PIPE_VIDEO_CAP_MIN_HEIGHT:
291          if (sscreen->info.vcn_ip_version >= VCN_5_0_0 && codec == PIPE_VIDEO_FORMAT_MPEG4_AVC)
292             return 32;
293          return 128;
294       case PIPE_VIDEO_CAP_MAX_WIDTH:
295          if (codec != PIPE_VIDEO_FORMAT_UNKNOWN && QUERYABLE_KERNEL)
296             return KERNEL_ENC_CAP(codec, max_width);
297          else
298             return (sscreen->info.family < CHIP_TONGA) ? 2048 : 4096;
299       case PIPE_VIDEO_CAP_MAX_HEIGHT:
300          if (codec != PIPE_VIDEO_FORMAT_UNKNOWN && QUERYABLE_KERNEL)
301             return KERNEL_ENC_CAP(codec, max_height);
302          else
303             return (sscreen->info.family < CHIP_TONGA) ? 1152 : 2304;
304       case PIPE_VIDEO_CAP_PREFERED_FORMAT:
305          if (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)
306             return PIPE_FORMAT_P010;
307          else
308             return PIPE_FORMAT_NV12;
309       case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
310          return false;
311       case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
312          return false;
313       case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
314          return true;
315       case PIPE_VIDEO_CAP_STACKED_FRAMES:
316          return (sscreen->info.family < CHIP_TONGA) ? 1 : 2;
317       case PIPE_VIDEO_CAP_MAX_TEMPORAL_LAYERS:
318          return (sscreen->info.ip[AMD_IP_UVD_ENC].num_queues ||
319                  sscreen->info.vcn_ip_version >= VCN_1_0_0) ? 4 : 0;
320       case PIPE_VIDEO_CAP_ENC_QUALITY_LEVEL:
321          return 32;
322       case PIPE_VIDEO_CAP_ENC_SUPPORTS_MAX_FRAME_SIZE:
323          return 1;
324 
325       case PIPE_VIDEO_CAP_ENC_HEVC_FEATURE_FLAGS:
326          if (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN ||
327              profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) {
328             union pipe_h265_enc_cap_features pipe_features;
329             pipe_features.value = 0;
330 
331             pipe_features.bits.amp = PIPE_ENC_FEATURE_SUPPORTED;
332             pipe_features.bits.strong_intra_smoothing = PIPE_ENC_FEATURE_SUPPORTED;
333             pipe_features.bits.constrained_intra_pred = PIPE_ENC_FEATURE_SUPPORTED;
334             pipe_features.bits.deblocking_filter_disable
335                                                       = PIPE_ENC_FEATURE_SUPPORTED;
336             if (sscreen->info.vcn_ip_version >= VCN_2_0_0) {
337                pipe_features.bits.sao = PIPE_ENC_FEATURE_SUPPORTED;
338                pipe_features.bits.cu_qp_delta = PIPE_ENC_FEATURE_SUPPORTED;
339             }
340             if (sscreen->info.vcn_ip_version >= VCN_3_0_0)
341                pipe_features.bits.transform_skip = PIPE_ENC_FEATURE_SUPPORTED;
342 
343             return pipe_features.value;
344          } else
345             return 0;
346 
347       case PIPE_VIDEO_CAP_ENC_HEVC_BLOCK_SIZES:
348          if (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN ||
349              profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) {
350             union pipe_h265_enc_cap_block_sizes pipe_block_sizes;
351             pipe_block_sizes.value = 0;
352 
353             pipe_block_sizes.bits.log2_max_coding_tree_block_size_minus3 = 3;
354             pipe_block_sizes.bits.log2_min_coding_tree_block_size_minus3 = 3;
355             pipe_block_sizes.bits.log2_min_luma_coding_block_size_minus3 = 0;
356             pipe_block_sizes.bits.log2_max_luma_transform_block_size_minus2 = 3;
357             pipe_block_sizes.bits.log2_min_luma_transform_block_size_minus2 = 0;
358 
359             if (sscreen->info.ip[AMD_IP_UVD_ENC].num_queues) {
360                pipe_block_sizes.bits.max_max_transform_hierarchy_depth_inter = 3;
361                pipe_block_sizes.bits.min_max_transform_hierarchy_depth_inter = 3;
362                pipe_block_sizes.bits.max_max_transform_hierarchy_depth_intra = 3;
363                pipe_block_sizes.bits.min_max_transform_hierarchy_depth_intra = 3;
364             }
365 
366             return pipe_block_sizes.value;
367          } else
368             return 0;
369 
370       case PIPE_VIDEO_CAP_ENC_MAX_SLICES_PER_FRAME:
371          return 128;
372 
373       case PIPE_VIDEO_CAP_ENC_SLICES_STRUCTURE:
374          return PIPE_VIDEO_CAP_SLICE_STRUCTURE_ARBITRARY_MACROBLOCKS |
375                 PIPE_VIDEO_CAP_SLICE_STRUCTURE_EQUAL_ROWS |
376                 PIPE_VIDEO_CAP_SLICE_STRUCTURE_EQUAL_MULTI_ROWS;
377 
378       case PIPE_VIDEO_CAP_ENC_AV1_FEATURE:
379          if (sscreen->info.vcn_ip_version >= VCN_4_0_0 && sscreen->info.vcn_ip_version != VCN_4_0_3) {
380             union pipe_av1_enc_cap_features attrib;
381             attrib.value = 0;
382 
383             attrib.bits.support_128x128_superblock = PIPE_ENC_FEATURE_NOT_SUPPORTED;
384             attrib.bits.support_filter_intra = PIPE_ENC_FEATURE_NOT_SUPPORTED;
385             attrib.bits.support_intra_edge_filter = PIPE_ENC_FEATURE_NOT_SUPPORTED;
386             attrib.bits.support_interintra_compound = PIPE_ENC_FEATURE_NOT_SUPPORTED;
387             attrib.bits.support_masked_compound = PIPE_ENC_FEATURE_NOT_SUPPORTED;
388             attrib.bits.support_warped_motion = PIPE_ENC_FEATURE_NOT_SUPPORTED;
389             attrib.bits.support_palette_mode = PIPE_ENC_FEATURE_SUPPORTED;
390             attrib.bits.support_dual_filter = PIPE_ENC_FEATURE_NOT_SUPPORTED;
391             attrib.bits.support_jnt_comp = PIPE_ENC_FEATURE_NOT_SUPPORTED;
392             attrib.bits.support_ref_frame_mvs = PIPE_ENC_FEATURE_NOT_SUPPORTED;
393             attrib.bits.support_superres = PIPE_ENC_FEATURE_NOT_SUPPORTED;
394             attrib.bits.support_restoration = PIPE_ENC_FEATURE_NOT_SUPPORTED;
395             attrib.bits.support_allow_intrabc = PIPE_ENC_FEATURE_NOT_SUPPORTED;
396             attrib.bits.support_cdef_channel_strength = PIPE_ENC_FEATURE_SUPPORTED;
397 
398             return attrib.value;
399          } else
400             return 0;
401 
402       case PIPE_VIDEO_CAP_ENC_AV1_FEATURE_EXT1:
403          if (sscreen->info.vcn_ip_version >= VCN_4_0_0 && sscreen->info.vcn_ip_version != VCN_4_0_3) {
404             union pipe_av1_enc_cap_features_ext1 attrib_ext1;
405             attrib_ext1.value = 0;
406             attrib_ext1.bits.interpolation_filter = PIPE_VIDEO_CAP_ENC_AV1_INTERPOLATION_FILTER_EIGHT_TAP |
407                            PIPE_VIDEO_CAP_ENC_AV1_INTERPOLATION_FILTER_EIGHT_TAP_SMOOTH |
408                            PIPE_VIDEO_CAP_ENC_AV1_INTERPOLATION_FILTER_EIGHT_TAP_SHARP |
409                            PIPE_VIDEO_CAP_ENC_AV1_INTERPOLATION_FILTER_BILINEAR |
410                            PIPE_VIDEO_CAP_ENC_AV1_INTERPOLATION_FILTER_SWITCHABLE;
411             attrib_ext1.bits.min_segid_block_size_accepted = 0;
412             attrib_ext1.bits.segment_feature_support = 0;
413 
414             return attrib_ext1.value;
415          } else
416             return 0;
417 
418       case PIPE_VIDEO_CAP_ENC_AV1_FEATURE_EXT2:
419          if (sscreen->info.vcn_ip_version >= VCN_4_0_0 && sscreen->info.vcn_ip_version != VCN_4_0_3) {
420             union pipe_av1_enc_cap_features_ext2 attrib_ext2;
421             attrib_ext2.value = 0;
422 
423            attrib_ext2.bits.tile_size_bytes_minus1 = 3;
424            attrib_ext2.bits.obu_size_bytes_minus1 = 1;
425            /**
426             * tx_mode supported.
427             * (tx_mode_support & 0x01) == 1: ONLY_4X4 is supported, 0: not.
428             * (tx_mode_support & 0x02) == 1: TX_MODE_LARGEST is supported, 0: not.
429             * (tx_mode_support & 0x04) == 1: TX_MODE_SELECT is supported, 0: not.
430             */
431            attrib_ext2.bits.tx_mode_support = PIPE_VIDEO_CAP_ENC_AV1_TX_MODE_SELECT;
432            attrib_ext2.bits.max_tile_num_minus1 = 31;
433 
434             return attrib_ext2.value;
435          } else
436             return 0;
437       case PIPE_VIDEO_CAP_ENC_SUPPORTS_TILE:
438          if ((sscreen->info.vcn_ip_version >= VCN_4_0_0 && sscreen->info.vcn_ip_version != VCN_4_0_3) &&
439               profile == PIPE_VIDEO_PROFILE_AV1_MAIN)
440             return 1;
441          else
442             return 0;
443 
444       case PIPE_VIDEO_CAP_ENC_MAX_REFERENCES_PER_FRAME:
445          if (sscreen->info.vcn_ip_version >= VCN_3_0_0) {
446             int refPicList0 = 1;
447             int refPicList1 = codec == PIPE_VIDEO_FORMAT_MPEG4_AVC ? 1 : 0;
448             if (sscreen->info.vcn_ip_version >= VCN_5_0_0 && codec == PIPE_VIDEO_FORMAT_AV1) {
449                refPicList0 = 2;
450                refPicList1 = 1;
451             }
452             return refPicList0 | (refPicList1 << 16);
453          } else
454             return 1;
455 
456       case PIPE_VIDEO_CAP_ENC_INTRA_REFRESH:
457             return PIPE_VIDEO_ENC_INTRA_REFRESH_ROW |
458                    PIPE_VIDEO_ENC_INTRA_REFRESH_COLUMN |
459                    PIPE_VIDEO_ENC_INTRA_REFRESH_P_FRAME;
460 
461       case PIPE_VIDEO_CAP_ENC_ROI:
462          if (sscreen->info.vcn_ip_version >= VCN_1_0_0) {
463             union pipe_enc_cap_roi attrib;
464             attrib.value = 0;
465 
466             attrib.bits.num_roi_regions = PIPE_ENC_ROI_REGION_NUM_MAX;
467             attrib.bits.roi_rc_priority_support = PIPE_ENC_FEATURE_NOT_SUPPORTED;
468             attrib.bits.roi_rc_qp_delta_support = PIPE_ENC_FEATURE_SUPPORTED;
469             return attrib.value;
470          }
471          else
472             return 0;
473       case PIPE_VIDEO_CAP_ENC_SURFACE_ALIGNMENT:
474            if (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN ||
475                profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) {
476             union pipe_enc_cap_surface_alignment attrib;
477             attrib.value = 0;
478 
479             attrib.bits.log2_width_alignment = RADEON_ENC_HEVC_SURFACE_LOG2_WIDTH_ALIGNMENT;
480             attrib.bits.log2_height_alignment = RADEON_ENC_HEVC_SURFACE_LOG2_HEIGHT_ALIGNMENT;
481             return attrib.value;
482          }
483          else
484             return 0;
485 
486       case PIPE_VIDEO_CAP_ENC_RATE_CONTROL_QVBR:
487          if (sscreen->info.vcn_ip_version >= VCN_3_0_0 &&
488              sscreen->info.vcn_ip_version < VCN_4_0_0)
489             return sscreen->info.vcn_enc_minor_version >= 30;
490 
491          if (sscreen->info.vcn_ip_version >= VCN_4_0_0 &&
492              sscreen->info.vcn_ip_version < VCN_5_0_0)
493             return sscreen->info.vcn_enc_minor_version >= 15;
494 
495          if (sscreen->info.vcn_ip_version >= VCN_5_0_0)
496             return sscreen->info.vcn_enc_minor_version >= 3;
497 
498          return 0;
499 
500       default:
501          return 0;
502       }
503    }
504 
505    switch (param) {
506    case PIPE_VIDEO_CAP_SUPPORTED:
507       if (codec != PIPE_VIDEO_FORMAT_JPEG &&
508           !(sscreen->info.ip[AMD_IP_UVD].num_queues ||
509             ((sscreen->info.vcn_ip_version >= VCN_4_0_0) ?
510 	      sscreen->info.ip[AMD_IP_VCN_UNIFIED].num_queues :
511 	      sscreen->info.ip[AMD_IP_VCN_DEC].num_queues)))
512          return false;
513       if (QUERYABLE_KERNEL && fully_supported_profile &&
514           sscreen->info.vcn_ip_version >= VCN_1_0_0)
515          return KERNEL_DEC_CAP(codec, valid);
516       if (codec < PIPE_VIDEO_FORMAT_MPEG4_AVC &&
517           sscreen->info.vcn_ip_version >= VCN_3_0_33)
518          return false;
519 
520       switch (codec) {
521       case PIPE_VIDEO_FORMAT_MPEG12:
522          return !(sscreen->info.vcn_ip_version >= VCN_3_0_33 || profile == PIPE_VIDEO_PROFILE_MPEG1);
523       case PIPE_VIDEO_FORMAT_MPEG4:
524          return !(sscreen->info.vcn_ip_version >= VCN_3_0_33);
525       case PIPE_VIDEO_FORMAT_MPEG4_AVC:
526          if ((sscreen->info.family == CHIP_POLARIS10 || sscreen->info.family == CHIP_POLARIS11) &&
527              sscreen->info.uvd_fw_version < UVD_FW_1_66_16) {
528             RVID_ERR("POLARIS10/11 firmware version need to be updated.\n");
529             return false;
530          }
531          return (profile != PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH10);
532       case PIPE_VIDEO_FORMAT_VC1:
533          return !(sscreen->info.vcn_ip_version >= VCN_3_0_33);
534       case PIPE_VIDEO_FORMAT_HEVC:
535          /* Carrizo only supports HEVC Main */
536          if (sscreen->info.family >= CHIP_STONEY)
537             return (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN ||
538                     profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10);
539          else if (sscreen->info.family >= CHIP_CARRIZO)
540             return profile == PIPE_VIDEO_PROFILE_HEVC_MAIN;
541          return false;
542       case PIPE_VIDEO_FORMAT_JPEG:
543          if (sscreen->info.vcn_ip_version >= VCN_1_0_0) {
544             if (!sscreen->info.ip[AMD_IP_VCN_JPEG].num_queues)
545                return false;
546             else
547                return true;
548          }
549          if (sscreen->info.family < CHIP_CARRIZO || sscreen->info.family >= CHIP_VEGA10)
550             return false;
551          if (!sscreen->info.is_amdgpu) {
552             RVID_ERR("No MJPEG support for the kernel version\n");
553             return false;
554          }
555          return true;
556       case PIPE_VIDEO_FORMAT_VP9:
557          return sscreen->info.vcn_ip_version >= VCN_1_0_0;
558       case PIPE_VIDEO_FORMAT_AV1:
559          return sscreen->info.vcn_ip_version >= VCN_3_0_0 && sscreen->info.vcn_ip_version != VCN_3_0_33;
560       default:
561          return false;
562       }
563    case PIPE_VIDEO_CAP_NPOT_TEXTURES:
564       return 1;
565    case PIPE_VIDEO_CAP_MIN_WIDTH:
566    case PIPE_VIDEO_CAP_MIN_HEIGHT:
567       return (codec == PIPE_VIDEO_FORMAT_AV1) ? 16 : 64;
568    case PIPE_VIDEO_CAP_MAX_WIDTH:
569       if (codec != PIPE_VIDEO_FORMAT_UNKNOWN && QUERYABLE_KERNEL)
570             return KERNEL_DEC_CAP(codec, max_width);
571       else {
572          switch (codec) {
573          case PIPE_VIDEO_FORMAT_HEVC:
574          case PIPE_VIDEO_FORMAT_VP9:
575          case PIPE_VIDEO_FORMAT_AV1:
576             return (sscreen->info.vcn_ip_version < VCN_2_0_0) ?
577                ((sscreen->info.family < CHIP_TONGA) ? 2048 : 4096) : 8192;
578          default:
579             return (sscreen->info.family < CHIP_TONGA) ? 2048 : 4096;
580          }
581       }
582    case PIPE_VIDEO_CAP_MAX_HEIGHT:
583       if (codec != PIPE_VIDEO_FORMAT_UNKNOWN && QUERYABLE_KERNEL)
584             return KERNEL_DEC_CAP(codec, max_height);
585       else {
586          switch (codec) {
587          case PIPE_VIDEO_FORMAT_HEVC:
588          case PIPE_VIDEO_FORMAT_VP9:
589          case PIPE_VIDEO_FORMAT_AV1:
590             return (sscreen->info.vcn_ip_version < VCN_2_0_0) ?
591                ((sscreen->info.family < CHIP_TONGA) ? 1152 : 4096) : 4352;
592          default:
593             return (sscreen->info.family < CHIP_TONGA) ? 1152 : 4096;
594          }
595       }
596    case PIPE_VIDEO_CAP_PREFERED_FORMAT:
597       if (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)
598          return PIPE_FORMAT_P010;
599       else if (profile == PIPE_VIDEO_PROFILE_VP9_PROFILE2)
600          return PIPE_FORMAT_P010;
601       else
602          return PIPE_FORMAT_NV12;
603 
604    case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
605       return false;
606    case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED: {
607       enum pipe_video_format format = u_reduce_video_profile(profile);
608 
609       if (format >= PIPE_VIDEO_FORMAT_HEVC)
610          return false;
611 
612       return true;
613    }
614    case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
615       return true;
616    case PIPE_VIDEO_CAP_MAX_LEVEL:
617       if ((profile == PIPE_VIDEO_PROFILE_MPEG2_SIMPLE ||
618            profile == PIPE_VIDEO_PROFILE_MPEG2_MAIN ||
619            profile == PIPE_VIDEO_PROFILE_MPEG4_ADVANCED_SIMPLE ||
620            profile == PIPE_VIDEO_PROFILE_VC1_ADVANCED) &&
621           sscreen->info.dec_caps.codec_info[codec - 1].valid) {
622          return sscreen->info.dec_caps.codec_info[codec - 1].max_level;
623       } else {
624          switch (profile) {
625          case PIPE_VIDEO_PROFILE_MPEG1:
626             return 0;
627          case PIPE_VIDEO_PROFILE_MPEG2_SIMPLE:
628          case PIPE_VIDEO_PROFILE_MPEG2_MAIN:
629             return 3;
630          case PIPE_VIDEO_PROFILE_MPEG4_SIMPLE:
631             return 3;
632          case PIPE_VIDEO_PROFILE_MPEG4_ADVANCED_SIMPLE:
633             return 5;
634          case PIPE_VIDEO_PROFILE_VC1_SIMPLE:
635             return 1;
636          case PIPE_VIDEO_PROFILE_VC1_MAIN:
637             return 2;
638          case PIPE_VIDEO_PROFILE_VC1_ADVANCED:
639             return 4;
640          case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:
641          case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN:
642          case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:
643             return (sscreen->info.family < CHIP_TONGA) ? 41 : 52;
644          case PIPE_VIDEO_PROFILE_HEVC_MAIN:
645          case PIPE_VIDEO_PROFILE_HEVC_MAIN_10:
646             return 186;
647          default:
648             return 0;
649          }
650       }
651    case PIPE_VIDEO_CAP_SUPPORTS_CONTIGUOUS_PLANES_MAP:
652       return true;
653    case PIPE_VIDEO_CAP_ROI_CROP_DEC:
654       if (codec == PIPE_VIDEO_FORMAT_JPEG &&
655           (sscreen->info.vcn_ip_version == VCN_4_0_3 ||
656            sscreen->info.vcn_ip_version == VCN_5_0_1))
657          return true;
658       return false;
659    case PIPE_VIDEO_CAP_SKIP_CLEAR_SURFACE:
660       return sscreen->info.is_amdgpu && sscreen->info.drm_minor >= 59;
661    default:
662       return 0;
663    }
664 }
665 
si_vid_is_format_supported(struct pipe_screen * screen,enum pipe_format format,enum pipe_video_profile profile,enum pipe_video_entrypoint entrypoint)666 static bool si_vid_is_format_supported(struct pipe_screen *screen, enum pipe_format format,
667                                        enum pipe_video_profile profile,
668                                        enum pipe_video_entrypoint entrypoint)
669 {
670    struct si_screen *sscreen = (struct si_screen *)screen;
671 
672    if (sscreen->info.ip[AMD_IP_VPE].num_queues && entrypoint == PIPE_VIDEO_ENTRYPOINT_PROCESSING) {
673       /* Todo:
674        * Unable to confirm whether it is asking for an input or output type
675        * Have to modify va frontend for solving this problem
676        */
677       /* VPE Supported input type */
678       if ((format == PIPE_FORMAT_NV12) || (format == PIPE_FORMAT_NV21) || (format == PIPE_FORMAT_P010))
679          return true;
680 
681       /* VPE Supported output type */
682       if ((format == PIPE_FORMAT_A8R8G8B8_UNORM) || (format == PIPE_FORMAT_A8B8G8R8_UNORM) || (format == PIPE_FORMAT_R8G8B8A8_UNORM) ||
683           (format == PIPE_FORMAT_B8G8R8A8_UNORM) || (format == PIPE_FORMAT_X8R8G8B8_UNORM) || (format == PIPE_FORMAT_X8B8G8R8_UNORM) ||
684           (format == PIPE_FORMAT_R8G8B8X8_UNORM) || (format == PIPE_FORMAT_B8G8R8X8_UNORM) || (format == PIPE_FORMAT_A2R10G10B10_UNORM) ||
685           (format == PIPE_FORMAT_A2B10G10R10_UNORM) || (format == PIPE_FORMAT_B10G10R10A2_UNORM) || (format == PIPE_FORMAT_R10G10B10A2_UNORM))
686          return true;
687    }
688 
689    /* HEVC 10 bit decoding should use P010 instead of NV12 if possible */
690    if (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)
691       return (format == PIPE_FORMAT_NV12) || (format == PIPE_FORMAT_P010) ||
692              (format == PIPE_FORMAT_P016);
693 
694    /* Vp9 profile 2 supports 10 bit decoding using P016 */
695    if (profile == PIPE_VIDEO_PROFILE_VP9_PROFILE2)
696       return (format == PIPE_FORMAT_P010) || (format == PIPE_FORMAT_P016);
697 
698    if (profile == PIPE_VIDEO_PROFILE_AV1_MAIN && entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM)
699       return (format == PIPE_FORMAT_P010) || (format == PIPE_FORMAT_P016) ||
700              (format == PIPE_FORMAT_NV12);
701 
702    if (profile == PIPE_VIDEO_PROFILE_AV1_PROFILE2 && entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM) {
703       if (sscreen->info.vcn_ip_version < VCN_5_0_0 && sscreen->info.vcn_ip_version != VCN_4_0_0)
704          return false;
705       return (format == PIPE_FORMAT_P010) || (format == PIPE_FORMAT_P016) ||
706              (format == PIPE_FORMAT_P012) || (format == PIPE_FORMAT_NV12);
707    }
708 
709    /* JPEG supports YUV400 and YUV444 */
710    if (profile == PIPE_VIDEO_PROFILE_JPEG_BASELINE) {
711       switch (format) {
712       case PIPE_FORMAT_NV12:
713       case PIPE_FORMAT_YUYV:
714       case PIPE_FORMAT_Y8_400_UNORM:
715          return true;
716       case PIPE_FORMAT_Y8_U8_V8_444_UNORM:
717       case PIPE_FORMAT_Y8_U8_V8_440_UNORM:
718          if (sscreen->info.vcn_ip_version >= VCN_2_0_0)
719             return true;
720          else
721             return false;
722       case PIPE_FORMAT_R8G8B8A8_UNORM:
723       case PIPE_FORMAT_A8R8G8B8_UNORM:
724       case PIPE_FORMAT_R8_G8_B8_UNORM:
725          if (sscreen->info.vcn_ip_version == VCN_4_0_3 ||
726              sscreen->info.vcn_ip_version == VCN_5_0_1)
727             return true;
728          else
729             return false;
730       default:
731          return false;
732       }
733    }
734 
735    if ((entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE) &&
736           (((profile == PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH) &&
737           (sscreen->info.vcn_ip_version >= VCN_2_0_0)) ||
738           ((profile == PIPE_VIDEO_PROFILE_AV1_MAIN) &&
739            (sscreen->info.vcn_ip_version >= VCN_4_0_0 &&
740             sscreen->info.vcn_ip_version != VCN_4_0_3 &&
741             sscreen->info.vcn_ip_version != VCN_5_0_1))))
742       return (format == PIPE_FORMAT_P010 || format == PIPE_FORMAT_NV12);
743 
744 
745    /* we can only handle this one with UVD */
746    if (profile != PIPE_VIDEO_PROFILE_UNKNOWN)
747       return format == PIPE_FORMAT_NV12;
748 
749    return vl_video_buffer_is_format_supported(screen, format, profile, entrypoint);
750 }
751 
si_vid_is_target_buffer_supported(struct pipe_screen * screen,enum pipe_format format,struct pipe_video_buffer * target,enum pipe_video_profile profile,enum pipe_video_entrypoint entrypoint)752 static bool si_vid_is_target_buffer_supported(struct pipe_screen *screen,
753                                               enum pipe_format format,
754                                               struct pipe_video_buffer *target,
755                                               enum pipe_video_profile profile,
756                                               enum pipe_video_entrypoint entrypoint)
757 {
758    struct si_screen *sscreen = (struct si_screen *)screen;
759    struct si_texture *tex = (struct si_texture *)((struct vl_video_buffer *)target)->resources[0];
760    const bool is_dcc = tex->surface.meta_offset;
761    const bool is_format_conversion = format != target->buffer_format;
762 
763    switch (entrypoint) {
764    case PIPE_VIDEO_ENTRYPOINT_BITSTREAM:
765       if (is_dcc || is_format_conversion)
766          return false;
767       break;
768 
769    case PIPE_VIDEO_ENTRYPOINT_ENCODE:
770       if (is_dcc)
771          return false;
772 
773       /* EFC */
774       if (is_format_conversion) {
775          const bool input_8bit =
776             target->buffer_format == PIPE_FORMAT_B8G8R8A8_UNORM ||
777             target->buffer_format == PIPE_FORMAT_B8G8R8X8_UNORM ||
778             target->buffer_format == PIPE_FORMAT_R8G8B8A8_UNORM ||
779             target->buffer_format == PIPE_FORMAT_R8G8B8X8_UNORM;
780          const bool input_10bit =
781             target->buffer_format == PIPE_FORMAT_B10G10R10A2_UNORM ||
782             target->buffer_format == PIPE_FORMAT_B10G10R10X2_UNORM ||
783             target->buffer_format == PIPE_FORMAT_R10G10B10A2_UNORM ||
784             target->buffer_format == PIPE_FORMAT_R10G10B10X2_UNORM;
785 
786          if (sscreen->info.vcn_ip_version < VCN_2_0_0 ||
787              sscreen->info.vcn_ip_version == VCN_2_2_0 ||
788              sscreen->debug_flags & DBG(NO_EFC))
789             return false;
790 
791          if (input_8bit && format != PIPE_FORMAT_NV12)
792             return false;
793          if (input_10bit && format != PIPE_FORMAT_NV12 && format != PIPE_FORMAT_P010)
794             return false;
795       }
796       break;
797 
798    default:
799       if (is_format_conversion)
800          return false;
801       break;
802    }
803 
804    return si_vid_is_format_supported(screen, format, profile, entrypoint);
805 }
806 
get_max_threads_per_block(struct si_screen * screen,enum pipe_shader_ir ir_type)807 static unsigned get_max_threads_per_block(struct si_screen *screen, enum pipe_shader_ir ir_type)
808 {
809    if (ir_type == PIPE_SHADER_IR_NATIVE)
810       return 256;
811 
812    /* LLVM only supports 1024 threads per block. */
813    return 1024;
814 }
815 
si_get_compute_param(struct pipe_screen * screen,enum pipe_shader_ir ir_type,enum pipe_compute_cap param,void * ret)816 static int si_get_compute_param(struct pipe_screen *screen, enum pipe_shader_ir ir_type,
817                                 enum pipe_compute_cap param, void *ret)
818 {
819    struct si_screen *sscreen = (struct si_screen *)screen;
820 
821    // TODO: select these params by asic
822    switch (param) {
823    case PIPE_COMPUTE_CAP_IR_TARGET: {
824       const char *gpu, *triple;
825 
826       triple = "amdgcn-mesa-mesa3d";
827       gpu = ac_get_llvm_processor_name(sscreen->info.family);
828       if (ret) {
829          sprintf(ret, "%s-%s", gpu, triple);
830       }
831       /* +2 for dash and terminating NIL byte */
832       return (strlen(triple) + strlen(gpu) + 2) * sizeof(char);
833    }
834    case PIPE_COMPUTE_CAP_GRID_DIMENSION:
835       if (ret) {
836          uint64_t *grid_dimension = ret;
837          grid_dimension[0] = 3;
838       }
839       return 1 * sizeof(uint64_t);
840 
841    case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
842       if (ret) {
843          uint64_t *grid_size = ret;
844          /* Use this size, so that internal counters don't overflow 64 bits. */
845          grid_size[0] = UINT32_MAX;
846          grid_size[1] = UINT16_MAX;
847          grid_size[2] = UINT16_MAX;
848       }
849       return 3 * sizeof(uint64_t);
850 
851    case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
852       if (ret) {
853          uint64_t *block_size = ret;
854          unsigned threads_per_block = get_max_threads_per_block(sscreen, ir_type);
855          block_size[0] = threads_per_block;
856          block_size[1] = threads_per_block;
857          block_size[2] = threads_per_block;
858       }
859       return 3 * sizeof(uint64_t);
860 
861    case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
862       if (ret) {
863          uint64_t *max_threads_per_block = ret;
864          *max_threads_per_block = get_max_threads_per_block(sscreen, ir_type);
865       }
866       return sizeof(uint64_t);
867    case PIPE_COMPUTE_CAP_ADDRESS_BITS:
868       if (ret) {
869          uint32_t *address_bits = ret;
870          address_bits[0] = 64;
871       }
872       return 1 * sizeof(uint32_t);
873 
874    case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
875       if (ret) {
876          uint64_t *max_global_size = ret;
877          uint64_t max_mem_alloc_size;
878 
879          si_get_compute_param(screen, ir_type, PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
880                               &max_mem_alloc_size);
881 
882          /* In OpenCL, the MAX_MEM_ALLOC_SIZE must be at least
883           * 1/4 of the MAX_GLOBAL_SIZE.  Since the
884           * MAX_MEM_ALLOC_SIZE is fixed for older kernels,
885           * make sure we never report more than
886           * 4 * MAX_MEM_ALLOC_SIZE.
887           */
888          *max_global_size =
889             MIN2(4 * max_mem_alloc_size, sscreen->info.max_heap_size_kb * 1024ull);
890       }
891       return sizeof(uint64_t);
892 
893    case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
894       if (ret) {
895          uint64_t *max_local_size = ret;
896          /* Value reported by the closed source driver. */
897          if (sscreen->info.gfx_level == GFX6)
898             *max_local_size = 32 * 1024;
899          else
900             *max_local_size = 64 * 1024;
901       }
902       return sizeof(uint64_t);
903 
904    case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
905       if (ret) {
906          uint64_t *max_input_size = ret;
907          /* Value reported by the closed source driver. */
908          *max_input_size = 1024;
909       }
910       return sizeof(uint64_t);
911 
912    case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
913       if (ret) {
914          uint64_t *max_mem_alloc_size = ret;
915 
916          /* Return 1/4 of the heap size as the maximum because the max size is not practically
917           * allocatable.
918           */
919          *max_mem_alloc_size = (sscreen->info.max_heap_size_kb / 4) * 1024ull;
920       }
921       return sizeof(uint64_t);
922 
923    case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
924       if (ret) {
925          uint32_t *max_clock_frequency = ret;
926          *max_clock_frequency = sscreen->info.max_gpu_freq_mhz;
927       }
928       return sizeof(uint32_t);
929 
930    case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
931       if (ret) {
932          uint32_t *max_compute_units = ret;
933          *max_compute_units = sscreen->info.num_cu;
934       }
935       return sizeof(uint32_t);
936 
937    case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
938       if (ret) {
939          uint32_t *images_supported = ret;
940          *images_supported = 0;
941       }
942       return sizeof(uint32_t);
943    case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
944       break; /* unused */
945    case PIPE_COMPUTE_CAP_MAX_SUBGROUPS: {
946       if (ret) {
947          uint32_t *max_subgroups = ret;
948          unsigned threads = get_max_threads_per_block(sscreen, ir_type);
949          unsigned subgroup_size;
950 
951          if (sscreen->debug_flags & DBG(W64_CS) || sscreen->info.gfx_level < GFX10)
952             subgroup_size = 64;
953          else
954             subgroup_size = 32;
955 
956          *max_subgroups = threads / subgroup_size;
957       }
958       return sizeof(uint32_t);
959    }
960    case PIPE_COMPUTE_CAP_SUBGROUP_SIZES:
961       if (ret) {
962          uint32_t *subgroup_size = ret;
963          if (sscreen->debug_flags & DBG(W32_CS))
964             *subgroup_size = 32;
965          else if (sscreen->debug_flags & DBG(W64_CS))
966             *subgroup_size = 64;
967          else
968             *subgroup_size = sscreen->info.gfx_level < GFX10 ? 64 : 64 | 32;
969       }
970       return sizeof(uint32_t);
971    case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
972       if (ret) {
973          uint64_t *max_variable_threads_per_block = ret;
974          if (ir_type == PIPE_SHADER_IR_NATIVE)
975             *max_variable_threads_per_block = 0;
976          else
977             *max_variable_threads_per_block = SI_MAX_VARIABLE_THREADS_PER_BLOCK;
978       }
979       return sizeof(uint64_t);
980    }
981 
982    fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param);
983    return 0;
984 }
985 
si_get_timestamp(struct pipe_screen * screen)986 static uint64_t si_get_timestamp(struct pipe_screen *screen)
987 {
988    struct si_screen *sscreen = (struct si_screen *)screen;
989 
990    return 1000000 * sscreen->ws->query_value(sscreen->ws, RADEON_TIMESTAMP) /
991           sscreen->info.clock_crystal_freq;
992 }
993 
si_query_memory_info(struct pipe_screen * screen,struct pipe_memory_info * info)994 static void si_query_memory_info(struct pipe_screen *screen, struct pipe_memory_info *info)
995 {
996    struct si_screen *sscreen = (struct si_screen *)screen;
997    struct radeon_winsys *ws = sscreen->ws;
998    unsigned vram_usage, gtt_usage;
999 
1000    info->total_device_memory = sscreen->info.vram_size_kb;
1001    info->total_staging_memory = sscreen->info.gart_size_kb;
1002 
1003    /* The real TTM memory usage is somewhat random, because:
1004     *
1005     * 1) TTM delays freeing memory, because it can only free it after
1006     *    fences expire.
1007     *
1008     * 2) The memory usage can be really low if big VRAM evictions are
1009     *    taking place, but the real usage is well above the size of VRAM.
1010     *
1011     * Instead, return statistics of this process.
1012     */
1013    vram_usage = ws->query_value(ws, RADEON_VRAM_USAGE) / 1024;
1014    gtt_usage = ws->query_value(ws, RADEON_GTT_USAGE) / 1024;
1015 
1016    info->avail_device_memory =
1017       vram_usage <= info->total_device_memory ? info->total_device_memory - vram_usage : 0;
1018    info->avail_staging_memory =
1019       gtt_usage <= info->total_staging_memory ? info->total_staging_memory - gtt_usage : 0;
1020 
1021    info->device_memory_evicted = ws->query_value(ws, RADEON_NUM_BYTES_MOVED) / 1024;
1022 
1023    if (sscreen->info.is_amdgpu)
1024       info->nr_device_memory_evictions = ws->query_value(ws, RADEON_NUM_EVICTIONS);
1025    else
1026       /* Just return the number of evicted 64KB pages. */
1027       info->nr_device_memory_evictions = info->device_memory_evicted / 64;
1028 }
1029 
si_get_disk_shader_cache(struct pipe_screen * pscreen)1030 static struct disk_cache *si_get_disk_shader_cache(struct pipe_screen *pscreen)
1031 {
1032    struct si_screen *sscreen = (struct si_screen *)pscreen;
1033 
1034    return sscreen->disk_shader_cache;
1035 }
1036 
si_init_renderer_string(struct si_screen * sscreen)1037 static void si_init_renderer_string(struct si_screen *sscreen)
1038 {
1039    char first_name[256], second_name[32] = {}, kernel_version[128] = {};
1040    struct utsname uname_data;
1041 
1042    snprintf(first_name, sizeof(first_name), "%s",
1043             sscreen->info.marketing_name ? sscreen->info.marketing_name : sscreen->info.name);
1044    snprintf(second_name, sizeof(second_name), "%s, ", sscreen->info.lowercase_name);
1045 
1046    if (uname(&uname_data) == 0)
1047       snprintf(kernel_version, sizeof(kernel_version), ", %s", uname_data.release);
1048 
1049    const char *compiler_name =
1050 #if AMD_LLVM_AVAILABLE
1051       !sscreen->use_aco ? "LLVM " MESA_LLVM_VERSION_STRING :
1052 #endif
1053       "ACO";
1054 
1055    snprintf(sscreen->renderer_string, sizeof(sscreen->renderer_string),
1056             "%s (radeonsi, %s%s, DRM %i.%i%s)", first_name, second_name, compiler_name,
1057             sscreen->info.drm_major, sscreen->info.drm_minor, kernel_version);
1058 }
1059 
si_get_screen_fd(struct pipe_screen * screen)1060 static int si_get_screen_fd(struct pipe_screen *screen)
1061 {
1062    struct si_screen *sscreen = (struct si_screen *)screen;
1063    struct radeon_winsys *ws = sscreen->ws;
1064 
1065    return ws->get_fd(ws);
1066 }
1067 
/**
 * Return the maximum cost of expressions that may be moved between the
 * producer and the consumer shader.
 *
 * If the consumer matches a shader profile that has
 * SI_PROFILE_NO_OPT_UNIFORM_VARYINGS set, the cost is 0; otherwise the common
 * AMD heuristic decides.
 */
static unsigned si_varying_expression_max_cost(nir_shader *producer, nir_shader *consumer)
{
   const unsigned profile_count = si_get_num_shader_profiles();

   for (unsigned p = 0; p < profile_count; p++) {
      if (!_mesa_printed_blake3_equal(consumer->info.source_blake3, si_shader_profiles[p].blake3))
         continue;

      /* Only the first matching profile is consulted. */
      if (si_shader_profiles[p].options & SI_PROFILE_NO_OPT_UNIFORM_VARYINGS)
         return 0; /* only propagate constants */
      break;
   }

   return ac_nir_varying_expression_max_cost(producer, consumer);
}
1082 
1083 
1084 static void
si_driver_thread_add_job(struct pipe_screen * screen,void * data,struct util_queue_fence * fence,pipe_driver_thread_func execute,pipe_driver_thread_func cleanup,const size_t job_size)1085 si_driver_thread_add_job(struct pipe_screen *screen, void *data,
1086                          struct util_queue_fence *fence,
1087                          pipe_driver_thread_func execute,
1088                          pipe_driver_thread_func cleanup,
1089                          const size_t job_size)
1090 {
1091    struct si_screen *sscreen = (struct si_screen *)screen;
1092    util_queue_add_job(&sscreen->shader_compiler_queue, data, fence, execute, cleanup, job_size);
1093 }
1094 
1095 
si_init_screen_get_functions(struct si_screen * sscreen)1096 void si_init_screen_get_functions(struct si_screen *sscreen)
1097 {
1098    sscreen->b.get_name = si_get_name;
1099    sscreen->b.get_vendor = si_get_vendor;
1100    sscreen->b.get_device_vendor = si_get_device_vendor;
1101    sscreen->b.get_screen_fd = si_get_screen_fd;
1102    sscreen->b.is_compute_copy_faster = si_is_compute_copy_faster;
1103    sscreen->b.driver_thread_add_job = si_driver_thread_add_job;
1104    sscreen->b.get_compute_param = si_get_compute_param;
1105    sscreen->b.get_timestamp = si_get_timestamp;
1106    sscreen->b.get_shader_param = si_get_shader_param;
1107    sscreen->b.get_compiler_options = si_get_compiler_options;
1108    sscreen->b.get_device_uuid = si_get_device_uuid;
1109    sscreen->b.get_driver_uuid = si_get_driver_uuid;
1110    sscreen->b.query_memory_info = si_query_memory_info;
1111    sscreen->b.get_disk_shader_cache = si_get_disk_shader_cache;
1112 
1113    if (sscreen->info.ip[AMD_IP_UVD].num_queues ||
1114        ((sscreen->info.vcn_ip_version >= VCN_4_0_0) ?
1115 	 sscreen->info.ip[AMD_IP_VCN_UNIFIED].num_queues : sscreen->info.ip[AMD_IP_VCN_DEC].num_queues) ||
1116        sscreen->info.ip[AMD_IP_VCN_JPEG].num_queues || sscreen->info.ip[AMD_IP_VCE].num_queues ||
1117        sscreen->info.ip[AMD_IP_UVD_ENC].num_queues || sscreen->info.ip[AMD_IP_VCN_ENC].num_queues ||
1118        sscreen->info.ip[AMD_IP_VPE].num_queues) {
1119       sscreen->b.get_video_param = si_get_video_param;
1120       sscreen->b.is_video_format_supported = si_vid_is_format_supported;
1121       sscreen->b.is_video_target_buffer_supported = si_vid_is_target_buffer_supported;
1122    } else {
1123       sscreen->b.get_video_param = si_get_video_param_no_video_hw;
1124       sscreen->b.is_video_format_supported = vl_video_buffer_is_format_supported;
1125    }
1126 
1127    si_init_renderer_string(sscreen);
1128 
1129    /*        |---------------------------------- Performance & Availability --------------------------------|
1130     *        |MAD/MAC/MADAK/MADMK|MAD_LEGACY|MAC_LEGACY|    FMA     |FMAC/FMAAK/FMAMK|FMA_LEGACY|PK_FMA_F16,|Best choice
1131     * Arch   |    F32,F16,F64    | F32,F16  | F32,F16  |F32,F16,F64 |    F32,F16     |   F32    |PK_FMAC_F16|F16,F32,F64
1132     * ------------------------------------------------------------------------------------------------------------------
1133     * gfx6,7 |     1 , - , -     |  1 , -   |  1 , -   |1/4, - ,1/16|     - , -      |    -     |   - , -   | - ,MAD,FMA
1134     * gfx8   |     1 , 1 , -     |  1 , -   |  - , -   |1/4, 1 ,1/16|     - , -      |    -     |   - , -   |MAD,MAD,FMA
1135     * gfx9   |     1 ,1|0, -     |  1 , -   |  - , -   | 1 , 1 ,1/16|    0|1, -      |    -     |   2 , -   |FMA,MAD,FMA
1136     * gfx10  |     1 , - , -     |  1 , -   |  1 , -   | 1 , 1 ,1/16|     1 , 1      |    -     |   2 , 2   |FMA,MAD,FMA
1137     * gfx10.3|     - , - , -     |  - , -   |  - , -   | 1 , 1 ,1/16|     1 , 1      |    1     |   2 , 2   |  all FMA
1138     * gfx11  |     - , - , -     |  - , -   |  - , -   | 2 , 2 ,1/16|     2 , 2      |    2     |   2 , 2   |  all FMA
1139     *
1140     * Tahiti, Hawaii, Carrizo, Vega20: FMA_F32 is full rate, FMA_F64 is 1/4
1141     * gfx9 supports MAD_F16 only on Vega10, Raven, Raven2, Renoir.
1142     * gfx9 supports FMAC_F32 only on Vega20, but doesn't support FMAAK and FMAMK.
1143     *
1144     * gfx8 prefers MAD for F16 because of MAC/MADAK/MADMK.
1145     * gfx9 and newer prefer FMA for F16 because of the packed instruction.
1146     * gfx10 and older prefer MAD for F32 because of the legacy instruction.
1147     */
1148    bool use_fma32 =
1149       sscreen->info.gfx_level >= GFX10_3 ||
1150       (sscreen->info.family >= CHIP_GFX940 && !sscreen->info.has_graphics) ||
1151       /* fma32 is too slow for gpu < gfx9, so apply the option only for gpu >= gfx9 */
1152       (sscreen->info.gfx_level >= GFX9 && sscreen->options.force_use_fma32);
1153    bool has_mediump = sscreen->info.gfx_level >= GFX8 && sscreen->options.fp16;
1154 
1155    nir_shader_compiler_options *options = sscreen->nir_options;
1156    ac_nir_set_options(&sscreen->info, !sscreen->use_aco, options);
1157 
1158    options->lower_ffma16 = sscreen->info.gfx_level < GFX9;
1159    options->lower_ffma32 = !use_fma32;
1160    options->lower_ffma64 = false;
1161    options->fuse_ffma16 = sscreen->info.gfx_level >= GFX9;
1162    options->fuse_ffma32 = use_fma32;
1163    options->fuse_ffma64 = true;
1164    options->lower_uniforms_to_ubo = true;
1165    options->lower_to_scalar = true;
1166    options->lower_to_scalar_filter =
1167       sscreen->info.has_packed_math_16bit ? si_alu_to_scalar_packed_math_filter : NULL;
1168    options->max_unroll_iterations = 128;
1169    options->max_unroll_iterations_aggressive = 128;
1170    /* For OpenGL, rounding mode is undefined. We want fast packing with v_cvt_pkrtz_f16,
1171     * but if we use it, all f32->f16 conversions have to round towards zero,
1172     * because both scalar and vec2 down-conversions have to round equally.
1173     *
1174     * For OpenCL, rounding mode is explicit. This will only lower f2f16 to f2f16_rtz
1175     * when execution mode is rtz instead of rtne.
1176     */
1177    options->force_f2f16_rtz = true;
1178    options->io_options |= (!has_mediump ? nir_io_mediump_is_32bit : 0) | nir_io_has_intrinsics;
1179    options->lower_mediump_io = has_mediump ? si_lower_mediump_io : NULL;
1180    /* HW supports indirect indexing for: | Enabled in driver
1181     * -------------------------------------------------------
1182     * TCS inputs                         | Yes
1183     * TES inputs                         | Yes
1184     * GS inputs                          | No
1185     * -------------------------------------------------------
1186     * VS outputs before TCS              | No
1187     * TCS outputs                        | Yes
1188     * VS/TES outputs before GS           | No
1189     */
1190    options->support_indirect_inputs = BITFIELD_BIT(MESA_SHADER_TESS_CTRL) |
1191                                       BITFIELD_BIT(MESA_SHADER_TESS_EVAL);
1192    options->support_indirect_outputs = BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
1193    options->varying_expression_max_cost = si_varying_expression_max_cost;
1194 }
1195 
si_init_screen_caps(struct si_screen * sscreen)1196 void si_init_screen_caps(struct si_screen *sscreen)
1197 {
1198    struct pipe_caps *caps = (struct pipe_caps *)&sscreen->b.caps;
1199 
1200    u_init_pipe_screen_caps(&sscreen->b, 1);
1201 
1202    /* Gfx8 (Polaris11) hangs, so don't enable this on Gfx8 and older chips. */
1203    bool enable_sparse =
1204       sscreen->info.gfx_level >= GFX9 && sscreen->info.gfx_level < GFX12 &&
1205       sscreen->info.has_sparse_vm_mappings;
1206 
1207    /* Supported features (boolean caps). */
1208    caps->max_dual_source_render_targets = true;
1209    caps->anisotropic_filter = true;
1210    caps->occlusion_query = true;
1211    caps->texture_mirror_clamp = true;
1212    caps->texture_shadow_lod = true;
1213    caps->texture_mirror_clamp_to_edge = true;
1214    caps->blend_equation_separate = true;
1215    caps->texture_swizzle = true;
1216    caps->depth_clip_disable = true;
1217    caps->depth_clip_disable_separate = true;
1218    caps->shader_stencil_export = true;
1219    caps->vertex_element_instance_divisor = true;
1220    caps->fs_coord_origin_upper_left = true;
1221    caps->fs_coord_pixel_center_half_integer = true;
1222    caps->fs_coord_pixel_center_integer = true;
1223    caps->fragment_shader_texture_lod = true;
1224    caps->fragment_shader_derivatives = true;
1225    caps->primitive_restart = true;
1226    caps->primitive_restart_fixed_index = true;
1227    caps->conditional_render = true;
1228    caps->texture_barrier = true;
1229    caps->indep_blend_enable = true;
1230    caps->indep_blend_func = true;
1231    caps->vertex_color_unclamped = true;
1232    caps->start_instance = true;
1233    caps->npot_textures = true;
1234    caps->mixed_framebuffer_sizes = true;
1235    caps->mixed_color_depth_bits = true;
1236    caps->vertex_color_clamped = true;
1237    caps->fragment_color_clamped = true;
1238    caps->vs_instanceid = true;
1239    caps->compute = true;
1240    caps->texture_buffer_objects = true;
1241    caps->vs_layer_viewport = true;
1242    caps->query_pipeline_statistics = true;
1243    caps->sample_shading = true;
1244    caps->draw_indirect = true;
1245    caps->clip_halfz = true;
1246    caps->vs_window_space_position = true;
1247    caps->polygon_offset_clamp = true;
1248    caps->multisample_z_resolve = true;
1249    caps->quads_follow_provoking_vertex_convention = true;
1250    caps->tgsi_texcoord = true;
1251    caps->fs_fine_derivative = true;
1252    caps->conditional_render_inverted = true;
1253    caps->texture_float_linear = true;
1254    caps->texture_half_float_linear = true;
1255    caps->depth_bounds_test = true;
1256    caps->sampler_view_target = true;
1257    caps->texture_query_lod = true;
1258    caps->texture_gather_sm5 = true;
1259    caps->texture_query_samples = true;
1260    caps->force_persample_interp = true;
1261    caps->copy_between_compressed_and_plain_formats = true;
1262    caps->fs_position_is_sysval = true;
1263    caps->fs_face_is_integer_sysval = true;
1264    caps->invalidate_buffer = true;
1265    caps->surface_reinterpret_blocks = true;
1266    caps->query_buffer_object = true;
1267    caps->query_memory_info = true;
1268    caps->shader_pack_half_float = true;
1269    caps->framebuffer_no_attachment = true;
1270    caps->robust_buffer_access_behavior = true;
1271    caps->polygon_offset_units_unscaled = true;
1272    caps->string_marker = true;
1273    caps->cull_distance = true;
1274    caps->shader_array_components = true;
1275    caps->stream_output_pause_resume = true;
1276    caps->stream_output_interleave_buffers = true;
1277    caps->doubles = true;
1278    caps->tgsi_tex_txf_lz = true;
1279    caps->tes_layer_viewport = true;
1280    caps->bindless_texture = true;
1281    caps->query_timestamp = true;
1282    caps->query_time_elapsed = true;
1283    caps->nir_samplers_as_deref = true;
1284    caps->memobj = true;
1285    caps->load_constbuf = true;
1286    caps->int64 = true;
1287    caps->shader_clock = true;
1288    caps->can_bind_const_buffer_as_vertex = true;
1289    caps->allow_mapped_buffers_during_execution = true;
1290    caps->signed_vertex_buffer_offset = true;
1291    caps->shader_ballot = true;
1292    caps->shader_group_vote = true;
1293    caps->compute_grid_info_last_block = true;
1294    caps->image_load_formatted = true;
1295    caps->prefer_compute_for_multimedia = true;
1296    caps->tgsi_div = true;
1297    caps->packed_uniforms = true;
1298    caps->gl_spirv = true;
1299    caps->alpha_to_coverage_dither_control = true;
1300    caps->map_unsynchronized_thread_safe = true;
1301    caps->no_clip_on_copy_tex = true;
1302    caps->shader_atomic_int64 = true;
1303    caps->frontend_noop = true;
1304    caps->demote_to_helper_invocation = true;
1305    caps->prefer_real_buffer_in_constbuf0 = true;
1306    caps->compute_shader_derivatives = true;
1307    caps->image_atomic_inc_wrap = true;
1308    caps->image_store_formatted = true;
1309    caps->allow_draw_out_of_order = true;
1310    caps->query_so_overflow = true;
1311    caps->glsl_tess_levels_as_inputs = true;
1312    caps->device_reset_status_query = true;
1313    caps->texture_multisample = true;
1314    caps->allow_glthread_buffer_subdata_opt = true; /* TODO: remove if it's slow */
1315    caps->null_textures = true;
1316    caps->has_const_bw = true;
1317    caps->cl_gl_sharing = true;
1318    caps->call_finalize_nir_in_linker = true;
1319 
1320    caps->fbfetch = 1;
1321 
1322    /* Tahiti and Verde only: reduction mode is unsupported due to a bug
1323     * (it might work sometimes, but that's not enough)
1324     */
1325    caps->sampler_reduction_minmax =
1326    caps->sampler_reduction_minmax_arb =
1327       !(sscreen->info.family == CHIP_TAHITI || sscreen->info.family == CHIP_VERDE);
1328 
1329    caps->texture_transfer_modes =
1330       PIPE_TEXTURE_TRANSFER_BLIT | PIPE_TEXTURE_TRANSFER_COMPUTE;
1331 
1332    caps->draw_vertex_state = !(sscreen->debug_flags & DBG(NO_FAST_DISPLAY_LIST));
1333 
1334    caps->shader_samples_identical =
1335       sscreen->info.gfx_level < GFX11 && !(sscreen->debug_flags & DBG(NO_FMASK));
1336 
1337    caps->glsl_zero_init = 2;
1338 
1339    caps->generate_mipmap =
1340    caps->seamless_cube_map =
1341    caps->seamless_cube_map_per_texture =
1342    caps->cube_map_array =
1343       sscreen->info.has_3d_cube_border_color_mipmap;
1344 
1345    caps->post_depth_coverage = sscreen->info.gfx_level >= GFX10;
1346 
1347    caps->graphics = sscreen->info.has_graphics;
1348 
1349    caps->resource_from_user_memory = !UTIL_ARCH_BIG_ENDIAN && sscreen->info.has_userptr;
1350 
1351    caps->device_protected_surface = sscreen->info.has_tmz_support;
1352 
1353    caps->min_map_buffer_alignment = SI_MAP_BUFFER_ALIGNMENT;
1354 
1355    caps->max_vertex_buffers = SI_MAX_ATTRIBS;
1356 
1357    caps->constant_buffer_offset_alignment =
1358    caps->texture_buffer_offset_alignment =
1359    caps->max_texture_gather_components =
1360    caps->max_stream_output_buffers =
1361    caps->max_vertex_streams =
1362    caps->shader_buffer_offset_alignment =
1363    caps->max_window_rectangles = 4;
1364 
1365    caps->glsl_feature_level =
1366    caps->glsl_feature_level_compatibility = 460;
1367 
1368    /* Optimal number for good TexSubImage performance on Polaris10. */
1369    caps->max_texture_upload_memory_budget = 64 * 1024 * 1024;
1370 
1371    caps->gl_begin_end_buffer_size = 4096 * 1024;
1372 
1373    /* Return 1/4th of the heap size as the maximum because the max size is not practically
1374     * allocatable. Also, this can only return UINT32_MAX at most.
1375     */
1376    unsigned max_size = MIN2((sscreen->info.max_heap_size_kb * 1024ull) / 4, UINT32_MAX);
1377 
1378    /* Allow max 512 MB to pass CTS with a 32-bit build. */
1379    if (sizeof(void*) == 4)
1380       max_size = MIN2(max_size, 512 * 1024 * 1024);
1381 
1382    caps->max_constant_buffer_size =
1383    caps->max_shader_buffer_size = max_size;
1384 
1385    unsigned max_texels = caps->max_shader_buffer_size;
1386 
1387    /* FYI, BUF_RSRC_WORD2.NUM_RECORDS field limit is UINT32_MAX. */
1388 
1389    /* Gfx8 and older use the size in bytes for bounds checking, and the max element size
1390     * is 16B. Gfx9 and newer use the VGPR index for bounds checking.
1391     */
1392    if (sscreen->info.gfx_level <= GFX8)
1393       max_texels = MIN2(max_texels, UINT32_MAX / 16);
1394    else
1395       /* Gallium has a limitation that it can only bind UINT32_MAX bytes, not texels.
1396        * TODO: Remove this after the gallium interface is changed. */
1397       max_texels = MIN2(max_texels, UINT32_MAX / 16);
1398 
1399    caps->max_texel_buffer_elements = max_texels;
1400 
1401    /* Allow 1/4th of the heap size. */
1402    caps->max_texture_mb = sscreen->info.max_heap_size_kb / 1024 / 4;
1403 
1404    caps->prefer_back_buffer_reuse = false;
1405    caps->uma = false;
1406    caps->prefer_imm_arrays_as_constbuf = false;
1407 
1408    caps->performance_monitor =
1409       sscreen->info.gfx_level >= GFX7 && sscreen->info.gfx_level <= GFX10_3;
1410 
1411    caps->sparse_buffer_page_size = enable_sparse ? RADEON_SPARSE_PAGE_SIZE : 0;
1412 
1413    caps->context_priority_mask = sscreen->info.is_amdgpu ?
1414       PIPE_CONTEXT_PRIORITY_LOW | PIPE_CONTEXT_PRIORITY_MEDIUM | PIPE_CONTEXT_PRIORITY_HIGH : 0;
1415 
1416    caps->fence_signal = sscreen->info.has_syncobj;
1417 
1418    caps->constbuf0_flags = SI_RESOURCE_FLAG_32BIT;
1419 
1420    caps->native_fence_fd = sscreen->info.has_fence_to_handle;
1421 
1422    caps->draw_parameters =
1423    caps->multi_draw_indirect =
1424    caps->multi_draw_indirect_params = sscreen->has_draw_indirect_multi;
1425 
1426    caps->max_shader_patch_varyings = 30;
1427 
1428    caps->max_varyings =
1429    caps->max_gs_invocations = 32;
1430 
1431    caps->texture_border_color_quirk =
1432       sscreen->info.gfx_level <= GFX8 ? PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_R600 : 0;
1433 
1434    /* Stream output. */
1435    caps->max_stream_output_separate_components =
1436    caps->max_stream_output_interleaved_components = 32 * 4;
1437 
1438    /* gfx9 has to report 256 to make piglit/gs-max-output pass.
1439     * gfx8 and earlier can do 1024.
1440     */
1441    caps->max_geometry_output_vertices = 256;
1442    caps->max_geometry_total_output_components = 4095;
1443 
1444    caps->max_vertex_attrib_stride = 2048;
1445 
1446    /* TODO: Gfx12 supports 64K textures, but Gallium can't represent them at the moment. */
1447    caps->max_texture_2d_size = sscreen->info.gfx_level >= GFX12 ? 32768 : 16384;
1448    caps->max_texture_cube_levels = sscreen->info.has_3d_cube_border_color_mipmap ?
1449       (sscreen->info.gfx_level >= GFX12 ? 16 : 15) /* 32K : 16K */ : 0;
1450    caps->max_texture_3d_levels = sscreen->info.has_3d_cube_border_color_mipmap ?
1451       /* This is limited by maximums that both the texture unit and layered rendering support. */
1452       (sscreen->info.gfx_level >= GFX12 ? 15 : /* 16K */
1453        (sscreen->info.gfx_level >= GFX10 ? 14 : 12)) /* 8K : 2K */ : 0;
1454    /* This is limited by maximums that both the texture unit and layered rendering support. */
1455    caps->max_texture_array_layers = sscreen->info.gfx_level >= GFX10 ? 8192 : 2048;
1456 
1457    /* Sparse texture */
1458    caps->max_sparse_texture_size = enable_sparse ? caps->max_texture_2d_size : 0;
1459    caps->max_sparse_3d_texture_size = enable_sparse ? (1 << (caps->max_texture_3d_levels - 1)) : 0;
1460    caps->max_sparse_array_texture_layers = enable_sparse ? caps->max_texture_array_layers : 0;
1461    caps->sparse_texture_full_array_cube_mipmaps =
1462    caps->query_sparse_texture_residency =
1463    caps->clamp_sparse_texture_lod = enable_sparse;
1464 
1465    /* Viewports and render targets. */
1466    caps->max_viewports = SI_MAX_VIEWPORTS;
1467    caps->viewport_subpixel_bits =
1468    caps->rasterizer_subpixel_bits =
1469    caps->max_render_targets = 8;
1470    caps->framebuffer_msaa_constraints = sscreen->info.has_eqaa_surface_allocator ? 2 : 0;
1471 
1472    caps->min_texture_gather_offset =
1473    caps->min_texel_offset = -32;
1474 
1475    caps->max_texture_gather_offset =
1476    caps->max_texel_offset = 31;
1477 
1478    caps->endianness = PIPE_ENDIAN_LITTLE;
1479 
1480    caps->vendor_id = ATI_VENDOR_ID;
1481    caps->device_id = sscreen->info.pci_id;
1482    caps->video_memory = sscreen->info.vram_size_kb >> 10;
1483    caps->pci_group = sscreen->info.pci.domain;
1484    caps->pci_bus = sscreen->info.pci.bus;
1485    caps->pci_device = sscreen->info.pci.dev;
1486    caps->pci_function = sscreen->info.pci.func;
1487 
1488    /* Conversion to nanos from cycles per millisecond */
1489    caps->timer_resolution = DIV_ROUND_UP(1000000, sscreen->info.clock_crystal_freq);
1490 
1491    caps->shader_subgroup_size = 64;
1492    caps->shader_subgroup_supported_stages = BITFIELD_MASK(PIPE_SHADER_TYPES);
1493    caps->shader_subgroup_supported_features = BITFIELD_MASK(PIPE_SHADER_SUBGROUP_NUM_FEATURES);
1494    caps->shader_subgroup_quad_all_stages = true;
1495 
1496    caps->min_line_width =
1497    caps->min_line_width_aa = 1; /* due to axis-aligned end caps at line width 1 */
1498 
1499    caps->min_point_size =
1500    caps->min_point_size_aa =
1501    caps->point_size_granularity =
1502    caps->line_width_granularity = 1.0 / 8.0; /* due to the register field precision */
1503 
1504    /* This depends on the quant mode, though the precise interactions are unknown. */
1505    caps->max_line_width =
1506    caps->max_line_width_aa = 2048;
1507 
1508    caps->max_point_size =
1509    caps->max_point_size_aa = SI_MAX_POINT_SIZE;
1510 
1511    caps->max_texture_anisotropy = 16.0f;
1512 
1513    /* The hw can do 31, but this test fails if we use that:
1514     *    KHR-GL46.texture_lod_bias.texture_lod_bias_all
1515     */
1516    caps->max_texture_lod_bias = 16;
1517 }
1518