• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2017 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * on the rights to use, copy, modify, merge, publish, distribute, sub
9  * license, and/or sell copies of the Software, and to permit persons to whom
10  * the Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22  * USE OR OTHER DEALINGS IN THE SOFTWARE.
23  */
24 
25 #include "compiler/nir/nir.h"
26 #include "radeon/radeon_uvd_enc.h"
27 #include "radeon/radeon_vce.h"
28 #include "radeon/radeon_video.h"
29 #include "si_pipe.h"
30 #include "util/u_screen.h"
31 #include "util/u_video.h"
32 #include "vl/vl_decoder.h"
33 #include "vl/vl_video_buffer.h"
34 #include <sys/utsname.h>
35 
/* Report the driver vendor string. The screen argument is not consulted. */
static const char *si_get_vendor(struct pipe_screen *pscreen)
{
   const char *vendor_name = "AMD";

   return vendor_name;
}
40 
/* Report the device vendor string. The screen argument is not consulted. */
static const char *si_get_device_vendor(struct pipe_screen *pscreen)
{
   const char *device_vendor_name = "AMD";

   return device_vendor_name;
}
45 
si_get_param(struct pipe_screen * pscreen,enum pipe_cap param)46 static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
47 {
48    struct si_screen *sscreen = (struct si_screen *)pscreen;
49 
50    switch (param) {
51    /* Supported features (boolean caps). */
52    case PIPE_CAP_ACCELERATED:
53    case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
54    case PIPE_CAP_ANISOTROPIC_FILTER:
55    case PIPE_CAP_POINT_SPRITE:
56    case PIPE_CAP_OCCLUSION_QUERY:
57    case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
58    case PIPE_CAP_TEXTURE_SHADOW_LOD:
59    case PIPE_CAP_TEXTURE_MIRROR_CLAMP_TO_EDGE:
60    case PIPE_CAP_BLEND_EQUATION_SEPARATE:
61    case PIPE_CAP_TEXTURE_SWIZZLE:
62    case PIPE_CAP_DEPTH_CLIP_DISABLE:
63    case PIPE_CAP_DEPTH_CLIP_DISABLE_SEPARATE:
64    case PIPE_CAP_SHADER_STENCIL_EXPORT:
65    case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
66    case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
67    case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
68    case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
69    case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
70    case PIPE_CAP_FRAGMENT_SHADER_TEXTURE_LOD:
71    case PIPE_CAP_FRAGMENT_SHADER_DERIVATIVES:
72    case PIPE_CAP_VERTEX_SHADER_SATURATE:
73    case PIPE_CAP_SEAMLESS_CUBE_MAP:
74    case PIPE_CAP_PRIMITIVE_RESTART:
75    case PIPE_CAP_PRIMITIVE_RESTART_FIXED_INDEX:
76    case PIPE_CAP_CONDITIONAL_RENDER:
77    case PIPE_CAP_TEXTURE_BARRIER:
78    case PIPE_CAP_INDEP_BLEND_ENABLE:
79    case PIPE_CAP_INDEP_BLEND_FUNC:
80    case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
81    case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
82    case PIPE_CAP_START_INSTANCE:
83    case PIPE_CAP_NPOT_TEXTURES:
84    case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
85    case PIPE_CAP_MIXED_COLOR_DEPTH_BITS:
86    case PIPE_CAP_VERTEX_COLOR_CLAMPED:
87    case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
88    case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
89    case PIPE_CAP_TGSI_INSTANCEID:
90    case PIPE_CAP_COMPUTE:
91    case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
92    case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
93    case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
94    case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
95    case PIPE_CAP_CUBE_MAP_ARRAY:
96    case PIPE_CAP_SAMPLE_SHADING:
97    case PIPE_CAP_DRAW_INDIRECT:
98    case PIPE_CAP_CLIP_HALFZ:
99    case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
100    case PIPE_CAP_POLYGON_OFFSET_CLAMP:
101    case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
102    case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
103    case PIPE_CAP_TGSI_TEXCOORD:
104    case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
105    case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
106    case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
107    case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
108    case PIPE_CAP_SHAREABLE_SHADERS:
109    case PIPE_CAP_DEPTH_BOUNDS_TEST:
110    case PIPE_CAP_SAMPLER_VIEW_TARGET:
111    case PIPE_CAP_TEXTURE_QUERY_LOD:
112    case PIPE_CAP_TEXTURE_GATHER_SM5:
113    case PIPE_CAP_TGSI_TXQS:
114    case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
115    case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
116    case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
117    case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
118    case PIPE_CAP_INVALIDATE_BUFFER:
119    case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
120    case PIPE_CAP_QUERY_BUFFER_OBJECT:
121    case PIPE_CAP_QUERY_MEMORY_INFO:
122    case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
123    case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
124    case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:
125    case PIPE_CAP_GENERATE_MIPMAP:
126    case PIPE_CAP_POLYGON_OFFSET_UNITS_UNSCALED:
127    case PIPE_CAP_STRING_MARKER:
128    case PIPE_CAP_CLEAR_TEXTURE:
129    case PIPE_CAP_CULL_DISTANCE:
130    case PIPE_CAP_TGSI_ARRAY_COMPONENTS:
131    case PIPE_CAP_TGSI_CAN_READ_OUTPUTS:
132    case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY:
133    case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
134    case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS:
135    case PIPE_CAP_DOUBLES:
136    case PIPE_CAP_TGSI_TEX_TXF_LZ:
137    case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT:
138    case PIPE_CAP_BINDLESS_TEXTURE:
139    case PIPE_CAP_QUERY_TIMESTAMP:
140    case PIPE_CAP_QUERY_TIME_ELAPSED:
141    case PIPE_CAP_NIR_SAMPLERS_AS_DEREF:
142    case PIPE_CAP_MEMOBJ:
143    case PIPE_CAP_LOAD_CONSTBUF:
144    case PIPE_CAP_INT64:
145    case PIPE_CAP_INT64_DIVMOD:
146    case PIPE_CAP_TGSI_CLOCK:
147    case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
148    case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
149    case PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET:
150    case PIPE_CAP_TGSI_BALLOT:
151    case PIPE_CAP_TGSI_VOTE:
152    case PIPE_CAP_FBFETCH:
153    case PIPE_CAP_COMPUTE_GRID_INFO_LAST_BLOCK:
154    case PIPE_CAP_IMAGE_LOAD_FORMATTED:
155    case PIPE_CAP_PREFER_COMPUTE_FOR_MULTIMEDIA:
156    case PIPE_CAP_TGSI_DIV:
157    case PIPE_CAP_PACKED_UNIFORMS:
158    case PIPE_CAP_SHADER_SAMPLES_IDENTICAL:
159    case PIPE_CAP_GL_SPIRV:
160    case PIPE_CAP_DRAW_INFO_START_WITH_USER_INDICES:
161    case PIPE_CAP_ALPHA_TO_COVERAGE_DITHER_CONTROL:
162    case PIPE_CAP_MAP_UNSYNCHRONIZED_THREAD_SAFE:
163    case PIPE_CAP_NO_CLIP_ON_COPY_TEX:
164    case PIPE_CAP_SHADER_ATOMIC_INT64:
165    case PIPE_CAP_FRONTEND_NOOP:
166       return 1;
167 
168    case PIPE_CAP_GLSL_ZERO_INIT:
169       return 2;
170 
171    case PIPE_CAP_QUERY_SO_OVERFLOW:
172       return !sscreen->use_ngg_streamout;
173 
174    case PIPE_CAP_POST_DEPTH_COVERAGE:
175       return sscreen->info.chip_class >= GFX10;
176 
177    case PIPE_CAP_GRAPHICS:
178       return sscreen->info.has_graphics;
179 
180    case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
181       return !SI_BIG_ENDIAN && sscreen->info.has_userptr;
182 
183    case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
184       return sscreen->info.has_gpu_reset_status_query;
185 
186    case PIPE_CAP_DEVICE_PROTECTED_CONTENT:
187       return sscreen->info.has_tmz_support;
188 
189    case PIPE_CAP_TEXTURE_MULTISAMPLE:
190       return sscreen->info.has_2d_tiling;
191 
192    case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
193       return SI_MAP_BUFFER_ALIGNMENT;
194 
195    case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
196    case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
197    case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
198    case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
199    case PIPE_CAP_MAX_VERTEX_STREAMS:
200    case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
201    case PIPE_CAP_MAX_WINDOW_RECTANGLES:
202       return 4;
203 
204    case PIPE_CAP_GLSL_FEATURE_LEVEL:
205    case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY:
206       if (!sscreen->info.has_indirect_compute_dispatch)
207          return 420;
208       return 460;
209 
210    case PIPE_CAP_MAX_TEXTURE_UPLOAD_MEMORY_BUDGET:
211       /* Optimal number for good TexSubImage performance on Polaris10. */
212       return 64 * 1024 * 1024;
213 
214    case PIPE_CAP_GL_BEGIN_END_BUFFER_SIZE:
215       return 4096 * 1024;
216 
217    case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
218    case PIPE_CAP_MAX_SHADER_BUFFER_SIZE:
219       /* Align it down to 256 bytes. I've chosen the number randomly. */
220       return ROUND_DOWN_TO(MIN2(sscreen->info.max_alloc_size, INT_MAX), 256);
221    case PIPE_CAP_MAX_TEXTURE_MB:
222       return sscreen->info.max_alloc_size / (1024 * 1024);
223 
224    case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
225    case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
226    case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
227       return LLVM_VERSION_MAJOR < 9 && !sscreen->info.has_unaligned_shader_loads;
228 
229    case PIPE_CAP_SPARSE_BUFFER_PAGE_SIZE:
230       return sscreen->info.has_sparse_vm_mappings ? RADEON_SPARSE_PAGE_SIZE : 0;
231 
232    case PIPE_CAP_UMA:
233    case PIPE_CAP_PREFER_IMM_ARRAYS_AS_CONSTBUF:
234       return 0;
235 
236    case PIPE_CAP_FENCE_SIGNAL:
237       return sscreen->info.has_syncobj;
238 
239    case PIPE_CAP_CONSTBUF0_FLAGS:
240       return SI_RESOURCE_FLAG_32BIT;
241 
242    case PIPE_CAP_NATIVE_FENCE_FD:
243       return sscreen->info.has_fence_to_handle;
244 
245    case PIPE_CAP_DRAW_PARAMETERS:
246    case PIPE_CAP_MULTI_DRAW_INDIRECT:
247    case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
248       return sscreen->has_draw_indirect_multi;
249 
250    case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
251       return 30;
252 
253    case PIPE_CAP_MAX_VARYINGS:
254       return 32;
255 
256    case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
257       return sscreen->info.chip_class <= GFX8 ? PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_R600 : 0;
258 
259    /* Stream output. */
260    case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
261    case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
262       return 32 * 4;
263 
264    /* Geometry shader output. */
265    case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:
266       /* gfx9 has to report 256 to make piglit/gs-max-output pass.
267        * gfx8 and earlier can do 1024.
268        */
269       return 256;
270    case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
271       return 4095;
272    case PIPE_CAP_MAX_GS_INVOCATIONS:
273       /* Even though the hw supports more, we officially wanna expose only 32. */
274       return 32;
275 
276    case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
277       return 2048;
278 
279    /* Texturing. */
280    case PIPE_CAP_MAX_TEXTURE_2D_SIZE:
281       return 16384;
282    case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
283       return 15; /* 16384 */
284    case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
285       if (sscreen->info.chip_class >= GFX10)
286          return 14;
287       /* textures support 8192, but layered rendering supports 2048 */
288       return 12;
289    case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
290       if (sscreen->info.chip_class >= GFX10)
291          return 8192;
292       /* textures support 8192, but layered rendering supports 2048 */
293       return 2048;
294 
295    /* Viewports and render targets. */
296    case PIPE_CAP_MAX_VIEWPORTS:
297       return SI_MAX_VIEWPORTS;
298    case PIPE_CAP_VIEWPORT_SUBPIXEL_BITS:
299    case PIPE_CAP_RASTERIZER_SUBPIXEL_BITS:
300    case PIPE_CAP_MAX_RENDER_TARGETS:
301       return 8;
302    case PIPE_CAP_FRAMEBUFFER_MSAA_CONSTRAINTS:
303       return sscreen->info.has_eqaa_surface_allocator ? 2 : 0;
304 
305    case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
306    case PIPE_CAP_MIN_TEXEL_OFFSET:
307       return -32;
308 
309    case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
310    case PIPE_CAP_MAX_TEXEL_OFFSET:
311       return 31;
312 
313    case PIPE_CAP_ENDIANNESS:
314       return PIPE_ENDIAN_LITTLE;
315 
316    case PIPE_CAP_VENDOR_ID:
317       return ATI_VENDOR_ID;
318    case PIPE_CAP_DEVICE_ID:
319       return sscreen->info.pci_id;
320    case PIPE_CAP_VIDEO_MEMORY:
321       return sscreen->info.vram_size >> 20;
322    case PIPE_CAP_PCI_GROUP:
323       return sscreen->info.pci_domain;
324    case PIPE_CAP_PCI_BUS:
325       return sscreen->info.pci_bus;
326    case PIPE_CAP_PCI_DEVICE:
327       return sscreen->info.pci_dev;
328    case PIPE_CAP_PCI_FUNCTION:
329       return sscreen->info.pci_func;
330    case PIPE_CAP_TGSI_ATOMINC_WRAP:
331       return LLVM_VERSION_MAJOR >= 10;
332 
333    default:
334       return u_pipe_screen_get_param_defaults(pscreen, param);
335    }
336 }
337 
si_get_paramf(struct pipe_screen * pscreen,enum pipe_capf param)338 static float si_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
339 {
340    switch (param) {
341    case PIPE_CAPF_MAX_LINE_WIDTH:
342    case PIPE_CAPF_MAX_LINE_WIDTH_AA:
343       /* This depends on the quant mode, though the precise interactions
344        * are unknown. */
345       return 2048;
346    case PIPE_CAPF_MAX_POINT_WIDTH:
347    case PIPE_CAPF_MAX_POINT_WIDTH_AA:
348       return SI_MAX_POINT_SIZE;
349    case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
350       return 16.0f;
351    case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
352       return 16.0f;
353    case PIPE_CAPF_MIN_CONSERVATIVE_RASTER_DILATE:
354    case PIPE_CAPF_MAX_CONSERVATIVE_RASTER_DILATE:
355    case PIPE_CAPF_CONSERVATIVE_RASTER_DILATE_GRANULARITY:
356       return 0.0f;
357    }
358    return 0.0f;
359 }
360 
si_get_shader_param(struct pipe_screen * pscreen,enum pipe_shader_type shader,enum pipe_shader_cap param)361 static int si_get_shader_param(struct pipe_screen *pscreen, enum pipe_shader_type shader,
362                                enum pipe_shader_cap param)
363 {
364    struct si_screen *sscreen = (struct si_screen *)pscreen;
365 
366    switch (shader) {
367    case PIPE_SHADER_FRAGMENT:
368    case PIPE_SHADER_VERTEX:
369    case PIPE_SHADER_GEOMETRY:
370    case PIPE_SHADER_TESS_CTRL:
371    case PIPE_SHADER_TESS_EVAL:
372       break;
373    case PIPE_SHADER_COMPUTE:
374       switch (param) {
375       case PIPE_SHADER_CAP_SUPPORTED_IRS: {
376          int ir = 1 << PIPE_SHADER_IR_NATIVE;
377 
378          if (sscreen->info.has_indirect_compute_dispatch)
379             ir |= 1 << PIPE_SHADER_IR_NIR;
380 
381          return ir;
382       }
383       default:
384          /* If compute shaders don't require a special value
385           * for this cap, we can return the same value we
386           * do for other shader types. */
387          break;
388       }
389       break;
390    default:
391       return 0;
392    }
393 
394    switch (param) {
395    /* Shader limits. */
396    case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
397    case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
398    case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
399    case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
400    case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
401       return 16384;
402    case PIPE_SHADER_CAP_MAX_INPUTS:
403       return shader == PIPE_SHADER_VERTEX ? SI_MAX_ATTRIBS : 32;
404    case PIPE_SHADER_CAP_MAX_OUTPUTS:
405       return shader == PIPE_SHADER_FRAGMENT ? 8 : 32;
406    case PIPE_SHADER_CAP_MAX_TEMPS:
407       return 256; /* Max native temporaries. */
408    case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
409       return 1 << 26; /* 64 MB */
410    case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
411       return SI_NUM_CONST_BUFFERS;
412    case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
413    case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
414       return SI_NUM_SAMPLERS;
415    case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
416       return SI_NUM_SHADER_BUFFERS;
417    case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
418       return SI_NUM_IMAGES;
419    case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
420       return 0;
421    case PIPE_SHADER_CAP_PREFERRED_IR:
422       return PIPE_SHADER_IR_NIR;
423    case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:
424       return 4;
425 
426    /* Supported boolean features. */
427    case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
428    case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
429    case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
430    case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
431    case PIPE_SHADER_CAP_INTEGERS:
432    case PIPE_SHADER_CAP_INT64_ATOMICS:
433    case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
434    case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
435    case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS:
436    case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
437    case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
438    case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
439    case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: /* lowered in finalize_nir */
440    case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: /* lowered in finalize_nir */
441       return 1;
442 
443    /* Unsupported boolean features. */
444    case PIPE_SHADER_CAP_FP16:
445    case PIPE_SHADER_CAP_FP16_DERIVATIVES:
446    case PIPE_SHADER_CAP_INT16:
447    case PIPE_SHADER_CAP_GLSL_16BIT_CONSTS:
448    case PIPE_SHADER_CAP_SUBROUTINES:
449    case PIPE_SHADER_CAP_SUPPORTED_IRS:
450    case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
451    case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:
452       return 0;
453    }
454    return 0;
455 }
456 
si_get_compiler_options(struct pipe_screen * screen,enum pipe_shader_ir ir,enum pipe_shader_type shader)457 static const void *si_get_compiler_options(struct pipe_screen *screen, enum pipe_shader_ir ir,
458                                            enum pipe_shader_type shader)
459 {
460    struct si_screen *sscreen = (struct si_screen *)screen;
461 
462    assert(ir == PIPE_SHADER_IR_NIR);
463    return &sscreen->nir_options;
464 }
465 
si_get_driver_uuid(struct pipe_screen * pscreen,char * uuid)466 static void si_get_driver_uuid(struct pipe_screen *pscreen, char *uuid)
467 {
468    ac_compute_driver_uuid(uuid, PIPE_UUID_SIZE);
469 }
470 
si_get_device_uuid(struct pipe_screen * pscreen,char * uuid)471 static void si_get_device_uuid(struct pipe_screen *pscreen, char *uuid)
472 {
473    struct si_screen *sscreen = (struct si_screen *)pscreen;
474 
475    ac_compute_device_uuid(&sscreen->info, uuid, PIPE_UUID_SIZE);
476 }
477 
si_get_name(struct pipe_screen * pscreen)478 static const char *si_get_name(struct pipe_screen *pscreen)
479 {
480    struct si_screen *sscreen = (struct si_screen *)pscreen;
481 
482    return sscreen->renderer_string;
483 }
484 
si_get_video_param_no_decode(struct pipe_screen * screen,enum pipe_video_profile profile,enum pipe_video_entrypoint entrypoint,enum pipe_video_cap param)485 static int si_get_video_param_no_decode(struct pipe_screen *screen, enum pipe_video_profile profile,
486                                         enum pipe_video_entrypoint entrypoint,
487                                         enum pipe_video_cap param)
488 {
489    switch (param) {
490    case PIPE_VIDEO_CAP_SUPPORTED:
491       return vl_profile_supported(screen, profile, entrypoint);
492    case PIPE_VIDEO_CAP_NPOT_TEXTURES:
493       return 1;
494    case PIPE_VIDEO_CAP_MAX_WIDTH:
495    case PIPE_VIDEO_CAP_MAX_HEIGHT:
496       return vl_video_buffer_max_size(screen);
497    case PIPE_VIDEO_CAP_PREFERED_FORMAT:
498       return PIPE_FORMAT_NV12;
499    case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
500       return false;
501    case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
502       return false;
503    case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
504       return true;
505    case PIPE_VIDEO_CAP_MAX_LEVEL:
506       return vl_level_supported(screen, profile);
507    default:
508       return 0;
509    }
510 }
511 
si_get_video_param(struct pipe_screen * screen,enum pipe_video_profile profile,enum pipe_video_entrypoint entrypoint,enum pipe_video_cap param)512 static int si_get_video_param(struct pipe_screen *screen, enum pipe_video_profile profile,
513                               enum pipe_video_entrypoint entrypoint, enum pipe_video_cap param)
514 {
515    struct si_screen *sscreen = (struct si_screen *)screen;
516    enum pipe_video_format codec = u_reduce_video_profile(profile);
517 
518    if (entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE) {
519       switch (param) {
520       case PIPE_VIDEO_CAP_SUPPORTED:
521          return (
522             (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC &&
523              (sscreen->info.family >= CHIP_RAVEN || si_vce_is_fw_version_supported(sscreen))) ||
524             (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN &&
525              (sscreen->info.family >= CHIP_RAVEN || si_radeon_uvd_enc_supported(sscreen))) ||
526             (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10 && sscreen->info.family >= CHIP_RENOIR));
527       case PIPE_VIDEO_CAP_NPOT_TEXTURES:
528          return 1;
529       case PIPE_VIDEO_CAP_MAX_WIDTH:
530          return (sscreen->info.family < CHIP_TONGA) ? 2048 : 4096;
531       case PIPE_VIDEO_CAP_MAX_HEIGHT:
532          return (sscreen->info.family < CHIP_TONGA) ? 1152 : 2304;
533       case PIPE_VIDEO_CAP_PREFERED_FORMAT:
534          return PIPE_FORMAT_NV12;
535       case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
536          return false;
537       case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
538          return false;
539       case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
540          return true;
541       case PIPE_VIDEO_CAP_STACKED_FRAMES:
542          return (sscreen->info.family < CHIP_TONGA) ? 1 : 2;
543       default:
544          return 0;
545       }
546    }
547 
548    switch (param) {
549    case PIPE_VIDEO_CAP_SUPPORTED:
550       switch (codec) {
551       case PIPE_VIDEO_FORMAT_MPEG12:
552          return profile != PIPE_VIDEO_PROFILE_MPEG1;
553       case PIPE_VIDEO_FORMAT_MPEG4:
554          return 1;
555       case PIPE_VIDEO_FORMAT_MPEG4_AVC:
556          if ((sscreen->info.family == CHIP_POLARIS10 || sscreen->info.family == CHIP_POLARIS11) &&
557              sscreen->info.uvd_fw_version < UVD_FW_1_66_16) {
558             RVID_ERR("POLARIS10/11 firmware version need to be updated.\n");
559             return false;
560          }
561          return true;
562       case PIPE_VIDEO_FORMAT_VC1:
563          return true;
564       case PIPE_VIDEO_FORMAT_HEVC:
565          /* Carrizo only supports HEVC Main */
566          if (sscreen->info.family >= CHIP_STONEY)
567             return (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN ||
568                     profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10);
569          else if (sscreen->info.family >= CHIP_CARRIZO)
570             return profile == PIPE_VIDEO_PROFILE_HEVC_MAIN;
571          return false;
572       case PIPE_VIDEO_FORMAT_JPEG:
573          if (sscreen->info.family >= CHIP_RAVEN)
574             return true;
575          if (sscreen->info.family < CHIP_CARRIZO || sscreen->info.family >= CHIP_VEGA10)
576             return false;
577          if (!(sscreen->info.is_amdgpu && sscreen->info.drm_minor >= 19)) {
578             RVID_ERR("No MJPEG support for the kernel version\n");
579             return false;
580          }
581          return true;
582       case PIPE_VIDEO_FORMAT_VP9:
583          if (sscreen->info.family < CHIP_RAVEN)
584             return false;
585          return true;
586       default:
587          return false;
588       }
589    case PIPE_VIDEO_CAP_NPOT_TEXTURES:
590       return 1;
591    case PIPE_VIDEO_CAP_MAX_WIDTH:
592       switch (codec) {
593       case PIPE_VIDEO_FORMAT_HEVC:
594       case PIPE_VIDEO_FORMAT_VP9:
595          return (sscreen->info.family < CHIP_RENOIR)
596                    ? ((sscreen->info.family < CHIP_TONGA) ? 2048 : 4096)
597                    : 8192;
598       default:
599          return (sscreen->info.family < CHIP_TONGA) ? 2048 : 4096;
600       }
601    case PIPE_VIDEO_CAP_MAX_HEIGHT:
602       switch (codec) {
603       case PIPE_VIDEO_FORMAT_HEVC:
604       case PIPE_VIDEO_FORMAT_VP9:
605          return (sscreen->info.family < CHIP_RENOIR)
606                    ? ((sscreen->info.family < CHIP_TONGA) ? 1152 : 4096)
607                    : 4352;
608       default:
609          return (sscreen->info.family < CHIP_TONGA) ? 1152 : 4096;
610       }
611    case PIPE_VIDEO_CAP_PREFERED_FORMAT:
612       if (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)
613          return PIPE_FORMAT_P010;
614       else if (profile == PIPE_VIDEO_PROFILE_VP9_PROFILE2)
615          return PIPE_FORMAT_P010;
616       else
617          return PIPE_FORMAT_NV12;
618 
619    case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
620    case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED: {
621       enum pipe_video_format format = u_reduce_video_profile(profile);
622 
623       if (format == PIPE_VIDEO_FORMAT_HEVC)
624          return false; // The firmware doesn't support interlaced HEVC.
625       else if (format == PIPE_VIDEO_FORMAT_JPEG)
626          return false;
627       else if (format == PIPE_VIDEO_FORMAT_VP9)
628          return false;
629       return true;
630    }
631    case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
632       return true;
633    case PIPE_VIDEO_CAP_MAX_LEVEL:
634       switch (profile) {
635       case PIPE_VIDEO_PROFILE_MPEG1:
636          return 0;
637       case PIPE_VIDEO_PROFILE_MPEG2_SIMPLE:
638       case PIPE_VIDEO_PROFILE_MPEG2_MAIN:
639          return 3;
640       case PIPE_VIDEO_PROFILE_MPEG4_SIMPLE:
641          return 3;
642       case PIPE_VIDEO_PROFILE_MPEG4_ADVANCED_SIMPLE:
643          return 5;
644       case PIPE_VIDEO_PROFILE_VC1_SIMPLE:
645          return 1;
646       case PIPE_VIDEO_PROFILE_VC1_MAIN:
647          return 2;
648       case PIPE_VIDEO_PROFILE_VC1_ADVANCED:
649          return 4;
650       case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:
651       case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN:
652       case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:
653          return (sscreen->info.family < CHIP_TONGA) ? 41 : 52;
654       case PIPE_VIDEO_PROFILE_HEVC_MAIN:
655       case PIPE_VIDEO_PROFILE_HEVC_MAIN_10:
656          return 186;
657       default:
658          return 0;
659       }
660    default:
661       return 0;
662    }
663 }
664 
si_vid_is_format_supported(struct pipe_screen * screen,enum pipe_format format,enum pipe_video_profile profile,enum pipe_video_entrypoint entrypoint)665 static bool si_vid_is_format_supported(struct pipe_screen *screen, enum pipe_format format,
666                                        enum pipe_video_profile profile,
667                                        enum pipe_video_entrypoint entrypoint)
668 {
669    /* HEVC 10 bit decoding should use P010 instead of NV12 if possible */
670    if (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)
671       return (format == PIPE_FORMAT_NV12) || (format == PIPE_FORMAT_P010) ||
672              (format == PIPE_FORMAT_P016);
673 
674    /* Vp9 profile 2 supports 10 bit decoding using P016 */
675    if (profile == PIPE_VIDEO_PROFILE_VP9_PROFILE2)
676       return (format == PIPE_FORMAT_P010) || (format == PIPE_FORMAT_P016);
677 
678    /* we can only handle this one with UVD */
679    if (profile != PIPE_VIDEO_PROFILE_UNKNOWN)
680       return format == PIPE_FORMAT_NV12;
681 
682    return vl_video_buffer_is_format_supported(screen, format, profile, entrypoint);
683 }
684 
get_max_threads_per_block(struct si_screen * screen,enum pipe_shader_ir ir_type)685 static unsigned get_max_threads_per_block(struct si_screen *screen, enum pipe_shader_ir ir_type)
686 {
687    if (ir_type == PIPE_SHADER_IR_NATIVE)
688       return 256;
689 
690    /* LLVM 10 only supports 1024 threads per block. */
691    return 1024;
692 }
693 
si_get_compute_param(struct pipe_screen * screen,enum pipe_shader_ir ir_type,enum pipe_compute_cap param,void * ret)694 static int si_get_compute_param(struct pipe_screen *screen, enum pipe_shader_ir ir_type,
695                                 enum pipe_compute_cap param, void *ret)
696 {
697    struct si_screen *sscreen = (struct si_screen *)screen;
698 
699    // TODO: select these params by asic
700    switch (param) {
701    case PIPE_COMPUTE_CAP_IR_TARGET: {
702       const char *gpu, *triple;
703 
704       triple = "amdgcn-mesa-mesa3d";
705       gpu = ac_get_llvm_processor_name(sscreen->info.family);
706       if (ret) {
707          sprintf(ret, "%s-%s", gpu, triple);
708       }
709       /* +2 for dash and terminating NIL byte */
710       return (strlen(triple) + strlen(gpu) + 2) * sizeof(char);
711    }
712    case PIPE_COMPUTE_CAP_GRID_DIMENSION:
713       if (ret) {
714          uint64_t *grid_dimension = ret;
715          grid_dimension[0] = 3;
716       }
717       return 1 * sizeof(uint64_t);
718 
719    case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
720       if (ret) {
721          uint64_t *grid_size = ret;
722          grid_size[0] = 65535;
723          grid_size[1] = 65535;
724          grid_size[2] = 65535;
725       }
726       return 3 * sizeof(uint64_t);
727 
728    case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
729       if (ret) {
730          uint64_t *block_size = ret;
731          unsigned threads_per_block = get_max_threads_per_block(sscreen, ir_type);
732          block_size[0] = threads_per_block;
733          block_size[1] = threads_per_block;
734          block_size[2] = threads_per_block;
735       }
736       return 3 * sizeof(uint64_t);
737 
738    case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
739       if (ret) {
740          uint64_t *max_threads_per_block = ret;
741          *max_threads_per_block = get_max_threads_per_block(sscreen, ir_type);
742       }
743       return sizeof(uint64_t);
744    case PIPE_COMPUTE_CAP_ADDRESS_BITS:
745       if (ret) {
746          uint32_t *address_bits = ret;
747          address_bits[0] = 64;
748       }
749       return 1 * sizeof(uint32_t);
750 
751    case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
752       if (ret) {
753          uint64_t *max_global_size = ret;
754          uint64_t max_mem_alloc_size;
755 
756          si_get_compute_param(screen, ir_type, PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
757                               &max_mem_alloc_size);
758 
759          /* In OpenCL, the MAX_MEM_ALLOC_SIZE must be at least
760           * 1/4 of the MAX_GLOBAL_SIZE.  Since the
761           * MAX_MEM_ALLOC_SIZE is fixed for older kernels,
762           * make sure we never report more than
763           * 4 * MAX_MEM_ALLOC_SIZE.
764           */
765          *max_global_size =
766             MIN2(4 * max_mem_alloc_size, MAX2(sscreen->info.gart_size, sscreen->info.vram_size));
767       }
768       return sizeof(uint64_t);
769 
770    case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
771       if (ret) {
772          uint64_t *max_local_size = ret;
773          /* Value reported by the closed source driver. */
774          *max_local_size = 32768;
775       }
776       return sizeof(uint64_t);
777 
778    case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
779       if (ret) {
780          uint64_t *max_input_size = ret;
781          /* Value reported by the closed source driver. */
782          *max_input_size = 1024;
783       }
784       return sizeof(uint64_t);
785 
786    case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
787       if (ret) {
788          uint64_t *max_mem_alloc_size = ret;
789 
790          *max_mem_alloc_size = sscreen->info.max_alloc_size;
791       }
792       return sizeof(uint64_t);
793 
794    case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
795       if (ret) {
796          uint32_t *max_clock_frequency = ret;
797          *max_clock_frequency = sscreen->info.max_shader_clock;
798       }
799       return sizeof(uint32_t);
800 
801    case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
802       if (ret) {
803          uint32_t *max_compute_units = ret;
804          *max_compute_units = sscreen->info.num_good_compute_units;
805       }
806       return sizeof(uint32_t);
807 
808    case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
809       if (ret) {
810          uint32_t *images_supported = ret;
811          *images_supported = 0;
812       }
813       return sizeof(uint32_t);
814    case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
815       break; /* unused */
816    case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
817       if (ret) {
818          uint32_t *subgroup_size = ret;
819          *subgroup_size = sscreen->compute_wave_size;
820       }
821       return sizeof(uint32_t);
822    case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
823       if (ret) {
824          uint64_t *max_variable_threads_per_block = ret;
825          if (ir_type == PIPE_SHADER_IR_NATIVE)
826             *max_variable_threads_per_block = 0;
827          else
828             *max_variable_threads_per_block = SI_MAX_VARIABLE_THREADS_PER_BLOCK;
829       }
830       return sizeof(uint64_t);
831    }
832 
833    fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param);
834    return 0;
835 }
836 
si_get_timestamp(struct pipe_screen * screen)837 static uint64_t si_get_timestamp(struct pipe_screen *screen)
838 {
839    struct si_screen *sscreen = (struct si_screen *)screen;
840 
841    return 1000000 * sscreen->ws->query_value(sscreen->ws, RADEON_TIMESTAMP) /
842           sscreen->info.clock_crystal_freq;
843 }
844 
si_query_memory_info(struct pipe_screen * screen,struct pipe_memory_info * info)845 static void si_query_memory_info(struct pipe_screen *screen, struct pipe_memory_info *info)
846 {
847    struct si_screen *sscreen = (struct si_screen *)screen;
848    struct radeon_winsys *ws = sscreen->ws;
849    unsigned vram_usage, gtt_usage;
850 
851    info->total_device_memory = sscreen->info.vram_size / 1024;
852    info->total_staging_memory = sscreen->info.gart_size / 1024;
853 
854    /* The real TTM memory usage is somewhat random, because:
855     *
856     * 1) TTM delays freeing memory, because it can only free it after
857     *    fences expire.
858     *
859     * 2) The memory usage can be really low if big VRAM evictions are
860     *    taking place, but the real usage is well above the size of VRAM.
861     *
862     * Instead, return statistics of this process.
863     */
864    vram_usage = ws->query_value(ws, RADEON_VRAM_USAGE) / 1024;
865    gtt_usage = ws->query_value(ws, RADEON_GTT_USAGE) / 1024;
866 
867    info->avail_device_memory =
868       vram_usage <= info->total_device_memory ? info->total_device_memory - vram_usage : 0;
869    info->avail_staging_memory =
870       gtt_usage <= info->total_staging_memory ? info->total_staging_memory - gtt_usage : 0;
871 
872    info->device_memory_evicted = ws->query_value(ws, RADEON_NUM_BYTES_MOVED) / 1024;
873 
874    if (sscreen->info.is_amdgpu && sscreen->info.drm_minor >= 4)
875       info->nr_device_memory_evictions = ws->query_value(ws, RADEON_NUM_EVICTIONS);
876    else
877       /* Just return the number of evicted 64KB pages. */
878       info->nr_device_memory_evictions = info->device_memory_evicted / 64;
879 }
880 
si_get_disk_shader_cache(struct pipe_screen * pscreen)881 static struct disk_cache *si_get_disk_shader_cache(struct pipe_screen *pscreen)
882 {
883    struct si_screen *sscreen = (struct si_screen *)pscreen;
884 
885    return sscreen->disk_shader_cache;
886 }
887 
si_init_renderer_string(struct si_screen * sscreen)888 static void si_init_renderer_string(struct si_screen *sscreen)
889 {
890    char first_name[256], second_name[32] = {}, kernel_version[128] = {};
891    struct utsname uname_data;
892 
893    if (sscreen->info.marketing_name) {
894       snprintf(first_name, sizeof(first_name), "%s", sscreen->info.marketing_name);
895       snprintf(second_name, sizeof(second_name), "%s, ", sscreen->info.name);
896    } else {
897       snprintf(first_name, sizeof(first_name), "AMD %s", sscreen->info.name);
898    }
899 
900    if (uname(&uname_data) == 0)
901       snprintf(kernel_version, sizeof(kernel_version), ", %s", uname_data.release);
902 
903    snprintf(sscreen->renderer_string, sizeof(sscreen->renderer_string),
904             "%s (%sDRM %i.%i.%i%s, LLVM " MESA_LLVM_VERSION_STRING ")", first_name, second_name,
905             sscreen->info.drm_major, sscreen->info.drm_minor, sscreen->info.drm_patchlevel,
906             kernel_version);
907 }
908 
si_init_screen_get_functions(struct si_screen * sscreen)909 void si_init_screen_get_functions(struct si_screen *sscreen)
910 {
911    sscreen->b.get_name = si_get_name;
912    sscreen->b.get_vendor = si_get_vendor;
913    sscreen->b.get_device_vendor = si_get_device_vendor;
914    sscreen->b.get_param = si_get_param;
915    sscreen->b.get_paramf = si_get_paramf;
916    sscreen->b.get_compute_param = si_get_compute_param;
917    sscreen->b.get_timestamp = si_get_timestamp;
918    sscreen->b.get_shader_param = si_get_shader_param;
919    sscreen->b.get_compiler_options = si_get_compiler_options;
920    sscreen->b.get_device_uuid = si_get_device_uuid;
921    sscreen->b.get_driver_uuid = si_get_driver_uuid;
922    sscreen->b.query_memory_info = si_query_memory_info;
923    sscreen->b.get_disk_shader_cache = si_get_disk_shader_cache;
924 
925    if (sscreen->info.has_hw_decode) {
926       sscreen->b.get_video_param = si_get_video_param;
927       sscreen->b.is_video_format_supported = si_vid_is_format_supported;
928    } else {
929       sscreen->b.get_video_param = si_get_video_param_no_decode;
930       sscreen->b.is_video_format_supported = vl_video_buffer_is_format_supported;
931    }
932 
933    si_init_renderer_string(sscreen);
934 
935    const struct nir_shader_compiler_options nir_options = {
936       .lower_scmp = true,
937       .lower_flrp16 = true,
938       .lower_flrp32 = true,
939       .lower_flrp64 = true,
940       .lower_fsat = true,
941       .lower_fdiv = true,
942       .lower_bitfield_insert_to_bitfield_select = true,
943       .lower_bitfield_extract = true,
944       .lower_sub = true,
945       /*        |---------------------------------- Performance & Availability --------------------------------|
946        *        |MAD/MAC/MADAK/MADMK|MAD_LEGACY|MAC_LEGACY|    FMA     |FMAC/FMAAK/FMAMK|FMA_LEGACY|PK_FMA_F16,|Best choice
947        * Arch   |    F32,F16,F64    | F32,F16  | F32,F16  |F32,F16,F64 |    F32,F16     | F32,F16  |PK_FMAC_F16|F16,F32,F64
948        * ------------------------------------------------------------------------------------------------------------------
949        * gfx6,7 |     1 , - , -     |  1 , -   |  1 , -   |1/4, - ,1/16|     - , -      |  - , -   |   - , -   | - ,MAD,FMA
950        * gfx8   |     1 , 1 , -     |  1 , -   |  - , -   |1/4, 1 ,1/16|     - , -      |  - , -   |   - , -   |MAD,MAD,FMA
951        * gfx9   |     1 , 1 , -     |  1 , -   |  1 , -   | 1 , 1 ,1/16|     - , -      |  - , 1   |   2 , -   |FMA,MAD,FMA
952        * gfx10  |     1 , 1 , -     |  1 , -   |  1 , -   | 1 , 1 ,1/16|     1 , 1      |  - , -   |   2 , 2   |FMA,MAD,FMA
953        * gfx10.3|     - , - , -     |  - , -   |  - , -   | 1 , 1 ,1/16|     1 , 1      |  1 , -   |   2 , 2   |  all FMA
954        *
955        * Tahiti, Hawaii, Carrizo, Vega20: FMA_F32 is full rate, FMA_F64 is 1/4
956        *
957        * gfx8 prefers MAD for F16 because of MAC/MADAK/MADMK.
958        * gfx9 and newer prefer FMA for F16 because of the packed instruction.
959        * gfx10 and older prefer MAD for F32 because of the legacy instruction.
960        */
961       .lower_ffma16 = sscreen->info.chip_class < GFX9,
962       .lower_ffma32 = sscreen->info.chip_class < GFX10_3,
963       .lower_ffma64 = false,
964       .fuse_ffma16 = sscreen->info.chip_class >= GFX9,
965       .fuse_ffma32 = sscreen->info.chip_class >= GFX10_3,
966       .fuse_ffma64 = true,
967       .lower_fmod = true,
968       .lower_pack_snorm_4x8 = true,
969       .lower_pack_unorm_4x8 = true,
970       .lower_unpack_snorm_2x16 = true,
971       .lower_unpack_snorm_4x8 = true,
972       .lower_unpack_unorm_2x16 = true,
973       .lower_unpack_unorm_4x8 = true,
974       .lower_extract_byte = true,
975       .lower_extract_word = true,
976       .lower_rotate = true,
977       .lower_to_scalar = true,
978       .optimize_sample_mask_in = true,
979       .max_unroll_iterations = 32,
980       .use_interpolated_input_intrinsics = true,
981       .lower_uniforms_to_ubo = true,
982       .support_16bit_alu = sscreen->info.has_packed_math_16bit,
983       .vectorize_vec2_16bit = sscreen->info.has_packed_math_16bit,
984    };
985    sscreen->nir_options = nir_options;
986 }
987