• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2017 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included
12  * in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20  * DEALINGS IN THE SOFTWARE.
21  */
22 
23 /**
24  * @file iris_screen.c
25  *
26  * Screen related driver hooks and capability lists.
27  *
28  * A program may use multiple rendering contexts (iris_context), but
29  * they all share a common screen (iris_screen).  Global driver state
30  * can be stored in the screen; it may be accessed by multiple threads.
31  */
32 
33 #include <stdio.h>
34 #include <errno.h>
35 #include <sys/ioctl.h>
36 #include "pipe/p_defines.h"
37 #include "pipe/p_state.h"
38 #include "pipe/p_context.h"
39 #include "pipe/p_screen.h"
40 #include "util/u_debug.h"
41 #include "util/os_file.h"
42 #include "util/u_cpu_detect.h"
43 #include "util/u_inlines.h"
44 #include "util/format/u_format.h"
45 #include "util/u_transfer_helper.h"
46 #include "util/u_upload_mgr.h"
47 #include "util/ralloc.h"
48 #include "util/xmlconfig.h"
49 #include "iris_context.h"
50 #include "iris_defines.h"
51 #include "iris_fence.h"
52 #include "iris_pipe.h"
53 #include "iris_resource.h"
54 #include "iris_screen.h"
55 #include "compiler/glsl_types.h"
56 #include "intel/common/intel_gem.h"
57 #include "intel/common/intel_l3_config.h"
58 #include "intel/common/intel_uuid.h"
59 #include "iris_monitor.h"
60 
/**
 * Dispatch a call to the hardware-generation-specific variant of `func`.
 *
 * Selects on (devinfo)->verx10 and expands to gfx<N>_<func>(...), forwarding
 * the remaining arguments unchanged (e.g. verx10 == 125 calls
 * gfx125_<func>).  Any other verx10 value reaches unreachable().
 *
 * The expansion is wrapped in do { } while (0) so the macro behaves as a
 * single statement and can safely be used as the unbraced body of an
 * if/else.  Invoke it with a trailing semicolon.
 */
#define genX_call(devinfo, func, ...)                \
   do {                                              \
      switch ((devinfo)->verx10) {                   \
      case 200:                                      \
         gfx20_##func(__VA_ARGS__);                  \
         break;                                      \
      case 125:                                      \
         gfx125_##func(__VA_ARGS__);                 \
         break;                                      \
      case 120:                                      \
         gfx12_##func(__VA_ARGS__);                  \
         break;                                      \
      case 110:                                      \
         gfx11_##func(__VA_ARGS__);                  \
         break;                                      \
      case 90:                                       \
         gfx9_##func(__VA_ARGS__);                   \
         break;                                      \
      case 80:                                       \
         gfx8_##func(__VA_ARGS__);                   \
         break;                                      \
      default:                                       \
         unreachable("Unknown hardware generation"); \
      }                                              \
   } while (0)
84 
/**
 * Return the vendor string for this screen.
 *
 * The answer is the constant "Intel"; \p pscreen is not consulted.
 */
static const char *
iris_get_vendor(struct pipe_screen *pscreen)
{
   static const char vendor[] = "Intel";

   return vendor;
}
90 
/**
 * Return the device vendor string for this screen.
 *
 * Always "Intel", independent of \p pscreen.
 */
static const char *
iris_get_device_vendor(struct pipe_screen *pscreen)
{
   const char *const device_vendor = "Intel";

   return device_vendor;
}
96 
97 static void
iris_get_device_uuid(struct pipe_screen * pscreen,char * uuid)98 iris_get_device_uuid(struct pipe_screen *pscreen, char *uuid)
99 {
100    struct iris_screen *screen = (struct iris_screen *)pscreen;
101 
102    intel_uuid_compute_device_id((uint8_t *)uuid, screen->devinfo, PIPE_UUID_SIZE);
103 }
104 
105 static void
iris_get_driver_uuid(struct pipe_screen * pscreen,char * uuid)106 iris_get_driver_uuid(struct pipe_screen *pscreen, char *uuid)
107 {
108    struct iris_screen *screen = (struct iris_screen *)pscreen;
109    const struct intel_device_info *devinfo = screen->devinfo;
110 
111    intel_uuid_compute_driver_id((uint8_t *)uuid, devinfo, PIPE_UUID_SIZE);
112 }
113 
/**
 * Tell whether the IRIS_ENABLE_CLOVER boolean option is set.
 *
 * The option is read through debug_get_bool_option() (default: false) on
 * the first call and the answer is cached in a function-local static, so
 * later calls do not query it again.
 *
 * Declared with an explicit (void) parameter list so the definition is a
 * real prototype and calls with stray arguments are diagnosed.
 */
static bool
iris_enable_clover(void)
{
   /* -1 means "not queried yet"; afterwards holds 0 or 1. */
   static int enable = -1;
   if (enable < 0)
      enable = debug_get_bool_option("IRIS_ENABLE_CLOVER", false);
   return enable;
}
122 
/**
 * Print a one-time warning to stderr that OpenCL support through this
 * driver is incomplete.
 *
 * Only the first call prints; a function-local static flag suppresses
 * every later call.
 *
 * Declared with an explicit (void) parameter list so the definition is a
 * real prototype and calls with stray arguments are diagnosed.
 */
static void
iris_warn_cl(void)
{
   static bool warned = false;
   if (warned)
      return;

   warned = true;
   fprintf(stderr, "WARNING: OpenCL support via iris driver is incomplete.\n"
                   "For a complete and conformant OpenCL implementation, use\n"
                   "https://github.com/intel/compute-runtime instead\n");
}
135 
136 static const char *
iris_get_name(struct pipe_screen * pscreen)137 iris_get_name(struct pipe_screen *pscreen)
138 {
139    struct iris_screen *screen = (struct iris_screen *)pscreen;
140    const struct intel_device_info *devinfo = screen->devinfo;
141    static char buf[128];
142 
143    snprintf(buf, sizeof(buf), "Mesa %s", devinfo->name);
144    return buf;
145 }
146 
147 static const char *
iris_get_cl_cts_version(struct pipe_screen * pscreen)148 iris_get_cl_cts_version(struct pipe_screen *pscreen)
149 {
150    struct iris_screen *screen = (struct iris_screen *)pscreen;
151    const struct intel_device_info *devinfo = screen->devinfo;
152 
153    /* https://www.khronos.org/conformance/adopters/conformant-products/opencl#submission_405 */
154    if (devinfo->verx10 == 120)
155       return "v2022-04-22-00";
156 
157    return NULL;
158 }
159 
160 static int
iris_get_video_memory(struct iris_screen * screen)161 iris_get_video_memory(struct iris_screen *screen)
162 {
163    uint64_t vram = iris_bufmgr_vram_size(screen->bufmgr);
164    uint64_t sram = iris_bufmgr_sram_size(screen->bufmgr);
165    if (vram) {
166       return vram / (1024 * 1024);
167    } else if (sram) {
168       return sram / (1024 * 1024);
169    } else {
170       /* This is the old code path, it get the GGTT size from the kernel
171        * (which should always be 4Gb on Gfx8+).
172        *
173        * We should probably never end up here. This is just a fallback to get
174        * some kind of value in case os_get_available_system_memory fails.
175        */
176       const struct intel_device_info *devinfo = screen->devinfo;
177       /* Once a batch uses more than 75% of the maximum mappable size, we
178        * assume that there's some fragmentation, and we start doing extra
179        * flushing, etc.  That's the big cliff apps will care about.
180        */
181       const unsigned gpu_mappable_megabytes =
182          (devinfo->aperture_bytes * 3 / 4) / (1024 * 1024);
183 
184       const long system_memory_pages = sysconf(_SC_PHYS_PAGES);
185       const long system_page_size = sysconf(_SC_PAGE_SIZE);
186 
187       if (system_memory_pages <= 0 || system_page_size <= 0)
188          return -1;
189 
190       const uint64_t system_memory_bytes =
191          (uint64_t) system_memory_pages * (uint64_t) system_page_size;
192 
193       const unsigned system_memory_megabytes =
194          (unsigned) (system_memory_bytes / (1024 * 1024));
195 
196       return MIN2(system_memory_megabytes, gpu_mappable_megabytes);
197    }
198 }
199 
200 static int
iris_get_param(struct pipe_screen * pscreen,enum pipe_cap param)201 iris_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
202 {
203    struct iris_screen *screen = (struct iris_screen *)pscreen;
204    const struct intel_device_info *devinfo = screen->devinfo;
205 
206    switch (param) {
207    case PIPE_CAP_NPOT_TEXTURES:
208    case PIPE_CAP_ANISOTROPIC_FILTER:
209    case PIPE_CAP_OCCLUSION_QUERY:
210    case PIPE_CAP_QUERY_TIME_ELAPSED:
211    case PIPE_CAP_TEXTURE_SWIZZLE:
212    case PIPE_CAP_TEXTURE_MIRROR_CLAMP_TO_EDGE:
213    case PIPE_CAP_BLEND_EQUATION_SEPARATE:
214    case PIPE_CAP_FRAGMENT_SHADER_TEXTURE_LOD:
215    case PIPE_CAP_FRAGMENT_SHADER_DERIVATIVES:
216    case PIPE_CAP_PRIMITIVE_RESTART:
217    case PIPE_CAP_PRIMITIVE_RESTART_FIXED_INDEX:
218    case PIPE_CAP_INDEP_BLEND_ENABLE:
219    case PIPE_CAP_INDEP_BLEND_FUNC:
220    case PIPE_CAP_FS_COORD_ORIGIN_UPPER_LEFT:
221    case PIPE_CAP_FS_COORD_PIXEL_CENTER_INTEGER:
222    case PIPE_CAP_DEPTH_CLIP_DISABLE:
223    case PIPE_CAP_VS_INSTANCEID:
224    case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
225    case PIPE_CAP_SEAMLESS_CUBE_MAP:
226    case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
227    case PIPE_CAP_CONDITIONAL_RENDER:
228    case PIPE_CAP_TEXTURE_BARRIER:
229    case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
230    case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
231    case PIPE_CAP_COMPUTE:
232    case PIPE_CAP_START_INSTANCE:
233    case PIPE_CAP_QUERY_TIMESTAMP:
234    case PIPE_CAP_TEXTURE_MULTISAMPLE:
235    case PIPE_CAP_CUBE_MAP_ARRAY:
236    case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
237    case PIPE_CAP_QUERY_PIPELINE_STATISTICS_SINGLE:
238    case PIPE_CAP_TEXTURE_QUERY_LOD:
239    case PIPE_CAP_SAMPLE_SHADING:
240    case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
241    case PIPE_CAP_DRAW_INDIRECT:
242    case PIPE_CAP_MULTI_DRAW_INDIRECT:
243    case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
244    case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
245    case PIPE_CAP_VS_LAYER_VIEWPORT:
246    case PIPE_CAP_TES_LAYER_VIEWPORT:
247    case PIPE_CAP_FS_FINE_DERIVATIVE:
248    case PIPE_CAP_SHADER_PACK_HALF_FLOAT:
249    case PIPE_CAP_ACCELERATED:
250    case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
251    case PIPE_CAP_CLIP_HALFZ:
252    case PIPE_CAP_TGSI_TEXCOORD:
253    case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS:
254    case PIPE_CAP_DOUBLES:
255    case PIPE_CAP_INT64:
256    case PIPE_CAP_SAMPLER_VIEW_TARGET:
257    case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:
258    case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
259    case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
260    case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
261    case PIPE_CAP_CULL_DISTANCE:
262    case PIPE_CAP_PACKED_UNIFORMS:
263    case PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET:
264    case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
265    case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
266    case PIPE_CAP_POLYGON_OFFSET_CLAMP:
267    case PIPE_CAP_QUERY_SO_OVERFLOW:
268    case PIPE_CAP_QUERY_BUFFER_OBJECT:
269    case PIPE_CAP_TGSI_TEX_TXF_LZ:
270    case PIPE_CAP_TEXTURE_QUERY_SAMPLES:
271    case PIPE_CAP_SHADER_CLOCK:
272    case PIPE_CAP_SHADER_BALLOT:
273    case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
274    case PIPE_CAP_CLEAR_SCISSORED:
275    case PIPE_CAP_SHADER_GROUP_VOTE:
276    case PIPE_CAP_VS_WINDOW_SPACE_POSITION:
277    case PIPE_CAP_TEXTURE_GATHER_SM5:
278    case PIPE_CAP_SHADER_ARRAY_COMPONENTS:
279    case PIPE_CAP_GLSL_TESS_LEVELS_AS_INPUTS:
280    case PIPE_CAP_LOAD_CONSTBUF:
281    case PIPE_CAP_NIR_COMPACT_ARRAYS:
282    case PIPE_CAP_DRAW_PARAMETERS:
283    case PIPE_CAP_FS_POSITION_IS_SYSVAL:
284    case PIPE_CAP_FS_FACE_IS_INTEGER_SYSVAL:
285    case PIPE_CAP_COMPUTE_SHADER_DERIVATIVES:
286    case PIPE_CAP_INVALIDATE_BUFFER:
287    case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
288    case PIPE_CAP_TEXTURE_SHADOW_LOD:
289    case PIPE_CAP_SHADER_SAMPLES_IDENTICAL:
290    case PIPE_CAP_GL_SPIRV:
291    case PIPE_CAP_GL_SPIRV_VARIABLE_POINTERS:
292    case PIPE_CAP_DEMOTE_TO_HELPER_INVOCATION:
293    case PIPE_CAP_NATIVE_FENCE_FD:
294    case PIPE_CAP_MEMOBJ:
295    case PIPE_CAP_MIXED_COLOR_DEPTH_BITS:
296    case PIPE_CAP_FENCE_SIGNAL:
297    case PIPE_CAP_IMAGE_STORE_FORMATTED:
298    case PIPE_CAP_LEGACY_MATH_RULES:
299    case PIPE_CAP_ALPHA_TO_COVERAGE_DITHER_CONTROL:
300    case PIPE_CAP_MAP_UNSYNCHRONIZED_THREAD_SAFE:
301    case PIPE_CAP_HAS_CONST_BW:
302    case PIPE_CAP_CL_GL_SHARING:
303       return true;
304    case PIPE_CAP_UMA:
305       return iris_bufmgr_vram_size(screen->bufmgr) == 0;
306    case PIPE_CAP_QUERY_MEMORY_INFO:
307       return iris_bufmgr_vram_size(screen->bufmgr) != 0;
308    case PIPE_CAP_PREFER_BACK_BUFFER_REUSE:
309       return false;
310    case PIPE_CAP_FBFETCH:
311       return IRIS_MAX_DRAW_BUFFERS;
312    case PIPE_CAP_FBFETCH_COHERENT:
313    case PIPE_CAP_CONSERVATIVE_RASTER_INNER_COVERAGE:
314    case PIPE_CAP_POST_DEPTH_COVERAGE:
315    case PIPE_CAP_SHADER_STENCIL_EXPORT:
316    case PIPE_CAP_DEPTH_CLIP_DISABLE_SEPARATE:
317    case PIPE_CAP_FRAGMENT_SHADER_INTERLOCK:
318    case PIPE_CAP_ATOMIC_FLOAT_MINMAX:
319       return devinfo->ver >= 9;
320    case PIPE_CAP_DEPTH_BOUNDS_TEST:
321       return devinfo->ver >= 12;
322    case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
323       return 1;
324    case PIPE_CAP_MAX_RENDER_TARGETS:
325       return IRIS_MAX_DRAW_BUFFERS;
326    case PIPE_CAP_MAX_TEXTURE_2D_SIZE:
327       return 16384;
328    case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
329       return IRIS_MAX_MIPLEVELS; /* 16384x16384 */
330    case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
331       return 12; /* 2048x2048 */
332    case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
333       return 4;
334    case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
335       return 2048;
336    case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
337       return IRIS_MAX_SOL_BINDINGS / IRIS_MAX_SOL_BUFFERS;
338    case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
339       return IRIS_MAX_SOL_BINDINGS;
340    case PIPE_CAP_GLSL_FEATURE_LEVEL:
341    case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY:
342       return 460;
343    case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
344       /* 3DSTATE_CONSTANT_XS requires the start of UBOs to be 32B aligned */
345       return 32;
346    case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
347       return IRIS_MAP_BUFFER_ALIGNMENT;
348    case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
349       return 4;
350    case PIPE_CAP_MAX_SHADER_BUFFER_SIZE_UINT:
351       return 1 << 27;
352    case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
353       return 16; // XXX: u_screen says 256 is the minimum value...
354    case PIPE_CAP_LINEAR_IMAGE_PITCH_ALIGNMENT:
355       return 1;
356    case PIPE_CAP_LINEAR_IMAGE_BASE_ADDRESS_ALIGNMENT:
357       return 1;
358    case PIPE_CAP_TEXTURE_TRANSFER_MODES:
359       return PIPE_TEXTURE_TRANSFER_BLIT;
360    case PIPE_CAP_MAX_TEXEL_BUFFER_ELEMENTS_UINT:
361       return IRIS_MAX_TEXTURE_BUFFER_SIZE;
362    case PIPE_CAP_MAX_VIEWPORTS:
363       return 16;
364    case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:
365       return 256;
366    case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
367       return 1024;
368    case PIPE_CAP_MAX_GS_INVOCATIONS:
369       return 32;
370    case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
371       return 4;
372    case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
373       return -32;
374    case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
375       return 31;
376    case PIPE_CAP_MAX_VERTEX_STREAMS:
377       return 4;
378    case PIPE_CAP_VENDOR_ID:
379       return 0x8086;
380    case PIPE_CAP_DEVICE_ID:
381       return screen->devinfo->pci_device_id;
382    case PIPE_CAP_VIDEO_MEMORY:
383       return iris_get_video_memory(screen);
384    case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
385    case PIPE_CAP_MAX_VARYINGS:
386       return 32;
387    case PIPE_CAP_PREFER_IMM_ARRAYS_AS_CONSTBUF:
388       /* We want immediate arrays to go get uploaded as nir->constant_data by
389        * nir_opt_large_constants() instead.
390        */
391       return 0;
392    case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
393       /* AMD_pinned_memory assumes the flexibility of using client memory
394        * for any buffer (incl. vertex buffers) which rules out the prospect
395        * of using snooped buffers, as using snooped buffers without
396        * cogniscience is likely to be detrimental to performance and require
397        * extensive checking in the driver for correctness, e.g. to prevent
398        * illegal snoop <-> snoop transfers.
399        */
400       return devinfo->has_llc;
401    case PIPE_CAP_THROTTLE:
402       return screen->driconf.disable_throttling ? 0 : 1;
403 
404    case PIPE_CAP_CONTEXT_PRIORITY_MASK:
405       return PIPE_CONTEXT_PRIORITY_LOW |
406              PIPE_CONTEXT_PRIORITY_MEDIUM |
407              PIPE_CONTEXT_PRIORITY_HIGH;
408 
409    case PIPE_CAP_FRONTEND_NOOP:
410       return true;
411 
412    // XXX: don't hardcode 00:00:02.0 PCI here
413    case PIPE_CAP_PCI_GROUP:
414       return 0;
415    case PIPE_CAP_PCI_BUS:
416       return 0;
417    case PIPE_CAP_PCI_DEVICE:
418       return 2;
419    case PIPE_CAP_PCI_FUNCTION:
420       return 0;
421 
422    case PIPE_CAP_OPENCL_INTEGER_FUNCTIONS:
423    case PIPE_CAP_INTEGER_MULTIPLY_32X16:
424       return true;
425 
426    case PIPE_CAP_ALLOW_DYNAMIC_VAO_FASTPATH:
427       /* Internal details of VF cache make this optimization harmful on GFX
428        * version 8 and 9, because generated VERTEX_BUFFER_STATEs are cached
429        * separately.
430        */
431       return devinfo->ver >= 11;
432 
433    case PIPE_CAP_TIMER_RESOLUTION:
434       return DIV_ROUND_UP(1000000000ull, devinfo->timestamp_frequency);
435 
436    case PIPE_CAP_DEVICE_PROTECTED_CONTEXT:
437       return screen->kernel_features & KERNEL_HAS_PROTECTED_CONTEXT;
438 
439    case PIPE_CAP_ASTC_VOID_EXTENTS_NEED_DENORM_FLUSH:
440       return devinfo->ver == 9 && !intel_device_info_is_9lp(devinfo);
441 
442    default:
443       return u_pipe_screen_get_param_defaults(pscreen, param);
444    }
445    return 0;
446 }
447 
448 static float
iris_get_paramf(struct pipe_screen * pscreen,enum pipe_capf param)449 iris_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
450 {
451    switch (param) {
452    case PIPE_CAPF_MIN_LINE_WIDTH:
453    case PIPE_CAPF_MIN_LINE_WIDTH_AA:
454    case PIPE_CAPF_MIN_POINT_SIZE:
455    case PIPE_CAPF_MIN_POINT_SIZE_AA:
456       return 1;
457 
458    case PIPE_CAPF_POINT_SIZE_GRANULARITY:
459    case PIPE_CAPF_LINE_WIDTH_GRANULARITY:
460       return 0.1;
461 
462    case PIPE_CAPF_MAX_LINE_WIDTH:
463    case PIPE_CAPF_MAX_LINE_WIDTH_AA:
464       return 7.375f;
465 
466    case PIPE_CAPF_MAX_POINT_SIZE:
467    case PIPE_CAPF_MAX_POINT_SIZE_AA:
468       return 255.0f;
469 
470    case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
471       return 16.0f;
472    case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
473       return 15.0f;
474    case PIPE_CAPF_MIN_CONSERVATIVE_RASTER_DILATE:
475    case PIPE_CAPF_MAX_CONSERVATIVE_RASTER_DILATE:
476    case PIPE_CAPF_CONSERVATIVE_RASTER_DILATE_GRANULARITY:
477       return 0.0f;
478    default:
479       unreachable("unknown param");
480    }
481 }
482 
483 static int
iris_get_shader_param(struct pipe_screen * pscreen,enum pipe_shader_type p_stage,enum pipe_shader_cap param)484 iris_get_shader_param(struct pipe_screen *pscreen,
485                       enum pipe_shader_type p_stage,
486                       enum pipe_shader_cap param)
487 {
488    gl_shader_stage stage = stage_from_pipe(p_stage);
489 
490    if (p_stage == PIPE_SHADER_MESH ||
491        p_stage == PIPE_SHADER_TASK)
492       return 0;
493 
494    /* this is probably not totally correct.. but it's a start: */
495    switch (param) {
496    case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
497       return stage == MESA_SHADER_FRAGMENT ? 1024 : 16384;
498    case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
499    case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
500    case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
501       return stage == MESA_SHADER_FRAGMENT ? 1024 : 0;
502 
503    case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
504       return UINT_MAX;
505 
506    case PIPE_SHADER_CAP_MAX_INPUTS:
507       return stage == MESA_SHADER_VERTEX ? 16 : 32;
508    case PIPE_SHADER_CAP_MAX_OUTPUTS:
509       return 32;
510    case PIPE_SHADER_CAP_MAX_CONST_BUFFER0_SIZE:
511       return 16 * 1024 * sizeof(float);
512    case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
513       return 16;
514    case PIPE_SHADER_CAP_MAX_TEMPS:
515       return 256; /* GL_MAX_PROGRAM_TEMPORARIES_ARB */
516    case PIPE_SHADER_CAP_CONT_SUPPORTED:
517       return 0;
518    case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
519    case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
520    case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
521    case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
522       /* Lie about these to avoid st/mesa's GLSL IR lowering of indirects,
523        * which we don't want.  Our compiler backend will check brw_compiler's
524        * options and call nir_lower_indirect_derefs appropriately anyway.
525        */
526       return true;
527    case PIPE_SHADER_CAP_SUBROUTINES:
528       return 0;
529    case PIPE_SHADER_CAP_INTEGERS:
530       return 1;
531    case PIPE_SHADER_CAP_INT64_ATOMICS:
532    case PIPE_SHADER_CAP_FP16:
533    case PIPE_SHADER_CAP_FP16_DERIVATIVES:
534    case PIPE_SHADER_CAP_FP16_CONST_BUFFERS:
535    case PIPE_SHADER_CAP_INT16:
536    case PIPE_SHADER_CAP_GLSL_16BIT_CONSTS:
537       return 0;
538    case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
539       return IRIS_MAX_SAMPLERS;
540    case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
541       return IRIS_MAX_TEXTURES;
542    case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
543       return IRIS_MAX_IMAGES;
544    case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
545       return IRIS_MAX_ABOS + IRIS_MAX_SSBOS;
546    case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
547    case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:
548       return 0;
549    case PIPE_SHADER_CAP_SUPPORTED_IRS: {
550       int irs = 1 << PIPE_SHADER_IR_NIR;
551       if (iris_enable_clover())
552          irs |= 1 << PIPE_SHADER_IR_NIR_SERIALIZED;
553       return irs;
554    }
555    case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
556    case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
557       return 0;
558    default:
559       unreachable("unknown shader param");
560    }
561 }
562 
563 static int
iris_get_compute_param(struct pipe_screen * pscreen,enum pipe_shader_ir ir_type,enum pipe_compute_cap param,void * ret)564 iris_get_compute_param(struct pipe_screen *pscreen,
565                        enum pipe_shader_ir ir_type,
566                        enum pipe_compute_cap param,
567                        void *ret)
568 {
569    struct iris_screen *screen = (struct iris_screen *)pscreen;
570    const struct intel_device_info *devinfo = screen->devinfo;
571 
572    const uint32_t max_invocations =
573       MIN2(1024, 32 * devinfo->max_cs_workgroup_threads);
574 
575 #define RET(x) do {                  \
576    if (ret)                          \
577       memcpy(ret, x, sizeof(x));     \
578    return sizeof(x);                 \
579 } while (0)
580 
581    switch (param) {
582    case PIPE_COMPUTE_CAP_ADDRESS_BITS:
583       /* This gets queried on OpenCL device init and is never queried by the
584        * OpenGL state tracker.
585        */
586       iris_warn_cl();
587       RET((uint32_t []){ 64 });
588 
589    case PIPE_COMPUTE_CAP_IR_TARGET:
590       if (ret)
591          strcpy(ret, "gen");
592       return 4;
593 
594    case PIPE_COMPUTE_CAP_GRID_DIMENSION:
595       RET((uint64_t []) { 3 });
596 
597    case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
598       RET(((uint64_t []) { UINT32_MAX, UINT32_MAX, UINT32_MAX }));
599 
600    case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
601       /* MaxComputeWorkGroupSize[0..2] */
602       RET(((uint64_t []) {max_invocations, max_invocations, max_invocations}));
603 
604    case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
605       /* MaxComputeWorkGroupInvocations */
606    case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
607       /* MaxComputeVariableGroupInvocations */
608       RET((uint64_t []) { max_invocations });
609 
610    case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
611       /* MaxComputeSharedMemorySize */
612       RET((uint64_t []) { 64 * 1024 });
613 
614    case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
615       RET((uint32_t []) { 1 });
616 
617    case PIPE_COMPUTE_CAP_SUBGROUP_SIZES:
618       RET((uint32_t []) { 32 | 16 | 8 });
619 
620    case PIPE_COMPUTE_CAP_MAX_SUBGROUPS:
621       RET((uint32_t []) { devinfo->max_cs_workgroup_threads });
622 
623    case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
624    case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
625       RET((uint64_t []) { 1 << 30 }); /* TODO */
626 
627    case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
628       RET((uint32_t []) { 400 }); /* TODO */
629 
630    case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS: {
631       RET((uint32_t []) { intel_device_info_subslice_total(devinfo) });
632    }
633 
634    case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
635       /* MaxComputeSharedMemorySize */
636       RET((uint64_t []) { 64 * 1024 });
637 
638    case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
639       /* We could probably allow more; this is the OpenCL minimum */
640       RET((uint64_t []) { 1024 });
641 
642    default:
643       unreachable("unknown compute param");
644    }
645 }
646 
647 static uint64_t
iris_get_timestamp(struct pipe_screen * pscreen)648 iris_get_timestamp(struct pipe_screen *pscreen)
649 {
650    struct iris_screen *screen = (struct iris_screen *) pscreen;
651    uint64_t result;
652 
653    if (!intel_gem_read_render_timestamp(iris_bufmgr_get_fd(screen->bufmgr),
654                                         screen->devinfo->kmd_type, &result))
655       return 0;
656 
657    result = intel_device_info_timebase_scale(screen->devinfo, result);
658 
659    return result;
660 }
661 
/**
 * Release everything held by \p screen and free the screen itself.
 *
 * In order, this tears down the measure state and the shader compiler
 * queue, drops a reference on the GLSL type singleton, unreferences the
 * workaround and breakpoint BOs, destroys the transfer helper, drops the
 * buffer manager reference, destroys the disk cache and closes the winsys
 * file descriptor.  \p screen is ralloc-freed last and must not be used
 * afterwards.
 */
662 void
iris_screen_destroy(struct iris_screen * screen)663 iris_screen_destroy(struct iris_screen *screen)
664 {
665    iris_destroy_screen_measure(screen);
666    util_queue_destroy(&screen->shader_compiler_queue);
667    glsl_type_singleton_decref();
668    iris_bo_unreference(screen->workaround_bo);
669    iris_bo_unreference(screen->breakpoint_bo);
670    u_transfer_helper_destroy(screen->base.transfer_helper);
/* The BOs above are released before the buffer manager reference is dropped. */
671    iris_bufmgr_unref(screen->bufmgr);
672    disk_cache_destroy(screen->disk_cache);
673    close(screen->winsys_fd);
/* Frees the screen allocation itself; nothing may touch screen after this. */
674    ralloc_free(screen);
675 }
676 
/**
 * Screen teardown entry point taking a pipe_screen pointer.
 *
 * Simply forwards to iris_pscreen_unref().
 * NOTE(review): presumably that drops one reference and destroys the
 * screen when the count reaches zero; confirm against its definition.
 */
677 static void
iris_screen_unref(struct pipe_screen * pscreen)678 iris_screen_unref(struct pipe_screen *pscreen)
679 {
680    iris_pscreen_unref(pscreen);
681 }
682 
683 static void
iris_query_memory_info(struct pipe_screen * pscreen,struct pipe_memory_info * info)684 iris_query_memory_info(struct pipe_screen *pscreen,
685                        struct pipe_memory_info *info)
686 {
687    struct iris_screen *screen = (struct iris_screen *)pscreen;
688    struct intel_device_info di;
689    memcpy(&di, screen->devinfo, sizeof(di));
690 
691    if (!intel_device_info_update_memory_info(&di, screen->fd))
692       return;
693 
694    info->total_device_memory =
695       (di.mem.vram.mappable.size + di.mem.vram.unmappable.size) / 1024;
696    info->avail_device_memory =
697       (di.mem.vram.mappable.free + di.mem.vram.unmappable.free) / 1024;
698    info->total_staging_memory = di.mem.sram.mappable.size / 1024;
699    info->avail_staging_memory = di.mem.sram.mappable.free / 1024;
700 
701    /* Neither kernel gives us any way to calculate this information */
702    info->device_memory_evicted = 0;
703    info->nr_device_memory_evictions = 0;
704 }
705 
706 static struct disk_cache *
iris_get_disk_shader_cache(struct pipe_screen * pscreen)707 iris_get_disk_shader_cache(struct pipe_screen *pscreen)
708 {
709    struct iris_screen *screen = (struct iris_screen *) pscreen;
710    return screen->disk_cache;
711 }
712 
713 static const struct intel_l3_config *
iris_get_default_l3_config(const struct intel_device_info * devinfo,bool compute)714 iris_get_default_l3_config(const struct intel_device_info *devinfo,
715                            bool compute)
716 {
717    bool wants_dc_cache = true;
718    bool has_slm = compute;
719    const struct intel_l3_weights w =
720       intel_get_default_l3_weights(devinfo, wants_dc_cache, has_slm);
721    return intel_get_l3_config(devinfo, w);
722 }
723 
724 static void
iris_detect_kernel_features(struct iris_screen * screen)725 iris_detect_kernel_features(struct iris_screen *screen)
726 {
727    const struct intel_device_info *devinfo = screen->devinfo;
728    /* Kernel 5.2+ */
729    if (intel_gem_supports_syncobj_wait(screen->fd))
730       screen->kernel_features |= KERNEL_HAS_WAIT_FOR_SUBMIT;
731    if (intel_gem_supports_protected_context(screen->fd, devinfo->kmd_type))
732       screen->kernel_features |= KERNEL_HAS_PROTECTED_CONTEXT;
733 }
734 
735 static bool
iris_init_identifier_bo(struct iris_screen * screen)736 iris_init_identifier_bo(struct iris_screen *screen)
737 {
738    void *bo_map;
739 
740    bo_map = iris_bo_map(NULL, screen->workaround_bo, MAP_READ | MAP_WRITE);
741    if (!bo_map)
742       return false;
743 
744    assert(iris_bo_is_real(screen->workaround_bo));
745 
746    screen->workaround_address = (struct iris_address) {
747       .bo = screen->workaround_bo,
748       .offset = ALIGN(
749          intel_debug_write_identifiers(bo_map, 4096, "Iris"), 32),
750    };
751 
752    iris_bo_unmap(screen->workaround_bo);
753 
754    return true;
755 }
756 
757 static int
iris_screen_get_fd(struct pipe_screen * pscreen)758 iris_screen_get_fd(struct pipe_screen *pscreen)
759 {
760    struct iris_screen *screen = (struct iris_screen *) pscreen;
761 
762    return screen->winsys_fd;
763 }
764 
765 struct pipe_screen *
iris_screen_create(int fd,const struct pipe_screen_config * config)766 iris_screen_create(int fd, const struct pipe_screen_config *config)
767 {
768    struct iris_screen *screen = rzalloc(NULL, struct iris_screen);
769    if (!screen)
770       return NULL;
771 
772    driParseConfigFiles(config->options, config->options_info, 0, "iris",
773                        NULL, NULL, NULL, 0, NULL, 0);
774 
775    bool bo_reuse = false;
776    int bo_reuse_mode = driQueryOptioni(config->options, "bo_reuse");
777    switch (bo_reuse_mode) {
778    case DRI_CONF_BO_REUSE_DISABLED:
779       break;
780    case DRI_CONF_BO_REUSE_ALL:
781       bo_reuse = true;
782       break;
783    }
784 
785    process_intel_debug_variable();
786 
787    screen->bufmgr = iris_bufmgr_get_for_fd(fd, bo_reuse);
788    if (!screen->bufmgr)
789       return NULL;
790 
791    screen->devinfo = iris_bufmgr_get_device_info(screen->bufmgr);
792    p_atomic_set(&screen->refcount, 1);
793 
794    /* Here are the i915 features we need for Iris (in chronological order) :
795     *    - I915_PARAM_HAS_EXEC_NO_RELOC     (3.10)
796     *    - I915_PARAM_HAS_EXEC_HANDLE_LUT   (3.10)
797     *    - I915_PARAM_HAS_EXEC_BATCH_FIRST  (4.13)
798     *    - I915_PARAM_HAS_EXEC_FENCE_ARRAY  (4.14)
799     *    - I915_PARAM_HAS_CONTEXT_ISOLATION (4.16)
800     *
801     * Checking the last feature availability will include all previous ones.
802     */
803    if (!screen->devinfo->has_context_isolation) {
804       debug_error("Kernel is too old (4.16+ required) or unusable for Iris.\n"
805                   "Check your dmesg logs for loading failures.\n");
806       return NULL;
807    }
808 
809    screen->fd = iris_bufmgr_get_fd(screen->bufmgr);
810    screen->winsys_fd = os_dupfd_cloexec(fd);
811 
812    screen->id = iris_bufmgr_create_screen_id(screen->bufmgr);
813 
814    screen->workaround_bo =
815       iris_bo_alloc(screen->bufmgr, "workaround", 4096, 4096,
816                     IRIS_MEMZONE_OTHER, BO_ALLOC_NO_SUBALLOC | BO_ALLOC_CAPTURE);
817    if (!screen->workaround_bo)
818       return NULL;
819 
820    screen->breakpoint_bo = iris_bo_alloc(screen->bufmgr, "breakpoint", 4, 4,
821                                          IRIS_MEMZONE_OTHER, BO_ALLOC_ZEROED);
822    if (!screen->breakpoint_bo)
823       return NULL;
824 
825    if (!iris_init_identifier_bo(screen))
826       return NULL;
827 
828    screen->driconf.dual_color_blend_by_location =
829       driQueryOptionb(config->options, "dual_color_blend_by_location");
830    screen->driconf.disable_throttling =
831       driQueryOptionb(config->options, "disable_throttling");
832    screen->driconf.always_flush_cache = INTEL_DEBUG(DEBUG_STALL) ||
833       driQueryOptionb(config->options, "always_flush_cache");
834    screen->driconf.sync_compile =
835       driQueryOptionb(config->options, "sync_compile");
836    screen->driconf.limit_trig_input_range =
837       driQueryOptionb(config->options, "limit_trig_input_range");
838    screen->driconf.lower_depth_range_rate =
839       driQueryOptionf(config->options, "lower_depth_range_rate");
840    screen->driconf.intel_enable_wa_14018912822 =
841       driQueryOptionb(config->options, "intel_enable_wa_14018912822");
842    screen->driconf.enable_tbimr =
843       driQueryOptionb(config->options, "intel_tbimr");
844    screen->driconf.generated_indirect_threshold =
845       driQueryOptioni(config->options, "generated_indirect_threshold");
846 
847    screen->precompile = debug_get_bool_option("shader_precompile", true);
848 
849    isl_device_init(&screen->isl_dev, screen->devinfo);
850 
851    iris_compiler_init(screen);
852 
853    screen->l3_config_3d = iris_get_default_l3_config(screen->devinfo, false);
854    screen->l3_config_cs = iris_get_default_l3_config(screen->devinfo, true);
855 
856    iris_disk_cache_init(screen);
857 
858    slab_create_parent(&screen->transfer_pool,
859                       sizeof(struct iris_transfer), 64);
860 
861    iris_detect_kernel_features(screen);
862 
863    struct pipe_screen *pscreen = &screen->base;
864 
865    iris_init_screen_fence_functions(pscreen);
866    iris_init_screen_resource_functions(pscreen);
867    iris_init_screen_measure(screen);
868 
869    pscreen->destroy = iris_screen_unref;
870    pscreen->get_name = iris_get_name;
871    pscreen->get_vendor = iris_get_vendor;
872    pscreen->get_device_vendor = iris_get_device_vendor;
873    pscreen->get_cl_cts_version = iris_get_cl_cts_version;
874    pscreen->get_screen_fd = iris_screen_get_fd;
875    pscreen->get_param = iris_get_param;
876    pscreen->get_shader_param = iris_get_shader_param;
877    pscreen->get_compute_param = iris_get_compute_param;
878    pscreen->get_paramf = iris_get_paramf;
879    pscreen->get_compiler_options = iris_get_compiler_options;
880    pscreen->get_device_uuid = iris_get_device_uuid;
881    pscreen->get_driver_uuid = iris_get_driver_uuid;
882    pscreen->get_disk_shader_cache = iris_get_disk_shader_cache;
883    pscreen->is_format_supported = iris_is_format_supported;
884    pscreen->context_create = iris_create_context;
885    pscreen->get_timestamp = iris_get_timestamp;
886    pscreen->query_memory_info = iris_query_memory_info;
887    pscreen->get_driver_query_group_info = iris_get_monitor_group_info;
888    pscreen->get_driver_query_info = iris_get_monitor_info;
889    iris_init_screen_program_functions(pscreen);
890 
891    genX_call(screen->devinfo, init_screen_state, screen);
892    genX_call(screen->devinfo, init_screen_gen_state, screen);
893 
894    genX_call(screen->devinfo, init_screen_state, screen);
895 
896    glsl_type_singleton_init_or_ref();
897 
898    intel_driver_ds_init();
899 
900    /* FINISHME: Big core vs little core (for CPUs that have both kinds of
901     * cores) and, possibly, thread vs core should be considered here too.
902     */
903    unsigned compiler_threads = 1;
904    const struct util_cpu_caps_t *caps = util_get_cpu_caps();
905    unsigned hw_threads = caps->nr_cpus;
906 
907    if (hw_threads >= 12) {
908       compiler_threads = hw_threads * 3 / 4;
909    } else if (hw_threads >= 6) {
910       compiler_threads = hw_threads - 2;
911    } else if (hw_threads >= 2) {
912       compiler_threads = hw_threads - 1;
913    }
914 
915    if (!util_queue_init(&screen->shader_compiler_queue,
916                         "sh", 64, compiler_threads,
917                         UTIL_QUEUE_INIT_RESIZE_IF_FULL |
918                         UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY,
919                         NULL)) {
920       iris_screen_destroy(screen);
921       return NULL;
922    }
923 
924    return pscreen;
925 }
926