/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
22 
/**
 * @file iris_screen.c
 *
 * Screen related driver hooks and capability lists.
 *
 * A program may use multiple rendering contexts (iris_context), but
 * they all share a common screen (iris_screen).  Global driver state
 * can be stored in the screen; it may be accessed by multiple threads.
 */
32 
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "util/debug.h"
#include "util/os_file.h"
#include "util/u_cpu_detect.h"
#include "util/u_inlines.h"
#include "util/format/u_format.h"
#include "util/u_transfer_helper.h"
#include "util/u_upload_mgr.h"
#include "util/ralloc.h"
#include "util/xmlconfig.h"
#include "drm-uapi/i915_drm.h"
#include "iris_context.h"
#include "iris_defines.h"
#include "iris_fence.h"
#include "iris_pipe.h"
#include "iris_resource.h"
#include "iris_screen.h"
#include "compiler/glsl_types.h"
#include "intel/compiler/brw_compiler.h"
#include "intel/common/intel_gem.h"
#include "intel/common/intel_l3_config.h"
#include "intel/common/intel_uuid.h"
#include "iris_monitor.h"
62 
/**
 * Dispatch to the per-generation build of a function.
 *
 * Expands to a switch on (devinfo)->verx10 that calls gfx<N>_<func> with
 * the remaining macro arguments.  A verx10 value that has no case here is
 * treated as unreachable.
 */
#define genX_call(devinfo, func, ...)             \
   switch ((devinfo)->verx10) {                   \
   case 80:                                       \
      gfx8_##func(__VA_ARGS__);                   \
      break;                                      \
   case 90:                                       \
      gfx9_##func(__VA_ARGS__);                   \
      break;                                      \
   case 110:                                      \
      gfx11_##func(__VA_ARGS__);                  \
      break;                                      \
   case 120:                                      \
      gfx12_##func(__VA_ARGS__);                  \
      break;                                      \
   case 125:                                      \
      gfx125_##func(__VA_ARGS__);                 \
      break;                                      \
   default:                                       \
      unreachable("Unknown hardware generation"); \
   }
83 
/**
 * pipe_screen::get_vendor hook.
 *
 * Always reports "Intel"; the screen argument is not consulted.
 */
static const char *
iris_get_vendor(struct pipe_screen *pscreen)
{
   static const char vendor[] = "Intel";

   return vendor;
}
89 
/**
 * pipe_screen::get_device_vendor hook.
 *
 * Always reports "Intel"; the screen argument is not consulted.
 */
static const char *
iris_get_device_vendor(struct pipe_screen *pscreen)
{
   const char *const device_vendor = "Intel";

   return device_vendor;
}
95 
96 static void
iris_get_device_uuid(struct pipe_screen * pscreen,char * uuid)97 iris_get_device_uuid(struct pipe_screen *pscreen, char *uuid)
98 {
99    struct iris_screen *screen = (struct iris_screen *)pscreen;
100 
101    intel_uuid_compute_device_id((uint8_t *)uuid, &screen->devinfo, PIPE_UUID_SIZE);
102 }
103 
104 static void
iris_get_driver_uuid(struct pipe_screen * pscreen,char * uuid)105 iris_get_driver_uuid(struct pipe_screen *pscreen, char *uuid)
106 {
107    struct iris_screen *screen = (struct iris_screen *)pscreen;
108    const struct intel_device_info *devinfo = &screen->devinfo;
109 
110    intel_uuid_compute_driver_id((uint8_t *)uuid, devinfo, PIPE_UUID_SIZE);
111 }
112 
/**
 * Report whether the IRIS_ENABLE_CLOVER environment variable is set to a
 * true value (default: false).
 *
 * The environment is only consulted on the first call; the answer is kept
 * in a function-local static (-1 means "not queried yet") and reused
 * afterwards.
 */
static bool
iris_enable_clover(void)
{
   static int enable = -1;

   if (enable < 0)
      enable = env_var_as_boolean("IRIS_ENABLE_CLOVER", false);

   return enable;
}
121 
/**
 * Print a one-time warning to stderr that OpenCL through iris+clover is
 * incomplete.
 *
 * A function-local flag makes every call after the first a no-op.
 */
static void
iris_warn_clover(void)
{
   static bool warned = false;

   if (warned)
      return;

   warned = true;
   fprintf(stderr, "WARNING: OpenCL support via iris+clover is incomplete.\n"
                   "For a complete and conformant OpenCL implementation, use\n"
                   "https://github.com/intel/compute-runtime instead\n");
}
134 
135 static const char *
iris_get_name(struct pipe_screen * pscreen)136 iris_get_name(struct pipe_screen *pscreen)
137 {
138    struct iris_screen *screen = (struct iris_screen *)pscreen;
139    const struct intel_device_info *devinfo = &screen->devinfo;
140    static char buf[128];
141 
142    snprintf(buf, sizeof(buf), "Mesa %s", devinfo->name);
143    return buf;
144 }
145 
146 static int
iris_get_video_memory(struct iris_screen * screen)147 iris_get_video_memory(struct iris_screen *screen)
148 {
149    uint64_t vram = iris_bufmgr_vram_size(screen->bufmgr);
150    uint64_t sram = iris_bufmgr_sram_size(screen->bufmgr);
151    if (vram) {
152       return vram / (1024 * 1024);
153    } else if (sram) {
154       return sram / (1024 * 1024);
155    } else {
156       /* This is the old code path, it get the GGTT size from the kernel
157        * (which should always be 4Gb on Gfx8+).
158        *
159        * We should probably never end up here. This is just a fallback to get
160        * some kind of value in case os_get_available_system_memory fails.
161        */
162       const struct intel_device_info *devinfo = &screen->devinfo;
163       /* Once a batch uses more than 75% of the maximum mappable size, we
164        * assume that there's some fragmentation, and we start doing extra
165        * flushing, etc.  That's the big cliff apps will care about.
166        */
167       const unsigned gpu_mappable_megabytes =
168          (devinfo->aperture_bytes * 3 / 4) / (1024 * 1024);
169 
170       const long system_memory_pages = sysconf(_SC_PHYS_PAGES);
171       const long system_page_size = sysconf(_SC_PAGE_SIZE);
172 
173       if (system_memory_pages <= 0 || system_page_size <= 0)
174          return -1;
175 
176       const uint64_t system_memory_bytes =
177          (uint64_t) system_memory_pages * (uint64_t) system_page_size;
178 
179       const unsigned system_memory_megabytes =
180          (unsigned) (system_memory_bytes / (1024 * 1024));
181 
182       return MIN2(system_memory_megabytes, gpu_mappable_megabytes);
183    }
184 }
185 
186 static int
iris_get_param(struct pipe_screen * pscreen,enum pipe_cap param)187 iris_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
188 {
189    struct iris_screen *screen = (struct iris_screen *)pscreen;
190    const struct intel_device_info *devinfo = &screen->devinfo;
191 
192    switch (param) {
193    case PIPE_CAP_NPOT_TEXTURES:
194    case PIPE_CAP_ANISOTROPIC_FILTER:
195    case PIPE_CAP_POINT_SPRITE:
196    case PIPE_CAP_OCCLUSION_QUERY:
197    case PIPE_CAP_QUERY_TIME_ELAPSED:
198    case PIPE_CAP_TEXTURE_SWIZZLE:
199    case PIPE_CAP_TEXTURE_MIRROR_CLAMP_TO_EDGE:
200    case PIPE_CAP_BLEND_EQUATION_SEPARATE:
201    case PIPE_CAP_FRAGMENT_SHADER_TEXTURE_LOD:
202    case PIPE_CAP_FRAGMENT_SHADER_DERIVATIVES:
203    case PIPE_CAP_PRIMITIVE_RESTART:
204    case PIPE_CAP_PRIMITIVE_RESTART_FIXED_INDEX:
205    case PIPE_CAP_INDEP_BLEND_ENABLE:
206    case PIPE_CAP_INDEP_BLEND_FUNC:
207    case PIPE_CAP_RGB_OVERRIDE_DST_ALPHA_BLEND:
208    case PIPE_CAP_FS_COORD_ORIGIN_UPPER_LEFT:
209    case PIPE_CAP_FS_COORD_PIXEL_CENTER_INTEGER:
210    case PIPE_CAP_DEPTH_CLIP_DISABLE:
211    case PIPE_CAP_VS_INSTANCEID:
212    case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
213    case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
214    case PIPE_CAP_SEAMLESS_CUBE_MAP:
215    case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
216    case PIPE_CAP_CONDITIONAL_RENDER:
217    case PIPE_CAP_TEXTURE_BARRIER:
218    case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
219    case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
220    case PIPE_CAP_COMPUTE:
221    case PIPE_CAP_START_INSTANCE:
222    case PIPE_CAP_QUERY_TIMESTAMP:
223    case PIPE_CAP_TEXTURE_MULTISAMPLE:
224    case PIPE_CAP_CUBE_MAP_ARRAY:
225    case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
226    case PIPE_CAP_QUERY_PIPELINE_STATISTICS_SINGLE:
227    case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
228    case PIPE_CAP_TEXTURE_QUERY_LOD:
229    case PIPE_CAP_SAMPLE_SHADING:
230    case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
231    case PIPE_CAP_DRAW_INDIRECT:
232    case PIPE_CAP_MULTI_DRAW_INDIRECT:
233    case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
234    case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
235    case PIPE_CAP_VS_LAYER_VIEWPORT:
236    case PIPE_CAP_TES_LAYER_VIEWPORT:
237    case PIPE_CAP_FS_FINE_DERIVATIVE:
238    case PIPE_CAP_SHADER_PACK_HALF_FLOAT:
239    case PIPE_CAP_ACCELERATED:
240    case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
241    case PIPE_CAP_CLIP_HALFZ:
242    case PIPE_CAP_TGSI_TEXCOORD:
243    case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS:
244    case PIPE_CAP_DOUBLES:
245    case PIPE_CAP_INT64:
246    case PIPE_CAP_INT64_DIVMOD:
247    case PIPE_CAP_SAMPLER_VIEW_TARGET:
248    case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:
249    case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
250    case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
251    case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
252    case PIPE_CAP_CULL_DISTANCE:
253    case PIPE_CAP_PACKED_UNIFORMS:
254    case PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET:
255    case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
256    case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
257    case PIPE_CAP_POLYGON_OFFSET_CLAMP:
258    case PIPE_CAP_QUERY_SO_OVERFLOW:
259    case PIPE_CAP_QUERY_BUFFER_OBJECT:
260    case PIPE_CAP_TGSI_TEX_TXF_LZ:
261    case PIPE_CAP_TEXTURE_QUERY_SAMPLES:
262    case PIPE_CAP_SHADER_CLOCK:
263    case PIPE_CAP_SHADER_BALLOT:
264    case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
265    case PIPE_CAP_CLEAR_TEXTURE:
266    case PIPE_CAP_CLEAR_SCISSORED:
267    case PIPE_CAP_SHADER_GROUP_VOTE:
268    case PIPE_CAP_VS_WINDOW_SPACE_POSITION:
269    case PIPE_CAP_TEXTURE_GATHER_SM5:
270    case PIPE_CAP_SHADER_ARRAY_COMPONENTS:
271    case PIPE_CAP_GLSL_TESS_LEVELS_AS_INPUTS:
272    case PIPE_CAP_LOAD_CONSTBUF:
273    case PIPE_CAP_NIR_COMPACT_ARRAYS:
274    case PIPE_CAP_DRAW_PARAMETERS:
275    case PIPE_CAP_FS_POSITION_IS_SYSVAL:
276    case PIPE_CAP_FS_FACE_IS_INTEGER_SYSVAL:
277    case PIPE_CAP_COMPUTE_SHADER_DERIVATIVES:
278    case PIPE_CAP_INVALIDATE_BUFFER:
279    case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
280    case PIPE_CAP_CS_DERIVED_SYSTEM_VALUES_SUPPORTED:
281    case PIPE_CAP_TEXTURE_SHADOW_LOD:
282    case PIPE_CAP_SHADER_SAMPLES_IDENTICAL:
283    case PIPE_CAP_GL_SPIRV:
284    case PIPE_CAP_GL_SPIRV_VARIABLE_POINTERS:
285    case PIPE_CAP_DEMOTE_TO_HELPER_INVOCATION:
286    case PIPE_CAP_NATIVE_FENCE_FD:
287    case PIPE_CAP_MEMOBJ:
288    case PIPE_CAP_MIXED_COLOR_DEPTH_BITS:
289    case PIPE_CAP_FENCE_SIGNAL:
290    case PIPE_CAP_IMAGE_STORE_FORMATTED:
291    case PIPE_CAP_LEGACY_MATH_RULES:
292       return true;
293    case PIPE_CAP_UMA:
294       return iris_bufmgr_vram_size(screen->bufmgr) == 0;
295    case PIPE_CAP_PREFER_BACK_BUFFER_REUSE:
296       return false;
297    case PIPE_CAP_FBFETCH:
298       return BRW_MAX_DRAW_BUFFERS;
299    case PIPE_CAP_FBFETCH_COHERENT:
300    case PIPE_CAP_CONSERVATIVE_RASTER_INNER_COVERAGE:
301    case PIPE_CAP_POST_DEPTH_COVERAGE:
302    case PIPE_CAP_SHADER_STENCIL_EXPORT:
303    case PIPE_CAP_DEPTH_CLIP_DISABLE_SEPARATE:
304    case PIPE_CAP_FRAGMENT_SHADER_INTERLOCK:
305    case PIPE_CAP_ATOMIC_FLOAT_MINMAX:
306       return devinfo->ver >= 9;
307    case PIPE_CAP_DEPTH_BOUNDS_TEST:
308       return devinfo->ver >= 12;
309    case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
310       return 1;
311    case PIPE_CAP_MAX_RENDER_TARGETS:
312       return BRW_MAX_DRAW_BUFFERS;
313    case PIPE_CAP_MAX_TEXTURE_2D_SIZE:
314       return 16384;
315    case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
316       return IRIS_MAX_MIPLEVELS; /* 16384x16384 */
317    case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
318       return 12; /* 2048x2048 */
319    case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
320       return 4;
321    case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
322       return 2048;
323    case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
324       return BRW_MAX_SOL_BINDINGS / IRIS_MAX_SOL_BUFFERS;
325    case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
326       return BRW_MAX_SOL_BINDINGS;
327    case PIPE_CAP_GLSL_FEATURE_LEVEL:
328    case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY:
329       return 460;
330    case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
331       /* 3DSTATE_CONSTANT_XS requires the start of UBOs to be 32B aligned */
332       return 32;
333    case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
334       return IRIS_MAP_BUFFER_ALIGNMENT;
335    case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
336       return 4;
337    case PIPE_CAP_MAX_SHADER_BUFFER_SIZE_UINT:
338       return 1 << 27;
339    case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
340       return 16; // XXX: u_screen says 256 is the minimum value...
341    case PIPE_CAP_TEXTURE_TRANSFER_MODES:
342       return PIPE_TEXTURE_TRANSFER_BLIT;
343    case PIPE_CAP_MAX_TEXEL_BUFFER_ELEMENTS_UINT:
344       return IRIS_MAX_TEXTURE_BUFFER_SIZE;
345    case PIPE_CAP_MAX_VIEWPORTS:
346       return 16;
347    case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:
348       return 256;
349    case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
350       return 1024;
351    case PIPE_CAP_MAX_GS_INVOCATIONS:
352       return 32;
353    case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
354       return 4;
355    case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
356       return -32;
357    case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
358       return 31;
359    case PIPE_CAP_MAX_VERTEX_STREAMS:
360       return 4;
361    case PIPE_CAP_VENDOR_ID:
362       return 0x8086;
363    case PIPE_CAP_DEVICE_ID:
364       return screen->pci_id;
365    case PIPE_CAP_VIDEO_MEMORY:
366       return iris_get_video_memory(screen);
367    case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
368    case PIPE_CAP_MAX_VARYINGS:
369       return 32;
370    case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
371       /* AMD_pinned_memory assumes the flexibility of using client memory
372        * for any buffer (incl. vertex buffers) which rules out the prospect
373        * of using snooped buffers, as using snooped buffers without
374        * cogniscience is likely to be detrimental to performance and require
375        * extensive checking in the driver for correctness, e.g. to prevent
376        * illegal snoop <-> snoop transfers.
377        */
378       return devinfo->has_llc;
379    case PIPE_CAP_THROTTLE:
380       return screen->driconf.disable_throttling ? 0 : 1;
381 
382    case PIPE_CAP_CONTEXT_PRIORITY_MASK:
383       return PIPE_CONTEXT_PRIORITY_LOW |
384              PIPE_CONTEXT_PRIORITY_MEDIUM |
385              PIPE_CONTEXT_PRIORITY_HIGH;
386 
387    case PIPE_CAP_FRONTEND_NOOP:
388       return true;
389 
390    // XXX: don't hardcode 00:00:02.0 PCI here
391    case PIPE_CAP_PCI_GROUP:
392       return 0;
393    case PIPE_CAP_PCI_BUS:
394       return 0;
395    case PIPE_CAP_PCI_DEVICE:
396       return 2;
397    case PIPE_CAP_PCI_FUNCTION:
398       return 0;
399 
400    case PIPE_CAP_OPENCL_INTEGER_FUNCTIONS:
401    case PIPE_CAP_INTEGER_MULTIPLY_32X16:
402       return true;
403 
404    case PIPE_CAP_ALLOW_DYNAMIC_VAO_FASTPATH:
405       /* Internal details of VF cache make this optimization harmful on GFX
406        * version 8 and 9, because generated VERTEX_BUFFER_STATEs are cached
407        * separately.
408        */
409       return devinfo->ver >= 11;
410 
411    default:
412       return u_pipe_screen_get_param_defaults(pscreen, param);
413    }
414    return 0;
415 }
416 
417 static float
iris_get_paramf(struct pipe_screen * pscreen,enum pipe_capf param)418 iris_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
419 {
420    switch (param) {
421    case PIPE_CAPF_MIN_LINE_WIDTH:
422    case PIPE_CAPF_MIN_LINE_WIDTH_AA:
423    case PIPE_CAPF_MIN_POINT_SIZE:
424    case PIPE_CAPF_MIN_POINT_SIZE_AA:
425       return 1;
426 
427    case PIPE_CAPF_POINT_SIZE_GRANULARITY:
428    case PIPE_CAPF_LINE_WIDTH_GRANULARITY:
429       return 0.1;
430 
431    case PIPE_CAPF_MAX_LINE_WIDTH:
432    case PIPE_CAPF_MAX_LINE_WIDTH_AA:
433       return 7.375f;
434 
435    case PIPE_CAPF_MAX_POINT_SIZE:
436    case PIPE_CAPF_MAX_POINT_SIZE_AA:
437       return 255.0f;
438 
439    case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
440       return 16.0f;
441    case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
442       return 15.0f;
443    case PIPE_CAPF_MIN_CONSERVATIVE_RASTER_DILATE:
444    case PIPE_CAPF_MAX_CONSERVATIVE_RASTER_DILATE:
445    case PIPE_CAPF_CONSERVATIVE_RASTER_DILATE_GRANULARITY:
446       return 0.0f;
447    default:
448       unreachable("unknown param");
449    }
450 }
451 
452 static int
iris_get_shader_param(struct pipe_screen * pscreen,enum pipe_shader_type p_stage,enum pipe_shader_cap param)453 iris_get_shader_param(struct pipe_screen *pscreen,
454                       enum pipe_shader_type p_stage,
455                       enum pipe_shader_cap param)
456 {
457    gl_shader_stage stage = stage_from_pipe(p_stage);
458 
459    /* this is probably not totally correct.. but it's a start: */
460    switch (param) {
461    case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
462       return stage == MESA_SHADER_FRAGMENT ? 1024 : 16384;
463    case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
464    case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
465    case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
466       return stage == MESA_SHADER_FRAGMENT ? 1024 : 0;
467 
468    case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
469       return UINT_MAX;
470 
471    case PIPE_SHADER_CAP_MAX_INPUTS:
472       return stage == MESA_SHADER_VERTEX ? 16 : 32;
473    case PIPE_SHADER_CAP_MAX_OUTPUTS:
474       return 32;
475    case PIPE_SHADER_CAP_MAX_CONST_BUFFER0_SIZE:
476       return 16 * 1024 * sizeof(float);
477    case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
478       return 16;
479    case PIPE_SHADER_CAP_MAX_TEMPS:
480       return 256; /* GL_MAX_PROGRAM_TEMPORARIES_ARB */
481    case PIPE_SHADER_CAP_CONT_SUPPORTED:
482       return 0;
483    case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
484    case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
485    case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
486    case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
487       /* Lie about these to avoid st/mesa's GLSL IR lowering of indirects,
488        * which we don't want.  Our compiler backend will check brw_compiler's
489        * options and call nir_lower_indirect_derefs appropriately anyway.
490        */
491       return true;
492    case PIPE_SHADER_CAP_SUBROUTINES:
493       return 0;
494    case PIPE_SHADER_CAP_INTEGERS:
495       return 1;
496    case PIPE_SHADER_CAP_INT64_ATOMICS:
497    case PIPE_SHADER_CAP_FP16:
498    case PIPE_SHADER_CAP_FP16_DERIVATIVES:
499    case PIPE_SHADER_CAP_FP16_CONST_BUFFERS:
500    case PIPE_SHADER_CAP_INT16:
501    case PIPE_SHADER_CAP_GLSL_16BIT_CONSTS:
502       return 0;
503    case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
504    case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
505    case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
506       return IRIS_MAX_TEXTURE_SAMPLERS;
507    case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
508       return IRIS_MAX_ABOS + IRIS_MAX_SSBOS;
509    case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
510    case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:
511       return 0;
512    case PIPE_SHADER_CAP_PREFERRED_IR:
513       return PIPE_SHADER_IR_NIR;
514    case PIPE_SHADER_CAP_SUPPORTED_IRS: {
515       int irs = 1 << PIPE_SHADER_IR_NIR;
516       if (iris_enable_clover())
517          irs |= 1 << PIPE_SHADER_IR_NIR_SERIALIZED;
518       return irs;
519    }
520    case PIPE_SHADER_CAP_DROUND_SUPPORTED:
521    case PIPE_SHADER_CAP_LDEXP_SUPPORTED:
522       return 1;
523    case PIPE_SHADER_CAP_DFRACEXP_DLDEXP_SUPPORTED:
524    case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
525    case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
526       return 0;
527    default:
528       unreachable("unknown shader param");
529    }
530 }
531 
532 static int
iris_get_compute_param(struct pipe_screen * pscreen,enum pipe_shader_ir ir_type,enum pipe_compute_cap param,void * ret)533 iris_get_compute_param(struct pipe_screen *pscreen,
534                        enum pipe_shader_ir ir_type,
535                        enum pipe_compute_cap param,
536                        void *ret)
537 {
538    struct iris_screen *screen = (struct iris_screen *)pscreen;
539    const struct intel_device_info *devinfo = &screen->devinfo;
540 
541    const uint32_t max_invocations =
542       MIN2(1024, 32 * devinfo->max_cs_workgroup_threads);
543 
544 #define RET(x) do {                  \
545    if (ret)                          \
546       memcpy(ret, x, sizeof(x));     \
547    return sizeof(x);                 \
548 } while (0)
549 
550    switch (param) {
551    case PIPE_COMPUTE_CAP_ADDRESS_BITS:
552       /* This gets queried on clover device init and is never queried by the
553        * OpenGL state tracker.
554        */
555       iris_warn_clover();
556       RET((uint32_t []){ 64 });
557 
558    case PIPE_COMPUTE_CAP_IR_TARGET:
559       if (ret)
560          strcpy(ret, "gen");
561       return 4;
562 
563    case PIPE_COMPUTE_CAP_GRID_DIMENSION:
564       RET((uint64_t []) { 3 });
565 
566    case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
567       RET(((uint64_t []) { 65535, 65535, 65535 }));
568 
569    case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
570       /* MaxComputeWorkGroupSize[0..2] */
571       RET(((uint64_t []) {max_invocations, max_invocations, max_invocations}));
572 
573    case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
574       /* MaxComputeWorkGroupInvocations */
575    case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
576       /* MaxComputeVariableGroupInvocations */
577       RET((uint64_t []) { max_invocations });
578 
579    case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
580       /* MaxComputeSharedMemorySize */
581       RET((uint64_t []) { 64 * 1024 });
582 
583    case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
584       RET((uint32_t []) { 1 });
585 
586    case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
587       RET((uint32_t []) { BRW_SUBGROUP_SIZE });
588 
589    case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
590    case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
591       RET((uint64_t []) { 1 << 30 }); /* TODO */
592 
593    case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
594       RET((uint32_t []) { 400 }); /* TODO */
595 
596    case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS: {
597       RET((uint32_t []) { intel_device_info_subslice_total(devinfo) });
598    }
599 
600    case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
601       /* MaxComputeSharedMemorySize */
602       RET((uint64_t []) { 64 * 1024 });
603 
604    case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
605       /* We could probably allow more; this is the OpenCL minimum */
606       RET((uint64_t []) { 1024 });
607 
608    default:
609       unreachable("unknown compute param");
610    }
611 }
612 
613 static uint64_t
iris_get_timestamp(struct pipe_screen * pscreen)614 iris_get_timestamp(struct pipe_screen *pscreen)
615 {
616    struct iris_screen *screen = (struct iris_screen *) pscreen;
617    const unsigned TIMESTAMP = 0x2358;
618    uint64_t result;
619 
620    iris_reg_read(screen->bufmgr, TIMESTAMP | 1, &result);
621 
622    result = intel_device_info_timebase_scale(&screen->devinfo, result);
623    result &= (1ull << TIMESTAMP_BITS) - 1;
624 
625    return result;
626 }
627 
628 void
iris_screen_destroy(struct iris_screen * screen)629 iris_screen_destroy(struct iris_screen *screen)
630 {
631    iris_destroy_screen_measure(screen);
632    util_queue_destroy(&screen->shader_compiler_queue);
633    glsl_type_singleton_decref();
634    iris_bo_unreference(screen->workaround_bo);
635    u_transfer_helper_destroy(screen->base.transfer_helper);
636    iris_bufmgr_unref(screen->bufmgr);
637    disk_cache_destroy(screen->disk_cache);
638    close(screen->winsys_fd);
639    ralloc_free(screen);
640 }
641 
/**
 * pipe_screen::destroy hook.
 *
 * Simply forwards to iris_pscreen_unref() for the given screen.
 */
static void
iris_screen_unref(struct pipe_screen *pscreen)
{
   struct pipe_screen *const target = pscreen;

   iris_pscreen_unref(target);
}
647 
648 static void
iris_query_memory_info(struct pipe_screen * pscreen,struct pipe_memory_info * info)649 iris_query_memory_info(struct pipe_screen *pscreen,
650                        struct pipe_memory_info *info)
651 {
652 }
653 
654 static const void *
iris_get_compiler_options(struct pipe_screen * pscreen,enum pipe_shader_ir ir,enum pipe_shader_type pstage)655 iris_get_compiler_options(struct pipe_screen *pscreen,
656                           enum pipe_shader_ir ir,
657                           enum pipe_shader_type pstage)
658 {
659    struct iris_screen *screen = (struct iris_screen *) pscreen;
660    gl_shader_stage stage = stage_from_pipe(pstage);
661    assert(ir == PIPE_SHADER_IR_NIR);
662 
663    return screen->compiler->nir_options[stage];
664 }
665 
666 static struct disk_cache *
iris_get_disk_shader_cache(struct pipe_screen * pscreen)667 iris_get_disk_shader_cache(struct pipe_screen *pscreen)
668 {
669    struct iris_screen *screen = (struct iris_screen *) pscreen;
670    return screen->disk_cache;
671 }
672 
673 static int
iris_getparam(int fd,int param,int * value)674 iris_getparam(int fd, int param, int *value)
675 {
676    struct drm_i915_getparam gp = { .param = param, .value = value };
677 
678    if (ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp) == -1)
679       return -errno;
680 
681    return 0;
682 }
683 
/**
 * Convenience wrapper around iris_getparam() that returns the parameter
 * value directly, or -1 when the query fails.
 */
static int
iris_getparam_integer(int fd, int param)
{
   int queried = -1;
   const int err = iris_getparam(fd, param, &queried);

   return err == 0 ? queried : -1;
}
694 
695 static const struct intel_l3_config *
iris_get_default_l3_config(const struct intel_device_info * devinfo,bool compute)696 iris_get_default_l3_config(const struct intel_device_info *devinfo,
697                            bool compute)
698 {
699    bool wants_dc_cache = true;
700    bool has_slm = compute;
701    const struct intel_l3_weights w =
702       intel_get_default_l3_weights(devinfo, wants_dc_cache, has_slm);
703    return intel_get_l3_config(devinfo, w);
704 }
705 
706 static void
iris_shader_debug_log(void * data,unsigned * id,const char * fmt,...)707 iris_shader_debug_log(void *data, unsigned *id, const char *fmt, ...)
708 {
709    struct util_debug_callback *dbg = data;
710    va_list args;
711 
712    if (!dbg->debug_message)
713       return;
714 
715    va_start(args, fmt);
716    dbg->debug_message(dbg->data, id, UTIL_DEBUG_TYPE_SHADER_INFO, fmt, args);
717    va_end(args);
718 }
719 
720 static void
iris_shader_perf_log(void * data,unsigned * id,const char * fmt,...)721 iris_shader_perf_log(void *data, unsigned *id, const char *fmt, ...)
722 {
723    struct util_debug_callback *dbg = data;
724    va_list args;
725    va_start(args, fmt);
726 
727    if (INTEL_DEBUG(DEBUG_PERF)) {
728       va_list args_copy;
729       va_copy(args_copy, args);
730       vfprintf(stderr, fmt, args_copy);
731       va_end(args_copy);
732    }
733 
734    if (dbg->debug_message) {
735       dbg->debug_message(dbg->data, id, UTIL_DEBUG_TYPE_PERF_INFO, fmt, args);
736    }
737 
738    va_end(args);
739 }
740 
741 static void
iris_detect_kernel_features(struct iris_screen * screen)742 iris_detect_kernel_features(struct iris_screen *screen)
743 {
744    /* Kernel 5.2+ */
745    if (intel_gem_supports_syncobj_wait(screen->fd))
746       screen->kernel_features |= KERNEL_HAS_WAIT_FOR_SUBMIT;
747 }
748 
749 static bool
iris_init_identifier_bo(struct iris_screen * screen)750 iris_init_identifier_bo(struct iris_screen *screen)
751 {
752    void *bo_map;
753 
754    bo_map = iris_bo_map(NULL, screen->workaround_bo, MAP_READ | MAP_WRITE);
755    if (!bo_map)
756       return false;
757 
758    assert(iris_bo_is_real(screen->workaround_bo));
759 
760    screen->workaround_bo->real.kflags |=
761       EXEC_OBJECT_CAPTURE | EXEC_OBJECT_ASYNC;
762    screen->workaround_address = (struct iris_address) {
763       .bo = screen->workaround_bo,
764       .offset = ALIGN(
765          intel_debug_write_identifiers(bo_map, 4096, "Iris") + 8, 8),
766    };
767 
768    iris_bo_unmap(screen->workaround_bo);
769 
770    return true;
771 }
772 
773 struct pipe_screen *
iris_screen_create(int fd,const struct pipe_screen_config * config)774 iris_screen_create(int fd, const struct pipe_screen_config *config)
775 {
776    struct iris_screen *screen = rzalloc(NULL, struct iris_screen);
777    if (!screen)
778       return NULL;
779 
780    if (!intel_get_device_info_from_fd(fd, &screen->devinfo))
781       return NULL;
782    screen->pci_id = screen->devinfo.pci_device_id;
783 
784    p_atomic_set(&screen->refcount, 1);
785 
786    if (screen->devinfo.ver < 8 || screen->devinfo.platform == INTEL_PLATFORM_CHV)
787       return NULL;
788 
789    /* Here are the i915 features we need for Iris (in chronological order) :
790     *    - I915_PARAM_HAS_EXEC_NO_RELOC     (3.10)
791     *    - I915_PARAM_HAS_EXEC_HANDLE_LUT   (3.10)
792     *    - I915_PARAM_HAS_EXEC_BATCH_FIRST  (4.13)
793     *    - I915_PARAM_HAS_EXEC_FENCE_ARRAY  (4.14)
794     *    - I915_PARAM_HAS_CONTEXT_ISOLATION (4.16)
795     *
796     * Checking the last feature availability will include all previous ones.
797     */
798    if (iris_getparam_integer(fd, I915_PARAM_HAS_CONTEXT_ISOLATION) <= 0) {
799       debug_error("Kernel is too old for Iris. Consider upgrading to kernel v4.16.\n");
800       return NULL;
801    }
802 
803    driParseConfigFiles(config->options, config->options_info, 0, "iris",
804                        NULL, NULL, NULL, 0, NULL, 0);
805 
806    bool bo_reuse = false;
807    int bo_reuse_mode = driQueryOptioni(config->options, "bo_reuse");
808    switch (bo_reuse_mode) {
809    case DRI_CONF_BO_REUSE_DISABLED:
810       break;
811    case DRI_CONF_BO_REUSE_ALL:
812       bo_reuse = true;
813       break;
814    }
815 
816    brw_process_intel_debug_variable();
817 
818    screen->bufmgr = iris_bufmgr_get_for_fd(&screen->devinfo, fd, bo_reuse);
819    if (!screen->bufmgr)
820       return NULL;
821 
822    screen->fd = iris_bufmgr_get_fd(screen->bufmgr);
823    screen->winsys_fd = os_dupfd_cloexec(fd);
824 
825    screen->id = iris_bufmgr_create_screen_id(screen->bufmgr);
826 
827    screen->workaround_bo =
828       iris_bo_alloc(screen->bufmgr, "workaround", 4096, 4096,
829                     IRIS_MEMZONE_OTHER, BO_ALLOC_NO_SUBALLOC);
830    if (!screen->workaround_bo)
831       return NULL;
832 
833    if (!iris_init_identifier_bo(screen))
834       return NULL;
835 
836    screen->driconf.dual_color_blend_by_location =
837       driQueryOptionb(config->options, "dual_color_blend_by_location");
838    screen->driconf.disable_throttling =
839       driQueryOptionb(config->options, "disable_throttling");
840    screen->driconf.always_flush_cache =
841       driQueryOptionb(config->options, "always_flush_cache");
842    screen->driconf.sync_compile =
843       driQueryOptionb(config->options, "sync_compile");
844    screen->driconf.limit_trig_input_range =
845       driQueryOptionb(config->options, "limit_trig_input_range");
846 
847    screen->precompile = env_var_as_boolean("shader_precompile", true);
848 
849    isl_device_init(&screen->isl_dev, &screen->devinfo);
850 
851    screen->compiler = brw_compiler_create(screen, &screen->devinfo);
852    screen->compiler->shader_debug_log = iris_shader_debug_log;
853    screen->compiler->shader_perf_log = iris_shader_perf_log;
854    screen->compiler->supports_shader_constants = true;
855    screen->compiler->indirect_ubos_use_sampler = screen->devinfo.ver < 12;
856 
857    screen->l3_config_3d = iris_get_default_l3_config(&screen->devinfo, false);
858    screen->l3_config_cs = iris_get_default_l3_config(&screen->devinfo, true);
859 
860    iris_disk_cache_init(screen);
861 
862    slab_create_parent(&screen->transfer_pool,
863                       sizeof(struct iris_transfer), 64);
864 
865    iris_detect_kernel_features(screen);
866 
867    struct pipe_screen *pscreen = &screen->base;
868 
869    iris_init_screen_fence_functions(pscreen);
870    iris_init_screen_resource_functions(pscreen);
871    iris_init_screen_measure(screen);
872 
873    pscreen->destroy = iris_screen_unref;
874    pscreen->get_name = iris_get_name;
875    pscreen->get_vendor = iris_get_vendor;
876    pscreen->get_device_vendor = iris_get_device_vendor;
877    pscreen->get_param = iris_get_param;
878    pscreen->get_shader_param = iris_get_shader_param;
879    pscreen->get_compute_param = iris_get_compute_param;
880    pscreen->get_paramf = iris_get_paramf;
881    pscreen->get_compiler_options = iris_get_compiler_options;
882    pscreen->get_device_uuid = iris_get_device_uuid;
883    pscreen->get_driver_uuid = iris_get_driver_uuid;
884    pscreen->get_disk_shader_cache = iris_get_disk_shader_cache;
885    pscreen->is_format_supported = iris_is_format_supported;
886    pscreen->context_create = iris_create_context;
887    pscreen->get_timestamp = iris_get_timestamp;
888    pscreen->query_memory_info = iris_query_memory_info;
889    pscreen->get_driver_query_group_info = iris_get_monitor_group_info;
890    pscreen->get_driver_query_info = iris_get_monitor_info;
891    iris_init_screen_program_functions(pscreen);
892 
893    genX_call(&screen->devinfo, init_screen_state, screen);
894 
895    glsl_type_singleton_init_or_ref();
896 
897    intel_driver_ds_init();
898 
899    /* FINISHME: Big core vs little core (for CPUs that have both kinds of
900     * cores) and, possibly, thread vs core should be considered here too.
901     */
902    unsigned compiler_threads = 1;
903    const struct util_cpu_caps_t *caps = util_get_cpu_caps();
904    unsigned hw_threads = caps->nr_cpus;
905 
906    if (hw_threads >= 12) {
907       compiler_threads = hw_threads * 3 / 4;
908    } else if (hw_threads >= 6) {
909       compiler_threads = hw_threads - 2;
910    } else if (hw_threads >= 2) {
911       compiler_threads = hw_threads - 1;
912    }
913 
914    if (!util_queue_init(&screen->shader_compiler_queue,
915                         "sh", 64, compiler_threads,
916                         UTIL_QUEUE_INIT_RESIZE_IF_FULL |
917                         UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY,
918                         NULL)) {
919       iris_screen_destroy(screen);
920       return NULL;
921    }
922 
923    return pscreen;
924 }
925