• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2015-2016 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "brw_compiler.h"
25 #include "brw_shader.h"
26 #include "brw_eu.h"
27 #include "brw_nir.h"
28 #include "dev/intel_debug.h"
29 #include "compiler/nir/nir.h"
30 #include "util/u_debug.h"
31 
32 const struct nir_shader_compiler_options brw_scalar_nir_options = {
33    .avoid_ternary_with_two_constants = true,
34    .divergence_analysis_options =
35       (nir_divergence_single_patch_per_tcs_subgroup |
36        nir_divergence_single_patch_per_tes_subgroup |
37        nir_divergence_shader_record_ptr_uniform),
38    .force_indirect_unrolling = nir_var_function_temp,
39    .has_bfe = true,
40    .has_bfi = true,
41    .has_bfm = true,
42    .has_pack_32_4x8 = true,
43    .has_uclz = true,
44    .lower_base_vertex = true,
45    .lower_bitfield_extract = true,
46    .lower_bitfield_insert = true,
47    .lower_device_index_to_zero = true,
48    .lower_fdiv = true,
49    .lower_fisnormal = true,
50    .lower_flrp16 = true,
51    .lower_flrp64 = true,
52    .lower_fmod = true,
53    .lower_hadd64 = true,
54    .lower_insert_byte = true,
55    .lower_insert_word = true,
56    .lower_isign = true,
57    .lower_ldexp = true,
58    .lower_pack_half_2x16 = true,
59    .lower_pack_snorm_2x16 = true,
60    .lower_pack_snorm_4x8 = true,
61    .lower_pack_unorm_2x16 = true,
62    .lower_pack_unorm_4x8 = true,
63    .lower_scmp = true,
64    .lower_to_scalar = true,
65    .lower_uadd_carry = true,
66    .lower_ufind_msb = true,
67    .lower_uniforms_to_ubo = true,
68    .lower_unpack_half_2x16 = true,
69    .lower_unpack_snorm_2x16 = true,
70    .lower_unpack_snorm_4x8 = true,
71    .lower_unpack_unorm_2x16 = true,
72    .lower_unpack_unorm_4x8 = true,
73    .lower_usub_borrow = true,
74    .max_unroll_iterations = 32,
75    .support_16bit_alu = true,
76    .use_interpolated_input_intrinsics = true,
77    .vectorize_io = true,
78    .vectorize_tess_levels = true,
79    .vertex_id_zero_based = true,
80 };
81 
82 struct brw_compiler *
brw_compiler_create(void * mem_ctx,const struct intel_device_info * devinfo)83 brw_compiler_create(void *mem_ctx, const struct intel_device_info *devinfo)
84 {
85    struct brw_compiler *compiler = rzalloc(mem_ctx, struct brw_compiler);
86    assert(devinfo->ver >= 9);
87 
88    compiler->devinfo = devinfo;
89 
90    brw_init_isa_info(&compiler->isa, devinfo);
91 
92    brw_fs_alloc_reg_sets(compiler);
93 
94    compiler->precise_trig = debug_get_bool_option("INTEL_PRECISE_TRIG", false);
95 
96    compiler->use_tcs_multi_patch = devinfo->ver >= 12;
97 
98    /* Default to the sampler since that's what we've done since forever */
99    compiler->indirect_ubos_use_sampler = true;
100 
101    compiler->lower_dpas = devinfo->verx10 < 125 ||
102       intel_device_info_is_mtl(devinfo) ||
103       (intel_device_info_is_arl(devinfo) &&
104        devinfo->platform != INTEL_PLATFORM_ARL_H) ||
105       debug_get_bool_option("INTEL_LOWER_DPAS", false);
106 
107    nir_lower_int64_options int64_options =
108       nir_lower_imul64 |
109       nir_lower_isign64 |
110       nir_lower_divmod64 |
111       nir_lower_imul_high64 |
112       nir_lower_find_lsb64 |
113       nir_lower_ufind_msb64 |
114       nir_lower_bit_count64;
115    nir_lower_doubles_options fp64_options =
116       nir_lower_drcp |
117       nir_lower_dsqrt |
118       nir_lower_drsq |
119       nir_lower_dtrunc |
120       nir_lower_dfloor |
121       nir_lower_dceil |
122       nir_lower_dfract |
123       nir_lower_dround_even |
124       nir_lower_dmod |
125       nir_lower_dsub |
126       nir_lower_ddiv;
127 
128    if (!devinfo->has_64bit_float || INTEL_DEBUG(DEBUG_SOFT64))
129       fp64_options |= nir_lower_fp64_full_software;
130    if (!devinfo->has_64bit_int)
131       int64_options |= (nir_lower_int64_options)~0;
132 
133    /* The Bspec's section titled "Instruction_multiply[DevBDW+]" claims that
134     * destination type can be Quadword and source type Doubleword for Gfx8 and
135     * Gfx9. So, lower 64 bit multiply instruction on rest of the platforms.
136     */
137    if (devinfo->ver > 9)
138       int64_options |= nir_lower_imul_2x32_64;
139 
140    /* We want the GLSL compiler to emit code that uses condition codes */
141    for (int i = 0; i < MESA_ALL_SHADER_STAGES; i++) {
142       struct nir_shader_compiler_options *nir_options =
143          rzalloc(compiler, struct nir_shader_compiler_options);
144       *nir_options = brw_scalar_nir_options;
145       int64_options |= nir_lower_usub_sat64;
146 
147       /* Gfx11 loses LRP. */
148       nir_options->lower_flrp32 = devinfo->ver >= 11;
149 
150       nir_options->lower_fpow = devinfo->ver >= 12;
151 
152       nir_options->has_rotate16 = devinfo->ver >= 11;
153       nir_options->has_rotate32 = devinfo->ver >= 11;
154       nir_options->has_iadd3 = devinfo->verx10 >= 125;
155 
156       nir_options->has_sdot_4x8 = devinfo->ver >= 12;
157       nir_options->has_udot_4x8 = devinfo->ver >= 12;
158       nir_options->has_sudot_4x8 = devinfo->ver >= 12;
159       nir_options->has_sdot_4x8_sat = devinfo->ver >= 12;
160       nir_options->has_udot_4x8_sat = devinfo->ver >= 12;
161       nir_options->has_sudot_4x8_sat = devinfo->ver >= 12;
162 
163       nir_options->lower_int64_options = int64_options;
164       nir_options->lower_doubles_options = fp64_options;
165 
166       nir_options->unify_interfaces = i < MESA_SHADER_FRAGMENT;
167 
168       nir_options->force_indirect_unrolling |=
169          brw_nir_no_indirect_mask(compiler, i);
170 
171       if (compiler->use_tcs_multi_patch) {
172          /* TCS MULTI_PATCH mode has multiple patches per subgroup */
173          nir_options->divergence_analysis_options &=
174             ~nir_divergence_single_patch_per_tcs_subgroup;
175       }
176 
177       if (devinfo->ver < 12)
178          nir_options->divergence_analysis_options |=
179             nir_divergence_single_prim_per_subgroup;
180 
181       compiler->nir_options[i] = nir_options;
182    }
183 
184    compiler->mesh.mue_header_packing =
185          (unsigned)debug_get_num_option("INTEL_MESH_HEADER_PACKING", 3);
186    compiler->mesh.mue_compaction =
187          debug_get_bool_option("INTEL_MESH_COMPACTION", true);
188 
189    return compiler;
190 }
191 
/**
 * Append one bit to a 64-bit accumulator.
 *
 * The accumulator is shifted left by one (its previous top bit is discarded)
 * and the new bit is placed in the least significant position.
 *
 * \param val  accumulator to update in place
 * \param add  bit value to append
 */
static void
insert_u64_bit(uint64_t *val, bool add)
{
   const uint64_t low_bit = add ? 1u : 0u;

   *val = (*val << 1) | low_bit;
}
197 
198 uint64_t
brw_get_compiler_config_value(const struct brw_compiler * compiler)199 brw_get_compiler_config_value(const struct brw_compiler *compiler)
200 {
201    uint64_t config = 0;
202    unsigned bits = 0;
203 
204    insert_u64_bit(&config, compiler->precise_trig);
205    bits++;
206    insert_u64_bit(&config, compiler->lower_dpas);
207    bits++;
208    insert_u64_bit(&config, compiler->mesh.mue_compaction);
209    bits++;
210 
211    uint64_t mask = DEBUG_DISK_CACHE_MASK;
212    bits += util_bitcount64(mask);
213 
214    u_foreach_bit64(bit, mask)
215       insert_u64_bit(&config, INTEL_DEBUG(1ULL << bit));
216 
217    mask = SIMD_DISK_CACHE_MASK;
218    bits += util_bitcount64(mask);
219 
220    u_foreach_bit64(bit, mask)
221       insert_u64_bit(&config, (intel_simd & (1ULL << bit)) != 0);
222 
223    mask = 3;
224    bits += util_bitcount64(mask);
225 
226    u_foreach_bit64(bit, mask)
227       insert_u64_bit(&config, (compiler->mesh.mue_header_packing & (1ULL << bit)) != 0);
228 
229    assert(bits <= util_bitcount64(UINT64_MAX));
230 
231    return config;
232 }
233 
234 void
brw_device_sha1(char * hex,const struct intel_device_info * devinfo)235 brw_device_sha1(char *hex,
236                 const struct intel_device_info *devinfo) {
237    struct mesa_sha1 ctx;
238    _mesa_sha1_init(&ctx);
239    brw_device_sha1_update(&ctx, devinfo);
240    unsigned char result[20];
241    _mesa_sha1_final(&ctx, result);
242    _mesa_sha1_format(hex, result);
243 }
244 
245 unsigned
brw_prog_data_size(gl_shader_stage stage)246 brw_prog_data_size(gl_shader_stage stage)
247 {
248    static const size_t stage_sizes[] = {
249       [MESA_SHADER_VERTEX]       = sizeof(struct brw_vs_prog_data),
250       [MESA_SHADER_TESS_CTRL]    = sizeof(struct brw_tcs_prog_data),
251       [MESA_SHADER_TESS_EVAL]    = sizeof(struct brw_tes_prog_data),
252       [MESA_SHADER_GEOMETRY]     = sizeof(struct brw_gs_prog_data),
253       [MESA_SHADER_FRAGMENT]     = sizeof(struct brw_wm_prog_data),
254       [MESA_SHADER_COMPUTE]      = sizeof(struct brw_cs_prog_data),
255       [MESA_SHADER_TASK]         = sizeof(struct brw_task_prog_data),
256       [MESA_SHADER_MESH]         = sizeof(struct brw_mesh_prog_data),
257       [MESA_SHADER_RAYGEN]       = sizeof(struct brw_bs_prog_data),
258       [MESA_SHADER_ANY_HIT]      = sizeof(struct brw_bs_prog_data),
259       [MESA_SHADER_CLOSEST_HIT]  = sizeof(struct brw_bs_prog_data),
260       [MESA_SHADER_MISS]         = sizeof(struct brw_bs_prog_data),
261       [MESA_SHADER_INTERSECTION] = sizeof(struct brw_bs_prog_data),
262       [MESA_SHADER_CALLABLE]     = sizeof(struct brw_bs_prog_data),
263       [MESA_SHADER_KERNEL]       = sizeof(struct brw_cs_prog_data),
264    };
265    assert((int)stage >= 0 && stage < ARRAY_SIZE(stage_sizes));
266    return stage_sizes[stage];
267 }
268 
269 unsigned
brw_prog_key_size(gl_shader_stage stage)270 brw_prog_key_size(gl_shader_stage stage)
271 {
272    static const size_t stage_sizes[] = {
273       [MESA_SHADER_VERTEX]       = sizeof(struct brw_vs_prog_key),
274       [MESA_SHADER_TESS_CTRL]    = sizeof(struct brw_tcs_prog_key),
275       [MESA_SHADER_TESS_EVAL]    = sizeof(struct brw_tes_prog_key),
276       [MESA_SHADER_GEOMETRY]     = sizeof(struct brw_gs_prog_key),
277       [MESA_SHADER_FRAGMENT]     = sizeof(struct brw_wm_prog_key),
278       [MESA_SHADER_COMPUTE]      = sizeof(struct brw_cs_prog_key),
279       [MESA_SHADER_TASK]         = sizeof(struct brw_task_prog_key),
280       [MESA_SHADER_MESH]         = sizeof(struct brw_mesh_prog_key),
281       [MESA_SHADER_RAYGEN]       = sizeof(struct brw_bs_prog_key),
282       [MESA_SHADER_ANY_HIT]      = sizeof(struct brw_bs_prog_key),
283       [MESA_SHADER_CLOSEST_HIT]  = sizeof(struct brw_bs_prog_key),
284       [MESA_SHADER_MISS]         = sizeof(struct brw_bs_prog_key),
285       [MESA_SHADER_INTERSECTION] = sizeof(struct brw_bs_prog_key),
286       [MESA_SHADER_CALLABLE]     = sizeof(struct brw_bs_prog_key),
287       [MESA_SHADER_KERNEL]       = sizeof(struct brw_cs_prog_key),
288    };
289    assert((int)stage >= 0 && stage < ARRAY_SIZE(stage_sizes));
290    return stage_sizes[stage];
291 }
292 
293 void
brw_write_shader_relocs(const struct brw_isa_info * isa,void * program,const struct brw_stage_prog_data * prog_data,struct brw_shader_reloc_value * values,unsigned num_values)294 brw_write_shader_relocs(const struct brw_isa_info *isa,
295                         void *program,
296                         const struct brw_stage_prog_data *prog_data,
297                         struct brw_shader_reloc_value *values,
298                         unsigned num_values)
299 {
300    for (unsigned i = 0; i < prog_data->num_relocs; i++) {
301       assert(prog_data->relocs[i].offset % 8 == 0);
302       void *dst = program + prog_data->relocs[i].offset;
303       for (unsigned j = 0; j < num_values; j++) {
304          if (prog_data->relocs[i].id == values[j].id) {
305             uint32_t value = values[j].value + prog_data->relocs[i].delta;
306             switch (prog_data->relocs[i].type) {
307             case BRW_SHADER_RELOC_TYPE_U32:
308                *(uint32_t *)dst = value;
309                break;
310             case BRW_SHADER_RELOC_TYPE_MOV_IMM:
311                brw_update_reloc_imm(isa, dst, value);
312                break;
313             default:
314                unreachable("Invalid relocation type");
315             }
316             break;
317          }
318       }
319    }
320 }
321