• Home
  • Raw
  • Download

Lines Matching +full:opt +full:- +full:out

3  * SPDX-License-Identifier: MIT
12 #define OPT(nir, pass, ...) ({ \ macro
25 switch (nir->info.stage) { in nak_nir_workgroup_has_one_subgroup()
42 if (nir->info.workgroup_size_variable) in nak_nir_workgroup_has_one_subgroup()
45 uint16_t wg_sz = nir->info.workgroup_size[0] * in nak_nir_workgroup_has_one_subgroup()
46 nir->info.workgroup_size[1] * in nak_nir_workgroup_has_one_subgroup()
47 nir->info.workgroup_size[2]; in nak_nir_workgroup_has_one_subgroup()
63 (nir->options->lower_flrp16 ? 16 : 0) | in optimize_nir()
64 (nir->options->lower_flrp32 ? 32 : 0) | in optimize_nir()
65 (nir->options->lower_flrp64 ? 64 : 0); in optimize_nir()
71 * https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13955 in optimize_nir()
76 if (nir->info.stage != MESA_SHADER_KERNEL) in optimize_nir()
77 OPT(nir, nir_split_array_vars, nir_var_function_temp); in optimize_nir()
79 OPT(nir, nir_shrink_vec_array_vars, nir_var_function_temp); in optimize_nir()
80 OPT(nir, nir_opt_deref); in optimize_nir()
81 if (OPT(nir, nir_opt_memcpy)) in optimize_nir()
82 OPT(nir, nir_split_var_copies); in optimize_nir()
84 OPT(nir, nir_lower_vars_to_ssa); in optimize_nir()
91 OPT(nir, nir_opt_find_array_copies); in optimize_nir()
93 OPT(nir, nir_opt_copy_prop_vars); in optimize_nir()
94 OPT(nir, nir_opt_dead_write_vars); in optimize_nir()
95 OPT(nir, nir_opt_combine_stores, nir_var_all); in optimize_nir()
97 OPT(nir, nir_lower_alu_to_scalar, NULL, NULL); in optimize_nir()
98 OPT(nir, nir_lower_phis_to_scalar, false); in optimize_nir()
99 OPT(nir, nir_lower_frexp); in optimize_nir()
100 OPT(nir, nir_copy_prop); in optimize_nir()
101 OPT(nir, nir_opt_dce); in optimize_nir()
102 OPT(nir, nir_opt_cse); in optimize_nir()
104 OPT(nir, nir_opt_peephole_select, 0, false, false); in optimize_nir()
105 OPT(nir, nir_opt_intrinsics); in optimize_nir()
106 OPT(nir, nir_opt_idiv_const, 32); in optimize_nir()
107 OPT(nir, nir_opt_algebraic); in optimize_nir()
108 OPT(nir, nir_lower_constant_convert_alu_types); in optimize_nir()
109 OPT(nir, nir_opt_constant_folding); in optimize_nir()
112 if (OPT(nir, nir_lower_flrp, lower_flrp, false /* always_precise */)) in optimize_nir()
113 OPT(nir, nir_opt_constant_folding); in optimize_nir()
118 OPT(nir, nir_opt_dead_cf); in optimize_nir()
119 if (OPT(nir, nir_opt_loop)) { in optimize_nir()
124 OPT(nir, nir_copy_prop); in optimize_nir()
125 OPT(nir, nir_opt_dce); in optimize_nir()
127 OPT(nir, nir_opt_if, nir_opt_if_optimize_phi_true_false); in optimize_nir()
128 OPT(nir, nir_opt_conditional_discard); in optimize_nir()
129 if (nir->options->max_unroll_iterations != 0) { in optimize_nir()
130 OPT(nir, nir_opt_loop_unroll); in optimize_nir()
132 OPT(nir, nir_opt_remove_phis); in optimize_nir()
133 OPT(nir, nir_opt_gcm, false); in optimize_nir()
134 OPT(nir, nir_opt_undef); in optimize_nir()
135 OPT(nir, nir_lower_pack); in optimize_nir()
138 OPT(nir, nir_remove_dead_variables, nir_var_function_temp, NULL); in optimize_nir()
150 switch (instr->type) { in lower_bit_size_cb()
153 if (nir_op_infos[alu->op].is_conversion) in lower_bit_size_cb()
156 switch (alu->op) { in lower_bit_size_cb()
162 * 32-bit and so the bit size of the instruction is given by the in lower_bit_size_cb()
165 return alu->src[0].src.ssa->bit_size == 32 ? 0 : 32; in lower_bit_size_cb()
171 ? alu->src[0].src.ssa->bit_size in lower_bit_size_cb()
172 : alu->def.bit_size; in lower_bit_size_cb()
176 /* TODO: Some hardware has native 16-bit support */ in lower_bit_size_cb()
185 switch (intrin->intrinsic) { in lower_bit_size_cb()
187 if (intrin->src[0].ssa->bit_size != 1 && in lower_bit_size_cb()
188 intrin->src[0].ssa->bit_size < 32) in lower_bit_size_cb()
206 if (intrin->src[0].ssa->bit_size < 32) in lower_bit_size_cb()
217 if (phi->def.bit_size < 32 && phi->def.bit_size != 1) in lower_bit_size_cb()
230 return nir_udiv(b, nir_iadd(b, n, nir_iadd_imm(b, d, -1)), d); in nir_udiv_round_up()
237 switch (intrin->intrinsic) { in nak_nir_lower_subgroup_id_intrin()
239 b->cursor = nir_instr_remove(&intrin->instr); in nak_nir_lower_subgroup_id_intrin()
242 if (nak_nir_workgroup_has_one_subgroup(b->shader)) { in nak_nir_lower_subgroup_id_intrin()
245 assert(b->shader->info.cs.derivative_group == DERIVATIVE_GROUP_NONE); in nak_nir_lower_subgroup_id_intrin()
255 nir_def_rewrite_uses(&intrin->def, num_subgroups); in nak_nir_lower_subgroup_id_intrin()
260 b->cursor = nir_instr_remove(&intrin->instr); in nak_nir_lower_subgroup_id_intrin()
263 if (nak_nir_workgroup_has_one_subgroup(b->shader)) { in nak_nir_lower_subgroup_id_intrin()
266 assert(b->shader->info.cs.derivative_group == DERIVATIVE_GROUP_NONE); in nak_nir_lower_subgroup_id_intrin()
272 nir_def_rewrite_uses(&intrin->def, subgroup_id); in nak_nir_lower_subgroup_id_intrin()
305 OPT(nir, nir_lower_tex, &tex_options); in nak_preprocess_nir()
306 OPT(nir, nir_normalize_cubemap_coords); in nak_preprocess_nir()
311 OPT(nir, nir_lower_image, &image_options); in nak_preprocess_nir()
313 OPT(nir, nir_lower_global_vars_to_local); in nak_preprocess_nir()
315 OPT(nir, nir_split_var_copies); in nak_preprocess_nir()
316 OPT(nir, nir_split_struct_vars, nir_var_function_temp); in nak_preprocess_nir()
321 OPT(nir, nir_lower_load_const_to_scalar); in nak_preprocess_nir()
322 OPT(nir, nir_lower_var_copies); in nak_preprocess_nir()
323 OPT(nir, nir_lower_system_values); in nak_preprocess_nir()
324 OPT(nir, nak_nir_lower_subgroup_id); in nak_preprocess_nir()
325 OPT(nir, nir_lower_compute_system_values, NULL); in nak_preprocess_nir()
332 return NAK_ATTR_GENERIC_START + (attrib - VERT_ATTRIB_GENERIC0) * 0x10; in nak_attribute_attr_addr()
347 var->data.driver_location = in nak_nir_lower_vs_inputs()
348 nak_attribute_attr_addr(var->data.location); in nak_nir_lower_vs_inputs()
351 progress |= OPT(nir, nir_lower_io, nir_var_shader_in, type_size_vec4_bytes, in nak_nir_lower_vs_inputs()
361 return NAK_ATTR_PATCH_START + (slot - VARYING_SLOT_PATCH0) * 0x10; in nak_varying_attr_addr()
363 return NAK_ATTR_GENERIC_START + (slot - VARYING_SLOT_VAR0) * 0x10; in nak_varying_attr_addr()
420 b->cursor = nir_before_instr(&intrin->instr); in nak_nir_lower_system_value_intrin()
423 switch (intrin->intrinsic) { in nak_nir_lower_system_value_intrin()
426 val = nir_load_input(b, intrin->def.num_components, 32, in nak_nir_lower_system_value_intrin()
433 assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL || in nak_nir_lower_system_value_intrin()
434 b->shader->info.stage == MESA_SHADER_TESS_EVAL || in nak_nir_lower_system_value_intrin()
435 b->shader->info.stage == MESA_SHADER_GEOMETRY); in nak_nir_lower_system_value_intrin()
447 nir_system_value_from_intrinsic(intrin->intrinsic); in nak_nir_lower_system_value_intrin()
449 val = nir_load_input(b, intrin->def.num_components, 32, in nak_nir_lower_system_value_intrin()
468 nir_system_value_from_intrinsic(intrin->intrinsic); in nak_nir_lower_system_value_intrin()
474 if (intrin->def.bit_size == 64) { in nak_nir_lower_system_value_intrin()
477 assert(intrin->def.bit_size == 32); in nak_nir_lower_system_value_intrin()
478 val = nir_pad_vector_imm_int(b, val, 0, intrin->def.num_components); in nak_nir_lower_system_value_intrin()
490 intrin->intrinsic == nir_intrinsic_load_workgroup_id_zero_base ? in nak_nir_lower_system_value_intrin()
492 nir_system_value_from_intrinsic(intrin->intrinsic); in nak_nir_lower_system_value_intrin()
495 assert(intrin->def.num_components <= 3); in nak_nir_lower_system_value_intrin()
496 for (unsigned c = 0; c < intrin->def.num_components; c++) { in nak_nir_lower_system_value_intrin()
500 val = nir_vec(b, comps, intrin->def.num_components); in nak_nir_lower_system_value_intrin()
505 /* Unlike load_helper_invocation, this one isn't re-orderable */ in nak_nir_lower_system_value_intrin()
532 nir_local_variable_create(b->impl, glsl_uvec2_type(), NULL); in nak_nir_lower_system_value_intrin()
560 if (intrin->def.bit_size == 64) in nak_nir_lower_system_value_intrin()
566 val = nir_imm_int(b, nak->warps_per_sm); in nak_nir_lower_system_value_intrin()
588 if (intrin->def.bit_size == 1) in nak_nir_lower_system_value_intrin()
591 nir_def_rewrite_uses(&intrin->def, val); in nak_nir_lower_system_value_intrin()
612 var->data.driver_location = nak_varying_attr_addr(var->data.location); in nak_nir_lower_varyings()
614 OPT(nir, nir_lower_io, modes, type_size_vec4_bytes, in nak_nir_lower_varyings()
628 u_foreach_bit(b, nir_xfb->buffers_written) { in nak_xfb_from_nir()
629 nak_xfb.stride[b] = nir_xfb->buffers[b].stride; in nak_xfb_from_nir()
630 nak_xfb.stream[b] = nir_xfb->buffer_to_stream[b]; in nak_xfb_from_nir()
634 for (unsigned o = 0; o < nir_xfb->output_count; o++) { in nak_xfb_from_nir()
635 const nir_xfb_output_info *out = &nir_xfb->outputs[o]; in nak_xfb_from_nir() local
636 const uint8_t b = out->buffer; in nak_xfb_from_nir()
637 assert(nir_xfb->buffers_written & BITFIELD_BIT(b)); in nak_xfb_from_nir()
639 const uint16_t attr_addr = nak_varying_attr_addr(out->location); in nak_xfb_from_nir()
643 assert(out->offset % 4 == 0); in nak_xfb_from_nir()
644 uint8_t out_idx = out->offset / 4; in nak_xfb_from_nir()
646 u_foreach_bit(c, out->component_mask) in nak_xfb_from_nir()
686 if (nak->sm >= 70) { in load_interpolated_input()
704 } else if (nak->sm >= 50) { in load_interpolated_input()
727 unreachable("Figure out input interpolation on Kepler"); in load_interpolated_input()
736 nir_imm_int(b, fs_key->sample_locations_cb), in load_sample_pos_at()
737 nir_imm_int(b, fs_key->sample_locations_offset), in load_sample_pos_at()
740 .range = fs_key->sample_locations_offset + 8); in load_sample_pos_at()
758 if (bary->intrinsic == nir_intrinsic_load_barycentric_coord_at_sample || in load_barycentric_offset()
759 bary->intrinsic == nir_intrinsic_load_barycentric_at_sample) { in load_barycentric_offset()
760 nir_def *sample_id = bary->src[0].ssa; in load_barycentric_offset()
762 offset_f = nir_fadd_imm(b, sample_pos, -0.5); in load_barycentric_offset()
764 offset_f = bary->src[0].ssa; in load_barycentric_offset()
767 offset_f = nir_fclamp(b, offset_f, nir_imm_float(b, -0.5), in load_barycentric_offset()
788 switch (intrin->intrinsic) { in lower_fs_input_intrin()
790 if (!(ctx->fs_key && ctx->fs_key->force_sample_shading)) in lower_fs_input_intrin()
793 intrin->intrinsic = nir_intrinsic_load_barycentric_sample; in lower_fs_input_intrin()
799 b->cursor = nir_before_instr(&intrin->instr); in lower_fs_input_intrin()
802 b->shader->info.fs.uses_sample_shading ? NAK_INTERP_LOC_CENTROID in lower_fs_input_intrin()
805 intrin->intrinsic == nir_intrinsic_load_point_coord ? in lower_fs_input_intrin()
809 nir_def *coord = load_interpolated_input(b, intrin->def.num_components, in lower_fs_input_intrin()
813 ctx->nak); in lower_fs_input_intrin()
815 nir_def_rewrite_uses(&intrin->def, coord); in lower_fs_input_intrin()
816 nir_instr_remove(&intrin->instr); in lower_fs_input_intrin()
822 b->cursor = nir_before_instr(&intrin->instr); in lower_fs_input_intrin()
825 nir_src_as_uint(intrin->src[0]) + in lower_fs_input_intrin()
837 for (unsigned c = 0; c < intrin->def.num_components; c++) { in lower_fs_input_intrin()
841 nir_def *res = nir_vec(b, comps, intrin->def.num_components); in lower_fs_input_intrin()
843 nir_def_rewrite_uses(&intrin->def, res); in lower_fs_input_intrin()
844 nir_instr_remove(&intrin->instr); in lower_fs_input_intrin()
854 b->cursor = nir_before_instr(&intrin->instr); in lower_fs_input_intrin()
868 switch (intrin->intrinsic) { in lower_fs_input_intrin()
872 offset = load_barycentric_offset(b, intrin, ctx->fs_key); in lower_fs_input_intrin()
889 nir_def *res = load_interpolated_input(b, intrin->def.num_components, in lower_fs_input_intrin()
891 inv_w, offset, ctx->nak); in lower_fs_input_intrin()
893 nir_def_rewrite_uses(&intrin->def, res); in lower_fs_input_intrin()
894 nir_instr_remove(&intrin->instr); in lower_fs_input_intrin()
900 b->cursor = nir_before_instr(&intrin->instr); in lower_fs_input_intrin()
903 nir_src_as_uint(intrin->src[1]) + in lower_fs_input_intrin()
906 nir_intrinsic_instr *bary = nir_src_as_intrinsic(intrin->src[0]); in lower_fs_input_intrin()
917 switch (bary->intrinsic) { in lower_fs_input_intrin()
921 offset = load_barycentric_offset(b, bary, ctx->fs_key); in lower_fs_input_intrin()
942 nir_def *res = load_interpolated_input(b, intrin->def.num_components, in lower_fs_input_intrin()
944 inv_w, offset, ctx->nak); in lower_fs_input_intrin()
946 nir_def_rewrite_uses(&intrin->def, res); in lower_fs_input_intrin()
947 nir_instr_remove(&intrin->instr); in lower_fs_input_intrin()
953 if (!b->shader->info.fs.uses_sample_shading && in lower_fs_input_intrin()
954 !(ctx->fs_key && ctx->fs_key->force_sample_shading)) in lower_fs_input_intrin()
957 b->cursor = nir_after_instr(&intrin->instr); in lower_fs_input_intrin()
962 mask = nir_iand(b, &intrin->def, mask); in lower_fs_input_intrin()
963 nir_def_rewrite_uses_after(&intrin->def, mask, mask->parent_instr); in lower_fs_input_intrin()
969 b->cursor = nir_before_instr(&intrin->instr); in lower_fs_input_intrin()
972 nir_def *sample_pos = load_sample_pos_at(b, sample_id, ctx->fs_key); in lower_fs_input_intrin()
974 nir_def_rewrite_uses(&intrin->def, sample_pos); in lower_fs_input_intrin()
975 nir_instr_remove(&intrin->instr); in lower_fs_input_intrin()
981 b->cursor = nir_before_instr(&intrin->instr); in lower_fs_input_intrin()
983 unsigned vertex_id = nir_src_as_uint(intrin->src[0]); in lower_fs_input_intrin()
987 nir_src_as_uint(intrin->src[1]) + in lower_fs_input_intrin()
991 for (unsigned c = 0; c < intrin->def.num_components; c++) { in lower_fs_input_intrin()
996 nir_def *res = nir_vec(b, comps, intrin->num_components); in lower_fs_input_intrin()
998 nir_def_rewrite_uses(&intrin->def, res); in lower_fs_input_intrin()
999 nir_instr_remove(&intrin->instr); in lower_fs_input_intrin()
1039 if (nir->info.outputs_written == 0) in nak_nir_lower_fs_outputs()
1044 nir->num_outputs = 0; in nak_nir_lower_fs_outputs()
1046 switch (var->data.location) { in nak_nir_lower_fs_outputs()
1048 assert(var->data.index == 0); in nak_nir_lower_fs_outputs()
1049 assert(var->data.location_frac == 0); in nak_nir_lower_fs_outputs()
1050 var->data.driver_location = NAK_FS_OUT_DEPTH; in nak_nir_lower_fs_outputs()
1059 assert(var->data.index == 0); in nak_nir_lower_fs_outputs()
1060 assert(var->data.location_frac == 0); in nak_nir_lower_fs_outputs()
1061 var->data.driver_location = NAK_FS_OUT_SAMPLE_MASK; in nak_nir_lower_fs_outputs()
1064 assert(var->data.location >= FRAG_RESULT_DATA0); in nak_nir_lower_fs_outputs()
1065 assert(var->data.index < 2); in nak_nir_lower_fs_outputs()
1066 const unsigned out = in nak_nir_lower_fs_outputs() local
1067 (var->data.location - FRAG_RESULT_DATA0) + var->data.index; in nak_nir_lower_fs_outputs()
1068 var->data.driver_location = NAK_FS_OUT_COLOR(out); in nak_nir_lower_fs_outputs()
1092 if (low->intrinsic == nir_intrinsic_load_ubo) in nak_mem_vectorize_cb()
1111 /* Reads can over-fetch a bit if the alignment is okay. */ in nak_mem_access_size_align()
1114 bytes_pow2 = 1 << (util_last_bit(bytes) - 1); in nak_mem_access_size_align()
1126 * the 16bit value out. Fortunately, nir_lower_mem_access_bit_sizes() in nak_mem_access_size_align()
1127 * can handle over-alignment for reads. in nak_mem_access_size_align()
1162 if (func->impl && !exec_list_is_empty(&func->impl->locals)) in nir_shader_has_local_variables()
1190 OPT(nir, nir_lower_subgroups, &subgroups_options); in nak_postprocess_nir()
1191 OPT(nir, nak_nir_lower_scan_reduce); in nak_postprocess_nir()
1194 OPT(nir, nir_lower_vars_to_explicit_types, nir_var_function_temp, in nak_postprocess_nir()
1196 OPT(nir, nir_lower_explicit_io, nir_var_function_temp, in nak_postprocess_nir()
1201 OPT(nir, nir_opt_shrink_vectors); in nak_postprocess_nir()
1210 OPT(nir, nir_opt_load_store_vectorize, &vectorize_opts); in nak_postprocess_nir()
1216 OPT(nir, nir_lower_mem_access_bit_sizes, &mem_bit_size_options); in nak_postprocess_nir()
1217 OPT(nir, nir_lower_bit_size, lower_bit_size_cb, (void *)nak); in nak_postprocess_nir()
1219 OPT(nir, nir_opt_combine_barriers, NULL, NULL); in nak_postprocess_nir()
1223 OPT(nir, nak_nir_lower_tex, nak); in nak_postprocess_nir()
1224 OPT(nir, nir_lower_idiv, NULL); in nak_postprocess_nir()
1228 OPT(nir, nir_lower_indirect_derefs, 0, UINT32_MAX); in nak_postprocess_nir()
1230 if (nir->info.stage == MESA_SHADER_TESS_EVAL) { in nak_postprocess_nir()
1231 OPT(nir, nir_lower_tess_coord_z, in nak_postprocess_nir()
1232 nir->info.tess._primitive_mode == TESS_PRIMITIVE_TRIANGLES); in nak_postprocess_nir()
1235 OPT(nir, nak_nir_lower_system_values, nak); in nak_postprocess_nir()
1237 switch (nir->info.stage) { in nak_postprocess_nir()
1239 OPT(nir, nak_nir_lower_vs_inputs); in nak_postprocess_nir()
1240 OPT(nir, nak_nir_lower_varyings, nir_var_shader_out); in nak_postprocess_nir()
1241 OPT(nir, nir_opt_constant_folding); in nak_postprocess_nir()
1242 OPT(nir, nak_nir_lower_vtg_io, nak); in nak_postprocess_nir()
1247 OPT(nir, nak_nir_lower_varyings, nir_var_shader_in | nir_var_shader_out); in nak_postprocess_nir()
1248 OPT(nir, nir_opt_constant_folding); in nak_postprocess_nir()
1249 OPT(nir, nak_nir_lower_vtg_io, nak); in nak_postprocess_nir()
1253 OPT(nir, nak_nir_lower_fs_inputs, nak, fs_key); in nak_postprocess_nir()
1254 OPT(nir, nak_nir_lower_fs_outputs); in nak_postprocess_nir()
1258 OPT(nir, nak_nir_lower_varyings, nir_var_shader_in | nir_var_shader_out); in nak_postprocess_nir()
1259 OPT(nir, nir_opt_constant_folding); in nak_postprocess_nir()
1260 OPT(nir, nak_nir_lower_vtg_io, nak); in nak_postprocess_nir()
1261 OPT(nir, nak_nir_lower_gs_intrinsics); in nak_postprocess_nir()
1272 OPT(nir, nir_lower_doubles, NULL, nak->nir_options.lower_doubles_options); in nak_postprocess_nir()
1273 OPT(nir, nir_lower_int64); in nak_postprocess_nir()
1279 OPT(nir, nir_opt_algebraic_late); in nak_postprocess_nir()
1280 OPT(nir, nak_nir_lower_algebraic_late, nak); in nak_postprocess_nir()
1285 if ((nak->nir_options.lower_doubles_options & nir_lower_dsat) && in nak_postprocess_nir()
1286 !(nak->nir_options.lower_doubles_options & nir_lower_dminmax)) in nak_postprocess_nir()
1287 OPT(nir, nir_lower_doubles, NULL, nir_lower_dsat); in nak_postprocess_nir()
1290 OPT(nir, nir_opt_constant_folding); in nak_postprocess_nir()
1291 OPT(nir, nir_copy_prop); in nak_postprocess_nir()
1292 OPT(nir, nir_opt_dce); in nak_postprocess_nir()
1293 OPT(nir, nir_opt_cse); in nak_postprocess_nir()
1299 OPT(nir, nak_nir_add_barriers, nak); in nak_postprocess_nir()
1301 /* Re-index blocks and compact SSA defs because we'll use them to index in nak_postprocess_nir()
1305 if (func->impl) { in nak_postprocess_nir()
1306 nir_index_blocks(func->impl); in nak_postprocess_nir()
1307 nir_index_ssa_defs(func->impl); in nak_postprocess_nir()
1352 .base = nir_scalar_chase_alu_src(addr_s, 1 - i), in nak_get_io_addr_offset()