/*
 * Copyright © 2021 Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 */

#include "ac_nir.h"
#include "ac_nir_helpers.h"

#include "nir_builder.h"

12 static bool
needs_rounding_mode_16_64(nir_instr * instr)13 needs_rounding_mode_16_64(nir_instr *instr)
14 {
15 if (instr->type != nir_instr_type_alu)
16 return false;
17 nir_alu_instr *alu = nir_instr_as_alu(instr);
18 if (alu->op == nir_op_fquantize2f16)
19 return true;
20 if (alu->def.bit_size != 16 && alu->def.bit_size != 64)
21 return false;
22 if (nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type) != nir_type_float)
23 return false;
24
25 switch (alu->op) {
26 case nir_op_f2f64:
27 case nir_op_b2f64:
28 case nir_op_f2f16_rtz:
29 case nir_op_b2f16:
30 case nir_op_fsat:
31 case nir_op_fabs:
32 case nir_op_fneg:
33 case nir_op_fsign:
34 case nir_op_ftrunc:
35 case nir_op_fceil:
36 case nir_op_ffloor:
37 case nir_op_ffract:
38 case nir_op_fround_even:
39 case nir_op_fmin:
40 case nir_op_fmax:
41 return false;
42 default:
43 return true;
44 }
45 }
46
47 static bool
can_use_fmamix(nir_scalar s,enum amd_gfx_level gfx_level)48 can_use_fmamix(nir_scalar s, enum amd_gfx_level gfx_level)
49 {
50 s = nir_scalar_chase_movs(s);
51 if (!list_is_singular(&s.def->uses))
52 return false;
53
54 if (nir_scalar_is_intrinsic(s) &&
55 nir_scalar_intrinsic_op(s) == nir_intrinsic_load_interpolated_input)
56 return gfx_level >= GFX11;
57
58 if (!nir_scalar_is_alu(s))
59 return false;
60
61 switch (nir_scalar_alu_op(s)) {
62 case nir_op_fmul:
63 case nir_op_ffma:
64 case nir_op_fadd:
65 case nir_op_fsub:
66 return true;
67 case nir_op_fsat:
68 return can_use_fmamix(nir_scalar_chase_alu_src(s, 0), gfx_level);
69 default:
70 return false;
71 }
72 }
73
74 static bool
split_pack_half(nir_builder * b,nir_instr * instr,void * param)75 split_pack_half(nir_builder *b, nir_instr *instr, void *param)
76 {
77 enum amd_gfx_level gfx_level = *(enum amd_gfx_level *)param;
78
79 if (instr->type != nir_instr_type_alu)
80 return false;
81 nir_alu_instr *alu = nir_instr_as_alu(instr);
82 if (alu->op != nir_op_pack_half_2x16_rtz_split && alu->op != nir_op_pack_half_2x16_split)
83 return false;
84
85 nir_scalar s = nir_get_scalar(&alu->def, 0);
86
87 if (!can_use_fmamix(nir_scalar_chase_alu_src(s, 0), gfx_level) ||
88 !can_use_fmamix(nir_scalar_chase_alu_src(s, 1), gfx_level))
89 return false;
90
91 b->cursor = nir_before_instr(instr);
92
93 /* Split pack_half into two f2f16 to create v_fma_mix{lo,hi}_f16
94 * in the backend.
95 */
96 nir_def *lo = nir_f2f16(b, nir_ssa_for_alu_src(b, alu, 0));
97 nir_def *hi = nir_f2f16(b, nir_ssa_for_alu_src(b, alu, 1));
98 nir_def_replace(&alu->def, nir_pack_32_2x16_split(b, lo, hi));
99 return true;
100 }
101
102 bool
ac_nir_opt_pack_half(nir_shader * shader,enum amd_gfx_level gfx_level)103 ac_nir_opt_pack_half(nir_shader *shader, enum amd_gfx_level gfx_level)
104 {
105 if (gfx_level < GFX10)
106 return false;
107
108 unsigned exec_mode = shader->info.float_controls_execution_mode;
109 bool set_mode = false;
110 if (!nir_is_rounding_mode_rtz(exec_mode, 16)) {
111 nir_foreach_function_impl(impl, shader) {
112 nir_foreach_block(block, impl) {
113 nir_foreach_instr(instr, block) {
114 if (needs_rounding_mode_16_64(instr))
115 return false;
116 }
117 }
118 }
119 set_mode = true;
120 }
121
122 bool progress = nir_shader_instructions_pass(shader, split_pack_half,
123 nir_metadata_control_flow,
124 &gfx_level);
125
126 if (set_mode && progress) {
127 exec_mode &= ~(FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16 | FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64);
128 exec_mode |= FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16 | FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64;
129 shader->info.float_controls_execution_mode = exec_mode;
130 }
131 return progress;
132 }
133