• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2021 Valve Corporation
3  *
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include "ac_nir.h"
8 #include "ac_nir_helpers.h"
9 
10 #include "nir_builder.h"
11 
12 static bool
needs_rounding_mode_16_64(nir_instr * instr)13 needs_rounding_mode_16_64(nir_instr *instr)
14 {
15    if (instr->type != nir_instr_type_alu)
16       return false;
17    nir_alu_instr *alu = nir_instr_as_alu(instr);
18    if (alu->op == nir_op_fquantize2f16)
19       return true;
20    if (alu->def.bit_size != 16 && alu->def.bit_size != 64)
21       return false;
22    if (nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type) != nir_type_float)
23       return false;
24 
25    switch (alu->op) {
26    case nir_op_f2f64:
27    case nir_op_b2f64:
28    case nir_op_f2f16_rtz:
29    case nir_op_b2f16:
30    case nir_op_fsat:
31    case nir_op_fabs:
32    case nir_op_fneg:
33    case nir_op_fsign:
34    case nir_op_ftrunc:
35    case nir_op_fceil:
36    case nir_op_ffloor:
37    case nir_op_ffract:
38    case nir_op_fround_even:
39    case nir_op_fmin:
40    case nir_op_fmax:
41       return false;
42    default:
43       return true;
44    }
45 }
46 
47 static bool
can_use_fmamix(nir_scalar s,enum amd_gfx_level gfx_level)48 can_use_fmamix(nir_scalar s, enum amd_gfx_level gfx_level)
49 {
50    s = nir_scalar_chase_movs(s);
51    if (!list_is_singular(&s.def->uses))
52       return false;
53 
54    if (nir_scalar_is_intrinsic(s) &&
55        nir_scalar_intrinsic_op(s) == nir_intrinsic_load_interpolated_input)
56       return gfx_level >= GFX11;
57 
58    if (!nir_scalar_is_alu(s))
59       return false;
60 
61    switch (nir_scalar_alu_op(s)) {
62    case nir_op_fmul:
63    case nir_op_ffma:
64    case nir_op_fadd:
65    case nir_op_fsub:
66       return true;
67    case nir_op_fsat:
68       return can_use_fmamix(nir_scalar_chase_alu_src(s, 0), gfx_level);
69    default:
70       return false;
71    }
72 }
73 
74 static bool
split_pack_half(nir_builder * b,nir_instr * instr,void * param)75 split_pack_half(nir_builder *b, nir_instr *instr, void *param)
76 {
77    enum amd_gfx_level gfx_level = *(enum amd_gfx_level *)param;
78 
79    if (instr->type != nir_instr_type_alu)
80       return false;
81    nir_alu_instr *alu = nir_instr_as_alu(instr);
82    if (alu->op != nir_op_pack_half_2x16_rtz_split && alu->op != nir_op_pack_half_2x16_split)
83       return false;
84 
85    nir_scalar s = nir_get_scalar(&alu->def, 0);
86 
87    if (!can_use_fmamix(nir_scalar_chase_alu_src(s, 0), gfx_level) ||
88        !can_use_fmamix(nir_scalar_chase_alu_src(s, 1), gfx_level))
89       return false;
90 
91    b->cursor = nir_before_instr(instr);
92 
93    /* Split pack_half into two f2f16 to create v_fma_mix{lo,hi}_f16
94     * in the backend.
95     */
96    nir_def *lo = nir_f2f16(b, nir_ssa_for_alu_src(b, alu, 0));
97    nir_def *hi = nir_f2f16(b, nir_ssa_for_alu_src(b, alu, 1));
98    nir_def_replace(&alu->def, nir_pack_32_2x16_split(b, lo, hi));
99    return true;
100 }
101 
102 bool
ac_nir_opt_pack_half(nir_shader * shader,enum amd_gfx_level gfx_level)103 ac_nir_opt_pack_half(nir_shader *shader, enum amd_gfx_level gfx_level)
104 {
105    if (gfx_level < GFX10)
106       return false;
107 
108    unsigned exec_mode = shader->info.float_controls_execution_mode;
109    bool set_mode = false;
110    if (!nir_is_rounding_mode_rtz(exec_mode, 16)) {
111       nir_foreach_function_impl(impl, shader) {
112          nir_foreach_block(block, impl) {
113             nir_foreach_instr(instr, block) {
114                if (needs_rounding_mode_16_64(instr))
115                   return false;
116             }
117          }
118       }
119       set_mode = true;
120    }
121 
122    bool progress = nir_shader_instructions_pass(shader, split_pack_half,
123                                                 nir_metadata_control_flow,
124                                                 &gfx_level);
125 
126    if (set_mode && progress) {
127       exec_mode &= ~(FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16 | FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64);
128       exec_mode |= FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16 | FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64;
129       shader->info.float_controls_execution_mode = exec_mode;
130    }
131    return progress;
132 }
133