1 /*
2 * Copyright © 2018 Valve Corporation
3 * Copyright © 2017 Red Hat
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 *
24 */
25
26 #include "vtn_private.h"
27 #include "GLSL.ext.AMD.h"
28
29 bool
vtn_handle_amd_gcn_shader_instruction(struct vtn_builder * b,SpvOp ext_opcode,const uint32_t * w,unsigned count)30 vtn_handle_amd_gcn_shader_instruction(struct vtn_builder *b, SpvOp ext_opcode,
31 const uint32_t *w, unsigned count)
32 {
33 nir_ssa_def *def;
34 switch ((enum GcnShaderAMD)ext_opcode) {
35 case CubeFaceIndexAMD:
36 def = nir_cube_face_index_amd(&b->nb, vtn_get_nir_ssa(b, w[5]));
37 break;
38 case CubeFaceCoordAMD:
39 def = nir_cube_face_coord_amd(&b->nb, vtn_get_nir_ssa(b, w[5]));
40 break;
41 case TimeAMD: {
42 def = nir_pack_64_2x32(&b->nb, nir_shader_clock(&b->nb, NIR_SCOPE_SUBGROUP));
43 break;
44 }
45 default:
46 unreachable("Invalid opcode");
47 }
48
49 vtn_push_nir_ssa(b, w[2], def);
50
51 return true;
52 }
53
54 bool
vtn_handle_amd_shader_ballot_instruction(struct vtn_builder * b,SpvOp ext_opcode,const uint32_t * w,unsigned count)55 vtn_handle_amd_shader_ballot_instruction(struct vtn_builder *b, SpvOp ext_opcode,
56 const uint32_t *w, unsigned count)
57 {
58 unsigned num_args;
59 nir_intrinsic_op op;
60 switch ((enum ShaderBallotAMD)ext_opcode) {
61 case SwizzleInvocationsAMD:
62 num_args = 1;
63 op = nir_intrinsic_quad_swizzle_amd;
64 break;
65 case SwizzleInvocationsMaskedAMD:
66 num_args = 1;
67 op = nir_intrinsic_masked_swizzle_amd;
68 break;
69 case WriteInvocationAMD:
70 num_args = 3;
71 op = nir_intrinsic_write_invocation_amd;
72 break;
73 case MbcntAMD:
74 num_args = 1;
75 op = nir_intrinsic_mbcnt_amd;
76 break;
77 default:
78 unreachable("Invalid opcode");
79 }
80
81 const struct glsl_type *dest_type = vtn_get_type(b, w[1])->type;
82 nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->nb.shader, op);
83 nir_ssa_dest_init_for_type(&intrin->instr, &intrin->dest, dest_type, NULL);
84 if (nir_intrinsic_infos[op].src_components[0] == 0)
85 intrin->num_components = intrin->dest.ssa.num_components;
86
87 for (unsigned i = 0; i < num_args; i++)
88 intrin->src[i] = nir_src_for_ssa(vtn_get_nir_ssa(b, w[i + 5]));
89
90 if (intrin->intrinsic == nir_intrinsic_quad_swizzle_amd) {
91 struct vtn_value *val = vtn_value(b, w[6], vtn_value_type_constant);
92 unsigned mask = val->constant->values[0].u32 |
93 val->constant->values[1].u32 << 2 |
94 val->constant->values[2].u32 << 4 |
95 val->constant->values[3].u32 << 6;
96 nir_intrinsic_set_swizzle_mask(intrin, mask);
97
98 } else if (intrin->intrinsic == nir_intrinsic_masked_swizzle_amd) {
99 struct vtn_value *val = vtn_value(b, w[6], vtn_value_type_constant);
100 unsigned mask = val->constant->values[0].u32 |
101 val->constant->values[1].u32 << 5 |
102 val->constant->values[2].u32 << 10;
103 nir_intrinsic_set_swizzle_mask(intrin, mask);
104 } else if (intrin->intrinsic == nir_intrinsic_mbcnt_amd) {
105 /* The v_mbcnt instruction has an additional source that is added to the result.
106 * This is exposed by the NIR intrinsic but not by SPIR-V, so we add zero here.
107 */
108 intrin->src[1] = nir_src_for_ssa(nir_imm_int(&b->nb, 0));
109 }
110
111 nir_builder_instr_insert(&b->nb, &intrin->instr);
112 vtn_push_nir_ssa(b, w[2], &intrin->dest.ssa);
113
114 return true;
115 }
116
117 bool
vtn_handle_amd_shader_trinary_minmax_instruction(struct vtn_builder * b,SpvOp ext_opcode,const uint32_t * w,unsigned count)118 vtn_handle_amd_shader_trinary_minmax_instruction(struct vtn_builder *b, SpvOp ext_opcode,
119 const uint32_t *w, unsigned count)
120 {
121 struct nir_builder *nb = &b->nb;
122
123 unsigned num_inputs = count - 5;
124 assert(num_inputs == 3);
125 nir_ssa_def *src[3] = { NULL, };
126 for (unsigned i = 0; i < num_inputs; i++)
127 src[i] = vtn_get_nir_ssa(b, w[i + 5]);
128
129 /* place constants at src[1-2] for easier constant-folding */
130 for (unsigned i = 1; i <= 2; i++) {
131 if (nir_src_as_const_value(nir_src_for_ssa(src[0]))) {
132 nir_ssa_def* tmp = src[i];
133 src[i] = src[0];
134 src[0] = tmp;
135 }
136 }
137 nir_ssa_def *def;
138 switch ((enum ShaderTrinaryMinMaxAMD)ext_opcode) {
139 case FMin3AMD:
140 def = nir_fmin(nb, src[0], nir_fmin(nb, src[1], src[2]));
141 break;
142 case UMin3AMD:
143 def = nir_umin(nb, src[0], nir_umin(nb, src[1], src[2]));
144 break;
145 case SMin3AMD:
146 def = nir_imin(nb, src[0], nir_imin(nb, src[1], src[2]));
147 break;
148 case FMax3AMD:
149 def = nir_fmax(nb, src[0], nir_fmax(nb, src[1], src[2]));
150 break;
151 case UMax3AMD:
152 def = nir_umax(nb, src[0], nir_umax(nb, src[1], src[2]));
153 break;
154 case SMax3AMD:
155 def = nir_imax(nb, src[0], nir_imax(nb, src[1], src[2]));
156 break;
157 case FMid3AMD:
158 def = nir_fmin(nb, nir_fmax(nb, src[0], nir_fmin(nb, src[1], src[2])),
159 nir_fmax(nb, src[1], src[2]));
160 break;
161 case UMid3AMD:
162 def = nir_umin(nb, nir_umax(nb, src[0], nir_umin(nb, src[1], src[2])),
163 nir_umax(nb, src[1], src[2]));
164 break;
165 case SMid3AMD:
166 def = nir_imin(nb, nir_imax(nb, src[0], nir_imin(nb, src[1], src[2])),
167 nir_imax(nb, src[1], src[2]));
168 break;
169 default:
170 unreachable("unknown opcode\n");
171 break;
172 }
173
174 vtn_push_nir_ssa(b, w[2], def);
175
176 return true;
177 }
178
179 bool
vtn_handle_amd_shader_explicit_vertex_parameter_instruction(struct vtn_builder * b,SpvOp ext_opcode,const uint32_t * w,unsigned count)180 vtn_handle_amd_shader_explicit_vertex_parameter_instruction(struct vtn_builder *b, SpvOp ext_opcode,
181 const uint32_t *w, unsigned count)
182 {
183 nir_intrinsic_op op;
184 switch ((enum ShaderExplicitVertexParameterAMD)ext_opcode) {
185 case InterpolateAtVertexAMD:
186 op = nir_intrinsic_interp_deref_at_vertex;
187 break;
188 default:
189 unreachable("unknown opcode");
190 }
191
192 nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->nb.shader, op);
193
194 struct vtn_pointer *ptr =
195 vtn_value(b, w[5], vtn_value_type_pointer)->pointer;
196 nir_deref_instr *deref = vtn_pointer_to_deref(b, ptr);
197
198 /* If the value we are interpolating has an index into a vector then
199 * interpolate the vector and index the result of that instead. This is
200 * necessary because the index will get generated as a series of nir_bcsel
201 * instructions so it would no longer be an input variable.
202 */
203 const bool vec_array_deref = deref->deref_type == nir_deref_type_array &&
204 glsl_type_is_vector(nir_deref_instr_parent(deref)->type);
205
206 nir_deref_instr *vec_deref = NULL;
207 if (vec_array_deref) {
208 vec_deref = deref;
209 deref = nir_deref_instr_parent(deref);
210 }
211 intrin->src[0] = nir_src_for_ssa(&deref->dest.ssa);
212 intrin->src[1] = nir_src_for_ssa(vtn_get_nir_ssa(b, w[6]));
213
214 intrin->num_components = glsl_get_vector_elements(deref->type);
215 nir_ssa_dest_init(&intrin->instr, &intrin->dest,
216 glsl_get_vector_elements(deref->type),
217 glsl_get_bit_size(deref->type), NULL);
218
219 nir_builder_instr_insert(&b->nb, &intrin->instr);
220
221 nir_ssa_def *def;
222 if (vec_array_deref) {
223 assert(vec_deref);
224 def = nir_vector_extract(&b->nb, &intrin->dest.ssa,
225 vec_deref->arr.index.ssa);
226 } else {
227 def = &intrin->dest.ssa;
228 }
229 vtn_push_nir_ssa(b, w[2], def);
230
231 return true;
232 }
233