1 /*
2 * Copyright 2014, 2015 Red Hat.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23
/* The TGSI understood by the virgl host and the TGSI that current gallium
   produces will diverge over time, so add a transform stage that removes
   things we don't want to send unless the receiver supports them.
 */
28
29 #include "tgsi/tgsi_transform.h"
30 #include "tgsi/tgsi_info.h"
31 #include "tgsi/tgsi_scan.h"
32 #include "virgl_context.h"
33 #include "virgl_screen.h"
34
/* Describes one shader input (or system value) that is copied into a
 * temporary at the top of the shader so that later source operands can be
 * redirected to that temporary.
 */
struct virgl_input_temp {
   /* Register file the value was declared in (taken from its DCL). */
   enum tgsi_file_type file;

   /* Index within the INPUT or SV files, or ~0 if no DCL of this input */
   unsigned index;

   /* TGSI_FILE_TEMPORARY index it will be mapped to. */
   unsigned temp;

   /* NOTE(review): not read or written by any code visible in this file. */
   bool sint;
};
46
/* Slots of virgl_transform_context::input_temp, one per input that is routed
 * through a temporary by the prolog.
 */
enum virgl_input_temps {
   INPUT_TEMP_LAYER,
   INPUT_TEMP_VIEWPORT_INDEX,
   INPUT_TEMP_BLOCK_ID,
   INPUT_TEMP_HELPER_INVOCATION,
   /* Number of slots above; used as the size of the input_temp array. */
   INPUT_TEMP_COUNT,
};
54
/* State of one virgl TGSI transform run.  "base" must stay the first member:
 * the callbacks receive a tgsi_transform_context pointer and cast it back to
 * this type.
 */
struct virgl_transform_context {
   struct tgsi_transform_context base;
   /* Filled by tgsi_scan_shader() on the incoming tokens. */
   struct tgsi_shader_info info;

   /* Host capabilities / caller options, set up in virgl_tgsi_transform(). */
   bool cull_enabled;
   bool has_precise;
   bool fake_fp64;
   bool is_separable;

   /* First TEMPORARY index not used yet; grows while the declarations are
    * scanned and while the prolog allocates extra temporaries.
    */
   unsigned next_temp;

   /* First of the four scratch temporaries declared by the prolog. */
   unsigned src_temp;

   /* Output indices whose writes are redirected through a temporary, the
    * first temporary of the matching range, and the number of entries used.
    */
   unsigned writemask_fixup_outs[5];
   unsigned writemask_fixup_temps;
   unsigned num_writemask_fixups;

   struct virgl_input_temp input_temp[INPUT_TEMP_COUNT];

   /* Bitmask over temporaries recording precise writes; allocated in the
    * prolog and freed by virgl_tgsi_transform().
    */
   uint32_t *precise_flags;
};
76
77 static void
virgl_tgsi_transform_declaration_input_temp(const struct tgsi_full_declaration * decl,struct virgl_input_temp * input_temp,enum tgsi_semantic semantic_name)78 virgl_tgsi_transform_declaration_input_temp(const struct tgsi_full_declaration *decl,
79 struct virgl_input_temp *input_temp,
80 enum tgsi_semantic semantic_name)
81 {
82 if (decl->Semantic.Name == semantic_name) {
83 input_temp->file = decl->Declaration.File;
84 input_temp->index = decl->Range.First;
85 }
86 }
87
88 static void
virgl_tgsi_transform_declaration(struct tgsi_transform_context * ctx,struct tgsi_full_declaration * decl)89 virgl_tgsi_transform_declaration(struct tgsi_transform_context *ctx,
90 struct tgsi_full_declaration *decl)
91 {
92 struct virgl_transform_context *vtctx = (struct virgl_transform_context *)ctx;
93
94 switch (decl->Declaration.File) {
95 case TGSI_FILE_CONSTANT:
96 if (decl->Declaration.Dimension) {
97 if (decl->Dim.Index2D == 0)
98 decl->Declaration.Dimension = 0;
99 }
100 break;
101 case TGSI_FILE_INPUT:
102 virgl_tgsi_transform_declaration_input_temp(decl, &vtctx->input_temp[INPUT_TEMP_LAYER],
103 TGSI_SEMANTIC_LAYER);
104 virgl_tgsi_transform_declaration_input_temp(decl, &vtctx->input_temp[INPUT_TEMP_VIEWPORT_INDEX],
105 TGSI_SEMANTIC_VIEWPORT_INDEX);
106 break;
107 case TGSI_FILE_SYSTEM_VALUE:
108 virgl_tgsi_transform_declaration_input_temp(decl, &vtctx->input_temp[INPUT_TEMP_BLOCK_ID],
109 TGSI_SEMANTIC_BLOCK_ID);
110 virgl_tgsi_transform_declaration_input_temp(decl, &vtctx->input_temp[INPUT_TEMP_HELPER_INVOCATION],
111 TGSI_SEMANTIC_HELPER_INVOCATION);
112 break;
113 case TGSI_FILE_OUTPUT:
114 switch (decl->Semantic.Name) {
115 case TGSI_SEMANTIC_CLIPDIST:
116 vtctx->writemask_fixup_outs[vtctx->num_writemask_fixups++] = decl->Range.First;
117 if (decl->Range.Last != decl->Range.First)
118 vtctx->writemask_fixup_outs[vtctx->num_writemask_fixups++] = decl->Range.Last;
119 break;
120 case TGSI_SEMANTIC_CLIPVERTEX:
121 vtctx->writemask_fixup_outs[vtctx->num_writemask_fixups++] = decl->Range.First;
122 break;
123 case TGSI_SEMANTIC_COLOR:
124 /* Vertex front/backface color output also has issues with writemasking */
125 if (vtctx->base.processor != PIPE_SHADER_FRAGMENT)
126 vtctx->writemask_fixup_outs[vtctx->num_writemask_fixups++] = decl->Range.First;
127 break;
128 }
129 break;
130 case TGSI_FILE_TEMPORARY:
131 vtctx->next_temp = MAX2(vtctx->next_temp, decl->Range.Last + 1);
132 break;
133 default:
134 break;
135 }
136 assert(vtctx->num_writemask_fixups <= ARRAY_SIZE(vtctx->writemask_fixup_outs));
137
138 ctx->emit_declaration(ctx, decl);
139 }
140
141 /* for now just strip out the new properties the remote doesn't understand
142 yet */
143 static void
virgl_tgsi_transform_property(struct tgsi_transform_context * ctx,struct tgsi_full_property * prop)144 virgl_tgsi_transform_property(struct tgsi_transform_context *ctx,
145 struct tgsi_full_property *prop)
146 {
147 struct virgl_transform_context *vtctx = (struct virgl_transform_context *)ctx;
148 switch (prop->Property.PropertyName) {
149 case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED:
150 case TGSI_PROPERTY_NUM_CULLDIST_ENABLED:
151 if (vtctx->cull_enabled)
152 ctx->emit_property(ctx, prop);
153 break;
154 case TGSI_PROPERTY_NEXT_SHADER:
155 break;
156 default:
157 ctx->emit_property(ctx, prop);
158 break;
159 }
160 }
161
162 static void
virgl_mov_input_temp_sint(struct tgsi_transform_context * ctx,struct virgl_input_temp * temp)163 virgl_mov_input_temp_sint(struct tgsi_transform_context * ctx,
164 struct virgl_input_temp *temp)
165 {
166 if (temp->index != ~0) {
167 tgsi_transform_op2_inst(ctx, TGSI_OPCODE_IMAX,
168 TGSI_FILE_TEMPORARY, temp->temp, TGSI_WRITEMASK_XYZW,
169 temp->file, temp->index,
170 temp->file, temp->index, 0);
171 }
172 }
173
174 static void
virgl_mov_input_temp_uint(struct tgsi_transform_context * ctx,struct virgl_input_temp * temp)175 virgl_mov_input_temp_uint(struct tgsi_transform_context * ctx,
176 struct virgl_input_temp *temp)
177 {
178 if (temp->index != ~0) {
179 tgsi_transform_op1_inst(ctx, TGSI_OPCODE_MOV,
180 TGSI_FILE_TEMPORARY, temp->temp, TGSI_WRITEMASK_XYZW,
181 temp->file, temp->index);
182 }
183 }
184
185 static void
virgl_tgsi_transform_prolog(struct tgsi_transform_context * ctx)186 virgl_tgsi_transform_prolog(struct tgsi_transform_context * ctx)
187 {
188 struct virgl_transform_context *vtctx = (struct virgl_transform_context *)ctx;
189
190 if (vtctx->is_separable) {
191 struct tgsi_full_property prop = tgsi_default_full_property();
192 prop.Property.PropertyName = TGSI_PROPERTY_SEPARABLE_PROGRAM;
193 prop.Property.NrTokens += 1;
194 prop.u[0].Data = 1;
195 ctx->emit_property(ctx, &prop);
196 }
197
198 vtctx->src_temp = vtctx->next_temp;
199 vtctx->next_temp += 4;
200 tgsi_transform_temps_decl(ctx, vtctx->src_temp, vtctx->src_temp + 3);
201
202 if (vtctx->num_writemask_fixups) {
203 vtctx->writemask_fixup_temps = vtctx->next_temp;
204 vtctx->next_temp += vtctx->num_writemask_fixups;
205 tgsi_transform_temps_decl(ctx,
206 vtctx->writemask_fixup_temps,
207 vtctx->writemask_fixup_temps + vtctx->num_writemask_fixups - 1);
208 }
209
210 /* Assign input temps before we emit any instructions, but after we parsed
211 * existing temp decls.
212 */
213 for (int i = 0; i < ARRAY_SIZE(vtctx->input_temp); i++) {
214 if (vtctx->input_temp[i].index != ~0) {
215 vtctx->input_temp[i].temp = vtctx->next_temp++;
216 tgsi_transform_temp_decl(ctx, vtctx->input_temp[i].temp);
217 }
218 }
219
220 /* virglrenderer makes mistakes in the types of layer/viewport input
221 * references from unsigned ops, so we use a temp that we do a no-op signed
222 * op to at the top of the shader.
223 *
224 * https://gitlab.freedesktop.org/virgl/virglrenderer/-/merge_requests/615
225 */
226 virgl_mov_input_temp_sint(ctx, &vtctx->input_temp[INPUT_TEMP_LAYER]);
227 virgl_mov_input_temp_sint(ctx, &vtctx->input_temp[INPUT_TEMP_VIEWPORT_INDEX]);
228
229 /* virglrenderer also makes mistakes in the types of block id input
230 * references from signed ops, so we use a temp that we do a plain MOV to at
231 * the top of the shader. Also, it falls over if an unused channel's swizzle
232 * uses the .w of the block id.
233 */
234 if (vtctx->input_temp[INPUT_TEMP_BLOCK_ID].index != ~0) {
235 struct tgsi_full_instruction inst = tgsi_default_full_instruction();
236 inst.Instruction.Opcode = TGSI_OPCODE_MOV;
237 inst.Instruction.NumDstRegs = 1;
238 inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY,
239 inst.Dst[0].Register.Index = vtctx->input_temp[INPUT_TEMP_BLOCK_ID].temp;
240 inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZ;
241 inst.Instruction.NumSrcRegs = 1;
242 tgsi_transform_src_reg_xyzw(&inst.Src[0],
243 vtctx->input_temp[INPUT_TEMP_BLOCK_ID].file,
244 vtctx->input_temp[INPUT_TEMP_BLOCK_ID].index);
245 inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
246 inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_Y;
247 inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_Z;
248 inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_Z;
249 ctx->emit_instruction(ctx, &inst);
250 }
251
252 virgl_mov_input_temp_uint(ctx, &vtctx->input_temp[INPUT_TEMP_HELPER_INVOCATION]);
253
254 vtctx->precise_flags = calloc((vtctx->next_temp + 7)/8, sizeof(uint32_t));
255 }
256
257 static void
virgl_tgsi_rewrite_src_for_input_temp(struct virgl_input_temp * temp,struct tgsi_full_src_register * src)258 virgl_tgsi_rewrite_src_for_input_temp(struct virgl_input_temp *temp, struct tgsi_full_src_register *src)
259 {
260 if (src->Register.File == temp->file && src->Register.Index == temp->index) {
261 src->Register.File = TGSI_FILE_TEMPORARY;
262 src->Register.Index = temp->temp;
263 }
264 }
265
266 static void
virgl_tgsi_transform_instruction(struct tgsi_transform_context * ctx,struct tgsi_full_instruction * inst)267 virgl_tgsi_transform_instruction(struct tgsi_transform_context *ctx,
268 struct tgsi_full_instruction *inst)
269 {
270 struct virgl_transform_context *vtctx = (struct virgl_transform_context *)ctx;
271 if (vtctx->fake_fp64 &&
272 (tgsi_opcode_infer_src_type(inst->Instruction.Opcode, 0) == TGSI_TYPE_DOUBLE ||
273 tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, 0) == TGSI_TYPE_DOUBLE)) {
274 debug_printf("VIRGL: ARB_gpu_shader_fp64 is exposed but not supported.");
275 return;
276 }
277
278 if (!vtctx->has_precise && inst->Instruction.Precise)
279 inst->Instruction.Precise = 0;
280
281 /* For outputs NTT adds a final mov op but NIR doesn't propagate precise with moves,
282 * so that we don't see whether the assignment is from a precise instruction, but
283 * we need to know this to set the output decoration correctly, so propagate the
284 * precise flag with TGSI */
285 for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) {
286 if (inst->Dst[i].Register.File == TGSI_FILE_TEMPORARY) {
287 uint32_t index = inst->Dst[i].Register.Index / 8;
288 uint32_t bits = inst->Dst[i].Register.WriteMask << (inst->Dst[i].Register.Index % 8);
289
290 /* Since we re-use temps set and clear the precise flag according to the last use
291 * for the register index and written components. Since moves are not marked
292 * as precise originally, and we may end up with an if/else clause that assignes
293 * a precise result in the if branche, but does a simple move from a constant
294 * on the else branche, we don't clear the flag when we hit a mov.
295 * We do the conservatiove approach here, because virglrenderer emits different temp
296 * ranges, and we don't want to mark all temps as precise only because we have
297 * one precise output */
298 if (inst->Instruction.Precise)
299 vtctx->precise_flags[index] |= bits;
300 } else if (inst->Instruction.Opcode == TGSI_OPCODE_MOV) {
301 for (int i = 0; i < inst->Instruction.NumSrcRegs; ++i) {
302 if (inst->Src[i].Register.File == TGSI_FILE_TEMPORARY) {
303 uint32_t index = inst->Src[i].Register.Index / 8;
304 uint32_t read_mask = (1 << inst->Src[i].Register.SwizzleX) |
305 (1 << inst->Src[i].Register.SwizzleY) |
306 (1 << inst->Src[i].Register.SwizzleZ) |
307 (1 << inst->Src[i].Register.SwizzleW);
308 uint32_t bits = read_mask << (inst->Dst[i].Register.Index % 8);
309 if (vtctx->precise_flags[index] & bits) {
310 inst->Instruction.Precise = 1;
311 break;
312 }
313 }
314 }
315 }
316 }
317
318 /* virglrenderer can run out of space in internal buffers for immediates as
319 * tex operands. Move the first immediate tex arg to a temp to save space in
320 * the buffer.
321 *
322 * https://gitlab.freedesktop.org/virgl/virglrenderer/-/merge_requests/582
323 */
324 if (tgsi_get_opcode_info(inst->Instruction.Opcode)->is_tex &&
325 inst->Src[0].Register.File == TGSI_FILE_IMMEDIATE) {
326 tgsi_transform_op1_inst(ctx, TGSI_OPCODE_MOV,
327 TGSI_FILE_TEMPORARY, vtctx->src_temp,
328 TGSI_WRITEMASK_XYZW,
329 inst->Src[0].Register.File,
330 inst->Src[0].Register.Index);
331 inst->Src[0].Register.File = TGSI_FILE_TEMPORARY;
332 inst->Src[0].Register.Index = vtctx->src_temp;
333 }
334
335 for (unsigned i = 0; i < inst->Instruction.NumDstRegs; i++) {
336 /* virglrenderer would fail to compile on clipdist, clipvertex, and some
337 * two-sided-related color writes without a full writemask. So, we write
338 * to a temp and store that temp with a full writemask.
339 *
340 * https://gitlab.freedesktop.org/virgl/virglrenderer/-/merge_requests/616
341 */
342 if (inst->Dst[i].Register.File == TGSI_FILE_OUTPUT) {
343 for (int j = 0; j < vtctx->num_writemask_fixups; j++) {
344 if (inst->Dst[i].Register.Index == vtctx->writemask_fixup_outs[j]) {
345 inst->Dst[i].Register.File = TGSI_FILE_TEMPORARY;
346 inst->Dst[i].Register.Index = vtctx->writemask_fixup_temps + j;
347 break;
348 }
349 }
350 }
351 }
352
353 for (unsigned i = 0; i < inst->Instruction.NumSrcRegs; i++) {
354 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT &&
355 inst->Src[i].Register.Dimension &&
356 inst->Src[i].Dimension.Index == 0)
357 inst->Src[i].Register.Dimension = 0;
358
359 for (int j = 0; j < ARRAY_SIZE(vtctx->input_temp); j++)
360 virgl_tgsi_rewrite_src_for_input_temp(&vtctx->input_temp[j], &inst->Src[i]);
361
362 /* virglrenderer double inputs twice, so move them to temps and drop the
363 * swizzle from the double op.
364 */
365 if (tgsi_opcode_infer_src_type(inst->Instruction.Opcode, i) == TGSI_TYPE_DOUBLE) {
366 struct tgsi_full_instruction temp_inst = tgsi_default_full_instruction();
367 temp_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
368 temp_inst.Instruction.NumDstRegs = 1;
369 temp_inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY,
370 temp_inst.Dst[0].Register.Index = vtctx->src_temp + i;
371 temp_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZ;
372 temp_inst.Instruction.NumSrcRegs = 1;
373 tgsi_transform_src_reg_xyzw(&temp_inst.Src[0], inst->Src[i].Register.File, inst->Src[i].Register.Index);
374 temp_inst.Src[0].Register.SwizzleX = inst->Src[i].Register.SwizzleX;
375 temp_inst.Src[0].Register.SwizzleY = inst->Src[i].Register.SwizzleY;
376 temp_inst.Src[0].Register.SwizzleZ = inst->Src[i].Register.SwizzleZ;
377 temp_inst.Src[0].Register.SwizzleW = inst->Src[i].Register.SwizzleW;
378 ctx->emit_instruction(ctx, &temp_inst);
379
380 inst->Src[i].Register.File = TGSI_FILE_TEMPORARY;
381 inst->Src[i].Register.Index = vtctx->src_temp + i;
382 inst->Src[i].Register.SwizzleX = TGSI_SWIZZLE_X;
383 inst->Src[i].Register.SwizzleY = TGSI_SWIZZLE_Y;
384 inst->Src[i].Register.SwizzleZ = TGSI_SWIZZLE_Z;
385 inst->Src[i].Register.SwizzleW = TGSI_SWIZZLE_W;
386 }
387 }
388
389 /* virglrenderer doesn't resolve non-float output write properly,
390 * so we have to first write to a temporary */
391 if (inst->Instruction.Opcode != TGSI_OPCODE_MOV &&
392 !tgsi_get_opcode_info(inst->Instruction.Opcode)->is_tex &&
393 !tgsi_get_opcode_info(inst->Instruction.Opcode)->is_store &&
394 inst->Dst[0].Register.File == TGSI_FILE_OUTPUT &&
395 tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, 0) != TGSI_TYPE_FLOAT) {
396 struct tgsi_full_instruction op_to_temp = *inst;
397 op_to_temp.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
398 op_to_temp.Dst[0].Register.Index = vtctx->src_temp;
399 op_to_temp.Dst[0].Dimension.Indirect = 0;
400 op_to_temp.Dst[0].Register.Indirect = 0;
401 ctx->emit_instruction(ctx, &op_to_temp);
402
403 inst->Instruction.Opcode = TGSI_OPCODE_MOV;
404 inst->Instruction.NumSrcRegs = 1;
405
406 memset(&inst->Src[0], 0, sizeof(inst->Src[0]));
407 inst->Src[0].Register.File = TGSI_FILE_TEMPORARY;
408 inst->Src[0].Register.Index = vtctx->src_temp;
409 inst->Src[0].Register.SwizzleY = 1;
410 inst->Src[0].Register.SwizzleZ = 2;
411 inst->Src[0].Register.SwizzleW = 3;
412 }
413
414 ctx->emit_instruction(ctx, inst);
415
416 for (unsigned i = 0; i < inst->Instruction.NumDstRegs; i++) {
417 if (vtctx->num_writemask_fixups &&
418 inst->Dst[i].Register.File == TGSI_FILE_TEMPORARY &&
419 inst->Dst[i].Register.Index >= vtctx->writemask_fixup_temps &&
420 inst->Dst[i].Register.Index < vtctx->writemask_fixup_temps + vtctx->num_writemask_fixups) {
421 /* Emit the fixup MOV from the clipdist/vert temporary to the real output. */
422 unsigned real_out = vtctx->writemask_fixup_outs[inst->Dst[i].Register.Index - vtctx->writemask_fixup_temps];
423 tgsi_transform_op1_inst(ctx, TGSI_OPCODE_MOV,
424 TGSI_FILE_OUTPUT, real_out, TGSI_WRITEMASK_XYZW,
425 inst->Dst[i].Register.File, inst->Dst[i].Register.Index);
426 }
427 }
428 }
429
virgl_tgsi_transform(struct virgl_screen * vscreen,const struct tgsi_token * tokens_in,bool is_separable)430 struct tgsi_token *virgl_tgsi_transform(struct virgl_screen *vscreen, const struct tgsi_token *tokens_in,
431 bool is_separable)
432 {
433 struct virgl_transform_context transform;
434 const uint newLen = tgsi_num_tokens(tokens_in);
435
436 memset(&transform, 0, sizeof(transform));
437 transform.base.transform_declaration = virgl_tgsi_transform_declaration;
438 transform.base.transform_property = virgl_tgsi_transform_property;
439 transform.base.transform_instruction = virgl_tgsi_transform_instruction;
440 transform.base.prolog = virgl_tgsi_transform_prolog;
441 transform.cull_enabled = vscreen->caps.caps.v1.bset.has_cull;
442 transform.has_precise = vscreen->caps.caps.v2.capability_bits & VIRGL_CAP_TGSI_PRECISE;
443 transform.fake_fp64 =
444 vscreen->caps.caps.v2.capability_bits & VIRGL_CAP_FAKE_FP64;
445 transform.is_separable = is_separable && (vscreen->caps.caps.v2.capability_bits_v2 & VIRGL_CAP_V2_SSO);
446
447 for (int i = 0; i < ARRAY_SIZE(transform.input_temp); i++)
448 transform.input_temp[i].index = ~0;
449
450 tgsi_scan_shader(tokens_in, &transform.info);
451
452 struct tgsi_token *new_tokens = tgsi_transform_shader(tokens_in, newLen, &transform.base);
453 free(transform.precise_flags);
454 return new_tokens;
455
456 }
457