• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2014, 2015 Red Hat.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * on the rights to use, copy, modify, merge, publish, distribute, sub
8  * license, and/or sell copies of the Software, and to permit persons to whom
9  * the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
22  */
23 
/* The TGSI dialect understood by the virgl host and the TGSI that current
 * gallium produces will diverge over time, so add a transform stage that
 * removes things we don't want to send unless the receiver supports them.
 */
28 
29 #include "tgsi/tgsi_transform.h"
30 #include "tgsi/tgsi_info.h"
31 #include "tgsi/tgsi_scan.h"
32 #include "virgl_context.h"
33 #include "virgl_screen.h"
34 
35 struct virgl_input_temp {
36    enum tgsi_file_type file;
37 
38    /* Index within in the INPUT or SV files, or ~0 if no DCL of this input */
39    unsigned index;
40 
41    /* TGSI_FILE_TEMPORARY index it will be mapped to. */
42    unsigned temp;
43 
44    bool sint;
45 };
46 
/* Slots of virgl_transform_context::input_temp, one per input that is
 * copied to a temporary.  INPUT_TEMP_COUNT must stay last: it is the
 * number of slots.
 */
enum virgl_input_temps {
   INPUT_TEMP_LAYER = 0,
   INPUT_TEMP_VIEWPORT_INDEX,
   INPUT_TEMP_BLOCK_ID,
   INPUT_TEMP_HELPER_INVOCATION,
   INPUT_TEMP_COUNT,
};
54 
55 struct virgl_transform_context {
56    struct tgsi_transform_context base;
57    struct tgsi_shader_info info;
58 
59    bool cull_enabled;
60    bool has_precise;
61    bool fake_fp64;
62    bool is_separable;
63 
64    unsigned next_temp;
65 
66    unsigned src_temp;
67 
68    unsigned writemask_fixup_outs[5];
69    unsigned writemask_fixup_temps;
70    unsigned num_writemask_fixups;
71 
72    struct virgl_input_temp input_temp[INPUT_TEMP_COUNT];
73 
74    uint32_t *precise_flags;
75 };
76 
77 static void
virgl_tgsi_transform_declaration_input_temp(const struct tgsi_full_declaration * decl,struct virgl_input_temp * input_temp,enum tgsi_semantic semantic_name)78 virgl_tgsi_transform_declaration_input_temp(const struct tgsi_full_declaration *decl,
79                                             struct virgl_input_temp *input_temp,
80                                             enum tgsi_semantic semantic_name)
81 {
82    if (decl->Semantic.Name == semantic_name) {
83       input_temp->file = decl->Declaration.File;
84       input_temp->index = decl->Range.First;
85    }
86 }
87 
88 static void
virgl_tgsi_transform_declaration(struct tgsi_transform_context * ctx,struct tgsi_full_declaration * decl)89 virgl_tgsi_transform_declaration(struct tgsi_transform_context *ctx,
90                                  struct tgsi_full_declaration *decl)
91 {
92    struct virgl_transform_context *vtctx = (struct virgl_transform_context *)ctx;
93 
94    switch (decl->Declaration.File) {
95    case TGSI_FILE_CONSTANT:
96       if (decl->Declaration.Dimension) {
97          if (decl->Dim.Index2D == 0)
98             decl->Declaration.Dimension = 0;
99       }
100       break;
101    case TGSI_FILE_INPUT:
102       virgl_tgsi_transform_declaration_input_temp(decl, &vtctx->input_temp[INPUT_TEMP_LAYER],
103                                                    TGSI_SEMANTIC_LAYER);
104       virgl_tgsi_transform_declaration_input_temp(decl, &vtctx->input_temp[INPUT_TEMP_VIEWPORT_INDEX],
105                                                    TGSI_SEMANTIC_VIEWPORT_INDEX);
106       break;
107    case TGSI_FILE_SYSTEM_VALUE:
108       virgl_tgsi_transform_declaration_input_temp(decl, &vtctx->input_temp[INPUT_TEMP_BLOCK_ID],
109                                                    TGSI_SEMANTIC_BLOCK_ID);
110       virgl_tgsi_transform_declaration_input_temp(decl, &vtctx->input_temp[INPUT_TEMP_HELPER_INVOCATION],
111                                                    TGSI_SEMANTIC_HELPER_INVOCATION);
112       break;
113    case TGSI_FILE_OUTPUT:
114       switch (decl->Semantic.Name) {
115       case TGSI_SEMANTIC_CLIPDIST:
116          vtctx->writemask_fixup_outs[vtctx->num_writemask_fixups++] = decl->Range.First;
117          if (decl->Range.Last != decl->Range.First)
118             vtctx->writemask_fixup_outs[vtctx->num_writemask_fixups++] = decl->Range.Last;
119          break;
120       case TGSI_SEMANTIC_CLIPVERTEX:
121          vtctx->writemask_fixup_outs[vtctx->num_writemask_fixups++] = decl->Range.First;
122          break;
123       case TGSI_SEMANTIC_COLOR:
124          /* Vertex front/backface color output also has issues with writemasking */
125          if (vtctx->base.processor != PIPE_SHADER_FRAGMENT)
126             vtctx->writemask_fixup_outs[vtctx->num_writemask_fixups++] = decl->Range.First;
127          break;
128       }
129       break;
130    case TGSI_FILE_TEMPORARY:
131       vtctx->next_temp = MAX2(vtctx->next_temp, decl->Range.Last + 1);
132       break;
133    default:
134       break;
135    }
136    assert(vtctx->num_writemask_fixups <= ARRAY_SIZE(vtctx->writemask_fixup_outs));
137 
138    ctx->emit_declaration(ctx, decl);
139 }
140 
141 /* for now just strip out the new properties the remote doesn't understand
142    yet */
143 static void
virgl_tgsi_transform_property(struct tgsi_transform_context * ctx,struct tgsi_full_property * prop)144 virgl_tgsi_transform_property(struct tgsi_transform_context *ctx,
145                               struct tgsi_full_property *prop)
146 {
147    struct virgl_transform_context *vtctx = (struct virgl_transform_context *)ctx;
148    switch (prop->Property.PropertyName) {
149    case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED:
150    case TGSI_PROPERTY_NUM_CULLDIST_ENABLED:
151       if (vtctx->cull_enabled)
152     ctx->emit_property(ctx, prop);
153       break;
154    case TGSI_PROPERTY_NEXT_SHADER:
155       break;
156    default:
157       ctx->emit_property(ctx, prop);
158       break;
159    }
160 }
161 
162 static void
virgl_mov_input_temp_sint(struct tgsi_transform_context * ctx,struct virgl_input_temp * temp)163 virgl_mov_input_temp_sint(struct tgsi_transform_context * ctx,
164                           struct virgl_input_temp *temp)
165 {
166    if (temp->index != ~0) {
167       tgsi_transform_op2_inst(ctx, TGSI_OPCODE_IMAX,
168                               TGSI_FILE_TEMPORARY, temp->temp, TGSI_WRITEMASK_XYZW,
169                               temp->file, temp->index,
170                               temp->file, temp->index, 0);
171    }
172 }
173 
174 static void
virgl_mov_input_temp_uint(struct tgsi_transform_context * ctx,struct virgl_input_temp * temp)175 virgl_mov_input_temp_uint(struct tgsi_transform_context * ctx,
176                           struct virgl_input_temp *temp)
177 {
178    if (temp->index != ~0) {
179       tgsi_transform_op1_inst(ctx, TGSI_OPCODE_MOV,
180                               TGSI_FILE_TEMPORARY, temp->temp, TGSI_WRITEMASK_XYZW,
181                               temp->file, temp->index);
182    }
183 }
184 
185 static void
virgl_tgsi_transform_prolog(struct tgsi_transform_context * ctx)186 virgl_tgsi_transform_prolog(struct tgsi_transform_context * ctx)
187 {
188    struct virgl_transform_context *vtctx = (struct virgl_transform_context *)ctx;
189 
190    if (vtctx->is_separable) {
191       struct tgsi_full_property prop = tgsi_default_full_property();
192       prop.Property.PropertyName = TGSI_PROPERTY_SEPARABLE_PROGRAM;
193       prop.Property.NrTokens += 1;
194       prop.u[0].Data = 1;
195       ctx->emit_property(ctx, &prop);
196    }
197 
198    vtctx->src_temp = vtctx->next_temp;
199    vtctx->next_temp += 4;
200    tgsi_transform_temps_decl(ctx, vtctx->src_temp, vtctx->src_temp + 3);
201 
202    if (vtctx->num_writemask_fixups) {
203       vtctx->writemask_fixup_temps = vtctx->next_temp;
204       vtctx->next_temp += vtctx->num_writemask_fixups;
205       tgsi_transform_temps_decl(ctx,
206                                 vtctx->writemask_fixup_temps,
207                                 vtctx->writemask_fixup_temps + vtctx->num_writemask_fixups - 1);
208    }
209 
210    /* Assign input temps before we emit any instructions, but after we parsed
211     * existing temp decls.
212     */
213    for (int i = 0; i < ARRAY_SIZE(vtctx->input_temp); i++) {
214       if (vtctx->input_temp[i].index != ~0) {
215          vtctx->input_temp[i].temp = vtctx->next_temp++;
216          tgsi_transform_temp_decl(ctx, vtctx->input_temp[i].temp);
217       }
218    }
219 
220    /* virglrenderer makes mistakes in the types of layer/viewport input
221     * references from unsigned ops, so we use a temp that we do a no-op signed
222     * op to at the top of the shader.
223     *
224     * https://gitlab.freedesktop.org/virgl/virglrenderer/-/merge_requests/615
225     */
226    virgl_mov_input_temp_sint(ctx, &vtctx->input_temp[INPUT_TEMP_LAYER]);
227    virgl_mov_input_temp_sint(ctx, &vtctx->input_temp[INPUT_TEMP_VIEWPORT_INDEX]);
228 
229    /* virglrenderer also makes mistakes in the types of block id input
230     * references from signed ops, so we use a temp that we do a plain MOV to at
231     * the top of the shader.  Also, it falls over if an unused channel's swizzle
232     * uses the .w of the block id.
233     */
234    if (vtctx->input_temp[INPUT_TEMP_BLOCK_ID].index != ~0) {
235       struct tgsi_full_instruction inst = tgsi_default_full_instruction();
236       inst.Instruction.Opcode = TGSI_OPCODE_MOV;
237       inst.Instruction.NumDstRegs = 1;
238       inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY,
239       inst.Dst[0].Register.Index = vtctx->input_temp[INPUT_TEMP_BLOCK_ID].temp;
240       inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZ;
241       inst.Instruction.NumSrcRegs = 1;
242       tgsi_transform_src_reg_xyzw(&inst.Src[0],
243                                   vtctx->input_temp[INPUT_TEMP_BLOCK_ID].file,
244                                   vtctx->input_temp[INPUT_TEMP_BLOCK_ID].index);
245       inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
246       inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_Y;
247       inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_Z;
248       inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_Z;
249       ctx->emit_instruction(ctx, &inst);
250    }
251 
252    virgl_mov_input_temp_uint(ctx, &vtctx->input_temp[INPUT_TEMP_HELPER_INVOCATION]);
253 
254    vtctx->precise_flags = calloc((vtctx->next_temp + 7)/8, sizeof(uint32_t));
255 }
256 
257 static void
virgl_tgsi_rewrite_src_for_input_temp(struct virgl_input_temp * temp,struct tgsi_full_src_register * src)258 virgl_tgsi_rewrite_src_for_input_temp(struct virgl_input_temp *temp, struct tgsi_full_src_register *src)
259 {
260    if (src->Register.File == temp->file && src->Register.Index == temp->index) {
261       src->Register.File = TGSI_FILE_TEMPORARY;
262       src->Register.Index = temp->temp;
263    }
264 }
265 
266 static void
virgl_tgsi_transform_instruction(struct tgsi_transform_context * ctx,struct tgsi_full_instruction * inst)267 virgl_tgsi_transform_instruction(struct tgsi_transform_context *ctx,
268              struct tgsi_full_instruction *inst)
269 {
270    struct virgl_transform_context *vtctx = (struct virgl_transform_context *)ctx;
271    if (vtctx->fake_fp64 &&
272        (tgsi_opcode_infer_src_type(inst->Instruction.Opcode, 0) == TGSI_TYPE_DOUBLE ||
273         tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, 0) == TGSI_TYPE_DOUBLE)) {
274       debug_printf("VIRGL: ARB_gpu_shader_fp64 is exposed but not supported.");
275       return;
276    }
277 
278    if (!vtctx->has_precise && inst->Instruction.Precise)
279       inst->Instruction.Precise = 0;
280 
281    /* For outputs NTT adds a final mov op but NIR doesn't propagate precise with moves,
282     * so that we don't see whether the assignment is from a precise instruction, but
283     * we need to know this to set the output decoration correctly, so propagate the
284     * precise flag with TGSI */
285    for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) {
286       if (inst->Dst[i].Register.File == TGSI_FILE_TEMPORARY) {
287          uint32_t index = inst->Dst[i].Register.Index / 8;
288          uint32_t bits = inst->Dst[i].Register.WriteMask << (inst->Dst[i].Register.Index % 8);
289 
290          /* Since we re-use temps set and clear the precise flag according to the last use
291           * for the register index and written components. Since moves are not marked
292           * as precise originally, and we may end up with an if/else clause that assignes
293           * a precise result in the if branche, but does a simple move from a constant
294           * on the else branche, we don't clear the flag when we hit a mov.
295           * We do the conservatiove approach here, because virglrenderer emits different temp
296           * ranges, and we don't want to mark all temps as precise only because we have
297           * one precise output */
298          if (inst->Instruction.Precise)
299             vtctx->precise_flags[index] |= bits;
300       } else if (inst->Instruction.Opcode == TGSI_OPCODE_MOV) {
301          for (int i = 0; i < inst->Instruction.NumSrcRegs; ++i) {
302             if (inst->Src[i].Register.File == TGSI_FILE_TEMPORARY) {
303                uint32_t index = inst->Src[i].Register.Index / 8;
304                uint32_t read_mask = (1 << inst->Src[i].Register.SwizzleX) |
305                                     (1 << inst->Src[i].Register.SwizzleY) |
306                                     (1 << inst->Src[i].Register.SwizzleZ) |
307                                     (1 << inst->Src[i].Register.SwizzleW);
308                uint32_t bits = read_mask << (inst->Dst[i].Register.Index % 8);
309                if (vtctx->precise_flags[index] & bits) {
310                   inst->Instruction.Precise = 1;
311                   break;
312                }
313             }
314          }
315       }
316    }
317 
318    /* virglrenderer can run out of space in internal buffers for immediates as
319     * tex operands.  Move the first immediate tex arg to a temp to save space in
320     * the buffer.
321     *
322     * https://gitlab.freedesktop.org/virgl/virglrenderer/-/merge_requests/582
323     */
324    if (tgsi_get_opcode_info(inst->Instruction.Opcode)->is_tex &&
325        inst->Src[0].Register.File == TGSI_FILE_IMMEDIATE) {
326       tgsi_transform_op1_inst(ctx, TGSI_OPCODE_MOV,
327                               TGSI_FILE_TEMPORARY, vtctx->src_temp,
328                               TGSI_WRITEMASK_XYZW,
329                               inst->Src[0].Register.File,
330                               inst->Src[0].Register.Index);
331       inst->Src[0].Register.File = TGSI_FILE_TEMPORARY;
332       inst->Src[0].Register.Index = vtctx->src_temp;
333    }
334 
335    for (unsigned i = 0; i < inst->Instruction.NumDstRegs; i++) {
336       /* virglrenderer would fail to compile on clipdist, clipvertex, and some
337        * two-sided-related color writes without a full writemask.  So, we write
338        * to a temp and store that temp with a full writemask.
339        *
340        * https://gitlab.freedesktop.org/virgl/virglrenderer/-/merge_requests/616
341        */
342       if (inst->Dst[i].Register.File == TGSI_FILE_OUTPUT) {
343          for (int j = 0; j < vtctx->num_writemask_fixups; j++) {
344             if (inst->Dst[i].Register.Index == vtctx->writemask_fixup_outs[j]) {
345                inst->Dst[i].Register.File = TGSI_FILE_TEMPORARY;
346                inst->Dst[i].Register.Index = vtctx->writemask_fixup_temps + j;
347                break;
348             }
349          }
350       }
351    }
352 
353    for (unsigned i = 0; i < inst->Instruction.NumSrcRegs; i++) {
354       if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT &&
355           inst->Src[i].Register.Dimension &&
356           inst->Src[i].Dimension.Index == 0)
357          inst->Src[i].Register.Dimension = 0;
358 
359       for (int j = 0; j < ARRAY_SIZE(vtctx->input_temp); j++)
360          virgl_tgsi_rewrite_src_for_input_temp(&vtctx->input_temp[j], &inst->Src[i]);
361 
362       /* virglrenderer double inputs twice, so move them to temps and drop the
363        * swizzle from the double op.
364        */
365       if (tgsi_opcode_infer_src_type(inst->Instruction.Opcode, i) == TGSI_TYPE_DOUBLE) {
366          struct tgsi_full_instruction temp_inst = tgsi_default_full_instruction();
367          temp_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
368          temp_inst.Instruction.NumDstRegs = 1;
369          temp_inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY,
370          temp_inst.Dst[0].Register.Index = vtctx->src_temp + i;
371          temp_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZ;
372          temp_inst.Instruction.NumSrcRegs = 1;
373          tgsi_transform_src_reg_xyzw(&temp_inst.Src[0], inst->Src[i].Register.File, inst->Src[i].Register.Index);
374          temp_inst.Src[0].Register.SwizzleX = inst->Src[i].Register.SwizzleX;
375          temp_inst.Src[0].Register.SwizzleY = inst->Src[i].Register.SwizzleY;
376          temp_inst.Src[0].Register.SwizzleZ = inst->Src[i].Register.SwizzleZ;
377          temp_inst.Src[0].Register.SwizzleW = inst->Src[i].Register.SwizzleW;
378          ctx->emit_instruction(ctx, &temp_inst);
379 
380          inst->Src[i].Register.File = TGSI_FILE_TEMPORARY;
381          inst->Src[i].Register.Index = vtctx->src_temp + i;
382          inst->Src[i].Register.SwizzleX = TGSI_SWIZZLE_X;
383          inst->Src[i].Register.SwizzleY = TGSI_SWIZZLE_Y;
384          inst->Src[i].Register.SwizzleZ = TGSI_SWIZZLE_Z;
385          inst->Src[i].Register.SwizzleW = TGSI_SWIZZLE_W;
386       }
387    }
388 
389    /* virglrenderer doesn't resolve non-float output write properly,
390     * so we have to first write to a temporary */
391    if (inst->Instruction.Opcode != TGSI_OPCODE_MOV &&
392        !tgsi_get_opcode_info(inst->Instruction.Opcode)->is_tex &&
393        !tgsi_get_opcode_info(inst->Instruction.Opcode)->is_store &&
394        inst->Dst[0].Register.File == TGSI_FILE_OUTPUT &&
395        tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, 0) != TGSI_TYPE_FLOAT)  {
396       struct tgsi_full_instruction op_to_temp = *inst;
397       op_to_temp.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
398       op_to_temp.Dst[0].Register.Index = vtctx->src_temp;
399       op_to_temp.Dst[0].Dimension.Indirect = 0;
400       op_to_temp.Dst[0].Register.Indirect = 0;
401       ctx->emit_instruction(ctx, &op_to_temp);
402 
403       inst->Instruction.Opcode = TGSI_OPCODE_MOV;
404       inst->Instruction.NumSrcRegs = 1;
405 
406       memset(&inst->Src[0], 0, sizeof(inst->Src[0]));
407       inst->Src[0].Register.File = TGSI_FILE_TEMPORARY;
408       inst->Src[0].Register.Index = vtctx->src_temp;
409       inst->Src[0].Register.SwizzleY = 1;
410       inst->Src[0].Register.SwizzleZ = 2;
411       inst->Src[0].Register.SwizzleW = 3;
412    }
413 
414    ctx->emit_instruction(ctx, inst);
415 
416    for (unsigned i = 0; i < inst->Instruction.NumDstRegs; i++) {
417       if (vtctx->num_writemask_fixups &&
418          inst->Dst[i].Register.File == TGSI_FILE_TEMPORARY &&
419          inst->Dst[i].Register.Index >= vtctx->writemask_fixup_temps &&
420          inst->Dst[i].Register.Index < vtctx->writemask_fixup_temps + vtctx->num_writemask_fixups) {
421          /* Emit the fixup MOV from the clipdist/vert temporary to the real output. */
422          unsigned real_out = vtctx->writemask_fixup_outs[inst->Dst[i].Register.Index - vtctx->writemask_fixup_temps];
423          tgsi_transform_op1_inst(ctx, TGSI_OPCODE_MOV,
424                                  TGSI_FILE_OUTPUT, real_out, TGSI_WRITEMASK_XYZW,
425                                  inst->Dst[i].Register.File, inst->Dst[i].Register.Index);
426       }
427    }
428 }
429 
virgl_tgsi_transform(struct virgl_screen * vscreen,const struct tgsi_token * tokens_in,bool is_separable)430 struct tgsi_token *virgl_tgsi_transform(struct virgl_screen *vscreen, const struct tgsi_token *tokens_in,
431                                         bool is_separable)
432 {
433    struct virgl_transform_context transform;
434    const uint newLen = tgsi_num_tokens(tokens_in);
435 
436    memset(&transform, 0, sizeof(transform));
437    transform.base.transform_declaration = virgl_tgsi_transform_declaration;
438    transform.base.transform_property = virgl_tgsi_transform_property;
439    transform.base.transform_instruction = virgl_tgsi_transform_instruction;
440    transform.base.prolog = virgl_tgsi_transform_prolog;
441    transform.cull_enabled = vscreen->caps.caps.v1.bset.has_cull;
442    transform.has_precise = vscreen->caps.caps.v2.capability_bits & VIRGL_CAP_TGSI_PRECISE;
443    transform.fake_fp64 =
444       vscreen->caps.caps.v2.capability_bits & VIRGL_CAP_FAKE_FP64;
445    transform.is_separable = is_separable && (vscreen->caps.caps.v2.capability_bits_v2 & VIRGL_CAP_V2_SSO);
446 
447    for (int i = 0; i < ARRAY_SIZE(transform.input_temp); i++)
448       transform.input_temp[i].index = ~0;
449 
450    tgsi_scan_shader(tokens_in, &transform.info);
451 
452    struct tgsi_token *new_tokens = tgsi_transform_shader(tokens_in, newLen, &transform.base);
453    free(transform.precise_flags);
454    return new_tokens;
455 
456 }
457